zsh/ported/
parse.rs

1//! Zsh parser — direct port from zsh/Src/parse.c.
2//!
//! Pulls tokens via the lex.rs free functions (zshlex/tok/tokstr) and
4//! builds an AST tree (relocated to src/extensions/zsh_ast.rs as a
5//! Rust-only IR) plus emits wordcode into ECBUF via the P9b/P9c
6//! pipeline. Follows the zsh grammar closely; productions match
7//! `par_*` in Src/parse.c.
8
9use super::lex::{
10    lextok, set_tok, AMPER, AMPERBANG, AMPOUTANG, BANG_TOK, BARAMP, BAR_TOK, CASE, COPROC, DAMPER,
11    DBAR, DINANG, DINANGDASH, DINBRACK, DINPAR, DOLOOP, DONE, DOUTANG, DOUTANGAMP, DOUTANGAMPBANG,
12    DOUTANGBANG, DOUTBRACK, DOUTPAR, DSEMI, ELIF, ELSE, ENDINPUT, ENVARRAY, ENVSTRING, ESAC, FI,
13    FOR, FOREACH, FUNC, IF, INANGAMP, INANG_TOK, INBRACE_TOK, INOUTANG, INOUTPAR, INPAR_TOK,
14    IS_REDIROP, LEXERR, LEX_HEREDOCS, NEWLIN, NOCORRECT, OUTANGAMP, OUTANGAMPBANG, OUTANGBANG,
15    OUTANG_TOK, OUTBRACE_TOK, OUTPAR_TOK, REPEAT, SELECT, SEMI, SEMIAMP, SEMIBAR, SEPER,
16    STRING_LEX, THEN, TIME, TRINANG, TYPESET, UNTIL, WHILE, ZEND,
17};
18use super::zsh_h::{
19    eprog, estate, funcdump, isset, redir, unset, wc_code, wordcode, Bang, Dash, Equals, Inang,
20    Outang, Tilde, ALIASFUNCDEF, COND_AND, COND_MOD, COND_MODI, COND_NOT, COND_NT, COND_OR,
21    COND_REGEX, COND_STRDEQ, COND_STREQ, COND_STRGTR, COND_STRLT, COND_STRNEQ, CSHJUNKIELOOPS,
22    EC_DUP, EC_NODUP, EF_HEAP, EF_REAL, EXECOPT, IGNOREBRACES, IS_DASH, MULTIFUNCDEF, OPT_ISSET,
23    PM_UNDEFINED, POSIXBUILTINS, REDIRF_FROM_HEREDOC, REDIR_APP, REDIR_APPNOW, REDIR_ERRAPP,
24    REDIR_ERRAPPNOW, REDIR_ERRWRITE, REDIR_ERRWRITENOW, REDIR_FROM_HEREDOC_MASK, REDIR_HEREDOC,
25    REDIR_HEREDOCDASH, REDIR_HERESTR, REDIR_INPIPE, REDIR_MERGEIN, REDIR_MERGEOUT, REDIR_OUTPIPE,
26    REDIR_READ, REDIR_READWRITE, REDIR_VARID_MASK, REDIR_WRITE, REDIR_WRITENOW, SHORTLOOPS,
27    SHORTREPEAT, WCB_COND, WCB_SIMPLE, WC_REDIR, WC_REDIR_FROM_HEREDOC, WC_REDIR_TYPE,
28    WC_REDIR_VARID, WC_SUBLIST_COPROC, WC_SUBLIST_NOT,
29};
30pub use crate::heredoc_ast::HereDoc;
31use crate::ported::lex::{
32    incasepat, incmdpos, incond, infor, input_slice, inredir, inrepeat, intypeset, isnewlin,
33    lex_init, lineno, noaliases, nocorrect, pos, set_incasepat, set_incmdpos, set_incond,
34    set_infor, set_inredir, set_inrepeat, set_intypeset, set_isnewlin, set_lineno, set_noaliases,
35    set_nocorrect, tok, tokfd, toklineno, tokstr, zshlex,
36};
37use crate::ported::signals::unqueue_signals;
38use crate::ported::utils::{errflag, zerr, zwarnnam, ERRFLAG_ERROR};
39use crate::prompt::{cmdpop, cmdpush};
40pub use crate::zsh_ast::{
41    CaseArm, CaseTerm, CaseTerminator, CompoundCommand, ForList, HereDocInfo, ListFlags, ListOp,
42    Redirect, RedirectOp, ShellCommand, ShellWord, SimpleCommand, SublistFlags, SublistOp,
43    VarModifier, ZshAssign, ZshAssignValue, ZshCase, ZshCommand, ZshCond, ZshFor, ZshFuncDef,
44    ZshIf, ZshList, ZshParamFlag, ZshPipe, ZshProgram, ZshRedir, ZshRepeat, ZshSimple, ZshSublist,
45    ZshTry, ZshWhile,
46};
47use crate::zsh_h::{
48    wc_bdata, CS_ALWAYS, CS_ARRAY, CS_CASE, CS_CMDAND, CS_CMDOR, CS_COND, CS_CURSH, CS_ELIF,
49    CS_ELSE, CS_ERRPIPE, CS_FOR, CS_FOREACH, CS_FUNCDEF, CS_IF, CS_IFTHEN, CS_PIPE, CS_REPEAT,
50    CS_SELECT, CS_SUBSH, CS_UNTIL, CS_WHILE, EF_RUN, WCB_ARITH, WCB_ASSIGN, WCB_CASE, WCB_CURSH,
51    WCB_END, WCB_FOR, WCB_FUNCDEF, WCB_IF, WCB_LIST, WCB_PIPE, WCB_REDIR, WCB_REPEAT, WCB_SELECT,
52    WCB_SUBLIST, WCB_SUBSH, WCB_TIMED, WCB_TRY, WCB_TYPESET, WCB_WHILE, WC_ASSIGN_ARRAY,
53    WC_ASSIGN_INC, WC_ASSIGN_NEW, WC_ASSIGN_SCALAR, WC_CASE_AND, WC_CASE_HEAD, WC_CASE_OR,
54    WC_CASE_TESTAND, WC_FOR_COND, WC_FOR_LIST, WC_FOR_PPARAM, WC_IF_ELIF, WC_IF_ELSE, WC_IF_HEAD,
55    WC_IF_IF, WC_PIPE_END, WC_PIPE_LINENO, WC_PIPE_MID, WC_REDIR_WORDS, WC_SELECT_LIST,
56    WC_SELECT_PPARAM, WC_SUBLIST_AND, WC_SUBLIST_END, WC_SUBLIST_FLAGS, WC_SUBLIST_OR,
57    WC_SUBLIST_SIMPLE, WC_SUBLIST_TYPE, WC_TIMED_EMPTY, WC_TIMED_PIPE, WC_WHILE_UNTIL,
58    WC_WHILE_WHILE, Z_ASYNC, Z_DISOWN, Z_END, Z_SIMPLE, Z_SYNC,
59};
60use serde::{Deserialize, Serialize};
61use std::fs::{self, File};
62use std::io::{Read, Seek, SeekFrom, Write};
63use std::os::unix::fs::MetadataExt;
64use std::path::Path;
65use std::sync::atomic::{AtomicUsize, Ordering};
66use std::sync::mpsc;
67use std::thread;
68use std::time::Duration;
69
70// Names lifted out of inside-fn `use` statements (PORT.md
71// 'no imports inside FNs ever').
72
/// Initial size of the wordcode buffer, counted in entries. First of the
/// three grow-policy constants ported from `Src/parse.c:287-289`;
/// `init_parse` allocates `ECBUF` with this many zeroed slots.
const EC_INIT_SIZE: i32 = 256;
75
76// Pending-here-document list — direct port of `Src/parse.c:84
77// struct heredocs *hdocs;`. Per-parser file-static (bucket-1 in
78// PORT_PLAN.md): each worker thread parsing a separate program needs
79// its own pending-heredoc list. Saved/restored across nested parses
80// by `parse_context_save`/`parse_context_restore` (parse.c:299/337).
81thread_local! {
82    /// Port of file-static `struct heredocs *hdocs;` from `Src/parse.c:84`.
83    pub static HDOCS: std::cell::RefCell<Option<Box<crate::ported::zsh_h::heredocs>>>
84        = const { std::cell::RefCell::new(None) };
85}
86
87// Wordcode-buffer thread-locals — direct port of `Src/parse.c:269-285`
88// file-statics. Per-evaluator (bucket-1 in PORT_PLAN.md): each worker
89// thread parsing a separate program needs its own wordcode buffer.
90//
91// ECBUF: the wordcode array being built. C `Wordcode ecbuf`
92// (parse.c:275).
93// ECLEN: allocated entries in ECBUF (parse.c:269).
94// ECUSED: entries actually used so far (parse.c:271).
95// ECNPATS: count of patterns referenced by ECBUF (parse.c:273).
96// ECSOFFS / ECSSUB: byte offsets into the string region
97// (parse.c:279). ECSSUB subtracts substring overlap.
98// ECNFUNC: count of functions defined so far (parse.c:285).
99// ECSTRS_INDEX: dedup index for long strings — C uses a binary tree
100// of `struct eccstr` (zsh.h:836); the canonical Eccstr port exists
101// at zsh_h::eccstr but stays unused at runtime here. The HashMap
102// preserves the API contract (lookup by (nfunc, str) → offs) with
103// simpler ownership semantics.
104thread_local! {
105    /// `ECBUF` static.
106    pub static ECBUF: std::cell::RefCell<Vec<u32>> = std::cell::RefCell::new(Vec::new());
107    static ECLEN: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
108    static ECUSED: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
109    static ECNPATS: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
110    static ECSOFFS: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
111    static ECSSUB: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
112    static ECNFUNC: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
113    static ECSTRS_INDEX: std::cell::RefCell<std::collections::HashMap<(i32, String), u32>>
114        = std::cell::RefCell::new(std::collections::HashMap::new());
115    /// C zsh's `eccstr` BST (parse.c:447). Port of `Eccstr ecstrs` —
116    /// a hashval-ordered binary search tree of long-strings for
117    /// dedup. Same cmp logic as C: nfunc, then hashval, then strcmp.
118    /// HashMap above is a fast-path lookup; this tree is the
119    /// C-fidelity walker that mirrors C's exact dedup-hit pattern
120    /// (including its quirks for hash-colliding content).
121    static ECSTRS_TREE: std::cell::RefCell<Option<Box<EccstrNode>>>
122        = const { std::cell::RefCell::new(None) };
123    /// Reverse index for `ecgetstr`: offs → owned string. Populated
124    /// at ecstrcode time so the consumer can recover the string from
125    /// the wordcode offs without walking the encode-time HashMap.
126    /// Stores the METAFIED BYTE form of each long-string, exactly
127    /// matching what C's strs region holds. `String` would not work
128    /// here because Rust strings carry UTF-8-encoded chars (e.g.
129    /// the Dash marker `\u{9b}` UTF-8-encodes to two bytes
130    /// `\xc2 \x9b`) while C stores zsh markers as single bytes
131    /// (raw `\x9b`). Storing Vec<u8> lets us write byte-for-byte
132    /// what C writes after metafy.
133    pub static ECSTRS_REVERSE: std::cell::RefCell<std::collections::HashMap<u32, Vec<u8>>>
134        = std::cell::RefCell::new(std::collections::HashMap::new());
135}
/// Grow policy (`Src/parse.c:287-289`): while the wordcode buffer holds
/// fewer entries than this, `ecadd`/`ecispace` grow it by its current
/// length (doubling).
const EC_DOUBLE_THRESHOLD: i32 = 32768;
/// Grow policy: once the buffer has reached `EC_DOUBLE_THRESHOLD` entries
/// it grows by this fixed number of entries instead.
const EC_INCREMENT: i32 = 1024;
138
139/// Direct port of `parse_context_save(struct parse_stack *ps, int toplevel)` at `Src/parse.c:295`.
140/// Snapshots the lexer-side file-statics (which currently live on
141/// `lexer` until Phase 7 dissolution makes them file-scope
142/// thread_local!s) plus the pending heredoc list, plus the
143/// wordcode-buffer state (STUB until Phase 9b). Saves Rust-only
144/// recursion counters too so nested parses get fresh limits.
145/// WARNING: param names don't match C — Rust=(ps) vs C=(ps, toplevel)
146pub fn parse_context_save(ps: &mut parse_stack) {
147    // parse.c:299 — `ps->hdocs = hdocs; hdocs = NULL;` — save the
148    // canonical C linked-list and clear it for the nested parse.
149    ps.hdocs = HDOCS.with_borrow_mut(|h| h.take());
150    // zshrs-only: save the parallel AST-glue Vec the same way.
151    // LEX_HEREDOCS carries terminator/strip_tabs/quoted metadata
152    // that has no C analog (C stores it implicitly via tokstr).
153    ps.lex_heredocs = LEX_HEREDOCS.with_borrow_mut(|v| std::mem::take(v));
154    // parse.c:302-310 — save lexer-side state.
155    ps.incmdpos = incmdpos();
156    // parse.c:303 — `ps->aliasspaceflag = aliasspaceflag;`. Mirrors
157    // lex.c LEX_ALIAS_SPACE_FLAG so nested parses preserve the
158    // HISTIGNORESPACE-via-alias state across parser re-entry.
159    ps.aliasspaceflag = crate::ported::lex::LEX_ALIAS_SPACE_FLAG.with(|c| c.get());
160    ps.incond = incond();
161    ps.inredir = inredir();
162    ps.incasepat = incasepat();
163    ps.isnewlin = isnewlin();
164    ps.infor = infor();
165    ps.inrepeat_ = inrepeat();
166    ps.intypeset = intypeset();
167    // parse.c:312-317 — wordcode buffer state. STUB until Phase 9b
168    // (zshrs has no ecbuf yet).
169    ps.eclen = 0;
170    ps.ecused = 0;
171    ps.ecnpats = 0;
172    ps.ecbuf = None;
173    ps.ecstrs = None;
174    ps.ecsoffs = 0;
175    ps.ecssub = 0;
176    ps.ecnfunc = 0;
177    set_incmdpos(true);
178    set_incond(0);
179    set_inredir(false);
180    set_incasepat(0);
181    set_infor(0);
182    set_inrepeat(0);
183    set_intypeset(false);
184}
185
186/// Direct port of `parse_context_restore(const struct parse_stack *ps, int toplevel)` at `Src/parse.c:326`.
187/// Inverse of `parse_context_save`. Restores lexer-side state +
188/// pending heredocs + Rust-only counters from `ps`, then clears
189/// `errflag & ERRFLAG_ERROR` per parse.c:354.
190/// WARNING: param names don't match C — Rust=(ps) vs C=(ps, toplevel)
191pub fn parse_context_restore(ps: &parse_stack) {
192    // parse.c:330-331 — free any in-progress wordcode buffer.
193    // zshrs has no wordcode yet (STUB until Phase 9b); the AST
194    // nodes are owned by their parent so dropping the parser
195    // frees them.
196
197    // parse.c:333-352 — restore saved state.
198    // parse.c:337 — `hdocs = ps->hdocs;`
199    HDOCS.with_borrow_mut(|h| *h = ps.hdocs.clone());
200    // zshrs-only: restore the parallel AST-glue Vec.
201    LEX_HEREDOCS.with_borrow_mut(|v| *v = ps.lex_heredocs.clone());
202    set_incmdpos(ps.incmdpos);
203    // parse.c:334 — `aliasspaceflag = ps->aliasspaceflag;`.
204    crate::ported::lex::LEX_ALIAS_SPACE_FLAG.with(|c| c.set(ps.aliasspaceflag));
205    set_incond(ps.incond);
206    set_inredir(ps.inredir);
207    set_incasepat(ps.incasepat);
208    set_isnewlin(ps.isnewlin);
209    set_infor(ps.infor);
210    set_inrepeat(ps.inrepeat_);
211    set_intypeset(ps.intypeset);
212    // ecbuf/eclen/ecused/ecnpats/ecstrs/ecsoffs/ecssub/ecnfunc
213    // STUB until Phase 9b.
214
215    // parse.c:354 — `errflag &= ~ERRFLAG_ERROR;` — clear the
216    // error flag so the outer parse sees a clean state.
217    errflag.fetch_and(!ERRFLAG_ERROR, Ordering::Relaxed);
218}
219
/// Port of `ecadjusthere(int p, int d)` at `Src/parse.c:360`.
///
/// In C this walks the pending-heredoc list and adds `d` to the `pc` of
/// every entry located at or after wordcode position `p`; `ecispace` and
/// `ecdel` call it whenever wordcodes shift. The port currently does
/// nothing with either argument.
pub fn ecadjusthere(p: usize, d: i32) {
    // parse.c:362-366 — `for (p2 = hdocs; p2; p2 = p2->next) if
    // (p2->pc >= p) p2->pc += d;`. zshrs's hdocs are still Vec<HereDoc>
    // on the lexer (pre-P9c migration) and none of them carries a
    // wordcode pc today (the AST tree has no pc slots), so there is
    // nothing to adjust until Phase 9c wires `hdocs.pc` into wordcode
    // emission.
    let _ = (p, d);
}
233
234// === AST tree relocated to src/extensions/zsh_ast.rs ===
235//
236// zsh C does NOT have an AST tree — it emits wordcode directly via
237// par_event/par_list/par_sublist/par_pipe/par_cmd/par_simple/etc.
238// (Src/parse.c:485-3000) into a flat `Wordcode ecbuf[]`. The Zsh*/
239// Shell* AST node types lived in this file as a Rust-only IR that
240// stands in for that wordcode.
241//
242// P9e (PORT_PLAN.md): the types moved to src/extensions/zsh_ast.rs
243// to make their Rust-only-extension nature explicit. The full P9c +
244// P9d rewrite (par_* emitting wordcode + vm_helper reading wordcode)
245// retires them entirely — until then, callers reach them via this
246// re-export.
247
248/// Direct port of `ecispace(int p, int n)` at `Src/parse.c:372`. Insert `n`
249/// empty wordcode slots at position `p`, shifting later entries
250/// right, growing the buffer as needed, adjusting heredoc pointers.
251pub fn ecispace(p: usize, n: usize) {
252    // parse.c:376-381 — grow if needed.
253    let need = n as i32;
254    if (ECLEN.get() - ECUSED.get()) < need {
255        let cur = ECLEN.get();
256        let mut a = if cur < EC_DOUBLE_THRESHOLD {
257            cur
258        } else {
259            EC_INCREMENT
260        };
261        if need > a {
262            a = need;
263        }
264        ECBUF.with_borrow_mut(|buf| {
265            buf.resize((cur + a) as usize, 0);
266        });
267        ECLEN.set(cur + a);
268    }
269    // parse.c:382-385 — memmove p → p+n, gap of n.
270    let m = ECUSED.get() as usize - p;
271    if m > 0 {
272        ECBUF.with_borrow_mut(|buf| {
273            let needed = (ECUSED.get() as usize) + n;
274            if buf.len() < needed {
275                buf.resize(needed, 0);
276            }
277            for i in (0..m).rev() {
278                buf[p + n + i] = buf[p + i];
279            }
280            for i in 0..n {
281                buf[p + i] = 0;
282            }
283        });
284    }
285    // parse.c:386 — bump ecused by n.
286    ECUSED.set(ECUSED.get() + need);
287    // parse.c:387 — `ecadjusthere(p, n)`.
288    ecadjusthere(p, need);
289}
290
291/// Direct port of `ecadd(wordcode c)` at `Src/parse.c:397`. Append `c` to
292/// the wordcode buffer with grow-on-demand, return the new index.
293pub fn ecadd(c: u32) -> usize {
294    // parse.c:399-405 — `if ((eclen - ecused) < 1) grow`.
295    if (ECLEN.get() - ECUSED.get()) < 1 {
296        let cur = ECLEN.get();
297        let a = if cur < EC_DOUBLE_THRESHOLD {
298            cur
299        } else {
300            EC_INCREMENT
301        };
302        ECBUF.with_borrow_mut(|buf| {
303            buf.resize((cur + a) as usize, 0);
304        });
305        ECLEN.set(cur + a);
306    }
307    let idx = ECUSED.get();
308    ECBUF.with_borrow_mut(|buf| {
309        if (idx as usize) >= buf.len() {
310            buf.resize((idx + 1) as usize, 0);
311        }
312        buf[idx as usize] = c;
313    });
314    ECUSED.set(idx + 1);
315    idx as usize
316}
317
318/// Direct port of `ecdel(int p)` at `Src/parse.c:413`. Remove the
319/// wordcode at position `p`, shift later entries left by one,
320/// decrement ecused, adjust pending heredoc pointers.
321pub fn ecdel(p: usize) {
322    // parse.c:415-418 — memmove + decrement ecused.
323    let n = ECUSED.get() as usize - p - 1;
324    if n > 0 {
325        ECBUF.with_borrow_mut(|buf| {
326            for i in 0..n {
327                buf[p + i] = buf[p + i + 1];
328            }
329        });
330    }
331    ECUSED.set(ECUSED.get() - 1);
332    // parse.c:420 — `ecadjusthere(p, -1)`.
333    ecadjusthere(p, -1);
334}
335
336/// Direct port of `ecstrcode(char *s)` at `Src/parse.c:426`. Encode a
337/// string into a single wordcode (short strings ≤4 bytes packed
338/// inline; longer strings get an offset into the deduped registry).
339///
340/// The long-string path stores the METAFIED bytes (matches what C's
341/// strs region contains): collapse Rust UTF-8 chars in 0x80..=0xff
342/// to single bytes, then apply zsh metafy (high bytes ≥ 0x83 →
343/// `Meta=0x83 + byte^0x20`). Length tracking (ECSOFFS) uses the
344/// metafied byte count — same as C `strlen(s) + 1` where C's `s`
345/// is already metafied at this point.
346pub fn ecstrcode(s: &str) -> u32 {
347    // Convert Rust char-form → C-byte form. zsh's metafy() at
348    // Src/utils.c only converts bytes flagged IMETA: 0x00, 0x83
349    // (Meta itself), and 0x84..=0xa2 (Pound..Marker, the lex
350    // markers). Other bytes 0x01..=0x82 and 0xa3..=0xff pass
351    // through unchanged. See utils.c:4195-4204 typtab init.
352    //
353    // Rust receives chars. Classify each:
354    //   - codepoint in [0x83..=0xa2] → marker char (emitted by lex
355    //     post-metafy in C); 1 byte unchanged
356    //   - codepoint < 0x80 → ASCII, 1 byte unchanged
357    //   - codepoint in [0x80..=0x82] or [0xa3..=0xff] → single
358    //     non-imeta byte (user-input range); 1 byte unchanged
359    //   - codepoint > 0xff → multi-byte UTF-8 source char (e.g.
360    //     '━' = U+2501 = 0xe2 0x94 0x81). Metafy ONLY the bytes
361    //     that fall in 0x83..=0xa2; pass others through. For '━':
362    //     0xe2 stays, 0x94 → 0x83 0xb4, 0x81 stays.
363    let mut c_bytes: Vec<u8> = Vec::with_capacity(s.len());
364    let imeta = |b: u8| -> bool { b == 0 || (0x83..=0xa2).contains(&b) };
365    for ch in s.chars() {
366        let cu = ch as u32;
367        if cu < 0x80 {
368            // ASCII — single byte unchanged.
369            c_bytes.push(cu as u8);
370        } else if (0x83..=0xa2).contains(&cu) {
371            // Lex marker char (emitted by lex.add(Marker) post-metafy
372            // in C). Stored as single byte.
373            c_bytes.push(cu as u8);
374        } else {
375            // User-input char: encode UTF-8 then metafy imeta bytes.
376            // For chars 0x80..=0xff (like 'º' U+00BA), UTF-8 gives
377            // 2 bytes (e.g. `0xc2 0xba`) — zsh's lex reads these as
378            // raw bytes from input and metafy passes 0xc2 / 0xba
379            // through (both NOT imeta).
380            let mut tmp = [0u8; 4];
381            for &b in ch.encode_utf8(&mut tmp).as_bytes() {
382                if imeta(b) {
383                    c_bytes.push(0x83);
384                    c_bytes.push(b ^ 0x20);
385                } else {
386                    c_bytes.push(b);
387                }
388            }
389        }
390    }
391    // c:`has_token` (Src/utils.c:2282) → `itok(*s)` → `typtab[c] & ITOK`.
392    // ITOK is set for bytes `Pound..=Nularg` (0x84..=0xa1) per
393    // Src/utils.c:4198 (`for (t0=Pound; t0<=LAST_NORMAL_TOK; t0++)
394    // typtab[t0]|=ITOK`) plus :4200 (`for (t0=Snull; t0<=Nularg; t0++)
395    // typtab[t0]|=ITOK|IMETA|INULL`). Pound=0x84 Bang=0x9c (last normal),
396    // Snull=0x9d..Nularg=0xa1. Meta=0x83 has IMETA but NOT ITOK.
397    let t = c_bytes.iter().any(|&b| (0x84..=0xa1).contains(&b));
398    let l = c_bytes.len() + 1; // include NUL terminator
399    if l <= 4 {
400        // parse.c:436-445 — short-string inline pack. Uses raw C-bytes
401        // (NOT metafied — the inline packing stores 1 byte per slot).
402        let mut c: u32 = if t { 3 } else { 2 };
403        match l {
404            4 => {
405                c |= (c_bytes[2] as u32) << 19;
406                c |= (c_bytes[1] as u32) << 11;
407                c |= (c_bytes[0] as u32) << 3;
408            }
409            3 => {
410                c |= (c_bytes[1] as u32) << 11;
411                c |= (c_bytes[0] as u32) << 3;
412            }
413            2 => {
414                c |= (c_bytes[0] as u32) << 3;
415            }
416            1 => {
417                // parse.c:443 — empty string special case.
418                c = if t { 7 } else { 6 };
419            }
420            _ => {}
421        }
422        c
423    } else {
424        // parse.c:447-466 — long string. Port of C's eccstr BST walk
425        // exactly: walk the tree comparing nfunc, then hashval, then
426        // strcmp on bytes. Return offs on full match; insert new
427        // leaf otherwise. Matches C's exact dedup-hit pattern
428        // (which is content-dependent — hash collisions and the
429        // lazy short-circuit cmp chain make the tree shape determine
430        // whether matching nodes are reachable).
431        // hasher is byte-by-byte polynomial (hashtable.c:86); pass
432        // c_bytes via from_utf8_unchecked so non-UTF-8 zsh marker
433        // bytes feed straight in. SAFETY: hasher only iterates
434        // `.bytes()` — no UTF-8 validity assumed.
435        let val =
436            crate::ported::hashtable::hasher(unsafe { std::str::from_utf8_unchecked(&c_bytes) });
437        let nfunc = ECNFUNC.get();
438        let found_offs = ECSTRS_TREE.with_borrow_mut(|root| {
439            // Walk the tree. At each node, if all 3 cmps == 0,
440            // return the node's offs. Otherwise descend left/right
441            // by the first non-zero cmp's sign.
442            let mut cur: &mut Option<Box<EccstrNode>> = root;
443            loop {
444                let p = match cur.as_mut() {
445                    Some(p) => p,
446                    None => break None,
447                };
448                // c:448 — `cmp = p->nfunc - ecnfunc`
449                let mut cmp = (p.nfunc as i64) - (nfunc as i64);
450                if cmp == 0 {
451                    // c:448 — `&& !(cmp = (long)p->hashval - (long)val)`
452                    // C does `(int)(p->hashval - val)` — unsigned 32-bit
453                    // subtraction wraps, then cast to int. Use
454                    // wrapping_sub + as i32 to match the bit pattern.
455                    cmp = (p.hashval.wrapping_sub(val) as i32) as i64;
456                    if cmp == 0 {
457                        // c:448 — `&& !(cmp = strcmp(p->str, s))`
458                        cmp = match p.str.as_slice().cmp(c_bytes.as_slice()) {
459                            std::cmp::Ordering::Less => -1,
460                            std::cmp::Ordering::Equal => 0,
461                            std::cmp::Ordering::Greater => 1,
462                        };
463                        if cmp == 0 {
464                            // c:450 — `return p->offs;`
465                            break Some(p.offs);
466                        }
467                    }
468                }
469                // c:452 — `pp = (cmp < 0 ? &p->left : &p->right);`
470                cur = if cmp < 0 { &mut p.left } else { &mut p.right };
471            }
472        });
473        if let Some(offs) = found_offs {
474            return offs;
475        }
476        // c:462 — `p->offs = ((ecsoffs - ecssub) << 2) | (t ? 1 : 0);`
477        let offs = (((ECSOFFS.get() - ECSSUB.get()) as u32) << 2) | if t { 1 } else { 0 };
478        // c:463 — `p->aoffs = ecsoffs;` (absolute write position).
479        let aoffs = ECSOFFS.get() as u32;
480        // c:457-465 — insert new node at the NULL slot the walk
481        // terminated at. Encode the walk path as a Vec<bool> of
482        // left/right turns (true = right), then re-descend to
483        // insert. Borrow-checker friendly: a single mutable walk
484        // that either finds an existing node (descend) or fills
485        // the empty slot (return).
486        let stored = c_bytes.clone();
487        let stored_len = stored.len();
488        let new_node = Box::new(EccstrNode {
489            left: None,
490            right: None,
491            str: stored.clone(),
492            offs,
493            aoffs,
494            nfunc,
495            hashval: val,
496        });
497        ECSTRS_TREE.with_borrow_mut(|root| {
498            // Build the path first (immutable-walk; safe because we
499            // only ever go further down).
500            let mut path: Vec<bool> = Vec::new();
501            {
502                let mut cur: &Option<Box<EccstrNode>> = root;
503                while let Some(p) = cur.as_ref() {
504                    let mut cmp = (p.nfunc as i64) - (nfunc as i64);
505                    if cmp == 0 {
506                        // C does `(int)(p->hashval - val)` — unsigned 32-bit
507                        // subtraction wraps, then cast to int. Use
508                        // wrapping_sub + as i32 to match the bit pattern.
509                        cmp = (p.hashval.wrapping_sub(val) as i32) as i64;
510                        if cmp == 0 {
511                            cmp = match p.str.as_slice().cmp(c_bytes.as_slice()) {
512                                std::cmp::Ordering::Less => -1,
513                                std::cmp::Ordering::Equal => 0,
514                                std::cmp::Ordering::Greater => 1,
515                            };
516                        }
517                    }
518                    let go_right = cmp >= 0;
519                    path.push(go_right);
520                    cur = if go_right { &p.right } else { &p.left };
521                }
522            }
523            // Descend mutably along the recorded path and assign at
524            // the NULL leaf.
525            let mut cur: &mut Option<Box<EccstrNode>> = root;
526            for turn in path {
527                let p = cur.as_mut().expect("path matches walk");
528                cur = if turn { &mut p.right } else { &mut p.left };
529            }
530            *cur = Some(new_node);
531        });
532        // Also keep the existing reverse index (offs → bytes) for
533        // ecgetstr_wordcode and copy_ecstr — they read flat by offs.
534        ECSTRS_REVERSE.with_borrow_mut(|m| {
535            m.insert(offs, stored);
536        });
537        let _ = l;
538        ECSOFFS.set(ECSOFFS.get() + (stored_len + 1) as i32);
539        offs
540    }
541}
542
543/// Initialize parser status. Direct port of zsh/Src/parse.c:491
544/// `init_parse_status`. Clears the per-parse-call lexer flags
545/// so a fresh parse starts from cmd-position with no nesting
546/// state inherited from a prior parse.
547///
548/// Previously the Rust port omitted `inrepeat_ = 0` at c:501.
549/// `inrepeat_` is the `repeat N <body>` parse-state counter that
550/// the lexer toggles in 3 phases (1 → 2 → 3 → 0). Without the
551/// reset, a fresh parse called after an in-flight `repeat`
552/// command would inherit the stale counter and silently misread
553/// the next token as a body of an already-completed repeat.
554pub fn init_parse_status() {
555    // c:491
556    // parse.c:500-502 — `incasepat = incond = inredir = infor =
557    // intypeset = 0; inrepeat_ = 0; incmdpos = 1;`
558    set_incasepat(0); // c:500
559    set_incond(0); // c:500
560    set_inredir(false); // c:500
561    set_infor(0); // c:500
562    set_intypeset(false); // c:500
563    set_inrepeat(0); // c:501 inrepeat_ = 0
564    set_incmdpos(true); // c:502
565}
566
567/// Initialize parser for a fresh parse. Direct port of
568/// zsh/Src/parse.c:509 `init_parse`. C source allocates a
569/// fresh wordcode buffer (ecbuf) sized EC_INIT_SIZE, resets the
570/// per-parse-call counters, and calls init_parse_status. zshrs
571/// has no flat wordcode buffer (AST is built inline) so this
572/// function reduces to init_parse_status + recursion_depth/
573/// global_iterations clear.
574pub fn init_parse() {
575    // parse.c:513-520 — `ecbuf = (Wordcode) zalloc(EC_INIT_SIZE *
576    // sizeof(wordcode)); eclen = EC_INIT_SIZE; ecused = 0;
577    // ecnpats = 0; ecstrs = NULL; ecsoffs = ecnfunc = 0;
578    // ecssub = 0;`. P9b — initialize the per-evaluator wordcode
579    // buffer for this parse call. zshrs uses thread-local
580    // statics declared at file scope (parse.rs:25-50).
581    ECBUF.with_borrow_mut(|buf| {
582        buf.clear();
583        buf.resize(EC_INIT_SIZE as usize, 0);
584    });
585    ECLEN.set(EC_INIT_SIZE);
586    ECUSED.set(0);
587    ECNPATS.set(0);
588    ECSOFFS.set(0);
589    ECSSUB.set(0);
590    ECNFUNC.set(0);
591    ECSTRS_INDEX.with_borrow_mut(|m| m.clear());
592    ECSTRS_REVERSE.with_borrow_mut(|m| m.clear());
593    ECSTRS_TREE.with_borrow_mut(|t| *t = None);
594
595    // parse.c:522 — `init_parse_status();`
596    init_parse_status();
597}
598
599/// Port of `copy_ecstr(Eccstr s, char *p)` from `Src/parse.c:537`.
600/// Walks the BST and writes each entry to `p[s->aoffs..]` matching
601/// C's recursive in-order traversal exactly. The old impl used the
602/// `ECSTRS_REVERSE` HashMap keyed by `offs` (= ecssub-relative
603/// wordcode-encoded offset), which collides across funcdef scopes:
604/// a string at relative offs=0 inside funcdef A and another at
605/// relative offs=0 inside funcdef B share the same key, so one
606/// overwrites the other.
607pub fn copy_ecstr(_table: &std::collections::HashMap<u32, Vec<u8>>, p: &mut [u8]) {
608    // c:537-544 — walk eccstr BST recursively, writing each node's
609    // str at p[node->aoffs..node->aoffs + strlen + 1] (NUL-terminated).
610    ECSTRS_TREE.with_borrow(|root| {
611        copy_ecstr_walk(root, p);
612    });
613}
614
615/// Port of `bld_eprog(int heap)` from `Src/parse.c:547`. Finalizes
616/// the in-build `ECBUF`/`ECSTRS`/`ECNPATS` state into an `Eprog`.
617/// Resets the build state so a new parse can start.
618pub fn bld_eprog(heap: bool) -> eprog {
619    // c:547
620
621    // c:555 — emit WC_END opcode. `WCB_END` is `WC_END_DEFAULT` (0).
622    ecadd(0);
623
624    let ecused = ECUSED.with(|c| c.get()) as usize;
625    let ecnpats = ECNPATS.with(|c| c.get()) as usize;
626    let ecsoffs = ECSOFFS.with(|c| c.get()) as usize;
627
628    // c:557-559 — `ret->len = ((ecnpats * sizeof(Patprog)) +
629    //                            (ecused * sizeof(wordcode)) +
630    //                            ecsoffs);`
631    // sizeof(Patprog) = sizeof(struct patprog *) = pointer size.
632    // On 64-bit targets that's 8, on 32-bit that's 4. C's eprog
633    // ->len is the canonical value for parity tests, so we use
634    // the same arithmetic.
635    let prog_bytes = ecused * 4; // sizeof(wordcode) = 4
636    let len = (ecnpats * size_of::<*const u8>()) + prog_bytes + ecsoffs;
637
638    // Snapshot the wordcode buffer + string table.
639    let prog_words: Vec<u32> = ECBUF.with(|c| c.borrow()[..ecused].to_vec());
640    let mut strs_bytes = vec![0u8; ecsoffs];
641    ECSTRS_REVERSE.with(|c| copy_ecstr(&c.borrow(), &mut strs_bytes));
642
643    // c:566 — store strs as raw bytes via from_utf8_unchecked so
644    // single-byte zsh markers (e.g. Dash 0x9b) survive intact.
645    // `String::from_utf8_lossy` would replace them with U+FFFD
646    // (`\xef\xbf\xbd`), breaking byte-for-byte parity with C's
647    // strs region. SAFETY: downstream consumers of `eprog.strs`
648    // index by byte offset (per the wordcode `(offs >> 2)` offset
649    // encoding) and call `.as_bytes()` — they never iterate as
650    // chars or rely on UTF-8 validity, so storing non-UTF-8 bytes
651    // in a String is safe in practice. C zsh's strs is `char *`
652    // with the same byte-not-char semantics.
653    let strs_string = unsafe { String::from_utf8_unchecked(strs_bytes) };
654    let ret = eprog {
655        flags: if heap { EF_HEAP } else { EF_REAL }, // c:570
656        len: len as i32,                             // c:559
657        npats: ecnpats as i32,                       // c:561
658        nref: if heap { -1 } else { 1 },             // c:562
659        pats: Vec::new(),                            // c:563 dummy_patprog
660        prog: prog_words,                            // c:565
661        strs: Some(strs_string),
662        shf: None,
663        dump: None,
664    };
665
666    // c:577 — free ecbuf so next parse starts fresh.
667    ECBUF.with(|c| c.borrow_mut().clear());
668    ECLEN.with(|c| c.set(0));
669    ECUSED.with(|c| c.set(0));
670    ECNPATS.with(|c| c.set(0));
671    ECSOFFS.with(|c| c.set(0));
672    ECSTRS_INDEX.with(|c| c.borrow_mut().clear());
673    ECSTRS_REVERSE.with(|c| c.borrow_mut().clear());
674    ECSTRS_TREE.with(|t| *t.borrow_mut() = None);
675
676    ret
677}
678
679/// Port of `int empty_eprog(Eprog p)` from `Src/parse.c:584`. C
680/// body: `return (!p || !p->prog || *p->prog == WCB_END());` —
681/// the eprog is empty when its prog buffer is missing or the
682/// first wordcode is the WC_END marker. Used by signal handlers
683/// (`Src/signals.c:712`) to short-circuit a trap that resolves to
684/// an empty program.
685pub fn empty_eprog(p: &eprog) -> bool {
686    p.prog.is_empty() || p.prog[0] == WCB_END()
687}
688
689/// Clear pending here-document list. Direct port of
690/// `clear_hdocs(void)` from `Src/parse.c:591`. The C version walks
691/// `hdocs` and frees each node; Rust drops the `Box<heredocs>`
692/// chain automatically when the head is replaced with None.
693pub fn clear_hdocs() {
694    // c:591
695    // c:593-598 — for (p = hdocs; p; p = n) { n = p->next; zfree(p); }
696    // c:599 — hdocs = NULL;
697    HDOCS.with_borrow_mut(|h| *h = None);
698    // zshrs-only: also drop the parallel AST-glue Vec. No C
699    // analog — LEX_HEREDOCS is Rust-only working-set state.
700    LEX_HEREDOCS.with_borrow_mut(|v| v.clear());
701}
702
703/// Top-level parse-event entry. Direct port of zsh/Src/parse.c:
704/// 612-631 `parse_event`. Reads one event from the lexer (a
705/// sublist optionally followed by SEPER/AMPER/AMPERBANG) and
706/// returns the resulting ZshProgram.
707///
708/// `endtok` is the token that terminates the event — usually
709/// ENDINPUT, but for command-style substitutions the closing
710/// `)` (zsh's CMD_SUBST_CLOSE).
711///
712/// zshrs port note: zsh's parse_event returns an `Eprog` (heap-
713/// allocated wordcode program). zshrs returns a `ZshProgram`
714/// (AST root). Same role at the parse-output boundary.
715pub fn parse_event(endtok: lextok) -> Option<ZshProgram> {
716    // parse.c:616-619 — reset state and prime the lexer.
717    set_tok(ENDINPUT);
718    set_incmdpos(true);
719    // parse.c:618 — `aliasspaceflag = 0;`. Fresh event: discard any
720    // alias-space carry-over from a prior parse so HISTIGNORESPACE
721    // doesn't suppress the next entered command line.
722    crate::ported::lex::LEX_ALIAS_SPACE_FLAG.with(|c| c.set(0));
723    zshlex();
724    // parse.c:620 — `init_parse();`
725    init_parse();
726
727    // parse.c:622-625 — drive par_event; on failure clear hdocs.
728    if !par_event(endtok) {
729        clear_hdocs();
730        return None;
731    }
732    // parse.c:626-628 — if endtok != ENDINPUT, this is a sub-
733    // parse for a substitution that doesn't need its own eprog.
734    // zshrs returns an empty program in that case (caller
735    // discards).
736    if endtok != ENDINPUT {
737        return Some(ZshProgram { lists: Vec::new() });
738    }
739    // parse.c:630 — `bld_eprog(1);` — build the final eprog.
740    // zshrs has already built the AST via parse_program_until,
741    // but parse_event uses par_event directly so we need to
742    // collect what par_event accumulated.
743    Some(parse_program_until(None))
744}
745
746/// Parse one event (sublist with optional separator). Direct
747/// port of zsh/Src/parse.c:635 `par_event`. Returns true if
748/// an event was successfully parsed, false on EOF / endtok.
749///
750/// zshrs port note: the C version emits wordcodes via ecadd/
751/// set_list_code; zshrs's parser builds AST nodes via
752/// par_sublist + par_list. Same flow, different output.
753pub fn par_event(endtok: lextok) -> bool {
754    // parse.c:639-643 — skip leading SEPERs.
755    while tok() == SEPER {
756        // parse.c:640-641 — at top-level (endtok == ENDINPUT),
757        // a SEPER on a fresh line ends the event.
758        if isnewlin() > 0 && endtok == ENDINPUT {
759            return false;
760        }
761        zshlex();
762    }
763    // parse.c:644-647 — terminate on EOF or matching close-token.
764    if tok() == ENDINPUT {
765        return false;
766    }
767    if tok() == endtok {
768        return true;
769    }
770    // parse.c:649-... — drive par_sublist + handle terminator.
771    // zshrs's par_sublist already builds the AST node directly.
772    match par_sublist() {
773        Some(_) => {
774            // parse.c:651-693 — terminator handling. zshrs's
775            // par_list wraps this; for parse_event we just
776            // confirm the sublist parsed.
777            true
778        }
779        None => false,
780    }
781}
782
783/// Port of `parse_list(void)` from `Src/parse.c:697`. C-shape entry
784/// point: drives `par_list` and finalizes via `bld_eprog`. Returns
785/// `None` on syntax error.
786pub fn parse_list() -> Option<eprog> {
787    // c:697
788    set_tok(ENDINPUT);
789    init_parse();
790    zshlex();
791    // c:Src/parse.c:705 — `par_list(&c);` emits wordcode for the
792    // full multi-statement list (its goto-rec loop walks all
793    // SEPER-separated sublists). The Rust AST par_list() emits
794    // NOTHING to the wordcode buffer (only builds the AST), so
795    // bld_eprog returned an empty program AND tok stayed at
796    // SEPER, tripping the syntax-error check below for any
797    // \`cmd; cmd\` body.
798    //
799    // Route through par_event_wordcode (the wordcode emitter,
800    // lines 4395+) which mirrors C's par_list loop semantics
801    // and populates the wordcode buffer that bld_eprog reads.
802    let _start = par_event_wordcode();
803    if tok() != ENDINPUT {
804        clear_hdocs();
805        set_tok(LEXERR);
806        yyerror("syntax error");
807        return None;
808    }
809    Some(bld_eprog(false))
810}
811
812/// Port of `parse_cond(void)` from `Src/parse.c:722`. Only used by
813/// `bin_test`/`bin_bracket` for `/bin/test`/`[` compat — the
814/// `condlex` global must already point at `testlex` before entry.
815pub fn parse_cond() -> Option<eprog> {
816    // c:722
817    init_parse();
818    if par_cond().is_none() {
819        clear_hdocs();
820        return None;
821    }
822    Some(bld_eprog(true))
823}
824
825// ============================================================
826// Wordcode emission helpers (parse.c private helpers)
827//
828// Direct ports of zsh's wordcode-emission helpers in parse.c.
829// These write u32 opcodes into a flat `ecbuf` array thread-local
830// via ecadd / ecdel / ecispace / ecstrcode and friends. The
831// par_*_wordcode family at parse.rs:1700-3500 walks the lex
832// stream and emits a real wordcode buffer here.
833//
834// (The AST tree built by par_program / par_simple / etc. is a
835// separate path used by fusevm; see compile_zsh.rs for the AST
836// → fusevm-bytecode compiler.)
837// ============================================================
838
839/// Patch a list-placeholder wordcode with its actual opcode +
840/// jump distance. Direct port of zsh/Src/parse.c:738
841/// `set_list_code`. zsh emits an `ecadd(0)` placeholder before
842/// par_sublist runs, then comes back through set_list_code to
843/// rewrite the slot with WCB_LIST(type, distance) once the
844/// sublist's final length is known.
845///
846/// Port of `set_list_code(int p, int type, int cmplx)` from
847/// `Src/parse.c:738`. Patches the WCB_LIST header at `p` based on
848/// whether the sublist body is simple (single command, no
849/// pipeline) and Z_SYNC/Z_END — emits the Z_SIMPLE-optimized
850/// header when possible, otherwise the plain WCB_LIST(type, 0).
851pub fn set_list_code(p: usize, type_code: i32, cmplx: bool) {
852    let _ = wc_bdata;
853    // c:740 — `if (!cmplx && (type == Z_SYNC || type == (Z_SYNC | Z_END))
854    // && WC_SUBLIST_TYPE(ecbuf[p+1]) == WC_SUBLIST_END)`
855    let sublist_code = ECBUF.with_borrow(|b| b.get(p + 1).copied().unwrap_or(0));
856    let z = type_code;
857    let qualifies = !cmplx
858        && (z == Z_SYNC || z == (Z_SYNC | Z_END))
859        && WC_SUBLIST_TYPE(sublist_code) == WC_SUBLIST_END;
860    if qualifies {
861        // c:742 — `int ispipe = !(WC_SUBLIST_FLAGS(ecbuf[p+1])
862        // & WC_SUBLIST_SIMPLE);`
863        let ispipe = (WC_SUBLIST_FLAGS(sublist_code) & WC_SUBLIST_SIMPLE) == 0;
864        // c:743 — `ecbuf[p] = WCB_LIST((type|Z_SIMPLE), ecused-2-p);`
865        let used = ECUSED.get() as usize;
866        let off = used.saturating_sub(2 + p);
867        ECBUF.with_borrow_mut(|b| {
868            if p < b.len() {
869                b[p] = WCB_LIST((z | Z_SIMPLE) as wordcode, off as wordcode);
870            }
871        });
872        // c:744 — `ecdel(p+1);`
873        ecdel(p + 1);
874        // c:745-746 — `if (ispipe) ecbuf[p+1] = WC_PIPE_LINENO(ecbuf[p+1]);`
875        if ispipe {
876            ECBUF.with_borrow_mut(|b| {
877                if p + 1 < b.len() {
878                    b[p + 1] = WC_PIPE_LINENO(b[p + 1]);
879                }
880            });
881        }
882    } else {
883        // c:748 — `ecbuf[p] = WCB_LIST(type, 0);`
884        ECBUF.with_borrow_mut(|b| {
885            if p < b.len() {
886                b[p] = WCB_LIST(z as wordcode, 0);
887            }
888        });
889    }
890}
891
892/// Port of `set_sublist_code(int p, int type, int flags, int skip, int cmplx)`
893/// from `Src/parse.c:755`. Patches the WCB_SUBLIST header at `p`.
894/// When the sublist is non-complex (single command, no pipeline),
895/// sets WC_SUBLIST_SIMPLE and rewrites the following slot to
896/// `WC_PIPE_LINENO`.
897pub fn set_sublist_code(p: usize, type_code: i32, flags: i32, skip: i32, cmplx: bool) {
898    if cmplx {
899        // c:758 — `ecbuf[p] = WCB_SUBLIST(type, flags, skip);`
900        ECBUF.with_borrow_mut(|b| {
901            if p < b.len() {
902                b[p] = WCB_SUBLIST(type_code as wordcode, flags as wordcode, skip as wordcode);
903            }
904        });
905    } else {
906        // c:760 — `ecbuf[p] = WCB_SUBLIST(type, flags|WC_SUBLIST_SIMPLE, skip);`
907        ECBUF.with_borrow_mut(|b| {
908            if p < b.len() {
909                b[p] = WCB_SUBLIST(
910                    type_code as wordcode,
911                    (flags as wordcode) | WC_SUBLIST_SIMPLE,
912                    skip as wordcode,
913                );
914            }
915        });
916        // c:761 — `ecbuf[p+1] = WC_PIPE_LINENO(ecbuf[p+1]);`
917        ECBUF.with_borrow_mut(|b| {
918            if p + 1 < b.len() {
919                b[p + 1] = WC_PIPE_LINENO(b[p + 1]);
920            }
921        });
922    }
923}
924
925/// Parse a list (sublist with optional & or ;).
926///
927/// Direct port of zsh/Src/parse.c:771-804 `par_list` (and the
928/// par_list1 wrapper at parse.c:807-817).
929///
930/// **Structural divergence**: zsh's parse.c emits flat wordcode
931/// into the `ecbuf` u32 array via `ecadd(0)` (placeholder),
932/// `set_list_code(p, code, complexity)`, `wc_bdata(Z_END)`. zshrs
933/// builds an AST node `ZshList { sublist, flags }` instead. The
934/// async/sync/disown discrimination at parse.c:785-790 maps to
935/// zshrs's `ListFlags { async_, disown }` field — Z_SYNC is the
936/// default (no flags), Z_ASYNC = `&` = `async_=true`, Z_DISOWN +
937/// Z_ASYNC = `&!`/`&|` = both true. Same semantics, different
938/// representation. This divergence is repository-wide: every
939/// `par_*` function emits wordcode in C, every `parse_*` builds
940/// AST in Rust. The compile_zsh module then traverses the AST to
941/// emit fusevm bytecode, which serves the same role as zsh's
942/// wordcode but with a different opcode set and execution model.
943fn par_list() -> Option<ZshList> {
944    let sublist = par_sublist()?;
945
946    let flags = match tok() {
947        AMPER => {
948            zshlex();
949            ListFlags {
950                async_: true,
951                disown: false,
952            }
953        }
954        AMPERBANG => {
955            zshlex();
956            ListFlags {
957                async_: true,
958                disown: true,
959            }
960        }
961        SEPER | SEMI | NEWLIN => {
962            zshlex();
963            ListFlags::default()
964        }
965        _ => ListFlags::default(),
966    };
967
968    Some(ZshList { sublist, flags })
969}
970
/// Parse one list — non-recursing variant. Counterpart of
/// zsh/Src/parse.c:808 `par_list1`.
///
/// Unlike `par_list`, this does not look at or consume a trailing
/// separator and does not wrap the result in a `ZshList`: it returns
/// the bare `ZshSublist` produced by `par_sublist`, or `None` when no
/// sublist could be parsed. Per the original port it is meant for
/// callers that want exactly one statement (e.g. each arm of a case
/// body).
pub fn par_list1() -> Option<ZshSublist> {
    // parse.c:810-816 — the C body wraps a single par_sublist call in
    // eu/ecused bookkeeping for the wordcode buffer; the AST path has
    // no such buffer, so only the call remains.
    par_sublist()
}
982
983/// Parse a sublist (pipelines connected by && or ||).
984///
985/// Direct port of zsh/Src/parse.c:825 `par_sublist` and
986/// par_sublist2 at parse.c:869-892. par_sublist handles the
987/// && / || conjunction and emits WC_SUBLIST opcodes; par_sublist2
988/// handles the leading `!` negation and `coproc` keyword.
989///
990/// AST mapping: ZshSublist { pipe, conj_chain }, where `conj_chain`
991/// is a Vec<(ConjOp, ZshSublist)> for chained && / ||. C uses
992/// flat wordcode with WC_SUBLIST_AND / WC_SUBLIST_OR markers.
993fn par_sublist() -> Option<ZshSublist> {
994    let mut flags = SublistFlags::default();
995
996    // Handle coproc and !
997    if tok() == COPROC {
998        flags.coproc = true;
999        zshlex();
1000    } else if tok() == BANG_TOK {
1001        flags.not = true;
1002        zshlex();
1003    }
1004
1005    let pipe = par_pline()?;
1006
1007    // Check for && or ||
1008    let next = match tok() {
1009        DAMPER => {
1010            zshlex();
1011            skip_separators();
1012            // c:Src/parse.c:par_sublist — and-or operators (`&&`,
1013            // `||`) require a sublist on each side. After consuming
1014            // `&&`/`||`, another and-or operator OR a pipe-operator
1015            // immediately after is a parse error in C zsh. zshrs's
1016            // recursion silently returned None and dropped the
1017            // operator. Bug #171 in docs/BUGS.md.
1018            if matches!(tok(), DAMPER | DBAR | BAR_TOK | BARAMP) {
1019                let name = match tok() {
1020                    DAMPER => "&&",
1021                    DBAR => "||",
1022                    BAR_TOK => "|",
1023                    BARAMP => "|&",
1024                    _ => "operator",
1025                };
1026                zerr(&format!("parse error near `{}'", name));
1027                return None;
1028            }
1029            par_sublist().map(|s| (SublistOp::And, Box::new(s)))
1030        }
1031        DBAR => {
1032            zshlex();
1033            skip_separators();
1034            if matches!(tok(), DAMPER | DBAR | BAR_TOK | BARAMP) {
1035                let name = match tok() {
1036                    DAMPER => "&&",
1037                    DBAR => "||",
1038                    BAR_TOK => "|",
1039                    BARAMP => "|&",
1040                    _ => "operator",
1041                };
1042                zerr(&format!("parse error near `{}'", name));
1043                return None;
1044            }
1045            par_sublist().map(|s| (SublistOp::Or, Box::new(s)))
1046        }
1047        _ => None,
1048    };
1049
1050    Some(ZshSublist { pipe, next, flags })
1051}
1052
1053/// Port of `par_sublist2(int *cmplx)` from `Src/parse.c:869`.
1054/// Secondary-sublist arm: handles the `COPROC`/`Bang` prefix
1055/// in front of a pline. Returns the WC_SUBLIST flag word added.
1056pub fn par_sublist2(cmplx: &mut i32) -> Option<i32> {
1057    // c:870 — `int f = 0;`
1058    let mut f: i32 = 0;
1059    // c:873-880 — COPROC / BANG prefix flags.
1060    if tok() == COPROC {
1061        *cmplx = 1;
1062        f |= WC_SUBLIST_COPROC as i32;
1063        zshlex();
1064    } else if tok() == BANG_TOK {
1065        *cmplx = 1;
1066        f |= WC_SUBLIST_NOT as i32;
1067        zshlex();
1068    }
1069    // c:882-883 — `if (!par_pline(cmplx) && !f) return -1;`
1070    if !par_pipe_wordcode(cmplx) && f == 0 {
1071        return None;
1072    }
1073    // c:885 — `return f;`
1074    Some(f)
1075}
1076
1077/// Parse a pipeline
1078/// Parse a pipeline (cmds joined by `|` / `|&`). Direct port of
1079/// zsh/Src/parse.c:894 `par_pline`. AST: ZshPipe { cmds: Vec<ZshCommand> }.
1080/// C emits WC_PIPE wordcodes per command; same flow.
1081fn par_pline() -> Option<ZshPipe> {
1082    let lineno = toklineno();
1083    let cmd = par_cmd()?;
1084
1085    // Check for | or |&
1086    let mut merge_stderr = false;
1087    let next = match tok() {
1088        BAR_TOK | BARAMP => {
1089            merge_stderr = tok() == BARAMP;
1090            zshlex();
1091            skip_separators();
1092            // c:Src/parse.c:par_pline — pipe-operators require a
1093            // command on each side. After consuming `|`/`|&`,
1094            // C zsh's recursive par_pline call returns -1 (parse
1095            // error) when the next token is another pipe-operator
1096            // — `a | | b` errors with `parse error near `|''`.
1097            // zshrs's `par_pline()?` silently returned None on
1098            // missing command, dropping the rest of the input
1099            // without diagnosing the empty-pipe-operand. Bug #171
1100            // in docs/BUGS.md.
1101            if matches!(tok(), BAR_TOK | BARAMP) {
1102                let name = if tok() == BARAMP { "|&" } else { "|" };
1103                zerr(&format!("parse error near `{}'", name));
1104                return None;
1105            }
1106            par_pline().map(Box::new)
1107        }
1108        _ => None,
1109    };
1110
1111    Some(ZshPipe {
1112        cmd,
1113        next,
1114        lineno,
1115        merge_stderr,
1116    })
1117}
1118
1119/// Parse a command
1120/// Parse a command — dispatches by leading token (FOR / CASE /
1121/// IF / WHILE / UNTIL / REPEAT / FUNC / DINBRACK / DINPAR /
1122/// Inpar subshell / Inbrace current-shell / TIME / NOCORRECT,
1123/// else simple). Direct port of zsh/Src/parse.c:958 `par_cmd`.
1124fn par_cmd() -> Option<ZshCommand> {
1125    // Parse leading redirections
1126    let mut redirs = Vec::new();
1127    while IS_REDIROP(tok()) {
1128        if let Some(redir) = par_redir() {
1129            redirs.push(redir);
1130        }
1131    }
1132
1133    let cmd = match tok() {
1134        FOR | FOREACH => par_for(),
1135        SELECT => parse_select(),
1136        CASE => par_case(),
1137        IF => par_if(),
1138        WHILE => par_while(false),
1139        UNTIL => par_while(true),
1140        REPEAT => par_repeat(),
1141        INPAR_TOK => par_subsh(),
1142        INOUTPAR => parse_anon_funcdef(),
1143        INBRACE_TOK => parse_cursh(),
1144        FUNC => par_funcdef(),
1145        DINBRACK => par_cond(),
1146        DINPAR => parse_arith(),
1147        TIME => par_time(),
1148        _ => par_simple(redirs),
1149    };
1150
1151    // Parse trailing redirections. For Simple commands the redirs were
1152    // already captured inside par_simple; for compound forms (Cursh,
1153    // Subsh, If, While, etc.) we collect them here and wrap in
1154    // ZshCommand::Redirected so compile_zsh can scope-bracket them.
1155    if let Some(inner) = cmd {
1156        let mut trailing: Vec<ZshRedir> = Vec::new();
1157        while IS_REDIROP(tok()) {
1158            if let Some(redir) = par_redir() {
1159                trailing.push(redir);
1160            }
1161        }
1162        // c:Src/parse.c:par_cmd — compound forms (Cursh `{...}`, Subsh
1163        // `(...)`, If/While/Until/For/Case/Select/Repeat/Funcdef) must
1164        // be followed by a valid sublist/list separator (`;`, `\n`,
1165        // `&`, `|`, `&&`, `||`, redirect-op) — STRING_LEX after a
1166        // compound is a parse error. zshrs's outer par_list loop
1167        // silently treated trailing words as a new command, masking
1168        // syntax errors like `{ echo a; } b c`. Mirror C's strict
1169        // post-compound terminator check. Bug #146 in docs/BUGS.md.
1170        if !matches!(inner, ZshCommand::Simple(_)) && tok() == STRING_LEX {
1171            let bad = tokstr().unwrap_or_default();
1172            zerr(&format!("parse error near `{}'", bad));
1173            // Reset state before returning so the outer loop's None
1174            // detection unwinds cleanly.
1175            set_incmdpos(true);
1176            set_incasepat(0);
1177            set_incond(0);
1178            set_intypeset(false);
1179            return None;
1180        }
1181        // c:1072-1075 — every par_cmd tail resets the lexer state
1182        // toggles so the NEXT command starts in cmd position with
1183        // case/cond/typeset off. par_simple/par_cond set `incmdpos=0`
1184        // during their bodies; without this reset the next iteration
1185        // of the outer par_list loop sees `if` / `done` / `select`
1186        // etc. as plain strings and the AST collapses.
1187        set_incmdpos(true);
1188        set_incasepat(0);
1189        set_incond(0);
1190        set_intypeset(false);
1191        if trailing.is_empty() {
1192            return Some(inner);
1193        }
1194        // Simple already absorbed its own redirs (compile path expects
1195        // them on ZshSimple), so don't double-wrap.
1196        if matches!(inner, ZshCommand::Simple(_)) {
1197            if let ZshCommand::Simple(mut s) = inner {
1198                s.redirs.extend(trailing);
1199                return Some(ZshCommand::Simple(s));
1200            }
1201            unreachable!()
1202        }
1203        return Some(ZshCommand::Redirected(Box::new(inner), trailing));
1204    }
1205    // Same reset on the empty-cmd branch (mirror c:1072 unconditional
1206    // path — the C function only returns 0 above when the dispatch
1207    // produced no command, and falls through to the reset block).
1208    set_incmdpos(true);
1209    set_incasepat(0);
1210    set_incond(0);
1211    set_intypeset(false);
1212
1213    None
1214}
1215
1216/// Parse for/foreach loop
1217/// Parse `for NAME in WORDS; do BODY; done` (foreach style) AND
1218/// `for ((init; cond; incr)) do BODY done` (c-style). Direct port
1219/// of zsh/Src/parse.c:1087 `par_for`. parse_for_cstyle is the
1220/// inner branch for the `((...))` arithmetic-header variant
1221/// (parse.c:1100-1140 inside par_for).
1222fn par_for() -> Option<ZshCommand> {
1223    let is_foreach = tok() == FOREACH;
1224    // c:1094-1095 (Src/parse.c, par_for) — set `infor=2` (only when
1225    // tok==FOR) so the lexer's `(` peek at lex.c:784-789
1226    // (`if (infor) { ... return DINPAR; }`) routes the arith-for
1227    // body through dbparens semicolon-splitting instead of the
1228    // `cmd_or_math` whole-body capture path. Without this, `for ((
1229    // i=0; i<3; i++ ))` lexed as a single `((arith))` expression
1230    // and parse_for_cstyle's second zshlex got an empty/wrong tok.
1231    //
1232    // The companion C statement `incmdpos = 0;` at c:1094 isn't
1233    // mirrored here: zshrs's parser doesn't otherwise touch
1234    // LEX_INCMDPOS at this boundary, and forcing it false breaks
1235    // the SELECT case where downstream tokenization relied on the
1236    // inherited state. The C parser maintains incmdpos inline at
1237    // every grammar transition (parse.c:617, :791, :1072, :1145,
1238    // :1154, :1161, ...); without porting those companion sites a
1239    // single explicit reset here is more harmful than helpful.
1240    set_infor(if tok() == FOR { 2 } else { 0 }); // c:1095
1241    zshlex(); // c:1096
1242
1243    // Check for C-style: for (( init; cond; step ))
1244    if tok() == DINPAR {
1245        // c:1110-1111 — close out infor / cmdpos after parse_for_cstyle
1246        // has consumed the init/cond/step triple. Done inside the
1247        // helper itself so we honour the C ordering.
1248        return parse_for_cstyle();
1249    }
1250
1251    // c:1116 — `infor = 0;` immediately on entering the foreach
1252    // branch. Without this, `infor` stays at 2 (set at c:1095 when
1253    // tok==FOR) for the rest of par_for, and the lexer's `((`
1254    // peek at lex.c:786 routes every subsequent `((...))` inside
1255    // the loop body through dbparens — so `for x in a; do (( 1
1256    // )); done` and `if (( 1 )) { … }` inside the do-body both
1257    // mis-lexed as a c-style for header.
1258    set_infor(0); // c:1116
1259
1260    // Get variable name(s). zsh parse.c par_for accepts multiple
1261    // identifier tokens before `in`/`(`/newline — `for k v in ...`
1262    // assigns each iteration's pair of values to k and v in turn.
1263    // We store the names space-joined since variable identifiers
1264    // can't contain whitespace.
1265    let mut names: Vec<String> = Vec::new();
1266    while tok() == STRING_LEX {
1267        let v = tokstr().unwrap_or_default();
1268        if v == "in" {
1269            break;
1270        }
1271        names.push(v);
1272        zshlex();
1273    }
1274    if names.is_empty() {
1275        zerr("expected variable name in for");
1276        return None;
1277    }
1278    let var = names.join(" ");
1279
1280    // Skip newlines
1281    skip_separators();
1282
1283    // Get list. The lexer-port quirk: `for x (a b c)` arrives as a
1284    // single String token with the parens lexed-as-content
1285    // (`<Inpar>a b c<Outpar>`) instead of as separate Inpar/String/
1286    // Outpar tokens. Detect that shape and split it manually.
1287    let list = if tok() == STRING_LEX
1288        && tokstr()
1289            .map(|s| s.starts_with('\u{88}') && s.ends_with('\u{8a}'))
1290            .unwrap_or(false)
1291    {
1292        let raw = tokstr().unwrap_or_default();
1293        // Strip leading Inpar + trailing Outpar. KEEP the inner
1294        // content tokenized — `for x ({1..3}) …` has `{1..3}` as
1295        // Inbrace+content+Outbrace markers, which compile_word_str
1296        // needs to detect and brace-expand. Untokenizing here would
1297        // collapse the markers to plain `{` `}` chars and the brace-
1298        // expansion pass (which strictly requires Inbrace TOKEN per
1299        // Src/glob.c:hasbraces) would skip the word entirely.
1300        // Split only on UNTOKENIZED whitespace at the top level —
1301        // tokenized characters (TOKEN range \u{84}..\u{a1}) are part
1302        // of one word; bare ASCII spaces / tabs separate words.
1303        let inner = &raw[raw.char_indices().nth(1).map(|(i, _)| i).unwrap_or(0)
1304            ..raw
1305                .char_indices()
1306                .last()
1307                .map(|(i, _)| i)
1308                .unwrap_or(raw.len())];
1309        let mut words: Vec<String> = Vec::new();
1310        let mut cur = String::new();
1311        for c in inner.chars() {
1312            if c == ' ' || c == '\t' || c == '\n' {
1313                if !cur.is_empty() {
1314                    words.push(std::mem::take(&mut cur));
1315                }
1316            } else {
1317                cur.push(c);
1318            }
1319        }
1320        if !cur.is_empty() {
1321            words.push(cur);
1322        }
1323        zshlex();
1324        ForList::Words(words)
1325    } else if tok() == STRING_LEX {
1326        let s = tokstr();
1327        if s.map(|s| s == "in").unwrap_or(false) {
1328            // c:Src/parse.c:1147-1154 — after consuming `in`, the
1329            // for-list reads in WORD position, not command position.
1330            // Reset incmdpos=false so the lexer's LX2_INBRACE arm
1331            // (lex.rs:1791) treats a leading `{` as the brace-
1332            // expansion marker (`bct++; add(Inbrace)`) instead of
1333            // returning STRING("{") + promoting to INBRACE_TOK.
1334            // Without this, `for i in {1..3}` saw `{` as the body-
1335            // opener brace, so the word-collection loop got an
1336            // empty word list and the loop body silently ran 0
1337            // iterations.
1338            set_incmdpos(false);
1339            zshlex();
1340            let mut words = Vec::new();
1341            while tok() == STRING_LEX {
1342                let _ts_s = tokstr();
1343                if let Some(s) = _ts_s.as_deref() {
1344                    words.push(s.to_string());
1345                }
1346                zshlex();
1347            }
1348            // c:Src/parse.c:1162 — `incmdpos = 1;` after the
1349            // wordlist + SEPER are consumed, so the next token
1350            // (`do` / `{` body opener) lexes at command position.
1351            set_incmdpos(true);
1352            ForList::Words(words)
1353        } else {
1354            ForList::Positional
1355        }
1356    } else if tok() == INPAR_TOK {
1357        // for var (...) — `for x ({1..3})`: inside the parens, the
1358        // list is in WORD position so `{` must lex as the brace-
1359        // expansion Inbrace marker, NOT as a body-opener INBRACE_TOK.
1360        // Without resetting incmdpos before the next zshlex, the
1361        // lexer's LX2_INBRACE arm promotes `{` to INBRACE_TOK and
1362        // the word-collection loop exits empty, giving
1363        // `for x ({1..3})` an empty iteration.
1364        set_incmdpos(false);
1365        zshlex();
1366        let mut words = Vec::new();
1367        while tok() == STRING_LEX || tok() == SEPER {
1368            if tok() == STRING_LEX {
1369                let _ts_s = tokstr();
1370                if let Some(s) = _ts_s.as_deref() {
1371                    words.push(s.to_string());
1372                }
1373            }
1374            zshlex();
1375        }
1376        if tok() == OUTPAR_TOK {
1377            // After the `)` of a for-list, the next token is the
1378            // body opener — `do`/`{`. zsh's lexer needs incmdpos
1379            // set so `{` lexes as Inbrace (not as a literal). C
1380            // analogue: parse.c::par_for sets `incmdpos = 1`
1381            // after consuming the Outpar before the body parse.
1382            set_incmdpos(true);
1383            zshlex();
1384        }
1385        ForList::Words(words)
1386    } else {
1387        ForList::Positional
1388    };
1389
1390    // Skip to body
1391    skip_separators();
1392
1393    // Parse body
1394    let body = parse_loop_body(is_foreach, false)?;
1395
1396    Some(ZshCommand::For(ZshFor {
1397        var,
1398        list,
1399        body: Box::new(body),
1400        is_select: false,
1401    }))
1402}
1403
/// Parse a case statement: `case WORD in PATTERN) BODY ;; ... esac`,
/// or the brace form `case WORD { PATTERN) BODY ;; ... }`. Direct port
/// of zsh/Src/parse.c:1209 `par_case`. Each case arm is a
/// (pattern_list, body, terminator) tuple where terminator is
/// `;;` (default), `;&` (fallthrough), or `;|` (continue testing).
///
/// Returns `None` (after `zerr`) when the case word, the `in`/`{`
/// opener, or an arm's closing `)` is missing, or when an arm has no
/// pattern. Hitting ENDINPUT/LEXERR inside the arm list reports via
/// `yyerror` and still returns the arms collected so far.
fn par_case() -> Option<ZshCommand> {
    // C par_case (parse.c:1209-1241). Order of state toggles
    // matters — the lexer reads the case word in `incmdpos=0`
    // (so it's not promoted to a reswd), then the `in`/`{` in
    // `incmdpos=1, noaliases=1, nocorrect=1` (so the `in` literal
    // isn't alias-expanded or spell-corrected), then sets
    // `incasepat=1, incmdpos=0` before the first pattern.
    set_incmdpos(false);
    zshlex(); // skip 'case'

    let word = match tok() {
        STRING_LEX => {
            let w = tokstr().unwrap_or_default();
            // c:1222 — `incmdpos = 1;` before the next zshlex so the
            // `in` keyword is recognised. c:1223-1225 — save+force
            // noaliases / nocorrect.
            set_incmdpos(true);
            let ona = noaliases();
            let onc = nocorrect();
            set_noaliases(true);
            set_nocorrect(1);
            zshlex();
            // Restore noaliases/nocorrect after the `in`-or-`{` token
            // is in hand; both are unconditionally restored at c:1238-1239.
            // The closure captures nothing: the saved values are passed
            // back in at each call site below.
            let restore = |ona: bool, onc: i32| {
                set_noaliases(ona);
                set_nocorrect(onc);
            };
            (w, ona, onc, restore)
        }
        _ => {
            zerr("expected word after case");
            return None;
        }
    };
    // Unpack the case word, the saved lexer flags, and the restore helper.
    let (word, ona, onc, restore) = word;

    skip_separators();

    // Expect 'in' or {
    let use_brace = tok() == INBRACE_TOK;
    if tok() == STRING_LEX {
        let s = tokstr();
        // A STRING token with no text, or any text other than `in`,
        // is rejected.
        if s.map(|s| s != "in").unwrap_or(true) {
            // c:1228-1232 — restore noaliases/nocorrect on error path.
            restore(ona, onc);
            zerr("expected 'in' in case");
            return None;
        }
    } else if !use_brace {
        restore(ona, onc);
        zerr("expected 'in' or '{' in case");
        return None;
    }
    // c:1236-1239 — `incasepat = 1; incmdpos = 0; noaliases = ona;
    // nocorrect = onc;` — set the case-pattern context AND restore
    // alias/correct state BEFORE the zshlex that consumes `in`/`{`.
    set_incasepat(1);
    set_incmdpos(false);
    restore(ona, onc);
    zshlex();

    let mut arms = Vec::new();
    // Safety valve: stop collecting arms once this many have been
    // parsed, so the loop below always terminates.
    const MAX_ARMS: usize = 10_000;

    loop {
        if arms.len() > MAX_ARMS {
            zerr("par_case: too many arms");
            break;
        }

        // Set incasepat BEFORE skipping separators so lexer knows we're in case pattern context
        // This affects how [ and | are lexed
        set_incasepat(1);

        skip_separators();

        // Check for end
        // Note: 'esac' might be String "esac" if incasepat > 0 prevents reserved word recognition
        let is_esac = tok() == ESAC
            || (tok() == STRING_LEX && tokstr().map(|s| s == "esac").unwrap_or(false));
        // The closer must match the opener: `}` for the brace form,
        // `esac` for the `in` form.
        if (use_brace && tok() == OUTBRACE_TOK) || (!use_brace && is_esac) {
            set_incasepat(0);
            zshlex();
            break;
        }

        // Also break on EOF. c:Src/parse.c:1209 par_case requires
        // ESAC (or `}` in brace form) to close the block — reaching
        // ENDINPUT without either is a parse error (`case ... esack`
        // typo absorbs `esack` as part of the body and silently
        // terminates rc=0 otherwise). Bug #400.
        if tok() == ENDINPUT || tok() == LEXERR {
            set_incasepat(0);
            yyerror("unmatched `case'");
            break;
        }

        // c:1250 — `if (tok == INPAR) zshlex();` — leading-paren
        // skip path. Used when the lexer DID return INPAR_TOK (e.g.
        // SHGLOB or incmdpos forced it). In the normal case-pattern
        // path the lexer absorbs `(...)` into one Stringg and the
        // hack at c:1322 strips the surrounding parens later. Both
        // paths land here.
        let leading_inpar_consumed = tok() == INPAR_TOK;
        if leading_inpar_consumed {
            zshlex();
        }

        // c:1255-1262 — read pattern STRING. zsh's parser falls
        // straight into the STRING reader after the optional INPAR.
        // BAR before any pattern means empty string.
        let mut patterns = Vec::new();
        // Tracks whether the c:1322-1354 hack has fired (paren-
        // wrapped Stringg absorbed by the lexer). When it has, the
        // closing `)` was already absorbed — no separate OUTPAR
        // arm-close to consume.
        let mut absorbed_outpar = false;
        loop {
            if tok() == STRING_LEX {
                let s = tokstr();
                // A literal `esac` here ends pattern collection
                // without being recorded as a pattern.
                if s.as_deref().map(|s| s == "esac").unwrap_or(false) {
                    break;
                }
                let mut str_val = s.unwrap_or_default();

                // c:1322-1354 hack: when this is the first alt AND
                // the string starts with the Inpar marker, the lexer
                // absorbed the whole `(...)` as one token. Strip the
                // surrounding parens — the remainder IS the pattern.
                // The closing arm-paren was absorbed too, so we don't
                // expect a separate OUTPAR token afterward.
                if patterns.is_empty() && str_val.starts_with(crate::ported::zsh_h::Inpar) {
                    // `pct` is the paren nesting depth; `end_idx` is the
                    // index of the Outpar that balances the leading Inpar.
                    let mut pct = 0i32;
                    let mut chars: Vec<char> = str_val.chars().collect();
                    let mut end_idx: Option<usize> = None;
                    for (idx, &c) in chars.iter().enumerate() {
                        if c == crate::ported::zsh_h::Inpar {
                            pct += 1;
                        } else if c == crate::ported::zsh_h::Outpar {
                            pct -= 1;
                            if pct == 0 {
                                end_idx = Some(idx);
                                break;
                            }
                        }
                    }
                    if let Some(idx) = end_idx {
                        // Remove the closer first so index 0 stays valid
                        // for the opener.
                        chars.remove(idx);
                        chars.remove(0);
                        str_val = chars.into_iter().collect();
                        absorbed_outpar = true;
                    }
                }
                patterns.push(str_val);
                set_incasepat(2);
                zshlex();
                // When the hack fired the closing `)` is already
                // consumed; don't read alt-`|` continuations either.
                if absorbed_outpar {
                    break;
                }
            } else if tok() != BAR_TOK {
                break;
            }

            // `|` introduces another alternative: go back to
            // first-pattern lexing (incasepat=1) and keep reading.
            if tok() == BAR_TOK {
                set_incasepat(1);
                zshlex();
            } else {
                break;
            }
        }
        set_incasepat(0);

        // c:1305 — expect OUTPAR (arm-close) when the hack didn't
        // already swallow it.
        //
        // Bug #34 in docs/BUGS.md: the absorbed-pattern hack assumed
        // the leading `(` and the case-arm closing `)` were both
        // absorbed into the single STRING token. That's true for
        // `(x))` (the inner `)` closes the absorbed group; the second
        // `)` is the arm closer) only when the lexer slurps BOTH.
        // The Rust lexer slurps just `(x|y)` (one balanced pair); the
        // second `)` arrives as a separate OUTPAR_TOK that must still
        // be consumed as the case-arm closer. Detect and consume it.
        if !absorbed_outpar {
            if tok() != OUTPAR_TOK {
                zerr("expected ')' in case pattern");
                return None;
            }
            // c:Src/parse.c:1257-1258 — `if (tok != STRING)
            // YYERRORV(oecused);` C requires at least one pattern
            // STRING before `)`. zshrs accepted empty `case x in)`
            // and silently fell through to the next iteration with
            // an empty pattern arm, swallowing the rest of the
            // script. Reject the empty-pattern shape unless a
            // leading INPAR was consumed (the `(pat)` form has
            // already validated the pattern inside). Bug #161 in
            // docs/BUGS.md.
            if patterns.is_empty() && !leading_inpar_consumed {
                zerr("parse error near `)'");
                return None;
            }
            set_incmdpos(true);
            zshlex();
            // When the lexer emitted a separate INPAR_TOK at the
            // arm start (consumed via `leading_inpar_consumed`
            // above), the OUTPAR_TOK we just consumed closed the
            // alternation GROUP. If the next token is ALSO
            // OUTPAR_TOK, the user wrote `(pat))` and that second
            // `)` is the case-arm closer that still needs to be
            // consumed before body parsing. Bug #34 in
            // docs/BUGS.md.
            if leading_inpar_consumed && tok() == OUTPAR_TOK {
                zshlex();
            }
        } else if tok() == OUTPAR_TOK {
            // The lexer absorbed `(pat)` as the pattern but left the
            // case-arm closing `)` as a separate OUTPAR_TOK. Consume
            // it now so body parsing starts at the body, not at `)`.
            set_incmdpos(true);
            zshlex();
        } else {
            // Closer was absorbed with the pattern and the current
            // token already belongs to the body: only flip incmdpos.
            set_incmdpos(true);
        }

        // Parse body. Pass end_tokens explicitly so the body's
        // parser stops at DSEMI/SEMIAMP/SEMIBAR/ESAC without
        // tripping parse_program_until's orphan-terminator check,
        // which only fires when end_tokens is None.
        // Without this, a case arm whose body has no trailing
        // `;;` before `esac` (last arm — zsh accepts the dangling
        // form) produced "parse error near orphan terminator" on
        // the closing `esac`. zsh's par_case at parse.c:1318 sets
        // up the case-arm reader to recognize the same terminator
        // set; the Rust port was passing the implicit-None and
        // hitting the top-level orphan check.
        let body = parse_program_until(Some(&[DSEMI, SEMIAMP, SEMIBAR, ESAC]));

        // Get terminator. Set incasepat=1 BEFORE the zshlex
        // advance so the next token (the next arm's pattern, like
        // `[a-z]`) gets tokenized in pattern context. Without
        // this, a `[`-prefixed pattern after the FIRST arm became
        // Inbrack instead of String and the pattern-loop bailed
        // out with "expected ')' in case pattern".
        let terminator = match tok() {
            DSEMI => {
                set_incasepat(1);
                zshlex();
                CaseTerm::Break
            }
            SEMIAMP => {
                set_incasepat(1);
                zshlex();
                CaseTerm::Continue
            }
            SEMIBAR => {
                set_incasepat(1);
                zshlex();
                CaseTerm::TestNext
            }
            // No explicit terminator token (e.g. last arm directly
            // before the closer): treated as `;;` and nothing is consumed.
            _ => CaseTerm::Break,
        };

        // An arm that ended up with no patterns is parsed but not
        // recorded.
        if !patterns.is_empty() {
            arms.push(CaseArm {
                patterns,
                body,
                terminator,
            });
        }
    }

    Some(ZshCommand::Case(ZshCase { word, arms }))
}
1685
1686/// Parse if statement
1687/// Parse `if COND; then BODY; [elif COND; then BODY;]* [else BODY;] fi`.
1688/// Direct port of zsh/Src/parse.c:1411 `par_if`. The C source
1689/// emits WC_IF wordcodes per arm; zshrs builds an AST chain of
1690/// (cond, then_body) tuples plus an optional else_body.
1691fn par_if() -> Option<ZshCommand> {
1692    zshlex(); // skip 'if'
1693
1694    // Parse condition - stops at 'then' or '{' (zsh allows { instead of then)
1695    let cond = Box::new(parse_program_until(Some(&[THEN, INBRACE_TOK])));
1696
1697    skip_separators();
1698
1699    // Expect 'then' or {
1700    let use_brace = tok() == INBRACE_TOK;
1701    if tok() != THEN && !use_brace {
1702        zerr("expected 'then' or '{' after if condition");
1703        return None;
1704    }
1705    zshlex();
1706
1707    // Parse then-body - stops at else/elif/fi, or } if using brace syntax
1708    let then = if use_brace {
1709        let body = parse_program_until(Some(&[OUTBRACE_TOK]));
1710        if tok() == OUTBRACE_TOK {
1711            zshlex();
1712        }
1713        Box::new(body)
1714    } else {
1715        Box::new(parse_program_until(Some(&[ELSE, ELIF, FI])))
1716    };
1717
1718    // Parse elif and else. zsh accepts the SAME elif/else
1719    // continuations for both classic `then/fi` AND the brace
1720    // form `{ ... } elif ... { ... } else { ... }`. Direct port
1721    // of zsh/Src/parse.c:1417-1500 par_if where the elif/else
1722    // arms are checked AFTER the body close regardless of which
1723    // delimiter style opened the block. Without this, zinit's
1724    //   if [[ -z $sel ]] { ... } else { ... }
1725    // hung the parser — `else` was treated as an external
1726    // command following the if-statement, which the lexer state
1727    // mis-classified inside the still-open function body.
1728    //
1729    // For brace-form: skip the `fi` consumption at the end of
1730    // the loop (no `fi` after a brace block), and `else` may
1731    // arrive after a `}` close. Skip-separators between the
1732    // body close and the elif/else token.
1733    let mut elif = Vec::new();
1734    let mut else_ = None;
1735    // c:Src/parse.c:1501-1504 — `if (tok != FI) { cmdpop(); YYERRORV; }`.
1736    // The C parser fails the whole if-construct when the body close
1737    // isn't seen. zshrs's loop fell through silently on ENDINPUT, so
1738    // `if true; then echo yes` (no `fi`) was accepted. Track whether
1739    // we hit a real terminator and error after the loop if not.
1740    let mut saw_terminator = use_brace; // `{ … }` body already consumed its close
1741
1742    {
1743        loop {
1744            skip_separators();
1745
1746            match tok() {
1747                ELIF => {
1748                    zshlex();
1749                    // elif condition stops at 'then' or '{'
1750                    let econd = parse_program_until(Some(&[THEN, INBRACE_TOK]));
1751                    skip_separators();
1752
1753                    let elif_use_brace = tok() == INBRACE_TOK;
1754                    if tok() != THEN && !elif_use_brace {
1755                        zerr("expected 'then' after elif");
1756                        return None;
1757                    }
1758                    zshlex();
1759
1760                    // elif body stops at else/elif/fi or } if using braces
1761                    let ebody = if elif_use_brace {
1762                        let body = parse_program_until(Some(&[OUTBRACE_TOK]));
1763                        if tok() == OUTBRACE_TOK {
1764                            zshlex();
1765                            saw_terminator = true; // brace close on elif
1766                        }
1767                        body
1768                    } else {
1769                        parse_program_until(Some(&[ELSE, ELIF, FI]))
1770                    };
1771
1772                    elif.push((econd, ebody));
1773                }
1774                ELSE => {
1775                    zshlex();
1776                    skip_separators();
1777
1778                    let else_use_brace = tok() == INBRACE_TOK;
1779                    if else_use_brace {
1780                        zshlex();
1781                    }
1782
1783                    // else body stops at 'fi' or '}'
1784                    else_ = Some(Box::new(if else_use_brace {
1785                        let body = parse_program_until(Some(&[OUTBRACE_TOK]));
1786                        if tok() == OUTBRACE_TOK {
1787                            zshlex();
1788                            saw_terminator = true;
1789                        }
1790                        body
1791                    } else {
1792                        parse_program_until(Some(&[FI]))
1793                    }));
1794
1795                    // Consume the 'fi' if present (not for brace syntax)
1796                    if !else_use_brace && tok() == FI {
1797                        zshlex();
1798                        saw_terminator = true;
1799                    }
1800                    break;
1801                }
1802                FI => {
1803                    // Brace-form `if ... { ... }` is already terminated by
1804                    // its closing `}`. Do NOT consume `fi` here — it belongs
1805                    // to an enclosing then-form if. Without this gate, a
1806                    // brace-form if inside a then-form if's body would steal
1807                    // the outer `fi`, leaving the outer parser to see
1808                    // "unterminated if". This bit zinit-install.zsh:978
1809                    // where `if (( … )) {` (brace) inside `if … ; then …`
1810                    // (then-form) ate the outer `fi`.
1811                    if use_brace {
1812                        break;
1813                    }
1814                    zshlex();
1815                    saw_terminator = true;
1816                    break;
1817                }
1818                _ => break,
1819            }
1820        }
1821    }
1822
1823    if !saw_terminator {
1824        // c:1501-1504 — YYERRORV when the if-construct never closed.
1825        zerr("parse error: unterminated if");
1826        return None;
1827    }
1828
1829    Some(ZshCommand::If(ZshIf {
1830        cond,
1831        then,
1832        elif,
1833        else_,
1834    }))
1835}
1836
1837/// Parse while/until loop
1838/// Parse `while COND; do BODY; done` and `until COND; do BODY; done`.
1839/// Direct port of zsh/Src/parse.c:1521 `par_while`. The
1840/// `until` variant is the same loop with the condition negated.
1841fn par_while(until: bool) -> Option<ZshCommand> {
1842    zshlex(); // skip while/until
1843
1844    // c:1521-1551 par_while — the condition's parser must stop at
1845    // `do` or `{`. Without an explicit end-token set, parse_program
1846    // consumes the brace-form body as additional condition lists,
1847    // leaving parse_loop_body with nothing — `while (( i++ < 3 )) {
1848    // echo $i }` silently parsed but executed nothing.
1849    let cond = Box::new(parse_program_until(Some(&[DOLOOP, INBRACE_TOK])));
1850
1851    skip_separators();
1852    let body = parse_loop_body(false, false)?;
1853
1854    // c:Src/parse.c:1521-1551 par_while — WC_WHILE wordcode is tagged
1855    // with WC_WHILE_TYPE differentiating WHILE vs UNTIL at the wordcode
1856    // layer. The AST mirror in zsh_ast.rs has separate Until(ZshWhile)
1857    // and While(ZshWhile) variants; route by the `until` flag here so
1858    // downstream pattern-matchers can distinguish without poking
1859    // inside the payload's bool.
1860    let w = ZshWhile {
1861        cond,
1862        body: Box::new(body),
1863        until,
1864    };
1865    Some(if until {
1866        ZshCommand::Until(w) // c:1521 (WC_WHILE_TYPE = WC_WHILE_UNTIL)
1867    } else {
1868        ZshCommand::While(w) // c:1521 (WC_WHILE_TYPE = WC_WHILE_WHILE)
1869    })
1870}
1871
1872/// Parse repeat loop
1873/// Parse `repeat N; do BODY; done`. Direct port of
1874/// zsh/Src/parse.c:1565 `par_repeat`. The C source supports
1875/// the SHORTLOOPS short-form `repeat N CMD` (no do/done) — zshrs's
1876/// parser doesn't yet special-case that variant.
1877fn par_repeat() -> Option<ZshCommand> {
1878    zshlex(); // skip 'repeat'
1879
1880    let count = match tok() {
1881        STRING_LEX => {
1882            let c = tokstr().unwrap_or_default();
1883            zshlex();
1884            c
1885        }
1886        _ => {
1887            zerr("expected count after repeat");
1888            return None;
1889        }
1890    };
1891
1892    skip_separators();
1893    // c:1600 — par_repeat's short-form gate is wider: it unlocks
1894    // when SHORTLOOPS OR SHORTREPEAT is set (vs SHORTLOOPS alone for
1895    // for/while). Pass `is_repeat=true` so parse_loop_body
1896    // applies that widened gate.
1897    let body = parse_loop_body(false, true)?;
1898
1899    Some(ZshCommand::Repeat(ZshRepeat {
1900        count,
1901        body: Box::new(body),
1902    }))
1903}
1904
1905/// Parse (...) subshell
1906/// Parse a subshell `( ... )`. Direct port of zsh/Src/parse.c:1619
1907/// `par_subsh`. Body parses as a normal list; the subshell wrapper
1908/// fork-isolates execution in the executor.
1909fn par_subsh() -> Option<ZshCommand> {
1910    zshlex(); // skip (
1911    let prog = parse_program();
1912    if tok() == OUTPAR_TOK {
1913        zshlex();
1914    }
1915    Some(ZshCommand::Subsh(Box::new(prog)))
1916}
1917
1918/// Parse function definition
1919/// Parse `function NAME { BODY }` or `NAME () { BODY }`. Direct
1920/// port of zsh/Src/parse.c:1672 `par_funcdef`. zsh handles
1921/// the multiple keyword shapes (function FOO, FOO (), function FOO ()),
1922/// the optional `[fname1 fname2 ...]` for multi-name function defs,
1923/// and the `function FOO () { ... }` traditional/POSIX hybrid form.
1924fn par_funcdef() -> Option<ZshCommand> {
1925    zshlex(); // skip 'function'
1926
1927    let mut names = Vec::new();
1928    let mut tracing = false;
1929
1930    // Handle options like -T and function names. Two subtleties:
1931    //
1932    //   1. Flags: zsh's lexer encodes a leading `-` as
1933    //      `zsh_h::Dash` (`\u{9b}`, `Src/zsh.h:182`) inside the String tokstr.
1934    //      The previous `s.starts_with('-')` check failed for
1935    //      `\u{9b}T`, so `function -T NAME { body }` slipped the
1936    //      `-T` token into `names` and the function got registered
1937    //      as `T` plus the intended `NAME`.
1938    //
1939    //   2. Body opener: zsh's lexer emits the opening `{` as a
1940    //      String (not INBRACE_TOK) when it follows the String
1941    //      NAME — the preceding name token resets incmdpos to
1942    //      false, and only `{` immediately followed by `}` (the
1943    //      empty-body case) gets promoted to Inbrace. The funcdef
1944    //      parser must recognise the bare-`{` String as the body
1945    //      opener; otherwise `function NAME { body }` falls through
1946    //      to `_ => break`, no body parses, and the FuncDef never
1947    //      lands in the AST. This is consistent with C zsh's
1948    //      par_funcdef which knows it's in funcdef-header context
1949    //      and accepts the brace either way.
1950    loop {
1951        match tok() {
1952            STRING_LEX => {
1953                let _ts_s = tokstr()?;
1954                let s = _ts_s.as_str();
1955                // c:1702 — `if ((*tokstr == Inbrace || *tokstr == '{') && !tokstr[1])`.
1956                // Body opener can be either the literal `{` (early-return
1957                // path at lex.c:1141-1144 / lex.rs LX2_INBRACE cmdpos
1958                // branch) or the Inbrace marker `\u{8f}` (lex.c:1420
1959                // post-switch add(c) where c was rewritten via lextok2).
1960                if s == "{" || s == "\u{8f}" {
1961                    break;
1962                }
1963                let first = s.chars().next();
1964                if matches!(first, Some('-') | Some('+')) || matches!(first, Some(c) if c == Dash) {
1965                    if s.contains('T') {
1966                        tracing = true;
1967                    }
1968                    zshlex();
1969                    continue;
1970                }
1971                // c:Src/exec.c::execcmd_args — function name tokens
1972                // in `function NAME { ... }` form go through globbing
1973                // at parse time. zsh's `function with[bracket] { ... }`
1974                // triggers a glob expansion of `with[bracket]`; no file
1975                // matches → "no matches found: NAME" + rc=1 (when
1976                // NOMATCH is set, the default). Bug #536: zshrs accepted
1977                // the literal bracket-containing name and registered
1978                // the function silently. Mirror C by probing for glob
1979                // metachars on the name; if present AND no file
1980                // matches, emit the diagnostic and abort the parse.
1981                let has_glob_chars = s.chars().any(|c| {
1982                    matches!(
1983                        c,
1984                        '[' | ']'
1985                            | '*'
1986                            | '?'
1987                            | crate::ported::zsh_h::Inbrack
1988                            | crate::ported::zsh_h::Outbrack
1989                            | crate::ported::zsh_h::Star
1990                            | crate::ported::zsh_h::Quest
1991                    )
1992                });
1993                if has_glob_chars && crate::ported::zsh_h::isset(crate::ported::zsh_h::NOMATCH) {
1994                    let untok = crate::ported::lex::untokenize(s);
1995                    let glob_result = crate::ported::glob::glob(&untok);
1996                    if glob_result.is_empty() {
1997                        crate::ported::utils::zerr(&format!("no matches found: {}", untok));
1998                        crate::ported::utils::errflag.fetch_or(
1999                            crate::ported::utils::ERRFLAG_ERROR,
2000                            std::sync::atomic::Ordering::Relaxed,
2001                        );
2002                        return None;
2003                    }
2004                }
2005                names.push(s.to_string());
2006                zshlex();
2007            }
2008            INBRACE_TOK | INOUTPAR | SEPER | NEWLIN => break,
2009            _ => break,
2010        }
2011    }
2012
2013    // Optional ()
2014    let saw_paren = tok() == INOUTPAR;
2015    if saw_paren {
2016        zshlex();
2017    }
2018
2019    skip_separators();
2020
2021    // Body opener: real Inbrace OR a String containing the literal `{`
2022    // (early-return path) OR a String containing the Inbrace marker
2023    // `\u{8f}` (bct++ path post-switch add). C parse.c:1702 handles
2024    // both string forms via `*tokstr == Inbrace || *tokstr == '{'`.
2025    let body_opener_is_string_brace =
2026        tok() == STRING_LEX && tokstr().map(|s| s == "{" || s == "\u{8f}").unwrap_or(false);
2027    if tok() == INBRACE_TOK || body_opener_is_string_brace {
2028        // Capture body_start BEFORE the lexer advances past the
2029        // first body token. After the previous zshlex consumed
2030        // `{`, lexer.pos points just past `{` (which is where the
2031        // body source starts). The next `zshlex()` would advance
2032        // past the first token (`echo`), making body_start land
2033        // mid-body and lose the first word — `typeset -f f` would
2034        // print `a; echo b` for `{ echo a; echo b }`.
2035        // c:Src/parse.c:1690-1706 — par_funcdef requires a clean
2036        //   body-opener brace when the anonymous form `function {body}`
2037        //   is used (no names AND no `()`). zsh's lexer keeps the `{`
2038        //   as its own STRING token via the lex.c:1141-1144 early-
2039        //   return at command position, but the body brace must be
2040        //   followed by whitespace for the inner par_list to find a
2041        //   matching OUTBRACE — without a separator, the closing `}`
2042        //   gets merged into the last word (`X}`) and par_list ends
2043        //   without OUTBRACE, which C zsh reports as `parse error near
2044        //   \`}'`. zshrs's lexer has the same `bct` semantics; reject
2045        //   here at the parse step so the funcdef doesn't silently run
2046        //   with the stray `}` attached. With names or `()` present,
2047        //   the body brace is allowed even without a separator
2048        //   (`function name {body}` and `function () {body}` both work
2049        //   in zsh). Bug #60 in docs/BUGS.md.
2050        if names.is_empty() && !saw_paren {
2051            // Peek the next source byte after the current lexer position
2052            // (`{` was just tokenized — `pos()` points just past it).
2053            // A whitespace separator means proper `function { body }`
2054            // form; anything else is the malformed `function {body}`
2055            // shape zsh rejects.
2056            let next_byte = input_slice(pos(), pos() + 1)
2057                .and_then(|s| s.bytes().next())
2058                .unwrap_or(b' ');
2059            if !matches!(next_byte, b' ' | b'\t' | b'\n' | b';') {
2060                zerr("parse error near `}'"); // c:Src/parse.c YYERRORV
2061                return None;
2062            }
2063        }
2064        let body_start = pos();
2065        zshlex();
2066        // c:Src/parse.c — func body terminates at OUTBRACE_TOK.
2067        // Explicit end-token keeps the inner parse from hitting the
2068        // top-level stray-`}` arm (#168). Bug #167 family.
2069        let body = parse_program_until(Some(&[OUTBRACE_TOK]));
2070        // c:Src/parse.c:1733-1737 — `if (tok != OUTBRACE) { cmdpop();
2071        // ... YYERRORV(oecused); }`. Hard-error on missing close brace
2072        // so `function f { echo hi` doesn't silently register a half-
2073        // parsed body. Bug #405.
2074        if tok() != OUTBRACE_TOK {
2075            zerr("parse error: expected `}'");
2076            return None;
2077        }
2078        let body_end = pos().saturating_sub(1);
2079        let body_source = input_slice(body_start, body_end)
2080            .map(|s| {
2081                // Lexer's pos() may have advanced past `}` AND skipped
2082                // trailing whitespace/newlines before returning the
2083                // OUTBRACE_TOK to us, so the slice up to `pos - 1`
2084                // includes the `}` and any preceding whitespace.
2085                // Strip the trailing `}` and any preceding structural
2086                // separator (`;`, `\n`) — C zsh's getpermtext walks
2087                // the wordcode list and emits each command WITHOUT
2088                // the trailing `;`/`\n` that lives in the input.
2089                let t = s.trim();
2090                let t = t.strip_suffix('}').unwrap_or(t).trim_end();
2091                let t = t
2092                    .trim_end_matches(|c: char| c == ';' || c == '\n')
2093                    .trim_end();
2094                t.to_string()
2095            })
2096            .filter(|s| !s.is_empty());
2097        zshlex();
2098
2099        // Anonymous form `function () { body } a b c` (with `()`) or
2100        // `function { body } a b c` (zsh-only shorthand, no `()`). No
2101        // name was collected. Mirror parse_anon_funcdef: synthesize
2102        // `_zshrs_anon_N`, collect trailing args, set auto_call_args
2103        // so compile_funcdef registers + immediately calls the
2104        // function with the args as positional params.
2105        if names.is_empty() {
2106            let mut args = Vec::new();
2107            while tok() == STRING_LEX {
2108                if let Some(s) = tokstr() {
2109                    args.push(s);
2110                }
2111                zshlex();
2112            }
2113            static ANON_COUNTER: AtomicUsize = AtomicUsize::new(0);
2114            let n = ANON_COUNTER.fetch_add(1, Ordering::Relaxed);
2115            let name = format!("_zshrs_anon_kw_{}", n);
2116            return Some(ZshCommand::FuncDef(ZshFuncDef {
2117                names: vec![name],
2118                body: Box::new(body),
2119                tracing,
2120                auto_call_args: Some(args),
2121                body_source,
2122            }));
2123        }
2124
2125        Some(ZshCommand::FuncDef(ZshFuncDef {
2126            names,
2127            body: Box::new(body),
2128            tracing,
2129            auto_call_args: None,
2130            body_source,
2131        }))
2132    } else {
2133        // Short form
2134        par_list().map(|list| {
2135            ZshCommand::FuncDef(ZshFuncDef {
2136                names,
2137                body: Box::new(ZshProgram { lists: vec![list] }),
2138                tracing,
2139                auto_call_args: None,
2140                body_source: None,
2141            })
2142        })
2143    }
2144}
2145
2146/// Parse time command
2147/// Parse `time CMD` (POSIX time keyword). Direct port of
2148/// zsh/Src/parse.c:1787 `par_time`. The `time` keyword
2149/// times the execution of the following pipeline / cmd.
2150fn par_time() -> Option<ZshCommand> {
2151    zshlex(); // skip 'time'
2152
2153    // Check if there's a pipeline to time
2154    if tok() == SEPER || tok() == NEWLIN || tok() == ENDINPUT {
2155        Some(ZshCommand::Time(None))
2156    } else {
2157        let sublist = par_sublist();
2158        Some(ZshCommand::Time(sublist.map(Box::new)))
2159    }
2160}
2161
2162/// Port of `par_dinbrack(void)` from `Src/parse.c:1810`. Body
2163/// parser inside `[[ ... ]]` — calls `par_cond` to emit the
2164/// condition wordcode then advances past `]]`.
2165pub fn par_dinbrack() -> Option<()> {
2166    // c:1810
2167    set_incond(1); // c:1814
2168    set_incmdpos(false); // c:1815
2169    zshlex(); // c:1816
2170    let _ = par_cond(); // c:1817
2171    if tok() != DOUTBRACK {
2172        // c:1818
2173        yyerror("missing ]]");
2174        return None;
2175    }
2176    set_incond(0); // c:1820
2177    set_incmdpos(true); // c:1821
2178    zshlex(); // c:1822
2179    Some(())
2180}
2181
/// Parse a simple command (assignments + words + redirections).
/// Direct port of zsh/Src/parse.c:1836 `par_simple` —
/// the largest single function in parse.c. Handles ENVSTRING/
/// ENVARRAY assignments at command head, intermixed redirs,
/// typeset-style multi-assignment commands, and the trailing
/// inout-par `()` that converts a simple command into an inline
/// function definition.
///
/// `redirs` holds redirections the caller has already collected;
/// redirections parsed here are appended to it.
///
/// Returns:
/// - the result of `parse_inline_funcdef` when the words are followed
///   by `()`;
/// - `None` when nothing at all was parsed (no assignments, words or
///   redirections), or when the multi-name funcdef gate rejects the
///   input;
/// - otherwise `Some(ZshCommand::Simple(..))` carrying the collected
///   assignments, words and redirections.
fn par_simple(mut redirs: Vec<ZshRedir>) -> Option<ZshCommand> {
    let mut assigns = Vec::new();
    let mut words = Vec::new();

    // c:1934-1974 — `{var}>file` brace-FD detection is wired
    // INSIDE the words loop below (the STRING/TYPESET arm) rather
    // than here at the head. The words-loop site sees the tok=STRING
    // `{varname}` followed by a REDIROP and routes into par_redir
    // with redir.varid populated. C does it inline at the start of
    // each STRING/TYPESET arm iteration; functionally equivalent.

    // c:1843-1846 — leading-NOCORRECT prefix: `nocorrect echo hello`
    // emits a NOCORRECT token at the start of par_simple. C sets
    // `nocorrect = 1` and skips past via the `zshlex();` at the
    // for-loop tail (c:1907). zshrs's par_simple (AST) had no
    // NOCORRECT arm so the token was silently dropped and the
    // following command line evaporated — `nocorrect echo hello`
    // produced empty output.
    while tok() == NOCORRECT {
        set_nocorrect(1); // c:1846
        zshlex(); // c:1907 (loop-tail zshlex)
    }

    // Leading assignments (`X=1 Y=(a b) cmd ...`). A token that
    // parse_assign rejects is skipped, not treated as an error.
    while tok() == ENVSTRING || tok() == ENVARRAY {
        if let Some(assign) = parse_assign() {
            assigns.push(assign);
        }
        zshlex();
    }

    // Words and redirections, in whatever order they appear.
    loop {
        match tok() {
            ENVSTRING | ENVARRAY => {
                // Mid-command assignment-shape arg under typeset
                // / declare / local / etc. (intypeset gates the
                // lexer to emit Envstring/Envarray for `name=val`
                // and `name=()` past the command name). Parse the
                // assignment, then emit a synthetic word
                // `NAME=value` (scalar) or `NAME=( … )` (array)
                // string so typeset's builtin arg list sees the
                // assignment-shape arg. Avoids the inline-env
                // scope path that mistakenly treats it like a
                // pre-cmd `X=Y cmd` assignment.
                if let Some(assign) = parse_assign() {
                    let synthetic = match &assign.value {
                        ZshAssignValue::Scalar(v) => format!("{}={}", assign.name, v),
                        ZshAssignValue::Array(elems) => {
                            // c:Src/builtin.c — assoc paren-init `h=( "" v
                            //   k2 v2 )` must preserve empty-string
                            //   elements (zsh stores key="" + value="v").
                            //   The bin_typeset paren-init splitter at
                            //   `builtin.rs:4358` recognizes the
                            //   REJOIN_SEP (`\u{1f}`) sentinel between
                            //   array elements and skips the leading/
                            //   trailing parens trim; using it here
                            //   round-trips empties end-to-end through
                            //   the synthetic-arg rebuild. Space-join
                            //   collapses adjacent empties (`(` + `""` +
                            //   `empty-val` becomes `( empty-val`) so
                            //   bin_typeset never sees the empty key.
                            //   Bug #93 in docs/BUGS.md.
                            //
                            // Resulting shape: `NAME=(` then `\u{1f}ELEM`
                            // for every element, then `\u{1f})`.
                            let mut buf = String::with_capacity(
                                assign.name.len() + 4 + elems.iter().map(|e| e.len() + 1).sum::<usize>(),
                            );
                            buf.push_str(&assign.name);
                            buf.push_str("=(");
                            for elem in elems {
                                buf.push('\u{1f}');
                                buf.push_str(elem);
                            }
                            buf.push('\u{1f}');
                            buf.push(')');
                            buf
                        }
                    };
                    words.push(synthetic);
                }
                zshlex();
            }
            STRING_LEX | TYPESET => {
                // A token without text contributes no word but is
                // still consumed below.
                let s = tokstr();
                if let Some(s) = s {
                    words.push(s);
                }
                // c:1929 — `incmdpos = 0;` so the next zshlex() does
                // not re-promote `{`/`[[`/reserved words at the
                // continuation position. Without this, `echo {a,b}`
                // re-lexes `{` as INBRACE_TOK (current-shell block)
                // and the brace expansion never reaches par_simple.
                set_incmdpos(false);
                // c:1931-1932 — `if (tok == TYPESET) intypeset = is_typeset = 1;`
                // Multi-assign `typeset a=1 b=2` relies on the lexer
                // re-emitting `b=2` as ENVSTRING; that path is gated
                // on `intypeset`. Without this, follow-on assignment
                // words arrive as STRING and the typeset builtin's
                // multi-assign form silently degrades.
                if tok() == TYPESET {
                    set_intypeset(true);
                }
                zshlex();
                // Check for function definition foo() { ... }
                if words.len() == 1 && tok() == INOUTPAR {
                    return parse_inline_funcdef(words.pop().unwrap());
                }
                // `{name}>file` named-fd redirect: the lexer doesn't
                // recognize this shape, so the bare word `{name}`
                // arrives as a String. If it matches `{IDENT}` and
                // the NEXT token is a redirop, pop it off as the
                // varid for that redir.
                if !words.is_empty() && IS_REDIROP(tok()) {
                    let last = words.last().unwrap();
                    let untoked = super::lex::untokenize(last);
                    if untoked.starts_with('{') && untoked.ends_with('}') && untoked.len() > 2 {
                        // Both delimiters are single-byte ASCII, so the
                        // byte slice below lands on char boundaries.
                        let name = &untoked[1..untoked.len() - 1];
                        // IDENT check: ASCII letters/digits/underscore,
                        // first character not a digit.
                        if !name.is_empty()
                            && name.chars().all(|c| c == '_' || c.is_ascii_alphanumeric())
                            && name
                                .chars()
                                .next()
                                .map(|c| c == '_' || c.is_ascii_alphabetic())
                                .unwrap_or(false)
                        {
                            let varid = name.to_string();
                            words.pop();
                            // If par_redir fails the `{name}` word has
                            // already been dropped; the loop simply
                            // carries on with the current token.
                            if let Some(mut redir) = par_redir() {
                                redir.varid = Some(varid);
                                redirs.push(redir);
                            }
                            continue;
                        }
                    }
                }
            }
            _ if IS_REDIROP(tok()) => {
                match par_redir() {
                    Some(redir) => redirs.push(redir),
                    None => break, // Error in redir parsing, stop
                }
            }
            INOUTPAR if !words.is_empty() => {
                // c:2055-2057 — `if (!isset(MULTIFUNCDEF) && argc > 1)
                // YYERROR(oecused);` — multi-name funcdef gate:
                // `f1 f2() { ... }` defines f1 AND f2 to the same
                // body, but only when MULTIFUNCDEF is set.
                if !isset(MULTIFUNCDEF) && words.len() > 1 {
                    zerr("parse error: multiple names in function definition without MULTIFUNCDEF");
                    return None;
                }
                // c:2061-2068 — `if (isset(EXECOPT) && hasalias &&
                // !isset(ALIASFUNCDEF) && argc && hasalias !=
                // input_hasalias()) { zwarn(...); YYERROR(...); }`
                // Alias-as-funcdef warning. zshrs's parser doesn't
                // track `hasalias` (alias-expansion provenance
                // during parse) yet, so `had_alias` stays false —
                // the gate is wired here as a marker so the canonical
                // C predicate is visible. Once alias-provenance lands,
                // swap `false` for the actual provenance compare.
                let had_alias = false;
                if isset(EXECOPT) && had_alias && !isset(ALIASFUNCDEF) && !words.is_empty() {
                    crate::ported::utils::zwarn("defining function based on alias `(unknown)'");
                    return None;
                }
                // foo() { ... } style function
                // NOTE(review): only the last collected word is handed
                // to parse_inline_funcdef; earlier names are left in
                // `words` and dropped on return — confirm this is the
                // intended MULTIFUNCDEF behavior.
                return parse_inline_funcdef(words.pop().unwrap());
            }
            // Any other token ends the simple command.
            _ => break,
        }
    }

    // Nothing parsed at all: there is no simple command here.
    if assigns.is_empty() && words.is_empty() && redirs.is_empty() {
        return None;
    }

    Some(ZshCommand::Simple(ZshSimple {
        assigns,
        words,
        redirs,
    }))
}
2371
2372/// Parse a redirection
2373/// Parse a redirection (>file, <file, >>file, <<HEREDOC, etc.).
2374/// Direct port of zsh/Src/parse.c:2229 `par_redir`. Returns
2375/// a ZshRedir node carrying the operator type, fd, target word
2376/// (or here-doc body / pipe-redir command), and any `{var}` style
2377/// fd-binding parameter.
2378fn par_redir() -> Option<ZshRedir> {
2379    par_redir_with_id(None)
2380}
2381
2382/// Wire a here-document body onto the redirection token that
2383/// requested it. Direct port of zsh/Src/parse.c:2347
2384/// `setheredoc`. Called when a heredoc terminator has been
2385/// matched and the body is ready to be attached to the redir.
2386///
2387/// zshrs port note: zsh's setheredoc patches the wordcode
2388/// in-place via `pc[1] = ecstrcode(doc); pc[2] = ecstrcode(term);`.
2389/// zshrs threads heredoc bodies through `HereDocInfo` structs
2390/// attached inline during the post-parse `fill_heredoc_bodies` walk.
2391/// This method is the AST-side equivalent: writes back to the
2392/// matching redir node by index.
2393/// Port of `setheredoc(int pc, int type, char *str, char *termstr,
2394/// char *munged_termstr)` from `Src/parse.c:2347-2355`. Patches the
2395/// pending heredoc redir at `pc` with its body string + raw and
2396/// munged terminator forms.
2397pub fn setheredoc(pc: usize, redir_type: i32, doc: &str, term: &str, munged_term: &str) {
2398    // zshrs-only guard: AST-path heredocs use `pc = -1 as usize`
2399    // (i.e. `usize::MAX`) as a sentinel meaning "no wordcode slot to
2400    // patch". C never passes a negative pc since the wordcode emitter
2401    // is always active. Skip silently for the AST-only case.
2402    if pc == usize::MAX {
2403        return;
2404    }
2405    // c:2350 — `int varid = WC_REDIR_VARID(ecbuf[pc]) ? REDIR_VARID_MASK : 0;`
2406    let cur = ECBUF.with_borrow(|b| b.get(pc).copied().unwrap_or(0));
2407    let varid = if WC_REDIR_VARID(cur) != 0 {
2408        REDIR_VARID_MASK
2409    } else {
2410        0
2411    };
2412    // c:2351 — `ecbuf[pc] = WCB_REDIR(type | REDIR_FROM_HEREDOC_MASK | varid);`
2413    let new_header = WCB_REDIR((redir_type | REDIR_FROM_HEREDOC_MASK | varid) as wordcode);
2414    // c:2352 — `ecbuf[pc + 2] = ecstrcode(str);`
2415    let coded_str = ecstrcode(doc);
2416    // c:2353 — `ecbuf[pc + 3] = ecstrcode(termstr);`
2417    let coded_term = ecstrcode(term);
2418    // c:2354 — `ecbuf[pc + 4] = ecstrcode(munged_termstr);`
2419    let coded_munged = ecstrcode(munged_term);
2420    ECBUF.with_borrow_mut(|b| {
2421        b[pc] = new_header;
2422        b[pc + 2] = coded_str;
2423        b[pc + 3] = coded_term;
2424        b[pc + 4] = coded_munged;
2425    });
2426}
2427
2428/// Parse a wordlist for `for ... in WORDS;`. Direct port of
2429/// zsh/Src/parse.c:2362 `par_wordlist`. Reads STRING tokens
2430/// until the next SEPER / SEMI / NEWLIN.
2431pub fn par_wordlist() -> Vec<String> {
2432    let mut out = Vec::new();
2433    // parse.c:2362-2378 — collect STRINGs into the wordlist.
2434    while tok() == STRING_LEX {
2435        if let Some(text) = tokstr() {
2436            out.push(text);
2437        }
2438        zshlex();
2439    }
2440    out
2441}
2442
2443/// Parse a newline-separated wordlist. Direct port of
2444/// zsh/Src/parse.c:2379 `par_nl_wordlist`. Like
2445/// par_wordlist but tolerates leading/trailing newlines.
2446pub fn par_nl_wordlist() -> Vec<String> {
2447    // parse.c:2380-2381 — skip leading newlines.
2448    while tok() == NEWLIN {
2449        zshlex();
2450    }
2451    let out = par_wordlist();
2452    // parse.c:2395-2397 — skip trailing newlines.
2453    while tok() == NEWLIN {
2454        zshlex();
2455    }
2456    out
2457}
2458
2459/// `COND_SEP()` macro from `Src/parse.c:2433`. True when the current
2460/// token is a separator usable inside `[[ … ]]` (newline / semi /
2461/// `&`). C uses it to skip optional whitespace between cond terms.
2462#[inline]
2463pub fn COND_SEP() -> bool {
2464    matches!(tok(), NEWLIN | SEMI | AMPER)
2465}
2466
2467/// Parse [[ ... ]] conditional
2468/// Parse `[[ EXPR ]]` conditional expression. Direct port of
2469/// zsh/Src/parse.c:2409 `par_cond` (and helpers par_cond_1,
2470/// par_cond_2, par_cond_double, par_cond_triple, par_cond_multi
2471/// at parse.c:2434-2731). Expression operators: `||` `&&` `!`
2472/// + unary tests (-f, -d, -n, -z, etc.) + binary tests (=, !=,
2473///   <, >, ==, =~, -eq, -ne, -lt, -le, -gt, -ge, -nt, -ot, -ef).
2474fn par_cond() -> Option<ZshCommand> {
2475    // C par_dinbrack (parse.c:1810-1822) wraps the body parse with
2476    // `incond = 1; incmdpos = 0;` BEFORE the first zshlex past `[[`,
2477    // and resets to `incond = 0; incmdpos = 1;` after `]]`. Without
2478    // `incond = 1`, lex.c does not promote `]]` to DOUTBRACK and the
2479    // cond body bleeds past the close bracket — the parser then
2480    // sees `]]` as a separate STRING command. Every `if [[ ... ]]; then`
2481    // failed with `command not found: ]]` before this fix.
2482    set_incond(1);
2483    set_incmdpos(false);
2484    zshlex(); // skip [[
2485              // Empty cond `[[ ]]` is a parse error in zsh — emit the
2486              // diagnostic and return None so the caller produces a
2487              // non-zero exit. Without this, `[[ ]]` silently passed and
2488              // returned exit 0.
2489    if tok() == DOUTBRACK {
2490        zerr("parse error near `]]'");
2491        set_incond(0);
2492        set_incmdpos(true);
2493        zshlex();
2494        return None;
2495    }
2496    let cond = parse_cond_expr();
2497
2498    if tok() == DOUTBRACK {
2499        set_incond(0);
2500        set_incmdpos(true);
2501        zshlex();
2502    } else {
2503        // c:Src/parse.c:1818-1819 — `if (tok != DOUTBRACK)
2504        // YYERRORV(oecused);`. par_dinbrack hard-requires DOUTBRACK
2505        // after par_cond; anything else is a parse error and the
2506        // outer parser's yyerror at c:2747 emits `parse error near
2507        // \`%s'` using zshlextext. Bug #473: BAR (`|`) inside
2508        // `[[ ab == a|b ]]` slipped past par_cond_or (which only
2509        // checks DBAR), the cond returned cleanly, and then the
2510        // top-level parser interpreted BAR as a pipe — running `b`
2511        // as a command (security-relevant if pattern RHS is user
2512        // input). Mirror C: emit parse error and abort.
2513        let tok_text = match tok() {
2514            BAR_TOK => "|".to_string(),
2515            DBAR => "||".to_string(),
2516            AMPER => "&".to_string(),
2517            DAMPER => "&&".to_string(),
2518            SEMI => ";".to_string(),
2519            DSEMI => ";;".to_string(),
2520            NEWLIN | SEPER => String::new(),
2521            _ => tokstr().map(|s| crate::ported::lex::untokenize(&s)).unwrap_or_default(),
2522        };
2523        if tok_text.is_empty() {
2524            zerr("parse error");
2525        } else {
2526            zerr(&format!("parse error near `{}'", tok_text));
2527        }
2528        set_incond(0);
2529        set_incmdpos(true);
2530        return None;
2531    }
2532
2533    cond.map(ZshCommand::Cond)
2534}
2535
2536/// Port of `par_cond_1(void)` from `Src/parse.c:2434`. Parses one
2537/// `||`-separated cond expression. Emits `WCB_COND(COND_AND, …)`
2538/// when an `&&` is found and recurses.
2539pub fn par_cond_1() -> i32 {
2540    // c:2434
2541
2542    let p = ECUSED.with(|c| c.get()) as usize;
2543    let r = par_cond_2();
2544    while COND_SEP() {
2545        condlex();
2546    }
2547    if tok() == DAMPER {
2548        condlex();
2549        while COND_SEP() {
2550            condlex();
2551        }
2552        ecispace(p, 1);
2553        par_cond_1();
2554        let ecused = ECUSED.with(|c| c.get()) as usize;
2555        ECBUF.with(|c| {
2556            c.borrow_mut()[p] = WCB_COND(COND_AND as u32, (ecused - 1 - p) as u32);
2557        });
2558        return 1;
2559    }
2560    r
2561}
2562
2563/// Port of `par_cond_2(void)` from `Src/parse.c:2476`. The heavy
2564/// cond-term parser: handles `! cond`, `(cond)`, unary `[ -X arg ]`,
2565/// binary `[ A op B ]`, and `[ A op1 B op2 C … ]` n-ary chains.
2566pub fn par_cond_2() -> i32 {
2567    // c:2476
2568    // `n_testargs` only applies in `testlex` mode (=== /bin/test
2569    // compat). zshrs has no testlex yet, so always 0.
2570    let n_testargs: i32 = 0;
2571
2572    // c:2481 — handled inline; this Rust port skips the n_testargs
2573    // arm since zshrs invokes par_cond via [[ ... ]] only.
2574
2575    while COND_SEP() {
2576        condlex();
2577    }
2578    if tok() == BANG_TOK {
2579        // c:2522 — `[[ ! cond ]]`
2580        condlex();
2581        ecadd(WCB_COND(COND_NOT as u32, 0));
2582        return par_cond_2();
2583    }
2584    if tok() == INPAR_TOK {
2585        // c:2533 — `[[ (cond) ]]`
2586        condlex();
2587        while COND_SEP() {
2588            condlex();
2589        }
2590        let r = par_cond();
2591        while COND_SEP() {
2592            condlex();
2593        }
2594        if tok() != OUTPAR_TOK {
2595            yyerror("missing )");
2596            return 0;
2597        }
2598        condlex();
2599        return r.map_or(0, |_| 1);
2600    }
2601    let s1 = tokstr().unwrap_or_default();
2602    // c:2549 — `dble = (s1 && IS_DASH(*s1) && (!n_testargs ||
2603    // strspn(s1+1, "abcd...") == 1) && !s1[2]);` — IS_DASH covers
2604    // BOTH `-` and Dash (`\u{9b}`). The raw tokstr inside `[[ ... ]]`
2605    // carries Dash as a marker byte, so `starts_with('-')` alone
2606    // matches only ASCII dashes and misses every `-z`, `-d`, `-r`
2607    // etc. — every such cond emitted the AST-only `condition
2608    // expected` error from par_cond_double. Use IS_DASH and count
2609    // chars (Dash is a single code point) instead of bytes.
2610    let s1_chars: Vec<char> = s1.chars().collect();
2611    let dble = !s1_chars.is_empty()
2612        && IS_DASH(s1_chars[0])
2613        && s1_chars.len() == 2
2614        && "abcdefghknoprstuvwxzLONGS".contains(s1_chars[1]);
2615    if tok() != STRING_LEX {
2616        if !s1.is_empty() && tok() != LEXERR && (!dble || n_testargs != 0) {
2617            // c:2486-2497 — `if (n_testargs == 1)` block: under
2618            // POSIXBUILTINS-off, `[ -t ]` rewrites to `[ -t 1 ]`
2619            // (ksh behavior). The C gate is `unset(POSIXBUILTINS)
2620            // && check_cond(s1, "t")`. zshrs's parser has
2621            // n_testargs=0 (no testlex), so this rewrite path is
2622            // unreachable from zshrs's [[ ]] / [ ] entry points;
2623            // wired here as a marker for parity. When testlex is
2624            // ported the call below activates.
2625            if n_testargs == 1 && unset(POSIXBUILTINS) && check_cond(&s1, "t") {
2626                condlex();
2627                return par_cond_double(&s1, "1");
2628            }
2629            // c:2557 — `[[ STRING ]]` re-interpreted as `[[ -n STRING ]]`.
2630            condlex();
2631            while COND_SEP() {
2632                condlex();
2633            }
2634            return par_cond_double("-n", &s1);
2635        }
2636        yyerror("condition expected");
2637        return 0;
2638    }
2639    condlex();
2640    while COND_SEP() {
2641        condlex();
2642    }
2643    if tok() == INANG_TOK || tok() == OUTANG_TOK {
2644        // c:2576 — `<` / `>` string compare.
2645        let xtok = tok();
2646        condlex();
2647        while COND_SEP() {
2648            condlex();
2649        }
2650        if tok() != STRING_LEX {
2651            yyerror("string expected");
2652            return 0;
2653        }
2654        let s3 = tokstr().unwrap_or_default();
2655        condlex();
2656        while COND_SEP() {
2657            condlex();
2658        }
2659        let op = if xtok == INANG_TOK {
2660            COND_STRLT
2661        } else {
2662            COND_STRGTR
2663        };
2664        ecadd(WCB_COND(op as u32, 0));
2665        ecstr(&s1);
2666        ecstr(&s3);
2667        return 1;
2668    }
2669    if tok() != STRING_LEX {
2670        // c:2592 — only one operand seen → `[ -n s1 ]`.
2671        if tok() != LEXERR {
2672            if !dble || n_testargs != 0 {
2673                return par_cond_double("-n", &s1);
2674            }
2675            return par_cond_multi(&s1, &[]);
2676        }
2677        yyerror("syntax error");
2678        return 0;
2679    }
2680    let s2 = tokstr().unwrap_or_default();
2681    set_incond(incond() + 1);
2682    condlex();
2683    while COND_SEP() {
2684        condlex();
2685    }
2686    set_incond(incond() - 1);
2687    // c:Src/parse.c:2598-2600 — `if (!n_testargs) dble = (s2 &&
2688    // IS_DASH(*s2) && !s2[2]);` — RECOMPUTE dble based on s2 once
2689    // it's been read, so `[[ A -X B ]]` is treated as a 2-arg cond
2690    // `[ -X B ]` (par_cond_double) rather than a 3-arg triple. This
2691    // is what routes `[[ "" -a "x" ]]` to par_cond_double("", "-a")
2692    // → COND_ERROR "parse error: condition expected: ". Without
2693    // this, the original `dble` from s1 stayed false, the parser
2694    // grabbed s3 and built COND_MODI silently. parity bug #25.
2695    let s2_chars: Vec<char> = s2.chars().collect();
2696    let dble = !s2_chars.is_empty() && IS_DASH(s2_chars[0]) && s2_chars.len() == 2;
2697    if tok() == STRING_LEX && !dble {
2698        let s3 = tokstr().unwrap_or_default();
2699        condlex();
2700        while COND_SEP() {
2701            condlex();
2702        }
2703        if tok() == STRING_LEX {
2704            // c:2615 — n-ary `[ A op B C D ... ]`.
2705            let mut l: Vec<String> = vec![s2, s3];
2706            while tok() == STRING_LEX {
2707                l.push(tokstr().unwrap_or_default());
2708                condlex();
2709                while COND_SEP() {
2710                    condlex();
2711                }
2712            }
2713            return par_cond_multi(&s1, &l);
2714        }
2715        return par_cond_triple(&s1, &s2, &s3);
2716    }
2717    par_cond_double(&s1, &s2)
2718}
2719
2720/// Port of `par_cond_double(char *a, char *b)` from `Src/parse.c:2626`.
2721/// Emits wordcode for unary cond `[ -X b ]` or modular `[ -mod b ]`.
2722pub fn par_cond_double(a: &str, b: &str) -> i32 {
2723    // c:2628 — `if (!IS_DASH(a[0]) || !a[1])` — char-based, since
2724    // Dash is a single code point (`\u{9b}`) and `a.len() < 2` on
2725    // BYTES would still pass for "-z" but fail for the marker form
2726    // `\u{9b}z` (2 bytes). Walk by chars.
2727    let ac: Vec<char> = a.chars().collect();
2728    if ac.is_empty() || !IS_DASH(ac[0]) || ac.len() < 2 {
2729        // c:Src/parse.c:2629 COND_ERROR macro expansion:
2730        //   zwarn(...); herrflush(); errflag |= ERRFLAG_ERROR;
2731        //   YYERROR(ecused) /* sets tok = LEXERR */
2732        // The YYERROR portion is critical — without it the outer
2733        // parser keeps walking the wordcode and execution proceeds
2734        // (e.g. `[[ "" -a "x" ]] && echo m || echo n` runs the
2735        // `|| echo n` branch). Setting LEXERR aborts the upper
2736        // parse so the whole line is rejected, matching zsh's
2737        // observable behavior of stdout="" on parse error.
2738        zerr(&format!("parse error: condition expected: {}", a));
2739        errflag.fetch_or(crate::ported::zsh_h::ERRFLAG_ERROR, Ordering::SeqCst);
2740        set_tok(LEXERR);
2741        return 1;
2742    }
2743    // c:2630 — `else if (!a[2] && strspn(a+1, "abcd...zhLONGS") == 1)`
2744    let unary_set = "abcdefgknoprstuvwxzhLONGS";
2745    if ac.len() == 2 && unary_set.contains(ac[1]) {
2746        // c:2631 — `ecadd(WCB_COND(a[1], 0));` uses the raw cond-op
2747        // letter byte as the opcode payload. Use the ASCII char's
2748        // code-point value directly — every letter in `unary_set`
2749        // fits in 7 bits.
2750        ecadd(WCB_COND(ac[1] as u32, 0));
2751        ecstr(b);
2752    } else {
2753        ecadd(WCB_COND(COND_MOD as u32, 1));
2754        ecstr(a);
2755        ecstr(b);
2756    }
2757    1
2758}
2759
/// Port of `get_cond_num(char *tst)` from `Src/parse.c:2643`.
///
/// Looks `tst` up in the binary cond operator table
/// `nt ot ef eq ne lt gt le ge` (exact, case-sensitive match, without
/// the leading dash) and returns its zero-based position, or `-1`
/// when `tst` is not in the table.
pub fn get_cond_num(tst: &str) -> i32 {
    // c:2647 — table order defines the returned index.
    const BINARY_OPS: [&str; 9] = ["nt", "ot", "ef", "eq", "ne", "lt", "gt", "le", "ge"];

    BINARY_OPS
        .iter()
        .position(|&name| name == tst)
        // c:2654 / c:2656 — the index is at most 8, so the cast is lossless.
        .map_or(-1, |index| index as i32)
}
2775
/// Rust counterpart of the `static int inpartime` guard that the C
/// parser keeps around `par_time` (cited as C parse.c:1038), which
/// prevents infinite recursion on `time time foo`.
///
/// C uses a function-level `static int`, i.e. one flag per process;
/// here the flag is a `thread_local!` `Cell<bool>` starting out
/// `false`, so each thread has its own copy (per-evaluator, which
/// matches the C semantics for a single evaluator).
thread_local! {
    static PARSER_INPARTIME: std::cell::Cell<bool> = const { std::cell::Cell::new(false) };
}
2783
2784/// Port of `par_cond_triple(char *a, char *b, char *c)` from
2785/// `Src/parse.c:2659`. Emits wordcode for the binary forms
2786/// `[ A op B ]` — `=` / `==` / `!=` / `<` / `>` / `=~` / `-X`.
2787///
2788/// C does `(b[0] == Equals || b[0] == '=')` etc., matching BOTH the
2789/// raw ASCII operator char AND its tokenized marker form per
2790/// `Src/zsh.h:159-194`:
2791///   Equals = `\u{8d}`, Outang = `\u{95}`, Inang  = `\u{94}`,
2792///   Tilde  = `\u{98}`, Bang   = `\u{9c}`, Dash   = `\u{9b}`.
2793/// Inside `[[ ... ]]` the lexer emits the marker bytes — comparing
2794/// against literal-only `b"=="` misses every cond op.
2795/// (The previous Rust port had the doc comment values wrong:
2796/// Outang=0x8e was actually Bar; Inang=0x91 was Inbrack;
2797/// Tilde=0x96 was OutangProc; Bang=0x8b was Outparmath. The code
2798/// itself uses the correct const names, so this was a docs-only fix.)
2799pub fn par_cond_triple(a: &str, b: &str, c: &str) -> i32 {
2800    // c:2659
2801    let bc: Vec<char> = b.chars().collect();
2802    let is_eq = |ch: char| ch == '=' || ch == Equals;
2803    let is_gt = |ch: char| ch == '>' || ch == Outang;
2804    let is_lt = |ch: char| ch == '<' || ch == Inang;
2805    let is_tilde = |ch: char| ch == '~' || ch == Tilde;
2806    let is_bang = |ch: char| ch == '!' || ch == Bang;
2807
2808    // c:2663 — `(b[0] == Equals || b[0] == '=') && !b[1]` → `=` (single).
2809    if bc.len() == 1 && is_eq(bc[0]) {
2810        ecadd(WCB_COND(COND_STREQ as u32, 0));
2811        ecstr(a);
2812        ecstr(c);
2813        let np = ECNPATS.with(|cc| {
2814            let v = cc.get();
2815            cc.set(v + 1);
2816            v
2817        }) as u32;
2818        ecadd(np);
2819        return 1;
2820    }
2821    // c:2668-2673 — `(t0 = b[0]=='>' || Outang) || b[0]=='<' || Inang`.
2822    if bc.len() == 1 && (is_gt(bc[0]) || is_lt(bc[0])) {
2823        let op = if is_gt(bc[0]) {
2824            COND_STRGTR
2825        } else {
2826            COND_STRLT
2827        };
2828        ecadd(WCB_COND(op as u32, 0));
2829        ecstr(a);
2830        ecstr(c);
2831        let np = ECNPATS.with(|cc| {
2832            let v = cc.get();
2833            cc.set(v + 1);
2834            v
2835        }) as u32;
2836        ecadd(np);
2837        return 1;
2838    }
2839    // c:2674-2679 — `==` STRDEQ.
2840    if bc.len() == 2 && is_eq(bc[0]) && is_eq(bc[1]) {
2841        ecadd(WCB_COND(COND_STRDEQ as u32, 0));
2842        ecstr(a);
2843        ecstr(c);
2844        let np = ECNPATS.with(|cc| {
2845            let v = cc.get();
2846            cc.set(v + 1);
2847            v
2848        }) as u32;
2849        ecadd(np);
2850        return 1;
2851    }
2852    // c:2680-2684 — `!=` STRNEQ.
2853    if bc.len() == 2 && is_bang(bc[0]) && is_eq(bc[1]) {
2854        ecadd(WCB_COND(COND_STRNEQ as u32, 0));
2855        ecstr(a);
2856        ecstr(c);
2857        let np = ECNPATS.with(|cc| {
2858            let v = cc.get();
2859            cc.set(v + 1);
2860            v
2861        }) as u32;
2862        ecadd(np);
2863        return 1;
2864    }
2865    // c:2685-2691 — `=~` REGEX (no pattern slot — implicit COND_MODI).
2866    if bc.len() == 2 && is_eq(bc[0]) && is_tilde(bc[1]) {
2867        ecadd(WCB_COND(COND_REGEX as u32, 0));
2868        ecstr(a);
2869        ecstr(c);
2870        return 1;
2871    }
2872    // c:2692-2702 — `-OP` numeric-or-modular cond (e.g. `-eq`, `-nt`).
2873    if !bc.is_empty() && IS_DASH(bc[0]) {
2874        let rest: String = bc[1..].iter().collect();
2875        let t = get_cond_num(&rest);
2876        if t > -1 {
2877            ecadd(WCB_COND((t + COND_NT) as u32, 0));
2878            ecstr(a);
2879            ecstr(c);
2880            return 1;
2881        }
2882        ecadd(WCB_COND(COND_MODI as u32, 0));
2883        ecstr(b);
2884        ecstr(a);
2885        ecstr(c);
2886        return 1;
2887    }
2888    // c:2703-2707 — `-mod A B C` modular cond on `a`.
2889    let ac: Vec<char> = a.chars().collect();
2890    if !ac.is_empty() && IS_DASH(ac[0]) && ac.len() > 1 {
2891        ecadd(WCB_COND(COND_MOD as u32, 2));
2892        ecstr(a);
2893        ecstr(b);
2894        ecstr(c);
2895        return 1;
2896    }
2897    zerr(&format!("condition expected: {}", b));
2898    1
2899}
2900
2901/// Port of `par_cond_multi(char *a, LinkList l)` from `Src/parse.c:2716`.
2902/// Emits wordcode for `[ -OP A B C … ]` n-ary cond (alternation).
2903pub fn par_cond_multi(a: &str, l: &[String]) -> i32 {
2904    // c:2716 — `if (!IS_DASH(a[0]) || !a[1])`; same Dash/`-` dual
2905    // matching as par_cond_double, char-walked because Dash is a
2906    // single code point.
2907    let ac: Vec<char> = a.chars().collect();
2908    if ac.is_empty() || !IS_DASH(ac[0]) || ac.len() < 2 {
2909        zerr(&format!("condition expected: {}", a));
2910        return 1;
2911    }
2912    ecadd(WCB_COND(COND_MOD as u32, l.len() as u32));
2913    ecstr(a);
2914    for item in l {
2915        ecstr(item);
2916    }
2917    1
2918}
2919
2920/// Emit a parser-level error. Direct port of zsh/Src/parse.c
2921/// 2733-2766 `yyerror`. C version fills a per-event error buffer
2922/// and sets errflag. zshrs pushes onto errors which the
2923/// caller drains via parse()'s Result return.
2924/// WARNING: param-name divergence — Rust takes `&str message`, C takes
2925/// `int noerr`. The Rust callers pass user-meaningful messages
2926/// (`"missing ]]"`, `"condition expected"`); the C body collects the
2927/// offending token via `dupstring(zshlextext)` for the error string.
2928/// This Rust adapter:
2929///   1. Uses the caller-supplied message verbatim if non-empty.
2930///   2. Skips the `histdone & HISTFLAG_NOEXEC` and `errflag & ERRFLAG_INT`
2931///      gates per c:2746 (printing only when neither is set) — the
2932///      ERRFLAG_INT check is the load-bearing guard.
2933///   3. Sets ERRFLAG_ERROR per c:2753 (noerr=0 path always taken).
2934pub fn yyerror(msg: &str) {
2935    // c:2733
2936    let int_flagged = (errflag.load(Ordering::SeqCst) & crate::ported::zsh_h::ERRFLAG_INT) != 0;
2937    if !int_flagged {
2938        // c:2746
2939        let body = if msg.is_empty() {
2940            "parse error".to_string()
2941        }
2942        // c:2751
2943        else {
2944            format!("parse error: {msg}")
2945        }; // c:2748
2946        zwarnnam("zsh", &body);
2947    }
2948    // c:2753 — `if (!noerr && noerrs != 2) errflag |= ERRFLAG_ERROR;`
2949    errflag.fetch_or(crate::ported::zsh_h::ERRFLAG_ERROR, Ordering::SeqCst);
2950}
2951
// ============================================================
// Eprog runtime ops (parse.c:2767-2853)
//
// dupeprog / useeprog / freeeprog are zsh's reference-counting
// helpers for executable programs. In this port an `eprog` is an
// owned struct: dupeprog builds a fresh copy by cloning the
// wordcode and string table, useeprog bumps `nref` when it is
// non-negative, and freeeprog decrements `nref` and clears the
// program's buffers once the count reaches zero.
// ============================================================
2961
2962/// Duplicate an Eprog. Direct port of zsh/Src/parse.c:2813
2963/// Port of `Eprog dupeprog(Eprog p, int heap)` from
2964/// `Src/parse.c:2767`. Deep-copies the wordcode array, string
2965/// table, and pattern-prog slots. `dummy_eprog` is returned
2966/// unchanged. `heap`-allocated copies get `nref = -1` (never
2967/// freed); real ones get `nref = 1`.
2968pub fn dupeprog(p: &eprog, heap: bool) -> eprog {
2969    // c:2774-2775 — `if (p == &dummy_eprog) return p;` — caller-
2970    // observable identity in C uses a pointer compare; Rust's
2971    // equivalent is "if it has the dummy's shape (single WCB_END
2972    // word and no strs), return a copy of the same shape".
2973    // c:2796-2797 — `for (i = r->npats; i--; pp++) *pp = dummy_patprog1;`
2974    // C uses `dummy_patprog1` as a placeholder; the Rust port has
2975    // `Vec<Patprog>` (Box<patprog>) — synthesize an equivalent zero-
2976    // initialized patprog for each slot (resolved later by
2977    // pattern.c::patcompile-on-first-use).
2978    let dummy_pat = || crate::ported::zsh_h::patprog {
2979        startoff: 0,
2980        size: 0,
2981        mustoff: 0,
2982        patmlen: 0,
2983        globflags: 0,
2984        globend: 0,
2985        flags: 0,
2986        patnpar: 0,
2987        patstartch: 0,
2988    };
2989    let r = eprog {
2990        // c:2778 — `flags = (heap ? EF_HEAP : EF_REAL) | (p->flags & EF_RUN);`
2991        flags: (if heap { EF_HEAP } else { EF_REAL }) | (p.flags & EF_RUN),
2992        len: p.len,
2993        npats: p.npats,
2994        // c:2787 — `nref = heap ? -1 : 1;`
2995        nref: if heap { -1 } else { 1 },
2996        prog: p.prog.clone(),
2997        strs: p.strs.clone(),
2998        pats: (0..p.npats).map(|_| Box::new(dummy_pat())).collect(),
2999        shf: None,
3000        dump: None,
3001    };
3002    r
3003}
3004
3005/// Port of `void useeprog(Eprog p)` from `Src/parse.c:2813`.
3006/// `if (p && p != &dummy_eprog && p->nref >= 0) p->nref++;` —
3007/// pin a real (non-heap, non-dummy) Eprog so it survives the
3008/// next `freeeprog`.
3009pub fn useeprog(p: &mut eprog) {
3010    // c:2815 — `if (p && p != &dummy_eprog && p->nref >= 0)`
3011    if p.nref >= 0 {
3012        p.nref += 1; // c:2816
3013    }
3014}
3015
3016/// Port of `void freeeprog(Eprog p)` from `Src/parse.c:2823`.
3017/// Refcount-decrement; when it hits zero, drops the pattern progs,
3018/// decrements the dump refcount if any, and releases the eprog.
3019/// `dummy_eprog` is never freed. Heap-eprogs (`nref < 0`) are
3020/// never freed either — they live as long as the heap arena.
3021pub fn freeeprog(p: &mut eprog) {
3022    // c:2829 — `if (p && p != &dummy_eprog) { ... }`
3023    if p.nref > 0 {
3024        p.nref -= 1; // c:2832
3025        if p.nref == 0 {
3026            // c:2833-2840 — drop pats, dump refcount, then the eprog.
3027            // Rust's Drop handles the per-field cleanup; we just
3028            // need to decrement the dump count first.
3029            if let Some(dump) = p.dump.take() {
3030                let dumped = (*dump).clone();
3031                decrdumpcount(&dumped); // c:2837
3032            }
3033            p.prog.clear();
3034            p.strs = None;
3035            p.pats.clear();
3036        }
3037    }
3038}
3039
// =============================================================================
// Wordcode read helpers — used by text.rs's `gettext2` and exec dispatch
// to walk a compiled Eprog without re-running the parser. These are the
// only `Src/parse.c` functions ported so far in this file; the recursive-
// descent parser (par_event / par_list / par_cmd / par_*) follows
// below as free functions at module scope.
// =============================================================================
3047
3048/// Port of `ecgetstr(Estate s, int dup, int *tokflag)` from `Src/parse.c:2855`.
3049/// `s->pc` advances through the wordcode buffer; `s->strs` indexes the
3050/// string pool. Returns the interned string (or a 1-3-char literal
3051/// inlined directly into the wordcode word).
3052pub fn ecgetstr(s: &mut estate, dup: i32, tokflag: Option<&mut i32>) -> String {
3053    let prog = &s.prog.prog;
3054    if s.pc >= prog.len() {
3055        return String::new();
3056    }
3057    let c = prog[s.pc]; // c:2858 `wordcode c = *s->pc++;`
3058    s.pc += 1;
3059    if let Some(tf) = tokflag {
3060        *tf = i32::from((c & 1) != 0); // c:2880 `*tokflag = (c & 1);`
3061    }
3062    if c == 6 || c == 7 {
3063        // c:2861 `if (c == 6 || c == 7) r = "";`
3064        return String::new();
3065    }
3066    let r: String = if (c & 2) != 0 {
3067        // c:2862 — `else if (c & 2)`
3068        // c:2863-2868 — 3-byte inline string packed into the wordcode
3069        // word; followed by `buf[3] = '\0'; r = dupstring(buf);`.
3070        // C's `dupstring` uses `strlen(buf)` which TRUNCATES at the
3071        // first NUL byte — short strings of 1 or 2 chars get padded
3072        // with NULs and truncated cleanly. The previous Rust port
3073        // used `retain(|&x| x != 0)` which would silently SPLICE OUT
3074        // an interior NUL (e.g. `[a, 0, b]` → "ab"), diverging from
3075        // C's strlen-truncate (`[a, 0, b]` → "a"). Fix: truncate at
3076        // first NUL to match C exactly.
3077        let b0 = ((c >> 3) & 0xff) as u8;
3078        let b1 = ((c >> 11) & 0xff) as u8;
3079        let b2 = ((c >> 19) & 0xff) as u8;
3080        let v = [b0, b1, b2];
3081        let end = v.iter().position(|&x| x == 0).unwrap_or(v.len()); // c:2869 strlen(buf)
3082        String::from_utf8_lossy(&v[..end]).into_owned()
3083    } else {
3084        // c:2877 `else r = s->strs + (c >> 2);`
3085        let off = (c >> 2) as usize + s.strs_offset;
3086        let strs_bytes = s.strs.as_deref().unwrap_or("").as_bytes();
3087        if off >= strs_bytes.len() {
3088            String::new()
3089        } else {
3090            let tail = &strs_bytes[off..];
3091            let end = tail.iter().position(|&b| b == 0).unwrap_or(tail.len());
3092            String::from_utf8_lossy(&tail[..end]).into_owned()
3093        }
3094    };
3095    // c:2891 `return ((dup == EC_DUP || (dup && (c & 1))) ? dupstring(r) : r);`
3096    // Rust owns the String already; `dup` flag has no observable effect.
3097    let _ = (dup, EC_DUP, EC_NODUP);
3098    r
3099}
3100
3101// ============================================================
3102// Wordcode runtime getters (parse.c:2853-3060)
3103//
3104// Direct ports of the wordcode-read helpers (ecrawstr,
3105// ecgetstr, ecgetarr, ecgetredirs, ecgetlist, eccopyredirs).
3106// Read packed wordcode out of an Eprog at execution time.
3107// Used by exec_wordcode and the wordcode-walking dispatch in
3108// src/vm_helper.
3109// ============================================================
3110
3111/// Port of `ecrawstr(Eprog p, Wordcode pc, int *tokflag)` from
3112/// `Src/parse.c:2891`. Like `ecgetstr` but reads at the given pc
3113/// without advancing — caller steps `pc` separately.
3114pub fn ecrawstr(p: &eprog, pc: usize, tokflag: Option<&mut i32>) -> String {
3115    if pc >= p.prog.len() {
3116        return String::new();
3117    }
3118    let c = p.prog[pc]; // c:2894
3119    if let Some(tf) = tokflag {
3120        *tf = i32::from((c & 1) != 0); // c:2898/2906/2912
3121    }
3122    if c == 6 || c == 7 {
3123        // c:2897
3124        return String::new();
3125    }
3126    if (c & 2) != 0 {
3127        // c:2902-2906 — same 3-byte inline string as ecgetstr, then
3128        // `buf[3] = '\0'; return dupstring(buf);` — truncate at first
3129        // NUL via strlen (NOT splice out interior NULs).
3130        let b0 = ((c >> 3) & 0xff) as u8;
3131        let b1 = ((c >> 11) & 0xff) as u8;
3132        let b2 = ((c >> 19) & 0xff) as u8;
3133        let v = [b0, b1, b2];
3134        let end = v.iter().position(|&x| x == 0).unwrap_or(v.len()); // c:2906 strlen(buf)
3135        String::from_utf8_lossy(&v[..end]).into_owned()
3136    } else {
3137        // c:2911
3138        let off = (c >> 2) as usize;
3139        let strs_bytes = p.strs.as_deref().unwrap_or("").as_bytes();
3140        if off >= strs_bytes.len() {
3141            return String::new();
3142        }
3143        let tail = &strs_bytes[off..];
3144        let end = tail.iter().position(|&b| b == 0).unwrap_or(tail.len());
3145        String::from_utf8_lossy(&tail[..end]).into_owned()
3146    }
3147}
3148
3149/// Port of `ecgetarr(Estate s, int num, int dup, int *tokflag)` from
3150/// `Src/parse.c:2917`. Reads `num` strings from wordcode at `s->pc`
3151/// and OR-folds each entry's token flag into `*tokflag`.
3152pub fn ecgetarr(s: &mut estate, num: usize, dup: i32, tokflag: Option<&mut i32>) -> Vec<String> {
3153    let mut ret: Vec<String> = Vec::with_capacity(num); // c:2922
3154    let mut tf: i32 = 0;
3155    for _ in 0..num {
3156        // c:2924 `while (num--)`
3157        let mut tmp = 0;
3158        ret.push(ecgetstr(s, dup, Some(&mut tmp))); // c:2925
3159        tf |= tmp; // c:2926
3160    }
3161    if let Some(out) = tokflag {
3162        // c:2929
3163        *out = tf;
3164    }
3165    ret
3166}
3167
3168/// Port of `ecgetlist(Estate s, int num, int dup, int *tokflag)` from
3169/// `Src/parse.c:2937`. Same shape as `ecgetarr` but C returns
3170/// `LinkList`; zshrs uses `Vec<String>` for both.
3171pub fn ecgetlist(s: &mut estate, num: usize, dup: i32, tokflag: Option<&mut i32>) -> Vec<String> {
3172    if num == 0 {
3173        // c:2949-2952
3174        if let Some(tf) = tokflag {
3175            *tf = 0;
3176        }
3177        return Vec::new();
3178    }
3179    ecgetarr(s, num, dup, tokflag)
3180}
3181
3182/// Port of `ecgetredirs(Estate s)` from `Src/parse.c:2959`.
3183///
3184/// `strs` must be the same tail `ecgetstr` uses (`s->strs` / `estate.strs` from offset).
3185/// WARNING: param names don't match C — Rust=(prog, strs, pc) vs C=(s)
3186pub fn ecgetredirs(s: &mut estate) -> Vec<redir> {
3187    let mut ret: Vec<redir> = Vec::new(); // c:2959 `LinkList ret = newlinklist();`
3188    let prog_len = s.prog.prog.len();
3189    if s.pc >= prog_len {
3190        return ret;
3191    }
3192    let mut code = s.prog.prog[s.pc]; // c:2962 `wordcode code = *s->pc++;`
3193    s.pc += 1;
3194
3195    loop {
3196        if wc_code(code) != WC_REDIR {
3197            // c:2988-2989 `s->pc--` then break from while
3198            s.pc = s.pc.saturating_sub(1);
3199            break;
3200        }
3201
3202        let typ = WC_REDIR_TYPE(code); // c:2967 `r->type = WC_REDIR_TYPE(code);`
3203        if s.pc >= prog_len {
3204            break;
3205        }
3206        let fd1_w = s.prog.prog[s.pc]; // c:2968 `r->fd1 = *s->pc++;`
3207        s.pc += 1;
3208
3209        let name = ecgetstr(s, EC_DUP, None); // c:2969 `r->name = ecgetstr(...)`
3210
3211        let (flags, here_terminator, munged_here_terminator) = if WC_REDIR_FROM_HEREDOC(code) != 0 {
3212            // c:2970-2973
3213            let term = ecgetstr(s, EC_DUP, None);
3214            let munged = ecgetstr(s, EC_DUP, None);
3215            (REDIRF_FROM_HEREDOC, Some(term), Some(munged))
3216        } else {
3217            // c:2974-2977
3218            (0, None, None)
3219        };
3220
3221        let varid = if WC_REDIR_VARID(code) != 0 {
3222            // c:2979-2980
3223            Some(ecgetstr(s, EC_DUP, None))
3224        } else {
3225            None // c:2981-2982
3226        };
3227
3228        ret.push(redir {
3229            // c:2965-2982 fields + c:2984 `addlinknode`
3230            typ,
3231            flags,
3232            fd1: fd1_w as i32,
3233            fd2: 0,
3234            name: Some(name),
3235            varid,
3236            here_terminator,
3237            munged_here_terminator,
3238        });
3239
3240        if s.pc >= prog_len {
3241            break;
3242        }
3243        code = s.prog.prog[s.pc]; // c:2986 `code = *s->pc++;`
3244        s.pc += 1;
3245    }
3246
3247    ret // c:2990 `return ret`
3248}
3249
3250/// Port of `eccopyredirs(Estate s)` from `Src/parse.c:3003`. Reads
3251/// the WC_REDIR run at `s->pc`, counts the wordcodes needed,
3252/// reserves space in `ecbuf` via `ecispace`, then re-walks `s->pc`
3253/// re-emitting each redir's wordcodes into the reserved slot —
3254/// finally calls `bld_eprog(0)` to package the result as an Eprog.
3255pub fn eccopyredirs(s: &mut estate) -> Option<eprog> {
3256    let prog_len = s.prog.prog.len();
3257    if s.pc >= prog_len {
3258        return None;
3259    }
3260    // c:3007-3009 — `if (wc_code(*pc) != WC_REDIR) return NULL;`
3261    let first_code = s.prog.prog[s.pc];
3262    if wc_code(first_code) != WC_REDIR {
3263        return None;
3264    }
3265    // c:3011 — `init_parse();`
3266    init_parse();
3267
3268    // c:3013-3027 — count wordcodes the redir run will need.
3269    // Each WC_REDIR contributes `code + fd1 + name` = 3, plus
3270    // `+2` if WC_REDIR_FROM_HEREDOC (terminator + munged), plus
3271    // `+1` if WC_REDIR_VARID.
3272    let mut probe = s.pc;
3273    let mut ncodes = 0usize;
3274    loop {
3275        if probe >= prog_len {
3276            break;
3277        }
3278        let code = s.prog.prog[probe];
3279        if wc_code(code) != WC_REDIR {
3280            break;
3281        }
3282        let mut ncode = if WC_REDIR_FROM_HEREDOC(code) != 0 {
3283            5
3284        } else {
3285            3
3286        };
3287        if WC_REDIR_VARID(code) != 0 {
3288            ncode += 1;
3289        }
3290        probe += ncode;
3291        ncodes += ncode;
3292    }
3293
3294    // c:3028-3029 — `r = ecused; ecispace(r, ncodes);`
3295    let r0 = ECUSED.get() as usize;
3296    ecispace(r0, ncodes);
3297
3298    // c:3031-3053 — re-walk `s->pc` and write into ecbuf[r..].
3299    let mut r = r0;
3300    loop {
3301        if s.pc >= prog_len {
3302            break;
3303        }
3304        let code = s.prog.prog[s.pc];
3305        if wc_code(code) != WC_REDIR {
3306            break;
3307        }
3308        s.pc += 1;
3309        // c:3036 — `ecbuf[r++] = code;`
3310        ECBUF.with_borrow_mut(|buf| {
3311            if r >= buf.len() {
3312                buf.resize(r + 1, 0);
3313            }
3314            buf[r] = code;
3315        });
3316        r += 1;
3317        // c:3038 — `ecbuf[r++] = *s->pc++;` (the fd1 word)
3318        let fd1 = s.prog.prog[s.pc];
3319        s.pc += 1;
3320        ECBUF.with_borrow_mut(|buf| {
3321            if r >= buf.len() {
3322                buf.resize(r + 1, 0);
3323            }
3324            buf[r] = fd1;
3325        });
3326        r += 1;
3327        // c:3041 — `ecbuf[r++] = ecstrcode(ecgetstr(s, EC_NODUP, NULL));`
3328        let name = ecgetstr(s, EC_NODUP, None);
3329        let nc = ecstrcode(&name);
3330        ECBUF.with_borrow_mut(|buf| {
3331            if r >= buf.len() {
3332                buf.resize(r + 1, 0);
3333            }
3334            buf[r] = nc;
3335        });
3336        r += 1;
3337        // c:3042-3047 — heredoc terminators.
3338        if WC_REDIR_FROM_HEREDOC(code) != 0 {
3339            let term = ecgetstr(s, EC_NODUP, None);
3340            let tc = ecstrcode(&term);
3341            ECBUF.with_borrow_mut(|buf| {
3342                if r >= buf.len() {
3343                    buf.resize(r + 1, 0);
3344                }
3345                buf[r] = tc;
3346            });
3347            r += 1;
3348            let munged = ecgetstr(s, EC_NODUP, None);
3349            let mc = ecstrcode(&munged);
3350            ECBUF.with_borrow_mut(|buf| {
3351                if r >= buf.len() {
3352                    buf.resize(r + 1, 0);
3353                }
3354                buf[r] = mc;
3355            });
3356            r += 1;
3357        }
3358        // c:3048-3049 — varid.
3359        if WC_REDIR_VARID(code) != 0 {
3360            let varid = ecgetstr(s, EC_NODUP, None);
3361            let vc = ecstrcode(&varid);
3362            ECBUF.with_borrow_mut(|buf| {
3363                if r >= buf.len() {
3364                    buf.resize(r + 1, 0);
3365                }
3366                buf[r] = vc;
3367            });
3368            r += 1;
3369        }
3370    }
3371
3372    // c:3056 — `return bld_eprog(0);` — `bld_eprog` appends the
3373    // WC_END marker and packages ECBUF/ECSTRS into an Eprog.
3374    Some(bld_eprog(false))
3375}
3376
3377/// Port of `init_eprog(void)` from `Src/parse.c:3069`. Sets up
3378/// `dummy_eprog_code = WCB_END(); dummy_eprog.len = sizeof(wordcode);
3379/// dummy_eprog.prog = &dummy_eprog_code; dummy_eprog.strs = NULL;`.
3380/// Called once at shell startup (init_main → init_misc → init_eprog).
3381pub fn init_eprog() {
3382    let mut d = DUMMY_EPROG.lock().unwrap();
3383    d.prog = vec![WCB_END()]; // c:3071/3073
3384    d.len = size_of::<wordcode>() as i32; // c:3072
3385    d.strs = None; // c:3074
3386    d.flags = 0;
3387    d.npats = 0;
3388    d.nref = 0;
3389}
3390
3391// =====================================================================
3392// `bin_zcompile` and wordcode-dump helpers — port of `Src/parse.c:3104+`.
3393//
3394// The wordcode dump format (`.zwc`) is a serialized parse tree zsh can
3395// `mmap()` and dispatch from without re-parsing on every shell start.
3396// File layout (one struct = `FD_PRELEN` `u32`s):
3397//   - `pre[0]` = magic word (FD_MAGIC native byte-order, FD_OMAGIC
3398//     opposite byte-order).
3399//   - `pre[1]` = packed `{flags(8) | other_offset(24)}` byte field.
3400//   - `pre[2..12]` = `ZSH_VERSION` C-string padded to 40 bytes.
3401//   - `pre[12]` = `fdheaderlen` (total prelude+header word count).
3402//   - Then a sequence of `struct fdhead` records, one per function,
3403//     each followed by its NUL-terminated name (padded to 4-byte).
3404//   - Then the wordcode bytes for every function back-to-back.
3405//
// On a little-endian host the dump is written twice: first a `FD_MAGIC`
// copy for native readers, then the body is re-walked byte-swapped and
// emitted as a second `FD_OMAGIC` copy so big-endian readers can mmap it too.
3409// =====================================================================
3410
3411// File-format constants — port of `Src/parse.c:3104-3150`.
3412
/// `#define FD_EXT ".zwc"` from `Src/parse.c:3104`. File-name suffix
/// of a wordcode dump.
pub const FD_EXT: &str = ".zwc";

/// `#define FD_MINMAP 4096` from `Src/parse.c:3105`. Size threshold in
/// bytes for mapping a dump instead of reading it.
/// NOTE(review): the code applying this threshold is outside this
/// section — confirm which map mode it governs.
pub const FD_MINMAP: usize = 4 * 1024;

/// `#define FD_PRELEN 12` from `Src/parse.c:3107`. Length of the file
/// prelude in `u32` words: one magic word, one packed flags word and
/// ten version words.
pub const FD_PRELEN: usize = 1 + 1 + 10;

/// `#define FD_MAGIC 0x04050607` from `Src/parse.c:3108`. Magic word
/// of a dump stored in the reader's own byte order.
pub const FD_MAGIC: u32 = 0x0405_0607;

/// `#define FD_OMAGIC 0x07060504` from `Src/parse.c:3109`. Magic word
/// of a dump stored in the opposite byte order — `FD_MAGIC` with its
/// bytes reversed.
pub const FD_OMAGIC: u32 = FD_MAGIC.swap_bytes();

/// `#define FDF_MAP 1` from `Src/parse.c:3111`. Prelude flag bit: the
/// dump is meant to be `mmap()`-ed (`-M`) rather than read (`-R`).
pub const FDF_MAP: u32 = 1 << 0;

/// `#define FDF_OTHER 2` from `Src/parse.c:3112`. Prelude flag bit:
/// an opposite-byte-order copy is present at `fdother(f)`.
pub const FDF_OTHER: u32 = 1 << 1;
3440
/// Port of `struct fdhead` from `Src/parse.c:3116`: the header record
/// stored once per function inside a wordcode dump. Every field is a
/// `wordcode`, i.e. a `u32`.
#[allow(non_camel_case_types)] // keeps the C struct's lowercase name
#[derive(Clone, Copy, Debug)]
pub struct fdhead {
    /// Where this function's wordcode body begins inside the dump,
    /// as an offset in `u32` words.
    pub start: u32, // c:3117
    /// Length of the body in wordcode bytes, not counting
    /// pattern-prog slots.
    pub len: u32, // c:3118
    /// How many compiled patterns the body refers to.
    pub npats: u32, // c:3119
    /// Offset of the string table inside `prog->prog`.
    pub strs: u32, // c:3120
    /// Length of this header record in `u32` words, name included.
    pub hlen: u32, // c:3121
    /// Packed `{ kshload_bits(2) | name_tail_offset(30) }` field.
    pub flags: u32, // c:3122
}
3460
/// `#define FDHF_KSHLOAD 1` from `Src/parse.c:3149`. Function-header
/// flag: the function is marked for ksh-style autoloading (`-k`).
pub const FDHF_KSHLOAD: u32 = 1 << 0;

/// `#define FDHF_ZSHLOAD 2` from `Src/parse.c:3150`. Function-header
/// flag: the function is marked for zsh-style autoloading (`-z`).
pub const FDHF_ZSHLOAD: u32 = 1 << 1;
3468
/// Port of `struct wcfunc` from `Src/parse.c:3158`: what the dump
/// builder collects for each function before `write_dump` emits it.
/// The body is kept inline in this struct because the C-side
/// `Eprog` ↔ `parse_string` chain isn't fully wired through this layer
/// yet (`build_dump` falls back to source-text caching).
#[allow(non_camel_case_types)] // keeps the C struct's lowercase name
#[derive(Clone, Debug)]
pub struct wcfunc {
    /// Function name. c:3159
    pub name: String,
    /// Function flag word. c:3161
    pub flags: u32,
    /// Compiled body wordcode, one `u32` array per function. Stays
    /// empty until the eprog emit side lands; `write_dump` then walks
    /// each entry.
    pub body: Vec<u32>,
}
3483
3484/// Port of `dump_find_func(Wordcode h, char *name)` from
3485/// `Src/parse.c:3167`. Walks the header table inside a loaded
3486/// dump for a function with the given basename; returns true on hit.
3487pub fn dump_find_func(h: &[u32], name: &str) -> bool {
3488    // c:3167
3489    let header_words = fdheaderlen(h) as usize;
3490    let end = header_words; // walking u32 offsets, end-exclusive
3491    let mut cur = firstfdhead_offset();
3492    while cur < end {
3493        if let Some(fh) = read_fdhead(h, cur) {
3494            let full = fdname(h, cur);
3495            let tail = fdhtail(&fh) as usize;
3496            let basename = if tail <= full.len() {
3497                &full[tail..]
3498            } else {
3499                ""
3500            };
3501            if basename == name {
3502                return true;
3503            }
3504            cur = nextfdhead_offset(h, cur);
3505        } else {
3506            break;
3507        }
3508    }
3509    false
3510}
3511
3512/// Port of `bin_zcompile(char *nam, char **args, Options ops, UNUSED(int func))`
3513/// from `Src/parse.c:3180`. Validates the option set, then dispatches
3514/// to one of: `-t` (test/list), `-c`/`-a` (dump current functions),
3515/// or the default (compile source files to `.zwc`).
3516pub fn bin_zcompile(
3517    nam: &str, // c:3180
3518    args: &[String],
3519    ops: &crate::ported::zsh_h::options,
3520    _func: i32,
3521) -> i32 {
3522    // c:3185-3192 — illegal-combination guard.
3523    if (OPT_ISSET(ops, b'k') && OPT_ISSET(ops, b'z'))
3524        || (OPT_ISSET(ops, b'R') && OPT_ISSET(ops, b'M'))
3525        || (OPT_ISSET(ops, b'c')
3526            && (OPT_ISSET(ops, b'U') || OPT_ISSET(ops, b'k') || OPT_ISSET(ops, b'z')))
3527        || (!(OPT_ISSET(ops, b'c') || OPT_ISSET(ops, b'a')) && OPT_ISSET(ops, b'm'))
3528    {
3529        zwarnnam(nam, "illegal combination of options"); // c:3192
3530        return 1;
3531    }
3532
3533    // c:3194 — `-c`/`-a` + KSHAUTOLOAD warning.
3534    if (OPT_ISSET(ops, b'c') || OPT_ISSET(ops, b'a')) && isset(crate::ported::zsh_h::KSHAUTOLOAD) {
3535        zwarnnam(nam, "functions will use zsh style autoloading"); // c:3195
3536    }
3537
3538    // c:3196-3197 — flag word from `-k` / `-z`.
3539    let flags: u32 = if OPT_ISSET(ops, b'k') {
3540        FDHF_KSHLOAD
3541    } else if OPT_ISSET(ops, b'z') {
3542        FDHF_ZSHLOAD
3543    } else {
3544        0
3545    };
3546
3547    // c:3199 — `-t` test/list mode.
3548    if OPT_ISSET(ops, b't') {
3549        // c:3199
3550        if args.is_empty() {
3551            zwarnnam(nam, "too few arguments"); // c:3202
3552            return 1;
3553        }
3554        let dump_name = if args[0].ends_with(FD_EXT) {
3555            args[0].clone()
3556        } else {
3557            format!("{}{}", args[0], FD_EXT)
3558        };
3559        let f = match load_dump_header(nam, &dump_name, 1) {
3560            // c:3206
3561            Some(buf) => buf,
3562            None => return 1,
3563        };
3564        // c:3209 — per-function check.
3565        if args.len() > 1 {
3566            for name in &args[1..] {
3567                // c:3210
3568                if !dump_find_func(&f, name) {
3569                    // c:3212
3570                    return 1;
3571                }
3572            }
3573            return 0;
3574        }
3575        // c:3215-3221 — listing arm. Walk every fdhead, print
3576        // each function's full name. C uses `fdname(h)` which
3577        // includes the path prefix; matches our `fdname()` impl.
3578        let mapped = if (fdflags(&f) & FDF_MAP) != 0 {
3579            "mapped"
3580        } else {
3581            "read"
3582        };
3583        println!("zwc file ({}) for zsh-{}", mapped, fdversion(&f));
3584        let header_words = fdheaderlen(&f) as usize;
3585        let mut cur = firstfdhead_offset();
3586        while cur < header_words {
3587            if read_fdhead(&f, cur).is_none() {
3588                break;
3589            }
3590            println!("{}", fdname(&f, cur));
3591            cur = nextfdhead_offset(&f, cur);
3592        }
3593        return 0;
3594    }
3595
3596    if args.is_empty() {
3597        zwarnnam(nam, "too few arguments"); // c:3226
3598        return 1;
3599    }
3600
3601    // c:3228 — map mode discriminant.
3602    let map: i32 = if OPT_ISSET(ops, b'M') {
3603        2
3604    } else if OPT_ISSET(ops, b'R') {
3605        0
3606    } else {
3607        1
3608    };
3609
3610    // c:3230-3236 — single-file default-mode short path.
3611    if args.len() == 1 && !(OPT_ISSET(ops, b'c') || OPT_ISSET(ops, b'a')) {
3612        let dump = format!("{}{}", args[0], FD_EXT);
3613        return build_dump(nam, &dump, args, OPT_ISSET(ops, b'U') as i32, map, flags);
3614    }
3615
3616    // c:3239-3247 — multi-file or `-c`/`-a` mode.
3617    let dump = if args[0].ends_with(FD_EXT) {
3618        args[0].clone()
3619    } else {
3620        format!("{}{}", args[0], FD_EXT)
3621    };
3622    let rest = &args[1..];
3623    if OPT_ISSET(ops, b'c') || OPT_ISSET(ops, b'a') {
3624        let what =
3625            (if OPT_ISSET(ops, b'c') { 1 } else { 0 }) | (if OPT_ISSET(ops, b'a') { 2 } else { 0 });
3626        build_cur_dump(nam, &dump, rest, OPT_ISSET(ops, b'm') as i32, map, what)
3627    } else {
3628        build_dump(nam, &dump, rest, OPT_ISSET(ops, b'U') as i32, map, flags)
3629    }
3630}
3631
3632/// Port of `load_dump_header(char *nam, char *name, int err)` from
3633/// `Src/parse.c:3258`. Opens the file, reads + validates the magic
3634/// and version, then slurps the full header table into memory.
3635/// Returns the header u32-array on success or None on any failure
3636/// (emitting C-shaped warnings when `err != 0`).
3637pub fn load_dump_header(nam: &str, name: &str, err: i32) -> Option<Vec<u32>> {
3638    // c:3258
3639
3640    let mut f = match File::open(name) {
3641        // c:3263
3642        Ok(h) => h,
3643        Err(_) => {
3644            if err != 0 {
3645                zwarnnam(nam, &format!("can't open zwc file: {}", name)); // c:3265
3646            }
3647            return None;
3648        }
3649    };
3650
3651    // Read FD_PRELEN+1 u32 words = 52 bytes.
3652    let mut buf_bytes = vec![0u8; (FD_PRELEN + 1) * 4];
3653    if f.read_exact(&mut buf_bytes).is_err() {
3654        if err != 0 {
3655            zwarnnam(nam, &format!("invalid zwc file: {}", name)); // c:3277
3656        }
3657        return None;
3658    }
3659    let mut buf: Vec<u32> = buf_bytes
3660        .chunks_exact(4)
3661        .map(|c| u32::from_le_bytes([c[0], c[1], c[2], c[3]]))
3662        .collect();
3663
3664    // c:3270 — magic + version check. `ZSH_VERSION` (the C-side
3665    // global) — zshrs reports "5.9" in `--zsh` mode (Src/init.c parity).
3666    let magic_ok = fdmagic(&buf) == FD_MAGIC || fdmagic(&buf) == FD_OMAGIC;
3667    let v_ok = fdversion(&buf) == "5.9";
3668    if !magic_ok {
3669        if err != 0 {
3670            zwarnnam(nam, &format!("invalid zwc file: {}", name)); // c:3277
3671        }
3672        return None;
3673    }
3674    if !v_ok {
3675        if err != 0 {
3676            zwarnnam(
3677                nam,
3678                &format!(
3679                    "zwc file has wrong version (zsh-{}): {}", // c:3274
3680                    fdversion(&buf),
3681                    name
3682                ),
3683            );
3684        }
3685        return None;
3686    }
3687
3688    // c:3285 — if magic matches host byte order, head len is `pre[FD_PRELEN]`.
3689    // Else seek to `fdother(buf)` and re-read.
3690    if fdmagic(&buf) != FD_MAGIC {
3691        let other = fdother(&buf) as u64; // c:3290
3692        if f.seek(SeekFrom::Start(other)).is_err() || f.read_exact(&mut buf_bytes).is_err() {
3693            zwarnnam(nam, &format!("invalid zwc file: {}", name)); // c:3295
3694            return None;
3695        }
3696        buf = buf_bytes
3697            .chunks_exact(4)
3698            .map(|c| u32::from_le_bytes([c[0], c[1], c[2], c[3]]))
3699            .collect();
3700    }
3701
3702    let total_words = fdheaderlen(&buf) as usize; // c:3286/3299
3703    if total_words < FD_PRELEN + 1 {
3704        zwarnnam(nam, &format!("invalid zwc file: {}", name));
3705        return None;
3706    }
3707
3708    // Read the remaining header words.
3709    let mut head: Vec<u32> = Vec::with_capacity(total_words);
3710    head.extend_from_slice(&buf);
3711    let remaining_words = total_words - (FD_PRELEN + 1);
3712    if remaining_words > 0 {
3713        let mut rest_bytes = vec![0u8; remaining_words * 4]; // c:3305
3714        if f.read_exact(&mut rest_bytes).is_err() {
3715            zwarnnam(nam, &format!("invalid zwc file: {}", name)); // c:3307
3716            return None;
3717        }
3718        for c in rest_bytes.chunks_exact(4) {
3719            head.push(u32::from_le_bytes([c[0], c[1], c[2], c[3]]));
3720        }
3721    }
3722    Some(head) // c:3311
3723}
3724
/// Port of `fdswap(Wordcode p, int n)` from `Src/parse.c:3318`.
///
/// Reverses the byte order of every word in `p`, in place. The C
/// version takes an explicit count; here the slice length plays that
/// role, so callers pass exactly the range they want swapped.
pub fn fdswap(p: &mut [u32]) {
    // c:3318
    p.iter_mut().for_each(|word| *word = word.swap_bytes());
}
3734
3735/// Port of `write_dump(int dfd, LinkList progs, int map, int hlen, int tlen)`
3736/// from `Src/parse.c:3334`. Writes the prelude + header records +
3737/// body wordcode bytes to the dump file descriptor.
3738///
3739/// Two passes: first native-byte-order (`FD_MAGIC`), then opposite-
3740/// byte-order (`FD_OMAGIC`) so big-endian readers can mmap the
3741/// same file. Bodies are byte-swapped via `fdswap` on the second pass.
3742pub fn write_dump(
3743    dfd: &mut File, // c:3334
3744    progs: &[wcfunc],
3745    mut map: i32,
3746    hlen: i32,
3747    tlen: i32,
3748) -> std::io::Result<()> {
3749    if map == 1 && (tlen as usize) >= FD_MINMAP {
3750        // c:3344
3751        map = 1;
3752    } else if map == 1 {
3753        map = 0;
3754    }
3755
3756    let mut other = 0u32; // c:3338
3757    let ohlen = hlen;
3758    let mut cur_hlen = hlen;
3759
3760    loop {
3761        cur_hlen = ohlen;
3762        // c:3347 — build the prelude.
3763        let mut pre = vec![0u32; FD_PRELEN];
3764        pre[0] = if other != 0 { FD_OMAGIC } else { FD_MAGIC }; // c:3350
3765        let flags = (if map != 0 { FDF_MAP } else { 0 }) | other;
3766        fdsetflags(&mut pre, flags as u8); // c:3351
3767        fdsetother(&mut pre, tlen as u32); // c:3352
3768                                           // c:3353 — copy ZSH_VERSION C-string into pre[2..].
3769        let ver = b"5.9";
3770        for (i, &b) in ver.iter().enumerate() {
3771            let word = 2 + i / 4;
3772            let shift = (i % 4) * 8;
3773            pre[word] |= (b as u32) << shift;
3774        }
3775        // Write prelude.
3776        for w in &pre {
3777            dfd.write_all(&w.to_le_bytes())?;
3778        }
3779        // c:3356 — per-fn header records.
3780        for wcf in progs {
3781            let n = &wcf.name;
3782            let prog = &wcf.body;
3783            let mut head = fdhead {
3784                start: cur_hlen as u32,                                     // c:3360
3785                len: (prog.len() * 4) as u32,                               // c:3363
3786                npats: 0, // c:3364 (npats not tracked yet)
3787                strs: 0,  // c:3365
3788                hlen: ((FDHEAD_WORDS as u32) + ((n.len() as u32 + 4) / 4)), // c:3366
3789                flags: 0,
3790            };
3791            cur_hlen += prog.len() as i32; // c:3361
3792                                           // c:3368 — name tail offset from path basename.
3793            let tail = n.rfind('/').map(|p| p + 1).unwrap_or(0);
3794            head.flags = fdhbldflags(wcf.flags, tail as u32); // c:3372
3795                                                              // c:3373 — opposite-byte-order swap on second pass.
3796            let mut head_words: Vec<u32> = vec![
3797                head.start, head.len, head.npats, head.strs, head.hlen, head.flags,
3798            ];
3799            if other != 0 {
3800                fdswap(&mut head_words);
3801            }
3802            for w in &head_words {
3803                dfd.write_all(&w.to_le_bytes())?;
3804            }
3805            // c:3376 — write the name + NUL + pad-to-4.
3806            dfd.write_all(n.as_bytes())?;
3807            dfd.write_all(&[0u8])?;
3808            let pad = (4 - ((n.len() + 1) & 3)) & 3;
3809            if pad > 0 {
3810                dfd.write_all(&vec![0u8; pad])?;
3811            }
3812        }
3813        // c:3381 — per-fn body words.
3814        for wcf in progs {
3815            let mut body = wcf.body.clone();
3816            if other != 0 {
3817                fdswap(&mut body);
3818            }
3819            for w in &body {
3820                dfd.write_all(&w.to_le_bytes())?;
3821            }
3822        }
3823        if other != 0 {
3824            // c:3389
3825            break;
3826        }
3827        other = FDF_OTHER; // c:3391
3828    }
3829    Ok(())
3830}
3831
3832/// Port of `build_dump(char *nam, char *dump, char **files, int ali, int map, int flags)`
3833/// from `Src/parse.c:3397`. Source-file → wordcode dump compiler.
3834///
3835/// Status: scaffolded but the wordcode-emit step depends on
3836/// `parse_string` returning a fully-wired `Eprog` with `prog/strs/
3837/// npats` fields populated. The current `parse_string`/`parse` shape
3838/// emits an AST (`ZshProgram`) but not yet the wordcode array C
3839/// expects in this dump format. Until that lands, this returns 1
3840/// with a clear "wordcode emit not yet ported" message so callers
3841/// (autoload from `.zwc`, `zcompile path/to/file`) fail loud.
3842pub fn build_dump(
3843    nam: &str, // c:3397
3844    dump: &str,
3845    _files: &[String],
3846    _ali: i32,
3847    _map: i32,
3848    _flags: u32,
3849) -> i32 {
3850    zwarnnam(nam, &format!("{}: wordcode dump emit not yet ported", dump));
3851    1
3852}
3853
3854/// Port of `cur_add_func(char *nam, Shfunc shf, LinkList names, LinkList progs, int *hlen, int *tlen, int what)`
3855/// from `Src/parse.c:3489`. Adds a shfunc to the in-build dump
3856/// progs+names lists. Stub: `Eprog` for the function body isn't
3857/// yet wired through `shfunc.funcdef` to be serializable here.
3858pub fn cur_add_func(
3859    nam: &str, // c:3489
3860    shf_name: &str,
3861    shf_flags: i32,
3862    names: &mut Vec<String>,
3863    progs: &mut Vec<wcfunc>,
3864    hlen: &mut i32,
3865    tlen: &mut i32,
3866    what: i32,
3867) -> i32 {
3868    let is_undef = (shf_flags as u32 & PM_UNDEFINED) != 0;
3869    if is_undef {
3870        if (what & 2) == 0 {
3871            // c:3498
3872            zwarnnam(nam, &format!("function is not loaded: {}", shf_name));
3873            return 1;
3874        }
3875        // c:3503 — would call `getfpfunc` to load body for dump.
3876        zwarnnam(nam, &format!("can't load function: {}", shf_name));
3877        return 1;
3878    } else if (what & 1) == 0 {
3879        zwarnnam(nam, &format!("function is already loaded: {}", shf_name)); // c:3514
3880        return 1;
3881    }
3882    // c:3517 — would `dupeprog(shf->funcdef)`. Stub: empty body.
3883    let wcf = wcfunc {
3884        name: shf_name.to_string(),
3885        flags: FDHF_ZSHLOAD,
3886        body: Vec::new(),
3887    };
3888    progs.push(wcf);
3889    names.push(shf_name.to_string());
3890
3891    // c:3526 — bump hlen / tlen.
3892    let name_words = (shf_name.len() as i32 + 4) / 4;
3893    *hlen += (FDHEAD_WORDS as i32) + name_words;
3894    *tlen += 0; // body is empty in stub; real path adds prog->len in words.
3895
3896    0
3897}
3898
3899/// Port of `build_cur_dump(char *nam, char *dump, char **names, int match, int map, int what)`
3900/// from `Src/parse.c:3536`. Compiles currently-loaded functions
3901/// (`-c` for functions, `-a` for aliases) into a `.zwc` dump.
3902/// Same wordcode-emit dependency as `build_dump`.
3903pub fn build_cur_dump(
3904    nam: &str, // c:3536
3905    dump: &str,
3906    _names: &[String],
3907    _match_: i32,
3908    _map: i32,
3909    _what: i32,
3910) -> i32 {
3911    zwarnnam(
3912        nam,
3913        &format!("{}: wordcode dump-current emit not yet ported", dump),
3914    );
3915    1
3916}
3917
/// Port of `zwcstat(char *filename, struct stat *buf)` from
/// `Src/parse.c:3656`.
///
/// Returns the metadata of `filename`; if that lookup fails, the
/// metadata of `<filename>.old` is tried instead. `None` means neither
/// path could be stat'ed.
pub fn zwcstat(filename: &str) -> Option<fs::Metadata> {
    // c:3656
    fs::metadata(filename)
        .or_else(|_| fs::metadata(format!("{}.old", filename)))
        .ok()
}
3931
/// Port of `load_dump_file(char *dump, struct stat *sbuf, int other, int len)`
/// from `Src/parse.c:3675`.
///
/// Reads `dump` into memory as little-endian `u32` words. When `other`
/// is non-zero it is used as the byte offset to start reading from;
/// everything from there to end-of-file is consumed. Trailing bytes that
/// do not form a whole word are dropped. `_sbuf` and `_len` are accepted
/// for signature parity with the C function but not used.
///
/// Returns `None` if the file cannot be opened, positioned, or read.
pub fn load_dump_file(
    dump: &str, // c:3675
    _sbuf: &fs::Metadata,
    other: i32,
    _len: usize,
) -> Option<Vec<u32>> {
    let mut handle = File::open(dump).ok()?;
    if other != 0 {
        handle.seek(SeekFrom::Start(other as u64)).ok()?;
    }

    let mut raw = Vec::new();
    handle.read_to_end(&mut raw).ok()?;

    let mut words = Vec::with_capacity(raw.len() / 4);
    for quad in raw.chunks_exact(4) {
        words.push(u32::from_le_bytes([quad[0], quad[1], quad[2], quad[3]]));
    }
    Some(words)
}
3954
3955/// Port of `try_dump_file(char *path, char *name, char *file, int *ksh, int test_only)`
3956/// from `Src/parse.c:3746`. Tries to load function `name` from a
3957/// `.zwc` digest (`<path>.zwc`) or per-function compiled file
3958/// (`<file>.zwc`) when each is newer than its uncompiled source.
3959pub fn try_dump_file(
3960    path: &str,
3961    name: &str,
3962    file: &str, // c:3746
3963    test_only: bool,
3964) -> Option<(Vec<u32>, bool)> {
3965    use std::fs;
3966
3967    // c:3753-3758 — if path ends in .zwc, treat as direct digest.
3968    if path.ends_with(FD_EXT) {
3969        crate::ported::signals::queue_signals();
3970        let result = fs::metadata(path)
3971            .ok()
3972            .and_then(|m| check_dump_file(path, &m, name, test_only));
3973        unqueue_signals();
3974        return result;
3975    }
3976
3977    // c:3759-3760 — dig = "<path>.zwc", wc = "<file>.zwc".
3978    let dig = format!("{}{}", path, FD_EXT);
3979    let wc = format!("{}{}", file, FD_EXT);
3980
3981    // c:3762-3764 — zwcstat(dig, &std); stat(wc, &stc); stat(file, &stn);
3982    let std_meta = fs::metadata(&dig);
3983    let stc_meta = fs::metadata(&wc);
3984    let stn_meta = fs::metadata(file);
3985
3986    crate::ported::signals::queue_signals();
3987
3988    // c:3771-3777 — try digest if newer than (or in absence of) wc/file.
3989    if let Ok(std_m) = &std_meta {
3990        let dig_mtime = std_m.modified().ok();
3991        let wc_newer_or_missing = match &stc_meta {
3992            Err(_) => true,
3993            Ok(c) => dig_mtime >= c.modified().ok(),
3994        };
3995        let src_newer_or_missing = match &stn_meta {
3996            Err(_) => true,
3997            Ok(n) => dig_mtime >= n.modified().ok(),
3998        };
3999        if wc_newer_or_missing && src_newer_or_missing {
4000            if let Some(prog) = check_dump_file(&dig, std_m, name, test_only) {
4001                unqueue_signals();
4002                return Some(prog);
4003            }
4004        }
4005    }
4006
4007    // c:3779-3784 — try per-function .zwc if newer than (or in absence of) source.
4008    if let Ok(stc_m) = &stc_meta {
4009        let wc_mtime = stc_m.modified().ok();
4010        let src_newer_or_missing = match &stn_meta {
4011            Err(_) => true,
4012            Ok(n) => wc_mtime >= n.modified().ok(),
4013        };
4014        if src_newer_or_missing {
4015            if let Some(prog) = check_dump_file(&wc, stc_m, name, test_only) {
4016                unqueue_signals();
4017                return Some(prog);
4018            }
4019        }
4020    }
4021
4022    unqueue_signals(); // c:3787
4023    None // c:3788
4024}
4025
4026/// Port of `try_source_file(char *file)` from `Src/parse.c:3795`.
4027/// Returns an Eprog (the wordcode dump body) if `<file>.zwc` exists
4028/// and is newer than `<file>`, else None.
4029pub fn try_source_file(file: &str) -> Option<String> {
4030    // c:3795
4031
4032    // c:3802-3805 — if ((tail = strrchr(file, '/'))) tail++; else tail = file;
4033    let tail = match file.rfind('/') {
4034        Some(i) => &file[i + 1..],
4035        None => file,
4036    };
4037
4038    // c:3807-3812 — if (strsfx(FD_EXT, file)) { ... return check_dump_file(file, NULL, tail, NULL, 0); }
4039    if file.ends_with(FD_EXT) {
4040        crate::ported::signals::queue_signals(); // c:3808
4041        let meta = fs::metadata(file);
4042        let prog = match meta {
4043            Ok(m) => check_dump_file(file, &m, tail, false).map(|(_, _)| file.to_string()), // c:3809
4044            Err(_) => None,
4045        };
4046        unqueue_signals(); // c:3810
4047        return prog;
4048    }
4049
4050    // c:3813 — wc = dyncat(file, FD_EXT);
4051    let wc = format!("{}{}", file, FD_EXT);
4052
4053    // c:3815-3816 — rc = stat(wc, &stc); rn = stat(file, &stn);
4054    let stc = fs::metadata(&wc);
4055    let stn = fs::metadata(file);
4056
4057    crate::ported::signals::queue_signals(); // c:3818
4058                                             // c:3819-3823 — if (!rc && (rn || stc.st_mtime >= stn.st_mtime) && (prog = check_dump_file(...))) return prog;
4059    if let Ok(meta_c) = &stc {
4060        let newer_than_src = match (&stc, &stn) {
4061            (Ok(c), Ok(n)) => c.modified().ok() >= n.modified().ok(),
4062            (Ok(_), Err(_)) => true, // c:3819 — `rn` (src missing) ⇒ accept .zwc
4063            _ => false,
4064        };
4065        if newer_than_src {
4066            let prog = check_dump_file(&wc, meta_c, tail, false); // c:3820
4067            if prog.is_some() {
4068                unqueue_signals(); // c:3821
4069                return Some(wc); // c:3822
4070            }
4071        }
4072    }
4073    unqueue_signals(); // c:3824
4074    None // c:3825
4075}
4076
4077/// Port of `Eprog check_dump_file(char *file, struct stat *sbuf,
4078/// char *name, int *ksh, int test_only)` from `Src/parse.c:3833`.
4079/// Walks the `dumps` mmap list looking for `(dev, ino)` matching
4080/// `sbuf`; on miss, calls `load_dump_header` to read the .zwc
4081/// header. Then `dump_find_func(d, name)` locates the function
4082/// table entry. Returns the wordcode slice + ksh-load flag.
4083///
4084/// ```c
4085/// Eprog
4086/// check_dump_file(char *file, struct stat *sbuf, char *name,
4087///                 int *ksh, int test_only)
4088/// {
4089///     int isrec = 0;
4090///     Wordcode d;
4091///     FDHead h;
4092///     FuncDump f;
4093///     struct stat lsbuf;
4094///     if (!sbuf) {
4095///         if (zwcstat(file, &lsbuf)) return NULL;
4096///         sbuf = &lsbuf;
4097///     }
4098///   rec:
4099///     d = NULL;
4100///     for (f = dumps; f; f = f->next)
4101///         if (f->dev == sbuf->st_dev && f->ino == sbuf->st_ino)
4102///             { d = f->map; break; }
4103///     if (!f && (isrec || !(d = load_dump_header(NULL, file, 0))))
4104///         return NULL;
4105///     if ((h = dump_find_func(d, name))) {
4106///         if (test_only) return &dummy_eprog;
4107///         /* allocate Eprog from f->map at h offset, incrdumpcount,
4108///            return prog */
4109///     }
4110///     return NULL;
4111/// }
4112/// ```
4113/// Rust port returns `Option<(Vec<u32>, bool)>` instead of the C
4114/// `Eprog` pointer + `*ksh` out-param: tuple element 0 is the
4115/// wordcode slice, element 1 is true if the function was a ksh-
4116/// loaded entry.
4117pub fn check_dump_file(
4118    // c:3833
4119    file: &str,
4120    sbuf: &fs::Metadata,
4121    name: &str,
4122    test_only: bool,
4123) -> Option<(Vec<u32>, bool)> {
4124    use std::os::unix::fs::MetadataExt;
4125
4126    // c:3842-3846 — `if (!sbuf) { zwcstat(file, &lsbuf); sbuf = &lsbuf; }`
4127    // Rust takes sbuf by &Metadata — never null.
4128    let dev = sbuf.dev(); // c:3859
4129    let ino = sbuf.ino(); // c:3859
4130
4131    // c:3854 — `d = NULL;`
4132    let mut d: Option<Vec<u32>> = None;
4133    let mut found_mmap = false; // c:3858 `for (f = dumps; f; ...)`
4134
4135    // c:3858-3862 — walk DUMPS for matching dev/ino.
4136    {
4137        let dumps_guard = DUMPS.lock().expect("dumps poisoned");
4138        for f in dumps_guard.iter() {
4139            // c:3858
4140            if f.dev == dev && f.ino == ino {
4141                // c:3859
4142                d = Some(f.map.clone()); // c:3860
4143                found_mmap = true;
4144                break; // c:3861
4145            }
4146        }
4147    }
4148
4149    // c:3870-3871 — `if (!f && (isrec || !(d = load_dump_header(NULL, file, 0)))) return NULL;`
4150    if !found_mmap {
4151        // c:3870
4152        match load_dump_header("", file, 0) {
4153            // c:3870 load_dump_header
4154            Some(loaded) => d = Some(loaded),
4155            None => return None, // c:3871
4156        }
4157    }
4158
4159    // c:3873 — `if ((h = dump_find_func(d, name)))`
4160    let dump = d?;
4161    if !dump_find_func(&dump, name) {
4162        // c:3873
4163        return None;
4164    }
4165
4166    // c:3876-3879 — `if (test_only) return &dummy_eprog;`
4167    if test_only {
4168        // c:3876
4169        return Some((Vec::new(), false)); // c:3879 dummy
4170    }
4171
4172    // c:3884-3953 — allocate Eprog from the mmap area + ksh detection.
4173    // The C source builds an `Eprog` struct wrapping the wordcode
4174    // slice at h's offset; the Rust port returns the slice directly
4175    // since Eprog construction lives at the call site (load_dump_file).
4176    // ksh-load detection reads the FDHF_KSHLOAD flag on the FDHead.
4177    // !!! STUB: FDHead parsing not yet wired through dump_find_func.
4178    let is_ksh_load = false; // c:3905 fdhflags(h) & FDHF_KSHLOAD
4179
4180    // c:3950 — incrdumpcount(f). The Rust incrdumpcount takes a
4181    // funcdump ref; look up the matching entry by dev/ino again.
4182    if found_mmap {
4183        let dumps_guard = DUMPS.lock().expect("dumps poisoned");
4184        if let Some(f) = dumps_guard.iter().find(|f| f.dev == dev && f.ino == ino) {
4185            incrdumpcount(f); // c:3899
4186        }
4187    }
4188
4189    Some((dump, is_ksh_load)) // c:3953
4190}
4191
4192/// Port of `incrdumpcount(FuncDump f)` from `Src/parse.c:3970/4021`.
4193/// `f->count++;` — refcount-up a loaded dump entry. The Rust port
4194/// keys lookup by `filename` because Rust can't raw-pointer-compare
4195/// funcdump values inside a `Mutex<Vec<...>>`; same observable
4196/// effect (the count of the matching entry increments).
4197pub fn incrdumpcount(f: &funcdump) {
4198    // c:3970 — `f->count++;`
4199    if let Some(d) = DUMPS
4200        .lock()
4201        .unwrap()
4202        .iter_mut()
4203        .find(|d| d.filename.as_deref() == f.filename.as_deref())
4204    {
4205        d.count += 1; // c:3973
4206    }
4207}
4208
4209/// Port of `freedump(FuncDump f)` from `Src/parse.c:3976`. Public
4210/// helper for the rare external caller; locks the dumps mutex and
4211/// drops the entry with the given filename.
4212pub fn freedump(f: &funcdump) {
4213    // c:3976
4214    let mut g = DUMPS.lock().unwrap();
4215    if let Some(name) = f.filename.as_deref() {
4216        freedump_locked(&mut g, name);
4217    }
4218}
4219
4220/// Port of `decrdumpcount(FuncDump f)` from `Src/parse.c:3988/4026`.
4221/// `f->count--; if (!f->count) { unlink from dumps; freedump(f); }`.
4222pub fn decrdumpcount(f: &funcdump) {
4223    // c:3988
4224    let key = f.filename.clone();
4225    let mut g = DUMPS.lock().unwrap();
4226    let mut hit_zero: Option<String> = None;
4227    for d in g.iter_mut() {
4228        if d.filename == key {
4229            d.count -= 1; // c:3991
4230            if d.count == 0 {
4231                // c:3992
4232                hit_zero = d.filename.clone();
4233            }
4234            break;
4235        }
4236    }
4237    if let Some(name) = hit_zero {
4238        // c:3994-4001
4239        freedump_locked(&mut g, &name);
4240    }
4241}
4242
4243/// Port of `closedumps(void)` from `Src/parse.c:4008/4033`. Walks
4244/// `dumps` freeing every entry. Called on shell exit (exec.c:522).
4245pub fn closedumps() {
4246    // c:4008
4247    let mut g = DUMPS.lock().unwrap();
4248    g.clear(); // c:4011-4014 `while (dumps) { ... freedump(...); ... }`
4249}
4250
4251/// Port of `dump_autoload(char *nam, char *file, int on, Options ops, int func)`
4252/// from `Src/parse.c:4042`. Registers every function in a `.zwc`
4253/// for autoload via `shfunctab`.
4254pub fn dump_autoload(
4255    nam: &str,
4256    file: &str, // c:4042
4257    on: i32,
4258    ops: &crate::ported::zsh_h::options,
4259    func: i32,
4260) -> i32 {
4261    use crate::ported::zsh_h::shfunc;
4262    let mut ret = 0; // c:4047
4263
4264    // c:4049-4050 — if (!strsfx(FD_EXT, file)) file = dyncat(file, FD_EXT);
4265    let file_owned;
4266    let file = if !file.ends_with(FD_EXT) {
4267        file_owned = format!("{}{}", file, FD_EXT);
4268        file_owned.as_str()
4269    } else {
4270        file
4271    };
4272
4273    // c:4052-4053 — if (!(h = load_dump_header(nam, file, 1))) return 1;
4274    let h = match load_dump_header(nam, file, 1) {
4275        Some(buf) => buf,
4276        None => return 1,
4277    };
4278
4279    // c:4055-4056 — for (n = firstfdhead(h); n < e; n = nextfdhead(n))
4280    let hlen = fdheaderlen(&h) as usize; // c:4055
4281    let mut n_off = firstfdhead_offset();
4282    while n_off < hlen {
4283        let head = match read_fdhead(&h, n_off) {
4284            Some(hd) => hd,
4285            None => break,
4286        };
4287        // c:4057-4061 — shf = zshcalloc; shf->node.flags = on; ...addnode(fdname + fdhtail)
4288        let name_full = fdname(&h, n_off);
4289        let tail = fdhtail(&head) as usize;
4290        let basename: String = name_full.chars().skip(tail).collect();
4291        let mut shf = shfunc {
4292            node: crate::ported::zsh_h::hashnode {
4293                next: None,
4294                nam: basename.clone(),
4295                flags: on, // c:4058
4296            },
4297            filename: None,
4298            lineno: 0,
4299            funcdef: None,
4300            redir: None,
4301            sticky: None, // c:4060 NULL
4302            body: None,
4303        };
4304        // c:4059 — shf->funcdef = mkautofn(shf);  (placeholder Eprog ptr)
4305        let _ = crate::ported::builtin::mkautofn(&mut shf as *mut _);
4306        // c:4061 — shfunctab->addnode(...)
4307        let snapshot = shf.clone();
4308        {
4309            let mut tab = crate::ported::hashtable::shfunctab_lock()
4310                .write()
4311                .expect("shfunctab poisoned");
4312            tab.add(shf);
4313        }
4314        // c:4062-4063 — if (OPT_ISSET(ops,'X') && eval_autoload(...)) ret = 1;
4315        if OPT_ISSET(ops, b'X') {
4316            let mut shf_ref = snapshot;
4317            if crate::ported::builtin::eval_autoload(&mut shf_ref as *mut _, &basename, ops, func)
4318                != 0
4319            {
4320                ret = 1;
4321            }
4322        }
4323        n_off = nextfdhead_offset(&h, n_off);
4324    }
4325    let _ = nam;
4326    ret // c:4065
4327}
4328
/// Port of C `struct eccstr` (zsh.h:836) — one node of the binary
/// search tree used to de-duplicate long strings during wordcode
/// emission.
///
/// The dedup walk and comparison logic in `ecstrcode` is faithful to
/// parse.c:447-453, including the conditional comparison chain
/// (nfunc → hashval → strcmp), so inputs for which C's eccstr tree walk
/// finds (or misses) a match get the same outcome on the Rust side.
struct EccstrNode {
    /// Left child subtree, if any.
    left: Option<Box<EccstrNode>>,
    /// Right child subtree, if any.
    right: Option<Box<EccstrNode>>,
    /// C-byte form of the string (single byte per char ≤ 0xff).
    /// Owned because Rust doesn't have C zsh's "stable pointers into
    /// the lexer's tokstr arena" — every tokstr lives as a fresh
    /// Rust String allocation.
    str: Vec<u8>,
    /// Wordcode-encoded offset: `(byte_offset << 2) | token_bit`.
    /// Same shape as `Eccstr::offs` (parse.c:459).
    offs: u32,
    /// Absolute byte offset in the final strs region (= `ecsoffs` at
    /// insert time). C `Eccstr::aoffs` (parse.c:464). copy_ecstr uses
    /// THIS for the write position — distinct from `offs`, which is
    /// ecssub-relative and collides across funcdef scopes.
    aoffs: u32,
    /// `nfunc` snapshot at insert time. Per-function namespace key
    /// — top-level scripts use 0; each funcdef bumps it.
    nfunc: i32,
    /// Hash of `str` computed via zsh's `hasher` (hashtable.c:86).
    hashval: u32,
}
4356// === end AST relocation ===
4357
4358// Parser state lives in file-scope thread_locals:
4359//   - LEX_* (lexer side, matching Src/lex.c file-statics)
4360//   - ECBUF / ECLEN / ECUSED / ECNPATS / ECSOFFS / ECSSUB / ECNFUNC /
4361//     ECSTRS_INDEX / ECSTRS_REVERSE (wordcode-emission state, matching
4362//     Src/parse.c file-statics)
4363//
4364// Callers use the free-fn entry points directly:
4365//   crate::ported::parse::parse_init(input);
4366//   let prog = crate::ported::parse::parse();
4367
/// Upper bound (500) on recursion depth.
/// NOTE(review): the code that consults this limit is outside this part
/// of the file — presumably the parser's nesting guard; confirm at the
/// use site.
const MAX_RECURSION_DEPTH: usize = 500;
4369
/// Direct port of `struct parse_stack` at `Src/zsh.h:3099-3109`.
/// Used by `parse_context_save` / `parse_context_restore`
/// (parse.c:295-355) to snapshot per-parse-call state so a nested
/// parse (e.g. inside command substitution) doesn't clobber the
/// outer parse.
///
/// A second port of `struct parse_stack` exists at
/// `crate::ported::zsh_h::parse_stack` (zsh.h:1066) using canonical
/// Wordcode / Eccstr / `struct heredocs` types — that port is unused
/// today and will become authoritative when Phase 9b (PORT_PLAN.md)
/// wires wordcode emission. This local version uses the working-set
/// shapes (`Vec<HereDoc>`, stubbed wordcode fields) suited to zshrs's
/// pre-wordcode AST architecture; the consolidation happens in P9b.
///
/// The lower-case type name is kept to match the C identifier, hence
/// the `non_camel_case_types` allowance. `Default` yields an all-empty
/// snapshot (zeros, `false`, `None`, empty vectors).
#[allow(non_camel_case_types)]
#[derive(Debug, Default, Clone)]
pub struct parse_stack {
    // ── Direct port of struct parse_stack at zsh.h:3099-3109 ──
    /// Pending heredocs awaiting body collection (canonical C
    /// linked-list shape). C: `struct heredocs *hdocs` (zsh.h:3100).
    /// Mirrors `parse::HDOCS` thread_local across nested parses.
    pub hdocs: Option<Box<crate::ported::zsh_h::heredocs>>,
    /// !!! WARNING: NOT IN PARSE_STACK — Rust-only AST-glue !!!
    /// Snapshot of `lex::LEX_HEREDOCS` (the parallel Rust-only Vec
    /// carrying terminator / strip_tabs / quoted metadata).
    /// Saved/restored alongside the canonical `hdocs` so nested
    /// parses get a clean AST view. C's parse_stack has no analog
    /// because C tracks terminator metadata implicitly via tokstr.
    pub lex_heredocs: Vec<HereDoc>,
    /// C: `int incmdpos` (zsh.h:3102).
    pub incmdpos: bool,
    /// C: `int aliasspaceflag` (zsh.h:3103).
    pub aliasspaceflag: i32,
    /// C: `int incond` (zsh.h:3104).
    pub incond: i32,
    /// C: `int inredir` (zsh.h:3105).
    pub inredir: bool,
    /// C: `int incasepat` (zsh.h:3106).
    pub incasepat: i32,
    /// C: `int isnewlin` (zsh.h:3107).
    pub isnewlin: i32,
    /// C: `int infor` (zsh.h:3108).
    pub infor: i32,
    /// C: `int inrepeat_` (zsh.h:3109).
    pub inrepeat_: i32,
    /// C: `int intypeset` (zsh.h:3110).
    pub intypeset: bool,
    // ── Wordcode-buffer state — STUB until Phase 9b ──
    // C `Wordcode ecbuf` (zsh.h:3112) + `Eccstr ecstrs` (zsh.h:3113) +
    // `int eclen/ecused/ecnpats/ecsoffs/ecssub/ecnfunc` (zsh.h:3112-3114).
    // zshrs hasn't emitted wordcode yet — these fields exist to
    // preserve the C shape but read/write nothing until P9b lands.
    /// C: `int eclen` — see the stub note above.
    pub eclen: i32,
    /// C: `int ecused` — see the stub note above.
    pub ecused: i32,
    /// C: `int ecnpats` — see the stub note above.
    pub ecnpats: i32,
    /// C: `Wordcode ecbuf`, held here as an optional word vector.
    pub ecbuf: Option<Vec<u32>>,
    /// C: `Eccstr ecstrs`, held here as an optional byte vector rather
    /// than the C tree type.
    pub ecstrs: Option<Vec<u8>>,
    /// C: `int ecsoffs` — see the stub note above.
    pub ecsoffs: i32,
    /// C: `int ecssub` — see the stub note above.
    pub ecssub: i32,
    /// C: `int ecnfunc` — see the stub note above.
    pub ecnfunc: i32,
}
4430
// The old upper-case, Rust-only `ParseStack` struct is gone. This
// compatibility alias lets existing call sites (context.rs) keep
// resolving until the rename has rippled through.
/// Compatibility alias: `ParseStack` is the same type as
/// [`parse_stack`]; new code should name `parse_stack` directly.
#[allow(non_camel_case_types)]
pub type ParseStack = parse_stack;
4437
/// `mod_export struct eprog dummy_eprog;` from `Src/parse.c:3066`.
/// Placeholder Eprog used by `shf->funcdef = &dummy_eprog;` in
/// builtin.c when clearing a stale autoload stub. Held in a Mutex
/// so `init_eprog` can set it once at shell startup.
///
/// The static starts out fully empty: zeroed flags, length, pattern
/// count and reference count, no words, no strings, no patterns, and no
/// owning function or dump.
pub static DUMMY_EPROG: std::sync::Mutex<eprog> = std::sync::Mutex::new(eprog {
    flags: 0,
    len: 0,
    npats: 0,
    nref: 0,
    prog: Vec::new(),
    strs: None,
    pats: Vec::new(),
    shf: None,
    dump: None,
});
4453
4454/// Walk every ZshRedir in the program and, for any with a `heredoc_idx`,
4455/// pull the body+terminator out of `bodies` and stuff into `heredoc`.
4456/// `bodies[i]` corresponds to the i-th heredoc registered by the lexer
4457/// during scanning (in source order).
4458fn fill_heredoc_bodies(prog: &mut ZshProgram, bodies: &[HereDocInfo]) {
4459    for list in &mut prog.lists {
4460        fill_in_sublist(&mut list.sublist, bodies);
4461    }
4462}
4463
4464fn fill_in_sublist(sub: &mut ZshSublist, bodies: &[HereDocInfo]) {
4465    fill_in_pipe(&mut sub.pipe, bodies);
4466    if let Some(next) = &mut sub.next {
4467        fill_in_sublist(&mut next.1, bodies);
4468    }
4469}
4470
4471fn fill_in_pipe(pipe: &mut ZshPipe, bodies: &[HereDocInfo]) {
4472    fill_in_command(&mut pipe.cmd, bodies);
4473    if let Some(next) = &mut pipe.next {
4474        fill_in_pipe(next, bodies);
4475    }
4476}
4477
4478fn fill_in_command(cmd: &mut ZshCommand, bodies: &[HereDocInfo]) {
4479    match cmd {
4480        ZshCommand::Simple(s) => {
4481            for r in &mut s.redirs {
4482                if let Some(idx) = r.heredoc_idx {
4483                    if let Some(info) = bodies.get(idx) {
4484                        r.heredoc = Some(info.clone());
4485                    }
4486                }
4487            }
4488        }
4489        ZshCommand::Subsh(p) | ZshCommand::Cursh(p) => fill_heredoc_bodies(p, bodies),
4490        ZshCommand::FuncDef(f) => fill_heredoc_bodies(&mut f.body, bodies),
4491        ZshCommand::If(i) => {
4492            fill_heredoc_bodies(&mut i.cond, bodies);
4493            fill_heredoc_bodies(&mut i.then, bodies);
4494            for (c, b) in &mut i.elif {
4495                fill_heredoc_bodies(c, bodies);
4496                fill_heredoc_bodies(b, bodies);
4497            }
4498            if let Some(e) = &mut i.else_ {
4499                fill_heredoc_bodies(e, bodies);
4500            }
4501        }
4502        ZshCommand::While(w) | ZshCommand::Until(w) => {
4503            fill_heredoc_bodies(&mut w.cond, bodies);
4504            fill_heredoc_bodies(&mut w.body, bodies);
4505        }
4506        ZshCommand::For(f) => fill_heredoc_bodies(&mut f.body, bodies),
4507        ZshCommand::Case(c) => {
4508            for arm in &mut c.arms {
4509                fill_heredoc_bodies(&mut arm.body, bodies);
4510            }
4511        }
4512        ZshCommand::Repeat(r) => fill_heredoc_bodies(&mut r.body, bodies),
4513        ZshCommand::Time(Some(sublist)) => fill_in_sublist(sublist, bodies),
4514        ZshCommand::Try(t) => {
4515            fill_heredoc_bodies(&mut t.try_block, bodies);
4516            fill_heredoc_bodies(&mut t.always, bodies);
4517        }
4518        ZshCommand::Redirected(inner, redirs) => {
4519            for r in redirs {
4520                if let Some(idx) = r.heredoc_idx {
4521                    if let Some(info) = bodies.get(idx) {
4522                        r.heredoc = Some(info.clone());
4523                    }
4524                }
4525            }
4526            fill_in_command(inner, bodies);
4527        }
4528        ZshCommand::Time(None) | ZshCommand::Cond(_) | ZshCommand::Arith(_) => {}
4529    }
4530}
4531
4532/// If `list` is a Simple containing one word that ends in the
4533/// `<Inpar><Outpar>` token pair (the lexer-port encoding of `()`),
4534/// return the bare name. Used by `parse_program_until` to detect
4535/// `name() {body}` style function definitions where the lexer
4536/// hasn't split the `()` from the name.
4537/// Detect the `name() …` shape inside a Simple. Returns the function
4538/// name and (when the body was already inlined into the same Simple,
4539/// e.g. `foo() echo hi`) the rest of the words as the body's argv.
4540/// Returns None for non-funcdef shapes.
4541fn simple_name_with_inoutpar(list: &ZshList) -> Option<(Vec<String>, Vec<String>)> {
4542    if list.flags.async_ || list.sublist.next.is_some() {
4543        return None;
4544    }
4545    let pipe = &list.sublist.pipe;
4546    if pipe.next.is_some() {
4547        return None;
4548    }
4549    let simple = match &pipe.cmd {
4550        ZshCommand::Simple(s) => s,
4551        _ => return None,
4552    };
4553    if simple.words.is_empty() || !simple.assigns.is_empty() {
4554        return None;
4555    }
4556    let suffix = "\u{88}\u{8a}"; // Inpar + Outpar
4557                                 // Find the FIRST word ending in `()`. zsh accepts the
4558                                 // multi-name shorthand `fna fnb fnc() { body }` (parse.c:
4559                                 // par_funcdef wordlist) — words[0..i-1] are extra names,
4560                                 // words[i] is `lastname()`. Words after are the body argv
4561                                 // (one-line shorthand, `name() cmd args`).
4562    let par_idx = simple.words.iter().position(|w| w.ends_with(suffix))?;
4563    let mut names: Vec<String> = Vec::with_capacity(par_idx + 1);
4564    for w in &simple.words[..par_idx] {
4565        // Earlier names must be bare identifiers, NOT contain
4566        // tokens that imply they're not function names (no `()`,
4567        // no quotes, no expansions). zsh's lexer enforces this
4568        // at the wordlist level; we approximate by requiring the
4569        // word be an identifier-shaped token after untokenize.
4570        let bare = super::lex::untokenize(w);
4571        let valid = !bare.is_empty()
4572            && bare
4573                .chars()
4574                .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-' || c == '.' || c == '$');
4575        if !valid {
4576            return None;
4577        }
4578        names.push(bare);
4579    }
4580    let last = &simple.words[par_idx];
4581    let bare = &last[..last.len() - suffix.len()];
4582    if bare.is_empty() {
4583        return None;
4584    }
4585    names.push(super::lex::untokenize(bare));
4586    let rest = simple.words[par_idx + 1..].to_vec();
4587    Some((names, rest))
4588}
4589
4590/// Initialize parser state for a fresh parse of `input`.
4591/// Free-fn entry point — resets parser thread_locals and loads input.
4592pub fn parse_init(input: &str) {
4593    // Seed the option defaults the parser/lexer inspect. Real zsh
4594    // installs these via `install_emulation_defaults` (options.c:172)
4595    // at shell startup; zshrs's parse-only test entry path bypasses
4596    // init_main, so we mirror the `zsh` emulation defaults here.
4597    // Only seeds when unset so a script that explicitly disabled an
4598    // option stays so.
4599    for (name, default) in [
4600        ("shortloops", true),
4601        ("shortrepeat", false),
4602        ("multifuncdef", true),
4603        ("aliasfuncdef", false),
4604        ("ignorebraces", false),
4605        ("cshjunkieloops", false),
4606        ("posixbuiltins", false),
4607        ("execopt", true),
4608        ("kshautoload", false),
4609        ("aliases", true),
4610    ] {
4611        if crate::ported::options::opt_state_get(name).is_none() {
4612            crate::ported::options::opt_state_set(name, default);
4613        }
4614    }
4615    lex_init(input);
4616}
4617
4618/// P9b decoder (wordcode-pipeline variant): direct port of
4619/// `ecgetstr(Estate s, int dup, int *tokflag)` from
4620/// `Src/parse.c:2855-2890`. Reads a wordcode at `pc`, decodes the
4621/// encoded string back to owned String. Returns (string,
4622/// pc_after_consumed). Distinct from the existing `ecgetstr` (which
4623/// takes a separate strs buffer for text.rs) — this variant uses
4624/// the live ECSTRS_REVERSE HashMap populated at ecstrcode time.
4625pub fn ecgetstr_wordcode(buf: &[u32], pc: usize) -> (String, usize) {
4626    if pc >= buf.len() {
4627        return (String::new(), pc);
4628    }
4629    let c = buf[pc];
4630    let next = pc + 1;
4631    // parse.c:2862-2863 — empty-string sentinels.
4632    if c == 6 || c == 7 {
4633        return (String::new(), next);
4634    }
4635    // parse.c:2864-2871 — inline-packed short string.
4636    if (c & 2) != 0 {
4637        let b0 = ((c >> 3) & 0xff) as u8;
4638        let b1 = ((c >> 11) & 0xff) as u8;
4639        let b2 = ((c >> 19) & 0xff) as u8;
4640        let mut bytes: Vec<u8> = Vec::new();
4641        for b in [b0, b1, b2] {
4642            if b == 0 {
4643                break;
4644            }
4645            bytes.push(b);
4646        }
4647        return (String::from_utf8_lossy(&bytes).into_owned(), next);
4648    }
4649    // parse.c:2872-2873 — long string via offs lookup. Map value is
4650    // metafied Vec<u8>; convert back to display String. Unmetafy is
4651    // the caller's job (the wordcode-parity dumper does it; other
4652    // callers may want raw bytes).
4653    let s = ECSTRS_REVERSE
4654        .with_borrow(|m| m.get(&c).cloned())
4655        .map(|v| String::from_utf8_lossy(&v).into_owned())
4656        .unwrap_or_default();
4657    (s, next)
4658}
4659
4660/// Parse the complete input. Direct port of `parse_event` /
4661/// `par_list` from `Src/parse.c:614-720`. On syntax error,
4662/// sets `errflag |= ERRFLAG_ERROR` (via `zerr`) and returns the
4663/// partial program — callers check `errflag` to detect failure,
4664/// matching C's `Eprog parse_event(...)` + `if (errflag) {...}`.
4665pub fn parse() -> ZshProgram {
4666    zshlex();
4667
4668    let mut program = parse_program_until(None);
4669
4670    // Post-pass: wire heredoc bodies (collected by the inline NEWLIN
4671    // walk in zshlex into LEX_HEREDOCS) back into ZshRedir.heredoc
4672    // fields via heredoc_idx. No C analog — LEX_HEREDOCS is the
4673    // Rust-only AST-glue Vec.
4674    let bodies: Vec<HereDocInfo> = LEX_HEREDOCS
4675        .with_borrow(|v| v.clone())
4676        .into_iter()
4677        .map(|h| HereDocInfo {
4678            content: h.content,
4679            terminator: h.terminator,
4680            quoted: h.quoted,
4681        })
4682        .collect();
4683    if !bodies.is_empty() {
4684        fill_heredoc_bodies(&mut program, &bodies);
4685    }
4686
4687    program
4688}
4689
4690/// Wordcode-emission top-level driver. Closest C analog is
4691/// `parse_list(void)` at `Src/parse.c:697-712`: init_parse +
4692/// zshlex + par_list(&c) + bld_eprog. This entry omits init_parse
4693/// and bld_eprog (caller responsibilities) and inlines a guard
4694/// loop around par_list_wordcode for cases where the lexer leaves
4695/// a non-ENDINPUT terminator (LEXERR, missing close-token, etc.).
4696pub fn par_event_wordcode() -> usize {
4697    let start = ECUSED.get() as usize;
4698    // C `parse_list` (parse.c:697-712) calls par_list ONCE — par_list's
4699    // own goto-rec loop handles all SEPER-separated sublists. The
4700    // outer loop here exists for safety against early-return cases
4701    // (LEXERR, missing terminator) but normally par_list_wordcode
4702    // consumes everything in one call.
4703    let mut cmplx: i32 = 0;
4704    while tok() != ENDINPUT && tok() != LEXERR {
4705        par_list_wordcode(&mut cmplx);
4706        match tok() {
4707            SEMI | NEWLIN | AMPER | AMPERBANG | SEPER => {
4708                zshlex();
4709            }
4710            _ => break,
4711        }
4712    }
4713    // parse.c:712 — `ecadd(WCB_END());`
4714    ecadd(WCB_END());
4715    start
4716}
4717
4718/// Port of `par_list(int *cmplx)` from `Src/parse.c:769-803`.
4719/// `list : { SEPER } [ sublist [ { SEPER | AMPER | AMPERBANG } list ] ]`.
4720/// True line-by-line port: takes `cmplx: &mut i32` matching C's
4721/// `int *cmplx` out-parameter, uses stack-local `c` per iteration
4722/// like C (so inner sublist cmplx is independent of outer).
4723pub fn par_list_wordcode(cmplx: &mut i32) {
4724    // c:773 — `int p, lp = -1, c;`
4725    let mut p: usize;
4726    let mut lp: i32 = -1;
4727    let mut c: i32;
4728    loop {
4729        // c:775 `rec:` — c:777-778 `while (tok == SEPER) zshlex();`
4730        while tok() == SEPER {
4731            zshlex();
4732        }
4733        // c:780 — `p = ecadd(0);`
4734        p = ecadd(0);
4735        // c:781 — `c = 0;`
4736        c = 0;
4737        // c:783 — `if (par_sublist(&c)) { ... }`
4738        if par_sublist_wordcode(&mut c) {
4739            // c:784 — `*cmplx |= c;`
4740            *cmplx |= c;
4741            // c:785 — `if (tok == SEPER || tok == AMPER || tok == AMPERBANG)`
4742            let t = tok();
4743            if t == SEPER || t == AMPER || t == AMPERBANG {
4744                // c:786-787 — `if (tok != SEPER) *cmplx = 1;`
4745                if t != SEPER {
4746                    *cmplx = 1;
4747                }
4748                // c:788-790 — `set_list_code(p, ..., c);`
4749                let z = if t == SEPER {
4750                    Z_SYNC
4751                } else if t == AMPER {
4752                    Z_ASYNC
4753                } else {
4754                    Z_ASYNC | Z_DISOWN
4755                };
4756                set_list_code(p, z, c != 0);
4757                // c:791 — `incmdpos = 1;`
4758                set_incmdpos(true);
4759                // c:792-794 — `do { zshlex(); } while (tok == SEPER);`
4760                loop {
4761                    zshlex();
4762                    if tok() != SEPER {
4763                        break;
4764                    }
4765                }
4766                // c:795 — `lp = p;` c:796 — `goto rec;`
4767                lp = p as i32;
4768                continue;
4769            } else {
4770                // c:798 — `set_list_code(p, (Z_SYNC | Z_END), c);`
4771                set_list_code(p, Z_SYNC | Z_END, c != 0);
4772            }
4773        } else {
4774            // c:800-802 — `ecused--; if (lp >= 0) ecbuf[lp] |= wc_bdata(Z_END);`
4775            ECUSED.set((ECUSED.get() - 1).max(0));
4776            if lp >= 0 {
4777                ECBUF.with_borrow_mut(|b| {
4778                    if (lp as usize) < b.len() {
4779                        b[lp as usize] |= wc_bdata(Z_END as wordcode);
4780                    }
4781                });
4782            }
4783        }
4784        break;
4785    }
4786}
4787
4788/// Port of `par_list1(int *cmplx)` from `Src/parse.c:806-817`.
4789/// Single-sublist variant used by funcdef bodies and the short
4790/// `for`/`while`/`repeat` forms — exactly one sublist with
4791/// `Z_SYNC|Z_END`, no chain.
4792pub fn par_list1_wordcode(cmplx: &mut i32) {
4793    // c:810 — `int p = ecadd(0), c = 0;`
4794    let p = ecadd(0);
4795    let mut c: i32 = 0;
4796    // c:812 — `if (par_sublist(&c)) { ... }`
4797    if par_sublist_wordcode(&mut c) {
4798        // c:813 — `set_list_code(p, (Z_SYNC | Z_END), c);`
4799        set_list_code(p, Z_SYNC | Z_END, c != 0);
4800        // c:814 — `*cmplx |= c;`
4801        *cmplx |= c;
4802    } else {
4803        // c:816 — `ecused--;`
4804        ECUSED.set((ECUSED.get() - 1).max(0));
4805    }
4806}
4807
4808/// Port of `par_save_list(C)` macro from `Src/parse.c:475-480`.
4809///   do { int eu = ecused; par_list(C); if (eu == ecused) ecadd(WCB_END()); } while (0)
4810pub fn par_save_list_wordcode(cmplx: &mut i32) {
4811    let eu = ECUSED.get();
4812    par_list_wordcode(cmplx);
4813    if ECUSED.get() == eu {
4814        ecadd(WCB_END());
4815    }
4816}
4817
4818/// Port of `par_save_list1(C)` macro from `Src/parse.c:481-486`.
4819pub fn par_save_list1_wordcode(cmplx: &mut i32) {
4820    let eu = ECUSED.get();
4821    par_list1_wordcode(cmplx);
4822    if ECUSED.get() == eu {
4823        ecadd(WCB_END());
4824    }
4825}
4826
4827/// Port of `par_sublist(int *cmplx)` from `Src/parse.c:823-865`.
4828/// `sublist : sublist2 [ ( DBAR | DAMPER ) { SEPER } sublist ]`.
4829/// Emits a WCB_SUBLIST header, recurses into par_sublist2 for
4830/// the !/coproc prefix + pipeline, then chains via DBAR (`||`)
4831/// or DAMPER (`&&`) recursively. Returns true if at least one
4832/// pipeline was emitted.
4833pub fn par_sublist_wordcode(cmplx: &mut i32) -> bool {
4834    // c:827 — `int f, p, c = 0;`
4835    let mut c: i32 = 0;
4836    // c:829 — `p = ecadd(0);`
4837    let p = ecadd(0);
4838    // c:831 — `if ((f = par_sublist2(&c)) != -1) { ... }`
4839    match par_sublist2(&mut c) {
4840        Some(f) => {
4841            // c:832 — `int e = ecused;`
4842            let e = ECUSED.get() as usize;
4843            // c:834 — `*cmplx |= c;`
4844            *cmplx |= c;
4845            if tok() == DBAR || tok() == DAMPER {
4846                // c:836 — `enum lextok qtok = tok;`
4847                let qtok = tok();
4848                // c:839 — `cmdpush(tok == DBAR ? CS_CMDOR : CS_CMDAND);`
4849                cmdpush(if qtok == DBAR {
4850                    CS_CMDOR as u8
4851                } else {
4852                    CS_CMDAND as u8
4853                });
4854                // c:840 — `zshlex();`
4855                zshlex();
4856                // c:841-842 — `while (tok == SEPER) zshlex();`
4857                while tok() == SEPER {
4858                    zshlex();
4859                }
4860                // c:843 — `sl = par_sublist(cmplx);`
4861                let sl = par_sublist_wordcode(cmplx);
4862                // c:844-847 — `set_sublist_code(p, (sl ? ... : WC_SUBLIST_END),
4863                // f, (e - 1 - p), c);`
4864                let st = if sl {
4865                    if qtok == DBAR {
4866                        WC_SUBLIST_OR
4867                    } else {
4868                        WC_SUBLIST_AND
4869                    }
4870                } else {
4871                    WC_SUBLIST_END
4872                };
4873                set_sublist_code(p, st as i32, f, (e - 1 - p) as i32, c != 0);
4874                // c:848 — `cmdpop();`
4875                cmdpop();
4876            } else {
4877                // c:850-853 — `if (tok == AMPER || tok == AMPERBANG)
4878                // { c = 1; *cmplx |= c; }`
4879                if tok() == AMPER || tok() == AMPERBANG {
4880                    c = 1;
4881                    *cmplx |= c;
4882                }
4883                // c:854 — `set_sublist_code(p, WC_SUBLIST_END, f,
4884                // (e - 1 - p), c);`
4885                set_sublist_code(p, WC_SUBLIST_END as i32, f, (e - 1 - p) as i32, c != 0);
4886            }
4887            // c:856 — `return 1;`
4888            true
4889        }
4890        None => {
4891            // c:858-859 — `ecused--; return 0;`
4892            ECUSED.set((ECUSED.get() - 1).max(0));
4893            false
4894        }
4895    }
4896}
4897
4898/// Port of `par_pline(int *cmplx)` from `Src/parse.c:894-955`.
4899/// `pline : cmd [ ( BAR | BARAMP ) { SEPER } pline ]`. Emits a
4900/// WCB_PIPE header (mid for chain links, end for the last cmd)
4901/// plus the optional BARAMP `2>&1` synthetic redir.
4902/// Port of `par_pline(int *cmplx)` from `Src/parse.c:893-947`.
4903/// (Named `par_pipe_wordcode` to disambiguate from the AST
4904/// `par_pline` at parse.rs:3744 — semantically the same `pline`
4905/// production.)
4906pub fn par_pipe_wordcode(cmplx: &mut i32) -> bool {
4907    // c:897 — `zlong line = toklineno;`
4908    let line = toklineno() as i64;
4909    // c:899 — `p = ecadd(0);`
4910    let p = ecadd(0);
4911    // c:901-904 — `if (!par_cmd(cmplx, 0)) { ecused--; return 0; }`
4912    if !par_cmd_wordcode(cmplx, 0) {
4913        ECUSED.set((ECUSED.get() - 1).max(0));
4914        return false;
4915    }
4916    if tok() == BAR_TOK {
4917        // c:906 — `*cmplx = 1;`
4918        *cmplx = 1;
4919        // c:907 — `cmdpush(CS_PIPE);`
4920        cmdpush(CS_PIPE as u8);
4921        // c:908 — `zshlex();`
4922        zshlex();
4923        // c:909-910 — `while (tok == SEPER) zshlex();`
4924        while tok() == SEPER {
4925            zshlex();
4926        }
4927        // c:911 — `ecbuf[p] = WCB_PIPE(WC_PIPE_MID, line>=0 ? line+1 : 0);`
4928        ECBUF.with_borrow_mut(|b| {
4929            if p < b.len() {
4930                b[p] = WCB_PIPE(
4931                    WC_PIPE_MID,
4932                    if line >= 0 { (line + 1) as wordcode } else { 0 },
4933                );
4934            }
4935        });
4936        // c:912 — `ecispace(p+1, 1);`
4937        ecispace(p + 1, 1);
4938        // c:913 — `ecbuf[p+1] = ecused - 1 - p;`
4939        let used = ECUSED.get() as usize;
4940        ECBUF.with_borrow_mut(|b| {
4941            if p + 1 < b.len() {
4942                b[p + 1] = (used.saturating_sub(1 + p)) as wordcode;
4943            }
4944        });
4945        // c:914-916 — `if (!par_pline(cmplx)) { tok = LEXERR; }`
4946        if !par_pipe_wordcode(cmplx) {
4947            set_tok(LEXERR);
4948        }
4949        // c:917 — `cmdpop();`
4950        cmdpop();
4951        true
4952    } else if tok() == BARAMP {
4953        // c:920-923 — walk past inline WC_REDIR to find r.
4954        let mut r = p + 1;
4955        loop {
4956            let code = ECBUF.with_borrow(|b| b.get(r).copied().unwrap_or(0));
4957            if wc_code(code) != WC_REDIR {
4958                break;
4959            }
4960            r += WC_REDIR_WORDS(code) as usize;
4961        }
4962        // c:925-928 — `ecispace(r, 3);` + synthetic `2>&1` redir
4963        ecispace(r, 3);
4964        ECBUF.with_borrow_mut(|b| {
4965            if r + 2 < b.len() {
4966                b[r] = WCB_REDIR(REDIR_MERGEOUT as wordcode);
4967                b[r + 1] = 2;
4968                b[r + 2] = ecstrcode("1");
4969            }
4970        });
4971        // c:930 — `*cmplx = 1;`
4972        *cmplx = 1;
4973        cmdpush(CS_ERRPIPE as u8);
4974        zshlex();
4975        while tok() == SEPER {
4976            zshlex();
4977        }
4978        ECBUF.with_borrow_mut(|b| {
4979            if p < b.len() {
4980                b[p] = WCB_PIPE(
4981                    WC_PIPE_MID,
4982                    if line >= 0 { (line + 1) as wordcode } else { 0 },
4983                );
4984            }
4985        });
4986        ecispace(p + 1, 1);
4987        let used = ECUSED.get() as usize;
4988        ECBUF.with_borrow_mut(|b| {
4989            if p + 1 < b.len() {
4990                b[p + 1] = (used.saturating_sub(1 + p)) as wordcode;
4991            }
4992        });
4993        if !par_pipe_wordcode(cmplx) {
4994            set_tok(LEXERR);
4995        }
4996        cmdpop();
4997        true
4998    } else {
4999        // c:944 — `ecbuf[p] = WCB_PIPE(WC_PIPE_END, line>=0 ? line+1 : 0);`
5000        ECBUF.with_borrow_mut(|b| {
5001            if p < b.len() {
5002                b[p] = WCB_PIPE(
5003                    WC_PIPE_END,
5004                    if line >= 0 { (line + 1) as wordcode } else { 0 },
5005                );
5006            }
5007        });
5008        true
5009    }
5010}
5011
5012/// Port of `par_cmd(int *cmplx, int zsh_construct)` from
5013/// `Src/parse.c:958-1085`. Parses leading + trailing redirs and
5014/// dispatches on the current token to the right par_* builder.
5015/// Returns false only when no command was emitted (no redirs +
5016/// par_simple returned 0).
5017/// Port of `par_cmd(int *cmplx, int zsh_construct)` from
5018/// `Src/parse.c:957-1077`.
5019pub fn par_cmd_wordcode(cmplx: &mut i32, zsh_construct: i32) -> bool {
5020    // c:960 — `int r, nr = 0;`
5021    let mut nr: i32 = 0;
5022    // c:962 — `r = ecused;`
5023    let mut r: usize = ECUSED.get() as usize;
5024    // c:964-968 — leading redirs.
5025    if IS_REDIROP(tok()) {
5026        // c:965 — `*cmplx = 1;`
5027        *cmplx = 1;
5028        // c:966-968 — `while (IS_REDIROP(tok)) { nr += par_redir(&r, NULL); }`
5029        while IS_REDIROP(tok()) {
5030            nr += par_redir_wordcode(&mut r, None);
5031        }
5032    }
5033    // c:970-1066 — token-dispatch switch.
5034    match tok() {
5035        FOR => {
5036            cmdpush(CS_FOR as u8);
5037            par_for_wordcode(cmplx);
5038            cmdpop();
5039        }
5040        FOREACH => {
5041            cmdpush(CS_FOREACH as u8);
5042            par_for_wordcode(cmplx);
5043            cmdpop();
5044        }
5045        SELECT => {
5046            // c:982 — `*cmplx = 1;`
5047            *cmplx = 1;
5048            cmdpush(CS_SELECT as u8);
5049            par_for_wordcode(cmplx);
5050            cmdpop();
5051        }
5052        CASE => {
5053            cmdpush(CS_CASE as u8);
5054            par_case_wordcode(cmplx);
5055            cmdpop();
5056        }
5057        IF => {
5058            par_if_wordcode(cmplx);
5059        }
5060        WHILE => {
5061            cmdpush(CS_WHILE as u8);
5062            par_while_wordcode(cmplx);
5063            cmdpop();
5064        }
5065        UNTIL => {
5066            cmdpush(CS_UNTIL as u8);
5067            par_while_wordcode(cmplx);
5068            cmdpop();
5069        }
5070        REPEAT => {
5071            cmdpush(CS_REPEAT as u8);
5072            par_repeat_wordcode(cmplx);
5073            cmdpop();
5074        }
5075        INPAR_TOK => {
5076            // c:1011 — `*cmplx = 1;`
5077            *cmplx = 1;
5078            cmdpush(CS_SUBSH as u8);
5079            par_subsh_wordcode(cmplx, zsh_construct);
5080            cmdpop();
5081        }
5082        INBRACE_TOK => {
5083            cmdpush(CS_CURSH as u8);
5084            par_subsh_wordcode(cmplx, zsh_construct);
5085            cmdpop();
5086        }
5087        FUNC => {
5088            cmdpush(CS_FUNCDEF as u8);
5089            par_funcdef_wordcode(cmplx);
5090            cmdpop();
5091        }
5092        DINBRACK => {
5093            cmdpush(CS_COND as u8);
5094            par_cond_wordcode();
5095            cmdpop();
5096        }
5097        DINPAR => {
5098            par_arith_wordcode();
5099        }
5100        TIME => {
5101            // c:1037-1050 — `static int inpartime` guard so
5102            // `time time foo` doesn't recurse infinitely.
5103            if !PARSER_INPARTIME.with(|c| c.get()) {
5104                // c:1041 — `*cmplx = 1;`
5105                *cmplx = 1;
5106                PARSER_INPARTIME.with(|c| c.set(true));
5107                par_time_wordcode();
5108                PARSER_INPARTIME.with(|c| c.set(false));
5109            } else {
5110                set_tok(STRING_LEX);
5111                let sr = par_simple_wordcode(cmplx, nr);
5112                if sr == 0 && nr == 0 {
5113                    return false;
5114                }
5115                if sr > 1 {
5116                    *cmplx = 1;
5117                    r += (sr - 1) as usize;
5118                }
5119            }
5120        }
5121        _ => {
5122            // c:1054 — `if (!(sr = par_simple(cmplx, nr)))`
5123            let sr = par_simple_wordcode(cmplx, nr);
5124            if sr == 0 {
5125                if nr == 0 {
5126                    return false;
5127                }
5128            } else if sr > 1 {
5129                // c:1060-1061 — `*cmplx = 1; r += sr - 1;`
5130                *cmplx = 1;
5131                r += (sr - 1) as usize;
5132            }
5133        }
5134    }
5135    // c:1067-1071 — trailing redirs.
5136    // c:1067 — `if (IS_REDIROP(tok)) { *cmplx = 1; while (...) (void)par_redir(&r, NULL); }`
5137    if IS_REDIROP(tok()) {
5138        *cmplx = 1;
5139        while IS_REDIROP(tok()) {
5140            let _ = par_redir_wordcode(&mut r, None);
5141        }
5142    }
5143    // c:1072-1075 — `incmdpos=1; incasepat=0; incond=0; intypeset=0;`
5144    set_incmdpos(true);
5145    set_incasepat(0);
5146    set_incond(0);
5147    set_intypeset(false);
5148    let _ = r;
5149    // c:1076 — `return 1;`
5150    true
5151}
5152
5153/// Port of `par_for(int *cmplx)` from `Src/parse.c:1086-1198`.
5154pub fn par_for_wordcode(cmplx: &mut i32) {
5155    // c:1089 — `int oecused = ecused, csh = (tok == FOREACH), p, sel = (tok == SELECT);`
5156    let _oecused = ECUSED.get() as usize;
5157    let csh = tok() == FOREACH;
5158    let sel = tok() == SELECT;
5159    let p: usize;
5160    // c:1090 — `int type;`
5161    let r#type: wordcode;
5162
5163    // c:1092 — `p = ecadd(0);`
5164    p = ecadd(0);
5165
5166    // c:1094 — `incmdpos = 0;`
5167    set_incmdpos(false);
5168    // c:1095 — `infor = tok == FOR ? 2 : 0;`
5169    set_infor(if tok() == FOR { 2 } else { 0 });
5170    // c:1096 — `zshlex();`
5171    zshlex();
5172    // c:1097 — `if (tok == DINPAR) {`
5173    if tok() == DINPAR {
5174        // c:1098 — `zshlex();`
5175        zshlex();
5176        // c:1099-1100 — `if (tok != DINPAR) YYERRORV(oecused);`
5177        if tok() != DINPAR {
5178            zerr("par_for: expected init");
5179            return;
5180        }
5181        // c:1101 — `ecstr(tokstr);`
5182        ecstr(&tokstr().unwrap_or_default());
5183        // c:1102 — `zshlex();`
5184        zshlex();
5185        // c:1103-1104
5186        if tok() != DINPAR {
5187            zerr("par_for: expected cond");
5188            return;
5189        }
5190        // c:1105
5191        ecstr(&tokstr().unwrap_or_default());
5192        // c:1106
5193        zshlex();
5194        // c:1107-1108
5195        if tok() != DOUTPAR {
5196            zerr("par_for: expected ))");
5197            return;
5198        }
5199        // c:1109
5200        ecstr(&tokstr().unwrap_or_default());
5201        // c:1110 — `infor = 0;`
5202        set_infor(0);
5203        // c:1111 — `incmdpos = 1;`
5204        set_incmdpos(true);
5205        // c:1112 — `zshlex();`
5206        zshlex();
5207        // c:1113 — `type = WC_FOR_COND;`
5208        r#type = WC_FOR_COND;
5209    } else {
5210        // c:1115 — `int np = 0, n, posix_in, ona = noaliases, onc = nocorrect;`
5211        let mut np: usize = 0;
5212        let mut n: u32;
5213        let posix_in: bool;
5214        let ona = noaliases();
5215        let onc = nocorrect();
5216        // c:1116 — `infor = 0;`
5217        set_infor(0);
5218        // c:1117-1118 — `if (tok != STRING || !isident(tokstr)) YYERRORV(oecused);`
5219        if tok() != STRING_LEX || !crate::ported::params::isident(&tokstr().unwrap_or_default()) {
5220            zerr("par_for: expected identifier");
5221            return;
5222        }
5223        // c:1119-1120 — `if (!sel) np = ecadd(0);`
5224        if !sel {
5225            np = ecadd(0);
5226        }
5227        // c:1121 — `n = 0;`
5228        n = 0;
5229        // c:1122 — `incmdpos = 1;`
5230        set_incmdpos(true);
5231        // c:1123 — `noaliases = nocorrect = 1;`
5232        set_noaliases(true);
5233        set_nocorrect(1);
5234        // c:1124 — `for (;;) {`
5235        loop {
5236            // c:1125 — `n++;`
5237            n += 1;
5238            // c:1126 — `ecstr(tokstr);`
5239            ecstr(&tokstr().unwrap_or_default());
5240            // c:1127 — `zshlex();`
5241            zshlex();
5242            // c:1128-1129 — `if (tok != STRING || !strcmp(tokstr, "in") || sel) break;`
5243            if tok() != STRING_LEX || tokstr().as_deref() == Some("in") || sel {
5244                break;
5245            }
5246            // c:1130-1135 — `if (!isident(tokstr) || errflag) { ... YYERRORV; }`
5247            if !crate::ported::params::isident(&tokstr().unwrap_or_default())
5248                || (errflag.load(Ordering::Relaxed) & 1) != 0
5249            {
5250                set_noaliases(ona);
5251                set_nocorrect(onc);
5252                zerr("par_for: expected identifier in name list");
5253                return;
5254            }
5255        }
5256        // c:1137-1138 — `noaliases = ona; nocorrect = onc;`
5257        set_noaliases(ona);
5258        set_nocorrect(onc);
5259        // c:1139-1140 — `if (!sel) ecbuf[np] = n;`
5260        if !sel {
5261            ECBUF.with_borrow_mut(|b| {
5262                b[np] = n;
5263            });
5264        }
5265        // c:1141 — `posix_in = isnewlin;`
5266        posix_in = isnewlin() != 0;
5267        // c:1142-1143 — `while (isnewlin) zshlex();`
5268        while isnewlin() != 0 {
5269            zshlex();
5270        }
5271        // c:1144 — `if (tok == STRING && !strcmp(tokstr, "in")) {`
5272        if tok() == STRING_LEX && tokstr().as_deref() == Some("in") {
5273            // c:1145 — `incmdpos = 0;`
5274            set_incmdpos(false);
5275            // c:1146 — `zshlex();`
5276            zshlex();
5277            // c:1147 — `np = ecadd(0);`
5278            np = ecadd(0);
5279            // c:1148 — `n = par_wordlist();`
5280            let n2 = par_wordlist_wordcode();
5281            // c:1149-1150 — `if (tok != SEPER) YYERRORV(oecused);`
5282            if tok() != SEPER {
5283                zerr("par_for: expected separator after `in`");
5284                return;
5285            }
5286            // c:1151 — `ecbuf[np] = n;`
5287            ECBUF.with_borrow_mut(|b| {
5288                b[np] = n2 as wordcode;
5289            });
5290            // c:1152 — `type = (sel ? WC_SELECT_LIST : WC_FOR_LIST);`
5291            r#type = if sel { WC_SELECT_LIST } else { WC_FOR_LIST };
5292        } else if !posix_in && tok() == INPAR_TOK {
5293            // c:1153-1154 — `else if (!posix_in && tok == INPAR)`
5294            // c:1154 — `incmdpos = 0;`
5295            set_incmdpos(false);
5296            // c:1155 — `zshlex();`
5297            zshlex();
5298            // c:1156 — `np = ecadd(0);`
5299            np = ecadd(0);
5300            // c:1157 — `n = par_nl_wordlist();`
5301            let n2 = par_nl_wordlist_wordcode();
5302            // c:1158-1159 — `if (tok != OUTPAR) YYERRORV(oecused);`
5303            if tok() != OUTPAR_TOK {
5304                zerr("par_for: expected `)`");
5305                return;
5306            }
5307            // c:1160 — `ecbuf[np] = n;`
5308            ECBUF.with_borrow_mut(|b| {
5309                b[np] = n2 as wordcode;
5310            });
5311            // c:1161 — `incmdpos = 1;`
5312            set_incmdpos(true);
5313            // c:1162 — `zshlex();`
5314            zshlex();
5315            // c:1163 — `type = (sel ? WC_SELECT_LIST : WC_FOR_LIST);`
5316            r#type = if sel { WC_SELECT_LIST } else { WC_FOR_LIST };
5317        } else {
5318            // c:1165 — `type = (sel ? WC_SELECT_PPARAM : WC_FOR_PPARAM);`
5319            r#type = if sel { WC_SELECT_PPARAM } else { WC_FOR_PPARAM };
5320        }
5321        let _ = np;
5322    }
5323    // c:1167 — `incmdpos = 1;`
5324    set_incmdpos(true);
5325    // c:1168-1169 — `while (tok == SEPER) zshlex();`
5326    while tok() == SEPER {
5327        zshlex();
5328    }
5329    // c:1170-1193 — body dispatch (inline in C, factored here for
5330    // reuse by par_while/par_repeat — same control flow, same calls).
5331    par_loop_body_wordcode(cmplx, csh);
5332    // c:1195-1197 — `ecbuf[p] = (sel ? WCB_SELECT(...) : WCB_FOR(...));`
5333    let used = ECUSED.get() as usize;
5334    let off = used.saturating_sub(1 + p) as wordcode;
5335    ECBUF.with_borrow_mut(|b| {
5336        b[p] = if sel {
5337            WCB_SELECT(r#type, off)
5338        } else {
5339            WCB_FOR(r#type, off)
5340        };
5341    });
5342}
5343
5344/// Port of `par_wordlist(void)` from `Src/parse.c:2361-2371` —
5345/// emits wordcode form. Returns the number of strings emitted.
5346fn par_wordlist_wordcode() -> u32 {
5347    // c:2364 — `int num = 0;`
5348    let mut num: u32 = 0;
5349    // c:2365 — `while (tok == STRING) {`
5350    while tok() == STRING_LEX {
5351        // c:2366 — `ecstr(tokstr);`
5352        ecstr(&tokstr().unwrap_or_default());
5353        // c:2367 — `num++;`
5354        num += 1;
5355        // c:2368 — `zshlex();`
5356        zshlex();
5357    }
5358    // c:2370 — `return num;`
5359    num
5360}
5361
5362/// Port of `par_nl_wordlist(void)` from `Src/parse.c:2378-2390` —
5363/// emits wordcode form. Like par_wordlist but tolerates SEPER
5364/// between words.
5365fn par_nl_wordlist_wordcode() -> u32 {
5366    // c:2381 — `int num = 0;`
5367    let mut num: u32 = 0;
5368    // c:2383 — `while (tok == STRING || tok == SEPER) {`
5369    while tok() == STRING_LEX || tok() == SEPER || tok() == NEWLIN {
5370        // c:2384-2387 — `if (tok != SEPER) { ecstr(tokstr); num++; }`
5371        if tok() == STRING_LEX {
5372            ecstr(&tokstr().unwrap_or_default());
5373            num += 1;
5374        }
5375        // c:2388 — `zshlex();`
5376        zshlex();
5377    }
5378    // c:2390 — `return num;`
5379    num
5380}
5381
5382/// Body dispatch shared by par_for / par_while / par_repeat.
5383/// Direct port of `Src/parse.c:1170-1194`.
5384fn par_loop_body_wordcode(cmplx: &mut i32, csh: bool) {
5385    if tok() == DOLOOP {
5386        zshlex();
5387        // c:1172 — `par_save_list(cmplx);`
5388        par_save_list_wordcode(cmplx);
5389        if tok() != DONE {
5390            zerr("missing `done`");
5391            return;
5392        }
5393        set_incmdpos(false);
5394        zshlex();
5395    } else if tok() == INBRACE_TOK {
5396        zshlex();
5397        // c:1179 — `par_save_list(cmplx);`
5398        par_save_list_wordcode(cmplx);
5399        if tok() != OUTBRACE_TOK {
5400            zerr("missing `}`");
5401            return;
5402        }
5403        set_incmdpos(false);
5404        zshlex();
5405    } else if csh || isset(CSHJUNKIELOOPS) {
5406        // c:1185 — `par_save_list(cmplx);`
5407        par_save_list_wordcode(cmplx);
5408        if tok() != ZEND {
5409            zerr("missing `end`");
5410            return;
5411        }
5412        set_incmdpos(false);
5413        zshlex();
5414    } else if unset(SHORTLOOPS) {
5415        zerr("short loop form requires SHORTLOOPS");
5416    } else {
5417        // c:1193 — `par_save_list1(cmplx);`
5418        par_save_list1_wordcode(cmplx);
5419    }
5420}
5421
5422/// `select` shares par_for body (c:983-985 routes SELECT to par_for).
5423pub fn par_select_wordcode(cmplx: &mut i32) {
5424    par_for_wordcode(cmplx);
5425}
5426
5427/// Port of `par_case(int *cmplx)` from `Src/parse.c:1208-1400`.
5428pub fn par_case_wordcode(_cmplx: &mut i32) {
5429    // c:1211 — `int oecused = ecused, brflag, p, pp, palts, type, nalts;`
5430    let _oecused = ECUSED.get() as usize;
5431    let brflag: bool;
5432    let p: usize;
5433    let mut pp: usize;
5434    let mut palts: usize;
5435    let mut r#type: wordcode;
5436    let mut nalts: u32;
5437    // c:1212 — `int ona, onc;`
5438    let ona: bool;
5439    let onc: i32;
5440
5441    // c:1214 — `p = ecadd(0);`
5442    p = ecadd(0);
5443
5444    // c:1216 — `incmdpos = 0;`
5445    set_incmdpos(false);
5446    // c:1217 — `zshlex();`
5447    zshlex();
5448    // c:1218-1219 — `if (tok != STRING) YYERRORV(oecused);`
5449    if tok() != STRING_LEX {
5450        zerr("par_case: expected scrutinee");
5451        return;
5452    }
5453    // c:1220 — `ecstr(tokstr);`
5454    ecstr(&tokstr().unwrap_or_default());
5455
5456    // c:1222 — `incmdpos = 1;`
5457    set_incmdpos(true);
5458    // c:1223-1224 — `ona = noaliases; onc = nocorrect;`
5459    ona = noaliases();
5460    onc = nocorrect();
5461    // c:1225 — `noaliases = nocorrect = 1;`
5462    set_noaliases(true);
5463    set_nocorrect(1);
5464    // c:1226 — `zshlex();`
5465    zshlex();
5466    // c:1227-1228 — `while (tok == SEPER) zshlex();`
5467    while tok() == SEPER {
5468        zshlex();
5469    }
5470    // c:1229 — `if (!(tok == STRING && !strcmp(tokstr, "in")) && tok != INBRACE)`
5471    if !(tok() == STRING_LEX && tokstr().as_deref() == Some("in")) && tok() != INBRACE_TOK {
5472        // c:1231-1233 — restore noaliases/nocorrect + ERROR
5473        set_noaliases(ona);
5474        set_nocorrect(onc);
5475        zerr("par_case: expected `in` or `{`");
5476        return;
5477    }
5478    // c:1235 — `brflag = (tok == INBRACE);`
5479    brflag = tok() == INBRACE_TOK;
5480    // c:1236 — `incasepat = 1;`
5481    set_incasepat(1);
5482    // c:1237 — `incmdpos = 0;`
5483    set_incmdpos(false);
5484    // c:1238-1239 — `noaliases = ona; nocorrect = onc;`
5485    set_noaliases(ona);
5486    set_nocorrect(onc);
5487    // c:1240 — `zshlex();`
5488    zshlex();
5489
5490    // c:1242 — `for (;;) {`
5491    'arms: loop {
5492        // c:1243 — `char *str;`
5493        let mut str: String;
5494        // c:1244 — `int skip_zshlex;`
5495        let skip_zshlex: bool;
5496
5497        // c:1246-1247 — `while (tok == SEPER) zshlex();`
5498        while tok() == SEPER {
5499            zshlex();
5500        }
5501        // c:1248-1249 — `if (tok == OUTBRACE) break;`
5502        if tok() == OUTBRACE_TOK {
5503            break 'arms;
5504        }
5505        // c:1250-1251 — `if (tok == INPAR) zshlex();`
5506        if tok() == INPAR_TOK {
5507            zshlex();
5508        }
5509        // c:1252-1254 — `if (tok == BAR) { str = ""; skip_zshlex = 1; }`
5510        if tok() == BAR_TOK {
5511            str = String::new();
5512            skip_zshlex = true;
5513        } else {
5514            // c:1256-1257 — `if (tok != STRING) YYERRORV(oecused);`
5515            if tok() != STRING_LEX {
5516                zerr("par_case: expected pattern");
5517                return;
5518            }
5519            // c:1258-1259 — `if (!strcmp(tokstr, "esac")) break;`
5520            if tokstr().as_deref() == Some("esac") {
5521                break 'arms;
5522            }
5523            // c:1260 — `str = dupstring(tokstr);`
5524            str = tokstr().unwrap_or_default();
5525            // c:1261 — `skip_zshlex = 0;`
5526            skip_zshlex = false;
5527        }
5528        // c:1263 — `type = WC_CASE_OR;`
5529        r#type = WC_CASE_OR;
5530        // c:1264-1266 — `pp = ecadd(0); palts = ecadd(0); nalts = 0;`
5531        pp = ecadd(0);
5532        palts = ecadd(0);
5533        nalts = 0;
5534        // c:1300 — `incasepat = -1;`
5535        set_incasepat(-1);
5536        // c:1301 — `incmdpos = 1;`
5537        set_incmdpos(true);
5538        // c:1302-1303 — `if (!skip_zshlex) zshlex();`
5539        if !skip_zshlex {
5540            zshlex();
5541        }
5542        // c:1304 — `for (;;) {`
5543        loop {
5544            // c:1305-1313 — `if (tok == OUTPAR) { ecstr(str);
5545            //   ecadd(ecnpats++); nalts++; incasepat = 0;
5546            //   incmdpos = 1; zshlex(); break; }`
5547            if tok() == OUTPAR_TOK {
5548                ecstr(&str);
5549                let np = ECNPATS.with(|cc| {
5550                    let v = cc.get();
5551                    cc.set(v + 1);
5552                    v
5553                }) as u32;
5554                ecadd(np);
5555                nalts += 1;
5556                set_incasepat(0);
5557                set_incmdpos(true);
5558                zshlex();
5559                break;
5560            }
5561            // c:1314-1320 — `else if (tok == BAR) { ecstr(str);
5562            //   ecadd(ecnpats++); nalts++; incasepat = 1;
5563            //   incmdpos = 0; }`
5564            else if tok() == BAR_TOK {
5565                ecstr(&str);
5566                let np = ECNPATS.with(|cc| {
5567                    let v = cc.get();
5568                    cc.set(v + 1);
5569                    v
5570                }) as u32;
5571                ecadd(np);
5572                nalts += 1;
5573                set_incasepat(1);
5574                set_incmdpos(false);
5575            }
5576            // c:1321-1357 — else { ... `(...)` whole-pattern hack
5577            // (Inpar at str[0]); else YYERRORV. Not yet ported —
5578            // err out on unexpected. }
5579            else {
5580                zerr("par_case: expected `)` or `|`");
5581                return;
5582            }
5583
5584            // c:1359 — `zshlex();`
5585            zshlex();
5586            // c:1360-1377 — switch on next tok.
5587            match tok() {
5588                STRING_LEX => {
5589                    // c:1361-1365
5590                    str = tokstr().unwrap_or_default();
5591                    zshlex();
5592                }
5593                OUTPAR_TOK | BAR_TOK => {
5594                    // c:1367-1371 — empty string
5595                    str = String::new();
5596                }
5597                _ => {
5598                    // c:1374-1376 — `YYERRORV(oecused);`
5599                    zerr("par_case: expected pattern, `)` or `|`");
5600                    return;
5601                }
5602            }
5603        }
5604        // c:1379 — `incasepat = 0;`
5605        set_incasepat(0);
5606        // c:1380 — `par_save_list(cmplx);`
5607        par_save_list_wordcode(_cmplx);
5608        // c:1381-1384 — terminator → arm type
5609        if tok() == SEMIAMP {
5610            r#type = WC_CASE_AND;
5611        } else if tok() == SEMIBAR {
5612            r#type = WC_CASE_TESTAND;
5613        }
5614        // c:1385 — `ecbuf[pp] = WCB_CASE(type, ecused - 1 - pp);`
5615        let used = ECUSED.get() as usize;
5616        ECBUF.with_borrow_mut(|b| {
5617            b[pp] = WCB_CASE(r#type, (used.saturating_sub(1 + pp)) as wordcode);
5618        });
5619        // c:1386 — `ecbuf[palts] = nalts;`
5620        ECBUF.with_borrow_mut(|b| {
5621            b[palts] = nalts;
5622        });
5623        // c:1387-1388 — terminator (ESAC w/o brace OR OUTBRACE w/ brace) → break
5624        if (tok() == ESAC && !brflag) || (tok() == OUTBRACE_TOK && brflag) {
5625            break 'arms;
5626        }
5627        // c:1389-1390 — `if (tok != DSEMI && tok != SEMIAMP && tok != SEMIBAR) YYERRORV;`
5628        if tok() != DSEMI && tok() != SEMIAMP && tok() != SEMIBAR {
5629            zerr("par_case: expected `;;`, `;&`, or `;|`");
5630            return;
5631        }
5632        // c:1391 — `incasepat = 1;`
5633        set_incasepat(1);
5634        // c:1392 — `incmdpos = 0;`
5635        set_incmdpos(false);
5636        // c:1393 — `zshlex();`
5637        zshlex();
5638    }
5639    // c:1395 — `incmdpos = 1;`
5640    set_incmdpos(true);
5641    // c:1396 — `incasepat = 0;`
5642    set_incasepat(0);
5643    // c:1397 — `zshlex();`
5644    zshlex();
5645
5646    // c:1399 — `ecbuf[p] = WCB_CASE(WC_CASE_HEAD, ecused - 1 - p);`
5647    let used = ECUSED.get() as usize;
5648    ECBUF.with_borrow_mut(|b| {
5649        b[p] = WCB_CASE(WC_CASE_HEAD, (used.saturating_sub(1 + p)) as wordcode);
5650    });
5651}
5652
5653/// Port of `par_if(int *cmplx)` from `Src/parse.c:1410-1512`.
5654pub fn par_if_wordcode(cmplx: &mut i32) {
5655    // c:1413 — `int oecused = ecused, p, pp, type, usebrace = 0;`
5656    let _oecused = ECUSED.get() as usize;
5657    let p: usize;
5658    let mut pp: usize = 0;
5659    let mut r#type: wordcode = WC_IF_IF;
5660    let mut usebrace: i32 = 0;
5661    // c:1414 — `enum lextok xtok;`
5662    let mut xtok: lextok;
5663    // c:1415 — `unsigned char nc;`
5664    let nc: u8;
5665    let _ = nc;
5666
5667    // c:1417 — `p = ecadd(0);`
5668    p = ecadd(0);
5669
5670    // c:1419 — `for (;;) {`
5671    loop {
5672        // c:1420 — `xtok = tok;`
5673        xtok = tok();
5674        // c:1421 — `cmdpush(xtok == IF ? CS_IF : CS_ELIF);`
5675        cmdpush(if xtok == IF {
5676            CS_IF as u8
5677        } else {
5678            CS_ELIF as u8
5679        });
5680        // c:1422-1426 — `if (xtok == FI) { incmdpos = 0; zshlex(); break; }`
5681        if xtok == FI {
5682            set_incmdpos(false);
5683            zshlex();
5684            break;
5685        }
5686        // c:1427 — `zshlex();`
5687        zshlex();
5688        // c:1428-1429 — `if (xtok == ELSE) break;`
5689        if xtok == ELSE {
5690            break;
5691        }
5692        // c:1430-1431 — `while (tok == SEPER) zshlex();`
5693        while tok() == SEPER {
5694            zshlex();
5695        }
5696        // c:1432-1435 — `if (!(xtok == IF || xtok == ELIF)) { cmdpop(); YYERRORV; }`
5697        if !(xtok == IF || xtok == ELIF) {
5698            cmdpop();
5699            zerr("par_if: expected `if` or `elif`");
5700            return;
5701        }
5702        // c:1436 — `pp = ecadd(0);`
5703        pp = ecadd(0);
5704        // c:1437 — `type = (xtok == IF ? WC_IF_IF : WC_IF_ELIF);`
5705        r#type = if xtok == IF { WC_IF_IF } else { WC_IF_ELIF };
5706        // c:1438 — `par_save_list(cmplx);` — condition body
5707        par_save_list_wordcode(cmplx);
5708        // c:1439 — `incmdpos = 1;`
5709        set_incmdpos(true);
5710        // c:1440-1443 — `if (tok == ENDINPUT) { cmdpop(); YYERRORV; }`
5711        if tok() == ENDINPUT {
5712            cmdpop();
5713            zerr("par_if: unexpected end-of-input after condition");
5714            return;
5715        }
5716        // c:1444-1445 — `while (tok == SEPER) zshlex();`
5717        while tok() == SEPER {
5718            zshlex();
5719        }
5720        // c:1446 — `xtok = FI;` — pre-set so the post-loop check works
5721        xtok = FI;
5722        // c:1447 — `nc = cmdstack[cmdsp - 1] == CS_IF ? CS_IFTHEN : CS_ELIFTHEN;`
5723        // (Not tracked separately in zshrs cmdstack — derive from cur top
5724        // by reading CMDSTACK; for safety use CS_IFTHEN as default.)
5725        // We don't have a way to read top easily — match by tracking
5726        // whether we just pushed CS_IF or CS_ELIF.
5727        // For wordcode emission this only affects cmdstack debug output;
5728        // not the emitted wordcode. Use CS_IFTHEN.
5729        let nc_local: u8 = CS_IFTHEN as u8;
5730        if tok() == THEN {
5731            // c:1448-1456 — THEN branch
5732            // c:1449 — `usebrace = 0;`
5733            usebrace = 0;
5734            // c:1450 — `cmdpop();`
5735            cmdpop();
5736            // c:1451 — `cmdpush(nc);`
5737            cmdpush(nc_local);
5738            // c:1452 — `zshlex();`
5739            zshlex();
5740            // c:1453 — `par_save_list(cmplx);` — then body
5741            par_save_list_wordcode(cmplx);
5742            // c:1454 — `ecbuf[pp] = WCB_IF(type, ecused - 1 - pp);`
5743            let used = ECUSED.get() as usize;
5744            ECBUF.with_borrow_mut(|b| {
5745                b[pp] = WCB_IF(r#type, (used.saturating_sub(1 + pp)) as wordcode);
5746            });
5747            // c:1455 — `incmdpos = 1;`
5748            set_incmdpos(true);
5749            // c:1456 — `cmdpop();`
5750            cmdpop();
5751        } else if tok() == INBRACE_TOK {
5752            // c:1457-1473 — INBRACE branch
5753            // c:1458 — `usebrace = 1;`
5754            usebrace = 1;
5755            // c:1459 — `cmdpop();`
5756            cmdpop();
5757            // c:1460 — `cmdpush(nc);`
5758            cmdpush(nc_local);
5759            // c:1461 — `zshlex();`
5760            zshlex();
5761            // c:1462 — `par_save_list(cmplx);`
5762            par_save_list_wordcode(cmplx);
5763            // c:1463-1466 — `if (tok != OUTBRACE) { cmdpop(); YYERRORV; }`
5764            if tok() != OUTBRACE_TOK {
5765                cmdpop();
5766                zerr("par_if: expected `}`");
5767                return;
5768            }
5769            // c:1467 — `ecbuf[pp] = WCB_IF(type, ecused - 1 - pp);`
5770            let used = ECUSED.get() as usize;
5771            ECBUF.with_borrow_mut(|b| {
5772                b[pp] = WCB_IF(r#type, (used.saturating_sub(1 + pp)) as wordcode);
5773            });
5774            // c:1469 — `zshlex();`
5775            zshlex();
5776            // c:1470 — `incmdpos = 1;`
5777            set_incmdpos(true);
5778            // c:1471-1472 — `if (tok == SEPER) break;`
5779            if tok() == SEPER {
5780                break;
5781            }
5782            // c:1473 — `cmdpop();`
5783            cmdpop();
5784        } else if unset(SHORTLOOPS) {
5785            // c:1474-1476 — `cmdpop(); YYERRORV;`
5786            cmdpop();
5787            zerr("par_if: short body requires SHORTLOOPS");
5788            return;
5789        } else {
5790            // c:1477-1484 — short loop form
5791            // c:1478 — `cmdpop();`
5792            cmdpop();
5793            // c:1479 — `cmdpush(nc);`
5794            cmdpush(nc_local);
5795            // c:1480 — `par_save_list1(cmplx);`
5796            par_save_list1_wordcode(cmplx);
5797            // c:1481 — `ecbuf[pp] = WCB_IF(type, ecused - 1 - pp);`
5798            let used = ECUSED.get() as usize;
5799            ECBUF.with_borrow_mut(|b| {
5800                b[pp] = WCB_IF(r#type, (used.saturating_sub(1 + pp)) as wordcode);
5801            });
5802            // c:1482 — `incmdpos = 1;`
5803            set_incmdpos(true);
5804            // c:1483 — `break;`
5805            break;
5806        }
5807    }
5808    // c:1486 — `cmdpop();`
5809    cmdpop();
5810    // c:1487 — `if (xtok == ELSE || tok == ELSE) {`
5811    if xtok == ELSE || tok() == ELSE {
5812        // c:1488 — `pp = ecadd(0);`
5813        pp = ecadd(0);
5814        // c:1489 — `cmdpush(CS_ELSE);`
5815        cmdpush(CS_ELSE as u8);
5816        // c:1490-1491 — `while (tok == SEPER) zshlex();`
5817        while tok() == SEPER {
5818            zshlex();
5819        }
5820        // c:1492-1498 — `if (tok == INBRACE && usebrace) { ... } else { ... }`
5821        if tok() == INBRACE_TOK && usebrace != 0 {
5822            // c:1493 — `zshlex();`
5823            zshlex();
5824            // c:1494 — `par_save_list(cmplx);`
5825            par_save_list_wordcode(cmplx);
5826            // c:1495-1498 — `if (tok != OUTBRACE) { cmdpop(); YYERRORV; }`
5827            if tok() != OUTBRACE_TOK {
5828                cmdpop();
5829                zerr("par_if: else expected `}`");
5830                return;
5831            }
5832        } else {
5833            // c:1500 — `par_save_list(cmplx);`
5834            par_save_list_wordcode(cmplx);
5835            // c:1501-1504 — `if (tok != FI) { cmdpop(); YYERRORV; }`
5836            if tok() != FI {
5837                cmdpop();
5838                zerr("par_if: else expected `fi`");
5839                return;
5840            }
5841        }
5842        // c:1506 — `incmdpos = 0;`
5843        set_incmdpos(false);
5844        // c:1507 — `ecbuf[pp] = WCB_IF(WC_IF_ELSE, ecused - 1 - pp);`
5845        let used = ECUSED.get() as usize;
5846        ECBUF.with_borrow_mut(|b| {
5847            b[pp] = WCB_IF(WC_IF_ELSE, (used.saturating_sub(1 + pp)) as wordcode);
5848        });
5849        // c:1508 — `zshlex();`
5850        zshlex();
5851        // c:1509 — `cmdpop();`
5852        cmdpop();
5853    }
5854    // c:1511 — `ecbuf[p] = WCB_IF(WC_IF_HEAD, ecused - 1 - p);`
5855    let used = ECUSED.get() as usize;
5856    ECBUF.with_borrow_mut(|b| {
5857        b[p] = WCB_IF(WC_IF_HEAD, (used.saturating_sub(1 + p)) as wordcode);
5858    });
5859}
5860
5861/// Port of `par_while(int *cmplx)` from `Src/parse.c:1520-1557`.
5862pub fn par_while_wordcode(cmplx: &mut i32) {
5863    // c:1523 — `int oecused = ecused, p;`
5864    let _oecused = ECUSED.get() as usize;
5865    let p: usize;
5866    // c:1524 — `int type = (tok == UNTIL ? WC_WHILE_UNTIL : WC_WHILE_WHILE);`
5867    let r#type: wordcode = if tok() == UNTIL {
5868        WC_WHILE_UNTIL
5869    } else {
5870        WC_WHILE_WHILE
5871    };
5872
5873    // c:1526 — `p = ecadd(0);`
5874    p = ecadd(0);
5875    // c:1527 — `zshlex();`
5876    zshlex();
5877    // c:1528 — `par_save_list(cmplx);` — condition.
5878    par_save_list_wordcode(cmplx);
5879    // c:1529 — `incmdpos = 1;`
5880    set_incmdpos(true);
5881    // c:1530-1531 — `while (tok == SEPER) zshlex();`
5882    while tok() == SEPER {
5883        zshlex();
5884    }
5885    // c:1532-1545 — body dispatch (inlined in C; we factor via
5886    // par_loop_body_wordcode since for/while/repeat share this
5887    // identical block).
5888    if tok() == DOLOOP {
5889        // c:1533 — `zshlex();`
5890        zshlex();
5891        // c:1534 — `par_save_list(cmplx);`
5892        par_save_list_wordcode(cmplx);
5893        // c:1535-1536 — `if (tok != DONE) YYERRORV(oecused);`
5894        if tok() != DONE {
5895            zerr("par_while: expected `done`");
5896            return;
5897        }
5898        // c:1537 — `incmdpos = 0;`
5899        set_incmdpos(false);
5900        // c:1538 — `zshlex();`
5901        zshlex();
5902    } else if tok() == INBRACE_TOK {
5903        // c:1540 — `zshlex();`
5904        zshlex();
5905        // c:1541 — `par_save_list(cmplx);`
5906        par_save_list_wordcode(cmplx);
5907        // c:1542-1543 — `if (tok != OUTBRACE) YYERRORV(oecused);`
5908        if tok() != OUTBRACE_TOK {
5909            zerr("par_while: expected `}`");
5910            return;
5911        }
5912        // c:1544 — `incmdpos = 0;`
5913        set_incmdpos(false);
5914        // c:1545 — `zshlex();`
5915        zshlex();
5916    } else if isset(CSHJUNKIELOOPS) {
5917        // c:1546-1550
5918        par_save_list_wordcode(cmplx);
5919        if tok() != ZEND {
5920            zerr("par_while: expected `end`");
5921            return;
5922        }
5923        zshlex();
5924    } else if unset(SHORTLOOPS) {
5925        // c:1551-1552 — `YYERRORV(oecused);`
5926        zerr("par_while: short body requires SHORTLOOPS");
5927        return;
5928    } else {
5929        // c:1554 — `par_save_list1(cmplx);`
5930        par_save_list1_wordcode(cmplx);
5931    }
5932
5933    // c:1556 — `ecbuf[p] = WCB_WHILE(type, ecused - 1 - p);`
5934    let used = ECUSED.get() as usize;
5935    ECBUF.with_borrow_mut(|b| {
5936        b[p] = WCB_WHILE(r#type, (used.saturating_sub(1 + p)) as wordcode);
5937    });
5938}
5939
5940/// `until` shares par_while body — tok==UNTIL flips the type.
5941pub fn par_until_wordcode(cmplx: &mut i32) {
5942    par_while_wordcode(cmplx);
5943}
5944
5945/// Port of `par_repeat(int *cmplx)` from `Src/parse.c:1564-1606`.
5946pub fn par_repeat_wordcode(cmplx: &mut i32) {
5947    // c:1567 — `/* ### what to do about inrepeat_ here? */`
5948    // c:1568 — `int oecused = ecused, p;`
5949    let _oecused = ECUSED.get() as usize;
5950    let p: usize;
5951
5952    // c:1570 — `p = ecadd(0);`
5953    p = ecadd(0);
5954
5955    // c:1572 — `incmdpos = 0;`
5956    set_incmdpos(false);
5957    // c:1573 — `zshlex();`
5958    zshlex();
5959    // c:1574-1575 — `if (tok != STRING) YYERRORV(oecused);`
5960    if tok() != STRING_LEX {
5961        zerr("par_repeat: expected count");
5962        return;
5963    }
5964    // c:1576 — `ecstr(tokstr);`
5965    ecstr(&tokstr().unwrap_or_default());
5966    // c:1577 — `incmdpos = 1;`
5967    set_incmdpos(true);
5968    // c:1578 — `zshlex();`
5969    zshlex();
5970    // c:1579-1580 — `while (tok == SEPER) zshlex();`
5971    while tok() == SEPER {
5972        zshlex();
5973    }
5974    // c:1581-1604 — body dispatch (inlined here matching C exactly).
5975    if tok() == DOLOOP {
5976        // c:1582-1587
5977        zshlex();
5978        par_save_list_wordcode(cmplx);
5979        if tok() != DONE {
5980            zerr("par_repeat: expected `done`");
5981            return;
5982        }
5983        set_incmdpos(false);
5984        zshlex();
5985    } else if tok() == INBRACE_TOK {
5986        // c:1589-1594
5987        zshlex();
5988        par_save_list_wordcode(cmplx);
5989        if tok() != OUTBRACE_TOK {
5990            zerr("par_repeat: expected `}`");
5991            return;
5992        }
5993        set_incmdpos(false);
5994        zshlex();
5995    } else if isset(CSHJUNKIELOOPS) {
5996        // c:1596-1599
5997        par_save_list_wordcode(cmplx);
5998        if tok() != ZEND {
5999            zerr("par_repeat: expected `end`");
6000            return;
6001        }
6002        zshlex();
6003    } else if unset(SHORTLOOPS) && unset(SHORTREPEAT) {
6004        // c:1601-1602 — par_repeat needs BOTH SHORTLOOPS and SHORTREPEAT
6005        // unset to refuse short form (more permissive than par_while).
6006        zerr("par_repeat: short body requires SHORTLOOPS or SHORTREPEAT");
6007        return;
6008    } else {
6009        // c:1604 — `par_save_list1(cmplx);`
6010        par_save_list1_wordcode(cmplx);
6011    }
6012
6013    // c:1606 — `ecbuf[p] = WCB_REPEAT(ecused - 1 - p);`
6014    let used = ECUSED.get() as usize;
6015    ECBUF.with_borrow_mut(|b| {
6016        b[p] = WCB_REPEAT((used.saturating_sub(1 + p)) as wordcode);
6017    });
6018}
6019
6020/// Port of `par_funcdef(int *cmplx)` from `Src/parse.c:1672-1779`.
6021///
6022/// The `function NAME { ... }` form. Emits a WCB_FUNCDEF header
6023/// followed by a names-count slot, the names themselves, four
6024/// metadata slots (string-area start, string-area length, npats,
6025/// do_tracing), then the body wordcode, then WCB_END.
6026///
6027/// Critical: saves/resets `ecnpats` + `ecssub` + `ecsoffs` around
6028/// the body parse so per-function pattern counts don't leak into
6029/// the enclosing scope's `ecnpats` accumulator (parse.c:1723-1758).
6030pub fn par_funcdef_wordcode(cmplx: &mut i32) {
6031    // c:1674 — `int oecused = ecused, num = 0, onp, p, c = 0;`
6032    let _oecused = ECUSED.get() as usize;
6033    let mut num: i32 = 0;
6034    let onp: i32;
6035    let p: usize;
6036    let mut c: i32 = 0;
6037    // c:1675 — `int so, oecssub = ecssub;`
6038    let so: i32;
6039    let oecssub = ECSSUB.get();
6040    // c:1676 — `zlong oldlineno = lineno;`
6041    let oldlineno = lineno();
6042    // c:1677 — `int do_tracing = 0;`
6043    let mut do_tracing: i32 = 0;
6044
6045    // c:1679 — `lineno = 0;`
6046    set_lineno(0);
6047    // c:1680 — `nocorrect = 1;`
6048    set_nocorrect(1);
6049    // c:1681 — `incmdpos = 0;`
6050    set_incmdpos(false);
6051    // c:1682 — `zshlex();`
6052    zshlex();
6053
6054    // c:1684 — `p = ecadd(0);`
6055    p = ecadd(0);
6056    // c:1685 — `ecadd(0); /* p + 1 */`
6057    let p1 = ecadd(0);
6058
6059    // c:1687-1699 — `Consume an initial (-T), (--), or (-T --).`
6060    // c:1690 — `if (tok == STRING && tokstr[0] == Dash) {`
6061    if tok() == STRING_LEX {
6062        let s = tokstr().unwrap_or_default();
6063        let bytes = s.as_bytes();
6064        // C: `tokstr[0] == Dash` (Dash = 0x9b = 0xc2 0x9b in UTF-8).
6065        // First byte of UTF-8 `\u{9b}` is 0xc2; the char `'-'` is 0x2d.
6066        // Match either form.
6067        let first_is_dash = (bytes.len() >= 2 && bytes[0] == 0xc2 && bytes[1] == 0x9b)
6068            || (bytes.len() >= 1 && bytes[0] == b'-');
6069        if first_is_dash {
6070            // c:1691-1694 — `if (tokstr[1] == 'T' && !tokstr[2]) { ++do_tracing; zshlex(); }`
6071            // After the leading dash byte(s), check remaining bytes.
6072            let after_dash = if bytes.len() >= 2 && bytes[0] == 0xc2 && bytes[1] == 0x9b {
6073                &bytes[2..]
6074            } else {
6075                &bytes[1..]
6076            };
6077            if after_dash.len() == 1 && after_dash[0] == b'T' {
6078                do_tracing += 1;
6079                zshlex();
6080            }
6081            // c:1695-1698 — `if (tok == STRING && tokstr[0] == Dash &&
6082            //                  tokstr[1] == Dash && !tokstr[2]) zshlex();`
6083            if tok() == STRING_LEX {
6084                let s2 = tokstr().unwrap_or_default();
6085                let b2 = s2.as_bytes();
6086                let mut idx = 0;
6087                let mut dashes = 0;
6088                while idx < b2.len() && dashes < 2 {
6089                    if b2[idx] == 0xc2 && idx + 1 < b2.len() && b2[idx + 1] == 0x9b {
6090                        idx += 2;
6091                        dashes += 1;
6092                    } else if b2[idx] == b'-' {
6093                        idx += 1;
6094                        dashes += 1;
6095                    } else {
6096                        break;
6097                    }
6098                }
6099                if dashes == 2 && idx == b2.len() {
6100                    zshlex();
6101                }
6102            }
6103        }
6104    }
6105
6106    // c:1701-1709 — names loop.
6107    // `while (tok == STRING) { if ((*tokstr == Inbrace || *tokstr == '{')
6108    //   && !tokstr[1]) { tok = INBRACE; break; } ecstr(tokstr); num++; zshlex(); }`
6109    while tok() == STRING_LEX {
6110        let s = tokstr().unwrap_or_default();
6111        let bytes = s.as_bytes();
6112        // First byte tests for Inbrace marker (0x8f → UTF-8 `0xc2 0x8f`) or `{`,
6113        // and length-1 check (`!tokstr[1]`).
6114        let is_inbrace_only = (bytes.len() == 1 && bytes[0] == b'{')
6115            || (bytes.len() == 2 && bytes[0] == 0xc2 && bytes[1] == 0x8f);
6116        if is_inbrace_only {
6117            set_tok(INBRACE_TOK);
6118            break;
6119        }
6120        ecstr(&s);
6121        num += 1;
6122        zshlex();
6123    }
6124
6125    // c:1711-1714 — four metadata placeholder slots.
6126    let m2 = ecadd(0);
6127    let m3 = ecadd(0);
6128    let m4 = ecadd(0);
6129    let m5 = ecadd(0);
6130
6131    // c:1716 — `nocorrect = 0;`
6132    set_nocorrect(0);
6133    // c:1717 — `incmdpos = 1;`
6134    set_incmdpos(true);
6135    // c:1718-1719 — `if (tok == INOUTPAR) zshlex();`
6136    if tok() == INOUTPAR {
6137        zshlex();
6138    }
6139    // c:1720-1721 — `while (tok == SEPER) zshlex();`
6140    while tok() == SEPER {
6141        zshlex();
6142    }
6143
6144    // c:1723 — `ecnfunc++;`
6145    ECNFUNC.set(ECNFUNC.get() + 1);
6146    // c:1724 — `ecssub = so = ecsoffs;`
6147    so = ECSOFFS.get();
6148    ECSSUB.set(so);
6149    // c:1725 — `onp = ecnpats;`
6150    onp = ECNPATS.with(|cc| cc.get());
6151    // c:1726 — `ecnpats = 0;`
6152    ECNPATS.with(|cc| cc.set(0));
6153
6154    // c:1728 — `if (tok == INBRACE) {`
6155    if tok() == INBRACE_TOK {
6156        // c:1729 — `zshlex();`
6157        zshlex();
6158        // c:1730 — `par_list(&c);`
6159        par_list_wordcode(&mut c);
6160        // c:1731-1736 — `if (tok != OUTBRACE) { lineno += oldlineno; ... }`
6161        if tok() != OUTBRACE_TOK {
6162            set_lineno(lineno() + oldlineno);
6163            ECNPATS.with(|cc| cc.set(onp));
6164            ECSSUB.set(oecssub);
6165            zerr("par_funcdef: expected `}`");
6166            return;
6167        }
6168        // c:1737-1740 — `if (num == 0) { incmdpos = 0; }`
6169        if num == 0 {
6170            set_incmdpos(false);
6171        }
6172        // c:1741 — `zshlex();`
6173        zshlex();
6174    } else if unset(SHORTLOOPS) {
6175        // c:1742-1746 — `lineno += oldlineno; ecnpats = onp; ecssub = oecssub; YYERRORV`
6176        set_lineno(lineno() + oldlineno);
6177        ECNPATS.with(|cc| cc.set(onp));
6178        ECSSUB.set(oecssub);
6179        zerr("par_funcdef: short body requires SHORTLOOPS");
6180        return;
6181    } else {
6182        // c:1748 — `par_list1(&c);`
6183        par_list1_wordcode(&mut c);
6184    }
6185
6186    // c:1750 — `ecadd(WCB_END());`
6187    ecadd(WCB_END());
6188    // c:1751-1754 — fill the 4 metadata slots
6189    let cur_sofs = ECSOFFS.get();
6190    let body_npats = ECNPATS.with(|cc| cc.get());
6191    ECBUF.with_borrow_mut(|b| {
6192        b[m2] = (so - oecssub) as wordcode;
6193        b[m3] = (cur_sofs - so) as wordcode;
6194        b[m4] = body_npats as wordcode;
6195        b[m5] = do_tracing as wordcode;
6196    });
6197    // c:1755 — `ecbuf[p + 1] = num;`
6198    ECBUF.with_borrow_mut(|b| {
6199        b[p1] = num as wordcode;
6200    });
6201
6202    // c:1757 — `ecnpats = onp;`
6203    ECNPATS.with(|cc| cc.set(onp));
6204    // c:1758 — `ecssub = oecssub;`
6205    ECSSUB.set(oecssub);
6206    // c:1759 — `ecnfunc++;`
6207    ECNFUNC.set(ECNFUNC.get() + 1);
6208
6209    // c:1761 — `ecbuf[p] = WCB_FUNCDEF(ecused - 1 - p);`
6210    let used = ECUSED.get() as usize;
6211    ECBUF.with_borrow_mut(|b| {
6212        b[p] = WCB_FUNCDEF((used.saturating_sub(1 + p)) as wordcode);
6213    });
6214
6215    // c:1763-1777 — anonymous-function trailing args (num == 0 case).
6216    if num == 0 {
6217        // c:1766 — `int parg = ecadd(0);`
6218        let parg = ecadd(0);
6219        // c:1767 — `ecadd(0);`
6220        ecadd(0);
6221        // c:1768-1772 — `while (tok == STRING) { ecstr(tokstr); num++; zshlex(); }`
6222        while tok() == STRING_LEX {
6223            ecstr(&tokstr().unwrap_or_default());
6224            num += 1;
6225            zshlex();
6226        }
6227        // c:1773-1774 — `if (num > 0) *cmplx = 1;`
6228        if num > 0 {
6229            *cmplx = 1;
6230        }
6231        // c:1775 — `ecbuf[parg] = ecused - parg;`
6232        // c:1776 — `ecbuf[parg+1] = num;`
6233        let used2 = ECUSED.get() as usize;
6234        ECBUF.with_borrow_mut(|b| {
6235            b[parg] = (used2 - parg) as wordcode;
6236            b[parg + 1] = num as wordcode;
6237        });
6238    }
6239    // c:1778 — `lineno += oldlineno;`
6240    set_lineno(lineno() + oldlineno);
6241}
6242
6243/// Size of `struct fdhead` in `wordcode` (u32) units. Used by all
6244/// the header-walk macros below.
6245pub const FDHEAD_WORDS: usize = size_of::<fdhead>() / 4;
6246
6247/// `Src/parse.c:1619-1665`. Handles both `(...)` subshell and
6248/// `{...}` brace group (cursh) plus optional `always { ... }`
6249/// trailing block. C uses a single function with `zsh_construct=1`
6250/// for `{...}` and 0 for `(...)`.
6251pub fn par_subsh_wordcode(cmplx: &mut i32, zsh_construct: i32) {
6252    // c:1621 — `enum lextok otok = tok;`
6253    let otok = tok();
6254    // c:1622 — `int oecused = ecused, p, pp;`
6255    let _oecused = ECUSED.get() as usize;
6256    let p: usize;
6257    let pp: usize;
6258
6259    // c:1624 — `p = ecadd(0);`
6260    p = ecadd(0);
6261    // c:1625 — `/* Extra word only needed for always block */`
6262    // c:1626 — `pp = ecadd(0);`
6263    pp = ecadd(0);
6264    // c:1627 — `zshlex();`
6265    zshlex();
6266    // c:1628 — `par_list(cmplx);`
6267    par_list_wordcode(cmplx);
6268    // c:1629 — `ecadd(WCB_END());`
6269    ecadd(WCB_END());
6270    // c:1630-1631 — `if (tok != ((otok == INPAR) ? OUTPAR : OUTBRACE))
6271    // YYERRORV(oecused);`
6272    if tok()
6273        != (if otok == INPAR_TOK {
6274            OUTPAR_TOK
6275        } else {
6276            OUTBRACE_TOK
6277        })
6278    {
6279        zerr("par_subsh: missing closing token");
6280        return;
6281    }
6282    // c:1632 — `incmdpos = !zsh_construct;`
6283    set_incmdpos(zsh_construct == 0);
6284    // c:1633 — `zshlex();`
6285    zshlex();
6286
6287    // c:1635 — `/* Optional always block. No intervening SEPERs allowed. */`
6288    // c:1636 — `if (otok == INBRACE && tok == STRING && !strcmp(tokstr, "always")) {`
6289    if otok == INBRACE_TOK && tok() == STRING_LEX && tokstr().as_deref() == Some("always") {
6290        // c:1637 — `ecbuf[pp] = WCB_TRY(ecused - 1 - pp);`
6291        let used = ECUSED.get() as usize;
6292        ECBUF.with_borrow_mut(|b| {
6293            b[pp] = WCB_TRY((used.saturating_sub(1 + pp)) as wordcode);
6294        });
6295        // c:1638 — `incmdpos = 1;`
6296        set_incmdpos(true);
6297        // c:1639-1641 — `do { zshlex(); } while (tok == SEPER);`
6298        loop {
6299            zshlex();
6300            if tok() != SEPER {
6301                break;
6302            }
6303        }
6304
6305        // c:1643-1644 — `if (tok != INBRACE) YYERRORV(oecused);`
6306        if tok() != INBRACE_TOK {
6307            zerr("par_subsh: 'always' expects `{`");
6308            return;
6309        }
6310        // c:1645 — `cmdpop();`
6311        cmdpop();
6312        // c:1646 — `cmdpush(CS_ALWAYS);`
6313        cmdpush(CS_ALWAYS as u8);
6314
6315        // c:1648 — `zshlex();`
6316        zshlex();
6317        // c:1649 — `par_save_list(cmplx);`
6318        par_save_list_wordcode(cmplx);
6319        // c:1650-1651 — `while (tok == SEPER) zshlex();`
6320        while tok() == SEPER {
6321            zshlex();
6322        }
6323
6324        // c:1653 — `incmdpos = 1;`
6325        set_incmdpos(true);
6326
6327        // c:1655-1656 — `if (tok != OUTBRACE) YYERRORV(oecused);`
6328        if tok() != OUTBRACE_TOK {
6329            zerr("par_subsh: 'always' block missing `}`");
6330            return;
6331        }
6332        // c:1657 — `zshlex();`
6333        zshlex();
6334        // c:1658 — `ecbuf[p] = WCB_TRY(ecused - 1 - p);`
6335        let used = ECUSED.get() as usize;
6336        ECBUF.with_borrow_mut(|b| {
6337            b[p] = WCB_TRY((used.saturating_sub(1 + p)) as wordcode);
6338        });
6339    } else {
6340        // c:1660-1661 — `ecbuf[p] = (otok == INPAR ? WCB_SUBSH(...) : WCB_CURSH(...));`
6341        let used = ECUSED.get() as usize;
6342        let off = used.saturating_sub(1 + p);
6343        ECBUF.with_borrow_mut(|b| {
6344            b[p] = if otok == INPAR_TOK {
6345                WCB_SUBSH(off as wordcode)
6346            } else {
6347                WCB_CURSH(off as wordcode)
6348            };
6349        });
6350    }
6351}
6352
6353/// Port of `par_time(void)` from `Src/parse.c:1787`. `time PIPE`
6354/// emits WCB_TIMED(WC_TIMED_PIPE) + the sublist code; bare `time`
6355/// with no pipeline emits WCB_TIMED(WC_TIMED_EMPTY).
6356pub fn par_time_wordcode() {
6357    // c:1791 — `zshlex();`
6358    zshlex();
6359    // c:1793-1794 — `p = ecadd(0); ecadd(0);`
6360    let p = ecadd(0);
6361    ecadd(0);
6362    // c:1795 — `if ((f = par_sublist2(&c)) < 0)`
6363    let mut c = 0i32;
6364    let f = par_sublist2(&mut c);
6365    match f {
6366        Some(flags) => {
6367            // c:1799 — `ecbuf[p] = WCB_TIMED(WC_TIMED_PIPE);`
6368            ECBUF.with_borrow_mut(|b| {
6369                if p < b.len() {
6370                    b[p] = WCB_TIMED(WC_TIMED_PIPE);
6371                }
6372            });
6373            // c:1800 — `set_sublist_code(p+1, WC_SUBLIST_END, f,
6374            // ecused-2-p, c);`
6375            let used = ECUSED.get() as usize;
6376            let skip = used.saturating_sub(2 + p) as i32;
6377            set_sublist_code(p + 1, WC_SUBLIST_END as i32, flags, skip, c != 0);
6378        }
6379        None => {
6380            // c:1796-1798 — `ecused--; ecbuf[p] = WCB_TIMED(WC_TIMED_EMPTY);`
6381            ECUSED.set((ECUSED.get() - 1).max(0));
6382            ECBUF.with_borrow_mut(|b| {
6383                if p < b.len() {
6384                    b[p] = WCB_TIMED(WC_TIMED_EMPTY);
6385                }
6386            });
6387        }
6388    }
6389}
6390
6391/// Port of `par_dinbrack(void)` from `Src/parse.c:1810`. Wraps
6392/// `par_cond` (the cond-expression emitter at parse.c:2409) with
6393/// the `[[ ... ]]` framing: incond/incmdpos toggles + DOUTBRACK
6394/// expectation.
6395pub fn par_cond_wordcode() {
6396    let oecused = ECUSED.get();
6397    // c:1814 — `incond = 1;`
6398    set_incond(1);
6399    // c:1815 — `incmdpos = 0;`
6400    set_incmdpos(false);
6401    // c:1816 — `zshlex();` past `[[`.
6402    zshlex();
6403    // c:1817 — `par_cond();` — call the no-skip cond-expression
6404    // entry that EMITS WORDCODE (par_cond_top → par_cond_1 →
6405    // par_cond_2 → par_cond_double/triple/multi). NOT the AST
6406    // `par_cond` at parse.rs:4644 which is a misnamed `par_dinbrack`
6407    // that skips `[[` AND `]]` and returns a ZshCommand AST node
6408    // instead of pushing WC_COND opcodes. NOT `parse_cond_expr`
6409    // either — that's also AST-only, returning ZshCond. With
6410    // `parse_cond_expr` here, every `[[ ... ]]` test produced ZERO
6411    // wordcode payload and parity dropped ~148 words on /etc/zshrc.
6412    let _ = par_cond_top();
6413    // c:1818-1819 — `if (tok != DOUTBRACK) YYERRORV(oecused);`
6414    if tok() != DOUTBRACK {
6415        let _ = oecused;
6416        zerr("missing ]]");
6417        return;
6418    }
6419    // c:1820 — `incond = 0;`
6420    set_incond(0);
6421    // c:1821 — `incmdpos = 1;`
6422    set_incmdpos(true);
6423    // c:1822 — `zshlex();` past `]]`.
6424    zshlex();
6425}
6426
6427/// Port of the `case DINPAR:` arm of `par_cmd` from
6428/// `Src/parse.c:1031-1034`:
6429/// ```c
6430/// ecadd(WCB_ARITH());
6431/// ecstr(tokstr);
6432/// zshlex();
6433/// ```
6434/// `(( EXPR ))` arithmetic at command position — emits the ARITH
6435/// opcode followed by the interned EXPR string, then advances past
6436/// the DINPAR token (which already carries the body text).
6437pub fn par_arith_wordcode() {
6438    // c:1032 — `ecadd(WCB_ARITH());`
6439    ecadd(WCB_ARITH());
6440    // c:1033 — `ecstr(tokstr);` — interns the expression string and
6441    // appends its strcode index to the wordcode buffer.
6442    let expr = tokstr().unwrap_or_default();
6443    ecstr(&expr);
6444    // c:1034 — `zshlex();`
6445    zshlex();
6446}
6447
6448/// Port of `par_simple(int *cmplx, int nr)` from
6449/// `Src/parse.c:1836-2227`. Emits WC_SIMPLE + word count +
6450/// interned string offsets. Returns `0` when nothing was emitted,
6451/// otherwise `1 + (number of code words consumed by redirections)`.
6452/// The full C body handles assignments (ENVSTRING/ENVARRAY),
6453/// inline `{var}>file` brace-FDs, prefix modifiers (NOCORRECT etc),
6454/// and `name() { body }` funcdef detection — those paths are
6455/// progressively wired into the AST parser; this wordcode-emitter
6456/// covers the simple `cmd args...` case + interleaved redirs.
6457pub fn par_simple_wordcode(cmplx: &mut i32, mut nr: i32) -> i32 {
6458    // c:1838-1841 — `int oecused = ecused, isnull = 1, r, argc = 0,
6459    //   p, isfunc = 0, sr = 0;`
6460    //   `int c = *cmplx, nrediradd, assignments = 0, ppost = 0,
6461    //   is_typeset = 0;`
6462    // c is the SAVED initial cmplx so INOUTPAR can restore via
6463    // `*cmplx = c;` at c:2070.
6464    let _oecused = ECUSED.get() as usize;
6465    let c_saved = *cmplx;
6466    let mut isnull = true;
6467    let mut argc: u32 = 0;
6468    let mut sr: i32 = 0;
6469    let mut assignments = false;
6470    let mut isfunc = false;
6471
6472    // c:1843 — `r = ecused;` — saves the offset where redirs get
6473    // INSERTED (via ecispace). Each redir shifts later words DOWN
6474    // by ncodes, so the SIMPLE placeholder at `p` (set later) must
6475    // also bump by ncodes when a redir lands. C uses `&r` to pass
6476    // the cursor by reference; Rust uses a mutable local + manual
6477    // bumps after each par_redir_wordcode call.
6478    let mut r: usize = ECUSED.get() as usize;
6479
6480    // c:1844-1919 — pre-cmd loop: NOCORRECT, ENVSTRING (scalar
6481    // assigns), ENVARRAY (array assigns), IS_REDIROP. Loops until
6482    // a non-assignment token is seen.
6483    loop {
6484        match tok() {
6485            NOCORRECT => {
6486                // c:1846-1849
6487                *cmplx = 1;
6488                set_nocorrect(1);
6489            }
6490            ENVSTRING => {
6491                // c:1848-1898 — scalar assignment `name=value` or
6492                // `name+=value`. Emits WCB_ASSIGN(SCALAR, NEW|INC, 0)
6493                // followed by ecstr(name), ecstr(value).
6494                let raw = tokstr().unwrap_or_default();
6495                // Find first of Inbrack / '=' / '+' (the C scan at
6496                // c:1851-1853). Inside Inbrack we skipparens — i.e.
6497                // skip `name[...]` index, then continue.
6498                // c:1851-1853 — `for (ptr = tokstr; *ptr && *ptr != Inbrack
6499                // && *ptr != '=' && *ptr != '+'; ptr++); if (*ptr == Inbrack)
6500                // skipparens(Inbrack, Outbrack, &ptr);`. Walk to the first
6501                // `[`/`=`/`+`/Equals-token, then if we landed on `[`, skip
6502                // the balanced `name[index]` pair via skipparens.
6503                let bytes: Vec<char> = raw.chars().collect();
6504                let raw_str: String = bytes.iter().collect();
6505                let mut idx = 0usize;
6506                while idx < bytes.len() {
6507                    let ch = bytes[idx];
6508                    if ch == '\u{91}' /* Inbrack */
6509                        || ch == '=' || ch == '+' || ch == '\u{8d}'
6510                    /* Equals */
6511                    {
6512                        break;
6513                    }
6514                    idx += 1;
6515                }
6516                if idx < bytes.len() && bytes[idx] == '\u{91}'
6517                /* Inbrack */
6518                {
6519                    // c:1855 — `skipparens(Inbrack, Outbrack, &ptr);`.
6520                    let byte_off: usize = bytes[..idx].iter().map(|c| c.len_utf8()).sum();
6521                    let mut cursor: &str = &raw_str[byte_off..];
6522                    let _ = crate::ported::utils::skipparens('\u{91}', '\u{92}', &mut cursor);
6523                    let consumed = raw_str.len() - byte_off - cursor.len();
6524                    let advance_chars = raw_str[byte_off..byte_off + consumed].chars().count();
6525                    idx += advance_chars;
6526                    // Continue scanning for `=` / `+` after the `]`.
6527                    while idx < bytes.len() {
6528                        let ch = bytes[idx];
6529                        if ch == '=' || ch == '+' || ch == '\u{8d}' {
6530                            break;
6531                        }
6532                        idx += 1;
6533                    }
6534                }
6535                let is_inc = idx < bytes.len() && bytes[idx] == '+';
6536                // c:1856-1858 — `if (*ptr == '+') { *ptr++ = '\0';
6537                // ecadd(WCB_ASSIGN(SCALAR, INC, 0)); } else WCB_NEW`
6538                // C nulls the `+` AT THAT POSITION then advances ptr.
6539                // `name` is bytes BEFORE the `+`, NOT including it.
6540                let name_end = idx;
6541                if is_inc {
6542                    idx += 1;
6543                }
6544                let flag = if is_inc { WC_ASSIGN_INC } else { WC_ASSIGN_NEW };
6545                ecadd(WCB_ASSIGN(WC_ASSIGN_SCALAR, flag, 0));
6546                // c:1860 — `if (*ptr == '=') { *ptr = '\0'; str = ptr + 1; }
6547                //          else equalsplit(tokstr, &str);`
6548                let name: String = bytes[..name_end].iter().collect();
6549                let str_off = if idx < bytes.len() && (bytes[idx] == '=' || bytes[idx] == '\u{8d}')
6550                {
6551                    idx + 1
6552                } else {
6553                    idx
6554                };
6555                let value: String = bytes[str_off..].iter().collect();
6556                // c:1866-1877 — scan value for `=(`/`<(`/`>(` (proc
6557                // subst); if found, bump cmplx (suppresses Z_SIMPLE).
6558                let vbytes: Vec<char> = value.chars().collect();
6559                for (i, ch) in vbytes.iter().enumerate() {
6560                    if i + 1 < vbytes.len() && vbytes[i + 1] == '\u{88}'
6561                    /* Inpar */
6562                    {
6563                        if *ch == '\u{8d}' /* Equals */
6564                            || *ch == '\u{94}' /* Inang */
6565                            || *ch == '\u{96}'
6566                        /* OutangProc */
6567                        {
6568                            *cmplx = 1;
6569                            break;
6570                        }
6571                    }
6572                }
6573                ecstr(&name);
6574                ecstr(&value);
6575                isnull = false;
6576                assignments = true;
6577            }
6578            ENVARRAY => {
6579                // c:1883-1908 — array assignment `name=( ... )` in the
6580                // pre-cmd loop (no `typeset`-style typeset_force flag).
6581                // c:1884 — `int oldcmdpos = incmdpos, n, type2;`
6582                let oldcmdpos = incmdpos();
6583                let n: u32;
6584                let type2: wordcode;
6585                let p: usize;
6586
6587                // c:1886-1889 — `array setting is cmplx because it can
6588                //   contain process substitutions`
6589                // c:1890 — `*cmplx = c = 1;`
6590                *cmplx = 1;
6591                // c:1891 — `p = ecadd(0);`
6592                p = ecadd(0);
6593                // c:1892 — `incmdpos = 0;`
6594                set_incmdpos(false);
6595                // c:1893-1897 — `+=` detection: if tokstr ends in `+`,
6596                // strip the `+` and use WC_ASSIGN_INC; else WC_ASSIGN_NEW.
6597                let raw = tokstr().unwrap_or_default();
6598                let (name, t2) = if raw.ends_with('+') {
6599                    (raw[..raw.len() - 1].to_string(), WC_ASSIGN_INC)
6600                } else {
6601                    (raw.clone(), WC_ASSIGN_NEW)
6602                };
6603                type2 = t2;
6604                // c:1898 — `ecstr(tokstr);` (tokstr now NUL-trimmed)
6605                ecstr(&name);
6606                // c:1899 — `cmdpush(CS_ARRAY);`
6607                cmdpush(CS_ARRAY as u8);
6608                // c:1900 — `zshlex();`
6609                zshlex();
6610                // c:1901 — `n = par_nl_wordlist();`
6611                n = par_nl_wordlist_wordcode();
6612                // c:1902 — `ecbuf[p] = WCB_ASSIGN(WC_ASSIGN_ARRAY, type2, n);`
6613                ECBUF.with_borrow_mut(|b| {
6614                    b[p] = WCB_ASSIGN(WC_ASSIGN_ARRAY, type2, n);
6615                });
6616                // c:1903 — `cmdpop();`
6617                cmdpop();
6618                // c:1904-1905 — `if (tok != OUTPAR) YYERROR(oecused);`
6619                if tok() != OUTPAR_TOK {
6620                    zerr("par_simple: expected `)' after array assignment");
6621                    return 0;
6622                }
6623                // c:1906 — `incmdpos = oldcmdpos;`
6624                set_incmdpos(oldcmdpos);
6625                // c:1907 — `isnull = 0;`
6626                isnull = false;
6627                // c:1908 — `assignments = 1;`
6628                assignments = true;
6629            }
6630            t if IS_REDIROP(t) => {
6631                // c:1900-1904 — `*cmplx = c = 1; nr += par_redir(&r,
6632                // NULL); continue;`. The wordcode-emitting redir is
6633                // distinct from the AST par_redir — it INSERTS
6634                // WCB_REDIR + fd + ecstrcode(name) at offset `r`
6635                // via ecispace, shifting any later words down.
6636                *cmplx = 1;
6637                let added = par_redir_wordcode(&mut r, None);
6638                if added == 0 {
6639                    break;
6640                }
6641                nr += added;
6642                continue;
6643            }
6644            _ => break,
6645        }
6646        zshlex(); // c:1907 `zshlex();`
6647    }
6648
6649    // c:1920-1921 — `if (tok == AMPER || tok == AMPERBANG) YYERROR;`
6650    if tok() == AMPER || tok() == AMPERBANG {
6651        zerr("par_simple: unexpected &");
6652        return 0;
6653    }
6654
6655    // c:1923 — `p = ecadd(WCB_SIMPLE(0));`
6656    let mut p = ecadd(WCB_SIMPLE(0));
6657
6658    // c:1924-2105 — main words loop. is_typeset tracks whether the
6659    // outer command was `typeset`/`export`/etc. so the final
6660    // placeholder gets WCB_TYPESET instead of WCB_SIMPLE.
6661    let mut is_typeset = false;
6662    let mut postassigns: u32 = 0;
6663    let mut ppost: usize = 0;
6664    loop {
6665        match tok() {
6666            STRING_LEX | TYPESET => {
6667                // c:1926 — `int redir_var = 0;`
6668                let mut redir_var = false;
6669                // c:1928-1929 — `*cmplx = 1; incmdpos = 0;`
6670                *cmplx = 1;
6671                set_incmdpos(false);
6672                // c:1931-1932 — TYPESET → intypeset = is_typeset = 1.
6673                if tok() == TYPESET {
6674                    set_intypeset(true);
6675                    is_typeset = true;
6676                }
6677                let s = tokstr().unwrap_or_default();
6678                // c:1934-1974 — `{var}>file` brace-FD detection.
6679                // `if (!isset(IGNOREBRACES) && *tokstr == Inbrace)`
6680                let bytes = s.as_bytes();
6681                let first_is_inbrace = (bytes.len() >= 2 && bytes[0] == 0xc2 && bytes[1] == 0x8f)
6682                    || (bytes.len() >= 1 && bytes[0] == b'{');
6683                if !isset(IGNOREBRACES) && first_is_inbrace {
6684                    // c:1937-1938 — `char *eptr = tokstr + strlen(tokstr) - 1;`
6685                    //                `char *ptr = eptr;`
6686                    // C tests `*eptr == Outbrace` (0x90 marker or `}`) AND
6687                    // there's content between `{` and `}` (`ptr > tokstr + 1`).
6688                    let last_two_outbrace = bytes.len() >= 2
6689                        && (bytes.ends_with(&[0xc2, 0x90]) || bytes.last() == Some(&b'}'));
6690                    let opener_len = if bytes.len() >= 2 && bytes[0] == 0xc2 && bytes[1] == 0x8f {
6691                        2
6692                    } else {
6693                        1
6694                    };
6695                    let closer_len = if bytes.len() >= 2 && bytes.ends_with(&[0xc2, 0x90]) {
6696                        2
6697                    } else if bytes.last() == Some(&b'}') {
6698                        1
6699                    } else {
6700                        0
6701                    };
6702                    if last_two_outbrace && bytes.len() > opener_len + closer_len {
6703                        // c:1944 — `if (itype_end(tokstr+1, IIDENT, 0) >= ptr)`
6704                        // Inner content is the identifier between `{` and `}`.
6705                        let inner_start = opener_len;
6706                        let inner_end = bytes.len() - closer_len;
6707                        let inner = &s[inner_start..inner_end];
6708                        if !inner.is_empty() && crate::ported::params::isident(inner) {
6709                            // c:1946-1948 — `char *idstring = dupstrpfx(...);`
6710                            //                `redir_var = 1; zshlex();`
6711                            let idstring = inner.to_string();
6712                            redir_var = true;
6713                            zshlex();
6714                            // c:1953-1958 — `if (IS_REDIROP(tok) && tokfd == -1)
6715                            //   { *cmplx = c = 1; nrediradd = par_redir(&r, id);
6716                            //     p += nrediradd; sr += nrediradd; }`
6717                            if IS_REDIROP(tok()) && tokfd() == -1 {
6718                                *cmplx = 1;
6719                                let nrediradd = par_redir_wordcode(&mut r, Some(&idstring));
6720                                p += nrediradd as usize;
6721                                sr += nrediradd;
6722                            } else if postassigns > 0 {
6723                                // c:1959-1966 — postassigns path: emit
6724                                // WCB_ASSIGN(SCALAR, INC, 0) + name + ""
6725                                postassigns += 1;
6726                                ecadd(WCB_ASSIGN(WC_ASSIGN_SCALAR, WC_ASSIGN_INC, 0));
6727                                ecstr(&s);
6728                                ecstr("");
6729                            } else {
6730                                // c:1968-1972 — `else { ecstr(toksave); argc++; }`
6731                                ecstr(&s);
6732                                argc += 1;
6733                            }
6734                        }
6735                    }
6736                }
6737                if !redir_var {
6738                    // c:1977-1996 — normal (non-redir-var) STRING/TYPESET.
6739                    if postassigns > 0 {
6740                        // c:1979-1989 — typeset with bare-name arg → INC
6741                        postassigns += 1;
6742                        ecadd(WCB_ASSIGN(WC_ASSIGN_SCALAR, WC_ASSIGN_INC, 0));
6743                        ecstr(&s);
6744                        ecstr("");
6745                    } else {
6746                        ecstr(&s);
6747                        argc += 1;
6748                    }
6749                    zshlex();
6750                }
6751                isnull = false;
6752            }
6753            ENVSTRING => {
6754                // c:2005-2026 — mid-cmd ENVSTRING (under intypeset
6755                // context). Emits WCB_ASSIGN(SCALAR, NEW, 0) then
6756                // ecstr(name) + ecstr(value), tracking the first
6757                // postassign offset in `ppost` (which the trailing
6758                // WCB_TYPESET header points to).
6759                if postassigns == 0 {
6760                    ppost = ecadd(0);
6761                }
6762                postassigns += 1;
6763                // c:2010-2014 — `for (ptr = tokstr; *ptr && *ptr != Inbrack
6764                // && *ptr != '=' && *ptr != '+'; ptr++); if (*ptr == Inbrack)
6765                // skipparens(Inbrack, Outbrack, &ptr);`.
6766                let raw = tokstr().unwrap_or_default();
6767                let bytes: Vec<char> = raw.chars().collect();
6768                let mut idx = 0usize;
6769                while idx < bytes.len() {
6770                    let ch = bytes[idx];
6771                    if ch == '\u{91}' /* Inbrack */
6772                        || ch == '=' || ch == '+' || ch == '\u{8d}'
6773                    /* Equals */
6774                    {
6775                        break;
6776                    }
6777                    idx += 1;
6778                }
6779                if idx < bytes.len() && bytes[idx] == '\u{91}'
6780                /* Inbrack */
6781                {
6782                    // c:2014 — `skipparens(Inbrack, Outbrack, &ptr);`.
6783                    let byte_off: usize = bytes[..idx].iter().map(|c| c.len_utf8()).sum();
6784                    let mut cursor: &str = &raw[byte_off..];
6785                    let _ = crate::ported::utils::skipparens('\u{91}', '\u{92}', &mut cursor);
6786                    let consumed = raw.len() - byte_off - cursor.len();
6787                    let advance_chars = raw[byte_off..byte_off + consumed].chars().count();
6788                    idx += advance_chars;
6789                    while idx < bytes.len() {
6790                        let ch = bytes[idx];
6791                        if ch == '=' || ch == '+' || ch == '\u{8d}' {
6792                            break;
6793                        }
6794                        idx += 1;
6795                    }
6796                }
6797                let name: String = bytes[..idx].iter().collect();
6798                let str_off = if idx < bytes.len() && (bytes[idx] == '=' || bytes[idx] == '\u{8d}')
6799                {
6800                    idx + 1
6801                } else {
6802                    idx
6803                };
6804                let value: String = bytes[str_off..].iter().collect();
6805                ecadd(WCB_ASSIGN(WC_ASSIGN_SCALAR, WC_ASSIGN_NEW, 0));
6806                ecstr(&name);
6807                ecstr(&value);
6808                isnull = false;
6809                zshlex();
6810            }
6811            ENVARRAY => {
6812                // c:2027-2050 — mid-cmd ENVARRAY (typeset N=(…) form).
6813                // C tracks postassigns + ppost the same as ENVSTRING,
6814                // but the inner emit is WCB_ASSIGN(ARRAY, NEW, n)
6815                // with `n` patched in after par_nl_wordlist consumes
6816                // the elements. C also toggles intypeset=0 around the
6817                // wordlist so the lexer doesn't try to re-emit
6818                // assignments inside the array.
6819                *cmplx = 1;
6820                if postassigns == 0 {
6821                    ppost = ecadd(0);
6822                }
6823                postassigns += 1;
6824                let parr = ecadd(0);
6825                let raw = tokstr().unwrap_or_default();
6826                let is_inc = raw.ends_with('+');
6827                let name = if is_inc {
6828                    &raw[..raw.len() - 1]
6829                } else {
6830                    raw.as_str()
6831                };
6832                let flag = if is_inc { WC_ASSIGN_INC } else { WC_ASSIGN_NEW };
6833                ecstr(name);
6834                cmdpush(CS_ARRAY as u8);
6835                set_intypeset(false);
6836                zshlex();
6837                // c:2044 — `n = par_nl_wordlist();` (parse.c:2379-2391).
6838                // SEPER + NEWLIN both allowed between elements.
6839                let mut nelem = 0u32;
6840                loop {
6841                    let t = tok();
6842                    if t != STRING_LEX && t != SEPER && t != NEWLIN {
6843                        break;
6844                    }
6845                    if t == STRING_LEX {
6846                        ecstr(&tokstr().unwrap_or_default());
6847                        nelem += 1;
6848                    }
6849                    zshlex();
6850                }
6851                ECBUF.with_borrow_mut(|b| {
6852                    if parr < b.len() {
6853                        b[parr] = WCB_ASSIGN(WC_ASSIGN_ARRAY, flag, nelem);
6854                    }
6855                });
6856                cmdpop();
6857                set_intypeset(true);
6858                if tok() != OUTPAR_TOK {
6859                    zerr("expected `)' after array assignment");
6860                    return 0;
6861                }
6862                isnull = false;
6863                zshlex();
6864            }
6865            t if IS_REDIROP(t) => {
6866                // c:1999-2010 — `nrediradd = par_redir(&r, NULL);
6867                // p += nrediradd; if (ppost) ppost += nrediradd;
6868                // sr += nrediradd;`
6869                *cmplx = 1;
6870                let added = par_redir_wordcode(&mut r, None);
6871                if added == 0 {
6872                    break;
6873                }
6874                p += added as usize;
6875                if ppost != 0 {
6876                    ppost += added as usize;
6877                }
6878                sr += added;
6879            }
6880            INOUTPAR => {
6881                // c:2051 — `} else if (tok == INOUTPAR) {`
6882                // c:2052 — `zlong oldlineno = lineno;`
6883                let oldlineno = lineno();
6884                // c:2053 — `int onp, so, oecssub = ecssub;`
6885                let oecssub = ECSSUB.get();
6886                // c:2055-2057 — `if (!isset(MULTIFUNCDEF) && argc > 1) YYERROR;`
6887                if !isset(MULTIFUNCDEF) && argc > 1 {
6888                    zerr("par_simple: too many function names for funcdef");
6889                    return 0;
6890                }
6891                // c:2058-2060 — `if (assignments || postassigns) YYERROR;`
6892                if assignments || postassigns > 0 {
6893                    zerr("par_simple: assignments before funcdef");
6894                    return 0;
6895                }
6896                // c:2061-2068 — hasalias check + zwarn — skipped (no
6897                // alias tracking on the wordcode path).
6898
6899                // c:2070 — `*cmplx = c;`
6900                *cmplx = c_saved;
6901                // c:2071 — `lineno = 0;`
6902                set_lineno(0);
6903                // c:2072 — `incmdpos = 1;`
6904                set_incmdpos(true);
6905                // c:2073 — `cmdpush(CS_FUNCDEF);`
6906                cmdpush(CS_FUNCDEF as u8);
6907                // c:2074 — `zshlex();`
6908                zshlex();
6909                // c:2075-2076 — `while (tok == SEPER) zshlex();`
6910                while tok() == SEPER {
6911                    zshlex();
6912                }
6913                // c:2079 — `ecispace(p + 1, 1); ecbuf[p+1] = argc;
6914                // ecadd(0)*4`. Insert the argc word at p+1, then
6915                // append 4 placeholder words.
6916                ecispace(p + 1, 1);
6917                ECBUF.with_borrow_mut(|b| {
6918                    if p + 1 < b.len() {
6919                        b[p + 1] = argc;
6920                    }
6921                });
6922                // c:2080-2083 — four metadata placeholder slots.
6923                ecadd(0);
6924                ecadd(0);
6925                ecadd(0);
6926                ecadd(0);
6927
6928                // c:2085 — `ecnfunc++;`
6929                ECNFUNC.set(ECNFUNC.get() + 1);
6930                // c:2086 — `ecssub = so = ecsoffs;`
6931                let so = ECSOFFS.get();
6932                ECSSUB.set(so);
6933                // c:2087 — `onp = ecnpats;`
6934                let onp = ECNPATS.with(|cc| cc.get());
6935                // c:2088 — `ecnpats = 0;`
6936                ECNPATS.with(|cc| cc.set(0));
6937
6938                // c:2091 — `int c = 0;` — INNER cmplx for the body
6939                // parse. Local to each branch; C's enclosing *cmplx
6940                // is NOT modified by the body.
6941                let mut body_c: i32 = 0;
6942                // c:2090 — `if (tok == INBRACE) {`
6943                if tok() == INBRACE_TOK {
6944                    // c:2093 — `zshlex();`
6945                    zshlex();
6946                    // c:2094 — `par_list(&c);`
6947                    par_list_wordcode(&mut body_c);
6948                    // c:2095-2101 — `if (tok != OUTBRACE) { cmdpop();
6949                    //   lineno += oldlineno; ecnpats = onp;
6950                    //   ecssub = oecssub; YYERROR; }`
6951                    if tok() != OUTBRACE_TOK {
6952                        cmdpop();
6953                        set_lineno(lineno() + oldlineno);
6954                        ECNPATS.with(|cc| cc.set(onp));
6955                        ECSSUB.set(oecssub);
6956                        zerr("par_simple: funcdef expected `}`");
6957                        return 0;
6958                    }
6959                    // c:2102-2105 — `if (argc == 0) incmdpos = 0;`
6960                    if argc == 0 {
6961                        set_incmdpos(false);
6962                    }
6963                    // c:2106 — `zshlex();`
6964                    zshlex();
6965                } else {
6966                    // c:2107-2132 — short-body funcdef form: `f() cmd`
6967                    // or `() cmd`. Wraps single par_cmd result in a
6968                    // synthetic WC_LIST / WC_SUBLIST /
6969                    // WC_PIPE(WC_PIPE_END, 0) header trio.
6970                    let ll = ecadd(0);
6971                    let sl = ecadd(0);
6972                    ecadd(WCB_PIPE(WC_PIPE_END, 0));
6973                    let ok = par_cmd_wordcode(&mut body_c, if argc == 0 { 1 } else { 0 });
6974                    if !ok {
6975                        cmdpop();
6976                        zerr("par_simple: funcdef short-body: missing command");
6977                        return 0;
6978                    }
6979                    if argc == 0 {
6980                        // c:2118-2127 — anonymous funcdef may take args
6981                        // after the body; first one already read.
6982                        set_incmdpos(false);
6983                    }
6984                    // c:2130-2131 — inner sublist/list use inner cmplx.
6985                    let used = ECUSED.get() as usize;
6986                    set_sublist_code(
6987                        sl,
6988                        WC_SUBLIST_END as i32,
6989                        0,
6990                        (used.saturating_sub(1 + sl)) as i32,
6991                        body_c != 0,
6992                    );
6993                    set_list_code(ll, Z_SYNC | Z_END, body_c != 0);
6994                }
6995                let _ = body_c;
6996                // c:2133 — `cmdpop();`
6997                cmdpop();
6998
6999                // c:2135 — `ecadd(WCB_END());`
7000                ecadd(WCB_END());
7001                // c:2136-2139 — fill 4 metadata slots at p+argc+2..5
7002                let p_argc = (p + (argc as usize) + 2) as usize;
7003                let cur_so = ECSOFFS.get();
7004                let np_now = ECNPATS.with(|cc| cc.get());
7005                ECBUF.with_borrow_mut(|b| {
7006                    b[p_argc] = (so - oecssub) as wordcode;
7007                    b[p_argc + 1] = (cur_so - so) as wordcode;
7008                    b[p_argc + 2] = np_now as wordcode;
7009                    b[p_argc + 3] = 0;
7010                });
7011
7012                // c:2141-2143 — `ecnpats = onp; ecssub = oecssub; ecnfunc++;`
7013                ECNPATS.with(|cc| cc.set(onp));
7014                ECSSUB.set(oecssub);
7015                ECNFUNC.set(ECNFUNC.get() + 1);
7016
7017                // c:2145 — `ecbuf[p] = WCB_FUNCDEF(ecused - 1 - p);`
7018                let used = ECUSED.get() as usize;
7019                let header_off = used.saturating_sub(1 + p) as wordcode;
7020                ECBUF.with_borrow_mut(|b| {
7021                    b[p] = WCB_FUNCDEF(header_off);
7022                });
7023
7024                // c:2147-2172 — `if (argc == 0) { /* anonymous fn args */ }`
7025                if argc == 0 {
7026                    // c:2150 — `int parg = ecadd(0);`
7027                    let mut parg = ecadd(0);
7028                    // c:2151 — `ecadd(0);`
7029                    ecadd(0);
7030                    // c:2152 — `while (tok == STRING || IS_REDIROP(tok)) {`
7031                    while tok() == STRING_LEX || IS_REDIROP(tok()) {
7032                        if tok() == STRING_LEX {
7033                            // c:2155-2157
7034                            ecstr(&tokstr().unwrap_or_default());
7035                            argc += 1;
7036                            zshlex();
7037                        } else {
7038                            // c:2159-2165 — *cmplx=c=1; nrediradd=par_redir;
7039                            // p += nrediradd; ppost += nrediradd if ppost;
7040                            // sr += nrediradd; parg += nrediradd;
7041                            *cmplx = 1;
7042                            let added = par_redir_wordcode(&mut r, None);
7043                            if added == 0 {
7044                                break;
7045                            }
7046                            p += added as usize;
7047                            if ppost != 0 {
7048                                ppost += added as usize;
7049                            }
7050                            sr += added;
7051                            parg += added as usize;
7052                        }
7053                    }
7054                    // c:2168-2169 — `if (argc > 0) *cmplx = 1;`
7055                    if argc > 0 {
7056                        *cmplx = 1;
7057                    }
7058                    // c:2170 — `ecbuf[parg] = ecused - parg;`
7059                    // c:2171 — `ecbuf[parg+1] = argc;`
7060                    let used2 = ECUSED.get() as usize;
7061                    ECBUF.with_borrow_mut(|b| {
7062                        b[parg] = (used2 - parg) as wordcode;
7063                        b[parg + 1] = argc;
7064                    });
7065                }
7066                // c:2173 — `lineno += oldlineno;`
7067                set_lineno(lineno() + oldlineno);
7068
7069                // c:2175-2177 — `isfunc = 1; isnull = 0; break;`
7070                isfunc = true;
7071                isnull = false;
7072                break;
7073            }
7074            _ => break,
7075        }
7076    }
7077
7078    // c:2173-2176 — `if (isnull && !(sr + nr)) { ecused = oecused;
7079    // return 0; }` — undo everything including pre-cmd assignments
7080    // if no actual command word emerged.
7081    if isnull && sr + nr == 0 && !assignments {
7082        ECUSED.set(p as i32);
7083        return 0;
7084    }
7085    // c:2186-2187 — `incmdpos = 1; intypeset = 0;` — reset before
7086    // the placeholder patch so the next-token lex doesn't carry
7087    // typeset/incond state.
7088    set_incmdpos(true);
7089    set_intypeset(false);
7090    // c:2189-2199 — `if (!isfunc) { if (is_typeset) ecbuf[p] =
7091    // WCB_TYPESET(argc); else ecbuf[p] = WCB_SIMPLE(argc); }`.
7092    // When isfunc=true the INOUTPAR branch already wrote WCB_FUNCDEF
7093    // at p; do NOT clobber it.
7094    if !isfunc {
7095        let header = if is_typeset {
7096            if postassigns > 0 {
7097                ECBUF.with_borrow_mut(|b| {
7098                    if ppost < b.len() {
7099                        b[ppost] = postassigns;
7100                    }
7101                });
7102            } else {
7103                ecadd(0);
7104            }
7105            WCB_TYPESET(argc)
7106        } else {
7107            WCB_SIMPLE(argc)
7108        };
7109        ECBUF.with_borrow_mut(|b| {
7110            if p < b.len() {
7111                b[p] = header;
7112            }
7113        });
7114    }
7115    1 + sr
7116}
7117
7118/// Port of `par_redir(int *rp, char *idstring)` from
7119/// `Src/parse.c:2229-2345` — the wordcode-emitting variant that
7120/// pushes WCB_REDIR + fd + ecstrcode(name) into ECBUF. Distinct
7121/// from the AST `par_redir` (parse.rs:3771) which builds a
7122/// ZshRedir struct for the AST executor pipeline.
7123///
7124/// Returns the number of wordcodes added (3 for the basic shape,
7125/// 4 with idstring, 5 for HEREDOC[DASH] which carries the
7126/// terminator strings inline). Returns 0 on parse error.
7127///
7128/// `idstring` mirrors C's `char *idstring` parameter — `None` =
7129/// NULL (no `{var}>file` brace-FD shape), `Some(id)` = the captured
7130/// `{var}` name. C callers without a var pass NULL inline; Rust
7131/// callers do the same with `None`.
7132fn par_redir_wordcode(rp: &mut usize, idstring: Option<&str>) -> i32 {
7133    // c:2231 — `int r = *rp, type, fd1, oldcmdpos, oldnc, ncodes;`
7134    let r: usize = *rp;
7135    let mut r#type: i32;
7136    let fd1: i32;
7137    let oldcmdpos: bool;
7138    let oldnc: i32;
7139    let mut ncodes: usize;
7140    // c:2232 — `char *name;`
7141    let name: String;
7142
7143    // c:2234 — `oldcmdpos = incmdpos;`
7144    oldcmdpos = incmdpos();
7145    // c:2235 — `incmdpos = 0;`
7146    set_incmdpos(false);
7147    // c:2236 — `oldnc = nocorrect;`
7148    oldnc = nocorrect();
7149    // c:2237-2238 — `if (tok != INANG && tok != INOUTANG) nocorrect = 1;`
7150    if tok() != INANG_TOK && tok() != INOUTANG {
7151        set_nocorrect(1);
7152    }
7153    // c:2239 — `type = redirtab[tok - OUTANG];`
7154    // Map current redirop token to redirtab index — matches order of
7155    // C `enum { OUTANG, OUTANGBANG, DOUTANG, DOUTANGBANG, INANG,
7156    // INOUTANG, DINANG, DINANGDASH, INANGAMP, OUTANGAMP, AMPOUTANG,
7157    // OUTANGAMPBANG, DOUTANGAMP, DOUTANGAMPBANG, TRINANG }`.
7158    r#type = match tok() {
7159        OUTANG_TOK => REDIR_WRITE,
7160        OUTANGBANG => REDIR_WRITENOW,
7161        DOUTANG => REDIR_APP,
7162        DOUTANGBANG => REDIR_APPNOW,
7163        INANG_TOK => REDIR_READ,
7164        INOUTANG => REDIR_READWRITE,
7165        DINANG => REDIR_HEREDOC,
7166        DINANGDASH => REDIR_HEREDOCDASH,
7167        INANGAMP => REDIR_MERGEIN,
7168        OUTANGAMP => REDIR_MERGEOUT,
7169        AMPOUTANG => REDIR_ERRWRITE,
7170        OUTANGAMPBANG => REDIR_ERRWRITENOW,
7171        DOUTANGAMP => REDIR_ERRAPP,
7172        DOUTANGAMPBANG => REDIR_ERRAPPNOW,
7173        TRINANG => REDIR_HERESTR,
7174        _ => {
7175            set_incmdpos(oldcmdpos);
7176            set_nocorrect(oldnc);
7177            return 0;
7178        }
7179    };
7180    // c:2240 — `fd1 = tokfd;`
7181    fd1 = tokfd();
7182    // c:2241 — `zshlex();`
7183    zshlex();
7184    // c:2242-2243 — `if (tok != STRING && tok != ENVSTRING) YYERROR(ecused);`
7185    if tok() != STRING_LEX && tok() != ENVSTRING {
7186        set_incmdpos(oldcmdpos);
7187        set_nocorrect(oldnc);
7188        zerr("expected word after redirection");
7189        return 0;
7190    }
7191    // c:2244 — `incmdpos = oldcmdpos;`
7192    set_incmdpos(oldcmdpos);
7193    // c:2245 — `nocorrect = oldnc;`
7194    set_nocorrect(oldnc);
7195
7196    // c:2248-2249 — `if (fd1 == -1) fd1 = IS_READFD(type) ? 0 : 1;`
7197    let fd1 = if fd1 == -1 {
7198        if is_readfd(r#type) {
7199            0
7200        } else {
7201            1
7202        }
7203    } else {
7204        fd1
7205    };
7206
7207    // c:2251 — `name = tokstr;`
7208    name = tokstr().unwrap_or_default();
7209
7210    // c:2253-2321 — switch on type:
7211    match r#type {
7212        // c:2254-2300 — REDIR_HEREDOC / REDIR_HEREDOCDASH
7213        x if x == REDIR_HEREDOC || x == REDIR_HEREDOCDASH => {
7214            // c:2257 — `struct heredocs **hd;`
7215            // c:2258 — `int htype = type;`
7216            let htype = r#type;
7217            // c:2260-2261 — `if (strchr(tokstr, '\n')) YYERROR(ecused);`
7218            if name.contains('\n') {
7219                zerr("here-doc terminator contains newline");
7220                return 0;
7221            }
7222            // c:2263-2273 — `ncodes = 5; if (idstring) { type |= MASK; ncodes = 6; }`
7223            if idstring.is_some() {
7224                r#type |= REDIR_VARID_MASK;
7225                ncodes = 6;
7226            } else {
7227                ncodes = 5;
7228            }
7229            // c:2277 — `ecispace(r, ncodes);`
7230            ecispace(r, ncodes);
7231            // c:2278 — `*rp = r + ncodes;`
7232            *rp = r + ncodes;
7233            // c:2279 — `ecbuf[r] = WCB_REDIR(type | REDIR_FROM_HEREDOC_MASK);`
7234            ECBUF.with_borrow_mut(|b| {
7235                b[r] = WCB_REDIR((r#type | REDIR_FROM_HEREDOC_MASK) as wordcode);
7236                // c:2280 — `ecbuf[r + 1] = fd1;`
7237                b[r + 1] = fd1 as wordcode;
7238            });
7239            // c:2282-2286 — r+2..4 are filled later by setheredoc.
7240            // c:2287-2288 — `if (idstring) ecbuf[r + 5] = ecstrcode(idstring);`
7241            if let Some(id) = idstring {
7242                let coded = ecstrcode(id);
7243                ECBUF.with_borrow_mut(|b| {
7244                    b[r + 5] = coded;
7245                });
7246            }
7247            // c:2290-2296 — `for (hd = &hdocs; *hd; hd = &(*hd)->next);
7248            //                 *hd = zalloc(sizeof(struct heredocs));
7249            //                 (*hd)->next = NULL;
7250            //                 (*hd)->type = htype;
7251            //                 (*hd)->pc = r;
7252            //                 (*hd)->str = tokstr;`
7253            HDOCS.with_borrow_mut(|head| {
7254                let mut cur = head;
7255                while cur.is_some() {
7256                    cur = &mut cur.as_mut().unwrap().next; // c:2290
7257                }
7258                *cur = Some(Box::new(crate::ported::zsh_h::heredocs {
7259                    // c:2292-2296
7260                    next: None,
7261                    typ: htype,
7262                    pc: r as i32,
7263                    str: Some(name.clone()),
7264                }));
7265            });
7266            // c:2298 — `zshlex();`
7267            zshlex();
7268            // c:2299 — `return ncodes;`
7269            return ncodes as i32;
7270        }
7271        // c:2301-2308 — REDIR_WRITE / REDIR_WRITENOW
7272        x if x == REDIR_WRITE || x == REDIR_WRITENOW => {
7273            // c:2303-2305 — `if (tokstr[0] == OutangProc && tokstr[1] == Inpar)
7274            //                  type = REDIR_OUTPIPE;`
7275            let nb: Vec<char> = name.chars().collect();
7276            if nb.len() >= 2 && nb[0] == '\u{96}' && nb[1] == '\u{88}' {
7277                r#type = REDIR_OUTPIPE;
7278            } else if nb.len() >= 2 && nb[0] == '\u{94}' && nb[1] == '\u{88}' {
7279                // c:2306-2307 — `else if (tokstr[0] == Inang && tokstr[1] == Inpar) YYERROR;`
7280                zerr("par_redir: < before >");
7281                return 0;
7282            }
7283        }
7284        // c:2309-2315 — REDIR_READ
7285        x if x == REDIR_READ => {
7286            let nb: Vec<char> = name.chars().collect();
7287            if nb.len() >= 2 && nb[0] == '\u{94}' && nb[1] == '\u{88}' {
7288                r#type = REDIR_INPIPE;
7289            } else if nb.len() >= 2 && nb[0] == '\u{96}' && nb[1] == '\u{88}' {
7290                zerr("par_redir: > before <");
7291                return 0;
7292            }
7293        }
7294        // c:2316-2320 — REDIR_READWRITE
7295        x if x == REDIR_READWRITE => {
7296            let nb: Vec<char> = name.chars().collect();
7297            if nb.len() >= 2 && (nb[0] == '\u{94}' || nb[0] == '\u{96}') && nb[1] == '\u{88}' {
7298                r#type = if nb[0] == '\u{94}' {
7299                    REDIR_INPIPE
7300                } else {
7301                    REDIR_OUTPIPE
7302                };
7303            }
7304        }
7305        _ => {}
7306    }
7307    // c:2322 — `zshlex();`
7308    zshlex();
7309
7310    // c:2326-2333 — `if (idstring) { type |= MASK; ncodes = 4; } else ncodes = 3;`
7311    if idstring.is_some() {
7312        r#type |= REDIR_VARID_MASK;
7313        ncodes = 4;
7314    } else {
7315        ncodes = 3;
7316    }
7317
7318    // c:2334 — `ecispace(r, ncodes);`
7319    ecispace(r, ncodes);
7320    // c:2335 — `*rp = r + ncodes;`
7321    *rp = r + ncodes;
7322    // c:2336 — `ecbuf[r] = WCB_REDIR(type);`
7323    let coded_name = ecstrcode(&name);
7324    ECBUF.with_borrow_mut(|b| {
7325        b[r] = WCB_REDIR(r#type as wordcode);
7326        // c:2337 — `ecbuf[r + 1] = fd1;`
7327        b[r + 1] = fd1 as wordcode;
7328        // c:2338 — `ecbuf[r + 2] = ecstrcode(name);`
7329        b[r + 2] = coded_name;
7330    });
7331    // c:2339-2340 — `if (idstring) ecbuf[r + 3] = ecstrcode(idstring);`
7332    if let Some(id) = idstring {
7333        let coded_id = ecstrcode(id);
7334        ECBUF.with_borrow_mut(|b| {
7335            b[r + 3] = coded_id;
7336        });
7337    }
7338    // c:2342 — `return ncodes;`
7339    ncodes as i32
7340}
7341
7342/// Port of `IS_READFD(type)` macro from `Src/zsh.h` — determines
7343/// default fd (0 for read-ish, 1 for write-ish) when none specified.
7344fn is_readfd(t: i32) -> bool {
7345    matches!(
7346        t,
7347        x if x == REDIR_READ
7348            || x == REDIR_READWRITE
7349            || x == REDIR_MERGEIN
7350            || x == REDIR_HEREDOC
7351            || x == REDIR_HEREDOCDASH
7352            || x == REDIR_HERESTR
7353    )
7354}
7355
7356/// Parse a program (list of lists)
7357/// Parse a complete program (top-level entry). Calls
7358/// parse_program_until with no end-token sentinel. Direct port of
7359/// zsh/Src/parse.c:614-720 `parse_event` / `par_list` /
7360/// `par_event` flow. C distinguishes COND_EVENT (single command
7361/// for here-string) from full event parse; zshrs's parse_program
7362/// is the full-event entry.
7363fn parse_program() -> ZshProgram {
7364    parse_program_until(None)
7365}
7366
7367/// Parse a program until we hit an end token
7368/// Parse a program until one of `end_tokens` is seen (or EOF).
7369/// Drives par_list in a loop. C equivalent: the body of par_event
7370/// (parse.c:635-695) iterating par_list against the lexer.
7371fn parse_program_until(end_tokens: Option<&[lextok]>) -> ZshProgram {
7372    let mut lists = Vec::new();
7373
7374    loop {
7375        // Skip separators
7376        while tok() == SEPER || tok() == NEWLIN {
7377            zshlex();
7378        }
7379
7380        if tok() == ENDINPUT {
7381            break;
7382        }
7383        if tok() == LEXERR {
7384            // c:Src/parse.c:671-680 par_event — when the lexer
7385            // returned LEXERR (e.g. unbalanced `$((1+(2))` math
7386            // sub, unterminated string, etc.), C emits `yyerror(1)`
7387            // and sets errflag so the script aborts with a parse
7388            // error diagnostic + non-zero exit. zshrs's
7389            // parse_program_until previously just `break`'d on
7390            // LEXERR, silently swallowing the malformed input and
7391            // exiting rc=0 — so `$((1+(2))` ran as if it were
7392            // empty. Bug #529 in docs/BUGS.md. Emit yyerror
7393            // mirroring the C behaviour; the broken script then
7394            // surfaces the parse error to the caller.
7395            yyerror("");
7396            break;
7397        }
7398
7399        // Check for end tokens
7400        if let Some(end_toks) = end_tokens {
7401            if end_toks.contains(&tok()) {
7402                break;
7403            }
7404        }
7405
7406        // Also stop at these tokens when not explicitly looking for them
7407        // Note: Else/Elif/Then are NOT here - they're handled by par_if
7408        // to allow nested if statements inside case arms, loops, etc.
7409        //
7410        // c:Src/parse.c:par_event — when an orphan terminator (DONE
7411        // outside a loop, FI outside an if, ESAC outside a case)
7412        // appears at the top level (end_tokens=None), C errors via
7413        // YYERROR. zshrs's `break` silently accepted `done`/`fi`/
7414        // `esac` as no-op input. Error at the outermost call so
7415        // unscoped terminators don't sneak through; nested calls
7416        // still break cleanly via the end_tokens contains-check
7417        // above.
7418        match tok() {
7419            DONE | FI | ESAC | DOLOOP if end_tokens.is_none() => {
7420                // c:Src/parse.c:par_event — emit the specific token
7421                // name (`done`, `fi`, `esac`, `do`) so error-parsing
7422                // tools can identify the unmatched terminator. C zsh
7423                // writes `parse error near \`<tok>'`; the Rust port
7424                // was emitting a generic "orphan terminator" string.
7425                // Bug #142, #413.
7426                let name = match tok() {
7427                    DONE => "done",
7428                    FI => "fi",
7429                    ESAC => "esac",
7430                    DOLOOP => "do",
7431                    _ => "orphan terminator",
7432                };
7433                zerr(&format!("parse error near `{}'", name));
7434                break;
7435            }
7436            DSEMI | SEMIAMP | SEMIBAR if end_tokens.is_none() => {
7437                // c:Src/parse.c:par_event — case-arm terminators
7438                // (`;;`, `;&`, `;|`) outside a case construct are a
7439                // parse error. zshrs's `break` silently accepted them
7440                // at top level, truncating the rest of the script.
7441                // Bug #141 in docs/BUGS.md.
7442                let name = match tok() {
7443                    DSEMI => ";;",
7444                    SEMIAMP => ";&",
7445                    SEMIBAR => ";|",
7446                    _ => "case terminator",
7447                };
7448                zerr(&format!("parse error near `{}'", name));
7449                break;
7450            }
7451            OUTBRACE_TOK if end_tokens.is_none() => {
7452                // c:Src/parse.c:par_event — orphan `}` (no matching
7453                // `{` opener) at top level is a parse error. zshrs's
7454                // generic break swallowed it silently, leaving the
7455                // `echo a` in `echo a }` running and ignoring the
7456                // stray brace. Bug #168 in docs/BUGS.md.
7457                zerr("parse error near `}'");
7458                break;
7459            }
7460            OUTBRACE_TOK | DSEMI | SEMIAMP | SEMIBAR | DONE | FI | ESAC | ZEND => break,
7461            _ => {}
7462        }
7463
7464        match par_list() {
7465            Some(list) => {
7466                let detected = simple_name_with_inoutpar(&list);
7467                lists.push(list);
7468                // Synthesize a FuncDef for the `name() { body }` shape
7469                // at parse time so body_source is captured while the
7470                // lexer still has the input. The lexer port emits
7471                // `name(` as a single Word ending in `<Inpar><Outpar>`,
7472                // so the Simple list is followed by an Inbrace once
7473                // separators are skipped. For `name() cmd args` the
7474                // body has already been swallowed into the same
7475                // Simple's words tail — synthesize directly from there.
7476                if let Some((names, body_argv)) = detected {
7477                    if !body_argv.is_empty() {
7478                        // One-line body already in the Simple. Build
7479                        // a Simple from body_argv as the function body.
7480                        lists.pop();
7481                        let body_simple = ZshCommand::Simple(ZshSimple {
7482                            assigns: Vec::new(),
7483                            words: body_argv,
7484                            redirs: Vec::new(),
7485                        });
7486                        let body_list = ZshList {
7487                            sublist: ZshSublist {
7488                                pipe: ZshPipe {
7489                                    cmd: body_simple,
7490                                    next: None,
7491                                    lineno: lineno(),
7492                                    merge_stderr: false,
7493                                },
7494                                next: None,
7495                                flags: SublistFlags::default(),
7496                            },
7497                            flags: ListFlags::default(),
7498                        };
7499                        let funcdef = ZshCommand::FuncDef(ZshFuncDef {
7500                            names,
7501                            body: Box::new(ZshProgram {
7502                                lists: vec![body_list],
7503                            }),
7504                            tracing: false,
7505                            auto_call_args: None,
7506                            body_source: None,
7507                        });
7508                        let synthetic = ZshList {
7509                            sublist: ZshSublist {
7510                                pipe: ZshPipe {
7511                                    cmd: funcdef,
7512                                    next: None,
7513                                    lineno: lineno(),
7514                                    merge_stderr: false,
7515                                },
7516                                next: None,
7517                                flags: SublistFlags::default(),
7518                            },
7519                            flags: ListFlags::default(),
7520                        };
7521                        lists.push(synthetic);
7522                        continue;
7523                    }
7524                    // Else: words.len() == 1 (only the trailing `name()`
7525                    // word), brace body follows. `names` may carry
7526                    // multiple identifiers from the `fna fnb fnc()`
7527                    // shorthand — all share the same brace body per
7528                    // src/zsh/Src/parse.c:1666 par_funcdef wordlist.
7529                    // Skip separators on the real lexer; safe because
7530                    // parse_program's next iteration would also skip them.
7531                    while tok() == SEPER || tok() == NEWLIN {
7532                        zshlex();
7533                    }
7534                    if tok() == INBRACE_TOK {
7535                        // Capture body_start BEFORE the lexer
7536                        // advances past the first body token. The
7537                        // outer zshlex() consumed `{`; lexer.pos
7538                        // is now right after `{`. The next
7539                        // `zshlex()` would advance past `echo`,
7540                        // making body_start land mid-body and
7541                        // lose the first word — `typeset -f f`
7542                        // printed `a; echo b` instead of
7543                        // `echo a; echo b` for `f() { echo a;
7544                        // echo b }`.
7545                        let body_start = pos();
7546                        zshlex();
7547                        // c:Src/parse.c — synth funcdef body terminates
7548                        // at OUTBRACE_TOK. Explicit end-token avoids
7549                        // the top-level stray-`}` arm. Bug #167/#168.
7550                        let body = parse_program_until(Some(&[OUTBRACE_TOK]));
7551                        let body_end = if tok() == OUTBRACE_TOK {
7552                            pos().saturating_sub(1)
7553                        } else {
7554                            pos()
7555                        };
7556                        let body_source = input_slice(body_start, body_end)
7557                            .map(|s| s.trim().to_string())
7558                            .filter(|s| !s.is_empty());
7559                        if tok() == OUTBRACE_TOK {
7560                            zshlex();
7561                        }
7562                        // Replace the Simple list with a FuncDef list.
7563                        lists.pop();
7564                        let funcdef = ZshCommand::FuncDef(ZshFuncDef {
7565                            names,
7566                            body: Box::new(body),
7567                            tracing: false,
7568                            auto_call_args: None,
7569                            body_source,
7570                        });
7571                        let synthetic = ZshList {
7572                            sublist: ZshSublist {
7573                                pipe: ZshPipe {
7574                                    cmd: funcdef,
7575                                    next: None,
7576                                    lineno: lineno(),
7577                                    merge_stderr: false,
7578                                },
7579                                next: None,
7580                                flags: SublistFlags::default(),
7581                            },
7582                            flags: ListFlags::default(),
7583                        };
7584                        lists.push(synthetic);
7585                    } else if !matches!(tok(), ENDINPUT | OUTBRACE_TOK | SEPER | NEWLIN) {
7586                        // No-brace one-line body: `foo() echo hello`.
7587                        // Parse a single command for the body.
7588                        let body_cmd = par_cmd();
7589                        if let Some(cmd) = body_cmd {
7590                            let body_list = ZshList {
7591                                sublist: ZshSublist {
7592                                    pipe: ZshPipe {
7593                                        cmd,
7594                                        next: None,
7595                                        lineno: lineno(),
7596                                        merge_stderr: false,
7597                                    },
7598                                    next: None,
7599                                    flags: SublistFlags::default(),
7600                                },
7601                                flags: ListFlags::default(),
7602                            };
7603                            lists.pop();
7604                            let funcdef = ZshCommand::FuncDef(ZshFuncDef {
7605                                names: names.clone(),
7606                                body: Box::new(ZshProgram {
7607                                    lists: vec![body_list],
7608                                }),
7609                                tracing: false,
7610                                auto_call_args: None,
7611                                body_source: None,
7612                            });
7613                            let synthetic = ZshList {
7614                                sublist: ZshSublist {
7615                                    pipe: ZshPipe {
7616                                        cmd: funcdef,
7617                                        next: None,
7618                                        lineno: lineno(),
7619                                        merge_stderr: false,
7620                                    },
7621                                    next: None,
7622                                    flags: SublistFlags::default(),
7623                                },
7624                                flags: ListFlags::default(),
7625                            };
7626                            lists.push(synthetic);
7627                        }
7628                    }
7629                }
7630            }
7631            None => break,
7632        }
7633    }
7634
7635    ZshProgram { lists }
7636}
7637
7638/// Parse an assignment
7639/// Parse an assignment word `NAME=value` or `NAME=(arr items)`.
7640/// Sub-routine of par_simple. The C source handles assignments
7641/// inline in par_simple via the ENVSTRING/ENVARRAY token paths
7642/// (parse.c:1842-2000ish); zshrs splits it out to a dedicated
7643/// helper for clarity.
7644fn parse_assign() -> Option<ZshAssign> {
7645    // Helper: locate the Equals-marker that delimits NAME from
7646    // VALUE in an assignment-shaped tokstr. The lexer META-encodes
7647    // EVERY `=` (including those inside `${var%%=foo}` strip
7648    // patterns or `[idx]=...` subscripts), so a naive
7649    // `tokstr.find(Equals)` would split at the first inner `=`
7650    // and break the whole assignment. Walk the string skipping
7651    // brace and bracket depth so the assignment's `=` (the one
7652    // after the last `]` of the LHS subscript / or after the
7653    // bare name) is the one we land on.
7654    fn find_assign_equals(s: &str) -> Option<usize> {
7655        let target = Equals;
7656        let mut brace = 0i32;
7657        let mut bracket = 0i32;
7658        let mut paren = 0i32;
7659        for (i, c) in s.char_indices() {
7660            match c {
7661                    '{' | '\u{8f}' /* Inbrace */ => brace += 1,
7662                    '}' | '\u{90}' /* Outbrace */ => {
7663                        if brace > 0 {
7664                            brace -= 1;
7665                        }
7666                    }
7667                    '[' | '\u{91}' /* Inbrack */ => bracket += 1,
7668                    ']' | '\u{92}' /* Outbrack */ => {
7669                        if bracket > 0 {
7670                            bracket -= 1;
7671                        }
7672                    }
7673                    '(' | '\u{88}' /* Inpar */ => paren += 1,
7674                    ')' | '\u{8a}' /* Outpar */ => {
7675                        if paren > 0 {
7676                            paren -= 1;
7677                        }
7678                    }
7679                    _ if c == target && brace == 0 && bracket == 0 && paren == 0 => {
7680                        return Some(i);
7681                    }
7682                    _ => {}
7683                }
7684        }
7685        None
7686    }
7687
7688    let _ts_tokstr = tokstr()?;
7689    let tokstr = _ts_tokstr.as_str();
7690
7691    // Parse name=value or name+=value.
7692    let (name, value_str, append) = if tok() == ENVARRAY {
7693        let (name, append) = if let Some(stripped) = tokstr.strip_suffix('+') {
7694            (stripped, true)
7695        } else {
7696            (tokstr, false)
7697        };
7698        (name.to_string(), String::new(), append)
7699    } else if let Some(pos) = find_assign_equals(tokstr) {
7700        let name_part = &tokstr[..pos];
7701        let (name, append) = if let Some(stripped) = name_part.strip_suffix('+') {
7702            (stripped, true)
7703        } else {
7704            (name_part, false)
7705        };
7706        (
7707            name.to_string(),
7708            tokstr[pos + Equals.len_utf8()..].to_string(),
7709            append,
7710        )
7711    } else if let Some(pos) = tokstr.find('=') {
7712        // Fallback to literal '=' for compatibility
7713        let name_part = &tokstr[..pos];
7714        let (name, append) = if let Some(stripped) = name_part.strip_suffix('+') {
7715            (stripped, true)
7716        } else {
7717            (name_part, false)
7718        };
7719        (name.to_string(), tokstr[pos + 1..].to_string(), append)
7720    } else {
7721        return None;
7722    };
7723
7724    let value = if tok() == ENVARRAY {
7725        // Array assignment: name=(...)
7726        // c:Src/parse.c:1895 par_simple ENVARRAY arm:
7727        //   `int oldcmdpos = incmdpos; ... incmdpos = 0; ... zshlex();`
7728        // Reset incmdpos to false BEFORE the array body's first lex so
7729        // a leading `{...}` (brace expansion) doesn't trip the
7730        // empty-buf+incmdpos rule at lex.c:1141 that returns `{` as
7731        // STRING and lets the reswd_lookup promote it to INBRACE_TOK.
7732        let oldcmdpos = crate::ported::lex::incmdpos();
7733        crate::ported::lex::set_incmdpos(false);
7734        let mut elements = Vec::new();
7735        zshlex(); // skip past token
7736
7737        let mut arr_iters = 0;
7738        const MAX_ARRAY_ELEMENTS: usize = 10_000;
7739        while matches!(tok(), STRING_LEX | SEPER | NEWLIN) {
7740            arr_iters += 1;
7741            if arr_iters > MAX_ARRAY_ELEMENTS {
7742                zerr("array assignment exceeded maximum elements");
7743                break;
7744            }
7745            if tok() == STRING_LEX {
7746                let _ts_s = crate::ported::lex::tokstr();
7747                if let Some(s) = _ts_s.as_deref() {
7748                    elements.push(s.to_string());
7749                }
7750            }
7751            zshlex();
7752        }
7753        // c:Src/parse.c — `incmdpos = oldcmdpos;` (restore at end of arm)
7754        crate::ported::lex::set_incmdpos(oldcmdpos);
7755
7756        // The closing Outpar is consumed here. The outer par_simple
7757        // loop will then `zshlex()` past whatever follows (typically
7758        // a separator or the next word) — calling zshlex twice in
7759        // tandem (here AND in par_simple) over-advances and merges
7760        // a following `name() { … }` funcdef into the same Simple.
7761        // We only consume Outpar; let the caller handle the rest.
7762        // Without this guard `g=(o1); f() { :; }` parsed as one
7763        // Simple with assigns=[g] and words=["f()"] (one token).
7764        if tok() == OUTPAR_TOK {
7765            // Note: do NOT zshlex() here. par_simple's `lexer
7766            // .zshlex()` after `parse_assign` returns advances past
7767            // the Outpar onto the next significant token.
7768            //
7769            // Force `incmdpos=true` so the next zshlex() recognizes
7770            // a follow-up `b=(...)` / `b=val` as Envarray/Envstring.
7771            // The lexer flips incmdpos to false on bare Outpar (which
7772            // is correct for subshell-close context), but for an
7773            // array-assignment close more assigns/words may follow.
7774            set_incmdpos(true);
7775        }
7776
7777        ZshAssignValue::Array(elements)
7778    } else {
7779        ZshAssignValue::Scalar(value_str)
7780    };
7781
7782    Some(ZshAssign {
7783        name,
7784        value,
7785        append,
7786    })
7787}
7788
7789/// AST `par_redir` variant accepting an idstring for the
7790/// `{var}>file` brace-FD shape. C signature
7791/// `par_redir(int *rp, char *idstring)` (parse.c:2229). The
7792/// idstring is stored in the resulting ZshRedir.varid for the
7793/// executor to bind the named variable to the chosen fd.
7794fn par_redir_with_id(idstring: Option<&str>) -> Option<ZshRedir> {
7795    let varid: Option<String> = idstring.map(|s| s.to_string());
7796    let rtype = match tok() {
7797        OUTANG_TOK => REDIR_WRITE,
7798        OUTANGBANG => REDIR_WRITENOW,
7799        DOUTANG => REDIR_APP,
7800        DOUTANGBANG => REDIR_APPNOW,
7801        INANG_TOK => REDIR_READ,
7802        INOUTANG => REDIR_READWRITE,
7803        DINANG => REDIR_HEREDOC,
7804        DINANGDASH => REDIR_HEREDOCDASH,
7805        TRINANG => REDIR_HERESTR,
7806        INANGAMP => REDIR_MERGEIN,
7807        OUTANGAMP => REDIR_MERGEOUT,
7808        AMPOUTANG => REDIR_ERRWRITE,
7809        OUTANGAMPBANG => REDIR_ERRWRITENOW,
7810        DOUTANGAMP => REDIR_ERRAPP,
7811        DOUTANGAMPBANG => REDIR_ERRAPPNOW,
7812        _ => return None,
7813    };
7814
7815    let fd = if tokfd() >= 0 {
7816        tokfd()
7817    } else if matches!(
7818        rtype,
7819        REDIR_READ
7820            | REDIR_READWRITE
7821            | REDIR_MERGEIN
7822            | REDIR_HEREDOC
7823            | REDIR_HEREDOCDASH
7824            | REDIR_HERESTR
7825    ) {
7826        0
7827    } else {
7828        1
7829    };
7830
7831    // c:2234-2245 — save/restore incmdpos and nocorrect around the
7832    // zshlex that consumes the redir target word:
7833    //   oldcmdpos = incmdpos; incmdpos = 0;
7834    //   oldnc = nocorrect;
7835    //   if (tok != INANG && tok != INOUTANG) nocorrect = 1;
7836    //   ... zshlex; check tok; ...
7837    //   incmdpos = oldcmdpos; nocorrect = oldnc;
7838    // Without this, a redir target lexes in the parent's incmdpos
7839    // (re-promoting `{` / reswords) AND with parent nocorrect (so
7840    // spelling-correction wrongly runs inside `> $(cmd)` etc.).
7841    let oldcmdpos = incmdpos();
7842    set_incmdpos(false);
7843    let oldnc = nocorrect();
7844    let cur = tok();
7845    if cur != INANG_TOK && cur != INOUTANG {
7846        set_nocorrect(1);
7847    }
7848    zshlex();
7849
7850    let name = match tok() {
7851        STRING_LEX | ENVSTRING => {
7852            let n = tokstr().unwrap_or_default();
7853            // c:2244-2245 — restore incmdpos / nocorrect right after
7854            // the redir target word is confirmed, BEFORE the trailing
7855            // zshlex advances past it. The advance itself is deferred
7856            // below so REDIR_HEREDOC[DASH] can push onto HDOCS first
7857            // (matching the wordcode variant at parse.rs:6894-6908) —
7858            // otherwise the NEWLIN drained by that zshlex sees an
7859            // empty HDOCS list and gethere never collects the body.
7860            set_incmdpos(oldcmdpos);
7861            set_nocorrect(oldnc);
7862            n
7863        }
7864        _ => {
7865            set_incmdpos(oldcmdpos);
7866            set_nocorrect(oldnc);
7867            zerr("expected word after redirection");
7868            return None;
7869        }
7870    };
7871
7872    // Heredoc terminator capture. C parse.c:2254-2317 par_redir builds
7873    // a `struct heredocs` entry here for REDIR_HEREDOC[DASH]. zshrs
7874    // pushes onto HDOCS (canonical C linked list, c:2290-2296) AND
7875    // onto LEX_HEREDOCS (Rust-only AST-glue Vec carrying parsed-out
7876    // terminator/strip_tabs/quoted metadata for downstream AST
7877    // consumers). Quoted terminators (`<<'EOF'` / `<<"EOF"` / `<<\EOF`)
7878    // disable expansion in the body — Snull `\u{9d}` marks single-quote,
7879    // Dnull `\u{9e}` marks double-quote, Bnull `\u{9f}` marks
7880    // backslash-escaped chars.
7881    let heredoc_idx = if matches!(rtype, REDIR_HEREDOC | REDIR_HEREDOCDASH) {
7882        let strip_tabs = rtype == REDIR_HEREDOCDASH;
7883        let quoted = name.contains('\u{9d}')
7884            || name.contains('\u{9e}')
7885            || name.contains('\u{9f}')
7886            || name.starts_with('\'')
7887            || name.starts_with('"');
7888        let term = name
7889            .chars()
7890            .filter(|c| {
7891                *c != '\'' && *c != '"' && *c != '\u{9d}' && *c != '\u{9e}' && *c != '\u{9f}'
7892            })
7893            .collect::<String>();
7894        // c:2290-2296 — `for (hd = &hdocs; *hd; hd = &(*hd)->next);
7895        //                 *hd = zalloc(sizeof(struct heredocs));
7896        //                 (*hd)->next = NULL;
7897        //                 (*hd)->type = htype;
7898        //                 (*hd)->pc = r;
7899        //                 (*hd)->str = tokstr;`
7900        // AST path has no wordcode pc to patch; use -1 sentinel so the
7901        // inline NEWLIN walk in `zshlex()` skips the setheredoc call.
7902        HDOCS.with_borrow_mut(|head| {
7903            let mut cur = head;
7904            while cur.is_some() {
7905                cur = &mut cur.as_mut().unwrap().next; // c:2290
7906            }
7907            *cur = Some(Box::new(crate::ported::zsh_h::heredocs {
7908                // c:2292-2296
7909                next: None,
7910                typ: rtype,
7911                pc: -1,
7912                str: Some(name.clone()),
7913            }));
7914        });
7915        // zshrs-only: push parallel AST-glue entry onto LEX_HEREDOCS.
7916        let idx = LEX_HEREDOCS.with_borrow_mut(|v| {
7917            v.push(HereDoc {
7918                terminator: term,
7919                strip_tabs,
7920                content: String::new(),
7921                quoted,
7922                processed: false,
7923            });
7924            v.len() - 1
7925        });
7926        Some(idx)
7927    } else {
7928        None
7929    };
7930
7931    // c:2298 (heredoc) / c:2322 (other redirs) — final zshlex() advance
7932    // past the redir target word. MUST run after the HDOCS push above
7933    // so the heredoc-drain inside this zshlex sees the new entry. For
7934    // non-heredoc forms the order is irrelevant; consolidating to a
7935    // single tail-call here matches the wordcode variant.
7936    zshlex();
7937
7938    Some(ZshRedir {
7939        rtype,
7940        fd,
7941        name,
7942        heredoc: None,
7943        varid,
7944        heredoc_idx,
7945    })
7946}
7947
7948/// Parse C-style for loop: for (( init; cond; step ))
7949/// Parse the c-style `for ((init; cond; incr)) do BODY done`.
7950/// Inner branch of zsh/Src/parse.c:1100-1140 inside par_for.
7951/// Recognized when the token after FOR is DINPAR (the `((`
7952/// detected by gettok via dbparens setup).
7953fn parse_for_cstyle() -> Option<ZshCommand> {
7954    // We're at (( (Dinpar None) - the opening ((
7955    // Lexer returns:
7956    //   Dinpar None     - opening ((
7957    //   Dinpar "init"   - init expression, semicolon consumed
7958    //   Dinpar "cond"   - cond expression, semicolon consumed
7959    //   Doutpar "step"  - step expression, closing )) consumed
7960    zshlex(); // Get init: Dinpar "i=0"
7961
7962    if tok() != DINPAR {
7963        zerr("expected init expression in for ((");
7964        return None;
7965    }
7966    let init = tokstr().unwrap_or_default();
7967
7968    zshlex(); // Get cond: Dinpar "i<10"
7969
7970    if tok() != DINPAR {
7971        zerr("expected condition in for ((");
7972        return None;
7973    }
7974    let cond = tokstr().unwrap_or_default();
7975
7976    zshlex(); // Get step: Doutpar "i++"
7977
7978    if tok() != DOUTPAR {
7979        zerr("expected )) in for");
7980        return None;
7981    }
7982    let step = tokstr().unwrap_or_default();
7983
7984    // c:1110 — `infor = 0;` before the body opener. The companion
7985    // `incmdpos = 1;` at c:1111 is intentionally skipped here for
7986    // the same reason c:1094's `incmdpos = 0;` is skipped in
7987    // par_for above — zshrs doesn't mirror the full
7988    // incmdpos state-machine inline.
7989    set_infor(0); // c:1110
7990    zshlex(); // Move past ))
7991
7992    skip_separators();
7993    let body = parse_loop_body(false, false)?;
7994
7995    Some(ZshCommand::For(ZshFor {
7996        var: String::new(),
7997        list: ForList::CStyle { init, cond, step },
7998        body: Box::new(body),
7999        is_select: false,
8000    }))
8001}
8002
8003/// Parse select loop (same syntax as for)
8004/// Parse `select NAME in WORDS; do BODY; done`. Same shape as
8005/// `for NAME in WORDS; do ...` but with menu-prompt semantics in
8006/// the executor. C equivalent: the SELECT case in par_for at
8007/// parse.c:1087-1207 (selects share parser flow with foreach).
8008fn parse_select() -> Option<ZshCommand> {
8009    // `select` shares par_for's grammar (var, words, body) but the
8010    // compile path is different (interactive prompt loop).
8011    match par_for()? {
8012        ZshCommand::For(mut f) => {
8013            f.is_select = true;
8014            Some(ZshCommand::For(f))
8015        }
8016        other => Some(other),
8017    }
8018}
8019
8020/// Parse loop body (do...done, {...}, or shortloop)
8021/// Parse the `do BODY done` body of a for/while/until/select/
8022/// repeat loop. Direct equivalent of zsh's parse.c handling
8023/// inside the loop builders — they all consume DOLOOP, parse a
8024/// list until DONE, and return the list. The `foreach_style`
8025/// flag signals foreach (where short-form `for NAME in WORDS;
8026/// CMD` may skip do/done) vs c-style (which always requires
8027/// do/done).
8028///
8029/// `is_repeat` widens the SHORTLOOPS gate so `SHORTREPEAT` also
8030/// unlocks the short form for `repeat N CMD` (per c:1600
8031/// `unset(SHORTLOOPS) && unset(SHORTREPEAT)`).
8032fn parse_loop_body(foreach_style: bool, is_repeat: bool) -> Option<ZshProgram> {
8033    // c:1180-1194 — body dispatch order per par_for:
8034    //   `do ... done` (DOLOOP) — primary form.
8035    //   `{ ... }`   (INBRACE) — alternate.
8036    //   csh/CSHJUNKIELOOPS — terminator is `end`.
8037    //   else if (unset(SHORTLOOPS)) — YYERROR.
8038    //   else — short form (single command).
8039    if tok() == DOLOOP {
8040        zshlex();
8041        // Body parse must declare DONE as an end-token so the
8042        // parse_program_until top-level orphan-DONE guard doesn't
8043        // mis-fire on the legitimate loop terminator.
8044        let body = parse_program_until(Some(&[DONE]));
8045        // c:Src/parse.c:1182-1183 / :1535-1536 / :1597-1598 —
8046        // `if (tok != DONE) YYERRORV(oecused);`. zshrs previously
8047        // silently accepted EOF as a substitute for `done`, so
8048        // `for i in a; do echo hi; don` ran the loop with `don` as
8049        // a command (which then failed "command not found") instead
8050        // of erroring at parse time. Bug #403, #404.
8051        if tok() != DONE {
8052            zerr("parse error: expected `done'");
8053            return None;
8054        }
8055        zshlex();
8056        Some(body)
8057    } else if tok() == INBRACE_TOK {
8058        zshlex();
8059        let body = parse_program_until(Some(&[OUTBRACE_TOK]));
8060        // c:Src/parse.c:1186 / :1539 — `if (tok != OUTBRACE) YYERRORV`.
8061        if tok() != OUTBRACE_TOK {
8062            zerr("parse error: expected `}'");
8063            return None;
8064        }
8065        zshlex();
8066        Some(body)
8067    } else if foreach_style || isset(CSHJUNKIELOOPS) {
8068        // c:1184 / 1546 / 1595 — `else if (csh || isset(CSHJUNKIELOOPS))`.
8069        let body = parse_program_until(Some(&[ZEND]));
8070        // c:1190 / 1548 — `if (tok != ZEND) YYERRORV`.
8071        if tok() != ZEND {
8072            zerr("parse error: expected `end'");
8073            return None;
8074        }
8075        zshlex();
8076        Some(body)
8077    } else {
8078        // c:1190 / 1474 / 1551 / 1600 — short-form gate. C bails
8079        // with YYERROR when `unset(SHORTLOOPS) && (!is_repeat ||
8080        // unset(SHORTREPEAT))`. zshrs's option machinery isn't
8081        // initialised at parse-test time (no `init_main` →
8082        // `install_emulation_defaults`), so a strict port here
8083        // body. parse_init seeds SHORTLOOPS=on mirroring C
8084        // `install_emulation_defaults`, so this fires only when a
8085        // script explicitly disabled the option.
8086        if unset(SHORTLOOPS) && (!is_repeat || unset(SHORTREPEAT)) {
8087            zerr("parse error: short loop form requires SHORTLOOPS option");
8088            return None;
8089        }
8090        // c:Src/parse.c:1604 / :1474 / :1551 — short form calls
8091        // par_save_list1 → par_list1 → par_sublist, which parses
8092        // ONE sublist and leaves the trailing SEPER untouched for
8093        // the outer par_list to consume. zshrs previously routed
8094        // through par_list() which consumes the trailing `;`/`\n`
8095        // separator — that swallowed the separator between the
8096        // loop's body command and the next outer command, so
8097        // `repeat 2 print x; print y` parsed as repeat-then-eof
8098        // and par_cmd's post-compound STRING_LEX guard at parse.rs
8099        // line 1170 fired "parse error near `print'". Bug #593.
8100        par_list1().map(|sublist| ZshProgram {
8101            lists: vec![ZshList {
8102                sublist,
8103                flags: ListFlags::default(),
8104            }],
8105        })
8106    }
8107}
8108
8109/// `() { body } arg1 arg2 …` — anonymous function. Defines a fresh
8110/// function named `_zshrs_anon_N`, invokes it with the args, and the
8111/// body runs with positional params set. Implemented as the desugared
8112/// pair (FuncDef + Simple call) so the compile path doesn't need new
8113/// machinery.
8114/// Parse an anonymous function definition `() { BODY }` followed
8115/// by call args. zsh treats `() { echo hi; } a b c` as defining
8116/// and immediately calling an anon fn with args a/b/c. C
8117/// equivalent: the INOUTPAR shape in par_simple at parse.c:1836+
8118/// triggers an anon-funcdef path.
8119fn parse_anon_funcdef() -> Option<ZshCommand> {
8120    zshlex(); // skip ()
8121    skip_separators();
8122    // No `{` after `()` → bare empty subshell shape `()`. Fall back
8123    // to a Subsh with an empty program so the status is 0 (matches
8124    // zsh's `()` no-op behavior).
8125    if tok() != INBRACE_TOK {
8126        return Some(ZshCommand::Subsh(Box::new(ZshProgram {
8127            lists: Vec::new(),
8128        })));
8129    }
8130    zshlex(); // skip {
8131    // c:Src/parse.c:par_subsh — anon `() { … }` body must terminate at
8132    // OUTBRACE_TOK. Pass it as the explicit end-token so the inner
8133    // parse stops cleanly at `}` rather than hitting the top-level
8134    // stray-`}` arm (#168). Bug #167 family.
8135    let body = parse_program_until(Some(&[OUTBRACE_TOK]));
8136    // c:Src/parse.c:1733-1737 — same `if (tok != OUTBRACE) YYERRORV`
8137    // gate as the named-funcdef path. Bug #405 sibling.
8138    if tok() != OUTBRACE_TOK {
8139        zerr("parse error: expected `}'");
8140        return None;
8141    }
8142    zshlex();
8143    // Collect any trailing args until a separator. zsh's anon-fn form
8144    // `() { body } a b c` runs body with $1=a, $2=b, $3=c.
8145    let mut args = Vec::new();
8146    while tok() == STRING_LEX {
8147        if let Some(s) = tokstr() {
8148            args.push(s);
8149        }
8150        zshlex();
8151    }
8152
8153    // Generate a unique name. Module-level static would be cleaner but
8154    // a thread-local atomic is enough — anonymous functions are
8155    // ephemeral and the name isn't user-visible.
8156    static ANON_COUNTER: AtomicUsize = AtomicUsize::new(0);
8157    let n = ANON_COUNTER.fetch_add(1, Ordering::Relaxed);
8158    let name = format!("_zshrs_anon_{}", n);
8159    Some(ZshCommand::FuncDef(ZshFuncDef {
8160        names: vec![name],
8161        body: Box::new(body),
8162        tracing: false,
8163        auto_call_args: Some(args),
8164        body_source: None,
8165    }))
8166}
8167
8168/// Parse {...} cursh
8169/// Parse a current-shell brace block `{ BODY }`. C source
8170/// par_cmd at parse.c:958-1085 handles Inbrace → emit WC_CURSH
8171/// and recurses into the list. zshrs's parse_cursh extracts that
8172/// arm into a dedicated method.
8173fn parse_cursh() -> Option<ZshCommand> {
8174    zshlex(); // skip {
8175    // c:Src/parse.c:par_subsh — pass OUTBRACE_TOK as the explicit
8176    // body terminator so the inner parse stops cleanly at `}` rather
8177    // than falling through the top-level `OUTBRACE_TOK if
8178    // end_tokens.is_none()` arm (which errors on stray `}` per bug
8179    // #168). Bug #167 in docs/BUGS.md.
8180    let prog = parse_program_until(Some(&[OUTBRACE_TOK]));
8181
8182    // c:Src/parse.c:par_subsh — `{ … }` requires a matching `}`.
8183    // C errors via YYERRORV when the body parse returns without
8184    // seeing OUTBRACE_TOK (parse.c:1623 inbrack check). zshrs's
8185    // previous behavior silently returned `Cursh(prog)` and ran the
8186    // body as if the braces were absent. Bug #167 in docs/BUGS.md.
8187    if tok() != OUTBRACE_TOK {
8188        // Reuse the "parse error near `<tok>'" shape from #142/#161.
8189        // The offending token is whatever follows the unclosed brace
8190        // body. For EOF (`{ echo a` at end of input) C zsh errors
8191        // near the LAST consumed body token; we use the current
8192        // tokstr() or fall back to a "}" hint.
8193        let near = tokstr().unwrap_or_else(|| "}".to_string());
8194        zerr(&format!("parse error near `{}'", near));
8195        return None;
8196    }
8197    // Check for { ... } always { ... }. Direct port of zsh's
8198    // par_subsh at parse.c:1612-1660 — note the two `incmdpos = 1`
8199    // forces (parse.c:1632, 1637): after consuming the closing
8200    // Outbrace AND after matching the `always` keyword, the parser
8201    // explicitly resets command position so the next `{` lexes as
8202    // Inbrace. Without these resets the lexer's String-clears-cmdpos
8203    // rule (lex.rs:976-983) leaves the second `{` in word position,
8204    // turning `always { ... }` into a Simple `{` `echo` … and the
8205    // try/always pairing is silently lost.
8206    {
8207        set_incmdpos(true); // parse.c:1632 incmdpos = !zsh_construct
8208        zshlex();
8209
8210        // Check for 'always'
8211        if tok() == STRING_LEX {
8212            let s = tokstr();
8213            if s.map(|s| s == "always").unwrap_or(false) {
8214                set_incmdpos(true); // parse.c:1637 incmdpos = 1
8215                zshlex();
8216                skip_separators();
8217
8218                if tok() == INBRACE_TOK {
8219                    zshlex();
8220                    // c:Src/parse.c — always-clause body terminates at
8221                    // OUTBRACE_TOK. Bug #167/#168 family.
8222                    let always = parse_program_until(Some(&[OUTBRACE_TOK]));
8223                    if tok() == OUTBRACE_TOK {
8224                        zshlex();
8225                    }
8226                    return Some(ZshCommand::Try(ZshTry {
8227                        try_block: Box::new(prog),
8228                        always: Box::new(always),
8229                    }));
8230                }
8231            }
8232        }
8233    }
8234
8235    Some(ZshCommand::Cursh(Box::new(prog)))
8236}
8237
8238/// Parse inline function definition: name() { ... }
8239/// Parse the inline form `NAME () { BODY }` (POSIX-style funcdef
8240/// without the `function` keyword). The name has already been
8241/// consumed and pushed by par_simple before this method fires.
8242/// C source: handled inline in par_simple's INOUTPAR-after-name
8243/// arm (parse.c:1836-2228).
8244fn parse_inline_funcdef(name: String) -> Option<ZshCommand> {
8245    // par_simple's STRING loop left `incmdpos = 0`; the funcdef body
8246    // `{ ... }` requires `incmdpos = 1` so the lexer recognises `{`
8247    // as INBRACE_TOK (current-shell block opener) instead of a
8248    // literal `{` STRING. Without this, `myfunc() { echo body }`
8249    // parsed the body as the single STRING `"{"`, then `echo body`
8250    // fell out at top level. Mirrors the C path where par_cmd's
8251    // dispatcher (parse.c:958) is called with `incmdpos = 1` for
8252    // the funcdef body.
8253    set_incmdpos(true);
8254    // Skip ()
8255    if tok() == INOUTPAR {
8256        zshlex();
8257    }
8258
8259    skip_separators();
8260
8261    // Parse body
8262    if tok() == INBRACE_TOK {
8263        // Same body_start-before-zshlex fix as par_funcdef.
8264        let body_start = pos();
8265        zshlex();
8266        // c:Src/parse.c — inline funcdef body terminates at OUTBRACE_TOK.
8267        // Explicit end-token keeps the inner parse from hitting the
8268        // top-level stray-`}` arm (#168). Bug #167 family.
8269        let body = parse_program_until(Some(&[OUTBRACE_TOK]));
8270        // c:Src/parse.c:1733-1737 — `if (tok != OUTBRACE) { cmdpop();
8271        // lineno += oldlineno; ecnpats = onp; ecssub = oecssub;
8272        // YYERRORV(oecused); }`. Without this gate, `f() { echo hi`
8273        // silently registered as a complete fn with body `echo hi`.
8274        // Bug #405.
8275        if tok() != OUTBRACE_TOK {
8276            zerr("parse error: expected `}'");
8277            return None;
8278        }
8279        let body_end = pos().saturating_sub(1);
8280        let body_source = input_slice(body_start, body_end)
8281            .map(|s| {
8282                // Lexer's pos() may have advanced past `}` AND skipped
8283                // trailing whitespace/newlines before returning the
8284                // OUTBRACE_TOK to us, so the slice up to `pos - 1`
8285                // includes the `}` and any preceding whitespace.
8286                // Strip the trailing `}` and any preceding structural
8287                // separator (`;`, `\n`) — C zsh's getpermtext walks
8288                // the wordcode list and emits each command WITHOUT
8289                // the trailing `;`/`\n` that lives in the input.
8290                let t = s.trim();
8291                let t = t.strip_suffix('}').unwrap_or(t).trim_end();
8292                let t = t
8293                    .trim_end_matches(|c: char| c == ';' || c == '\n')
8294                    .trim_end();
8295                t.to_string()
8296            })
8297            .filter(|s| !s.is_empty());
8298        zshlex();
8299        Some(ZshCommand::FuncDef(ZshFuncDef {
8300            names: vec![name],
8301            body: Box::new(body),
8302            tracing: false,
8303            auto_call_args: None,
8304            body_source,
8305        }))
8306    } else if unset(SHORTLOOPS) {
8307        // c:1742 — `else if (unset(SHORTLOOPS)) YYERRORV(oecused);` —
8308        // funcdef short body (`name() cmd` without `{...}`) only
8309        // accepted when SHORTLOOPS is set. parse_init seeds
8310        // SHORTLOOPS=on so this fires only when a script
8311        // explicitly disabled the option.
8312        zerr("parse error: short function body form requires SHORTLOOPS option");
8313        None
8314    } else {
8315        match par_cmd() {
8316            Some(cmd) => {
8317                let list = ZshList {
8318                    sublist: ZshSublist {
8319                        pipe: ZshPipe {
8320                            cmd,
8321                            next: None,
8322                            lineno: lineno(),
8323                            merge_stderr: false,
8324                        },
8325                        next: None,
8326                        flags: SublistFlags::default(),
8327                    },
8328                    flags: ListFlags::default(),
8329                };
8330                Some(ZshCommand::FuncDef(ZshFuncDef {
8331                    names: vec![name],
8332                    body: Box::new(ZshProgram { lists: vec![list] }),
8333                    tracing: false,
8334                    auto_call_args: None,
8335                    body_source: None,
8336                }))
8337            }
8338            None => None,
8339        }
8340    }
8341}
8342
8343/// Parse conditional expression
8344/// Top of `[[ ]]` cond-expression parsing — entry to recursive
8345/// descent (or → and → not → primary). Direct port of zsh's
8346/// par_cond_1 at parse.c:2434-2475.
8347fn parse_cond_expr() -> Option<ZshCond> {
8348    parse_cond_or()
8349}
8350
8351/// Cond-expression `||` level. C: inside par_cond_1 at
8352/// parse.c:2434-2475 (the `cond_or` ladder).
8353fn parse_cond_or() -> Option<ZshCond> {
8354    let left = parse_cond_and()?;
8355    skip_cond_separators();
8356
8357    if tok() == DBAR {
8358        zshlex();
8359        skip_cond_separators();
8360        parse_cond_or().map(|right| ZshCond::Or(Box::new(left), Box::new(right)))
8361    } else {
8362        Some(left)
8363    }
8364}
8365
8366/// Cond-expression `&&` level. C: par_cond_2 at parse.c:2476-2625.
8367fn parse_cond_and() -> Option<ZshCond> {
8368    let left = parse_cond_not()?;
8369    skip_cond_separators();
8370
8371    if tok() == DAMPER {
8372        zshlex();
8373        skip_cond_separators();
8374        parse_cond_and().map(|right| ZshCond::And(Box::new(left), Box::new(right)))
8375    } else {
8376        Some(left)
8377    }
8378}
8379
8380/// `static FuncDump dumps;` from `Src/parse.c:3652` — head of the
8381/// loaded-`.zwc` linked list. C walks `dumps`/`p->next` directly;
8382/// the Rust port uses a `Mutex<Vec<funcdump>>` indexed by filename
8383/// so refcount ops can find an entry without raw-pointer compare.
8384pub static DUMPS: std::sync::Mutex<Vec<funcdump>> = std::sync::Mutex::new(Vec::new());
8385
8386/// Cond-expression `!` negation level. C: handled inside
8387/// par_cond_2 at parse.c:2476-2625 via the Bang token check.
8388fn parse_cond_not() -> Option<ZshCond> {
8389    skip_cond_separators();
8390
8391    // ! can be either BANG_TOK or String "!"
8392    let is_not =
8393        tok() == BANG_TOK || (tok() == STRING_LEX && tokstr().map(|s| s == "!").unwrap_or(false));
8394    if is_not {
8395        zshlex();
8396        let inner = parse_cond_not()?;
8397        return Some(ZshCond::Not(Box::new(inner)));
8398    }
8399
8400    if tok() == INPAR_TOK {
8401        zshlex();
8402        skip_cond_separators();
8403        // c:Src/parse.c:2534-2547 par_cond_2 INPAR branch — empty
8404        // body `[[ ( ) ]]` makes the inner par_cond's recursive
8405        // par_cond_2 see OUTPAR with no leading STRING/BANG/INPAR
8406        // and YYERROR immediately. Mirror that here: if the very
8407        // next token after `(` (post separator skip) is `)`, emit
8408        // a parse error so the script aborts cleanly instead of
8409        // silently swallowing every following command. Bug #538.
8410        if tok() == OUTPAR_TOK {
8411            yyerror("condition expected");
8412            return None;
8413        }
8414        let inner = parse_cond_expr()?;
8415        skip_cond_separators();
8416        if tok() == OUTPAR_TOK {
8417            zshlex();
8418        }
8419        return Some(inner);
8420    }
8421
8422    parse_cond_primary()
8423}
8424
8425/// Cond-expression primary: unary tests (-f, -d, ...), binary
8426/// tests (=, !=, <, >, ==, =~, -eq, -ne, ...), and parenthesized
8427/// sub-expressions. Direct port of par_cond_double / par_cond_triple
8428/// / par_cond_multi at parse.c:2626-2731 (chosen by arg count).
8429fn parse_cond_primary() -> Option<ZshCond> {
8430    let s1 = match tok() {
8431        STRING_LEX => {
8432            let s = tokstr().unwrap_or_default();
8433            zshlex();
8434            s
8435        }
8436        _ => return None,
8437    };
8438
8439    skip_cond_separators();
8440
8441    // Check for unary operator. zsh's lexer tokenizes leading `-` as
8442    // `zsh_h::Dash` (`\u{9b}`, `Src/zsh.h:182`) inside gettokstr (lex.c:1390-1400
8443    // LX2_DASH — `-` always becomes Dash, untokenized later). Match
8444    // either form here, and use char-count not byte-count since Dash
8445    // is 2 UTF-8 bytes (`\xc2\x9b`).
8446    //
8447    // c:Src/parse.c par_cond — when the leading token is `-` followed
8448    // ENTIRELY by digits (`-5`, `-123`), it's a numeric literal
8449    // operand, not a unary test flag. zsh's parser checks the C
8450    // `isdigit` of the trailing chars to disambiguate; without the
8451    // check, `[[ -5 -lt -3 ]]` reads `-5` as a one-arg test flag,
8452    // then `-lt` as the operand, then `-3` as a leftover token —
8453    // emitting "unknown condition: -5" and falling through to a
8454    // command-not-found dispatch on `-3`. Bug #121 in docs/BUGS.md.
8455    let s1_chars: Vec<char> = s1.chars().collect();
8456    let is_negative_number = s1_chars.len() >= 2
8457        && IS_DASH(s1_chars[0])
8458        && s1_chars[1..].iter().all(|c| c.is_ascii_digit());
8459    if s1_chars.len() == 2 && IS_DASH(s1_chars[0]) && !is_negative_number {
8460        let s2 = match tok() {
8461            STRING_LEX => {
8462                let s = tokstr().unwrap_or_default();
8463                zshlex();
8464                s
8465            }
8466            _ => {
8467                // c:Src/parse.c par_cond_2 — when the leading `-X`
8468                // is a 2-char dash form, zsh ALWAYS treats it as a
8469                // unary test op (the operand-missing case errors
8470                // immediately with `unknown condition: -X`). Don't
8471                // fall back to `Unary("-n", "-X")` — that path
8472                // silently let `[[ -z ]]` evaluate as
8473                // `[[ -n "-z" ]]` → true. Bug #480/#481.
8474                //
8475                // Convert Dash (\u{9b}) back to ASCII `-` for the
8476                // user-visible diagnostic so it reads "unknown
8477                // condition: -z" not "unknown condition: <Dash>z".
8478                let display: String = s1.chars().map(|c| {
8479                    if IS_DASH(c) { '-' } else { c }
8480                }).collect();
8481                crate::ported::utils::zerr(&format!(
8482                    "unknown condition: {}",
8483                    display
8484                ));
8485                return None;
8486            }
8487        };
8488        return Some(ZshCond::Unary(s1, s2));
8489    }
8490
8491    // Check for binary operator. Direct port of zsh/Src/parse.c:2601-2603:
8492    //   incond++;  /* parentheses do globbing */
8493    //   do condlex(); while (COND_SEP());
8494    //   incond--;  /* parentheses do grouping */
8495    // The bump makes the lexer treat `(` as a literal character inside
8496    // the RHS word (e.g. `[[ x =~ (foo) ]]`) instead of returning Inpar
8497    // and splitting the regex into multiple tokens.
8498    let op = match tok() {
8499        STRING_LEX => {
8500            let s = tokstr().unwrap_or_default();
8501            set_incond(incond() + 1);
8502            zshlex();
8503            set_incond(incond() - 1);
8504            s
8505        }
8506        INANG_TOK => {
8507            set_incond(incond() + 1);
8508            zshlex();
8509            set_incond(incond() - 1);
8510            "<".to_string()
8511        }
8512        OUTANG_TOK => {
8513            set_incond(incond() + 1);
8514            zshlex();
8515            set_incond(incond() - 1);
8516            ">".to_string()
8517        }
8518        _ => return Some(ZshCond::Unary("-n".to_string(), s1)),
8519    };
8520
8521    skip_cond_separators();
8522
8523    // c:Src/parse.c:2601-2625 par_cond_2 — only the documented binary
8524    // operators are accepted inside `[[ ... ]]`. zsh rejects ksh/bash
8525    // forms `-a` (logical AND) and `-o` (logical OR) with a parse
8526    // error ("condition expected") because they're not in the
8527    // par_cond_2 binary-op set — zsh uses `&&` / `||` instead.
8528    // Verified: `zsh -fc '[[ "" -a "x" ]]'` → exit 1, "parse error:
8529    // condition expected: ...". Without this gate, zshrs silently
8530    // built ZshCond::Binary("", "-a", "x") and ran an unknown-op
8531    // path that always evaluated false.
8532    // c:Src/parse.c:2601-2625 par_cond_2 — `-a` / `-o` n-ary chain
8533    // operators are not valid binary operators inside `[[ ... ]]`
8534    // (zsh uses `&&` / `||` instead). Match both the ASCII `-a`/
8535    // `-o` form and the tokenized `Dash+a`/`Dash+o` form that the
8536    // lexer emits inside cond bodies (Dash = \u{9b}, Src/zsh.h:182).
8537    let op_chars: Vec<char> = op.chars().collect();
8538    let is_dash_a_or_o =
8539        op_chars.len() == 2 && IS_DASH(op_chars[0]) && (op_chars[1] == 'a' || op_chars[1] == 'o');
8540    if is_dash_a_or_o {
8541        crate::ported::utils::zerr(&format!("parse error: condition expected: {}", s1));
8542        crate::ported::utils::errflag.fetch_or(
8543            crate::ported::zsh_h::ERRFLAG_ERROR,
8544            std::sync::atomic::Ordering::Relaxed,
8545        );
8546        set_tok(LEXERR);
8547        return None;
8548    }
8549
8550    let s2 = match tok() {
8551        STRING_LEX => {
8552            let s = tokstr().unwrap_or_default();
8553            zshlex();
8554            s
8555        }
8556        _ => {
8557            // c:Src/parse.c par_cond_2 — when a binary op is
8558            // recognized but the RHS operand is missing, zsh emits
8559            // `parse error: condition expected: <LHS>` at par_cond_2's
8560            // missing-rhs branch. zshrs's previous fallback returned
8561            // `Binary(s1, op, "")` which silently evaluated as if the
8562            // RHS were empty string → rc=1. Bug #482.
8563            //
8564            // Convert Dash (\u{9b}) back to ASCII `-` in the LHS
8565            // display so the diagnostic reads cleanly.
8566            let display: String = s1.chars().map(|c| {
8567                if IS_DASH(c) { '-' } else { c }
8568            }).collect();
8569            crate::ported::utils::zerr(&format!(
8570                "parse error: condition expected: {}",
8571                display
8572            ));
8573            crate::ported::utils::errflag.fetch_or(
8574                crate::ported::zsh_h::ERRFLAG_ERROR,
8575                std::sync::atomic::Ordering::Relaxed,
8576            );
8577            set_tok(LEXERR);
8578            return None;
8579        }
8580    };
8581
8582    if op == "=~" {
8583        Some(ZshCond::Regex(s1, s2))
8584    } else {
8585        Some(ZshCond::Binary(s1, op, s2))
8586    }
8587}
8588
8589fn skip_cond_separators() {
8590    while tok() == SEPER && {
8591        let s = tokstr();
8592        s.map(|s| !s.contains(';')).unwrap_or(true)
8593    } {
8594        zshlex();
8595    }
8596}
8597
8598/// Parse (( ... )) arithmetic command
8599/// Parse `(( EXPR ))` arithmetic command. C source: parse.c:1810-1834
8600/// `par_dinbrack` (despite the name; the function actually handles
8601/// DINPAR `(( ))` blocks too).
8602fn parse_arith() -> Option<ZshCommand> {
8603    let expr = tokstr().unwrap_or_default();
8604    zshlex();
8605    Some(ZshCommand::Arith(expr))
8606}
8607
8608/// Skip separator tokens
8609fn skip_separators() {
8610    while tok() == SEPER || tok() == NEWLIN {
8611        zshlex();
8612    }
8613}
8614
8615// `fdheaderlen` / `fdmagic` / `fdflags` / etc. macros from
8616// `Src/parse.c:3125-3152`. C uses raw pointer arithmetic on a
8617// `Wordcode` (= `u32 *`); the Rust port takes a slice and indexes.
8618
8619/// Port of `fdheaderlen(f)` macro (`Src/parse.c:3125`) — header
8620/// length in u32 words (read from prelude word `FD_PRELEN`).
8621#[inline]
8622pub fn fdheaderlen(f: &[u32]) -> u32 {
8623    f[FD_PRELEN]
8624}
8625
/// Port of the `fdmagic(f)` macro (`Src/parse.c:3127`): the first prelude
/// word, which is either `FD_MAGIC` or `FD_OMAGIC`.
///
/// # Panics
/// Panics if `f` is empty.
#[inline]
pub fn fdmagic(f: &[u32]) -> u32 {
    const MAGIC_WORD: usize = 0;
    f[MAGIC_WORD]
}
8632
/// Port of the `fdflags(f)` macro (`Src/parse.c:3131`): the flags byte,
/// i.e. the least-significant byte of the packed prelude word `pre[1]`.
///
/// # Panics
/// Panics if `f` has fewer than two words.
#[inline]
pub fn fdflags(f: &[u32]) -> u32 {
    // Byte 0 of the little-endian encoding is the low 8 bits of the word.
    let low_byte = f[1].to_le_bytes()[0];
    u32::from(low_byte)
}
8640
/// Port of the `fdsetflags(f, v)` macro (`Src/parse.c:3132`): overwrite
/// the flags byte (low 8 bits of `pre[1]`) with `v`, leaving the upper
/// 24 bits untouched.
///
/// # Panics
/// Panics if `f` has fewer than two words.
#[inline]
pub fn fdsetflags(f: &mut [u32], v: u8) {
    let upper_bits = f[1] & !0xff;
    f[1] = upper_bits | u32::from(v);
}
8647
/// Port of the `fdother(f)` macro (`Src/parse.c:3133`): the upper 24 bits
/// of `pre[1]`, which hold the byte offset to the opposite-byte-order
/// copy of the dump.
///
/// # Panics
/// Panics if `f` has fewer than two words.
#[inline]
pub fn fdother(f: &[u32]) -> u32 {
    // Shifting a u32 right by 8 already leaves only 24 significant bits.
    f[1] >> 8
}
8655
/// Port of the `fdsetother(f, o)` macro (`Src/parse.c:3134`): store the
/// low 24 bits of `o` in the upper 24 bits of `pre[1]`, keeping the
/// flags byte (low 8 bits) as it was.
///
/// # Panics
/// Panics if `f` has fewer than two words.
#[inline]
pub fn fdsetother(f: &mut [u32], o: u32) {
    let flags_byte = f[1] & 0xff;
    let offset_bits = (o & 0x00ff_ffff) << 8;
    f[1] = flags_byte | offset_bits;
}
8661
/// Port of the `fdversion(f)` macro (`Src/parse.c:3140`): read the
/// `ZSH_VERSION` C-string stored in the prelude starting at word 2.
///
/// At most 10 words (40 bytes) are examined, each decoded as
/// little-endian bytes; the string ends at the first NUL byte or at the
/// 40-byte limit. Invalid UTF-8 is replaced lossily.
///
/// A slice with fewer than three words yields an empty string instead of
/// panicking, so a truncated dump cannot abort the caller.
pub fn fdversion(f: &[u32]) -> String {
    let bytes: Vec<u8> = f
        .iter()
        .skip(2)
        .take(10)
        .flat_map(|w| w.to_le_bytes())
        .take_while(|&b| b != 0)
        .collect();
    String::from_utf8_lossy(&bytes).into_owned()
}
8673
8674/// Port of `firstfdhead(f)` macro (`Src/parse.c:3142`) — pointer
8675/// to the first `struct fdhead` past the prelude.
8676#[inline]
8677pub fn firstfdhead_offset() -> usize {
8678    FD_PRELEN
8679}
8680
/// Port of the `nextfdhead(f)` macro (`Src/parse.c:3143`): given the word
/// offset `cur` of a header inside `f`, return the word offset of the
/// following header by adding the current header's `hlen` field.
///
/// # Panics
/// Panics if `cur + 4` is out of bounds for `f`.
#[inline]
pub fn nextfdhead_offset(f: &[u32], cur: usize) -> usize {
    // `hlen` is field 4 of the fdhead record.
    let hlen = f[cur + 4] as usize;
    cur + hlen
}
8687
8688/// Port of `fdhflags(f)` macro (`Src/parse.c:3145`) — low 2 bits
8689/// of the header's `flags` field (the kshload/zshload marker).
8690#[inline]
8691pub fn fdhflags(h: &fdhead) -> u32 {
8692    h.flags & 0x3
8693}
8694
8695/// Port of `fdhtail(f)` macro (`Src/parse.c:3146`) — high 30 bits
8696/// of `flags`, byte offset from the name start to its basename.
8697#[inline]
8698pub fn fdhtail(h: &fdhead) -> u32 {
8699    h.flags >> 2
8700}
8701
/// Port of the `fdhbldflags(f, t)` macro (`Src/parse.c:3147`): pack
/// `(flags, tail)` into a single u32, with `tail` shifted into the upper
/// 30 bits and `flags` OR-ed into the result unmasked.
#[inline]
pub fn fdhbldflags(flags: u32, tail: u32) -> u32 {
    let tail_bits = tail << 2;
    tail_bits | flags
}
8708
8709/// Port of `fdname(f)` macro (`Src/parse.c:3152`) — name string
8710/// follows the fdhead record immediately. Reads bytes from the
8711/// dump buffer until NUL.
8712pub fn fdname(buf: &[u32], header_offset: usize) -> String {
8713    let name_word_off = header_offset + FDHEAD_WORDS;
8714    let bytes: Vec<u8> = buf[name_word_off..]
8715        .iter()
8716        .flat_map(|w| w.to_le_bytes().into_iter())
8717        .take_while(|&b| b != 0)
8718        .collect();
8719    String::from_utf8_lossy(&bytes).into_owned()
8720}
8721
8722/// Decode a `fdhead` record at the given u32-word offset in the
8723/// dump buffer. Used by the header-walk loops in `bin_zcompile -t`.
8724pub fn read_fdhead(buf: &[u32], offset: usize) -> Option<fdhead> {
8725    if offset + FDHEAD_WORDS > buf.len() {
8726        return None;
8727    }
8728    Some(fdhead {
8729        start: buf[offset],
8730        len: buf[offset + 1],
8731        npats: buf[offset + 2],
8732        strs: buf[offset + 3],
8733        hlen: buf[offset + 4],
8734        flags: buf[offset + 5],
8735    })
8736}
8737
8738/// Port of `freedump(FuncDump f)` from `Src/parse.c:3976`. C
8739/// `munmap`s, `zclose`s the fd, and frees the struct. The Rust
8740/// port relies on Drop for the `funcdump` (no mmap held in this
8741/// port — `addr`/`map` are byte-offset placeholders), so the
8742/// equivalent is removing the entry from the dumps list. Called
8743/// by `decrdumpcount` when the refcount hits zero (c:3988) and
8744/// by `closedumps` when shutting down (c:4008).
8745fn freedump_locked(g: &mut std::sync::MutexGuard<'_, Vec<funcdump>>, filename: &str) {
8746    // c:3976
8747    g.retain(|d| d.filename.as_deref() != Some(filename));
8748}
8749
8750// =====================================================================
8751// Remaining `Src/parse.c` ports (this section finishes the file).
8752//
8753// Several of these emit into the C-wordcode buffer (`ECBUF`/etc.) and
8754// are kept for completeness — the live zshrs runtime uses the
8755// `ZshProgram` AST path instead, but `bin_zcompile` (`-c`/`-a` modes)
8756// and any future `.zwc`-emit pipeline both call into these.
8757// =====================================================================
8758
8759/// `ecstr(s)` helper — `ecadd(ecstrcode(s))`. Mirrors the C macro at
8760/// `Src/parse.c:482` used everywhere by the par_* emitters.
8761#[inline]
8762pub fn ecstr(s: &str) {
8763    let code = ecstrcode(s);
8764    ecadd(code);
8765}
8766
8767/// Port of `condlex` function-pointer global from `Src/parse.c`. C
8768/// flips this between `zshlex` and `testlex` depending on whether
8769/// we're inside `[[ ]]` vs `/bin/test` builtin. zshrs has no
8770/// separate `testlex` yet, so this just defers to `zshlex`.
8771#[inline]
8772pub fn condlex() {
8773    zshlex();
8774}
8775
8776fn copy_ecstr_walk(node: &Option<Box<EccstrNode>>, p: &mut [u8]) {
8777    let mut cur = node.as_ref();
8778    while let Some(n) = cur {
8779        // c:540 — `memcpy(p + s->aoffs, s->str, strlen(s->str) + 1);`
8780        let off = n.aoffs as usize;
8781        let need = off + n.str.len() + 1;
8782        if need <= p.len() {
8783            p[off..off + n.str.len()].copy_from_slice(&n.str);
8784            p[off + n.str.len()] = 0;
8785        }
8786        // c:541 — `copy_ecstr(s->left, p);`
8787        copy_ecstr_walk(&n.left, p);
8788        // c:542 — `s = s->right;`
8789        cur = n.right.as_ref();
8790    }
8791}
8792
8793/// Port of `par_cond(void)` from `Src/parse.c:2409`. Top-level cond
8794/// OR-chain — drives `par_cond_1` and stitches `||`-separated terms
8795/// with `WCB_COND(COND_OR, …)`. This is the missing top of the
8796/// wordcode cond chain: `par_cond_wordcode` (the par_dinbrack port)
8797/// must call into HERE so that `[[ a || b ]]` and friends land
8798/// real WC_COND opcodes in `ecbuf`. Without this, the wordcode
8799/// emitter for `[[ ... ]]` produced zero words and parity dropped
8800/// 148 words on `/etc/zshrc` alone.
8801pub fn par_cond_top() -> i32 {
8802    // c:2411 — `int p = ecused, r;`
8803    let p = ECUSED.with(|c| c.get()) as usize;
8804    let r = par_cond_1();
8805    while COND_SEP() {
8806        condlex();
8807    }
8808    if tok() == DBAR {
8809        // c:2417 — `condlex(); while (COND_SEP()) condlex();`
8810        condlex();
8811        while COND_SEP() {
8812            condlex();
8813        }
8814        // c:2420-2422 — `ecispace(p, 1); par_cond(); ecbuf[p] =
8815        // WCB_COND(COND_OR, ecused-1-p);`
8816        ecispace(p, 1);
8817        par_cond_top();
8818        let ecused = ECUSED.with(|c| c.get()) as usize;
8819        ECBUF.with(|c| {
8820            c.borrow_mut()[p] = WCB_COND(COND_OR as u32, (ecused - 1 - p) as u32);
8821        });
8822        return 1;
8823    }
8824    r
8825}
8826
8827/// Port of `static int check_cond(const char *input, const char *cond)`
8828/// from `Src/parse.c:2459`. True iff `input` is the two-char `-X`
8829/// form whose `X` matches `cond` — used by par_cond_2 to detect
8830/// `-a` / `-o` n-ary chain operators and by build_dump for `-k` /
8831/// `-z`. C: `return !IS_DASH(input[0]) ? 0 : !strcmp(input+1, cond);`.
8832fn check_cond(input: &str, cond: &str) -> bool {
8833    let mut chars = input.chars();
8834    match chars.next() {
8835        Some(c) if IS_DASH(c) => chars.as_str() == cond,
8836        _ => false,
8837    }
8838}
8839
8840#[cfg(test)]
8841mod tests {
8842    use super::*;
8843    use crate::utils::{errflag, ERRFLAG_ERROR};
8844    use std::fs;
8845    use std::path::Path;
8846    use std::sync::atomic::Ordering;
8847    use std::sync::mpsc;
8848    use std::thread;
8849    use std::time::Duration;
8850
8851    /// `try_source_file` MUST refuse a stale `.zwc` cache when the
8852    /// uncompiled source has been modified more recently. The C body
8853    /// at c:3819 reads `stc.st_mtime >= stn.st_mtime` — explicitly
8854    /// `>=`, meaning only an equal-or-newer zwc is acceptable.
8855    ///
8856    /// A regression that ignored the mtime check (or used the wrong
8857    /// direction) would silently keep loading the OLD compiled body
8858    /// after the user edited the source file — every `source foo.zsh`
8859    /// would replay yesterday's code, the worst-class shell bug.
8860    ///
8861    /// Pin: create source + .zwc, then touch source to make it
8862    /// newer. try_source_file must return None.
8863    #[test]
8864    fn try_source_file_skips_stale_zwc() {
8865        let _g = crate::test_util::global_state_lock();
8866        let dir = tempfile::tempdir().expect("tempdir");
8867        let src = dir.path().join("script.zsh");
8868        let zwc = dir.path().join("script.zsh.zwc");
8869        // Create zwc FIRST (older), then source (newer).
8870        fs::write(&zwc, b"placeholder zwc").unwrap();
8871        thread::sleep(Duration::from_millis(20));
8872        fs::write(&src, b"echo hi").unwrap();
8873
8874        let result = try_source_file(src.to_str().unwrap());
8875        assert!(
8876            result.is_none(),
8877            "c:3819 — stale .zwc (older than source) MUST be rejected; \
8878             got {:?}",
8879            result
8880        );
8881    }
8882
8883    /// `try_source_file` returns None when no `.zwc` exists for the
8884    /// requested file (c:3819 `if let Ok(meta_c) = &stc` gate fails).
8885    /// This is the common case — most user scripts don't ship with
8886    /// a pre-compiled `.zwc`. The fn returning None lets the caller
8887    /// fall through to the source-read path. A regression that
8888    /// returned `Some(file)` on missing `.zwc` would route every
8889    /// `source foo.zsh` through `check_dump_file` against a
8890    /// non-existent file and crash.
8891    #[test]
8892    fn try_source_file_returns_none_when_no_zwc() {
8893        let _g = crate::test_util::global_state_lock();
8894        let dir = tempfile::tempdir().expect("tempdir");
8895        let src = dir.path().join("plain.zsh");
8896        fs::write(&src, b"echo hi").unwrap();
8897        // No .zwc sibling.
8898
8899        let result = try_source_file(src.to_str().unwrap());
8900        assert!(
8901            result.is_none(),
8902            "c:3819 gate fails when stat(wc) returns Err → None"
8903        );
8904    }
8905
8906    /// Test helper. Mirrors zsh's `errflag` save/clear/check pattern
8907    /// around a parse — see `Src/init.c:loop` which clears errflag
8908    /// before parse_event() and tests it after. Returns `Err` if the
8909    /// parse set `ERRFLAG_ERROR`; otherwise `Ok(program)`.
8910    fn parse(input: &str) -> Result<ZshProgram, String> {
8911        let saved = errflag.load(Ordering::Relaxed);
8912        errflag.fetch_and(!ERRFLAG_ERROR, Ordering::Relaxed);
8913        parse_init(input);
8914        let prog = crate::ported::parse::parse();
8915        let had_err = (errflag.load(Ordering::Relaxed) & ERRFLAG_ERROR) != 0;
8916        // Restore prior error bits; don't carry our new error into the
8917        // outer test runner.
8918        errflag.store(saved, Ordering::Relaxed);
8919        if had_err {
8920            Err("parse error".to_string())
8921        } else {
8922            Ok(prog)
8923        }
8924    }
8925
8926    #[test]
8927    fn test_simple_command() {
8928        let _g = crate::test_util::global_state_lock();
8929        let prog = parse("echo hello world").unwrap();
8930        assert_eq!(prog.lists.len(), 1);
8931        match &prog.lists[0].sublist.pipe.cmd {
8932            ZshCommand::Simple(s) => {
8933                assert_eq!(s.words, vec!["echo", "hello", "world"]);
8934            }
8935            _ => panic!("expected simple command"),
8936        }
8937    }
8938
8939    #[test]
8940    fn test_pipeline() {
8941        let _g = crate::test_util::global_state_lock();
8942        let prog = parse("ls | grep foo | wc -l").unwrap();
8943        assert_eq!(prog.lists.len(), 1);
8944
8945        let pipe = &prog.lists[0].sublist.pipe;
8946        assert!(pipe.next.is_some());
8947
8948        let pipe2 = pipe.next.as_ref().unwrap();
8949        assert!(pipe2.next.is_some());
8950    }
8951
8952    #[test]
8953    fn test_and_or() {
8954        let _g = crate::test_util::global_state_lock();
8955        let prog = parse("cmd1 && cmd2 || cmd3").unwrap();
8956        let sublist = &prog.lists[0].sublist;
8957
8958        assert!(sublist.next.is_some());
8959        let (op, _) = sublist.next.as_ref().unwrap();
8960        assert_eq!(*op, SublistOp::And);
8961    }
8962
8963    #[test]
8964    fn test_if_then() {
8965        let _g = crate::test_util::global_state_lock();
8966        let prog = parse("if test -f foo; then echo yes; fi").unwrap();
8967        match &prog.lists[0].sublist.pipe.cmd {
8968            ZshCommand::If(_) => {}
8969            _ => panic!("expected if command"),
8970        }
8971    }
8972
8973    #[test]
8974    fn test_for_loop() {
8975        let _g = crate::test_util::global_state_lock();
8976        let prog = parse("for i in a b c; do echo $i; done").unwrap();
8977        match &prog.lists[0].sublist.pipe.cmd {
8978            ZshCommand::For(f) => {
8979                assert_eq!(f.var, "i");
8980                match &f.list {
8981                    ForList::Words(w) => assert_eq!(w, &vec!["a", "b", "c"]),
8982                    _ => panic!("expected word list"),
8983                }
8984            }
8985            _ => panic!("expected for command"),
8986        }
8987    }
8988
8989    #[test]
8990    fn test_case() {
8991        let _g = crate::test_util::global_state_lock();
8992        let prog = parse("case $x in a) echo a;; b) echo b;; esac").unwrap();
8993        match &prog.lists[0].sublist.pipe.cmd {
8994            ZshCommand::Case(c) => {
8995                assert_eq!(c.arms.len(), 2);
8996            }
8997            _ => panic!("expected case command"),
8998        }
8999    }
9000
9001    #[test]
9002    fn test_function() {
9003        let _g = crate::test_util::global_state_lock();
9004        // First test just parsing "function foo" to see what happens
9005        let prog = parse("function foo { }").unwrap();
9006        match &prog.lists[0].sublist.pipe.cmd {
9007            ZshCommand::FuncDef(f) => {
9008                assert_eq!(f.names, vec!["foo"]);
9009            }
9010            _ => panic!(
9011                "expected function, got {:?}",
9012                prog.lists[0].sublist.pipe.cmd
9013            ),
9014        }
9015    }
9016
9017    #[test]
9018    fn test_redirection() {
9019        let _g = crate::test_util::global_state_lock();
9020        let prog = parse("echo hello > file.txt").unwrap();
9021        match &prog.lists[0].sublist.pipe.cmd {
9022            ZshCommand::Simple(s) => {
9023                assert_eq!(s.redirs.len(), 1);
9024                assert_eq!(s.redirs[0].rtype, REDIR_WRITE);
9025            }
9026            _ => panic!("expected simple command"),
9027        }
9028    }
9029
9030    #[test]
9031    fn test_assignment() {
9032        let _g = crate::test_util::global_state_lock();
9033        let prog = parse("FOO=bar echo $FOO").unwrap();
9034        match &prog.lists[0].sublist.pipe.cmd {
9035            ZshCommand::Simple(s) => {
9036                assert_eq!(s.assigns.len(), 1);
9037                assert_eq!(s.assigns[0].name, "FOO");
9038            }
9039            _ => panic!("expected simple command"),
9040        }
9041    }
9042
9043    #[test]
9044    fn test_parse_completion_function() {
9045        let _g = crate::test_util::global_state_lock();
9046        let input = r#"_2to3_fixes() {
9047  local -a fixes
9048  fixes=( ${${(M)${(f)"$(2to3 --list-fixes 2>/dev/null)"}:#*}//[[:space:]]/} )
9049  (( ${#fixes} )) && _describe -t fixes 'fix' fixes
9050}"#;
9051        let result = parse(input);
9052        assert!(
9053            result.is_ok(),
9054            "Failed to parse completion function: {:?}",
9055            result.err()
9056        );
9057        let prog = result.unwrap();
9058        assert!(
9059            !prog.lists.is_empty(),
9060            "Expected at least one list in program"
9061        );
9062    }
9063
9064    #[test]
9065    fn test_parse_array_with_complex_elements() {
9066        let _g = crate::test_util::global_state_lock();
9067        let input = r#"arguments=(
9068  '(- * :)'{-h,--help}'[show this help message and exit]'
9069  {-d,--doctests_only}'[fix up doctests only]'
9070  '*:filename:_files'
9071)"#;
9072        let result = parse(input);
9073        assert!(
9074            result.is_ok(),
9075            "Failed to parse array assignment: {:?}",
9076            result.err()
9077        );
9078    }
9079
9080    #[test]
9081    fn test_parse_full_completion_file() {
9082        let _g = crate::test_util::global_state_lock();
9083        let input = r##"#compdef 2to3
9084
9085# zsh completions for '2to3'
9086
9087_2to3_fixes() {
9088  local -a fixes
9089  fixes=( ${${(M)${(f)"$(2to3 --list-fixes 2>/dev/null)"}:#*}//[[:space:]]/} )
9090  (( ${#fixes} )) && _describe -t fixes 'fix' fixes
9091}
9092
9093local -a arguments
9094
9095arguments=(
9096  '(- * :)'{-h,--help}'[show this help message and exit]'
9097  {-d,--doctests_only}'[fix up doctests only]'
9098  {-f,--fix}'[each FIX specifies a transformation; default: all]:fix name:_2to3_fixes'
9099  {-j,--processes}'[run 2to3 concurrently]:number: '
9100  {-x,--nofix}'[prevent a transformation from being run]:fix name:_2to3_fixes'
9101  {-l,--list-fixes}'[list available transformations]'
9102  {-p,--print-function}'[modify the grammar so that print() is a function]'
9103  {-v,--verbose}'[more verbose logging]'
9104  '--no-diffs[do not show diffs of the refactoring]'
9105  {-w,--write}'[write back modified files]'
9106  {-n,--nobackups}'[do not write backups for modified files]'
9107  {-o,--output-dir}'[put output files in this directory instead of overwriting]:directory:_directories'
9108  {-W,--write-unchanged-files}'[also write files even if no changes were required]'
9109  '--add-suffix[append this string to all output filenames]:suffix: '
9110  '*:filename:_files'
9111)
9112
9113_arguments -s -S $arguments
9114"##;
9115        let result = parse(input);
9116        assert!(
9117            result.is_ok(),
9118            "Failed to parse full completion file: {:?}",
9119            result.err()
9120        );
9121        let prog = result.unwrap();
9122        // Should have parsed successfully with at least one statement
9123        assert!(!prog.lists.is_empty(), "Expected at least one list");
9124    }
9125
9126    #[test]
9127    fn test_parse_logs_sh() {
9128        let _g = crate::test_util::global_state_lock();
9129        let input = r#"#!/usr/bin/env bash
9130shopt -s globstar
9131
9132if [[ $(uname) == Darwin ]]; then
9133    tail -f /var/log/**/*.log /var/log/**/*.out | lolcat
9134else
9135    if [[ $ZPWR_DISTRO_NAME == raspbian ]]; then
9136        tail -f /var/log/**/*.log | lolcat
9137    else
9138        printf "Unsupported...\n" >&2
9139    fi
9140fi
9141"#;
9142        let result = parse(input);
9143        assert!(
9144            result.is_ok(),
9145            "Failed to parse logs.sh: {:?}",
9146            result.err()
9147        );
9148    }
9149
9150    #[test]
9151    fn test_parse_case_with_glob() {
9152        let _g = crate::test_util::global_state_lock();
9153        let input = r#"case "$ZPWR_OS_TYPE" in
9154    darwin*)  open_cmd='open'
9155      ;;
9156    cygwin*)  open_cmd='cygstart'
9157      ;;
9158    linux*)
9159        open_cmd='xdg-open'
9160      ;;
9161esac"#;
9162        let result = parse(input);
9163        assert!(
9164            result.is_ok(),
9165            "Failed to parse case with glob: {:?}",
9166            result.err()
9167        );
9168    }
9169
9170    #[test]
9171    fn test_parse_case_with_nested_if() {
9172        let _g = crate::test_util::global_state_lock();
9173        // Test case with nested if and glob patterns
9174        let input = r##"function zpwrGetOpenCommand(){
9175    local open_cmd
9176    case "$ZPWR_OS_TYPE" in
9177        darwin*)  open_cmd='open' ;;
9178        cygwin*)  open_cmd='cygstart' ;;
9179        linux*)
9180            if [[ "$_zpwr_uname_r" != *icrosoft* ]];then
9181                open_cmd='nohup xdg-open'
9182            fi
9183            ;;
9184    esac
9185}"##;
9186        let result = parse(input);
9187        assert!(result.is_ok(), "Failed to parse: {:?}", result.err());
9188    }
9189
9190    #[test]
9191    fn test_parse_zpwr_scripts() {
9192        let _g = crate::test_util::global_state_lock();
9193        let scripts_dir = Path::new("/Users/wizard/.zpwr/scripts");
9194        if !scripts_dir.exists() {
9195            eprintln!("Skipping test: scripts directory not found");
9196            return;
9197        }
9198
9199        let mut total = 0;
9200        let mut passed = 0;
9201        let mut failed_files = Vec::new();
9202        let mut timeout_files = Vec::new();
9203
9204        for ext in &["sh", "zsh"] {
9205            let pattern = scripts_dir.join(format!("*.{}", ext));
9206            if let Ok(entries) = glob::glob(pattern.to_str().unwrap()) {
9207                for entry in entries.flatten() {
9208                    total += 1;
9209                    let file_path = entry.display().to_string();
9210                    let content = match fs::read_to_string(&entry) {
9211                        Ok(c) => c,
9212                        Err(e) => {
9213                            failed_files.push((file_path, format!("read error: {}", e)));
9214                            continue;
9215                        }
9216                    };
9217
9218                    // Parse with timeout
9219                    let content_clone = content.clone();
9220                    let (tx, rx) = mpsc::channel();
9221                    let handle = thread::spawn(move || {
9222                        let result = parse(&content_clone);
9223                        let _ = tx.send(result);
9224                    });
9225
9226                    match rx.recv_timeout(Duration::from_secs(2)) {
9227                        Ok(Ok(_)) => passed += 1,
9228                        Ok(Err(err)) => {
9229                            failed_files.push((file_path, err));
9230                        }
9231                        Err(_) => {
9232                            timeout_files.push(file_path);
9233                            // Thread will be abandoned
9234                        }
9235                    }
9236                }
9237            }
9238        }
9239
9240        eprintln!("\n=== ZPWR Scripts Parse Results ===");
9241        eprintln!("Passed: {}/{}", passed, total);
9242
9243        if !timeout_files.is_empty() {
9244            eprintln!("\nTimeout files (>2s):");
9245            for file in &timeout_files {
9246                eprintln!("  {}", file);
9247            }
9248        }
9249
9250        if !failed_files.is_empty() {
9251            eprintln!("\nFailed files:");
9252            for (file, err) in &failed_files {
9253                eprintln!("  {} - {}", file, err);
9254            }
9255        }
9256
9257        // Allow some failures initially, but track progress
9258        let pass_rate = if total > 0 {
9259            (passed as f64 / total as f64) * 100.0
9260        } else {
9261            0.0
9262        };
9263        eprintln!("Pass rate: {:.1}%", pass_rate);
9264
9265        // Require at least 50% pass rate for now
9266        assert!(pass_rate >= 50.0, "Pass rate too low: {:.1}%", pass_rate);
9267    }
9268
9269    /// c:2643 — `get_cond_num` returns 0..=8 for the canonical binary
9270    /// test operators in order `nt ot ef eq ne lt gt le ge`. The
9271    /// index IS the wordcode opcode dispatch key; flipping any entry
9272    /// would silently mis-dispatch `[[ a -eq b ]]` to a different op.
9273    #[test]
9274    fn get_cond_num_canonical_order_matches_dispatch_table() {
9275        let _g = crate::test_util::global_state_lock();
9276        assert_eq!(get_cond_num("nt"), 0);
9277        assert_eq!(get_cond_num("ot"), 1);
9278        assert_eq!(get_cond_num("ef"), 2);
9279        assert_eq!(get_cond_num("eq"), 3);
9280        assert_eq!(get_cond_num("ne"), 4);
9281        assert_eq!(get_cond_num("lt"), 5);
9282        assert_eq!(get_cond_num("gt"), 6);
9283        assert_eq!(get_cond_num("le"), 7);
9284        assert_eq!(get_cond_num("ge"), 8);
9285    }
9286
9287    /// c:2643 — unknown operator returns -1 (sentinel for "not in the
9288    /// binary set"). Regression returning 0 silently would alias
9289    /// every unknown op to `-nt`, dispatching to the wrong handler.
9290    #[test]
9291    fn get_cond_num_unknown_operator_returns_minus_one() {
9292        let _g = crate::test_util::global_state_lock();
9293        assert_eq!(get_cond_num("xx"), -1);
9294        assert_eq!(get_cond_num(""), -1);
9295        assert_eq!(get_cond_num("eqnt"), -1, "exact-match required");
9296        assert_eq!(
9297            get_cond_num("NT"),
9298            -1,
9299            "case-sensitive — uppercase rejected"
9300        );
9301    }
9302
9303    /// c:2628 — `par_cond_double` requires arg `a` to start with `-`
9304    /// AND have at least one more char. Empty string OR single `-`
9305    /// must error (return 1 via zerr). Regression accepting empty
9306    /// would dispatch `[[ "" string ]]` as a unary test.
9307    #[test]
9308    fn par_cond_double_rejects_short_or_non_dash_first_arg() {
9309        let _g = crate::test_util::global_state_lock();
9310        // empty
9311        let _ = par_cond_double("", "b");
9312        // not-dash
9313        let _ = par_cond_double("foo", "b");
9314        // bare dash
9315        let _ = par_cond_double("-", "b");
9316        // All three must NOT crash + return 1 (error path).
9317    }
9318
9319    /// c:2647 CONDSTRS table — exhaustive iteration: every entry's
9320    /// index round-trips through get_cond_num. A regression that
9321    /// drops an entry would let `[[ a -ef b ]]` silently mis-dispatch.
9322    #[test]
9323    fn get_cond_num_round_trips_for_every_table_entry() {
9324        let _g = crate::test_util::global_state_lock();
9325        for (i, op) in ["nt", "ot", "ef", "eq", "ne", "lt", "gt", "le", "ge"]
9326            .iter()
9327            .enumerate()
9328        {
9329            assert_eq!(get_cond_num(op) as usize, i, "{op} must map to index {i}");
9330        }
9331    }
9332
9333    /// c:2643 — `get_cond_num` is byte-exact: a partial-prefix string
9334    /// must NOT match. `e` (one char) is not `eq`. Catches a
9335    /// regression using `starts_with` instead of equality.
9336    #[test]
9337    fn get_cond_num_partial_prefix_does_not_match() {
9338        let _g = crate::test_util::global_state_lock();
9339        assert_eq!(get_cond_num("e"), -1);
9340        assert_eq!(get_cond_num("eq2"), -1);
9341        assert_eq!(get_cond_num("n"), -1);
9342    }
9343
9344    /// c:2628 — `par_cond_double` checks `IS_DASH(ac[0])` so any
9345    /// non-dash first char fails. The lexed Dash sentinel `\u{9b}`
9346    /// MUST be accepted alongside ASCII `-` (the lexer emits it
9347    /// inside `[[ ... ]]`). Regression dropping the sentinel form
9348    /// would break every cond expression after lexing.
9349    #[test]
9350    fn par_cond_double_accepts_lexed_dash_sentinel() {
9351        let _g = crate::test_util::global_state_lock();
9352        // First char being the Dash sentinel + valid unary letter
9353        // must NOT trigger the "condition expected" error path.
9354        // We can't easily probe the wordcode emission here, but
9355        // the function MUST return without panic for both forms.
9356        let _ = par_cond_double("-z", "foo");
9357        let _ = par_cond_double("\u{9b}z", "foo");
9358    }
9359
9360    /// c:2643 — case sensitivity: uppercase `EQ` MUST NOT match `eq`.
9361    /// zsh's `[[ a -EQ b ]]` is documented as a parse error (only
9362    /// lowercase variants are recognised). Regression doing
9363    /// case-insensitive lookup would silently accept it.
9364    #[test]
9365    fn get_cond_num_is_case_sensitive() {
9366        let _g = crate::test_util::global_state_lock();
9367        assert_eq!(get_cond_num("EQ"), -1);
9368        assert_eq!(get_cond_num("Eq"), -1);
9369        assert_eq!(get_cond_num("eQ"), -1);
9370        // Lowercase still works.
9371        assert_eq!(get_cond_num("eq"), 3);
9372    }
9373
9374    /// `Src/parse.c:2862-2868` — `ecgetstr` inline-3-byte case packs
9375    /// up to 3 chars into bits 3-26 of the wordcode word, then C emits
9376    /// `buf[3] = '\0'; r = dupstring(buf);`. `dupstring` uses `strlen`
9377    /// so the resulting string TRUNCATES at the first NUL byte —
9378    /// short strings of 1 or 2 chars get their tail NUL-padded and
9379    /// silently dropped by strlen.
9380    ///
9381    /// The previous Rust port used `retain(|&x| x != 0)` which SPLICES
9382    /// OUT interior NULs (so `[a, 0, b]` would yield "ab" instead of
9383    /// C's "a"). Verify both endpoints work correctly:
9384    ///   * 1-char string ("a", 0, 0)        → "a"   (strlen-truncate)
9385    ///   * 2-char string ("ab", 0)          → "ab"  (strlen-truncate)
9386    ///   * 3-char string ("abc")            → "abc" (full)
9387    ///   * pathological ("a", 0, "b")       → "a"   (NOT "ab")
9388    #[test]
9389    fn ecgetstr_inline_string_truncates_at_first_nul_like_c_strlen() {
9390        let _g = crate::test_util::global_state_lock();
9391        // Build a wordcode word with `c & 2 != 0` (inline-string flag)
9392        // and the 3 bytes packed at offsets 3, 11, 19. `c & 1` is the
9393        // tokflag; clear it for this test.
9394        fn pack_inline(b0: u8, b1: u8, b2: u8) -> u32 {
9395            // c:2862 layout — bit0 = tokflag (0 here), bit1 = inline (1),
9396            // bits 3-10 = b0, bits 11-18 = b1, bits 19-26 = b2.
9397            (2u32) | ((b0 as u32) << 3) | ((b1 as u32) << 11) | ((b2 as u32) << 19)
9398        }
9399        let mk_state = |word: u32| -> estate {
9400            let p = eprog {
9401                flags: 0,
9402                len: 1,
9403                npats: 0,
9404                nref: 0,
9405                pats: Vec::new(),
9406                prog: vec![word],
9407                strs: None,
9408                shf: None,
9409                dump: None,
9410            };
9411            estate {
9412                prog: Box::new(p),
9413                pc: 0,
9414                strs: None,
9415                strs_offset: 0,
9416            }
9417        };
9418
9419        // 1-char: ('a', 0, 0) → "a"
9420        let mut st = mk_state(pack_inline(b'a', 0, 0));
9421        assert_eq!(
9422            ecgetstr(&mut st, 0, None),
9423            "a",
9424            "c:2869 strlen truncates 1-char inline at the NUL tail"
9425        );
9426
9427        // 2-char: ('a', 'b', 0) → "ab"
9428        let mut st = mk_state(pack_inline(b'a', b'b', 0));
9429        assert_eq!(
9430            ecgetstr(&mut st, 0, None),
9431            "ab",
9432            "c:2869 strlen truncates 2-char inline at the NUL tail"
9433        );
9434
9435        // 3-char: ('a', 'b', 'c') → "abc"
9436        let mut st = mk_state(pack_inline(b'a', b'b', b'c'));
9437        assert_eq!(
9438            ecgetstr(&mut st, 0, None),
9439            "abc",
9440            "c:2869 full 3-byte inline preserved"
9441        );
9442
9443        // Pathological: ('a', 0, 'b') → "a" (NOT "ab" from retain-splice)
9444        let mut st = mk_state(pack_inline(b'a', 0, b'b'));
9445        assert_eq!(
9446            ecgetstr(&mut st, 0, None),
9447            "a",
9448            "c:2869 strlen STOPS at first NUL; must not splice 'b' through"
9449        );
9450    }
9451
9452    /// Pin: `init_parse_status` resets ALL six lexer-parser flags
9453    /// per `Src/parse.c:500-502`. Specifically `inrepeat_ = 0` at
9454    /// c:501 was previously missing in the Rust port. Pin every
9455    /// reset so a future regression that drops one is caught.
9456    #[test]
9457    fn init_parse_status_resets_all_lexer_parser_flags() {
9458        let _g = crate::test_util::global_state_lock();
9459        // Dirty every flag to a non-default value.
9460        set_incasepat(5);
9461        set_incond(7);
9462        set_inredir(true);
9463        set_infor(3);
9464        set_intypeset(true);
9465        set_inrepeat(2);
9466        set_incmdpos(false);
9467        // Reset.
9468        init_parse_status();
9469        // c:500-502 — every flag back to its default.
9470        assert_eq!(incasepat(), 0, "c:500 — incasepat = 0");
9471        assert_eq!(incond(), 0, "c:500 — incond = 0");
9472        assert!(!inredir(), "c:500 — inredir = 0");
9473        assert_eq!(infor(), 0, "c:500 — infor = 0");
9474        assert!(!intypeset(), "c:500 — intypeset = 0");
9475        assert_eq!(
9476            inrepeat(),
9477            0,
9478            "c:501 — inrepeat_ = 0 (was previously missing)"
9479        );
9480        assert!(incmdpos(), "c:502 — incmdpos = 1");
9481    }
9482
9483    // ═══════════════════════════════════════════════════════════════════
9484    // AST shape tests — feed source through parse(), walk the resulting
9485    // ZshProgram, assert structural properties. Each test uses the local
9486    // `parse(input)` helper that errors cleanly on parse failure.
9487    // Anchor: where applicable, behavior matches `zsh -n -c '...'`
9488    // (parse-only, no execution — which would error on syntax issues).
9489    // ═══════════════════════════════════════════════════════════════════
9490
9491    /// Empty input → ZshProgram with no lists.
9492    #[test]
9493    fn parse_empty_source_yields_zero_lists() {
9494        let _g = crate::test_util::global_state_lock();
9495        let prog = parse("").unwrap();
9496        assert_eq!(prog.lists.len(), 0);
9497    }
9498
9499    /// Comment-only input → no lists (comments are skipped at lex level).
9500    #[test]
9501    fn parse_only_comment_yields_zero_lists() {
9502        let _g = crate::test_util::global_state_lock();
9503        let prog = parse("# this is just a comment").unwrap();
9504        assert_eq!(prog.lists.len(), 0, "comments alone produce no cmds");
9505    }
9506
9507    /// Three commands separated by `;` → three lists.
9508    #[test]
9509    fn parse_three_semicolon_separated_commands_yield_three_lists() {
9510        let _g = crate::test_util::global_state_lock();
9511        let prog = parse("a; b; c").unwrap();
9512        assert_eq!(prog.lists.len(), 3);
9513    }
9514
9515    /// Background command — async flag set on the list.
9516    #[test]
9517    fn parse_background_command_sets_async_flag() {
9518        let _g = crate::test_util::global_state_lock();
9519        let prog = parse("sleep 1 &").unwrap();
9520        assert_eq!(prog.lists.len(), 1);
9521        assert!(
9522            prog.lists[0].flags.async_,
9523            "trailing `&` must set async_ flag"
9524        );
9525    }
9526
9527    /// Pipe count: `a | b | c | d` → 4 stages.
9528    #[test]
9529    fn parse_four_stage_pipeline_has_three_next_links() {
9530        let _g = crate::test_util::global_state_lock();
9531        let prog = parse("a | b | c | d").unwrap();
9532        let mut pipe = &prog.lists[0].sublist.pipe;
9533        let mut count = 1;
9534        while let Some(next) = &pipe.next {
9535            pipe = next;
9536            count += 1;
9537        }
9538        assert_eq!(count, 4, "4 commands should produce 4 pipe stages");
9539    }
9540
9541    /// `|&` between pipeline stages sets merge_stderr.
9542    #[test]
9543    fn parse_pipe_amp_sets_merge_stderr() {
9544        let _g = crate::test_util::global_state_lock();
9545        let prog = parse("a |& b").unwrap();
9546        let pipe = &prog.lists[0].sublist.pipe;
9547        assert!(pipe.next.is_some());
9548        assert!(pipe.merge_stderr, "|& must set merge_stderr");
9549    }
9550
9551    /// `cmd1 || cmd2`: sublist.next is Some with `Or`.
9552    #[test]
9553    fn parse_or_operator_sets_sublist_op_or() {
9554        let _g = crate::test_util::global_state_lock();
9555        let prog = parse("cmd1 || cmd2").unwrap();
9556        let sublist = &prog.lists[0].sublist;
9557        let (op, _) = sublist.next.as_ref().expect("must have next");
9558        assert_eq!(*op, SublistOp::Or);
9559    }
9560
9561    /// `! cmd` sets the not flag on the sublist.
9562    #[test]
9563    fn parse_bang_negation_sets_sublist_not_flag() {
9564        let _g = crate::test_util::global_state_lock();
9565        let prog = parse("! false").unwrap();
9566        let sublist = &prog.lists[0].sublist;
9567        assert!(sublist.flags.not, "`!` prefix must set sublist.flags.not");
9568    }
9569
9570    // ── Compound commands ────────────────────────────────────────────
9571    /// `while cond; do body; done` → ZshCommand::While.
9572    #[test]
9573    fn parse_while_loop_yields_while_command() {
9574        let _g = crate::test_util::global_state_lock();
9575        let prog = parse("while true; do echo x; done").unwrap();
9576        assert!(matches!(
9577            prog.lists[0].sublist.pipe.cmd,
9578            ZshCommand::While(_)
9579        ));
9580    }
9581
9582    /// `until cond; do body; done` → ZshCommand::Until.
9583    /// Anchor: `zsh -n -c 'until false; do echo; done'` accepts and parses
9584    /// as an until-loop. zshrs accepts but emits a DIFFERENT AST variant
9585    /// (not Until). Bug — until loop is mis-classified.
9586    #[test]
9587    fn parse_until_loop_yields_until_command_anchored_to_zsh() {
9588        let _g = crate::test_util::global_state_lock();
9589        let prog = parse("until false; do echo x; done").unwrap();
9590        assert!(
9591            matches!(prog.lists[0].sublist.pipe.cmd, ZshCommand::Until(_)),
9592            "zsh parses `until` as Until variant; zshrs uses different variant: {:?}",
9593            prog.lists[0].sublist.pipe.cmd
9594        );
9595    }
9596
9597    /// `(cmd)` → Subsh variant.
9598    #[test]
9599    fn parse_parens_yield_subsh_command() {
9600        let _g = crate::test_util::global_state_lock();
9601        let prog = parse("(echo hi)").unwrap();
9602        assert!(matches!(
9603            prog.lists[0].sublist.pipe.cmd,
9604            ZshCommand::Subsh(_)
9605        ));
9606    }
9607
9608    /// `{ cmd; }` → Cursh (current-shell) command.
9609    #[test]
9610    fn parse_braces_yield_cursh_command() {
9611        let _g = crate::test_util::global_state_lock();
9612        let prog = parse("{ echo hi; }").unwrap();
9613        assert!(matches!(
9614            prog.lists[0].sublist.pipe.cmd,
9615            ZshCommand::Cursh(_)
9616        ));
9617    }
9618
9619    /// `[[ a == b ]]` → ZshCommand::Cond.
9620    #[test]
9621    fn parse_double_brackets_yield_cond_command() {
9622        let _g = crate::test_util::global_state_lock();
9623        let prog = parse("[[ a == b ]]").unwrap();
9624        assert!(matches!(
9625            prog.lists[0].sublist.pipe.cmd,
9626            ZshCommand::Cond(_)
9627        ));
9628    }
9629
9630    /// `(( 1 + 2 ))` → ZshCommand::Arith.
9631    #[test]
9632    fn parse_double_parens_yield_arith_command() {
9633        let _g = crate::test_util::global_state_lock();
9634        let prog = parse("(( 1 + 2 ))").unwrap();
9635        assert!(matches!(
9636            prog.lists[0].sublist.pipe.cmd,
9637            ZshCommand::Arith(_)
9638        ));
9639    }
9640
9641    /// `repeat 3 do echo x; done` → ZshCommand::Repeat.
9642    #[test]
9643    fn parse_repeat_loop_yields_repeat_command() {
9644        let _g = crate::test_util::global_state_lock();
9645        let prog = parse("repeat 3 do echo x; done").unwrap();
9646        assert!(matches!(
9647            prog.lists[0].sublist.pipe.cmd,
9648            ZshCommand::Repeat(_)
9649        ));
9650    }
9651
9652    // ── Function definitions ─────────────────────────────────────────
9653    /// `name() { body; }` → FuncDef variant.
9654    #[test]
9655    fn parse_paren_funcdef_yields_funcdef_command() {
9656        let _g = crate::test_util::global_state_lock();
9657        let prog = parse("greet() { echo hi; }").unwrap();
9658        assert!(matches!(
9659            prog.lists[0].sublist.pipe.cmd,
9660            ZshCommand::FuncDef(_)
9661        ));
9662    }
9663
9664    /// `function name { body; }` → FuncDef variant (zsh keyword form).
9665    #[test]
9666    fn parse_function_keyword_funcdef_yields_funcdef_command() {
9667        let _g = crate::test_util::global_state_lock();
9668        let prog = parse("function greet { echo hi; }").unwrap();
9669        assert!(matches!(
9670            prog.lists[0].sublist.pipe.cmd,
9671            ZshCommand::FuncDef(_)
9672        ));
9673    }
9674
9675    /// Syntax error — `if` without `fi` → parse returns Err.
9676    /// Anchor: `echo 'if true; then echo' | zsh -n` → "parse error".
9677    #[test]
9678    fn parse_unterminated_if_returns_error_anchored_to_zsh() {
9679        let _g = crate::test_util::global_state_lock();
9680        let r = parse("if true; then echo yes");
9681        assert!(r.is_err(), "zsh -n: parse error near `\\n`");
9682    }
9683
9684    /// Syntax error — bare `done` without `for/while/until` → error.
9685    /// Anchor: `echo done | zsh -n` → "parse error near `done`".
9686    #[test]
9687    fn parse_orphan_done_returns_error_anchored_to_zsh() {
9688        let _g = crate::test_util::global_state_lock();
9689        let r = parse("done");
9690        assert!(r.is_err(), "zsh -n: parse error near `done`");
9691    }
9692
9693    /// Simple command's words are metafied at the AST layer (matches
9694    /// zsh's internal representation: `-` lexes to `Dash` = 0x9b, `*`
9695    /// to `Star`, etc.). zsh untokenizes via `untokenize()` BEFORE
9696    /// surfacing words at execution time (Src/exec.c:execcmd_args).
9697    /// This test pins the round-trip: `untokenize(word)` recovers the
9698    /// user-visible form. If parse-time unmetafy ever lands the
9699    /// untokenize call becomes a no-op; the test stays green either
9700    /// way. Companion test below pins the metafied internal form.
9701    #[test]
9702    fn parse_simple_command_words_unmetafied_like_zsh_anchored() {
9703        let _g = crate::test_util::global_state_lock();
9704        let prog = parse("ls -la /tmp").unwrap();
9705        match &prog.lists[0].sublist.pipe.cmd {
9706            ZshCommand::Simple(s) => {
9707                let untok: Vec<String> = s
9708                    .words
9709                    .iter()
9710                    .map(|w| crate::ported::lex::untokenize(w))
9711                    .collect();
9712                assert_eq!(
9713                    untok,
9714                    vec!["ls", "-la", "/tmp"],
9715                    "untokenize(word) must yield the user-visible form"
9716                );
9717            }
9718            other => panic!("expected Simple, got {other:?}"),
9719        }
9720    }
9721
9722    /// Pin the OBSERVED zshrs contract: simple-command word array
9723    /// contains metafied bytes. This is the active (passing) version
9724    /// of the anchor above — it documents zshrs's current internal
9725    /// representation. If zshrs starts unmetafying at parse time, this
9726    /// test will FAIL and the anchor-style test above will start passing.
9727    #[test]
9728    fn parse_simple_command_words_metafied_internal_form() {
9729        let _g = crate::test_util::global_state_lock();
9730        let prog = parse("ls -la /tmp").unwrap();
9731        match &prog.lists[0].sublist.pipe.cmd {
9732            ZshCommand::Simple(s) => {
9733                assert_eq!(s.words.len(), 3);
9734                assert_eq!(s.words[0], "ls");
9735                assert_eq!(s.words[2], "/tmp");
9736                // s.words[1] contains the metafied `-` (`\u{9b}` Dash byte)
9737                // followed by "la". Don't pin the exact byte form (it
9738                // may change); pin that the length is right.
9739                assert_eq!(s.words[1].chars().count(), 3, "`-la` is 3 chars");
9740                assert!(s.words[1].ends_with("la"));
9741            }
9742            other => panic!("expected Simple, got {other:?}"),
9743        }
9744    }
9745
9746    // ─── zsh-corpus pins for parser: structural shapes ────────────────
9747
9748    /// Empty input — parse succeeds, lists may be empty.
9749    #[test]
9750    fn parse_corpus_empty_input_no_error() {
9751        let _g = crate::test_util::global_state_lock();
9752        let prog = parse("").unwrap();
9753        assert!(
9754            prog.lists.is_empty() || prog.lists.len() <= 1,
9755            "empty input → 0 or 1 list, got {}",
9756            prog.lists.len()
9757        );
9758    }
9759
9760    /// Comment-only input parses as empty.
9761    #[test]
9762    fn parse_corpus_comment_only_no_error() {
9763        let _g = crate::test_util::global_state_lock();
9764        let r = parse("# just a comment");
9765        assert!(r.is_ok(), "comment-only parse should succeed");
9766    }
9767
9768    /// `cmd1; cmd2` — two top-level lists or two sublists.
9769    #[test]
9770    fn parse_corpus_semicolon_separates_commands() {
9771        let _g = crate::test_util::global_state_lock();
9772        let prog = parse("echo a; echo b").unwrap();
9773        // We pin: parse produces > 0 lists/sublists; details vary.
9774        assert!(!prog.lists.is_empty(), "non-empty parse");
9775    }
9776
9777    /// `a && b` — DAMPER joins into a sublist chain.
9778    #[test]
9779    fn parse_corpus_logical_and_parses() {
9780        let _g = crate::test_util::global_state_lock();
9781        let r = parse("true && false");
9782        assert!(r.is_ok(), "`a && b` parses cleanly");
9783    }
9784
9785    /// `a || b` — DBAR.
9786    #[test]
9787    fn parse_corpus_logical_or_parses() {
9788        let _g = crate::test_util::global_state_lock();
9789        let r = parse("false || true");
9790        assert!(r.is_ok(), "`a || b` parses cleanly");
9791    }
9792
9793    /// `a | b` pipeline.
9794    #[test]
9795    fn parse_corpus_pipeline_parses() {
9796        let _g = crate::test_util::global_state_lock();
9797        let r = parse("echo hi | cat");
9798        assert!(r.is_ok(), "`a | b` parses");
9799    }
9800
9801    /// `if true; then echo x; fi` — basic if-then-fi block.
9802    #[test]
9803    fn parse_corpus_if_then_fi_parses() {
9804        let _g = crate::test_util::global_state_lock();
9805        let r = parse("if true; then echo x; fi");
9806        assert!(r.is_ok(), "if/then/fi parses cleanly");
9807    }
9808
9809    /// `for i in 1 2 3; do echo $i; done`.
9810    #[test]
9811    fn parse_corpus_for_do_done_parses() {
9812        let _g = crate::test_util::global_state_lock();
9813        let r = parse("for i in 1 2 3; do echo $i; done");
9814        assert!(r.is_ok(), "for/do/done parses cleanly");
9815    }
9816
9817    /// `while true; do break; done`.
9818    #[test]
9819    fn parse_corpus_while_do_done_parses() {
9820        let _g = crate::test_util::global_state_lock();
9821        let r = parse("while true; do break; done");
9822        assert!(r.is_ok(), "while/do/done parses cleanly");
9823    }
9824
9825    /// `case x in (a) echo A;; esac` — case statement.
9826    #[test]
9827    fn parse_corpus_case_esac_parses() {
9828        let _g = crate::test_util::global_state_lock();
9829        let r = parse("case x in (a) echo A;; esac");
9830        assert!(r.is_ok(), "case/esac parses cleanly");
9831    }
9832
9833    /// Function definition `f() { echo x }`.
9834    #[test]
9835    fn parse_corpus_function_def_parses() {
9836        let _g = crate::test_util::global_state_lock();
9837        let r = parse("f() { echo x }");
9838        assert!(r.is_ok(), "f() {{ ... }} parses cleanly");
9839    }
9840
9841    /// `(subshell echo a)` — subshell.
9842    #[test]
9843    fn parse_corpus_subshell_parens_parses() {
9844        let _g = crate::test_util::global_state_lock();
9845        let r = parse("( echo a )");
9846        assert!(r.is_ok(), "subshell parses cleanly");
9847    }
9848
9849    // ═══════════════════════════════════════════════════════════════════
9850    // C-parity tests pinning Src/parse.c. Tests that capture KNOWN
9851    // ZSHRS BUGS use #[ignore = "ZSHRS BUG: …"].
9852    // ═══════════════════════════════════════════════════════════════════
9853
9854    /// `empty_eprog(p)` returns true on an eprog with empty `prog`.
9855    /// C `Src/parse.c:584`:
9856    ///   `return (!p || !p->prog || *p->prog == WCB_END());`
9857    /// Rust port at parse.rs:685 — `p.prog.is_empty() || p.prog[0] == WCB_END()`.
9858    #[test]
9859    fn empty_eprog_empty_prog_returns_true() {
9860        let _g = crate::test_util::global_state_lock();
9861        let p = crate::ported::zsh_h::eprog::default();
9862        assert!(empty_eprog(&p), "empty prog vec → empty_eprog true");
9863    }
9864
9865    /// `empty_eprog(p)` returns true when first wordcode is WCB_END.
9866    /// C: `*p->prog == WCB_END()`.
9867    #[test]
9868    fn empty_eprog_first_wcb_end_returns_true() {
9869        let _g = crate::test_util::global_state_lock();
9870        let mut p = crate::ported::zsh_h::eprog::default();
9871        p.prog.push(WCB_END());
9872        assert!(empty_eprog(&p), "prog[0]==WCB_END → empty_eprog true");
9873    }
9874
9875    /// `empty_eprog(p)` returns false for non-empty non-END prog.
9876    #[test]
9877    fn empty_eprog_non_empty_non_end_returns_false() {
9878        let _g = crate::test_util::global_state_lock();
9879        let mut p = crate::ported::zsh_h::eprog::default();
9880        // Push some non-END wordcode (1 is arbitrary non-zero, not WCB_END).
9881        p.prog.push(1);
9882        assert!(!empty_eprog(&p), "non-END first opcode → false");
9883    }
9884
9885    /// `ecstrcode("")` returns a wordcode for the empty string. C
9886    /// `Src/parse.c:346-ish` ecstrcode interns strings in `ecbuf`.
9887    /// Pin: same call returns same wordcode (deterministic intern).
9888    #[test]
9889    fn ecstrcode_empty_string_returns_deterministic_code() {
9890        let _g = crate::test_util::global_state_lock();
9891        init_parse();
9892        let a = ecstrcode("");
9893        let b = ecstrcode("");
9894        assert_eq!(a, b, "intern of '' must be deterministic");
9895    }
9896
9897    /// `ecstrcode` of two different strings returns different codes.
9898    #[test]
9899    fn ecstrcode_distinct_strings_get_distinct_codes() {
9900        let _g = crate::test_util::global_state_lock();
9901        init_parse();
9902        let a = ecstrcode("foo");
9903        let b = ecstrcode("bar");
9904        // Should differ — if equal, intern table collapsed two different
9905        // strings to the same key (bug).
9906        assert_ne!(a, b, "different strings must intern to different codes");
9907    }
9908
9909    /// `parse_event(ENDINPUT)` on empty input returns None.
9910    /// C `Src/parse.c:715-ish` — empty token stream → no program.
9911    #[test]
9912    #[ignore = "ZSHRS BUG: parse_event setup needs lex state — exact behavior on empty input verification pending"]
9913    fn parse_event_empty_returns_none() {
9914        let _g = crate::test_util::global_state_lock();
9915        init_parse();
9916        // Empty input typically yields no program; needs lex state.
9917        let r = parse_event(crate::ported::lex::ENDINPUT);
9918        assert!(r.is_none(), "no tokens → no event");
9919    }
9920
9921    // ═══════════════════════════════════════════════════════════════════
9922    // Additional C-parity tests for Src/parse.c.
9923    // ═══════════════════════════════════════════════════════════════════
9924
9925    /// c:399 — `ecadd(c)` returns the index where `c` was placed,
9926    /// not the post-increment value. Sequential ecadd calls return
9927    /// strictly increasing indices.
9928    #[test]
9929    fn ecadd_returns_strictly_increasing_indices() {
9930        let _g = crate::test_util::global_state_lock();
9931        init_parse();
9932        let i0 = ecadd(0xDEAD);
9933        let i1 = ecadd(0xBEEF);
9934        let i2 = ecadd(0xC0DE);
9935        assert!(
9936            i1 > i0,
9937            "ecadd indices must strictly increase, got {i0} then {i1}"
9938        );
9939        assert!(
9940            i2 > i1,
9941            "ecadd indices must strictly increase, got {i1} then {i2}"
9942        );
9943        assert_eq!(i1, i0 + 1, "consecutive ecadds advance by 1");
9944        assert_eq!(i2, i1 + 1, "consecutive ecadds advance by 1");
9945    }
9946
9947    /// c:413 — `ecdel(p)` removes one wordcode, shrinks ecused by 1.
9948    /// Pin: subsequent ecadd reuses freed slot (ecused decreased).
9949    #[test]
9950    fn ecdel_shrinks_ecused_by_one() {
9951        let _g = crate::test_util::global_state_lock();
9952        init_parse();
9953        let _i0 = ecadd(0xA);
9954        let i1 = ecadd(0xB);
9955        let _i2 = ecadd(0xC);
9956        let next_before = ECUSED.get();
9957        ecdel(i1);
9958        let next_after = ECUSED.get();
9959        assert_eq!(
9960            next_after,
9961            next_before - 1,
9962            "ecdel must decrement ecused by exactly 1"
9963        );
9964    }
9965
9966    /// c:399-405 — `ecadd` after exhausting buffer must grow it (no
9967    /// panic on push past current eclen). Pin: 1000 adds don't crash.
9968    #[test]
9969    fn ecadd_grows_buffer_on_demand() {
9970        let _g = crate::test_util::global_state_lock();
9971        init_parse();
9972        for i in 0..1000 {
9973            ecadd(i as u32);
9974        }
9975        // No panic = grow path works.
9976        assert!(ECUSED.get() >= 1000, "1000 adds → ecused ≥ 1000");
9977    }
9978
9979    /// c:426 — `ecstrcode` of short strings (≤4 bytes) returns a
9980    /// packed inline wordcode (not an offset into the string region).
9981    /// Pin: identical short strings get identical codes.
9982    #[test]
9983    fn ecstrcode_short_strings_are_deterministic() {
9984        let _g = crate::test_util::global_state_lock();
9985        init_parse();
9986        let a = ecstrcode("ab");
9987        let b = ecstrcode("ab");
9988        assert_eq!(a, b, "same short string must intern to same code");
9989    }
9990
9991    /// c:426 — long strings (>4 bytes) hit the deduped string region.
9992    /// Pin: same long string returns same code on repeat (registry
9993    /// dedupes).
9994    #[test]
9995    fn ecstrcode_long_strings_dedupe_in_registry() {
9996        let _g = crate::test_util::global_state_lock();
9997        init_parse();
9998        let a = ecstrcode("a-much-longer-test-string");
9999        let b = ecstrcode("a-much-longer-test-string");
10000        assert_eq!(a, b, "registry must dedupe identical long strings");
10001    }
10002
10003    /// `clear_hdocs()` is idempotent — calling twice in a row leaves
10004    /// HDOCS = None and LEX_HEREDOCS empty.
10005    #[test]
10006    fn clear_hdocs_is_idempotent() {
10007        let _g = crate::test_util::global_state_lock();
10008        clear_hdocs();
10009        clear_hdocs();
10010        HDOCS.with_borrow(|h| assert!(h.is_none(), "HDOCS must be None"));
10011        LEX_HEREDOCS.with_borrow(|v| assert!(v.is_empty(), "LEX_HEREDOCS must be empty"));
10012    }
10013
10014    /// `init_parse()` resets parse state to known empty defaults.
10015    /// Multiple init_parse calls are safe (idempotent).
10016    #[test]
10017    fn init_parse_is_idempotent() {
10018        let _g = crate::test_util::global_state_lock();
10019        init_parse();
10020        init_parse();
10021        // No panic = pass.
10022    }
10023
10024    /// `empty_eprog` returns true for a default-constructed eprog
10025    /// (empty prog vec).
10026    #[test]
10027    fn empty_eprog_true_for_empty_prog() {
10028        let _g = crate::test_util::global_state_lock();
10029        let p = eprog {
10030            prog: Vec::new(),
10031            ..Default::default()
10032        };
10033        assert!(empty_eprog(&p), "empty prog vec → empty eprog");
10034    }
10035
10036    /// `empty_eprog` returns true when prog[0] == WCB_END().
10037    #[test]
10038    fn empty_eprog_true_for_end_only_prog() {
10039        let _g = crate::test_util::global_state_lock();
10040        let p = eprog {
10041            prog: vec![WCB_END()],
10042            ..Default::default()
10043        };
10044        assert!(empty_eprog(&p), "WCB_END as first opcode → empty");
10045    }
10046
10047    /// `ecadjusthere(p, d)` is safe to call when HDOCS is None.
10048    #[test]
10049    fn ecadjusthere_safe_when_hdocs_none() {
10050        let _g = crate::test_util::global_state_lock();
10051        clear_hdocs();
10052        // No panic = pass.
10053        ecadjusthere(0, 0);
10054        ecadjusthere(100, -5);
10055        ecadjusthere(0, 10);
10056    }
10057
10058    /// `ecispace(p, n)` with n=0 is a no-op.
10059    #[test]
10060    fn ecispace_zero_n_is_noop() {
10061        let _g = crate::test_util::global_state_lock();
10062        init_parse();
10063        let before = ECUSED.get();
10064        ecispace(0, 0);
10065        let after = ECUSED.get();
10066        assert_eq!(before, after, "ecispace(_, 0) must not advance ecused");
10067    }
10068
10069    // ═══════════════════════════════════════════════════════════════════
10070    // Additional C-parity tests for Src/parse.c
10071    // c:146 parse_context_save / c:191 parse_context_restore /
10072    // c:225 ecadjusthere / c:293 ecadd / c:346 ecstrcode / c:574 init_parse /
    // c:685 empty_eprog / c:693 clear_hdocs / c:786 parse_list / c:815 parse_cond /
10074    // c:2234 par_wordlist / c:2249 par_nl_wordlist
10075    // ═══════════════════════════════════════════════════════════════════
10076
10077    /// c:293 — `ecadd` returns usize (compile-time type pin).
10078    #[test]
10079    fn ecadd_returns_usize_type() {
10080        let _g = crate::test_util::global_state_lock();
10081        init_parse();
10082        let _: usize = ecadd(0);
10083    }
10084
10085    /// c:346 — `ecstrcode` returns u32 (compile-time type pin).
10086    #[test]
10087    fn ecstrcode_returns_u32_type() {
10088        let _g = crate::test_util::global_state_lock();
10089        init_parse();
10090        let _: u32 = ecstrcode("");
10091    }
10092
10093    /// c:346 — `ecstrcode("")` empty string is safe.
10094    #[test]
10095    fn ecstrcode_empty_string_no_panic() {
10096        let _g = crate::test_util::global_state_lock();
10097        init_parse();
10098        let _ = ecstrcode("");
10099    }
10100
10101    /// c:346 — `ecstrcode` is deterministic for same input.
10102    #[test]
10103    fn ecstrcode_is_deterministic() {
10104        let _g = crate::test_util::global_state_lock();
10105        init_parse();
10106        for s in ["", "a", "abc", "hello world"] {
10107            let first = ecstrcode(s);
10108            for _ in 0..3 {
10109                assert_eq!(
10110                    ecstrcode(s),
10111                    first,
10112                    "ecstrcode({:?}) must be deterministic",
10113                    s
10114                );
10115            }
10116        }
10117    }
10118
10119    /// c:786 — `parse_list` returns Option<eprog>.
10120    #[test]
10121    fn parse_list_returns_option_eprog_type() {
10122        let _g = crate::test_util::global_state_lock();
10123        init_parse();
10124        let _: Option<eprog> = parse_list();
10125    }
10126
10127    /// c:815 — `parse_cond` returns Option<eprog>.
10128    #[test]
10129    fn parse_cond_returns_option_eprog_type() {
10130        let _g = crate::test_util::global_state_lock();
10131        init_parse();
10132        let _: Option<eprog> = parse_cond();
10133    }
10134
10135    /// c:2234 — `par_wordlist` returns Vec<String>.
10136    #[test]
10137    fn par_wordlist_returns_vec_string_type() {
10138        let _g = crate::test_util::global_state_lock();
10139        init_parse();
10140        let _: Vec<String> = par_wordlist();
10141    }
10142
10143    /// c:2249 — `par_nl_wordlist` returns Vec<String>.
10144    #[test]
10145    fn par_nl_wordlist_returns_vec_string_type() {
10146        let _g = crate::test_util::global_state_lock();
10147        init_parse();
10148        let _: Vec<String> = par_nl_wordlist();
10149    }
10150
10151    /// c:693 — `clear_hdocs` deterministic state after call (no-panic).
10152    #[test]
10153    fn clear_hdocs_deterministic_after_call() {
10154        let _g = crate::test_util::global_state_lock();
10155        clear_hdocs();
10156        clear_hdocs();
10157    }
10158
10159    /// c:225 — `ecadjusthere(0, 0)` is a no-op (no delta).
10160    #[test]
10161    fn ecadjusthere_zero_delta_no_panic() {
10162        let _g = crate::test_util::global_state_lock();
10163        ecadjusthere(0, 0);
10164    }
10165
10166    /// c:225 — `ecadjusthere` is safe for arbitrary positions.
10167    #[test]
10168    fn ecadjusthere_arbitrary_pos_no_panic() {
10169        let _g = crate::test_util::global_state_lock();
10170        for p in [0usize, 1, 100, 9999] {
10171            ecadjusthere(p, 0);
10172            ecadjusthere(p, 1);
10173            ecadjusthere(p, -1);
10174        }
10175    }
10176
10177    // ═══════════════════════════════════════════════════════════════════
10178    // Additional C-parity tests for Src/parse.c FD_* accessors
10179    // c:3127 fdmagic / c:3131 fdflags / c:3133 fdother / c:3140 fdversion /
10180    // c:3145 fdhflags / c:3146 fdhtail / c:3147 fdhbldflags
10181    // ═══════════════════════════════════════════════════════════════════
10182
10183    fn build_fd_header() -> Vec<u32> {
10184        let mut buf = vec![0u32; FD_PRELEN + 32];
10185        buf[0] = FD_MAGIC; // pre[0] magic
10186        buf[1] = (0x12u32) | (0x00ABCDEFu32 << 8); // flags=0x12, other=0xABCDEF
10187                                                   // Embed version string starting at pre[2].
10188        let ver = b"5.9\0";
10189        for (i, chunk) in ver.chunks(4).enumerate() {
10190            let mut word = [0u8; 4];
10191            word[..chunk.len()].copy_from_slice(chunk);
10192            buf[2 + i] = u32::from_le_bytes(word);
10193        }
10194        buf[FD_PRELEN - 1] = (FD_PRELEN as u32) + 8; // header-len slot
10195        buf
10196    }
10197
10198    /// c:3127 — `fdmagic(f)` returns pre[0] verbatim.
10199    #[test]
10200    fn fdmagic_returns_pre_zero_word() {
10201        let buf = build_fd_header();
10202        assert_eq!(fdmagic(&buf), FD_MAGIC, "fdmagic = pre[0]");
10203    }
10204
10205    /// c:3131 — `fdflags` extracts low byte of pre[1].
10206    #[test]
10207    fn fdflags_low_byte_extraction() {
10208        let buf = build_fd_header();
10209        assert_eq!(fdflags(&buf), 0x12, "flags = pre[1] & 0xff");
10210    }
10211
10212    /// c:3133 — `fdother` extracts high 24 bits of pre[1].
10213    #[test]
10214    fn fdother_high_24_bits_extraction() {
10215        let buf = build_fd_header();
10216        assert_eq!(
10217            fdother(&buf),
10218            0x00ABCDEF,
10219            "other = pre[1] >> 8 & 0x00ffffff"
10220        );
10221    }
10222
10223    /// c:3132 — `fdsetflags` writes low byte, preserves high 24 bits.
10224    #[test]
10225    fn fdsetflags_preserves_high_24_bits() {
10226        let mut buf = build_fd_header();
10227        let other_before = fdother(&buf);
10228        fdsetflags(&mut buf, 0x42);
10229        assert_eq!(fdflags(&buf), 0x42, "new flags written");
10230        assert_eq!(fdother(&buf), other_before, "high 24 bits preserved");
10231    }
10232
10233    /// c:3134 — `fdsetother` writes high 24 bits, preserves low byte.
10234    #[test]
10235    fn fdsetother_preserves_low_byte() {
10236        let mut buf = build_fd_header();
10237        let flags_before = fdflags(&buf);
10238        fdsetother(&mut buf, 0x00DEADBE);
10239        assert_eq!(fdother(&buf), 0x00DEADBE, "new other written");
10240        assert_eq!(fdflags(&buf), flags_before, "low byte preserved");
10241    }
10242
10243    /// c:3134 — `fdsetother` clamps to 24 bits (caller-passed high bits dropped).
10244    #[test]
10245    fn fdsetother_clamps_to_24_bits() {
10246        let mut buf = build_fd_header();
10247        fdsetother(&mut buf, 0xFF_FFFF_FF);
10248        // Only the low 24 bits land in `other`.
10249        assert_eq!(fdother(&buf), 0x00FF_FFFF, "high bits dropped");
10250    }
10251
10252    /// c:3140 — `fdversion(buf)` returns String (compile-time type pin).
10253    #[test]
10254    fn fdversion_returns_string_type() {
10255        let buf = build_fd_header();
10256        let _: String = fdversion(&buf);
10257    }
10258
10259    /// c:3140 — `fdversion` reads the NUL-terminated string from pre[2..].
10260    #[test]
10261    fn fdversion_reads_until_nul() {
10262        let buf = build_fd_header();
10263        assert_eq!(fdversion(&buf), "5.9", "version read until NUL");
10264    }
10265
10266    /// c:3145 — `fdhflags(h)` returns low 2 bits of flags.
10267    #[test]
10268    fn fdhflags_low_two_bits() {
10269        let h = fdhead {
10270            start: 0,
10271            len: 0,
10272            npats: 0,
10273            strs: 0,
10274            hlen: 0,
10275            flags: 0b1011, // tail=2, kshload bits = 0b11
10276        };
10277        assert_eq!(fdhflags(&h), 0b11, "flags = h.flags & 0x3");
10278    }
10279
10280    /// c:3146 — `fdhtail(h)` returns high 30 bits (shifted right by 2).
10281    #[test]
10282    fn fdhtail_shift_right_two() {
10283        let h = fdhead {
10284            start: 0,
10285            len: 0,
10286            npats: 0,
10287            strs: 0,
10288            hlen: 0,
10289            flags: (0x12_3456 << 2) | 0x3,
10290        };
10291        assert_eq!(fdhtail(&h), 0x12_3456, "tail = h.flags >> 2");
10292    }
10293
10294    /// c:3147 — `fdhbldflags(flags, tail)` packs into single u32.
10295    #[test]
10296    fn fdhbldflags_packs_flags_low_tail_high() {
10297        let packed = fdhbldflags(0x3, 0x42);
10298        assert_eq!(packed & 0x3, 0x3, "low 2 bits = flags");
10299        assert_eq!(packed >> 2, 0x42, "high 30 bits = tail");
10300    }
10301
10302    /// c:3145-3147 — `fdhflags(h)`+`fdhtail(h)` round-trip via fdhbldflags.
10303    #[test]
10304    fn fdh_round_trip_via_bldflags() {
10305        for (flags, tail) in [(0u32, 0u32), (1, 100), (2, 0xABC), (3, 0xFFFF)] {
10306            let packed = fdhbldflags(flags, tail);
10307            let h = fdhead {
10308                start: 0,
10309                len: 0,
10310                npats: 0,
10311                strs: 0,
10312                hlen: 0,
10313                flags: packed,
10314            };
10315            assert_eq!(fdhflags(&h), flags, "flags round-trips");
10316            assert_eq!(fdhtail(&h), tail, "tail round-trips");
10317        }
10318    }
10319
10320    /// c:8271 — `firstfdhead_offset()` returns FD_PRELEN constant.
10321    #[test]
10322    fn firstfdhead_offset_returns_prelen() {
10323        assert_eq!(
10324            firstfdhead_offset(),
10325            FD_PRELEN,
10326            "first header starts after prelude"
10327        );
10328    }
10329
10330    /// c:3127 — `fdmagic` differentiates FD_MAGIC from FD_OMAGIC.
10331    #[test]
10332    fn fdmagic_differentiates_magic_omagic() {
10333        let mut buf = vec![FD_MAGIC; FD_PRELEN];
10334        assert_eq!(fdmagic(&buf), FD_MAGIC);
10335        buf[0] = FD_OMAGIC;
10336        assert_eq!(fdmagic(&buf), FD_OMAGIC, "swapped magic readable");
10337        assert_ne!(FD_MAGIC, FD_OMAGIC, "the two magics differ");
10338    }
10339}
zsh/ported/parse.rs

zsh/ported/
parse.rs