Skip to main content

zsh/ported/
parse.rs

//! Zsh parser — direct port from zsh/Src/parse.c.
//!
//! Pulls tokens via the free functions ported in lex.rs
//! (zshlex/tok/tokstr), builds an AST (relocated to
//! src/extensions/zsh_ast.rs as a Rust-only IR), and also emits
//! wordcode into ECBUF via the P9b/P9c pipeline. Follows the zsh
//! grammar closely; productions match `par_*` in Src/parse.c.
8
9use super::lex::{
10    lextok, set_tok, AMPER, AMPERBANG, AMPOUTANG, BANG_TOK, BARAMP, BAR_TOK, CASE, COPROC, DAMPER,
11    DBAR, DINANG, DINANGDASH, DINBRACK, DINPAR, DOLOOP, DONE, DOUTANG, DOUTANGAMP, DOUTANGAMPBANG,
12    DOUTANGBANG, DOUTBRACK, DOUTPAR, DSEMI, ELIF, ELSE, ENDINPUT, ENVARRAY, ENVSTRING, ESAC, FI,
13    FOR, FOREACH, FUNC, IF, INANGAMP, INANG_TOK, INBRACE_TOK, INOUTANG, INOUTPAR, INPAR_TOK,
14    IS_REDIROP, LEXERR, LEX_HEREDOCS, NEWLIN, NOCORRECT, OUTANGAMP, OUTANGAMPBANG, OUTANGBANG,
15    OUTANG_TOK, OUTBRACE_TOK, OUTPAR_TOK, REPEAT, SELECT, SEMI, SEMIAMP, SEMIBAR, SEPER,
16    STRING_LEX, THEN, TIME, TRINANG, TYPESET, UNTIL, WHILE, ZEND,
17};
18use super::zsh_h::{
19    eprog, estate, funcdump, isset, redir, unset, wc_code, wordcode, Bang, Dash, Equals, Inang,
20    Outang, Tilde, ALIASFUNCDEF, COND_AND, COND_MOD, COND_MODI, COND_NOT, COND_NT, COND_OR,
21    COND_REGEX, COND_STRDEQ, COND_STREQ, COND_STRGTR, COND_STRLT, COND_STRNEQ, CSHJUNKIELOOPS,
22    EC_DUP, EC_NODUP, EF_HEAP, EF_REAL, EXECOPT, IGNOREBRACES, IS_DASH, MULTIFUNCDEF, OPT_ISSET,
23    PM_UNDEFINED, POSIXBUILTINS, REDIRF_FROM_HEREDOC, REDIR_APP, REDIR_APPNOW, REDIR_ERRAPP,
24    REDIR_ERRAPPNOW, REDIR_ERRWRITE, REDIR_ERRWRITENOW, REDIR_FROM_HEREDOC_MASK, REDIR_HEREDOC,
25    REDIR_HEREDOCDASH, REDIR_HERESTR, REDIR_INPIPE, REDIR_MERGEIN, REDIR_MERGEOUT, REDIR_OUTPIPE,
26    REDIR_READ, REDIR_READWRITE, REDIR_VARID_MASK, REDIR_WRITE, REDIR_WRITENOW, SHORTLOOPS,
27    SHORTREPEAT, WCB_COND, WCB_SIMPLE, WC_REDIR, WC_REDIR_FROM_HEREDOC, WC_REDIR_TYPE,
28    WC_REDIR_VARID, WC_SUBLIST_COPROC, WC_SUBLIST_NOT,
29};
30pub use crate::heredoc_ast::HereDoc;
31use crate::ported::lex::{
32    incasepat, incmdpos, incond, infor, input_slice, inredir, inrepeat, intypeset, isnewlin,
33    lex_init, lineno, noaliases, nocorrect, pos, set_incasepat, set_incmdpos, set_incond,
34    set_infor, set_inredir, set_inrepeat, set_intypeset, set_isnewlin, set_lineno, set_noaliases,
35    set_nocorrect, tok, tokfd, toklineno, tokstr, zshlex,
36};
37use crate::ported::signals::unqueue_signals;
38use crate::ported::utils::{errflag, zerr, zwarnnam, ERRFLAG_ERROR};
39use crate::prompt::{cmdpop, cmdpush};
40pub use crate::zsh_ast::{
41    CaseArm, CaseTerm, CaseTerminator, CompoundCommand, ForList, HereDocInfo, ListFlags, ListOp,
42    Redirect, RedirectOp, ShellCommand, ShellWord, SimpleCommand, SublistFlags, SublistOp,
43    VarModifier, ZshAssign, ZshAssignValue, ZshCase, ZshCommand, ZshCond, ZshFor, ZshFuncDef,
44    ZshIf, ZshList, ZshParamFlag, ZshPipe, ZshProgram, ZshRedir, ZshRepeat, ZshSimple, ZshSublist,
45    ZshTry, ZshWhile,
46};
47use crate::zsh_h::{
48    wc_bdata, CS_ALWAYS, CS_ARRAY, CS_CASE, CS_CMDAND, CS_CMDOR, CS_COND, CS_CURSH, CS_ELIF,
49    CS_ELSE, CS_ERRPIPE, CS_FOR, CS_FOREACH, CS_FUNCDEF, CS_IF, CS_IFTHEN, CS_PIPE, CS_REPEAT,
50    CS_SELECT, CS_SUBSH, CS_UNTIL, CS_WHILE, EF_RUN, WCB_ARITH, WCB_ASSIGN, WCB_CASE, WCB_CURSH,
51    WCB_END, WCB_FOR, WCB_FUNCDEF, WCB_IF, WCB_LIST, WCB_PIPE, WCB_REDIR, WCB_REPEAT, WCB_SELECT,
52    WCB_SUBLIST, WCB_SUBSH, WCB_TIMED, WCB_TRY, WCB_TYPESET, WCB_WHILE, WC_ASSIGN_ARRAY,
53    WC_ASSIGN_INC, WC_ASSIGN_NEW, WC_ASSIGN_SCALAR, WC_CASE_AND, WC_CASE_HEAD, WC_CASE_OR,
54    WC_CASE_TESTAND, WC_FOR_COND, WC_FOR_LIST, WC_FOR_PPARAM, WC_IF_ELIF, WC_IF_ELSE, WC_IF_HEAD,
55    WC_IF_IF, WC_PIPE_END, WC_PIPE_LINENO, WC_PIPE_MID, WC_REDIR_WORDS, WC_SELECT_LIST,
56    WC_SELECT_PPARAM, WC_SUBLIST_AND, WC_SUBLIST_END, WC_SUBLIST_FLAGS, WC_SUBLIST_OR,
57    WC_SUBLIST_SIMPLE, WC_SUBLIST_TYPE, WC_TIMED_EMPTY, WC_TIMED_PIPE, WC_WHILE_UNTIL,
58    WC_WHILE_WHILE, Z_ASYNC, Z_DISOWN, Z_END, Z_SIMPLE, Z_SYNC,
59};
60use serde::{Deserialize, Serialize};
61use std::fs::{self, File};
62use std::io::{Read, Seek, SeekFrom, Write};
63use std::os::unix::fs::MetadataExt;
64use std::path::Path;
65use std::sync::atomic::{AtomicUsize, Ordering};
66use std::sync::mpsc;
67use std::thread;
68use std::time::Duration;
69
70// Names lifted out of inside-fn `use` statements (PORT.md
71// 'no imports inside FNs ever').
72
73// Direct port of `Src/parse.c:287-289` grow-policy constants.
74const EC_INIT_SIZE: i32 = 256;
75
76// Pending-here-document list — direct port of `Src/parse.c:84
77// struct heredocs *hdocs;`. Per-parser file-static (bucket-1 in
78// PORT_PLAN.md): each worker thread parsing a separate program needs
79// its own pending-heredoc list. Saved/restored across nested parses
80// by `parse_context_save`/`parse_context_restore` (parse.c:299/337).
81thread_local! {
82    /// Port of file-static `struct heredocs *hdocs;` from `Src/parse.c:84`.
83    pub static HDOCS: std::cell::RefCell<Option<Box<crate::ported::zsh_h::heredocs>>>
84        = const { std::cell::RefCell::new(None) };
85}
86
87// Wordcode-buffer thread-locals — direct port of `Src/parse.c:269-285`
88// file-statics. Per-evaluator (bucket-1 in PORT_PLAN.md): each worker
89// thread parsing a separate program needs its own wordcode buffer.
90//
91// ECBUF: the wordcode array being built. C `Wordcode ecbuf`
92// (parse.c:275).
93// ECLEN: allocated entries in ECBUF (parse.c:269).
94// ECUSED: entries actually used so far (parse.c:271).
95// ECNPATS: count of patterns referenced by ECBUF (parse.c:273).
96// ECSOFFS / ECSSUB: byte offsets into the string region
97// (parse.c:279). ECSSUB subtracts substring overlap.
98// ECNFUNC: count of functions defined so far (parse.c:285).
99// ECSTRS_INDEX: dedup index for long strings — C uses a binary tree
100// of `struct eccstr` (zsh.h:836); the canonical Eccstr port exists
101// at zsh_h::eccstr but stays unused at runtime here. The HashMap
102// preserves the API contract (lookup by (nfunc, str) → offs) with
103// simpler ownership semantics.
104thread_local! {
105    /// `ECBUF` static.
106    pub static ECBUF: std::cell::RefCell<Vec<u32>> = std::cell::RefCell::new(Vec::new());
107    static ECLEN: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
108    static ECUSED: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
109    static ECNPATS: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
110    static ECSOFFS: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
111    static ECSSUB: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
112    static ECNFUNC: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
113    static ECSTRS_INDEX: std::cell::RefCell<std::collections::HashMap<(i32, String), u32>>
114        = std::cell::RefCell::new(std::collections::HashMap::new());
115    /// C zsh's `eccstr` BST (parse.c:447). Port of `Eccstr ecstrs` —
116    /// a hashval-ordered binary search tree of long-strings for
117    /// dedup. Same cmp logic as C: nfunc, then hashval, then strcmp.
118    /// HashMap above is a fast-path lookup; this tree is the
119    /// C-fidelity walker that mirrors C's exact dedup-hit pattern
120    /// (including its quirks for hash-colliding content).
121    static ECSTRS_TREE: std::cell::RefCell<Option<Box<EccstrNode>>>
122        = const { std::cell::RefCell::new(None) };
123    /// Reverse index for `ecgetstr`: offs → owned string. Populated
124    /// at ecstrcode time so the consumer can recover the string from
125    /// the wordcode offs without walking the encode-time HashMap.
126    /// Stores the METAFIED BYTE form of each long-string, exactly
127    /// matching what C's strs region holds. `String` would not work
128    /// here because Rust strings carry UTF-8-encoded chars (e.g.
129    /// the Dash marker `\u{9b}` UTF-8-encodes to two bytes
130    /// `\xc2 \x9b`) while C stores zsh markers as single bytes
131    /// (raw `\x9b`). Storing Vec<u8> lets us write byte-for-byte
132    /// what C writes after metafy.
133    pub static ECSTRS_REVERSE: std::cell::RefCell<std::collections::HashMap<u32, Vec<u8>>>
134        = std::cell::RefCell::new(std::collections::HashMap::new());
135}
136const EC_DOUBLE_THRESHOLD: i32 = 32768;
137const EC_INCREMENT: i32 = 1024;
138
139/// Direct port of `parse_context_save(struct parse_stack *ps, int toplevel)` at `Src/parse.c:295`.
140/// Snapshots the lexer-side file-statics (which currently live on
141/// `lexer` until Phase 7 dissolution makes them file-scope
142/// thread_local!s) plus the pending heredoc list, plus the
143/// wordcode-buffer state (STUB until Phase 9b). Saves Rust-only
144/// recursion counters too so nested parses get fresh limits.
145/// WARNING: param names don't match C — Rust=(ps) vs C=(ps, toplevel)
146pub fn parse_context_save(ps: &mut parse_stack) {
147    // parse.c:299 — `ps->hdocs = hdocs; hdocs = NULL;` — save the
148    // canonical C linked-list and clear it for the nested parse.
149    ps.hdocs = HDOCS.with_borrow_mut(|h| h.take());
150    // zshrs-only: save the parallel AST-glue Vec the same way.
151    // LEX_HEREDOCS carries terminator/strip_tabs/quoted metadata
152    // that has no C analog (C stores it implicitly via tokstr).
153    ps.lex_heredocs = LEX_HEREDOCS.with_borrow_mut(|v| std::mem::take(v));
154    // parse.c:302-310 — save lexer-side state.
155    ps.incmdpos = incmdpos();
156    // parse.c:303 — `ps->aliasspaceflag = aliasspaceflag;`. Mirrors
157    // lex.c LEX_ALIAS_SPACE_FLAG so nested parses preserve the
158    // HISTIGNORESPACE-via-alias state across parser re-entry.
159    ps.aliasspaceflag = crate::ported::lex::LEX_ALIAS_SPACE_FLAG.with(|c| c.get());
160    ps.incond = incond();
161    ps.inredir = inredir();
162    ps.incasepat = incasepat();
163    ps.isnewlin = isnewlin();
164    ps.infor = infor();
165    ps.inrepeat_ = inrepeat();
166    ps.intypeset = intypeset();
167    // parse.c:312-317 — wordcode buffer state. STUB until Phase 9b
168    // (zshrs has no ecbuf yet).
169    ps.eclen = 0;
170    ps.ecused = 0;
171    ps.ecnpats = 0;
172    ps.ecbuf = None;
173    ps.ecstrs = None;
174    ps.ecsoffs = 0;
175    ps.ecssub = 0;
176    ps.ecnfunc = 0;
177    set_incmdpos(true);
178    set_incond(0);
179    set_inredir(false);
180    set_incasepat(0);
181    set_infor(0);
182    set_inrepeat(0);
183    set_intypeset(false);
184}
185
186/// Direct port of `parse_context_restore(const struct parse_stack *ps, int toplevel)` at `Src/parse.c:326`.
187/// Inverse of `parse_context_save`. Restores lexer-side state +
188/// pending heredocs + Rust-only counters from `ps`, then clears
189/// `errflag & ERRFLAG_ERROR` per parse.c:354.
190/// WARNING: param names don't match C — Rust=(ps) vs C=(ps, toplevel)
191pub fn parse_context_restore(ps: &parse_stack) {
192    // parse.c:330-331 — free any in-progress wordcode buffer.
193    // zshrs has no wordcode yet (STUB until Phase 9b); the AST
194    // nodes are owned by their parent so dropping the parser
195    // frees them.
196
197    // parse.c:333-352 — restore saved state.
198    // parse.c:337 — `hdocs = ps->hdocs;`
199    HDOCS.with_borrow_mut(|h| *h = ps.hdocs.clone());
200    // zshrs-only: restore the parallel AST-glue Vec.
201    LEX_HEREDOCS.with_borrow_mut(|v| *v = ps.lex_heredocs.clone());
202    set_incmdpos(ps.incmdpos);
203    // parse.c:334 — `aliasspaceflag = ps->aliasspaceflag;`.
204    crate::ported::lex::LEX_ALIAS_SPACE_FLAG.with(|c| c.set(ps.aliasspaceflag));
205    set_incond(ps.incond);
206    set_inredir(ps.inredir);
207    set_incasepat(ps.incasepat);
208    set_isnewlin(ps.isnewlin);
209    set_infor(ps.infor);
210    set_inrepeat(ps.inrepeat_);
211    set_intypeset(ps.intypeset);
212    // ecbuf/eclen/ecused/ecnpats/ecstrs/ecsoffs/ecssub/ecnfunc
213    // STUB until Phase 9b.
214
215    // parse.c:354 — `errflag &= ~ERRFLAG_ERROR;` — clear the
216    // error flag so the outer parse sees a clean state.
217    errflag.fetch_and(!ERRFLAG_ERROR, Ordering::Relaxed);
218}
219
/// Direct port of `ecadjusthere(int p, int d)` at `Src/parse.c:360`.
///
/// In C this walks the pending heredocs and adds `d` to every `pc`
/// that is at or after `p` (`for (p2 = hdocs; p2; p2 = p2->next) if
/// (p2->pc >= p) p2->pc += d;`). It is called by `ecispace` / `ecdel`
/// whenever wordcodes shift. This port currently performs no
/// adjustment: both arguments are accepted and ignored.
pub fn ecadjusthere(p: usize, d: i32) {
    // Placeholder until heredoc `pc` slots are wired into wordcode
    // emission; consume the arguments so the signature stays stable.
    let _ = (p, d);
}
233
234// === AST tree relocated to src/extensions/zsh_ast.rs ===
235//
236// zsh C does NOT have an AST tree — it emits wordcode directly via
237// par_event/par_list/par_sublist/par_pipe/par_cmd/par_simple/etc.
238// (Src/parse.c:485-3000) into a flat `Wordcode ecbuf[]`. The Zsh*/
239// Shell* AST node types lived in this file as a Rust-only IR that
240// stands in for that wordcode.
241//
242// P9e (PORT_PLAN.md): the types moved to src/extensions/zsh_ast.rs
243// to make their Rust-only-extension nature explicit. The full P9c +
244// P9d rewrite (par_* emitting wordcode + vm_helper reading wordcode)
245// retires them entirely — until then, callers reach them via this
246// re-export.
247
248/// Direct port of `ecispace(int p, int n)` at `Src/parse.c:372`. Insert `n`
249/// empty wordcode slots at position `p`, shifting later entries
250/// right, growing the buffer as needed, adjusting heredoc pointers.
251pub fn ecispace(p: usize, n: usize) {
252    // parse.c:376-381 — grow if needed.
253    let need = n as i32;
254    if (ECLEN.get() - ECUSED.get()) < need {
255        let cur = ECLEN.get();
256        let mut a = if cur < EC_DOUBLE_THRESHOLD {
257            cur
258        } else {
259            EC_INCREMENT
260        };
261        if need > a {
262            a = need;
263        }
264        ECBUF.with_borrow_mut(|buf| {
265            buf.resize((cur + a) as usize, 0);
266        });
267        ECLEN.set(cur + a);
268    }
269    // parse.c:382-385 — memmove p → p+n, gap of n.
270    let m = ECUSED.get() as usize - p;
271    if m > 0 {
272        ECBUF.with_borrow_mut(|buf| {
273            let needed = (ECUSED.get() as usize) + n;
274            if buf.len() < needed {
275                buf.resize(needed, 0);
276            }
277            for i in (0..m).rev() {
278                buf[p + n + i] = buf[p + i];
279            }
280            for i in 0..n {
281                buf[p + i] = 0;
282            }
283        });
284    }
285    // parse.c:386 — bump ecused by n.
286    ECUSED.set(ECUSED.get() + need);
287    // parse.c:387 — `ecadjusthere(p, n)`.
288    ecadjusthere(p, need);
289}
290
291/// Direct port of `ecadd(wordcode c)` at `Src/parse.c:397`. Append `c` to
292/// the wordcode buffer with grow-on-demand, return the new index.
293pub fn ecadd(c: u32) -> usize {
294    // parse.c:399-405 — `if ((eclen - ecused) < 1) grow`.
295    if (ECLEN.get() - ECUSED.get()) < 1 {
296        let cur = ECLEN.get();
297        let a = if cur < EC_DOUBLE_THRESHOLD {
298            cur
299        } else {
300            EC_INCREMENT
301        };
302        ECBUF.with_borrow_mut(|buf| {
303            buf.resize((cur + a) as usize, 0);
304        });
305        ECLEN.set(cur + a);
306    }
307    let idx = ECUSED.get();
308    ECBUF.with_borrow_mut(|buf| {
309        if (idx as usize) >= buf.len() {
310            buf.resize((idx + 1) as usize, 0);
311        }
312        buf[idx as usize] = c;
313    });
314    ECUSED.set(idx + 1);
315    idx as usize
316}
317
318/// Direct port of `ecdel(int p)` at `Src/parse.c:413`. Remove the
319/// wordcode at position `p`, shift later entries left by one,
320/// decrement ecused, adjust pending heredoc pointers.
321pub fn ecdel(p: usize) {
322    // parse.c:415-418 — memmove + decrement ecused.
323    let n = ECUSED.get() as usize - p - 1;
324    if n > 0 {
325        ECBUF.with_borrow_mut(|buf| {
326            for i in 0..n {
327                buf[p + i] = buf[p + i + 1];
328            }
329        });
330    }
331    ECUSED.set(ECUSED.get() - 1);
332    // parse.c:420 — `ecadjusthere(p, -1)`.
333    ecadjusthere(p, -1);
334}
335
336/// Direct port of `ecstrcode(char *s)` at `Src/parse.c:426`. Encode a
337/// string into a single wordcode (short strings ≤4 bytes packed
338/// inline; longer strings get an offset into the deduped registry).
339///
340/// The long-string path stores the METAFIED bytes (matches what C's
341/// strs region contains): collapse Rust UTF-8 chars in 0x80..=0xff
342/// to single bytes, then apply zsh metafy (high bytes ≥ 0x83 →
343/// `Meta=0x83 + byte^0x20`). Length tracking (ECSOFFS) uses the
344/// metafied byte count — same as C `strlen(s) + 1` where C's `s`
345/// is already metafied at this point.
346pub fn ecstrcode(s: &str) -> u32 {
347    // Convert Rust char-form → C-byte form. zsh's metafy() at
348    // Src/utils.c only converts bytes flagged IMETA: 0x00, 0x83
349    // (Meta itself), and 0x84..=0xa2 (Pound..Marker, the lex
350    // markers). Other bytes 0x01..=0x82 and 0xa3..=0xff pass
351    // through unchanged. See utils.c:4195-4204 typtab init.
352    //
353    // Rust receives chars. Classify each:
354    //   - codepoint in [0x83..=0xa2] → marker char (emitted by lex
355    //     post-metafy in C); 1 byte unchanged
356    //   - codepoint < 0x80 → ASCII, 1 byte unchanged
357    //   - codepoint in [0x80..=0x82] or [0xa3..=0xff] → single
358    //     non-imeta byte (user-input range); 1 byte unchanged
359    //   - codepoint > 0xff → multi-byte UTF-8 source char (e.g.
360    //     '━' = U+2501 = 0xe2 0x94 0x81). Metafy ONLY the bytes
361    //     that fall in 0x83..=0xa2; pass others through. For '━':
362    //     0xe2 stays, 0x94 → 0x83 0xb4, 0x81 stays.
363    let mut c_bytes: Vec<u8> = Vec::with_capacity(s.len());
364    let imeta = |b: u8| -> bool { b == 0 || (0x83..=0xa2).contains(&b) };
365    for ch in s.chars() {
366        let cu = ch as u32;
367        if cu < 0x80 {
368            // ASCII — single byte unchanged.
369            c_bytes.push(cu as u8);
370        } else if (0x83..=0xa2).contains(&cu) {
371            // Lex marker char (emitted by lex.add(Marker) post-metafy
372            // in C). Stored as single byte.
373            c_bytes.push(cu as u8);
374        } else {
375            // User-input char: encode UTF-8 then metafy imeta bytes.
376            // For chars 0x80..=0xff (like 'º' U+00BA), UTF-8 gives
377            // 2 bytes (e.g. `0xc2 0xba`) — zsh's lex reads these as
378            // raw bytes from input and metafy passes 0xc2 / 0xba
379            // through (both NOT imeta).
380            let mut tmp = [0u8; 4];
381            for &b in ch.encode_utf8(&mut tmp).as_bytes() {
382                if imeta(b) {
383                    c_bytes.push(0x83);
384                    c_bytes.push(b ^ 0x20);
385                } else {
386                    c_bytes.push(b);
387                }
388            }
389        }
390    }
391    // c:`has_token` (Src/utils.c:2282) → `itok(*s)` → `typtab[c] & ITOK`.
392    // ITOK is set for bytes `Pound..=Nularg` (0x84..=0xa1) per
393    // Src/utils.c:4198 (`for (t0=Pound; t0<=LAST_NORMAL_TOK; t0++)
394    // typtab[t0]|=ITOK`) plus :4200 (`for (t0=Snull; t0<=Nularg; t0++)
395    // typtab[t0]|=ITOK|IMETA|INULL`). Pound=0x84 Bang=0x9c (last normal),
396    // Snull=0x9d..Nularg=0xa1. Meta=0x83 has IMETA but NOT ITOK.
397    let t = c_bytes.iter().any(|&b| (0x84..=0xa1).contains(&b));
398    let l = c_bytes.len() + 1; // include NUL terminator
399    if l <= 4 {
400        // parse.c:436-445 — short-string inline pack. Uses raw C-bytes
401        // (NOT metafied — the inline packing stores 1 byte per slot).
402        let mut c: u32 = if t { 3 } else { 2 };
403        match l {
404            4 => {
405                c |= (c_bytes[2] as u32) << 19;
406                c |= (c_bytes[1] as u32) << 11;
407                c |= (c_bytes[0] as u32) << 3;
408            }
409            3 => {
410                c |= (c_bytes[1] as u32) << 11;
411                c |= (c_bytes[0] as u32) << 3;
412            }
413            2 => {
414                c |= (c_bytes[0] as u32) << 3;
415            }
416            1 => {
417                // parse.c:443 — empty string special case.
418                c = if t { 7 } else { 6 };
419            }
420            _ => {}
421        }
422        c
423    } else {
424        // parse.c:447-466 — long string. Port of C's eccstr BST walk
425        // exactly: walk the tree comparing nfunc, then hashval, then
426        // strcmp on bytes. Return offs on full match; insert new
427        // leaf otherwise. Matches C's exact dedup-hit pattern
428        // (which is content-dependent — hash collisions and the
429        // lazy short-circuit cmp chain make the tree shape determine
430        // whether matching nodes are reachable).
431        // hasher is byte-by-byte polynomial (hashtable.c:86); pass
432        // c_bytes via from_utf8_unchecked so non-UTF-8 zsh marker
433        // bytes feed straight in. SAFETY: hasher only iterates
434        // `.bytes()` — no UTF-8 validity assumed.
435        let val =
436            crate::ported::hashtable::hasher(unsafe { std::str::from_utf8_unchecked(&c_bytes) });
437        let nfunc = ECNFUNC.get();
438        let found_offs = ECSTRS_TREE.with_borrow_mut(|root| {
439            // Walk the tree. At each node, if all 3 cmps == 0,
440            // return the node's offs. Otherwise descend left/right
441            // by the first non-zero cmp's sign.
442            let mut cur: &mut Option<Box<EccstrNode>> = root;
443            loop {
444                let p = match cur.as_mut() {
445                    Some(p) => p,
446                    None => break None,
447                };
448                // c:448 — `cmp = p->nfunc - ecnfunc`
449                let mut cmp = (p.nfunc as i64) - (nfunc as i64);
450                if cmp == 0 {
451                    // c:448 — `&& !(cmp = (long)p->hashval - (long)val)`
452                    // C does `(int)(p->hashval - val)` — unsigned 32-bit
453                    // subtraction wraps, then cast to int. Use
454                    // wrapping_sub + as i32 to match the bit pattern.
455                    cmp = (p.hashval.wrapping_sub(val) as i32) as i64;
456                    if cmp == 0 {
457                        // c:448 — `&& !(cmp = strcmp(p->str, s))`
458                        cmp = match p.str.as_slice().cmp(c_bytes.as_slice()) {
459                            std::cmp::Ordering::Less => -1,
460                            std::cmp::Ordering::Equal => 0,
461                            std::cmp::Ordering::Greater => 1,
462                        };
463                        if cmp == 0 {
464                            // c:450 — `return p->offs;`
465                            break Some(p.offs);
466                        }
467                    }
468                }
469                // c:452 — `pp = (cmp < 0 ? &p->left : &p->right);`
470                cur = if cmp < 0 { &mut p.left } else { &mut p.right };
471            }
472        });
473        if let Some(offs) = found_offs {
474            return offs;
475        }
476        // c:462 — `p->offs = ((ecsoffs - ecssub) << 2) | (t ? 1 : 0);`
477        let offs = (((ECSOFFS.get() - ECSSUB.get()) as u32) << 2) | if t { 1 } else { 0 };
478        // c:463 — `p->aoffs = ecsoffs;` (absolute write position).
479        let aoffs = ECSOFFS.get() as u32;
480        // c:457-465 — insert new node at the NULL slot the walk
481        // terminated at. Encode the walk path as a Vec<bool> of
482        // left/right turns (true = right), then re-descend to
483        // insert. Borrow-checker friendly: a single mutable walk
484        // that either finds an existing node (descend) or fills
485        // the empty slot (return).
486        let stored = c_bytes.clone();
487        let stored_len = stored.len();
488        let new_node = Box::new(EccstrNode {
489            left: None,
490            right: None,
491            str: stored.clone(),
492            offs,
493            aoffs,
494            nfunc,
495            hashval: val,
496        });
497        ECSTRS_TREE.with_borrow_mut(|root| {
498            // Build the path first (immutable-walk; safe because we
499            // only ever go further down).
500            let mut path: Vec<bool> = Vec::new();
501            {
502                let mut cur: &Option<Box<EccstrNode>> = root;
503                while let Some(p) = cur.as_ref() {
504                    let mut cmp = (p.nfunc as i64) - (nfunc as i64);
505                    if cmp == 0 {
506                        // C does `(int)(p->hashval - val)` — unsigned 32-bit
507                        // subtraction wraps, then cast to int. Use
508                        // wrapping_sub + as i32 to match the bit pattern.
509                        cmp = (p.hashval.wrapping_sub(val) as i32) as i64;
510                        if cmp == 0 {
511                            cmp = match p.str.as_slice().cmp(c_bytes.as_slice()) {
512                                std::cmp::Ordering::Less => -1,
513                                std::cmp::Ordering::Equal => 0,
514                                std::cmp::Ordering::Greater => 1,
515                            };
516                        }
517                    }
518                    let go_right = cmp >= 0;
519                    path.push(go_right);
520                    cur = if go_right { &p.right } else { &p.left };
521                }
522            }
523            // Descend mutably along the recorded path and assign at
524            // the NULL leaf.
525            let mut cur: &mut Option<Box<EccstrNode>> = root;
526            for turn in path {
527                let p = cur.as_mut().expect("path matches walk");
528                cur = if turn { &mut p.right } else { &mut p.left };
529            }
530            *cur = Some(new_node);
531        });
532        // Also keep the existing reverse index (offs → bytes) for
533        // ecgetstr_wordcode and copy_ecstr — they read flat by offs.
534        ECSTRS_REVERSE.with_borrow_mut(|m| {
535            m.insert(offs, stored);
536        });
537        let _ = l;
538        ECSOFFS.set(ECSOFFS.get() + (stored_len + 1) as i32);
539        offs
540    }
541}
542
543/// Initialize parser status. Direct port of zsh/Src/parse.c:491
544/// `init_parse_status`. Clears the per-parse-call lexer flags
545/// so a fresh parse starts from cmd-position with no nesting
546/// state inherited from a prior parse.
547///
548/// Previously the Rust port omitted `inrepeat_ = 0` at c:501.
549/// `inrepeat_` is the `repeat N <body>` parse-state counter that
550/// the lexer toggles in 3 phases (1 → 2 → 3 → 0). Without the
551/// reset, a fresh parse called after an in-flight `repeat`
552/// command would inherit the stale counter and silently misread
553/// the next token as a body of an already-completed repeat.
554pub fn init_parse_status() {
555    // c:491
556    // parse.c:500-502 — `incasepat = incond = inredir = infor =
557    // intypeset = 0; inrepeat_ = 0; incmdpos = 1;`
558    set_incasepat(0); // c:500
559    set_incond(0); // c:500
560    set_inredir(false); // c:500
561    set_infor(0); // c:500
562    set_intypeset(false); // c:500
563    set_inrepeat(0); // c:501 inrepeat_ = 0
564    set_incmdpos(true); // c:502
565}
566
567/// Initialize parser for a fresh parse. Direct port of
568/// zsh/Src/parse.c:509 `init_parse`. C source allocates a
569/// fresh wordcode buffer (ecbuf) sized EC_INIT_SIZE, resets the
570/// per-parse-call counters, and calls init_parse_status. zshrs
571/// has no flat wordcode buffer (AST is built inline) so this
572/// function reduces to init_parse_status + recursion_depth/
573/// global_iterations clear.
574pub fn init_parse() {
575    // parse.c:513-520 — `ecbuf = (Wordcode) zalloc(EC_INIT_SIZE *
576    // sizeof(wordcode)); eclen = EC_INIT_SIZE; ecused = 0;
577    // ecnpats = 0; ecstrs = NULL; ecsoffs = ecnfunc = 0;
578    // ecssub = 0;`. P9b — initialize the per-evaluator wordcode
579    // buffer for this parse call. zshrs uses thread-local
580    // statics declared at file scope (parse.rs:25-50).
581    ECBUF.with_borrow_mut(|buf| {
582        buf.clear();
583        buf.resize(EC_INIT_SIZE as usize, 0);
584    });
585    ECLEN.set(EC_INIT_SIZE);
586    ECUSED.set(0);
587    ECNPATS.set(0);
588    ECSOFFS.set(0);
589    ECSSUB.set(0);
590    ECNFUNC.set(0);
591    ECSTRS_INDEX.with_borrow_mut(|m| m.clear());
592    ECSTRS_REVERSE.with_borrow_mut(|m| m.clear());
593    ECSTRS_TREE.with_borrow_mut(|t| *t = None);
594
595    // parse.c:522 — `init_parse_status();`
596    init_parse_status();
597}
598
599/// Port of `copy_ecstr(Eccstr s, char *p)` from `Src/parse.c:537`.
600/// Walks the BST and writes each entry to `p[s->aoffs..]` matching
601/// C's recursive in-order traversal exactly. The old impl used the
602/// `ECSTRS_REVERSE` HashMap keyed by `offs` (= ecssub-relative
603/// wordcode-encoded offset), which collides across funcdef scopes:
604/// a string at relative offs=0 inside funcdef A and another at
605/// relative offs=0 inside funcdef B share the same key, so one
606/// overwrites the other.
607pub fn copy_ecstr(_table: &std::collections::HashMap<u32, Vec<u8>>, p: &mut [u8]) {
608    // c:537-544 — walk eccstr BST recursively, writing each node's
609    // str at p[node->aoffs..node->aoffs + strlen + 1] (NUL-terminated).
610    ECSTRS_TREE.with_borrow(|root| {
611        copy_ecstr_walk(root, p);
612    });
613}
614
615/// Port of `bld_eprog(int heap)` from `Src/parse.c:547`. Finalizes
616/// the in-build `ECBUF`/`ECSTRS`/`ECNPATS` state into an `Eprog`.
617/// Resets the build state so a new parse can start.
618pub fn bld_eprog(heap: bool) -> eprog {
619    // c:547
620
621    // c:555 — emit WC_END opcode. `WCB_END` is `WC_END_DEFAULT` (0).
622    ecadd(0);
623
624    let ecused = ECUSED.with(|c| c.get()) as usize;
625    let ecnpats = ECNPATS.with(|c| c.get()) as usize;
626    let ecsoffs = ECSOFFS.with(|c| c.get()) as usize;
627
628    // c:557-559 — `ret->len = ((ecnpats * sizeof(Patprog)) +
629    //                            (ecused * sizeof(wordcode)) +
630    //                            ecsoffs);`
631    // sizeof(Patprog) = sizeof(struct patprog *) = pointer size.
632    // On 64-bit targets that's 8, on 32-bit that's 4. C's eprog
633    // ->len is the canonical value for parity tests, so we use
634    // the same arithmetic.
635    let prog_bytes = ecused * 4; // sizeof(wordcode) = 4
636    let len = (ecnpats * size_of::<*const u8>()) + prog_bytes + ecsoffs;
637
638    // Snapshot the wordcode buffer + string table.
639    let prog_words: Vec<u32> = ECBUF.with(|c| c.borrow()[..ecused].to_vec());
640    let mut strs_bytes = vec![0u8; ecsoffs];
641    ECSTRS_REVERSE.with(|c| copy_ecstr(&c.borrow(), &mut strs_bytes));
642
643    // c:566 — store strs as raw bytes via from_utf8_unchecked so
644    // single-byte zsh markers (e.g. Dash 0x9b) survive intact.
645    // `String::from_utf8_lossy` would replace them with U+FFFD
646    // (`\xef\xbf\xbd`), breaking byte-for-byte parity with C's
647    // strs region. SAFETY: downstream consumers of `eprog.strs`
648    // index by byte offset (per the wordcode `(offs >> 2)` offset
649    // encoding) and call `.as_bytes()` — they never iterate as
650    // chars or rely on UTF-8 validity, so storing non-UTF-8 bytes
651    // in a String is safe in practice. C zsh's strs is `char *`
652    // with the same byte-not-char semantics.
653    let strs_string = unsafe { String::from_utf8_unchecked(strs_bytes) };
654    let ret = eprog {
655        flags: if heap { EF_HEAP } else { EF_REAL }, // c:570
656        len: len as i32,                             // c:559
657        npats: ecnpats as i32,                       // c:561
658        nref: if heap { -1 } else { 1 },             // c:562
659        pats: Vec::new(),                            // c:563 dummy_patprog
660        prog: prog_words,                            // c:565
661        strs: Some(strs_string),
662        shf: None,
663        dump: None,
664    };
665
666    // c:577 — free ecbuf so next parse starts fresh.
667    ECBUF.with(|c| c.borrow_mut().clear());
668    ECLEN.with(|c| c.set(0));
669    ECUSED.with(|c| c.set(0));
670    ECNPATS.with(|c| c.set(0));
671    ECSOFFS.with(|c| c.set(0));
672    ECSTRS_INDEX.with(|c| c.borrow_mut().clear());
673    ECSTRS_REVERSE.with(|c| c.borrow_mut().clear());
674    ECSTRS_TREE.with(|t| *t.borrow_mut() = None);
675
676    ret
677}
678
679/// Port of `int empty_eprog(Eprog p)` from `Src/parse.c:584`. C
680/// body: `return (!p || !p->prog || *p->prog == WCB_END());` —
681/// the eprog is empty when its prog buffer is missing or the
682/// first wordcode is the WC_END marker. Used by signal handlers
683/// (`Src/signals.c:712`) to short-circuit a trap that resolves to
684/// an empty program.
685pub fn empty_eprog(p: &eprog) -> bool {
686    p.prog.is_empty() || p.prog[0] == WCB_END()
687}
688
689/// Clear pending here-document list. Direct port of
690/// `clear_hdocs(void)` from `Src/parse.c:591`. The C version walks
691/// `hdocs` and frees each node; Rust drops the `Box<heredocs>`
692/// chain automatically when the head is replaced with None.
693pub fn clear_hdocs() {
694    // c:591
695    // c:593-598 — for (p = hdocs; p; p = n) { n = p->next; zfree(p); }
696    // c:599 — hdocs = NULL;
697    HDOCS.with_borrow_mut(|h| *h = None);
698    // zshrs-only: also drop the parallel AST-glue Vec. No C
699    // analog — LEX_HEREDOCS is Rust-only working-set state.
700    LEX_HEREDOCS.with_borrow_mut(|v| v.clear());
701}
702
703/// Top-level parse-event entry. Direct port of zsh/Src/parse.c:
704/// 612-631 `parse_event`. Reads one event from the lexer (a
705/// sublist optionally followed by SEPER/AMPER/AMPERBANG) and
706/// returns the resulting ZshProgram.
707///
708/// `endtok` is the token that terminates the event — usually
709/// ENDINPUT, but for command-style substitutions the closing
710/// `)` (zsh's CMD_SUBST_CLOSE).
711///
712/// zshrs port note: zsh's parse_event returns an `Eprog` (heap-
713/// allocated wordcode program). zshrs returns a `ZshProgram`
714/// (AST root). Same role at the parse-output boundary.
715pub fn parse_event(endtok: lextok) -> Option<ZshProgram> {
716    // parse.c:616-619 — reset state and prime the lexer.
717    set_tok(ENDINPUT);
718    set_incmdpos(true);
719    // parse.c:618 — `aliasspaceflag = 0;`. Fresh event: discard any
720    // alias-space carry-over from a prior parse so HISTIGNORESPACE
721    // doesn't suppress the next entered command line.
722    crate::ported::lex::LEX_ALIAS_SPACE_FLAG.with(|c| c.set(0));
723    zshlex();
724    // parse.c:620 — `init_parse();`
725    init_parse();
726
727    // parse.c:622-625 — drive par_event; on failure clear hdocs.
728    if !par_event(endtok) {
729        clear_hdocs();
730        return None;
731    }
732    // parse.c:626-628 — if endtok != ENDINPUT, this is a sub-
733    // parse for a substitution that doesn't need its own eprog.
734    // zshrs returns an empty program in that case (caller
735    // discards).
736    if endtok != ENDINPUT {
737        return Some(ZshProgram { lists: Vec::new() });
738    }
739    // parse.c:630 — `bld_eprog(1);` — build the final eprog.
740    // zshrs has already built the AST via parse_program_until,
741    // but parse_event uses par_event directly so we need to
742    // collect what par_event accumulated.
743    Some(parse_program_until(None))
744}
745
746/// Parse one event (sublist with optional separator). Direct
747/// port of zsh/Src/parse.c:635 `par_event`. Returns true if
748/// an event was successfully parsed, false on EOF / endtok.
749///
750/// zshrs port note: the C version emits wordcodes via ecadd/
751/// set_list_code; zshrs's parser builds AST nodes via
752/// par_sublist + par_list. Same flow, different output.
753pub fn par_event(endtok: lextok) -> bool {
754    // parse.c:639-643 — skip leading SEPERs.
755    while tok() == SEPER {
756        // parse.c:640-641 — at top-level (endtok == ENDINPUT),
757        // a SEPER on a fresh line ends the event.
758        if isnewlin() > 0 && endtok == ENDINPUT {
759            return false;
760        }
761        zshlex();
762    }
763    // parse.c:644-647 — terminate on EOF or matching close-token.
764    if tok() == ENDINPUT {
765        return false;
766    }
767    if tok() == endtok {
768        return true;
769    }
770    // parse.c:649-... — drive par_sublist + handle terminator.
771    // zshrs's par_sublist already builds the AST node directly.
772    match par_sublist() {
773        Some(_) => {
774            // parse.c:651-693 — terminator handling. zshrs's
775            // par_list wraps this; for parse_event we just
776            // confirm the sublist parsed.
777            true
778        }
779        None => false,
780    }
781}
782
783/// Port of `parse_list(void)` from `Src/parse.c:697`. C-shape entry
784/// point: drives `par_list` and finalizes via `bld_eprog`. Returns
785/// `None` on syntax error.
786pub fn parse_list() -> Option<eprog> {
787    // c:697
788    set_tok(ENDINPUT);
789    init_parse();
790    zshlex();
791    // c:Src/parse.c:705 — `par_list(&c);` emits wordcode for the
792    // full multi-statement list (its goto-rec loop walks all
793    // SEPER-separated sublists). The Rust AST par_list() emits
794    // NOTHING to the wordcode buffer (only builds the AST), so
795    // bld_eprog returned an empty program AND tok stayed at
796    // SEPER, tripping the syntax-error check below for any
797    // \`cmd; cmd\` body.
798    //
799    // Route through par_event_wordcode (the wordcode emitter,
800    // lines 4395+) which mirrors C's par_list loop semantics
801    // and populates the wordcode buffer that bld_eprog reads.
802    let _start = par_event_wordcode();
803    if tok() != ENDINPUT {
804        clear_hdocs();
805        set_tok(LEXERR);
806        yyerror("syntax error");
807        return None;
808    }
809    Some(bld_eprog(false))
810}
811
812/// Port of `parse_cond(void)` from `Src/parse.c:722`. Only used by
813/// `bin_test`/`bin_bracket` for `/bin/test`/`[` compat — the
814/// `condlex` global must already point at `testlex` before entry.
815pub fn parse_cond() -> Option<eprog> {
816    // c:722
817    init_parse();
818    if par_cond().is_none() {
819        clear_hdocs();
820        return None;
821    }
822    Some(bld_eprog(true))
823}
824
825// ============================================================
826// Wordcode emission helpers (parse.c private helpers)
827//
828// Direct ports of zsh's wordcode-emission helpers in parse.c.
829// These write u32 opcodes into a flat `ecbuf` array thread-local
830// via ecadd / ecdel / ecispace / ecstrcode and friends. The
831// par_*_wordcode family at parse.rs:1700-3500 walks the lex
832// stream and emits a real wordcode buffer here.
833//
834// (The AST tree built by par_program / par_simple / etc. is a
835// separate path used by fusevm; see compile_zsh.rs for the AST
836// → fusevm-bytecode compiler.)
837// ============================================================
838
839/// Patch a list-placeholder wordcode with its actual opcode +
840/// jump distance. Direct port of zsh/Src/parse.c:738
841/// `set_list_code`. zsh emits an `ecadd(0)` placeholder before
842/// par_sublist runs, then comes back through set_list_code to
843/// rewrite the slot with WCB_LIST(type, distance) once the
844/// sublist's final length is known.
845///
846/// Port of `set_list_code(int p, int type, int cmplx)` from
847/// `Src/parse.c:738`. Patches the WCB_LIST header at `p` based on
848/// whether the sublist body is simple (single command, no
849/// pipeline) and Z_SYNC/Z_END — emits the Z_SIMPLE-optimized
850/// header when possible, otherwise the plain WCB_LIST(type, 0).
851pub fn set_list_code(p: usize, type_code: i32, cmplx: bool) {
852    let _ = wc_bdata;
853    // c:740 — `if (!cmplx && (type == Z_SYNC || type == (Z_SYNC | Z_END))
854    // && WC_SUBLIST_TYPE(ecbuf[p+1]) == WC_SUBLIST_END)`
855    let sublist_code = ECBUF.with_borrow(|b| b.get(p + 1).copied().unwrap_or(0));
856    let z = type_code;
857    let qualifies = !cmplx
858        && (z == Z_SYNC || z == (Z_SYNC | Z_END))
859        && WC_SUBLIST_TYPE(sublist_code) == WC_SUBLIST_END;
860    if qualifies {
861        // c:742 — `int ispipe = !(WC_SUBLIST_FLAGS(ecbuf[p+1])
862        // & WC_SUBLIST_SIMPLE);`
863        let ispipe = (WC_SUBLIST_FLAGS(sublist_code) & WC_SUBLIST_SIMPLE) == 0;
864        // c:743 — `ecbuf[p] = WCB_LIST((type|Z_SIMPLE), ecused-2-p);`
865        let used = ECUSED.get() as usize;
866        let off = used.saturating_sub(2 + p);
867        ECBUF.with_borrow_mut(|b| {
868            if p < b.len() {
869                b[p] = WCB_LIST((z | Z_SIMPLE) as wordcode, off as wordcode);
870            }
871        });
872        // c:744 — `ecdel(p+1);`
873        ecdel(p + 1);
874        // c:745-746 — `if (ispipe) ecbuf[p+1] = WC_PIPE_LINENO(ecbuf[p+1]);`
875        if ispipe {
876            ECBUF.with_borrow_mut(|b| {
877                if p + 1 < b.len() {
878                    b[p + 1] = WC_PIPE_LINENO(b[p + 1]);
879                }
880            });
881        }
882    } else {
883        // c:748 — `ecbuf[p] = WCB_LIST(type, 0);`
884        ECBUF.with_borrow_mut(|b| {
885            if p < b.len() {
886                b[p] = WCB_LIST(z as wordcode, 0);
887            }
888        });
889    }
890}
891
892/// Port of `set_sublist_code(int p, int type, int flags, int skip, int cmplx)`
893/// from `Src/parse.c:755`. Patches the WCB_SUBLIST header at `p`.
894/// When the sublist is non-complex (single command, no pipeline),
895/// sets WC_SUBLIST_SIMPLE and rewrites the following slot to
896/// `WC_PIPE_LINENO`.
897pub fn set_sublist_code(p: usize, type_code: i32, flags: i32, skip: i32, cmplx: bool) {
898    if cmplx {
899        // c:758 — `ecbuf[p] = WCB_SUBLIST(type, flags, skip);`
900        ECBUF.with_borrow_mut(|b| {
901            if p < b.len() {
902                b[p] = WCB_SUBLIST(type_code as wordcode, flags as wordcode, skip as wordcode);
903            }
904        });
905    } else {
906        // c:760 — `ecbuf[p] = WCB_SUBLIST(type, flags|WC_SUBLIST_SIMPLE, skip);`
907        ECBUF.with_borrow_mut(|b| {
908            if p < b.len() {
909                b[p] = WCB_SUBLIST(
910                    type_code as wordcode,
911                    (flags as wordcode) | WC_SUBLIST_SIMPLE,
912                    skip as wordcode,
913                );
914            }
915        });
916        // c:761 — `ecbuf[p+1] = WC_PIPE_LINENO(ecbuf[p+1]);`
917        ECBUF.with_borrow_mut(|b| {
918            if p + 1 < b.len() {
919                b[p + 1] = WC_PIPE_LINENO(b[p + 1]);
920            }
921        });
922    }
923}
924
925/// Parse a list (sublist with optional & or ;).
926///
927/// Direct port of zsh/Src/parse.c:771-804 `par_list` (and the
928/// par_list1 wrapper at parse.c:807-817).
929///
930/// **Structural divergence**: zsh's parse.c emits flat wordcode
931/// into the `ecbuf` u32 array via `ecadd(0)` (placeholder),
932/// `set_list_code(p, code, complexity)`, `wc_bdata(Z_END)`. zshrs
933/// builds an AST node `ZshList { sublist, flags }` instead. The
934/// async/sync/disown discrimination at parse.c:785-790 maps to
935/// zshrs's `ListFlags { async_, disown }` field — Z_SYNC is the
936/// default (no flags), Z_ASYNC = `&` = `async_=true`, Z_DISOWN +
937/// Z_ASYNC = `&!`/`&|` = both true. Same semantics, different
938/// representation. This divergence is repository-wide: every
939/// `par_*` function emits wordcode in C, every `parse_*` builds
940/// AST in Rust. The compile_zsh module then traverses the AST to
941/// emit fusevm bytecode, which serves the same role as zsh's
942/// wordcode but with a different opcode set and execution model.
943fn par_list() -> Option<ZshList> {
944    let sublist = par_sublist()?;
945
946    let flags = match tok() {
947        AMPER => {
948            zshlex();
949            ListFlags {
950                async_: true,
951                disown: false,
952            }
953        }
954        AMPERBANG => {
955            zshlex();
956            ListFlags {
957                async_: true,
958                disown: true,
959            }
960        }
961        SEPER | SEMI | NEWLIN => {
962            zshlex();
963            ListFlags::default()
964        }
965        _ => ListFlags::default(),
966    };
967
968    Some(ZshList { sublist, flags })
969}
970
/// Parse one list — non-recursing variant. Counterpart of `par_list1`
/// (zsh/Src/parse.c:808).
///
/// Unlike `par_list`, no trailing separator is examined or consumed:
/// this simply forwards to `par_sublist` and returns its result
/// (`None` when no sublist could be parsed). Intended for callers that
/// want exactly one statement, e.g. each arm of a case body.
pub fn par_list1() -> Option<ZshSublist> {
    // parse.c:810-816 — in C the body is a single par_sublist call
    // wrapped in eu/ecused bookkeeping for the wordcode buffer; the AST
    // path has no such buffer, so only the call remains.
    par_sublist()
}
982
983/// Parse a sublist (pipelines connected by && or ||).
984///
985/// Direct port of zsh/Src/parse.c:825 `par_sublist` and
986/// par_sublist2 at parse.c:869-892. par_sublist handles the
987/// && / || conjunction and emits WC_SUBLIST opcodes; par_sublist2
988/// handles the leading `!` negation and `coproc` keyword.
989///
990/// AST mapping: ZshSublist { pipe, conj_chain }, where `conj_chain`
991/// is a Vec<(ConjOp, ZshSublist)> for chained && / ||. C uses
992/// flat wordcode with WC_SUBLIST_AND / WC_SUBLIST_OR markers.
993fn par_sublist() -> Option<ZshSublist> {
994    let mut flags = SublistFlags::default();
995
996    // Handle coproc and !
997    if tok() == COPROC {
998        flags.coproc = true;
999        zshlex();
1000    } else if tok() == BANG_TOK {
1001        flags.not = true;
1002        zshlex();
1003    }
1004
1005    let pipe = par_pline()?;
1006
1007    // Check for && or ||
1008    let next = match tok() {
1009        DAMPER => {
1010            zshlex();
1011            skip_separators();
1012            // c:Src/parse.c:par_sublist — and-or operators (`&&`,
1013            // `||`) require a sublist on each side. After consuming
1014            // `&&`/`||`, another and-or operator OR a pipe-operator
1015            // immediately after is a parse error in C zsh. zshrs's
1016            // recursion silently returned None and dropped the
1017            // operator. Bug #171 in docs/BUGS.md.
1018            if matches!(tok(), DAMPER | DBAR | BAR_TOK | BARAMP) {
1019                let name = match tok() {
1020                    DAMPER => "&&",
1021                    DBAR => "||",
1022                    BAR_TOK => "|",
1023                    BARAMP => "|&",
1024                    _ => "operator",
1025                };
1026                zerr(&format!("parse error near `{}'", name));
1027                return None;
1028            }
1029            par_sublist().map(|s| (SublistOp::And, Box::new(s)))
1030        }
1031        DBAR => {
1032            zshlex();
1033            skip_separators();
1034            if matches!(tok(), DAMPER | DBAR | BAR_TOK | BARAMP) {
1035                let name = match tok() {
1036                    DAMPER => "&&",
1037                    DBAR => "||",
1038                    BAR_TOK => "|",
1039                    BARAMP => "|&",
1040                    _ => "operator",
1041                };
1042                zerr(&format!("parse error near `{}'", name));
1043                return None;
1044            }
1045            par_sublist().map(|s| (SublistOp::Or, Box::new(s)))
1046        }
1047        _ => None,
1048    };
1049
1050    Some(ZshSublist { pipe, next, flags })
1051}
1052
1053/// Port of `par_sublist2(int *cmplx)` from `Src/parse.c:869`.
1054/// Secondary-sublist arm: handles the `COPROC`/`Bang` prefix
1055/// in front of a pline. Returns the WC_SUBLIST flag word added.
1056pub fn par_sublist2(cmplx: &mut i32) -> Option<i32> {
1057    // c:870 — `int f = 0;`
1058    let mut f: i32 = 0;
1059    // c:873-880 — COPROC / BANG prefix flags.
1060    if tok() == COPROC {
1061        *cmplx = 1;
1062        f |= WC_SUBLIST_COPROC as i32;
1063        zshlex();
1064    } else if tok() == BANG_TOK {
1065        *cmplx = 1;
1066        f |= WC_SUBLIST_NOT as i32;
1067        zshlex();
1068    }
1069    // c:882-883 — `if (!par_pline(cmplx) && !f) return -1;`
1070    if !par_pipe_wordcode(cmplx) && f == 0 {
1071        return None;
1072    }
1073    // c:885 — `return f;`
1074    Some(f)
1075}
1076
1077/// Parse a pipeline
1078/// Parse a pipeline (cmds joined by `|` / `|&`). Direct port of
1079/// zsh/Src/parse.c:894 `par_pline`. AST: ZshPipe { cmds: Vec<ZshCommand> }.
1080/// C emits WC_PIPE wordcodes per command; same flow.
1081fn par_pline() -> Option<ZshPipe> {
1082    let lineno = toklineno();
1083    let cmd = par_cmd()?;
1084
1085    // Check for | or |&
1086    let mut merge_stderr = false;
1087    let next = match tok() {
1088        BAR_TOK | BARAMP => {
1089            merge_stderr = tok() == BARAMP;
1090            zshlex();
1091            skip_separators();
1092            // c:Src/parse.c:par_pline — pipe-operators require a
1093            // command on each side. After consuming `|`/`|&`,
1094            // C zsh's recursive par_pline call returns -1 (parse
1095            // error) when the next token is another pipe-operator
1096            // — `a | | b` errors with `parse error near `|''`.
1097            // zshrs's `par_pline()?` silently returned None on
1098            // missing command, dropping the rest of the input
1099            // without diagnosing the empty-pipe-operand. Bug #171
1100            // in docs/BUGS.md.
1101            if matches!(tok(), BAR_TOK | BARAMP) {
1102                let name = if tok() == BARAMP { "|&" } else { "|" };
1103                zerr(&format!("parse error near `{}'", name));
1104                return None;
1105            }
1106            par_pline().map(Box::new)
1107        }
1108        _ => None,
1109    };
1110
1111    Some(ZshPipe {
1112        cmd,
1113        next,
1114        lineno,
1115        merge_stderr,
1116    })
1117}
1118
1119/// Parse a command
1120/// Parse a command — dispatches by leading token (FOR / CASE /
1121/// IF / WHILE / UNTIL / REPEAT / FUNC / DINBRACK / DINPAR /
1122/// Inpar subshell / Inbrace current-shell / TIME / NOCORRECT,
1123/// else simple). Direct port of zsh/Src/parse.c:958 `par_cmd`.
1124fn par_cmd() -> Option<ZshCommand> {
1125    // Parse leading redirections
1126    let mut redirs = Vec::new();
1127    while IS_REDIROP(tok()) {
1128        if let Some(redir) = par_redir() {
1129            redirs.push(redir);
1130        }
1131    }
1132
1133    let cmd = match tok() {
1134        FOR | FOREACH => par_for(),
1135        SELECT => parse_select(),
1136        CASE => par_case(),
1137        IF => par_if(),
1138        WHILE => par_while(false),
1139        UNTIL => par_while(true),
1140        REPEAT => par_repeat(),
1141        INPAR_TOK => par_subsh(),
1142        INOUTPAR => parse_anon_funcdef(),
1143        INBRACE_TOK => parse_cursh(),
1144        FUNC => par_funcdef(),
1145        DINBRACK => par_cond(),
1146        DINPAR => parse_arith(),
1147        TIME => par_time(),
1148        _ => par_simple(redirs),
1149    };
1150
1151    // Parse trailing redirections. For Simple commands the redirs were
1152    // already captured inside par_simple; for compound forms (Cursh,
1153    // Subsh, If, While, etc.) we collect them here and wrap in
1154    // ZshCommand::Redirected so compile_zsh can scope-bracket them.
1155    if let Some(inner) = cmd {
1156        let mut trailing: Vec<ZshRedir> = Vec::new();
1157        while IS_REDIROP(tok()) {
1158            if let Some(redir) = par_redir() {
1159                trailing.push(redir);
1160            }
1161        }
1162        // c:Src/parse.c:par_cmd — compound forms (Cursh `{...}`, Subsh
1163        // `(...)`, If/While/Until/For/Case/Select/Repeat/Funcdef) must
1164        // be followed by a valid sublist/list separator (`;`, `\n`,
1165        // `&`, `|`, `&&`, `||`, redirect-op) — STRING_LEX after a
1166        // compound is a parse error. zshrs's outer par_list loop
1167        // silently treated trailing words as a new command, masking
1168        // syntax errors like `{ echo a; } b c`. Mirror C's strict
1169        // post-compound terminator check. Bug #146 in docs/BUGS.md.
1170        if !matches!(inner, ZshCommand::Simple(_)) && tok() == STRING_LEX {
1171            let bad = tokstr().unwrap_or_default();
1172            zerr(&format!("parse error near `{}'", bad));
1173            // Reset state before returning so the outer loop's None
1174            // detection unwinds cleanly.
1175            set_incmdpos(true);
1176            set_incasepat(0);
1177            set_incond(0);
1178            set_intypeset(false);
1179            return None;
1180        }
1181        // c:1072-1075 — every par_cmd tail resets the lexer state
1182        // toggles so the NEXT command starts in cmd position with
1183        // case/cond/typeset off. par_simple/par_cond set `incmdpos=0`
1184        // during their bodies; without this reset the next iteration
1185        // of the outer par_list loop sees `if` / `done` / `select`
1186        // etc. as plain strings and the AST collapses.
1187        set_incmdpos(true);
1188        set_incasepat(0);
1189        set_incond(0);
1190        set_intypeset(false);
1191        if trailing.is_empty() {
1192            return Some(inner);
1193        }
1194        // Simple already absorbed its own redirs (compile path expects
1195        // them on ZshSimple), so don't double-wrap.
1196        if matches!(inner, ZshCommand::Simple(_)) {
1197            if let ZshCommand::Simple(mut s) = inner {
1198                s.redirs.extend(trailing);
1199                return Some(ZshCommand::Simple(s));
1200            }
1201            unreachable!()
1202        }
1203        return Some(ZshCommand::Redirected(Box::new(inner), trailing));
1204    }
1205    // Same reset on the empty-cmd branch (mirror c:1072 unconditional
1206    // path — the C function only returns 0 above when the dispatch
1207    // produced no command, and falls through to the reset block).
1208    set_incmdpos(true);
1209    set_incasepat(0);
1210    set_incond(0);
1211    set_intypeset(false);
1212
1213    None
1214}
1215
1216/// Parse for/foreach loop
1217/// Parse `for NAME in WORDS; do BODY; done` (foreach style) AND
1218/// `for ((init; cond; incr)) do BODY done` (c-style). Direct port
1219/// of zsh/Src/parse.c:1087 `par_for`. parse_for_cstyle is the
1220/// inner branch for the `((...))` arithmetic-header variant
1221/// (parse.c:1100-1140 inside par_for).
1222fn par_for() -> Option<ZshCommand> {
1223    let is_foreach = tok() == FOREACH;
1224    // c:1094-1095 (Src/parse.c, par_for) — set `infor=2` (only when
1225    // tok==FOR) so the lexer's `(` peek at lex.c:784-789
1226    // (`if (infor) { ... return DINPAR; }`) routes the arith-for
1227    // body through dbparens semicolon-splitting instead of the
1228    // `cmd_or_math` whole-body capture path. Without this, `for ((
1229    // i=0; i<3; i++ ))` lexed as a single `((arith))` expression
1230    // and parse_for_cstyle's second zshlex got an empty/wrong tok.
1231    //
1232    // The companion C statement `incmdpos = 0;` at c:1094 isn't
1233    // mirrored here: zshrs's parser doesn't otherwise touch
1234    // LEX_INCMDPOS at this boundary, and forcing it false breaks
1235    // the SELECT case where downstream tokenization relied on the
1236    // inherited state. The C parser maintains incmdpos inline at
1237    // every grammar transition (parse.c:617, :791, :1072, :1145,
1238    // :1154, :1161, ...); without porting those companion sites a
1239    // single explicit reset here is more harmful than helpful.
1240    set_infor(if tok() == FOR { 2 } else { 0 }); // c:1095
1241    zshlex(); // c:1096
1242
1243    // Check for C-style: for (( init; cond; step ))
1244    if tok() == DINPAR {
1245        // c:1110-1111 — close out infor / cmdpos after parse_for_cstyle
1246        // has consumed the init/cond/step triple. Done inside the
1247        // helper itself so we honour the C ordering.
1248        return parse_for_cstyle();
1249    }
1250
1251    // c:1116 — `infor = 0;` immediately on entering the foreach
1252    // branch. Without this, `infor` stays at 2 (set at c:1095 when
1253    // tok==FOR) for the rest of par_for, and the lexer's `((`
1254    // peek at lex.c:786 routes every subsequent `((...))` inside
1255    // the loop body through dbparens — so `for x in a; do (( 1
1256    // )); done` and `if (( 1 )) { … }` inside the do-body both
1257    // mis-lexed as a c-style for header.
1258    set_infor(0); // c:1116
1259
1260    // Get variable name(s). zsh parse.c par_for accepts multiple
1261    // identifier tokens before `in`/`(`/newline — `for k v in ...`
1262    // assigns each iteration's pair of values to k and v in turn.
1263    // We store the names space-joined since variable identifiers
1264    // can't contain whitespace.
1265    let mut names: Vec<String> = Vec::new();
1266    while tok() == STRING_LEX {
1267        let v = tokstr().unwrap_or_default();
1268        if v == "in" {
1269            break;
1270        }
1271        names.push(v);
1272        zshlex();
1273    }
1274    if names.is_empty() {
1275        zerr("expected variable name in for");
1276        return None;
1277    }
1278    let var = names.join(" ");
1279
1280    // Skip newlines
1281    skip_separators();
1282
1283    // Get list. The lexer-port quirk: `for x (a b c)` arrives as a
1284    // single String token with the parens lexed-as-content
1285    // (`<Inpar>a b c<Outpar>`) instead of as separate Inpar/String/
1286    // Outpar tokens. Detect that shape and split it manually.
1287    let list = if tok() == STRING_LEX
1288        && tokstr()
1289            .map(|s| s.starts_with('\u{88}') && s.ends_with('\u{8a}'))
1290            .unwrap_or(false)
1291    {
1292        let raw = tokstr().unwrap_or_default();
1293        // Strip leading Inpar + trailing Outpar. KEEP the inner
1294        // content tokenized — `for x ({1..3}) …` has `{1..3}` as
1295        // Inbrace+content+Outbrace markers, which compile_word_str
1296        // needs to detect and brace-expand. Untokenizing here would
1297        // collapse the markers to plain `{` `}` chars and the brace-
1298        // expansion pass (which strictly requires Inbrace TOKEN per
1299        // Src/glob.c:hasbraces) would skip the word entirely.
1300        // Split only on UNTOKENIZED whitespace at the top level —
1301        // tokenized characters (TOKEN range \u{84}..\u{a1}) are part
1302        // of one word; bare ASCII spaces / tabs separate words.
1303        let inner = &raw[raw.char_indices().nth(1).map(|(i, _)| i).unwrap_or(0)
1304            ..raw
1305                .char_indices()
1306                .last()
1307                .map(|(i, _)| i)
1308                .unwrap_or(raw.len())];
1309        let mut words: Vec<String> = Vec::new();
1310        let mut cur = String::new();
1311        for c in inner.chars() {
1312            if c == ' ' || c == '\t' || c == '\n' {
1313                if !cur.is_empty() {
1314                    words.push(std::mem::take(&mut cur));
1315                }
1316            } else {
1317                cur.push(c);
1318            }
1319        }
1320        if !cur.is_empty() {
1321            words.push(cur);
1322        }
1323        zshlex();
1324        ForList::Words(words)
1325    } else if tok() == STRING_LEX {
1326        let s = tokstr();
1327        if s.map(|s| s == "in").unwrap_or(false) {
1328            // c:Src/parse.c:1147-1154 — after consuming `in`, the
1329            // for-list reads in WORD position, not command position.
1330            // Reset incmdpos=false so the lexer's LX2_INBRACE arm
1331            // (lex.rs:1791) treats a leading `{` as the brace-
1332            // expansion marker (`bct++; add(Inbrace)`) instead of
1333            // returning STRING("{") + promoting to INBRACE_TOK.
1334            // Without this, `for i in {1..3}` saw `{` as the body-
1335            // opener brace, so the word-collection loop got an
1336            // empty word list and the loop body silently ran 0
1337            // iterations.
1338            set_incmdpos(false);
1339            zshlex();
1340            let mut words = Vec::new();
1341            while tok() == STRING_LEX {
1342                let _ts_s = tokstr();
1343                if let Some(s) = _ts_s.as_deref() {
1344                    words.push(s.to_string());
1345                }
1346                zshlex();
1347            }
1348            // c:Src/parse.c:1162 — `incmdpos = 1;` after the
1349            // wordlist + SEPER are consumed, so the next token
1350            // (`do` / `{` body opener) lexes at command position.
1351            set_incmdpos(true);
1352            ForList::Words(words)
1353        } else {
1354            ForList::Positional
1355        }
1356    } else if tok() == INPAR_TOK {
1357        // for var (...) — `for x ({1..3})`: inside the parens, the
1358        // list is in WORD position so `{` must lex as the brace-
1359        // expansion Inbrace marker, NOT as a body-opener INBRACE_TOK.
1360        // Without resetting incmdpos before the next zshlex, the
1361        // lexer's LX2_INBRACE arm promotes `{` to INBRACE_TOK and
1362        // the word-collection loop exits empty, giving
1363        // `for x ({1..3})` an empty iteration.
1364        set_incmdpos(false);
1365        zshlex();
1366        let mut words = Vec::new();
1367        while tok() == STRING_LEX || tok() == SEPER {
1368            if tok() == STRING_LEX {
1369                let _ts_s = tokstr();
1370                if let Some(s) = _ts_s.as_deref() {
1371                    words.push(s.to_string());
1372                }
1373            }
1374            zshlex();
1375        }
1376        if tok() == OUTPAR_TOK {
1377            // After the `)` of a for-list, the next token is the
1378            // body opener — `do`/`{`. zsh's lexer needs incmdpos
1379            // set so `{` lexes as Inbrace (not as a literal). C
1380            // analogue: parse.c::par_for sets `incmdpos = 1`
1381            // after consuming the Outpar before the body parse.
1382            set_incmdpos(true);
1383            zshlex();
1384        }
1385        ForList::Words(words)
1386    } else {
1387        ForList::Positional
1388    };
1389
1390    // Skip to body
1391    skip_separators();
1392
1393    // Parse body
1394    let body = parse_loop_body(is_foreach, false)?;
1395
1396    Some(ZshCommand::For(ZshFor {
1397        var,
1398        list,
1399        body: Box::new(body),
1400        is_select: false,
1401    }))
1402}
1403
1404/// Parse case statement
1405/// Parse `case WORD in PATTERN) BODY ;; ... esac`. Direct port
1406/// of zsh/Src/parse.c:1209 `par_case`. Each case arm is a
1407/// (pattern_list, body, terminator) tuple where terminator is
1408/// `;;` (default), `;&` (fallthrough), or `;|` (continue testing).
1409fn par_case() -> Option<ZshCommand> {
1410    // C par_case (parse.c:1209-1241). Order of state toggles
1411    // matters — the lexer reads the case word in `incmdpos=0`
1412    // (so it's not promoted to a reswd), then the `in`/`{` in
1413    // `incmdpos=1, noaliases=1, nocorrect=1` (so the `in` literal
1414    // isn't alias-expanded or spell-corrected), then sets
1415    // `incasepat=1, incmdpos=0` before the first pattern.
1416    set_incmdpos(false);
1417    zshlex(); // skip 'case'
1418
1419    let word = match tok() {
1420        STRING_LEX => {
1421            let w = tokstr().unwrap_or_default();
1422            // c:1222 — `incmdpos = 1;` before the next zshlex so the
1423            // `in` keyword is recognised. c:1223-1225 — save+force
1424            // noaliases / nocorrect.
1425            set_incmdpos(true);
1426            let ona = noaliases();
1427            let onc = nocorrect();
1428            set_noaliases(true);
1429            set_nocorrect(1);
1430            zshlex();
1431            // Restore noaliases/nocorrect after the `in`-or-`{` token
1432            // is in hand; both are unconditionally restored at c:1238-1239.
1433            let restore = |ona: bool, onc: i32| {
1434                set_noaliases(ona);
1435                set_nocorrect(onc);
1436            };
1437            (w, ona, onc, restore)
1438        }
1439        _ => {
1440            zerr("expected word after case");
1441            return None;
1442        }
1443    };
1444    let (word, ona, onc, restore) = word;
1445
1446    skip_separators();
1447
1448    // Expect 'in' or {
1449    let use_brace = tok() == INBRACE_TOK;
1450    if tok() == STRING_LEX {
1451        let s = tokstr();
1452        if s.map(|s| s != "in").unwrap_or(true) {
1453            // c:1228-1232 — restore noaliases/nocorrect on error path.
1454            restore(ona, onc);
1455            zerr("expected 'in' in case");
1456            return None;
1457        }
1458    } else if !use_brace {
1459        restore(ona, onc);
1460        zerr("expected 'in' or '{' in case");
1461        return None;
1462    }
1463    // c:1236-1239 — `incasepat = 1; incmdpos = 0; noaliases = ona;
1464    // nocorrect = onc;` — set the case-pattern context AND restore
1465    // alias/correct state BEFORE the zshlex that consumes `in`/`{`.
1466    set_incasepat(1);
1467    set_incmdpos(false);
1468    restore(ona, onc);
1469    zshlex();
1470
1471    let mut arms = Vec::new();
1472    const MAX_ARMS: usize = 10_000;
1473
1474    loop {
1475        if arms.len() > MAX_ARMS {
1476            zerr("par_case: too many arms");
1477            break;
1478        }
1479
1480        // Set incasepat BEFORE skipping separators so lexer knows we're in case pattern context
1481        // This affects how [ and | are lexed
1482        set_incasepat(1);
1483
1484        skip_separators();
1485
1486        // Check for end
1487        // Note: 'esac' might be String "esac" if incasepat > 0 prevents reserved word recognition
1488        let is_esac = tok() == ESAC
1489            || (tok() == STRING_LEX && tokstr().map(|s| s == "esac").unwrap_or(false));
1490        if (use_brace && tok() == OUTBRACE_TOK) || (!use_brace && is_esac) {
1491            set_incasepat(0);
1492            zshlex();
1493            break;
1494        }
1495
1496        // Also break on EOF. c:Src/parse.c:1209 par_case requires
1497        // ESAC (or `}` in brace form) to close the block — reaching
1498        // ENDINPUT without either is a parse error (`case ... esack`
1499        // typo absorbs `esack` as part of the body and silently
1500        // terminates rc=0 otherwise). Bug #400.
1501        if tok() == ENDINPUT || tok() == LEXERR {
1502            set_incasepat(0);
1503            yyerror("unmatched `case'");
1504            break;
1505        }
1506
1507        // c:1250 — `if (tok == INPAR) zshlex();` — leading-paren
1508        // skip path. Used when the lexer DID return INPAR_TOK (e.g.
1509        // SHGLOB or incmdpos forced it). In the normal case-pattern
1510        // path the lexer absorbs `(...)` into one Stringg and the
1511        // hack at c:1322 strips the surrounding parens later. Both
1512        // paths land here.
1513        let leading_inpar_consumed = tok() == INPAR_TOK;
1514        if leading_inpar_consumed {
1515            zshlex();
1516        }
1517
1518        // c:1255-1262 — read pattern STRING. zsh's parser falls
1519        // straight into the STRING reader after the optional INPAR.
1520        // BAR before any pattern means empty string.
1521        let mut patterns = Vec::new();
1522        // Tracks whether the c:1322-1354 hack has fired (paren-
1523        // wrapped Stringg absorbed by the lexer). When it has, the
1524        // closing `)` was already absorbed — no separate OUTPAR
1525        // arm-close to consume.
1526        let mut absorbed_outpar = false;
1527        loop {
1528            if tok() == STRING_LEX {
1529                let s = tokstr();
1530                if s.as_deref().map(|s| s == "esac").unwrap_or(false) {
1531                    break;
1532                }
1533                let mut str_val = s.unwrap_or_default();
1534
1535                // c:1322-1354 hack: when this is the first alt AND
1536                // the string starts with the Inpar marker, the lexer
1537                // absorbed the whole `(...)` as one token. Strip the
1538                // surrounding parens — the remainder IS the pattern.
1539                // The closing arm-paren was absorbed too, so we don't
1540                // expect a separate OUTPAR token afterward.
1541                if patterns.is_empty() && str_val.starts_with(crate::ported::zsh_h::Inpar) {
1542                    let mut pct = 0i32;
1543                    let mut chars: Vec<char> = str_val.chars().collect();
1544                    let mut end_idx: Option<usize> = None;
1545                    for (idx, &c) in chars.iter().enumerate() {
1546                        if c == crate::ported::zsh_h::Inpar {
1547                            pct += 1;
1548                        } else if c == crate::ported::zsh_h::Outpar {
1549                            pct -= 1;
1550                            if pct == 0 {
1551                                end_idx = Some(idx);
1552                                break;
1553                            }
1554                        }
1555                    }
1556                    if let Some(idx) = end_idx {
1557                        chars.remove(idx);
1558                        chars.remove(0);
1559                        str_val = chars.into_iter().collect();
1560                        absorbed_outpar = true;
1561                    }
1562                }
1563                patterns.push(str_val);
1564                set_incasepat(2);
1565                zshlex();
1566                // When the hack fired the closing `)` is already
1567                // consumed; don't read alt-`|` continuations either.
1568                if absorbed_outpar {
1569                    break;
1570                }
1571            } else if tok() != BAR_TOK {
1572                break;
1573            }
1574
1575            if tok() == BAR_TOK {
1576                set_incasepat(1);
1577                zshlex();
1578            } else {
1579                break;
1580            }
1581        }
1582        set_incasepat(0);
1583
1584        // c:1305 — expect OUTPAR (arm-close) when the hack didn't
1585        // already swallow it.
1586        //
1587        // Bug #34 in docs/BUGS.md: the absorbed-pattern hack assumed
1588        // the leading `(` and the case-arm closing `)` were both
1589        // absorbed into the single STRING token. That's true for
1590        // `(x))` (the inner `)` closes the absorbed group; the second
1591        // `)` is the arm closer) only when the lexer slurps BOTH.
1592        // The Rust lexer slurps just `(x|y)` (one balanced pair); the
1593        // second `)` arrives as a separate OUTPAR_TOK that must still
1594        // be consumed as the case-arm closer. Detect and consume it.
1595        if !absorbed_outpar {
1596            if tok() != OUTPAR_TOK {
1597                zerr("expected ')' in case pattern");
1598                return None;
1599            }
1600            // c:Src/parse.c:1257-1258 — `if (tok != STRING)
1601            // YYERRORV(oecused);` C requires at least one pattern
1602            // STRING before `)`. zshrs accepted empty `case x in)`
1603            // and silently fell through to the next iteration with
1604            // an empty pattern arm, swallowing the rest of the
1605            // script. Reject the empty-pattern shape unless a
1606            // leading INPAR was consumed (the `(pat)` form has
1607            // already validated the pattern inside). Bug #161 in
1608            // docs/BUGS.md.
1609            if patterns.is_empty() && !leading_inpar_consumed {
1610                zerr("parse error near `)'");
1611                return None;
1612            }
1613            set_incmdpos(true);
1614            zshlex();
1615            // When the lexer emitted a separate INPAR_TOK at the
1616            // arm start (consumed via `leading_inpar_consumed`
1617            // above), the OUTPAR_TOK we just consumed closed the
1618            // alternation GROUP. If the next token is ALSO
1619            // OUTPAR_TOK, the user wrote `(pat))` and that second
1620            // `)` is the case-arm closer that still needs to be
1621            // consumed before body parsing. Bug #34 in
1622            // docs/BUGS.md.
1623            if leading_inpar_consumed && tok() == OUTPAR_TOK {
1624                zshlex();
1625            }
1626        } else if tok() == OUTPAR_TOK {
1627            // The lexer absorbed `(pat)` as the pattern but left the
1628            // case-arm closing `)` as a separate OUTPAR_TOK. Consume
1629            // it now so body parsing starts at the body, not at `)`.
1630            set_incmdpos(true);
1631            zshlex();
1632        } else {
1633            set_incmdpos(true);
1634        }
1635
1636        // Parse body. Pass end_tokens explicitly so the body's
1637        // parser stops at DSEMI/SEMIAMP/SEMIBAR/ESAC without
1638        // tripping parse_program_until's orphan-terminator check
1639        // (line 7131) which only fires when end_tokens is None.
1640        // Without this, a case arm whose body has no trailing
1641        // `;;` before `esac` (last arm — zsh accepts the dangling
1642        // form) produced "parse error near orphan terminator" on
1643        // the closing `esac`. zsh's par_case at parse.c:1318 sets
1644        // up the case-arm reader to recognize the same terminator
1645        // set; the Rust port was passing the implicit-None and
1646        // hitting the top-level orphan check.
1647        let body = parse_program_until(Some(&[DSEMI, SEMIAMP, SEMIBAR, ESAC]));
1648
1649        // Get terminator. Set incasepat=1 BEFORE the zshlex
1650        // advance so the next token (the next arm's pattern, like
1651        // `[a-z]`) gets tokenized in pattern context. Without
1652        // this, a `[`-prefixed pattern after the FIRST arm became
1653        // Inbrack instead of String and the pattern-loop bailed
1654        // out with "expected ')' in case pattern".
1655        let terminator = match tok() {
1656            DSEMI => {
1657                set_incasepat(1);
1658                zshlex();
1659                CaseTerm::Break
1660            }
1661            SEMIAMP => {
1662                set_incasepat(1);
1663                zshlex();
1664                CaseTerm::Continue
1665            }
1666            SEMIBAR => {
1667                set_incasepat(1);
1668                zshlex();
1669                CaseTerm::TestNext
1670            }
1671            _ => CaseTerm::Break,
1672        };
1673
1674        if !patterns.is_empty() {
1675            arms.push(CaseArm {
1676                patterns,
1677                body,
1678                terminator,
1679            });
1680        }
1681    }
1682
1683    Some(ZshCommand::Case(ZshCase { word, arms }))
1684}
1685
1686/// Parse if statement
1687/// Parse `if COND; then BODY; [elif COND; then BODY;]* [else BODY;] fi`.
1688/// Direct port of zsh/Src/parse.c:1411 `par_if`. The C source
1689/// emits WC_IF wordcodes per arm; zshrs builds an AST chain of
1690/// (cond, then_body) tuples plus an optional else_body.
1691fn par_if() -> Option<ZshCommand> {
1692    zshlex(); // skip 'if'
1693
1694    // Parse condition - stops at 'then' or '{' (zsh allows { instead of then)
1695    let cond = Box::new(parse_program_until(Some(&[THEN, INBRACE_TOK])));
1696
1697    skip_separators();
1698
1699    // Expect 'then' or {
1700    let use_brace = tok() == INBRACE_TOK;
1701    if tok() != THEN && !use_brace {
1702        zerr("expected 'then' or '{' after if condition");
1703        return None;
1704    }
1705    zshlex();
1706
1707    // Parse then-body - stops at else/elif/fi, or } if using brace syntax
1708    let then = if use_brace {
1709        let body = parse_program_until(Some(&[OUTBRACE_TOK]));
1710        if tok() == OUTBRACE_TOK {
1711            zshlex();
1712        }
1713        Box::new(body)
1714    } else {
1715        Box::new(parse_program_until(Some(&[ELSE, ELIF, FI])))
1716    };
1717
1718    // Parse elif and else. zsh accepts the SAME elif/else
1719    // continuations for both classic `then/fi` AND the brace
1720    // form `{ ... } elif ... { ... } else { ... }`. Direct port
1721    // of zsh/Src/parse.c:1417-1500 par_if where the elif/else
1722    // arms are checked AFTER the body close regardless of which
1723    // delimiter style opened the block. Without this, zinit's
1724    //   if [[ -z $sel ]] { ... } else { ... }
1725    // hung the parser — `else` was treated as an external
1726    // command following the if-statement, which the lexer state
1727    // mis-classified inside the still-open function body.
1728    //
1729    // For brace-form: skip the `fi` consumption at the end of
1730    // the loop (no `fi` after a brace block), and `else` may
1731    // arrive after a `}` close. Skip-separators between the
1732    // body close and the elif/else token.
1733    let mut elif = Vec::new();
1734    let mut else_ = None;
1735    // c:Src/parse.c:1501-1504 — `if (tok != FI) { cmdpop(); YYERRORV; }`.
1736    // The C parser fails the whole if-construct when the body close
1737    // isn't seen. zshrs's loop fell through silently on ENDINPUT, so
1738    // `if true; then echo yes` (no `fi`) was accepted. Track whether
1739    // we hit a real terminator and error after the loop if not.
1740    let mut saw_terminator = use_brace; // `{ … }` body already consumed its close
1741
1742    {
1743        loop {
1744            skip_separators();
1745
1746            match tok() {
1747                ELIF => {
1748                    zshlex();
1749                    // elif condition stops at 'then' or '{'
1750                    let econd = parse_program_until(Some(&[THEN, INBRACE_TOK]));
1751                    skip_separators();
1752
1753                    let elif_use_brace = tok() == INBRACE_TOK;
1754                    if tok() != THEN && !elif_use_brace {
1755                        zerr("expected 'then' after elif");
1756                        return None;
1757                    }
1758                    zshlex();
1759
1760                    // elif body stops at else/elif/fi or } if using braces
1761                    let ebody = if elif_use_brace {
1762                        let body = parse_program_until(Some(&[OUTBRACE_TOK]));
1763                        if tok() == OUTBRACE_TOK {
1764                            zshlex();
1765                            saw_terminator = true; // brace close on elif
1766                        }
1767                        body
1768                    } else {
1769                        parse_program_until(Some(&[ELSE, ELIF, FI]))
1770                    };
1771
1772                    elif.push((econd, ebody));
1773                }
1774                ELSE => {
1775                    zshlex();
1776                    skip_separators();
1777
1778                    let else_use_brace = tok() == INBRACE_TOK;
1779                    if else_use_brace {
1780                        zshlex();
1781                    }
1782
1783                    // else body stops at 'fi' or '}'
1784                    else_ = Some(Box::new(if else_use_brace {
1785                        let body = parse_program_until(Some(&[OUTBRACE_TOK]));
1786                        if tok() == OUTBRACE_TOK {
1787                            zshlex();
1788                            saw_terminator = true;
1789                        }
1790                        body
1791                    } else {
1792                        parse_program_until(Some(&[FI]))
1793                    }));
1794
1795                    // Consume the 'fi' if present (not for brace syntax)
1796                    if !else_use_brace && tok() == FI {
1797                        zshlex();
1798                        saw_terminator = true;
1799                    }
1800                    break;
1801                }
1802                FI => {
1803                    // Brace-form `if ... { ... }` is already terminated by
1804                    // its closing `}`. Do NOT consume `fi` here — it belongs
1805                    // to an enclosing then-form if. Without this gate, a
1806                    // brace-form if inside a then-form if's body would steal
1807                    // the outer `fi`, leaving the outer parser to see
1808                    // "unterminated if". This bit zinit-install.zsh:978
1809                    // where `if (( … )) {` (brace) inside `if … ; then …`
1810                    // (then-form) ate the outer `fi`.
1811                    if use_brace {
1812                        break;
1813                    }
1814                    zshlex();
1815                    saw_terminator = true;
1816                    break;
1817                }
1818                _ => break,
1819            }
1820        }
1821    }
1822
1823    if !saw_terminator {
1824        // c:1501-1504 — YYERRORV when the if-construct never closed.
1825        zerr("parse error: unterminated if");
1826        return None;
1827    }
1828
1829    Some(ZshCommand::If(ZshIf {
1830        cond,
1831        then,
1832        elif,
1833        else_,
1834    }))
1835}
1836
1837/// Parse while/until loop
1838/// Parse `while COND; do BODY; done` and `until COND; do BODY; done`.
1839/// Direct port of zsh/Src/parse.c:1521 `par_while`. The
1840/// `until` variant is the same loop with the condition negated.
1841fn par_while(until: bool) -> Option<ZshCommand> {
1842    zshlex(); // skip while/until
1843
1844    // c:1521-1551 par_while — the condition's parser must stop at
1845    // `do` or `{`. Without an explicit end-token set, parse_program
1846    // consumes the brace-form body as additional condition lists,
1847    // leaving parse_loop_body with nothing — `while (( i++ < 3 )) {
1848    // echo $i }` silently parsed but executed nothing.
1849    let cond = Box::new(parse_program_until(Some(&[DOLOOP, INBRACE_TOK])));
1850
1851    skip_separators();
1852    let body = parse_loop_body(false, false)?;
1853
1854    // c:Src/parse.c:1521-1551 par_while — WC_WHILE wordcode is tagged
1855    // with WC_WHILE_TYPE differentiating WHILE vs UNTIL at the wordcode
1856    // layer. The AST mirror in zsh_ast.rs has separate Until(ZshWhile)
1857    // and While(ZshWhile) variants; route by the `until` flag here so
1858    // downstream pattern-matchers can distinguish without poking
1859    // inside the payload's bool.
1860    let w = ZshWhile {
1861        cond,
1862        body: Box::new(body),
1863        until,
1864    };
1865    Some(if until {
1866        ZshCommand::Until(w) // c:1521 (WC_WHILE_TYPE = WC_WHILE_UNTIL)
1867    } else {
1868        ZshCommand::While(w) // c:1521 (WC_WHILE_TYPE = WC_WHILE_WHILE)
1869    })
1870}
1871
1872/// Parse repeat loop
1873/// Parse `repeat N; do BODY; done`. Direct port of
1874/// zsh/Src/parse.c:1565 `par_repeat`. The C source supports
1875/// the SHORTLOOPS short-form `repeat N CMD` (no do/done) — zshrs's
1876/// parser doesn't yet special-case that variant.
1877fn par_repeat() -> Option<ZshCommand> {
1878    zshlex(); // skip 'repeat'
1879
1880    let count = match tok() {
1881        STRING_LEX => {
1882            let c = tokstr().unwrap_or_default();
1883            zshlex();
1884            c
1885        }
1886        _ => {
1887            zerr("expected count after repeat");
1888            return None;
1889        }
1890    };
1891
1892    skip_separators();
1893    // c:1600 — par_repeat's short-form gate is wider: it unlocks
1894    // when SHORTLOOPS OR SHORTREPEAT is set (vs SHORTLOOPS alone for
1895    // for/while). Pass `is_repeat=true` so parse_loop_body
1896    // applies that widened gate.
1897    let body = parse_loop_body(false, true)?;
1898
1899    Some(ZshCommand::Repeat(ZshRepeat {
1900        count,
1901        body: Box::new(body),
1902    }))
1903}
1904
1905/// Parse (...) subshell
1906/// Parse a subshell `( ... )`. Direct port of zsh/Src/parse.c:1619
1907/// `par_subsh`. Body parses as a normal list; the subshell wrapper
1908/// fork-isolates execution in the executor.
1909fn par_subsh() -> Option<ZshCommand> {
1910    zshlex(); // skip (
1911    let prog = parse_program();
1912    if tok() == OUTPAR_TOK {
1913        zshlex();
1914    }
1915    Some(ZshCommand::Subsh(Box::new(prog)))
1916}
1917
1918/// Parse function definition
1919/// Parse `function NAME { BODY }` or `NAME () { BODY }`. Direct
1920/// port of zsh/Src/parse.c:1672 `par_funcdef`. zsh handles
1921/// the multiple keyword shapes (function FOO, FOO (), function FOO ()),
1922/// the optional `[fname1 fname2 ...]` for multi-name function defs,
1923/// and the `function FOO () { ... }` traditional/POSIX hybrid form.
1924fn par_funcdef() -> Option<ZshCommand> {
1925    zshlex(); // skip 'function'
1926
1927    let mut names = Vec::new();
1928    let mut tracing = false;
1929
1930    // Handle options like -T and function names. Two subtleties:
1931    //
1932    //   1. Flags: zsh's lexer encodes a leading `-` as
1933    //      `zsh_h::Dash` (`\u{9b}`, `Src/zsh.h:182`) inside the String tokstr.
1934    //      The previous `s.starts_with('-')` check failed for
1935    //      `\u{9b}T`, so `function -T NAME { body }` slipped the
1936    //      `-T` token into `names` and the function got registered
1937    //      as `T` plus the intended `NAME`.
1938    //
1939    //   2. Body opener: zsh's lexer emits the opening `{` as a
1940    //      String (not INBRACE_TOK) when it follows the String
1941    //      NAME — the preceding name token resets incmdpos to
1942    //      false, and only `{` immediately followed by `}` (the
1943    //      empty-body case) gets promoted to Inbrace. The funcdef
1944    //      parser must recognise the bare-`{` String as the body
1945    //      opener; otherwise `function NAME { body }` falls through
1946    //      to `_ => break`, no body parses, and the FuncDef never
1947    //      lands in the AST. This is consistent with C zsh's
1948    //      par_funcdef which knows it's in funcdef-header context
1949    //      and accepts the brace either way.
1950    loop {
1951        match tok() {
1952            STRING_LEX => {
1953                let _ts_s = tokstr()?;
1954                let s = _ts_s.as_str();
1955                // c:1702 — `if ((*tokstr == Inbrace || *tokstr == '{') && !tokstr[1])`.
1956                // Body opener can be either the literal `{` (early-return
1957                // path at lex.c:1141-1144 / lex.rs LX2_INBRACE cmdpos
1958                // branch) or the Inbrace marker `\u{8f}` (lex.c:1420
1959                // post-switch add(c) where c was rewritten via lextok2).
1960                if s == "{" || s == "\u{8f}" {
1961                    break;
1962                }
1963                let first = s.chars().next();
1964                if matches!(first, Some('-') | Some('+')) || matches!(first, Some(c) if c == Dash) {
1965                    if s.contains('T') {
1966                        tracing = true;
1967                    }
1968                    zshlex();
1969                    continue;
1970                }
1971                // c:Src/exec.c::execcmd_args — function name tokens
1972                // in `function NAME { ... }` form go through globbing
1973                // at parse time. zsh's `function with[bracket] { ... }`
1974                // triggers a glob expansion of `with[bracket]`; no file
1975                // matches → "no matches found: NAME" + rc=1 (when
1976                // NOMATCH is set, the default). Bug #536: zshrs accepted
1977                // the literal bracket-containing name and registered
1978                // the function silently. Mirror C by probing for glob
1979                // metachars on the name; if present AND no file
1980                // matches, emit the diagnostic and abort the parse.
1981                let has_glob_chars = s.chars().any(|c| {
1982                    matches!(
1983                        c,
1984                        '[' | ']'
1985                            | '*'
1986                            | '?'
1987                            | crate::ported::zsh_h::Inbrack
1988                            | crate::ported::zsh_h::Outbrack
1989                            | crate::ported::zsh_h::Star
1990                            | crate::ported::zsh_h::Quest
1991                    )
1992                });
1993                if has_glob_chars && crate::ported::zsh_h::isset(crate::ported::zsh_h::NOMATCH) {
1994                    let untok = crate::ported::lex::untokenize(s);
1995                    let glob_result = crate::ported::glob::glob(&untok);
1996                    if glob_result.is_empty() {
1997                        crate::ported::utils::zerr(&format!("no matches found: {}", untok));
1998                        crate::ported::utils::errflag.fetch_or(
1999                            crate::ported::utils::ERRFLAG_ERROR,
2000                            std::sync::atomic::Ordering::Relaxed,
2001                        );
2002                        return None;
2003                    }
2004                }
2005                names.push(s.to_string());
2006                zshlex();
2007            }
2008            INBRACE_TOK | INOUTPAR | SEPER | NEWLIN => break,
2009            _ => break,
2010        }
2011    }
2012
2013    // Optional ()
2014    let saw_paren = tok() == INOUTPAR;
2015    if saw_paren {
2016        zshlex();
2017    }
2018
2019    skip_separators();
2020
2021    // Body opener: real Inbrace OR a String containing the literal `{`
2022    // (early-return path) OR a String containing the Inbrace marker
2023    // `\u{8f}` (bct++ path post-switch add). C parse.c:1702 handles
2024    // both string forms via `*tokstr == Inbrace || *tokstr == '{'`.
2025    let body_opener_is_string_brace =
2026        tok() == STRING_LEX && tokstr().map(|s| s == "{" || s == "\u{8f}").unwrap_or(false);
2027    if tok() == INBRACE_TOK || body_opener_is_string_brace {
2028        // Capture body_start BEFORE the lexer advances past the
2029        // first body token. After the previous zshlex consumed
2030        // `{`, lexer.pos points just past `{` (which is where the
2031        // body source starts). The next `zshlex()` would advance
2032        // past the first token (`echo`), making body_start land
2033        // mid-body and lose the first word — `typeset -f f` would
2034        // print `a; echo b` for `{ echo a; echo b }`.
2035        // c:Src/parse.c:1690-1706 — par_funcdef requires a clean
2036        //   body-opener brace when the anonymous form `function {body}`
2037        //   is used (no names AND no `()`). zsh's lexer keeps the `{`
2038        //   as its own STRING token via the lex.c:1141-1144 early-
2039        //   return at command position, but the body brace must be
2040        //   followed by whitespace for the inner par_list to find a
2041        //   matching OUTBRACE — without a separator, the closing `}`
2042        //   gets merged into the last word (`X}`) and par_list ends
2043        //   without OUTBRACE, which C zsh reports as `parse error near
2044        //   \`}'`. zshrs's lexer has the same `bct` semantics; reject
2045        //   here at the parse step so the funcdef doesn't silently run
2046        //   with the stray `}` attached. With names or `()` present,
2047        //   the body brace is allowed even without a separator
2048        //   (`function name {body}` and `function () {body}` both work
2049        //   in zsh). Bug #60 in docs/BUGS.md.
2050        if names.is_empty() && !saw_paren {
2051            // Peek the next source byte after the current lexer position
2052            // (`{` was just tokenized — `pos()` points just past it).
2053            // A whitespace separator means proper `function { body }`
2054            // form; anything else is the malformed `function {body}`
2055            // shape zsh rejects.
2056            let next_byte = input_slice(pos(), pos() + 1)
2057                .and_then(|s| s.bytes().next())
2058                .unwrap_or(b' ');
2059            if !matches!(next_byte, b' ' | b'\t' | b'\n' | b';') {
2060                zerr("parse error near `}'"); // c:Src/parse.c YYERRORV
2061                return None;
2062            }
2063        }
2064        let body_start = pos();
2065        zshlex();
2066        // c:Src/parse.c — func body terminates at OUTBRACE_TOK.
2067        // Explicit end-token keeps the inner parse from hitting the
2068        // top-level stray-`}` arm (#168). Bug #167 family.
2069        let body = parse_program_until(Some(&[OUTBRACE_TOK]));
2070        // c:Src/parse.c:1733-1737 — `if (tok != OUTBRACE) { cmdpop();
2071        // ... YYERRORV(oecused); }`. Hard-error on missing close brace
2072        // so `function f { echo hi` doesn't silently register a half-
2073        // parsed body. Bug #405.
2074        if tok() != OUTBRACE_TOK {
2075            zerr("parse error: expected `}'");
2076            return None;
2077        }
2078        let body_end = pos().saturating_sub(1);
2079        let body_source = input_slice(body_start, body_end)
2080            .map(|s| {
2081                // Lexer's pos() may have advanced past `}` AND skipped
2082                // trailing whitespace/newlines before returning the
2083                // OUTBRACE_TOK to us, so the slice up to `pos - 1`
2084                // includes the `}` and any preceding whitespace.
2085                // Strip the trailing `}` and any preceding structural
2086                // separator (`;`, `\n`) — C zsh's getpermtext walks
2087                // the wordcode list and emits each command WITHOUT
2088                // the trailing `;`/`\n` that lives in the input.
2089                let t = s.trim();
2090                let t = t.strip_suffix('}').unwrap_or(t).trim_end();
2091                let t = t
2092                    .trim_end_matches(|c: char| c == ';' || c == '\n')
2093                    .trim_end();
2094                t.to_string()
2095            })
2096            .filter(|s| !s.is_empty());
2097        zshlex();
2098
2099        // Anonymous form `function () { body } a b c` (with `()`) or
2100        // `function { body } a b c` (zsh-only shorthand, no `()`). No
2101        // name was collected. Mirror parse_anon_funcdef: synthesize
2102        // `_zshrs_anon_N`, collect trailing args, set auto_call_args
2103        // so compile_funcdef registers + immediately calls the
2104        // function with the args as positional params.
2105        if names.is_empty() {
2106            let mut args = Vec::new();
2107            while tok() == STRING_LEX {
2108                if let Some(s) = tokstr() {
2109                    args.push(s);
2110                }
2111                zshlex();
2112            }
2113            static ANON_COUNTER: AtomicUsize = AtomicUsize::new(0);
2114            let n = ANON_COUNTER.fetch_add(1, Ordering::Relaxed);
2115            let name = format!("_zshrs_anon_kw_{}", n);
2116            return Some(ZshCommand::FuncDef(ZshFuncDef {
2117                names: vec![name],
2118                body: Box::new(body),
2119                tracing,
2120                auto_call_args: Some(args),
2121                body_source,
2122            }));
2123        }
2124
2125        Some(ZshCommand::FuncDef(ZshFuncDef {
2126            names,
2127            body: Box::new(body),
2128            tracing,
2129            auto_call_args: None,
2130            body_source,
2131        }))
2132    } else {
2133        // Short form
2134        par_list().map(|list| {
2135            ZshCommand::FuncDef(ZshFuncDef {
2136                names,
2137                body: Box::new(ZshProgram { lists: vec![list] }),
2138                tracing,
2139                auto_call_args: None,
2140                body_source: None,
2141            })
2142        })
2143    }
2144}
2145
2146/// Parse time command
2147/// Parse `time CMD` (POSIX time keyword). Direct port of
2148/// zsh/Src/parse.c:1787 `par_time`. The `time` keyword
2149/// times the execution of the following pipeline / cmd.
2150fn par_time() -> Option<ZshCommand> {
2151    zshlex(); // skip 'time'
2152
2153    // Check if there's a pipeline to time
2154    if tok() == SEPER || tok() == NEWLIN || tok() == ENDINPUT {
2155        Some(ZshCommand::Time(None))
2156    } else {
2157        let sublist = par_sublist();
2158        Some(ZshCommand::Time(sublist.map(Box::new)))
2159    }
2160}
2161
2162/// Port of `par_dinbrack(void)` from `Src/parse.c:1810`. Body
2163/// parser inside `[[ ... ]]` — calls `par_cond` to emit the
2164/// condition wordcode then advances past `]]`.
2165pub fn par_dinbrack() -> Option<()> {
2166    // c:1810
2167    set_incond(1); // c:1814
2168    set_incmdpos(false); // c:1815
2169    zshlex(); // c:1816
2170    let _ = par_cond(); // c:1817
2171    if tok() != DOUTBRACK {
2172        // c:1818
2173        yyerror("missing ]]");
2174        return None;
2175    }
2176    set_incond(0); // c:1820
2177    set_incmdpos(true); // c:1821
2178    zshlex(); // c:1822
2179    Some(())
2180}
2181
2182/// Parse a simple command
2183/// Parse a simple command (assignments + words + redirections).
2184/// Direct port of zsh/Src/parse.c:1836 `par_simple` —
2185/// the largest single function in parse.c. Handles ENVSTRING/
2186/// ENVARRAY assignments at command head, intermixed redirs,
2187/// typeset-style multi-assignment commands, and the trailing
2188/// inout-par `()` that converts a simple command into an inline
2189/// function definition.
2190fn par_simple(mut redirs: Vec<ZshRedir>) -> Option<ZshCommand> {
2191    let mut assigns = Vec::new();
2192    let mut words = Vec::new();
2193
2194    // c:1934-1974 — `{var}>file` brace-FD detection is wired
2195    // INSIDE the words loop below (parse.rs:4940-4956) rather than
2196    // here at the head. The words-loop site sees the tok=STRING
2197    // `{varname}` followed by a REDIROP and routes into par_redir
2198    // with redir.varid populated. C does it inline at the start of
2199    // each STRING/TYPESET arm iteration; functionally equivalent.
2200
2201    // c:1843-1846 — leading-NOCORRECT prefix: `nocorrect echo hello`
2202    // emits a NOCORRECT token at the start of par_simple. C sets
2203    // `nocorrect = 1` and skips past via the `zshlex();` at the
2204    // for-loop tail (c:1907). zshrs's par_simple (AST) had no
2205    // NOCORRECT arm so the token was silently dropped and the
2206    // following command line evaporated — `nocorrect echo hello`
2207    // produced empty output.
2208    while tok() == NOCORRECT {
2209        set_nocorrect(1); // c:1846
2210        zshlex(); // c:1907 (loop-tail zshlex)
2211    }
2212
2213    // Parse leading assignments
2214    while tok() == ENVSTRING || tok() == ENVARRAY {
2215        if let Some(assign) = parse_assign() {
2216            assigns.push(assign);
2217        }
2218        zshlex();
2219    }
2220
2221    // Parse words and redirections
2222    loop {
2223        match tok() {
2224            ENVSTRING | ENVARRAY => {
2225                // Mid-command assignment-shape arg under typeset
2226                // / declare / local / etc. (intypeset gates the
2227                // lexer to emit Envstring/Envarray for `name=val`
2228                // and `name=()` past the command name). Parse the
2229                // assignment, then emit a synthetic word
2230                // `NAME=value` (scalar) or `NAME=( … )` (array)
2231                // string so typeset's builtin arg list sees the
2232                // assignment-shape arg. Avoids the inline-env
2233                // scope path that mistakenly treats it like a
2234                // pre-cmd `X=Y cmd` assignment.
2235                if let Some(assign) = parse_assign() {
2236                    let synthetic = match &assign.value {
2237                        ZshAssignValue::Scalar(v) => format!("{}={}", assign.name, v),
2238                        ZshAssignValue::Array(elems) => {
2239                            // c:Src/builtin.c — assoc paren-init `h=( "" v
2240                            //   k2 v2 )` must preserve empty-string
2241                            //   elements (zsh stores key="" + value="v").
2242                            //   The bin_typeset paren-init splitter at
2243                            //   `builtin.rs:4358` recognizes the
2244                            //   REJOIN_SEP (`\u{1f}`) sentinel between
2245                            //   array elements and skips the leading/
2246                            //   trailing parens trim; using it here
2247                            //   round-trips empties end-to-end through
2248                            //   the synthetic-arg rebuild. Space-join
2249                            //   collapses adjacent empties (`(` + `""` +
2250                            //   `empty-val` becomes `( empty-val`) so
2251                            //   bin_typeset never sees the empty key.
2252                            //   Bug #93 in docs/BUGS.md.
2253                            let mut buf = String::with_capacity(
2254                                assign.name.len() + 4 + elems.iter().map(|e| e.len() + 1).sum::<usize>(),
2255                            );
2256                            buf.push_str(&assign.name);
2257                            buf.push_str("=(");
2258                            for elem in elems {
2259                                buf.push('\u{1f}');
2260                                buf.push_str(elem);
2261                            }
2262                            buf.push('\u{1f}');
2263                            buf.push(')');
2264                            buf
2265                        }
2266                    };
2267                    words.push(synthetic);
2268                }
2269                zshlex();
2270            }
2271            STRING_LEX | TYPESET => {
2272                let s = tokstr();
2273                if let Some(s) = s {
2274                    words.push(s);
2275                }
2276                // c:1929 — `incmdpos = 0;` so the next zshlex() does
2277                // not re-promote `{`/`[[`/reserved words at the
2278                // continuation position. Without this, `echo {a,b}`
2279                // re-lexes `{` as INBRACE_TOK (current-shell block)
2280                // and the brace expansion never reaches par_simple.
2281                set_incmdpos(false);
2282                // c:1931-1932 — `if (tok == TYPESET) intypeset = is_typeset = 1;`
2283                // Multi-assign `typeset a=1 b=2` relies on the lexer
2284                // re-emitting `b=2` as ENVSTRING; that path is gated
2285                // on `intypeset`. Without this, follow-on assignment
2286                // words arrive as STRING and the typeset builtin's
2287                // multi-assign form silently degrades.
2288                if tok() == TYPESET {
2289                    set_intypeset(true);
2290                }
2291                zshlex();
2292                // Check for function definition foo() { ... }
2293                if words.len() == 1 && tok() == INOUTPAR {
2294                    return parse_inline_funcdef(words.pop().unwrap());
2295                }
2296                // `{name}>file` named-fd redirect: the lexer doesn't
2297                // recognize this shape, so the bare word `{name}`
2298                // arrives as a String. If it matches `{IDENT}` and
2299                // the NEXT token is a redirop, pop it off as the
2300                // varid for that redir.
2301                if !words.is_empty() && IS_REDIROP(tok()) {
2302                    let last = words.last().unwrap();
2303                    let untoked = super::lex::untokenize(last);
2304                    if untoked.starts_with('{') && untoked.ends_with('}') && untoked.len() > 2 {
2305                        let name = &untoked[1..untoked.len() - 1];
2306                        if !name.is_empty()
2307                            && name.chars().all(|c| c == '_' || c.is_ascii_alphanumeric())
2308                            && name
2309                                .chars()
2310                                .next()
2311                                .map(|c| c == '_' || c.is_ascii_alphabetic())
2312                                .unwrap_or(false)
2313                        {
2314                            let varid = name.to_string();
2315                            words.pop();
2316                            if let Some(mut redir) = par_redir() {
2317                                redir.varid = Some(varid);
2318                                redirs.push(redir);
2319                            }
2320                            continue;
2321                        }
2322                    }
2323                }
2324            }
2325            _ if IS_REDIROP(tok()) => {
2326                match par_redir() {
2327                    Some(redir) => redirs.push(redir),
2328                    None => break, // Error in redir parsing, stop
2329                }
2330            }
2331            INOUTPAR if !words.is_empty() => {
2332                // c:2055-2057 — `if (!isset(MULTIFUNCDEF) && argc > 1)
2333                // YYERROR(oecused);` — multi-name funcdef gate:
2334                // `f1 f2() { ... }` defines f1 AND f2 to the same
2335                // body, but only when MULTIFUNCDEF is set.
2336                if !isset(MULTIFUNCDEF) && words.len() > 1 {
2337                    zerr("parse error: multiple names in function definition without MULTIFUNCDEF");
2338                    return None;
2339                }
2340                // c:2061-2068 — `if (isset(EXECOPT) && hasalias &&
2341                // !isset(ALIASFUNCDEF) && argc && hasalias !=
2342                // input_hasalias()) { zwarn(...); YYERROR(...); }`
2343                // Alias-as-funcdef warning. zshrs's parser doesn't
2344                // track `hasalias` (alias-expansion provenance
2345                // during parse) yet, so `had_alias` stays false —
2346                // the gate is wired here as a marker so the canonical
2347                // C predicate is visible. Once alias-provenance lands,
2348                // swap `false` for the actual provenance compare.
2349                let had_alias = false;
2350                if isset(EXECOPT) && had_alias && !isset(ALIASFUNCDEF) && !words.is_empty() {
2351                    crate::ported::utils::zwarn("defining function based on alias `(unknown)'");
2352                    return None;
2353                }
2354                // foo() { ... } style function
2355                return parse_inline_funcdef(words.pop().unwrap());
2356            }
2357            _ => break,
2358        }
2359    }
2360
2361    if assigns.is_empty() && words.is_empty() && redirs.is_empty() {
2362        return None;
2363    }
2364
2365    Some(ZshCommand::Simple(ZshSimple {
2366        assigns,
2367        words,
2368        redirs,
2369    }))
2370}
2371
2372/// Parse a redirection
2373/// Parse a redirection (>file, <file, >>file, <<HEREDOC, etc.).
2374/// Direct port of zsh/Src/parse.c:2229 `par_redir`. Returns
2375/// a ZshRedir node carrying the operator type, fd, target word
2376/// (or here-doc body / pipe-redir command), and any `{var}` style
2377/// fd-binding parameter.
2378fn par_redir() -> Option<ZshRedir> {
2379    par_redir_with_id(None)
2380}
2381
2382/// Wire a here-document body onto the redirection token that
2383/// requested it. Direct port of zsh/Src/parse.c:2347
2384/// `setheredoc`. Called when a heredoc terminator has been
2385/// matched and the body is ready to be attached to the redir.
2386///
2387/// zshrs port note: zsh's setheredoc patches the wordcode
2388/// in-place via `pc[1] = ecstrcode(doc); pc[2] = ecstrcode(term);`.
2389/// zshrs threads heredoc bodies through `HereDocInfo` structs
2390/// attached inline during the post-parse `fill_heredoc_bodies` walk.
2391/// This method is the AST-side equivalent: writes back to the
2392/// matching redir node by index.
2393/// Port of `setheredoc(int pc, int type, char *str, char *termstr,
2394/// char *munged_termstr)` from `Src/parse.c:2347-2355`. Patches the
2395/// pending heredoc redir at `pc` with its body string + raw and
2396/// munged terminator forms.
2397pub fn setheredoc(pc: usize, redir_type: i32, doc: &str, term: &str, munged_term: &str) {
2398    // zshrs-only guard: AST-path heredocs use `pc = -1 as usize`
2399    // (i.e. `usize::MAX`) as a sentinel meaning "no wordcode slot to
2400    // patch". C never passes a negative pc since the wordcode emitter
2401    // is always active. Skip silently for the AST-only case.
2402    if pc == usize::MAX {
2403        return;
2404    }
2405    // c:2350 — `int varid = WC_REDIR_VARID(ecbuf[pc]) ? REDIR_VARID_MASK : 0;`
2406    let cur = ECBUF.with_borrow(|b| b.get(pc).copied().unwrap_or(0));
2407    let varid = if WC_REDIR_VARID(cur) != 0 {
2408        REDIR_VARID_MASK
2409    } else {
2410        0
2411    };
2412    // c:2351 — `ecbuf[pc] = WCB_REDIR(type | REDIR_FROM_HEREDOC_MASK | varid);`
2413    let new_header = WCB_REDIR((redir_type | REDIR_FROM_HEREDOC_MASK | varid) as wordcode);
2414    // c:2352 — `ecbuf[pc + 2] = ecstrcode(str);`
2415    let coded_str = ecstrcode(doc);
2416    // c:2353 — `ecbuf[pc + 3] = ecstrcode(termstr);`
2417    let coded_term = ecstrcode(term);
2418    // c:2354 — `ecbuf[pc + 4] = ecstrcode(munged_termstr);`
2419    let coded_munged = ecstrcode(munged_term);
2420    ECBUF.with_borrow_mut(|b| {
2421        b[pc] = new_header;
2422        b[pc + 2] = coded_str;
2423        b[pc + 3] = coded_term;
2424        b[pc + 4] = coded_munged;
2425    });
2426}
2427
2428/// Parse a wordlist for `for ... in WORDS;`. Direct port of
2429/// zsh/Src/parse.c:2362 `par_wordlist`. Reads STRING tokens
2430/// until the next SEPER / SEMI / NEWLIN.
2431pub fn par_wordlist() -> Vec<String> {
2432    let mut out = Vec::new();
2433    // parse.c:2362-2378 — collect STRINGs into the wordlist.
2434    while tok() == STRING_LEX {
2435        if let Some(text) = tokstr() {
2436            out.push(text);
2437        }
2438        zshlex();
2439    }
2440    out
2441}
2442
2443/// Parse a newline-separated wordlist. Direct port of
2444/// zsh/Src/parse.c:2379 `par_nl_wordlist`. Like
2445/// par_wordlist but tolerates leading/trailing newlines.
2446pub fn par_nl_wordlist() -> Vec<String> {
2447    // parse.c:2380-2381 — skip leading newlines.
2448    while tok() == NEWLIN {
2449        zshlex();
2450    }
2451    let out = par_wordlist();
2452    // parse.c:2395-2397 — skip trailing newlines.
2453    while tok() == NEWLIN {
2454        zshlex();
2455    }
2456    out
2457}
2458
2459/// `COND_SEP()` macro from `Src/parse.c:2433`. True when the current
2460/// token is a separator usable inside `[[ … ]]` (newline / semi /
2461/// `&`). C uses it to skip optional whitespace between cond terms.
2462#[inline]
2463pub fn COND_SEP() -> bool {
2464    matches!(tok(), NEWLIN | SEMI | AMPER)
2465}
2466
2467/// Parse [[ ... ]] conditional
2468/// Parse `[[ EXPR ]]` conditional expression. Direct port of
2469/// zsh/Src/parse.c:2409 `par_cond` (and helpers par_cond_1,
2470/// par_cond_2, par_cond_double, par_cond_triple, par_cond_multi
2471/// at parse.c:2434-2731). Expression operators: `||` `&&` `!`
2472/// + unary tests (-f, -d, -n, -z, etc.) + binary tests (=, !=,
2473///   <, >, ==, =~, -eq, -ne, -lt, -le, -gt, -ge, -nt, -ot, -ef).
2474fn par_cond() -> Option<ZshCommand> {
2475    // C par_dinbrack (parse.c:1810-1822) wraps the body parse with
2476    // `incond = 1; incmdpos = 0;` BEFORE the first zshlex past `[[`,
2477    // and resets to `incond = 0; incmdpos = 1;` after `]]`. Without
2478    // `incond = 1`, lex.c does not promote `]]` to DOUTBRACK and the
2479    // cond body bleeds past the close bracket — the parser then
2480    // sees `]]` as a separate STRING command. Every `if [[ ... ]]; then`
2481    // failed with `command not found: ]]` before this fix.
2482    set_incond(1);
2483    set_incmdpos(false);
2484    zshlex(); // skip [[
2485              // Empty cond `[[ ]]` is a parse error in zsh — emit the
2486              // diagnostic and return None so the caller produces a
2487              // non-zero exit. Without this, `[[ ]]` silently passed and
2488              // returned exit 0.
2489    if tok() == DOUTBRACK {
2490        zerr("parse error near `]]'");
2491        set_incond(0);
2492        set_incmdpos(true);
2493        zshlex();
2494        return None;
2495    }
2496    let cond = parse_cond_expr();
2497
2498    if tok() == DOUTBRACK {
2499        set_incond(0);
2500        set_incmdpos(true);
2501        zshlex();
2502    } else {
2503        // c:Src/parse.c:1818-1819 — `if (tok != DOUTBRACK)
2504        // YYERRORV(oecused);`. par_dinbrack hard-requires DOUTBRACK
2505        // after par_cond; anything else is a parse error and the
2506        // outer parser's yyerror at c:2747 emits `parse error near
2507        // \`%s'` using zshlextext. Bug #473: BAR (`|`) inside
2508        // `[[ ab == a|b ]]` slipped past par_cond_or (which only
2509        // checks DBAR), the cond returned cleanly, and then the
2510        // top-level parser interpreted BAR as a pipe — running `b`
2511        // as a command (security-relevant if pattern RHS is user
2512        // input). Mirror C: emit parse error and abort.
2513        let tok_text = match tok() {
2514            BAR_TOK => "|".to_string(),
2515            DBAR => "||".to_string(),
2516            AMPER => "&".to_string(),
2517            DAMPER => "&&".to_string(),
2518            SEMI => ";".to_string(),
2519            DSEMI => ";;".to_string(),
2520            NEWLIN | SEPER => String::new(),
2521            _ => tokstr().map(|s| crate::ported::lex::untokenize(&s)).unwrap_or_default(),
2522        };
2523        if tok_text.is_empty() {
2524            zerr("parse error");
2525        } else {
2526            zerr(&format!("parse error near `{}'", tok_text));
2527        }
2528        set_incond(0);
2529        set_incmdpos(true);
2530        return None;
2531    }
2532
2533    cond.map(ZshCommand::Cond)
2534}
2535
2536/// Port of `par_cond_1(void)` from `Src/parse.c:2434`. Parses one
2537/// `||`-separated cond expression. Emits `WCB_COND(COND_AND, …)`
2538/// when an `&&` is found and recurses.
2539pub fn par_cond_1() -> i32 {
2540    // c:2434
2541
2542    let p = ECUSED.with(|c| c.get()) as usize;
2543    let r = par_cond_2();
2544    while COND_SEP() {
2545        condlex();
2546    }
2547    if tok() == DAMPER {
2548        condlex();
2549        while COND_SEP() {
2550            condlex();
2551        }
2552        ecispace(p, 1);
2553        par_cond_1();
2554        let ecused = ECUSED.with(|c| c.get()) as usize;
2555        ECBUF.with(|c| {
2556            c.borrow_mut()[p] = WCB_COND(COND_AND as u32, (ecused - 1 - p) as u32);
2557        });
2558        return 1;
2559    }
2560    r
2561}
2562
2563/// Port of `par_cond_2(void)` from `Src/parse.c:2476`. The heavy
2564/// cond-term parser: handles `! cond`, `(cond)`, unary `[ -X arg ]`,
2565/// binary `[ A op B ]`, and `[ A op1 B op2 C … ]` n-ary chains.
2566pub fn par_cond_2() -> i32 {
2567    // c:2476
2568    // `n_testargs` only applies in `testlex` mode (=== /bin/test
2569    // compat). zshrs has no testlex yet, so always 0.
2570    let n_testargs: i32 = 0;
2571
2572    // c:2481 — handled inline; this Rust port skips the n_testargs
2573    // arm since zshrs invokes par_cond via [[ ... ]] only.
2574
2575    while COND_SEP() {
2576        condlex();
2577    }
2578    if tok() == BANG_TOK {
2579        // c:2522 — `[[ ! cond ]]`
2580        condlex();
2581        ecadd(WCB_COND(COND_NOT as u32, 0));
2582        return par_cond_2();
2583    }
2584    if tok() == INPAR_TOK {
2585        // c:2533 — `[[ (cond) ]]`
2586        condlex();
2587        while COND_SEP() {
2588            condlex();
2589        }
2590        let r = par_cond();
2591        while COND_SEP() {
2592            condlex();
2593        }
2594        if tok() != OUTPAR_TOK {
2595            yyerror("missing )");
2596            return 0;
2597        }
2598        condlex();
2599        return r.map_or(0, |_| 1);
2600    }
2601    let s1 = tokstr().unwrap_or_default();
2602    // c:2549 — `dble = (s1 && IS_DASH(*s1) && (!n_testargs ||
2603    // strspn(s1+1, "abcd...") == 1) && !s1[2]);` — IS_DASH covers
2604    // BOTH `-` and Dash (`\u{9b}`). The raw tokstr inside `[[ ... ]]`
2605    // carries Dash as a marker byte, so `starts_with('-')` alone
2606    // matches only ASCII dashes and misses every `-z`, `-d`, `-r`
2607    // etc. — every such cond emitted the AST-only `condition
2608    // expected` error from par_cond_double. Use IS_DASH and count
2609    // chars (Dash is a single code point) instead of bytes.
2610    let s1_chars: Vec<char> = s1.chars().collect();
2611    let dble = !s1_chars.is_empty()
2612        && IS_DASH(s1_chars[0])
2613        && s1_chars.len() == 2
2614        && "abcdefghknoprstuvwxzLONGS".contains(s1_chars[1]);
2615    if tok() != STRING_LEX {
2616        if !s1.is_empty() && tok() != LEXERR && (!dble || n_testargs != 0) {
2617            // c:2486-2497 — `if (n_testargs == 1)` block: under
2618            // POSIXBUILTINS-off, `[ -t ]` rewrites to `[ -t 1 ]`
2619            // (ksh behavior). The C gate is `unset(POSIXBUILTINS)
2620            // && check_cond(s1, "t")`. zshrs's parser has
2621            // n_testargs=0 (no testlex), so this rewrite path is
2622            // unreachable from zshrs's [[ ]] / [ ] entry points;
2623            // wired here as a marker for parity. When testlex is
2624            // ported the call below activates.
2625            if n_testargs == 1 && unset(POSIXBUILTINS) && check_cond(&s1, "t") {
2626                condlex();
2627                return par_cond_double(&s1, "1");
2628            }
2629            // c:2557 — `[[ STRING ]]` re-interpreted as `[[ -n STRING ]]`.
2630            condlex();
2631            while COND_SEP() {
2632                condlex();
2633            }
2634            return par_cond_double("-n", &s1);
2635        }
2636        yyerror("condition expected");
2637        return 0;
2638    }
2639    condlex();
2640    while COND_SEP() {
2641        condlex();
2642    }
2643    if tok() == INANG_TOK || tok() == OUTANG_TOK {
2644        // c:2576 — `<` / `>` string compare.
2645        let xtok = tok();
2646        condlex();
2647        while COND_SEP() {
2648            condlex();
2649        }
2650        if tok() != STRING_LEX {
2651            yyerror("string expected");
2652            return 0;
2653        }
2654        let s3 = tokstr().unwrap_or_default();
2655        condlex();
2656        while COND_SEP() {
2657            condlex();
2658        }
2659        let op = if xtok == INANG_TOK {
2660            COND_STRLT
2661        } else {
2662            COND_STRGTR
2663        };
2664        ecadd(WCB_COND(op as u32, 0));
2665        ecstr(&s1);
2666        ecstr(&s3);
2667        return 1;
2668    }
2669    if tok() != STRING_LEX {
2670        // c:2592 — only one operand seen → `[ -n s1 ]`.
2671        if tok() != LEXERR {
2672            if !dble || n_testargs != 0 {
2673                return par_cond_double("-n", &s1);
2674            }
2675            return par_cond_multi(&s1, &[]);
2676        }
2677        yyerror("syntax error");
2678        return 0;
2679    }
2680    let s2 = tokstr().unwrap_or_default();
2681    set_incond(incond() + 1);
2682    condlex();
2683    while COND_SEP() {
2684        condlex();
2685    }
2686    set_incond(incond() - 1);
2687    // c:Src/parse.c:2598-2600 — `if (!n_testargs) dble = (s2 &&
2688    // IS_DASH(*s2) && !s2[2]);` — RECOMPUTE dble based on s2 once
2689    // it's been read, so `[[ A -X B ]]` is treated as a 2-arg cond
2690    // `[ -X B ]` (par_cond_double) rather than a 3-arg triple. This
2691    // is what routes `[[ "" -a "x" ]]` to par_cond_double("", "-a")
2692    // → COND_ERROR "parse error: condition expected: ". Without
2693    // this, the original `dble` from s1 stayed false, the parser
2694    // grabbed s3 and built COND_MODI silently. parity bug #25.
2695    let s2_chars: Vec<char> = s2.chars().collect();
2696    let dble = !s2_chars.is_empty() && IS_DASH(s2_chars[0]) && s2_chars.len() == 2;
2697    if tok() == STRING_LEX && !dble {
2698        let s3 = tokstr().unwrap_or_default();
2699        condlex();
2700        while COND_SEP() {
2701            condlex();
2702        }
2703        if tok() == STRING_LEX {
2704            // c:2615 — n-ary `[ A op B C D ... ]`.
2705            let mut l: Vec<String> = vec![s2, s3];
2706            while tok() == STRING_LEX {
2707                l.push(tokstr().unwrap_or_default());
2708                condlex();
2709                while COND_SEP() {
2710                    condlex();
2711                }
2712            }
2713            return par_cond_multi(&s1, &l);
2714        }
2715        return par_cond_triple(&s1, &s2, &s3);
2716    }
2717    par_cond_double(&s1, &s2)
2718}
2719
2720/// Port of `par_cond_double(char *a, char *b)` from `Src/parse.c:2626`.
2721/// Emits wordcode for unary cond `[ -X b ]` or modular `[ -mod b ]`.
2722pub fn par_cond_double(a: &str, b: &str) -> i32 {
2723    // c:2628 — `if (!IS_DASH(a[0]) || !a[1])` — char-based, since
2724    // Dash is a single code point (`\u{9b}`) and `a.len() < 2` on
2725    // BYTES would still pass for "-z" but fail for the marker form
2726    // `\u{9b}z` (2 bytes). Walk by chars.
2727    let ac: Vec<char> = a.chars().collect();
2728    if ac.is_empty() || !IS_DASH(ac[0]) || ac.len() < 2 {
2729        // c:Src/parse.c:2629 COND_ERROR macro expansion:
2730        //   zwarn(...); herrflush(); errflag |= ERRFLAG_ERROR;
2731        //   YYERROR(ecused) /* sets tok = LEXERR */
2732        // The YYERROR portion is critical — without it the outer
2733        // parser keeps walking the wordcode and execution proceeds
2734        // (e.g. `[[ "" -a "x" ]] && echo m || echo n` runs the
2735        // `|| echo n` branch). Setting LEXERR aborts the upper
2736        // parse so the whole line is rejected, matching zsh's
2737        // observable behavior of stdout="" on parse error.
2738        zerr(&format!("parse error: condition expected: {}", a));
2739        errflag.fetch_or(crate::ported::zsh_h::ERRFLAG_ERROR, Ordering::SeqCst);
2740        set_tok(LEXERR);
2741        return 1;
2742    }
2743    // c:2630 — `else if (!a[2] && strspn(a+1, "abcd...zhLONGS") == 1)`
2744    let unary_set = "abcdefgknoprstuvwxzhLONGS";
2745    if ac.len() == 2 && unary_set.contains(ac[1]) {
2746        // c:2631 — `ecadd(WCB_COND(a[1], 0));` uses the raw cond-op
2747        // letter byte as the opcode payload. Use the ASCII char's
2748        // code-point value directly — every letter in `unary_set`
2749        // fits in 7 bits.
2750        ecadd(WCB_COND(ac[1] as u32, 0));
2751        ecstr(b);
2752    } else {
2753        ecadd(WCB_COND(COND_MOD as u32, 1));
2754        ecstr(a);
2755        ecstr(b);
2756    }
2757    1
2758}
2759
/// Port of `get_cond_num(char *tst)` from `Src/parse.c:2643`.
///
/// Looks `tst` up in the fixed operator table
/// `nt, ot, ef, eq, ne, lt, gt, le, ge` (exact, case-sensitive match,
/// no leading dash) and returns its zero-based position, or `-1` when
/// `tst` is not in the table.
pub fn get_cond_num(tst: &str) -> i32 {
    // c:2647 — order matters: callers add the index to a base opcode.
    const CONDSTRS: [&str; 9] = ["nt", "ot", "ef", "eq", "ne", "lt", "gt", "le", "ge"];

    CONDSTRS
        .iter()
        .position(|&name| name == tst)
        .map_or(-1, |idx| idx as i32)
}
2775
/// Thread-local boolean flag, initially `false`, standing in for the
/// function-level `static int inpartime` that C's par_time keeps
/// (parse.c:1038) to stop `time time foo` from recursing without end.
/// C's `static int` is per-process; a thread-local is used here so the
/// flag is per-evaluator instead.
///
/// NOTE(review): nothing in the nearby code reads or sets this flag —
/// confirm where the wordcode path consults it.
thread_local! {
    static PARSER_INPARTIME: std::cell::Cell<bool> = const { std::cell::Cell::new(false) };
}
2783
2784/// Port of `par_cond_triple(char *a, char *b, char *c)` from
2785/// `Src/parse.c:2659`. Emits wordcode for the binary forms
2786/// `[ A op B ]` — `=` / `==` / `!=` / `<` / `>` / `=~` / `-X`.
2787///
2788/// C does `(b[0] == Equals || b[0] == '=')` etc., matching BOTH the
2789/// raw ASCII operator char AND its tokenized marker form per
2790/// `Src/zsh.h:159-194`:
2791///   Equals = `\u{8d}`, Outang = `\u{95}`, Inang  = `\u{94}`,
2792///   Tilde  = `\u{98}`, Bang   = `\u{9c}`, Dash   = `\u{9b}`.
2793/// Inside `[[ ... ]]` the lexer emits the marker bytes — comparing
2794/// against literal-only `b"=="` misses every cond op.
2795/// (The previous Rust port had the doc comment values wrong:
2796/// Outang=0x8e was actually Bar; Inang=0x91 was Inbrack;
2797/// Tilde=0x96 was OutangProc; Bang=0x8b was Outparmath. The code
2798/// itself uses the correct const names, so this was a docs-only fix.)
2799pub fn par_cond_triple(a: &str, b: &str, c: &str) -> i32 {
2800    // c:2659
2801    let bc: Vec<char> = b.chars().collect();
2802    let is_eq = |ch: char| ch == '=' || ch == Equals;
2803    let is_gt = |ch: char| ch == '>' || ch == Outang;
2804    let is_lt = |ch: char| ch == '<' || ch == Inang;
2805    let is_tilde = |ch: char| ch == '~' || ch == Tilde;
2806    let is_bang = |ch: char| ch == '!' || ch == Bang;
2807
2808    // c:2663 — `(b[0] == Equals || b[0] == '=') && !b[1]` → `=` (single).
2809    if bc.len() == 1 && is_eq(bc[0]) {
2810        ecadd(WCB_COND(COND_STREQ as u32, 0));
2811        ecstr(a);
2812        ecstr(c);
2813        let np = ECNPATS.with(|cc| {
2814            let v = cc.get();
2815            cc.set(v + 1);
2816            v
2817        }) as u32;
2818        ecadd(np);
2819        return 1;
2820    }
2821    // c:2668-2673 — `(t0 = b[0]=='>' || Outang) || b[0]=='<' || Inang`.
2822    if bc.len() == 1 && (is_gt(bc[0]) || is_lt(bc[0])) {
2823        let op = if is_gt(bc[0]) {
2824            COND_STRGTR
2825        } else {
2826            COND_STRLT
2827        };
2828        ecadd(WCB_COND(op as u32, 0));
2829        ecstr(a);
2830        ecstr(c);
2831        let np = ECNPATS.with(|cc| {
2832            let v = cc.get();
2833            cc.set(v + 1);
2834            v
2835        }) as u32;
2836        ecadd(np);
2837        return 1;
2838    }
2839    // c:2674-2679 — `==` STRDEQ.
2840    if bc.len() == 2 && is_eq(bc[0]) && is_eq(bc[1]) {
2841        ecadd(WCB_COND(COND_STRDEQ as u32, 0));
2842        ecstr(a);
2843        ecstr(c);
2844        let np = ECNPATS.with(|cc| {
2845            let v = cc.get();
2846            cc.set(v + 1);
2847            v
2848        }) as u32;
2849        ecadd(np);
2850        return 1;
2851    }
2852    // c:2680-2684 — `!=` STRNEQ.
2853    if bc.len() == 2 && is_bang(bc[0]) && is_eq(bc[1]) {
2854        ecadd(WCB_COND(COND_STRNEQ as u32, 0));
2855        ecstr(a);
2856        ecstr(c);
2857        let np = ECNPATS.with(|cc| {
2858            let v = cc.get();
2859            cc.set(v + 1);
2860            v
2861        }) as u32;
2862        ecadd(np);
2863        return 1;
2864    }
2865    // c:2685-2691 — `=~` REGEX (no pattern slot — implicit COND_MODI).
2866    if bc.len() == 2 && is_eq(bc[0]) && is_tilde(bc[1]) {
2867        ecadd(WCB_COND(COND_REGEX as u32, 0));
2868        ecstr(a);
2869        ecstr(c);
2870        return 1;
2871    }
2872    // c:2692-2702 — `-OP` numeric-or-modular cond (e.g. `-eq`, `-nt`).
2873    if !bc.is_empty() && IS_DASH(bc[0]) {
2874        let rest: String = bc[1..].iter().collect();
2875        let t = get_cond_num(&rest);
2876        if t > -1 {
2877            ecadd(WCB_COND((t + COND_NT) as u32, 0));
2878            ecstr(a);
2879            ecstr(c);
2880            return 1;
2881        }
2882        ecadd(WCB_COND(COND_MODI as u32, 0));
2883        ecstr(b);
2884        ecstr(a);
2885        ecstr(c);
2886        return 1;
2887    }
2888    // c:2703-2707 — `-mod A B C` modular cond on `a`.
2889    let ac: Vec<char> = a.chars().collect();
2890    if !ac.is_empty() && IS_DASH(ac[0]) && ac.len() > 1 {
2891        ecadd(WCB_COND(COND_MOD as u32, 2));
2892        ecstr(a);
2893        ecstr(b);
2894        ecstr(c);
2895        return 1;
2896    }
2897    zerr(&format!("condition expected: {}", b));
2898    1
2899}
2900
2901/// Port of `par_cond_multi(char *a, LinkList l)` from `Src/parse.c:2716`.
2902/// Emits wordcode for `[ -OP A B C … ]` n-ary cond (alternation).
2903pub fn par_cond_multi(a: &str, l: &[String]) -> i32 {
2904    // c:2716 — `if (!IS_DASH(a[0]) || !a[1])`; same Dash/`-` dual
2905    // matching as par_cond_double, char-walked because Dash is a
2906    // single code point.
2907    let ac: Vec<char> = a.chars().collect();
2908    if ac.is_empty() || !IS_DASH(ac[0]) || ac.len() < 2 {
2909        zerr(&format!("condition expected: {}", a));
2910        return 1;
2911    }
2912    ecadd(WCB_COND(COND_MOD as u32, l.len() as u32));
2913    ecstr(a);
2914    for item in l {
2915        ecstr(item);
2916    }
2917    1
2918}
2919
2920/// Emit a parser-level error. Direct port of zsh/Src/parse.c
2921/// 2733-2766 `yyerror`. C version fills a per-event error buffer
2922/// and sets errflag. zshrs pushes onto errors which the
2923/// caller drains via parse()'s Result return.
2924/// WARNING: param-name divergence — Rust takes `&str message`, C takes
2925/// `int noerr`. The Rust callers pass user-meaningful messages
2926/// (`"missing ]]"`, `"condition expected"`); the C body collects the
2927/// offending token via `dupstring(zshlextext)` for the error string.
2928/// This Rust adapter:
2929///   1. Uses the caller-supplied message verbatim if non-empty.
2930///   2. Skips the `histdone & HISTFLAG_NOEXEC` and `errflag & ERRFLAG_INT`
2931///      gates per c:2746 (printing only when neither is set) — the
2932///      ERRFLAG_INT check is the load-bearing guard.
2933///   3. Sets ERRFLAG_ERROR per c:2753 (noerr=0 path always taken).
2934pub fn yyerror(msg: &str) {
2935    // c:2733
2936    let int_flagged = (errflag.load(Ordering::SeqCst) & crate::ported::zsh_h::ERRFLAG_INT) != 0;
2937    if !int_flagged {
2938        // c:2746
2939        let body = if msg.is_empty() {
2940            "parse error".to_string()
2941        }
2942        // c:2751
2943        else {
2944            format!("parse error: {msg}")
2945        }; // c:2748
2946        zwarnnam("zsh", &body);
2947    }
2948    // c:2753 — `if (!noerr && noerrs != 2) errflag |= ERRFLAG_ERROR;`
2949    errflag.fetch_or(crate::ported::zsh_h::ERRFLAG_ERROR, Ordering::SeqCst);
2950}
2951
// ============================================================
// Eprog runtime ops (parse.c:2767-2853)
//
// dupeprog / useeprog / freeeprog are zsh's reference-counting
// helpers for executable programs. zshrs's AST is owned by
// value (Rust ownership), but the wordcode `eprog` ports below
// still keep the C `nref` bookkeeping: `dupeprog` builds a fresh
// copy, `useeprog` bumps `nref` on non-negative counts, and
// `freeeprog` decrements it and clears the program's buffers
// once the count reaches zero.
// ============================================================
2961
2962/// Duplicate an Eprog. Direct port of zsh/Src/parse.c:2813
2963/// Port of `Eprog dupeprog(Eprog p, int heap)` from
2964/// `Src/parse.c:2767`. Deep-copies the wordcode array, string
2965/// table, and pattern-prog slots. `dummy_eprog` is returned
2966/// unchanged. `heap`-allocated copies get `nref = -1` (never
2967/// freed); real ones get `nref = 1`.
2968pub fn dupeprog(p: &eprog, heap: bool) -> eprog {
2969    // c:2774-2775 — `if (p == &dummy_eprog) return p;` — caller-
2970    // observable identity in C uses a pointer compare; Rust's
2971    // equivalent is "if it has the dummy's shape (single WCB_END
2972    // word and no strs), return a copy of the same shape".
2973    // c:2796-2797 — `for (i = r->npats; i--; pp++) *pp = dummy_patprog1;`
2974    // C uses `dummy_patprog1` as a placeholder; the Rust port has
2975    // `Vec<Patprog>` (Box<patprog>) — synthesize an equivalent zero-
2976    // initialized patprog for each slot (resolved later by
2977    // pattern.c::patcompile-on-first-use).
2978    let dummy_pat = || crate::ported::zsh_h::patprog {
2979        startoff: 0,
2980        size: 0,
2981        mustoff: 0,
2982        patmlen: 0,
2983        globflags: 0,
2984        globend: 0,
2985        flags: 0,
2986        patnpar: 0,
2987        patstartch: 0,
2988    };
2989    let r = eprog {
2990        // c:2778 — `flags = (heap ? EF_HEAP : EF_REAL) | (p->flags & EF_RUN);`
2991        flags: (if heap { EF_HEAP } else { EF_REAL }) | (p.flags & EF_RUN),
2992        len: p.len,
2993        npats: p.npats,
2994        // c:2787 — `nref = heap ? -1 : 1;`
2995        nref: if heap { -1 } else { 1 },
2996        prog: p.prog.clone(),
2997        strs: p.strs.clone(),
2998        pats: (0..p.npats).map(|_| Box::new(dummy_pat())).collect(),
2999        shf: None,
3000        dump: None,
3001    };
3002    r
3003}
3004
3005/// Port of `void useeprog(Eprog p)` from `Src/parse.c:2813`.
3006/// `if (p && p != &dummy_eprog && p->nref >= 0) p->nref++;` —
3007/// pin a real (non-heap, non-dummy) Eprog so it survives the
3008/// next `freeeprog`.
3009pub fn useeprog(p: &mut eprog) {
3010    // c:2815 — `if (p && p != &dummy_eprog && p->nref >= 0)`
3011    if p.nref >= 0 {
3012        p.nref += 1; // c:2816
3013    }
3014}
3015
3016/// Port of `void freeeprog(Eprog p)` from `Src/parse.c:2823`.
3017/// Refcount-decrement; when it hits zero, drops the pattern progs,
3018/// decrements the dump refcount if any, and releases the eprog.
3019/// `dummy_eprog` is never freed. Heap-eprogs (`nref < 0`) are
3020/// never freed either — they live as long as the heap arena.
3021pub fn freeeprog(p: &mut eprog) {
3022    // c:2829 — `if (p && p != &dummy_eprog) { ... }`
3023    if p.nref > 0 {
3024        p.nref -= 1; // c:2832
3025        if p.nref == 0 {
3026            // c:2833-2840 — drop pats, dump refcount, then the eprog.
3027            // Rust's Drop handles the per-field cleanup; we just
3028            // need to decrement the dump count first.
3029            if let Some(dump) = p.dump.take() {
3030                let dumped = (*dump).clone();
3031                decrdumpcount(&dumped); // c:2837
3032            }
3033            p.prog.clear();
3034            p.strs = None;
3035            p.pats.clear();
3036        }
3037    }
3038}
3039
3040// =============================================================================
3041// Wordcode read helpers — used by text.rs's `gettext2` and exec dispatch
3042// to walk a compiled Eprog without re-running the parser. These are the
3043// only `Src/parse.c` functions ported so far in this file; the recursive-
3044// descent parser (par_event / par_list / par_cmd / par_*) follows
3045// below as free ported at module scope.
3046// =============================================================================
3047
3048/// Port of `ecgetstr(Estate s, int dup, int *tokflag)` from `Src/parse.c:2855`.
3049/// `s->pc` advances through the wordcode buffer; `s->strs` indexes the
3050/// string pool. Returns the interned string (or a 1-3-char literal
3051/// inlined directly into the wordcode word).
3052pub fn ecgetstr(s: &mut estate, dup: i32, tokflag: Option<&mut i32>) -> String {
3053    let prog = &s.prog.prog;
3054    if s.pc >= prog.len() {
3055        return String::new();
3056    }
3057    let c = prog[s.pc]; // c:2858 `wordcode c = *s->pc++;`
3058    s.pc += 1;
3059    if let Some(tf) = tokflag {
3060        *tf = i32::from((c & 1) != 0); // c:2880 `*tokflag = (c & 1);`
3061    }
3062    if c == 6 || c == 7 {
3063        // c:2861 `if (c == 6 || c == 7) r = "";`
3064        return String::new();
3065    }
3066    let r: String = if (c & 2) != 0 {
3067        // c:2862 — `else if (c & 2)`
3068        // c:2863-2868 — 3-byte inline string packed into the wordcode
3069        // word; followed by `buf[3] = '\0'; r = dupstring(buf);`.
3070        // C's `dupstring` uses `strlen(buf)` which TRUNCATES at the
3071        // first NUL byte — short strings of 1 or 2 chars get padded
3072        // with NULs and truncated cleanly. The previous Rust port
3073        // used `retain(|&x| x != 0)` which would silently SPLICE OUT
3074        // an interior NUL (e.g. `[a, 0, b]` → "ab"), diverging from
3075        // C's strlen-truncate (`[a, 0, b]` → "a"). Fix: truncate at
3076        // first NUL to match C exactly.
3077        let b0 = ((c >> 3) & 0xff) as u8;
3078        let b1 = ((c >> 11) & 0xff) as u8;
3079        let b2 = ((c >> 19) & 0xff) as u8;
3080        let v = [b0, b1, b2];
3081        let end = v.iter().position(|&x| x == 0).unwrap_or(v.len()); // c:2869 strlen(buf)
3082        String::from_utf8_lossy(&v[..end]).into_owned()
3083    } else {
3084        // c:2877 `else r = s->strs + (c >> 2);`
3085        let off = (c >> 2) as usize + s.strs_offset;
3086        let strs_bytes = s.strs.as_deref().unwrap_or("").as_bytes();
3087        if off >= strs_bytes.len() {
3088            String::new()
3089        } else {
3090            let tail = &strs_bytes[off..];
3091            let end = tail.iter().position(|&b| b == 0).unwrap_or(tail.len());
3092            String::from_utf8_lossy(&tail[..end]).into_owned()
3093        }
3094    };
3095    // c:2891 `return ((dup == EC_DUP || (dup && (c & 1))) ? dupstring(r) : r);`
3096    // Rust owns the String already; `dup` flag has no observable effect.
3097    let _ = (dup, EC_DUP, EC_NODUP);
3098    r
3099}
3100
3101// ============================================================
3102// Wordcode runtime getters (parse.c:2853-3060)
3103//
3104// Direct ports of the wordcode-read helpers (ecrawstr,
3105// ecgetstr, ecgetarr, ecgetredirs, ecgetlist, eccopyredirs).
3106// Read packed wordcode out of an Eprog at execution time.
3107// Used by exec_wordcode and the wordcode-walking dispatch in
3108// src/vm_helper.
3109// ============================================================
3110
3111/// Port of `ecrawstr(Eprog p, Wordcode pc, int *tokflag)` from
3112/// `Src/parse.c:2891`. Like `ecgetstr` but reads at the given pc
3113/// without advancing — caller steps `pc` separately.
3114pub fn ecrawstr(p: &eprog, pc: usize, tokflag: Option<&mut i32>) -> String {
3115    if pc >= p.prog.len() {
3116        return String::new();
3117    }
3118    let c = p.prog[pc]; // c:2894
3119    if let Some(tf) = tokflag {
3120        *tf = i32::from((c & 1) != 0); // c:2898/2906/2912
3121    }
3122    if c == 6 || c == 7 {
3123        // c:2897
3124        return String::new();
3125    }
3126    if (c & 2) != 0 {
3127        // c:2902-2906 — same 3-byte inline string as ecgetstr, then
3128        // `buf[3] = '\0'; return dupstring(buf);` — truncate at first
3129        // NUL via strlen (NOT splice out interior NULs).
3130        let b0 = ((c >> 3) & 0xff) as u8;
3131        let b1 = ((c >> 11) & 0xff) as u8;
3132        let b2 = ((c >> 19) & 0xff) as u8;
3133        let v = [b0, b1, b2];
3134        let end = v.iter().position(|&x| x == 0).unwrap_or(v.len()); // c:2906 strlen(buf)
3135        String::from_utf8_lossy(&v[..end]).into_owned()
3136    } else {
3137        // c:2911
3138        let off = (c >> 2) as usize;
3139        let strs_bytes = p.strs.as_deref().unwrap_or("").as_bytes();
3140        if off >= strs_bytes.len() {
3141            return String::new();
3142        }
3143        let tail = &strs_bytes[off..];
3144        let end = tail.iter().position(|&b| b == 0).unwrap_or(tail.len());
3145        String::from_utf8_lossy(&tail[..end]).into_owned()
3146    }
3147}
3148
3149/// Port of `ecgetarr(Estate s, int num, int dup, int *tokflag)` from
3150/// `Src/parse.c:2917`. Reads `num` strings from wordcode at `s->pc`
3151/// and OR-folds each entry's token flag into `*tokflag`.
3152pub fn ecgetarr(s: &mut estate, num: usize, dup: i32, tokflag: Option<&mut i32>) -> Vec<String> {
3153    let mut ret: Vec<String> = Vec::with_capacity(num); // c:2922
3154    let mut tf: i32 = 0;
3155    for _ in 0..num {
3156        // c:2924 `while (num--)`
3157        let mut tmp = 0;
3158        ret.push(ecgetstr(s, dup, Some(&mut tmp))); // c:2925
3159        tf |= tmp; // c:2926
3160    }
3161    if let Some(out) = tokflag {
3162        // c:2929
3163        *out = tf;
3164    }
3165    ret
3166}
3167
3168/// Port of `ecgetlist(Estate s, int num, int dup, int *tokflag)` from
3169/// `Src/parse.c:2937`. Same shape as `ecgetarr` but C returns
3170/// `LinkList`; zshrs uses `Vec<String>` for both.
3171pub fn ecgetlist(s: &mut estate, num: usize, dup: i32, tokflag: Option<&mut i32>) -> Vec<String> {
3172    if num == 0 {
3173        // c:2949-2952
3174        if let Some(tf) = tokflag {
3175            *tf = 0;
3176        }
3177        return Vec::new();
3178    }
3179    ecgetarr(s, num, dup, tokflag)
3180}
3181
3182/// Port of `ecgetredirs(Estate s)` from `Src/parse.c:2959`.
3183///
3184/// `strs` must be the same tail `ecgetstr` uses (`s->strs` / `estate.strs` from offset).
3185/// WARNING: param names don't match C — Rust=(prog, strs, pc) vs C=(s)
3186pub fn ecgetredirs(s: &mut estate) -> Vec<redir> {
3187    let mut ret: Vec<redir> = Vec::new(); // c:2959 `LinkList ret = newlinklist();`
3188    let prog_len = s.prog.prog.len();
3189    if s.pc >= prog_len {
3190        return ret;
3191    }
3192    let mut code = s.prog.prog[s.pc]; // c:2962 `wordcode code = *s->pc++;`
3193    s.pc += 1;
3194
3195    loop {
3196        if wc_code(code) != WC_REDIR {
3197            // c:2988-2989 `s->pc--` then break from while
3198            s.pc = s.pc.saturating_sub(1);
3199            break;
3200        }
3201
3202        let typ = WC_REDIR_TYPE(code); // c:2967 `r->type = WC_REDIR_TYPE(code);`
3203        if s.pc >= prog_len {
3204            break;
3205        }
3206        let fd1_w = s.prog.prog[s.pc]; // c:2968 `r->fd1 = *s->pc++;`
3207        s.pc += 1;
3208
3209        let name = ecgetstr(s, EC_DUP, None); // c:2969 `r->name = ecgetstr(...)`
3210
3211        let (flags, here_terminator, munged_here_terminator) = if WC_REDIR_FROM_HEREDOC(code) != 0 {
3212            // c:2970-2973
3213            let term = ecgetstr(s, EC_DUP, None);
3214            let munged = ecgetstr(s, EC_DUP, None);
3215            (REDIRF_FROM_HEREDOC, Some(term), Some(munged))
3216        } else {
3217            // c:2974-2977
3218            (0, None, None)
3219        };
3220
3221        let varid = if WC_REDIR_VARID(code) != 0 {
3222            // c:2979-2980
3223            Some(ecgetstr(s, EC_DUP, None))
3224        } else {
3225            None // c:2981-2982
3226        };
3227
3228        ret.push(redir {
3229            // c:2965-2982 fields + c:2984 `addlinknode`
3230            typ,
3231            flags,
3232            fd1: fd1_w as i32,
3233            fd2: 0,
3234            name: Some(name),
3235            varid,
3236            here_terminator,
3237            munged_here_terminator,
3238        });
3239
3240        if s.pc >= prog_len {
3241            break;
3242        }
3243        code = s.prog.prog[s.pc]; // c:2986 `code = *s->pc++;`
3244        s.pc += 1;
3245    }
3246
3247    ret // c:2990 `return ret`
3248}
3249
3250/// Port of `eccopyredirs(Estate s)` from `Src/parse.c:3003`. Reads
3251/// the WC_REDIR run at `s->pc`, counts the wordcodes needed,
3252/// reserves space in `ecbuf` via `ecispace`, then re-walks `s->pc`
3253/// re-emitting each redir's wordcodes into the reserved slot —
3254/// finally calls `bld_eprog(0)` to package the result as an Eprog.
3255pub fn eccopyredirs(s: &mut estate) -> Option<eprog> {
3256    let prog_len = s.prog.prog.len();
3257    if s.pc >= prog_len {
3258        return None;
3259    }
3260    // c:3007-3009 — `if (wc_code(*pc) != WC_REDIR) return NULL;`
3261    let first_code = s.prog.prog[s.pc];
3262    if wc_code(first_code) != WC_REDIR {
3263        return None;
3264    }
3265    // c:3011 — `init_parse();`
3266    init_parse();
3267
3268    // c:3013-3027 — count wordcodes the redir run will need.
3269    // Each WC_REDIR contributes `code + fd1 + name` = 3, plus
3270    // `+2` if WC_REDIR_FROM_HEREDOC (terminator + munged), plus
3271    // `+1` if WC_REDIR_VARID.
3272    let mut probe = s.pc;
3273    let mut ncodes = 0usize;
3274    loop {
3275        if probe >= prog_len {
3276            break;
3277        }
3278        let code = s.prog.prog[probe];
3279        if wc_code(code) != WC_REDIR {
3280            break;
3281        }
3282        let mut ncode = if WC_REDIR_FROM_HEREDOC(code) != 0 {
3283            5
3284        } else {
3285            3
3286        };
3287        if WC_REDIR_VARID(code) != 0 {
3288            ncode += 1;
3289        }
3290        probe += ncode;
3291        ncodes += ncode;
3292    }
3293
3294    // c:3028-3029 — `r = ecused; ecispace(r, ncodes);`
3295    let r0 = ECUSED.get() as usize;
3296    ecispace(r0, ncodes);
3297
3298    // c:3031-3053 — re-walk `s->pc` and write into ecbuf[r..].
3299    let mut r = r0;
3300    loop {
3301        if s.pc >= prog_len {
3302            break;
3303        }
3304        let code = s.prog.prog[s.pc];
3305        if wc_code(code) != WC_REDIR {
3306            break;
3307        }
3308        s.pc += 1;
3309        // c:3036 — `ecbuf[r++] = code;`
3310        ECBUF.with_borrow_mut(|buf| {
3311            if r >= buf.len() {
3312                buf.resize(r + 1, 0);
3313            }
3314            buf[r] = code;
3315        });
3316        r += 1;
3317        // c:3038 — `ecbuf[r++] = *s->pc++;` (the fd1 word)
3318        let fd1 = s.prog.prog[s.pc];
3319        s.pc += 1;
3320        ECBUF.with_borrow_mut(|buf| {
3321            if r >= buf.len() {
3322                buf.resize(r + 1, 0);
3323            }
3324            buf[r] = fd1;
3325        });
3326        r += 1;
3327        // c:3041 — `ecbuf[r++] = ecstrcode(ecgetstr(s, EC_NODUP, NULL));`
3328        let name = ecgetstr(s, EC_NODUP, None);
3329        let nc = ecstrcode(&name);
3330        ECBUF.with_borrow_mut(|buf| {
3331            if r >= buf.len() {
3332                buf.resize(r + 1, 0);
3333            }
3334            buf[r] = nc;
3335        });
3336        r += 1;
3337        // c:3042-3047 — heredoc terminators.
3338        if WC_REDIR_FROM_HEREDOC(code) != 0 {
3339            let term = ecgetstr(s, EC_NODUP, None);
3340            let tc = ecstrcode(&term);
3341            ECBUF.with_borrow_mut(|buf| {
3342                if r >= buf.len() {
3343                    buf.resize(r + 1, 0);
3344                }
3345                buf[r] = tc;
3346            });
3347            r += 1;
3348            let munged = ecgetstr(s, EC_NODUP, None);
3349            let mc = ecstrcode(&munged);
3350            ECBUF.with_borrow_mut(|buf| {
3351                if r >= buf.len() {
3352                    buf.resize(r + 1, 0);
3353                }
3354                buf[r] = mc;
3355            });
3356            r += 1;
3357        }
3358        // c:3048-3049 — varid.
3359        if WC_REDIR_VARID(code) != 0 {
3360            let varid = ecgetstr(s, EC_NODUP, None);
3361            let vc = ecstrcode(&varid);
3362            ECBUF.with_borrow_mut(|buf| {
3363                if r >= buf.len() {
3364                    buf.resize(r + 1, 0);
3365                }
3366                buf[r] = vc;
3367            });
3368            r += 1;
3369        }
3370    }
3371
3372    // c:3056 — `return bld_eprog(0);` — `bld_eprog` appends the
3373    // WC_END marker and packages ECBUF/ECSTRS into an Eprog.
3374    Some(bld_eprog(false))
3375}
3376
3377/// Port of `init_eprog(void)` from `Src/parse.c:3069`. Sets up
3378/// `dummy_eprog_code = WCB_END(); dummy_eprog.len = sizeof(wordcode);
3379/// dummy_eprog.prog = &dummy_eprog_code; dummy_eprog.strs = NULL;`.
3380/// Called once at shell startup (init_main → init_misc → init_eprog).
3381pub fn init_eprog() {
3382    let mut d = DUMMY_EPROG.lock().unwrap();
3383    d.prog = vec![WCB_END()]; // c:3071/3073
3384    d.len = size_of::<wordcode>() as i32; // c:3072
3385    d.strs = None; // c:3074
3386    d.flags = 0;
3387    d.npats = 0;
3388    d.nref = 0;
3389}
3390
3391// =====================================================================
3392// `bin_zcompile` and wordcode-dump helpers — port of `Src/parse.c:3104+`.
3393//
3394// The wordcode dump format (`.zwc`) is a serialized parse tree zsh can
3395// `mmap()` and dispatch from without re-parsing on every shell start.
3396// File layout (one struct = `FD_PRELEN` `u32`s):
3397//   - `pre[0]` = magic word (FD_MAGIC native byte-order, FD_OMAGIC
3398//     opposite byte-order).
3399//   - `pre[1]` = packed `{flags(8) | other_offset(24)}` byte field.
3400//   - `pre[2..12]` = `ZSH_VERSION` C-string padded to 40 bytes.
3401//   - `pre[12]` = `fdheaderlen` (total prelude+header word count).
3402//   - Then a sequence of `struct fdhead` records, one per function,
3403//     each followed by its NUL-terminated name (padded to 4-byte).
3404//   - Then the wordcode bytes for every function back-to-back.
3405//
// On a little-endian host a dump is written twice: first with
// `FD_MAGIC` for native readers; the writer then re-walks the body
// byte-swapped and emits a second `FD_OMAGIC` copy so big-endian
// readers can mmap it too.
3409// =====================================================================
3410
3411// File-format constants — port of `Src/parse.c:3104-3150`.
3412
/// `#define FD_EXT ".zwc"` from `Src/parse.c:3104`. File-name suffix of a
/// wordcode dump.
pub const FD_EXT: &str = ".zwc";

/// `#define FD_MINMAP 4096` from `Src/parse.c:3105`. mmap threshold in
/// bytes — `-M` mode only kicks in when the wordcode body is at least
/// this large (otherwise read(2) is preferred).
pub const FD_MINMAP: usize = 4_096;

/// `#define FD_PRELEN 12` from `Src/parse.c:3107`. File-header length in
/// u32 words: 1 magic word + 1 packed flags/offset word + 10 version words.
pub const FD_PRELEN: usize = 12;

/// `#define FD_MAGIC 0x04050607` from `Src/parse.c:3108`. Sentinel for
/// native-byte-order dumps.
pub const FD_MAGIC: u32 = 0x0405_0607;

/// `#define FD_OMAGIC 0x07060504` from `Src/parse.c:3109`. Sentinel for
/// opposite-byte-order dumps (`FD_MAGIC` with its bytes reversed).
pub const FD_OMAGIC: u32 = 0x0706_0504;

/// `#define FDF_MAP 1` from `Src/parse.c:3111`. Header flag bit: the dump
/// should be `mmap()`-ed (`-M`) rather than read normally (`-R`).
pub const FDF_MAP: u32 = 1;

/// `#define FDF_OTHER 2` from `Src/parse.c:3112`. Header flag bit: the
/// dump has an opposite-byte-order copy at `fdother(f)`.
pub const FDF_OTHER: u32 = 2;
3440
/// Port of `struct fdhead` from `Src/parse.c:3116`: the per-function
/// header record inside a wordcode dump. Every field is one `wordcode`
/// (`u32`).
// The lowercase type name deliberately mirrors the C struct.
#[allow(non_camel_case_types)]
#[derive(Clone, Copy, Debug)]
pub struct fdhead {
    /// Offset, in u32 words, of this function's wordcode body within
    /// the dump.
    pub start: u32, // c:3117
    /// Wordcode-byte length of the body (excludes pattern-prog slots).
    pub len: u32, // c:3118
    /// Count of compiled patterns referenced by the body.
    pub npats: u32, // c:3119
    /// Offset of the string table inside `prog->prog`.
    pub strs: u32, // c:3120
    /// Length of this header record in u32 words (record plus name).
    pub hlen: u32, // c:3121
    /// Packed `{ kshload_bits(2) | name_tail_offset(30) }` field.
    pub flags: u32, // c:3122
}
3460
/// `#define FDHF_KSHLOAD 1` from `Src/parse.c:3149`. Function-header flag
/// bit marking `-k` (ksh-style) autoloading.
pub const FDHF_KSHLOAD: u32 = 1 << 0;

/// `#define FDHF_ZSHLOAD 2` from `Src/parse.c:3150`. Function-header flag
/// bit marking `-z` (zsh-style) autoloading.
pub const FDHF_ZSHLOAD: u32 = 1 << 1;
3468
/// Port of `struct wcfunc` from `Src/parse.c:3158`: the build-time,
/// per-function aggregate collected before `write_dump` emits it.
///
/// NOTE(review): the previous comment described this as caching source
/// text, but the only payload field here is `body`, a `Vec<u32>` of
/// wordcode — confirm which representation `build_dump` really produces.
// The lowercase type name deliberately mirrors the C struct.
#[allow(non_camel_case_types)]
#[derive(Clone, Debug)]
pub struct wcfunc {
    /// Function name.
    pub name: String, // c:3159
    /// Function-header flag word.
    pub flags: u32, // c:3161
    /// Compiled body wordcode (one `u32` array per fn). Per the original
    /// note, empty until the eprog emit-side lands; `write_dump` then
    /// walks each entry.
    pub body: Vec<u32>,
}
3483
3484/// Port of `dump_find_func(Wordcode h, char *name)` from
3485/// `Src/parse.c:3167`. Walks the header table inside a loaded
3486/// dump for a function with the given basename; returns true on hit.
3487pub fn dump_find_func(h: &[u32], name: &str) -> bool {
3488    // c:3167
3489    let header_words = fdheaderlen(h) as usize;
3490    let end = header_words; // walking u32 offsets, end-exclusive
3491    let mut cur = firstfdhead_offset();
3492    while cur < end {
3493        if let Some(fh) = read_fdhead(h, cur) {
3494            let full = fdname(h, cur);
3495            let tail = fdhtail(&fh) as usize;
3496            let basename = if tail <= full.len() {
3497                &full[tail..]
3498            } else {
3499                ""
3500            };
3501            if basename == name {
3502                return true;
3503            }
3504            cur = nextfdhead_offset(h, cur);
3505        } else {
3506            break;
3507        }
3508    }
3509    false
3510}
3511
3512/// Port of `bin_zcompile(char *nam, char **args, Options ops, UNUSED(int func))`
3513/// from `Src/parse.c:3180`. Validates the option set, then dispatches
3514/// to one of: `-t` (test/list), `-c`/`-a` (dump current functions),
3515/// or the default (compile source files to `.zwc`).
3516pub fn bin_zcompile(
3517    nam: &str, // c:3180
3518    args: &[String],
3519    ops: &crate::ported::zsh_h::options,
3520    _func: i32,
3521) -> i32 {
3522    // c:3185-3192 — illegal-combination guard.
3523    if (OPT_ISSET(ops, b'k') && OPT_ISSET(ops, b'z'))
3524        || (OPT_ISSET(ops, b'R') && OPT_ISSET(ops, b'M'))
3525        || (OPT_ISSET(ops, b'c')
3526            && (OPT_ISSET(ops, b'U') || OPT_ISSET(ops, b'k') || OPT_ISSET(ops, b'z')))
3527        || (!(OPT_ISSET(ops, b'c') || OPT_ISSET(ops, b'a')) && OPT_ISSET(ops, b'm'))
3528    {
3529        zwarnnam(nam, "illegal combination of options"); // c:3192
3530        return 1;
3531    }
3532
3533    // c:3194 — `-c`/`-a` + KSHAUTOLOAD warning.
3534    if (OPT_ISSET(ops, b'c') || OPT_ISSET(ops, b'a')) && isset(crate::ported::zsh_h::KSHAUTOLOAD) {
3535        zwarnnam(nam, "functions will use zsh style autoloading"); // c:3195
3536    }
3537
3538    // c:3196-3197 — flag word from `-k` / `-z`.
3539    let flags: u32 = if OPT_ISSET(ops, b'k') {
3540        FDHF_KSHLOAD
3541    } else if OPT_ISSET(ops, b'z') {
3542        FDHF_ZSHLOAD
3543    } else {
3544        0
3545    };
3546
3547    // c:3199 — `-t` test/list mode.
3548    if OPT_ISSET(ops, b't') {
3549        // c:3199
3550        if args.is_empty() {
3551            zwarnnam(nam, "too few arguments"); // c:3202
3552            return 1;
3553        }
3554        let dump_name = if args[0].ends_with(FD_EXT) {
3555            args[0].clone()
3556        } else {
3557            format!("{}{}", args[0], FD_EXT)
3558        };
3559        let f = match load_dump_header(nam, &dump_name, 1) {
3560            // c:3206
3561            Some(buf) => buf,
3562            None => return 1,
3563        };
3564        // c:3209 — per-function check.
3565        if args.len() > 1 {
3566            for name in &args[1..] {
3567                // c:3210
3568                if !dump_find_func(&f, name) {
3569                    // c:3212
3570                    return 1;
3571                }
3572            }
3573            return 0;
3574        }
3575        // c:3215-3221 — listing arm. Walk every fdhead, print
3576        // each function's full name. C uses `fdname(h)` which
3577        // includes the path prefix; matches our `fdname()` impl.
3578        let mapped = if (fdflags(&f) & FDF_MAP) != 0 {
3579            "mapped"
3580        } else {
3581            "read"
3582        };
3583        println!("zwc file ({}) for zsh-{}", mapped, fdversion(&f));
3584        let header_words = fdheaderlen(&f) as usize;
3585        let mut cur = firstfdhead_offset();
3586        while cur < header_words {
3587            if read_fdhead(&f, cur).is_none() {
3588                break;
3589            }
3590            println!("{}", fdname(&f, cur));
3591            cur = nextfdhead_offset(&f, cur);
3592        }
3593        return 0;
3594    }
3595
3596    if args.is_empty() {
3597        zwarnnam(nam, "too few arguments"); // c:3226
3598        return 1;
3599    }
3600
3601    // c:3228 — map mode discriminant.
3602    let map: i32 = if OPT_ISSET(ops, b'M') {
3603        2
3604    } else if OPT_ISSET(ops, b'R') {
3605        0
3606    } else {
3607        1
3608    };
3609
3610    // c:3230-3236 — single-file default-mode short path.
3611    if args.len() == 1 && !(OPT_ISSET(ops, b'c') || OPT_ISSET(ops, b'a')) {
3612        let dump = format!("{}{}", args[0], FD_EXT);
3613        return build_dump(nam, &dump, args, OPT_ISSET(ops, b'U') as i32, map, flags);
3614    }
3615
3616    // c:3239-3247 — multi-file or `-c`/`-a` mode.
3617    let dump = if args[0].ends_with(FD_EXT) {
3618        args[0].clone()
3619    } else {
3620        format!("{}{}", args[0], FD_EXT)
3621    };
3622    let rest = &args[1..];
3623    if OPT_ISSET(ops, b'c') || OPT_ISSET(ops, b'a') {
3624        let what =
3625            (if OPT_ISSET(ops, b'c') { 1 } else { 0 }) | (if OPT_ISSET(ops, b'a') { 2 } else { 0 });
3626        build_cur_dump(nam, &dump, rest, OPT_ISSET(ops, b'm') as i32, map, what)
3627    } else {
3628        build_dump(nam, &dump, rest, OPT_ISSET(ops, b'U') as i32, map, flags)
3629    }
3630}
3631
3632/// Port of `load_dump_header(char *nam, char *name, int err)` from
3633/// `Src/parse.c:3258`. Opens the file, reads + validates the magic
3634/// and version, then slurps the full header table into memory.
3635/// Returns the header u32-array on success or None on any failure
3636/// (emitting C-shaped warnings when `err != 0`).
3637pub fn load_dump_header(nam: &str, name: &str, err: i32) -> Option<Vec<u32>> {
3638    // c:3258
3639
3640    let mut f = match File::open(name) {
3641        // c:3263
3642        Ok(h) => h,
3643        Err(_) => {
3644            if err != 0 {
3645                zwarnnam(nam, &format!("can't open zwc file: {}", name)); // c:3265
3646            }
3647            return None;
3648        }
3649    };
3650
3651    // Read FD_PRELEN+1 u32 words = 52 bytes.
3652    let mut buf_bytes = vec![0u8; (FD_PRELEN + 1) * 4];
3653    if f.read_exact(&mut buf_bytes).is_err() {
3654        if err != 0 {
3655            zwarnnam(nam, &format!("invalid zwc file: {}", name)); // c:3277
3656        }
3657        return None;
3658    }
3659    let mut buf: Vec<u32> = buf_bytes
3660        .chunks_exact(4)
3661        .map(|c| u32::from_le_bytes([c[0], c[1], c[2], c[3]]))
3662        .collect();
3663
3664    // c:3270 — magic + version check. `ZSH_VERSION` (the C-side
3665    // global) — zshrs reports "5.9" in `--zsh` mode (Src/init.c parity).
3666    let magic_ok = fdmagic(&buf) == FD_MAGIC || fdmagic(&buf) == FD_OMAGIC;
3667    let v_ok = fdversion(&buf) == "5.9";
3668    if !magic_ok {
3669        if err != 0 {
3670            zwarnnam(nam, &format!("invalid zwc file: {}", name)); // c:3277
3671        }
3672        return None;
3673    }
3674    if !v_ok {
3675        if err != 0 {
3676            zwarnnam(
3677                nam,
3678                &format!(
3679                    "zwc file has wrong version (zsh-{}): {}", // c:3274
3680                    fdversion(&buf),
3681                    name
3682                ),
3683            );
3684        }
3685        return None;
3686    }
3687
3688    // c:3285 — if magic matches host byte order, head len is `pre[FD_PRELEN]`.
3689    // Else seek to `fdother(buf)` and re-read.
3690    if fdmagic(&buf) != FD_MAGIC {
3691        let other = fdother(&buf) as u64; // c:3290
3692        if f.seek(SeekFrom::Start(other)).is_err() || f.read_exact(&mut buf_bytes).is_err() {
3693            zwarnnam(nam, &format!("invalid zwc file: {}", name)); // c:3295
3694            return None;
3695        }
3696        buf = buf_bytes
3697            .chunks_exact(4)
3698            .map(|c| u32::from_le_bytes([c[0], c[1], c[2], c[3]]))
3699            .collect();
3700    }
3701
3702    let total_words = fdheaderlen(&buf) as usize; // c:3286/3299
3703    if total_words < FD_PRELEN + 1 {
3704        zwarnnam(nam, &format!("invalid zwc file: {}", name));
3705        return None;
3706    }
3707
3708    // Read the remaining header words.
3709    let mut head: Vec<u32> = Vec::with_capacity(total_words);
3710    head.extend_from_slice(&buf);
3711    let remaining_words = total_words - (FD_PRELEN + 1);
3712    if remaining_words > 0 {
3713        let mut rest_bytes = vec![0u8; remaining_words * 4]; // c:3305
3714        if f.read_exact(&mut rest_bytes).is_err() {
3715            zwarnnam(nam, &format!("invalid zwc file: {}", name)); // c:3307
3716            return None;
3717        }
3718        for c in rest_bytes.chunks_exact(4) {
3719            head.push(u32::from_le_bytes([c[0], c[1], c[2], c[3]]));
3720        }
3721    }
3722    Some(head) // c:3311
3723}
3724
/// Port of `fdswap(Wordcode p, int n)` from `Src/parse.c:3318`.
/// Reverses the byte order of every word in `p`, in place. The C length
/// argument is carried by the slice itself. `write_dump` uses this for the
/// byte-swapped second copy of a dump.
pub fn fdswap(p: &mut [u32]) {
    // c:3318
    p.iter_mut().for_each(|word| *word = word.swap_bytes());
}
3734
3735/// Port of `write_dump(int dfd, LinkList progs, int map, int hlen, int tlen)`
3736/// from `Src/parse.c:3334`. Writes the prelude + header records +
3737/// body wordcode bytes to the dump file descriptor.
3738///
3739/// Two passes: first native-byte-order (`FD_MAGIC`), then opposite-
3740/// byte-order (`FD_OMAGIC`) so big-endian readers can mmap the
3741/// same file. Bodies are byte-swapped via `fdswap` on the second pass.
3742pub fn write_dump(
3743    dfd: &mut File, // c:3334
3744    progs: &[wcfunc],
3745    mut map: i32,
3746    hlen: i32,
3747    tlen: i32,
3748) -> std::io::Result<()> {
3749    if map == 1 && (tlen as usize) >= FD_MINMAP {
3750        // c:3344
3751        map = 1;
3752    } else if map == 1 {
3753        map = 0;
3754    }
3755
3756    let mut other = 0u32; // c:3338
3757    let ohlen = hlen;
3758    let mut cur_hlen = hlen;
3759
3760    loop {
3761        cur_hlen = ohlen;
3762        // c:3347 — build the prelude.
3763        let mut pre = vec![0u32; FD_PRELEN];
3764        pre[0] = if other != 0 { FD_OMAGIC } else { FD_MAGIC }; // c:3350
3765        let flags = (if map != 0 { FDF_MAP } else { 0 }) | other;
3766        fdsetflags(&mut pre, flags as u8); // c:3351
3767        fdsetother(&mut pre, tlen as u32); // c:3352
3768                                           // c:3353 — copy ZSH_VERSION C-string into pre[2..].
3769        let ver = b"5.9";
3770        for (i, &b) in ver.iter().enumerate() {
3771            let word = 2 + i / 4;
3772            let shift = (i % 4) * 8;
3773            pre[word] |= (b as u32) << shift;
3774        }
3775        // Write prelude.
3776        for w in &pre {
3777            dfd.write_all(&w.to_le_bytes())?;
3778        }
3779        // c:3356 — per-fn header records.
3780        for wcf in progs {
3781            let n = &wcf.name;
3782            let prog = &wcf.body;
3783            let mut head = fdhead {
3784                start: cur_hlen as u32,                                     // c:3360
3785                len: (prog.len() * 4) as u32,                               // c:3363
3786                npats: 0, // c:3364 (npats not tracked yet)
3787                strs: 0,  // c:3365
3788                hlen: ((FDHEAD_WORDS as u32) + ((n.len() as u32 + 4) / 4)), // c:3366
3789                flags: 0,
3790            };
3791            cur_hlen += prog.len() as i32; // c:3361
3792                                           // c:3368 — name tail offset from path basename.
3793            let tail = n.rfind('/').map(|p| p + 1).unwrap_or(0);
3794            head.flags = fdhbldflags(wcf.flags, tail as u32); // c:3372
3795                                                              // c:3373 — opposite-byte-order swap on second pass.
3796            let mut head_words: Vec<u32> = vec![
3797                head.start, head.len, head.npats, head.strs, head.hlen, head.flags,
3798            ];
3799            if other != 0 {
3800                fdswap(&mut head_words);
3801            }
3802            for w in &head_words {
3803                dfd.write_all(&w.to_le_bytes())?;
3804            }
3805            // c:3376 — write the name + NUL + pad-to-4.
3806            dfd.write_all(n.as_bytes())?;
3807            dfd.write_all(&[0u8])?;
3808            let pad = (4 - ((n.len() + 1) & 3)) & 3;
3809            if pad > 0 {
3810                dfd.write_all(&vec![0u8; pad])?;
3811            }
3812        }
3813        // c:3381 — per-fn body words.
3814        for wcf in progs {
3815            let mut body = wcf.body.clone();
3816            if other != 0 {
3817                fdswap(&mut body);
3818            }
3819            for w in &body {
3820                dfd.write_all(&w.to_le_bytes())?;
3821            }
3822        }
3823        if other != 0 {
3824            // c:3389
3825            break;
3826        }
3827        other = FDF_OTHER; // c:3391
3828    }
3829    Ok(())
3830}
3831
3832/// Port of `build_dump(char *nam, char *dump, char **files, int ali, int map, int flags)`
3833/// from `Src/parse.c:3397`. Source-file → wordcode dump compiler.
3834///
3835/// Status: scaffolded but the wordcode-emit step depends on
3836/// `parse_string` returning a fully-wired `Eprog` with `prog/strs/
3837/// npats` fields populated. The current `parse_string`/`parse` shape
3838/// emits an AST (`ZshProgram`) but not yet the wordcode array C
3839/// expects in this dump format. Until that lands, this returns 1
3840/// with a clear "wordcode emit not yet ported" message so callers
3841/// (autoload from `.zwc`, `zcompile path/to/file`) fail loud.
3842pub fn build_dump(
3843    nam: &str, // c:3397
3844    dump: &str,
3845    _files: &[String],
3846    _ali: i32,
3847    _map: i32,
3848    _flags: u32,
3849) -> i32 {
3850    zwarnnam(nam, &format!("{}: wordcode dump emit not yet ported", dump));
3851    1
3852}
3853
3854/// Port of `cur_add_func(char *nam, Shfunc shf, LinkList names, LinkList progs, int *hlen, int *tlen, int what)`
3855/// from `Src/parse.c:3489`. Adds a shfunc to the in-build dump
3856/// progs+names lists. Stub: `Eprog` for the function body isn't
3857/// yet wired through `shfunc.funcdef` to be serializable here.
3858pub fn cur_add_func(
3859    nam: &str, // c:3489
3860    shf_name: &str,
3861    shf_flags: i32,
3862    names: &mut Vec<String>,
3863    progs: &mut Vec<wcfunc>,
3864    hlen: &mut i32,
3865    tlen: &mut i32,
3866    what: i32,
3867) -> i32 {
3868    let is_undef = (shf_flags as u32 & PM_UNDEFINED) != 0;
3869    if is_undef {
3870        if (what & 2) == 0 {
3871            // c:3498
3872            zwarnnam(nam, &format!("function is not loaded: {}", shf_name));
3873            return 1;
3874        }
3875        // c:3503 — would call `getfpfunc` to load body for dump.
3876        zwarnnam(nam, &format!("can't load function: {}", shf_name));
3877        return 1;
3878    } else if (what & 1) == 0 {
3879        zwarnnam(nam, &format!("function is already loaded: {}", shf_name)); // c:3514
3880        return 1;
3881    }
3882    // c:3517 — would `dupeprog(shf->funcdef)`. Stub: empty body.
3883    let wcf = wcfunc {
3884        name: shf_name.to_string(),
3885        flags: FDHF_ZSHLOAD,
3886        body: Vec::new(),
3887    };
3888    progs.push(wcf);
3889    names.push(shf_name.to_string());
3890
3891    // c:3526 — bump hlen / tlen.
3892    let name_words = (shf_name.len() as i32 + 4) / 4;
3893    *hlen += (FDHEAD_WORDS as i32) + name_words;
3894    *tlen += 0; // body is empty in stub; real path adds prog->len in words.
3895
3896    0
3897}
3898
3899/// Port of `build_cur_dump(char *nam, char *dump, char **names, int match, int map, int what)`
3900/// from `Src/parse.c:3536`. Compiles currently-loaded functions
3901/// (`-c` for functions, `-a` for aliases) into a `.zwc` dump.
3902/// Same wordcode-emit dependency as `build_dump`.
3903pub fn build_cur_dump(
3904    nam: &str, // c:3536
3905    dump: &str,
3906    _names: &[String],
3907    _match_: i32,
3908    _map: i32,
3909    _what: i32,
3910) -> i32 {
3911    zwarnnam(
3912        nam,
3913        &format!("{}: wordcode dump-current emit not yet ported", dump),
3914    );
3915    1
3916}
3917
/// Port of `zwcstat(char *filename, struct stat *buf)` from
/// `Src/parse.c:3656`. Returns the metadata of `filename`; if that cannot
/// be read, tries `<filename>.old` instead. `None` means neither path
/// could be stat'ed.
pub fn zwcstat(filename: &str) -> Option<fs::Metadata> {
    // c:3656
    fs::metadata(filename)
        .or_else(|_| fs::metadata(format!("{}.old", filename)))
        .ok()
}
3931
/// Port of `load_dump_file(char *dump, struct stat *sbuf, int other, int len)`
/// from `Src/parse.c:3675`. Reads `dump` from byte offset `other` (from
/// the start when `other` is 0) to end of file and decodes it as
/// little-endian `u32` words. Trailing bytes that do not fill a whole word
/// are dropped.
///
/// `_sbuf` and `_len` are accepted for signature parity and not used.
/// Returns `None` if the file cannot be opened, positioned, or read.
pub fn load_dump_file(
    dump: &str, // c:3675
    _sbuf: &fs::Metadata,
    other: i32,
    _len: usize,
) -> Option<Vec<u32>> {
    let mut file = File::open(dump).ok()?;
    if other != 0 {
        file.seek(SeekFrom::Start(other as u64)).ok()?;
    }

    let mut raw = Vec::new();
    file.read_to_end(&mut raw).ok()?;

    let mut words = Vec::with_capacity(raw.len() / 4);
    for quad in raw.chunks_exact(4) {
        words.push(u32::from_le_bytes([quad[0], quad[1], quad[2], quad[3]]));
    }
    Some(words)
}
3954
3955/// Port of `try_dump_file(char *path, char *name, char *file, int *ksh, int test_only)`
3956/// from `Src/parse.c:3746`. Tries to load function `name` from a
3957/// `.zwc` digest (`<path>.zwc`) or per-function compiled file
3958/// (`<file>.zwc`) when each is newer than its uncompiled source.
3959pub fn try_dump_file(
3960    path: &str,
3961    name: &str,
3962    file: &str, // c:3746
3963    test_only: bool,
3964) -> Option<(Vec<u32>, bool)> {
3965    use std::fs;
3966
3967    // c:3753-3758 — if path ends in .zwc, treat as direct digest.
3968    if path.ends_with(FD_EXT) {
3969        crate::ported::signals::queue_signals();
3970        let result = fs::metadata(path)
3971            .ok()
3972            .and_then(|m| check_dump_file(path, &m, name, test_only));
3973        unqueue_signals();
3974        return result;
3975    }
3976
3977    // c:3759-3760 — dig = "<path>.zwc", wc = "<file>.zwc".
3978    let dig = format!("{}{}", path, FD_EXT);
3979    let wc = format!("{}{}", file, FD_EXT);
3980
3981    // c:3762-3764 — zwcstat(dig, &std); stat(wc, &stc); stat(file, &stn);
3982    let std_meta = fs::metadata(&dig);
3983    let stc_meta = fs::metadata(&wc);
3984    let stn_meta = fs::metadata(file);
3985
3986    crate::ported::signals::queue_signals();
3987
3988    // c:3771-3777 — try digest if newer than (or in absence of) wc/file.
3989    if let Ok(std_m) = &std_meta {
3990        let dig_mtime = std_m.modified().ok();
3991        let wc_newer_or_missing = match &stc_meta {
3992            Err(_) => true,
3993            Ok(c) => dig_mtime >= c.modified().ok(),
3994        };
3995        let src_newer_or_missing = match &stn_meta {
3996            Err(_) => true,
3997            Ok(n) => dig_mtime >= n.modified().ok(),
3998        };
3999        if wc_newer_or_missing && src_newer_or_missing {
4000            if let Some(prog) = check_dump_file(&dig, std_m, name, test_only) {
4001                unqueue_signals();
4002                return Some(prog);
4003            }
4004        }
4005    }
4006
4007    // c:3779-3784 — try per-function .zwc if newer than (or in absence of) source.
4008    if let Ok(stc_m) = &stc_meta {
4009        let wc_mtime = stc_m.modified().ok();
4010        let src_newer_or_missing = match &stn_meta {
4011            Err(_) => true,
4012            Ok(n) => wc_mtime >= n.modified().ok(),
4013        };
4014        if src_newer_or_missing {
4015            if let Some(prog) = check_dump_file(&wc, stc_m, name, test_only) {
4016                unqueue_signals();
4017                return Some(prog);
4018            }
4019        }
4020    }
4021
4022    unqueue_signals(); // c:3787
4023    None // c:3788
4024}
4025
4026/// Port of `try_source_file(char *file)` from `Src/parse.c:3795`.
4027/// Returns an Eprog (the wordcode dump body) if `<file>.zwc` exists
4028/// and is newer than `<file>`, else None.
4029pub fn try_source_file(file: &str) -> Option<String> {
4030    // c:3795
4031
4032    // c:3802-3805 — if ((tail = strrchr(file, '/'))) tail++; else tail = file;
4033    let tail = match file.rfind('/') {
4034        Some(i) => &file[i + 1..],
4035        None => file,
4036    };
4037
4038    // c:3807-3812 — if (strsfx(FD_EXT, file)) { ... return check_dump_file(file, NULL, tail, NULL, 0); }
4039    if file.ends_with(FD_EXT) {
4040        crate::ported::signals::queue_signals(); // c:3808
4041        let meta = fs::metadata(file);
4042        let prog = match meta {
4043            Ok(m) => check_dump_file(file, &m, tail, false).map(|(_, _)| file.to_string()), // c:3809
4044            Err(_) => None,
4045        };
4046        unqueue_signals(); // c:3810
4047        return prog;
4048    }
4049
4050    // c:3813 — wc = dyncat(file, FD_EXT);
4051    let wc = format!("{}{}", file, FD_EXT);
4052
4053    // c:3815-3816 — rc = stat(wc, &stc); rn = stat(file, &stn);
4054    let stc = fs::metadata(&wc);
4055    let stn = fs::metadata(file);
4056
4057    crate::ported::signals::queue_signals(); // c:3818
4058                                             // c:3819-3823 — if (!rc && (rn || stc.st_mtime >= stn.st_mtime) && (prog = check_dump_file(...))) return prog;
4059    if let Ok(meta_c) = &stc {
4060        let newer_than_src = match (&stc, &stn) {
4061            (Ok(c), Ok(n)) => c.modified().ok() >= n.modified().ok(),
4062            (Ok(_), Err(_)) => true, // c:3819 — `rn` (src missing) ⇒ accept .zwc
4063            _ => false,
4064        };
4065        if newer_than_src {
4066            let prog = check_dump_file(&wc, meta_c, tail, false); // c:3820
4067            if prog.is_some() {
4068                unqueue_signals(); // c:3821
4069                return Some(wc); // c:3822
4070            }
4071        }
4072    }
4073    unqueue_signals(); // c:3824
4074    None // c:3825
4075}
4076
4077/// Port of `Eprog check_dump_file(char *file, struct stat *sbuf,
4078/// char *name, int *ksh, int test_only)` from `Src/parse.c:3833`.
4079/// Walks the `dumps` mmap list looking for `(dev, ino)` matching
4080/// `sbuf`; on miss, calls `load_dump_header` to read the .zwc
4081/// header. Then `dump_find_func(d, name)` locates the function
4082/// table entry. Returns the wordcode slice + ksh-load flag.
4083///
4084/// ```c
4085/// Eprog
4086/// check_dump_file(char *file, struct stat *sbuf, char *name,
4087///                 int *ksh, int test_only)
4088/// {
4089///     int isrec = 0;
4090///     Wordcode d;
4091///     FDHead h;
4092///     FuncDump f;
4093///     struct stat lsbuf;
4094///     if (!sbuf) {
4095///         if (zwcstat(file, &lsbuf)) return NULL;
4096///         sbuf = &lsbuf;
4097///     }
4098///   rec:
4099///     d = NULL;
4100///     for (f = dumps; f; f = f->next)
4101///         if (f->dev == sbuf->st_dev && f->ino == sbuf->st_ino)
4102///             { d = f->map; break; }
4103///     if (!f && (isrec || !(d = load_dump_header(NULL, file, 0))))
4104///         return NULL;
4105///     if ((h = dump_find_func(d, name))) {
4106///         if (test_only) return &dummy_eprog;
4107///         /* allocate Eprog from f->map at h offset, incrdumpcount,
4108///            return prog */
4109///     }
4110///     return NULL;
4111/// }
4112/// ```
4113/// Rust port returns `Option<(Vec<u32>, bool)>` instead of the C
4114/// `Eprog` pointer + `*ksh` out-param: tuple element 0 is the
4115/// wordcode slice, element 1 is true if the function was a ksh-
4116/// loaded entry.
4117pub fn check_dump_file(
4118    // c:3833
4119    file: &str,
4120    sbuf: &fs::Metadata,
4121    name: &str,
4122    test_only: bool,
4123) -> Option<(Vec<u32>, bool)> {
4124    use std::os::unix::fs::MetadataExt;
4125
4126    // c:3842-3846 — `if (!sbuf) { zwcstat(file, &lsbuf); sbuf = &lsbuf; }`
4127    // Rust takes sbuf by &Metadata — never null.
4128    let dev = sbuf.dev(); // c:3859
4129    let ino = sbuf.ino(); // c:3859
4130
4131    // c:3854 — `d = NULL;`
4132    let mut d: Option<Vec<u32>> = None;
4133    let mut found_mmap = false; // c:3858 `for (f = dumps; f; ...)`
4134
4135    // c:3858-3862 — walk DUMPS for matching dev/ino.
4136    {
4137        let dumps_guard = DUMPS.lock().expect("dumps poisoned");
4138        for f in dumps_guard.iter() {
4139            // c:3858
4140            if f.dev == dev && f.ino == ino {
4141                // c:3859
4142                d = Some(f.map.clone()); // c:3860
4143                found_mmap = true;
4144                break; // c:3861
4145            }
4146        }
4147    }
4148
4149    // c:3870-3871 — `if (!f && (isrec || !(d = load_dump_header(NULL, file, 0)))) return NULL;`
4150    if !found_mmap {
4151        // c:3870
4152        match load_dump_header("", file, 0) {
4153            // c:3870 load_dump_header
4154            Some(loaded) => d = Some(loaded),
4155            None => return None, // c:3871
4156        }
4157    }
4158
4159    // c:3873 — `if ((h = dump_find_func(d, name)))`
4160    let dump = d?;
4161    if !dump_find_func(&dump, name) {
4162        // c:3873
4163        return None;
4164    }
4165
4166    // c:3876-3879 — `if (test_only) return &dummy_eprog;`
4167    if test_only {
4168        // c:3876
4169        return Some((Vec::new(), false)); // c:3879 dummy
4170    }
4171
4172    // c:3884-3953 — allocate Eprog from the mmap area + ksh detection.
4173    // The C source builds an `Eprog` struct wrapping the wordcode
4174    // slice at h's offset; the Rust port returns the slice directly
4175    // since Eprog construction lives at the call site (load_dump_file).
4176    // ksh-load detection reads the FDHF_KSHLOAD flag on the FDHead.
4177    // !!! STUB: FDHead parsing not yet wired through dump_find_func.
4178    let is_ksh_load = false; // c:3905 fdhflags(h) & FDHF_KSHLOAD
4179
4180    // c:3950 — incrdumpcount(f). The Rust incrdumpcount takes a
4181    // funcdump ref; look up the matching entry by dev/ino again.
4182    if found_mmap {
4183        let dumps_guard = DUMPS.lock().expect("dumps poisoned");
4184        if let Some(f) = dumps_guard.iter().find(|f| f.dev == dev && f.ino == ino) {
4185            incrdumpcount(f); // c:3899
4186        }
4187    }
4188
4189    Some((dump, is_ksh_load)) // c:3953
4190}
4191
4192/// Port of `incrdumpcount(FuncDump f)` from `Src/parse.c:3970/4021`.
4193/// `f->count++;` — refcount-up a loaded dump entry. The Rust port
4194/// keys lookup by `filename` because Rust can't raw-pointer-compare
4195/// funcdump values inside a `Mutex<Vec<...>>`; same observable
4196/// effect (the count of the matching entry increments).
4197pub fn incrdumpcount(f: &funcdump) {
4198    // c:3970 — `f->count++;`
4199    if let Some(d) = DUMPS
4200        .lock()
4201        .unwrap()
4202        .iter_mut()
4203        .find(|d| d.filename.as_deref() == f.filename.as_deref())
4204    {
4205        d.count += 1; // c:3973
4206    }
4207}
4208
4209/// Port of `freedump(FuncDump f)` from `Src/parse.c:3976`. Public
4210/// helper for the rare external caller; locks the dumps mutex and
4211/// drops the entry with the given filename.
4212pub fn freedump(f: &funcdump) {
4213    // c:3976
4214    let mut g = DUMPS.lock().unwrap();
4215    if let Some(name) = f.filename.as_deref() {
4216        freedump_locked(&mut g, name);
4217    }
4218}
4219
4220/// Port of `decrdumpcount(FuncDump f)` from `Src/parse.c:3988/4026`.
4221/// `f->count--; if (!f->count) { unlink from dumps; freedump(f); }`.
4222pub fn decrdumpcount(f: &funcdump) {
4223    // c:3988
4224    let key = f.filename.clone();
4225    let mut g = DUMPS.lock().unwrap();
4226    let mut hit_zero: Option<String> = None;
4227    for d in g.iter_mut() {
4228        if d.filename == key {
4229            d.count -= 1; // c:3991
4230            if d.count == 0 {
4231                // c:3992
4232                hit_zero = d.filename.clone();
4233            }
4234            break;
4235        }
4236    }
4237    if let Some(name) = hit_zero {
4238        // c:3994-4001
4239        freedump_locked(&mut g, &name);
4240    }
4241}
4242
4243/// Port of `closedumps(void)` from `Src/parse.c:4008/4033`. Walks
4244/// `dumps` freeing every entry. Called on shell exit (exec.c:522).
4245pub fn closedumps() {
4246    // c:4008
4247    let mut g = DUMPS.lock().unwrap();
4248    g.clear(); // c:4011-4014 `while (dumps) { ... freedump(...); ... }`
4249}
4250
4251/// Port of `dump_autoload(char *nam, char *file, int on, Options ops, int func)`
4252/// from `Src/parse.c:4042`. Registers every function in a `.zwc`
4253/// for autoload via `shfunctab`.
4254pub fn dump_autoload(
4255    nam: &str,
4256    file: &str, // c:4042
4257    on: i32,
4258    ops: &crate::ported::zsh_h::options,
4259    func: i32,
4260) -> i32 {
4261    use crate::ported::zsh_h::shfunc;
4262    let mut ret = 0; // c:4047
4263
4264    // c:4049-4050 — if (!strsfx(FD_EXT, file)) file = dyncat(file, FD_EXT);
4265    let file_owned;
4266    let file = if !file.ends_with(FD_EXT) {
4267        file_owned = format!("{}{}", file, FD_EXT);
4268        file_owned.as_str()
4269    } else {
4270        file
4271    };
4272
4273    // c:4052-4053 — if (!(h = load_dump_header(nam, file, 1))) return 1;
4274    let h = match load_dump_header(nam, file, 1) {
4275        Some(buf) => buf,
4276        None => return 1,
4277    };
4278
4279    // c:4055-4056 — for (n = firstfdhead(h); n < e; n = nextfdhead(n))
4280    let hlen = fdheaderlen(&h) as usize; // c:4055
4281    let mut n_off = firstfdhead_offset();
4282    while n_off < hlen {
4283        let head = match read_fdhead(&h, n_off) {
4284            Some(hd) => hd,
4285            None => break,
4286        };
4287        // c:4057-4061 — shf = zshcalloc; shf->node.flags = on; ...addnode(fdname + fdhtail)
4288        let name_full = fdname(&h, n_off);
4289        let tail = fdhtail(&head) as usize;
4290        let basename: String = name_full.chars().skip(tail).collect();
4291        let mut shf = shfunc {
4292            node: crate::ported::zsh_h::hashnode {
4293                next: None,
4294                nam: basename.clone(),
4295                flags: on, // c:4058
4296            },
4297            filename: None,
4298            lineno: 0,
4299            funcdef: None,
4300            redir: None,
4301            sticky: None, // c:4060 NULL
4302            body: None,
4303        };
4304        // c:4059 — shf->funcdef = mkautofn(shf);  (placeholder Eprog ptr)
4305        let _ = crate::ported::builtin::mkautofn(&mut shf as *mut _);
4306        // c:4061 — shfunctab->addnode(...)
4307        let snapshot = shf.clone();
4308        {
4309            let mut tab = crate::ported::hashtable::shfunctab_lock()
4310                .write()
4311                .expect("shfunctab poisoned");
4312            tab.add(shf);
4313        }
4314        // c:4062-4063 — if (OPT_ISSET(ops,'X') && eval_autoload(...)) ret = 1;
4315        if OPT_ISSET(ops, b'X') {
4316            let mut shf_ref = snapshot;
4317            if crate::ported::builtin::eval_autoload(&mut shf_ref as *mut _, &basename, ops, func)
4318                != 0
4319            {
4320                ret = 1;
4321            }
4322        }
4323        n_off = nextfdhead_offset(&h, n_off);
4324    }
4325    let _ = nam;
4326    ret // c:4065
4327}
4328
/// Port of C `struct eccstr` (zsh.h:836) — one node of the binary search
/// tree used to de-duplicate long strings during wordcode emission.
///
/// The dedup-walk and cmp logic in `ecstrcode` is faithful to
/// parse.c:447-453 including the conditional cmp chain
/// (nfunc → hashval → strcmp), so corpus inputs where C's eccstr BST walk
/// finds-or-misses match get the same outcome on the Rust side.
///
/// NOTE(review): the tree is built and searched by `ecstrcode`, which is
/// outside this chunk; which child holds the "smaller" key is decided
/// there, not by this type.
struct EccstrNode {
    /// One child subtree (`None` when absent).
    left: Option<Box<EccstrNode>>,
    /// The other child subtree (`None` when absent).
    right: Option<Box<EccstrNode>>,
    /// C-byte form of the string (single byte per char ≤ 0xff).
    /// Owned because Rust doesn't have C zsh's "stable pointers into
    /// the lexer's tokstr arena" — every tokstr lives as a fresh
    /// Rust String allocation.
    str: Vec<u8>,
    /// Wordcode-encoded offset: `(byte_offset << 2) | token_bit`.
    /// Same shape as `Eccstr::offs` (parse.c:459).
    offs: u32,
    /// Absolute byte offset in the final strs region (= `ecsoffs` at
    /// insert time). C `Eccstr::aoffs` (parse.c:464). copy_ecstr uses
    /// THIS for the write position — distinct from `offs` which is
    /// ecssub-relative and collides across funcdef scopes.
    aoffs: u32,
    /// `nfunc` snapshot at insert time. Per-function namespace key
    /// — top-level scripts use 0; each funcdef bumps it.
    nfunc: i32,
    /// Hash of `str` computed via zsh's `hasher` (hashtable.c:86).
    hashval: u32,
}
4356// === end AST relocation ===
4357
4358// Parser state lives in file-scope thread_locals:
4359//   - LEX_* (lexer side, matching Src/lex.c file-statics)
4360//   - ECBUF / ECLEN / ECUSED / ECNPATS / ECSOFFS / ECSSUB / ECNFUNC /
4361//     ECSTRS_INDEX / ECSTRS_REVERSE (wordcode-emission state, matching
4362//     Src/parse.c file-statics)
4363//
4364// Callers use the free-fn entry points directly:
4365//   crate::ported::parse::parse_init(input);
4366//   let prog = crate::ported::parse::parse();
4367
/// Recursion-depth limit, in levels.
/// NOTE(review): the check that compares against this value is outside
/// this chunk — presumably the recursive `par_*` productions use it to
/// stop runaway nesting; confirm at the use site.
const MAX_RECURSION_DEPTH: usize = 500;
4369
/// Direct port of `struct parse_stack` at `Src/zsh.h:3099-3109`.
/// Used by `parse_context_save` / `parse_context_restore`
/// (parse.c:295-355) to snapshot per-parse-call state so a nested
/// parse (e.g. inside command substitution) doesn't clobber the
/// outer parse.
///
/// A second port of `struct parse_stack` exists at
/// `crate::ported::zsh_h::parse_stack` (zsh.h:1066) using canonical
/// Wordcode / Eccstr / `struct heredocs` types — that port is unused
/// today and will become authoritative when Phase 9b (PORT_PLAN.md)
/// wires wordcode emission. This local version uses the working-set
/// shapes (`Vec<HereDoc>`, stubbed wordcode fields) suited to zshrs's
/// pre-wordcode AST architecture; the consolidation happens in P9b.
///
/// Several C `int` flags (`incmdpos`, `inredir`, `intypeset`) are stored
/// as `bool` here; the rest keep `i32`.
///
/// NOTE(review): the zsh.h line citations in this block disagree with each
/// other (3099-3109 above, fields cited up to 3114 below, and 1066 for the
/// sibling port) — confirm against the header being tracked.
#[allow(non_camel_case_types)]
#[derive(Debug, Default, Clone)]
pub struct parse_stack {
    // ── Direct port of struct parse_stack at zsh.h:3099-3109 ──
    /// Pending heredocs awaiting body collection (canonical C
    /// linked-list shape). C: `struct heredocs *hdocs` (zsh.h:3100).
    /// Mirrors `parse::HDOCS` thread_local across nested parses.
    pub hdocs: Option<Box<crate::ported::zsh_h::heredocs>>,
    /// !!! WARNING: NOT IN PARSE_STACK — Rust-only AST-glue !!!
    /// Snapshot of `lex::LEX_HEREDOCS` (the parallel Rust-only Vec
    /// carrying terminator / strip_tabs / quoted metadata).
    /// Saved/restored alongside the canonical `hdocs` so nested
    /// parses get a clean AST view. C's parse_stack has no analog
    /// because C tracks terminator metadata implicitly via tokstr.
    pub lex_heredocs: Vec<HereDoc>,
    /// C: `int incmdpos` (zsh.h:3102). Stored as `bool` in this port.
    pub incmdpos: bool,
    /// C: `int aliasspaceflag` (zsh.h:3103).
    pub aliasspaceflag: i32,
    /// C: `int incond` (zsh.h:3104).
    pub incond: i32,
    /// C: `int inredir` (zsh.h:3105). Stored as `bool` in this port.
    pub inredir: bool,
    /// C: `int incasepat` (zsh.h:3106).
    pub incasepat: i32,
    /// C: `int isnewlin` (zsh.h:3107).
    pub isnewlin: i32,
    /// C: `int infor` (zsh.h:3108).
    pub infor: i32,
    /// C: `int inrepeat_` (zsh.h:3109).
    pub inrepeat_: i32,
    /// C: `int intypeset` (zsh.h:3110). Stored as `bool` in this port.
    pub intypeset: bool,
    // ── Wordcode-buffer state — STUB until Phase 9b ──
    // C `Wordcode ecbuf` (zsh.h:3112) + `Eccstr ecstrs` (zsh.h:3113) +
    // `int eclen/ecused/ecnpats/ecsoffs/ecssub/ecnfunc` (zsh.h:3112-3114).
    // zshrs hasn't emitted wordcode yet — these fields exist to
    // preserve the C shape but read/write nothing until P9b lands.
    // NOTE(review): the file-level doc says wordcode is already emitted
    // into ECBUF via the P9b/P9c pipeline — confirm whether this "STUB"
    // remark is stale.
    /// Mirrors C `eclen`.
    pub eclen: i32,
    /// Mirrors C `ecused`.
    pub ecused: i32,
    /// Mirrors C `ecnpats`.
    pub ecnpats: i32,
    /// Mirrors C `ecbuf`; `None` when no buffer was captured.
    pub ecbuf: Option<Vec<u32>>,
    /// Mirrors C `ecstrs`, held here as raw bytes; `None` when absent.
    pub ecstrs: Option<Vec<u8>>,
    /// Mirrors C `ecsoffs`.
    pub ecsoffs: i32,
    /// Mirrors C `ecssub`.
    pub ecssub: i32,
    /// Mirrors C `ecnfunc`.
    pub ecnfunc: i32,
}
4430
4431// Old uppercase Rust-only `ParseStack` is gone. Compat alias so
4432// existing call sites (context.rs) keep resolving until the
4433// rename ripples through.
4434/// `ParseStack` type alias.
4435#[allow(non_camel_case_types)]
4436pub type ParseStack = parse_stack;
4437
/// `mod_export struct eprog dummy_eprog;` from `Src/parse.c:3066`.
/// Placeholder Eprog used by `shf->funcdef = &dummy_eprog;` in
/// builtin.c when clearing a stale autoload stub. Held in a Mutex
/// so `init_eprog` can set it once at shell startup.
///
/// The static starts out fully empty: zero flags, length, pattern count
/// and reference count, no words, no strings, no patterns, and no owning
/// function or dump.
pub static DUMMY_EPROG: std::sync::Mutex<eprog> = std::sync::Mutex::new(eprog {
    flags: 0,
    len: 0,
    npats: 0,
    nref: 0,
    prog: Vec::new(),
    strs: None,
    pats: Vec::new(),
    shf: None,
    dump: None,
});
4453
4454/// Walk every ZshRedir in the program and, for any with a `heredoc_idx`,
4455/// pull the body+terminator out of `bodies` and stuff into `heredoc`.
4456/// `bodies[i]` corresponds to the i-th heredoc registered by the lexer
4457/// during scanning (in source order).
4458fn fill_heredoc_bodies(prog: &mut ZshProgram, bodies: &[HereDocInfo]) {
4459    for list in &mut prog.lists {
4460        fill_in_sublist(&mut list.sublist, bodies);
4461    }
4462}
4463
4464fn fill_in_sublist(sub: &mut ZshSublist, bodies: &[HereDocInfo]) {
4465    fill_in_pipe(&mut sub.pipe, bodies);
4466    if let Some(next) = &mut sub.next {
4467        fill_in_sublist(&mut next.1, bodies);
4468    }
4469}
4470
4471fn fill_in_pipe(pipe: &mut ZshPipe, bodies: &[HereDocInfo]) {
4472    fill_in_command(&mut pipe.cmd, bodies);
4473    if let Some(next) = &mut pipe.next {
4474        fill_in_pipe(next, bodies);
4475    }
4476}
4477
4478fn fill_in_command(cmd: &mut ZshCommand, bodies: &[HereDocInfo]) {
4479    match cmd {
4480        ZshCommand::Simple(s) => {
4481            for r in &mut s.redirs {
4482                if let Some(idx) = r.heredoc_idx {
4483                    if let Some(info) = bodies.get(idx) {
4484                        r.heredoc = Some(info.clone());
4485                    }
4486                }
4487            }
4488        }
4489        ZshCommand::Subsh(p) | ZshCommand::Cursh(p) => fill_heredoc_bodies(p, bodies),
4490        ZshCommand::FuncDef(f) => fill_heredoc_bodies(&mut f.body, bodies),
4491        ZshCommand::If(i) => {
4492            fill_heredoc_bodies(&mut i.cond, bodies);
4493            fill_heredoc_bodies(&mut i.then, bodies);
4494            for (c, b) in &mut i.elif {
4495                fill_heredoc_bodies(c, bodies);
4496                fill_heredoc_bodies(b, bodies);
4497            }
4498            if let Some(e) = &mut i.else_ {
4499                fill_heredoc_bodies(e, bodies);
4500            }
4501        }
4502        ZshCommand::While(w) | ZshCommand::Until(w) => {
4503            fill_heredoc_bodies(&mut w.cond, bodies);
4504            fill_heredoc_bodies(&mut w.body, bodies);
4505        }
4506        ZshCommand::For(f) => fill_heredoc_bodies(&mut f.body, bodies),
4507        ZshCommand::Case(c) => {
4508            for arm in &mut c.arms {
4509                fill_heredoc_bodies(&mut arm.body, bodies);
4510            }
4511        }
4512        ZshCommand::Repeat(r) => fill_heredoc_bodies(&mut r.body, bodies),
4513        ZshCommand::Time(Some(sublist)) => fill_in_sublist(sublist, bodies),
4514        ZshCommand::Try(t) => {
4515            fill_heredoc_bodies(&mut t.try_block, bodies);
4516            fill_heredoc_bodies(&mut t.always, bodies);
4517        }
4518        ZshCommand::Redirected(inner, redirs) => {
4519            for r in redirs {
4520                if let Some(idx) = r.heredoc_idx {
4521                    if let Some(info) = bodies.get(idx) {
4522                        r.heredoc = Some(info.clone());
4523                    }
4524                }
4525            }
4526            fill_in_command(inner, bodies);
4527        }
4528        ZshCommand::Time(None) | ZshCommand::Cond(_) | ZshCommand::Arith(_) => {}
4529    }
4530}
4531
4532/// If `list` is a Simple containing one word that ends in the
4533/// `<Inpar><Outpar>` token pair (the lexer-port encoding of `()`),
4534/// return the bare name. Used by `parse_program_until` to detect
4535/// `name() {body}` style function definitions where the lexer
4536/// hasn't split the `()` from the name.
4537/// Detect the `name() …` shape inside a Simple. Returns the function
4538/// name and (when the body was already inlined into the same Simple,
4539/// e.g. `foo() echo hi`) the rest of the words as the body's argv.
4540/// Returns None for non-funcdef shapes.
4541fn simple_name_with_inoutpar(list: &ZshList) -> Option<(Vec<String>, Vec<String>)> {
4542    if list.flags.async_ || list.sublist.next.is_some() {
4543        return None;
4544    }
4545    let pipe = &list.sublist.pipe;
4546    if pipe.next.is_some() {
4547        return None;
4548    }
4549    let simple = match &pipe.cmd {
4550        ZshCommand::Simple(s) => s,
4551        _ => return None,
4552    };
4553    if simple.words.is_empty() || !simple.assigns.is_empty() {
4554        return None;
4555    }
4556    let suffix = "\u{88}\u{8a}"; // Inpar + Outpar
4557                                 // Find the FIRST word ending in `()`. zsh accepts the
4558                                 // multi-name shorthand `fna fnb fnc() { body }` (parse.c:
4559                                 // par_funcdef wordlist) — words[0..i-1] are extra names,
4560                                 // words[i] is `lastname()`. Words after are the body argv
4561                                 // (one-line shorthand, `name() cmd args`).
4562    let par_idx = simple.words.iter().position(|w| w.ends_with(suffix))?;
4563    let mut names: Vec<String> = Vec::with_capacity(par_idx + 1);
4564    for w in &simple.words[..par_idx] {
4565        // Earlier names must be bare identifiers, NOT contain
4566        // tokens that imply they're not function names (no `()`,
4567        // no quotes, no expansions). zsh's lexer enforces this
4568        // at the wordlist level; we approximate by requiring the
4569        // word be an identifier-shaped token after untokenize.
4570        let bare = super::lex::untokenize(w);
4571        let valid = !bare.is_empty()
4572            && bare
4573                .chars()
4574                .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-' || c == '.' || c == '$');
4575        if !valid {
4576            return None;
4577        }
4578        names.push(bare);
4579    }
4580    let last = &simple.words[par_idx];
4581    let bare = &last[..last.len() - suffix.len()];
4582    if bare.is_empty() {
4583        return None;
4584    }
4585    names.push(super::lex::untokenize(bare));
4586    let rest = simple.words[par_idx + 1..].to_vec();
4587    Some((names, rest))
4588}
4589
4590/// Initialize parser state for a fresh parse of `input`.
4591/// Free-fn entry point — resets parser thread_locals and loads input.
4592pub fn parse_init(input: &str) {
4593    // Seed the option defaults the parser/lexer inspect. Real zsh
4594    // installs these via `install_emulation_defaults` (options.c:172)
4595    // at shell startup; zshrs's parse-only test entry path bypasses
4596    // init_main, so we mirror the `zsh` emulation defaults here.
4597    // Only seeds when unset so a script that explicitly disabled an
4598    // option stays so.
4599    for (name, default) in [
4600        ("shortloops", true),
4601        ("shortrepeat", false),
4602        ("multifuncdef", true),
4603        ("aliasfuncdef", false),
4604        ("ignorebraces", false),
4605        ("cshjunkieloops", false),
4606        ("posixbuiltins", false),
4607        ("execopt", true),
4608        ("kshautoload", false),
4609        ("aliases", true),
4610    ] {
4611        if crate::ported::options::opt_state_get(name).is_none() {
4612            crate::ported::options::opt_state_set(name, default);
4613        }
4614    }
4615    lex_init(input);
4616}
4617
4618/// P9b decoder (wordcode-pipeline variant): direct port of
4619/// `ecgetstr(Estate s, int dup, int *tokflag)` from
4620/// `Src/parse.c:2855-2890`. Reads a wordcode at `pc`, decodes the
4621/// encoded string back to owned String. Returns (string,
4622/// pc_after_consumed). Distinct from the existing `ecgetstr` (which
4623/// takes a separate strs buffer for text.rs) — this variant uses
4624/// the live ECSTRS_REVERSE HashMap populated at ecstrcode time.
4625pub fn ecgetstr_wordcode(buf: &[u32], pc: usize) -> (String, usize) {
4626    if pc >= buf.len() {
4627        return (String::new(), pc);
4628    }
4629    let c = buf[pc];
4630    let next = pc + 1;
4631    // parse.c:2862-2863 — empty-string sentinels.
4632    if c == 6 || c == 7 {
4633        return (String::new(), next);
4634    }
4635    // parse.c:2864-2871 — inline-packed short string.
4636    if (c & 2) != 0 {
4637        let b0 = ((c >> 3) & 0xff) as u8;
4638        let b1 = ((c >> 11) & 0xff) as u8;
4639        let b2 = ((c >> 19) & 0xff) as u8;
4640        let mut bytes: Vec<u8> = Vec::new();
4641        for b in [b0, b1, b2] {
4642            if b == 0 {
4643                break;
4644            }
4645            bytes.push(b);
4646        }
4647        return (String::from_utf8_lossy(&bytes).into_owned(), next);
4648    }
4649    // parse.c:2872-2873 — long string via offs lookup. Map value is
4650    // metafied Vec<u8>; convert back to display String. Unmetafy is
4651    // the caller's job (the wordcode-parity dumper does it; other
4652    // callers may want raw bytes).
4653    let s = ECSTRS_REVERSE
4654        .with_borrow(|m| m.get(&c).cloned())
4655        .map(|v| String::from_utf8_lossy(&v).into_owned())
4656        .unwrap_or_default();
4657    (s, next)
4658}
4659
4660/// Parse the complete input. Direct port of `parse_event` /
4661/// `par_list` from `Src/parse.c:614-720`. On syntax error,
4662/// sets `errflag |= ERRFLAG_ERROR` (via `zerr`) and returns the
4663/// partial program — callers check `errflag` to detect failure,
4664/// matching C's `Eprog parse_event(...)` + `if (errflag) {...}`.
4665pub fn parse() -> ZshProgram {
4666    zshlex();
4667
4668    let mut program = parse_program_until(None);
4669
4670    // Post-pass: wire heredoc bodies (collected by the inline NEWLIN
4671    // walk in zshlex into LEX_HEREDOCS) back into ZshRedir.heredoc
4672    // fields via heredoc_idx. No C analog — LEX_HEREDOCS is the
4673    // Rust-only AST-glue Vec.
4674    let bodies: Vec<HereDocInfo> = LEX_HEREDOCS
4675        .with_borrow(|v| v.clone())
4676        .into_iter()
4677        .map(|h| HereDocInfo {
4678            content: h.content,
4679            terminator: h.terminator,
4680            quoted: h.quoted,
4681        })
4682        .collect();
4683    if !bodies.is_empty() {
4684        fill_heredoc_bodies(&mut program, &bodies);
4685    }
4686
4687    program
4688}
4689
4690/// Wordcode-emission top-level driver. Closest C analog is
4691/// `parse_list(void)` at `Src/parse.c:697-712`: init_parse +
4692/// zshlex + par_list(&c) + bld_eprog. This entry omits init_parse
4693/// and bld_eprog (caller responsibilities) and inlines a guard
4694/// loop around par_list_wordcode for cases where the lexer leaves
4695/// a non-ENDINPUT terminator (LEXERR, missing close-token, etc.).
4696pub fn par_event_wordcode() -> usize {
4697    let start = ECUSED.get() as usize;
4698    // C `parse_list` (parse.c:697-712) calls par_list ONCE — par_list's
4699    // own goto-rec loop handles all SEPER-separated sublists. The
4700    // outer loop here exists for safety against early-return cases
4701    // (LEXERR, missing terminator) but normally par_list_wordcode
4702    // consumes everything in one call.
4703    let mut cmplx: i32 = 0;
4704    while tok() != ENDINPUT && tok() != LEXERR {
4705        par_list_wordcode(&mut cmplx);
4706        match tok() {
4707            SEMI | NEWLIN | AMPER | AMPERBANG | SEPER => {
4708                zshlex();
4709            }
4710            _ => break,
4711        }
4712    }
4713    // parse.c:712 — `ecadd(WCB_END());`
4714    ecadd(WCB_END());
4715    start
4716}
4717
4718/// Port of `par_list(int *cmplx)` from `Src/parse.c:769-803`.
4719/// `list : { SEPER } [ sublist [ { SEPER | AMPER | AMPERBANG } list ] ]`.
4720/// True line-by-line port: takes `cmplx: &mut i32` matching C's
4721/// `int *cmplx` out-parameter, uses stack-local `c` per iteration
4722/// like C (so inner sublist cmplx is independent of outer).
4723pub fn par_list_wordcode(cmplx: &mut i32) {
4724    // c:773 — `int p, lp = -1, c;`
4725    let mut p: usize;
4726    let mut lp: i32 = -1;
4727    let mut c: i32;
4728    loop {
4729        // c:775 `rec:` — c:777-778 `while (tok == SEPER) zshlex();`
4730        while tok() == SEPER {
4731            zshlex();
4732        }
4733        // c:780 — `p = ecadd(0);`
4734        p = ecadd(0);
4735        // c:781 — `c = 0;`
4736        c = 0;
4737        // c:783 — `if (par_sublist(&c)) { ... }`
4738        if par_sublist_wordcode(&mut c) {
4739            // c:784 — `*cmplx |= c;`
4740            *cmplx |= c;
4741            // c:785 — `if (tok == SEPER || tok == AMPER || tok == AMPERBANG)`
4742            let t = tok();
4743            if t == SEPER || t == AMPER || t == AMPERBANG {
4744                // c:786-787 — `if (tok != SEPER) *cmplx = 1;`
4745                if t != SEPER {
4746                    *cmplx = 1;
4747                }
4748                // c:788-790 — `set_list_code(p, ..., c);`
4749                let z = if t == SEPER {
4750                    Z_SYNC
4751                } else if t == AMPER {
4752                    Z_ASYNC
4753                } else {
4754                    Z_ASYNC | Z_DISOWN
4755                };
4756                set_list_code(p, z, c != 0);
4757                // c:791 — `incmdpos = 1;`
4758                set_incmdpos(true);
4759                // c:792-794 — `do { zshlex(); } while (tok == SEPER);`
4760                loop {
4761                    zshlex();
4762                    if tok() != SEPER {
4763                        break;
4764                    }
4765                }
4766                // c:795 — `lp = p;` c:796 — `goto rec;`
4767                lp = p as i32;
4768                continue;
4769            } else {
4770                // c:798 — `set_list_code(p, (Z_SYNC | Z_END), c);`
4771                set_list_code(p, Z_SYNC | Z_END, c != 0);
4772            }
4773        } else {
4774            // c:800-802 — `ecused--; if (lp >= 0) ecbuf[lp] |= wc_bdata(Z_END);`
4775            ECUSED.set((ECUSED.get() - 1).max(0));
4776            if lp >= 0 {
4777                ECBUF.with_borrow_mut(|b| {
4778                    if (lp as usize) < b.len() {
4779                        b[lp as usize] |= wc_bdata(Z_END as wordcode);
4780                    }
4781                });
4782            }
4783        }
4784        break;
4785    }
4786}
4787
4788/// Port of `par_list1(int *cmplx)` from `Src/parse.c:806-817`.
4789/// Single-sublist variant used by funcdef bodies and the short
4790/// `for`/`while`/`repeat` forms — exactly one sublist with
4791/// `Z_SYNC|Z_END`, no chain.
4792pub fn par_list1_wordcode(cmplx: &mut i32) {
4793    // c:810 — `int p = ecadd(0), c = 0;`
4794    let p = ecadd(0);
4795    let mut c: i32 = 0;
4796    // c:812 — `if (par_sublist(&c)) { ... }`
4797    if par_sublist_wordcode(&mut c) {
4798        // c:813 — `set_list_code(p, (Z_SYNC | Z_END), c);`
4799        set_list_code(p, Z_SYNC | Z_END, c != 0);
4800        // c:814 — `*cmplx |= c;`
4801        *cmplx |= c;
4802    } else {
4803        // c:816 — `ecused--;`
4804        ECUSED.set((ECUSED.get() - 1).max(0));
4805    }
4806}
4807
4808/// Port of `par_save_list(C)` macro from `Src/parse.c:475-480`.
4809///   do { int eu = ecused; par_list(C); if (eu == ecused) ecadd(WCB_END()); } while (0)
4810pub fn par_save_list_wordcode(cmplx: &mut i32) {
4811    let eu = ECUSED.get();
4812    par_list_wordcode(cmplx);
4813    if ECUSED.get() == eu {
4814        ecadd(WCB_END());
4815    }
4816}
4817
4818/// Port of `par_save_list1(C)` macro from `Src/parse.c:481-486`.
4819pub fn par_save_list1_wordcode(cmplx: &mut i32) {
4820    let eu = ECUSED.get();
4821    par_list1_wordcode(cmplx);
4822    if ECUSED.get() == eu {
4823        ecadd(WCB_END());
4824    }
4825}
4826
4827/// Port of `par_sublist(int *cmplx)` from `Src/parse.c:823-865`.
4828/// `sublist : sublist2 [ ( DBAR | DAMPER ) { SEPER } sublist ]`.
4829/// Emits a WCB_SUBLIST header, recurses into par_sublist2 for
4830/// the !/coproc prefix + pipeline, then chains via DBAR (`||`)
4831/// or DAMPER (`&&`) recursively. Returns true if at least one
4832/// pipeline was emitted.
4833pub fn par_sublist_wordcode(cmplx: &mut i32) -> bool {
4834    // c:827 — `int f, p, c = 0;`
4835    let mut c: i32 = 0;
4836    // c:829 — `p = ecadd(0);`
4837    let p = ecadd(0);
4838    // c:831 — `if ((f = par_sublist2(&c)) != -1) { ... }`
4839    match par_sublist2(&mut c) {
4840        Some(f) => {
4841            // c:832 — `int e = ecused;`
4842            let e = ECUSED.get() as usize;
4843            // c:834 — `*cmplx |= c;`
4844            *cmplx |= c;
4845            if tok() == DBAR || tok() == DAMPER {
4846                // c:836 — `enum lextok qtok = tok;`
4847                let qtok = tok();
4848                // c:839 — `cmdpush(tok == DBAR ? CS_CMDOR : CS_CMDAND);`
4849                cmdpush(if qtok == DBAR {
4850                    CS_CMDOR as u8
4851                } else {
4852                    CS_CMDAND as u8
4853                });
4854                // c:840 — `zshlex();`
4855                zshlex();
4856                // c:841-842 — `while (tok == SEPER) zshlex();`
4857                while tok() == SEPER {
4858                    zshlex();
4859                }
4860                // c:843 — `sl = par_sublist(cmplx);`
4861                let sl = par_sublist_wordcode(cmplx);
4862                // c:844-847 — `set_sublist_code(p, (sl ? ... : WC_SUBLIST_END),
4863                // f, (e - 1 - p), c);`
4864                let st = if sl {
4865                    if qtok == DBAR {
4866                        WC_SUBLIST_OR
4867                    } else {
4868                        WC_SUBLIST_AND
4869                    }
4870                } else {
4871                    WC_SUBLIST_END
4872                };
4873                set_sublist_code(p, st as i32, f, (e - 1 - p) as i32, c != 0);
4874                // c:848 — `cmdpop();`
4875                cmdpop();
4876            } else {
4877                // c:850-853 — `if (tok == AMPER || tok == AMPERBANG)
4878                // { c = 1; *cmplx |= c; }`
4879                if tok() == AMPER || tok() == AMPERBANG {
4880                    c = 1;
4881                    *cmplx |= c;
4882                }
4883                // c:854 — `set_sublist_code(p, WC_SUBLIST_END, f,
4884                // (e - 1 - p), c);`
4885                set_sublist_code(p, WC_SUBLIST_END as i32, f, (e - 1 - p) as i32, c != 0);
4886            }
4887            // c:856 — `return 1;`
4888            true
4889        }
4890        None => {
4891            // c:858-859 — `ecused--; return 0;`
4892            ECUSED.set((ECUSED.get() - 1).max(0));
4893            false
4894        }
4895    }
4896}
4897
4898/// Port of `par_pline(int *cmplx)` from `Src/parse.c:894-955`.
4899/// `pline : cmd [ ( BAR | BARAMP ) { SEPER } pline ]`. Emits a
4900/// WCB_PIPE header (mid for chain links, end for the last cmd)
4901/// plus the optional BARAMP `2>&1` synthetic redir.
4902/// Port of `par_pline(int *cmplx)` from `Src/parse.c:893-947`.
4903/// (Named `par_pipe_wordcode` to disambiguate from the AST
4904/// `par_pline` at parse.rs:3744 — semantically the same `pline`
4905/// production.)
4906pub fn par_pipe_wordcode(cmplx: &mut i32) -> bool {
4907    // c:897 — `zlong line = toklineno;`
4908    let line = toklineno() as i64;
4909    // c:899 — `p = ecadd(0);`
4910    let p = ecadd(0);
4911    // c:901-904 — `if (!par_cmd(cmplx, 0)) { ecused--; return 0; }`
4912    if !par_cmd_wordcode(cmplx, 0) {
4913        ECUSED.set((ECUSED.get() - 1).max(0));
4914        return false;
4915    }
4916    if tok() == BAR_TOK {
4917        // c:906 — `*cmplx = 1;`
4918        *cmplx = 1;
4919        // c:907 — `cmdpush(CS_PIPE);`
4920        cmdpush(CS_PIPE as u8);
4921        // c:908 — `zshlex();`
4922        zshlex();
4923        // c:909-910 — `while (tok == SEPER) zshlex();`
4924        while tok() == SEPER {
4925            zshlex();
4926        }
4927        // c:911 — `ecbuf[p] = WCB_PIPE(WC_PIPE_MID, line>=0 ? line+1 : 0);`
4928        ECBUF.with_borrow_mut(|b| {
4929            if p < b.len() {
4930                b[p] = WCB_PIPE(
4931                    WC_PIPE_MID,
4932                    if line >= 0 { (line + 1) as wordcode } else { 0 },
4933                );
4934            }
4935        });
4936        // c:912 — `ecispace(p+1, 1);`
4937        ecispace(p + 1, 1);
4938        // c:913 — `ecbuf[p+1] = ecused - 1 - p;`
4939        let used = ECUSED.get() as usize;
4940        ECBUF.with_borrow_mut(|b| {
4941            if p + 1 < b.len() {
4942                b[p + 1] = (used.saturating_sub(1 + p)) as wordcode;
4943            }
4944        });
4945        // c:914-916 — `if (!par_pline(cmplx)) { tok = LEXERR; }`
4946        if !par_pipe_wordcode(cmplx) {
4947            set_tok(LEXERR);
4948        }
4949        // c:917 — `cmdpop();`
4950        cmdpop();
4951        true
4952    } else if tok() == BARAMP {
4953        // c:920-923 — walk past inline WC_REDIR to find r.
4954        let mut r = p + 1;
4955        loop {
4956            let code = ECBUF.with_borrow(|b| b.get(r).copied().unwrap_or(0));
4957            if wc_code(code) != WC_REDIR {
4958                break;
4959            }
4960            r += WC_REDIR_WORDS(code) as usize;
4961        }
4962        // c:925-928 — `ecispace(r, 3);` + synthetic `2>&1` redir
4963        ecispace(r, 3);
4964        ECBUF.with_borrow_mut(|b| {
4965            if r + 2 < b.len() {
4966                b[r] = WCB_REDIR(REDIR_MERGEOUT as wordcode);
4967                b[r + 1] = 2;
4968                b[r + 2] = ecstrcode("1");
4969            }
4970        });
4971        // c:930 — `*cmplx = 1;`
4972        *cmplx = 1;
4973        cmdpush(CS_ERRPIPE as u8);
4974        zshlex();
4975        while tok() == SEPER {
4976            zshlex();
4977        }
4978        ECBUF.with_borrow_mut(|b| {
4979            if p < b.len() {
4980                b[p] = WCB_PIPE(
4981                    WC_PIPE_MID,
4982                    if line >= 0 { (line + 1) as wordcode } else { 0 },
4983                );
4984            }
4985        });
4986        ecispace(p + 1, 1);
4987        let used = ECUSED.get() as usize;
4988        ECBUF.with_borrow_mut(|b| {
4989            if p + 1 < b.len() {
4990                b[p + 1] = (used.saturating_sub(1 + p)) as wordcode;
4991            }
4992        });
4993        if !par_pipe_wordcode(cmplx) {
4994            set_tok(LEXERR);
4995        }
4996        cmdpop();
4997        true
4998    } else {
4999        // c:944 — `ecbuf[p] = WCB_PIPE(WC_PIPE_END, line>=0 ? line+1 : 0);`
5000        ECBUF.with_borrow_mut(|b| {
5001            if p < b.len() {
5002                b[p] = WCB_PIPE(
5003                    WC_PIPE_END,
5004                    if line >= 0 { (line + 1) as wordcode } else { 0 },
5005                );
5006            }
5007        });
5008        true
5009    }
5010}
5011
5012/// Port of `par_cmd(int *cmplx, int zsh_construct)` from
5013/// `Src/parse.c:958-1085`. Parses leading + trailing redirs and
5014/// dispatches on the current token to the right par_* builder.
5015/// Returns false only when no command was emitted (no redirs +
5016/// par_simple returned 0).
5017/// Port of `par_cmd(int *cmplx, int zsh_construct)` from
5018/// `Src/parse.c:957-1077`.
5019pub fn par_cmd_wordcode(cmplx: &mut i32, zsh_construct: i32) -> bool {
5020    // c:960 — `int r, nr = 0;`
5021    let mut nr: i32 = 0;
5022    // c:962 — `r = ecused;`
5023    let mut r: usize = ECUSED.get() as usize;
5024    // c:964-968 — leading redirs.
5025    if IS_REDIROP(tok()) {
5026        // c:965 — `*cmplx = 1;`
5027        *cmplx = 1;
5028        // c:966-968 — `while (IS_REDIROP(tok)) { nr += par_redir(&r, NULL); }`
5029        while IS_REDIROP(tok()) {
5030            nr += par_redir_wordcode(&mut r, None);
5031        }
5032    }
5033    // c:970-1066 — token-dispatch switch.
5034    match tok() {
5035        FOR => {
5036            cmdpush(CS_FOR as u8);
5037            par_for_wordcode(cmplx);
5038            cmdpop();
5039        }
5040        FOREACH => {
5041            cmdpush(CS_FOREACH as u8);
5042            par_for_wordcode(cmplx);
5043            cmdpop();
5044        }
5045        SELECT => {
5046            // c:982 — `*cmplx = 1;`
5047            *cmplx = 1;
5048            cmdpush(CS_SELECT as u8);
5049            par_for_wordcode(cmplx);
5050            cmdpop();
5051        }
5052        CASE => {
5053            cmdpush(CS_CASE as u8);
5054            par_case_wordcode(cmplx);
5055            cmdpop();
5056        }
5057        IF => {
5058            par_if_wordcode(cmplx);
5059        }
5060        WHILE => {
5061            cmdpush(CS_WHILE as u8);
5062            par_while_wordcode(cmplx);
5063            cmdpop();
5064        }
5065        UNTIL => {
5066            cmdpush(CS_UNTIL as u8);
5067            par_while_wordcode(cmplx);
5068            cmdpop();
5069        }
5070        REPEAT => {
5071            cmdpush(CS_REPEAT as u8);
5072            par_repeat_wordcode(cmplx);
5073            cmdpop();
5074        }
5075        INPAR_TOK => {
5076            // c:1011 — `*cmplx = 1;`
5077            *cmplx = 1;
5078            cmdpush(CS_SUBSH as u8);
5079            par_subsh_wordcode(cmplx, zsh_construct);
5080            cmdpop();
5081        }
5082        INBRACE_TOK => {
5083            cmdpush(CS_CURSH as u8);
5084            par_subsh_wordcode(cmplx, zsh_construct);
5085            cmdpop();
5086        }
5087        FUNC => {
5088            cmdpush(CS_FUNCDEF as u8);
5089            par_funcdef_wordcode(cmplx);
5090            cmdpop();
5091        }
5092        DINBRACK => {
5093            cmdpush(CS_COND as u8);
5094            par_cond_wordcode();
5095            cmdpop();
5096        }
5097        DINPAR => {
5098            par_arith_wordcode();
5099        }
5100        TIME => {
5101            // c:1037-1050 — `static int inpartime` guard so
5102            // `time time foo` doesn't recurse infinitely.
5103            if !PARSER_INPARTIME.with(|c| c.get()) {
5104                // c:1041 — `*cmplx = 1;`
5105                *cmplx = 1;
5106                PARSER_INPARTIME.with(|c| c.set(true));
5107                par_time_wordcode();
5108                PARSER_INPARTIME.with(|c| c.set(false));
5109            } else {
5110                set_tok(STRING_LEX);
5111                let sr = par_simple_wordcode(cmplx, nr);
5112                if sr == 0 && nr == 0 {
5113                    return false;
5114                }
5115                if sr > 1 {
5116                    *cmplx = 1;
5117                    r += (sr - 1) as usize;
5118                }
5119            }
5120        }
5121        _ => {
5122            // c:1054 — `if (!(sr = par_simple(cmplx, nr)))`
5123            let sr = par_simple_wordcode(cmplx, nr);
5124            if sr == 0 {
5125                if nr == 0 {
5126                    return false;
5127                }
5128            } else if sr > 1 {
5129                // c:1060-1061 — `*cmplx = 1; r += sr - 1;`
5130                *cmplx = 1;
5131                r += (sr - 1) as usize;
5132            }
5133        }
5134    }
5135    // c:1067-1071 — trailing redirs.
5136    // c:1067 — `if (IS_REDIROP(tok)) { *cmplx = 1; while (...) (void)par_redir(&r, NULL); }`
5137    if IS_REDIROP(tok()) {
5138        *cmplx = 1;
5139        while IS_REDIROP(tok()) {
5140            let _ = par_redir_wordcode(&mut r, None);
5141        }
5142    }
5143    // c:1072-1075 — `incmdpos=1; incasepat=0; incond=0; intypeset=0;`
5144    set_incmdpos(true);
5145    set_incasepat(0);
5146    set_incond(0);
5147    set_intypeset(false);
5148    let _ = r;
5149    // c:1076 — `return 1;`
5150    true
5151}
5152
5153/// Port of `par_for(int *cmplx)` from `Src/parse.c:1086-1198`.
5154pub fn par_for_wordcode(cmplx: &mut i32) {
5155    // c:1089 — `int oecused = ecused, csh = (tok == FOREACH), p, sel = (tok == SELECT);`
5156    let _oecused = ECUSED.get() as usize;
5157    let csh = tok() == FOREACH;
5158    let sel = tok() == SELECT;
5159    let p: usize;
5160    // c:1090 — `int type;`
5161    let r#type: wordcode;
5162
5163    // c:1092 — `p = ecadd(0);`
5164    p = ecadd(0);
5165
5166    // c:1094 — `incmdpos = 0;`
5167    set_incmdpos(false);
5168    // c:1095 — `infor = tok == FOR ? 2 : 0;`
5169    set_infor(if tok() == FOR { 2 } else { 0 });
5170    // c:1096 — `zshlex();`
5171    zshlex();
5172    // c:1097 — `if (tok == DINPAR) {`
5173    if tok() == DINPAR {
5174        // c:1098 — `zshlex();`
5175        zshlex();
5176        // c:1099-1100 — `if (tok != DINPAR) YYERRORV(oecused);`
5177        if tok() != DINPAR {
5178            zerr("par_for: expected init");
5179            return;
5180        }
5181        // c:1101 — `ecstr(tokstr);`
5182        ecstr(&tokstr().unwrap_or_default());
5183        // c:1102 — `zshlex();`
5184        zshlex();
5185        // c:1103-1104
5186        if tok() != DINPAR {
5187            zerr("par_for: expected cond");
5188            return;
5189        }
5190        // c:1105
5191        ecstr(&tokstr().unwrap_or_default());
5192        // c:1106
5193        zshlex();
5194        // c:1107-1108
5195        if tok() != DOUTPAR {
5196            zerr("par_for: expected ))");
5197            return;
5198        }
5199        // c:1109
5200        ecstr(&tokstr().unwrap_or_default());
5201        // c:1110 — `infor = 0;`
5202        set_infor(0);
5203        // c:1111 — `incmdpos = 1;`
5204        set_incmdpos(true);
5205        // c:1112 — `zshlex();`
5206        zshlex();
5207        // c:1113 — `type = WC_FOR_COND;`
5208        r#type = WC_FOR_COND;
5209    } else {
5210        // c:1115 — `int np = 0, n, posix_in, ona = noaliases, onc = nocorrect;`
5211        let mut np: usize = 0;
5212        let mut n: u32;
5213        let posix_in: bool;
5214        let ona = noaliases();
5215        let onc = nocorrect();
5216        // c:1116 — `infor = 0;`
5217        set_infor(0);
5218        // c:1117-1118 — `if (tok != STRING || !isident(tokstr)) YYERRORV(oecused);`
5219        if tok() != STRING_LEX || !crate::ported::params::isident(&tokstr().unwrap_or_default()) {
5220            zerr("par_for: expected identifier");
5221            return;
5222        }
5223        // c:1119-1120 — `if (!sel) np = ecadd(0);`
5224        if !sel {
5225            np = ecadd(0);
5226        }
5227        // c:1121 — `n = 0;`
5228        n = 0;
5229        // c:1122 — `incmdpos = 1;`
5230        set_incmdpos(true);
5231        // c:1123 — `noaliases = nocorrect = 1;`
5232        set_noaliases(true);
5233        set_nocorrect(1);
5234        // c:1124 — `for (;;) {`
5235        loop {
5236            // c:1125 — `n++;`
5237            n += 1;
5238            // c:1126 — `ecstr(tokstr);`
5239            ecstr(&tokstr().unwrap_or_default());
5240            // c:1127 — `zshlex();`
5241            zshlex();
5242            // c:1128-1129 — `if (tok != STRING || !strcmp(tokstr, "in") || sel) break;`
5243            if tok() != STRING_LEX || tokstr().as_deref() == Some("in") || sel {
5244                break;
5245            }
5246            // c:1130-1135 — `if (!isident(tokstr) || errflag) { ... YYERRORV; }`
5247            if !crate::ported::params::isident(&tokstr().unwrap_or_default())
5248                || (errflag.load(Ordering::Relaxed) & 1) != 0
5249            {
5250                set_noaliases(ona);
5251                set_nocorrect(onc);
5252                zerr("par_for: expected identifier in name list");
5253                return;
5254            }
5255        }
5256        // c:1137-1138 — `noaliases = ona; nocorrect = onc;`
5257        set_noaliases(ona);
5258        set_nocorrect(onc);
5259        // c:1139-1140 — `if (!sel) ecbuf[np] = n;`
5260        if !sel {
5261            ECBUF.with_borrow_mut(|b| {
5262                b[np] = n;
5263            });
5264        }
5265        // c:1141 — `posix_in = isnewlin;`
5266        posix_in = isnewlin() != 0;
5267        // c:1142-1143 — `while (isnewlin) zshlex();`
5268        while isnewlin() != 0 {
5269            zshlex();
5270        }
5271        // c:1144 — `if (tok == STRING && !strcmp(tokstr, "in")) {`
5272        if tok() == STRING_LEX && tokstr().as_deref() == Some("in") {
5273            // c:1145 — `incmdpos = 0;`
5274            set_incmdpos(false);
5275            // c:1146 — `zshlex();`
5276            zshlex();
5277            // c:1147 — `np = ecadd(0);`
5278            np = ecadd(0);
5279            // c:1148 — `n = par_wordlist();`
5280            let n2 = par_wordlist_wordcode();
5281            // c:1149-1150 — `if (tok != SEPER) YYERRORV(oecused);`
5282            if tok() != SEPER {
5283                zerr("par_for: expected separator after `in`");
5284                return;
5285            }
5286            // c:1151 — `ecbuf[np] = n;`
5287            ECBUF.with_borrow_mut(|b| {
5288                b[np] = n2 as wordcode;
5289            });
5290            // c:1152 — `type = (sel ? WC_SELECT_LIST : WC_FOR_LIST);`
5291            r#type = if sel { WC_SELECT_LIST } else { WC_FOR_LIST };
5292        } else if !posix_in && tok() == INPAR_TOK {
5293            // c:1153-1154 — `else if (!posix_in && tok == INPAR)`
5294            // c:1154 — `incmdpos = 0;`
5295            set_incmdpos(false);
5296            // c:1155 — `zshlex();`
5297            zshlex();
5298            // c:1156 — `np = ecadd(0);`
5299            np = ecadd(0);
5300            // c:1157 — `n = par_nl_wordlist();`
5301            let n2 = par_nl_wordlist_wordcode();
5302            // c:1158-1159 — `if (tok != OUTPAR) YYERRORV(oecused);`
5303            if tok() != OUTPAR_TOK {
5304                zerr("par_for: expected `)`");
5305                return;
5306            }
5307            // c:1160 — `ecbuf[np] = n;`
5308            ECBUF.with_borrow_mut(|b| {
5309                b[np] = n2 as wordcode;
5310            });
5311            // c:1161 — `incmdpos = 1;`
5312            set_incmdpos(true);
5313            // c:1162 — `zshlex();`
5314            zshlex();
5315            // c:1163 — `type = (sel ? WC_SELECT_LIST : WC_FOR_LIST);`
5316            r#type = if sel { WC_SELECT_LIST } else { WC_FOR_LIST };
5317        } else {
5318            // c:1165 — `type = (sel ? WC_SELECT_PPARAM : WC_FOR_PPARAM);`
5319            r#type = if sel { WC_SELECT_PPARAM } else { WC_FOR_PPARAM };
5320        }
5321        let _ = np;
5322    }
5323    // c:1167 — `incmdpos = 1;`
5324    set_incmdpos(true);
5325    // c:1168-1169 — `while (tok == SEPER) zshlex();`
5326    while tok() == SEPER {
5327        zshlex();
5328    }
5329    // c:1170-1193 — body dispatch (inline in C, factored here for
5330    // reuse by par_while/par_repeat — same control flow, same calls).
5331    par_loop_body_wordcode(cmplx, csh);
5332    // c:1195-1197 — `ecbuf[p] = (sel ? WCB_SELECT(...) : WCB_FOR(...));`
5333    let used = ECUSED.get() as usize;
5334    let off = used.saturating_sub(1 + p) as wordcode;
5335    ECBUF.with_borrow_mut(|b| {
5336        b[p] = if sel {
5337            WCB_SELECT(r#type, off)
5338        } else {
5339            WCB_FOR(r#type, off)
5340        };
5341    });
5342}
5343
5344/// Port of `par_wordlist(void)` from `Src/parse.c:2361-2371` —
5345/// emits wordcode form. Returns the number of strings emitted.
5346fn par_wordlist_wordcode() -> u32 {
5347    // c:2364 — `int num = 0;`
5348    let mut num: u32 = 0;
5349    // c:2365 — `while (tok == STRING) {`
5350    while tok() == STRING_LEX {
5351        // c:2366 — `ecstr(tokstr);`
5352        ecstr(&tokstr().unwrap_or_default());
5353        // c:2367 — `num++;`
5354        num += 1;
5355        // c:2368 — `zshlex();`
5356        zshlex();
5357    }
5358    // c:2370 — `return num;`
5359    num
5360}
5361
5362/// Port of `par_nl_wordlist(void)` from `Src/parse.c:2378-2390` —
5363/// emits wordcode form. Like par_wordlist but tolerates SEPER
5364/// between words.
5365fn par_nl_wordlist_wordcode() -> u32 {
5366    // c:2381 — `int num = 0;`
5367    let mut num: u32 = 0;
5368    // c:2383 — `while (tok == STRING || tok == SEPER) {`
5369    while tok() == STRING_LEX || tok() == SEPER || tok() == NEWLIN {
5370        // c:2384-2387 — `if (tok != SEPER) { ecstr(tokstr); num++; }`
5371        if tok() == STRING_LEX {
5372            ecstr(&tokstr().unwrap_or_default());
5373            num += 1;
5374        }
5375        // c:2388 — `zshlex();`
5376        zshlex();
5377    }
5378    // c:2390 — `return num;`
5379    num
5380}
5381
5382/// Body dispatch shared by par_for / par_while / par_repeat.
5383/// Direct port of `Src/parse.c:1170-1194`.
5384fn par_loop_body_wordcode(cmplx: &mut i32, csh: bool) {
5385    if tok() == DOLOOP {
5386        zshlex();
5387        // c:1172 — `par_save_list(cmplx);`
5388        par_save_list_wordcode(cmplx);
5389        if tok() != DONE {
5390            zerr("missing `done`");
5391            return;
5392        }
5393        set_incmdpos(false);
5394        zshlex();
5395    } else if tok() == INBRACE_TOK {
5396        zshlex();
5397        // c:1179 — `par_save_list(cmplx);`
5398        par_save_list_wordcode(cmplx);
5399        if tok() != OUTBRACE_TOK {
5400            zerr("missing `}`");
5401            return;
5402        }
5403        set_incmdpos(false);
5404        zshlex();
5405    } else if csh || isset(CSHJUNKIELOOPS) {
5406        // c:1185 — `par_save_list(cmplx);`
5407        par_save_list_wordcode(cmplx);
5408        if tok() != ZEND {
5409            zerr("missing `end`");
5410            return;
5411        }
5412        set_incmdpos(false);
5413        zshlex();
5414    } else if unset(SHORTLOOPS) {
5415        zerr("short loop form requires SHORTLOOPS");
5416    } else {
5417        // c:1193 — `par_save_list1(cmplx);`
5418        par_save_list1_wordcode(cmplx);
5419    }
5420}
5421
5422/// `select` shares par_for body (c:983-985 routes SELECT to par_for).
5423pub fn par_select_wordcode(cmplx: &mut i32) {
5424    par_for_wordcode(cmplx);
5425}
5426
5427/// Port of `par_case(int *cmplx)` from `Src/parse.c:1208-1400`.
5428pub fn par_case_wordcode(_cmplx: &mut i32) {
5429    // c:1211 — `int oecused = ecused, brflag, p, pp, palts, type, nalts;`
5430    let _oecused = ECUSED.get() as usize;
5431    let brflag: bool;
5432    let p: usize;
5433    let mut pp: usize;
5434    let mut palts: usize;
5435    let mut r#type: wordcode;
5436    let mut nalts: u32;
5437    // c:1212 — `int ona, onc;`
5438    let ona: bool;
5439    let onc: i32;
5440
5441    // c:1214 — `p = ecadd(0);`
5442    p = ecadd(0);
5443
5444    // c:1216 — `incmdpos = 0;`
5445    set_incmdpos(false);
5446    // c:1217 — `zshlex();`
5447    zshlex();
5448    // c:1218-1219 — `if (tok != STRING) YYERRORV(oecused);`
5449    if tok() != STRING_LEX {
5450        zerr("par_case: expected scrutinee");
5451        return;
5452    }
5453    // c:1220 — `ecstr(tokstr);`
5454    ecstr(&tokstr().unwrap_or_default());
5455
5456    // c:1222 — `incmdpos = 1;`
5457    set_incmdpos(true);
5458    // c:1223-1224 — `ona = noaliases; onc = nocorrect;`
5459    ona = noaliases();
5460    onc = nocorrect();
5461    // c:1225 — `noaliases = nocorrect = 1;`
5462    set_noaliases(true);
5463    set_nocorrect(1);
5464    // c:1226 — `zshlex();`
5465    zshlex();
5466    // c:1227-1228 — `while (tok == SEPER) zshlex();`
5467    while tok() == SEPER {
5468        zshlex();
5469    }
5470    // c:1229 — `if (!(tok == STRING && !strcmp(tokstr, "in")) && tok != INBRACE)`
5471    if !(tok() == STRING_LEX && tokstr().as_deref() == Some("in")) && tok() != INBRACE_TOK {
5472        // c:1231-1233 — restore noaliases/nocorrect + ERROR
5473        set_noaliases(ona);
5474        set_nocorrect(onc);
5475        zerr("par_case: expected `in` or `{`");
5476        return;
5477    }
5478    // c:1235 — `brflag = (tok == INBRACE);`
5479    brflag = tok() == INBRACE_TOK;
5480    // c:1236 — `incasepat = 1;`
5481    set_incasepat(1);
5482    // c:1237 — `incmdpos = 0;`
5483    set_incmdpos(false);
5484    // c:1238-1239 — `noaliases = ona; nocorrect = onc;`
5485    set_noaliases(ona);
5486    set_nocorrect(onc);
5487    // c:1240 — `zshlex();`
5488    zshlex();
5489
5490    // c:1242 — `for (;;) {`
5491    'arms: loop {
5492        // c:1243 — `char *str;`
5493        let mut str: String;
5494        // c:1244 — `int skip_zshlex;`
5495        let skip_zshlex: bool;
5496
5497        // c:1246-1247 — `while (tok == SEPER) zshlex();`
5498        while tok() == SEPER {
5499            zshlex();
5500        }
5501        // c:1248-1249 — `if (tok == OUTBRACE) break;`
5502        if tok() == OUTBRACE_TOK {
5503            break 'arms;
5504        }
5505        // c:1250-1251 — `if (tok == INPAR) zshlex();`
5506        if tok() == INPAR_TOK {
5507            zshlex();
5508        }
5509        // c:1252-1254 — `if (tok == BAR) { str = ""; skip_zshlex = 1; }`
5510        if tok() == BAR_TOK {
5511            str = String::new();
5512            skip_zshlex = true;
5513        } else {
5514            // c:1256-1257 — `if (tok != STRING) YYERRORV(oecused);`
5515            if tok() != STRING_LEX {
5516                zerr("par_case: expected pattern");
5517                return;
5518            }
5519            // c:1258-1259 — `if (!strcmp(tokstr, "esac")) break;`
5520            if tokstr().as_deref() == Some("esac") {
5521                break 'arms;
5522            }
5523            // c:1260 — `str = dupstring(tokstr);`
5524            str = tokstr().unwrap_or_default();
5525            // c:1261 — `skip_zshlex = 0;`
5526            skip_zshlex = false;
5527        }
5528        // c:1263 — `type = WC_CASE_OR;`
5529        r#type = WC_CASE_OR;
5530        // c:1264-1266 — `pp = ecadd(0); palts = ecadd(0); nalts = 0;`
5531        pp = ecadd(0);
5532        palts = ecadd(0);
5533        nalts = 0;
5534        // c:1300 — `incasepat = -1;`
5535        set_incasepat(-1);
5536        // c:1301 — `incmdpos = 1;`
5537        set_incmdpos(true);
5538        // c:1302-1303 — `if (!skip_zshlex) zshlex();`
5539        if !skip_zshlex {
5540            zshlex();
5541        }
5542        // c:1304 — `for (;;) {`
5543        loop {
5544            // c:1305-1313 — `if (tok == OUTPAR) { ecstr(str);
5545            //   ecadd(ecnpats++); nalts++; incasepat = 0;
5546            //   incmdpos = 1; zshlex(); break; }`
5547            if tok() == OUTPAR_TOK {
5548                ecstr(&str);
5549                let np = ECNPATS.with(|cc| {
5550                    let v = cc.get();
5551                    cc.set(v + 1);
5552                    v
5553                }) as u32;
5554                ecadd(np);
5555                nalts += 1;
5556                set_incasepat(0);
5557                set_incmdpos(true);
5558                zshlex();
5559                break;
5560            }
5561            // c:1314-1320 — `else if (tok == BAR) { ecstr(str);
5562            //   ecadd(ecnpats++); nalts++; incasepat = 1;
5563            //   incmdpos = 0; }`
5564            else if tok() == BAR_TOK {
5565                ecstr(&str);
5566                let np = ECNPATS.with(|cc| {
5567                    let v = cc.get();
5568                    cc.set(v + 1);
5569                    v
5570                }) as u32;
5571                ecadd(np);
5572                nalts += 1;
5573                set_incasepat(1);
5574                set_incmdpos(false);
5575            }
5576            // c:1321-1357 — else { ... `(...)` whole-pattern hack
5577            // (Inpar at str[0]); else YYERRORV. Not yet ported —
5578            // err out on unexpected. }
5579            else {
5580                zerr("par_case: expected `)` or `|`");
5581                return;
5582            }
5583
5584            // c:1359 — `zshlex();`
5585            zshlex();
5586            // c:1360-1377 — switch on next tok.
5587            match tok() {
5588                STRING_LEX => {
5589                    // c:1361-1365
5590                    str = tokstr().unwrap_or_default();
5591                    zshlex();
5592                }
5593                OUTPAR_TOK | BAR_TOK => {
5594                    // c:1367-1371 — empty string
5595                    str = String::new();
5596                }
5597                _ => {
5598                    // c:1374-1376 — `YYERRORV(oecused);`
5599                    zerr("par_case: expected pattern, `)` or `|`");
5600                    return;
5601                }
5602            }
5603        }
5604        // c:1379 — `incasepat = 0;`
5605        set_incasepat(0);
5606        // c:1380 — `par_save_list(cmplx);`
5607        par_save_list_wordcode(_cmplx);
5608        // c:1381-1384 — terminator → arm type
5609        if tok() == SEMIAMP {
5610            r#type = WC_CASE_AND;
5611        } else if tok() == SEMIBAR {
5612            r#type = WC_CASE_TESTAND;
5613        }
5614        // c:1385 — `ecbuf[pp] = WCB_CASE(type, ecused - 1 - pp);`
5615        let used = ECUSED.get() as usize;
5616        ECBUF.with_borrow_mut(|b| {
5617            b[pp] = WCB_CASE(r#type, (used.saturating_sub(1 + pp)) as wordcode);
5618        });
5619        // c:1386 — `ecbuf[palts] = nalts;`
5620        ECBUF.with_borrow_mut(|b| {
5621            b[palts] = nalts;
5622        });
5623        // c:1387-1388 — terminator (ESAC w/o brace OR OUTBRACE w/ brace) → break
5624        if (tok() == ESAC && !brflag) || (tok() == OUTBRACE_TOK && brflag) {
5625            break 'arms;
5626        }
5627        // c:1389-1390 — `if (tok != DSEMI && tok != SEMIAMP && tok != SEMIBAR) YYERRORV;`
5628        if tok() != DSEMI && tok() != SEMIAMP && tok() != SEMIBAR {
5629            zerr("par_case: expected `;;`, `;&`, or `;|`");
5630            return;
5631        }
5632        // c:1391 — `incasepat = 1;`
5633        set_incasepat(1);
5634        // c:1392 — `incmdpos = 0;`
5635        set_incmdpos(false);
5636        // c:1393 — `zshlex();`
5637        zshlex();
5638    }
5639    // c:1395 — `incmdpos = 1;`
5640    set_incmdpos(true);
5641    // c:1396 — `incasepat = 0;`
5642    set_incasepat(0);
5643    // c:1397 — `zshlex();`
5644    zshlex();
5645
5646    // c:1399 — `ecbuf[p] = WCB_CASE(WC_CASE_HEAD, ecused - 1 - p);`
5647    let used = ECUSED.get() as usize;
5648    ECBUF.with_borrow_mut(|b| {
5649        b[p] = WCB_CASE(WC_CASE_HEAD, (used.saturating_sub(1 + p)) as wordcode);
5650    });
5651}
5652
5653/// Port of `par_if(int *cmplx)` from `Src/parse.c:1410-1512`.
5654pub fn par_if_wordcode(cmplx: &mut i32) {
5655    // c:1413 — `int oecused = ecused, p, pp, type, usebrace = 0;`
5656    let _oecused = ECUSED.get() as usize;
5657    let p: usize;
5658    let mut pp: usize = 0;
5659    let mut r#type: wordcode = WC_IF_IF;
5660    let mut usebrace: i32 = 0;
5661    // c:1414 — `enum lextok xtok;`
5662    let mut xtok: lextok;
5663    // c:1415 — `unsigned char nc;`
5664    let nc: u8;
5665    let _ = nc;
5666
5667    // c:1417 — `p = ecadd(0);`
5668    p = ecadd(0);
5669
5670    // c:1419 — `for (;;) {`
5671    loop {
5672        // c:1420 — `xtok = tok;`
5673        xtok = tok();
5674        // c:1421 — `cmdpush(xtok == IF ? CS_IF : CS_ELIF);`
5675        cmdpush(if xtok == IF {
5676            CS_IF as u8
5677        } else {
5678            CS_ELIF as u8
5679        });
5680        // c:1422-1426 — `if (xtok == FI) { incmdpos = 0; zshlex(); break; }`
5681        if xtok == FI {
5682            set_incmdpos(false);
5683            zshlex();
5684            break;
5685        }
5686        // c:1427 — `zshlex();`
5687        zshlex();
5688        // c:1428-1429 — `if (xtok == ELSE) break;`
5689        if xtok == ELSE {
5690            break;
5691        }
5692        // c:1430-1431 — `while (tok == SEPER) zshlex();`
5693        while tok() == SEPER {
5694            zshlex();
5695        }
5696        // c:1432-1435 — `if (!(xtok == IF || xtok == ELIF)) { cmdpop(); YYERRORV; }`
5697        if !(xtok == IF || xtok == ELIF) {
5698            cmdpop();
5699            zerr("par_if: expected `if` or `elif`");
5700            return;
5701        }
5702        // c:1436 — `pp = ecadd(0);`
5703        pp = ecadd(0);
5704        // c:1437 — `type = (xtok == IF ? WC_IF_IF : WC_IF_ELIF);`
5705        r#type = if xtok == IF { WC_IF_IF } else { WC_IF_ELIF };
5706        // c:1438 — `par_save_list(cmplx);` — condition body
5707        par_save_list_wordcode(cmplx);
5708        // c:1439 — `incmdpos = 1;`
5709        set_incmdpos(true);
5710        // c:1440-1443 — `if (tok == ENDINPUT) { cmdpop(); YYERRORV; }`
5711        if tok() == ENDINPUT {
5712            cmdpop();
5713            zerr("par_if: unexpected end-of-input after condition");
5714            return;
5715        }
5716        // c:1444-1445 — `while (tok == SEPER) zshlex();`
5717        while tok() == SEPER {
5718            zshlex();
5719        }
5720        // c:1446 — `xtok = FI;` — pre-set so the post-loop check works
5721        xtok = FI;
5722        // c:1447 — `nc = cmdstack[cmdsp - 1] == CS_IF ? CS_IFTHEN : CS_ELIFTHEN;`
5723        // (Not tracked separately in zshrs cmdstack — derive from cur top
5724        // by reading CMDSTACK; for safety use CS_IFTHEN as default.)
5725        // We don't have a way to read top easily — match by tracking
5726        // whether we just pushed CS_IF or CS_ELIF.
5727        // For wordcode emission this only affects cmdstack debug output;
5728        // not the emitted wordcode. Use CS_IFTHEN.
5729        let nc_local: u8 = CS_IFTHEN as u8;
5730        if tok() == THEN {
5731            // c:1448-1456 — THEN branch
5732            // c:1449 — `usebrace = 0;`
5733            usebrace = 0;
5734            // c:1450 — `cmdpop();`
5735            cmdpop();
5736            // c:1451 — `cmdpush(nc);`
5737            cmdpush(nc_local);
5738            // c:1452 — `zshlex();`
5739            zshlex();
5740            // c:1453 — `par_save_list(cmplx);` — then body
5741            par_save_list_wordcode(cmplx);
5742            // c:1454 — `ecbuf[pp] = WCB_IF(type, ecused - 1 - pp);`
5743            let used = ECUSED.get() as usize;
5744            ECBUF.with_borrow_mut(|b| {
5745                b[pp] = WCB_IF(r#type, (used.saturating_sub(1 + pp)) as wordcode);
5746            });
5747            // c:1455 — `incmdpos = 1;`
5748            set_incmdpos(true);
5749            // c:1456 — `cmdpop();`
5750            cmdpop();
5751        } else if tok() == INBRACE_TOK {
5752            // c:1457-1473 — INBRACE branch
5753            // c:1458 — `usebrace = 1;`
5754            usebrace = 1;
5755            // c:1459 — `cmdpop();`
5756            cmdpop();
5757            // c:1460 — `cmdpush(nc);`
5758            cmdpush(nc_local);
5759            // c:1461 — `zshlex();`
5760            zshlex();
5761            // c:1462 — `par_save_list(cmplx);`
5762            par_save_list_wordcode(cmplx);
5763            // c:1463-1466 — `if (tok != OUTBRACE) { cmdpop(); YYERRORV; }`
5764            if tok() != OUTBRACE_TOK {
5765                cmdpop();
5766                zerr("par_if: expected `}`");
5767                return;
5768            }
5769            // c:1467 — `ecbuf[pp] = WCB_IF(type, ecused - 1 - pp);`
5770            let used = ECUSED.get() as usize;
5771            ECBUF.with_borrow_mut(|b| {
5772                b[pp] = WCB_IF(r#type, (used.saturating_sub(1 + pp)) as wordcode);
5773            });
5774            // c:1469 — `zshlex();`
5775            zshlex();
5776            // c:1470 — `incmdpos = 1;`
5777            set_incmdpos(true);
5778            // c:1471-1472 — `if (tok == SEPER) break;`
5779            if tok() == SEPER {
5780                break;
5781            }
5782            // c:1473 — `cmdpop();`
5783            cmdpop();
5784        } else if unset(SHORTLOOPS) {
5785            // c:1474-1476 — `cmdpop(); YYERRORV;`
5786            cmdpop();
5787            zerr("par_if: short body requires SHORTLOOPS");
5788            return;
5789        } else {
5790            // c:1477-1484 — short loop form
5791            // c:1478 — `cmdpop();`
5792            cmdpop();
5793            // c:1479 — `cmdpush(nc);`
5794            cmdpush(nc_local);
5795            // c:1480 — `par_save_list1(cmplx);`
5796            par_save_list1_wordcode(cmplx);
5797            // c:1481 — `ecbuf[pp] = WCB_IF(type, ecused - 1 - pp);`
5798            let used = ECUSED.get() as usize;
5799            ECBUF.with_borrow_mut(|b| {
5800                b[pp] = WCB_IF(r#type, (used.saturating_sub(1 + pp)) as wordcode);
5801            });
5802            // c:1482 — `incmdpos = 1;`
5803            set_incmdpos(true);
5804            // c:1483 — `break;`
5805            break;
5806        }
5807    }
5808    // c:1486 — `cmdpop();`
5809    cmdpop();
5810    // c:1487 — `if (xtok == ELSE || tok == ELSE) {`
5811    if xtok == ELSE || tok() == ELSE {
5812        // c:1488 — `pp = ecadd(0);`
5813        pp = ecadd(0);
5814        // c:1489 — `cmdpush(CS_ELSE);`
5815        cmdpush(CS_ELSE as u8);
5816        // c:1490-1491 — `while (tok == SEPER) zshlex();`
5817        while tok() == SEPER {
5818            zshlex();
5819        }
5820        // c:1492-1498 — `if (tok == INBRACE && usebrace) { ... } else { ... }`
5821        if tok() == INBRACE_TOK && usebrace != 0 {
5822            // c:1493 — `zshlex();`
5823            zshlex();
5824            // c:1494 — `par_save_list(cmplx);`
5825            par_save_list_wordcode(cmplx);
5826            // c:1495-1498 — `if (tok != OUTBRACE) { cmdpop(); YYERRORV; }`
5827            if tok() != OUTBRACE_TOK {
5828                cmdpop();
5829                zerr("par_if: else expected `}`");
5830                return;
5831            }
5832        } else {
5833            // c:1500 — `par_save_list(cmplx);`
5834            par_save_list_wordcode(cmplx);
5835            // c:1501-1504 — `if (tok != FI) { cmdpop(); YYERRORV; }`
5836            if tok() != FI {
5837                cmdpop();
5838                zerr("par_if: else expected `fi`");
5839                return;
5840            }
5841        }
5842        // c:1506 — `incmdpos = 0;`
5843        set_incmdpos(false);
5844        // c:1507 — `ecbuf[pp] = WCB_IF(WC_IF_ELSE, ecused - 1 - pp);`
5845        let used = ECUSED.get() as usize;
5846        ECBUF.with_borrow_mut(|b| {
5847            b[pp] = WCB_IF(WC_IF_ELSE, (used.saturating_sub(1 + pp)) as wordcode);
5848        });
5849        // c:1508 — `zshlex();`
5850        zshlex();
5851        // c:1509 — `cmdpop();`
5852        cmdpop();
5853    }
5854    // c:1511 — `ecbuf[p] = WCB_IF(WC_IF_HEAD, ecused - 1 - p);`
5855    let used = ECUSED.get() as usize;
5856    ECBUF.with_borrow_mut(|b| {
5857        b[p] = WCB_IF(WC_IF_HEAD, (used.saturating_sub(1 + p)) as wordcode);
5858    });
5859}
5860
5861/// Port of `par_while(int *cmplx)` from `Src/parse.c:1520-1557`.
5862pub fn par_while_wordcode(cmplx: &mut i32) {
5863    // c:1523 — `int oecused = ecused, p;`
5864    let _oecused = ECUSED.get() as usize;
5865    let p: usize;
5866    // c:1524 — `int type = (tok == UNTIL ? WC_WHILE_UNTIL : WC_WHILE_WHILE);`
5867    let r#type: wordcode = if tok() == UNTIL {
5868        WC_WHILE_UNTIL
5869    } else {
5870        WC_WHILE_WHILE
5871    };
5872
5873    // c:1526 — `p = ecadd(0);`
5874    p = ecadd(0);
5875    // c:1527 — `zshlex();`
5876    zshlex();
5877    // c:1528 — `par_save_list(cmplx);` — condition.
5878    par_save_list_wordcode(cmplx);
5879    // c:1529 — `incmdpos = 1;`
5880    set_incmdpos(true);
5881    // c:1530-1531 — `while (tok == SEPER) zshlex();`
5882    while tok() == SEPER {
5883        zshlex();
5884    }
5885    // c:1532-1545 — body dispatch (inlined in C; we factor via
5886    // par_loop_body_wordcode since for/while/repeat share this
5887    // identical block).
5888    if tok() == DOLOOP {
5889        // c:1533 — `zshlex();`
5890        zshlex();
5891        // c:1534 — `par_save_list(cmplx);`
5892        par_save_list_wordcode(cmplx);
5893        // c:1535-1536 — `if (tok != DONE) YYERRORV(oecused);`
5894        if tok() != DONE {
5895            zerr("par_while: expected `done`");
5896            return;
5897        }
5898        // c:1537 — `incmdpos = 0;`
5899        set_incmdpos(false);
5900        // c:1538 — `zshlex();`
5901        zshlex();
5902    } else if tok() == INBRACE_TOK {
5903        // c:1540 — `zshlex();`
5904        zshlex();
5905        // c:1541 — `par_save_list(cmplx);`
5906        par_save_list_wordcode(cmplx);
5907        // c:1542-1543 — `if (tok != OUTBRACE) YYERRORV(oecused);`
5908        if tok() != OUTBRACE_TOK {
5909            zerr("par_while: expected `}`");
5910            return;
5911        }
5912        // c:1544 — `incmdpos = 0;`
5913        set_incmdpos(false);
5914        // c:1545 — `zshlex();`
5915        zshlex();
5916    } else if isset(CSHJUNKIELOOPS) {
5917        // c:1546-1550
5918        par_save_list_wordcode(cmplx);
5919        if tok() != ZEND {
5920            zerr("par_while: expected `end`");
5921            return;
5922        }
5923        zshlex();
5924    } else if unset(SHORTLOOPS) {
5925        // c:1551-1552 — `YYERRORV(oecused);`
5926        zerr("par_while: short body requires SHORTLOOPS");
5927        return;
5928    } else {
5929        // c:1554 — `par_save_list1(cmplx);`
5930        par_save_list1_wordcode(cmplx);
5931    }
5932
5933    // c:1556 — `ecbuf[p] = WCB_WHILE(type, ecused - 1 - p);`
5934    let used = ECUSED.get() as usize;
5935    ECBUF.with_borrow_mut(|b| {
5936        b[p] = WCB_WHILE(r#type, (used.saturating_sub(1 + p)) as wordcode);
5937    });
5938}
5939
5940/// `until` shares par_while body — tok==UNTIL flips the type.
5941pub fn par_until_wordcode(cmplx: &mut i32) {
5942    par_while_wordcode(cmplx);
5943}
5944
5945/// Port of `par_repeat(int *cmplx)` from `Src/parse.c:1564-1606`.
5946pub fn par_repeat_wordcode(cmplx: &mut i32) {
5947    // c:1567 — `/* ### what to do about inrepeat_ here? */`
5948    // c:1568 — `int oecused = ecused, p;`
5949    let _oecused = ECUSED.get() as usize;
5950    let p: usize;
5951
5952    // c:1570 — `p = ecadd(0);`
5953    p = ecadd(0);
5954
5955    // c:1572 — `incmdpos = 0;`
5956    set_incmdpos(false);
5957    // c:1573 — `zshlex();`
5958    zshlex();
5959    // c:1574-1575 — `if (tok != STRING) YYERRORV(oecused);`
5960    if tok() != STRING_LEX {
5961        zerr("par_repeat: expected count");
5962        return;
5963    }
5964    // c:1576 — `ecstr(tokstr);`
5965    ecstr(&tokstr().unwrap_or_default());
5966    // c:1577 — `incmdpos = 1;`
5967    set_incmdpos(true);
5968    // c:1578 — `zshlex();`
5969    zshlex();
5970    // c:1579-1580 — `while (tok == SEPER) zshlex();`
5971    while tok() == SEPER {
5972        zshlex();
5973    }
5974    // c:1581-1604 — body dispatch (inlined here matching C exactly).
5975    if tok() == DOLOOP {
5976        // c:1582-1587
5977        zshlex();
5978        par_save_list_wordcode(cmplx);
5979        if tok() != DONE {
5980            zerr("par_repeat: expected `done`");
5981            return;
5982        }
5983        set_incmdpos(false);
5984        zshlex();
5985    } else if tok() == INBRACE_TOK {
5986        // c:1589-1594
5987        zshlex();
5988        par_save_list_wordcode(cmplx);
5989        if tok() != OUTBRACE_TOK {
5990            zerr("par_repeat: expected `}`");
5991            return;
5992        }
5993        set_incmdpos(false);
5994        zshlex();
5995    } else if isset(CSHJUNKIELOOPS) {
5996        // c:1596-1599
5997        par_save_list_wordcode(cmplx);
5998        if tok() != ZEND {
5999            zerr("par_repeat: expected `end`");
6000            return;
6001        }
6002        zshlex();
6003    } else if unset(SHORTLOOPS) && unset(SHORTREPEAT) {
6004        // c:1601-1602 — par_repeat needs BOTH SHORTLOOPS and SHORTREPEAT
6005        // unset to refuse short form (more permissive than par_while).
6006        zerr("par_repeat: short body requires SHORTLOOPS or SHORTREPEAT");
6007        return;
6008    } else {
6009        // c:1604 — `par_save_list1(cmplx);`
6010        par_save_list1_wordcode(cmplx);
6011    }
6012
6013    // c:1606 — `ecbuf[p] = WCB_REPEAT(ecused - 1 - p);`
6014    let used = ECUSED.get() as usize;
6015    ECBUF.with_borrow_mut(|b| {
6016        b[p] = WCB_REPEAT((used.saturating_sub(1 + p)) as wordcode);
6017    });
6018}
6019
6020/// Port of `par_funcdef(int *cmplx)` from `Src/parse.c:1672-1779`.
6021///
6022/// The `function NAME { ... }` form. Emits a WCB_FUNCDEF header
6023/// followed by a names-count slot, the names themselves, four
6024/// metadata slots (string-area start, string-area length, npats,
6025/// do_tracing), then the body wordcode, then WCB_END.
6026///
6027/// Critical: saves/resets `ecnpats` + `ecssub` + `ecsoffs` around
6028/// the body parse so per-function pattern counts don't leak into
6029/// the enclosing scope's `ecnpats` accumulator (parse.c:1723-1758).
6030pub fn par_funcdef_wordcode(cmplx: &mut i32) {
6031    // c:1674 — `int oecused = ecused, num = 0, onp, p, c = 0;`
6032    let _oecused = ECUSED.get() as usize;
6033    let mut num: i32 = 0;
6034    let onp: i32;
6035    let p: usize;
6036    let mut c: i32 = 0;
6037    // c:1675 — `int so, oecssub = ecssub;`
6038    let so: i32;
6039    let oecssub = ECSSUB.get();
6040    // c:1676 — `zlong oldlineno = lineno;`
6041    let oldlineno = lineno();
6042    // c:1677 — `int do_tracing = 0;`
6043    let mut do_tracing: i32 = 0;
6044
6045    // c:1679 — `lineno = 0;`
6046    set_lineno(0);
6047    // c:1680 — `nocorrect = 1;`
6048    set_nocorrect(1);
6049    // c:1681 — `incmdpos = 0;`
6050    set_incmdpos(false);
6051    // c:1682 — `zshlex();`
6052    zshlex();
6053
6054    // c:1684 — `p = ecadd(0);`
6055    p = ecadd(0);
6056    // c:1685 — `ecadd(0); /* p + 1 */`
6057    let p1 = ecadd(0);
6058
6059    // c:1687-1699 — `Consume an initial (-T), (--), or (-T --).`
6060    // c:1690 — `if (tok == STRING && tokstr[0] == Dash) {`
6061    if tok() == STRING_LEX {
6062        let s = tokstr().unwrap_or_default();
6063        let bytes = s.as_bytes();
6064        // C: `tokstr[0] == Dash` (Dash = 0x9b = 0xc2 0x9b in UTF-8).
6065        // First byte of UTF-8 `\u{9b}` is 0xc2; the char `'-'` is 0x2d.
6066        // Match either form.
6067        let first_is_dash = (bytes.len() >= 2 && bytes[0] == 0xc2 && bytes[1] == 0x9b)
6068            || (bytes.len() >= 1 && bytes[0] == b'-');
6069        if first_is_dash {
6070            // c:1691-1694 — `if (tokstr[1] == 'T' && !tokstr[2]) { ++do_tracing; zshlex(); }`
6071            // After the leading dash byte(s), check remaining bytes.
6072            let after_dash = if bytes.len() >= 2 && bytes[0] == 0xc2 && bytes[1] == 0x9b {
6073                &bytes[2..]
6074            } else {
6075                &bytes[1..]
6076            };
6077            if after_dash.len() == 1 && after_dash[0] == b'T' {
6078                do_tracing += 1;
6079                zshlex();
6080            }
6081            // c:1695-1698 — `if (tok == STRING && tokstr[0] == Dash &&
6082            //                  tokstr[1] == Dash && !tokstr[2]) zshlex();`
6083            if tok() == STRING_LEX {
6084                let s2 = tokstr().unwrap_or_default();
6085                let b2 = s2.as_bytes();
6086                let mut idx = 0;
6087                let mut dashes = 0;
6088                while idx < b2.len() && dashes < 2 {
6089                    if b2[idx] == 0xc2 && idx + 1 < b2.len() && b2[idx + 1] == 0x9b {
6090                        idx += 2;
6091                        dashes += 1;
6092                    } else if b2[idx] == b'-' {
6093                        idx += 1;
6094                        dashes += 1;
6095                    } else {
6096                        break;
6097                    }
6098                }
6099                if dashes == 2 && idx == b2.len() {
6100                    zshlex();
6101                }
6102            }
6103        }
6104    }
6105
6106    // c:1701-1709 — names loop.
6107    // `while (tok == STRING) { if ((*tokstr == Inbrace || *tokstr == '{')
6108    //   && !tokstr[1]) { tok = INBRACE; break; } ecstr(tokstr); num++; zshlex(); }`
6109    while tok() == STRING_LEX {
6110        let s = tokstr().unwrap_or_default();
6111        let bytes = s.as_bytes();
6112        // First byte tests for Inbrace marker (0x8f → UTF-8 `0xc2 0x8f`) or `{`,
6113        // and length-1 check (`!tokstr[1]`).
6114        let is_inbrace_only = (bytes.len() == 1 && bytes[0] == b'{')
6115            || (bytes.len() == 2 && bytes[0] == 0xc2 && bytes[1] == 0x8f);
6116        if is_inbrace_only {
6117            set_tok(INBRACE_TOK);
6118            break;
6119        }
6120        ecstr(&s);
6121        num += 1;
6122        zshlex();
6123    }
6124
6125    // c:1711-1714 — four metadata placeholder slots.
6126    let m2 = ecadd(0);
6127    let m3 = ecadd(0);
6128    let m4 = ecadd(0);
6129    let m5 = ecadd(0);
6130
6131    // c:1716 — `nocorrect = 0;`
6132    set_nocorrect(0);
6133    // c:1717 — `incmdpos = 1;`
6134    set_incmdpos(true);
6135    // c:1718-1719 — `if (tok == INOUTPAR) zshlex();`
6136    if tok() == INOUTPAR {
6137        zshlex();
6138    }
6139    // c:1720-1721 — `while (tok == SEPER) zshlex();`
6140    while tok() == SEPER {
6141        zshlex();
6142    }
6143
6144    // c:1723 — `ecnfunc++;`
6145    ECNFUNC.set(ECNFUNC.get() + 1);
6146    // c:1724 — `ecssub = so = ecsoffs;`
6147    so = ECSOFFS.get();
6148    ECSSUB.set(so);
6149    // c:1725 — `onp = ecnpats;`
6150    onp = ECNPATS.with(|cc| cc.get());
6151    // c:1726 — `ecnpats = 0;`
6152    ECNPATS.with(|cc| cc.set(0));
6153
6154    // c:1728 — `if (tok == INBRACE) {`
6155    if tok() == INBRACE_TOK {
6156        // c:1729 — `zshlex();`
6157        zshlex();
6158        // c:1730 — `par_list(&c);`
6159        par_list_wordcode(&mut c);
6160        // c:1731-1736 — `if (tok != OUTBRACE) { lineno += oldlineno; ... }`
6161        if tok() != OUTBRACE_TOK {
6162            set_lineno(lineno() + oldlineno);
6163            ECNPATS.with(|cc| cc.set(onp));
6164            ECSSUB.set(oecssub);
6165            zerr("par_funcdef: expected `}`");
6166            return;
6167        }
6168        // c:1737-1740 — `if (num == 0) { incmdpos = 0; }`
6169        if num == 0 {
6170            set_incmdpos(false);
6171        }
6172        // c:1741 — `zshlex();`
6173        zshlex();
6174    } else if unset(SHORTLOOPS) {
6175        // c:1742-1746 — `lineno += oldlineno; ecnpats = onp; ecssub = oecssub; YYERRORV`
6176        set_lineno(lineno() + oldlineno);
6177        ECNPATS.with(|cc| cc.set(onp));
6178        ECSSUB.set(oecssub);
6179        zerr("par_funcdef: short body requires SHORTLOOPS");
6180        return;
6181    } else {
6182        // c:1748 — `par_list1(&c);`
6183        par_list1_wordcode(&mut c);
6184    }
6185
6186    // c:1750 — `ecadd(WCB_END());`
6187    ecadd(WCB_END());
6188    // c:1751-1754 — fill the 4 metadata slots
6189    let cur_sofs = ECSOFFS.get();
6190    let body_npats = ECNPATS.with(|cc| cc.get());
6191    ECBUF.with_borrow_mut(|b| {
6192        b[m2] = (so - oecssub) as wordcode;
6193        b[m3] = (cur_sofs - so) as wordcode;
6194        b[m4] = body_npats as wordcode;
6195        b[m5] = do_tracing as wordcode;
6196    });
6197    // c:1755 — `ecbuf[p + 1] = num;`
6198    ECBUF.with_borrow_mut(|b| {
6199        b[p1] = num as wordcode;
6200    });
6201
6202    // c:1757 — `ecnpats = onp;`
6203    ECNPATS.with(|cc| cc.set(onp));
6204    // c:1758 — `ecssub = oecssub;`
6205    ECSSUB.set(oecssub);
6206    // c:1759 — `ecnfunc++;`
6207    ECNFUNC.set(ECNFUNC.get() + 1);
6208
6209    // c:1761 — `ecbuf[p] = WCB_FUNCDEF(ecused - 1 - p);`
6210    let used = ECUSED.get() as usize;
6211    ECBUF.with_borrow_mut(|b| {
6212        b[p] = WCB_FUNCDEF((used.saturating_sub(1 + p)) as wordcode);
6213    });
6214
6215    // c:1763-1777 — anonymous-function trailing args (num == 0 case).
6216    if num == 0 {
6217        // c:1766 — `int parg = ecadd(0);`
6218        let parg = ecadd(0);
6219        // c:1767 — `ecadd(0);`
6220        ecadd(0);
6221        // c:1768-1772 — `while (tok == STRING) { ecstr(tokstr); num++; zshlex(); }`
6222        while tok() == STRING_LEX {
6223            ecstr(&tokstr().unwrap_or_default());
6224            num += 1;
6225            zshlex();
6226        }
6227        // c:1773-1774 — `if (num > 0) *cmplx = 1;`
6228        if num > 0 {
6229            *cmplx = 1;
6230        }
6231        // c:1775 — `ecbuf[parg] = ecused - parg;`
6232        // c:1776 — `ecbuf[parg+1] = num;`
6233        let used2 = ECUSED.get() as usize;
6234        ECBUF.with_borrow_mut(|b| {
6235            b[parg] = (used2 - parg) as wordcode;
6236            b[parg + 1] = num as wordcode;
6237        });
6238    }
6239    // c:1778 — `lineno += oldlineno;`
6240    set_lineno(lineno() + oldlineno);
6241}
6242
6243/// Size of `struct fdhead` in `wordcode` (u32) units. Used by all
6244/// the header-walk macros below.
6245pub const FDHEAD_WORDS: usize = size_of::<fdhead>() / 4;
6246
6247/// `Src/parse.c:1619-1665`. Handles both `(...)` subshell and
6248/// `{...}` brace group (cursh) plus optional `always { ... }`
6249/// trailing block. C uses a single function with `zsh_construct=1`
6250/// for `{...}` and 0 for `(...)`.
6251pub fn par_subsh_wordcode(cmplx: &mut i32, zsh_construct: i32) {
6252    // c:1621 — `enum lextok otok = tok;`
6253    let otok = tok();
6254    // c:1622 — `int oecused = ecused, p, pp;`
6255    let _oecused = ECUSED.get() as usize;
6256    let p: usize;
6257    let pp: usize;
6258
6259    // c:1624 — `p = ecadd(0);`
6260    p = ecadd(0);
6261    // c:1625 — `/* Extra word only needed for always block */`
6262    // c:1626 — `pp = ecadd(0);`
6263    pp = ecadd(0);
6264    // c:1627 — `zshlex();`
6265    zshlex();
6266    // c:1628 — `par_list(cmplx);`
6267    par_list_wordcode(cmplx);
6268    // c:1629 — `ecadd(WCB_END());`
6269    ecadd(WCB_END());
6270    // c:1630-1631 — `if (tok != ((otok == INPAR) ? OUTPAR : OUTBRACE))
6271    // YYERRORV(oecused);`
6272    if tok()
6273        != (if otok == INPAR_TOK {
6274            OUTPAR_TOK
6275        } else {
6276            OUTBRACE_TOK
6277        })
6278    {
6279        zerr("par_subsh: missing closing token");
6280        return;
6281    }
6282    // c:1632 — `incmdpos = !zsh_construct;`
6283    set_incmdpos(zsh_construct == 0);
6284    // c:1633 — `zshlex();`
6285    zshlex();
6286
6287    // c:1635 — `/* Optional always block. No intervening SEPERs allowed. */`
6288    // c:1636 — `if (otok == INBRACE && tok == STRING && !strcmp(tokstr, "always")) {`
6289    if otok == INBRACE_TOK && tok() == STRING_LEX && tokstr().as_deref() == Some("always") {
6290        // c:1637 — `ecbuf[pp] = WCB_TRY(ecused - 1 - pp);`
6291        let used = ECUSED.get() as usize;
6292        ECBUF.with_borrow_mut(|b| {
6293            b[pp] = WCB_TRY((used.saturating_sub(1 + pp)) as wordcode);
6294        });
6295        // c:1638 — `incmdpos = 1;`
6296        set_incmdpos(true);
6297        // c:1639-1641 — `do { zshlex(); } while (tok == SEPER);`
6298        loop {
6299            zshlex();
6300            if tok() != SEPER {
6301                break;
6302            }
6303        }
6304
6305        // c:1643-1644 — `if (tok != INBRACE) YYERRORV(oecused);`
6306        if tok() != INBRACE_TOK {
6307            zerr("par_subsh: 'always' expects `{`");
6308            return;
6309        }
6310        // c:1645 — `cmdpop();`
6311        cmdpop();
6312        // c:1646 — `cmdpush(CS_ALWAYS);`
6313        cmdpush(CS_ALWAYS as u8);
6314
6315        // c:1648 — `zshlex();`
6316        zshlex();
6317        // c:1649 — `par_save_list(cmplx);`
6318        par_save_list_wordcode(cmplx);
6319        // c:1650-1651 — `while (tok == SEPER) zshlex();`
6320        while tok() == SEPER {
6321            zshlex();
6322        }
6323
6324        // c:1653 — `incmdpos = 1;`
6325        set_incmdpos(true);
6326
6327        // c:1655-1656 — `if (tok != OUTBRACE) YYERRORV(oecused);`
6328        if tok() != OUTBRACE_TOK {
6329            zerr("par_subsh: 'always' block missing `}`");
6330            return;
6331        }
6332        // c:1657 — `zshlex();`
6333        zshlex();
6334        // c:1658 — `ecbuf[p] = WCB_TRY(ecused - 1 - p);`
6335        let used = ECUSED.get() as usize;
6336        ECBUF.with_borrow_mut(|b| {
6337            b[p] = WCB_TRY((used.saturating_sub(1 + p)) as wordcode);
6338        });
6339    } else {
6340        // c:1660-1661 — `ecbuf[p] = (otok == INPAR ? WCB_SUBSH(...) : WCB_CURSH(...));`
6341        let used = ECUSED.get() as usize;
6342        let off = used.saturating_sub(1 + p);
6343        ECBUF.with_borrow_mut(|b| {
6344            b[p] = if otok == INPAR_TOK {
6345                WCB_SUBSH(off as wordcode)
6346            } else {
6347                WCB_CURSH(off as wordcode)
6348            };
6349        });
6350    }
6351}
6352
6353/// Port of `par_time(void)` from `Src/parse.c:1787`. `time PIPE`
6354/// emits WCB_TIMED(WC_TIMED_PIPE) + the sublist code; bare `time`
6355/// with no pipeline emits WCB_TIMED(WC_TIMED_EMPTY).
6356pub fn par_time_wordcode() {
6357    // c:1791 — `zshlex();`
6358    zshlex();
6359    // c:1793-1794 — `p = ecadd(0); ecadd(0);`
6360    let p = ecadd(0);
6361    ecadd(0);
6362    // c:1795 — `if ((f = par_sublist2(&c)) < 0)`
6363    let mut c = 0i32;
6364    let f = par_sublist2(&mut c);
6365    match f {
6366        Some(flags) => {
6367            // c:1799 — `ecbuf[p] = WCB_TIMED(WC_TIMED_PIPE);`
6368            ECBUF.with_borrow_mut(|b| {
6369                if p < b.len() {
6370                    b[p] = WCB_TIMED(WC_TIMED_PIPE);
6371                }
6372            });
6373            // c:1800 — `set_sublist_code(p+1, WC_SUBLIST_END, f,
6374            // ecused-2-p, c);`
6375            let used = ECUSED.get() as usize;
6376            let skip = used.saturating_sub(2 + p) as i32;
6377            set_sublist_code(p + 1, WC_SUBLIST_END as i32, flags, skip, c != 0);
6378        }
6379        None => {
6380            // c:1796-1798 — `ecused--; ecbuf[p] = WCB_TIMED(WC_TIMED_EMPTY);`
6381            ECUSED.set((ECUSED.get() - 1).max(0));
6382            ECBUF.with_borrow_mut(|b| {
6383                if p < b.len() {
6384                    b[p] = WCB_TIMED(WC_TIMED_EMPTY);
6385                }
6386            });
6387        }
6388    }
6389}
6390
6391/// Port of `par_dinbrack(void)` from `Src/parse.c:1810`. Wraps
6392/// `par_cond` (the cond-expression emitter at parse.c:2409) with
6393/// the `[[ ... ]]` framing: incond/incmdpos toggles + DOUTBRACK
6394/// expectation.
6395pub fn par_cond_wordcode() {
6396    let oecused = ECUSED.get();
6397    // c:1814 — `incond = 1;`
6398    set_incond(1);
6399    // c:1815 — `incmdpos = 0;`
6400    set_incmdpos(false);
6401    // c:1816 — `zshlex();` past `[[`.
6402    zshlex();
6403    // c:1817 — `par_cond();` — call the no-skip cond-expression
6404    // entry that EMITS WORDCODE (par_cond_top → par_cond_1 →
6405    // par_cond_2 → par_cond_double/triple/multi). NOT the AST
6406    // `par_cond` at parse.rs:4644 which is a misnamed `par_dinbrack`
6407    // that skips `[[` AND `]]` and returns a ZshCommand AST node
6408    // instead of pushing WC_COND opcodes. NOT `parse_cond_expr`
6409    // either — that's also AST-only, returning ZshCond. With
6410    // `parse_cond_expr` here, every `[[ ... ]]` test produced ZERO
6411    // wordcode payload and parity dropped ~148 words on /etc/zshrc.
6412    let _ = par_cond_top();
6413    // c:1818-1819 — `if (tok != DOUTBRACK) YYERRORV(oecused);`
6414    if tok() != DOUTBRACK {
6415        let _ = oecused;
6416        zerr("missing ]]");
6417        return;
6418    }
6419    // c:1820 — `incond = 0;`
6420    set_incond(0);
6421    // c:1821 — `incmdpos = 1;`
6422    set_incmdpos(true);
6423    // c:1822 — `zshlex();` past `]]`.
6424    zshlex();
6425}
6426
6427/// Port of the `case DINPAR:` arm of `par_cmd` from
6428/// `Src/parse.c:1031-1034`:
6429/// ```c
6430/// ecadd(WCB_ARITH());
6431/// ecstr(tokstr);
6432/// zshlex();
6433/// ```
6434/// `(( EXPR ))` arithmetic at command position — emits the ARITH
6435/// opcode followed by the interned EXPR string, then advances past
6436/// the DINPAR token (which already carries the body text).
6437pub fn par_arith_wordcode() {
6438    // c:1032 — `ecadd(WCB_ARITH());`
6439    ecadd(WCB_ARITH());
6440    // c:1033 — `ecstr(tokstr);` — interns the expression string and
6441    // appends its strcode index to the wordcode buffer.
6442    let expr = tokstr().unwrap_or_default();
6443    ecstr(&expr);
6444    // c:1034 — `zshlex();`
6445    zshlex();
6446}
6447
6448/// Port of `par_simple(int *cmplx, int nr)` from
6449/// `Src/parse.c:1836-2227`. Emits WC_SIMPLE + word count +
6450/// interned string offsets. Returns `0` when nothing was emitted,
6451/// otherwise `1 + (number of code words consumed by redirections)`.
6452/// The full C body handles assignments (ENVSTRING/ENVARRAY),
6453/// inline `{var}>file` brace-FDs, prefix modifiers (NOCORRECT etc),
6454/// and `name() { body }` funcdef detection — those paths are
6455/// progressively wired into the AST parser; this wordcode-emitter
6456/// covers the simple `cmd args...` case + interleaved redirs.
6457pub fn par_simple_wordcode(cmplx: &mut i32, mut nr: i32) -> i32 {
6458    // c:1838-1841 — `int oecused = ecused, isnull = 1, r, argc = 0,
6459    //   p, isfunc = 0, sr = 0;`
6460    //   `int c = *cmplx, nrediradd, assignments = 0, ppost = 0,
6461    //   is_typeset = 0;`
6462    // c is the SAVED initial cmplx so INOUTPAR can restore via
6463    // `*cmplx = c;` at c:2070.
6464    let _oecused = ECUSED.get() as usize;
6465    let c_saved = *cmplx;
6466    let mut isnull = true;
6467    let mut argc: u32 = 0;
6468    let mut sr: i32 = 0;
6469    let mut assignments = false;
6470    let mut isfunc = false;
6471
6472    // c:1843 — `r = ecused;` — saves the offset where redirs get
6473    // INSERTED (via ecispace). Each redir shifts later words DOWN
6474    // by ncodes, so the SIMPLE placeholder at `p` (set later) must
6475    // also bump by ncodes when a redir lands. C uses `&r` to pass
6476    // the cursor by reference; Rust uses a mutable local + manual
6477    // bumps after each par_redir_wordcode call.
6478    let mut r: usize = ECUSED.get() as usize;
6479
6480    // c:1844-1919 — pre-cmd loop: NOCORRECT, ENVSTRING (scalar
6481    // assigns), ENVARRAY (array assigns), IS_REDIROP. Loops until
6482    // a non-assignment token is seen.
6483    loop {
6484        match tok() {
6485            NOCORRECT => {
6486                // c:1846-1849
6487                *cmplx = 1;
6488                set_nocorrect(1);
6489            }
6490            ENVSTRING => {
6491                // c:1848-1898 — scalar assignment `name=value` or
6492                // `name+=value`. Emits WCB_ASSIGN(SCALAR, NEW|INC, 0)
6493                // followed by ecstr(name), ecstr(value).
6494                let raw = tokstr().unwrap_or_default();
6495                // Find first of Inbrack / '=' / '+' (the C scan at
6496                // c:1851-1853). Inside Inbrack we skipparens — i.e.
6497                // skip `name[...]` index, then continue.
6498                // c:1851-1853 — `for (ptr = tokstr; *ptr && *ptr != Inbrack
6499                // && *ptr != '=' && *ptr != '+'; ptr++); if (*ptr == Inbrack)
6500                // skipparens(Inbrack, Outbrack, &ptr);`. Walk to the first
6501                // `[`/`=`/`+`/Equals-token, then if we landed on `[`, skip
6502                // the balanced `name[index]` pair via skipparens.
6503                let bytes: Vec<char> = raw.chars().collect();
6504                let raw_str: String = bytes.iter().collect();
6505                let mut idx = 0usize;
6506                while idx < bytes.len() {
6507                    let ch = bytes[idx];
6508                    if ch == '\u{91}' /* Inbrack */
6509                        || ch == '=' || ch == '+' || ch == '\u{8d}'
6510                    /* Equals */
6511                    {
6512                        break;
6513                    }
6514                    idx += 1;
6515                }
6516                if idx < bytes.len() && bytes[idx] == '\u{91}'
6517                /* Inbrack */
6518                {
6519                    // c:1855 — `skipparens(Inbrack, Outbrack, &ptr);`.
6520                    let byte_off: usize = bytes[..idx].iter().map(|c| c.len_utf8()).sum();
6521                    let mut cursor: &str = &raw_str[byte_off..];
6522                    let _ = crate::ported::utils::skipparens('\u{91}', '\u{92}', &mut cursor);
6523                    let consumed = raw_str.len() - byte_off - cursor.len();
6524                    let advance_chars = raw_str[byte_off..byte_off + consumed].chars().count();
6525                    idx += advance_chars;
6526                    // Continue scanning for `=` / `+` after the `]`.
6527                    while idx < bytes.len() {
6528                        let ch = bytes[idx];
6529                        if ch == '=' || ch == '+' || ch == '\u{8d}' {
6530                            break;
6531                        }
6532                        idx += 1;
6533                    }
6534                }
6535                let is_inc = idx < bytes.len() && bytes[idx] == '+';
6536                // c:1856-1858 — `if (*ptr == '+') { *ptr++ = '\0';
6537                // ecadd(WCB_ASSIGN(SCALAR, INC, 0)); } else WCB_NEW`
6538                // C nulls the `+` AT THAT POSITION then advances ptr.
6539                // `name` is bytes BEFORE the `+`, NOT including it.
6540                let name_end = idx;
6541                if is_inc {
6542                    idx += 1;
6543                }
6544                let flag = if is_inc { WC_ASSIGN_INC } else { WC_ASSIGN_NEW };
6545                ecadd(WCB_ASSIGN(WC_ASSIGN_SCALAR, flag, 0));
6546                // c:1860 — `if (*ptr == '=') { *ptr = '\0'; str = ptr + 1; }
6547                //          else equalsplit(tokstr, &str);`
6548                let name: String = bytes[..name_end].iter().collect();
6549                let str_off = if idx < bytes.len() && (bytes[idx] == '=' || bytes[idx] == '\u{8d}')
6550                {
6551                    idx + 1
6552                } else {
6553                    idx
6554                };
6555                let value: String = bytes[str_off..].iter().collect();
6556                // c:1866-1877 — scan value for `=(`/`<(`/`>(` (proc
6557                // subst); if found, bump cmplx (suppresses Z_SIMPLE).
6558                let vbytes: Vec<char> = value.chars().collect();
6559                for (i, ch) in vbytes.iter().enumerate() {
6560                    if i + 1 < vbytes.len() && vbytes[i + 1] == '\u{88}'
6561                    /* Inpar */
6562                    {
6563                        if *ch == '\u{8d}' /* Equals */
6564                            || *ch == '\u{94}' /* Inang */
6565                            || *ch == '\u{96}'
6566                        /* OutangProc */
6567                        {
6568                            *cmplx = 1;
6569                            break;
6570                        }
6571                    }
6572                }
6573                ecstr(&name);
6574                ecstr(&value);
6575                isnull = false;
6576                assignments = true;
6577            }
6578            ENVARRAY => {
6579                // c:1883-1908 — array assignment `name=( ... )` in the
6580                // pre-cmd loop (no `typeset`-style typeset_force flag).
6581                // c:1884 — `int oldcmdpos = incmdpos, n, type2;`
6582                let oldcmdpos = incmdpos();
6583                let n: u32;
6584                let type2: wordcode;
6585                let p: usize;
6586
6587                // c:1886-1889 — `array setting is cmplx because it can
6588                //   contain process substitutions`
6589                // c:1890 — `*cmplx = c = 1;`
6590                *cmplx = 1;
6591                // c:1891 — `p = ecadd(0);`
6592                p = ecadd(0);
6593                // c:1892 — `incmdpos = 0;`
6594                set_incmdpos(false);
6595                // c:1893-1897 — `+=` detection: if tokstr ends in `+`,
6596                // strip the `+` and use WC_ASSIGN_INC; else WC_ASSIGN_NEW.
6597                let raw = tokstr().unwrap_or_default();
6598                let (name, t2) = if raw.ends_with('+') {
6599                    (raw[..raw.len() - 1].to_string(), WC_ASSIGN_INC)
6600                } else {
6601                    (raw.clone(), WC_ASSIGN_NEW)
6602                };
6603                type2 = t2;
6604                // c:1898 — `ecstr(tokstr);` (tokstr now NUL-trimmed)
6605                ecstr(&name);
6606                // c:1899 — `cmdpush(CS_ARRAY);`
6607                cmdpush(CS_ARRAY as u8);
6608                // c:1900 — `zshlex();`
6609                zshlex();
6610                // c:1901 — `n = par_nl_wordlist();`
6611                n = par_nl_wordlist_wordcode();
6612                // c:1902 — `ecbuf[p] = WCB_ASSIGN(WC_ASSIGN_ARRAY, type2, n);`
6613                ECBUF.with_borrow_mut(|b| {
6614                    b[p] = WCB_ASSIGN(WC_ASSIGN_ARRAY, type2, n);
6615                });
6616                // c:1903 — `cmdpop();`
6617                cmdpop();
6618                // c:1904-1905 — `if (tok != OUTPAR) YYERROR(oecused);`
6619                if tok() != OUTPAR_TOK {
6620                    zerr("par_simple: expected `)' after array assignment");
6621                    return 0;
6622                }
6623                // c:1906 — `incmdpos = oldcmdpos;`
6624                set_incmdpos(oldcmdpos);
6625                // c:1907 — `isnull = 0;`
6626                isnull = false;
6627                // c:1908 — `assignments = 1;`
6628                assignments = true;
6629            }
6630            t if IS_REDIROP(t) => {
6631                // c:1900-1904 — `*cmplx = c = 1; nr += par_redir(&r,
6632                // NULL); continue;`. The wordcode-emitting redir is
6633                // distinct from the AST par_redir — it INSERTS
6634                // WCB_REDIR + fd + ecstrcode(name) at offset `r`
6635                // via ecispace, shifting any later words down.
6636                *cmplx = 1;
6637                let added = par_redir_wordcode(&mut r, None);
6638                if added == 0 {
6639                    break;
6640                }
6641                nr += added;
6642                continue;
6643            }
6644            _ => break,
6645        }
6646        zshlex(); // c:1907 `zshlex();`
6647    }
6648
6649    // c:1920-1921 — `if (tok == AMPER || tok == AMPERBANG) YYERROR;`
6650    if tok() == AMPER || tok() == AMPERBANG {
6651        zerr("par_simple: unexpected &");
6652        return 0;
6653    }
6654
6655    // c:1923 — `p = ecadd(WCB_SIMPLE(0));`
6656    let mut p = ecadd(WCB_SIMPLE(0));
6657
6658    // c:1924-2105 — main words loop. is_typeset tracks whether the
6659    // outer command was `typeset`/`export`/etc. so the final
6660    // placeholder gets WCB_TYPESET instead of WCB_SIMPLE.
6661    let mut is_typeset = false;
6662    let mut postassigns: u32 = 0;
6663    let mut ppost: usize = 0;
6664    loop {
6665        match tok() {
6666            STRING_LEX | TYPESET => {
6667                // c:1926 — `int redir_var = 0;`
6668                let mut redir_var = false;
6669                // c:1928-1929 — `*cmplx = 1; incmdpos = 0;`
6670                *cmplx = 1;
6671                set_incmdpos(false);
6672                // c:1931-1932 — TYPESET → intypeset = is_typeset = 1.
6673                if tok() == TYPESET {
6674                    set_intypeset(true);
6675                    is_typeset = true;
6676                }
6677                let s = tokstr().unwrap_or_default();
6678                // c:1934-1974 — `{var}>file` brace-FD detection.
6679                // `if (!isset(IGNOREBRACES) && *tokstr == Inbrace)`
6680                let bytes = s.as_bytes();
6681                let first_is_inbrace = (bytes.len() >= 2 && bytes[0] == 0xc2 && bytes[1] == 0x8f)
6682                    || (bytes.len() >= 1 && bytes[0] == b'{');
6683                if !isset(IGNOREBRACES) && first_is_inbrace {
6684                    // c:1937-1938 — `char *eptr = tokstr + strlen(tokstr) - 1;`
6685                    //                `char *ptr = eptr;`
6686                    // C tests `*eptr == Outbrace` (0x90 marker or `}`) AND
6687                    // there's content between `{` and `}` (`ptr > tokstr + 1`).
6688                    let last_two_outbrace = bytes.len() >= 2
6689                        && (bytes.ends_with(&[0xc2, 0x90]) || bytes.last() == Some(&b'}'));
6690                    let opener_len = if bytes.len() >= 2 && bytes[0] == 0xc2 && bytes[1] == 0x8f {
6691                        2
6692                    } else {
6693                        1
6694                    };
6695                    let closer_len = if bytes.len() >= 2 && bytes.ends_with(&[0xc2, 0x90]) {
6696                        2
6697                    } else if bytes.last() == Some(&b'}') {
6698                        1
6699                    } else {
6700                        0
6701                    };
6702                    if last_two_outbrace && bytes.len() > opener_len + closer_len {
6703                        // c:1944 — `if (itype_end(tokstr+1, IIDENT, 0) >= ptr)`
6704                        // Inner content is the identifier between `{` and `}`.
6705                        let inner_start = opener_len;
6706                        let inner_end = bytes.len() - closer_len;
6707                        let inner = &s[inner_start..inner_end];
6708                        if !inner.is_empty() && crate::ported::params::isident(inner) {
6709                            // c:1946-1948 — `char *idstring = dupstrpfx(...);`
6710                            //                `redir_var = 1; zshlex();`
6711                            let idstring = inner.to_string();
6712                            redir_var = true;
6713                            zshlex();
6714                            // c:1953-1958 — `if (IS_REDIROP(tok) && tokfd == -1)
6715                            //   { *cmplx = c = 1; nrediradd = par_redir(&r, id);
6716                            //     p += nrediradd; sr += nrediradd; }`
6717                            if IS_REDIROP(tok()) && tokfd() == -1 {
6718                                *cmplx = 1;
6719                                let nrediradd = par_redir_wordcode(&mut r, Some(&idstring));
6720                                p += nrediradd as usize;
6721                                sr += nrediradd;
6722                            } else if postassigns > 0 {
6723                                // c:1959-1966 — postassigns path: emit
6724                                // WCB_ASSIGN(SCALAR, INC, 0) + name + ""
6725                                postassigns += 1;
6726                                ecadd(WCB_ASSIGN(WC_ASSIGN_SCALAR, WC_ASSIGN_INC, 0));
6727                                ecstr(&s);
6728                                ecstr("");
6729                            } else {
6730                                // c:1968-1972 — `else { ecstr(toksave); argc++; }`
6731                                ecstr(&s);
6732                                argc += 1;
6733                            }
6734                        }
6735                    }
6736                }
6737                if !redir_var {
6738                    // c:1977-1996 — normal (non-redir-var) STRING/TYPESET.
6739                    if postassigns > 0 {
6740                        // c:1979-1989 — typeset with bare-name arg → INC
6741                        postassigns += 1;
6742                        ecadd(WCB_ASSIGN(WC_ASSIGN_SCALAR, WC_ASSIGN_INC, 0));
6743                        ecstr(&s);
6744                        ecstr("");
6745                    } else {
6746                        ecstr(&s);
6747                        argc += 1;
6748                    }
6749                    zshlex();
6750                }
6751                isnull = false;
6752            }
6753            ENVSTRING => {
6754                // c:2005-2026 — mid-cmd ENVSTRING (under intypeset
6755                // context). Emits WCB_ASSIGN(SCALAR, NEW, 0) then
6756                // ecstr(name) + ecstr(value), tracking the first
6757                // postassign offset in `ppost` (which the trailing
6758                // WCB_TYPESET header points to).
6759                if postassigns == 0 {
6760                    ppost = ecadd(0);
6761                }
6762                postassigns += 1;
6763                // c:2010-2014 — `for (ptr = tokstr; *ptr && *ptr != Inbrack
6764                // && *ptr != '=' && *ptr != '+'; ptr++); if (*ptr == Inbrack)
6765                // skipparens(Inbrack, Outbrack, &ptr);`.
6766                let raw = tokstr().unwrap_or_default();
6767                let bytes: Vec<char> = raw.chars().collect();
6768                let mut idx = 0usize;
6769                while idx < bytes.len() {
6770                    let ch = bytes[idx];
6771                    if ch == '\u{91}' /* Inbrack */
6772                        || ch == '=' || ch == '+' || ch == '\u{8d}'
6773                    /* Equals */
6774                    {
6775                        break;
6776                    }
6777                    idx += 1;
6778                }
6779                if idx < bytes.len() && bytes[idx] == '\u{91}'
6780                /* Inbrack */
6781                {
6782                    // c:2014 — `skipparens(Inbrack, Outbrack, &ptr);`.
6783                    let byte_off: usize = bytes[..idx].iter().map(|c| c.len_utf8()).sum();
6784                    let mut cursor: &str = &raw[byte_off..];
6785                    let _ = crate::ported::utils::skipparens('\u{91}', '\u{92}', &mut cursor);
6786                    let consumed = raw.len() - byte_off - cursor.len();
6787                    let advance_chars = raw[byte_off..byte_off + consumed].chars().count();
6788                    idx += advance_chars;
6789                    while idx < bytes.len() {
6790                        let ch = bytes[idx];
6791                        if ch == '=' || ch == '+' || ch == '\u{8d}' {
6792                            break;
6793                        }
6794                        idx += 1;
6795                    }
6796                }
6797                let name: String = bytes[..idx].iter().collect();
6798                let str_off = if idx < bytes.len() && (bytes[idx] == '=' || bytes[idx] == '\u{8d}')
6799                {
6800                    idx + 1
6801                } else {
6802                    idx
6803                };
6804                let value: String = bytes[str_off..].iter().collect();
6805                ecadd(WCB_ASSIGN(WC_ASSIGN_SCALAR, WC_ASSIGN_NEW, 0));
6806                ecstr(&name);
6807                ecstr(&value);
6808                isnull = false;
6809                zshlex();
6810            }
6811            ENVARRAY => {
6812                // c:2027-2050 — mid-cmd ENVARRAY (typeset N=(…) form).
6813                // C tracks postassigns + ppost the same as ENVSTRING,
6814                // but the inner emit is WCB_ASSIGN(ARRAY, NEW, n)
6815                // with `n` patched in after par_nl_wordlist consumes
6816                // the elements. C also toggles intypeset=0 around the
6817                // wordlist so the lexer doesn't try to re-emit
6818                // assignments inside the array.
6819                *cmplx = 1;
6820                if postassigns == 0 {
6821                    ppost = ecadd(0);
6822                }
6823                postassigns += 1;
6824                let parr = ecadd(0);
6825                let raw = tokstr().unwrap_or_default();
6826                let is_inc = raw.ends_with('+');
6827                let name = if is_inc {
6828                    &raw[..raw.len() - 1]
6829                } else {
6830                    raw.as_str()
6831                };
6832                let flag = if is_inc { WC_ASSIGN_INC } else { WC_ASSIGN_NEW };
6833                ecstr(name);
6834                cmdpush(CS_ARRAY as u8);
6835                set_intypeset(false);
6836                zshlex();
6837                // c:2044 — `n = par_nl_wordlist();` (parse.c:2379-2391).
6838                // SEPER + NEWLIN both allowed between elements.
6839                let mut nelem = 0u32;
6840                loop {
6841                    let t = tok();
6842                    if t != STRING_LEX && t != SEPER && t != NEWLIN {
6843                        break;
6844                    }
6845                    if t == STRING_LEX {
6846                        ecstr(&tokstr().unwrap_or_default());
6847                        nelem += 1;
6848                    }
6849                    zshlex();
6850                }
6851                ECBUF.with_borrow_mut(|b| {
6852                    if parr < b.len() {
6853                        b[parr] = WCB_ASSIGN(WC_ASSIGN_ARRAY, flag, nelem);
6854                    }
6855                });
6856                cmdpop();
6857                set_intypeset(true);
6858                if tok() != OUTPAR_TOK {
6859                    zerr("expected `)' after array assignment");
6860                    return 0;
6861                }
6862                isnull = false;
6863                zshlex();
6864            }
6865            t if IS_REDIROP(t) => {
6866                // c:1999-2010 — `nrediradd = par_redir(&r, NULL);
6867                // p += nrediradd; if (ppost) ppost += nrediradd;
6868                // sr += nrediradd;`
6869                *cmplx = 1;
6870                let added = par_redir_wordcode(&mut r, None);
6871                if added == 0 {
6872                    break;
6873                }
6874                p += added as usize;
6875                if ppost != 0 {
6876                    ppost += added as usize;
6877                }
6878                sr += added;
6879            }
6880            INOUTPAR => {
6881                // c:2051 — `} else if (tok == INOUTPAR) {`
6882                // c:2052 — `zlong oldlineno = lineno;`
6883                let oldlineno = lineno();
6884                // c:2053 — `int onp, so, oecssub = ecssub;`
6885                let oecssub = ECSSUB.get();
6886                // c:2055-2057 — `if (!isset(MULTIFUNCDEF) && argc > 1) YYERROR;`
6887                if !isset(MULTIFUNCDEF) && argc > 1 {
6888                    zerr("par_simple: too many function names for funcdef");
6889                    return 0;
6890                }
6891                // c:2058-2060 — `if (assignments || postassigns) YYERROR;`
6892                if assignments || postassigns > 0 {
6893                    zerr("par_simple: assignments before funcdef");
6894                    return 0;
6895                }
6896                // c:2061-2068 — hasalias check + zwarn — skipped (no
6897                // alias tracking on the wordcode path).
6898
6899                // c:2070 — `*cmplx = c;`
6900                *cmplx = c_saved;
6901                // c:2071 — `lineno = 0;`
6902                set_lineno(0);
6903                // c:2072 — `incmdpos = 1;`
6904                set_incmdpos(true);
6905                // c:2073 — `cmdpush(CS_FUNCDEF);`
6906                cmdpush(CS_FUNCDEF as u8);
6907                // c:2074 — `zshlex();`
6908                zshlex();
6909                // c:2075-2076 — `while (tok == SEPER) zshlex();`
6910                while tok() == SEPER {
6911                    zshlex();
6912                }
6913                // c:2079 — `ecispace(p + 1, 1); ecbuf[p+1] = argc;
6914                // ecadd(0)*4`. Insert the argc word at p+1, then
6915                // append 4 placeholder words.
6916                ecispace(p + 1, 1);
6917                ECBUF.with_borrow_mut(|b| {
6918                    if p + 1 < b.len() {
6919                        b[p + 1] = argc;
6920                    }
6921                });
6922                // c:2080-2083 — four metadata placeholder slots.
6923                ecadd(0);
6924                ecadd(0);
6925                ecadd(0);
6926                ecadd(0);
6927
6928                // c:2085 — `ecnfunc++;`
6929                ECNFUNC.set(ECNFUNC.get() + 1);
6930                // c:2086 — `ecssub = so = ecsoffs;`
6931                let so = ECSOFFS.get();
6932                ECSSUB.set(so);
6933                // c:2087 — `onp = ecnpats;`
6934                let onp = ECNPATS.with(|cc| cc.get());
6935                // c:2088 — `ecnpats = 0;`
6936                ECNPATS.with(|cc| cc.set(0));
6937
6938                // c:2091 — `int c = 0;` — INNER cmplx for the body
6939                // parse. Local to each branch; C's enclosing *cmplx
6940                // is NOT modified by the body.
6941                let mut body_c: i32 = 0;
6942                // c:2090 — `if (tok == INBRACE) {`
6943                if tok() == INBRACE_TOK {
6944                    // c:2093 — `zshlex();`
6945                    zshlex();
6946                    // c:2094 — `par_list(&c);`
6947                    par_list_wordcode(&mut body_c);
6948                    // c:2095-2101 — `if (tok != OUTBRACE) { cmdpop();
6949                    //   lineno += oldlineno; ecnpats = onp;
6950                    //   ecssub = oecssub; YYERROR; }`
6951                    if tok() != OUTBRACE_TOK {
6952                        cmdpop();
6953                        set_lineno(lineno() + oldlineno);
6954                        ECNPATS.with(|cc| cc.set(onp));
6955                        ECSSUB.set(oecssub);
6956                        zerr("par_simple: funcdef expected `}`");
6957                        return 0;
6958                    }
6959                    // c:2102-2105 — `if (argc == 0) incmdpos = 0;`
6960                    if argc == 0 {
6961                        set_incmdpos(false);
6962                    }
6963                    // c:2106 — `zshlex();`
6964                    zshlex();
6965                } else {
6966                    // c:2107-2132 — short-body funcdef form: `f() cmd`
6967                    // or `() cmd`. Wraps single par_cmd result in a
6968                    // synthetic WC_LIST / WC_SUBLIST /
6969                    // WC_PIPE(WC_PIPE_END, 0) header trio.
6970                    let ll = ecadd(0);
6971                    let sl = ecadd(0);
6972                    ecadd(WCB_PIPE(WC_PIPE_END, 0));
6973                    let ok = par_cmd_wordcode(&mut body_c, if argc == 0 { 1 } else { 0 });
6974                    if !ok {
6975                        cmdpop();
6976                        zerr("par_simple: funcdef short-body: missing command");
6977                        return 0;
6978                    }
6979                    if argc == 0 {
6980                        // c:2118-2127 — anonymous funcdef may take args
6981                        // after the body; first one already read.
6982                        set_incmdpos(false);
6983                    }
6984                    // c:2130-2131 — inner sublist/list use inner cmplx.
6985                    let used = ECUSED.get() as usize;
6986                    set_sublist_code(
6987                        sl,
6988                        WC_SUBLIST_END as i32,
6989                        0,
6990                        (used.saturating_sub(1 + sl)) as i32,
6991                        body_c != 0,
6992                    );
6993                    set_list_code(ll, Z_SYNC | Z_END, body_c != 0);
6994                }
6995                let _ = body_c;
6996                // c:2133 — `cmdpop();`
6997                cmdpop();
6998
6999                // c:2135 — `ecadd(WCB_END());`
7000                ecadd(WCB_END());
7001                // c:2136-2139 — fill 4 metadata slots at p+argc+2..5
7002                let p_argc = (p + (argc as usize) + 2) as usize;
7003                let cur_so = ECSOFFS.get();
7004                let np_now = ECNPATS.with(|cc| cc.get());
7005                ECBUF.with_borrow_mut(|b| {
7006                    b[p_argc] = (so - oecssub) as wordcode;
7007                    b[p_argc + 1] = (cur_so - so) as wordcode;
7008                    b[p_argc + 2] = np_now as wordcode;
7009                    b[p_argc + 3] = 0;
7010                });
7011
7012                // c:2141-2143 — `ecnpats = onp; ecssub = oecssub; ecnfunc++;`
7013                ECNPATS.with(|cc| cc.set(onp));
7014                ECSSUB.set(oecssub);
7015                ECNFUNC.set(ECNFUNC.get() + 1);
7016
7017                // c:2145 — `ecbuf[p] = WCB_FUNCDEF(ecused - 1 - p);`
7018                let used = ECUSED.get() as usize;
7019                let header_off = used.saturating_sub(1 + p) as wordcode;
7020                ECBUF.with_borrow_mut(|b| {
7021                    b[p] = WCB_FUNCDEF(header_off);
7022                });
7023
7024                // c:2147-2172 — `if (argc == 0) { /* anonymous fn args */ }`
7025                if argc == 0 {
7026                    // c:2150 — `int parg = ecadd(0);`
7027                    let mut parg = ecadd(0);
7028                    // c:2151 — `ecadd(0);`
7029                    ecadd(0);
7030                    // c:2152 — `while (tok == STRING || IS_REDIROP(tok)) {`
7031                    while tok() == STRING_LEX || IS_REDIROP(tok()) {
7032                        if tok() == STRING_LEX {
7033                            // c:2155-2157
7034                            ecstr(&tokstr().unwrap_or_default());
7035                            argc += 1;
7036                            zshlex();
7037                        } else {
7038                            // c:2159-2165 — *cmplx=c=1; nrediradd=par_redir;
7039                            // p += nrediradd; ppost += nrediradd if ppost;
7040                            // sr += nrediradd; parg += nrediradd;
7041                            *cmplx = 1;
7042                            let added = par_redir_wordcode(&mut r, None);
7043                            if added == 0 {
7044                                break;
7045                            }
7046                            p += added as usize;
7047                            if ppost != 0 {
7048                                ppost += added as usize;
7049                            }
7050                            sr += added;
7051                            parg += added as usize;
7052                        }
7053                    }
7054                    // c:2168-2169 — `if (argc > 0) *cmplx = 1;`
7055                    if argc > 0 {
7056                        *cmplx = 1;
7057                    }
7058                    // c:2170 — `ecbuf[parg] = ecused - parg;`
7059                    // c:2171 — `ecbuf[parg+1] = argc;`
7060                    let used2 = ECUSED.get() as usize;
7061                    ECBUF.with_borrow_mut(|b| {
7062                        b[parg] = (used2 - parg) as wordcode;
7063                        b[parg + 1] = argc;
7064                    });
7065                }
7066                // c:2173 — `lineno += oldlineno;`
7067                set_lineno(lineno() + oldlineno);
7068
7069                // c:2175-2177 — `isfunc = 1; isnull = 0; break;`
7070                isfunc = true;
7071                isnull = false;
7072                break;
7073            }
7074            _ => break,
7075        }
7076    }
7077
7078    // c:2173-2176 — `if (isnull && !(sr + nr)) { ecused = oecused;
7079    // return 0; }` — undo everything including pre-cmd assignments
7080    // if no actual command word emerged.
7081    if isnull && sr + nr == 0 && !assignments {
7082        ECUSED.set(p as i32);
7083        return 0;
7084    }
7085    // c:2186-2187 — `incmdpos = 1; intypeset = 0;` — reset before
7086    // the placeholder patch so the next-token lex doesn't carry
7087    // typeset/incond state.
7088    set_incmdpos(true);
7089    set_intypeset(false);
7090    // c:2189-2199 — `if (!isfunc) { if (is_typeset) ecbuf[p] =
7091    // WCB_TYPESET(argc); else ecbuf[p] = WCB_SIMPLE(argc); }`.
7092    // When isfunc=true the INOUTPAR branch already wrote WCB_FUNCDEF
7093    // at p; do NOT clobber it.
7094    if !isfunc {
7095        let header = if is_typeset {
7096            if postassigns > 0 {
7097                ECBUF.with_borrow_mut(|b| {
7098                    if ppost < b.len() {
7099                        b[ppost] = postassigns;
7100                    }
7101                });
7102            } else {
7103                ecadd(0);
7104            }
7105            WCB_TYPESET(argc)
7106        } else {
7107            WCB_SIMPLE(argc)
7108        };
7109        ECBUF.with_borrow_mut(|b| {
7110            if p < b.len() {
7111                b[p] = header;
7112            }
7113        });
7114    }
7115    1 + sr
7116}
7117
7118/// Port of `par_redir(int *rp, char *idstring)` from
7119/// `Src/parse.c:2229-2345` — the wordcode-emitting variant that
7120/// pushes WCB_REDIR + fd + ecstrcode(name) into ECBUF. Distinct
7121/// from the AST `par_redir` (parse.rs:3771) which builds a
7122/// ZshRedir struct for the AST executor pipeline.
7123///
7124/// Returns the number of wordcodes added (3 for the basic shape,
7125/// 4 with idstring, 5 for HEREDOC[DASH] which carries the
7126/// terminator strings inline). Returns 0 on parse error.
7127///
7128/// `idstring` mirrors C's `char *idstring` parameter — `None` =
7129/// NULL (no `{var}>file` brace-FD shape), `Some(id)` = the captured
7130/// `{var}` name. C callers without a var pass NULL inline; Rust
7131/// callers do the same with `None`.
7132fn par_redir_wordcode(rp: &mut usize, idstring: Option<&str>) -> i32 {
7133    // c:2231 — `int r = *rp, type, fd1, oldcmdpos, oldnc, ncodes;`
7134    let r: usize = *rp;
7135    let mut r#type: i32;
7136    let fd1: i32;
7137    let oldcmdpos: bool;
7138    let oldnc: i32;
7139    let mut ncodes: usize;
7140    // c:2232 — `char *name;`
7141    let name: String;
7142
7143    // c:2234 — `oldcmdpos = incmdpos;`
7144    oldcmdpos = incmdpos();
7145    // c:2235 — `incmdpos = 0;`
7146    set_incmdpos(false);
7147    // c:2236 — `oldnc = nocorrect;`
7148    oldnc = nocorrect();
7149    // c:2237-2238 — `if (tok != INANG && tok != INOUTANG) nocorrect = 1;`
7150    if tok() != INANG_TOK && tok() != INOUTANG {
7151        set_nocorrect(1);
7152    }
7153    // c:2239 — `type = redirtab[tok - OUTANG];`
7154    // Map current redirop token to redirtab index — matches order of
7155    // C `enum { OUTANG, OUTANGBANG, DOUTANG, DOUTANGBANG, INANG,
7156    // INOUTANG, DINANG, DINANGDASH, INANGAMP, OUTANGAMP, AMPOUTANG,
7157    // OUTANGAMPBANG, DOUTANGAMP, DOUTANGAMPBANG, TRINANG }`.
7158    r#type = match tok() {
7159        OUTANG_TOK => REDIR_WRITE,
7160        OUTANGBANG => REDIR_WRITENOW,
7161        DOUTANG => REDIR_APP,
7162        DOUTANGBANG => REDIR_APPNOW,
7163        INANG_TOK => REDIR_READ,
7164        INOUTANG => REDIR_READWRITE,
7165        DINANG => REDIR_HEREDOC,
7166        DINANGDASH => REDIR_HEREDOCDASH,
7167        INANGAMP => REDIR_MERGEIN,
7168        OUTANGAMP => REDIR_MERGEOUT,
7169        AMPOUTANG => REDIR_ERRWRITE,
7170        OUTANGAMPBANG => REDIR_ERRWRITENOW,
7171        DOUTANGAMP => REDIR_ERRAPP,
7172        DOUTANGAMPBANG => REDIR_ERRAPPNOW,
7173        TRINANG => REDIR_HERESTR,
7174        _ => {
7175            set_incmdpos(oldcmdpos);
7176            set_nocorrect(oldnc);
7177            return 0;
7178        }
7179    };
7180    // c:2240 — `fd1 = tokfd;`
7181    fd1 = tokfd();
7182    // c:2241 — `zshlex();`
7183    zshlex();
7184    // c:2242-2243 — `if (tok != STRING && tok != ENVSTRING) YYERROR(ecused);`
7185    if tok() != STRING_LEX && tok() != ENVSTRING {
7186        set_incmdpos(oldcmdpos);
7187        set_nocorrect(oldnc);
7188        zerr("expected word after redirection");
7189        return 0;
7190    }
7191    // c:2244 — `incmdpos = oldcmdpos;`
7192    set_incmdpos(oldcmdpos);
7193    // c:2245 — `nocorrect = oldnc;`
7194    set_nocorrect(oldnc);
7195
7196    // c:2248-2249 — `if (fd1 == -1) fd1 = IS_READFD(type) ? 0 : 1;`
7197    let fd1 = if fd1 == -1 {
7198        if is_readfd(r#type) {
7199            0
7200        } else {
7201            1
7202        }
7203    } else {
7204        fd1
7205    };
7206
7207    // c:2251 — `name = tokstr;`
7208    name = tokstr().unwrap_or_default();
7209
7210    // c:2253-2321 — switch on type:
7211    match r#type {
7212        // c:2254-2300 — REDIR_HEREDOC / REDIR_HEREDOCDASH
7213        x if x == REDIR_HEREDOC || x == REDIR_HEREDOCDASH => {
7214            // c:2257 — `struct heredocs **hd;`
7215            // c:2258 — `int htype = type;`
7216            let htype = r#type;
7217            // c:2260-2261 — `if (strchr(tokstr, '\n')) YYERROR(ecused);`
7218            if name.contains('\n') {
7219                zerr("here-doc terminator contains newline");
7220                return 0;
7221            }
7222            // c:2263-2273 — `ncodes = 5; if (idstring) { type |= MASK; ncodes = 6; }`
7223            if idstring.is_some() {
7224                r#type |= REDIR_VARID_MASK;
7225                ncodes = 6;
7226            } else {
7227                ncodes = 5;
7228            }
7229            // c:2277 — `ecispace(r, ncodes);`
7230            ecispace(r, ncodes);
7231            // c:2278 — `*rp = r + ncodes;`
7232            *rp = r + ncodes;
7233            // c:2279 — `ecbuf[r] = WCB_REDIR(type | REDIR_FROM_HEREDOC_MASK);`
7234            ECBUF.with_borrow_mut(|b| {
7235                b[r] = WCB_REDIR((r#type | REDIR_FROM_HEREDOC_MASK) as wordcode);
7236                // c:2280 — `ecbuf[r + 1] = fd1;`
7237                b[r + 1] = fd1 as wordcode;
7238            });
7239            // c:2282-2286 — r+2..4 are filled later by setheredoc.
7240            // c:2287-2288 — `if (idstring) ecbuf[r + 5] = ecstrcode(idstring);`
7241            if let Some(id) = idstring {
7242                let coded = ecstrcode(id);
7243                ECBUF.with_borrow_mut(|b| {
7244                    b[r + 5] = coded;
7245                });
7246            }
7247            // c:2290-2296 — `for (hd = &hdocs; *hd; hd = &(*hd)->next);
7248            //                 *hd = zalloc(sizeof(struct heredocs));
7249            //                 (*hd)->next = NULL;
7250            //                 (*hd)->type = htype;
7251            //                 (*hd)->pc = r;
7252            //                 (*hd)->str = tokstr;`
7253            HDOCS.with_borrow_mut(|head| {
7254                let mut cur = head;
7255                while cur.is_some() {
7256                    cur = &mut cur.as_mut().unwrap().next; // c:2290
7257                }
7258                *cur = Some(Box::new(crate::ported::zsh_h::heredocs {
7259                    // c:2292-2296
7260                    next: None,
7261                    typ: htype,
7262                    pc: r as i32,
7263                    str: Some(name.clone()),
7264                }));
7265            });
7266            // c:2298 — `zshlex();`
7267            zshlex();
7268            // c:2299 — `return ncodes;`
7269            return ncodes as i32;
7270        }
7271        // c:2301-2308 — REDIR_WRITE / REDIR_WRITENOW
7272        x if x == REDIR_WRITE || x == REDIR_WRITENOW => {
7273            // c:2303-2305 — `if (tokstr[0] == OutangProc && tokstr[1] == Inpar)
7274            //                  type = REDIR_OUTPIPE;`
7275            let nb: Vec<char> = name.chars().collect();
7276            if nb.len() >= 2 && nb[0] == '\u{96}' && nb[1] == '\u{88}' {
7277                r#type = REDIR_OUTPIPE;
7278            } else if nb.len() >= 2 && nb[0] == '\u{94}' && nb[1] == '\u{88}' {
7279                // c:2306-2307 — `else if (tokstr[0] == Inang && tokstr[1] == Inpar) YYERROR;`
7280                zerr("par_redir: < before >");
7281                return 0;
7282            }
7283        }
7284        // c:2309-2315 — REDIR_READ
7285        x if x == REDIR_READ => {
7286            let nb: Vec<char> = name.chars().collect();
7287            if nb.len() >= 2 && nb[0] == '\u{94}' && nb[1] == '\u{88}' {
7288                r#type = REDIR_INPIPE;
7289            } else if nb.len() >= 2 && nb[0] == '\u{96}' && nb[1] == '\u{88}' {
7290                zerr("par_redir: > before <");
7291                return 0;
7292            }
7293        }
7294        // c:2316-2320 — REDIR_READWRITE
7295        x if x == REDIR_READWRITE => {
7296            let nb: Vec<char> = name.chars().collect();
7297            if nb.len() >= 2 && (nb[0] == '\u{94}' || nb[0] == '\u{96}') && nb[1] == '\u{88}' {
7298                r#type = if nb[0] == '\u{94}' {
7299                    REDIR_INPIPE
7300                } else {
7301                    REDIR_OUTPIPE
7302                };
7303            }
7304        }
7305        _ => {}
7306    }
7307    // c:2322 — `zshlex();`
7308    zshlex();
7309
7310    // c:2326-2333 — `if (idstring) { type |= MASK; ncodes = 4; } else ncodes = 3;`
7311    if idstring.is_some() {
7312        r#type |= REDIR_VARID_MASK;
7313        ncodes = 4;
7314    } else {
7315        ncodes = 3;
7316    }
7317
7318    // c:2334 — `ecispace(r, ncodes);`
7319    ecispace(r, ncodes);
7320    // c:2335 — `*rp = r + ncodes;`
7321    *rp = r + ncodes;
7322    // c:2336 — `ecbuf[r] = WCB_REDIR(type);`
7323    let coded_name = ecstrcode(&name);
7324    ECBUF.with_borrow_mut(|b| {
7325        b[r] = WCB_REDIR(r#type as wordcode);
7326        // c:2337 — `ecbuf[r + 1] = fd1;`
7327        b[r + 1] = fd1 as wordcode;
7328        // c:2338 — `ecbuf[r + 2] = ecstrcode(name);`
7329        b[r + 2] = coded_name;
7330    });
7331    // c:2339-2340 — `if (idstring) ecbuf[r + 3] = ecstrcode(idstring);`
7332    if let Some(id) = idstring {
7333        let coded_id = ecstrcode(id);
7334        ECBUF.with_borrow_mut(|b| {
7335            b[r + 3] = coded_id;
7336        });
7337    }
7338    // c:2342 — `return ncodes;`
7339    ncodes as i32
7340}
7341
7342/// Port of `IS_READFD(type)` macro from `Src/zsh.h` — determines
7343/// default fd (0 for read-ish, 1 for write-ish) when none specified.
7344fn is_readfd(t: i32) -> bool {
7345    matches!(
7346        t,
7347        x if x == REDIR_READ
7348            || x == REDIR_READWRITE
7349            || x == REDIR_MERGEIN
7350            || x == REDIR_HEREDOC
7351            || x == REDIR_HEREDOCDASH
7352            || x == REDIR_HERESTR
7353    )
7354}
7355
7356/// Parse a program (list of lists)
7357/// Parse a complete program (top-level entry). Calls
7358/// parse_program_until with no end-token sentinel. Direct port of
7359/// zsh/Src/parse.c:614-720 `parse_event` / `par_list` /
7360/// `par_event` flow. C distinguishes COND_EVENT (single command
7361/// for here-string) from full event parse; zshrs's parse_program
7362/// is the full-event entry.
7363fn parse_program() -> ZshProgram {
7364    parse_program_until(None)
7365}
7366
7367/// Parse a program until we hit an end token
7368/// Parse a program until one of `end_tokens` is seen (or EOF).
7369/// Drives par_list in a loop. C equivalent: the body of par_event
7370/// (parse.c:635-695) iterating par_list against the lexer.
7371fn parse_program_until(end_tokens: Option<&[lextok]>) -> ZshProgram {
7372    let mut lists = Vec::new();
7373
7374    loop {
7375        // Skip separators
7376        while tok() == SEPER || tok() == NEWLIN {
7377            zshlex();
7378        }
7379
7380        if tok() == ENDINPUT || tok() == LEXERR {
7381            break;
7382        }
7383
7384        // Check for end tokens
7385        if let Some(end_toks) = end_tokens {
7386            if end_toks.contains(&tok()) {
7387                break;
7388            }
7389        }
7390
7391        // Also stop at these tokens when not explicitly looking for them
7392        // Note: Else/Elif/Then are NOT here - they're handled by par_if
7393        // to allow nested if statements inside case arms, loops, etc.
7394        //
7395        // c:Src/parse.c:par_event — when an orphan terminator (DONE
7396        // outside a loop, FI outside an if, ESAC outside a case)
7397        // appears at the top level (end_tokens=None), C errors via
7398        // YYERROR. zshrs's `break` silently accepted `done`/`fi`/
7399        // `esac` as no-op input. Error at the outermost call so
7400        // unscoped terminators don't sneak through; nested calls
7401        // still break cleanly via the end_tokens contains-check
7402        // above.
7403        match tok() {
7404            DONE | FI | ESAC | DOLOOP if end_tokens.is_none() => {
7405                // c:Src/parse.c:par_event — emit the specific token
7406                // name (`done`, `fi`, `esac`, `do`) so error-parsing
7407                // tools can identify the unmatched terminator. C zsh
7408                // writes `parse error near \`<tok>'`; the Rust port
7409                // was emitting a generic "orphan terminator" string.
7410                // Bug #142, #413.
7411                let name = match tok() {
7412                    DONE => "done",
7413                    FI => "fi",
7414                    ESAC => "esac",
7415                    DOLOOP => "do",
7416                    _ => "orphan terminator",
7417                };
7418                zerr(&format!("parse error near `{}'", name));
7419                break;
7420            }
7421            DSEMI | SEMIAMP | SEMIBAR if end_tokens.is_none() => {
7422                // c:Src/parse.c:par_event — case-arm terminators
7423                // (`;;`, `;&`, `;|`) outside a case construct are a
7424                // parse error. zshrs's `break` silently accepted them
7425                // at top level, truncating the rest of the script.
7426                // Bug #141 in docs/BUGS.md.
7427                let name = match tok() {
7428                    DSEMI => ";;",
7429                    SEMIAMP => ";&",
7430                    SEMIBAR => ";|",
7431                    _ => "case terminator",
7432                };
7433                zerr(&format!("parse error near `{}'", name));
7434                break;
7435            }
7436            OUTBRACE_TOK if end_tokens.is_none() => {
7437                // c:Src/parse.c:par_event — orphan `}` (no matching
7438                // `{` opener) at top level is a parse error. zshrs's
7439                // generic break swallowed it silently, leaving the
7440                // `echo a` in `echo a }` running and ignoring the
7441                // stray brace. Bug #168 in docs/BUGS.md.
7442                zerr("parse error near `}'");
7443                break;
7444            }
7445            OUTBRACE_TOK | DSEMI | SEMIAMP | SEMIBAR | DONE | FI | ESAC | ZEND => break,
7446            _ => {}
7447        }
7448
7449        match par_list() {
7450            Some(list) => {
7451                let detected = simple_name_with_inoutpar(&list);
7452                lists.push(list);
7453                // Synthesize a FuncDef for the `name() { body }` shape
7454                // at parse time so body_source is captured while the
7455                // lexer still has the input. The lexer port emits
7456                // `name(` as a single Word ending in `<Inpar><Outpar>`,
7457                // so the Simple list is followed by an Inbrace once
7458                // separators are skipped. For `name() cmd args` the
7459                // body has already been swallowed into the same
7460                // Simple's words tail — synthesize directly from there.
7461                if let Some((names, body_argv)) = detected {
7462                    if !body_argv.is_empty() {
7463                        // One-line body already in the Simple. Build
7464                        // a Simple from body_argv as the function body.
7465                        lists.pop();
7466                        let body_simple = ZshCommand::Simple(ZshSimple {
7467                            assigns: Vec::new(),
7468                            words: body_argv,
7469                            redirs: Vec::new(),
7470                        });
7471                        let body_list = ZshList {
7472                            sublist: ZshSublist {
7473                                pipe: ZshPipe {
7474                                    cmd: body_simple,
7475                                    next: None,
7476                                    lineno: lineno(),
7477                                    merge_stderr: false,
7478                                },
7479                                next: None,
7480                                flags: SublistFlags::default(),
7481                            },
7482                            flags: ListFlags::default(),
7483                        };
7484                        let funcdef = ZshCommand::FuncDef(ZshFuncDef {
7485                            names,
7486                            body: Box::new(ZshProgram {
7487                                lists: vec![body_list],
7488                            }),
7489                            tracing: false,
7490                            auto_call_args: None,
7491                            body_source: None,
7492                        });
7493                        let synthetic = ZshList {
7494                            sublist: ZshSublist {
7495                                pipe: ZshPipe {
7496                                    cmd: funcdef,
7497                                    next: None,
7498                                    lineno: lineno(),
7499                                    merge_stderr: false,
7500                                },
7501                                next: None,
7502                                flags: SublistFlags::default(),
7503                            },
7504                            flags: ListFlags::default(),
7505                        };
7506                        lists.push(synthetic);
7507                        continue;
7508                    }
7509                    // Else: words.len() == 1 (only the trailing `name()`
7510                    // word), brace body follows. `names` may carry
7511                    // multiple identifiers from the `fna fnb fnc()`
7512                    // shorthand — all share the same brace body per
7513                    // src/zsh/Src/parse.c:1666 par_funcdef wordlist.
7514                    // Skip separators on the real lexer; safe because
7515                    // parse_program's next iteration would also skip them.
7516                    while tok() == SEPER || tok() == NEWLIN {
7517                        zshlex();
7518                    }
7519                    if tok() == INBRACE_TOK {
7520                        // Capture body_start BEFORE the lexer
7521                        // advances past the first body token. The
7522                        // outer zshlex() consumed `{`; lexer.pos
7523                        // is now right after `{`. The next
7524                        // `zshlex()` would advance past `echo`,
7525                        // making body_start land mid-body and
7526                        // lose the first word — `typeset -f f`
7527                        // printed `a; echo b` instead of
7528                        // `echo a; echo b` for `f() { echo a;
7529                        // echo b }`.
7530                        let body_start = pos();
7531                        zshlex();
7532                        // c:Src/parse.c — synth funcdef body terminates
7533                        // at OUTBRACE_TOK. Explicit end-token avoids
7534                        // the top-level stray-`}` arm. Bug #167/#168.
7535                        let body = parse_program_until(Some(&[OUTBRACE_TOK]));
7536                        let body_end = if tok() == OUTBRACE_TOK {
7537                            pos().saturating_sub(1)
7538                        } else {
7539                            pos()
7540                        };
7541                        let body_source = input_slice(body_start, body_end)
7542                            .map(|s| s.trim().to_string())
7543                            .filter(|s| !s.is_empty());
7544                        if tok() == OUTBRACE_TOK {
7545                            zshlex();
7546                        }
7547                        // Replace the Simple list with a FuncDef list.
7548                        lists.pop();
7549                        let funcdef = ZshCommand::FuncDef(ZshFuncDef {
7550                            names,
7551                            body: Box::new(body),
7552                            tracing: false,
7553                            auto_call_args: None,
7554                            body_source,
7555                        });
7556                        let synthetic = ZshList {
7557                            sublist: ZshSublist {
7558                                pipe: ZshPipe {
7559                                    cmd: funcdef,
7560                                    next: None,
7561                                    lineno: lineno(),
7562                                    merge_stderr: false,
7563                                },
7564                                next: None,
7565                                flags: SublistFlags::default(),
7566                            },
7567                            flags: ListFlags::default(),
7568                        };
7569                        lists.push(synthetic);
7570                    } else if !matches!(tok(), ENDINPUT | OUTBRACE_TOK | SEPER | NEWLIN) {
7571                        // No-brace one-line body: `foo() echo hello`.
7572                        // Parse a single command for the body.
7573                        let body_cmd = par_cmd();
7574                        if let Some(cmd) = body_cmd {
7575                            let body_list = ZshList {
7576                                sublist: ZshSublist {
7577                                    pipe: ZshPipe {
7578                                        cmd,
7579                                        next: None,
7580                                        lineno: lineno(),
7581                                        merge_stderr: false,
7582                                    },
7583                                    next: None,
7584                                    flags: SublistFlags::default(),
7585                                },
7586                                flags: ListFlags::default(),
7587                            };
7588                            lists.pop();
7589                            let funcdef = ZshCommand::FuncDef(ZshFuncDef {
7590                                names: names.clone(),
7591                                body: Box::new(ZshProgram {
7592                                    lists: vec![body_list],
7593                                }),
7594                                tracing: false,
7595                                auto_call_args: None,
7596                                body_source: None,
7597                            });
7598                            let synthetic = ZshList {
7599                                sublist: ZshSublist {
7600                                    pipe: ZshPipe {
7601                                        cmd: funcdef,
7602                                        next: None,
7603                                        lineno: lineno(),
7604                                        merge_stderr: false,
7605                                    },
7606                                    next: None,
7607                                    flags: SublistFlags::default(),
7608                                },
7609                                flags: ListFlags::default(),
7610                            };
7611                            lists.push(synthetic);
7612                        }
7613                    }
7614                }
7615            }
7616            None => break,
7617        }
7618    }
7619
7620    ZshProgram { lists }
7621}
7622
7623/// Parse an assignment
7624/// Parse an assignment word `NAME=value` or `NAME=(arr items)`.
7625/// Sub-routine of par_simple. The C source handles assignments
7626/// inline in par_simple via the ENVSTRING/ENVARRAY token paths
7627/// (parse.c:1842-2000ish); zshrs splits it out to a dedicated
7628/// helper for clarity.
7629fn parse_assign() -> Option<ZshAssign> {
7630    // Helper: locate the Equals-marker that delimits NAME from
7631    // VALUE in an assignment-shaped tokstr. The lexer META-encodes
7632    // EVERY `=` (including those inside `${var%%=foo}` strip
7633    // patterns or `[idx]=...` subscripts), so a naive
7634    // `tokstr.find(Equals)` would split at the first inner `=`
7635    // and break the whole assignment. Walk the string skipping
7636    // brace and bracket depth so the assignment's `=` (the one
7637    // after the last `]` of the LHS subscript / or after the
7638    // bare name) is the one we land on.
7639    fn find_assign_equals(s: &str) -> Option<usize> {
7640        let target = Equals;
7641        let mut brace = 0i32;
7642        let mut bracket = 0i32;
7643        let mut paren = 0i32;
7644        for (i, c) in s.char_indices() {
7645            match c {
7646                    '{' | '\u{8f}' /* Inbrace */ => brace += 1,
7647                    '}' | '\u{90}' /* Outbrace */ => {
7648                        if brace > 0 {
7649                            brace -= 1;
7650                        }
7651                    }
7652                    '[' | '\u{91}' /* Inbrack */ => bracket += 1,
7653                    ']' | '\u{92}' /* Outbrack */ => {
7654                        if bracket > 0 {
7655                            bracket -= 1;
7656                        }
7657                    }
7658                    '(' | '\u{88}' /* Inpar */ => paren += 1,
7659                    ')' | '\u{8a}' /* Outpar */ => {
7660                        if paren > 0 {
7661                            paren -= 1;
7662                        }
7663                    }
7664                    _ if c == target && brace == 0 && bracket == 0 && paren == 0 => {
7665                        return Some(i);
7666                    }
7667                    _ => {}
7668                }
7669        }
7670        None
7671    }
7672
7673    let _ts_tokstr = tokstr()?;
7674    let tokstr = _ts_tokstr.as_str();
7675
7676    // Parse name=value or name+=value.
7677    let (name, value_str, append) = if tok() == ENVARRAY {
7678        let (name, append) = if let Some(stripped) = tokstr.strip_suffix('+') {
7679            (stripped, true)
7680        } else {
7681            (tokstr, false)
7682        };
7683        (name.to_string(), String::new(), append)
7684    } else if let Some(pos) = find_assign_equals(tokstr) {
7685        let name_part = &tokstr[..pos];
7686        let (name, append) = if let Some(stripped) = name_part.strip_suffix('+') {
7687            (stripped, true)
7688        } else {
7689            (name_part, false)
7690        };
7691        (
7692            name.to_string(),
7693            tokstr[pos + Equals.len_utf8()..].to_string(),
7694            append,
7695        )
7696    } else if let Some(pos) = tokstr.find('=') {
7697        // Fallback to literal '=' for compatibility
7698        let name_part = &tokstr[..pos];
7699        let (name, append) = if let Some(stripped) = name_part.strip_suffix('+') {
7700            (stripped, true)
7701        } else {
7702            (name_part, false)
7703        };
7704        (name.to_string(), tokstr[pos + 1..].to_string(), append)
7705    } else {
7706        return None;
7707    };
7708
7709    let value = if tok() == ENVARRAY {
7710        // Array assignment: name=(...)
7711        // c:Src/parse.c:1895 par_simple ENVARRAY arm:
7712        //   `int oldcmdpos = incmdpos; ... incmdpos = 0; ... zshlex();`
7713        // Reset incmdpos to false BEFORE the array body's first lex so
7714        // a leading `{...}` (brace expansion) doesn't trip the
7715        // empty-buf+incmdpos rule at lex.c:1141 that returns `{` as
7716        // STRING and lets the reswd_lookup promote it to INBRACE_TOK.
7717        let oldcmdpos = crate::ported::lex::incmdpos();
7718        crate::ported::lex::set_incmdpos(false);
7719        let mut elements = Vec::new();
7720        zshlex(); // skip past token
7721
7722        let mut arr_iters = 0;
7723        const MAX_ARRAY_ELEMENTS: usize = 10_000;
7724        while matches!(tok(), STRING_LEX | SEPER | NEWLIN) {
7725            arr_iters += 1;
7726            if arr_iters > MAX_ARRAY_ELEMENTS {
7727                zerr("array assignment exceeded maximum elements");
7728                break;
7729            }
7730            if tok() == STRING_LEX {
7731                let _ts_s = crate::ported::lex::tokstr();
7732                if let Some(s) = _ts_s.as_deref() {
7733                    elements.push(s.to_string());
7734                }
7735            }
7736            zshlex();
7737        }
7738        // c:Src/parse.c — `incmdpos = oldcmdpos;` (restore at end of arm)
7739        crate::ported::lex::set_incmdpos(oldcmdpos);
7740
7741        // The closing Outpar is consumed here. The outer par_simple
7742        // loop will then `zshlex()` past whatever follows (typically
7743        // a separator or the next word) — calling zshlex twice in
7744        // tandem (here AND in par_simple) over-advances and merges
7745        // a following `name() { … }` funcdef into the same Simple.
7746        // We only consume Outpar; let the caller handle the rest.
7747        // Without this guard `g=(o1); f() { :; }` parsed as one
7748        // Simple with assigns=[g] and words=["f()"] (one token).
7749        if tok() == OUTPAR_TOK {
7750            // Note: do NOT zshlex() here. par_simple's `lexer
7751            // .zshlex()` after `parse_assign` returns advances past
7752            // the Outpar onto the next significant token.
7753            //
7754            // Force `incmdpos=true` so the next zshlex() recognizes
7755            // a follow-up `b=(...)` / `b=val` as Envarray/Envstring.
7756            // The lexer flips incmdpos to false on bare Outpar (which
7757            // is correct for subshell-close context), but for an
7758            // array-assignment close more assigns/words may follow.
7759            set_incmdpos(true);
7760        }
7761
7762        ZshAssignValue::Array(elements)
7763    } else {
7764        ZshAssignValue::Scalar(value_str)
7765    };
7766
7767    Some(ZshAssign {
7768        name,
7769        value,
7770        append,
7771    })
7772}
7773
/// AST `par_redir` variant accepting an idstring for the
/// `{var}>file` brace-FD shape. C signature
/// `par_redir(int *rp, char *idstring)` (parse.c:2229).
///
/// Expects the current token to be a redirection operator. Maps it
/// to a `REDIR_*` type, lexes the target word that follows, and
/// advances the lexer past that word before returning.
///
/// * `idstring` — optional variable name; copied into the returned
///   `ZshRedir.varid` unchanged.
///
/// Returns `None` without consuming anything when the current token
/// is not one of the redirection operators listed below, and returns
/// `None` after reporting an error via `zerr` when the token after
/// the operator is neither `STRING_LEX` nor `ENVSTRING`.
///
/// For `REDIR_HEREDOC` / `REDIR_HEREDOCDASH` an entry is appended to
/// both `HDOCS` and `LEX_HEREDOCS`, and the index of the
/// `LEX_HEREDOCS` entry is returned in `heredoc_idx`. The `heredoc`
/// field of the result is always `None` here.
fn par_redir_with_id(idstring: Option<&str>) -> Option<ZshRedir> {
    let varid: Option<String> = idstring.map(|s| s.to_string());
    // Translate the operator token into its redirection type; any
    // other token means "not a redirection".
    let rtype = match tok() {
        OUTANG_TOK => REDIR_WRITE,
        OUTANGBANG => REDIR_WRITENOW,
        DOUTANG => REDIR_APP,
        DOUTANGBANG => REDIR_APPNOW,
        INANG_TOK => REDIR_READ,
        INOUTANG => REDIR_READWRITE,
        DINANG => REDIR_HEREDOC,
        DINANGDASH => REDIR_HEREDOCDASH,
        TRINANG => REDIR_HERESTR,
        INANGAMP => REDIR_MERGEIN,
        OUTANGAMP => REDIR_MERGEOUT,
        AMPOUTANG => REDIR_ERRWRITE,
        OUTANGAMPBANG => REDIR_ERRWRITENOW,
        DOUTANGAMP => REDIR_ERRAPP,
        DOUTANGAMPBANG => REDIR_ERRAPPNOW,
        _ => return None,
    };

    // File descriptor: a non-negative `tokfd()` wins; otherwise
    // default to 0 for the input-side types and 1 for everything else.
    let fd = if tokfd() >= 0 {
        tokfd()
    } else if matches!(
        rtype,
        REDIR_READ
            | REDIR_READWRITE
            | REDIR_MERGEIN
            | REDIR_HEREDOC
            | REDIR_HEREDOCDASH
            | REDIR_HERESTR
    ) {
        0
    } else {
        1
    };

    // c:2234-2245 — save/restore incmdpos and nocorrect around the
    // zshlex that consumes the redir target word:
    //   oldcmdpos = incmdpos; incmdpos = 0;
    //   oldnc = nocorrect;
    //   if (tok != INANG && tok != INOUTANG) nocorrect = 1;
    //   ... zshlex; check tok; ...
    //   incmdpos = oldcmdpos; nocorrect = oldnc;
    // Without this, a redir target lexes in the parent's incmdpos
    // (re-promoting `{` / reswords) AND with parent nocorrect (so
    // spelling-correction wrongly runs inside `> $(cmd)` etc.).
    let oldcmdpos = incmdpos();
    set_incmdpos(false);
    let oldnc = nocorrect();
    let cur = tok();
    if cur != INANG_TOK && cur != INOUTANG {
        set_nocorrect(1);
    }
    zshlex();

    // The target word. Both arms restore incmdpos / nocorrect before
    // doing anything else, so the saved state is never leaked.
    let name = match tok() {
        STRING_LEX | ENVSTRING => {
            let n = tokstr().unwrap_or_default();
            // c:2244-2245 — restore incmdpos / nocorrect right after
            // the redir target word is confirmed, BEFORE the trailing
            // zshlex advances past it. The advance itself is deferred
            // below so REDIR_HEREDOC[DASH] can push onto HDOCS first
            // (matching the wordcode variant at parse.rs:6894-6908) —
            // otherwise the NEWLIN drained by that zshlex sees an
            // empty HDOCS list and gethere never collects the body.
            set_incmdpos(oldcmdpos);
            set_nocorrect(oldnc);
            n
        }
        _ => {
            set_incmdpos(oldcmdpos);
            set_nocorrect(oldnc);
            zerr("expected word after redirection");
            return None;
        }
    };

    // Heredoc terminator capture. C parse.c:2254-2317 par_redir builds
    // a `struct heredocs` entry here for REDIR_HEREDOC[DASH]. zshrs
    // pushes onto HDOCS (canonical C linked list, c:2290-2296) AND
    // onto LEX_HEREDOCS (Rust-only AST-glue Vec carrying parsed-out
    // terminator/strip_tabs/quoted metadata for downstream AST
    // consumers). Quoted terminators (`<<'EOF'` / `<<"EOF"` / `<<\EOF`)
    // disable expansion in the body — Snull `\u{9d}` marks single-quote,
    // Dnull `\u{9e}` marks double-quote, Bnull `\u{9f}` marks
    // backslash-escaped chars.
    let heredoc_idx = if matches!(rtype, REDIR_HEREDOC | REDIR_HEREDOCDASH) {
        let strip_tabs = rtype == REDIR_HEREDOCDASH;
        // Quoted if any quote marker appears anywhere in the word, or
        // the word starts with a literal quote character.
        let quoted = name.contains('\u{9d}')
            || name.contains('\u{9e}')
            || name.contains('\u{9f}')
            || name.starts_with('\'')
            || name.starts_with('"');
        // Terminator text with every quote character / marker removed.
        let term = name
            .chars()
            .filter(|c| {
                *c != '\'' && *c != '"' && *c != '\u{9d}' && *c != '\u{9e}' && *c != '\u{9f}'
            })
            .collect::<String>();
        // c:2290-2296 — `for (hd = &hdocs; *hd; hd = &(*hd)->next);
        //                 *hd = zalloc(sizeof(struct heredocs));
        //                 (*hd)->next = NULL;
        //                 (*hd)->type = htype;
        //                 (*hd)->pc = r;
        //                 (*hd)->str = tokstr;`
        // AST path has no wordcode pc to patch; use -1 sentinel so the
        // inline NEWLIN walk in `zshlex()` skips the setheredoc call.
        HDOCS.with_borrow_mut(|head| {
            // Walk to the tail slot of the list, then fill it.
            let mut cur = head;
            while cur.is_some() {
                cur = &mut cur.as_mut().unwrap().next; // c:2290
            }
            *cur = Some(Box::new(crate::ported::zsh_h::heredocs {
                // c:2292-2296
                next: None,
                typ: rtype,
                pc: -1,
                str: Some(name.clone()),
            }));
        });
        // zshrs-only: push parallel AST-glue entry onto LEX_HEREDOCS
        // and remember its position for the returned ZshRedir.
        let idx = LEX_HEREDOCS.with_borrow_mut(|v| {
            v.push(HereDoc {
                terminator: term,
                strip_tabs,
                content: String::new(),
                quoted,
                processed: false,
            });
            v.len() - 1
        });
        Some(idx)
    } else {
        None
    };

    // c:2298 (heredoc) / c:2322 (other redirs) — final zshlex() advance
    // past the redir target word. MUST run after the HDOCS push above
    // so the heredoc-drain inside this zshlex sees the new entry. For
    // non-heredoc forms the order is irrelevant; consolidating to a
    // single tail-call here matches the wordcode variant.
    zshlex();

    Some(ZshRedir {
        rtype,
        fd,
        name,
        heredoc: None,
        varid,
        heredoc_idx,
    })
}
7932
7933/// Parse C-style for loop: for (( init; cond; step ))
7934/// Parse the c-style `for ((init; cond; incr)) do BODY done`.
7935/// Inner branch of zsh/Src/parse.c:1100-1140 inside par_for.
7936/// Recognized when the token after FOR is DINPAR (the `((`
7937/// detected by gettok via dbparens setup).
7938fn parse_for_cstyle() -> Option<ZshCommand> {
7939    // We're at (( (Dinpar None) - the opening ((
7940    // Lexer returns:
7941    //   Dinpar None     - opening ((
7942    //   Dinpar "init"   - init expression, semicolon consumed
7943    //   Dinpar "cond"   - cond expression, semicolon consumed
7944    //   Doutpar "step"  - step expression, closing )) consumed
7945    zshlex(); // Get init: Dinpar "i=0"
7946
7947    if tok() != DINPAR {
7948        zerr("expected init expression in for ((");
7949        return None;
7950    }
7951    let init = tokstr().unwrap_or_default();
7952
7953    zshlex(); // Get cond: Dinpar "i<10"
7954
7955    if tok() != DINPAR {
7956        zerr("expected condition in for ((");
7957        return None;
7958    }
7959    let cond = tokstr().unwrap_or_default();
7960
7961    zshlex(); // Get step: Doutpar "i++"
7962
7963    if tok() != DOUTPAR {
7964        zerr("expected )) in for");
7965        return None;
7966    }
7967    let step = tokstr().unwrap_or_default();
7968
7969    // c:1110 — `infor = 0;` before the body opener. The companion
7970    // `incmdpos = 1;` at c:1111 is intentionally skipped here for
7971    // the same reason c:1094's `incmdpos = 0;` is skipped in
7972    // par_for above — zshrs doesn't mirror the full
7973    // incmdpos state-machine inline.
7974    set_infor(0); // c:1110
7975    zshlex(); // Move past ))
7976
7977    skip_separators();
7978    let body = parse_loop_body(false, false)?;
7979
7980    Some(ZshCommand::For(ZshFor {
7981        var: String::new(),
7982        list: ForList::CStyle { init, cond, step },
7983        body: Box::new(body),
7984        is_select: false,
7985    }))
7986}
7987
7988/// Parse select loop (same syntax as for)
7989/// Parse `select NAME in WORDS; do BODY; done`. Same shape as
7990/// `for NAME in WORDS; do ...` but with menu-prompt semantics in
7991/// the executor. C equivalent: the SELECT case in par_for at
7992/// parse.c:1087-1207 (selects share parser flow with foreach).
7993fn parse_select() -> Option<ZshCommand> {
7994    // `select` shares par_for's grammar (var, words, body) but the
7995    // compile path is different (interactive prompt loop).
7996    match par_for()? {
7997        ZshCommand::For(mut f) => {
7998            f.is_select = true;
7999            Some(ZshCommand::For(f))
8000        }
8001        other => Some(other),
8002    }
8003}
8004
8005/// Parse loop body (do...done, {...}, or shortloop)
8006/// Parse the `do BODY done` body of a for/while/until/select/
8007/// repeat loop. Direct equivalent of zsh's parse.c handling
8008/// inside the loop builders — they all consume DOLOOP, parse a
8009/// list until DONE, and return the list. The `foreach_style`
8010/// flag signals foreach (where short-form `for NAME in WORDS;
8011/// CMD` may skip do/done) vs c-style (which always requires
8012/// do/done).
8013///
8014/// `is_repeat` widens the SHORTLOOPS gate so `SHORTREPEAT` also
8015/// unlocks the short form for `repeat N CMD` (per c:1600
8016/// `unset(SHORTLOOPS) && unset(SHORTREPEAT)`).
8017fn parse_loop_body(foreach_style: bool, is_repeat: bool) -> Option<ZshProgram> {
8018    // c:1180-1194 — body dispatch order per par_for:
8019    //   `do ... done` (DOLOOP) — primary form.
8020    //   `{ ... }`   (INBRACE) — alternate.
8021    //   csh/CSHJUNKIELOOPS — terminator is `end`.
8022    //   else if (unset(SHORTLOOPS)) — YYERROR.
8023    //   else — short form (single command).
8024    if tok() == DOLOOP {
8025        zshlex();
8026        // Body parse must declare DONE as an end-token so the
8027        // parse_program_until top-level orphan-DONE guard doesn't
8028        // mis-fire on the legitimate loop terminator.
8029        let body = parse_program_until(Some(&[DONE]));
8030        // c:Src/parse.c:1182-1183 / :1535-1536 / :1597-1598 —
8031        // `if (tok != DONE) YYERRORV(oecused);`. zshrs previously
8032        // silently accepted EOF as a substitute for `done`, so
8033        // `for i in a; do echo hi; don` ran the loop with `don` as
8034        // a command (which then failed "command not found") instead
8035        // of erroring at parse time. Bug #403, #404.
8036        if tok() != DONE {
8037            zerr("parse error: expected `done'");
8038            return None;
8039        }
8040        zshlex();
8041        Some(body)
8042    } else if tok() == INBRACE_TOK {
8043        zshlex();
8044        let body = parse_program_until(Some(&[OUTBRACE_TOK]));
8045        // c:Src/parse.c:1186 / :1539 — `if (tok != OUTBRACE) YYERRORV`.
8046        if tok() != OUTBRACE_TOK {
8047            zerr("parse error: expected `}'");
8048            return None;
8049        }
8050        zshlex();
8051        Some(body)
8052    } else if foreach_style || isset(CSHJUNKIELOOPS) {
8053        // c:1184 / 1546 / 1595 — `else if (csh || isset(CSHJUNKIELOOPS))`.
8054        let body = parse_program_until(Some(&[ZEND]));
8055        // c:1190 / 1548 — `if (tok != ZEND) YYERRORV`.
8056        if tok() != ZEND {
8057            zerr("parse error: expected `end'");
8058            return None;
8059        }
8060        zshlex();
8061        Some(body)
8062    } else {
8063        // c:1190 / 1474 / 1551 / 1600 — short-form gate. C bails
8064        // with YYERROR when `unset(SHORTLOOPS) && (!is_repeat ||
8065        // unset(SHORTREPEAT))`. zshrs's option machinery isn't
8066        // initialised at parse-test time (no `init_main` →
8067        // `install_emulation_defaults`), so a strict port here
8068        // body. parse_init seeds SHORTLOOPS=on mirroring C
8069        // `install_emulation_defaults`, so this fires only when a
8070        // script explicitly disabled the option.
8071        if unset(SHORTLOOPS) && (!is_repeat || unset(SHORTREPEAT)) {
8072            zerr("parse error: short loop form requires SHORTLOOPS option");
8073            return None;
8074        }
8075        // c:Src/parse.c:1604 / :1474 / :1551 — short form calls
8076        // par_save_list1 → par_list1 → par_sublist, which parses
8077        // ONE sublist and leaves the trailing SEPER untouched for
8078        // the outer par_list to consume. zshrs previously routed
8079        // through par_list() which consumes the trailing `;`/`\n`
8080        // separator — that swallowed the separator between the
8081        // loop's body command and the next outer command, so
8082        // `repeat 2 print x; print y` parsed as repeat-then-eof
8083        // and par_cmd's post-compound STRING_LEX guard at parse.rs
8084        // line 1170 fired "parse error near `print'". Bug #593.
8085        par_list1().map(|sublist| ZshProgram {
8086            lists: vec![ZshList {
8087                sublist,
8088                flags: ListFlags::default(),
8089            }],
8090        })
8091    }
8092}
8093
8094/// `() { body } arg1 arg2 …` — anonymous function. Defines a fresh
8095/// function named `_zshrs_anon_N`, invokes it with the args, and the
8096/// body runs with positional params set. Implemented as the desugared
8097/// pair (FuncDef + Simple call) so the compile path doesn't need new
8098/// machinery.
8099/// Parse an anonymous function definition `() { BODY }` followed
8100/// by call args. zsh treats `() { echo hi; } a b c` as defining
8101/// and immediately calling an anon fn with args a/b/c. C
8102/// equivalent: the INOUTPAR shape in par_simple at parse.c:1836+
8103/// triggers an anon-funcdef path.
8104fn parse_anon_funcdef() -> Option<ZshCommand> {
8105    zshlex(); // skip ()
8106    skip_separators();
8107    // No `{` after `()` → bare empty subshell shape `()`. Fall back
8108    // to a Subsh with an empty program so the status is 0 (matches
8109    // zsh's `()` no-op behavior).
8110    if tok() != INBRACE_TOK {
8111        return Some(ZshCommand::Subsh(Box::new(ZshProgram {
8112            lists: Vec::new(),
8113        })));
8114    }
8115    zshlex(); // skip {
8116    // c:Src/parse.c:par_subsh — anon `() { … }` body must terminate at
8117    // OUTBRACE_TOK. Pass it as the explicit end-token so the inner
8118    // parse stops cleanly at `}` rather than hitting the top-level
8119    // stray-`}` arm (#168). Bug #167 family.
8120    let body = parse_program_until(Some(&[OUTBRACE_TOK]));
8121    // c:Src/parse.c:1733-1737 — same `if (tok != OUTBRACE) YYERRORV`
8122    // gate as the named-funcdef path. Bug #405 sibling.
8123    if tok() != OUTBRACE_TOK {
8124        zerr("parse error: expected `}'");
8125        return None;
8126    }
8127    zshlex();
8128    // Collect any trailing args until a separator. zsh's anon-fn form
8129    // `() { body } a b c` runs body with $1=a, $2=b, $3=c.
8130    let mut args = Vec::new();
8131    while tok() == STRING_LEX {
8132        if let Some(s) = tokstr() {
8133            args.push(s);
8134        }
8135        zshlex();
8136    }
8137
8138    // Generate a unique name. Module-level static would be cleaner but
8139    // a thread-local atomic is enough — anonymous functions are
8140    // ephemeral and the name isn't user-visible.
8141    static ANON_COUNTER: AtomicUsize = AtomicUsize::new(0);
8142    let n = ANON_COUNTER.fetch_add(1, Ordering::Relaxed);
8143    let name = format!("_zshrs_anon_{}", n);
8144    Some(ZshCommand::FuncDef(ZshFuncDef {
8145        names: vec![name],
8146        body: Box::new(body),
8147        tracing: false,
8148        auto_call_args: Some(args),
8149        body_source: None,
8150    }))
8151}
8152
8153/// Parse {...} cursh
8154/// Parse a current-shell brace block `{ BODY }`. C source
8155/// par_cmd at parse.c:958-1085 handles Inbrace → emit WC_CURSH
8156/// and recurses into the list. zshrs's parse_cursh extracts that
8157/// arm into a dedicated method.
8158fn parse_cursh() -> Option<ZshCommand> {
8159    zshlex(); // skip {
8160    // c:Src/parse.c:par_subsh — pass OUTBRACE_TOK as the explicit
8161    // body terminator so the inner parse stops cleanly at `}` rather
8162    // than falling through the top-level `OUTBRACE_TOK if
8163    // end_tokens.is_none()` arm (which errors on stray `}` per bug
8164    // #168). Bug #167 in docs/BUGS.md.
8165    let prog = parse_program_until(Some(&[OUTBRACE_TOK]));
8166
8167    // c:Src/parse.c:par_subsh — `{ … }` requires a matching `}`.
8168    // C errors via YYERRORV when the body parse returns without
8169    // seeing OUTBRACE_TOK (parse.c:1623 inbrack check). zshrs's
8170    // previous behavior silently returned `Cursh(prog)` and ran the
8171    // body as if the braces were absent. Bug #167 in docs/BUGS.md.
8172    if tok() != OUTBRACE_TOK {
8173        // Reuse the "parse error near `<tok>'" shape from #142/#161.
8174        // The offending token is whatever follows the unclosed brace
8175        // body. For EOF (`{ echo a` at end of input) C zsh errors
8176        // near the LAST consumed body token; we use the current
8177        // tokstr() or fall back to a "}" hint.
8178        let near = tokstr().unwrap_or_else(|| "}".to_string());
8179        zerr(&format!("parse error near `{}'", near));
8180        return None;
8181    }
8182    // Check for { ... } always { ... }. Direct port of zsh's
8183    // par_subsh at parse.c:1612-1660 — note the two `incmdpos = 1`
8184    // forces (parse.c:1632, 1637): after consuming the closing
8185    // Outbrace AND after matching the `always` keyword, the parser
8186    // explicitly resets command position so the next `{` lexes as
8187    // Inbrace. Without these resets the lexer's String-clears-cmdpos
8188    // rule (lex.rs:976-983) leaves the second `{` in word position,
8189    // turning `always { ... }` into a Simple `{` `echo` … and the
8190    // try/always pairing is silently lost.
8191    {
8192        set_incmdpos(true); // parse.c:1632 incmdpos = !zsh_construct
8193        zshlex();
8194
8195        // Check for 'always'
8196        if tok() == STRING_LEX {
8197            let s = tokstr();
8198            if s.map(|s| s == "always").unwrap_or(false) {
8199                set_incmdpos(true); // parse.c:1637 incmdpos = 1
8200                zshlex();
8201                skip_separators();
8202
8203                if tok() == INBRACE_TOK {
8204                    zshlex();
8205                    // c:Src/parse.c — always-clause body terminates at
8206                    // OUTBRACE_TOK. Bug #167/#168 family.
8207                    let always = parse_program_until(Some(&[OUTBRACE_TOK]));
8208                    if tok() == OUTBRACE_TOK {
8209                        zshlex();
8210                    }
8211                    return Some(ZshCommand::Try(ZshTry {
8212                        try_block: Box::new(prog),
8213                        always: Box::new(always),
8214                    }));
8215                }
8216            }
8217        }
8218    }
8219
8220    Some(ZshCommand::Cursh(Box::new(prog)))
8221}
8222
8223/// Parse inline function definition: name() { ... }
8224/// Parse the inline form `NAME () { BODY }` (POSIX-style funcdef
8225/// without the `function` keyword). The name has already been
8226/// consumed and pushed by par_simple before this method fires.
8227/// C source: handled inline in par_simple's INOUTPAR-after-name
8228/// arm (parse.c:1836-2228).
8229fn parse_inline_funcdef(name: String) -> Option<ZshCommand> {
8230    // par_simple's STRING loop left `incmdpos = 0`; the funcdef body
8231    // `{ ... }` requires `incmdpos = 1` so the lexer recognises `{`
8232    // as INBRACE_TOK (current-shell block opener) instead of a
8233    // literal `{` STRING. Without this, `myfunc() { echo body }`
8234    // parsed the body as the single STRING `"{"`, then `echo body`
8235    // fell out at top level. Mirrors the C path where par_cmd's
8236    // dispatcher (parse.c:958) is called with `incmdpos = 1` for
8237    // the funcdef body.
8238    set_incmdpos(true);
8239    // Skip ()
8240    if tok() == INOUTPAR {
8241        zshlex();
8242    }
8243
8244    skip_separators();
8245
8246    // Parse body
8247    if tok() == INBRACE_TOK {
8248        // Same body_start-before-zshlex fix as par_funcdef.
8249        let body_start = pos();
8250        zshlex();
8251        // c:Src/parse.c — inline funcdef body terminates at OUTBRACE_TOK.
8252        // Explicit end-token keeps the inner parse from hitting the
8253        // top-level stray-`}` arm (#168). Bug #167 family.
8254        let body = parse_program_until(Some(&[OUTBRACE_TOK]));
8255        // c:Src/parse.c:1733-1737 — `if (tok != OUTBRACE) { cmdpop();
8256        // lineno += oldlineno; ecnpats = onp; ecssub = oecssub;
8257        // YYERRORV(oecused); }`. Without this gate, `f() { echo hi`
8258        // silently registered as a complete fn with body `echo hi`.
8259        // Bug #405.
8260        if tok() != OUTBRACE_TOK {
8261            zerr("parse error: expected `}'");
8262            return None;
8263        }
8264        let body_end = pos().saturating_sub(1);
8265        let body_source = input_slice(body_start, body_end)
8266            .map(|s| {
8267                // Lexer's pos() may have advanced past `}` AND skipped
8268                // trailing whitespace/newlines before returning the
8269                // OUTBRACE_TOK to us, so the slice up to `pos - 1`
8270                // includes the `}` and any preceding whitespace.
8271                // Strip the trailing `}` and any preceding structural
8272                // separator (`;`, `\n`) — C zsh's getpermtext walks
8273                // the wordcode list and emits each command WITHOUT
8274                // the trailing `;`/`\n` that lives in the input.
8275                let t = s.trim();
8276                let t = t.strip_suffix('}').unwrap_or(t).trim_end();
8277                let t = t
8278                    .trim_end_matches(|c: char| c == ';' || c == '\n')
8279                    .trim_end();
8280                t.to_string()
8281            })
8282            .filter(|s| !s.is_empty());
8283        zshlex();
8284        Some(ZshCommand::FuncDef(ZshFuncDef {
8285            names: vec![name],
8286            body: Box::new(body),
8287            tracing: false,
8288            auto_call_args: None,
8289            body_source,
8290        }))
8291    } else if unset(SHORTLOOPS) {
8292        // c:1742 — `else if (unset(SHORTLOOPS)) YYERRORV(oecused);` —
8293        // funcdef short body (`name() cmd` without `{...}`) only
8294        // accepted when SHORTLOOPS is set. parse_init seeds
8295        // SHORTLOOPS=on so this fires only when a script
8296        // explicitly disabled the option.
8297        zerr("parse error: short function body form requires SHORTLOOPS option");
8298        None
8299    } else {
8300        match par_cmd() {
8301            Some(cmd) => {
8302                let list = ZshList {
8303                    sublist: ZshSublist {
8304                        pipe: ZshPipe {
8305                            cmd,
8306                            next: None,
8307                            lineno: lineno(),
8308                            merge_stderr: false,
8309                        },
8310                        next: None,
8311                        flags: SublistFlags::default(),
8312                    },
8313                    flags: ListFlags::default(),
8314                };
8315                Some(ZshCommand::FuncDef(ZshFuncDef {
8316                    names: vec![name],
8317                    body: Box::new(ZshProgram { lists: vec![list] }),
8318                    tracing: false,
8319                    auto_call_args: None,
8320                    body_source: None,
8321                }))
8322            }
8323            None => None,
8324        }
8325    }
8326}
8327
8328/// Parse conditional expression
8329/// Top of `[[ ]]` cond-expression parsing — entry to recursive
8330/// descent (or → and → not → primary). Direct port of zsh's
8331/// par_cond_1 at parse.c:2434-2475.
8332fn parse_cond_expr() -> Option<ZshCond> {
8333    parse_cond_or()
8334}
8335
8336/// Cond-expression `||` level. C: inside par_cond_1 at
8337/// parse.c:2434-2475 (the `cond_or` ladder).
8338fn parse_cond_or() -> Option<ZshCond> {
8339    let left = parse_cond_and()?;
8340    skip_cond_separators();
8341
8342    if tok() == DBAR {
8343        zshlex();
8344        skip_cond_separators();
8345        parse_cond_or().map(|right| ZshCond::Or(Box::new(left), Box::new(right)))
8346    } else {
8347        Some(left)
8348    }
8349}
8350
8351/// Cond-expression `&&` level. C: par_cond_2 at parse.c:2476-2625.
8352fn parse_cond_and() -> Option<ZshCond> {
8353    let left = parse_cond_not()?;
8354    skip_cond_separators();
8355
8356    if tok() == DAMPER {
8357        zshlex();
8358        skip_cond_separators();
8359        parse_cond_and().map(|right| ZshCond::And(Box::new(left), Box::new(right)))
8360    } else {
8361        Some(left)
8362    }
8363}
8364
8365/// `static FuncDump dumps;` from `Src/parse.c:3652` — head of the
8366/// loaded-`.zwc` linked list. C walks `dumps`/`p->next` directly;
8367/// the Rust port uses a `Mutex<Vec<funcdump>>` indexed by filename
8368/// so refcount ops can find an entry without raw-pointer compare.
8369pub static DUMPS: std::sync::Mutex<Vec<funcdump>> = std::sync::Mutex::new(Vec::new());
8370
8371/// Cond-expression `!` negation level. C: handled inside
8372/// par_cond_2 at parse.c:2476-2625 via the Bang token check.
8373fn parse_cond_not() -> Option<ZshCond> {
8374    skip_cond_separators();
8375
8376    // ! can be either BANG_TOK or String "!"
8377    let is_not =
8378        tok() == BANG_TOK || (tok() == STRING_LEX && tokstr().map(|s| s == "!").unwrap_or(false));
8379    if is_not {
8380        zshlex();
8381        let inner = parse_cond_not()?;
8382        return Some(ZshCond::Not(Box::new(inner)));
8383    }
8384
8385    if tok() == INPAR_TOK {
8386        zshlex();
8387        skip_cond_separators();
8388        // c:Src/parse.c:2534-2547 par_cond_2 INPAR branch — empty
8389        // body `[[ ( ) ]]` makes the inner par_cond's recursive
8390        // par_cond_2 see OUTPAR with no leading STRING/BANG/INPAR
8391        // and YYERROR immediately. Mirror that here: if the very
8392        // next token after `(` (post separator skip) is `)`, emit
8393        // a parse error so the script aborts cleanly instead of
8394        // silently swallowing every following command. Bug #538.
8395        if tok() == OUTPAR_TOK {
8396            yyerror("condition expected");
8397            return None;
8398        }
8399        let inner = parse_cond_expr()?;
8400        skip_cond_separators();
8401        if tok() == OUTPAR_TOK {
8402            zshlex();
8403        }
8404        return Some(inner);
8405    }
8406
8407    parse_cond_primary()
8408}
8409
8410/// Cond-expression primary: unary tests (-f, -d, ...), binary
8411/// tests (=, !=, <, >, ==, =~, -eq, -ne, ...), and parenthesized
8412/// sub-expressions. Direct port of par_cond_double / par_cond_triple
8413/// / par_cond_multi at parse.c:2626-2731 (chosen by arg count).
8414fn parse_cond_primary() -> Option<ZshCond> {
8415    let s1 = match tok() {
8416        STRING_LEX => {
8417            let s = tokstr().unwrap_or_default();
8418            zshlex();
8419            s
8420        }
8421        _ => return None,
8422    };
8423
8424    skip_cond_separators();
8425
8426    // Check for unary operator. zsh's lexer tokenizes leading `-` as
8427    // `zsh_h::Dash` (`\u{9b}`, `Src/zsh.h:182`) inside gettokstr (lex.c:1390-1400
8428    // LX2_DASH — `-` always becomes Dash, untokenized later). Match
8429    // either form here, and use char-count not byte-count since Dash
8430    // is 2 UTF-8 bytes (`\xc2\x9b`).
8431    //
8432    // c:Src/parse.c par_cond — when the leading token is `-` followed
8433    // ENTIRELY by digits (`-5`, `-123`), it's a numeric literal
8434    // operand, not a unary test flag. zsh's parser checks the C
8435    // `isdigit` of the trailing chars to disambiguate; without the
8436    // check, `[[ -5 -lt -3 ]]` reads `-5` as a one-arg test flag,
8437    // then `-lt` as the operand, then `-3` as a leftover token —
8438    // emitting "unknown condition: -5" and falling through to a
8439    // command-not-found dispatch on `-3`. Bug #121 in docs/BUGS.md.
8440    let s1_chars: Vec<char> = s1.chars().collect();
8441    let is_negative_number = s1_chars.len() >= 2
8442        && IS_DASH(s1_chars[0])
8443        && s1_chars[1..].iter().all(|c| c.is_ascii_digit());
8444    if s1_chars.len() == 2 && IS_DASH(s1_chars[0]) && !is_negative_number {
8445        let s2 = match tok() {
8446            STRING_LEX => {
8447                let s = tokstr().unwrap_or_default();
8448                zshlex();
8449                s
8450            }
8451            _ => {
8452                // c:Src/parse.c par_cond_2 — when the leading `-X`
8453                // is a 2-char dash form, zsh ALWAYS treats it as a
8454                // unary test op (the operand-missing case errors
8455                // immediately with `unknown condition: -X`). Don't
8456                // fall back to `Unary("-n", "-X")` — that path
8457                // silently let `[[ -z ]]` evaluate as
8458                // `[[ -n "-z" ]]` → true. Bug #480/#481.
8459                //
8460                // Convert Dash (\u{9b}) back to ASCII `-` for the
8461                // user-visible diagnostic so it reads "unknown
8462                // condition: -z" not "unknown condition: <Dash>z".
8463                let display: String = s1.chars().map(|c| {
8464                    if IS_DASH(c) { '-' } else { c }
8465                }).collect();
8466                crate::ported::utils::zerr(&format!(
8467                    "unknown condition: {}",
8468                    display
8469                ));
8470                return None;
8471            }
8472        };
8473        return Some(ZshCond::Unary(s1, s2));
8474    }
8475
8476    // Check for binary operator. Direct port of zsh/Src/parse.c:2601-2603:
8477    //   incond++;  /* parentheses do globbing */
8478    //   do condlex(); while (COND_SEP());
8479    //   incond--;  /* parentheses do grouping */
8480    // The bump makes the lexer treat `(` as a literal character inside
8481    // the RHS word (e.g. `[[ x =~ (foo) ]]`) instead of returning Inpar
8482    // and splitting the regex into multiple tokens.
8483    let op = match tok() {
8484        STRING_LEX => {
8485            let s = tokstr().unwrap_or_default();
8486            set_incond(incond() + 1);
8487            zshlex();
8488            set_incond(incond() - 1);
8489            s
8490        }
8491        INANG_TOK => {
8492            set_incond(incond() + 1);
8493            zshlex();
8494            set_incond(incond() - 1);
8495            "<".to_string()
8496        }
8497        OUTANG_TOK => {
8498            set_incond(incond() + 1);
8499            zshlex();
8500            set_incond(incond() - 1);
8501            ">".to_string()
8502        }
8503        _ => return Some(ZshCond::Unary("-n".to_string(), s1)),
8504    };
8505
8506    skip_cond_separators();
8507
8508    // c:Src/parse.c:2601-2625 par_cond_2 — only the documented binary
8509    // operators are accepted inside `[[ ... ]]`. zsh rejects ksh/bash
8510    // forms `-a` (logical AND) and `-o` (logical OR) with a parse
8511    // error ("condition expected") because they're not in the
8512    // par_cond_2 binary-op set — zsh uses `&&` / `||` instead.
8513    // Verified: `zsh -fc '[[ "" -a "x" ]]'` → exit 1, "parse error:
8514    // condition expected: ...". Without this gate, zshrs silently
8515    // built ZshCond::Binary("", "-a", "x") and ran an unknown-op
8516    // path that always evaluated false.
8517    // c:Src/parse.c:2601-2625 par_cond_2 — `-a` / `-o` n-ary chain
8518    // operators are not valid binary operators inside `[[ ... ]]`
8519    // (zsh uses `&&` / `||` instead). Match both the ASCII `-a`/
8520    // `-o` form and the tokenized `Dash+a`/`Dash+o` form that the
8521    // lexer emits inside cond bodies (Dash = \u{9b}, Src/zsh.h:182).
8522    let op_chars: Vec<char> = op.chars().collect();
8523    let is_dash_a_or_o =
8524        op_chars.len() == 2 && IS_DASH(op_chars[0]) && (op_chars[1] == 'a' || op_chars[1] == 'o');
8525    if is_dash_a_or_o {
8526        crate::ported::utils::zerr(&format!("parse error: condition expected: {}", s1));
8527        crate::ported::utils::errflag.fetch_or(
8528            crate::ported::zsh_h::ERRFLAG_ERROR,
8529            std::sync::atomic::Ordering::Relaxed,
8530        );
8531        set_tok(LEXERR);
8532        return None;
8533    }
8534
8535    let s2 = match tok() {
8536        STRING_LEX => {
8537            let s = tokstr().unwrap_or_default();
8538            zshlex();
8539            s
8540        }
8541        _ => {
8542            // c:Src/parse.c par_cond_2 — when a binary op is
8543            // recognized but the RHS operand is missing, zsh emits
8544            // `parse error: condition expected: <LHS>` at par_cond_2's
8545            // missing-rhs branch. zshrs's previous fallback returned
8546            // `Binary(s1, op, "")` which silently evaluated as if the
8547            // RHS were empty string → rc=1. Bug #482.
8548            //
8549            // Convert Dash (\u{9b}) back to ASCII `-` in the LHS
8550            // display so the diagnostic reads cleanly.
8551            let display: String = s1.chars().map(|c| {
8552                if IS_DASH(c) { '-' } else { c }
8553            }).collect();
8554            crate::ported::utils::zerr(&format!(
8555                "parse error: condition expected: {}",
8556                display
8557            ));
8558            crate::ported::utils::errflag.fetch_or(
8559                crate::ported::zsh_h::ERRFLAG_ERROR,
8560                std::sync::atomic::Ordering::Relaxed,
8561            );
8562            set_tok(LEXERR);
8563            return None;
8564        }
8565    };
8566
8567    if op == "=~" {
8568        Some(ZshCond::Regex(s1, s2))
8569    } else {
8570        Some(ZshCond::Binary(s1, op, s2))
8571    }
8572}
8573
8574fn skip_cond_separators() {
8575    while tok() == SEPER && {
8576        let s = tokstr();
8577        s.map(|s| !s.contains(';')).unwrap_or(true)
8578    } {
8579        zshlex();
8580    }
8581}
8582
8583/// Parse (( ... )) arithmetic command
8584/// Parse `(( EXPR ))` arithmetic command. C source: parse.c:1810-1834
8585/// `par_dinbrack` (despite the name; the function actually handles
8586/// DINPAR `(( ))` blocks too).
8587fn parse_arith() -> Option<ZshCommand> {
8588    let expr = tokstr().unwrap_or_default();
8589    zshlex();
8590    Some(ZshCommand::Arith(expr))
8591}
8592
8593/// Skip separator tokens
8594fn skip_separators() {
8595    while tok() == SEPER || tok() == NEWLIN {
8596        zshlex();
8597    }
8598}
8599
8600// `fdheaderlen` / `fdmagic` / `fdflags` / etc. macros from
8601// `Src/parse.c:3125-3152`. C uses raw pointer arithmetic on a
8602// `Wordcode` (= `u32 *`); the Rust port takes a slice and indexes.
8603
8604/// Port of `fdheaderlen(f)` macro (`Src/parse.c:3125`) — header
8605/// length in u32 words (read from prelude word `FD_PRELEN`).
8606#[inline]
8607pub fn fdheaderlen(f: &[u32]) -> u32 {
8608    f[FD_PRELEN]
8609}
8610
/// Port of the `fdmagic(f)` macro (`Src/parse.c:3127`).
///
/// Returns the first prelude word, which is either `FD_MAGIC` or
/// `FD_OMAGIC`. Panics if `f` is empty.
#[inline]
pub fn fdmagic(f: &[u32]) -> u32 {
    // The magic number is always word 0 of the dump.
    f[0]
}
8617
/// Port of the `fdflags(f)` macro (`Src/parse.c:3131`).
///
/// Returns the least-significant byte of the packed `pre[1]` word,
/// widened to `u32`. Panics if `f` has fewer than two words.
#[inline]
pub fn fdflags(f: &[u32]) -> u32 {
    // Byte 0 of the little-endian encoding is the low byte of the word.
    let low_byte = f[1].to_le_bytes()[0];
    u32::from(low_byte)
}
8625
/// Port of the `fdsetflags(f, v)` macro (`Src/parse.c:3132`).
///
/// Overwrites the low byte of `pre[1]` with `v`, leaving the upper
/// 24 bits untouched. Panics if `f` has fewer than two words.
#[inline]
pub fn fdsetflags(f: &mut [u32], v: u8) {
    let upper_bits = f[1] & !0xff;
    f[1] = upper_bits | u32::from(v);
}
8632
/// Port of the `fdother(f)` macro (`Src/parse.c:3133`).
///
/// Returns the upper 24 bits of `pre[1]`, which hold the byte offset
/// to the opposite-byte-order copy of the dump. Panics if `f` has
/// fewer than two words.
#[inline]
pub fn fdother(f: &[u32]) -> u32 {
    // Shifting a u32 right by 8 leaves exactly the upper 24 bits, so
    // no further masking is needed.
    let packed = f[1];
    packed >> 8
}
8640
/// Port of the `fdsetother(f, o)` macro (`Src/parse.c:3134`).
///
/// Stores the low 24 bits of `o` in the upper 24 bits of `pre[1]`,
/// preserving the low (flags) byte. Panics if `f` has fewer than two
/// words.
#[inline]
pub fn fdsetother(f: &mut [u32], o: u32) {
    let flags_byte = f[1] & 0xff;
    // The left shift drops any bits of `o` above the low 24.
    let other_bits = o << 8;
    f[1] = flags_byte | other_bits;
}
8646
/// Port of the `fdversion(f)` macro (`Src/parse.c:3140`).
///
/// Reads the `ZSH_VERSION` C-string stored from `pre[2]` onwards: at
/// most ten words are decoded as little-endian bytes and the text
/// ends at the first NUL (or after all decoded bytes if there is
/// none). Invalid UTF-8 is replaced lossily. Panics if `f` has fewer
/// than two words.
pub fn fdversion(f: &[u32]) -> String {
    let mut raw: Vec<u8> = Vec::new();
    for word in f[2..].iter().take(10) {
        raw.extend_from_slice(&word.to_le_bytes());
    }
    // Keep only the bytes in front of the terminating NUL, if any.
    let text = raw.split(|&byte| byte == 0).next().unwrap_or_default();
    String::from_utf8_lossy(text).into_owned()
}
8658
8659/// Port of `firstfdhead(f)` macro (`Src/parse.c:3142`) — pointer
8660/// to the first `struct fdhead` past the prelude.
8661#[inline]
8662pub fn firstfdhead_offset() -> usize {
8663    FD_PRELEN
8664}
8665
/// Port of the `nextfdhead(f)` macro (`Src/parse.c:3143`).
///
/// Given the word offset `cur` of a header, returns the offset of the
/// next one by adding the header's `hlen` slot (word 4 of the
/// record). Panics if `cur + 4` is out of bounds for `f`.
#[inline]
pub fn nextfdhead_offset(f: &[u32], cur: usize) -> usize {
    // `.hlen` is field 4 of fdhead.
    let hlen = f[cur + 4] as usize;
    cur + hlen
}
8672
8673/// Port of `fdhflags(f)` macro (`Src/parse.c:3145`) — low 2 bits
8674/// of the header's `flags` field (the kshload/zshload marker).
8675#[inline]
8676pub fn fdhflags(h: &fdhead) -> u32 {
8677    h.flags & 0x3
8678}
8679
8680/// Port of `fdhtail(f)` macro (`Src/parse.c:3146`) — high 30 bits
8681/// of `flags`, byte offset from the name start to its basename.
8682#[inline]
8683pub fn fdhtail(h: &fdhead) -> u32 {
8684    h.flags >> 2
8685}
8686
/// Port of the `fdhbldflags(f, t)` macro (`Src/parse.c:3147`).
///
/// Packs `(flags, tail)` into one u32: `tail` is shifted into the
/// upper 30 bits and OR-ed with `flags`. `flags` is not masked, so
/// callers are expected to pass a value that fits in the low 2 bits.
#[inline]
pub fn fdhbldflags(flags: u32, tail: u32) -> u32 {
    let tail_bits = tail << 2;
    tail_bits | flags
}
8693
8694/// Port of `fdname(f)` macro (`Src/parse.c:3152`) — name string
8695/// follows the fdhead record immediately. Reads bytes from the
8696/// dump buffer until NUL.
8697pub fn fdname(buf: &[u32], header_offset: usize) -> String {
8698    let name_word_off = header_offset + FDHEAD_WORDS;
8699    let bytes: Vec<u8> = buf[name_word_off..]
8700        .iter()
8701        .flat_map(|w| w.to_le_bytes().into_iter())
8702        .take_while(|&b| b != 0)
8703        .collect();
8704    String::from_utf8_lossy(&bytes).into_owned()
8705}
8706
8707/// Decode a `fdhead` record at the given u32-word offset in the
8708/// dump buffer. Used by the header-walk loops in `bin_zcompile -t`.
8709pub fn read_fdhead(buf: &[u32], offset: usize) -> Option<fdhead> {
8710    if offset + FDHEAD_WORDS > buf.len() {
8711        return None;
8712    }
8713    Some(fdhead {
8714        start: buf[offset],
8715        len: buf[offset + 1],
8716        npats: buf[offset + 2],
8717        strs: buf[offset + 3],
8718        hlen: buf[offset + 4],
8719        flags: buf[offset + 5],
8720    })
8721}
8722
8723/// Port of `freedump(FuncDump f)` from `Src/parse.c:3976`. C
8724/// `munmap`s, `zclose`s the fd, and frees the struct. The Rust
8725/// port relies on Drop for the `funcdump` (no mmap held in this
8726/// port — `addr`/`map` are byte-offset placeholders), so the
8727/// equivalent is removing the entry from the dumps list. Called
8728/// by `decrdumpcount` when the refcount hits zero (c:3988) and
8729/// by `closedumps` when shutting down (c:4008).
8730fn freedump_locked(g: &mut std::sync::MutexGuard<'_, Vec<funcdump>>, filename: &str) {
8731    // c:3976
8732    g.retain(|d| d.filename.as_deref() != Some(filename));
8733}
8734
8735// =====================================================================
8736// Remaining `Src/parse.c` ports (this section finishes the file).
8737//
8738// Several of these emit into the C-wordcode buffer (`ECBUF`/etc.) and
8739// are kept for completeness — the live zshrs runtime uses the
8740// `ZshProgram` AST path instead, but `bin_zcompile` (`-c`/`-a` modes)
8741// and any future `.zwc`-emit pipeline both call into these.
8742// =====================================================================
8743
8744/// `ecstr(s)` helper — `ecadd(ecstrcode(s))`. Mirrors the C macro at
8745/// `Src/parse.c:482` used everywhere by the par_* emitters.
8746#[inline]
8747pub fn ecstr(s: &str) {
8748    let code = ecstrcode(s);
8749    ecadd(code);
8750}
8751
8752/// Port of `condlex` function-pointer global from `Src/parse.c`. C
8753/// flips this between `zshlex` and `testlex` depending on whether
8754/// we're inside `[[ ]]` vs `/bin/test` builtin. zshrs has no
8755/// separate `testlex` yet, so this just defers to `zshlex`.
8756#[inline]
8757pub fn condlex() {
8758    zshlex();
8759}
8760
8761fn copy_ecstr_walk(node: &Option<Box<EccstrNode>>, p: &mut [u8]) {
8762    let mut cur = node.as_ref();
8763    while let Some(n) = cur {
8764        // c:540 — `memcpy(p + s->aoffs, s->str, strlen(s->str) + 1);`
8765        let off = n.aoffs as usize;
8766        let need = off + n.str.len() + 1;
8767        if need <= p.len() {
8768            p[off..off + n.str.len()].copy_from_slice(&n.str);
8769            p[off + n.str.len()] = 0;
8770        }
8771        // c:541 — `copy_ecstr(s->left, p);`
8772        copy_ecstr_walk(&n.left, p);
8773        // c:542 — `s = s->right;`
8774        cur = n.right.as_ref();
8775    }
8776}
8777
8778/// Port of `par_cond(void)` from `Src/parse.c:2409`. Top-level cond
8779/// OR-chain — drives `par_cond_1` and stitches `||`-separated terms
8780/// with `WCB_COND(COND_OR, …)`. This is the missing top of the
8781/// wordcode cond chain: `par_cond_wordcode` (the par_dinbrack port)
8782/// must call into HERE so that `[[ a || b ]]` and friends land
8783/// real WC_COND opcodes in `ecbuf`. Without this, the wordcode
8784/// emitter for `[[ ... ]]` produced zero words and parity dropped
8785/// 148 words on `/etc/zshrc` alone.
8786pub fn par_cond_top() -> i32 {
8787    // c:2411 — `int p = ecused, r;`
8788    let p = ECUSED.with(|c| c.get()) as usize;
8789    let r = par_cond_1();
8790    while COND_SEP() {
8791        condlex();
8792    }
8793    if tok() == DBAR {
8794        // c:2417 — `condlex(); while (COND_SEP()) condlex();`
8795        condlex();
8796        while COND_SEP() {
8797            condlex();
8798        }
8799        // c:2420-2422 — `ecispace(p, 1); par_cond(); ecbuf[p] =
8800        // WCB_COND(COND_OR, ecused-1-p);`
8801        ecispace(p, 1);
8802        par_cond_top();
8803        let ecused = ECUSED.with(|c| c.get()) as usize;
8804        ECBUF.with(|c| {
8805            c.borrow_mut()[p] = WCB_COND(COND_OR as u32, (ecused - 1 - p) as u32);
8806        });
8807        return 1;
8808    }
8809    r
8810}
8811
8812/// Port of `static int check_cond(const char *input, const char *cond)`
8813/// from `Src/parse.c:2459`. True iff `input` is the two-char `-X`
8814/// form whose `X` matches `cond` — used by par_cond_2 to detect
8815/// `-a` / `-o` n-ary chain operators and by build_dump for `-k` /
8816/// `-z`. C: `return !IS_DASH(input[0]) ? 0 : !strcmp(input+1, cond);`.
8817fn check_cond(input: &str, cond: &str) -> bool {
8818    let mut chars = input.chars();
8819    match chars.next() {
8820        Some(c) if IS_DASH(c) => chars.as_str() == cond,
8821        _ => false,
8822    }
8823}
8824
8825#[cfg(test)]
8826mod tests {
8827    use super::*;
8828    use crate::utils::{errflag, ERRFLAG_ERROR};
8829    use std::fs;
8830    use std::path::Path;
8831    use std::sync::atomic::Ordering;
8832    use std::sync::mpsc;
8833    use std::thread;
8834    use std::time::Duration;
8835
8836    /// `try_source_file` MUST refuse a stale `.zwc` cache when the
8837    /// uncompiled source has been modified more recently. The C body
8838    /// at c:3819 reads `stc.st_mtime >= stn.st_mtime` — explicitly
8839    /// `>=`, meaning only an equal-or-newer zwc is acceptable.
8840    ///
8841    /// A regression that ignored the mtime check (or used the wrong
8842    /// direction) would silently keep loading the OLD compiled body
8843    /// after the user edited the source file — every `source foo.zsh`
8844    /// would replay yesterday's code, the worst-class shell bug.
8845    ///
8846    /// Pin: create source + .zwc, then touch source to make it
8847    /// newer. try_source_file must return None.
8848    #[test]
8849    fn try_source_file_skips_stale_zwc() {
8850        let _g = crate::test_util::global_state_lock();
8851        let dir = tempfile::tempdir().expect("tempdir");
8852        let src = dir.path().join("script.zsh");
8853        let zwc = dir.path().join("script.zsh.zwc");
8854        // Create zwc FIRST (older), then source (newer).
8855        fs::write(&zwc, b"placeholder zwc").unwrap();
8856        thread::sleep(Duration::from_millis(20));
8857        fs::write(&src, b"echo hi").unwrap();
8858
8859        let result = try_source_file(src.to_str().unwrap());
8860        assert!(
8861            result.is_none(),
8862            "c:3819 — stale .zwc (older than source) MUST be rejected; \
8863             got {:?}",
8864            result
8865        );
8866    }
8867
8868    /// `try_source_file` returns None when no `.zwc` exists for the
8869    /// requested file (c:3819 `if let Ok(meta_c) = &stc` gate fails).
8870    /// This is the common case — most user scripts don't ship with
8871    /// a pre-compiled `.zwc`. The fn returning None lets the caller
8872    /// fall through to the source-read path. A regression that
8873    /// returned `Some(file)` on missing `.zwc` would route every
8874    /// `source foo.zsh` through `check_dump_file` against a
8875    /// non-existent file and crash.
8876    #[test]
8877    fn try_source_file_returns_none_when_no_zwc() {
8878        let _g = crate::test_util::global_state_lock();
8879        let dir = tempfile::tempdir().expect("tempdir");
8880        let src = dir.path().join("plain.zsh");
8881        fs::write(&src, b"echo hi").unwrap();
8882        // No .zwc sibling.
8883
8884        let result = try_source_file(src.to_str().unwrap());
8885        assert!(
8886            result.is_none(),
8887            "c:3819 gate fails when stat(wc) returns Err → None"
8888        );
8889    }
8890
8891    /// Test helper. Mirrors zsh's `errflag` save/clear/check pattern
8892    /// around a parse — see `Src/init.c:loop` which clears errflag
8893    /// before parse_event() and tests it after. Returns `Err` if the
8894    /// parse set `ERRFLAG_ERROR`; otherwise `Ok(program)`.
8895    fn parse(input: &str) -> Result<ZshProgram, String> {
8896        let saved = errflag.load(Ordering::Relaxed);
8897        errflag.fetch_and(!ERRFLAG_ERROR, Ordering::Relaxed);
8898        parse_init(input);
8899        let prog = crate::ported::parse::parse();
8900        let had_err = (errflag.load(Ordering::Relaxed) & ERRFLAG_ERROR) != 0;
8901        // Restore prior error bits; don't carry our new error into the
8902        // outer test runner.
8903        errflag.store(saved, Ordering::Relaxed);
8904        if had_err {
8905            Err("parse error".to_string())
8906        } else {
8907            Ok(prog)
8908        }
8909    }
8910
8911    #[test]
8912    fn test_simple_command() {
8913        let _g = crate::test_util::global_state_lock();
8914        let prog = parse("echo hello world").unwrap();
8915        assert_eq!(prog.lists.len(), 1);
8916        match &prog.lists[0].sublist.pipe.cmd {
8917            ZshCommand::Simple(s) => {
8918                assert_eq!(s.words, vec!["echo", "hello", "world"]);
8919            }
8920            _ => panic!("expected simple command"),
8921        }
8922    }
8923
8924    #[test]
8925    fn test_pipeline() {
8926        let _g = crate::test_util::global_state_lock();
8927        let prog = parse("ls | grep foo | wc -l").unwrap();
8928        assert_eq!(prog.lists.len(), 1);
8929
8930        let pipe = &prog.lists[0].sublist.pipe;
8931        assert!(pipe.next.is_some());
8932
8933        let pipe2 = pipe.next.as_ref().unwrap();
8934        assert!(pipe2.next.is_some());
8935    }
8936
8937    #[test]
8938    fn test_and_or() {
8939        let _g = crate::test_util::global_state_lock();
8940        let prog = parse("cmd1 && cmd2 || cmd3").unwrap();
8941        let sublist = &prog.lists[0].sublist;
8942
8943        assert!(sublist.next.is_some());
8944        let (op, _) = sublist.next.as_ref().unwrap();
8945        assert_eq!(*op, SublistOp::And);
8946    }
8947
8948    #[test]
8949    fn test_if_then() {
8950        let _g = crate::test_util::global_state_lock();
8951        let prog = parse("if test -f foo; then echo yes; fi").unwrap();
8952        match &prog.lists[0].sublist.pipe.cmd {
8953            ZshCommand::If(_) => {}
8954            _ => panic!("expected if command"),
8955        }
8956    }
8957
8958    #[test]
8959    fn test_for_loop() {
8960        let _g = crate::test_util::global_state_lock();
8961        let prog = parse("for i in a b c; do echo $i; done").unwrap();
8962        match &prog.lists[0].sublist.pipe.cmd {
8963            ZshCommand::For(f) => {
8964                assert_eq!(f.var, "i");
8965                match &f.list {
8966                    ForList::Words(w) => assert_eq!(w, &vec!["a", "b", "c"]),
8967                    _ => panic!("expected word list"),
8968                }
8969            }
8970            _ => panic!("expected for command"),
8971        }
8972    }
8973
8974    #[test]
8975    fn test_case() {
8976        let _g = crate::test_util::global_state_lock();
8977        let prog = parse("case $x in a) echo a;; b) echo b;; esac").unwrap();
8978        match &prog.lists[0].sublist.pipe.cmd {
8979            ZshCommand::Case(c) => {
8980                assert_eq!(c.arms.len(), 2);
8981            }
8982            _ => panic!("expected case command"),
8983        }
8984    }
8985
8986    #[test]
8987    fn test_function() {
8988        let _g = crate::test_util::global_state_lock();
8989        // First test just parsing "function foo" to see what happens
8990        let prog = parse("function foo { }").unwrap();
8991        match &prog.lists[0].sublist.pipe.cmd {
8992            ZshCommand::FuncDef(f) => {
8993                assert_eq!(f.names, vec!["foo"]);
8994            }
8995            _ => panic!(
8996                "expected function, got {:?}",
8997                prog.lists[0].sublist.pipe.cmd
8998            ),
8999        }
9000    }
9001
9002    #[test]
9003    fn test_redirection() {
9004        let _g = crate::test_util::global_state_lock();
9005        let prog = parse("echo hello > file.txt").unwrap();
9006        match &prog.lists[0].sublist.pipe.cmd {
9007            ZshCommand::Simple(s) => {
9008                assert_eq!(s.redirs.len(), 1);
9009                assert_eq!(s.redirs[0].rtype, REDIR_WRITE);
9010            }
9011            _ => panic!("expected simple command"),
9012        }
9013    }
9014
9015    #[test]
9016    fn test_assignment() {
9017        let _g = crate::test_util::global_state_lock();
9018        let prog = parse("FOO=bar echo $FOO").unwrap();
9019        match &prog.lists[0].sublist.pipe.cmd {
9020            ZshCommand::Simple(s) => {
9021                assert_eq!(s.assigns.len(), 1);
9022                assert_eq!(s.assigns[0].name, "FOO");
9023            }
9024            _ => panic!("expected simple command"),
9025        }
9026    }
9027
9028    #[test]
9029    fn test_parse_completion_function() {
9030        let _g = crate::test_util::global_state_lock();
9031        let input = r#"_2to3_fixes() {
9032  local -a fixes
9033  fixes=( ${${(M)${(f)"$(2to3 --list-fixes 2>/dev/null)"}:#*}//[[:space:]]/} )
9034  (( ${#fixes} )) && _describe -t fixes 'fix' fixes
9035}"#;
9036        let result = parse(input);
9037        assert!(
9038            result.is_ok(),
9039            "Failed to parse completion function: {:?}",
9040            result.err()
9041        );
9042        let prog = result.unwrap();
9043        assert!(
9044            !prog.lists.is_empty(),
9045            "Expected at least one list in program"
9046        );
9047    }
9048
9049    #[test]
9050    fn test_parse_array_with_complex_elements() {
9051        let _g = crate::test_util::global_state_lock();
9052        let input = r#"arguments=(
9053  '(- * :)'{-h,--help}'[show this help message and exit]'
9054  {-d,--doctests_only}'[fix up doctests only]'
9055  '*:filename:_files'
9056)"#;
9057        let result = parse(input);
9058        assert!(
9059            result.is_ok(),
9060            "Failed to parse array assignment: {:?}",
9061            result.err()
9062        );
9063    }
9064
9065    #[test]
9066    fn test_parse_full_completion_file() {
9067        let _g = crate::test_util::global_state_lock();
9068        let input = r##"#compdef 2to3
9069
9070# zsh completions for '2to3'
9071
9072_2to3_fixes() {
9073  local -a fixes
9074  fixes=( ${${(M)${(f)"$(2to3 --list-fixes 2>/dev/null)"}:#*}//[[:space:]]/} )
9075  (( ${#fixes} )) && _describe -t fixes 'fix' fixes
9076}
9077
9078local -a arguments
9079
9080arguments=(
9081  '(- * :)'{-h,--help}'[show this help message and exit]'
9082  {-d,--doctests_only}'[fix up doctests only]'
9083  {-f,--fix}'[each FIX specifies a transformation; default: all]:fix name:_2to3_fixes'
9084  {-j,--processes}'[run 2to3 concurrently]:number: '
9085  {-x,--nofix}'[prevent a transformation from being run]:fix name:_2to3_fixes'
9086  {-l,--list-fixes}'[list available transformations]'
9087  {-p,--print-function}'[modify the grammar so that print() is a function]'
9088  {-v,--verbose}'[more verbose logging]'
9089  '--no-diffs[do not show diffs of the refactoring]'
9090  {-w,--write}'[write back modified files]'
9091  {-n,--nobackups}'[do not write backups for modified files]'
9092  {-o,--output-dir}'[put output files in this directory instead of overwriting]:directory:_directories'
9093  {-W,--write-unchanged-files}'[also write files even if no changes were required]'
9094  '--add-suffix[append this string to all output filenames]:suffix: '
9095  '*:filename:_files'
9096)
9097
9098_arguments -s -S $arguments
9099"##;
9100        let result = parse(input);
9101        assert!(
9102            result.is_ok(),
9103            "Failed to parse full completion file: {:?}",
9104            result.err()
9105        );
9106        let prog = result.unwrap();
9107        // Should have parsed successfully with at least one statement
9108        assert!(!prog.lists.is_empty(), "Expected at least one list");
9109    }
9110
9111    #[test]
9112    fn test_parse_logs_sh() {
9113        let _g = crate::test_util::global_state_lock();
9114        let input = r#"#!/usr/bin/env bash
9115shopt -s globstar
9116
9117if [[ $(uname) == Darwin ]]; then
9118    tail -f /var/log/**/*.log /var/log/**/*.out | lolcat
9119else
9120    if [[ $ZPWR_DISTRO_NAME == raspbian ]]; then
9121        tail -f /var/log/**/*.log | lolcat
9122    else
9123        printf "Unsupported...\n" >&2
9124    fi
9125fi
9126"#;
9127        let result = parse(input);
9128        assert!(
9129            result.is_ok(),
9130            "Failed to parse logs.sh: {:?}",
9131            result.err()
9132        );
9133    }
9134
9135    #[test]
9136    fn test_parse_case_with_glob() {
9137        let _g = crate::test_util::global_state_lock();
9138        let input = r#"case "$ZPWR_OS_TYPE" in
9139    darwin*)  open_cmd='open'
9140      ;;
9141    cygwin*)  open_cmd='cygstart'
9142      ;;
9143    linux*)
9144        open_cmd='xdg-open'
9145      ;;
9146esac"#;
9147        let result = parse(input);
9148        assert!(
9149            result.is_ok(),
9150            "Failed to parse case with glob: {:?}",
9151            result.err()
9152        );
9153    }
9154
9155    #[test]
9156    fn test_parse_case_with_nested_if() {
9157        let _g = crate::test_util::global_state_lock();
9158        // Test case with nested if and glob patterns
9159        let input = r##"function zpwrGetOpenCommand(){
9160    local open_cmd
9161    case "$ZPWR_OS_TYPE" in
9162        darwin*)  open_cmd='open' ;;
9163        cygwin*)  open_cmd='cygstart' ;;
9164        linux*)
9165            if [[ "$_zpwr_uname_r" != *icrosoft* ]];then
9166                open_cmd='nohup xdg-open'
9167            fi
9168            ;;
9169    esac
9170}"##;
9171        let result = parse(input);
9172        assert!(result.is_ok(), "Failed to parse: {:?}", result.err());
9173    }
9174
9175    #[test]
9176    fn test_parse_zpwr_scripts() {
9177        let _g = crate::test_util::global_state_lock();
9178        let scripts_dir = Path::new("/Users/wizard/.zpwr/scripts");
9179        if !scripts_dir.exists() {
9180            eprintln!("Skipping test: scripts directory not found");
9181            return;
9182        }
9183
9184        let mut total = 0;
9185        let mut passed = 0;
9186        let mut failed_files = Vec::new();
9187        let mut timeout_files = Vec::new();
9188
9189        for ext in &["sh", "zsh"] {
9190            let pattern = scripts_dir.join(format!("*.{}", ext));
9191            if let Ok(entries) = glob::glob(pattern.to_str().unwrap()) {
9192                for entry in entries.flatten() {
9193                    total += 1;
9194                    let file_path = entry.display().to_string();
9195                    let content = match fs::read_to_string(&entry) {
9196                        Ok(c) => c,
9197                        Err(e) => {
9198                            failed_files.push((file_path, format!("read error: {}", e)));
9199                            continue;
9200                        }
9201                    };
9202
9203                    // Parse with timeout
9204                    let content_clone = content.clone();
9205                    let (tx, rx) = mpsc::channel();
9206                    let handle = thread::spawn(move || {
9207                        let result = parse(&content_clone);
9208                        let _ = tx.send(result);
9209                    });
9210
9211                    match rx.recv_timeout(Duration::from_secs(2)) {
9212                        Ok(Ok(_)) => passed += 1,
9213                        Ok(Err(err)) => {
9214                            failed_files.push((file_path, err));
9215                        }
9216                        Err(_) => {
9217                            timeout_files.push(file_path);
9218                            // Thread will be abandoned
9219                        }
9220                    }
9221                }
9222            }
9223        }
9224
9225        eprintln!("\n=== ZPWR Scripts Parse Results ===");
9226        eprintln!("Passed: {}/{}", passed, total);
9227
9228        if !timeout_files.is_empty() {
9229            eprintln!("\nTimeout files (>2s):");
9230            for file in &timeout_files {
9231                eprintln!("  {}", file);
9232            }
9233        }
9234
9235        if !failed_files.is_empty() {
9236            eprintln!("\nFailed files:");
9237            for (file, err) in &failed_files {
9238                eprintln!("  {} - {}", file, err);
9239            }
9240        }
9241
9242        // Allow some failures initially, but track progress
9243        let pass_rate = if total > 0 {
9244            (passed as f64 / total as f64) * 100.0
9245        } else {
9246            0.0
9247        };
9248        eprintln!("Pass rate: {:.1}%", pass_rate);
9249
9250        // Require at least 50% pass rate for now
9251        assert!(pass_rate >= 50.0, "Pass rate too low: {:.1}%", pass_rate);
9252    }
9253
9254    /// c:2643 — `get_cond_num` returns 0..=8 for the canonical binary
9255    /// test operators in order `nt ot ef eq ne lt gt le ge`. The
9256    /// index IS the wordcode opcode dispatch key; flipping any entry
9257    /// would silently mis-dispatch `[[ a -eq b ]]` to a different op.
9258    #[test]
9259    fn get_cond_num_canonical_order_matches_dispatch_table() {
9260        let _g = crate::test_util::global_state_lock();
9261        assert_eq!(get_cond_num("nt"), 0);
9262        assert_eq!(get_cond_num("ot"), 1);
9263        assert_eq!(get_cond_num("ef"), 2);
9264        assert_eq!(get_cond_num("eq"), 3);
9265        assert_eq!(get_cond_num("ne"), 4);
9266        assert_eq!(get_cond_num("lt"), 5);
9267        assert_eq!(get_cond_num("gt"), 6);
9268        assert_eq!(get_cond_num("le"), 7);
9269        assert_eq!(get_cond_num("ge"), 8);
9270    }
9271
9272    /// c:2643 — unknown operator returns -1 (sentinel for "not in the
9273    /// binary set"). Regression returning 0 silently would alias
9274    /// every unknown op to `-nt`, dispatching to the wrong handler.
9275    #[test]
9276    fn get_cond_num_unknown_operator_returns_minus_one() {
9277        let _g = crate::test_util::global_state_lock();
9278        assert_eq!(get_cond_num("xx"), -1);
9279        assert_eq!(get_cond_num(""), -1);
9280        assert_eq!(get_cond_num("eqnt"), -1, "exact-match required");
9281        assert_eq!(
9282            get_cond_num("NT"),
9283            -1,
9284            "case-sensitive — uppercase rejected"
9285        );
9286    }
9287
9288    /// c:2628 — `par_cond_double` requires arg `a` to start with `-`
9289    /// AND have at least one more char. Empty string OR single `-`
9290    /// must error (return 1 via zerr). Regression accepting empty
9291    /// would dispatch `[[ "" string ]]` as a unary test.
9292    #[test]
9293    fn par_cond_double_rejects_short_or_non_dash_first_arg() {
9294        let _g = crate::test_util::global_state_lock();
9295        // empty
9296        let _ = par_cond_double("", "b");
9297        // not-dash
9298        let _ = par_cond_double("foo", "b");
9299        // bare dash
9300        let _ = par_cond_double("-", "b");
9301        // All three must NOT crash + return 1 (error path).
9302    }
9303
9304    /// c:2647 CONDSTRS table — exhaustive iteration: every entry's
9305    /// index round-trips through get_cond_num. A regression that
9306    /// drops an entry would let `[[ a -ef b ]]` silently mis-dispatch.
9307    #[test]
9308    fn get_cond_num_round_trips_for_every_table_entry() {
9309        let _g = crate::test_util::global_state_lock();
9310        for (i, op) in ["nt", "ot", "ef", "eq", "ne", "lt", "gt", "le", "ge"]
9311            .iter()
9312            .enumerate()
9313        {
9314            assert_eq!(get_cond_num(op) as usize, i, "{op} must map to index {i}");
9315        }
9316    }
9317
9318    /// c:2643 — `get_cond_num` is byte-exact: a partial-prefix string
9319    /// must NOT match. `e` (one char) is not `eq`. Catches a
9320    /// regression using `starts_with` instead of equality.
9321    #[test]
9322    fn get_cond_num_partial_prefix_does_not_match() {
9323        let _g = crate::test_util::global_state_lock();
9324        assert_eq!(get_cond_num("e"), -1);
9325        assert_eq!(get_cond_num("eq2"), -1);
9326        assert_eq!(get_cond_num("n"), -1);
9327    }
9328
9329    /// c:2628 — `par_cond_double` checks `IS_DASH(ac[0])` so any
9330    /// non-dash first char fails. The lexed Dash sentinel `\u{9b}`
9331    /// MUST be accepted alongside ASCII `-` (the lexer emits it
9332    /// inside `[[ ... ]]`). Regression dropping the sentinel form
9333    /// would break every cond expression after lexing.
9334    #[test]
9335    fn par_cond_double_accepts_lexed_dash_sentinel() {
9336        let _g = crate::test_util::global_state_lock();
9337        // First char being the Dash sentinel + valid unary letter
9338        // must NOT trigger the "condition expected" error path.
9339        // We can't easily probe the wordcode emission here, but
9340        // the function MUST return without panic for both forms.
9341        let _ = par_cond_double("-z", "foo");
9342        let _ = par_cond_double("\u{9b}z", "foo");
9343    }
9344
9345    /// c:2643 — case sensitivity: uppercase `EQ` MUST NOT match `eq`.
9346    /// zsh's `[[ a -EQ b ]]` is documented as a parse error (only
9347    /// lowercase variants are recognised). Regression doing
9348    /// case-insensitive lookup would silently accept it.
9349    #[test]
9350    fn get_cond_num_is_case_sensitive() {
9351        let _g = crate::test_util::global_state_lock();
9352        assert_eq!(get_cond_num("EQ"), -1);
9353        assert_eq!(get_cond_num("Eq"), -1);
9354        assert_eq!(get_cond_num("eQ"), -1);
9355        // Lowercase still works.
9356        assert_eq!(get_cond_num("eq"), 3);
9357    }
9358
9359    /// `Src/parse.c:2862-2868` — `ecgetstr` inline-3-byte case packs
9360    /// up to 3 chars into bits 3-26 of the wordcode word, then C emits
9361    /// `buf[3] = '\0'; r = dupstring(buf);`. `dupstring` uses `strlen`
9362    /// so the resulting string TRUNCATES at the first NUL byte —
9363    /// short strings of 1 or 2 chars get their tail NUL-padded and
9364    /// silently dropped by strlen.
9365    ///
9366    /// The previous Rust port used `retain(|&x| x != 0)` which SPLICES
9367    /// OUT interior NULs (so `[a, 0, b]` would yield "ab" instead of
9368    /// C's "a"). Verify both endpoints work correctly:
9369    ///   * 1-char string ("a", 0, 0)        → "a"   (strlen-truncate)
9370    ///   * 2-char string ("ab", 0)          → "ab"  (strlen-truncate)
9371    ///   * 3-char string ("abc")            → "abc" (full)
9372    ///   * pathological ("a", 0, "b")       → "a"   (NOT "ab")
9373    #[test]
9374    fn ecgetstr_inline_string_truncates_at_first_nul_like_c_strlen() {
9375        let _g = crate::test_util::global_state_lock();
9376        // Build a wordcode word with `c & 2 != 0` (inline-string flag)
9377        // and the 3 bytes packed at offsets 3, 11, 19. `c & 1` is the
9378        // tokflag; clear it for this test.
9379        fn pack_inline(b0: u8, b1: u8, b2: u8) -> u32 {
9380            // c:2862 layout — bit0 = tokflag (0 here), bit1 = inline (1),
9381            // bits 3-10 = b0, bits 11-18 = b1, bits 19-26 = b2.
9382            (2u32) | ((b0 as u32) << 3) | ((b1 as u32) << 11) | ((b2 as u32) << 19)
9383        }
9384        let mk_state = |word: u32| -> estate {
9385            let p = eprog {
9386                flags: 0,
9387                len: 1,
9388                npats: 0,
9389                nref: 0,
9390                pats: Vec::new(),
9391                prog: vec![word],
9392                strs: None,
9393                shf: None,
9394                dump: None,
9395            };
9396            estate {
9397                prog: Box::new(p),
9398                pc: 0,
9399                strs: None,
9400                strs_offset: 0,
9401            }
9402        };
9403
9404        // 1-char: ('a', 0, 0) → "a"
9405        let mut st = mk_state(pack_inline(b'a', 0, 0));
9406        assert_eq!(
9407            ecgetstr(&mut st, 0, None),
9408            "a",
9409            "c:2869 strlen truncates 1-char inline at the NUL tail"
9410        );
9411
9412        // 2-char: ('a', 'b', 0) → "ab"
9413        let mut st = mk_state(pack_inline(b'a', b'b', 0));
9414        assert_eq!(
9415            ecgetstr(&mut st, 0, None),
9416            "ab",
9417            "c:2869 strlen truncates 2-char inline at the NUL tail"
9418        );
9419
9420        // 3-char: ('a', 'b', 'c') → "abc"
9421        let mut st = mk_state(pack_inline(b'a', b'b', b'c'));
9422        assert_eq!(
9423            ecgetstr(&mut st, 0, None),
9424            "abc",
9425            "c:2869 full 3-byte inline preserved"
9426        );
9427
9428        // Pathological: ('a', 0, 'b') → "a" (NOT "ab" from retain-splice)
9429        let mut st = mk_state(pack_inline(b'a', 0, b'b'));
9430        assert_eq!(
9431            ecgetstr(&mut st, 0, None),
9432            "a",
9433            "c:2869 strlen STOPS at first NUL; must not splice 'b' through"
9434        );
9435    }
9436
9437    /// Pin: `init_parse_status` resets ALL six lexer-parser flags
9438    /// per `Src/parse.c:500-502`. Specifically `inrepeat_ = 0` at
9439    /// c:501 was previously missing in the Rust port. Pin every
9440    /// reset so a future regression that drops one is caught.
9441    #[test]
9442    fn init_parse_status_resets_all_lexer_parser_flags() {
9443        let _g = crate::test_util::global_state_lock();
9444        // Dirty every flag to a non-default value.
9445        set_incasepat(5);
9446        set_incond(7);
9447        set_inredir(true);
9448        set_infor(3);
9449        set_intypeset(true);
9450        set_inrepeat(2);
9451        set_incmdpos(false);
9452        // Reset.
9453        init_parse_status();
9454        // c:500-502 — every flag back to its default.
9455        assert_eq!(incasepat(), 0, "c:500 — incasepat = 0");
9456        assert_eq!(incond(), 0, "c:500 — incond = 0");
9457        assert!(!inredir(), "c:500 — inredir = 0");
9458        assert_eq!(infor(), 0, "c:500 — infor = 0");
9459        assert!(!intypeset(), "c:500 — intypeset = 0");
9460        assert_eq!(
9461            inrepeat(),
9462            0,
9463            "c:501 — inrepeat_ = 0 (was previously missing)"
9464        );
9465        assert!(incmdpos(), "c:502 — incmdpos = 1");
9466    }
9467
9468    // ═══════════════════════════════════════════════════════════════════
9469    // AST shape tests — feed source through parse(), walk the resulting
9470    // ZshProgram, assert structural properties. Each test uses the local
9471    // `parse(input)` helper that errors cleanly on parse failure.
9472    // Anchor: where applicable, behavior matches `zsh -n -c '...'`
9473    // (parse-only, no execution — which would error on syntax issues).
9474    // ═══════════════════════════════════════════════════════════════════
9475
9476    /// Empty input → ZshProgram with no lists.
9477    #[test]
9478    fn parse_empty_source_yields_zero_lists() {
9479        let _g = crate::test_util::global_state_lock();
9480        let prog = parse("").unwrap();
9481        assert_eq!(prog.lists.len(), 0);
9482    }
9483
9484    /// Comment-only input → no lists (comments are skipped at lex level).
9485    #[test]
9486    fn parse_only_comment_yields_zero_lists() {
9487        let _g = crate::test_util::global_state_lock();
9488        let prog = parse("# this is just a comment").unwrap();
9489        assert_eq!(prog.lists.len(), 0, "comments alone produce no cmds");
9490    }
9491
9492    /// Three commands separated by `;` → three lists.
9493    #[test]
9494    fn parse_three_semicolon_separated_commands_yield_three_lists() {
9495        let _g = crate::test_util::global_state_lock();
9496        let prog = parse("a; b; c").unwrap();
9497        assert_eq!(prog.lists.len(), 3);
9498    }
9499
9500    /// Background command — async flag set on the list.
9501    #[test]
9502    fn parse_background_command_sets_async_flag() {
9503        let _g = crate::test_util::global_state_lock();
9504        let prog = parse("sleep 1 &").unwrap();
9505        assert_eq!(prog.lists.len(), 1);
9506        assert!(
9507            prog.lists[0].flags.async_,
9508            "trailing `&` must set async_ flag"
9509        );
9510    }
9511
9512    /// Pipe count: `a | b | c | d` → 4 stages.
9513    #[test]
9514    fn parse_four_stage_pipeline_has_three_next_links() {
9515        let _g = crate::test_util::global_state_lock();
9516        let prog = parse("a | b | c | d").unwrap();
9517        let mut pipe = &prog.lists[0].sublist.pipe;
9518        let mut count = 1;
9519        while let Some(next) = &pipe.next {
9520            pipe = next;
9521            count += 1;
9522        }
9523        assert_eq!(count, 4, "4 commands should produce 4 pipe stages");
9524    }
9525
9526    /// `|&` between pipeline stages sets merge_stderr.
9527    #[test]
9528    fn parse_pipe_amp_sets_merge_stderr() {
9529        let _g = crate::test_util::global_state_lock();
9530        let prog = parse("a |& b").unwrap();
9531        let pipe = &prog.lists[0].sublist.pipe;
9532        assert!(pipe.next.is_some());
9533        assert!(pipe.merge_stderr, "|& must set merge_stderr");
9534    }
9535
9536    /// `cmd1 || cmd2`: sublist.next is Some with `Or`.
9537    #[test]
9538    fn parse_or_operator_sets_sublist_op_or() {
9539        let _g = crate::test_util::global_state_lock();
9540        let prog = parse("cmd1 || cmd2").unwrap();
9541        let sublist = &prog.lists[0].sublist;
9542        let (op, _) = sublist.next.as_ref().expect("must have next");
9543        assert_eq!(*op, SublistOp::Or);
9544    }
9545
9546    /// `! cmd` sets the not flag on the sublist.
9547    #[test]
9548    fn parse_bang_negation_sets_sublist_not_flag() {
9549        let _g = crate::test_util::global_state_lock();
9550        let prog = parse("! false").unwrap();
9551        let sublist = &prog.lists[0].sublist;
9552        assert!(sublist.flags.not, "`!` prefix must set sublist.flags.not");
9553    }
9554
9555    // ── Compound commands ────────────────────────────────────────────
9556    /// `while cond; do body; done` → ZshCommand::While.
9557    #[test]
9558    fn parse_while_loop_yields_while_command() {
9559        let _g = crate::test_util::global_state_lock();
9560        let prog = parse("while true; do echo x; done").unwrap();
9561        assert!(matches!(
9562            prog.lists[0].sublist.pipe.cmd,
9563            ZshCommand::While(_)
9564        ));
9565    }
9566
9567    /// `until cond; do body; done` → ZshCommand::Until.
9568    /// Anchor: `zsh -n -c 'until false; do echo; done'` accepts and parses
9569    /// as an until-loop. zshrs accepts but emits a DIFFERENT AST variant
9570    /// (not Until). Bug — until loop is mis-classified.
9571    #[test]
9572    fn parse_until_loop_yields_until_command_anchored_to_zsh() {
9573        let _g = crate::test_util::global_state_lock();
9574        let prog = parse("until false; do echo x; done").unwrap();
9575        assert!(
9576            matches!(prog.lists[0].sublist.pipe.cmd, ZshCommand::Until(_)),
9577            "zsh parses `until` as Until variant; zshrs uses different variant: {:?}",
9578            prog.lists[0].sublist.pipe.cmd
9579        );
9580    }
9581
9582    /// `(cmd)` → Subsh variant.
9583    #[test]
9584    fn parse_parens_yield_subsh_command() {
9585        let _g = crate::test_util::global_state_lock();
9586        let prog = parse("(echo hi)").unwrap();
9587        assert!(matches!(
9588            prog.lists[0].sublist.pipe.cmd,
9589            ZshCommand::Subsh(_)
9590        ));
9591    }
9592
9593    /// `{ cmd; }` → Cursh (current-shell) command.
9594    #[test]
9595    fn parse_braces_yield_cursh_command() {
9596        let _g = crate::test_util::global_state_lock();
9597        let prog = parse("{ echo hi; }").unwrap();
9598        assert!(matches!(
9599            prog.lists[0].sublist.pipe.cmd,
9600            ZshCommand::Cursh(_)
9601        ));
9602    }
9603
9604    /// `[[ a == b ]]` → ZshCommand::Cond.
9605    #[test]
9606    fn parse_double_brackets_yield_cond_command() {
9607        let _g = crate::test_util::global_state_lock();
9608        let prog = parse("[[ a == b ]]").unwrap();
9609        assert!(matches!(
9610            prog.lists[0].sublist.pipe.cmd,
9611            ZshCommand::Cond(_)
9612        ));
9613    }
9614
9615    /// `(( 1 + 2 ))` → ZshCommand::Arith.
9616    #[test]
9617    fn parse_double_parens_yield_arith_command() {
9618        let _g = crate::test_util::global_state_lock();
9619        let prog = parse("(( 1 + 2 ))").unwrap();
9620        assert!(matches!(
9621            prog.lists[0].sublist.pipe.cmd,
9622            ZshCommand::Arith(_)
9623        ));
9624    }
9625
9626    /// `repeat 3 do echo x; done` → ZshCommand::Repeat.
9627    #[test]
9628    fn parse_repeat_loop_yields_repeat_command() {
9629        let _g = crate::test_util::global_state_lock();
9630        let prog = parse("repeat 3 do echo x; done").unwrap();
9631        assert!(matches!(
9632            prog.lists[0].sublist.pipe.cmd,
9633            ZshCommand::Repeat(_)
9634        ));
9635    }
9636
9637    // ── Function definitions ─────────────────────────────────────────
9638    /// `name() { body; }` → FuncDef variant.
9639    #[test]
9640    fn parse_paren_funcdef_yields_funcdef_command() {
9641        let _g = crate::test_util::global_state_lock();
9642        let prog = parse("greet() { echo hi; }").unwrap();
9643        assert!(matches!(
9644            prog.lists[0].sublist.pipe.cmd,
9645            ZshCommand::FuncDef(_)
9646        ));
9647    }
9648
9649    /// `function name { body; }` → FuncDef variant (zsh keyword form).
9650    #[test]
9651    fn parse_function_keyword_funcdef_yields_funcdef_command() {
9652        let _g = crate::test_util::global_state_lock();
9653        let prog = parse("function greet { echo hi; }").unwrap();
9654        assert!(matches!(
9655            prog.lists[0].sublist.pipe.cmd,
9656            ZshCommand::FuncDef(_)
9657        ));
9658    }
9659
9660    /// Syntax error — `if` without `fi` → parse returns Err.
9661    /// Anchor: `echo 'if true; then echo' | zsh -n` → "parse error".
9662    #[test]
9663    fn parse_unterminated_if_returns_error_anchored_to_zsh() {
9664        let _g = crate::test_util::global_state_lock();
9665        let r = parse("if true; then echo yes");
9666        assert!(r.is_err(), "zsh -n: parse error near `\\n`");
9667    }
9668
9669    /// Syntax error — bare `done` without `for/while/until` → error.
9670    /// Anchor: `echo done | zsh -n` → "parse error near `done`".
9671    #[test]
9672    fn parse_orphan_done_returns_error_anchored_to_zsh() {
9673        let _g = crate::test_util::global_state_lock();
9674        let r = parse("done");
9675        assert!(r.is_err(), "zsh -n: parse error near `done`");
9676    }
9677
9678    /// Simple command's words are metafied at the AST layer (matches
9679    /// zsh's internal representation: `-` lexes to `Dash` = 0x9b, `*`
9680    /// to `Star`, etc.). zsh untokenizes via `untokenize()` BEFORE
9681    /// surfacing words at execution time (Src/exec.c:execcmd_args).
9682    /// This test pins the round-trip: `untokenize(word)` recovers the
9683    /// user-visible form. If parse-time unmetafy ever lands the
9684    /// untokenize call becomes a no-op; the test stays green either
9685    /// way. Companion test below pins the metafied internal form.
9686    #[test]
9687    fn parse_simple_command_words_unmetafied_like_zsh_anchored() {
9688        let _g = crate::test_util::global_state_lock();
9689        let prog = parse("ls -la /tmp").unwrap();
9690        match &prog.lists[0].sublist.pipe.cmd {
9691            ZshCommand::Simple(s) => {
9692                let untok: Vec<String> = s
9693                    .words
9694                    .iter()
9695                    .map(|w| crate::ported::lex::untokenize(w))
9696                    .collect();
9697                assert_eq!(
9698                    untok,
9699                    vec!["ls", "-la", "/tmp"],
9700                    "untokenize(word) must yield the user-visible form"
9701                );
9702            }
9703            other => panic!("expected Simple, got {other:?}"),
9704        }
9705    }
9706
9707    /// Pin the OBSERVED zshrs contract: simple-command word array
9708    /// contains metafied bytes. This is the active (passing) version
9709    /// of the anchor above — it documents zshrs's current internal
9710    /// representation. If zshrs starts unmetafying at parse time, this
9711    /// test will FAIL and the anchor-style test above will start passing.
9712    #[test]
9713    fn parse_simple_command_words_metafied_internal_form() {
9714        let _g = crate::test_util::global_state_lock();
9715        let prog = parse("ls -la /tmp").unwrap();
9716        match &prog.lists[0].sublist.pipe.cmd {
9717            ZshCommand::Simple(s) => {
9718                assert_eq!(s.words.len(), 3);
9719                assert_eq!(s.words[0], "ls");
9720                assert_eq!(s.words[2], "/tmp");
9721                // s.words[1] contains the metafied `-` (`\u{9b}` Dash byte)
9722                // followed by "la". Don't pin the exact byte form (it
9723                // may change); pin that the length is right.
9724                assert_eq!(s.words[1].chars().count(), 3, "`-la` is 3 chars");
9725                assert!(s.words[1].ends_with("la"));
9726            }
9727            other => panic!("expected Simple, got {other:?}"),
9728        }
9729    }
9730
9731    // ─── zsh-corpus pins for parser: structural shapes ────────────────
9732
9733    /// Empty input — parse succeeds, lists may be empty.
9734    #[test]
9735    fn parse_corpus_empty_input_no_error() {
9736        let _g = crate::test_util::global_state_lock();
9737        let prog = parse("").unwrap();
9738        assert!(
9739            prog.lists.is_empty() || prog.lists.len() <= 1,
9740            "empty input → 0 or 1 list, got {}",
9741            prog.lists.len()
9742        );
9743    }
9744
9745    /// Comment-only input parses as empty.
9746    #[test]
9747    fn parse_corpus_comment_only_no_error() {
9748        let _g = crate::test_util::global_state_lock();
9749        let r = parse("# just a comment");
9750        assert!(r.is_ok(), "comment-only parse should succeed");
9751    }
9752
9753    /// `cmd1; cmd2` — two top-level lists or two sublists.
9754    #[test]
9755    fn parse_corpus_semicolon_separates_commands() {
9756        let _g = crate::test_util::global_state_lock();
9757        let prog = parse("echo a; echo b").unwrap();
9758        // We pin: parse produces > 0 lists/sublists; details vary.
9759        assert!(!prog.lists.is_empty(), "non-empty parse");
9760    }
9761
9762    /// `a && b` — DAMPER joins into a sublist chain.
9763    #[test]
9764    fn parse_corpus_logical_and_parses() {
9765        let _g = crate::test_util::global_state_lock();
9766        let r = parse("true && false");
9767        assert!(r.is_ok(), "`a && b` parses cleanly");
9768    }
9769
9770    /// `a || b` — DBAR.
9771    #[test]
9772    fn parse_corpus_logical_or_parses() {
9773        let _g = crate::test_util::global_state_lock();
9774        let r = parse("false || true");
9775        assert!(r.is_ok(), "`a || b` parses cleanly");
9776    }
9777
9778    /// `a | b` pipeline.
9779    #[test]
9780    fn parse_corpus_pipeline_parses() {
9781        let _g = crate::test_util::global_state_lock();
9782        let r = parse("echo hi | cat");
9783        assert!(r.is_ok(), "`a | b` parses");
9784    }
9785
9786    /// `if true; then echo x; fi` — basic if-then-fi block.
9787    #[test]
9788    fn parse_corpus_if_then_fi_parses() {
9789        let _g = crate::test_util::global_state_lock();
9790        let r = parse("if true; then echo x; fi");
9791        assert!(r.is_ok(), "if/then/fi parses cleanly");
9792    }
9793
9794    /// `for i in 1 2 3; do echo $i; done`.
9795    #[test]
9796    fn parse_corpus_for_do_done_parses() {
9797        let _g = crate::test_util::global_state_lock();
9798        let r = parse("for i in 1 2 3; do echo $i; done");
9799        assert!(r.is_ok(), "for/do/done parses cleanly");
9800    }
9801
9802    /// `while true; do break; done`.
9803    #[test]
9804    fn parse_corpus_while_do_done_parses() {
9805        let _g = crate::test_util::global_state_lock();
9806        let r = parse("while true; do break; done");
9807        assert!(r.is_ok(), "while/do/done parses cleanly");
9808    }
9809
9810    /// `case x in (a) echo A;; esac` — case statement.
9811    #[test]
9812    fn parse_corpus_case_esac_parses() {
9813        let _g = crate::test_util::global_state_lock();
9814        let r = parse("case x in (a) echo A;; esac");
9815        assert!(r.is_ok(), "case/esac parses cleanly");
9816    }
9817
9818    /// Function definition `f() { echo x }`.
9819    #[test]
9820    fn parse_corpus_function_def_parses() {
9821        let _g = crate::test_util::global_state_lock();
9822        let r = parse("f() { echo x }");
9823        assert!(r.is_ok(), "f() {{ ... }} parses cleanly");
9824    }
9825
9826    /// `(subshell echo a)` — subshell.
9827    #[test]
9828    fn parse_corpus_subshell_parens_parses() {
9829        let _g = crate::test_util::global_state_lock();
9830        let r = parse("( echo a )");
9831        assert!(r.is_ok(), "subshell parses cleanly");
9832    }
9833
9834    // ═══════════════════════════════════════════════════════════════════
9835    // C-parity tests pinning Src/parse.c. Tests that capture KNOWN
9836    // ZSHRS BUGS use #[ignore = "ZSHRS BUG: …"].
9837    // ═══════════════════════════════════════════════════════════════════
9838
9839    /// `empty_eprog(p)` returns true on an eprog with empty `prog`.
9840    /// C `Src/parse.c:584`:
9841    ///   `return (!p || !p->prog || *p->prog == WCB_END());`
9842    /// Rust port at parse.rs:685 — `p.prog.is_empty() || p.prog[0] == WCB_END()`.
9843    #[test]
9844    fn empty_eprog_empty_prog_returns_true() {
9845        let _g = crate::test_util::global_state_lock();
9846        let p = crate::ported::zsh_h::eprog::default();
9847        assert!(empty_eprog(&p), "empty prog vec → empty_eprog true");
9848    }
9849
9850    /// `empty_eprog(p)` returns true when first wordcode is WCB_END.
9851    /// C: `*p->prog == WCB_END()`.
9852    #[test]
9853    fn empty_eprog_first_wcb_end_returns_true() {
9854        let _g = crate::test_util::global_state_lock();
9855        let mut p = crate::ported::zsh_h::eprog::default();
9856        p.prog.push(WCB_END());
9857        assert!(empty_eprog(&p), "prog[0]==WCB_END → empty_eprog true");
9858    }
9859
9860    /// `empty_eprog(p)` returns false for non-empty non-END prog.
9861    #[test]
9862    fn empty_eprog_non_empty_non_end_returns_false() {
9863        let _g = crate::test_util::global_state_lock();
9864        let mut p = crate::ported::zsh_h::eprog::default();
9865        // Push some non-END wordcode (1 is arbitrary non-zero, not WCB_END).
9866        p.prog.push(1);
9867        assert!(!empty_eprog(&p), "non-END first opcode → false");
9868    }
9869
9870    /// `ecstrcode("")` returns a wordcode for the empty string. C
9871    /// `Src/parse.c:346-ish` ecstrcode interns strings in `ecbuf`.
9872    /// Pin: same call returns same wordcode (deterministic intern).
9873    #[test]
9874    fn ecstrcode_empty_string_returns_deterministic_code() {
9875        let _g = crate::test_util::global_state_lock();
9876        init_parse();
9877        let a = ecstrcode("");
9878        let b = ecstrcode("");
9879        assert_eq!(a, b, "intern of '' must be deterministic");
9880    }
9881
9882    /// `ecstrcode` of two different strings returns different codes.
9883    #[test]
9884    fn ecstrcode_distinct_strings_get_distinct_codes() {
9885        let _g = crate::test_util::global_state_lock();
9886        init_parse();
9887        let a = ecstrcode("foo");
9888        let b = ecstrcode("bar");
9889        // Should differ — if equal, intern table collapsed two different
9890        // strings to the same key (bug).
9891        assert_ne!(a, b, "different strings must intern to different codes");
9892    }
9893
9894    /// `parse_event(ENDINPUT)` on empty input returns None.
9895    /// C `Src/parse.c:715-ish` — empty token stream → no program.
9896    #[test]
9897    #[ignore = "ZSHRS BUG: parse_event setup needs lex state — exact behavior on empty input verification pending"]
9898    fn parse_event_empty_returns_none() {
9899        let _g = crate::test_util::global_state_lock();
9900        init_parse();
9901        // Empty input typically yields no program; needs lex state.
9902        let r = parse_event(crate::ported::lex::ENDINPUT);
9903        assert!(r.is_none(), "no tokens → no event");
9904    }
9905
9906    // ═══════════════════════════════════════════════════════════════════
9907    // Additional C-parity tests for Src/parse.c.
9908    // ═══════════════════════════════════════════════════════════════════
9909
9910    /// c:399 — `ecadd(c)` returns the index where `c` was placed,
9911    /// not the post-increment value. Sequential ecadd calls return
9912    /// strictly increasing indices.
9913    #[test]
9914    fn ecadd_returns_strictly_increasing_indices() {
9915        let _g = crate::test_util::global_state_lock();
9916        init_parse();
9917        let i0 = ecadd(0xDEAD);
9918        let i1 = ecadd(0xBEEF);
9919        let i2 = ecadd(0xC0DE);
9920        assert!(
9921            i1 > i0,
9922            "ecadd indices must strictly increase, got {i0} then {i1}"
9923        );
9924        assert!(
9925            i2 > i1,
9926            "ecadd indices must strictly increase, got {i1} then {i2}"
9927        );
9928        assert_eq!(i1, i0 + 1, "consecutive ecadds advance by 1");
9929        assert_eq!(i2, i1 + 1, "consecutive ecadds advance by 1");
9930    }
9931
9932    /// c:413 — `ecdel(p)` removes one wordcode, shrinks ecused by 1.
9933    /// Pin: subsequent ecadd reuses freed slot (ecused decreased).
9934    #[test]
9935    fn ecdel_shrinks_ecused_by_one() {
9936        let _g = crate::test_util::global_state_lock();
9937        init_parse();
9938        let _i0 = ecadd(0xA);
9939        let i1 = ecadd(0xB);
9940        let _i2 = ecadd(0xC);
9941        let next_before = ECUSED.get();
9942        ecdel(i1);
9943        let next_after = ECUSED.get();
9944        assert_eq!(
9945            next_after,
9946            next_before - 1,
9947            "ecdel must decrement ecused by exactly 1"
9948        );
9949    }
9950
9951    /// c:399-405 — `ecadd` after exhausting buffer must grow it (no
9952    /// panic on push past current eclen). Pin: 1000 adds don't crash.
9953    #[test]
9954    fn ecadd_grows_buffer_on_demand() {
9955        let _g = crate::test_util::global_state_lock();
9956        init_parse();
9957        for i in 0..1000 {
9958            ecadd(i as u32);
9959        }
9960        // No panic = grow path works.
9961        assert!(ECUSED.get() >= 1000, "1000 adds → ecused ≥ 1000");
9962    }
9963
9964    /// c:426 — `ecstrcode` of short strings (≤4 bytes) returns a
9965    /// packed inline wordcode (not an offset into the string region).
9966    /// Pin: identical short strings get identical codes.
9967    #[test]
9968    fn ecstrcode_short_strings_are_deterministic() {
9969        let _g = crate::test_util::global_state_lock();
9970        init_parse();
9971        let a = ecstrcode("ab");
9972        let b = ecstrcode("ab");
9973        assert_eq!(a, b, "same short string must intern to same code");
9974    }
9975
9976    /// c:426 — long strings (>4 bytes) hit the deduped string region.
9977    /// Pin: same long string returns same code on repeat (registry
9978    /// dedupes).
9979    #[test]
9980    fn ecstrcode_long_strings_dedupe_in_registry() {
9981        let _g = crate::test_util::global_state_lock();
9982        init_parse();
9983        let a = ecstrcode("a-much-longer-test-string");
9984        let b = ecstrcode("a-much-longer-test-string");
9985        assert_eq!(a, b, "registry must dedupe identical long strings");
9986    }
9987
9988    /// `clear_hdocs()` is idempotent — calling twice in a row leaves
9989    /// HDOCS = None and LEX_HEREDOCS empty.
9990    #[test]
9991    fn clear_hdocs_is_idempotent() {
9992        let _g = crate::test_util::global_state_lock();
9993        clear_hdocs();
9994        clear_hdocs();
9995        HDOCS.with_borrow(|h| assert!(h.is_none(), "HDOCS must be None"));
9996        LEX_HEREDOCS.with_borrow(|v| assert!(v.is_empty(), "LEX_HEREDOCS must be empty"));
9997    }
9998
9999    /// `init_parse()` resets parse state to known empty defaults.
10000    /// Multiple init_parse calls are safe (idempotent).
10001    #[test]
10002    fn init_parse_is_idempotent() {
10003        let _g = crate::test_util::global_state_lock();
10004        init_parse();
10005        init_parse();
10006        // No panic = pass.
10007    }
10008
10009    /// `empty_eprog` returns true for a default-constructed eprog
10010    /// (empty prog vec).
10011    #[test]
10012    fn empty_eprog_true_for_empty_prog() {
10013        let _g = crate::test_util::global_state_lock();
10014        let p = eprog {
10015            prog: Vec::new(),
10016            ..Default::default()
10017        };
10018        assert!(empty_eprog(&p), "empty prog vec → empty eprog");
10019    }
10020
10021    /// `empty_eprog` returns true when prog[0] == WCB_END().
10022    #[test]
10023    fn empty_eprog_true_for_end_only_prog() {
10024        let _g = crate::test_util::global_state_lock();
10025        let p = eprog {
10026            prog: vec![WCB_END()],
10027            ..Default::default()
10028        };
10029        assert!(empty_eprog(&p), "WCB_END as first opcode → empty");
10030    }
10031
10032    /// `ecadjusthere(p, d)` is safe to call when HDOCS is None.
10033    #[test]
10034    fn ecadjusthere_safe_when_hdocs_none() {
10035        let _g = crate::test_util::global_state_lock();
10036        clear_hdocs();
10037        // No panic = pass.
10038        ecadjusthere(0, 0);
10039        ecadjusthere(100, -5);
10040        ecadjusthere(0, 10);
10041    }
10042
10043    /// `ecispace(p, n)` with n=0 is a no-op.
10044    #[test]
10045    fn ecispace_zero_n_is_noop() {
10046        let _g = crate::test_util::global_state_lock();
10047        init_parse();
10048        let before = ECUSED.get();
10049        ecispace(0, 0);
10050        let after = ECUSED.get();
10051        assert_eq!(before, after, "ecispace(_, 0) must not advance ecused");
10052    }
10053
10054    // ═══════════════════════════════════════════════════════════════════
10055    // Additional C-parity tests for Src/parse.c
10056    // c:146 parse_context_save / c:191 parse_context_restore /
10057    // c:225 ecadjusthere / c:293 ecadd / c:346 ecstrcode / c:574 init_parse /
10058    // c:685 empty_eprog / c:693 clear_hdocs / c:786 parse_list / c:815 parse_cond
10059    // c:2234 par_wordlist / c:2249 par_nl_wordlist
10060    // ═══════════════════════════════════════════════════════════════════
10061
10062    /// c:293 — `ecadd` returns usize (compile-time type pin).
10063    #[test]
10064    fn ecadd_returns_usize_type() {
10065        let _g = crate::test_util::global_state_lock();
10066        init_parse();
10067        let _: usize = ecadd(0);
10068    }
10069
10070    /// c:346 — `ecstrcode` returns u32 (compile-time type pin).
10071    #[test]
10072    fn ecstrcode_returns_u32_type() {
10073        let _g = crate::test_util::global_state_lock();
10074        init_parse();
10075        let _: u32 = ecstrcode("");
10076    }
10077
10078    /// c:346 — `ecstrcode("")` empty string is safe.
10079    #[test]
10080    fn ecstrcode_empty_string_no_panic() {
10081        let _g = crate::test_util::global_state_lock();
10082        init_parse();
10083        let _ = ecstrcode("");
10084    }
10085
10086    /// c:346 — `ecstrcode` is deterministic for same input.
10087    #[test]
10088    fn ecstrcode_is_deterministic() {
10089        let _g = crate::test_util::global_state_lock();
10090        init_parse();
10091        for s in ["", "a", "abc", "hello world"] {
10092            let first = ecstrcode(s);
10093            for _ in 0..3 {
10094                assert_eq!(
10095                    ecstrcode(s),
10096                    first,
10097                    "ecstrcode({:?}) must be deterministic",
10098                    s
10099                );
10100            }
10101        }
10102    }
10103
10104    /// c:786 — `parse_list` returns Option<eprog>.
10105    #[test]
10106    fn parse_list_returns_option_eprog_type() {
10107        let _g = crate::test_util::global_state_lock();
10108        init_parse();
10109        let _: Option<eprog> = parse_list();
10110    }
10111
10112    /// c:815 — `parse_cond` returns Option<eprog>.
10113    #[test]
10114    fn parse_cond_returns_option_eprog_type() {
10115        let _g = crate::test_util::global_state_lock();
10116        init_parse();
10117        let _: Option<eprog> = parse_cond();
10118    }
10119
10120    /// c:2234 — `par_wordlist` returns Vec<String>.
10121    #[test]
10122    fn par_wordlist_returns_vec_string_type() {
10123        let _g = crate::test_util::global_state_lock();
10124        init_parse();
10125        let _: Vec<String> = par_wordlist();
10126    }
10127
10128    /// c:2249 — `par_nl_wordlist` returns Vec<String>.
10129    #[test]
10130    fn par_nl_wordlist_returns_vec_string_type() {
10131        let _g = crate::test_util::global_state_lock();
10132        init_parse();
10133        let _: Vec<String> = par_nl_wordlist();
10134    }
10135
10136    /// c:693 — `clear_hdocs` deterministic state after call (no-panic).
10137    #[test]
10138    fn clear_hdocs_deterministic_after_call() {
10139        let _g = crate::test_util::global_state_lock();
10140        clear_hdocs();
10141        clear_hdocs();
10142    }
10143
10144    /// c:225 — `ecadjusthere(0, 0)` is a no-op (no delta).
10145    #[test]
10146    fn ecadjusthere_zero_delta_no_panic() {
10147        let _g = crate::test_util::global_state_lock();
10148        ecadjusthere(0, 0);
10149    }
10150
10151    /// c:225 — `ecadjusthere` is safe for arbitrary positions.
10152    #[test]
10153    fn ecadjusthere_arbitrary_pos_no_panic() {
10154        let _g = crate::test_util::global_state_lock();
10155        for p in [0usize, 1, 100, 9999] {
10156            ecadjusthere(p, 0);
10157            ecadjusthere(p, 1);
10158            ecadjusthere(p, -1);
10159        }
10160    }
10161
10162    // ═══════════════════════════════════════════════════════════════════
10163    // Additional C-parity tests for Src/parse.c FD_* accessors
10164    // c:3127 fdmagic / c:3131 fdflags / c:3133 fdother / c:3140 fdversion /
10165    // c:3145 fdhflags / c:3146 fdhtail / c:3147 fdhbldflags
10166    // ═══════════════════════════════════════════════════════════════════
10167
10168    fn build_fd_header() -> Vec<u32> {
10169        let mut buf = vec![0u32; FD_PRELEN + 32];
10170        buf[0] = FD_MAGIC; // pre[0] magic
10171        buf[1] = (0x12u32) | (0x00ABCDEFu32 << 8); // flags=0x12, other=0xABCDEF
10172                                                   // Embed version string starting at pre[2].
10173        let ver = b"5.9\0";
10174        for (i, chunk) in ver.chunks(4).enumerate() {
10175            let mut word = [0u8; 4];
10176            word[..chunk.len()].copy_from_slice(chunk);
10177            buf[2 + i] = u32::from_le_bytes(word);
10178        }
10179        buf[FD_PRELEN - 1] = (FD_PRELEN as u32) + 8; // header-len slot
10180        buf
10181    }
10182
10183    /// c:3127 — `fdmagic(f)` returns pre[0] verbatim.
10184    #[test]
10185    fn fdmagic_returns_pre_zero_word() {
10186        let buf = build_fd_header();
10187        assert_eq!(fdmagic(&buf), FD_MAGIC, "fdmagic = pre[0]");
10188    }
10189
10190    /// c:3131 — `fdflags` extracts low byte of pre[1].
10191    #[test]
10192    fn fdflags_low_byte_extraction() {
10193        let buf = build_fd_header();
10194        assert_eq!(fdflags(&buf), 0x12, "flags = pre[1] & 0xff");
10195    }
10196
10197    /// c:3133 — `fdother` extracts high 24 bits of pre[1].
10198    #[test]
10199    fn fdother_high_24_bits_extraction() {
10200        let buf = build_fd_header();
10201        assert_eq!(
10202            fdother(&buf),
10203            0x00ABCDEF,
10204            "other = pre[1] >> 8 & 0x00ffffff"
10205        );
10206    }
10207
10208    /// c:3132 — `fdsetflags` writes low byte, preserves high 24 bits.
10209    #[test]
10210    fn fdsetflags_preserves_high_24_bits() {
10211        let mut buf = build_fd_header();
10212        let other_before = fdother(&buf);
10213        fdsetflags(&mut buf, 0x42);
10214        assert_eq!(fdflags(&buf), 0x42, "new flags written");
10215        assert_eq!(fdother(&buf), other_before, "high 24 bits preserved");
10216    }
10217
10218    /// c:3134 — `fdsetother` writes high 24 bits, preserves low byte.
10219    #[test]
10220    fn fdsetother_preserves_low_byte() {
10221        let mut buf = build_fd_header();
10222        let flags_before = fdflags(&buf);
10223        fdsetother(&mut buf, 0x00DEADBE);
10224        assert_eq!(fdother(&buf), 0x00DEADBE, "new other written");
10225        assert_eq!(fdflags(&buf), flags_before, "low byte preserved");
10226    }
10227
10228    /// c:3134 — `fdsetother` clamps to 24 bits (caller-passed high bits dropped).
10229    #[test]
10230    fn fdsetother_clamps_to_24_bits() {
10231        let mut buf = build_fd_header();
10232        fdsetother(&mut buf, 0xFF_FFFF_FF);
10233        // Only the low 24 bits land in `other`.
10234        assert_eq!(fdother(&buf), 0x00FF_FFFF, "high bits dropped");
10235    }
10236
10237    /// c:3140 — `fdversion(buf)` returns String (compile-time type pin).
10238    #[test]
10239    fn fdversion_returns_string_type() {
10240        let buf = build_fd_header();
10241        let _: String = fdversion(&buf);
10242    }
10243
10244    /// c:3140 — `fdversion` reads the NUL-terminated string from pre[2..].
10245    #[test]
10246    fn fdversion_reads_until_nul() {
10247        let buf = build_fd_header();
10248        assert_eq!(fdversion(&buf), "5.9", "version read until NUL");
10249    }
10250
10251    /// c:3145 — `fdhflags(h)` returns low 2 bits of flags.
10252    #[test]
10253    fn fdhflags_low_two_bits() {
10254        let h = fdhead {
10255            start: 0,
10256            len: 0,
10257            npats: 0,
10258            strs: 0,
10259            hlen: 0,
10260            flags: 0b1011, // tail=2, kshload bits = 0b11
10261        };
10262        assert_eq!(fdhflags(&h), 0b11, "flags = h.flags & 0x3");
10263    }
10264
10265    /// c:3146 — `fdhtail(h)` returns high 30 bits (shifted right by 2).
10266    #[test]
10267    fn fdhtail_shift_right_two() {
10268        let h = fdhead {
10269            start: 0,
10270            len: 0,
10271            npats: 0,
10272            strs: 0,
10273            hlen: 0,
10274            flags: (0x12_3456 << 2) | 0x3,
10275        };
10276        assert_eq!(fdhtail(&h), 0x12_3456, "tail = h.flags >> 2");
10277    }
10278
10279    /// c:3147 — `fdhbldflags(flags, tail)` packs into single u32.
10280    #[test]
10281    fn fdhbldflags_packs_flags_low_tail_high() {
10282        let packed = fdhbldflags(0x3, 0x42);
10283        assert_eq!(packed & 0x3, 0x3, "low 2 bits = flags");
10284        assert_eq!(packed >> 2, 0x42, "high 30 bits = tail");
10285    }
10286
10287    /// c:3145-3147 — `fdhflags(h)`+`fdhtail(h)` round-trip via fdhbldflags.
10288    #[test]
10289    fn fdh_round_trip_via_bldflags() {
10290        for (flags, tail) in [(0u32, 0u32), (1, 100), (2, 0xABC), (3, 0xFFFF)] {
10291            let packed = fdhbldflags(flags, tail);
10292            let h = fdhead {
10293                start: 0,
10294                len: 0,
10295                npats: 0,
10296                strs: 0,
10297                hlen: 0,
10298                flags: packed,
10299            };
10300            assert_eq!(fdhflags(&h), flags, "flags round-trips");
10301            assert_eq!(fdhtail(&h), tail, "tail round-trips");
10302        }
10303    }
10304
10305    /// c:8271 — `firstfdhead_offset()` returns FD_PRELEN constant.
10306    #[test]
10307    fn firstfdhead_offset_returns_prelen() {
10308        assert_eq!(
10309            firstfdhead_offset(),
10310            FD_PRELEN,
10311            "first header starts after prelude"
10312        );
10313    }
10314
10315    /// c:3127 — `fdmagic` differentiates FD_MAGIC from FD_OMAGIC.
10316    #[test]
10317    fn fdmagic_differentiates_magic_omagic() {
10318        let mut buf = vec![FD_MAGIC; FD_PRELEN];
10319        assert_eq!(fdmagic(&buf), FD_MAGIC);
10320        buf[0] = FD_OMAGIC;
10321        assert_eq!(fdmagic(&buf), FD_OMAGIC, "swapped magic readable");
10322        assert_ne!(FD_MAGIC, FD_OMAGIC, "the two magics differ");
10323    }
10324}