zsh/ported/parse.rs
1//! Zsh parser — direct port from zsh/Src/parse.c.
2//!
//! Pulls tokens via the free functions ported in lex.rs
//! (zshlex/tok/tokstr) and builds an AST tree (relocated to
//! src/extensions/zsh_ast.rs as a Rust-only IR) plus emits wordcode
//! into ECBUF via the P9b/P9c pipeline. Follows the zsh grammar
//! closely; productions match `par_*` in Src/parse.c.
8
9use super::lex::{
10 lextok, set_tok, AMPER, AMPERBANG, AMPOUTANG, BANG_TOK, BARAMP, BAR_TOK, CASE, COPROC, DAMPER,
11 DBAR, DINANG, DINANGDASH, DINBRACK, DINPAR, DOLOOP, DONE, DOUTANG, DOUTANGAMP, DOUTANGAMPBANG,
12 DOUTANGBANG, DOUTBRACK, DOUTPAR, DSEMI, ELIF, ELSE, ENDINPUT, ENVARRAY, ENVSTRING, ESAC, FI,
13 FOR, FOREACH, FUNC, IF, INANGAMP, INANG_TOK, INBRACE_TOK, INOUTANG, INOUTPAR, INPAR_TOK,
14 IS_REDIROP, LEXERR, LEX_HEREDOCS, NEWLIN, NOCORRECT, OUTANGAMP, OUTANGAMPBANG, OUTANGBANG,
15 OUTANG_TOK, OUTBRACE_TOK, OUTPAR_TOK, REPEAT, SELECT, SEMI, SEMIAMP, SEMIBAR, SEPER,
16 STRING_LEX, THEN, TIME, TRINANG, TYPESET, UNTIL, WHILE, ZEND,
17};
18use super::zsh_h::{
19 eprog, estate, funcdump, isset, redir, unset, wc_code, wordcode, Bang, Dash, Equals, Inang,
20 Outang, Tilde, ALIASFUNCDEF, COND_AND, COND_MOD, COND_MODI, COND_NOT, COND_NT, COND_OR,
21 COND_REGEX, COND_STRDEQ, COND_STREQ, COND_STRGTR, COND_STRLT, COND_STRNEQ, CSHJUNKIELOOPS,
22 EC_DUP, EC_NODUP, EF_HEAP, EF_REAL, EXECOPT, IGNOREBRACES, IS_DASH, MULTIFUNCDEF, OPT_ISSET,
23 PM_UNDEFINED, POSIXBUILTINS, REDIRF_FROM_HEREDOC, REDIR_APP, REDIR_APPNOW, REDIR_ERRAPP,
24 REDIR_ERRAPPNOW, REDIR_ERRWRITE, REDIR_ERRWRITENOW, REDIR_FROM_HEREDOC_MASK, REDIR_HEREDOC,
25 REDIR_HEREDOCDASH, REDIR_HERESTR, REDIR_INPIPE, REDIR_MERGEIN, REDIR_MERGEOUT, REDIR_OUTPIPE,
26 REDIR_READ, REDIR_READWRITE, REDIR_VARID_MASK, REDIR_WRITE, REDIR_WRITENOW, SHORTLOOPS,
27 SHORTREPEAT, WCB_COND, WCB_SIMPLE, WC_REDIR, WC_REDIR_FROM_HEREDOC, WC_REDIR_TYPE,
28 WC_REDIR_VARID, WC_SUBLIST_COPROC, WC_SUBLIST_NOT,
29};
30pub use crate::heredoc_ast::HereDoc;
31use crate::ported::lex::{
32 incasepat, incmdpos, incond, infor, input_slice, inredir, inrepeat, intypeset, isnewlin,
33 lex_init, lineno, noaliases, nocorrect, pos, set_incasepat, set_incmdpos, set_incond,
34 set_infor, set_inredir, set_inrepeat, set_intypeset, set_isnewlin, set_lineno, set_noaliases,
35 set_nocorrect, tok, tokfd, toklineno, tokstr, zshlex,
36};
37use crate::ported::signals::unqueue_signals;
38use crate::ported::utils::{errflag, zerr, zwarnnam, ERRFLAG_ERROR};
39use crate::prompt::{cmdpop, cmdpush};
40pub use crate::zsh_ast::{
41 CaseArm, CaseTerm, CaseTerminator, CompoundCommand, ForList, HereDocInfo, ListFlags, ListOp,
42 Redirect, RedirectOp, ShellCommand, ShellWord, SimpleCommand, SublistFlags, SublistOp,
43 VarModifier, ZshAssign, ZshAssignValue, ZshCase, ZshCommand, ZshCond, ZshFor, ZshFuncDef,
44 ZshIf, ZshList, ZshParamFlag, ZshPipe, ZshProgram, ZshRedir, ZshRepeat, ZshSimple, ZshSublist,
45 ZshTry, ZshWhile,
46};
47use crate::zsh_h::{
48 wc_bdata, CS_ALWAYS, CS_ARRAY, CS_CASE, CS_CMDAND, CS_CMDOR, CS_COND, CS_CURSH, CS_ELIF,
49 CS_ELSE, CS_ERRPIPE, CS_FOR, CS_FOREACH, CS_FUNCDEF, CS_IF, CS_IFTHEN, CS_PIPE, CS_REPEAT,
50 CS_SELECT, CS_SUBSH, CS_UNTIL, CS_WHILE, EF_RUN, WCB_ARITH, WCB_ASSIGN, WCB_CASE, WCB_CURSH,
51 WCB_END, WCB_FOR, WCB_FUNCDEF, WCB_IF, WCB_LIST, WCB_PIPE, WCB_REDIR, WCB_REPEAT, WCB_SELECT,
52 WCB_SUBLIST, WCB_SUBSH, WCB_TIMED, WCB_TRY, WCB_TYPESET, WCB_WHILE, WC_ASSIGN_ARRAY,
53 WC_ASSIGN_INC, WC_ASSIGN_NEW, WC_ASSIGN_SCALAR, WC_CASE_AND, WC_CASE_HEAD, WC_CASE_OR,
54 WC_CASE_TESTAND, WC_FOR_COND, WC_FOR_LIST, WC_FOR_PPARAM, WC_IF_ELIF, WC_IF_ELSE, WC_IF_HEAD,
55 WC_IF_IF, WC_PIPE_END, WC_PIPE_LINENO, WC_PIPE_MID, WC_REDIR_WORDS, WC_SELECT_LIST,
56 WC_SELECT_PPARAM, WC_SUBLIST_AND, WC_SUBLIST_END, WC_SUBLIST_FLAGS, WC_SUBLIST_OR,
57 WC_SUBLIST_SIMPLE, WC_SUBLIST_TYPE, WC_TIMED_EMPTY, WC_TIMED_PIPE, WC_WHILE_UNTIL,
58 WC_WHILE_WHILE, Z_ASYNC, Z_DISOWN, Z_END, Z_SIMPLE, Z_SYNC,
59};
60use serde::{Deserialize, Serialize};
61use std::fs::{self, File};
62use std::io::{Read, Seek, SeekFrom, Write};
63use std::os::unix::fs::MetadataExt;
64use std::path::Path;
65use std::sync::atomic::{AtomicUsize, Ordering};
66use std::sync::mpsc;
67use std::thread;
68use std::time::Duration;
69
70// Names lifted out of inside-fn `use` statements (PORT.md
71// 'no imports inside FNs ever').
72
73// Direct port of `Src/parse.c:287-289` grow-policy constants.
74const EC_INIT_SIZE: i32 = 256;
75
76// Pending-here-document list — direct port of `Src/parse.c:84
77// struct heredocs *hdocs;`. Per-parser file-static (bucket-1 in
78// PORT_PLAN.md): each worker thread parsing a separate program needs
79// its own pending-heredoc list. Saved/restored across nested parses
80// by `parse_context_save`/`parse_context_restore` (parse.c:299/337).
81thread_local! {
82 /// Port of file-static `struct heredocs *hdocs;` from `Src/parse.c:84`.
83 pub static HDOCS: std::cell::RefCell<Option<Box<crate::ported::zsh_h::heredocs>>>
84 = const { std::cell::RefCell::new(None) };
85}
86
87// Wordcode-buffer thread-locals — direct port of `Src/parse.c:269-285`
88// file-statics. Per-evaluator (bucket-1 in PORT_PLAN.md): each worker
89// thread parsing a separate program needs its own wordcode buffer.
90//
91// ECBUF: the wordcode array being built. C `Wordcode ecbuf`
92// (parse.c:275).
93// ECLEN: allocated entries in ECBUF (parse.c:269).
94// ECUSED: entries actually used so far (parse.c:271).
95// ECNPATS: count of patterns referenced by ECBUF (parse.c:273).
96// ECSOFFS / ECSSUB: byte offsets into the string region
97// (parse.c:279). ECSSUB subtracts substring overlap.
98// ECNFUNC: count of functions defined so far (parse.c:285).
99// ECSTRS_INDEX: dedup index for long strings — C uses a binary tree
100// of `struct eccstr` (zsh.h:836); the canonical Eccstr port exists
101// at zsh_h::eccstr but stays unused at runtime here. The HashMap
102// preserves the API contract (lookup by (nfunc, str) → offs) with
103// simpler ownership semantics.
104thread_local! {
105 /// `ECBUF` static.
106 pub static ECBUF: std::cell::RefCell<Vec<u32>> = std::cell::RefCell::new(Vec::new());
107 static ECLEN: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
108 static ECUSED: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
109 static ECNPATS: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
110 static ECSOFFS: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
111 static ECSSUB: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
112 static ECNFUNC: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
113 static ECSTRS_INDEX: std::cell::RefCell<std::collections::HashMap<(i32, String), u32>>
114 = std::cell::RefCell::new(std::collections::HashMap::new());
115 /// C zsh's `eccstr` BST (parse.c:447). Port of `Eccstr ecstrs` —
116 /// a hashval-ordered binary search tree of long-strings for
117 /// dedup. Same cmp logic as C: nfunc, then hashval, then strcmp.
118 /// HashMap above is a fast-path lookup; this tree is the
119 /// C-fidelity walker that mirrors C's exact dedup-hit pattern
120 /// (including its quirks for hash-colliding content).
121 static ECSTRS_TREE: std::cell::RefCell<Option<Box<EccstrNode>>>
122 = const { std::cell::RefCell::new(None) };
123 /// Reverse index for `ecgetstr`: offs → owned string. Populated
124 /// at ecstrcode time so the consumer can recover the string from
125 /// the wordcode offs without walking the encode-time HashMap.
126 /// Stores the METAFIED BYTE form of each long-string, exactly
127 /// matching what C's strs region holds. `String` would not work
128 /// here because Rust strings carry UTF-8-encoded chars (e.g.
129 /// the Dash marker `\u{9b}` UTF-8-encodes to two bytes
130 /// `\xc2 \x9b`) while C stores zsh markers as single bytes
131 /// (raw `\x9b`). Storing Vec<u8> lets us write byte-for-byte
132 /// what C writes after metafy.
133 pub static ECSTRS_REVERSE: std::cell::RefCell<std::collections::HashMap<u32, Vec<u8>>>
134 = std::cell::RefCell::new(std::collections::HashMap::new());
135}
136const EC_DOUBLE_THRESHOLD: i32 = 32768;
137const EC_INCREMENT: i32 = 1024;
138
139/// Direct port of `parse_context_save(struct parse_stack *ps, int toplevel)` at `Src/parse.c:295`.
140/// Snapshots the lexer-side file-statics (which currently live on
141/// `lexer` until Phase 7 dissolution makes them file-scope
142/// thread_local!s) plus the pending heredoc list, plus the
143/// wordcode-buffer state (STUB until Phase 9b). Saves Rust-only
144/// recursion counters too so nested parses get fresh limits.
145/// WARNING: param names don't match C — Rust=(ps) vs C=(ps, toplevel)
146pub fn parse_context_save(ps: &mut parse_stack) {
147 // parse.c:299 — `ps->hdocs = hdocs; hdocs = NULL;` — save the
148 // canonical C linked-list and clear it for the nested parse.
149 ps.hdocs = HDOCS.with_borrow_mut(|h| h.take());
150 // zshrs-only: save the parallel AST-glue Vec the same way.
151 // LEX_HEREDOCS carries terminator/strip_tabs/quoted metadata
152 // that has no C analog (C stores it implicitly via tokstr).
153 ps.lex_heredocs = LEX_HEREDOCS.with_borrow_mut(|v| std::mem::take(v));
154 // parse.c:302-310 — save lexer-side state.
155 ps.incmdpos = incmdpos();
156 // parse.c:303 — `ps->aliasspaceflag = aliasspaceflag;`. Mirrors
157 // lex.c LEX_ALIAS_SPACE_FLAG so nested parses preserve the
158 // HISTIGNORESPACE-via-alias state across parser re-entry.
159 ps.aliasspaceflag = crate::ported::lex::LEX_ALIAS_SPACE_FLAG.with(|c| c.get());
160 ps.incond = incond();
161 ps.inredir = inredir();
162 ps.incasepat = incasepat();
163 ps.isnewlin = isnewlin();
164 ps.infor = infor();
165 ps.inrepeat_ = inrepeat();
166 ps.intypeset = intypeset();
167 // parse.c:312-317 — wordcode buffer state. STUB until Phase 9b
168 // (zshrs has no ecbuf yet).
169 ps.eclen = 0;
170 ps.ecused = 0;
171 ps.ecnpats = 0;
172 ps.ecbuf = None;
173 ps.ecstrs = None;
174 ps.ecsoffs = 0;
175 ps.ecssub = 0;
176 ps.ecnfunc = 0;
177 set_incmdpos(true);
178 set_incond(0);
179 set_inredir(false);
180 set_incasepat(0);
181 set_infor(0);
182 set_inrepeat(0);
183 set_intypeset(false);
184}
185
186/// Direct port of `parse_context_restore(const struct parse_stack *ps, int toplevel)` at `Src/parse.c:326`.
187/// Inverse of `parse_context_save`. Restores lexer-side state +
188/// pending heredocs + Rust-only counters from `ps`, then clears
189/// `errflag & ERRFLAG_ERROR` per parse.c:354.
190/// WARNING: param names don't match C — Rust=(ps) vs C=(ps, toplevel)
191pub fn parse_context_restore(ps: &parse_stack) {
192 // parse.c:330-331 — free any in-progress wordcode buffer.
193 // zshrs has no wordcode yet (STUB until Phase 9b); the AST
194 // nodes are owned by their parent so dropping the parser
195 // frees them.
196
197 // parse.c:333-352 — restore saved state.
198 // parse.c:337 — `hdocs = ps->hdocs;`
199 HDOCS.with_borrow_mut(|h| *h = ps.hdocs.clone());
200 // zshrs-only: restore the parallel AST-glue Vec.
201 LEX_HEREDOCS.with_borrow_mut(|v| *v = ps.lex_heredocs.clone());
202 set_incmdpos(ps.incmdpos);
203 // parse.c:334 — `aliasspaceflag = ps->aliasspaceflag;`.
204 crate::ported::lex::LEX_ALIAS_SPACE_FLAG.with(|c| c.set(ps.aliasspaceflag));
205 set_incond(ps.incond);
206 set_inredir(ps.inredir);
207 set_incasepat(ps.incasepat);
208 set_isnewlin(ps.isnewlin);
209 set_infor(ps.infor);
210 set_inrepeat(ps.inrepeat_);
211 set_intypeset(ps.intypeset);
212 // ecbuf/eclen/ecused/ecnpats/ecstrs/ecsoffs/ecssub/ecnfunc
213 // STUB until Phase 9b.
214
215 // parse.c:354 — `errflag &= ~ERRFLAG_ERROR;` — clear the
216 // error flag so the outer parse sees a clean state.
217 errflag.fetch_and(!ERRFLAG_ERROR, Ordering::Relaxed);
218}
219
/// Direct port of `ecadjusthere(int p, int d)` at `Src/parse.c:360`.
///
/// In C this walks the pending-heredocs list and adds `d` to every `pc`
/// that is at or after position `p`; `ecispace` and `ecdel` call it when
/// wordcodes shift. Here it is currently a no-op: both arguments are
/// accepted and ignored.
pub fn ecadjusthere(p: usize, d: i32) {
    // parse.c:362-366 — `for (p2 = hdocs; p2; p2 = p2->next) if
    // (p2->pc >= p) p2->pc += d;`. Per the port notes the pending
    // heredocs carry no wordcode pc yet, so there is nothing to shift
    // until Phase 9c wires `hdocs.pc` into wordcode emission.
    let _ = (p, d);
}
233
234// === AST tree relocated to src/extensions/zsh_ast.rs ===
235//
236// zsh C does NOT have an AST tree — it emits wordcode directly via
237// par_event/par_list/par_sublist/par_pipe/par_cmd/par_simple/etc.
238// (Src/parse.c:485-3000) into a flat `Wordcode ecbuf[]`. The Zsh*/
239// Shell* AST node types lived in this file as a Rust-only IR that
240// stands in for that wordcode.
241//
242// P9e (PORT_PLAN.md): the types moved to src/extensions/zsh_ast.rs
243// to make their Rust-only-extension nature explicit. The full P9c +
244// P9d rewrite (par_* emitting wordcode + vm_helper reading wordcode)
245// retires them entirely — until then, callers reach them via this
246// re-export.
247
248/// Direct port of `ecispace(int p, int n)` at `Src/parse.c:372`. Insert `n`
249/// empty wordcode slots at position `p`, shifting later entries
250/// right, growing the buffer as needed, adjusting heredoc pointers.
251pub fn ecispace(p: usize, n: usize) {
252 // parse.c:376-381 — grow if needed.
253 let need = n as i32;
254 if (ECLEN.get() - ECUSED.get()) < need {
255 let cur = ECLEN.get();
256 let mut a = if cur < EC_DOUBLE_THRESHOLD {
257 cur
258 } else {
259 EC_INCREMENT
260 };
261 if need > a {
262 a = need;
263 }
264 ECBUF.with_borrow_mut(|buf| {
265 buf.resize((cur + a) as usize, 0);
266 });
267 ECLEN.set(cur + a);
268 }
269 // parse.c:382-385 — memmove p → p+n, gap of n.
270 let m = ECUSED.get() as usize - p;
271 if m > 0 {
272 ECBUF.with_borrow_mut(|buf| {
273 let needed = (ECUSED.get() as usize) + n;
274 if buf.len() < needed {
275 buf.resize(needed, 0);
276 }
277 for i in (0..m).rev() {
278 buf[p + n + i] = buf[p + i];
279 }
280 for i in 0..n {
281 buf[p + i] = 0;
282 }
283 });
284 }
285 // parse.c:386 — bump ecused by n.
286 ECUSED.set(ECUSED.get() + need);
287 // parse.c:387 — `ecadjusthere(p, n)`.
288 ecadjusthere(p, need);
289}
290
291/// Direct port of `ecadd(wordcode c)` at `Src/parse.c:397`. Append `c` to
292/// the wordcode buffer with grow-on-demand, return the new index.
293pub fn ecadd(c: u32) -> usize {
294 // parse.c:399-405 — `if ((eclen - ecused) < 1) grow`.
295 if (ECLEN.get() - ECUSED.get()) < 1 {
296 let cur = ECLEN.get();
297 let a = if cur < EC_DOUBLE_THRESHOLD {
298 cur
299 } else {
300 EC_INCREMENT
301 };
302 ECBUF.with_borrow_mut(|buf| {
303 buf.resize((cur + a) as usize, 0);
304 });
305 ECLEN.set(cur + a);
306 }
307 let idx = ECUSED.get();
308 ECBUF.with_borrow_mut(|buf| {
309 if (idx as usize) >= buf.len() {
310 buf.resize((idx + 1) as usize, 0);
311 }
312 buf[idx as usize] = c;
313 });
314 ECUSED.set(idx + 1);
315 idx as usize
316}
317
318/// Direct port of `ecdel(int p)` at `Src/parse.c:413`. Remove the
319/// wordcode at position `p`, shift later entries left by one,
320/// decrement ecused, adjust pending heredoc pointers.
321pub fn ecdel(p: usize) {
322 // parse.c:415-418 — memmove + decrement ecused.
323 let n = ECUSED.get() as usize - p - 1;
324 if n > 0 {
325 ECBUF.with_borrow_mut(|buf| {
326 for i in 0..n {
327 buf[p + i] = buf[p + i + 1];
328 }
329 });
330 }
331 ECUSED.set(ECUSED.get() - 1);
332 // parse.c:420 — `ecadjusthere(p, -1)`.
333 ecadjusthere(p, -1);
334}
335
336/// Direct port of `ecstrcode(char *s)` at `Src/parse.c:426`. Encode a
337/// string into a single wordcode (short strings ≤4 bytes packed
338/// inline; longer strings get an offset into the deduped registry).
339///
340/// The long-string path stores the METAFIED bytes (matches what C's
341/// strs region contains): collapse Rust UTF-8 chars in 0x80..=0xff
342/// to single bytes, then apply zsh metafy (high bytes ≥ 0x83 →
343/// `Meta=0x83 + byte^0x20`). Length tracking (ECSOFFS) uses the
344/// metafied byte count — same as C `strlen(s) + 1` where C's `s`
345/// is already metafied at this point.
346pub fn ecstrcode(s: &str) -> u32 {
347 // Convert Rust char-form → C-byte form. zsh's metafy() at
348 // Src/utils.c only converts bytes flagged IMETA: 0x00, 0x83
349 // (Meta itself), and 0x84..=0xa2 (Pound..Marker, the lex
350 // markers). Other bytes 0x01..=0x82 and 0xa3..=0xff pass
351 // through unchanged. See utils.c:4195-4204 typtab init.
352 //
353 // Rust receives chars. Classify each:
354 // - codepoint in [0x83..=0xa2] → marker char (emitted by lex
355 // post-metafy in C); 1 byte unchanged
356 // - codepoint < 0x80 → ASCII, 1 byte unchanged
357 // - codepoint in [0x80..=0x82] or [0xa3..=0xff] → single
358 // non-imeta byte (user-input range); 1 byte unchanged
359 // - codepoint > 0xff → multi-byte UTF-8 source char (e.g.
360 // '━' = U+2501 = 0xe2 0x94 0x81). Metafy ONLY the bytes
361 // that fall in 0x83..=0xa2; pass others through. For '━':
362 // 0xe2 stays, 0x94 → 0x83 0xb4, 0x81 stays.
363 let mut c_bytes: Vec<u8> = Vec::with_capacity(s.len());
364 let imeta = |b: u8| -> bool { b == 0 || (0x83..=0xa2).contains(&b) };
365 for ch in s.chars() {
366 let cu = ch as u32;
367 if cu < 0x80 {
368 // ASCII — single byte unchanged.
369 c_bytes.push(cu as u8);
370 } else if (0x83..=0xa2).contains(&cu) {
371 // Lex marker char (emitted by lex.add(Marker) post-metafy
372 // in C). Stored as single byte.
373 c_bytes.push(cu as u8);
374 } else {
375 // User-input char: encode UTF-8 then metafy imeta bytes.
376 // For chars 0x80..=0xff (like 'º' U+00BA), UTF-8 gives
377 // 2 bytes (e.g. `0xc2 0xba`) — zsh's lex reads these as
378 // raw bytes from input and metafy passes 0xc2 / 0xba
379 // through (both NOT imeta).
380 let mut tmp = [0u8; 4];
381 for &b in ch.encode_utf8(&mut tmp).as_bytes() {
382 if imeta(b) {
383 c_bytes.push(0x83);
384 c_bytes.push(b ^ 0x20);
385 } else {
386 c_bytes.push(b);
387 }
388 }
389 }
390 }
391 // c:`has_token` (Src/utils.c:2282) → `itok(*s)` → `typtab[c] & ITOK`.
392 // ITOK is set for bytes `Pound..=Nularg` (0x84..=0xa1) per
393 // Src/utils.c:4198 (`for (t0=Pound; t0<=LAST_NORMAL_TOK; t0++)
394 // typtab[t0]|=ITOK`) plus :4200 (`for (t0=Snull; t0<=Nularg; t0++)
395 // typtab[t0]|=ITOK|IMETA|INULL`). Pound=0x84 Bang=0x9c (last normal),
396 // Snull=0x9d..Nularg=0xa1. Meta=0x83 has IMETA but NOT ITOK.
397 let t = c_bytes.iter().any(|&b| (0x84..=0xa1).contains(&b));
398 let l = c_bytes.len() + 1; // include NUL terminator
399 if l <= 4 {
400 // parse.c:436-445 — short-string inline pack. Uses raw C-bytes
401 // (NOT metafied — the inline packing stores 1 byte per slot).
402 let mut c: u32 = if t { 3 } else { 2 };
403 match l {
404 4 => {
405 c |= (c_bytes[2] as u32) << 19;
406 c |= (c_bytes[1] as u32) << 11;
407 c |= (c_bytes[0] as u32) << 3;
408 }
409 3 => {
410 c |= (c_bytes[1] as u32) << 11;
411 c |= (c_bytes[0] as u32) << 3;
412 }
413 2 => {
414 c |= (c_bytes[0] as u32) << 3;
415 }
416 1 => {
417 // parse.c:443 — empty string special case.
418 c = if t { 7 } else { 6 };
419 }
420 _ => {}
421 }
422 c
423 } else {
424 // parse.c:447-466 — long string. Port of C's eccstr BST walk
425 // exactly: walk the tree comparing nfunc, then hashval, then
426 // strcmp on bytes. Return offs on full match; insert new
427 // leaf otherwise. Matches C's exact dedup-hit pattern
428 // (which is content-dependent — hash collisions and the
429 // lazy short-circuit cmp chain make the tree shape determine
430 // whether matching nodes are reachable).
431 // hasher is byte-by-byte polynomial (hashtable.c:86); pass
432 // c_bytes via from_utf8_unchecked so non-UTF-8 zsh marker
433 // bytes feed straight in. SAFETY: hasher only iterates
434 // `.bytes()` — no UTF-8 validity assumed.
435 let val =
436 crate::ported::hashtable::hasher(unsafe { std::str::from_utf8_unchecked(&c_bytes) });
437 let nfunc = ECNFUNC.get();
438 let found_offs = ECSTRS_TREE.with_borrow_mut(|root| {
439 // Walk the tree. At each node, if all 3 cmps == 0,
440 // return the node's offs. Otherwise descend left/right
441 // by the first non-zero cmp's sign.
442 let mut cur: &mut Option<Box<EccstrNode>> = root;
443 loop {
444 let p = match cur.as_mut() {
445 Some(p) => p,
446 None => break None,
447 };
448 // c:448 — `cmp = p->nfunc - ecnfunc`
449 let mut cmp = (p.nfunc as i64) - (nfunc as i64);
450 if cmp == 0 {
451 // c:448 — `&& !(cmp = (long)p->hashval - (long)val)`
452 // C does `(int)(p->hashval - val)` — unsigned 32-bit
453 // subtraction wraps, then cast to int. Use
454 // wrapping_sub + as i32 to match the bit pattern.
455 cmp = (p.hashval.wrapping_sub(val) as i32) as i64;
456 if cmp == 0 {
457 // c:448 — `&& !(cmp = strcmp(p->str, s))`
458 cmp = match p.str.as_slice().cmp(c_bytes.as_slice()) {
459 std::cmp::Ordering::Less => -1,
460 std::cmp::Ordering::Equal => 0,
461 std::cmp::Ordering::Greater => 1,
462 };
463 if cmp == 0 {
464 // c:450 — `return p->offs;`
465 break Some(p.offs);
466 }
467 }
468 }
469 // c:452 — `pp = (cmp < 0 ? &p->left : &p->right);`
470 cur = if cmp < 0 { &mut p.left } else { &mut p.right };
471 }
472 });
473 if let Some(offs) = found_offs {
474 return offs;
475 }
476 // c:462 — `p->offs = ((ecsoffs - ecssub) << 2) | (t ? 1 : 0);`
477 let offs = (((ECSOFFS.get() - ECSSUB.get()) as u32) << 2) | if t { 1 } else { 0 };
478 // c:463 — `p->aoffs = ecsoffs;` (absolute write position).
479 let aoffs = ECSOFFS.get() as u32;
480 // c:457-465 — insert new node at the NULL slot the walk
481 // terminated at. Encode the walk path as a Vec<bool> of
482 // left/right turns (true = right), then re-descend to
483 // insert. Borrow-checker friendly: a single mutable walk
484 // that either finds an existing node (descend) or fills
485 // the empty slot (return).
486 let stored = c_bytes.clone();
487 let stored_len = stored.len();
488 let new_node = Box::new(EccstrNode {
489 left: None,
490 right: None,
491 str: stored.clone(),
492 offs,
493 aoffs,
494 nfunc,
495 hashval: val,
496 });
497 ECSTRS_TREE.with_borrow_mut(|root| {
498 // Build the path first (immutable-walk; safe because we
499 // only ever go further down).
500 let mut path: Vec<bool> = Vec::new();
501 {
502 let mut cur: &Option<Box<EccstrNode>> = root;
503 while let Some(p) = cur.as_ref() {
504 let mut cmp = (p.nfunc as i64) - (nfunc as i64);
505 if cmp == 0 {
506 // C does `(int)(p->hashval - val)` — unsigned 32-bit
507 // subtraction wraps, then cast to int. Use
508 // wrapping_sub + as i32 to match the bit pattern.
509 cmp = (p.hashval.wrapping_sub(val) as i32) as i64;
510 if cmp == 0 {
511 cmp = match p.str.as_slice().cmp(c_bytes.as_slice()) {
512 std::cmp::Ordering::Less => -1,
513 std::cmp::Ordering::Equal => 0,
514 std::cmp::Ordering::Greater => 1,
515 };
516 }
517 }
518 let go_right = cmp >= 0;
519 path.push(go_right);
520 cur = if go_right { &p.right } else { &p.left };
521 }
522 }
523 // Descend mutably along the recorded path and assign at
524 // the NULL leaf.
525 let mut cur: &mut Option<Box<EccstrNode>> = root;
526 for turn in path {
527 let p = cur.as_mut().expect("path matches walk");
528 cur = if turn { &mut p.right } else { &mut p.left };
529 }
530 *cur = Some(new_node);
531 });
532 // Also keep the existing reverse index (offs → bytes) for
533 // ecgetstr_wordcode and copy_ecstr — they read flat by offs.
534 ECSTRS_REVERSE.with_borrow_mut(|m| {
535 m.insert(offs, stored);
536 });
537 let _ = l;
538 ECSOFFS.set(ECSOFFS.get() + (stored_len + 1) as i32);
539 offs
540 }
541}
542
543/// Initialize parser status. Direct port of zsh/Src/parse.c:491
544/// `init_parse_status`. Clears the per-parse-call lexer flags
545/// so a fresh parse starts from cmd-position with no nesting
546/// state inherited from a prior parse.
547///
548/// Previously the Rust port omitted `inrepeat_ = 0` at c:501.
549/// `inrepeat_` is the `repeat N <body>` parse-state counter that
550/// the lexer toggles in 3 phases (1 → 2 → 3 → 0). Without the
551/// reset, a fresh parse called after an in-flight `repeat`
552/// command would inherit the stale counter and silently misread
553/// the next token as a body of an already-completed repeat.
554pub fn init_parse_status() {
555 // c:491
556 // parse.c:500-502 — `incasepat = incond = inredir = infor =
557 // intypeset = 0; inrepeat_ = 0; incmdpos = 1;`
558 set_incasepat(0); // c:500
559 set_incond(0); // c:500
560 set_inredir(false); // c:500
561 set_infor(0); // c:500
562 set_intypeset(false); // c:500
563 set_inrepeat(0); // c:501 inrepeat_ = 0
564 set_incmdpos(true); // c:502
565}
566
567/// Initialize parser for a fresh parse. Direct port of
568/// zsh/Src/parse.c:509 `init_parse`. C source allocates a
569/// fresh wordcode buffer (ecbuf) sized EC_INIT_SIZE, resets the
570/// per-parse-call counters, and calls init_parse_status. zshrs
571/// has no flat wordcode buffer (AST is built inline) so this
572/// function reduces to init_parse_status + recursion_depth/
573/// global_iterations clear.
574pub fn init_parse() {
575 // parse.c:513-520 — `ecbuf = (Wordcode) zalloc(EC_INIT_SIZE *
576 // sizeof(wordcode)); eclen = EC_INIT_SIZE; ecused = 0;
577 // ecnpats = 0; ecstrs = NULL; ecsoffs = ecnfunc = 0;
578 // ecssub = 0;`. P9b — initialize the per-evaluator wordcode
579 // buffer for this parse call. zshrs uses thread-local
580 // statics declared at file scope (parse.rs:25-50).
581 ECBUF.with_borrow_mut(|buf| {
582 buf.clear();
583 buf.resize(EC_INIT_SIZE as usize, 0);
584 });
585 ECLEN.set(EC_INIT_SIZE);
586 ECUSED.set(0);
587 ECNPATS.set(0);
588 ECSOFFS.set(0);
589 ECSSUB.set(0);
590 ECNFUNC.set(0);
591 ECSTRS_INDEX.with_borrow_mut(|m| m.clear());
592 ECSTRS_REVERSE.with_borrow_mut(|m| m.clear());
593 ECSTRS_TREE.with_borrow_mut(|t| *t = None);
594
595 // parse.c:522 — `init_parse_status();`
596 init_parse_status();
597}
598
599/// Port of `copy_ecstr(Eccstr s, char *p)` from `Src/parse.c:537`.
600/// Walks the BST and writes each entry to `p[s->aoffs..]` matching
601/// C's recursive in-order traversal exactly. The old impl used the
602/// `ECSTRS_REVERSE` HashMap keyed by `offs` (= ecssub-relative
603/// wordcode-encoded offset), which collides across funcdef scopes:
604/// a string at relative offs=0 inside funcdef A and another at
605/// relative offs=0 inside funcdef B share the same key, so one
606/// overwrites the other.
607pub fn copy_ecstr(_table: &std::collections::HashMap<u32, Vec<u8>>, p: &mut [u8]) {
608 // c:537-544 — walk eccstr BST recursively, writing each node's
609 // str at p[node->aoffs..node->aoffs + strlen + 1] (NUL-terminated).
610 ECSTRS_TREE.with_borrow(|root| {
611 copy_ecstr_walk(root, p);
612 });
613}
614
615/// Port of `bld_eprog(int heap)` from `Src/parse.c:547`. Finalizes
616/// the in-build `ECBUF`/`ECSTRS`/`ECNPATS` state into an `Eprog`.
617/// Resets the build state so a new parse can start.
618pub fn bld_eprog(heap: bool) -> eprog {
619 // c:547
620
621 // c:555 — emit WC_END opcode. `WCB_END` is `WC_END_DEFAULT` (0).
622 ecadd(0);
623
624 let ecused = ECUSED.with(|c| c.get()) as usize;
625 let ecnpats = ECNPATS.with(|c| c.get()) as usize;
626 let ecsoffs = ECSOFFS.with(|c| c.get()) as usize;
627
628 // c:557-559 — `ret->len = ((ecnpats * sizeof(Patprog)) +
629 // (ecused * sizeof(wordcode)) +
630 // ecsoffs);`
631 // sizeof(Patprog) = sizeof(struct patprog *) = pointer size.
632 // On 64-bit targets that's 8, on 32-bit that's 4. C's eprog
633 // ->len is the canonical value for parity tests, so we use
634 // the same arithmetic.
635 let prog_bytes = ecused * 4; // sizeof(wordcode) = 4
636 let len = (ecnpats * size_of::<*const u8>()) + prog_bytes + ecsoffs;
637
638 // Snapshot the wordcode buffer + string table.
639 let prog_words: Vec<u32> = ECBUF.with(|c| c.borrow()[..ecused].to_vec());
640 let mut strs_bytes = vec![0u8; ecsoffs];
641 ECSTRS_REVERSE.with(|c| copy_ecstr(&c.borrow(), &mut strs_bytes));
642
643 // c:566 — store strs as raw bytes via from_utf8_unchecked so
644 // single-byte zsh markers (e.g. Dash 0x9b) survive intact.
645 // `String::from_utf8_lossy` would replace them with U+FFFD
646 // (`\xef\xbf\xbd`), breaking byte-for-byte parity with C's
647 // strs region. SAFETY: downstream consumers of `eprog.strs`
648 // index by byte offset (per the wordcode `(offs >> 2)` offset
649 // encoding) and call `.as_bytes()` — they never iterate as
650 // chars or rely on UTF-8 validity, so storing non-UTF-8 bytes
651 // in a String is safe in practice. C zsh's strs is `char *`
652 // with the same byte-not-char semantics.
653 let strs_string = unsafe { String::from_utf8_unchecked(strs_bytes) };
654 let ret = eprog {
655 flags: if heap { EF_HEAP } else { EF_REAL }, // c:570
656 len: len as i32, // c:559
657 npats: ecnpats as i32, // c:561
658 nref: if heap { -1 } else { 1 }, // c:562
659 pats: Vec::new(), // c:563 dummy_patprog
660 prog: prog_words, // c:565
661 strs: Some(strs_string),
662 shf: None,
663 dump: None,
664 };
665
666 // c:577 — free ecbuf so next parse starts fresh.
667 ECBUF.with(|c| c.borrow_mut().clear());
668 ECLEN.with(|c| c.set(0));
669 ECUSED.with(|c| c.set(0));
670 ECNPATS.with(|c| c.set(0));
671 ECSOFFS.with(|c| c.set(0));
672 ECSTRS_INDEX.with(|c| c.borrow_mut().clear());
673 ECSTRS_REVERSE.with(|c| c.borrow_mut().clear());
674 ECSTRS_TREE.with(|t| *t.borrow_mut() = None);
675
676 ret
677}
678
679/// Port of `int empty_eprog(Eprog p)` from `Src/parse.c:584`. C
680/// body: `return (!p || !p->prog || *p->prog == WCB_END());` —
681/// the eprog is empty when its prog buffer is missing or the
682/// first wordcode is the WC_END marker. Used by signal handlers
683/// (`Src/signals.c:712`) to short-circuit a trap that resolves to
684/// an empty program.
685pub fn empty_eprog(p: &eprog) -> bool {
686 p.prog.is_empty() || p.prog[0] == WCB_END()
687}
688
689/// Clear pending here-document list. Direct port of
690/// `clear_hdocs(void)` from `Src/parse.c:591`. The C version walks
691/// `hdocs` and frees each node; Rust drops the `Box<heredocs>`
692/// chain automatically when the head is replaced with None.
693pub fn clear_hdocs() {
694 // c:591
695 // c:593-598 — for (p = hdocs; p; p = n) { n = p->next; zfree(p); }
696 // c:599 — hdocs = NULL;
697 HDOCS.with_borrow_mut(|h| *h = None);
698 // zshrs-only: also drop the parallel AST-glue Vec. No C
699 // analog — LEX_HEREDOCS is Rust-only working-set state.
700 LEX_HEREDOCS.with_borrow_mut(|v| v.clear());
701}
702
703/// Top-level parse-event entry. Direct port of zsh/Src/parse.c:
704/// 612-631 `parse_event`. Reads one event from the lexer (a
705/// sublist optionally followed by SEPER/AMPER/AMPERBANG) and
706/// returns the resulting ZshProgram.
707///
708/// `endtok` is the token that terminates the event — usually
709/// ENDINPUT, but for command-style substitutions the closing
710/// `)` (zsh's CMD_SUBST_CLOSE).
711///
712/// zshrs port note: zsh's parse_event returns an `Eprog` (heap-
713/// allocated wordcode program). zshrs returns a `ZshProgram`
714/// (AST root). Same role at the parse-output boundary.
715pub fn parse_event(endtok: lextok) -> Option<ZshProgram> {
716 // parse.c:616-619 — reset state and prime the lexer.
717 set_tok(ENDINPUT);
718 set_incmdpos(true);
719 // parse.c:618 — `aliasspaceflag = 0;`. Fresh event: discard any
720 // alias-space carry-over from a prior parse so HISTIGNORESPACE
721 // doesn't suppress the next entered command line.
722 crate::ported::lex::LEX_ALIAS_SPACE_FLAG.with(|c| c.set(0));
723 zshlex();
724 // parse.c:620 — `init_parse();`
725 init_parse();
726
727 // parse.c:622-625 — drive par_event; on failure clear hdocs.
728 if !par_event(endtok) {
729 clear_hdocs();
730 return None;
731 }
732 // parse.c:626-628 — if endtok != ENDINPUT, this is a sub-
733 // parse for a substitution that doesn't need its own eprog.
734 // zshrs returns an empty program in that case (caller
735 // discards).
736 if endtok != ENDINPUT {
737 return Some(ZshProgram { lists: Vec::new() });
738 }
739 // parse.c:630 — `bld_eprog(1);` — build the final eprog.
740 // zshrs has already built the AST via parse_program_until,
741 // but parse_event uses par_event directly so we need to
742 // collect what par_event accumulated.
743 Some(parse_program_until(None))
744}
745
746/// Parse one event (sublist with optional separator). Direct
747/// port of zsh/Src/parse.c:635 `par_event`. Returns true if
748/// an event was successfully parsed, false on EOF / endtok.
749///
750/// zshrs port note: the C version emits wordcodes via ecadd/
751/// set_list_code; zshrs's parser builds AST nodes via
752/// par_sublist + par_list. Same flow, different output.
753pub fn par_event(endtok: lextok) -> bool {
754 // parse.c:639-643 — skip leading SEPERs.
755 while tok() == SEPER {
756 // parse.c:640-641 — at top-level (endtok == ENDINPUT),
757 // a SEPER on a fresh line ends the event.
758 if isnewlin() > 0 && endtok == ENDINPUT {
759 return false;
760 }
761 zshlex();
762 }
763 // parse.c:644-647 — terminate on EOF or matching close-token.
764 if tok() == ENDINPUT {
765 return false;
766 }
767 if tok() == endtok {
768 return true;
769 }
770 // parse.c:649-... — drive par_sublist + handle terminator.
771 // zshrs's par_sublist already builds the AST node directly.
772 match par_sublist() {
773 Some(_) => {
774 // parse.c:651-693 — terminator handling. zshrs's
775 // par_list wraps this; for parse_event we just
776 // confirm the sublist parsed.
777 true
778 }
779 None => false,
780 }
781}
782
783/// Port of `parse_list(void)` from `Src/parse.c:697`. C-shape entry
784/// point: drives `par_list` and finalizes via `bld_eprog`. Returns
785/// `None` on syntax error.
786pub fn parse_list() -> Option<eprog> {
787 // c:697
788 set_tok(ENDINPUT);
789 init_parse();
790 zshlex();
791 // c:Src/parse.c:705 — `par_list(&c);` emits wordcode for the
792 // full multi-statement list (its goto-rec loop walks all
793 // SEPER-separated sublists). The Rust AST par_list() emits
794 // NOTHING to the wordcode buffer (only builds the AST), so
795 // bld_eprog returned an empty program AND tok stayed at
796 // SEPER, tripping the syntax-error check below for any
797 // \`cmd; cmd\` body.
798 //
799 // Route through par_event_wordcode (the wordcode emitter,
800 // lines 4395+) which mirrors C's par_list loop semantics
801 // and populates the wordcode buffer that bld_eprog reads.
802 let _start = par_event_wordcode();
803 if tok() != ENDINPUT {
804 clear_hdocs();
805 set_tok(LEXERR);
806 yyerror("syntax error");
807 return None;
808 }
809 Some(bld_eprog(false))
810}
811
812/// Port of `parse_cond(void)` from `Src/parse.c:722`. Only used by
813/// `bin_test`/`bin_bracket` for `/bin/test`/`[` compat — the
814/// `condlex` global must already point at `testlex` before entry.
815pub fn parse_cond() -> Option<eprog> {
816 // c:722
817 init_parse();
818 if par_cond().is_none() {
819 clear_hdocs();
820 return None;
821 }
822 Some(bld_eprog(true))
823}
824
825// ============================================================
826// Wordcode emission helpers (parse.c private helpers)
827//
828// Direct ports of zsh's wordcode-emission helpers in parse.c.
// These write u32 opcodes into a flat, thread-local `ecbuf` array
// via ecadd / ecdel / ecispace / ecstrcode and friends. The
831// par_*_wordcode family at parse.rs:1700-3500 walks the lex
832// stream and emits a real wordcode buffer here.
833//
834// (The AST tree built by par_program / par_simple / etc. is a
835// separate path used by fusevm; see compile_zsh.rs for the AST
836// → fusevm-bytecode compiler.)
837// ============================================================
838
839/// Patch a list-placeholder wordcode with its actual opcode +
840/// jump distance. Direct port of zsh/Src/parse.c:738
841/// `set_list_code`. zsh emits an `ecadd(0)` placeholder before
842/// par_sublist runs, then comes back through set_list_code to
843/// rewrite the slot with WCB_LIST(type, distance) once the
844/// sublist's final length is known.
845///
846/// Port of `set_list_code(int p, int type, int cmplx)` from
847/// `Src/parse.c:738`. Patches the WCB_LIST header at `p` based on
848/// whether the sublist body is simple (single command, no
849/// pipeline) and Z_SYNC/Z_END — emits the Z_SIMPLE-optimized
850/// header when possible, otherwise the plain WCB_LIST(type, 0).
851pub fn set_list_code(p: usize, type_code: i32, cmplx: bool) {
852 let _ = wc_bdata;
853 // c:740 — `if (!cmplx && (type == Z_SYNC || type == (Z_SYNC | Z_END))
854 // && WC_SUBLIST_TYPE(ecbuf[p+1]) == WC_SUBLIST_END)`
855 let sublist_code = ECBUF.with_borrow(|b| b.get(p + 1).copied().unwrap_or(0));
856 let z = type_code;
857 let qualifies = !cmplx
858 && (z == Z_SYNC || z == (Z_SYNC | Z_END))
859 && WC_SUBLIST_TYPE(sublist_code) == WC_SUBLIST_END;
860 if qualifies {
861 // c:742 — `int ispipe = !(WC_SUBLIST_FLAGS(ecbuf[p+1])
862 // & WC_SUBLIST_SIMPLE);`
863 let ispipe = (WC_SUBLIST_FLAGS(sublist_code) & WC_SUBLIST_SIMPLE) == 0;
864 // c:743 — `ecbuf[p] = WCB_LIST((type|Z_SIMPLE), ecused-2-p);`
865 let used = ECUSED.get() as usize;
866 let off = used.saturating_sub(2 + p);
867 ECBUF.with_borrow_mut(|b| {
868 if p < b.len() {
869 b[p] = WCB_LIST((z | Z_SIMPLE) as wordcode, off as wordcode);
870 }
871 });
872 // c:744 — `ecdel(p+1);`
873 ecdel(p + 1);
874 // c:745-746 — `if (ispipe) ecbuf[p+1] = WC_PIPE_LINENO(ecbuf[p+1]);`
875 if ispipe {
876 ECBUF.with_borrow_mut(|b| {
877 if p + 1 < b.len() {
878 b[p + 1] = WC_PIPE_LINENO(b[p + 1]);
879 }
880 });
881 }
882 } else {
883 // c:748 — `ecbuf[p] = WCB_LIST(type, 0);`
884 ECBUF.with_borrow_mut(|b| {
885 if p < b.len() {
886 b[p] = WCB_LIST(z as wordcode, 0);
887 }
888 });
889 }
890}
891
892/// Port of `set_sublist_code(int p, int type, int flags, int skip, int cmplx)`
893/// from `Src/parse.c:755`. Patches the WCB_SUBLIST header at `p`.
894/// When the sublist is non-complex (single command, no pipeline),
895/// sets WC_SUBLIST_SIMPLE and rewrites the following slot to
896/// `WC_PIPE_LINENO`.
897pub fn set_sublist_code(p: usize, type_code: i32, flags: i32, skip: i32, cmplx: bool) {
898 if cmplx {
899 // c:758 — `ecbuf[p] = WCB_SUBLIST(type, flags, skip);`
900 ECBUF.with_borrow_mut(|b| {
901 if p < b.len() {
902 b[p] = WCB_SUBLIST(type_code as wordcode, flags as wordcode, skip as wordcode);
903 }
904 });
905 } else {
906 // c:760 — `ecbuf[p] = WCB_SUBLIST(type, flags|WC_SUBLIST_SIMPLE, skip);`
907 ECBUF.with_borrow_mut(|b| {
908 if p < b.len() {
909 b[p] = WCB_SUBLIST(
910 type_code as wordcode,
911 (flags as wordcode) | WC_SUBLIST_SIMPLE,
912 skip as wordcode,
913 );
914 }
915 });
916 // c:761 — `ecbuf[p+1] = WC_PIPE_LINENO(ecbuf[p+1]);`
917 ECBUF.with_borrow_mut(|b| {
918 if p + 1 < b.len() {
919 b[p + 1] = WC_PIPE_LINENO(b[p + 1]);
920 }
921 });
922 }
923}
924
925/// Parse a list (sublist with optional & or ;).
926///
927/// Direct port of zsh/Src/parse.c:771-804 `par_list` (and the
928/// par_list1 wrapper at parse.c:807-817).
929///
930/// **Structural divergence**: zsh's parse.c emits flat wordcode
931/// into the `ecbuf` u32 array via `ecadd(0)` (placeholder),
932/// `set_list_code(p, code, complexity)`, `wc_bdata(Z_END)`. zshrs
933/// builds an AST node `ZshList { sublist, flags }` instead. The
934/// async/sync/disown discrimination at parse.c:785-790 maps to
935/// zshrs's `ListFlags { async_, disown }` field — Z_SYNC is the
936/// default (no flags), Z_ASYNC = `&` = `async_=true`, Z_DISOWN +
937/// Z_ASYNC = `&!`/`&|` = both true. Same semantics, different
938/// representation. This divergence is repository-wide: every
939/// `par_*` function emits wordcode in C, every `parse_*` builds
940/// AST in Rust. The compile_zsh module then traverses the AST to
941/// emit fusevm bytecode, which serves the same role as zsh's
942/// wordcode but with a different opcode set and execution model.
943fn par_list() -> Option<ZshList> {
944 let sublist = par_sublist()?;
945
946 let flags = match tok() {
947 AMPER => {
948 zshlex();
949 ListFlags {
950 async_: true,
951 disown: false,
952 }
953 }
954 AMPERBANG => {
955 zshlex();
956 ListFlags {
957 async_: true,
958 disown: true,
959 }
960 }
961 SEPER | SEMI | NEWLIN => {
962 zshlex();
963 ListFlags::default()
964 }
965 _ => ListFlags::default(),
966 };
967
968 Some(ZshList { sublist, flags })
969}
970
971/// Parse one list — non-recursing variant. Direct port of
972/// zsh/Src/parse.c:808 `par_list1`. Like par_list but
973/// doesn't recurse on the trailing-separator path; used by
974/// callers that only want one statement (e.g. each arm of a
975/// case body).
976pub fn par_list1() -> Option<ZshSublist> {
977 // parse.c:810-816 — body is a single par_sublist call wrapped
978 // in the eu/ecused tracking that zshrs doesn't need (no
979 // wordcode buffer).
980 par_sublist()
981}
982
983/// Parse a sublist (pipelines connected by && or ||).
984///
985/// Direct port of zsh/Src/parse.c:825 `par_sublist` and
986/// par_sublist2 at parse.c:869-892. par_sublist handles the
987/// && / || conjunction and emits WC_SUBLIST opcodes; par_sublist2
988/// handles the leading `!` negation and `coproc` keyword.
989///
990/// AST mapping: ZshSublist { pipe, conj_chain }, where `conj_chain`
991/// is a Vec<(ConjOp, ZshSublist)> for chained && / ||. C uses
992/// flat wordcode with WC_SUBLIST_AND / WC_SUBLIST_OR markers.
993fn par_sublist() -> Option<ZshSublist> {
994 let mut flags = SublistFlags::default();
995
996 // Handle coproc and !
997 if tok() == COPROC {
998 flags.coproc = true;
999 zshlex();
1000 } else if tok() == BANG_TOK {
1001 flags.not = true;
1002 zshlex();
1003 }
1004
1005 let pipe = par_pline()?;
1006
1007 // Check for && or ||
1008 let next = match tok() {
1009 DAMPER => {
1010 zshlex();
1011 skip_separators();
1012 // c:Src/parse.c:par_sublist — and-or operators (`&&`,
1013 // `||`) require a sublist on each side. After consuming
1014 // `&&`/`||`, another and-or operator OR a pipe-operator
1015 // immediately after is a parse error in C zsh. zshrs's
1016 // recursion silently returned None and dropped the
1017 // operator. Bug #171 in docs/BUGS.md.
1018 if matches!(tok(), DAMPER | DBAR | BAR_TOK | BARAMP) {
1019 let name = match tok() {
1020 DAMPER => "&&",
1021 DBAR => "||",
1022 BAR_TOK => "|",
1023 BARAMP => "|&",
1024 _ => "operator",
1025 };
1026 zerr(&format!("parse error near `{}'", name));
1027 return None;
1028 }
1029 par_sublist().map(|s| (SublistOp::And, Box::new(s)))
1030 }
1031 DBAR => {
1032 zshlex();
1033 skip_separators();
1034 if matches!(tok(), DAMPER | DBAR | BAR_TOK | BARAMP) {
1035 let name = match tok() {
1036 DAMPER => "&&",
1037 DBAR => "||",
1038 BAR_TOK => "|",
1039 BARAMP => "|&",
1040 _ => "operator",
1041 };
1042 zerr(&format!("parse error near `{}'", name));
1043 return None;
1044 }
1045 par_sublist().map(|s| (SublistOp::Or, Box::new(s)))
1046 }
1047 _ => None,
1048 };
1049
1050 Some(ZshSublist { pipe, next, flags })
1051}
1052
1053/// Port of `par_sublist2(int *cmplx)` from `Src/parse.c:869`.
1054/// Secondary-sublist arm: handles the `COPROC`/`Bang` prefix
1055/// in front of a pline. Returns the WC_SUBLIST flag word added.
1056pub fn par_sublist2(cmplx: &mut i32) -> Option<i32> {
1057 // c:870 — `int f = 0;`
1058 let mut f: i32 = 0;
1059 // c:873-880 — COPROC / BANG prefix flags.
1060 if tok() == COPROC {
1061 *cmplx = 1;
1062 f |= WC_SUBLIST_COPROC as i32;
1063 zshlex();
1064 } else if tok() == BANG_TOK {
1065 *cmplx = 1;
1066 f |= WC_SUBLIST_NOT as i32;
1067 zshlex();
1068 }
1069 // c:882-883 — `if (!par_pline(cmplx) && !f) return -1;`
1070 if !par_pipe_wordcode(cmplx) && f == 0 {
1071 return None;
1072 }
1073 // c:885 — `return f;`
1074 Some(f)
1075}
1076
1077/// Parse a pipeline
1078/// Parse a pipeline (cmds joined by `|` / `|&`). Direct port of
1079/// zsh/Src/parse.c:894 `par_pline`. AST: ZshPipe { cmds: Vec<ZshCommand> }.
1080/// C emits WC_PIPE wordcodes per command; same flow.
1081fn par_pline() -> Option<ZshPipe> {
1082 let lineno = toklineno();
1083 let cmd = par_cmd()?;
1084
1085 // Check for | or |&
1086 let mut merge_stderr = false;
1087 let next = match tok() {
1088 BAR_TOK | BARAMP => {
1089 merge_stderr = tok() == BARAMP;
1090 zshlex();
1091 skip_separators();
1092 // c:Src/parse.c:par_pline — pipe-operators require a
1093 // command on each side. After consuming `|`/`|&`,
1094 // C zsh's recursive par_pline call returns -1 (parse
1095 // error) when the next token is another pipe-operator
1096 // — `a | | b` errors with `parse error near `|''`.
1097 // zshrs's `par_pline()?` silently returned None on
1098 // missing command, dropping the rest of the input
1099 // without diagnosing the empty-pipe-operand. Bug #171
1100 // in docs/BUGS.md.
1101 if matches!(tok(), BAR_TOK | BARAMP) {
1102 let name = if tok() == BARAMP { "|&" } else { "|" };
1103 zerr(&format!("parse error near `{}'", name));
1104 return None;
1105 }
1106 par_pline().map(Box::new)
1107 }
1108 _ => None,
1109 };
1110
1111 Some(ZshPipe {
1112 cmd,
1113 next,
1114 lineno,
1115 merge_stderr,
1116 })
1117}
1118
1119/// Parse a command
1120/// Parse a command — dispatches by leading token (FOR / CASE /
1121/// IF / WHILE / UNTIL / REPEAT / FUNC / DINBRACK / DINPAR /
1122/// Inpar subshell / Inbrace current-shell / TIME / NOCORRECT,
1123/// else simple). Direct port of zsh/Src/parse.c:958 `par_cmd`.
1124fn par_cmd() -> Option<ZshCommand> {
1125 // Parse leading redirections
1126 let mut redirs = Vec::new();
1127 while IS_REDIROP(tok()) {
1128 if let Some(redir) = par_redir() {
1129 redirs.push(redir);
1130 }
1131 }
1132
1133 let cmd = match tok() {
1134 FOR | FOREACH => par_for(),
1135 SELECT => parse_select(),
1136 CASE => par_case(),
1137 IF => par_if(),
1138 WHILE => par_while(false),
1139 UNTIL => par_while(true),
1140 REPEAT => par_repeat(),
1141 INPAR_TOK => par_subsh(),
1142 INOUTPAR => parse_anon_funcdef(),
1143 INBRACE_TOK => parse_cursh(),
1144 FUNC => par_funcdef(),
1145 DINBRACK => par_cond(),
1146 DINPAR => parse_arith(),
1147 TIME => par_time(),
1148 _ => par_simple(redirs),
1149 };
1150
1151 // Parse trailing redirections. For Simple commands the redirs were
1152 // already captured inside par_simple; for compound forms (Cursh,
1153 // Subsh, If, While, etc.) we collect them here and wrap in
1154 // ZshCommand::Redirected so compile_zsh can scope-bracket them.
1155 if let Some(inner) = cmd {
1156 let mut trailing: Vec<ZshRedir> = Vec::new();
1157 while IS_REDIROP(tok()) {
1158 if let Some(redir) = par_redir() {
1159 trailing.push(redir);
1160 }
1161 }
1162 // c:Src/parse.c:par_cmd — compound forms (Cursh `{...}`, Subsh
1163 // `(...)`, If/While/Until/For/Case/Select/Repeat/Funcdef) must
1164 // be followed by a valid sublist/list separator (`;`, `\n`,
1165 // `&`, `|`, `&&`, `||`, redirect-op) — STRING_LEX after a
1166 // compound is a parse error. zshrs's outer par_list loop
1167 // silently treated trailing words as a new command, masking
1168 // syntax errors like `{ echo a; } b c`. Mirror C's strict
1169 // post-compound terminator check. Bug #146 in docs/BUGS.md.
1170 if !matches!(inner, ZshCommand::Simple(_)) && tok() == STRING_LEX {
1171 let bad = tokstr().unwrap_or_default();
1172 zerr(&format!("parse error near `{}'", bad));
1173 // Reset state before returning so the outer loop's None
1174 // detection unwinds cleanly.
1175 set_incmdpos(true);
1176 set_incasepat(0);
1177 set_incond(0);
1178 set_intypeset(false);
1179 return None;
1180 }
1181 // c:1072-1075 — every par_cmd tail resets the lexer state
1182 // toggles so the NEXT command starts in cmd position with
1183 // case/cond/typeset off. par_simple/par_cond set `incmdpos=0`
1184 // during their bodies; without this reset the next iteration
1185 // of the outer par_list loop sees `if` / `done` / `select`
1186 // etc. as plain strings and the AST collapses.
1187 set_incmdpos(true);
1188 set_incasepat(0);
1189 set_incond(0);
1190 set_intypeset(false);
1191 if trailing.is_empty() {
1192 return Some(inner);
1193 }
1194 // Simple already absorbed its own redirs (compile path expects
1195 // them on ZshSimple), so don't double-wrap.
1196 if matches!(inner, ZshCommand::Simple(_)) {
1197 if let ZshCommand::Simple(mut s) = inner {
1198 s.redirs.extend(trailing);
1199 return Some(ZshCommand::Simple(s));
1200 }
1201 unreachable!()
1202 }
1203 return Some(ZshCommand::Redirected(Box::new(inner), trailing));
1204 }
1205 // Same reset on the empty-cmd branch (mirror c:1072 unconditional
1206 // path — the C function only returns 0 above when the dispatch
1207 // produced no command, and falls through to the reset block).
1208 set_incmdpos(true);
1209 set_incasepat(0);
1210 set_incond(0);
1211 set_intypeset(false);
1212
1213 None
1214}
1215
1216/// Parse for/foreach loop
1217/// Parse `for NAME in WORDS; do BODY; done` (foreach style) AND
1218/// `for ((init; cond; incr)) do BODY done` (c-style). Direct port
1219/// of zsh/Src/parse.c:1087 `par_for`. parse_for_cstyle is the
1220/// inner branch for the `((...))` arithmetic-header variant
1221/// (parse.c:1100-1140 inside par_for).
1222fn par_for() -> Option<ZshCommand> {
1223 let is_foreach = tok() == FOREACH;
1224 // c:1094-1095 (Src/parse.c, par_for) — set `infor=2` (only when
1225 // tok==FOR) so the lexer's `(` peek at lex.c:784-789
1226 // (`if (infor) { ... return DINPAR; }`) routes the arith-for
1227 // body through dbparens semicolon-splitting instead of the
1228 // `cmd_or_math` whole-body capture path. Without this, `for ((
1229 // i=0; i<3; i++ ))` lexed as a single `((arith))` expression
1230 // and parse_for_cstyle's second zshlex got an empty/wrong tok.
1231 //
1232 // The companion C statement `incmdpos = 0;` at c:1094 isn't
1233 // mirrored here: zshrs's parser doesn't otherwise touch
1234 // LEX_INCMDPOS at this boundary, and forcing it false breaks
1235 // the SELECT case where downstream tokenization relied on the
1236 // inherited state. The C parser maintains incmdpos inline at
1237 // every grammar transition (parse.c:617, :791, :1072, :1145,
1238 // :1154, :1161, ...); without porting those companion sites a
1239 // single explicit reset here is more harmful than helpful.
1240 set_infor(if tok() == FOR { 2 } else { 0 }); // c:1095
1241 zshlex(); // c:1096
1242
1243 // Check for C-style: for (( init; cond; step ))
1244 if tok() == DINPAR {
1245 // c:1110-1111 — close out infor / cmdpos after parse_for_cstyle
1246 // has consumed the init/cond/step triple. Done inside the
1247 // helper itself so we honour the C ordering.
1248 return parse_for_cstyle();
1249 }
1250
1251 // c:1116 — `infor = 0;` immediately on entering the foreach
1252 // branch. Without this, `infor` stays at 2 (set at c:1095 when
1253 // tok==FOR) for the rest of par_for, and the lexer's `((`
1254 // peek at lex.c:786 routes every subsequent `((...))` inside
1255 // the loop body through dbparens — so `for x in a; do (( 1
1256 // )); done` and `if (( 1 )) { … }` inside the do-body both
1257 // mis-lexed as a c-style for header.
1258 set_infor(0); // c:1116
1259
1260 // Get variable name(s). zsh parse.c par_for accepts multiple
1261 // identifier tokens before `in`/`(`/newline — `for k v in ...`
1262 // assigns each iteration's pair of values to k and v in turn.
1263 // We store the names space-joined since variable identifiers
1264 // can't contain whitespace.
1265 let mut names: Vec<String> = Vec::new();
1266 while tok() == STRING_LEX {
1267 let v = tokstr().unwrap_or_default();
1268 if v == "in" {
1269 break;
1270 }
1271 names.push(v);
1272 zshlex();
1273 }
1274 if names.is_empty() {
1275 zerr("expected variable name in for");
1276 return None;
1277 }
1278 let var = names.join(" ");
1279
1280 // Skip newlines
1281 skip_separators();
1282
1283 // Get list. The lexer-port quirk: `for x (a b c)` arrives as a
1284 // single String token with the parens lexed-as-content
1285 // (`<Inpar>a b c<Outpar>`) instead of as separate Inpar/String/
1286 // Outpar tokens. Detect that shape and split it manually.
1287 let list = if tok() == STRING_LEX
1288 && tokstr()
1289 .map(|s| s.starts_with('\u{88}') && s.ends_with('\u{8a}'))
1290 .unwrap_or(false)
1291 {
1292 let raw = tokstr().unwrap_or_default();
1293 // Strip leading Inpar + trailing Outpar. KEEP the inner
1294 // content tokenized — `for x ({1..3}) …` has `{1..3}` as
1295 // Inbrace+content+Outbrace markers, which compile_word_str
1296 // needs to detect and brace-expand. Untokenizing here would
1297 // collapse the markers to plain `{` `}` chars and the brace-
1298 // expansion pass (which strictly requires Inbrace TOKEN per
1299 // Src/glob.c:hasbraces) would skip the word entirely.
1300 // Split only on UNTOKENIZED whitespace at the top level —
1301 // tokenized characters (TOKEN range \u{84}..\u{a1}) are part
1302 // of one word; bare ASCII spaces / tabs separate words.
1303 let inner = &raw[raw.char_indices().nth(1).map(|(i, _)| i).unwrap_or(0)
1304 ..raw
1305 .char_indices()
1306 .last()
1307 .map(|(i, _)| i)
1308 .unwrap_or(raw.len())];
1309 let mut words: Vec<String> = Vec::new();
1310 let mut cur = String::new();
1311 for c in inner.chars() {
1312 if c == ' ' || c == '\t' || c == '\n' {
1313 if !cur.is_empty() {
1314 words.push(std::mem::take(&mut cur));
1315 }
1316 } else {
1317 cur.push(c);
1318 }
1319 }
1320 if !cur.is_empty() {
1321 words.push(cur);
1322 }
1323 zshlex();
1324 ForList::Words(words)
1325 } else if tok() == STRING_LEX {
1326 let s = tokstr();
1327 if s.map(|s| s == "in").unwrap_or(false) {
1328 // c:Src/parse.c:1147-1154 — after consuming `in`, the
1329 // for-list reads in WORD position, not command position.
1330 // Reset incmdpos=false so the lexer's LX2_INBRACE arm
1331 // (lex.rs:1791) treats a leading `{` as the brace-
1332 // expansion marker (`bct++; add(Inbrace)`) instead of
1333 // returning STRING("{") + promoting to INBRACE_TOK.
1334 // Without this, `for i in {1..3}` saw `{` as the body-
1335 // opener brace, so the word-collection loop got an
1336 // empty word list and the loop body silently ran 0
1337 // iterations.
1338 set_incmdpos(false);
1339 zshlex();
1340 let mut words = Vec::new();
1341 while tok() == STRING_LEX {
1342 let _ts_s = tokstr();
1343 if let Some(s) = _ts_s.as_deref() {
1344 words.push(s.to_string());
1345 }
1346 zshlex();
1347 }
1348 // c:Src/parse.c:1162 — `incmdpos = 1;` after the
1349 // wordlist + SEPER are consumed, so the next token
1350 // (`do` / `{` body opener) lexes at command position.
1351 set_incmdpos(true);
1352 ForList::Words(words)
1353 } else {
1354 ForList::Positional
1355 }
1356 } else if tok() == INPAR_TOK {
1357 // for var (...) — `for x ({1..3})`: inside the parens, the
1358 // list is in WORD position so `{` must lex as the brace-
1359 // expansion Inbrace marker, NOT as a body-opener INBRACE_TOK.
1360 // Without resetting incmdpos before the next zshlex, the
1361 // lexer's LX2_INBRACE arm promotes `{` to INBRACE_TOK and
1362 // the word-collection loop exits empty, giving
1363 // `for x ({1..3})` an empty iteration.
1364 set_incmdpos(false);
1365 zshlex();
1366 let mut words = Vec::new();
1367 while tok() == STRING_LEX || tok() == SEPER {
1368 if tok() == STRING_LEX {
1369 let _ts_s = tokstr();
1370 if let Some(s) = _ts_s.as_deref() {
1371 words.push(s.to_string());
1372 }
1373 }
1374 zshlex();
1375 }
1376 if tok() == OUTPAR_TOK {
1377 // After the `)` of a for-list, the next token is the
1378 // body opener — `do`/`{`. zsh's lexer needs incmdpos
1379 // set so `{` lexes as Inbrace (not as a literal). C
1380 // analogue: parse.c::par_for sets `incmdpos = 1`
1381 // after consuming the Outpar before the body parse.
1382 set_incmdpos(true);
1383 zshlex();
1384 }
1385 ForList::Words(words)
1386 } else {
1387 ForList::Positional
1388 };
1389
1390 // Skip to body
1391 skip_separators();
1392
1393 // Parse body
1394 let body = parse_loop_body(is_foreach, false)?;
1395
1396 Some(ZshCommand::For(ZshFor {
1397 var,
1398 list,
1399 body: Box::new(body),
1400 is_select: false,
1401 }))
1402}
1403
1404/// Parse case statement
1405/// Parse `case WORD in PATTERN) BODY ;; ... esac`. Direct port
1406/// of zsh/Src/parse.c:1209 `par_case`. Each case arm is a
1407/// (pattern_list, body, terminator) tuple where terminator is
1408/// `;;` (default), `;&` (fallthrough), or `;|` (continue testing).
1409fn par_case() -> Option<ZshCommand> {
1410 // C par_case (parse.c:1209-1241). Order of state toggles
1411 // matters — the lexer reads the case word in `incmdpos=0`
1412 // (so it's not promoted to a reswd), then the `in`/`{` in
1413 // `incmdpos=1, noaliases=1, nocorrect=1` (so the `in` literal
1414 // isn't alias-expanded or spell-corrected), then sets
1415 // `incasepat=1, incmdpos=0` before the first pattern.
1416 set_incmdpos(false);
1417 zshlex(); // skip 'case'
1418
1419 let word = match tok() {
1420 STRING_LEX => {
1421 let w = tokstr().unwrap_or_default();
1422 // c:1222 — `incmdpos = 1;` before the next zshlex so the
1423 // `in` keyword is recognised. c:1223-1225 — save+force
1424 // noaliases / nocorrect.
1425 set_incmdpos(true);
1426 let ona = noaliases();
1427 let onc = nocorrect();
1428 set_noaliases(true);
1429 set_nocorrect(1);
1430 zshlex();
1431 // Restore noaliases/nocorrect after the `in`-or-`{` token
1432 // is in hand; both are unconditionally restored at c:1238-1239.
1433 let restore = |ona: bool, onc: i32| {
1434 set_noaliases(ona);
1435 set_nocorrect(onc);
1436 };
1437 (w, ona, onc, restore)
1438 }
1439 _ => {
1440 zerr("expected word after case");
1441 return None;
1442 }
1443 };
1444 let (word, ona, onc, restore) = word;
1445
1446 skip_separators();
1447
1448 // Expect 'in' or {
1449 let use_brace = tok() == INBRACE_TOK;
1450 if tok() == STRING_LEX {
1451 let s = tokstr();
1452 if s.map(|s| s != "in").unwrap_or(true) {
1453 // c:1228-1232 — restore noaliases/nocorrect on error path.
1454 restore(ona, onc);
1455 zerr("expected 'in' in case");
1456 return None;
1457 }
1458 } else if !use_brace {
1459 restore(ona, onc);
1460 zerr("expected 'in' or '{' in case");
1461 return None;
1462 }
1463 // c:1236-1239 — `incasepat = 1; incmdpos = 0; noaliases = ona;
1464 // nocorrect = onc;` — set the case-pattern context AND restore
1465 // alias/correct state BEFORE the zshlex that consumes `in`/`{`.
1466 set_incasepat(1);
1467 set_incmdpos(false);
1468 restore(ona, onc);
1469 zshlex();
1470
1471 let mut arms = Vec::new();
1472 const MAX_ARMS: usize = 10_000;
1473
1474 loop {
1475 if arms.len() > MAX_ARMS {
1476 zerr("par_case: too many arms");
1477 break;
1478 }
1479
1480 // Set incasepat BEFORE skipping separators so lexer knows we're in case pattern context
1481 // This affects how [ and | are lexed
1482 set_incasepat(1);
1483
1484 skip_separators();
1485
1486 // Check for end
1487 // Note: 'esac' might be String "esac" if incasepat > 0 prevents reserved word recognition
1488 let is_esac = tok() == ESAC
1489 || (tok() == STRING_LEX && tokstr().map(|s| s == "esac").unwrap_or(false));
1490 if (use_brace && tok() == OUTBRACE_TOK) || (!use_brace && is_esac) {
1491 set_incasepat(0);
1492 zshlex();
1493 break;
1494 }
1495
1496 // Also break on EOF. c:Src/parse.c:1209 par_case requires
1497 // ESAC (or `}` in brace form) to close the block — reaching
1498 // ENDINPUT without either is a parse error (`case ... esack`
1499 // typo absorbs `esack` as part of the body and silently
1500 // terminates rc=0 otherwise). Bug #400.
1501 if tok() == ENDINPUT || tok() == LEXERR {
1502 set_incasepat(0);
1503 yyerror("unmatched `case'");
1504 break;
1505 }
1506
1507 // c:1250 — `if (tok == INPAR) zshlex();` — leading-paren
1508 // skip path. Used when the lexer DID return INPAR_TOK (e.g.
1509 // SHGLOB or incmdpos forced it). In the normal case-pattern
1510 // path the lexer absorbs `(...)` into one Stringg and the
1511 // hack at c:1322 strips the surrounding parens later. Both
1512 // paths land here.
1513 let leading_inpar_consumed = tok() == INPAR_TOK;
1514 if leading_inpar_consumed {
1515 zshlex();
1516 }
1517
1518 // c:1255-1262 — read pattern STRING. zsh's parser falls
1519 // straight into the STRING reader after the optional INPAR.
1520 // BAR before any pattern means empty string.
1521 let mut patterns = Vec::new();
1522 // Tracks whether the c:1322-1354 hack has fired (paren-
1523 // wrapped Stringg absorbed by the lexer). When it has, the
1524 // closing `)` was already absorbed — no separate OUTPAR
1525 // arm-close to consume.
1526 let mut absorbed_outpar = false;
1527 loop {
1528 if tok() == STRING_LEX {
1529 let s = tokstr();
1530 if s.as_deref().map(|s| s == "esac").unwrap_or(false) {
1531 break;
1532 }
1533 let mut str_val = s.unwrap_or_default();
1534
1535 // c:1322-1354 hack: when this is the first alt AND
1536 // the string starts with the Inpar marker, the lexer
1537 // absorbed the whole `(...)` as one token. Strip the
1538 // surrounding parens — the remainder IS the pattern.
1539 // The closing arm-paren was absorbed too, so we don't
1540 // expect a separate OUTPAR token afterward.
1541 if patterns.is_empty() && str_val.starts_with(crate::ported::zsh_h::Inpar) {
1542 let mut pct = 0i32;
1543 let mut chars: Vec<char> = str_val.chars().collect();
1544 let mut end_idx: Option<usize> = None;
1545 for (idx, &c) in chars.iter().enumerate() {
1546 if c == crate::ported::zsh_h::Inpar {
1547 pct += 1;
1548 } else if c == crate::ported::zsh_h::Outpar {
1549 pct -= 1;
1550 if pct == 0 {
1551 end_idx = Some(idx);
1552 break;
1553 }
1554 }
1555 }
1556 if let Some(idx) = end_idx {
1557 chars.remove(idx);
1558 chars.remove(0);
1559 str_val = chars.into_iter().collect();
1560 absorbed_outpar = true;
1561 }
1562 }
1563 patterns.push(str_val);
1564 set_incasepat(2);
1565 zshlex();
1566 // When the hack fired the closing `)` is already
1567 // consumed; don't read alt-`|` continuations either.
1568 if absorbed_outpar {
1569 break;
1570 }
1571 } else if tok() != BAR_TOK {
1572 break;
1573 }
1574
1575 if tok() == BAR_TOK {
1576 set_incasepat(1);
1577 zshlex();
1578 } else {
1579 break;
1580 }
1581 }
1582 set_incasepat(0);
1583
1584 // c:1305 — expect OUTPAR (arm-close) when the hack didn't
1585 // already swallow it.
1586 //
1587 // Bug #34 in docs/BUGS.md: the absorbed-pattern hack assumed
1588 // the leading `(` and the case-arm closing `)` were both
1589 // absorbed into the single STRING token. That's true for
1590 // `(x))` (the inner `)` closes the absorbed group; the second
1591 // `)` is the arm closer) only when the lexer slurps BOTH.
1592 // The Rust lexer slurps just `(x|y)` (one balanced pair); the
1593 // second `)` arrives as a separate OUTPAR_TOK that must still
1594 // be consumed as the case-arm closer. Detect and consume it.
1595 if !absorbed_outpar {
1596 if tok() != OUTPAR_TOK {
1597 zerr("expected ')' in case pattern");
1598 return None;
1599 }
1600 // c:Src/parse.c:1257-1258 — `if (tok != STRING)
1601 // YYERRORV(oecused);` C requires at least one pattern
1602 // STRING before `)`. zshrs accepted empty `case x in)`
1603 // and silently fell through to the next iteration with
1604 // an empty pattern arm, swallowing the rest of the
1605 // script. Reject the empty-pattern shape unless a
1606 // leading INPAR was consumed (the `(pat)` form has
1607 // already validated the pattern inside). Bug #161 in
1608 // docs/BUGS.md.
1609 if patterns.is_empty() && !leading_inpar_consumed {
1610 zerr("parse error near `)'");
1611 return None;
1612 }
1613 set_incmdpos(true);
1614 zshlex();
1615 // When the lexer emitted a separate INPAR_TOK at the
1616 // arm start (consumed via `leading_inpar_consumed`
1617 // above), the OUTPAR_TOK we just consumed closed the
1618 // alternation GROUP. If the next token is ALSO
1619 // OUTPAR_TOK, the user wrote `(pat))` and that second
1620 // `)` is the case-arm closer that still needs to be
1621 // consumed before body parsing. Bug #34 in
1622 // docs/BUGS.md.
1623 if leading_inpar_consumed && tok() == OUTPAR_TOK {
1624 zshlex();
1625 }
1626 } else if tok() == OUTPAR_TOK {
1627 // The lexer absorbed `(pat)` as the pattern but left the
1628 // case-arm closing `)` as a separate OUTPAR_TOK. Consume
1629 // it now so body parsing starts at the body, not at `)`.
1630 set_incmdpos(true);
1631 zshlex();
1632 } else {
1633 set_incmdpos(true);
1634 }
1635
1636 // Parse body. Pass end_tokens explicitly so the body's
1637 // parser stops at DSEMI/SEMIAMP/SEMIBAR/ESAC without
1638 // tripping parse_program_until's orphan-terminator check
1639 // (line 7131) which only fires when end_tokens is None.
1640 // Without this, a case arm whose body has no trailing
1641 // `;;` before `esac` (last arm — zsh accepts the dangling
1642 // form) produced "parse error near orphan terminator" on
1643 // the closing `esac`. zsh's par_case at parse.c:1318 sets
1644 // up the case-arm reader to recognize the same terminator
1645 // set; the Rust port was passing the implicit-None and
1646 // hitting the top-level orphan check.
1647 let body = parse_program_until(Some(&[DSEMI, SEMIAMP, SEMIBAR, ESAC]));
1648
1649 // Get terminator. Set incasepat=1 BEFORE the zshlex
1650 // advance so the next token (the next arm's pattern, like
1651 // `[a-z]`) gets tokenized in pattern context. Without
1652 // this, a `[`-prefixed pattern after the FIRST arm became
1653 // Inbrack instead of String and the pattern-loop bailed
1654 // out with "expected ')' in case pattern".
1655 let terminator = match tok() {
1656 DSEMI => {
1657 set_incasepat(1);
1658 zshlex();
1659 CaseTerm::Break
1660 }
1661 SEMIAMP => {
1662 set_incasepat(1);
1663 zshlex();
1664 CaseTerm::Continue
1665 }
1666 SEMIBAR => {
1667 set_incasepat(1);
1668 zshlex();
1669 CaseTerm::TestNext
1670 }
1671 _ => CaseTerm::Break,
1672 };
1673
1674 if !patterns.is_empty() {
1675 arms.push(CaseArm {
1676 patterns,
1677 body,
1678 terminator,
1679 });
1680 }
1681 }
1682
1683 Some(ZshCommand::Case(ZshCase { word, arms }))
1684}
1685
1686/// Parse if statement
1687/// Parse `if COND; then BODY; [elif COND; then BODY;]* [else BODY;] fi`.
1688/// Direct port of zsh/Src/parse.c:1411 `par_if`. The C source
1689/// emits WC_IF wordcodes per arm; zshrs builds an AST chain of
1690/// (cond, then_body) tuples plus an optional else_body.
1691fn par_if() -> Option<ZshCommand> {
1692 zshlex(); // skip 'if'
1693
1694 // Parse condition - stops at 'then' or '{' (zsh allows { instead of then)
1695 let cond = Box::new(parse_program_until(Some(&[THEN, INBRACE_TOK])));
1696
1697 skip_separators();
1698
1699 // Expect 'then' or {
1700 let use_brace = tok() == INBRACE_TOK;
1701 if tok() != THEN && !use_brace {
1702 zerr("expected 'then' or '{' after if condition");
1703 return None;
1704 }
1705 zshlex();
1706
1707 // Parse then-body - stops at else/elif/fi, or } if using brace syntax
1708 let then = if use_brace {
1709 let body = parse_program_until(Some(&[OUTBRACE_TOK]));
1710 if tok() == OUTBRACE_TOK {
1711 zshlex();
1712 }
1713 Box::new(body)
1714 } else {
1715 Box::new(parse_program_until(Some(&[ELSE, ELIF, FI])))
1716 };
1717
1718 // Parse elif and else. zsh accepts the SAME elif/else
1719 // continuations for both classic `then/fi` AND the brace
1720 // form `{ ... } elif ... { ... } else { ... }`. Direct port
1721 // of zsh/Src/parse.c:1417-1500 par_if where the elif/else
1722 // arms are checked AFTER the body close regardless of which
1723 // delimiter style opened the block. Without this, zinit's
1724 // if [[ -z $sel ]] { ... } else { ... }
1725 // hung the parser — `else` was treated as an external
1726 // command following the if-statement, which the lexer state
1727 // mis-classified inside the still-open function body.
1728 //
1729 // For brace-form: skip the `fi` consumption at the end of
1730 // the loop (no `fi` after a brace block), and `else` may
1731 // arrive after a `}` close. Skip-separators between the
1732 // body close and the elif/else token.
1733 let mut elif = Vec::new();
1734 let mut else_ = None;
1735 // c:Src/parse.c:1501-1504 — `if (tok != FI) { cmdpop(); YYERRORV; }`.
1736 // The C parser fails the whole if-construct when the body close
1737 // isn't seen. zshrs's loop fell through silently on ENDINPUT, so
1738 // `if true; then echo yes` (no `fi`) was accepted. Track whether
1739 // we hit a real terminator and error after the loop if not.
1740 let mut saw_terminator = use_brace; // `{ … }` body already consumed its close
1741
1742 {
1743 loop {
1744 skip_separators();
1745
1746 match tok() {
1747 ELIF => {
1748 zshlex();
1749 // elif condition stops at 'then' or '{'
1750 let econd = parse_program_until(Some(&[THEN, INBRACE_TOK]));
1751 skip_separators();
1752
1753 let elif_use_brace = tok() == INBRACE_TOK;
1754 if tok() != THEN && !elif_use_brace {
1755 zerr("expected 'then' after elif");
1756 return None;
1757 }
1758 zshlex();
1759
1760 // elif body stops at else/elif/fi or } if using braces
1761 let ebody = if elif_use_brace {
1762 let body = parse_program_until(Some(&[OUTBRACE_TOK]));
1763 if tok() == OUTBRACE_TOK {
1764 zshlex();
1765 saw_terminator = true; // brace close on elif
1766 }
1767 body
1768 } else {
1769 parse_program_until(Some(&[ELSE, ELIF, FI]))
1770 };
1771
1772 elif.push((econd, ebody));
1773 }
1774 ELSE => {
1775 zshlex();
1776 skip_separators();
1777
1778 let else_use_brace = tok() == INBRACE_TOK;
1779 if else_use_brace {
1780 zshlex();
1781 }
1782
1783 // else body stops at 'fi' or '}'
1784 else_ = Some(Box::new(if else_use_brace {
1785 let body = parse_program_until(Some(&[OUTBRACE_TOK]));
1786 if tok() == OUTBRACE_TOK {
1787 zshlex();
1788 saw_terminator = true;
1789 }
1790 body
1791 } else {
1792 parse_program_until(Some(&[FI]))
1793 }));
1794
1795 // Consume the 'fi' if present (not for brace syntax)
1796 if !else_use_brace && tok() == FI {
1797 zshlex();
1798 saw_terminator = true;
1799 }
1800 break;
1801 }
1802 FI => {
1803 // Brace-form `if ... { ... }` is already terminated by
1804 // its closing `}`. Do NOT consume `fi` here — it belongs
1805 // to an enclosing then-form if. Without this gate, a
1806 // brace-form if inside a then-form if's body would steal
1807 // the outer `fi`, leaving the outer parser to see
1808 // "unterminated if". This bit zinit-install.zsh:978
1809 // where `if (( … )) {` (brace) inside `if … ; then …`
1810 // (then-form) ate the outer `fi`.
1811 if use_brace {
1812 break;
1813 }
1814 zshlex();
1815 saw_terminator = true;
1816 break;
1817 }
1818 _ => break,
1819 }
1820 }
1821 }
1822
1823 if !saw_terminator {
1824 // c:1501-1504 — YYERRORV when the if-construct never closed.
1825 zerr("parse error: unterminated if");
1826 return None;
1827 }
1828
1829 Some(ZshCommand::If(ZshIf {
1830 cond,
1831 then,
1832 elif,
1833 else_,
1834 }))
1835}
1836
1837/// Parse while/until loop
1838/// Parse `while COND; do BODY; done` and `until COND; do BODY; done`.
1839/// Direct port of zsh/Src/parse.c:1521 `par_while`. The
1840/// `until` variant is the same loop with the condition negated.
1841fn par_while(until: bool) -> Option<ZshCommand> {
1842 zshlex(); // skip while/until
1843
1844 // c:1521-1551 par_while — the condition's parser must stop at
1845 // `do` or `{`. Without an explicit end-token set, parse_program
1846 // consumes the brace-form body as additional condition lists,
1847 // leaving parse_loop_body with nothing — `while (( i++ < 3 )) {
1848 // echo $i }` silently parsed but executed nothing.
1849 let cond = Box::new(parse_program_until(Some(&[DOLOOP, INBRACE_TOK])));
1850
1851 skip_separators();
1852 let body = parse_loop_body(false, false)?;
1853
1854 // c:Src/parse.c:1521-1551 par_while — WC_WHILE wordcode is tagged
1855 // with WC_WHILE_TYPE differentiating WHILE vs UNTIL at the wordcode
1856 // layer. The AST mirror in zsh_ast.rs has separate Until(ZshWhile)
1857 // and While(ZshWhile) variants; route by the `until` flag here so
1858 // downstream pattern-matchers can distinguish without poking
1859 // inside the payload's bool.
1860 let w = ZshWhile {
1861 cond,
1862 body: Box::new(body),
1863 until,
1864 };
1865 Some(if until {
1866 ZshCommand::Until(w) // c:1521 (WC_WHILE_TYPE = WC_WHILE_UNTIL)
1867 } else {
1868 ZshCommand::While(w) // c:1521 (WC_WHILE_TYPE = WC_WHILE_WHILE)
1869 })
1870}
1871
1872/// Parse repeat loop
1873/// Parse `repeat N; do BODY; done`. Direct port of
1874/// zsh/Src/parse.c:1565 `par_repeat`. The C source supports
1875/// the SHORTLOOPS short-form `repeat N CMD` (no do/done) — zshrs's
1876/// parser doesn't yet special-case that variant.
1877fn par_repeat() -> Option<ZshCommand> {
1878 zshlex(); // skip 'repeat'
1879
1880 let count = match tok() {
1881 STRING_LEX => {
1882 let c = tokstr().unwrap_or_default();
1883 zshlex();
1884 c
1885 }
1886 _ => {
1887 zerr("expected count after repeat");
1888 return None;
1889 }
1890 };
1891
1892 skip_separators();
1893 // c:1600 — par_repeat's short-form gate is wider: it unlocks
1894 // when SHORTLOOPS OR SHORTREPEAT is set (vs SHORTLOOPS alone for
1895 // for/while). Pass `is_repeat=true` so parse_loop_body
1896 // applies that widened gate.
1897 let body = parse_loop_body(false, true)?;
1898
1899 Some(ZshCommand::Repeat(ZshRepeat {
1900 count,
1901 body: Box::new(body),
1902 }))
1903}
1904
1905/// Parse (...) subshell
1906/// Parse a subshell `( ... )`. Direct port of zsh/Src/parse.c:1619
1907/// `par_subsh`. Body parses as a normal list; the subshell wrapper
1908/// fork-isolates execution in the executor.
1909fn par_subsh() -> Option<ZshCommand> {
1910 zshlex(); // skip (
1911 let prog = parse_program();
1912 if tok() == OUTPAR_TOK {
1913 zshlex();
1914 }
1915 Some(ZshCommand::Subsh(Box::new(prog)))
1916}
1917
1918/// Parse function definition
1919/// Parse `function NAME { BODY }` or `NAME () { BODY }`. Direct
1920/// port of zsh/Src/parse.c:1672 `par_funcdef`. zsh handles
1921/// the multiple keyword shapes (function FOO, FOO (), function FOO ()),
1922/// the optional `[fname1 fname2 ...]` for multi-name function defs,
1923/// and the `function FOO () { ... }` traditional/POSIX hybrid form.
1924fn par_funcdef() -> Option<ZshCommand> {
1925 zshlex(); // skip 'function'
1926
1927 let mut names = Vec::new();
1928 let mut tracing = false;
1929
1930 // Handle options like -T and function names. Two subtleties:
1931 //
1932 // 1. Flags: zsh's lexer encodes a leading `-` as
1933 // `zsh_h::Dash` (`\u{9b}`, `Src/zsh.h:182`) inside the String tokstr.
1934 // The previous `s.starts_with('-')` check failed for
1935 // `\u{9b}T`, so `function -T NAME { body }` slipped the
1936 // `-T` token into `names` and the function got registered
1937 // as `T` plus the intended `NAME`.
1938 //
1939 // 2. Body opener: zsh's lexer emits the opening `{` as a
1940 // String (not INBRACE_TOK) when it follows the String
1941 // NAME — the preceding name token resets incmdpos to
1942 // false, and only `{` immediately followed by `}` (the
1943 // empty-body case) gets promoted to Inbrace. The funcdef
1944 // parser must recognise the bare-`{` String as the body
1945 // opener; otherwise `function NAME { body }` falls through
1946 // to `_ => break`, no body parses, and the FuncDef never
1947 // lands in the AST. This is consistent with C zsh's
1948 // par_funcdef which knows it's in funcdef-header context
1949 // and accepts the brace either way.
1950 loop {
1951 match tok() {
1952 STRING_LEX => {
1953 let _ts_s = tokstr()?;
1954 let s = _ts_s.as_str();
1955 // c:1702 — `if ((*tokstr == Inbrace || *tokstr == '{') && !tokstr[1])`.
1956 // Body opener can be either the literal `{` (early-return
1957 // path at lex.c:1141-1144 / lex.rs LX2_INBRACE cmdpos
1958 // branch) or the Inbrace marker `\u{8f}` (lex.c:1420
1959 // post-switch add(c) where c was rewritten via lextok2).
1960 if s == "{" || s == "\u{8f}" {
1961 break;
1962 }
1963 let first = s.chars().next();
1964 if matches!(first, Some('-') | Some('+')) || matches!(first, Some(c) if c == Dash) {
1965 if s.contains('T') {
1966 tracing = true;
1967 }
1968 zshlex();
1969 continue;
1970 }
1971 // c:Src/exec.c::execcmd_args — function name tokens
1972 // in `function NAME { ... }` form go through globbing
1973 // at parse time. zsh's `function with[bracket] { ... }`
1974 // triggers a glob expansion of `with[bracket]`; no file
1975 // matches → "no matches found: NAME" + rc=1 (when
1976 // NOMATCH is set, the default). Bug #536: zshrs accepted
1977 // the literal bracket-containing name and registered
1978 // the function silently. Mirror C by probing for glob
1979 // metachars on the name; if present AND no file
1980 // matches, emit the diagnostic and abort the parse.
1981 let has_glob_chars = s.chars().any(|c| {
1982 matches!(
1983 c,
1984 '[' | ']'
1985 | '*'
1986 | '?'
1987 | crate::ported::zsh_h::Inbrack
1988 | crate::ported::zsh_h::Outbrack
1989 | crate::ported::zsh_h::Star
1990 | crate::ported::zsh_h::Quest
1991 )
1992 });
1993 if has_glob_chars && crate::ported::zsh_h::isset(crate::ported::zsh_h::NOMATCH) {
1994 let untok = crate::ported::lex::untokenize(s);
1995 let glob_result = crate::ported::glob::glob(&untok);
1996 if glob_result.is_empty() {
1997 crate::ported::utils::zerr(&format!("no matches found: {}", untok));
1998 crate::ported::utils::errflag.fetch_or(
1999 crate::ported::utils::ERRFLAG_ERROR,
2000 std::sync::atomic::Ordering::Relaxed,
2001 );
2002 return None;
2003 }
2004 }
2005 names.push(s.to_string());
2006 zshlex();
2007 }
2008 INBRACE_TOK | INOUTPAR | SEPER | NEWLIN => break,
2009 _ => break,
2010 }
2011 }
2012
2013 // Optional ()
2014 let saw_paren = tok() == INOUTPAR;
2015 if saw_paren {
2016 zshlex();
2017 }
2018
2019 skip_separators();
2020
2021 // Body opener: real Inbrace OR a String containing the literal `{`
2022 // (early-return path) OR a String containing the Inbrace marker
2023 // `\u{8f}` (bct++ path post-switch add). C parse.c:1702 handles
2024 // both string forms via `*tokstr == Inbrace || *tokstr == '{'`.
2025 let body_opener_is_string_brace =
2026 tok() == STRING_LEX && tokstr().map(|s| s == "{" || s == "\u{8f}").unwrap_or(false);
2027 if tok() == INBRACE_TOK || body_opener_is_string_brace {
2028 // Capture body_start BEFORE the lexer advances past the
2029 // first body token. After the previous zshlex consumed
2030 // `{`, lexer.pos points just past `{` (which is where the
2031 // body source starts). The next `zshlex()` would advance
2032 // past the first token (`echo`), making body_start land
2033 // mid-body and lose the first word — `typeset -f f` would
2034 // print `a; echo b` for `{ echo a; echo b }`.
2035 // c:Src/parse.c:1690-1706 — par_funcdef requires a clean
2036 // body-opener brace when the anonymous form `function {body}`
2037 // is used (no names AND no `()`). zsh's lexer keeps the `{`
2038 // as its own STRING token via the lex.c:1141-1144 early-
2039 // return at command position, but the body brace must be
2040 // followed by whitespace for the inner par_list to find a
2041 // matching OUTBRACE — without a separator, the closing `}`
2042 // gets merged into the last word (`X}`) and par_list ends
2043 // without OUTBRACE, which C zsh reports as `parse error near
2044 // \`}'`. zshrs's lexer has the same `bct` semantics; reject
2045 // here at the parse step so the funcdef doesn't silently run
2046 // with the stray `}` attached. With names or `()` present,
2047 // the body brace is allowed even without a separator
2048 // (`function name {body}` and `function () {body}` both work
2049 // in zsh). Bug #60 in docs/BUGS.md.
2050 if names.is_empty() && !saw_paren {
2051 // Peek the next source byte after the current lexer position
2052 // (`{` was just tokenized — `pos()` points just past it).
2053 // A whitespace separator means proper `function { body }`
2054 // form; anything else is the malformed `function {body}`
2055 // shape zsh rejects.
2056 let next_byte = input_slice(pos(), pos() + 1)
2057 .and_then(|s| s.bytes().next())
2058 .unwrap_or(b' ');
2059 if !matches!(next_byte, b' ' | b'\t' | b'\n' | b';') {
2060 zerr("parse error near `}'"); // c:Src/parse.c YYERRORV
2061 return None;
2062 }
2063 }
2064 let body_start = pos();
2065 zshlex();
2066 // c:Src/parse.c — func body terminates at OUTBRACE_TOK.
2067 // Explicit end-token keeps the inner parse from hitting the
2068 // top-level stray-`}` arm (#168). Bug #167 family.
2069 let body = parse_program_until(Some(&[OUTBRACE_TOK]));
2070 // c:Src/parse.c:1733-1737 — `if (tok != OUTBRACE) { cmdpop();
2071 // ... YYERRORV(oecused); }`. Hard-error on missing close brace
2072 // so `function f { echo hi` doesn't silently register a half-
2073 // parsed body. Bug #405.
2074 if tok() != OUTBRACE_TOK {
2075 zerr("parse error: expected `}'");
2076 return None;
2077 }
2078 let body_end = pos().saturating_sub(1);
2079 let body_source = input_slice(body_start, body_end)
2080 .map(|s| {
2081 // Lexer's pos() may have advanced past `}` AND skipped
2082 // trailing whitespace/newlines before returning the
2083 // OUTBRACE_TOK to us, so the slice up to `pos - 1`
2084 // includes the `}` and any preceding whitespace.
2085 // Strip the trailing `}` and any preceding structural
2086 // separator (`;`, `\n`) — C zsh's getpermtext walks
2087 // the wordcode list and emits each command WITHOUT
2088 // the trailing `;`/`\n` that lives in the input.
2089 let t = s.trim();
2090 let t = t.strip_suffix('}').unwrap_or(t).trim_end();
2091 let t = t
2092 .trim_end_matches(|c: char| c == ';' || c == '\n')
2093 .trim_end();
2094 t.to_string()
2095 })
2096 .filter(|s| !s.is_empty());
2097 zshlex();
2098
2099 // Anonymous form `function () { body } a b c` (with `()`) or
2100 // `function { body } a b c` (zsh-only shorthand, no `()`). No
2101 // name was collected. Mirror parse_anon_funcdef: synthesize
2102 // `_zshrs_anon_N`, collect trailing args, set auto_call_args
2103 // so compile_funcdef registers + immediately calls the
2104 // function with the args as positional params.
2105 if names.is_empty() {
2106 let mut args = Vec::new();
2107 while tok() == STRING_LEX {
2108 if let Some(s) = tokstr() {
2109 args.push(s);
2110 }
2111 zshlex();
2112 }
2113 static ANON_COUNTER: AtomicUsize = AtomicUsize::new(0);
2114 let n = ANON_COUNTER.fetch_add(1, Ordering::Relaxed);
2115 let name = format!("_zshrs_anon_kw_{}", n);
2116 return Some(ZshCommand::FuncDef(ZshFuncDef {
2117 names: vec![name],
2118 body: Box::new(body),
2119 tracing,
2120 auto_call_args: Some(args),
2121 body_source,
2122 }));
2123 }
2124
2125 Some(ZshCommand::FuncDef(ZshFuncDef {
2126 names,
2127 body: Box::new(body),
2128 tracing,
2129 auto_call_args: None,
2130 body_source,
2131 }))
2132 } else {
2133 // Short form
2134 par_list().map(|list| {
2135 ZshCommand::FuncDef(ZshFuncDef {
2136 names,
2137 body: Box::new(ZshProgram { lists: vec![list] }),
2138 tracing,
2139 auto_call_args: None,
2140 body_source: None,
2141 })
2142 })
2143 }
2144}
2145
2146/// Parse time command
2147/// Parse `time CMD` (POSIX time keyword). Direct port of
2148/// zsh/Src/parse.c:1787 `par_time`. The `time` keyword
2149/// times the execution of the following pipeline / cmd.
2150fn par_time() -> Option<ZshCommand> {
2151 zshlex(); // skip 'time'
2152
2153 // Check if there's a pipeline to time
2154 if tok() == SEPER || tok() == NEWLIN || tok() == ENDINPUT {
2155 Some(ZshCommand::Time(None))
2156 } else {
2157 let sublist = par_sublist();
2158 Some(ZshCommand::Time(sublist.map(Box::new)))
2159 }
2160}
2161
2162/// Port of `par_dinbrack(void)` from `Src/parse.c:1810`. Body
2163/// parser inside `[[ ... ]]` — calls `par_cond` to emit the
2164/// condition wordcode then advances past `]]`.
2165pub fn par_dinbrack() -> Option<()> {
2166 // c:1810
2167 set_incond(1); // c:1814
2168 set_incmdpos(false); // c:1815
2169 zshlex(); // c:1816
2170 let _ = par_cond(); // c:1817
2171 if tok() != DOUTBRACK {
2172 // c:1818
2173 yyerror("missing ]]");
2174 return None;
2175 }
2176 set_incond(0); // c:1820
2177 set_incmdpos(true); // c:1821
2178 zshlex(); // c:1822
2179 Some(())
2180}
2181
2182/// Parse a simple command
2183/// Parse a simple command (assignments + words + redirections).
2184/// Direct port of zsh/Src/parse.c:1836 `par_simple` —
2185/// the largest single function in parse.c. Handles ENVSTRING/
2186/// ENVARRAY assignments at command head, intermixed redirs,
2187/// typeset-style multi-assignment commands, and the trailing
2188/// inout-par `()` that converts a simple command into an inline
2189/// function definition.
2190fn par_simple(mut redirs: Vec<ZshRedir>) -> Option<ZshCommand> {
2191 let mut assigns = Vec::new();
2192 let mut words = Vec::new();
2193
2194 // c:1934-1974 — `{var}>file` brace-FD detection is wired
2195 // INSIDE the words loop below (parse.rs:4940-4956) rather than
2196 // here at the head. The words-loop site sees the tok=STRING
2197 // `{varname}` followed by a REDIROP and routes into par_redir
2198 // with redir.varid populated. C does it inline at the start of
2199 // each STRING/TYPESET arm iteration; functionally equivalent.
2200
2201 // c:1843-1846 — leading-NOCORRECT prefix: `nocorrect echo hello`
2202 // emits a NOCORRECT token at the start of par_simple. C sets
2203 // `nocorrect = 1` and skips past via the `zshlex();` at the
2204 // for-loop tail (c:1907). zshrs's par_simple (AST) had no
2205 // NOCORRECT arm so the token was silently dropped and the
2206 // following command line evaporated — `nocorrect echo hello`
2207 // produced empty output.
2208 while tok() == NOCORRECT {
2209 set_nocorrect(1); // c:1846
2210 zshlex(); // c:1907 (loop-tail zshlex)
2211 }
2212
2213 // Parse leading assignments
2214 while tok() == ENVSTRING || tok() == ENVARRAY {
2215 if let Some(assign) = parse_assign() {
2216 assigns.push(assign);
2217 }
2218 zshlex();
2219 }
2220
2221 // Parse words and redirections
2222 loop {
2223 match tok() {
2224 ENVSTRING | ENVARRAY => {
2225 // Mid-command assignment-shape arg under typeset
2226 // / declare / local / etc. (intypeset gates the
2227 // lexer to emit Envstring/Envarray for `name=val`
2228 // and `name=()` past the command name). Parse the
2229 // assignment, then emit a synthetic word
2230 // `NAME=value` (scalar) or `NAME=( … )` (array)
2231 // string so typeset's builtin arg list sees the
2232 // assignment-shape arg. Avoids the inline-env
2233 // scope path that mistakenly treats it like a
2234 // pre-cmd `X=Y cmd` assignment.
2235 if let Some(assign) = parse_assign() {
2236 let synthetic = match &assign.value {
2237 ZshAssignValue::Scalar(v) => format!("{}={}", assign.name, v),
2238 ZshAssignValue::Array(elems) => {
2239 // c:Src/builtin.c — assoc paren-init `h=( "" v
2240 // k2 v2 )` must preserve empty-string
2241 // elements (zsh stores key="" + value="v").
2242 // The bin_typeset paren-init splitter at
2243 // `builtin.rs:4358` recognizes the
2244 // REJOIN_SEP (`\u{1f}`) sentinel between
2245 // array elements and skips the leading/
2246 // trailing parens trim; using it here
2247 // round-trips empties end-to-end through
2248 // the synthetic-arg rebuild. Space-join
2249 // collapses adjacent empties (`(` + `""` +
2250 // `empty-val` becomes `( empty-val`) so
2251 // bin_typeset never sees the empty key.
2252 // Bug #93 in docs/BUGS.md.
2253 let mut buf = String::with_capacity(
2254 assign.name.len() + 4 + elems.iter().map(|e| e.len() + 1).sum::<usize>(),
2255 );
2256 buf.push_str(&assign.name);
2257 buf.push_str("=(");
2258 for elem in elems {
2259 buf.push('\u{1f}');
2260 buf.push_str(elem);
2261 }
2262 buf.push('\u{1f}');
2263 buf.push(')');
2264 buf
2265 }
2266 };
2267 words.push(synthetic);
2268 }
2269 zshlex();
2270 }
2271 STRING_LEX | TYPESET => {
2272 let s = tokstr();
2273 if let Some(s) = s {
2274 words.push(s);
2275 }
2276 // c:1929 — `incmdpos = 0;` so the next zshlex() does
2277 // not re-promote `{`/`[[`/reserved words at the
2278 // continuation position. Without this, `echo {a,b}`
2279 // re-lexes `{` as INBRACE_TOK (current-shell block)
2280 // and the brace expansion never reaches par_simple.
2281 set_incmdpos(false);
2282 // c:1931-1932 — `if (tok == TYPESET) intypeset = is_typeset = 1;`
2283 // Multi-assign `typeset a=1 b=2` relies on the lexer
2284 // re-emitting `b=2` as ENVSTRING; that path is gated
2285 // on `intypeset`. Without this, follow-on assignment
2286 // words arrive as STRING and the typeset builtin's
2287 // multi-assign form silently degrades.
2288 if tok() == TYPESET {
2289 set_intypeset(true);
2290 }
2291 zshlex();
2292 // Check for function definition foo() { ... }
2293 if words.len() == 1 && tok() == INOUTPAR {
2294 return parse_inline_funcdef(words.pop().unwrap());
2295 }
2296 // `{name}>file` named-fd redirect: the lexer doesn't
2297 // recognize this shape, so the bare word `{name}`
2298 // arrives as a String. If it matches `{IDENT}` and
2299 // the NEXT token is a redirop, pop it off as the
2300 // varid for that redir.
2301 if !words.is_empty() && IS_REDIROP(tok()) {
2302 let last = words.last().unwrap();
2303 let untoked = super::lex::untokenize(last);
2304 if untoked.starts_with('{') && untoked.ends_with('}') && untoked.len() > 2 {
2305 let name = &untoked[1..untoked.len() - 1];
2306 if !name.is_empty()
2307 && name.chars().all(|c| c == '_' || c.is_ascii_alphanumeric())
2308 && name
2309 .chars()
2310 .next()
2311 .map(|c| c == '_' || c.is_ascii_alphabetic())
2312 .unwrap_or(false)
2313 {
2314 let varid = name.to_string();
2315 words.pop();
2316 if let Some(mut redir) = par_redir() {
2317 redir.varid = Some(varid);
2318 redirs.push(redir);
2319 }
2320 continue;
2321 }
2322 }
2323 }
2324 }
2325 _ if IS_REDIROP(tok()) => {
2326 match par_redir() {
2327 Some(redir) => redirs.push(redir),
2328 None => break, // Error in redir parsing, stop
2329 }
2330 }
2331 INOUTPAR if !words.is_empty() => {
2332 // c:2055-2057 — `if (!isset(MULTIFUNCDEF) && argc > 1)
2333 // YYERROR(oecused);` — multi-name funcdef gate:
2334 // `f1 f2() { ... }` defines f1 AND f2 to the same
2335 // body, but only when MULTIFUNCDEF is set.
2336 if !isset(MULTIFUNCDEF) && words.len() > 1 {
2337 zerr("parse error: multiple names in function definition without MULTIFUNCDEF");
2338 return None;
2339 }
2340 // c:2061-2068 — `if (isset(EXECOPT) && hasalias &&
2341 // !isset(ALIASFUNCDEF) && argc && hasalias !=
2342 // input_hasalias()) { zwarn(...); YYERROR(...); }`
2343 // Alias-as-funcdef warning. zshrs's parser doesn't
2344 // track `hasalias` (alias-expansion provenance
2345 // during parse) yet, so `had_alias` stays false —
2346 // the gate is wired here as a marker so the canonical
2347 // C predicate is visible. Once alias-provenance lands,
2348 // swap `false` for the actual provenance compare.
2349 let had_alias = false;
2350 if isset(EXECOPT) && had_alias && !isset(ALIASFUNCDEF) && !words.is_empty() {
2351 crate::ported::utils::zwarn("defining function based on alias `(unknown)'");
2352 return None;
2353 }
2354 // foo() { ... } style function
2355 return parse_inline_funcdef(words.pop().unwrap());
2356 }
2357 _ => break,
2358 }
2359 }
2360
2361 if assigns.is_empty() && words.is_empty() && redirs.is_empty() {
2362 return None;
2363 }
2364
2365 Some(ZshCommand::Simple(ZshSimple {
2366 assigns,
2367 words,
2368 redirs,
2369 }))
2370}
2371
2372/// Parse a redirection
2373/// Parse a redirection (>file, <file, >>file, <<HEREDOC, etc.).
2374/// Direct port of zsh/Src/parse.c:2229 `par_redir`. Returns
2375/// a ZshRedir node carrying the operator type, fd, target word
2376/// (or here-doc body / pipe-redir command), and any `{var}` style
2377/// fd-binding parameter.
2378fn par_redir() -> Option<ZshRedir> {
2379 par_redir_with_id(None)
2380}
2381
2382/// Wire a here-document body onto the redirection token that
2383/// requested it. Direct port of zsh/Src/parse.c:2347
2384/// `setheredoc`. Called when a heredoc terminator has been
2385/// matched and the body is ready to be attached to the redir.
2386///
2387/// zshrs port note: zsh's setheredoc patches the wordcode
2388/// in-place via `pc[1] = ecstrcode(doc); pc[2] = ecstrcode(term);`.
2389/// zshrs threads heredoc bodies through `HereDocInfo` structs
2390/// attached inline during the post-parse `fill_heredoc_bodies` walk.
2391/// This method is the AST-side equivalent: writes back to the
2392/// matching redir node by index.
2393/// Port of `setheredoc(int pc, int type, char *str, char *termstr,
2394/// char *munged_termstr)` from `Src/parse.c:2347-2355`. Patches the
2395/// pending heredoc redir at `pc` with its body string + raw and
2396/// munged terminator forms.
2397pub fn setheredoc(pc: usize, redir_type: i32, doc: &str, term: &str, munged_term: &str) {
2398 // zshrs-only guard: AST-path heredocs use `pc = -1 as usize`
2399 // (i.e. `usize::MAX`) as a sentinel meaning "no wordcode slot to
2400 // patch". C never passes a negative pc since the wordcode emitter
2401 // is always active. Skip silently for the AST-only case.
2402 if pc == usize::MAX {
2403 return;
2404 }
2405 // c:2350 — `int varid = WC_REDIR_VARID(ecbuf[pc]) ? REDIR_VARID_MASK : 0;`
2406 let cur = ECBUF.with_borrow(|b| b.get(pc).copied().unwrap_or(0));
2407 let varid = if WC_REDIR_VARID(cur) != 0 {
2408 REDIR_VARID_MASK
2409 } else {
2410 0
2411 };
2412 // c:2351 — `ecbuf[pc] = WCB_REDIR(type | REDIR_FROM_HEREDOC_MASK | varid);`
2413 let new_header = WCB_REDIR((redir_type | REDIR_FROM_HEREDOC_MASK | varid) as wordcode);
2414 // c:2352 — `ecbuf[pc + 2] = ecstrcode(str);`
2415 let coded_str = ecstrcode(doc);
2416 // c:2353 — `ecbuf[pc + 3] = ecstrcode(termstr);`
2417 let coded_term = ecstrcode(term);
2418 // c:2354 — `ecbuf[pc + 4] = ecstrcode(munged_termstr);`
2419 let coded_munged = ecstrcode(munged_term);
2420 ECBUF.with_borrow_mut(|b| {
2421 b[pc] = new_header;
2422 b[pc + 2] = coded_str;
2423 b[pc + 3] = coded_term;
2424 b[pc + 4] = coded_munged;
2425 });
2426}
2427
2428/// Parse a wordlist for `for ... in WORDS;`. Direct port of
2429/// zsh/Src/parse.c:2362 `par_wordlist`. Reads STRING tokens
2430/// until the next SEPER / SEMI / NEWLIN.
2431pub fn par_wordlist() -> Vec<String> {
2432 let mut out = Vec::new();
2433 // parse.c:2362-2378 — collect STRINGs into the wordlist.
2434 while tok() == STRING_LEX {
2435 if let Some(text) = tokstr() {
2436 out.push(text);
2437 }
2438 zshlex();
2439 }
2440 out
2441}
2442
2443/// Parse a newline-separated wordlist. Direct port of
2444/// zsh/Src/parse.c:2379 `par_nl_wordlist`. Like
2445/// par_wordlist but tolerates leading/trailing newlines.
2446pub fn par_nl_wordlist() -> Vec<String> {
2447 // parse.c:2380-2381 — skip leading newlines.
2448 while tok() == NEWLIN {
2449 zshlex();
2450 }
2451 let out = par_wordlist();
2452 // parse.c:2395-2397 — skip trailing newlines.
2453 while tok() == NEWLIN {
2454 zshlex();
2455 }
2456 out
2457}
2458
2459/// `COND_SEP()` macro from `Src/parse.c:2433`. True when the current
2460/// token is a separator usable inside `[[ … ]]` (newline / semi /
2461/// `&`). C uses it to skip optional whitespace between cond terms.
2462#[inline]
2463pub fn COND_SEP() -> bool {
2464 matches!(tok(), NEWLIN | SEMI | AMPER)
2465}
2466
2467/// Parse [[ ... ]] conditional
2468/// Parse `[[ EXPR ]]` conditional expression. Direct port of
2469/// zsh/Src/parse.c:2409 `par_cond` (and helpers par_cond_1,
2470/// par_cond_2, par_cond_double, par_cond_triple, par_cond_multi
2471/// at parse.c:2434-2731). Expression operators: `||` `&&` `!`
2472/// + unary tests (-f, -d, -n, -z, etc.) + binary tests (=, !=,
2473/// <, >, ==, =~, -eq, -ne, -lt, -le, -gt, -ge, -nt, -ot, -ef).
2474fn par_cond() -> Option<ZshCommand> {
2475 // C par_dinbrack (parse.c:1810-1822) wraps the body parse with
2476 // `incond = 1; incmdpos = 0;` BEFORE the first zshlex past `[[`,
2477 // and resets to `incond = 0; incmdpos = 1;` after `]]`. Without
2478 // `incond = 1`, lex.c does not promote `]]` to DOUTBRACK and the
2479 // cond body bleeds past the close bracket — the parser then
2480 // sees `]]` as a separate STRING command. Every `if [[ ... ]]; then`
2481 // failed with `command not found: ]]` before this fix.
2482 set_incond(1);
2483 set_incmdpos(false);
2484 zshlex(); // skip [[
2485 // Empty cond `[[ ]]` is a parse error in zsh — emit the
2486 // diagnostic and return None so the caller produces a
2487 // non-zero exit. Without this, `[[ ]]` silently passed and
2488 // returned exit 0.
2489 if tok() == DOUTBRACK {
2490 zerr("parse error near `]]'");
2491 set_incond(0);
2492 set_incmdpos(true);
2493 zshlex();
2494 return None;
2495 }
2496 let cond = parse_cond_expr();
2497
2498 if tok() == DOUTBRACK {
2499 set_incond(0);
2500 set_incmdpos(true);
2501 zshlex();
2502 } else {
2503 // c:Src/parse.c:1818-1819 — `if (tok != DOUTBRACK)
2504 // YYERRORV(oecused);`. par_dinbrack hard-requires DOUTBRACK
2505 // after par_cond; anything else is a parse error and the
2506 // outer parser's yyerror at c:2747 emits `parse error near
2507 // \`%s'` using zshlextext. Bug #473: BAR (`|`) inside
2508 // `[[ ab == a|b ]]` slipped past par_cond_or (which only
2509 // checks DBAR), the cond returned cleanly, and then the
2510 // top-level parser interpreted BAR as a pipe — running `b`
2511 // as a command (security-relevant if pattern RHS is user
2512 // input). Mirror C: emit parse error and abort.
2513 let tok_text = match tok() {
2514 BAR_TOK => "|".to_string(),
2515 DBAR => "||".to_string(),
2516 AMPER => "&".to_string(),
2517 DAMPER => "&&".to_string(),
2518 SEMI => ";".to_string(),
2519 DSEMI => ";;".to_string(),
2520 NEWLIN | SEPER => String::new(),
2521 _ => tokstr().map(|s| crate::ported::lex::untokenize(&s)).unwrap_or_default(),
2522 };
2523 if tok_text.is_empty() {
2524 zerr("parse error");
2525 } else {
2526 zerr(&format!("parse error near `{}'", tok_text));
2527 }
2528 set_incond(0);
2529 set_incmdpos(true);
2530 return None;
2531 }
2532
2533 cond.map(ZshCommand::Cond)
2534}
2535
2536/// Port of `par_cond_1(void)` from `Src/parse.c:2434`. Parses one
2537/// `||`-separated cond expression. Emits `WCB_COND(COND_AND, …)`
2538/// when an `&&` is found and recurses.
2539pub fn par_cond_1() -> i32 {
2540 // c:2434
2541
2542 let p = ECUSED.with(|c| c.get()) as usize;
2543 let r = par_cond_2();
2544 while COND_SEP() {
2545 condlex();
2546 }
2547 if tok() == DAMPER {
2548 condlex();
2549 while COND_SEP() {
2550 condlex();
2551 }
2552 ecispace(p, 1);
2553 par_cond_1();
2554 let ecused = ECUSED.with(|c| c.get()) as usize;
2555 ECBUF.with(|c| {
2556 c.borrow_mut()[p] = WCB_COND(COND_AND as u32, (ecused - 1 - p) as u32);
2557 });
2558 return 1;
2559 }
2560 r
2561}
2562
/// Port of `par_cond_2(void)` from `Src/parse.c:2476`. The heavy
/// cond-term parser: handles `! cond`, `(cond)`, unary `[ -X arg ]`,
/// binary `[ A op B ]`, and `[ A op1 B op2 C … ]` n-ary chains.
///
/// Tokens are consumed with `condlex()`, skipping `COND_SEP()`
/// separators between operands. Wordcode is emitted through `ecadd` /
/// `ecstr` directly or via `par_cond_double` / `par_cond_triple` /
/// `par_cond_multi`.
///
/// Returns `0` after reporting an error through `yyerror`; otherwise
/// `1` or the value returned by the helper that emitted the term.
pub fn par_cond_2() -> i32 {
    // c:2476
    // `n_testargs` only applies in `testlex` mode (=== /bin/test
    // compat). zshrs has no testlex yet, so always 0.
    let n_testargs: i32 = 0;

    // c:2481 — handled inline; this Rust port skips the n_testargs
    // arm since zshrs invokes par_cond via [[ ... ]] only.

    while COND_SEP() {
        condlex();
    }
    if tok() == BANG_TOK {
        // c:2522 — `[[ ! cond ]]`: emit the NOT header, then parse
        // the negated term recursively.
        condlex();
        ecadd(WCB_COND(COND_NOT as u32, 0));
        return par_cond_2();
    }
    if tok() == INPAR_TOK {
        // c:2533 — `[[ (cond) ]]`
        condlex();
        while COND_SEP() {
            condlex();
        }
        // NOTE(review): `par_cond()` in this file is the `[[ … ]]`
        // entry point — it sets incond, advances with zshlex() and
        // requires a closing `]]`. Confirm it is the intended callee
        // for a parenthesised sub-expression.
        let r = par_cond();
        while COND_SEP() {
            condlex();
        }
        if tok() != OUTPAR_TOK {
            yyerror("missing )");
            return 0;
        }
        condlex();
        // Collapse the Option result to the 0/1 status used here.
        return r.map_or(0, |_| 1);
    }
    // First operand (empty string when the lexer has no token text).
    let s1 = tokstr().unwrap_or_default();
    // c:2549 — `dble = (s1 && IS_DASH(*s1) && (!n_testargs ||
    // strspn(s1+1, "abcd...") == 1) && !s1[2]);` — IS_DASH covers
    // BOTH `-` and Dash (`\u{9b}`). The raw tokstr inside `[[ ... ]]`
    // carries Dash as a marker byte, so `starts_with('-')` alone
    // matches only ASCII dashes and misses every `-z`, `-d`, `-r`
    // etc. — every such cond emitted the AST-only `condition
    // expected` error from par_cond_double. Use IS_DASH and count
    // chars (Dash is a single code point) instead of bytes.
    //
    // NOTE(review): the C expression quoted above applies the
    // letter-set test only when `n_testargs` is non-zero; here it is
    // applied unconditionally although `n_testargs` is fixed at 0.
    // Confirm this is intended.
    let s1_chars: Vec<char> = s1.chars().collect();
    let dble = !s1_chars.is_empty()
        && IS_DASH(s1_chars[0])
        && s1_chars.len() == 2
        && "abcdefghknoprstuvwxzLONGS".contains(s1_chars[1]);
    if tok() != STRING_LEX {
        if !s1.is_empty() && tok() != LEXERR && (!dble || n_testargs != 0) {
            // c:2486-2497 — `if (n_testargs == 1)` block: under
            // POSIXBUILTINS-off, `[ -t ]` rewrites to `[ -t 1 ]`
            // (ksh behavior). The C gate is `unset(POSIXBUILTINS)
            // && check_cond(s1, "t")`. zshrs's parser has
            // n_testargs=0 (no testlex), so this rewrite path is
            // unreachable from zshrs's [[ ]] / [ ] entry points;
            // wired here as a marker for parity. When testlex is
            // ported the call below activates.
            if n_testargs == 1 && unset(POSIXBUILTINS) && check_cond(&s1, "t") {
                condlex();
                return par_cond_double(&s1, "1");
            }
            // c:2557 — `[[ STRING ]]` re-interpreted as `[[ -n STRING ]]`.
            condlex();
            while COND_SEP() {
                condlex();
            }
            return par_cond_double("-n", &s1);
        }
        yyerror("condition expected");
        return 0;
    }
    // Current token is a STRING: step past the first operand.
    condlex();
    while COND_SEP() {
        condlex();
    }
    if tok() == INANG_TOK || tok() == OUTANG_TOK {
        // c:2576 — `<` / `>` string compare.
        // Remember which comparison it was before lexing the
        // right-hand operand.
        let xtok = tok();
        condlex();
        while COND_SEP() {
            condlex();
        }
        if tok() != STRING_LEX {
            yyerror("string expected");
            return 0;
        }
        let s3 = tokstr().unwrap_or_default();
        condlex();
        while COND_SEP() {
            condlex();
        }
        let op = if xtok == INANG_TOK {
            COND_STRLT
        } else {
            COND_STRGTR
        };
        ecadd(WCB_COND(op as u32, 0));
        ecstr(&s1);
        ecstr(&s3);
        return 1;
    }
    if tok() != STRING_LEX {
        // c:2592 — only one operand seen → `[ -n s1 ]`.
        if tok() != LEXERR {
            if !dble || n_testargs != 0 {
                return par_cond_double("-n", &s1);
            }
            // s1 looked like a two-character dash operator with no
            // argument: emit it as a modular cond with no operands.
            return par_cond_multi(&s1, &[]);
        }
        yyerror("syntax error");
        return 0;
    }
    // Second operand. incond is bumped by one only for the lex
    // calls that step past it, then restored.
    let s2 = tokstr().unwrap_or_default();
    set_incond(incond() + 1);
    condlex();
    while COND_SEP() {
        condlex();
    }
    set_incond(incond() - 1);
    // c:Src/parse.c:2598-2600 — `if (!n_testargs) dble = (s2 &&
    // IS_DASH(*s2) && !s2[2]);` — RECOMPUTE dble based on s2 once
    // it's been read, so `[[ A -X B ]]` is treated as a 2-arg cond
    // `[ -X B ]` (par_cond_double) rather than a 3-arg triple. This
    // is what routes `[[ "" -a "x" ]]` to par_cond_double("", "-a")
    // → COND_ERROR "parse error: condition expected: ". Without
    // this, the original `dble` from s1 stayed false, the parser
    // grabbed s3 and built COND_MODI silently. parity bug #25.
    let s2_chars: Vec<char> = s2.chars().collect();
    let dble = !s2_chars.is_empty() && IS_DASH(s2_chars[0]) && s2_chars.len() == 2;
    if tok() == STRING_LEX && !dble {
        // Third operand present.
        let s3 = tokstr().unwrap_or_default();
        condlex();
        while COND_SEP() {
            condlex();
        }
        if tok() == STRING_LEX {
            // c:2615 — n-ary `[ A op B C D ... ]`.
            // Gather every remaining STRING after s1 into the
            // argument list.
            let mut l: Vec<String> = vec![s2, s3];
            while tok() == STRING_LEX {
                l.push(tokstr().unwrap_or_default());
                condlex();
                while COND_SEP() {
                    condlex();
                }
            }
            return par_cond_multi(&s1, &l);
        }
        return par_cond_triple(&s1, &s2, &s3);
    }
    // Exactly two operands (or s2 is a two-character dash operator).
    par_cond_double(&s1, &s2)
}
2719
2720/// Port of `par_cond_double(char *a, char *b)` from `Src/parse.c:2626`.
2721/// Emits wordcode for unary cond `[ -X b ]` or modular `[ -mod b ]`.
2722pub fn par_cond_double(a: &str, b: &str) -> i32 {
2723 // c:2628 — `if (!IS_DASH(a[0]) || !a[1])` — char-based, since
2724 // Dash is a single code point (`\u{9b}`) and `a.len() < 2` on
2725 // BYTES would still pass for "-z" but fail for the marker form
2726 // `\u{9b}z` (2 bytes). Walk by chars.
2727 let ac: Vec<char> = a.chars().collect();
2728 if ac.is_empty() || !IS_DASH(ac[0]) || ac.len() < 2 {
2729 // c:Src/parse.c:2629 COND_ERROR macro expansion:
2730 // zwarn(...); herrflush(); errflag |= ERRFLAG_ERROR;
2731 // YYERROR(ecused) /* sets tok = LEXERR */
2732 // The YYERROR portion is critical — without it the outer
2733 // parser keeps walking the wordcode and execution proceeds
2734 // (e.g. `[[ "" -a "x" ]] && echo m || echo n` runs the
2735 // `|| echo n` branch). Setting LEXERR aborts the upper
2736 // parse so the whole line is rejected, matching zsh's
2737 // observable behavior of stdout="" on parse error.
2738 zerr(&format!("parse error: condition expected: {}", a));
2739 errflag.fetch_or(crate::ported::zsh_h::ERRFLAG_ERROR, Ordering::SeqCst);
2740 set_tok(LEXERR);
2741 return 1;
2742 }
2743 // c:2630 — `else if (!a[2] && strspn(a+1, "abcd...zhLONGS") == 1)`
2744 let unary_set = "abcdefgknoprstuvwxzhLONGS";
2745 if ac.len() == 2 && unary_set.contains(ac[1]) {
2746 // c:2631 — `ecadd(WCB_COND(a[1], 0));` uses the raw cond-op
2747 // letter byte as the opcode payload. Use the ASCII char's
2748 // code-point value directly — every letter in `unary_set`
2749 // fits in 7 bits.
2750 ecadd(WCB_COND(ac[1] as u32, 0));
2751 ecstr(b);
2752 } else {
2753 ecadd(WCB_COND(COND_MOD as u32, 1));
2754 ecstr(a);
2755 ecstr(b);
2756 }
2757 1
2758}
2759
/// Port of `get_cond_num(char *tst)` from `Src/parse.c:2643`.
///
/// Looks `tst` up in the table of binary cond operator names
/// `{"nt","ot","ef","eq","ne","lt","gt","le","ge"}` (given without
/// the leading dash).
///
/// Returns the zero-based table index, or `-1` when `tst` is not one
/// of those names.
pub fn get_cond_num(tst: &str) -> i32 {
    // c:2647 — order is significant: the index is the return value.
    const CONDSTRS: [&str; 9] = ["nt", "ot", "ef", "eq", "ne", "lt", "gt", "le", "ge"];

    CONDSTRS
        .iter()
        .position(|&name| name == tst) // c:2654
        .map_or(-1, |idx| idx as i32) // c:2656 for the miss
}
2775
/// Recursion guard for `par_time`, mirroring the `static int
/// inpartime` at C parse.c:1038 that prevents infinite recursion on
/// `time time foo`. C uses a function-level `static int`
/// (per-process); the wordcode path keeps the flag as a
/// `thread_local` (per-evaluator, semantically matching).
/// Initialised to `false`.
thread_local! {
    static PARSER_INPARTIME: std::cell::Cell<bool> = const { std::cell::Cell::new(false) };
}
2783
2784/// Port of `par_cond_triple(char *a, char *b, char *c)` from
2785/// `Src/parse.c:2659`. Emits wordcode for the binary forms
2786/// `[ A op B ]` — `=` / `==` / `!=` / `<` / `>` / `=~` / `-X`.
2787///
2788/// C does `(b[0] == Equals || b[0] == '=')` etc., matching BOTH the
2789/// raw ASCII operator char AND its tokenized marker form per
2790/// `Src/zsh.h:159-194`:
2791/// Equals = `\u{8d}`, Outang = `\u{95}`, Inang = `\u{94}`,
2792/// Tilde = `\u{98}`, Bang = `\u{9c}`, Dash = `\u{9b}`.
2793/// Inside `[[ ... ]]` the lexer emits the marker bytes — comparing
2794/// against literal-only `b"=="` misses every cond op.
2795/// (The previous Rust port had the doc comment values wrong:
2796/// Outang=0x8e was actually Bar; Inang=0x91 was Inbrack;
2797/// Tilde=0x96 was OutangProc; Bang=0x8b was Outparmath. The code
2798/// itself uses the correct const names, so this was a docs-only fix.)
2799pub fn par_cond_triple(a: &str, b: &str, c: &str) -> i32 {
2800 // c:2659
2801 let bc: Vec<char> = b.chars().collect();
2802 let is_eq = |ch: char| ch == '=' || ch == Equals;
2803 let is_gt = |ch: char| ch == '>' || ch == Outang;
2804 let is_lt = |ch: char| ch == '<' || ch == Inang;
2805 let is_tilde = |ch: char| ch == '~' || ch == Tilde;
2806 let is_bang = |ch: char| ch == '!' || ch == Bang;
2807
2808 // c:2663 — `(b[0] == Equals || b[0] == '=') && !b[1]` → `=` (single).
2809 if bc.len() == 1 && is_eq(bc[0]) {
2810 ecadd(WCB_COND(COND_STREQ as u32, 0));
2811 ecstr(a);
2812 ecstr(c);
2813 let np = ECNPATS.with(|cc| {
2814 let v = cc.get();
2815 cc.set(v + 1);
2816 v
2817 }) as u32;
2818 ecadd(np);
2819 return 1;
2820 }
2821 // c:2668-2673 — `(t0 = b[0]=='>' || Outang) || b[0]=='<' || Inang`.
2822 if bc.len() == 1 && (is_gt(bc[0]) || is_lt(bc[0])) {
2823 let op = if is_gt(bc[0]) {
2824 COND_STRGTR
2825 } else {
2826 COND_STRLT
2827 };
2828 ecadd(WCB_COND(op as u32, 0));
2829 ecstr(a);
2830 ecstr(c);
2831 let np = ECNPATS.with(|cc| {
2832 let v = cc.get();
2833 cc.set(v + 1);
2834 v
2835 }) as u32;
2836 ecadd(np);
2837 return 1;
2838 }
2839 // c:2674-2679 — `==` STRDEQ.
2840 if bc.len() == 2 && is_eq(bc[0]) && is_eq(bc[1]) {
2841 ecadd(WCB_COND(COND_STRDEQ as u32, 0));
2842 ecstr(a);
2843 ecstr(c);
2844 let np = ECNPATS.with(|cc| {
2845 let v = cc.get();
2846 cc.set(v + 1);
2847 v
2848 }) as u32;
2849 ecadd(np);
2850 return 1;
2851 }
2852 // c:2680-2684 — `!=` STRNEQ.
2853 if bc.len() == 2 && is_bang(bc[0]) && is_eq(bc[1]) {
2854 ecadd(WCB_COND(COND_STRNEQ as u32, 0));
2855 ecstr(a);
2856 ecstr(c);
2857 let np = ECNPATS.with(|cc| {
2858 let v = cc.get();
2859 cc.set(v + 1);
2860 v
2861 }) as u32;
2862 ecadd(np);
2863 return 1;
2864 }
2865 // c:2685-2691 — `=~` REGEX (no pattern slot — implicit COND_MODI).
2866 if bc.len() == 2 && is_eq(bc[0]) && is_tilde(bc[1]) {
2867 ecadd(WCB_COND(COND_REGEX as u32, 0));
2868 ecstr(a);
2869 ecstr(c);
2870 return 1;
2871 }
2872 // c:2692-2702 — `-OP` numeric-or-modular cond (e.g. `-eq`, `-nt`).
2873 if !bc.is_empty() && IS_DASH(bc[0]) {
2874 let rest: String = bc[1..].iter().collect();
2875 let t = get_cond_num(&rest);
2876 if t > -1 {
2877 ecadd(WCB_COND((t + COND_NT) as u32, 0));
2878 ecstr(a);
2879 ecstr(c);
2880 return 1;
2881 }
2882 ecadd(WCB_COND(COND_MODI as u32, 0));
2883 ecstr(b);
2884 ecstr(a);
2885 ecstr(c);
2886 return 1;
2887 }
2888 // c:2703-2707 — `-mod A B C` modular cond on `a`.
2889 let ac: Vec<char> = a.chars().collect();
2890 if !ac.is_empty() && IS_DASH(ac[0]) && ac.len() > 1 {
2891 ecadd(WCB_COND(COND_MOD as u32, 2));
2892 ecstr(a);
2893 ecstr(b);
2894 ecstr(c);
2895 return 1;
2896 }
2897 zerr(&format!("condition expected: {}", b));
2898 1
2899}
2900
2901/// Port of `par_cond_multi(char *a, LinkList l)` from `Src/parse.c:2716`.
2902/// Emits wordcode for `[ -OP A B C … ]` n-ary cond (alternation).
2903pub fn par_cond_multi(a: &str, l: &[String]) -> i32 {
2904 // c:2716 — `if (!IS_DASH(a[0]) || !a[1])`; same Dash/`-` dual
2905 // matching as par_cond_double, char-walked because Dash is a
2906 // single code point.
2907 let ac: Vec<char> = a.chars().collect();
2908 if ac.is_empty() || !IS_DASH(ac[0]) || ac.len() < 2 {
2909 zerr(&format!("condition expected: {}", a));
2910 return 1;
2911 }
2912 ecadd(WCB_COND(COND_MOD as u32, l.len() as u32));
2913 ecstr(a);
2914 for item in l {
2915 ecstr(item);
2916 }
2917 1
2918}
2919
2920/// Emit a parser-level error. Direct port of zsh/Src/parse.c
2921/// 2733-2766 `yyerror`. C version fills a per-event error buffer
2922/// and sets errflag. zshrs pushes onto errors which the
2923/// caller drains via parse()'s Result return.
2924/// WARNING: param-name divergence — Rust takes `&str message`, C takes
2925/// `int noerr`. The Rust callers pass user-meaningful messages
2926/// (`"missing ]]"`, `"condition expected"`); the C body collects the
2927/// offending token via `dupstring(zshlextext)` for the error string.
2928/// This Rust adapter:
2929/// 1. Uses the caller-supplied message verbatim if non-empty.
2930/// 2. Skips the `histdone & HISTFLAG_NOEXEC` and `errflag & ERRFLAG_INT`
2931/// gates per c:2746 (printing only when neither is set) — the
2932/// ERRFLAG_INT check is the load-bearing guard.
2933/// 3. Sets ERRFLAG_ERROR per c:2753 (noerr=0 path always taken).
2934pub fn yyerror(msg: &str) {
2935 // c:2733
2936 let int_flagged = (errflag.load(Ordering::SeqCst) & crate::ported::zsh_h::ERRFLAG_INT) != 0;
2937 if !int_flagged {
2938 // c:2746
2939 let body = if msg.is_empty() {
2940 "parse error".to_string()
2941 }
2942 // c:2751
2943 else {
2944 format!("parse error: {msg}")
2945 }; // c:2748
2946 zwarnnam("zsh", &body);
2947 }
2948 // c:2753 — `if (!noerr && noerrs != 2) errflag |= ERRFLAG_ERROR;`
2949 errflag.fetch_or(crate::ported::zsh_h::ERRFLAG_ERROR, Ordering::SeqCst);
2950}
2951
// ============================================================
// Eprog runtime ops (parse.c:2767-2853)
//
// dupeprog / useeprog / freeeprog are zsh's reference-counting
// helpers for executable programs. zshrs's AST is owned by
// value (Rust ownership); cloning is a tree-deep copy via
// Clone, "use" is a no-op (the executor borrows the AST), and
// "free" is automatic on drop. The wordcode `eprog` ports below
// nevertheless maintain the C-style `nref` count on the struct.
// ============================================================
2961
2962/// Duplicate an Eprog. Direct port of zsh/Src/parse.c:2813
2963/// Port of `Eprog dupeprog(Eprog p, int heap)` from
2964/// `Src/parse.c:2767`. Deep-copies the wordcode array, string
2965/// table, and pattern-prog slots. `dummy_eprog` is returned
2966/// unchanged. `heap`-allocated copies get `nref = -1` (never
2967/// freed); real ones get `nref = 1`.
2968pub fn dupeprog(p: &eprog, heap: bool) -> eprog {
2969 // c:2774-2775 — `if (p == &dummy_eprog) return p;` — caller-
2970 // observable identity in C uses a pointer compare; Rust's
2971 // equivalent is "if it has the dummy's shape (single WCB_END
2972 // word and no strs), return a copy of the same shape".
2973 // c:2796-2797 — `for (i = r->npats; i--; pp++) *pp = dummy_patprog1;`
2974 // C uses `dummy_patprog1` as a placeholder; the Rust port has
2975 // `Vec<Patprog>` (Box<patprog>) — synthesize an equivalent zero-
2976 // initialized patprog for each slot (resolved later by
2977 // pattern.c::patcompile-on-first-use).
2978 let dummy_pat = || crate::ported::zsh_h::patprog {
2979 startoff: 0,
2980 size: 0,
2981 mustoff: 0,
2982 patmlen: 0,
2983 globflags: 0,
2984 globend: 0,
2985 flags: 0,
2986 patnpar: 0,
2987 patstartch: 0,
2988 };
2989 let r = eprog {
2990 // c:2778 — `flags = (heap ? EF_HEAP : EF_REAL) | (p->flags & EF_RUN);`
2991 flags: (if heap { EF_HEAP } else { EF_REAL }) | (p.flags & EF_RUN),
2992 len: p.len,
2993 npats: p.npats,
2994 // c:2787 — `nref = heap ? -1 : 1;`
2995 nref: if heap { -1 } else { 1 },
2996 prog: p.prog.clone(),
2997 strs: p.strs.clone(),
2998 pats: (0..p.npats).map(|_| Box::new(dummy_pat())).collect(),
2999 shf: None,
3000 dump: None,
3001 };
3002 r
3003}
3004
3005/// Port of `void useeprog(Eprog p)` from `Src/parse.c:2813`.
3006/// `if (p && p != &dummy_eprog && p->nref >= 0) p->nref++;` —
3007/// pin a real (non-heap, non-dummy) Eprog so it survives the
3008/// next `freeeprog`.
3009pub fn useeprog(p: &mut eprog) {
3010 // c:2815 — `if (p && p != &dummy_eprog && p->nref >= 0)`
3011 if p.nref >= 0 {
3012 p.nref += 1; // c:2816
3013 }
3014}
3015
3016/// Port of `void freeeprog(Eprog p)` from `Src/parse.c:2823`.
3017/// Refcount-decrement; when it hits zero, drops the pattern progs,
3018/// decrements the dump refcount if any, and releases the eprog.
3019/// `dummy_eprog` is never freed. Heap-eprogs (`nref < 0`) are
3020/// never freed either — they live as long as the heap arena.
3021pub fn freeeprog(p: &mut eprog) {
3022 // c:2829 — `if (p && p != &dummy_eprog) { ... }`
3023 if p.nref > 0 {
3024 p.nref -= 1; // c:2832
3025 if p.nref == 0 {
3026 // c:2833-2840 — drop pats, dump refcount, then the eprog.
3027 // Rust's Drop handles the per-field cleanup; we just
3028 // need to decrement the dump count first.
3029 if let Some(dump) = p.dump.take() {
3030 let dumped = (*dump).clone();
3031 decrdumpcount(&dumped); // c:2837
3032 }
3033 p.prog.clear();
3034 p.strs = None;
3035 p.pats.clear();
3036 }
3037 }
3038}
3039
// =============================================================================
// Wordcode read helpers — used by text.rs's `gettext2` and exec dispatch
// to walk a compiled Eprog without re-running the parser. These are the
// only `Src/parse.c` functions ported so far in this file; the recursive-
// descent parser (par_event / par_list / par_cmd / par_*) follows
// below as free functions at module scope.
// =============================================================================
3047
3048/// Port of `ecgetstr(Estate s, int dup, int *tokflag)` from `Src/parse.c:2855`.
3049/// `s->pc` advances through the wordcode buffer; `s->strs` indexes the
3050/// string pool. Returns the interned string (or a 1-3-char literal
3051/// inlined directly into the wordcode word).
3052pub fn ecgetstr(s: &mut estate, dup: i32, tokflag: Option<&mut i32>) -> String {
3053 let prog = &s.prog.prog;
3054 if s.pc >= prog.len() {
3055 return String::new();
3056 }
3057 let c = prog[s.pc]; // c:2858 `wordcode c = *s->pc++;`
3058 s.pc += 1;
3059 if let Some(tf) = tokflag {
3060 *tf = i32::from((c & 1) != 0); // c:2880 `*tokflag = (c & 1);`
3061 }
3062 if c == 6 || c == 7 {
3063 // c:2861 `if (c == 6 || c == 7) r = "";`
3064 return String::new();
3065 }
3066 let r: String = if (c & 2) != 0 {
3067 // c:2862 — `else if (c & 2)`
3068 // c:2863-2868 — 3-byte inline string packed into the wordcode
3069 // word; followed by `buf[3] = '\0'; r = dupstring(buf);`.
3070 // C's `dupstring` uses `strlen(buf)` which TRUNCATES at the
3071 // first NUL byte — short strings of 1 or 2 chars get padded
3072 // with NULs and truncated cleanly. The previous Rust port
3073 // used `retain(|&x| x != 0)` which would silently SPLICE OUT
3074 // an interior NUL (e.g. `[a, 0, b]` → "ab"), diverging from
3075 // C's strlen-truncate (`[a, 0, b]` → "a"). Fix: truncate at
3076 // first NUL to match C exactly.
3077 let b0 = ((c >> 3) & 0xff) as u8;
3078 let b1 = ((c >> 11) & 0xff) as u8;
3079 let b2 = ((c >> 19) & 0xff) as u8;
3080 let v = [b0, b1, b2];
3081 let end = v.iter().position(|&x| x == 0).unwrap_or(v.len()); // c:2869 strlen(buf)
3082 String::from_utf8_lossy(&v[..end]).into_owned()
3083 } else {
3084 // c:2877 `else r = s->strs + (c >> 2);`
3085 let off = (c >> 2) as usize + s.strs_offset;
3086 let strs_bytes = s.strs.as_deref().unwrap_or("").as_bytes();
3087 if off >= strs_bytes.len() {
3088 String::new()
3089 } else {
3090 let tail = &strs_bytes[off..];
3091 let end = tail.iter().position(|&b| b == 0).unwrap_or(tail.len());
3092 String::from_utf8_lossy(&tail[..end]).into_owned()
3093 }
3094 };
3095 // c:2891 `return ((dup == EC_DUP || (dup && (c & 1))) ? dupstring(r) : r);`
3096 // Rust owns the String already; `dup` flag has no observable effect.
3097 let _ = (dup, EC_DUP, EC_NODUP);
3098 r
3099}
3100
3101// ============================================================
3102// Wordcode runtime getters (parse.c:2853-3060)
3103//
3104// Direct ports of the wordcode-read helpers (ecrawstr,
3105// ecgetstr, ecgetarr, ecgetredirs, ecgetlist, eccopyredirs).
3106// Read packed wordcode out of an Eprog at execution time.
3107// Used by exec_wordcode and the wordcode-walking dispatch in
3108// src/vm_helper.
3109// ============================================================
3110
3111/// Port of `ecrawstr(Eprog p, Wordcode pc, int *tokflag)` from
3112/// `Src/parse.c:2891`. Like `ecgetstr` but reads at the given pc
3113/// without advancing — caller steps `pc` separately.
3114pub fn ecrawstr(p: &eprog, pc: usize, tokflag: Option<&mut i32>) -> String {
3115 if pc >= p.prog.len() {
3116 return String::new();
3117 }
3118 let c = p.prog[pc]; // c:2894
3119 if let Some(tf) = tokflag {
3120 *tf = i32::from((c & 1) != 0); // c:2898/2906/2912
3121 }
3122 if c == 6 || c == 7 {
3123 // c:2897
3124 return String::new();
3125 }
3126 if (c & 2) != 0 {
3127 // c:2902-2906 — same 3-byte inline string as ecgetstr, then
3128 // `buf[3] = '\0'; return dupstring(buf);` — truncate at first
3129 // NUL via strlen (NOT splice out interior NULs).
3130 let b0 = ((c >> 3) & 0xff) as u8;
3131 let b1 = ((c >> 11) & 0xff) as u8;
3132 let b2 = ((c >> 19) & 0xff) as u8;
3133 let v = [b0, b1, b2];
3134 let end = v.iter().position(|&x| x == 0).unwrap_or(v.len()); // c:2906 strlen(buf)
3135 String::from_utf8_lossy(&v[..end]).into_owned()
3136 } else {
3137 // c:2911
3138 let off = (c >> 2) as usize;
3139 let strs_bytes = p.strs.as_deref().unwrap_or("").as_bytes();
3140 if off >= strs_bytes.len() {
3141 return String::new();
3142 }
3143 let tail = &strs_bytes[off..];
3144 let end = tail.iter().position(|&b| b == 0).unwrap_or(tail.len());
3145 String::from_utf8_lossy(&tail[..end]).into_owned()
3146 }
3147}
3148
3149/// Port of `ecgetarr(Estate s, int num, int dup, int *tokflag)` from
3150/// `Src/parse.c:2917`. Reads `num` strings from wordcode at `s->pc`
3151/// and OR-folds each entry's token flag into `*tokflag`.
3152pub fn ecgetarr(s: &mut estate, num: usize, dup: i32, tokflag: Option<&mut i32>) -> Vec<String> {
3153 let mut ret: Vec<String> = Vec::with_capacity(num); // c:2922
3154 let mut tf: i32 = 0;
3155 for _ in 0..num {
3156 // c:2924 `while (num--)`
3157 let mut tmp = 0;
3158 ret.push(ecgetstr(s, dup, Some(&mut tmp))); // c:2925
3159 tf |= tmp; // c:2926
3160 }
3161 if let Some(out) = tokflag {
3162 // c:2929
3163 *out = tf;
3164 }
3165 ret
3166}
3167
3168/// Port of `ecgetlist(Estate s, int num, int dup, int *tokflag)` from
3169/// `Src/parse.c:2937`. Same shape as `ecgetarr` but C returns
3170/// `LinkList`; zshrs uses `Vec<String>` for both.
3171pub fn ecgetlist(s: &mut estate, num: usize, dup: i32, tokflag: Option<&mut i32>) -> Vec<String> {
3172 if num == 0 {
3173 // c:2949-2952
3174 if let Some(tf) = tokflag {
3175 *tf = 0;
3176 }
3177 return Vec::new();
3178 }
3179 ecgetarr(s, num, dup, tokflag)
3180}
3181
3182/// Port of `ecgetredirs(Estate s)` from `Src/parse.c:2959`.
3183///
3184/// `strs` must be the same tail `ecgetstr` uses (`s->strs` / `estate.strs` from offset).
3185/// WARNING: param names don't match C — Rust=(prog, strs, pc) vs C=(s)
3186pub fn ecgetredirs(s: &mut estate) -> Vec<redir> {
3187 let mut ret: Vec<redir> = Vec::new(); // c:2959 `LinkList ret = newlinklist();`
3188 let prog_len = s.prog.prog.len();
3189 if s.pc >= prog_len {
3190 return ret;
3191 }
3192 let mut code = s.prog.prog[s.pc]; // c:2962 `wordcode code = *s->pc++;`
3193 s.pc += 1;
3194
3195 loop {
3196 if wc_code(code) != WC_REDIR {
3197 // c:2988-2989 `s->pc--` then break from while
3198 s.pc = s.pc.saturating_sub(1);
3199 break;
3200 }
3201
3202 let typ = WC_REDIR_TYPE(code); // c:2967 `r->type = WC_REDIR_TYPE(code);`
3203 if s.pc >= prog_len {
3204 break;
3205 }
3206 let fd1_w = s.prog.prog[s.pc]; // c:2968 `r->fd1 = *s->pc++;`
3207 s.pc += 1;
3208
3209 let name = ecgetstr(s, EC_DUP, None); // c:2969 `r->name = ecgetstr(...)`
3210
3211 let (flags, here_terminator, munged_here_terminator) = if WC_REDIR_FROM_HEREDOC(code) != 0 {
3212 // c:2970-2973
3213 let term = ecgetstr(s, EC_DUP, None);
3214 let munged = ecgetstr(s, EC_DUP, None);
3215 (REDIRF_FROM_HEREDOC, Some(term), Some(munged))
3216 } else {
3217 // c:2974-2977
3218 (0, None, None)
3219 };
3220
3221 let varid = if WC_REDIR_VARID(code) != 0 {
3222 // c:2979-2980
3223 Some(ecgetstr(s, EC_DUP, None))
3224 } else {
3225 None // c:2981-2982
3226 };
3227
3228 ret.push(redir {
3229 // c:2965-2982 fields + c:2984 `addlinknode`
3230 typ,
3231 flags,
3232 fd1: fd1_w as i32,
3233 fd2: 0,
3234 name: Some(name),
3235 varid,
3236 here_terminator,
3237 munged_here_terminator,
3238 });
3239
3240 if s.pc >= prog_len {
3241 break;
3242 }
3243 code = s.prog.prog[s.pc]; // c:2986 `code = *s->pc++;`
3244 s.pc += 1;
3245 }
3246
3247 ret // c:2990 `return ret`
3248}
3249
3250/// Port of `eccopyredirs(Estate s)` from `Src/parse.c:3003`. Reads
3251/// the WC_REDIR run at `s->pc`, counts the wordcodes needed,
3252/// reserves space in `ecbuf` via `ecispace`, then re-walks `s->pc`
3253/// re-emitting each redir's wordcodes into the reserved slot —
3254/// finally calls `bld_eprog(0)` to package the result as an Eprog.
3255pub fn eccopyredirs(s: &mut estate) -> Option<eprog> {
3256 let prog_len = s.prog.prog.len();
3257 if s.pc >= prog_len {
3258 return None;
3259 }
3260 // c:3007-3009 — `if (wc_code(*pc) != WC_REDIR) return NULL;`
3261 let first_code = s.prog.prog[s.pc];
3262 if wc_code(first_code) != WC_REDIR {
3263 return None;
3264 }
3265 // c:3011 — `init_parse();`
3266 init_parse();
3267
3268 // c:3013-3027 — count wordcodes the redir run will need.
3269 // Each WC_REDIR contributes `code + fd1 + name` = 3, plus
3270 // `+2` if WC_REDIR_FROM_HEREDOC (terminator + munged), plus
3271 // `+1` if WC_REDIR_VARID.
3272 let mut probe = s.pc;
3273 let mut ncodes = 0usize;
3274 loop {
3275 if probe >= prog_len {
3276 break;
3277 }
3278 let code = s.prog.prog[probe];
3279 if wc_code(code) != WC_REDIR {
3280 break;
3281 }
3282 let mut ncode = if WC_REDIR_FROM_HEREDOC(code) != 0 {
3283 5
3284 } else {
3285 3
3286 };
3287 if WC_REDIR_VARID(code) != 0 {
3288 ncode += 1;
3289 }
3290 probe += ncode;
3291 ncodes += ncode;
3292 }
3293
3294 // c:3028-3029 — `r = ecused; ecispace(r, ncodes);`
3295 let r0 = ECUSED.get() as usize;
3296 ecispace(r0, ncodes);
3297
3298 // c:3031-3053 — re-walk `s->pc` and write into ecbuf[r..].
3299 let mut r = r0;
3300 loop {
3301 if s.pc >= prog_len {
3302 break;
3303 }
3304 let code = s.prog.prog[s.pc];
3305 if wc_code(code) != WC_REDIR {
3306 break;
3307 }
3308 s.pc += 1;
3309 // c:3036 — `ecbuf[r++] = code;`
3310 ECBUF.with_borrow_mut(|buf| {
3311 if r >= buf.len() {
3312 buf.resize(r + 1, 0);
3313 }
3314 buf[r] = code;
3315 });
3316 r += 1;
3317 // c:3038 — `ecbuf[r++] = *s->pc++;` (the fd1 word)
3318 let fd1 = s.prog.prog[s.pc];
3319 s.pc += 1;
3320 ECBUF.with_borrow_mut(|buf| {
3321 if r >= buf.len() {
3322 buf.resize(r + 1, 0);
3323 }
3324 buf[r] = fd1;
3325 });
3326 r += 1;
3327 // c:3041 — `ecbuf[r++] = ecstrcode(ecgetstr(s, EC_NODUP, NULL));`
3328 let name = ecgetstr(s, EC_NODUP, None);
3329 let nc = ecstrcode(&name);
3330 ECBUF.with_borrow_mut(|buf| {
3331 if r >= buf.len() {
3332 buf.resize(r + 1, 0);
3333 }
3334 buf[r] = nc;
3335 });
3336 r += 1;
3337 // c:3042-3047 — heredoc terminators.
3338 if WC_REDIR_FROM_HEREDOC(code) != 0 {
3339 let term = ecgetstr(s, EC_NODUP, None);
3340 let tc = ecstrcode(&term);
3341 ECBUF.with_borrow_mut(|buf| {
3342 if r >= buf.len() {
3343 buf.resize(r + 1, 0);
3344 }
3345 buf[r] = tc;
3346 });
3347 r += 1;
3348 let munged = ecgetstr(s, EC_NODUP, None);
3349 let mc = ecstrcode(&munged);
3350 ECBUF.with_borrow_mut(|buf| {
3351 if r >= buf.len() {
3352 buf.resize(r + 1, 0);
3353 }
3354 buf[r] = mc;
3355 });
3356 r += 1;
3357 }
3358 // c:3048-3049 — varid.
3359 if WC_REDIR_VARID(code) != 0 {
3360 let varid = ecgetstr(s, EC_NODUP, None);
3361 let vc = ecstrcode(&varid);
3362 ECBUF.with_borrow_mut(|buf| {
3363 if r >= buf.len() {
3364 buf.resize(r + 1, 0);
3365 }
3366 buf[r] = vc;
3367 });
3368 r += 1;
3369 }
3370 }
3371
3372 // c:3056 — `return bld_eprog(0);` — `bld_eprog` appends the
3373 // WC_END marker and packages ECBUF/ECSTRS into an Eprog.
3374 Some(bld_eprog(false))
3375}
3376
3377/// Port of `init_eprog(void)` from `Src/parse.c:3069`. Sets up
3378/// `dummy_eprog_code = WCB_END(); dummy_eprog.len = sizeof(wordcode);
3379/// dummy_eprog.prog = &dummy_eprog_code; dummy_eprog.strs = NULL;`.
3380/// Called once at shell startup (init_main → init_misc → init_eprog).
3381pub fn init_eprog() {
3382 let mut d = DUMMY_EPROG.lock().unwrap();
3383 d.prog = vec![WCB_END()]; // c:3071/3073
3384 d.len = size_of::<wordcode>() as i32; // c:3072
3385 d.strs = None; // c:3074
3386 d.flags = 0;
3387 d.npats = 0;
3388 d.nref = 0;
3389}
3390
3391// =====================================================================
3392// `bin_zcompile` and wordcode-dump helpers — port of `Src/parse.c:3104+`.
3393//
3394// The wordcode dump format (`.zwc`) is a serialized parse tree zsh can
3395// `mmap()` and dispatch from without re-parsing on every shell start.
3396// File layout (one struct = `FD_PRELEN` `u32`s):
3397// - `pre[0]` = magic word (FD_MAGIC native byte-order, FD_OMAGIC
3398// opposite byte-order).
3399// - `pre[1]` = packed `{flags(8) | other_offset(24)}` byte field.
3400// - `pre[2..12]` = `ZSH_VERSION` C-string padded to 40 bytes.
3401// - `pre[12]` = `fdheaderlen` (total prelude+header word count).
3402// - Then a sequence of `struct fdhead` records, one per function,
3403// each followed by its NUL-terminated name (padded to 4-byte).
3404// - Then the wordcode bytes for every function back-to-back.
3405//
// A little-endian host writes a dump twice: first with `FD_MAGIC` for
// native readers, then it re-walks the body byte-swapped and emits a
// second `FD_OMAGIC` copy so big-endian readers can mmap it too.
3409// =====================================================================
3410
3411// File-format constants — port of `Src/parse.c:3104-3150`.
3412
/// `#define FD_EXT ".zwc"` from `Src/parse.c:3104`. File-name suffix
/// appended to dump names that do not already end with it.
pub const FD_EXT: &str = ".zwc";

/// `#define FD_MINMAP 4096` from `Src/parse.c:3105`. mmap threshold:
/// when neither `-M` nor `-R` was given (`map == 1`), `write_dump`
/// marks the dump as mappable only if its `tlen` argument is at least
/// this value. `-M` (`map == 2`) and `-R` (`map == 0`) bypass the check.
/// NOTE(review): `tlen` is also stored as the byte offset of the second
/// copy, so the unit is presumably bytes — confirm against the caller.
pub const FD_MINMAP: usize = 4096;

/// `#define FD_PRELEN 12` from `Src/parse.c:3107`. File-header
/// length in u32 words: magic + packed-flags-byte + 10 version words.
pub const FD_PRELEN: usize = 12;

/// `#define FD_MAGIC 0x04050607` from `Src/parse.c:3108`. Sentinel
/// for native-byte-order dumps.
pub const FD_MAGIC: u32 = 0x04050607;

/// `#define FD_OMAGIC 0x07060504` from `Src/parse.c:3109`. Sentinel
/// for opposite-byte-order dumps (byte-swapped FD_MAGIC).
pub const FD_OMAGIC: u32 = 0x07060504;

/// `#define FDF_MAP 1` from `Src/parse.c:3111`. Prelude flag bit that
/// `write_dump` sets when its resolved `map` mode is non-zero;
/// `zcompile -t` reports such a dump as "mapped" instead of "read".
pub const FDF_MAP: u32 = 1;

/// `#define FDF_OTHER 2` from `Src/parse.c:3112`. Prelude flag bit
/// that `write_dump` sets on the second (byte-swapped) copy of a dump.
pub const FDF_OTHER: u32 = 2;
3440
/// Port of `struct fdhead` from `Src/parse.c:3116`. One per function
/// inside a wordcode dump. All fields are `wordcode` (u32);
/// `write_dump` serializes a record as these six words in declaration
/// order, followed by the NUL-terminated, 4-byte-padded function name.
#[allow(non_camel_case_types)]
#[derive(Debug, Clone, Copy)]
pub struct fdhead {
    /// Offset (in u32 words) to the start of this function's
    /// wordcode body inside the dump.
    pub start: u32, // c:3117
    /// Byte length of the body (`write_dump` stores `body.len() * 4`);
    /// in C this excludes the pattern-prog slots.
    pub len: u32, // c:3118
    /// Number of compiled patterns the body references.
    pub npats: u32, // c:3119
    /// Offset of the string table inside `prog->prog`.
    pub strs: u32, // c:3120
    /// Header-record length in u32 words (record + name).
    pub hlen: u32, // c:3121
    /// Packed `{ kshload_bits(2) | name_tail_offset(30) }` field,
    /// built with `fdhbldflags` and read back with `fdhtail`.
    pub flags: u32, // c:3122
}
3460
/// `#define FDHF_KSHLOAD 1` from `Src/parse.c:3149`. Function-header
/// flag word — `-k` ksh-style autoload marker (chosen by
/// `bin_zcompile` when `-k` is set).
pub const FDHF_KSHLOAD: u32 = 1;

/// `#define FDHF_ZSHLOAD 2` from `Src/parse.c:3150`. `-z` zsh-style
/// autoload marker (chosen by `bin_zcompile` when `-z` is set, and
/// always used by `cur_add_func`).
pub const FDHF_ZSHLOAD: u32 = 2;
3468
/// Port of `struct wcfunc` from `Src/parse.c:3158`. Build-time
/// per-function aggregate before write_dump emits it. Where C holds
/// an `Eprog` pointer, the Rust port stores the body wordcode inline
/// as a `Vec<u32>`; the `Eprog` ↔ `parse_string` chain isn't fully
/// wired through this layer yet, so producers currently leave the
/// body empty.
#[allow(non_camel_case_types)]
#[derive(Debug, Clone)]
pub struct wcfunc {
    /// Function name as written into the dump header (may include a
    /// directory prefix; `write_dump` records where the basename starts).
    pub name: String, // c:3159
    /// `FDHF_*` autoload-style bits for this function.
    pub flags: u32, // c:3161
    /// Compiled body wordcode (one `u32` array per fn). Empty until
    /// the eprog emit-side lands; `write_dump` then walks each entry.
    pub body: Vec<u32>,
}
3483
3484/// Port of `dump_find_func(Wordcode h, char *name)` from
3485/// `Src/parse.c:3167`. Walks the header table inside a loaded
3486/// dump for a function with the given basename; returns true on hit.
3487pub fn dump_find_func(h: &[u32], name: &str) -> bool {
3488 // c:3167
3489 let header_words = fdheaderlen(h) as usize;
3490 let end = header_words; // walking u32 offsets, end-exclusive
3491 let mut cur = firstfdhead_offset();
3492 while cur < end {
3493 if let Some(fh) = read_fdhead(h, cur) {
3494 let full = fdname(h, cur);
3495 let tail = fdhtail(&fh) as usize;
3496 let basename = if tail <= full.len() {
3497 &full[tail..]
3498 } else {
3499 ""
3500 };
3501 if basename == name {
3502 return true;
3503 }
3504 cur = nextfdhead_offset(h, cur);
3505 } else {
3506 break;
3507 }
3508 }
3509 false
3510}
3511
3512/// Port of `bin_zcompile(char *nam, char **args, Options ops, UNUSED(int func))`
3513/// from `Src/parse.c:3180`. Validates the option set, then dispatches
3514/// to one of: `-t` (test/list), `-c`/`-a` (dump current functions),
3515/// or the default (compile source files to `.zwc`).
3516pub fn bin_zcompile(
3517 nam: &str, // c:3180
3518 args: &[String],
3519 ops: &crate::ported::zsh_h::options,
3520 _func: i32,
3521) -> i32 {
3522 // c:3185-3192 — illegal-combination guard.
3523 if (OPT_ISSET(ops, b'k') && OPT_ISSET(ops, b'z'))
3524 || (OPT_ISSET(ops, b'R') && OPT_ISSET(ops, b'M'))
3525 || (OPT_ISSET(ops, b'c')
3526 && (OPT_ISSET(ops, b'U') || OPT_ISSET(ops, b'k') || OPT_ISSET(ops, b'z')))
3527 || (!(OPT_ISSET(ops, b'c') || OPT_ISSET(ops, b'a')) && OPT_ISSET(ops, b'm'))
3528 {
3529 zwarnnam(nam, "illegal combination of options"); // c:3192
3530 return 1;
3531 }
3532
3533 // c:3194 — `-c`/`-a` + KSHAUTOLOAD warning.
3534 if (OPT_ISSET(ops, b'c') || OPT_ISSET(ops, b'a')) && isset(crate::ported::zsh_h::KSHAUTOLOAD) {
3535 zwarnnam(nam, "functions will use zsh style autoloading"); // c:3195
3536 }
3537
3538 // c:3196-3197 — flag word from `-k` / `-z`.
3539 let flags: u32 = if OPT_ISSET(ops, b'k') {
3540 FDHF_KSHLOAD
3541 } else if OPT_ISSET(ops, b'z') {
3542 FDHF_ZSHLOAD
3543 } else {
3544 0
3545 };
3546
3547 // c:3199 — `-t` test/list mode.
3548 if OPT_ISSET(ops, b't') {
3549 // c:3199
3550 if args.is_empty() {
3551 zwarnnam(nam, "too few arguments"); // c:3202
3552 return 1;
3553 }
3554 let dump_name = if args[0].ends_with(FD_EXT) {
3555 args[0].clone()
3556 } else {
3557 format!("{}{}", args[0], FD_EXT)
3558 };
3559 let f = match load_dump_header(nam, &dump_name, 1) {
3560 // c:3206
3561 Some(buf) => buf,
3562 None => return 1,
3563 };
3564 // c:3209 — per-function check.
3565 if args.len() > 1 {
3566 for name in &args[1..] {
3567 // c:3210
3568 if !dump_find_func(&f, name) {
3569 // c:3212
3570 return 1;
3571 }
3572 }
3573 return 0;
3574 }
3575 // c:3215-3221 — listing arm. Walk every fdhead, print
3576 // each function's full name. C uses `fdname(h)` which
3577 // includes the path prefix; matches our `fdname()` impl.
3578 let mapped = if (fdflags(&f) & FDF_MAP) != 0 {
3579 "mapped"
3580 } else {
3581 "read"
3582 };
3583 println!("zwc file ({}) for zsh-{}", mapped, fdversion(&f));
3584 let header_words = fdheaderlen(&f) as usize;
3585 let mut cur = firstfdhead_offset();
3586 while cur < header_words {
3587 if read_fdhead(&f, cur).is_none() {
3588 break;
3589 }
3590 println!("{}", fdname(&f, cur));
3591 cur = nextfdhead_offset(&f, cur);
3592 }
3593 return 0;
3594 }
3595
3596 if args.is_empty() {
3597 zwarnnam(nam, "too few arguments"); // c:3226
3598 return 1;
3599 }
3600
3601 // c:3228 — map mode discriminant.
3602 let map: i32 = if OPT_ISSET(ops, b'M') {
3603 2
3604 } else if OPT_ISSET(ops, b'R') {
3605 0
3606 } else {
3607 1
3608 };
3609
3610 // c:3230-3236 — single-file default-mode short path.
3611 if args.len() == 1 && !(OPT_ISSET(ops, b'c') || OPT_ISSET(ops, b'a')) {
3612 let dump = format!("{}{}", args[0], FD_EXT);
3613 return build_dump(nam, &dump, args, OPT_ISSET(ops, b'U') as i32, map, flags);
3614 }
3615
3616 // c:3239-3247 — multi-file or `-c`/`-a` mode.
3617 let dump = if args[0].ends_with(FD_EXT) {
3618 args[0].clone()
3619 } else {
3620 format!("{}{}", args[0], FD_EXT)
3621 };
3622 let rest = &args[1..];
3623 if OPT_ISSET(ops, b'c') || OPT_ISSET(ops, b'a') {
3624 let what =
3625 (if OPT_ISSET(ops, b'c') { 1 } else { 0 }) | (if OPT_ISSET(ops, b'a') { 2 } else { 0 });
3626 build_cur_dump(nam, &dump, rest, OPT_ISSET(ops, b'm') as i32, map, what)
3627 } else {
3628 build_dump(nam, &dump, rest, OPT_ISSET(ops, b'U') as i32, map, flags)
3629 }
3630}
3631
3632/// Port of `load_dump_header(char *nam, char *name, int err)` from
3633/// `Src/parse.c:3258`. Opens the file, reads + validates the magic
3634/// and version, then slurps the full header table into memory.
3635/// Returns the header u32-array on success or None on any failure
3636/// (emitting C-shaped warnings when `err != 0`).
3637pub fn load_dump_header(nam: &str, name: &str, err: i32) -> Option<Vec<u32>> {
3638 // c:3258
3639
3640 let mut f = match File::open(name) {
3641 // c:3263
3642 Ok(h) => h,
3643 Err(_) => {
3644 if err != 0 {
3645 zwarnnam(nam, &format!("can't open zwc file: {}", name)); // c:3265
3646 }
3647 return None;
3648 }
3649 };
3650
3651 // Read FD_PRELEN+1 u32 words = 52 bytes.
3652 let mut buf_bytes = vec![0u8; (FD_PRELEN + 1) * 4];
3653 if f.read_exact(&mut buf_bytes).is_err() {
3654 if err != 0 {
3655 zwarnnam(nam, &format!("invalid zwc file: {}", name)); // c:3277
3656 }
3657 return None;
3658 }
3659 let mut buf: Vec<u32> = buf_bytes
3660 .chunks_exact(4)
3661 .map(|c| u32::from_le_bytes([c[0], c[1], c[2], c[3]]))
3662 .collect();
3663
3664 // c:3270 — magic + version check. `ZSH_VERSION` (the C-side
3665 // global) — zshrs reports "5.9" in `--zsh` mode (Src/init.c parity).
3666 let magic_ok = fdmagic(&buf) == FD_MAGIC || fdmagic(&buf) == FD_OMAGIC;
3667 let v_ok = fdversion(&buf) == "5.9";
3668 if !magic_ok {
3669 if err != 0 {
3670 zwarnnam(nam, &format!("invalid zwc file: {}", name)); // c:3277
3671 }
3672 return None;
3673 }
3674 if !v_ok {
3675 if err != 0 {
3676 zwarnnam(
3677 nam,
3678 &format!(
3679 "zwc file has wrong version (zsh-{}): {}", // c:3274
3680 fdversion(&buf),
3681 name
3682 ),
3683 );
3684 }
3685 return None;
3686 }
3687
3688 // c:3285 — if magic matches host byte order, head len is `pre[FD_PRELEN]`.
3689 // Else seek to `fdother(buf)` and re-read.
3690 if fdmagic(&buf) != FD_MAGIC {
3691 let other = fdother(&buf) as u64; // c:3290
3692 if f.seek(SeekFrom::Start(other)).is_err() || f.read_exact(&mut buf_bytes).is_err() {
3693 zwarnnam(nam, &format!("invalid zwc file: {}", name)); // c:3295
3694 return None;
3695 }
3696 buf = buf_bytes
3697 .chunks_exact(4)
3698 .map(|c| u32::from_le_bytes([c[0], c[1], c[2], c[3]]))
3699 .collect();
3700 }
3701
3702 let total_words = fdheaderlen(&buf) as usize; // c:3286/3299
3703 if total_words < FD_PRELEN + 1 {
3704 zwarnnam(nam, &format!("invalid zwc file: {}", name));
3705 return None;
3706 }
3707
3708 // Read the remaining header words.
3709 let mut head: Vec<u32> = Vec::with_capacity(total_words);
3710 head.extend_from_slice(&buf);
3711 let remaining_words = total_words - (FD_PRELEN + 1);
3712 if remaining_words > 0 {
3713 let mut rest_bytes = vec![0u8; remaining_words * 4]; // c:3305
3714 if f.read_exact(&mut rest_bytes).is_err() {
3715 zwarnnam(nam, &format!("invalid zwc file: {}", name)); // c:3307
3716 return None;
3717 }
3718 for c in rest_bytes.chunks_exact(4) {
3719 head.push(u32::from_le_bytes([c[0], c[1], c[2], c[3]]));
3720 }
3721 }
3722 Some(head) // c:3311
3723}
3724
/// Port of `fdswap(Wordcode p, int n)` from `Src/parse.c:3318`.
/// Reverses the byte order of every u32 in `p`, in place. The C
/// `n` argument is implied by the slice length. Used when writing the
/// opposite-byte-order copy of a wordcode dump.
pub fn fdswap(p: &mut [u32]) {
    // c:3318
    p.iter_mut().for_each(|word| *word = word.swap_bytes());
}
3734
3735/// Port of `write_dump(int dfd, LinkList progs, int map, int hlen, int tlen)`
3736/// from `Src/parse.c:3334`. Writes the prelude + header records +
3737/// body wordcode bytes to the dump file descriptor.
3738///
3739/// Two passes: first native-byte-order (`FD_MAGIC`), then opposite-
3740/// byte-order (`FD_OMAGIC`) so big-endian readers can mmap the
3741/// same file. Bodies are byte-swapped via `fdswap` on the second pass.
3742pub fn write_dump(
3743 dfd: &mut File, // c:3334
3744 progs: &[wcfunc],
3745 mut map: i32,
3746 hlen: i32,
3747 tlen: i32,
3748) -> std::io::Result<()> {
3749 if map == 1 && (tlen as usize) >= FD_MINMAP {
3750 // c:3344
3751 map = 1;
3752 } else if map == 1 {
3753 map = 0;
3754 }
3755
3756 let mut other = 0u32; // c:3338
3757 let ohlen = hlen;
3758 let mut cur_hlen = hlen;
3759
3760 loop {
3761 cur_hlen = ohlen;
3762 // c:3347 — build the prelude.
3763 let mut pre = vec![0u32; FD_PRELEN];
3764 pre[0] = if other != 0 { FD_OMAGIC } else { FD_MAGIC }; // c:3350
3765 let flags = (if map != 0 { FDF_MAP } else { 0 }) | other;
3766 fdsetflags(&mut pre, flags as u8); // c:3351
3767 fdsetother(&mut pre, tlen as u32); // c:3352
3768 // c:3353 — copy ZSH_VERSION C-string into pre[2..].
3769 let ver = b"5.9";
3770 for (i, &b) in ver.iter().enumerate() {
3771 let word = 2 + i / 4;
3772 let shift = (i % 4) * 8;
3773 pre[word] |= (b as u32) << shift;
3774 }
3775 // Write prelude.
3776 for w in &pre {
3777 dfd.write_all(&w.to_le_bytes())?;
3778 }
3779 // c:3356 — per-fn header records.
3780 for wcf in progs {
3781 let n = &wcf.name;
3782 let prog = &wcf.body;
3783 let mut head = fdhead {
3784 start: cur_hlen as u32, // c:3360
3785 len: (prog.len() * 4) as u32, // c:3363
3786 npats: 0, // c:3364 (npats not tracked yet)
3787 strs: 0, // c:3365
3788 hlen: ((FDHEAD_WORDS as u32) + ((n.len() as u32 + 4) / 4)), // c:3366
3789 flags: 0,
3790 };
3791 cur_hlen += prog.len() as i32; // c:3361
3792 // c:3368 — name tail offset from path basename.
3793 let tail = n.rfind('/').map(|p| p + 1).unwrap_or(0);
3794 head.flags = fdhbldflags(wcf.flags, tail as u32); // c:3372
3795 // c:3373 — opposite-byte-order swap on second pass.
3796 let mut head_words: Vec<u32> = vec![
3797 head.start, head.len, head.npats, head.strs, head.hlen, head.flags,
3798 ];
3799 if other != 0 {
3800 fdswap(&mut head_words);
3801 }
3802 for w in &head_words {
3803 dfd.write_all(&w.to_le_bytes())?;
3804 }
3805 // c:3376 — write the name + NUL + pad-to-4.
3806 dfd.write_all(n.as_bytes())?;
3807 dfd.write_all(&[0u8])?;
3808 let pad = (4 - ((n.len() + 1) & 3)) & 3;
3809 if pad > 0 {
3810 dfd.write_all(&vec![0u8; pad])?;
3811 }
3812 }
3813 // c:3381 — per-fn body words.
3814 for wcf in progs {
3815 let mut body = wcf.body.clone();
3816 if other != 0 {
3817 fdswap(&mut body);
3818 }
3819 for w in &body {
3820 dfd.write_all(&w.to_le_bytes())?;
3821 }
3822 }
3823 if other != 0 {
3824 // c:3389
3825 break;
3826 }
3827 other = FDF_OTHER; // c:3391
3828 }
3829 Ok(())
3830}
3831
3832/// Port of `build_dump(char *nam, char *dump, char **files, int ali, int map, int flags)`
3833/// from `Src/parse.c:3397`. Source-file → wordcode dump compiler.
3834///
3835/// Status: scaffolded but the wordcode-emit step depends on
3836/// `parse_string` returning a fully-wired `Eprog` with `prog/strs/
3837/// npats` fields populated. The current `parse_string`/`parse` shape
3838/// emits an AST (`ZshProgram`) but not yet the wordcode array C
3839/// expects in this dump format. Until that lands, this returns 1
3840/// with a clear "wordcode emit not yet ported" message so callers
3841/// (autoload from `.zwc`, `zcompile path/to/file`) fail loud.
3842pub fn build_dump(
3843 nam: &str, // c:3397
3844 dump: &str,
3845 _files: &[String],
3846 _ali: i32,
3847 _map: i32,
3848 _flags: u32,
3849) -> i32 {
3850 zwarnnam(nam, &format!("{}: wordcode dump emit not yet ported", dump));
3851 1
3852}
3853
3854/// Port of `cur_add_func(char *nam, Shfunc shf, LinkList names, LinkList progs, int *hlen, int *tlen, int what)`
3855/// from `Src/parse.c:3489`. Adds a shfunc to the in-build dump
3856/// progs+names lists. Stub: `Eprog` for the function body isn't
3857/// yet wired through `shfunc.funcdef` to be serializable here.
3858pub fn cur_add_func(
3859 nam: &str, // c:3489
3860 shf_name: &str,
3861 shf_flags: i32,
3862 names: &mut Vec<String>,
3863 progs: &mut Vec<wcfunc>,
3864 hlen: &mut i32,
3865 tlen: &mut i32,
3866 what: i32,
3867) -> i32 {
3868 let is_undef = (shf_flags as u32 & PM_UNDEFINED) != 0;
3869 if is_undef {
3870 if (what & 2) == 0 {
3871 // c:3498
3872 zwarnnam(nam, &format!("function is not loaded: {}", shf_name));
3873 return 1;
3874 }
3875 // c:3503 — would call `getfpfunc` to load body for dump.
3876 zwarnnam(nam, &format!("can't load function: {}", shf_name));
3877 return 1;
3878 } else if (what & 1) == 0 {
3879 zwarnnam(nam, &format!("function is already loaded: {}", shf_name)); // c:3514
3880 return 1;
3881 }
3882 // c:3517 — would `dupeprog(shf->funcdef)`. Stub: empty body.
3883 let wcf = wcfunc {
3884 name: shf_name.to_string(),
3885 flags: FDHF_ZSHLOAD,
3886 body: Vec::new(),
3887 };
3888 progs.push(wcf);
3889 names.push(shf_name.to_string());
3890
3891 // c:3526 — bump hlen / tlen.
3892 let name_words = (shf_name.len() as i32 + 4) / 4;
3893 *hlen += (FDHEAD_WORDS as i32) + name_words;
3894 *tlen += 0; // body is empty in stub; real path adds prog->len in words.
3895
3896 0
3897}
3898
3899/// Port of `build_cur_dump(char *nam, char *dump, char **names, int match, int map, int what)`
3900/// from `Src/parse.c:3536`. Compiles currently-loaded functions
3901/// (`-c` for functions, `-a` for aliases) into a `.zwc` dump.
3902/// Same wordcode-emit dependency as `build_dump`.
3903pub fn build_cur_dump(
3904 nam: &str, // c:3536
3905 dump: &str,
3906 _names: &[String],
3907 _match_: i32,
3908 _map: i32,
3909 _what: i32,
3910) -> i32 {
3911 zwarnnam(
3912 nam,
3913 &format!("{}: wordcode dump-current emit not yet ported", dump),
3914 );
3915 1
3916}
3917
/// Port of `zwcstat(char *filename, struct stat *buf)` from
/// `Src/parse.c:3656`. Returns the metadata of `filename`; if that
/// cannot be stat'ed, falls back to `<filename>.old`. `None` means
/// neither could be stat'ed.
pub fn zwcstat(filename: &str) -> Option<fs::Metadata> {
    // c:3656
    fs::metadata(filename)
        .or_else(|_| fs::metadata(format!("{}.old", filename)))
        .ok()
}
3931
/// Port of `load_dump_file(char *dump, struct stat *sbuf, int other, int len)`
/// from `Src/parse.c:3675`. Reads a `.zwc` file into memory.
///
/// * `dump` — path of the file.
/// * `other` — when non-zero, byte offset to seek to before reading.
/// * `_sbuf`, `_len` — accepted for signature parity; unused.
///
/// Everything from the starting position to end-of-file is decoded as
/// little-endian u32 words; up to three trailing bytes that do not
/// form a whole word are dropped. Returns `None` on any I/O error.
pub fn load_dump_file(
    dump: &str, // c:3675
    _sbuf: &fs::Metadata,
    other: i32,
    _len: usize,
) -> Option<Vec<u32>> {
    let mut handle = File::open(dump).ok()?;
    if other != 0 {
        handle.seek(SeekFrom::Start(other as u64)).ok()?;
    }

    let mut raw = Vec::new();
    handle.read_to_end(&mut raw).ok()?;

    let mut words = Vec::with_capacity(raw.len() / 4);
    for quad in raw.chunks_exact(4) {
        words.push(u32::from_le_bytes([quad[0], quad[1], quad[2], quad[3]]));
    }
    Some(words)
}
3954
3955/// Port of `try_dump_file(char *path, char *name, char *file, int *ksh, int test_only)`
3956/// from `Src/parse.c:3746`. Tries to load function `name` from a
3957/// `.zwc` digest (`<path>.zwc`) or per-function compiled file
3958/// (`<file>.zwc`) when each is newer than its uncompiled source.
3959pub fn try_dump_file(
3960 path: &str,
3961 name: &str,
3962 file: &str, // c:3746
3963 test_only: bool,
3964) -> Option<(Vec<u32>, bool)> {
3965 use std::fs;
3966
3967 // c:3753-3758 — if path ends in .zwc, treat as direct digest.
3968 if path.ends_with(FD_EXT) {
3969 crate::ported::signals::queue_signals();
3970 let result = fs::metadata(path)
3971 .ok()
3972 .and_then(|m| check_dump_file(path, &m, name, test_only));
3973 unqueue_signals();
3974 return result;
3975 }
3976
3977 // c:3759-3760 — dig = "<path>.zwc", wc = "<file>.zwc".
3978 let dig = format!("{}{}", path, FD_EXT);
3979 let wc = format!("{}{}", file, FD_EXT);
3980
3981 // c:3762-3764 — zwcstat(dig, &std); stat(wc, &stc); stat(file, &stn);
3982 let std_meta = fs::metadata(&dig);
3983 let stc_meta = fs::metadata(&wc);
3984 let stn_meta = fs::metadata(file);
3985
3986 crate::ported::signals::queue_signals();
3987
3988 // c:3771-3777 — try digest if newer than (or in absence of) wc/file.
3989 if let Ok(std_m) = &std_meta {
3990 let dig_mtime = std_m.modified().ok();
3991 let wc_newer_or_missing = match &stc_meta {
3992 Err(_) => true,
3993 Ok(c) => dig_mtime >= c.modified().ok(),
3994 };
3995 let src_newer_or_missing = match &stn_meta {
3996 Err(_) => true,
3997 Ok(n) => dig_mtime >= n.modified().ok(),
3998 };
3999 if wc_newer_or_missing && src_newer_or_missing {
4000 if let Some(prog) = check_dump_file(&dig, std_m, name, test_only) {
4001 unqueue_signals();
4002 return Some(prog);
4003 }
4004 }
4005 }
4006
4007 // c:3779-3784 — try per-function .zwc if newer than (or in absence of) source.
4008 if let Ok(stc_m) = &stc_meta {
4009 let wc_mtime = stc_m.modified().ok();
4010 let src_newer_or_missing = match &stn_meta {
4011 Err(_) => true,
4012 Ok(n) => wc_mtime >= n.modified().ok(),
4013 };
4014 if src_newer_or_missing {
4015 if let Some(prog) = check_dump_file(&wc, stc_m, name, test_only) {
4016 unqueue_signals();
4017 return Some(prog);
4018 }
4019 }
4020 }
4021
4022 unqueue_signals(); // c:3787
4023 None // c:3788
4024}
4025
4026/// Port of `try_source_file(char *file)` from `Src/parse.c:3795`.
4027/// Returns an Eprog (the wordcode dump body) if `<file>.zwc` exists
4028/// and is newer than `<file>`, else None.
4029pub fn try_source_file(file: &str) -> Option<String> {
4030 // c:3795
4031
4032 // c:3802-3805 — if ((tail = strrchr(file, '/'))) tail++; else tail = file;
4033 let tail = match file.rfind('/') {
4034 Some(i) => &file[i + 1..],
4035 None => file,
4036 };
4037
4038 // c:3807-3812 — if (strsfx(FD_EXT, file)) { ... return check_dump_file(file, NULL, tail, NULL, 0); }
4039 if file.ends_with(FD_EXT) {
4040 crate::ported::signals::queue_signals(); // c:3808
4041 let meta = fs::metadata(file);
4042 let prog = match meta {
4043 Ok(m) => check_dump_file(file, &m, tail, false).map(|(_, _)| file.to_string()), // c:3809
4044 Err(_) => None,
4045 };
4046 unqueue_signals(); // c:3810
4047 return prog;
4048 }
4049
4050 // c:3813 — wc = dyncat(file, FD_EXT);
4051 let wc = format!("{}{}", file, FD_EXT);
4052
4053 // c:3815-3816 — rc = stat(wc, &stc); rn = stat(file, &stn);
4054 let stc = fs::metadata(&wc);
4055 let stn = fs::metadata(file);
4056
4057 crate::ported::signals::queue_signals(); // c:3818
4058 // c:3819-3823 — if (!rc && (rn || stc.st_mtime >= stn.st_mtime) && (prog = check_dump_file(...))) return prog;
4059 if let Ok(meta_c) = &stc {
4060 let newer_than_src = match (&stc, &stn) {
4061 (Ok(c), Ok(n)) => c.modified().ok() >= n.modified().ok(),
4062 (Ok(_), Err(_)) => true, // c:3819 — `rn` (src missing) ⇒ accept .zwc
4063 _ => false,
4064 };
4065 if newer_than_src {
4066 let prog = check_dump_file(&wc, meta_c, tail, false); // c:3820
4067 if prog.is_some() {
4068 unqueue_signals(); // c:3821
4069 return Some(wc); // c:3822
4070 }
4071 }
4072 }
4073 unqueue_signals(); // c:3824
4074 None // c:3825
4075}
4076
4077/// Port of `Eprog check_dump_file(char *file, struct stat *sbuf,
4078/// char *name, int *ksh, int test_only)` from `Src/parse.c:3833`.
4079/// Walks the `dumps` mmap list looking for `(dev, ino)` matching
4080/// `sbuf`; on miss, calls `load_dump_header` to read the .zwc
4081/// header. Then `dump_find_func(d, name)` locates the function
4082/// table entry. Returns the wordcode slice + ksh-load flag.
4083///
4084/// ```c
4085/// Eprog
4086/// check_dump_file(char *file, struct stat *sbuf, char *name,
4087/// int *ksh, int test_only)
4088/// {
4089/// int isrec = 0;
4090/// Wordcode d;
4091/// FDHead h;
4092/// FuncDump f;
4093/// struct stat lsbuf;
4094/// if (!sbuf) {
4095/// if (zwcstat(file, &lsbuf)) return NULL;
4096/// sbuf = &lsbuf;
4097/// }
4098/// rec:
4099/// d = NULL;
4100/// for (f = dumps; f; f = f->next)
4101/// if (f->dev == sbuf->st_dev && f->ino == sbuf->st_ino)
4102/// { d = f->map; break; }
4103/// if (!f && (isrec || !(d = load_dump_header(NULL, file, 0))))
4104/// return NULL;
4105/// if ((h = dump_find_func(d, name))) {
4106/// if (test_only) return &dummy_eprog;
4107/// /* allocate Eprog from f->map at h offset, incrdumpcount,
4108/// return prog */
4109/// }
4110/// return NULL;
4111/// }
4112/// ```
4113/// Rust port returns `Option<(Vec<u32>, bool)>` instead of the C
4114/// `Eprog` pointer + `*ksh` out-param: tuple element 0 is the
4115/// wordcode slice, element 1 is true if the function was a ksh-
4116/// loaded entry.
4117pub fn check_dump_file(
4118 // c:3833
4119 file: &str,
4120 sbuf: &fs::Metadata,
4121 name: &str,
4122 test_only: bool,
4123) -> Option<(Vec<u32>, bool)> {
4124 use std::os::unix::fs::MetadataExt;
4125
4126 // c:3842-3846 — `if (!sbuf) { zwcstat(file, &lsbuf); sbuf = &lsbuf; }`
4127 // Rust takes sbuf by &Metadata — never null.
4128 let dev = sbuf.dev(); // c:3859
4129 let ino = sbuf.ino(); // c:3859
4130
4131 // c:3854 — `d = NULL;`
4132 let mut d: Option<Vec<u32>> = None;
4133 let mut found_mmap = false; // c:3858 `for (f = dumps; f; ...)`
4134
4135 // c:3858-3862 — walk DUMPS for matching dev/ino.
4136 {
4137 let dumps_guard = DUMPS.lock().expect("dumps poisoned");
4138 for f in dumps_guard.iter() {
4139 // c:3858
4140 if f.dev == dev && f.ino == ino {
4141 // c:3859
4142 d = Some(f.map.clone()); // c:3860
4143 found_mmap = true;
4144 break; // c:3861
4145 }
4146 }
4147 }
4148
4149 // c:3870-3871 — `if (!f && (isrec || !(d = load_dump_header(NULL, file, 0)))) return NULL;`
4150 if !found_mmap {
4151 // c:3870
4152 match load_dump_header("", file, 0) {
4153 // c:3870 load_dump_header
4154 Some(loaded) => d = Some(loaded),
4155 None => return None, // c:3871
4156 }
4157 }
4158
4159 // c:3873 — `if ((h = dump_find_func(d, name)))`
4160 let dump = d?;
4161 if !dump_find_func(&dump, name) {
4162 // c:3873
4163 return None;
4164 }
4165
4166 // c:3876-3879 — `if (test_only) return &dummy_eprog;`
4167 if test_only {
4168 // c:3876
4169 return Some((Vec::new(), false)); // c:3879 dummy
4170 }
4171
4172 // c:3884-3953 — allocate Eprog from the mmap area + ksh detection.
4173 // The C source builds an `Eprog` struct wrapping the wordcode
4174 // slice at h's offset; the Rust port returns the slice directly
4175 // since Eprog construction lives at the call site (load_dump_file).
4176 // ksh-load detection reads the FDHF_KSHLOAD flag on the FDHead.
4177 // !!! STUB: FDHead parsing not yet wired through dump_find_func.
4178 let is_ksh_load = false; // c:3905 fdhflags(h) & FDHF_KSHLOAD
4179
4180 // c:3950 — incrdumpcount(f). The Rust incrdumpcount takes a
4181 // funcdump ref; look up the matching entry by dev/ino again.
4182 if found_mmap {
4183 let dumps_guard = DUMPS.lock().expect("dumps poisoned");
4184 if let Some(f) = dumps_guard.iter().find(|f| f.dev == dev && f.ino == ino) {
4185 incrdumpcount(f); // c:3899
4186 }
4187 }
4188
4189 Some((dump, is_ksh_load)) // c:3953
4190}
4191
4192/// Port of `incrdumpcount(FuncDump f)` from `Src/parse.c:3970/4021`.
4193/// `f->count++;` — refcount-up a loaded dump entry. The Rust port
4194/// keys lookup by `filename` because Rust can't raw-pointer-compare
4195/// funcdump values inside a `Mutex<Vec<...>>`; same observable
4196/// effect (the count of the matching entry increments).
4197pub fn incrdumpcount(f: &funcdump) {
4198 // c:3970 — `f->count++;`
4199 if let Some(d) = DUMPS
4200 .lock()
4201 .unwrap()
4202 .iter_mut()
4203 .find(|d| d.filename.as_deref() == f.filename.as_deref())
4204 {
4205 d.count += 1; // c:3973
4206 }
4207}
4208
4209/// Port of `freedump(FuncDump f)` from `Src/parse.c:3976`. Public
4210/// helper for the rare external caller; locks the dumps mutex and
4211/// drops the entry with the given filename.
4212pub fn freedump(f: &funcdump) {
4213 // c:3976
4214 let mut g = DUMPS.lock().unwrap();
4215 if let Some(name) = f.filename.as_deref() {
4216 freedump_locked(&mut g, name);
4217 }
4218}
4219
4220/// Port of `decrdumpcount(FuncDump f)` from `Src/parse.c:3988/4026`.
4221/// `f->count--; if (!f->count) { unlink from dumps; freedump(f); }`.
4222pub fn decrdumpcount(f: &funcdump) {
4223 // c:3988
4224 let key = f.filename.clone();
4225 let mut g = DUMPS.lock().unwrap();
4226 let mut hit_zero: Option<String> = None;
4227 for d in g.iter_mut() {
4228 if d.filename == key {
4229 d.count -= 1; // c:3991
4230 if d.count == 0 {
4231 // c:3992
4232 hit_zero = d.filename.clone();
4233 }
4234 break;
4235 }
4236 }
4237 if let Some(name) = hit_zero {
4238 // c:3994-4001
4239 freedump_locked(&mut g, &name);
4240 }
4241}
4242
4243/// Port of `closedumps(void)` from `Src/parse.c:4008/4033`. Walks
4244/// `dumps` freeing every entry. Called on shell exit (exec.c:522).
4245pub fn closedumps() {
4246 // c:4008
4247 let mut g = DUMPS.lock().unwrap();
4248 g.clear(); // c:4011-4014 `while (dumps) { ... freedump(...); ... }`
4249}
4250
4251/// Port of `dump_autoload(char *nam, char *file, int on, Options ops, int func)`
4252/// from `Src/parse.c:4042`. Registers every function in a `.zwc`
4253/// for autoload via `shfunctab`.
4254pub fn dump_autoload(
4255 nam: &str,
4256 file: &str, // c:4042
4257 on: i32,
4258 ops: &crate::ported::zsh_h::options,
4259 func: i32,
4260) -> i32 {
4261 use crate::ported::zsh_h::shfunc;
4262 let mut ret = 0; // c:4047
4263
4264 // c:4049-4050 — if (!strsfx(FD_EXT, file)) file = dyncat(file, FD_EXT);
4265 let file_owned;
4266 let file = if !file.ends_with(FD_EXT) {
4267 file_owned = format!("{}{}", file, FD_EXT);
4268 file_owned.as_str()
4269 } else {
4270 file
4271 };
4272
4273 // c:4052-4053 — if (!(h = load_dump_header(nam, file, 1))) return 1;
4274 let h = match load_dump_header(nam, file, 1) {
4275 Some(buf) => buf,
4276 None => return 1,
4277 };
4278
4279 // c:4055-4056 — for (n = firstfdhead(h); n < e; n = nextfdhead(n))
4280 let hlen = fdheaderlen(&h) as usize; // c:4055
4281 let mut n_off = firstfdhead_offset();
4282 while n_off < hlen {
4283 let head = match read_fdhead(&h, n_off) {
4284 Some(hd) => hd,
4285 None => break,
4286 };
4287 // c:4057-4061 — shf = zshcalloc; shf->node.flags = on; ...addnode(fdname + fdhtail)
4288 let name_full = fdname(&h, n_off);
4289 let tail = fdhtail(&head) as usize;
4290 let basename: String = name_full.chars().skip(tail).collect();
4291 let mut shf = shfunc {
4292 node: crate::ported::zsh_h::hashnode {
4293 next: None,
4294 nam: basename.clone(),
4295 flags: on, // c:4058
4296 },
4297 filename: None,
4298 lineno: 0,
4299 funcdef: None,
4300 redir: None,
4301 sticky: None, // c:4060 NULL
4302 body: None,
4303 };
4304 // c:4059 — shf->funcdef = mkautofn(shf); (placeholder Eprog ptr)
4305 let _ = crate::ported::builtin::mkautofn(&mut shf as *mut _);
4306 // c:4061 — shfunctab->addnode(...)
4307 let snapshot = shf.clone();
4308 {
4309 let mut tab = crate::ported::hashtable::shfunctab_lock()
4310 .write()
4311 .expect("shfunctab poisoned");
4312 tab.add(shf);
4313 }
4314 // c:4062-4063 — if (OPT_ISSET(ops,'X') && eval_autoload(...)) ret = 1;
4315 if OPT_ISSET(ops, b'X') {
4316 let mut shf_ref = snapshot;
4317 if crate::ported::builtin::eval_autoload(&mut shf_ref as *mut _, &basename, ops, func)
4318 != 0
4319 {
4320 ret = 1;
4321 }
4322 }
4323 n_off = nextfdhead_offset(&h, n_off);
4324 }
4325 let _ = nam;
4326 ret // c:4065
4327}
4328
/// Port of C `struct eccstr` (zsh.h:836) — the long-string dedup BST
/// node. The dedup-walk and cmp logic in `ecstrcode` is faithful to
/// parse.c:447-453 including the conditional cmp chain
/// (nfunc → hashval → strcmp), so corpus inputs where C's eccstr BST walk
/// finds-or-misses match get the same outcome on the Rust side.
struct EccstrNode {
    /// One child of this node in the search tree (`None` = no subtree).
    left: Option<Box<EccstrNode>>,
    /// The other child of this node in the search tree.
    right: Option<Box<EccstrNode>>,
    /// C-byte form of the string (single byte per char ≤ 0xff).
    /// Owned because Rust doesn't have C zsh's "stable pointers into
    /// the lexer's tokstr arena" — every tokstr lives as a fresh
    /// Rust String allocation.
    str: Vec<u8>,
    /// Wordcode-encoded offset: `(byte_offset << 2) | token_bit`.
    /// Same shape as `Eccstr::offs` (parse.c:459).
    offs: u32,
    /// Absolute byte offset in the final strs region (= `ecsoffs` at
    /// insert time). C `Eccstr::aoffs` (parse.c:464). copy_ecstr uses
    /// THIS for the write position — distinct from `offs` which is
    /// ecssub-relative and collides across funcdef scopes.
    aoffs: u32,
    /// `nfunc` snapshot at insert time. Per-function namespace key
    /// — top-level scripts use 0; each funcdef bumps it.
    nfunc: i32,
    /// Hash of `str` computed via zsh's `hasher` (hashtable.c:86).
    hashval: u32,
}
4356// === end AST relocation ===
4357
4358// Parser state lives in file-scope thread_locals:
4359// - LEX_* (lexer side, matching Src/lex.c file-statics)
4360// - ECBUF / ECLEN / ECUSED / ECNPATS / ECSOFFS / ECSSUB / ECNFUNC /
4361// ECSTRS_INDEX / ECSTRS_REVERSE (wordcode-emission state, matching
4362// Src/parse.c file-statics)
4363//
4364// Callers use the free-fn entry points directly:
4365// crate::ported::parse::parse_init(input);
4366// let prog = crate::ported::parse::parse();
4367
/// Recursion-depth ceiling (500).
/// NOTE(review): presumably the nesting limit enforced by the parser;
/// no use is visible in this part of the file — confirm at the call sites.
const MAX_RECURSION_DEPTH: usize = 500;
4369
/// Direct port of `struct parse_stack` at `Src/zsh.h:3099-3109`.
/// Used by `parse_context_save` / `parse_context_restore`
/// (parse.c:295-355) to snapshot per-parse-call state so a nested
/// parse (e.g. inside command substitution) doesn't clobber the
/// outer parse.
///
/// A second port of `struct parse_stack` exists at
/// `crate::ported::zsh_h::parse_stack` (zsh.h:1066) using canonical
/// Wordcode / Eccstr / `struct heredocs` types — that port is unused
/// today and will become authoritative when Phase 9b (PORT_PLAN.md)
/// wires wordcode emission. This local version uses the working-set
/// shapes (`Vec<HereDoc>`, stubbed wordcode fields) suited to zshrs's
/// pre-wordcode AST architecture; the consolidation happens in P9b.
///
/// NOTE(review): the module header and the `*_wordcode` functions in
/// this file already emit wordcode into `ECBUF`, so the "unused today"
/// / "STUB until Phase 9b" wording here may be stale — confirm.
#[allow(non_camel_case_types)]
#[derive(Debug, Default, Clone)]
pub struct parse_stack {
    // ── Direct port of struct parse_stack at zsh.h:3099-3109 ──
    /// Pending heredocs awaiting body collection (canonical C
    /// linked-list shape). C: `struct heredocs *hdocs` (zsh.h:3100).
    /// Mirrors `parse::HDOCS` thread_local across nested parses.
    pub hdocs: Option<Box<crate::ported::zsh_h::heredocs>>,
    /// !!! WARNING: NOT IN PARSE_STACK — Rust-only AST-glue !!!
    /// Snapshot of `lex::LEX_HEREDOCS` (the parallel Rust-only Vec
    /// carrying terminator / strip_tabs / quoted metadata).
    /// Saved/restored alongside the canonical `hdocs` so nested
    /// parses get a clean AST view. C's parse_stack has no analog
    /// because C tracks terminator metadata implicitly via tokstr.
    pub lex_heredocs: Vec<HereDoc>,
    /// C: `int incmdpos` (zsh.h:3102).
    pub incmdpos: bool,
    /// C: `int aliasspaceflag` (zsh.h:3103).
    pub aliasspaceflag: i32,
    /// C: `int incond` (zsh.h:3104).
    pub incond: i32,
    /// C: `int inredir` (zsh.h:3105).
    pub inredir: bool,
    /// C: `int incasepat` (zsh.h:3106).
    pub incasepat: i32,
    /// C: `int isnewlin` (zsh.h:3107).
    pub isnewlin: i32,
    /// C: `int infor` (zsh.h:3108).
    pub infor: i32,
    /// C: `int inrepeat_` (zsh.h:3109).
    pub inrepeat_: i32,
    /// C: `int intypeset` (zsh.h:3110).
    pub intypeset: bool,
    // ── Wordcode-buffer state — STUB until Phase 9b ──
    // C `Wordcode ecbuf` (zsh.h:3112) + `Eccstr ecstrs` (zsh.h:3113) +
    // `int eclen/ecused/ecnpats/ecsoffs/ecssub/ecnfunc` (zsh.h:3112-3114).
    // zshrs hasn't emitted wordcode yet — these fields exist to
    // preserve the C shape but read/write nothing until P9b lands.
    /// C: `int eclen`.
    pub eclen: i32,
    /// C: `int ecused`.
    pub ecused: i32,
    /// C: `int ecnpats`.
    pub ecnpats: i32,
    /// C: `Wordcode ecbuf`; `None` when no buffer has been captured.
    pub ecbuf: Option<Vec<u32>>,
    /// C: `Eccstr ecstrs`, held here as raw bytes rather than a tree.
    pub ecstrs: Option<Vec<u8>>,
    /// C: `int ecsoffs`.
    pub ecsoffs: i32,
    /// C: `int ecssub`.
    pub ecssub: i32,
    /// C: `int ecnfunc`.
    pub ecnfunc: i32,
}
4430
// Old uppercase Rust-only `ParseStack` is gone. Compat alias so
// existing call sites (context.rs) keep resolving until the
// rename ripples through.
/// Backward-compatibility alias for [`parse_stack`]; both names refer
/// to the same type.
#[allow(non_camel_case_types)]
pub type ParseStack = parse_stack;
4437
/// `mod_export struct eprog dummy_eprog;` from `Src/parse.c:3066`.
/// Placeholder Eprog used by `shf->funcdef = &dummy_eprog;` in
/// builtin.c when clearing a stale autoload stub. Held in a Mutex
/// so `init_eprog` can set it once at shell startup.
///
/// The initial value is completely empty: zero flags/len/npats/nref,
/// no wordcode, no strings, no patterns, no owning function and no
/// dump reference.
pub static DUMMY_EPROG: std::sync::Mutex<eprog> = std::sync::Mutex::new(eprog {
    flags: 0,
    len: 0,
    npats: 0,
    nref: 0,
    prog: Vec::new(),
    strs: None,
    pats: Vec::new(),
    shf: None,
    dump: None,
});
4453
4454/// Walk every ZshRedir in the program and, for any with a `heredoc_idx`,
4455/// pull the body+terminator out of `bodies` and stuff into `heredoc`.
4456/// `bodies[i]` corresponds to the i-th heredoc registered by the lexer
4457/// during scanning (in source order).
4458fn fill_heredoc_bodies(prog: &mut ZshProgram, bodies: &[HereDocInfo]) {
4459 for list in &mut prog.lists {
4460 fill_in_sublist(&mut list.sublist, bodies);
4461 }
4462}
4463
4464fn fill_in_sublist(sub: &mut ZshSublist, bodies: &[HereDocInfo]) {
4465 fill_in_pipe(&mut sub.pipe, bodies);
4466 if let Some(next) = &mut sub.next {
4467 fill_in_sublist(&mut next.1, bodies);
4468 }
4469}
4470
4471fn fill_in_pipe(pipe: &mut ZshPipe, bodies: &[HereDocInfo]) {
4472 fill_in_command(&mut pipe.cmd, bodies);
4473 if let Some(next) = &mut pipe.next {
4474 fill_in_pipe(next, bodies);
4475 }
4476}
4477
4478fn fill_in_command(cmd: &mut ZshCommand, bodies: &[HereDocInfo]) {
4479 match cmd {
4480 ZshCommand::Simple(s) => {
4481 for r in &mut s.redirs {
4482 if let Some(idx) = r.heredoc_idx {
4483 if let Some(info) = bodies.get(idx) {
4484 r.heredoc = Some(info.clone());
4485 }
4486 }
4487 }
4488 }
4489 ZshCommand::Subsh(p) | ZshCommand::Cursh(p) => fill_heredoc_bodies(p, bodies),
4490 ZshCommand::FuncDef(f) => fill_heredoc_bodies(&mut f.body, bodies),
4491 ZshCommand::If(i) => {
4492 fill_heredoc_bodies(&mut i.cond, bodies);
4493 fill_heredoc_bodies(&mut i.then, bodies);
4494 for (c, b) in &mut i.elif {
4495 fill_heredoc_bodies(c, bodies);
4496 fill_heredoc_bodies(b, bodies);
4497 }
4498 if let Some(e) = &mut i.else_ {
4499 fill_heredoc_bodies(e, bodies);
4500 }
4501 }
4502 ZshCommand::While(w) | ZshCommand::Until(w) => {
4503 fill_heredoc_bodies(&mut w.cond, bodies);
4504 fill_heredoc_bodies(&mut w.body, bodies);
4505 }
4506 ZshCommand::For(f) => fill_heredoc_bodies(&mut f.body, bodies),
4507 ZshCommand::Case(c) => {
4508 for arm in &mut c.arms {
4509 fill_heredoc_bodies(&mut arm.body, bodies);
4510 }
4511 }
4512 ZshCommand::Repeat(r) => fill_heredoc_bodies(&mut r.body, bodies),
4513 ZshCommand::Time(Some(sublist)) => fill_in_sublist(sublist, bodies),
4514 ZshCommand::Try(t) => {
4515 fill_heredoc_bodies(&mut t.try_block, bodies);
4516 fill_heredoc_bodies(&mut t.always, bodies);
4517 }
4518 ZshCommand::Redirected(inner, redirs) => {
4519 for r in redirs {
4520 if let Some(idx) = r.heredoc_idx {
4521 if let Some(info) = bodies.get(idx) {
4522 r.heredoc = Some(info.clone());
4523 }
4524 }
4525 }
4526 fill_in_command(inner, bodies);
4527 }
4528 ZshCommand::Time(None) | ZshCommand::Cond(_) | ZshCommand::Arith(_) => {}
4529 }
4530}
4531
4532/// If `list` is a Simple containing one word that ends in the
4533/// `<Inpar><Outpar>` token pair (the lexer-port encoding of `()`),
4534/// return the bare name. Used by `parse_program_until` to detect
4535/// `name() {body}` style function definitions where the lexer
4536/// hasn't split the `()` from the name.
4537/// Detect the `name() …` shape inside a Simple. Returns the function
4538/// name and (when the body was already inlined into the same Simple,
4539/// e.g. `foo() echo hi`) the rest of the words as the body's argv.
4540/// Returns None for non-funcdef shapes.
4541fn simple_name_with_inoutpar(list: &ZshList) -> Option<(Vec<String>, Vec<String>)> {
4542 if list.flags.async_ || list.sublist.next.is_some() {
4543 return None;
4544 }
4545 let pipe = &list.sublist.pipe;
4546 if pipe.next.is_some() {
4547 return None;
4548 }
4549 let simple = match &pipe.cmd {
4550 ZshCommand::Simple(s) => s,
4551 _ => return None,
4552 };
4553 if simple.words.is_empty() || !simple.assigns.is_empty() {
4554 return None;
4555 }
4556 let suffix = "\u{88}\u{8a}"; // Inpar + Outpar
4557 // Find the FIRST word ending in `()`. zsh accepts the
4558 // multi-name shorthand `fna fnb fnc() { body }` (parse.c:
4559 // par_funcdef wordlist) — words[0..i-1] are extra names,
4560 // words[i] is `lastname()`. Words after are the body argv
4561 // (one-line shorthand, `name() cmd args`).
4562 let par_idx = simple.words.iter().position(|w| w.ends_with(suffix))?;
4563 let mut names: Vec<String> = Vec::with_capacity(par_idx + 1);
4564 for w in &simple.words[..par_idx] {
4565 // Earlier names must be bare identifiers, NOT contain
4566 // tokens that imply they're not function names (no `()`,
4567 // no quotes, no expansions). zsh's lexer enforces this
4568 // at the wordlist level; we approximate by requiring the
4569 // word be an identifier-shaped token after untokenize.
4570 let bare = super::lex::untokenize(w);
4571 let valid = !bare.is_empty()
4572 && bare
4573 .chars()
4574 .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-' || c == '.' || c == '$');
4575 if !valid {
4576 return None;
4577 }
4578 names.push(bare);
4579 }
4580 let last = &simple.words[par_idx];
4581 let bare = &last[..last.len() - suffix.len()];
4582 if bare.is_empty() {
4583 return None;
4584 }
4585 names.push(super::lex::untokenize(bare));
4586 let rest = simple.words[par_idx + 1..].to_vec();
4587 Some((names, rest))
4588}
4589
4590/// Initialize parser state for a fresh parse of `input`.
4591/// Free-fn entry point — resets parser thread_locals and loads input.
4592pub fn parse_init(input: &str) {
4593 // Seed the option defaults the parser/lexer inspect. Real zsh
4594 // installs these via `install_emulation_defaults` (options.c:172)
4595 // at shell startup; zshrs's parse-only test entry path bypasses
4596 // init_main, so we mirror the `zsh` emulation defaults here.
4597 // Only seeds when unset so a script that explicitly disabled an
4598 // option stays so.
4599 for (name, default) in [
4600 ("shortloops", true),
4601 ("shortrepeat", false),
4602 ("multifuncdef", true),
4603 ("aliasfuncdef", false),
4604 ("ignorebraces", false),
4605 ("cshjunkieloops", false),
4606 ("posixbuiltins", false),
4607 ("execopt", true),
4608 ("kshautoload", false),
4609 ("aliases", true),
4610 ] {
4611 if crate::ported::options::opt_state_get(name).is_none() {
4612 crate::ported::options::opt_state_set(name, default);
4613 }
4614 }
4615 lex_init(input);
4616}
4617
4618/// P9b decoder (wordcode-pipeline variant): direct port of
4619/// `ecgetstr(Estate s, int dup, int *tokflag)` from
4620/// `Src/parse.c:2855-2890`. Reads a wordcode at `pc`, decodes the
4621/// encoded string back to owned String. Returns (string,
4622/// pc_after_consumed). Distinct from the existing `ecgetstr` (which
4623/// takes a separate strs buffer for text.rs) — this variant uses
4624/// the live ECSTRS_REVERSE HashMap populated at ecstrcode time.
4625pub fn ecgetstr_wordcode(buf: &[u32], pc: usize) -> (String, usize) {
4626 if pc >= buf.len() {
4627 return (String::new(), pc);
4628 }
4629 let c = buf[pc];
4630 let next = pc + 1;
4631 // parse.c:2862-2863 — empty-string sentinels.
4632 if c == 6 || c == 7 {
4633 return (String::new(), next);
4634 }
4635 // parse.c:2864-2871 — inline-packed short string.
4636 if (c & 2) != 0 {
4637 let b0 = ((c >> 3) & 0xff) as u8;
4638 let b1 = ((c >> 11) & 0xff) as u8;
4639 let b2 = ((c >> 19) & 0xff) as u8;
4640 let mut bytes: Vec<u8> = Vec::new();
4641 for b in [b0, b1, b2] {
4642 if b == 0 {
4643 break;
4644 }
4645 bytes.push(b);
4646 }
4647 return (String::from_utf8_lossy(&bytes).into_owned(), next);
4648 }
4649 // parse.c:2872-2873 — long string via offs lookup. Map value is
4650 // metafied Vec<u8>; convert back to display String. Unmetafy is
4651 // the caller's job (the wordcode-parity dumper does it; other
4652 // callers may want raw bytes).
4653 let s = ECSTRS_REVERSE
4654 .with_borrow(|m| m.get(&c).cloned())
4655 .map(|v| String::from_utf8_lossy(&v).into_owned())
4656 .unwrap_or_default();
4657 (s, next)
4658}
4659
4660/// Parse the complete input. Direct port of `parse_event` /
4661/// `par_list` from `Src/parse.c:614-720`. On syntax error,
4662/// sets `errflag |= ERRFLAG_ERROR` (via `zerr`) and returns the
4663/// partial program — callers check `errflag` to detect failure,
4664/// matching C's `Eprog parse_event(...)` + `if (errflag) {...}`.
4665pub fn parse() -> ZshProgram {
4666 zshlex();
4667
4668 let mut program = parse_program_until(None);
4669
4670 // Post-pass: wire heredoc bodies (collected by the inline NEWLIN
4671 // walk in zshlex into LEX_HEREDOCS) back into ZshRedir.heredoc
4672 // fields via heredoc_idx. No C analog — LEX_HEREDOCS is the
4673 // Rust-only AST-glue Vec.
4674 let bodies: Vec<HereDocInfo> = LEX_HEREDOCS
4675 .with_borrow(|v| v.clone())
4676 .into_iter()
4677 .map(|h| HereDocInfo {
4678 content: h.content,
4679 terminator: h.terminator,
4680 quoted: h.quoted,
4681 })
4682 .collect();
4683 if !bodies.is_empty() {
4684 fill_heredoc_bodies(&mut program, &bodies);
4685 }
4686
4687 program
4688}
4689
4690/// Wordcode-emission top-level driver. Closest C analog is
4691/// `parse_list(void)` at `Src/parse.c:697-712`: init_parse +
4692/// zshlex + par_list(&c) + bld_eprog. This entry omits init_parse
4693/// and bld_eprog (caller responsibilities) and inlines a guard
4694/// loop around par_list_wordcode for cases where the lexer leaves
4695/// a non-ENDINPUT terminator (LEXERR, missing close-token, etc.).
4696pub fn par_event_wordcode() -> usize {
4697 let start = ECUSED.get() as usize;
4698 // C `parse_list` (parse.c:697-712) calls par_list ONCE — par_list's
4699 // own goto-rec loop handles all SEPER-separated sublists. The
4700 // outer loop here exists for safety against early-return cases
4701 // (LEXERR, missing terminator) but normally par_list_wordcode
4702 // consumes everything in one call.
4703 let mut cmplx: i32 = 0;
4704 while tok() != ENDINPUT && tok() != LEXERR {
4705 par_list_wordcode(&mut cmplx);
4706 match tok() {
4707 SEMI | NEWLIN | AMPER | AMPERBANG | SEPER => {
4708 zshlex();
4709 }
4710 _ => break,
4711 }
4712 }
4713 // parse.c:712 — `ecadd(WCB_END());`
4714 ecadd(WCB_END());
4715 start
4716}
4717
4718/// Port of `par_list(int *cmplx)` from `Src/parse.c:769-803`.
4719/// `list : { SEPER } [ sublist [ { SEPER | AMPER | AMPERBANG } list ] ]`.
4720/// True line-by-line port: takes `cmplx: &mut i32` matching C's
4721/// `int *cmplx` out-parameter, uses stack-local `c` per iteration
4722/// like C (so inner sublist cmplx is independent of outer).
4723pub fn par_list_wordcode(cmplx: &mut i32) {
4724 // c:773 — `int p, lp = -1, c;`
4725 let mut p: usize;
4726 let mut lp: i32 = -1;
4727 let mut c: i32;
4728 loop {
4729 // c:775 `rec:` — c:777-778 `while (tok == SEPER) zshlex();`
4730 while tok() == SEPER {
4731 zshlex();
4732 }
4733 // c:780 — `p = ecadd(0);`
4734 p = ecadd(0);
4735 // c:781 — `c = 0;`
4736 c = 0;
4737 // c:783 — `if (par_sublist(&c)) { ... }`
4738 if par_sublist_wordcode(&mut c) {
4739 // c:784 — `*cmplx |= c;`
4740 *cmplx |= c;
4741 // c:785 — `if (tok == SEPER || tok == AMPER || tok == AMPERBANG)`
4742 let t = tok();
4743 if t == SEPER || t == AMPER || t == AMPERBANG {
4744 // c:786-787 — `if (tok != SEPER) *cmplx = 1;`
4745 if t != SEPER {
4746 *cmplx = 1;
4747 }
4748 // c:788-790 — `set_list_code(p, ..., c);`
4749 let z = if t == SEPER {
4750 Z_SYNC
4751 } else if t == AMPER {
4752 Z_ASYNC
4753 } else {
4754 Z_ASYNC | Z_DISOWN
4755 };
4756 set_list_code(p, z, c != 0);
4757 // c:791 — `incmdpos = 1;`
4758 set_incmdpos(true);
4759 // c:792-794 — `do { zshlex(); } while (tok == SEPER);`
4760 loop {
4761 zshlex();
4762 if tok() != SEPER {
4763 break;
4764 }
4765 }
4766 // c:795 — `lp = p;` c:796 — `goto rec;`
4767 lp = p as i32;
4768 continue;
4769 } else {
4770 // c:798 — `set_list_code(p, (Z_SYNC | Z_END), c);`
4771 set_list_code(p, Z_SYNC | Z_END, c != 0);
4772 }
4773 } else {
4774 // c:800-802 — `ecused--; if (lp >= 0) ecbuf[lp] |= wc_bdata(Z_END);`
4775 ECUSED.set((ECUSED.get() - 1).max(0));
4776 if lp >= 0 {
4777 ECBUF.with_borrow_mut(|b| {
4778 if (lp as usize) < b.len() {
4779 b[lp as usize] |= wc_bdata(Z_END as wordcode);
4780 }
4781 });
4782 }
4783 }
4784 break;
4785 }
4786}
4787
4788/// Port of `par_list1(int *cmplx)` from `Src/parse.c:806-817`.
4789/// Single-sublist variant used by funcdef bodies and the short
4790/// `for`/`while`/`repeat` forms — exactly one sublist with
4791/// `Z_SYNC|Z_END`, no chain.
4792pub fn par_list1_wordcode(cmplx: &mut i32) {
4793 // c:810 — `int p = ecadd(0), c = 0;`
4794 let p = ecadd(0);
4795 let mut c: i32 = 0;
4796 // c:812 — `if (par_sublist(&c)) { ... }`
4797 if par_sublist_wordcode(&mut c) {
4798 // c:813 — `set_list_code(p, (Z_SYNC | Z_END), c);`
4799 set_list_code(p, Z_SYNC | Z_END, c != 0);
4800 // c:814 — `*cmplx |= c;`
4801 *cmplx |= c;
4802 } else {
4803 // c:816 — `ecused--;`
4804 ECUSED.set((ECUSED.get() - 1).max(0));
4805 }
4806}
4807
4808/// Port of `par_save_list(C)` macro from `Src/parse.c:475-480`.
4809/// do { int eu = ecused; par_list(C); if (eu == ecused) ecadd(WCB_END()); } while (0)
4810pub fn par_save_list_wordcode(cmplx: &mut i32) {
4811 let eu = ECUSED.get();
4812 par_list_wordcode(cmplx);
4813 if ECUSED.get() == eu {
4814 ecadd(WCB_END());
4815 }
4816}
4817
4818/// Port of `par_save_list1(C)` macro from `Src/parse.c:481-486`.
4819pub fn par_save_list1_wordcode(cmplx: &mut i32) {
4820 let eu = ECUSED.get();
4821 par_list1_wordcode(cmplx);
4822 if ECUSED.get() == eu {
4823 ecadd(WCB_END());
4824 }
4825}
4826
4827/// Port of `par_sublist(int *cmplx)` from `Src/parse.c:823-865`.
4828/// `sublist : sublist2 [ ( DBAR | DAMPER ) { SEPER } sublist ]`.
4829/// Emits a WCB_SUBLIST header, recurses into par_sublist2 for
4830/// the !/coproc prefix + pipeline, then chains via DBAR (`||`)
4831/// or DAMPER (`&&`) recursively. Returns true if at least one
4832/// pipeline was emitted.
4833pub fn par_sublist_wordcode(cmplx: &mut i32) -> bool {
4834 // c:827 — `int f, p, c = 0;`
4835 let mut c: i32 = 0;
4836 // c:829 — `p = ecadd(0);`
4837 let p = ecadd(0);
4838 // c:831 — `if ((f = par_sublist2(&c)) != -1) { ... }`
4839 match par_sublist2(&mut c) {
4840 Some(f) => {
4841 // c:832 — `int e = ecused;`
4842 let e = ECUSED.get() as usize;
4843 // c:834 — `*cmplx |= c;`
4844 *cmplx |= c;
4845 if tok() == DBAR || tok() == DAMPER {
4846 // c:836 — `enum lextok qtok = tok;`
4847 let qtok = tok();
4848 // c:839 — `cmdpush(tok == DBAR ? CS_CMDOR : CS_CMDAND);`
4849 cmdpush(if qtok == DBAR {
4850 CS_CMDOR as u8
4851 } else {
4852 CS_CMDAND as u8
4853 });
4854 // c:840 — `zshlex();`
4855 zshlex();
4856 // c:841-842 — `while (tok == SEPER) zshlex();`
4857 while tok() == SEPER {
4858 zshlex();
4859 }
4860 // c:843 — `sl = par_sublist(cmplx);`
4861 let sl = par_sublist_wordcode(cmplx);
4862 // c:844-847 — `set_sublist_code(p, (sl ? ... : WC_SUBLIST_END),
4863 // f, (e - 1 - p), c);`
4864 let st = if sl {
4865 if qtok == DBAR {
4866 WC_SUBLIST_OR
4867 } else {
4868 WC_SUBLIST_AND
4869 }
4870 } else {
4871 WC_SUBLIST_END
4872 };
4873 set_sublist_code(p, st as i32, f, (e - 1 - p) as i32, c != 0);
4874 // c:848 — `cmdpop();`
4875 cmdpop();
4876 } else {
4877 // c:850-853 — `if (tok == AMPER || tok == AMPERBANG)
4878 // { c = 1; *cmplx |= c; }`
4879 if tok() == AMPER || tok() == AMPERBANG {
4880 c = 1;
4881 *cmplx |= c;
4882 }
4883 // c:854 — `set_sublist_code(p, WC_SUBLIST_END, f,
4884 // (e - 1 - p), c);`
4885 set_sublist_code(p, WC_SUBLIST_END as i32, f, (e - 1 - p) as i32, c != 0);
4886 }
4887 // c:856 — `return 1;`
4888 true
4889 }
4890 None => {
4891 // c:858-859 — `ecused--; return 0;`
4892 ECUSED.set((ECUSED.get() - 1).max(0));
4893 false
4894 }
4895 }
4896}
4897
4898/// Port of `par_pline(int *cmplx)` from `Src/parse.c:894-955`.
4899/// `pline : cmd [ ( BAR | BARAMP ) { SEPER } pline ]`. Emits a
4900/// WCB_PIPE header (mid for chain links, end for the last cmd)
4901/// plus the optional BARAMP `2>&1` synthetic redir.
4902/// Port of `par_pline(int *cmplx)` from `Src/parse.c:893-947`.
4903/// (Named `par_pipe_wordcode` to disambiguate from the AST
4904/// `par_pline` at parse.rs:3744 — semantically the same `pline`
4905/// production.)
4906pub fn par_pipe_wordcode(cmplx: &mut i32) -> bool {
4907 // c:897 — `zlong line = toklineno;`
4908 let line = toklineno() as i64;
4909 // c:899 — `p = ecadd(0);`
4910 let p = ecadd(0);
4911 // c:901-904 — `if (!par_cmd(cmplx, 0)) { ecused--; return 0; }`
4912 if !par_cmd_wordcode(cmplx, 0) {
4913 ECUSED.set((ECUSED.get() - 1).max(0));
4914 return false;
4915 }
4916 if tok() == BAR_TOK {
4917 // c:906 — `*cmplx = 1;`
4918 *cmplx = 1;
4919 // c:907 — `cmdpush(CS_PIPE);`
4920 cmdpush(CS_PIPE as u8);
4921 // c:908 — `zshlex();`
4922 zshlex();
4923 // c:909-910 — `while (tok == SEPER) zshlex();`
4924 while tok() == SEPER {
4925 zshlex();
4926 }
4927 // c:911 — `ecbuf[p] = WCB_PIPE(WC_PIPE_MID, line>=0 ? line+1 : 0);`
4928 ECBUF.with_borrow_mut(|b| {
4929 if p < b.len() {
4930 b[p] = WCB_PIPE(
4931 WC_PIPE_MID,
4932 if line >= 0 { (line + 1) as wordcode } else { 0 },
4933 );
4934 }
4935 });
4936 // c:912 — `ecispace(p+1, 1);`
4937 ecispace(p + 1, 1);
4938 // c:913 — `ecbuf[p+1] = ecused - 1 - p;`
4939 let used = ECUSED.get() as usize;
4940 ECBUF.with_borrow_mut(|b| {
4941 if p + 1 < b.len() {
4942 b[p + 1] = (used.saturating_sub(1 + p)) as wordcode;
4943 }
4944 });
4945 // c:914-916 — `if (!par_pline(cmplx)) { tok = LEXERR; }`
4946 if !par_pipe_wordcode(cmplx) {
4947 set_tok(LEXERR);
4948 }
4949 // c:917 — `cmdpop();`
4950 cmdpop();
4951 true
4952 } else if tok() == BARAMP {
4953 // c:920-923 — walk past inline WC_REDIR to find r.
4954 let mut r = p + 1;
4955 loop {
4956 let code = ECBUF.with_borrow(|b| b.get(r).copied().unwrap_or(0));
4957 if wc_code(code) != WC_REDIR {
4958 break;
4959 }
4960 r += WC_REDIR_WORDS(code) as usize;
4961 }
4962 // c:925-928 — `ecispace(r, 3);` + synthetic `2>&1` redir
4963 ecispace(r, 3);
4964 ECBUF.with_borrow_mut(|b| {
4965 if r + 2 < b.len() {
4966 b[r] = WCB_REDIR(REDIR_MERGEOUT as wordcode);
4967 b[r + 1] = 2;
4968 b[r + 2] = ecstrcode("1");
4969 }
4970 });
4971 // c:930 — `*cmplx = 1;`
4972 *cmplx = 1;
4973 cmdpush(CS_ERRPIPE as u8);
4974 zshlex();
4975 while tok() == SEPER {
4976 zshlex();
4977 }
4978 ECBUF.with_borrow_mut(|b| {
4979 if p < b.len() {
4980 b[p] = WCB_PIPE(
4981 WC_PIPE_MID,
4982 if line >= 0 { (line + 1) as wordcode } else { 0 },
4983 );
4984 }
4985 });
4986 ecispace(p + 1, 1);
4987 let used = ECUSED.get() as usize;
4988 ECBUF.with_borrow_mut(|b| {
4989 if p + 1 < b.len() {
4990 b[p + 1] = (used.saturating_sub(1 + p)) as wordcode;
4991 }
4992 });
4993 if !par_pipe_wordcode(cmplx) {
4994 set_tok(LEXERR);
4995 }
4996 cmdpop();
4997 true
4998 } else {
4999 // c:944 — `ecbuf[p] = WCB_PIPE(WC_PIPE_END, line>=0 ? line+1 : 0);`
5000 ECBUF.with_borrow_mut(|b| {
5001 if p < b.len() {
5002 b[p] = WCB_PIPE(
5003 WC_PIPE_END,
5004 if line >= 0 { (line + 1) as wordcode } else { 0 },
5005 );
5006 }
5007 });
5008 true
5009 }
5010}
5011
5012/// Port of `par_cmd(int *cmplx, int zsh_construct)` from
5013/// `Src/parse.c:958-1085`. Parses leading + trailing redirs and
5014/// dispatches on the current token to the right par_* builder.
5015/// Returns false only when no command was emitted (no redirs +
5016/// par_simple returned 0).
5017/// Port of `par_cmd(int *cmplx, int zsh_construct)` from
5018/// `Src/parse.c:957-1077`.
5019pub fn par_cmd_wordcode(cmplx: &mut i32, zsh_construct: i32) -> bool {
5020 // c:960 — `int r, nr = 0;`
5021 let mut nr: i32 = 0;
5022 // c:962 — `r = ecused;`
5023 let mut r: usize = ECUSED.get() as usize;
5024 // c:964-968 — leading redirs.
5025 if IS_REDIROP(tok()) {
5026 // c:965 — `*cmplx = 1;`
5027 *cmplx = 1;
5028 // c:966-968 — `while (IS_REDIROP(tok)) { nr += par_redir(&r, NULL); }`
5029 while IS_REDIROP(tok()) {
5030 nr += par_redir_wordcode(&mut r, None);
5031 }
5032 }
5033 // c:970-1066 — token-dispatch switch.
5034 match tok() {
5035 FOR => {
5036 cmdpush(CS_FOR as u8);
5037 par_for_wordcode(cmplx);
5038 cmdpop();
5039 }
5040 FOREACH => {
5041 cmdpush(CS_FOREACH as u8);
5042 par_for_wordcode(cmplx);
5043 cmdpop();
5044 }
5045 SELECT => {
5046 // c:982 — `*cmplx = 1;`
5047 *cmplx = 1;
5048 cmdpush(CS_SELECT as u8);
5049 par_for_wordcode(cmplx);
5050 cmdpop();
5051 }
5052 CASE => {
5053 cmdpush(CS_CASE as u8);
5054 par_case_wordcode(cmplx);
5055 cmdpop();
5056 }
5057 IF => {
5058 par_if_wordcode(cmplx);
5059 }
5060 WHILE => {
5061 cmdpush(CS_WHILE as u8);
5062 par_while_wordcode(cmplx);
5063 cmdpop();
5064 }
5065 UNTIL => {
5066 cmdpush(CS_UNTIL as u8);
5067 par_while_wordcode(cmplx);
5068 cmdpop();
5069 }
5070 REPEAT => {
5071 cmdpush(CS_REPEAT as u8);
5072 par_repeat_wordcode(cmplx);
5073 cmdpop();
5074 }
5075 INPAR_TOK => {
5076 // c:1011 — `*cmplx = 1;`
5077 *cmplx = 1;
5078 cmdpush(CS_SUBSH as u8);
5079 par_subsh_wordcode(cmplx, zsh_construct);
5080 cmdpop();
5081 }
5082 INBRACE_TOK => {
5083 cmdpush(CS_CURSH as u8);
5084 par_subsh_wordcode(cmplx, zsh_construct);
5085 cmdpop();
5086 }
5087 FUNC => {
5088 cmdpush(CS_FUNCDEF as u8);
5089 par_funcdef_wordcode(cmplx);
5090 cmdpop();
5091 }
5092 DINBRACK => {
5093 cmdpush(CS_COND as u8);
5094 par_cond_wordcode();
5095 cmdpop();
5096 }
5097 DINPAR => {
5098 par_arith_wordcode();
5099 }
5100 TIME => {
5101 // c:1037-1050 — `static int inpartime` guard so
5102 // `time time foo` doesn't recurse infinitely.
5103 if !PARSER_INPARTIME.with(|c| c.get()) {
5104 // c:1041 — `*cmplx = 1;`
5105 *cmplx = 1;
5106 PARSER_INPARTIME.with(|c| c.set(true));
5107 par_time_wordcode();
5108 PARSER_INPARTIME.with(|c| c.set(false));
5109 } else {
5110 set_tok(STRING_LEX);
5111 let sr = par_simple_wordcode(cmplx, nr);
5112 if sr == 0 && nr == 0 {
5113 return false;
5114 }
5115 if sr > 1 {
5116 *cmplx = 1;
5117 r += (sr - 1) as usize;
5118 }
5119 }
5120 }
5121 _ => {
5122 // c:1054 — `if (!(sr = par_simple(cmplx, nr)))`
5123 let sr = par_simple_wordcode(cmplx, nr);
5124 if sr == 0 {
5125 if nr == 0 {
5126 return false;
5127 }
5128 } else if sr > 1 {
5129 // c:1060-1061 — `*cmplx = 1; r += sr - 1;`
5130 *cmplx = 1;
5131 r += (sr - 1) as usize;
5132 }
5133 }
5134 }
5135 // c:1067-1071 — trailing redirs.
5136 // c:1067 — `if (IS_REDIROP(tok)) { *cmplx = 1; while (...) (void)par_redir(&r, NULL); }`
5137 if IS_REDIROP(tok()) {
5138 *cmplx = 1;
5139 while IS_REDIROP(tok()) {
5140 let _ = par_redir_wordcode(&mut r, None);
5141 }
5142 }
5143 // c:1072-1075 — `incmdpos=1; incasepat=0; incond=0; intypeset=0;`
5144 set_incmdpos(true);
5145 set_incasepat(0);
5146 set_incond(0);
5147 set_intypeset(false);
5148 let _ = r;
5149 // c:1076 — `return 1;`
5150 true
5151}
5152
5153/// Port of `par_for(int *cmplx)` from `Src/parse.c:1086-1198`.
5154pub fn par_for_wordcode(cmplx: &mut i32) {
5155 // c:1089 — `int oecused = ecused, csh = (tok == FOREACH), p, sel = (tok == SELECT);`
5156 let _oecused = ECUSED.get() as usize;
5157 let csh = tok() == FOREACH;
5158 let sel = tok() == SELECT;
5159 let p: usize;
5160 // c:1090 — `int type;`
5161 let r#type: wordcode;
5162
5163 // c:1092 — `p = ecadd(0);`
5164 p = ecadd(0);
5165
5166 // c:1094 — `incmdpos = 0;`
5167 set_incmdpos(false);
5168 // c:1095 — `infor = tok == FOR ? 2 : 0;`
5169 set_infor(if tok() == FOR { 2 } else { 0 });
5170 // c:1096 — `zshlex();`
5171 zshlex();
5172 // c:1097 — `if (tok == DINPAR) {`
5173 if tok() == DINPAR {
5174 // c:1098 — `zshlex();`
5175 zshlex();
5176 // c:1099-1100 — `if (tok != DINPAR) YYERRORV(oecused);`
5177 if tok() != DINPAR {
5178 zerr("par_for: expected init");
5179 return;
5180 }
5181 // c:1101 — `ecstr(tokstr);`
5182 ecstr(&tokstr().unwrap_or_default());
5183 // c:1102 — `zshlex();`
5184 zshlex();
5185 // c:1103-1104
5186 if tok() != DINPAR {
5187 zerr("par_for: expected cond");
5188 return;
5189 }
5190 // c:1105
5191 ecstr(&tokstr().unwrap_or_default());
5192 // c:1106
5193 zshlex();
5194 // c:1107-1108
5195 if tok() != DOUTPAR {
5196 zerr("par_for: expected ))");
5197 return;
5198 }
5199 // c:1109
5200 ecstr(&tokstr().unwrap_or_default());
5201 // c:1110 — `infor = 0;`
5202 set_infor(0);
5203 // c:1111 — `incmdpos = 1;`
5204 set_incmdpos(true);
5205 // c:1112 — `zshlex();`
5206 zshlex();
5207 // c:1113 — `type = WC_FOR_COND;`
5208 r#type = WC_FOR_COND;
5209 } else {
5210 // c:1115 — `int np = 0, n, posix_in, ona = noaliases, onc = nocorrect;`
5211 let mut np: usize = 0;
5212 let mut n: u32;
5213 let posix_in: bool;
5214 let ona = noaliases();
5215 let onc = nocorrect();
5216 // c:1116 — `infor = 0;`
5217 set_infor(0);
5218 // c:1117-1118 — `if (tok != STRING || !isident(tokstr)) YYERRORV(oecused);`
5219 if tok() != STRING_LEX || !crate::ported::params::isident(&tokstr().unwrap_or_default()) {
5220 zerr("par_for: expected identifier");
5221 return;
5222 }
5223 // c:1119-1120 — `if (!sel) np = ecadd(0);`
5224 if !sel {
5225 np = ecadd(0);
5226 }
5227 // c:1121 — `n = 0;`
5228 n = 0;
5229 // c:1122 — `incmdpos = 1;`
5230 set_incmdpos(true);
5231 // c:1123 — `noaliases = nocorrect = 1;`
5232 set_noaliases(true);
5233 set_nocorrect(1);
5234 // c:1124 — `for (;;) {`
5235 loop {
5236 // c:1125 — `n++;`
5237 n += 1;
5238 // c:1126 — `ecstr(tokstr);`
5239 ecstr(&tokstr().unwrap_or_default());
5240 // c:1127 — `zshlex();`
5241 zshlex();
5242 // c:1128-1129 — `if (tok != STRING || !strcmp(tokstr, "in") || sel) break;`
5243 if tok() != STRING_LEX || tokstr().as_deref() == Some("in") || sel {
5244 break;
5245 }
5246 // c:1130-1135 — `if (!isident(tokstr) || errflag) { ... YYERRORV; }`
5247 if !crate::ported::params::isident(&tokstr().unwrap_or_default())
5248 || (errflag.load(Ordering::Relaxed) & 1) != 0
5249 {
5250 set_noaliases(ona);
5251 set_nocorrect(onc);
5252 zerr("par_for: expected identifier in name list");
5253 return;
5254 }
5255 }
5256 // c:1137-1138 — `noaliases = ona; nocorrect = onc;`
5257 set_noaliases(ona);
5258 set_nocorrect(onc);
5259 // c:1139-1140 — `if (!sel) ecbuf[np] = n;`
5260 if !sel {
5261 ECBUF.with_borrow_mut(|b| {
5262 b[np] = n;
5263 });
5264 }
5265 // c:1141 — `posix_in = isnewlin;`
5266 posix_in = isnewlin() != 0;
5267 // c:1142-1143 — `while (isnewlin) zshlex();`
5268 while isnewlin() != 0 {
5269 zshlex();
5270 }
5271 // c:1144 — `if (tok == STRING && !strcmp(tokstr, "in")) {`
5272 if tok() == STRING_LEX && tokstr().as_deref() == Some("in") {
5273 // c:1145 — `incmdpos = 0;`
5274 set_incmdpos(false);
5275 // c:1146 — `zshlex();`
5276 zshlex();
5277 // c:1147 — `np = ecadd(0);`
5278 np = ecadd(0);
5279 // c:1148 — `n = par_wordlist();`
5280 let n2 = par_wordlist_wordcode();
5281 // c:1149-1150 — `if (tok != SEPER) YYERRORV(oecused);`
5282 if tok() != SEPER {
5283 zerr("par_for: expected separator after `in`");
5284 return;
5285 }
5286 // c:1151 — `ecbuf[np] = n;`
5287 ECBUF.with_borrow_mut(|b| {
5288 b[np] = n2 as wordcode;
5289 });
5290 // c:1152 — `type = (sel ? WC_SELECT_LIST : WC_FOR_LIST);`
5291 r#type = if sel { WC_SELECT_LIST } else { WC_FOR_LIST };
5292 } else if !posix_in && tok() == INPAR_TOK {
5293 // c:1153-1154 — `else if (!posix_in && tok == INPAR)`
5294 // c:1154 — `incmdpos = 0;`
5295 set_incmdpos(false);
5296 // c:1155 — `zshlex();`
5297 zshlex();
5298 // c:1156 — `np = ecadd(0);`
5299 np = ecadd(0);
5300 // c:1157 — `n = par_nl_wordlist();`
5301 let n2 = par_nl_wordlist_wordcode();
5302 // c:1158-1159 — `if (tok != OUTPAR) YYERRORV(oecused);`
5303 if tok() != OUTPAR_TOK {
5304 zerr("par_for: expected `)`");
5305 return;
5306 }
5307 // c:1160 — `ecbuf[np] = n;`
5308 ECBUF.with_borrow_mut(|b| {
5309 b[np] = n2 as wordcode;
5310 });
5311 // c:1161 — `incmdpos = 1;`
5312 set_incmdpos(true);
5313 // c:1162 — `zshlex();`
5314 zshlex();
5315 // c:1163 — `type = (sel ? WC_SELECT_LIST : WC_FOR_LIST);`
5316 r#type = if sel { WC_SELECT_LIST } else { WC_FOR_LIST };
5317 } else {
5318 // c:1165 — `type = (sel ? WC_SELECT_PPARAM : WC_FOR_PPARAM);`
5319 r#type = if sel { WC_SELECT_PPARAM } else { WC_FOR_PPARAM };
5320 }
5321 let _ = np;
5322 }
5323 // c:1167 — `incmdpos = 1;`
5324 set_incmdpos(true);
5325 // c:1168-1169 — `while (tok == SEPER) zshlex();`
5326 while tok() == SEPER {
5327 zshlex();
5328 }
5329 // c:1170-1193 — body dispatch (inline in C, factored here for
5330 // reuse by par_while/par_repeat — same control flow, same calls).
5331 par_loop_body_wordcode(cmplx, csh);
5332 // c:1195-1197 — `ecbuf[p] = (sel ? WCB_SELECT(...) : WCB_FOR(...));`
5333 let used = ECUSED.get() as usize;
5334 let off = used.saturating_sub(1 + p) as wordcode;
5335 ECBUF.with_borrow_mut(|b| {
5336 b[p] = if sel {
5337 WCB_SELECT(r#type, off)
5338 } else {
5339 WCB_FOR(r#type, off)
5340 };
5341 });
5342}
5343
5344/// Port of `par_wordlist(void)` from `Src/parse.c:2361-2371` —
5345/// emits wordcode form. Returns the number of strings emitted.
5346fn par_wordlist_wordcode() -> u32 {
5347 // c:2364 — `int num = 0;`
5348 let mut num: u32 = 0;
5349 // c:2365 — `while (tok == STRING) {`
5350 while tok() == STRING_LEX {
5351 // c:2366 — `ecstr(tokstr);`
5352 ecstr(&tokstr().unwrap_or_default());
5353 // c:2367 — `num++;`
5354 num += 1;
5355 // c:2368 — `zshlex();`
5356 zshlex();
5357 }
5358 // c:2370 — `return num;`
5359 num
5360}
5361
5362/// Port of `par_nl_wordlist(void)` from `Src/parse.c:2378-2390` —
5363/// emits wordcode form. Like par_wordlist but tolerates SEPER
5364/// between words.
5365fn par_nl_wordlist_wordcode() -> u32 {
5366 // c:2381 — `int num = 0;`
5367 let mut num: u32 = 0;
5368 // c:2383 — `while (tok == STRING || tok == SEPER) {`
5369 while tok() == STRING_LEX || tok() == SEPER || tok() == NEWLIN {
5370 // c:2384-2387 — `if (tok != SEPER) { ecstr(tokstr); num++; }`
5371 if tok() == STRING_LEX {
5372 ecstr(&tokstr().unwrap_or_default());
5373 num += 1;
5374 }
5375 // c:2388 — `zshlex();`
5376 zshlex();
5377 }
5378 // c:2390 — `return num;`
5379 num
5380}
5381
5382/// Body dispatch shared by par_for / par_while / par_repeat.
5383/// Direct port of `Src/parse.c:1170-1194`.
5384fn par_loop_body_wordcode(cmplx: &mut i32, csh: bool) {
5385 if tok() == DOLOOP {
5386 zshlex();
5387 // c:1172 — `par_save_list(cmplx);`
5388 par_save_list_wordcode(cmplx);
5389 if tok() != DONE {
5390 zerr("missing `done`");
5391 return;
5392 }
5393 set_incmdpos(false);
5394 zshlex();
5395 } else if tok() == INBRACE_TOK {
5396 zshlex();
5397 // c:1179 — `par_save_list(cmplx);`
5398 par_save_list_wordcode(cmplx);
5399 if tok() != OUTBRACE_TOK {
5400 zerr("missing `}`");
5401 return;
5402 }
5403 set_incmdpos(false);
5404 zshlex();
5405 } else if csh || isset(CSHJUNKIELOOPS) {
5406 // c:1185 — `par_save_list(cmplx);`
5407 par_save_list_wordcode(cmplx);
5408 if tok() != ZEND {
5409 zerr("missing `end`");
5410 return;
5411 }
5412 set_incmdpos(false);
5413 zshlex();
5414 } else if unset(SHORTLOOPS) {
5415 zerr("short loop form requires SHORTLOOPS");
5416 } else {
5417 // c:1193 — `par_save_list1(cmplx);`
5418 par_save_list1_wordcode(cmplx);
5419 }
5420}
5421
/// Wordcode entry point for `select` loops.
///
/// `select` shares the `par_for` body (c:983-985 routes SELECT to
/// par_for), so this simply forwards `cmplx` to `par_for_wordcode`.
/// NOTE(review): `par_for_wordcode` presumably tells `select` apart from
/// `for` by looking at the current token — confirm against its prologue.
pub fn par_select_wordcode(cmplx: &mut i32) {
    par_for_wordcode(cmplx);
}
5426
5427/// Port of `par_case(int *cmplx)` from `Src/parse.c:1208-1400`.
5428pub fn par_case_wordcode(_cmplx: &mut i32) {
5429 // c:1211 — `int oecused = ecused, brflag, p, pp, palts, type, nalts;`
5430 let _oecused = ECUSED.get() as usize;
5431 let brflag: bool;
5432 let p: usize;
5433 let mut pp: usize;
5434 let mut palts: usize;
5435 let mut r#type: wordcode;
5436 let mut nalts: u32;
5437 // c:1212 — `int ona, onc;`
5438 let ona: bool;
5439 let onc: i32;
5440
5441 // c:1214 — `p = ecadd(0);`
5442 p = ecadd(0);
5443
5444 // c:1216 — `incmdpos = 0;`
5445 set_incmdpos(false);
5446 // c:1217 — `zshlex();`
5447 zshlex();
5448 // c:1218-1219 — `if (tok != STRING) YYERRORV(oecused);`
5449 if tok() != STRING_LEX {
5450 zerr("par_case: expected scrutinee");
5451 return;
5452 }
5453 // c:1220 — `ecstr(tokstr);`
5454 ecstr(&tokstr().unwrap_or_default());
5455
5456 // c:1222 — `incmdpos = 1;`
5457 set_incmdpos(true);
5458 // c:1223-1224 — `ona = noaliases; onc = nocorrect;`
5459 ona = noaliases();
5460 onc = nocorrect();
5461 // c:1225 — `noaliases = nocorrect = 1;`
5462 set_noaliases(true);
5463 set_nocorrect(1);
5464 // c:1226 — `zshlex();`
5465 zshlex();
5466 // c:1227-1228 — `while (tok == SEPER) zshlex();`
5467 while tok() == SEPER {
5468 zshlex();
5469 }
5470 // c:1229 — `if (!(tok == STRING && !strcmp(tokstr, "in")) && tok != INBRACE)`
5471 if !(tok() == STRING_LEX && tokstr().as_deref() == Some("in")) && tok() != INBRACE_TOK {
5472 // c:1231-1233 — restore noaliases/nocorrect + ERROR
5473 set_noaliases(ona);
5474 set_nocorrect(onc);
5475 zerr("par_case: expected `in` or `{`");
5476 return;
5477 }
5478 // c:1235 — `brflag = (tok == INBRACE);`
5479 brflag = tok() == INBRACE_TOK;
5480 // c:1236 — `incasepat = 1;`
5481 set_incasepat(1);
5482 // c:1237 — `incmdpos = 0;`
5483 set_incmdpos(false);
5484 // c:1238-1239 — `noaliases = ona; nocorrect = onc;`
5485 set_noaliases(ona);
5486 set_nocorrect(onc);
5487 // c:1240 — `zshlex();`
5488 zshlex();
5489
5490 // c:1242 — `for (;;) {`
5491 'arms: loop {
5492 // c:1243 — `char *str;`
5493 let mut str: String;
5494 // c:1244 — `int skip_zshlex;`
5495 let skip_zshlex: bool;
5496
5497 // c:1246-1247 — `while (tok == SEPER) zshlex();`
5498 while tok() == SEPER {
5499 zshlex();
5500 }
5501 // c:1248-1249 — `if (tok == OUTBRACE) break;`
5502 if tok() == OUTBRACE_TOK {
5503 break 'arms;
5504 }
5505 // c:1250-1251 — `if (tok == INPAR) zshlex();`
5506 if tok() == INPAR_TOK {
5507 zshlex();
5508 }
5509 // c:1252-1254 — `if (tok == BAR) { str = ""; skip_zshlex = 1; }`
5510 if tok() == BAR_TOK {
5511 str = String::new();
5512 skip_zshlex = true;
5513 } else {
5514 // c:1256-1257 — `if (tok != STRING) YYERRORV(oecused);`
5515 if tok() != STRING_LEX {
5516 zerr("par_case: expected pattern");
5517 return;
5518 }
5519 // c:1258-1259 — `if (!strcmp(tokstr, "esac")) break;`
5520 if tokstr().as_deref() == Some("esac") {
5521 break 'arms;
5522 }
5523 // c:1260 — `str = dupstring(tokstr);`
5524 str = tokstr().unwrap_or_default();
5525 // c:1261 — `skip_zshlex = 0;`
5526 skip_zshlex = false;
5527 }
5528 // c:1263 — `type = WC_CASE_OR;`
5529 r#type = WC_CASE_OR;
5530 // c:1264-1266 — `pp = ecadd(0); palts = ecadd(0); nalts = 0;`
5531 pp = ecadd(0);
5532 palts = ecadd(0);
5533 nalts = 0;
5534 // c:1300 — `incasepat = -1;`
5535 set_incasepat(-1);
5536 // c:1301 — `incmdpos = 1;`
5537 set_incmdpos(true);
5538 // c:1302-1303 — `if (!skip_zshlex) zshlex();`
5539 if !skip_zshlex {
5540 zshlex();
5541 }
5542 // c:1304 — `for (;;) {`
5543 loop {
5544 // c:1305-1313 — `if (tok == OUTPAR) { ecstr(str);
5545 // ecadd(ecnpats++); nalts++; incasepat = 0;
5546 // incmdpos = 1; zshlex(); break; }`
5547 if tok() == OUTPAR_TOK {
5548 ecstr(&str);
5549 let np = ECNPATS.with(|cc| {
5550 let v = cc.get();
5551 cc.set(v + 1);
5552 v
5553 }) as u32;
5554 ecadd(np);
5555 nalts += 1;
5556 set_incasepat(0);
5557 set_incmdpos(true);
5558 zshlex();
5559 break;
5560 }
5561 // c:1314-1320 — `else if (tok == BAR) { ecstr(str);
5562 // ecadd(ecnpats++); nalts++; incasepat = 1;
5563 // incmdpos = 0; }`
5564 else if tok() == BAR_TOK {
5565 ecstr(&str);
5566 let np = ECNPATS.with(|cc| {
5567 let v = cc.get();
5568 cc.set(v + 1);
5569 v
5570 }) as u32;
5571 ecadd(np);
5572 nalts += 1;
5573 set_incasepat(1);
5574 set_incmdpos(false);
5575 }
5576 // c:1321-1357 — else { ... `(...)` whole-pattern hack
5577 // (Inpar at str[0]); else YYERRORV. Not yet ported —
5578 // err out on unexpected. }
5579 else {
5580 zerr("par_case: expected `)` or `|`");
5581 return;
5582 }
5583
5584 // c:1359 — `zshlex();`
5585 zshlex();
5586 // c:1360-1377 — switch on next tok.
5587 match tok() {
5588 STRING_LEX => {
5589 // c:1361-1365
5590 str = tokstr().unwrap_or_default();
5591 zshlex();
5592 }
5593 OUTPAR_TOK | BAR_TOK => {
5594 // c:1367-1371 — empty string
5595 str = String::new();
5596 }
5597 _ => {
5598 // c:1374-1376 — `YYERRORV(oecused);`
5599 zerr("par_case: expected pattern, `)` or `|`");
5600 return;
5601 }
5602 }
5603 }
5604 // c:1379 — `incasepat = 0;`
5605 set_incasepat(0);
5606 // c:1380 — `par_save_list(cmplx);`
5607 par_save_list_wordcode(_cmplx);
5608 // c:1381-1384 — terminator → arm type
5609 if tok() == SEMIAMP {
5610 r#type = WC_CASE_AND;
5611 } else if tok() == SEMIBAR {
5612 r#type = WC_CASE_TESTAND;
5613 }
5614 // c:1385 — `ecbuf[pp] = WCB_CASE(type, ecused - 1 - pp);`
5615 let used = ECUSED.get() as usize;
5616 ECBUF.with_borrow_mut(|b| {
5617 b[pp] = WCB_CASE(r#type, (used.saturating_sub(1 + pp)) as wordcode);
5618 });
5619 // c:1386 — `ecbuf[palts] = nalts;`
5620 ECBUF.with_borrow_mut(|b| {
5621 b[palts] = nalts;
5622 });
5623 // c:1387-1388 — terminator (ESAC w/o brace OR OUTBRACE w/ brace) → break
5624 if (tok() == ESAC && !brflag) || (tok() == OUTBRACE_TOK && brflag) {
5625 break 'arms;
5626 }
5627 // c:1389-1390 — `if (tok != DSEMI && tok != SEMIAMP && tok != SEMIBAR) YYERRORV;`
5628 if tok() != DSEMI && tok() != SEMIAMP && tok() != SEMIBAR {
5629 zerr("par_case: expected `;;`, `;&`, or `;|`");
5630 return;
5631 }
5632 // c:1391 — `incasepat = 1;`
5633 set_incasepat(1);
5634 // c:1392 — `incmdpos = 0;`
5635 set_incmdpos(false);
5636 // c:1393 — `zshlex();`
5637 zshlex();
5638 }
5639 // c:1395 — `incmdpos = 1;`
5640 set_incmdpos(true);
5641 // c:1396 — `incasepat = 0;`
5642 set_incasepat(0);
5643 // c:1397 — `zshlex();`
5644 zshlex();
5645
5646 // c:1399 — `ecbuf[p] = WCB_CASE(WC_CASE_HEAD, ecused - 1 - p);`
5647 let used = ECUSED.get() as usize;
5648 ECBUF.with_borrow_mut(|b| {
5649 b[p] = WCB_CASE(WC_CASE_HEAD, (used.saturating_sub(1 + p)) as wordcode);
5650 });
5651}
5652
5653/// Port of `par_if(int *cmplx)` from `Src/parse.c:1410-1512`.
5654pub fn par_if_wordcode(cmplx: &mut i32) {
5655 // c:1413 — `int oecused = ecused, p, pp, type, usebrace = 0;`
5656 let _oecused = ECUSED.get() as usize;
5657 let p: usize;
5658 let mut pp: usize = 0;
5659 let mut r#type: wordcode = WC_IF_IF;
5660 let mut usebrace: i32 = 0;
5661 // c:1414 — `enum lextok xtok;`
5662 let mut xtok: lextok;
5663 // c:1415 — `unsigned char nc;`
5664 let nc: u8;
5665 let _ = nc;
5666
5667 // c:1417 — `p = ecadd(0);`
5668 p = ecadd(0);
5669
5670 // c:1419 — `for (;;) {`
5671 loop {
5672 // c:1420 — `xtok = tok;`
5673 xtok = tok();
5674 // c:1421 — `cmdpush(xtok == IF ? CS_IF : CS_ELIF);`
5675 cmdpush(if xtok == IF {
5676 CS_IF as u8
5677 } else {
5678 CS_ELIF as u8
5679 });
5680 // c:1422-1426 — `if (xtok == FI) { incmdpos = 0; zshlex(); break; }`
5681 if xtok == FI {
5682 set_incmdpos(false);
5683 zshlex();
5684 break;
5685 }
5686 // c:1427 — `zshlex();`
5687 zshlex();
5688 // c:1428-1429 — `if (xtok == ELSE) break;`
5689 if xtok == ELSE {
5690 break;
5691 }
5692 // c:1430-1431 — `while (tok == SEPER) zshlex();`
5693 while tok() == SEPER {
5694 zshlex();
5695 }
5696 // c:1432-1435 — `if (!(xtok == IF || xtok == ELIF)) { cmdpop(); YYERRORV; }`
5697 if !(xtok == IF || xtok == ELIF) {
5698 cmdpop();
5699 zerr("par_if: expected `if` or `elif`");
5700 return;
5701 }
5702 // c:1436 — `pp = ecadd(0);`
5703 pp = ecadd(0);
5704 // c:1437 — `type = (xtok == IF ? WC_IF_IF : WC_IF_ELIF);`
5705 r#type = if xtok == IF { WC_IF_IF } else { WC_IF_ELIF };
5706 // c:1438 — `par_save_list(cmplx);` — condition body
5707 par_save_list_wordcode(cmplx);
5708 // c:1439 — `incmdpos = 1;`
5709 set_incmdpos(true);
5710 // c:1440-1443 — `if (tok == ENDINPUT) { cmdpop(); YYERRORV; }`
5711 if tok() == ENDINPUT {
5712 cmdpop();
5713 zerr("par_if: unexpected end-of-input after condition");
5714 return;
5715 }
5716 // c:1444-1445 — `while (tok == SEPER) zshlex();`
5717 while tok() == SEPER {
5718 zshlex();
5719 }
5720 // c:1446 — `xtok = FI;` — pre-set so the post-loop check works
5721 xtok = FI;
5722 // c:1447 — `nc = cmdstack[cmdsp - 1] == CS_IF ? CS_IFTHEN : CS_ELIFTHEN;`
5723 // (Not tracked separately in zshrs cmdstack — derive from cur top
5724 // by reading CMDSTACK; for safety use CS_IFTHEN as default.)
5725 // We don't have a way to read top easily — match by tracking
5726 // whether we just pushed CS_IF or CS_ELIF.
5727 // For wordcode emission this only affects cmdstack debug output;
5728 // not the emitted wordcode. Use CS_IFTHEN.
5729 let nc_local: u8 = CS_IFTHEN as u8;
5730 if tok() == THEN {
5731 // c:1448-1456 — THEN branch
5732 // c:1449 — `usebrace = 0;`
5733 usebrace = 0;
5734 // c:1450 — `cmdpop();`
5735 cmdpop();
5736 // c:1451 — `cmdpush(nc);`
5737 cmdpush(nc_local);
5738 // c:1452 — `zshlex();`
5739 zshlex();
5740 // c:1453 — `par_save_list(cmplx);` — then body
5741 par_save_list_wordcode(cmplx);
5742 // c:1454 — `ecbuf[pp] = WCB_IF(type, ecused - 1 - pp);`
5743 let used = ECUSED.get() as usize;
5744 ECBUF.with_borrow_mut(|b| {
5745 b[pp] = WCB_IF(r#type, (used.saturating_sub(1 + pp)) as wordcode);
5746 });
5747 // c:1455 — `incmdpos = 1;`
5748 set_incmdpos(true);
5749 // c:1456 — `cmdpop();`
5750 cmdpop();
5751 } else if tok() == INBRACE_TOK {
5752 // c:1457-1473 — INBRACE branch
5753 // c:1458 — `usebrace = 1;`
5754 usebrace = 1;
5755 // c:1459 — `cmdpop();`
5756 cmdpop();
5757 // c:1460 — `cmdpush(nc);`
5758 cmdpush(nc_local);
5759 // c:1461 — `zshlex();`
5760 zshlex();
5761 // c:1462 — `par_save_list(cmplx);`
5762 par_save_list_wordcode(cmplx);
5763 // c:1463-1466 — `if (tok != OUTBRACE) { cmdpop(); YYERRORV; }`
5764 if tok() != OUTBRACE_TOK {
5765 cmdpop();
5766 zerr("par_if: expected `}`");
5767 return;
5768 }
5769 // c:1467 — `ecbuf[pp] = WCB_IF(type, ecused - 1 - pp);`
5770 let used = ECUSED.get() as usize;
5771 ECBUF.with_borrow_mut(|b| {
5772 b[pp] = WCB_IF(r#type, (used.saturating_sub(1 + pp)) as wordcode);
5773 });
5774 // c:1469 — `zshlex();`
5775 zshlex();
5776 // c:1470 — `incmdpos = 1;`
5777 set_incmdpos(true);
5778 // c:1471-1472 — `if (tok == SEPER) break;`
5779 if tok() == SEPER {
5780 break;
5781 }
5782 // c:1473 — `cmdpop();`
5783 cmdpop();
5784 } else if unset(SHORTLOOPS) {
5785 // c:1474-1476 — `cmdpop(); YYERRORV;`
5786 cmdpop();
5787 zerr("par_if: short body requires SHORTLOOPS");
5788 return;
5789 } else {
5790 // c:1477-1484 — short loop form
5791 // c:1478 — `cmdpop();`
5792 cmdpop();
5793 // c:1479 — `cmdpush(nc);`
5794 cmdpush(nc_local);
5795 // c:1480 — `par_save_list1(cmplx);`
5796 par_save_list1_wordcode(cmplx);
5797 // c:1481 — `ecbuf[pp] = WCB_IF(type, ecused - 1 - pp);`
5798 let used = ECUSED.get() as usize;
5799 ECBUF.with_borrow_mut(|b| {
5800 b[pp] = WCB_IF(r#type, (used.saturating_sub(1 + pp)) as wordcode);
5801 });
5802 // c:1482 — `incmdpos = 1;`
5803 set_incmdpos(true);
5804 // c:1483 — `break;`
5805 break;
5806 }
5807 }
5808 // c:1486 — `cmdpop();`
5809 cmdpop();
5810 // c:1487 — `if (xtok == ELSE || tok == ELSE) {`
5811 if xtok == ELSE || tok() == ELSE {
5812 // c:1488 — `pp = ecadd(0);`
5813 pp = ecadd(0);
5814 // c:1489 — `cmdpush(CS_ELSE);`
5815 cmdpush(CS_ELSE as u8);
5816 // c:1490-1491 — `while (tok == SEPER) zshlex();`
5817 while tok() == SEPER {
5818 zshlex();
5819 }
5820 // c:1492-1498 — `if (tok == INBRACE && usebrace) { ... } else { ... }`
5821 if tok() == INBRACE_TOK && usebrace != 0 {
5822 // c:1493 — `zshlex();`
5823 zshlex();
5824 // c:1494 — `par_save_list(cmplx);`
5825 par_save_list_wordcode(cmplx);
5826 // c:1495-1498 — `if (tok != OUTBRACE) { cmdpop(); YYERRORV; }`
5827 if tok() != OUTBRACE_TOK {
5828 cmdpop();
5829 zerr("par_if: else expected `}`");
5830 return;
5831 }
5832 } else {
5833 // c:1500 — `par_save_list(cmplx);`
5834 par_save_list_wordcode(cmplx);
5835 // c:1501-1504 — `if (tok != FI) { cmdpop(); YYERRORV; }`
5836 if tok() != FI {
5837 cmdpop();
5838 zerr("par_if: else expected `fi`");
5839 return;
5840 }
5841 }
5842 // c:1506 — `incmdpos = 0;`
5843 set_incmdpos(false);
5844 // c:1507 — `ecbuf[pp] = WCB_IF(WC_IF_ELSE, ecused - 1 - pp);`
5845 let used = ECUSED.get() as usize;
5846 ECBUF.with_borrow_mut(|b| {
5847 b[pp] = WCB_IF(WC_IF_ELSE, (used.saturating_sub(1 + pp)) as wordcode);
5848 });
5849 // c:1508 — `zshlex();`
5850 zshlex();
5851 // c:1509 — `cmdpop();`
5852 cmdpop();
5853 }
5854 // c:1511 — `ecbuf[p] = WCB_IF(WC_IF_HEAD, ecused - 1 - p);`
5855 let used = ECUSED.get() as usize;
5856 ECBUF.with_borrow_mut(|b| {
5857 b[p] = WCB_IF(WC_IF_HEAD, (used.saturating_sub(1 + p)) as wordcode);
5858 });
5859}
5860
5861/// Port of `par_while(int *cmplx)` from `Src/parse.c:1520-1557`.
5862pub fn par_while_wordcode(cmplx: &mut i32) {
5863 // c:1523 — `int oecused = ecused, p;`
5864 let _oecused = ECUSED.get() as usize;
5865 let p: usize;
5866 // c:1524 — `int type = (tok == UNTIL ? WC_WHILE_UNTIL : WC_WHILE_WHILE);`
5867 let r#type: wordcode = if tok() == UNTIL {
5868 WC_WHILE_UNTIL
5869 } else {
5870 WC_WHILE_WHILE
5871 };
5872
5873 // c:1526 — `p = ecadd(0);`
5874 p = ecadd(0);
5875 // c:1527 — `zshlex();`
5876 zshlex();
5877 // c:1528 — `par_save_list(cmplx);` — condition.
5878 par_save_list_wordcode(cmplx);
5879 // c:1529 — `incmdpos = 1;`
5880 set_incmdpos(true);
5881 // c:1530-1531 — `while (tok == SEPER) zshlex();`
5882 while tok() == SEPER {
5883 zshlex();
5884 }
5885 // c:1532-1545 — body dispatch (inlined in C; we factor via
5886 // par_loop_body_wordcode since for/while/repeat share this
5887 // identical block).
5888 if tok() == DOLOOP {
5889 // c:1533 — `zshlex();`
5890 zshlex();
5891 // c:1534 — `par_save_list(cmplx);`
5892 par_save_list_wordcode(cmplx);
5893 // c:1535-1536 — `if (tok != DONE) YYERRORV(oecused);`
5894 if tok() != DONE {
5895 zerr("par_while: expected `done`");
5896 return;
5897 }
5898 // c:1537 — `incmdpos = 0;`
5899 set_incmdpos(false);
5900 // c:1538 — `zshlex();`
5901 zshlex();
5902 } else if tok() == INBRACE_TOK {
5903 // c:1540 — `zshlex();`
5904 zshlex();
5905 // c:1541 — `par_save_list(cmplx);`
5906 par_save_list_wordcode(cmplx);
5907 // c:1542-1543 — `if (tok != OUTBRACE) YYERRORV(oecused);`
5908 if tok() != OUTBRACE_TOK {
5909 zerr("par_while: expected `}`");
5910 return;
5911 }
5912 // c:1544 — `incmdpos = 0;`
5913 set_incmdpos(false);
5914 // c:1545 — `zshlex();`
5915 zshlex();
5916 } else if isset(CSHJUNKIELOOPS) {
5917 // c:1546-1550
5918 par_save_list_wordcode(cmplx);
5919 if tok() != ZEND {
5920 zerr("par_while: expected `end`");
5921 return;
5922 }
5923 zshlex();
5924 } else if unset(SHORTLOOPS) {
5925 // c:1551-1552 — `YYERRORV(oecused);`
5926 zerr("par_while: short body requires SHORTLOOPS");
5927 return;
5928 } else {
5929 // c:1554 — `par_save_list1(cmplx);`
5930 par_save_list1_wordcode(cmplx);
5931 }
5932
5933 // c:1556 — `ecbuf[p] = WCB_WHILE(type, ecused - 1 - p);`
5934 let used = ECUSED.get() as usize;
5935 ECBUF.with_borrow_mut(|b| {
5936 b[p] = WCB_WHILE(r#type, (used.saturating_sub(1 + p)) as wordcode);
5937 });
5938}
5939
/// Wordcode entry point for `until` loops.
///
/// `until` shares the `par_while` body: this forwards `cmplx` to
/// `par_while_wordcode`, which selects `WC_WHILE_UNTIL` instead of
/// `WC_WHILE_WHILE` when the current token is `UNTIL`.
pub fn par_until_wordcode(cmplx: &mut i32) {
    par_while_wordcode(cmplx);
}
5944
5945/// Port of `par_repeat(int *cmplx)` from `Src/parse.c:1564-1606`.
5946pub fn par_repeat_wordcode(cmplx: &mut i32) {
5947 // c:1567 — `/* ### what to do about inrepeat_ here? */`
5948 // c:1568 — `int oecused = ecused, p;`
5949 let _oecused = ECUSED.get() as usize;
5950 let p: usize;
5951
5952 // c:1570 — `p = ecadd(0);`
5953 p = ecadd(0);
5954
5955 // c:1572 — `incmdpos = 0;`
5956 set_incmdpos(false);
5957 // c:1573 — `zshlex();`
5958 zshlex();
5959 // c:1574-1575 — `if (tok != STRING) YYERRORV(oecused);`
5960 if tok() != STRING_LEX {
5961 zerr("par_repeat: expected count");
5962 return;
5963 }
5964 // c:1576 — `ecstr(tokstr);`
5965 ecstr(&tokstr().unwrap_or_default());
5966 // c:1577 — `incmdpos = 1;`
5967 set_incmdpos(true);
5968 // c:1578 — `zshlex();`
5969 zshlex();
5970 // c:1579-1580 — `while (tok == SEPER) zshlex();`
5971 while tok() == SEPER {
5972 zshlex();
5973 }
5974 // c:1581-1604 — body dispatch (inlined here matching C exactly).
5975 if tok() == DOLOOP {
5976 // c:1582-1587
5977 zshlex();
5978 par_save_list_wordcode(cmplx);
5979 if tok() != DONE {
5980 zerr("par_repeat: expected `done`");
5981 return;
5982 }
5983 set_incmdpos(false);
5984 zshlex();
5985 } else if tok() == INBRACE_TOK {
5986 // c:1589-1594
5987 zshlex();
5988 par_save_list_wordcode(cmplx);
5989 if tok() != OUTBRACE_TOK {
5990 zerr("par_repeat: expected `}`");
5991 return;
5992 }
5993 set_incmdpos(false);
5994 zshlex();
5995 } else if isset(CSHJUNKIELOOPS) {
5996 // c:1596-1599
5997 par_save_list_wordcode(cmplx);
5998 if tok() != ZEND {
5999 zerr("par_repeat: expected `end`");
6000 return;
6001 }
6002 zshlex();
6003 } else if unset(SHORTLOOPS) && unset(SHORTREPEAT) {
6004 // c:1601-1602 — par_repeat needs BOTH SHORTLOOPS and SHORTREPEAT
6005 // unset to refuse short form (more permissive than par_while).
6006 zerr("par_repeat: short body requires SHORTLOOPS or SHORTREPEAT");
6007 return;
6008 } else {
6009 // c:1604 — `par_save_list1(cmplx);`
6010 par_save_list1_wordcode(cmplx);
6011 }
6012
6013 // c:1606 — `ecbuf[p] = WCB_REPEAT(ecused - 1 - p);`
6014 let used = ECUSED.get() as usize;
6015 ECBUF.with_borrow_mut(|b| {
6016 b[p] = WCB_REPEAT((used.saturating_sub(1 + p)) as wordcode);
6017 });
6018}
6019
6020/// Port of `par_funcdef(int *cmplx)` from `Src/parse.c:1672-1779`.
6021///
6022/// The `function NAME { ... }` form. Emits a WCB_FUNCDEF header
6023/// followed by a names-count slot, the names themselves, four
6024/// metadata slots (string-area start, string-area length, npats,
6025/// do_tracing), then the body wordcode, then WCB_END.
6026///
6027/// Critical: saves/resets `ecnpats` + `ecssub` + `ecsoffs` around
6028/// the body parse so per-function pattern counts don't leak into
6029/// the enclosing scope's `ecnpats` accumulator (parse.c:1723-1758).
6030pub fn par_funcdef_wordcode(cmplx: &mut i32) {
6031 // c:1674 — `int oecused = ecused, num = 0, onp, p, c = 0;`
6032 let _oecused = ECUSED.get() as usize;
6033 let mut num: i32 = 0;
6034 let onp: i32;
6035 let p: usize;
6036 let mut c: i32 = 0;
6037 // c:1675 — `int so, oecssub = ecssub;`
6038 let so: i32;
6039 let oecssub = ECSSUB.get();
6040 // c:1676 — `zlong oldlineno = lineno;`
6041 let oldlineno = lineno();
6042 // c:1677 — `int do_tracing = 0;`
6043 let mut do_tracing: i32 = 0;
6044
6045 // c:1679 — `lineno = 0;`
6046 set_lineno(0);
6047 // c:1680 — `nocorrect = 1;`
6048 set_nocorrect(1);
6049 // c:1681 — `incmdpos = 0;`
6050 set_incmdpos(false);
6051 // c:1682 — `zshlex();`
6052 zshlex();
6053
6054 // c:1684 — `p = ecadd(0);`
6055 p = ecadd(0);
6056 // c:1685 — `ecadd(0); /* p + 1 */`
6057 let p1 = ecadd(0);
6058
6059 // c:1687-1699 — `Consume an initial (-T), (--), or (-T --).`
6060 // c:1690 — `if (tok == STRING && tokstr[0] == Dash) {`
6061 if tok() == STRING_LEX {
6062 let s = tokstr().unwrap_or_default();
6063 let bytes = s.as_bytes();
6064 // C: `tokstr[0] == Dash` (Dash = 0x9b = 0xc2 0x9b in UTF-8).
6065 // First byte of UTF-8 `\u{9b}` is 0xc2; the char `'-'` is 0x2d.
6066 // Match either form.
6067 let first_is_dash = (bytes.len() >= 2 && bytes[0] == 0xc2 && bytes[1] == 0x9b)
6068 || (bytes.len() >= 1 && bytes[0] == b'-');
6069 if first_is_dash {
6070 // c:1691-1694 — `if (tokstr[1] == 'T' && !tokstr[2]) { ++do_tracing; zshlex(); }`
6071 // After the leading dash byte(s), check remaining bytes.
6072 let after_dash = if bytes.len() >= 2 && bytes[0] == 0xc2 && bytes[1] == 0x9b {
6073 &bytes[2..]
6074 } else {
6075 &bytes[1..]
6076 };
6077 if after_dash.len() == 1 && after_dash[0] == b'T' {
6078 do_tracing += 1;
6079 zshlex();
6080 }
6081 // c:1695-1698 — `if (tok == STRING && tokstr[0] == Dash &&
6082 // tokstr[1] == Dash && !tokstr[2]) zshlex();`
6083 if tok() == STRING_LEX {
6084 let s2 = tokstr().unwrap_or_default();
6085 let b2 = s2.as_bytes();
6086 let mut idx = 0;
6087 let mut dashes = 0;
6088 while idx < b2.len() && dashes < 2 {
6089 if b2[idx] == 0xc2 && idx + 1 < b2.len() && b2[idx + 1] == 0x9b {
6090 idx += 2;
6091 dashes += 1;
6092 } else if b2[idx] == b'-' {
6093 idx += 1;
6094 dashes += 1;
6095 } else {
6096 break;
6097 }
6098 }
6099 if dashes == 2 && idx == b2.len() {
6100 zshlex();
6101 }
6102 }
6103 }
6104 }
6105
6106 // c:1701-1709 — names loop.
6107 // `while (tok == STRING) { if ((*tokstr == Inbrace || *tokstr == '{')
6108 // && !tokstr[1]) { tok = INBRACE; break; } ecstr(tokstr); num++; zshlex(); }`
6109 while tok() == STRING_LEX {
6110 let s = tokstr().unwrap_or_default();
6111 let bytes = s.as_bytes();
6112 // First byte tests for Inbrace marker (0x8f → UTF-8 `0xc2 0x8f`) or `{`,
6113 // and length-1 check (`!tokstr[1]`).
6114 let is_inbrace_only = (bytes.len() == 1 && bytes[0] == b'{')
6115 || (bytes.len() == 2 && bytes[0] == 0xc2 && bytes[1] == 0x8f);
6116 if is_inbrace_only {
6117 set_tok(INBRACE_TOK);
6118 break;
6119 }
6120 ecstr(&s);
6121 num += 1;
6122 zshlex();
6123 }
6124
6125 // c:1711-1714 — four metadata placeholder slots.
6126 let m2 = ecadd(0);
6127 let m3 = ecadd(0);
6128 let m4 = ecadd(0);
6129 let m5 = ecadd(0);
6130
6131 // c:1716 — `nocorrect = 0;`
6132 set_nocorrect(0);
6133 // c:1717 — `incmdpos = 1;`
6134 set_incmdpos(true);
6135 // c:1718-1719 — `if (tok == INOUTPAR) zshlex();`
6136 if tok() == INOUTPAR {
6137 zshlex();
6138 }
6139 // c:1720-1721 — `while (tok == SEPER) zshlex();`
6140 while tok() == SEPER {
6141 zshlex();
6142 }
6143
6144 // c:1723 — `ecnfunc++;`
6145 ECNFUNC.set(ECNFUNC.get() + 1);
6146 // c:1724 — `ecssub = so = ecsoffs;`
6147 so = ECSOFFS.get();
6148 ECSSUB.set(so);
6149 // c:1725 — `onp = ecnpats;`
6150 onp = ECNPATS.with(|cc| cc.get());
6151 // c:1726 — `ecnpats = 0;`
6152 ECNPATS.with(|cc| cc.set(0));
6153
6154 // c:1728 — `if (tok == INBRACE) {`
6155 if tok() == INBRACE_TOK {
6156 // c:1729 — `zshlex();`
6157 zshlex();
6158 // c:1730 — `par_list(&c);`
6159 par_list_wordcode(&mut c);
6160 // c:1731-1736 — `if (tok != OUTBRACE) { lineno += oldlineno; ... }`
6161 if tok() != OUTBRACE_TOK {
6162 set_lineno(lineno() + oldlineno);
6163 ECNPATS.with(|cc| cc.set(onp));
6164 ECSSUB.set(oecssub);
6165 zerr("par_funcdef: expected `}`");
6166 return;
6167 }
6168 // c:1737-1740 — `if (num == 0) { incmdpos = 0; }`
6169 if num == 0 {
6170 set_incmdpos(false);
6171 }
6172 // c:1741 — `zshlex();`
6173 zshlex();
6174 } else if unset(SHORTLOOPS) {
6175 // c:1742-1746 — `lineno += oldlineno; ecnpats = onp; ecssub = oecssub; YYERRORV`
6176 set_lineno(lineno() + oldlineno);
6177 ECNPATS.with(|cc| cc.set(onp));
6178 ECSSUB.set(oecssub);
6179 zerr("par_funcdef: short body requires SHORTLOOPS");
6180 return;
6181 } else {
6182 // c:1748 — `par_list1(&c);`
6183 par_list1_wordcode(&mut c);
6184 }
6185
6186 // c:1750 — `ecadd(WCB_END());`
6187 ecadd(WCB_END());
6188 // c:1751-1754 — fill the 4 metadata slots
6189 let cur_sofs = ECSOFFS.get();
6190 let body_npats = ECNPATS.with(|cc| cc.get());
6191 ECBUF.with_borrow_mut(|b| {
6192 b[m2] = (so - oecssub) as wordcode;
6193 b[m3] = (cur_sofs - so) as wordcode;
6194 b[m4] = body_npats as wordcode;
6195 b[m5] = do_tracing as wordcode;
6196 });
6197 // c:1755 — `ecbuf[p + 1] = num;`
6198 ECBUF.with_borrow_mut(|b| {
6199 b[p1] = num as wordcode;
6200 });
6201
6202 // c:1757 — `ecnpats = onp;`
6203 ECNPATS.with(|cc| cc.set(onp));
6204 // c:1758 — `ecssub = oecssub;`
6205 ECSSUB.set(oecssub);
6206 // c:1759 — `ecnfunc++;`
6207 ECNFUNC.set(ECNFUNC.get() + 1);
6208
6209 // c:1761 — `ecbuf[p] = WCB_FUNCDEF(ecused - 1 - p);`
6210 let used = ECUSED.get() as usize;
6211 ECBUF.with_borrow_mut(|b| {
6212 b[p] = WCB_FUNCDEF((used.saturating_sub(1 + p)) as wordcode);
6213 });
6214
6215 // c:1763-1777 — anonymous-function trailing args (num == 0 case).
6216 if num == 0 {
6217 // c:1766 — `int parg = ecadd(0);`
6218 let parg = ecadd(0);
6219 // c:1767 — `ecadd(0);`
6220 ecadd(0);
6221 // c:1768-1772 — `while (tok == STRING) { ecstr(tokstr); num++; zshlex(); }`
6222 while tok() == STRING_LEX {
6223 ecstr(&tokstr().unwrap_or_default());
6224 num += 1;
6225 zshlex();
6226 }
6227 // c:1773-1774 — `if (num > 0) *cmplx = 1;`
6228 if num > 0 {
6229 *cmplx = 1;
6230 }
6231 // c:1775 — `ecbuf[parg] = ecused - parg;`
6232 // c:1776 — `ecbuf[parg+1] = num;`
6233 let used2 = ECUSED.get() as usize;
6234 ECBUF.with_borrow_mut(|b| {
6235 b[parg] = (used2 - parg) as wordcode;
6236 b[parg + 1] = num as wordcode;
6237 });
6238 }
6239 // c:1778 — `lineno += oldlineno;`
6240 set_lineno(lineno() + oldlineno);
6241}
6242
6243/// Size of `struct fdhead` in `wordcode` (u32) units. Used by all
6244/// the header-walk macros below.
6245pub const FDHEAD_WORDS: usize = size_of::<fdhead>() / 4;
6246
6247/// `Src/parse.c:1619-1665`. Handles both `(...)` subshell and
6248/// `{...}` brace group (cursh) plus optional `always { ... }`
6249/// trailing block. C uses a single function with `zsh_construct=1`
6250/// for `{...}` and 0 for `(...)`.
6251pub fn par_subsh_wordcode(cmplx: &mut i32, zsh_construct: i32) {
6252 // c:1621 — `enum lextok otok = tok;`
6253 let otok = tok();
6254 // c:1622 — `int oecused = ecused, p, pp;`
6255 let _oecused = ECUSED.get() as usize;
6256 let p: usize;
6257 let pp: usize;
6258
6259 // c:1624 — `p = ecadd(0);`
6260 p = ecadd(0);
6261 // c:1625 — `/* Extra word only needed for always block */`
6262 // c:1626 — `pp = ecadd(0);`
6263 pp = ecadd(0);
6264 // c:1627 — `zshlex();`
6265 zshlex();
6266 // c:1628 — `par_list(cmplx);`
6267 par_list_wordcode(cmplx);
6268 // c:1629 — `ecadd(WCB_END());`
6269 ecadd(WCB_END());
6270 // c:1630-1631 — `if (tok != ((otok == INPAR) ? OUTPAR : OUTBRACE))
6271 // YYERRORV(oecused);`
6272 if tok()
6273 != (if otok == INPAR_TOK {
6274 OUTPAR_TOK
6275 } else {
6276 OUTBRACE_TOK
6277 })
6278 {
6279 zerr("par_subsh: missing closing token");
6280 return;
6281 }
6282 // c:1632 — `incmdpos = !zsh_construct;`
6283 set_incmdpos(zsh_construct == 0);
6284 // c:1633 — `zshlex();`
6285 zshlex();
6286
6287 // c:1635 — `/* Optional always block. No intervening SEPERs allowed. */`
6288 // c:1636 — `if (otok == INBRACE && tok == STRING && !strcmp(tokstr, "always")) {`
6289 if otok == INBRACE_TOK && tok() == STRING_LEX && tokstr().as_deref() == Some("always") {
6290 // c:1637 — `ecbuf[pp] = WCB_TRY(ecused - 1 - pp);`
6291 let used = ECUSED.get() as usize;
6292 ECBUF.with_borrow_mut(|b| {
6293 b[pp] = WCB_TRY((used.saturating_sub(1 + pp)) as wordcode);
6294 });
6295 // c:1638 — `incmdpos = 1;`
6296 set_incmdpos(true);
6297 // c:1639-1641 — `do { zshlex(); } while (tok == SEPER);`
6298 loop {
6299 zshlex();
6300 if tok() != SEPER {
6301 break;
6302 }
6303 }
6304
6305 // c:1643-1644 — `if (tok != INBRACE) YYERRORV(oecused);`
6306 if tok() != INBRACE_TOK {
6307 zerr("par_subsh: 'always' expects `{`");
6308 return;
6309 }
6310 // c:1645 — `cmdpop();`
6311 cmdpop();
6312 // c:1646 — `cmdpush(CS_ALWAYS);`
6313 cmdpush(CS_ALWAYS as u8);
6314
6315 // c:1648 — `zshlex();`
6316 zshlex();
6317 // c:1649 — `par_save_list(cmplx);`
6318 par_save_list_wordcode(cmplx);
6319 // c:1650-1651 — `while (tok == SEPER) zshlex();`
6320 while tok() == SEPER {
6321 zshlex();
6322 }
6323
6324 // c:1653 — `incmdpos = 1;`
6325 set_incmdpos(true);
6326
6327 // c:1655-1656 — `if (tok != OUTBRACE) YYERRORV(oecused);`
6328 if tok() != OUTBRACE_TOK {
6329 zerr("par_subsh: 'always' block missing `}`");
6330 return;
6331 }
6332 // c:1657 — `zshlex();`
6333 zshlex();
6334 // c:1658 — `ecbuf[p] = WCB_TRY(ecused - 1 - p);`
6335 let used = ECUSED.get() as usize;
6336 ECBUF.with_borrow_mut(|b| {
6337 b[p] = WCB_TRY((used.saturating_sub(1 + p)) as wordcode);
6338 });
6339 } else {
6340 // c:1660-1661 — `ecbuf[p] = (otok == INPAR ? WCB_SUBSH(...) : WCB_CURSH(...));`
6341 let used = ECUSED.get() as usize;
6342 let off = used.saturating_sub(1 + p);
6343 ECBUF.with_borrow_mut(|b| {
6344 b[p] = if otok == INPAR_TOK {
6345 WCB_SUBSH(off as wordcode)
6346 } else {
6347 WCB_CURSH(off as wordcode)
6348 };
6349 });
6350 }
6351}
6352
6353/// Port of `par_time(void)` from `Src/parse.c:1787`. `time PIPE`
6354/// emits WCB_TIMED(WC_TIMED_PIPE) + the sublist code; bare `time`
6355/// with no pipeline emits WCB_TIMED(WC_TIMED_EMPTY).
6356pub fn par_time_wordcode() {
6357 // c:1791 — `zshlex();`
6358 zshlex();
6359 // c:1793-1794 — `p = ecadd(0); ecadd(0);`
6360 let p = ecadd(0);
6361 ecadd(0);
6362 // c:1795 — `if ((f = par_sublist2(&c)) < 0)`
6363 let mut c = 0i32;
6364 let f = par_sublist2(&mut c);
6365 match f {
6366 Some(flags) => {
6367 // c:1799 — `ecbuf[p] = WCB_TIMED(WC_TIMED_PIPE);`
6368 ECBUF.with_borrow_mut(|b| {
6369 if p < b.len() {
6370 b[p] = WCB_TIMED(WC_TIMED_PIPE);
6371 }
6372 });
6373 // c:1800 — `set_sublist_code(p+1, WC_SUBLIST_END, f,
6374 // ecused-2-p, c);`
6375 let used = ECUSED.get() as usize;
6376 let skip = used.saturating_sub(2 + p) as i32;
6377 set_sublist_code(p + 1, WC_SUBLIST_END as i32, flags, skip, c != 0);
6378 }
6379 None => {
6380 // c:1796-1798 — `ecused--; ecbuf[p] = WCB_TIMED(WC_TIMED_EMPTY);`
6381 ECUSED.set((ECUSED.get() - 1).max(0));
6382 ECBUF.with_borrow_mut(|b| {
6383 if p < b.len() {
6384 b[p] = WCB_TIMED(WC_TIMED_EMPTY);
6385 }
6386 });
6387 }
6388 }
6389}
6390
6391/// Port of `par_dinbrack(void)` from `Src/parse.c:1810`. Wraps
6392/// `par_cond` (the cond-expression emitter at parse.c:2409) with
6393/// the `[[ ... ]]` framing: incond/incmdpos toggles + DOUTBRACK
6394/// expectation.
6395pub fn par_cond_wordcode() {
6396 let oecused = ECUSED.get();
6397 // c:1814 — `incond = 1;`
6398 set_incond(1);
6399 // c:1815 — `incmdpos = 0;`
6400 set_incmdpos(false);
6401 // c:1816 — `zshlex();` past `[[`.
6402 zshlex();
6403 // c:1817 — `par_cond();` — call the no-skip cond-expression
6404 // entry that EMITS WORDCODE (par_cond_top → par_cond_1 →
6405 // par_cond_2 → par_cond_double/triple/multi). NOT the AST
6406 // `par_cond` at parse.rs:4644 which is a misnamed `par_dinbrack`
6407 // that skips `[[` AND `]]` and returns a ZshCommand AST node
6408 // instead of pushing WC_COND opcodes. NOT `parse_cond_expr`
6409 // either — that's also AST-only, returning ZshCond. With
6410 // `parse_cond_expr` here, every `[[ ... ]]` test produced ZERO
6411 // wordcode payload and parity dropped ~148 words on /etc/zshrc.
6412 let _ = par_cond_top();
6413 // c:1818-1819 — `if (tok != DOUTBRACK) YYERRORV(oecused);`
6414 if tok() != DOUTBRACK {
6415 let _ = oecused;
6416 zerr("missing ]]");
6417 return;
6418 }
6419 // c:1820 — `incond = 0;`
6420 set_incond(0);
6421 // c:1821 — `incmdpos = 1;`
6422 set_incmdpos(true);
6423 // c:1822 — `zshlex();` past `]]`.
6424 zshlex();
6425}
6426
6427/// Port of the `case DINPAR:` arm of `par_cmd` from
6428/// `Src/parse.c:1031-1034`:
6429/// ```c
6430/// ecadd(WCB_ARITH());
6431/// ecstr(tokstr);
6432/// zshlex();
6433/// ```
6434/// `(( EXPR ))` arithmetic at command position — emits the ARITH
6435/// opcode followed by the interned EXPR string, then advances past
6436/// the DINPAR token (which already carries the body text).
6437pub fn par_arith_wordcode() {
6438 // c:1032 — `ecadd(WCB_ARITH());`
6439 ecadd(WCB_ARITH());
6440 // c:1033 — `ecstr(tokstr);` — interns the expression string and
6441 // appends its strcode index to the wordcode buffer.
6442 let expr = tokstr().unwrap_or_default();
6443 ecstr(&expr);
6444 // c:1034 — `zshlex();`
6445 zshlex();
6446}
6447
6448/// Port of `par_simple(int *cmplx, int nr)` from
6449/// `Src/parse.c:1836-2227`. Emits WC_SIMPLE + word count +
6450/// interned string offsets. Returns `0` when nothing was emitted,
6451/// otherwise `1 + (number of code words consumed by redirections)`.
6452/// The full C body handles assignments (ENVSTRING/ENVARRAY),
6453/// inline `{var}>file` brace-FDs, prefix modifiers (NOCORRECT etc),
6454/// and `name() { body }` funcdef detection — those paths are
6455/// progressively wired into the AST parser; this wordcode-emitter
6456/// covers the simple `cmd args...` case + interleaved redirs.
6457pub fn par_simple_wordcode(cmplx: &mut i32, mut nr: i32) -> i32 {
6458 // c:1838-1841 — `int oecused = ecused, isnull = 1, r, argc = 0,
6459 // p, isfunc = 0, sr = 0;`
6460 // `int c = *cmplx, nrediradd, assignments = 0, ppost = 0,
6461 // is_typeset = 0;`
6462 // c is the SAVED initial cmplx so INOUTPAR can restore via
6463 // `*cmplx = c;` at c:2070.
6464 let _oecused = ECUSED.get() as usize;
6465 let c_saved = *cmplx;
6466 let mut isnull = true;
6467 let mut argc: u32 = 0;
6468 let mut sr: i32 = 0;
6469 let mut assignments = false;
6470 let mut isfunc = false;
6471
6472 // c:1843 — `r = ecused;` — saves the offset where redirs get
6473 // INSERTED (via ecispace). Each redir shifts later words DOWN
6474 // by ncodes, so the SIMPLE placeholder at `p` (set later) must
6475 // also bump by ncodes when a redir lands. C uses `&r` to pass
6476 // the cursor by reference; Rust uses a mutable local + manual
6477 // bumps after each par_redir_wordcode call.
6478 let mut r: usize = ECUSED.get() as usize;
6479
6480 // c:1844-1919 — pre-cmd loop: NOCORRECT, ENVSTRING (scalar
6481 // assigns), ENVARRAY (array assigns), IS_REDIROP. Loops until
6482 // a non-assignment token is seen.
6483 loop {
6484 match tok() {
6485 NOCORRECT => {
6486 // c:1846-1849
6487 *cmplx = 1;
6488 set_nocorrect(1);
6489 }
6490 ENVSTRING => {
6491 // c:1848-1898 — scalar assignment `name=value` or
6492 // `name+=value`. Emits WCB_ASSIGN(SCALAR, NEW|INC, 0)
6493 // followed by ecstr(name), ecstr(value).
6494 let raw = tokstr().unwrap_or_default();
6495 // Find first of Inbrack / '=' / '+' (the C scan at
6496 // c:1851-1853). Inside Inbrack we skipparens — i.e.
6497 // skip `name[...]` index, then continue.
6498 // c:1851-1853 — `for (ptr = tokstr; *ptr && *ptr != Inbrack
6499 // && *ptr != '=' && *ptr != '+'; ptr++); if (*ptr == Inbrack)
6500 // skipparens(Inbrack, Outbrack, &ptr);`. Walk to the first
6501 // `[`/`=`/`+`/Equals-token, then if we landed on `[`, skip
6502 // the balanced `name[index]` pair via skipparens.
6503 let bytes: Vec<char> = raw.chars().collect();
6504 let raw_str: String = bytes.iter().collect();
6505 let mut idx = 0usize;
6506 while idx < bytes.len() {
6507 let ch = bytes[idx];
6508 if ch == '\u{91}' /* Inbrack */
6509 || ch == '=' || ch == '+' || ch == '\u{8d}'
6510 /* Equals */
6511 {
6512 break;
6513 }
6514 idx += 1;
6515 }
6516 if idx < bytes.len() && bytes[idx] == '\u{91}'
6517 /* Inbrack */
6518 {
6519 // c:1855 — `skipparens(Inbrack, Outbrack, &ptr);`.
6520 let byte_off: usize = bytes[..idx].iter().map(|c| c.len_utf8()).sum();
6521 let mut cursor: &str = &raw_str[byte_off..];
6522 let _ = crate::ported::utils::skipparens('\u{91}', '\u{92}', &mut cursor);
6523 let consumed = raw_str.len() - byte_off - cursor.len();
6524 let advance_chars = raw_str[byte_off..byte_off + consumed].chars().count();
6525 idx += advance_chars;
6526 // Continue scanning for `=` / `+` after the `]`.
6527 while idx < bytes.len() {
6528 let ch = bytes[idx];
6529 if ch == '=' || ch == '+' || ch == '\u{8d}' {
6530 break;
6531 }
6532 idx += 1;
6533 }
6534 }
6535 let is_inc = idx < bytes.len() && bytes[idx] == '+';
6536 // c:1856-1858 — `if (*ptr == '+') { *ptr++ = '\0';
6537 // ecadd(WCB_ASSIGN(SCALAR, INC, 0)); } else WCB_NEW`
6538 // C nulls the `+` AT THAT POSITION then advances ptr.
6539 // `name` is bytes BEFORE the `+`, NOT including it.
6540 let name_end = idx;
6541 if is_inc {
6542 idx += 1;
6543 }
6544 let flag = if is_inc { WC_ASSIGN_INC } else { WC_ASSIGN_NEW };
6545 ecadd(WCB_ASSIGN(WC_ASSIGN_SCALAR, flag, 0));
6546 // c:1860 — `if (*ptr == '=') { *ptr = '\0'; str = ptr + 1; }
6547 // else equalsplit(tokstr, &str);`
6548 let name: String = bytes[..name_end].iter().collect();
6549 let str_off = if idx < bytes.len() && (bytes[idx] == '=' || bytes[idx] == '\u{8d}')
6550 {
6551 idx + 1
6552 } else {
6553 idx
6554 };
6555 let value: String = bytes[str_off..].iter().collect();
6556 // c:1866-1877 — scan value for `=(`/`<(`/`>(` (proc
6557 // subst); if found, bump cmplx (suppresses Z_SIMPLE).
6558 let vbytes: Vec<char> = value.chars().collect();
6559 for (i, ch) in vbytes.iter().enumerate() {
6560 if i + 1 < vbytes.len() && vbytes[i + 1] == '\u{88}'
6561 /* Inpar */
6562 {
6563 if *ch == '\u{8d}' /* Equals */
6564 || *ch == '\u{94}' /* Inang */
6565 || *ch == '\u{96}'
6566 /* OutangProc */
6567 {
6568 *cmplx = 1;
6569 break;
6570 }
6571 }
6572 }
6573 ecstr(&name);
6574 ecstr(&value);
6575 isnull = false;
6576 assignments = true;
6577 }
6578 ENVARRAY => {
6579 // c:1883-1908 — array assignment `name=( ... )` in the
6580 // pre-cmd loop (no `typeset`-style typeset_force flag).
6581 // c:1884 — `int oldcmdpos = incmdpos, n, type2;`
6582 let oldcmdpos = incmdpos();
6583 let n: u32;
6584 let type2: wordcode;
6585 let p: usize;
6586
6587 // c:1886-1889 — `array setting is cmplx because it can
6588 // contain process substitutions`
6589 // c:1890 — `*cmplx = c = 1;`
6590 *cmplx = 1;
6591 // c:1891 — `p = ecadd(0);`
6592 p = ecadd(0);
6593 // c:1892 — `incmdpos = 0;`
6594 set_incmdpos(false);
6595 // c:1893-1897 — `+=` detection: if tokstr ends in `+`,
6596 // strip the `+` and use WC_ASSIGN_INC; else WC_ASSIGN_NEW.
6597 let raw = tokstr().unwrap_or_default();
6598 let (name, t2) = if raw.ends_with('+') {
6599 (raw[..raw.len() - 1].to_string(), WC_ASSIGN_INC)
6600 } else {
6601 (raw.clone(), WC_ASSIGN_NEW)
6602 };
6603 type2 = t2;
6604 // c:1898 — `ecstr(tokstr);` (tokstr now NUL-trimmed)
6605 ecstr(&name);
6606 // c:1899 — `cmdpush(CS_ARRAY);`
6607 cmdpush(CS_ARRAY as u8);
6608 // c:1900 — `zshlex();`
6609 zshlex();
6610 // c:1901 — `n = par_nl_wordlist();`
6611 n = par_nl_wordlist_wordcode();
6612 // c:1902 — `ecbuf[p] = WCB_ASSIGN(WC_ASSIGN_ARRAY, type2, n);`
6613 ECBUF.with_borrow_mut(|b| {
6614 b[p] = WCB_ASSIGN(WC_ASSIGN_ARRAY, type2, n);
6615 });
6616 // c:1903 — `cmdpop();`
6617 cmdpop();
6618 // c:1904-1905 — `if (tok != OUTPAR) YYERROR(oecused);`
6619 if tok() != OUTPAR_TOK {
6620 zerr("par_simple: expected `)' after array assignment");
6621 return 0;
6622 }
6623 // c:1906 — `incmdpos = oldcmdpos;`
6624 set_incmdpos(oldcmdpos);
6625 // c:1907 — `isnull = 0;`
6626 isnull = false;
6627 // c:1908 — `assignments = 1;`
6628 assignments = true;
6629 }
6630 t if IS_REDIROP(t) => {
6631 // c:1900-1904 — `*cmplx = c = 1; nr += par_redir(&r,
6632 // NULL); continue;`. The wordcode-emitting redir is
6633 // distinct from the AST par_redir — it INSERTS
6634 // WCB_REDIR + fd + ecstrcode(name) at offset `r`
6635 // via ecispace, shifting any later words down.
6636 *cmplx = 1;
6637 let added = par_redir_wordcode(&mut r, None);
6638 if added == 0 {
6639 break;
6640 }
6641 nr += added;
6642 continue;
6643 }
6644 _ => break,
6645 }
6646 zshlex(); // c:1907 `zshlex();`
6647 }
6648
6649 // c:1920-1921 — `if (tok == AMPER || tok == AMPERBANG) YYERROR;`
6650 if tok() == AMPER || tok() == AMPERBANG {
6651 zerr("par_simple: unexpected &");
6652 return 0;
6653 }
6654
6655 // c:1923 — `p = ecadd(WCB_SIMPLE(0));`
6656 let mut p = ecadd(WCB_SIMPLE(0));
6657
6658 // c:1924-2105 — main words loop. is_typeset tracks whether the
6659 // outer command was `typeset`/`export`/etc. so the final
6660 // placeholder gets WCB_TYPESET instead of WCB_SIMPLE.
6661 let mut is_typeset = false;
6662 let mut postassigns: u32 = 0;
6663 let mut ppost: usize = 0;
6664 loop {
6665 match tok() {
6666 STRING_LEX | TYPESET => {
6667 // c:1926 — `int redir_var = 0;`
6668 let mut redir_var = false;
6669 // c:1928-1929 — `*cmplx = 1; incmdpos = 0;`
6670 *cmplx = 1;
6671 set_incmdpos(false);
6672 // c:1931-1932 — TYPESET → intypeset = is_typeset = 1.
6673 if tok() == TYPESET {
6674 set_intypeset(true);
6675 is_typeset = true;
6676 }
6677 let s = tokstr().unwrap_or_default();
6678 // c:1934-1974 — `{var}>file` brace-FD detection.
6679 // `if (!isset(IGNOREBRACES) && *tokstr == Inbrace)`
6680 let bytes = s.as_bytes();
6681 let first_is_inbrace = (bytes.len() >= 2 && bytes[0] == 0xc2 && bytes[1] == 0x8f)
6682 || (bytes.len() >= 1 && bytes[0] == b'{');
6683 if !isset(IGNOREBRACES) && first_is_inbrace {
6684 // c:1937-1938 — `char *eptr = tokstr + strlen(tokstr) - 1;`
6685 // `char *ptr = eptr;`
6686 // C tests `*eptr == Outbrace` (0x90 marker or `}`) AND
6687 // there's content between `{` and `}` (`ptr > tokstr + 1`).
6688 let last_two_outbrace = bytes.len() >= 2
6689 && (bytes.ends_with(&[0xc2, 0x90]) || bytes.last() == Some(&b'}'));
6690 let opener_len = if bytes.len() >= 2 && bytes[0] == 0xc2 && bytes[1] == 0x8f {
6691 2
6692 } else {
6693 1
6694 };
6695 let closer_len = if bytes.len() >= 2 && bytes.ends_with(&[0xc2, 0x90]) {
6696 2
6697 } else if bytes.last() == Some(&b'}') {
6698 1
6699 } else {
6700 0
6701 };
6702 if last_two_outbrace && bytes.len() > opener_len + closer_len {
6703 // c:1944 — `if (itype_end(tokstr+1, IIDENT, 0) >= ptr)`
6704 // Inner content is the identifier between `{` and `}`.
6705 let inner_start = opener_len;
6706 let inner_end = bytes.len() - closer_len;
6707 let inner = &s[inner_start..inner_end];
6708 if !inner.is_empty() && crate::ported::params::isident(inner) {
6709 // c:1946-1948 — `char *idstring = dupstrpfx(...);`
6710 // `redir_var = 1; zshlex();`
6711 let idstring = inner.to_string();
6712 redir_var = true;
6713 zshlex();
6714 // c:1953-1958 — `if (IS_REDIROP(tok) && tokfd == -1)
6715 // { *cmplx = c = 1; nrediradd = par_redir(&r, id);
6716 // p += nrediradd; sr += nrediradd; }`
6717 if IS_REDIROP(tok()) && tokfd() == -1 {
6718 *cmplx = 1;
6719 let nrediradd = par_redir_wordcode(&mut r, Some(&idstring));
6720 p += nrediradd as usize;
6721 sr += nrediradd;
6722 } else if postassigns > 0 {
6723 // c:1959-1966 — postassigns path: emit
6724 // WCB_ASSIGN(SCALAR, INC, 0) + name + ""
6725 postassigns += 1;
6726 ecadd(WCB_ASSIGN(WC_ASSIGN_SCALAR, WC_ASSIGN_INC, 0));
6727 ecstr(&s);
6728 ecstr("");
6729 } else {
6730 // c:1968-1972 — `else { ecstr(toksave); argc++; }`
6731 ecstr(&s);
6732 argc += 1;
6733 }
6734 }
6735 }
6736 }
6737 if !redir_var {
6738 // c:1977-1996 — normal (non-redir-var) STRING/TYPESET.
6739 if postassigns > 0 {
6740 // c:1979-1989 — typeset with bare-name arg → INC
6741 postassigns += 1;
6742 ecadd(WCB_ASSIGN(WC_ASSIGN_SCALAR, WC_ASSIGN_INC, 0));
6743 ecstr(&s);
6744 ecstr("");
6745 } else {
6746 ecstr(&s);
6747 argc += 1;
6748 }
6749 zshlex();
6750 }
6751 isnull = false;
6752 }
6753 ENVSTRING => {
6754 // c:2005-2026 — mid-cmd ENVSTRING (under intypeset
6755 // context). Emits WCB_ASSIGN(SCALAR, NEW, 0) then
6756 // ecstr(name) + ecstr(value), tracking the first
6757 // postassign offset in `ppost` (which the trailing
6758 // WCB_TYPESET header points to).
6759 if postassigns == 0 {
6760 ppost = ecadd(0);
6761 }
6762 postassigns += 1;
6763 // c:2010-2014 — `for (ptr = tokstr; *ptr && *ptr != Inbrack
6764 // && *ptr != '=' && *ptr != '+'; ptr++); if (*ptr == Inbrack)
6765 // skipparens(Inbrack, Outbrack, &ptr);`.
6766 let raw = tokstr().unwrap_or_default();
6767 let bytes: Vec<char> = raw.chars().collect();
6768 let mut idx = 0usize;
6769 while idx < bytes.len() {
6770 let ch = bytes[idx];
6771 if ch == '\u{91}' /* Inbrack */
6772 || ch == '=' || ch == '+' || ch == '\u{8d}'
6773 /* Equals */
6774 {
6775 break;
6776 }
6777 idx += 1;
6778 }
6779 if idx < bytes.len() && bytes[idx] == '\u{91}'
6780 /* Inbrack */
6781 {
6782 // c:2014 — `skipparens(Inbrack, Outbrack, &ptr);`.
6783 let byte_off: usize = bytes[..idx].iter().map(|c| c.len_utf8()).sum();
6784 let mut cursor: &str = &raw[byte_off..];
6785 let _ = crate::ported::utils::skipparens('\u{91}', '\u{92}', &mut cursor);
6786 let consumed = raw.len() - byte_off - cursor.len();
6787 let advance_chars = raw[byte_off..byte_off + consumed].chars().count();
6788 idx += advance_chars;
6789 while idx < bytes.len() {
6790 let ch = bytes[idx];
6791 if ch == '=' || ch == '+' || ch == '\u{8d}' {
6792 break;
6793 }
6794 idx += 1;
6795 }
6796 }
6797 let name: String = bytes[..idx].iter().collect();
6798 let str_off = if idx < bytes.len() && (bytes[idx] == '=' || bytes[idx] == '\u{8d}')
6799 {
6800 idx + 1
6801 } else {
6802 idx
6803 };
6804 let value: String = bytes[str_off..].iter().collect();
6805 ecadd(WCB_ASSIGN(WC_ASSIGN_SCALAR, WC_ASSIGN_NEW, 0));
6806 ecstr(&name);
6807 ecstr(&value);
6808 isnull = false;
6809 zshlex();
6810 }
6811 ENVARRAY => {
6812 // c:2027-2050 — mid-cmd ENVARRAY (typeset N=(…) form).
6813 // C tracks postassigns + ppost the same as ENVSTRING,
6814 // but the inner emit is WCB_ASSIGN(ARRAY, NEW, n)
6815 // with `n` patched in after par_nl_wordlist consumes
6816 // the elements. C also toggles intypeset=0 around the
6817 // wordlist so the lexer doesn't try to re-emit
6818 // assignments inside the array.
6819 *cmplx = 1;
6820 if postassigns == 0 {
6821 ppost = ecadd(0);
6822 }
6823 postassigns += 1;
6824 let parr = ecadd(0);
6825 let raw = tokstr().unwrap_or_default();
6826 let is_inc = raw.ends_with('+');
6827 let name = if is_inc {
6828 &raw[..raw.len() - 1]
6829 } else {
6830 raw.as_str()
6831 };
6832 let flag = if is_inc { WC_ASSIGN_INC } else { WC_ASSIGN_NEW };
6833 ecstr(name);
6834 cmdpush(CS_ARRAY as u8);
6835 set_intypeset(false);
6836 zshlex();
6837 // c:2044 — `n = par_nl_wordlist();` (parse.c:2379-2391).
6838 // SEPER + NEWLIN both allowed between elements.
6839 let mut nelem = 0u32;
6840 loop {
6841 let t = tok();
6842 if t != STRING_LEX && t != SEPER && t != NEWLIN {
6843 break;
6844 }
6845 if t == STRING_LEX {
6846 ecstr(&tokstr().unwrap_or_default());
6847 nelem += 1;
6848 }
6849 zshlex();
6850 }
6851 ECBUF.with_borrow_mut(|b| {
6852 if parr < b.len() {
6853 b[parr] = WCB_ASSIGN(WC_ASSIGN_ARRAY, flag, nelem);
6854 }
6855 });
6856 cmdpop();
6857 set_intypeset(true);
6858 if tok() != OUTPAR_TOK {
6859 zerr("expected `)' after array assignment");
6860 return 0;
6861 }
6862 isnull = false;
6863 zshlex();
6864 }
6865 t if IS_REDIROP(t) => {
6866 // c:1999-2010 — `nrediradd = par_redir(&r, NULL);
6867 // p += nrediradd; if (ppost) ppost += nrediradd;
6868 // sr += nrediradd;`
6869 *cmplx = 1;
6870 let added = par_redir_wordcode(&mut r, None);
6871 if added == 0 {
6872 break;
6873 }
6874 p += added as usize;
6875 if ppost != 0 {
6876 ppost += added as usize;
6877 }
6878 sr += added;
6879 }
6880 INOUTPAR => {
6881 // c:2051 — `} else if (tok == INOUTPAR) {`
6882 // c:2052 — `zlong oldlineno = lineno;`
6883 let oldlineno = lineno();
6884 // c:2053 — `int onp, so, oecssub = ecssub;`
6885 let oecssub = ECSSUB.get();
6886 // c:2055-2057 — `if (!isset(MULTIFUNCDEF) && argc > 1) YYERROR;`
6887 if !isset(MULTIFUNCDEF) && argc > 1 {
6888 zerr("par_simple: too many function names for funcdef");
6889 return 0;
6890 }
6891 // c:2058-2060 — `if (assignments || postassigns) YYERROR;`
6892 if assignments || postassigns > 0 {
6893 zerr("par_simple: assignments before funcdef");
6894 return 0;
6895 }
6896 // c:2061-2068 — hasalias check + zwarn — skipped (no
6897 // alias tracking on the wordcode path).
6898
6899 // c:2070 — `*cmplx = c;`
6900 *cmplx = c_saved;
6901 // c:2071 — `lineno = 0;`
6902 set_lineno(0);
6903 // c:2072 — `incmdpos = 1;`
6904 set_incmdpos(true);
6905 // c:2073 — `cmdpush(CS_FUNCDEF);`
6906 cmdpush(CS_FUNCDEF as u8);
6907 // c:2074 — `zshlex();`
6908 zshlex();
6909 // c:2075-2076 — `while (tok == SEPER) zshlex();`
6910 while tok() == SEPER {
6911 zshlex();
6912 }
6913 // c:2079 — `ecispace(p + 1, 1); ecbuf[p+1] = argc;
6914 // ecadd(0)*4`. Insert the argc word at p+1, then
6915 // append 4 placeholder words.
6916 ecispace(p + 1, 1);
6917 ECBUF.with_borrow_mut(|b| {
6918 if p + 1 < b.len() {
6919 b[p + 1] = argc;
6920 }
6921 });
6922 // c:2080-2083 — four metadata placeholder slots.
6923 ecadd(0);
6924 ecadd(0);
6925 ecadd(0);
6926 ecadd(0);
6927
6928 // c:2085 — `ecnfunc++;`
6929 ECNFUNC.set(ECNFUNC.get() + 1);
6930 // c:2086 — `ecssub = so = ecsoffs;`
6931 let so = ECSOFFS.get();
6932 ECSSUB.set(so);
6933 // c:2087 — `onp = ecnpats;`
6934 let onp = ECNPATS.with(|cc| cc.get());
6935 // c:2088 — `ecnpats = 0;`
6936 ECNPATS.with(|cc| cc.set(0));
6937
6938 // c:2091 — `int c = 0;` — INNER cmplx for the body
6939 // parse. Local to each branch; C's enclosing *cmplx
6940 // is NOT modified by the body.
6941 let mut body_c: i32 = 0;
6942 // c:2090 — `if (tok == INBRACE) {`
6943 if tok() == INBRACE_TOK {
6944 // c:2093 — `zshlex();`
6945 zshlex();
6946 // c:2094 — `par_list(&c);`
6947 par_list_wordcode(&mut body_c);
6948 // c:2095-2101 — `if (tok != OUTBRACE) { cmdpop();
6949 // lineno += oldlineno; ecnpats = onp;
6950 // ecssub = oecssub; YYERROR; }`
6951 if tok() != OUTBRACE_TOK {
6952 cmdpop();
6953 set_lineno(lineno() + oldlineno);
6954 ECNPATS.with(|cc| cc.set(onp));
6955 ECSSUB.set(oecssub);
6956 zerr("par_simple: funcdef expected `}`");
6957 return 0;
6958 }
6959 // c:2102-2105 — `if (argc == 0) incmdpos = 0;`
6960 if argc == 0 {
6961 set_incmdpos(false);
6962 }
6963 // c:2106 — `zshlex();`
6964 zshlex();
6965 } else {
6966 // c:2107-2132 — short-body funcdef form: `f() cmd`
6967 // or `() cmd`. Wraps single par_cmd result in a
6968 // synthetic WC_LIST / WC_SUBLIST /
6969 // WC_PIPE(WC_PIPE_END, 0) header trio.
6970 let ll = ecadd(0);
6971 let sl = ecadd(0);
6972 ecadd(WCB_PIPE(WC_PIPE_END, 0));
6973 let ok = par_cmd_wordcode(&mut body_c, if argc == 0 { 1 } else { 0 });
6974 if !ok {
6975 cmdpop();
6976 zerr("par_simple: funcdef short-body: missing command");
6977 return 0;
6978 }
6979 if argc == 0 {
6980 // c:2118-2127 — anonymous funcdef may take args
6981 // after the body; first one already read.
6982 set_incmdpos(false);
6983 }
6984 // c:2130-2131 — inner sublist/list use inner cmplx.
6985 let used = ECUSED.get() as usize;
6986 set_sublist_code(
6987 sl,
6988 WC_SUBLIST_END as i32,
6989 0,
6990 (used.saturating_sub(1 + sl)) as i32,
6991 body_c != 0,
6992 );
6993 set_list_code(ll, Z_SYNC | Z_END, body_c != 0);
6994 }
6995 let _ = body_c;
6996 // c:2133 — `cmdpop();`
6997 cmdpop();
6998
6999 // c:2135 — `ecadd(WCB_END());`
7000 ecadd(WCB_END());
7001 // c:2136-2139 — fill 4 metadata slots at p+argc+2..5
7002 let p_argc = (p + (argc as usize) + 2) as usize;
7003 let cur_so = ECSOFFS.get();
7004 let np_now = ECNPATS.with(|cc| cc.get());
7005 ECBUF.with_borrow_mut(|b| {
7006 b[p_argc] = (so - oecssub) as wordcode;
7007 b[p_argc + 1] = (cur_so - so) as wordcode;
7008 b[p_argc + 2] = np_now as wordcode;
7009 b[p_argc + 3] = 0;
7010 });
7011
7012 // c:2141-2143 — `ecnpats = onp; ecssub = oecssub; ecnfunc++;`
7013 ECNPATS.with(|cc| cc.set(onp));
7014 ECSSUB.set(oecssub);
7015 ECNFUNC.set(ECNFUNC.get() + 1);
7016
7017 // c:2145 — `ecbuf[p] = WCB_FUNCDEF(ecused - 1 - p);`
7018 let used = ECUSED.get() as usize;
7019 let header_off = used.saturating_sub(1 + p) as wordcode;
7020 ECBUF.with_borrow_mut(|b| {
7021 b[p] = WCB_FUNCDEF(header_off);
7022 });
7023
7024 // c:2147-2172 — `if (argc == 0) { /* anonymous fn args */ }`
7025 if argc == 0 {
7026 // c:2150 — `int parg = ecadd(0);`
7027 let mut parg = ecadd(0);
7028 // c:2151 — `ecadd(0);`
7029 ecadd(0);
7030 // c:2152 — `while (tok == STRING || IS_REDIROP(tok)) {`
7031 while tok() == STRING_LEX || IS_REDIROP(tok()) {
7032 if tok() == STRING_LEX {
7033 // c:2155-2157
7034 ecstr(&tokstr().unwrap_or_default());
7035 argc += 1;
7036 zshlex();
7037 } else {
7038 // c:2159-2165 — *cmplx=c=1; nrediradd=par_redir;
7039 // p += nrediradd; ppost += nrediradd if ppost;
7040 // sr += nrediradd; parg += nrediradd;
7041 *cmplx = 1;
7042 let added = par_redir_wordcode(&mut r, None);
7043 if added == 0 {
7044 break;
7045 }
7046 p += added as usize;
7047 if ppost != 0 {
7048 ppost += added as usize;
7049 }
7050 sr += added;
7051 parg += added as usize;
7052 }
7053 }
7054 // c:2168-2169 — `if (argc > 0) *cmplx = 1;`
7055 if argc > 0 {
7056 *cmplx = 1;
7057 }
7058 // c:2170 — `ecbuf[parg] = ecused - parg;`
7059 // c:2171 — `ecbuf[parg+1] = argc;`
7060 let used2 = ECUSED.get() as usize;
7061 ECBUF.with_borrow_mut(|b| {
7062 b[parg] = (used2 - parg) as wordcode;
7063 b[parg + 1] = argc;
7064 });
7065 }
7066 // c:2173 — `lineno += oldlineno;`
7067 set_lineno(lineno() + oldlineno);
7068
7069 // c:2175-2177 — `isfunc = 1; isnull = 0; break;`
7070 isfunc = true;
7071 isnull = false;
7072 break;
7073 }
7074 _ => break,
7075 }
7076 }
7077
7078 // c:2173-2176 — `if (isnull && !(sr + nr)) { ecused = oecused;
7079 // return 0; }` — undo everything including pre-cmd assignments
7080 // if no actual command word emerged.
7081 if isnull && sr + nr == 0 && !assignments {
7082 ECUSED.set(p as i32);
7083 return 0;
7084 }
7085 // c:2186-2187 — `incmdpos = 1; intypeset = 0;` — reset before
7086 // the placeholder patch so the next-token lex doesn't carry
7087 // typeset/incond state.
7088 set_incmdpos(true);
7089 set_intypeset(false);
7090 // c:2189-2199 — `if (!isfunc) { if (is_typeset) ecbuf[p] =
7091 // WCB_TYPESET(argc); else ecbuf[p] = WCB_SIMPLE(argc); }`.
7092 // When isfunc=true the INOUTPAR branch already wrote WCB_FUNCDEF
7093 // at p; do NOT clobber it.
7094 if !isfunc {
7095 let header = if is_typeset {
7096 if postassigns > 0 {
7097 ECBUF.with_borrow_mut(|b| {
7098 if ppost < b.len() {
7099 b[ppost] = postassigns;
7100 }
7101 });
7102 } else {
7103 ecadd(0);
7104 }
7105 WCB_TYPESET(argc)
7106 } else {
7107 WCB_SIMPLE(argc)
7108 };
7109 ECBUF.with_borrow_mut(|b| {
7110 if p < b.len() {
7111 b[p] = header;
7112 }
7113 });
7114 }
7115 1 + sr
7116}
7117
7118/// Port of `par_redir(int *rp, char *idstring)` from
7119/// `Src/parse.c:2229-2345` — the wordcode-emitting variant that
7120/// pushes WCB_REDIR + fd + ecstrcode(name) into ECBUF. Distinct
7121/// from the AST `par_redir` (parse.rs:3771) which builds a
7122/// ZshRedir struct for the AST executor pipeline.
7123///
7124/// Returns the number of wordcodes added (3 for the basic shape,
7125/// 4 with idstring, 5 for HEREDOC[DASH] which carries the
7126/// terminator strings inline). Returns 0 on parse error.
7127///
7128/// `idstring` mirrors C's `char *idstring` parameter — `None` =
7129/// NULL (no `{var}>file` brace-FD shape), `Some(id)` = the captured
7130/// `{var}` name. C callers without a var pass NULL inline; Rust
7131/// callers do the same with `None`.
7132fn par_redir_wordcode(rp: &mut usize, idstring: Option<&str>) -> i32 {
7133 // c:2231 — `int r = *rp, type, fd1, oldcmdpos, oldnc, ncodes;`
7134 let r: usize = *rp;
7135 let mut r#type: i32;
7136 let fd1: i32;
7137 let oldcmdpos: bool;
7138 let oldnc: i32;
7139 let mut ncodes: usize;
7140 // c:2232 — `char *name;`
7141 let name: String;
7142
7143 // c:2234 — `oldcmdpos = incmdpos;`
7144 oldcmdpos = incmdpos();
7145 // c:2235 — `incmdpos = 0;`
7146 set_incmdpos(false);
7147 // c:2236 — `oldnc = nocorrect;`
7148 oldnc = nocorrect();
7149 // c:2237-2238 — `if (tok != INANG && tok != INOUTANG) nocorrect = 1;`
7150 if tok() != INANG_TOK && tok() != INOUTANG {
7151 set_nocorrect(1);
7152 }
7153 // c:2239 — `type = redirtab[tok - OUTANG];`
7154 // Map current redirop token to redirtab index — matches order of
7155 // C `enum { OUTANG, OUTANGBANG, DOUTANG, DOUTANGBANG, INANG,
7156 // INOUTANG, DINANG, DINANGDASH, INANGAMP, OUTANGAMP, AMPOUTANG,
7157 // OUTANGAMPBANG, DOUTANGAMP, DOUTANGAMPBANG, TRINANG }`.
7158 r#type = match tok() {
7159 OUTANG_TOK => REDIR_WRITE,
7160 OUTANGBANG => REDIR_WRITENOW,
7161 DOUTANG => REDIR_APP,
7162 DOUTANGBANG => REDIR_APPNOW,
7163 INANG_TOK => REDIR_READ,
7164 INOUTANG => REDIR_READWRITE,
7165 DINANG => REDIR_HEREDOC,
7166 DINANGDASH => REDIR_HEREDOCDASH,
7167 INANGAMP => REDIR_MERGEIN,
7168 OUTANGAMP => REDIR_MERGEOUT,
7169 AMPOUTANG => REDIR_ERRWRITE,
7170 OUTANGAMPBANG => REDIR_ERRWRITENOW,
7171 DOUTANGAMP => REDIR_ERRAPP,
7172 DOUTANGAMPBANG => REDIR_ERRAPPNOW,
7173 TRINANG => REDIR_HERESTR,
7174 _ => {
7175 set_incmdpos(oldcmdpos);
7176 set_nocorrect(oldnc);
7177 return 0;
7178 }
7179 };
7180 // c:2240 — `fd1 = tokfd;`
7181 fd1 = tokfd();
7182 // c:2241 — `zshlex();`
7183 zshlex();
7184 // c:2242-2243 — `if (tok != STRING && tok != ENVSTRING) YYERROR(ecused);`
7185 if tok() != STRING_LEX && tok() != ENVSTRING {
7186 set_incmdpos(oldcmdpos);
7187 set_nocorrect(oldnc);
7188 zerr("expected word after redirection");
7189 return 0;
7190 }
7191 // c:2244 — `incmdpos = oldcmdpos;`
7192 set_incmdpos(oldcmdpos);
7193 // c:2245 — `nocorrect = oldnc;`
7194 set_nocorrect(oldnc);
7195
7196 // c:2248-2249 — `if (fd1 == -1) fd1 = IS_READFD(type) ? 0 : 1;`
7197 let fd1 = if fd1 == -1 {
7198 if is_readfd(r#type) {
7199 0
7200 } else {
7201 1
7202 }
7203 } else {
7204 fd1
7205 };
7206
7207 // c:2251 — `name = tokstr;`
7208 name = tokstr().unwrap_or_default();
7209
7210 // c:2253-2321 — switch on type:
7211 match r#type {
7212 // c:2254-2300 — REDIR_HEREDOC / REDIR_HEREDOCDASH
7213 x if x == REDIR_HEREDOC || x == REDIR_HEREDOCDASH => {
7214 // c:2257 — `struct heredocs **hd;`
7215 // c:2258 — `int htype = type;`
7216 let htype = r#type;
7217 // c:2260-2261 — `if (strchr(tokstr, '\n')) YYERROR(ecused);`
7218 if name.contains('\n') {
7219 zerr("here-doc terminator contains newline");
7220 return 0;
7221 }
7222 // c:2263-2273 — `ncodes = 5; if (idstring) { type |= MASK; ncodes = 6; }`
7223 if idstring.is_some() {
7224 r#type |= REDIR_VARID_MASK;
7225 ncodes = 6;
7226 } else {
7227 ncodes = 5;
7228 }
7229 // c:2277 — `ecispace(r, ncodes);`
7230 ecispace(r, ncodes);
7231 // c:2278 — `*rp = r + ncodes;`
7232 *rp = r + ncodes;
7233 // c:2279 — `ecbuf[r] = WCB_REDIR(type | REDIR_FROM_HEREDOC_MASK);`
7234 ECBUF.with_borrow_mut(|b| {
7235 b[r] = WCB_REDIR((r#type | REDIR_FROM_HEREDOC_MASK) as wordcode);
7236 // c:2280 — `ecbuf[r + 1] = fd1;`
7237 b[r + 1] = fd1 as wordcode;
7238 });
7239 // c:2282-2286 — r+2..4 are filled later by setheredoc.
7240 // c:2287-2288 — `if (idstring) ecbuf[r + 5] = ecstrcode(idstring);`
7241 if let Some(id) = idstring {
7242 let coded = ecstrcode(id);
7243 ECBUF.with_borrow_mut(|b| {
7244 b[r + 5] = coded;
7245 });
7246 }
7247 // c:2290-2296 — `for (hd = &hdocs; *hd; hd = &(*hd)->next);
7248 // *hd = zalloc(sizeof(struct heredocs));
7249 // (*hd)->next = NULL;
7250 // (*hd)->type = htype;
7251 // (*hd)->pc = r;
7252 // (*hd)->str = tokstr;`
7253 HDOCS.with_borrow_mut(|head| {
7254 let mut cur = head;
7255 while cur.is_some() {
7256 cur = &mut cur.as_mut().unwrap().next; // c:2290
7257 }
7258 *cur = Some(Box::new(crate::ported::zsh_h::heredocs {
7259 // c:2292-2296
7260 next: None,
7261 typ: htype,
7262 pc: r as i32,
7263 str: Some(name.clone()),
7264 }));
7265 });
7266 // c:2298 — `zshlex();`
7267 zshlex();
7268 // c:2299 — `return ncodes;`
7269 return ncodes as i32;
7270 }
7271 // c:2301-2308 — REDIR_WRITE / REDIR_WRITENOW
7272 x if x == REDIR_WRITE || x == REDIR_WRITENOW => {
7273 // c:2303-2305 — `if (tokstr[0] == OutangProc && tokstr[1] == Inpar)
7274 // type = REDIR_OUTPIPE;`
7275 let nb: Vec<char> = name.chars().collect();
7276 if nb.len() >= 2 && nb[0] == '\u{96}' && nb[1] == '\u{88}' {
7277 r#type = REDIR_OUTPIPE;
7278 } else if nb.len() >= 2 && nb[0] == '\u{94}' && nb[1] == '\u{88}' {
7279 // c:2306-2307 — `else if (tokstr[0] == Inang && tokstr[1] == Inpar) YYERROR;`
7280 zerr("par_redir: < before >");
7281 return 0;
7282 }
7283 }
7284 // c:2309-2315 — REDIR_READ
7285 x if x == REDIR_READ => {
7286 let nb: Vec<char> = name.chars().collect();
7287 if nb.len() >= 2 && nb[0] == '\u{94}' && nb[1] == '\u{88}' {
7288 r#type = REDIR_INPIPE;
7289 } else if nb.len() >= 2 && nb[0] == '\u{96}' && nb[1] == '\u{88}' {
7290 zerr("par_redir: > before <");
7291 return 0;
7292 }
7293 }
7294 // c:2316-2320 — REDIR_READWRITE
7295 x if x == REDIR_READWRITE => {
7296 let nb: Vec<char> = name.chars().collect();
7297 if nb.len() >= 2 && (nb[0] == '\u{94}' || nb[0] == '\u{96}') && nb[1] == '\u{88}' {
7298 r#type = if nb[0] == '\u{94}' {
7299 REDIR_INPIPE
7300 } else {
7301 REDIR_OUTPIPE
7302 };
7303 }
7304 }
7305 _ => {}
7306 }
7307 // c:2322 — `zshlex();`
7308 zshlex();
7309
7310 // c:2326-2333 — `if (idstring) { type |= MASK; ncodes = 4; } else ncodes = 3;`
7311 if idstring.is_some() {
7312 r#type |= REDIR_VARID_MASK;
7313 ncodes = 4;
7314 } else {
7315 ncodes = 3;
7316 }
7317
7318 // c:2334 — `ecispace(r, ncodes);`
7319 ecispace(r, ncodes);
7320 // c:2335 — `*rp = r + ncodes;`
7321 *rp = r + ncodes;
7322 // c:2336 — `ecbuf[r] = WCB_REDIR(type);`
7323 let coded_name = ecstrcode(&name);
7324 ECBUF.with_borrow_mut(|b| {
7325 b[r] = WCB_REDIR(r#type as wordcode);
7326 // c:2337 — `ecbuf[r + 1] = fd1;`
7327 b[r + 1] = fd1 as wordcode;
7328 // c:2338 — `ecbuf[r + 2] = ecstrcode(name);`
7329 b[r + 2] = coded_name;
7330 });
7331 // c:2339-2340 — `if (idstring) ecbuf[r + 3] = ecstrcode(idstring);`
7332 if let Some(id) = idstring {
7333 let coded_id = ecstrcode(id);
7334 ECBUF.with_borrow_mut(|b| {
7335 b[r + 3] = coded_id;
7336 });
7337 }
7338 // c:2342 — `return ncodes;`
7339 ncodes as i32
7340}
7341
7342/// Port of `IS_READFD(type)` macro from `Src/zsh.h` — determines
7343/// default fd (0 for read-ish, 1 for write-ish) when none specified.
7344fn is_readfd(t: i32) -> bool {
7345 matches!(
7346 t,
7347 x if x == REDIR_READ
7348 || x == REDIR_READWRITE
7349 || x == REDIR_MERGEIN
7350 || x == REDIR_HEREDOC
7351 || x == REDIR_HEREDOCDASH
7352 || x == REDIR_HERESTR
7353 )
7354}
7355
7356/// Parse a program (list of lists)
7357/// Parse a complete program (top-level entry). Calls
7358/// parse_program_until with no end-token sentinel. Direct port of
7359/// zsh/Src/parse.c:614-720 `parse_event` / `par_list` /
7360/// `par_event` flow. C distinguishes COND_EVENT (single command
7361/// for here-string) from full event parse; zshrs's parse_program
7362/// is the full-event entry.
7363fn parse_program() -> ZshProgram {
7364 parse_program_until(None)
7365}
7366
7367/// Parse a program until we hit an end token
7368/// Parse a program until one of `end_tokens` is seen (or EOF).
7369/// Drives par_list in a loop. C equivalent: the body of par_event
7370/// (parse.c:635-695) iterating par_list against the lexer.
7371fn parse_program_until(end_tokens: Option<&[lextok]>) -> ZshProgram {
7372 let mut lists = Vec::new();
7373
7374 loop {
7375 // Skip separators
7376 while tok() == SEPER || tok() == NEWLIN {
7377 zshlex();
7378 }
7379
7380 if tok() == ENDINPUT {
7381 break;
7382 }
7383 if tok() == LEXERR {
7384 // c:Src/parse.c:671-680 par_event — when the lexer
7385 // returned LEXERR (e.g. unbalanced `$((1+(2))` math
7386 // sub, unterminated string, etc.), C emits `yyerror(1)`
7387 // and sets errflag so the script aborts with a parse
7388 // error diagnostic + non-zero exit. zshrs's
7389 // parse_program_until previously just `break`'d on
7390 // LEXERR, silently swallowing the malformed input and
7391 // exiting rc=0 — so `$((1+(2))` ran as if it were
7392 // empty. Bug #529 in docs/BUGS.md. Emit yyerror
7393 // mirroring the C behaviour; the broken script then
7394 // surfaces the parse error to the caller.
7395 yyerror("");
7396 break;
7397 }
7398
7399 // Check for end tokens
7400 if let Some(end_toks) = end_tokens {
7401 if end_toks.contains(&tok()) {
7402 break;
7403 }
7404 }
7405
7406 // Also stop at these tokens when not explicitly looking for them
7407 // Note: Else/Elif/Then are NOT here - they're handled by par_if
7408 // to allow nested if statements inside case arms, loops, etc.
7409 //
7410 // c:Src/parse.c:par_event — when an orphan terminator (DONE
7411 // outside a loop, FI outside an if, ESAC outside a case)
7412 // appears at the top level (end_tokens=None), C errors via
7413 // YYERROR. zshrs's `break` silently accepted `done`/`fi`/
7414 // `esac` as no-op input. Error at the outermost call so
7415 // unscoped terminators don't sneak through; nested calls
7416 // still break cleanly via the end_tokens contains-check
7417 // above.
7418 match tok() {
7419 DONE | FI | ESAC | DOLOOP if end_tokens.is_none() => {
7420 // c:Src/parse.c:par_event — emit the specific token
7421 // name (`done`, `fi`, `esac`, `do`) so error-parsing
7422 // tools can identify the unmatched terminator. C zsh
7423 // writes `parse error near \`<tok>'`; the Rust port
7424 // was emitting a generic "orphan terminator" string.
7425 // Bug #142, #413.
7426 let name = match tok() {
7427 DONE => "done",
7428 FI => "fi",
7429 ESAC => "esac",
7430 DOLOOP => "do",
7431 _ => "orphan terminator",
7432 };
7433 zerr(&format!("parse error near `{}'", name));
7434 break;
7435 }
7436 DSEMI | SEMIAMP | SEMIBAR if end_tokens.is_none() => {
7437 // c:Src/parse.c:par_event — case-arm terminators
7438 // (`;;`, `;&`, `;|`) outside a case construct are a
7439 // parse error. zshrs's `break` silently accepted them
7440 // at top level, truncating the rest of the script.
7441 // Bug #141 in docs/BUGS.md.
7442 let name = match tok() {
7443 DSEMI => ";;",
7444 SEMIAMP => ";&",
7445 SEMIBAR => ";|",
7446 _ => "case terminator",
7447 };
7448 zerr(&format!("parse error near `{}'", name));
7449 break;
7450 }
7451 OUTBRACE_TOK if end_tokens.is_none() => {
7452 // c:Src/parse.c:par_event — orphan `}` (no matching
7453 // `{` opener) at top level is a parse error. zshrs's
7454 // generic break swallowed it silently, leaving the
7455 // `echo a` in `echo a }` running and ignoring the
7456 // stray brace. Bug #168 in docs/BUGS.md.
7457 zerr("parse error near `}'");
7458 break;
7459 }
7460 OUTBRACE_TOK | DSEMI | SEMIAMP | SEMIBAR | DONE | FI | ESAC | ZEND => break,
7461 _ => {}
7462 }
7463
7464 match par_list() {
7465 Some(list) => {
7466 let detected = simple_name_with_inoutpar(&list);
7467 lists.push(list);
7468 // Synthesize a FuncDef for the `name() { body }` shape
7469 // at parse time so body_source is captured while the
7470 // lexer still has the input. The lexer port emits
7471 // `name(` as a single Word ending in `<Inpar><Outpar>`,
7472 // so the Simple list is followed by an Inbrace once
7473 // separators are skipped. For `name() cmd args` the
7474 // body has already been swallowed into the same
7475 // Simple's words tail — synthesize directly from there.
7476 if let Some((names, body_argv)) = detected {
7477 if !body_argv.is_empty() {
7478 // One-line body already in the Simple. Build
7479 // a Simple from body_argv as the function body.
7480 lists.pop();
7481 let body_simple = ZshCommand::Simple(ZshSimple {
7482 assigns: Vec::new(),
7483 words: body_argv,
7484 redirs: Vec::new(),
7485 });
7486 let body_list = ZshList {
7487 sublist: ZshSublist {
7488 pipe: ZshPipe {
7489 cmd: body_simple,
7490 next: None,
7491 lineno: lineno(),
7492 merge_stderr: false,
7493 },
7494 next: None,
7495 flags: SublistFlags::default(),
7496 },
7497 flags: ListFlags::default(),
7498 };
7499 let funcdef = ZshCommand::FuncDef(ZshFuncDef {
7500 names,
7501 body: Box::new(ZshProgram {
7502 lists: vec![body_list],
7503 }),
7504 tracing: false,
7505 auto_call_args: None,
7506 body_source: None,
7507 });
7508 let synthetic = ZshList {
7509 sublist: ZshSublist {
7510 pipe: ZshPipe {
7511 cmd: funcdef,
7512 next: None,
7513 lineno: lineno(),
7514 merge_stderr: false,
7515 },
7516 next: None,
7517 flags: SublistFlags::default(),
7518 },
7519 flags: ListFlags::default(),
7520 };
7521 lists.push(synthetic);
7522 continue;
7523 }
7524 // Else: words.len() == 1 (only the trailing `name()`
7525 // word), brace body follows. `names` may carry
7526 // multiple identifiers from the `fna fnb fnc()`
7527 // shorthand — all share the same brace body per
7528 // src/zsh/Src/parse.c:1666 par_funcdef wordlist.
7529 // Skip separators on the real lexer; safe because
7530 // parse_program's next iteration would also skip them.
7531 while tok() == SEPER || tok() == NEWLIN {
7532 zshlex();
7533 }
7534 if tok() == INBRACE_TOK {
7535 // Capture body_start BEFORE the lexer
7536 // advances past the first body token. The
7537 // outer zshlex() consumed `{`; lexer.pos
7538 // is now right after `{`. The next
7539 // `zshlex()` would advance past `echo`,
7540 // making body_start land mid-body and
7541 // lose the first word — `typeset -f f`
7542 // printed `a; echo b` instead of
7543 // `echo a; echo b` for `f() { echo a;
7544 // echo b }`.
7545 let body_start = pos();
7546 zshlex();
7547 // c:Src/parse.c — synth funcdef body terminates
7548 // at OUTBRACE_TOK. Explicit end-token avoids
7549 // the top-level stray-`}` arm. Bug #167/#168.
7550 let body = parse_program_until(Some(&[OUTBRACE_TOK]));
7551 let body_end = if tok() == OUTBRACE_TOK {
7552 pos().saturating_sub(1)
7553 } else {
7554 pos()
7555 };
7556 let body_source = input_slice(body_start, body_end)
7557 .map(|s| s.trim().to_string())
7558 .filter(|s| !s.is_empty());
7559 if tok() == OUTBRACE_TOK {
7560 zshlex();
7561 }
7562 // Replace the Simple list with a FuncDef list.
7563 lists.pop();
7564 let funcdef = ZshCommand::FuncDef(ZshFuncDef {
7565 names,
7566 body: Box::new(body),
7567 tracing: false,
7568 auto_call_args: None,
7569 body_source,
7570 });
7571 let synthetic = ZshList {
7572 sublist: ZshSublist {
7573 pipe: ZshPipe {
7574 cmd: funcdef,
7575 next: None,
7576 lineno: lineno(),
7577 merge_stderr: false,
7578 },
7579 next: None,
7580 flags: SublistFlags::default(),
7581 },
7582 flags: ListFlags::default(),
7583 };
7584 lists.push(synthetic);
7585 } else if !matches!(tok(), ENDINPUT | OUTBRACE_TOK | SEPER | NEWLIN) {
7586 // No-brace one-line body: `foo() echo hello`.
7587 // Parse a single command for the body.
7588 let body_cmd = par_cmd();
7589 if let Some(cmd) = body_cmd {
7590 let body_list = ZshList {
7591 sublist: ZshSublist {
7592 pipe: ZshPipe {
7593 cmd,
7594 next: None,
7595 lineno: lineno(),
7596 merge_stderr: false,
7597 },
7598 next: None,
7599 flags: SublistFlags::default(),
7600 },
7601 flags: ListFlags::default(),
7602 };
7603 lists.pop();
7604 let funcdef = ZshCommand::FuncDef(ZshFuncDef {
7605 names: names.clone(),
7606 body: Box::new(ZshProgram {
7607 lists: vec![body_list],
7608 }),
7609 tracing: false,
7610 auto_call_args: None,
7611 body_source: None,
7612 });
7613 let synthetic = ZshList {
7614 sublist: ZshSublist {
7615 pipe: ZshPipe {
7616 cmd: funcdef,
7617 next: None,
7618 lineno: lineno(),
7619 merge_stderr: false,
7620 },
7621 next: None,
7622 flags: SublistFlags::default(),
7623 },
7624 flags: ListFlags::default(),
7625 };
7626 lists.push(synthetic);
7627 }
7628 }
7629 }
7630 }
7631 None => break,
7632 }
7633 }
7634
7635 ZshProgram { lists }
7636}
7637
7638/// Parse an assignment
7639/// Parse an assignment word `NAME=value` or `NAME=(arr items)`.
7640/// Sub-routine of par_simple. The C source handles assignments
7641/// inline in par_simple via the ENVSTRING/ENVARRAY token paths
7642/// (parse.c:1842-2000ish); zshrs splits it out to a dedicated
7643/// helper for clarity.
7644fn parse_assign() -> Option<ZshAssign> {
7645 // Helper: locate the Equals-marker that delimits NAME from
7646 // VALUE in an assignment-shaped tokstr. The lexer META-encodes
7647 // EVERY `=` (including those inside `${var%%=foo}` strip
7648 // patterns or `[idx]=...` subscripts), so a naive
7649 // `tokstr.find(Equals)` would split at the first inner `=`
7650 // and break the whole assignment. Walk the string skipping
7651 // brace and bracket depth so the assignment's `=` (the one
7652 // after the last `]` of the LHS subscript / or after the
7653 // bare name) is the one we land on.
7654 fn find_assign_equals(s: &str) -> Option<usize> {
7655 let target = Equals;
7656 let mut brace = 0i32;
7657 let mut bracket = 0i32;
7658 let mut paren = 0i32;
7659 for (i, c) in s.char_indices() {
7660 match c {
7661 '{' | '\u{8f}' /* Inbrace */ => brace += 1,
7662 '}' | '\u{90}' /* Outbrace */ => {
7663 if brace > 0 {
7664 brace -= 1;
7665 }
7666 }
7667 '[' | '\u{91}' /* Inbrack */ => bracket += 1,
7668 ']' | '\u{92}' /* Outbrack */ => {
7669 if bracket > 0 {
7670 bracket -= 1;
7671 }
7672 }
7673 '(' | '\u{88}' /* Inpar */ => paren += 1,
7674 ')' | '\u{8a}' /* Outpar */ => {
7675 if paren > 0 {
7676 paren -= 1;
7677 }
7678 }
7679 _ if c == target && brace == 0 && bracket == 0 && paren == 0 => {
7680 return Some(i);
7681 }
7682 _ => {}
7683 }
7684 }
7685 None
7686 }
7687
7688 let _ts_tokstr = tokstr()?;
7689 let tokstr = _ts_tokstr.as_str();
7690
7691 // Parse name=value or name+=value.
7692 let (name, value_str, append) = if tok() == ENVARRAY {
7693 let (name, append) = if let Some(stripped) = tokstr.strip_suffix('+') {
7694 (stripped, true)
7695 } else {
7696 (tokstr, false)
7697 };
7698 (name.to_string(), String::new(), append)
7699 } else if let Some(pos) = find_assign_equals(tokstr) {
7700 let name_part = &tokstr[..pos];
7701 let (name, append) = if let Some(stripped) = name_part.strip_suffix('+') {
7702 (stripped, true)
7703 } else {
7704 (name_part, false)
7705 };
7706 (
7707 name.to_string(),
7708 tokstr[pos + Equals.len_utf8()..].to_string(),
7709 append,
7710 )
7711 } else if let Some(pos) = tokstr.find('=') {
7712 // Fallback to literal '=' for compatibility
7713 let name_part = &tokstr[..pos];
7714 let (name, append) = if let Some(stripped) = name_part.strip_suffix('+') {
7715 (stripped, true)
7716 } else {
7717 (name_part, false)
7718 };
7719 (name.to_string(), tokstr[pos + 1..].to_string(), append)
7720 } else {
7721 return None;
7722 };
7723
7724 let value = if tok() == ENVARRAY {
7725 // Array assignment: name=(...)
7726 // c:Src/parse.c:1895 par_simple ENVARRAY arm:
7727 // `int oldcmdpos = incmdpos; ... incmdpos = 0; ... zshlex();`
7728 // Reset incmdpos to false BEFORE the array body's first lex so
7729 // a leading `{...}` (brace expansion) doesn't trip the
7730 // empty-buf+incmdpos rule at lex.c:1141 that returns `{` as
7731 // STRING and lets the reswd_lookup promote it to INBRACE_TOK.
7732 let oldcmdpos = crate::ported::lex::incmdpos();
7733 crate::ported::lex::set_incmdpos(false);
7734 let mut elements = Vec::new();
7735 zshlex(); // skip past token
7736
7737 let mut arr_iters = 0;
7738 const MAX_ARRAY_ELEMENTS: usize = 10_000;
7739 while matches!(tok(), STRING_LEX | SEPER | NEWLIN) {
7740 arr_iters += 1;
7741 if arr_iters > MAX_ARRAY_ELEMENTS {
7742 zerr("array assignment exceeded maximum elements");
7743 break;
7744 }
7745 if tok() == STRING_LEX {
7746 let _ts_s = crate::ported::lex::tokstr();
7747 if let Some(s) = _ts_s.as_deref() {
7748 elements.push(s.to_string());
7749 }
7750 }
7751 zshlex();
7752 }
7753 // c:Src/parse.c — `incmdpos = oldcmdpos;` (restore at end of arm)
7754 crate::ported::lex::set_incmdpos(oldcmdpos);
7755
7756 // The closing Outpar is consumed here. The outer par_simple
7757 // loop will then `zshlex()` past whatever follows (typically
7758 // a separator or the next word) — calling zshlex twice in
7759 // tandem (here AND in par_simple) over-advances and merges
7760 // a following `name() { … }` funcdef into the same Simple.
7761 // We only consume Outpar; let the caller handle the rest.
7762 // Without this guard `g=(o1); f() { :; }` parsed as one
7763 // Simple with assigns=[g] and words=["f()"] (one token).
7764 if tok() == OUTPAR_TOK {
7765 // Note: do NOT zshlex() here. par_simple's `lexer
7766 // .zshlex()` after `parse_assign` returns advances past
7767 // the Outpar onto the next significant token.
7768 //
7769 // Force `incmdpos=true` so the next zshlex() recognizes
7770 // a follow-up `b=(...)` / `b=val` as Envarray/Envstring.
7771 // The lexer flips incmdpos to false on bare Outpar (which
7772 // is correct for subshell-close context), but for an
7773 // array-assignment close more assigns/words may follow.
7774 set_incmdpos(true);
7775 }
7776
7777 ZshAssignValue::Array(elements)
7778 } else {
7779 ZshAssignValue::Scalar(value_str)
7780 };
7781
7782 Some(ZshAssign {
7783 name,
7784 value,
7785 append,
7786 })
7787}
7788
7789/// AST `par_redir` variant accepting an idstring for the
7790/// `{var}>file` brace-FD shape. C signature
7791/// `par_redir(int *rp, char *idstring)` (parse.c:2229). The
7792/// idstring is stored in the resulting ZshRedir.varid for the
7793/// executor to bind the named variable to the chosen fd.
7794fn par_redir_with_id(idstring: Option<&str>) -> Option<ZshRedir> {
7795 let varid: Option<String> = idstring.map(|s| s.to_string());
7796 let rtype = match tok() {
7797 OUTANG_TOK => REDIR_WRITE,
7798 OUTANGBANG => REDIR_WRITENOW,
7799 DOUTANG => REDIR_APP,
7800 DOUTANGBANG => REDIR_APPNOW,
7801 INANG_TOK => REDIR_READ,
7802 INOUTANG => REDIR_READWRITE,
7803 DINANG => REDIR_HEREDOC,
7804 DINANGDASH => REDIR_HEREDOCDASH,
7805 TRINANG => REDIR_HERESTR,
7806 INANGAMP => REDIR_MERGEIN,
7807 OUTANGAMP => REDIR_MERGEOUT,
7808 AMPOUTANG => REDIR_ERRWRITE,
7809 OUTANGAMPBANG => REDIR_ERRWRITENOW,
7810 DOUTANGAMP => REDIR_ERRAPP,
7811 DOUTANGAMPBANG => REDIR_ERRAPPNOW,
7812 _ => return None,
7813 };
7814
7815 let fd = if tokfd() >= 0 {
7816 tokfd()
7817 } else if matches!(
7818 rtype,
7819 REDIR_READ
7820 | REDIR_READWRITE
7821 | REDIR_MERGEIN
7822 | REDIR_HEREDOC
7823 | REDIR_HEREDOCDASH
7824 | REDIR_HERESTR
7825 ) {
7826 0
7827 } else {
7828 1
7829 };
7830
7831 // c:2234-2245 — save/restore incmdpos and nocorrect around the
7832 // zshlex that consumes the redir target word:
7833 // oldcmdpos = incmdpos; incmdpos = 0;
7834 // oldnc = nocorrect;
7835 // if (tok != INANG && tok != INOUTANG) nocorrect = 1;
7836 // ... zshlex; check tok; ...
7837 // incmdpos = oldcmdpos; nocorrect = oldnc;
7838 // Without this, a redir target lexes in the parent's incmdpos
7839 // (re-promoting `{` / reswords) AND with parent nocorrect (so
7840 // spelling-correction wrongly runs inside `> $(cmd)` etc.).
7841 let oldcmdpos = incmdpos();
7842 set_incmdpos(false);
7843 let oldnc = nocorrect();
7844 let cur = tok();
7845 if cur != INANG_TOK && cur != INOUTANG {
7846 set_nocorrect(1);
7847 }
7848 zshlex();
7849
7850 let name = match tok() {
7851 STRING_LEX | ENVSTRING => {
7852 let n = tokstr().unwrap_or_default();
7853 // c:2244-2245 — restore incmdpos / nocorrect right after
7854 // the redir target word is confirmed, BEFORE the trailing
7855 // zshlex advances past it. The advance itself is deferred
7856 // below so REDIR_HEREDOC[DASH] can push onto HDOCS first
7857 // (matching the wordcode variant at parse.rs:6894-6908) —
7858 // otherwise the NEWLIN drained by that zshlex sees an
7859 // empty HDOCS list and gethere never collects the body.
7860 set_incmdpos(oldcmdpos);
7861 set_nocorrect(oldnc);
7862 n
7863 }
7864 _ => {
7865 set_incmdpos(oldcmdpos);
7866 set_nocorrect(oldnc);
7867 zerr("expected word after redirection");
7868 return None;
7869 }
7870 };
7871
7872 // Heredoc terminator capture. C parse.c:2254-2317 par_redir builds
7873 // a `struct heredocs` entry here for REDIR_HEREDOC[DASH]. zshrs
7874 // pushes onto HDOCS (canonical C linked list, c:2290-2296) AND
7875 // onto LEX_HEREDOCS (Rust-only AST-glue Vec carrying parsed-out
7876 // terminator/strip_tabs/quoted metadata for downstream AST
7877 // consumers). Quoted terminators (`<<'EOF'` / `<<"EOF"` / `<<\EOF`)
7878 // disable expansion in the body — Snull `\u{9d}` marks single-quote,
7879 // Dnull `\u{9e}` marks double-quote, Bnull `\u{9f}` marks
7880 // backslash-escaped chars.
7881 let heredoc_idx = if matches!(rtype, REDIR_HEREDOC | REDIR_HEREDOCDASH) {
7882 let strip_tabs = rtype == REDIR_HEREDOCDASH;
7883 let quoted = name.contains('\u{9d}')
7884 || name.contains('\u{9e}')
7885 || name.contains('\u{9f}')
7886 || name.starts_with('\'')
7887 || name.starts_with('"');
7888 let term = name
7889 .chars()
7890 .filter(|c| {
7891 *c != '\'' && *c != '"' && *c != '\u{9d}' && *c != '\u{9e}' && *c != '\u{9f}'
7892 })
7893 .collect::<String>();
7894 // c:2290-2296 — `for (hd = &hdocs; *hd; hd = &(*hd)->next);
7895 // *hd = zalloc(sizeof(struct heredocs));
7896 // (*hd)->next = NULL;
7897 // (*hd)->type = htype;
7898 // (*hd)->pc = r;
7899 // (*hd)->str = tokstr;`
7900 // AST path has no wordcode pc to patch; use -1 sentinel so the
7901 // inline NEWLIN walk in `zshlex()` skips the setheredoc call.
7902 HDOCS.with_borrow_mut(|head| {
7903 let mut cur = head;
7904 while cur.is_some() {
7905 cur = &mut cur.as_mut().unwrap().next; // c:2290
7906 }
7907 *cur = Some(Box::new(crate::ported::zsh_h::heredocs {
7908 // c:2292-2296
7909 next: None,
7910 typ: rtype,
7911 pc: -1,
7912 str: Some(name.clone()),
7913 }));
7914 });
7915 // zshrs-only: push parallel AST-glue entry onto LEX_HEREDOCS.
7916 let idx = LEX_HEREDOCS.with_borrow_mut(|v| {
7917 v.push(HereDoc {
7918 terminator: term,
7919 strip_tabs,
7920 content: String::new(),
7921 quoted,
7922 processed: false,
7923 });
7924 v.len() - 1
7925 });
7926 Some(idx)
7927 } else {
7928 None
7929 };
7930
7931 // c:2298 (heredoc) / c:2322 (other redirs) — final zshlex() advance
7932 // past the redir target word. MUST run after the HDOCS push above
7933 // so the heredoc-drain inside this zshlex sees the new entry. For
7934 // non-heredoc forms the order is irrelevant; consolidating to a
7935 // single tail-call here matches the wordcode variant.
7936 zshlex();
7937
7938 Some(ZshRedir {
7939 rtype,
7940 fd,
7941 name,
7942 heredoc: None,
7943 varid,
7944 heredoc_idx,
7945 })
7946}
7947
7948/// Parse C-style for loop: for (( init; cond; step ))
7949/// Parse the c-style `for ((init; cond; incr)) do BODY done`.
7950/// Inner branch of zsh/Src/parse.c:1100-1140 inside par_for.
7951/// Recognized when the token after FOR is DINPAR (the `((`
7952/// detected by gettok via dbparens setup).
7953fn parse_for_cstyle() -> Option<ZshCommand> {
7954 // We're at (( (Dinpar None) - the opening ((
7955 // Lexer returns:
7956 // Dinpar None - opening ((
7957 // Dinpar "init" - init expression, semicolon consumed
7958 // Dinpar "cond" - cond expression, semicolon consumed
7959 // Doutpar "step" - step expression, closing )) consumed
7960 zshlex(); // Get init: Dinpar "i=0"
7961
7962 if tok() != DINPAR {
7963 zerr("expected init expression in for ((");
7964 return None;
7965 }
7966 let init = tokstr().unwrap_or_default();
7967
7968 zshlex(); // Get cond: Dinpar "i<10"
7969
7970 if tok() != DINPAR {
7971 zerr("expected condition in for ((");
7972 return None;
7973 }
7974 let cond = tokstr().unwrap_or_default();
7975
7976 zshlex(); // Get step: Doutpar "i++"
7977
7978 if tok() != DOUTPAR {
7979 zerr("expected )) in for");
7980 return None;
7981 }
7982 let step = tokstr().unwrap_or_default();
7983
7984 // c:1110 — `infor = 0;` before the body opener. The companion
7985 // `incmdpos = 1;` at c:1111 is intentionally skipped here for
7986 // the same reason c:1094's `incmdpos = 0;` is skipped in
7987 // par_for above — zshrs doesn't mirror the full
7988 // incmdpos state-machine inline.
7989 set_infor(0); // c:1110
7990 zshlex(); // Move past ))
7991
7992 skip_separators();
7993 let body = parse_loop_body(false, false)?;
7994
7995 Some(ZshCommand::For(ZshFor {
7996 var: String::new(),
7997 list: ForList::CStyle { init, cond, step },
7998 body: Box::new(body),
7999 is_select: false,
8000 }))
8001}
8002
8003/// Parse select loop (same syntax as for)
8004/// Parse `select NAME in WORDS; do BODY; done`. Same shape as
8005/// `for NAME in WORDS; do ...` but with menu-prompt semantics in
8006/// the executor. C equivalent: the SELECT case in par_for at
8007/// parse.c:1087-1207 (selects share parser flow with foreach).
8008fn parse_select() -> Option<ZshCommand> {
8009 // `select` shares par_for's grammar (var, words, body) but the
8010 // compile path is different (interactive prompt loop).
8011 match par_for()? {
8012 ZshCommand::For(mut f) => {
8013 f.is_select = true;
8014 Some(ZshCommand::For(f))
8015 }
8016 other => Some(other),
8017 }
8018}
8019
8020/// Parse loop body (do...done, {...}, or shortloop)
8021/// Parse the `do BODY done` body of a for/while/until/select/
8022/// repeat loop. Direct equivalent of zsh's parse.c handling
8023/// inside the loop builders — they all consume DOLOOP, parse a
8024/// list until DONE, and return the list. The `foreach_style`
8025/// flag signals foreach (where short-form `for NAME in WORDS;
8026/// CMD` may skip do/done) vs c-style (which always requires
8027/// do/done).
8028///
8029/// `is_repeat` widens the SHORTLOOPS gate so `SHORTREPEAT` also
8030/// unlocks the short form for `repeat N CMD` (per c:1600
8031/// `unset(SHORTLOOPS) && unset(SHORTREPEAT)`).
8032fn parse_loop_body(foreach_style: bool, is_repeat: bool) -> Option<ZshProgram> {
8033 // c:1180-1194 — body dispatch order per par_for:
8034 // `do ... done` (DOLOOP) — primary form.
8035 // `{ ... }` (INBRACE) — alternate.
8036 // csh/CSHJUNKIELOOPS — terminator is `end`.
8037 // else if (unset(SHORTLOOPS)) — YYERROR.
8038 // else — short form (single command).
8039 if tok() == DOLOOP {
8040 zshlex();
8041 // Body parse must declare DONE as an end-token so the
8042 // parse_program_until top-level orphan-DONE guard doesn't
8043 // mis-fire on the legitimate loop terminator.
8044 let body = parse_program_until(Some(&[DONE]));
8045 // c:Src/parse.c:1182-1183 / :1535-1536 / :1597-1598 —
8046 // `if (tok != DONE) YYERRORV(oecused);`. zshrs previously
8047 // silently accepted EOF as a substitute for `done`, so
8048 // `for i in a; do echo hi; don` ran the loop with `don` as
8049 // a command (which then failed "command not found") instead
8050 // of erroring at parse time. Bug #403, #404.
8051 if tok() != DONE {
8052 zerr("parse error: expected `done'");
8053 return None;
8054 }
8055 zshlex();
8056 Some(body)
8057 } else if tok() == INBRACE_TOK {
8058 zshlex();
8059 let body = parse_program_until(Some(&[OUTBRACE_TOK]));
8060 // c:Src/parse.c:1186 / :1539 — `if (tok != OUTBRACE) YYERRORV`.
8061 if tok() != OUTBRACE_TOK {
8062 zerr("parse error: expected `}'");
8063 return None;
8064 }
8065 zshlex();
8066 Some(body)
8067 } else if foreach_style || isset(CSHJUNKIELOOPS) {
8068 // c:1184 / 1546 / 1595 — `else if (csh || isset(CSHJUNKIELOOPS))`.
8069 let body = parse_program_until(Some(&[ZEND]));
8070 // c:1190 / 1548 — `if (tok != ZEND) YYERRORV`.
8071 if tok() != ZEND {
8072 zerr("parse error: expected `end'");
8073 return None;
8074 }
8075 zshlex();
8076 Some(body)
8077 } else {
8078 // c:1190 / 1474 / 1551 / 1600 — short-form gate. C bails
8079 // with YYERROR when `unset(SHORTLOOPS) && (!is_repeat ||
8080 // unset(SHORTREPEAT))`. zshrs's option machinery isn't
8081 // initialised at parse-test time (no `init_main` →
8082 // `install_emulation_defaults`), so a strict port here
8083 // body. parse_init seeds SHORTLOOPS=on mirroring C
8084 // `install_emulation_defaults`, so this fires only when a
8085 // script explicitly disabled the option.
8086 if unset(SHORTLOOPS) && (!is_repeat || unset(SHORTREPEAT)) {
8087 zerr("parse error: short loop form requires SHORTLOOPS option");
8088 return None;
8089 }
8090 // c:Src/parse.c:1604 / :1474 / :1551 — short form calls
8091 // par_save_list1 → par_list1 → par_sublist, which parses
8092 // ONE sublist and leaves the trailing SEPER untouched for
8093 // the outer par_list to consume. zshrs previously routed
8094 // through par_list() which consumes the trailing `;`/`\n`
8095 // separator — that swallowed the separator between the
8096 // loop's body command and the next outer command, so
8097 // `repeat 2 print x; print y` parsed as repeat-then-eof
8098 // and par_cmd's post-compound STRING_LEX guard at parse.rs
8099 // line 1170 fired "parse error near `print'". Bug #593.
8100 par_list1().map(|sublist| ZshProgram {
8101 lists: vec![ZshList {
8102 sublist,
8103 flags: ListFlags::default(),
8104 }],
8105 })
8106 }
8107}
8108
8109/// `() { body } arg1 arg2 …` — anonymous function. Defines a fresh
8110/// function named `_zshrs_anon_N`, invokes it with the args, and the
8111/// body runs with positional params set. Implemented as the desugared
8112/// pair (FuncDef + Simple call) so the compile path doesn't need new
8113/// machinery.
8114/// Parse an anonymous function definition `() { BODY }` followed
8115/// by call args. zsh treats `() { echo hi; } a b c` as defining
8116/// and immediately calling an anon fn with args a/b/c. C
8117/// equivalent: the INOUTPAR shape in par_simple at parse.c:1836+
8118/// triggers an anon-funcdef path.
8119fn parse_anon_funcdef() -> Option<ZshCommand> {
8120 zshlex(); // skip ()
8121 skip_separators();
8122 // No `{` after `()` → bare empty subshell shape `()`. Fall back
8123 // to a Subsh with an empty program so the status is 0 (matches
8124 // zsh's `()` no-op behavior).
8125 if tok() != INBRACE_TOK {
8126 return Some(ZshCommand::Subsh(Box::new(ZshProgram {
8127 lists: Vec::new(),
8128 })));
8129 }
8130 zshlex(); // skip {
8131 // c:Src/parse.c:par_subsh — anon `() { … }` body must terminate at
8132 // OUTBRACE_TOK. Pass it as the explicit end-token so the inner
8133 // parse stops cleanly at `}` rather than hitting the top-level
8134 // stray-`}` arm (#168). Bug #167 family.
8135 let body = parse_program_until(Some(&[OUTBRACE_TOK]));
8136 // c:Src/parse.c:1733-1737 — same `if (tok != OUTBRACE) YYERRORV`
8137 // gate as the named-funcdef path. Bug #405 sibling.
8138 if tok() != OUTBRACE_TOK {
8139 zerr("parse error: expected `}'");
8140 return None;
8141 }
8142 zshlex();
8143 // Collect any trailing args until a separator. zsh's anon-fn form
8144 // `() { body } a b c` runs body with $1=a, $2=b, $3=c.
8145 let mut args = Vec::new();
8146 while tok() == STRING_LEX {
8147 if let Some(s) = tokstr() {
8148 args.push(s);
8149 }
8150 zshlex();
8151 }
8152
8153 // Generate a unique name. Module-level static would be cleaner but
8154 // a thread-local atomic is enough — anonymous functions are
8155 // ephemeral and the name isn't user-visible.
8156 static ANON_COUNTER: AtomicUsize = AtomicUsize::new(0);
8157 let n = ANON_COUNTER.fetch_add(1, Ordering::Relaxed);
8158 let name = format!("_zshrs_anon_{}", n);
8159 Some(ZshCommand::FuncDef(ZshFuncDef {
8160 names: vec![name],
8161 body: Box::new(body),
8162 tracing: false,
8163 auto_call_args: Some(args),
8164 body_source: None,
8165 }))
8166}
8167
8168/// Parse {...} cursh
8169/// Parse a current-shell brace block `{ BODY }`. C source
8170/// par_cmd at parse.c:958-1085 handles Inbrace → emit WC_CURSH
8171/// and recurses into the list. zshrs's parse_cursh extracts that
8172/// arm into a dedicated method.
8173fn parse_cursh() -> Option<ZshCommand> {
8174 zshlex(); // skip {
8175 // c:Src/parse.c:par_subsh — pass OUTBRACE_TOK as the explicit
8176 // body terminator so the inner parse stops cleanly at `}` rather
8177 // than falling through the top-level `OUTBRACE_TOK if
8178 // end_tokens.is_none()` arm (which errors on stray `}` per bug
8179 // #168). Bug #167 in docs/BUGS.md.
8180 let prog = parse_program_until(Some(&[OUTBRACE_TOK]));
8181
8182 // c:Src/parse.c:par_subsh — `{ … }` requires a matching `}`.
8183 // C errors via YYERRORV when the body parse returns without
8184 // seeing OUTBRACE_TOK (parse.c:1623 inbrack check). zshrs's
8185 // previous behavior silently returned `Cursh(prog)` and ran the
8186 // body as if the braces were absent. Bug #167 in docs/BUGS.md.
8187 if tok() != OUTBRACE_TOK {
8188 // Reuse the "parse error near `<tok>'" shape from #142/#161.
8189 // The offending token is whatever follows the unclosed brace
8190 // body. For EOF (`{ echo a` at end of input) C zsh errors
8191 // near the LAST consumed body token; we use the current
8192 // tokstr() or fall back to a "}" hint.
8193 let near = tokstr().unwrap_or_else(|| "}".to_string());
8194 zerr(&format!("parse error near `{}'", near));
8195 return None;
8196 }
8197 // Check for { ... } always { ... }. Direct port of zsh's
8198 // par_subsh at parse.c:1612-1660 — note the two `incmdpos = 1`
8199 // forces (parse.c:1632, 1637): after consuming the closing
8200 // Outbrace AND after matching the `always` keyword, the parser
8201 // explicitly resets command position so the next `{` lexes as
8202 // Inbrace. Without these resets the lexer's String-clears-cmdpos
8203 // rule (lex.rs:976-983) leaves the second `{` in word position,
8204 // turning `always { ... }` into a Simple `{` `echo` … and the
8205 // try/always pairing is silently lost.
8206 {
8207 set_incmdpos(true); // parse.c:1632 incmdpos = !zsh_construct
8208 zshlex();
8209
8210 // Check for 'always'
8211 if tok() == STRING_LEX {
8212 let s = tokstr();
8213 if s.map(|s| s == "always").unwrap_or(false) {
8214 set_incmdpos(true); // parse.c:1637 incmdpos = 1
8215 zshlex();
8216 skip_separators();
8217
8218 if tok() == INBRACE_TOK {
8219 zshlex();
8220 // c:Src/parse.c — always-clause body terminates at
8221 // OUTBRACE_TOK. Bug #167/#168 family.
8222 let always = parse_program_until(Some(&[OUTBRACE_TOK]));
8223 if tok() == OUTBRACE_TOK {
8224 zshlex();
8225 }
8226 return Some(ZshCommand::Try(ZshTry {
8227 try_block: Box::new(prog),
8228 always: Box::new(always),
8229 }));
8230 }
8231 }
8232 }
8233 }
8234
8235 Some(ZshCommand::Cursh(Box::new(prog)))
8236}
8237
8238/// Parse inline function definition: name() { ... }
8239/// Parse the inline form `NAME () { BODY }` (POSIX-style funcdef
8240/// without the `function` keyword). The name has already been
8241/// consumed and pushed by par_simple before this method fires.
8242/// C source: handled inline in par_simple's INOUTPAR-after-name
8243/// arm (parse.c:1836-2228).
8244fn parse_inline_funcdef(name: String) -> Option<ZshCommand> {
8245 // par_simple's STRING loop left `incmdpos = 0`; the funcdef body
8246 // `{ ... }` requires `incmdpos = 1` so the lexer recognises `{`
8247 // as INBRACE_TOK (current-shell block opener) instead of a
8248 // literal `{` STRING. Without this, `myfunc() { echo body }`
8249 // parsed the body as the single STRING `"{"`, then `echo body`
8250 // fell out at top level. Mirrors the C path where par_cmd's
8251 // dispatcher (parse.c:958) is called with `incmdpos = 1` for
8252 // the funcdef body.
8253 set_incmdpos(true);
8254 // Skip ()
8255 if tok() == INOUTPAR {
8256 zshlex();
8257 }
8258
8259 skip_separators();
8260
8261 // Parse body
8262 if tok() == INBRACE_TOK {
8263 // Same body_start-before-zshlex fix as par_funcdef.
8264 let body_start = pos();
8265 zshlex();
8266 // c:Src/parse.c — inline funcdef body terminates at OUTBRACE_TOK.
8267 // Explicit end-token keeps the inner parse from hitting the
8268 // top-level stray-`}` arm (#168). Bug #167 family.
8269 let body = parse_program_until(Some(&[OUTBRACE_TOK]));
8270 // c:Src/parse.c:1733-1737 — `if (tok != OUTBRACE) { cmdpop();
8271 // lineno += oldlineno; ecnpats = onp; ecssub = oecssub;
8272 // YYERRORV(oecused); }`. Without this gate, `f() { echo hi`
8273 // silently registered as a complete fn with body `echo hi`.
8274 // Bug #405.
8275 if tok() != OUTBRACE_TOK {
8276 zerr("parse error: expected `}'");
8277 return None;
8278 }
8279 let body_end = pos().saturating_sub(1);
8280 let body_source = input_slice(body_start, body_end)
8281 .map(|s| {
8282 // Lexer's pos() may have advanced past `}` AND skipped
8283 // trailing whitespace/newlines before returning the
8284 // OUTBRACE_TOK to us, so the slice up to `pos - 1`
8285 // includes the `}` and any preceding whitespace.
8286 // Strip the trailing `}` and any preceding structural
8287 // separator (`;`, `\n`) — C zsh's getpermtext walks
8288 // the wordcode list and emits each command WITHOUT
8289 // the trailing `;`/`\n` that lives in the input.
8290 let t = s.trim();
8291 let t = t.strip_suffix('}').unwrap_or(t).trim_end();
8292 let t = t
8293 .trim_end_matches(|c: char| c == ';' || c == '\n')
8294 .trim_end();
8295 t.to_string()
8296 })
8297 .filter(|s| !s.is_empty());
8298 zshlex();
8299 Some(ZshCommand::FuncDef(ZshFuncDef {
8300 names: vec![name],
8301 body: Box::new(body),
8302 tracing: false,
8303 auto_call_args: None,
8304 body_source,
8305 }))
8306 } else if unset(SHORTLOOPS) {
8307 // c:1742 — `else if (unset(SHORTLOOPS)) YYERRORV(oecused);` —
8308 // funcdef short body (`name() cmd` without `{...}`) only
8309 // accepted when SHORTLOOPS is set. parse_init seeds
8310 // SHORTLOOPS=on so this fires only when a script
8311 // explicitly disabled the option.
8312 zerr("parse error: short function body form requires SHORTLOOPS option");
8313 None
8314 } else {
8315 match par_cmd() {
8316 Some(cmd) => {
8317 let list = ZshList {
8318 sublist: ZshSublist {
8319 pipe: ZshPipe {
8320 cmd,
8321 next: None,
8322 lineno: lineno(),
8323 merge_stderr: false,
8324 },
8325 next: None,
8326 flags: SublistFlags::default(),
8327 },
8328 flags: ListFlags::default(),
8329 };
8330 Some(ZshCommand::FuncDef(ZshFuncDef {
8331 names: vec![name],
8332 body: Box::new(ZshProgram { lists: vec![list] }),
8333 tracing: false,
8334 auto_call_args: None,
8335 body_source: None,
8336 }))
8337 }
8338 None => None,
8339 }
8340 }
8341}
8342
8343/// Parse conditional expression
8344/// Top of `[[ ]]` cond-expression parsing — entry to recursive
8345/// descent (or → and → not → primary). Direct port of zsh's
8346/// par_cond_1 at parse.c:2434-2475.
8347fn parse_cond_expr() -> Option<ZshCond> {
8348 parse_cond_or()
8349}
8350
8351/// Cond-expression `||` level. C: inside par_cond_1 at
8352/// parse.c:2434-2475 (the `cond_or` ladder).
8353fn parse_cond_or() -> Option<ZshCond> {
8354 let left = parse_cond_and()?;
8355 skip_cond_separators();
8356
8357 if tok() == DBAR {
8358 zshlex();
8359 skip_cond_separators();
8360 parse_cond_or().map(|right| ZshCond::Or(Box::new(left), Box::new(right)))
8361 } else {
8362 Some(left)
8363 }
8364}
8365
8366/// Cond-expression `&&` level. C: par_cond_2 at parse.c:2476-2625.
8367fn parse_cond_and() -> Option<ZshCond> {
8368 let left = parse_cond_not()?;
8369 skip_cond_separators();
8370
8371 if tok() == DAMPER {
8372 zshlex();
8373 skip_cond_separators();
8374 parse_cond_and().map(|right| ZshCond::And(Box::new(left), Box::new(right)))
8375 } else {
8376 Some(left)
8377 }
8378}
8379
8380/// `static FuncDump dumps;` from `Src/parse.c:3652` — head of the
8381/// loaded-`.zwc` linked list. C walks `dumps`/`p->next` directly;
8382/// the Rust port uses a `Mutex<Vec<funcdump>>` indexed by filename
8383/// so refcount ops can find an entry without raw-pointer compare.
8384pub static DUMPS: std::sync::Mutex<Vec<funcdump>> = std::sync::Mutex::new(Vec::new());
8385
8386/// Cond-expression `!` negation level. C: handled inside
8387/// par_cond_2 at parse.c:2476-2625 via the Bang token check.
8388fn parse_cond_not() -> Option<ZshCond> {
8389 skip_cond_separators();
8390
8391 // ! can be either BANG_TOK or String "!"
8392 let is_not =
8393 tok() == BANG_TOK || (tok() == STRING_LEX && tokstr().map(|s| s == "!").unwrap_or(false));
8394 if is_not {
8395 zshlex();
8396 let inner = parse_cond_not()?;
8397 return Some(ZshCond::Not(Box::new(inner)));
8398 }
8399
8400 if tok() == INPAR_TOK {
8401 zshlex();
8402 skip_cond_separators();
8403 // c:Src/parse.c:2534-2547 par_cond_2 INPAR branch — empty
8404 // body `[[ ( ) ]]` makes the inner par_cond's recursive
8405 // par_cond_2 see OUTPAR with no leading STRING/BANG/INPAR
8406 // and YYERROR immediately. Mirror that here: if the very
8407 // next token after `(` (post separator skip) is `)`, emit
8408 // a parse error so the script aborts cleanly instead of
8409 // silently swallowing every following command. Bug #538.
8410 if tok() == OUTPAR_TOK {
8411 yyerror("condition expected");
8412 return None;
8413 }
8414 let inner = parse_cond_expr()?;
8415 skip_cond_separators();
8416 if tok() == OUTPAR_TOK {
8417 zshlex();
8418 }
8419 return Some(inner);
8420 }
8421
8422 parse_cond_primary()
8423}
8424
8425/// Cond-expression primary: unary tests (-f, -d, ...), binary
8426/// tests (=, !=, <, >, ==, =~, -eq, -ne, ...), and parenthesized
8427/// sub-expressions. Direct port of par_cond_double / par_cond_triple
8428/// / par_cond_multi at parse.c:2626-2731 (chosen by arg count).
8429fn parse_cond_primary() -> Option<ZshCond> {
8430 let s1 = match tok() {
8431 STRING_LEX => {
8432 let s = tokstr().unwrap_or_default();
8433 zshlex();
8434 s
8435 }
8436 _ => return None,
8437 };
8438
8439 skip_cond_separators();
8440
8441 // Check for unary operator. zsh's lexer tokenizes leading `-` as
8442 // `zsh_h::Dash` (`\u{9b}`, `Src/zsh.h:182`) inside gettokstr (lex.c:1390-1400
8443 // LX2_DASH — `-` always becomes Dash, untokenized later). Match
8444 // either form here, and use char-count not byte-count since Dash
8445 // is 2 UTF-8 bytes (`\xc2\x9b`).
8446 //
8447 // c:Src/parse.c par_cond — when the leading token is `-` followed
8448 // ENTIRELY by digits (`-5`, `-123`), it's a numeric literal
8449 // operand, not a unary test flag. zsh's parser checks the C
8450 // `isdigit` of the trailing chars to disambiguate; without the
8451 // check, `[[ -5 -lt -3 ]]` reads `-5` as a one-arg test flag,
8452 // then `-lt` as the operand, then `-3` as a leftover token —
8453 // emitting "unknown condition: -5" and falling through to a
8454 // command-not-found dispatch on `-3`. Bug #121 in docs/BUGS.md.
8455 let s1_chars: Vec<char> = s1.chars().collect();
8456 let is_negative_number = s1_chars.len() >= 2
8457 && IS_DASH(s1_chars[0])
8458 && s1_chars[1..].iter().all(|c| c.is_ascii_digit());
8459 if s1_chars.len() == 2 && IS_DASH(s1_chars[0]) && !is_negative_number {
8460 let s2 = match tok() {
8461 STRING_LEX => {
8462 let s = tokstr().unwrap_or_default();
8463 zshlex();
8464 s
8465 }
8466 _ => {
8467 // c:Src/parse.c par_cond_2 — when the leading `-X`
8468 // is a 2-char dash form, zsh ALWAYS treats it as a
8469 // unary test op (the operand-missing case errors
8470 // immediately with `unknown condition: -X`). Don't
8471 // fall back to `Unary("-n", "-X")` — that path
8472 // silently let `[[ -z ]]` evaluate as
8473 // `[[ -n "-z" ]]` → true. Bug #480/#481.
8474 //
8475 // Convert Dash (\u{9b}) back to ASCII `-` for the
8476 // user-visible diagnostic so it reads "unknown
8477 // condition: -z" not "unknown condition: <Dash>z".
8478 let display: String = s1.chars().map(|c| {
8479 if IS_DASH(c) { '-' } else { c }
8480 }).collect();
8481 crate::ported::utils::zerr(&format!(
8482 "unknown condition: {}",
8483 display
8484 ));
8485 return None;
8486 }
8487 };
8488 return Some(ZshCond::Unary(s1, s2));
8489 }
8490
8491 // Check for binary operator. Direct port of zsh/Src/parse.c:2601-2603:
8492 // incond++; /* parentheses do globbing */
8493 // do condlex(); while (COND_SEP());
8494 // incond--; /* parentheses do grouping */
8495 // The bump makes the lexer treat `(` as a literal character inside
8496 // the RHS word (e.g. `[[ x =~ (foo) ]]`) instead of returning Inpar
8497 // and splitting the regex into multiple tokens.
8498 let op = match tok() {
8499 STRING_LEX => {
8500 let s = tokstr().unwrap_or_default();
8501 set_incond(incond() + 1);
8502 zshlex();
8503 set_incond(incond() - 1);
8504 s
8505 }
8506 INANG_TOK => {
8507 set_incond(incond() + 1);
8508 zshlex();
8509 set_incond(incond() - 1);
8510 "<".to_string()
8511 }
8512 OUTANG_TOK => {
8513 set_incond(incond() + 1);
8514 zshlex();
8515 set_incond(incond() - 1);
8516 ">".to_string()
8517 }
8518 _ => return Some(ZshCond::Unary("-n".to_string(), s1)),
8519 };
8520
8521 skip_cond_separators();
8522
8523 // c:Src/parse.c:2601-2625 par_cond_2 — only the documented binary
8524 // operators are accepted inside `[[ ... ]]`. zsh rejects ksh/bash
8525 // forms `-a` (logical AND) and `-o` (logical OR) with a parse
8526 // error ("condition expected") because they're not in the
8527 // par_cond_2 binary-op set — zsh uses `&&` / `||` instead.
8528 // Verified: `zsh -fc '[[ "" -a "x" ]]'` → exit 1, "parse error:
8529 // condition expected: ...". Without this gate, zshrs silently
8530 // built ZshCond::Binary("", "-a", "x") and ran an unknown-op
8531 // path that always evaluated false.
8532 // c:Src/parse.c:2601-2625 par_cond_2 — `-a` / `-o` n-ary chain
8533 // operators are not valid binary operators inside `[[ ... ]]`
8534 // (zsh uses `&&` / `||` instead). Match both the ASCII `-a`/
8535 // `-o` form and the tokenized `Dash+a`/`Dash+o` form that the
8536 // lexer emits inside cond bodies (Dash = \u{9b}, Src/zsh.h:182).
8537 let op_chars: Vec<char> = op.chars().collect();
8538 let is_dash_a_or_o =
8539 op_chars.len() == 2 && IS_DASH(op_chars[0]) && (op_chars[1] == 'a' || op_chars[1] == 'o');
8540 if is_dash_a_or_o {
8541 crate::ported::utils::zerr(&format!("parse error: condition expected: {}", s1));
8542 crate::ported::utils::errflag.fetch_or(
8543 crate::ported::zsh_h::ERRFLAG_ERROR,
8544 std::sync::atomic::Ordering::Relaxed,
8545 );
8546 set_tok(LEXERR);
8547 return None;
8548 }
8549
8550 let s2 = match tok() {
8551 STRING_LEX => {
8552 let s = tokstr().unwrap_or_default();
8553 zshlex();
8554 s
8555 }
8556 _ => {
8557 // c:Src/parse.c par_cond_2 — when a binary op is
8558 // recognized but the RHS operand is missing, zsh emits
8559 // `parse error: condition expected: <LHS>` at par_cond_2's
8560 // missing-rhs branch. zshrs's previous fallback returned
8561 // `Binary(s1, op, "")` which silently evaluated as if the
8562 // RHS were empty string → rc=1. Bug #482.
8563 //
8564 // Convert Dash (\u{9b}) back to ASCII `-` in the LHS
8565 // display so the diagnostic reads cleanly.
8566 let display: String = s1.chars().map(|c| {
8567 if IS_DASH(c) { '-' } else { c }
8568 }).collect();
8569 crate::ported::utils::zerr(&format!(
8570 "parse error: condition expected: {}",
8571 display
8572 ));
8573 crate::ported::utils::errflag.fetch_or(
8574 crate::ported::zsh_h::ERRFLAG_ERROR,
8575 std::sync::atomic::Ordering::Relaxed,
8576 );
8577 set_tok(LEXERR);
8578 return None;
8579 }
8580 };
8581
8582 if op == "=~" {
8583 Some(ZshCond::Regex(s1, s2))
8584 } else {
8585 Some(ZshCond::Binary(s1, op, s2))
8586 }
8587}
8588
8589fn skip_cond_separators() {
8590 while tok() == SEPER && {
8591 let s = tokstr();
8592 s.map(|s| !s.contains(';')).unwrap_or(true)
8593 } {
8594 zshlex();
8595 }
8596}
8597
8598/// Parse (( ... )) arithmetic command
8599/// Parse `(( EXPR ))` arithmetic command. C source: parse.c:1810-1834
8600/// `par_dinbrack` (despite the name; the function actually handles
8601/// DINPAR `(( ))` blocks too).
8602fn parse_arith() -> Option<ZshCommand> {
8603 let expr = tokstr().unwrap_or_default();
8604 zshlex();
8605 Some(ZshCommand::Arith(expr))
8606}
8607
8608/// Skip separator tokens
8609fn skip_separators() {
8610 while tok() == SEPER || tok() == NEWLIN {
8611 zshlex();
8612 }
8613}
8614
8615// `fdheaderlen` / `fdmagic` / `fdflags` / etc. macros from
8616// `Src/parse.c:3125-3152`. C uses raw pointer arithmetic on a
8617// `Wordcode` (= `u32 *`); the Rust port takes a slice and indexes.
8618
8619/// Port of `fdheaderlen(f)` macro (`Src/parse.c:3125`) — header
8620/// length in u32 words (read from prelude word `FD_PRELEN`).
8621#[inline]
8622pub fn fdheaderlen(f: &[u32]) -> u32 {
8623 f[FD_PRELEN]
8624}
8625
/// Port of the `fdmagic(f)` macro (`Src/parse.c:3127`): the first word
/// of the dump, expected to be `FD_MAGIC` or `FD_OMAGIC`.
///
/// Panics if `f` is empty.
#[inline]
pub fn fdmagic(f: &[u32]) -> u32 {
    let magic = f[0];
    magic
}
8632
/// Port of the `fdflags(f)` macro (`Src/parse.c:3131`): the dump flags,
/// kept in the low byte of the packed word at index 1.
///
/// Panics if `f` has fewer than two words.
#[inline]
pub fn fdflags(f: &[u32]) -> u32 {
    let packed = f[1];
    // Only the lowest byte carries the flags.
    packed & 0x0000_00ff
}
8640
/// Port of the `fdsetflags(f, v)` macro (`Src/parse.c:3132`): store `v`
/// in the low byte of the packed word at index 1, leaving the upper 24
/// bits untouched.
///
/// Panics if `f` has fewer than two words.
#[inline]
pub fn fdsetflags(f: &mut [u32], v: u8) {
    let packed = &mut f[1];
    let upper = *packed & 0xffff_ff00;
    *packed = upper | u32::from(v);
}
8647
/// Port of the `fdother(f)` macro (`Src/parse.c:3133`): the upper 24
/// bits of the packed word at index 1, which hold the offset to the
/// opposite-byte-order copy of the dump.
///
/// Panics if `f` has fewer than two words.
#[inline]
pub fn fdother(f: &[u32]) -> u32 {
    let packed = f[1];
    // Dropping the flag byte leaves exactly the 24-bit offset.
    packed >> 8
}
8655
/// Port of the `fdsetother(f, o)` macro (`Src/parse.c:3134`): store the
/// low 24 bits of `o` in the upper 24 bits of the packed word at
/// index 1, keeping the flag byte.
///
/// Panics if `f` has fewer than two words.
#[inline]
pub fn fdsetother(f: &mut [u32], o: u32) {
    let flag_byte = f[1] & 0xff;
    // Shifting left by 8 discards the top byte of `o`, so only its low
    // 24 bits are stored.
    let offset_bits = o << 8;
    f[1] = flag_byte | offset_bits;
}
8661
/// Port of the `fdversion(f)` macro (`Src/parse.c:3140`): read the
/// NUL-terminated version string stored from word 2 of the dump
/// prelude.
///
/// At most 10 words (40 bytes) are examined, each word contributing
/// its bytes in little-endian order; the string ends at the first NUL
/// byte, or after 40 bytes if there is none. Invalid UTF-8 is replaced
/// lossily.
///
/// A slice too short to contain any version words (fewer than three
/// words, e.g. a truncated dump file) yields an empty string instead
/// of panicking.
pub fn fdversion(f: &[u32]) -> String {
    let bytes: Vec<u8> = f
        .iter()
        // Words 0 and 1 are the magic number and the flags/offset word.
        .skip(2)
        .take(10)
        .flat_map(|word| word.to_le_bytes())
        .take_while(|&byte| byte != 0)
        .collect();
    String::from_utf8_lossy(&bytes).into_owned()
}
8673
8674/// Port of `firstfdhead(f)` macro (`Src/parse.c:3142`) — pointer
8675/// to the first `struct fdhead` past the prelude.
8676#[inline]
8677pub fn firstfdhead_offset() -> usize {
8678 FD_PRELEN
8679}
8680
/// Port of the `nextfdhead(f)` macro (`Src/parse.c:3143`): given the
/// word offset `cur` of a header inside `f`, return the offset of the
/// header that follows it.
///
/// The step is the current header's `hlen`, read from the fifth word
/// of the record (`f[cur + 4]`). Panics if that index is out of range.
#[inline]
pub fn nextfdhead_offset(f: &[u32], cur: usize) -> usize {
    // `hlen` is field 4 of fdhead.
    let hlen = f[cur + 4] as usize;
    cur + hlen
}
8687
8688/// Port of `fdhflags(f)` macro (`Src/parse.c:3145`) — low 2 bits
8689/// of the header's `flags` field (the kshload/zshload marker).
8690#[inline]
8691pub fn fdhflags(h: &fdhead) -> u32 {
8692 h.flags & 0x3
8693}
8694
8695/// Port of `fdhtail(f)` macro (`Src/parse.c:3146`) — high 30 bits
8696/// of `flags`, byte offset from the name start to its basename.
8697#[inline]
8698pub fn fdhtail(h: &fdhead) -> u32 {
8699 h.flags >> 2
8700}
8701
/// Port of the `fdhbldflags(f, t)` macro (`Src/parse.c:3147`): pack a
/// header's marker `flags` and name `tail` offset into one word, with
/// `tail` shifted up by two bits and `flags` OR-ed into the result.
///
/// `flags` is not masked, so callers must pass a value that fits in
/// two bits to keep the fields separate.
#[inline]
pub fn fdhbldflags(flags: u32, tail: u32) -> u32 {
    let tail_bits = tail << 2;
    tail_bits | flags
}
8708
8709/// Port of `fdname(f)` macro (`Src/parse.c:3152`) — name string
8710/// follows the fdhead record immediately. Reads bytes from the
8711/// dump buffer until NUL.
8712pub fn fdname(buf: &[u32], header_offset: usize) -> String {
8713 let name_word_off = header_offset + FDHEAD_WORDS;
8714 let bytes: Vec<u8> = buf[name_word_off..]
8715 .iter()
8716 .flat_map(|w| w.to_le_bytes().into_iter())
8717 .take_while(|&b| b != 0)
8718 .collect();
8719 String::from_utf8_lossy(&bytes).into_owned()
8720}
8721
8722/// Decode a `fdhead` record at the given u32-word offset in the
8723/// dump buffer. Used by the header-walk loops in `bin_zcompile -t`.
8724pub fn read_fdhead(buf: &[u32], offset: usize) -> Option<fdhead> {
8725 if offset + FDHEAD_WORDS > buf.len() {
8726 return None;
8727 }
8728 Some(fdhead {
8729 start: buf[offset],
8730 len: buf[offset + 1],
8731 npats: buf[offset + 2],
8732 strs: buf[offset + 3],
8733 hlen: buf[offset + 4],
8734 flags: buf[offset + 5],
8735 })
8736}
8737
8738/// Port of `freedump(FuncDump f)` from `Src/parse.c:3976`. C
8739/// `munmap`s, `zclose`s the fd, and frees the struct. The Rust
8740/// port relies on Drop for the `funcdump` (no mmap held in this
8741/// port — `addr`/`map` are byte-offset placeholders), so the
8742/// equivalent is removing the entry from the dumps list. Called
8743/// by `decrdumpcount` when the refcount hits zero (c:3988) and
8744/// by `closedumps` when shutting down (c:4008).
8745fn freedump_locked(g: &mut std::sync::MutexGuard<'_, Vec<funcdump>>, filename: &str) {
8746 // c:3976
8747 g.retain(|d| d.filename.as_deref() != Some(filename));
8748}
8749
8750// =====================================================================
8751// Remaining `Src/parse.c` ports (this section finishes the file).
8752//
8753// Several of these emit into the C-wordcode buffer (`ECBUF`/etc.) and
8754// are kept for completeness — the live zshrs runtime uses the
8755// `ZshProgram` AST path instead, but `bin_zcompile` (`-c`/`-a` modes)
8756// and any future `.zwc`-emit pipeline both call into these.
8757// =====================================================================
8758
8759/// `ecstr(s)` helper — `ecadd(ecstrcode(s))`. Mirrors the C macro at
8760/// `Src/parse.c:482` used everywhere by the par_* emitters.
8761#[inline]
8762pub fn ecstr(s: &str) {
8763 let code = ecstrcode(s);
8764 ecadd(code);
8765}
8766
8767/// Port of `condlex` function-pointer global from `Src/parse.c`. C
8768/// flips this between `zshlex` and `testlex` depending on whether
8769/// we're inside `[[ ]]` vs `/bin/test` builtin. zshrs has no
8770/// separate `testlex` yet, so this just defers to `zshlex`.
8771#[inline]
8772pub fn condlex() {
8773 zshlex();
8774}
8775
8776fn copy_ecstr_walk(node: &Option<Box<EccstrNode>>, p: &mut [u8]) {
8777 let mut cur = node.as_ref();
8778 while let Some(n) = cur {
8779 // c:540 — `memcpy(p + s->aoffs, s->str, strlen(s->str) + 1);`
8780 let off = n.aoffs as usize;
8781 let need = off + n.str.len() + 1;
8782 if need <= p.len() {
8783 p[off..off + n.str.len()].copy_from_slice(&n.str);
8784 p[off + n.str.len()] = 0;
8785 }
8786 // c:541 — `copy_ecstr(s->left, p);`
8787 copy_ecstr_walk(&n.left, p);
8788 // c:542 — `s = s->right;`
8789 cur = n.right.as_ref();
8790 }
8791}
8792
8793/// Port of `par_cond(void)` from `Src/parse.c:2409`. Top-level cond
8794/// OR-chain — drives `par_cond_1` and stitches `||`-separated terms
8795/// with `WCB_COND(COND_OR, …)`. This is the missing top of the
8796/// wordcode cond chain: `par_cond_wordcode` (the par_dinbrack port)
8797/// must call into HERE so that `[[ a || b ]]` and friends land
8798/// real WC_COND opcodes in `ecbuf`. Without this, the wordcode
8799/// emitter for `[[ ... ]]` produced zero words and parity dropped
8800/// 148 words on `/etc/zshrc` alone.
8801pub fn par_cond_top() -> i32 {
8802 // c:2411 — `int p = ecused, r;`
8803 let p = ECUSED.with(|c| c.get()) as usize;
8804 let r = par_cond_1();
8805 while COND_SEP() {
8806 condlex();
8807 }
8808 if tok() == DBAR {
8809 // c:2417 — `condlex(); while (COND_SEP()) condlex();`
8810 condlex();
8811 while COND_SEP() {
8812 condlex();
8813 }
8814 // c:2420-2422 — `ecispace(p, 1); par_cond(); ecbuf[p] =
8815 // WCB_COND(COND_OR, ecused-1-p);`
8816 ecispace(p, 1);
8817 par_cond_top();
8818 let ecused = ECUSED.with(|c| c.get()) as usize;
8819 ECBUF.with(|c| {
8820 c.borrow_mut()[p] = WCB_COND(COND_OR as u32, (ecused - 1 - p) as u32);
8821 });
8822 return 1;
8823 }
8824 r
8825}
8826
8827/// Port of `static int check_cond(const char *input, const char *cond)`
8828/// from `Src/parse.c:2459`. True iff `input` is the two-char `-X`
8829/// form whose `X` matches `cond` — used by par_cond_2 to detect
8830/// `-a` / `-o` n-ary chain operators and by build_dump for `-k` /
8831/// `-z`. C: `return !IS_DASH(input[0]) ? 0 : !strcmp(input+1, cond);`.
8832fn check_cond(input: &str, cond: &str) -> bool {
8833 let mut chars = input.chars();
8834 match chars.next() {
8835 Some(c) if IS_DASH(c) => chars.as_str() == cond,
8836 _ => false,
8837 }
8838}
8839
8840#[cfg(test)]
8841mod tests {
8842 use super::*;
8843 use crate::utils::{errflag, ERRFLAG_ERROR};
8844 use std::fs;
8845 use std::path::Path;
8846 use std::sync::atomic::Ordering;
8847 use std::sync::mpsc;
8848 use std::thread;
8849 use std::time::Duration;
8850
8851 /// `try_source_file` MUST refuse a stale `.zwc` cache when the
8852 /// uncompiled source has been modified more recently. The C body
8853 /// at c:3819 reads `stc.st_mtime >= stn.st_mtime` — explicitly
8854 /// `>=`, meaning only an equal-or-newer zwc is acceptable.
8855 ///
8856 /// A regression that ignored the mtime check (or used the wrong
8857 /// direction) would silently keep loading the OLD compiled body
8858 /// after the user edited the source file — every `source foo.zsh`
8859 /// would replay yesterday's code, the worst-class shell bug.
8860 ///
8861 /// Pin: create source + .zwc, then touch source to make it
8862 /// newer. try_source_file must return None.
8863 #[test]
8864 fn try_source_file_skips_stale_zwc() {
8865 let _g = crate::test_util::global_state_lock();
8866 let dir = tempfile::tempdir().expect("tempdir");
8867 let src = dir.path().join("script.zsh");
8868 let zwc = dir.path().join("script.zsh.zwc");
8869 // Create zwc FIRST (older), then source (newer).
8870 fs::write(&zwc, b"placeholder zwc").unwrap();
8871 thread::sleep(Duration::from_millis(20));
8872 fs::write(&src, b"echo hi").unwrap();
8873
8874 let result = try_source_file(src.to_str().unwrap());
8875 assert!(
8876 result.is_none(),
8877 "c:3819 — stale .zwc (older than source) MUST be rejected; \
8878 got {:?}",
8879 result
8880 );
8881 }
8882
8883 /// `try_source_file` returns None when no `.zwc` exists for the
8884 /// requested file (c:3819 `if let Ok(meta_c) = &stc` gate fails).
8885 /// This is the common case — most user scripts don't ship with
8886 /// a pre-compiled `.zwc`. The fn returning None lets the caller
8887 /// fall through to the source-read path. A regression that
8888 /// returned `Some(file)` on missing `.zwc` would route every
8889 /// `source foo.zsh` through `check_dump_file` against a
8890 /// non-existent file and crash.
8891 #[test]
8892 fn try_source_file_returns_none_when_no_zwc() {
8893 let _g = crate::test_util::global_state_lock();
8894 let dir = tempfile::tempdir().expect("tempdir");
8895 let src = dir.path().join("plain.zsh");
8896 fs::write(&src, b"echo hi").unwrap();
8897 // No .zwc sibling.
8898
8899 let result = try_source_file(src.to_str().unwrap());
8900 assert!(
8901 result.is_none(),
8902 "c:3819 gate fails when stat(wc) returns Err → None"
8903 );
8904 }
8905
8906 /// Test helper. Mirrors zsh's `errflag` save/clear/check pattern
8907 /// around a parse — see `Src/init.c:loop` which clears errflag
8908 /// before parse_event() and tests it after. Returns `Err` if the
8909 /// parse set `ERRFLAG_ERROR`; otherwise `Ok(program)`.
8910 fn parse(input: &str) -> Result<ZshProgram, String> {
8911 let saved = errflag.load(Ordering::Relaxed);
8912 errflag.fetch_and(!ERRFLAG_ERROR, Ordering::Relaxed);
8913 parse_init(input);
8914 let prog = crate::ported::parse::parse();
8915 let had_err = (errflag.load(Ordering::Relaxed) & ERRFLAG_ERROR) != 0;
8916 // Restore prior error bits; don't carry our new error into the
8917 // outer test runner.
8918 errflag.store(saved, Ordering::Relaxed);
8919 if had_err {
8920 Err("parse error".to_string())
8921 } else {
8922 Ok(prog)
8923 }
8924 }
8925
8926 #[test]
8927 fn test_simple_command() {
8928 let _g = crate::test_util::global_state_lock();
8929 let prog = parse("echo hello world").unwrap();
8930 assert_eq!(prog.lists.len(), 1);
8931 match &prog.lists[0].sublist.pipe.cmd {
8932 ZshCommand::Simple(s) => {
8933 assert_eq!(s.words, vec!["echo", "hello", "world"]);
8934 }
8935 _ => panic!("expected simple command"),
8936 }
8937 }
8938
8939 #[test]
8940 fn test_pipeline() {
8941 let _g = crate::test_util::global_state_lock();
8942 let prog = parse("ls | grep foo | wc -l").unwrap();
8943 assert_eq!(prog.lists.len(), 1);
8944
8945 let pipe = &prog.lists[0].sublist.pipe;
8946 assert!(pipe.next.is_some());
8947
8948 let pipe2 = pipe.next.as_ref().unwrap();
8949 assert!(pipe2.next.is_some());
8950 }
8951
8952 #[test]
8953 fn test_and_or() {
8954 let _g = crate::test_util::global_state_lock();
8955 let prog = parse("cmd1 && cmd2 || cmd3").unwrap();
8956 let sublist = &prog.lists[0].sublist;
8957
8958 assert!(sublist.next.is_some());
8959 let (op, _) = sublist.next.as_ref().unwrap();
8960 assert_eq!(*op, SublistOp::And);
8961 }
8962
8963 #[test]
8964 fn test_if_then() {
8965 let _g = crate::test_util::global_state_lock();
8966 let prog = parse("if test -f foo; then echo yes; fi").unwrap();
8967 match &prog.lists[0].sublist.pipe.cmd {
8968 ZshCommand::If(_) => {}
8969 _ => panic!("expected if command"),
8970 }
8971 }
8972
8973 #[test]
8974 fn test_for_loop() {
8975 let _g = crate::test_util::global_state_lock();
8976 let prog = parse("for i in a b c; do echo $i; done").unwrap();
8977 match &prog.lists[0].sublist.pipe.cmd {
8978 ZshCommand::For(f) => {
8979 assert_eq!(f.var, "i");
8980 match &f.list {
8981 ForList::Words(w) => assert_eq!(w, &vec!["a", "b", "c"]),
8982 _ => panic!("expected word list"),
8983 }
8984 }
8985 _ => panic!("expected for command"),
8986 }
8987 }
8988
8989 #[test]
8990 fn test_case() {
8991 let _g = crate::test_util::global_state_lock();
8992 let prog = parse("case $x in a) echo a;; b) echo b;; esac").unwrap();
8993 match &prog.lists[0].sublist.pipe.cmd {
8994 ZshCommand::Case(c) => {
8995 assert_eq!(c.arms.len(), 2);
8996 }
8997 _ => panic!("expected case command"),
8998 }
8999 }
9000
9001 #[test]
9002 fn test_function() {
9003 let _g = crate::test_util::global_state_lock();
9004 // First test just parsing "function foo" to see what happens
9005 let prog = parse("function foo { }").unwrap();
9006 match &prog.lists[0].sublist.pipe.cmd {
9007 ZshCommand::FuncDef(f) => {
9008 assert_eq!(f.names, vec!["foo"]);
9009 }
9010 _ => panic!(
9011 "expected function, got {:?}",
9012 prog.lists[0].sublist.pipe.cmd
9013 ),
9014 }
9015 }
9016
9017 #[test]
9018 fn test_redirection() {
9019 let _g = crate::test_util::global_state_lock();
9020 let prog = parse("echo hello > file.txt").unwrap();
9021 match &prog.lists[0].sublist.pipe.cmd {
9022 ZshCommand::Simple(s) => {
9023 assert_eq!(s.redirs.len(), 1);
9024 assert_eq!(s.redirs[0].rtype, REDIR_WRITE);
9025 }
9026 _ => panic!("expected simple command"),
9027 }
9028 }
9029
9030 #[test]
9031 fn test_assignment() {
9032 let _g = crate::test_util::global_state_lock();
9033 let prog = parse("FOO=bar echo $FOO").unwrap();
9034 match &prog.lists[0].sublist.pipe.cmd {
9035 ZshCommand::Simple(s) => {
9036 assert_eq!(s.assigns.len(), 1);
9037 assert_eq!(s.assigns[0].name, "FOO");
9038 }
9039 _ => panic!("expected simple command"),
9040 }
9041 }
9042
9043 #[test]
9044 fn test_parse_completion_function() {
9045 let _g = crate::test_util::global_state_lock();
9046 let input = r#"_2to3_fixes() {
9047 local -a fixes
9048 fixes=( ${${(M)${(f)"$(2to3 --list-fixes 2>/dev/null)"}:#*}//[[:space:]]/} )
9049 (( ${#fixes} )) && _describe -t fixes 'fix' fixes
9050}"#;
9051 let result = parse(input);
9052 assert!(
9053 result.is_ok(),
9054 "Failed to parse completion function: {:?}",
9055 result.err()
9056 );
9057 let prog = result.unwrap();
9058 assert!(
9059 !prog.lists.is_empty(),
9060 "Expected at least one list in program"
9061 );
9062 }
9063
9064 #[test]
9065 fn test_parse_array_with_complex_elements() {
9066 let _g = crate::test_util::global_state_lock();
9067 let input = r#"arguments=(
9068 '(- * :)'{-h,--help}'[show this help message and exit]'
9069 {-d,--doctests_only}'[fix up doctests only]'
9070 '*:filename:_files'
9071)"#;
9072 let result = parse(input);
9073 assert!(
9074 result.is_ok(),
9075 "Failed to parse array assignment: {:?}",
9076 result.err()
9077 );
9078 }
9079
9080 #[test]
9081 fn test_parse_full_completion_file() {
9082 let _g = crate::test_util::global_state_lock();
9083 let input = r##"#compdef 2to3
9084
9085# zsh completions for '2to3'
9086
9087_2to3_fixes() {
9088 local -a fixes
9089 fixes=( ${${(M)${(f)"$(2to3 --list-fixes 2>/dev/null)"}:#*}//[[:space:]]/} )
9090 (( ${#fixes} )) && _describe -t fixes 'fix' fixes
9091}
9092
9093local -a arguments
9094
9095arguments=(
9096 '(- * :)'{-h,--help}'[show this help message and exit]'
9097 {-d,--doctests_only}'[fix up doctests only]'
9098 {-f,--fix}'[each FIX specifies a transformation; default: all]:fix name:_2to3_fixes'
9099 {-j,--processes}'[run 2to3 concurrently]:number: '
9100 {-x,--nofix}'[prevent a transformation from being run]:fix name:_2to3_fixes'
9101 {-l,--list-fixes}'[list available transformations]'
9102 {-p,--print-function}'[modify the grammar so that print() is a function]'
9103 {-v,--verbose}'[more verbose logging]'
9104 '--no-diffs[do not show diffs of the refactoring]'
9105 {-w,--write}'[write back modified files]'
9106 {-n,--nobackups}'[do not write backups for modified files]'
9107 {-o,--output-dir}'[put output files in this directory instead of overwriting]:directory:_directories'
9108 {-W,--write-unchanged-files}'[also write files even if no changes were required]'
9109 '--add-suffix[append this string to all output filenames]:suffix: '
9110 '*:filename:_files'
9111)
9112
9113_arguments -s -S $arguments
9114"##;
9115 let result = parse(input);
9116 assert!(
9117 result.is_ok(),
9118 "Failed to parse full completion file: {:?}",
9119 result.err()
9120 );
9121 let prog = result.unwrap();
9122 // Should have parsed successfully with at least one statement
9123 assert!(!prog.lists.is_empty(), "Expected at least one list");
9124 }
9125
9126 #[test]
9127 fn test_parse_logs_sh() {
9128 let _g = crate::test_util::global_state_lock();
9129 let input = r#"#!/usr/bin/env bash
9130shopt -s globstar
9131
9132if [[ $(uname) == Darwin ]]; then
9133 tail -f /var/log/**/*.log /var/log/**/*.out | lolcat
9134else
9135 if [[ $ZPWR_DISTRO_NAME == raspbian ]]; then
9136 tail -f /var/log/**/*.log | lolcat
9137 else
9138 printf "Unsupported...\n" >&2
9139 fi
9140fi
9141"#;
9142 let result = parse(input);
9143 assert!(
9144 result.is_ok(),
9145 "Failed to parse logs.sh: {:?}",
9146 result.err()
9147 );
9148 }
9149
9150 #[test]
9151 fn test_parse_case_with_glob() {
9152 let _g = crate::test_util::global_state_lock();
9153 let input = r#"case "$ZPWR_OS_TYPE" in
9154 darwin*) open_cmd='open'
9155 ;;
9156 cygwin*) open_cmd='cygstart'
9157 ;;
9158 linux*)
9159 open_cmd='xdg-open'
9160 ;;
9161esac"#;
9162 let result = parse(input);
9163 assert!(
9164 result.is_ok(),
9165 "Failed to parse case with glob: {:?}",
9166 result.err()
9167 );
9168 }
9169
9170 #[test]
9171 fn test_parse_case_with_nested_if() {
9172 let _g = crate::test_util::global_state_lock();
9173 // Test case with nested if and glob patterns
9174 let input = r##"function zpwrGetOpenCommand(){
9175 local open_cmd
9176 case "$ZPWR_OS_TYPE" in
9177 darwin*) open_cmd='open' ;;
9178 cygwin*) open_cmd='cygstart' ;;
9179 linux*)
9180 if [[ "$_zpwr_uname_r" != *icrosoft* ]];then
9181 open_cmd='nohup xdg-open'
9182 fi
9183 ;;
9184 esac
9185}"##;
9186 let result = parse(input);
9187 assert!(result.is_ok(), "Failed to parse: {:?}", result.err());
9188 }
9189
9190 #[test]
9191 fn test_parse_zpwr_scripts() {
9192 let _g = crate::test_util::global_state_lock();
9193 let scripts_dir = Path::new("/Users/wizard/.zpwr/scripts");
9194 if !scripts_dir.exists() {
9195 eprintln!("Skipping test: scripts directory not found");
9196 return;
9197 }
9198
9199 let mut total = 0;
9200 let mut passed = 0;
9201 let mut failed_files = Vec::new();
9202 let mut timeout_files = Vec::new();
9203
9204 for ext in &["sh", "zsh"] {
9205 let pattern = scripts_dir.join(format!("*.{}", ext));
9206 if let Ok(entries) = glob::glob(pattern.to_str().unwrap()) {
9207 for entry in entries.flatten() {
9208 total += 1;
9209 let file_path = entry.display().to_string();
9210 let content = match fs::read_to_string(&entry) {
9211 Ok(c) => c,
9212 Err(e) => {
9213 failed_files.push((file_path, format!("read error: {}", e)));
9214 continue;
9215 }
9216 };
9217
9218 // Parse with timeout
9219 let content_clone = content.clone();
9220 let (tx, rx) = mpsc::channel();
9221 let handle = thread::spawn(move || {
9222 let result = parse(&content_clone);
9223 let _ = tx.send(result);
9224 });
9225
9226 match rx.recv_timeout(Duration::from_secs(2)) {
9227 Ok(Ok(_)) => passed += 1,
9228 Ok(Err(err)) => {
9229 failed_files.push((file_path, err));
9230 }
9231 Err(_) => {
9232 timeout_files.push(file_path);
9233 // Thread will be abandoned
9234 }
9235 }
9236 }
9237 }
9238 }
9239
9240 eprintln!("\n=== ZPWR Scripts Parse Results ===");
9241 eprintln!("Passed: {}/{}", passed, total);
9242
9243 if !timeout_files.is_empty() {
9244 eprintln!("\nTimeout files (>2s):");
9245 for file in &timeout_files {
9246 eprintln!(" {}", file);
9247 }
9248 }
9249
9250 if !failed_files.is_empty() {
9251 eprintln!("\nFailed files:");
9252 for (file, err) in &failed_files {
9253 eprintln!(" {} - {}", file, err);
9254 }
9255 }
9256
9257 // Allow some failures initially, but track progress
9258 let pass_rate = if total > 0 {
9259 (passed as f64 / total as f64) * 100.0
9260 } else {
9261 0.0
9262 };
9263 eprintln!("Pass rate: {:.1}%", pass_rate);
9264
9265 // Require at least 50% pass rate for now
9266 assert!(pass_rate >= 50.0, "Pass rate too low: {:.1}%", pass_rate);
9267 }
9268
9269 /// c:2643 — `get_cond_num` returns 0..=8 for the canonical binary
9270 /// test operators in order `nt ot ef eq ne lt gt le ge`. The
9271 /// index IS the wordcode opcode dispatch key; flipping any entry
9272 /// would silently mis-dispatch `[[ a -eq b ]]` to a different op.
9273 #[test]
9274 fn get_cond_num_canonical_order_matches_dispatch_table() {
9275 let _g = crate::test_util::global_state_lock();
9276 assert_eq!(get_cond_num("nt"), 0);
9277 assert_eq!(get_cond_num("ot"), 1);
9278 assert_eq!(get_cond_num("ef"), 2);
9279 assert_eq!(get_cond_num("eq"), 3);
9280 assert_eq!(get_cond_num("ne"), 4);
9281 assert_eq!(get_cond_num("lt"), 5);
9282 assert_eq!(get_cond_num("gt"), 6);
9283 assert_eq!(get_cond_num("le"), 7);
9284 assert_eq!(get_cond_num("ge"), 8);
9285 }
9286
9287 /// c:2643 — unknown operator returns -1 (sentinel for "not in the
9288 /// binary set"). Regression returning 0 silently would alias
9289 /// every unknown op to `-nt`, dispatching to the wrong handler.
9290 #[test]
9291 fn get_cond_num_unknown_operator_returns_minus_one() {
9292 let _g = crate::test_util::global_state_lock();
9293 assert_eq!(get_cond_num("xx"), -1);
9294 assert_eq!(get_cond_num(""), -1);
9295 assert_eq!(get_cond_num("eqnt"), -1, "exact-match required");
9296 assert_eq!(
9297 get_cond_num("NT"),
9298 -1,
9299 "case-sensitive — uppercase rejected"
9300 );
9301 }
9302
9303 /// c:2628 — `par_cond_double` requires arg `a` to start with `-`
9304 /// AND have at least one more char. Empty string OR single `-`
9305 /// must error (return 1 via zerr). Regression accepting empty
9306 /// would dispatch `[[ "" string ]]` as a unary test.
9307 #[test]
9308 fn par_cond_double_rejects_short_or_non_dash_first_arg() {
9309 let _g = crate::test_util::global_state_lock();
9310 // empty
9311 let _ = par_cond_double("", "b");
9312 // not-dash
9313 let _ = par_cond_double("foo", "b");
9314 // bare dash
9315 let _ = par_cond_double("-", "b");
9316 // All three must NOT crash + return 1 (error path).
9317 }
9318
9319 /// c:2647 CONDSTRS table — exhaustive iteration: every entry's
9320 /// index round-trips through get_cond_num. A regression that
9321 /// drops an entry would let `[[ a -ef b ]]` silently mis-dispatch.
9322 #[test]
9323 fn get_cond_num_round_trips_for_every_table_entry() {
9324 let _g = crate::test_util::global_state_lock();
9325 for (i, op) in ["nt", "ot", "ef", "eq", "ne", "lt", "gt", "le", "ge"]
9326 .iter()
9327 .enumerate()
9328 {
9329 assert_eq!(get_cond_num(op) as usize, i, "{op} must map to index {i}");
9330 }
9331 }
9332
9333 /// c:2643 — `get_cond_num` is byte-exact: a partial-prefix string
9334 /// must NOT match. `e` (one char) is not `eq`. Catches a
9335 /// regression using `starts_with` instead of equality.
9336 #[test]
9337 fn get_cond_num_partial_prefix_does_not_match() {
9338 let _g = crate::test_util::global_state_lock();
9339 assert_eq!(get_cond_num("e"), -1);
9340 assert_eq!(get_cond_num("eq2"), -1);
9341 assert_eq!(get_cond_num("n"), -1);
9342 }
9343
9344 /// c:2628 — `par_cond_double` checks `IS_DASH(ac[0])` so any
9345 /// non-dash first char fails. The lexed Dash sentinel `\u{9b}`
9346 /// MUST be accepted alongside ASCII `-` (the lexer emits it
9347 /// inside `[[ ... ]]`). Regression dropping the sentinel form
9348 /// would break every cond expression after lexing.
9349 #[test]
9350 fn par_cond_double_accepts_lexed_dash_sentinel() {
9351 let _g = crate::test_util::global_state_lock();
9352 // First char being the Dash sentinel + valid unary letter
9353 // must NOT trigger the "condition expected" error path.
9354 // We can't easily probe the wordcode emission here, but
9355 // the function MUST return without panic for both forms.
9356 let _ = par_cond_double("-z", "foo");
9357 let _ = par_cond_double("\u{9b}z", "foo");
9358 }
9359
9360 /// c:2643 — case sensitivity: uppercase `EQ` MUST NOT match `eq`.
9361 /// zsh's `[[ a -EQ b ]]` is documented as a parse error (only
9362 /// lowercase variants are recognised). Regression doing
9363 /// case-insensitive lookup would silently accept it.
9364 #[test]
9365 fn get_cond_num_is_case_sensitive() {
9366 let _g = crate::test_util::global_state_lock();
9367 assert_eq!(get_cond_num("EQ"), -1);
9368 assert_eq!(get_cond_num("Eq"), -1);
9369 assert_eq!(get_cond_num("eQ"), -1);
9370 // Lowercase still works.
9371 assert_eq!(get_cond_num("eq"), 3);
9372 }
9373
9374 /// `Src/parse.c:2862-2868` — `ecgetstr` inline-3-byte case packs
9375 /// up to 3 chars into bits 3-26 of the wordcode word, then C emits
9376 /// `buf[3] = '\0'; r = dupstring(buf);`. `dupstring` uses `strlen`
9377 /// so the resulting string TRUNCATES at the first NUL byte —
9378 /// short strings of 1 or 2 chars get their tail NUL-padded and
9379 /// silently dropped by strlen.
9380 ///
9381 /// The previous Rust port used `retain(|&x| x != 0)` which SPLICES
9382 /// OUT interior NULs (so `[a, 0, b]` would yield "ab" instead of
9383 /// C's "a"). Verify both endpoints work correctly:
9384 /// * 1-char string ("a", 0, 0) → "a" (strlen-truncate)
9385 /// * 2-char string ("ab", 0) → "ab" (strlen-truncate)
9386 /// * 3-char string ("abc") → "abc" (full)
9387 /// * pathological ("a", 0, "b") → "a" (NOT "ab")
9388 #[test]
9389 fn ecgetstr_inline_string_truncates_at_first_nul_like_c_strlen() {
9390 let _g = crate::test_util::global_state_lock();
9391 // Build a wordcode word with `c & 2 != 0` (inline-string flag)
9392 // and the 3 bytes packed at offsets 3, 11, 19. `c & 1` is the
9393 // tokflag; clear it for this test.
9394 fn pack_inline(b0: u8, b1: u8, b2: u8) -> u32 {
9395 // c:2862 layout — bit0 = tokflag (0 here), bit1 = inline (1),
9396 // bits 3-10 = b0, bits 11-18 = b1, bits 19-26 = b2.
9397 (2u32) | ((b0 as u32) << 3) | ((b1 as u32) << 11) | ((b2 as u32) << 19)
9398 }
9399 let mk_state = |word: u32| -> estate {
9400 let p = eprog {
9401 flags: 0,
9402 len: 1,
9403 npats: 0,
9404 nref: 0,
9405 pats: Vec::new(),
9406 prog: vec![word],
9407 strs: None,
9408 shf: None,
9409 dump: None,
9410 };
9411 estate {
9412 prog: Box::new(p),
9413 pc: 0,
9414 strs: None,
9415 strs_offset: 0,
9416 }
9417 };
9418
9419 // 1-char: ('a', 0, 0) → "a"
9420 let mut st = mk_state(pack_inline(b'a', 0, 0));
9421 assert_eq!(
9422 ecgetstr(&mut st, 0, None),
9423 "a",
9424 "c:2869 strlen truncates 1-char inline at the NUL tail"
9425 );
9426
9427 // 2-char: ('a', 'b', 0) → "ab"
9428 let mut st = mk_state(pack_inline(b'a', b'b', 0));
9429 assert_eq!(
9430 ecgetstr(&mut st, 0, None),
9431 "ab",
9432 "c:2869 strlen truncates 2-char inline at the NUL tail"
9433 );
9434
9435 // 3-char: ('a', 'b', 'c') → "abc"
9436 let mut st = mk_state(pack_inline(b'a', b'b', b'c'));
9437 assert_eq!(
9438 ecgetstr(&mut st, 0, None),
9439 "abc",
9440 "c:2869 full 3-byte inline preserved"
9441 );
9442
9443 // Pathological: ('a', 0, 'b') → "a" (NOT "ab" from retain-splice)
9444 let mut st = mk_state(pack_inline(b'a', 0, b'b'));
9445 assert_eq!(
9446 ecgetstr(&mut st, 0, None),
9447 "a",
9448 "c:2869 strlen STOPS at first NUL; must not splice 'b' through"
9449 );
9450 }
9451
9452 /// Pin: `init_parse_status` resets ALL six lexer-parser flags
9453 /// per `Src/parse.c:500-502`. Specifically `inrepeat_ = 0` at
9454 /// c:501 was previously missing in the Rust port. Pin every
9455 /// reset so a future regression that drops one is caught.
9456 #[test]
9457 fn init_parse_status_resets_all_lexer_parser_flags() {
9458 let _g = crate::test_util::global_state_lock();
9459 // Dirty every flag to a non-default value.
9460 set_incasepat(5);
9461 set_incond(7);
9462 set_inredir(true);
9463 set_infor(3);
9464 set_intypeset(true);
9465 set_inrepeat(2);
9466 set_incmdpos(false);
9467 // Reset.
9468 init_parse_status();
9469 // c:500-502 — every flag back to its default.
9470 assert_eq!(incasepat(), 0, "c:500 — incasepat = 0");
9471 assert_eq!(incond(), 0, "c:500 — incond = 0");
9472 assert!(!inredir(), "c:500 — inredir = 0");
9473 assert_eq!(infor(), 0, "c:500 — infor = 0");
9474 assert!(!intypeset(), "c:500 — intypeset = 0");
9475 assert_eq!(
9476 inrepeat(),
9477 0,
9478 "c:501 — inrepeat_ = 0 (was previously missing)"
9479 );
9480 assert!(incmdpos(), "c:502 — incmdpos = 1");
9481 }
9482
9483 // ═══════════════════════════════════════════════════════════════════
9484 // AST shape tests — feed source through parse(), walk the resulting
9485 // ZshProgram, assert structural properties. Each test uses the local
9486 // `parse(input)` helper that errors cleanly on parse failure.
9487 // Anchor: where applicable, behavior matches `zsh -n -c '...'`
9488 // (parse-only, no execution — which would error on syntax issues).
9489 // ═══════════════════════════════════════════════════════════════════
9490
9491 /// Empty input → ZshProgram with no lists.
9492 #[test]
9493 fn parse_empty_source_yields_zero_lists() {
9494 let _g = crate::test_util::global_state_lock();
9495 let prog = parse("").unwrap();
9496 assert_eq!(prog.lists.len(), 0);
9497 }
9498
9499 /// Comment-only input → no lists (comments are skipped at lex level).
9500 #[test]
9501 fn parse_only_comment_yields_zero_lists() {
9502 let _g = crate::test_util::global_state_lock();
9503 let prog = parse("# this is just a comment").unwrap();
9504 assert_eq!(prog.lists.len(), 0, "comments alone produce no cmds");
9505 }
9506
9507 /// Three commands separated by `;` → three lists.
9508 #[test]
9509 fn parse_three_semicolon_separated_commands_yield_three_lists() {
9510 let _g = crate::test_util::global_state_lock();
9511 let prog = parse("a; b; c").unwrap();
9512 assert_eq!(prog.lists.len(), 3);
9513 }
9514
9515 /// Background command — async flag set on the list.
9516 #[test]
9517 fn parse_background_command_sets_async_flag() {
9518 let _g = crate::test_util::global_state_lock();
9519 let prog = parse("sleep 1 &").unwrap();
9520 assert_eq!(prog.lists.len(), 1);
9521 assert!(
9522 prog.lists[0].flags.async_,
9523 "trailing `&` must set async_ flag"
9524 );
9525 }
9526
9527 /// Pipe count: `a | b | c | d` → 4 stages.
9528 #[test]
9529 fn parse_four_stage_pipeline_has_three_next_links() {
9530 let _g = crate::test_util::global_state_lock();
9531 let prog = parse("a | b | c | d").unwrap();
9532 let mut pipe = &prog.lists[0].sublist.pipe;
9533 let mut count = 1;
9534 while let Some(next) = &pipe.next {
9535 pipe = next;
9536 count += 1;
9537 }
9538 assert_eq!(count, 4, "4 commands should produce 4 pipe stages");
9539 }
9540
9541 /// `|&` between pipeline stages sets merge_stderr.
9542 #[test]
9543 fn parse_pipe_amp_sets_merge_stderr() {
9544 let _g = crate::test_util::global_state_lock();
9545 let prog = parse("a |& b").unwrap();
9546 let pipe = &prog.lists[0].sublist.pipe;
9547 assert!(pipe.next.is_some());
9548 assert!(pipe.merge_stderr, "|& must set merge_stderr");
9549 }
9550
9551 /// `cmd1 || cmd2`: sublist.next is Some with `Or`.
9552 #[test]
9553 fn parse_or_operator_sets_sublist_op_or() {
9554 let _g = crate::test_util::global_state_lock();
9555 let prog = parse("cmd1 || cmd2").unwrap();
9556 let sublist = &prog.lists[0].sublist;
9557 let (op, _) = sublist.next.as_ref().expect("must have next");
9558 assert_eq!(*op, SublistOp::Or);
9559 }
9560
9561 /// `! cmd` sets the not flag on the sublist.
9562 #[test]
9563 fn parse_bang_negation_sets_sublist_not_flag() {
9564 let _g = crate::test_util::global_state_lock();
9565 let prog = parse("! false").unwrap();
9566 let sublist = &prog.lists[0].sublist;
9567 assert!(sublist.flags.not, "`!` prefix must set sublist.flags.not");
9568 }
9569
9570 // ── Compound commands ────────────────────────────────────────────
9571 /// `while cond; do body; done` → ZshCommand::While.
9572 #[test]
9573 fn parse_while_loop_yields_while_command() {
9574 let _g = crate::test_util::global_state_lock();
9575 let prog = parse("while true; do echo x; done").unwrap();
9576 assert!(matches!(
9577 prog.lists[0].sublist.pipe.cmd,
9578 ZshCommand::While(_)
9579 ));
9580 }
9581
9582 /// `until cond; do body; done` → ZshCommand::Until.
9583 /// Anchor: `zsh -n -c 'until false; do echo; done'` accepts and parses
9584 /// as an until-loop. zshrs accepts but emits a DIFFERENT AST variant
9585 /// (not Until). Bug — until loop is mis-classified.
9586 #[test]
9587 fn parse_until_loop_yields_until_command_anchored_to_zsh() {
9588 let _g = crate::test_util::global_state_lock();
9589 let prog = parse("until false; do echo x; done").unwrap();
9590 assert!(
9591 matches!(prog.lists[0].sublist.pipe.cmd, ZshCommand::Until(_)),
9592 "zsh parses `until` as Until variant; zshrs uses different variant: {:?}",
9593 prog.lists[0].sublist.pipe.cmd
9594 );
9595 }
9596
9597 /// `(cmd)` → Subsh variant.
9598 #[test]
9599 fn parse_parens_yield_subsh_command() {
9600 let _g = crate::test_util::global_state_lock();
9601 let prog = parse("(echo hi)").unwrap();
9602 assert!(matches!(
9603 prog.lists[0].sublist.pipe.cmd,
9604 ZshCommand::Subsh(_)
9605 ));
9606 }
9607
9608 /// `{ cmd; }` → Cursh (current-shell) command.
9609 #[test]
9610 fn parse_braces_yield_cursh_command() {
9611 let _g = crate::test_util::global_state_lock();
9612 let prog = parse("{ echo hi; }").unwrap();
9613 assert!(matches!(
9614 prog.lists[0].sublist.pipe.cmd,
9615 ZshCommand::Cursh(_)
9616 ));
9617 }
9618
9619 /// `[[ a == b ]]` → ZshCommand::Cond.
9620 #[test]
9621 fn parse_double_brackets_yield_cond_command() {
9622 let _g = crate::test_util::global_state_lock();
9623 let prog = parse("[[ a == b ]]").unwrap();
9624 assert!(matches!(
9625 prog.lists[0].sublist.pipe.cmd,
9626 ZshCommand::Cond(_)
9627 ));
9628 }
9629
9630 /// `(( 1 + 2 ))` → ZshCommand::Arith.
9631 #[test]
9632 fn parse_double_parens_yield_arith_command() {
9633 let _g = crate::test_util::global_state_lock();
9634 let prog = parse("(( 1 + 2 ))").unwrap();
9635 assert!(matches!(
9636 prog.lists[0].sublist.pipe.cmd,
9637 ZshCommand::Arith(_)
9638 ));
9639 }
9640
9641 /// `repeat 3 do echo x; done` → ZshCommand::Repeat.
9642 #[test]
9643 fn parse_repeat_loop_yields_repeat_command() {
9644 let _g = crate::test_util::global_state_lock();
9645 let prog = parse("repeat 3 do echo x; done").unwrap();
9646 assert!(matches!(
9647 prog.lists[0].sublist.pipe.cmd,
9648 ZshCommand::Repeat(_)
9649 ));
9650 }
9651
9652 // ── Function definitions ─────────────────────────────────────────
9653 /// `name() { body; }` → FuncDef variant.
9654 #[test]
9655 fn parse_paren_funcdef_yields_funcdef_command() {
9656 let _g = crate::test_util::global_state_lock();
9657 let prog = parse("greet() { echo hi; }").unwrap();
9658 assert!(matches!(
9659 prog.lists[0].sublist.pipe.cmd,
9660 ZshCommand::FuncDef(_)
9661 ));
9662 }
9663
9664 /// `function name { body; }` → FuncDef variant (zsh keyword form).
9665 #[test]
9666 fn parse_function_keyword_funcdef_yields_funcdef_command() {
9667 let _g = crate::test_util::global_state_lock();
9668 let prog = parse("function greet { echo hi; }").unwrap();
9669 assert!(matches!(
9670 prog.lists[0].sublist.pipe.cmd,
9671 ZshCommand::FuncDef(_)
9672 ));
9673 }
9674
9675 /// Syntax error — `if` without `fi` → parse returns Err.
9676 /// Anchor: `echo 'if true; then echo' | zsh -n` → "parse error".
9677 #[test]
9678 fn parse_unterminated_if_returns_error_anchored_to_zsh() {
9679 let _g = crate::test_util::global_state_lock();
9680 let r = parse("if true; then echo yes");
9681 assert!(r.is_err(), "zsh -n: parse error near `\\n`");
9682 }
9683
9684 /// Syntax error — bare `done` without `for/while/until` → error.
9685 /// Anchor: `echo done | zsh -n` → "parse error near `done`".
9686 #[test]
9687 fn parse_orphan_done_returns_error_anchored_to_zsh() {
9688 let _g = crate::test_util::global_state_lock();
9689 let r = parse("done");
9690 assert!(r.is_err(), "zsh -n: parse error near `done`");
9691 }
9692
9693 /// Simple command's words are metafied at the AST layer (matches
9694 /// zsh's internal representation: `-` lexes to `Dash` = 0x9b, `*`
9695 /// to `Star`, etc.). zsh untokenizes via `untokenize()` BEFORE
9696 /// surfacing words at execution time (Src/exec.c:execcmd_args).
9697 /// This test pins the round-trip: `untokenize(word)` recovers the
9698 /// user-visible form. If parse-time unmetafy ever lands the
9699 /// untokenize call becomes a no-op; the test stays green either
9700 /// way. Companion test below pins the metafied internal form.
9701 #[test]
9702 fn parse_simple_command_words_unmetafied_like_zsh_anchored() {
9703 let _g = crate::test_util::global_state_lock();
9704 let prog = parse("ls -la /tmp").unwrap();
9705 match &prog.lists[0].sublist.pipe.cmd {
9706 ZshCommand::Simple(s) => {
9707 let untok: Vec<String> = s
9708 .words
9709 .iter()
9710 .map(|w| crate::ported::lex::untokenize(w))
9711 .collect();
9712 assert_eq!(
9713 untok,
9714 vec!["ls", "-la", "/tmp"],
9715 "untokenize(word) must yield the user-visible form"
9716 );
9717 }
9718 other => panic!("expected Simple, got {other:?}"),
9719 }
9720 }
9721
9722 /// Pin the OBSERVED zshrs contract: simple-command word array
9723 /// contains metafied bytes. This is the active (passing) version
9724 /// of the anchor above — it documents zshrs's current internal
9725 /// representation. If zshrs starts unmetafying at parse time, this
9726 /// test will FAIL and the anchor-style test above will start passing.
9727 #[test]
9728 fn parse_simple_command_words_metafied_internal_form() {
9729 let _g = crate::test_util::global_state_lock();
9730 let prog = parse("ls -la /tmp").unwrap();
9731 match &prog.lists[0].sublist.pipe.cmd {
9732 ZshCommand::Simple(s) => {
9733 assert_eq!(s.words.len(), 3);
9734 assert_eq!(s.words[0], "ls");
9735 assert_eq!(s.words[2], "/tmp");
9736 // s.words[1] contains the metafied `-` (`\u{9b}` Dash byte)
9737 // followed by "la". Don't pin the exact byte form (it
9738 // may change); pin that the length is right.
9739 assert_eq!(s.words[1].chars().count(), 3, "`-la` is 3 chars");
9740 assert!(s.words[1].ends_with("la"));
9741 }
9742 other => panic!("expected Simple, got {other:?}"),
9743 }
9744 }
9745
9746 // ─── zsh-corpus pins for parser: structural shapes ────────────────
9747
9748 /// Empty input — parse succeeds, lists may be empty.
9749 #[test]
9750 fn parse_corpus_empty_input_no_error() {
9751 let _g = crate::test_util::global_state_lock();
9752 let prog = parse("").unwrap();
9753 assert!(
9754 prog.lists.is_empty() || prog.lists.len() <= 1,
9755 "empty input → 0 or 1 list, got {}",
9756 prog.lists.len()
9757 );
9758 }
9759
9760 /// Comment-only input parses as empty.
9761 #[test]
9762 fn parse_corpus_comment_only_no_error() {
9763 let _g = crate::test_util::global_state_lock();
9764 let r = parse("# just a comment");
9765 assert!(r.is_ok(), "comment-only parse should succeed");
9766 }
9767
9768 /// `cmd1; cmd2` — two top-level lists or two sublists.
9769 #[test]
9770 fn parse_corpus_semicolon_separates_commands() {
9771 let _g = crate::test_util::global_state_lock();
9772 let prog = parse("echo a; echo b").unwrap();
9773 // We pin: parse produces > 0 lists/sublists; details vary.
9774 assert!(!prog.lists.is_empty(), "non-empty parse");
9775 }
9776
9777 /// `a && b` — DAMPER joins into a sublist chain.
9778 #[test]
9779 fn parse_corpus_logical_and_parses() {
9780 let _g = crate::test_util::global_state_lock();
9781 let r = parse("true && false");
9782 assert!(r.is_ok(), "`a && b` parses cleanly");
9783 }
9784
9785 /// `a || b` — DBAR.
9786 #[test]
9787 fn parse_corpus_logical_or_parses() {
9788 let _g = crate::test_util::global_state_lock();
9789 let r = parse("false || true");
9790 assert!(r.is_ok(), "`a || b` parses cleanly");
9791 }
9792
9793 /// `a | b` pipeline.
9794 #[test]
9795 fn parse_corpus_pipeline_parses() {
9796 let _g = crate::test_util::global_state_lock();
9797 let r = parse("echo hi | cat");
9798 assert!(r.is_ok(), "`a | b` parses");
9799 }
9800
9801 /// `if true; then echo x; fi` — basic if-then-fi block.
9802 #[test]
9803 fn parse_corpus_if_then_fi_parses() {
9804 let _g = crate::test_util::global_state_lock();
9805 let r = parse("if true; then echo x; fi");
9806 assert!(r.is_ok(), "if/then/fi parses cleanly");
9807 }
9808
9809 /// `for i in 1 2 3; do echo $i; done`.
9810 #[test]
9811 fn parse_corpus_for_do_done_parses() {
9812 let _g = crate::test_util::global_state_lock();
9813 let r = parse("for i in 1 2 3; do echo $i; done");
9814 assert!(r.is_ok(), "for/do/done parses cleanly");
9815 }
9816
9817 /// `while true; do break; done`.
9818 #[test]
9819 fn parse_corpus_while_do_done_parses() {
9820 let _g = crate::test_util::global_state_lock();
9821 let r = parse("while true; do break; done");
9822 assert!(r.is_ok(), "while/do/done parses cleanly");
9823 }
9824
9825 /// `case x in (a) echo A;; esac` — case statement.
9826 #[test]
9827 fn parse_corpus_case_esac_parses() {
9828 let _g = crate::test_util::global_state_lock();
9829 let r = parse("case x in (a) echo A;; esac");
9830 assert!(r.is_ok(), "case/esac parses cleanly");
9831 }
9832
9833 /// Function definition `f() { echo x }`.
9834 #[test]
9835 fn parse_corpus_function_def_parses() {
9836 let _g = crate::test_util::global_state_lock();
9837 let r = parse("f() { echo x }");
9838 assert!(r.is_ok(), "f() {{ ... }} parses cleanly");
9839 }
9840
9841 /// `(subshell echo a)` — subshell.
9842 #[test]
9843 fn parse_corpus_subshell_parens_parses() {
9844 let _g = crate::test_util::global_state_lock();
9845 let r = parse("( echo a )");
9846 assert!(r.is_ok(), "subshell parses cleanly");
9847 }
9848
9849 // ═══════════════════════════════════════════════════════════════════
9850 // C-parity tests pinning Src/parse.c. Tests that capture KNOWN
9851 // ZSHRS BUGS use #[ignore = "ZSHRS BUG: …"].
9852 // ═══════════════════════════════════════════════════════════════════
9853
9854 /// `empty_eprog(p)` returns true on an eprog with empty `prog`.
9855 /// C `Src/parse.c:584`:
9856 /// `return (!p || !p->prog || *p->prog == WCB_END());`
9857 /// Rust port at parse.rs:685 — `p.prog.is_empty() || p.prog[0] == WCB_END()`.
9858 #[test]
9859 fn empty_eprog_empty_prog_returns_true() {
9860 let _g = crate::test_util::global_state_lock();
9861 let p = crate::ported::zsh_h::eprog::default();
9862 assert!(empty_eprog(&p), "empty prog vec → empty_eprog true");
9863 }
9864
9865 /// `empty_eprog(p)` returns true when first wordcode is WCB_END.
9866 /// C: `*p->prog == WCB_END()`.
9867 #[test]
9868 fn empty_eprog_first_wcb_end_returns_true() {
9869 let _g = crate::test_util::global_state_lock();
9870 let mut p = crate::ported::zsh_h::eprog::default();
9871 p.prog.push(WCB_END());
9872 assert!(empty_eprog(&p), "prog[0]==WCB_END → empty_eprog true");
9873 }
9874
9875 /// `empty_eprog(p)` returns false for non-empty non-END prog.
9876 #[test]
9877 fn empty_eprog_non_empty_non_end_returns_false() {
9878 let _g = crate::test_util::global_state_lock();
9879 let mut p = crate::ported::zsh_h::eprog::default();
9880 // Push some non-END wordcode (1 is arbitrary non-zero, not WCB_END).
9881 p.prog.push(1);
9882 assert!(!empty_eprog(&p), "non-END first opcode → false");
9883 }
9884
9885 /// `ecstrcode("")` returns a wordcode for the empty string. C
9886 /// `Src/parse.c:346-ish` ecstrcode interns strings in `ecbuf`.
9887 /// Pin: same call returns same wordcode (deterministic intern).
9888 #[test]
9889 fn ecstrcode_empty_string_returns_deterministic_code() {
9890 let _g = crate::test_util::global_state_lock();
9891 init_parse();
9892 let a = ecstrcode("");
9893 let b = ecstrcode("");
9894 assert_eq!(a, b, "intern of '' must be deterministic");
9895 }
9896
9897 /// `ecstrcode` of two different strings returns different codes.
9898 #[test]
9899 fn ecstrcode_distinct_strings_get_distinct_codes() {
9900 let _g = crate::test_util::global_state_lock();
9901 init_parse();
9902 let a = ecstrcode("foo");
9903 let b = ecstrcode("bar");
9904 // Should differ — if equal, intern table collapsed two different
9905 // strings to the same key (bug).
9906 assert_ne!(a, b, "different strings must intern to different codes");
9907 }
9908
9909 /// `parse_event(ENDINPUT)` on empty input returns None.
9910 /// C `Src/parse.c:715-ish` — empty token stream → no program.
9911 #[test]
9912 #[ignore = "ZSHRS BUG: parse_event setup needs lex state — exact behavior on empty input verification pending"]
9913 fn parse_event_empty_returns_none() {
9914 let _g = crate::test_util::global_state_lock();
9915 init_parse();
9916 // Empty input typically yields no program; needs lex state.
9917 let r = parse_event(crate::ported::lex::ENDINPUT);
9918 assert!(r.is_none(), "no tokens → no event");
9919 }
9920
9921 // ═══════════════════════════════════════════════════════════════════
9922 // Additional C-parity tests for Src/parse.c.
9923 // ═══════════════════════════════════════════════════════════════════
9924
9925 /// c:399 — `ecadd(c)` returns the index where `c` was placed,
9926 /// not the post-increment value. Sequential ecadd calls return
9927 /// strictly increasing indices.
9928 #[test]
9929 fn ecadd_returns_strictly_increasing_indices() {
9930 let _g = crate::test_util::global_state_lock();
9931 init_parse();
9932 let i0 = ecadd(0xDEAD);
9933 let i1 = ecadd(0xBEEF);
9934 let i2 = ecadd(0xC0DE);
9935 assert!(
9936 i1 > i0,
9937 "ecadd indices must strictly increase, got {i0} then {i1}"
9938 );
9939 assert!(
9940 i2 > i1,
9941 "ecadd indices must strictly increase, got {i1} then {i2}"
9942 );
9943 assert_eq!(i1, i0 + 1, "consecutive ecadds advance by 1");
9944 assert_eq!(i2, i1 + 1, "consecutive ecadds advance by 1");
9945 }
9946
9947 /// c:413 — `ecdel(p)` removes one wordcode, shrinks ecused by 1.
9948 /// Pin: subsequent ecadd reuses freed slot (ecused decreased).
9949 #[test]
9950 fn ecdel_shrinks_ecused_by_one() {
9951 let _g = crate::test_util::global_state_lock();
9952 init_parse();
9953 let _i0 = ecadd(0xA);
9954 let i1 = ecadd(0xB);
9955 let _i2 = ecadd(0xC);
9956 let next_before = ECUSED.get();
9957 ecdel(i1);
9958 let next_after = ECUSED.get();
9959 assert_eq!(
9960 next_after,
9961 next_before - 1,
9962 "ecdel must decrement ecused by exactly 1"
9963 );
9964 }
9965
9966 /// c:399-405 — `ecadd` after exhausting buffer must grow it (no
9967 /// panic on push past current eclen). Pin: 1000 adds don't crash.
9968 #[test]
9969 fn ecadd_grows_buffer_on_demand() {
9970 let _g = crate::test_util::global_state_lock();
9971 init_parse();
9972 for i in 0..1000 {
9973 ecadd(i as u32);
9974 }
9975 // No panic = grow path works.
9976 assert!(ECUSED.get() >= 1000, "1000 adds → ecused ≥ 1000");
9977 }
9978
9979 /// c:426 — `ecstrcode` of short strings (≤4 bytes) returns a
9980 /// packed inline wordcode (not an offset into the string region).
9981 /// Pin: identical short strings get identical codes.
9982 #[test]
9983 fn ecstrcode_short_strings_are_deterministic() {
9984 let _g = crate::test_util::global_state_lock();
9985 init_parse();
9986 let a = ecstrcode("ab");
9987 let b = ecstrcode("ab");
9988 assert_eq!(a, b, "same short string must intern to same code");
9989 }
9990
9991 /// c:426 — long strings (>4 bytes) hit the deduped string region.
9992 /// Pin: same long string returns same code on repeat (registry
9993 /// dedupes).
9994 #[test]
9995 fn ecstrcode_long_strings_dedupe_in_registry() {
9996 let _g = crate::test_util::global_state_lock();
9997 init_parse();
9998 let a = ecstrcode("a-much-longer-test-string");
9999 let b = ecstrcode("a-much-longer-test-string");
10000 assert_eq!(a, b, "registry must dedupe identical long strings");
10001 }
10002
10003 /// `clear_hdocs()` is idempotent — calling twice in a row leaves
10004 /// HDOCS = None and LEX_HEREDOCS empty.
10005 #[test]
10006 fn clear_hdocs_is_idempotent() {
10007 let _g = crate::test_util::global_state_lock();
10008 clear_hdocs();
10009 clear_hdocs();
10010 HDOCS.with_borrow(|h| assert!(h.is_none(), "HDOCS must be None"));
10011 LEX_HEREDOCS.with_borrow(|v| assert!(v.is_empty(), "LEX_HEREDOCS must be empty"));
10012 }
10013
10014 /// `init_parse()` resets parse state to known empty defaults.
10015 /// Multiple init_parse calls are safe (idempotent).
10016 #[test]
10017 fn init_parse_is_idempotent() {
10018 let _g = crate::test_util::global_state_lock();
10019 init_parse();
10020 init_parse();
10021 // No panic = pass.
10022 }
10023
10024 /// `empty_eprog` returns true for a default-constructed eprog
10025 /// (empty prog vec).
10026 #[test]
10027 fn empty_eprog_true_for_empty_prog() {
10028 let _g = crate::test_util::global_state_lock();
10029 let p = eprog {
10030 prog: Vec::new(),
10031 ..Default::default()
10032 };
10033 assert!(empty_eprog(&p), "empty prog vec → empty eprog");
10034 }
10035
/// An `eprog` whose first (and only) wordcode is `WCB_END()` must be
/// reported as empty by `empty_eprog`.
#[test]
fn empty_eprog_true_for_end_only_prog() {
    let _guard = crate::test_util::global_state_lock();
    let prog = vec![WCB_END()];
    let end_only = eprog {
        prog,
        ..eprog::default()
    };
    let verdict = empty_eprog(&end_only);
    assert!(verdict, "WCB_END as first opcode → empty");
}
10046
/// `ecadjusthere(p, d)` must be callable right after `clear_hdocs()`:
/// a few position/delta pairs are fed through and the test passes if
/// none of the calls panics.
#[test]
fn ecadjusthere_safe_when_hdocs_none() {
    let _guard = crate::test_util::global_state_lock();
    clear_hdocs();
    // Reaching the end of the loop without a panic is the pass condition.
    for (pos, delta) in [(0, 0), (100, -5), (0, 10)] {
        ecadjusthere(pos, delta);
    }
}
10057
/// `ecispace(p, n)` with `n == 0` must leave ECUSED exactly where it
/// was before the call.
#[test]
fn ecispace_zero_n_is_noop() {
    let _guard = crate::test_util::global_state_lock();
    init_parse();

    let used_before = ECUSED.get();
    ecispace(0, 0);
    let used_after = ECUSED.get();

    assert_eq!(
        used_before, used_after,
        "ecispace(_, 0) must not advance ecused"
    );
}
10068
// ═══════════════════════════════════════════════════════════════════
// Additional C-parity tests for Src/parse.c
// c:146 parse_context_save / c:191 parse_context_restore /
// c:225 ecadjusthere / c:293 ecadd / c:346 ecstrcode / c:574 init_parse /
// c:685 empty_eprog / c:693 clear_hdocs / c:786 parse_list / c:815 parse_cond /
// c:2234 par_wordlist / c:2249 par_nl_wordlist
// ═══════════════════════════════════════════════════════════════════
10076
/// c:293 — compile-time pin: `ecadd` must return a `usize`.
#[test]
fn ecadd_returns_usize_type() {
    let _guard = crate::test_util::global_state_lock();
    init_parse();
    // The annotated binding stops compiling if the return type drifts.
    let _index: usize = ecadd(0);
}
10084
/// c:346 — compile-time pin: `ecstrcode` must return a `u32`.
#[test]
fn ecstrcode_returns_u32_type() {
    let _guard = crate::test_util::global_state_lock();
    init_parse();
    // The annotated binding stops compiling if the return type drifts.
    let _code: u32 = ecstrcode("");
}
10092
/// c:346 — smoke test: `ecstrcode` accepts the empty string without
/// panicking. The returned code is deliberately ignored.
#[test]
fn ecstrcode_empty_string_no_panic() {
    let _guard = crate::test_util::global_state_lock();
    init_parse();
    let _ignored = ecstrcode("");
}
10100
/// c:346 — `ecstrcode` is deterministic: for each sample string, three
/// further calls must all return the code produced by the first call.
#[test]
fn ecstrcode_is_deterministic() {
    let _guard = crate::test_util::global_state_lock();
    init_parse();

    let samples = ["", "a", "abc", "hello world"];
    for sample in samples {
        let baseline = ecstrcode(sample);
        for _attempt in 1..=3 {
            let again = ecstrcode(sample);
            assert_eq!(
                again, baseline,
                "ecstrcode({:?}) must be deterministic",
                sample
            );
        }
    }
}
10118
/// c:786 — compile-time pin: `parse_list` must return `Option<eprog>`.
#[test]
fn parse_list_returns_option_eprog_type() {
    let _guard = crate::test_util::global_state_lock();
    init_parse();
    // Only the type matters here; the parsed value itself is unused.
    let _parsed: Option<eprog> = parse_list();
}
10126
/// c:815 — compile-time pin: `parse_cond` must return `Option<eprog>`.
#[test]
fn parse_cond_returns_option_eprog_type() {
    let _guard = crate::test_util::global_state_lock();
    init_parse();
    // Only the type matters here; the parsed value itself is unused.
    let _parsed: Option<eprog> = parse_cond();
}
10134
/// c:2234 — compile-time pin: `par_wordlist` must return `Vec<String>`.
#[test]
fn par_wordlist_returns_vec_string_type() {
    let _guard = crate::test_util::global_state_lock();
    init_parse();
    // Only the type matters here; the word list itself is unused.
    let _words: Vec<String> = par_wordlist();
}
10142
/// c:2249 — compile-time pin: `par_nl_wordlist` must return
/// `Vec<String>`.
#[test]
fn par_nl_wordlist_returns_vec_string_type() {
    let _guard = crate::test_util::global_state_lock();
    init_parse();
    // Only the type matters here; the word list itself is unused.
    let _words: Vec<String> = par_nl_wordlist();
}
10150
/// c:693 — `clear_hdocs` is deterministic: the heredoc state observed
/// after a second call must match the state observed after the first.
///
/// The state is sampled as a pair of flags — whether HDOCS is `None`
/// and whether LEX_HEREDOCS is empty — so the test checks that repeat
/// calls agree with each other rather than merely not panicking.
#[test]
fn clear_hdocs_deterministic_after_call() {
    let _g = crate::test_util::global_state_lock();

    // Observe HDOCS and LEX_HEREDOCS as (is-none, is-empty) flags.
    let snapshot = || {
        (
            HDOCS.with_borrow(|h| h.is_none()),
            LEX_HEREDOCS.with_borrow(|v| v.is_empty()),
        )
    };

    clear_hdocs();
    let after_first = snapshot();
    clear_hdocs();
    let after_second = snapshot();

    assert_eq!(
        after_first, after_second,
        "clear_hdocs must leave identical state on every call"
    );
}
10158
/// c:225 — smoke test: `ecadjusthere` with position 0 and a zero delta
/// must return without panicking.
#[test]
fn ecadjusthere_zero_delta_no_panic() {
    let _guard = crate::test_util::global_state_lock();
    let (pos, delta) = (0, 0);
    ecadjusthere(pos, delta);
}
10165
/// c:225 — smoke test: `ecadjusthere` must not panic for a spread of
/// positions, each tried with a zero, positive and negative delta.
#[test]
fn ecadjusthere_arbitrary_pos_no_panic() {
    let _guard = crate::test_util::global_state_lock();
    for pos in [0usize, 1, 100, 9999] {
        for delta in [0, 1, -1] {
            ecadjusthere(pos, delta);
        }
    }
}
10176
// ═══════════════════════════════════════════════════════════════════
// Additional C-parity tests for Src/parse.c FD_* accessors
// c:3127 fdmagic / c:3131 fdflags / c:3132 fdsetflags / c:3133 fdother /
// c:3134 fdsetother / c:3140 fdversion /
// c:3145 fdhflags / c:3146 fdhtail / c:3147 fdhbldflags
// ═══════════════════════════════════════════════════════════════════
10182
10183 fn build_fd_header() -> Vec<u32> {
10184 let mut buf = vec![0u32; FD_PRELEN + 32];
10185 buf[0] = FD_MAGIC; // pre[0] magic
10186 buf[1] = (0x12u32) | (0x00ABCDEFu32 << 8); // flags=0x12, other=0xABCDEF
10187 // Embed version string starting at pre[2].
10188 let ver = b"5.9\0";
10189 for (i, chunk) in ver.chunks(4).enumerate() {
10190 let mut word = [0u8; 4];
10191 word[..chunk.len()].copy_from_slice(chunk);
10192 buf[2 + i] = u32::from_le_bytes(word);
10193 }
10194 buf[FD_PRELEN - 1] = (FD_PRELEN as u32) + 8; // header-len slot
10195 buf
10196 }
10197
/// c:3127 — `fdmagic(f)` hands back word 0 of the header unchanged.
#[test]
fn fdmagic_returns_pre_zero_word() {
    let header = build_fd_header();
    let magic = fdmagic(&header);
    assert_eq!(magic, FD_MAGIC, "fdmagic = pre[0]");
}
10204
/// c:3131 — `fdflags` yields the low byte of pre[1], which the test
/// header sets to 0x12.
#[test]
fn fdflags_low_byte_extraction() {
    let header = build_fd_header();
    let flags = fdflags(&header);
    assert_eq!(flags, 0x12, "flags = pre[1] & 0xff");
}
10211
/// c:3133 — `fdother` yields the upper 24 bits of pre[1], which the
/// test header sets to 0xABCDEF.
#[test]
fn fdother_high_24_bits_extraction() {
    let header = build_fd_header();
    let other = fdother(&header);
    assert_eq!(other, 0x00ABCDEF, "other = pre[1] >> 8 & 0x00ffffff");
}
10222
/// c:3132 — `fdsetflags` replaces the flags byte while leaving the
/// value read back by `fdother` unchanged.
#[test]
fn fdsetflags_preserves_high_24_bits() {
    let mut header = build_fd_header();
    let untouched_other = fdother(&header);

    fdsetflags(&mut header, 0x42);

    let flags_now = fdflags(&header);
    assert_eq!(flags_now, 0x42, "new flags written");
    let other_now = fdother(&header);
    assert_eq!(other_now, untouched_other, "high 24 bits preserved");
}
10232
/// c:3134 — `fdsetother` replaces the `other` field while leaving the
/// value read back by `fdflags` unchanged.
#[test]
fn fdsetother_preserves_low_byte() {
    let mut header = build_fd_header();
    let untouched_flags = fdflags(&header);

    fdsetother(&mut header, 0x00DEADBE);

    let other_now = fdother(&header);
    assert_eq!(other_now, 0x00DEADBE, "new other written");
    let flags_now = fdflags(&header);
    assert_eq!(flags_now, untouched_flags, "low byte preserved");
}
10242
/// c:3134 — `fdsetother` keeps only 24 bits: writing an all-ones
/// 32-bit value must read back as 0x00FF_FFFF.
#[test]
fn fdsetother_clamps_to_24_bits() {
    let mut header = build_fd_header();
    fdsetother(&mut header, 0xFFFF_FFFF);
    // Only the low 24 bits of the argument land in `other`.
    let other_now = fdother(&header);
    assert_eq!(other_now, 0x00FF_FFFF, "high bits dropped");
}
10251
/// c:3140 — compile-time pin: `fdversion(buf)` must return a `String`.
#[test]
fn fdversion_returns_string_type() {
    let header = build_fd_header();
    // Only the type matters here; the text is checked in another test.
    let _version: String = fdversion(&header);
}
10258
/// c:3140 — `fdversion` reads the string stored from pre[2] onwards
/// and stops at the NUL, giving "5.9" for the test header.
#[test]
fn fdversion_reads_until_nul() {
    let header = build_fd_header();
    let version = fdversion(&header);
    assert_eq!(version, "5.9", "version read until NUL");
}
10265
/// c:3145 — `fdhflags(h)` yields the low two bits of `h.flags`.
#[test]
fn fdhflags_low_two_bits() {
    // 0b1011: upper part (tail) is 0b10 = 2, low two bits are 0b11.
    let raw_flags = 0b1011;
    let head = fdhead {
        start: 0,
        len: 0,
        npats: 0,
        strs: 0,
        hlen: 0,
        flags: raw_flags,
    };
    let low_bits = fdhflags(&head);
    assert_eq!(low_bits, 0b11, "flags = h.flags & 0x3");
}
10279
/// c:3146 — `fdhtail(h)` yields `h.flags` shifted right by two, i.e.
/// everything above the two flag bits.
#[test]
fn fdhtail_shift_right_two() {
    // Tail 0x12_3456 sits above the two low flag bits (both set).
    let raw_flags = (0x12_3456 << 2) | 0x3;
    let head = fdhead {
        start: 0,
        len: 0,
        npats: 0,
        strs: 0,
        hlen: 0,
        flags: raw_flags,
    };
    let tail = fdhtail(&head);
    assert_eq!(tail, 0x12_3456, "tail = h.flags >> 2");
}
10293
/// c:3147 — `fdhbldflags(flags, tail)` packs both values into one
/// word: flags in the low two bits, tail in the bits above them.
#[test]
fn fdhbldflags_packs_flags_low_tail_high() {
    let word = fdhbldflags(0x3, 0x42);
    let low = word & 0x3;
    let high = word >> 2;
    assert_eq!(low, 0x3, "low 2 bits = flags");
    assert_eq!(high, 0x42, "high 30 bits = tail");
}
10301
/// c:3145-3147 — round trip: a word built by `fdhbldflags` must give
/// back the original flags through `fdhflags` and the original tail
/// through `fdhtail`.
#[test]
fn fdh_round_trip_via_bldflags() {
    let cases = [(0u32, 0u32), (1, 100), (2, 0xABC), (3, 0xFFFF)];
    for (want_flags, want_tail) in cases {
        let head = fdhead {
            start: 0,
            len: 0,
            npats: 0,
            strs: 0,
            hlen: 0,
            flags: fdhbldflags(want_flags, want_tail),
        };
        let got_flags = fdhflags(&head);
        assert_eq!(got_flags, want_flags, "flags round-trips");
        let got_tail = fdhtail(&head);
        assert_eq!(got_tail, want_tail, "tail round-trips");
    }
}
10319
/// c:8271 — `firstfdhead_offset()` must equal the `FD_PRELEN`
/// constant.
#[test]
fn firstfdhead_offset_returns_prelen() {
    let offset = firstfdhead_offset();
    assert_eq!(offset, FD_PRELEN, "first header starts after prelude");
}
10329
/// c:3127 — `fdmagic` reports whichever magic sits in word 0, so
/// `FD_MAGIC` and `FD_OMAGIC` can be told apart; the two constants
/// must also differ from each other.
#[test]
fn fdmagic_differentiates_magic_omagic() {
    let mut words = vec![FD_MAGIC; FD_PRELEN];
    let native = fdmagic(&words);
    assert_eq!(native, FD_MAGIC);

    // Overwrite word 0 with the other magic and read it back.
    words[0] = FD_OMAGIC;
    let swapped = fdmagic(&words);
    assert_eq!(swapped, FD_OMAGIC, "swapped magic readable");

    assert_ne!(FD_MAGIC, FD_OMAGIC, "the two magics differ");
}
10339}