zsh/ported/parse.rs
1//! Zsh parser — direct port from zsh/Src/parse.c.
2//!
3//! Pulls tokens via the lex.rs free ported (zshlex/tok/tokstr) and
4//! builds an AST tree (relocated to src/extensions/zsh_ast.rs as a
5//! Rust-only IR) plus emits wordcode into ECBUF via the P9b/P9c
6//! pipeline. Follows the zsh grammar closely; productions match
7//! `par_*` in Src/parse.c.
8
9use super::lex::{
10 lextok, set_tok, AMPER, AMPERBANG, AMPOUTANG, BANG_TOK, BARAMP, BAR_TOK, CASE, COPROC, DAMPER,
11 DBAR, DINANG, DINANGDASH, DINBRACK, DINPAR, DOLOOP, DONE, DOUTANG, DOUTANGAMP, DOUTANGAMPBANG,
12 DOUTANGBANG, DOUTBRACK, DOUTPAR, DSEMI, ELIF, ELSE, ENDINPUT, ENVARRAY, ENVSTRING, ESAC, FI,
13 FOR, FOREACH, FUNC, IF, INANGAMP, INANG_TOK, INBRACE_TOK, INOUTANG, INOUTPAR, INPAR_TOK,
14 IS_REDIROP, LEXERR, LEX_HEREDOCS, NEWLIN, NOCORRECT, OUTANGAMP, OUTANGAMPBANG, OUTANGBANG,
15 OUTANG_TOK, OUTBRACE_TOK, OUTPAR_TOK, REPEAT, SELECT, SEMI, SEMIAMP, SEMIBAR, SEPER,
16 STRING_LEX, THEN, TIME, TRINANG, TYPESET, UNTIL, WHILE, ZEND,
17};
18use super::zsh_h::{
19 eprog, estate, funcdump, isset, redir, unset, wc_code, wordcode, Bang, Dash, Equals, Inang,
20 Outang, Tilde, ALIASFUNCDEF, COND_AND, COND_MOD, COND_MODI, COND_NOT, COND_NT, COND_OR,
21 COND_REGEX, COND_STRDEQ, COND_STREQ, COND_STRGTR, COND_STRLT, COND_STRNEQ, CSHJUNKIELOOPS,
22 EC_DUP, EC_NODUP, EF_HEAP, EF_REAL, EXECOPT, IGNOREBRACES, IS_DASH, MULTIFUNCDEF, OPT_ISSET,
23 PM_UNDEFINED, POSIXBUILTINS, REDIRF_FROM_HEREDOC, REDIR_APP, REDIR_APPNOW, REDIR_ERRAPP,
24 REDIR_ERRAPPNOW, REDIR_ERRWRITE, REDIR_ERRWRITENOW, REDIR_FROM_HEREDOC_MASK, REDIR_HEREDOC,
25 REDIR_HEREDOCDASH, REDIR_HERESTR, REDIR_INPIPE, REDIR_MERGEIN, REDIR_MERGEOUT, REDIR_OUTPIPE,
26 REDIR_READ, REDIR_READWRITE, REDIR_VARID_MASK, REDIR_WRITE, REDIR_WRITENOW, SHORTLOOPS,
27 SHORTREPEAT, WCB_COND, WCB_SIMPLE, WC_REDIR, WC_REDIR_FROM_HEREDOC, WC_REDIR_TYPE,
28 WC_REDIR_VARID, WC_SUBLIST_COPROC, WC_SUBLIST_NOT,
29};
30pub use crate::heredoc_ast::HereDoc;
31use crate::ported::lex::{
32 incasepat, incmdpos, incond, infor, input_slice, inredir, inrepeat, intypeset, isnewlin,
33 lex_init, lineno, noaliases, nocorrect, pos, set_incasepat, set_incmdpos, set_incond,
34 set_infor, set_inredir, set_inrepeat, set_intypeset, set_isnewlin, set_lineno, set_noaliases,
35 set_nocorrect, tok, tokfd, toklineno, tokstr, zshlex,
36};
37use crate::ported::signals::unqueue_signals;
38use crate::ported::utils::{errflag, zerr, zwarnnam, ERRFLAG_ERROR};
39use crate::prompt::{cmdpop, cmdpush};
40pub use crate::zsh_ast::{
41 CaseArm, CaseTerm, CaseTerminator, CompoundCommand, ForList, HereDocInfo, ListFlags, ListOp,
42 Redirect, RedirectOp, ShellCommand, ShellWord, SimpleCommand, SublistFlags, SublistOp,
43 VarModifier, ZshAssign, ZshAssignValue, ZshCase, ZshCommand, ZshCond, ZshFor, ZshFuncDef,
44 ZshIf, ZshList, ZshParamFlag, ZshPipe, ZshProgram, ZshRedir, ZshRepeat, ZshSimple, ZshSublist,
45 ZshTry, ZshWhile,
46};
47use crate::zsh_h::{
48 wc_bdata, CS_ALWAYS, CS_ARRAY, CS_CASE, CS_CMDAND, CS_CMDOR, CS_COND, CS_CURSH, CS_ELIF,
49 CS_ELSE, CS_ERRPIPE, CS_FOR, CS_FOREACH, CS_FUNCDEF, CS_IF, CS_IFTHEN, CS_PIPE, CS_REPEAT,
50 CS_SELECT, CS_SUBSH, CS_UNTIL, CS_WHILE, EF_RUN, WCB_ARITH, WCB_ASSIGN, WCB_CASE, WCB_CURSH,
51 WCB_END, WCB_FOR, WCB_FUNCDEF, WCB_IF, WCB_LIST, WCB_PIPE, WCB_REDIR, WCB_REPEAT, WCB_SELECT,
52 WCB_SUBLIST, WCB_SUBSH, WCB_TIMED, WCB_TRY, WCB_TYPESET, WCB_WHILE, WC_ASSIGN_ARRAY,
53 WC_ASSIGN_INC, WC_ASSIGN_NEW, WC_ASSIGN_SCALAR, WC_CASE_AND, WC_CASE_HEAD, WC_CASE_OR,
54 WC_CASE_TESTAND, WC_FOR_COND, WC_FOR_LIST, WC_FOR_PPARAM, WC_IF_ELIF, WC_IF_ELSE, WC_IF_HEAD,
55 WC_IF_IF, WC_PIPE_END, WC_PIPE_LINENO, WC_PIPE_MID, WC_REDIR_WORDS, WC_SELECT_LIST,
56 WC_SELECT_PPARAM, WC_SUBLIST_AND, WC_SUBLIST_END, WC_SUBLIST_FLAGS, WC_SUBLIST_OR,
57 WC_SUBLIST_SIMPLE, WC_SUBLIST_TYPE, WC_TIMED_EMPTY, WC_TIMED_PIPE, WC_WHILE_UNTIL,
58 WC_WHILE_WHILE, Z_ASYNC, Z_DISOWN, Z_END, Z_SIMPLE, Z_SYNC,
59};
60use serde::{Deserialize, Serialize};
61use std::fs::{self, File};
62use std::io::{Read, Seek, SeekFrom, Write};
63use std::os::unix::fs::MetadataExt;
64use std::path::Path;
65use std::sync::atomic::{AtomicUsize, Ordering};
66use std::sync::mpsc;
67use std::thread;
68use std::time::Duration;
69
70// Names lifted out of inside-fn `use` statements (PORT.md
71// 'no imports inside FNs ever').
72
73// Direct port of `Src/parse.c:287-289` grow-policy constants.
// Only the initial size is declared here; EC_DOUBLE_THRESHOLD and
// EC_INCREMENT are declared further down, after the wordcode
// thread-locals.
// EC_INIT_SIZE is the number of zeroed wordcode slots `init_parse`
// gives a fresh ECBUF (and the value it stores in ECLEN).
74const EC_INIT_SIZE: i32 = 256;
75
76// Pending-here-document list — direct port of `Src/parse.c:84
77// struct heredocs *hdocs;`. Per-parser file-static (bucket-1 in
78// PORT_PLAN.md): each worker thread parsing a separate program needs
79// its own pending-heredoc list. Saved/restored across nested parses
80// by `parse_context_save`/`parse_context_restore` (parse.c:299/337).
81thread_local! {
82 /// Port of file-static `struct heredocs *hdocs;` from `Src/parse.c:84`.
 ///
 /// Head of this thread's pending here-document list; `None` means
 /// nothing is pending. `parse_context_save` moves the list out (leaving
 /// `None`), `parse_context_restore` stores a clone of the saved list
 /// back, and `clear_hdocs` drops it.
83 pub static HDOCS: std::cell::RefCell<Option<Box<crate::ported::zsh_h::heredocs>>>
84 = const { std::cell::RefCell::new(None) };
85}
86
87// Wordcode-buffer thread-locals — direct port of `Src/parse.c:269-285`
88// file-statics. Per-evaluator (bucket-1 in PORT_PLAN.md): each worker
89// thread parsing a separate program needs its own wordcode buffer.
90//
91// ECBUF: the wordcode array being built. C `Wordcode ecbuf`
92// (parse.c:275).
93// ECLEN: allocated entries in ECBUF (parse.c:269).
94// ECUSED: entries actually used so far (parse.c:271).
95// ECNPATS: count of patterns referenced by ECBUF (parse.c:273).
96// ECSOFFS / ECSSUB: byte offsets into the string region
97// (parse.c:279). ECSSUB subtracts substring overlap.
98// ECNFUNC: count of functions defined so far (parse.c:285).
99// ECSTRS_INDEX: dedup index for long strings — C uses a binary tree
100// of `struct eccstr` (zsh.h:836); the canonical Eccstr port exists
101// at zsh_h::eccstr but stays unused at runtime here. The HashMap
102// preserves the API contract (lookup by (nfunc, str) → offs) with
103// simpler ownership semantics.
104thread_local! {
105 /// `ECBUF` static.
 /// The wordcode array under construction. `init_parse` fills it with
 /// `EC_INIT_SIZE` zeroed slots, `ecadd` / `ecispace` / `ecdel` edit it,
 /// and `bld_eprog` copies the first `ECUSED` entries out and clears it.
106 pub static ECBUF: std::cell::RefCell<Vec<u32>> = std::cell::RefCell::new(Vec::new());
 /// Slot count the grow policy believes ECBUF has (C `eclen`).
107 static ECLEN: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
 /// Number of ECBUF slots in use; also the index `ecadd` writes next.
108 static ECUSED: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
 /// Pattern count; `bld_eprog` copies it into `eprog.npats` and into
 /// the `len` computation.
109 static ECNPATS: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
 /// Running byte size of the string region; `ecstrcode` advances it by
 /// each new long string's length plus its NUL terminator.
110 static ECSOFFS: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
 /// Subtracted from ECSOFFS when `ecstrcode` computes the offset it
 /// encodes into the returned wordcode.
111 static ECSSUB: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
 /// Function counter; `ecstrcode` stores it in each tree node and uses
 /// it as the first comparison key.
112 static ECNFUNC: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
 /// HashMap dedup index keyed by `(nfunc, str)`.
 /// NOTE(review): in the code visible here this map is only cleared
 /// (`init_parse`, `bld_eprog`); `ecstrcode` dedups through ECSTRS_TREE
 /// and never consults it — confirm whether it is still needed.
113 static ECSTRS_INDEX: std::cell::RefCell<std::collections::HashMap<(i32, String), u32>>
114 = std::cell::RefCell::new(std::collections::HashMap::new());
115 /// C zsh's `eccstr` BST (parse.c:447). Port of `Eccstr ecstrs` —
116 /// a hashval-ordered binary search tree of long-strings for
117 /// dedup. Same cmp logic as C: nfunc, then hashval, then strcmp.
118 /// HashMap above is a fast-path lookup; this tree is the
119 /// C-fidelity walker that mirrors C's exact dedup-hit pattern
120 /// (including its quirks for hash-colliding content).
121 static ECSTRS_TREE: std::cell::RefCell<Option<Box<EccstrNode>>>
122 = const { std::cell::RefCell::new(None) };
123 /// Reverse index for `ecgetstr`: offs → owned string. Populated
124 /// at ecstrcode time so the consumer can recover the string from
125 /// the wordcode offs without walking the encode-time HashMap.
126 /// Stores the METAFIED BYTE form of each long-string, exactly
127 /// matching what C's strs region holds. `String` would not work
128 /// here because Rust strings carry UTF-8-encoded chars (e.g.
129 /// the Dash marker `\u{9b}` UTF-8-encodes to two bytes
130 /// `\xc2 \x9b`) while C stores zsh markers as single bytes
131 /// (raw `\x9b`). Storing Vec<u8> lets us write byte-for-byte
132 /// what C writes after metafy.
133 pub static ECSTRS_REVERSE: std::cell::RefCell<std::collections::HashMap<u32, Vec<u8>>>
134 = std::cell::RefCell::new(std::collections::HashMap::new());
135}
// Grow policy shared by `ecadd` and `ecispace`: while ECLEN is below
// EC_DOUBLE_THRESHOLD the buffer grows by its current length (doubling);
// at or above it the buffer grows by EC_INCREMENT slots per step.
// `ecispace` additionally grows by at least the number of slots requested.
136const EC_DOUBLE_THRESHOLD: i32 = 32768;
137const EC_INCREMENT: i32 = 1024;
138
139/// Direct port of `parse_context_save(struct parse_stack *ps, int toplevel)` at `Src/parse.c:295`.
140/// Snapshots the lexer-side file-statics (which currently live on
141/// `lexer` until Phase 7 dissolution makes them file-scope
142/// thread_local!s) plus the pending heredoc list, plus the
143/// wordcode-buffer state (STUB until Phase 9b). Saves Rust-only
144/// recursion counters too so nested parses get fresh limits.
145/// WARNING: param names don't match C — Rust=(ps) vs C=(ps, toplevel)
146pub fn parse_context_save(ps: &mut parse_stack) {
147 // parse.c:299 — `ps->hdocs = hdocs; hdocs = NULL;` — save the
148 // canonical C linked-list and clear it for the nested parse.
149 ps.hdocs = HDOCS.with_borrow_mut(|h| h.take());
150 // zshrs-only: save the parallel AST-glue Vec the same way.
151 // LEX_HEREDOCS carries terminator/strip_tabs/quoted metadata
152 // that has no C analog (C stores it implicitly via tokstr).
153 ps.lex_heredocs = LEX_HEREDOCS.with_borrow_mut(|v| std::mem::take(v));
154 // parse.c:302-310 — save lexer-side state.
155 ps.incmdpos = incmdpos();
156 // parse.c:303 — `ps->aliasspaceflag = aliasspaceflag;`. Mirrors
157 // lex.c LEX_ALIAS_SPACE_FLAG so nested parses preserve the
158 // HISTIGNORESPACE-via-alias state across parser re-entry.
159 ps.aliasspaceflag = crate::ported::lex::LEX_ALIAS_SPACE_FLAG.with(|c| c.get());
160 ps.incond = incond();
161 ps.inredir = inredir();
162 ps.incasepat = incasepat();
163 ps.isnewlin = isnewlin();
164 ps.infor = infor();
165 ps.inrepeat_ = inrepeat();
166 ps.intypeset = intypeset();
167 // parse.c:312-317 — wordcode buffer state. STUB until Phase 9b
168 // (zshrs has no ecbuf yet).
169 ps.eclen = 0;
170 ps.ecused = 0;
171 ps.ecnpats = 0;
172 ps.ecbuf = None;
173 ps.ecstrs = None;
174 ps.ecsoffs = 0;
175 ps.ecssub = 0;
176 ps.ecnfunc = 0;
177 set_incmdpos(true);
178 set_incond(0);
179 set_inredir(false);
180 set_incasepat(0);
181 set_infor(0);
182 set_inrepeat(0);
183 set_intypeset(false);
184}
185
186/// Direct port of `parse_context_restore(const struct parse_stack *ps, int toplevel)` at `Src/parse.c:326`.
187/// Inverse of `parse_context_save`. Restores lexer-side state +
188/// pending heredocs + Rust-only counters from `ps`, then clears
189/// `errflag & ERRFLAG_ERROR` per parse.c:354.
190/// WARNING: param names don't match C — Rust=(ps) vs C=(ps, toplevel)
191pub fn parse_context_restore(ps: &parse_stack) {
192 // parse.c:330-331 — free any in-progress wordcode buffer.
193 // zshrs has no wordcode yet (STUB until Phase 9b); the AST
194 // nodes are owned by their parent so dropping the parser
195 // frees them.
196
197 // parse.c:333-352 — restore saved state.
198 // parse.c:337 — `hdocs = ps->hdocs;`
199 HDOCS.with_borrow_mut(|h| *h = ps.hdocs.clone());
200 // zshrs-only: restore the parallel AST-glue Vec.
201 LEX_HEREDOCS.with_borrow_mut(|v| *v = ps.lex_heredocs.clone());
202 set_incmdpos(ps.incmdpos);
203 // parse.c:334 — `aliasspaceflag = ps->aliasspaceflag;`.
204 crate::ported::lex::LEX_ALIAS_SPACE_FLAG.with(|c| c.set(ps.aliasspaceflag));
205 set_incond(ps.incond);
206 set_inredir(ps.inredir);
207 set_incasepat(ps.incasepat);
208 set_isnewlin(ps.isnewlin);
209 set_infor(ps.infor);
210 set_inrepeat(ps.inrepeat_);
211 set_intypeset(ps.intypeset);
212 // ecbuf/eclen/ecused/ecnpats/ecstrs/ecsoffs/ecssub/ecnfunc
213 // STUB until Phase 9b.
214
215 // parse.c:354 — `errflag &= ~ERRFLAG_ERROR;` — clear the
216 // error flag so the outer parse sees a clean state.
217 errflag.fetch_and(!ERRFLAG_ERROR, Ordering::Relaxed);
218}
219
/// Placeholder for `ecadjusthere(int p, int d)` at `Src/parse.c:360`.
///
/// In C this walks the pending here-document list and adds `d` to every
/// entry whose `pc` is at or after `p`
/// (`for (p2 = hdocs; p2; p2 = p2->next) if (p2->pc >= p) p2->pc += d;`).
/// It is called by `ecispace` and `ecdel` whenever wordcodes shift.
///
/// The Rust body currently does nothing with either argument.
/// NOTE(review): the original comment attributes this to pending
/// here-documents not yet carrying a wordcode `pc` (planned for
/// Phase 9c) — confirm against the `heredocs` port before relying on it.
pub fn ecadjusthere(p: usize, d: i32) {
    // Intentionally a no-op; consume the arguments so the signature stays
    // in C shape without an `unused_variables` allowance.
    let _ = (p, d);
}
233
234// === AST tree relocated to src/extensions/zsh_ast.rs ===
235//
236// zsh C does NOT have an AST tree — it emits wordcode directly via
237// par_event/par_list/par_sublist/par_pipe/par_cmd/par_simple/etc.
238// (Src/parse.c:485-3000) into a flat `Wordcode ecbuf[]`. The Zsh*/
239// Shell* AST node types lived in this file as a Rust-only IR that
240// stands in for that wordcode.
241//
242// P9e (PORT_PLAN.md): the types moved to src/extensions/zsh_ast.rs
243// to make their Rust-only-extension nature explicit. The full P9c +
244// P9d rewrite (par_* emitting wordcode + vm_helper reading wordcode)
245// retires them entirely — until then, callers reach them via this
246// re-export.
247
248/// Direct port of `ecispace(int p, int n)` at `Src/parse.c:372`. Insert `n`
249/// empty wordcode slots at position `p`, shifting later entries
250/// right, growing the buffer as needed, adjusting heredoc pointers.
251pub fn ecispace(p: usize, n: usize) {
252 // parse.c:376-381 — grow if needed.
253 let need = n as i32;
254 if (ECLEN.get() - ECUSED.get()) < need {
255 let cur = ECLEN.get();
256 let mut a = if cur < EC_DOUBLE_THRESHOLD {
257 cur
258 } else {
259 EC_INCREMENT
260 };
261 if need > a {
262 a = need;
263 }
264 ECBUF.with_borrow_mut(|buf| {
265 buf.resize((cur + a) as usize, 0);
266 });
267 ECLEN.set(cur + a);
268 }
269 // parse.c:382-385 — memmove p → p+n, gap of n.
270 let m = ECUSED.get() as usize - p;
271 if m > 0 {
272 ECBUF.with_borrow_mut(|buf| {
273 let needed = (ECUSED.get() as usize) + n;
274 if buf.len() < needed {
275 buf.resize(needed, 0);
276 }
277 for i in (0..m).rev() {
278 buf[p + n + i] = buf[p + i];
279 }
280 for i in 0..n {
281 buf[p + i] = 0;
282 }
283 });
284 }
285 // parse.c:386 — bump ecused by n.
286 ECUSED.set(ECUSED.get() + need);
287 // parse.c:387 — `ecadjusthere(p, n)`.
288 ecadjusthere(p, need);
289}
290
291/// Direct port of `ecadd(wordcode c)` at `Src/parse.c:397`. Append `c` to
292/// the wordcode buffer with grow-on-demand, return the new index.
293pub fn ecadd(c: u32) -> usize {
294 // parse.c:399-405 — `if ((eclen - ecused) < 1) grow`.
295 if (ECLEN.get() - ECUSED.get()) < 1 {
296 let cur = ECLEN.get();
297 let a = if cur < EC_DOUBLE_THRESHOLD {
298 cur
299 } else {
300 EC_INCREMENT
301 };
302 ECBUF.with_borrow_mut(|buf| {
303 buf.resize((cur + a) as usize, 0);
304 });
305 ECLEN.set(cur + a);
306 }
307 let idx = ECUSED.get();
308 ECBUF.with_borrow_mut(|buf| {
309 if (idx as usize) >= buf.len() {
310 buf.resize((idx + 1) as usize, 0);
311 }
312 buf[idx as usize] = c;
313 });
314 ECUSED.set(idx + 1);
315 idx as usize
316}
317
318/// Direct port of `ecdel(int p)` at `Src/parse.c:413`. Remove the
319/// wordcode at position `p`, shift later entries left by one,
320/// decrement ecused, adjust pending heredoc pointers.
321pub fn ecdel(p: usize) {
322 // parse.c:415-418 — memmove + decrement ecused.
323 let n = ECUSED.get() as usize - p - 1;
324 if n > 0 {
325 ECBUF.with_borrow_mut(|buf| {
326 for i in 0..n {
327 buf[p + i] = buf[p + i + 1];
328 }
329 });
330 }
331 ECUSED.set(ECUSED.get() - 1);
332 // parse.c:420 — `ecadjusthere(p, -1)`.
333 ecadjusthere(p, -1);
334}
335
336/// Direct port of `ecstrcode(char *s)` at `Src/parse.c:426`. Encode a
337/// string into a single wordcode (short strings ≤4 bytes packed
338/// inline; longer strings get an offset into the deduped registry).
339///
340/// The long-string path stores the METAFIED bytes (matches what C's
341/// strs region contains): collapse Rust UTF-8 chars in 0x80..=0xff
342/// to single bytes, then apply zsh metafy (high bytes ≥ 0x83 →
343/// `Meta=0x83 + byte^0x20`). Length tracking (ECSOFFS) uses the
344/// metafied byte count — same as C `strlen(s) + 1` where C's `s`
345/// is already metafied at this point.
346pub fn ecstrcode(s: &str) -> u32 {
347 // Convert Rust char-form → C-byte form. zsh's metafy() at
348 // Src/utils.c only converts bytes flagged IMETA: 0x00, 0x83
349 // (Meta itself), and 0x84..=0xa2 (Pound..Marker, the lex
350 // markers). Other bytes 0x01..=0x82 and 0xa3..=0xff pass
351 // through unchanged. See utils.c:4195-4204 typtab init.
352 //
353 // Rust receives chars. Classify each:
354 // - codepoint in [0x83..=0xa2] → marker char (emitted by lex
355 // post-metafy in C); 1 byte unchanged
356 // - codepoint < 0x80 → ASCII, 1 byte unchanged
357 // - codepoint in [0x80..=0x82] or [0xa3..=0xff] → single
358 // non-imeta byte (user-input range); 1 byte unchanged
359 // - codepoint > 0xff → multi-byte UTF-8 source char (e.g.
360 // '━' = U+2501 = 0xe2 0x94 0x81). Metafy ONLY the bytes
361 // that fall in 0x83..=0xa2; pass others through. For '━':
362 // 0xe2 stays, 0x94 → 0x83 0xb4, 0x81 stays.
363 let mut c_bytes: Vec<u8> = Vec::with_capacity(s.len());
364 let imeta = |b: u8| -> bool { b == 0 || (0x83..=0xa2).contains(&b) };
365 for ch in s.chars() {
366 let cu = ch as u32;
367 if cu < 0x80 {
368 // ASCII — single byte unchanged.
369 c_bytes.push(cu as u8);
370 } else if (0x83..=0xa2).contains(&cu) {
371 // Lex marker char (emitted by lex.add(Marker) post-metafy
372 // in C). Stored as single byte.
373 c_bytes.push(cu as u8);
374 } else {
375 // User-input char: encode UTF-8 then metafy imeta bytes.
376 // For chars 0x80..=0xff (like 'º' U+00BA), UTF-8 gives
377 // 2 bytes (e.g. `0xc2 0xba`) — zsh's lex reads these as
378 // raw bytes from input and metafy passes 0xc2 / 0xba
379 // through (both NOT imeta).
380 let mut tmp = [0u8; 4];
381 for &b in ch.encode_utf8(&mut tmp).as_bytes() {
382 if imeta(b) {
383 c_bytes.push(0x83);
384 c_bytes.push(b ^ 0x20);
385 } else {
386 c_bytes.push(b);
387 }
388 }
389 }
390 }
391 // c:`has_token` (Src/utils.c:2282) → `itok(*s)` → `typtab[c] & ITOK`.
392 // ITOK is set for bytes `Pound..=Nularg` (0x84..=0xa1) per
393 // Src/utils.c:4198 (`for (t0=Pound; t0<=LAST_NORMAL_TOK; t0++)
394 // typtab[t0]|=ITOK`) plus :4200 (`for (t0=Snull; t0<=Nularg; t0++)
395 // typtab[t0]|=ITOK|IMETA|INULL`). Pound=0x84 Bang=0x9c (last normal),
396 // Snull=0x9d..Nularg=0xa1. Meta=0x83 has IMETA but NOT ITOK.
397 let t = c_bytes.iter().any(|&b| (0x84..=0xa1).contains(&b));
398 let l = c_bytes.len() + 1; // include NUL terminator
399 if l <= 4 {
400 // parse.c:436-445 — short-string inline pack. Uses raw C-bytes
401 // (NOT metafied — the inline packing stores 1 byte per slot).
402 let mut c: u32 = if t { 3 } else { 2 };
403 match l {
404 4 => {
405 c |= (c_bytes[2] as u32) << 19;
406 c |= (c_bytes[1] as u32) << 11;
407 c |= (c_bytes[0] as u32) << 3;
408 }
409 3 => {
410 c |= (c_bytes[1] as u32) << 11;
411 c |= (c_bytes[0] as u32) << 3;
412 }
413 2 => {
414 c |= (c_bytes[0] as u32) << 3;
415 }
416 1 => {
417 // parse.c:443 — empty string special case.
418 c = if t { 7 } else { 6 };
419 }
420 _ => {}
421 }
422 c
423 } else {
424 // parse.c:447-466 — long string. Port of C's eccstr BST walk
425 // exactly: walk the tree comparing nfunc, then hashval, then
426 // strcmp on bytes. Return offs on full match; insert new
427 // leaf otherwise. Matches C's exact dedup-hit pattern
428 // (which is content-dependent — hash collisions and the
429 // lazy short-circuit cmp chain make the tree shape determine
430 // whether matching nodes are reachable).
431 // hasher is byte-by-byte polynomial (hashtable.c:86); pass
432 // c_bytes via from_utf8_unchecked so non-UTF-8 zsh marker
433 // bytes feed straight in. SAFETY: hasher only iterates
434 // `.bytes()` — no UTF-8 validity assumed.
435 let val =
436 crate::ported::hashtable::hasher(unsafe { std::str::from_utf8_unchecked(&c_bytes) });
437 let nfunc = ECNFUNC.get();
438 let found_offs = ECSTRS_TREE.with_borrow_mut(|root| {
439 // Walk the tree. At each node, if all 3 cmps == 0,
440 // return the node's offs. Otherwise descend left/right
441 // by the first non-zero cmp's sign.
442 let mut cur: &mut Option<Box<EccstrNode>> = root;
443 loop {
444 let p = match cur.as_mut() {
445 Some(p) => p,
446 None => break None,
447 };
448 // c:448 — `cmp = p->nfunc - ecnfunc`
449 let mut cmp = (p.nfunc as i64) - (nfunc as i64);
450 if cmp == 0 {
451 // c:448 — `&& !(cmp = (long)p->hashval - (long)val)`
452 // C does `(int)(p->hashval - val)` — unsigned 32-bit
453 // subtraction wraps, then cast to int. Use
454 // wrapping_sub + as i32 to match the bit pattern.
455 cmp = (p.hashval.wrapping_sub(val) as i32) as i64;
456 if cmp == 0 {
457 // c:448 — `&& !(cmp = strcmp(p->str, s))`
458 cmp = match p.str.as_slice().cmp(c_bytes.as_slice()) {
459 std::cmp::Ordering::Less => -1,
460 std::cmp::Ordering::Equal => 0,
461 std::cmp::Ordering::Greater => 1,
462 };
463 if cmp == 0 {
464 // c:450 — `return p->offs;`
465 break Some(p.offs);
466 }
467 }
468 }
469 // c:452 — `pp = (cmp < 0 ? &p->left : &p->right);`
470 cur = if cmp < 0 { &mut p.left } else { &mut p.right };
471 }
472 });
473 if let Some(offs) = found_offs {
474 return offs;
475 }
476 // c:462 — `p->offs = ((ecsoffs - ecssub) << 2) | (t ? 1 : 0);`
477 let offs = (((ECSOFFS.get() - ECSSUB.get()) as u32) << 2) | if t { 1 } else { 0 };
478 // c:463 — `p->aoffs = ecsoffs;` (absolute write position).
479 let aoffs = ECSOFFS.get() as u32;
480 // c:457-465 — insert new node at the NULL slot the walk
481 // terminated at. Encode the walk path as a Vec<bool> of
482 // left/right turns (true = right), then re-descend to
483 // insert. Borrow-checker friendly: a single mutable walk
484 // that either finds an existing node (descend) or fills
485 // the empty slot (return).
486 let stored = c_bytes.clone();
487 let stored_len = stored.len();
488 let new_node = Box::new(EccstrNode {
489 left: None,
490 right: None,
491 str: stored.clone(),
492 offs,
493 aoffs,
494 nfunc,
495 hashval: val,
496 });
497 ECSTRS_TREE.with_borrow_mut(|root| {
498 // Build the path first (immutable-walk; safe because we
499 // only ever go further down).
500 let mut path: Vec<bool> = Vec::new();
501 {
502 let mut cur: &Option<Box<EccstrNode>> = root;
503 while let Some(p) = cur.as_ref() {
504 let mut cmp = (p.nfunc as i64) - (nfunc as i64);
505 if cmp == 0 {
506 // C does `(int)(p->hashval - val)` — unsigned 32-bit
507 // subtraction wraps, then cast to int. Use
508 // wrapping_sub + as i32 to match the bit pattern.
509 cmp = (p.hashval.wrapping_sub(val) as i32) as i64;
510 if cmp == 0 {
511 cmp = match p.str.as_slice().cmp(c_bytes.as_slice()) {
512 std::cmp::Ordering::Less => -1,
513 std::cmp::Ordering::Equal => 0,
514 std::cmp::Ordering::Greater => 1,
515 };
516 }
517 }
518 let go_right = cmp >= 0;
519 path.push(go_right);
520 cur = if go_right { &p.right } else { &p.left };
521 }
522 }
523 // Descend mutably along the recorded path and assign at
524 // the NULL leaf.
525 let mut cur: &mut Option<Box<EccstrNode>> = root;
526 for turn in path {
527 let p = cur.as_mut().expect("path matches walk");
528 cur = if turn { &mut p.right } else { &mut p.left };
529 }
530 *cur = Some(new_node);
531 });
532 // Also keep the existing reverse index (offs → bytes) for
533 // ecgetstr_wordcode and copy_ecstr — they read flat by offs.
534 ECSTRS_REVERSE.with_borrow_mut(|m| {
535 m.insert(offs, stored);
536 });
537 let _ = l;
538 ECSOFFS.set(ECSOFFS.get() + (stored_len + 1) as i32);
539 offs
540 }
541}
542
543/// Initialize parser status. Direct port of zsh/Src/parse.c:491
544/// `init_parse_status`. Clears the per-parse-call lexer flags
545/// so a fresh parse starts from cmd-position with no nesting
546/// state inherited from a prior parse.
547///
548/// Previously the Rust port omitted `inrepeat_ = 0` at c:501.
549/// `inrepeat_` is the `repeat N <body>` parse-state counter that
550/// the lexer toggles in 3 phases (1 → 2 → 3 → 0). Without the
551/// reset, a fresh parse called after an in-flight `repeat`
552/// command would inherit the stale counter and silently misread
553/// the next token as a body of an already-completed repeat.
554pub fn init_parse_status() {
555 // c:491
556 // parse.c:500-502 — `incasepat = incond = inredir = infor =
557 // intypeset = 0; inrepeat_ = 0; incmdpos = 1;`
558 set_incasepat(0); // c:500
559 set_incond(0); // c:500
560 set_inredir(false); // c:500
561 set_infor(0); // c:500
562 set_intypeset(false); // c:500
563 set_inrepeat(0); // c:501 inrepeat_ = 0
564 set_incmdpos(true); // c:502
565}
566
567/// Initialize parser for a fresh parse. Direct port of
568/// zsh/Src/parse.c:509 `init_parse`. C source allocates a
569/// fresh wordcode buffer (ecbuf) sized EC_INIT_SIZE, resets the
570/// per-parse-call counters, and calls init_parse_status. zshrs
571/// has no flat wordcode buffer (AST is built inline) so this
572/// function reduces to init_parse_status + recursion_depth/
573/// global_iterations clear.
574pub fn init_parse() {
575 // parse.c:513-520 — `ecbuf = (Wordcode) zalloc(EC_INIT_SIZE *
576 // sizeof(wordcode)); eclen = EC_INIT_SIZE; ecused = 0;
577 // ecnpats = 0; ecstrs = NULL; ecsoffs = ecnfunc = 0;
578 // ecssub = 0;`. P9b — initialize the per-evaluator wordcode
579 // buffer for this parse call. zshrs uses thread-local
580 // statics declared at file scope (parse.rs:25-50).
581 ECBUF.with_borrow_mut(|buf| {
582 buf.clear();
583 buf.resize(EC_INIT_SIZE as usize, 0);
584 });
585 ECLEN.set(EC_INIT_SIZE);
586 ECUSED.set(0);
587 ECNPATS.set(0);
588 ECSOFFS.set(0);
589 ECSSUB.set(0);
590 ECNFUNC.set(0);
591 ECSTRS_INDEX.with_borrow_mut(|m| m.clear());
592 ECSTRS_REVERSE.with_borrow_mut(|m| m.clear());
593 ECSTRS_TREE.with_borrow_mut(|t| *t = None);
594
595 // parse.c:522 — `init_parse_status();`
596 init_parse_status();
597}
598
599/// Port of `copy_ecstr(Eccstr s, char *p)` from `Src/parse.c:537`.
600/// Walks the BST and writes each entry to `p[s->aoffs..]` matching
601/// C's recursive in-order traversal exactly. The old impl used the
602/// `ECSTRS_REVERSE` HashMap keyed by `offs` (= ecssub-relative
603/// wordcode-encoded offset), which collides across funcdef scopes:
604/// a string at relative offs=0 inside funcdef A and another at
605/// relative offs=0 inside funcdef B share the same key, so one
606/// overwrites the other.
607pub fn copy_ecstr(_table: &std::collections::HashMap<u32, Vec<u8>>, p: &mut [u8]) {
608 // c:537-544 — walk eccstr BST recursively, writing each node's
609 // str at p[node->aoffs..node->aoffs + strlen + 1] (NUL-terminated).
610 ECSTRS_TREE.with_borrow(|root| {
611 copy_ecstr_walk(root, p);
612 });
613}
614
615/// Port of `bld_eprog(int heap)` from `Src/parse.c:547`. Finalizes
616/// the in-build `ECBUF`/`ECSTRS`/`ECNPATS` state into an `Eprog`.
617/// Resets the build state so a new parse can start.
618pub fn bld_eprog(heap: bool) -> eprog {
619 // c:547
620
621 // c:555 — emit WC_END opcode. `WCB_END` is `WC_END_DEFAULT` (0).
622 ecadd(0);
623
624 let ecused = ECUSED.with(|c| c.get()) as usize;
625 let ecnpats = ECNPATS.with(|c| c.get()) as usize;
626 let ecsoffs = ECSOFFS.with(|c| c.get()) as usize;
627
628 // c:557-559 — `ret->len = ((ecnpats * sizeof(Patprog)) +
629 // (ecused * sizeof(wordcode)) +
630 // ecsoffs);`
631 // sizeof(Patprog) = sizeof(struct patprog *) = pointer size.
632 // On 64-bit targets that's 8, on 32-bit that's 4. C's eprog
633 // ->len is the canonical value for parity tests, so we use
634 // the same arithmetic.
635 let prog_bytes = ecused * 4; // sizeof(wordcode) = 4
636 let len = (ecnpats * size_of::<*const u8>()) + prog_bytes + ecsoffs;
637
638 // Snapshot the wordcode buffer + string table.
639 let prog_words: Vec<u32> = ECBUF.with(|c| c.borrow()[..ecused].to_vec());
640 let mut strs_bytes = vec![0u8; ecsoffs];
641 ECSTRS_REVERSE.with(|c| copy_ecstr(&c.borrow(), &mut strs_bytes));
642
643 // c:566 — store strs as raw bytes via from_utf8_unchecked so
644 // single-byte zsh markers (e.g. Dash 0x9b) survive intact.
645 // `String::from_utf8_lossy` would replace them with U+FFFD
646 // (`\xef\xbf\xbd`), breaking byte-for-byte parity with C's
647 // strs region. SAFETY: downstream consumers of `eprog.strs`
648 // index by byte offset (per the wordcode `(offs >> 2)` offset
649 // encoding) and call `.as_bytes()` — they never iterate as
650 // chars or rely on UTF-8 validity, so storing non-UTF-8 bytes
651 // in a String is safe in practice. C zsh's strs is `char *`
652 // with the same byte-not-char semantics.
653 let strs_string = unsafe { String::from_utf8_unchecked(strs_bytes) };
654 let ret = eprog {
655 flags: if heap { EF_HEAP } else { EF_REAL }, // c:570
656 len: len as i32, // c:559
657 npats: ecnpats as i32, // c:561
658 nref: if heap { -1 } else { 1 }, // c:562
659 pats: Vec::new(), // c:563 dummy_patprog
660 prog: prog_words, // c:565
661 strs: Some(strs_string),
662 shf: None,
663 dump: None,
664 };
665
666 // c:577 — free ecbuf so next parse starts fresh.
667 ECBUF.with(|c| c.borrow_mut().clear());
668 ECLEN.with(|c| c.set(0));
669 ECUSED.with(|c| c.set(0));
670 ECNPATS.with(|c| c.set(0));
671 ECSOFFS.with(|c| c.set(0));
672 ECSTRS_INDEX.with(|c| c.borrow_mut().clear());
673 ECSTRS_REVERSE.with(|c| c.borrow_mut().clear());
674 ECSTRS_TREE.with(|t| *t.borrow_mut() = None);
675
676 ret
677}
678
679/// Port of `int empty_eprog(Eprog p)` from `Src/parse.c:584`. C
680/// body: `return (!p || !p->prog || *p->prog == WCB_END());` —
681/// the eprog is empty when its prog buffer is missing or the
682/// first wordcode is the WC_END marker. Used by signal handlers
683/// (`Src/signals.c:712`) to short-circuit a trap that resolves to
684/// an empty program.
685pub fn empty_eprog(p: &eprog) -> bool {
686 p.prog.is_empty() || p.prog[0] == WCB_END()
687}
688
689/// Clear pending here-document list. Direct port of
690/// `clear_hdocs(void)` from `Src/parse.c:591`. The C version walks
691/// `hdocs` and frees each node; Rust drops the `Box<heredocs>`
692/// chain automatically when the head is replaced with None.
693pub fn clear_hdocs() {
694 // c:591
695 // c:593-598 — for (p = hdocs; p; p = n) { n = p->next; zfree(p); }
696 // c:599 — hdocs = NULL;
697 HDOCS.with_borrow_mut(|h| *h = None);
698 // zshrs-only: also drop the parallel AST-glue Vec. No C
699 // analog — LEX_HEREDOCS is Rust-only working-set state.
700 LEX_HEREDOCS.with_borrow_mut(|v| v.clear());
701}
702
703/// Top-level parse-event entry. Direct port of zsh/Src/parse.c:
704/// 612-631 `parse_event`. Reads one event from the lexer (a
705/// sublist optionally followed by SEPER/AMPER/AMPERBANG) and
706/// returns the resulting ZshProgram.
707///
708/// `endtok` is the token that terminates the event — usually
709/// ENDINPUT, but for command-style substitutions the closing
710/// `)` (zsh's CMD_SUBST_CLOSE).
711///
712/// zshrs port note: zsh's parse_event returns an `Eprog` (heap-
713/// allocated wordcode program). zshrs returns a `ZshProgram`
714/// (AST root). Same role at the parse-output boundary.
715pub fn parse_event(endtok: lextok) -> Option<ZshProgram> {
716 // parse.c:616-619 — reset state and prime the lexer.
717 set_tok(ENDINPUT);
718 set_incmdpos(true);
719 // parse.c:618 — `aliasspaceflag = 0;`. Fresh event: discard any
720 // alias-space carry-over from a prior parse so HISTIGNORESPACE
721 // doesn't suppress the next entered command line.
722 crate::ported::lex::LEX_ALIAS_SPACE_FLAG.with(|c| c.set(0));
723 zshlex();
724 // parse.c:620 — `init_parse();`
725 init_parse();
726
727 // parse.c:622-625 — drive par_event; on failure clear hdocs.
728 if !par_event(endtok) {
729 clear_hdocs();
730 return None;
731 }
732 // parse.c:626-628 — if endtok != ENDINPUT, this is a sub-
733 // parse for a substitution that doesn't need its own eprog.
734 // zshrs returns an empty program in that case (caller
735 // discards).
736 if endtok != ENDINPUT {
737 return Some(ZshProgram { lists: Vec::new() });
738 }
739 // parse.c:630 — `bld_eprog(1);` — build the final eprog.
740 // zshrs has already built the AST via parse_program_until,
741 // but parse_event uses par_event directly so we need to
742 // collect what par_event accumulated.
743 Some(parse_program_until(None))
744}
745
746/// Parse one event (sublist with optional separator). Direct
747/// port of zsh/Src/parse.c:635 `par_event`. Returns true if
748/// an event was successfully parsed, false on EOF / endtok.
749///
750/// zshrs port note: the C version emits wordcodes via ecadd/
751/// set_list_code; zshrs's parser builds AST nodes via
752/// par_sublist + par_list. Same flow, different output.
753pub fn par_event(endtok: lextok) -> bool {
754 // parse.c:639-643 — skip leading SEPERs.
755 while tok() == SEPER {
756 // parse.c:640-641 — at top-level (endtok == ENDINPUT),
757 // a SEPER on a fresh line ends the event.
758 if isnewlin() > 0 && endtok == ENDINPUT {
759 return false;
760 }
761 zshlex();
762 }
763 // parse.c:644-647 — terminate on EOF or matching close-token.
764 if tok() == ENDINPUT {
765 return false;
766 }
767 if tok() == endtok {
768 return true;
769 }
770 // parse.c:649-... — drive par_sublist + handle terminator.
771 // zshrs's par_sublist already builds the AST node directly.
772 match par_sublist() {
773 Some(_) => {
774 // parse.c:651-693 — terminator handling. zshrs's
775 // par_list wraps this; for parse_event we just
776 // confirm the sublist parsed.
777 true
778 }
779 None => false,
780 }
781}
782
783/// Port of `parse_list(void)` from `Src/parse.c:697`. C-shape entry
784/// point: drives `par_list` and finalizes via `bld_eprog`. Returns
785/// `None` on syntax error.
786pub fn parse_list() -> Option<eprog> {
787 // c:697
788 set_tok(ENDINPUT);
789 init_parse();
790 zshlex();
791 // c:Src/parse.c:705 — `par_list(&c);` emits wordcode for the
792 // full multi-statement list (its goto-rec loop walks all
793 // SEPER-separated sublists). The Rust AST par_list() emits
794 // NOTHING to the wordcode buffer (only builds the AST), so
795 // bld_eprog returned an empty program AND tok stayed at
796 // SEPER, tripping the syntax-error check below for any
797 // \`cmd; cmd\` body.
798 //
799 // Route through par_event_wordcode (the wordcode emitter,
800 // lines 4395+) which mirrors C's par_list loop semantics
801 // and populates the wordcode buffer that bld_eprog reads.
802 let _start = par_event_wordcode();
803 if tok() != ENDINPUT {
804 clear_hdocs();
805 set_tok(LEXERR);
806 yyerror("syntax error");
807 return None;
808 }
809 Some(bld_eprog(false))
810}
811
812/// Port of `parse_cond(void)` from `Src/parse.c:722`. Only used by
813/// `bin_test`/`bin_bracket` for `/bin/test`/`[` compat — the
814/// `condlex` global must already point at `testlex` before entry.
815pub fn parse_cond() -> Option<eprog> {
816 // c:722
817 init_parse();
818 if par_cond().is_none() {
819 clear_hdocs();
820 return None;
821 }
822 Some(bld_eprog(true))
823}
824
825// ============================================================
826// Wordcode emission helpers (parse.c private helpers)
827//
828// Direct ports of zsh's wordcode-emission helpers in parse.c.
// These write u32 opcodes into a flat, thread-local `ecbuf` array
// via ecadd / ecdel / ecispace / ecstrcode and friends. The
831// par_*_wordcode family at parse.rs:1700-3500 walks the lex
832// stream and emits a real wordcode buffer here.
833//
834// (The AST tree built by par_program / par_simple / etc. is a
835// separate path used by fusevm; see compile_zsh.rs for the AST
836// → fusevm-bytecode compiler.)
837// ============================================================
838
839/// Patch a list-placeholder wordcode with its actual opcode +
840/// jump distance. Direct port of zsh/Src/parse.c:738
841/// `set_list_code`. zsh emits an `ecadd(0)` placeholder before
842/// par_sublist runs, then comes back through set_list_code to
843/// rewrite the slot with WCB_LIST(type, distance) once the
844/// sublist's final length is known.
845///
846/// Port of `set_list_code(int p, int type, int cmplx)` from
847/// `Src/parse.c:738`. Patches the WCB_LIST header at `p` based on
848/// whether the sublist body is simple (single command, no
849/// pipeline) and Z_SYNC/Z_END — emits the Z_SIMPLE-optimized
850/// header when possible, otherwise the plain WCB_LIST(type, 0).
851pub fn set_list_code(p: usize, type_code: i32, cmplx: bool) {
852 let _ = wc_bdata;
853 // c:740 — `if (!cmplx && (type == Z_SYNC || type == (Z_SYNC | Z_END))
854 // && WC_SUBLIST_TYPE(ecbuf[p+1]) == WC_SUBLIST_END)`
855 let sublist_code = ECBUF.with_borrow(|b| b.get(p + 1).copied().unwrap_or(0));
856 let z = type_code;
857 let qualifies = !cmplx
858 && (z == Z_SYNC || z == (Z_SYNC | Z_END))
859 && WC_SUBLIST_TYPE(sublist_code) == WC_SUBLIST_END;
860 if qualifies {
861 // c:742 — `int ispipe = !(WC_SUBLIST_FLAGS(ecbuf[p+1])
862 // & WC_SUBLIST_SIMPLE);`
863 let ispipe = (WC_SUBLIST_FLAGS(sublist_code) & WC_SUBLIST_SIMPLE) == 0;
864 // c:743 — `ecbuf[p] = WCB_LIST((type|Z_SIMPLE), ecused-2-p);`
865 let used = ECUSED.get() as usize;
866 let off = used.saturating_sub(2 + p);
867 ECBUF.with_borrow_mut(|b| {
868 if p < b.len() {
869 b[p] = WCB_LIST((z | Z_SIMPLE) as wordcode, off as wordcode);
870 }
871 });
872 // c:744 — `ecdel(p+1);`
873 ecdel(p + 1);
874 // c:745-746 — `if (ispipe) ecbuf[p+1] = WC_PIPE_LINENO(ecbuf[p+1]);`
875 if ispipe {
876 ECBUF.with_borrow_mut(|b| {
877 if p + 1 < b.len() {
878 b[p + 1] = WC_PIPE_LINENO(b[p + 1]);
879 }
880 });
881 }
882 } else {
883 // c:748 — `ecbuf[p] = WCB_LIST(type, 0);`
884 ECBUF.with_borrow_mut(|b| {
885 if p < b.len() {
886 b[p] = WCB_LIST(z as wordcode, 0);
887 }
888 });
889 }
890}
891
892/// Port of `set_sublist_code(int p, int type, int flags, int skip, int cmplx)`
893/// from `Src/parse.c:755`. Patches the WCB_SUBLIST header at `p`.
894/// When the sublist is non-complex (single command, no pipeline),
895/// sets WC_SUBLIST_SIMPLE and rewrites the following slot to
896/// `WC_PIPE_LINENO`.
897pub fn set_sublist_code(p: usize, type_code: i32, flags: i32, skip: i32, cmplx: bool) {
898 if cmplx {
899 // c:758 — `ecbuf[p] = WCB_SUBLIST(type, flags, skip);`
900 ECBUF.with_borrow_mut(|b| {
901 if p < b.len() {
902 b[p] = WCB_SUBLIST(type_code as wordcode, flags as wordcode, skip as wordcode);
903 }
904 });
905 } else {
906 // c:760 — `ecbuf[p] = WCB_SUBLIST(type, flags|WC_SUBLIST_SIMPLE, skip);`
907 ECBUF.with_borrow_mut(|b| {
908 if p < b.len() {
909 b[p] = WCB_SUBLIST(
910 type_code as wordcode,
911 (flags as wordcode) | WC_SUBLIST_SIMPLE,
912 skip as wordcode,
913 );
914 }
915 });
916 // c:761 — `ecbuf[p+1] = WC_PIPE_LINENO(ecbuf[p+1]);`
917 ECBUF.with_borrow_mut(|b| {
918 if p + 1 < b.len() {
919 b[p + 1] = WC_PIPE_LINENO(b[p + 1]);
920 }
921 });
922 }
923}
924
925/// Parse a list (sublist with optional & or ;).
926///
927/// Direct port of zsh/Src/parse.c:771-804 `par_list` (and the
928/// par_list1 wrapper at parse.c:807-817).
929///
930/// **Structural divergence**: zsh's parse.c emits flat wordcode
931/// into the `ecbuf` u32 array via `ecadd(0)` (placeholder),
932/// `set_list_code(p, code, complexity)`, `wc_bdata(Z_END)`. zshrs
933/// builds an AST node `ZshList { sublist, flags }` instead. The
934/// async/sync/disown discrimination at parse.c:785-790 maps to
935/// zshrs's `ListFlags { async_, disown }` field — Z_SYNC is the
936/// default (no flags), Z_ASYNC = `&` = `async_=true`, Z_DISOWN +
937/// Z_ASYNC = `&!`/`&|` = both true. Same semantics, different
938/// representation. This divergence is repository-wide: every
939/// `par_*` function emits wordcode in C, every `parse_*` builds
940/// AST in Rust. The compile_zsh module then traverses the AST to
941/// emit fusevm bytecode, which serves the same role as zsh's
942/// wordcode but with a different opcode set and execution model.
943fn par_list() -> Option<ZshList> {
944 let sublist = par_sublist()?;
945
946 let flags = match tok() {
947 AMPER => {
948 zshlex();
949 ListFlags {
950 async_: true,
951 disown: false,
952 }
953 }
954 AMPERBANG => {
955 zshlex();
956 ListFlags {
957 async_: true,
958 disown: true,
959 }
960 }
961 SEPER | SEMI | NEWLIN => {
962 zshlex();
963 ListFlags::default()
964 }
965 _ => ListFlags::default(),
966 };
967
968 Some(ZshList { sublist, flags })
969}
970
/// Parse a single sublist with no list-level terminator handling.
/// Port of zsh/Src/parse.c:808 `par_list1`.
///
/// Unlike `par_list`, this neither inspects nor consumes a trailing
/// `&` / `&!` / separator token, and it returns the bare `ZshSublist`
/// rather than a `ZshList`. Returns `None` when `par_sublist` parses
/// nothing.
pub fn par_list1() -> Option<ZshSublist> {
    // parse.c:810-816 — the C body is a single par_sublist call wrapped
    // in eu/ecused bookkeeping; that bookkeeping is omitted here because
    // this path does not write to the wordcode buffer.
    par_sublist()
}
982
983/// Parse a sublist (pipelines connected by && or ||).
984///
985/// Direct port of zsh/Src/parse.c:825 `par_sublist` and
986/// par_sublist2 at parse.c:869-892. par_sublist handles the
987/// && / || conjunction and emits WC_SUBLIST opcodes; par_sublist2
988/// handles the leading `!` negation and `coproc` keyword.
989///
990/// AST mapping: ZshSublist { pipe, conj_chain }, where `conj_chain`
991/// is a Vec<(ConjOp, ZshSublist)> for chained && / ||. C uses
992/// flat wordcode with WC_SUBLIST_AND / WC_SUBLIST_OR markers.
993fn par_sublist() -> Option<ZshSublist> {
994 let mut flags = SublistFlags::default();
995
996 // Handle coproc and !
997 if tok() == COPROC {
998 flags.coproc = true;
999 zshlex();
1000 } else if tok() == BANG_TOK {
1001 flags.not = true;
1002 zshlex();
1003 }
1004
1005 let pipe = par_pline()?;
1006
1007 // Check for && or ||
1008 let next = match tok() {
1009 DAMPER => {
1010 zshlex();
1011 skip_separators();
1012 // c:Src/parse.c:par_sublist — and-or operators (`&&`,
1013 // `||`) require a sublist on each side. After consuming
1014 // `&&`/`||`, another and-or operator OR a pipe-operator
1015 // immediately after is a parse error in C zsh. zshrs's
1016 // recursion silently returned None and dropped the
1017 // operator. Bug #171 in docs/BUGS.md.
1018 if matches!(tok(), DAMPER | DBAR | BAR_TOK | BARAMP) {
1019 let name = match tok() {
1020 DAMPER => "&&",
1021 DBAR => "||",
1022 BAR_TOK => "|",
1023 BARAMP => "|&",
1024 _ => "operator",
1025 };
1026 zerr(&format!("parse error near `{}'", name));
1027 return None;
1028 }
1029 par_sublist().map(|s| (SublistOp::And, Box::new(s)))
1030 }
1031 DBAR => {
1032 zshlex();
1033 skip_separators();
1034 if matches!(tok(), DAMPER | DBAR | BAR_TOK | BARAMP) {
1035 let name = match tok() {
1036 DAMPER => "&&",
1037 DBAR => "||",
1038 BAR_TOK => "|",
1039 BARAMP => "|&",
1040 _ => "operator",
1041 };
1042 zerr(&format!("parse error near `{}'", name));
1043 return None;
1044 }
1045 par_sublist().map(|s| (SublistOp::Or, Box::new(s)))
1046 }
1047 _ => None,
1048 };
1049
1050 Some(ZshSublist { pipe, next, flags })
1051}
1052
1053/// Port of `par_sublist2(int *cmplx)` from `Src/parse.c:869`.
1054/// Secondary-sublist arm: handles the `COPROC`/`Bang` prefix
1055/// in front of a pline. Returns the WC_SUBLIST flag word added.
1056pub fn par_sublist2(cmplx: &mut i32) -> Option<i32> {
1057 // c:870 — `int f = 0;`
1058 let mut f: i32 = 0;
1059 // c:873-880 — COPROC / BANG prefix flags.
1060 if tok() == COPROC {
1061 *cmplx = 1;
1062 f |= WC_SUBLIST_COPROC as i32;
1063 zshlex();
1064 } else if tok() == BANG_TOK {
1065 *cmplx = 1;
1066 f |= WC_SUBLIST_NOT as i32;
1067 zshlex();
1068 }
1069 // c:882-883 — `if (!par_pline(cmplx) && !f) return -1;`
1070 if !par_pipe_wordcode(cmplx) && f == 0 {
1071 return None;
1072 }
1073 // c:885 — `return f;`
1074 Some(f)
1075}
1076
1077/// Parse a pipeline
1078/// Parse a pipeline (cmds joined by `|` / `|&`). Direct port of
1079/// zsh/Src/parse.c:894 `par_pline`. AST: ZshPipe { cmds: Vec<ZshCommand> }.
1080/// C emits WC_PIPE wordcodes per command; same flow.
1081fn par_pline() -> Option<ZshPipe> {
1082 let lineno = toklineno();
1083 let cmd = par_cmd()?;
1084
1085 // Check for | or |&
1086 let mut merge_stderr = false;
1087 let next = match tok() {
1088 BAR_TOK | BARAMP => {
1089 merge_stderr = tok() == BARAMP;
1090 zshlex();
1091 skip_separators();
1092 // c:Src/parse.c:par_pline — pipe-operators require a
1093 // command on each side. After consuming `|`/`|&`,
1094 // C zsh's recursive par_pline call returns -1 (parse
1095 // error) when the next token is another pipe-operator
1096 // — `a | | b` errors with `parse error near `|''`.
1097 // zshrs's `par_pline()?` silently returned None on
1098 // missing command, dropping the rest of the input
1099 // without diagnosing the empty-pipe-operand. Bug #171
1100 // in docs/BUGS.md.
1101 if matches!(tok(), BAR_TOK | BARAMP) {
1102 let name = if tok() == BARAMP { "|&" } else { "|" };
1103 zerr(&format!("parse error near `{}'", name));
1104 return None;
1105 }
1106 par_pline().map(Box::new)
1107 }
1108 _ => None,
1109 };
1110
1111 Some(ZshPipe {
1112 cmd,
1113 next,
1114 lineno,
1115 merge_stderr,
1116 })
1117}
1118
1119/// Parse a command
1120/// Parse a command — dispatches by leading token (FOR / CASE /
1121/// IF / WHILE / UNTIL / REPEAT / FUNC / DINBRACK / DINPAR /
1122/// Inpar subshell / Inbrace current-shell / TIME / NOCORRECT,
1123/// else simple). Direct port of zsh/Src/parse.c:958 `par_cmd`.
1124fn par_cmd() -> Option<ZshCommand> {
1125 // Parse leading redirections
1126 let mut redirs = Vec::new();
1127 while IS_REDIROP(tok()) {
1128 if let Some(redir) = par_redir() {
1129 redirs.push(redir);
1130 }
1131 }
1132
1133 let cmd = match tok() {
1134 FOR | FOREACH => par_for(),
1135 SELECT => parse_select(),
1136 CASE => par_case(),
1137 IF => par_if(),
1138 WHILE => par_while(false),
1139 UNTIL => par_while(true),
1140 REPEAT => par_repeat(),
1141 INPAR_TOK => par_subsh(),
1142 INOUTPAR => parse_anon_funcdef(),
1143 INBRACE_TOK => parse_cursh(),
1144 FUNC => par_funcdef(),
1145 DINBRACK => par_cond(),
1146 DINPAR => parse_arith(),
1147 TIME => par_time(),
1148 _ => par_simple(redirs),
1149 };
1150
1151 // Parse trailing redirections. For Simple commands the redirs were
1152 // already captured inside par_simple; for compound forms (Cursh,
1153 // Subsh, If, While, etc.) we collect them here and wrap in
1154 // ZshCommand::Redirected so compile_zsh can scope-bracket them.
1155 if let Some(inner) = cmd {
1156 let mut trailing: Vec<ZshRedir> = Vec::new();
1157 while IS_REDIROP(tok()) {
1158 if let Some(redir) = par_redir() {
1159 trailing.push(redir);
1160 }
1161 }
1162 // c:Src/parse.c:par_cmd — compound forms (Cursh `{...}`, Subsh
1163 // `(...)`, If/While/Until/For/Case/Select/Repeat/Funcdef) must
1164 // be followed by a valid sublist/list separator (`;`, `\n`,
1165 // `&`, `|`, `&&`, `||`, redirect-op) — STRING_LEX after a
1166 // compound is a parse error. zshrs's outer par_list loop
1167 // silently treated trailing words as a new command, masking
1168 // syntax errors like `{ echo a; } b c`. Mirror C's strict
1169 // post-compound terminator check. Bug #146 in docs/BUGS.md.
1170 if !matches!(inner, ZshCommand::Simple(_)) && tok() == STRING_LEX {
1171 let bad = tokstr().unwrap_or_default();
1172 zerr(&format!("parse error near `{}'", bad));
1173 // Reset state before returning so the outer loop's None
1174 // detection unwinds cleanly.
1175 set_incmdpos(true);
1176 set_incasepat(0);
1177 set_incond(0);
1178 set_intypeset(false);
1179 return None;
1180 }
1181 // c:1072-1075 — every par_cmd tail resets the lexer state
1182 // toggles so the NEXT command starts in cmd position with
1183 // case/cond/typeset off. par_simple/par_cond set `incmdpos=0`
1184 // during their bodies; without this reset the next iteration
1185 // of the outer par_list loop sees `if` / `done` / `select`
1186 // etc. as plain strings and the AST collapses.
1187 set_incmdpos(true);
1188 set_incasepat(0);
1189 set_incond(0);
1190 set_intypeset(false);
1191 if trailing.is_empty() {
1192 return Some(inner);
1193 }
1194 // Simple already absorbed its own redirs (compile path expects
1195 // them on ZshSimple), so don't double-wrap.
1196 if matches!(inner, ZshCommand::Simple(_)) {
1197 if let ZshCommand::Simple(mut s) = inner {
1198 s.redirs.extend(trailing);
1199 return Some(ZshCommand::Simple(s));
1200 }
1201 unreachable!()
1202 }
1203 return Some(ZshCommand::Redirected(Box::new(inner), trailing));
1204 }
1205 // Same reset on the empty-cmd branch (mirror c:1072 unconditional
1206 // path — the C function only returns 0 above when the dispatch
1207 // produced no command, and falls through to the reset block).
1208 set_incmdpos(true);
1209 set_incasepat(0);
1210 set_incond(0);
1211 set_intypeset(false);
1212
1213 None
1214}
1215
1216/// Parse for/foreach loop
1217/// Parse `for NAME in WORDS; do BODY; done` (foreach style) AND
1218/// `for ((init; cond; incr)) do BODY done` (c-style). Direct port
1219/// of zsh/Src/parse.c:1087 `par_for`. parse_for_cstyle is the
1220/// inner branch for the `((...))` arithmetic-header variant
1221/// (parse.c:1100-1140 inside par_for).
1222fn par_for() -> Option<ZshCommand> {
1223 let is_foreach = tok() == FOREACH;
1224 // c:1094-1095 (Src/parse.c, par_for) — set `infor=2` (only when
1225 // tok==FOR) so the lexer's `(` peek at lex.c:784-789
1226 // (`if (infor) { ... return DINPAR; }`) routes the arith-for
1227 // body through dbparens semicolon-splitting instead of the
1228 // `cmd_or_math` whole-body capture path. Without this, `for ((
1229 // i=0; i<3; i++ ))` lexed as a single `((arith))` expression
1230 // and parse_for_cstyle's second zshlex got an empty/wrong tok.
1231 //
1232 // The companion C statement `incmdpos = 0;` at c:1094 isn't
1233 // mirrored here: zshrs's parser doesn't otherwise touch
1234 // LEX_INCMDPOS at this boundary, and forcing it false breaks
1235 // the SELECT case where downstream tokenization relied on the
1236 // inherited state. The C parser maintains incmdpos inline at
1237 // every grammar transition (parse.c:617, :791, :1072, :1145,
1238 // :1154, :1161, ...); without porting those companion sites a
1239 // single explicit reset here is more harmful than helpful.
1240 set_infor(if tok() == FOR { 2 } else { 0 }); // c:1095
1241 zshlex(); // c:1096
1242
1243 // Check for C-style: for (( init; cond; step ))
1244 if tok() == DINPAR {
1245 // c:1110-1111 — close out infor / cmdpos after parse_for_cstyle
1246 // has consumed the init/cond/step triple. Done inside the
1247 // helper itself so we honour the C ordering.
1248 return parse_for_cstyle();
1249 }
1250
1251 // c:1116 — `infor = 0;` immediately on entering the foreach
1252 // branch. Without this, `infor` stays at 2 (set at c:1095 when
1253 // tok==FOR) for the rest of par_for, and the lexer's `((`
1254 // peek at lex.c:786 routes every subsequent `((...))` inside
1255 // the loop body through dbparens — so `for x in a; do (( 1
1256 // )); done` and `if (( 1 )) { … }` inside the do-body both
1257 // mis-lexed as a c-style for header.
1258 set_infor(0); // c:1116
1259
1260 // Get variable name(s). zsh parse.c par_for accepts multiple
1261 // identifier tokens before `in`/`(`/newline — `for k v in ...`
1262 // assigns each iteration's pair of values to k and v in turn.
1263 // We store the names space-joined since variable identifiers
1264 // can't contain whitespace.
1265 let mut names: Vec<String> = Vec::new();
1266 while tok() == STRING_LEX {
1267 let v = tokstr().unwrap_or_default();
1268 if v == "in" {
1269 break;
1270 }
1271 names.push(v);
1272 zshlex();
1273 }
1274 if names.is_empty() {
1275 zerr("expected variable name in for");
1276 return None;
1277 }
1278 let var = names.join(" ");
1279
1280 // Skip newlines
1281 skip_separators();
1282
1283 // Get list. The lexer-port quirk: `for x (a b c)` arrives as a
1284 // single String token with the parens lexed-as-content
1285 // (`<Inpar>a b c<Outpar>`) instead of as separate Inpar/String/
1286 // Outpar tokens. Detect that shape and split it manually.
1287 let list = if tok() == STRING_LEX
1288 && tokstr()
1289 .map(|s| s.starts_with('\u{88}') && s.ends_with('\u{8a}'))
1290 .unwrap_or(false)
1291 {
1292 let raw = tokstr().unwrap_or_default();
1293 // Strip leading Inpar + trailing Outpar. KEEP the inner
1294 // content tokenized — `for x ({1..3}) …` has `{1..3}` as
1295 // Inbrace+content+Outbrace markers, which compile_word_str
1296 // needs to detect and brace-expand. Untokenizing here would
1297 // collapse the markers to plain `{` `}` chars and the brace-
1298 // expansion pass (which strictly requires Inbrace TOKEN per
1299 // Src/glob.c:hasbraces) would skip the word entirely.
1300 // Split only on UNTOKENIZED whitespace at the top level —
1301 // tokenized characters (TOKEN range \u{84}..\u{a1}) are part
1302 // of one word; bare ASCII spaces / tabs separate words.
1303 let inner = &raw[raw.char_indices().nth(1).map(|(i, _)| i).unwrap_or(0)
1304 ..raw
1305 .char_indices()
1306 .last()
1307 .map(|(i, _)| i)
1308 .unwrap_or(raw.len())];
1309 let mut words: Vec<String> = Vec::new();
1310 let mut cur = String::new();
1311 for c in inner.chars() {
1312 if c == ' ' || c == '\t' || c == '\n' {
1313 if !cur.is_empty() {
1314 words.push(std::mem::take(&mut cur));
1315 }
1316 } else {
1317 cur.push(c);
1318 }
1319 }
1320 if !cur.is_empty() {
1321 words.push(cur);
1322 }
1323 zshlex();
1324 ForList::Words(words)
1325 } else if tok() == STRING_LEX {
1326 let s = tokstr();
1327 if s.map(|s| s == "in").unwrap_or(false) {
1328 // c:Src/parse.c:1147-1154 — after consuming `in`, the
1329 // for-list reads in WORD position, not command position.
1330 // Reset incmdpos=false so the lexer's LX2_INBRACE arm
1331 // (lex.rs:1791) treats a leading `{` as the brace-
1332 // expansion marker (`bct++; add(Inbrace)`) instead of
1333 // returning STRING("{") + promoting to INBRACE_TOK.
1334 // Without this, `for i in {1..3}` saw `{` as the body-
1335 // opener brace, so the word-collection loop got an
1336 // empty word list and the loop body silently ran 0
1337 // iterations.
1338 set_incmdpos(false);
1339 zshlex();
1340 let mut words = Vec::new();
1341 while tok() == STRING_LEX {
1342 let _ts_s = tokstr();
1343 if let Some(s) = _ts_s.as_deref() {
1344 words.push(s.to_string());
1345 }
1346 zshlex();
1347 }
1348 // c:Src/parse.c:1162 — `incmdpos = 1;` after the
1349 // wordlist + SEPER are consumed, so the next token
1350 // (`do` / `{` body opener) lexes at command position.
1351 set_incmdpos(true);
1352 ForList::Words(words)
1353 } else {
1354 ForList::Positional
1355 }
1356 } else if tok() == INPAR_TOK {
1357 // for var (...) — `for x ({1..3})`: inside the parens, the
1358 // list is in WORD position so `{` must lex as the brace-
1359 // expansion Inbrace marker, NOT as a body-opener INBRACE_TOK.
1360 // Without resetting incmdpos before the next zshlex, the
1361 // lexer's LX2_INBRACE arm promotes `{` to INBRACE_TOK and
1362 // the word-collection loop exits empty, giving
1363 // `for x ({1..3})` an empty iteration.
1364 set_incmdpos(false);
1365 zshlex();
1366 let mut words = Vec::new();
1367 while tok() == STRING_LEX || tok() == SEPER {
1368 if tok() == STRING_LEX {
1369 let _ts_s = tokstr();
1370 if let Some(s) = _ts_s.as_deref() {
1371 words.push(s.to_string());
1372 }
1373 }
1374 zshlex();
1375 }
1376 if tok() == OUTPAR_TOK {
1377 // After the `)` of a for-list, the next token is the
1378 // body opener — `do`/`{`. zsh's lexer needs incmdpos
1379 // set so `{` lexes as Inbrace (not as a literal). C
1380 // analogue: parse.c::par_for sets `incmdpos = 1`
1381 // after consuming the Outpar before the body parse.
1382 set_incmdpos(true);
1383 zshlex();
1384 }
1385 ForList::Words(words)
1386 } else {
1387 ForList::Positional
1388 };
1389
1390 // Skip to body
1391 skip_separators();
1392
1393 // Parse body
1394 let body = parse_loop_body(is_foreach, false)?;
1395
1396 Some(ZshCommand::For(ZshFor {
1397 var,
1398 list,
1399 body: Box::new(body),
1400 is_select: false,
1401 }))
1402}
1403
/// Parse `case WORD in PATTERN) BODY ;; ... esac` (or the brace form
/// `case WORD { ... }`). Direct port of zsh/Src/parse.c:1209
/// `par_case`.
///
/// Each arm becomes a `CaseArm { patterns, body, terminator }`, where
/// the terminator is `;;` (`CaseTerm::Break`, also the default when no
/// terminator token follows the body), `;&` (`CaseTerm::Continue`) or
/// `;|` (`CaseTerm::TestNext`). Arms whose pattern list ends up empty
/// are not recorded.
///
/// Returns `None` after reporting an error when the case word, the
/// `in` / `{` opener, or an arm's closing `)` is missing. Reaching end
/// of input (or exceeding `MAX_ARMS`) reports an error but still
/// returns the arms collected so far.
fn par_case() -> Option<ZshCommand> {
    // C par_case (parse.c:1209-1241). Order of state toggles
    // matters — the lexer reads the case word in `incmdpos=0`
    // (so it's not promoted to a reswd), then the `in`/`{` in
    // `incmdpos=1, noaliases=1, nocorrect=1` (so the `in` literal
    // isn't alias-expanded or spell-corrected), then sets
    // `incasepat=1, incmdpos=0` before the first pattern.
    set_incmdpos(false);
    zshlex(); // skip 'case'

    let word = match tok() {
        STRING_LEX => {
            let w = tokstr().unwrap_or_default();
            // c:1222 — `incmdpos = 1;` before the next zshlex so the
            // `in` keyword is recognised. c:1223-1225 — save+force
            // noaliases / nocorrect.
            set_incmdpos(true);
            let ona = noaliases();
            let onc = nocorrect();
            set_noaliases(true);
            set_nocorrect(1);
            zshlex();
            // Restore noaliases/nocorrect after the `in`-or-`{` token
            // is in hand; both are unconditionally restored at c:1238-1239.
            // The closure is returned alongside the saved values so every
            // exit path below can restore them.
            let restore = |ona: bool, onc: i32| {
                set_noaliases(ona);
                set_nocorrect(onc);
            };
            (w, ona, onc, restore)
        }
        _ => {
            zerr("expected word after case");
            return None;
        }
    };
    let (word, ona, onc, restore) = word;

    skip_separators();

    // Expect 'in' or {
    let use_brace = tok() == INBRACE_TOK;
    if tok() == STRING_LEX {
        let s = tokstr();
        if s.map(|s| s != "in").unwrap_or(true) {
            // c:1228-1232 — restore noaliases/nocorrect on error path.
            restore(ona, onc);
            zerr("expected 'in' in case");
            return None;
        }
    } else if !use_brace {
        restore(ona, onc);
        zerr("expected 'in' or '{' in case");
        return None;
    }
    // c:1236-1239 — `incasepat = 1; incmdpos = 0; noaliases = ona;
    // nocorrect = onc;` — set the case-pattern context AND restore
    // alias/correct state BEFORE the zshlex that consumes `in`/`{`.
    set_incasepat(1);
    set_incmdpos(false);
    restore(ona, onc);
    zshlex();

    let mut arms = Vec::new();
    // Guard against runaway arm parsing; see the check at the top of the loop.
    const MAX_ARMS: usize = 10_000;

    loop {
        // Exceeding the limit reports an error and stops; the arms
        // collected so far are still returned.
        if arms.len() > MAX_ARMS {
            zerr("par_case: too many arms");
            break;
        }

        // Set incasepat BEFORE skipping separators so the lexer knows
        // we're in case-pattern context. This affects how [ and | are
        // lexed.
        set_incasepat(1);

        skip_separators();

        // Check for end.
        // Note: 'esac' might be String "esac" if incasepat > 0 prevents
        // reserved word recognition.
        let is_esac = tok() == ESAC
            || (tok() == STRING_LEX && tokstr().map(|s| s == "esac").unwrap_or(false));
        if (use_brace && tok() == OUTBRACE_TOK) || (!use_brace && is_esac) {
            set_incasepat(0);
            zshlex();
            break;
        }

        // Also break on EOF. c:Src/parse.c:1209 par_case requires
        // ESAC (or `}` in brace form) to close the block — reaching
        // ENDINPUT without either is a parse error (`case ... esack`
        // typo absorbs `esack` as part of the body and silently
        // terminates rc=0 otherwise). Bug #400.
        // The error is reported via yyerror; the function still falls
        // through to return the arms parsed so far.
        if tok() == ENDINPUT || tok() == LEXERR {
            set_incasepat(0);
            yyerror("unmatched `case'");
            break;
        }

        // c:1250 — `if (tok == INPAR) zshlex();` — leading-paren
        // skip path. Used when the lexer DID return INPAR_TOK (e.g.
        // SHGLOB or incmdpos forced it). In the normal case-pattern
        // path the lexer absorbs `(...)` into one Stringg and the
        // hack at c:1322 strips the surrounding parens later. Both
        // paths land here.
        let leading_inpar_consumed = tok() == INPAR_TOK;
        if leading_inpar_consumed {
            zshlex();
        }

        // c:1255-1262 — read pattern STRING. zsh's parser falls
        // straight into the STRING reader after the optional INPAR.
        // BAR before any pattern means empty string.
        let mut patterns = Vec::new();
        // Tracks whether the c:1322-1354 hack has fired (paren-
        // wrapped Stringg absorbed by the lexer). When it has, the
        // closing `)` was already absorbed — no separate OUTPAR
        // arm-close to consume.
        let mut absorbed_outpar = false;
        loop {
            if tok() == STRING_LEX {
                let s = tokstr();
                if s.as_deref().map(|s| s == "esac").unwrap_or(false) {
                    break;
                }
                let mut str_val = s.unwrap_or_default();

                // c:1322-1354 hack: when this is the first alt AND
                // the string starts with the Inpar marker, the lexer
                // absorbed the whole `(...)` as one token. Strip the
                // surrounding parens — the remainder IS the pattern.
                // The closing arm-paren was absorbed too, so we don't
                // expect a separate OUTPAR token afterward.
                if patterns.is_empty() && str_val.starts_with(crate::ported::zsh_h::Inpar) {
                    // Paren depth counter used to find the Outpar that
                    // balances the leading Inpar.
                    let mut pct = 0i32;
                    let mut chars: Vec<char> = str_val.chars().collect();
                    let mut end_idx: Option<usize> = None;
                    for (idx, &c) in chars.iter().enumerate() {
                        if c == crate::ported::zsh_h::Inpar {
                            pct += 1;
                        } else if c == crate::ported::zsh_h::Outpar {
                            pct -= 1;
                            if pct == 0 {
                                end_idx = Some(idx);
                                break;
                            }
                        }
                    }
                    if let Some(idx) = end_idx {
                        // Remove the matching Outpar first so index 0
                        // still refers to the leading Inpar.
                        chars.remove(idx);
                        chars.remove(0);
                        str_val = chars.into_iter().collect();
                        absorbed_outpar = true;
                    }
                }
                patterns.push(str_val);
                set_incasepat(2);
                zshlex();
                // When the hack fired the closing `)` is already
                // consumed; don't read alt-`|` continuations either.
                if absorbed_outpar {
                    break;
                }
            } else if tok() != BAR_TOK {
                break;
            }

            // `|` separates alternatives: go back to first-pattern
            // lexing mode and read the next one.
            if tok() == BAR_TOK {
                set_incasepat(1);
                zshlex();
            } else {
                break;
            }
        }
        set_incasepat(0);

        // c:1305 — expect OUTPAR (arm-close) when the hack didn't
        // already swallow it.
        //
        // Bug #34 in docs/BUGS.md: the absorbed-pattern hack assumed
        // the leading `(` and the case-arm closing `)` were both
        // absorbed into the single STRING token. That's true for
        // `(x))` (the inner `)` closes the absorbed group; the second
        // `)` is the arm closer) only when the lexer slurps BOTH.
        // The Rust lexer slurps just `(x|y)` (one balanced pair); the
        // second `)` arrives as a separate OUTPAR_TOK that must still
        // be consumed as the case-arm closer. Detect and consume it.
        if !absorbed_outpar {
            if tok() != OUTPAR_TOK {
                zerr("expected ')' in case pattern");
                return None;
            }
            // c:Src/parse.c:1257-1258 — `if (tok != STRING)
            // YYERRORV(oecused);` C requires at least one pattern
            // STRING before `)`. zshrs accepted empty `case x in)`
            // and silently fell through to the next iteration with
            // an empty pattern arm, swallowing the rest of the
            // script. Reject the empty-pattern shape unless a
            // leading INPAR was consumed (the `(pat)` form has
            // already validated the pattern inside). Bug #161 in
            // docs/BUGS.md.
            if patterns.is_empty() && !leading_inpar_consumed {
                zerr("parse error near `)'");
                return None;
            }
            set_incmdpos(true);
            zshlex();
            // When the lexer emitted a separate INPAR_TOK at the
            // arm start (consumed via `leading_inpar_consumed`
            // above), the OUTPAR_TOK we just consumed closed the
            // alternation GROUP. If the next token is ALSO
            // OUTPAR_TOK, the user wrote `(pat))` and that second
            // `)` is the case-arm closer that still needs to be
            // consumed before body parsing. Bug #34 in
            // docs/BUGS.md.
            if leading_inpar_consumed && tok() == OUTPAR_TOK {
                zshlex();
            }
        } else if tok() == OUTPAR_TOK {
            // The lexer absorbed `(pat)` as the pattern but left the
            // case-arm closing `)` as a separate OUTPAR_TOK. Consume
            // it now so body parsing starts at the body, not at `)`.
            set_incmdpos(true);
            zshlex();
        } else {
            // Nothing left to consume; just make sure the body lexes
            // at command position.
            set_incmdpos(true);
        }

        // Parse body. Pass end_tokens explicitly so the body's
        // parser stops at DSEMI/SEMIAMP/SEMIBAR/ESAC without
        // tripping parse_program_until's orphan-terminator check
        // (line 7131) which only fires when end_tokens is None.
        // Without this, a case arm whose body has no trailing
        // `;;` before `esac` (last arm — zsh accepts the dangling
        // form) produced "parse error near orphan terminator" on
        // the closing `esac`. zsh's par_case at parse.c:1318 sets
        // up the case-arm reader to recognize the same terminator
        // set; the Rust port was passing the implicit-None and
        // hitting the top-level orphan check.
        let body = parse_program_until(Some(&[DSEMI, SEMIAMP, SEMIBAR, ESAC]));

        // Get terminator. Set incasepat=1 BEFORE the zshlex
        // advance so the next token (the next arm's pattern, like
        // `[a-z]`) gets tokenized in pattern context. Without
        // this, a `[`-prefixed pattern after the FIRST arm became
        // Inbrack instead of String and the pattern-loop bailed
        // out with "expected ')' in case pattern".
        let terminator = match tok() {
            DSEMI => {
                set_incasepat(1);
                zshlex();
                CaseTerm::Break
            }
            SEMIAMP => {
                set_incasepat(1);
                zshlex();
                CaseTerm::Continue
            }
            SEMIBAR => {
                set_incasepat(1);
                zshlex();
                CaseTerm::TestNext
            }
            // No terminator token (e.g. the last arm before `esac`):
            // treated like `;;` without consuming anything.
            _ => CaseTerm::Break,
        };

        // An arm without any pattern is not recorded.
        if !patterns.is_empty() {
            arms.push(CaseArm {
                patterns,
                body,
                terminator,
            });
        }
    }

    Some(ZshCommand::Case(ZshCase { word, arms }))
}
1685
1686/// Parse if statement
1687/// Parse `if COND; then BODY; [elif COND; then BODY;]* [else BODY;] fi`.
1688/// Direct port of zsh/Src/parse.c:1411 `par_if`. The C source
1689/// emits WC_IF wordcodes per arm; zshrs builds an AST chain of
1690/// (cond, then_body) tuples plus an optional else_body.
1691fn par_if() -> Option<ZshCommand> {
1692 zshlex(); // skip 'if'
1693
1694 // Parse condition - stops at 'then' or '{' (zsh allows { instead of then)
1695 let cond = Box::new(parse_program_until(Some(&[THEN, INBRACE_TOK])));
1696
1697 skip_separators();
1698
1699 // Expect 'then' or {
1700 let use_brace = tok() == INBRACE_TOK;
1701 if tok() != THEN && !use_brace {
1702 zerr("expected 'then' or '{' after if condition");
1703 return None;
1704 }
1705 zshlex();
1706
1707 // Parse then-body - stops at else/elif/fi, or } if using brace syntax
1708 let then = if use_brace {
1709 let body = parse_program_until(Some(&[OUTBRACE_TOK]));
1710 if tok() == OUTBRACE_TOK {
1711 zshlex();
1712 }
1713 Box::new(body)
1714 } else {
1715 Box::new(parse_program_until(Some(&[ELSE, ELIF, FI])))
1716 };
1717
1718 // Parse elif and else. zsh accepts the SAME elif/else
1719 // continuations for both classic `then/fi` AND the brace
1720 // form `{ ... } elif ... { ... } else { ... }`. Direct port
1721 // of zsh/Src/parse.c:1417-1500 par_if where the elif/else
1722 // arms are checked AFTER the body close regardless of which
1723 // delimiter style opened the block. Without this, zinit's
1724 // if [[ -z $sel ]] { ... } else { ... }
1725 // hung the parser — `else` was treated as an external
1726 // command following the if-statement, which the lexer state
1727 // mis-classified inside the still-open function body.
1728 //
1729 // For brace-form: skip the `fi` consumption at the end of
1730 // the loop (no `fi` after a brace block), and `else` may
1731 // arrive after a `}` close. Skip-separators between the
1732 // body close and the elif/else token.
1733 let mut elif = Vec::new();
1734 let mut else_ = None;
1735 // c:Src/parse.c:1501-1504 — `if (tok != FI) { cmdpop(); YYERRORV; }`.
1736 // The C parser fails the whole if-construct when the body close
1737 // isn't seen. zshrs's loop fell through silently on ENDINPUT, so
1738 // `if true; then echo yes` (no `fi`) was accepted. Track whether
1739 // we hit a real terminator and error after the loop if not.
1740 let mut saw_terminator = use_brace; // `{ … }` body already consumed its close
1741
1742 {
1743 loop {
1744 skip_separators();
1745
1746 match tok() {
1747 ELIF => {
1748 zshlex();
1749 // elif condition stops at 'then' or '{'
1750 let econd = parse_program_until(Some(&[THEN, INBRACE_TOK]));
1751 skip_separators();
1752
1753 let elif_use_brace = tok() == INBRACE_TOK;
1754 if tok() != THEN && !elif_use_brace {
1755 zerr("expected 'then' after elif");
1756 return None;
1757 }
1758 zshlex();
1759
1760 // elif body stops at else/elif/fi or } if using braces
1761 let ebody = if elif_use_brace {
1762 let body = parse_program_until(Some(&[OUTBRACE_TOK]));
1763 if tok() == OUTBRACE_TOK {
1764 zshlex();
1765 saw_terminator = true; // brace close on elif
1766 }
1767 body
1768 } else {
1769 parse_program_until(Some(&[ELSE, ELIF, FI]))
1770 };
1771
1772 elif.push((econd, ebody));
1773 }
1774 ELSE => {
1775 zshlex();
1776 skip_separators();
1777
1778 let else_use_brace = tok() == INBRACE_TOK;
1779 if else_use_brace {
1780 zshlex();
1781 }
1782
1783 // else body stops at 'fi' or '}'
1784 else_ = Some(Box::new(if else_use_brace {
1785 let body = parse_program_until(Some(&[OUTBRACE_TOK]));
1786 if tok() == OUTBRACE_TOK {
1787 zshlex();
1788 saw_terminator = true;
1789 }
1790 body
1791 } else {
1792 parse_program_until(Some(&[FI]))
1793 }));
1794
1795 // Consume the 'fi' if present (not for brace syntax)
1796 if !else_use_brace && tok() == FI {
1797 zshlex();
1798 saw_terminator = true;
1799 }
1800 break;
1801 }
1802 FI => {
1803 // Brace-form `if ... { ... }` is already terminated by
1804 // its closing `}`. Do NOT consume `fi` here — it belongs
1805 // to an enclosing then-form if. Without this gate, a
1806 // brace-form if inside a then-form if's body would steal
1807 // the outer `fi`, leaving the outer parser to see
1808 // "unterminated if". This bit zinit-install.zsh:978
1809 // where `if (( … )) {` (brace) inside `if … ; then …`
1810 // (then-form) ate the outer `fi`.
1811 if use_brace {
1812 break;
1813 }
1814 zshlex();
1815 saw_terminator = true;
1816 break;
1817 }
1818 _ => break,
1819 }
1820 }
1821 }
1822
1823 if !saw_terminator {
1824 // c:1501-1504 — YYERRORV when the if-construct never closed.
1825 zerr("parse error: unterminated if");
1826 return None;
1827 }
1828
1829 Some(ZshCommand::If(ZshIf {
1830 cond,
1831 then,
1832 elif,
1833 else_,
1834 }))
1835}
1836
1837/// Parse while/until loop
1838/// Parse `while COND; do BODY; done` and `until COND; do BODY; done`.
1839/// Direct port of zsh/Src/parse.c:1521 `par_while`. The
1840/// `until` variant is the same loop with the condition negated.
1841fn par_while(until: bool) -> Option<ZshCommand> {
1842 zshlex(); // skip while/until
1843
1844 // c:1521-1551 par_while — the condition's parser must stop at
1845 // `do` or `{`. Without an explicit end-token set, parse_program
1846 // consumes the brace-form body as additional condition lists,
1847 // leaving parse_loop_body with nothing — `while (( i++ < 3 )) {
1848 // echo $i }` silently parsed but executed nothing.
1849 let cond = Box::new(parse_program_until(Some(&[DOLOOP, INBRACE_TOK])));
1850
1851 skip_separators();
1852 let body = parse_loop_body(false, false)?;
1853
1854 // c:Src/parse.c:1521-1551 par_while — WC_WHILE wordcode is tagged
1855 // with WC_WHILE_TYPE differentiating WHILE vs UNTIL at the wordcode
1856 // layer. The AST mirror in zsh_ast.rs has separate Until(ZshWhile)
1857 // and While(ZshWhile) variants; route by the `until` flag here so
1858 // downstream pattern-matchers can distinguish without poking
1859 // inside the payload's bool.
1860 let w = ZshWhile {
1861 cond,
1862 body: Box::new(body),
1863 until,
1864 };
1865 Some(if until {
1866 ZshCommand::Until(w) // c:1521 (WC_WHILE_TYPE = WC_WHILE_UNTIL)
1867 } else {
1868 ZshCommand::While(w) // c:1521 (WC_WHILE_TYPE = WC_WHILE_WHILE)
1869 })
1870}
1871
1872/// Parse repeat loop
1873/// Parse `repeat N; do BODY; done`. Direct port of
1874/// zsh/Src/parse.c:1565 `par_repeat`. The C source supports
1875/// the SHORTLOOPS short-form `repeat N CMD` (no do/done) — zshrs's
1876/// parser doesn't yet special-case that variant.
1877fn par_repeat() -> Option<ZshCommand> {
1878 zshlex(); // skip 'repeat'
1879
1880 let count = match tok() {
1881 STRING_LEX => {
1882 let c = tokstr().unwrap_or_default();
1883 zshlex();
1884 c
1885 }
1886 _ => {
1887 zerr("expected count after repeat");
1888 return None;
1889 }
1890 };
1891
1892 skip_separators();
1893 // c:1600 — par_repeat's short-form gate is wider: it unlocks
1894 // when SHORTLOOPS OR SHORTREPEAT is set (vs SHORTLOOPS alone for
1895 // for/while). Pass `is_repeat=true` so parse_loop_body
1896 // applies that widened gate.
1897 let body = parse_loop_body(false, true)?;
1898
1899 Some(ZshCommand::Repeat(ZshRepeat {
1900 count,
1901 body: Box::new(body),
1902 }))
1903}
1904
1905/// Parse (...) subshell
1906/// Parse a subshell `( ... )`. Direct port of zsh/Src/parse.c:1619
1907/// `par_subsh`. Body parses as a normal list; the subshell wrapper
1908/// fork-isolates execution in the executor.
1909fn par_subsh() -> Option<ZshCommand> {
1910 zshlex(); // skip (
1911 let prog = parse_program();
1912 if tok() == OUTPAR_TOK {
1913 zshlex();
1914 }
1915 Some(ZshCommand::Subsh(Box::new(prog)))
1916}
1917
/// Parse a `function`-keyword definition. Port of
/// zsh/Src/parse.c:1672 `par_funcdef`.
///
/// Accepted shapes, as handled by the code below:
/// * `function [-T] NAME... [()] { BODY }` — named definition; every
///   collected name goes into `ZshFuncDef::names`.
/// * `function [()] { BODY } ARGS...` — anonymous definition (no names
///   collected): a synthetic name is generated and the trailing STRING
///   tokens are stored in `auto_call_args`.
/// * `function NAME... [()] LIST` — short form without a brace; the
///   body is a single list from `par_list` and `body_source` is `None`.
///
/// A flag word (leading `-`, `+` or the tokenized `Dash`) that contains
/// `T` sets `tracing`. Returns `None` on a parse error (reported via
/// `zerr`), when `tokstr()` yields nothing for a STRING token, or when
/// the short-form `par_list` yields nothing.
fn par_funcdef() -> Option<ZshCommand> {
    zshlex(); // skip 'function'

    let mut names = Vec::new();
    let mut tracing = false;

    // Handle options like -T and function names. Two subtleties:
    //
    // 1. Flags: zsh's lexer encodes a leading `-` as
    //    `zsh_h::Dash` (`\u{9b}`, `Src/zsh.h:182`) inside the String tokstr.
    //    The previous `s.starts_with('-')` check failed for
    //    `\u{9b}T`, so `function -T NAME { body }` slipped the
    //    `-T` token into `names` and the function got registered
    //    as `T` plus the intended `NAME`.
    //
    // 2. Body opener: zsh's lexer emits the opening `{` as a
    //    String (not INBRACE_TOK) when it follows the String
    //    NAME — the preceding name token resets incmdpos to
    //    false, and only `{` immediately followed by `}` (the
    //    empty-body case) gets promoted to Inbrace. The funcdef
    //    parser must recognise the bare-`{` String as the body
    //    opener; otherwise `function NAME { body }` falls through
    //    to `_ => break`, no body parses, and the FuncDef never
    //    lands in the AST. This is consistent with C zsh's
    //    par_funcdef which knows it's in funcdef-header context
    //    and accepts the brace either way.
    loop {
        match tok() {
            STRING_LEX => {
                // A STRING token without text aborts the parse via `?`.
                let _ts_s = tokstr()?;
                let s = _ts_s.as_str();
                // c:1702 — `if ((*tokstr == Inbrace || *tokstr == '{') && !tokstr[1])`.
                // Body opener can be either the literal `{` (early-return
                // path at lex.c:1141-1144 / lex.rs LX2_INBRACE cmdpos
                // branch) or the Inbrace marker `\u{8f}` (lex.c:1420
                // post-switch add(c) where c was rewritten via lextok2).
                if s == "{" || s == "\u{8f}" {
                    break;
                }
                let first = s.chars().next();
                // Flag word: consume it; any `T` inside enables tracing.
                if matches!(first, Some('-') | Some('+')) || matches!(first, Some(c) if c == Dash) {
                    if s.contains('T') {
                        tracing = true;
                    }
                    zshlex();
                    continue;
                }
                // c:Src/exec.c::execcmd_args — function name tokens
                // in `function NAME { ... }` form go through globbing
                // at parse time. zsh's `function with[bracket] { ... }`
                // triggers a glob expansion of `with[bracket]`; no file
                // matches → "no matches found: NAME" + rc=1 (when
                // NOMATCH is set, the default). Bug #536: zshrs accepted
                // the literal bracket-containing name and registered
                // the function silently. Mirror C by probing for glob
                // metachars on the name; if present AND no file
                // matches, emit the diagnostic and abort the parse.
                let has_glob_chars = s.chars().any(|c| {
                    matches!(
                        c,
                        '[' | ']'
                            | '*'
                            | '?'
                            | crate::ported::zsh_h::Inbrack
                            | crate::ported::zsh_h::Outbrack
                            | crate::ported::zsh_h::Star
                            | crate::ported::zsh_h::Quest
                    )
                });
                if has_glob_chars && crate::ported::zsh_h::isset(crate::ported::zsh_h::NOMATCH) {
                    let untok = crate::ported::lex::untokenize(s);
                    let glob_result = crate::ported::glob::glob(&untok);
                    if glob_result.is_empty() {
                        crate::ported::utils::zerr(&format!("no matches found: {}", untok));
                        crate::ported::utils::errflag.fetch_or(
                            crate::ported::utils::ERRFLAG_ERROR,
                            std::sync::atomic::Ordering::Relaxed,
                        );
                        return None;
                    }
                }
                names.push(s.to_string());
                zshlex();
            }
            // NOTE(review): both remaining arms break out of the header
            // loop; the explicit token list only documents the expected
            // terminators.
            INBRACE_TOK | INOUTPAR | SEPER | NEWLIN => break,
            _ => break,
        }
    }

    // Optional ()
    let saw_paren = tok() == INOUTPAR;
    if saw_paren {
        zshlex();
    }

    skip_separators();

    // Body opener: real Inbrace OR a String containing the literal `{`
    // (early-return path) OR a String containing the Inbrace marker
    // `\u{8f}` (bct++ path post-switch add). C parse.c:1702 handles
    // both string forms via `*tokstr == Inbrace || *tokstr == '{'`.
    let body_opener_is_string_brace =
        tok() == STRING_LEX && tokstr().map(|s| s == "{" || s == "\u{8f}").unwrap_or(false);
    if tok() == INBRACE_TOK || body_opener_is_string_brace {
        // Capture body_start BEFORE the lexer advances past the
        // first body token. After the previous zshlex consumed
        // `{`, lexer.pos points just past `{` (which is where the
        // body source starts). The next `zshlex()` would advance
        // past the first token (`echo`), making body_start land
        // mid-body and lose the first word — `typeset -f f` would
        // print `a; echo b` for `{ echo a; echo b }`.
        // c:Src/parse.c:1690-1706 — par_funcdef requires a clean
        // body-opener brace when the anonymous form `function {body}`
        // is used (no names AND no `()`). zsh's lexer keeps the `{`
        // as its own STRING token via the lex.c:1141-1144 early-
        // return at command position, but the body brace must be
        // followed by whitespace for the inner par_list to find a
        // matching OUTBRACE — without a separator, the closing `}`
        // gets merged into the last word (`X}`) and par_list ends
        // without OUTBRACE, which C zsh reports as `parse error near
        // \`}'`. zshrs's lexer has the same `bct` semantics; reject
        // here at the parse step so the funcdef doesn't silently run
        // with the stray `}` attached. With names or `()` present,
        // the body brace is allowed even without a separator
        // (`function name {body}` and `function () {body}` both work
        // in zsh). Bug #60 in docs/BUGS.md.
        if names.is_empty() && !saw_paren {
            // Peek the next source byte after the current lexer position
            // (`{` was just tokenized — `pos()` points just past it).
            // A whitespace separator means proper `function { body }`
            // form; anything else is the malformed `function {body}`
            // shape zsh rejects. A missing byte (end of input) is
            // treated like a space and therefore accepted here.
            let next_byte = input_slice(pos(), pos() + 1)
                .and_then(|s| s.bytes().next())
                .unwrap_or(b' ');
            if !matches!(next_byte, b' ' | b'\t' | b'\n' | b';') {
                zerr("parse error near `}'"); // c:Src/parse.c YYERRORV
                return None;
            }
        }
        let body_start = pos();
        zshlex();
        // c:Src/parse.c — func body terminates at OUTBRACE_TOK.
        // Explicit end-token keeps the inner parse from hitting the
        // top-level stray-`}` arm (#168). Bug #167 family.
        let body = parse_program_until(Some(&[OUTBRACE_TOK]));
        // c:Src/parse.c:1733-1737 — `if (tok != OUTBRACE) { cmdpop();
        // ... YYERRORV(oecused); }`. Hard-error on missing close brace
        // so `function f { echo hi` doesn't silently register a half-
        // parsed body. Bug #405.
        if tok() != OUTBRACE_TOK {
            zerr("parse error: expected `}'");
            return None;
        }
        let body_end = pos().saturating_sub(1);
        let body_source = input_slice(body_start, body_end)
            .map(|s| {
                // Lexer's pos() may have advanced past `}` AND skipped
                // trailing whitespace/newlines before returning the
                // OUTBRACE_TOK to us, so the slice up to `pos - 1`
                // includes the `}` and any preceding whitespace.
                // Strip the trailing `}` and any preceding structural
                // separator (`;`, `\n`) — C zsh's getpermtext walks
                // the wordcode list and emits each command WITHOUT
                // the trailing `;`/`\n` that lives in the input.
                // NOTE(review): if the slice can ever end before the
                // closing `}`, `strip_suffix('}')` would instead remove
                // a `}` that belongs to the body (e.g. `${x}`) — confirm
                // against the lexer's pos() contract.
                let t = s.trim();
                let t = t.strip_suffix('}').unwrap_or(t).trim_end();
                let t = t
                    .trim_end_matches(|c: char| c == ';' || c == '\n')
                    .trim_end();
                t.to_string()
            })
            // An empty body text is recorded as "no source" (None).
            .filter(|s| !s.is_empty());
        zshlex(); // step past the closing `}`

        // Anonymous form `function () { body } a b c` (with `()`) or
        // `function { body } a b c` (zsh-only shorthand, no `()`). No
        // name was collected. Mirror parse_anon_funcdef: synthesize
        // a `_zshrs_anon_kw_N` name, collect trailing args, set
        // auto_call_args so compile_funcdef registers + immediately
        // calls the function with the args as positional params.
        if names.is_empty() {
            let mut args = Vec::new();
            while tok() == STRING_LEX {
                if let Some(s) = tokstr() {
                    args.push(s);
                }
                zshlex();
            }
            // Process-wide counter so each anonymous function parsed
            // through this path gets a distinct synthetic name.
            static ANON_COUNTER: AtomicUsize = AtomicUsize::new(0);
            let n = ANON_COUNTER.fetch_add(1, Ordering::Relaxed);
            let name = format!("_zshrs_anon_kw_{}", n);
            return Some(ZshCommand::FuncDef(ZshFuncDef {
                names: vec![name],
                body: Box::new(body),
                tracing,
                auto_call_args: Some(args),
                body_source,
            }));
        }

        Some(ZshCommand::FuncDef(ZshFuncDef {
            names,
            body: Box::new(body),
            tracing,
            auto_call_args: None,
            body_source,
        }))
    } else {
        // Short form: no brace opener, so the body is the single list
        // that follows; no source text is captured for it.
        par_list().map(|list| {
            ZshCommand::FuncDef(ZshFuncDef {
                names,
                body: Box::new(ZshProgram { lists: vec![list] }),
                tracing,
                auto_call_args: None,
                body_source: None,
            })
        })
    }
}
2145
2146/// Parse time command
2147/// Parse `time CMD` (POSIX time keyword). Direct port of
2148/// zsh/Src/parse.c:1787 `par_time`. The `time` keyword
2149/// times the execution of the following pipeline / cmd.
2150fn par_time() -> Option<ZshCommand> {
2151 zshlex(); // skip 'time'
2152
2153 // Check if there's a pipeline to time
2154 if tok() == SEPER || tok() == NEWLIN || tok() == ENDINPUT {
2155 Some(ZshCommand::Time(None))
2156 } else {
2157 let sublist = par_sublist();
2158 Some(ZshCommand::Time(sublist.map(Box::new)))
2159 }
2160}
2161
2162/// Port of `par_dinbrack(void)` from `Src/parse.c:1810`. Body
2163/// parser inside `[[ ... ]]` — calls `par_cond` to emit the
2164/// condition wordcode then advances past `]]`.
2165pub fn par_dinbrack() -> Option<()> {
2166 // c:1810
2167 set_incond(1); // c:1814
2168 set_incmdpos(false); // c:1815
2169 zshlex(); // c:1816
2170 let _ = par_cond(); // c:1817
2171 if tok() != DOUTBRACK {
2172 // c:1818
2173 yyerror("missing ]]");
2174 return None;
2175 }
2176 set_incond(0); // c:1820
2177 set_incmdpos(true); // c:1821
2178 zshlex(); // c:1822
2179 Some(())
2180}
2181
/// Parse a simple command (assignments + words + redirections).
/// Port of zsh/Src/parse.c:1836 `par_simple`.
///
/// `redirs` holds redirections already collected by the caller; any
/// redirections found here are appended to it.
///
/// Processing order:
/// 1. leading NOCORRECT tokens set the nocorrect flag,
/// 2. leading ENVSTRING/ENVARRAY tokens become `assigns`,
/// 3. the main loop collects words, mid-command assignment-shaped
///    arguments (re-serialised into words), redirections, and detects
///    the `name()` inline function-definition form.
///
/// Returns `None` when nothing at all was collected or on a reported
/// parse error. When an inline function definition is detected the
/// result of `parse_inline_funcdef` is returned instead of a
/// `ZshCommand::Simple`.
fn par_simple(mut redirs: Vec<ZshRedir>) -> Option<ZshCommand> {
    let mut assigns = Vec::new();
    let mut words = Vec::new();

    // c:1934-1974 — `{var}>file` brace-FD detection is wired INSIDE
    // the words loop below (STRING/TYPESET arm) rather than here at
    // the head. That site sees the tok=STRING `{varname}` followed by
    // a REDIROP and routes into par_redir with redir.varid populated.
    // C does it inline at the start of each STRING/TYPESET arm
    // iteration; functionally equivalent.

    // c:1843-1846 — leading-NOCORRECT prefix: `nocorrect echo hello`
    // emits a NOCORRECT token at the start of par_simple. C sets
    // `nocorrect = 1` and skips past via the `zshlex();` at the
    // for-loop tail (c:1907). zshrs's par_simple (AST) had no
    // NOCORRECT arm so the token was silently dropped and the
    // following command line evaporated — `nocorrect echo hello`
    // produced empty output.
    while tok() == NOCORRECT {
        set_nocorrect(1); // c:1846
        zshlex(); // c:1907 (loop-tail zshlex)
    }

    // Parse leading assignments. A token whose assignment fails to
    // parse is skipped without being recorded.
    while tok() == ENVSTRING || tok() == ENVARRAY {
        if let Some(assign) = parse_assign() {
            assigns.push(assign);
        }
        zshlex();
    }

    // Parse words and redirections
    loop {
        match tok() {
            ENVSTRING | ENVARRAY => {
                // Mid-command assignment-shape arg under typeset
                // / declare / local / etc. (intypeset gates the
                // lexer to emit Envstring/Envarray for `name=val`
                // and `name=()` past the command name). Parse the
                // assignment, then emit a synthetic word
                // `NAME=value` (scalar) or `NAME=( … )` (array)
                // string so typeset's builtin arg list sees the
                // assignment-shape arg. Avoids the inline-env
                // scope path that mistakenly treats it like a
                // pre-cmd `X=Y cmd` assignment.
                if let Some(assign) = parse_assign() {
                    let synthetic = match &assign.value {
                        ZshAssignValue::Scalar(v) => format!("{}={}", assign.name, v),
                        ZshAssignValue::Array(elems) => {
                            // c:Src/builtin.c — assoc paren-init `h=( "" v
                            // k2 v2 )` must preserve empty-string
                            // elements (zsh stores key="" + value="v").
                            // The bin_typeset paren-init splitter
                            // recognizes the REJOIN_SEP (`\u{1f}`)
                            // sentinel between array elements and skips
                            // the leading/trailing parens trim; using it
                            // here round-trips empties end-to-end through
                            // the synthetic-arg rebuild. Space-join
                            // collapses adjacent empties (`(` + `""` +
                            // `empty-val` becomes `( empty-val`) so
                            // bin_typeset never sees the empty key.
                            // Bug #93 in docs/BUGS.md.
                            //
                            // Resulting shape: NAME=(<1f>e1<1f>e2...<1f>)
                            // The capacity is an estimate: name + "=(" +
                            // ")" + one separator per element.
                            let mut buf = String::with_capacity(
                                assign.name.len() + 4 + elems.iter().map(|e| e.len() + 1).sum::<usize>(),
                            );
                            buf.push_str(&assign.name);
                            buf.push_str("=(");
                            for elem in elems {
                                buf.push('\u{1f}');
                                buf.push_str(elem);
                            }
                            buf.push('\u{1f}');
                            buf.push(')');
                            buf
                        }
                    };
                    words.push(synthetic);
                }
                zshlex();
            }
            STRING_LEX | TYPESET => {
                let s = tokstr();
                if let Some(s) = s {
                    words.push(s);
                }
                // c:1929 — `incmdpos = 0;` so the next zshlex() does
                // not re-promote `{`/`[[`/reserved words at the
                // continuation position. Without this, `echo {a,b}`
                // re-lexes `{` as INBRACE_TOK (current-shell block)
                // and the brace expansion never reaches par_simple.
                set_incmdpos(false);
                // c:1931-1932 — `if (tok == TYPESET) intypeset = is_typeset = 1;`
                // Multi-assign `typeset a=1 b=2` relies on the lexer
                // re-emitting `b=2` as ENVSTRING; that path is gated
                // on `intypeset`. Without this, follow-on assignment
                // words arrive as STRING and the typeset builtin's
                // multi-assign form silently degrades.
                if tok() == TYPESET {
                    set_intypeset(true);
                }
                zshlex();
                // Check for function definition foo() { ... }
                if words.len() == 1 && tok() == INOUTPAR {
                    return parse_inline_funcdef(words.pop().unwrap());
                }
                // `{name}>file` named-fd redirect: the lexer doesn't
                // recognize this shape, so the bare word `{name}`
                // arrives as a String. If it matches `{IDENT}` and
                // the NEXT token is a redirop, pop it off as the
                // varid for that redir.
                if !words.is_empty() && IS_REDIROP(tok()) {
                    let last = words.last().unwrap();
                    let untoked = super::lex::untokenize(last);
                    if untoked.starts_with('{') && untoked.ends_with('}') && untoked.len() > 2 {
                        // Both delimiters are single-byte ASCII, so the
                        // byte slice below stays on char boundaries.
                        let name = &untoked[1..untoked.len() - 1];
                        // IDENT: [A-Za-z_][A-Za-z0-9_]*
                        if !name.is_empty()
                            && name.chars().all(|c| c == '_' || c.is_ascii_alphanumeric())
                            && name
                                .chars()
                                .next()
                                .map(|c| c == '_' || c.is_ascii_alphabetic())
                                .unwrap_or(false)
                        {
                            let varid = name.to_string();
                            words.pop();
                            // If the redirection fails to parse, the
                            // popped `{name}` word is dropped and the
                            // loop simply continues.
                            if let Some(mut redir) = par_redir() {
                                redir.varid = Some(varid);
                                redirs.push(redir);
                            }
                            continue;
                        }
                    }
                }
            }
            _ if IS_REDIROP(tok()) => {
                match par_redir() {
                    Some(redir) => redirs.push(redir),
                    None => break, // Error in redir parsing, stop
                }
            }
            INOUTPAR if !words.is_empty() => {
                // c:2055-2057 — `if (!isset(MULTIFUNCDEF) && argc > 1)
                // YYERROR(oecused);` — multi-name funcdef gate:
                // `f1 f2() { ... }` defines f1 AND f2 to the same
                // body, but only when MULTIFUNCDEF is set.
                if !isset(MULTIFUNCDEF) && words.len() > 1 {
                    zerr("parse error: multiple names in function definition without MULTIFUNCDEF");
                    return None;
                }
                // c:2061-2068 — `if (isset(EXECOPT) && hasalias &&
                // !isset(ALIASFUNCDEF) && argc && hasalias !=
                // input_hasalias()) { zwarn(...); YYERROR(...); }`
                // Alias-as-funcdef warning. zshrs's parser doesn't
                // track `hasalias` (alias-expansion provenance
                // during parse) yet, so `had_alias` stays false —
                // the gate is wired here as a marker so the canonical
                // C predicate is visible. Once alias-provenance lands,
                // swap `false` for the actual provenance compare.
                let had_alias = false;
                if isset(EXECOPT) && had_alias && !isset(ALIASFUNCDEF) && !words.is_empty() {
                    crate::ported::utils::zwarn("defining function based on alias `(unknown)'");
                    return None;
                }
                // foo() { ... } style function. Only the last collected
                // word is handed to parse_inline_funcdef.
                // NOTE(review): with MULTIFUNCDEF set and several
                // names, the earlier words appear to be discarded here
                // — confirm this is intended.
                return parse_inline_funcdef(words.pop().unwrap());
            }
            _ => break,
        }
    }

    // Nothing collected at all: not a simple command.
    if assigns.is_empty() && words.is_empty() && redirs.is_empty() {
        return None;
    }

    Some(ZshCommand::Simple(ZshSimple {
        assigns,
        words,
        redirs,
    }))
}
2371
/// Parse a redirection (>file, <file, >>file, <<HEREDOC, etc.).
/// Counterpart of zsh/Src/parse.c:2229 `par_redir`.
///
/// Thin wrapper: forwards to `par_redir_with_id` with `None` as the id
/// argument and returns its result unchanged. Callers that need a
/// `{var}`-style fd binding (see `par_simple`) set `varid` on the
/// returned node themselves.
fn par_redir() -> Option<ZshRedir> {
    par_redir_with_id(None)
}
2381
2382/// Wire a here-document body onto the redirection token that
2383/// requested it. Direct port of zsh/Src/parse.c:2347
2384/// `setheredoc`. Called when a heredoc terminator has been
2385/// matched and the body is ready to be attached to the redir.
2386///
2387/// zshrs port note: zsh's setheredoc patches the wordcode
2388/// in-place via `pc[1] = ecstrcode(doc); pc[2] = ecstrcode(term);`.
2389/// zshrs threads heredoc bodies through `HereDocInfo` structs
2390/// attached inline during the post-parse `fill_heredoc_bodies` walk.
2391/// This method is the AST-side equivalent: writes back to the
2392/// matching redir node by index.
2393/// Port of `setheredoc(int pc, int type, char *str, char *termstr,
2394/// char *munged_termstr)` from `Src/parse.c:2347-2355`. Patches the
2395/// pending heredoc redir at `pc` with its body string + raw and
2396/// munged terminator forms.
2397pub fn setheredoc(pc: usize, redir_type: i32, doc: &str, term: &str, munged_term: &str) {
2398 // zshrs-only guard: AST-path heredocs use `pc = -1 as usize`
2399 // (i.e. `usize::MAX`) as a sentinel meaning "no wordcode slot to
2400 // patch". C never passes a negative pc since the wordcode emitter
2401 // is always active. Skip silently for the AST-only case.
2402 if pc == usize::MAX {
2403 return;
2404 }
2405 // c:2350 — `int varid = WC_REDIR_VARID(ecbuf[pc]) ? REDIR_VARID_MASK : 0;`
2406 let cur = ECBUF.with_borrow(|b| b.get(pc).copied().unwrap_or(0));
2407 let varid = if WC_REDIR_VARID(cur) != 0 {
2408 REDIR_VARID_MASK
2409 } else {
2410 0
2411 };
2412 // c:2351 — `ecbuf[pc] = WCB_REDIR(type | REDIR_FROM_HEREDOC_MASK | varid);`
2413 let new_header = WCB_REDIR((redir_type | REDIR_FROM_HEREDOC_MASK | varid) as wordcode);
2414 // c:2352 — `ecbuf[pc + 2] = ecstrcode(str);`
2415 let coded_str = ecstrcode(doc);
2416 // c:2353 — `ecbuf[pc + 3] = ecstrcode(termstr);`
2417 let coded_term = ecstrcode(term);
2418 // c:2354 — `ecbuf[pc + 4] = ecstrcode(munged_termstr);`
2419 let coded_munged = ecstrcode(munged_term);
2420 ECBUF.with_borrow_mut(|b| {
2421 b[pc] = new_header;
2422 b[pc + 2] = coded_str;
2423 b[pc + 3] = coded_term;
2424 b[pc + 4] = coded_munged;
2425 });
2426}
2427
2428/// Parse a wordlist for `for ... in WORDS;`. Direct port of
2429/// zsh/Src/parse.c:2362 `par_wordlist`. Reads STRING tokens
2430/// until the next SEPER / SEMI / NEWLIN.
2431pub fn par_wordlist() -> Vec<String> {
2432 let mut out = Vec::new();
2433 // parse.c:2362-2378 — collect STRINGs into the wordlist.
2434 while tok() == STRING_LEX {
2435 if let Some(text) = tokstr() {
2436 out.push(text);
2437 }
2438 zshlex();
2439 }
2440 out
2441}
2442
2443/// Parse a newline-separated wordlist. Direct port of
2444/// zsh/Src/parse.c:2379 `par_nl_wordlist`. Like
2445/// par_wordlist but tolerates leading/trailing newlines.
2446pub fn par_nl_wordlist() -> Vec<String> {
2447 // parse.c:2380-2381 — skip leading newlines.
2448 while tok() == NEWLIN {
2449 zshlex();
2450 }
2451 let out = par_wordlist();
2452 // parse.c:2395-2397 — skip trailing newlines.
2453 while tok() == NEWLIN {
2454 zshlex();
2455 }
2456 out
2457}
2458
2459/// `COND_SEP()` macro from `Src/parse.c:2433`. True when the current
2460/// token is a separator usable inside `[[ … ]]` (newline / semi /
2461/// `&`). C uses it to skip optional whitespace between cond terms.
2462#[inline]
2463pub fn COND_SEP() -> bool {
2464 matches!(tok(), NEWLIN | SEMI | AMPER)
2465}
2466
2467/// Parse [[ ... ]] conditional
2468/// Parse `[[ EXPR ]]` conditional expression. Direct port of
2469/// zsh/Src/parse.c:2409 `par_cond` (and helpers par_cond_1,
2470/// par_cond_2, par_cond_double, par_cond_triple, par_cond_multi
2471/// at parse.c:2434-2731). Expression operators: `||` `&&` `!`
2472/// + unary tests (-f, -d, -n, -z, etc.) + binary tests (=, !=,
2473/// <, >, ==, =~, -eq, -ne, -lt, -le, -gt, -ge, -nt, -ot, -ef).
2474fn par_cond() -> Option<ZshCommand> {
2475 // C par_dinbrack (parse.c:1810-1822) wraps the body parse with
2476 // `incond = 1; incmdpos = 0;` BEFORE the first zshlex past `[[`,
2477 // and resets to `incond = 0; incmdpos = 1;` after `]]`. Without
2478 // `incond = 1`, lex.c does not promote `]]` to DOUTBRACK and the
2479 // cond body bleeds past the close bracket — the parser then
2480 // sees `]]` as a separate STRING command. Every `if [[ ... ]]; then`
2481 // failed with `command not found: ]]` before this fix.
2482 set_incond(1);
2483 set_incmdpos(false);
2484 zshlex(); // skip [[
2485 // Empty cond `[[ ]]` is a parse error in zsh — emit the
2486 // diagnostic and return None so the caller produces a
2487 // non-zero exit. Without this, `[[ ]]` silently passed and
2488 // returned exit 0.
2489 if tok() == DOUTBRACK {
2490 zerr("parse error near `]]'");
2491 set_incond(0);
2492 set_incmdpos(true);
2493 zshlex();
2494 return None;
2495 }
2496 let cond = parse_cond_expr();
2497
2498 if tok() == DOUTBRACK {
2499 set_incond(0);
2500 set_incmdpos(true);
2501 zshlex();
2502 } else {
2503 // c:Src/parse.c:1818-1819 — `if (tok != DOUTBRACK)
2504 // YYERRORV(oecused);`. par_dinbrack hard-requires DOUTBRACK
2505 // after par_cond; anything else is a parse error and the
2506 // outer parser's yyerror at c:2747 emits `parse error near
2507 // \`%s'` using zshlextext. Bug #473: BAR (`|`) inside
2508 // `[[ ab == a|b ]]` slipped past par_cond_or (which only
2509 // checks DBAR), the cond returned cleanly, and then the
2510 // top-level parser interpreted BAR as a pipe — running `b`
2511 // as a command (security-relevant if pattern RHS is user
2512 // input). Mirror C: emit parse error and abort.
2513 let tok_text = match tok() {
2514 BAR_TOK => "|".to_string(),
2515 DBAR => "||".to_string(),
2516 AMPER => "&".to_string(),
2517 DAMPER => "&&".to_string(),
2518 SEMI => ";".to_string(),
2519 DSEMI => ";;".to_string(),
2520 NEWLIN | SEPER => String::new(),
2521 _ => tokstr().map(|s| crate::ported::lex::untokenize(&s)).unwrap_or_default(),
2522 };
2523 if tok_text.is_empty() {
2524 zerr("parse error");
2525 } else {
2526 zerr(&format!("parse error near `{}'", tok_text));
2527 }
2528 set_incond(0);
2529 set_incmdpos(true);
2530 return None;
2531 }
2532
2533 cond.map(ZshCommand::Cond)
2534}
2535
2536/// Port of `par_cond_1(void)` from `Src/parse.c:2434`. Parses one
2537/// `||`-separated cond expression. Emits `WCB_COND(COND_AND, …)`
2538/// when an `&&` is found and recurses.
2539pub fn par_cond_1() -> i32 {
2540 // c:2434
2541
2542 let p = ECUSED.with(|c| c.get()) as usize;
2543 let r = par_cond_2();
2544 while COND_SEP() {
2545 condlex();
2546 }
2547 if tok() == DAMPER {
2548 condlex();
2549 while COND_SEP() {
2550 condlex();
2551 }
2552 ecispace(p, 1);
2553 par_cond_1();
2554 let ecused = ECUSED.with(|c| c.get()) as usize;
2555 ECBUF.with(|c| {
2556 c.borrow_mut()[p] = WCB_COND(COND_AND as u32, (ecused - 1 - p) as u32);
2557 });
2558 return 1;
2559 }
2560 r
2561}
2562
2563/// Port of `par_cond_2(void)` from `Src/parse.c:2476`. The heavy
2564/// cond-term parser: handles `! cond`, `(cond)`, unary `[ -X arg ]`,
2565/// binary `[ A op B ]`, and `[ A op1 B op2 C … ]` n-ary chains.
2566pub fn par_cond_2() -> i32 {
2567 // c:2476
2568 // `n_testargs` only applies in `testlex` mode (=== /bin/test
2569 // compat). zshrs has no testlex yet, so always 0.
2570 let n_testargs: i32 = 0;
2571
2572 // c:2481 — handled inline; this Rust port skips the n_testargs
2573 // arm since zshrs invokes par_cond via [[ ... ]] only.
2574
2575 while COND_SEP() {
2576 condlex();
2577 }
2578 if tok() == BANG_TOK {
2579 // c:2522 — `[[ ! cond ]]`
2580 condlex();
2581 ecadd(WCB_COND(COND_NOT as u32, 0));
2582 return par_cond_2();
2583 }
2584 if tok() == INPAR_TOK {
2585 // c:2533 — `[[ (cond) ]]`
2586 condlex();
2587 while COND_SEP() {
2588 condlex();
2589 }
2590 let r = par_cond();
2591 while COND_SEP() {
2592 condlex();
2593 }
2594 if tok() != OUTPAR_TOK {
2595 yyerror("missing )");
2596 return 0;
2597 }
2598 condlex();
2599 return r.map_or(0, |_| 1);
2600 }
2601 let s1 = tokstr().unwrap_or_default();
2602 // c:2549 — `dble = (s1 && IS_DASH(*s1) && (!n_testargs ||
2603 // strspn(s1+1, "abcd...") == 1) && !s1[2]);` — IS_DASH covers
2604 // BOTH `-` and Dash (`\u{9b}`). The raw tokstr inside `[[ ... ]]`
2605 // carries Dash as a marker byte, so `starts_with('-')` alone
2606 // matches only ASCII dashes and misses every `-z`, `-d`, `-r`
2607 // etc. — every such cond emitted the AST-only `condition
2608 // expected` error from par_cond_double. Use IS_DASH and count
2609 // chars (Dash is a single code point) instead of bytes.
2610 let s1_chars: Vec<char> = s1.chars().collect();
2611 let dble = !s1_chars.is_empty()
2612 && IS_DASH(s1_chars[0])
2613 && s1_chars.len() == 2
2614 && "abcdefghknoprstuvwxzLONGS".contains(s1_chars[1]);
2615 if tok() != STRING_LEX {
2616 if !s1.is_empty() && tok() != LEXERR && (!dble || n_testargs != 0) {
2617 // c:2486-2497 — `if (n_testargs == 1)` block: under
2618 // POSIXBUILTINS-off, `[ -t ]` rewrites to `[ -t 1 ]`
2619 // (ksh behavior). The C gate is `unset(POSIXBUILTINS)
2620 // && check_cond(s1, "t")`. zshrs's parser has
2621 // n_testargs=0 (no testlex), so this rewrite path is
2622 // unreachable from zshrs's [[ ]] / [ ] entry points;
2623 // wired here as a marker for parity. When testlex is
2624 // ported the call below activates.
2625 if n_testargs == 1 && unset(POSIXBUILTINS) && check_cond(&s1, "t") {
2626 condlex();
2627 return par_cond_double(&s1, "1");
2628 }
2629 // c:2557 — `[[ STRING ]]` re-interpreted as `[[ -n STRING ]]`.
2630 condlex();
2631 while COND_SEP() {
2632 condlex();
2633 }
2634 return par_cond_double("-n", &s1);
2635 }
2636 yyerror("condition expected");
2637 return 0;
2638 }
2639 condlex();
2640 while COND_SEP() {
2641 condlex();
2642 }
2643 if tok() == INANG_TOK || tok() == OUTANG_TOK {
2644 // c:2576 — `<` / `>` string compare.
2645 let xtok = tok();
2646 condlex();
2647 while COND_SEP() {
2648 condlex();
2649 }
2650 if tok() != STRING_LEX {
2651 yyerror("string expected");
2652 return 0;
2653 }
2654 let s3 = tokstr().unwrap_or_default();
2655 condlex();
2656 while COND_SEP() {
2657 condlex();
2658 }
2659 let op = if xtok == INANG_TOK {
2660 COND_STRLT
2661 } else {
2662 COND_STRGTR
2663 };
2664 ecadd(WCB_COND(op as u32, 0));
2665 ecstr(&s1);
2666 ecstr(&s3);
2667 return 1;
2668 }
2669 if tok() != STRING_LEX {
2670 // c:2592 — only one operand seen → `[ -n s1 ]`.
2671 if tok() != LEXERR {
2672 if !dble || n_testargs != 0 {
2673 return par_cond_double("-n", &s1);
2674 }
2675 return par_cond_multi(&s1, &[]);
2676 }
2677 yyerror("syntax error");
2678 return 0;
2679 }
2680 let s2 = tokstr().unwrap_or_default();
2681 set_incond(incond() + 1);
2682 condlex();
2683 while COND_SEP() {
2684 condlex();
2685 }
2686 set_incond(incond() - 1);
2687 // c:Src/parse.c:2598-2600 — `if (!n_testargs) dble = (s2 &&
2688 // IS_DASH(*s2) && !s2[2]);` — RECOMPUTE dble based on s2 once
2689 // it's been read, so `[[ A -X B ]]` is treated as a 2-arg cond
2690 // `[ -X B ]` (par_cond_double) rather than a 3-arg triple. This
2691 // is what routes `[[ "" -a "x" ]]` to par_cond_double("", "-a")
2692 // → COND_ERROR "parse error: condition expected: ". Without
2693 // this, the original `dble` from s1 stayed false, the parser
2694 // grabbed s3 and built COND_MODI silently. parity bug #25.
2695 let s2_chars: Vec<char> = s2.chars().collect();
2696 let dble = !s2_chars.is_empty() && IS_DASH(s2_chars[0]) && s2_chars.len() == 2;
2697 if tok() == STRING_LEX && !dble {
2698 let s3 = tokstr().unwrap_or_default();
2699 condlex();
2700 while COND_SEP() {
2701 condlex();
2702 }
2703 if tok() == STRING_LEX {
2704 // c:2615 — n-ary `[ A op B C D ... ]`.
2705 let mut l: Vec<String> = vec![s2, s3];
2706 while tok() == STRING_LEX {
2707 l.push(tokstr().unwrap_or_default());
2708 condlex();
2709 while COND_SEP() {
2710 condlex();
2711 }
2712 }
2713 return par_cond_multi(&s1, &l);
2714 }
2715 return par_cond_triple(&s1, &s2, &s3);
2716 }
2717 par_cond_double(&s1, &s2)
2718}
2719
2720/// Port of `par_cond_double(char *a, char *b)` from `Src/parse.c:2626`.
2721/// Emits wordcode for unary cond `[ -X b ]` or modular `[ -mod b ]`.
2722pub fn par_cond_double(a: &str, b: &str) -> i32 {
2723 // c:2628 — `if (!IS_DASH(a[0]) || !a[1])` — char-based, since
2724 // Dash is a single code point (`\u{9b}`) and `a.len() < 2` on
2725 // BYTES would still pass for "-z" but fail for the marker form
2726 // `\u{9b}z` (2 bytes). Walk by chars.
2727 let ac: Vec<char> = a.chars().collect();
2728 if ac.is_empty() || !IS_DASH(ac[0]) || ac.len() < 2 {
2729 // c:Src/parse.c:2629 COND_ERROR macro expansion:
2730 // zwarn(...); herrflush(); errflag |= ERRFLAG_ERROR;
2731 // YYERROR(ecused) /* sets tok = LEXERR */
2732 // The YYERROR portion is critical — without it the outer
2733 // parser keeps walking the wordcode and execution proceeds
2734 // (e.g. `[[ "" -a "x" ]] && echo m || echo n` runs the
2735 // `|| echo n` branch). Setting LEXERR aborts the upper
2736 // parse so the whole line is rejected, matching zsh's
2737 // observable behavior of stdout="" on parse error.
2738 zerr(&format!("parse error: condition expected: {}", a));
2739 errflag.fetch_or(crate::ported::zsh_h::ERRFLAG_ERROR, Ordering::SeqCst);
2740 set_tok(LEXERR);
2741 return 1;
2742 }
2743 // c:2630 — `else if (!a[2] && strspn(a+1, "abcd...zhLONGS") == 1)`
2744 let unary_set = "abcdefgknoprstuvwxzhLONGS";
2745 if ac.len() == 2 && unary_set.contains(ac[1]) {
2746 // c:2631 — `ecadd(WCB_COND(a[1], 0));` uses the raw cond-op
2747 // letter byte as the opcode payload. Use the ASCII char's
2748 // code-point value directly — every letter in `unary_set`
2749 // fits in 7 bits.
2750 ecadd(WCB_COND(ac[1] as u32, 0));
2751 ecstr(b);
2752 } else {
2753 ecadd(WCB_COND(COND_MOD as u32, 1));
2754 ecstr(a);
2755 ecstr(b);
2756 }
2757 1
2758}
2759
/// Port of `get_cond_num(char *tst)` from `Src/parse.c:2643`.
///
/// Looks `tst` up in the table of binary cond operator names
/// `{"nt","ot","ef","eq","ne","lt","gt","le","ge"}` (c:2647) and returns
/// its zero-based position (c:2654), or `-1` when `tst` is not in the
/// table (c:2656). The comparison is exact and case-sensitive.
pub fn get_cond_num(tst: &str) -> i32 {
    // c:2643
    const BINARY_OPS: [&str; 9] = ["nt", "ot", "ef", "eq", "ne", "lt", "gt", "le", "ge"];

    BINARY_OPS
        .iter()
        .position(|&name| name == tst)
        .map_or(-1, |index| index as i32)
}
2775
thread_local! {
    /// Counterpart of par_time's `static int inpartime` guard at C
    /// parse.c:1038, which prevents infinite recursion on
    /// `time time foo`. C uses a function-level `static int`
    /// (per-process); the wordcode path keeps it as a thread-local flag,
    /// starting out `false`.
    ///
    /// The documentation sits on the static itself because a doc comment
    /// placed on the `thread_local!` invocation is ignored by rustdoc.
    static PARSER_INPARTIME: ::std::cell::Cell<bool> =
        const { ::std::cell::Cell::new(false) };
}
2783
2784/// Port of `par_cond_triple(char *a, char *b, char *c)` from
2785/// `Src/parse.c:2659`. Emits wordcode for the binary forms
2786/// `[ A op B ]` — `=` / `==` / `!=` / `<` / `>` / `=~` / `-X`.
2787///
2788/// C does `(b[0] == Equals || b[0] == '=')` etc., matching BOTH the
2789/// raw ASCII operator char AND its tokenized marker form per
2790/// `Src/zsh.h:159-194`:
2791/// Equals = `\u{8d}`, Outang = `\u{95}`, Inang = `\u{94}`,
2792/// Tilde = `\u{98}`, Bang = `\u{9c}`, Dash = `\u{9b}`.
2793/// Inside `[[ ... ]]` the lexer emits the marker bytes — comparing
2794/// against literal-only `b"=="` misses every cond op.
2795/// (The previous Rust port had the doc comment values wrong:
2796/// Outang=0x8e was actually Bar; Inang=0x91 was Inbrack;
2797/// Tilde=0x96 was OutangProc; Bang=0x8b was Outparmath. The code
2798/// itself uses the correct const names, so this was a docs-only fix.)
2799pub fn par_cond_triple(a: &str, b: &str, c: &str) -> i32 {
2800 // c:2659
2801 let bc: Vec<char> = b.chars().collect();
2802 let is_eq = |ch: char| ch == '=' || ch == Equals;
2803 let is_gt = |ch: char| ch == '>' || ch == Outang;
2804 let is_lt = |ch: char| ch == '<' || ch == Inang;
2805 let is_tilde = |ch: char| ch == '~' || ch == Tilde;
2806 let is_bang = |ch: char| ch == '!' || ch == Bang;
2807
2808 // c:2663 — `(b[0] == Equals || b[0] == '=') && !b[1]` → `=` (single).
2809 if bc.len() == 1 && is_eq(bc[0]) {
2810 ecadd(WCB_COND(COND_STREQ as u32, 0));
2811 ecstr(a);
2812 ecstr(c);
2813 let np = ECNPATS.with(|cc| {
2814 let v = cc.get();
2815 cc.set(v + 1);
2816 v
2817 }) as u32;
2818 ecadd(np);
2819 return 1;
2820 }
2821 // c:2668-2673 — `(t0 = b[0]=='>' || Outang) || b[0]=='<' || Inang`.
2822 if bc.len() == 1 && (is_gt(bc[0]) || is_lt(bc[0])) {
2823 let op = if is_gt(bc[0]) {
2824 COND_STRGTR
2825 } else {
2826 COND_STRLT
2827 };
2828 ecadd(WCB_COND(op as u32, 0));
2829 ecstr(a);
2830 ecstr(c);
2831 let np = ECNPATS.with(|cc| {
2832 let v = cc.get();
2833 cc.set(v + 1);
2834 v
2835 }) as u32;
2836 ecadd(np);
2837 return 1;
2838 }
2839 // c:2674-2679 — `==` STRDEQ.
2840 if bc.len() == 2 && is_eq(bc[0]) && is_eq(bc[1]) {
2841 ecadd(WCB_COND(COND_STRDEQ as u32, 0));
2842 ecstr(a);
2843 ecstr(c);
2844 let np = ECNPATS.with(|cc| {
2845 let v = cc.get();
2846 cc.set(v + 1);
2847 v
2848 }) as u32;
2849 ecadd(np);
2850 return 1;
2851 }
2852 // c:2680-2684 — `!=` STRNEQ.
2853 if bc.len() == 2 && is_bang(bc[0]) && is_eq(bc[1]) {
2854 ecadd(WCB_COND(COND_STRNEQ as u32, 0));
2855 ecstr(a);
2856 ecstr(c);
2857 let np = ECNPATS.with(|cc| {
2858 let v = cc.get();
2859 cc.set(v + 1);
2860 v
2861 }) as u32;
2862 ecadd(np);
2863 return 1;
2864 }
2865 // c:2685-2691 — `=~` REGEX (no pattern slot — implicit COND_MODI).
2866 if bc.len() == 2 && is_eq(bc[0]) && is_tilde(bc[1]) {
2867 ecadd(WCB_COND(COND_REGEX as u32, 0));
2868 ecstr(a);
2869 ecstr(c);
2870 return 1;
2871 }
2872 // c:2692-2702 — `-OP` numeric-or-modular cond (e.g. `-eq`, `-nt`).
2873 if !bc.is_empty() && IS_DASH(bc[0]) {
2874 let rest: String = bc[1..].iter().collect();
2875 let t = get_cond_num(&rest);
2876 if t > -1 {
2877 ecadd(WCB_COND((t + COND_NT) as u32, 0));
2878 ecstr(a);
2879 ecstr(c);
2880 return 1;
2881 }
2882 ecadd(WCB_COND(COND_MODI as u32, 0));
2883 ecstr(b);
2884 ecstr(a);
2885 ecstr(c);
2886 return 1;
2887 }
2888 // c:2703-2707 — `-mod A B C` modular cond on `a`.
2889 let ac: Vec<char> = a.chars().collect();
2890 if !ac.is_empty() && IS_DASH(ac[0]) && ac.len() > 1 {
2891 ecadd(WCB_COND(COND_MOD as u32, 2));
2892 ecstr(a);
2893 ecstr(b);
2894 ecstr(c);
2895 return 1;
2896 }
2897 zerr(&format!("condition expected: {}", b));
2898 1
2899}
2900
2901/// Port of `par_cond_multi(char *a, LinkList l)` from `Src/parse.c:2716`.
2902/// Emits wordcode for `[ -OP A B C … ]` n-ary cond (alternation).
2903pub fn par_cond_multi(a: &str, l: &[String]) -> i32 {
2904 // c:2716 — `if (!IS_DASH(a[0]) || !a[1])`; same Dash/`-` dual
2905 // matching as par_cond_double, char-walked because Dash is a
2906 // single code point.
2907 let ac: Vec<char> = a.chars().collect();
2908 if ac.is_empty() || !IS_DASH(ac[0]) || ac.len() < 2 {
2909 zerr(&format!("condition expected: {}", a));
2910 return 1;
2911 }
2912 ecadd(WCB_COND(COND_MOD as u32, l.len() as u32));
2913 ecstr(a);
2914 for item in l {
2915 ecstr(item);
2916 }
2917 1
2918}
2919
2920/// Emit a parser-level error. Direct port of zsh/Src/parse.c
2921/// 2733-2766 `yyerror`. C version fills a per-event error buffer
2922/// and sets errflag. zshrs pushes onto errors which the
2923/// caller drains via parse()'s Result return.
2924/// WARNING: param-name divergence — Rust takes `&str message`, C takes
2925/// `int noerr`. The Rust callers pass user-meaningful messages
2926/// (`"missing ]]"`, `"condition expected"`); the C body collects the
2927/// offending token via `dupstring(zshlextext)` for the error string.
2928/// This Rust adapter:
2929/// 1. Uses the caller-supplied message verbatim if non-empty.
2930/// 2. Skips the `histdone & HISTFLAG_NOEXEC` and `errflag & ERRFLAG_INT`
2931/// gates per c:2746 (printing only when neither is set) — the
2932/// ERRFLAG_INT check is the load-bearing guard.
2933/// 3. Sets ERRFLAG_ERROR per c:2753 (noerr=0 path always taken).
2934pub fn yyerror(msg: &str) {
2935 // c:2733
2936 let int_flagged = (errflag.load(Ordering::SeqCst) & crate::ported::zsh_h::ERRFLAG_INT) != 0;
2937 if !int_flagged {
2938 // c:2746
2939 let body = if msg.is_empty() {
2940 "parse error".to_string()
2941 }
2942 // c:2751
2943 else {
2944 format!("parse error: {msg}")
2945 }; // c:2748
2946 zwarnnam("zsh", &body);
2947 }
2948 // c:2753 — `if (!noerr && noerrs != 2) errflag |= ERRFLAG_ERROR;`
2949 errflag.fetch_or(crate::ported::zsh_h::ERRFLAG_ERROR, Ordering::SeqCst);
2950}
2951
// ============================================================
// Eprog runtime ops (parse.c:2767-2853)
//
// dupeprog / useeprog / freeeprog are zsh's reference-counting
// helpers for executable programs. The ports below keep the
// `nref` bookkeeping on the Rust `eprog`: dupeprog builds an
// owned copy (its fields are cloned), useeprog bumps `nref` when
// it is non-negative, and freeeprog decrements a positive `nref`
// and clears the program's buffers once it reaches zero.
// ============================================================
2961
2962/// Duplicate an Eprog. Direct port of zsh/Src/parse.c:2813
2963/// Port of `Eprog dupeprog(Eprog p, int heap)` from
2964/// `Src/parse.c:2767`. Deep-copies the wordcode array, string
2965/// table, and pattern-prog slots. `dummy_eprog` is returned
2966/// unchanged. `heap`-allocated copies get `nref = -1` (never
2967/// freed); real ones get `nref = 1`.
2968pub fn dupeprog(p: &eprog, heap: bool) -> eprog {
2969 // c:2774-2775 — `if (p == &dummy_eprog) return p;` — caller-
2970 // observable identity in C uses a pointer compare; Rust's
2971 // equivalent is "if it has the dummy's shape (single WCB_END
2972 // word and no strs), return a copy of the same shape".
2973 // c:2796-2797 — `for (i = r->npats; i--; pp++) *pp = dummy_patprog1;`
2974 // C uses `dummy_patprog1` as a placeholder; the Rust port has
2975 // `Vec<Patprog>` (Box<patprog>) — synthesize an equivalent zero-
2976 // initialized patprog for each slot (resolved later by
2977 // pattern.c::patcompile-on-first-use).
2978 let dummy_pat = || crate::ported::zsh_h::patprog {
2979 startoff: 0,
2980 size: 0,
2981 mustoff: 0,
2982 patmlen: 0,
2983 globflags: 0,
2984 globend: 0,
2985 flags: 0,
2986 patnpar: 0,
2987 patstartch: 0,
2988 };
2989 let r = eprog {
2990 // c:2778 — `flags = (heap ? EF_HEAP : EF_REAL) | (p->flags & EF_RUN);`
2991 flags: (if heap { EF_HEAP } else { EF_REAL }) | (p.flags & EF_RUN),
2992 len: p.len,
2993 npats: p.npats,
2994 // c:2787 — `nref = heap ? -1 : 1;`
2995 nref: if heap { -1 } else { 1 },
2996 prog: p.prog.clone(),
2997 strs: p.strs.clone(),
2998 pats: (0..p.npats).map(|_| Box::new(dummy_pat())).collect(),
2999 shf: None,
3000 dump: None,
3001 };
3002 r
3003}
3004
3005/// Port of `void useeprog(Eprog p)` from `Src/parse.c:2813`.
3006/// `if (p && p != &dummy_eprog && p->nref >= 0) p->nref++;` —
3007/// pin a real (non-heap, non-dummy) Eprog so it survives the
3008/// next `freeeprog`.
3009pub fn useeprog(p: &mut eprog) {
3010 // c:2815 — `if (p && p != &dummy_eprog && p->nref >= 0)`
3011 if p.nref >= 0 {
3012 p.nref += 1; // c:2816
3013 }
3014}
3015
3016/// Port of `void freeeprog(Eprog p)` from `Src/parse.c:2823`.
3017/// Refcount-decrement; when it hits zero, drops the pattern progs,
3018/// decrements the dump refcount if any, and releases the eprog.
3019/// `dummy_eprog` is never freed. Heap-eprogs (`nref < 0`) are
3020/// never freed either — they live as long as the heap arena.
3021pub fn freeeprog(p: &mut eprog) {
3022 // c:2829 — `if (p && p != &dummy_eprog) { ... }`
3023 if p.nref > 0 {
3024 p.nref -= 1; // c:2832
3025 if p.nref == 0 {
3026 // c:2833-2840 — drop pats, dump refcount, then the eprog.
3027 // Rust's Drop handles the per-field cleanup; we just
3028 // need to decrement the dump count first.
3029 if let Some(dump) = p.dump.take() {
3030 let dumped = (*dump).clone();
3031 decrdumpcount(&dumped); // c:2837
3032 }
3033 p.prog.clear();
3034 p.strs = None;
3035 p.pats.clear();
3036 }
3037 }
3038}
3039
3040// =============================================================================
3041// Wordcode read helpers — used by text.rs's `gettext2` and exec dispatch
3042// to walk a compiled Eprog without re-running the parser. These are the
3043// only `Src/parse.c` functions ported so far in this file; the recursive-
3044// descent parser (par_event / par_list / par_cmd / par_*) follows
3045// below as free ported at module scope.
3046// =============================================================================
3047
3048/// Port of `ecgetstr(Estate s, int dup, int *tokflag)` from `Src/parse.c:2855`.
3049/// `s->pc` advances through the wordcode buffer; `s->strs` indexes the
3050/// string pool. Returns the interned string (or a 1-3-char literal
3051/// inlined directly into the wordcode word).
3052pub fn ecgetstr(s: &mut estate, dup: i32, tokflag: Option<&mut i32>) -> String {
3053 let prog = &s.prog.prog;
3054 if s.pc >= prog.len() {
3055 return String::new();
3056 }
3057 let c = prog[s.pc]; // c:2858 `wordcode c = *s->pc++;`
3058 s.pc += 1;
3059 if let Some(tf) = tokflag {
3060 *tf = i32::from((c & 1) != 0); // c:2880 `*tokflag = (c & 1);`
3061 }
3062 if c == 6 || c == 7 {
3063 // c:2861 `if (c == 6 || c == 7) r = "";`
3064 return String::new();
3065 }
3066 let r: String = if (c & 2) != 0 {
3067 // c:2862 — `else if (c & 2)`
3068 // c:2863-2868 — 3-byte inline string packed into the wordcode
3069 // word; followed by `buf[3] = '\0'; r = dupstring(buf);`.
3070 // C's `dupstring` uses `strlen(buf)` which TRUNCATES at the
3071 // first NUL byte — short strings of 1 or 2 chars get padded
3072 // with NULs and truncated cleanly. The previous Rust port
3073 // used `retain(|&x| x != 0)` which would silently SPLICE OUT
3074 // an interior NUL (e.g. `[a, 0, b]` → "ab"), diverging from
3075 // C's strlen-truncate (`[a, 0, b]` → "a"). Fix: truncate at
3076 // first NUL to match C exactly.
3077 let b0 = ((c >> 3) & 0xff) as u8;
3078 let b1 = ((c >> 11) & 0xff) as u8;
3079 let b2 = ((c >> 19) & 0xff) as u8;
3080 let v = [b0, b1, b2];
3081 let end = v.iter().position(|&x| x == 0).unwrap_or(v.len()); // c:2869 strlen(buf)
3082 String::from_utf8_lossy(&v[..end]).into_owned()
3083 } else {
3084 // c:2877 `else r = s->strs + (c >> 2);`
3085 let off = (c >> 2) as usize + s.strs_offset;
3086 let strs_bytes = s.strs.as_deref().unwrap_or("").as_bytes();
3087 if off >= strs_bytes.len() {
3088 String::new()
3089 } else {
3090 let tail = &strs_bytes[off..];
3091 let end = tail.iter().position(|&b| b == 0).unwrap_or(tail.len());
3092 String::from_utf8_lossy(&tail[..end]).into_owned()
3093 }
3094 };
3095 // c:2891 `return ((dup == EC_DUP || (dup && (c & 1))) ? dupstring(r) : r);`
3096 // Rust owns the String already; `dup` flag has no observable effect.
3097 let _ = (dup, EC_DUP, EC_NODUP);
3098 r
3099}
3100
3101// ============================================================
3102// Wordcode runtime getters (parse.c:2853-3060)
3103//
3104// Direct ports of the wordcode-read helpers (ecrawstr,
3105// ecgetstr, ecgetarr, ecgetredirs, ecgetlist, eccopyredirs).
3106// Read packed wordcode out of an Eprog at execution time.
3107// Used by exec_wordcode and the wordcode-walking dispatch in
3108// src/vm_helper.
3109// ============================================================
3110
3111/// Port of `ecrawstr(Eprog p, Wordcode pc, int *tokflag)` from
3112/// `Src/parse.c:2891`. Like `ecgetstr` but reads at the given pc
3113/// without advancing — caller steps `pc` separately.
3114pub fn ecrawstr(p: &eprog, pc: usize, tokflag: Option<&mut i32>) -> String {
3115 if pc >= p.prog.len() {
3116 return String::new();
3117 }
3118 let c = p.prog[pc]; // c:2894
3119 if let Some(tf) = tokflag {
3120 *tf = i32::from((c & 1) != 0); // c:2898/2906/2912
3121 }
3122 if c == 6 || c == 7 {
3123 // c:2897
3124 return String::new();
3125 }
3126 if (c & 2) != 0 {
3127 // c:2902-2906 — same 3-byte inline string as ecgetstr, then
3128 // `buf[3] = '\0'; return dupstring(buf);` — truncate at first
3129 // NUL via strlen (NOT splice out interior NULs).
3130 let b0 = ((c >> 3) & 0xff) as u8;
3131 let b1 = ((c >> 11) & 0xff) as u8;
3132 let b2 = ((c >> 19) & 0xff) as u8;
3133 let v = [b0, b1, b2];
3134 let end = v.iter().position(|&x| x == 0).unwrap_or(v.len()); // c:2906 strlen(buf)
3135 String::from_utf8_lossy(&v[..end]).into_owned()
3136 } else {
3137 // c:2911
3138 let off = (c >> 2) as usize;
3139 let strs_bytes = p.strs.as_deref().unwrap_or("").as_bytes();
3140 if off >= strs_bytes.len() {
3141 return String::new();
3142 }
3143 let tail = &strs_bytes[off..];
3144 let end = tail.iter().position(|&b| b == 0).unwrap_or(tail.len());
3145 String::from_utf8_lossy(&tail[..end]).into_owned()
3146 }
3147}
3148
3149/// Port of `ecgetarr(Estate s, int num, int dup, int *tokflag)` from
3150/// `Src/parse.c:2917`. Reads `num` strings from wordcode at `s->pc`
3151/// and OR-folds each entry's token flag into `*tokflag`.
3152pub fn ecgetarr(s: &mut estate, num: usize, dup: i32, tokflag: Option<&mut i32>) -> Vec<String> {
3153 let mut ret: Vec<String> = Vec::with_capacity(num); // c:2922
3154 let mut tf: i32 = 0;
3155 for _ in 0..num {
3156 // c:2924 `while (num--)`
3157 let mut tmp = 0;
3158 ret.push(ecgetstr(s, dup, Some(&mut tmp))); // c:2925
3159 tf |= tmp; // c:2926
3160 }
3161 if let Some(out) = tokflag {
3162 // c:2929
3163 *out = tf;
3164 }
3165 ret
3166}
3167
3168/// Port of `ecgetlist(Estate s, int num, int dup, int *tokflag)` from
3169/// `Src/parse.c:2937`. Same shape as `ecgetarr` but C returns
3170/// `LinkList`; zshrs uses `Vec<String>` for both.
3171pub fn ecgetlist(s: &mut estate, num: usize, dup: i32, tokflag: Option<&mut i32>) -> Vec<String> {
3172 if num == 0 {
3173 // c:2949-2952
3174 if let Some(tf) = tokflag {
3175 *tf = 0;
3176 }
3177 return Vec::new();
3178 }
3179 ecgetarr(s, num, dup, tokflag)
3180}
3181
3182/// Port of `ecgetredirs(Estate s)` from `Src/parse.c:2959`.
3183///
3184/// `strs` must be the same tail `ecgetstr` uses (`s->strs` / `estate.strs` from offset).
3185/// WARNING: param names don't match C — Rust=(prog, strs, pc) vs C=(s)
3186pub fn ecgetredirs(s: &mut estate) -> Vec<redir> {
3187 let mut ret: Vec<redir> = Vec::new(); // c:2959 `LinkList ret = newlinklist();`
3188 let prog_len = s.prog.prog.len();
3189 if s.pc >= prog_len {
3190 return ret;
3191 }
3192 let mut code = s.prog.prog[s.pc]; // c:2962 `wordcode code = *s->pc++;`
3193 s.pc += 1;
3194
3195 loop {
3196 if wc_code(code) != WC_REDIR {
3197 // c:2988-2989 `s->pc--` then break from while
3198 s.pc = s.pc.saturating_sub(1);
3199 break;
3200 }
3201
3202 let typ = WC_REDIR_TYPE(code); // c:2967 `r->type = WC_REDIR_TYPE(code);`
3203 if s.pc >= prog_len {
3204 break;
3205 }
3206 let fd1_w = s.prog.prog[s.pc]; // c:2968 `r->fd1 = *s->pc++;`
3207 s.pc += 1;
3208
3209 let name = ecgetstr(s, EC_DUP, None); // c:2969 `r->name = ecgetstr(...)`
3210
3211 let (flags, here_terminator, munged_here_terminator) = if WC_REDIR_FROM_HEREDOC(code) != 0 {
3212 // c:2970-2973
3213 let term = ecgetstr(s, EC_DUP, None);
3214 let munged = ecgetstr(s, EC_DUP, None);
3215 (REDIRF_FROM_HEREDOC, Some(term), Some(munged))
3216 } else {
3217 // c:2974-2977
3218 (0, None, None)
3219 };
3220
3221 let varid = if WC_REDIR_VARID(code) != 0 {
3222 // c:2979-2980
3223 Some(ecgetstr(s, EC_DUP, None))
3224 } else {
3225 None // c:2981-2982
3226 };
3227
3228 ret.push(redir {
3229 // c:2965-2982 fields + c:2984 `addlinknode`
3230 typ,
3231 flags,
3232 fd1: fd1_w as i32,
3233 fd2: 0,
3234 name: Some(name),
3235 varid,
3236 here_terminator,
3237 munged_here_terminator,
3238 });
3239
3240 if s.pc >= prog_len {
3241 break;
3242 }
3243 code = s.prog.prog[s.pc]; // c:2986 `code = *s->pc++;`
3244 s.pc += 1;
3245 }
3246
3247 ret // c:2990 `return ret`
3248}
3249
3250/// Port of `eccopyredirs(Estate s)` from `Src/parse.c:3003`. Reads
3251/// the WC_REDIR run at `s->pc`, counts the wordcodes needed,
3252/// reserves space in `ecbuf` via `ecispace`, then re-walks `s->pc`
3253/// re-emitting each redir's wordcodes into the reserved slot —
3254/// finally calls `bld_eprog(0)` to package the result as an Eprog.
3255pub fn eccopyredirs(s: &mut estate) -> Option<eprog> {
3256 let prog_len = s.prog.prog.len();
3257 if s.pc >= prog_len {
3258 return None;
3259 }
3260 // c:3007-3009 — `if (wc_code(*pc) != WC_REDIR) return NULL;`
3261 let first_code = s.prog.prog[s.pc];
3262 if wc_code(first_code) != WC_REDIR {
3263 return None;
3264 }
3265 // c:3011 — `init_parse();`
3266 init_parse();
3267
3268 // c:3013-3027 — count wordcodes the redir run will need.
3269 // Each WC_REDIR contributes `code + fd1 + name` = 3, plus
3270 // `+2` if WC_REDIR_FROM_HEREDOC (terminator + munged), plus
3271 // `+1` if WC_REDIR_VARID.
3272 let mut probe = s.pc;
3273 let mut ncodes = 0usize;
3274 loop {
3275 if probe >= prog_len {
3276 break;
3277 }
3278 let code = s.prog.prog[probe];
3279 if wc_code(code) != WC_REDIR {
3280 break;
3281 }
3282 let mut ncode = if WC_REDIR_FROM_HEREDOC(code) != 0 {
3283 5
3284 } else {
3285 3
3286 };
3287 if WC_REDIR_VARID(code) != 0 {
3288 ncode += 1;
3289 }
3290 probe += ncode;
3291 ncodes += ncode;
3292 }
3293
3294 // c:3028-3029 — `r = ecused; ecispace(r, ncodes);`
3295 let r0 = ECUSED.get() as usize;
3296 ecispace(r0, ncodes);
3297
3298 // c:3031-3053 — re-walk `s->pc` and write into ecbuf[r..].
3299 let mut r = r0;
3300 loop {
3301 if s.pc >= prog_len {
3302 break;
3303 }
3304 let code = s.prog.prog[s.pc];
3305 if wc_code(code) != WC_REDIR {
3306 break;
3307 }
3308 s.pc += 1;
3309 // c:3036 — `ecbuf[r++] = code;`
3310 ECBUF.with_borrow_mut(|buf| {
3311 if r >= buf.len() {
3312 buf.resize(r + 1, 0);
3313 }
3314 buf[r] = code;
3315 });
3316 r += 1;
3317 // c:3038 — `ecbuf[r++] = *s->pc++;` (the fd1 word)
3318 let fd1 = s.prog.prog[s.pc];
3319 s.pc += 1;
3320 ECBUF.with_borrow_mut(|buf| {
3321 if r >= buf.len() {
3322 buf.resize(r + 1, 0);
3323 }
3324 buf[r] = fd1;
3325 });
3326 r += 1;
3327 // c:3041 — `ecbuf[r++] = ecstrcode(ecgetstr(s, EC_NODUP, NULL));`
3328 let name = ecgetstr(s, EC_NODUP, None);
3329 let nc = ecstrcode(&name);
3330 ECBUF.with_borrow_mut(|buf| {
3331 if r >= buf.len() {
3332 buf.resize(r + 1, 0);
3333 }
3334 buf[r] = nc;
3335 });
3336 r += 1;
3337 // c:3042-3047 — heredoc terminators.
3338 if WC_REDIR_FROM_HEREDOC(code) != 0 {
3339 let term = ecgetstr(s, EC_NODUP, None);
3340 let tc = ecstrcode(&term);
3341 ECBUF.with_borrow_mut(|buf| {
3342 if r >= buf.len() {
3343 buf.resize(r + 1, 0);
3344 }
3345 buf[r] = tc;
3346 });
3347 r += 1;
3348 let munged = ecgetstr(s, EC_NODUP, None);
3349 let mc = ecstrcode(&munged);
3350 ECBUF.with_borrow_mut(|buf| {
3351 if r >= buf.len() {
3352 buf.resize(r + 1, 0);
3353 }
3354 buf[r] = mc;
3355 });
3356 r += 1;
3357 }
3358 // c:3048-3049 — varid.
3359 if WC_REDIR_VARID(code) != 0 {
3360 let varid = ecgetstr(s, EC_NODUP, None);
3361 let vc = ecstrcode(&varid);
3362 ECBUF.with_borrow_mut(|buf| {
3363 if r >= buf.len() {
3364 buf.resize(r + 1, 0);
3365 }
3366 buf[r] = vc;
3367 });
3368 r += 1;
3369 }
3370 }
3371
3372 // c:3056 — `return bld_eprog(0);` — `bld_eprog` appends the
3373 // WC_END marker and packages ECBUF/ECSTRS into an Eprog.
3374 Some(bld_eprog(false))
3375}
3376
3377/// Port of `init_eprog(void)` from `Src/parse.c:3069`. Sets up
3378/// `dummy_eprog_code = WCB_END(); dummy_eprog.len = sizeof(wordcode);
3379/// dummy_eprog.prog = &dummy_eprog_code; dummy_eprog.strs = NULL;`.
3380/// Called once at shell startup (init_main → init_misc → init_eprog).
3381pub fn init_eprog() {
3382 let mut d = DUMMY_EPROG.lock().unwrap();
3383 d.prog = vec![WCB_END()]; // c:3071/3073
3384 d.len = size_of::<wordcode>() as i32; // c:3072
3385 d.strs = None; // c:3074
3386 d.flags = 0;
3387 d.npats = 0;
3388 d.nref = 0;
3389}
3390
3391// =====================================================================
3392// `bin_zcompile` and wordcode-dump helpers — port of `Src/parse.c:3104+`.
3393//
3394// The wordcode dump format (`.zwc`) is a serialized parse tree zsh can
3395// `mmap()` and dispatch from without re-parsing on every shell start.
3396// File layout (one struct = `FD_PRELEN` `u32`s):
3397// - `pre[0]` = magic word (FD_MAGIC native byte-order, FD_OMAGIC
3398// opposite byte-order).
3399// - `pre[1]` = packed `{flags(8) | other_offset(24)}` byte field.
3400// - `pre[2..12]` = `ZSH_VERSION` C-string padded to 40 bytes.
3401// - `pre[12]` = `fdheaderlen` (total prelude+header word count).
3402// - Then a sequence of `struct fdhead` records, one per function,
3403// each followed by its NUL-terminated name (padded to 4-byte).
3404// - Then the wordcode bytes for every function back-to-back.
3405//
// A little-endian host writes the dump twice: first with `FD_MAGIC`
// for native readers, then it re-walks the body byte-swapped and
// emits a second `FD_OMAGIC` copy so big-endian readers can mmap it too.
3409// =====================================================================
3410
3411// File-format constants — port of `Src/parse.c:3104-3150`.
3412
/// `#define FD_EXT ".zwc"` from `Src/parse.c:3104`. File-name suffix
/// of a wordcode dump.
pub const FD_EXT: &str = ".zwc";

/// `#define FD_MINMAP 4096` from `Src/parse.c:3105`. Size threshold
/// consulted by `write_dump` when the map mode is 1 (the mode
/// `bin_zcompile` selects when neither `-M` nor `-R` is given): the
/// dump is flagged as mappable only if its total length is at least
/// this value.
pub const FD_MINMAP: usize = 4 * 1024;

/// `#define FD_PRELEN 12` from `Src/parse.c:3107`. Prelude length in
/// u32 words: magic word + packed flags word + 10 version words.
pub const FD_PRELEN: usize = 12;

/// `#define FD_MAGIC 0x04050607` from `Src/parse.c:3108`. Magic word
/// of a dump copy stored in the reader's own byte order.
pub const FD_MAGIC: u32 = 0x0405_0607;

/// `#define FD_OMAGIC 0x07060504` from `Src/parse.c:3109`. Magic word
/// of a dump copy stored in the opposite byte order; by construction
/// it is `FD_MAGIC` with its bytes reversed.
pub const FD_OMAGIC: u32 = FD_MAGIC.swap_bytes();

/// `#define FDF_MAP 1` from `Src/parse.c:3111`. Prelude flag bit: the
/// dump should be `mmap()`-ed (`-M`) rather than read (`-R`).
pub const FDF_MAP: u32 = 1 << 0;

/// `#define FDF_OTHER 2` from `Src/parse.c:3112`. Prelude flag bit:
/// this copy is the opposite-byte-order one located at `fdother(f)`.
pub const FDF_OTHER: u32 = 1 << 1;
3440
/// Port of `struct fdhead` from `Src/parse.c:3116`: the fixed-size
/// header record stored once per function inside a wordcode dump.
/// Every field is a `wordcode` (u32); the function name follows the
/// record in the file.
#[allow(non_camel_case_types)] // keeps the C struct name
#[derive(Clone, Copy, Debug)]
pub struct fdhead {
    /// Word offset inside the dump at which this function's wordcode
    /// body begins.
    pub start: u32, // c:3117
    /// Length of the body in bytes (pattern-prog slots not counted).
    pub len: u32, // c:3118
    /// How many compiled patterns the body refers to.
    pub npats: u32, // c:3119
    /// Where the string table starts, relative to `prog->prog`.
    pub strs: u32, // c:3120
    /// Size of this header entry in u32 words, name included.
    pub hlen: u32, // c:3121
    /// Packed word: low 2 bits are the autoload-style flags, the
    /// remaining 30 bits are the offset of the name's tail.
    pub flags: u32, // c:3122
}
3460
/// `#define FDHF_KSHLOAD 1` from `Src/parse.c:3149`. Per-function
/// header flag selected by `zcompile -k` (ksh-style autoload).
pub const FDHF_KSHLOAD: u32 = 1 << 0;

/// `#define FDHF_ZSHLOAD 2` from `Src/parse.c:3150`. Per-function
/// header flag selected by `zcompile -z` (zsh-style autoload).
pub const FDHF_ZSHLOAD: u32 = 1 << 1;
3468
/// Port of `struct wcfunc` from `Src/parse.c:3158`: one function
/// queued for `write_dump` while a dump is being assembled.
///
/// The C struct points at an `Eprog`; the `Eprog` ↔ `parse_string`
/// chain isn't fully wired through this layer yet, so the port keeps
/// the body inline (`build_dump` is still a stub).
#[allow(non_camel_case_types)] // keeps the C struct name
#[derive(Clone, Debug)]
pub struct wcfunc {
    /// Function name as written into the header record.
    pub name: String, // c:3159
    /// `FDHF_*` autoload-style bits for this function.
    pub flags: u32, // c:3161
    /// Wordcode of the compiled body, one `u32` per word. Stays empty
    /// until the eprog emit side lands; `write_dump` walks it as-is.
    pub body: Vec<u32>,
}
3483
3484/// Port of `dump_find_func(Wordcode h, char *name)` from
3485/// `Src/parse.c:3167`. Walks the header table inside a loaded
3486/// dump for a function with the given basename; returns true on hit.
3487pub fn dump_find_func(h: &[u32], name: &str) -> bool {
3488 // c:3167
3489 let header_words = fdheaderlen(h) as usize;
3490 let end = header_words; // walking u32 offsets, end-exclusive
3491 let mut cur = firstfdhead_offset();
3492 while cur < end {
3493 if let Some(fh) = read_fdhead(h, cur) {
3494 let full = fdname(h, cur);
3495 let tail = fdhtail(&fh) as usize;
3496 let basename = if tail <= full.len() {
3497 &full[tail..]
3498 } else {
3499 ""
3500 };
3501 if basename == name {
3502 return true;
3503 }
3504 cur = nextfdhead_offset(h, cur);
3505 } else {
3506 break;
3507 }
3508 }
3509 false
3510}
3511
3512/// Port of `bin_zcompile(char *nam, char **args, Options ops, UNUSED(int func))`
3513/// from `Src/parse.c:3180`. Validates the option set, then dispatches
3514/// to one of: `-t` (test/list), `-c`/`-a` (dump current functions),
3515/// or the default (compile source files to `.zwc`).
3516pub fn bin_zcompile(
3517 nam: &str, // c:3180
3518 args: &[String],
3519 ops: &crate::ported::zsh_h::options,
3520 _func: i32,
3521) -> i32 {
3522 // c:3185-3192 — illegal-combination guard.
3523 if (OPT_ISSET(ops, b'k') && OPT_ISSET(ops, b'z'))
3524 || (OPT_ISSET(ops, b'R') && OPT_ISSET(ops, b'M'))
3525 || (OPT_ISSET(ops, b'c')
3526 && (OPT_ISSET(ops, b'U') || OPT_ISSET(ops, b'k') || OPT_ISSET(ops, b'z')))
3527 || (!(OPT_ISSET(ops, b'c') || OPT_ISSET(ops, b'a')) && OPT_ISSET(ops, b'm'))
3528 {
3529 zwarnnam(nam, "illegal combination of options"); // c:3192
3530 return 1;
3531 }
3532
3533 // c:3194 — `-c`/`-a` + KSHAUTOLOAD warning.
3534 if (OPT_ISSET(ops, b'c') || OPT_ISSET(ops, b'a')) && isset(crate::ported::zsh_h::KSHAUTOLOAD) {
3535 zwarnnam(nam, "functions will use zsh style autoloading"); // c:3195
3536 }
3537
3538 // c:3196-3197 — flag word from `-k` / `-z`.
3539 let flags: u32 = if OPT_ISSET(ops, b'k') {
3540 FDHF_KSHLOAD
3541 } else if OPT_ISSET(ops, b'z') {
3542 FDHF_ZSHLOAD
3543 } else {
3544 0
3545 };
3546
3547 // c:3199 — `-t` test/list mode.
3548 if OPT_ISSET(ops, b't') {
3549 // c:3199
3550 if args.is_empty() {
3551 zwarnnam(nam, "too few arguments"); // c:3202
3552 return 1;
3553 }
3554 let dump_name = if args[0].ends_with(FD_EXT) {
3555 args[0].clone()
3556 } else {
3557 format!("{}{}", args[0], FD_EXT)
3558 };
3559 let f = match load_dump_header(nam, &dump_name, 1) {
3560 // c:3206
3561 Some(buf) => buf,
3562 None => return 1,
3563 };
3564 // c:3209 — per-function check.
3565 if args.len() > 1 {
3566 for name in &args[1..] {
3567 // c:3210
3568 if !dump_find_func(&f, name) {
3569 // c:3212
3570 return 1;
3571 }
3572 }
3573 return 0;
3574 }
3575 // c:3215-3221 — listing arm. Walk every fdhead, print
3576 // each function's full name. C uses `fdname(h)` which
3577 // includes the path prefix; matches our `fdname()` impl.
3578 let mapped = if (fdflags(&f) & FDF_MAP) != 0 {
3579 "mapped"
3580 } else {
3581 "read"
3582 };
3583 println!("zwc file ({}) for zsh-{}", mapped, fdversion(&f));
3584 let header_words = fdheaderlen(&f) as usize;
3585 let mut cur = firstfdhead_offset();
3586 while cur < header_words {
3587 if read_fdhead(&f, cur).is_none() {
3588 break;
3589 }
3590 println!("{}", fdname(&f, cur));
3591 cur = nextfdhead_offset(&f, cur);
3592 }
3593 return 0;
3594 }
3595
3596 if args.is_empty() {
3597 zwarnnam(nam, "too few arguments"); // c:3226
3598 return 1;
3599 }
3600
3601 // c:3228 — map mode discriminant.
3602 let map: i32 = if OPT_ISSET(ops, b'M') {
3603 2
3604 } else if OPT_ISSET(ops, b'R') {
3605 0
3606 } else {
3607 1
3608 };
3609
3610 // c:3230-3236 — single-file default-mode short path.
3611 if args.len() == 1 && !(OPT_ISSET(ops, b'c') || OPT_ISSET(ops, b'a')) {
3612 let dump = format!("{}{}", args[0], FD_EXT);
3613 return build_dump(nam, &dump, args, OPT_ISSET(ops, b'U') as i32, map, flags);
3614 }
3615
3616 // c:3239-3247 — multi-file or `-c`/`-a` mode.
3617 let dump = if args[0].ends_with(FD_EXT) {
3618 args[0].clone()
3619 } else {
3620 format!("{}{}", args[0], FD_EXT)
3621 };
3622 let rest = &args[1..];
3623 if OPT_ISSET(ops, b'c') || OPT_ISSET(ops, b'a') {
3624 let what =
3625 (if OPT_ISSET(ops, b'c') { 1 } else { 0 }) | (if OPT_ISSET(ops, b'a') { 2 } else { 0 });
3626 build_cur_dump(nam, &dump, rest, OPT_ISSET(ops, b'm') as i32, map, what)
3627 } else {
3628 build_dump(nam, &dump, rest, OPT_ISSET(ops, b'U') as i32, map, flags)
3629 }
3630}
3631
3632/// Port of `load_dump_header(char *nam, char *name, int err)` from
3633/// `Src/parse.c:3258`. Opens the file, reads + validates the magic
3634/// and version, then slurps the full header table into memory.
3635/// Returns the header u32-array on success or None on any failure
3636/// (emitting C-shaped warnings when `err != 0`).
3637pub fn load_dump_header(nam: &str, name: &str, err: i32) -> Option<Vec<u32>> {
3638 // c:3258
3639
3640 let mut f = match File::open(name) {
3641 // c:3263
3642 Ok(h) => h,
3643 Err(_) => {
3644 if err != 0 {
3645 zwarnnam(nam, &format!("can't open zwc file: {}", name)); // c:3265
3646 }
3647 return None;
3648 }
3649 };
3650
3651 // Read FD_PRELEN+1 u32 words = 52 bytes.
3652 let mut buf_bytes = vec![0u8; (FD_PRELEN + 1) * 4];
3653 if f.read_exact(&mut buf_bytes).is_err() {
3654 if err != 0 {
3655 zwarnnam(nam, &format!("invalid zwc file: {}", name)); // c:3277
3656 }
3657 return None;
3658 }
3659 let mut buf: Vec<u32> = buf_bytes
3660 .chunks_exact(4)
3661 .map(|c| u32::from_le_bytes([c[0], c[1], c[2], c[3]]))
3662 .collect();
3663
3664 // c:3270 — magic + version check. `ZSH_VERSION` (the C-side
3665 // global) — zshrs reports "5.9" in `--zsh` mode (Src/init.c parity).
3666 let magic_ok = fdmagic(&buf) == FD_MAGIC || fdmagic(&buf) == FD_OMAGIC;
3667 let v_ok = fdversion(&buf) == "5.9";
3668 if !magic_ok {
3669 if err != 0 {
3670 zwarnnam(nam, &format!("invalid zwc file: {}", name)); // c:3277
3671 }
3672 return None;
3673 }
3674 if !v_ok {
3675 if err != 0 {
3676 zwarnnam(
3677 nam,
3678 &format!(
3679 "zwc file has wrong version (zsh-{}): {}", // c:3274
3680 fdversion(&buf),
3681 name
3682 ),
3683 );
3684 }
3685 return None;
3686 }
3687
3688 // c:3285 — if magic matches host byte order, head len is `pre[FD_PRELEN]`.
3689 // Else seek to `fdother(buf)` and re-read.
3690 if fdmagic(&buf) != FD_MAGIC {
3691 let other = fdother(&buf) as u64; // c:3290
3692 if f.seek(SeekFrom::Start(other)).is_err() || f.read_exact(&mut buf_bytes).is_err() {
3693 zwarnnam(nam, &format!("invalid zwc file: {}", name)); // c:3295
3694 return None;
3695 }
3696 buf = buf_bytes
3697 .chunks_exact(4)
3698 .map(|c| u32::from_le_bytes([c[0], c[1], c[2], c[3]]))
3699 .collect();
3700 }
3701
3702 let total_words = fdheaderlen(&buf) as usize; // c:3286/3299
3703 if total_words < FD_PRELEN + 1 {
3704 zwarnnam(nam, &format!("invalid zwc file: {}", name));
3705 return None;
3706 }
3707
3708 // Read the remaining header words.
3709 let mut head: Vec<u32> = Vec::with_capacity(total_words);
3710 head.extend_from_slice(&buf);
3711 let remaining_words = total_words - (FD_PRELEN + 1);
3712 if remaining_words > 0 {
3713 let mut rest_bytes = vec![0u8; remaining_words * 4]; // c:3305
3714 if f.read_exact(&mut rest_bytes).is_err() {
3715 zwarnnam(nam, &format!("invalid zwc file: {}", name)); // c:3307
3716 return None;
3717 }
3718 for c in rest_bytes.chunks_exact(4) {
3719 head.push(u32::from_le_bytes([c[0], c[1], c[2], c[3]]));
3720 }
3721 }
3722 Some(head) // c:3311
3723}
3724
/// Port of `fdswap(Wordcode p, int n)` from `Src/parse.c:3318`.
///
/// Reverses the byte order of every word of `p` in place; the C
/// `(p, n)` pair is expressed by the slice itself. Used when emitting
/// the opposite-byte-order copy of a wordcode dump.
pub fn fdswap(p: &mut [u32]) {
    // c:3318
    p.iter_mut().for_each(|word| *word = word.swap_bytes());
}
3734
3735/// Port of `write_dump(int dfd, LinkList progs, int map, int hlen, int tlen)`
3736/// from `Src/parse.c:3334`. Writes the prelude + header records +
3737/// body wordcode bytes to the dump file descriptor.
3738///
3739/// Two passes: first native-byte-order (`FD_MAGIC`), then opposite-
3740/// byte-order (`FD_OMAGIC`) so big-endian readers can mmap the
3741/// same file. Bodies are byte-swapped via `fdswap` on the second pass.
3742pub fn write_dump(
3743 dfd: &mut File, // c:3334
3744 progs: &[wcfunc],
3745 mut map: i32,
3746 hlen: i32,
3747 tlen: i32,
3748) -> std::io::Result<()> {
3749 if map == 1 && (tlen as usize) >= FD_MINMAP {
3750 // c:3344
3751 map = 1;
3752 } else if map == 1 {
3753 map = 0;
3754 }
3755
3756 let mut other = 0u32; // c:3338
3757 let ohlen = hlen;
3758 let mut cur_hlen = hlen;
3759
3760 loop {
3761 cur_hlen = ohlen;
3762 // c:3347 — build the prelude.
3763 let mut pre = vec![0u32; FD_PRELEN];
3764 pre[0] = if other != 0 { FD_OMAGIC } else { FD_MAGIC }; // c:3350
3765 let flags = (if map != 0 { FDF_MAP } else { 0 }) | other;
3766 fdsetflags(&mut pre, flags as u8); // c:3351
3767 fdsetother(&mut pre, tlen as u32); // c:3352
3768 // c:3353 — copy ZSH_VERSION C-string into pre[2..].
3769 let ver = b"5.9";
3770 for (i, &b) in ver.iter().enumerate() {
3771 let word = 2 + i / 4;
3772 let shift = (i % 4) * 8;
3773 pre[word] |= (b as u32) << shift;
3774 }
3775 // Write prelude.
3776 for w in &pre {
3777 dfd.write_all(&w.to_le_bytes())?;
3778 }
3779 // c:3356 — per-fn header records.
3780 for wcf in progs {
3781 let n = &wcf.name;
3782 let prog = &wcf.body;
3783 let mut head = fdhead {
3784 start: cur_hlen as u32, // c:3360
3785 len: (prog.len() * 4) as u32, // c:3363
3786 npats: 0, // c:3364 (npats not tracked yet)
3787 strs: 0, // c:3365
3788 hlen: ((FDHEAD_WORDS as u32) + ((n.len() as u32 + 4) / 4)), // c:3366
3789 flags: 0,
3790 };
3791 cur_hlen += prog.len() as i32; // c:3361
3792 // c:3368 — name tail offset from path basename.
3793 let tail = n.rfind('/').map(|p| p + 1).unwrap_or(0);
3794 head.flags = fdhbldflags(wcf.flags, tail as u32); // c:3372
3795 // c:3373 — opposite-byte-order swap on second pass.
3796 let mut head_words: Vec<u32> = vec![
3797 head.start, head.len, head.npats, head.strs, head.hlen, head.flags,
3798 ];
3799 if other != 0 {
3800 fdswap(&mut head_words);
3801 }
3802 for w in &head_words {
3803 dfd.write_all(&w.to_le_bytes())?;
3804 }
3805 // c:3376 — write the name + NUL + pad-to-4.
3806 dfd.write_all(n.as_bytes())?;
3807 dfd.write_all(&[0u8])?;
3808 let pad = (4 - ((n.len() + 1) & 3)) & 3;
3809 if pad > 0 {
3810 dfd.write_all(&vec![0u8; pad])?;
3811 }
3812 }
3813 // c:3381 — per-fn body words.
3814 for wcf in progs {
3815 let mut body = wcf.body.clone();
3816 if other != 0 {
3817 fdswap(&mut body);
3818 }
3819 for w in &body {
3820 dfd.write_all(&w.to_le_bytes())?;
3821 }
3822 }
3823 if other != 0 {
3824 // c:3389
3825 break;
3826 }
3827 other = FDF_OTHER; // c:3391
3828 }
3829 Ok(())
3830}
3831
3832/// Port of `build_dump(char *nam, char *dump, char **files, int ali, int map, int flags)`
3833/// from `Src/parse.c:3397`. Source-file → wordcode dump compiler.
3834///
3835/// Status: scaffolded but the wordcode-emit step depends on
3836/// `parse_string` returning a fully-wired `Eprog` with `prog/strs/
3837/// npats` fields populated. The current `parse_string`/`parse` shape
3838/// emits an AST (`ZshProgram`) but not yet the wordcode array C
3839/// expects in this dump format. Until that lands, this returns 1
3840/// with a clear "wordcode emit not yet ported" message so callers
3841/// (autoload from `.zwc`, `zcompile path/to/file`) fail loud.
3842pub fn build_dump(
3843 nam: &str, // c:3397
3844 dump: &str,
3845 _files: &[String],
3846 _ali: i32,
3847 _map: i32,
3848 _flags: u32,
3849) -> i32 {
3850 zwarnnam(nam, &format!("{}: wordcode dump emit not yet ported", dump));
3851 1
3852}
3853
3854/// Port of `cur_add_func(char *nam, Shfunc shf, LinkList names, LinkList progs, int *hlen, int *tlen, int what)`
3855/// from `Src/parse.c:3489`. Adds a shfunc to the in-build dump
3856/// progs+names lists. Stub: `Eprog` for the function body isn't
3857/// yet wired through `shfunc.funcdef` to be serializable here.
3858pub fn cur_add_func(
3859 nam: &str, // c:3489
3860 shf_name: &str,
3861 shf_flags: i32,
3862 names: &mut Vec<String>,
3863 progs: &mut Vec<wcfunc>,
3864 hlen: &mut i32,
3865 tlen: &mut i32,
3866 what: i32,
3867) -> i32 {
3868 let is_undef = (shf_flags as u32 & PM_UNDEFINED) != 0;
3869 if is_undef {
3870 if (what & 2) == 0 {
3871 // c:3498
3872 zwarnnam(nam, &format!("function is not loaded: {}", shf_name));
3873 return 1;
3874 }
3875 // c:3503 — would call `getfpfunc` to load body for dump.
3876 zwarnnam(nam, &format!("can't load function: {}", shf_name));
3877 return 1;
3878 } else if (what & 1) == 0 {
3879 zwarnnam(nam, &format!("function is already loaded: {}", shf_name)); // c:3514
3880 return 1;
3881 }
3882 // c:3517 — would `dupeprog(shf->funcdef)`. Stub: empty body.
3883 let wcf = wcfunc {
3884 name: shf_name.to_string(),
3885 flags: FDHF_ZSHLOAD,
3886 body: Vec::new(),
3887 };
3888 progs.push(wcf);
3889 names.push(shf_name.to_string());
3890
3891 // c:3526 — bump hlen / tlen.
3892 let name_words = (shf_name.len() as i32 + 4) / 4;
3893 *hlen += (FDHEAD_WORDS as i32) + name_words;
3894 *tlen += 0; // body is empty in stub; real path adds prog->len in words.
3895
3896 0
3897}
3898
3899/// Port of `build_cur_dump(char *nam, char *dump, char **names, int match, int map, int what)`
3900/// from `Src/parse.c:3536`. Compiles currently-loaded functions
3901/// (`-c` for functions, `-a` for aliases) into a `.zwc` dump.
3902/// Same wordcode-emit dependency as `build_dump`.
3903pub fn build_cur_dump(
3904 nam: &str, // c:3536
3905 dump: &str,
3906 _names: &[String],
3907 _match_: i32,
3908 _map: i32,
3909 _what: i32,
3910) -> i32 {
3911 zwarnnam(
3912 nam,
3913 &format!("{}: wordcode dump-current emit not yet ported", dump),
3914 );
3915 1
3916}
3917
/// Port of `zwcstat(char *filename, struct stat *buf)` from
/// `Src/parse.c:3656`.
///
/// Returns the metadata of `filename`; if that cannot be read, tries
/// `<filename>.old` instead (the suffix under which a previous dump
/// stays readable while a rewrite is in progress). `None` when neither
/// can be stat'ed.
// NOTE(review): confirm the `.old` fallback against the C original.
pub fn zwcstat(filename: &str) -> Option<fs::Metadata> {
    // c:3656
    fs::metadata(filename)
        .or_else(|_| fs::metadata(format!("{}.old", filename)))
        .ok()
}
3931
/// Port of `load_dump_file(char *dump, struct stat *sbuf, int other, int len)`
/// from `Src/parse.c:3675`.
///
/// Where C maps the file, this port reads it: everything from byte
/// offset `other` (or from the start when `other == 0`) to end of file
/// is decoded as little-endian u32 words. Trailing bytes that do not
/// fill a whole word are dropped. `_sbuf` and `_len` are unused.
///
/// Returns `None` if the file cannot be opened, positioned or read.
pub fn load_dump_file(
    dump: &str, // c:3675
    _sbuf: &fs::Metadata,
    other: i32,
    _len: usize,
) -> Option<Vec<u32>> {
    let mut file = match File::open(dump) {
        Ok(handle) => handle,
        Err(_) => return None,
    };
    if other != 0 && file.seek(SeekFrom::Start(other as u64)).is_err() {
        return None;
    }
    let mut raw = Vec::new();
    if file.read_to_end(&mut raw).is_err() {
        return None;
    }
    let mut words = Vec::with_capacity(raw.len() / 4);
    for quad in raw.chunks_exact(4) {
        words.push(u32::from_le_bytes([quad[0], quad[1], quad[2], quad[3]]));
    }
    Some(words)
}
3954
3955/// Port of `try_dump_file(char *path, char *name, char *file, int *ksh, int test_only)`
3956/// from `Src/parse.c:3746`. Tries to load function `name` from a
3957/// `.zwc` digest (`<path>.zwc`) or per-function compiled file
3958/// (`<file>.zwc`) when each is newer than its uncompiled source.
3959pub fn try_dump_file(
3960 path: &str,
3961 name: &str,
3962 file: &str, // c:3746
3963 test_only: bool,
3964) -> Option<(Vec<u32>, bool)> {
3965 use std::fs;
3966
3967 // c:3753-3758 — if path ends in .zwc, treat as direct digest.
3968 if path.ends_with(FD_EXT) {
3969 crate::ported::signals::queue_signals();
3970 let result = fs::metadata(path)
3971 .ok()
3972 .and_then(|m| check_dump_file(path, &m, name, test_only));
3973 unqueue_signals();
3974 return result;
3975 }
3976
3977 // c:3759-3760 — dig = "<path>.zwc", wc = "<file>.zwc".
3978 let dig = format!("{}{}", path, FD_EXT);
3979 let wc = format!("{}{}", file, FD_EXT);
3980
3981 // c:3762-3764 — zwcstat(dig, &std); stat(wc, &stc); stat(file, &stn);
3982 let std_meta = fs::metadata(&dig);
3983 let stc_meta = fs::metadata(&wc);
3984 let stn_meta = fs::metadata(file);
3985
3986 crate::ported::signals::queue_signals();
3987
3988 // c:3771-3777 — try digest if newer than (or in absence of) wc/file.
3989 if let Ok(std_m) = &std_meta {
3990 let dig_mtime = std_m.modified().ok();
3991 let wc_newer_or_missing = match &stc_meta {
3992 Err(_) => true,
3993 Ok(c) => dig_mtime >= c.modified().ok(),
3994 };
3995 let src_newer_or_missing = match &stn_meta {
3996 Err(_) => true,
3997 Ok(n) => dig_mtime >= n.modified().ok(),
3998 };
3999 if wc_newer_or_missing && src_newer_or_missing {
4000 if let Some(prog) = check_dump_file(&dig, std_m, name, test_only) {
4001 unqueue_signals();
4002 return Some(prog);
4003 }
4004 }
4005 }
4006
4007 // c:3779-3784 — try per-function .zwc if newer than (or in absence of) source.
4008 if let Ok(stc_m) = &stc_meta {
4009 let wc_mtime = stc_m.modified().ok();
4010 let src_newer_or_missing = match &stn_meta {
4011 Err(_) => true,
4012 Ok(n) => wc_mtime >= n.modified().ok(),
4013 };
4014 if src_newer_or_missing {
4015 if let Some(prog) = check_dump_file(&wc, stc_m, name, test_only) {
4016 unqueue_signals();
4017 return Some(prog);
4018 }
4019 }
4020 }
4021
4022 unqueue_signals(); // c:3787
4023 None // c:3788
4024}
4025
4026/// Port of `try_source_file(char *file)` from `Src/parse.c:3795`.
4027/// Returns an Eprog (the wordcode dump body) if `<file>.zwc` exists
4028/// and is newer than `<file>`, else None.
4029pub fn try_source_file(file: &str) -> Option<String> {
4030 // c:3795
4031
4032 // c:3802-3805 — if ((tail = strrchr(file, '/'))) tail++; else tail = file;
4033 let tail = match file.rfind('/') {
4034 Some(i) => &file[i + 1..],
4035 None => file,
4036 };
4037
4038 // c:3807-3812 — if (strsfx(FD_EXT, file)) { ... return check_dump_file(file, NULL, tail, NULL, 0); }
4039 if file.ends_with(FD_EXT) {
4040 crate::ported::signals::queue_signals(); // c:3808
4041 let meta = fs::metadata(file);
4042 let prog = match meta {
4043 Ok(m) => check_dump_file(file, &m, tail, false).map(|(_, _)| file.to_string()), // c:3809
4044 Err(_) => None,
4045 };
4046 unqueue_signals(); // c:3810
4047 return prog;
4048 }
4049
4050 // c:3813 — wc = dyncat(file, FD_EXT);
4051 let wc = format!("{}{}", file, FD_EXT);
4052
4053 // c:3815-3816 — rc = stat(wc, &stc); rn = stat(file, &stn);
4054 let stc = fs::metadata(&wc);
4055 let stn = fs::metadata(file);
4056
4057 crate::ported::signals::queue_signals(); // c:3818
4058 // c:3819-3823 — if (!rc && (rn || stc.st_mtime >= stn.st_mtime) && (prog = check_dump_file(...))) return prog;
4059 if let Ok(meta_c) = &stc {
4060 let newer_than_src = match (&stc, &stn) {
4061 (Ok(c), Ok(n)) => c.modified().ok() >= n.modified().ok(),
4062 (Ok(_), Err(_)) => true, // c:3819 — `rn` (src missing) ⇒ accept .zwc
4063 _ => false,
4064 };
4065 if newer_than_src {
4066 let prog = check_dump_file(&wc, meta_c, tail, false); // c:3820
4067 if prog.is_some() {
4068 unqueue_signals(); // c:3821
4069 return Some(wc); // c:3822
4070 }
4071 }
4072 }
4073 unqueue_signals(); // c:3824
4074 None // c:3825
4075}
4076
4077/// Port of `Eprog check_dump_file(char *file, struct stat *sbuf,
4078/// char *name, int *ksh, int test_only)` from `Src/parse.c:3833`.
4079/// Walks the `dumps` mmap list looking for `(dev, ino)` matching
4080/// `sbuf`; on miss, calls `load_dump_header` to read the .zwc
4081/// header. Then `dump_find_func(d, name)` locates the function
4082/// table entry. Returns the wordcode slice + ksh-load flag.
4083///
4084/// ```c
4085/// Eprog
4086/// check_dump_file(char *file, struct stat *sbuf, char *name,
4087/// int *ksh, int test_only)
4088/// {
4089/// int isrec = 0;
4090/// Wordcode d;
4091/// FDHead h;
4092/// FuncDump f;
4093/// struct stat lsbuf;
4094/// if (!sbuf) {
4095/// if (zwcstat(file, &lsbuf)) return NULL;
4096/// sbuf = &lsbuf;
4097/// }
4098/// rec:
4099/// d = NULL;
4100/// for (f = dumps; f; f = f->next)
4101/// if (f->dev == sbuf->st_dev && f->ino == sbuf->st_ino)
4102/// { d = f->map; break; }
4103/// if (!f && (isrec || !(d = load_dump_header(NULL, file, 0))))
4104/// return NULL;
4105/// if ((h = dump_find_func(d, name))) {
4106/// if (test_only) return &dummy_eprog;
4107/// /* allocate Eprog from f->map at h offset, incrdumpcount,
4108/// return prog */
4109/// }
4110/// return NULL;
4111/// }
4112/// ```
4113/// Rust port returns `Option<(Vec<u32>, bool)>` instead of the C
4114/// `Eprog` pointer + `*ksh` out-param: tuple element 0 is the
4115/// wordcode slice, element 1 is true if the function was a ksh-
4116/// loaded entry.
4117pub fn check_dump_file(
4118 // c:3833
4119 file: &str,
4120 sbuf: &fs::Metadata,
4121 name: &str,
4122 test_only: bool,
4123) -> Option<(Vec<u32>, bool)> {
4124 use std::os::unix::fs::MetadataExt;
4125
4126 // c:3842-3846 — `if (!sbuf) { zwcstat(file, &lsbuf); sbuf = &lsbuf; }`
4127 // Rust takes sbuf by &Metadata — never null.
4128 let dev = sbuf.dev(); // c:3859
4129 let ino = sbuf.ino(); // c:3859
4130
4131 // c:3854 — `d = NULL;`
4132 let mut d: Option<Vec<u32>> = None;
4133 let mut found_mmap = false; // c:3858 `for (f = dumps; f; ...)`
4134
4135 // c:3858-3862 — walk DUMPS for matching dev/ino.
4136 {
4137 let dumps_guard = DUMPS.lock().expect("dumps poisoned");
4138 for f in dumps_guard.iter() {
4139 // c:3858
4140 if f.dev == dev && f.ino == ino {
4141 // c:3859
4142 d = Some(f.map.clone()); // c:3860
4143 found_mmap = true;
4144 break; // c:3861
4145 }
4146 }
4147 }
4148
4149 // c:3870-3871 — `if (!f && (isrec || !(d = load_dump_header(NULL, file, 0)))) return NULL;`
4150 if !found_mmap {
4151 // c:3870
4152 match load_dump_header("", file, 0) {
4153 // c:3870 load_dump_header
4154 Some(loaded) => d = Some(loaded),
4155 None => return None, // c:3871
4156 }
4157 }
4158
4159 // c:3873 — `if ((h = dump_find_func(d, name)))`
4160 let dump = d?;
4161 if !dump_find_func(&dump, name) {
4162 // c:3873
4163 return None;
4164 }
4165
4166 // c:3876-3879 — `if (test_only) return &dummy_eprog;`
4167 if test_only {
4168 // c:3876
4169 return Some((Vec::new(), false)); // c:3879 dummy
4170 }
4171
4172 // c:3884-3953 — allocate Eprog from the mmap area + ksh detection.
4173 // The C source builds an `Eprog` struct wrapping the wordcode
4174 // slice at h's offset; the Rust port returns the slice directly
4175 // since Eprog construction lives at the call site (load_dump_file).
4176 // ksh-load detection reads the FDHF_KSHLOAD flag on the FDHead.
4177 // !!! STUB: FDHead parsing not yet wired through dump_find_func.
4178 let is_ksh_load = false; // c:3905 fdhflags(h) & FDHF_KSHLOAD
4179
4180 // c:3950 — incrdumpcount(f). The Rust incrdumpcount takes a
4181 // funcdump ref; look up the matching entry by dev/ino again.
4182 if found_mmap {
4183 let dumps_guard = DUMPS.lock().expect("dumps poisoned");
4184 if let Some(f) = dumps_guard.iter().find(|f| f.dev == dev && f.ino == ino) {
4185 incrdumpcount(f); // c:3899
4186 }
4187 }
4188
4189 Some((dump, is_ksh_load)) // c:3953
4190}
4191
4192/// Port of `incrdumpcount(FuncDump f)` from `Src/parse.c:3970/4021`.
4193/// `f->count++;` — refcount-up a loaded dump entry. The Rust port
4194/// keys lookup by `filename` because Rust can't raw-pointer-compare
4195/// funcdump values inside a `Mutex<Vec<...>>`; same observable
4196/// effect (the count of the matching entry increments).
4197pub fn incrdumpcount(f: &funcdump) {
4198 // c:3970 — `f->count++;`
4199 if let Some(d) = DUMPS
4200 .lock()
4201 .unwrap()
4202 .iter_mut()
4203 .find(|d| d.filename.as_deref() == f.filename.as_deref())
4204 {
4205 d.count += 1; // c:3973
4206 }
4207}
4208
4209/// Port of `freedump(FuncDump f)` from `Src/parse.c:3976`. Public
4210/// helper for the rare external caller; locks the dumps mutex and
4211/// drops the entry with the given filename.
4212pub fn freedump(f: &funcdump) {
4213 // c:3976
4214 let mut g = DUMPS.lock().unwrap();
4215 if let Some(name) = f.filename.as_deref() {
4216 freedump_locked(&mut g, name);
4217 }
4218}
4219
4220/// Port of `decrdumpcount(FuncDump f)` from `Src/parse.c:3988/4026`.
4221/// `f->count--; if (!f->count) { unlink from dumps; freedump(f); }`.
4222pub fn decrdumpcount(f: &funcdump) {
4223 // c:3988
4224 let key = f.filename.clone();
4225 let mut g = DUMPS.lock().unwrap();
4226 let mut hit_zero: Option<String> = None;
4227 for d in g.iter_mut() {
4228 if d.filename == key {
4229 d.count -= 1; // c:3991
4230 if d.count == 0 {
4231 // c:3992
4232 hit_zero = d.filename.clone();
4233 }
4234 break;
4235 }
4236 }
4237 if let Some(name) = hit_zero {
4238 // c:3994-4001
4239 freedump_locked(&mut g, &name);
4240 }
4241}
4242
4243/// Port of `closedumps(void)` from `Src/parse.c:4008/4033`. Walks
4244/// `dumps` freeing every entry. Called on shell exit (exec.c:522).
4245pub fn closedumps() {
4246 // c:4008
4247 let mut g = DUMPS.lock().unwrap();
4248 g.clear(); // c:4011-4014 `while (dumps) { ... freedump(...); ... }`
4249}
4250
4251/// Port of `dump_autoload(char *nam, char *file, int on, Options ops, int func)`
4252/// from `Src/parse.c:4042`. Registers every function in a `.zwc`
4253/// for autoload via `shfunctab`.
4254pub fn dump_autoload(
4255 nam: &str,
4256 file: &str, // c:4042
4257 on: i32,
4258 ops: &crate::ported::zsh_h::options,
4259 func: i32,
4260) -> i32 {
4261 use crate::ported::zsh_h::shfunc;
4262 let mut ret = 0; // c:4047
4263
4264 // c:4049-4050 — if (!strsfx(FD_EXT, file)) file = dyncat(file, FD_EXT);
4265 let file_owned;
4266 let file = if !file.ends_with(FD_EXT) {
4267 file_owned = format!("{}{}", file, FD_EXT);
4268 file_owned.as_str()
4269 } else {
4270 file
4271 };
4272
4273 // c:4052-4053 — if (!(h = load_dump_header(nam, file, 1))) return 1;
4274 let h = match load_dump_header(nam, file, 1) {
4275 Some(buf) => buf,
4276 None => return 1,
4277 };
4278
4279 // c:4055-4056 — for (n = firstfdhead(h); n < e; n = nextfdhead(n))
4280 let hlen = fdheaderlen(&h) as usize; // c:4055
4281 let mut n_off = firstfdhead_offset();
4282 while n_off < hlen {
4283 let head = match read_fdhead(&h, n_off) {
4284 Some(hd) => hd,
4285 None => break,
4286 };
4287 // c:4057-4061 — shf = zshcalloc; shf->node.flags = on; ...addnode(fdname + fdhtail)
4288 let name_full = fdname(&h, n_off);
4289 let tail = fdhtail(&head) as usize;
4290 let basename: String = name_full.chars().skip(tail).collect();
4291 let mut shf = shfunc {
4292 node: crate::ported::zsh_h::hashnode {
4293 next: None,
4294 nam: basename.clone(),
4295 flags: on, // c:4058
4296 },
4297 filename: None,
4298 lineno: 0,
4299 funcdef: None,
4300 redir: None,
4301 sticky: None, // c:4060 NULL
4302 body: None,
4303 };
4304 // c:4059 — shf->funcdef = mkautofn(shf); (placeholder Eprog ptr)
4305 let _ = crate::ported::builtin::mkautofn(&mut shf as *mut _);
4306 // c:4061 — shfunctab->addnode(...)
4307 let snapshot = shf.clone();
4308 {
4309 let mut tab = crate::ported::hashtable::shfunctab_lock()
4310 .write()
4311 .expect("shfunctab poisoned");
4312 tab.add(shf);
4313 }
4314 // c:4062-4063 — if (OPT_ISSET(ops,'X') && eval_autoload(...)) ret = 1;
4315 if OPT_ISSET(ops, b'X') {
4316 let mut shf_ref = snapshot;
4317 if crate::ported::builtin::eval_autoload(&mut shf_ref as *mut _, &basename, ops, func)
4318 != 0
4319 {
4320 ret = 1;
4321 }
4322 }
4323 n_off = nextfdhead_offset(&h, n_off);
4324 }
4325 let _ = nam;
4326 ret // c:4065
4327}
4328
/// Port of C `struct eccstr` (zsh.h:836): one node of the binary
/// search tree used to de-duplicate long strings.
///
/// The dedup walk and comparison in `ecstrcode` follow parse.c:447-453,
/// including the conditional comparison chain
/// (nfunc → hashval → strcmp), so inputs on which C's tree walk hits
/// or misses give the same outcome on the Rust side.
struct EccstrNode {
    /// Left subtree.
    left: Option<Box<EccstrNode>>,
    /// Right subtree.
    right: Option<Box<EccstrNode>>,
    /// The string in C-byte form (single byte per char ≤ 0xff). Owned,
    /// because unlike C zsh there are no stable pointers into a lexer
    /// tokstr arena — every tokstr is a fresh Rust String allocation.
    str: Vec<u8>,
    /// Wordcode-encoded offset, `(byte_offset << 2) | token_bit`; same
    /// shape as `Eccstr::offs` (parse.c:459).
    offs: u32,
    /// Absolute byte offset in the final strs region (`ecsoffs` at
    /// insert time); C `Eccstr::aoffs` (parse.c:464). copy_ecstr
    /// writes at THIS position — `offs` is ecssub-relative and
    /// collides across funcdef scopes.
    aoffs: u32,
    /// Value of `nfunc` when the node was inserted: the per-function
    /// namespace key (0 for top-level scripts, bumped per funcdef).
    nfunc: i32,
    /// Hash of `str` as computed by zsh's `hasher` (hashtable.c:86).
    hashval: u32,
}
4356// === end AST relocation ===
4357
4358// Parser state lives in file-scope thread_locals:
4359// - LEX_* (lexer side, matching Src/lex.c file-statics)
4360// - ECBUF / ECLEN / ECUSED / ECNPATS / ECSOFFS / ECSSUB / ECNFUNC /
4361// ECSTRS_INDEX / ECSTRS_REVERSE (wordcode-emission state, matching
4362// Src/parse.c file-statics)
4363//
4364// Callers use the free-fn entry points directly:
4365// crate::ported::parse::parse_init(input);
4366// let prog = crate::ported::parse::parse();
4367
/// Upper bound on parser recursion depth.
/// NOTE(review): no use of this constant is visible in this part of
/// the file — confirm which recursion counter it caps.
const MAX_RECURSION_DEPTH: usize = 500;
4369
/// Direct port of `struct parse_stack` at `Src/zsh.h:3099-3109`.
/// Used by `parse_context_save` / `parse_context_restore`
/// (parse.c:295-355) to snapshot per-parse-call state so a nested
/// parse (e.g. inside command substitution) doesn't clobber the
/// outer parse.
///
/// A second port of `struct parse_stack` exists at
/// `crate::ported::zsh_h::parse_stack` (zsh.h:1066) using canonical
/// Wordcode / Eccstr / `struct heredocs` types — that port is unused
/// today and will become authoritative when Phase 9b (PORT_PLAN.md)
/// wires wordcode emission. This local version uses the working-set
/// shapes (`Vec<HereDoc>`, stubbed wordcode fields) suited to zshrs's
/// pre-wordcode AST architecture; the consolidation happens in P9b.
#[allow(non_camel_case_types)]
#[derive(Debug, Default, Clone)]
pub struct parse_stack {
    // ── Direct port of struct parse_stack at zsh.h:3099-3109 ──
    /// Pending heredocs awaiting body collection (canonical C
    /// linked-list shape). C: `struct heredocs *hdocs` (zsh.h:3100).
    /// Mirrors `parse::HDOCS` thread_local across nested parses.
    pub hdocs: Option<Box<crate::ported::zsh_h::heredocs>>,
    /// WARNING: not a member of C's `struct parse_stack` — Rust-only
    /// AST glue.
    /// Snapshot of `lex::LEX_HEREDOCS` (the parallel Rust-only Vec
    /// carrying terminator / strip_tabs / quoted metadata).
    /// Saved/restored alongside the canonical `hdocs` so nested
    /// parses get a clean AST view. C's parse_stack has no analog
    /// because C tracks terminator metadata implicitly via tokstr.
    pub lex_heredocs: Vec<HereDoc>,
    /// C: `int incmdpos` (zsh.h:3102).
    pub incmdpos: bool,
    /// C: `int aliasspaceflag` (zsh.h:3103).
    pub aliasspaceflag: i32,
    /// C: `int incond` (zsh.h:3104).
    pub incond: i32,
    /// C: `int inredir` (zsh.h:3105).
    pub inredir: bool,
    /// C: `int incasepat` (zsh.h:3106).
    pub incasepat: i32,
    /// C: `int isnewlin` (zsh.h:3107).
    pub isnewlin: i32,
    /// C: `int infor` (zsh.h:3108).
    pub infor: i32,
    /// C: `int inrepeat_` (zsh.h:3109).
    pub inrepeat_: i32,
    /// C: `int intypeset` (zsh.h:3110).
    pub intypeset: bool,
    // ── Wordcode-buffer state — STUB until Phase 9b ──
    // C `Wordcode ecbuf` (zsh.h:3112) + `Eccstr ecstrs` (zsh.h:3113) +
    // `int eclen/ecused/ecnpats/ecsoffs/ecssub/ecnfunc` (zsh.h:3112-3114).
    // zshrs hasn't emitted wordcode yet — these fields exist to
    // preserve the C shape but read/write nothing until P9b lands.
    // NOTE(review): this file also contains `par_*_wordcode` emitters
    // that write the ECBUF / ECUSED thread-locals — confirm whether the
    // "STUB" statement above is still accurate.
    /// C: `int eclen`.
    pub eclen: i32,
    /// C: `int ecused`.
    pub ecused: i32,
    /// C: `int ecnpats`.
    pub ecnpats: i32,
    /// C: `Wordcode ecbuf`; `None` when no buffer has been saved.
    pub ecbuf: Option<Vec<u32>>,
    /// C: `Eccstr ecstrs`; `None` when no string data has been saved.
    pub ecstrs: Option<Vec<u8>>,
    /// C: `int ecsoffs`.
    pub ecsoffs: i32,
    /// C: `int ecssub`.
    pub ecssub: i32,
    /// C: `int ecnfunc`.
    pub ecnfunc: i32,
}
4430
// The old uppercase Rust-only `ParseStack` struct is gone. This
// compatibility alias keeps existing call sites (context.rs)
// resolving until the rename ripples through.
/// Compatibility alias: `ParseStack` names the same type as
/// [`parse_stack`].
// NOTE(review): `ParseStack` is already UpperCamelCase, so this lint
// allowance looks redundant — confirm before removing.
#[allow(non_camel_case_types)]
pub type ParseStack = parse_stack;
4437
/// `mod_export struct eprog dummy_eprog;` from `Src/parse.c:3066`.
/// Placeholder Eprog used by `shf->funcdef = &dummy_eprog;` in
/// builtin.c when clearing a stale autoload stub. Held in a Mutex
/// so `init_eprog` can set it once at shell startup.
///
/// The initial value is an empty program: zero flags/len/npats/nref,
/// no wordcode, no string table, no patterns, no owning function and
/// no dump reference.
pub static DUMMY_EPROG: std::sync::Mutex<eprog> = std::sync::Mutex::new(eprog {
    flags: 0,
    len: 0,
    npats: 0,
    nref: 0,
    prog: Vec::new(),
    strs: None,
    pats: Vec::new(),
    shf: None,
    dump: None,
});
4453
4454/// Walk every ZshRedir in the program and, for any with a `heredoc_idx`,
4455/// pull the body+terminator out of `bodies` and stuff into `heredoc`.
4456/// `bodies[i]` corresponds to the i-th heredoc registered by the lexer
4457/// during scanning (in source order).
4458fn fill_heredoc_bodies(prog: &mut ZshProgram, bodies: &[HereDocInfo]) {
4459 for list in &mut prog.lists {
4460 fill_in_sublist(&mut list.sublist, bodies);
4461 }
4462}
4463
4464fn fill_in_sublist(sub: &mut ZshSublist, bodies: &[HereDocInfo]) {
4465 fill_in_pipe(&mut sub.pipe, bodies);
4466 if let Some(next) = &mut sub.next {
4467 fill_in_sublist(&mut next.1, bodies);
4468 }
4469}
4470
4471fn fill_in_pipe(pipe: &mut ZshPipe, bodies: &[HereDocInfo]) {
4472 fill_in_command(&mut pipe.cmd, bodies);
4473 if let Some(next) = &mut pipe.next {
4474 fill_in_pipe(next, bodies);
4475 }
4476}
4477
4478fn fill_in_command(cmd: &mut ZshCommand, bodies: &[HereDocInfo]) {
4479 match cmd {
4480 ZshCommand::Simple(s) => {
4481 for r in &mut s.redirs {
4482 if let Some(idx) = r.heredoc_idx {
4483 if let Some(info) = bodies.get(idx) {
4484 r.heredoc = Some(info.clone());
4485 }
4486 }
4487 }
4488 }
4489 ZshCommand::Subsh(p) | ZshCommand::Cursh(p) => fill_heredoc_bodies(p, bodies),
4490 ZshCommand::FuncDef(f) => fill_heredoc_bodies(&mut f.body, bodies),
4491 ZshCommand::If(i) => {
4492 fill_heredoc_bodies(&mut i.cond, bodies);
4493 fill_heredoc_bodies(&mut i.then, bodies);
4494 for (c, b) in &mut i.elif {
4495 fill_heredoc_bodies(c, bodies);
4496 fill_heredoc_bodies(b, bodies);
4497 }
4498 if let Some(e) = &mut i.else_ {
4499 fill_heredoc_bodies(e, bodies);
4500 }
4501 }
4502 ZshCommand::While(w) | ZshCommand::Until(w) => {
4503 fill_heredoc_bodies(&mut w.cond, bodies);
4504 fill_heredoc_bodies(&mut w.body, bodies);
4505 }
4506 ZshCommand::For(f) => fill_heredoc_bodies(&mut f.body, bodies),
4507 ZshCommand::Case(c) => {
4508 for arm in &mut c.arms {
4509 fill_heredoc_bodies(&mut arm.body, bodies);
4510 }
4511 }
4512 ZshCommand::Repeat(r) => fill_heredoc_bodies(&mut r.body, bodies),
4513 ZshCommand::Time(Some(sublist)) => fill_in_sublist(sublist, bodies),
4514 ZshCommand::Try(t) => {
4515 fill_heredoc_bodies(&mut t.try_block, bodies);
4516 fill_heredoc_bodies(&mut t.always, bodies);
4517 }
4518 ZshCommand::Redirected(inner, redirs) => {
4519 for r in redirs {
4520 if let Some(idx) = r.heredoc_idx {
4521 if let Some(info) = bodies.get(idx) {
4522 r.heredoc = Some(info.clone());
4523 }
4524 }
4525 }
4526 fill_in_command(inner, bodies);
4527 }
4528 ZshCommand::Time(None) | ZshCommand::Cond(_) | ZshCommand::Arith(_) => {}
4529 }
4530}
4531
4532/// If `list` is a Simple containing one word that ends in the
4533/// `<Inpar><Outpar>` token pair (the lexer-port encoding of `()`),
4534/// return the bare name. Used by `parse_program_until` to detect
4535/// `name() {body}` style function definitions where the lexer
4536/// hasn't split the `()` from the name.
4537/// Detect the `name() …` shape inside a Simple. Returns the function
4538/// name and (when the body was already inlined into the same Simple,
4539/// e.g. `foo() echo hi`) the rest of the words as the body's argv.
4540/// Returns None for non-funcdef shapes.
4541fn simple_name_with_inoutpar(list: &ZshList) -> Option<(Vec<String>, Vec<String>)> {
4542 if list.flags.async_ || list.sublist.next.is_some() {
4543 return None;
4544 }
4545 let pipe = &list.sublist.pipe;
4546 if pipe.next.is_some() {
4547 return None;
4548 }
4549 let simple = match &pipe.cmd {
4550 ZshCommand::Simple(s) => s,
4551 _ => return None,
4552 };
4553 if simple.words.is_empty() || !simple.assigns.is_empty() {
4554 return None;
4555 }
4556 let suffix = "\u{88}\u{8a}"; // Inpar + Outpar
4557 // Find the FIRST word ending in `()`. zsh accepts the
4558 // multi-name shorthand `fna fnb fnc() { body }` (parse.c:
4559 // par_funcdef wordlist) — words[0..i-1] are extra names,
4560 // words[i] is `lastname()`. Words after are the body argv
4561 // (one-line shorthand, `name() cmd args`).
4562 let par_idx = simple.words.iter().position(|w| w.ends_with(suffix))?;
4563 let mut names: Vec<String> = Vec::with_capacity(par_idx + 1);
4564 for w in &simple.words[..par_idx] {
4565 // Earlier names must be bare identifiers, NOT contain
4566 // tokens that imply they're not function names (no `()`,
4567 // no quotes, no expansions). zsh's lexer enforces this
4568 // at the wordlist level; we approximate by requiring the
4569 // word be an identifier-shaped token after untokenize.
4570 let bare = super::lex::untokenize(w);
4571 let valid = !bare.is_empty()
4572 && bare
4573 .chars()
4574 .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-' || c == '.' || c == '$');
4575 if !valid {
4576 return None;
4577 }
4578 names.push(bare);
4579 }
4580 let last = &simple.words[par_idx];
4581 let bare = &last[..last.len() - suffix.len()];
4582 if bare.is_empty() {
4583 return None;
4584 }
4585 names.push(super::lex::untokenize(bare));
4586 let rest = simple.words[par_idx + 1..].to_vec();
4587 Some((names, rest))
4588}
4589
4590/// Initialize parser state for a fresh parse of `input`.
4591/// Free-fn entry point — resets parser thread_locals and loads input.
4592pub fn parse_init(input: &str) {
4593 // Seed the option defaults the parser/lexer inspect. Real zsh
4594 // installs these via `install_emulation_defaults` (options.c:172)
4595 // at shell startup; zshrs's parse-only test entry path bypasses
4596 // init_main, so we mirror the `zsh` emulation defaults here.
4597 // Only seeds when unset so a script that explicitly disabled an
4598 // option stays so.
4599 for (name, default) in [
4600 ("shortloops", true),
4601 ("shortrepeat", false),
4602 ("multifuncdef", true),
4603 ("aliasfuncdef", false),
4604 ("ignorebraces", false),
4605 ("cshjunkieloops", false),
4606 ("posixbuiltins", false),
4607 ("execopt", true),
4608 ("kshautoload", false),
4609 ("aliases", true),
4610 ] {
4611 if crate::ported::options::opt_state_get(name).is_none() {
4612 crate::ported::options::opt_state_set(name, default);
4613 }
4614 }
4615 lex_init(input);
4616}
4617
4618/// P9b decoder (wordcode-pipeline variant): direct port of
4619/// `ecgetstr(Estate s, int dup, int *tokflag)` from
4620/// `Src/parse.c:2855-2890`. Reads a wordcode at `pc`, decodes the
4621/// encoded string back to owned String. Returns (string,
4622/// pc_after_consumed). Distinct from the existing `ecgetstr` (which
4623/// takes a separate strs buffer for text.rs) — this variant uses
4624/// the live ECSTRS_REVERSE HashMap populated at ecstrcode time.
4625pub fn ecgetstr_wordcode(buf: &[u32], pc: usize) -> (String, usize) {
4626 if pc >= buf.len() {
4627 return (String::new(), pc);
4628 }
4629 let c = buf[pc];
4630 let next = pc + 1;
4631 // parse.c:2862-2863 — empty-string sentinels.
4632 if c == 6 || c == 7 {
4633 return (String::new(), next);
4634 }
4635 // parse.c:2864-2871 — inline-packed short string.
4636 if (c & 2) != 0 {
4637 let b0 = ((c >> 3) & 0xff) as u8;
4638 let b1 = ((c >> 11) & 0xff) as u8;
4639 let b2 = ((c >> 19) & 0xff) as u8;
4640 let mut bytes: Vec<u8> = Vec::new();
4641 for b in [b0, b1, b2] {
4642 if b == 0 {
4643 break;
4644 }
4645 bytes.push(b);
4646 }
4647 return (String::from_utf8_lossy(&bytes).into_owned(), next);
4648 }
4649 // parse.c:2872-2873 — long string via offs lookup. Map value is
4650 // metafied Vec<u8>; convert back to display String. Unmetafy is
4651 // the caller's job (the wordcode-parity dumper does it; other
4652 // callers may want raw bytes).
4653 let s = ECSTRS_REVERSE
4654 .with_borrow(|m| m.get(&c).cloned())
4655 .map(|v| String::from_utf8_lossy(&v).into_owned())
4656 .unwrap_or_default();
4657 (s, next)
4658}
4659
4660/// Parse the complete input. Direct port of `parse_event` /
4661/// `par_list` from `Src/parse.c:614-720`. On syntax error,
4662/// sets `errflag |= ERRFLAG_ERROR` (via `zerr`) and returns the
4663/// partial program — callers check `errflag` to detect failure,
4664/// matching C's `Eprog parse_event(...)` + `if (errflag) {...}`.
4665pub fn parse() -> ZshProgram {
4666 zshlex();
4667
4668 let mut program = parse_program_until(None);
4669
4670 // Post-pass: wire heredoc bodies (collected by the inline NEWLIN
4671 // walk in zshlex into LEX_HEREDOCS) back into ZshRedir.heredoc
4672 // fields via heredoc_idx. No C analog — LEX_HEREDOCS is the
4673 // Rust-only AST-glue Vec.
4674 let bodies: Vec<HereDocInfo> = LEX_HEREDOCS
4675 .with_borrow(|v| v.clone())
4676 .into_iter()
4677 .map(|h| HereDocInfo {
4678 content: h.content,
4679 terminator: h.terminator,
4680 quoted: h.quoted,
4681 })
4682 .collect();
4683 if !bodies.is_empty() {
4684 fill_heredoc_bodies(&mut program, &bodies);
4685 }
4686
4687 program
4688}
4689
4690/// Wordcode-emission top-level driver. Closest C analog is
4691/// `parse_list(void)` at `Src/parse.c:697-712`: init_parse +
4692/// zshlex + par_list(&c) + bld_eprog. This entry omits init_parse
4693/// and bld_eprog (caller responsibilities) and inlines a guard
4694/// loop around par_list_wordcode for cases where the lexer leaves
4695/// a non-ENDINPUT terminator (LEXERR, missing close-token, etc.).
4696pub fn par_event_wordcode() -> usize {
4697 let start = ECUSED.get() as usize;
4698 // C `parse_list` (parse.c:697-712) calls par_list ONCE — par_list's
4699 // own goto-rec loop handles all SEPER-separated sublists. The
4700 // outer loop here exists for safety against early-return cases
4701 // (LEXERR, missing terminator) but normally par_list_wordcode
4702 // consumes everything in one call.
4703 let mut cmplx: i32 = 0;
4704 while tok() != ENDINPUT && tok() != LEXERR {
4705 par_list_wordcode(&mut cmplx);
4706 match tok() {
4707 SEMI | NEWLIN | AMPER | AMPERBANG | SEPER => {
4708 zshlex();
4709 }
4710 _ => break,
4711 }
4712 }
4713 // parse.c:712 — `ecadd(WCB_END());`
4714 ecadd(WCB_END());
4715 start
4716}
4717
4718/// Port of `par_list(int *cmplx)` from `Src/parse.c:769-803`.
4719/// `list : { SEPER } [ sublist [ { SEPER | AMPER | AMPERBANG } list ] ]`.
4720/// True line-by-line port: takes `cmplx: &mut i32` matching C's
4721/// `int *cmplx` out-parameter, uses stack-local `c` per iteration
4722/// like C (so inner sublist cmplx is independent of outer).
4723pub fn par_list_wordcode(cmplx: &mut i32) {
4724 // c:773 — `int p, lp = -1, c;`
4725 let mut p: usize;
4726 let mut lp: i32 = -1;
4727 let mut c: i32;
4728 loop {
4729 // c:775 `rec:` — c:777-778 `while (tok == SEPER) zshlex();`
4730 while tok() == SEPER {
4731 zshlex();
4732 }
4733 // c:780 — `p = ecadd(0);`
4734 p = ecadd(0);
4735 // c:781 — `c = 0;`
4736 c = 0;
4737 // c:783 — `if (par_sublist(&c)) { ... }`
4738 if par_sublist_wordcode(&mut c) {
4739 // c:784 — `*cmplx |= c;`
4740 *cmplx |= c;
4741 // c:785 — `if (tok == SEPER || tok == AMPER || tok == AMPERBANG)`
4742 let t = tok();
4743 if t == SEPER || t == AMPER || t == AMPERBANG {
4744 // c:786-787 — `if (tok != SEPER) *cmplx = 1;`
4745 if t != SEPER {
4746 *cmplx = 1;
4747 }
4748 // c:788-790 — `set_list_code(p, ..., c);`
4749 let z = if t == SEPER {
4750 Z_SYNC
4751 } else if t == AMPER {
4752 Z_ASYNC
4753 } else {
4754 Z_ASYNC | Z_DISOWN
4755 };
4756 set_list_code(p, z, c != 0);
4757 // c:791 — `incmdpos = 1;`
4758 set_incmdpos(true);
4759 // c:792-794 — `do { zshlex(); } while (tok == SEPER);`
4760 loop {
4761 zshlex();
4762 if tok() != SEPER {
4763 break;
4764 }
4765 }
4766 // c:795 — `lp = p;` c:796 — `goto rec;`
4767 lp = p as i32;
4768 continue;
4769 } else {
4770 // c:798 — `set_list_code(p, (Z_SYNC | Z_END), c);`
4771 set_list_code(p, Z_SYNC | Z_END, c != 0);
4772 }
4773 } else {
4774 // c:800-802 — `ecused--; if (lp >= 0) ecbuf[lp] |= wc_bdata(Z_END);`
4775 ECUSED.set((ECUSED.get() - 1).max(0));
4776 if lp >= 0 {
4777 ECBUF.with_borrow_mut(|b| {
4778 if (lp as usize) < b.len() {
4779 b[lp as usize] |= wc_bdata(Z_END as wordcode);
4780 }
4781 });
4782 }
4783 }
4784 break;
4785 }
4786}
4787
4788/// Port of `par_list1(int *cmplx)` from `Src/parse.c:806-817`.
4789/// Single-sublist variant used by funcdef bodies and the short
4790/// `for`/`while`/`repeat` forms — exactly one sublist with
4791/// `Z_SYNC|Z_END`, no chain.
4792pub fn par_list1_wordcode(cmplx: &mut i32) {
4793 // c:810 — `int p = ecadd(0), c = 0;`
4794 let p = ecadd(0);
4795 let mut c: i32 = 0;
4796 // c:812 — `if (par_sublist(&c)) { ... }`
4797 if par_sublist_wordcode(&mut c) {
4798 // c:813 — `set_list_code(p, (Z_SYNC | Z_END), c);`
4799 set_list_code(p, Z_SYNC | Z_END, c != 0);
4800 // c:814 — `*cmplx |= c;`
4801 *cmplx |= c;
4802 } else {
4803 // c:816 — `ecused--;`
4804 ECUSED.set((ECUSED.get() - 1).max(0));
4805 }
4806}
4807
4808/// Port of `par_save_list(C)` macro from `Src/parse.c:475-480`.
4809/// do { int eu = ecused; par_list(C); if (eu == ecused) ecadd(WCB_END()); } while (0)
4810pub fn par_save_list_wordcode(cmplx: &mut i32) {
4811 let eu = ECUSED.get();
4812 par_list_wordcode(cmplx);
4813 if ECUSED.get() == eu {
4814 ecadd(WCB_END());
4815 }
4816}
4817
4818/// Port of `par_save_list1(C)` macro from `Src/parse.c:481-486`.
4819pub fn par_save_list1_wordcode(cmplx: &mut i32) {
4820 let eu = ECUSED.get();
4821 par_list1_wordcode(cmplx);
4822 if ECUSED.get() == eu {
4823 ecadd(WCB_END());
4824 }
4825}
4826
4827/// Port of `par_sublist(int *cmplx)` from `Src/parse.c:823-865`.
4828/// `sublist : sublist2 [ ( DBAR | DAMPER ) { SEPER } sublist ]`.
4829/// Emits a WCB_SUBLIST header, recurses into par_sublist2 for
4830/// the !/coproc prefix + pipeline, then chains via DBAR (`||`)
4831/// or DAMPER (`&&`) recursively. Returns true if at least one
4832/// pipeline was emitted.
4833pub fn par_sublist_wordcode(cmplx: &mut i32) -> bool {
4834 // c:827 — `int f, p, c = 0;`
4835 let mut c: i32 = 0;
4836 // c:829 — `p = ecadd(0);`
4837 let p = ecadd(0);
4838 // c:831 — `if ((f = par_sublist2(&c)) != -1) { ... }`
4839 match par_sublist2(&mut c) {
4840 Some(f) => {
4841 // c:832 — `int e = ecused;`
4842 let e = ECUSED.get() as usize;
4843 // c:834 — `*cmplx |= c;`
4844 *cmplx |= c;
4845 if tok() == DBAR || tok() == DAMPER {
4846 // c:836 — `enum lextok qtok = tok;`
4847 let qtok = tok();
4848 // c:839 — `cmdpush(tok == DBAR ? CS_CMDOR : CS_CMDAND);`
4849 cmdpush(if qtok == DBAR {
4850 CS_CMDOR as u8
4851 } else {
4852 CS_CMDAND as u8
4853 });
4854 // c:840 — `zshlex();`
4855 zshlex();
4856 // c:841-842 — `while (tok == SEPER) zshlex();`
4857 while tok() == SEPER {
4858 zshlex();
4859 }
4860 // c:843 — `sl = par_sublist(cmplx);`
4861 let sl = par_sublist_wordcode(cmplx);
4862 // c:844-847 — `set_sublist_code(p, (sl ? ... : WC_SUBLIST_END),
4863 // f, (e - 1 - p), c);`
4864 let st = if sl {
4865 if qtok == DBAR {
4866 WC_SUBLIST_OR
4867 } else {
4868 WC_SUBLIST_AND
4869 }
4870 } else {
4871 WC_SUBLIST_END
4872 };
4873 set_sublist_code(p, st as i32, f, (e - 1 - p) as i32, c != 0);
4874 // c:848 — `cmdpop();`
4875 cmdpop();
4876 } else {
4877 // c:850-853 — `if (tok == AMPER || tok == AMPERBANG)
4878 // { c = 1; *cmplx |= c; }`
4879 if tok() == AMPER || tok() == AMPERBANG {
4880 c = 1;
4881 *cmplx |= c;
4882 }
4883 // c:854 — `set_sublist_code(p, WC_SUBLIST_END, f,
4884 // (e - 1 - p), c);`
4885 set_sublist_code(p, WC_SUBLIST_END as i32, f, (e - 1 - p) as i32, c != 0);
4886 }
4887 // c:856 — `return 1;`
4888 true
4889 }
4890 None => {
4891 // c:858-859 — `ecused--; return 0;`
4892 ECUSED.set((ECUSED.get() - 1).max(0));
4893 false
4894 }
4895 }
4896}
4897
/// Port of `par_pline(int *cmplx)` from `Src/parse.c:893-947`.
/// `pline : cmd [ ( BAR | BARAMP ) { SEPER } pline ]`.
///
/// (Named `par_pipe_wordcode` to disambiguate from the AST
/// `par_pline` at parse.rs:3744 — semantically the same `pline`
/// production.)
///
/// Reserves a pipe header word, parses one command, then:
/// - on `|` (BAR_TOK): rewrites the header as `WC_PIPE_MID`, inserts
///   one extra word after it holding the distance to the end of the
///   command just parsed, and recurses for the rest of the pipeline;
/// - on `|&` (BARAMP): additionally splices a synthetic `2>&1`
///   redirection (three words) after any redirection words that
///   directly follow the header, then proceeds as for `|`;
/// - otherwise: writes the header as `WC_PIPE_END`.
///
/// Returns `false` only when `par_cmd_wordcode` parsed no command (the
/// header word is released again); `true` in every other case. If the
/// right-hand side of `|` / `|&` fails to parse, `tok` is forced to
/// `LEXERR` and `true` is still returned.
pub fn par_pipe_wordcode(cmplx: &mut i32) -> bool {
    // c:897 — `zlong line = toklineno;`
    let line = toklineno() as i64;
    // c:899 — `p = ecadd(0);`
    let p = ecadd(0);
    // c:901-904 — `if (!par_cmd(cmplx, 0)) { ecused--; return 0; }`
    if !par_cmd_wordcode(cmplx, 0) {
        ECUSED.set((ECUSED.get() - 1).max(0));
        return false;
    }
    if tok() == BAR_TOK {
        // c:906 — `*cmplx = 1;`
        *cmplx = 1;
        // c:907 — `cmdpush(CS_PIPE);`
        cmdpush(CS_PIPE as u8);
        // c:908 — `zshlex();`
        zshlex();
        // c:909-910 — `while (tok == SEPER) zshlex();`
        while tok() == SEPER {
            zshlex();
        }
        // c:911 — `ecbuf[p] = WCB_PIPE(WC_PIPE_MID, line>=0 ? line+1 : 0);`
        ECBUF.with_borrow_mut(|b| {
            if p < b.len() {
                b[p] = WCB_PIPE(
                    WC_PIPE_MID,
                    if line >= 0 { (line + 1) as wordcode } else { 0 },
                );
            }
        });
        // c:912 — `ecispace(p+1, 1);`
        ecispace(p + 1, 1);
        // c:913 — `ecbuf[p+1] = ecused - 1 - p;`
        // `ECUSED` is read before re-borrowing the buffer.
        let used = ECUSED.get() as usize;
        ECBUF.with_borrow_mut(|b| {
            if p + 1 < b.len() {
                b[p + 1] = (used.saturating_sub(1 + p)) as wordcode;
            }
        });
        // c:914-916 — `if (!par_pline(cmplx)) { tok = LEXERR; }`
        if !par_pipe_wordcode(cmplx) {
            set_tok(LEXERR);
        }
        // c:917 — `cmdpop();`
        cmdpop();
        true
    } else if tok() == BARAMP {
        // c:920-923 — walk past inline WC_REDIR to find r.
        // A slot beyond the buffer reads as 0 here.
        let mut r = p + 1;
        loop {
            let code = ECBUF.with_borrow(|b| b.get(r).copied().unwrap_or(0));
            if wc_code(code) != WC_REDIR {
                break;
            }
            r += WC_REDIR_WORDS(code) as usize;
        }
        // c:925-928 — `ecispace(r, 3);` + synthetic `2>&1` redir
        ecispace(r, 3);
        // NOTE(review): `ecstrcode` runs while ECBUF is mutably
        // borrowed — assumes it never touches ECBUF; confirm.
        ECBUF.with_borrow_mut(|b| {
            if r + 2 < b.len() {
                b[r] = WCB_REDIR(REDIR_MERGEOUT as wordcode);
                b[r + 1] = 2;
                b[r + 2] = ecstrcode("1");
            }
        });
        // c:930 — `*cmplx = 1;`
        *cmplx = 1;
        // From here on the sequence is the same as the BAR_TOK branch,
        // with CS_ERRPIPE pushed instead of CS_PIPE.
        cmdpush(CS_ERRPIPE as u8);
        zshlex();
        while tok() == SEPER {
            zshlex();
        }
        ECBUF.with_borrow_mut(|b| {
            if p < b.len() {
                b[p] = WCB_PIPE(
                    WC_PIPE_MID,
                    if line >= 0 { (line + 1) as wordcode } else { 0 },
                );
            }
        });
        ecispace(p + 1, 1);
        let used = ECUSED.get() as usize;
        ECBUF.with_borrow_mut(|b| {
            if p + 1 < b.len() {
                b[p + 1] = (used.saturating_sub(1 + p)) as wordcode;
            }
        });
        if !par_pipe_wordcode(cmplx) {
            set_tok(LEXERR);
        }
        cmdpop();
        true
    } else {
        // c:944 — `ecbuf[p] = WCB_PIPE(WC_PIPE_END, line>=0 ? line+1 : 0);`
        ECBUF.with_borrow_mut(|b| {
            if p < b.len() {
                b[p] = WCB_PIPE(
                    WC_PIPE_END,
                    if line >= 0 { (line + 1) as wordcode } else { 0 },
                );
            }
        });
        true
    }
}
5011
/// Port of `par_cmd(int *cmplx, int zsh_construct)` from
/// `Src/parse.c:957-1077`.
///
/// Parses leading redirections, dispatches on the current token to
/// the matching `par_*_wordcode` builder, then parses trailing
/// redirections and resets the lexer position flags
/// (`incmdpos`, `incasepat`, `incond`, `intypeset`).
///
/// `cmplx` is set to 1 whenever a redirection is seen, for `select`,
/// `( … )` subshells, `time`, and when `par_simple_wordcode` reports
/// a result greater than 1. `zsh_construct` is forwarded unchanged to
/// `par_subsh_wordcode`.
///
/// Returns `false` only when `par_simple_wordcode` returned 0 and no
/// leading redirection was parsed; `true` otherwise.
pub fn par_cmd_wordcode(cmplx: &mut i32, zsh_construct: i32) -> bool {
    // c:960 — `int r, nr = 0;`
    let mut nr: i32 = 0;
    // c:962 — `r = ecused;`
    let mut r: usize = ECUSED.get() as usize;
    // c:964-968 — leading redirs.
    if IS_REDIROP(tok()) {
        // c:965 — `*cmplx = 1;`
        *cmplx = 1;
        // c:966-968 — `while (IS_REDIROP(tok)) { nr += par_redir(&r, NULL); }`
        while IS_REDIROP(tok()) {
            nr += par_redir_wordcode(&mut r, None);
        }
    }
    // c:970-1066 — token-dispatch switch. Most arms bracket their
    // builder with cmdpush/cmdpop of the matching CS_* state.
    match tok() {
        FOR => {
            cmdpush(CS_FOR as u8);
            par_for_wordcode(cmplx);
            cmdpop();
        }
        FOREACH => {
            cmdpush(CS_FOREACH as u8);
            par_for_wordcode(cmplx);
            cmdpop();
        }
        SELECT => {
            // c:982 — `*cmplx = 1;`
            *cmplx = 1;
            cmdpush(CS_SELECT as u8);
            par_for_wordcode(cmplx);
            cmdpop();
        }
        CASE => {
            cmdpush(CS_CASE as u8);
            par_case_wordcode(cmplx);
            cmdpop();
        }
        IF => {
            par_if_wordcode(cmplx);
        }
        WHILE => {
            cmdpush(CS_WHILE as u8);
            par_while_wordcode(cmplx);
            cmdpop();
        }
        UNTIL => {
            cmdpush(CS_UNTIL as u8);
            par_while_wordcode(cmplx);
            cmdpop();
        }
        REPEAT => {
            cmdpush(CS_REPEAT as u8);
            par_repeat_wordcode(cmplx);
            cmdpop();
        }
        INPAR_TOK => {
            // c:1011 — `*cmplx = 1;`
            *cmplx = 1;
            cmdpush(CS_SUBSH as u8);
            par_subsh_wordcode(cmplx, zsh_construct);
            cmdpop();
        }
        INBRACE_TOK => {
            cmdpush(CS_CURSH as u8);
            par_subsh_wordcode(cmplx, zsh_construct);
            cmdpop();
        }
        FUNC => {
            cmdpush(CS_FUNCDEF as u8);
            par_funcdef_wordcode(cmplx);
            cmdpop();
        }
        DINBRACK => {
            cmdpush(CS_COND as u8);
            par_cond_wordcode();
            cmdpop();
        }
        DINPAR => {
            par_arith_wordcode();
        }
        TIME => {
            // c:1037-1050 — `static int inpartime` guard so
            // `time time foo` doesn't recurse infinitely.
            if !PARSER_INPARTIME.with(|c| c.get()) {
                // c:1041 — `*cmplx = 1;`
                *cmplx = 1;
                PARSER_INPARTIME.with(|c| c.set(true));
                par_time_wordcode();
                PARSER_INPARTIME.with(|c| c.set(false));
            } else {
                // Already inside `time`: treat this `time` token as an
                // ordinary word and parse a simple command, using the
                // same result handling as the default arm below.
                set_tok(STRING_LEX);
                let sr = par_simple_wordcode(cmplx, nr);
                if sr == 0 && nr == 0 {
                    return false;
                }
                if sr > 1 {
                    *cmplx = 1;
                    r += (sr - 1) as usize;
                }
            }
        }
        _ => {
            // c:1054 — `if (!(sr = par_simple(cmplx, nr)))`
            let sr = par_simple_wordcode(cmplx, nr);
            if sr == 0 {
                // Nothing parsed and no leading redirs: no command.
                if nr == 0 {
                    return false;
                }
            } else if sr > 1 {
                // c:1060-1061 — `*cmplx = 1; r += sr - 1;`
                *cmplx = 1;
                r += (sr - 1) as usize;
            }
        }
    }
    // c:1067-1071 — trailing redirs.
    // c:1067 — `if (IS_REDIROP(tok)) { *cmplx = 1; while (...) (void)par_redir(&r, NULL); }`
    if IS_REDIROP(tok()) {
        *cmplx = 1;
        while IS_REDIROP(tok()) {
            let _ = par_redir_wordcode(&mut r, None);
        }
    }
    // c:1072-1075 — `incmdpos=1; incasepat=0; incond=0; intypeset=0;`
    set_incmdpos(true);
    set_incasepat(0);
    set_incond(0);
    set_intypeset(false);
    // `r` is only threaded through the redirection parsers; its final
    // value is not used.
    let _ = r;
    // c:1076 — `return 1;`
    true
}
5152
5153/// Port of `par_for(int *cmplx)` from `Src/parse.c:1086-1198`.
5154pub fn par_for_wordcode(cmplx: &mut i32) {
5155 // c:1089 — `int oecused = ecused, csh = (tok == FOREACH), p, sel = (tok == SELECT);`
5156 let _oecused = ECUSED.get() as usize;
5157 let csh = tok() == FOREACH;
5158 let sel = tok() == SELECT;
5159 let p: usize;
5160 // c:1090 — `int type;`
5161 let r#type: wordcode;
5162
5163 // c:1092 — `p = ecadd(0);`
5164 p = ecadd(0);
5165
5166 // c:1094 — `incmdpos = 0;`
5167 set_incmdpos(false);
5168 // c:1095 — `infor = tok == FOR ? 2 : 0;`
5169 set_infor(if tok() == FOR { 2 } else { 0 });
5170 // c:1096 — `zshlex();`
5171 zshlex();
5172 // c:1097 — `if (tok == DINPAR) {`
5173 if tok() == DINPAR {
5174 // c:1098 — `zshlex();`
5175 zshlex();
5176 // c:1099-1100 — `if (tok != DINPAR) YYERRORV(oecused);`
5177 if tok() != DINPAR {
5178 zerr("par_for: expected init");
5179 return;
5180 }
5181 // c:1101 — `ecstr(tokstr);`
5182 ecstr(&tokstr().unwrap_or_default());
5183 // c:1102 — `zshlex();`
5184 zshlex();
5185 // c:1103-1104
5186 if tok() != DINPAR {
5187 zerr("par_for: expected cond");
5188 return;
5189 }
5190 // c:1105
5191 ecstr(&tokstr().unwrap_or_default());
5192 // c:1106
5193 zshlex();
5194 // c:1107-1108
5195 if tok() != DOUTPAR {
5196 zerr("par_for: expected ))");
5197 return;
5198 }
5199 // c:1109
5200 ecstr(&tokstr().unwrap_or_default());
5201 // c:1110 — `infor = 0;`
5202 set_infor(0);
5203 // c:1111 — `incmdpos = 1;`
5204 set_incmdpos(true);
5205 // c:1112 — `zshlex();`
5206 zshlex();
5207 // c:1113 — `type = WC_FOR_COND;`
5208 r#type = WC_FOR_COND;
5209 } else {
5210 // c:1115 — `int np = 0, n, posix_in, ona = noaliases, onc = nocorrect;`
5211 let mut np: usize = 0;
5212 let mut n: u32;
5213 let posix_in: bool;
5214 let ona = noaliases();
5215 let onc = nocorrect();
5216 // c:1116 — `infor = 0;`
5217 set_infor(0);
5218 // c:1117-1118 — `if (tok != STRING || !isident(tokstr)) YYERRORV(oecused);`
5219 if tok() != STRING_LEX || !crate::ported::params::isident(&tokstr().unwrap_or_default()) {
5220 zerr("par_for: expected identifier");
5221 return;
5222 }
5223 // c:1119-1120 — `if (!sel) np = ecadd(0);`
5224 if !sel {
5225 np = ecadd(0);
5226 }
5227 // c:1121 — `n = 0;`
5228 n = 0;
5229 // c:1122 — `incmdpos = 1;`
5230 set_incmdpos(true);
5231 // c:1123 — `noaliases = nocorrect = 1;`
5232 set_noaliases(true);
5233 set_nocorrect(1);
5234 // c:1124 — `for (;;) {`
5235 loop {
5236 // c:1125 — `n++;`
5237 n += 1;
5238 // c:1126 — `ecstr(tokstr);`
5239 ecstr(&tokstr().unwrap_or_default());
5240 // c:1127 — `zshlex();`
5241 zshlex();
5242 // c:1128-1129 — `if (tok != STRING || !strcmp(tokstr, "in") || sel) break;`
5243 if tok() != STRING_LEX || tokstr().as_deref() == Some("in") || sel {
5244 break;
5245 }
5246 // c:1130-1135 — `if (!isident(tokstr) || errflag) { ... YYERRORV; }`
5247 if !crate::ported::params::isident(&tokstr().unwrap_or_default())
5248 || (errflag.load(Ordering::Relaxed) & 1) != 0
5249 {
5250 set_noaliases(ona);
5251 set_nocorrect(onc);
5252 zerr("par_for: expected identifier in name list");
5253 return;
5254 }
5255 }
5256 // c:1137-1138 — `noaliases = ona; nocorrect = onc;`
5257 set_noaliases(ona);
5258 set_nocorrect(onc);
5259 // c:1139-1140 — `if (!sel) ecbuf[np] = n;`
5260 if !sel {
5261 ECBUF.with_borrow_mut(|b| {
5262 b[np] = n;
5263 });
5264 }
5265 // c:1141 — `posix_in = isnewlin;`
5266 posix_in = isnewlin() != 0;
5267 // c:1142-1143 — `while (isnewlin) zshlex();`
5268 while isnewlin() != 0 {
5269 zshlex();
5270 }
5271 // c:1144 — `if (tok == STRING && !strcmp(tokstr, "in")) {`
5272 if tok() == STRING_LEX && tokstr().as_deref() == Some("in") {
5273 // c:1145 — `incmdpos = 0;`
5274 set_incmdpos(false);
5275 // c:1146 — `zshlex();`
5276 zshlex();
5277 // c:1147 — `np = ecadd(0);`
5278 np = ecadd(0);
5279 // c:1148 — `n = par_wordlist();`
5280 let n2 = par_wordlist_wordcode();
5281 // c:1149-1150 — `if (tok != SEPER) YYERRORV(oecused);`
5282 if tok() != SEPER {
5283 zerr("par_for: expected separator after `in`");
5284 return;
5285 }
5286 // c:1151 — `ecbuf[np] = n;`
5287 ECBUF.with_borrow_mut(|b| {
5288 b[np] = n2 as wordcode;
5289 });
5290 // c:1152 — `type = (sel ? WC_SELECT_LIST : WC_FOR_LIST);`
5291 r#type = if sel { WC_SELECT_LIST } else { WC_FOR_LIST };
5292 } else if !posix_in && tok() == INPAR_TOK {
5293 // c:1153-1154 — `else if (!posix_in && tok == INPAR)`
5294 // c:1154 — `incmdpos = 0;`
5295 set_incmdpos(false);
5296 // c:1155 — `zshlex();`
5297 zshlex();
5298 // c:1156 — `np = ecadd(0);`
5299 np = ecadd(0);
5300 // c:1157 — `n = par_nl_wordlist();`
5301 let n2 = par_nl_wordlist_wordcode();
5302 // c:1158-1159 — `if (tok != OUTPAR) YYERRORV(oecused);`
5303 if tok() != OUTPAR_TOK {
5304 zerr("par_for: expected `)`");
5305 return;
5306 }
5307 // c:1160 — `ecbuf[np] = n;`
5308 ECBUF.with_borrow_mut(|b| {
5309 b[np] = n2 as wordcode;
5310 });
5311 // c:1161 — `incmdpos = 1;`
5312 set_incmdpos(true);
5313 // c:1162 — `zshlex();`
5314 zshlex();
5315 // c:1163 — `type = (sel ? WC_SELECT_LIST : WC_FOR_LIST);`
5316 r#type = if sel { WC_SELECT_LIST } else { WC_FOR_LIST };
5317 } else {
5318 // c:1165 — `type = (sel ? WC_SELECT_PPARAM : WC_FOR_PPARAM);`
5319 r#type = if sel { WC_SELECT_PPARAM } else { WC_FOR_PPARAM };
5320 }
5321 let _ = np;
5322 }
5323 // c:1167 — `incmdpos = 1;`
5324 set_incmdpos(true);
5325 // c:1168-1169 — `while (tok == SEPER) zshlex();`
5326 while tok() == SEPER {
5327 zshlex();
5328 }
5329 // c:1170-1193 — body dispatch (inline in C, factored here for
5330 // reuse by par_while/par_repeat — same control flow, same calls).
5331 par_loop_body_wordcode(cmplx, csh);
5332 // c:1195-1197 — `ecbuf[p] = (sel ? WCB_SELECT(...) : WCB_FOR(...));`
5333 let used = ECUSED.get() as usize;
5334 let off = used.saturating_sub(1 + p) as wordcode;
5335 ECBUF.with_borrow_mut(|b| {
5336 b[p] = if sel {
5337 WCB_SELECT(r#type, off)
5338 } else {
5339 WCB_FOR(r#type, off)
5340 };
5341 });
5342}
5343
5344/// Port of `par_wordlist(void)` from `Src/parse.c:2361-2371` —
5345/// emits wordcode form. Returns the number of strings emitted.
5346fn par_wordlist_wordcode() -> u32 {
5347 // c:2364 — `int num = 0;`
5348 let mut num: u32 = 0;
5349 // c:2365 — `while (tok == STRING) {`
5350 while tok() == STRING_LEX {
5351 // c:2366 — `ecstr(tokstr);`
5352 ecstr(&tokstr().unwrap_or_default());
5353 // c:2367 — `num++;`
5354 num += 1;
5355 // c:2368 — `zshlex();`
5356 zshlex();
5357 }
5358 // c:2370 — `return num;`
5359 num
5360}
5361
5362/// Port of `par_nl_wordlist(void)` from `Src/parse.c:2378-2390` —
5363/// emits wordcode form. Like par_wordlist but tolerates SEPER
5364/// between words.
5365fn par_nl_wordlist_wordcode() -> u32 {
5366 // c:2381 — `int num = 0;`
5367 let mut num: u32 = 0;
5368 // c:2383 — `while (tok == STRING || tok == SEPER) {`
5369 while tok() == STRING_LEX || tok() == SEPER || tok() == NEWLIN {
5370 // c:2384-2387 — `if (tok != SEPER) { ecstr(tokstr); num++; }`
5371 if tok() == STRING_LEX {
5372 ecstr(&tokstr().unwrap_or_default());
5373 num += 1;
5374 }
5375 // c:2388 — `zshlex();`
5376 zshlex();
5377 }
5378 // c:2390 — `return num;`
5379 num
5380}
5381
5382/// Body dispatch shared by par_for / par_while / par_repeat.
5383/// Direct port of `Src/parse.c:1170-1194`.
5384fn par_loop_body_wordcode(cmplx: &mut i32, csh: bool) {
5385 if tok() == DOLOOP {
5386 zshlex();
5387 // c:1172 — `par_save_list(cmplx);`
5388 par_save_list_wordcode(cmplx);
5389 if tok() != DONE {
5390 zerr("missing `done`");
5391 return;
5392 }
5393 set_incmdpos(false);
5394 zshlex();
5395 } else if tok() == INBRACE_TOK {
5396 zshlex();
5397 // c:1179 — `par_save_list(cmplx);`
5398 par_save_list_wordcode(cmplx);
5399 if tok() != OUTBRACE_TOK {
5400 zerr("missing `}`");
5401 return;
5402 }
5403 set_incmdpos(false);
5404 zshlex();
5405 } else if csh || isset(CSHJUNKIELOOPS) {
5406 // c:1185 — `par_save_list(cmplx);`
5407 par_save_list_wordcode(cmplx);
5408 if tok() != ZEND {
5409 zerr("missing `end`");
5410 return;
5411 }
5412 set_incmdpos(false);
5413 zshlex();
5414 } else if unset(SHORTLOOPS) {
5415 zerr("short loop form requires SHORTLOOPS");
5416 } else {
5417 // c:1193 — `par_save_list1(cmplx);`
5418 par_save_list1_wordcode(cmplx);
5419 }
5420}
5421
/// Wordcode entry point for `select`.
///
/// `select` shares par_for body (c:983-985 routes SELECT to par_for), so
/// this is a plain forwarder: `cmplx` is handed to `par_for_wordcode`
/// unchanged and nothing else is done here.
pub fn par_select_wordcode(cmplx: &mut i32) {
    par_for_wordcode(cmplx);
}
5426
/// Port of `par_case(int *cmplx)` from `Src/parse.c:1208-1400`.
///
/// Emits the wordcode for a `case WORD in ... esac` / `case WORD { ... }`
/// construct:
///
/// * a head slot `p` is reserved first and patched last with
///   `WCB_CASE(WC_CASE_HEAD, offset)`;
/// * the scrutinee word is emitted with `ecstr`;
/// * every arm reserves two slots (`pp` for the arm header, `palts` for
///   the number of alternatives), then emits each alternative pattern
///   with `ecstr` followed by a pattern index taken from `ECNPATS`
///   (post-incremented), then the arm body via `par_save_list_wordcode`;
/// * the arm header type is `WC_CASE_OR` unless the arm ends in `;&`
///   (`WC_CASE_AND`) or `;|` (`WC_CASE_TESTAND`).
///
/// `_cmplx` is only forwarded to `par_save_list_wordcode`.
///
/// Every error path reports through `zerr` and returns immediately; the
/// reserved slots are left as they are at that point.
///
/// Known gap (see the comment in the alternatives loop): the C handling
/// of a whole pattern wrapped in `(...)` (c:1321-1357) is not ported, so
/// that input is reported as an error here.
pub fn par_case_wordcode(_cmplx: &mut i32) {
    // c:1211 — `int oecused = ecused, brflag, p, pp, palts, type, nalts;`
    // NOTE(review): `_oecused` is captured but never read in this
    // function; the C `YYERRORV(oecused)` sites presumably use it to
    // rewind — confirm that dropping the rewind is intentional.
    let _oecused = ECUSED.get() as usize;
    let brflag: bool;
    let p: usize;
    let mut pp: usize;
    let mut palts: usize;
    let mut r#type: wordcode;
    let mut nalts: u32;
    // c:1212 — `int ona, onc;`
    let ona: bool;
    let onc: i32;

    // c:1214 — `p = ecadd(0);`
    p = ecadd(0);

    // c:1216 — `incmdpos = 0;`
    set_incmdpos(false);
    // c:1217 — `zshlex();`
    zshlex();
    // c:1218-1219 — `if (tok != STRING) YYERRORV(oecused);`
    if tok() != STRING_LEX {
        zerr("par_case: expected scrutinee");
        return;
    }
    // c:1220 — `ecstr(tokstr);`
    ecstr(&tokstr().unwrap_or_default());

    // c:1222 — `incmdpos = 1;`
    set_incmdpos(true);
    // c:1223-1224 — `ona = noaliases; onc = nocorrect;`
    // Saved so both flags can be put back once `in` / `{` has been lexed.
    ona = noaliases();
    onc = nocorrect();
    // c:1225 — `noaliases = nocorrect = 1;`
    set_noaliases(true);
    set_nocorrect(1);
    // c:1226 — `zshlex();`
    zshlex();
    // c:1227-1228 — `while (tok == SEPER) zshlex();`
    while tok() == SEPER {
        zshlex();
    }
    // c:1229 — `if (!(tok == STRING && !strcmp(tokstr, "in")) && tok != INBRACE)`
    if !(tok() == STRING_LEX && tokstr().as_deref() == Some("in")) && tok() != INBRACE_TOK {
        // c:1231-1233 — restore noaliases/nocorrect + ERROR
        set_noaliases(ona);
        set_nocorrect(onc);
        zerr("par_case: expected `in` or `{`");
        return;
    }
    // c:1235 — `brflag = (tok == INBRACE);`
    // Remembers which closing token (`}` vs `esac`) ends the construct.
    brflag = tok() == INBRACE_TOK;
    // c:1236 — `incasepat = 1;`
    set_incasepat(1);
    // c:1237 — `incmdpos = 0;`
    set_incmdpos(false);
    // c:1238-1239 — `noaliases = ona; nocorrect = onc;`
    set_noaliases(ona);
    set_nocorrect(onc);
    // c:1240 — `zshlex();`
    zshlex();

    // c:1242 — `for (;;) {`
    // One iteration per case arm.
    'arms: loop {
        // c:1243 — `char *str;`
        // Text of the alternative that is waiting to be emitted.
        let mut str: String;
        // c:1244 — `int skip_zshlex;`
        // True when the current token (a leading `|`) must not be
        // stepped over before the alternatives loop starts.
        let skip_zshlex: bool;

        // c:1246-1247 — `while (tok == SEPER) zshlex();`
        while tok() == SEPER {
            zshlex();
        }
        // c:1248-1249 — `if (tok == OUTBRACE) break;`
        if tok() == OUTBRACE_TOK {
            break 'arms;
        }
        // c:1250-1251 — `if (tok == INPAR) zshlex();`
        // An optional `(` before the pattern is simply skipped.
        if tok() == INPAR_TOK {
            zshlex();
        }
        // c:1252-1254 — `if (tok == BAR) { str = ""; skip_zshlex = 1; }`
        if tok() == BAR_TOK {
            str = String::new();
            skip_zshlex = true;
        } else {
            // c:1256-1257 — `if (tok != STRING) YYERRORV(oecused);`
            if tok() != STRING_LEX {
                zerr("par_case: expected pattern");
                return;
            }
            // c:1258-1259 — `if (!strcmp(tokstr, "esac")) break;`
            if tokstr().as_deref() == Some("esac") {
                break 'arms;
            }
            // c:1260 — `str = dupstring(tokstr);`
            str = tokstr().unwrap_or_default();
            // c:1261 — `skip_zshlex = 0;`
            skip_zshlex = false;
        }
        // c:1263 — `type = WC_CASE_OR;`
        r#type = WC_CASE_OR;
        // c:1264-1266 — `pp = ecadd(0); palts = ecadd(0); nalts = 0;`
        pp = ecadd(0);
        palts = ecadd(0);
        nalts = 0;
        // c:1300 — `incasepat = -1;`
        set_incasepat(-1);
        // c:1301 — `incmdpos = 1;`
        set_incmdpos(true);
        // c:1302-1303 — `if (!skip_zshlex) zshlex();`
        if !skip_zshlex {
            zshlex();
        }
        // c:1304 — `for (;;) {`
        // One iteration per `|`-separated alternative of this arm.
        loop {
            // c:1305-1313 — `if (tok == OUTPAR) { ecstr(str);
            //   ecadd(ecnpats++); nalts++; incasepat = 0;
            //   incmdpos = 1; zshlex(); break; }`
            if tok() == OUTPAR_TOK {
                ecstr(&str);
                // Post-increment: `np` is the counter value before the bump.
                let np = ECNPATS.with(|cc| {
                    let v = cc.get();
                    cc.set(v + 1);
                    v
                }) as u32;
                ecadd(np);
                nalts += 1;
                set_incasepat(0);
                set_incmdpos(true);
                zshlex();
                break;
            }
            // c:1314-1320 — `else if (tok == BAR) { ecstr(str);
            //   ecadd(ecnpats++); nalts++; incasepat = 1;
            //   incmdpos = 0; }`
            else if tok() == BAR_TOK {
                ecstr(&str);
                // Same post-increment of the pattern counter as above.
                let np = ECNPATS.with(|cc| {
                    let v = cc.get();
                    cc.set(v + 1);
                    v
                }) as u32;
                ecadd(np);
                nalts += 1;
                set_incasepat(1);
                set_incmdpos(false);
            }
            // c:1321-1357 — else { ... `(...)` whole-pattern hack
            //   (Inpar at str[0]); else YYERRORV. Not yet ported —
            //   err out on unexpected. }
            else {
                zerr("par_case: expected `)` or `|`");
                return;
            }

            // c:1359 — `zshlex();`
            zshlex();
            // c:1360-1377 — switch on next tok.
            match tok() {
                STRING_LEX => {
                    // c:1361-1365
                    str = tokstr().unwrap_or_default();
                    zshlex();
                }
                OUTPAR_TOK | BAR_TOK => {
                    // c:1367-1371 — empty string
                    // The token is left in place for the next iteration.
                    str = String::new();
                }
                _ => {
                    // c:1374-1376 — `YYERRORV(oecused);`
                    zerr("par_case: expected pattern, `)` or `|`");
                    return;
                }
            }
        }
        // c:1379 — `incasepat = 0;`
        set_incasepat(0);
        // c:1380 — `par_save_list(cmplx);`
        par_save_list_wordcode(_cmplx);
        // c:1381-1384 — terminator → arm type
        if tok() == SEMIAMP {
            r#type = WC_CASE_AND;
        } else if tok() == SEMIBAR {
            r#type = WC_CASE_TESTAND;
        }
        // c:1385 — `ecbuf[pp] = WCB_CASE(type, ecused - 1 - pp);`
        let used = ECUSED.get() as usize;
        ECBUF.with_borrow_mut(|b| {
            b[pp] = WCB_CASE(r#type, (used.saturating_sub(1 + pp)) as wordcode);
        });
        // c:1386 — `ecbuf[palts] = nalts;`
        ECBUF.with_borrow_mut(|b| {
            b[palts] = nalts;
        });
        // c:1387-1388 — terminator (ESAC w/o brace OR OUTBRACE w/ brace) → break
        if (tok() == ESAC && !brflag) || (tok() == OUTBRACE_TOK && brflag) {
            break 'arms;
        }
        // c:1389-1390 — `if (tok != DSEMI && tok != SEMIAMP && tok != SEMIBAR) YYERRORV;`
        if tok() != DSEMI && tok() != SEMIAMP && tok() != SEMIBAR {
            zerr("par_case: expected `;;`, `;&`, or `;|`");
            return;
        }
        // c:1391 — `incasepat = 1;`
        set_incasepat(1);
        // c:1392 — `incmdpos = 0;`
        set_incmdpos(false);
        // c:1393 — `zshlex();`
        zshlex();
    }
    // c:1395 — `incmdpos = 1;`
    set_incmdpos(true);
    // c:1396 — `incasepat = 0;`
    set_incasepat(0);
    // c:1397 — `zshlex();`
    // Steps past the closing `esac` / `}`.
    zshlex();

    // c:1399 — `ecbuf[p] = WCB_CASE(WC_CASE_HEAD, ecused - 1 - p);`
    let used = ECUSED.get() as usize;
    ECBUF.with_borrow_mut(|b| {
        b[p] = WCB_CASE(WC_CASE_HEAD, (used.saturating_sub(1 + p)) as wordcode);
    });
}
5652
/// Port of `par_if(int *cmplx)` from `Src/parse.c:1410-1512`.
///
/// Emits the wordcode for an `if` / `elif` / `else` chain:
///
/// * a head slot `p` is reserved first and patched last with
///   `WCB_IF(WC_IF_HEAD, offset)`;
/// * each `if` / `elif` clause reserves its own slot `pp`, parses the
///   condition with `par_save_list_wordcode`, then the body, and patches
///   `pp` with `WCB_IF(WC_IF_IF | WC_IF_ELIF, offset)`;
/// * an `else` part reserves one more slot, patched with `WC_IF_ELSE`.
///
/// Three body forms are accepted after the condition: `then ...`,
/// `{ ... }`, and — only when SHORTLOOPS is set — a short body parsed by
/// `par_save_list1_wordcode`.
///
/// `cmplx` is only forwarded to the list parsers.
///
/// The function also maintains the command stack: `CS_IF` / `CS_ELIF` is
/// pushed at the top of every loop iteration and replaced by `nc_local`
/// while a body is parsed; every error path pops before returning.
/// Errors are reported through `zerr`.
pub fn par_if_wordcode(cmplx: &mut i32) {
    // c:1413 — `int oecused = ecused, p, pp, type, usebrace = 0;`
    // NOTE(review): `_oecused` is captured but never read; the C
    // `YYERRORV(oecused)` sites presumably use it to rewind — confirm
    // that dropping the rewind is intentional.
    let _oecused = ECUSED.get() as usize;
    let p: usize;
    let mut pp: usize = 0;
    let mut r#type: wordcode = WC_IF_IF;
    let mut usebrace: i32 = 0;
    // c:1414 — `enum lextok xtok;`
    let mut xtok: lextok;
    // c:1415 — `unsigned char nc;`
    // Declared only to mirror the C local; it is never assigned. The
    // value actually pushed is `nc_local`, computed inside the loop.
    let nc: u8;
    let _ = nc;

    // c:1417 — `p = ecadd(0);`
    p = ecadd(0);

    // c:1419 — `for (;;) {`
    // One iteration per `if` / `elif` clause; `else` and `fi` leave the loop.
    loop {
        // c:1420 — `xtok = tok;`
        xtok = tok();
        // c:1421 — `cmdpush(xtok == IF ? CS_IF : CS_ELIF);`
        cmdpush(if xtok == IF {
            CS_IF as u8
        } else {
            CS_ELIF as u8
        });
        // c:1422-1426 — `if (xtok == FI) { incmdpos = 0; zshlex(); break; }`
        if xtok == FI {
            set_incmdpos(false);
            zshlex();
            break;
        }
        // c:1427 — `zshlex();`
        zshlex();
        // c:1428-1429 — `if (xtok == ELSE) break;`
        if xtok == ELSE {
            break;
        }
        // c:1430-1431 — `while (tok == SEPER) zshlex();`
        while tok() == SEPER {
            zshlex();
        }
        // c:1432-1435 — `if (!(xtok == IF || xtok == ELIF)) { cmdpop(); YYERRORV; }`
        if !(xtok == IF || xtok == ELIF) {
            cmdpop();
            zerr("par_if: expected `if` or `elif`");
            return;
        }
        // c:1436 — `pp = ecadd(0);`
        pp = ecadd(0);
        // c:1437 — `type = (xtok == IF ? WC_IF_IF : WC_IF_ELIF);`
        r#type = if xtok == IF { WC_IF_IF } else { WC_IF_ELIF };
        // c:1438 — `par_save_list(cmplx);` — condition body
        par_save_list_wordcode(cmplx);
        // c:1439 — `incmdpos = 1;`
        set_incmdpos(true);
        // c:1440-1443 — `if (tok == ENDINPUT) { cmdpop(); YYERRORV; }`
        if tok() == ENDINPUT {
            cmdpop();
            zerr("par_if: unexpected end-of-input after condition");
            return;
        }
        // c:1444-1445 — `while (tok == SEPER) zshlex();`
        while tok() == SEPER {
            zshlex();
        }
        // c:1446 — `xtok = FI;` — pre-set so the post-loop check works
        xtok = FI;
        // c:1447 — `nc = cmdstack[cmdsp - 1] == CS_IF ? CS_IFTHEN : CS_ELIFTHEN;`
        // (Not tracked separately in zshrs cmdstack — derive from cur top
        // by reading CMDSTACK; for safety use CS_IFTHEN as default.)
        // We don't have a way to read top easily — match by tracking
        // whether we just pushed CS_IF or CS_ELIF.
        // For wordcode emission this only affects cmdstack debug output;
        // not the emitted wordcode. Use CS_IFTHEN.
        // NOTE(review): `r#type` set just above already records whether
        // this clause was opened by `if` or `elif`, so the C selection
        // could presumably be reproduced from it — confirm and revisit.
        let nc_local: u8 = CS_IFTHEN as u8;
        if tok() == THEN {
            // c:1448-1456 — THEN branch
            // c:1449 — `usebrace = 0;`
            usebrace = 0;
            // c:1450 — `cmdpop();`
            cmdpop();
            // c:1451 — `cmdpush(nc);`
            cmdpush(nc_local);
            // c:1452 — `zshlex();`
            zshlex();
            // c:1453 — `par_save_list(cmplx);` — then body
            par_save_list_wordcode(cmplx);
            // c:1454 — `ecbuf[pp] = WCB_IF(type, ecused - 1 - pp);`
            let used = ECUSED.get() as usize;
            ECBUF.with_borrow_mut(|b| {
                b[pp] = WCB_IF(r#type, (used.saturating_sub(1 + pp)) as wordcode);
            });
            // c:1455 — `incmdpos = 1;`
            set_incmdpos(true);
            // c:1456 — `cmdpop();`
            cmdpop();
        } else if tok() == INBRACE_TOK {
            // c:1457-1473 — INBRACE branch
            // c:1458 — `usebrace = 1;`
            // Remembered so a following `else` may also use a brace body.
            usebrace = 1;
            // c:1459 — `cmdpop();`
            cmdpop();
            // c:1460 — `cmdpush(nc);`
            cmdpush(nc_local);
            // c:1461 — `zshlex();`
            zshlex();
            // c:1462 — `par_save_list(cmplx);`
            par_save_list_wordcode(cmplx);
            // c:1463-1466 — `if (tok != OUTBRACE) { cmdpop(); YYERRORV; }`
            if tok() != OUTBRACE_TOK {
                cmdpop();
                zerr("par_if: expected `}`");
                return;
            }
            // c:1467 — `ecbuf[pp] = WCB_IF(type, ecused - 1 - pp);`
            let used = ECUSED.get() as usize;
            ECBUF.with_borrow_mut(|b| {
                b[pp] = WCB_IF(r#type, (used.saturating_sub(1 + pp)) as wordcode);
            });
            // c:1469 — `zshlex();`
            zshlex();
            // c:1470 — `incmdpos = 1;`
            set_incmdpos(true);
            // c:1471-1472 — `if (tok == SEPER) break;`
            // Leaves the loop with `nc_local` still pushed; the pop
            // right after the loop removes it.
            if tok() == SEPER {
                break;
            }
            // c:1473 — `cmdpop();`
            cmdpop();
        } else if unset(SHORTLOOPS) {
            // c:1474-1476 — `cmdpop(); YYERRORV;`
            cmdpop();
            zerr("par_if: short body requires SHORTLOOPS");
            return;
        } else {
            // c:1477-1484 — short loop form
            // c:1478 — `cmdpop();`
            cmdpop();
            // c:1479 — `cmdpush(nc);`
            cmdpush(nc_local);
            // c:1480 — `par_save_list1(cmplx);`
            par_save_list1_wordcode(cmplx);
            // c:1481 — `ecbuf[pp] = WCB_IF(type, ecused - 1 - pp);`
            let used = ECUSED.get() as usize;
            ECBUF.with_borrow_mut(|b| {
                b[pp] = WCB_IF(r#type, (used.saturating_sub(1 + pp)) as wordcode);
            });
            // c:1482 — `incmdpos = 1;`
            set_incmdpos(true);
            // c:1483 — `break;`
            break;
        }
    }
    // c:1486 — `cmdpop();`
    // Balances the push that was still outstanding when the loop ended.
    cmdpop();
    // c:1487 — `if (xtok == ELSE || tok == ELSE) {`
    if xtok == ELSE || tok() == ELSE {
        // c:1488 — `pp = ecadd(0);`
        pp = ecadd(0);
        // c:1489 — `cmdpush(CS_ELSE);`
        cmdpush(CS_ELSE as u8);
        // c:1490-1491 — `while (tok == SEPER) zshlex();`
        while tok() == SEPER {
            zshlex();
        }
        // c:1492-1498 — `if (tok == INBRACE && usebrace) { ... } else { ... }`
        if tok() == INBRACE_TOK && usebrace != 0 {
            // c:1493 — `zshlex();`
            zshlex();
            // c:1494 — `par_save_list(cmplx);`
            par_save_list_wordcode(cmplx);
            // c:1495-1498 — `if (tok != OUTBRACE) { cmdpop(); YYERRORV; }`
            if tok() != OUTBRACE_TOK {
                cmdpop();
                zerr("par_if: else expected `}`");
                return;
            }
        } else {
            // c:1500 — `par_save_list(cmplx);`
            par_save_list_wordcode(cmplx);
            // c:1501-1504 — `if (tok != FI) { cmdpop(); YYERRORV; }`
            if tok() != FI {
                cmdpop();
                zerr("par_if: else expected `fi`");
                return;
            }
        }
        // c:1506 — `incmdpos = 0;`
        set_incmdpos(false);
        // c:1507 — `ecbuf[pp] = WCB_IF(WC_IF_ELSE, ecused - 1 - pp);`
        let used = ECUSED.get() as usize;
        ECBUF.with_borrow_mut(|b| {
            b[pp] = WCB_IF(WC_IF_ELSE, (used.saturating_sub(1 + pp)) as wordcode);
        });
        // c:1508 — `zshlex();`
        zshlex();
        // c:1509 — `cmdpop();`
        cmdpop();
    }
    // c:1511 — `ecbuf[p] = WCB_IF(WC_IF_HEAD, ecused - 1 - p);`
    let used = ECUSED.get() as usize;
    ECBUF.with_borrow_mut(|b| {
        b[p] = WCB_IF(WC_IF_HEAD, (used.saturating_sub(1 + p)) as wordcode);
    });
}
5860
5861/// Port of `par_while(int *cmplx)` from `Src/parse.c:1520-1557`.
5862pub fn par_while_wordcode(cmplx: &mut i32) {
5863 // c:1523 — `int oecused = ecused, p;`
5864 let _oecused = ECUSED.get() as usize;
5865 let p: usize;
5866 // c:1524 — `int type = (tok == UNTIL ? WC_WHILE_UNTIL : WC_WHILE_WHILE);`
5867 let r#type: wordcode = if tok() == UNTIL {
5868 WC_WHILE_UNTIL
5869 } else {
5870 WC_WHILE_WHILE
5871 };
5872
5873 // c:1526 — `p = ecadd(0);`
5874 p = ecadd(0);
5875 // c:1527 — `zshlex();`
5876 zshlex();
5877 // c:1528 — `par_save_list(cmplx);` — condition.
5878 par_save_list_wordcode(cmplx);
5879 // c:1529 — `incmdpos = 1;`
5880 set_incmdpos(true);
5881 // c:1530-1531 — `while (tok == SEPER) zshlex();`
5882 while tok() == SEPER {
5883 zshlex();
5884 }
5885 // c:1532-1545 — body dispatch (inlined in C; we factor via
5886 // par_loop_body_wordcode since for/while/repeat share this
5887 // identical block).
5888 if tok() == DOLOOP {
5889 // c:1533 — `zshlex();`
5890 zshlex();
5891 // c:1534 — `par_save_list(cmplx);`
5892 par_save_list_wordcode(cmplx);
5893 // c:1535-1536 — `if (tok != DONE) YYERRORV(oecused);`
5894 if tok() != DONE {
5895 zerr("par_while: expected `done`");
5896 return;
5897 }
5898 // c:1537 — `incmdpos = 0;`
5899 set_incmdpos(false);
5900 // c:1538 — `zshlex();`
5901 zshlex();
5902 } else if tok() == INBRACE_TOK {
5903 // c:1540 — `zshlex();`
5904 zshlex();
5905 // c:1541 — `par_save_list(cmplx);`
5906 par_save_list_wordcode(cmplx);
5907 // c:1542-1543 — `if (tok != OUTBRACE) YYERRORV(oecused);`
5908 if tok() != OUTBRACE_TOK {
5909 zerr("par_while: expected `}`");
5910 return;
5911 }
5912 // c:1544 — `incmdpos = 0;`
5913 set_incmdpos(false);
5914 // c:1545 — `zshlex();`
5915 zshlex();
5916 } else if isset(CSHJUNKIELOOPS) {
5917 // c:1546-1550
5918 par_save_list_wordcode(cmplx);
5919 if tok() != ZEND {
5920 zerr("par_while: expected `end`");
5921 return;
5922 }
5923 zshlex();
5924 } else if unset(SHORTLOOPS) {
5925 // c:1551-1552 — `YYERRORV(oecused);`
5926 zerr("par_while: short body requires SHORTLOOPS");
5927 return;
5928 } else {
5929 // c:1554 — `par_save_list1(cmplx);`
5930 par_save_list1_wordcode(cmplx);
5931 }
5932
5933 // c:1556 — `ecbuf[p] = WCB_WHILE(type, ecused - 1 - p);`
5934 let used = ECUSED.get() as usize;
5935 ECBUF.with_borrow_mut(|b| {
5936 b[p] = WCB_WHILE(r#type, (used.saturating_sub(1 + p)) as wordcode);
5937 });
5938}
5939
/// Wordcode entry point for `until`.
///
/// `until` shares par_while body — tok==UNTIL flips the type. This is a
/// plain forwarder: `cmplx` is handed to `par_while_wordcode` unchanged,
/// and that function inspects the current token itself.
pub fn par_until_wordcode(cmplx: &mut i32) {
    par_while_wordcode(cmplx);
}
5944
5945/// Port of `par_repeat(int *cmplx)` from `Src/parse.c:1564-1606`.
5946pub fn par_repeat_wordcode(cmplx: &mut i32) {
5947 // c:1567 — `/* ### what to do about inrepeat_ here? */`
5948 // c:1568 — `int oecused = ecused, p;`
5949 let _oecused = ECUSED.get() as usize;
5950 let p: usize;
5951
5952 // c:1570 — `p = ecadd(0);`
5953 p = ecadd(0);
5954
5955 // c:1572 — `incmdpos = 0;`
5956 set_incmdpos(false);
5957 // c:1573 — `zshlex();`
5958 zshlex();
5959 // c:1574-1575 — `if (tok != STRING) YYERRORV(oecused);`
5960 if tok() != STRING_LEX {
5961 zerr("par_repeat: expected count");
5962 return;
5963 }
5964 // c:1576 — `ecstr(tokstr);`
5965 ecstr(&tokstr().unwrap_or_default());
5966 // c:1577 — `incmdpos = 1;`
5967 set_incmdpos(true);
5968 // c:1578 — `zshlex();`
5969 zshlex();
5970 // c:1579-1580 — `while (tok == SEPER) zshlex();`
5971 while tok() == SEPER {
5972 zshlex();
5973 }
5974 // c:1581-1604 — body dispatch (inlined here matching C exactly).
5975 if tok() == DOLOOP {
5976 // c:1582-1587
5977 zshlex();
5978 par_save_list_wordcode(cmplx);
5979 if tok() != DONE {
5980 zerr("par_repeat: expected `done`");
5981 return;
5982 }
5983 set_incmdpos(false);
5984 zshlex();
5985 } else if tok() == INBRACE_TOK {
5986 // c:1589-1594
5987 zshlex();
5988 par_save_list_wordcode(cmplx);
5989 if tok() != OUTBRACE_TOK {
5990 zerr("par_repeat: expected `}`");
5991 return;
5992 }
5993 set_incmdpos(false);
5994 zshlex();
5995 } else if isset(CSHJUNKIELOOPS) {
5996 // c:1596-1599
5997 par_save_list_wordcode(cmplx);
5998 if tok() != ZEND {
5999 zerr("par_repeat: expected `end`");
6000 return;
6001 }
6002 zshlex();
6003 } else if unset(SHORTLOOPS) && unset(SHORTREPEAT) {
6004 // c:1601-1602 — par_repeat needs BOTH SHORTLOOPS and SHORTREPEAT
6005 // unset to refuse short form (more permissive than par_while).
6006 zerr("par_repeat: short body requires SHORTLOOPS or SHORTREPEAT");
6007 return;
6008 } else {
6009 // c:1604 — `par_save_list1(cmplx);`
6010 par_save_list1_wordcode(cmplx);
6011 }
6012
6013 // c:1606 — `ecbuf[p] = WCB_REPEAT(ecused - 1 - p);`
6014 let used = ECUSED.get() as usize;
6015 ECBUF.with_borrow_mut(|b| {
6016 b[p] = WCB_REPEAT((used.saturating_sub(1 + p)) as wordcode);
6017 });
6018}
6019
/// Port of `par_funcdef(int *cmplx)` from `Src/parse.c:1672-1779`.
///
/// The `function NAME { ... }` form. Emits a WCB_FUNCDEF header
/// followed by a names-count slot, the names themselves, four
/// metadata slots (string-area start, string-area length, npats,
/// do_tracing), then the body wordcode, then WCB_END.
///
/// Critical: saves/resets `ecnpats` + `ecssub` + `ecsoffs` around
/// the body parse so per-function pattern counts don't leak into
/// the enclosing scope's `ecnpats` accumulator (parse.c:1723-1758).
///
/// `cmplx` is only written here: it is set to 1 when an anonymous
/// function (no names) is followed by at least one argument word. The
/// body itself is parsed with a local complexity flag `c`.
///
/// `lineno` is zeroed for the duration of the parse and the saved value
/// is added back on every exit path except the first-token early lexing
/// (there is none before the first error return). Errors are reported
/// through `zerr`, after `ECNPATS` and `ECSSUB` have been restored.
pub fn par_funcdef_wordcode(cmplx: &mut i32) {
    // c:1674 — `int oecused = ecused, num = 0, onp, p, c = 0;`
    // NOTE(review): `_oecused` is captured but never read; the C
    // `YYERRORV(oecused)` sites presumably use it to rewind — confirm
    // that dropping the rewind is intentional.
    let _oecused = ECUSED.get() as usize;
    let mut num: i32 = 0;
    let onp: i32;
    let p: usize;
    let mut c: i32 = 0;
    // c:1675 — `int so, oecssub = ecssub;`
    let so: i32;
    let oecssub = ECSSUB.get();
    // c:1676 — `zlong oldlineno = lineno;`
    let oldlineno = lineno();
    // c:1677 — `int do_tracing = 0;`
    let mut do_tracing: i32 = 0;

    // c:1679 — `lineno = 0;`
    set_lineno(0);
    // c:1680 — `nocorrect = 1;`
    set_nocorrect(1);
    // c:1681 — `incmdpos = 0;`
    set_incmdpos(false);
    // c:1682 — `zshlex();`
    zshlex();

    // c:1684 — `p = ecadd(0);`
    p = ecadd(0);
    // c:1685 — `ecadd(0); /* p + 1 */`
    // The slot index is kept so the names count can be written later.
    let p1 = ecadd(0);

    // c:1687-1699 — `Consume an initial (-T), (--), or (-T --).`
    // c:1690 — `if (tok == STRING && tokstr[0] == Dash) {`
    if tok() == STRING_LEX {
        let s = tokstr().unwrap_or_default();
        let bytes = s.as_bytes();
        // C: `tokstr[0] == Dash` (Dash = 0x9b = 0xc2 0x9b in UTF-8).
        // First byte of UTF-8 `\u{9b}` is 0xc2; the char `'-'` is 0x2d.
        // Match either form.
        let first_is_dash = (bytes.len() >= 2 && bytes[0] == 0xc2 && bytes[1] == 0x9b)
            || (bytes.len() >= 1 && bytes[0] == b'-');
        if first_is_dash {
            // c:1691-1694 — `if (tokstr[1] == 'T' && !tokstr[2]) { ++do_tracing; zshlex(); }`
            // After the leading dash byte(s), check remaining bytes.
            let after_dash = if bytes.len() >= 2 && bytes[0] == 0xc2 && bytes[1] == 0x9b {
                &bytes[2..]
            } else {
                &bytes[1..]
            };
            if after_dash.len() == 1 && after_dash[0] == b'T' {
                do_tracing += 1;
                zshlex();
            }
            // c:1695-1698 — `if (tok == STRING && tokstr[0] == Dash &&
            //   tokstr[1] == Dash && !tokstr[2]) zshlex();`
            if tok() == STRING_LEX {
                let s2 = tokstr().unwrap_or_default();
                let b2 = s2.as_bytes();
                // Count up to two leading dashes, each in either the
                // two-byte marker form or the plain ASCII form.
                let mut idx = 0;
                let mut dashes = 0;
                while idx < b2.len() && dashes < 2 {
                    if b2[idx] == 0xc2 && idx + 1 < b2.len() && b2[idx + 1] == 0x9b {
                        idx += 2;
                        dashes += 1;
                    } else if b2[idx] == b'-' {
                        idx += 1;
                        dashes += 1;
                    } else {
                        break;
                    }
                }
                // Only a token that is exactly two dashes is consumed.
                if dashes == 2 && idx == b2.len() {
                    zshlex();
                }
            }
        }
    }

    // c:1701-1709 — names loop.
    // `while (tok == STRING) { if ((*tokstr == Inbrace || *tokstr == '{')
    //   && !tokstr[1]) { tok = INBRACE; break; } ecstr(tokstr); num++; zshlex(); }`
    while tok() == STRING_LEX {
        let s = tokstr().unwrap_or_default();
        let bytes = s.as_bytes();
        // First byte tests for Inbrace marker (0x8f → UTF-8 `0xc2 0x8f`) or `{`,
        // and length-1 check (`!tokstr[1]`).
        let is_inbrace_only = (bytes.len() == 1 && bytes[0] == b'{')
            || (bytes.len() == 2 && bytes[0] == 0xc2 && bytes[1] == 0x8f);
        if is_inbrace_only {
            // A lone brace word is re-tagged as the INBRACE token so the
            // body dispatch below sees it.
            set_tok(INBRACE_TOK);
            break;
        }
        ecstr(&s);
        num += 1;
        zshlex();
    }

    // c:1711-1714 — four metadata placeholder slots.
    let m2 = ecadd(0);
    let m3 = ecadd(0);
    let m4 = ecadd(0);
    let m5 = ecadd(0);

    // c:1716 — `nocorrect = 0;`
    set_nocorrect(0);
    // c:1717 — `incmdpos = 1;`
    set_incmdpos(true);
    // c:1718-1719 — `if (tok == INOUTPAR) zshlex();`
    if tok() == INOUTPAR {
        zshlex();
    }
    // c:1720-1721 — `while (tok == SEPER) zshlex();`
    while tok() == SEPER {
        zshlex();
    }

    // c:1723 — `ecnfunc++;`
    ECNFUNC.set(ECNFUNC.get() + 1);
    // c:1724 — `ecssub = so = ecsoffs;`
    so = ECSOFFS.get();
    ECSSUB.set(so);
    // c:1725 — `onp = ecnpats;`
    onp = ECNPATS.with(|cc| cc.get());
    // c:1726 — `ecnpats = 0;`
    ECNPATS.with(|cc| cc.set(0));

    // c:1728 — `if (tok == INBRACE) {`
    if tok() == INBRACE_TOK {
        // c:1729 — `zshlex();`
        zshlex();
        // c:1730 — `par_list(&c);`
        par_list_wordcode(&mut c);
        // c:1731-1736 — `if (tok != OUTBRACE) { lineno += oldlineno; ... }`
        if tok() != OUTBRACE_TOK {
            set_lineno(lineno() + oldlineno);
            ECNPATS.with(|cc| cc.set(onp));
            ECSSUB.set(oecssub);
            zerr("par_funcdef: expected `}`");
            return;
        }
        // c:1737-1740 — `if (num == 0) { incmdpos = 0; }`
        // No names were read, so words after `}` are lexed outside
        // command position (they are collected as arguments below).
        if num == 0 {
            set_incmdpos(false);
        }
        // c:1741 — `zshlex();`
        zshlex();
    } else if unset(SHORTLOOPS) {
        // c:1742-1746 — `lineno += oldlineno; ecnpats = onp; ecssub = oecssub; YYERRORV`
        set_lineno(lineno() + oldlineno);
        ECNPATS.with(|cc| cc.set(onp));
        ECSSUB.set(oecssub);
        zerr("par_funcdef: short body requires SHORTLOOPS");
        return;
    } else {
        // c:1748 — `par_list1(&c);`
        par_list1_wordcode(&mut c);
    }

    // c:1750 — `ecadd(WCB_END());`
    ecadd(WCB_END());
    // c:1751-1754 — fill the 4 metadata slots
    let cur_sofs = ECSOFFS.get();
    let body_npats = ECNPATS.with(|cc| cc.get());
    ECBUF.with_borrow_mut(|b| {
        b[m2] = (so - oecssub) as wordcode;
        b[m3] = (cur_sofs - so) as wordcode;
        b[m4] = body_npats as wordcode;
        b[m5] = do_tracing as wordcode;
    });
    // c:1755 — `ecbuf[p + 1] = num;`
    ECBUF.with_borrow_mut(|b| {
        b[p1] = num as wordcode;
    });

    // c:1757 — `ecnpats = onp;`
    ECNPATS.with(|cc| cc.set(onp));
    // c:1758 — `ecssub = oecssub;`
    ECSSUB.set(oecssub);
    // c:1759 — `ecnfunc++;`
    ECNFUNC.set(ECNFUNC.get() + 1);

    // c:1761 — `ecbuf[p] = WCB_FUNCDEF(ecused - 1 - p);`
    let used = ECUSED.get() as usize;
    ECBUF.with_borrow_mut(|b| {
        b[p] = WCB_FUNCDEF((used.saturating_sub(1 + p)) as wordcode);
    });

    // c:1763-1777 — anonymous-function trailing args (num == 0 case).
    if num == 0 {
        // c:1766 — `int parg = ecadd(0);`
        let parg = ecadd(0);
        // c:1767 — `ecadd(0);`
        ecadd(0);
        // c:1768-1772 — `while (tok == STRING) { ecstr(tokstr); num++; zshlex(); }`
        // `num` is reused here as the argument count.
        while tok() == STRING_LEX {
            ecstr(&tokstr().unwrap_or_default());
            num += 1;
            zshlex();
        }
        // c:1773-1774 — `if (num > 0) *cmplx = 1;`
        if num > 0 {
            *cmplx = 1;
        }
        // c:1775 — `ecbuf[parg] = ecused - parg;`
        // c:1776 — `ecbuf[parg+1] = num;`
        let used2 = ECUSED.get() as usize;
        ECBUF.with_borrow_mut(|b| {
            b[parg] = (used2 - parg) as wordcode;
            b[parg + 1] = num as wordcode;
        });
    }
    // c:1778 — `lineno += oldlineno;`
    set_lineno(lineno() + oldlineno);
}
6242
6243/// Size of `struct fdhead` in `wordcode` (u32) units. Used by all
6244/// the header-walk macros below.
6245pub const FDHEAD_WORDS: usize = size_of::<fdhead>() / 4;
6246
6247/// `Src/parse.c:1619-1665`. Handles both `(...)` subshell and
6248/// `{...}` brace group (cursh) plus optional `always { ... }`
6249/// trailing block. C uses a single function with `zsh_construct=1`
6250/// for `{...}` and 0 for `(...)`.
6251pub fn par_subsh_wordcode(cmplx: &mut i32, zsh_construct: i32) {
6252 // c:1621 — `enum lextok otok = tok;`
6253 let otok = tok();
6254 // c:1622 — `int oecused = ecused, p, pp;`
6255 let _oecused = ECUSED.get() as usize;
6256 let p: usize;
6257 let pp: usize;
6258
6259 // c:1624 — `p = ecadd(0);`
6260 p = ecadd(0);
6261 // c:1625 — `/* Extra word only needed for always block */`
6262 // c:1626 — `pp = ecadd(0);`
6263 pp = ecadd(0);
6264 // c:1627 — `zshlex();`
6265 zshlex();
6266 // c:1628 — `par_list(cmplx);`
6267 par_list_wordcode(cmplx);
6268 // c:1629 — `ecadd(WCB_END());`
6269 ecadd(WCB_END());
6270 // c:1630-1631 — `if (tok != ((otok == INPAR) ? OUTPAR : OUTBRACE))
6271 // YYERRORV(oecused);`
6272 if tok()
6273 != (if otok == INPAR_TOK {
6274 OUTPAR_TOK
6275 } else {
6276 OUTBRACE_TOK
6277 })
6278 {
6279 zerr("par_subsh: missing closing token");
6280 return;
6281 }
6282 // c:1632 — `incmdpos = !zsh_construct;`
6283 set_incmdpos(zsh_construct == 0);
6284 // c:1633 — `zshlex();`
6285 zshlex();
6286
6287 // c:1635 — `/* Optional always block. No intervening SEPERs allowed. */`
6288 // c:1636 — `if (otok == INBRACE && tok == STRING && !strcmp(tokstr, "always")) {`
6289 if otok == INBRACE_TOK && tok() == STRING_LEX && tokstr().as_deref() == Some("always") {
6290 // c:1637 — `ecbuf[pp] = WCB_TRY(ecused - 1 - pp);`
6291 let used = ECUSED.get() as usize;
6292 ECBUF.with_borrow_mut(|b| {
6293 b[pp] = WCB_TRY((used.saturating_sub(1 + pp)) as wordcode);
6294 });
6295 // c:1638 — `incmdpos = 1;`
6296 set_incmdpos(true);
6297 // c:1639-1641 — `do { zshlex(); } while (tok == SEPER);`
6298 loop {
6299 zshlex();
6300 if tok() != SEPER {
6301 break;
6302 }
6303 }
6304
6305 // c:1643-1644 — `if (tok != INBRACE) YYERRORV(oecused);`
6306 if tok() != INBRACE_TOK {
6307 zerr("par_subsh: 'always' expects `{`");
6308 return;
6309 }
6310 // c:1645 — `cmdpop();`
6311 cmdpop();
6312 // c:1646 — `cmdpush(CS_ALWAYS);`
6313 cmdpush(CS_ALWAYS as u8);
6314
6315 // c:1648 — `zshlex();`
6316 zshlex();
6317 // c:1649 — `par_save_list(cmplx);`
6318 par_save_list_wordcode(cmplx);
6319 // c:1650-1651 — `while (tok == SEPER) zshlex();`
6320 while tok() == SEPER {
6321 zshlex();
6322 }
6323
6324 // c:1653 — `incmdpos = 1;`
6325 set_incmdpos(true);
6326
6327 // c:1655-1656 — `if (tok != OUTBRACE) YYERRORV(oecused);`
6328 if tok() != OUTBRACE_TOK {
6329 zerr("par_subsh: 'always' block missing `}`");
6330 return;
6331 }
6332 // c:1657 — `zshlex();`
6333 zshlex();
6334 // c:1658 — `ecbuf[p] = WCB_TRY(ecused - 1 - p);`
6335 let used = ECUSED.get() as usize;
6336 ECBUF.with_borrow_mut(|b| {
6337 b[p] = WCB_TRY((used.saturating_sub(1 + p)) as wordcode);
6338 });
6339 } else {
6340 // c:1660-1661 — `ecbuf[p] = (otok == INPAR ? WCB_SUBSH(...) : WCB_CURSH(...));`
6341 let used = ECUSED.get() as usize;
6342 let off = used.saturating_sub(1 + p);
6343 ECBUF.with_borrow_mut(|b| {
6344 b[p] = if otok == INPAR_TOK {
6345 WCB_SUBSH(off as wordcode)
6346 } else {
6347 WCB_CURSH(off as wordcode)
6348 };
6349 });
6350 }
6351}
6352
6353/// Port of `par_time(void)` from `Src/parse.c:1787`. `time PIPE`
6354/// emits WCB_TIMED(WC_TIMED_PIPE) + the sublist code; bare `time`
6355/// with no pipeline emits WCB_TIMED(WC_TIMED_EMPTY).
6356pub fn par_time_wordcode() {
6357 // c:1791 — `zshlex();`
6358 zshlex();
6359 // c:1793-1794 — `p = ecadd(0); ecadd(0);`
6360 let p = ecadd(0);
6361 ecadd(0);
6362 // c:1795 — `if ((f = par_sublist2(&c)) < 0)`
6363 let mut c = 0i32;
6364 let f = par_sublist2(&mut c);
6365 match f {
6366 Some(flags) => {
6367 // c:1799 — `ecbuf[p] = WCB_TIMED(WC_TIMED_PIPE);`
6368 ECBUF.with_borrow_mut(|b| {
6369 if p < b.len() {
6370 b[p] = WCB_TIMED(WC_TIMED_PIPE);
6371 }
6372 });
6373 // c:1800 — `set_sublist_code(p+1, WC_SUBLIST_END, f,
6374 // ecused-2-p, c);`
6375 let used = ECUSED.get() as usize;
6376 let skip = used.saturating_sub(2 + p) as i32;
6377 set_sublist_code(p + 1, WC_SUBLIST_END as i32, flags, skip, c != 0);
6378 }
6379 None => {
6380 // c:1796-1798 — `ecused--; ecbuf[p] = WCB_TIMED(WC_TIMED_EMPTY);`
6381 ECUSED.set((ECUSED.get() - 1).max(0));
6382 ECBUF.with_borrow_mut(|b| {
6383 if p < b.len() {
6384 b[p] = WCB_TIMED(WC_TIMED_EMPTY);
6385 }
6386 });
6387 }
6388 }
6389}
6390
6391/// Port of `par_dinbrack(void)` from `Src/parse.c:1810`. Wraps
6392/// `par_cond` (the cond-expression emitter at parse.c:2409) with
6393/// the `[[ ... ]]` framing: incond/incmdpos toggles + DOUTBRACK
6394/// expectation.
6395pub fn par_cond_wordcode() {
6396 let oecused = ECUSED.get();
6397 // c:1814 — `incond = 1;`
6398 set_incond(1);
6399 // c:1815 — `incmdpos = 0;`
6400 set_incmdpos(false);
6401 // c:1816 — `zshlex();` past `[[`.
6402 zshlex();
6403 // c:1817 — `par_cond();` — call the no-skip cond-expression
6404 // entry that EMITS WORDCODE (par_cond_top → par_cond_1 →
6405 // par_cond_2 → par_cond_double/triple/multi). NOT the AST
6406 // `par_cond` at parse.rs:4644 which is a misnamed `par_dinbrack`
6407 // that skips `[[` AND `]]` and returns a ZshCommand AST node
6408 // instead of pushing WC_COND opcodes. NOT `parse_cond_expr`
6409 // either — that's also AST-only, returning ZshCond. With
6410 // `parse_cond_expr` here, every `[[ ... ]]` test produced ZERO
6411 // wordcode payload and parity dropped ~148 words on /etc/zshrc.
6412 let _ = par_cond_top();
6413 // c:1818-1819 — `if (tok != DOUTBRACK) YYERRORV(oecused);`
6414 if tok() != DOUTBRACK {
6415 let _ = oecused;
6416 zerr("missing ]]");
6417 return;
6418 }
6419 // c:1820 — `incond = 0;`
6420 set_incond(0);
6421 // c:1821 — `incmdpos = 1;`
6422 set_incmdpos(true);
6423 // c:1822 — `zshlex();` past `]]`.
6424 zshlex();
6425}
6426
6427/// Port of the `case DINPAR:` arm of `par_cmd` from
6428/// `Src/parse.c:1031-1034`:
6429/// ```c
6430/// ecadd(WCB_ARITH());
6431/// ecstr(tokstr);
6432/// zshlex();
6433/// ```
6434/// `(( EXPR ))` arithmetic at command position — emits the ARITH
6435/// opcode followed by the interned EXPR string, then advances past
6436/// the DINPAR token (which already carries the body text).
6437pub fn par_arith_wordcode() {
6438 // c:1032 — `ecadd(WCB_ARITH());`
6439 ecadd(WCB_ARITH());
6440 // c:1033 — `ecstr(tokstr);` — interns the expression string and
6441 // appends its strcode index to the wordcode buffer.
6442 let expr = tokstr().unwrap_or_default();
6443 ecstr(&expr);
6444 // c:1034 — `zshlex();`
6445 zshlex();
6446}
6447
6448/// Port of `par_simple(int *cmplx, int nr)` from
6449/// `Src/parse.c:1836-2227`. Emits WC_SIMPLE + word count +
6450/// interned string offsets. Returns `0` when nothing was emitted,
6451/// otherwise `1 + (number of code words consumed by redirections)`.
6452/// The full C body handles assignments (ENVSTRING/ENVARRAY),
6453/// inline `{var}>file` brace-FDs, prefix modifiers (NOCORRECT etc),
6454/// and `name() { body }` funcdef detection — those paths are
6455/// progressively wired into the AST parser; this wordcode-emitter
6456/// covers the simple `cmd args...` case + interleaved redirs.
6457pub fn par_simple_wordcode(cmplx: &mut i32, mut nr: i32) -> i32 {
6458 // c:1838-1841 — `int oecused = ecused, isnull = 1, r, argc = 0,
6459 // p, isfunc = 0, sr = 0;`
6460 // `int c = *cmplx, nrediradd, assignments = 0, ppost = 0,
6461 // is_typeset = 0;`
6462 // c is the SAVED initial cmplx so INOUTPAR can restore via
6463 // `*cmplx = c;` at c:2070.
6464 let _oecused = ECUSED.get() as usize;
6465 let c_saved = *cmplx;
6466 let mut isnull = true;
6467 let mut argc: u32 = 0;
6468 let mut sr: i32 = 0;
6469 let mut assignments = false;
6470 let mut isfunc = false;
6471
6472 // c:1843 — `r = ecused;` — saves the offset where redirs get
6473 // INSERTED (via ecispace). Each redir shifts later words DOWN
6474 // by ncodes, so the SIMPLE placeholder at `p` (set later) must
6475 // also bump by ncodes when a redir lands. C uses `&r` to pass
6476 // the cursor by reference; Rust uses a mutable local + manual
6477 // bumps after each par_redir_wordcode call.
6478 let mut r: usize = ECUSED.get() as usize;
6479
6480 // c:1844-1919 — pre-cmd loop: NOCORRECT, ENVSTRING (scalar
6481 // assigns), ENVARRAY (array assigns), IS_REDIROP. Loops until
6482 // a non-assignment token is seen.
6483 loop {
6484 match tok() {
6485 NOCORRECT => {
6486 // c:1846-1849
6487 *cmplx = 1;
6488 set_nocorrect(1);
6489 }
6490 ENVSTRING => {
6491 // c:1848-1898 — scalar assignment `name=value` or
6492 // `name+=value`. Emits WCB_ASSIGN(SCALAR, NEW|INC, 0)
6493 // followed by ecstr(name), ecstr(value).
6494 let raw = tokstr().unwrap_or_default();
6495 // Find first of Inbrack / '=' / '+' (the C scan at
6496 // c:1851-1853). Inside Inbrack we skipparens — i.e.
6497 // skip `name[...]` index, then continue.
6498 // c:1851-1853 — `for (ptr = tokstr; *ptr && *ptr != Inbrack
6499 // && *ptr != '=' && *ptr != '+'; ptr++); if (*ptr == Inbrack)
6500 // skipparens(Inbrack, Outbrack, &ptr);`. Walk to the first
6501 // `[`/`=`/`+`/Equals-token, then if we landed on `[`, skip
6502 // the balanced `name[index]` pair via skipparens.
6503 let bytes: Vec<char> = raw.chars().collect();
6504 let raw_str: String = bytes.iter().collect();
6505 let mut idx = 0usize;
6506 while idx < bytes.len() {
6507 let ch = bytes[idx];
6508 if ch == '\u{91}' /* Inbrack */
6509 || ch == '=' || ch == '+' || ch == '\u{8d}'
6510 /* Equals */
6511 {
6512 break;
6513 }
6514 idx += 1;
6515 }
6516 if idx < bytes.len() && bytes[idx] == '\u{91}'
6517 /* Inbrack */
6518 {
6519 // c:1855 — `skipparens(Inbrack, Outbrack, &ptr);`.
6520 let byte_off: usize = bytes[..idx].iter().map(|c| c.len_utf8()).sum();
6521 let mut cursor: &str = &raw_str[byte_off..];
6522 let _ = crate::ported::utils::skipparens('\u{91}', '\u{92}', &mut cursor);
6523 let consumed = raw_str.len() - byte_off - cursor.len();
6524 let advance_chars = raw_str[byte_off..byte_off + consumed].chars().count();
6525 idx += advance_chars;
6526 // Continue scanning for `=` / `+` after the `]`.
6527 while idx < bytes.len() {
6528 let ch = bytes[idx];
6529 if ch == '=' || ch == '+' || ch == '\u{8d}' {
6530 break;
6531 }
6532 idx += 1;
6533 }
6534 }
6535 let is_inc = idx < bytes.len() && bytes[idx] == '+';
6536 // c:1856-1858 — `if (*ptr == '+') { *ptr++ = '\0';
6537 // ecadd(WCB_ASSIGN(SCALAR, INC, 0)); } else WCB_NEW`
6538 // C nulls the `+` AT THAT POSITION then advances ptr.
6539 // `name` is bytes BEFORE the `+`, NOT including it.
6540 let name_end = idx;
6541 if is_inc {
6542 idx += 1;
6543 }
6544 let flag = if is_inc { WC_ASSIGN_INC } else { WC_ASSIGN_NEW };
6545 ecadd(WCB_ASSIGN(WC_ASSIGN_SCALAR, flag, 0));
6546 // c:1860 — `if (*ptr == '=') { *ptr = '\0'; str = ptr + 1; }
6547 // else equalsplit(tokstr, &str);`
6548 let name: String = bytes[..name_end].iter().collect();
6549 let str_off = if idx < bytes.len() && (bytes[idx] == '=' || bytes[idx] == '\u{8d}')
6550 {
6551 idx + 1
6552 } else {
6553 idx
6554 };
6555 let value: String = bytes[str_off..].iter().collect();
6556 // c:1866-1877 — scan value for `=(`/`<(`/`>(` (proc
6557 // subst); if found, bump cmplx (suppresses Z_SIMPLE).
6558 let vbytes: Vec<char> = value.chars().collect();
6559 for (i, ch) in vbytes.iter().enumerate() {
6560 if i + 1 < vbytes.len() && vbytes[i + 1] == '\u{88}'
6561 /* Inpar */
6562 {
6563 if *ch == '\u{8d}' /* Equals */
6564 || *ch == '\u{94}' /* Inang */
6565 || *ch == '\u{96}'
6566 /* OutangProc */
6567 {
6568 *cmplx = 1;
6569 break;
6570 }
6571 }
6572 }
6573 ecstr(&name);
6574 ecstr(&value);
6575 isnull = false;
6576 assignments = true;
6577 }
6578 ENVARRAY => {
6579 // c:1883-1908 — array assignment `name=( ... )` in the
6580 // pre-cmd loop (no `typeset`-style typeset_force flag).
6581 // c:1884 — `int oldcmdpos = incmdpos, n, type2;`
6582 let oldcmdpos = incmdpos();
6583 let n: u32;
6584 let type2: wordcode;
6585 let p: usize;
6586
6587 // c:1886-1889 — `array setting is cmplx because it can
6588 // contain process substitutions`
6589 // c:1890 — `*cmplx = c = 1;`
6590 *cmplx = 1;
6591 // c:1891 — `p = ecadd(0);`
6592 p = ecadd(0);
6593 // c:1892 — `incmdpos = 0;`
6594 set_incmdpos(false);
6595 // c:1893-1897 — `+=` detection: if tokstr ends in `+`,
6596 // strip the `+` and use WC_ASSIGN_INC; else WC_ASSIGN_NEW.
6597 let raw = tokstr().unwrap_or_default();
6598 let (name, t2) = if raw.ends_with('+') {
6599 (raw[..raw.len() - 1].to_string(), WC_ASSIGN_INC)
6600 } else {
6601 (raw.clone(), WC_ASSIGN_NEW)
6602 };
6603 type2 = t2;
6604 // c:1898 — `ecstr(tokstr);` (tokstr now NUL-trimmed)
6605 ecstr(&name);
6606 // c:1899 — `cmdpush(CS_ARRAY);`
6607 cmdpush(CS_ARRAY as u8);
6608 // c:1900 — `zshlex();`
6609 zshlex();
6610 // c:1901 — `n = par_nl_wordlist();`
6611 n = par_nl_wordlist_wordcode();
6612 // c:1902 — `ecbuf[p] = WCB_ASSIGN(WC_ASSIGN_ARRAY, type2, n);`
6613 ECBUF.with_borrow_mut(|b| {
6614 b[p] = WCB_ASSIGN(WC_ASSIGN_ARRAY, type2, n);
6615 });
6616 // c:1903 — `cmdpop();`
6617 cmdpop();
6618 // c:1904-1905 — `if (tok != OUTPAR) YYERROR(oecused);`
6619 if tok() != OUTPAR_TOK {
6620 zerr("par_simple: expected `)' after array assignment");
6621 return 0;
6622 }
6623 // c:1906 — `incmdpos = oldcmdpos;`
6624 set_incmdpos(oldcmdpos);
6625 // c:1907 — `isnull = 0;`
6626 isnull = false;
6627 // c:1908 — `assignments = 1;`
6628 assignments = true;
6629 }
6630 t if IS_REDIROP(t) => {
6631 // c:1900-1904 — `*cmplx = c = 1; nr += par_redir(&r,
6632 // NULL); continue;`. The wordcode-emitting redir is
6633 // distinct from the AST par_redir — it INSERTS
6634 // WCB_REDIR + fd + ecstrcode(name) at offset `r`
6635 // via ecispace, shifting any later words down.
6636 *cmplx = 1;
6637 let added = par_redir_wordcode(&mut r, None);
6638 if added == 0 {
6639 break;
6640 }
6641 nr += added;
6642 continue;
6643 }
6644 _ => break,
6645 }
6646 zshlex(); // c:1907 `zshlex();`
6647 }
6648
6649 // c:1920-1921 — `if (tok == AMPER || tok == AMPERBANG) YYERROR;`
6650 if tok() == AMPER || tok() == AMPERBANG {
6651 zerr("par_simple: unexpected &");
6652 return 0;
6653 }
6654
6655 // c:1923 — `p = ecadd(WCB_SIMPLE(0));`
6656 let mut p = ecadd(WCB_SIMPLE(0));
6657
6658 // c:1924-2105 — main words loop. is_typeset tracks whether the
6659 // outer command was `typeset`/`export`/etc. so the final
6660 // placeholder gets WCB_TYPESET instead of WCB_SIMPLE.
6661 let mut is_typeset = false;
6662 let mut postassigns: u32 = 0;
6663 let mut ppost: usize = 0;
6664 loop {
6665 match tok() {
6666 STRING_LEX | TYPESET => {
6667 // c:1926 — `int redir_var = 0;`
6668 let mut redir_var = false;
6669 // c:1928-1929 — `*cmplx = 1; incmdpos = 0;`
6670 *cmplx = 1;
6671 set_incmdpos(false);
6672 // c:1931-1932 — TYPESET → intypeset = is_typeset = 1.
6673 if tok() == TYPESET {
6674 set_intypeset(true);
6675 is_typeset = true;
6676 }
6677 let s = tokstr().unwrap_or_default();
6678 // c:1934-1974 — `{var}>file` brace-FD detection.
6679 // `if (!isset(IGNOREBRACES) && *tokstr == Inbrace)`
6680 let bytes = s.as_bytes();
6681 let first_is_inbrace = (bytes.len() >= 2 && bytes[0] == 0xc2 && bytes[1] == 0x8f)
6682 || (bytes.len() >= 1 && bytes[0] == b'{');
6683 if !isset(IGNOREBRACES) && first_is_inbrace {
6684 // c:1937-1938 — `char *eptr = tokstr + strlen(tokstr) - 1;`
6685 // `char *ptr = eptr;`
6686 // C tests `*eptr == Outbrace` (0x90 marker or `}`) AND
6687 // there's content between `{` and `}` (`ptr > tokstr + 1`).
6688 let last_two_outbrace = bytes.len() >= 2
6689 && (bytes.ends_with(&[0xc2, 0x90]) || bytes.last() == Some(&b'}'));
6690 let opener_len = if bytes.len() >= 2 && bytes[0] == 0xc2 && bytes[1] == 0x8f {
6691 2
6692 } else {
6693 1
6694 };
6695 let closer_len = if bytes.len() >= 2 && bytes.ends_with(&[0xc2, 0x90]) {
6696 2
6697 } else if bytes.last() == Some(&b'}') {
6698 1
6699 } else {
6700 0
6701 };
6702 if last_two_outbrace && bytes.len() > opener_len + closer_len {
6703 // c:1944 — `if (itype_end(tokstr+1, IIDENT, 0) >= ptr)`
6704 // Inner content is the identifier between `{` and `}`.
6705 let inner_start = opener_len;
6706 let inner_end = bytes.len() - closer_len;
6707 let inner = &s[inner_start..inner_end];
6708 if !inner.is_empty() && crate::ported::params::isident(inner) {
6709 // c:1946-1948 — `char *idstring = dupstrpfx(...);`
6710 // `redir_var = 1; zshlex();`
6711 let idstring = inner.to_string();
6712 redir_var = true;
6713 zshlex();
6714 // c:1953-1958 — `if (IS_REDIROP(tok) && tokfd == -1)
6715 // { *cmplx = c = 1; nrediradd = par_redir(&r, id);
6716 // p += nrediradd; sr += nrediradd; }`
6717 if IS_REDIROP(tok()) && tokfd() == -1 {
6718 *cmplx = 1;
6719 let nrediradd = par_redir_wordcode(&mut r, Some(&idstring));
6720 p += nrediradd as usize;
6721 sr += nrediradd;
6722 } else if postassigns > 0 {
6723 // c:1959-1966 — postassigns path: emit
6724 // WCB_ASSIGN(SCALAR, INC, 0) + name + ""
6725 postassigns += 1;
6726 ecadd(WCB_ASSIGN(WC_ASSIGN_SCALAR, WC_ASSIGN_INC, 0));
6727 ecstr(&s);
6728 ecstr("");
6729 } else {
6730 // c:1968-1972 — `else { ecstr(toksave); argc++; }`
6731 ecstr(&s);
6732 argc += 1;
6733 }
6734 }
6735 }
6736 }
6737 if !redir_var {
6738 // c:1977-1996 — normal (non-redir-var) STRING/TYPESET.
6739 if postassigns > 0 {
6740 // c:1979-1989 — typeset with bare-name arg → INC
6741 postassigns += 1;
6742 ecadd(WCB_ASSIGN(WC_ASSIGN_SCALAR, WC_ASSIGN_INC, 0));
6743 ecstr(&s);
6744 ecstr("");
6745 } else {
6746 ecstr(&s);
6747 argc += 1;
6748 }
6749 zshlex();
6750 }
6751 isnull = false;
6752 }
6753 ENVSTRING => {
6754 // c:2005-2026 — mid-cmd ENVSTRING (under intypeset
6755 // context). Emits WCB_ASSIGN(SCALAR, NEW, 0) then
6756 // ecstr(name) + ecstr(value), tracking the first
6757 // postassign offset in `ppost` (which the trailing
6758 // WCB_TYPESET header points to).
6759 if postassigns == 0 {
6760 ppost = ecadd(0);
6761 }
6762 postassigns += 1;
6763 // c:2010-2014 — `for (ptr = tokstr; *ptr && *ptr != Inbrack
6764 // && *ptr != '=' && *ptr != '+'; ptr++); if (*ptr == Inbrack)
6765 // skipparens(Inbrack, Outbrack, &ptr);`.
6766 let raw = tokstr().unwrap_or_default();
6767 let bytes: Vec<char> = raw.chars().collect();
6768 let mut idx = 0usize;
6769 while idx < bytes.len() {
6770 let ch = bytes[idx];
6771 if ch == '\u{91}' /* Inbrack */
6772 || ch == '=' || ch == '+' || ch == '\u{8d}'
6773 /* Equals */
6774 {
6775 break;
6776 }
6777 idx += 1;
6778 }
6779 if idx < bytes.len() && bytes[idx] == '\u{91}'
6780 /* Inbrack */
6781 {
6782 // c:2014 — `skipparens(Inbrack, Outbrack, &ptr);`.
6783 let byte_off: usize = bytes[..idx].iter().map(|c| c.len_utf8()).sum();
6784 let mut cursor: &str = &raw[byte_off..];
6785 let _ = crate::ported::utils::skipparens('\u{91}', '\u{92}', &mut cursor);
6786 let consumed = raw.len() - byte_off - cursor.len();
6787 let advance_chars = raw[byte_off..byte_off + consumed].chars().count();
6788 idx += advance_chars;
6789 while idx < bytes.len() {
6790 let ch = bytes[idx];
6791 if ch == '=' || ch == '+' || ch == '\u{8d}' {
6792 break;
6793 }
6794 idx += 1;
6795 }
6796 }
6797 let name: String = bytes[..idx].iter().collect();
6798 let str_off = if idx < bytes.len() && (bytes[idx] == '=' || bytes[idx] == '\u{8d}')
6799 {
6800 idx + 1
6801 } else {
6802 idx
6803 };
6804 let value: String = bytes[str_off..].iter().collect();
6805 ecadd(WCB_ASSIGN(WC_ASSIGN_SCALAR, WC_ASSIGN_NEW, 0));
6806 ecstr(&name);
6807 ecstr(&value);
6808 isnull = false;
6809 zshlex();
6810 }
6811 ENVARRAY => {
6812 // c:2027-2050 — mid-cmd ENVARRAY (typeset N=(…) form).
6813 // C tracks postassigns + ppost the same as ENVSTRING,
6814 // but the inner emit is WCB_ASSIGN(ARRAY, NEW, n)
6815 // with `n` patched in after par_nl_wordlist consumes
6816 // the elements. C also toggles intypeset=0 around the
6817 // wordlist so the lexer doesn't try to re-emit
6818 // assignments inside the array.
6819 *cmplx = 1;
6820 if postassigns == 0 {
6821 ppost = ecadd(0);
6822 }
6823 postassigns += 1;
6824 let parr = ecadd(0);
6825 let raw = tokstr().unwrap_or_default();
6826 let is_inc = raw.ends_with('+');
6827 let name = if is_inc {
6828 &raw[..raw.len() - 1]
6829 } else {
6830 raw.as_str()
6831 };
6832 let flag = if is_inc { WC_ASSIGN_INC } else { WC_ASSIGN_NEW };
6833 ecstr(name);
6834 cmdpush(CS_ARRAY as u8);
6835 set_intypeset(false);
6836 zshlex();
6837 // c:2044 — `n = par_nl_wordlist();` (parse.c:2379-2391).
6838 // SEPER + NEWLIN both allowed between elements.
6839 let mut nelem = 0u32;
6840 loop {
6841 let t = tok();
6842 if t != STRING_LEX && t != SEPER && t != NEWLIN {
6843 break;
6844 }
6845 if t == STRING_LEX {
6846 ecstr(&tokstr().unwrap_or_default());
6847 nelem += 1;
6848 }
6849 zshlex();
6850 }
6851 ECBUF.with_borrow_mut(|b| {
6852 if parr < b.len() {
6853 b[parr] = WCB_ASSIGN(WC_ASSIGN_ARRAY, flag, nelem);
6854 }
6855 });
6856 cmdpop();
6857 set_intypeset(true);
6858 if tok() != OUTPAR_TOK {
6859 zerr("expected `)' after array assignment");
6860 return 0;
6861 }
6862 isnull = false;
6863 zshlex();
6864 }
6865 t if IS_REDIROP(t) => {
6866 // c:1999-2010 — `nrediradd = par_redir(&r, NULL);
6867 // p += nrediradd; if (ppost) ppost += nrediradd;
6868 // sr += nrediradd;`
6869 *cmplx = 1;
6870 let added = par_redir_wordcode(&mut r, None);
6871 if added == 0 {
6872 break;
6873 }
6874 p += added as usize;
6875 if ppost != 0 {
6876 ppost += added as usize;
6877 }
6878 sr += added;
6879 }
6880 INOUTPAR => {
6881 // c:2051 — `} else if (tok == INOUTPAR) {`
6882 // c:2052 — `zlong oldlineno = lineno;`
6883 let oldlineno = lineno();
6884 // c:2053 — `int onp, so, oecssub = ecssub;`
6885 let oecssub = ECSSUB.get();
6886 // c:2055-2057 — `if (!isset(MULTIFUNCDEF) && argc > 1) YYERROR;`
6887 if !isset(MULTIFUNCDEF) && argc > 1 {
6888 zerr("par_simple: too many function names for funcdef");
6889 return 0;
6890 }
6891 // c:2058-2060 — `if (assignments || postassigns) YYERROR;`
6892 if assignments || postassigns > 0 {
6893 zerr("par_simple: assignments before funcdef");
6894 return 0;
6895 }
6896 // c:2061-2068 — hasalias check + zwarn — skipped (no
6897 // alias tracking on the wordcode path).
6898
6899 // c:2070 — `*cmplx = c;`
6900 *cmplx = c_saved;
6901 // c:2071 — `lineno = 0;`
6902 set_lineno(0);
6903 // c:2072 — `incmdpos = 1;`
6904 set_incmdpos(true);
6905 // c:2073 — `cmdpush(CS_FUNCDEF);`
6906 cmdpush(CS_FUNCDEF as u8);
6907 // c:2074 — `zshlex();`
6908 zshlex();
6909 // c:2075-2076 — `while (tok == SEPER) zshlex();`
6910 while tok() == SEPER {
6911 zshlex();
6912 }
6913 // c:2079 — `ecispace(p + 1, 1); ecbuf[p+1] = argc;
6914 // ecadd(0)*4`. Insert the argc word at p+1, then
6915 // append 4 placeholder words.
6916 ecispace(p + 1, 1);
6917 ECBUF.with_borrow_mut(|b| {
6918 if p + 1 < b.len() {
6919 b[p + 1] = argc;
6920 }
6921 });
6922 // c:2080-2083 — four metadata placeholder slots.
6923 ecadd(0);
6924 ecadd(0);
6925 ecadd(0);
6926 ecadd(0);
6927
6928 // c:2085 — `ecnfunc++;`
6929 ECNFUNC.set(ECNFUNC.get() + 1);
6930 // c:2086 — `ecssub = so = ecsoffs;`
6931 let so = ECSOFFS.get();
6932 ECSSUB.set(so);
6933 // c:2087 — `onp = ecnpats;`
6934 let onp = ECNPATS.with(|cc| cc.get());
6935 // c:2088 — `ecnpats = 0;`
6936 ECNPATS.with(|cc| cc.set(0));
6937
6938 // c:2091 — `int c = 0;` — INNER cmplx for the body
6939 // parse. Local to each branch; C's enclosing *cmplx
6940 // is NOT modified by the body.
6941 let mut body_c: i32 = 0;
6942 // c:2090 — `if (tok == INBRACE) {`
6943 if tok() == INBRACE_TOK {
6944 // c:2093 — `zshlex();`
6945 zshlex();
6946 // c:2094 — `par_list(&c);`
6947 par_list_wordcode(&mut body_c);
6948 // c:2095-2101 — `if (tok != OUTBRACE) { cmdpop();
6949 // lineno += oldlineno; ecnpats = onp;
6950 // ecssub = oecssub; YYERROR; }`
6951 if tok() != OUTBRACE_TOK {
6952 cmdpop();
6953 set_lineno(lineno() + oldlineno);
6954 ECNPATS.with(|cc| cc.set(onp));
6955 ECSSUB.set(oecssub);
6956 zerr("par_simple: funcdef expected `}`");
6957 return 0;
6958 }
6959 // c:2102-2105 — `if (argc == 0) incmdpos = 0;`
6960 if argc == 0 {
6961 set_incmdpos(false);
6962 }
6963 // c:2106 — `zshlex();`
6964 zshlex();
6965 } else {
6966 // c:2107-2132 — short-body funcdef form: `f() cmd`
6967 // or `() cmd`. Wraps single par_cmd result in a
6968 // synthetic WC_LIST / WC_SUBLIST /
6969 // WC_PIPE(WC_PIPE_END, 0) header trio.
6970 let ll = ecadd(0);
6971 let sl = ecadd(0);
6972 ecadd(WCB_PIPE(WC_PIPE_END, 0));
6973 let ok = par_cmd_wordcode(&mut body_c, if argc == 0 { 1 } else { 0 });
6974 if !ok {
6975 cmdpop();
6976 zerr("par_simple: funcdef short-body: missing command");
6977 return 0;
6978 }
6979 if argc == 0 {
6980 // c:2118-2127 — anonymous funcdef may take args
6981 // after the body; first one already read.
6982 set_incmdpos(false);
6983 }
6984 // c:2130-2131 — inner sublist/list use inner cmplx.
6985 let used = ECUSED.get() as usize;
6986 set_sublist_code(
6987 sl,
6988 WC_SUBLIST_END as i32,
6989 0,
6990 (used.saturating_sub(1 + sl)) as i32,
6991 body_c != 0,
6992 );
6993 set_list_code(ll, Z_SYNC | Z_END, body_c != 0);
6994 }
6995 let _ = body_c;
6996 // c:2133 — `cmdpop();`
6997 cmdpop();
6998
6999 // c:2135 — `ecadd(WCB_END());`
7000 ecadd(WCB_END());
7001 // c:2136-2139 — fill 4 metadata slots at p+argc+2..5
7002 let p_argc = (p + (argc as usize) + 2) as usize;
7003 let cur_so = ECSOFFS.get();
7004 let np_now = ECNPATS.with(|cc| cc.get());
7005 ECBUF.with_borrow_mut(|b| {
7006 b[p_argc] = (so - oecssub) as wordcode;
7007 b[p_argc + 1] = (cur_so - so) as wordcode;
7008 b[p_argc + 2] = np_now as wordcode;
7009 b[p_argc + 3] = 0;
7010 });
7011
7012 // c:2141-2143 — `ecnpats = onp; ecssub = oecssub; ecnfunc++;`
7013 ECNPATS.with(|cc| cc.set(onp));
7014 ECSSUB.set(oecssub);
7015 ECNFUNC.set(ECNFUNC.get() + 1);
7016
7017 // c:2145 — `ecbuf[p] = WCB_FUNCDEF(ecused - 1 - p);`
7018 let used = ECUSED.get() as usize;
7019 let header_off = used.saturating_sub(1 + p) as wordcode;
7020 ECBUF.with_borrow_mut(|b| {
7021 b[p] = WCB_FUNCDEF(header_off);
7022 });
7023
7024 // c:2147-2172 — `if (argc == 0) { /* anonymous fn args */ }`
7025 if argc == 0 {
7026 // c:2150 — `int parg = ecadd(0);`
7027 let mut parg = ecadd(0);
7028 // c:2151 — `ecadd(0);`
7029 ecadd(0);
7030 // c:2152 — `while (tok == STRING || IS_REDIROP(tok)) {`
7031 while tok() == STRING_LEX || IS_REDIROP(tok()) {
7032 if tok() == STRING_LEX {
7033 // c:2155-2157
7034 ecstr(&tokstr().unwrap_or_default());
7035 argc += 1;
7036 zshlex();
7037 } else {
7038 // c:2159-2165 — *cmplx=c=1; nrediradd=par_redir;
7039 // p += nrediradd; ppost += nrediradd if ppost;
7040 // sr += nrediradd; parg += nrediradd;
7041 *cmplx = 1;
7042 let added = par_redir_wordcode(&mut r, None);
7043 if added == 0 {
7044 break;
7045 }
7046 p += added as usize;
7047 if ppost != 0 {
7048 ppost += added as usize;
7049 }
7050 sr += added;
7051 parg += added as usize;
7052 }
7053 }
7054 // c:2168-2169 — `if (argc > 0) *cmplx = 1;`
7055 if argc > 0 {
7056 *cmplx = 1;
7057 }
7058 // c:2170 — `ecbuf[parg] = ecused - parg;`
7059 // c:2171 — `ecbuf[parg+1] = argc;`
7060 let used2 = ECUSED.get() as usize;
7061 ECBUF.with_borrow_mut(|b| {
7062 b[parg] = (used2 - parg) as wordcode;
7063 b[parg + 1] = argc;
7064 });
7065 }
7066 // c:2173 — `lineno += oldlineno;`
7067 set_lineno(lineno() + oldlineno);
7068
7069 // c:2175-2177 — `isfunc = 1; isnull = 0; break;`
7070 isfunc = true;
7071 isnull = false;
7072 break;
7073 }
7074 _ => break,
7075 }
7076 }
7077
7078 // c:2173-2176 — `if (isnull && !(sr + nr)) { ecused = oecused;
7079 // return 0; }` — undo everything including pre-cmd assignments
7080 // if no actual command word emerged.
7081 if isnull && sr + nr == 0 && !assignments {
7082 ECUSED.set(p as i32);
7083 return 0;
7084 }
7085 // c:2186-2187 — `incmdpos = 1; intypeset = 0;` — reset before
7086 // the placeholder patch so the next-token lex doesn't carry
7087 // typeset/incond state.
7088 set_incmdpos(true);
7089 set_intypeset(false);
7090 // c:2189-2199 — `if (!isfunc) { if (is_typeset) ecbuf[p] =
7091 // WCB_TYPESET(argc); else ecbuf[p] = WCB_SIMPLE(argc); }`.
7092 // When isfunc=true the INOUTPAR branch already wrote WCB_FUNCDEF
7093 // at p; do NOT clobber it.
7094 if !isfunc {
7095 let header = if is_typeset {
7096 if postassigns > 0 {
7097 ECBUF.with_borrow_mut(|b| {
7098 if ppost < b.len() {
7099 b[ppost] = postassigns;
7100 }
7101 });
7102 } else {
7103 ecadd(0);
7104 }
7105 WCB_TYPESET(argc)
7106 } else {
7107 WCB_SIMPLE(argc)
7108 };
7109 ECBUF.with_borrow_mut(|b| {
7110 if p < b.len() {
7111 b[p] = header;
7112 }
7113 });
7114 }
7115 1 + sr
7116}
7117
7118/// Port of `par_redir(int *rp, char *idstring)` from
7119/// `Src/parse.c:2229-2345` — the wordcode-emitting variant that
7120/// pushes WCB_REDIR + fd + ecstrcode(name) into ECBUF. Distinct
7121/// from the AST `par_redir` (parse.rs:3771) which builds a
7122/// ZshRedir struct for the AST executor pipeline.
7123///
7124/// Returns the number of wordcodes added (3 for the basic shape,
7125/// 4 with idstring, 5 for HEREDOC[DASH] which carries the
7126/// terminator strings inline). Returns 0 on parse error.
7127///
7128/// `idstring` mirrors C's `char *idstring` parameter — `None` =
7129/// NULL (no `{var}>file` brace-FD shape), `Some(id)` = the captured
7130/// `{var}` name. C callers without a var pass NULL inline; Rust
7131/// callers do the same with `None`.
7132fn par_redir_wordcode(rp: &mut usize, idstring: Option<&str>) -> i32 {
7133 // c:2231 — `int r = *rp, type, fd1, oldcmdpos, oldnc, ncodes;`
7134 let r: usize = *rp;
7135 let mut r#type: i32;
7136 let fd1: i32;
7137 let oldcmdpos: bool;
7138 let oldnc: i32;
7139 let mut ncodes: usize;
7140 // c:2232 — `char *name;`
7141 let name: String;
7142
7143 // c:2234 — `oldcmdpos = incmdpos;`
7144 oldcmdpos = incmdpos();
7145 // c:2235 — `incmdpos = 0;`
7146 set_incmdpos(false);
7147 // c:2236 — `oldnc = nocorrect;`
7148 oldnc = nocorrect();
7149 // c:2237-2238 — `if (tok != INANG && tok != INOUTANG) nocorrect = 1;`
7150 if tok() != INANG_TOK && tok() != INOUTANG {
7151 set_nocorrect(1);
7152 }
7153 // c:2239 — `type = redirtab[tok - OUTANG];`
7154 // Map current redirop token to redirtab index — matches order of
7155 // C `enum { OUTANG, OUTANGBANG, DOUTANG, DOUTANGBANG, INANG,
7156 // INOUTANG, DINANG, DINANGDASH, INANGAMP, OUTANGAMP, AMPOUTANG,
7157 // OUTANGAMPBANG, DOUTANGAMP, DOUTANGAMPBANG, TRINANG }`.
7158 r#type = match tok() {
7159 OUTANG_TOK => REDIR_WRITE,
7160 OUTANGBANG => REDIR_WRITENOW,
7161 DOUTANG => REDIR_APP,
7162 DOUTANGBANG => REDIR_APPNOW,
7163 INANG_TOK => REDIR_READ,
7164 INOUTANG => REDIR_READWRITE,
7165 DINANG => REDIR_HEREDOC,
7166 DINANGDASH => REDIR_HEREDOCDASH,
7167 INANGAMP => REDIR_MERGEIN,
7168 OUTANGAMP => REDIR_MERGEOUT,
7169 AMPOUTANG => REDIR_ERRWRITE,
7170 OUTANGAMPBANG => REDIR_ERRWRITENOW,
7171 DOUTANGAMP => REDIR_ERRAPP,
7172 DOUTANGAMPBANG => REDIR_ERRAPPNOW,
7173 TRINANG => REDIR_HERESTR,
7174 _ => {
7175 set_incmdpos(oldcmdpos);
7176 set_nocorrect(oldnc);
7177 return 0;
7178 }
7179 };
7180 // c:2240 — `fd1 = tokfd;`
7181 fd1 = tokfd();
7182 // c:2241 — `zshlex();`
7183 zshlex();
7184 // c:2242-2243 — `if (tok != STRING && tok != ENVSTRING) YYERROR(ecused);`
7185 if tok() != STRING_LEX && tok() != ENVSTRING {
7186 set_incmdpos(oldcmdpos);
7187 set_nocorrect(oldnc);
7188 zerr("expected word after redirection");
7189 return 0;
7190 }
7191 // c:2244 — `incmdpos = oldcmdpos;`
7192 set_incmdpos(oldcmdpos);
7193 // c:2245 — `nocorrect = oldnc;`
7194 set_nocorrect(oldnc);
7195
7196 // c:2248-2249 — `if (fd1 == -1) fd1 = IS_READFD(type) ? 0 : 1;`
7197 let fd1 = if fd1 == -1 {
7198 if is_readfd(r#type) {
7199 0
7200 } else {
7201 1
7202 }
7203 } else {
7204 fd1
7205 };
7206
7207 // c:2251 — `name = tokstr;`
7208 name = tokstr().unwrap_or_default();
7209
7210 // c:2253-2321 — switch on type:
7211 match r#type {
7212 // c:2254-2300 — REDIR_HEREDOC / REDIR_HEREDOCDASH
7213 x if x == REDIR_HEREDOC || x == REDIR_HEREDOCDASH => {
7214 // c:2257 — `struct heredocs **hd;`
7215 // c:2258 — `int htype = type;`
7216 let htype = r#type;
7217 // c:2260-2261 — `if (strchr(tokstr, '\n')) YYERROR(ecused);`
7218 if name.contains('\n') {
7219 zerr("here-doc terminator contains newline");
7220 return 0;
7221 }
7222 // c:2263-2273 — `ncodes = 5; if (idstring) { type |= MASK; ncodes = 6; }`
7223 if idstring.is_some() {
7224 r#type |= REDIR_VARID_MASK;
7225 ncodes = 6;
7226 } else {
7227 ncodes = 5;
7228 }
7229 // c:2277 — `ecispace(r, ncodes);`
7230 ecispace(r, ncodes);
7231 // c:2278 — `*rp = r + ncodes;`
7232 *rp = r + ncodes;
7233 // c:2279 — `ecbuf[r] = WCB_REDIR(type | REDIR_FROM_HEREDOC_MASK);`
7234 ECBUF.with_borrow_mut(|b| {
7235 b[r] = WCB_REDIR((r#type | REDIR_FROM_HEREDOC_MASK) as wordcode);
7236 // c:2280 — `ecbuf[r + 1] = fd1;`
7237 b[r + 1] = fd1 as wordcode;
7238 });
7239 // c:2282-2286 — r+2..4 are filled later by setheredoc.
7240 // c:2287-2288 — `if (idstring) ecbuf[r + 5] = ecstrcode(idstring);`
7241 if let Some(id) = idstring {
7242 let coded = ecstrcode(id);
7243 ECBUF.with_borrow_mut(|b| {
7244 b[r + 5] = coded;
7245 });
7246 }
7247 // c:2290-2296 — `for (hd = &hdocs; *hd; hd = &(*hd)->next);
7248 // *hd = zalloc(sizeof(struct heredocs));
7249 // (*hd)->next = NULL;
7250 // (*hd)->type = htype;
7251 // (*hd)->pc = r;
7252 // (*hd)->str = tokstr;`
7253 HDOCS.with_borrow_mut(|head| {
7254 let mut cur = head;
7255 while cur.is_some() {
7256 cur = &mut cur.as_mut().unwrap().next; // c:2290
7257 }
7258 *cur = Some(Box::new(crate::ported::zsh_h::heredocs {
7259 // c:2292-2296
7260 next: None,
7261 typ: htype,
7262 pc: r as i32,
7263 str: Some(name.clone()),
7264 }));
7265 });
7266 // c:2298 — `zshlex();`
7267 zshlex();
7268 // c:2299 — `return ncodes;`
7269 return ncodes as i32;
7270 }
7271 // c:2301-2308 — REDIR_WRITE / REDIR_WRITENOW
7272 x if x == REDIR_WRITE || x == REDIR_WRITENOW => {
7273 // c:2303-2305 — `if (tokstr[0] == OutangProc && tokstr[1] == Inpar)
7274 // type = REDIR_OUTPIPE;`
7275 let nb: Vec<char> = name.chars().collect();
7276 if nb.len() >= 2 && nb[0] == '\u{96}' && nb[1] == '\u{88}' {
7277 r#type = REDIR_OUTPIPE;
7278 } else if nb.len() >= 2 && nb[0] == '\u{94}' && nb[1] == '\u{88}' {
7279 // c:2306-2307 — `else if (tokstr[0] == Inang && tokstr[1] == Inpar) YYERROR;`
7280 zerr("par_redir: < before >");
7281 return 0;
7282 }
7283 }
7284 // c:2309-2315 — REDIR_READ
7285 x if x == REDIR_READ => {
7286 let nb: Vec<char> = name.chars().collect();
7287 if nb.len() >= 2 && nb[0] == '\u{94}' && nb[1] == '\u{88}' {
7288 r#type = REDIR_INPIPE;
7289 } else if nb.len() >= 2 && nb[0] == '\u{96}' && nb[1] == '\u{88}' {
7290 zerr("par_redir: > before <");
7291 return 0;
7292 }
7293 }
7294 // c:2316-2320 — REDIR_READWRITE
7295 x if x == REDIR_READWRITE => {
7296 let nb: Vec<char> = name.chars().collect();
7297 if nb.len() >= 2 && (nb[0] == '\u{94}' || nb[0] == '\u{96}') && nb[1] == '\u{88}' {
7298 r#type = if nb[0] == '\u{94}' {
7299 REDIR_INPIPE
7300 } else {
7301 REDIR_OUTPIPE
7302 };
7303 }
7304 }
7305 _ => {}
7306 }
7307 // c:2322 — `zshlex();`
7308 zshlex();
7309
7310 // c:2326-2333 — `if (idstring) { type |= MASK; ncodes = 4; } else ncodes = 3;`
7311 if idstring.is_some() {
7312 r#type |= REDIR_VARID_MASK;
7313 ncodes = 4;
7314 } else {
7315 ncodes = 3;
7316 }
7317
7318 // c:2334 — `ecispace(r, ncodes);`
7319 ecispace(r, ncodes);
7320 // c:2335 — `*rp = r + ncodes;`
7321 *rp = r + ncodes;
7322 // c:2336 — `ecbuf[r] = WCB_REDIR(type);`
7323 let coded_name = ecstrcode(&name);
7324 ECBUF.with_borrow_mut(|b| {
7325 b[r] = WCB_REDIR(r#type as wordcode);
7326 // c:2337 — `ecbuf[r + 1] = fd1;`
7327 b[r + 1] = fd1 as wordcode;
7328 // c:2338 — `ecbuf[r + 2] = ecstrcode(name);`
7329 b[r + 2] = coded_name;
7330 });
7331 // c:2339-2340 — `if (idstring) ecbuf[r + 3] = ecstrcode(idstring);`
7332 if let Some(id) = idstring {
7333 let coded_id = ecstrcode(id);
7334 ECBUF.with_borrow_mut(|b| {
7335 b[r + 3] = coded_id;
7336 });
7337 }
7338 // c:2342 — `return ncodes;`
7339 ncodes as i32
7340}
7341
7342/// Port of `IS_READFD(type)` macro from `Src/zsh.h` — determines
7343/// default fd (0 for read-ish, 1 for write-ish) when none specified.
7344fn is_readfd(t: i32) -> bool {
7345 matches!(
7346 t,
7347 x if x == REDIR_READ
7348 || x == REDIR_READWRITE
7349 || x == REDIR_MERGEIN
7350 || x == REDIR_HEREDOC
7351 || x == REDIR_HEREDOCDASH
7352 || x == REDIR_HERESTR
7353 )
7354}
7355
/// Parse a complete program (a list of lists); the top-level entry.
///
/// Calls `parse_program_until` with no end-token set (`None`), so
/// parsing is not bounded by any caller-supplied closing token.
/// Direct port of zsh/Src/parse.c:614-720 `parse_event` / `par_list` /
/// `par_event` flow. C distinguishes COND_EVENT (single command
/// for here-string) from full event parse; zshrs's parse_program
/// is the full-event entry.
fn parse_program() -> ZshProgram {
    parse_program_until(None)
}
7366
7367/// Parse a program until we hit an end token
7368/// Parse a program until one of `end_tokens` is seen (or EOF).
7369/// Drives par_list in a loop. C equivalent: the body of par_event
7370/// (parse.c:635-695) iterating par_list against the lexer.
7371fn parse_program_until(end_tokens: Option<&[lextok]>) -> ZshProgram {
7372 let mut lists = Vec::new();
7373
7374 loop {
7375 // Skip separators
7376 while tok() == SEPER || tok() == NEWLIN {
7377 zshlex();
7378 }
7379
7380 if tok() == ENDINPUT || tok() == LEXERR {
7381 break;
7382 }
7383
7384 // Check for end tokens
7385 if let Some(end_toks) = end_tokens {
7386 if end_toks.contains(&tok()) {
7387 break;
7388 }
7389 }
7390
7391 // Also stop at these tokens when not explicitly looking for them
7392 // Note: Else/Elif/Then are NOT here - they're handled by par_if
7393 // to allow nested if statements inside case arms, loops, etc.
7394 //
7395 // c:Src/parse.c:par_event — when an orphan terminator (DONE
7396 // outside a loop, FI outside an if, ESAC outside a case)
7397 // appears at the top level (end_tokens=None), C errors via
7398 // YYERROR. zshrs's `break` silently accepted `done`/`fi`/
7399 // `esac` as no-op input. Error at the outermost call so
7400 // unscoped terminators don't sneak through; nested calls
7401 // still break cleanly via the end_tokens contains-check
7402 // above.
7403 match tok() {
7404 DONE | FI | ESAC | DOLOOP if end_tokens.is_none() => {
7405 // c:Src/parse.c:par_event — emit the specific token
7406 // name (`done`, `fi`, `esac`, `do`) so error-parsing
7407 // tools can identify the unmatched terminator. C zsh
7408 // writes `parse error near \`<tok>'`; the Rust port
7409 // was emitting a generic "orphan terminator" string.
7410 // Bug #142, #413.
7411 let name = match tok() {
7412 DONE => "done",
7413 FI => "fi",
7414 ESAC => "esac",
7415 DOLOOP => "do",
7416 _ => "orphan terminator",
7417 };
7418 zerr(&format!("parse error near `{}'", name));
7419 break;
7420 }
7421 DSEMI | SEMIAMP | SEMIBAR if end_tokens.is_none() => {
7422 // c:Src/parse.c:par_event — case-arm terminators
7423 // (`;;`, `;&`, `;|`) outside a case construct are a
7424 // parse error. zshrs's `break` silently accepted them
7425 // at top level, truncating the rest of the script.
7426 // Bug #141 in docs/BUGS.md.
7427 let name = match tok() {
7428 DSEMI => ";;",
7429 SEMIAMP => ";&",
7430 SEMIBAR => ";|",
7431 _ => "case terminator",
7432 };
7433 zerr(&format!("parse error near `{}'", name));
7434 break;
7435 }
7436 OUTBRACE_TOK if end_tokens.is_none() => {
7437 // c:Src/parse.c:par_event — orphan `}` (no matching
7438 // `{` opener) at top level is a parse error. zshrs's
7439 // generic break swallowed it silently, leaving the
7440 // `echo a` in `echo a }` running and ignoring the
7441 // stray brace. Bug #168 in docs/BUGS.md.
7442 zerr("parse error near `}'");
7443 break;
7444 }
7445 OUTBRACE_TOK | DSEMI | SEMIAMP | SEMIBAR | DONE | FI | ESAC | ZEND => break,
7446 _ => {}
7447 }
7448
7449 match par_list() {
7450 Some(list) => {
7451 let detected = simple_name_with_inoutpar(&list);
7452 lists.push(list);
7453 // Synthesize a FuncDef for the `name() { body }` shape
7454 // at parse time so body_source is captured while the
7455 // lexer still has the input. The lexer port emits
7456 // `name(` as a single Word ending in `<Inpar><Outpar>`,
7457 // so the Simple list is followed by an Inbrace once
7458 // separators are skipped. For `name() cmd args` the
7459 // body has already been swallowed into the same
7460 // Simple's words tail — synthesize directly from there.
7461 if let Some((names, body_argv)) = detected {
7462 if !body_argv.is_empty() {
7463 // One-line body already in the Simple. Build
7464 // a Simple from body_argv as the function body.
7465 lists.pop();
7466 let body_simple = ZshCommand::Simple(ZshSimple {
7467 assigns: Vec::new(),
7468 words: body_argv,
7469 redirs: Vec::new(),
7470 });
7471 let body_list = ZshList {
7472 sublist: ZshSublist {
7473 pipe: ZshPipe {
7474 cmd: body_simple,
7475 next: None,
7476 lineno: lineno(),
7477 merge_stderr: false,
7478 },
7479 next: None,
7480 flags: SublistFlags::default(),
7481 },
7482 flags: ListFlags::default(),
7483 };
7484 let funcdef = ZshCommand::FuncDef(ZshFuncDef {
7485 names,
7486 body: Box::new(ZshProgram {
7487 lists: vec![body_list],
7488 }),
7489 tracing: false,
7490 auto_call_args: None,
7491 body_source: None,
7492 });
7493 let synthetic = ZshList {
7494 sublist: ZshSublist {
7495 pipe: ZshPipe {
7496 cmd: funcdef,
7497 next: None,
7498 lineno: lineno(),
7499 merge_stderr: false,
7500 },
7501 next: None,
7502 flags: SublistFlags::default(),
7503 },
7504 flags: ListFlags::default(),
7505 };
7506 lists.push(synthetic);
7507 continue;
7508 }
7509 // Else: words.len() == 1 (only the trailing `name()`
7510 // word), brace body follows. `names` may carry
7511 // multiple identifiers from the `fna fnb fnc()`
7512 // shorthand — all share the same brace body per
7513 // src/zsh/Src/parse.c:1666 par_funcdef wordlist.
7514 // Skip separators on the real lexer; safe because
7515 // parse_program's next iteration would also skip them.
7516 while tok() == SEPER || tok() == NEWLIN {
7517 zshlex();
7518 }
7519 if tok() == INBRACE_TOK {
7520 // Capture body_start BEFORE the lexer
7521 // advances past the first body token. The
7522 // outer zshlex() consumed `{`; lexer.pos
7523 // is now right after `{`. The next
7524 // `zshlex()` would advance past `echo`,
7525 // making body_start land mid-body and
7526 // lose the first word — `typeset -f f`
7527 // printed `a; echo b` instead of
7528 // `echo a; echo b` for `f() { echo a;
7529 // echo b }`.
7530 let body_start = pos();
7531 zshlex();
7532 // c:Src/parse.c — synth funcdef body terminates
7533 // at OUTBRACE_TOK. Explicit end-token avoids
7534 // the top-level stray-`}` arm. Bug #167/#168.
7535 let body = parse_program_until(Some(&[OUTBRACE_TOK]));
7536 let body_end = if tok() == OUTBRACE_TOK {
7537 pos().saturating_sub(1)
7538 } else {
7539 pos()
7540 };
7541 let body_source = input_slice(body_start, body_end)
7542 .map(|s| s.trim().to_string())
7543 .filter(|s| !s.is_empty());
7544 if tok() == OUTBRACE_TOK {
7545 zshlex();
7546 }
7547 // Replace the Simple list with a FuncDef list.
7548 lists.pop();
7549 let funcdef = ZshCommand::FuncDef(ZshFuncDef {
7550 names,
7551 body: Box::new(body),
7552 tracing: false,
7553 auto_call_args: None,
7554 body_source,
7555 });
7556 let synthetic = ZshList {
7557 sublist: ZshSublist {
7558 pipe: ZshPipe {
7559 cmd: funcdef,
7560 next: None,
7561 lineno: lineno(),
7562 merge_stderr: false,
7563 },
7564 next: None,
7565 flags: SublistFlags::default(),
7566 },
7567 flags: ListFlags::default(),
7568 };
7569 lists.push(synthetic);
7570 } else if !matches!(tok(), ENDINPUT | OUTBRACE_TOK | SEPER | NEWLIN) {
7571 // No-brace one-line body: `foo() echo hello`.
7572 // Parse a single command for the body.
7573 let body_cmd = par_cmd();
7574 if let Some(cmd) = body_cmd {
7575 let body_list = ZshList {
7576 sublist: ZshSublist {
7577 pipe: ZshPipe {
7578 cmd,
7579 next: None,
7580 lineno: lineno(),
7581 merge_stderr: false,
7582 },
7583 next: None,
7584 flags: SublistFlags::default(),
7585 },
7586 flags: ListFlags::default(),
7587 };
7588 lists.pop();
7589 let funcdef = ZshCommand::FuncDef(ZshFuncDef {
7590 names: names.clone(),
7591 body: Box::new(ZshProgram {
7592 lists: vec![body_list],
7593 }),
7594 tracing: false,
7595 auto_call_args: None,
7596 body_source: None,
7597 });
7598 let synthetic = ZshList {
7599 sublist: ZshSublist {
7600 pipe: ZshPipe {
7601 cmd: funcdef,
7602 next: None,
7603 lineno: lineno(),
7604 merge_stderr: false,
7605 },
7606 next: None,
7607 flags: SublistFlags::default(),
7608 },
7609 flags: ListFlags::default(),
7610 };
7611 lists.push(synthetic);
7612 }
7613 }
7614 }
7615 }
7616 None => break,
7617 }
7618 }
7619
7620 ZshProgram { lists }
7621}
7622
7623/// Parse an assignment
7624/// Parse an assignment word `NAME=value` or `NAME=(arr items)`.
7625/// Sub-routine of par_simple. The C source handles assignments
7626/// inline in par_simple via the ENVSTRING/ENVARRAY token paths
7627/// (parse.c:1842-2000ish); zshrs splits it out to a dedicated
7628/// helper for clarity.
7629fn parse_assign() -> Option<ZshAssign> {
7630 // Helper: locate the Equals-marker that delimits NAME from
7631 // VALUE in an assignment-shaped tokstr. The lexer META-encodes
7632 // EVERY `=` (including those inside `${var%%=foo}` strip
7633 // patterns or `[idx]=...` subscripts), so a naive
7634 // `tokstr.find(Equals)` would split at the first inner `=`
7635 // and break the whole assignment. Walk the string skipping
7636 // brace and bracket depth so the assignment's `=` (the one
7637 // after the last `]` of the LHS subscript / or after the
7638 // bare name) is the one we land on.
7639 fn find_assign_equals(s: &str) -> Option<usize> {
7640 let target = Equals;
7641 let mut brace = 0i32;
7642 let mut bracket = 0i32;
7643 let mut paren = 0i32;
7644 for (i, c) in s.char_indices() {
7645 match c {
7646 '{' | '\u{8f}' /* Inbrace */ => brace += 1,
7647 '}' | '\u{90}' /* Outbrace */ => {
7648 if brace > 0 {
7649 brace -= 1;
7650 }
7651 }
7652 '[' | '\u{91}' /* Inbrack */ => bracket += 1,
7653 ']' | '\u{92}' /* Outbrack */ => {
7654 if bracket > 0 {
7655 bracket -= 1;
7656 }
7657 }
7658 '(' | '\u{88}' /* Inpar */ => paren += 1,
7659 ')' | '\u{8a}' /* Outpar */ => {
7660 if paren > 0 {
7661 paren -= 1;
7662 }
7663 }
7664 _ if c == target && brace == 0 && bracket == 0 && paren == 0 => {
7665 return Some(i);
7666 }
7667 _ => {}
7668 }
7669 }
7670 None
7671 }
7672
7673 let _ts_tokstr = tokstr()?;
7674 let tokstr = _ts_tokstr.as_str();
7675
7676 // Parse name=value or name+=value.
7677 let (name, value_str, append) = if tok() == ENVARRAY {
7678 let (name, append) = if let Some(stripped) = tokstr.strip_suffix('+') {
7679 (stripped, true)
7680 } else {
7681 (tokstr, false)
7682 };
7683 (name.to_string(), String::new(), append)
7684 } else if let Some(pos) = find_assign_equals(tokstr) {
7685 let name_part = &tokstr[..pos];
7686 let (name, append) = if let Some(stripped) = name_part.strip_suffix('+') {
7687 (stripped, true)
7688 } else {
7689 (name_part, false)
7690 };
7691 (
7692 name.to_string(),
7693 tokstr[pos + Equals.len_utf8()..].to_string(),
7694 append,
7695 )
7696 } else if let Some(pos) = tokstr.find('=') {
7697 // Fallback to literal '=' for compatibility
7698 let name_part = &tokstr[..pos];
7699 let (name, append) = if let Some(stripped) = name_part.strip_suffix('+') {
7700 (stripped, true)
7701 } else {
7702 (name_part, false)
7703 };
7704 (name.to_string(), tokstr[pos + 1..].to_string(), append)
7705 } else {
7706 return None;
7707 };
7708
7709 let value = if tok() == ENVARRAY {
7710 // Array assignment: name=(...)
7711 // c:Src/parse.c:1895 par_simple ENVARRAY arm:
7712 // `int oldcmdpos = incmdpos; ... incmdpos = 0; ... zshlex();`
7713 // Reset incmdpos to false BEFORE the array body's first lex so
7714 // a leading `{...}` (brace expansion) doesn't trip the
7715 // empty-buf+incmdpos rule at lex.c:1141 that returns `{` as
7716 // STRING and lets the reswd_lookup promote it to INBRACE_TOK.
7717 let oldcmdpos = crate::ported::lex::incmdpos();
7718 crate::ported::lex::set_incmdpos(false);
7719 let mut elements = Vec::new();
7720 zshlex(); // skip past token
7721
7722 let mut arr_iters = 0;
7723 const MAX_ARRAY_ELEMENTS: usize = 10_000;
7724 while matches!(tok(), STRING_LEX | SEPER | NEWLIN) {
7725 arr_iters += 1;
7726 if arr_iters > MAX_ARRAY_ELEMENTS {
7727 zerr("array assignment exceeded maximum elements");
7728 break;
7729 }
7730 if tok() == STRING_LEX {
7731 let _ts_s = crate::ported::lex::tokstr();
7732 if let Some(s) = _ts_s.as_deref() {
7733 elements.push(s.to_string());
7734 }
7735 }
7736 zshlex();
7737 }
7738 // c:Src/parse.c — `incmdpos = oldcmdpos;` (restore at end of arm)
7739 crate::ported::lex::set_incmdpos(oldcmdpos);
7740
7741 // The closing Outpar is consumed here. The outer par_simple
7742 // loop will then `zshlex()` past whatever follows (typically
7743 // a separator or the next word) — calling zshlex twice in
7744 // tandem (here AND in par_simple) over-advances and merges
7745 // a following `name() { … }` funcdef into the same Simple.
7746 // We only consume Outpar; let the caller handle the rest.
7747 // Without this guard `g=(o1); f() { :; }` parsed as one
7748 // Simple with assigns=[g] and words=["f()"] (one token).
7749 if tok() == OUTPAR_TOK {
7750 // Note: do NOT zshlex() here. par_simple's `lexer
7751 // .zshlex()` after `parse_assign` returns advances past
7752 // the Outpar onto the next significant token.
7753 //
7754 // Force `incmdpos=true` so the next zshlex() recognizes
7755 // a follow-up `b=(...)` / `b=val` as Envarray/Envstring.
7756 // The lexer flips incmdpos to false on bare Outpar (which
7757 // is correct for subshell-close context), but for an
7758 // array-assignment close more assigns/words may follow.
7759 set_incmdpos(true);
7760 }
7761
7762 ZshAssignValue::Array(elements)
7763 } else {
7764 ZshAssignValue::Scalar(value_str)
7765 };
7766
7767 Some(ZshAssign {
7768 name,
7769 value,
7770 append,
7771 })
7772}
7773
7774/// AST `par_redir` variant accepting an idstring for the
7775/// `{var}>file` brace-FD shape. C signature
7776/// `par_redir(int *rp, char *idstring)` (parse.c:2229). The
7777/// idstring is stored in the resulting ZshRedir.varid for the
7778/// executor to bind the named variable to the chosen fd.
7779fn par_redir_with_id(idstring: Option<&str>) -> Option<ZshRedir> {
7780 let varid: Option<String> = idstring.map(|s| s.to_string());
7781 let rtype = match tok() {
7782 OUTANG_TOK => REDIR_WRITE,
7783 OUTANGBANG => REDIR_WRITENOW,
7784 DOUTANG => REDIR_APP,
7785 DOUTANGBANG => REDIR_APPNOW,
7786 INANG_TOK => REDIR_READ,
7787 INOUTANG => REDIR_READWRITE,
7788 DINANG => REDIR_HEREDOC,
7789 DINANGDASH => REDIR_HEREDOCDASH,
7790 TRINANG => REDIR_HERESTR,
7791 INANGAMP => REDIR_MERGEIN,
7792 OUTANGAMP => REDIR_MERGEOUT,
7793 AMPOUTANG => REDIR_ERRWRITE,
7794 OUTANGAMPBANG => REDIR_ERRWRITENOW,
7795 DOUTANGAMP => REDIR_ERRAPP,
7796 DOUTANGAMPBANG => REDIR_ERRAPPNOW,
7797 _ => return None,
7798 };
7799
7800 let fd = if tokfd() >= 0 {
7801 tokfd()
7802 } else if matches!(
7803 rtype,
7804 REDIR_READ
7805 | REDIR_READWRITE
7806 | REDIR_MERGEIN
7807 | REDIR_HEREDOC
7808 | REDIR_HEREDOCDASH
7809 | REDIR_HERESTR
7810 ) {
7811 0
7812 } else {
7813 1
7814 };
7815
7816 // c:2234-2245 — save/restore incmdpos and nocorrect around the
7817 // zshlex that consumes the redir target word:
7818 // oldcmdpos = incmdpos; incmdpos = 0;
7819 // oldnc = nocorrect;
7820 // if (tok != INANG && tok != INOUTANG) nocorrect = 1;
7821 // ... zshlex; check tok; ...
7822 // incmdpos = oldcmdpos; nocorrect = oldnc;
7823 // Without this, a redir target lexes in the parent's incmdpos
7824 // (re-promoting `{` / reswords) AND with parent nocorrect (so
7825 // spelling-correction wrongly runs inside `> $(cmd)` etc.).
7826 let oldcmdpos = incmdpos();
7827 set_incmdpos(false);
7828 let oldnc = nocorrect();
7829 let cur = tok();
7830 if cur != INANG_TOK && cur != INOUTANG {
7831 set_nocorrect(1);
7832 }
7833 zshlex();
7834
7835 let name = match tok() {
7836 STRING_LEX | ENVSTRING => {
7837 let n = tokstr().unwrap_or_default();
7838 // c:2244-2245 — restore incmdpos / nocorrect right after
7839 // the redir target word is confirmed, BEFORE the trailing
7840 // zshlex advances past it. The advance itself is deferred
7841 // below so REDIR_HEREDOC[DASH] can push onto HDOCS first
7842 // (matching the wordcode variant at parse.rs:6894-6908) —
7843 // otherwise the NEWLIN drained by that zshlex sees an
7844 // empty HDOCS list and gethere never collects the body.
7845 set_incmdpos(oldcmdpos);
7846 set_nocorrect(oldnc);
7847 n
7848 }
7849 _ => {
7850 set_incmdpos(oldcmdpos);
7851 set_nocorrect(oldnc);
7852 zerr("expected word after redirection");
7853 return None;
7854 }
7855 };
7856
7857 // Heredoc terminator capture. C parse.c:2254-2317 par_redir builds
7858 // a `struct heredocs` entry here for REDIR_HEREDOC[DASH]. zshrs
7859 // pushes onto HDOCS (canonical C linked list, c:2290-2296) AND
7860 // onto LEX_HEREDOCS (Rust-only AST-glue Vec carrying parsed-out
7861 // terminator/strip_tabs/quoted metadata for downstream AST
7862 // consumers). Quoted terminators (`<<'EOF'` / `<<"EOF"` / `<<\EOF`)
7863 // disable expansion in the body — Snull `\u{9d}` marks single-quote,
7864 // Dnull `\u{9e}` marks double-quote, Bnull `\u{9f}` marks
7865 // backslash-escaped chars.
7866 let heredoc_idx = if matches!(rtype, REDIR_HEREDOC | REDIR_HEREDOCDASH) {
7867 let strip_tabs = rtype == REDIR_HEREDOCDASH;
7868 let quoted = name.contains('\u{9d}')
7869 || name.contains('\u{9e}')
7870 || name.contains('\u{9f}')
7871 || name.starts_with('\'')
7872 || name.starts_with('"');
7873 let term = name
7874 .chars()
7875 .filter(|c| {
7876 *c != '\'' && *c != '"' && *c != '\u{9d}' && *c != '\u{9e}' && *c != '\u{9f}'
7877 })
7878 .collect::<String>();
7879 // c:2290-2296 — `for (hd = &hdocs; *hd; hd = &(*hd)->next);
7880 // *hd = zalloc(sizeof(struct heredocs));
7881 // (*hd)->next = NULL;
7882 // (*hd)->type = htype;
7883 // (*hd)->pc = r;
7884 // (*hd)->str = tokstr;`
7885 // AST path has no wordcode pc to patch; use -1 sentinel so the
7886 // inline NEWLIN walk in `zshlex()` skips the setheredoc call.
7887 HDOCS.with_borrow_mut(|head| {
7888 let mut cur = head;
7889 while cur.is_some() {
7890 cur = &mut cur.as_mut().unwrap().next; // c:2290
7891 }
7892 *cur = Some(Box::new(crate::ported::zsh_h::heredocs {
7893 // c:2292-2296
7894 next: None,
7895 typ: rtype,
7896 pc: -1,
7897 str: Some(name.clone()),
7898 }));
7899 });
7900 // zshrs-only: push parallel AST-glue entry onto LEX_HEREDOCS.
7901 let idx = LEX_HEREDOCS.with_borrow_mut(|v| {
7902 v.push(HereDoc {
7903 terminator: term,
7904 strip_tabs,
7905 content: String::new(),
7906 quoted,
7907 processed: false,
7908 });
7909 v.len() - 1
7910 });
7911 Some(idx)
7912 } else {
7913 None
7914 };
7915
7916 // c:2298 (heredoc) / c:2322 (other redirs) — final zshlex() advance
7917 // past the redir target word. MUST run after the HDOCS push above
7918 // so the heredoc-drain inside this zshlex sees the new entry. For
7919 // non-heredoc forms the order is irrelevant; consolidating to a
7920 // single tail-call here matches the wordcode variant.
7921 zshlex();
7922
7923 Some(ZshRedir {
7924 rtype,
7925 fd,
7926 name,
7927 heredoc: None,
7928 varid,
7929 heredoc_idx,
7930 })
7931}
7932
7933/// Parse C-style for loop: for (( init; cond; step ))
7934/// Parse the c-style `for ((init; cond; incr)) do BODY done`.
7935/// Inner branch of zsh/Src/parse.c:1100-1140 inside par_for.
7936/// Recognized when the token after FOR is DINPAR (the `((`
7937/// detected by gettok via dbparens setup).
7938fn parse_for_cstyle() -> Option<ZshCommand> {
7939 // We're at (( (Dinpar None) - the opening ((
7940 // Lexer returns:
7941 // Dinpar None - opening ((
7942 // Dinpar "init" - init expression, semicolon consumed
7943 // Dinpar "cond" - cond expression, semicolon consumed
7944 // Doutpar "step" - step expression, closing )) consumed
7945 zshlex(); // Get init: Dinpar "i=0"
7946
7947 if tok() != DINPAR {
7948 zerr("expected init expression in for ((");
7949 return None;
7950 }
7951 let init = tokstr().unwrap_or_default();
7952
7953 zshlex(); // Get cond: Dinpar "i<10"
7954
7955 if tok() != DINPAR {
7956 zerr("expected condition in for ((");
7957 return None;
7958 }
7959 let cond = tokstr().unwrap_or_default();
7960
7961 zshlex(); // Get step: Doutpar "i++"
7962
7963 if tok() != DOUTPAR {
7964 zerr("expected )) in for");
7965 return None;
7966 }
7967 let step = tokstr().unwrap_or_default();
7968
7969 // c:1110 — `infor = 0;` before the body opener. The companion
7970 // `incmdpos = 1;` at c:1111 is intentionally skipped here for
7971 // the same reason c:1094's `incmdpos = 0;` is skipped in
7972 // par_for above — zshrs doesn't mirror the full
7973 // incmdpos state-machine inline.
7974 set_infor(0); // c:1110
7975 zshlex(); // Move past ))
7976
7977 skip_separators();
7978 let body = parse_loop_body(false, false)?;
7979
7980 Some(ZshCommand::For(ZshFor {
7981 var: String::new(),
7982 list: ForList::CStyle { init, cond, step },
7983 body: Box::new(body),
7984 is_select: false,
7985 }))
7986}
7987
7988/// Parse select loop (same syntax as for)
7989/// Parse `select NAME in WORDS; do BODY; done`. Same shape as
7990/// `for NAME in WORDS; do ...` but with menu-prompt semantics in
7991/// the executor. C equivalent: the SELECT case in par_for at
7992/// parse.c:1087-1207 (selects share parser flow with foreach).
7993fn parse_select() -> Option<ZshCommand> {
7994 // `select` shares par_for's grammar (var, words, body) but the
7995 // compile path is different (interactive prompt loop).
7996 match par_for()? {
7997 ZshCommand::For(mut f) => {
7998 f.is_select = true;
7999 Some(ZshCommand::For(f))
8000 }
8001 other => Some(other),
8002 }
8003}
8004
8005/// Parse loop body (do...done, {...}, or shortloop)
8006/// Parse the `do BODY done` body of a for/while/until/select/
8007/// repeat loop. Direct equivalent of zsh's parse.c handling
8008/// inside the loop builders — they all consume DOLOOP, parse a
8009/// list until DONE, and return the list. The `foreach_style`
8010/// flag signals foreach (where short-form `for NAME in WORDS;
8011/// CMD` may skip do/done) vs c-style (which always requires
8012/// do/done).
8013///
8014/// `is_repeat` widens the SHORTLOOPS gate so `SHORTREPEAT` also
8015/// unlocks the short form for `repeat N CMD` (per c:1600
8016/// `unset(SHORTLOOPS) && unset(SHORTREPEAT)`).
8017fn parse_loop_body(foreach_style: bool, is_repeat: bool) -> Option<ZshProgram> {
8018 // c:1180-1194 — body dispatch order per par_for:
8019 // `do ... done` (DOLOOP) — primary form.
8020 // `{ ... }` (INBRACE) — alternate.
8021 // csh/CSHJUNKIELOOPS — terminator is `end`.
8022 // else if (unset(SHORTLOOPS)) — YYERROR.
8023 // else — short form (single command).
8024 if tok() == DOLOOP {
8025 zshlex();
8026 // Body parse must declare DONE as an end-token so the
8027 // parse_program_until top-level orphan-DONE guard doesn't
8028 // mis-fire on the legitimate loop terminator.
8029 let body = parse_program_until(Some(&[DONE]));
8030 // c:Src/parse.c:1182-1183 / :1535-1536 / :1597-1598 —
8031 // `if (tok != DONE) YYERRORV(oecused);`. zshrs previously
8032 // silently accepted EOF as a substitute for `done`, so
8033 // `for i in a; do echo hi; don` ran the loop with `don` as
8034 // a command (which then failed "command not found") instead
8035 // of erroring at parse time. Bug #403, #404.
8036 if tok() != DONE {
8037 zerr("parse error: expected `done'");
8038 return None;
8039 }
8040 zshlex();
8041 Some(body)
8042 } else if tok() == INBRACE_TOK {
8043 zshlex();
8044 let body = parse_program_until(Some(&[OUTBRACE_TOK]));
8045 // c:Src/parse.c:1186 / :1539 — `if (tok != OUTBRACE) YYERRORV`.
8046 if tok() != OUTBRACE_TOK {
8047 zerr("parse error: expected `}'");
8048 return None;
8049 }
8050 zshlex();
8051 Some(body)
8052 } else if foreach_style || isset(CSHJUNKIELOOPS) {
8053 // c:1184 / 1546 / 1595 — `else if (csh || isset(CSHJUNKIELOOPS))`.
8054 let body = parse_program_until(Some(&[ZEND]));
8055 // c:1190 / 1548 — `if (tok != ZEND) YYERRORV`.
8056 if tok() != ZEND {
8057 zerr("parse error: expected `end'");
8058 return None;
8059 }
8060 zshlex();
8061 Some(body)
8062 } else {
8063 // c:1190 / 1474 / 1551 / 1600 — short-form gate. C bails
8064 // with YYERROR when `unset(SHORTLOOPS) && (!is_repeat ||
8065 // unset(SHORTREPEAT))`. zshrs's option machinery isn't
8066 // initialised at parse-test time (no `init_main` →
8067 // `install_emulation_defaults`), so a strict port here
8068 // body. parse_init seeds SHORTLOOPS=on mirroring C
8069 // `install_emulation_defaults`, so this fires only when a
8070 // script explicitly disabled the option.
8071 if unset(SHORTLOOPS) && (!is_repeat || unset(SHORTREPEAT)) {
8072 zerr("parse error: short loop form requires SHORTLOOPS option");
8073 return None;
8074 }
8075 // c:Src/parse.c:1604 / :1474 / :1551 — short form calls
8076 // par_save_list1 → par_list1 → par_sublist, which parses
8077 // ONE sublist and leaves the trailing SEPER untouched for
8078 // the outer par_list to consume. zshrs previously routed
8079 // through par_list() which consumes the trailing `;`/`\n`
8080 // separator — that swallowed the separator between the
8081 // loop's body command and the next outer command, so
8082 // `repeat 2 print x; print y` parsed as repeat-then-eof
8083 // and par_cmd's post-compound STRING_LEX guard at parse.rs
8084 // line 1170 fired "parse error near `print'". Bug #593.
8085 par_list1().map(|sublist| ZshProgram {
8086 lists: vec![ZshList {
8087 sublist,
8088 flags: ListFlags::default(),
8089 }],
8090 })
8091 }
8092}
8093
8094/// `() { body } arg1 arg2 …` — anonymous function. Defines a fresh
8095/// function named `_zshrs_anon_N`, invokes it with the args, and the
8096/// body runs with positional params set. Implemented as the desugared
8097/// pair (FuncDef + Simple call) so the compile path doesn't need new
8098/// machinery.
8099/// Parse an anonymous function definition `() { BODY }` followed
8100/// by call args. zsh treats `() { echo hi; } a b c` as defining
8101/// and immediately calling an anon fn with args a/b/c. C
8102/// equivalent: the INOUTPAR shape in par_simple at parse.c:1836+
8103/// triggers an anon-funcdef path.
8104fn parse_anon_funcdef() -> Option<ZshCommand> {
8105 zshlex(); // skip ()
8106 skip_separators();
8107 // No `{` after `()` → bare empty subshell shape `()`. Fall back
8108 // to a Subsh with an empty program so the status is 0 (matches
8109 // zsh's `()` no-op behavior).
8110 if tok() != INBRACE_TOK {
8111 return Some(ZshCommand::Subsh(Box::new(ZshProgram {
8112 lists: Vec::new(),
8113 })));
8114 }
8115 zshlex(); // skip {
8116 // c:Src/parse.c:par_subsh — anon `() { … }` body must terminate at
8117 // OUTBRACE_TOK. Pass it as the explicit end-token so the inner
8118 // parse stops cleanly at `}` rather than hitting the top-level
8119 // stray-`}` arm (#168). Bug #167 family.
8120 let body = parse_program_until(Some(&[OUTBRACE_TOK]));
8121 // c:Src/parse.c:1733-1737 — same `if (tok != OUTBRACE) YYERRORV`
8122 // gate as the named-funcdef path. Bug #405 sibling.
8123 if tok() != OUTBRACE_TOK {
8124 zerr("parse error: expected `}'");
8125 return None;
8126 }
8127 zshlex();
8128 // Collect any trailing args until a separator. zsh's anon-fn form
8129 // `() { body } a b c` runs body with $1=a, $2=b, $3=c.
8130 let mut args = Vec::new();
8131 while tok() == STRING_LEX {
8132 if let Some(s) = tokstr() {
8133 args.push(s);
8134 }
8135 zshlex();
8136 }
8137
8138 // Generate a unique name. Module-level static would be cleaner but
8139 // a thread-local atomic is enough — anonymous functions are
8140 // ephemeral and the name isn't user-visible.
8141 static ANON_COUNTER: AtomicUsize = AtomicUsize::new(0);
8142 let n = ANON_COUNTER.fetch_add(1, Ordering::Relaxed);
8143 let name = format!("_zshrs_anon_{}", n);
8144 Some(ZshCommand::FuncDef(ZshFuncDef {
8145 names: vec![name],
8146 body: Box::new(body),
8147 tracing: false,
8148 auto_call_args: Some(args),
8149 body_source: None,
8150 }))
8151}
8152
8153/// Parse {...} cursh
8154/// Parse a current-shell brace block `{ BODY }`. C source
8155/// par_cmd at parse.c:958-1085 handles Inbrace → emit WC_CURSH
8156/// and recurses into the list. zshrs's parse_cursh extracts that
8157/// arm into a dedicated method.
8158fn parse_cursh() -> Option<ZshCommand> {
8159 zshlex(); // skip {
8160 // c:Src/parse.c:par_subsh — pass OUTBRACE_TOK as the explicit
8161 // body terminator so the inner parse stops cleanly at `}` rather
8162 // than falling through the top-level `OUTBRACE_TOK if
8163 // end_tokens.is_none()` arm (which errors on stray `}` per bug
8164 // #168). Bug #167 in docs/BUGS.md.
8165 let prog = parse_program_until(Some(&[OUTBRACE_TOK]));
8166
8167 // c:Src/parse.c:par_subsh — `{ … }` requires a matching `}`.
8168 // C errors via YYERRORV when the body parse returns without
8169 // seeing OUTBRACE_TOK (parse.c:1623 inbrack check). zshrs's
8170 // previous behavior silently returned `Cursh(prog)` and ran the
8171 // body as if the braces were absent. Bug #167 in docs/BUGS.md.
8172 if tok() != OUTBRACE_TOK {
8173 // Reuse the "parse error near `<tok>'" shape from #142/#161.
8174 // The offending token is whatever follows the unclosed brace
8175 // body. For EOF (`{ echo a` at end of input) C zsh errors
8176 // near the LAST consumed body token; we use the current
8177 // tokstr() or fall back to a "}" hint.
8178 let near = tokstr().unwrap_or_else(|| "}".to_string());
8179 zerr(&format!("parse error near `{}'", near));
8180 return None;
8181 }
8182 // Check for { ... } always { ... }. Direct port of zsh's
8183 // par_subsh at parse.c:1612-1660 — note the two `incmdpos = 1`
8184 // forces (parse.c:1632, 1637): after consuming the closing
8185 // Outbrace AND after matching the `always` keyword, the parser
8186 // explicitly resets command position so the next `{` lexes as
8187 // Inbrace. Without these resets the lexer's String-clears-cmdpos
8188 // rule (lex.rs:976-983) leaves the second `{` in word position,
8189 // turning `always { ... }` into a Simple `{` `echo` … and the
8190 // try/always pairing is silently lost.
8191 {
8192 set_incmdpos(true); // parse.c:1632 incmdpos = !zsh_construct
8193 zshlex();
8194
8195 // Check for 'always'
8196 if tok() == STRING_LEX {
8197 let s = tokstr();
8198 if s.map(|s| s == "always").unwrap_or(false) {
8199 set_incmdpos(true); // parse.c:1637 incmdpos = 1
8200 zshlex();
8201 skip_separators();
8202
8203 if tok() == INBRACE_TOK {
8204 zshlex();
8205 // c:Src/parse.c — always-clause body terminates at
8206 // OUTBRACE_TOK. Bug #167/#168 family.
8207 let always = parse_program_until(Some(&[OUTBRACE_TOK]));
8208 if tok() == OUTBRACE_TOK {
8209 zshlex();
8210 }
8211 return Some(ZshCommand::Try(ZshTry {
8212 try_block: Box::new(prog),
8213 always: Box::new(always),
8214 }));
8215 }
8216 }
8217 }
8218 }
8219
8220 Some(ZshCommand::Cursh(Box::new(prog)))
8221}
8222
8223/// Parse inline function definition: name() { ... }
8224/// Parse the inline form `NAME () { BODY }` (POSIX-style funcdef
8225/// without the `function` keyword). The name has already been
8226/// consumed and pushed by par_simple before this method fires.
8227/// C source: handled inline in par_simple's INOUTPAR-after-name
8228/// arm (parse.c:1836-2228).
8229fn parse_inline_funcdef(name: String) -> Option<ZshCommand> {
8230 // par_simple's STRING loop left `incmdpos = 0`; the funcdef body
8231 // `{ ... }` requires `incmdpos = 1` so the lexer recognises `{`
8232 // as INBRACE_TOK (current-shell block opener) instead of a
8233 // literal `{` STRING. Without this, `myfunc() { echo body }`
8234 // parsed the body as the single STRING `"{"`, then `echo body`
8235 // fell out at top level. Mirrors the C path where par_cmd's
8236 // dispatcher (parse.c:958) is called with `incmdpos = 1` for
8237 // the funcdef body.
8238 set_incmdpos(true);
8239 // Skip ()
8240 if tok() == INOUTPAR {
8241 zshlex();
8242 }
8243
8244 skip_separators();
8245
8246 // Parse body
8247 if tok() == INBRACE_TOK {
8248 // Same body_start-before-zshlex fix as par_funcdef.
8249 let body_start = pos();
8250 zshlex();
8251 // c:Src/parse.c — inline funcdef body terminates at OUTBRACE_TOK.
8252 // Explicit end-token keeps the inner parse from hitting the
8253 // top-level stray-`}` arm (#168). Bug #167 family.
8254 let body = parse_program_until(Some(&[OUTBRACE_TOK]));
8255 // c:Src/parse.c:1733-1737 — `if (tok != OUTBRACE) { cmdpop();
8256 // lineno += oldlineno; ecnpats = onp; ecssub = oecssub;
8257 // YYERRORV(oecused); }`. Without this gate, `f() { echo hi`
8258 // silently registered as a complete fn with body `echo hi`.
8259 // Bug #405.
8260 if tok() != OUTBRACE_TOK {
8261 zerr("parse error: expected `}'");
8262 return None;
8263 }
8264 let body_end = pos().saturating_sub(1);
8265 let body_source = input_slice(body_start, body_end)
8266 .map(|s| {
8267 // Lexer's pos() may have advanced past `}` AND skipped
8268 // trailing whitespace/newlines before returning the
8269 // OUTBRACE_TOK to us, so the slice up to `pos - 1`
8270 // includes the `}` and any preceding whitespace.
8271 // Strip the trailing `}` and any preceding structural
8272 // separator (`;`, `\n`) — C zsh's getpermtext walks
8273 // the wordcode list and emits each command WITHOUT
8274 // the trailing `;`/`\n` that lives in the input.
8275 let t = s.trim();
8276 let t = t.strip_suffix('}').unwrap_or(t).trim_end();
8277 let t = t
8278 .trim_end_matches(|c: char| c == ';' || c == '\n')
8279 .trim_end();
8280 t.to_string()
8281 })
8282 .filter(|s| !s.is_empty());
8283 zshlex();
8284 Some(ZshCommand::FuncDef(ZshFuncDef {
8285 names: vec![name],
8286 body: Box::new(body),
8287 tracing: false,
8288 auto_call_args: None,
8289 body_source,
8290 }))
8291 } else if unset(SHORTLOOPS) {
8292 // c:1742 — `else if (unset(SHORTLOOPS)) YYERRORV(oecused);` —
8293 // funcdef short body (`name() cmd` without `{...}`) only
8294 // accepted when SHORTLOOPS is set. parse_init seeds
8295 // SHORTLOOPS=on so this fires only when a script
8296 // explicitly disabled the option.
8297 zerr("parse error: short function body form requires SHORTLOOPS option");
8298 None
8299 } else {
8300 match par_cmd() {
8301 Some(cmd) => {
8302 let list = ZshList {
8303 sublist: ZshSublist {
8304 pipe: ZshPipe {
8305 cmd,
8306 next: None,
8307 lineno: lineno(),
8308 merge_stderr: false,
8309 },
8310 next: None,
8311 flags: SublistFlags::default(),
8312 },
8313 flags: ListFlags::default(),
8314 };
8315 Some(ZshCommand::FuncDef(ZshFuncDef {
8316 names: vec![name],
8317 body: Box::new(ZshProgram { lists: vec![list] }),
8318 tracing: false,
8319 auto_call_args: None,
8320 body_source: None,
8321 }))
8322 }
8323 None => None,
8324 }
8325 }
8326}
8327
/// Parse a complete `[[ ]]` conditional expression.
///
/// Entry point of the recursive descent (or → and → not → primary):
/// it simply delegates to [`parse_cond_or`], the loosest-binding
/// level, and returns whatever that level produced (`None` when no
/// condition could be parsed).
fn parse_cond_expr() -> Option<ZshCond> {
    parse_cond_or()
}
8335
8336/// Cond-expression `||` level. C: inside par_cond_1 at
8337/// parse.c:2434-2475 (the `cond_or` ladder).
8338fn parse_cond_or() -> Option<ZshCond> {
8339 let left = parse_cond_and()?;
8340 skip_cond_separators();
8341
8342 if tok() == DBAR {
8343 zshlex();
8344 skip_cond_separators();
8345 parse_cond_or().map(|right| ZshCond::Or(Box::new(left), Box::new(right)))
8346 } else {
8347 Some(left)
8348 }
8349}
8350
8351/// Cond-expression `&&` level. C: par_cond_2 at parse.c:2476-2625.
8352fn parse_cond_and() -> Option<ZshCond> {
8353 let left = parse_cond_not()?;
8354 skip_cond_separators();
8355
8356 if tok() == DAMPER {
8357 zshlex();
8358 skip_cond_separators();
8359 parse_cond_and().map(|right| ZshCond::And(Box::new(left), Box::new(right)))
8360 } else {
8361 Some(left)
8362 }
8363}
8364
/// `static FuncDump dumps;` from `Src/parse.c:3652` — the set of
/// loaded `.zwc` dumps. C keeps a linked list and walks
/// `dumps`/`p->next` directly; the Rust port stores the entries in a
/// `Mutex<Vec<funcdump>>` and locates an entry by comparing its
/// `filename`, so no raw-pointer comparison is needed.
pub static DUMPS: std::sync::Mutex<Vec<funcdump>> = std::sync::Mutex::new(Vec::new());
8370
8371/// Cond-expression `!` negation level. C: handled inside
8372/// par_cond_2 at parse.c:2476-2625 via the Bang token check.
8373fn parse_cond_not() -> Option<ZshCond> {
8374 skip_cond_separators();
8375
8376 // ! can be either BANG_TOK or String "!"
8377 let is_not =
8378 tok() == BANG_TOK || (tok() == STRING_LEX && tokstr().map(|s| s == "!").unwrap_or(false));
8379 if is_not {
8380 zshlex();
8381 let inner = parse_cond_not()?;
8382 return Some(ZshCond::Not(Box::new(inner)));
8383 }
8384
8385 if tok() == INPAR_TOK {
8386 zshlex();
8387 skip_cond_separators();
8388 // c:Src/parse.c:2534-2547 par_cond_2 INPAR branch — empty
8389 // body `[[ ( ) ]]` makes the inner par_cond's recursive
8390 // par_cond_2 see OUTPAR with no leading STRING/BANG/INPAR
8391 // and YYERROR immediately. Mirror that here: if the very
8392 // next token after `(` (post separator skip) is `)`, emit
8393 // a parse error so the script aborts cleanly instead of
8394 // silently swallowing every following command. Bug #538.
8395 if tok() == OUTPAR_TOK {
8396 yyerror("condition expected");
8397 return None;
8398 }
8399 let inner = parse_cond_expr()?;
8400 skip_cond_separators();
8401 if tok() == OUTPAR_TOK {
8402 zshlex();
8403 }
8404 return Some(inner);
8405 }
8406
8407 parse_cond_primary()
8408}
8409
8410/// Cond-expression primary: unary tests (-f, -d, ...), binary
8411/// tests (=, !=, <, >, ==, =~, -eq, -ne, ...), and parenthesized
8412/// sub-expressions. Direct port of par_cond_double / par_cond_triple
8413/// / par_cond_multi at parse.c:2626-2731 (chosen by arg count).
8414fn parse_cond_primary() -> Option<ZshCond> {
8415 let s1 = match tok() {
8416 STRING_LEX => {
8417 let s = tokstr().unwrap_or_default();
8418 zshlex();
8419 s
8420 }
8421 _ => return None,
8422 };
8423
8424 skip_cond_separators();
8425
8426 // Check for unary operator. zsh's lexer tokenizes leading `-` as
8427 // `zsh_h::Dash` (`\u{9b}`, `Src/zsh.h:182`) inside gettokstr (lex.c:1390-1400
8428 // LX2_DASH — `-` always becomes Dash, untokenized later). Match
8429 // either form here, and use char-count not byte-count since Dash
8430 // is 2 UTF-8 bytes (`\xc2\x9b`).
8431 //
8432 // c:Src/parse.c par_cond — when the leading token is `-` followed
8433 // ENTIRELY by digits (`-5`, `-123`), it's a numeric literal
8434 // operand, not a unary test flag. zsh's parser checks the C
8435 // `isdigit` of the trailing chars to disambiguate; without the
8436 // check, `[[ -5 -lt -3 ]]` reads `-5` as a one-arg test flag,
8437 // then `-lt` as the operand, then `-3` as a leftover token —
8438 // emitting "unknown condition: -5" and falling through to a
8439 // command-not-found dispatch on `-3`. Bug #121 in docs/BUGS.md.
8440 let s1_chars: Vec<char> = s1.chars().collect();
8441 let is_negative_number = s1_chars.len() >= 2
8442 && IS_DASH(s1_chars[0])
8443 && s1_chars[1..].iter().all(|c| c.is_ascii_digit());
8444 if s1_chars.len() == 2 && IS_DASH(s1_chars[0]) && !is_negative_number {
8445 let s2 = match tok() {
8446 STRING_LEX => {
8447 let s = tokstr().unwrap_or_default();
8448 zshlex();
8449 s
8450 }
8451 _ => {
8452 // c:Src/parse.c par_cond_2 — when the leading `-X`
8453 // is a 2-char dash form, zsh ALWAYS treats it as a
8454 // unary test op (the operand-missing case errors
8455 // immediately with `unknown condition: -X`). Don't
8456 // fall back to `Unary("-n", "-X")` — that path
8457 // silently let `[[ -z ]]` evaluate as
8458 // `[[ -n "-z" ]]` → true. Bug #480/#481.
8459 //
8460 // Convert Dash (\u{9b}) back to ASCII `-` for the
8461 // user-visible diagnostic so it reads "unknown
8462 // condition: -z" not "unknown condition: <Dash>z".
8463 let display: String = s1.chars().map(|c| {
8464 if IS_DASH(c) { '-' } else { c }
8465 }).collect();
8466 crate::ported::utils::zerr(&format!(
8467 "unknown condition: {}",
8468 display
8469 ));
8470 return None;
8471 }
8472 };
8473 return Some(ZshCond::Unary(s1, s2));
8474 }
8475
8476 // Check for binary operator. Direct port of zsh/Src/parse.c:2601-2603:
8477 // incond++; /* parentheses do globbing */
8478 // do condlex(); while (COND_SEP());
8479 // incond--; /* parentheses do grouping */
8480 // The bump makes the lexer treat `(` as a literal character inside
8481 // the RHS word (e.g. `[[ x =~ (foo) ]]`) instead of returning Inpar
8482 // and splitting the regex into multiple tokens.
8483 let op = match tok() {
8484 STRING_LEX => {
8485 let s = tokstr().unwrap_or_default();
8486 set_incond(incond() + 1);
8487 zshlex();
8488 set_incond(incond() - 1);
8489 s
8490 }
8491 INANG_TOK => {
8492 set_incond(incond() + 1);
8493 zshlex();
8494 set_incond(incond() - 1);
8495 "<".to_string()
8496 }
8497 OUTANG_TOK => {
8498 set_incond(incond() + 1);
8499 zshlex();
8500 set_incond(incond() - 1);
8501 ">".to_string()
8502 }
8503 _ => return Some(ZshCond::Unary("-n".to_string(), s1)),
8504 };
8505
8506 skip_cond_separators();
8507
8508 // c:Src/parse.c:2601-2625 par_cond_2 — only the documented binary
8509 // operators are accepted inside `[[ ... ]]`. zsh rejects ksh/bash
8510 // forms `-a` (logical AND) and `-o` (logical OR) with a parse
8511 // error ("condition expected") because they're not in the
8512 // par_cond_2 binary-op set — zsh uses `&&` / `||` instead.
8513 // Verified: `zsh -fc '[[ "" -a "x" ]]'` → exit 1, "parse error:
8514 // condition expected: ...". Without this gate, zshrs silently
8515 // built ZshCond::Binary("", "-a", "x") and ran an unknown-op
8516 // path that always evaluated false.
8517 // c:Src/parse.c:2601-2625 par_cond_2 — `-a` / `-o` n-ary chain
8518 // operators are not valid binary operators inside `[[ ... ]]`
8519 // (zsh uses `&&` / `||` instead). Match both the ASCII `-a`/
8520 // `-o` form and the tokenized `Dash+a`/`Dash+o` form that the
8521 // lexer emits inside cond bodies (Dash = \u{9b}, Src/zsh.h:182).
8522 let op_chars: Vec<char> = op.chars().collect();
8523 let is_dash_a_or_o =
8524 op_chars.len() == 2 && IS_DASH(op_chars[0]) && (op_chars[1] == 'a' || op_chars[1] == 'o');
8525 if is_dash_a_or_o {
8526 crate::ported::utils::zerr(&format!("parse error: condition expected: {}", s1));
8527 crate::ported::utils::errflag.fetch_or(
8528 crate::ported::zsh_h::ERRFLAG_ERROR,
8529 std::sync::atomic::Ordering::Relaxed,
8530 );
8531 set_tok(LEXERR);
8532 return None;
8533 }
8534
8535 let s2 = match tok() {
8536 STRING_LEX => {
8537 let s = tokstr().unwrap_or_default();
8538 zshlex();
8539 s
8540 }
8541 _ => {
8542 // c:Src/parse.c par_cond_2 — when a binary op is
8543 // recognized but the RHS operand is missing, zsh emits
8544 // `parse error: condition expected: <LHS>` at par_cond_2's
8545 // missing-rhs branch. zshrs's previous fallback returned
8546 // `Binary(s1, op, "")` which silently evaluated as if the
8547 // RHS were empty string → rc=1. Bug #482.
8548 //
8549 // Convert Dash (\u{9b}) back to ASCII `-` in the LHS
8550 // display so the diagnostic reads cleanly.
8551 let display: String = s1.chars().map(|c| {
8552 if IS_DASH(c) { '-' } else { c }
8553 }).collect();
8554 crate::ported::utils::zerr(&format!(
8555 "parse error: condition expected: {}",
8556 display
8557 ));
8558 crate::ported::utils::errflag.fetch_or(
8559 crate::ported::zsh_h::ERRFLAG_ERROR,
8560 std::sync::atomic::Ordering::Relaxed,
8561 );
8562 set_tok(LEXERR);
8563 return None;
8564 }
8565 };
8566
8567 if op == "=~" {
8568 Some(ZshCond::Regex(s1, s2))
8569 } else {
8570 Some(ZshCond::Binary(s1, op, s2))
8571 }
8572}
8573
8574fn skip_cond_separators() {
8575 while tok() == SEPER && {
8576 let s = tokstr();
8577 s.map(|s| !s.contains(';')).unwrap_or(true)
8578 } {
8579 zshlex();
8580 }
8581}
8582
8583/// Parse (( ... )) arithmetic command
8584/// Parse `(( EXPR ))` arithmetic command. C source: parse.c:1810-1834
8585/// `par_dinbrack` (despite the name; the function actually handles
8586/// DINPAR `(( ))` blocks too).
8587fn parse_arith() -> Option<ZshCommand> {
8588 let expr = tokstr().unwrap_or_default();
8589 zshlex();
8590 Some(ZshCommand::Arith(expr))
8591}
8592
8593/// Skip separator tokens
8594fn skip_separators() {
8595 while tok() == SEPER || tok() == NEWLIN {
8596 zshlex();
8597 }
8598}
8599
8600// `fdheaderlen` / `fdmagic` / `fdflags` / etc. macros from
8601// `Src/parse.c:3125-3152`. C uses raw pointer arithmetic on a
8602// `Wordcode` (= `u32 *`); the Rust port takes a slice and indexes.
8603
/// Port of the `fdheaderlen(f)` macro (`Src/parse.c:3125`): the header
/// length of a dump, in u32 words.
///
/// The value is read from word index `FD_PRELEN` of `f`.
///
/// # Panics
///
/// Panics if `f` has `FD_PRELEN` words or fewer.
#[inline]
pub fn fdheaderlen(f: &[u32]) -> u32 {
    f[FD_PRELEN]
}
8610
/// Port of the `fdmagic(f)` macro (`Src/parse.c:3127`): the first word
/// of the dump prelude, which is either `FD_MAGIC` or `FD_OMAGIC`.
///
/// # Panics
///
/// Panics if `f` is empty.
#[inline]
pub fn fdmagic(f: &[u32]) -> u32 {
    f[0]
}
8617
/// Port of the `fdflags(f)` macro (`Src/parse.c:3131`): the flags byte
/// of a dump, stored in the low 8 bits of the packed word `f[1]`.
///
/// # Panics
///
/// Panics if `f` has fewer than two words.
#[inline]
pub fn fdflags(f: &[u32]) -> u32 {
    // Truncating to `u8` keeps exactly the low byte of the packed word.
    u32::from(f[1] as u8)
}
8625
/// Port of the `fdsetflags(f, v)` macro (`Src/parse.c:3132`): store
/// `v` in the low byte of the packed word `f[1]`, leaving the upper
/// 24 bits untouched.
///
/// # Panics
///
/// Panics if `f` has fewer than two words.
#[inline]
pub fn fdsetflags(f: &mut [u32], v: u8) {
    let upper_bits = f[1] & 0xffff_ff00;
    f[1] = upper_bits | u32::from(v);
}
8632
/// Port of the `fdother(f)` macro (`Src/parse.c:3133`): the upper 24
/// bits of the packed word `f[1]`, which hold the byte offset of the
/// dump copy in the opposite byte order.
///
/// # Panics
///
/// Panics if `f` has fewer than two words.
#[inline]
pub fn fdother(f: &[u32]) -> u32 {
    // Shifting a u32 right by 8 already leaves only 24 significant bits.
    f[1] >> 8
}
8640
/// Port of the `fdsetother(f, o)` macro (`Src/parse.c:3134`): store the
/// low 24 bits of `o` in the upper 24 bits of the packed word `f[1]`,
/// keeping the flags byte (low 8 bits) as it is.
///
/// # Panics
///
/// Panics if `f` has fewer than two words.
#[inline]
pub fn fdsetother(f: &mut [u32], o: u32) {
    let flags_byte = f[1] & 0xff;
    // The left shift discards the top 8 bits of `o`, so only its low
    // 24 bits end up in the word.
    f[1] = flags_byte | (o << 8);
}
8646
/// Port of the `fdversion(f)` macro (`Src/parse.c:3140`): read the
/// NUL-terminated `ZSH_VERSION` string stored from word 2 of the
/// prelude onwards.
///
/// At most 10 words (40 bytes) are examined — presumably the
/// `FD_PRELEN - 2` words left in the prelude; confirm against
/// `FD_PRELEN`. Each word contributes its bytes in little-endian
/// order. Bytes that are not valid UTF-8 are replaced lossily.
///
/// A buffer too short to hold a version field (fewer than three
/// words, e.g. a truncated dump) yields an empty string rather than
/// panicking.
pub fn fdversion(f: &[u32]) -> String {
    /// Maximum number of u32 words scanned for the version string.
    const VERSION_WORDS: usize = 10;

    let bytes: Vec<u8> = f
        .get(2..)
        .unwrap_or_default()
        .iter()
        .take(VERSION_WORDS)
        .flat_map(|word| word.to_le_bytes())
        // The string ends at the first NUL byte.
        .take_while(|&byte| byte != 0)
        .collect();
    String::from_utf8_lossy(&bytes).into_owned()
}
8658
/// Port of the `firstfdhead(f)` macro (`Src/parse.c:3142`). The C macro
/// yields a pointer to the first `struct fdhead` past the prelude; the
/// port returns the equivalent u32-word offset into the dump buffer,
/// which is always `FD_PRELEN`.
#[inline]
pub fn firstfdhead_offset() -> usize {
    FD_PRELEN
}
8665
/// Port of the `nextfdhead(f)` macro (`Src/parse.c:3143`): given the
/// word offset `cur` of an `fdhead` record inside the dump `f`, return
/// the word offset of the record that follows it.
///
/// The distance is the current record's `hlen`, stored in the fifth
/// word (index 4) of the record.
///
/// # Panics
///
/// Panics if `cur + 4` is outside `f`.
#[inline]
pub fn nextfdhead_offset(f: &[u32], cur: usize) -> usize {
    let hlen = f[cur + 4] as usize;
    cur + hlen
}
8672
8673/// Port of `fdhflags(f)` macro (`Src/parse.c:3145`) — low 2 bits
8674/// of the header's `flags` field (the kshload/zshload marker).
8675#[inline]
8676pub fn fdhflags(h: &fdhead) -> u32 {
8677 h.flags & 0x3
8678}
8679
8680/// Port of `fdhtail(f)` macro (`Src/parse.c:3146`) — high 30 bits
8681/// of `flags`, byte offset from the name start to its basename.
8682#[inline]
8683pub fn fdhtail(h: &fdhead) -> u32 {
8684 h.flags >> 2
8685}
8686
/// Port of the `fdhbldflags(f, t)` macro (`Src/parse.c:3147`): pack a
/// marker and a tail offset into a single word, with `flags` in the low
/// two bits and `tail` shifted into the upper 30 bits.
///
/// `flags` is OR-ed in as given; the caller is expected to pass a
/// value that fits in two bits.
#[inline]
pub fn fdhbldflags(flags: u32, tail: u32) -> u32 {
    (tail << 2) | flags
}
8693
8694/// Port of `fdname(f)` macro (`Src/parse.c:3152`) — name string
8695/// follows the fdhead record immediately. Reads bytes from the
8696/// dump buffer until NUL.
8697pub fn fdname(buf: &[u32], header_offset: usize) -> String {
8698 let name_word_off = header_offset + FDHEAD_WORDS;
8699 let bytes: Vec<u8> = buf[name_word_off..]
8700 .iter()
8701 .flat_map(|w| w.to_le_bytes().into_iter())
8702 .take_while(|&b| b != 0)
8703 .collect();
8704 String::from_utf8_lossy(&bytes).into_owned()
8705}
8706
8707/// Decode a `fdhead` record at the given u32-word offset in the
8708/// dump buffer. Used by the header-walk loops in `bin_zcompile -t`.
8709pub fn read_fdhead(buf: &[u32], offset: usize) -> Option<fdhead> {
8710 if offset + FDHEAD_WORDS > buf.len() {
8711 return None;
8712 }
8713 Some(fdhead {
8714 start: buf[offset],
8715 len: buf[offset + 1],
8716 npats: buf[offset + 2],
8717 strs: buf[offset + 3],
8718 hlen: buf[offset + 4],
8719 flags: buf[offset + 5],
8720 })
8721}
8722
8723/// Port of `freedump(FuncDump f)` from `Src/parse.c:3976`. C
8724/// `munmap`s, `zclose`s the fd, and frees the struct. The Rust
8725/// port relies on Drop for the `funcdump` (no mmap held in this
8726/// port — `addr`/`map` are byte-offset placeholders), so the
8727/// equivalent is removing the entry from the dumps list. Called
8728/// by `decrdumpcount` when the refcount hits zero (c:3988) and
8729/// by `closedumps` when shutting down (c:4008).
8730fn freedump_locked(g: &mut std::sync::MutexGuard<'_, Vec<funcdump>>, filename: &str) {
8731 // c:3976
8732 g.retain(|d| d.filename.as_deref() != Some(filename));
8733}
8734
8735// =====================================================================
8736// Remaining `Src/parse.c` ports (this section finishes the file).
8737//
8738// Several of these emit into the C-wordcode buffer (`ECBUF`/etc.) and
8739// are kept for completeness — the live zshrs runtime uses the
8740// `ZshProgram` AST path instead, but `bin_zcompile` (`-c`/`-a` modes)
8741// and any future `.zwc`-emit pipeline both call into these.
8742// =====================================================================
8743
8744/// `ecstr(s)` helper — `ecadd(ecstrcode(s))`. Mirrors the C macro at
8745/// `Src/parse.c:482` used everywhere by the par_* emitters.
8746#[inline]
8747pub fn ecstr(s: &str) {
8748 let code = ecstrcode(s);
8749 ecadd(code);
8750}
8751
/// Port of the `condlex` function-pointer global from `Src/parse.c`.
///
/// In C the pointer is switched between `zshlex` and `testlex`,
/// depending on whether a `[[ ]]` expression or the `test` builtin is
/// being parsed. zshrs has no separate `testlex` yet, so this always
/// forwards to `zshlex`.
#[inline]
pub fn condlex() {
    zshlex();
}
8760
8761fn copy_ecstr_walk(node: &Option<Box<EccstrNode>>, p: &mut [u8]) {
8762 let mut cur = node.as_ref();
8763 while let Some(n) = cur {
8764 // c:540 — `memcpy(p + s->aoffs, s->str, strlen(s->str) + 1);`
8765 let off = n.aoffs as usize;
8766 let need = off + n.str.len() + 1;
8767 if need <= p.len() {
8768 p[off..off + n.str.len()].copy_from_slice(&n.str);
8769 p[off + n.str.len()] = 0;
8770 }
8771 // c:541 — `copy_ecstr(s->left, p);`
8772 copy_ecstr_walk(&n.left, p);
8773 // c:542 — `s = s->right;`
8774 cur = n.right.as_ref();
8775 }
8776}
8777
8778/// Port of `par_cond(void)` from `Src/parse.c:2409`. Top-level cond
8779/// OR-chain — drives `par_cond_1` and stitches `||`-separated terms
8780/// with `WCB_COND(COND_OR, …)`. This is the missing top of the
8781/// wordcode cond chain: `par_cond_wordcode` (the par_dinbrack port)
8782/// must call into HERE so that `[[ a || b ]]` and friends land
8783/// real WC_COND opcodes in `ecbuf`. Without this, the wordcode
8784/// emitter for `[[ ... ]]` produced zero words and parity dropped
8785/// 148 words on `/etc/zshrc` alone.
8786pub fn par_cond_top() -> i32 {
8787 // c:2411 — `int p = ecused, r;`
8788 let p = ECUSED.with(|c| c.get()) as usize;
8789 let r = par_cond_1();
8790 while COND_SEP() {
8791 condlex();
8792 }
8793 if tok() == DBAR {
8794 // c:2417 — `condlex(); while (COND_SEP()) condlex();`
8795 condlex();
8796 while COND_SEP() {
8797 condlex();
8798 }
8799 // c:2420-2422 — `ecispace(p, 1); par_cond(); ecbuf[p] =
8800 // WCB_COND(COND_OR, ecused-1-p);`
8801 ecispace(p, 1);
8802 par_cond_top();
8803 let ecused = ECUSED.with(|c| c.get()) as usize;
8804 ECBUF.with(|c| {
8805 c.borrow_mut()[p] = WCB_COND(COND_OR as u32, (ecused - 1 - p) as u32);
8806 });
8807 return 1;
8808 }
8809 r
8810}
8811
8812/// Port of `static int check_cond(const char *input, const char *cond)`
8813/// from `Src/parse.c:2459`. True iff `input` is the two-char `-X`
8814/// form whose `X` matches `cond` — used by par_cond_2 to detect
8815/// `-a` / `-o` n-ary chain operators and by build_dump for `-k` /
8816/// `-z`. C: `return !IS_DASH(input[0]) ? 0 : !strcmp(input+1, cond);`.
8817fn check_cond(input: &str, cond: &str) -> bool {
8818 let mut chars = input.chars();
8819 match chars.next() {
8820 Some(c) if IS_DASH(c) => chars.as_str() == cond,
8821 _ => false,
8822 }
8823}
8824
8825#[cfg(test)]
8826mod tests {
8827 use super::*;
8828 use crate::utils::{errflag, ERRFLAG_ERROR};
8829 use std::fs;
8830 use std::path::Path;
8831 use std::sync::atomic::Ordering;
8832 use std::sync::mpsc;
8833 use std::thread;
8834 use std::time::Duration;
8835
8836 /// `try_source_file` MUST refuse a stale `.zwc` cache when the
8837 /// uncompiled source has been modified more recently. The C body
8838 /// at c:3819 reads `stc.st_mtime >= stn.st_mtime` — explicitly
8839 /// `>=`, meaning only an equal-or-newer zwc is acceptable.
8840 ///
8841 /// A regression that ignored the mtime check (or used the wrong
8842 /// direction) would silently keep loading the OLD compiled body
8843 /// after the user edited the source file — every `source foo.zsh`
8844 /// would replay yesterday's code, the worst-class shell bug.
8845 ///
8846 /// Pin: create source + .zwc, then touch source to make it
8847 /// newer. try_source_file must return None.
8848 #[test]
8849 fn try_source_file_skips_stale_zwc() {
8850 let _g = crate::test_util::global_state_lock();
8851 let dir = tempfile::tempdir().expect("tempdir");
8852 let src = dir.path().join("script.zsh");
8853 let zwc = dir.path().join("script.zsh.zwc");
8854 // Create zwc FIRST (older), then source (newer).
8855 fs::write(&zwc, b"placeholder zwc").unwrap();
8856 thread::sleep(Duration::from_millis(20));
8857 fs::write(&src, b"echo hi").unwrap();
8858
8859 let result = try_source_file(src.to_str().unwrap());
8860 assert!(
8861 result.is_none(),
8862 "c:3819 — stale .zwc (older than source) MUST be rejected; \
8863 got {:?}",
8864 result
8865 );
8866 }
8867
8868 /// `try_source_file` returns None when no `.zwc` exists for the
8869 /// requested file (c:3819 `if let Ok(meta_c) = &stc` gate fails).
8870 /// This is the common case — most user scripts don't ship with
8871 /// a pre-compiled `.zwc`. The fn returning None lets the caller
8872 /// fall through to the source-read path. A regression that
8873 /// returned `Some(file)` on missing `.zwc` would route every
8874 /// `source foo.zsh` through `check_dump_file` against a
8875 /// non-existent file and crash.
8876 #[test]
8877 fn try_source_file_returns_none_when_no_zwc() {
8878 let _g = crate::test_util::global_state_lock();
8879 let dir = tempfile::tempdir().expect("tempdir");
8880 let src = dir.path().join("plain.zsh");
8881 fs::write(&src, b"echo hi").unwrap();
8882 // No .zwc sibling.
8883
8884 let result = try_source_file(src.to_str().unwrap());
8885 assert!(
8886 result.is_none(),
8887 "c:3819 gate fails when stat(wc) returns Err → None"
8888 );
8889 }
8890
8891 /// Test helper. Mirrors zsh's `errflag` save/clear/check pattern
8892 /// around a parse — see `Src/init.c:loop` which clears errflag
8893 /// before parse_event() and tests it after. Returns `Err` if the
8894 /// parse set `ERRFLAG_ERROR`; otherwise `Ok(program)`.
8895 fn parse(input: &str) -> Result<ZshProgram, String> {
8896 let saved = errflag.load(Ordering::Relaxed);
8897 errflag.fetch_and(!ERRFLAG_ERROR, Ordering::Relaxed);
8898 parse_init(input);
8899 let prog = crate::ported::parse::parse();
8900 let had_err = (errflag.load(Ordering::Relaxed) & ERRFLAG_ERROR) != 0;
8901 // Restore prior error bits; don't carry our new error into the
8902 // outer test runner.
8903 errflag.store(saved, Ordering::Relaxed);
8904 if had_err {
8905 Err("parse error".to_string())
8906 } else {
8907 Ok(prog)
8908 }
8909 }
8910
8911 #[test]
8912 fn test_simple_command() {
8913 let _g = crate::test_util::global_state_lock();
8914 let prog = parse("echo hello world").unwrap();
8915 assert_eq!(prog.lists.len(), 1);
8916 match &prog.lists[0].sublist.pipe.cmd {
8917 ZshCommand::Simple(s) => {
8918 assert_eq!(s.words, vec!["echo", "hello", "world"]);
8919 }
8920 _ => panic!("expected simple command"),
8921 }
8922 }
8923
8924 #[test]
8925 fn test_pipeline() {
8926 let _g = crate::test_util::global_state_lock();
8927 let prog = parse("ls | grep foo | wc -l").unwrap();
8928 assert_eq!(prog.lists.len(), 1);
8929
8930 let pipe = &prog.lists[0].sublist.pipe;
8931 assert!(pipe.next.is_some());
8932
8933 let pipe2 = pipe.next.as_ref().unwrap();
8934 assert!(pipe2.next.is_some());
8935 }
8936
8937 #[test]
8938 fn test_and_or() {
8939 let _g = crate::test_util::global_state_lock();
8940 let prog = parse("cmd1 && cmd2 || cmd3").unwrap();
8941 let sublist = &prog.lists[0].sublist;
8942
8943 assert!(sublist.next.is_some());
8944 let (op, _) = sublist.next.as_ref().unwrap();
8945 assert_eq!(*op, SublistOp::And);
8946 }
8947
8948 #[test]
8949 fn test_if_then() {
8950 let _g = crate::test_util::global_state_lock();
8951 let prog = parse("if test -f foo; then echo yes; fi").unwrap();
8952 match &prog.lists[0].sublist.pipe.cmd {
8953 ZshCommand::If(_) => {}
8954 _ => panic!("expected if command"),
8955 }
8956 }
8957
8958 #[test]
8959 fn test_for_loop() {
8960 let _g = crate::test_util::global_state_lock();
8961 let prog = parse("for i in a b c; do echo $i; done").unwrap();
8962 match &prog.lists[0].sublist.pipe.cmd {
8963 ZshCommand::For(f) => {
8964 assert_eq!(f.var, "i");
8965 match &f.list {
8966 ForList::Words(w) => assert_eq!(w, &vec!["a", "b", "c"]),
8967 _ => panic!("expected word list"),
8968 }
8969 }
8970 _ => panic!("expected for command"),
8971 }
8972 }
8973
8974 #[test]
8975 fn test_case() {
8976 let _g = crate::test_util::global_state_lock();
8977 let prog = parse("case $x in a) echo a;; b) echo b;; esac").unwrap();
8978 match &prog.lists[0].sublist.pipe.cmd {
8979 ZshCommand::Case(c) => {
8980 assert_eq!(c.arms.len(), 2);
8981 }
8982 _ => panic!("expected case command"),
8983 }
8984 }
8985
8986 #[test]
8987 fn test_function() {
8988 let _g = crate::test_util::global_state_lock();
8989 // First test just parsing "function foo" to see what happens
8990 let prog = parse("function foo { }").unwrap();
8991 match &prog.lists[0].sublist.pipe.cmd {
8992 ZshCommand::FuncDef(f) => {
8993 assert_eq!(f.names, vec!["foo"]);
8994 }
8995 _ => panic!(
8996 "expected function, got {:?}",
8997 prog.lists[0].sublist.pipe.cmd
8998 ),
8999 }
9000 }
9001
9002 #[test]
9003 fn test_redirection() {
9004 let _g = crate::test_util::global_state_lock();
9005 let prog = parse("echo hello > file.txt").unwrap();
9006 match &prog.lists[0].sublist.pipe.cmd {
9007 ZshCommand::Simple(s) => {
9008 assert_eq!(s.redirs.len(), 1);
9009 assert_eq!(s.redirs[0].rtype, REDIR_WRITE);
9010 }
9011 _ => panic!("expected simple command"),
9012 }
9013 }
9014
9015 #[test]
9016 fn test_assignment() {
9017 let _g = crate::test_util::global_state_lock();
9018 let prog = parse("FOO=bar echo $FOO").unwrap();
9019 match &prog.lists[0].sublist.pipe.cmd {
9020 ZshCommand::Simple(s) => {
9021 assert_eq!(s.assigns.len(), 1);
9022 assert_eq!(s.assigns[0].name, "FOO");
9023 }
9024 _ => panic!("expected simple command"),
9025 }
9026 }
9027
9028 #[test]
9029 fn test_parse_completion_function() {
9030 let _g = crate::test_util::global_state_lock();
9031 let input = r#"_2to3_fixes() {
9032 local -a fixes
9033 fixes=( ${${(M)${(f)"$(2to3 --list-fixes 2>/dev/null)"}:#*}//[[:space:]]/} )
9034 (( ${#fixes} )) && _describe -t fixes 'fix' fixes
9035}"#;
9036 let result = parse(input);
9037 assert!(
9038 result.is_ok(),
9039 "Failed to parse completion function: {:?}",
9040 result.err()
9041 );
9042 let prog = result.unwrap();
9043 assert!(
9044 !prog.lists.is_empty(),
9045 "Expected at least one list in program"
9046 );
9047 }
9048
9049 #[test]
9050 fn test_parse_array_with_complex_elements() {
9051 let _g = crate::test_util::global_state_lock();
9052 let input = r#"arguments=(
9053 '(- * :)'{-h,--help}'[show this help message and exit]'
9054 {-d,--doctests_only}'[fix up doctests only]'
9055 '*:filename:_files'
9056)"#;
9057 let result = parse(input);
9058 assert!(
9059 result.is_ok(),
9060 "Failed to parse array assignment: {:?}",
9061 result.err()
9062 );
9063 }
9064
9065 #[test]
9066 fn test_parse_full_completion_file() {
9067 let _g = crate::test_util::global_state_lock();
9068 let input = r##"#compdef 2to3
9069
9070# zsh completions for '2to3'
9071
9072_2to3_fixes() {
9073 local -a fixes
9074 fixes=( ${${(M)${(f)"$(2to3 --list-fixes 2>/dev/null)"}:#*}//[[:space:]]/} )
9075 (( ${#fixes} )) && _describe -t fixes 'fix' fixes
9076}
9077
9078local -a arguments
9079
9080arguments=(
9081 '(- * :)'{-h,--help}'[show this help message and exit]'
9082 {-d,--doctests_only}'[fix up doctests only]'
9083 {-f,--fix}'[each FIX specifies a transformation; default: all]:fix name:_2to3_fixes'
9084 {-j,--processes}'[run 2to3 concurrently]:number: '
9085 {-x,--nofix}'[prevent a transformation from being run]:fix name:_2to3_fixes'
9086 {-l,--list-fixes}'[list available transformations]'
9087 {-p,--print-function}'[modify the grammar so that print() is a function]'
9088 {-v,--verbose}'[more verbose logging]'
9089 '--no-diffs[do not show diffs of the refactoring]'
9090 {-w,--write}'[write back modified files]'
9091 {-n,--nobackups}'[do not write backups for modified files]'
9092 {-o,--output-dir}'[put output files in this directory instead of overwriting]:directory:_directories'
9093 {-W,--write-unchanged-files}'[also write files even if no changes were required]'
9094 '--add-suffix[append this string to all output filenames]:suffix: '
9095 '*:filename:_files'
9096)
9097
9098_arguments -s -S $arguments
9099"##;
9100 let result = parse(input);
9101 assert!(
9102 result.is_ok(),
9103 "Failed to parse full completion file: {:?}",
9104 result.err()
9105 );
9106 let prog = result.unwrap();
9107 // Should have parsed successfully with at least one statement
9108 assert!(!prog.lists.is_empty(), "Expected at least one list");
9109 }
9110
9111 #[test]
9112 fn test_parse_logs_sh() {
9113 let _g = crate::test_util::global_state_lock();
9114 let input = r#"#!/usr/bin/env bash
9115shopt -s globstar
9116
9117if [[ $(uname) == Darwin ]]; then
9118 tail -f /var/log/**/*.log /var/log/**/*.out | lolcat
9119else
9120 if [[ $ZPWR_DISTRO_NAME == raspbian ]]; then
9121 tail -f /var/log/**/*.log | lolcat
9122 else
9123 printf "Unsupported...\n" >&2
9124 fi
9125fi
9126"#;
9127 let result = parse(input);
9128 assert!(
9129 result.is_ok(),
9130 "Failed to parse logs.sh: {:?}",
9131 result.err()
9132 );
9133 }
9134
9135 #[test]
9136 fn test_parse_case_with_glob() {
9137 let _g = crate::test_util::global_state_lock();
9138 let input = r#"case "$ZPWR_OS_TYPE" in
9139 darwin*) open_cmd='open'
9140 ;;
9141 cygwin*) open_cmd='cygstart'
9142 ;;
9143 linux*)
9144 open_cmd='xdg-open'
9145 ;;
9146esac"#;
9147 let result = parse(input);
9148 assert!(
9149 result.is_ok(),
9150 "Failed to parse case with glob: {:?}",
9151 result.err()
9152 );
9153 }
9154
9155 #[test]
9156 fn test_parse_case_with_nested_if() {
9157 let _g = crate::test_util::global_state_lock();
9158 // Test case with nested if and glob patterns
9159 let input = r##"function zpwrGetOpenCommand(){
9160 local open_cmd
9161 case "$ZPWR_OS_TYPE" in
9162 darwin*) open_cmd='open' ;;
9163 cygwin*) open_cmd='cygstart' ;;
9164 linux*)
9165 if [[ "$_zpwr_uname_r" != *icrosoft* ]];then
9166 open_cmd='nohup xdg-open'
9167 fi
9168 ;;
9169 esac
9170}"##;
9171 let result = parse(input);
9172 assert!(result.is_ok(), "Failed to parse: {:?}", result.err());
9173 }
9174
9175 #[test]
9176 fn test_parse_zpwr_scripts() {
9177 let _g = crate::test_util::global_state_lock();
9178 let scripts_dir = Path::new("/Users/wizard/.zpwr/scripts");
9179 if !scripts_dir.exists() {
9180 eprintln!("Skipping test: scripts directory not found");
9181 return;
9182 }
9183
9184 let mut total = 0;
9185 let mut passed = 0;
9186 let mut failed_files = Vec::new();
9187 let mut timeout_files = Vec::new();
9188
9189 for ext in &["sh", "zsh"] {
9190 let pattern = scripts_dir.join(format!("*.{}", ext));
9191 if let Ok(entries) = glob::glob(pattern.to_str().unwrap()) {
9192 for entry in entries.flatten() {
9193 total += 1;
9194 let file_path = entry.display().to_string();
9195 let content = match fs::read_to_string(&entry) {
9196 Ok(c) => c,
9197 Err(e) => {
9198 failed_files.push((file_path, format!("read error: {}", e)));
9199 continue;
9200 }
9201 };
9202
9203 // Parse with timeout
9204 let content_clone = content.clone();
9205 let (tx, rx) = mpsc::channel();
9206 let handle = thread::spawn(move || {
9207 let result = parse(&content_clone);
9208 let _ = tx.send(result);
9209 });
9210
9211 match rx.recv_timeout(Duration::from_secs(2)) {
9212 Ok(Ok(_)) => passed += 1,
9213 Ok(Err(err)) => {
9214 failed_files.push((file_path, err));
9215 }
9216 Err(_) => {
9217 timeout_files.push(file_path);
9218 // Thread will be abandoned
9219 }
9220 }
9221 }
9222 }
9223 }
9224
9225 eprintln!("\n=== ZPWR Scripts Parse Results ===");
9226 eprintln!("Passed: {}/{}", passed, total);
9227
9228 if !timeout_files.is_empty() {
9229 eprintln!("\nTimeout files (>2s):");
9230 for file in &timeout_files {
9231 eprintln!(" {}", file);
9232 }
9233 }
9234
9235 if !failed_files.is_empty() {
9236 eprintln!("\nFailed files:");
9237 for (file, err) in &failed_files {
9238 eprintln!(" {} - {}", file, err);
9239 }
9240 }
9241
9242 // Allow some failures initially, but track progress
9243 let pass_rate = if total > 0 {
9244 (passed as f64 / total as f64) * 100.0
9245 } else {
9246 0.0
9247 };
9248 eprintln!("Pass rate: {:.1}%", pass_rate);
9249
9250 // Require at least 50% pass rate for now
9251 assert!(pass_rate >= 50.0, "Pass rate too low: {:.1}%", pass_rate);
9252 }
9253
9254 /// c:2643 — `get_cond_num` returns 0..=8 for the canonical binary
9255 /// test operators in order `nt ot ef eq ne lt gt le ge`. The
9256 /// index IS the wordcode opcode dispatch key; flipping any entry
9257 /// would silently mis-dispatch `[[ a -eq b ]]` to a different op.
9258 #[test]
9259 fn get_cond_num_canonical_order_matches_dispatch_table() {
9260 let _g = crate::test_util::global_state_lock();
9261 assert_eq!(get_cond_num("nt"), 0);
9262 assert_eq!(get_cond_num("ot"), 1);
9263 assert_eq!(get_cond_num("ef"), 2);
9264 assert_eq!(get_cond_num("eq"), 3);
9265 assert_eq!(get_cond_num("ne"), 4);
9266 assert_eq!(get_cond_num("lt"), 5);
9267 assert_eq!(get_cond_num("gt"), 6);
9268 assert_eq!(get_cond_num("le"), 7);
9269 assert_eq!(get_cond_num("ge"), 8);
9270 }
9271
9272 /// c:2643 — unknown operator returns -1 (sentinel for "not in the
9273 /// binary set"). Regression returning 0 silently would alias
9274 /// every unknown op to `-nt`, dispatching to the wrong handler.
9275 #[test]
9276 fn get_cond_num_unknown_operator_returns_minus_one() {
9277 let _g = crate::test_util::global_state_lock();
9278 assert_eq!(get_cond_num("xx"), -1);
9279 assert_eq!(get_cond_num(""), -1);
9280 assert_eq!(get_cond_num("eqnt"), -1, "exact-match required");
9281 assert_eq!(
9282 get_cond_num("NT"),
9283 -1,
9284 "case-sensitive — uppercase rejected"
9285 );
9286 }
9287
9288 /// c:2628 — `par_cond_double` requires arg `a` to start with `-`
9289 /// AND have at least one more char. Empty string OR single `-`
9290 /// must error (return 1 via zerr). Regression accepting empty
9291 /// would dispatch `[[ "" string ]]` as a unary test.
9292 #[test]
9293 fn par_cond_double_rejects_short_or_non_dash_first_arg() {
9294 let _g = crate::test_util::global_state_lock();
9295 // empty
9296 let _ = par_cond_double("", "b");
9297 // not-dash
9298 let _ = par_cond_double("foo", "b");
9299 // bare dash
9300 let _ = par_cond_double("-", "b");
9301 // All three must NOT crash + return 1 (error path).
9302 }
9303
9304 /// c:2647 CONDSTRS table — exhaustive iteration: every entry's
9305 /// index round-trips through get_cond_num. A regression that
9306 /// drops an entry would let `[[ a -ef b ]]` silently mis-dispatch.
9307 #[test]
9308 fn get_cond_num_round_trips_for_every_table_entry() {
9309 let _g = crate::test_util::global_state_lock();
9310 for (i, op) in ["nt", "ot", "ef", "eq", "ne", "lt", "gt", "le", "ge"]
9311 .iter()
9312 .enumerate()
9313 {
9314 assert_eq!(get_cond_num(op) as usize, i, "{op} must map to index {i}");
9315 }
9316 }
9317
9318 /// c:2643 — `get_cond_num` is byte-exact: a partial-prefix string
9319 /// must NOT match. `e` (one char) is not `eq`. Catches a
9320 /// regression using `starts_with` instead of equality.
9321 #[test]
9322 fn get_cond_num_partial_prefix_does_not_match() {
9323 let _g = crate::test_util::global_state_lock();
9324 assert_eq!(get_cond_num("e"), -1);
9325 assert_eq!(get_cond_num("eq2"), -1);
9326 assert_eq!(get_cond_num("n"), -1);
9327 }
9328
9329 /// c:2628 — `par_cond_double` checks `IS_DASH(ac[0])` so any
9330 /// non-dash first char fails. The lexed Dash sentinel `\u{9b}`
9331 /// MUST be accepted alongside ASCII `-` (the lexer emits it
9332 /// inside `[[ ... ]]`). Regression dropping the sentinel form
9333 /// would break every cond expression after lexing.
9334 #[test]
9335 fn par_cond_double_accepts_lexed_dash_sentinel() {
9336 let _g = crate::test_util::global_state_lock();
9337 // First char being the Dash sentinel + valid unary letter
9338 // must NOT trigger the "condition expected" error path.
9339 // We can't easily probe the wordcode emission here, but
9340 // the function MUST return without panic for both forms.
9341 let _ = par_cond_double("-z", "foo");
9342 let _ = par_cond_double("\u{9b}z", "foo");
9343 }
9344
9345 /// c:2643 — case sensitivity: uppercase `EQ` MUST NOT match `eq`.
9346 /// zsh's `[[ a -EQ b ]]` is documented as a parse error (only
9347 /// lowercase variants are recognised). Regression doing
9348 /// case-insensitive lookup would silently accept it.
9349 #[test]
9350 fn get_cond_num_is_case_sensitive() {
9351 let _g = crate::test_util::global_state_lock();
9352 assert_eq!(get_cond_num("EQ"), -1);
9353 assert_eq!(get_cond_num("Eq"), -1);
9354 assert_eq!(get_cond_num("eQ"), -1);
9355 // Lowercase still works.
9356 assert_eq!(get_cond_num("eq"), 3);
9357 }
9358
9359 /// `Src/parse.c:2862-2868` — `ecgetstr` inline-3-byte case packs
9360 /// up to 3 chars into bits 3-26 of the wordcode word, then C emits
9361 /// `buf[3] = '\0'; r = dupstring(buf);`. `dupstring` uses `strlen`
9362 /// so the resulting string TRUNCATES at the first NUL byte —
9363 /// short strings of 1 or 2 chars get their tail NUL-padded and
9364 /// silently dropped by strlen.
9365 ///
9366 /// The previous Rust port used `retain(|&x| x != 0)` which SPLICES
9367 /// OUT interior NULs (so `[a, 0, b]` would yield "ab" instead of
9368 /// C's "a"). Verify both endpoints work correctly:
9369 /// * 1-char string ("a", 0, 0) → "a" (strlen-truncate)
9370 /// * 2-char string ("ab", 0) → "ab" (strlen-truncate)
9371 /// * 3-char string ("abc") → "abc" (full)
9372 /// * pathological ("a", 0, "b") → "a" (NOT "ab")
9373 #[test]
9374 fn ecgetstr_inline_string_truncates_at_first_nul_like_c_strlen() {
9375 let _g = crate::test_util::global_state_lock();
9376 // Build a wordcode word with `c & 2 != 0` (inline-string flag)
9377 // and the 3 bytes packed at offsets 3, 11, 19. `c & 1` is the
9378 // tokflag; clear it for this test.
9379 fn pack_inline(b0: u8, b1: u8, b2: u8) -> u32 {
9380 // c:2862 layout — bit0 = tokflag (0 here), bit1 = inline (1),
9381 // bits 3-10 = b0, bits 11-18 = b1, bits 19-26 = b2.
9382 (2u32) | ((b0 as u32) << 3) | ((b1 as u32) << 11) | ((b2 as u32) << 19)
9383 }
9384 let mk_state = |word: u32| -> estate {
9385 let p = eprog {
9386 flags: 0,
9387 len: 1,
9388 npats: 0,
9389 nref: 0,
9390 pats: Vec::new(),
9391 prog: vec![word],
9392 strs: None,
9393 shf: None,
9394 dump: None,
9395 };
9396 estate {
9397 prog: Box::new(p),
9398 pc: 0,
9399 strs: None,
9400 strs_offset: 0,
9401 }
9402 };
9403
9404 // 1-char: ('a', 0, 0) → "a"
9405 let mut st = mk_state(pack_inline(b'a', 0, 0));
9406 assert_eq!(
9407 ecgetstr(&mut st, 0, None),
9408 "a",
9409 "c:2869 strlen truncates 1-char inline at the NUL tail"
9410 );
9411
9412 // 2-char: ('a', 'b', 0) → "ab"
9413 let mut st = mk_state(pack_inline(b'a', b'b', 0));
9414 assert_eq!(
9415 ecgetstr(&mut st, 0, None),
9416 "ab",
9417 "c:2869 strlen truncates 2-char inline at the NUL tail"
9418 );
9419
9420 // 3-char: ('a', 'b', 'c') → "abc"
9421 let mut st = mk_state(pack_inline(b'a', b'b', b'c'));
9422 assert_eq!(
9423 ecgetstr(&mut st, 0, None),
9424 "abc",
9425 "c:2869 full 3-byte inline preserved"
9426 );
9427
9428 // Pathological: ('a', 0, 'b') → "a" (NOT "ab" from retain-splice)
9429 let mut st = mk_state(pack_inline(b'a', 0, b'b'));
9430 assert_eq!(
9431 ecgetstr(&mut st, 0, None),
9432 "a",
9433 "c:2869 strlen STOPS at first NUL; must not splice 'b' through"
9434 );
9435 }
9436
9437 /// Pin: `init_parse_status` resets ALL six lexer-parser flags
9438 /// per `Src/parse.c:500-502`. Specifically `inrepeat_ = 0` at
9439 /// c:501 was previously missing in the Rust port. Pin every
9440 /// reset so a future regression that drops one is caught.
9441 #[test]
9442 fn init_parse_status_resets_all_lexer_parser_flags() {
9443 let _g = crate::test_util::global_state_lock();
9444 // Dirty every flag to a non-default value.
9445 set_incasepat(5);
9446 set_incond(7);
9447 set_inredir(true);
9448 set_infor(3);
9449 set_intypeset(true);
9450 set_inrepeat(2);
9451 set_incmdpos(false);
9452 // Reset.
9453 init_parse_status();
9454 // c:500-502 — every flag back to its default.
9455 assert_eq!(incasepat(), 0, "c:500 — incasepat = 0");
9456 assert_eq!(incond(), 0, "c:500 — incond = 0");
9457 assert!(!inredir(), "c:500 — inredir = 0");
9458 assert_eq!(infor(), 0, "c:500 — infor = 0");
9459 assert!(!intypeset(), "c:500 — intypeset = 0");
9460 assert_eq!(
9461 inrepeat(),
9462 0,
9463 "c:501 — inrepeat_ = 0 (was previously missing)"
9464 );
9465 assert!(incmdpos(), "c:502 — incmdpos = 1");
9466 }
9467
9468 // ═══════════════════════════════════════════════════════════════════
9469 // AST shape tests — feed source through parse(), walk the resulting
9470 // ZshProgram, assert structural properties. Each test uses the local
9471 // `parse(input)` helper that errors cleanly on parse failure.
9472 // Anchor: where applicable, behavior matches `zsh -n -c '...'`
9473 // (parse-only, no execution — which would error on syntax issues).
9474 // ═══════════════════════════════════════════════════════════════════
9475
9476 /// Empty input → ZshProgram with no lists.
9477 #[test]
9478 fn parse_empty_source_yields_zero_lists() {
9479 let _g = crate::test_util::global_state_lock();
9480 let prog = parse("").unwrap();
9481 assert_eq!(prog.lists.len(), 0);
9482 }
9483
9484 /// Comment-only input → no lists (comments are skipped at lex level).
9485 #[test]
9486 fn parse_only_comment_yields_zero_lists() {
9487 let _g = crate::test_util::global_state_lock();
9488 let prog = parse("# this is just a comment").unwrap();
9489 assert_eq!(prog.lists.len(), 0, "comments alone produce no cmds");
9490 }
9491
9492 /// Three commands separated by `;` → three lists.
9493 #[test]
9494 fn parse_three_semicolon_separated_commands_yield_three_lists() {
9495 let _g = crate::test_util::global_state_lock();
9496 let prog = parse("a; b; c").unwrap();
9497 assert_eq!(prog.lists.len(), 3);
9498 }
9499
9500 /// Background command — async flag set on the list.
9501 #[test]
9502 fn parse_background_command_sets_async_flag() {
9503 let _g = crate::test_util::global_state_lock();
9504 let prog = parse("sleep 1 &").unwrap();
9505 assert_eq!(prog.lists.len(), 1);
9506 assert!(
9507 prog.lists[0].flags.async_,
9508 "trailing `&` must set async_ flag"
9509 );
9510 }
9511
9512 /// Pipe count: `a | b | c | d` → 4 stages.
9513 #[test]
9514 fn parse_four_stage_pipeline_has_three_next_links() {
9515 let _g = crate::test_util::global_state_lock();
9516 let prog = parse("a | b | c | d").unwrap();
9517 let mut pipe = &prog.lists[0].sublist.pipe;
9518 let mut count = 1;
9519 while let Some(next) = &pipe.next {
9520 pipe = next;
9521 count += 1;
9522 }
9523 assert_eq!(count, 4, "4 commands should produce 4 pipe stages");
9524 }
9525
9526 /// `|&` between pipeline stages sets merge_stderr.
9527 #[test]
9528 fn parse_pipe_amp_sets_merge_stderr() {
9529 let _g = crate::test_util::global_state_lock();
9530 let prog = parse("a |& b").unwrap();
9531 let pipe = &prog.lists[0].sublist.pipe;
9532 assert!(pipe.next.is_some());
9533 assert!(pipe.merge_stderr, "|& must set merge_stderr");
9534 }
9535
9536 /// `cmd1 || cmd2`: sublist.next is Some with `Or`.
9537 #[test]
9538 fn parse_or_operator_sets_sublist_op_or() {
9539 let _g = crate::test_util::global_state_lock();
9540 let prog = parse("cmd1 || cmd2").unwrap();
9541 let sublist = &prog.lists[0].sublist;
9542 let (op, _) = sublist.next.as_ref().expect("must have next");
9543 assert_eq!(*op, SublistOp::Or);
9544 }
9545
9546 /// `! cmd` sets the not flag on the sublist.
9547 #[test]
9548 fn parse_bang_negation_sets_sublist_not_flag() {
9549 let _g = crate::test_util::global_state_lock();
9550 let prog = parse("! false").unwrap();
9551 let sublist = &prog.lists[0].sublist;
9552 assert!(sublist.flags.not, "`!` prefix must set sublist.flags.not");
9553 }
9554
9555 // ── Compound commands ────────────────────────────────────────────
9556 /// `while cond; do body; done` → ZshCommand::While.
9557 #[test]
9558 fn parse_while_loop_yields_while_command() {
9559 let _g = crate::test_util::global_state_lock();
9560 let prog = parse("while true; do echo x; done").unwrap();
9561 assert!(matches!(
9562 prog.lists[0].sublist.pipe.cmd,
9563 ZshCommand::While(_)
9564 ));
9565 }
9566
9567 /// `until cond; do body; done` → ZshCommand::Until.
9568 /// Anchor: `zsh -n -c 'until false; do echo; done'` accepts and parses
9569 /// as an until-loop. zshrs accepts but emits a DIFFERENT AST variant
9570 /// (not Until). Bug — until loop is mis-classified.
9571 #[test]
9572 fn parse_until_loop_yields_until_command_anchored_to_zsh() {
9573 let _g = crate::test_util::global_state_lock();
9574 let prog = parse("until false; do echo x; done").unwrap();
9575 assert!(
9576 matches!(prog.lists[0].sublist.pipe.cmd, ZshCommand::Until(_)),
9577 "zsh parses `until` as Until variant; zshrs uses different variant: {:?}",
9578 prog.lists[0].sublist.pipe.cmd
9579 );
9580 }
9581
9582 /// `(cmd)` → Subsh variant.
9583 #[test]
9584 fn parse_parens_yield_subsh_command() {
9585 let _g = crate::test_util::global_state_lock();
9586 let prog = parse("(echo hi)").unwrap();
9587 assert!(matches!(
9588 prog.lists[0].sublist.pipe.cmd,
9589 ZshCommand::Subsh(_)
9590 ));
9591 }
9592
9593 /// `{ cmd; }` → Cursh (current-shell) command.
9594 #[test]
9595 fn parse_braces_yield_cursh_command() {
9596 let _g = crate::test_util::global_state_lock();
9597 let prog = parse("{ echo hi; }").unwrap();
9598 assert!(matches!(
9599 prog.lists[0].sublist.pipe.cmd,
9600 ZshCommand::Cursh(_)
9601 ));
9602 }
9603
9604 /// `[[ a == b ]]` → ZshCommand::Cond.
9605 #[test]
9606 fn parse_double_brackets_yield_cond_command() {
9607 let _g = crate::test_util::global_state_lock();
9608 let prog = parse("[[ a == b ]]").unwrap();
9609 assert!(matches!(
9610 prog.lists[0].sublist.pipe.cmd,
9611 ZshCommand::Cond(_)
9612 ));
9613 }
9614
9615 /// `(( 1 + 2 ))` → ZshCommand::Arith.
9616 #[test]
9617 fn parse_double_parens_yield_arith_command() {
9618 let _g = crate::test_util::global_state_lock();
9619 let prog = parse("(( 1 + 2 ))").unwrap();
9620 assert!(matches!(
9621 prog.lists[0].sublist.pipe.cmd,
9622 ZshCommand::Arith(_)
9623 ));
9624 }
9625
9626 /// `repeat 3 do echo x; done` → ZshCommand::Repeat.
9627 #[test]
9628 fn parse_repeat_loop_yields_repeat_command() {
9629 let _g = crate::test_util::global_state_lock();
9630 let prog = parse("repeat 3 do echo x; done").unwrap();
9631 assert!(matches!(
9632 prog.lists[0].sublist.pipe.cmd,
9633 ZshCommand::Repeat(_)
9634 ));
9635 }
9636
9637 // ── Function definitions ─────────────────────────────────────────
9638 /// `name() { body; }` → FuncDef variant.
9639 #[test]
9640 fn parse_paren_funcdef_yields_funcdef_command() {
9641 let _g = crate::test_util::global_state_lock();
9642 let prog = parse("greet() { echo hi; }").unwrap();
9643 assert!(matches!(
9644 prog.lists[0].sublist.pipe.cmd,
9645 ZshCommand::FuncDef(_)
9646 ));
9647 }
9648
9649 /// `function name { body; }` → FuncDef variant (zsh keyword form).
9650 #[test]
9651 fn parse_function_keyword_funcdef_yields_funcdef_command() {
9652 let _g = crate::test_util::global_state_lock();
9653 let prog = parse("function greet { echo hi; }").unwrap();
9654 assert!(matches!(
9655 prog.lists[0].sublist.pipe.cmd,
9656 ZshCommand::FuncDef(_)
9657 ));
9658 }
9659
9660 /// Syntax error — `if` without `fi` → parse returns Err.
9661 /// Anchor: `echo 'if true; then echo' | zsh -n` → "parse error".
9662 #[test]
9663 fn parse_unterminated_if_returns_error_anchored_to_zsh() {
9664 let _g = crate::test_util::global_state_lock();
9665 let r = parse("if true; then echo yes");
9666 assert!(r.is_err(), "zsh -n: parse error near `\\n`");
9667 }
9668
9669 /// Syntax error — bare `done` without `for/while/until` → error.
9670 /// Anchor: `echo done | zsh -n` → "parse error near `done`".
9671 #[test]
9672 fn parse_orphan_done_returns_error_anchored_to_zsh() {
9673 let _g = crate::test_util::global_state_lock();
9674 let r = parse("done");
9675 assert!(r.is_err(), "zsh -n: parse error near `done`");
9676 }
9677
9678 /// Simple command's words are metafied at the AST layer (matches
9679 /// zsh's internal representation: `-` lexes to `Dash` = 0x9b, `*`
9680 /// to `Star`, etc.). zsh untokenizes via `untokenize()` BEFORE
9681 /// surfacing words at execution time (Src/exec.c:execcmd_args).
9682 /// This test pins the round-trip: `untokenize(word)` recovers the
9683 /// user-visible form. If parse-time unmetafy ever lands the
9684 /// untokenize call becomes a no-op; the test stays green either
9685 /// way. Companion test below pins the metafied internal form.
9686 #[test]
9687 fn parse_simple_command_words_unmetafied_like_zsh_anchored() {
9688 let _g = crate::test_util::global_state_lock();
9689 let prog = parse("ls -la /tmp").unwrap();
9690 match &prog.lists[0].sublist.pipe.cmd {
9691 ZshCommand::Simple(s) => {
9692 let untok: Vec<String> = s
9693 .words
9694 .iter()
9695 .map(|w| crate::ported::lex::untokenize(w))
9696 .collect();
9697 assert_eq!(
9698 untok,
9699 vec!["ls", "-la", "/tmp"],
9700 "untokenize(word) must yield the user-visible form"
9701 );
9702 }
9703 other => panic!("expected Simple, got {other:?}"),
9704 }
9705 }
9706
9707 /// Pin the OBSERVED zshrs contract: simple-command word array
9708 /// contains metafied bytes. This is the active (passing) version
9709 /// of the anchor above — it documents zshrs's current internal
9710 /// representation. If zshrs starts unmetafying at parse time, this
9711 /// test will FAIL and the anchor-style test above will start passing.
9712 #[test]
9713 fn parse_simple_command_words_metafied_internal_form() {
9714 let _g = crate::test_util::global_state_lock();
9715 let prog = parse("ls -la /tmp").unwrap();
9716 match &prog.lists[0].sublist.pipe.cmd {
9717 ZshCommand::Simple(s) => {
9718 assert_eq!(s.words.len(), 3);
9719 assert_eq!(s.words[0], "ls");
9720 assert_eq!(s.words[2], "/tmp");
9721 // s.words[1] contains the metafied `-` (`\u{9b}` Dash byte)
9722 // followed by "la". Don't pin the exact byte form (it
9723 // may change); pin that the length is right.
9724 assert_eq!(s.words[1].chars().count(), 3, "`-la` is 3 chars");
9725 assert!(s.words[1].ends_with("la"));
9726 }
9727 other => panic!("expected Simple, got {other:?}"),
9728 }
9729 }
9730
9731 // ─── zsh-corpus pins for parser: structural shapes ────────────────
9732
9733 /// Empty input — parse succeeds, lists may be empty.
9734 #[test]
9735 fn parse_corpus_empty_input_no_error() {
9736 let _g = crate::test_util::global_state_lock();
9737 let prog = parse("").unwrap();
9738 assert!(
9739 prog.lists.is_empty() || prog.lists.len() <= 1,
9740 "empty input → 0 or 1 list, got {}",
9741 prog.lists.len()
9742 );
9743 }
9744
9745 /// Comment-only input parses as empty.
9746 #[test]
9747 fn parse_corpus_comment_only_no_error() {
9748 let _g = crate::test_util::global_state_lock();
9749 let r = parse("# just a comment");
9750 assert!(r.is_ok(), "comment-only parse should succeed");
9751 }
9752
9753 /// `cmd1; cmd2` — two top-level lists or two sublists.
9754 #[test]
9755 fn parse_corpus_semicolon_separates_commands() {
9756 let _g = crate::test_util::global_state_lock();
9757 let prog = parse("echo a; echo b").unwrap();
9758 // We pin: parse produces > 0 lists/sublists; details vary.
9759 assert!(!prog.lists.is_empty(), "non-empty parse");
9760 }
9761
9762 /// `a && b` — DAMPER joins into a sublist chain.
9763 #[test]
9764 fn parse_corpus_logical_and_parses() {
9765 let _g = crate::test_util::global_state_lock();
9766 let r = parse("true && false");
9767 assert!(r.is_ok(), "`a && b` parses cleanly");
9768 }
9769
9770 /// `a || b` — DBAR.
9771 #[test]
9772 fn parse_corpus_logical_or_parses() {
9773 let _g = crate::test_util::global_state_lock();
9774 let r = parse("false || true");
9775 assert!(r.is_ok(), "`a || b` parses cleanly");
9776 }
9777
9778 /// `a | b` pipeline.
9779 #[test]
9780 fn parse_corpus_pipeline_parses() {
9781 let _g = crate::test_util::global_state_lock();
9782 let r = parse("echo hi | cat");
9783 assert!(r.is_ok(), "`a | b` parses");
9784 }
9785
9786 /// `if true; then echo x; fi` — basic if-then-fi block.
9787 #[test]
9788 fn parse_corpus_if_then_fi_parses() {
9789 let _g = crate::test_util::global_state_lock();
9790 let r = parse("if true; then echo x; fi");
9791 assert!(r.is_ok(), "if/then/fi parses cleanly");
9792 }
9793
9794 /// `for i in 1 2 3; do echo $i; done`.
9795 #[test]
9796 fn parse_corpus_for_do_done_parses() {
9797 let _g = crate::test_util::global_state_lock();
9798 let r = parse("for i in 1 2 3; do echo $i; done");
9799 assert!(r.is_ok(), "for/do/done parses cleanly");
9800 }
9801
9802 /// `while true; do break; done`.
9803 #[test]
9804 fn parse_corpus_while_do_done_parses() {
9805 let _g = crate::test_util::global_state_lock();
9806 let r = parse("while true; do break; done");
9807 assert!(r.is_ok(), "while/do/done parses cleanly");
9808 }
9809
9810 /// `case x in (a) echo A;; esac` — case statement.
9811 #[test]
9812 fn parse_corpus_case_esac_parses() {
9813 let _g = crate::test_util::global_state_lock();
9814 let r = parse("case x in (a) echo A;; esac");
9815 assert!(r.is_ok(), "case/esac parses cleanly");
9816 }
9817
9818 /// Function definition `f() { echo x }`.
9819 #[test]
9820 fn parse_corpus_function_def_parses() {
9821 let _g = crate::test_util::global_state_lock();
9822 let r = parse("f() { echo x }");
9823 assert!(r.is_ok(), "f() {{ ... }} parses cleanly");
9824 }
9825
9826 /// `(subshell echo a)` — subshell.
9827 #[test]
9828 fn parse_corpus_subshell_parens_parses() {
9829 let _g = crate::test_util::global_state_lock();
9830 let r = parse("( echo a )");
9831 assert!(r.is_ok(), "subshell parses cleanly");
9832 }
9833
9834 // ═══════════════════════════════════════════════════════════════════
9835 // C-parity tests pinning Src/parse.c. Tests that capture KNOWN
9836 // ZSHRS BUGS use #[ignore = "ZSHRS BUG: …"].
9837 // ═══════════════════════════════════════════════════════════════════
9838
9839 /// `empty_eprog(p)` returns true on an eprog with empty `prog`.
9840 /// C `Src/parse.c:584`:
9841 /// `return (!p || !p->prog || *p->prog == WCB_END());`
9842 /// Rust port at parse.rs:685 — `p.prog.is_empty() || p.prog[0] == WCB_END()`.
9843 #[test]
9844 fn empty_eprog_empty_prog_returns_true() {
9845 let _g = crate::test_util::global_state_lock();
9846 let p = crate::ported::zsh_h::eprog::default();
9847 assert!(empty_eprog(&p), "empty prog vec → empty_eprog true");
9848 }
9849
9850 /// `empty_eprog(p)` returns true when first wordcode is WCB_END.
9851 /// C: `*p->prog == WCB_END()`.
9852 #[test]
9853 fn empty_eprog_first_wcb_end_returns_true() {
9854 let _g = crate::test_util::global_state_lock();
9855 let mut p = crate::ported::zsh_h::eprog::default();
9856 p.prog.push(WCB_END());
9857 assert!(empty_eprog(&p), "prog[0]==WCB_END → empty_eprog true");
9858 }
9859
9860 /// `empty_eprog(p)` returns false for non-empty non-END prog.
9861 #[test]
9862 fn empty_eprog_non_empty_non_end_returns_false() {
9863 let _g = crate::test_util::global_state_lock();
9864 let mut p = crate::ported::zsh_h::eprog::default();
9865 // Push some non-END wordcode (1 is arbitrary non-zero, not WCB_END).
9866 p.prog.push(1);
9867 assert!(!empty_eprog(&p), "non-END first opcode → false");
9868 }
9869
9870 /// `ecstrcode("")` returns a wordcode for the empty string. C
9871 /// `Src/parse.c:346-ish` ecstrcode interns strings in `ecbuf`.
9872 /// Pin: same call returns same wordcode (deterministic intern).
9873 #[test]
9874 fn ecstrcode_empty_string_returns_deterministic_code() {
9875 let _g = crate::test_util::global_state_lock();
9876 init_parse();
9877 let a = ecstrcode("");
9878 let b = ecstrcode("");
9879 assert_eq!(a, b, "intern of '' must be deterministic");
9880 }
9881
9882 /// `ecstrcode` of two different strings returns different codes.
9883 #[test]
9884 fn ecstrcode_distinct_strings_get_distinct_codes() {
9885 let _g = crate::test_util::global_state_lock();
9886 init_parse();
9887 let a = ecstrcode("foo");
9888 let b = ecstrcode("bar");
9889 // Should differ — if equal, intern table collapsed two different
9890 // strings to the same key (bug).
9891 assert_ne!(a, b, "different strings must intern to different codes");
9892 }
9893
9894 /// `parse_event(ENDINPUT)` on empty input returns None.
9895 /// C `Src/parse.c:715-ish` — empty token stream → no program.
9896 #[test]
9897 #[ignore = "ZSHRS BUG: parse_event setup needs lex state — exact behavior on empty input verification pending"]
9898 fn parse_event_empty_returns_none() {
9899 let _g = crate::test_util::global_state_lock();
9900 init_parse();
9901 // Empty input typically yields no program; needs lex state.
9902 let r = parse_event(crate::ported::lex::ENDINPUT);
9903 assert!(r.is_none(), "no tokens → no event");
9904 }
9905
9906 // ═══════════════════════════════════════════════════════════════════
9907 // Additional C-parity tests for Src/parse.c.
9908 // ═══════════════════════════════════════════════════════════════════
9909
9910 /// c:399 — `ecadd(c)` returns the index where `c` was placed,
9911 /// not the post-increment value. Sequential ecadd calls return
9912 /// strictly increasing indices.
9913 #[test]
9914 fn ecadd_returns_strictly_increasing_indices() {
9915 let _g = crate::test_util::global_state_lock();
9916 init_parse();
9917 let i0 = ecadd(0xDEAD);
9918 let i1 = ecadd(0xBEEF);
9919 let i2 = ecadd(0xC0DE);
9920 assert!(
9921 i1 > i0,
9922 "ecadd indices must strictly increase, got {i0} then {i1}"
9923 );
9924 assert!(
9925 i2 > i1,
9926 "ecadd indices must strictly increase, got {i1} then {i2}"
9927 );
9928 assert_eq!(i1, i0 + 1, "consecutive ecadds advance by 1");
9929 assert_eq!(i2, i1 + 1, "consecutive ecadds advance by 1");
9930 }
9931
9932 /// c:413 — `ecdel(p)` removes one wordcode, shrinks ecused by 1.
9933 /// Pin: subsequent ecadd reuses freed slot (ecused decreased).
9934 #[test]
9935 fn ecdel_shrinks_ecused_by_one() {
9936 let _g = crate::test_util::global_state_lock();
9937 init_parse();
9938 let _i0 = ecadd(0xA);
9939 let i1 = ecadd(0xB);
9940 let _i2 = ecadd(0xC);
9941 let next_before = ECUSED.get();
9942 ecdel(i1);
9943 let next_after = ECUSED.get();
9944 assert_eq!(
9945 next_after,
9946 next_before - 1,
9947 "ecdel must decrement ecused by exactly 1"
9948 );
9949 }
9950
9951 /// c:399-405 — `ecadd` after exhausting buffer must grow it (no
9952 /// panic on push past current eclen). Pin: 1000 adds don't crash.
9953 #[test]
9954 fn ecadd_grows_buffer_on_demand() {
9955 let _g = crate::test_util::global_state_lock();
9956 init_parse();
9957 for i in 0..1000 {
9958 ecadd(i as u32);
9959 }
9960 // No panic = grow path works.
9961 assert!(ECUSED.get() >= 1000, "1000 adds → ecused ≥ 1000");
9962 }
9963
9964 /// c:426 — `ecstrcode` of short strings (≤4 bytes) returns a
9965 /// packed inline wordcode (not an offset into the string region).
9966 /// Pin: identical short strings get identical codes.
9967 #[test]
9968 fn ecstrcode_short_strings_are_deterministic() {
9969 let _g = crate::test_util::global_state_lock();
9970 init_parse();
9971 let a = ecstrcode("ab");
9972 let b = ecstrcode("ab");
9973 assert_eq!(a, b, "same short string must intern to same code");
9974 }
9975
9976 /// c:426 — long strings (>4 bytes) hit the deduped string region.
9977 /// Pin: same long string returns same code on repeat (registry
9978 /// dedupes).
9979 #[test]
9980 fn ecstrcode_long_strings_dedupe_in_registry() {
9981 let _g = crate::test_util::global_state_lock();
9982 init_parse();
9983 let a = ecstrcode("a-much-longer-test-string");
9984 let b = ecstrcode("a-much-longer-test-string");
9985 assert_eq!(a, b, "registry must dedupe identical long strings");
9986 }
9987
9988 /// `clear_hdocs()` is idempotent — calling twice in a row leaves
9989 /// HDOCS = None and LEX_HEREDOCS empty.
9990 #[test]
9991 fn clear_hdocs_is_idempotent() {
9992 let _g = crate::test_util::global_state_lock();
9993 clear_hdocs();
9994 clear_hdocs();
9995 HDOCS.with_borrow(|h| assert!(h.is_none(), "HDOCS must be None"));
9996 LEX_HEREDOCS.with_borrow(|v| assert!(v.is_empty(), "LEX_HEREDOCS must be empty"));
9997 }
9998
9999 /// `init_parse()` resets parse state to known empty defaults.
10000 /// Multiple init_parse calls are safe (idempotent).
10001 #[test]
10002 fn init_parse_is_idempotent() {
10003 let _g = crate::test_util::global_state_lock();
10004 init_parse();
10005 init_parse();
10006 // No panic = pass.
10007 }
10008
10009 /// `empty_eprog` returns true for a default-constructed eprog
10010 /// (empty prog vec).
10011 #[test]
10012 fn empty_eprog_true_for_empty_prog() {
10013 let _g = crate::test_util::global_state_lock();
10014 let p = eprog {
10015 prog: Vec::new(),
10016 ..Default::default()
10017 };
10018 assert!(empty_eprog(&p), "empty prog vec → empty eprog");
10019 }
10020
10021 /// `empty_eprog` returns true when prog[0] == WCB_END().
10022 #[test]
10023 fn empty_eprog_true_for_end_only_prog() {
10024 let _g = crate::test_util::global_state_lock();
10025 let p = eprog {
10026 prog: vec![WCB_END()],
10027 ..Default::default()
10028 };
10029 assert!(empty_eprog(&p), "WCB_END as first opcode → empty");
10030 }
10031
10032 /// `ecadjusthere(p, d)` is safe to call when HDOCS is None.
10033 #[test]
10034 fn ecadjusthere_safe_when_hdocs_none() {
10035 let _g = crate::test_util::global_state_lock();
10036 clear_hdocs();
10037 // No panic = pass.
10038 ecadjusthere(0, 0);
10039 ecadjusthere(100, -5);
10040 ecadjusthere(0, 10);
10041 }
10042
/// `ecispace(0, 0)` must leave `ECUSED` at the value it had before the
/// call.
#[test]
fn ecispace_zero_n_is_noop() {
    let _guard = crate::test_util::global_state_lock();
    init_parse();
    let used_before = ECUSED.get();
    ecispace(0, 0);
    assert_eq!(
        used_before,
        ECUSED.get(),
        "ecispace(_, 0) must not advance ecused"
    );
}
10053
10054 // ═══════════════════════════════════════════════════════════════════
10055 // Additional C-parity tests for Src/parse.c
10056 // c:146 parse_context_save / c:191 parse_context_restore /
10057 // c:225 ecadjusthere / c:293 ecadd / c:346 ecstrcode / c:574 init_parse /
// c:685 empty_eprog / c:693 clear_hdocs / c:786 parse_list / c:815 parse_cond /
10059 // c:2234 par_wordlist / c:2249 par_nl_wordlist
10060 // ═══════════════════════════════════════════════════════════════════
10061
/// c:293 — compile-time pin: the value returned by `ecadd` must be
/// assignable to a `usize` binding.
#[test]
fn ecadd_returns_usize_type() {
    let _guard = crate::test_util::global_state_lock();
    init_parse();
    // The type annotation is the assertion; the value itself is unused.
    let _index: usize = ecadd(0);
}
10069
/// c:346 — compile-time pin: the value returned by `ecstrcode` must be
/// assignable to a `u32` binding.
#[test]
fn ecstrcode_returns_u32_type() {
    let _guard = crate::test_util::global_state_lock();
    init_parse();
    // The type annotation is the assertion; the value itself is unused.
    let _code: u32 = ecstrcode("");
}
10077
/// c:346 — encoding the empty string with `ecstrcode` must not panic.
#[test]
fn ecstrcode_empty_string_no_panic() {
    let _guard = crate::test_util::global_state_lock();
    init_parse();
    // Only the absence of a panic matters; the result is discarded.
    let _ignored = ecstrcode("");
}
10085
/// c:346 — repeated `ecstrcode` calls on the same input must keep
/// returning the value produced by the first call.
#[test]
fn ecstrcode_is_deterministic() {
    let _guard = crate::test_util::global_state_lock();
    init_parse();
    for input in ["", "a", "abc", "hello world"] {
        // The first encoding is the baseline for the three repeats below.
        let baseline = ecstrcode(input);
        (0..3).for_each(|_| {
            assert_eq!(
                ecstrcode(input),
                baseline,
                "ecstrcode({:?}) must be deterministic",
                input
            );
        });
    }
}
10103
/// c:786 — compile-time pin: `parse_list` must yield an
/// `Option<eprog>`.
#[test]
fn parse_list_returns_option_eprog_type() {
    let _guard = crate::test_util::global_state_lock();
    init_parse();
    // The type annotation is the assertion; the parsed value is unused.
    let _parsed: Option<eprog> = parse_list();
}
10111
/// c:815 — compile-time pin: `parse_cond` must yield an
/// `Option<eprog>`.
#[test]
fn parse_cond_returns_option_eprog_type() {
    let _guard = crate::test_util::global_state_lock();
    init_parse();
    // The type annotation is the assertion; the parsed value is unused.
    let _parsed: Option<eprog> = parse_cond();
}
10119
/// c:2234 — compile-time pin: `par_wordlist` must yield a
/// `Vec<String>`.
#[test]
fn par_wordlist_returns_vec_string_type() {
    let _guard = crate::test_util::global_state_lock();
    init_parse();
    // The type annotation is the assertion; the words are unused.
    let _words: Vec<String> = par_wordlist();
}
10127
/// c:2249 — compile-time pin: `par_nl_wordlist` must yield a
/// `Vec<String>`.
#[test]
fn par_nl_wordlist_returns_vec_string_type() {
    let _guard = crate::test_util::global_state_lock();
    init_parse();
    // The type annotation is the assertion; the words are unused.
    let _words: Vec<String> = par_nl_wordlist();
}
10135
/// c:693 — smoke test: invoking `clear_hdocs` twice must not panic.
#[test]
fn clear_hdocs_deterministic_after_call() {
    let _guard = crate::test_util::global_state_lock();
    for _ in 0..2 {
        clear_hdocs();
    }
}
10143
/// c:225 — smoke test: `ecadjusthere` at position 0 with a zero delta
/// must not panic.
#[test]
fn ecadjusthere_zero_delta_no_panic() {
    let _guard = crate::test_util::global_state_lock();
    // Returning without a panic is the pass condition.
    ecadjusthere(0, 0);
}
10150
/// c:225 — smoke test: `ecadjusthere` must not panic for a spread of
/// positions combined with zero, positive and negative deltas.
#[test]
fn ecadjusthere_arbitrary_pos_no_panic() {
    let _guard = crate::test_util::global_state_lock();
    for pos in [0usize, 1, 100, 9999] {
        // Same per-position order as before: zero, +1, then -1.
        for delta in [0, 1, -1] {
            ecadjusthere(pos, delta);
        }
    }
}
10161
10162 // ═══════════════════════════════════════════════════════════════════
10163 // Additional C-parity tests for Src/parse.c FD_* accessors
// c:3127 fdmagic / c:3131 fdflags / c:3132 fdsetflags / c:3133 fdother /
// c:3134 fdsetother / c:3140 fdversion / c:3145 fdhflags / c:3146 fdhtail /
// c:3147 fdhbldflags
10166 // ═══════════════════════════════════════════════════════════════════
10167
10168 fn build_fd_header() -> Vec<u32> {
10169 let mut buf = vec![0u32; FD_PRELEN + 32];
10170 buf[0] = FD_MAGIC; // pre[0] magic
10171 buf[1] = (0x12u32) | (0x00ABCDEFu32 << 8); // flags=0x12, other=0xABCDEF
10172 // Embed version string starting at pre[2].
10173 let ver = b"5.9\0";
10174 for (i, chunk) in ver.chunks(4).enumerate() {
10175 let mut word = [0u8; 4];
10176 word[..chunk.len()].copy_from_slice(chunk);
10177 buf[2 + i] = u32::from_le_bytes(word);
10178 }
10179 buf[FD_PRELEN - 1] = (FD_PRELEN as u32) + 8; // header-len slot
10180 buf
10181 }
10182
/// c:3127 — `fdmagic` must hand back the first word of the buffer
/// unchanged.
#[test]
fn fdmagic_returns_pre_zero_word() {
    let header = build_fd_header();
    let magic = fdmagic(&header);
    assert_eq!(magic, FD_MAGIC, "fdmagic = pre[0]");
}
10189
/// c:3131 — `fdflags` must return the `0x12` stored in the low byte of
/// the second word.
#[test]
fn fdflags_low_byte_extraction() {
    let header = build_fd_header();
    let flags = fdflags(&header);
    assert_eq!(flags, 0x12, "flags = pre[1] & 0xff");
}
10196
/// c:3133 — `fdother` must return the `0xABCDEF` stored above the low
/// byte of the second word.
#[test]
fn fdother_high_24_bits_extraction() {
    let header = build_fd_header();
    let other = fdother(&header);
    assert_eq!(other, 0x00ABCDEF, "other = pre[1] >> 8 & 0x00ffffff");
}
10207
/// c:3132 — after `fdsetflags`, `fdflags` reads the new value while
/// `fdother` still reads what it did before the write.
#[test]
fn fdsetflags_preserves_high_24_bits() {
    let mut header = build_fd_header();
    let untouched_other = fdother(&header);

    fdsetflags(&mut header, 0x42);

    assert_eq!(fdflags(&header), 0x42, "new flags written");
    assert_eq!(fdother(&header), untouched_other, "high 24 bits preserved");
}
10217
/// c:3134 — after `fdsetother`, `fdother` reads the new value while
/// `fdflags` still reads what it did before the write.
#[test]
fn fdsetother_preserves_low_byte() {
    let mut header = build_fd_header();
    let untouched_flags = fdflags(&header);

    fdsetother(&mut header, 0x00DEADBE);

    assert_eq!(fdother(&header), 0x00DEADBE, "new other written");
    assert_eq!(fdflags(&header), untouched_flags, "low byte preserved");
}
10227
/// c:3134 — passing an all-ones 32-bit value to `fdsetother` must
/// leave `fdother` reading only `0x00FF_FFFF`.
#[test]
fn fdsetother_clamps_to_24_bits() {
    let mut header = build_fd_header();
    // All 32 bits set: anything above bit 23 has to be discarded.
    fdsetother(&mut header, 0xFFFF_FFFF);
    let stored = fdother(&header);
    assert_eq!(stored, 0x00FF_FFFF, "high bits dropped");
}
10236
/// c:3140 — compile-time pin: `fdversion` must yield an owned
/// `String`.
#[test]
fn fdversion_returns_string_type() {
    let header = build_fd_header();
    // The type annotation is the assertion; the text is unused.
    let _version: String = fdversion(&header);
}
10243
/// c:3140 — `fdversion` must return `"5.9"` for the fixture, whose
/// version bytes are `"5.9"` followed by a NUL.
#[test]
fn fdversion_reads_until_nul() {
    let header = build_fd_header();
    let version = fdversion(&header);
    assert_eq!(version, "5.9", "version read until NUL");
}
10250
/// c:3145 — `fdhflags` must return only the two lowest bits of the
/// header's `flags` word.
#[test]
fn fdhflags_low_two_bits() {
    // 0b1011: the bits above the low two are 0b10, the low two are 0b11.
    let packed = 0b1011;
    let head = fdhead {
        flags: packed,
        start: 0,
        len: 0,
        npats: 0,
        strs: 0,
        hlen: 0,
    };
    assert_eq!(fdhflags(&head), 0b11, "flags = h.flags & 0x3");
}
10264
/// c:3146 — `fdhtail` must return the header's `flags` word shifted
/// right by two, discarding the low flag bits.
#[test]
fn fdhtail_shift_right_two() {
    // Tail value 0x12_3456 sits above two set flag bits.
    let packed = (0x12_3456 << 2) | 0x3;
    let head = fdhead {
        flags: packed,
        start: 0,
        len: 0,
        npats: 0,
        strs: 0,
        hlen: 0,
    };
    assert_eq!(fdhtail(&head), 0x12_3456, "tail = h.flags >> 2");
}
10278
/// c:3147 — `fdhbldflags(flags, tail)` must place `flags` in the low
/// two bits of the result and `tail` in the bits above them.
#[test]
fn fdhbldflags_packs_flags_low_tail_high() {
    let word = fdhbldflags(0x3, 0x42);
    let low_bits = word & 0x3;
    let high_bits = word >> 2;
    assert_eq!(low_bits, 0x3, "low 2 bits = flags");
    assert_eq!(high_bits, 0x42, "high 30 bits = tail");
}
10286
/// c:3145-3147 — a word built by `fdhbldflags` must decode back to the
/// same pair through `fdhflags` and `fdhtail`.
#[test]
fn fdh_round_trip_via_bldflags() {
    let cases: [(u32, u32); 4] = [(0, 0), (1, 100), (2, 0xABC), (3, 0xFFFF)];
    for (flags, tail) in cases {
        let head = fdhead {
            flags: fdhbldflags(flags, tail),
            start: 0,
            len: 0,
            npats: 0,
            strs: 0,
            hlen: 0,
        };
        assert_eq!(fdhflags(&head), flags, "flags round-trips");
        assert_eq!(fdhtail(&head), tail, "tail round-trips");
    }
}
10304
/// c:8271 — `firstfdhead_offset()` must equal the `FD_PRELEN`
/// constant.
#[test]
fn firstfdhead_offset_returns_prelen() {
    let offset = firstfdhead_offset();
    assert_eq!(offset, FD_PRELEN, "first header starts after prelude");
}
10314
/// c:3127 — `fdmagic` must reflect whichever magic word is stored in
/// the first slot, and `FD_MAGIC` and `FD_OMAGIC` must be distinct.
#[test]
fn fdmagic_differentiates_magic_omagic() {
    let mut prelude = vec![FD_MAGIC; FD_PRELEN];
    assert_eq!(fdmagic(&prelude), FD_MAGIC);

    // Overwrite only the first word and read it back.
    prelude[0] = FD_OMAGIC;
    assert_eq!(fdmagic(&prelude), FD_OMAGIC, "swapped magic readable");

    assert_ne!(FD_MAGIC, FD_OMAGIC, "the two magics differ");
}
10324}