zsh/ported/parse.rs
1//! Zsh parser — direct port from zsh/Src/parse.c.
2//!
3//! Pulls tokens via the lex.rs free fns (zshlex/tok/tokstr) and
4//! builds an AST tree (relocated to src/extensions/zsh_ast.rs as a
5//! Rust-only IR) plus emits wordcode into ECBUF via the P9b/P9c
6//! pipeline. Follows the zsh grammar closely; productions match
7//! `par_*` in Src/parse.c.
8
9use super::lex::{
10 lextok, set_tok, AMPER, AMPERBANG, AMPOUTANG, BANG_TOK, BARAMP, BAR_TOK, CASE, COPROC, DAMPER,
11 DBAR, DINANG, DINANGDASH, DINBRACK, DINPAR, DOLOOP, DONE, DOUTANG, DOUTANGAMP, DOUTANGAMPBANG,
12 DOUTANGBANG, DOUTBRACK, DOUTPAR, DSEMI, ELIF, ELSE, ENDINPUT, ENVARRAY, ENVSTRING, ESAC, FI,
13 FOR, FOREACH, FUNC, IF, INANGAMP, INANG_TOK, INBRACE_TOK, INOUTANG, INOUTPAR, INPAR_TOK,
14 IS_REDIROP, LEXERR, NEWLIN, NOCORRECT, NULLTOK, OUTANGAMP, OUTANGAMPBANG, OUTANGBANG,
15 OUTANG_TOK, OUTBRACE_TOK, OUTPAR_TOK, REPEAT, SELECT, SEMI, SEMIAMP, SEMIBAR, SEPER,
16 STRING_LEX, THEN, TIME, TRINANG, TYPESET, UNTIL, WHILE, ZEND,
17};
18use super::zsh_h::{
19 eprog, estate, isset, redir, unset, wc_code, wordcode, Bang, Dash, Equals, Inang, Inpar,
20 Outang, Outpar, Stringg, Tilde, ALIASFUNCDEF, COND_AND, COND_MOD, COND_MODI, COND_NOT, COND_NT,
21 COND_OR, COND_REGEX, COND_STRDEQ, COND_STREQ, COND_STRGTR, COND_STRLT, COND_STRNEQ,
22 CSHJUNKIELOOPS,
23 EC_DUP, EC_NODUP, EF_HEAP, EF_REAL, EXECOPT, IGNOREBRACES, IS_DASH, MULTIFUNCDEF, OPT_ISSET,
24 PM_UNDEFINED, POSIXBUILTINS, REDIRF_FROM_HEREDOC, REDIR_APP, REDIR_APPNOW,
25 REDIR_FROM_HEREDOC_MASK, REDIR_VARID_MASK, REDIR_ERRAPP,
26 REDIR_ERRAPPNOW, REDIR_ERRWRITE, REDIR_ERRWRITENOW, REDIR_HEREDOC, REDIR_HEREDOCDASH,
27 REDIR_HERESTR, REDIR_INPIPE, REDIR_MERGEIN, REDIR_MERGEOUT, REDIR_OUTPIPE, REDIR_READ,
28 REDIR_READWRITE, REDIR_WRITE, REDIR_WRITENOW, SHORTLOOPS, SHORTREPEAT, WCB_COND, WCB_SIMPLE,
29 WC_REDIR, WC_REDIR_FROM_HEREDOC, WC_REDIR_TYPE, WC_REDIR_VARID, WC_SUBLIST_COPROC,
30 WC_SUBLIST_NOT,
31};
32use crate::ported::utils::{zerr, zwarnnam};
33use serde::{Deserialize, Serialize};
34use std::fs::File;
35use std::io::{Read, Seek, SeekFrom, Write};
36use std::sync::atomic::{AtomicUsize, Ordering};
37
// Direct port of one of the `Src/parse.c:287-289` grow-policy constants:
// the number of wordcode entries `init_parse` allocates for a fresh
// buffer. The other two grow-policy constants (`EC_DOUBLE_THRESHOLD`,
// `EC_INCREMENT`) are declared further down in this file.
const EC_INIT_SIZE: i32 = 256;
40
// Pending-here-document list — direct port of `Src/parse.c:84
// struct heredocs *hdocs;`. Per-parser file-static (bucket-1 in
// PORT_PLAN.md): each worker thread parsing a separate program needs
// its own pending-heredoc list. `parse_context_save` moves the list
// out (leaving `None`) and `parse_context_restore` puts a copy back
// (parse.c:299/337); `clear_hdocs` drops it.
thread_local! {
    /// Port of file-static `struct heredocs *hdocs;` from `Src/parse.c:84`.
    /// `None` means no here-documents are pending.
    pub static HDOCS: std::cell::RefCell<Option<Box<crate::ported::zsh_h::heredocs>>>
        = const { std::cell::RefCell::new(None) };
}
51
52// Wordcode-buffer thread-locals — direct port of `Src/parse.c:269-285`
53// file-statics. Per-evaluator (bucket-1 in PORT_PLAN.md): each worker
54// thread parsing a separate program needs its own wordcode buffer.
55//
56// ECBUF: the wordcode array being built. C `Wordcode ecbuf`
57// (parse.c:275).
58// ECLEN: allocated entries in ECBUF (parse.c:269).
59// ECUSED: entries actually used so far (parse.c:271).
60// ECNPATS: count of patterns referenced by ECBUF (parse.c:273).
61// ECSOFFS / ECSSUB: byte offsets into the string region
62// (parse.c:279). ECSSUB subtracts substring overlap.
63// ECNFUNC: count of functions defined so far (parse.c:285).
64// ECSTRS_INDEX: dedup index for long strings — C uses a binary tree
65// of `struct eccstr` (zsh.h:836); the canonical Eccstr port exists
66// at zsh_h::eccstr but stays unused at runtime here. The HashMap
67// preserves the API contract (lookup by (nfunc, str) → offs) with
68// simpler ownership semantics.
69thread_local! {
70 pub static ECBUF: std::cell::RefCell<Vec<u32>> = std::cell::RefCell::new(Vec::new());
71 static ECLEN: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
72 static ECUSED: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
73 static ECNPATS: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
74 static ECSOFFS: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
75 static ECSSUB: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
76 static ECNFUNC: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
77 static ECSTRS_INDEX: std::cell::RefCell<std::collections::HashMap<(i32, String), u32>>
78 = std::cell::RefCell::new(std::collections::HashMap::new());
79 /// C zsh's `eccstr` BST (parse.c:447). Port of `Eccstr ecstrs` —
80 /// a hashval-ordered binary search tree of long-strings for
81 /// dedup. Same cmp logic as C: nfunc, then hashval, then strcmp.
82 /// HashMap above is a fast-path lookup; this tree is the
83 /// C-fidelity walker that mirrors C's exact dedup-hit pattern
84 /// (including its quirks for hash-colliding content).
85 static ECSTRS_TREE: std::cell::RefCell<Option<Box<EccstrNode>>>
86 = const { std::cell::RefCell::new(None) };
87 /// Reverse index for `ecgetstr`: offs → owned string. Populated
88 /// at ecstrcode time so the consumer can recover the string from
89 /// the wordcode offs without walking the encode-time HashMap.
90 /// Stores the METAFIED BYTE form of each long-string, exactly
91 /// matching what C's strs region holds. `String` would not work
92 /// here because Rust strings carry UTF-8-encoded chars (e.g.
93 /// the Dash marker `\u{9b}` UTF-8-encodes to two bytes
94 /// `\xc2 \x9b`) while C stores zsh markers as single bytes
95 /// (raw `\x9b`). Storing Vec<u8> lets us write byte-for-byte
96 /// what C writes after metafy.
97 pub static ECSTRS_REVERSE: std::cell::RefCell<std::collections::HashMap<u32, Vec<u8>>>
98 = std::cell::RefCell::new(std::collections::HashMap::new());
99}
// Grow policy shared by `ecispace` and `ecadd`: while the buffer holds
// fewer than EC_DOUBLE_THRESHOLD entries it grows by its current length
// (i.e. doubles); from then on it grows by EC_INCREMENT entries.
const EC_DOUBLE_THRESHOLD: i32 = 32768;
const EC_INCREMENT: i32 = 1024;
102
103/// Direct port of `parse_context_save(struct parse_stack *ps, int toplevel)` at `Src/parse.c:295`.
104/// Snapshots the lexer-side file-statics (which currently live on
105/// `lexer` until Phase 7 dissolution makes them file-scope
106/// thread_local!s) plus the pending heredoc list, plus the
107/// wordcode-buffer state (STUB until Phase 9b). Saves Rust-only
108/// recursion counters too so nested parses get fresh limits.
109/// WARNING: param names don't match C — Rust=(ps) vs C=(ps, toplevel)
110pub fn parse_context_save(ps: &mut parse_stack) {
111 // parse.c:299 — `ps->hdocs = hdocs; hdocs = NULL;` — save the
112 // canonical C linked-list and clear it for the nested parse.
113 ps.hdocs = HDOCS.with_borrow_mut(|h| h.take());
114 // zshrs-only: save the parallel AST-glue Vec the same way.
115 // LEX_HEREDOCS carries terminator/strip_tabs/quoted metadata
116 // that has no C analog (C stores it implicitly via tokstr).
117 ps.lex_heredocs = crate::ported::lex::LEX_HEREDOCS.with_borrow_mut(|v| std::mem::take(v));
118 // parse.c:302-310 — save lexer-side state.
119 ps.incmdpos = incmdpos();
120 // parse.c:303 — aliasspaceflag — not yet a LEX_* thread_local.
121 // STUB; Phase 7 wires it. Same for the few below marked STUB.
122 ps.aliasspaceflag = 0;
123 ps.incond = incond();
124 ps.inredir = inredir();
125 ps.incasepat = incasepat();
126 ps.isnewlin = isnewlin();
127 ps.infor = infor();
128 ps.inrepeat_ = inrepeat();
129 ps.intypeset = intypeset();
130 // parse.c:312-317 — wordcode buffer state. STUB until Phase 9b
131 // (zshrs has no ecbuf yet).
132 ps.eclen = 0;
133 ps.ecused = 0;
134 ps.ecnpats = 0;
135 ps.ecbuf = None;
136 ps.ecstrs = None;
137 ps.ecsoffs = 0;
138 ps.ecssub = 0;
139 ps.ecnfunc = 0;
140 set_incmdpos(true);
141 set_incond(0);
142 set_inredir(false);
143 set_incasepat(0);
144 set_infor(0);
145 set_inrepeat(0);
146 set_intypeset(false);
147}
148
149/// Direct port of `parse_context_restore(const struct parse_stack *ps, int toplevel)` at `Src/parse.c:326`.
150/// Inverse of `parse_context_save`. Restores lexer-side state +
151/// pending heredocs + Rust-only counters from `ps`, then clears
152/// `errflag & ERRFLAG_ERROR` per parse.c:354.
153/// WARNING: param names don't match C — Rust=(ps) vs C=(ps, toplevel)
154pub fn parse_context_restore(ps: &parse_stack) {
155 // parse.c:330-331 — free any in-progress wordcode buffer.
156 // zshrs has no wordcode yet (STUB until Phase 9b); the AST
157 // nodes are owned by their parent so dropping the parser
158 // frees them.
159
160 // parse.c:333-352 — restore saved state.
161 // parse.c:337 — `hdocs = ps->hdocs;`
162 HDOCS.with_borrow_mut(|h| *h = ps.hdocs.clone());
163 // zshrs-only: restore the parallel AST-glue Vec.
164 crate::ported::lex::LEX_HEREDOCS.with_borrow_mut(|v| *v = ps.lex_heredocs.clone());
165 set_incmdpos(ps.incmdpos);
166 // aliasspaceflag STUB until Phase 7.
167 set_incond(ps.incond);
168 set_inredir(ps.inredir);
169 set_incasepat(ps.incasepat);
170 set_isnewlin(ps.isnewlin);
171 set_infor(ps.infor);
172 set_inrepeat(ps.inrepeat_);
173 set_intypeset(ps.intypeset);
174 // ecbuf/eclen/ecused/ecnpats/ecstrs/ecsoffs/ecssub/ecnfunc
175 // STUB until Phase 9b.
176
177 // parse.c:354 — `errflag &= ~ERRFLAG_ERROR;` — clear the
178 // error flag so the outer parse sees a clean state.
179 crate::ported::utils::errflag.fetch_and(
180 !crate::ported::utils::ERRFLAG_ERROR,
181 std::sync::atomic::Ordering::Relaxed,
182 );
183}
184
/// Port of `ecadjusthere(int p, int d)` at `Src/parse.c:360`.
///
/// In C this walks the pending-heredocs list and adds `d` to every
/// entry whose `pc` is at or after `p` (parse.c:362-366:
/// `for (p2 = hdocs; p2; p2 = p2->next) if (p2->pc >= p) p2->pc += d;`).
/// It is called by `ecispace` / `ecdel` whenever wordcodes shift.
///
/// The Rust body is currently a deliberate no-op: it reads neither
/// `HDOCS` nor any other state. The original note attributes this to
/// pending heredocs not yet carrying a wordcode `pc` (to be wired in
/// Phase 9c).
pub fn ecadjusthere(p: usize, d: i32) {
    // Arguments are accepted for signature parity with C and ignored.
    let _ = (p, d);
}
198
199// === AST tree relocated to src/extensions/zsh_ast.rs ===
200//
201// zsh C does NOT have an AST tree — it emits wordcode directly via
202// par_event/par_list/par_sublist/par_pipe/par_cmd/par_simple/etc.
203// (Src/parse.c:485-3000) into a flat `Wordcode ecbuf[]`. The Zsh*/
204// Shell* AST node types lived in this file as a Rust-only IR that
205// stands in for that wordcode.
206//
207// P9e (PORT_PLAN.md): the types moved to src/extensions/zsh_ast.rs
208// to make their Rust-only-extension nature explicit. The full P9c +
209// P9d rewrite (par_* emitting wordcode + exec.rs reading wordcode)
210// retires them entirely — until then, callers reach them via this
211// re-export.
212pub use crate::heredoc_ast::HereDoc;
213pub use crate::zsh_ast::{
214 CaseArm, CaseTerm, CaseTerminator, CompoundCommand, ForList, HereDocInfo, ListFlags, ListOp,
215 Redirect, RedirectOp, ShellCommand, ShellWord, SimpleCommand, SublistFlags, SublistOp,
216 VarModifier, ZshAssign, ZshAssignValue, ZshCase, ZshCommand, ZshCond, ZshFor, ZshFuncDef,
217 ZshIf, ZshList, ZshParamFlag, ZshPipe, ZshProgram, ZshRedir, ZshRepeat, ZshSimple, ZshSublist,
218 ZshTry, ZshWhile,
219};
220use crate::ported::lex::{
221 incasepat, incmdpos, incond, infor, input_slice, inredir, inrepeat, intypeset,
222 isnewlin, lex_init, lineno, noaliases, nocorrect, pos, set_incasepat, set_incmdpos, set_incond, set_lineno,
223 set_infor, set_inredir, set_inrepeat, set_intypeset, set_isnewlin, set_noaliases,
224 set_nocorrect, set_pos, set_tokfd, set_toklineno, set_tokstr, tok, tokfd, toklineno, tokstr, zshlex,
225};
226use crate::prompt::{cmdpop, cmdpush};
227use crate::zsh_h::{
228 wc_bdata, CS_ALWAYS, CS_ARRAY, CS_CASE, CS_CMDAND, CS_CMDOR, CS_COND, CS_CURSH, CS_ELIF, CS_ELSE,
229 CS_ERRPIPE, CS_FOR, CS_FOREACH, CS_FUNCDEF, CS_IF, CS_IFTHEN, CS_PIPE, CS_REPEAT, CS_SELECT,
230 CS_SUBSH, CS_UNTIL, CS_WHILE, EF_RUN, WCB_ARITH, WCB_ASSIGN, WCB_CASE, WCB_CURSH, WCB_END,
231 WCB_FOR, WCB_FUNCDEF, WCB_IF, WCB_LIST, WCB_PIPE, WCB_REDIR, WCB_REPEAT, WCB_SELECT,
232 WCB_SUBLIST, WCB_SUBSH, WCB_TIMED, WCB_TRY, WCB_TYPESET, WCB_WHILE, WC_ASSIGN_ARRAY, WC_ASSIGN_INC,
233 WC_ASSIGN_NEW, WC_ASSIGN_SCALAR, WC_CASE_AND, WC_CASE_HEAD, WC_CASE_OR, WC_CASE_TESTAND,
234 WC_FOR_COND, WC_FOR_LIST, WC_FOR_PPARAM, WC_IF_ELIF, WC_IF_ELSE, WC_IF_HEAD, WC_IF_IF,
235 WC_PIPE_END, WC_PIPE_LINENO,
236 WC_PIPE_MID, WC_REDIR_WORDS, WC_SELECT_LIST, WC_SELECT_PPARAM, WC_SUBLIST_AND, WC_SUBLIST_END,
237 WC_SUBLIST_FLAGS, WC_SUBLIST_OR, WC_SUBLIST_SIMPLE, WC_SUBLIST_TYPE, WC_TIMED_EMPTY,
238 WC_TIMED_PIPE, WC_WHILE_UNTIL, WC_WHILE_WHILE, Z_ASYNC, Z_DISOWN, Z_END, Z_SIMPLE, Z_SYNC,
239};
240
241/// Direct port of `ecispace(int p, int n)` at `Src/parse.c:372`. Insert `n`
242/// empty wordcode slots at position `p`, shifting later entries
243/// right, growing the buffer as needed, adjusting heredoc pointers.
244pub fn ecispace(p: usize, n: usize) {
245 // parse.c:376-381 — grow if needed.
246 let need = n as i32;
247 if (ECLEN.get() - ECUSED.get()) < need {
248 let cur = ECLEN.get();
249 let mut a = if cur < EC_DOUBLE_THRESHOLD {
250 cur
251 } else {
252 EC_INCREMENT
253 };
254 if need > a {
255 a = need;
256 }
257 ECBUF.with_borrow_mut(|buf| {
258 buf.resize((cur + a) as usize, 0);
259 });
260 ECLEN.set(cur + a);
261 }
262 // parse.c:382-385 — memmove p → p+n, gap of n.
263 let m = ECUSED.get() as usize - p;
264 if m > 0 {
265 ECBUF.with_borrow_mut(|buf| {
266 let needed = (ECUSED.get() as usize) + n;
267 if buf.len() < needed {
268 buf.resize(needed, 0);
269 }
270 for i in (0..m).rev() {
271 buf[p + n + i] = buf[p + i];
272 }
273 for i in 0..n {
274 buf[p + i] = 0;
275 }
276 });
277 }
278 // parse.c:386 — bump ecused by n.
279 ECUSED.set(ECUSED.get() + need);
280 // parse.c:387 — `ecadjusthere(p, n)`.
281 ecadjusthere(p, need);
282}
283
284/// Direct port of `ecadd(wordcode c)` at `Src/parse.c:397`. Append `c` to
285/// the wordcode buffer with grow-on-demand, return the new index.
286pub fn ecadd(c: u32) -> usize {
287 // parse.c:399-405 — `if ((eclen - ecused) < 1) grow`.
288 if (ECLEN.get() - ECUSED.get()) < 1 {
289 let cur = ECLEN.get();
290 let a = if cur < EC_DOUBLE_THRESHOLD {
291 cur
292 } else {
293 EC_INCREMENT
294 };
295 ECBUF.with_borrow_mut(|buf| {
296 buf.resize((cur + a) as usize, 0);
297 });
298 ECLEN.set(cur + a);
299 }
300 let idx = ECUSED.get();
301 ECBUF.with_borrow_mut(|buf| {
302 if (idx as usize) >= buf.len() {
303 buf.resize((idx + 1) as usize, 0);
304 }
305 buf[idx as usize] = c;
306 });
307 ECUSED.set(idx + 1);
308 idx as usize
309}
310
311/// Direct port of `ecdel(int p)` at `Src/parse.c:413`. Remove the
312/// wordcode at position `p`, shift later entries left by one,
313/// decrement ecused, adjust pending heredoc pointers.
314pub fn ecdel(p: usize) {
315 // parse.c:415-418 — memmove + decrement ecused.
316 let n = ECUSED.get() as usize - p - 1;
317 if n > 0 {
318 ECBUF.with_borrow_mut(|buf| {
319 for i in 0..n {
320 buf[p + i] = buf[p + i + 1];
321 }
322 });
323 }
324 ECUSED.set(ECUSED.get() - 1);
325 // parse.c:420 — `ecadjusthere(p, -1)`.
326 ecadjusthere(p, -1);
327}
328
329/// Direct port of `ecstrcode(char *s)` at `Src/parse.c:426`. Encode a
330/// string into a single wordcode (short strings ≤4 bytes packed
331/// inline; longer strings get an offset into the deduped registry).
332///
333/// The long-string path stores the METAFIED bytes (matches what C's
334/// strs region contains): collapse Rust UTF-8 chars in 0x80..=0xff
335/// to single bytes, then apply zsh metafy (high bytes ≥ 0x83 →
336/// `Meta=0x83 + byte^0x20`). Length tracking (ECSOFFS) uses the
337/// metafied byte count — same as C `strlen(s) + 1` where C's `s`
338/// is already metafied at this point.
339pub fn ecstrcode(s: &str) -> u32 {
340 // Convert Rust char-form → C-byte form. zsh's metafy() at
341 // Src/utils.c only converts bytes flagged IMETA: 0x00, 0x83
342 // (Meta itself), and 0x84..=0xa2 (Pound..Marker, the lex
343 // markers). Other bytes 0x01..=0x82 and 0xa3..=0xff pass
344 // through unchanged. See utils.c:4195-4204 typtab init.
345 //
346 // Rust receives chars. Classify each:
347 // - codepoint in [0x83..=0xa2] → marker char (emitted by lex
348 // post-metafy in C); 1 byte unchanged
349 // - codepoint < 0x80 → ASCII, 1 byte unchanged
350 // - codepoint in [0x80..=0x82] or [0xa3..=0xff] → single
351 // non-imeta byte (user-input range); 1 byte unchanged
352 // - codepoint > 0xff → multi-byte UTF-8 source char (e.g.
353 // '━' = U+2501 = 0xe2 0x94 0x81). Metafy ONLY the bytes
354 // that fall in 0x83..=0xa2; pass others through. For '━':
355 // 0xe2 stays, 0x94 → 0x83 0xb4, 0x81 stays.
356 let mut c_bytes: Vec<u8> = Vec::with_capacity(s.len());
357 let imeta = |b: u8| -> bool { b == 0 || (0x83..=0xa2).contains(&b) };
358 for ch in s.chars() {
359 let cu = ch as u32;
360 if cu < 0x80 {
361 // ASCII — single byte unchanged.
362 c_bytes.push(cu as u8);
363 } else if (0x83..=0xa2).contains(&cu) {
364 // Lex marker char (emitted by lex.add(Marker) post-metafy
365 // in C). Stored as single byte.
366 c_bytes.push(cu as u8);
367 } else {
368 // User-input char: encode UTF-8 then metafy imeta bytes.
369 // For chars 0x80..=0xff (like 'º' U+00BA), UTF-8 gives
370 // 2 bytes (e.g. `0xc2 0xba`) — zsh's lex reads these as
371 // raw bytes from input and metafy passes 0xc2 / 0xba
372 // through (both NOT imeta).
373 let mut tmp = [0u8; 4];
374 for &b in ch.encode_utf8(&mut tmp).as_bytes() {
375 if imeta(b) {
376 c_bytes.push(0x83);
377 c_bytes.push(b ^ 0x20);
378 } else {
379 c_bytes.push(b);
380 }
381 }
382 }
383 }
384 // c:`has_token` (Src/utils.c:2282) → `itok(*s)` → `typtab[c] & ITOK`.
385 // ITOK is set for bytes `Pound..=Nularg` (0x84..=0xa1) per
386 // Src/utils.c:4198 (`for (t0=Pound; t0<=LAST_NORMAL_TOK; t0++)
387 // typtab[t0]|=ITOK`) plus :4200 (`for (t0=Snull; t0<=Nularg; t0++)
388 // typtab[t0]|=ITOK|IMETA|INULL`). Pound=0x84 Bang=0x9c (last normal),
389 // Snull=0x9d..Nularg=0xa1. Meta=0x83 has IMETA but NOT ITOK.
390 let t = c_bytes.iter().any(|&b| (0x84..=0xa1).contains(&b));
391 let l = c_bytes.len() + 1; // include NUL terminator
392 if l <= 4 {
393 // parse.c:436-445 — short-string inline pack. Uses raw C-bytes
394 // (NOT metafied — the inline packing stores 1 byte per slot).
395 let mut c: u32 = if t { 3 } else { 2 };
396 match l {
397 4 => {
398 c |= (c_bytes[2] as u32) << 19;
399 c |= (c_bytes[1] as u32) << 11;
400 c |= (c_bytes[0] as u32) << 3;
401 }
402 3 => {
403 c |= (c_bytes[1] as u32) << 11;
404 c |= (c_bytes[0] as u32) << 3;
405 }
406 2 => {
407 c |= (c_bytes[0] as u32) << 3;
408 }
409 1 => {
410 // parse.c:443 — empty string special case.
411 c = if t { 7 } else { 6 };
412 }
413 _ => {}
414 }
415 c
416 } else {
417 // parse.c:447-466 — long string. Port of C's eccstr BST walk
418 // exactly: walk the tree comparing nfunc, then hashval, then
419 // strcmp on bytes. Return offs on full match; insert new
420 // leaf otherwise. Matches C's exact dedup-hit pattern
421 // (which is content-dependent — hash collisions and the
422 // lazy short-circuit cmp chain make the tree shape determine
423 // whether matching nodes are reachable).
424 // hasher is byte-by-byte polynomial (hashtable.c:86); pass
425 // c_bytes via from_utf8_unchecked so non-UTF-8 zsh marker
426 // bytes feed straight in. SAFETY: hasher only iterates
427 // `.bytes()` — no UTF-8 validity assumed.
428 let val = crate::ported::hashtable::hasher(unsafe {
429 std::str::from_utf8_unchecked(&c_bytes)
430 });
431 let nfunc = ECNFUNC.get();
432 let found_offs = ECSTRS_TREE.with_borrow_mut(|root| {
433 // Walk the tree. At each node, if all 3 cmps == 0,
434 // return the node's offs. Otherwise descend left/right
435 // by the first non-zero cmp's sign.
436 let mut cur: &mut Option<Box<EccstrNode>> = root;
437 loop {
438 let p = match cur.as_mut() {
439 Some(p) => p,
440 None => break None,
441 };
442 // c:448 — `cmp = p->nfunc - ecnfunc`
443 let mut cmp = (p.nfunc as i64) - (nfunc as i64);
444 if cmp == 0 {
445 // c:448 — `&& !(cmp = (long)p->hashval - (long)val)`
446 // C does `(int)(p->hashval - val)` — unsigned 32-bit
447 // subtraction wraps, then cast to int. Use
448 // wrapping_sub + as i32 to match the bit pattern.
449 cmp = (p.hashval.wrapping_sub(val) as i32) as i64;
450 if cmp == 0 {
451 // c:448 — `&& !(cmp = strcmp(p->str, s))`
452 cmp = match p.str.as_slice().cmp(c_bytes.as_slice()) {
453 std::cmp::Ordering::Less => -1,
454 std::cmp::Ordering::Equal => 0,
455 std::cmp::Ordering::Greater => 1,
456 };
457 if cmp == 0 {
458 // c:450 — `return p->offs;`
459 break Some(p.offs);
460 }
461 }
462 }
463 // c:452 — `pp = (cmp < 0 ? &p->left : &p->right);`
464 cur = if cmp < 0 { &mut p.left } else { &mut p.right };
465 }
466 });
467 if let Some(offs) = found_offs {
468 return offs;
469 }
470 // c:462 — `p->offs = ((ecsoffs - ecssub) << 2) | (t ? 1 : 0);`
471 let offs =
472 (((ECSOFFS.get() - ECSSUB.get()) as u32) << 2) | if t { 1 } else { 0 };
473 // c:463 — `p->aoffs = ecsoffs;` (absolute write position).
474 let aoffs = ECSOFFS.get() as u32;
475 // c:457-465 — insert new node at the NULL slot the walk
476 // terminated at. Encode the walk path as a Vec<bool> of
477 // left/right turns (true = right), then re-descend to
478 // insert. Borrow-checker friendly: a single mutable walk
479 // that either finds an existing node (descend) or fills
480 // the empty slot (return).
481 let stored = c_bytes.clone();
482 let stored_len = stored.len();
483 let new_node = Box::new(EccstrNode {
484 left: None,
485 right: None,
486 str: stored.clone(),
487 offs,
488 aoffs,
489 nfunc,
490 hashval: val,
491 });
492 ECSTRS_TREE.with_borrow_mut(|root| {
493 // Build the path first (immutable-walk; safe because we
494 // only ever go further down).
495 let mut path: Vec<bool> = Vec::new();
496 {
497 let mut cur: &Option<Box<EccstrNode>> = root;
498 while let Some(p) = cur.as_ref() {
499 let mut cmp = (p.nfunc as i64) - (nfunc as i64);
500 if cmp == 0 {
501 // C does `(int)(p->hashval - val)` — unsigned 32-bit
502 // subtraction wraps, then cast to int. Use
503 // wrapping_sub + as i32 to match the bit pattern.
504 cmp = (p.hashval.wrapping_sub(val) as i32) as i64;
505 if cmp == 0 {
506 cmp = match p.str.as_slice().cmp(c_bytes.as_slice()) {
507 std::cmp::Ordering::Less => -1,
508 std::cmp::Ordering::Equal => 0,
509 std::cmp::Ordering::Greater => 1,
510 };
511 }
512 }
513 let go_right = cmp >= 0;
514 path.push(go_right);
515 cur = if go_right { &p.right } else { &p.left };
516 }
517 }
518 // Descend mutably along the recorded path and assign at
519 // the NULL leaf.
520 let mut cur: &mut Option<Box<EccstrNode>> = root;
521 for turn in path {
522 let p = cur.as_mut().expect("path matches walk");
523 cur = if turn { &mut p.right } else { &mut p.left };
524 }
525 *cur = Some(new_node);
526 });
527 // Also keep the existing reverse index (offs → bytes) for
528 // ecgetstr_wordcode and copy_ecstr — they read flat by offs.
529 ECSTRS_REVERSE.with_borrow_mut(|m| {
530 m.insert(offs, stored);
531 });
532 let _ = l;
533 ECSOFFS.set(ECSOFFS.get() + (stored_len + 1) as i32);
534 offs
535 }
536}
537
538/// Initialize parser status. Direct port of zsh/Src/parse.c:491
539/// `init_parse_status`. Clears the per-parse-call lexer flags
540/// so a fresh parse starts from cmd-position with no nesting
541/// state inherited from a prior parse.
542///
543/// Previously the Rust port omitted `inrepeat_ = 0` at c:501.
544/// `inrepeat_` is the `repeat N <body>` parse-state counter that
545/// the lexer toggles in 3 phases (1 → 2 → 3 → 0). Without the
546/// reset, a fresh parse called after an in-flight `repeat`
547/// command would inherit the stale counter and silently misread
548/// the next token as a body of an already-completed repeat.
549pub fn init_parse_status() { // c:491
550 // parse.c:500-502 — `incasepat = incond = inredir = infor =
551 // intypeset = 0; inrepeat_ = 0; incmdpos = 1;`
552 set_incasepat(0); // c:500
553 set_incond(0); // c:500
554 set_inredir(false); // c:500
555 set_infor(0); // c:500
556 set_intypeset(false); // c:500
557 crate::ported::lex::set_inrepeat(0); // c:501 inrepeat_ = 0
558 set_incmdpos(true); // c:502
559}
560
561/// Initialize parser for a fresh parse. Direct port of
562/// zsh/Src/parse.c:509 `init_parse`. C source allocates a
563/// fresh wordcode buffer (ecbuf) sized EC_INIT_SIZE, resets the
564/// per-parse-call counters, and calls init_parse_status. zshrs
565/// has no flat wordcode buffer (AST is built inline) so this
566/// function reduces to init_parse_status + recursion_depth/
567/// global_iterations clear.
568pub fn init_parse() {
569 // parse.c:513-520 — `ecbuf = (Wordcode) zalloc(EC_INIT_SIZE *
570 // sizeof(wordcode)); eclen = EC_INIT_SIZE; ecused = 0;
571 // ecnpats = 0; ecstrs = NULL; ecsoffs = ecnfunc = 0;
572 // ecssub = 0;`. P9b — initialize the per-evaluator wordcode
573 // buffer for this parse call. zshrs uses thread-local
574 // statics declared at file scope (parse.rs:25-50).
575 ECBUF.with_borrow_mut(|buf| {
576 buf.clear();
577 buf.resize(EC_INIT_SIZE as usize, 0);
578 });
579 ECLEN.set(EC_INIT_SIZE);
580 ECUSED.set(0);
581 ECNPATS.set(0);
582 ECSOFFS.set(0);
583 ECSSUB.set(0);
584 ECNFUNC.set(0);
585 ECSTRS_INDEX.with_borrow_mut(|m| m.clear());
586 ECSTRS_REVERSE.with_borrow_mut(|m| m.clear());
587 ECSTRS_TREE.with_borrow_mut(|t| *t = None);
588
589 // parse.c:522 — `init_parse_status();`
590 init_parse_status();
591}
592
593/// Port of `copy_ecstr(Eccstr s, char *p)` from `Src/parse.c:537`.
594/// Walks the BST and writes each entry to `p[s->aoffs..]` matching
595/// C's recursive in-order traversal exactly. The old impl used the
596/// `ECSTRS_REVERSE` HashMap keyed by `offs` (= ecssub-relative
597/// wordcode-encoded offset), which collides across funcdef scopes:
598/// a string at relative offs=0 inside funcdef A and another at
599/// relative offs=0 inside funcdef B share the same key, so one
600/// overwrites the other.
601pub fn copy_ecstr(_table: &std::collections::HashMap<u32, Vec<u8>>, p: &mut [u8]) {
602 // c:537-544 — walk eccstr BST recursively, writing each node's
603 // str at p[node->aoffs..node->aoffs + strlen + 1] (NUL-terminated).
604 ECSTRS_TREE.with_borrow(|root| {
605 copy_ecstr_walk(root, p);
606 });
607}
608
609/// Port of `bld_eprog(int heap)` from `Src/parse.c:547`. Finalizes
610/// the in-build `ECBUF`/`ECSTRS`/`ECNPATS` state into an `Eprog`.
611/// Resets the build state so a new parse can start.
612pub fn bld_eprog(heap: bool) -> crate::ported::zsh_h::eprog {
613 // c:547
614
615 // c:555 — emit WC_END opcode. `WCB_END` is `WC_END_DEFAULT` (0).
616 ecadd(0);
617
618 let ecused = ECUSED.with(|c| c.get()) as usize;
619 let ecnpats = ECNPATS.with(|c| c.get()) as usize;
620 let ecsoffs = ECSOFFS.with(|c| c.get()) as usize;
621
622 // c:557-559 — `ret->len = ((ecnpats * sizeof(Patprog)) +
623 // (ecused * sizeof(wordcode)) +
624 // ecsoffs);`
625 // sizeof(Patprog) = sizeof(struct patprog *) = pointer size.
626 // On 64-bit targets that's 8, on 32-bit that's 4. C's eprog
627 // ->len is the canonical value for parity tests, so we use
628 // the same arithmetic.
629 let prog_bytes = ecused * 4; // sizeof(wordcode) = 4
630 let len = (ecnpats * std::mem::size_of::<*const u8>()) + prog_bytes + ecsoffs;
631
632 // Snapshot the wordcode buffer + string table.
633 let prog_words: Vec<u32> = ECBUF.with(|c| c.borrow()[..ecused].to_vec());
634 let mut strs_bytes = vec![0u8; ecsoffs];
635 ECSTRS_REVERSE.with(|c| copy_ecstr(&c.borrow(), &mut strs_bytes));
636
637 // c:566 — store strs as raw bytes via from_utf8_unchecked so
638 // single-byte zsh markers (e.g. Dash 0x9b) survive intact.
639 // `String::from_utf8_lossy` would replace them with U+FFFD
640 // (`\xef\xbf\xbd`), breaking byte-for-byte parity with C's
641 // strs region. SAFETY: downstream consumers of `eprog.strs`
642 // index by byte offset (per the wordcode `(offs >> 2)` offset
643 // encoding) and call `.as_bytes()` — they never iterate as
644 // chars or rely on UTF-8 validity, so storing non-UTF-8 bytes
645 // in a String is safe in practice. C zsh's strs is `char *`
646 // with the same byte-not-char semantics.
647 let strs_string = unsafe { String::from_utf8_unchecked(strs_bytes) };
648 let ret = eprog {
649 flags: if heap { EF_HEAP } else { EF_REAL }, // c:570
650 len: len as i32, // c:559
651 npats: ecnpats as i32, // c:561
652 nref: if heap { -1 } else { 1 }, // c:562
653 pats: Vec::new(), // c:563 dummy_patprog
654 prog: prog_words, // c:565
655 strs: Some(strs_string),
656 shf: None,
657 dump: None,
658 };
659
660 // c:577 — free ecbuf so next parse starts fresh.
661 ECBUF.with(|c| c.borrow_mut().clear());
662 ECLEN.with(|c| c.set(0));
663 ECUSED.with(|c| c.set(0));
664 ECNPATS.with(|c| c.set(0));
665 ECSOFFS.with(|c| c.set(0));
666 ECSTRS_INDEX.with(|c| c.borrow_mut().clear());
667 ECSTRS_REVERSE.with(|c| c.borrow_mut().clear());
668 ECSTRS_TREE.with(|t| *t.borrow_mut() = None);
669
670 ret
671}
672
673/// Port of `int empty_eprog(Eprog p)` from `Src/parse.c:584`. C
674/// body: `return (!p || !p->prog || *p->prog == WCB_END());` —
675/// the eprog is empty when its prog buffer is missing or the
676/// first wordcode is the WC_END marker. Used by signal handlers
677/// (`Src/signals.c:712`) to short-circuit a trap that resolves to
678/// an empty program.
679pub fn empty_eprog(p: &crate::ported::zsh_h::eprog) -> bool {
680 p.prog.is_empty() || p.prog[0] == crate::ported::zsh_h::WCB_END()
681}
682
683/// Clear pending here-document list. Direct port of
684/// `clear_hdocs(void)` from `Src/parse.c:591`. The C version walks
685/// `hdocs` and frees each node; Rust drops the `Box<heredocs>`
686/// chain automatically when the head is replaced with None.
687pub fn clear_hdocs() { // c:591
688 // c:593-598 — for (p = hdocs; p; p = n) { n = p->next; zfree(p); }
689 // c:599 — hdocs = NULL;
690 HDOCS.with_borrow_mut(|h| *h = None);
691 // zshrs-only: also drop the parallel AST-glue Vec. No C
692 // analog — LEX_HEREDOCS is Rust-only working-set state.
693 crate::ported::lex::LEX_HEREDOCS.with_borrow_mut(|v| v.clear());
694}
695
696/// Top-level parse-event entry. Direct port of zsh/Src/parse.c:
697/// 612-631 `parse_event`. Reads one event from the lexer (a
698/// sublist optionally followed by SEPER/AMPER/AMPERBANG) and
699/// returns the resulting ZshProgram.
700///
701/// `endtok` is the token that terminates the event — usually
702/// ENDINPUT, but for command-style substitutions the closing
703/// `)` (zsh's CMD_SUBST_CLOSE).
704///
705/// zshrs port note: zsh's parse_event returns an `Eprog` (heap-
706/// allocated wordcode program). zshrs returns a `ZshProgram`
707/// (AST root). Same role at the parse-output boundary.
708pub fn parse_event(endtok: lextok) -> Option<ZshProgram> {
709 // parse.c:616-619 — reset state and prime the lexer.
710 set_tok(ENDINPUT);
711 set_incmdpos(true);
712 zshlex();
713 // parse.c:620 — `init_parse();`
714 init_parse();
715
716 // parse.c:622-625 — drive par_event; on failure clear hdocs.
717 if !par_event(endtok) {
718 clear_hdocs();
719 return None;
720 }
721 // parse.c:626-628 — if endtok != ENDINPUT, this is a sub-
722 // parse for a substitution that doesn't need its own eprog.
723 // zshrs returns an empty program in that case (caller
724 // discards).
725 if endtok != ENDINPUT {
726 return Some(ZshProgram { lists: Vec::new() });
727 }
728 // parse.c:630 — `bld_eprog(1);` — build the final eprog.
729 // zshrs has already built the AST via parse_program_until,
730 // but parse_event uses par_event directly so we need to
731 // collect what par_event accumulated.
732 Some(parse_program_until(None))
733}
734
735/// Parse one event (sublist with optional separator). Direct
736/// port of zsh/Src/parse.c:635 `par_event`. Returns true if
737/// an event was successfully parsed, false on EOF / endtok.
738///
739/// zshrs port note: the C version emits wordcodes via ecadd/
740/// set_list_code; zshrs's parser builds AST nodes via
741/// par_sublist + par_list. Same flow, different output.
742pub fn par_event(endtok: lextok) -> bool {
743 // parse.c:639-643 — skip leading SEPERs.
744 while tok() == SEPER {
745 // parse.c:640-641 — at top-level (endtok == ENDINPUT),
746 // a SEPER on a fresh line ends the event.
747 if isnewlin() > 0 && endtok == ENDINPUT {
748 return false;
749 }
750 zshlex();
751 }
752 // parse.c:644-647 — terminate on EOF or matching close-token.
753 if tok() == ENDINPUT {
754 return false;
755 }
756 if tok() == endtok {
757 return true;
758 }
759 // parse.c:649-... — drive par_sublist + handle terminator.
760 // zshrs's par_sublist already builds the AST node directly.
761 match par_sublist() {
762 Some(_) => {
763 // parse.c:651-693 — terminator handling. zshrs's
764 // par_list wraps this; for parse_event we just
765 // confirm the sublist parsed.
766 true
767 }
768 None => false,
769 }
770}
771
772/// Port of `parse_list(void)` from `Src/parse.c:697`. C-shape entry
773/// point: drives `par_list` and finalizes via `bld_eprog`. Returns
774/// `None` on syntax error.
775pub fn parse_list() -> Option<eprog> {
776 // c:697
777 set_tok(ENDINPUT);
778 init_parse();
779 zshlex();
780 let _ = par_list();
781 if tok() != ENDINPUT {
782 clear_hdocs();
783 set_tok(LEXERR);
784 yyerror("syntax error");
785 return None;
786 }
787 Some(bld_eprog(true))
788}
789
790/// Port of `parse_cond(void)` from `Src/parse.c:722`. Only used by
791/// `bin_test`/`bin_bracket` for `/bin/test`/`[` compat — the
792/// `condlex` global must already point at `testlex` before entry.
793pub fn parse_cond() -> Option<eprog> {
794 // c:722
795 init_parse();
796 if par_cond().is_none() {
797 clear_hdocs();
798 return None;
799 }
800 Some(bld_eprog(true))
801}
802
803// ============================================================
804// Wordcode emission helpers (parse.c private helpers)
805//
806// Direct ports of zsh's wordcode-emission helpers in parse.c.
807// These write u32 opcodes into a flat `ecbuf` array thread-local
808// via ecadd / ecdel / ecispace / ecstrcode and friends. The
809// par_*_wordcode family at parse.rs:1700-3500 walks the lex
810// stream and emits a real wordcode buffer here.
811//
812// (The AST tree built by par_program / par_simple / etc. is a
813// separate path used by fusevm; see compile_zsh.rs for the AST
814// → fusevm-bytecode compiler.)
815// ============================================================
816
817/// Patch a list-placeholder wordcode with its actual opcode +
818/// jump distance. Direct port of zsh/Src/parse.c:738
819/// `set_list_code`. zsh emits an `ecadd(0)` placeholder before
820/// par_sublist runs, then comes back through set_list_code to
821/// rewrite the slot with WCB_LIST(type, distance) once the
822/// sublist's final length is known.
823///
824/// Port of `set_list_code(int p, int type, int cmplx)` from
825/// `Src/parse.c:738`. Patches the WCB_LIST header at `p` based on
826/// whether the sublist body is simple (single command, no
827/// pipeline) and Z_SYNC/Z_END — emits the Z_SIMPLE-optimized
828/// header when possible, otherwise the plain WCB_LIST(type, 0).
829pub fn set_list_code(p: usize, type_code: i32, cmplx: bool) {
830 let _ = wc_bdata;
831 // c:740 — `if (!cmplx && (type == Z_SYNC || type == (Z_SYNC | Z_END))
832 // && WC_SUBLIST_TYPE(ecbuf[p+1]) == WC_SUBLIST_END)`
833 let sublist_code = ECBUF.with_borrow(|b| b.get(p + 1).copied().unwrap_or(0));
834 let z = type_code;
835 let qualifies = !cmplx
836 && (z == Z_SYNC || z == (Z_SYNC | Z_END))
837 && WC_SUBLIST_TYPE(sublist_code) == WC_SUBLIST_END;
838 if qualifies {
839 // c:742 — `int ispipe = !(WC_SUBLIST_FLAGS(ecbuf[p+1])
840 // & WC_SUBLIST_SIMPLE);`
841 let ispipe = (WC_SUBLIST_FLAGS(sublist_code) & WC_SUBLIST_SIMPLE) == 0;
842 // c:743 — `ecbuf[p] = WCB_LIST((type|Z_SIMPLE), ecused-2-p);`
843 let used = ECUSED.get() as usize;
844 let off = used.saturating_sub(2 + p);
845 ECBUF.with_borrow_mut(|b| {
846 if p < b.len() {
847 b[p] = WCB_LIST((z | Z_SIMPLE) as wordcode, off as wordcode);
848 }
849 });
850 // c:744 — `ecdel(p+1);`
851 ecdel(p + 1);
852 // c:745-746 — `if (ispipe) ecbuf[p+1] = WC_PIPE_LINENO(ecbuf[p+1]);`
853 if ispipe {
854 ECBUF.with_borrow_mut(|b| {
855 if p + 1 < b.len() {
856 b[p + 1] = WC_PIPE_LINENO(b[p + 1]);
857 }
858 });
859 }
860 } else {
861 // c:748 — `ecbuf[p] = WCB_LIST(type, 0);`
862 ECBUF.with_borrow_mut(|b| {
863 if p < b.len() {
864 b[p] = WCB_LIST(z as wordcode, 0);
865 }
866 });
867 }
868}
869
870/// Port of `set_sublist_code(int p, int type, int flags, int skip, int cmplx)`
871/// from `Src/parse.c:755`. Patches the WCB_SUBLIST header at `p`.
872/// When the sublist is non-complex (single command, no pipeline),
873/// sets WC_SUBLIST_SIMPLE and rewrites the following slot to
874/// `WC_PIPE_LINENO`.
875pub fn set_sublist_code(p: usize, type_code: i32, flags: i32, skip: i32, cmplx: bool) {
876 if cmplx {
877 // c:758 — `ecbuf[p] = WCB_SUBLIST(type, flags, skip);`
878 ECBUF.with_borrow_mut(|b| {
879 if p < b.len() {
880 b[p] = WCB_SUBLIST(type_code as wordcode, flags as wordcode, skip as wordcode);
881 }
882 });
883 } else {
884 // c:760 — `ecbuf[p] = WCB_SUBLIST(type, flags|WC_SUBLIST_SIMPLE, skip);`
885 ECBUF.with_borrow_mut(|b| {
886 if p < b.len() {
887 b[p] = WCB_SUBLIST(
888 type_code as wordcode,
889 (flags as wordcode) | WC_SUBLIST_SIMPLE,
890 skip as wordcode,
891 );
892 }
893 });
894 // c:761 — `ecbuf[p+1] = WC_PIPE_LINENO(ecbuf[p+1]);`
895 ECBUF.with_borrow_mut(|b| {
896 if p + 1 < b.len() {
897 b[p + 1] = WC_PIPE_LINENO(b[p + 1]);
898 }
899 });
900 }
901}
902
903/// Parse a list (sublist with optional & or ;).
904///
905/// Direct port of zsh/Src/parse.c:771-804 `par_list` (and the
906/// par_list1 wrapper at parse.c:807-817).
907///
908/// **Structural divergence**: zsh's parse.c emits flat wordcode
909/// into the `ecbuf` u32 array via `ecadd(0)` (placeholder),
910/// `set_list_code(p, code, complexity)`, `wc_bdata(Z_END)`. zshrs
911/// builds an AST node `ZshList { sublist, flags }` instead. The
912/// async/sync/disown discrimination at parse.c:785-790 maps to
913/// zshrs's `ListFlags { async_, disown }` field — Z_SYNC is the
914/// default (no flags), Z_ASYNC = `&` = `async_=true`, Z_DISOWN +
915/// Z_ASYNC = `&!`/`&|` = both true. Same semantics, different
916/// representation. This divergence is repository-wide: every
917/// `par_*` function emits wordcode in C, every `parse_*` builds
918/// AST in Rust. The compile_zsh module then traverses the AST to
919/// emit fusevm bytecode, which serves the same role as zsh's
920/// wordcode but with a different opcode set and execution model.
921fn par_list() -> Option<ZshList> {
922 let sublist = par_sublist()?;
923
924 let flags = match tok() {
925 AMPER => {
926 zshlex();
927 ListFlags {
928 async_: true,
929 disown: false,
930 }
931 }
932 AMPERBANG => {
933 zshlex();
934 ListFlags {
935 async_: true,
936 disown: true,
937 }
938 }
939 SEPER | SEMI | NEWLIN => {
940 zshlex();
941 ListFlags::default()
942 }
943 _ => ListFlags::default(),
944 };
945
946 Some(ZshList { sublist, flags })
947}
948
949/// Parse one list — non-recursing variant. Direct port of
950/// zsh/Src/parse.c:808 `par_list1`. Like par_list but
951/// doesn't recurse on the trailing-separator path; used by
952/// callers that only want one statement (e.g. each arm of a
953/// case body).
954pub fn par_list1() -> Option<ZshSublist> {
955 // parse.c:810-816 — body is a single par_sublist call wrapped
956 // in the eu/ecused tracking that zshrs doesn't need (no
957 // wordcode buffer).
958 par_sublist()
959}
960
961/// Parse a sublist (pipelines connected by && or ||).
962///
963/// Direct port of zsh/Src/parse.c:825 `par_sublist` and
964/// par_sublist2 at parse.c:869-892. par_sublist handles the
965/// && / || conjunction and emits WC_SUBLIST opcodes; par_sublist2
966/// handles the leading `!` negation and `coproc` keyword.
967///
968/// AST mapping: ZshSublist { pipe, conj_chain }, where `conj_chain`
969/// is a Vec<(ConjOp, ZshSublist)> for chained && / ||. C uses
970/// flat wordcode with WC_SUBLIST_AND / WC_SUBLIST_OR markers.
971fn par_sublist() -> Option<ZshSublist> {
972 let mut flags = SublistFlags::default();
973
974 // Handle coproc and !
975 if tok() == COPROC {
976 flags.coproc = true;
977 zshlex();
978 } else if tok() == BANG_TOK {
979 flags.not = true;
980 zshlex();
981 }
982
983 let pipe = par_pline()?;
984
985 // Check for && or ||
986 let next = match tok() {
987 DAMPER => {
988 zshlex();
989 skip_separators();
990 par_sublist().map(|s| (SublistOp::And, Box::new(s)))
991 }
992 DBAR => {
993 zshlex();
994 skip_separators();
995 par_sublist().map(|s| (SublistOp::Or, Box::new(s)))
996 }
997 _ => None,
998 };
999
1000 Some(ZshSublist { pipe, next, flags })
1001}
1002
1003/// Port of `par_sublist2(int *cmplx)` from `Src/parse.c:869`.
1004/// Secondary-sublist arm: handles the `COPROC`/`Bang` prefix
1005/// in front of a pline. Returns the WC_SUBLIST flag word added.
1006pub fn par_sublist2(cmplx: &mut i32) -> Option<i32> {
1007 // c:870 — `int f = 0;`
1008 let mut f: i32 = 0;
1009 // c:873-880 — COPROC / BANG prefix flags.
1010 if tok() == COPROC {
1011 *cmplx = 1;
1012 f |= WC_SUBLIST_COPROC as i32;
1013 zshlex();
1014 } else if tok() == BANG_TOK {
1015 *cmplx = 1;
1016 f |= WC_SUBLIST_NOT as i32;
1017 zshlex();
1018 }
1019 // c:882-883 — `if (!par_pline(cmplx) && !f) return -1;`
1020 if !par_pipe_wordcode(cmplx) && f == 0 {
1021 return None;
1022 }
1023 // c:885 — `return f;`
1024 Some(f)
1025}
1026
1027/// Parse a pipeline
1028/// Parse a pipeline (cmds joined by `|` / `|&`). Direct port of
1029/// zsh/Src/parse.c:894 `par_pline`. AST: ZshPipe { cmds: Vec<ZshCommand> }.
1030/// C emits WC_PIPE wordcodes per command; same flow.
1031fn par_pline() -> Option<ZshPipe> {
1032 let lineno = toklineno();
1033 let cmd = par_cmd()?;
1034
1035 // Check for | or |&
1036 let mut merge_stderr = false;
1037 let next = match tok() {
1038 BAR_TOK | BARAMP => {
1039 merge_stderr = tok() == BARAMP;
1040 zshlex();
1041 skip_separators();
1042 par_pline().map(Box::new)
1043 }
1044 _ => None,
1045 };
1046
1047 Some(ZshPipe {
1048 cmd,
1049 next,
1050 lineno,
1051 merge_stderr,
1052 })
1053}
1054
1055/// Parse a command
1056/// Parse a command — dispatches by leading token (FOR / CASE /
1057/// IF / WHILE / UNTIL / REPEAT / FUNC / DINBRACK / DINPAR /
1058/// Inpar subshell / Inbrace current-shell / TIME / NOCORRECT,
1059/// else simple). Direct port of zsh/Src/parse.c:958 `par_cmd`.
1060fn par_cmd() -> Option<ZshCommand> {
1061 // Parse leading redirections
1062 let mut redirs = Vec::new();
1063 while IS_REDIROP(tok()) {
1064 if let Some(redir) = par_redir() {
1065 redirs.push(redir);
1066 }
1067 }
1068
1069 let cmd = match tok() {
1070 FOR | FOREACH => par_for(),
1071 SELECT => parse_select(),
1072 CASE => par_case(),
1073 IF => par_if(),
1074 WHILE => par_while(false),
1075 UNTIL => par_while(true),
1076 REPEAT => par_repeat(),
1077 INPAR_TOK => par_subsh(),
1078 INOUTPAR => parse_anon_funcdef(),
1079 INBRACE_TOK => parse_cursh(),
1080 FUNC => par_funcdef(),
1081 DINBRACK => par_cond(),
1082 DINPAR => parse_arith(),
1083 TIME => par_time(),
1084 _ => par_simple(redirs),
1085 };
1086
1087 // Parse trailing redirections. For Simple commands the redirs were
1088 // already captured inside par_simple; for compound forms (Cursh,
1089 // Subsh, If, While, etc.) we collect them here and wrap in
1090 // ZshCommand::Redirected so compile_zsh can scope-bracket them.
1091 if let Some(inner) = cmd {
1092 let mut trailing: Vec<ZshRedir> = Vec::new();
1093 while IS_REDIROP(tok()) {
1094 if let Some(redir) = par_redir() {
1095 trailing.push(redir);
1096 }
1097 }
1098 // c:1072-1075 — every par_cmd tail resets the lexer state
1099 // toggles so the NEXT command starts in cmd position with
1100 // case/cond/typeset off. par_simple/par_cond set `incmdpos=0`
1101 // during their bodies; without this reset the next iteration
1102 // of the outer par_list loop sees `if` / `done` / `select`
1103 // etc. as plain strings and the AST collapses.
1104 set_incmdpos(true);
1105 set_incasepat(0);
1106 set_incond(0);
1107 set_intypeset(false);
1108 if trailing.is_empty() {
1109 return Some(inner);
1110 }
1111 // Simple already absorbed its own redirs (compile path expects
1112 // them on ZshSimple), so don't double-wrap.
1113 if matches!(inner, ZshCommand::Simple(_)) {
1114 if let ZshCommand::Simple(mut s) = inner {
1115 s.redirs.extend(trailing);
1116 return Some(ZshCommand::Simple(s));
1117 }
1118 unreachable!()
1119 }
1120 return Some(ZshCommand::Redirected(Box::new(inner), trailing));
1121 }
1122 // Same reset on the empty-cmd branch (mirror c:1072 unconditional
1123 // path — the C function only returns 0 above when the dispatch
1124 // produced no command, and falls through to the reset block).
1125 set_incmdpos(true);
1126 set_incasepat(0);
1127 set_incond(0);
1128 set_intypeset(false);
1129
1130 None
1131}
1132
1133/// Parse for/foreach loop
1134/// Parse `for NAME in WORDS; do BODY; done` (foreach style) AND
1135/// `for ((init; cond; incr)) do BODY done` (c-style). Direct port
1136/// of zsh/Src/parse.c:1087 `par_for`. parse_for_cstyle is the
1137/// inner branch for the `((...))` arithmetic-header variant
1138/// (parse.c:1100-1140 inside par_for).
1139fn par_for() -> Option<ZshCommand> {
1140 let is_foreach = tok() == FOREACH;
1141 zshlex();
1142
1143 // Check for C-style: for (( init; cond; step ))
1144 if tok() == DINPAR {
1145 return parse_for_cstyle();
1146 }
1147
1148 // Get variable name(s). zsh parse.c par_for accepts multiple
1149 // identifier tokens before `in`/`(`/newline — `for k v in ...`
1150 // assigns each iteration's pair of values to k and v in turn.
1151 // We store the names space-joined since variable identifiers
1152 // can't contain whitespace.
1153 let mut names: Vec<String> = Vec::new();
1154 while tok() == STRING_LEX {
1155 let v = tokstr().unwrap_or_default();
1156 if v == "in" {
1157 break;
1158 }
1159 names.push(v);
1160 zshlex();
1161 }
1162 if names.is_empty() {
1163 crate::ported::utils::zerr("expected variable name in for");
1164 return None;
1165 }
1166 let var = names.join(" ");
1167
1168 // Skip newlines
1169 skip_separators();
1170
1171 // Get list. The lexer-port quirk: `for x (a b c)` arrives as a
1172 // single String token with the parens lexed-as-content
1173 // (`<Inpar>a b c<Outpar>`) instead of as separate Inpar/String/
1174 // Outpar tokens. Detect that shape and split it manually.
1175 let list = if tok() == STRING_LEX
1176 && tokstr()
1177 .map(|s| s.starts_with('\u{88}') && s.ends_with('\u{8a}'))
1178 .unwrap_or(false)
1179 {
1180 let raw = tokstr().unwrap_or_default();
1181 // Strip leading Inpar + trailing Outpar, then untokenize the
1182 // inner content and split on whitespace for the word list.
1183 let inner = &raw[raw.char_indices().nth(1).map(|(i, _)| i).unwrap_or(0)
1184 ..raw
1185 .char_indices()
1186 .last()
1187 .map(|(i, _)| i)
1188 .unwrap_or(raw.len())];
1189 let cleaned = super::lex::untokenize(inner);
1190 let words: Vec<String> = cleaned.split_whitespace().map(|s| s.to_string()).collect();
1191 zshlex();
1192 ForList::Words(words)
1193 } else if tok() == STRING_LEX {
1194 let s = tokstr();
1195 if s.map(|s| s == "in").unwrap_or(false) {
1196 zshlex();
1197 let mut words = Vec::new();
1198 while tok() == STRING_LEX {
1199 let _ts_s = tokstr();
1200 if let Some(s) = _ts_s.as_deref() {
1201 words.push(s.to_string());
1202 }
1203 zshlex();
1204 }
1205 ForList::Words(words)
1206 } else {
1207 ForList::Positional
1208 }
1209 } else if tok() == INPAR_TOK {
1210 // for var (...)
1211 zshlex();
1212 let mut words = Vec::new();
1213 while tok() == STRING_LEX || tok() == SEPER {
1214 if tok() == STRING_LEX {
1215 let _ts_s = tokstr();
1216 if let Some(s) = _ts_s.as_deref() {
1217 words.push(s.to_string());
1218 }
1219 }
1220 zshlex();
1221 }
1222 if tok() == OUTPAR_TOK {
1223 // After the `)` of a for-list, the next token is the
1224 // body opener — `do`/`{`. zsh's lexer needs incmdpos
1225 // set so `{` lexes as Inbrace (not as a literal). C
1226 // analogue: parse.c::par_for sets `incmdpos = 1`
1227 // after consuming the Outpar before the body parse.
1228 set_incmdpos(true);
1229 zshlex();
1230 }
1231 ForList::Words(words)
1232 } else {
1233 ForList::Positional
1234 };
1235
1236 // Skip to body
1237 skip_separators();
1238
1239 // Parse body
1240 let body = parse_loop_body(is_foreach, false)?;
1241
1242 Some(ZshCommand::For(ZshFor {
1243 var,
1244 list,
1245 body: Box::new(body),
1246 is_select: false,
1247 }))
1248}
1249
1250/// Parse case statement
1251/// Parse `case WORD in PATTERN) BODY ;; ... esac`. Direct port
1252/// of zsh/Src/parse.c:1209 `par_case`. Each case arm is a
1253/// (pattern_list, body, terminator) tuple where terminator is
1254/// `;;` (default), `;&` (fallthrough), or `;|` (continue testing).
1255fn par_case() -> Option<ZshCommand> {
1256 // C par_case (parse.c:1209-1241). Order of state toggles
1257 // matters — the lexer reads the case word in `incmdpos=0`
1258 // (so it's not promoted to a reswd), then the `in`/`{` in
1259 // `incmdpos=1, noaliases=1, nocorrect=1` (so the `in` literal
1260 // isn't alias-expanded or spell-corrected), then sets
1261 // `incasepat=1, incmdpos=0` before the first pattern.
1262 set_incmdpos(false);
1263 zshlex(); // skip 'case'
1264
1265 let word = match tok() {
1266 STRING_LEX => {
1267 let w = tokstr().unwrap_or_default();
1268 // c:1222 — `incmdpos = 1;` before the next zshlex so the
1269 // `in` keyword is recognised. c:1223-1225 — save+force
1270 // noaliases / nocorrect.
1271 set_incmdpos(true);
1272 let ona = noaliases();
1273 let onc = nocorrect();
1274 set_noaliases(true);
1275 set_nocorrect(1);
1276 zshlex();
1277 // Restore noaliases/nocorrect after the `in`-or-`{` token
1278 // is in hand; both are unconditionally restored at c:1238-1239.
1279 let restore = |ona: bool, onc: i32| {
1280 set_noaliases(ona);
1281 set_nocorrect(onc);
1282 };
1283 (w, ona, onc, restore)
1284 }
1285 _ => {
1286 crate::ported::utils::zerr("expected word after case");
1287 return None;
1288 }
1289 };
1290 let (word, ona, onc, restore) = word;
1291
1292 skip_separators();
1293
1294 // Expect 'in' or {
1295 let use_brace = tok() == INBRACE_TOK;
1296 if tok() == STRING_LEX {
1297 let s = tokstr();
1298 if s.map(|s| s != "in").unwrap_or(true) {
1299 // c:1228-1232 — restore noaliases/nocorrect on error path.
1300 restore(ona, onc);
1301 crate::ported::utils::zerr("expected 'in' in case");
1302 return None;
1303 }
1304 } else if !use_brace {
1305 restore(ona, onc);
1306 crate::ported::utils::zerr("expected 'in' or '{' in case");
1307 return None;
1308 }
1309 // c:1236-1239 — `incasepat = 1; incmdpos = 0; noaliases = ona;
1310 // nocorrect = onc;` — set the case-pattern context AND restore
1311 // alias/correct state BEFORE the zshlex that consumes `in`/`{`.
1312 set_incasepat(1);
1313 set_incmdpos(false);
1314 restore(ona, onc);
1315 zshlex();
1316
1317 let mut arms = Vec::new();
1318 const MAX_ARMS: usize = 10_000;
1319
1320 loop {
1321 if arms.len() > MAX_ARMS {
1322 crate::ported::utils::zerr("par_case: too many arms");
1323 break;
1324 }
1325
1326 // Set incasepat BEFORE skipping separators so lexer knows we're in case pattern context
1327 // This affects how [ and | are lexed
1328 set_incasepat(1);
1329
1330 skip_separators();
1331
1332 // Check for end
1333 // Note: 'esac' might be String "esac" if incasepat > 0 prevents reserved word recognition
1334 let is_esac = tok() == ESAC
1335 || (tok() == STRING_LEX && tokstr().map(|s| s == "esac").unwrap_or(false));
1336 if (use_brace && tok() == OUTBRACE_TOK) || (!use_brace && is_esac) {
1337 set_incasepat(0);
1338 zshlex();
1339 break;
1340 }
1341
1342 // Also break on EOF
1343 if tok() == ENDINPUT || tok() == LEXERR {
1344 set_incasepat(0);
1345 break;
1346 }
1347
1348 // Skip optional `(`. zsh's case grammar: `case W in (P)…)`.
1349 // The leading `(` is paired with a matching `)` that closes
1350 // the pattern itself; the arm-close `)` follows separately.
1351 // Track whether we consumed it so we can skip the matching
1352 // `)` after pattern parsing — otherwise the arm-close would
1353 // be interpreted as the pattern-close and the actual body
1354 // would get the leftover `)`.
1355 let had_leading_paren = tok() == INPAR_TOK;
1356 if had_leading_paren {
1357 zshlex();
1358 }
1359
1360 // incasepat is already set above
1361 let mut patterns = Vec::new();
1362 loop {
1363 if tok() == STRING_LEX {
1364 let s = tokstr();
1365 if s.map(|s| s == "esac").unwrap_or(false) {
1366 break;
1367 }
1368 patterns.push(tokstr().unwrap_or_default());
1369 // After first pattern token, set incasepat=2 so ( is treated as part of pattern
1370 set_incasepat(2);
1371 zshlex();
1372 } else if tok() != BAR_TOK {
1373 break;
1374 }
1375
1376 if tok() == BAR_TOK {
1377 // Reset to 1 (start of next alternative pattern)
1378 set_incasepat(1);
1379 zshlex();
1380 } else {
1381 break;
1382 }
1383 }
1384 set_incasepat(0);
1385
1386 // zsh's `(P)` form (parse.c:1320-1360 hack) treats the entire
1387 // parenthesized contents as ONE zsh pattern with internal `|`
1388 // as the literal alternation operator — NOT as multiple
1389 // case-arm alternatives. Without a leading `(`, the bare
1390 // `P1|P2)` form splits into multiple alts. Mirror that here:
1391 // when a leading `(` was consumed, fold the |-separated
1392 // pieces back into a single pattern string.
1393 if had_leading_paren && patterns.len() > 1 {
1394 let joined = patterns.join("|");
1395 patterns = vec![joined];
1396 }
1397
1398 // Expect ). Also handle the `(P))` wrapped-pattern form:
1399 // when a leading `(` was consumed, accept an extra `)` —
1400 // the inner `)` closes the optional-paren wrapper, the
1401 // outer `)` is the arm-close. zsh accepts BOTH `(P) BODY`
1402 // (bare pattern, leading-paren is just the opt-marker, the
1403 // close is arm-close) and `(P)) BODY` (paren-wrapped
1404 // pattern, then arm-close). The first form is unambiguous
1405 // when the bare pattern was simple; the second is needed
1406 // when the body starts with `(`.
1407 if tok() != OUTPAR_TOK {
1408 crate::ported::utils::zerr("expected ')' in case pattern");
1409 return None;
1410 }
1411 // Port of Src/parse.c:1310-1313 — when the case pattern
1412 // closes with `)`, set `incmdpos = 1` BEFORE consuming
1413 // the token so the first word of the arm body is lexed
1414 // in command position. Without this, `case X in X) c1=v ;;`
1415 // lexes `c1=v` as a plain STRING rather than an assignment
1416 // word, and exec treats it as a command name (yielding
1417 // "command not found: c1=v"). Subsequent statements after
1418 // `;` parse correctly because the `;` separator restores
1419 // command position; only the FIRST body word was broken.
1420 set_incmdpos(true);
1421 zshlex();
1422 if had_leading_paren && tok() == OUTPAR_TOK {
1423 set_incmdpos(true);
1424 zshlex();
1425 }
1426
1427 // Parse body
1428 let body = parse_program();
1429
1430 // Get terminator. Set incasepat=1 BEFORE the zshlex
1431 // advance so the next token (the next arm's pattern, like
1432 // `[a-z]`) gets tokenized in pattern context. Without
1433 // this, a `[`-prefixed pattern after the FIRST arm became
1434 // Inbrack instead of String and the pattern-loop bailed
1435 // out with "expected ')' in case pattern".
1436 let terminator = match tok() {
1437 DSEMI => {
1438 set_incasepat(1);
1439 zshlex();
1440 CaseTerm::Break
1441 }
1442 SEMIAMP => {
1443 set_incasepat(1);
1444 zshlex();
1445 CaseTerm::Continue
1446 }
1447 SEMIBAR => {
1448 set_incasepat(1);
1449 zshlex();
1450 CaseTerm::TestNext
1451 }
1452 _ => CaseTerm::Break,
1453 };
1454
1455 if !patterns.is_empty() {
1456 arms.push(CaseArm {
1457 patterns,
1458 body,
1459 terminator,
1460 });
1461 }
1462 }
1463
1464 Some(ZshCommand::Case(ZshCase { word, arms }))
1465}
1466
1467/// Parse if statement
1468/// Parse `if COND; then BODY; [elif COND; then BODY;]* [else BODY;] fi`.
1469/// Direct port of zsh/Src/parse.c:1411 `par_if`. The C source
1470/// emits WC_IF wordcodes per arm; zshrs builds an AST chain of
1471/// (cond, then_body) tuples plus an optional else_body.
1472fn par_if() -> Option<ZshCommand> {
1473 zshlex(); // skip 'if'
1474
1475 // Parse condition - stops at 'then' or '{' (zsh allows { instead of then)
1476 let cond = Box::new(parse_program_until(Some(&[THEN, INBRACE_TOK])));
1477
1478 skip_separators();
1479
1480 // Expect 'then' or {
1481 let use_brace = tok() == INBRACE_TOK;
1482 if tok() != THEN && !use_brace {
1483 crate::ported::utils::zerr("expected 'then' or '{' after if condition");
1484 return None;
1485 }
1486 zshlex();
1487
1488 // Parse then-body - stops at else/elif/fi, or } if using brace syntax
1489 let then = if use_brace {
1490 let body = parse_program_until(Some(&[OUTBRACE_TOK]));
1491 if tok() == OUTBRACE_TOK {
1492 zshlex();
1493 }
1494 Box::new(body)
1495 } else {
1496 Box::new(parse_program_until(Some(&[ELSE, ELIF, FI])))
1497 };
1498
1499 // Parse elif and else. zsh accepts the SAME elif/else
1500 // continuations for both classic `then/fi` AND the brace
1501 // form `{ ... } elif ... { ... } else { ... }`. Direct port
1502 // of zsh/Src/parse.c:1417-1500 par_if where the elif/else
1503 // arms are checked AFTER the body close regardless of which
1504 // delimiter style opened the block. Without this, zinit's
1505 // if [[ -z $sel ]] { ... } else { ... }
1506 // hung the parser — `else` was treated as an external
1507 // command following the if-statement, which the lexer state
1508 // mis-classified inside the still-open function body.
1509 //
1510 // For brace-form: skip the `fi` consumption at the end of
1511 // the loop (no `fi` after a brace block), and `else` may
1512 // arrive after a `}` close. Skip-separators between the
1513 // body close and the elif/else token.
1514 let mut elif = Vec::new();
1515 let mut else_ = None;
1516
1517 {
1518 loop {
1519 skip_separators();
1520
1521 match tok() {
1522 ELIF => {
1523 zshlex();
1524 // elif condition stops at 'then' or '{'
1525 let econd = parse_program_until(Some(&[THEN, INBRACE_TOK]));
1526 skip_separators();
1527
1528 let elif_use_brace = tok() == INBRACE_TOK;
1529 if tok() != THEN && !elif_use_brace {
1530 crate::ported::utils::zerr("expected 'then' after elif");
1531 return None;
1532 }
1533 zshlex();
1534
1535 // elif body stops at else/elif/fi or } if using braces
1536 let ebody = if elif_use_brace {
1537 let body = parse_program_until(Some(&[OUTBRACE_TOK]));
1538 if tok() == OUTBRACE_TOK {
1539 zshlex();
1540 }
1541 body
1542 } else {
1543 parse_program_until(Some(&[ELSE, ELIF, FI]))
1544 };
1545
1546 elif.push((econd, ebody));
1547 }
1548 ELSE => {
1549 zshlex();
1550 skip_separators();
1551
1552 let else_use_brace = tok() == INBRACE_TOK;
1553 if else_use_brace {
1554 zshlex();
1555 }
1556
1557 // else body stops at 'fi' or '}'
1558 else_ = Some(Box::new(if else_use_brace {
1559 let body = parse_program_until(Some(&[OUTBRACE_TOK]));
1560 if tok() == OUTBRACE_TOK {
1561 zshlex();
1562 }
1563 body
1564 } else {
1565 parse_program_until(Some(&[FI]))
1566 }));
1567
1568 // Consume the 'fi' if present (not for brace syntax)
1569 if !else_use_brace && tok() == FI {
1570 zshlex();
1571 }
1572 break;
1573 }
1574 FI => {
1575 zshlex();
1576 break;
1577 }
1578 _ => break,
1579 }
1580 }
1581 }
1582
1583 Some(ZshCommand::If(ZshIf {
1584 cond,
1585 then,
1586 elif,
1587 else_,
1588 }))
1589}
1590
1591/// Parse while/until loop
1592/// Parse `while COND; do BODY; done` and `until COND; do BODY; done`.
1593/// Direct port of zsh/Src/parse.c:1521 `par_while`. The
1594/// `until` variant is the same loop with the condition negated.
1595fn par_while(until: bool) -> Option<ZshCommand> {
1596 zshlex(); // skip while/until
1597
1598 let cond = Box::new(parse_program());
1599
1600 skip_separators();
1601 let body = parse_loop_body(false, false)?;
1602
1603 Some(ZshCommand::While(ZshWhile {
1604 cond,
1605 body: Box::new(body),
1606 until,
1607 }))
1608}
1609
1610/// Parse repeat loop
1611/// Parse `repeat N; do BODY; done`. Direct port of
1612/// zsh/Src/parse.c:1565 `par_repeat`. The C source supports
1613/// the SHORTLOOPS short-form `repeat N CMD` (no do/done) — zshrs's
1614/// parser doesn't yet special-case that variant.
1615fn par_repeat() -> Option<ZshCommand> {
1616 zshlex(); // skip 'repeat'
1617
1618 let count = match tok() {
1619 STRING_LEX => {
1620 let c = tokstr().unwrap_or_default();
1621 zshlex();
1622 c
1623 }
1624 _ => {
1625 crate::ported::utils::zerr("expected count after repeat");
1626 return None;
1627 }
1628 };
1629
1630 skip_separators();
1631 // c:1600 — par_repeat's short-form gate is wider: it unlocks
1632 // when SHORTLOOPS OR SHORTREPEAT is set (vs SHORTLOOPS alone for
1633 // for/while). Pass `is_repeat=true` so parse_loop_body
1634 // applies that widened gate.
1635 let body = parse_loop_body(false, true)?;
1636
1637 Some(ZshCommand::Repeat(ZshRepeat {
1638 count,
1639 body: Box::new(body),
1640 }))
1641}
1642
1643/// Parse (...) subshell
1644/// Parse a subshell `( ... )`. Direct port of zsh/Src/parse.c:1619
1645/// `par_subsh`. Body parses as a normal list; the subshell wrapper
1646/// fork-isolates execution in the executor.
1647fn par_subsh() -> Option<ZshCommand> {
1648 zshlex(); // skip (
1649 let prog = parse_program();
1650 if tok() == OUTPAR_TOK {
1651 zshlex();
1652 }
1653 Some(ZshCommand::Subsh(Box::new(prog)))
1654}
1655
1656/// Parse function definition
1657/// Parse `function NAME { BODY }` or `NAME () { BODY }`. Direct
1658/// port of zsh/Src/parse.c:1672 `par_funcdef`. zsh handles
1659/// the multiple keyword shapes (function FOO, FOO (), function FOO ()),
1660/// the optional `[fname1 fname2 ...]` for multi-name function defs,
1661/// and the `function FOO () { ... }` traditional/POSIX hybrid form.
1662fn par_funcdef() -> Option<ZshCommand> {
1663 zshlex(); // skip 'function'
1664
1665 let mut names = Vec::new();
1666 let mut tracing = false;
1667
1668 // Handle options like -T and function names. Two subtleties:
1669 //
1670 // 1. Flags: zsh's lexer encodes a leading `-` as
1671 // `zsh_h::Dash` (`\u{9b}`, `Src/zsh.h:182`) inside the String tokstr.
1672 // The previous `s.starts_with('-')` check failed for
1673 // `\u{9b}T`, so `function -T NAME { body }` slipped the
1674 // `-T` token into `names` and the function got registered
1675 // as `T` plus the intended `NAME`.
1676 //
1677 // 2. Body opener: zsh's lexer emits the opening `{` as a
1678 // String (not INBRACE_TOK) when it follows the String
1679 // NAME — the preceding name token resets incmdpos to
1680 // false, and only `{` immediately followed by `}` (the
1681 // empty-body case) gets promoted to Inbrace. The funcdef
1682 // parser must recognise the bare-`{` String as the body
1683 // opener; otherwise `function NAME { body }` falls through
1684 // to `_ => break`, no body parses, and the FuncDef never
1685 // lands in the AST. This is consistent with C zsh's
1686 // par_funcdef which knows it's in funcdef-header context
1687 // and accepts the brace either way.
1688 loop {
1689 match tok() {
1690 STRING_LEX => {
1691 let _ts_s = tokstr()?;
1692 let s = _ts_s.as_str();
1693 // c:1702 — `if ((*tokstr == Inbrace || *tokstr == '{') && !tokstr[1])`.
1694 // Body opener can be either the literal `{` (early-return
1695 // path at lex.c:1141-1144 / lex.rs LX2_INBRACE cmdpos
1696 // branch) or the Inbrace marker `\u{8f}` (lex.c:1420
1697 // post-switch add(c) where c was rewritten via lextok2).
1698 if s == "{" || s == "\u{8f}" {
1699 break;
1700 }
1701 let first = s.chars().next();
1702 if matches!(first, Some('-') | Some('+')) || matches!(first, Some(c) if c == Dash) {
1703 if s.contains('T') {
1704 tracing = true;
1705 }
1706 zshlex();
1707 continue;
1708 }
1709 names.push(s.to_string());
1710 zshlex();
1711 }
1712 INBRACE_TOK | INOUTPAR | SEPER | NEWLIN => break,
1713 _ => break,
1714 }
1715 }
1716
1717 // Optional ()
1718 let saw_paren = tok() == INOUTPAR;
1719 if saw_paren {
1720 zshlex();
1721 }
1722
1723 skip_separators();
1724
1725 // Body opener: real Inbrace OR a String containing the literal `{`
1726 // (early-return path) OR a String containing the Inbrace marker
1727 // `\u{8f}` (bct++ path post-switch add). C parse.c:1702 handles
1728 // both string forms via `*tokstr == Inbrace || *tokstr == '{'`.
1729 let body_opener_is_string_brace =
1730 tok() == STRING_LEX
1731 && tokstr().map(|s| s == "{" || s == "\u{8f}").unwrap_or(false);
1732 if tok() == INBRACE_TOK || body_opener_is_string_brace {
1733 // Capture body_start BEFORE the lexer advances past the
1734 // first body token. After the previous zshlex consumed
1735 // `{`, lexer.pos points just past `{` (which is where the
1736 // body source starts). The next `zshlex()` would advance
1737 // past the first token (`echo`), making body_start land
1738 // mid-body and lose the first word — `typeset -f f` would
1739 // print `a; echo b` for `{ echo a; echo b }`.
1740 let body_start = pos();
1741 zshlex();
1742 let body = parse_program();
1743 let body_end = if tok() == OUTBRACE_TOK {
1744 // Lexer has just consumed `}`; pos is past it. Body content
1745 // ends one byte before pos.
1746 pos().saturating_sub(1)
1747 } else {
1748 pos()
1749 };
1750 let body_source = input_slice(body_start, body_end)
1751 .map(|s| s.trim().to_string())
1752 .filter(|s| !s.is_empty());
1753 if tok() == OUTBRACE_TOK {
1754 zshlex();
1755 }
1756
1757 // Anonymous form `function () { body } a b c` (with `()`) or
1758 // `function { body } a b c` (zsh-only shorthand, no `()`). No
1759 // name was collected. Mirror parse_anon_funcdef: synthesize
1760 // `_zshrs_anon_N`, collect trailing args, set auto_call_args
1761 // so compile_funcdef registers + immediately calls the
1762 // function with the args as positional params.
1763 if names.is_empty() {
1764 let mut args = Vec::new();
1765 while tok() == STRING_LEX {
1766 if let Some(s) = tokstr() {
1767 args.push(s);
1768 }
1769 zshlex();
1770 }
1771 static ANON_COUNTER: AtomicUsize = AtomicUsize::new(0);
1772 let n = ANON_COUNTER.fetch_add(1, Ordering::Relaxed);
1773 let name = format!("_zshrs_anon_kw_{}", n);
1774 return Some(ZshCommand::FuncDef(ZshFuncDef {
1775 names: vec![name],
1776 body: Box::new(body),
1777 tracing,
1778 auto_call_args: Some(args),
1779 body_source,
1780 }));
1781 }
1782
1783 Some(ZshCommand::FuncDef(ZshFuncDef {
1784 names,
1785 body: Box::new(body),
1786 tracing,
1787 auto_call_args: None,
1788 body_source,
1789 }))
1790 } else {
1791 // Short form
1792 par_list().map(|list| {
1793 ZshCommand::FuncDef(ZshFuncDef {
1794 names,
1795 body: Box::new(ZshProgram { lists: vec![list] }),
1796 tracing,
1797 auto_call_args: None,
1798 body_source: None,
1799 })
1800 })
1801 }
1802}
1803
1804/// Parse time command
1805/// Parse `time CMD` (POSIX time keyword). Direct port of
1806/// zsh/Src/parse.c:1787 `par_time`. The `time` keyword
1807/// times the execution of the following pipeline / cmd.
1808fn par_time() -> Option<ZshCommand> {
1809 zshlex(); // skip 'time'
1810
1811 // Check if there's a pipeline to time
1812 if tok() == SEPER || tok() == NEWLIN || tok() == ENDINPUT {
1813 Some(ZshCommand::Time(None))
1814 } else {
1815 let sublist = par_sublist();
1816 Some(ZshCommand::Time(sublist.map(Box::new)))
1817 }
1818}
1819
1820/// Port of `par_dinbrack(void)` from `Src/parse.c:1810`. Body
1821/// parser inside `[[ ... ]]` — calls `par_cond` to emit the
1822/// condition wordcode then advances past `]]`.
1823pub fn par_dinbrack() -> Option<()> {
1824 // c:1810
1825 set_incond(1); // c:1814
1826 set_incmdpos(false); // c:1815
1827 zshlex(); // c:1816
1828 let _ = par_cond(); // c:1817
1829 if tok() != DOUTBRACK {
1830 // c:1818
1831 yyerror("missing ]]");
1832 return None;
1833 }
1834 set_incond(0); // c:1820
1835 set_incmdpos(true); // c:1821
1836 zshlex(); // c:1822
1837 Some(())
1838}
1839
1840/// Parse a simple command
1841/// Parse a simple command (assignments + words + redirections).
1842/// Direct port of zsh/Src/parse.c:1836 `par_simple` —
1843/// the largest single function in parse.c. Handles ENVSTRING/
1844/// ENVARRAY assignments at command head, intermixed redirs,
1845/// typeset-style multi-assignment commands, and the trailing
1846/// inout-par `()` that converts a simple command into an inline
1847/// function definition.
1848fn par_simple(mut redirs: Vec<ZshRedir>) -> Option<ZshCommand> {
1849 let mut assigns = Vec::new();
1850 let mut words = Vec::new();
1851
1852 // c:1934-1974 — `{var}>file` brace-FD detection is wired
1853 // INSIDE the words loop below (parse.rs:4940-4956) rather than
1854 // here at the head. The words-loop site sees the tok=STRING
1855 // `{varname}` followed by a REDIROP and routes into par_redir
1856 // with redir.varid populated. C does it inline at the start of
1857 // each STRING/TYPESET arm iteration; functionally equivalent.
1858
1859 // Parse leading assignments
1860 while tok() == ENVSTRING || tok() == ENVARRAY {
1861 if let Some(assign) = parse_assign() {
1862 assigns.push(assign);
1863 }
1864 zshlex();
1865 }
1866
1867 // Parse words and redirections
1868 loop {
1869 match tok() {
1870 ENVSTRING | ENVARRAY => {
1871 // Mid-command assignment-shape arg under typeset
1872 // / declare / local / etc. (intypeset gates the
1873 // lexer to emit Envstring/Envarray for `name=val`
1874 // and `name=()` past the command name). Parse the
1875 // assignment, then emit a synthetic word
1876 // `NAME=value` (scalar) or `NAME=( … )` (array)
1877 // string so typeset's builtin arg list sees the
1878 // assignment-shape arg. Avoids the inline-env
1879 // scope path that mistakenly treats it like a
1880 // pre-cmd `X=Y cmd` assignment.
1881 if let Some(assign) = parse_assign() {
1882 let synthetic = match &assign.value {
1883 ZshAssignValue::Scalar(v) => format!("{}={}", assign.name, v),
1884 ZshAssignValue::Array(elems) => {
1885 format!("{}=({})", assign.name, elems.join(" "))
1886 }
1887 };
1888 words.push(synthetic);
1889 }
1890 zshlex();
1891 }
1892 STRING_LEX | TYPESET => {
1893 let s = tokstr();
1894 if let Some(s) = s {
1895 words.push(s);
1896 }
1897 // c:1929 — `incmdpos = 0;` so the next zshlex() does
1898 // not re-promote `{`/`[[`/reserved words at the
1899 // continuation position. Without this, `echo {a,b}`
1900 // re-lexes `{` as INBRACE_TOK (current-shell block)
1901 // and the brace expansion never reaches par_simple.
1902 set_incmdpos(false);
1903 // c:1931-1932 — `if (tok == TYPESET) intypeset = is_typeset = 1;`
1904 // Multi-assign `typeset a=1 b=2` relies on the lexer
1905 // re-emitting `b=2` as ENVSTRING; that path is gated
1906 // on `intypeset`. Without this, follow-on assignment
1907 // words arrive as STRING and the typeset builtin's
1908 // multi-assign form silently degrades.
1909 if tok() == TYPESET {
1910 set_intypeset(true);
1911 }
1912 zshlex();
1913 // Check for function definition foo() { ... }
1914 if words.len() == 1 && tok() == INOUTPAR {
1915 return parse_inline_funcdef(words.pop().unwrap());
1916 }
1917 // `{name}>file` named-fd redirect: the lexer doesn't
1918 // recognize this shape, so the bare word `{name}`
1919 // arrives as a String. If it matches `{IDENT}` and
1920 // the NEXT token is a redirop, pop it off as the
1921 // varid for that redir.
1922 if !words.is_empty() && IS_REDIROP(tok()) {
1923 let last = words.last().unwrap();
1924 let untoked = super::lex::untokenize(last);
1925 if untoked.starts_with('{') && untoked.ends_with('}') && untoked.len() > 2 {
1926 let name = &untoked[1..untoked.len() - 1];
1927 if !name.is_empty()
1928 && name.chars().all(|c| c == '_' || c.is_ascii_alphanumeric())
1929 && name
1930 .chars()
1931 .next()
1932 .map(|c| c == '_' || c.is_ascii_alphabetic())
1933 .unwrap_or(false)
1934 {
1935 let varid = name.to_string();
1936 words.pop();
1937 if let Some(mut redir) = par_redir() {
1938 redir.varid = Some(varid);
1939 redirs.push(redir);
1940 }
1941 continue;
1942 }
1943 }
1944 }
1945 }
1946 _ if IS_REDIROP(tok()) => {
1947 match par_redir() {
1948 Some(redir) => redirs.push(redir),
1949 None => break, // Error in redir parsing, stop
1950 }
1951 }
1952 INOUTPAR if !words.is_empty() => {
1953 // c:2055-2057 — `if (!isset(MULTIFUNCDEF) && argc > 1)
1954 // YYERROR(oecused);` — multi-name funcdef gate:
1955 // `f1 f2() { ... }` defines f1 AND f2 to the same
1956 // body, but only when MULTIFUNCDEF is set.
1957 if !isset(MULTIFUNCDEF) && words.len() > 1 {
1958 crate::ported::utils::zerr(
1959 "parse error: multiple names in function definition without MULTIFUNCDEF",
1960 );
1961 return None;
1962 }
1963 // c:2061-2068 — `if (isset(EXECOPT) && hasalias &&
1964 // !isset(ALIASFUNCDEF) && argc && hasalias !=
1965 // input_hasalias()) { zwarn(...); YYERROR(...); }`
1966 // Alias-as-funcdef warning. zshrs's parser doesn't
1967 // track `hasalias` (alias-expansion provenance
1968 // during parse) yet, so `had_alias` stays false —
1969 // the gate is wired here as a marker so the canonical
1970 // C predicate is visible. Once alias-provenance lands,
1971 // swap `false` for the actual provenance compare.
1972 let had_alias = false;
1973 if isset(EXECOPT) && had_alias && !isset(ALIASFUNCDEF) && !words.is_empty() {
1974 crate::ported::utils::zwarn("defining function based on alias `(unknown)'");
1975 return None;
1976 }
1977 // foo() { ... } style function
1978 return parse_inline_funcdef(words.pop().unwrap());
1979 }
1980 _ => break,
1981 }
1982 }
1983
1984 if assigns.is_empty() && words.is_empty() && redirs.is_empty() {
1985 return None;
1986 }
1987
1988 Some(ZshCommand::Simple(ZshSimple {
1989 assigns,
1990 words,
1991 redirs,
1992 }))
1993}
1994
/// Parse a redirection (`>file`, `<file`, `>>file`, `<<HEREDOC`, etc.).
///
/// Counterpart of `par_redir` in zsh/Src/parse.c:2229. This is a thin
/// convenience wrapper: it forwards to `par_redir_with_id` with no
/// identifier (`None`) and returns its result unchanged, so `None`
/// here means `par_redir_with_id` produced no redirection.
fn par_redir() -> Option<ZshRedir> {
    par_redir_with_id(None)
}
2004
2005/// Wire a here-document body onto the redirection token that
2006/// requested it. Direct port of zsh/Src/parse.c:2347
2007/// `setheredoc`. Called when a heredoc terminator has been
2008/// matched and the body is ready to be attached to the redir.
2009///
2010/// zshrs port note: zsh's setheredoc patches the wordcode
2011/// in-place via `pc[1] = ecstrcode(doc); pc[2] = ecstrcode(term);`.
2012/// zshrs threads heredoc bodies through `HereDocInfo` structs
2013/// attached inline during the post-parse `fill_heredoc_bodies` walk.
2014/// This method is the AST-side equivalent: writes back to the
2015/// matching redir node by index.
2016/// Port of `setheredoc(int pc, int type, char *str, char *termstr,
2017/// char *munged_termstr)` from `Src/parse.c:2347-2355`. Patches the
2018/// pending heredoc redir at `pc` with its body string + raw and
2019/// munged terminator forms.
2020pub fn setheredoc(pc: usize, redir_type: i32, doc: &str, term: &str, munged_term: &str) {
2021 // zshrs-only guard: AST-path heredocs use `pc = -1 as usize`
2022 // (i.e. `usize::MAX`) as a sentinel meaning "no wordcode slot to
2023 // patch". C never passes a negative pc since the wordcode emitter
2024 // is always active. Skip silently for the AST-only case.
2025 if pc == usize::MAX {
2026 return;
2027 }
2028 // c:2350 — `int varid = WC_REDIR_VARID(ecbuf[pc]) ? REDIR_VARID_MASK : 0;`
2029 let cur = ECBUF.with_borrow(|b| b.get(pc).copied().unwrap_or(0));
2030 let varid = if WC_REDIR_VARID(cur) != 0 {
2031 REDIR_VARID_MASK
2032 } else {
2033 0
2034 };
2035 // c:2351 — `ecbuf[pc] = WCB_REDIR(type | REDIR_FROM_HEREDOC_MASK | varid);`
2036 let new_header = WCB_REDIR((redir_type | REDIR_FROM_HEREDOC_MASK | varid) as wordcode);
2037 // c:2352 — `ecbuf[pc + 2] = ecstrcode(str);`
2038 let coded_str = ecstrcode(doc);
2039 // c:2353 — `ecbuf[pc + 3] = ecstrcode(termstr);`
2040 let coded_term = ecstrcode(term);
2041 // c:2354 — `ecbuf[pc + 4] = ecstrcode(munged_termstr);`
2042 let coded_munged = ecstrcode(munged_term);
2043 ECBUF.with_borrow_mut(|b| {
2044 b[pc] = new_header;
2045 b[pc + 2] = coded_str;
2046 b[pc + 3] = coded_term;
2047 b[pc + 4] = coded_munged;
2048 });
2049}
2050
2051/// Parse a wordlist for `for ... in WORDS;`. Direct port of
2052/// zsh/Src/parse.c:2362 `par_wordlist`. Reads STRING tokens
2053/// until the next SEPER / SEMI / NEWLIN.
2054pub fn par_wordlist() -> Vec<String> {
2055 let mut out = Vec::new();
2056 // parse.c:2362-2378 — collect STRINGs into the wordlist.
2057 while tok() == STRING_LEX {
2058 if let Some(text) = tokstr() {
2059 out.push(text);
2060 }
2061 zshlex();
2062 }
2063 out
2064}
2065
2066/// Parse a newline-separated wordlist. Direct port of
2067/// zsh/Src/parse.c:2379 `par_nl_wordlist`. Like
2068/// par_wordlist but tolerates leading/trailing newlines.
2069pub fn par_nl_wordlist() -> Vec<String> {
2070 // parse.c:2380-2381 — skip leading newlines.
2071 while tok() == NEWLIN {
2072 zshlex();
2073 }
2074 let out = par_wordlist();
2075 // parse.c:2395-2397 — skip trailing newlines.
2076 while tok() == NEWLIN {
2077 zshlex();
2078 }
2079 out
2080}
2081
2082/// `COND_SEP()` macro from `Src/parse.c:2433`. True when the current
2083/// token is a separator usable inside `[[ … ]]` (newline / semi /
2084/// `&`). C uses it to skip optional whitespace between cond terms.
2085#[inline]
2086pub fn COND_SEP() -> bool {
2087 matches!(tok(), NEWLIN | SEMI | AMPER)
2088}
2089
2090/// Parse [[ ... ]] conditional
2091/// Parse `[[ EXPR ]]` conditional expression. Direct port of
2092/// zsh/Src/parse.c:2409 `par_cond` (and helpers par_cond_1,
2093/// par_cond_2, par_cond_double, par_cond_triple, par_cond_multi
2094/// at parse.c:2434-2731). Expression operators: `||` `&&` `!`
2095/// + unary tests (-f, -d, -n, -z, etc.) + binary tests (=, !=,
2096/// <, >, ==, =~, -eq, -ne, -lt, -le, -gt, -ge, -nt, -ot, -ef).
2097fn par_cond() -> Option<ZshCommand> {
2098 // C par_dinbrack (parse.c:1810-1822) wraps the body parse with
2099 // `incond = 1; incmdpos = 0;` BEFORE the first zshlex past `[[`,
2100 // and resets to `incond = 0; incmdpos = 1;` after `]]`. Without
2101 // `incond = 1`, lex.c does not promote `]]` to DOUTBRACK and the
2102 // cond body bleeds past the close bracket — the parser then
2103 // sees `]]` as a separate STRING command. Every `if [[ ... ]]; then`
2104 // failed with `command not found: ]]` before this fix.
2105 set_incond(1);
2106 set_incmdpos(false);
2107 zshlex(); // skip [[
2108 // Empty cond `[[ ]]` is a parse error in zsh — emit the
2109 // diagnostic and return None so the caller produces a
2110 // non-zero exit. Without this, `[[ ]]` silently passed and
2111 // returned exit 0.
2112 if tok() == DOUTBRACK {
2113 crate::ported::utils::zerr("parse error near `]]'");
2114 set_incond(0);
2115 set_incmdpos(true);
2116 zshlex();
2117 return None;
2118 }
2119 let cond = parse_cond_expr();
2120
2121 if tok() == DOUTBRACK {
2122 set_incond(0);
2123 set_incmdpos(true);
2124 zshlex();
2125 } else {
2126 // Recover incond/incmdpos so subsequent parsing isn't stuck
2127 // in cond-mode if the close bracket is missing.
2128 set_incond(0);
2129 set_incmdpos(true);
2130 }
2131
2132 cond.map(ZshCommand::Cond)
2133}
2134
2135/// Port of `par_cond_1(void)` from `Src/parse.c:2434`. Parses one
2136/// `||`-separated cond expression. Emits `WCB_COND(COND_AND, …)`
2137/// when an `&&` is found and recurses.
2138pub fn par_cond_1() -> i32 {
2139 // c:2434
2140
2141 let p = ECUSED.with(|c| c.get()) as usize;
2142 let r = par_cond_2();
2143 while COND_SEP() {
2144 condlex();
2145 }
2146 if tok() == DAMPER {
2147 condlex();
2148 while COND_SEP() {
2149 condlex();
2150 }
2151 ecispace(p, 1);
2152 par_cond_1();
2153 let ecused = ECUSED.with(|c| c.get()) as usize;
2154 ECBUF.with(|c| {
2155 c.borrow_mut()[p] = WCB_COND(COND_AND as u32, (ecused - 1 - p) as u32);
2156 });
2157 return 1;
2158 }
2159 r
2160}
2161
/// Port of `par_cond_2(void)` from `Src/parse.c:2476`. The heavy
/// cond-term parser: handles `! cond`, `(cond)`, unary `[ -X arg ]`,
/// binary `[ A op B ]`, and `[ A op1 B op2 C … ]` n-ary chains.
///
/// Wordcode is emitted through `ecadd` / `ecstr` and the
/// `par_cond_double` / `par_cond_triple` / `par_cond_multi` helpers.
/// Separator tokens (see `COND_SEP`) are skipped after each `condlex`.
///
/// Returns `0` on the error paths that call `yyerror` here, otherwise
/// whatever the helper that emitted the term returns (the `<` / `>`
/// arm returns `1` directly).
pub fn par_cond_2() -> i32 {
    // c:2476
    // `n_testargs` only applies in `testlex` mode (=== /bin/test
    // compat). zshrs has no testlex yet, so always 0.
    let n_testargs: i32 = 0;

    // c:2481 — handled inline; this Rust port skips the n_testargs
    // arm since zshrs invokes par_cond via [[ ... ]] only.

    while COND_SEP() {
        condlex();
    }
    if tok() == BANG_TOK {
        // c:2522 — `[[ ! cond ]]`: emit the NOT header, then parse the
        // negated term.
        condlex();
        ecadd(WCB_COND(COND_NOT as u32, 0));
        return par_cond_2();
    }
    if tok() == INPAR_TOK {
        // c:2533 — `[[ (cond) ]]`
        condlex();
        while COND_SEP() {
            condlex();
        }
        // NOTE(review): `par_cond` in this file is the `[[`-level
        // entry: it advances the lexer itself and consumes a trailing
        // `]]`. Confirm that calling it for a parenthesised group is
        // intended rather than a wordcode-level `||` parser.
        let r = par_cond();
        while COND_SEP() {
            condlex();
        }
        if tok() != OUTPAR_TOK {
            yyerror("missing )");
            return 0;
        }
        condlex();
        // Collapse the AST result to the 0/1 convention used here.
        return r.map_or(0, |_| 1);
    }
    // First operand. A token without text is treated as an empty
    // string, which the checks below read as "no operand".
    let s1 = tokstr().unwrap_or_default();
    // c:2549 — `dble = (s1 && IS_DASH(*s1) && (!n_testargs ||
    // strspn(s1+1, "abcd...") == 1) && !s1[2]);` — IS_DASH covers
    // BOTH `-` and Dash (`\u{9b}`). The raw tokstr inside `[[ ... ]]`
    // carries Dash as a marker, so `starts_with('-')` alone would
    // match only ASCII dashes. Chars are counted (Dash is a single
    // code point) instead of bytes.
    //
    // NOTE(review): the quoted C predicate only applies the letter-set
    // check when `n_testargs` is non-zero; here it is applied
    // unconditionally even though `n_testargs` is fixed at 0, so a
    // two-character `-X` outside the set is not treated as `dble` —
    // confirm this narrowing is intended.
    let s1_chars: Vec<char> = s1.chars().collect();
    let dble = !s1_chars.is_empty()
        && IS_DASH(s1_chars[0])
        && s1_chars.len() == 2
        && "abcdefghknoprstuvwxzLONGS".contains(s1_chars[1]);
    if tok() != STRING_LEX {
        if !s1.is_empty() && tok() != LEXERR && (!dble || n_testargs != 0) {
            // c:2486-2497 — `if (n_testargs == 1)` block: under
            // POSIXBUILTINS-off, `[ -t ]` rewrites to `[ -t 1 ]`
            // (ksh behavior). With n_testargs fixed at 0 this branch
            // cannot be taken; it is kept as a marker for parity.
            if n_testargs == 1 && unset(POSIXBUILTINS) && check_cond(&s1, "t") {
                condlex();
                return par_cond_double(&s1, "1");
            }
            // c:2557 — `[[ STRING ]]` re-interpreted as `[[ -n STRING ]]`.
            condlex();
            while COND_SEP() {
                condlex();
            }
            return par_cond_double("-n", &s1);
        }
        yyerror("condition expected");
        return 0;
    }
    // Step past the first operand.
    condlex();
    while COND_SEP() {
        condlex();
    }
    if tok() == INANG_TOK || tok() == OUTANG_TOK {
        // c:2576 — `<` / `>` string compare. Emits the header and the
        // two operand strings; no further word follows in this arm.
        let xtok = tok();
        condlex();
        while COND_SEP() {
            condlex();
        }
        if tok() != STRING_LEX {
            yyerror("string expected");
            return 0;
        }
        let s3 = tokstr().unwrap_or_default();
        condlex();
        while COND_SEP() {
            condlex();
        }
        let op = if xtok == INANG_TOK {
            COND_STRLT
        } else {
            COND_STRGTR
        };
        ecadd(WCB_COND(op as u32, 0));
        ecstr(&s1);
        ecstr(&s3);
        return 1;
    }
    if tok() != STRING_LEX {
        // c:2592 — only one operand seen → `[ -n s1 ]`, unless s1 is
        // itself a `-X` operator, in which case it is emitted as a
        // zero-argument multi cond.
        if tok() != LEXERR {
            if !dble || n_testargs != 0 {
                return par_cond_double("-n", &s1);
            }
            return par_cond_multi(&s1, &[]);
        }
        yyerror("syntax error");
        return 0;
    }
    // Second operand (or the operator of a binary test).
    let s2 = tokstr().unwrap_or_default();
    // `incond` is raised by one only while the next token is lexed,
    // then restored.
    set_incond(incond() + 1);
    condlex();
    while COND_SEP() {
        condlex();
    }
    set_incond(incond() - 1);
    if tok() == STRING_LEX && !dble {
        let s3 = tokstr().unwrap_or_default();
        condlex();
        while COND_SEP() {
            condlex();
        }
        if tok() == STRING_LEX {
            // c:2615 — n-ary `[ A op B C D ... ]`: gather every
            // remaining STRING as an argument of `s1`.
            let mut l: Vec<String> = vec![s2, s3];
            while tok() == STRING_LEX {
                l.push(tokstr().unwrap_or_default());
                condlex();
                while COND_SEP() {
                    condlex();
                }
            }
            return par_cond_multi(&s1, &l);
        }
        // Exactly three strings: binary form `A op B`.
        return par_cond_triple(&s1, &s2, &s3);
    }
    // Two strings: unary form `-X arg`.
    par_cond_double(&s1, &s2)
}
2308
2309/// Port of `par_cond_double(char *a, char *b)` from `Src/parse.c:2626`.
2310/// Emits wordcode for unary cond `[ -X b ]` or modular `[ -mod b ]`.
2311pub fn par_cond_double(a: &str, b: &str) -> i32 {
2312 // c:2628 — `if (!IS_DASH(a[0]) || !a[1])` — char-based, since
2313 // Dash is a single code point (`\u{9b}`) and `a.len() < 2` on
2314 // BYTES would still pass for "-z" but fail for the marker form
2315 // `\u{9b}z` (2 bytes). Walk by chars.
2316 let ac: Vec<char> = a.chars().collect();
2317 if ac.is_empty() || !IS_DASH(ac[0]) || ac.len() < 2 {
2318 crate::ported::utils::zerr(&format!("parse error: condition expected: {}", a));
2319 return 1;
2320 }
2321 // c:2630 — `else if (!a[2] && strspn(a+1, "abcd...zhLONGS") == 1)`
2322 let unary_set = "abcdefgknoprstuvwxzhLONGS";
2323 if ac.len() == 2 && unary_set.contains(ac[1]) {
2324 // c:2631 — `ecadd(WCB_COND(a[1], 0));` uses the raw cond-op
2325 // letter byte as the opcode payload. Use the ASCII char's
2326 // code-point value directly — every letter in `unary_set`
2327 // fits in 7 bits.
2328 ecadd(WCB_COND(ac[1] as u32, 0));
2329 ecstr(b);
2330 } else {
2331 ecadd(WCB_COND(COND_MOD as u32, 1));
2332 ecstr(a);
2333 ecstr(b);
2334 }
2335 1
2336}
2337
/// Port of `get_cond_num(char *tst)` from `Src/parse.c:2643`.
///
/// Looks `tst` up in the fixed operator table
/// `nt, ot, ef, eq, ne, lt, gt, le, ge` and returns its zero-based
/// position. The match is exact and case-sensitive. Returns `-1` when
/// `tst` is not in the table.
pub fn get_cond_num(tst: &str) -> i32 {
    // c:2647 — table order defines the returned index.
    const CONDSTRS: [&str; 9] = ["nt", "ot", "ef", "eq", "ne", "lt", "gt", "le", "ge"];

    CONDSTRS
        .iter()
        .position(|&name| name == tst)
        .map_or(-1, |idx| idx as i32) // c:2654 / c:2656
}
2353
/// Recursion guard corresponding to par_time's `static int inpartime`
/// at C parse.c:1038, which prevents infinite recursion on
/// `time time foo`. C uses a function-level `static int`
/// (per-process); here it is a thread-local `Cell<bool>` that starts
/// out `false`.
///
/// NOTE(review): the `par_time` defined earlier in this file does not
/// read or write this flag — confirm where it is consulted.
thread_local! {
    static PARSER_INPARTIME: std::cell::Cell<bool> = const { std::cell::Cell::new(false) };
}
2361
2362/// Port of `par_cond_triple(char *a, char *b, char *c)` from
2363/// `Src/parse.c:2659`. Emits wordcode for the binary forms
2364/// `[ A op B ]` — `=` / `==` / `!=` / `<` / `>` / `=~` / `-X`.
2365///
2366/// C does `(b[0] == Equals || b[0] == '=')` etc., matching BOTH the
2367/// raw ASCII operator char AND its tokenized marker form per
2368/// `Src/zsh.h:159-194`:
2369/// Equals = `\u{8d}`, Outang = `\u{95}`, Inang = `\u{94}`,
2370/// Tilde = `\u{98}`, Bang = `\u{9c}`, Dash = `\u{9b}`.
2371/// Inside `[[ ... ]]` the lexer emits the marker bytes — comparing
2372/// against literal-only `b"=="` misses every cond op.
2373/// (The previous Rust port had the doc comment values wrong:
2374/// Outang=0x8e was actually Bar; Inang=0x91 was Inbrack;
2375/// Tilde=0x96 was OutangProc; Bang=0x8b was Outparmath. The code
2376/// itself uses the correct const names, so this was a docs-only fix.)
2377pub fn par_cond_triple(a: &str, b: &str, c: &str) -> i32 {
2378 // c:2659
2379 let bc: Vec<char> = b.chars().collect();
2380 let is_eq = |ch: char| ch == '=' || ch == Equals;
2381 let is_gt = |ch: char| ch == '>' || ch == Outang;
2382 let is_lt = |ch: char| ch == '<' || ch == Inang;
2383 let is_tilde = |ch: char| ch == '~' || ch == Tilde;
2384 let is_bang = |ch: char| ch == '!' || ch == Bang;
2385
2386 // c:2663 — `(b[0] == Equals || b[0] == '=') && !b[1]` → `=` (single).
2387 if bc.len() == 1 && is_eq(bc[0]) {
2388 ecadd(WCB_COND(COND_STREQ as u32, 0));
2389 ecstr(a);
2390 ecstr(c);
2391 let np = ECNPATS.with(|cc| { let v = cc.get(); cc.set(v + 1); v }) as u32;
2392 ecadd(np);
2393 return 1;
2394 }
2395 // c:2668-2673 — `(t0 = b[0]=='>' || Outang) || b[0]=='<' || Inang`.
2396 if bc.len() == 1 && (is_gt(bc[0]) || is_lt(bc[0])) {
2397 let op = if is_gt(bc[0]) { COND_STRGTR } else { COND_STRLT };
2398 ecadd(WCB_COND(op as u32, 0));
2399 ecstr(a);
2400 ecstr(c);
2401 let np = ECNPATS.with(|cc| { let v = cc.get(); cc.set(v + 1); v }) as u32;
2402 ecadd(np);
2403 return 1;
2404 }
2405 // c:2674-2679 — `==` STRDEQ.
2406 if bc.len() == 2 && is_eq(bc[0]) && is_eq(bc[1]) {
2407 ecadd(WCB_COND(COND_STRDEQ as u32, 0));
2408 ecstr(a);
2409 ecstr(c);
2410 let np = ECNPATS.with(|cc| { let v = cc.get(); cc.set(v + 1); v }) as u32;
2411 ecadd(np);
2412 return 1;
2413 }
2414 // c:2680-2684 — `!=` STRNEQ.
2415 if bc.len() == 2 && is_bang(bc[0]) && is_eq(bc[1]) {
2416 ecadd(WCB_COND(COND_STRNEQ as u32, 0));
2417 ecstr(a);
2418 ecstr(c);
2419 let np = ECNPATS.with(|cc| { let v = cc.get(); cc.set(v + 1); v }) as u32;
2420 ecadd(np);
2421 return 1;
2422 }
2423 // c:2685-2691 — `=~` REGEX (no pattern slot — implicit COND_MODI).
2424 if bc.len() == 2 && is_eq(bc[0]) && is_tilde(bc[1]) {
2425 ecadd(WCB_COND(COND_REGEX as u32, 0));
2426 ecstr(a);
2427 ecstr(c);
2428 return 1;
2429 }
2430 // c:2692-2702 — `-OP` numeric-or-modular cond (e.g. `-eq`, `-nt`).
2431 if !bc.is_empty() && IS_DASH(bc[0]) {
2432 let rest: String = bc[1..].iter().collect();
2433 let t = get_cond_num(&rest);
2434 if t > -1 {
2435 ecadd(WCB_COND((t + COND_NT) as u32, 0));
2436 ecstr(a);
2437 ecstr(c);
2438 return 1;
2439 }
2440 ecadd(WCB_COND(COND_MODI as u32, 0));
2441 ecstr(b);
2442 ecstr(a);
2443 ecstr(c);
2444 return 1;
2445 }
2446 // c:2703-2707 — `-mod A B C` modular cond on `a`.
2447 let ac: Vec<char> = a.chars().collect();
2448 if !ac.is_empty() && IS_DASH(ac[0]) && ac.len() > 1 {
2449 ecadd(WCB_COND(COND_MOD as u32, 2));
2450 ecstr(a);
2451 ecstr(b);
2452 ecstr(c);
2453 return 1;
2454 }
2455 crate::ported::utils::zerr(&format!("condition expected: {}", b));
2456 1
2457}
2458
2459/// Port of `par_cond_multi(char *a, LinkList l)` from `Src/parse.c:2716`.
2460/// Emits wordcode for `[ -OP A B C … ]` n-ary cond (alternation).
2461pub fn par_cond_multi(a: &str, l: &[String]) -> i32 {
2462 // c:2716 — `if (!IS_DASH(a[0]) || !a[1])`; same Dash/`-` dual
2463 // matching as par_cond_double, char-walked because Dash is a
2464 // single code point.
2465 let ac: Vec<char> = a.chars().collect();
2466 if ac.is_empty() || !IS_DASH(ac[0]) || ac.len() < 2 {
2467 crate::ported::utils::zerr(&format!("condition expected: {}", a));
2468 return 1;
2469 }
2470 ecadd(WCB_COND(COND_MOD as u32, l.len() as u32));
2471 ecstr(a);
2472 for item in l {
2473 ecstr(item);
2474 }
2475 1
2476}
2477
2478/// Emit a parser-level error. Direct port of zsh/Src/parse.c
2479/// 2733-2766 `yyerror`. C version fills a per-event error buffer
2480/// and sets errflag. zshrs pushes onto errors which the
2481/// caller drains via parse()'s Result return.
2482/// WARNING: param-name divergence — Rust takes `&str message`, C takes
2483/// `int noerr`. The Rust callers pass user-meaningful messages
2484/// (`"missing ]]"`, `"condition expected"`); the C body collects the
2485/// offending token via `dupstring(zshlextext)` for the error string.
2486/// This Rust adapter:
2487/// 1. Uses the caller-supplied message verbatim if non-empty.
2488/// 2. Skips the `histdone & HISTFLAG_NOEXEC` and `errflag & ERRFLAG_INT`
2489/// gates per c:2746 (printing only when neither is set) — the
2490/// ERRFLAG_INT check is the load-bearing guard.
2491/// 3. Sets ERRFLAG_ERROR per c:2753 (noerr=0 path always taken).
2492pub fn yyerror(msg: &str) { // c:2733
2493 let int_flagged = (crate::ported::utils::errflag.load(std::sync::atomic::Ordering::SeqCst)
2494 & crate::ported::zsh_h::ERRFLAG_INT) != 0;
2495 if !int_flagged { // c:2746
2496 let body = if msg.is_empty() { "parse error".to_string() } // c:2751
2497 else { format!("parse error: {msg}") }; // c:2748
2498 crate::ported::utils::zwarnnam("zsh", &body);
2499 }
2500 // c:2753 — `if (!noerr && noerrs != 2) errflag |= ERRFLAG_ERROR;`
2501 crate::ported::utils::errflag.fetch_or(
2502 crate::ported::zsh_h::ERRFLAG_ERROR,
2503 std::sync::atomic::Ordering::SeqCst);
2504}
2505
2506// ============================================================
2507// Eprog runtime ops (parse.c:2767-2853)
2508//
2509// dupeprog / useeprog / freeeprog are zsh's reference-counting
2510// helpers for executable programs. zshrs's AST is owned by
2511// value (Rust ownership); cloning is a tree-deep copy via
2512// Clone, "use" is a no-op (the executor borrows the AST), and
2513// "free" is automatic on drop.
2514// ============================================================
2515
2516/// Duplicate an Eprog. Direct port of zsh/Src/parse.c:2813
2517/// Port of `Eprog dupeprog(Eprog p, int heap)` from
2518/// `Src/parse.c:2767`. Deep-copies the wordcode array, string
2519/// table, and pattern-prog slots. `dummy_eprog` is returned
2520/// unchanged. `heap`-allocated copies get `nref = -1` (never
2521/// freed); real ones get `nref = 1`.
2522pub fn dupeprog(p: &crate::ported::zsh_h::eprog, heap: bool) -> crate::ported::zsh_h::eprog {
2523 // c:2774-2775 — `if (p == &dummy_eprog) return p;` — caller-
2524 // observable identity in C uses a pointer compare; Rust's
2525 // equivalent is "if it has the dummy's shape (single WCB_END
2526 // word and no strs), return a copy of the same shape".
2527 // c:2796-2797 — `for (i = r->npats; i--; pp++) *pp = dummy_patprog1;`
2528 // C uses `dummy_patprog1` as a placeholder; the Rust port has
2529 // `Vec<Patprog>` (Box<patprog>) — synthesize an equivalent zero-
2530 // initialized patprog for each slot (resolved later by
2531 // pattern.c::patcompile-on-first-use).
2532 let dummy_pat = || crate::ported::zsh_h::patprog {
2533 startoff: 0,
2534 size: 0,
2535 mustoff: 0,
2536 patmlen: 0,
2537 globflags: 0,
2538 globend: 0,
2539 flags: 0,
2540 patnpar: 0,
2541 patstartch: 0,
2542 };
2543 let r = crate::ported::zsh_h::eprog {
2544 // c:2778 — `flags = (heap ? EF_HEAP : EF_REAL) | (p->flags & EF_RUN);`
2545 flags: (if heap { EF_HEAP } else { EF_REAL }) | (p.flags & EF_RUN),
2546 len: p.len,
2547 npats: p.npats,
2548 // c:2787 — `nref = heap ? -1 : 1;`
2549 nref: if heap { -1 } else { 1 },
2550 prog: p.prog.clone(),
2551 strs: p.strs.clone(),
2552 pats: (0..p.npats).map(|_| Box::new(dummy_pat())).collect(),
2553 shf: None,
2554 dump: None,
2555 };
2556 r
2557}
2558
2559/// Port of `void useeprog(Eprog p)` from `Src/parse.c:2813`.
2560/// `if (p && p != &dummy_eprog && p->nref >= 0) p->nref++;` —
2561/// pin a real (non-heap, non-dummy) Eprog so it survives the
2562/// next `freeeprog`.
2563pub fn useeprog(p: &mut crate::ported::zsh_h::eprog) {
2564 // c:2815 — `if (p && p != &dummy_eprog && p->nref >= 0)`
2565 if p.nref >= 0 {
2566 p.nref += 1; // c:2816
2567 }
2568}
2569
2570/// Port of `void freeeprog(Eprog p)` from `Src/parse.c:2823`.
2571/// Refcount-decrement; when it hits zero, drops the pattern progs,
2572/// decrements the dump refcount if any, and releases the eprog.
2573/// `dummy_eprog` is never freed. Heap-eprogs (`nref < 0`) are
2574/// never freed either — they live as long as the heap arena.
2575pub fn freeeprog(p: &mut crate::ported::zsh_h::eprog) {
2576 // c:2829 — `if (p && p != &dummy_eprog) { ... }`
2577 if p.nref > 0 {
2578 p.nref -= 1; // c:2832
2579 if p.nref == 0 {
2580 // c:2833-2840 — drop pats, dump refcount, then the eprog.
2581 // Rust's Drop handles the per-field cleanup; we just
2582 // need to decrement the dump count first.
2583 if let Some(dump) = p.dump.take() {
2584 let dumped = (*dump).clone();
2585 decrdumpcount(&dumped); // c:2837
2586 }
2587 p.prog.clear();
2588 p.strs = None;
2589 p.pats.clear();
2590 }
2591 }
2592}
2593
2594// =============================================================================
2595// Wordcode read helpers — used by text.rs's `gettext2` and exec dispatch
2596// to walk a compiled Eprog without re-running the parser. These are the
2597// only `Src/parse.c` functions ported so far in this file; the recursive-
2598// descent parser (par_event / par_list / par_cmd / par_*) follows
2599// below as free fns at module scope.
2600// =============================================================================
2601
2602/// Port of `ecgetstr(Estate s, int dup, int *tokflag)` from `Src/parse.c:2855`.
2603/// `s->pc` advances through the wordcode buffer; `s->strs` indexes the
2604/// string pool. Returns the interned string (or a 1-3-char literal
2605/// inlined directly into the wordcode word).
2606pub fn ecgetstr(s: &mut estate, dup: i32, tokflag: Option<&mut i32>) -> String {
2607 let prog = &s.prog.prog;
2608 if s.pc >= prog.len() {
2609 return String::new();
2610 }
2611 let c = prog[s.pc]; // c:2858 `wordcode c = *s->pc++;`
2612 s.pc += 1;
2613 if let Some(tf) = tokflag {
2614 *tf = i32::from((c & 1) != 0); // c:2880 `*tokflag = (c & 1);`
2615 }
2616 if c == 6 || c == 7 {
2617 // c:2861 `if (c == 6 || c == 7) r = "";`
2618 return String::new();
2619 }
2620 let r: String = if (c & 2) != 0 {
2621 // c:2862 — `else if (c & 2)`
2622 // c:2863-2868 — 3-byte inline string packed into the wordcode
2623 // word; followed by `buf[3] = '\0'; r = dupstring(buf);`.
2624 // C's `dupstring` uses `strlen(buf)` which TRUNCATES at the
2625 // first NUL byte — short strings of 1 or 2 chars get padded
2626 // with NULs and truncated cleanly. The previous Rust port
2627 // used `retain(|&x| x != 0)` which would silently SPLICE OUT
2628 // an interior NUL (e.g. `[a, 0, b]` → "ab"), diverging from
2629 // C's strlen-truncate (`[a, 0, b]` → "a"). Fix: truncate at
2630 // first NUL to match C exactly.
2631 let b0 = ((c >> 3) & 0xff) as u8;
2632 let b1 = ((c >> 11) & 0xff) as u8;
2633 let b2 = ((c >> 19) & 0xff) as u8;
2634 let v = [b0, b1, b2];
2635 let end = v.iter().position(|&x| x == 0).unwrap_or(v.len()); // c:2869 strlen(buf)
2636 String::from_utf8_lossy(&v[..end]).into_owned()
2637 } else {
2638 // c:2877 `else r = s->strs + (c >> 2);`
2639 let off = (c >> 2) as usize + s.strs_offset;
2640 let strs_bytes = s.strs.as_deref().unwrap_or("").as_bytes();
2641 if off >= strs_bytes.len() {
2642 String::new()
2643 } else {
2644 let tail = &strs_bytes[off..];
2645 let end = tail.iter().position(|&b| b == 0).unwrap_or(tail.len());
2646 String::from_utf8_lossy(&tail[..end]).into_owned()
2647 }
2648 };
2649 // c:2891 `return ((dup == EC_DUP || (dup && (c & 1))) ? dupstring(r) : r);`
2650 // Rust owns the String already; `dup` flag has no observable effect.
2651 let _ = (dup, EC_DUP, EC_NODUP);
2652 r
2653}
2654
2655// ============================================================
2656// Wordcode runtime getters (parse.c:2853-3060)
2657//
2658// Direct ports of the wordcode-read helpers (ecrawstr,
2659// ecgetstr, ecgetarr, ecgetredirs, ecgetlist, eccopyredirs).
2660// Read packed wordcode out of an Eprog at execution time.
2661// Used by exec_wordcode and the wordcode-walking dispatch in
2662// src/exec.rs.
2663// ============================================================
2664
2665/// Port of `ecrawstr(Eprog p, Wordcode pc, int *tokflag)` from
2666/// `Src/parse.c:2891`. Like `ecgetstr` but reads at the given pc
2667/// without advancing — caller steps `pc` separately.
2668pub fn ecrawstr(p: &eprog, pc: usize, tokflag: Option<&mut i32>) -> String {
2669 if pc >= p.prog.len() {
2670 return String::new();
2671 }
2672 let c = p.prog[pc]; // c:2894
2673 if let Some(tf) = tokflag {
2674 *tf = i32::from((c & 1) != 0); // c:2898/2906/2912
2675 }
2676 if c == 6 || c == 7 {
2677 // c:2897
2678 return String::new();
2679 }
2680 if (c & 2) != 0 {
2681 // c:2902-2906 — same 3-byte inline string as ecgetstr, then
2682 // `buf[3] = '\0'; return dupstring(buf);` — truncate at first
2683 // NUL via strlen (NOT splice out interior NULs).
2684 let b0 = ((c >> 3) & 0xff) as u8;
2685 let b1 = ((c >> 11) & 0xff) as u8;
2686 let b2 = ((c >> 19) & 0xff) as u8;
2687 let v = [b0, b1, b2];
2688 let end = v.iter().position(|&x| x == 0).unwrap_or(v.len()); // c:2906 strlen(buf)
2689 String::from_utf8_lossy(&v[..end]).into_owned()
2690 } else {
2691 // c:2911
2692 let off = (c >> 2) as usize;
2693 let strs_bytes = p.strs.as_deref().unwrap_or("").as_bytes();
2694 if off >= strs_bytes.len() {
2695 return String::new();
2696 }
2697 let tail = &strs_bytes[off..];
2698 let end = tail.iter().position(|&b| b == 0).unwrap_or(tail.len());
2699 String::from_utf8_lossy(&tail[..end]).into_owned()
2700 }
2701}
2702
2703/// Port of `ecgetarr(Estate s, int num, int dup, int *tokflag)` from
2704/// `Src/parse.c:2917`. Reads `num` strings from wordcode at `s->pc`
2705/// and OR-folds each entry's token flag into `*tokflag`.
2706pub fn ecgetarr(s: &mut estate, num: usize, dup: i32, tokflag: Option<&mut i32>) -> Vec<String> {
2707 let mut ret: Vec<String> = Vec::with_capacity(num); // c:2922
2708 let mut tf: i32 = 0;
2709 for _ in 0..num {
2710 // c:2924 `while (num--)`
2711 let mut tmp = 0;
2712 ret.push(ecgetstr(s, dup, Some(&mut tmp))); // c:2925
2713 tf |= tmp; // c:2926
2714 }
2715 if let Some(out) = tokflag {
2716 // c:2929
2717 *out = tf;
2718 }
2719 ret
2720}
2721
2722/// Port of `ecgetlist(Estate s, int num, int dup, int *tokflag)` from
2723/// `Src/parse.c:2937`. Same shape as `ecgetarr` but C returns
2724/// `LinkList`; zshrs uses `Vec<String>` for both.
2725pub fn ecgetlist(
2726 s: &mut crate::ported::zsh_h::estate,
2727 num: usize,
2728 dup: i32,
2729 tokflag: Option<&mut i32>,
2730) -> Vec<String> {
2731 if num == 0 {
2732 // c:2949-2952
2733 if let Some(tf) = tokflag {
2734 *tf = 0;
2735 }
2736 return Vec::new();
2737 }
2738 ecgetarr(s, num, dup, tokflag)
2739}
2740
2741/// Port of `ecgetredirs(Estate s)` from `Src/parse.c:2959`.
2742///
2743/// `strs` must be the same tail `ecgetstr` uses (`s->strs` / `estate.strs` from offset).
2744/// WARNING: param names don't match C — Rust=(prog, strs, pc) vs C=(s)
2745pub fn ecgetredirs(s: &mut estate) -> Vec<redir> {
2746 let mut ret: Vec<redir> = Vec::new(); // c:2959 `LinkList ret = newlinklist();`
2747 let prog_len = s.prog.prog.len();
2748 if s.pc >= prog_len {
2749 return ret;
2750 }
2751 let mut code = s.prog.prog[s.pc]; // c:2962 `wordcode code = *s->pc++;`
2752 s.pc += 1;
2753
2754 loop {
2755 if wc_code(code) != WC_REDIR {
2756 // c:2988-2989 `s->pc--` then break from while
2757 s.pc = s.pc.saturating_sub(1);
2758 break;
2759 }
2760
2761 let typ = WC_REDIR_TYPE(code); // c:2967 `r->type = WC_REDIR_TYPE(code);`
2762 if s.pc >= prog_len {
2763 break;
2764 }
2765 let fd1_w = s.prog.prog[s.pc]; // c:2968 `r->fd1 = *s->pc++;`
2766 s.pc += 1;
2767
2768 let name = ecgetstr(s, EC_DUP, None); // c:2969 `r->name = ecgetstr(...)`
2769
2770 let (flags, here_terminator, munged_here_terminator) = if WC_REDIR_FROM_HEREDOC(code) != 0 {
2771 // c:2970-2973
2772 let term = ecgetstr(s, EC_DUP, None);
2773 let munged = ecgetstr(s, EC_DUP, None);
2774 (REDIRF_FROM_HEREDOC, Some(term), Some(munged))
2775 } else {
2776 // c:2974-2977
2777 (0, None, None)
2778 };
2779
2780 let varid = if WC_REDIR_VARID(code) != 0 {
2781 // c:2979-2980
2782 Some(ecgetstr(s, EC_DUP, None))
2783 } else {
2784 None // c:2981-2982
2785 };
2786
2787 ret.push(redir {
2788 // c:2965-2982 fields + c:2984 `addlinknode`
2789 typ,
2790 flags,
2791 fd1: fd1_w as i32,
2792 fd2: 0,
2793 name: Some(name),
2794 varid,
2795 here_terminator,
2796 munged_here_terminator,
2797 });
2798
2799 if s.pc >= prog_len {
2800 break;
2801 }
2802 code = s.prog.prog[s.pc]; // c:2986 `code = *s->pc++;`
2803 s.pc += 1;
2804 }
2805
2806 ret // c:2990 `return ret`
2807}
2808
2809/// Port of `eccopyredirs(Estate s)` from `Src/parse.c:3003`. Reads
2810/// the WC_REDIR run at `s->pc`, counts the wordcodes needed,
2811/// reserves space in `ecbuf` via `ecispace`, then re-walks `s->pc`
2812/// re-emitting each redir's wordcodes into the reserved slot —
2813/// finally calls `bld_eprog(0)` to package the result as an Eprog.
2814pub fn eccopyredirs(s: &mut crate::ported::zsh_h::estate) -> Option<crate::ported::zsh_h::eprog> {
2815 let prog_len = s.prog.prog.len();
2816 if s.pc >= prog_len {
2817 return None;
2818 }
2819 // c:3007-3009 — `if (wc_code(*pc) != WC_REDIR) return NULL;`
2820 let first_code = s.prog.prog[s.pc];
2821 if wc_code(first_code) != WC_REDIR {
2822 return None;
2823 }
2824 // c:3011 — `init_parse();`
2825 init_parse();
2826
2827 // c:3013-3027 — count wordcodes the redir run will need.
2828 // Each WC_REDIR contributes `code + fd1 + name` = 3, plus
2829 // `+2` if WC_REDIR_FROM_HEREDOC (terminator + munged), plus
2830 // `+1` if WC_REDIR_VARID.
2831 let mut probe = s.pc;
2832 let mut ncodes = 0usize;
2833 loop {
2834 if probe >= prog_len {
2835 break;
2836 }
2837 let code = s.prog.prog[probe];
2838 if wc_code(code) != WC_REDIR {
2839 break;
2840 }
2841 let mut ncode = if WC_REDIR_FROM_HEREDOC(code) != 0 {
2842 5
2843 } else {
2844 3
2845 };
2846 if WC_REDIR_VARID(code) != 0 {
2847 ncode += 1;
2848 }
2849 probe += ncode;
2850 ncodes += ncode;
2851 }
2852
2853 // c:3028-3029 — `r = ecused; ecispace(r, ncodes);`
2854 let r0 = ECUSED.get() as usize;
2855 ecispace(r0, ncodes);
2856
2857 // c:3031-3053 — re-walk `s->pc` and write into ecbuf[r..].
2858 let mut r = r0;
2859 loop {
2860 if s.pc >= prog_len {
2861 break;
2862 }
2863 let code = s.prog.prog[s.pc];
2864 if wc_code(code) != WC_REDIR {
2865 break;
2866 }
2867 s.pc += 1;
2868 // c:3036 — `ecbuf[r++] = code;`
2869 ECBUF.with_borrow_mut(|buf| {
2870 if r >= buf.len() {
2871 buf.resize(r + 1, 0);
2872 }
2873 buf[r] = code;
2874 });
2875 r += 1;
2876 // c:3038 — `ecbuf[r++] = *s->pc++;` (the fd1 word)
2877 let fd1 = s.prog.prog[s.pc];
2878 s.pc += 1;
2879 ECBUF.with_borrow_mut(|buf| {
2880 if r >= buf.len() {
2881 buf.resize(r + 1, 0);
2882 }
2883 buf[r] = fd1;
2884 });
2885 r += 1;
2886 // c:3041 — `ecbuf[r++] = ecstrcode(ecgetstr(s, EC_NODUP, NULL));`
2887 let name = ecgetstr(s, EC_NODUP, None);
2888 let nc = ecstrcode(&name);
2889 ECBUF.with_borrow_mut(|buf| {
2890 if r >= buf.len() {
2891 buf.resize(r + 1, 0);
2892 }
2893 buf[r] = nc;
2894 });
2895 r += 1;
2896 // c:3042-3047 — heredoc terminators.
2897 if WC_REDIR_FROM_HEREDOC(code) != 0 {
2898 let term = ecgetstr(s, EC_NODUP, None);
2899 let tc = ecstrcode(&term);
2900 ECBUF.with_borrow_mut(|buf| {
2901 if r >= buf.len() {
2902 buf.resize(r + 1, 0);
2903 }
2904 buf[r] = tc;
2905 });
2906 r += 1;
2907 let munged = ecgetstr(s, EC_NODUP, None);
2908 let mc = ecstrcode(&munged);
2909 ECBUF.with_borrow_mut(|buf| {
2910 if r >= buf.len() {
2911 buf.resize(r + 1, 0);
2912 }
2913 buf[r] = mc;
2914 });
2915 r += 1;
2916 }
2917 // c:3048-3049 — varid.
2918 if WC_REDIR_VARID(code) != 0 {
2919 let varid = ecgetstr(s, EC_NODUP, None);
2920 let vc = ecstrcode(&varid);
2921 ECBUF.with_borrow_mut(|buf| {
2922 if r >= buf.len() {
2923 buf.resize(r + 1, 0);
2924 }
2925 buf[r] = vc;
2926 });
2927 r += 1;
2928 }
2929 }
2930
2931 // c:3056 — `return bld_eprog(0);` — `bld_eprog` appends the
2932 // WC_END marker and packages ECBUF/ECSTRS into an Eprog.
2933 Some(bld_eprog(false))
2934}
2935
2936/// Port of `init_eprog(void)` from `Src/parse.c:3069`. Sets up
2937/// `dummy_eprog_code = WCB_END(); dummy_eprog.len = sizeof(wordcode);
2938/// dummy_eprog.prog = &dummy_eprog_code; dummy_eprog.strs = NULL;`.
2939/// Called once at shell startup (init_main → init_misc → init_eprog).
2940pub fn init_eprog() {
2941 let mut d = DUMMY_EPROG.lock().unwrap();
2942 d.prog = vec![crate::ported::zsh_h::WCB_END()]; // c:3071/3073
2943 d.len = std::mem::size_of::<wordcode>() as i32; // c:3072
2944 d.strs = None; // c:3074
2945 d.flags = 0;
2946 d.npats = 0;
2947 d.nref = 0;
2948}
2949
2950// =====================================================================
2951// `bin_zcompile` and wordcode-dump helpers — port of `Src/parse.c:3104+`.
2952//
2953// The wordcode dump format (`.zwc`) is a serialized parse tree zsh can
2954// `mmap()` and dispatch from without re-parsing on every shell start.
2955// File layout (one struct = `FD_PRELEN` `u32`s):
2956// - `pre[0]` = magic word (FD_MAGIC native byte-order, FD_OMAGIC
2957// opposite byte-order).
2958// - `pre[1]` = packed `{flags(8) | other_offset(24)}` byte field.
2959// - `pre[2..12]` = `ZSH_VERSION` C-string padded to 40 bytes.
2960// - `pre[12]` = `fdheaderlen` (total prelude+header word count).
2961// - Then a sequence of `struct fdhead` records, one per function,
2962// each followed by its NUL-terminated name (padded to 4-byte).
2963// - Then the wordcode bytes for every function back-to-back.
2964//
// On a little-endian host the dump is written twice: first with
// `FD_MAGIC` for native readers, then the body is re-walked byte-swapped
// and emitted as a second `FD_OMAGIC` copy so big-endian readers can
// mmap it too.
2968// =====================================================================
2969
2970// File-format constants — port of `Src/parse.c:3104-3150`.
2971
/// `#define FD_EXT ".zwc"` from `Src/parse.c:3104`. File-name suffix
/// of a compiled wordcode dump; `bin_zcompile` appends it to names
/// that do not already end with it.
pub const FD_EXT: &str = ".zwc";

/// `#define FD_MINMAP 4096` from `Src/parse.c:3105`. Size threshold
/// used by `write_dump`: with the default map mode the dump is only
/// flagged as mappable when its total length is at least this value.
pub const FD_MINMAP: usize = 4096;

/// `#define FD_PRELEN 12` from `Src/parse.c:3107`. Length of the file
/// prelude in `u32` words: 1 magic word + 1 packed flags/offset word +
/// 10 version-string words.
pub const FD_PRELEN: usize = 12;

/// `#define FD_MAGIC 0x04050607` from `Src/parse.c:3108`. Magic word
/// of a dump stored in the reader's own byte order.
pub const FD_MAGIC: u32 = 0x04050607;

/// `#define FD_OMAGIC 0x07060504` from `Src/parse.c:3109`. Magic word
/// of a dump stored in the opposite byte order (`FD_MAGIC` with its
/// bytes reversed).
pub const FD_OMAGIC: u32 = 0x07060504;

/// `#define FDF_MAP 1` from `Src/parse.c:3111`. Prelude flag bit:
/// the dump is meant to be `mmap()`-ed (`-M`) rather than read (`-R`).
pub const FDF_MAP: u32 = 1;

/// `#define FDF_OTHER 2` from `Src/parse.c:3112`. Prelude flag bit
/// that `write_dump` sets on the second, opposite-byte-order copy of
/// the dump.
pub const FDF_OTHER: u32 = 2;
2999
/// Port of `struct fdhead` from `Src/parse.c:3116`. One fixed-size
/// record per function inside a wordcode dump; it is immediately
/// followed in the file by the function's NUL-terminated, padded name.
/// All fields are `wordcode` (u32).
#[allow(non_camel_case_types)]
#[derive(Debug, Clone, Copy)]
pub struct fdhead {
    /// Offset (in u32 words) to the start of this function's
    /// wordcode body inside the dump.
    pub start: u32, // c:3117
    /// Length of the body in bytes (excludes pattern-prog slots).
    pub len: u32, // c:3118
    /// Number of compiled patterns the body references.
    pub npats: u32, // c:3119
    /// Offset of the string table inside `prog->prog`.
    pub strs: u32, // c:3120
    /// Length of this header record in u32 words, counting both the
    /// fixed fields and the padded name that follows them.
    pub hlen: u32, // c:3121
    /// Packed `{ kshload_bits(2) | name_tail_offset(30) }` field; the
    /// tail offset is where the basename starts within the stored name.
    pub flags: u32, // c:3122
}
3019
/// `#define FDHF_KSHLOAD 1` from `Src/parse.c:3149`. Per-function
/// header flag selected by `zcompile -k`: ksh-style autoloading.
pub const FDHF_KSHLOAD: u32 = 1;

/// `#define FDHF_ZSHLOAD 2` from `Src/parse.c:3150`. Per-function
/// header flag selected by `zcompile -z`: zsh-style autoloading.
pub const FDHF_ZSHLOAD: u32 = 2;
3027
/// Port of `struct wcfunc` from `Src/parse.c:3158`. Build-time
/// per-function aggregate collected before `write_dump` emits it.
/// Where C keeps an `Eprog` pointer, this port keeps the function's
/// wordcode directly in `body`, because the `Eprog` ↔ `parse_string`
/// chain isn't fully wired through this layer yet.
#[allow(non_camel_case_types)]
#[derive(Debug, Clone)]
pub struct wcfunc {
    /// Function name as stored in the dump (may include a path prefix).
    pub name: String, // c:3159
    /// `FDHF_*` autoload-style flags for this function.
    pub flags: u32, // c:3161
    /// Compiled body wordcode (one `u32` array per fn). Empty until
    /// the eprog emit-side lands; `write_dump` then walks each entry.
    pub body: Vec<u32>,
}
3042
3043/// Port of `dump_find_func(Wordcode h, char *name)` from
3044/// `Src/parse.c:3167`. Walks the header table inside a loaded
3045/// dump for a function with the given basename; returns true on hit.
3046pub fn dump_find_func(h: &[u32], name: &str) -> bool {
3047 // c:3167
3048 let header_words = fdheaderlen(h) as usize;
3049 let end = header_words; // walking u32 offsets, end-exclusive
3050 let mut cur = firstfdhead_offset();
3051 while cur < end {
3052 if let Some(fh) = read_fdhead(h, cur) {
3053 let full = fdname(h, cur);
3054 let tail = fdhtail(&fh) as usize;
3055 let basename = if tail <= full.len() {
3056 &full[tail..]
3057 } else {
3058 ""
3059 };
3060 if basename == name {
3061 return true;
3062 }
3063 cur = nextfdhead_offset(h, cur);
3064 } else {
3065 break;
3066 }
3067 }
3068 false
3069}
3070
3071/// Port of `bin_zcompile(char *nam, char **args, Options ops, UNUSED(int func))`
3072/// from `Src/parse.c:3180`. Validates the option set, then dispatches
3073/// to one of: `-t` (test/list), `-c`/`-a` (dump current functions),
3074/// or the default (compile source files to `.zwc`).
3075pub fn bin_zcompile(
3076 nam: &str, // c:3180
3077 args: &[String],
3078 ops: &crate::ported::zsh_h::options,
3079 _func: i32,
3080) -> i32 {
3081 // c:3185-3192 — illegal-combination guard.
3082 if (OPT_ISSET(ops, b'k') && OPT_ISSET(ops, b'z'))
3083 || (OPT_ISSET(ops, b'R') && OPT_ISSET(ops, b'M'))
3084 || (OPT_ISSET(ops, b'c')
3085 && (OPT_ISSET(ops, b'U') || OPT_ISSET(ops, b'k') || OPT_ISSET(ops, b'z')))
3086 || (!(OPT_ISSET(ops, b'c') || OPT_ISSET(ops, b'a')) && OPT_ISSET(ops, b'm'))
3087 {
3088 zwarnnam(nam, "illegal combination of options"); // c:3192
3089 return 1;
3090 }
3091
3092 // c:3194 — `-c`/`-a` + KSHAUTOLOAD warning.
3093 if (OPT_ISSET(ops, b'c') || OPT_ISSET(ops, b'a')) && isset(crate::ported::zsh_h::KSHAUTOLOAD) {
3094 zwarnnam(nam, "functions will use zsh style autoloading"); // c:3195
3095 }
3096
3097 // c:3196-3197 — flag word from `-k` / `-z`.
3098 let flags: u32 = if OPT_ISSET(ops, b'k') {
3099 FDHF_KSHLOAD
3100 } else if OPT_ISSET(ops, b'z') {
3101 FDHF_ZSHLOAD
3102 } else {
3103 0
3104 };
3105
3106 // c:3199 — `-t` test/list mode.
3107 if OPT_ISSET(ops, b't') {
3108 // c:3199
3109 if args.is_empty() {
3110 zwarnnam(nam, "too few arguments"); // c:3202
3111 return 1;
3112 }
3113 let dump_name = if args[0].ends_with(FD_EXT) {
3114 args[0].clone()
3115 } else {
3116 format!("{}{}", args[0], FD_EXT)
3117 };
3118 let f = match load_dump_header(nam, &dump_name, 1) {
3119 // c:3206
3120 Some(buf) => buf,
3121 None => return 1,
3122 };
3123 // c:3209 — per-function check.
3124 if args.len() > 1 {
3125 for name in &args[1..] {
3126 // c:3210
3127 if !dump_find_func(&f, name) {
3128 // c:3212
3129 return 1;
3130 }
3131 }
3132 return 0;
3133 }
3134 // c:3215-3221 — listing arm. Walk every fdhead, print
3135 // each function's full name. C uses `fdname(h)` which
3136 // includes the path prefix; matches our `fdname()` impl.
3137 let mapped = if (fdflags(&f) & FDF_MAP) != 0 {
3138 "mapped"
3139 } else {
3140 "read"
3141 };
3142 println!("zwc file ({}) for zsh-{}", mapped, fdversion(&f));
3143 let header_words = fdheaderlen(&f) as usize;
3144 let mut cur = firstfdhead_offset();
3145 while cur < header_words {
3146 if read_fdhead(&f, cur).is_none() {
3147 break;
3148 }
3149 println!("{}", fdname(&f, cur));
3150 cur = nextfdhead_offset(&f, cur);
3151 }
3152 return 0;
3153 }
3154
3155 if args.is_empty() {
3156 zwarnnam(nam, "too few arguments"); // c:3226
3157 return 1;
3158 }
3159
3160 // c:3228 — map mode discriminant.
3161 let map: i32 = if OPT_ISSET(ops, b'M') {
3162 2
3163 } else if OPT_ISSET(ops, b'R') {
3164 0
3165 } else {
3166 1
3167 };
3168
3169 // c:3230-3236 — single-file default-mode short path.
3170 if args.len() == 1 && !(OPT_ISSET(ops, b'c') || OPT_ISSET(ops, b'a')) {
3171 let dump = format!("{}{}", args[0], FD_EXT);
3172 return build_dump(nam, &dump, args, OPT_ISSET(ops, b'U') as i32, map, flags);
3173 }
3174
3175 // c:3239-3247 — multi-file or `-c`/`-a` mode.
3176 let dump = if args[0].ends_with(FD_EXT) {
3177 args[0].clone()
3178 } else {
3179 format!("{}{}", args[0], FD_EXT)
3180 };
3181 let rest = &args[1..];
3182 if OPT_ISSET(ops, b'c') || OPT_ISSET(ops, b'a') {
3183 let what =
3184 (if OPT_ISSET(ops, b'c') { 1 } else { 0 }) | (if OPT_ISSET(ops, b'a') { 2 } else { 0 });
3185 build_cur_dump(nam, &dump, rest, OPT_ISSET(ops, b'm') as i32, map, what)
3186 } else {
3187 build_dump(nam, &dump, rest, OPT_ISSET(ops, b'U') as i32, map, flags)
3188 }
3189}
3190
3191/// Port of `load_dump_header(char *nam, char *name, int err)` from
3192/// `Src/parse.c:3258`. Opens the file, reads + validates the magic
3193/// and version, then slurps the full header table into memory.
3194/// Returns the header u32-array on success or None on any failure
3195/// (emitting C-shaped warnings when `err != 0`).
3196pub fn load_dump_header(nam: &str, name: &str, err: i32) -> Option<Vec<u32>> {
3197 // c:3258
3198
3199 let mut f = match File::open(name) {
3200 // c:3263
3201 Ok(h) => h,
3202 Err(_) => {
3203 if err != 0 {
3204 zwarnnam(nam, &format!("can't open zwc file: {}", name)); // c:3265
3205 }
3206 return None;
3207 }
3208 };
3209
3210 // Read FD_PRELEN+1 u32 words = 52 bytes.
3211 let mut buf_bytes = vec![0u8; (FD_PRELEN + 1) * 4];
3212 if f.read_exact(&mut buf_bytes).is_err() {
3213 if err != 0 {
3214 zwarnnam(nam, &format!("invalid zwc file: {}", name)); // c:3277
3215 }
3216 return None;
3217 }
3218 let mut buf: Vec<u32> = buf_bytes
3219 .chunks_exact(4)
3220 .map(|c| u32::from_le_bytes([c[0], c[1], c[2], c[3]]))
3221 .collect();
3222
3223 // c:3270 — magic + version check. `ZSH_VERSION` (the C-side
3224 // global) — zshrs reports "5.9" in `--zsh` mode (Src/init.c parity).
3225 let magic_ok = fdmagic(&buf) == FD_MAGIC || fdmagic(&buf) == FD_OMAGIC;
3226 let v_ok = fdversion(&buf) == "5.9";
3227 if !magic_ok {
3228 if err != 0 {
3229 zwarnnam(nam, &format!("invalid zwc file: {}", name)); // c:3277
3230 }
3231 return None;
3232 }
3233 if !v_ok {
3234 if err != 0 {
3235 zwarnnam(
3236 nam,
3237 &format!(
3238 "zwc file has wrong version (zsh-{}): {}", // c:3274
3239 fdversion(&buf),
3240 name
3241 ),
3242 );
3243 }
3244 return None;
3245 }
3246
3247 // c:3285 — if magic matches host byte order, head len is `pre[FD_PRELEN]`.
3248 // Else seek to `fdother(buf)` and re-read.
3249 if fdmagic(&buf) != FD_MAGIC {
3250 let other = fdother(&buf) as u64; // c:3290
3251 if f.seek(SeekFrom::Start(other)).is_err() || f.read_exact(&mut buf_bytes).is_err() {
3252 zwarnnam(nam, &format!("invalid zwc file: {}", name)); // c:3295
3253 return None;
3254 }
3255 buf = buf_bytes
3256 .chunks_exact(4)
3257 .map(|c| u32::from_le_bytes([c[0], c[1], c[2], c[3]]))
3258 .collect();
3259 }
3260
3261 let total_words = fdheaderlen(&buf) as usize; // c:3286/3299
3262 if total_words < FD_PRELEN + 1 {
3263 zwarnnam(nam, &format!("invalid zwc file: {}", name));
3264 return None;
3265 }
3266
3267 // Read the remaining header words.
3268 let mut head: Vec<u32> = Vec::with_capacity(total_words);
3269 head.extend_from_slice(&buf);
3270 let remaining_words = total_words - (FD_PRELEN + 1);
3271 if remaining_words > 0 {
3272 let mut rest_bytes = vec![0u8; remaining_words * 4]; // c:3305
3273 if f.read_exact(&mut rest_bytes).is_err() {
3274 zwarnnam(nam, &format!("invalid zwc file: {}", name)); // c:3307
3275 return None;
3276 }
3277 for c in rest_bytes.chunks_exact(4) {
3278 head.push(u32::from_le_bytes([c[0], c[1], c[2], c[3]]));
3279 }
3280 }
3281 Some(head) // c:3311
3282}
3283
/// Port of `fdswap(Wordcode p, int n)` from `Src/parse.c:3318`.
///
/// Reverses the byte order of every `u32` in `p`, in place. The slice
/// length plays the role of C's separate `n` argument. Used when
/// producing the opposite-byte-order copy of a wordcode dump.
pub fn fdswap(p: &mut [u32]) {
    // c:3318
    p.iter_mut().for_each(|word| *word = word.swap_bytes());
}
3293
3294/// Port of `write_dump(int dfd, LinkList progs, int map, int hlen, int tlen)`
3295/// from `Src/parse.c:3334`. Writes the prelude + header records +
3296/// body wordcode bytes to the dump file descriptor.
3297///
3298/// Two passes: first native-byte-order (`FD_MAGIC`), then opposite-
3299/// byte-order (`FD_OMAGIC`) so big-endian readers can mmap the
3300/// same file. Bodies are byte-swapped via `fdswap` on the second pass.
3301pub fn write_dump(
3302 dfd: &mut std::fs::File, // c:3334
3303 progs: &[wcfunc],
3304 mut map: i32,
3305 hlen: i32,
3306 tlen: i32,
3307) -> std::io::Result<()> {
3308 if map == 1 && (tlen as usize) >= FD_MINMAP {
3309 // c:3344
3310 map = 1;
3311 } else if map == 1 {
3312 map = 0;
3313 }
3314
3315 let mut other = 0u32; // c:3338
3316 let ohlen = hlen;
3317 let mut cur_hlen = hlen;
3318
3319 loop {
3320 cur_hlen = ohlen;
3321 // c:3347 — build the prelude.
3322 let mut pre = vec![0u32; FD_PRELEN];
3323 pre[0] = if other != 0 { FD_OMAGIC } else { FD_MAGIC }; // c:3350
3324 let flags = (if map != 0 { FDF_MAP } else { 0 }) | other;
3325 fdsetflags(&mut pre, flags as u8); // c:3351
3326 fdsetother(&mut pre, tlen as u32); // c:3352
3327 // c:3353 — copy ZSH_VERSION C-string into pre[2..].
3328 let ver = b"5.9";
3329 for (i, &b) in ver.iter().enumerate() {
3330 let word = 2 + i / 4;
3331 let shift = (i % 4) * 8;
3332 pre[word] |= (b as u32) << shift;
3333 }
3334 // Write prelude.
3335 for w in &pre {
3336 dfd.write_all(&w.to_le_bytes())?;
3337 }
3338 // c:3356 — per-fn header records.
3339 for wcf in progs {
3340 let n = &wcf.name;
3341 let prog = &wcf.body;
3342 let mut head = fdhead {
3343 start: cur_hlen as u32, // c:3360
3344 len: (prog.len() * 4) as u32, // c:3363
3345 npats: 0, // c:3364 (npats not tracked yet)
3346 strs: 0, // c:3365
3347 hlen: ((FDHEAD_WORDS as u32) + ((n.len() as u32 + 4) / 4)), // c:3366
3348 flags: 0,
3349 };
3350 cur_hlen += prog.len() as i32; // c:3361
3351 // c:3368 — name tail offset from path basename.
3352 let tail = n.rfind('/').map(|p| p + 1).unwrap_or(0);
3353 head.flags = fdhbldflags(wcf.flags, tail as u32); // c:3372
3354 // c:3373 — opposite-byte-order swap on second pass.
3355 let mut head_words: Vec<u32> = vec![
3356 head.start, head.len, head.npats, head.strs, head.hlen, head.flags,
3357 ];
3358 if other != 0 {
3359 fdswap(&mut head_words);
3360 }
3361 for w in &head_words {
3362 dfd.write_all(&w.to_le_bytes())?;
3363 }
3364 // c:3376 — write the name + NUL + pad-to-4.
3365 dfd.write_all(n.as_bytes())?;
3366 dfd.write_all(&[0u8])?;
3367 let pad = (4 - ((n.len() + 1) & 3)) & 3;
3368 if pad > 0 {
3369 dfd.write_all(&vec![0u8; pad])?;
3370 }
3371 }
3372 // c:3381 — per-fn body words.
3373 for wcf in progs {
3374 let mut body = wcf.body.clone();
3375 if other != 0 {
3376 fdswap(&mut body);
3377 }
3378 for w in &body {
3379 dfd.write_all(&w.to_le_bytes())?;
3380 }
3381 }
3382 if other != 0 {
3383 // c:3389
3384 break;
3385 }
3386 other = FDF_OTHER; // c:3391
3387 }
3388 Ok(())
3389}
3390
3391/// Port of `build_dump(char *nam, char *dump, char **files, int ali, int map, int flags)`
3392/// from `Src/parse.c:3397`. Source-file → wordcode dump compiler.
3393///
3394/// Status: scaffolded but the wordcode-emit step depends on
3395/// `parse_string` returning a fully-wired `Eprog` with `prog/strs/
3396/// npats` fields populated. The current `parse_string`/`parse` shape
3397/// emits an AST (`ZshProgram`) but not yet the wordcode array C
3398/// expects in this dump format. Until that lands, this returns 1
3399/// with a clear "wordcode emit not yet ported" message so callers
3400/// (autoload from `.zwc`, `zcompile path/to/file`) fail loud.
3401pub fn build_dump(
3402 nam: &str, // c:3397
3403 dump: &str,
3404 _files: &[String],
3405 _ali: i32,
3406 _map: i32,
3407 _flags: u32,
3408) -> i32 {
3409 crate::ported::utils::zwarnnam(nam, &format!("{}: wordcode dump emit not yet ported", dump));
3410 1
3411}
3412
3413/// Port of `cur_add_func(char *nam, Shfunc shf, LinkList names, LinkList progs, int *hlen, int *tlen, int what)`
3414/// from `Src/parse.c:3489`. Adds a shfunc to the in-build dump
3415/// progs+names lists. Stub: `Eprog` for the function body isn't
3416/// yet wired through `shfunc.funcdef` to be serializable here.
3417pub fn cur_add_func(
3418 nam: &str, // c:3489
3419 shf_name: &str,
3420 shf_flags: i32,
3421 names: &mut Vec<String>,
3422 progs: &mut Vec<wcfunc>,
3423 hlen: &mut i32,
3424 tlen: &mut i32,
3425 what: i32,
3426) -> i32 {
3427 let is_undef = (shf_flags as u32 & PM_UNDEFINED) != 0;
3428 if is_undef {
3429 if (what & 2) == 0 {
3430 // c:3498
3431 zwarnnam(nam, &format!("function is not loaded: {}", shf_name));
3432 return 1;
3433 }
3434 // c:3503 — would call `getfpfunc` to load body for dump.
3435 zwarnnam(nam, &format!("can't load function: {}", shf_name));
3436 return 1;
3437 } else if (what & 1) == 0 {
3438 zwarnnam(nam, &format!("function is already loaded: {}", shf_name)); // c:3514
3439 return 1;
3440 }
3441 // c:3517 — would `dupeprog(shf->funcdef)`. Stub: empty body.
3442 let wcf = wcfunc {
3443 name: shf_name.to_string(),
3444 flags: FDHF_ZSHLOAD,
3445 body: Vec::new(),
3446 };
3447 progs.push(wcf);
3448 names.push(shf_name.to_string());
3449
3450 // c:3526 — bump hlen / tlen.
3451 let name_words = (shf_name.len() as i32 + 4) / 4;
3452 *hlen += (FDHEAD_WORDS as i32) + name_words;
3453 *tlen += 0; // body is empty in stub; real path adds prog->len in words.
3454
3455 0
3456}
3457
3458/// Port of `build_cur_dump(char *nam, char *dump, char **names, int match, int map, int what)`
3459/// from `Src/parse.c:3536`. Compiles currently-loaded functions
3460/// (`-c` for functions, `-a` for aliases) into a `.zwc` dump.
3461/// Same wordcode-emit dependency as `build_dump`.
3462pub fn build_cur_dump(
3463 nam: &str, // c:3536
3464 dump: &str,
3465 _names: &[String],
3466 _match_: i32,
3467 _map: i32,
3468 _what: i32,
3469) -> i32 {
3470 crate::ported::utils::zwarnnam(
3471 nam,
3472 &format!("{}: wordcode dump-current emit not yet ported", dump),
3473 );
3474 1
3475}
3476
/// Port of `zwcstat(char *filename, struct stat *buf)` from
/// `Src/parse.c:3656`.
///
/// Returns the metadata of `filename`; if that cannot be obtained,
/// tries `<filename>.old` instead. `None` means neither could be
/// stat-ed.
///
/// NOTE(review): confirm the fallback against the C `zwcstat`, which
/// appears to consult already-open dump descriptors rather than a
/// `.old` file.
pub fn zwcstat(filename: &str) -> Option<std::fs::Metadata> {
    // c:3656
    std::fs::metadata(filename)
        .or_else(|_| std::fs::metadata(format!("{}.old", filename)))
        .ok()
}
3490
/// Port of `load_dump_file(char *dump, struct stat *sbuf, int other, int len)`
/// from `Src/parse.c:3675`.
///
/// Reads the dump file `dump` into memory as little-endian `u32`
/// words. When `other` is non-zero it is used as the byte offset to
/// start reading from; otherwise reading starts at the beginning.
/// Everything up to end-of-file is read, and trailing bytes that do
/// not fill a whole word are dropped. `_sbuf` and `_len` are accepted
/// for signature parity and ignored.
///
/// Returns `None` if the file cannot be opened, positioned or read.
pub fn load_dump_file(
    dump: &str, // c:3675
    _sbuf: &std::fs::Metadata,
    other: i32,
    _len: usize,
) -> Option<Vec<u32>> {
    let mut file = match File::open(dump) {
        Ok(handle) => handle,
        Err(_) => return None,
    };
    if other != 0 && file.seek(SeekFrom::Start(other as u64)).is_err() {
        return None;
    }
    let mut raw = Vec::new();
    if file.read_to_end(&mut raw).is_err() {
        return None;
    }
    let mut words = Vec::with_capacity(raw.len() / 4);
    for quad in raw.chunks_exact(4) {
        words.push(u32::from_le_bytes([quad[0], quad[1], quad[2], quad[3]]));
    }
    Some(words)
}
3513
/// Port of `try_dump_file(char *path, char *name, char *file, int *ksh, int test_only)`
/// from `Src/parse.c:3746`. Intended to load a function from a `.zwc`
/// dump in the given fpath directory and report `(found, ksh_load)`.
///
/// Stub: every argument is ignored and the result is always `None`
/// until the dump-cache port (`FuncDump`) lands.
pub fn try_dump_file(
    _path: &str,
    _name: &str,
    _file: &str, // c:3746
    _test_only: bool,
) -> Option<(bool, bool)> {
    None
}
3526
/// Port of `try_source_file(char *file)` from `Src/parse.c:3795`.
/// Intended to try `source <file>`, fall back to `source <file>.zwc`,
/// and return the resolved path on a hit.
///
/// Stub: `_file` is ignored and the result is always `None` until the
/// dump-cache port lands.
pub fn try_source_file(_file: &str) -> Option<String> {
    // c:3795
    None
}
3535
3536/// Port of `Eprog check_dump_file(char *file, struct stat *sbuf,
3537/// char *name, int *ksh, int test_only)` from `Src/parse.c:3833`.
3538/// Walks the `dumps` mmap list looking for `(dev, ino)` matching
3539/// `sbuf`; on miss, calls `load_dump_header` to read the .zwc
3540/// header. Then `dump_find_func(d, name)` locates the function
3541/// table entry. Returns the wordcode slice + ksh-load flag.
3542///
3543/// ```c
3544/// Eprog
3545/// check_dump_file(char *file, struct stat *sbuf, char *name,
3546/// int *ksh, int test_only)
3547/// {
3548/// int isrec = 0;
3549/// Wordcode d;
3550/// FDHead h;
3551/// FuncDump f;
3552/// struct stat lsbuf;
3553/// if (!sbuf) {
3554/// if (zwcstat(file, &lsbuf)) return NULL;
3555/// sbuf = &lsbuf;
3556/// }
3557/// rec:
3558/// d = NULL;
3559/// for (f = dumps; f; f = f->next)
3560/// if (f->dev == sbuf->st_dev && f->ino == sbuf->st_ino)
3561/// { d = f->map; break; }
3562/// if (!f && (isrec || !(d = load_dump_header(NULL, file, 0))))
3563/// return NULL;
3564/// if ((h = dump_find_func(d, name))) {
3565/// if (test_only) return &dummy_eprog;
3566/// /* allocate Eprog from f->map at h offset, incrdumpcount,
3567/// return prog */
3568/// }
3569/// return NULL;
3570/// }
3571/// ```
3572/// Rust port returns `Option<(Vec<u32>, bool)>` instead of the C
3573/// `Eprog` pointer + `*ksh` out-param: tuple element 0 is the
3574/// wordcode slice, element 1 is true if the function was a ksh-
3575/// loaded entry.
3576pub fn check_dump_file( // c:3833
3577 file: &str,
3578 sbuf: &std::fs::Metadata,
3579 name: &str,
3580 test_only: bool,
3581) -> Option<(Vec<u32>, bool)> {
3582 use std::os::unix::fs::MetadataExt;
3583
3584 // c:3842-3846 — `if (!sbuf) { zwcstat(file, &lsbuf); sbuf = &lsbuf; }`
3585 // Rust takes sbuf by &Metadata — never null.
3586 let dev = sbuf.dev(); // c:3859
3587 let ino = sbuf.ino(); // c:3859
3588
3589 // c:3854 — `d = NULL;`
3590 let mut d: Option<Vec<u32>> = None;
3591 let mut found_mmap = false; // c:3858 `for (f = dumps; f; ...)`
3592
3593 // c:3858-3862 — walk DUMPS for matching dev/ino.
3594 {
3595 let dumps_guard = DUMPS.lock().expect("dumps poisoned");
3596 for f in dumps_guard.iter() { // c:3858
3597 if f.dev == dev && f.ino == ino { // c:3859
3598 d = Some(f.map.clone()); // c:3860
3599 found_mmap = true;
3600 break; // c:3861
3601 }
3602 }
3603 }
3604
3605 // c:3870-3871 — `if (!f && (isrec || !(d = load_dump_header(NULL, file, 0)))) return NULL;`
3606 if !found_mmap { // c:3870
3607 match load_dump_header("", file, 0) { // c:3870 load_dump_header
3608 Some(loaded) => d = Some(loaded),
3609 None => return None, // c:3871
3610 }
3611 }
3612
3613 // c:3873 — `if ((h = dump_find_func(d, name)))`
3614 let dump = d?;
3615 if !dump_find_func(&dump, name) { // c:3873
3616 return None;
3617 }
3618
3619 // c:3876-3879 — `if (test_only) return &dummy_eprog;`
3620 if test_only { // c:3876
3621 return Some((Vec::new(), false)); // c:3879 dummy
3622 }
3623
3624 // c:3884-3953 — allocate Eprog from the mmap area + ksh detection.
3625 // The C source builds an `Eprog` struct wrapping the wordcode
3626 // slice at h's offset; the Rust port returns the slice directly
3627 // since Eprog construction lives at the call site (load_dump_file).
3628 // ksh-load detection reads the FDHF_KSHLOAD flag on the FDHead.
3629 // !!! STUB: FDHead parsing not yet wired through dump_find_func.
3630 let is_ksh_load = false; // c:3905 fdhflags(h) & FDHF_KSHLOAD
3631
3632 // c:3950 — incrdumpcount(f). The Rust incrdumpcount takes a
3633 // funcdump ref; look up the matching entry by dev/ino again.
3634 if found_mmap {
3635 let dumps_guard = DUMPS.lock().expect("dumps poisoned");
3636 if let Some(f) = dumps_guard.iter().find(|f| f.dev == dev && f.ino == ino) {
3637 incrdumpcount(f); // c:3899
3638 }
3639 }
3640
3641 Some((dump, is_ksh_load)) // c:3953
3642}
3643
3644/// Port of `incrdumpcount(FuncDump f)` from `Src/parse.c:3970/4021`.
3645/// `f->count++;` — refcount-up a loaded dump entry. The Rust port
3646/// keys lookup by `filename` because Rust can't raw-pointer-compare
3647/// funcdump values inside a `Mutex<Vec<...>>`; same observable
3648/// effect (the count of the matching entry increments).
3649pub fn incrdumpcount(f: &crate::ported::zsh_h::funcdump) {
3650 // c:3970 — `f->count++;`
3651 if let Some(d) = DUMPS.lock().unwrap().iter_mut().find(|d| d.filename.as_deref() == f.filename.as_deref()) {
3652 d.count += 1; // c:3973
3653 }
3654}
3655
3656/// Port of `freedump(FuncDump f)` from `Src/parse.c:3976`. Public
3657/// helper for the rare external caller; locks the dumps mutex and
3658/// drops the entry with the given filename.
3659pub fn freedump(f: &crate::ported::zsh_h::funcdump) {
3660 // c:3976
3661 let mut g = DUMPS.lock().unwrap();
3662 if let Some(name) = f.filename.as_deref() {
3663 freedump_locked(&mut g, name);
3664 }
3665}
3666
3667/// Port of `decrdumpcount(FuncDump f)` from `Src/parse.c:3988/4026`.
3668/// `f->count--; if (!f->count) { unlink from dumps; freedump(f); }`.
3669pub fn decrdumpcount(f: &crate::ported::zsh_h::funcdump) {
3670 // c:3988
3671 let key = f.filename.clone();
3672 let mut g = DUMPS.lock().unwrap();
3673 let mut hit_zero: Option<String> = None;
3674 for d in g.iter_mut() {
3675 if d.filename == key {
3676 d.count -= 1; // c:3991
3677 if d.count == 0 {
3678 // c:3992
3679 hit_zero = d.filename.clone();
3680 }
3681 break;
3682 }
3683 }
3684 if let Some(name) = hit_zero {
3685 // c:3994-4001
3686 freedump_locked(&mut g, &name);
3687 }
3688}
3689
3690/// Port of `closedumps(void)` from `Src/parse.c:4008/4033`. Walks
3691/// `dumps` freeing every entry. Called on shell exit (exec.c:522).
3692pub fn closedumps() {
3693 // c:4008
3694 let mut g = DUMPS.lock().unwrap();
3695 g.clear(); // c:4011-4014 `while (dumps) { ... freedump(...); ... }`
3696}
3697
3698/// Port of `dump_autoload(char *nam, char *file, int on, Options ops, int func)`
3699/// from `Src/parse.c:4042`. Registers every function in a `.zwc`
3700/// for autoload via `shfunctab`. Stub: returns 1 (error) until the
3701/// dump-cache port lands.
3702pub fn dump_autoload(
3703 nam: &str,
3704 file: &str, // c:4042
3705 _on: i32,
3706 _ops: &crate::ported::zsh_h::options,
3707 _func: i32,
3708) -> i32 {
3709 zwarnnam(nam, &format!("{}: zwc-based autoload not yet ported", file));
3710 1
3711}
3712
/// Port of C `struct eccstr` (zsh.h:836) — the long-string dedup BST
/// node. The dedup-walk and cmp logic in `ecstrcode` is faithful to
/// parse.c:447-453 including the conditional cmp chain
/// (nfunc → hashval → strcmp), so corpus inputs where C's eccstr BST walk
/// finds-or-misses match get the same outcome on the Rust side.
///
/// Each node owns its children, so dropping the root drops the tree.
struct EccstrNode {
    /// Left child of this node. NOTE(review): which side holds the
    /// "smaller" keys is decided by `ecstrcode`, not visible here.
    left: Option<Box<EccstrNode>>,
    /// Right child of this node (see note on `left`).
    right: Option<Box<EccstrNode>>,
    /// C-byte form of the string (single byte per char ≤ 0xff).
    /// Owned because Rust doesn't have C zsh's "stable pointers into
    /// the lexer's tokstr arena" — every tokstr lives as a fresh
    /// Rust String allocation.
    str: Vec<u8>,
    /// Wordcode-encoded offset: `(byte_offset << 2) | token_bit`.
    /// Same shape as `Eccstr::offs` (parse.c:459).
    offs: u32,
    /// Absolute byte offset in the final strs region (= `ecsoffs` at
    /// insert time). C `Eccstr::aoffs` (parse.c:464). copy_ecstr uses
    /// THIS for the write position — distinct from `offs` which is
    /// ecssub-relative and collides across funcdef scopes.
    aoffs: u32,
    /// `nfunc` snapshot at insert time. Per-function namespace key
    /// — top-level scripts use 0; each funcdef bumps it.
    nfunc: i32,
    /// Hash of `str` computed via zsh's `hasher` (hashtable.c:86).
    hashval: u32,
}
3740// === end AST relocation ===
3741
3742// Parser state lives in file-scope thread_locals:
3743// - LEX_* (lexer side, matching Src/lex.c file-statics)
3744// - ECBUF / ECLEN / ECUSED / ECNPATS / ECSOFFS / ECSSUB / ECNFUNC /
3745// ECSTRS_INDEX / ECSTRS_REVERSE (wordcode-emission state, matching
3746// Src/parse.c file-statics)
3747//
3748// Callers use the free-fn entry points directly:
3749// crate::ported::parse::parse_init(input);
3750// let prog = crate::ported::parse::parse();
3751
/// Upper bound on nesting depth. NOTE(review): presumably checked by
/// the recursive parser productions to bail out before the native
/// stack overflows; the use sites are outside this section — confirm.
const MAX_RECURSION_DEPTH: usize = 500;
3753
/// Direct port of `struct parse_stack` at `Src/zsh.h:3099-3109`.
/// Used by `parse_context_save` / `parse_context_restore`
/// (parse.c:295-355) to snapshot per-parse-call state so a nested
/// parse (e.g. inside command substitution) doesn't clobber the
/// outer parse.
///
/// A second port of `struct parse_stack` exists at
/// `crate::ported::zsh_h::parse_stack` (zsh.h:1066) using canonical
/// Wordcode / Eccstr / `struct heredocs` types — that port is unused
/// today and will become authoritative when Phase 9b (PORT_PLAN.md)
/// wires wordcode emission. This local version uses the working-set
/// shapes (`Vec<HereDoc>`, stubbed wordcode fields) suited to zshrs's
/// pre-wordcode AST architecture; the consolidation happens in P9b.
///
/// NOTE(review): the zsh.h line numbers cited here (3099-3109, 1066)
/// and on the fields below (up to 3114) do not agree with each other —
/// confirm against the header before relying on them.
///
/// `Default` yields an all-zero / empty snapshot; `Clone` deep-copies
/// the heredoc list and any saved buffers.
#[allow(non_camel_case_types)]
#[derive(Debug, Default, Clone)]
pub struct parse_stack {
    // ── Direct port of struct parse_stack at zsh.h:3099-3109 ──
    /// Pending heredocs awaiting body collection (canonical C
    /// linked-list shape). C: `struct heredocs *hdocs` (zsh.h:3100).
    /// Mirrors `parse::HDOCS` thread_local across nested parses.
    pub hdocs: Option<Box<crate::ported::zsh_h::heredocs>>,
    /// !!! WARNING: NOT IN PARSE_STACK — Rust-only AST-glue !!!
    /// Snapshot of `lex::LEX_HEREDOCS` (the parallel Rust-only Vec
    /// carrying terminator / strip_tabs / quoted metadata).
    /// Saved/restored alongside the canonical `hdocs` so nested
    /// parses get a clean AST view. C's parse_stack has no analog
    /// because C tracks terminator metadata implicitly via tokstr.
    pub lex_heredocs: Vec<HereDoc>,
    /// C: `int incmdpos` (zsh.h:3102).
    pub incmdpos: bool,
    /// C: `int aliasspaceflag` (zsh.h:3103).
    pub aliasspaceflag: i32,
    /// C: `int incond` (zsh.h:3104).
    pub incond: i32,
    /// C: `int inredir` (zsh.h:3105).
    pub inredir: bool,
    /// C: `int incasepat` (zsh.h:3106).
    pub incasepat: i32,
    /// C: `int isnewlin` (zsh.h:3107).
    pub isnewlin: i32,
    /// C: `int infor` (zsh.h:3108).
    pub infor: i32,
    /// C: `int inrepeat_` (zsh.h:3109).
    pub inrepeat_: i32,
    /// C: `int intypeset` (zsh.h:3110).
    pub intypeset: bool,
    // ── Wordcode-buffer state — STUB until Phase 9b ──
    // C `Wordcode ecbuf` (zsh.h:3112) + `Eccstr ecstrs` (zsh.h:3113) +
    // `int eclen/ecused/ecnpats/ecsoffs/ecssub/ecnfunc` (zsh.h:3112-3114).
    // zshrs hasn't emitted wordcode yet — these fields exist to
    // preserve the C shape but read/write nothing until P9b lands.
    // NOTE(review): this file also contains `par_*_wordcode` emitters
    // working on ECBUF/ECUSED thread-locals, so the "hasn't emitted
    // wordcode yet" statement may be stale — confirm.
    /// C: `int eclen` — saved wordcode buffer length.
    pub eclen: i32,
    /// C: `int ecused` — saved count of wordcode slots in use.
    pub ecused: i32,
    /// C: `int ecnpats` — saved pattern count.
    pub ecnpats: i32,
    /// C: `Wordcode ecbuf` — saved wordcode buffer, if any.
    pub ecbuf: Option<Vec<u32>>,
    /// C: `Eccstr ecstrs` — saved string table, here as raw bytes.
    pub ecstrs: Option<Vec<u8>>,
    /// C: `int ecsoffs`.
    pub ecsoffs: i32,
    /// C: `int ecssub`.
    pub ecssub: i32,
    /// C: `int ecnfunc`.
    pub ecnfunc: i32,
}
3814
/// Compatibility alias for [`parse_stack`]. The old uppercase
/// Rust-only `ParseStack` type is gone; this alias keeps existing
/// call sites (context.rs) resolving until the rename ripples through.
#[allow(non_camel_case_types)]
pub type ParseStack = parse_stack;
3820
/// `mod_export struct eprog dummy_eprog;` from `Src/parse.c:3066`.
/// Placeholder Eprog used by `shf->funcdef = &dummy_eprog;` in
/// builtin.c when clearing a stale autoload stub. Held in a Mutex
/// so `init_eprog` can set it once at shell startup.
///
/// The initial value is fully empty: zero flags/len/npats/nref, no
/// program words, no strings, no patterns, no owning function and no
/// dump reference.
pub static DUMMY_EPROG: std::sync::Mutex<crate::ported::zsh_h::eprog> =
    std::sync::Mutex::new(crate::ported::zsh_h::eprog {
        flags: 0,
        len: 0,
        npats: 0,
        nref: 0,
        prog: Vec::new(),
        strs: None,
        pats: Vec::new(),
        shf: None,
        dump: None,
    });
3837
3838/// Walk every ZshRedir in the program and, for any with a `heredoc_idx`,
3839/// pull the body+terminator out of `bodies` and stuff into `heredoc`.
3840/// `bodies[i]` corresponds to the i-th heredoc registered by the lexer
3841/// during scanning (in source order).
3842fn fill_heredoc_bodies(prog: &mut ZshProgram, bodies: &[HereDocInfo]) {
3843 for list in &mut prog.lists {
3844 fill_in_sublist(&mut list.sublist, bodies);
3845 }
3846}
3847
3848fn fill_in_sublist(sub: &mut ZshSublist, bodies: &[HereDocInfo]) {
3849 fill_in_pipe(&mut sub.pipe, bodies);
3850 if let Some(next) = &mut sub.next {
3851 fill_in_sublist(&mut next.1, bodies);
3852 }
3853}
3854
3855fn fill_in_pipe(pipe: &mut ZshPipe, bodies: &[HereDocInfo]) {
3856 fill_in_command(&mut pipe.cmd, bodies);
3857 if let Some(next) = &mut pipe.next {
3858 fill_in_pipe(next, bodies);
3859 }
3860}
3861
3862fn fill_in_command(cmd: &mut ZshCommand, bodies: &[HereDocInfo]) {
3863 match cmd {
3864 ZshCommand::Simple(s) => {
3865 for r in &mut s.redirs {
3866 if let Some(idx) = r.heredoc_idx {
3867 if let Some(info) = bodies.get(idx) {
3868 r.heredoc = Some(info.clone());
3869 }
3870 }
3871 }
3872 }
3873 ZshCommand::Subsh(p) | ZshCommand::Cursh(p) => fill_heredoc_bodies(p, bodies),
3874 ZshCommand::FuncDef(f) => fill_heredoc_bodies(&mut f.body, bodies),
3875 ZshCommand::If(i) => {
3876 fill_heredoc_bodies(&mut i.cond, bodies);
3877 fill_heredoc_bodies(&mut i.then, bodies);
3878 for (c, b) in &mut i.elif {
3879 fill_heredoc_bodies(c, bodies);
3880 fill_heredoc_bodies(b, bodies);
3881 }
3882 if let Some(e) = &mut i.else_ {
3883 fill_heredoc_bodies(e, bodies);
3884 }
3885 }
3886 ZshCommand::While(w) | ZshCommand::Until(w) => {
3887 fill_heredoc_bodies(&mut w.cond, bodies);
3888 fill_heredoc_bodies(&mut w.body, bodies);
3889 }
3890 ZshCommand::For(f) => fill_heredoc_bodies(&mut f.body, bodies),
3891 ZshCommand::Case(c) => {
3892 for arm in &mut c.arms {
3893 fill_heredoc_bodies(&mut arm.body, bodies);
3894 }
3895 }
3896 ZshCommand::Repeat(r) => fill_heredoc_bodies(&mut r.body, bodies),
3897 ZshCommand::Time(Some(sublist)) => fill_in_sublist(sublist, bodies),
3898 ZshCommand::Try(t) => {
3899 fill_heredoc_bodies(&mut t.try_block, bodies);
3900 fill_heredoc_bodies(&mut t.always, bodies);
3901 }
3902 ZshCommand::Redirected(inner, redirs) => {
3903 for r in redirs {
3904 if let Some(idx) = r.heredoc_idx {
3905 if let Some(info) = bodies.get(idx) {
3906 r.heredoc = Some(info.clone());
3907 }
3908 }
3909 }
3910 fill_in_command(inner, bodies);
3911 }
3912 ZshCommand::Time(None) | ZshCommand::Cond(_) | ZshCommand::Arith(_) => {}
3913 }
3914}
3915
3916/// If `list` is a Simple containing one word that ends in the
3917/// `<Inpar><Outpar>` token pair (the lexer-port encoding of `()`),
3918/// return the bare name. Used by `parse_program_until` to detect
3919/// `name() {body}` style function definitions where the lexer
3920/// hasn't split the `()` from the name.
3921/// Detect the `name() …` shape inside a Simple. Returns the function
3922/// name and (when the body was already inlined into the same Simple,
3923/// e.g. `foo() echo hi`) the rest of the words as the body's argv.
3924/// Returns None for non-funcdef shapes.
3925fn simple_name_with_inoutpar(list: &ZshList) -> Option<(Vec<String>, Vec<String>)> {
3926 if list.flags.async_ || list.sublist.next.is_some() {
3927 return None;
3928 }
3929 let pipe = &list.sublist.pipe;
3930 if pipe.next.is_some() {
3931 return None;
3932 }
3933 let simple = match &pipe.cmd {
3934 ZshCommand::Simple(s) => s,
3935 _ => return None,
3936 };
3937 if simple.words.is_empty() || !simple.assigns.is_empty() {
3938 return None;
3939 }
3940 let suffix = "\u{88}\u{8a}"; // Inpar + Outpar
3941 // Find the FIRST word ending in `()`. zsh accepts the
3942 // multi-name shorthand `fna fnb fnc() { body }` (parse.c:
3943 // par_funcdef wordlist) — words[0..i-1] are extra names,
3944 // words[i] is `lastname()`. Words after are the body argv
3945 // (one-line shorthand, `name() cmd args`).
3946 let par_idx = simple.words.iter().position(|w| w.ends_with(suffix))?;
3947 let mut names: Vec<String> = Vec::with_capacity(par_idx + 1);
3948 for w in &simple.words[..par_idx] {
3949 // Earlier names must be bare identifiers, NOT contain
3950 // tokens that imply they're not function names (no `()`,
3951 // no quotes, no expansions). zsh's lexer enforces this
3952 // at the wordlist level; we approximate by requiring the
3953 // word be an identifier-shaped token after untokenize.
3954 let bare = super::lex::untokenize(w);
3955 let valid = !bare.is_empty()
3956 && bare
3957 .chars()
3958 .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-' || c == '.' || c == '$');
3959 if !valid {
3960 return None;
3961 }
3962 names.push(bare);
3963 }
3964 let last = &simple.words[par_idx];
3965 let bare = &last[..last.len() - suffix.len()];
3966 if bare.is_empty() {
3967 return None;
3968 }
3969 names.push(super::lex::untokenize(bare));
3970 let rest = simple.words[par_idx + 1..].to_vec();
3971 Some((names, rest))
3972}
3973
3974/// Initialize parser state for a fresh parse of `input`.
3975/// Free-fn entry point — resets parser thread_locals and loads input.
3976pub fn parse_init(input: &str) {
3977 // Seed the option defaults the parser/lexer inspect. Real zsh
3978 // installs these via `install_emulation_defaults` (options.c:172)
3979 // at shell startup; zshrs's parse-only test entry path bypasses
3980 // init_main, so we mirror the `zsh` emulation defaults here.
3981 // Only seeds when unset so a script that explicitly disabled an
3982 // option stays so.
3983 for (name, default) in [
3984 ("shortloops", true),
3985 ("shortrepeat", false),
3986 ("multifuncdef", true),
3987 ("aliasfuncdef", false),
3988 ("ignorebraces", false),
3989 ("cshjunkieloops", false),
3990 ("posixbuiltins", false),
3991 ("execopt", true),
3992 ("kshautoload", false),
3993 ("aliases", true),
3994 ] {
3995 if crate::ported::options::opt_state_get(name).is_none() {
3996 crate::ported::options::opt_state_set(name, default);
3997 }
3998 }
3999 lex_init(input);
4000}
4001
4002/// P9b decoder (wordcode-pipeline variant): direct port of
4003/// `ecgetstr(Estate s, int dup, int *tokflag)` from
4004/// `Src/parse.c:2855-2890`. Reads a wordcode at `pc`, decodes the
4005/// encoded string back to owned String. Returns (string,
4006/// pc_after_consumed). Distinct from the existing `ecgetstr` (which
4007/// takes a separate strs buffer for text.rs) — this variant uses
4008/// the live ECSTRS_REVERSE HashMap populated at ecstrcode time.
4009pub fn ecgetstr_wordcode(buf: &[u32], pc: usize) -> (String, usize) {
4010 if pc >= buf.len() {
4011 return (String::new(), pc);
4012 }
4013 let c = buf[pc];
4014 let next = pc + 1;
4015 // parse.c:2862-2863 — empty-string sentinels.
4016 if c == 6 || c == 7 {
4017 return (String::new(), next);
4018 }
4019 // parse.c:2864-2871 — inline-packed short string.
4020 if (c & 2) != 0 {
4021 let b0 = ((c >> 3) & 0xff) as u8;
4022 let b1 = ((c >> 11) & 0xff) as u8;
4023 let b2 = ((c >> 19) & 0xff) as u8;
4024 let mut bytes: Vec<u8> = Vec::new();
4025 for b in [b0, b1, b2] {
4026 if b == 0 {
4027 break;
4028 }
4029 bytes.push(b);
4030 }
4031 return (String::from_utf8_lossy(&bytes).into_owned(), next);
4032 }
4033 // parse.c:2872-2873 — long string via offs lookup. Map value is
4034 // metafied Vec<u8>; convert back to display String. Unmetafy is
4035 // the caller's job (the wordcode-parity dumper does it; other
4036 // callers may want raw bytes).
4037 let s = ECSTRS_REVERSE
4038 .with_borrow(|m| m.get(&c).cloned())
4039 .map(|v| String::from_utf8_lossy(&v).into_owned())
4040 .unwrap_or_default();
4041 (s, next)
4042}
4043
4044/// Parse the complete input. Direct port of `parse_event` /
4045/// `par_list` from `Src/parse.c:614-720`. On syntax error,
4046/// sets `errflag |= ERRFLAG_ERROR` (via `zerr`) and returns the
4047/// partial program — callers check `errflag` to detect failure,
4048/// matching C's `Eprog parse_event(...)` + `if (errflag) {...}`.
4049pub fn parse() -> ZshProgram {
4050 zshlex();
4051
4052 let mut program = parse_program_until(None);
4053
4054 // Post-pass: wire heredoc bodies (collected by the inline NEWLIN
4055 // walk in zshlex into LEX_HEREDOCS) back into ZshRedir.heredoc
4056 // fields via heredoc_idx. No C analog — LEX_HEREDOCS is the
4057 // Rust-only AST-glue Vec.
4058 let bodies: Vec<HereDocInfo> = crate::ported::lex::LEX_HEREDOCS
4059 .with_borrow(|v| v.clone())
4060 .into_iter()
4061 .map(|h| HereDocInfo {
4062 content: h.content,
4063 terminator: h.terminator,
4064 quoted: h.quoted,
4065 })
4066 .collect();
4067 if !bodies.is_empty() {
4068 fill_heredoc_bodies(&mut program, &bodies);
4069 }
4070
4071 program
4072}
4073
4074/// Wordcode-emission top-level driver. Closest C analog is
4075/// `parse_list(void)` at `Src/parse.c:697-712`: init_parse +
4076/// zshlex + par_list(&c) + bld_eprog. This entry omits init_parse
4077/// and bld_eprog (caller responsibilities) and inlines a guard
4078/// loop around par_list_wordcode for cases where the lexer leaves
4079/// a non-ENDINPUT terminator (LEXERR, missing close-token, etc.).
4080pub fn par_event_wordcode() -> usize {
4081 let start = ECUSED.get() as usize;
4082 // C `parse_list` (parse.c:697-712) calls par_list ONCE — par_list's
4083 // own goto-rec loop handles all SEPER-separated sublists. The
4084 // outer loop here exists for safety against early-return cases
4085 // (LEXERR, missing terminator) but normally par_list_wordcode
4086 // consumes everything in one call.
4087 let mut cmplx: i32 = 0;
4088 while tok() != ENDINPUT && tok() != LEXERR {
4089 par_list_wordcode(&mut cmplx);
4090 match tok() {
4091 SEMI | NEWLIN | AMPER | AMPERBANG | SEPER => {
4092 zshlex();
4093 }
4094 _ => break,
4095 }
4096 }
4097 // parse.c:712 — `ecadd(WCB_END());`
4098 ecadd(crate::ported::zsh_h::WCB_END());
4099 start
4100}
4101
4102/// Port of `par_list(int *cmplx)` from `Src/parse.c:769-803`.
4103/// `list : { SEPER } [ sublist [ { SEPER | AMPER | AMPERBANG } list ] ]`.
4104/// True line-by-line port: takes `cmplx: &mut i32` matching C's
4105/// `int *cmplx` out-parameter, uses stack-local `c` per iteration
4106/// like C (so inner sublist cmplx is independent of outer).
4107pub fn par_list_wordcode(cmplx: &mut i32) {
4108 // c:773 — `int p, lp = -1, c;`
4109 let mut p: usize;
4110 let mut lp: i32 = -1;
4111 let mut c: i32;
4112 loop {
4113 // c:775 `rec:` — c:777-778 `while (tok == SEPER) zshlex();`
4114 while tok() == SEPER {
4115 zshlex();
4116 }
4117 // c:780 — `p = ecadd(0);`
4118 p = ecadd(0);
4119 // c:781 — `c = 0;`
4120 c = 0;
4121 // c:783 — `if (par_sublist(&c)) { ... }`
4122 if par_sublist_wordcode(&mut c) {
4123 // c:784 — `*cmplx |= c;`
4124 *cmplx |= c;
4125 // c:785 — `if (tok == SEPER || tok == AMPER || tok == AMPERBANG)`
4126 let t = tok();
4127 if t == SEPER || t == AMPER || t == AMPERBANG {
4128 // c:786-787 — `if (tok != SEPER) *cmplx = 1;`
4129 if t != SEPER {
4130 *cmplx = 1;
4131 }
4132 // c:788-790 — `set_list_code(p, ..., c);`
4133 let z = if t == SEPER {
4134 Z_SYNC
4135 } else if t == AMPER {
4136 Z_ASYNC
4137 } else {
4138 Z_ASYNC | Z_DISOWN
4139 };
4140 set_list_code(p, z, c != 0);
4141 // c:791 — `incmdpos = 1;`
4142 set_incmdpos(true);
4143 // c:792-794 — `do { zshlex(); } while (tok == SEPER);`
4144 loop {
4145 zshlex();
4146 if tok() != SEPER {
4147 break;
4148 }
4149 }
4150 // c:795 — `lp = p;` c:796 — `goto rec;`
4151 lp = p as i32;
4152 continue;
4153 } else {
4154 // c:798 — `set_list_code(p, (Z_SYNC | Z_END), c);`
4155 set_list_code(p, Z_SYNC | Z_END, c != 0);
4156 }
4157 } else {
4158 // c:800-802 — `ecused--; if (lp >= 0) ecbuf[lp] |= wc_bdata(Z_END);`
4159 ECUSED.set((ECUSED.get() - 1).max(0));
4160 if lp >= 0 {
4161 ECBUF.with_borrow_mut(|b| {
4162 if (lp as usize) < b.len() {
4163 b[lp as usize] |= wc_bdata(Z_END as wordcode);
4164 }
4165 });
4166 }
4167 }
4168 break;
4169 }
4170}
4171
4172/// Port of `par_list1(int *cmplx)` from `Src/parse.c:806-817`.
4173/// Single-sublist variant used by funcdef bodies and the short
4174/// `for`/`while`/`repeat` forms — exactly one sublist with
4175/// `Z_SYNC|Z_END`, no chain.
4176pub fn par_list1_wordcode(cmplx: &mut i32) {
4177 // c:810 — `int p = ecadd(0), c = 0;`
4178 let p = ecadd(0);
4179 let mut c: i32 = 0;
4180 // c:812 — `if (par_sublist(&c)) { ... }`
4181 if par_sublist_wordcode(&mut c) {
4182 // c:813 — `set_list_code(p, (Z_SYNC | Z_END), c);`
4183 set_list_code(p, Z_SYNC | Z_END, c != 0);
4184 // c:814 — `*cmplx |= c;`
4185 *cmplx |= c;
4186 } else {
4187 // c:816 — `ecused--;`
4188 ECUSED.set((ECUSED.get() - 1).max(0));
4189 }
4190}
4191
4192/// Port of `par_save_list(C)` macro from `Src/parse.c:475-480`.
4193/// do { int eu = ecused; par_list(C); if (eu == ecused) ecadd(WCB_END()); } while (0)
4194pub fn par_save_list_wordcode(cmplx: &mut i32) {
4195 let eu = ECUSED.get();
4196 par_list_wordcode(cmplx);
4197 if ECUSED.get() == eu {
4198 ecadd(WCB_END());
4199 }
4200}
4201
4202/// Port of `par_save_list1(C)` macro from `Src/parse.c:481-486`.
4203pub fn par_save_list1_wordcode(cmplx: &mut i32) {
4204 let eu = ECUSED.get();
4205 par_list1_wordcode(cmplx);
4206 if ECUSED.get() == eu {
4207 ecadd(WCB_END());
4208 }
4209}
4210
4211/// Port of `par_sublist(int *cmplx)` from `Src/parse.c:823-865`.
4212/// `sublist : sublist2 [ ( DBAR | DAMPER ) { SEPER } sublist ]`.
4213/// Emits a WCB_SUBLIST header, recurses into par_sublist2 for
4214/// the !/coproc prefix + pipeline, then chains via DBAR (`||`)
4215/// or DAMPER (`&&`) recursively. Returns true if at least one
4216/// pipeline was emitted.
4217pub fn par_sublist_wordcode(cmplx: &mut i32) -> bool {
4218 // c:827 — `int f, p, c = 0;`
4219 let mut c: i32 = 0;
4220 // c:829 — `p = ecadd(0);`
4221 let p = ecadd(0);
4222 // c:831 — `if ((f = par_sublist2(&c)) != -1) { ... }`
4223 match par_sublist2(&mut c) {
4224 Some(f) => {
4225 // c:832 — `int e = ecused;`
4226 let e = ECUSED.get() as usize;
4227 // c:834 — `*cmplx |= c;`
4228 *cmplx |= c;
4229 if tok() == DBAR || tok() == DAMPER {
4230 // c:836 — `enum lextok qtok = tok;`
4231 let qtok = tok();
4232 // c:839 — `cmdpush(tok == DBAR ? CS_CMDOR : CS_CMDAND);`
4233 cmdpush(if qtok == DBAR {
4234 CS_CMDOR as u8
4235 } else {
4236 CS_CMDAND as u8
4237 });
4238 // c:840 — `zshlex();`
4239 zshlex();
4240 // c:841-842 — `while (tok == SEPER) zshlex();`
4241 while tok() == SEPER {
4242 zshlex();
4243 }
4244 // c:843 — `sl = par_sublist(cmplx);`
4245 let sl = par_sublist_wordcode(cmplx);
4246 // c:844-847 — `set_sublist_code(p, (sl ? ... : WC_SUBLIST_END),
4247 // f, (e - 1 - p), c);`
4248 let st = if sl {
4249 if qtok == DBAR {
4250 WC_SUBLIST_OR
4251 } else {
4252 WC_SUBLIST_AND
4253 }
4254 } else {
4255 WC_SUBLIST_END
4256 };
4257 set_sublist_code(p, st as i32, f, (e - 1 - p) as i32, c != 0);
4258 // c:848 — `cmdpop();`
4259 cmdpop();
4260 } else {
4261 // c:850-853 — `if (tok == AMPER || tok == AMPERBANG)
4262 // { c = 1; *cmplx |= c; }`
4263 if tok() == AMPER || tok() == AMPERBANG {
4264 c = 1;
4265 *cmplx |= c;
4266 }
4267 // c:854 — `set_sublist_code(p, WC_SUBLIST_END, f,
4268 // (e - 1 - p), c);`
4269 set_sublist_code(p, WC_SUBLIST_END as i32, f, (e - 1 - p) as i32, c != 0);
4270 }
4271 // c:856 — `return 1;`
4272 true
4273 }
4274 None => {
4275 // c:858-859 — `ecused--; return 0;`
4276 ECUSED.set((ECUSED.get() - 1).max(0));
4277 false
4278 }
4279 }
4280}
4281
/// Port of `par_pline(int *cmplx)` from `Src/parse.c:893-947`.
/// `pline : cmd [ ( BAR | BARAMP ) { SEPER } pline ]`.
///
/// Named `par_pipe_wordcode` to disambiguate from the AST-building
/// `par_pline` elsewhere in this file — semantically the same `pline`
/// production.
///
/// Reserves a pipe-header slot, parses one command, then:
/// * `|`  — marks the header `WC_PIPE_MID`, inserts an offset word
///   right after it, and recurses for the rest of the pipeline;
/// * `|&` — first inserts a synthetic `2>&1` redirection (three words)
///   after any redirections already emitted for the command, then
///   proceeds as for `|` with the `CS_ERRPIPE` cmd-stack marker;
/// * anything else — marks the header `WC_PIPE_END`.
///
/// The header's data field is `line + 1` (the line number captured on
/// entry), or 0 when the line is negative.
///
/// Returns `false` only when no command could be parsed, in which case
/// the reserved header slot is given back. If the right-hand side of a
/// `|` / `|&` is missing, the current token is forced to `LEXERR` and
/// `true` is still returned.
pub fn par_pipe_wordcode(cmplx: &mut i32) -> bool {
    // c:897 — `zlong line = toklineno;`
    let line = toklineno() as i64;
    // c:899 — `p = ecadd(0);`
    let p = ecadd(0);
    // c:901-904 — `if (!par_cmd(cmplx, 0)) { ecused--; return 0; }`
    if !par_cmd_wordcode(cmplx, 0) {
        ECUSED.set((ECUSED.get() - 1).max(0));
        return false;
    }
    if tok() == BAR_TOK {
        // c:906 — `*cmplx = 1;`
        *cmplx = 1;
        // c:907 — `cmdpush(CS_PIPE);`
        cmdpush(CS_PIPE as u8);
        // c:908 — `zshlex();`
        zshlex();
        // c:909-910 — `while (tok == SEPER) zshlex();`
        while tok() == SEPER {
            zshlex();
        }
        // c:911 — `ecbuf[p] = WCB_PIPE(WC_PIPE_MID, line>=0 ? line+1 : 0);`
        // The write is skipped silently if `p` is out of range.
        ECBUF.with_borrow_mut(|b| {
            if p < b.len() {
                b[p] = WCB_PIPE(
                    WC_PIPE_MID,
                    if line >= 0 { (line + 1) as wordcode } else { 0 },
                );
            }
        });
        // c:912 — `ecispace(p+1, 1);`
        ecispace(p + 1, 1);
        // c:913 — `ecbuf[p+1] = ecused - 1 - p;`
        // ECUSED is read after ecispace, so it includes the new word.
        let used = ECUSED.get() as usize;
        ECBUF.with_borrow_mut(|b| {
            if p + 1 < b.len() {
                b[p + 1] = (used.saturating_sub(1 + p)) as wordcode;
            }
        });
        // c:914-916 — `if (!par_pline(cmplx)) { tok = LEXERR; }`
        if !par_pipe_wordcode(cmplx) {
            set_tok(LEXERR);
        }
        // c:917 — `cmdpop();`
        cmdpop();
        true
    } else if tok() == BARAMP {
        // c:920-923 — walk past inline WC_REDIR to find r.
        // A slot past the end of the buffer reads as 0, which ends the
        // walk as long as 0 does not decode as WC_REDIR.
        let mut r = p + 1;
        loop {
            let code = ECBUF.with_borrow(|b| b.get(r).copied().unwrap_or(0));
            if wc_code(code) != WC_REDIR {
                break;
            }
            r += WC_REDIR_WORDS(code) as usize;
        }
        // c:925-928 — `ecispace(r, 3);` + synthetic `2>&1` redir
        ecispace(r, 3);
        // NOTE(review): `ecstrcode` runs while ECBUF is mutably
        // borrowed; this is only sound if `ecstrcode` never touches
        // ECBUF itself — confirm.
        ECBUF.with_borrow_mut(|b| {
            if r + 2 < b.len() {
                b[r] = WCB_REDIR(REDIR_MERGEOUT as wordcode);
                b[r + 1] = 2;
                b[r + 2] = ecstrcode("1");
            }
        });
        // c:930 — `*cmplx = 1;`
        *cmplx = 1;
        // From here on the steps mirror the BAR_TOK branch, with
        // CS_ERRPIPE as the cmd-stack marker.
        cmdpush(CS_ERRPIPE as u8);
        zshlex();
        while tok() == SEPER {
            zshlex();
        }
        ECBUF.with_borrow_mut(|b| {
            if p < b.len() {
                b[p] = WCB_PIPE(
                    WC_PIPE_MID,
                    if line >= 0 { (line + 1) as wordcode } else { 0 },
                );
            }
        });
        ecispace(p + 1, 1);
        let used = ECUSED.get() as usize;
        ECBUF.with_borrow_mut(|b| {
            if p + 1 < b.len() {
                b[p + 1] = (used.saturating_sub(1 + p)) as wordcode;
            }
        });
        if !par_pipe_wordcode(cmplx) {
            set_tok(LEXERR);
        }
        cmdpop();
        true
    } else {
        // c:944 — `ecbuf[p] = WCB_PIPE(WC_PIPE_END, line>=0 ? line+1 : 0);`
        ECBUF.with_borrow_mut(|b| {
            if p < b.len() {
                b[p] = WCB_PIPE(
                    WC_PIPE_END,
                    if line >= 0 { (line + 1) as wordcode } else { 0 },
                );
            }
        });
        true
    }
}
4395
4396/// Port of `par_cmd(int *cmplx, int zsh_construct)` from
4397/// `Src/parse.c:958-1085`. Parses leading + trailing redirs and
4398/// dispatches on the current token to the right par_* builder.
4399/// Returns false only when no command was emitted (no redirs +
4400/// par_simple returned 0).
4401/// Port of `par_cmd(int *cmplx, int zsh_construct)` from
4402/// `Src/parse.c:957-1077`.
4403pub fn par_cmd_wordcode(cmplx: &mut i32, zsh_construct: i32) -> bool {
4404 // c:960 — `int r, nr = 0;`
4405 let mut nr: i32 = 0;
4406 // c:962 — `r = ecused;`
4407 let mut r: usize = ECUSED.get() as usize;
4408 // c:964-968 — leading redirs.
4409 if IS_REDIROP(tok()) {
4410 // c:965 — `*cmplx = 1;`
4411 *cmplx = 1;
4412 // c:966-968 — `while (IS_REDIROP(tok)) { nr += par_redir(&r, NULL); }`
4413 while IS_REDIROP(tok()) {
4414 nr += par_redir_wordcode(&mut r);
4415 }
4416 }
4417 // c:970-1066 — token-dispatch switch.
4418 match tok() {
4419 FOR => {
4420 cmdpush(CS_FOR as u8);
4421 par_for_wordcode(cmplx);
4422 cmdpop();
4423 }
4424 FOREACH => {
4425 cmdpush(CS_FOREACH as u8);
4426 par_for_wordcode(cmplx);
4427 cmdpop();
4428 }
4429 SELECT => {
4430 // c:982 — `*cmplx = 1;`
4431 *cmplx = 1;
4432 cmdpush(CS_SELECT as u8);
4433 par_for_wordcode(cmplx);
4434 cmdpop();
4435 }
4436 CASE => {
4437 cmdpush(CS_CASE as u8);
4438 par_case_wordcode(cmplx);
4439 cmdpop();
4440 }
4441 IF => {
4442 par_if_wordcode(cmplx);
4443 }
4444 WHILE => {
4445 cmdpush(CS_WHILE as u8);
4446 par_while_wordcode(cmplx);
4447 cmdpop();
4448 }
4449 UNTIL => {
4450 cmdpush(CS_UNTIL as u8);
4451 par_while_wordcode(cmplx);
4452 cmdpop();
4453 }
4454 REPEAT => {
4455 cmdpush(CS_REPEAT as u8);
4456 par_repeat_wordcode(cmplx);
4457 cmdpop();
4458 }
4459 INPAR_TOK => {
4460 // c:1011 — `*cmplx = 1;`
4461 *cmplx = 1;
4462 cmdpush(CS_SUBSH as u8);
4463 par_subsh_wordcode_impl(cmplx, zsh_construct);
4464 cmdpop();
4465 }
4466 INBRACE_TOK => {
4467 cmdpush(CS_CURSH as u8);
4468 par_subsh_wordcode_impl(cmplx, zsh_construct);
4469 cmdpop();
4470 }
4471 FUNC => {
4472 cmdpush(CS_FUNCDEF as u8);
4473 par_funcdef_wordcode(cmplx);
4474 cmdpop();
4475 }
4476 DINBRACK => {
4477 cmdpush(CS_COND as u8);
4478 par_cond_wordcode();
4479 cmdpop();
4480 }
4481 DINPAR => {
4482 par_arith_wordcode();
4483 }
4484 TIME => {
4485 // c:1037-1050 — `static int inpartime` guard so
4486 // `time time foo` doesn't recurse infinitely.
4487 if !PARSER_INPARTIME.with(|c| c.get()) {
4488 // c:1041 — `*cmplx = 1;`
4489 *cmplx = 1;
4490 PARSER_INPARTIME.with(|c| c.set(true));
4491 par_time_wordcode();
4492 PARSER_INPARTIME.with(|c| c.set(false));
4493 } else {
4494 set_tok(STRING_LEX);
4495 let sr = par_simple_wordcode_impl(cmplx, nr);
4496 if sr == 0 && nr == 0 {
4497 return false;
4498 }
4499 if sr > 1 {
4500 *cmplx = 1;
4501 r += (sr - 1) as usize;
4502 }
4503 }
4504 }
4505 _ => {
4506 // c:1054 — `if (!(sr = par_simple(cmplx, nr)))`
4507 let sr = par_simple_wordcode_impl(cmplx, nr);
4508 if sr == 0 {
4509 if nr == 0 {
4510 return false;
4511 }
4512 } else if sr > 1 {
4513 // c:1060-1061 — `*cmplx = 1; r += sr - 1;`
4514 *cmplx = 1;
4515 r += (sr - 1) as usize;
4516 }
4517 }
4518 }
4519 // c:1067-1071 — trailing redirs.
4520 // c:1067 — `if (IS_REDIROP(tok)) { *cmplx = 1; while (...) (void)par_redir(&r, NULL); }`
4521 if IS_REDIROP(tok()) {
4522 *cmplx = 1;
4523 while IS_REDIROP(tok()) {
4524 let _ = par_redir_wordcode(&mut r);
4525 }
4526 }
4527 // c:1072-1075 — `incmdpos=1; incasepat=0; incond=0; intypeset=0;`
4528 set_incmdpos(true);
4529 set_incasepat(0);
4530 set_incond(0);
4531 set_intypeset(false);
4532 let _ = r;
4533 // c:1076 — `return 1;`
4534 true
4535}
4536
4537/// Port of `par_for(int *cmplx)` from `Src/parse.c:1086-1198`.
4538pub fn par_for_wordcode(cmplx: &mut i32) {
4539 // c:1089 — `int oecused = ecused, csh = (tok == FOREACH), p, sel = (tok == SELECT);`
4540 let _oecused = ECUSED.get() as usize;
4541 let csh = tok() == FOREACH;
4542 let sel = tok() == SELECT;
4543 let p: usize;
4544 // c:1090 — `int type;`
4545 let r#type: wordcode;
4546
4547 // c:1092 — `p = ecadd(0);`
4548 p = ecadd(0);
4549
4550 // c:1094 — `incmdpos = 0;`
4551 set_incmdpos(false);
4552 // c:1095 — `infor = tok == FOR ? 2 : 0;`
4553 set_infor(if tok() == FOR { 2 } else { 0 });
4554 // c:1096 — `zshlex();`
4555 zshlex();
4556 // c:1097 — `if (tok == DINPAR) {`
4557 if tok() == DINPAR {
4558 // c:1098 — `zshlex();`
4559 zshlex();
4560 // c:1099-1100 — `if (tok != DINPAR) YYERRORV(oecused);`
4561 if tok() != DINPAR {
4562 crate::ported::utils::zerr("par_for: expected init");
4563 return;
4564 }
4565 // c:1101 — `ecstr(tokstr);`
4566 ecstr(&tokstr().unwrap_or_default());
4567 // c:1102 — `zshlex();`
4568 zshlex();
4569 // c:1103-1104
4570 if tok() != DINPAR {
4571 crate::ported::utils::zerr("par_for: expected cond");
4572 return;
4573 }
4574 // c:1105
4575 ecstr(&tokstr().unwrap_or_default());
4576 // c:1106
4577 zshlex();
4578 // c:1107-1108
4579 if tok() != DOUTPAR {
4580 crate::ported::utils::zerr("par_for: expected ))");
4581 return;
4582 }
4583 // c:1109
4584 ecstr(&tokstr().unwrap_or_default());
4585 // c:1110 — `infor = 0;`
4586 set_infor(0);
4587 // c:1111 — `incmdpos = 1;`
4588 set_incmdpos(true);
4589 // c:1112 — `zshlex();`
4590 zshlex();
4591 // c:1113 — `type = WC_FOR_COND;`
4592 r#type = WC_FOR_COND;
4593 } else {
4594 // c:1115 — `int np = 0, n, posix_in, ona = noaliases, onc = nocorrect;`
4595 let mut np: usize = 0;
4596 let mut n: u32;
4597 let posix_in: bool;
4598 let ona = noaliases();
4599 let onc = nocorrect();
4600 // c:1116 — `infor = 0;`
4601 set_infor(0);
4602 // c:1117-1118 — `if (tok != STRING || !isident(tokstr)) YYERRORV(oecused);`
4603 if tok() != STRING_LEX
4604 || !crate::ported::utils::isident(&tokstr().unwrap_or_default())
4605 {
4606 crate::ported::utils::zerr("par_for: expected identifier");
4607 return;
4608 }
4609 // c:1119-1120 — `if (!sel) np = ecadd(0);`
4610 if !sel {
4611 np = ecadd(0);
4612 }
4613 // c:1121 — `n = 0;`
4614 n = 0;
4615 // c:1122 — `incmdpos = 1;`
4616 set_incmdpos(true);
4617 // c:1123 — `noaliases = nocorrect = 1;`
4618 set_noaliases(true);
4619 set_nocorrect(1);
4620 // c:1124 — `for (;;) {`
4621 loop {
4622 // c:1125 — `n++;`
4623 n += 1;
4624 // c:1126 — `ecstr(tokstr);`
4625 ecstr(&tokstr().unwrap_or_default());
4626 // c:1127 — `zshlex();`
4627 zshlex();
4628 // c:1128-1129 — `if (tok != STRING || !strcmp(tokstr, "in") || sel) break;`
4629 if tok() != STRING_LEX
4630 || tokstr().as_deref() == Some("in")
4631 || sel
4632 {
4633 break;
4634 }
4635 // c:1130-1135 — `if (!isident(tokstr) || errflag) { ... YYERRORV; }`
4636 if !crate::ported::utils::isident(&tokstr().unwrap_or_default())
4637 || (crate::ported::utils::errflag.load(std::sync::atomic::Ordering::Relaxed) & 1) != 0
4638 {
4639 set_noaliases(ona);
4640 set_nocorrect(onc);
4641 crate::ported::utils::zerr("par_for: expected identifier in name list");
4642 return;
4643 }
4644 }
4645 // c:1137-1138 — `noaliases = ona; nocorrect = onc;`
4646 set_noaliases(ona);
4647 set_nocorrect(onc);
4648 // c:1139-1140 — `if (!sel) ecbuf[np] = n;`
4649 if !sel {
4650 ECBUF.with_borrow_mut(|b| {
4651 b[np] = n;
4652 });
4653 }
4654 // c:1141 — `posix_in = isnewlin;`
4655 posix_in = isnewlin() != 0;
4656 // c:1142-1143 — `while (isnewlin) zshlex();`
4657 while isnewlin() != 0 {
4658 zshlex();
4659 }
4660 // c:1144 — `if (tok == STRING && !strcmp(tokstr, "in")) {`
4661 if tok() == STRING_LEX && tokstr().as_deref() == Some("in") {
4662 // c:1145 — `incmdpos = 0;`
4663 set_incmdpos(false);
4664 // c:1146 — `zshlex();`
4665 zshlex();
4666 // c:1147 — `np = ecadd(0);`
4667 np = ecadd(0);
4668 // c:1148 — `n = par_wordlist();`
4669 let n2 = par_wordlist_wordcode();
4670 // c:1149-1150 — `if (tok != SEPER) YYERRORV(oecused);`
4671 if tok() != SEPER {
4672 crate::ported::utils::zerr("par_for: expected separator after `in`");
4673 return;
4674 }
4675 // c:1151 — `ecbuf[np] = n;`
4676 ECBUF.with_borrow_mut(|b| {
4677 b[np] = n2 as wordcode;
4678 });
4679 // c:1152 — `type = (sel ? WC_SELECT_LIST : WC_FOR_LIST);`
4680 r#type = if sel { WC_SELECT_LIST } else { WC_FOR_LIST };
4681 } else if !posix_in && tok() == INPAR_TOK {
4682 // c:1153-1154 — `else if (!posix_in && tok == INPAR)`
4683 // c:1154 — `incmdpos = 0;`
4684 set_incmdpos(false);
4685 // c:1155 — `zshlex();`
4686 zshlex();
4687 // c:1156 — `np = ecadd(0);`
4688 np = ecadd(0);
4689 // c:1157 — `n = par_nl_wordlist();`
4690 let n2 = par_nl_wordlist_wordcode();
4691 // c:1158-1159 — `if (tok != OUTPAR) YYERRORV(oecused);`
4692 if tok() != OUTPAR_TOK {
4693 crate::ported::utils::zerr("par_for: expected `)`");
4694 return;
4695 }
4696 // c:1160 — `ecbuf[np] = n;`
4697 ECBUF.with_borrow_mut(|b| {
4698 b[np] = n2 as wordcode;
4699 });
4700 // c:1161 — `incmdpos = 1;`
4701 set_incmdpos(true);
4702 // c:1162 — `zshlex();`
4703 zshlex();
4704 // c:1163 — `type = (sel ? WC_SELECT_LIST : WC_FOR_LIST);`
4705 r#type = if sel { WC_SELECT_LIST } else { WC_FOR_LIST };
4706 } else {
4707 // c:1165 — `type = (sel ? WC_SELECT_PPARAM : WC_FOR_PPARAM);`
4708 r#type = if sel { WC_SELECT_PPARAM } else { WC_FOR_PPARAM };
4709 }
4710 let _ = np;
4711 }
4712 // c:1167 — `incmdpos = 1;`
4713 set_incmdpos(true);
4714 // c:1168-1169 — `while (tok == SEPER) zshlex();`
4715 while tok() == SEPER {
4716 zshlex();
4717 }
4718 // c:1170-1193 — body dispatch (inline in C, factored here for
4719 // reuse by par_while/par_repeat — same control flow, same calls).
4720 par_loop_body_wordcode(cmplx, csh);
4721 // c:1195-1197 — `ecbuf[p] = (sel ? WCB_SELECT(...) : WCB_FOR(...));`
4722 let used = ECUSED.get() as usize;
4723 let off = used.saturating_sub(1 + p) as wordcode;
4724 ECBUF.with_borrow_mut(|b| {
4725 b[p] = if sel {
4726 WCB_SELECT(r#type, off)
4727 } else {
4728 WCB_FOR(r#type, off)
4729 };
4730 });
4731}
4732
4733/// Port of `par_wordlist(void)` from `Src/parse.c:2361-2371` —
4734/// emits wordcode form. Returns the number of strings emitted.
4735fn par_wordlist_wordcode() -> u32 {
4736 // c:2364 — `int num = 0;`
4737 let mut num: u32 = 0;
4738 // c:2365 — `while (tok == STRING) {`
4739 while tok() == STRING_LEX {
4740 // c:2366 — `ecstr(tokstr);`
4741 ecstr(&tokstr().unwrap_or_default());
4742 // c:2367 — `num++;`
4743 num += 1;
4744 // c:2368 — `zshlex();`
4745 zshlex();
4746 }
4747 // c:2370 — `return num;`
4748 num
4749}
4750
4751/// Port of `par_nl_wordlist(void)` from `Src/parse.c:2378-2390` —
4752/// emits wordcode form. Like par_wordlist but tolerates SEPER
4753/// between words.
4754fn par_nl_wordlist_wordcode() -> u32 {
4755 // c:2381 — `int num = 0;`
4756 let mut num: u32 = 0;
4757 // c:2383 — `while (tok == STRING || tok == SEPER) {`
4758 while tok() == STRING_LEX || tok() == SEPER || tok() == NEWLIN {
4759 // c:2384-2387 — `if (tok != SEPER) { ecstr(tokstr); num++; }`
4760 if tok() == STRING_LEX {
4761 ecstr(&tokstr().unwrap_or_default());
4762 num += 1;
4763 }
4764 // c:2388 — `zshlex();`
4765 zshlex();
4766 }
4767 // c:2390 — `return num;`
4768 num
4769}
4770
4771/// Body dispatch shared by par_for / par_while / par_repeat.
4772/// Direct port of `Src/parse.c:1170-1194`.
4773fn par_loop_body_wordcode(cmplx: &mut i32, csh: bool) {
4774 if tok() == DOLOOP {
4775 zshlex();
4776 // c:1172 — `par_save_list(cmplx);`
4777 par_save_list_wordcode(cmplx);
4778 if tok() != DONE {
4779 crate::ported::utils::zerr("missing `done`");
4780 return;
4781 }
4782 set_incmdpos(false);
4783 zshlex();
4784 } else if tok() == INBRACE_TOK {
4785 zshlex();
4786 // c:1179 — `par_save_list(cmplx);`
4787 par_save_list_wordcode(cmplx);
4788 if tok() != OUTBRACE_TOK {
4789 crate::ported::utils::zerr("missing `}`");
4790 return;
4791 }
4792 set_incmdpos(false);
4793 zshlex();
4794 } else if csh || isset(CSHJUNKIELOOPS) {
4795 // c:1185 — `par_save_list(cmplx);`
4796 par_save_list_wordcode(cmplx);
4797 if tok() != ZEND {
4798 crate::ported::utils::zerr("missing `end`");
4799 return;
4800 }
4801 set_incmdpos(false);
4802 zshlex();
4803 } else if unset(SHORTLOOPS) {
4804 crate::ported::utils::zerr("short loop form requires SHORTLOOPS");
4805 } else {
4806 // c:1193 — `par_save_list1(cmplx);`
4807 par_save_list1_wordcode(cmplx);
4808 }
4809}
4810
4811/// `select` shares par_for body (c:983-985 routes SELECT to par_for).
4812pub fn par_select_wordcode(cmplx: &mut i32) {
4813 par_for_wordcode(cmplx);
4814}
4815
4816/// Port of `par_case(int *cmplx)` from `Src/parse.c:1208-1400`.
4817pub fn par_case_wordcode(_cmplx: &mut i32) {
4818 // c:1211 — `int oecused = ecused, brflag, p, pp, palts, type, nalts;`
4819 let _oecused = ECUSED.get() as usize;
4820 let brflag: bool;
4821 let p: usize;
4822 let mut pp: usize;
4823 let mut palts: usize;
4824 let mut r#type: wordcode;
4825 let mut nalts: u32;
4826 // c:1212 — `int ona, onc;`
4827 let ona: bool;
4828 let onc: i32;
4829
4830 // c:1214 — `p = ecadd(0);`
4831 p = ecadd(0);
4832
4833 // c:1216 — `incmdpos = 0;`
4834 set_incmdpos(false);
4835 // c:1217 — `zshlex();`
4836 zshlex();
4837 // c:1218-1219 — `if (tok != STRING) YYERRORV(oecused);`
4838 if tok() != STRING_LEX {
4839 crate::ported::utils::zerr("par_case: expected scrutinee");
4840 return;
4841 }
4842 // c:1220 — `ecstr(tokstr);`
4843 ecstr(&tokstr().unwrap_or_default());
4844
4845 // c:1222 — `incmdpos = 1;`
4846 set_incmdpos(true);
4847 // c:1223-1224 — `ona = noaliases; onc = nocorrect;`
4848 ona = noaliases();
4849 onc = nocorrect();
4850 // c:1225 — `noaliases = nocorrect = 1;`
4851 set_noaliases(true);
4852 set_nocorrect(1);
4853 // c:1226 — `zshlex();`
4854 zshlex();
4855 // c:1227-1228 — `while (tok == SEPER) zshlex();`
4856 while tok() == SEPER {
4857 zshlex();
4858 }
4859 // c:1229 — `if (!(tok == STRING && !strcmp(tokstr, "in")) && tok != INBRACE)`
4860 if !(tok() == STRING_LEX && tokstr().as_deref() == Some("in")) && tok() != INBRACE_TOK {
4861 // c:1231-1233 — restore noaliases/nocorrect + ERROR
4862 set_noaliases(ona);
4863 set_nocorrect(onc);
4864 crate::ported::utils::zerr("par_case: expected `in` or `{`");
4865 return;
4866 }
4867 // c:1235 — `brflag = (tok == INBRACE);`
4868 brflag = tok() == INBRACE_TOK;
4869 // c:1236 — `incasepat = 1;`
4870 set_incasepat(1);
4871 // c:1237 — `incmdpos = 0;`
4872 set_incmdpos(false);
4873 // c:1238-1239 — `noaliases = ona; nocorrect = onc;`
4874 set_noaliases(ona);
4875 set_nocorrect(onc);
4876 // c:1240 — `zshlex();`
4877 zshlex();
4878
4879 // c:1242 — `for (;;) {`
4880 'arms: loop {
4881 // c:1243 — `char *str;`
4882 let mut str: String;
4883 // c:1244 — `int skip_zshlex;`
4884 let skip_zshlex: bool;
4885
4886 // c:1246-1247 — `while (tok == SEPER) zshlex();`
4887 while tok() == SEPER {
4888 zshlex();
4889 }
4890 // c:1248-1249 — `if (tok == OUTBRACE) break;`
4891 if tok() == OUTBRACE_TOK {
4892 break 'arms;
4893 }
4894 // c:1250-1251 — `if (tok == INPAR) zshlex();`
4895 if tok() == INPAR_TOK {
4896 zshlex();
4897 }
4898 // c:1252-1254 — `if (tok == BAR) { str = ""; skip_zshlex = 1; }`
4899 if tok() == BAR_TOK {
4900 str = String::new();
4901 skip_zshlex = true;
4902 } else {
4903 // c:1256-1257 — `if (tok != STRING) YYERRORV(oecused);`
4904 if tok() != STRING_LEX {
4905 crate::ported::utils::zerr("par_case: expected pattern");
4906 return;
4907 }
4908 // c:1258-1259 — `if (!strcmp(tokstr, "esac")) break;`
4909 if tokstr().as_deref() == Some("esac") {
4910 break 'arms;
4911 }
4912 // c:1260 — `str = dupstring(tokstr);`
4913 str = tokstr().unwrap_or_default();
4914 // c:1261 — `skip_zshlex = 0;`
4915 skip_zshlex = false;
4916 }
4917 // c:1263 — `type = WC_CASE_OR;`
4918 r#type = WC_CASE_OR;
4919 // c:1264-1266 — `pp = ecadd(0); palts = ecadd(0); nalts = 0;`
4920 pp = ecadd(0);
4921 palts = ecadd(0);
4922 nalts = 0;
4923 // c:1300 — `incasepat = -1;`
4924 set_incasepat(-1);
4925 // c:1301 — `incmdpos = 1;`
4926 set_incmdpos(true);
4927 // c:1302-1303 — `if (!skip_zshlex) zshlex();`
4928 if !skip_zshlex {
4929 zshlex();
4930 }
4931 // c:1304 — `for (;;) {`
4932 loop {
4933 // c:1305-1313 — `if (tok == OUTPAR) { ecstr(str);
4934 // ecadd(ecnpats++); nalts++; incasepat = 0;
4935 // incmdpos = 1; zshlex(); break; }`
4936 if tok() == OUTPAR_TOK {
4937 ecstr(&str);
4938 let np = ECNPATS.with(|cc| {
4939 let v = cc.get();
4940 cc.set(v + 1);
4941 v
4942 }) as u32;
4943 ecadd(np);
4944 nalts += 1;
4945 set_incasepat(0);
4946 set_incmdpos(true);
4947 zshlex();
4948 break;
4949 }
4950 // c:1314-1320 — `else if (tok == BAR) { ecstr(str);
4951 // ecadd(ecnpats++); nalts++; incasepat = 1;
4952 // incmdpos = 0; }`
4953 else if tok() == BAR_TOK {
4954 ecstr(&str);
4955 let np = ECNPATS.with(|cc| {
4956 let v = cc.get();
4957 cc.set(v + 1);
4958 v
4959 }) as u32;
4960 ecadd(np);
4961 nalts += 1;
4962 set_incasepat(1);
4963 set_incmdpos(false);
4964 }
4965 // c:1321-1357 — else { ... `(...)` whole-pattern hack
4966 // (Inpar at str[0]); else YYERRORV. Not yet ported —
4967 // err out on unexpected. }
4968 else {
4969 crate::ported::utils::zerr("par_case: expected `)` or `|`");
4970 return;
4971 }
4972
4973 // c:1359 — `zshlex();`
4974 zshlex();
4975 // c:1360-1377 — switch on next tok.
4976 match tok() {
4977 STRING_LEX => {
4978 // c:1361-1365
4979 str = tokstr().unwrap_or_default();
4980 zshlex();
4981 }
4982 OUTPAR_TOK | BAR_TOK => {
4983 // c:1367-1371 — empty string
4984 str = String::new();
4985 }
4986 _ => {
4987 // c:1374-1376 — `YYERRORV(oecused);`
4988 crate::ported::utils::zerr("par_case: expected pattern, `)` or `|`");
4989 return;
4990 }
4991 }
4992 }
4993 // c:1379 — `incasepat = 0;`
4994 set_incasepat(0);
4995 // c:1380 — `par_save_list(cmplx);`
4996 par_save_list_wordcode(_cmplx);
4997 // c:1381-1384 — terminator → arm type
4998 if tok() == SEMIAMP {
4999 r#type = WC_CASE_AND;
5000 } else if tok() == SEMIBAR {
5001 r#type = WC_CASE_TESTAND;
5002 }
5003 // c:1385 — `ecbuf[pp] = WCB_CASE(type, ecused - 1 - pp);`
5004 let used = ECUSED.get() as usize;
5005 ECBUF.with_borrow_mut(|b| {
5006 b[pp] = WCB_CASE(r#type, (used.saturating_sub(1 + pp)) as wordcode);
5007 });
5008 // c:1386 — `ecbuf[palts] = nalts;`
5009 ECBUF.with_borrow_mut(|b| {
5010 b[palts] = nalts;
5011 });
5012 // c:1387-1388 — terminator (ESAC w/o brace OR OUTBRACE w/ brace) → break
5013 if (tok() == ESAC && !brflag) || (tok() == OUTBRACE_TOK && brflag) {
5014 break 'arms;
5015 }
5016 // c:1389-1390 — `if (tok != DSEMI && tok != SEMIAMP && tok != SEMIBAR) YYERRORV;`
5017 if tok() != DSEMI && tok() != SEMIAMP && tok() != SEMIBAR {
5018 crate::ported::utils::zerr("par_case: expected `;;`, `;&`, or `;|`");
5019 return;
5020 }
5021 // c:1391 — `incasepat = 1;`
5022 set_incasepat(1);
5023 // c:1392 — `incmdpos = 0;`
5024 set_incmdpos(false);
5025 // c:1393 — `zshlex();`
5026 zshlex();
5027 }
5028 // c:1395 — `incmdpos = 1;`
5029 set_incmdpos(true);
5030 // c:1396 — `incasepat = 0;`
5031 set_incasepat(0);
5032 // c:1397 — `zshlex();`
5033 zshlex();
5034
5035 // c:1399 — `ecbuf[p] = WCB_CASE(WC_CASE_HEAD, ecused - 1 - p);`
5036 let used = ECUSED.get() as usize;
5037 ECBUF.with_borrow_mut(|b| {
5038 b[p] = WCB_CASE(WC_CASE_HEAD, (used.saturating_sub(1 + p)) as wordcode);
5039 });
5040}
5041
5042/// Port of `par_if(int *cmplx)` from `Src/parse.c:1410-1512`.
5043pub fn par_if_wordcode(cmplx: &mut i32) {
5044 // c:1413 — `int oecused = ecused, p, pp, type, usebrace = 0;`
5045 let _oecused = ECUSED.get() as usize;
5046 let p: usize;
5047 let mut pp: usize = 0;
5048 let mut r#type: wordcode = WC_IF_IF;
5049 let mut usebrace: i32 = 0;
5050 // c:1414 — `enum lextok xtok;`
5051 let mut xtok: lextok;
5052 // c:1415 — `unsigned char nc;`
5053 let nc: u8;
5054 let _ = nc;
5055
5056 // c:1417 — `p = ecadd(0);`
5057 p = ecadd(0);
5058
5059 // c:1419 — `for (;;) {`
5060 loop {
5061 // c:1420 — `xtok = tok;`
5062 xtok = tok();
5063 // c:1421 — `cmdpush(xtok == IF ? CS_IF : CS_ELIF);`
5064 cmdpush(if xtok == IF { CS_IF as u8 } else { CS_ELIF as u8 });
5065 // c:1422-1426 — `if (xtok == FI) { incmdpos = 0; zshlex(); break; }`
5066 if xtok == FI {
5067 set_incmdpos(false);
5068 zshlex();
5069 break;
5070 }
5071 // c:1427 — `zshlex();`
5072 zshlex();
5073 // c:1428-1429 — `if (xtok == ELSE) break;`
5074 if xtok == ELSE {
5075 break;
5076 }
5077 // c:1430-1431 — `while (tok == SEPER) zshlex();`
5078 while tok() == SEPER {
5079 zshlex();
5080 }
5081 // c:1432-1435 — `if (!(xtok == IF || xtok == ELIF)) { cmdpop(); YYERRORV; }`
5082 if !(xtok == IF || xtok == ELIF) {
5083 cmdpop();
5084 crate::ported::utils::zerr("par_if: expected `if` or `elif`");
5085 return;
5086 }
5087 // c:1436 — `pp = ecadd(0);`
5088 pp = ecadd(0);
5089 // c:1437 — `type = (xtok == IF ? WC_IF_IF : WC_IF_ELIF);`
5090 r#type = if xtok == IF { WC_IF_IF } else { WC_IF_ELIF };
5091 // c:1438 — `par_save_list(cmplx);` — condition body
5092 par_save_list_wordcode(cmplx);
5093 // c:1439 — `incmdpos = 1;`
5094 set_incmdpos(true);
5095 // c:1440-1443 — `if (tok == ENDINPUT) { cmdpop(); YYERRORV; }`
5096 if tok() == ENDINPUT {
5097 cmdpop();
5098 crate::ported::utils::zerr("par_if: unexpected end-of-input after condition");
5099 return;
5100 }
5101 // c:1444-1445 — `while (tok == SEPER) zshlex();`
5102 while tok() == SEPER {
5103 zshlex();
5104 }
5105 // c:1446 — `xtok = FI;` — pre-set so the post-loop check works
5106 xtok = FI;
5107 // c:1447 — `nc = cmdstack[cmdsp - 1] == CS_IF ? CS_IFTHEN : CS_ELIFTHEN;`
5108 // (Not tracked separately in zshrs cmdstack — derive from cur top
5109 // by reading CMDSTACK; for safety use CS_IFTHEN as default.)
5110 // We don't have a way to read top easily — match by tracking
5111 // whether we just pushed CS_IF or CS_ELIF.
5112 // For wordcode emission this only affects cmdstack debug output;
5113 // not the emitted wordcode. Use CS_IFTHEN.
5114 let nc_local: u8 = CS_IFTHEN as u8;
5115 if tok() == THEN {
5116 // c:1448-1456 — THEN branch
5117 // c:1449 — `usebrace = 0;`
5118 usebrace = 0;
5119 // c:1450 — `cmdpop();`
5120 cmdpop();
5121 // c:1451 — `cmdpush(nc);`
5122 cmdpush(nc_local);
5123 // c:1452 — `zshlex();`
5124 zshlex();
5125 // c:1453 — `par_save_list(cmplx);` — then body
5126 par_save_list_wordcode(cmplx);
5127 // c:1454 — `ecbuf[pp] = WCB_IF(type, ecused - 1 - pp);`
5128 let used = ECUSED.get() as usize;
5129 ECBUF.with_borrow_mut(|b| {
5130 b[pp] = WCB_IF(r#type, (used.saturating_sub(1 + pp)) as wordcode);
5131 });
5132 // c:1455 — `incmdpos = 1;`
5133 set_incmdpos(true);
5134 // c:1456 — `cmdpop();`
5135 cmdpop();
5136 } else if tok() == INBRACE_TOK {
5137 // c:1457-1473 — INBRACE branch
5138 // c:1458 — `usebrace = 1;`
5139 usebrace = 1;
5140 // c:1459 — `cmdpop();`
5141 cmdpop();
5142 // c:1460 — `cmdpush(nc);`
5143 cmdpush(nc_local);
5144 // c:1461 — `zshlex();`
5145 zshlex();
5146 // c:1462 — `par_save_list(cmplx);`
5147 par_save_list_wordcode(cmplx);
5148 // c:1463-1466 — `if (tok != OUTBRACE) { cmdpop(); YYERRORV; }`
5149 if tok() != OUTBRACE_TOK {
5150 cmdpop();
5151 crate::ported::utils::zerr("par_if: expected `}`");
5152 return;
5153 }
5154 // c:1467 — `ecbuf[pp] = WCB_IF(type, ecused - 1 - pp);`
5155 let used = ECUSED.get() as usize;
5156 ECBUF.with_borrow_mut(|b| {
5157 b[pp] = WCB_IF(r#type, (used.saturating_sub(1 + pp)) as wordcode);
5158 });
5159 // c:1469 — `zshlex();`
5160 zshlex();
5161 // c:1470 — `incmdpos = 1;`
5162 set_incmdpos(true);
5163 // c:1471-1472 — `if (tok == SEPER) break;`
5164 if tok() == SEPER {
5165 break;
5166 }
5167 // c:1473 — `cmdpop();`
5168 cmdpop();
5169 } else if unset(SHORTLOOPS) {
5170 // c:1474-1476 — `cmdpop(); YYERRORV;`
5171 cmdpop();
5172 crate::ported::utils::zerr("par_if: short body requires SHORTLOOPS");
5173 return;
5174 } else {
5175 // c:1477-1484 — short loop form
5176 // c:1478 — `cmdpop();`
5177 cmdpop();
5178 // c:1479 — `cmdpush(nc);`
5179 cmdpush(nc_local);
5180 // c:1480 — `par_save_list1(cmplx);`
5181 par_save_list1_wordcode(cmplx);
5182 // c:1481 — `ecbuf[pp] = WCB_IF(type, ecused - 1 - pp);`
5183 let used = ECUSED.get() as usize;
5184 ECBUF.with_borrow_mut(|b| {
5185 b[pp] = WCB_IF(r#type, (used.saturating_sub(1 + pp)) as wordcode);
5186 });
5187 // c:1482 — `incmdpos = 1;`
5188 set_incmdpos(true);
5189 // c:1483 — `break;`
5190 break;
5191 }
5192 }
5193 // c:1486 — `cmdpop();`
5194 cmdpop();
5195 // c:1487 — `if (xtok == ELSE || tok == ELSE) {`
5196 if xtok == ELSE || tok() == ELSE {
5197 // c:1488 — `pp = ecadd(0);`
5198 pp = ecadd(0);
5199 // c:1489 — `cmdpush(CS_ELSE);`
5200 cmdpush(CS_ELSE as u8);
5201 // c:1490-1491 — `while (tok == SEPER) zshlex();`
5202 while tok() == SEPER {
5203 zshlex();
5204 }
5205 // c:1492-1498 — `if (tok == INBRACE && usebrace) { ... } else { ... }`
5206 if tok() == INBRACE_TOK && usebrace != 0 {
5207 // c:1493 — `zshlex();`
5208 zshlex();
5209 // c:1494 — `par_save_list(cmplx);`
5210 par_save_list_wordcode(cmplx);
5211 // c:1495-1498 — `if (tok != OUTBRACE) { cmdpop(); YYERRORV; }`
5212 if tok() != OUTBRACE_TOK {
5213 cmdpop();
5214 crate::ported::utils::zerr("par_if: else expected `}`");
5215 return;
5216 }
5217 } else {
5218 // c:1500 — `par_save_list(cmplx);`
5219 par_save_list_wordcode(cmplx);
5220 // c:1501-1504 — `if (tok != FI) { cmdpop(); YYERRORV; }`
5221 if tok() != FI {
5222 cmdpop();
5223 crate::ported::utils::zerr("par_if: else expected `fi`");
5224 return;
5225 }
5226 }
5227 // c:1506 — `incmdpos = 0;`
5228 set_incmdpos(false);
5229 // c:1507 — `ecbuf[pp] = WCB_IF(WC_IF_ELSE, ecused - 1 - pp);`
5230 let used = ECUSED.get() as usize;
5231 ECBUF.with_borrow_mut(|b| {
5232 b[pp] = WCB_IF(WC_IF_ELSE, (used.saturating_sub(1 + pp)) as wordcode);
5233 });
5234 // c:1508 — `zshlex();`
5235 zshlex();
5236 // c:1509 — `cmdpop();`
5237 cmdpop();
5238 }
5239 // c:1511 — `ecbuf[p] = WCB_IF(WC_IF_HEAD, ecused - 1 - p);`
5240 let used = ECUSED.get() as usize;
5241 ECBUF.with_borrow_mut(|b| {
5242 b[p] = WCB_IF(WC_IF_HEAD, (used.saturating_sub(1 + p)) as wordcode);
5243 });
5244}
5245
5246/// Port of `par_while(int *cmplx)` from `Src/parse.c:1520-1557`.
5247pub fn par_while_wordcode(cmplx: &mut i32) {
5248 // c:1523 — `int oecused = ecused, p;`
5249 let _oecused = ECUSED.get() as usize;
5250 let p: usize;
5251 // c:1524 — `int type = (tok == UNTIL ? WC_WHILE_UNTIL : WC_WHILE_WHILE);`
5252 let r#type: wordcode = if tok() == UNTIL {
5253 WC_WHILE_UNTIL
5254 } else {
5255 WC_WHILE_WHILE
5256 };
5257
5258 // c:1526 — `p = ecadd(0);`
5259 p = ecadd(0);
5260 // c:1527 — `zshlex();`
5261 zshlex();
5262 // c:1528 — `par_save_list(cmplx);` — condition.
5263 par_save_list_wordcode(cmplx);
5264 // c:1529 — `incmdpos = 1;`
5265 set_incmdpos(true);
5266 // c:1530-1531 — `while (tok == SEPER) zshlex();`
5267 while tok() == SEPER {
5268 zshlex();
5269 }
5270 // c:1532-1545 — body dispatch (inlined in C; we factor via
5271 // par_loop_body_wordcode since for/while/repeat share this
5272 // identical block).
5273 if tok() == DOLOOP {
5274 // c:1533 — `zshlex();`
5275 zshlex();
5276 // c:1534 — `par_save_list(cmplx);`
5277 par_save_list_wordcode(cmplx);
5278 // c:1535-1536 — `if (tok != DONE) YYERRORV(oecused);`
5279 if tok() != DONE {
5280 crate::ported::utils::zerr("par_while: expected `done`");
5281 return;
5282 }
5283 // c:1537 — `incmdpos = 0;`
5284 set_incmdpos(false);
5285 // c:1538 — `zshlex();`
5286 zshlex();
5287 } else if tok() == INBRACE_TOK {
5288 // c:1540 — `zshlex();`
5289 zshlex();
5290 // c:1541 — `par_save_list(cmplx);`
5291 par_save_list_wordcode(cmplx);
5292 // c:1542-1543 — `if (tok != OUTBRACE) YYERRORV(oecused);`
5293 if tok() != OUTBRACE_TOK {
5294 crate::ported::utils::zerr("par_while: expected `}`");
5295 return;
5296 }
5297 // c:1544 — `incmdpos = 0;`
5298 set_incmdpos(false);
5299 // c:1545 — `zshlex();`
5300 zshlex();
5301 } else if isset(CSHJUNKIELOOPS) {
5302 // c:1546-1550
5303 par_save_list_wordcode(cmplx);
5304 if tok() != ZEND {
5305 crate::ported::utils::zerr("par_while: expected `end`");
5306 return;
5307 }
5308 zshlex();
5309 } else if unset(SHORTLOOPS) {
5310 // c:1551-1552 — `YYERRORV(oecused);`
5311 crate::ported::utils::zerr("par_while: short body requires SHORTLOOPS");
5312 return;
5313 } else {
5314 // c:1554 — `par_save_list1(cmplx);`
5315 par_save_list1_wordcode(cmplx);
5316 }
5317
5318 // c:1556 — `ecbuf[p] = WCB_WHILE(type, ecused - 1 - p);`
5319 let used = ECUSED.get() as usize;
5320 ECBUF.with_borrow_mut(|b| {
5321 b[p] = WCB_WHILE(r#type, (used.saturating_sub(1 + p)) as wordcode);
5322 });
5323}
5324
5325/// `until` shares par_while body — tok==UNTIL flips the type.
5326pub fn par_until_wordcode(cmplx: &mut i32) {
5327 par_while_wordcode(cmplx);
5328}
5329
5330/// Port of `par_repeat(int *cmplx)` from `Src/parse.c:1564-1606`.
5331pub fn par_repeat_wordcode(cmplx: &mut i32) {
5332 // c:1567 — `/* ### what to do about inrepeat_ here? */`
5333 // c:1568 — `int oecused = ecused, p;`
5334 let _oecused = ECUSED.get() as usize;
5335 let p: usize;
5336
5337 // c:1570 — `p = ecadd(0);`
5338 p = ecadd(0);
5339
5340 // c:1572 — `incmdpos = 0;`
5341 set_incmdpos(false);
5342 // c:1573 — `zshlex();`
5343 zshlex();
5344 // c:1574-1575 — `if (tok != STRING) YYERRORV(oecused);`
5345 if tok() != STRING_LEX {
5346 crate::ported::utils::zerr("par_repeat: expected count");
5347 return;
5348 }
5349 // c:1576 — `ecstr(tokstr);`
5350 ecstr(&tokstr().unwrap_or_default());
5351 // c:1577 — `incmdpos = 1;`
5352 set_incmdpos(true);
5353 // c:1578 — `zshlex();`
5354 zshlex();
5355 // c:1579-1580 — `while (tok == SEPER) zshlex();`
5356 while tok() == SEPER {
5357 zshlex();
5358 }
5359 // c:1581-1604 — body dispatch (inlined here matching C exactly).
5360 if tok() == DOLOOP {
5361 // c:1582-1587
5362 zshlex();
5363 par_save_list_wordcode(cmplx);
5364 if tok() != DONE {
5365 crate::ported::utils::zerr("par_repeat: expected `done`");
5366 return;
5367 }
5368 set_incmdpos(false);
5369 zshlex();
5370 } else if tok() == INBRACE_TOK {
5371 // c:1589-1594
5372 zshlex();
5373 par_save_list_wordcode(cmplx);
5374 if tok() != OUTBRACE_TOK {
5375 crate::ported::utils::zerr("par_repeat: expected `}`");
5376 return;
5377 }
5378 set_incmdpos(false);
5379 zshlex();
5380 } else if isset(CSHJUNKIELOOPS) {
5381 // c:1596-1599
5382 par_save_list_wordcode(cmplx);
5383 if tok() != ZEND {
5384 crate::ported::utils::zerr("par_repeat: expected `end`");
5385 return;
5386 }
5387 zshlex();
5388 } else if unset(SHORTLOOPS) && unset(SHORTREPEAT) {
5389 // c:1601-1602 — par_repeat needs BOTH SHORTLOOPS and SHORTREPEAT
5390 // unset to refuse short form (more permissive than par_while).
5391 crate::ported::utils::zerr("par_repeat: short body requires SHORTLOOPS or SHORTREPEAT");
5392 return;
5393 } else {
5394 // c:1604 — `par_save_list1(cmplx);`
5395 par_save_list1_wordcode(cmplx);
5396 }
5397
5398 // c:1606 — `ecbuf[p] = WCB_REPEAT(ecused - 1 - p);`
5399 let used = ECUSED.get() as usize;
5400 ECBUF.with_borrow_mut(|b| {
5401 b[p] = WCB_REPEAT((used.saturating_sub(1 + p)) as wordcode);
5402 });
5403}
5404
5405/// Port of `par_funcdef(int *cmplx)` from `Src/parse.c:1672-1779`.
5406///
5407/// The `function NAME { ... }` form. Emits a WCB_FUNCDEF header
5408/// followed by a names-count slot, the names themselves, four
5409/// metadata slots (string-area start, string-area length, npats,
5410/// do_tracing), then the body wordcode, then WCB_END.
5411///
5412/// Critical: saves/resets `ecnpats` + `ecssub` + `ecsoffs` around
5413/// the body parse so per-function pattern counts don't leak into
5414/// the enclosing scope's `ecnpats` accumulator (parse.c:1723-1758).
5415pub fn par_funcdef_wordcode(cmplx: &mut i32) {
5416 // c:1674 — `int oecused = ecused, num = 0, onp, p, c = 0;`
5417 let _oecused = ECUSED.get() as usize;
5418 let mut num: i32 = 0;
5419 let onp: i32;
5420 let p: usize;
5421 let mut c: i32 = 0;
5422 // c:1675 — `int so, oecssub = ecssub;`
5423 let so: i32;
5424 let oecssub = ECSSUB.get();
5425 // c:1676 — `zlong oldlineno = lineno;`
5426 let oldlineno = lineno();
5427 // c:1677 — `int do_tracing = 0;`
5428 let mut do_tracing: i32 = 0;
5429
5430 // c:1679 — `lineno = 0;`
5431 set_lineno(0);
5432 // c:1680 — `nocorrect = 1;`
5433 set_nocorrect(1);
5434 // c:1681 — `incmdpos = 0;`
5435 set_incmdpos(false);
5436 // c:1682 — `zshlex();`
5437 zshlex();
5438
5439 // c:1684 — `p = ecadd(0);`
5440 p = ecadd(0);
5441 // c:1685 — `ecadd(0); /* p + 1 */`
5442 let p1 = ecadd(0);
5443
5444 // c:1687-1699 — `Consume an initial (-T), (--), or (-T --).`
5445 // c:1690 — `if (tok == STRING && tokstr[0] == Dash) {`
5446 if tok() == STRING_LEX {
5447 let s = tokstr().unwrap_or_default();
5448 let bytes = s.as_bytes();
5449 // C: `tokstr[0] == Dash` (Dash = 0x9b = 0xc2 0x9b in UTF-8).
5450 // First byte of UTF-8 `\u{9b}` is 0xc2; the char `'-'` is 0x2d.
5451 // Match either form.
5452 let first_is_dash = (bytes.len() >= 2 && bytes[0] == 0xc2 && bytes[1] == 0x9b)
5453 || (bytes.len() >= 1 && bytes[0] == b'-');
5454 if first_is_dash {
5455 // c:1691-1694 — `if (tokstr[1] == 'T' && !tokstr[2]) { ++do_tracing; zshlex(); }`
5456 // After the leading dash byte(s), check remaining bytes.
5457 let after_dash = if bytes.len() >= 2 && bytes[0] == 0xc2 && bytes[1] == 0x9b {
5458 &bytes[2..]
5459 } else {
5460 &bytes[1..]
5461 };
5462 if after_dash.len() == 1 && after_dash[0] == b'T' {
5463 do_tracing += 1;
5464 zshlex();
5465 }
5466 // c:1695-1698 — `if (tok == STRING && tokstr[0] == Dash &&
5467 // tokstr[1] == Dash && !tokstr[2]) zshlex();`
5468 if tok() == STRING_LEX {
5469 let s2 = tokstr().unwrap_or_default();
5470 let b2 = s2.as_bytes();
5471 let mut idx = 0;
5472 let mut dashes = 0;
5473 while idx < b2.len() && dashes < 2 {
5474 if b2[idx] == 0xc2 && idx + 1 < b2.len() && b2[idx + 1] == 0x9b {
5475 idx += 2;
5476 dashes += 1;
5477 } else if b2[idx] == b'-' {
5478 idx += 1;
5479 dashes += 1;
5480 } else {
5481 break;
5482 }
5483 }
5484 if dashes == 2 && idx == b2.len() {
5485 zshlex();
5486 }
5487 }
5488 }
5489 }
5490
5491 // c:1701-1709 — names loop.
5492 // `while (tok == STRING) { if ((*tokstr == Inbrace || *tokstr == '{')
5493 // && !tokstr[1]) { tok = INBRACE; break; } ecstr(tokstr); num++; zshlex(); }`
5494 while tok() == STRING_LEX {
5495 let s = tokstr().unwrap_or_default();
5496 let bytes = s.as_bytes();
5497 // First byte tests for Inbrace marker (0x8f → UTF-8 `0xc2 0x8f`) or `{`,
5498 // and length-1 check (`!tokstr[1]`).
5499 let is_inbrace_only = (bytes.len() == 1 && bytes[0] == b'{')
5500 || (bytes.len() == 2 && bytes[0] == 0xc2 && bytes[1] == 0x8f);
5501 if is_inbrace_only {
5502 set_tok(INBRACE_TOK);
5503 break;
5504 }
5505 ecstr(&s);
5506 num += 1;
5507 zshlex();
5508 }
5509
5510 // c:1711-1714 — four metadata placeholder slots.
5511 let m2 = ecadd(0);
5512 let m3 = ecadd(0);
5513 let m4 = ecadd(0);
5514 let m5 = ecadd(0);
5515
5516 // c:1716 — `nocorrect = 0;`
5517 set_nocorrect(0);
5518 // c:1717 — `incmdpos = 1;`
5519 set_incmdpos(true);
5520 // c:1718-1719 — `if (tok == INOUTPAR) zshlex();`
5521 if tok() == INOUTPAR {
5522 zshlex();
5523 }
5524 // c:1720-1721 — `while (tok == SEPER) zshlex();`
5525 while tok() == SEPER {
5526 zshlex();
5527 }
5528
5529 // c:1723 — `ecnfunc++;`
5530 ECNFUNC.set(ECNFUNC.get() + 1);
5531 // c:1724 — `ecssub = so = ecsoffs;`
5532 so = ECSOFFS.get();
5533 ECSSUB.set(so);
5534 // c:1725 — `onp = ecnpats;`
5535 onp = ECNPATS.with(|cc| cc.get());
5536 // c:1726 — `ecnpats = 0;`
5537 ECNPATS.with(|cc| cc.set(0));
5538
5539 // c:1728 — `if (tok == INBRACE) {`
5540 if tok() == INBRACE_TOK {
5541 // c:1729 — `zshlex();`
5542 zshlex();
5543 // c:1730 — `par_list(&c);`
5544 par_list_wordcode(&mut c);
5545 // c:1731-1736 — `if (tok != OUTBRACE) { lineno += oldlineno; ... }`
5546 if tok() != OUTBRACE_TOK {
5547 set_lineno(lineno() + oldlineno);
5548 ECNPATS.with(|cc| cc.set(onp));
5549 ECSSUB.set(oecssub);
5550 crate::ported::utils::zerr("par_funcdef: expected `}`");
5551 return;
5552 }
5553 // c:1737-1740 — `if (num == 0) { incmdpos = 0; }`
5554 if num == 0 {
5555 set_incmdpos(false);
5556 }
5557 // c:1741 — `zshlex();`
5558 zshlex();
5559 } else if unset(SHORTLOOPS) {
5560 // c:1742-1746 — `lineno += oldlineno; ecnpats = onp; ecssub = oecssub; YYERRORV`
5561 set_lineno(lineno() + oldlineno);
5562 ECNPATS.with(|cc| cc.set(onp));
5563 ECSSUB.set(oecssub);
5564 crate::ported::utils::zerr("par_funcdef: short body requires SHORTLOOPS");
5565 return;
5566 } else {
5567 // c:1748 — `par_list1(&c);`
5568 par_list1_wordcode(&mut c);
5569 }
5570
5571 // c:1750 — `ecadd(WCB_END());`
5572 ecadd(WCB_END());
5573 // c:1751-1754 — fill the 4 metadata slots
5574 let cur_sofs = ECSOFFS.get();
5575 let body_npats = ECNPATS.with(|cc| cc.get());
5576 ECBUF.with_borrow_mut(|b| {
5577 b[m2] = (so - oecssub) as wordcode;
5578 b[m3] = (cur_sofs - so) as wordcode;
5579 b[m4] = body_npats as wordcode;
5580 b[m5] = do_tracing as wordcode;
5581 });
5582 // c:1755 — `ecbuf[p + 1] = num;`
5583 ECBUF.with_borrow_mut(|b| {
5584 b[p1] = num as wordcode;
5585 });
5586
5587 // c:1757 — `ecnpats = onp;`
5588 ECNPATS.with(|cc| cc.set(onp));
5589 // c:1758 — `ecssub = oecssub;`
5590 ECSSUB.set(oecssub);
5591 // c:1759 — `ecnfunc++;`
5592 ECNFUNC.set(ECNFUNC.get() + 1);
5593
5594 // c:1761 — `ecbuf[p] = WCB_FUNCDEF(ecused - 1 - p);`
5595 let used = ECUSED.get() as usize;
5596 ECBUF.with_borrow_mut(|b| {
5597 b[p] = WCB_FUNCDEF((used.saturating_sub(1 + p)) as wordcode);
5598 });
5599
5600 // c:1763-1777 — anonymous-function trailing args (num == 0 case).
5601 if num == 0 {
5602 // c:1766 — `int parg = ecadd(0);`
5603 let parg = ecadd(0);
5604 // c:1767 — `ecadd(0);`
5605 ecadd(0);
5606 // c:1768-1772 — `while (tok == STRING) { ecstr(tokstr); num++; zshlex(); }`
5607 while tok() == STRING_LEX {
5608 ecstr(&tokstr().unwrap_or_default());
5609 num += 1;
5610 zshlex();
5611 }
5612 // c:1773-1774 — `if (num > 0) *cmplx = 1;`
5613 if num > 0 {
5614 *cmplx = 1;
5615 }
5616 // c:1775 — `ecbuf[parg] = ecused - parg;`
5617 // c:1776 — `ecbuf[parg+1] = num;`
5618 let used2 = ECUSED.get() as usize;
5619 ECBUF.with_borrow_mut(|b| {
5620 b[parg] = (used2 - parg) as wordcode;
5621 b[parg + 1] = num as wordcode;
5622 });
5623 }
5624 // c:1778 — `lineno += oldlineno;`
5625 set_lineno(lineno() + oldlineno);
5626}
5627
5628/// Size of `struct fdhead` in `wordcode` (u32) units. Used by all
5629/// the header-walk macros below.
5630pub const FDHEAD_WORDS: usize = std::mem::size_of::<fdhead>() / 4;
5631
5632/// `Src/parse.c:1619-1665`. Handles both `(...)` subshell and
5633/// `{...}` brace group (cursh) plus optional `always { ... }`
5634/// trailing block. C uses a single function with `zsh_construct=1`
5635/// for `{...}` and 0 for `(...)`.
5636pub fn par_subsh_wordcode_impl(cmplx: &mut i32, zsh_construct: i32) {
5637 // c:1621 — `enum lextok otok = tok;`
5638 let otok = tok();
5639 // c:1622 — `int oecused = ecused, p, pp;`
5640 let _oecused = ECUSED.get() as usize;
5641 let p: usize;
5642 let pp: usize;
5643
5644 // c:1624 — `p = ecadd(0);`
5645 p = ecadd(0);
5646 // c:1625 — `/* Extra word only needed for always block */`
5647 // c:1626 — `pp = ecadd(0);`
5648 pp = ecadd(0);
5649 // c:1627 — `zshlex();`
5650 zshlex();
5651 // c:1628 — `par_list(cmplx);`
5652 par_list_wordcode(cmplx);
5653 // c:1629 — `ecadd(WCB_END());`
5654 ecadd(WCB_END());
5655 // c:1630-1631 — `if (tok != ((otok == INPAR) ? OUTPAR : OUTBRACE))
5656 // YYERRORV(oecused);`
5657 if tok() != (if otok == INPAR_TOK { OUTPAR_TOK } else { OUTBRACE_TOK }) {
5658 crate::ported::utils::zerr("par_subsh: missing closing token");
5659 return;
5660 }
5661 // c:1632 — `incmdpos = !zsh_construct;`
5662 set_incmdpos(zsh_construct == 0);
5663 // c:1633 — `zshlex();`
5664 zshlex();
5665
5666 // c:1635 — `/* Optional always block. No intervening SEPERs allowed. */`
5667 // c:1636 — `if (otok == INBRACE && tok == STRING && !strcmp(tokstr, "always")) {`
5668 if otok == INBRACE_TOK && tok() == STRING_LEX && tokstr().as_deref() == Some("always") {
5669 // c:1637 — `ecbuf[pp] = WCB_TRY(ecused - 1 - pp);`
5670 let used = ECUSED.get() as usize;
5671 ECBUF.with_borrow_mut(|b| {
5672 b[pp] = WCB_TRY((used.saturating_sub(1 + pp)) as wordcode);
5673 });
5674 // c:1638 — `incmdpos = 1;`
5675 set_incmdpos(true);
5676 // c:1639-1641 — `do { zshlex(); } while (tok == SEPER);`
5677 loop {
5678 zshlex();
5679 if tok() != SEPER {
5680 break;
5681 }
5682 }
5683
5684 // c:1643-1644 — `if (tok != INBRACE) YYERRORV(oecused);`
5685 if tok() != INBRACE_TOK {
5686 crate::ported::utils::zerr("par_subsh: 'always' expects `{`");
5687 return;
5688 }
5689 // c:1645 — `cmdpop();`
5690 cmdpop();
5691 // c:1646 — `cmdpush(CS_ALWAYS);`
5692 cmdpush(CS_ALWAYS as u8);
5693
5694 // c:1648 — `zshlex();`
5695 zshlex();
5696 // c:1649 — `par_save_list(cmplx);`
5697 par_save_list_wordcode(cmplx);
5698 // c:1650-1651 — `while (tok == SEPER) zshlex();`
5699 while tok() == SEPER {
5700 zshlex();
5701 }
5702
5703 // c:1653 — `incmdpos = 1;`
5704 set_incmdpos(true);
5705
5706 // c:1655-1656 — `if (tok != OUTBRACE) YYERRORV(oecused);`
5707 if tok() != OUTBRACE_TOK {
5708 crate::ported::utils::zerr("par_subsh: 'always' block missing `}`");
5709 return;
5710 }
5711 // c:1657 — `zshlex();`
5712 zshlex();
5713 // c:1658 — `ecbuf[p] = WCB_TRY(ecused - 1 - p);`
5714 let used = ECUSED.get() as usize;
5715 ECBUF.with_borrow_mut(|b| {
5716 b[p] = WCB_TRY((used.saturating_sub(1 + p)) as wordcode);
5717 });
5718 } else {
5719 // c:1660-1661 — `ecbuf[p] = (otok == INPAR ? WCB_SUBSH(...) : WCB_CURSH(...));`
5720 let used = ECUSED.get() as usize;
5721 let off = used.saturating_sub(1 + p);
5722 ECBUF.with_borrow_mut(|b| {
5723 b[p] = if otok == INPAR_TOK {
5724 WCB_SUBSH(off as wordcode)
5725 } else {
5726 WCB_CURSH(off as wordcode)
5727 };
5728 });
5729 }
5730}
5731
5732/// Wrapper for `(...)` subshell — calls `par_subsh_wordcode_impl(0)`.
5733pub fn par_subsh_wordcode(cmplx: &mut i32) {
5734 par_subsh_wordcode_impl(cmplx, 0);
5735}
5736
5737/// Wrapper for `{...}` brace group (cursh) — calls
5738/// `par_subsh_wordcode_impl(1)`. C uses the same `par_subsh`
5739/// function with `zsh_construct=1`; the Rust split exists because
5740/// the par_cmd dispatch at parse.rs:1446 already named them
5741/// separately.
5742pub fn par_cursh_wordcode(cmplx: &mut i32) {
5743 par_subsh_wordcode_impl(cmplx, 1);
5744}
5745
5746/// Port of `par_time(void)` from `Src/parse.c:1787`. `time PIPE`
5747/// emits WCB_TIMED(WC_TIMED_PIPE) + the sublist code; bare `time`
5748/// with no pipeline emits WCB_TIMED(WC_TIMED_EMPTY).
5749pub fn par_time_wordcode() {
5750 // c:1791 — `zshlex();`
5751 zshlex();
5752 // c:1793-1794 — `p = ecadd(0); ecadd(0);`
5753 let p = ecadd(0);
5754 ecadd(0);
5755 // c:1795 — `if ((f = par_sublist2(&c)) < 0)`
5756 let mut c = 0i32;
5757 let f = par_sublist2(&mut c);
5758 match f {
5759 Some(flags) => {
5760 // c:1799 — `ecbuf[p] = WCB_TIMED(WC_TIMED_PIPE);`
5761 ECBUF.with_borrow_mut(|b| {
5762 if p < b.len() {
5763 b[p] = WCB_TIMED(WC_TIMED_PIPE);
5764 }
5765 });
5766 // c:1800 — `set_sublist_code(p+1, WC_SUBLIST_END, f,
5767 // ecused-2-p, c);`
5768 let used = ECUSED.get() as usize;
5769 let skip = used.saturating_sub(2 + p) as i32;
5770 set_sublist_code(p + 1, WC_SUBLIST_END as i32, flags, skip, c != 0);
5771 }
5772 None => {
5773 // c:1796-1798 — `ecused--; ecbuf[p] = WCB_TIMED(WC_TIMED_EMPTY);`
5774 ECUSED.set((ECUSED.get() - 1).max(0));
5775 ECBUF.with_borrow_mut(|b| {
5776 if p < b.len() {
5777 b[p] = WCB_TIMED(WC_TIMED_EMPTY);
5778 }
5779 });
5780 }
5781 }
5782}
5783
5784/// Port of `par_dinbrack(void)` from `Src/parse.c:1810`. Wraps
5785/// `par_cond` (the cond-expression emitter at parse.c:2409) with
5786/// the `[[ ... ]]` framing: incond/incmdpos toggles + DOUTBRACK
5787/// expectation.
5788pub fn par_cond_wordcode() {
5789 let oecused = ECUSED.get();
5790 // c:1814 — `incond = 1;`
5791 set_incond(1);
5792 // c:1815 — `incmdpos = 0;`
5793 set_incmdpos(false);
5794 // c:1816 — `zshlex();` past `[[`.
5795 zshlex();
5796 // c:1817 — `par_cond();` — call the no-skip cond-expression
5797 // entry that EMITS WORDCODE (par_cond_top → par_cond_1 →
5798 // par_cond_2 → par_cond_double/triple/multi). NOT the AST
5799 // `par_cond` at parse.rs:4644 which is a misnamed `par_dinbrack`
5800 // that skips `[[` AND `]]` and returns a ZshCommand AST node
5801 // instead of pushing WC_COND opcodes. NOT `parse_cond_expr`
5802 // either — that's also AST-only, returning ZshCond. With
5803 // `parse_cond_expr` here, every `[[ ... ]]` test produced ZERO
5804 // wordcode payload and parity dropped ~148 words on /etc/zshrc.
5805 let _ = par_cond_top();
5806 // c:1818-1819 — `if (tok != DOUTBRACK) YYERRORV(oecused);`
5807 if tok() != DOUTBRACK {
5808 let _ = oecused;
5809 crate::ported::utils::zerr("missing ]]");
5810 return;
5811 }
5812 // c:1820 — `incond = 0;`
5813 set_incond(0);
5814 // c:1821 — `incmdpos = 1;`
5815 set_incmdpos(true);
5816 // c:1822 — `zshlex();` past `]]`.
5817 zshlex();
5818}
5819
5820/// Port of the `case DINPAR:` arm of `par_cmd` from
5821/// `Src/parse.c:1031-1034`:
5822/// ```c
5823/// ecadd(WCB_ARITH());
5824/// ecstr(tokstr);
5825/// zshlex();
5826/// ```
5827/// `(( EXPR ))` arithmetic at command position — emits the ARITH
5828/// opcode followed by the interned EXPR string, then advances past
5829/// the DINPAR token (which already carries the body text).
5830pub fn par_arith_wordcode() {
5831 // c:1032 — `ecadd(WCB_ARITH());`
5832 ecadd(WCB_ARITH());
5833 // c:1033 — `ecstr(tokstr);` — interns the expression string and
5834 // appends its strcode index to the wordcode buffer.
5835 let expr = tokstr().unwrap_or_default();
5836 ecstr(&expr);
5837 // c:1034 — `zshlex();`
5838 zshlex();
5839}
5840
5841/// Port of `par_simple(int *cmplx, int nr)` from
5842/// `Src/parse.c:1836-2227`. Emits WC_SIMPLE + word count +
5843/// interned string offsets. Returns `0` when nothing was emitted,
5844/// otherwise `1 + (number of code words consumed by redirections)`.
5845/// The full C body handles assignments (ENVSTRING/ENVARRAY),
5846/// inline `{var}>file` brace-FDs, prefix modifiers (NOCORRECT etc),
5847/// and `name() { body }` funcdef detection — those paths are
5848/// progressively wired into the AST parser; this wordcode-emitter
5849/// covers the simple `cmd args...` case + interleaved redirs.
5850pub fn par_simple_wordcode_impl(cmplx: &mut i32, mut nr: i32) -> i32 {
5851 // c:1838-1841 — `int oecused = ecused, isnull = 1, r, argc = 0,
5852 // p, isfunc = 0, sr = 0;`
5853 // `int c = *cmplx, nrediradd, assignments = 0, ppost = 0,
5854 // is_typeset = 0;`
5855 // c is the SAVED initial cmplx so INOUTPAR can restore via
5856 // `*cmplx = c;` at c:2070.
5857 let _oecused = ECUSED.get() as usize;
5858 let c_saved = *cmplx;
5859 let mut isnull = true;
5860 let mut argc: u32 = 0;
5861 let mut sr: i32 = 0;
5862 let mut assignments = false;
5863 let mut isfunc = false;
5864
5865 // c:1843 — `r = ecused;` — saves the offset where redirs get
5866 // INSERTED (via ecispace). Each redir shifts later words DOWN
5867 // by ncodes, so the SIMPLE placeholder at `p` (set later) must
5868 // also bump by ncodes when a redir lands. C uses `&r` to pass
5869 // the cursor by reference; Rust uses a mutable local + manual
5870 // bumps after each par_redir_wordcode call.
5871 let mut r: usize = ECUSED.get() as usize;
5872
5873 // c:1844-1919 — pre-cmd loop: NOCORRECT, ENVSTRING (scalar
5874 // assigns), ENVARRAY (array assigns), IS_REDIROP. Loops until
5875 // a non-assignment token is seen.
5876 loop {
5877 match tok() {
5878 NOCORRECT => {
5879 // c:1846-1849
5880 *cmplx = 1;
5881 set_nocorrect(1);
5882 }
5883 ENVSTRING => {
5884 // c:1848-1898 — scalar assignment `name=value` or
5885 // `name+=value`. Emits WCB_ASSIGN(SCALAR, NEW|INC, 0)
5886 // followed by ecstr(name), ecstr(value).
5887 let raw = tokstr().unwrap_or_default();
5888 // Find first of Inbrack / '=' / '+' (the C scan at
5889 // c:1851-1853). Inside Inbrack we skipparens — i.e.
5890 // skip `name[...]` index, then continue.
5891 let bytes: Vec<char> = raw.chars().collect();
5892 let mut idx = 0usize;
5893 while idx < bytes.len() {
5894 let ch = bytes[idx];
5895 if ch == '\u{91}' /* Inbrack */ {
5896 // Skip matched Inbrack…Outbrack pair.
5897 let mut depth = 1;
5898 idx += 1;
5899 while idx < bytes.len() && depth > 0 {
5900 match bytes[idx] {
5901 '\u{91}' => depth += 1,
5902 '\u{92}' => depth -= 1,
5903 _ => {}
5904 }
5905 idx += 1;
5906 }
5907 continue;
5908 }
5909 // c:1851-1853 — `*ptr != '=' && *ptr != '+'` —
5910 // C scan stops on either literal `=` / `+` OR the
5911 // Equals marker (`\u{8d}`) the lexer emits for
5912 // unquoted `=`. Without the marker check, the
5913 // ENVSTRING split scans past the `=` (since it's
5914 // already tokenised) and the whole `name=value`
5915 // ends up in one ecstr.
5916 if ch == '=' || ch == '+' || ch == '\u{8d}' /* Equals */ {
5917 break;
5918 }
5919 idx += 1;
5920 }
5921 let is_inc = idx < bytes.len() && bytes[idx] == '+';
5922 // c:1856-1858 — `if (*ptr == '+') { *ptr++ = '\0';
5923 // ecadd(WCB_ASSIGN(SCALAR, INC, 0)); } else WCB_NEW`
5924 // C nulls the `+` AT THAT POSITION then advances ptr.
5925 // `name` is bytes BEFORE the `+`, NOT including it.
5926 let name_end = idx;
5927 if is_inc {
5928 idx += 1;
5929 }
5930 let flag = if is_inc { WC_ASSIGN_INC } else { WC_ASSIGN_NEW };
5931 ecadd(WCB_ASSIGN(WC_ASSIGN_SCALAR, flag, 0));
5932 // c:1860 — `if (*ptr == '=') { *ptr = '\0'; str = ptr + 1; }
5933 // else equalsplit(tokstr, &str);`
5934 let name: String = bytes[..name_end].iter().collect();
5935 let str_off = if idx < bytes.len()
5936 && (bytes[idx] == '=' || bytes[idx] == '\u{8d}')
5937 {
5938 idx + 1
5939 } else {
5940 idx
5941 };
5942 let value: String = bytes[str_off..].iter().collect();
5943 // c:1866-1877 — scan value for `=(`/`<(`/`>(` (proc
5944 // subst); if found, bump cmplx (suppresses Z_SIMPLE).
5945 let vbytes: Vec<char> = value.chars().collect();
5946 for (i, ch) in vbytes.iter().enumerate() {
5947 if i + 1 < vbytes.len() && vbytes[i + 1] == '\u{88}' /* Inpar */ {
5948 if *ch == '\u{8d}' /* Equals */
5949 || *ch == '\u{94}' /* Inang */
5950 || *ch == '\u{96}' /* OutangProc */
5951 {
5952 *cmplx = 1;
5953 break;
5954 }
5955 }
5956 }
5957 ecstr(&name);
5958 ecstr(&value);
5959 isnull = false;
5960 assignments = true;
5961 }
5962 ENVARRAY => {
5963 // c:1883-1908 — array assignment `name=( ... )` in the
5964 // pre-cmd loop (no `typeset`-style typeset_force flag).
5965 // c:1884 — `int oldcmdpos = incmdpos, n, type2;`
5966 let oldcmdpos = incmdpos();
5967 let n: u32;
5968 let type2: wordcode;
5969 let p: usize;
5970
5971 // c:1886-1889 — `array setting is cmplx because it can
5972 // contain process substitutions`
5973 // c:1890 — `*cmplx = c = 1;`
5974 *cmplx = 1;
5975 // c:1891 — `p = ecadd(0);`
5976 p = ecadd(0);
5977 // c:1892 — `incmdpos = 0;`
5978 set_incmdpos(false);
5979 // c:1893-1897 — `+=` detection: if tokstr ends in `+`,
5980 // strip the `+` and use WC_ASSIGN_INC; else WC_ASSIGN_NEW.
5981 let raw = tokstr().unwrap_or_default();
5982 let (name, t2) = if raw.ends_with('+') {
5983 (raw[..raw.len() - 1].to_string(), WC_ASSIGN_INC)
5984 } else {
5985 (raw.clone(), WC_ASSIGN_NEW)
5986 };
5987 type2 = t2;
5988 // c:1898 — `ecstr(tokstr);` (tokstr now NUL-trimmed)
5989 ecstr(&name);
5990 // c:1899 — `cmdpush(CS_ARRAY);`
5991 cmdpush(CS_ARRAY as u8);
5992 // c:1900 — `zshlex();`
5993 zshlex();
5994 // c:1901 — `n = par_nl_wordlist();`
5995 n = par_nl_wordlist_wordcode();
5996 // c:1902 — `ecbuf[p] = WCB_ASSIGN(WC_ASSIGN_ARRAY, type2, n);`
5997 ECBUF.with_borrow_mut(|b| {
5998 b[p] = WCB_ASSIGN(WC_ASSIGN_ARRAY, type2, n);
5999 });
6000 // c:1903 — `cmdpop();`
6001 cmdpop();
6002 // c:1904-1905 — `if (tok != OUTPAR) YYERROR(oecused);`
6003 if tok() != OUTPAR_TOK {
6004 crate::ported::utils::zerr("par_simple: expected `)' after array assignment");
6005 return 0;
6006 }
6007 // c:1906 — `incmdpos = oldcmdpos;`
6008 set_incmdpos(oldcmdpos);
6009 // c:1907 — `isnull = 0;`
6010 isnull = false;
6011 // c:1908 — `assignments = 1;`
6012 assignments = true;
6013 }
6014 t if IS_REDIROP(t) => {
6015 // c:1900-1904 — `*cmplx = c = 1; nr += par_redir(&r,
6016 // NULL); continue;`. The wordcode-emitting redir is
6017 // distinct from the AST par_redir — it INSERTS
6018 // WCB_REDIR + fd + ecstrcode(name) at offset `r`
6019 // via ecispace, shifting any later words down.
6020 *cmplx = 1;
6021 let added = par_redir_wordcode(&mut r);
6022 if added == 0 {
6023 break;
6024 }
6025 nr += added;
6026 continue;
6027 }
6028 _ => break,
6029 }
6030 zshlex(); // c:1907 `zshlex();`
6031 }
6032
6033 // c:1920-1921 — `if (tok == AMPER || tok == AMPERBANG) YYERROR;`
6034 if tok() == AMPER || tok() == AMPERBANG {
6035 crate::ported::utils::zerr("par_simple: unexpected &");
6036 return 0;
6037 }
6038
6039 // c:1923 — `p = ecadd(WCB_SIMPLE(0));`
6040 let mut p = ecadd(WCB_SIMPLE(0));
6041
6042 // c:1924-2105 — main words loop. is_typeset tracks whether the
6043 // outer command was `typeset`/`export`/etc. so the final
6044 // placeholder gets WCB_TYPESET instead of WCB_SIMPLE.
6045 let mut is_typeset = false;
6046 let mut postassigns: u32 = 0;
6047 let mut ppost: usize = 0;
6048 loop {
6049 match tok() {
6050 STRING_LEX | TYPESET => {
6051 // c:1926 — `int redir_var = 0;`
6052 let mut redir_var = false;
6053 // c:1928-1929 — `*cmplx = 1; incmdpos = 0;`
6054 *cmplx = 1;
6055 set_incmdpos(false);
6056 // c:1931-1932 — TYPESET → intypeset = is_typeset = 1.
6057 if tok() == TYPESET {
6058 set_intypeset(true);
6059 is_typeset = true;
6060 }
6061 let s = tokstr().unwrap_or_default();
6062 // c:1934-1974 — `{var}>file` brace-FD detection.
6063 // `if (!isset(IGNOREBRACES) && *tokstr == Inbrace)`
6064 let bytes = s.as_bytes();
6065 let first_is_inbrace = (bytes.len() >= 2 && bytes[0] == 0xc2 && bytes[1] == 0x8f)
6066 || (bytes.len() >= 1 && bytes[0] == b'{');
6067 if !isset(IGNOREBRACES) && first_is_inbrace {
6068 // c:1937-1938 — `char *eptr = tokstr + strlen(tokstr) - 1;`
6069 // `char *ptr = eptr;`
6070 // C tests `*eptr == Outbrace` (0x90 marker or `}`) AND
6071 // there's content between `{` and `}` (`ptr > tokstr + 1`).
6072 let last_two_outbrace = bytes.len() >= 2
6073 && (bytes.ends_with(&[0xc2, 0x90]) || bytes.last() == Some(&b'}'));
6074 let opener_len = if bytes.len() >= 2 && bytes[0] == 0xc2 && bytes[1] == 0x8f {
6075 2
6076 } else {
6077 1
6078 };
6079 let closer_len = if bytes.len() >= 2 && bytes.ends_with(&[0xc2, 0x90]) {
6080 2
6081 } else if bytes.last() == Some(&b'}') {
6082 1
6083 } else {
6084 0
6085 };
6086 if last_two_outbrace && bytes.len() > opener_len + closer_len {
6087 // c:1944 — `if (itype_end(tokstr+1, IIDENT, 0) >= ptr)`
6088 // Inner content is the identifier between `{` and `}`.
6089 let inner_start = opener_len;
6090 let inner_end = bytes.len() - closer_len;
6091 let inner = &s[inner_start..inner_end];
6092 if !inner.is_empty()
6093 && crate::ported::utils::isident(inner)
6094 {
6095 // c:1946-1948 — `char *idstring = dupstrpfx(...);`
6096 // `redir_var = 1; zshlex();`
6097 let idstring = inner.to_string();
6098 redir_var = true;
6099 zshlex();
6100 // c:1953-1958 — `if (IS_REDIROP(tok) && tokfd == -1)
6101 // { *cmplx = c = 1; nrediradd = par_redir(&r, id);
6102 // p += nrediradd; sr += nrediradd; }`
6103 if IS_REDIROP(tok()) && tokfd() == -1 {
6104 *cmplx = 1;
6105 let nrediradd = par_redir_wordcode_inner(&mut r, Some(&idstring));
6106 p += nrediradd as usize;
6107 sr += nrediradd;
6108 } else if postassigns > 0 {
6109 // c:1959-1966 — postassigns path: emit
6110 // WCB_ASSIGN(SCALAR, INC, 0) + name + ""
6111 postassigns += 1;
6112 ecadd(WCB_ASSIGN(WC_ASSIGN_SCALAR, WC_ASSIGN_INC, 0));
6113 ecstr(&s);
6114 ecstr("");
6115 } else {
6116 // c:1968-1972 — `else { ecstr(toksave); argc++; }`
6117 ecstr(&s);
6118 argc += 1;
6119 }
6120 }
6121 }
6122 }
6123 if !redir_var {
6124 // c:1977-1996 — normal (non-redir-var) STRING/TYPESET.
6125 if postassigns > 0 {
6126 // c:1979-1989 — typeset with bare-name arg → INC
6127 postassigns += 1;
6128 ecadd(WCB_ASSIGN(WC_ASSIGN_SCALAR, WC_ASSIGN_INC, 0));
6129 ecstr(&s);
6130 ecstr("");
6131 } else {
6132 ecstr(&s);
6133 argc += 1;
6134 }
6135 zshlex();
6136 }
6137 isnull = false;
6138 }
6139 ENVSTRING => {
6140 // c:2005-2026 — mid-cmd ENVSTRING (under intypeset
6141 // context). Emits WCB_ASSIGN(SCALAR, NEW, 0) then
6142 // ecstr(name) + ecstr(value), tracking the first
6143 // postassign offset in `ppost` (which the trailing
6144 // WCB_TYPESET header points to).
6145 if postassigns == 0 {
6146 ppost = ecadd(0);
6147 }
6148 postassigns += 1;
6149 let raw = tokstr().unwrap_or_default();
6150 let bytes: Vec<char> = raw.chars().collect();
6151 let mut idx = 0usize;
6152 while idx < bytes.len() {
6153 let ch = bytes[idx];
6154 if ch == '\u{91}' /* Inbrack */ {
6155 let mut depth = 1;
6156 idx += 1;
6157 while idx < bytes.len() && depth > 0 {
6158 match bytes[idx] {
6159 '\u{91}' => depth += 1,
6160 '\u{92}' => depth -= 1,
6161 _ => {}
6162 }
6163 idx += 1;
6164 }
6165 continue;
6166 }
6167 if ch == '=' || ch == '+' || ch == '\u{8d}' /* Equals */ {
6168 break;
6169 }
6170 idx += 1;
6171 }
6172 let name: String = bytes[..idx].iter().collect();
6173 let str_off = if idx < bytes.len() && (bytes[idx] == '=' || bytes[idx] == '\u{8d}') {
6174 idx + 1
6175 } else {
6176 idx
6177 };
6178 let value: String = bytes[str_off..].iter().collect();
6179 ecadd(WCB_ASSIGN(WC_ASSIGN_SCALAR, WC_ASSIGN_NEW, 0));
6180 ecstr(&name);
6181 ecstr(&value);
6182 isnull = false;
6183 zshlex();
6184 }
6185 ENVARRAY => {
6186 // c:2027-2050 — mid-cmd ENVARRAY (typeset N=(…) form).
6187 // C tracks postassigns + ppost the same as ENVSTRING,
6188 // but the inner emit is WCB_ASSIGN(ARRAY, NEW, n)
6189 // with `n` patched in after par_nl_wordlist consumes
6190 // the elements. C also toggles intypeset=0 around the
6191 // wordlist so the lexer doesn't try to re-emit
6192 // assignments inside the array.
6193 *cmplx = 1;
6194 if postassigns == 0 {
6195 ppost = ecadd(0);
6196 }
6197 postassigns += 1;
6198 let parr = ecadd(0);
6199 let raw = tokstr().unwrap_or_default();
6200 let is_inc = raw.ends_with('+');
6201 let name = if is_inc { &raw[..raw.len() - 1] } else { raw.as_str() };
6202 let flag = if is_inc { WC_ASSIGN_INC } else { WC_ASSIGN_NEW };
6203 ecstr(name);
6204 cmdpush(CS_ARRAY as u8);
6205 set_intypeset(false);
6206 zshlex();
6207 // c:2044 — `n = par_nl_wordlist();` (parse.c:2379-2391).
6208 // SEPER + NEWLIN both allowed between elements.
6209 let mut nelem = 0u32;
6210 loop {
6211 let t = tok();
6212 if t != STRING_LEX && t != SEPER && t != NEWLIN {
6213 break;
6214 }
6215 if t == STRING_LEX {
6216 ecstr(&tokstr().unwrap_or_default());
6217 nelem += 1;
6218 }
6219 zshlex();
6220 }
6221 ECBUF.with_borrow_mut(|b| {
6222 if parr < b.len() {
6223 b[parr] = WCB_ASSIGN(WC_ASSIGN_ARRAY, flag, nelem);
6224 }
6225 });
6226 cmdpop();
6227 set_intypeset(true);
6228 if tok() != OUTPAR_TOK {
6229 crate::ported::utils::zerr("expected `)' after array assignment");
6230 return 0;
6231 }
6232 isnull = false;
6233 zshlex();
6234 }
6235 t if IS_REDIROP(t) => {
6236 // c:1999-2010 — `nrediradd = par_redir(&r, NULL);
6237 // p += nrediradd; if (ppost) ppost += nrediradd;
6238 // sr += nrediradd;`
6239 *cmplx = 1;
6240 let added = par_redir_wordcode(&mut r);
6241 if added == 0 {
6242 break;
6243 }
6244 p += added as usize;
6245 if ppost != 0 {
6246 ppost += added as usize;
6247 }
6248 sr += added;
6249 }
6250 INOUTPAR => {
6251 // c:2051 — `} else if (tok == INOUTPAR) {`
6252 // c:2052 — `zlong oldlineno = lineno;`
6253 let oldlineno = lineno();
6254 // c:2053 — `int onp, so, oecssub = ecssub;`
6255 let oecssub = ECSSUB.get();
6256 // c:2055-2057 — `if (!isset(MULTIFUNCDEF) && argc > 1) YYERROR;`
6257 if !isset(MULTIFUNCDEF) && argc > 1 {
6258 crate::ported::utils::zerr("par_simple: too many function names for funcdef");
6259 return 0;
6260 }
6261 // c:2058-2060 — `if (assignments || postassigns) YYERROR;`
6262 if assignments || postassigns > 0 {
6263 crate::ported::utils::zerr("par_simple: assignments before funcdef");
6264 return 0;
6265 }
6266 // c:2061-2068 — hasalias check + zwarn — skipped (no
6267 // alias tracking on the wordcode path).
6268
6269 // c:2070 — `*cmplx = c;`
6270 *cmplx = c_saved;
6271 // c:2071 — `lineno = 0;`
6272 set_lineno(0);
6273 // c:2072 — `incmdpos = 1;`
6274 set_incmdpos(true);
6275 // c:2073 — `cmdpush(CS_FUNCDEF);`
6276 cmdpush(CS_FUNCDEF as u8);
6277 // c:2074 — `zshlex();`
6278 zshlex();
6279 // c:2075-2076 — `while (tok == SEPER) zshlex();`
6280 while tok() == SEPER {
6281 zshlex();
6282 }
6283 // c:2079 — `ecispace(p + 1, 1); ecbuf[p+1] = argc;
6284 // ecadd(0)*4`. Insert the argc word at p+1, then
6285 // append 4 placeholder words.
6286 ecispace(p + 1, 1);
6287 ECBUF.with_borrow_mut(|b| {
6288 if p + 1 < b.len() {
6289 b[p + 1] = argc;
6290 }
6291 });
6292 // c:2080-2083 — four metadata placeholder slots.
6293 ecadd(0);
6294 ecadd(0);
6295 ecadd(0);
6296 ecadd(0);
6297
6298 // c:2085 — `ecnfunc++;`
6299 ECNFUNC.set(ECNFUNC.get() + 1);
6300 // c:2086 — `ecssub = so = ecsoffs;`
6301 let so = ECSOFFS.get();
6302 ECSSUB.set(so);
6303 // c:2087 — `onp = ecnpats;`
6304 let onp = ECNPATS.with(|cc| cc.get());
6305 // c:2088 — `ecnpats = 0;`
6306 ECNPATS.with(|cc| cc.set(0));
6307
6308 // c:2091 — `int c = 0;` — INNER cmplx for the body
6309 // parse. Local to each branch; C's enclosing *cmplx
6310 // is NOT modified by the body.
6311 let mut body_c: i32 = 0;
6312 // c:2090 — `if (tok == INBRACE) {`
6313 if tok() == INBRACE_TOK {
6314 // c:2093 — `zshlex();`
6315 zshlex();
6316 // c:2094 — `par_list(&c);`
6317 par_list_wordcode(&mut body_c);
6318 // c:2095-2101 — `if (tok != OUTBRACE) { cmdpop();
6319 // lineno += oldlineno; ecnpats = onp;
6320 // ecssub = oecssub; YYERROR; }`
6321 if tok() != OUTBRACE_TOK {
6322 cmdpop();
6323 set_lineno(lineno() + oldlineno);
6324 ECNPATS.with(|cc| cc.set(onp));
6325 ECSSUB.set(oecssub);
6326 crate::ported::utils::zerr("par_simple: funcdef expected `}`");
6327 return 0;
6328 }
6329 // c:2102-2105 — `if (argc == 0) incmdpos = 0;`
6330 if argc == 0 {
6331 set_incmdpos(false);
6332 }
6333 // c:2106 — `zshlex();`
6334 zshlex();
6335 } else {
6336 // c:2107-2132 — short-body funcdef form: `f() cmd`
6337 // or `() cmd`. Wraps single par_cmd result in a
6338 // synthetic WC_LIST / WC_SUBLIST /
6339 // WC_PIPE(WC_PIPE_END, 0) header trio.
6340 let ll = ecadd(0);
6341 let sl = ecadd(0);
6342 ecadd(WCB_PIPE(WC_PIPE_END, 0));
6343 let ok = par_cmd_wordcode(&mut body_c, if argc == 0 { 1 } else { 0 });
6344 if !ok {
6345 cmdpop();
6346 crate::ported::utils::zerr("par_simple: funcdef short-body: missing command");
6347 return 0;
6348 }
6349 if argc == 0 {
6350 // c:2118-2127 — anonymous funcdef may take args
6351 // after the body; first one already read.
6352 set_incmdpos(false);
6353 }
6354 // c:2130-2131 — inner sublist/list use inner cmplx.
6355 let used = ECUSED.get() as usize;
6356 set_sublist_code(
6357 sl,
6358 WC_SUBLIST_END as i32,
6359 0,
6360 (used.saturating_sub(1 + sl)) as i32,
6361 body_c != 0,
6362 );
6363 set_list_code(ll, Z_SYNC | Z_END, body_c != 0);
6364 }
6365 let _ = body_c;
6366 // c:2133 — `cmdpop();`
6367 cmdpop();
6368
6369 // c:2135 — `ecadd(WCB_END());`
6370 ecadd(WCB_END());
6371 // c:2136-2139 — fill 4 metadata slots at p+argc+2..5
6372 let p_argc = (p + (argc as usize) + 2) as usize;
6373 let cur_so = ECSOFFS.get();
6374 let np_now = ECNPATS.with(|cc| cc.get());
6375 ECBUF.with_borrow_mut(|b| {
6376 b[p_argc] = (so - oecssub) as wordcode;
6377 b[p_argc + 1] = (cur_so - so) as wordcode;
6378 b[p_argc + 2] = np_now as wordcode;
6379 b[p_argc + 3] = 0;
6380 });
6381
6382 // c:2141-2143 — `ecnpats = onp; ecssub = oecssub; ecnfunc++;`
6383 ECNPATS.with(|cc| cc.set(onp));
6384 ECSSUB.set(oecssub);
6385 ECNFUNC.set(ECNFUNC.get() + 1);
6386
6387 // c:2145 — `ecbuf[p] = WCB_FUNCDEF(ecused - 1 - p);`
6388 let used = ECUSED.get() as usize;
6389 let header_off = used.saturating_sub(1 + p) as wordcode;
6390 ECBUF.with_borrow_mut(|b| {
6391 b[p] = WCB_FUNCDEF(header_off);
6392 });
6393
6394 // c:2147-2172 — `if (argc == 0) { /* anonymous fn args */ }`
6395 if argc == 0 {
6396 // c:2150 — `int parg = ecadd(0);`
6397 let mut parg = ecadd(0);
6398 // c:2151 — `ecadd(0);`
6399 ecadd(0);
6400 // c:2152 — `while (tok == STRING || IS_REDIROP(tok)) {`
6401 while tok() == STRING_LEX || IS_REDIROP(tok()) {
6402 if tok() == STRING_LEX {
6403 // c:2155-2157
6404 ecstr(&tokstr().unwrap_or_default());
6405 argc += 1;
6406 zshlex();
6407 } else {
6408 // c:2159-2165 — *cmplx=c=1; nrediradd=par_redir;
6409 // p += nrediradd; ppost += nrediradd if ppost;
6410 // sr += nrediradd; parg += nrediradd;
6411 *cmplx = 1;
6412 let added = par_redir_wordcode(&mut r);
6413 if added == 0 {
6414 break;
6415 }
6416 p += added as usize;
6417 if ppost != 0 {
6418 ppost += added as usize;
6419 }
6420 sr += added;
6421 parg += added as usize;
6422 }
6423 }
6424 // c:2168-2169 — `if (argc > 0) *cmplx = 1;`
6425 if argc > 0 {
6426 *cmplx = 1;
6427 }
6428 // c:2170 — `ecbuf[parg] = ecused - parg;`
6429 // c:2171 — `ecbuf[parg+1] = argc;`
6430 let used2 = ECUSED.get() as usize;
6431 ECBUF.with_borrow_mut(|b| {
6432 b[parg] = (used2 - parg) as wordcode;
6433 b[parg + 1] = argc;
6434 });
6435 }
6436 // c:2173 — `lineno += oldlineno;`
6437 set_lineno(lineno() + oldlineno);
6438
6439 // c:2175-2177 — `isfunc = 1; isnull = 0; break;`
6440 isfunc = true;
6441 isnull = false;
6442 break;
6443 }
6444 _ => break,
6445 }
6446 }
6447
6448 // c:2173-2176 — `if (isnull && !(sr + nr)) { ecused = oecused;
6449 // return 0; }` — undo everything including pre-cmd assignments
6450 // if no actual command word emerged.
6451 if isnull && sr + nr == 0 && !assignments {
6452 ECUSED.set(p as i32);
6453 return 0;
6454 }
6455 // c:2186-2187 — `incmdpos = 1; intypeset = 0;` — reset before
6456 // the placeholder patch so the next-token lex doesn't carry
6457 // typeset/incond state.
6458 set_incmdpos(true);
6459 set_intypeset(false);
6460 // c:2189-2199 — `if (!isfunc) { if (is_typeset) ecbuf[p] =
6461 // WCB_TYPESET(argc); else ecbuf[p] = WCB_SIMPLE(argc); }`.
6462 // When isfunc=true the INOUTPAR branch already wrote WCB_FUNCDEF
6463 // at p; do NOT clobber it.
6464 if !isfunc {
6465 let header = if is_typeset {
6466 if postassigns > 0 {
6467 ECBUF.with_borrow_mut(|b| {
6468 if ppost < b.len() {
6469 b[ppost] = postassigns;
6470 }
6471 });
6472 } else {
6473 ecadd(0);
6474 }
6475 WCB_TYPESET(argc)
6476 } else {
6477 WCB_SIMPLE(argc)
6478 };
6479 ECBUF.with_borrow_mut(|b| {
6480 if p < b.len() {
6481 b[p] = header;
6482 }
6483 });
6484 }
6485 1 + sr
6486}
6487
6488/// Wrapper for callers without a cmplx accumulator. Allocates a
6489/// local cmplx and ignores it — only used by legacy dispatch sites.
6490pub fn par_simple_wordcode() {
6491 let mut cmplx: i32 = 0;
6492 par_simple_wordcode_impl(&mut cmplx, 0);
6493}
6494
6495/// Port of `par_redir(int *rp, char *idstring)` from
6496/// `Src/parse.c:2229-2345` — the wordcode-emitting variant that
6497/// pushes WCB_REDIR + fd + ecstrcode(name) into ECBUF. Distinct
6498/// from the AST `par_redir` (parse.rs:3771) which builds a
6499/// ZshRedir struct for the AST executor pipeline.
6500///
6501/// Returns the number of wordcodes added (3 for the basic shape,
6502/// 4 with idstring, 5 for HEREDOC[DASH] which carries the
6503/// terminator strings inline). Returns 0 on parse error.
6504fn par_redir_wordcode(rp: &mut usize) -> i32 {
6505 par_redir_wordcode_inner(rp, None)
6506}
6507
6508/// par_redir variant taking the `idstring` parameter for the
6509/// `{var}>file` shape. C signature `par_redir(int *rp, char *idstring)`
6510/// passes NULL when there's no var-id. Rust uses Option<&str>.
6511fn par_redir_wordcode_inner(rp: &mut usize, idstring: Option<&str>) -> i32 {
6512 // c:2231 — `int r = *rp, type, fd1, oldcmdpos, oldnc, ncodes;`
6513 let r: usize = *rp;
6514 let mut r#type: i32;
6515 let fd1: i32;
6516 let oldcmdpos: bool;
6517 let oldnc: i32;
6518 let mut ncodes: usize;
6519 // c:2232 — `char *name;`
6520 let name: String;
6521
6522 // c:2234 — `oldcmdpos = incmdpos;`
6523 oldcmdpos = incmdpos();
6524 // c:2235 — `incmdpos = 0;`
6525 set_incmdpos(false);
6526 // c:2236 — `oldnc = nocorrect;`
6527 oldnc = nocorrect();
6528 // c:2237-2238 — `if (tok != INANG && tok != INOUTANG) nocorrect = 1;`
6529 if tok() != INANG_TOK && tok() != INOUTANG {
6530 set_nocorrect(1);
6531 }
6532 // c:2239 — `type = redirtab[tok - OUTANG];`
6533 // Map current redirop token to redirtab index — matches order of
6534 // C `enum { OUTANG, OUTANGBANG, DOUTANG, DOUTANGBANG, INANG,
6535 // INOUTANG, DINANG, DINANGDASH, INANGAMP, OUTANGAMP, AMPOUTANG,
6536 // OUTANGAMPBANG, DOUTANGAMP, DOUTANGAMPBANG, TRINANG }`.
6537 r#type = match tok() {
6538 OUTANG_TOK => REDIR_WRITE,
6539 OUTANGBANG => REDIR_WRITENOW,
6540 DOUTANG => REDIR_APP,
6541 DOUTANGBANG => REDIR_APPNOW,
6542 INANG_TOK => REDIR_READ,
6543 INOUTANG => REDIR_READWRITE,
6544 DINANG => REDIR_HEREDOC,
6545 DINANGDASH => REDIR_HEREDOCDASH,
6546 INANGAMP => REDIR_MERGEIN,
6547 OUTANGAMP => REDIR_MERGEOUT,
6548 AMPOUTANG => REDIR_ERRWRITE,
6549 OUTANGAMPBANG => REDIR_ERRWRITENOW,
6550 DOUTANGAMP => REDIR_ERRAPP,
6551 DOUTANGAMPBANG => REDIR_ERRAPPNOW,
6552 TRINANG => REDIR_HERESTR,
6553 _ => {
6554 set_incmdpos(oldcmdpos);
6555 set_nocorrect(oldnc);
6556 return 0;
6557 }
6558 };
6559 // c:2240 — `fd1 = tokfd;`
6560 fd1 = tokfd();
6561 // c:2241 — `zshlex();`
6562 zshlex();
6563 // c:2242-2243 — `if (tok != STRING && tok != ENVSTRING) YYERROR(ecused);`
6564 if tok() != STRING_LEX && tok() != ENVSTRING {
6565 set_incmdpos(oldcmdpos);
6566 set_nocorrect(oldnc);
6567 crate::ported::utils::zerr("expected word after redirection");
6568 return 0;
6569 }
6570 // c:2244 — `incmdpos = oldcmdpos;`
6571 set_incmdpos(oldcmdpos);
6572 // c:2245 — `nocorrect = oldnc;`
6573 set_nocorrect(oldnc);
6574
6575 // c:2248-2249 — `if (fd1 == -1) fd1 = IS_READFD(type) ? 0 : 1;`
6576 let fd1 = if fd1 == -1 {
6577 if is_readfd(r#type) {
6578 0
6579 } else {
6580 1
6581 }
6582 } else {
6583 fd1
6584 };
6585
6586 // c:2251 — `name = tokstr;`
6587 name = tokstr().unwrap_or_default();
6588
6589 // c:2253-2321 — switch on type:
6590 match r#type {
6591 // c:2254-2300 — REDIR_HEREDOC / REDIR_HEREDOCDASH
6592 x if x == REDIR_HEREDOC || x == REDIR_HEREDOCDASH => {
6593 // c:2257 — `struct heredocs **hd;`
6594 // c:2258 — `int htype = type;`
6595 let htype = r#type;
6596 // c:2260-2261 — `if (strchr(tokstr, '\n')) YYERROR(ecused);`
6597 if name.contains('\n') {
6598 crate::ported::utils::zerr("here-doc terminator contains newline");
6599 return 0;
6600 }
6601 // c:2263-2273 — `ncodes = 5; if (idstring) { type |= MASK; ncodes = 6; }`
6602 if idstring.is_some() {
6603 r#type |= REDIR_VARID_MASK;
6604 ncodes = 6;
6605 } else {
6606 ncodes = 5;
6607 }
6608 // c:2277 — `ecispace(r, ncodes);`
6609 ecispace(r, ncodes);
6610 // c:2278 — `*rp = r + ncodes;`
6611 *rp = r + ncodes;
6612 // c:2279 — `ecbuf[r] = WCB_REDIR(type | REDIR_FROM_HEREDOC_MASK);`
6613 ECBUF.with_borrow_mut(|b| {
6614 b[r] = WCB_REDIR((r#type | REDIR_FROM_HEREDOC_MASK) as wordcode);
6615 // c:2280 — `ecbuf[r + 1] = fd1;`
6616 b[r + 1] = fd1 as wordcode;
6617 });
6618 // c:2282-2286 — r+2..4 are filled later by setheredoc.
6619 // c:2287-2288 — `if (idstring) ecbuf[r + 5] = ecstrcode(idstring);`
6620 if let Some(id) = idstring {
6621 let coded = ecstrcode(id);
6622 ECBUF.with_borrow_mut(|b| {
6623 b[r + 5] = coded;
6624 });
6625 }
6626 // c:2290-2296 — `for (hd = &hdocs; *hd; hd = &(*hd)->next);
6627 // *hd = zalloc(sizeof(struct heredocs));
6628 // (*hd)->next = NULL;
6629 // (*hd)->type = htype;
6630 // (*hd)->pc = r;
6631 // (*hd)->str = tokstr;`
6632 HDOCS.with_borrow_mut(|head| {
6633 let mut cur = head;
6634 while cur.is_some() {
6635 cur = &mut cur.as_mut().unwrap().next; // c:2290
6636 }
6637 *cur = Some(Box::new(crate::ported::zsh_h::heredocs { // c:2292-2296
6638 next: None,
6639 typ: htype,
6640 pc: r as i32,
6641 str: Some(name.clone()),
6642 }));
6643 });
6644 // c:2298 — `zshlex();`
6645 zshlex();
6646 // c:2299 — `return ncodes;`
6647 return ncodes as i32;
6648 }
6649 // c:2301-2308 — REDIR_WRITE / REDIR_WRITENOW
6650 x if x == REDIR_WRITE || x == REDIR_WRITENOW => {
6651 // c:2303-2305 — `if (tokstr[0] == OutangProc && tokstr[1] == Inpar)
6652 // type = REDIR_OUTPIPE;`
6653 let nb: Vec<char> = name.chars().collect();
6654 if nb.len() >= 2 && nb[0] == '\u{96}' && nb[1] == '\u{88}' {
6655 r#type = REDIR_OUTPIPE;
6656 } else if nb.len() >= 2 && nb[0] == '\u{94}' && nb[1] == '\u{88}' {
6657 // c:2306-2307 — `else if (tokstr[0] == Inang && tokstr[1] == Inpar) YYERROR;`
6658 crate::ported::utils::zerr("par_redir: < before >");
6659 return 0;
6660 }
6661 }
6662 // c:2309-2315 — REDIR_READ
6663 x if x == REDIR_READ => {
6664 let nb: Vec<char> = name.chars().collect();
6665 if nb.len() >= 2 && nb[0] == '\u{94}' && nb[1] == '\u{88}' {
6666 r#type = REDIR_INPIPE;
6667 } else if nb.len() >= 2 && nb[0] == '\u{96}' && nb[1] == '\u{88}' {
6668 crate::ported::utils::zerr("par_redir: > before <");
6669 return 0;
6670 }
6671 }
6672 // c:2316-2320 — REDIR_READWRITE
6673 x if x == REDIR_READWRITE => {
6674 let nb: Vec<char> = name.chars().collect();
6675 if nb.len() >= 2
6676 && (nb[0] == '\u{94}' || nb[0] == '\u{96}')
6677 && nb[1] == '\u{88}'
6678 {
6679 r#type = if nb[0] == '\u{94}' {
6680 REDIR_INPIPE
6681 } else {
6682 REDIR_OUTPIPE
6683 };
6684 }
6685 }
6686 _ => {}
6687 }
6688 // c:2322 — `zshlex();`
6689 zshlex();
6690
6691 // c:2326-2333 — `if (idstring) { type |= MASK; ncodes = 4; } else ncodes = 3;`
6692 if idstring.is_some() {
6693 r#type |= REDIR_VARID_MASK;
6694 ncodes = 4;
6695 } else {
6696 ncodes = 3;
6697 }
6698
6699 // c:2334 — `ecispace(r, ncodes);`
6700 ecispace(r, ncodes);
6701 // c:2335 — `*rp = r + ncodes;`
6702 *rp = r + ncodes;
6703 // c:2336 — `ecbuf[r] = WCB_REDIR(type);`
6704 let coded_name = ecstrcode(&name);
6705 ECBUF.with_borrow_mut(|b| {
6706 b[r] = WCB_REDIR(r#type as wordcode);
6707 // c:2337 — `ecbuf[r + 1] = fd1;`
6708 b[r + 1] = fd1 as wordcode;
6709 // c:2338 — `ecbuf[r + 2] = ecstrcode(name);`
6710 b[r + 2] = coded_name;
6711 });
6712 // c:2339-2340 — `if (idstring) ecbuf[r + 3] = ecstrcode(idstring);`
6713 if let Some(id) = idstring {
6714 let coded_id = ecstrcode(id);
6715 ECBUF.with_borrow_mut(|b| {
6716 b[r + 3] = coded_id;
6717 });
6718 }
6719 // c:2342 — `return ncodes;`
6720 ncodes as i32
6721}
6722
6723/// Port of `IS_READFD(type)` macro from `Src/zsh.h` — determines
6724/// default fd (0 for read-ish, 1 for write-ish) when none specified.
6725fn is_readfd(t: i32) -> bool {
6726 matches!(
6727 t,
6728 x if x == REDIR_READ
6729 || x == REDIR_READWRITE
6730 || x == REDIR_MERGEIN
6731 || x == REDIR_HEREDOC
6732 || x == REDIR_HEREDOCDASH
6733 || x == REDIR_HERESTR
6734 )
6735}
6736
6737/// Parse a program (list of lists)
6738/// Parse a complete program (top-level entry). Calls
6739/// parse_program_until with no end-token sentinel. Direct port of
6740/// zsh/Src/parse.c:614-720 `parse_event` / `par_list` /
6741/// `par_event` flow. C distinguishes COND_EVENT (single command
6742/// for here-string) from full event parse; zshrs's parse_program
6743/// is the full-event entry.
6744fn parse_program() -> ZshProgram {
6745 parse_program_until(None)
6746}
6747
6748/// Parse a program until we hit an end token
6749/// Parse a program until one of `end_tokens` is seen (or EOF).
6750/// Drives par_list in a loop. C equivalent: the body of par_event
6751/// (parse.c:635-695) iterating par_list against the lexer.
6752fn parse_program_until(end_tokens: Option<&[lextok]>) -> ZshProgram {
6753 let mut lists = Vec::new();
6754
6755 loop {
6756 // Skip separators
6757 while tok() == SEPER || tok() == NEWLIN {
6758 zshlex();
6759 }
6760
6761 if tok() == ENDINPUT || tok() == LEXERR {
6762 break;
6763 }
6764
6765 // Check for end tokens
6766 if let Some(end_toks) = end_tokens {
6767 if end_toks.contains(&tok()) {
6768 break;
6769 }
6770 }
6771
6772 // Also stop at these tokens when not explicitly looking for them
6773 // Note: Else/Elif/Then are NOT here - they're handled by par_if
6774 // to allow nested if statements inside case arms, loops, etc.
6775 match tok() {
6776 OUTBRACE_TOK | DSEMI | SEMIAMP | SEMIBAR | DONE | FI | ESAC | ZEND => break,
6777 _ => {}
6778 }
6779
6780 match par_list() {
6781 Some(list) => {
6782 let detected = simple_name_with_inoutpar(&list);
6783 lists.push(list);
6784 // Synthesize a FuncDef for the `name() { body }` shape
6785 // at parse time so body_source is captured while the
6786 // lexer still has the input. The lexer port emits
6787 // `name(` as a single Word ending in `<Inpar><Outpar>`,
6788 // so the Simple list is followed by an Inbrace once
6789 // separators are skipped. For `name() cmd args` the
6790 // body has already been swallowed into the same
6791 // Simple's words tail — synthesize directly from there.
6792 if let Some((names, body_argv)) = detected {
6793 if !body_argv.is_empty() {
6794 // One-line body already in the Simple. Build
6795 // a Simple from body_argv as the function body.
6796 lists.pop();
6797 let body_simple = ZshCommand::Simple(ZshSimple {
6798 assigns: Vec::new(),
6799 words: body_argv,
6800 redirs: Vec::new(),
6801 });
6802 let body_list = ZshList {
6803 sublist: ZshSublist {
6804 pipe: ZshPipe {
6805 cmd: body_simple,
6806 next: None,
6807 lineno: lineno(),
6808 merge_stderr: false,
6809 },
6810 next: None,
6811 flags: SublistFlags::default(),
6812 },
6813 flags: ListFlags::default(),
6814 };
6815 let funcdef = ZshCommand::FuncDef(ZshFuncDef {
6816 names,
6817 body: Box::new(ZshProgram {
6818 lists: vec![body_list],
6819 }),
6820 tracing: false,
6821 auto_call_args: None,
6822 body_source: None,
6823 });
6824 let synthetic = ZshList {
6825 sublist: ZshSublist {
6826 pipe: ZshPipe {
6827 cmd: funcdef,
6828 next: None,
6829 lineno: lineno(),
6830 merge_stderr: false,
6831 },
6832 next: None,
6833 flags: SublistFlags::default(),
6834 },
6835 flags: ListFlags::default(),
6836 };
6837 lists.push(synthetic);
6838 continue;
6839 }
6840 // Else: words.len() == 1 (only the trailing `name()`
6841 // word), brace body follows. `names` may carry
6842 // multiple identifiers from the `fna fnb fnc()`
6843 // shorthand — all share the same brace body per
6844 // src/zsh/Src/parse.c:1666 par_funcdef wordlist.
6845 // Skip separators on the real lexer; safe because
6846 // parse_program's next iteration would also skip them.
6847 while tok() == SEPER || tok() == NEWLIN {
6848 zshlex();
6849 }
6850 if tok() == INBRACE_TOK {
6851 // Capture body_start BEFORE the lexer
6852 // advances past the first body token. The
6853 // outer zshlex() consumed `{`; lexer.pos
6854 // is now right after `{`. The next
6855 // `zshlex()` would advance past `echo`,
6856 // making body_start land mid-body and
6857 // lose the first word — `typeset -f f`
6858 // printed `a; echo b` instead of
6859 // `echo a; echo b` for `f() { echo a;
6860 // echo b }`.
6861 let body_start = pos();
6862 zshlex();
6863 let body = parse_program();
6864 let body_end = if tok() == OUTBRACE_TOK {
6865 pos().saturating_sub(1)
6866 } else {
6867 pos()
6868 };
6869 let body_source = input_slice(body_start, body_end)
6870 .map(|s| s.trim().to_string())
6871 .filter(|s| !s.is_empty());
6872 if tok() == OUTBRACE_TOK {
6873 zshlex();
6874 }
6875 // Replace the Simple list with a FuncDef list.
6876 lists.pop();
6877 let funcdef = ZshCommand::FuncDef(ZshFuncDef {
6878 names,
6879 body: Box::new(body),
6880 tracing: false,
6881 auto_call_args: None,
6882 body_source,
6883 });
6884 let synthetic = ZshList {
6885 sublist: ZshSublist {
6886 pipe: ZshPipe {
6887 cmd: funcdef,
6888 next: None,
6889 lineno: lineno(),
6890 merge_stderr: false,
6891 },
6892 next: None,
6893 flags: SublistFlags::default(),
6894 },
6895 flags: ListFlags::default(),
6896 };
6897 lists.push(synthetic);
6898 } else if !matches!(tok(), ENDINPUT | OUTBRACE_TOK | SEPER | NEWLIN) {
6899 // No-brace one-line body: `foo() echo hello`.
6900 // Parse a single command for the body.
6901 let body_cmd = par_cmd();
6902 if let Some(cmd) = body_cmd {
6903 let body_list = ZshList {
6904 sublist: ZshSublist {
6905 pipe: ZshPipe {
6906 cmd,
6907 next: None,
6908 lineno: lineno(),
6909 merge_stderr: false,
6910 },
6911 next: None,
6912 flags: SublistFlags::default(),
6913 },
6914 flags: ListFlags::default(),
6915 };
6916 lists.pop();
6917 let funcdef = ZshCommand::FuncDef(ZshFuncDef {
6918 names: names.clone(),
6919 body: Box::new(ZshProgram {
6920 lists: vec![body_list],
6921 }),
6922 tracing: false,
6923 auto_call_args: None,
6924 body_source: None,
6925 });
6926 let synthetic = ZshList {
6927 sublist: ZshSublist {
6928 pipe: ZshPipe {
6929 cmd: funcdef,
6930 next: None,
6931 lineno: lineno(),
6932 merge_stderr: false,
6933 },
6934 next: None,
6935 flags: SublistFlags::default(),
6936 },
6937 flags: ListFlags::default(),
6938 };
6939 lists.push(synthetic);
6940 }
6941 }
6942 }
6943 }
6944 None => break,
6945 }
6946 }
6947
6948 ZshProgram { lists }
6949}
6950
6951/// Parse an assignment
6952/// Parse an assignment word `NAME=value` or `NAME=(arr items)`.
6953/// Sub-routine of par_simple. The C source handles assignments
6954/// inline in par_simple via the ENVSTRING/ENVARRAY token paths
6955/// (parse.c:1842-2000ish); zshrs splits it out to a dedicated
6956/// helper for clarity.
6957fn parse_assign() -> Option<ZshAssign> {
6958 // Helper: locate the Equals-marker that delimits NAME from
6959 // VALUE in an assignment-shaped tokstr. The lexer META-encodes
6960 // EVERY `=` (including those inside `${var%%=foo}` strip
6961 // patterns or `[idx]=...` subscripts), so a naive
6962 // `tokstr.find(Equals)` would split at the first inner `=`
6963 // and break the whole assignment. Walk the string skipping
6964 // brace and bracket depth so the assignment's `=` (the one
6965 // after the last `]` of the LHS subscript / or after the
6966 // bare name) is the one we land on.
6967 fn find_assign_equals(s: &str) -> Option<usize> {
6968 let target = crate::ported::zsh_h::Equals;
6969 let mut brace = 0i32;
6970 let mut bracket = 0i32;
6971 let mut paren = 0i32;
6972 for (i, c) in s.char_indices() {
6973 match c {
6974 '{' | '\u{8f}' /* Inbrace */ => brace += 1,
6975 '}' | '\u{90}' /* Outbrace */ => {
6976 if brace > 0 {
6977 brace -= 1;
6978 }
6979 }
6980 '[' | '\u{91}' /* Inbrack */ => bracket += 1,
6981 ']' | '\u{92}' /* Outbrack */ => {
6982 if bracket > 0 {
6983 bracket -= 1;
6984 }
6985 }
6986 '(' | '\u{88}' /* Inpar */ => paren += 1,
6987 ')' | '\u{8a}' /* Outpar */ => {
6988 if paren > 0 {
6989 paren -= 1;
6990 }
6991 }
6992 _ if c == target && brace == 0 && bracket == 0 && paren == 0 => {
6993 return Some(i);
6994 }
6995 _ => {}
6996 }
6997 }
6998 None
6999 }
7000
7001 let _ts_tokstr = tokstr()?;
7002 let tokstr = _ts_tokstr.as_str();
7003
7004 // Parse name=value or name+=value.
7005 let (name, value_str, append) = if tok() == ENVARRAY {
7006 let (name, append) = if let Some(stripped) = tokstr.strip_suffix('+') {
7007 (stripped, true)
7008 } else {
7009 (tokstr, false)
7010 };
7011 (name.to_string(), String::new(), append)
7012 } else if let Some(pos) = find_assign_equals(tokstr) {
7013 let name_part = &tokstr[..pos];
7014 let (name, append) = if let Some(stripped) = name_part.strip_suffix('+') {
7015 (stripped, true)
7016 } else {
7017 (name_part, false)
7018 };
7019 (
7020 name.to_string(),
7021 tokstr[pos + Equals.len_utf8()..].to_string(),
7022 append,
7023 )
7024 } else if let Some(pos) = tokstr.find('=') {
7025 // Fallback to literal '=' for compatibility
7026 let name_part = &tokstr[..pos];
7027 let (name, append) = if let Some(stripped) = name_part.strip_suffix('+') {
7028 (stripped, true)
7029 } else {
7030 (name_part, false)
7031 };
7032 (name.to_string(), tokstr[pos + 1..].to_string(), append)
7033 } else {
7034 return None;
7035 };
7036
7037 let value = if tok() == ENVARRAY {
7038 // Array assignment: name=(...)
7039 let mut elements = Vec::new();
7040 zshlex(); // skip past token
7041
7042 let mut arr_iters = 0;
7043 const MAX_ARRAY_ELEMENTS: usize = 10_000;
7044 while matches!(tok(), STRING_LEX | SEPER | NEWLIN) {
7045 arr_iters += 1;
7046 if arr_iters > MAX_ARRAY_ELEMENTS {
7047 crate::ported::utils::zerr("array assignment exceeded maximum elements");
7048 break;
7049 }
7050 if tok() == STRING_LEX {
7051 let _ts_s = crate::ported::lex::tokstr();
7052 if let Some(s) = _ts_s.as_deref() {
7053 elements.push(s.to_string());
7054 }
7055 }
7056 zshlex();
7057 }
7058
7059 // The closing Outpar is consumed here. The outer par_simple
7060 // loop will then `zshlex()` past whatever follows (typically
7061 // a separator or the next word) — calling zshlex twice in
7062 // tandem (here AND in par_simple) over-advances and merges
7063 // a following `name() { … }` funcdef into the same Simple.
7064 // We only consume Outpar; let the caller handle the rest.
7065 // Without this guard `g=(o1); f() { :; }` parsed as one
7066 // Simple with assigns=[g] and words=["f()"] (one token).
7067 if tok() == OUTPAR_TOK {
7068 // Note: do NOT zshlex() here. par_simple's `lexer
7069 // .zshlex()` after `parse_assign` returns advances past
7070 // the Outpar onto the next significant token.
7071 //
7072 // Force `incmdpos=true` so the next zshlex() recognizes
7073 // a follow-up `b=(...)` / `b=val` as Envarray/Envstring.
7074 // The lexer flips incmdpos to false on bare Outpar (which
7075 // is correct for subshell-close context), but for an
7076 // array-assignment close more assigns/words may follow.
7077 set_incmdpos(true);
7078 }
7079
7080 ZshAssignValue::Array(elements)
7081 } else {
7082 ZshAssignValue::Scalar(value_str)
7083 };
7084
7085 Some(ZshAssign {
7086 name,
7087 value,
7088 append,
7089 })
7090}
7091
7092/// AST `par_redir` variant accepting an idstring for the
7093/// `{var}>file` brace-FD shape. C signature
7094/// `par_redir(int *rp, char *idstring)` (parse.c:2229). The
7095/// idstring is stored in the resulting ZshRedir.varid for the
7096/// executor to bind the named variable to the chosen fd.
7097fn par_redir_with_id(idstring: Option<&str>) -> Option<ZshRedir> {
7098 let varid: Option<String> = idstring.map(|s| s.to_string());
7099 let rtype = match tok() {
7100 OUTANG_TOK => REDIR_WRITE,
7101 OUTANGBANG => REDIR_WRITENOW,
7102 DOUTANG => REDIR_APP,
7103 DOUTANGBANG => REDIR_APPNOW,
7104 INANG_TOK => REDIR_READ,
7105 INOUTANG => REDIR_READWRITE,
7106 DINANG => REDIR_HEREDOC,
7107 DINANGDASH => REDIR_HEREDOCDASH,
7108 TRINANG => REDIR_HERESTR,
7109 INANGAMP => REDIR_MERGEIN,
7110 OUTANGAMP => REDIR_MERGEOUT,
7111 AMPOUTANG => REDIR_ERRWRITE,
7112 OUTANGAMPBANG => REDIR_ERRWRITENOW,
7113 DOUTANGAMP => REDIR_ERRAPP,
7114 DOUTANGAMPBANG => REDIR_ERRAPPNOW,
7115 _ => return None,
7116 };
7117
7118 let fd = if tokfd() >= 0 {
7119 tokfd()
7120 } else if matches!(
7121 rtype,
7122 REDIR_READ
7123 | REDIR_READWRITE
7124 | REDIR_MERGEIN
7125 | REDIR_HEREDOC
7126 | REDIR_HEREDOCDASH
7127 | REDIR_HERESTR
7128 ) {
7129 0
7130 } else {
7131 1
7132 };
7133
7134 // c:2234-2245 — save/restore incmdpos and nocorrect around the
7135 // zshlex that consumes the redir target word:
7136 // oldcmdpos = incmdpos; incmdpos = 0;
7137 // oldnc = nocorrect;
7138 // if (tok != INANG && tok != INOUTANG) nocorrect = 1;
7139 // ... zshlex; check tok; ...
7140 // incmdpos = oldcmdpos; nocorrect = oldnc;
7141 // Without this, a redir target lexes in the parent's incmdpos
7142 // (re-promoting `{` / reswords) AND with parent nocorrect (so
7143 // spelling-correction wrongly runs inside `> $(cmd)` etc.).
7144 let oldcmdpos = incmdpos();
7145 set_incmdpos(false);
7146 let oldnc = nocorrect();
7147 let cur = tok();
7148 if cur != INANG_TOK && cur != INOUTANG {
7149 set_nocorrect(1);
7150 }
7151 zshlex();
7152
7153 let name = match tok() {
7154 STRING_LEX | ENVSTRING => {
7155 let n = tokstr().unwrap_or_default();
7156 // Restore BEFORE the next zshlex so trailing tokens lex
7157 // in the original parent context (mirrors C ordering at
7158 // parse.c:2244-2245 — restore right after the word is
7159 // confirmed, before any downstream advance).
7160 set_incmdpos(oldcmdpos);
7161 set_nocorrect(oldnc);
7162 zshlex();
7163 n
7164 }
7165 _ => {
7166 set_incmdpos(oldcmdpos);
7167 set_nocorrect(oldnc);
7168 crate::ported::utils::zerr("expected word after redirection");
7169 return None;
7170 }
7171 };
7172
7173 // Heredoc terminator capture. C parse.c:2254-2317 par_redir builds
7174 // a `struct heredocs` entry here for REDIR_HEREDOC[DASH]. zshrs
7175 // pushes onto HDOCS (canonical C linked list, c:2290-2296) AND
7176 // onto LEX_HEREDOCS (Rust-only AST-glue Vec carrying parsed-out
7177 // terminator/strip_tabs/quoted metadata for downstream AST
7178 // consumers). Quoted terminators (`<<'EOF'` / `<<"EOF"` / `<<\EOF`)
7179 // disable expansion in the body — Snull `\u{9d}` marks single-quote,
7180 // Dnull `\u{9e}` marks double-quote, Bnull `\u{9f}` marks
7181 // backslash-escaped chars.
7182 let heredoc_idx = if matches!(rtype, REDIR_HEREDOC | REDIR_HEREDOCDASH) {
7183 let strip_tabs = rtype == REDIR_HEREDOCDASH;
7184 let quoted = name.contains('\u{9d}')
7185 || name.contains('\u{9e}')
7186 || name.contains('\u{9f}')
7187 || name.starts_with('\'')
7188 || name.starts_with('"');
7189 let term = name
7190 .chars()
7191 .filter(|c| {
7192 *c != '\'' && *c != '"' && *c != '\u{9d}' && *c != '\u{9e}' && *c != '\u{9f}'
7193 })
7194 .collect::<String>();
7195 // c:2290-2296 — `for (hd = &hdocs; *hd; hd = &(*hd)->next);
7196 // *hd = zalloc(sizeof(struct heredocs));
7197 // (*hd)->next = NULL;
7198 // (*hd)->type = htype;
7199 // (*hd)->pc = r;
7200 // (*hd)->str = tokstr;`
7201 // AST path has no wordcode pc to patch; use -1 sentinel so the
7202 // inline NEWLIN walk in `zshlex()` skips the setheredoc call.
7203 HDOCS.with_borrow_mut(|head| {
7204 let mut cur = head;
7205 while cur.is_some() {
7206 cur = &mut cur.as_mut().unwrap().next; // c:2290
7207 }
7208 *cur = Some(Box::new(crate::ported::zsh_h::heredocs { // c:2292-2296
7209 next: None,
7210 typ: rtype,
7211 pc: -1,
7212 str: Some(name.clone()),
7213 }));
7214 });
7215 // zshrs-only: push parallel AST-glue entry onto LEX_HEREDOCS.
7216 let idx = crate::ported::lex::LEX_HEREDOCS.with_borrow_mut(|v| {
7217 v.push(crate::ported::lex::HereDoc {
7218 terminator: term,
7219 strip_tabs,
7220 content: String::new(),
7221 quoted,
7222 processed: false,
7223 });
7224 v.len() - 1
7225 });
7226 Some(idx)
7227 } else {
7228 None
7229 };
7230
7231 Some(ZshRedir {
7232 rtype,
7233 fd,
7234 name,
7235 heredoc: None,
7236 varid,
7237 heredoc_idx,
7238 })
7239}
7240
7241/// Parse C-style for loop: for (( init; cond; step ))
7242/// Parse the c-style `for ((init; cond; incr)) do BODY done`.
7243/// Inner branch of zsh/Src/parse.c:1100-1140 inside par_for.
7244/// Recognized when the token after FOR is DINPAR (the `((`
7245/// detected by gettok via dbparens setup).
7246fn parse_for_cstyle() -> Option<ZshCommand> {
7247 // We're at (( (Dinpar None) - the opening ((
7248 // Lexer returns:
7249 // Dinpar None - opening ((
7250 // Dinpar "init" - init expression, semicolon consumed
7251 // Dinpar "cond" - cond expression, semicolon consumed
7252 // Doutpar "step" - step expression, closing )) consumed
7253
7254 zshlex(); // Get init: Dinpar "i=0"
7255
7256 if tok() != DINPAR {
7257 crate::ported::utils::zerr("expected init expression in for ((");
7258 return None;
7259 }
7260 let init = tokstr().unwrap_or_default();
7261
7262 zshlex(); // Get cond: Dinpar "i<10"
7263
7264 if tok() != DINPAR {
7265 crate::ported::utils::zerr("expected condition in for ((");
7266 return None;
7267 }
7268 let cond = tokstr().unwrap_or_default();
7269
7270 zshlex(); // Get step: Doutpar "i++"
7271
7272 if tok() != DOUTPAR {
7273 crate::ported::utils::zerr("expected )) in for");
7274 return None;
7275 }
7276 let step = tokstr().unwrap_or_default();
7277
7278 zshlex(); // Move past ))
7279
7280 skip_separators();
7281 let body = parse_loop_body(false, false)?;
7282
7283 Some(ZshCommand::For(ZshFor {
7284 var: String::new(),
7285 list: ForList::CStyle { init, cond, step },
7286 body: Box::new(body),
7287 is_select: false,
7288 }))
7289}
7290
7291/// Parse select loop (same syntax as for)
7292/// Parse `select NAME in WORDS; do BODY; done`. Same shape as
7293/// `for NAME in WORDS; do ...` but with menu-prompt semantics in
7294/// the executor. C equivalent: the SELECT case in par_for at
7295/// parse.c:1087-1207 (selects share parser flow with foreach).
7296fn parse_select() -> Option<ZshCommand> {
7297 // `select` shares par_for's grammar (var, words, body) but the
7298 // compile path is different (interactive prompt loop).
7299 match par_for()? {
7300 ZshCommand::For(mut f) => {
7301 f.is_select = true;
7302 Some(ZshCommand::For(f))
7303 }
7304 other => Some(other),
7305 }
7306}
7307
7308/// Parse loop body (do...done, {...}, or shortloop)
7309/// Parse the `do BODY done` body of a for/while/until/select/
7310/// repeat loop. Direct equivalent of zsh's parse.c handling
7311/// inside the loop builders — they all consume DOLOOP, parse a
7312/// list until DONE, and return the list. The `foreach_style`
7313/// flag signals foreach (where short-form `for NAME in WORDS;
7314/// CMD` may skip do/done) vs c-style (which always requires
7315/// do/done).
7316///
7317/// `is_repeat` widens the SHORTLOOPS gate so `SHORTREPEAT` also
7318/// unlocks the short form for `repeat N CMD` (per c:1600
7319/// `unset(SHORTLOOPS) && unset(SHORTREPEAT)`).
7320fn parse_loop_body(foreach_style: bool, is_repeat: bool) -> Option<ZshProgram> {
7321 // c:1180-1194 — body dispatch order per par_for:
7322 // `do ... done` (DOLOOP) — primary form.
7323 // `{ ... }` (INBRACE) — alternate.
7324 // csh/CSHJUNKIELOOPS — terminator is `end`.
7325 // else if (unset(SHORTLOOPS)) — YYERROR.
7326 // else — short form (single command).
7327 if tok() == DOLOOP {
7328 zshlex();
7329 let body = parse_program();
7330 if tok() == DONE {
7331 zshlex();
7332 }
7333 Some(body)
7334 } else if tok() == INBRACE_TOK {
7335 zshlex();
7336 let body = parse_program();
7337 if tok() == OUTBRACE_TOK {
7338 zshlex();
7339 }
7340 Some(body)
7341 } else if foreach_style || isset(CSHJUNKIELOOPS) {
7342 // c:1184 / 1546 / 1595 — `else if (csh || isset(CSHJUNKIELOOPS))`.
7343 let body = parse_program();
7344 if tok() == ZEND {
7345 zshlex();
7346 }
7347 Some(body)
7348 } else {
7349 // c:1190 / 1474 / 1551 / 1600 — short-form gate. C bails
7350 // with YYERROR when `unset(SHORTLOOPS) && (!is_repeat ||
7351 // unset(SHORTREPEAT))`. zshrs's option machinery isn't
7352 // initialised at parse-test time (no `init_main` →
7353 // `install_emulation_defaults`), so a strict port here
7354 // body. parse_init seeds SHORTLOOPS=on mirroring C
7355 // `install_emulation_defaults`, so this fires only when a
7356 // script explicitly disabled the option.
7357 if unset(SHORTLOOPS) && (!is_repeat || unset(SHORTREPEAT)) {
7358 crate::ported::utils::zerr("parse error: short loop form requires SHORTLOOPS option");
7359 return None;
7360 }
7361 // c:1192-1193 — short form: single command body.
7362 par_list().map(|list| ZshProgram { lists: vec![list] })
7363 }
7364}
7365
7366/// `() { body } arg1 arg2 …` — anonymous function. Defines a fresh
7367/// function named `_zshrs_anon_N`, invokes it with the args, and the
7368/// body runs with positional params set. Implemented as the desugared
7369/// pair (FuncDef + Simple call) so the compile path doesn't need new
7370/// machinery.
7371/// Parse an anonymous function definition `() { BODY }` followed
7372/// by call args. zsh treats `() { echo hi; } a b c` as defining
7373/// and immediately calling an anon fn with args a/b/c. C
7374/// equivalent: the INOUTPAR shape in par_simple at parse.c:1836+
7375/// triggers an anon-funcdef path.
7376fn parse_anon_funcdef() -> Option<ZshCommand> {
7377 zshlex(); // skip ()
7378 skip_separators();
7379 // No `{` after `()` → bare empty subshell shape `()`. Fall back
7380 // to a Subsh with an empty program so the status is 0 (matches
7381 // zsh's `()` no-op behavior).
7382 if tok() != INBRACE_TOK {
7383 return Some(ZshCommand::Subsh(Box::new(ZshProgram {
7384 lists: Vec::new(),
7385 })));
7386 }
7387 zshlex(); // skip {
7388 let body = parse_program();
7389 if tok() == OUTBRACE_TOK {
7390 zshlex();
7391 }
7392 // Collect any trailing args until a separator. zsh's anon-fn form
7393 // `() { body } a b c` runs body with $1=a, $2=b, $3=c.
7394 let mut args = Vec::new();
7395 while tok() == STRING_LEX {
7396 if let Some(s) = tokstr() {
7397 args.push(s);
7398 }
7399 zshlex();
7400 }
7401
7402 // Generate a unique name. Module-level static would be cleaner but
7403 // a thread-local atomic is enough — anonymous functions are
7404 // ephemeral and the name isn't user-visible.
7405 static ANON_COUNTER: AtomicUsize = AtomicUsize::new(0);
7406 let n = ANON_COUNTER.fetch_add(1, Ordering::Relaxed);
7407 let name = format!("_zshrs_anon_{}", n);
7408 Some(ZshCommand::FuncDef(ZshFuncDef {
7409 names: vec![name],
7410 body: Box::new(body),
7411 tracing: false,
7412 auto_call_args: Some(args),
7413 body_source: None,
7414 }))
7415}
7416
7417/// Parse {...} cursh
7418/// Parse a current-shell brace block `{ BODY }`. C source
7419/// par_cmd at parse.c:958-1085 handles Inbrace → emit WC_CURSH
7420/// and recurses into the list. zshrs's parse_cursh extracts that
7421/// arm into a dedicated method.
7422fn parse_cursh() -> Option<ZshCommand> {
7423 zshlex(); // skip {
7424 let prog = parse_program();
7425
7426 // Check for { ... } always { ... }. Direct port of zsh's
7427 // par_subsh at parse.c:1612-1660 — note the two `incmdpos = 1`
7428 // forces (parse.c:1632, 1637): after consuming the closing
7429 // Outbrace AND after matching the `always` keyword, the parser
7430 // explicitly resets command position so the next `{` lexes as
7431 // Inbrace. Without these resets the lexer's String-clears-cmdpos
7432 // rule (lex.rs:976-983) leaves the second `{` in word position,
7433 // turning `always { ... }` into a Simple `{` `echo` … and the
7434 // try/always pairing is silently lost.
7435 if tok() == OUTBRACE_TOK {
7436 set_incmdpos(true); // parse.c:1632 incmdpos = !zsh_construct
7437 zshlex();
7438
7439 // Check for 'always'
7440 if tok() == STRING_LEX {
7441 let s = tokstr();
7442 if s.map(|s| s == "always").unwrap_or(false) {
7443 set_incmdpos(true); // parse.c:1637 incmdpos = 1
7444 zshlex();
7445 skip_separators();
7446
7447 if tok() == INBRACE_TOK {
7448 zshlex();
7449 let always = parse_program();
7450 if tok() == OUTBRACE_TOK {
7451 zshlex();
7452 }
7453 return Some(ZshCommand::Try(ZshTry {
7454 try_block: Box::new(prog),
7455 always: Box::new(always),
7456 }));
7457 }
7458 }
7459 }
7460 }
7461
7462 Some(ZshCommand::Cursh(Box::new(prog)))
7463}
7464
7465/// Parse inline function definition: name() { ... }
7466/// Parse the inline form `NAME () { BODY }` (POSIX-style funcdef
7467/// without the `function` keyword). The name has already been
7468/// consumed and pushed by par_simple before this method fires.
7469/// C source: handled inline in par_simple's INOUTPAR-after-name
7470/// arm (parse.c:1836-2228).
7471fn parse_inline_funcdef(name: String) -> Option<ZshCommand> {
7472 // par_simple's STRING loop left `incmdpos = 0`; the funcdef body
7473 // `{ ... }` requires `incmdpos = 1` so the lexer recognises `{`
7474 // as INBRACE_TOK (current-shell block opener) instead of a
7475 // literal `{` STRING. Without this, `myfunc() { echo body }`
7476 // parsed the body as the single STRING `"{"`, then `echo body`
7477 // fell out at top level. Mirrors the C path where par_cmd's
7478 // dispatcher (parse.c:958) is called with `incmdpos = 1` for
7479 // the funcdef body.
7480 set_incmdpos(true);
7481 // Skip ()
7482 if tok() == INOUTPAR {
7483 zshlex();
7484 }
7485
7486 skip_separators();
7487
7488 // Parse body
7489 if tok() == INBRACE_TOK {
7490 // Same body_start-before-zshlex fix as par_funcdef.
7491 let body_start = pos();
7492 zshlex();
7493 let body = parse_program();
7494 let body_end = if tok() == OUTBRACE_TOK {
7495 pos().saturating_sub(1)
7496 } else {
7497 pos()
7498 };
7499 let body_source = input_slice(body_start, body_end)
7500 .map(|s| s.trim().to_string())
7501 .filter(|s| !s.is_empty());
7502 if tok() == OUTBRACE_TOK {
7503 zshlex();
7504 }
7505 Some(ZshCommand::FuncDef(ZshFuncDef {
7506 names: vec![name],
7507 body: Box::new(body),
7508 tracing: false,
7509 auto_call_args: None,
7510 body_source,
7511 }))
7512 } else if unset(SHORTLOOPS) {
7513 // c:1742 — `else if (unset(SHORTLOOPS)) YYERRORV(oecused);` —
7514 // funcdef short body (`name() cmd` without `{...}`) only
7515 // accepted when SHORTLOOPS is set. parse_init seeds
7516 // SHORTLOOPS=on so this fires only when a script
7517 // explicitly disabled the option.
7518 crate::ported::utils::zerr("parse error: short function body form requires SHORTLOOPS option");
7519 None
7520 } else {
7521 match par_cmd() {
7522 Some(cmd) => {
7523 let list = ZshList {
7524 sublist: ZshSublist {
7525 pipe: ZshPipe {
7526 cmd,
7527 next: None,
7528 lineno: lineno(),
7529 merge_stderr: false,
7530 },
7531 next: None,
7532 flags: SublistFlags::default(),
7533 },
7534 flags: ListFlags::default(),
7535 };
7536 Some(ZshCommand::FuncDef(ZshFuncDef {
7537 names: vec![name],
7538 body: Box::new(ZshProgram { lists: vec![list] }),
7539 tracing: false,
7540 auto_call_args: None,
7541 body_source: None,
7542 }))
7543 }
7544 None => None,
7545 }
7546 }
7547}
7548
/// Parse a `[[ ]]` conditional expression.
///
/// Entry point of the recursive descent (or → and → not → primary);
/// it simply delegates to the `||` level. Returns `None` when no
/// expression could be parsed. C reference: par_cond_1 at
/// parse.c:2434-2475.
fn parse_cond_expr() -> Option<ZshCond> {
    parse_cond_or()
}
7556
7557/// Cond-expression `||` level. C: inside par_cond_1 at
7558/// parse.c:2434-2475 (the `cond_or` ladder).
7559fn parse_cond_or() -> Option<ZshCond> {
7560 let left = parse_cond_and()?;
7561 skip_cond_separators();
7562
7563 if tok() == DBAR {
7564 zshlex();
7565 skip_cond_separators();
7566 parse_cond_or().map(|right| ZshCond::Or(Box::new(left), Box::new(right)))
7567 } else {
7568 Some(left)
7569 }
7570}
7571
7572/// Cond-expression `&&` level. C: par_cond_2 at parse.c:2476-2625.
7573fn parse_cond_and() -> Option<ZshCond> {
7574 let left = parse_cond_not()?;
7575 skip_cond_separators();
7576
7577 if tok() == DAMPER {
7578 zshlex();
7579 skip_cond_separators();
7580 parse_cond_and().map(|right| ZshCond::And(Box::new(left), Box::new(right)))
7581 } else {
7582 Some(left)
7583 }
7584}
7585
/// `static FuncDump dumps;` from `Src/parse.c:3652` — head of the
/// loaded-`.zwc` linked list. C walks `dumps`/`p->next` directly;
/// the Rust port keeps the entries in a `Mutex<Vec<funcdump>>` and
/// locates them by filename, so refcount ops can find an entry
/// without raw-pointer compare. The list starts out empty.
pub static DUMPS: std::sync::Mutex<Vec<crate::ported::zsh_h::funcdump>> =
    std::sync::Mutex::new(Vec::new());
7592
7593/// Cond-expression `!` negation level. C: handled inside
7594/// par_cond_2 at parse.c:2476-2625 via the Bang token check.
7595fn parse_cond_not() -> Option<ZshCond> {
7596 skip_cond_separators();
7597
7598 // ! can be either BANG_TOK or String "!"
7599 let is_not =
7600 tok() == BANG_TOK || (tok() == STRING_LEX && tokstr().map(|s| s == "!").unwrap_or(false));
7601 if is_not {
7602 zshlex();
7603 let inner = parse_cond_not()?;
7604 return Some(ZshCond::Not(Box::new(inner)));
7605 }
7606
7607 if tok() == INPAR_TOK {
7608 zshlex();
7609 skip_cond_separators();
7610 let inner = parse_cond_expr()?;
7611 skip_cond_separators();
7612 if tok() == OUTPAR_TOK {
7613 zshlex();
7614 }
7615 return Some(inner);
7616 }
7617
7618 parse_cond_primary()
7619}
7620
7621/// Cond-expression primary: unary tests (-f, -d, ...), binary
7622/// tests (=, !=, <, >, ==, =~, -eq, -ne, ...), and parenthesized
7623/// sub-expressions. Direct port of par_cond_double / par_cond_triple
7624/// / par_cond_multi at parse.c:2626-2731 (chosen by arg count).
7625fn parse_cond_primary() -> Option<ZshCond> {
7626 let s1 = match tok() {
7627 STRING_LEX => {
7628 let s = tokstr().unwrap_or_default();
7629 zshlex();
7630 s
7631 }
7632 _ => return None,
7633 };
7634
7635 skip_cond_separators();
7636
7637 // Check for unary operator. zsh's lexer tokenizes leading `-` as
7638 // `zsh_h::Dash` (`\u{9b}`, `Src/zsh.h:182`) inside gettokstr (lex.c:1390-1400
7639 // LX2_DASH — `-` always becomes Dash, untokenized later). Match
7640 // either form here, and use char-count not byte-count since Dash
7641 // is 2 UTF-8 bytes (`\xc2\x9b`).
7642 let s1_chars: Vec<char> = s1.chars().collect();
7643 if s1_chars.len() == 2 && IS_DASH(s1_chars[0]) {
7644 let s2 = match tok() {
7645 STRING_LEX => {
7646 let s = tokstr().unwrap_or_default();
7647 zshlex();
7648 s
7649 }
7650 _ => return Some(ZshCond::Unary("-n".to_string(), s1)),
7651 };
7652 return Some(ZshCond::Unary(s1, s2));
7653 }
7654
7655 // Check for binary operator. Direct port of zsh/Src/parse.c:2601-2603:
7656 // incond++; /* parentheses do globbing */
7657 // do condlex(); while (COND_SEP());
7658 // incond--; /* parentheses do grouping */
7659 // The bump makes the lexer treat `(` as a literal character inside
7660 // the RHS word (e.g. `[[ x =~ (foo) ]]`) instead of returning Inpar
7661 // and splitting the regex into multiple tokens.
7662 let op = match tok() {
7663 STRING_LEX => {
7664 let s = tokstr().unwrap_or_default();
7665 set_incond(incond() + 1);
7666 zshlex();
7667 set_incond(incond() - 1);
7668 s
7669 }
7670 INANG_TOK => {
7671 set_incond(incond() + 1);
7672 zshlex();
7673 set_incond(incond() - 1);
7674 "<".to_string()
7675 }
7676 OUTANG_TOK => {
7677 set_incond(incond() + 1);
7678 zshlex();
7679 set_incond(incond() - 1);
7680 ">".to_string()
7681 }
7682 _ => return Some(ZshCond::Unary("-n".to_string(), s1)),
7683 };
7684
7685 skip_cond_separators();
7686
7687 let s2 = match tok() {
7688 STRING_LEX => {
7689 let s = tokstr().unwrap_or_default();
7690 zshlex();
7691 s
7692 }
7693 _ => return Some(ZshCond::Binary(s1, op, String::new())),
7694 };
7695
7696 if op == "=~" {
7697 Some(ZshCond::Regex(s1, s2))
7698 } else {
7699 Some(ZshCond::Binary(s1, op, s2))
7700 }
7701}
7702
7703fn skip_cond_separators() {
7704 while tok() == SEPER && {
7705 let s = tokstr();
7706 s.map(|s| !s.contains(';')).unwrap_or(true)
7707 } {
7708 zshlex();
7709 }
7710}
7711
/// Parse a `(( EXPR ))` arithmetic command. C source: parse.c:1810-1834
/// `par_dinbrack` (despite the name; the function actually handles
/// DINPAR `(( ))` blocks too).
///
/// Takes the current token's text as the expression (empty when the
/// token carries no text), advances the lexer by one token and always
/// returns `Some(ZshCommand::Arith(..))`.
fn parse_arith() -> Option<ZshCommand> {
    let expr = tokstr().unwrap_or_default();
    zshlex();
    Some(ZshCommand::Arith(expr))
}
7721
7722/// Skip separator tokens
7723fn skip_separators() {
7724 while tok() == SEPER || tok() == NEWLIN {
7725 zshlex();
7726 }
7727}
7728
7729// `fdheaderlen` / `fdmagic` / `fdflags` / etc. macros from
7730// `Src/parse.c:3125-3152`. C uses raw pointer arithmetic on a
7731// `Wordcode` (= `u32 *`); the Rust port takes a slice and indexes.
7732
/// Port of `fdheaderlen(f)` macro (`Src/parse.c:3125`) — header
/// length in u32 words, read from the word at index `FD_PRELEN`.
///
/// Panics if `f` has no word at that index.
#[inline]
pub fn fdheaderlen(f: &[u32]) -> u32 {
    f[FD_PRELEN]
}
7739
/// Port of `fdmagic(f)` macro (`Src/parse.c:3127`) — first prelude
/// word, either `FD_MAGIC` or `FD_OMAGIC`.
///
/// Panics if `f` is empty.
#[inline]
pub fn fdmagic(f: &[u32]) -> u32 {
    f[0]
}
7746
/// Port of `fdflags(f)` macro (`Src/parse.c:3131`) — the flags byte,
/// i.e. the least-significant byte of the packed `pre[1]` word.
///
/// Panics if `f` has fewer than two words.
#[inline]
pub fn fdflags(f: &[u32]) -> u32 {
    // Byte 0 of the little-endian view is the low 8 bits of the word.
    let low_byte = f[1].to_le_bytes()[0];
    u32::from(low_byte)
}
7754
/// Port of `fdsetflags(f, v)` macro (`Src/parse.c:3132`) — overwrite
/// the least-significant byte of `pre[1]` with `v`, leaving the upper
/// 24 bits untouched.
///
/// Panics if `f` has fewer than two words.
#[inline]
pub fn fdsetflags(f: &mut [u32], v: u8) {
    let upper = f[1] & 0xffff_ff00;
    f[1] = upper | u32::from(v);
}
7761
/// Port of `fdother(f)` macro (`Src/parse.c:3133`) — the upper 24
/// bits of `pre[1]`, which hold the byte-offset to the
/// opposite-byte-order dump copy.
///
/// Panics if `f` has fewer than two words.
#[inline]
pub fn fdother(f: &[u32]) -> u32 {
    // A logical right shift of a u32 by 8 already leaves only 24 bits.
    f[1] >> 8
}
7769
/// Port of `fdsetother(f, o)` macro (`Src/parse.c:3134`) — store the
/// low 24 bits of `o` in the upper 24 bits of `pre[1]`, keeping the
/// flags byte in the low 8 bits.
///
/// Panics if `f` has fewer than two words.
#[inline]
pub fn fdsetother(f: &mut [u32], o: u32) {
    let flags_byte = f[1] & 0xff;
    // Shifting left by 8 discards the top 8 bits of `o`.
    f[1] = (o << 8) | flags_byte;
}
7775
/// Port of `fdversion(f)` macro (`Src/parse.c:3140`) — read the
/// `ZSH_VERSION` C-string stored from `pre[2]` onward.
///
/// The words are decoded as little-endian bytes and at most ten words
/// (40 bytes) are scanned. The string ends at the first NUL byte, or at
/// the end of the scanned range when no NUL is present; invalid UTF-8
/// is replaced lossily.
///
/// A slice too short to contain any version words yields an empty
/// string instead of panicking, so a truncated header cannot abort the
/// caller.
pub fn fdversion(f: &[u32]) -> String {
    let bytes: Vec<u8> = f
        .get(2..)
        .unwrap_or(&[])
        .iter()
        .take(10)
        .flat_map(|word| word.to_le_bytes())
        // Stop at the terminator rather than collecting the whole range
        // and searching it afterwards.
        .take_while(|&byte| byte != 0)
        .collect();
    String::from_utf8_lossy(&bytes).into_owned()
}
7787
/// Port of `firstfdhead(f)` macro (`Src/parse.c:3142`) — position of
/// the first `struct fdhead` past the prelude. Where the C macro
/// yields a pointer, this port returns the word index `FD_PRELEN`.
#[inline]
pub fn firstfdhead_offset() -> usize {
    FD_PRELEN
}
7794
/// Port of `nextfdhead(f)` macro (`Src/parse.c:3143`) — word offset of
/// the header that follows the one at `cur`.
///
/// The step is the current header's `hlen` field, which is the fifth
/// word of the record (index `cur + 4`). Panics if that word lies
/// outside `f`.
#[inline]
pub fn nextfdhead_offset(f: &[u32], cur: usize) -> usize {
    let hlen = f[cur + 4] as usize;
    cur + hlen
}
7801
/// Port of `fdhflags(f)` macro (`Src/parse.c:3145`) — low 2 bits
/// of the header's `flags` field (the kshload/zshload marker).
/// The remaining bits are read by `fdhtail`.
#[inline]
pub fn fdhflags(h: &fdhead) -> u32 {
    h.flags & 0x3
}
7808
/// Port of `fdhtail(f)` macro (`Src/parse.c:3146`) — high 30 bits
/// of `flags`, byte offset from the name start to its basename.
/// The low 2 bits are read by `fdhflags`.
#[inline]
pub fn fdhtail(h: &fdhead) -> u32 {
    h.flags >> 2
}
7815
/// Port of `fdhbldflags(f, t)` macro (`Src/parse.c:3147`) — build the
/// packed header `flags` word: `tail` shifted into the high 30 bits,
/// OR-ed with `flags`.
///
/// `flags` is OR-ed in without masking, so callers are expected to
/// pass a value that fits in the low 2 bits.
#[inline]
pub fn fdhbldflags(flags: u32, tail: u32) -> u32 {
    let shifted_tail = tail << 2;
    shifted_tail | flags
}
7822
7823/// Port of `fdname(f)` macro (`Src/parse.c:3152`) — name string
7824/// follows the fdhead record immediately. Reads bytes from the
7825/// dump buffer until NUL.
7826pub fn fdname(buf: &[u32], header_offset: usize) -> String {
7827 let name_word_off = header_offset + FDHEAD_WORDS;
7828 let bytes: Vec<u8> = buf[name_word_off..]
7829 .iter()
7830 .flat_map(|w| w.to_le_bytes().into_iter())
7831 .take_while(|&b| b != 0)
7832 .collect();
7833 String::from_utf8_lossy(&bytes).into_owned()
7834}
7835
7836/// Decode a `fdhead` record at the given u32-word offset in the
7837/// dump buffer. Used by the header-walk loops in `bin_zcompile -t`.
7838pub fn read_fdhead(buf: &[u32], offset: usize) -> Option<fdhead> {
7839 if offset + FDHEAD_WORDS > buf.len() {
7840 return None;
7841 }
7842 Some(fdhead {
7843 start: buf[offset],
7844 len: buf[offset + 1],
7845 npats: buf[offset + 2],
7846 strs: buf[offset + 3],
7847 hlen: buf[offset + 4],
7848 flags: buf[offset + 5],
7849 })
7850}
7851
7852/// Port of `freedump(FuncDump f)` from `Src/parse.c:3976`. C
7853/// `munmap`s, `zclose`s the fd, and frees the struct. The Rust
7854/// port relies on Drop for the `funcdump` (no mmap held in this
7855/// port — `addr`/`map` are byte-offset placeholders), so the
7856/// equivalent is removing the entry from the dumps list. Called
7857/// by `decrdumpcount` when the refcount hits zero (c:3988) and
7858/// by `closedumps` when shutting down (c:4008).
7859fn freedump_locked(
7860 g: &mut std::sync::MutexGuard<'_, Vec<crate::ported::zsh_h::funcdump>>,
7861 filename: &str,
7862) {
7863 // c:3976
7864 g.retain(|d| d.filename.as_deref() != Some(filename));
7865}
7866
7867// =====================================================================
7868// Remaining `Src/parse.c` ports (this section finishes the file).
7869//
7870// Several of these emit into the C-wordcode buffer (`ECBUF`/etc.) and
7871// are kept for completeness — the live zshrs runtime uses the
7872// `ZshProgram` AST path instead, but `bin_zcompile` (`-c`/`-a` modes)
7873// and any future `.zwc`-emit pipeline both call into these.
7874// =====================================================================
7875
7876/// `ecstr(s)` helper — `ecadd(ecstrcode(s))`. Mirrors the C macro at
7877/// `Src/parse.c:482` used everywhere by the par_* emitters.
7878#[inline]
7879pub fn ecstr(s: &str) {
7880 let code = ecstrcode(s);
7881 ecadd(code);
7882}
7883
/// Port of `condlex` function-pointer global from `Src/parse.c`. C
/// flips this between `zshlex` and `testlex` depending on whether
/// we're inside `[[ ]]` vs `/bin/test` builtin. zshrs has no
/// separate `testlex` yet, so this just defers to `zshlex` and
/// advances the lexer by one token.
#[inline]
pub fn condlex() {
    zshlex();
}
7892
7893fn copy_ecstr_walk(node: &Option<Box<EccstrNode>>, p: &mut [u8]) {
7894 let mut cur = node.as_ref();
7895 while let Some(n) = cur {
7896 // c:540 — `memcpy(p + s->aoffs, s->str, strlen(s->str) + 1);`
7897 let off = n.aoffs as usize;
7898 let need = off + n.str.len() + 1;
7899 if need <= p.len() {
7900 p[off..off + n.str.len()].copy_from_slice(&n.str);
7901 p[off + n.str.len()] = 0;
7902 }
7903 // c:541 — `copy_ecstr(s->left, p);`
7904 copy_ecstr_walk(&n.left, p);
7905 // c:542 — `s = s->right;`
7906 cur = n.right.as_ref();
7907 }
7908}
7909
7910/// Port of `par_cond(void)` from `Src/parse.c:2409`. Top-level cond
7911/// OR-chain — drives `par_cond_1` and stitches `||`-separated terms
7912/// with `WCB_COND(COND_OR, …)`. This is the missing top of the
7913/// wordcode cond chain: `par_cond_wordcode` (the par_dinbrack port)
7914/// must call into HERE so that `[[ a || b ]]` and friends land
7915/// real WC_COND opcodes in `ecbuf`. Without this, the wordcode
7916/// emitter for `[[ ... ]]` produced zero words and parity dropped
7917/// 148 words on `/etc/zshrc` alone.
7918pub fn par_cond_top() -> i32 {
7919 // c:2411 — `int p = ecused, r;`
7920 let p = ECUSED.with(|c| c.get()) as usize;
7921 let r = par_cond_1();
7922 while COND_SEP() {
7923 condlex();
7924 }
7925 if tok() == DBAR {
7926 // c:2417 — `condlex(); while (COND_SEP()) condlex();`
7927 condlex();
7928 while COND_SEP() {
7929 condlex();
7930 }
7931 // c:2420-2422 — `ecispace(p, 1); par_cond(); ecbuf[p] =
7932 // WCB_COND(COND_OR, ecused-1-p);`
7933 ecispace(p, 1);
7934 par_cond_top();
7935 let ecused = ECUSED.with(|c| c.get()) as usize;
7936 ECBUF.with(|c| {
7937 c.borrow_mut()[p] = WCB_COND(COND_OR as u32, (ecused - 1 - p) as u32);
7938 });
7939 return 1;
7940 }
7941 r
7942}
7943
7944/// Port of `static int check_cond(const char *input, const char *cond)`
7945/// from `Src/parse.c:2459`. True iff `input` is the two-char `-X`
7946/// form whose `X` matches `cond` — used by par_cond_2 to detect
7947/// `-a` / `-o` n-ary chain operators and by build_dump for `-k` /
7948/// `-z`. C: `return !IS_DASH(input[0]) ? 0 : !strcmp(input+1, cond);`.
7949fn check_cond(input: &str, cond: &str) -> bool {
7950 let mut chars = input.chars();
7951 match chars.next() {
7952 Some(c) if IS_DASH(c) => chars.as_str() == cond,
7953 _ => false,
7954 }
7955}
7956
7957#[cfg(test)]
7958mod tests {
7959 use super::*;
7960 use crate::utils::{errflag, ERRFLAG_ERROR};
7961 use std::fs;
7962 use std::path::Path;
7963 use std::sync::atomic::Ordering;
7964 use std::sync::mpsc;
7965 use std::thread;
7966 use std::time::{Duration, Instant};
7967
7968 /// Test helper. Mirrors zsh's `errflag` save/clear/check pattern
7969 /// around a parse — see `Src/init.c:loop` which clears errflag
7970 /// before parse_event() and tests it after. Returns `Err` if the
7971 /// parse set `ERRFLAG_ERROR`; otherwise `Ok(program)`.
7972 fn parse(input: &str) -> Result<ZshProgram, String> {
7973 let saved = errflag.load(Ordering::Relaxed);
7974 errflag.fetch_and(!ERRFLAG_ERROR, Ordering::Relaxed);
7975 crate::ported::parse::parse_init(input);
7976 let prog = crate::ported::parse::parse();
7977 let had_err = (errflag.load(Ordering::Relaxed) & ERRFLAG_ERROR) != 0;
7978 // Restore prior error bits; don't carry our new error into the
7979 // outer test runner.
7980 errflag.store(saved, Ordering::Relaxed);
7981 if had_err {
7982 Err("parse error".to_string())
7983 } else {
7984 Ok(prog)
7985 }
7986 }
7987
/// A plain command line parses to one list holding a Simple command
/// with its three words in order.
#[test]
fn test_simple_command() {
    let prog = parse("echo hello world").unwrap();
    assert_eq!(prog.lists.len(), 1);

    let cmd = &prog.lists[0].sublist.pipe.cmd;
    if let ZshCommand::Simple(simple) = cmd {
        assert_eq!(simple.words, vec!["echo", "hello", "world"]);
    } else {
        panic!("expected simple command");
    }
}
7999
/// A three-stage pipeline parses to one list whose pipe chain has at
/// least two `next` links.
#[test]
fn test_pipeline() {
    let prog = parse("ls | grep foo | wc -l").unwrap();
    assert_eq!(prog.lists.len(), 1);

    let first_stage = &prog.lists[0].sublist.pipe;
    assert!(first_stage.next.is_some());

    let second_stage = first_stage.next.as_ref().unwrap();
    assert!(second_stage.next.is_some());
}
8011
/// In `a && b || c` the first sublist link carries the `&&` operator.
#[test]
fn test_and_or() {
    let prog = parse("cmd1 && cmd2 || cmd3").unwrap();
    let first = &prog.lists[0].sublist;

    assert!(first.next.is_some());
    let link = first.next.as_ref().unwrap();
    assert_eq!(link.0, SublistOp::And);
}
8021
/// An `if … then … fi` line parses to an If command.
#[test]
fn test_if_then() {
    let prog = parse("if test -f foo; then echo yes; fi").unwrap();
    let cmd = &prog.lists[0].sublist.pipe.cmd;
    if !matches!(cmd, ZshCommand::If(_)) {
        panic!("expected if command");
    }
}
8030
/// A `for VAR in WORDS` loop records the variable name and the word
/// list in order.
#[test]
fn test_for_loop() {
    let prog = parse("for i in a b c; do echo $i; done").unwrap();

    let for_cmd = match &prog.lists[0].sublist.pipe.cmd {
        ZshCommand::For(f) => f,
        _ => panic!("expected for command"),
    };
    assert_eq!(for_cmd.var, "i");

    if let ForList::Words(w) = &for_cmd.list {
        assert_eq!(w, &vec!["a", "b", "c"]);
    } else {
        panic!("expected word list");
    }
}
8045
/// A two-arm `case` statement parses to a Case command with two arms.
#[test]
fn test_case() {
    let prog = parse("case $x in a) echo a;; b) echo b;; esac").unwrap();
    let cmd = &prog.lists[0].sublist.pipe.cmd;
    if let ZshCommand::Case(case_cmd) = cmd {
        assert_eq!(case_cmd.arms.len(), 2);
    } else {
        panic!("expected case command");
    }
}
8056
/// `function NAME { }` parses to a FuncDef carrying that single name.
/// On failure the panic message shows the command that was produced.
#[test]
fn test_function() {
    let prog = parse("function foo { }").unwrap();
    let cmd = &prog.lists[0].sublist.pipe.cmd;
    if let ZshCommand::FuncDef(def) = cmd {
        assert_eq!(def.names, vec!["foo"]);
    } else {
        panic!("expected function, got {:?}", cmd);
    }
}
8071
/// `cmd > file` attaches exactly one redirection of type REDIR_WRITE
/// to the Simple command.
#[test]
fn test_redirection() {
    let prog = parse("echo hello > file.txt").unwrap();
    let cmd = &prog.lists[0].sublist.pipe.cmd;
    if let ZshCommand::Simple(simple) = cmd {
        assert_eq!(simple.redirs.len(), 1);
        assert_eq!(simple.redirs[0].rtype, REDIR_WRITE);
    } else {
        panic!("expected simple command");
    }
}
8083
/// A leading `NAME=value` is recorded as one assignment on the Simple
/// command, under the name `FOO`.
#[test]
fn test_assignment() {
    let prog = parse("FOO=bar echo $FOO").unwrap();
    let cmd = &prog.lists[0].sublist.pipe.cmd;
    if let ZshCommand::Simple(simple) = cmd {
        assert_eq!(simple.assigns.len(), 1);
        assert_eq!(simple.assigns[0].name, "FOO");
    } else {
        panic!("expected simple command");
    }
}
8095
/// A completion helper defined with the inline `name() { ... }` form —
/// containing nested parameter expansions, a command substitution and
/// a `(( ... ))` test — must parse without error and yield at least
/// one list.
#[test]
fn test_parse_completion_function() {
    let input = r#"_2to3_fixes() {
 local -a fixes
 fixes=( ${${(M)${(f)"$(2to3 --list-fixes 2>/dev/null)"}:#*}//[[:space:]]/} )
 (( ${#fixes} )) && _describe -t fixes 'fix' fixes
}"#;
    let result = parse(input);
    assert!(
        result.is_ok(),
        "Failed to parse completion function: {:?}",
        result.err()
    );
    let prog = result.unwrap();
    assert!(
        !prog.lists.is_empty(),
        "Expected at least one list in program"
    );
}
8115
/// A multi-line array assignment whose elements mix quoted strings
/// with brace groups must parse without error. Only success is
/// checked; the resulting program is not inspected.
#[test]
fn test_parse_array_with_complex_elements() {
    let input = r#"arguments=(
 '(- * :)'{-h,--help}'[show this help message and exit]'
 {-d,--doctests_only}'[fix up doctests only]'
 '*:filename:_files'
)"#;
    let result = parse(input);
    assert!(
        result.is_ok(),
        "Failed to parse array assignment: {:?}",
        result.err()
    );
}
8130
/// A complete `#compdef` completion file (comments, a helper function,
/// a `local` declaration, a large array assignment and the final
/// `_arguments` call) must parse without error and yield at least one
/// list.
#[test]
fn test_parse_full_completion_file() {
    let input = r##"#compdef 2to3

# zsh completions for '2to3'

_2to3_fixes() {
 local -a fixes
 fixes=( ${${(M)${(f)"$(2to3 --list-fixes 2>/dev/null)"}:#*}//[[:space:]]/} )
 (( ${#fixes} )) && _describe -t fixes 'fix' fixes
}

local -a arguments

arguments=(
 '(- * :)'{-h,--help}'[show this help message and exit]'
 {-d,--doctests_only}'[fix up doctests only]'
 {-f,--fix}'[each FIX specifies a transformation; default: all]:fix name:_2to3_fixes'
 {-j,--processes}'[run 2to3 concurrently]:number: '
 {-x,--nofix}'[prevent a transformation from being run]:fix name:_2to3_fixes'
 {-l,--list-fixes}'[list available transformations]'
 {-p,--print-function}'[modify the grammar so that print() is a function]'
 {-v,--verbose}'[more verbose logging]'
 '--no-diffs[do not show diffs of the refactoring]'
 {-w,--write}'[write back modified files]'
 {-n,--nobackups}'[do not write backups for modified files]'
 {-o,--output-dir}'[put output files in this directory instead of overwriting]:directory:_directories'
 {-W,--write-unchanged-files}'[also write files even if no changes were required]'
 '--add-suffix[append this string to all output filenames]:suffix: '
 '*:filename:_files'
)

_arguments -s -S $arguments
"##;
    let result = parse(input);
    assert!(
        result.is_ok(),
        "Failed to parse full completion file: {:?}",
        result.err()
    );
    let prog = result.unwrap();
    // Should have parsed successfully with at least one statement
    assert!(!prog.lists.is_empty(), "Expected at least one list");
}
8175
/// A bash-flavoured script with a shebang, nested `if`/`else` blocks,
/// `[[ ... ]]` tests, `**` globs, pipelines and a `>&2` redirection
/// must parse without error. Only success is checked.
#[test]
fn test_parse_logs_sh() {
    let input = r#"#!/usr/bin/env bash
shopt -s globstar

if [[ $(uname) == Darwin ]]; then
 tail -f /var/log/**/*.log /var/log/**/*.out | lolcat
else
 if [[ $ZPWR_DISTRO_NAME == raspbian ]]; then
 tail -f /var/log/**/*.log | lolcat
 else
 printf "Unsupported...\n" >&2
 fi
fi
"#;
    let result = parse(input);
    assert!(
        result.is_ok(),
        "Failed to parse logs.sh: {:?}",
        result.err()
    );
}
8198
/// A `case` statement with glob patterns (`darwin*`, ...) and `;;`
/// terminators placed on their own lines must parse without error.
/// Only success is checked.
#[test]
fn test_parse_case_with_glob() {
    let input = r#"case "$ZPWR_OS_TYPE" in
 darwin*) open_cmd='open'
 ;;
 cygwin*) open_cmd='cygstart'
 ;;
 linux*)
 open_cmd='xdg-open'
 ;;
esac"#;
    let result = parse(input);
    assert!(
        result.is_ok(),
        "Failed to parse case with glob: {:?}",
        result.err()
    );
}
8217
/// A `function name(){ ... }` definition whose body holds a `case`
/// with glob patterns and an `if [[ ... ]]` nested inside one arm must
/// parse without error. Only success is checked.
#[test]
fn test_parse_case_with_nested_if() {
    // Test case with nested if and glob patterns
    let input = r##"function zpwrGetOpenCommand(){
 local open_cmd
 case "$ZPWR_OS_TYPE" in
 darwin*) open_cmd='open' ;;
 cygwin*) open_cmd='cygstart' ;;
 linux*)
 if [[ "$_zpwr_uname_r" != *icrosoft* ]];then
 open_cmd='nohup xdg-open'
 fi
 ;;
 esac
}"##;
    let result = parse(input);
    assert!(result.is_ok(), "Failed to parse: {:?}", result.err());
}
8236
/// Bulk smoke test: parse every `*.sh` / `*.zsh` file in a local
/// scripts directory and require that at least half of them parse.
///
/// The directory is machine-specific; the test is skipped when it does
/// not exist or contains no matching scripts. Each file is parsed on
/// its own thread with a two-second budget. A parse that exceeds the
/// budget is listed as a timeout (its thread is left running), while a
/// parser thread that dies without sending a result is listed as a
/// failure rather than as a timeout.
#[test]
fn test_parse_zpwr_scripts() {
    let scripts_dir = Path::new("/Users/wizard/.zpwr/scripts");
    if !scripts_dir.exists() {
        eprintln!("Skipping test: scripts directory not found");
        return;
    }

    let mut total = 0;
    let mut passed = 0;
    let mut failed_files = Vec::new();
    let mut timeout_files = Vec::new();

    for ext in &["sh", "zsh"] {
        let pattern = scripts_dir.join(format!("*.{}", ext));
        if let Ok(entries) = glob::glob(pattern.to_str().unwrap()) {
            for entry in entries.flatten() {
                total += 1;
                let file_path = entry.display().to_string();
                let content = match fs::read_to_string(&entry) {
                    Ok(c) => c,
                    Err(e) => {
                        failed_files.push((file_path, format!("read error: {}", e)));
                        continue;
                    }
                };

                // Parse with timeout. The file content is moved into
                // the worker; nothing else needs it afterwards.
                let (tx, rx) = mpsc::channel();
                thread::spawn(move || {
                    let _ = tx.send(parse(&content));
                });

                match rx.recv_timeout(Duration::from_secs(2)) {
                    Ok(Ok(_)) => passed += 1,
                    Ok(Err(err)) => {
                        failed_files.push((file_path, err));
                    }
                    Err(mpsc::RecvTimeoutError::Timeout) => {
                        // Thread will be abandoned
                        timeout_files.push(file_path);
                    }
                    Err(mpsc::RecvTimeoutError::Disconnected) => {
                        // The sender was dropped without a result,
                        // i.e. the worker panicked during the parse.
                        failed_files.push((file_path, "parser thread panicked".to_string()));
                    }
                }
            }
        }
    }

    // An existing but empty directory gives nothing to measure; treat
    // it like a missing directory instead of failing on a 0% rate.
    if total == 0 {
        eprintln!("Skipping test: no scripts found in scripts directory");
        return;
    }

    eprintln!("\n=== ZPWR Scripts Parse Results ===");
    eprintln!("Passed: {}/{}", passed, total);

    if !timeout_files.is_empty() {
        eprintln!("\nTimeout files (>2s):");
        for file in &timeout_files {
            eprintln!(" {}", file);
        }
    }

    if !failed_files.is_empty() {
        eprintln!("\nFailed files:");
        for (file, err) in &failed_files {
            eprintln!(" {} - {}", file, err);
        }
    }

    // Allow some failures initially, but track progress
    let pass_rate = (passed as f64 / total as f64) * 100.0;
    eprintln!("Pass rate: {:.1}%", pass_rate);

    // Require at least 50% pass rate for now
    assert!(pass_rate >= 50.0, "Pass rate too low: {:.1}%", pass_rate);
}
8314
/// Bulk smoke test over the bundled `test_data/zsh_functions`
/// directory: every regular file is parsed on its own thread with a
/// two-second budget, and at least half of them must parse.
///
/// Ignored by default because a timed-out parser thread cannot be
/// stopped. The test is skipped when the directory is missing or holds
/// no files. A parser thread that dies without sending a result is
/// listed as a failure rather than as a timeout.
#[test]
#[ignore] // Uses threads that can't be killed on timeout; use integration test instead
fn test_parse_zsh_stdlib_functions() {
    let functions_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("test_data/zsh_functions");
    if !functions_dir.exists() {
        eprintln!(
            "Skipping test: zsh_functions directory not found at {:?}",
            functions_dir
        );
        return;
    }

    let mut total = 0;
    let mut passed = 0;
    let mut failed_files = Vec::new();
    let mut timeout_files = Vec::new();

    if let Ok(entries) = fs::read_dir(&functions_dir) {
        for entry in entries.flatten() {
            let path = entry.path();
            if !path.is_file() {
                continue;
            }

            total += 1;
            let file_path = path.display().to_string();
            let content = match fs::read_to_string(&path) {
                Ok(c) => c,
                Err(e) => {
                    failed_files.push((file_path, format!("read error: {}", e)));
                    continue;
                }
            };

            // Parse with timeout. The file content is moved into the
            // worker; nothing else needs it afterwards.
            let (tx, rx) = mpsc::channel();
            thread::spawn(move || {
                let _ = tx.send(parse(&content));
            });

            match rx.recv_timeout(Duration::from_secs(2)) {
                Ok(Ok(_)) => passed += 1,
                Ok(Err(err)) => {
                    failed_files.push((file_path, err));
                }
                Err(mpsc::RecvTimeoutError::Timeout) => {
                    timeout_files.push(file_path);
                }
                Err(mpsc::RecvTimeoutError::Disconnected) => {
                    // The sender was dropped without a result, i.e.
                    // the worker panicked during the parse.
                    failed_files.push((file_path, "parser thread panicked".to_string()));
                }
            }
        }
    }

    // Nothing to measure: treat an empty directory like a missing one
    // instead of failing on a 0% rate.
    if total == 0 {
        eprintln!(
            "Skipping test: no files found in {:?}",
            functions_dir
        );
        return;
    }

    eprintln!("\n=== Zsh Stdlib Functions Parse Results ===");
    eprintln!("Passed: {}/{}", passed, total);

    if !timeout_files.is_empty() {
        eprintln!("\nTimeout files (>2s): {}", timeout_files.len());
        for file in timeout_files.iter().take(10) {
            eprintln!(" {}", file);
        }
        if timeout_files.len() > 10 {
            eprintln!(" ... and {} more", timeout_files.len() - 10);
        }
    }

    if !failed_files.is_empty() {
        eprintln!("\nFailed files: {}", failed_files.len());
        for (file, err) in failed_files.iter().take(20) {
            let filename = Path::new(file)
                .file_name()
                .unwrap_or_default()
                .to_string_lossy();
            eprintln!(" {} - {}", filename, err);
        }
        if failed_files.len() > 20 {
            eprintln!(" ... and {} more", failed_files.len() - 20);
        }
    }

    let pass_rate = (passed as f64 / total as f64) * 100.0;
    eprintln!("Pass rate: {:.1}%", pass_rate);

    // Require at least 50% pass rate
    assert!(pass_rate >= 50.0, "Pass rate too low: {:.1}%", pass_rate);
}
8406
/// c:2643 — `get_cond_num` maps the canonical binary test operators
/// `nt ot ef eq ne lt gt le ge` to 0..=8 in that order. The index IS
/// the wordcode opcode dispatch key, so swapping any two entries would
/// silently send `[[ a -eq b ]]` to a different operation.
#[test]
fn get_cond_num_canonical_order_matches_dispatch_table() {
    let expected = [
        ("nt", 0),
        ("ot", 1),
        ("ef", 2),
        ("eq", 3),
        ("ne", 4),
        ("lt", 5),
        ("gt", 6),
        ("le", 7),
        ("ge", 8),
    ];
    for (op, index) in expected {
        assert_eq!(get_cond_num(op), index);
    }
}
8423
/// c:2643 — unknown operator returns -1 (sentinel for "not in the
/// binary set"). Regression returning 0 silently would alias
/// every unknown op to `-nt`, dispatching to the wrong handler.
/// Covers an unrelated name, the empty string, two valid names run
/// together, and an upper-case spelling of a valid name.
#[test]
fn get_cond_num_unknown_operator_returns_minus_one() {
    assert_eq!(get_cond_num("xx"), -1);
    assert_eq!(get_cond_num(""), -1);
    assert_eq!(get_cond_num("eqnt"), -1, "exact-match required");
    assert_eq!(get_cond_num("NT"), -1, "case-sensitive — uppercase rejected");
}
8434
/// c:2628 — `par_cond_double` requires arg `a` to start with `-`
/// AND have at least one more char. Empty string OR single `-`
/// must error (return 1 via zerr). Regression accepting empty
/// would dispatch `[[ "" string ]]` as a unary test.
///
/// NOTE(review): every result below is discarded with `let _`, so as
/// written this test only proves the three calls do not panic; the
/// "return 1" expectation is not asserted — confirm the return type
/// and add assertions.
#[test]
fn par_cond_double_rejects_short_or_non_dash_first_arg() {
    // empty
    let _ = par_cond_double("", "b");
    // not-dash
    let _ = par_cond_double("foo", "b");
    // bare dash
    let _ = par_cond_double("-", "b");
    // Intended contract: all three must NOT crash + return 1 (error
    // path). Only the no-crash half is exercised here.
}
8449
/// c:2647 CONDSTRS table — walks the full operator list and checks
/// that each name maps back to its own position. Dropping or
/// reordering an entry shifts the indices after it, which this loop
/// reports with the offending operator named in the failure message.
#[test]
fn get_cond_num_round_trips_for_every_table_entry() {
    let table = ["nt", "ot", "ef", "eq", "ne", "lt", "gt", "le", "ge"];
    for (i, &op) in table.iter().enumerate() {
        // The cast mirrors the original check; a `-1` result becomes a
        // huge `usize` and therefore still fails the comparison.
        let found = get_cond_num(op) as usize;
        assert_eq!(found, i, "{op} must map to index {i}");
    }
}
8460
/// c:2643 — lookup is whole-string equality. A proper prefix of a
/// valid name (`e`, `n`) and a valid name with trailing junk (`eq2`)
/// must both yield `-1`. Guards against a regression that swaps
/// equality for a `starts_with`-style comparison in either direction.
#[test]
fn get_cond_num_partial_prefix_does_not_match() {
    for fragment in ["e", "eq2", "n"] {
        assert_eq!(get_cond_num(fragment), -1);
    }
}
8470
/// c:2628 — the C code gates on `IS_DASH(ac[0])`, which per the port
/// notes covers both ASCII `-` and the lexer's Dash sentinel
/// (`\u{9b}`) emitted inside `[[ ... ]]`. This test calls
/// `par_cond_double` with each spelling in front of a unary letter.
///
/// Wordcode emission is not inspected here; the test only requires
/// that both calls return without panicking.
#[test]
fn par_cond_double_accepts_lexed_dash_sentinel() {
    // ASCII dash first, then the lexed sentinel form.
    for unary in ["-z", "\u{9b}z"] {
        let _ = par_cond_double(unary, "foo");
    }
}
8485
/// c:2643 — only the lowercase spellings are recognised. Every
/// upper/mixed-case variant of `eq` must produce `-1`, while `eq`
/// itself still resolves to index 3. Catches a regression that
/// switches to a case-insensitive comparison.
#[test]
fn get_cond_num_is_case_sensitive() {
    for shouted in ["EQ", "Eq", "eQ"] {
        assert_eq!(get_cond_num(shouted), -1);
    }
    // Control: the canonical lowercase form is unaffected.
    assert_eq!(get_cond_num("eq"), 3);
}
8498
/// `Src/parse.c:2862-2868` — inline-string case of `ecgetstr`: up to
/// three bytes are packed into the wordcode word itself (shifted by
/// 3, 11 and 19 bits). Per the port notes the C code unpacks them into
/// a buffer, NUL-terminates it and calls `dupstring`, whose `strlen`
/// stops at the FIRST NUL byte.
///
/// An earlier Rust version filtered NUL bytes out with `retain`, which
/// joins the bytes on either side of an interior NUL instead of
/// cutting the string there. The cases below pin the C behaviour:
///   * `('a', 0, 0)`     → "a"
///   * `('a', 'b', 0)`   → "ab"
///   * `('a', 'b', 'c')` → "abc"
///   * `('a', 0, 'b')`   → "a"  (not "ab")
#[test]
fn ecgetstr_inline_string_truncates_at_first_nul_like_c_strlen() {
    use crate::ported::zsh_h::{eprog, estate};

    // c:2862 layout — bit 0 is the token flag (left clear), bit 1 marks
    // an inline string, and the three payload bytes sit at bit offsets
    // 3, 11 and 19.
    fn pack_inline(b0: u8, b1: u8, b2: u8) -> u32 {
        let payload = ((b0 as u32) << 3) | ((b1 as u32) << 11) | ((b2 as u32) << 19);
        2u32 | payload
    }

    // Wraps a single wordcode word in a one-word program, with the
    // program counter at that word and no string table attached.
    let mk_state = |word: u32| -> estate {
        let program = eprog {
            flags: 0,
            len: 1,
            npats: 0,
            nref: 0,
            pats: Vec::new(),
            prog: vec![word],
            strs: None,
            shf: None,
            dump: None,
        };
        estate {
            prog: Box::new(program),
            pc: 0,
            strs: None,
            strs_offset: 0,
        }
    };

    // (packed bytes, expected string, failure message)
    let cases: [([u8; 3], &str, &str); 4] = [
        (
            [b'a', 0, 0],
            "a",
            "c:2869 strlen truncates 1-char inline at the NUL tail",
        ),
        (
            [b'a', b'b', 0],
            "ab",
            "c:2869 strlen truncates 2-char inline at the NUL tail",
        ),
        (
            [b'a', b'b', b'c'],
            "abc",
            "c:2869 full 3-byte inline preserved",
        ),
        (
            // Pathological: interior NUL followed by a real byte.
            [b'a', 0, b'b'],
            "a",
            "c:2869 strlen STOPS at first NUL; must not splice 'b' through",
        ),
    ];

    for ([b0, b1, b2], want, why) in cases {
        let mut st = mk_state(pack_inline(b0, b1, b2));
        assert_eq!(ecgetstr(&mut st, 0, None), want, "{}", why);
    }
}
8562
/// Pin: `init_parse_status` puts every lexer/parser signalling flag
/// back to its default, following `Src/parse.c:500-502`. Seven flags
/// are covered: `incasepat`, `incond`, `inredir`, `infor`,
/// `intypeset` (c:500), `inrepeat` (c:501 — this reset was once
/// missing from the Rust port) and `incmdpos` (c:502, the only one
/// whose default is "set").
///
/// Each flag is first forced to a non-default value so that a reset
/// which silently skips one of them is detected.
#[test]
fn init_parse_status_resets_all_lexer_parser_flags() {
    use crate::ported::lex::{
        incasepat, incmdpos, incond, infor, inredir, inrepeat, intypeset,
    };
    use crate::ported::lex::{
        set_incasepat, set_incmdpos, set_incond, set_infor, set_inredir, set_inrepeat,
        set_intypeset,
    };

    // Arrange: push every flag away from its default.
    set_incasepat(5);
    set_incond(7);
    set_inredir(true);
    set_infor(3);
    set_intypeset(true);
    set_inrepeat(2);
    set_incmdpos(false);

    // Act.
    init_parse_status();

    // Assert: c:500 group.
    assert_eq!(incasepat(), 0, "c:500 — incasepat = 0");
    assert_eq!(incond(), 0, "c:500 — incond = 0");
    assert!(!inredir(), "c:500 — inredir = 0");
    assert_eq!(infor(), 0, "c:500 — infor = 0");
    assert!(!intypeset(), "c:500 — intypeset = 0");
    // c:501.
    assert_eq!(
        inrepeat(),
        0,
        "c:501 — inrepeat_ = 0 (was previously missing)"
    );
    // c:502 — the one flag that defaults to true.
    assert!(incmdpos(), "c:502 — incmdpos = 1");
}
8593}