zsh/ported/parse.rs
1//! Zsh parser — direct port from zsh/Src/parse.c.
2//!
3//! Pulls tokens via the lex.rs free fns (zshlex/tok/tokstr) and
4//! builds an AST tree (relocated to src/extensions/zsh_ast.rs as a
5//! Rust-only IR) plus emits wordcode into ECBUF via the P9b/P9c
6//! pipeline. Follows the zsh grammar closely; productions match
7//! `par_*` in Src/parse.c.
8
9use super::lex::{
10 lextok, set_tok, AMPER, AMPERBANG, AMPOUTANG, BANG_TOK, BARAMP, BAR_TOK, CASE, COPROC, DAMPER,
11 DBAR, DINANG, DINANGDASH, DINBRACK, DINPAR, DOLOOP, DONE, DOUTANG, DOUTANGAMP, DOUTANGAMPBANG,
12 DOUTANGBANG, DOUTBRACK, DOUTPAR, DSEMI, ELIF, ELSE, ENDINPUT, ENVARRAY, ENVSTRING, ESAC, FI,
13 FOR, FOREACH, FUNC, IF, INANGAMP, INANG_TOK, INBRACE_TOK, INOUTANG, INOUTPAR, INPAR_TOK,
14 IS_REDIROP, LEXERR, NEWLIN, NOCORRECT, NULLTOK, OUTANGAMP, OUTANGAMPBANG, OUTANGBANG,
15 OUTANG_TOK, OUTBRACE_TOK, OUTPAR_TOK, REPEAT, SELECT, SEMI, SEMIAMP, SEMIBAR, SEPER,
16 STRING_LEX, THEN, TIME, TRINANG, TYPESET, UNTIL, WHILE, ZEND,
17};
18use super::zsh_h::{
19 eprog, estate, isset, redir, unset, wc_code, wordcode, Bang, Dash, Equals, Inang, Inpar,
20 Outang, Outpar, Stringg, Tilde, ALIASFUNCDEF, COND_AND, COND_MOD, COND_MODI, COND_NOT, COND_NT,
21 COND_OR, COND_REGEX, COND_STRDEQ, COND_STREQ, COND_STRGTR, COND_STRLT, COND_STRNEQ,
22 CSHJUNKIELOOPS,
23 EC_DUP, EC_NODUP, EF_HEAP, EF_REAL, EXECOPT, IGNOREBRACES, IS_DASH, MULTIFUNCDEF, OPT_ISSET,
24 PM_UNDEFINED, POSIXBUILTINS, REDIRF_FROM_HEREDOC, REDIR_APP, REDIR_APPNOW,
25 REDIR_FROM_HEREDOC_MASK, REDIR_VARID_MASK, REDIR_ERRAPP,
26 REDIR_ERRAPPNOW, REDIR_ERRWRITE, REDIR_ERRWRITENOW, REDIR_HEREDOC, REDIR_HEREDOCDASH,
27 REDIR_HERESTR, REDIR_INPIPE, REDIR_MERGEIN, REDIR_MERGEOUT, REDIR_OUTPIPE, REDIR_READ,
28 REDIR_READWRITE, REDIR_WRITE, REDIR_WRITENOW, SHORTLOOPS, SHORTREPEAT, WCB_COND, WCB_SIMPLE,
29 WC_REDIR, WC_REDIR_FROM_HEREDOC, WC_REDIR_TYPE, WC_REDIR_VARID, WC_SUBLIST_COPROC,
30 WC_SUBLIST_NOT,
31};
32use crate::ported::utils::{zerr, zwarnnam};
33use serde::{Deserialize, Serialize};
34use std::fs::File;
35use std::io::{Read, Seek, SeekFrom, Write};
36use std::sync::atomic::{AtomicUsize, Ordering};
37
/// One node of the long-string dedup tree — the Rust counterpart of
/// C `struct eccstr` (zsh.h:836).
///
/// `ecstrcode` walks and compares nodes the way parse.c:447-453 does,
/// using the conditional chain nfunc → hashval → strcmp, so inputs
/// where C's tree walk hits or misses produce the same outcome here.
struct EccstrNode {
    /// Left child link.
    left: Option<Box<EccstrNode>>,
    /// Right child link.
    right: Option<Box<EccstrNode>>,
    /// The string in C-byte form (one byte per char ≤ 0xff).
    /// Held by value: unlike C zsh there are no stable pointers into
    /// the lexer's tokstr arena — every tokstr is a fresh Rust
    /// `String` allocation.
    str: Vec<u8>,
    /// Wordcode-encoded offset, `(byte_offset << 2) | token_bit`.
    /// Same shape as `Eccstr::offs` (parse.c:459).
    offs: u32,
    /// Value of `nfunc` when the node was inserted. Acts as a
    /// per-function namespace key: top-level scripts use 0 and each
    /// funcdef bumps it.
    nfunc: i32,
    /// Hash of `str`, computed with zsh's `hasher` (hashtable.c:86).
    hashval: u32,
}
60
61// Wordcode-buffer thread-locals — direct port of `Src/parse.c:269-285`
62// file-statics. Per-evaluator (bucket-1 in PORT_PLAN.md): each worker
63// thread parsing a separate program needs its own wordcode buffer.
64//
65// ECBUF: the wordcode array being built. C `Wordcode ecbuf`
66// (parse.c:275).
67// ECLEN: allocated entries in ECBUF (parse.c:269).
68// ECUSED: entries actually used so far (parse.c:271).
69// ECNPATS: count of patterns referenced by ECBUF (parse.c:273).
70// ECSOFFS / ECSSUB: byte offsets into the deferred string region
71// (parse.c:279). ECSSUB subtracts substring overlap.
72// ECNFUNC: count of functions defined so far (parse.c:285).
73// ECSTRS_INDEX: dedup index for long strings — C uses a binary tree
74// of `struct eccstr` (zsh.h:836); the canonical Eccstr port exists
75// at zsh_h::eccstr but stays unused at runtime here. The HashMap
76// preserves the API contract (lookup by (nfunc, str) → offs) with
77// simpler ownership semantics.
78thread_local! {
79 pub static ECBUF: std::cell::RefCell<Vec<u32>> = std::cell::RefCell::new(Vec::new());
80 static ECLEN: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
81 static ECUSED: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
82 static ECNPATS: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
83 static ECSOFFS: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
84 static ECSSUB: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
85 static ECNFUNC: std::cell::Cell<i32> = const { std::cell::Cell::new(0) };
86 static ECSTRS_INDEX: std::cell::RefCell<std::collections::HashMap<(i32, String), u32>>
87 = std::cell::RefCell::new(std::collections::HashMap::new());
88 /// C zsh's `eccstr` BST (parse.c:447). Port of `Eccstr ecstrs` —
89 /// a hashval-ordered binary search tree of long-strings for
90 /// dedup. Same cmp logic as C: nfunc, then hashval, then strcmp.
91 /// HashMap above is a fast-path lookup; this tree is the
92 /// C-fidelity walker that mirrors C's exact dedup-hit pattern
93 /// (including its quirks for hash-colliding content).
94 static ECSTRS_TREE: std::cell::RefCell<Option<Box<EccstrNode>>>
95 = const { std::cell::RefCell::new(None) };
96 /// Reverse index for `ecgetstr`: offs → owned string. Populated
97 /// at ecstrcode time so the consumer can recover the string from
98 /// the wordcode offs without walking the encode-time HashMap.
99 /// Stores the METAFIED BYTE form of each long-string, exactly
100 /// matching what C's strs region holds. `String` would not work
101 /// here because Rust strings carry UTF-8-encoded chars (e.g.
102 /// the Dash marker `\u{9b}` UTF-8-encodes to two bytes
103 /// `\xc2 \x9b`) while C stores zsh markers as single bytes
104 /// (raw `\x9b`). Storing Vec<u8> lets us write byte-for-byte
105 /// what C writes after metafy.
106 pub static ECSTRS_REVERSE: std::cell::RefCell<std::collections::HashMap<u32, Vec<u8>>>
107 = std::cell::RefCell::new(std::collections::HashMap::new());
108}
109
// Grow-policy constants for the wordcode buffer, ported directly from
// `Src/parse.c:287-289`.

/// Entry count the wordcode buffer is sized to by `init_parse`.
const EC_INIT_SIZE: i32 = 256;
/// Threshold constant of the C grow policy (name per parse.c).
const EC_DOUBLE_THRESHOLD: i32 = 32_768;
/// Increment constant of the C grow policy (name per parse.c).
const EC_INCREMENT: i32 = 1_024;
114
// Rust-only parser safety counters, kept as file-scope thread-locals.
// There is no C analog — C relies on the OS stack overflowing.
thread_local! {
    /// Recursion-depth counter; starts at 0 on every thread.
    pub static PARSER_RECURSION_DEPTH: std::cell::Cell<usize> =
        const { std::cell::Cell::new(0) };
    /// Iteration counter; starts at 0 on every thread.
    pub static PARSER_GLOBAL_ITERATIONS: std::cell::Cell<usize> =
        const { std::cell::Cell::new(0) };
}
121
122// =============================================================================
123// Wordcode read helpers — used by text.rs's `gettext2` and exec dispatch
124// to walk a compiled Eprog without re-running the parser. These are the
125// only `Src/parse.c` functions ported so far in this file; the recursive-
126// descent parser (par_event / par_list / par_cmd / par_*) follows
127// below as free fns at module scope.
128// =============================================================================
129
130/// Port of `ecgetstr(Estate s, int dup, int *tokflag)` from `Src/parse.c:2855`.
131/// `s->pc` advances through the wordcode buffer; `s->strs` indexes the
132/// string pool. Returns the interned string (or a 1-3-char literal
133/// inlined directly into the wordcode word).
134pub fn ecgetstr(s: &mut estate, dup: i32, tokflag: Option<&mut i32>) -> String {
135 let prog = &s.prog.prog;
136 if s.pc >= prog.len() {
137 return String::new();
138 }
139 let c = prog[s.pc]; // c:2858 `wordcode c = *s->pc++;`
140 s.pc += 1;
141 if let Some(tf) = tokflag {
142 *tf = i32::from((c & 1) != 0); // c:2880 `*tokflag = (c & 1);`
143 }
144 if c == 6 || c == 7 {
145 // c:2861 `if (c == 6 || c == 7) r = "";`
146 return String::new();
147 }
148 let r: String = if (c & 2) != 0 {
149 // c:2862 `else if (c & 2)`
150 // c:2863-2866 — 3-byte inline string packed into the wordcode word.
151 let b0 = ((c >> 3) & 0xff) as u8;
152 let b1 = ((c >> 11) & 0xff) as u8;
153 let b2 = ((c >> 19) & 0xff) as u8;
154 let mut v = vec![b0, b1, b2];
155 v.retain(|&x| x != 0);
156 String::from_utf8_lossy(&v).into_owned()
157 } else {
158 // c:2877 `else r = s->strs + (c >> 2);`
159 let off = (c >> 2) as usize + s.strs_offset;
160 let strs_bytes = s.strs.as_deref().unwrap_or("").as_bytes();
161 if off >= strs_bytes.len() {
162 String::new()
163 } else {
164 let tail = &strs_bytes[off..];
165 let end = tail.iter().position(|&b| b == 0).unwrap_or(tail.len());
166 String::from_utf8_lossy(&tail[..end]).into_owned()
167 }
168 };
169 // c:2891 `return ((dup == EC_DUP || (dup && (c & 1))) ? dupstring(r) : r);`
170 // Rust owns the String already; `dup` flag has no observable effect.
171 let _ = (dup, EC_DUP, EC_NODUP);
172 r
173}
174
175/// Port of `ecgetredirs(Estate s)` from `Src/parse.c:2959`.
176///
177/// `strs` must be the same tail `ecgetstr` uses (`s->strs` / `estate.strs` from offset).
178/// WARNING: param names don't match C — Rust=(prog, strs, pc) vs C=(s)
179pub fn ecgetredirs(s: &mut estate) -> Vec<redir> {
180 let mut ret: Vec<redir> = Vec::new(); // c:2959 `LinkList ret = newlinklist();`
181 let prog_len = s.prog.prog.len();
182 if s.pc >= prog_len {
183 return ret;
184 }
185 let mut code = s.prog.prog[s.pc]; // c:2962 `wordcode code = *s->pc++;`
186 s.pc += 1;
187
188 loop {
189 if wc_code(code) != WC_REDIR {
190 // c:2988-2989 `s->pc--` then break from while
191 s.pc = s.pc.saturating_sub(1);
192 break;
193 }
194
195 let typ = WC_REDIR_TYPE(code); // c:2967 `r->type = WC_REDIR_TYPE(code);`
196 if s.pc >= prog_len {
197 break;
198 }
199 let fd1_w = s.prog.prog[s.pc]; // c:2968 `r->fd1 = *s->pc++;`
200 s.pc += 1;
201
202 let name = ecgetstr(s, EC_DUP, None); // c:2969 `r->name = ecgetstr(...)`
203
204 let (flags, here_terminator, munged_here_terminator) = if WC_REDIR_FROM_HEREDOC(code) != 0 {
205 // c:2970-2973
206 let term = ecgetstr(s, EC_DUP, None);
207 let munged = ecgetstr(s, EC_DUP, None);
208 (REDIRF_FROM_HEREDOC, Some(term), Some(munged))
209 } else {
210 // c:2974-2977
211 (0, None, None)
212 };
213
214 let varid = if WC_REDIR_VARID(code) != 0 {
215 // c:2979-2980
216 Some(ecgetstr(s, EC_DUP, None))
217 } else {
218 None // c:2981-2982
219 };
220
221 ret.push(redir {
222 // c:2965-2982 fields + c:2984 `addlinknode`
223 typ,
224 flags,
225 fd1: fd1_w as i32,
226 fd2: 0,
227 name: Some(name),
228 varid,
229 here_terminator,
230 munged_here_terminator,
231 });
232
233 if s.pc >= prog_len {
234 break;
235 }
236 code = s.prog.prog[s.pc]; // c:2986 `code = *s->pc++;`
237 s.pc += 1;
238 }
239
240 ret // c:2990 `return ret`
241}
242
243// === AST tree relocated to src/extensions/zsh_ast.rs ===
244//
245// zsh C does NOT have an AST tree — it emits wordcode directly via
246// par_event/par_list/par_sublist/par_pipe/par_cmd/par_simple/etc.
247// (Src/parse.c:485-3000) into a flat `Wordcode ecbuf[]`. The Zsh*/
248// Shell* AST node types lived in this file as a Rust-only IR that
249// stands in for that wordcode.
250//
251// P9e (PORT_PLAN.md): the types moved to src/extensions/zsh_ast.rs
252// to make their Rust-only-extension nature explicit. The full P9c +
253// P9d rewrite (par_* emitting wordcode + exec.rs reading wordcode)
254// retires them entirely — until then, callers reach them via this
255// re-export.
256pub use crate::heredoc_ast::HereDoc;
257pub use crate::zsh_ast::{
258 CaseArm, CaseTerm, CaseTerminator, CompoundCommand, ForList, HereDocInfo, ListFlags, ListOp,
259 Redirect, RedirectOp, ShellCommand, ShellWord, SimpleCommand, SublistFlags, SublistOp,
260 VarModifier, ZshAssign, ZshAssignValue, ZshCase, ZshCommand, ZshCond, ZshFor, ZshFuncDef,
261 ZshIf, ZshList, ZshParamFlag, ZshPipe, ZshProgram, ZshRedir, ZshRepeat, ZshSimple, ZshSublist,
262 ZshTry, ZshWhile,
263};
264use crate::ported::lex::{
265 heredocs_clear, heredocs_clone, heredocs_is_empty, heredocs_len, heredocs_push, heredocs_set,
266 heredocs_take, incasepat, incmdpos, incond, infor, input_slice, inredir, inrepeat, intypeset,
267 isnewlin, lex_init, lineno, noaliases, nocorrect, pos, set_incasepat, set_incmdpos, set_incond,
268 set_infor, set_inredir, set_inrepeat, set_intypeset, set_isnewlin, set_noaliases,
269 set_nocorrect, set_pos, set_tokfd, set_toklineno, set_tokstr, tok, tokfd, toklineno, tokstr,
270 tokstr_eq, tokstr_is_none, tokstr_is_some, tokstr_take, zshlex,
271};
272use crate::prompt::{cmdpop, cmdpush};
273use crate::zsh_h::{
274 wc_bdata, CS_ARRAY, CS_CASE, CS_CMDAND, CS_CMDOR, CS_COND, CS_CURSH, CS_ELIF, CS_ELSE,
275 CS_ERRPIPE, CS_FOR, CS_FOREACH, CS_FUNCDEF, CS_IF, CS_IFTHEN, CS_PIPE, CS_REPEAT, CS_SELECT,
276 CS_SUBSH, CS_UNTIL, CS_WHILE, EF_RUN, WCB_ARITH, WCB_ASSIGN, WCB_CASE, WCB_CURSH, WCB_END,
277 WCB_FOR, WCB_FUNCDEF, WCB_IF, WCB_LIST, WCB_PIPE, WCB_REDIR, WCB_REPEAT, WCB_SELECT,
278 WCB_SUBLIST, WCB_SUBSH, WCB_TIMED, WCB_TRY, WCB_TYPESET, WCB_WHILE, WC_ASSIGN_ARRAY, WC_ASSIGN_INC,
279 WC_ASSIGN_NEW, WC_ASSIGN_SCALAR, WC_CASE_AND, WC_CASE_HEAD, WC_CASE_OR, WC_CASE_TESTAND,
280 WC_FOR_COND, WC_FOR_LIST, WC_FOR_PPARAM, WC_IF_HEAD, WC_IF_IF, WC_PIPE_END, WC_PIPE_LINENO,
281 WC_PIPE_MID, WC_REDIR_WORDS, WC_SELECT_LIST, WC_SELECT_PPARAM, WC_SUBLIST_AND, WC_SUBLIST_END,
282 WC_SUBLIST_FLAGS, WC_SUBLIST_OR, WC_SUBLIST_SIMPLE, WC_SUBLIST_TYPE, WC_TIMED_EMPTY,
283 WC_TIMED_PIPE, WC_WHILE_UNTIL, WC_WHILE_WHILE, Z_ASYNC, Z_DISOWN, Z_END, Z_SIMPLE, Z_SYNC,
284};
285// === end AST relocation ===
286
287// Parser state lives in file-scope thread_locals:
288// - LEX_* (lexer side, matching Src/lex.c file-statics)
289// - ECBUF / ECLEN / ECUSED / ECNPATS / ECSOFFS / ECSSUB / ECNFUNC /
290// ECSTRS_INDEX / ECSTRS_REVERSE (wordcode-emission state, matching
291// Src/parse.c file-statics)
292// - PARSER_RECURSION_DEPTH / PARSER_GLOBAL_ITERATIONS (Rust-only
293// safety counters; no C analog — C relies on OS stack overflow).
294//
295// Callers use the free-fn entry points directly:
296// crate::ported::parse::parse_init(input);
297// let prog = crate::ported::parse::parse();
298
/// Depth ceiling for the Rust-only parser recursion guard. In the part
/// of the file visible here `check_recursion` is a stub, so nothing
/// compares against this value.
const MAX_RECURSION_DEPTH: usize = 500;
300
301/// Direct port of `struct parse_stack` at `Src/zsh.h:3099-3109`.
302/// Used by `parse_context_save` / `parse_context_restore`
303/// (parse.c:295-355) to snapshot per-parse-call state so a nested
304/// parse (e.g. inside command substitution) doesn't clobber the
305/// outer parse.
306///
307/// A second port of `struct parse_stack` exists at
308/// `crate::ported::zsh_h::parse_stack` (zsh.h:1066) using canonical
309/// Wordcode / Eccstr / `struct heredocs` types — that port is unused
310/// today and will become authoritative when Phase 9b (PORT_PLAN.md)
311/// wires wordcode emission. This local version uses the working-set
312/// shapes (Vec<HereDoc>, stubbed wordcode fields) suited to zshrs's
313/// pre-wordcode AST architecture; the consolidation happens in P9b.
314#[allow(non_camel_case_types)]
315#[derive(Debug, Default, Clone)]
316pub struct parse_stack {
317 // ── Direct port of struct parse_stack at zsh.h:3099-3109 ──
318 /// Pending heredocs awaiting body collection. C: `struct heredocs
319 /// *hdocs` (zsh.h:3100). zshrs uses Vec<HereDoc> until Phase 9b
320 /// (PORT_PLAN.md) reinstates C's linked-list shape.
321 pub hdocs: Vec<HereDoc>,
322 /// C: `int incmdpos` (zsh.h:3102).
323 pub incmdpos: bool,
324 /// C: `int aliasspaceflag` (zsh.h:3103).
325 pub aliasspaceflag: i32,
326 /// C: `int incond` (zsh.h:3104).
327 pub incond: i32,
328 /// C: `int inredir` (zsh.h:3105).
329 pub inredir: bool,
330 /// C: `int incasepat` (zsh.h:3106).
331 pub incasepat: i32,
332 /// C: `int isnewlin` (zsh.h:3107).
333 pub isnewlin: i32,
334 /// C: `int infor` (zsh.h:3108).
335 pub infor: i32,
336 /// C: `int inrepeat_` (zsh.h:3109).
337 pub inrepeat_: i32,
338 /// C: `int intypeset` (zsh.h:3110).
339 pub intypeset: bool,
340 // ── Wordcode-buffer state — STUB until Phase 9b ──
341 // C `Wordcode ecbuf` (zsh.h:3112) + `Eccstr ecstrs` (zsh.h:3113) +
342 // `int eclen/ecused/ecnpats/ecsoffs/ecssub/ecnfunc` (zsh.h:3112-3114).
343 // zshrs hasn't emitted wordcode yet — these fields exist to
344 // preserve the C shape but read/write nothing until P9b lands.
345 pub eclen: i32,
346 pub ecused: i32,
347 pub ecnpats: i32,
348 pub ecbuf: Option<Vec<u32>>,
349 pub ecstrs: Option<Vec<u8>>,
350 pub ecsoffs: i32,
351 pub ecssub: i32,
352 pub ecnfunc: i32,
353 // P8: Rust-only safety counters (recursion_depth, global_iterations)
354 // migrated to PARSER_RECURSION_DEPTH + PARSER_GLOBAL_ITERATIONS
355 // thread_locals. parse_stack no longer carries them — matches C
356 // exactly (C's struct parse_stack has no analog).
357}
358
359// Old uppercase Rust-only `ParseStack` is gone. Compat alias so
360// existing call sites (context.rs) keep resolving until the
361// rename ripples through.
362#[allow(non_camel_case_types)]
363pub type ParseStack = parse_stack;
364
365/// Walk every ZshRedir in the program and, for any with a `heredoc_idx`,
366/// pull the body+terminator out of `bodies` and stuff into `heredoc`.
367/// `bodies[i]` corresponds to the i-th heredoc registered by the lexer
368/// during scanning (in source order).
369fn fill_heredoc_bodies(prog: &mut ZshProgram, bodies: &[HereDocInfo]) {
370 for list in &mut prog.lists {
371 fill_in_sublist(&mut list.sublist, bodies);
372 }
373}
374
375fn fill_in_sublist(sub: &mut ZshSublist, bodies: &[HereDocInfo]) {
376 fill_in_pipe(&mut sub.pipe, bodies);
377 if let Some(next) = &mut sub.next {
378 fill_in_sublist(&mut next.1, bodies);
379 }
380}
381
382fn fill_in_pipe(pipe: &mut ZshPipe, bodies: &[HereDocInfo]) {
383 fill_in_command(&mut pipe.cmd, bodies);
384 if let Some(next) = &mut pipe.next {
385 fill_in_pipe(next, bodies);
386 }
387}
388
389fn fill_in_command(cmd: &mut ZshCommand, bodies: &[HereDocInfo]) {
390 match cmd {
391 ZshCommand::Simple(s) => {
392 for r in &mut s.redirs {
393 resolve_redir(r, bodies);
394 }
395 }
396 ZshCommand::Subsh(p) | ZshCommand::Cursh(p) => fill_heredoc_bodies(p, bodies),
397 ZshCommand::FuncDef(f) => fill_heredoc_bodies(&mut f.body, bodies),
398 ZshCommand::If(i) => {
399 fill_heredoc_bodies(&mut i.cond, bodies);
400 fill_heredoc_bodies(&mut i.then, bodies);
401 for (c, b) in &mut i.elif {
402 fill_heredoc_bodies(c, bodies);
403 fill_heredoc_bodies(b, bodies);
404 }
405 if let Some(e) = &mut i.else_ {
406 fill_heredoc_bodies(e, bodies);
407 }
408 }
409 ZshCommand::While(w) | ZshCommand::Until(w) => {
410 fill_heredoc_bodies(&mut w.cond, bodies);
411 fill_heredoc_bodies(&mut w.body, bodies);
412 }
413 ZshCommand::For(f) => fill_heredoc_bodies(&mut f.body, bodies),
414 ZshCommand::Case(c) => {
415 for arm in &mut c.arms {
416 fill_heredoc_bodies(&mut arm.body, bodies);
417 }
418 }
419 ZshCommand::Repeat(r) => fill_heredoc_bodies(&mut r.body, bodies),
420 ZshCommand::Time(Some(sublist)) => fill_in_sublist(sublist, bodies),
421 ZshCommand::Try(t) => {
422 fill_heredoc_bodies(&mut t.try_block, bodies);
423 fill_heredoc_bodies(&mut t.always, bodies);
424 }
425 ZshCommand::Redirected(inner, redirs) => {
426 for r in redirs {
427 resolve_redir(r, bodies);
428 }
429 fill_in_command(inner, bodies);
430 }
431 ZshCommand::Time(None) | ZshCommand::Cond(_) | ZshCommand::Arith(_) => {}
432 }
433}
434
435fn resolve_redir(r: &mut ZshRedir, bodies: &[HereDocInfo]) {
436 if let Some(idx) = r.heredoc_idx {
437 if let Some(info) = bodies.get(idx) {
438 r.heredoc = Some(info.clone());
439 }
440 }
441}
442
443/// If `list` is a Simple containing one word that ends in the
444/// `<Inpar><Outpar>` token pair (the lexer-port encoding of `()`),
445/// return the bare name. Used by `parse_program_until` to detect
446/// `name() {body}` style function definitions where the lexer
447/// hasn't split the `()` from the name.
448/// Detect the `name() …` shape inside a Simple. Returns the function
449/// name and (when the body was already inlined into the same Simple,
450/// e.g. `foo() echo hi`) the rest of the words as the body's argv.
451/// Returns None for non-funcdef shapes.
452fn simple_name_with_inoutpar(list: &ZshList) -> Option<(Vec<String>, Vec<String>)> {
453 if list.flags.async_ || list.sublist.next.is_some() {
454 return None;
455 }
456 let pipe = &list.sublist.pipe;
457 if pipe.next.is_some() {
458 return None;
459 }
460 let simple = match &pipe.cmd {
461 ZshCommand::Simple(s) => s,
462 _ => return None,
463 };
464 if simple.words.is_empty() || !simple.assigns.is_empty() {
465 return None;
466 }
467 let suffix = "\u{88}\u{8a}"; // Inpar + Outpar
468 // Find the FIRST word ending in `()`. zsh accepts the
469 // multi-name shorthand `fna fnb fnc() { body }` (parse.c:
470 // par_funcdef wordlist) — words[0..i-1] are extra names,
471 // words[i] is `lastname()`. Words after are the body argv
472 // (one-line shorthand, `name() cmd args`).
473 let par_idx = simple.words.iter().position(|w| w.ends_with(suffix))?;
474 let mut names: Vec<String> = Vec::with_capacity(par_idx + 1);
475 for w in &simple.words[..par_idx] {
476 // Earlier names must be bare identifiers, NOT contain
477 // tokens that imply they're not function names (no `()`,
478 // no quotes, no expansions). zsh's lexer enforces this
479 // at the wordlist level; we approximate by requiring the
480 // word be an identifier-shaped token after untokenize.
481 let bare = super::lex::untokenize(w);
482 let valid = !bare.is_empty()
483 && bare
484 .chars()
485 .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-' || c == '.' || c == '$');
486 if !valid {
487 return None;
488 }
489 names.push(bare);
490 }
491 let last = &simple.words[par_idx];
492 let bare = &last[..last.len() - suffix.len()];
493 if bare.is_empty() {
494 return None;
495 }
496 names.push(super::lex::untokenize(bare));
497 let rest = simple.words[par_idx + 1..].to_vec();
498 Some((names, rest))
499}
500
501/// Initialize parser state for a fresh parse of `input`.
502/// Free-fn entry point — resets parser thread_locals and loads input.
503pub fn parse_init(input: &str) {
504 // P8: reset Rust-only safety counters at parser construction.
505 PARSER_GLOBAL_ITERATIONS.set(0);
506 PARSER_RECURSION_DEPTH.set(0);
507 // Seed the option defaults the parser/lexer inspect. Real zsh
508 // installs these via `install_emulation_defaults` (options.c:172)
509 // at shell startup; zshrs's parse-only test entry path bypasses
510 // init_main, so we mirror the `zsh` emulation defaults here.
511 // Only seeds when unset so a script that explicitly disabled an
512 // option stays so.
513 for (name, default) in [
514 ("shortloops", true),
515 ("shortrepeat", false),
516 ("multifuncdef", true),
517 ("aliasfuncdef", false),
518 ("ignorebraces", false),
519 ("cshjunkieloops", false),
520 ("posixbuiltins", false),
521 ("execopt", true),
522 ("kshautoload", false),
523 ("aliases", true),
524 ] {
525 if crate::ported::options::opt_state_get(name).is_none() {
526 crate::ported::options::opt_state_set(name, default);
527 }
528 }
529 lex_init(input);
530}
531
/// Iteration-limit check — deliberately a no-op that always reports
/// "limit not hit".
///
/// C zsh's parser has no iteration cap. The earlier Rust-only counter
/// fired spuriously under nested cmdsubst: `parse_context_save` resets
/// the counter to 0 mid-parse, after which the outer frame's tear-down
/// decrement underflowed. It was stubbed out for C fidelity, mirroring
/// the Phase 1 removal in lex.rs.
#[inline]
fn check_limit() -> bool {
    false
}
542
/// Recursion-limit check — deliberately a no-op that always reports
/// "limit not hit".
///
/// Same history as `check_limit`: C has no recursion cap, and the Rust
/// counter underflowed across nested-context boundaries.
#[inline]
fn check_recursion() -> bool {
    false
}
549
550/// Direct port of `parse_context_save(struct parse_stack *ps, int toplevel)` at `Src/parse.c:295`.
551/// Snapshots the lexer-side file-statics (which currently live on
552/// `lexer` until Phase 7 dissolution makes them file-scope
553/// thread_local!s) plus the pending heredoc list, plus the
554/// wordcode-buffer state (STUB until Phase 9b). Saves Rust-only
555/// recursion counters too so nested parses get fresh limits.
556/// WARNING: param names don't match C — Rust=(ps) vs C=(ps, toplevel)
557pub fn parse_context_save(ps: &mut parse_stack) {
558 // parse.c:299 — `ps->hdocs = hdocs; hdocs = NULL;`
559 ps.hdocs = heredocs_take();
560 // parse.c:302-310 — save lexer-side state.
561 ps.incmdpos = incmdpos();
562 // parse.c:303 — aliasspaceflag — not yet a LEX_* thread_local.
563 // STUB; Phase 7 wires it. Same for the few below marked STUB.
564 ps.aliasspaceflag = 0;
565 ps.incond = incond();
566 ps.inredir = inredir();
567 ps.incasepat = incasepat();
568 ps.isnewlin = isnewlin();
569 ps.infor = infor();
570 ps.inrepeat_ = inrepeat();
571 ps.intypeset = intypeset();
572 // parse.c:312-317 — wordcode buffer state. STUB until Phase 9b
573 // (zshrs has no ecbuf yet).
574 ps.eclen = 0;
575 ps.ecused = 0;
576 ps.ecnpats = 0;
577 ps.ecbuf = None;
578 ps.ecstrs = None;
579 ps.ecsoffs = 0;
580 ps.ecssub = 0;
581 ps.ecnfunc = 0;
582 // P8: counters are file-scope thread_locals; reset them on save
583 // (matches the C parse_context_save clear-buffer semantics).
584 // Nested parses get a fresh limit; outer parse's count is lost
585 // — acceptable since the counters are safety nets, not state.
586 PARSER_RECURSION_DEPTH.set(0);
587 PARSER_GLOBAL_ITERATIONS.set(0);
588 set_incmdpos(true);
589 set_incond(0);
590 set_inredir(false);
591 set_incasepat(0);
592 set_infor(0);
593 set_inrepeat(0);
594 set_intypeset(false);
595}
596
597/// Direct port of `parse_context_restore(const struct parse_stack *ps, int toplevel)` at `Src/parse.c:326`.
598/// Inverse of `parse_context_save`. Restores lexer-side state +
599/// pending heredocs + Rust-only counters from `ps`, then clears
600/// `errflag & ERRFLAG_ERROR` per parse.c:354.
601/// WARNING: param names don't match C — Rust=(ps) vs C=(ps, toplevel)
602pub fn parse_context_restore(ps: &parse_stack) {
603 // parse.c:330-331 — free any in-progress wordcode buffer.
604 // zshrs has no wordcode yet (STUB until Phase 9b); the AST
605 // nodes are owned by their parent so dropping the parser
606 // frees them.
607
608 // parse.c:333-352 — restore saved state.
609 heredocs_set(ps.hdocs.clone());
610 set_incmdpos(ps.incmdpos);
611 // aliasspaceflag STUB until Phase 7.
612 set_incond(ps.incond);
613 set_inredir(ps.inredir);
614 set_incasepat(ps.incasepat);
615 set_isnewlin(ps.isnewlin);
616 set_infor(ps.infor);
617 set_inrepeat(ps.inrepeat_);
618 set_intypeset(ps.intypeset);
619 // ecbuf/eclen/ecused/ecnpats/ecstrs/ecsoffs/ecssub/ecnfunc
620 // STUB until Phase 9b.
621 // P8: counters not restored — see parse_context_save comment.
622
623 // parse.c:354 — `errflag &= ~ERRFLAG_ERROR;` — clear the
624 // error flag so the outer parse sees a clean state.
625 crate::ported::utils::errflag.fetch_and(
626 !crate::ported::utils::ERRFLAG_ERROR,
627 std::sync::atomic::Ordering::Relaxed,
628 );
629}
630
631/// Initialize parser status. Direct port of zsh/Src/parse.c:491
632/// `init_parse_status`. Clears the per-parse-call lexer flags
633/// so a fresh parse starts from cmd-position with no nesting
634/// state inherited from a prior parse.
635pub fn init_parse_status() {
636 // parse.c:500-502 — `incasepat = incond = inredir = infor =
637 // intypeset = 0; inrepeat_ = 0; incmdpos = 1;`
638 set_incasepat(0);
639 set_incond(0);
640 set_inredir(false);
641 set_infor(0);
642 set_intypeset(false);
643 set_incmdpos(true);
644}
645
646/// Initialize parser for a fresh parse. Direct port of
647/// zsh/Src/parse.c:509 `init_parse`. C source allocates a
648/// fresh wordcode buffer (ecbuf) sized EC_INIT_SIZE, resets the
649/// per-parse-call counters, and calls init_parse_status. zshrs
650/// has no flat wordcode buffer (AST is built inline) so this
651/// function reduces to init_parse_status + recursion_depth/
652/// global_iterations clear.
653pub fn init_parse() {
654 // parse.c:513-520 — `ecbuf = (Wordcode) zalloc(EC_INIT_SIZE *
655 // sizeof(wordcode)); eclen = EC_INIT_SIZE; ecused = 0;
656 // ecnpats = 0; ecstrs = NULL; ecsoffs = ecnfunc = 0;
657 // ecssub = 0;`. P9b — initialize the per-evaluator wordcode
658 // buffer for this parse call. zshrs uses thread-local
659 // statics declared at file scope (parse.rs:25-50).
660 ECBUF.with_borrow_mut(|buf| {
661 buf.clear();
662 buf.resize(EC_INIT_SIZE as usize, 0);
663 });
664 ECLEN.set(EC_INIT_SIZE);
665 ECUSED.set(0);
666 ECNPATS.set(0);
667 ECSOFFS.set(0);
668 ECSSUB.set(0);
669 ECNFUNC.set(0);
670 ECSTRS_INDEX.with_borrow_mut(|m| m.clear());
671 ECSTRS_REVERSE.with_borrow_mut(|m| m.clear());
672 ECSTRS_TREE.with_borrow_mut(|t| *t = None);
673
674 PARSER_RECURSION_DEPTH.set(0);
675 PARSER_GLOBAL_ITERATIONS.set(0);
676 // parse.c:522 — `init_parse_status();`
677 init_parse_status();
678}
679
680/// Port of `int empty_eprog(Eprog p)` from `Src/parse.c:584`. C
681/// body: `return (!p || !p->prog || *p->prog == WCB_END());` —
682/// the eprog is empty when its prog buffer is missing or the
683/// first wordcode is the WC_END marker. Used by signal handlers
684/// (`Src/signals.c:712`) to short-circuit a trap that resolves to
685/// an empty program.
686pub fn empty_eprog(p: &crate::ported::zsh_h::eprog) -> bool {
687 p.prog.is_empty() || p.prog[0] == crate::ported::zsh_h::WCB_END()
688}
689
690/// Clear pending here-document list. Direct port of
691/// zsh/Src/parse.c:591 `clear_hdocs`. The C version walks
692/// the global `hdocs` linked list and frees each node. zshrs
693/// stores pending heredocs on the lexer's `heredocs` Vec —
694/// truncating it has the same effect.
695pub fn clear_hdocs() {
696 heredocs_clear();
697}
698
699/// Top-level parse-event entry. Direct port of zsh/Src/parse.c:
700/// 612-631 `parse_event`. Reads one event from the lexer (a
701/// sublist optionally followed by SEPER/AMPER/AMPERBANG) and
702/// returns the resulting ZshProgram.
703///
704/// `endtok` is the token that terminates the event — usually
705/// ENDINPUT, but for command-style substitutions the closing
706/// `)` (zsh's CMD_SUBST_CLOSE).
707///
708/// zshrs port note: zsh's parse_event returns an `Eprog` (heap-
709/// allocated wordcode program). zshrs returns a `ZshProgram`
710/// (AST root). Same role at the parse-output boundary.
711pub fn parse_event(endtok: lextok) -> Option<ZshProgram> {
712 // parse.c:616-619 — reset state and prime the lexer.
713 set_tok(ENDINPUT);
714 set_incmdpos(true);
715 zshlex();
716 // parse.c:620 — `init_parse();`
717 init_parse();
718
719 // parse.c:622-625 — drive par_event; on failure clear hdocs.
720 if !par_event(endtok) {
721 clear_hdocs();
722 return None;
723 }
724 // parse.c:626-628 — if endtok != ENDINPUT, this is a sub-
725 // parse for a substitution that doesn't need its own eprog.
726 // zshrs returns an empty program in that case (caller
727 // discards).
728 if endtok != ENDINPUT {
729 return Some(ZshProgram { lists: Vec::new() });
730 }
731 // parse.c:630 — `bld_eprog(1);` — build the final eprog.
732 // zshrs has already built the AST via parse_program_until,
733 // but parse_event uses par_event directly so we need to
734 // collect what par_event accumulated.
735 Some(parse_program_until(None))
736}
737
738/// Parse one event (sublist with optional separator). Direct
739/// port of zsh/Src/parse.c:635 `par_event`. Returns true if
740/// an event was successfully parsed, false on EOF / endtok.
741///
742/// zshrs port note: the C version emits wordcodes via ecadd/
743/// set_list_code; zshrs's parser builds AST nodes via
744/// par_sublist + par_list. Same flow, different output.
745pub fn par_event(endtok: lextok) -> bool {
746 // parse.c:639-643 — skip leading SEPERs.
747 while tok() == SEPER {
748 // parse.c:640-641 — at top-level (endtok == ENDINPUT),
749 // a SEPER on a fresh line ends the event.
750 if isnewlin() > 0 && endtok == ENDINPUT {
751 return false;
752 }
753 zshlex();
754 }
755 // parse.c:644-647 — terminate on EOF or matching close-token.
756 if tok() == ENDINPUT {
757 return false;
758 }
759 if tok() == endtok {
760 return true;
761 }
762 // parse.c:649-... — drive par_sublist + handle terminator.
763 // zshrs's par_sublist already builds the AST node directly.
764 match par_sublist() {
765 Some(_) => {
766 // parse.c:651-693 — terminator handling. zshrs's
767 // par_list wraps this; for parse_event we just
768 // confirm the sublist parsed.
769 true
770 }
771 None => false,
772 }
773}
774
775/// Parse one list — non-recursing variant. Direct port of
776/// zsh/Src/parse.c:808 `par_list1`. Like par_list but
777/// doesn't recurse on the trailing-separator path; used by
778/// callers that only want one statement (e.g. each arm of a
779/// case body).
780pub fn par_list1() -> Option<ZshSublist> {
781 // parse.c:810-816 — body is a single par_sublist call wrapped
782 // in the eu/ecused tracking that zshrs doesn't need (no
783 // wordcode buffer).
784 par_sublist()
785}
786
/// API-parity placeholder for `setheredoc` (zsh/Src/parse.c:2347).
///
/// The C function patches the redirection's wordcode in place with
/// the heredoc body and terminator. This Rust function takes the same
/// arguments but ignores all of them and does nothing.
///
/// zshrs port note: per the surrounding port notes, heredoc bodies are
/// attached to redirections by a post-parse pass instead of through
/// this function.
pub fn setheredoc(_pc: usize, _redir_type: i32, _doc: &str, _term: &str, _munged_term: &str) {
    // Intentionally empty: every parameter is unused.
}
804
805/// Parse a wordlist for `for ... in WORDS;`. Direct port of
806/// zsh/Src/parse.c:2362 `par_wordlist`. Reads STRING tokens
807/// until the next SEPER / SEMI / NEWLIN.
808pub fn par_wordlist() -> Vec<String> {
809 let mut out = Vec::new();
810 // parse.c:2362-2378 — collect STRINGs into the wordlist.
811 while tok() == STRING_LEX {
812 if let Some(text) = tokstr() {
813 out.push(text);
814 }
815 zshlex();
816 }
817 out
818}
819
820/// Parse a newline-separated wordlist. Direct port of
821/// zsh/Src/parse.c:2379 `par_nl_wordlist`. Like
822/// par_wordlist but tolerates leading/trailing newlines.
823pub fn par_nl_wordlist() -> Vec<String> {
824 // parse.c:2380-2381 — skip leading newlines.
825 while tok() == NEWLIN {
826 zshlex();
827 }
828 let out = par_wordlist();
829 // parse.c:2395-2397 — skip trailing newlines.
830 while tok() == NEWLIN {
831 zshlex();
832 }
833 out
834}
835
836/// Read an integer from the next cond token. NOT a direct C port —
837/// the C `get_cond_num(char *tst)` (parse.c:2643) is the
838/// string-lookup helper ported below. This Rust-only helper exists
839/// to support the AST cond-walker (`par_cond_*` analogs) when it
840/// needs a numeric literal from the current lex position.
841pub fn read_cond_num() -> Option<i64> {
842 if tok() != STRING_LEX {
843 return None;
844 }
845 let text = tokstr()?;
846 let parsed = text.parse::<i64>().ok()?;
847 zshlex();
848 Some(parsed)
849}
850
/// Look up a binary condition operator name. Port of
/// `get_cond_num(char *tst)` from `Src/parse.c:2643`.
///
/// Returns the position of `tst` in the table
/// `nt, ot, ef, eq, ne, lt, gt, le, ge` (0-based), or `-1` when `tst`
/// is not one of those names. The comparison is exact and
/// case-sensitive.
pub fn get_cond_num(tst: &str) -> i32 {
    // c:2647 — operator names in the order the C table lists them.
    const CONDSTRS: [&str; 9] = ["nt", "ot", "ef", "eq", "ne", "lt", "gt", "le", "ge"];

    CONDSTRS
        .iter()
        .position(|&name| name == tst) // c:2654
        .map_or(-1, |idx| idx as i32) // c:2656 — `-1` for "not found"
}
866
867/// Emit a parser-level error. Direct port of zsh/Src/parse.c
868/// 2733-2766 `yyerror`. C version fills a per-event error buffer
869/// and sets errflag. zshrs pushes onto errors which the
870/// caller drains via parse()'s Result return.
871pub fn yyerror(msg: &str) {
872 // parse.c:2735-2765 — zsh's yyerror collects the offending
873 // token's literal text + line number. zshrs already does
874 // this via error() with the lexer's toklineno.
875 error(msg);
876}
877
// ============================================================
// Wordcode emission helpers (parse.c private helpers)
//
// The following functions are the counterparts of zsh's private
// wordcode-emission helpers in parse.c. zsh uses these to write
// u32 opcodes into a flat `ecbuf` array. The AST parser in this
// file does not need them to build its tree, but the helpers
// below do operate on the live `ECBUF` / `ECUSED` / `ECLEN`
// state: they append, insert, delete and patch wordcodes the
// same way their C originals do.
//
// Each function cites the C source lines it mirrors so every
// parse.c symbol has a Rust counterpart.
// ============================================================
893
894/// Patch a list-placeholder wordcode with its actual opcode +
895/// jump distance. Direct port of zsh/Src/parse.c:738
896/// `set_list_code`. zsh emits an `ecadd(0)` placeholder before
897/// par_sublist runs, then comes back through set_list_code to
898/// rewrite the slot with WCB_LIST(type, distance) once the
899/// sublist's final length is known.
900///
901/// Port of `set_list_code(int p, int type, int cmplx)` from
902/// `Src/parse.c:738`. Patches the WCB_LIST header at `p` based on
903/// whether the sublist body is simple (single command, no
904/// pipeline) and Z_SYNC/Z_END — emits the Z_SIMPLE-optimized
905/// header when possible, otherwise the plain WCB_LIST(type, 0).
906pub fn set_list_code(p: usize, type_code: i32, cmplx: bool) {
907 let _ = wc_bdata;
908 // c:740 — `if (!cmplx && (type == Z_SYNC || type == (Z_SYNC | Z_END))
909 // && WC_SUBLIST_TYPE(ecbuf[p+1]) == WC_SUBLIST_END)`
910 let sublist_code = ECBUF.with_borrow(|b| b.get(p + 1).copied().unwrap_or(0));
911 let z = type_code;
912 let qualifies = !cmplx
913 && (z == Z_SYNC || z == (Z_SYNC | Z_END))
914 && WC_SUBLIST_TYPE(sublist_code) == WC_SUBLIST_END;
915 if qualifies {
916 // c:742 — `int ispipe = !(WC_SUBLIST_FLAGS(ecbuf[p+1])
917 // & WC_SUBLIST_SIMPLE);`
918 let ispipe = (WC_SUBLIST_FLAGS(sublist_code) & WC_SUBLIST_SIMPLE) == 0;
919 // c:743 — `ecbuf[p] = WCB_LIST((type|Z_SIMPLE), ecused-2-p);`
920 let used = ECUSED.get() as usize;
921 let off = used.saturating_sub(2 + p);
922 ECBUF.with_borrow_mut(|b| {
923 if p < b.len() {
924 b[p] = WCB_LIST((z | Z_SIMPLE) as wordcode, off as wordcode);
925 }
926 });
927 // c:744 — `ecdel(p+1);`
928 ecdel(p + 1);
929 // c:745-746 — `if (ispipe) ecbuf[p+1] = WC_PIPE_LINENO(ecbuf[p+1]);`
930 if ispipe {
931 ECBUF.with_borrow_mut(|b| {
932 if p + 1 < b.len() {
933 b[p + 1] = WC_PIPE_LINENO(b[p + 1]);
934 }
935 });
936 }
937 } else {
938 // c:748 — `ecbuf[p] = WCB_LIST(type, 0);`
939 ECBUF.with_borrow_mut(|b| {
940 if p < b.len() {
941 b[p] = WCB_LIST(z as wordcode, 0);
942 }
943 });
944 }
945}
946
947/// Port of `set_sublist_code(int p, int type, int flags, int skip, int cmplx)`
948/// from `Src/parse.c:755`. Patches the WCB_SUBLIST header at `p`.
949/// When the sublist is non-complex (single command, no pipeline),
950/// sets WC_SUBLIST_SIMPLE and rewrites the following slot to
951/// `WC_PIPE_LINENO`.
952pub fn set_sublist_code(p: usize, type_code: i32, flags: i32, skip: i32, cmplx: bool) {
953 if cmplx {
954 // c:758 — `ecbuf[p] = WCB_SUBLIST(type, flags, skip);`
955 ECBUF.with_borrow_mut(|b| {
956 if p < b.len() {
957 b[p] = WCB_SUBLIST(type_code as wordcode, flags as wordcode, skip as wordcode);
958 }
959 });
960 } else {
961 // c:760 — `ecbuf[p] = WCB_SUBLIST(type, flags|WC_SUBLIST_SIMPLE, skip);`
962 ECBUF.with_borrow_mut(|b| {
963 if p < b.len() {
964 b[p] = WCB_SUBLIST(
965 type_code as wordcode,
966 (flags as wordcode) | WC_SUBLIST_SIMPLE,
967 skip as wordcode,
968 );
969 }
970 });
971 // c:761 — `ecbuf[p+1] = WC_PIPE_LINENO(ecbuf[p+1]);`
972 ECBUF.with_borrow_mut(|b| {
973 if p + 1 < b.len() {
974 b[p + 1] = WC_PIPE_LINENO(b[p + 1]);
975 }
976 });
977 }
978}
979
980/// Direct port of `ecadd(wordcode c)` at `Src/parse.c:397`. Append `c` to
981/// the wordcode buffer with grow-on-demand, return the new index.
982pub fn ecadd(c: u32) -> usize {
983 // parse.c:399-405 — `if ((eclen - ecused) < 1) grow`.
984 if (ECLEN.get() - ECUSED.get()) < 1 {
985 let cur = ECLEN.get();
986 let a = if cur < EC_DOUBLE_THRESHOLD {
987 cur
988 } else {
989 EC_INCREMENT
990 };
991 ECBUF.with_borrow_mut(|buf| {
992 buf.resize((cur + a) as usize, 0);
993 });
994 ECLEN.set(cur + a);
995 }
996 let idx = ECUSED.get();
997 ECBUF.with_borrow_mut(|buf| {
998 if (idx as usize) >= buf.len() {
999 buf.resize((idx + 1) as usize, 0);
1000 }
1001 buf[idx as usize] = c;
1002 });
1003 ECUSED.set(idx + 1);
1004 idx as usize
1005}
1006
1007/// Direct port of `ecdel(int p)` at `Src/parse.c:413`. Remove the
1008/// wordcode at position `p`, shift later entries left by one,
1009/// decrement ecused, adjust pending heredoc pointers.
1010pub fn ecdel(p: usize) {
1011 // parse.c:415-418 — memmove + decrement ecused.
1012 let n = ECUSED.get() as usize - p - 1;
1013 if n > 0 {
1014 ECBUF.with_borrow_mut(|buf| {
1015 for i in 0..n {
1016 buf[p + i] = buf[p + i + 1];
1017 }
1018 });
1019 }
1020 ECUSED.set(ECUSED.get() - 1);
1021 // parse.c:420 — `ecadjusthere(p, -1)`.
1022 ecadjusthere(p, -1);
1023}
1024
1025/// Direct port of `ecstrcode(char *s)` at `Src/parse.c:426`. Encode a
1026/// string into a single wordcode (short strings ≤4 bytes packed
1027/// inline; longer strings get an offset into the deduped registry).
1028///
1029/// The long-string path stores the METAFIED bytes (matches what C's
1030/// strs region contains): collapse Rust UTF-8 chars in 0x80..=0xff
1031/// to single bytes, then apply zsh metafy (high bytes ≥ 0x83 →
1032/// `Meta=0x83 + byte^0x20`). Length tracking (ECSOFFS) uses the
1033/// metafied byte count — same as C `strlen(s) + 1` where C's `s`
1034/// is already metafied at this point.
1035pub fn ecstrcode(s: &str) -> u32 {
1036 // Convert Rust UTF-8 → C-byte form inline: chars ≤ 0xff collapse
1037 // to single bytes (so zsh markers like Dash = `\u{9b}` are 1 byte
1038 // instead of `\xc2 \x9b` UTF-8). Chars > 0xff fall back to their
1039 // UTF-8 bytes — matches how C tokstr would hold them (it sees
1040 // multi-byte UTF-8 source as raw byte sequences).
1041 let mut c_bytes: Vec<u8> = Vec::with_capacity(s.len());
1042 for ch in s.chars() {
1043 let cu = ch as u32;
1044 if cu <= 0xff {
1045 c_bytes.push(cu as u8);
1046 } else {
1047 let mut tmp = [0u8; 4];
1048 c_bytes.extend_from_slice(ch.encode_utf8(&mut tmp).as_bytes());
1049 }
1050 }
1051 let t = c_bytes.iter().any(|&b| (0x83..=0x9f).contains(&b));
1052 let l = c_bytes.len() + 1; // include NUL terminator
1053 if l <= 4 {
1054 // parse.c:436-445 — short-string inline pack. Uses raw C-bytes
1055 // (NOT metafied — the inline packing stores 1 byte per slot).
1056 let mut c: u32 = if t { 3 } else { 2 };
1057 match l {
1058 4 => {
1059 c |= (c_bytes[2] as u32) << 19;
1060 c |= (c_bytes[1] as u32) << 11;
1061 c |= (c_bytes[0] as u32) << 3;
1062 }
1063 3 => {
1064 c |= (c_bytes[1] as u32) << 11;
1065 c |= (c_bytes[0] as u32) << 3;
1066 }
1067 2 => {
1068 c |= (c_bytes[0] as u32) << 3;
1069 }
1070 1 => {
1071 // parse.c:443 — empty string special case.
1072 c = if t { 7 } else { 6 };
1073 }
1074 _ => {}
1075 }
1076 c
1077 } else {
1078 // parse.c:447-466 — long string. Port of C's eccstr BST walk
1079 // exactly: walk the tree comparing nfunc, then hashval, then
1080 // strcmp on bytes. Return offs on full match; insert new
1081 // leaf otherwise. Matches C's exact dedup-hit pattern
1082 // (which is content-dependent — hash collisions and the
1083 // lazy short-circuit cmp chain make the tree shape determine
1084 // whether matching nodes are reachable).
1085 // hasher is byte-by-byte polynomial (hashtable.c:86); pass
1086 // c_bytes via from_utf8_unchecked so non-UTF-8 zsh marker
1087 // bytes feed straight in. SAFETY: hasher only iterates
1088 // `.bytes()` — no UTF-8 validity assumed.
1089 let val = crate::ported::hashtable::hasher(unsafe {
1090 std::str::from_utf8_unchecked(&c_bytes)
1091 });
1092 let nfunc = ECNFUNC.get();
1093 let found_offs = ECSTRS_TREE.with_borrow_mut(|root| {
1094 // Walk the tree. At each node, if all 3 cmps == 0,
1095 // return the node's offs. Otherwise descend left/right
1096 // by the first non-zero cmp's sign.
1097 let mut cur: &mut Option<Box<EccstrNode>> = root;
1098 loop {
1099 let p = match cur.as_mut() {
1100 Some(p) => p,
1101 None => break None,
1102 };
1103 // c:448 — `cmp = p->nfunc - ecnfunc`
1104 let mut cmp = (p.nfunc as i64) - (nfunc as i64);
1105 if cmp == 0 {
1106 // c:448 — `&& !(cmp = (long)p->hashval - (long)val)`
1107 cmp = (p.hashval as i64) - (val as i64);
1108 if cmp == 0 {
1109 // c:448 — `&& !(cmp = strcmp(p->str, s))`
1110 cmp = match p.str.as_slice().cmp(c_bytes.as_slice()) {
1111 std::cmp::Ordering::Less => -1,
1112 std::cmp::Ordering::Equal => 0,
1113 std::cmp::Ordering::Greater => 1,
1114 };
1115 if cmp == 0 {
1116 // c:450 — `return p->offs;`
1117 break Some(p.offs);
1118 }
1119 }
1120 }
1121 // c:452 — `pp = (cmp < 0 ? &p->left : &p->right);`
1122 cur = if cmp < 0 { &mut p.left } else { &mut p.right };
1123 }
1124 });
1125 if let Some(offs) = found_offs {
1126 return offs;
1127 }
1128 let offs =
1129 (((ECSOFFS.get() - ECSSUB.get()) as u32) << 2) | if t { 1 } else { 0 };
1130 // c:457-465 — insert new node at the NULL slot the walk
1131 // terminated at. Encode the walk path as a Vec<bool> of
1132 // left/right turns (true = right), then re-descend to
1133 // insert. Borrow-checker friendly: a single mutable walk
1134 // that either finds an existing node (descend) or fills
1135 // the empty slot (return).
1136 let stored = c_bytes.clone();
1137 let stored_len = stored.len();
1138 let new_node = Box::new(EccstrNode {
1139 left: None,
1140 right: None,
1141 str: stored.clone(),
1142 offs,
1143 nfunc,
1144 hashval: val,
1145 });
1146 ECSTRS_TREE.with_borrow_mut(|root| {
1147 // Build the path first (immutable-walk; safe because we
1148 // only ever go further down).
1149 let mut path: Vec<bool> = Vec::new();
1150 {
1151 let mut cur: &Option<Box<EccstrNode>> = root;
1152 while let Some(p) = cur.as_ref() {
1153 let mut cmp = (p.nfunc as i64) - (nfunc as i64);
1154 if cmp == 0 {
1155 cmp = (p.hashval as i64) - (val as i64);
1156 if cmp == 0 {
1157 cmp = match p.str.as_slice().cmp(c_bytes.as_slice()) {
1158 std::cmp::Ordering::Less => -1,
1159 std::cmp::Ordering::Equal => 0,
1160 std::cmp::Ordering::Greater => 1,
1161 };
1162 }
1163 }
1164 let go_right = cmp >= 0;
1165 path.push(go_right);
1166 cur = if go_right { &p.right } else { &p.left };
1167 }
1168 }
1169 // Descend mutably along the recorded path and assign at
1170 // the NULL leaf.
1171 let mut cur: &mut Option<Box<EccstrNode>> = root;
1172 for turn in path {
1173 let p = cur.as_mut().expect("path matches walk");
1174 cur = if turn { &mut p.right } else { &mut p.left };
1175 }
1176 *cur = Some(new_node);
1177 });
1178 // Also keep the existing reverse index (offs → bytes) for
1179 // ecgetstr_wordcode and copy_ecstr — they read flat by offs.
1180 ECSTRS_REVERSE.with_borrow_mut(|m| {
1181 m.insert(offs, stored);
1182 });
1183 let _ = l;
1184 ECSOFFS.set(ECSOFFS.get() + (stored_len + 1) as i32);
1185 offs
1186 }
1187}
1188
1189/// P9b decoder (wordcode-pipeline variant): direct port of
1190/// `ecgetstr(Estate s, int dup, int *tokflag)` from
1191/// `Src/parse.c:2855-2890`. Reads a wordcode at `pc`, decodes the
1192/// encoded string back to owned String. Returns (string,
1193/// pc_after_consumed). Distinct from the existing `ecgetstr` (which
1194/// takes a separate strs buffer for text.rs) — this variant uses
1195/// the live ECSTRS_REVERSE HashMap populated at ecstrcode time.
1196pub fn ecgetstr_wordcode(buf: &[u32], pc: usize) -> (String, usize) {
1197 if pc >= buf.len() {
1198 return (String::new(), pc);
1199 }
1200 let c = buf[pc];
1201 let next = pc + 1;
1202 // parse.c:2862-2863 — empty-string sentinels.
1203 if c == 6 || c == 7 {
1204 return (String::new(), next);
1205 }
1206 // parse.c:2864-2871 — inline-packed short string.
1207 if (c & 2) != 0 {
1208 let b0 = ((c >> 3) & 0xff) as u8;
1209 let b1 = ((c >> 11) & 0xff) as u8;
1210 let b2 = ((c >> 19) & 0xff) as u8;
1211 let mut bytes: Vec<u8> = Vec::new();
1212 for b in [b0, b1, b2] {
1213 if b == 0 {
1214 break;
1215 }
1216 bytes.push(b);
1217 }
1218 return (String::from_utf8_lossy(&bytes).into_owned(), next);
1219 }
1220 // parse.c:2872-2873 — long string via offs lookup. Map value is
1221 // metafied Vec<u8>; convert back to display String. Unmetafy is
1222 // the caller's job (the wordcode-parity dumper does it; other
1223 // callers may want raw bytes).
1224 let s = ECSTRS_REVERSE
1225 .with_borrow(|m| m.get(&c).cloned())
1226 .map(|v| String::from_utf8_lossy(&v).into_owned())
1227 .unwrap_or_default();
1228 (s, next)
1229}
1230
1231/// Direct port of `ecispace(int p, int n)` at `Src/parse.c:372`. Insert `n`
1232/// empty wordcode slots at position `p`, shifting later entries
1233/// right, growing the buffer as needed, adjusting heredoc pointers.
1234pub fn ecispace(p: usize, n: usize) {
1235 // parse.c:376-381 — grow if needed.
1236 let need = n as i32;
1237 if (ECLEN.get() - ECUSED.get()) < need {
1238 let cur = ECLEN.get();
1239 let mut a = if cur < EC_DOUBLE_THRESHOLD {
1240 cur
1241 } else {
1242 EC_INCREMENT
1243 };
1244 if need > a {
1245 a = need;
1246 }
1247 ECBUF.with_borrow_mut(|buf| {
1248 buf.resize((cur + a) as usize, 0);
1249 });
1250 ECLEN.set(cur + a);
1251 }
1252 // parse.c:382-385 — memmove p → p+n, gap of n.
1253 let m = ECUSED.get() as usize - p;
1254 if m > 0 {
1255 ECBUF.with_borrow_mut(|buf| {
1256 let needed = (ECUSED.get() as usize) + n;
1257 if buf.len() < needed {
1258 buf.resize(needed, 0);
1259 }
1260 for i in (0..m).rev() {
1261 buf[p + n + i] = buf[p + i];
1262 }
1263 for i in 0..n {
1264 buf[p + i] = 0;
1265 }
1266 });
1267 }
1268 // parse.c:386 — bump ecused by n.
1269 ECUSED.set(ECUSED.get() + need);
1270 // parse.c:387 — `ecadjusthere(p, n)`.
1271 ecadjusthere(p, need);
1272}
1273
/// Placeholder for `ecadjusthere(int p, int d)` at `Src/parse.c:360`.
///
/// In C this walks the pending-heredoc list and adds `d` to every
/// entry whose `pc` is at or after `p`; `ecispace` and `ecdel` call it
/// after shifting wordcodes. The Rust body is currently empty: both
/// arguments are accepted and ignored.
#[allow(unused_variables)]
pub fn ecadjusthere(p: usize, d: i32) {
    // parse.c:362-366 — `for (p2 = hdocs; p2; p2 = p2->next) if
    // (p2->pc >= p) p2->pc += d;`. Deliberately a no-op for now: per
    // the port notes, pending heredocs do not yet carry a wordcode pc
    // (planned for Phase 9c).
}
1287
// ============================================================
// Eprog runtime ops (parse.c:2767-2853)
//
// dupeprog / useeprog / freeeprog are zsh's reference-counting
// helpers for executable programs. The ports below keep that
// model: `dupeprog` builds a fresh `eprog` value with its own
// `nref`, `useeprog` increments `nref`, and `freeeprog`
// decrements it and clears the program's buffers when the
// count reaches zero. Memory itself is still released by
// Rust's normal drop of the owning value.
// ============================================================
1297
1298/// Duplicate an Eprog. Direct port of zsh/Src/parse.c:2813
1299/// Port of `Eprog dupeprog(Eprog p, int heap)` from
1300/// `Src/parse.c:2767`. Deep-copies the wordcode array, string
1301/// table, and pattern-prog slots. `dummy_eprog` is returned
1302/// unchanged. `heap`-allocated copies get `nref = -1` (never
1303/// freed); real ones get `nref = 1`.
1304pub fn dupeprog(p: &crate::ported::zsh_h::eprog, heap: bool) -> crate::ported::zsh_h::eprog {
1305 // c:2774-2775 — `if (p == &dummy_eprog) return p;` — caller-
1306 // observable identity in C uses a pointer compare; Rust's
1307 // equivalent is "if it has the dummy's shape (single WCB_END
1308 // word and no strs), return a copy of the same shape".
1309 // c:2796-2797 — `for (i = r->npats; i--; pp++) *pp = dummy_patprog1;`
1310 // C uses `dummy_patprog1` as a placeholder; the Rust port has
1311 // `Vec<Patprog>` (Box<patprog>) — synthesize an equivalent zero-
1312 // initialized patprog for each slot (resolved later by
1313 // pattern.c::patcompile-on-first-use).
1314 let dummy_pat = || crate::ported::zsh_h::patprog {
1315 startoff: 0,
1316 size: 0,
1317 mustoff: 0,
1318 patmlen: 0,
1319 globflags: 0,
1320 globend: 0,
1321 flags: 0,
1322 patnpar: 0,
1323 patstartch: 0,
1324 };
1325 let r = crate::ported::zsh_h::eprog {
1326 // c:2778 — `flags = (heap ? EF_HEAP : EF_REAL) | (p->flags & EF_RUN);`
1327 flags: (if heap { EF_HEAP } else { EF_REAL }) | (p.flags & EF_RUN),
1328 len: p.len,
1329 npats: p.npats,
1330 // c:2787 — `nref = heap ? -1 : 1;`
1331 nref: if heap { -1 } else { 1 },
1332 prog: p.prog.clone(),
1333 strs: p.strs.clone(),
1334 pats: (0..p.npats).map(|_| Box::new(dummy_pat())).collect(),
1335 shf: None,
1336 dump: None,
1337 };
1338 r
1339}
1340
1341/// Port of `void useeprog(Eprog p)` from `Src/parse.c:2813`.
1342/// `if (p && p != &dummy_eprog && p->nref >= 0) p->nref++;` —
1343/// pin a real (non-heap, non-dummy) Eprog so it survives the
1344/// next `freeeprog`.
1345pub fn useeprog(p: &mut crate::ported::zsh_h::eprog) {
1346 // c:2815 — `if (p && p != &dummy_eprog && p->nref >= 0)`
1347 if p.nref >= 0 {
1348 p.nref += 1; // c:2816
1349 }
1350}
1351
1352/// Port of `void freeeprog(Eprog p)` from `Src/parse.c:2823`.
1353/// Refcount-decrement; when it hits zero, drops the pattern progs,
1354/// decrements the dump refcount if any, and releases the eprog.
1355/// `dummy_eprog` is never freed. Heap-eprogs (`nref < 0`) are
1356/// never freed either — they live as long as the heap arena.
1357pub fn freeeprog(p: &mut crate::ported::zsh_h::eprog) {
1358 // c:2829 — `if (p && p != &dummy_eprog) { ... }`
1359 if p.nref > 0 {
1360 p.nref -= 1; // c:2832
1361 if p.nref == 0 {
1362 // c:2833-2840 — drop pats, dump refcount, then the eprog.
1363 // Rust's Drop handles the per-field cleanup; we just
1364 // need to decrement the dump count first.
1365 if let Some(dump) = p.dump.take() {
1366 let dumped = (*dump).clone();
1367 decrdumpcount(&dumped); // c:2837
1368 }
1369 p.prog.clear();
1370 p.strs = None;
1371 p.pats.clear();
1372 }
1373 }
1374}
1375
// ============================================================
// Wordcode runtime getters (parse.c:2853-3060)
//
// These read packed wordcode out of an Eprog: strings are
// decoded from `prog` / `strs`, and redirection runs can be
// copied into the live wordcode buffer. Each function keeps
// the shape of its C counterpart and cites the source lines.
// ============================================================
1383
1384/// Port of `ecrawstr(Eprog p, Wordcode pc, int *tokflag)` from
1385/// `Src/parse.c:2891`. Like `ecgetstr` but reads at the given pc
1386/// without advancing — caller steps `pc` separately.
1387pub fn ecrawstr(p: &eprog, pc: usize, tokflag: Option<&mut i32>) -> String {
1388 if pc >= p.prog.len() {
1389 return String::new();
1390 }
1391 let c = p.prog[pc]; // c:2894
1392 if let Some(tf) = tokflag {
1393 *tf = i32::from((c & 1) != 0); // c:2898/2906/2912
1394 }
1395 if c == 6 || c == 7 {
1396 // c:2897
1397 return String::new();
1398 }
1399 if (c & 2) != 0 {
1400 // c:2902
1401 let b0 = ((c >> 3) & 0xff) as u8;
1402 let b1 = ((c >> 11) & 0xff) as u8;
1403 let b2 = ((c >> 19) & 0xff) as u8;
1404 let mut v = vec![b0, b1, b2];
1405 v.retain(|&x| x != 0);
1406 String::from_utf8_lossy(&v).into_owned()
1407 } else {
1408 // c:2911
1409 let off = (c >> 2) as usize;
1410 let strs_bytes = p.strs.as_deref().unwrap_or("").as_bytes();
1411 if off >= strs_bytes.len() {
1412 return String::new();
1413 }
1414 let tail = &strs_bytes[off..];
1415 let end = tail.iter().position(|&b| b == 0).unwrap_or(tail.len());
1416 String::from_utf8_lossy(&tail[..end]).into_owned()
1417 }
1418}
1419
1420/// Port of `ecgetarr(Estate s, int num, int dup, int *tokflag)` from
1421/// `Src/parse.c:2917`. Reads `num` strings from wordcode at `s->pc`
1422/// and OR-folds each entry's token flag into `*tokflag`.
1423pub fn ecgetarr(s: &mut estate, num: usize, dup: i32, tokflag: Option<&mut i32>) -> Vec<String> {
1424 let mut ret: Vec<String> = Vec::with_capacity(num); // c:2922
1425 let mut tf: i32 = 0;
1426 for _ in 0..num {
1427 // c:2924 `while (num--)`
1428 let mut tmp = 0;
1429 ret.push(ecgetstr(s, dup, Some(&mut tmp))); // c:2925
1430 tf |= tmp; // c:2926
1431 }
1432 if let Some(out) = tokflag {
1433 // c:2929
1434 *out = tf;
1435 }
1436 ret
1437}
1438
1439/// Port of `ecgetlist(Estate s, int num, int dup, int *tokflag)` from
1440/// `Src/parse.c:2937`. Same shape as `ecgetarr` but C returns
1441/// `LinkList`; zshrs uses `Vec<String>` for both.
1442pub fn ecgetlist(
1443 s: &mut crate::ported::zsh_h::estate,
1444 num: usize,
1445 dup: i32,
1446 tokflag: Option<&mut i32>,
1447) -> Vec<String> {
1448 if num == 0 {
1449 // c:2949-2952
1450 if let Some(tf) = tokflag {
1451 *tf = 0;
1452 }
1453 return Vec::new();
1454 }
1455 ecgetarr(s, num, dup, tokflag)
1456}
1457
1458/// Port of `eccopyredirs(Estate s)` from `Src/parse.c:3003`. Reads
1459/// the WC_REDIR run at `s->pc`, counts the wordcodes needed,
1460/// reserves space in `ecbuf` via `ecispace`, then re-walks `s->pc`
1461/// re-emitting each redir's wordcodes into the reserved slot —
1462/// finally calls `bld_eprog(0)` to package the result as an Eprog.
1463pub fn eccopyredirs(s: &mut crate::ported::zsh_h::estate) -> Option<crate::ported::zsh_h::eprog> {
1464 let prog_len = s.prog.prog.len();
1465 if s.pc >= prog_len {
1466 return None;
1467 }
1468 // c:3007-3009 — `if (wc_code(*pc) != WC_REDIR) return NULL;`
1469 let first_code = s.prog.prog[s.pc];
1470 if wc_code(first_code) != WC_REDIR {
1471 return None;
1472 }
1473 // c:3011 — `init_parse();`
1474 init_parse();
1475
1476 // c:3013-3027 — count wordcodes the redir run will need.
1477 // Each WC_REDIR contributes `code + fd1 + name` = 3, plus
1478 // `+2` if WC_REDIR_FROM_HEREDOC (terminator + munged), plus
1479 // `+1` if WC_REDIR_VARID.
1480 let mut probe = s.pc;
1481 let mut ncodes = 0usize;
1482 loop {
1483 if probe >= prog_len {
1484 break;
1485 }
1486 let code = s.prog.prog[probe];
1487 if wc_code(code) != WC_REDIR {
1488 break;
1489 }
1490 let mut ncode = if WC_REDIR_FROM_HEREDOC(code) != 0 {
1491 5
1492 } else {
1493 3
1494 };
1495 if WC_REDIR_VARID(code) != 0 {
1496 ncode += 1;
1497 }
1498 probe += ncode;
1499 ncodes += ncode;
1500 }
1501
1502 // c:3028-3029 — `r = ecused; ecispace(r, ncodes);`
1503 let r0 = ECUSED.get() as usize;
1504 ecispace(r0, ncodes);
1505
1506 // c:3031-3053 — re-walk `s->pc` and write into ecbuf[r..].
1507 let mut r = r0;
1508 loop {
1509 if s.pc >= prog_len {
1510 break;
1511 }
1512 let code = s.prog.prog[s.pc];
1513 if wc_code(code) != WC_REDIR {
1514 break;
1515 }
1516 s.pc += 1;
1517 // c:3036 — `ecbuf[r++] = code;`
1518 ECBUF.with_borrow_mut(|buf| {
1519 if r >= buf.len() {
1520 buf.resize(r + 1, 0);
1521 }
1522 buf[r] = code;
1523 });
1524 r += 1;
1525 // c:3038 — `ecbuf[r++] = *s->pc++;` (the fd1 word)
1526 let fd1 = s.prog.prog[s.pc];
1527 s.pc += 1;
1528 ECBUF.with_borrow_mut(|buf| {
1529 if r >= buf.len() {
1530 buf.resize(r + 1, 0);
1531 }
1532 buf[r] = fd1;
1533 });
1534 r += 1;
1535 // c:3041 — `ecbuf[r++] = ecstrcode(ecgetstr(s, EC_NODUP, NULL));`
1536 let name = ecgetstr(s, EC_NODUP, None);
1537 let nc = ecstrcode(&name);
1538 ECBUF.with_borrow_mut(|buf| {
1539 if r >= buf.len() {
1540 buf.resize(r + 1, 0);
1541 }
1542 buf[r] = nc;
1543 });
1544 r += 1;
1545 // c:3042-3047 — heredoc terminators.
1546 if WC_REDIR_FROM_HEREDOC(code) != 0 {
1547 let term = ecgetstr(s, EC_NODUP, None);
1548 let tc = ecstrcode(&term);
1549 ECBUF.with_borrow_mut(|buf| {
1550 if r >= buf.len() {
1551 buf.resize(r + 1, 0);
1552 }
1553 buf[r] = tc;
1554 });
1555 r += 1;
1556 let munged = ecgetstr(s, EC_NODUP, None);
1557 let mc = ecstrcode(&munged);
1558 ECBUF.with_borrow_mut(|buf| {
1559 if r >= buf.len() {
1560 buf.resize(r + 1, 0);
1561 }
1562 buf[r] = mc;
1563 });
1564 r += 1;
1565 }
1566 // c:3048-3049 — varid.
1567 if WC_REDIR_VARID(code) != 0 {
1568 let varid = ecgetstr(s, EC_NODUP, None);
1569 let vc = ecstrcode(&varid);
1570 ECBUF.with_borrow_mut(|buf| {
1571 if r >= buf.len() {
1572 buf.resize(r + 1, 0);
1573 }
1574 buf[r] = vc;
1575 });
1576 r += 1;
1577 }
1578 }
1579
1580 // c:3056 — `return bld_eprog(0);` — `bld_eprog` appends the
1581 // WC_END marker and packages ECBUF/ECSTRS into an Eprog.
1582 Some(bld_eprog(false))
1583}
1584
1585/// `mod_export struct eprog dummy_eprog;` from `Src/parse.c:3066`.
1586/// Placeholder Eprog used by `shf->funcdef = &dummy_eprog;` in
1587/// builtin.c when clearing a stale autoload stub. Held in a Mutex
1588/// so `init_eprog` can set it once at shell startup.
1589pub static DUMMY_EPROG: std::sync::Mutex<crate::ported::zsh_h::eprog> =
1590 std::sync::Mutex::new(crate::ported::zsh_h::eprog {
1591 flags: 0,
1592 len: 0,
1593 npats: 0,
1594 nref: 0,
1595 prog: Vec::new(),
1596 strs: None,
1597 pats: Vec::new(),
1598 shf: None,
1599 dump: None,
1600 });
1601
1602/// Port of `init_eprog(void)` from `Src/parse.c:3069`. Sets up
1603/// `dummy_eprog_code = WCB_END(); dummy_eprog.len = sizeof(wordcode);
1604/// dummy_eprog.prog = &dummy_eprog_code; dummy_eprog.strs = NULL;`.
1605/// Called once at shell startup (init_main → init_misc → init_eprog).
1606pub fn init_eprog() {
1607 let mut d = DUMMY_EPROG.lock().unwrap();
1608 d.prog = vec![crate::ported::zsh_h::WCB_END()]; // c:3071/3073
1609 d.len = std::mem::size_of::<wordcode>() as i32; // c:3072
1610 d.strs = None; // c:3074
1611 d.flags = 0;
1612 d.npats = 0;
1613 d.nref = 0;
1614}
1615
1616/// Parse the complete input. Direct port of `parse_event` /
1617/// `par_list` from `Src/parse.c:614-720`. On syntax error,
1618/// sets `errflag |= ERRFLAG_ERROR` (via `zerr`) and returns the
1619/// partial program — callers check `errflag` to detect failure,
1620/// matching C's `Eprog parse_event(...)` + `if (errflag) {...}`.
1621pub fn parse() -> ZshProgram {
1622 zshlex();
1623
1624 let mut program = parse_program_until(None);
1625
1626 // Surface lexer-level errors (unmatched quote/heredoc/etc.)
1627 // that the parser silently rolls past. zsh aborts with a
1628 // diagnostic via `zerr` which sets `errflag |= ERRFLAG_ERROR`.
1629 if let Some(msg) = crate::ported::lex::error() {
1630 crate::ported::utils::zerr(&msg);
1631 }
1632
1633 // Post-pass: wire heredoc bodies (collected by lexer.process_heredocs)
1634 // back into ZshRedir.heredoc fields via heredoc_idx.
1635 let bodies: Vec<HereDocInfo> = heredocs_clone()
1636 .into_iter()
1637 .map(|h| HereDocInfo {
1638 content: h.content,
1639 terminator: h.terminator,
1640 quoted: h.quoted,
1641 })
1642 .collect();
1643 if !bodies.is_empty() {
1644 fill_heredoc_bodies(&mut program, &bodies);
1645 }
1646
1647 program
1648}
1649
1650/// P9c: wordcode-emission parser entry. Direct port of zsh's
1651/// `parse_event(int endtok)` from `Src/parse.c:683-720`. Emits a
1652/// minimal wordcode stream for the parsed program into the live
1653/// `ECBUF` thread_local via P9b's `ecadd` / `ecstrcode` API and
1654/// returns the start index of the emitted Eprog (matching C's
1655/// `Eprog parse_event(...)` return).
1656///
1657/// Minimal implementation: emits `WCB_END()` only for now (P9c
1658/// stub). The full par_event/par_list/par_sublist/par_pipe/par_cmd
1659/// recursion that walks the token stream and emits the right
1660/// wordcode for each production is the multi-week rewrite called
1661/// out in PORT_PLAN.md. This stub establishes the entry point and
1662/// drives the live ECBUF emission so downstream consumers (P9d
1663/// exec_wordcode) have a real wordcode buffer to walk.
1664pub fn par_event_wordcode() -> usize {
1665 let start = ECUSED.get() as usize;
1666 // parse.c:691-710 — par_list loop. Each iteration emits one WC_LIST
1667 // entry plus its sublist payload; terminator handling between
1668 // lists matches the SEMI/NEWLIN/AMPER/SEPER switch in the C source.
1669 while tok() != ENDINPUT && tok() != LEXERR {
1670 par_list_wordcode();
1671 match tok() {
1672 SEMI | NEWLIN | AMPER | AMPERBANG | SEPER => {
1673 zshlex();
1674 }
1675 _ => break,
1676 }
1677 }
1678 // parse.c:712 — `ecadd(WCB_END());`
1679 ecadd(crate::ported::zsh_h::WCB_END());
1680 start
1681}
1682
/// Thread-local mirror of C parse.c's `int *cmplx` argument. The
/// `par_*` wordcode emitters read and update it through
/// `cmplx_get` / `cmplx_or` / `cmplx_set` during the recursive
/// descent instead of threading an `int *cmplx` pointer through
/// every signature — Rust uses a thread_local so the signatures
/// can stay no-arg.
thread_local! {
    // "Complex" flag for the construct currently being parsed;
    // starts out false.
    static PARSER_CMPLX: std::cell::Cell<bool> = const { std::cell::Cell::new(false) };
    // Re-entrancy guard used by the TIME arm of `par_cmd_wordcode`
    // (C's `static int inpartime`): true while a `time` prefix is
    // being parsed, so a nested `time` is treated as a plain word.
    static PARSER_INPARTIME: std::cell::Cell<bool> = const { std::cell::Cell::new(false) };
}
1693
1694#[inline]
1695fn cmplx_get() -> bool {
1696 PARSER_CMPLX.with(|c| c.get())
1697}
1698#[inline]
1699fn cmplx_or(b: bool) {
1700 PARSER_CMPLX.with(|c| c.set(c.get() | b));
1701}
1702#[inline]
1703fn cmplx_set(b: bool) {
1704 PARSER_CMPLX.with(|c| c.set(b));
1705}
1706
1707/// Port of `par_list(int *cmplx)` from `Src/parse.c:771-803`.
1708/// `list : { SEPER } [ sublist [ { SEPER | AMPER | AMPERBANG } list ] ]`.
1709/// Drives the WCB_LIST chain — for each sublist, emits a WCB_LIST
1710/// header, recurses into par_sublist, then patches the header
1711/// with the right Z_SYNC/Z_ASYNC/Z_ASYNC|Z_DISOWN flag + Z_END
1712/// marker on the last entry.
1713pub fn par_list_wordcode() {
1714 let mut lp: Option<usize> = None;
1715 loop {
1716 // c:780 — `while (tok == SEPER) zshlex();`
1717 while tok() == SEPER {
1718 zshlex();
1719 }
1720 // c:782 — `p = ecadd(0);`
1721 let p = ecadd(0);
1722 // c:783 — `c = 0;` — local cmplx accumulator for this sublist.
1723 let outer = cmplx_get();
1724 cmplx_set(false);
1725 let sublist_ok = par_sublist_wordcode();
1726 let c = cmplx_get();
1727 cmplx_set(outer | c);
1728 if sublist_ok {
1729 // c:785 — `*cmplx |= c;` (already done above)
1730 let t = tok();
1731 if t == SEPER || t == AMPER || t == AMPERBANG {
1732 // c:787 — `if (tok != SEPER) *cmplx = 1;`
1733 if t != SEPER {
1734 cmplx_set(true);
1735 }
1736 // c:788 — `set_list_code(p, ...)`
1737 let z = if t == SEPER {
1738 Z_SYNC
1739 } else if t == AMPER {
1740 Z_ASYNC
1741 } else {
1742 Z_ASYNC | Z_DISOWN
1743 };
1744 set_list_code(p, z, c);
1745 // c:792-794 — `incmdpos = 1; do { zshlex(); } while
1746 // (tok == SEPER);`
1747 set_incmdpos(true);
1748 loop {
1749 zshlex();
1750 if tok() != SEPER {
1751 break;
1752 }
1753 }
1754 lp = Some(p);
1755 continue; // c:795 `goto rec;`
1756 } else {
1757 // c:797 — `set_list_code(p, (Z_SYNC | Z_END), c);`
1758 set_list_code(p, Z_SYNC | Z_END, c);
1759 }
1760 } else {
1761 // c:799-802 — `ecused--; if (lp >= 0) ecbuf[lp] |= wc_bdata(Z_END);`
1762 ECUSED.set((ECUSED.get() - 1).max(0));
1763 if let Some(prev) = lp {
1764 ECBUF.with_borrow_mut(|b| {
1765 if prev < b.len() {
1766 b[prev] |= wc_bdata(Z_END as wordcode);
1767 }
1768 });
1769 }
1770 }
1771 break;
1772 }
1773}
1774
1775/// Port of `par_list1(int *cmplx)` from `Src/parse.c:805-816`.
1776/// Single-sublist variant used by funcdef bodies and the short
1777/// `for`/`while`/`repeat` forms — exactly one sublist with
1778/// `Z_SYNC|Z_END`, no chain.
1779pub fn par_list1_wordcode() {
1780 // c:807 — `p = ecadd(0); c = 0;`
1781 let p = ecadd(0);
1782 let outer = cmplx_get();
1783 cmplx_set(false);
1784 let ok = par_sublist_wordcode();
1785 let c = cmplx_get();
1786 cmplx_set(outer | c);
1787 if ok {
1788 // c:809-811 — `set_list_code(p, Z_SYNC|Z_END, c); *cmplx |= c;`
1789 set_list_code(p, Z_SYNC | Z_END, c);
1790 } else {
1791 // c:813 — `ecused--;`
1792 ECUSED.set((ECUSED.get() - 1).max(0));
1793 }
1794}
1795
1796/// Port of `par_sublist(int *cmplx)` from `Src/parse.c:823-865`.
1797/// `sublist : sublist2 [ ( DBAR | DAMPER ) { SEPER } sublist ]`.
1798/// Emits a WCB_SUBLIST header, recurses into par_sublist2 for
1799/// the !/coproc prefix + pipeline, then chains via DBAR (`||`)
1800/// or DAMPER (`&&`) recursively. Returns true if at least one
1801/// pipeline was emitted.
1802pub fn par_sublist_wordcode() -> bool {
1803 // c:827 — `p = ecadd(0);`
1804 let p = ecadd(0);
1805 let outer = cmplx_get();
1806 cmplx_set(false);
1807 let mut c2 = 0i32;
1808 let f = par_sublist2(&mut c2);
1809 let c = c2 != 0;
1810 cmplx_set(outer | c);
1811 match f {
1812 Some(flags) => {
1813 // c:831 — `e = ecused;`
1814 let e = ECUSED.get() as usize;
1815 if tok() == DBAR || tok() == DAMPER {
1816 // c:834 — `qtok = tok;`
1817 let qtok = tok();
1818 // c:836 — `cmdpush(tok == DBAR ? CS_CMDOR : CS_CMDAND);`
1819 cmdpush(if qtok == DBAR {
1820 CS_CMDOR as u8
1821 } else {
1822 CS_CMDAND as u8
1823 });
1824 // c:837 — `zshlex();`
1825 zshlex();
1826 // c:838-839 — `while (tok == SEPER) zshlex();`
1827 while tok() == SEPER {
1828 zshlex();
1829 }
1830 // c:840 — `sl = par_sublist(cmplx);`
1831 let sl = par_sublist_wordcode();
1832 // c:841-844 — `set_sublist_code(p, (sl ? (qtok==DBAR ?
1833 // WC_SUBLIST_OR : WC_SUBLIST_AND) : WC_SUBLIST_END),
1834 // f, e-1-p, c);`
1835 let st = if sl {
1836 if qtok == DBAR {
1837 WC_SUBLIST_OR
1838 } else {
1839 WC_SUBLIST_AND
1840 }
1841 } else {
1842 WC_SUBLIST_END
1843 };
1844 set_sublist_code(p, st as i32, flags, (e - 1 - p) as i32, c);
1845 // c:845 — `cmdpop();`
1846 cmdpop();
1847 } else {
1848 // c:847-849 — `if (tok == AMPER || tok == AMPERBANG)
1849 // { c = 1; *cmplx |= c; }`
1850 let c_final = if tok() == AMPER || tok() == AMPERBANG {
1851 cmplx_set(true);
1852 true
1853 } else {
1854 c
1855 };
1856 // c:851 — `set_sublist_code(p, WC_SUBLIST_END, f,
1857 // e-1-p, c);`
1858 set_sublist_code(p, WC_SUBLIST_END as i32, flags, (e - 1 - p) as i32, c_final);
1859 }
1860 true
1861 }
1862 None => {
1863 // c:855-857 — `ecused--; return 0;`
1864 ECUSED.set((ECUSED.get() - 1).max(0));
1865 false
1866 }
1867 }
1868}
1869
1870/// Port of `par_pline(int *cmplx)` from `Src/parse.c:894-955`.
1871/// `pline : cmd [ ( BAR | BARAMP ) { SEPER } pline ]`. Emits a
1872/// WCB_PIPE header (mid for chain links, end for the last cmd)
1873/// plus the optional BARAMP `2>&1` synthetic redir.
1874pub fn par_pipe_wordcode() -> bool {
1875 let line = toklineno() as i64;
1876 // c:898 — `p = ecadd(0);`
1877 let p = ecadd(0);
1878 // c:900-903 — `if (!par_cmd(cmplx, 0)) { ecused--; return 0; }`
1879 if !par_cmd_wordcode(false) {
1880 ECUSED.set((ECUSED.get() - 1).max(0));
1881 return false;
1882 }
1883 if tok() == BAR_TOK {
1884 // c:905 — `*cmplx = 1;`
1885 cmplx_set(true);
1886 // c:906 — `cmdpush(CS_PIPE);`
1887 cmdpush(CS_PIPE as u8);
1888 // c:907 — `zshlex();`
1889 zshlex();
1890 // c:908-909 — `while (tok == SEPER) zshlex();`
1891 while tok() == SEPER {
1892 zshlex();
1893 }
1894 // c:910 — `ecbuf[p] = WCB_PIPE(WC_PIPE_MID, line>=0 ? line+1 : 0);`
1895 ECBUF.with_borrow_mut(|b| {
1896 if p < b.len() {
1897 b[p] = WCB_PIPE(
1898 WC_PIPE_MID,
1899 if line >= 0 { (line + 1) as wordcode } else { 0 },
1900 );
1901 }
1902 });
1903 // c:911 — `ecispace(p+1, 1);`
1904 ecispace(p + 1, 1);
1905 // c:912 — `ecbuf[p+1] = ecused - 1 - p;`
1906 let used = ECUSED.get() as usize;
1907 ECBUF.with_borrow_mut(|b| {
1908 if p + 1 < b.len() {
1909 b[p + 1] = (used.saturating_sub(1 + p)) as wordcode;
1910 }
1911 });
1912 // c:913-915 — `if (!par_pline(cmplx)) tok = LEXERR;`
1913 if !par_pipe_wordcode() {
1914 set_tok(LEXERR);
1915 }
1916 cmdpop();
1917 true
1918 } else if tok() == BARAMP {
1919 // c:920-924 — walk past inline WC_REDIR to find r.
1920 let mut r = p + 1;
1921 loop {
1922 let code = ECBUF.with_borrow(|b| b.get(r).copied().unwrap_or(0));
1923 if wc_code(code) != WC_REDIR {
1924 break;
1925 }
1926 r += WC_REDIR_WORDS(code) as usize;
1927 }
1928 // c:926-929 — `ecispace(r, 3);` + synthetic `2>&1` redir
1929 ecispace(r, 3);
1930 ECBUF.with_borrow_mut(|b| {
1931 if r + 2 < b.len() {
1932 b[r] = WCB_REDIR(REDIR_MERGEOUT as wordcode);
1933 b[r + 1] = 2;
1934 b[r + 2] = ecstrcode("1");
1935 }
1936 });
1937 cmplx_set(true);
1938 cmdpush(CS_ERRPIPE as u8);
1939 zshlex();
1940 while tok() == SEPER {
1941 zshlex();
1942 }
1943 ECBUF.with_borrow_mut(|b| {
1944 if p < b.len() {
1945 b[p] = WCB_PIPE(
1946 WC_PIPE_MID,
1947 if line >= 0 { (line + 1) as wordcode } else { 0 },
1948 );
1949 }
1950 });
1951 ecispace(p + 1, 1);
1952 let used = ECUSED.get() as usize;
1953 ECBUF.with_borrow_mut(|b| {
1954 if p + 1 < b.len() {
1955 b[p + 1] = (used.saturating_sub(1 + p)) as wordcode;
1956 }
1957 });
1958 if !par_pipe_wordcode() {
1959 set_tok(LEXERR);
1960 }
1961 cmdpop();
1962 true
1963 } else {
1964 // c:951 — `ecbuf[p] = WCB_PIPE(WC_PIPE_END, line>=0 ? line+1 : 0);`
1965 ECBUF.with_borrow_mut(|b| {
1966 if p < b.len() {
1967 b[p] = WCB_PIPE(
1968 WC_PIPE_END,
1969 if line >= 0 { (line + 1) as wordcode } else { 0 },
1970 );
1971 }
1972 });
1973 true
1974 }
1975}
1976
1977/// Port of `par_cmd(int *cmplx, int zsh_construct)` from
1978/// `Src/parse.c:958-1085`. Parses leading + trailing redirs and
1979/// dispatches on the current token to the right par_* builder.
1980/// Returns false only when no command was emitted (no redirs +
1981/// par_simple returned 0).
1982pub fn par_cmd_wordcode(zsh_construct: bool) -> bool {
1983 let mut nr = 0i32;
1984 // c:962 — `r = ecused;` — used for trailing-redir patch
1985 // bookkeeping; the actual redir mutation goes through par_redir
1986 // which keeps its own offset.
1987 let mut r = ECUSED.get();
1988 // c:964-969 — leading redirs.
1989 if IS_REDIROP(tok()) {
1990 cmplx_set(true);
1991 while IS_REDIROP(tok()) {
1992 if let Some(_) = par_redir() {
1993 nr += 1;
1994 } else {
1995 break;
1996 }
1997 }
1998 }
1999 match tok() {
2000 FOR => {
2001 cmdpush(CS_FOR as u8);
2002 par_for_wordcode();
2003 cmdpop();
2004 }
2005 FOREACH => {
2006 cmdpush(CS_FOREACH as u8);
2007 par_for_wordcode();
2008 cmdpop();
2009 }
2010 SELECT => {
2011 cmplx_set(true);
2012 cmdpush(CS_SELECT as u8);
2013 par_for_wordcode();
2014 cmdpop();
2015 }
2016 CASE => {
2017 cmdpush(CS_CASE as u8);
2018 par_case_wordcode();
2019 cmdpop();
2020 }
2021 IF => {
2022 par_if_wordcode();
2023 }
2024 WHILE => {
2025 cmdpush(CS_WHILE as u8);
2026 par_while_wordcode();
2027 cmdpop();
2028 }
2029 UNTIL => {
2030 cmdpush(CS_UNTIL as u8);
2031 par_while_wordcode();
2032 cmdpop();
2033 }
2034 REPEAT => {
2035 cmdpush(CS_REPEAT as u8);
2036 par_repeat_wordcode();
2037 cmdpop();
2038 }
2039 INPAR_TOK => {
2040 cmplx_set(true);
2041 cmdpush(CS_SUBSH as u8);
2042 par_subsh_wordcode_impl(zsh_construct);
2043 cmdpop();
2044 }
2045 INBRACE_TOK => {
2046 cmdpush(CS_CURSH as u8);
2047 par_subsh_wordcode_impl(zsh_construct);
2048 cmdpop();
2049 }
2050 FUNC => {
2051 cmdpush(CS_FUNCDEF as u8);
2052 par_funcdef_wordcode();
2053 cmdpop();
2054 }
2055 DINBRACK => {
2056 cmdpush(CS_COND as u8);
2057 par_cond_wordcode();
2058 cmdpop();
2059 }
2060 DINPAR => {
2061 par_arith_wordcode();
2062 }
2063 TIME => {
2064 // c:1037-1050 — `static int inpartime` guard so
2065 // `time time foo` doesn't recurse infinitely.
2066 if !PARSER_INPARTIME.with(|c| c.get()) {
2067 cmplx_set(true);
2068 PARSER_INPARTIME.with(|c| c.set(true));
2069 par_time_wordcode();
2070 PARSER_INPARTIME.with(|c| c.set(false));
2071 } else {
2072 set_tok(STRING_LEX);
2073 let sr = par_simple_wordcode_impl(nr);
2074 if sr == 0 && nr == 0 {
2075 return false;
2076 }
2077 if sr > 1 {
2078 cmplx_set(true);
2079 r += sr - 1;
2080 }
2081 }
2082 }
2083 _ => {
2084 // c:1054 — `if (!(sr = par_simple(cmplx, nr)))`
2085 let sr = par_simple_wordcode_impl(nr);
2086 if sr == 0 {
2087 if nr == 0 {
2088 return false;
2089 }
2090 } else if sr > 1 {
2091 cmplx_set(true);
2092 r += sr - 1;
2093 }
2094 }
2095 }
2096 // c:1075-1078 — trailing redirs.
2097 if IS_REDIROP(tok()) {
2098 cmplx_set(true);
2099 while IS_REDIROP(tok()) {
2100 let _ = par_redir();
2101 }
2102 }
2103 // c:1079-1082 — `incmdpos=1; incasepat=0; incond=0; intypeset=0;`
2104 set_incmdpos(true);
2105 set_incasepat(0);
2106 set_incond(0);
2107 set_intypeset(false);
2108 let _ = r;
2109 true
2110}
2111
2112/// Adapter: par_cmd_wordcode wrapper for sites that don't supply
2113/// the zsh_construct flag (defaults to false, matching the C
2114/// `par_cmd(cmplx, 0)` call shape at c:902).
2115pub fn par_cmd_wordcode_noargs() {
2116 par_cmd_wordcode(false);
2117}
2118
/// Port of `par_for(int *cmplx)` from `Src/parse.c:1087-1199`.
///
/// Handles `for`, `foreach` and `select` — the variant is read from
/// the current token on entry. Emits, in order: a placeholder header
/// word, the loop variables (or the three arithmetic strings for the
/// `for (( init; cond; step ))` form), the optional word list, and
/// the loop body; the header is patched last with the loop type and
/// the distance to the end of the construct.
///
/// On a syntax error the function reports through `error(...)` and
/// returns immediately, leaving the header word unpatched.
pub fn par_for_wordcode() {
    // Capture the variant before the keyword token is consumed.
    let csh = tok() == FOREACH;
    let sel = tok() == SELECT;
    // Header placeholder, patched at the very end.
    let p = ecadd(0);
    set_incmdpos(false);
    // `infor` is set to 2 for `for` only, before lexing the next token.
    set_infor(if tok() == FOR { 2 } else { 0 });
    zshlex();
    let type_code: wordcode;
    if tok() == DINPAR {
        // Arithmetic form: three consecutive tokens (DINPAR, DINPAR,
        // DOUTPAR), each carrying one expression string in tokstr.
        zshlex();
        if tok() != DINPAR {
            error("par_for: expected init");
            return;
        }
        ecstr(&tokstr().unwrap_or_default());
        zshlex();
        if tok() != DINPAR {
            error("par_for: expected cond");
            return;
        }
        ecstr(&tokstr().unwrap_or_default());
        zshlex();
        if tok() != DOUTPAR {
            error("par_for: expected ))");
            return;
        }
        ecstr(&tokstr().unwrap_or_default());
        set_infor(0);
        set_incmdpos(true);
        zshlex();
        type_code = WC_FOR_COND;
    } else {
        set_infor(0);
        if tok() != STRING_LEX {
            error("par_for: expected identifier");
            return;
        }
        // `select` takes a single variable and has no variable-count
        // word; `for`/`foreach` reserve one.
        let np = if !sel { Some(ecadd(0)) } else { None };
        let mut n = 0u32;
        set_incmdpos(true);
        // Collect loop variable names until a non-word token or the
        // word `in` (for `select`, stop after the first name).
        loop {
            n += 1;
            ecstr(&tokstr().unwrap_or_default());
            zshlex();
            if tok() != STRING_LEX || sel {
                break;
            }
            if tokstr().as_deref() == Some("in") {
                break;
            }
        }
        if let Some(np) = np {
            ECBUF.with_borrow_mut(|b| {
                if np < b.len() {
                    b[np] = n;
                }
            });
        }
        // Remember whether a newline followed the variables: in that
        // case the `( words )` list form below is not accepted.
        let posix_in = isnewlin() != 0;
        while isnewlin() != 0 {
            zshlex();
        }
        if tok() == STRING_LEX && tokstr().as_deref() == Some("in") {
            // `in word...` list, terminated by a separator.
            set_incmdpos(false);
            zshlex();
            // Word-count placeholder for the list.
            let np = ecadd(0);
            let mut n = 0u32;
            while tok() == STRING_LEX {
                if let Some(s) = tokstr() {
                    ecstr(&s);
                }
                n += 1;
                zshlex();
            }
            if tok() != SEPER {
                error("par_for: expected separator after `in`");
                return;
            }
            ECBUF.with_borrow_mut(|b| {
                if np < b.len() {
                    b[np] = n as wordcode;
                }
            });
            type_code = if sel { WC_SELECT_LIST } else { WC_FOR_LIST };
        } else if !posix_in && tok() == INPAR_TOK {
            // `( word... )` list; newlines are allowed before and
            // after the words.
            set_incmdpos(false);
            zshlex();
            let np = ecadd(0);
            let mut n = 0u32;
            while tok() == NEWLIN {
                zshlex();
            }
            while tok() == STRING_LEX {
                if let Some(s) = tokstr() {
                    ecstr(&s);
                }
                n += 1;
                zshlex();
            }
            while tok() == NEWLIN {
                zshlex();
            }
            if tok() != OUTPAR_TOK {
                error("par_for: expected `)`");
                return;
            }
            ECBUF.with_borrow_mut(|b| {
                if np < b.len() {
                    b[np] = n as wordcode;
                }
            });
            set_incmdpos(true);
            zshlex();
            type_code = if sel { WC_SELECT_LIST } else { WC_FOR_LIST };
        } else {
            // No explicit list: the PPARAM loop types are used.
            type_code = if sel { WC_SELECT_PPARAM } else { WC_FOR_PPARAM };
        }
    }
    set_incmdpos(true);
    while tok() == SEPER {
        zshlex();
    }
    // Loop body; `csh` lets the helper accept the `foreach ... end` form.
    par_loop_body_wordcode(csh);
    // Patch the header: loop type plus number of words following it.
    let used = ECUSED.get() as usize;
    let off = used.saturating_sub(1 + p) as wordcode;
    ECBUF.with_borrow_mut(|b| {
        if p < b.len() {
            b[p] = if sel {
                WCB_SELECT(type_code, off)
            } else {
                WCB_FOR(type_code, off)
            };
        }
    });
}
2256
2257/// Body dispatch shared by par_for / par_while / par_repeat.
2258/// Direct port of `Src/parse.c:1167-1195`.
2259fn par_loop_body_wordcode(csh: bool) {
2260 if tok() == DOLOOP {
2261 zshlex();
2262 par_list_wordcode();
2263 if tok() != DONE {
2264 error("missing `done`");
2265 return;
2266 }
2267 set_incmdpos(false);
2268 zshlex();
2269 } else if tok() == INBRACE_TOK {
2270 zshlex();
2271 par_list_wordcode();
2272 if tok() != OUTBRACE_TOK {
2273 error("missing `}`");
2274 return;
2275 }
2276 set_incmdpos(false);
2277 zshlex();
2278 } else if csh || isset(CSHJUNKIELOOPS) {
2279 par_list_wordcode();
2280 if tok() != ZEND {
2281 error("missing `end`");
2282 return;
2283 }
2284 set_incmdpos(false);
2285 zshlex();
2286 } else if unset(SHORTLOOPS) {
2287 error("short loop form requires SHORTLOOPS");
2288 } else {
2289 par_list1_wordcode();
2290 }
2291}
2292
2293/// `select` shares par_for body (c:1024 routes SELECT to par_for).
2294pub fn par_select_wordcode() {
2295 par_for_wordcode();
2296}
2297
/// Port of `par_case(int *cmplx)` from `Src/parse.c:1209-1409`.
///
/// Emits a placeholder head word, the interned scrutinee string,
/// and then one record per arm: an arm header, an alternative-count
/// word, each pattern string followed by its pattern-index word, and
/// the arm body list. Arm headers and the head word are patched with
/// their skip distances once the corresponding code has been emitted.
///
/// Accepts both `case WORD in ... esac` and `case WORD { ... }`.
/// Syntax errors are reported through `error(...)` and abort the
/// function, leaving the head word unpatched.
pub fn par_case_wordcode() {
    // Head placeholder, patched after the last arm.
    let p = ecadd(0);
    set_incmdpos(false);
    zshlex();
    if tok() != STRING_LEX {
        error("par_case: expected scrutinee");
        return;
    }
    ecstr(&tokstr().unwrap_or_default());
    set_incmdpos(true);
    zshlex();
    while tok() == SEPER {
        zshlex();
    }
    // The arm list opens with either the word `in` or `{`; which one
    // decides the closing token accepted below.
    let saw_brace = tok() == INBRACE_TOK;
    if !saw_brace && !(tok() == STRING_LEX && tokstr().as_deref() == Some("in")) {
        error("par_case: expected `in` or `{`");
        return;
    }
    zshlex();
    loop {
        while tok() == SEPER {
            zshlex();
        }
        // c:1245-1247 — `esac` can arrive either as the ESAC reswd
        // token (when incmdpos was true at the SEPER between arms,
        // which is the normal case after the body's `;;`) OR as a
        // STRING with tokstr "esac" (alias context or noaliases off).
        // Accept both shapes so the outer arm loop terminates.
        if (saw_brace && tok() == OUTBRACE_TOK)
            || (!saw_brace && tok() == ESAC)
            || (!saw_brace && tok() == STRING_LEX && tokstr().as_deref() == Some("esac"))
        {
            zshlex();
            break;
        }
        // Optional opening `(` before the pattern list.
        if tok() == INPAR_TOK {
            zshlex();
        }
        // c:1265-1266 — `pp = ecadd(0); palts = ecadd(0); nalts = 0;`
        // Two arm-header words: PP holds WCB_CASE(type, body_off),
        // PALTS holds the pattern alternative count.
        let pp = ecadd(0);
        let palts = ecadd(0);
        let mut nalts: u32 = 0;
        // Pattern alternatives separated by `|`.
        loop {
            if tok() != STRING_LEX {
                error("par_case: expected pattern");
                return;
            }
            ecstr(&tokstr().unwrap_or_default());
            // c:1307,1316 — `ecadd(ecnpats++);` after each pattern.
            // Records a per-pattern index slot that the compiled
            // Patprog later drops into. Without this, npats=0 and
            // the strs/wordcode header bytes diverge from C.
            let np = ECNPATS.with(|c| { let v = c.get(); c.set(v + 1); v }) as u32;
            ecadd(np);
            nalts += 1;
            zshlex();
            if tok() != BAR_TOK {
                break;
            }
            zshlex();
        }
        ECBUF.with_borrow_mut(|b| {
            if palts < b.len() {
                b[palts] = nalts;
            }
        });
        if tok() != OUTPAR_TOK {
            error("par_case: expected `)`");
            return;
        }
        set_incmdpos(true);
        zshlex();
        // Arm body.
        par_list_wordcode();
        // c:1330-1336 — arm-terminator drives the WC_CASE_OR /
        // WC_CASE_AND / WC_CASE_TESTAND type tag in the WCB_CASE
        // header, which is patched at pp. Any other token (including
        // the closing one) is tagged WC_CASE_OR.
        let arm_type = match tok() {
            DSEMI => WC_CASE_OR,
            SEMIAMP => WC_CASE_AND,
            SEMIBAR => WC_CASE_TESTAND,
            _ => WC_CASE_OR,
        };
        let used = ECUSED.get() as usize;
        let arm_off = used.saturating_sub(1 + pp) as wordcode;
        ECBUF.with_borrow_mut(|b| {
            if pp < b.len() {
                b[pp] = WCB_CASE(arm_type, arm_off);
            }
        });
        // Consume the arm terminator if there is one; a missing
        // terminator is resolved at the top of the next iteration.
        if tok() == DSEMI || tok() == SEMIAMP || tok() == SEMIBAR {
            zshlex();
        }
    }
    // Patch the head word with the total length of the construct.
    let used = ECUSED.get() as usize;
    let off = used.saturating_sub(1 + p) as wordcode;
    ECBUF.with_borrow_mut(|b| {
        if p < b.len() {
            b[p] = WCB_CASE(WC_CASE_HEAD, off);
        }
    });
}
2403
2404/// Port of `par_if(int *cmplx)` from `Src/parse.c:1411-1519`.
2405pub fn par_if_wordcode() {
2406 let p = ecadd(0);
2407 cmdpush(CS_IF as u8);
2408 loop {
2409 let arm = ecadd(0);
2410 zshlex();
2411 par_list_wordcode();
2412 let body_brace = tok() == INBRACE_TOK;
2413 if !body_brace {
2414 while tok() == SEPER {
2415 zshlex();
2416 }
2417 if tok() != THEN {
2418 error("par_if: expected `then`");
2419 cmdpop();
2420 return;
2421 }
2422 }
2423 cmdpop();
2424 cmdpush(CS_IFTHEN as u8);
2425 zshlex();
2426 par_list_wordcode();
2427 cmdpop();
2428 let used = ECUSED.get() as usize;
2429 let arm_off = used.saturating_sub(1 + arm) as wordcode;
2430 ECBUF.with_borrow_mut(|b| {
2431 if arm < b.len() {
2432 b[arm] = WCB_IF(WC_IF_IF, arm_off);
2433 }
2434 });
2435 match tok() {
2436 ELIF => {
2437 cmdpush(CS_ELIF as u8);
2438 continue;
2439 }
2440 ELSE => {
2441 cmdpush(CS_ELSE as u8);
2442 let arm = ecadd(0);
2443 zshlex();
2444 par_list_wordcode();
2445 let used = ECUSED.get() as usize;
2446 let arm_off = used.saturating_sub(1 + arm) as wordcode;
2447 ECBUF.with_borrow_mut(|b| {
2448 if arm < b.len() {
2449 b[arm] = WCB_IF(WC_IF_IF, arm_off);
2450 }
2451 });
2452 cmdpop();
2453 if tok() != FI {
2454 error("par_if: expected `fi`");
2455 return;
2456 }
2457 zshlex();
2458 break;
2459 }
2460 FI => {
2461 zshlex();
2462 break;
2463 }
2464 _ => {
2465 if body_brace && tok() == OUTBRACE_TOK {
2466 zshlex();
2467 break;
2468 }
2469 error("par_if: expected `elif`/`else`/`fi`");
2470 return;
2471 }
2472 }
2473 }
2474 let used = ECUSED.get() as usize;
2475 let off = used.saturating_sub(1 + p) as wordcode;
2476 ECBUF.with_borrow_mut(|b| {
2477 if p < b.len() {
2478 b[p] = WCB_IF(WC_IF_HEAD, off);
2479 }
2480 });
2481}
2482
2483/// Port of `par_while(int *cmplx)` from `Src/parse.c:1521-1564`.
2484pub fn par_while_wordcode() {
2485 let until = tok() == UNTIL;
2486 let p = ecadd(0);
2487 zshlex();
2488 par_list_wordcode();
2489 while tok() == SEPER {
2490 zshlex();
2491 }
2492 par_loop_body_wordcode(false);
2493 let type_code = if until {
2494 WC_WHILE_UNTIL
2495 } else {
2496 WC_WHILE_WHILE
2497 };
2498 let used = ECUSED.get() as usize;
2499 let off = used.saturating_sub(1 + p) as wordcode;
2500 ECBUF.with_borrow_mut(|b| {
2501 if p < b.len() {
2502 b[p] = WCB_WHILE(type_code, off);
2503 }
2504 });
2505}
2506
2507/// `until` shares par_while body — tok==UNTIL flips the type.
2508pub fn par_until_wordcode() {
2509 par_while_wordcode();
2510}
2511
2512/// Port of `par_repeat(int *cmplx)` from `Src/parse.c:1565-1618`.
2513pub fn par_repeat_wordcode() {
2514 let p = ecadd(0);
2515 set_incmdpos(false);
2516 zshlex();
2517 if tok() != STRING_LEX {
2518 error("par_repeat: expected count");
2519 return;
2520 }
2521 ecstr(&tokstr().unwrap_or_default());
2522 set_incmdpos(true);
2523 zshlex();
2524 while tok() == SEPER {
2525 zshlex();
2526 }
2527 par_loop_body_wordcode(false);
2528 let used = ECUSED.get() as usize;
2529 let off = used.saturating_sub(1 + p) as wordcode;
2530 ECBUF.with_borrow_mut(|b| {
2531 if p < b.len() {
2532 b[p] = WCB_REPEAT(off);
2533 }
2534 });
2535}
2536
2537/// Port of `par_funcdef(int *cmplx)` from `Src/parse.c:1672-1786`.
2538pub fn par_funcdef_wordcode() {
2539 let p = ecadd(0);
2540 // c:1681-1683 — `nocorrect = 1; incmdpos = 0; zshlex();` —
2541 // set BEFORE the zshlex past `function`, so the next-token
2542 // lex doesn't promote `{` to INBRACE_TOK or recognise reswds.
2543 set_nocorrect(1);
2544 set_incmdpos(false);
2545 zshlex();
2546 let np = ecadd(0);
2547 let mut n = 0u32;
2548 // c:1701-1709 — names loop. C special-cases `tokstr[0] ==
2549 // Inbrace || tokstr[0] == '{'` to break out and set tok =
2550 // INBRACE, since a bare `{` at incmdpos=0 lexes as STRING
2551 // but should still open the funcdef body. Without this,
2552 // `function f { ... }` swallowed the `{` as a name and the
2553 // body never started.
2554 while tok() == STRING_LEX {
2555 let s = tokstr().unwrap_or_default();
2556 let bytes = s.as_bytes();
2557 if bytes.len() == 1 && (bytes[0] == b'{' || s == "\u{8f}") {
2558 set_tok(INBRACE_TOK);
2559 break;
2560 }
2561 ecstr(&s);
2562 n += 1;
2563 zshlex();
2564 }
2565 ECBUF.with_borrow_mut(|b| {
2566 if np < b.len() {
2567 b[np] = n;
2568 }
2569 });
2570 // c:1715-1716 — `nocorrect = 0; incmdpos = 1;` — restore
2571 // before the body parse.
2572 set_nocorrect(0);
2573 set_incmdpos(true);
2574 if tok() == INOUTPAR {
2575 zshlex();
2576 }
2577 while tok() == SEPER {
2578 zshlex();
2579 }
2580 if tok() == INBRACE_TOK {
2581 zshlex();
2582 par_list_wordcode();
2583 if tok() != OUTBRACE_TOK {
2584 error("par_funcdef: expected `}`");
2585 return;
2586 }
2587 zshlex();
2588 } else if unset(SHORTLOOPS) {
2589 error("par_funcdef: short body requires SHORTLOOPS");
2590 return;
2591 } else {
2592 par_list1_wordcode();
2593 }
2594 let used = ECUSED.get() as usize;
2595 let off = used.saturating_sub(1 + p) as wordcode;
2596 ECBUF.with_borrow_mut(|b| {
2597 if p < b.len() {
2598 b[p] = WCB_FUNCDEF(off);
2599 }
2600 });
2601}
2602
2603/// `Src/parse.c:1619-1665`. Handles both `(...)` subshell and
2604/// `{...}` brace group (cursh) plus optional `always { ... }`
2605/// trailing block. C uses a single function with `zsh_construct=1`
2606/// for `{...}` and 0 for `(...)`.
2607pub fn par_subsh_wordcode_impl(zsh_construct: bool) {
2608 // c:1621 — `enum lextok otok = tok;`
2609 let otok = tok();
2610 // c:1624 — `p = ecadd(0);`
2611 let p = ecadd(0);
2612 // c:1626 — `pp = ecadd(0);` (extra word for the always-block try slot)
2613 let pp = ecadd(0);
2614 // c:1627 — `zshlex();`
2615 zshlex();
2616 // c:1628 — `par_list(cmplx);`
2617 par_list_wordcode();
2618 // c:1629 — `ecadd(WCB_END());`
2619 ecadd(WCB_END());
2620 // c:1630-1631 — `if (tok != ((otok == INPAR) ? OUTPAR : OUTBRACE))
2621 // YYERRORV(oecused);`
2622 let want = if otok == INPAR_TOK {
2623 OUTPAR_TOK
2624 } else {
2625 OUTBRACE_TOK
2626 };
2627 if tok() != want {
2628 error("par_subsh: missing closing token");
2629 return;
2630 }
2631 // c:1633 — `incmdpos = !zsh_construct;`
2632 set_incmdpos(!zsh_construct);
2633 // c:1634 — `zshlex();`
2634 zshlex();
2635
2636 // c:1637 — `if (otok == INBRACE && tok == STRING && !strcmp(tokstr, "always"))`
2637 let is_always =
2638 otok == INBRACE_TOK && tok() == STRING_LEX && tokstr().as_deref() == Some("always");
2639 if is_always {
2640 // c:1638 — `ecbuf[pp] = WCB_TRY(ecused - 1 - pp);`
2641 let used = ECUSED.get() as usize;
2642 let off = used.saturating_sub(1 + pp);
2643 ECBUF.with_borrow_mut(|b| {
2644 if pp < b.len() {
2645 b[pp] = WCB_TRY(off as wordcode);
2646 }
2647 });
2648 // c:1639 — `incmdpos = 1;`
2649 set_incmdpos(true);
2650 // c:1640-1642 — `do { zshlex(); } while (tok == SEPER);`
2651 loop {
2652 zshlex();
2653 if tok() != SEPER {
2654 break;
2655 }
2656 }
2657 // c:1644-1645 — `if (tok != INBRACE) YYERRORV(oecused);`
2658 if tok() != INBRACE_TOK {
2659 error("par_subsh: 'always' expects '{'");
2660 return;
2661 }
2662 // c:1648 — `zshlex();`
2663 zshlex();
2664 // c:1649 — `par_save_list(cmplx);`
2665 par_list_wordcode();
2666 // c:1650-1651 — `while (tok == SEPER) zshlex();`
2667 while tok() == SEPER {
2668 zshlex();
2669 }
2670 // c:1653 — `incmdpos = 1;`
2671 set_incmdpos(true);
2672 // c:1655-1656 — `if (tok != OUTBRACE) YYERRORV(oecused);`
2673 if tok() != OUTBRACE_TOK {
2674 error("par_subsh: 'always' block missing '}'");
2675 return;
2676 }
2677 zshlex();
2678 // c:1658 — `ecbuf[p] = WCB_TRY(ecused - 1 - p);`
2679 let used = ECUSED.get() as usize;
2680 let off = used.saturating_sub(1 + p);
2681 ECBUF.with_borrow_mut(|b| {
2682 if p < b.len() {
2683 b[p] = WCB_TRY(off as wordcode);
2684 }
2685 });
2686 } else {
2687 // c:1660-1662 — `ecbuf[p] = (otok == INPAR ? WCB_SUBSH(...) :
2688 // WCB_CURSH(...));`
2689 let used = ECUSED.get() as usize;
2690 let off = used.saturating_sub(1 + p);
2691 ECBUF.with_borrow_mut(|b| {
2692 if p < b.len() {
2693 b[p] = if otok == INPAR_TOK {
2694 WCB_SUBSH(off as wordcode)
2695 } else {
2696 WCB_CURSH(off as wordcode)
2697 };
2698 }
2699 });
2700 }
2701}
2702
2703/// Wrapper for `(...)` subshell — calls `par_subsh_wordcode_impl(false)`.
2704pub fn par_subsh_wordcode() {
2705 par_subsh_wordcode_impl(false);
2706}
2707
2708/// Wrapper for `{...}` brace group (cursh) — calls
2709/// `par_subsh_wordcode_impl(true)`. C uses the same `par_subsh`
2710/// function with `zsh_construct=1`; the Rust split exists because
2711/// the par_cmd dispatch at parse.rs:1446 already named them
2712/// separately.
2713pub fn par_cursh_wordcode() {
2714 par_subsh_wordcode_impl(true);
2715}
2716
2717/// Port of `par_time(void)` from `Src/parse.c:1787`. `time PIPE`
2718/// emits WCB_TIMED(WC_TIMED_PIPE) + the sublist code; bare `time`
2719/// with no pipeline emits WCB_TIMED(WC_TIMED_EMPTY).
2720pub fn par_time_wordcode() {
2721 // c:1791 — `zshlex();`
2722 zshlex();
2723 // c:1793-1794 — `p = ecadd(0); ecadd(0);`
2724 let p = ecadd(0);
2725 ecadd(0);
2726 // c:1795 — `if ((f = par_sublist2(&c)) < 0)`
2727 let mut c = 0i32;
2728 let f = par_sublist2(&mut c);
2729 match f {
2730 Some(flags) => {
2731 // c:1799 — `ecbuf[p] = WCB_TIMED(WC_TIMED_PIPE);`
2732 ECBUF.with_borrow_mut(|b| {
2733 if p < b.len() {
2734 b[p] = WCB_TIMED(WC_TIMED_PIPE);
2735 }
2736 });
2737 // c:1800 — `set_sublist_code(p+1, WC_SUBLIST_END, f,
2738 // ecused-2-p, c);`
2739 let used = ECUSED.get() as usize;
2740 let skip = used.saturating_sub(2 + p) as i32;
2741 set_sublist_code(p + 1, WC_SUBLIST_END as i32, flags, skip, c != 0);
2742 }
2743 None => {
2744 // c:1796-1798 — `ecused--; ecbuf[p] = WCB_TIMED(WC_TIMED_EMPTY);`
2745 ECUSED.set((ECUSED.get() - 1).max(0));
2746 ECBUF.with_borrow_mut(|b| {
2747 if p < b.len() {
2748 b[p] = WCB_TIMED(WC_TIMED_EMPTY);
2749 }
2750 });
2751 }
2752 }
2753}
2754
2755/// Port of `par_dinbrack(void)` from `Src/parse.c:1810`. Wraps
2756/// `par_cond` (the cond-expression emitter at parse.c:2409) with
2757/// the `[[ ... ]]` framing: incond/incmdpos toggles + DOUTBRACK
2758/// expectation.
2759pub fn par_cond_wordcode() {
2760 let oecused = ECUSED.get();
2761 // c:1814 — `incond = 1;`
2762 set_incond(1);
2763 // c:1815 — `incmdpos = 0;`
2764 set_incmdpos(false);
2765 // c:1816 — `zshlex();` past `[[`.
2766 zshlex();
2767 // c:1817 — `par_cond();` — call the no-skip cond-expression
2768 // entry that EMITS WORDCODE (par_cond_top → par_cond_1 →
2769 // par_cond_2 → par_cond_double/triple/multi). NOT the AST
2770 // `par_cond` at parse.rs:4644 which is a misnamed `par_dinbrack`
2771 // that skips `[[` AND `]]` and returns a ZshCommand AST node
2772 // instead of pushing WC_COND opcodes. NOT `parse_cond_expr`
2773 // either — that's also AST-only, returning ZshCond. With
2774 // `parse_cond_expr` here, every `[[ ... ]]` test produced ZERO
2775 // wordcode payload and parity dropped ~148 words on /etc/zshrc.
2776 let _ = par_cond_top();
2777 // c:1818-1819 — `if (tok != DOUTBRACK) YYERRORV(oecused);`
2778 if tok() != DOUTBRACK {
2779 let _ = oecused;
2780 error("missing ]]");
2781 return;
2782 }
2783 // c:1820 — `incond = 0;`
2784 set_incond(0);
2785 // c:1821 — `incmdpos = 1;`
2786 set_incmdpos(true);
2787 // c:1822 — `zshlex();` past `]]`.
2788 zshlex();
2789}
2790
2791/// Port of the `case DINPAR:` arm of `par_cmd` from
2792/// `Src/parse.c:1031-1034`:
2793/// ```c
2794/// ecadd(WCB_ARITH());
2795/// ecstr(tokstr);
2796/// zshlex();
2797/// ```
2798/// `(( EXPR ))` arithmetic at command position — emits the ARITH
2799/// opcode followed by the interned EXPR string, then advances past
2800/// the DINPAR token (which already carries the body text).
2801pub fn par_arith_wordcode() {
2802 // c:1032 — `ecadd(WCB_ARITH());`
2803 ecadd(WCB_ARITH());
2804 // c:1033 — `ecstr(tokstr);` — interns the expression string and
2805 // appends its strcode index to the wordcode buffer.
2806 let expr = tokstr().unwrap_or_default();
2807 ecstr(&expr);
2808 // c:1034 — `zshlex();`
2809 zshlex();
2810}
2811
2812/// Port of `par_simple(int *cmplx, int nr)` from
2813/// `Src/parse.c:1836-2227`. Emits WC_SIMPLE + word count +
2814/// interned string offsets. Returns `0` when nothing was emitted,
2815/// otherwise `1 + (number of code words consumed by redirections)`.
2816/// The full C body handles assignments (ENVSTRING/ENVARRAY),
2817/// inline `{var}>file` brace-FDs, prefix modifiers (NOCORRECT etc),
2818/// and `name() { body }` funcdef detection — those paths are
2819/// progressively wired into the AST parser; this wordcode-emitter
2820/// covers the simple `cmd args...` case + interleaved redirs.
2821pub fn par_simple_wordcode_impl(mut nr: i32) -> i32 {
2822 // c:1836-1842 — `int oecused = ecused, isnull = 1, r, argc = 0,
2823 // p, isfunc = 0, sr = 0; int c = *cmplx, nrediradd, assignments
2824 // = 0, ppost = 0, is_typeset = 0; ...`
2825 let _oecused = ECUSED.get() as usize;
2826 let mut isnull = true;
2827 let mut argc: u32 = 0;
2828 let mut sr: i32 = 0;
2829 let mut assignments = false;
2830
2831 // c:1843 — `r = ecused;` — saves the offset where redirs get
2832 // INSERTED (via ecispace). Each redir shifts later words DOWN
2833 // by ncodes, so the SIMPLE placeholder at `p` (set later) must
2834 // also bump by ncodes when a redir lands. C uses `&r` to pass
2835 // the cursor by reference; Rust uses a mutable local + manual
2836 // bumps after each par_redir_wordcode call.
2837 let mut r: usize = ECUSED.get() as usize;
2838
2839 // c:1844-1919 — pre-cmd loop: NOCORRECT, ENVSTRING (scalar
2840 // assigns), ENVARRAY (array assigns), IS_REDIROP. Loops until
2841 // a non-assignment token is seen.
2842 loop {
2843 match tok() {
2844 NOCORRECT => {
2845 // c:1846-1849
2846 cmplx_set(true);
2847 set_nocorrect(1);
2848 }
2849 ENVSTRING => {
2850 // c:1848-1898 — scalar assignment `name=value` or
2851 // `name+=value`. Emits WCB_ASSIGN(SCALAR, NEW|INC, 0)
2852 // followed by ecstr(name), ecstr(value).
2853 let raw = tokstr().unwrap_or_default();
2854 // Find first of Inbrack / '=' / '+' (the C scan at
2855 // c:1851-1853). Inside Inbrack we skipparens — i.e.
2856 // skip `name[...]` index, then continue.
2857 let bytes: Vec<char> = raw.chars().collect();
2858 let mut idx = 0usize;
2859 while idx < bytes.len() {
2860 let ch = bytes[idx];
2861 if ch == '\u{91}' /* Inbrack */ {
2862 // Skip matched Inbrack…Outbrack pair.
2863 let mut depth = 1;
2864 idx += 1;
2865 while idx < bytes.len() && depth > 0 {
2866 match bytes[idx] {
2867 '\u{91}' => depth += 1,
2868 '\u{92}' => depth -= 1,
2869 _ => {}
2870 }
2871 idx += 1;
2872 }
2873 continue;
2874 }
2875 // c:1851-1853 — `*ptr != '=' && *ptr != '+'` —
2876 // C scan stops on either literal `=` / `+` OR the
2877 // Equals marker (`\u{8d}`) the lexer emits for
2878 // unquoted `=`. Without the marker check, the
2879 // ENVSTRING split scans past the `=` (since it's
2880 // already tokenised) and the whole `name=value`
2881 // ends up in one ecstr.
2882 if ch == '=' || ch == '+' || ch == '\u{8d}' /* Equals */ {
2883 break;
2884 }
2885 idx += 1;
2886 }
2887 let is_inc = idx < bytes.len() && bytes[idx] == '+';
2888 // c:1855-1860 — emit WCB_ASSIGN with WC_ASSIGN_INC
2889 // (+=) or WC_ASSIGN_NEW (=). The third arg (count)
2890 // is 0 for scalar.
2891 let flag = if is_inc { WC_ASSIGN_INC } else { WC_ASSIGN_NEW };
2892 ecadd(WCB_ASSIGN(WC_ASSIGN_SCALAR, flag, 0));
2893 // Split into name and str at the `=` (after the
2894 // optional `+`).
2895 if is_inc {
2896 idx += 1;
2897 }
2898 let name: String = bytes[..idx].iter().collect();
2899 // Skip past the `=` separator (literal or Equals
2900 // marker `\u{8d}`) so the value starts at the byte
2901 // after it. Mirrors C `*ptr = '\0'; str = ptr + 1;`
2902 // (parse.c:1864).
2903 let str_off = if idx < bytes.len()
2904 && (bytes[idx] == '=' || bytes[idx] == '\u{8d}')
2905 {
2906 idx + 1
2907 } else {
2908 idx
2909 };
2910 let value: String = bytes[str_off..].iter().collect();
2911 // c:1866-1877 — scan value for `=(`/`<(`/`>(` (proc
2912 // subst); if found, bump cmplx (suppresses Z_SIMPLE).
2913 let vbytes: Vec<char> = value.chars().collect();
2914 for (i, ch) in vbytes.iter().enumerate() {
2915 if i + 1 < vbytes.len() && vbytes[i + 1] == '\u{88}' /* Inpar */ {
2916 if *ch == '\u{8d}' /* Equals */
2917 || *ch == '\u{94}' /* Inang */
2918 || *ch == '\u{96}' /* OutangProc */
2919 {
2920 cmplx_set(true);
2921 break;
2922 }
2923 }
2924 }
2925 ecstr(&name);
2926 ecstr(&value);
2927 isnull = false;
2928 assignments = true;
2929 }
2930 ENVARRAY => {
2931 // c:1898-1922 — array assignment `name=( ... )`.
2932 // Implementation note: emits placeholder, parses
2933 // wordlist, patches WCB_ASSIGN(ARRAY, NEW|INC, n)
2934 // header with the actual element count. zshrs's
2935 // par_nl_wordlist isn't wired into the wordcode
2936 // emitter yet; fall back to a minimal placeholder
2937 // so the WCB_ASSIGN slot exists at the expected
2938 // position. TODO: full port of c:1898-1922.
2939 cmplx_set(true);
2940 let p = ecadd(0);
2941 set_incmdpos(false);
2942 let raw = tokstr().unwrap_or_default();
2943 let is_inc = raw.ends_with('+');
2944 let name = if is_inc { &raw[..raw.len() - 1] } else { raw.as_str() };
2945 let flag = if is_inc { WC_ASSIGN_INC } else { WC_ASSIGN_NEW };
2946 ecstr(name);
2947 cmdpush(CS_ARRAY as u8);
2948 zshlex();
2949 // Count words until OUTPAR_TOK.
2950 let mut n = 0u32;
2951 while tok() == STRING_LEX {
2952 let w = tokstr().unwrap_or_default();
2953 ecstr(&w);
2954 n += 1;
2955 zshlex();
2956 while tok() == NEWLIN {
2957 zshlex();
2958 }
2959 }
2960 ECBUF.with_borrow_mut(|b| {
2961 if p < b.len() {
2962 b[p] = WCB_ASSIGN(WC_ASSIGN_ARRAY, flag, n);
2963 }
2964 });
2965 cmdpop();
2966 if tok() != OUTPAR_TOK {
2967 error("expected `)' after array assignment");
2968 return 0;
2969 }
2970 set_incmdpos(true);
2971 isnull = false;
2972 assignments = true;
2973 }
2974 t if IS_REDIROP(t) => {
2975 // c:1900-1904 — `*cmplx = c = 1; nr += par_redir(&r,
2976 // NULL); continue;`. The wordcode-emitting redir is
2977 // distinct from the AST par_redir — it INSERTS
2978 // WCB_REDIR + fd + ecstrcode(name) at offset `r`
2979 // via ecispace, shifting any later words down.
2980 cmplx_set(true);
2981 let added = par_redir_wordcode(&mut r);
2982 if added == 0 {
2983 break;
2984 }
2985 nr += added;
2986 continue;
2987 }
2988 _ => break,
2989 }
2990 zshlex(); // c:1907 `zshlex();`
2991 }
2992
2993 // c:1920-1921 — `if (tok == AMPER || tok == AMPERBANG) YYERROR;`
2994 if tok() == AMPER || tok() == AMPERBANG {
2995 error("par_simple: unexpected &");
2996 return 0;
2997 }
2998
2999 // c:1923 — `p = ecadd(WCB_SIMPLE(0));`
3000 let mut p = ecadd(WCB_SIMPLE(0));
3001
3002 // c:1924-2105 — main words loop. is_typeset tracks whether the
3003 // outer command was `typeset`/`export`/etc. so the final
3004 // placeholder gets WCB_TYPESET instead of WCB_SIMPLE.
3005 let mut is_typeset = false;
3006 let mut postassigns: u32 = 0;
3007 let mut ppost: usize = 0;
3008 loop {
3009 match tok() {
3010 STRING_LEX | TYPESET => {
3011 // c:1928-1929 — `*cmplx = 1; incmdpos = 0;`
3012 cmplx_set(true);
3013 set_incmdpos(false);
3014 // c:1931-1932 — TYPESET → intypeset = is_typeset = 1.
3015 if tok() == TYPESET {
3016 set_intypeset(true);
3017 is_typeset = true;
3018 }
3019 let s = tokstr().unwrap_or_default();
3020 ecstr(&s);
3021 argc += 1;
3022 isnull = false;
3023 zshlex();
3024 }
3025 ENVSTRING => {
3026 // c:2005-2026 — mid-cmd ENVSTRING (under intypeset
3027 // context). Emits WCB_ASSIGN(SCALAR, NEW, 0) then
3028 // ecstr(name) + ecstr(value), tracking the first
3029 // postassign offset in `ppost` (which the trailing
3030 // WCB_TYPESET header points to).
3031 if postassigns == 0 {
3032 ppost = ecadd(0);
3033 }
3034 postassigns += 1;
3035 let raw = tokstr().unwrap_or_default();
3036 let bytes: Vec<char> = raw.chars().collect();
3037 let mut idx = 0usize;
3038 while idx < bytes.len() {
3039 let ch = bytes[idx];
3040 if ch == '\u{91}' /* Inbrack */ {
3041 let mut depth = 1;
3042 idx += 1;
3043 while idx < bytes.len() && depth > 0 {
3044 match bytes[idx] {
3045 '\u{91}' => depth += 1,
3046 '\u{92}' => depth -= 1,
3047 _ => {}
3048 }
3049 idx += 1;
3050 }
3051 continue;
3052 }
3053 if ch == '=' || ch == '+' || ch == '\u{8d}' /* Equals */ {
3054 break;
3055 }
3056 idx += 1;
3057 }
3058 let name: String = bytes[..idx].iter().collect();
3059 let str_off = if idx < bytes.len() && (bytes[idx] == '=' || bytes[idx] == '\u{8d}') {
3060 idx + 1
3061 } else {
3062 idx
3063 };
3064 let value: String = bytes[str_off..].iter().collect();
3065 ecadd(WCB_ASSIGN(WC_ASSIGN_SCALAR, WC_ASSIGN_NEW, 0));
3066 ecstr(&name);
3067 ecstr(&value);
3068 isnull = false;
3069 zshlex();
3070 }
3071 ENVARRAY => {
3072 // c:2027-2050 — mid-cmd ENVARRAY (typeset N=(…) form).
3073 // C tracks postassigns + ppost the same as ENVSTRING,
3074 // but the inner emit is WCB_ASSIGN(ARRAY, NEW, n)
3075 // with `n` patched in after par_nl_wordlist consumes
3076 // the elements. C also toggles intypeset=0 around the
3077 // wordlist so the lexer doesn't try to re-emit
3078 // assignments inside the array.
3079 cmplx_set(true);
3080 if postassigns == 0 {
3081 ppost = ecadd(0);
3082 }
3083 postassigns += 1;
3084 let parr = ecadd(0);
3085 let raw = tokstr().unwrap_or_default();
3086 let is_inc = raw.ends_with('+');
3087 let name = if is_inc { &raw[..raw.len() - 1] } else { raw.as_str() };
3088 let flag = if is_inc { WC_ASSIGN_INC } else { WC_ASSIGN_NEW };
3089 ecstr(name);
3090 cmdpush(CS_ARRAY as u8);
3091 set_intypeset(false);
3092 zshlex();
3093 let mut nelem = 0u32;
3094 while tok() == STRING_LEX {
3095 ecstr(&tokstr().unwrap_or_default());
3096 nelem += 1;
3097 zshlex();
3098 while tok() == NEWLIN {
3099 zshlex();
3100 }
3101 }
3102 ECBUF.with_borrow_mut(|b| {
3103 if parr < b.len() {
3104 b[parr] = WCB_ASSIGN(WC_ASSIGN_ARRAY, flag, nelem);
3105 }
3106 });
3107 cmdpop();
3108 set_intypeset(true);
3109 if tok() != OUTPAR_TOK {
3110 error("expected `)' after array assignment");
3111 return 0;
3112 }
3113 isnull = false;
3114 zshlex();
3115 }
3116 t if IS_REDIROP(t) => {
3117 // c:1999-2010 — `nrediradd = par_redir(&r, NULL);
3118 // p += nrediradd; if (ppost) ppost += nrediradd;
3119 // sr += nrediradd;`
3120 cmplx_set(true);
3121 let added = par_redir_wordcode(&mut r);
3122 if added == 0 {
3123 break;
3124 }
3125 p += added as usize;
3126 if ppost != 0 {
3127 ppost += added as usize;
3128 }
3129 sr += added;
3130 }
3131 INOUTPAR => {
3132 // c:2051-2168 — `name() { body }` funcdef detection.
3133 // C rewrites the SIMPLE placeholder at `p` into a
3134 // FUNCDEF header structure with multiple words:
3135 // p: WCB_FUNCDEF(total_offset)
3136 // p+1: argc (name count)
3137 // p+2..N: the names already ecstr'd above
3138 // N+1: 0 (placeholder)
3139 // N+2: 0 (placeholder)
3140 // N+3: 0 (placeholder)
3141 // N+4: 0 (placeholder)
3142 // N+5: WCB_END()
3143 // ...body wordcode...
3144 // ecbuf[p+argc+2] = so - oecssub; (string area)
3145 // ecbuf[p+argc+3] = ecsoffs - so;
3146 // ecbuf[p+argc+4] = ecnpats;
3147 // ecbuf[p+argc+5] = 0;
3148 //
3149 // This Rust port handles the common `name() { … }`
3150 // case (single name + brace body); anonymous funcdef
3151 // and short-body forms are stubbed for now.
3152 if !isset(MULTIFUNCDEF) && argc > 1 {
3153 error("par_simple: too many function names for funcdef");
3154 return 0;
3155 }
3156 if assignments || postassigns > 0 {
3157 error("par_simple: assignments before funcdef");
3158 return 0;
3159 }
3160 cmplx_set(true);
3161 set_incmdpos(true);
3162 cmdpush(CS_FUNCDEF as u8);
3163 zshlex();
3164 while tok() == SEPER {
3165 zshlex();
3166 }
3167 // c:2079 — `ecispace(p + 1, 1); ecbuf[p+1] = argc;
3168 // ecadd(0)*4`. Insert the argc word at p+1, then
3169 // append 4 placeholder words.
3170 ecispace(p + 1, 1);
3171 ECBUF.with_borrow_mut(|b| {
3172 if p + 1 < b.len() {
3173 b[p + 1] = argc;
3174 }
3175 });
3176 ecadd(0);
3177 ecadd(0);
3178 ecadd(0);
3179 ecadd(0);
3180 let so = ECSOFFS.get();
3181 let onp = ECNPATS.with(|c| c.get());
3182 ECNPATS.with(|c| c.set(0));
3183 ECNFUNC.set(ECNFUNC.get() + 1);
3184 let oecssub = ECSSUB.get();
3185 ECSSUB.set(so);
3186 if tok() == INBRACE_TOK {
3187 zshlex();
3188 par_list_wordcode();
3189 if tok() != OUTBRACE_TOK {
3190 cmdpop();
3191 error("par_simple: funcdef expected `}`");
3192 return 0;
3193 }
3194 if argc == 0 {
3195 // Anonymous funcdef.
3196 set_incmdpos(false);
3197 }
3198 zshlex();
3199 } else {
3200 // Short-body or non-brace form not yet ported.
3201 cmdpop();
3202 error("par_simple: funcdef expected `{`");
3203 return 0;
3204 }
3205 cmdpop();
3206 ecadd(WCB_END());
3207 let used = ECUSED.get() as usize;
3208 let header_off = used.saturating_sub(1 + p) as wordcode;
3209 let p_argc = (p + (argc as usize) + 2) as usize;
3210 let cur_so = ECSOFFS.get();
3211 let np_now = ECNPATS.with(|c| c.get());
3212 ECBUF.with_borrow_mut(|b| {
3213 if p_argc + 3 < b.len() {
3214 b[p_argc] = (so - oecssub) as wordcode;
3215 b[p_argc + 1] = (cur_so - so) as wordcode;
3216 b[p_argc + 2] = np_now as wordcode;
3217 b[p_argc + 3] = 0;
3218 }
3219 if p < b.len() {
3220 b[p] = WCB_FUNCDEF(header_off);
3221 }
3222 });
3223 ECNPATS.with(|c| c.set(onp));
3224 ECSSUB.set(oecssub);
3225 ECNFUNC.set(ECNFUNC.get() + 1);
3226 isnull = false;
3227 // Anonymous funcdef may have arguments — not ported
3228 // yet. Break out of the words loop; outer parser
3229 // handles whatever follows.
3230 break;
3231 }
3232 _ => break,
3233 }
3234 }
3235
3236 // c:2173-2176 — `if (isnull && !(sr + nr)) { ecused = oecused;
3237 // return 0; }` — undo everything including pre-cmd assignments
3238 // if no actual command word emerged.
3239 if isnull && sr + nr == 0 && !assignments {
3240 ECUSED.set(p as i32);
3241 return 0;
3242 }
3243 // c:2186-2187 — `incmdpos = 1; intypeset = 0;` — reset before
3244 // the placeholder patch so the next-token lex doesn't carry
3245 // typeset/incond state.
3246 set_incmdpos(true);
3247 set_intypeset(false);
3248 // c:2189-2199 — `if (!isfunc) { if (is_typeset) ecbuf[p] =
3249 // WCB_TYPESET(argc); else ecbuf[p] = WCB_SIMPLE(argc); }`.
3250 // The WCB_TYPESET header is followed by either a postassigns
3251 // count at `ppost` (when assignments were emitted) or a
3252 // trailing 0 word.
3253 let header = if is_typeset {
3254 if postassigns > 0 {
3255 ECBUF.with_borrow_mut(|b| {
3256 if ppost < b.len() {
3257 b[ppost] = postassigns;
3258 }
3259 });
3260 } else {
3261 ecadd(0);
3262 }
3263 WCB_TYPESET(argc)
3264 } else {
3265 WCB_SIMPLE(argc)
3266 };
3267 ECBUF.with_borrow_mut(|b| {
3268 if p < b.len() {
3269 b[p] = header;
3270 }
3271 });
3272 1 + sr
3273}
3274
3275/// Wrapper for the par_cmd dispatch sites that don't pass `nr`
3276/// (matches C's call shape at parse.c:1054 `par_simple(cmplx, nr)`).
3277pub fn par_simple_wordcode() {
3278 par_simple_wordcode_impl(0);
3279}
3280
3281/// Port of `par_redir(int *rp, char *idstring)` from
3282/// `Src/parse.c:2229-2345` — the wordcode-emitting variant that
3283/// pushes WCB_REDIR + fd + ecstrcode(name) into ECBUF. Distinct
3284/// from the AST `par_redir` (parse.rs:3771) which builds a
3285/// ZshRedir struct for the AST executor pipeline.
3286///
3287/// Returns the number of wordcodes added (3 for the basic shape,
3288/// 4 with idstring, 5 for HEREDOC[DASH] which carries the
3289/// terminator strings inline). Returns 0 on parse error.
3290fn par_redir_wordcode(rp: &mut usize) -> i32 {
3291 let cur = tok();
3292 let rtype: i32 = match cur {
3293 OUTANG_TOK => REDIR_WRITE,
3294 OUTANGBANG => REDIR_WRITENOW,
3295 DOUTANG => REDIR_APP,
3296 DOUTANGBANG => REDIR_APPNOW,
3297 INANG_TOK => REDIR_READ,
3298 INOUTANG => REDIR_READWRITE,
3299 DINANG => REDIR_HEREDOC,
3300 DINANGDASH => REDIR_HEREDOCDASH,
3301 TRINANG => REDIR_HERESTR,
3302 INANGAMP => REDIR_MERGEIN,
3303 OUTANGAMP => REDIR_MERGEOUT,
3304 AMPOUTANG => REDIR_ERRWRITE,
3305 OUTANGAMPBANG => REDIR_ERRWRITENOW,
3306 DOUTANGAMP => REDIR_ERRAPP,
3307 DOUTANGAMPBANG => REDIR_ERRAPPNOW,
3308 _ => return 0,
3309 };
3310 let fd1 = if tokfd() >= 0 {
3311 tokfd()
3312 } else if matches!(
3313 rtype,
3314 REDIR_READ
3315 | REDIR_READWRITE
3316 | REDIR_MERGEIN
3317 | REDIR_HEREDOC
3318 | REDIR_HEREDOCDASH
3319 | REDIR_HERESTR
3320 ) {
3321 0
3322 } else {
3323 1
3324 };
3325 // c:2234-2245 — save+force incmdpos=0 / nocorrect=1 (when not
3326 // INANG/INOUTANG) around the zshlex that consumes the target
3327 // word.
3328 let oldcmdpos = incmdpos();
3329 set_incmdpos(false);
3330 let oldnc = nocorrect();
3331 if cur != INANG_TOK && cur != INOUTANG {
3332 set_nocorrect(1);
3333 }
3334 zshlex();
3335 if tok() != STRING_LEX && tok() != ENVSTRING {
3336 set_incmdpos(oldcmdpos);
3337 set_nocorrect(oldnc);
3338 error("expected word after redirection");
3339 return 0;
3340 }
3341 let name = tokstr().unwrap_or_default();
3342 set_incmdpos(oldcmdpos);
3343 set_nocorrect(oldnc);
3344
3345 // c:2249-2300 — HEREDOC / HEREDOCDASH carry extra words (here
3346 // string + terminator + munged terminator). The C source
3347 // emits 5 words and registers a struct heredocs entry that
3348 // setheredoc patches later. Stub for now: emit the basic
3349 // 3-word shape so wordcode parity at least sees WC_REDIR.
3350 // TODO: full heredoc registration + 5-word emission.
3351 let _ = (REDIR_FROM_HEREDOC_MASK, REDIR_VARID_MASK);
3352
3353 // c:2302-2321 — proc-subst rewriting: detect `>(`/`<(` in the
3354 // target word's first 2 chars and rewrite REDIR_WRITE/READ to
3355 // REDIR_OUTPIPE/INPIPE. The detection compares the FIRST char
3356 // of the unmetafied tokstr against the marker bytes.
3357 let mut rtype = rtype;
3358 let nbytes: Vec<char> = name.chars().collect();
3359 let two = |i: usize| -> Option<(char, char)> {
3360 if i + 1 < nbytes.len() {
3361 Some((nbytes[i], nbytes[i + 1]))
3362 } else {
3363 None
3364 }
3365 };
3366 if let Some((c0, c1)) = two(0) {
3367 match rtype {
3368 x if x == REDIR_WRITE || x == REDIR_WRITENOW => {
3369 if c0 == '\u{96}' /* OutangProc */ && c1 == '\u{88}' /* Inpar */ {
3370 rtype = REDIR_OUTPIPE;
3371 } else if c0 == '\u{94}' /* Inang */ && c1 == '\u{88}' {
3372 error("invalid redirection: < before >");
3373 return 0;
3374 }
3375 }
3376 x if x == REDIR_READ => {
3377 if c0 == '\u{94}' && c1 == '\u{88}' {
3378 rtype = REDIR_INPIPE;
3379 } else if c0 == '\u{96}' && c1 == '\u{88}' {
3380 error("invalid redirection: > before <");
3381 return 0;
3382 }
3383 }
3384 x if x == REDIR_READWRITE => {
3385 if c0 == '\u{94}' && c1 == '\u{88}' {
3386 rtype = REDIR_INPIPE;
3387 } else if c0 == '\u{96}' && c1 == '\u{88}' {
3388 rtype = REDIR_OUTPIPE;
3389 }
3390 }
3391 _ => {}
3392 }
3393 }
3394 zshlex();
3395
3396 // c:2326-2333 — emit WCB_REDIR + fd + ecstrcode(name) at the
3397 // CALLER's `r` cursor (NOT at ecused). ecispace shifts later
3398 // words DOWN to make space; the caller bumps its `p` (SIMPLE
3399 // placeholder offset) to compensate. 3-word basic shape;
3400 // idstring (`{var}>file`) form not yet wired here.
3401 let ncodes: usize = 3;
3402 let r = *rp;
3403 ecispace(r, ncodes);
3404 let coded = ecstrcode(&name);
3405 ECBUF.with_borrow_mut(|b| {
3406 if r + 2 < b.len() {
3407 b[r] = WCB_REDIR(rtype as wordcode);
3408 b[r + 1] = fd1 as wordcode;
3409 b[r + 2] = coded;
3410 }
3411 });
3412 *rp += ncodes; // c:2280 `*rp = r + ncodes;`
3413 ncodes as i32
3414}
3415
3416/// Parse a program (list of lists)
3417/// Parse a complete program (top-level entry). Calls
3418/// parse_program_until with no end-token sentinel. Direct port of
3419/// zsh/Src/parse.c:614-720 `parse_event` / `par_list` /
3420/// `par_event` flow. C distinguishes COND_EVENT (single command
3421/// for here-string) from full event parse; zshrs's parse_program
3422/// is the full-event entry.
3423fn parse_program() -> ZshProgram {
3424 parse_program_until(None)
3425}
3426
3427/// Parse a program until we hit an end token
3428/// Parse a program until one of `end_tokens` is seen (or EOF).
3429/// Drives par_list in a loop. C equivalent: the body of par_event
3430/// (parse.c:635-695) iterating par_list against the lexer.
3431fn parse_program_until(end_tokens: Option<&[lextok]>) -> ZshProgram {
3432 let mut lists = Vec::new();
3433
3434 loop {
3435 if check_limit() {
3436 error("parser exceeded global iteration limit");
3437 break;
3438 }
3439
3440 // Skip separators
3441 while tok() == SEPER || tok() == NEWLIN {
3442 if check_limit() {
3443 error("parser exceeded global iteration limit");
3444 return ZshProgram { lists };
3445 }
3446 zshlex();
3447 }
3448
3449 if tok() == ENDINPUT || tok() == LEXERR {
3450 break;
3451 }
3452
3453 // Check for end tokens
3454 if let Some(end_toks) = end_tokens {
3455 if end_toks.contains(&tok()) {
3456 break;
3457 }
3458 }
3459
3460 // Also stop at these tokens when not explicitly looking for them
3461 // Note: Else/Elif/Then are NOT here - they're handled by par_if
3462 // to allow nested if statements inside case arms, loops, etc.
3463 match tok() {
3464 OUTBRACE_TOK | DSEMI | SEMIAMP | SEMIBAR | DONE | FI | ESAC | ZEND => break,
3465 _ => {}
3466 }
3467
3468 match par_list() {
3469 Some(list) => {
3470 let detected = simple_name_with_inoutpar(&list);
3471 lists.push(list);
3472 // Synthesize a FuncDef for the `name() { body }` shape
3473 // at parse time so body_source is captured while the
3474 // lexer still has the input. The lexer port emits
3475 // `name(` as a single Word ending in `<Inpar><Outpar>`,
3476 // so the Simple list is followed by an Inbrace once
3477 // separators are skipped. For `name() cmd args` the
3478 // body has already been swallowed into the same
3479 // Simple's words tail — synthesize directly from there.
3480 if let Some((names, body_argv)) = detected {
3481 if !body_argv.is_empty() {
3482 // One-line body already in the Simple. Build
3483 // a Simple from body_argv as the function body.
3484 lists.pop();
3485 let body_simple = ZshCommand::Simple(ZshSimple {
3486 assigns: Vec::new(),
3487 words: body_argv,
3488 redirs: Vec::new(),
3489 });
3490 let body_list = ZshList {
3491 sublist: ZshSublist {
3492 pipe: ZshPipe {
3493 cmd: body_simple,
3494 next: None,
3495 lineno: lineno(),
3496 merge_stderr: false,
3497 },
3498 next: None,
3499 flags: SublistFlags::default(),
3500 },
3501 flags: ListFlags::default(),
3502 };
3503 let funcdef = ZshCommand::FuncDef(ZshFuncDef {
3504 names,
3505 body: Box::new(ZshProgram {
3506 lists: vec![body_list],
3507 }),
3508 tracing: false,
3509 auto_call_args: None,
3510 body_source: None,
3511 });
3512 let synthetic = ZshList {
3513 sublist: ZshSublist {
3514 pipe: ZshPipe {
3515 cmd: funcdef,
3516 next: None,
3517 lineno: lineno(),
3518 merge_stderr: false,
3519 },
3520 next: None,
3521 flags: SublistFlags::default(),
3522 },
3523 flags: ListFlags::default(),
3524 };
3525 lists.push(synthetic);
3526 continue;
3527 }
3528 // Else: words.len() == 1 (only the trailing `name()`
3529 // word), brace body follows. `names` may carry
3530 // multiple identifiers from the `fna fnb fnc()`
3531 // shorthand — all share the same brace body per
3532 // src/zsh/Src/parse.c:1666 par_funcdef wordlist.
3533 // Skip separators on the real lexer; safe because
3534 // parse_program's next iteration would also skip them.
3535 while tok() == SEPER || tok() == NEWLIN {
3536 zshlex();
3537 }
3538 if tok() == INBRACE_TOK {
3539 // Capture body_start BEFORE the lexer
3540 // advances past the first body token. The
3541 // outer zshlex() consumed `{`; lexer.pos
3542 // is now right after `{`. The next
3543 // `zshlex()` would advance past `echo`,
3544 // making body_start land mid-body and
3545 // lose the first word — `typeset -f f`
3546 // printed `a; echo b` instead of
3547 // `echo a; echo b` for `f() { echo a;
3548 // echo b }`.
3549 let body_start = pos();
3550 zshlex();
3551 let body = parse_program();
3552 let body_end = if tok() == OUTBRACE_TOK {
3553 pos().saturating_sub(1)
3554 } else {
3555 pos()
3556 };
3557 let body_source = input_slice(body_start, body_end)
3558 .map(|s| s.trim().to_string())
3559 .filter(|s| !s.is_empty());
3560 if tok() == OUTBRACE_TOK {
3561 zshlex();
3562 }
3563 // Replace the Simple list with a FuncDef list.
3564 lists.pop();
3565 let funcdef = ZshCommand::FuncDef(ZshFuncDef {
3566 names,
3567 body: Box::new(body),
3568 tracing: false,
3569 auto_call_args: None,
3570 body_source,
3571 });
3572 let synthetic = ZshList {
3573 sublist: ZshSublist {
3574 pipe: ZshPipe {
3575 cmd: funcdef,
3576 next: None,
3577 lineno: lineno(),
3578 merge_stderr: false,
3579 },
3580 next: None,
3581 flags: SublistFlags::default(),
3582 },
3583 flags: ListFlags::default(),
3584 };
3585 lists.push(synthetic);
3586 } else if !matches!(tok(), ENDINPUT | OUTBRACE_TOK | SEPER | NEWLIN) {
3587 // No-brace one-line body: `foo() echo hello`.
3588 // Parse a single command for the body.
3589 let body_cmd = par_cmd();
3590 if let Some(cmd) = body_cmd {
3591 let body_list = ZshList {
3592 sublist: ZshSublist {
3593 pipe: ZshPipe {
3594 cmd,
3595 next: None,
3596 lineno: lineno(),
3597 merge_stderr: false,
3598 },
3599 next: None,
3600 flags: SublistFlags::default(),
3601 },
3602 flags: ListFlags::default(),
3603 };
3604 lists.pop();
3605 let funcdef = ZshCommand::FuncDef(ZshFuncDef {
3606 names: names.clone(),
3607 body: Box::new(ZshProgram {
3608 lists: vec![body_list],
3609 }),
3610 tracing: false,
3611 auto_call_args: None,
3612 body_source: None,
3613 });
3614 let synthetic = ZshList {
3615 sublist: ZshSublist {
3616 pipe: ZshPipe {
3617 cmd: funcdef,
3618 next: None,
3619 lineno: lineno(),
3620 merge_stderr: false,
3621 },
3622 next: None,
3623 flags: SublistFlags::default(),
3624 },
3625 flags: ListFlags::default(),
3626 };
3627 lists.push(synthetic);
3628 }
3629 }
3630 }
3631 }
3632 None => break,
3633 }
3634 }
3635
3636 ZshProgram { lists }
3637}
3638
3639/// Parse a list (sublist with optional & or ;).
3640///
3641/// Direct port of zsh/Src/parse.c:771-804 `par_list` (and the
3642/// par_list1 wrapper at parse.c:807-817).
3643///
3644/// **Structural divergence**: zsh's parse.c emits flat wordcode
3645/// into the `ecbuf` u32 array via `ecadd(0)` (placeholder),
3646/// `set_list_code(p, code, complexity)`, `wc_bdata(Z_END)`. zshrs
3647/// builds an AST node `ZshList { sublist, flags }` instead. The
3648/// async/sync/disown discrimination at parse.c:785-790 maps to
3649/// zshrs's `ListFlags { async_, disown }` field — Z_SYNC is the
3650/// default (no flags), Z_ASYNC = `&` = `async_=true`, Z_DISOWN +
3651/// Z_ASYNC = `&!`/`&|` = both true. Same semantics, different
3652/// representation. This divergence is repository-wide: every
3653/// `par_*` function emits wordcode in C, every `parse_*` builds
3654/// AST in Rust. The compile_zsh module then traverses the AST to
3655/// emit fusevm bytecode, which serves the same role as zsh's
3656/// wordcode but with a different opcode set and execution model.
3657fn par_list() -> Option<ZshList> {
3658 let sublist = par_sublist()?;
3659
3660 let flags = match tok() {
3661 AMPER => {
3662 zshlex();
3663 ListFlags {
3664 async_: true,
3665 disown: false,
3666 }
3667 }
3668 AMPERBANG => {
3669 zshlex();
3670 ListFlags {
3671 async_: true,
3672 disown: true,
3673 }
3674 }
3675 SEPER | SEMI | NEWLIN => {
3676 zshlex();
3677 ListFlags::default()
3678 }
3679 _ => ListFlags::default(),
3680 };
3681
3682 Some(ZshList { sublist, flags })
3683}
3684
3685/// Parse a sublist (pipelines connected by && or ||).
3686///
3687/// Direct port of zsh/Src/parse.c:825 `par_sublist` and
3688/// par_sublist2 at parse.c:869-892. par_sublist handles the
3689/// && / || conjunction and emits WC_SUBLIST opcodes; par_sublist2
3690/// handles the leading `!` negation and `coproc` keyword.
3691///
3692/// AST mapping: ZshSublist { pipe, conj_chain }, where `conj_chain`
3693/// is a Vec<(ConjOp, ZshSublist)> for chained && / ||. C uses
3694/// flat wordcode with WC_SUBLIST_AND / WC_SUBLIST_OR markers.
3695fn par_sublist() -> Option<ZshSublist> {
3696 PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get() + 1);
3697 if check_recursion() {
3698 error("par_sublist: max recursion depth exceeded");
3699 PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get().saturating_sub(1));
3700 return None;
3701 }
3702
3703 let mut flags = SublistFlags::default();
3704
3705 // Handle coproc and !
3706 if tok() == COPROC {
3707 flags.coproc = true;
3708 zshlex();
3709 } else if tok() == BANG_TOK {
3710 flags.not = true;
3711 zshlex();
3712 }
3713
3714 let pipe = match par_pline() {
3715 Some(p) => p,
3716 None => {
3717 PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get().saturating_sub(1));
3718 return None;
3719 }
3720 };
3721
3722 // Check for && or ||
3723 let next = match tok() {
3724 DAMPER => {
3725 zshlex();
3726 skip_separators();
3727 par_sublist().map(|s| (SublistOp::And, Box::new(s)))
3728 }
3729 DBAR => {
3730 zshlex();
3731 skip_separators();
3732 par_sublist().map(|s| (SublistOp::Or, Box::new(s)))
3733 }
3734 _ => None,
3735 };
3736
3737 PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get().saturating_sub(1));
3738 Some(ZshSublist { pipe, next, flags })
3739}
3740
3741/// Parse a pipeline
3742/// Parse a pipeline (cmds joined by `|` / `|&`). Direct port of
3743/// zsh/Src/parse.c:894 `par_pline`. AST: ZshPipe { cmds: Vec<ZshCommand> }.
3744/// C emits WC_PIPE wordcodes per command; same flow.
3745fn par_pline() -> Option<ZshPipe> {
3746 PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get() + 1);
3747 if check_recursion() {
3748 error("par_pline: max recursion depth exceeded");
3749 PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get().saturating_sub(1));
3750 return None;
3751 }
3752
3753 let lineno = toklineno();
3754 let cmd = match par_cmd() {
3755 Some(c) => c,
3756 None => {
3757 PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get().saturating_sub(1));
3758 return None;
3759 }
3760 };
3761
3762 // Check for | or |&
3763 let mut merge_stderr = false;
3764 let next = match tok() {
3765 BAR_TOK | BARAMP => {
3766 merge_stderr = tok() == BARAMP;
3767 zshlex();
3768 skip_separators();
3769 par_pline().map(Box::new)
3770 }
3771 _ => None,
3772 };
3773
3774 PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get().saturating_sub(1));
3775 Some(ZshPipe {
3776 cmd,
3777 next,
3778 lineno,
3779 merge_stderr,
3780 })
3781}
3782
3783/// Parse a command
3784/// Parse a command — dispatches by leading token (FOR / CASE /
3785/// IF / WHILE / UNTIL / REPEAT / FUNC / DINBRACK / DINPAR /
3786/// Inpar subshell / Inbrace current-shell / TIME / NOCORRECT,
3787/// else simple). Direct port of zsh/Src/parse.c:958 `par_cmd`.
3788fn par_cmd() -> Option<ZshCommand> {
3789 // Parse leading redirections
3790 let mut redirs = Vec::new();
3791 while IS_REDIROP(tok()) {
3792 if let Some(redir) = par_redir() {
3793 redirs.push(redir);
3794 }
3795 }
3796
3797 let cmd = match tok() {
3798 FOR | FOREACH => par_for(),
3799 SELECT => parse_select(),
3800 CASE => par_case(),
3801 IF => par_if(),
3802 WHILE => par_while(false),
3803 UNTIL => par_while(true),
3804 REPEAT => par_repeat(),
3805 INPAR_TOK => par_subsh(),
3806 INOUTPAR => parse_anon_funcdef(),
3807 INBRACE_TOK => parse_cursh(),
3808 FUNC => par_funcdef(),
3809 DINBRACK => par_cond(),
3810 DINPAR => parse_arith(),
3811 TIME => par_time(),
3812 _ => par_simple(redirs),
3813 };
3814
3815 // Parse trailing redirections. For Simple commands the redirs were
3816 // already captured inside par_simple; for compound forms (Cursh,
3817 // Subsh, If, While, etc.) we collect them here and wrap in
3818 // ZshCommand::Redirected so compile_zsh can scope-bracket them.
3819 if let Some(inner) = cmd {
3820 let mut trailing: Vec<ZshRedir> = Vec::new();
3821 while IS_REDIROP(tok()) {
3822 if let Some(redir) = par_redir() {
3823 trailing.push(redir);
3824 }
3825 }
3826 // c:1072-1075 — every par_cmd tail resets the lexer state
3827 // toggles so the NEXT command starts in cmd position with
3828 // case/cond/typeset off. par_simple/par_cond set `incmdpos=0`
3829 // during their bodies; without this reset the next iteration
3830 // of the outer par_list loop sees `if` / `done` / `select`
3831 // etc. as plain strings and the AST collapses.
3832 set_incmdpos(true);
3833 set_incasepat(0);
3834 set_incond(0);
3835 set_intypeset(false);
3836 if trailing.is_empty() {
3837 return Some(inner);
3838 }
3839 // Simple already absorbed its own redirs (compile path expects
3840 // them on ZshSimple), so don't double-wrap.
3841 if matches!(inner, ZshCommand::Simple(_)) {
3842 if let ZshCommand::Simple(mut s) = inner {
3843 s.redirs.extend(trailing);
3844 return Some(ZshCommand::Simple(s));
3845 }
3846 unreachable!()
3847 }
3848 return Some(ZshCommand::Redirected(Box::new(inner), trailing));
3849 }
3850 // Same reset on the empty-cmd branch (mirror c:1072 unconditional
3851 // path — the C function only returns 0 above when the dispatch
3852 // produced no command, and falls through to the reset block).
3853 set_incmdpos(true);
3854 set_incasepat(0);
3855 set_incond(0);
3856 set_intypeset(false);
3857
3858 None
3859}
3860
3861/// Parse a simple command
3862/// Parse a simple command (assignments + words + redirections).
3863/// Direct port of zsh/Src/parse.c:1836 `par_simple` —
3864/// the largest single function in parse.c. Handles ENVSTRING/
3865/// ENVARRAY assignments at command head, intermixed redirs,
3866/// typeset-style multi-assignment commands, and the trailing
3867/// inout-par `()` that converts a simple command into an inline
3868/// function definition.
3869fn par_simple(mut redirs: Vec<ZshRedir>) -> Option<ZshCommand> {
3870 let mut assigns = Vec::new();
3871 let mut words = Vec::new();
3872 const MAX_ITERATIONS: usize = 10_000;
3873 let mut iterations = 0;
3874
3875 // c:1934 — `if (!isset(IGNOREBRACES) && *tokstr == Inbrace) { ... }`
3876 // gates the `{var}>file` brace-FD recognition (a non-POSIX zsh
3877 // extension that lets `{varname}>file` redirect into the named
3878 // shell variable). zshrs's parser doesn't recognise the brace-FD
3879 // shape yet, so the gate is wired here as a marker — when the
3880 // {var}-FD feature lands, swap this `false` for the actual
3881 // `tokstr starts with Inbrace` test and route into a {var}>file
3882 // redir builder.
3883 let saw_brace_fd_candidate = false;
3884 if !isset(IGNOREBRACES) && saw_brace_fd_candidate {
3885 // TODO: {var}>file FD recognition (par_simple body at c:1934-2000).
3886 }
3887
3888 // Parse leading assignments
3889 while tok() == ENVSTRING || tok() == ENVARRAY {
3890 iterations += 1;
3891 if iterations > MAX_ITERATIONS {
3892 error("par_simple: exceeded max iterations in assignments");
3893 return None;
3894 }
3895 if let Some(assign) = parse_assign() {
3896 assigns.push(assign);
3897 }
3898 zshlex();
3899 }
3900
3901 // Parse words and redirections
3902 loop {
3903 iterations += 1;
3904 if iterations > MAX_ITERATIONS {
3905 error("par_simple: exceeded max iterations");
3906 return None;
3907 }
3908 match tok() {
3909 ENVSTRING | ENVARRAY => {
3910 // Mid-command assignment-shape arg under typeset
3911 // / declare / local / etc. (intypeset gates the
3912 // lexer to emit Envstring/Envarray for `name=val`
3913 // and `name=()` past the command name). Parse the
3914 // assignment, then emit a synthetic word
3915 // `NAME=value` (scalar) or `NAME=( … )` (array)
3916 // string so typeset's builtin arg list sees the
3917 // assignment-shape arg. Avoids the inline-env
3918 // scope path that mistakenly treats it like a
3919 // pre-cmd `X=Y cmd` assignment.
3920 if let Some(assign) = parse_assign() {
3921 let synthetic = match &assign.value {
3922 ZshAssignValue::Scalar(v) => format!("{}={}", assign.name, v),
3923 ZshAssignValue::Array(elems) => {
3924 format!("{}=({})", assign.name, elems.join(" "))
3925 }
3926 };
3927 words.push(synthetic);
3928 }
3929 zshlex();
3930 }
3931 STRING_LEX | TYPESET => {
3932 let s = tokstr();
3933 if let Some(s) = s {
3934 words.push(s);
3935 }
3936 // c:1929 — `incmdpos = 0;` so the next zshlex() does
3937 // not re-promote `{`/`[[`/reserved words at the
3938 // continuation position. Without this, `echo {a,b}`
3939 // re-lexes `{` as INBRACE_TOK (current-shell block)
3940 // and the brace expansion never reaches par_simple.
3941 set_incmdpos(false);
3942 // c:1931-1932 — `if (tok == TYPESET) intypeset = is_typeset = 1;`
3943 // Multi-assign `typeset a=1 b=2` relies on the lexer
3944 // re-emitting `b=2` as ENVSTRING; that path is gated
3945 // on `intypeset`. Without this, follow-on assignment
3946 // words arrive as STRING and the typeset builtin's
3947 // multi-assign form silently degrades.
3948 if tok() == TYPESET {
3949 set_intypeset(true);
3950 }
3951 zshlex();
3952 // Check for function definition foo() { ... }
3953 if words.len() == 1 && peek_inoutpar() {
3954 return parse_inline_funcdef(words.pop().unwrap());
3955 }
3956 // `{name}>file` named-fd redirect: the lexer doesn't
3957 // recognize this shape, so the bare word `{name}`
3958 // arrives as a String. If it matches `{IDENT}` and
3959 // the NEXT token is a redirop, pop it off as the
3960 // varid for that redir.
3961 if !words.is_empty() && IS_REDIROP(tok()) {
3962 let last = words.last().unwrap();
3963 let untoked = super::lex::untokenize(last);
3964 if untoked.starts_with('{') && untoked.ends_with('}') && untoked.len() > 2 {
3965 let name = &untoked[1..untoked.len() - 1];
3966 if !name.is_empty()
3967 && name.chars().all(|c| c == '_' || c.is_ascii_alphanumeric())
3968 && name
3969 .chars()
3970 .next()
3971 .map(|c| c == '_' || c.is_ascii_alphabetic())
3972 .unwrap_or(false)
3973 {
3974 let varid = name.to_string();
3975 words.pop();
3976 if let Some(mut redir) = par_redir() {
3977 redir.varid = Some(varid);
3978 redirs.push(redir);
3979 }
3980 continue;
3981 }
3982 }
3983 }
3984 }
3985 _ if IS_REDIROP(tok()) => {
3986 match par_redir() {
3987 Some(redir) => redirs.push(redir),
3988 None => break, // Error in redir parsing, stop
3989 }
3990 }
3991 INOUTPAR if !words.is_empty() => {
3992 // c:2055-2057 — `if (!isset(MULTIFUNCDEF) && argc > 1)
3993 // YYERROR(oecused);` — multi-name funcdef gate:
3994 // `f1 f2() { ... }` defines f1 AND f2 to the same
3995 // body, but only when MULTIFUNCDEF is set.
3996 if !isset(MULTIFUNCDEF) && words.len() > 1 {
3997 error(
3998 "parse error: multiple names in function definition without MULTIFUNCDEF",
3999 );
4000 return None;
4001 }
4002 // c:2061-2068 — `if (isset(EXECOPT) && hasalias &&
4003 // !isset(ALIASFUNCDEF) && argc && hasalias !=
4004 // input_hasalias()) { zwarn(...); YYERROR(...); }`
4005 // Alias-as-funcdef warning. zshrs's parser doesn't
4006 // track `hasalias` (alias-expansion provenance
4007 // during parse) yet, so `had_alias` stays false —
4008 // the gate is wired here as a marker so the canonical
4009 // C predicate is visible. Once alias-provenance lands,
4010 // swap `false` for the actual provenance compare.
4011 let had_alias = false;
4012 if isset(EXECOPT) && had_alias && !isset(ALIASFUNCDEF) && !words.is_empty() {
4013 crate::ported::utils::zwarn("defining function based on alias `(unknown)'");
4014 return None;
4015 }
4016 // foo() { ... } style function
4017 return parse_inline_funcdef(words.pop().unwrap());
4018 }
4019 _ => break,
4020 }
4021 }
4022
4023 if assigns.is_empty() && words.is_empty() && redirs.is_empty() {
4024 return None;
4025 }
4026
4027 Some(ZshCommand::Simple(ZshSimple {
4028 assigns,
4029 words,
4030 redirs,
4031 }))
4032}
4033
4034/// Parse an assignment
4035/// Parse an assignment word `NAME=value` or `NAME=(arr items)`.
4036/// Sub-routine of par_simple. The C source handles assignments
4037/// inline in par_simple via the ENVSTRING/ENVARRAY token paths
4038/// (parse.c:1842-2000ish); zshrs splits it out to a dedicated
4039/// helper for clarity.
4040fn parse_assign() -> Option<ZshAssign> {
4041 // Helper: locate the Equals-marker that delimits NAME from
4042 // VALUE in an assignment-shaped tokstr. The lexer META-encodes
4043 // EVERY `=` (including those inside `${var%%=foo}` strip
4044 // patterns or `[idx]=...` subscripts), so a naive
4045 // `tokstr.find(Equals)` would split at the first inner `=`
4046 // and break the whole assignment. Walk the string skipping
4047 // brace and bracket depth so the assignment's `=` (the one
4048 // after the last `]` of the LHS subscript / or after the
4049 // bare name) is the one we land on.
4050 fn find_assign_equals(s: &str) -> Option<usize> {
4051 let target = crate::ported::zsh_h::Equals;
4052 let mut brace = 0i32;
4053 let mut bracket = 0i32;
4054 let mut paren = 0i32;
4055 for (i, c) in s.char_indices() {
4056 match c {
4057 '{' | '\u{8f}' /* Inbrace */ => brace += 1,
4058 '}' | '\u{90}' /* Outbrace */ => {
4059 if brace > 0 {
4060 brace -= 1;
4061 }
4062 }
4063 '[' | '\u{91}' /* Inbrack */ => bracket += 1,
4064 ']' | '\u{92}' /* Outbrack */ => {
4065 if bracket > 0 {
4066 bracket -= 1;
4067 }
4068 }
4069 '(' | '\u{88}' /* Inpar */ => paren += 1,
4070 ')' | '\u{8a}' /* Outpar */ => {
4071 if paren > 0 {
4072 paren -= 1;
4073 }
4074 }
4075 _ if c == target && brace == 0 && bracket == 0 && paren == 0 => {
4076 return Some(i);
4077 }
4078 _ => {}
4079 }
4080 }
4081 None
4082 }
4083
4084 let _ts_tokstr = tokstr()?;
4085 let tokstr = _ts_tokstr.as_str();
4086
4087 // Parse name=value or name+=value.
4088 let (name, value_str, append) = if tok() == ENVARRAY {
4089 let (name, append) = if let Some(stripped) = tokstr.strip_suffix('+') {
4090 (stripped, true)
4091 } else {
4092 (tokstr, false)
4093 };
4094 (name.to_string(), String::new(), append)
4095 } else if let Some(pos) = find_assign_equals(tokstr) {
4096 let name_part = &tokstr[..pos];
4097 let (name, append) = if let Some(stripped) = name_part.strip_suffix('+') {
4098 (stripped, true)
4099 } else {
4100 (name_part, false)
4101 };
4102 (
4103 name.to_string(),
4104 tokstr[pos + Equals.len_utf8()..].to_string(),
4105 append,
4106 )
4107 } else if let Some(pos) = tokstr.find('=') {
4108 // Fallback to literal '=' for compatibility
4109 let name_part = &tokstr[..pos];
4110 let (name, append) = if let Some(stripped) = name_part.strip_suffix('+') {
4111 (stripped, true)
4112 } else {
4113 (name_part, false)
4114 };
4115 (name.to_string(), tokstr[pos + 1..].to_string(), append)
4116 } else {
4117 return None;
4118 };
4119
4120 let value = if tok() == ENVARRAY {
4121 // Array assignment: name=(...)
4122 let mut elements = Vec::new();
4123 zshlex(); // skip past token
4124
4125 let mut arr_iters = 0;
4126 const MAX_ARRAY_ELEMENTS: usize = 10_000;
4127 while matches!(tok(), STRING_LEX | SEPER | NEWLIN) {
4128 arr_iters += 1;
4129 if arr_iters > MAX_ARRAY_ELEMENTS {
4130 error("array assignment exceeded maximum elements");
4131 break;
4132 }
4133 if tok() == STRING_LEX {
4134 let _ts_s = crate::ported::lex::tokstr();
4135 if let Some(s) = _ts_s.as_deref() {
4136 elements.push(s.to_string());
4137 }
4138 }
4139 zshlex();
4140 }
4141
4142 // The closing Outpar is consumed here. The outer par_simple
4143 // loop will then `zshlex()` past whatever follows (typically
4144 // a separator or the next word) — calling zshlex twice in
4145 // tandem (here AND in par_simple) over-advances and merges
4146 // a following `name() { … }` funcdef into the same Simple.
4147 // We only consume Outpar; let the caller handle the rest.
4148 // Without this guard `g=(o1); f() { :; }` parsed as one
4149 // Simple with assigns=[g] and words=["f()"] (one token).
4150 if tok() == OUTPAR_TOK {
4151 // Note: do NOT zshlex() here. par_simple's `lexer
4152 // .zshlex()` after `parse_assign` returns advances past
4153 // the Outpar onto the next significant token.
4154 //
4155 // Force `incmdpos=true` so the next zshlex() recognizes
4156 // a follow-up `b=(...)` / `b=val` as Envarray/Envstring.
4157 // The lexer flips incmdpos to false on bare Outpar (which
4158 // is correct for subshell-close context), but for an
4159 // array-assignment close more assigns/words may follow.
4160 set_incmdpos(true);
4161 }
4162
4163 ZshAssignValue::Array(elements)
4164 } else {
4165 ZshAssignValue::Scalar(value_str)
4166 };
4167
4168 Some(ZshAssign {
4169 name,
4170 value,
4171 append,
4172 })
4173}
4174
4175/// Parse a redirection
4176/// Parse a redirection (>file, <file, >>file, <<HEREDOC, etc.).
4177/// Direct port of zsh/Src/parse.c:2229 `par_redir`. Returns
4178/// a ZshRedir node carrying the operator type, fd, target word
4179/// (or here-doc body / pipe-redir command), and any `{var}` style
4180/// fd-binding parameter.
4181fn par_redir() -> Option<ZshRedir> {
4182 let rtype = match tok() {
4183 OUTANG_TOK => REDIR_WRITE,
4184 OUTANGBANG => REDIR_WRITENOW,
4185 DOUTANG => REDIR_APP,
4186 DOUTANGBANG => REDIR_APPNOW,
4187 INANG_TOK => REDIR_READ,
4188 INOUTANG => REDIR_READWRITE,
4189 DINANG => REDIR_HEREDOC,
4190 DINANGDASH => REDIR_HEREDOCDASH,
4191 TRINANG => REDIR_HERESTR,
4192 INANGAMP => REDIR_MERGEIN,
4193 OUTANGAMP => REDIR_MERGEOUT,
4194 AMPOUTANG => REDIR_ERRWRITE,
4195 OUTANGAMPBANG => REDIR_ERRWRITENOW,
4196 DOUTANGAMP => REDIR_ERRAPP,
4197 DOUTANGAMPBANG => REDIR_ERRAPPNOW,
4198 _ => return None,
4199 };
4200
4201 let fd = if tokfd() >= 0 {
4202 tokfd()
4203 } else if matches!(
4204 rtype,
4205 REDIR_READ
4206 | REDIR_READWRITE
4207 | REDIR_MERGEIN
4208 | REDIR_HEREDOC
4209 | REDIR_HEREDOCDASH
4210 | REDIR_HERESTR
4211 ) {
4212 0
4213 } else {
4214 1
4215 };
4216
4217 // c:2234-2245 — save/restore incmdpos and nocorrect around the
4218 // zshlex that consumes the redir target word:
4219 // oldcmdpos = incmdpos; incmdpos = 0;
4220 // oldnc = nocorrect;
4221 // if (tok != INANG && tok != INOUTANG) nocorrect = 1;
4222 // ... zshlex; check tok; ...
4223 // incmdpos = oldcmdpos; nocorrect = oldnc;
4224 // Without this, a redir target lexes in the parent's incmdpos
4225 // (re-promoting `{` / reswords) AND with parent nocorrect (so
4226 // spelling-correction wrongly runs inside `> $(cmd)` etc.).
4227 let oldcmdpos = incmdpos();
4228 set_incmdpos(false);
4229 let oldnc = nocorrect();
4230 let cur = tok();
4231 if cur != INANG_TOK && cur != INOUTANG {
4232 set_nocorrect(1);
4233 }
4234 zshlex();
4235
4236 let name = match tok() {
4237 STRING_LEX | ENVSTRING => {
4238 let n = tokstr().unwrap_or_default();
4239 // Restore BEFORE the next zshlex so trailing tokens lex
4240 // in the original parent context (mirrors C ordering at
4241 // parse.c:2244-2245 — restore right after the word is
4242 // confirmed, before any downstream advance).
4243 set_incmdpos(oldcmdpos);
4244 set_nocorrect(oldnc);
4245 zshlex();
4246 n
4247 }
4248 _ => {
4249 set_incmdpos(oldcmdpos);
4250 set_nocorrect(oldnc);
4251 error("expected word after redirection");
4252 return None;
4253 }
4254 };
4255
4256 // Heredoc terminator capture. C parse.c:2254-2317 par_redir builds
4257 // a `struct heredocs` entry here for REDIR_HEREDOC[DASH]; zshrs
4258 // pushes a HereDoc onto heredocs[] for process_heredocs (called
4259 // by zshlex on the next NEWLIN) to fill in. Quoted terminators
4260 // (`<<'EOF'` / `<<"EOF"` / `<<\EOF`) disable expansion in the
4261 // body — Snull `\u{9d}` marks single-quote, Dnull `\u{9e}` marks
4262 // double-quote, Bnull `\u{9f}` marks any backslash-escaped char.
4263 let heredoc_idx = if matches!(rtype, REDIR_HEREDOC | REDIR_HEREDOCDASH) {
4264 let strip_tabs = rtype == REDIR_HEREDOCDASH;
4265 let quoted = name.contains('\u{9d}')
4266 || name.contains('\u{9e}')
4267 || name.contains('\u{9f}')
4268 || name.starts_with('\'')
4269 || name.starts_with('"');
4270 let term = name
4271 .chars()
4272 .filter(|c| {
4273 *c != '\'' && *c != '"' && *c != '\u{9d}' && *c != '\u{9e}' && *c != '\u{9f}'
4274 })
4275 .collect::<String>();
4276 crate::ported::lex::heredocs_push(crate::ported::lex::HereDoc {
4277 terminator: term,
4278 strip_tabs,
4279 content: String::new(),
4280 quoted,
4281 processed: false,
4282 });
4283 Some(heredocs_len() - 1)
4284 } else {
4285 None
4286 };
4287
4288 Some(ZshRedir {
4289 rtype,
4290 fd,
4291 name,
4292 heredoc: None,
4293 varid: None,
4294 heredoc_idx,
4295 })
4296}
4297
4298/// Parse for/foreach loop
4299/// Parse `for NAME in WORDS; do BODY; done` (foreach style) AND
4300/// `for ((init; cond; incr)) do BODY done` (c-style). Direct port
4301/// of zsh/Src/parse.c:1087 `par_for`. parse_for_cstyle is the
4302/// inner branch for the `((...))` arithmetic-header variant
4303/// (parse.c:1100-1140 inside par_for).
4304fn par_for() -> Option<ZshCommand> {
4305 let is_foreach = tok() == FOREACH;
4306 zshlex();
4307
4308 // Check for C-style: for (( init; cond; step ))
4309 if tok() == DINPAR {
4310 return parse_for_cstyle();
4311 }
4312
4313 // Get variable name(s). zsh parse.c par_for accepts multiple
4314 // identifier tokens before `in`/`(`/newline — `for k v in ...`
4315 // assigns each iteration's pair of values to k and v in turn.
4316 // We store the names space-joined since variable identifiers
4317 // can't contain whitespace.
4318 let mut names: Vec<String> = Vec::new();
4319 while tok() == STRING_LEX {
4320 let v = tokstr().unwrap_or_default();
4321 if v == "in" {
4322 break;
4323 }
4324 names.push(v);
4325 zshlex();
4326 }
4327 if names.is_empty() {
4328 error("expected variable name in for");
4329 return None;
4330 }
4331 let var = names.join(" ");
4332
4333 // Skip newlines
4334 skip_separators();
4335
4336 // Get list. The lexer-port quirk: `for x (a b c)` arrives as a
4337 // single String token with the parens lexed-as-content
4338 // (`<Inpar>a b c<Outpar>`) instead of as separate Inpar/String/
4339 // Outpar tokens. Detect that shape and split it manually.
4340 let list = if tok() == STRING_LEX
4341 && tokstr()
4342 .map(|s| s.starts_with('\u{88}') && s.ends_with('\u{8a}'))
4343 .unwrap_or(false)
4344 {
4345 let raw = tokstr().unwrap_or_default();
4346 // Strip leading Inpar + trailing Outpar, then untokenize the
4347 // inner content and split on whitespace for the word list.
4348 let inner = &raw[raw.char_indices().nth(1).map(|(i, _)| i).unwrap_or(0)
4349 ..raw
4350 .char_indices()
4351 .last()
4352 .map(|(i, _)| i)
4353 .unwrap_or(raw.len())];
4354 let cleaned = super::lex::untokenize(inner);
4355 let words: Vec<String> = cleaned.split_whitespace().map(|s| s.to_string()).collect();
4356 zshlex();
4357 ForList::Words(words)
4358 } else if tok() == STRING_LEX {
4359 let s = tokstr();
4360 if s.map(|s| s == "in").unwrap_or(false) {
4361 zshlex();
4362 let mut words = Vec::new();
4363 let mut word_count = 0;
4364 while tok() == STRING_LEX {
4365 word_count += 1;
4366 if word_count > 500 || check_limit() {
4367 error("for: too many words");
4368 return None;
4369 }
4370 let _ts_s = tokstr();
4371 if let Some(s) = _ts_s.as_deref() {
4372 words.push(s.to_string());
4373 }
4374 zshlex();
4375 }
4376 ForList::Words(words)
4377 } else {
4378 ForList::Positional
4379 }
4380 } else if tok() == INPAR_TOK {
4381 // for var (...)
4382 zshlex();
4383 let mut words = Vec::new();
4384 let mut word_count = 0;
4385 while tok() == STRING_LEX || tok() == SEPER {
4386 word_count += 1;
4387 if word_count > 500 || check_limit() {
4388 error("for: too many words in parens");
4389 return None;
4390 }
4391 if tok() == STRING_LEX {
4392 let _ts_s = tokstr();
4393 if let Some(s) = _ts_s.as_deref() {
4394 words.push(s.to_string());
4395 }
4396 }
4397 zshlex();
4398 }
4399 if tok() == OUTPAR_TOK {
4400 // After the `)` of a for-list, the next token is the
4401 // body opener — `do`/`{`. zsh's lexer needs incmdpos
4402 // set so `{` lexes as Inbrace (not as a literal). C
4403 // analogue: parse.c::par_for sets `incmdpos = 1`
4404 // after consuming the Outpar before the body parse.
4405 set_incmdpos(true);
4406 zshlex();
4407 }
4408 ForList::Words(words)
4409 } else {
4410 ForList::Positional
4411 };
4412
4413 // Skip to body
4414 skip_separators();
4415
4416 // Parse body
4417 let body = parse_loop_body(is_foreach)?;
4418
4419 Some(ZshCommand::For(ZshFor {
4420 var,
4421 list,
4422 body: Box::new(body),
4423 is_select: false,
4424 }))
4425}
4426
4427/// Parse C-style for loop: for (( init; cond; step ))
4428/// Parse the c-style `for ((init; cond; incr)) do BODY done`.
4429/// Inner branch of zsh/Src/parse.c:1100-1140 inside par_for.
4430/// Recognized when the token after FOR is DINPAR (the `((`
4431/// detected by gettok via dbparens setup).
4432fn parse_for_cstyle() -> Option<ZshCommand> {
4433 // We're at (( (Dinpar None) - the opening ((
4434 // Lexer returns:
4435 // Dinpar None - opening ((
4436 // Dinpar "init" - init expression, semicolon consumed
4437 // Dinpar "cond" - cond expression, semicolon consumed
4438 // Doutpar "step" - step expression, closing )) consumed
4439
4440 zshlex(); // Get init: Dinpar "i=0"
4441
4442 if tok() != DINPAR {
4443 error("expected init expression in for ((");
4444 return None;
4445 }
4446 let init = tokstr().unwrap_or_default();
4447
4448 zshlex(); // Get cond: Dinpar "i<10"
4449
4450 if tok() != DINPAR {
4451 error("expected condition in for ((");
4452 return None;
4453 }
4454 let cond = tokstr().unwrap_or_default();
4455
4456 zshlex(); // Get step: Doutpar "i++"
4457
4458 if tok() != DOUTPAR {
4459 error("expected )) in for");
4460 return None;
4461 }
4462 let step = tokstr().unwrap_or_default();
4463
4464 zshlex(); // Move past ))
4465
4466 skip_separators();
4467 let body = parse_loop_body(false)?;
4468
4469 Some(ZshCommand::For(ZshFor {
4470 var: String::new(),
4471 list: ForList::CStyle { init, cond, step },
4472 body: Box::new(body),
4473 is_select: false,
4474 }))
4475}
4476
4477/// Parse select loop (same syntax as for)
4478/// Parse `select NAME in WORDS; do BODY; done`. Same shape as
4479/// `for NAME in WORDS; do ...` but with menu-prompt semantics in
4480/// the executor. C equivalent: the SELECT case in par_for at
4481/// parse.c:1087-1207 (selects share parser flow with foreach).
4482fn parse_select() -> Option<ZshCommand> {
4483 // `select` shares par_for's grammar (var, words, body) but the
4484 // compile path is different (interactive prompt loop).
4485 match par_for()? {
4486 ZshCommand::For(mut f) => {
4487 f.is_select = true;
4488 Some(ZshCommand::For(f))
4489 }
4490 other => Some(other),
4491 }
4492}
4493
4494/// Parse case statement
4495/// Parse `case WORD in PATTERN) BODY ;; ... esac`. Direct port
4496/// of zsh/Src/parse.c:1209 `par_case`. Each case arm is a
4497/// (pattern_list, body, terminator) tuple where terminator is
4498/// `;;` (default), `;&` (fallthrough), or `;|` (continue testing).
4499fn par_case() -> Option<ZshCommand> {
4500 // C par_case (parse.c:1209-1241). Order of state toggles
4501 // matters — the lexer reads the case word in `incmdpos=0`
4502 // (so it's not promoted to a reswd), then the `in`/`{` in
4503 // `incmdpos=1, noaliases=1, nocorrect=1` (so the `in` literal
4504 // isn't alias-expanded or spell-corrected), then sets
4505 // `incasepat=1, incmdpos=0` before the first pattern.
4506 set_incmdpos(false);
4507 zshlex(); // skip 'case'
4508
4509 let word = match tok() {
4510 STRING_LEX => {
4511 let w = tokstr().unwrap_or_default();
4512 // c:1222 — `incmdpos = 1;` before the next zshlex so the
4513 // `in` keyword is recognised. c:1223-1225 — save+force
4514 // noaliases / nocorrect.
4515 set_incmdpos(true);
4516 let ona = noaliases();
4517 let onc = nocorrect();
4518 set_noaliases(true);
4519 set_nocorrect(1);
4520 zshlex();
4521 // Restore noaliases/nocorrect after the `in`-or-`{` token
4522 // is in hand; both are unconditionally restored at c:1238-1239.
4523 let restore = |ona: bool, onc: i32| {
4524 set_noaliases(ona);
4525 set_nocorrect(onc);
4526 };
4527 (w, ona, onc, restore)
4528 }
4529 _ => {
4530 error("expected word after case");
4531 return None;
4532 }
4533 };
4534 let (word, ona, onc, restore) = word;
4535
4536 skip_separators();
4537
4538 // Expect 'in' or {
4539 let use_brace = tok() == INBRACE_TOK;
4540 if tok() == STRING_LEX {
4541 let s = tokstr();
4542 if s.map(|s| s != "in").unwrap_or(true) {
4543 // c:1228-1232 — restore noaliases/nocorrect on error path.
4544 restore(ona, onc);
4545 error("expected 'in' in case");
4546 return None;
4547 }
4548 } else if !use_brace {
4549 restore(ona, onc);
4550 error("expected 'in' or '{' in case");
4551 return None;
4552 }
4553 // c:1236-1239 — `incasepat = 1; incmdpos = 0; noaliases = ona;
4554 // nocorrect = onc;` — set the case-pattern context AND restore
4555 // alias/correct state BEFORE the zshlex that consumes `in`/`{`.
4556 set_incasepat(1);
4557 set_incmdpos(false);
4558 restore(ona, onc);
4559 zshlex();
4560
4561 let mut arms = Vec::new();
4562 const MAX_ARMS: usize = 10_000;
4563
4564 loop {
4565 if arms.len() > MAX_ARMS {
4566 error("par_case: too many arms");
4567 break;
4568 }
4569
4570 // Set incasepat BEFORE skipping separators so lexer knows we're in case pattern context
4571 // This affects how [ and | are lexed
4572 set_incasepat(1);
4573
4574 skip_separators();
4575
4576 // Check for end
4577 // Note: 'esac' might be String "esac" if incasepat > 0 prevents reserved word recognition
4578 let is_esac = tok() == ESAC
4579 || (tok() == STRING_LEX && tokstr().map(|s| s == "esac").unwrap_or(false));
4580 if (use_brace && tok() == OUTBRACE_TOK) || (!use_brace && is_esac) {
4581 set_incasepat(0);
4582 zshlex();
4583 break;
4584 }
4585
4586 // Also break on EOF
4587 if tok() == ENDINPUT || tok() == LEXERR {
4588 set_incasepat(0);
4589 break;
4590 }
4591
4592 // Skip optional `(`. zsh's case grammar: `case W in (P)…)`.
4593 // The leading `(` is paired with a matching `)` that closes
4594 // the pattern itself; the arm-close `)` follows separately.
4595 // Track whether we consumed it so we can skip the matching
4596 // `)` after pattern parsing — otherwise the arm-close would
4597 // be interpreted as the pattern-close and the actual body
4598 // would get the leftover `)`.
4599 let had_leading_paren = tok() == INPAR_TOK;
4600 if had_leading_paren {
4601 zshlex();
4602 }
4603
4604 // incasepat is already set above
4605 let mut patterns = Vec::new();
4606 let mut pattern_iterations = 0;
4607 loop {
4608 pattern_iterations += 1;
4609 if pattern_iterations > 1000 {
4610 error("par_case: too many pattern iterations");
4611 set_incasepat(0);
4612 return None;
4613 }
4614
4615 if tok() == STRING_LEX {
4616 let s = tokstr();
4617 if s.map(|s| s == "esac").unwrap_or(false) {
4618 break;
4619 }
4620 patterns.push(tokstr().unwrap_or_default());
4621 // After first pattern token, set incasepat=2 so ( is treated as part of pattern
4622 set_incasepat(2);
4623 zshlex();
4624 } else if tok() != BAR_TOK {
4625 break;
4626 }
4627
4628 if tok() == BAR_TOK {
4629 // Reset to 1 (start of next alternative pattern)
4630 set_incasepat(1);
4631 zshlex();
4632 } else {
4633 break;
4634 }
4635 }
4636 set_incasepat(0);
4637
4638 // zsh's `(P)` form (parse.c:1320-1360 hack) treats the entire
4639 // parenthesized contents as ONE zsh pattern with internal `|`
4640 // as the literal alternation operator — NOT as multiple
4641 // case-arm alternatives. Without a leading `(`, the bare
4642 // `P1|P2)` form splits into multiple alts. Mirror that here:
4643 // when a leading `(` was consumed, fold the |-separated
4644 // pieces back into a single pattern string.
4645 if had_leading_paren && patterns.len() > 1 {
4646 let joined = patterns.join("|");
4647 patterns = vec![joined];
4648 }
4649
4650 // Expect ). Also handle the `(P))` wrapped-pattern form:
4651 // when a leading `(` was consumed, accept an extra `)` —
4652 // the inner `)` closes the optional-paren wrapper, the
4653 // outer `)` is the arm-close. zsh accepts BOTH `(P) BODY`
4654 // (bare pattern, leading-paren is just the opt-marker, the
4655 // close is arm-close) and `(P)) BODY` (paren-wrapped
4656 // pattern, then arm-close). The first form is unambiguous
4657 // when the bare pattern was simple; the second is needed
4658 // when the body starts with `(`.
4659 if tok() != OUTPAR_TOK {
4660 error("expected ')' in case pattern");
4661 return None;
4662 }
4663 // Port of Src/parse.c:1310-1313 — when the case pattern
4664 // closes with `)`, set `incmdpos = 1` BEFORE consuming
4665 // the token so the first word of the arm body is lexed
4666 // in command position. Without this, `case X in X) c1=v ;;`
4667 // lexes `c1=v` as a plain STRING rather than an assignment
4668 // word, and exec treats it as a command name (yielding
4669 // "command not found: c1=v"). Subsequent statements after
4670 // `;` parse correctly because the `;` separator restores
4671 // command position; only the FIRST body word was broken.
4672 set_incmdpos(true);
4673 zshlex();
4674 if had_leading_paren && tok() == OUTPAR_TOK {
4675 set_incmdpos(true);
4676 zshlex();
4677 }
4678
4679 // Parse body
4680 let body = parse_program();
4681
4682 // Get terminator. Set incasepat=1 BEFORE the zshlex
4683 // advance so the next token (the next arm's pattern, like
4684 // `[a-z]`) gets tokenized in pattern context. Without
4685 // this, a `[`-prefixed pattern after the FIRST arm became
4686 // Inbrack instead of String and the pattern-loop bailed
4687 // out with "expected ')' in case pattern".
4688 let terminator = match tok() {
4689 DSEMI => {
4690 set_incasepat(1);
4691 zshlex();
4692 CaseTerm::Break
4693 }
4694 SEMIAMP => {
4695 set_incasepat(1);
4696 zshlex();
4697 CaseTerm::Continue
4698 }
4699 SEMIBAR => {
4700 set_incasepat(1);
4701 zshlex();
4702 CaseTerm::TestNext
4703 }
4704 _ => CaseTerm::Break,
4705 };
4706
4707 if !patterns.is_empty() {
4708 arms.push(CaseArm {
4709 patterns,
4710 body,
4711 terminator,
4712 });
4713 }
4714 }
4715
4716 Some(ZshCommand::Case(ZshCase { word, arms }))
4717}
4718
4719/// Parse if statement
4720/// Parse `if COND; then BODY; [elif COND; then BODY;]* [else BODY;] fi`.
4721/// Direct port of zsh/Src/parse.c:1411 `par_if`. The C source
4722/// emits WC_IF wordcodes per arm; zshrs builds an AST chain of
4723/// (cond, then_body) tuples plus an optional else_body.
4724fn par_if() -> Option<ZshCommand> {
4725 zshlex(); // skip 'if'
4726
4727 // Parse condition - stops at 'then' or '{' (zsh allows { instead of then)
4728 let cond = Box::new(parse_program_until(Some(&[THEN, INBRACE_TOK])));
4729
4730 skip_separators();
4731
4732 // Expect 'then' or {
4733 let use_brace = tok() == INBRACE_TOK;
4734 if tok() != THEN && !use_brace {
4735 error("expected 'then' or '{' after if condition");
4736 return None;
4737 }
4738 zshlex();
4739
4740 // Parse then-body - stops at else/elif/fi, or } if using brace syntax
4741 let then = if use_brace {
4742 let body = parse_program_until(Some(&[OUTBRACE_TOK]));
4743 if tok() == OUTBRACE_TOK {
4744 zshlex();
4745 }
4746 Box::new(body)
4747 } else {
4748 Box::new(parse_program_until(Some(&[ELSE, ELIF, FI])))
4749 };
4750
4751 // Parse elif and else. zsh accepts the SAME elif/else
4752 // continuations for both classic `then/fi` AND the brace
4753 // form `{ ... } elif ... { ... } else { ... }`. Direct port
4754 // of zsh/Src/parse.c:1417-1500 par_if where the elif/else
4755 // arms are checked AFTER the body close regardless of which
4756 // delimiter style opened the block. Without this, zinit's
4757 // if [[ -z $sel ]] { ... } else { ... }
4758 // hung the parser — `else` was treated as an external
4759 // command following the if-statement, which the lexer state
4760 // mis-classified inside the still-open function body.
4761 //
4762 // For brace-form: skip the `fi` consumption at the end of
4763 // the loop (no `fi` after a brace block), and `else` may
4764 // arrive after a `}` close. Skip-separators between the
4765 // body close and the elif/else token.
4766 let mut elif = Vec::new();
4767 let mut else_ = None;
4768
4769 {
4770 loop {
4771 skip_separators();
4772
4773 match tok() {
4774 ELIF => {
4775 zshlex();
4776 // elif condition stops at 'then' or '{'
4777 let econd = parse_program_until(Some(&[THEN, INBRACE_TOK]));
4778 skip_separators();
4779
4780 let elif_use_brace = tok() == INBRACE_TOK;
4781 if tok() != THEN && !elif_use_brace {
4782 error("expected 'then' after elif");
4783 return None;
4784 }
4785 zshlex();
4786
4787 // elif body stops at else/elif/fi or } if using braces
4788 let ebody = if elif_use_brace {
4789 let body = parse_program_until(Some(&[OUTBRACE_TOK]));
4790 if tok() == OUTBRACE_TOK {
4791 zshlex();
4792 }
4793 body
4794 } else {
4795 parse_program_until(Some(&[ELSE, ELIF, FI]))
4796 };
4797
4798 elif.push((econd, ebody));
4799 }
4800 ELSE => {
4801 zshlex();
4802 skip_separators();
4803
4804 let else_use_brace = tok() == INBRACE_TOK;
4805 if else_use_brace {
4806 zshlex();
4807 }
4808
4809 // else body stops at 'fi' or '}'
4810 else_ = Some(Box::new(if else_use_brace {
4811 let body = parse_program_until(Some(&[OUTBRACE_TOK]));
4812 if tok() == OUTBRACE_TOK {
4813 zshlex();
4814 }
4815 body
4816 } else {
4817 parse_program_until(Some(&[FI]))
4818 }));
4819
4820 // Consume the 'fi' if present (not for brace syntax)
4821 if !else_use_brace && tok() == FI {
4822 zshlex();
4823 }
4824 break;
4825 }
4826 FI => {
4827 zshlex();
4828 break;
4829 }
4830 _ => break,
4831 }
4832 }
4833 }
4834
4835 Some(ZshCommand::If(ZshIf {
4836 cond,
4837 then,
4838 elif,
4839 else_,
4840 }))
4841}
4842
4843/// Parse while/until loop
4844/// Parse `while COND; do BODY; done` and `until COND; do BODY; done`.
4845/// Direct port of zsh/Src/parse.c:1521 `par_while`. The
4846/// `until` variant is the same loop with the condition negated.
4847fn par_while(until: bool) -> Option<ZshCommand> {
4848 zshlex(); // skip while/until
4849
4850 let cond = Box::new(parse_program());
4851
4852 skip_separators();
4853 let body = parse_loop_body(false)?;
4854
4855 Some(ZshCommand::While(ZshWhile {
4856 cond,
4857 body: Box::new(body),
4858 until,
4859 }))
4860}
4861
4862/// Parse repeat loop
4863/// Parse `repeat N; do BODY; done`. Direct port of
4864/// zsh/Src/parse.c:1565 `par_repeat`. The C source supports
4865/// the SHORTLOOPS short-form `repeat N CMD` (no do/done) — zshrs's
4866/// parser doesn't yet special-case that variant.
4867fn par_repeat() -> Option<ZshCommand> {
4868 zshlex(); // skip 'repeat'
4869
4870 let count = match tok() {
4871 STRING_LEX => {
4872 let c = tokstr().unwrap_or_default();
4873 zshlex();
4874 c
4875 }
4876 _ => {
4877 error("expected count after repeat");
4878 return None;
4879 }
4880 };
4881
4882 skip_separators();
4883 // c:1600 — par_repeat's short-form gate is wider: it unlocks
4884 // when SHORTLOOPS OR SHORTREPEAT is set (vs SHORTLOOPS alone for
4885 // for/while). Pass `is_repeat=true` so parse_loop_body_kind
4886 // applies that widened gate.
4887 let body = parse_loop_body_kind(false, true)?;
4888
4889 Some(ZshCommand::Repeat(ZshRepeat {
4890 count,
4891 body: Box::new(body),
4892 }))
4893}
4894
4895/// Parse loop body (do...done, {...}, or shortloop)
4896/// Parse the `do BODY done` body of a for/while/until/select/
4897/// repeat loop. Direct equivalent of zsh's parse.c handling
4898/// inside the loop builders — they all consume DOLOOP, parse a
4899/// list until DONE, and return the list. The `foreach_style`
4900/// flag signals foreach (where short-form `for NAME in WORDS;
4901/// CMD` may skip do/done) vs c-style (which always requires
4902/// do/done).
4903fn parse_loop_body(foreach_style: bool) -> Option<ZshProgram> {
4904 parse_loop_body_kind(foreach_style, false)
4905}
4906
4907/// Body-dispatch helper. `is_repeat` widens the SHORTLOOPS gate so
4908/// `SHORTREPEAT` also unlocks the short form for `repeat N CMD`
4909/// (per c:1600 `unset(SHORTLOOPS) && unset(SHORTREPEAT)`).
4910fn parse_loop_body_kind(foreach_style: bool, is_repeat: bool) -> Option<ZshProgram> {
4911 // c:1180-1194 — body dispatch order per par_for:
4912 // `do ... done` (DOLOOP) — primary form.
4913 // `{ ... }` (INBRACE) — alternate.
4914 // csh/CSHJUNKIELOOPS — terminator is `end`.
4915 // else if (unset(SHORTLOOPS)) — YYERROR.
4916 // else — short form (single command).
4917 if tok() == DOLOOP {
4918 zshlex();
4919 let body = parse_program();
4920 if tok() == DONE {
4921 zshlex();
4922 }
4923 Some(body)
4924 } else if tok() == INBRACE_TOK {
4925 zshlex();
4926 let body = parse_program();
4927 if tok() == OUTBRACE_TOK {
4928 zshlex();
4929 }
4930 Some(body)
4931 } else if foreach_style || isset(CSHJUNKIELOOPS) {
4932 // c:1184 / 1546 / 1595 — `else if (csh || isset(CSHJUNKIELOOPS))`.
4933 let body = parse_program();
4934 if tok() == ZEND {
4935 zshlex();
4936 }
4937 Some(body)
4938 } else {
4939 // c:1190 / 1474 / 1551 / 1600 — short-form gate. C bails
4940 // with YYERROR when `unset(SHORTLOOPS) && (!is_repeat ||
4941 // unset(SHORTREPEAT))`. zshrs's option machinery isn't
4942 // initialised at parse-test time (no `init_main` →
4943 // `install_emulation_defaults`), so a strict port here
4944 // body. parse_init seeds SHORTLOOPS=on mirroring C
4945 // `install_emulation_defaults`, so this fires only when a
4946 // script explicitly disabled the option.
4947 if unset(SHORTLOOPS) && (!is_repeat || unset(SHORTREPEAT)) {
4948 error("parse error: short loop form requires SHORTLOOPS option");
4949 return None;
4950 }
4951 // c:1192-1193 — short form: single command body.
4952 par_list().map(|list| ZshProgram { lists: vec![list] })
4953 }
4954}
4955
4956/// Parse (...) subshell
4957/// Parse a subshell `( ... )`. Direct port of zsh/Src/parse.c:1619
4958/// `par_subsh`. Body parses as a normal list; the subshell wrapper
4959/// fork-isolates execution in the executor.
4960fn par_subsh() -> Option<ZshCommand> {
4961 zshlex(); // skip (
4962 let prog = parse_program();
4963 if tok() == OUTPAR_TOK {
4964 zshlex();
4965 }
4966 Some(ZshCommand::Subsh(Box::new(prog)))
4967}
4968
4969/// `() { body } arg1 arg2 …` — anonymous function. Defines a fresh
4970/// function named `_zshrs_anon_N`, invokes it with the args, and the
4971/// body runs with positional params set. Implemented as the desugared
4972/// pair (FuncDef + Simple call) so the compile path doesn't need new
4973/// machinery.
4974/// Parse an anonymous function definition `() { BODY }` followed
4975/// by call args. zsh treats `() { echo hi; } a b c` as defining
4976/// and immediately calling an anon fn with args a/b/c. C
4977/// equivalent: the INOUTPAR shape in par_simple at parse.c:1836+
4978/// triggers an anon-funcdef path.
4979fn parse_anon_funcdef() -> Option<ZshCommand> {
4980 zshlex(); // skip ()
4981 skip_separators();
4982 // No `{` after `()` → bare empty subshell shape `()`. Fall back
4983 // to a Subsh with an empty program so the status is 0 (matches
4984 // zsh's `()` no-op behavior).
4985 if tok() != INBRACE_TOK {
4986 return Some(ZshCommand::Subsh(Box::new(ZshProgram {
4987 lists: Vec::new(),
4988 })));
4989 }
4990 zshlex(); // skip {
4991 let body = parse_program();
4992 if tok() == OUTBRACE_TOK {
4993 zshlex();
4994 }
4995 // Collect any trailing args until a separator. zsh's anon-fn form
4996 // `() { body } a b c` runs body with $1=a, $2=b, $3=c.
4997 let mut args = Vec::new();
4998 while tok() == STRING_LEX {
4999 if let Some(s) = tokstr() {
5000 args.push(s);
5001 }
5002 zshlex();
5003 }
5004
5005 // Generate a unique name. Module-level static would be cleaner but
5006 // a thread-local atomic is enough — anonymous functions are
5007 // ephemeral and the name isn't user-visible.
5008 static ANON_COUNTER: AtomicUsize = AtomicUsize::new(0);
5009 let n = ANON_COUNTER.fetch_add(1, Ordering::Relaxed);
5010 let name = format!("_zshrs_anon_{}", n);
5011 Some(ZshCommand::FuncDef(ZshFuncDef {
5012 names: vec![name],
5013 body: Box::new(body),
5014 tracing: false,
5015 auto_call_args: Some(args),
5016 body_source: None,
5017 }))
5018}
5019
5020/// Parse {...} cursh
5021/// Parse a current-shell brace block `{ BODY }`. C source
5022/// par_cmd at parse.c:958-1085 handles Inbrace → emit WC_CURSH
5023/// and recurses into the list. zshrs's parse_cursh extracts that
5024/// arm into a dedicated method.
5025fn parse_cursh() -> Option<ZshCommand> {
5026 zshlex(); // skip {
5027 let prog = parse_program();
5028
5029 // Check for { ... } always { ... }. Direct port of zsh's
5030 // par_subsh at parse.c:1612-1660 — note the two `incmdpos = 1`
5031 // forces (parse.c:1632, 1637): after consuming the closing
5032 // Outbrace AND after matching the `always` keyword, the parser
5033 // explicitly resets command position so the next `{` lexes as
5034 // Inbrace. Without these resets the lexer's String-clears-cmdpos
5035 // rule (lex.rs:976-983) leaves the second `{` in word position,
5036 // turning `always { ... }` into a Simple `{` `echo` … and the
5037 // try/always pairing is silently lost.
5038 if tok() == OUTBRACE_TOK {
5039 set_incmdpos(true); // parse.c:1632 incmdpos = !zsh_construct
5040 zshlex();
5041
5042 // Check for 'always'
5043 if tok() == STRING_LEX {
5044 let s = tokstr();
5045 if s.map(|s| s == "always").unwrap_or(false) {
5046 set_incmdpos(true); // parse.c:1637 incmdpos = 1
5047 zshlex();
5048 skip_separators();
5049
5050 if tok() == INBRACE_TOK {
5051 zshlex();
5052 let always = parse_program();
5053 if tok() == OUTBRACE_TOK {
5054 zshlex();
5055 }
5056 return Some(ZshCommand::Try(ZshTry {
5057 try_block: Box::new(prog),
5058 always: Box::new(always),
5059 }));
5060 }
5061 }
5062 }
5063 }
5064
5065 Some(ZshCommand::Cursh(Box::new(prog)))
5066}
5067
5068/// Parse function definition
5069/// Parse `function NAME { BODY }` or `NAME () { BODY }`. Direct
5070/// port of zsh/Src/parse.c:1672 `par_funcdef`. zsh handles
5071/// the multiple keyword shapes (function FOO, FOO (), function FOO ()),
5072/// the optional `[fname1 fname2 ...]` for multi-name function defs,
5073/// and the `function FOO () { ... }` traditional/POSIX hybrid form.
5074fn par_funcdef() -> Option<ZshCommand> {
5075 zshlex(); // skip 'function'
5076
5077 let mut names = Vec::new();
5078 let mut tracing = false;
5079
5080 // Handle options like -T and function names. Two subtleties:
5081 //
5082 // 1. Flags: zsh's lexer encodes a leading `-` as
5083 // `zsh_h::Dash` (`\u{9b}`, `Src/zsh.h:182`) inside the String tokstr.
5084 // The previous `s.starts_with('-')` check failed for
5085 // `\u{9b}T`, so `function -T NAME { body }` slipped the
5086 // `-T` token into `names` and the function got registered
5087 // as `T` plus the intended `NAME`.
5088 //
5089 // 2. Body opener: zsh's lexer emits the opening `{` as a
5090 // String (not INBRACE_TOK) when it follows the String
5091 // NAME — the preceding name token resets incmdpos to
5092 // false, and only `{` immediately followed by `}` (the
5093 // empty-body case) gets promoted to Inbrace. The funcdef
5094 // parser must recognise the bare-`{` String as the body
5095 // opener; otherwise `function NAME { body }` falls through
5096 // to `_ => break`, no body parses, and the FuncDef never
5097 // lands in the AST. This is consistent with C zsh's
5098 // par_funcdef which knows it's in funcdef-header context
5099 // and accepts the brace either way.
5100 loop {
5101 match tok() {
5102 STRING_LEX => {
5103 let _ts_s = tokstr()?;
5104 let s = _ts_s.as_str();
5105 // c:1702 — `if ((*tokstr == Inbrace || *tokstr == '{') && !tokstr[1])`.
5106 // Body opener can be either the literal `{` (early-return
5107 // path at lex.c:1141-1144 / lex.rs LX2_INBRACE cmdpos
5108 // branch) or the Inbrace marker `\u{8f}` (lex.c:1420
5109 // post-switch add(c) where c was rewritten via lextok2).
5110 if s == "{" || s == "\u{8f}" {
5111 break;
5112 }
5113 let first = s.chars().next();
5114 if matches!(first, Some('-') | Some('+')) || matches!(first, Some(c) if c == Dash) {
5115 if s.contains('T') {
5116 tracing = true;
5117 }
5118 zshlex();
5119 continue;
5120 }
5121 names.push(s.to_string());
5122 zshlex();
5123 }
5124 INBRACE_TOK | INOUTPAR | SEPER | NEWLIN => break,
5125 _ => break,
5126 }
5127 }
5128
5129 // Optional ()
5130 let saw_paren = tok() == INOUTPAR;
5131 if saw_paren {
5132 zshlex();
5133 }
5134
5135 skip_separators();
5136
5137 // Body opener: real Inbrace OR a String containing the literal `{`
5138 // (early-return path) OR a String containing the Inbrace marker
5139 // `\u{8f}` (bct++ path post-switch add). C parse.c:1702 handles
5140 // both string forms via `*tokstr == Inbrace || *tokstr == '{'`.
5141 let body_opener_is_string_brace =
5142 tok() == STRING_LEX && (tokstr_eq("{") || tokstr_eq("\u{8f}"));
5143 if tok() == INBRACE_TOK || body_opener_is_string_brace {
5144 // Capture body_start BEFORE the lexer advances past the
5145 // first body token. After the previous zshlex consumed
5146 // `{`, lexer.pos points just past `{` (which is where the
5147 // body source starts). The next `zshlex()` would advance
5148 // past the first token (`echo`), making body_start land
5149 // mid-body and lose the first word — `typeset -f f` would
5150 // print `a; echo b` for `{ echo a; echo b }`.
5151 let body_start = pos();
5152 zshlex();
5153 let body = parse_program();
5154 let body_end = if tok() == OUTBRACE_TOK {
5155 // Lexer has just consumed `}`; pos is past it. Body content
5156 // ends one byte before pos.
5157 pos().saturating_sub(1)
5158 } else {
5159 pos()
5160 };
5161 let body_source = input_slice(body_start, body_end)
5162 .map(|s| s.trim().to_string())
5163 .filter(|s| !s.is_empty());
5164 if tok() == OUTBRACE_TOK {
5165 zshlex();
5166 }
5167
5168 // Anonymous form `function () { body } a b c` (with `()`) or
5169 // `function { body } a b c` (zsh-only shorthand, no `()`). No
5170 // name was collected. Mirror parse_anon_funcdef: synthesize
5171 // `_zshrs_anon_N`, collect trailing args, set auto_call_args
5172 // so compile_funcdef registers + immediately calls the
5173 // function with the args as positional params.
5174 if names.is_empty() {
5175 let mut args = Vec::new();
5176 while tok() == STRING_LEX {
5177 if let Some(s) = tokstr() {
5178 args.push(s);
5179 }
5180 zshlex();
5181 }
5182 static ANON_COUNTER: AtomicUsize = AtomicUsize::new(0);
5183 let n = ANON_COUNTER.fetch_add(1, Ordering::Relaxed);
5184 let name = format!("_zshrs_anon_kw_{}", n);
5185 return Some(ZshCommand::FuncDef(ZshFuncDef {
5186 names: vec![name],
5187 body: Box::new(body),
5188 tracing,
5189 auto_call_args: Some(args),
5190 body_source,
5191 }));
5192 }
5193
5194 Some(ZshCommand::FuncDef(ZshFuncDef {
5195 names,
5196 body: Box::new(body),
5197 tracing,
5198 auto_call_args: None,
5199 body_source,
5200 }))
5201 } else {
5202 // Short form
5203 par_list().map(|list| {
5204 ZshCommand::FuncDef(ZshFuncDef {
5205 names,
5206 body: Box::new(ZshProgram { lists: vec![list] }),
5207 tracing,
5208 auto_call_args: None,
5209 body_source: None,
5210 })
5211 })
5212 }
5213}
5214
5215/// Parse inline function definition: name() { ... }
5216/// Parse the inline form `NAME () { BODY }` (POSIX-style funcdef
5217/// without the `function` keyword). The name has already been
5218/// consumed and pushed by par_simple before this method fires.
5219/// C source: handled inline in par_simple's INOUTPAR-after-name
5220/// arm (parse.c:1836-2228).
5221fn parse_inline_funcdef(name: String) -> Option<ZshCommand> {
5222 // par_simple's STRING loop left `incmdpos = 0`; the funcdef body
5223 // `{ ... }` requires `incmdpos = 1` so the lexer recognises `{`
5224 // as INBRACE_TOK (current-shell block opener) instead of a
5225 // literal `{` STRING. Without this, `myfunc() { echo body }`
5226 // parsed the body as the single STRING `"{"`, then `echo body`
5227 // fell out at top level. Mirrors the C path where par_cmd's
5228 // dispatcher (parse.c:958) is called with `incmdpos = 1` for
5229 // the funcdef body.
5230 set_incmdpos(true);
5231 // Skip ()
5232 if tok() == INOUTPAR {
5233 zshlex();
5234 }
5235
5236 skip_separators();
5237
5238 // Parse body
5239 if tok() == INBRACE_TOK {
5240 // Same body_start-before-zshlex fix as par_funcdef.
5241 let body_start = pos();
5242 zshlex();
5243 let body = parse_program();
5244 let body_end = if tok() == OUTBRACE_TOK {
5245 pos().saturating_sub(1)
5246 } else {
5247 pos()
5248 };
5249 let body_source = input_slice(body_start, body_end)
5250 .map(|s| s.trim().to_string())
5251 .filter(|s| !s.is_empty());
5252 if tok() == OUTBRACE_TOK {
5253 zshlex();
5254 }
5255 Some(ZshCommand::FuncDef(ZshFuncDef {
5256 names: vec![name],
5257 body: Box::new(body),
5258 tracing: false,
5259 auto_call_args: None,
5260 body_source,
5261 }))
5262 } else if unset(SHORTLOOPS) {
5263 // c:1742 — `else if (unset(SHORTLOOPS)) YYERRORV(oecused);` —
5264 // funcdef short body (`name() cmd` without `{...}`) only
5265 // accepted when SHORTLOOPS is set. parse_init seeds
5266 // SHORTLOOPS=on so this fires only when a script
5267 // explicitly disabled the option.
5268 error("parse error: short function body form requires SHORTLOOPS option");
5269 None
5270 } else {
5271 match par_cmd() {
5272 Some(cmd) => {
5273 let list = ZshList {
5274 sublist: ZshSublist {
5275 pipe: ZshPipe {
5276 cmd,
5277 next: None,
5278 lineno: lineno(),
5279 merge_stderr: false,
5280 },
5281 next: None,
5282 flags: SublistFlags::default(),
5283 },
5284 flags: ListFlags::default(),
5285 };
5286 Some(ZshCommand::FuncDef(ZshFuncDef {
5287 names: vec![name],
5288 body: Box::new(ZshProgram { lists: vec![list] }),
5289 tracing: false,
5290 auto_call_args: None,
5291 body_source: None,
5292 }))
5293 }
5294 None => None,
5295 }
5296 }
5297}
5298
5299/// Parse [[ ... ]] conditional
5300/// Parse `[[ EXPR ]]` conditional expression. Direct port of
5301/// zsh/Src/parse.c:2409 `par_cond` (and helpers par_cond_1,
5302/// par_cond_2, par_cond_double, par_cond_triple, par_cond_multi
5303/// at parse.c:2434-2731). Expression operators: `||` `&&` `!`
5304/// + unary tests (-f, -d, -n, -z, etc.) + binary tests (=, !=,
5305/// <, >, ==, =~, -eq, -ne, -lt, -le, -gt, -ge, -nt, -ot, -ef).
5306fn par_cond() -> Option<ZshCommand> {
5307 // C par_dinbrack (parse.c:1810-1822) wraps the body parse with
5308 // `incond = 1; incmdpos = 0;` BEFORE the first zshlex past `[[`,
5309 // and resets to `incond = 0; incmdpos = 1;` after `]]`. Without
5310 // `incond = 1`, lex.c does not promote `]]` to DOUTBRACK and the
5311 // cond body bleeds past the close bracket — the parser then
5312 // sees `]]` as a separate STRING command. Every `if [[ ... ]]; then`
5313 // failed with `command not found: ]]` before this fix.
5314 set_incond(1);
5315 set_incmdpos(false);
5316 zshlex(); // skip [[
5317 // Empty cond `[[ ]]` is a parse error in zsh — emit the
5318 // diagnostic and return None so the caller produces a
5319 // non-zero exit. Without this, `[[ ]]` silently passed and
5320 // returned exit 0.
5321 if tok() == DOUTBRACK {
5322 error("parse error near `]]'");
5323 set_incond(0);
5324 set_incmdpos(true);
5325 zshlex();
5326 return None;
5327 }
5328 let cond = parse_cond_expr();
5329
5330 if tok() == DOUTBRACK {
5331 set_incond(0);
5332 set_incmdpos(true);
5333 zshlex();
5334 } else {
5335 // Recover incond/incmdpos so subsequent parsing isn't stuck
5336 // in cond-mode if the close bracket is missing.
5337 set_incond(0);
5338 set_incmdpos(true);
5339 }
5340
5341 cond.map(ZshCommand::Cond)
5342}
5343
5344/// Parse conditional expression
5345/// Top of `[[ ]]` cond-expression parsing — entry to recursive
5346/// descent (or → and → not → primary). Direct port of zsh's
5347/// par_cond_1 at parse.c:2434-2475.
5348fn parse_cond_expr() -> Option<ZshCond> {
5349 parse_cond_or()
5350}
5351
5352/// Cond-expression `||` level. C: inside par_cond_1 at
5353/// parse.c:2434-2475 (the `cond_or` ladder).
5354fn parse_cond_or() -> Option<ZshCond> {
5355 PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get() + 1);
5356 if check_recursion() {
5357 error("parse_cond_or: max recursion depth exceeded");
5358 PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get().saturating_sub(1));
5359 return None;
5360 }
5361
5362 let left = match parse_cond_and() {
5363 Some(l) => l,
5364 None => {
5365 PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get().saturating_sub(1));
5366 return None;
5367 }
5368 };
5369
5370 skip_cond_separators();
5371
5372 let result = if tok() == DBAR {
5373 zshlex();
5374 skip_cond_separators();
5375 parse_cond_or().map(|right| ZshCond::Or(Box::new(left), Box::new(right)))
5376 } else {
5377 Some(left)
5378 };
5379
5380 PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get().saturating_sub(1));
5381 result
5382}
5383
5384/// Cond-expression `&&` level. C: par_cond_2 at parse.c:2476-2625.
5385fn parse_cond_and() -> Option<ZshCond> {
5386 PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get() + 1);
5387 if check_recursion() {
5388 error("parse_cond_and: max recursion depth exceeded");
5389 PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get().saturating_sub(1));
5390 return None;
5391 }
5392
5393 let left = match parse_cond_not() {
5394 Some(l) => l,
5395 None => {
5396 PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get().saturating_sub(1));
5397 return None;
5398 }
5399 };
5400
5401 skip_cond_separators();
5402
5403 let result = if tok() == DAMPER {
5404 zshlex();
5405 skip_cond_separators();
5406 parse_cond_and().map(|right| ZshCond::And(Box::new(left), Box::new(right)))
5407 } else {
5408 Some(left)
5409 };
5410
5411 PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get().saturating_sub(1));
5412 result
5413}
5414
5415/// Cond-expression `!` negation level. C: handled inside
5416/// par_cond_2 at parse.c:2476-2625 via the Bang token check.
5417fn parse_cond_not() -> Option<ZshCond> {
5418 PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get() + 1);
5419 if check_recursion() {
5420 error("parse_cond_not: max recursion depth exceeded");
5421 PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get().saturating_sub(1));
5422 return None;
5423 }
5424
5425 skip_cond_separators();
5426
5427 // ! can be either BANG_TOK or String "!"
5428 let is_not =
5429 tok() == BANG_TOK || (tok() == STRING_LEX && tokstr().map(|s| s == "!").unwrap_or(false));
5430 if is_not {
5431 zshlex();
5432 let inner = match parse_cond_not() {
5433 Some(i) => i,
5434 None => {
5435 PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get().saturating_sub(1));
5436 return None;
5437 }
5438 };
5439 PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get().saturating_sub(1));
5440 return Some(ZshCond::Not(Box::new(inner)));
5441 }
5442
5443 if tok() == INPAR_TOK {
5444 zshlex();
5445 skip_cond_separators();
5446 let inner = match parse_cond_expr() {
5447 Some(i) => i,
5448 None => {
5449 PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get().saturating_sub(1));
5450 return None;
5451 }
5452 };
5453 skip_cond_separators();
5454 if tok() == OUTPAR_TOK {
5455 zshlex();
5456 }
5457 PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get().saturating_sub(1));
5458 return Some(inner);
5459 }
5460
5461 let result = parse_cond_primary();
5462 PARSER_RECURSION_DEPTH.set(PARSER_RECURSION_DEPTH.get().saturating_sub(1));
5463 result
5464}
5465
5466/// Cond-expression primary: unary tests (-f, -d, ...), binary
5467/// tests (=, !=, <, >, ==, =~, -eq, -ne, ...), and parenthesized
5468/// sub-expressions. Direct port of par_cond_double / par_cond_triple
5469/// / par_cond_multi at parse.c:2626-2731 (chosen by arg count).
5470fn parse_cond_primary() -> Option<ZshCond> {
5471 let s1 = match tok() {
5472 STRING_LEX => {
5473 let s = tokstr().unwrap_or_default();
5474 zshlex();
5475 s
5476 }
5477 _ => return None,
5478 };
5479
5480 skip_cond_separators();
5481
5482 // Check for unary operator. zsh's lexer tokenizes leading `-` as
5483 // `zsh_h::Dash` (`\u{9b}`, `Src/zsh.h:182`) inside gettokstr (lex.c:1390-1400
5484 // LX2_DASH — `-` always becomes Dash, untokenized later). Match
5485 // either form here, and use char-count not byte-count since Dash
5486 // is 2 UTF-8 bytes (`\xc2\x9b`).
5487 let s1_chars: Vec<char> = s1.chars().collect();
5488 if s1_chars.len() == 2 && IS_DASH(s1_chars[0]) {
5489 let s2 = match tok() {
5490 STRING_LEX => {
5491 let s = tokstr().unwrap_or_default();
5492 zshlex();
5493 s
5494 }
5495 _ => return Some(ZshCond::Unary("-n".to_string(), s1)),
5496 };
5497 return Some(ZshCond::Unary(s1, s2));
5498 }
5499
5500 // Check for binary operator. Direct port of zsh/Src/parse.c:2601-2603:
5501 // incond++; /* parentheses do globbing */
5502 // do condlex(); while (COND_SEP());
5503 // incond--; /* parentheses do grouping */
5504 // The bump makes the lexer treat `(` as a literal character inside
5505 // the RHS word (e.g. `[[ x =~ (foo) ]]`) instead of returning Inpar
5506 // and splitting the regex into multiple tokens.
5507 let op = match tok() {
5508 STRING_LEX => {
5509 let s = tokstr().unwrap_or_default();
5510 set_incond(incond() + 1);
5511 zshlex();
5512 set_incond(incond() - 1);
5513 s
5514 }
5515 INANG_TOK => {
5516 set_incond(incond() + 1);
5517 zshlex();
5518 set_incond(incond() - 1);
5519 "<".to_string()
5520 }
5521 OUTANG_TOK => {
5522 set_incond(incond() + 1);
5523 zshlex();
5524 set_incond(incond() - 1);
5525 ">".to_string()
5526 }
5527 _ => return Some(ZshCond::Unary("-n".to_string(), s1)),
5528 };
5529
5530 skip_cond_separators();
5531
5532 let s2 = match tok() {
5533 STRING_LEX => {
5534 let s = tokstr().unwrap_or_default();
5535 zshlex();
5536 s
5537 }
5538 _ => return Some(ZshCond::Binary(s1, op, String::new())),
5539 };
5540
5541 if op == "=~" {
5542 Some(ZshCond::Regex(s1, s2))
5543 } else {
5544 Some(ZshCond::Binary(s1, op, s2))
5545 }
5546}
5547
5548fn skip_cond_separators() {
5549 while tok() == SEPER && {
5550 let s = tokstr();
5551 s.map(|s| !s.contains(';')).unwrap_or(true)
5552 } {
5553 zshlex();
5554 }
5555}
5556
5557/// Parse (( ... )) arithmetic command
5558/// Parse `(( EXPR ))` arithmetic command. C source: parse.c:1810-1834
5559/// `par_dinbrack` (despite the name; the function actually handles
5560/// DINPAR `(( ))` blocks too).
5561fn parse_arith() -> Option<ZshCommand> {
5562 let expr = tokstr().unwrap_or_default();
5563 zshlex();
5564 Some(ZshCommand::Arith(expr))
5565}
5566
5567/// Parse time command
5568/// Parse `time CMD` (POSIX time keyword). Direct port of
5569/// zsh/Src/parse.c:1787 `par_time`. The `time` keyword
5570/// times the execution of the following pipeline / cmd.
5571fn par_time() -> Option<ZshCommand> {
5572 zshlex(); // skip 'time'
5573
5574 // Check if there's a pipeline to time
5575 if tok() == SEPER || tok() == NEWLIN || tok() == ENDINPUT {
5576 Some(ZshCommand::Time(None))
5577 } else {
5578 let sublist = par_sublist();
5579 Some(ZshCommand::Time(sublist.map(Box::new)))
5580 }
5581}
5582
5583/// Check if next token is ()
5584fn peek_inoutpar() -> bool {
5585 tok() == INOUTPAR
5586}
5587
5588/// Skip separator tokens
5589fn skip_separators() {
5590 let mut iterations = 0;
5591 while tok() == SEPER || tok() == NEWLIN {
5592 iterations += 1;
5593 if iterations > 100_000 {
5594 error("skip_separators: too many iterations");
5595 return;
5596 }
5597 zshlex();
5598 }
5599}
5600
5601/// Record a parse error. Direct port of zsh's `zerr` invocation
5602/// from `Src/parse.c:625-633 yyerror`. Sets `errflag |=
5603/// ERRFLAG_ERROR` (when `noerrs == 0`) and emits a diagnostic on
5604/// stderr via `zwarning`.
5605fn error(msg: &str) {
5606 crate::ported::utils::zerr(msg);
5607}
5608
5609// =====================================================================
5610// `bin_zcompile` and wordcode-dump helpers — port of `Src/parse.c:3104+`.
5611//
5612// The wordcode dump format (`.zwc`) is a serialized parse tree zsh can
5613// `mmap()` and dispatch from without re-parsing on every shell start.
5614// File layout (one struct = `FD_PRELEN` `u32`s):
5615// - `pre[0]` = magic word (FD_MAGIC native byte-order, FD_OMAGIC
5616// opposite byte-order).
5617// - `pre[1]` = packed `{flags(8) | other_offset(24)}` byte field.
5618// - `pre[2..12]` = `ZSH_VERSION` C-string padded to 40 bytes.
5619// - `pre[12]` = `fdheaderlen` (total prelude+header word count).
5620// - Then a sequence of `struct fdhead` records, one per function,
5621// each followed by its NUL-terminated name (padded to 4-byte).
5622// - Then the wordcode bytes for every function back-to-back.
5623//
// On a little-endian host the dump is written twice: first with
// `FD_MAGIC` for native readers; then the body is re-walked byte-swapped
// and a second copy is emitted with `FD_OMAGIC` so that big-endian
// readers can mmap it too.
5627// =====================================================================
5628
5629// File-format constants — port of `Src/parse.c:3104-3150`.
5630
/// File-name extension of a wordcode dump (`#define FD_EXT ".zwc"`,
/// `Src/parse.c:3104`).
pub const FD_EXT: &str = ".zwc";

/// Minimum wordcode body size, in bytes, for `-M` (mmap) mode to be
/// used; below it read(2) is preferred (`#define FD_MINMAP 4096`,
/// `Src/parse.c:3105`).
pub const FD_MINMAP: usize = 4096;

/// Length of the file prelude in u32 words: one magic word, one
/// packed flags word and ten version words (`#define FD_PRELEN 12`,
/// `Src/parse.c:3107`).
pub const FD_PRELEN: usize = 12;

/// Magic word of a dump in native byte order
/// (`#define FD_MAGIC 0x04050607`, `Src/parse.c:3108`).
pub const FD_MAGIC: u32 = 0x0405_0607;

/// Magic word of a dump in the opposite byte order, i.e. `FD_MAGIC`
/// with its bytes reversed (`#define FD_OMAGIC 0x07060504`,
/// `Src/parse.c:3109`).
pub const FD_OMAGIC: u32 = 0x0706_0504;

/// Prelude flag bit: the dump should be `mmap()`-ed (`-M`) rather
/// than read normally (`-R`) (`#define FDF_MAP 1`,
/// `Src/parse.c:3111`).
pub const FDF_MAP: u32 = 1;

/// Prelude flag bit: an opposite-byte-order copy of the dump exists
/// at `fdother(f)` (`#define FDF_OTHER 2`, `Src/parse.c:3112`).
pub const FDF_OTHER: u32 = 2;

/// Function-header flag: `-k`, ksh-style autoload
/// (`#define FDHF_KSHLOAD 1`, `Src/parse.c:3149`).
pub const FDHF_KSHLOAD: u32 = 1;

/// Function-header flag: `-z`, zsh-style autoload
/// (`#define FDHF_ZSHLOAD 2`, `Src/parse.c:3150`).
pub const FDHF_ZSHLOAD: u32 = 2;
5666
/// Per-function header record inside a wordcode dump — port of
/// `struct fdhead` (`Src/parse.c:3116`). Every field is one
/// `wordcode` (u32); the field order mirrors the C declaration.
#[allow(non_camel_case_types)]
#[derive(Clone, Copy, Debug)]
pub struct fdhead {
    /// Where this function's wordcode body starts inside the dump,
    /// as an offset in u32 words.
    pub start: u32, // c:3117
    /// Wordcode-byte length of the body (excludes pattern-prog slots).
    pub len: u32, // c:3118
    /// How many compiled patterns the body references.
    pub npats: u32, // c:3119
    /// Offset of the string table inside `prog->prog`.
    pub strs: u32, // c:3120
    /// Length of this header record in u32 words, name included.
    pub hlen: u32, // c:3121
    /// Packed `{ kshload_bits(2) | name_tail_offset(30) }` field.
    pub flags: u32, // c:3122
}

/// Number of `wordcode` (u32) units occupied by one `struct fdhead`.
/// Used by the header-walk helpers below.
pub const FDHEAD_WORDS: usize = std::mem::size_of::<fdhead>() / std::mem::size_of::<u32>();
5690
/// Build-time description of one function before `write_dump` emits
/// it — port of `struct wcfunc` (`Src/parse.c:3158`).
///
/// The Rust port keeps the body inline because the C-side
/// `Eprog` ↔ `parse_string` chain is not fully wired through this
/// layer yet (`build_dump` falls back to source-text caching).
#[allow(non_camel_case_types)]
#[derive(Clone, Debug)]
pub struct wcfunc {
    /// Function name.
    pub name: String, // c:3159
    /// Flag word for this function.
    pub flags: u32, // c:3161
    /// Compiled body wordcode, one `u32` array per function. Stays
    /// empty until the eprog emit side lands; `write_dump` then walks
    /// each entry.
    pub body: Vec<u32>,
}
5705
5706// `fdheaderlen` / `fdmagic` / `fdflags` / etc. macros from
5707// `Src/parse.c:3125-3152`. C uses raw pointer arithmetic on a
5708// `Wordcode` (= `u32 *`); the Rust port takes a slice and indexes.
5709
/// Port of `fdheaderlen(f)` macro (`Src/parse.c:3125`) — header
/// length in u32 words (read from prelude word `FD_PRELEN`).
///
/// `f` is the decoded dump prelude/header. Panics if `f` holds
/// `FD_PRELEN` words or fewer, because the slot is read by plain
/// indexing.
#[inline]
pub fn fdheaderlen(f: &[u32]) -> u32 {
    f[FD_PRELEN]
}
5716
/// Port of `fdmagic(f)` macro (`Src/parse.c:3127`) — first prelude
/// word, either `FD_MAGIC` or `FD_OMAGIC`.
///
/// Returns word 0 of `f` unchanged; validating it against the magic
/// constants is left to the caller. Panics if `f` is empty.
#[inline]
pub fn fdmagic(f: &[u32]) -> u32 {
    f[0]
}
5723
/// Port of `fdflags(f)` macro (`Src/parse.c:3131`).
///
/// Returns the least-significant byte of prelude word 1, widened to
/// `u32`. Panics if `f` has fewer than two words.
#[inline]
pub fn fdflags(f: &[u32]) -> u32 {
    // Byte 0 of the little-endian view is the low byte of the word.
    let [flag_byte, ..] = f[1].to_le_bytes();
    u32::from(flag_byte)
}
5731
/// Port of `fdsetflags(f, v)` macro (`Src/parse.c:3132`).
///
/// Overwrites the least-significant byte of prelude word 1 with `v`
/// and leaves the upper 24 bits untouched. Panics if `f` has fewer
/// than two words.
#[inline]
pub fn fdsetflags(f: &mut [u32], v: u8) {
    let mut word_bytes = f[1].to_le_bytes();
    word_bytes[0] = v;
    f[1] = u32::from_le_bytes(word_bytes);
}
5738
/// Port of `fdother(f)` macro (`Src/parse.c:3133`).
///
/// Returns the upper 24 bits of prelude word 1, which hold the byte
/// offset of the opposite-byte-order copy of the dump. Panics if `f`
/// has fewer than two words.
#[inline]
pub fn fdother(f: &[u32]) -> u32 {
    // A logical right shift by 8 already leaves at most 24 significant bits.
    f[1] >> 8
}
5746
/// Port of `fdsetother(f, o)` macro (`Src/parse.c:3134`).
///
/// Stores the low 24 bits of `o` in the upper 24 bits of prelude
/// word 1 while keeping the flags byte (low 8 bits) as it was.
/// Panics if `f` has fewer than two words.
#[inline]
pub fn fdsetother(f: &mut [u32], o: u32) {
    let kept_flags = f[1] & 0xff;
    // Shifting left by 8 drops the top byte of `o`, i.e. keeps 24 bits.
    f[1] = (o << 8) | kept_flags;
}
5752
/// Port of `fdversion(f)` macro (`Src/parse.c:3140`) — read the
/// `ZSH_VERSION` C-string from `pre[2..]`.
///
/// Decodes up to ten prelude words (40 bytes) starting at word 2 as
/// little-endian bytes and returns everything before the first NUL,
/// converted lossily to UTF-8. A slice too short to contain the
/// version field yields an empty string instead of panicking, so the
/// function is safe to call on truncated or corrupt headers.
pub fn fdversion(f: &[u32]) -> String {
    let bytes: Vec<u8> = f
        .iter()
        .skip(2) // words 0 and 1 are the magic and the flags/other word
        .take(10)
        .flat_map(|w| w.to_le_bytes())
        .take_while(|&b| b != 0)
        .collect();
    String::from_utf8_lossy(&bytes).into_owned()
}
5764
/// Port of `firstfdhead(f)` macro (`Src/parse.c:3142`) — pointer
/// to the first `struct fdhead` past the prelude.
///
/// The Rust port returns a u32-word index into the header buffer
/// rather than a pointer; the index is always `FD_PRELEN`.
#[inline]
pub fn firstfdhead_offset() -> usize {
    FD_PRELEN
}
5771
/// Port of `nextfdhead(f)` macro (`Src/parse.c:3143`).
///
/// Given the word offset `cur` of a header record inside `f`, returns
/// the word offset of the following record by adding the record's
/// `hlen` slot. Panics if `cur + 4` is outside `f`. The result is not
/// validated: a record whose `hlen` is zero yields `cur` again.
#[inline]
pub fn nextfdhead_offset(f: &[u32], cur: usize) -> usize {
    // `hlen` is the fifth u32 of an fdhead record.
    const HLEN_SLOT: usize = 4;
    let record_words = f[cur + HLEN_SLOT] as usize;
    cur + record_words
}
5778
5779/// Port of `fdhflags(f)` macro (`Src/parse.c:3145`) — low 2 bits
5780/// of the header's `flags` field (the kshload/zshload marker).
5781#[inline]
5782pub fn fdhflags(h: &fdhead) -> u32 {
5783 h.flags & 0x3
5784}
5785
5786/// Port of `fdhtail(f)` macro (`Src/parse.c:3146`) — high 30 bits
5787/// of `flags`, byte offset from the name start to its basename.
5788#[inline]
5789pub fn fdhtail(h: &fdhead) -> u32 {
5790 h.flags >> 2
5791}
5792
/// Port of `fdhbldflags(f, t)` macro (`Src/parse.c:3147`).
///
/// Builds the packed header `flags` word: `tail` is shifted into the
/// upper 30 bits and OR-ed with `flags`. `flags` is not masked, so
/// callers are expected to pass only the two load-style bits.
#[inline]
pub fn fdhbldflags(flags: u32, tail: u32) -> u32 {
    let tail_field = tail << 2;
    tail_field | flags
}
5799
5800/// Port of `fdname(f)` macro (`Src/parse.c:3152`) — name string
5801/// follows the fdhead record immediately. Reads bytes from the
5802/// dump buffer until NUL.
5803pub fn fdname(buf: &[u32], header_offset: usize) -> String {
5804 let name_word_off = header_offset + FDHEAD_WORDS;
5805 let bytes: Vec<u8> = buf[name_word_off..]
5806 .iter()
5807 .flat_map(|w| w.to_le_bytes().into_iter())
5808 .take_while(|&b| b != 0)
5809 .collect();
5810 String::from_utf8_lossy(&bytes).into_owned()
5811}
5812
5813/// Decode a `fdhead` record at the given u32-word offset in the
5814/// dump buffer. Used by the header-walk loops in `bin_zcompile -t`.
5815pub fn read_fdhead(buf: &[u32], offset: usize) -> Option<fdhead> {
5816 if offset + FDHEAD_WORDS > buf.len() {
5817 return None;
5818 }
5819 Some(fdhead {
5820 start: buf[offset],
5821 len: buf[offset + 1],
5822 npats: buf[offset + 2],
5823 strs: buf[offset + 3],
5824 hlen: buf[offset + 4],
5825 flags: buf[offset + 5],
5826 })
5827}
5828
/// Port of `fdswap(Wordcode p, int n)` from `Src/parse.c:3318`.
///
/// Reverses the byte order of every word of `p` in place. The C
/// length argument is implied by the slice length. Used when
/// producing the opposite-byte-order copy of a wordcode dump.
pub fn fdswap(p: &mut [u32]) {
    // c:3318
    p.iter_mut().for_each(|word| *word = word.swap_bytes());
}
5838
5839/// Port of `dump_find_func(Wordcode h, char *name)` from
5840/// `Src/parse.c:3167`. Walks the header table inside a loaded
5841/// dump for a function with the given basename; returns true on hit.
5842pub fn dump_find_func(h: &[u32], name: &str) -> bool {
5843 // c:3167
5844 let header_words = fdheaderlen(h) as usize;
5845 let end = header_words; // walking u32 offsets, end-exclusive
5846 let mut cur = firstfdhead_offset();
5847 while cur < end {
5848 if let Some(fh) = read_fdhead(h, cur) {
5849 let full = fdname(h, cur);
5850 let tail = fdhtail(&fh) as usize;
5851 let basename = if tail <= full.len() {
5852 &full[tail..]
5853 } else {
5854 ""
5855 };
5856 if basename == name {
5857 return true;
5858 }
5859 cur = nextfdhead_offset(h, cur);
5860 } else {
5861 break;
5862 }
5863 }
5864 false
5865}
5866
5867/// Port of `load_dump_header(char *nam, char *name, int err)` from
5868/// `Src/parse.c:3258`. Opens the file, reads + validates the magic
5869/// and version, then slurps the full header table into memory.
5870/// Returns the header u32-array on success or None on any failure
5871/// (emitting C-shaped warnings when `err != 0`).
5872pub fn load_dump_header(nam: &str, name: &str, err: i32) -> Option<Vec<u32>> {
5873 // c:3258
5874
5875 let mut f = match File::open(name) {
5876 // c:3263
5877 Ok(h) => h,
5878 Err(_) => {
5879 if err != 0 {
5880 zwarnnam(nam, &format!("can't open zwc file: {}", name)); // c:3265
5881 }
5882 return None;
5883 }
5884 };
5885
5886 // Read FD_PRELEN+1 u32 words = 52 bytes.
5887 let mut buf_bytes = vec![0u8; (FD_PRELEN + 1) * 4];
5888 if f.read_exact(&mut buf_bytes).is_err() {
5889 if err != 0 {
5890 zwarnnam(nam, &format!("invalid zwc file: {}", name)); // c:3277
5891 }
5892 return None;
5893 }
5894 let mut buf: Vec<u32> = buf_bytes
5895 .chunks_exact(4)
5896 .map(|c| u32::from_le_bytes([c[0], c[1], c[2], c[3]]))
5897 .collect();
5898
5899 // c:3270 — magic + version check. `ZSH_VERSION` (the C-side
5900 // global) — zshrs reports "5.9" in `--zsh` mode (Src/init.c parity).
5901 let magic_ok = fdmagic(&buf) == FD_MAGIC || fdmagic(&buf) == FD_OMAGIC;
5902 let v_ok = fdversion(&buf) == "5.9";
5903 if !magic_ok {
5904 if err != 0 {
5905 zwarnnam(nam, &format!("invalid zwc file: {}", name)); // c:3277
5906 }
5907 return None;
5908 }
5909 if !v_ok {
5910 if err != 0 {
5911 zwarnnam(
5912 nam,
5913 &format!(
5914 "zwc file has wrong version (zsh-{}): {}", // c:3274
5915 fdversion(&buf),
5916 name
5917 ),
5918 );
5919 }
5920 return None;
5921 }
5922
5923 // c:3285 — if magic matches host byte order, head len is `pre[FD_PRELEN]`.
5924 // Else seek to `fdother(buf)` and re-read.
5925 if fdmagic(&buf) != FD_MAGIC {
5926 let other = fdother(&buf) as u64; // c:3290
5927 if f.seek(SeekFrom::Start(other)).is_err() || f.read_exact(&mut buf_bytes).is_err() {
5928 zwarnnam(nam, &format!("invalid zwc file: {}", name)); // c:3295
5929 return None;
5930 }
5931 buf = buf_bytes
5932 .chunks_exact(4)
5933 .map(|c| u32::from_le_bytes([c[0], c[1], c[2], c[3]]))
5934 .collect();
5935 }
5936
5937 let total_words = fdheaderlen(&buf) as usize; // c:3286/3299
5938 if total_words < FD_PRELEN + 1 {
5939 zwarnnam(nam, &format!("invalid zwc file: {}", name));
5940 return None;
5941 }
5942
5943 // Read the remaining header words.
5944 let mut head: Vec<u32> = Vec::with_capacity(total_words);
5945 head.extend_from_slice(&buf);
5946 let remaining_words = total_words - (FD_PRELEN + 1);
5947 if remaining_words > 0 {
5948 let mut rest_bytes = vec![0u8; remaining_words * 4]; // c:3305
5949 if f.read_exact(&mut rest_bytes).is_err() {
5950 zwarnnam(nam, &format!("invalid zwc file: {}", name)); // c:3307
5951 return None;
5952 }
5953 for c in rest_bytes.chunks_exact(4) {
5954 head.push(u32::from_le_bytes([c[0], c[1], c[2], c[3]]));
5955 }
5956 }
5957 Some(head) // c:3311
5958}
5959
5960/// Port of `build_dump(char *nam, char *dump, char **files, int ali, int map, int flags)`
5961/// from `Src/parse.c:3397`. Source-file → wordcode dump compiler.
5962///
5963/// Status: scaffolded but the wordcode-emit step depends on
5964/// `parse_string` returning a fully-wired `Eprog` with `prog/strs/
5965/// npats` fields populated. The current `parse_string`/`parse` shape
5966/// emits an AST (`ZshProgram`) but not yet the wordcode array C
5967/// expects in this dump format. Until that lands, this returns 1
5968/// with a clear "wordcode emit not yet ported" message so callers
5969/// (autoload from `.zwc`, `zcompile path/to/file`) fail loud.
5970pub fn build_dump(
5971 nam: &str, // c:3397
5972 dump: &str,
5973 _files: &[String],
5974 _ali: i32,
5975 _map: i32,
5976 _flags: u32,
5977) -> i32 {
5978 crate::ported::utils::zwarnnam(nam, &format!("{}: wordcode dump emit not yet ported", dump));
5979 1
5980}
5981
5982/// Port of `build_cur_dump(char *nam, char *dump, char **names, int match, int map, int what)`
5983/// from `Src/parse.c:3536`. Compiles currently-loaded functions
5984/// (`-c` for functions, `-a` for aliases) into a `.zwc` dump.
5985/// Same wordcode-emit dependency as `build_dump`.
5986pub fn build_cur_dump(
5987 nam: &str, // c:3536
5988 dump: &str,
5989 _names: &[String],
5990 _match_: i32,
5991 _map: i32,
5992 _what: i32,
5993) -> i32 {
5994 crate::ported::utils::zwarnnam(
5995 nam,
5996 &format!("{}: wordcode dump-current emit not yet ported", dump),
5997 );
5998 1
5999}
6000
/// Port of `zwcstat(char *filename, struct stat *buf)` from
/// `Src/parse.c:3656`.
///
/// Returns the metadata of `filename`; if that cannot be stat'ed,
/// tries `<filename>.old` instead. `None` means neither path could
/// be stat'ed.
pub fn zwcstat(filename: &str) -> Option<std::fs::Metadata> {
    // c:3656
    std::fs::metadata(filename)
        .or_else(|_| std::fs::metadata(format!("{}.old", filename)))
        .ok()
}
6014
/// Port of `load_dump_file(char *dump, struct stat *sbuf, int other, int len)`
/// from `Src/parse.c:3675`.
///
/// Reads the file `dump` into memory as little-endian u32 words. When
/// `other` is non-zero reading starts at that byte offset, otherwise
/// at the beginning. Trailing bytes that do not fill a whole word are
/// dropped. `_sbuf` and `_len` are accepted for signature parity with
/// C and are not consulted.
///
/// Returns `None` if the file cannot be opened, seeked, or read.
pub fn load_dump_file(
    dump: &str, // c:3675
    _sbuf: &std::fs::Metadata,
    other: i32,
    _len: usize,
) -> Option<Vec<u32>> {
    let mut file = File::open(dump).ok()?;
    if other != 0 {
        file.seek(SeekFrom::Start(other as u64)).ok()?;
    }
    let mut raw = Vec::new();
    file.read_to_end(&mut raw).ok()?;

    let mut words = Vec::with_capacity(raw.len() / 4);
    for quad in raw.chunks_exact(4) {
        words.push(u32::from_le_bytes([quad[0], quad[1], quad[2], quad[3]]));
    }
    Some(words)
}
6037
/// Port of `try_dump_file(char *path, char *name, char *file, int *ksh, int test_only)`
/// from `Src/parse.c:3746`. Intended to load a function from a `.zwc`
/// in the given fpath directory, reporting `(found, ksh_load)`.
///
/// Stub: all arguments are ignored and the result is always `None`
/// until the dump-cache port (`FuncDump`) lands.
pub fn try_dump_file(
    _path: &str,
    _name: &str,
    _file: &str, // c:3746
    _test_only: bool,
) -> Option<(bool, bool)> {
    None
}
6050
/// Port of `try_source_file(char *file)` from `Src/parse.c:3795`.
/// Intended to try `source <file>` and then fall back to
/// `source <file>.zwc`, returning the resolved path on a hit.
///
/// Stub: the argument is ignored and the result is always `None`
/// until the dump-cache port lands.
pub fn try_source_file(_file: &str) -> Option<String> {
    // c:3795
    None
}
6059
/// Port of `check_dump_file(char *file, struct stat *sbuf, char *name, int *ksh, int test_only)`
/// from `Src/parse.c:3833`. Intended to open + validate a `.zwc`
/// file and return its loaded buffer together with a flag.
/// NOTE(review): the `bool` presumably stands in for the C `*ksh`
/// out-parameter — confirm when the body is ported.
///
/// Stub: all arguments are ignored and the result is always `None`.
pub fn check_dump_file(
    _file: &str, // c:3833
    _sbuf: &std::fs::Metadata,
    _name: &str,
    _test_only: bool,
) -> Option<(Vec<u32>, bool)> {
    None
}
6071
/// `static FuncDump dumps;` from `Src/parse.c:3652` — head of the
/// loaded-`.zwc` linked list. C walks `dumps`/`p->next` directly;
/// the Rust port keeps the entries in a `Mutex<Vec<funcdump>>` and
/// the refcount helpers locate an entry by a linear search on its
/// `filename`, which replaces C's raw-pointer identity.
pub static DUMPS: std::sync::Mutex<Vec<crate::ported::zsh_h::funcdump>> =
    std::sync::Mutex::new(Vec::new());
6078
6079/// Port of `incrdumpcount(FuncDump f)` from `Src/parse.c:3970/4021`.
6080/// `f->count++;` — refcount-up a loaded dump entry. The Rust port
6081/// keys lookup by `filename` because Rust can't raw-pointer-compare
6082/// funcdump values inside a `Mutex<Vec<...>>`; same observable
6083/// effect (the count of the matching entry increments).
6084pub fn incrdumpcount(f: &crate::ported::zsh_h::funcdump) {
6085 // c:3970
6086 let key = f.filename.as_deref();
6087 let mut g = DUMPS.lock().unwrap();
6088 for d in g.iter_mut() {
6089 if d.filename.as_deref() == key {
6090 d.count += 1; // c:3973
6091 return;
6092 }
6093 }
6094}
6095
6096/// Port of `freedump(FuncDump f)` from `Src/parse.c:3976`. C
6097/// `munmap`s, `zclose`s the fd, and frees the struct. The Rust
6098/// port relies on Drop for the `funcdump` (no mmap held in this
6099/// port — `addr`/`map` are byte-offset placeholders), so the
6100/// equivalent is removing the entry from the dumps list. Called
6101/// by `decrdumpcount` when the refcount hits zero (c:3988) and
6102/// by `closedumps` when shutting down (c:4008).
6103fn freedump_locked(
6104 g: &mut std::sync::MutexGuard<'_, Vec<crate::ported::zsh_h::funcdump>>,
6105 filename: &str,
6106) {
6107 // c:3976
6108 g.retain(|d| d.filename.as_deref() != Some(filename));
6109}
6110
6111/// Port of `freedump(FuncDump f)` from `Src/parse.c:3976`. Public
6112/// helper for the rare external caller; locks the dumps mutex and
6113/// drops the entry with the given filename.
6114pub fn freedump(f: &crate::ported::zsh_h::funcdump) {
6115 // c:3976
6116 let mut g = DUMPS.lock().unwrap();
6117 if let Some(name) = f.filename.as_deref() {
6118 freedump_locked(&mut g, name);
6119 }
6120}
6121
6122/// Port of `decrdumpcount(FuncDump f)` from `Src/parse.c:3988/4026`.
6123/// `f->count--; if (!f->count) { unlink from dumps; freedump(f); }`.
6124pub fn decrdumpcount(f: &crate::ported::zsh_h::funcdump) {
6125 // c:3988
6126 let key = f.filename.clone();
6127 let mut g = DUMPS.lock().unwrap();
6128 let mut hit_zero: Option<String> = None;
6129 for d in g.iter_mut() {
6130 if d.filename == key {
6131 d.count -= 1; // c:3991
6132 if d.count == 0 {
6133 // c:3992
6134 hit_zero = d.filename.clone();
6135 }
6136 break;
6137 }
6138 }
6139 if let Some(name) = hit_zero {
6140 // c:3994-4001
6141 freedump_locked(&mut g, &name);
6142 }
6143}
6144
6145/// Port of `closedumps(void)` from `Src/parse.c:4008/4033`. Walks
6146/// `dumps` freeing every entry. Called on shell exit (exec.c:522).
6147pub fn closedumps() {
6148 // c:4008
6149 let mut g = DUMPS.lock().unwrap();
6150 g.clear(); // c:4011-4014 `while (dumps) { ... freedump(...); ... }`
6151}
6152
6153/// Port of `dump_autoload(char *nam, char *file, int on, Options ops, int func)`
6154/// from `Src/parse.c:4042`. Registers every function in a `.zwc`
6155/// for autoload via `shfunctab`. Stub: returns 1 (error) until the
6156/// dump-cache port lands.
6157pub fn dump_autoload(
6158 nam: &str,
6159 file: &str, // c:4042
6160 _on: i32,
6161 _ops: &crate::ported::zsh_h::options,
6162 _func: i32,
6163) -> i32 {
6164 zwarnnam(nam, &format!("{}: zwc-based autoload not yet ported", file));
6165 1
6166}
6167
6168/// Port of `bin_zcompile(char *nam, char **args, Options ops, UNUSED(int func))`
6169/// from `Src/parse.c:3180`. Validates the option set, then dispatches
6170/// to one of: `-t` (test/list), `-c`/`-a` (dump current functions),
6171/// or the default (compile source files to `.zwc`).
6172pub fn bin_zcompile(
6173 nam: &str, // c:3180
6174 args: &[String],
6175 ops: &crate::ported::zsh_h::options,
6176 _func: i32,
6177) -> i32 {
6178 // c:3185-3192 — illegal-combination guard.
6179 if (OPT_ISSET(ops, b'k') && OPT_ISSET(ops, b'z'))
6180 || (OPT_ISSET(ops, b'R') && OPT_ISSET(ops, b'M'))
6181 || (OPT_ISSET(ops, b'c')
6182 && (OPT_ISSET(ops, b'U') || OPT_ISSET(ops, b'k') || OPT_ISSET(ops, b'z')))
6183 || (!(OPT_ISSET(ops, b'c') || OPT_ISSET(ops, b'a')) && OPT_ISSET(ops, b'm'))
6184 {
6185 zwarnnam(nam, "illegal combination of options"); // c:3192
6186 return 1;
6187 }
6188
6189 // c:3194 — `-c`/`-a` + KSHAUTOLOAD warning.
6190 if (OPT_ISSET(ops, b'c') || OPT_ISSET(ops, b'a')) && isset(crate::ported::zsh_h::KSHAUTOLOAD) {
6191 zwarnnam(nam, "functions will use zsh style autoloading"); // c:3195
6192 }
6193
6194 // c:3196-3197 — flag word from `-k` / `-z`.
6195 let flags: u32 = if OPT_ISSET(ops, b'k') {
6196 FDHF_KSHLOAD
6197 } else if OPT_ISSET(ops, b'z') {
6198 FDHF_ZSHLOAD
6199 } else {
6200 0
6201 };
6202
6203 // c:3199 — `-t` test/list mode.
6204 if OPT_ISSET(ops, b't') {
6205 // c:3199
6206 if args.is_empty() {
6207 zwarnnam(nam, "too few arguments"); // c:3202
6208 return 1;
6209 }
6210 let dump_name = if args[0].ends_with(FD_EXT) {
6211 args[0].clone()
6212 } else {
6213 format!("{}{}", args[0], FD_EXT)
6214 };
6215 let f = match load_dump_header(nam, &dump_name, 1) {
6216 // c:3206
6217 Some(buf) => buf,
6218 None => return 1,
6219 };
6220 // c:3209 — per-function check.
6221 if args.len() > 1 {
6222 for name in &args[1..] {
6223 // c:3210
6224 if !dump_find_func(&f, name) {
6225 // c:3212
6226 return 1;
6227 }
6228 }
6229 return 0;
6230 }
6231 // c:3215-3221 — listing arm. Walk every fdhead, print
6232 // each function's full name. C uses `fdname(h)` which
6233 // includes the path prefix; matches our `fdname()` impl.
6234 let mapped = if (fdflags(&f) & FDF_MAP) != 0 {
6235 "mapped"
6236 } else {
6237 "read"
6238 };
6239 println!("zwc file ({}) for zsh-{}", mapped, fdversion(&f));
6240 let header_words = fdheaderlen(&f) as usize;
6241 let mut cur = firstfdhead_offset();
6242 while cur < header_words {
6243 if read_fdhead(&f, cur).is_none() {
6244 break;
6245 }
6246 println!("{}", fdname(&f, cur));
6247 cur = nextfdhead_offset(&f, cur);
6248 }
6249 return 0;
6250 }
6251
6252 if args.is_empty() {
6253 zwarnnam(nam, "too few arguments"); // c:3226
6254 return 1;
6255 }
6256
6257 // c:3228 — map mode discriminant.
6258 let map: i32 = if OPT_ISSET(ops, b'M') {
6259 2
6260 } else if OPT_ISSET(ops, b'R') {
6261 0
6262 } else {
6263 1
6264 };
6265
6266 // c:3230-3236 — single-file default-mode short path.
6267 if args.len() == 1 && !(OPT_ISSET(ops, b'c') || OPT_ISSET(ops, b'a')) {
6268 let dump = format!("{}{}", args[0], FD_EXT);
6269 return build_dump(nam, &dump, args, OPT_ISSET(ops, b'U') as i32, map, flags);
6270 }
6271
6272 // c:3239-3247 — multi-file or `-c`/`-a` mode.
6273 let dump = if args[0].ends_with(FD_EXT) {
6274 args[0].clone()
6275 } else {
6276 format!("{}{}", args[0], FD_EXT)
6277 };
6278 let rest = &args[1..];
6279 if OPT_ISSET(ops, b'c') || OPT_ISSET(ops, b'a') {
6280 let what =
6281 (if OPT_ISSET(ops, b'c') { 1 } else { 0 }) | (if OPT_ISSET(ops, b'a') { 2 } else { 0 });
6282 build_cur_dump(nam, &dump, rest, OPT_ISSET(ops, b'm') as i32, map, what)
6283 } else {
6284 build_dump(nam, &dump, rest, OPT_ISSET(ops, b'U') as i32, map, flags)
6285 }
6286}
6287
6288// =====================================================================
6289// Remaining `Src/parse.c` ports (this section finishes the file).
6290//
6291// Several of these emit into the C-wordcode buffer (`ECBUF`/etc.) and
6292// are kept for completeness — the live zshrs runtime uses the
6293// `ZshProgram` AST path instead, but `bin_zcompile` (`-c`/`-a` modes)
6294// and any future `.zwc`-emit pipeline both call into these.
6295// =====================================================================
6296
6297/// `ecstr(s)` helper — `ecadd(ecstrcode(s))`. Mirrors the C macro at
6298/// `Src/parse.c:482` used everywhere by the par_* emitters.
6299#[inline]
6300pub fn ecstr(s: &str) {
6301 let code = ecstrcode(s);
6302 ecadd(code);
6303}
6304
/// Port of `condlex` function-pointer global from `Src/parse.c`. C
/// flips this between `zshlex` and `testlex` depending on whether
/// we're inside `[[ ]]` vs `/bin/test` builtin. zshrs has no
/// separate `testlex` yet, so this just defers to `zshlex`.
///
/// All cond parsers in this file advance the token stream through
/// this function, so adding `testlex` support only needs a change
/// here.
#[inline]
pub fn condlex() {
    zshlex();
}
6313
6314/// `COND_SEP()` macro from `Src/parse.c:2433`. True when the current
6315/// token is a separator usable inside `[[ … ]]` (newline / semi /
6316/// `&`). C uses it to skip optional whitespace between cond terms.
6317#[inline]
6318pub fn COND_SEP() -> bool {
6319 matches!(tok(), NEWLIN | SEMI | AMPER)
6320}
6321
/// Port of `copy_ecstr(Eccstr s, char *p)` from `Src/parse.c:537`.
///
/// Writes every string of `table` into the string region `p` as a
/// NUL-terminated byte run. The map key is the wordcode-encoded
/// offset produced by `ecstrcode` (`(byte_offset << 2) | token_bit`,
/// parse.c:459); dropping the low two bits gives the byte offset in
/// `p`. The map value is the metafied byte form and is copied
/// verbatim. An entry that would not fit in `p` together with its
/// terminating NUL is skipped without touching `p`.
pub fn copy_ecstr(table: &std::collections::HashMap<u32, Vec<u8>>, p: &mut [u8]) {
    // c:537
    for (code, text) in table {
        let start = (*code >> 2) as usize;
        let nul_at = start + text.len();
        // `get_mut` fails exactly when payload + NUL would overrun `p`.
        if let Some(dest) = p.get_mut(start..=nul_at) {
            let (payload, terminator) = dest.split_at_mut(text.len());
            payload.copy_from_slice(text);
            terminator[0] = 0;
        }
    }
}
6343
6344/// Port of `bld_eprog(int heap)` from `Src/parse.c:547`. Finalizes
6345/// the in-build `ECBUF`/`ECSTRS`/`ECNPATS` state into an `Eprog`.
6346/// Resets the build state so a new parse can start.
6347pub fn bld_eprog(heap: bool) -> crate::ported::zsh_h::eprog {
6348 // c:547
6349
6350 // c:555 — emit WC_END opcode. `WCB_END` is `WC_END_DEFAULT` (0).
6351 ecadd(0);
6352
6353 let ecused = ECUSED.with(|c| c.get()) as usize;
6354 let ecnpats = ECNPATS.with(|c| c.get()) as usize;
6355 let ecsoffs = ECSOFFS.with(|c| c.get()) as usize;
6356
6357 let prog_bytes = ecused * 4; // c:559
6358 let len = (ecnpats * 4) + prog_bytes + ecsoffs;
6359
6360 // Snapshot the wordcode buffer + string table.
6361 let prog_words: Vec<u32> = ECBUF.with(|c| c.borrow()[..ecused].to_vec());
6362 let mut strs_bytes = vec![0u8; ecsoffs];
6363 ECSTRS_REVERSE.with(|c| copy_ecstr(&c.borrow(), &mut strs_bytes));
6364
6365 // c:566 — store strs as raw bytes via from_utf8_unchecked so
6366 // single-byte zsh markers (e.g. Dash 0x9b) survive intact.
6367 // `String::from_utf8_lossy` would replace them with U+FFFD
6368 // (`\xef\xbf\xbd`), breaking byte-for-byte parity with C's
6369 // strs region. SAFETY: downstream consumers of `eprog.strs`
6370 // index by byte offset (per the wordcode `(offs >> 2)` offset
6371 // encoding) and call `.as_bytes()` — they never iterate as
6372 // chars or rely on UTF-8 validity, so storing non-UTF-8 bytes
6373 // in a String is safe in practice. C zsh's strs is `char *`
6374 // with the same byte-not-char semantics.
6375 let strs_string = unsafe { String::from_utf8_unchecked(strs_bytes) };
6376 let ret = eprog {
6377 flags: if heap { EF_HEAP } else { EF_REAL }, // c:570
6378 len: len as i32, // c:559
6379 npats: ecnpats as i32, // c:561
6380 nref: if heap { -1 } else { 1 }, // c:562
6381 pats: Vec::new(), // c:563 dummy_patprog
6382 prog: prog_words, // c:565
6383 strs: Some(strs_string),
6384 shf: None,
6385 dump: None,
6386 };
6387
6388 // c:577 — free ecbuf so next parse starts fresh.
6389 ECBUF.with(|c| c.borrow_mut().clear());
6390 ECLEN.with(|c| c.set(0));
6391 ECUSED.with(|c| c.set(0));
6392 ECNPATS.with(|c| c.set(0));
6393 ECSOFFS.with(|c| c.set(0));
6394 ECSTRS_INDEX.with(|c| c.borrow_mut().clear());
6395 ECSTRS_REVERSE.with(|c| c.borrow_mut().clear());
6396 ECSTRS_TREE.with(|t| *t.borrow_mut() = None);
6397
6398 ret
6399}
6400
6401/// Port of `parse_list(void)` from `Src/parse.c:697`. C-shape entry
6402/// point: drives `par_list` and finalizes via `bld_eprog`. Returns
6403/// `None` on syntax error.
6404pub fn parse_list() -> Option<eprog> {
6405 // c:697
6406 set_tok(ENDINPUT);
6407 init_parse();
6408 zshlex();
6409 let _ = par_list();
6410 if tok() != ENDINPUT {
6411 clear_hdocs();
6412 set_tok(LEXERR);
6413 yyerror("syntax error");
6414 return None;
6415 }
6416 Some(bld_eprog(true))
6417}
6418
6419/// Port of `parse_cond(void)` from `Src/parse.c:722`. Only used by
6420/// `bin_test`/`bin_bracket` for `/bin/test`/`[` compat — the
6421/// `condlex` global must already point at `testlex` before entry.
6422pub fn parse_cond() -> Option<eprog> {
6423 // c:722
6424 init_parse();
6425 if par_cond().is_none() {
6426 clear_hdocs();
6427 return None;
6428 }
6429 Some(bld_eprog(true))
6430}
6431
6432/// Port of `par_sublist2(int *cmplx)` from `Src/parse.c:869`.
6433/// Secondary-sublist arm: handles the `COPROC`/`Bang` prefix
6434/// in front of a pline. Returns the WC_SUBLIST flag word added.
6435pub fn par_sublist2(cmplx: &mut i32) -> Option<i32> {
6436 // c:869
6437 let mut f = 0i32;
6438 if tok() == COPROC {
6439 *cmplx = 1;
6440 f |= WC_SUBLIST_COPROC as i32;
6441 zshlex();
6442 } else if tok() == BANG_TOK {
6443 *cmplx = 1;
6444 f |= WC_SUBLIST_NOT as i32;
6445 zshlex();
6446 }
6447 // c:884 — `if (!par_pline(cmplx) && !f) return -1;`
6448 // The wordcode-emitter call chain (par_sublist_wordcode →
6449 // par_sublist2 → par_pipe_wordcode) needs the wordcode pipe
6450 // emitter, NOT the AST `par_pline`. The previous version called
6451 // `par_pline` which builds AST nodes and never writes to ECBUF —
6452 // the entire wordcode dispatch tree was broken below sublist
6453 // level (every script lexed to LIST + END only, since pipes /
6454 // commands / args never got emitted).
6455 let outer = cmplx_get();
6456 cmplx_set(false);
6457 let ok = par_pipe_wordcode();
6458 *cmplx |= cmplx_get() as i32;
6459 cmplx_set(outer | cmplx_get());
6460 if !ok && f == 0 {
6461 return None;
6462 }
6463 Some(f)
6464}
6465
6466/// Port of `par_dinbrack(void)` from `Src/parse.c:1810`. Body
6467/// parser inside `[[ ... ]]` — calls `par_cond` to emit the
6468/// condition wordcode then advances past `]]`.
6469pub fn par_dinbrack() -> Option<()> {
6470 // c:1810
6471 set_incond(1); // c:1814
6472 set_incmdpos(false); // c:1815
6473 zshlex(); // c:1816
6474 let _ = par_cond(); // c:1817
6475 if tok() != DOUTBRACK {
6476 // c:1818
6477 yyerror("missing ]]");
6478 return None;
6479 }
6480 set_incond(0); // c:1820
6481 set_incmdpos(true); // c:1821
6482 zshlex(); // c:1822
6483 Some(())
6484}
6485
6486/// Port of `par_cond(void)` from `Src/parse.c:2409`. Top-level cond
6487/// OR-chain — drives `par_cond_1` and stitches `||`-separated terms
6488/// with `WCB_COND(COND_OR, …)`. This is the missing top of the
6489/// wordcode cond chain: `par_cond_wordcode` (the par_dinbrack port)
6490/// must call into HERE so that `[[ a || b ]]` and friends land
6491/// real WC_COND opcodes in `ecbuf`. Without this, the wordcode
6492/// emitter for `[[ ... ]]` produced zero words and parity dropped
6493/// 148 words on `/etc/zshrc` alone.
6494pub fn par_cond_top() -> i32 {
6495 // c:2411 — `int p = ecused, r;`
6496 let p = ECUSED.with(|c| c.get()) as usize;
6497 let r = par_cond_1();
6498 while COND_SEP() {
6499 condlex();
6500 }
6501 if tok() == DBAR {
6502 // c:2417 — `condlex(); while (COND_SEP()) condlex();`
6503 condlex();
6504 while COND_SEP() {
6505 condlex();
6506 }
6507 // c:2420-2422 — `ecispace(p, 1); par_cond(); ecbuf[p] =
6508 // WCB_COND(COND_OR, ecused-1-p);`
6509 ecispace(p, 1);
6510 par_cond_top();
6511 let ecused = ECUSED.with(|c| c.get()) as usize;
6512 ECBUF.with(|c| {
6513 c.borrow_mut()[p] = WCB_COND(COND_OR as u32, (ecused - 1 - p) as u32);
6514 });
6515 return 1;
6516 }
6517 r
6518}
6519
6520/// Port of `par_cond_1(void)` from `Src/parse.c:2434`. Parses one
6521/// `||`-separated cond expression. Emits `WCB_COND(COND_AND, …)`
6522/// when an `&&` is found and recurses.
6523pub fn par_cond_1() -> i32 {
6524 // c:2434
6525
6526 let p = ECUSED.with(|c| c.get()) as usize;
6527 let r = par_cond_2();
6528 while COND_SEP() {
6529 condlex();
6530 }
6531 if tok() == DAMPER {
6532 condlex();
6533 while COND_SEP() {
6534 condlex();
6535 }
6536 ecispace(p, 1);
6537 par_cond_1();
6538 let ecused = ECUSED.with(|c| c.get()) as usize;
6539 ECBUF.with(|c| {
6540 c.borrow_mut()[p] = WCB_COND(COND_AND as u32, (ecused - 1 - p) as u32);
6541 });
6542 return 1;
6543 }
6544 r
6545}
6546
6547/// Port of `static int check_cond(const char *input, const char *cond)`
6548/// from `Src/parse.c:2459`. True iff `input` is the two-char `-X`
6549/// form whose `X` matches `cond` — used by par_cond_2 to detect
6550/// `-a` / `-o` n-ary chain operators and by build_dump for `-k` /
6551/// `-z`. C: `return !IS_DASH(input[0]) ? 0 : !strcmp(input+1, cond);`.
6552fn check_cond(input: &str, cond: &str) -> bool {
6553 let mut chars = input.chars();
6554 match chars.next() {
6555 Some(c) if IS_DASH(c) => chars.as_str() == cond,
6556 _ => false,
6557 }
6558}
6559
6560/// Port of `par_cond_2(void)` from `Src/parse.c:2476`. The heavy
6561/// cond-term parser: handles `! cond`, `(cond)`, unary `[ -X arg ]`,
6562/// binary `[ A op B ]`, and `[ A op1 B op2 C … ]` n-ary chains.
6563pub fn par_cond_2() -> i32 {
6564 // c:2476
6565 // `n_testargs` only applies in `testlex` mode (=== /bin/test
6566 // compat). zshrs has no testlex yet, so always 0.
6567 let n_testargs: i32 = 0;
6568
6569 // c:2481 — handled inline; this Rust port skips the n_testargs
6570 // arm since zshrs invokes par_cond via [[ ... ]] only.
6571
6572 while COND_SEP() {
6573 condlex();
6574 }
6575 if tok() == BANG_TOK {
6576 // c:2522 — `[[ ! cond ]]`
6577 condlex();
6578 ecadd(WCB_COND(COND_NOT as u32, 0));
6579 return par_cond_2();
6580 }
6581 if tok() == INPAR_TOK {
6582 // c:2533 — `[[ (cond) ]]`
6583 condlex();
6584 while COND_SEP() {
6585 condlex();
6586 }
6587 let r = par_cond();
6588 while COND_SEP() {
6589 condlex();
6590 }
6591 if tok() != OUTPAR_TOK {
6592 yyerror("missing )");
6593 return 0;
6594 }
6595 condlex();
6596 return r.map_or(0, |_| 1);
6597 }
6598 let s1 = tokstr().unwrap_or_default();
6599 // c:2549 — `dble = (s1 && IS_DASH(*s1) && (!n_testargs ||
6600 // strspn(s1+1, "abcd...") == 1) && !s1[2]);` — IS_DASH covers
6601 // BOTH `-` and Dash (`\u{9b}`). The raw tokstr inside `[[ ... ]]`
6602 // carries Dash as a marker byte, so `starts_with('-')` alone
6603 // matches only ASCII dashes and misses every `-z`, `-d`, `-r`
6604 // etc. — every such cond emitted the AST-only `condition
6605 // expected` error from par_cond_double. Use IS_DASH and count
6606 // chars (Dash is a single code point) instead of bytes.
6607 let s1_chars: Vec<char> = s1.chars().collect();
6608 let dble = !s1_chars.is_empty()
6609 && IS_DASH(s1_chars[0])
6610 && s1_chars.len() == 2
6611 && "abcdefghknoprstuvwxzLONGS".contains(s1_chars[1]);
6612 if tok() != STRING_LEX {
6613 if !s1.is_empty() && tok() != LEXERR && (!dble || n_testargs != 0) {
6614 // c:2486-2497 — `if (n_testargs == 1)` block: under
6615 // POSIXBUILTINS-off, `[ -t ]` rewrites to `[ -t 1 ]`
6616 // (ksh behavior). The C gate is `unset(POSIXBUILTINS)
6617 // && check_cond(s1, "t")`. zshrs's parser has
6618 // n_testargs=0 (no testlex), so this rewrite path is
6619 // unreachable from zshrs's [[ ]] / [ ] entry points;
6620 // wired here as a marker for parity. When testlex is
6621 // ported the call below activates.
6622 if n_testargs == 1 && unset(POSIXBUILTINS) && check_cond(&s1, "t") {
6623 condlex();
6624 return par_cond_double(&s1, "1");
6625 }
6626 // c:2557 — `[[ STRING ]]` re-interpreted as `[[ -n STRING ]]`.
6627 condlex();
6628 while COND_SEP() {
6629 condlex();
6630 }
6631 return par_cond_double("-n", &s1);
6632 }
6633 yyerror("condition expected");
6634 return 0;
6635 }
6636 condlex();
6637 while COND_SEP() {
6638 condlex();
6639 }
6640 if tok() == INANG_TOK || tok() == OUTANG_TOK {
6641 // c:2576 — `<` / `>` string compare.
6642 let xtok = tok();
6643 condlex();
6644 while COND_SEP() {
6645 condlex();
6646 }
6647 if tok() != STRING_LEX {
6648 yyerror("string expected");
6649 return 0;
6650 }
6651 let s3 = tokstr().unwrap_or_default();
6652 condlex();
6653 while COND_SEP() {
6654 condlex();
6655 }
6656 let op = if xtok == INANG_TOK {
6657 COND_STRLT
6658 } else {
6659 COND_STRGTR
6660 };
6661 ecadd(WCB_COND(op as u32, 0));
6662 ecstr(&s1);
6663 ecstr(&s3);
6664 return 1;
6665 }
6666 if tok() != STRING_LEX {
6667 // c:2592 — only one operand seen → `[ -n s1 ]`.
6668 if tok() != LEXERR {
6669 if !dble || n_testargs != 0 {
6670 return par_cond_double("-n", &s1);
6671 }
6672 return par_cond_multi(&s1, &[]);
6673 }
6674 yyerror("syntax error");
6675 return 0;
6676 }
6677 let s2 = tokstr().unwrap_or_default();
6678 set_incond(incond() + 1);
6679 condlex();
6680 while COND_SEP() {
6681 condlex();
6682 }
6683 set_incond(incond() - 1);
6684 if tok() == STRING_LEX && !dble {
6685 let s3 = tokstr().unwrap_or_default();
6686 condlex();
6687 while COND_SEP() {
6688 condlex();
6689 }
6690 if tok() == STRING_LEX {
6691 // c:2615 — n-ary `[ A op B C D ... ]`.
6692 let mut l: Vec<String> = vec![s2, s3];
6693 while tok() == STRING_LEX {
6694 l.push(tokstr().unwrap_or_default());
6695 condlex();
6696 while COND_SEP() {
6697 condlex();
6698 }
6699 }
6700 return par_cond_multi(&s1, &l);
6701 }
6702 return par_cond_triple(&s1, &s2, &s3);
6703 }
6704 par_cond_double(&s1, &s2)
6705}
6706
6707/// Port of `par_cond_double(char *a, char *b)` from `Src/parse.c:2626`.
6708/// Emits wordcode for unary cond `[ -X b ]` or modular `[ -mod b ]`.
6709pub fn par_cond_double(a: &str, b: &str) -> i32 {
6710 // c:2628 — `if (!IS_DASH(a[0]) || !a[1])` — char-based, since
6711 // Dash is a single code point (`\u{9b}`) and `a.len() < 2` on
6712 // BYTES would still pass for "-z" but fail for the marker form
6713 // `\u{9b}z` (2 bytes). Walk by chars.
6714 let ac: Vec<char> = a.chars().collect();
6715 if ac.is_empty() || !IS_DASH(ac[0]) || ac.len() < 2 {
6716 crate::ported::utils::zerr(&format!("parse error: condition expected: {}", a));
6717 return 1;
6718 }
6719 // c:2630 — `else if (!a[2] && strspn(a+1, "abcd...zhLONGS") == 1)`
6720 let unary_set = "abcdefgknoprstuvwxzhLONGS";
6721 if ac.len() == 2 && unary_set.contains(ac[1]) {
6722 // c:2631 — `ecadd(WCB_COND(a[1], 0));` uses the raw cond-op
6723 // letter byte as the opcode payload. Use the ASCII char's
6724 // code-point value directly — every letter in `unary_set`
6725 // fits in 7 bits.
6726 ecadd(WCB_COND(ac[1] as u32, 0));
6727 ecstr(b);
6728 } else {
6729 ecadd(WCB_COND(COND_MOD as u32, 1));
6730 ecstr(a);
6731 ecstr(b);
6732 }
6733 1
6734}
6735
6736/// Port of `par_cond_triple(char *a, char *b, char *c)` from
6737/// `Src/parse.c:2659`. Emits wordcode for the binary forms
6738/// `[ A op B ]` — `=` / `==` / `!=` / `<` / `>` / `=~` / `-X`.
6739///
6740/// C does `(b[0] == Equals || b[0] == '=')` etc., matching BOTH the
6741/// raw ASCII operator char AND its tokenized marker form (Equals =
6742/// `\u{8d}`, Outang = `\u{8e}`, Inang = `\u{91}`, Tilde = `\u{96}`,
6743/// Bang = `\u{8b}`, Dash = `\u{9b}`). Inside `[[ ... ]]` the lexer
6744/// emits the marker bytes — comparing against literal-only `b"=="`
6745/// misses every cond op.
6746pub fn par_cond_triple(a: &str, b: &str, c: &str) -> i32 {
6747 // c:2659
6748 let bc: Vec<char> = b.chars().collect();
6749 let is_eq = |ch: char| ch == '=' || ch == Equals;
6750 let is_gt = |ch: char| ch == '>' || ch == Outang;
6751 let is_lt = |ch: char| ch == '<' || ch == Inang;
6752 let is_tilde = |ch: char| ch == '~' || ch == Tilde;
6753 let is_bang = |ch: char| ch == '!' || ch == Bang;
6754
6755 // c:2663 — `(b[0] == Equals || b[0] == '=') && !b[1]` → `=` (single).
6756 if bc.len() == 1 && is_eq(bc[0]) {
6757 ecadd(WCB_COND(COND_STREQ as u32, 0));
6758 ecstr(a);
6759 ecstr(c);
6760 let np = ECNPATS.with(|cc| { let v = cc.get(); cc.set(v + 1); v }) as u32;
6761 ecadd(np);
6762 return 1;
6763 }
6764 // c:2668-2673 — `(t0 = b[0]=='>' || Outang) || b[0]=='<' || Inang`.
6765 if bc.len() == 1 && (is_gt(bc[0]) || is_lt(bc[0])) {
6766 let op = if is_gt(bc[0]) { COND_STRGTR } else { COND_STRLT };
6767 ecadd(WCB_COND(op as u32, 0));
6768 ecstr(a);
6769 ecstr(c);
6770 let np = ECNPATS.with(|cc| { let v = cc.get(); cc.set(v + 1); v }) as u32;
6771 ecadd(np);
6772 return 1;
6773 }
6774 // c:2674-2679 — `==` STRDEQ.
6775 if bc.len() == 2 && is_eq(bc[0]) && is_eq(bc[1]) {
6776 ecadd(WCB_COND(COND_STRDEQ as u32, 0));
6777 ecstr(a);
6778 ecstr(c);
6779 let np = ECNPATS.with(|cc| { let v = cc.get(); cc.set(v + 1); v }) as u32;
6780 ecadd(np);
6781 return 1;
6782 }
6783 // c:2680-2684 — `!=` STRNEQ.
6784 if bc.len() == 2 && is_bang(bc[0]) && is_eq(bc[1]) {
6785 ecadd(WCB_COND(COND_STRNEQ as u32, 0));
6786 ecstr(a);
6787 ecstr(c);
6788 let np = ECNPATS.with(|cc| { let v = cc.get(); cc.set(v + 1); v }) as u32;
6789 ecadd(np);
6790 return 1;
6791 }
6792 // c:2685-2691 — `=~` REGEX (no pattern slot — implicit COND_MODI).
6793 if bc.len() == 2 && is_eq(bc[0]) && is_tilde(bc[1]) {
6794 ecadd(WCB_COND(COND_REGEX as u32, 0));
6795 ecstr(a);
6796 ecstr(c);
6797 return 1;
6798 }
6799 // c:2692-2702 — `-OP` numeric-or-modular cond (e.g. `-eq`, `-nt`).
6800 if !bc.is_empty() && IS_DASH(bc[0]) {
6801 let rest: String = bc[1..].iter().collect();
6802 let t = get_cond_num(&rest);
6803 if t > -1 {
6804 ecadd(WCB_COND((t + COND_NT) as u32, 0));
6805 ecstr(a);
6806 ecstr(c);
6807 return 1;
6808 }
6809 ecadd(WCB_COND(COND_MODI as u32, 0));
6810 ecstr(b);
6811 ecstr(a);
6812 ecstr(c);
6813 return 1;
6814 }
6815 // c:2703-2707 — `-mod A B C` modular cond on `a`.
6816 let ac: Vec<char> = a.chars().collect();
6817 if !ac.is_empty() && IS_DASH(ac[0]) && ac.len() > 1 {
6818 ecadd(WCB_COND(COND_MOD as u32, 2));
6819 ecstr(a);
6820 ecstr(b);
6821 ecstr(c);
6822 return 1;
6823 }
6824 crate::ported::utils::zerr(&format!("condition expected: {}", b));
6825 1
6826}
6827
6828/// Port of `par_cond_multi(char *a, LinkList l)` from `Src/parse.c:2716`.
6829/// Emits wordcode for `[ -OP A B C … ]` n-ary cond (alternation).
6830pub fn par_cond_multi(a: &str, l: &[String]) -> i32 {
6831 // c:2716 — `if (!IS_DASH(a[0]) || !a[1])`; same Dash/`-` dual
6832 // matching as par_cond_double, char-walked because Dash is a
6833 // single code point.
6834 let ac: Vec<char> = a.chars().collect();
6835 if ac.is_empty() || !IS_DASH(ac[0]) || ac.len() < 2 {
6836 crate::ported::utils::zerr(&format!("condition expected: {}", a));
6837 return 1;
6838 }
6839 ecadd(WCB_COND(COND_MOD as u32, l.len() as u32));
6840 ecstr(a);
6841 for item in l {
6842 ecstr(item);
6843 }
6844 1
6845}
6846
6847/// Port of `cur_add_func(char *nam, Shfunc shf, LinkList names, LinkList progs, int *hlen, int *tlen, int what)`
6848/// from `Src/parse.c:3489`. Adds a shfunc to the in-build dump
6849/// progs+names lists. Stub: `Eprog` for the function body isn't
6850/// yet wired through `shfunc.funcdef` to be serializable here.
6851pub fn cur_add_func(
6852 nam: &str, // c:3489
6853 shf_name: &str,
6854 shf_flags: i32,
6855 names: &mut Vec<String>,
6856 progs: &mut Vec<wcfunc>,
6857 hlen: &mut i32,
6858 tlen: &mut i32,
6859 what: i32,
6860) -> i32 {
6861 let is_undef = (shf_flags as u32 & PM_UNDEFINED) != 0;
6862 if is_undef {
6863 if (what & 2) == 0 {
6864 // c:3498
6865 zwarnnam(nam, &format!("function is not loaded: {}", shf_name));
6866 return 1;
6867 }
6868 // c:3503 — would call `getfpfunc` to load body for dump.
6869 zwarnnam(nam, &format!("can't load function: {}", shf_name));
6870 return 1;
6871 } else if (what & 1) == 0 {
6872 zwarnnam(nam, &format!("function is already loaded: {}", shf_name)); // c:3514
6873 return 1;
6874 }
6875 // c:3517 — would `dupeprog(shf->funcdef)`. Stub: empty body.
6876 let wcf = wcfunc {
6877 name: shf_name.to_string(),
6878 flags: FDHF_ZSHLOAD,
6879 body: Vec::new(),
6880 };
6881 progs.push(wcf);
6882 names.push(shf_name.to_string());
6883
6884 // c:3526 — bump hlen / tlen.
6885 let name_words = (shf_name.len() as i32 + 4) / 4;
6886 *hlen += (FDHEAD_WORDS as i32) + name_words;
6887 *tlen += 0; // body is empty in stub; real path adds prog->len in words.
6888
6889 0
6890}
6891
6892/// Port of `write_dump(int dfd, LinkList progs, int map, int hlen, int tlen)`
6893/// from `Src/parse.c:3334`. Writes the prelude + header records +
6894/// body wordcode bytes to the dump file descriptor.
6895///
6896/// Two passes: first native-byte-order (`FD_MAGIC`), then opposite-
6897/// byte-order (`FD_OMAGIC`) so big-endian readers can mmap the
6898/// same file. Bodies are byte-swapped via `fdswap` on the second pass.
6899pub fn write_dump(
6900 dfd: &mut std::fs::File, // c:3334
6901 progs: &[wcfunc],
6902 mut map: i32,
6903 hlen: i32,
6904 tlen: i32,
6905) -> std::io::Result<()> {
6906 if map == 1 && (tlen as usize) >= FD_MINMAP {
6907 // c:3344
6908 map = 1;
6909 } else if map == 1 {
6910 map = 0;
6911 }
6912
6913 let mut other = 0u32; // c:3338
6914 let ohlen = hlen;
6915 let mut cur_hlen = hlen;
6916
6917 loop {
6918 cur_hlen = ohlen;
6919 // c:3347 — build the prelude.
6920 let mut pre = vec![0u32; FD_PRELEN];
6921 pre[0] = if other != 0 { FD_OMAGIC } else { FD_MAGIC }; // c:3350
6922 let flags = (if map != 0 { FDF_MAP } else { 0 }) | other;
6923 fdsetflags(&mut pre, flags as u8); // c:3351
6924 fdsetother(&mut pre, tlen as u32); // c:3352
6925 // c:3353 — copy ZSH_VERSION C-string into pre[2..].
6926 let ver = b"5.9";
6927 for (i, &b) in ver.iter().enumerate() {
6928 let word = 2 + i / 4;
6929 let shift = (i % 4) * 8;
6930 pre[word] |= (b as u32) << shift;
6931 }
6932 // Write prelude.
6933 for w in &pre {
6934 dfd.write_all(&w.to_le_bytes())?;
6935 }
6936 // c:3356 — per-fn header records.
6937 for wcf in progs {
6938 let n = &wcf.name;
6939 let prog = &wcf.body;
6940 let mut head = fdhead {
6941 start: cur_hlen as u32, // c:3360
6942 len: (prog.len() * 4) as u32, // c:3363
6943 npats: 0, // c:3364 (npats not tracked yet)
6944 strs: 0, // c:3365
6945 hlen: ((FDHEAD_WORDS as u32) + ((n.len() as u32 + 4) / 4)), // c:3366
6946 flags: 0,
6947 };
6948 cur_hlen += prog.len() as i32; // c:3361
6949 // c:3368 — name tail offset from path basename.
6950 let tail = n.rfind('/').map(|p| p + 1).unwrap_or(0);
6951 head.flags = fdhbldflags(wcf.flags, tail as u32); // c:3372
6952 // c:3373 — opposite-byte-order swap on second pass.
6953 let mut head_words: Vec<u32> = vec![
6954 head.start, head.len, head.npats, head.strs, head.hlen, head.flags,
6955 ];
6956 if other != 0 {
6957 fdswap(&mut head_words);
6958 }
6959 for w in &head_words {
6960 dfd.write_all(&w.to_le_bytes())?;
6961 }
6962 // c:3376 — write the name + NUL + pad-to-4.
6963 dfd.write_all(n.as_bytes())?;
6964 dfd.write_all(&[0u8])?;
6965 let pad = (4 - ((n.len() + 1) & 3)) & 3;
6966 if pad > 0 {
6967 dfd.write_all(&vec![0u8; pad])?;
6968 }
6969 }
6970 // c:3381 — per-fn body words.
6971 for wcf in progs {
6972 let mut body = wcf.body.clone();
6973 if other != 0 {
6974 fdswap(&mut body);
6975 }
6976 for w in &body {
6977 dfd.write_all(&w.to_le_bytes())?;
6978 }
6979 }
6980 if other != 0 {
6981 // c:3389
6982 break;
6983 }
6984 other = FDF_OTHER; // c:3391
6985 }
6986 Ok(())
6987}
6988
6989#[cfg(test)]
6990mod tests {
6991 use super::*;
6992 use crate::utils::{errflag, ERRFLAG_ERROR};
6993 use std::fs;
6994 use std::path::Path;
6995 use std::sync::atomic::Ordering;
6996 use std::sync::mpsc;
6997 use std::thread;
6998 use std::time::{Duration, Instant};
6999
7000 /// Test helper. Mirrors zsh's `errflag` save/clear/check pattern
7001 /// around a parse — see `Src/init.c:loop` which clears errflag
7002 /// before parse_event() and tests it after. Returns `Err` if the
7003 /// parse set `ERRFLAG_ERROR`; otherwise `Ok(program)`.
7004 fn parse(input: &str) -> Result<ZshProgram, String> {
7005 let saved = errflag.load(Ordering::Relaxed);
7006 errflag.fetch_and(!ERRFLAG_ERROR, Ordering::Relaxed);
7007 crate::ported::parse::parse_init(input);
7008 let prog = crate::ported::parse::parse();
7009 let had_err = (errflag.load(Ordering::Relaxed) & ERRFLAG_ERROR) != 0;
7010 // Restore prior error bits; don't carry our new error into the
7011 // outer test runner.
7012 errflag.store(saved, Ordering::Relaxed);
7013 if had_err {
7014 Err("parse error".to_string())
7015 } else {
7016 Ok(prog)
7017 }
7018 }
7019
7020 #[test]
7021 fn test_simple_command() {
7022 let prog = parse("echo hello world").unwrap();
7023 assert_eq!(prog.lists.len(), 1);
7024 match &prog.lists[0].sublist.pipe.cmd {
7025 ZshCommand::Simple(s) => {
7026 assert_eq!(s.words, vec!["echo", "hello", "world"]);
7027 }
7028 _ => panic!("expected simple command"),
7029 }
7030 }
7031
7032 #[test]
7033 fn test_pipeline() {
7034 let prog = parse("ls | grep foo | wc -l").unwrap();
7035 assert_eq!(prog.lists.len(), 1);
7036
7037 let pipe = &prog.lists[0].sublist.pipe;
7038 assert!(pipe.next.is_some());
7039
7040 let pipe2 = pipe.next.as_ref().unwrap();
7041 assert!(pipe2.next.is_some());
7042 }
7043
7044 #[test]
7045 fn test_and_or() {
7046 let prog = parse("cmd1 && cmd2 || cmd3").unwrap();
7047 let sublist = &prog.lists[0].sublist;
7048
7049 assert!(sublist.next.is_some());
7050 let (op, _) = sublist.next.as_ref().unwrap();
7051 assert_eq!(*op, SublistOp::And);
7052 }
7053
7054 #[test]
7055 fn test_if_then() {
7056 let prog = parse("if test -f foo; then echo yes; fi").unwrap();
7057 match &prog.lists[0].sublist.pipe.cmd {
7058 ZshCommand::If(_) => {}
7059 _ => panic!("expected if command"),
7060 }
7061 }
7062
7063 #[test]
7064 fn test_for_loop() {
7065 let prog = parse("for i in a b c; do echo $i; done").unwrap();
7066 match &prog.lists[0].sublist.pipe.cmd {
7067 ZshCommand::For(f) => {
7068 assert_eq!(f.var, "i");
7069 match &f.list {
7070 ForList::Words(w) => assert_eq!(w, &vec!["a", "b", "c"]),
7071 _ => panic!("expected word list"),
7072 }
7073 }
7074 _ => panic!("expected for command"),
7075 }
7076 }
7077
7078 #[test]
7079 fn test_case() {
7080 let prog = parse("case $x in a) echo a;; b) echo b;; esac").unwrap();
7081 match &prog.lists[0].sublist.pipe.cmd {
7082 ZshCommand::Case(c) => {
7083 assert_eq!(c.arms.len(), 2);
7084 }
7085 _ => panic!("expected case command"),
7086 }
7087 }
7088
7089 #[test]
7090 fn test_function() {
7091 // First test just parsing "function foo" to see what happens
7092 let prog = parse("function foo { }").unwrap();
7093 match &prog.lists[0].sublist.pipe.cmd {
7094 ZshCommand::FuncDef(f) => {
7095 assert_eq!(f.names, vec!["foo"]);
7096 }
7097 _ => panic!(
7098 "expected function, got {:?}",
7099 prog.lists[0].sublist.pipe.cmd
7100 ),
7101 }
7102 }
7103
7104 #[test]
7105 fn test_redirection() {
7106 let prog = parse("echo hello > file.txt").unwrap();
7107 match &prog.lists[0].sublist.pipe.cmd {
7108 ZshCommand::Simple(s) => {
7109 assert_eq!(s.redirs.len(), 1);
7110 assert_eq!(s.redirs[0].rtype, REDIR_WRITE);
7111 }
7112 _ => panic!("expected simple command"),
7113 }
7114 }
7115
7116 #[test]
7117 fn test_assignment() {
7118 let prog = parse("FOO=bar echo $FOO").unwrap();
7119 match &prog.lists[0].sublist.pipe.cmd {
7120 ZshCommand::Simple(s) => {
7121 assert_eq!(s.assigns.len(), 1);
7122 assert_eq!(s.assigns[0].name, "FOO");
7123 }
7124 _ => panic!("expected simple command"),
7125 }
7126 }
7127
7128 #[test]
7129 fn test_parse_completion_function() {
7130 let input = r#"_2to3_fixes() {
7131 local -a fixes
7132 fixes=( ${${(M)${(f)"$(2to3 --list-fixes 2>/dev/null)"}:#*}//[[:space:]]/} )
7133 (( ${#fixes} )) && _describe -t fixes 'fix' fixes
7134}"#;
7135 let result = parse(input);
7136 assert!(
7137 result.is_ok(),
7138 "Failed to parse completion function: {:?}",
7139 result.err()
7140 );
7141 let prog = result.unwrap();
7142 assert!(
7143 !prog.lists.is_empty(),
7144 "Expected at least one list in program"
7145 );
7146 }
7147
7148 #[test]
7149 fn test_parse_array_with_complex_elements() {
7150 let input = r#"arguments=(
7151 '(- * :)'{-h,--help}'[show this help message and exit]'
7152 {-d,--doctests_only}'[fix up doctests only]'
7153 '*:filename:_files'
7154)"#;
7155 let result = parse(input);
7156 assert!(
7157 result.is_ok(),
7158 "Failed to parse array assignment: {:?}",
7159 result.err()
7160 );
7161 }
7162
7163 #[test]
7164 fn test_parse_full_completion_file() {
7165 let input = r##"#compdef 2to3
7166
7167# zsh completions for '2to3'
7168
7169_2to3_fixes() {
7170 local -a fixes
7171 fixes=( ${${(M)${(f)"$(2to3 --list-fixes 2>/dev/null)"}:#*}//[[:space:]]/} )
7172 (( ${#fixes} )) && _describe -t fixes 'fix' fixes
7173}
7174
7175local -a arguments
7176
7177arguments=(
7178 '(- * :)'{-h,--help}'[show this help message and exit]'
7179 {-d,--doctests_only}'[fix up doctests only]'
7180 {-f,--fix}'[each FIX specifies a transformation; default: all]:fix name:_2to3_fixes'
7181 {-j,--processes}'[run 2to3 concurrently]:number: '
7182 {-x,--nofix}'[prevent a transformation from being run]:fix name:_2to3_fixes'
7183 {-l,--list-fixes}'[list available transformations]'
7184 {-p,--print-function}'[modify the grammar so that print() is a function]'
7185 {-v,--verbose}'[more verbose logging]'
7186 '--no-diffs[do not show diffs of the refactoring]'
7187 {-w,--write}'[write back modified files]'
7188 {-n,--nobackups}'[do not write backups for modified files]'
7189 {-o,--output-dir}'[put output files in this directory instead of overwriting]:directory:_directories'
7190 {-W,--write-unchanged-files}'[also write files even if no changes were required]'
7191 '--add-suffix[append this string to all output filenames]:suffix: '
7192 '*:filename:_files'
7193)
7194
7195_arguments -s -S $arguments
7196"##;
7197 let result = parse(input);
7198 assert!(
7199 result.is_ok(),
7200 "Failed to parse full completion file: {:?}",
7201 result.err()
7202 );
7203 let prog = result.unwrap();
7204 // Should have parsed successfully with at least one statement
7205 assert!(!prog.lists.is_empty(), "Expected at least one list");
7206 }
7207
7208 #[test]
7209 fn test_parse_logs_sh() {
7210 let input = r#"#!/usr/bin/env bash
7211shopt -s globstar
7212
7213if [[ $(uname) == Darwin ]]; then
7214 tail -f /var/log/**/*.log /var/log/**/*.out | lolcat
7215else
7216 if [[ $ZPWR_DISTRO_NAME == raspbian ]]; then
7217 tail -f /var/log/**/*.log | lolcat
7218 else
7219 printf "Unsupported...\n" >&2
7220 fi
7221fi
7222"#;
7223 let result = parse(input);
7224 assert!(
7225 result.is_ok(),
7226 "Failed to parse logs.sh: {:?}",
7227 result.err()
7228 );
7229 }
7230
7231 #[test]
7232 fn test_parse_case_with_glob() {
7233 let input = r#"case "$ZPWR_OS_TYPE" in
7234 darwin*) open_cmd='open'
7235 ;;
7236 cygwin*) open_cmd='cygstart'
7237 ;;
7238 linux*)
7239 open_cmd='xdg-open'
7240 ;;
7241esac"#;
7242 let result = parse(input);
7243 assert!(
7244 result.is_ok(),
7245 "Failed to parse case with glob: {:?}",
7246 result.err()
7247 );
7248 }
7249
7250 #[test]
7251 fn test_parse_case_with_nested_if() {
7252 // Test case with nested if and glob patterns
7253 let input = r##"function zpwrGetOpenCommand(){
7254 local open_cmd
7255 case "$ZPWR_OS_TYPE" in
7256 darwin*) open_cmd='open' ;;
7257 cygwin*) open_cmd='cygstart' ;;
7258 linux*)
7259 if [[ "$_zpwr_uname_r" != *icrosoft* ]];then
7260 open_cmd='nohup xdg-open'
7261 fi
7262 ;;
7263 esac
7264}"##;
7265 let result = parse(input);
7266 assert!(result.is_ok(), "Failed to parse: {:?}", result.err());
7267 }
7268
7269 #[test]
7270 fn test_parse_zpwr_scripts() {
7271 let scripts_dir = Path::new("/Users/wizard/.zpwr/scripts");
7272 if !scripts_dir.exists() {
7273 eprintln!("Skipping test: scripts directory not found");
7274 return;
7275 }
7276
7277 let mut total = 0;
7278 let mut passed = 0;
7279 let mut failed_files = Vec::new();
7280 let mut timeout_files = Vec::new();
7281
7282 for ext in &["sh", "zsh"] {
7283 let pattern = scripts_dir.join(format!("*.{}", ext));
7284 if let Ok(entries) = glob::glob(pattern.to_str().unwrap()) {
7285 for entry in entries.flatten() {
7286 total += 1;
7287 let file_path = entry.display().to_string();
7288 let content = match fs::read_to_string(&entry) {
7289 Ok(c) => c,
7290 Err(e) => {
7291 failed_files.push((file_path, format!("read error: {}", e)));
7292 continue;
7293 }
7294 };
7295
7296 // Parse with timeout
7297 let content_clone = content.clone();
7298 let (tx, rx) = mpsc::channel();
7299 let handle = thread::spawn(move || {
7300 let result = parse(&content_clone);
7301 let _ = tx.send(result);
7302 });
7303
7304 match rx.recv_timeout(Duration::from_secs(2)) {
7305 Ok(Ok(_)) => passed += 1,
7306 Ok(Err(err)) => {
7307 failed_files.push((file_path, err));
7308 }
7309 Err(_) => {
7310 timeout_files.push(file_path);
7311 // Thread will be abandoned
7312 }
7313 }
7314 }
7315 }
7316 }
7317
7318 eprintln!("\n=== ZPWR Scripts Parse Results ===");
7319 eprintln!("Passed: {}/{}", passed, total);
7320
7321 if !timeout_files.is_empty() {
7322 eprintln!("\nTimeout files (>2s):");
7323 for file in &timeout_files {
7324 eprintln!(" {}", file);
7325 }
7326 }
7327
7328 if !failed_files.is_empty() {
7329 eprintln!("\nFailed files:");
7330 for (file, err) in &failed_files {
7331 eprintln!(" {} - {}", file, err);
7332 }
7333 }
7334
7335 // Allow some failures initially, but track progress
7336 let pass_rate = if total > 0 {
7337 (passed as f64 / total as f64) * 100.0
7338 } else {
7339 0.0
7340 };
7341 eprintln!("Pass rate: {:.1}%", pass_rate);
7342
7343 // Require at least 50% pass rate for now
7344 assert!(pass_rate >= 50.0, "Pass rate too low: {:.1}%", pass_rate);
7345 }
7346
7347 #[test]
7348 #[ignore] // Uses threads that can't be killed on timeout; use integration test instead
7349 fn test_parse_zsh_stdlib_functions() {
7350 let functions_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("test_data/zsh_functions");
7351 if !functions_dir.exists() {
7352 eprintln!(
7353 "Skipping test: zsh_functions directory not found at {:?}",
7354 functions_dir
7355 );
7356 return;
7357 }
7358
7359 let mut total = 0;
7360 let mut passed = 0;
7361 let mut failed_files = Vec::new();
7362 let mut timeout_files = Vec::new();
7363
7364 if let Ok(entries) = fs::read_dir(&functions_dir) {
7365 for entry in entries.flatten() {
7366 let path = entry.path();
7367 if !path.is_file() {
7368 continue;
7369 }
7370
7371 total += 1;
7372 let file_path = path.display().to_string();
7373 let content = match fs::read_to_string(&path) {
7374 Ok(c) => c,
7375 Err(e) => {
7376 failed_files.push((file_path, format!("read error: {}", e)));
7377 continue;
7378 }
7379 };
7380
7381 // Parse with timeout
7382 let content_clone = content.clone();
7383 let (tx, rx) = mpsc::channel();
7384 thread::spawn(move || {
7385 let result = parse(&content_clone);
7386 let _ = tx.send(result);
7387 });
7388
7389 match rx.recv_timeout(Duration::from_secs(2)) {
7390 Ok(Ok(_)) => passed += 1,
7391 Ok(Err(err)) => {
7392 failed_files.push((file_path, err));
7393 }
7394 Err(_) => {
7395 timeout_files.push(file_path);
7396 }
7397 }
7398 }
7399 }
7400
7401 eprintln!("\n=== Zsh Stdlib Functions Parse Results ===");
7402 eprintln!("Passed: {}/{}", passed, total);
7403
7404 if !timeout_files.is_empty() {
7405 eprintln!("\nTimeout files (>2s): {}", timeout_files.len());
7406 for file in timeout_files.iter().take(10) {
7407 eprintln!(" {}", file);
7408 }
7409 if timeout_files.len() > 10 {
7410 eprintln!(" ... and {} more", timeout_files.len() - 10);
7411 }
7412 }
7413
7414 if !failed_files.is_empty() {
7415 eprintln!("\nFailed files: {}", failed_files.len());
7416 for (file, err) in failed_files.iter().take(20) {
7417 let filename = Path::new(file)
7418 .file_name()
7419 .unwrap_or_default()
7420 .to_string_lossy();
7421 eprintln!(" {} - {}", filename, err);
7422 }
7423 if failed_files.len() > 20 {
7424 eprintln!(" ... and {} more", failed_files.len() - 20);
7425 }
7426 }
7427
7428 let pass_rate = if total > 0 {
7429 (passed as f64 / total as f64) * 100.0
7430 } else {
7431 0.0
7432 };
7433 eprintln!("Pass rate: {:.1}%", pass_rate);
7434
7435 // Require at least 50% pass rate
7436 assert!(pass_rate >= 50.0, "Pass rate too low: {:.1}%", pass_rate);
7437 }
7438}