zshrs_parse/parser.rs
1//! Zsh parser - Direct port from zsh/Src/parse.c
2//!
3//! This parser takes tokens from the ZshLexer and builds an AST.
4//! It follows the zsh grammar closely, producing structures that
5//! can be executed by the shell executor.
6
7use crate::lexer::ZshLexer;
8use crate::tokens::LexTok;
9use serde::{Deserialize, Serialize};
10
/// AST node for a complete program (list of commands).
///
/// This is the root type the parser hands back; nested bodies (subshells,
/// loop bodies, function bodies, ...) reuse the same type.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshProgram {
    /// The command lists that make up the program, in stored order.
    pub lists: Vec<ZshList>,
}
16
/// A list is a sequence of sublists separated by `;` or `&` or newline.
///
/// Each node holds one sublist together with its [`ListFlags`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshList {
    /// The sublist carried by this list entry.
    pub sublist: ZshSublist,
    /// Flags attached to this list entry (see [`ListFlags`]).
    pub flags: ListFlags,
}
23
/// Per-list execution flags. Both default to `false` via `Default`.
#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
pub struct ListFlags {
    /// Run asynchronously (`&`)
    pub async_: bool,
    /// Disown after running (`&|` or `&!`)
    pub disown: bool,
}
31
/// A sublist is pipelines connected by `&&` or `||`.
///
/// Stored as a singly linked chain: this node's pipeline, then optionally
/// the connecting operator and the remainder of the chain.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshSublist {
    /// The pipeline at this position of the chain.
    pub pipe: ZshPipe,
    /// Operator joining this pipeline to the rest of the chain, plus that rest.
    /// `None` when this is the last pipeline.
    pub next: Option<(SublistOp, Box<ZshSublist>)>,
    /// Flags attached to this sublist (see [`SublistFlags`]).
    pub flags: SublistFlags,
}
39
/// Operator connecting two pipelines inside a [`ZshSublist`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum SublistOp {
    And, // &&
    Or,  // ||
}
45
/// Per-sublist flags. Both default to `false` via `Default`.
#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
pub struct SublistFlags {
    /// Coproc
    pub coproc: bool,
    /// Negated with `!`
    pub not: bool,
}
53
/// A pipeline is commands connected by `|`.
///
/// Stored as a singly linked chain of stages: this stage's command plus an
/// optional pointer to the following stage.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshPipe {
    /// Command run by this pipeline stage.
    pub cmd: ZshCommand,
    /// The following stage, or `None` when this is the last one.
    pub next: Option<Box<ZshPipe>>,
    /// Line number recorded for this stage.
    pub lineno: u64,
    /// `|&` between this stage and the next — merge stderr into the
    /// pipe so the next stage's stdin sees both stdout AND stderr from
    /// this stage. When `next` is None this flag is meaningless.
    /// Defaults to `false` when absent from serialized input.
    #[serde(default)]
    pub merge_stderr: bool,
}
66
/// A command: either a simple command or one of the compound forms.
///
/// Variants that hold a nested program box it, so the enum stays small
/// and the recursive definition is well-formed.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ZshCommand {
    Simple(ZshSimple),
    Subsh(Box<ZshProgram>), // (list)
    Cursh(Box<ZshProgram>), // {list}
    For(ZshFor),
    Case(ZshCase),
    If(ZshIf),
    While(ZshWhile),
    Until(ZshWhile),
    Repeat(ZshRepeat),
    FuncDef(ZshFuncDef),
    Time(Option<Box<ZshSublist>>),
    Cond(ZshCond), // [[ ... ]]
    Arith(String), // (( ... ))
    Try(ZshTry),   // { ... } always { ... }
    /// Compound command with trailing redirects:
    /// `{ cmd } 2>&1`, `(...) >file`, `if ...; fi >file`, etc.
    /// Simple commands carry redirects in their own struct; this wrapper
    /// is only used for compound forms.
    Redirected(Box<ZshCommand>, Vec<ZshRedir>),
}
90
/// A simple command (assignments, words, redirections).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshSimple {
    /// Assignments attached to the command.
    pub assigns: Vec<ZshAssign>,
    /// Command words as raw strings.
    pub words: Vec<String>,
    /// Redirections attached to the command.
    pub redirs: Vec<ZshRedir>,
}
98
/// An assignment: target name, assigned value, and whether it appends.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshAssign {
    /// Name being assigned to.
    pub name: String,
    /// Scalar or array value (see [`ZshAssignValue`]).
    pub value: ZshAssignValue,
    pub append: bool, // +=
}
106
/// Right-hand side of a [`ZshAssign`]: a single string or a list of strings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ZshAssignValue {
    Scalar(String),
    Array(Vec<String>),
}
112
/// A redirection attached to a simple command or to a
/// `ZshCommand::Redirected` wrapper.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshRedir {
    /// Kind of redirection operator.
    pub rtype: RedirType,
    /// File descriptor associated with the redirection.
    pub fd: i32,
    /// Redirection operand as a raw string.
    pub name: String,
    /// Here-document payload; `None` until resolved (or for non-heredoc
    /// redirections).
    pub heredoc: Option<HereDocInfo>,
    pub varid: Option<String>, // {var}>file
    /// Index into ZshLexer.heredocs[] for body lookup. Filled in by
    /// `parse_redirection` for Heredoc/HeredocDash, then resolved into
    /// `heredoc.content` by `fill_heredoc_bodies` after process_heredocs
    /// has run for the line.
    /// Skipped by serde, so it is never part of serialized output.
    #[serde(skip)]
    pub heredoc_idx: Option<usize>,
}
128
/// Body and terminator of one here-document.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HereDocInfo {
    /// Text of the here-document body.
    pub content: String,
    /// Terminator word that ends the body.
    pub terminator: String,
    /// Originally-quoted terminator (`<<'EOF'`, `<<"EOF"`). When true the
    /// body is passed verbatim — no `$var` / `$(cmd)` / `$((expr))`
    /// expansion. Plain `<<EOF` runs all expansions.
    /// Defaults to `false` when absent from serialized input.
    #[serde(default)]
    pub quoted: bool,
}
139
/// Redirection type: one variant per redirection operator spelling.
/// The trailing comment on each variant shows the operator it stands for.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum RedirType {
    Write,        // >
    Writenow,     // >|
    Append,       // >>
    Appendnow,    // >>|
    Read,         // <
    ReadWrite,    // <>
    Heredoc,      // <<
    HeredocDash,  // <<-
    Herestr,      // <<<
    MergeIn,      // <&
    MergeOut,     // >&
    ErrWrite,     // &>
    ErrWritenow,  // &>|
    ErrAppend,    // >>&
    ErrAppendnow, // >>&|
    InPipe,       // < <(...)
    OutPipe,      // > >(...)
}
161
/// For loop (also used for `select`, see `is_select`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshFor {
    /// Loop variable name.
    pub var: String,
    /// What the loop iterates over (see [`ForList`]).
    pub list: ForList,
    /// Loop body.
    pub body: Box<ZshProgram>,
    /// True if this was parsed as `select` rather than `for`. Both share
    /// the same parser, so the compiler routes on this flag.
    /// Defaults to `false` when absent from serialized input.
    #[serde(default)]
    pub is_select: bool,
}
173
/// Iteration source of a [`ZshFor`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ForList {
    /// Explicit word list.
    Words(Vec<String>),
    /// Three-part arithmetic header; each part is kept as raw text.
    CStyle {
        init: String,
        cond: String,
        step: String,
    },
    /// No explicit list — presumably iterates the positional parameters
    /// (NOTE(review): confirm against the executor).
    Positional,
}
184
/// Case statement: the word being matched plus its arms, in stored order.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshCase {
    /// Word the arms are matched against, as raw text.
    pub word: String,
    /// Arms of the statement.
    pub arms: Vec<CaseArm>,
}
191
/// One arm of a [`ZshCase`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CaseArm {
    /// Patterns for this arm, as raw text.
    pub patterns: Vec<String>,
    /// Commands belonging to this arm.
    pub body: ZshProgram,
    /// How the arm was terminated (see [`CaseTerm`]).
    pub terminator: CaseTerm,
}
198
/// Terminator of a case arm. The trailing comment on each variant shows
/// the token it stands for.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum CaseTerm {
    Break,    // ;;
    Continue, // ;&
    TestNext, // ;|
}
205
/// If statement with optional `elif` branches and an optional `else`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshIf {
    /// Condition of the leading `if`.
    pub cond: Box<ZshProgram>,
    /// Body paired with `cond`.
    pub then: Box<ZshProgram>,
    /// `elif` branches as (condition, body) pairs, in stored order.
    pub elif: Vec<(ZshProgram, ZshProgram)>,
    /// `else` body, if any.
    pub else_: Option<Box<ZshProgram>>,
}
214
/// While/Until loop. The same struct backs both `ZshCommand::While` and
/// `ZshCommand::Until`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshWhile {
    /// Loop condition.
    pub cond: Box<ZshProgram>,
    /// Loop body.
    pub body: Box<ZshProgram>,
    /// Presumably true for the `until` form — NOTE(review): confirm how this
    /// relates to the separate `ZshCommand::Until` variant.
    pub until: bool,
}
222
/// Repeat loop.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshRepeat {
    /// Repetition count, kept as raw text.
    pub count: String,
    /// Loop body.
    pub body: Box<ZshProgram>,
}
229
/// Function definition. One definition may carry several names.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshFuncDef {
    /// Names the body is defined under.
    pub names: Vec<String>,
    /// Function body.
    pub body: Box<ZshProgram>,
    /// Tracing flag for the function.
    pub tracing: bool,
    /// Anonymous-function call args. `() { body } a b` parses as a
    /// FuncDef (auto-named) with `auto_call_args = Some(vec!["a", "b"])`.
    /// compile_funcdef registers the function then emits a Simple call
    /// with these args.
    /// Defaults to `None` when absent from serialized input.
    #[serde(default)]
    pub auto_call_args: Option<Vec<String>>,
    /// Original source text of the function body (the bytes between
    /// `{` and `}`, without the braces themselves), captured at parse
    /// time. Populated for `function name { body }` and `function name() { body }`
    /// forms; left None for the synthesized inline-funcdef recovery
    /// path. ZshCompiler::compile_funcdef forwards it to
    /// `BUILTIN_REGISTER_COMPILED_FN` so introspection (`whence`, `which`,
    /// `${functions[name]}`) has canonical source text.
    /// Defaults to `None` when absent from serialized input.
    #[serde(default)]
    pub body_source: Option<String>,
}
252
/// Conditional expression `[[ ... ]]`, as a boolean expression tree.
/// Operators and operands are kept as raw strings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ZshCond {
    Not(Box<ZshCond>),
    And(Box<ZshCond>, Box<ZshCond>),
    Or(Box<ZshCond>, Box<ZshCond>),
    Unary(String, String),          // -f file, -n str, etc.
    Binary(String, String, String), // str = pat, a -eq b, etc.
    Regex(String, String),          // str =~ regex
}
263
/// Try/always block (`{ ... } always { ... }`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshTry {
    /// The leading block.
    pub try_block: Box<ZshProgram>,
    /// The block following `always`.
    pub always: Box<ZshProgram>,
}
270
/// Zsh parameter expansion flags.
///
/// The trailing comment on each variant gives the flag spelling it stands
/// for and a short gloss. NOTE(review): the glosses are informal; check
/// them against the zsh manual before relying on exact semantics.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ZshParamFlag {
    Lower,                 // L - lowercase
    Upper,                 // U - uppercase
    Capitalize,            // C - capitalize words
    Join(String),          // j:sep: - join array with separator
    JoinNewline,           // F - join with newlines
    Split(String),         // s:sep: - split string into array
    SplitLines,            // f - split on newlines
    SplitWords,            // z - split into words (shell parsing)
    Type,                  // t - type of variable
    Words,                 // w - word splitting
    Quote,                 // qq - single-quote always
    QuoteIfNeeded,         // q+ - single-quote only if needed
    DoubleQuote,           // qqq - double-quote
    DollarQuote,           // qqqq - $'...' style
    QuoteBackslash,        // q / b / B - backslash-escape special chars
    Unique,                // u - unique elements only
    Reverse,               // O - reverse sort
    Sort,                  // o - sort
    NumericSort,           // n - numeric sort
    IndexSort,             // a - sort in array index order
    Keys,                  // k - associative array keys
    Values,                // v - associative array values
    Length,                // # - length (character codes)
    CountChars,            // c - count total characters
    Expand,                // e - perform shell expansions
    PromptExpand,          // % - expand prompt escapes
    PromptExpandFull,      // %% - full prompt expansion
    Visible,               // V - make non-printable chars visible
    Directory,             // D - substitute directory names
    Head(usize),           // [1,n] - first n elements
    Tail(usize),           // [-n,-1] - last n elements
    PadLeft(usize, char),  // l:len:fill: - pad left
    PadRight(usize, char), // r:len:fill: - pad right
    Width(usize),          // m - use width for padding
    Match,                 // M - include matched portion
    Remove,                // R - include non-matched portion (complement of M)
    Subscript,             // S - subscript scanning
    Parameter,             // P - use value as parameter name (indirection)
    Glob,                  // ~ - glob patterns in pattern
    /// `@` flag — force array-context behavior even inside DQ. zsh's
    /// `"${(@o)arr}"` keeps the sort active and splices each element as
    /// its own word. Without this, the array-only flags became no-ops
    /// in DQ.
    At,
}
319
/// List operator (for shell command lists). Used by the legacy-compatible
/// `ShellCommand::List` representation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum ListOp {
    And,     // &&
    Or,      // ||
    Semi,    // ;
    Amp,     // &
    Newline, // \n
}
329
/// Shell word - can be simple literal or complex expansion.
///
/// Only two shapes exist: a plain text token or a concatenation of
/// sub-words.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ShellWord {
    /// Plain text token. Most ZWC-decoded words land here. Goes through
    /// `expand_string` (plus glob/tilde/etc. as text-level transforms) for
    /// final output.
    Literal(String),
    /// Concatenation of sub-words. ZWC array decoding produces this with
    /// child Literals; nothing else constructs it now that the legacy
    /// hand-rolled parser is gone.
    Concat(Vec<ShellWord>),
}
342
/// Variable modifier for parameter expansion.
///
/// Variants carrying a `ShellWord` keep the modifier's operand(s)
/// unexpanded; `Substring` carries an offset and an optional length.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum VarModifier {
    Default(ShellWord),
    DefaultAssign(ShellWord),
    Error(ShellWord),
    Alternate(ShellWord),
    Length,
    Substring(i64, Option<i64>),
    RemovePrefix(ShellWord),
    RemovePrefixLong(ShellWord),
    RemoveSuffix(ShellWord),
    RemoveSuffixLong(ShellWord),
    Replace(ShellWord, ShellWord),
    ReplaceAll(ShellWord, ShellWord),
    /// `${var/#pat/repl}` — anchored at start (prefix only).
    /// Per Src/subst.c paramsubst's `/`-arm with SUB_START.
    ReplacePrefix(ShellWord, ShellWord),
    /// `${var/%pat/repl}` — anchored at end (suffix only).
    /// Per Src/subst.c paramsubst's `/`-arm with SUB_END.
    ReplaceSuffix(ShellWord, ShellWord),
    Upper,
    Lower,
}
367
/// Shell command - the old shell_ast compatible type.
///
/// NOTE(review): the meaning of the `bool` in `Pipeline` is not visible in
/// this file section (possibly a negation flag) — confirm at the use site.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ShellCommand {
    Simple(SimpleCommand),
    Pipeline(Vec<ShellCommand>, bool),
    List(Vec<(ShellCommand, ListOp)>),
    Compound(CompoundCommand),
    FunctionDef(String, Box<ShellCommand>),
}
377
/// Simple command with assignments, words, and redirects (legacy
/// `ShellCommand` representation).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SimpleCommand {
    /// Assignments as (name, value, flag) triples.
    /// NOTE(review): the `bool` is presumably the `+=` append marker — confirm.
    pub assignments: Vec<(String, ShellWord, bool)>,
    /// Command words.
    pub words: Vec<ShellWord>,
    /// Redirections attached to the command.
    pub redirects: Vec<Redirect>,
}
385
/// Redirect (legacy `ShellCommand` representation).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Redirect {
    /// Explicit file descriptor, if one was recorded.
    pub fd: Option<i32>,
    /// Redirection operator.
    pub op: RedirectOp,
    /// Redirection operand.
    pub target: ShellWord,
    /// Here-document body, when present.
    pub heredoc_content: Option<String>,
    /// Variable name associated with the descriptor, when present.
    pub fd_var: Option<String>,
}
395
/// Redirect operator used by the legacy [`Redirect`] type. This is a
/// coarser set than [`RedirType`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum RedirectOp {
    Write,
    Append,
    Read,
    ReadWrite,
    Clobber,
    DupRead,
    DupWrite,
    HereDoc,
    HereString,
    WriteBoth,
    AppendBoth,
}
411
/// Compound command (legacy `ShellCommand` representation).
///
/// Bodies are flat `Vec<ShellCommand>` sequences rather than nested
/// program nodes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum CompoundCommand {
    BraceGroup(Vec<ShellCommand>),
    Subshell(Vec<ShellCommand>),
    /// `conditions` holds (condition, body) pairs; `else_part` is the
    /// optional trailing branch.
    If {
        conditions: Vec<(Vec<ShellCommand>, Vec<ShellCommand>)>,
        else_part: Option<Vec<ShellCommand>>,
    },
    /// `words` is optional; `None` means no word list was recorded.
    For {
        var: String,
        words: Option<Vec<ShellWord>>,
        body: Vec<ShellCommand>,
    },
    /// Three-part arithmetic `for`; header parts are kept as raw text.
    ForArith {
        init: String,
        cond: String,
        step: String,
        body: Vec<ShellCommand>,
    },
    While {
        condition: Vec<ShellCommand>,
        body: Vec<ShellCommand>,
    },
    Until {
        condition: Vec<ShellCommand>,
        body: Vec<ShellCommand>,
    },
    /// Each case entry is (patterns, body, terminator).
    Case {
        word: ShellWord,
        cases: Vec<(Vec<ShellWord>, Vec<ShellCommand>, CaseTerminator)>,
    },
    Select {
        var: String,
        words: Option<Vec<ShellWord>>,
        body: Vec<ShellCommand>,
    },
    Coproc {
        name: Option<String>,
        body: Box<ShellCommand>,
    },
    /// repeat N do ... done
    Repeat {
        count: String,
        body: Vec<ShellCommand>,
    },
    /// { try-block } always { always-block }
    Try {
        try_body: Vec<ShellCommand>,
        always_body: Vec<ShellCommand>,
    },
    Arith(String),
    WithRedirects(Box<ShellCommand>, Vec<Redirect>),
}
466
/// Case terminator used by `CompoundCommand::Case`.
///
/// NOTE(review): the mapping from these variants to `;;` / `;&` / `;|` is
/// not shown here and the names differ from [`CaseTerm`] — confirm before
/// converting between the two.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum CaseTerminator {
    Break,
    Fallthrough,
    Continue,
}
474
/// A single parse error: a message plus the line it is attributed to.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ParseError {
    /// Human-readable description of the problem.
    pub message: String,
    /// Line number reported with the error.
    pub line: u64,
}
481
482impl std::fmt::Display for ParseError {
483 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
484 write!(f, "parse error at line {}: {}", self.line, self.message)
485 }
486}
487
// Marker impl: relies on the `Debug` derive and the `Display` impl; no
// source error is exposed.
impl std::error::Error for ParseError {}
489
/// The Zsh Parser.
///
/// Owns the lexer it pulls tokens from, the errors collected so far, and
/// two guard counters used to bail out of runaway parses.
pub struct ZshParser<'a> {
    /// Token source for this parse.
    lexer: ZshLexer<'a>,
    /// Errors collected while parsing.
    errors: Vec<ParseError>,
    /// Global iteration counter to prevent infinite loops
    global_iterations: usize,
    /// Recursion depth counter to prevent stack overflow
    recursion_depth: usize,
}
499
/// Upper bound on `ZshParser::recursion_depth`; `check_recursion` reports
/// the limit as exceeded once the depth is strictly greater than this.
const MAX_RECURSION_DEPTH: usize = 500;
501
/// Saved parse context. Direct port of zsh's `struct parse_stack`
/// declared in zsh/Src/zsh.h and used by parse.c:295-355
/// (`parse_context_save` / `parse_context_restore`). Pushes per-
/// parse-call state so a nested parse (e.g. inside command
/// substitution) doesn't clobber the outer parse.
///
/// zshrs port note: zsh's parse_stack tracks wordcode-buffer state
/// (ecbuf, eclen, ecused, ecnpats, ecstrs, ecsoffs, ecssub, ecnfunc).
/// zshrs builds AST trees instead so those fields collapse to a
/// recursion_depth + global_iterations save. The lexer-side fields
/// (incmdpos, incond, etc.) live on ZshLexer here so they get saved
/// via the lexer's own `LexStack` rather than being duplicated here.
#[derive(Debug, Default, Clone)]
pub struct ParseStack {
    /// Saved value of the parser's recursion-depth counter.
    pub recursion_depth: usize,
    /// Saved value of the parser's global iteration counter.
    pub global_iterations: usize,
}
519
520/// Walk every ZshRedir in the program and, for any with a `heredoc_idx`,
521/// pull the body+terminator out of `bodies` and stuff into `heredoc`.
522/// `bodies[i]` corresponds to the i-th heredoc registered by the lexer
523/// during scanning (in source order).
524fn fill_heredoc_bodies(prog: &mut ZshProgram, bodies: &[HereDocInfo]) {
525 for list in &mut prog.lists {
526 fill_in_sublist(&mut list.sublist, bodies);
527 }
528}
529
530fn fill_in_sublist(sub: &mut ZshSublist, bodies: &[HereDocInfo]) {
531 fill_in_pipe(&mut sub.pipe, bodies);
532 if let Some(next) = &mut sub.next {
533 fill_in_sublist(&mut next.1, bodies);
534 }
535}
536
537fn fill_in_pipe(pipe: &mut ZshPipe, bodies: &[HereDocInfo]) {
538 fill_in_command(&mut pipe.cmd, bodies);
539 if let Some(next) = &mut pipe.next {
540 fill_in_pipe(next, bodies);
541 }
542}
543
544fn fill_in_command(cmd: &mut ZshCommand, bodies: &[HereDocInfo]) {
545 match cmd {
546 ZshCommand::Simple(s) => {
547 for r in &mut s.redirs {
548 resolve_redir(r, bodies);
549 }
550 }
551 ZshCommand::Subsh(p) | ZshCommand::Cursh(p) => fill_heredoc_bodies(p, bodies),
552 ZshCommand::FuncDef(f) => fill_heredoc_bodies(&mut f.body, bodies),
553 ZshCommand::If(i) => {
554 fill_heredoc_bodies(&mut i.cond, bodies);
555 fill_heredoc_bodies(&mut i.then, bodies);
556 for (c, b) in &mut i.elif {
557 fill_heredoc_bodies(c, bodies);
558 fill_heredoc_bodies(b, bodies);
559 }
560 if let Some(e) = &mut i.else_ {
561 fill_heredoc_bodies(e, bodies);
562 }
563 }
564 ZshCommand::While(w) | ZshCommand::Until(w) => {
565 fill_heredoc_bodies(&mut w.cond, bodies);
566 fill_heredoc_bodies(&mut w.body, bodies);
567 }
568 ZshCommand::For(f) => fill_heredoc_bodies(&mut f.body, bodies),
569 ZshCommand::Case(c) => {
570 for arm in &mut c.arms {
571 fill_heredoc_bodies(&mut arm.body, bodies);
572 }
573 }
574 ZshCommand::Repeat(r) => fill_heredoc_bodies(&mut r.body, bodies),
575 ZshCommand::Time(Some(sublist)) => fill_in_sublist(sublist, bodies),
576 ZshCommand::Try(t) => {
577 fill_heredoc_bodies(&mut t.try_block, bodies);
578 fill_heredoc_bodies(&mut t.always, bodies);
579 }
580 ZshCommand::Redirected(inner, redirs) => {
581 for r in redirs {
582 resolve_redir(r, bodies);
583 }
584 fill_in_command(inner, bodies);
585 }
586 ZshCommand::Time(None) | ZshCommand::Cond(_) | ZshCommand::Arith(_) => {}
587 }
588}
589
590fn resolve_redir(r: &mut ZshRedir, bodies: &[HereDocInfo]) {
591 if let Some(idx) = r.heredoc_idx {
592 if let Some(info) = bodies.get(idx) {
593 r.heredoc = Some(info.clone());
594 }
595 }
596}
597
598/// If `list` is a Simple containing one word that ends in the
599/// `<INPAR><OUTPAR>` token pair (the lexer-port encoding of `()`),
600/// return the bare name. Used by `parse_program_until` to detect
601/// `name() {body}` style function definitions where the lexer
602/// hasn't split the `()` from the name.
603/// Detect the `name() …` shape inside a Simple. Returns the function
604/// name and (when the body was already inlined into the same Simple,
605/// e.g. `foo() echo hi`) the rest of the words as the body's argv.
606/// Returns None for non-funcdef shapes.
607fn simple_name_with_inoutpar(list: &ZshList) -> Option<(Vec<String>, Vec<String>)> {
608 if list.flags.async_ || list.sublist.next.is_some() {
609 return None;
610 }
611 let pipe = &list.sublist.pipe;
612 if pipe.next.is_some() {
613 return None;
614 }
615 let simple = match &pipe.cmd {
616 ZshCommand::Simple(s) => s,
617 _ => return None,
618 };
619 if simple.words.is_empty() || !simple.assigns.is_empty() {
620 return None;
621 }
622 let suffix = "\u{88}\u{8a}"; // INPAR + OUTPAR
623 // Find the FIRST word ending in `()`. zsh accepts the
624 // multi-name shorthand `fna fnb fnc() { body }` (parse.c:
625 // par_funcdef wordlist) — words[0..i-1] are extra names,
626 // words[i] is `lastname()`. Words after are the body argv
627 // (one-line shorthand, `name() cmd args`).
628 let par_idx = simple.words.iter().position(|w| w.ends_with(suffix))?;
629 let mut names: Vec<String> = Vec::with_capacity(par_idx + 1);
630 for w in &simple.words[..par_idx] {
631 // Earlier names must be bare identifiers, NOT contain
632 // tokens that imply they're not function names (no `()`,
633 // no quotes, no expansions). zsh's lexer enforces this
634 // at the wordlist level; we approximate by requiring the
635 // word be an identifier-shaped token after untokenize.
636 let bare = crate::lexer::untokenize(w);
637 let valid = !bare.is_empty()
638 && bare
639 .chars()
640 .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-' || c == '.' || c == '$');
641 if !valid {
642 return None;
643 }
644 names.push(bare);
645 }
646 let last = &simple.words[par_idx];
647 let bare = &last[..last.len() - suffix.len()];
648 if bare.is_empty() {
649 return None;
650 }
651 names.push(crate::lexer::untokenize(bare));
652 let rest = simple.words[par_idx + 1..].to_vec();
653 Some((names, rest))
654}
655
656impl<'a> ZshParser<'a> {
657 /// Create a new parser
658 pub fn new(input: &'a str) -> Self {
659 ZshParser {
660 lexer: ZshLexer::new(input),
661 errors: Vec::new(),
662 global_iterations: 0,
663 recursion_depth: 0,
664 }
665 }
666
667 /// Check iteration limit; returns true if exceeded
668 #[inline]
669 fn check_limit(&mut self) -> bool {
670 self.global_iterations += 1;
671 self.global_iterations > 10_000
672 }
673
674 /// Check recursion depth; returns true if exceeded
675 #[inline]
676 fn check_recursion(&mut self) -> bool {
677 self.recursion_depth > MAX_RECURSION_DEPTH
678 }
679
680 /// Save parse context onto a `ParseStack`. Direct port of
681 /// zsh/Src/parse.c:295-320 `parse_context_save`. Pushes
682 /// recursion_depth + global_iterations and resets to zero so
683 /// a nested parse can't trigger the outer parse's limits.
684 /// Lexer-side state (incmdpos / incond / etc.) saves via the
685 /// lexer's own `LexStack` since those fields live on ZshLexer.
686 pub fn parse_context_save(&mut self, ps: &mut ParseStack) {
687 // parse.c:299-317 — save parser state. zshrs collapses zsh's
688 // wordcode-buffer fields (ecbuf/eclen/ecused/ecnpats/ecstrs/
689 // ecsoffs/ecssub/ecnfunc) into the recursion+iteration pair
690 // since the AST builder doesn't use a flat wordcode buffer.
691 ps.recursion_depth = self.recursion_depth;
692 ps.global_iterations = self.global_iterations;
693 // parse.c:318-319 — clear the buffer + heredoc list so a
694 // nested parse starts from a clean slate.
695 self.recursion_depth = 0;
696 self.global_iterations = 0;
697 }
698
699 /// Restore parse context from a `ParseStack`. Direct port of
700 /// zsh/Src/parse.c:326-355 `parse_context_restore`. Inverse of
701 /// `parse_context_save`. Also clears any half-built AST state
702 /// to prevent leaking into the outer parse.
703 pub fn parse_context_restore(&mut self, ps: &ParseStack) {
704 // parse.c:330-331 — free any in-progress wordcode buffer.
705 // zshrs has no equivalent — AST nodes are owned by their
706 // parent so dropping the parser frees them.
707
708 // parse.c:333-352 — restore saved state.
709 self.recursion_depth = ps.recursion_depth;
710 self.global_iterations = ps.global_iterations;
711
712 // parse.c:354 — `errflag &= ~ERRFLAG_ERROR;` — clear the
713 // error flag so the outer parse sees a clean state. zshrs
714 // tracks errors per-parser; clearing means dropping any
715 // partial errors collected during the nested parse.
716 self.errors.clear();
717 }
718
719 /// Initialize parser status. Direct port of zsh/Src/parse.c:489-503
720 /// `init_parse_status`. Clears the per-parse-call lexer flags
721 /// so a fresh parse starts from cmd-position with no nesting
722 /// state inherited from a prior parse.
723 pub fn init_parse_status(&mut self) {
724 // parse.c:500-502 — `incasepat = incond = inredir = infor =
725 // intypeset = 0; inrepeat_ = 0; incmdpos = 1;`
726 self.lexer.incasepat = 0;
727 self.lexer.incond = 0;
728 self.lexer.inredir = false;
729 self.lexer.infor = 0;
730 self.lexer.intypeset = false;
731 self.lexer.incmdpos = true;
732 }
733
734 /// Initialize parser for a fresh parse. Direct port of
735 /// zsh/Src/parse.c:507-525 `init_parse`. C source allocates a
736 /// fresh wordcode buffer (ecbuf) sized EC_INIT_SIZE, resets the
737 /// per-parse-call counters, and calls init_parse_status. zshrs
738 /// has no flat wordcode buffer (AST is built inline) so this
739 /// function reduces to init_parse_status + recursion_depth/
740 /// global_iterations clear.
741 pub fn init_parse(&mut self) {
742 // parse.c:513-520 — init wordcode buffer. zshrs no-op.
743 self.recursion_depth = 0;
744 self.global_iterations = 0;
745 // parse.c:522 — `init_parse_status();`
746 self.init_parse_status();
747 }
748
749 /// Check whether the parsed program is empty. Direct port of
750 /// zsh/Src/parse.c:583-587 `empty_eprog`. C version checks
751 /// `*p->prog == WCB_END()` (single end-of-wordcode marker).
752 /// zshrs version checks the AST node count.
753 pub fn empty_eprog(prog: &ZshProgram) -> bool {
754 prog.lists.is_empty()
755 }
756
    /// Clear pending here-document list. Direct port of
    /// zsh/Src/parse.c:589-600 `clear_hdocs`. The C version walks
    /// the global `hdocs` linked list and frees each node. zshrs
    /// stores pending heredocs on the lexer's `heredocs` collection, so
    /// emptying it has the same effect.
    pub fn clear_hdocs(&mut self) {
        // Drops every pending entry; called by `parse_event` on failure.
        self.lexer.heredocs.clear();
    }
765
    /// Top-level parse-event entry. Direct port of zsh/Src/parse.c:
    /// 612-631 `parse_event`. Reads one event from the lexer (a
    /// sublist optionally followed by SEPER/AMPER/AMPERBANG) and
    /// returns the resulting ZshProgram.
    ///
    /// `endtok` is the token that terminates the event — usually
    /// ENDINPUT, but for command-style substitutions the closing
    /// `)` (zsh's CMD_SUBST_CLOSE).
    ///
    /// Returns `None` when `par_event` reports failure (pending heredocs
    /// are cleared first), an empty program when `endtok` is not
    /// `Endinput`, and otherwise whatever `parse_program_until(None)`
    /// produces.
    ///
    /// zshrs port note: zsh's parse_event returns an `Eprog` (heap-
    /// allocated wordcode program). zshrs returns a `ZshProgram`
    /// (AST root). Same role at the parse-output boundary.
    ///
    /// NOTE(review): `par_event` returns only a bool and drops the sublist
    /// it parsed, so the program built by `parse_program_until` below
    /// starts from the lexer position *after* that sublist. Confirm the
    /// first sublist of the event is not being lost.
    pub fn parse_event(&mut self, endtok: LexTok) -> Option<ZshProgram> {
        // parse.c:616-619 — reset state and prime the lexer.
        self.lexer.tok = LexTok::Endinput;
        self.lexer.incmdpos = true;
        self.lexer.zshlex();
        // parse.c:620 — `init_parse();`
        // Runs after the first token was lexed; it resets the lexer flags
        // and both guard counters.
        self.init_parse();

        // parse.c:622-625 — drive par_event; on failure clear hdocs.
        if !self.par_event(endtok) {
            self.clear_hdocs();
            return None;
        }
        // parse.c:626-628 — if endtok != ENDINPUT, this is a sub-
        // parse for a substitution that doesn't need its own eprog.
        // zshrs returns an empty program in that case (caller
        // discards).
        if endtok != LexTok::Endinput {
            return Some(ZshProgram { lists: Vec::new() });
        }
        // parse.c:630 — `bld_eprog(1);` — build the final eprog.
        // par_event keeps no AST of its own, so the program is built
        // here by parse_program_until from the current lexer position.
        Some(self.parse_program_until(None))
    }
804
805 /// Parse one event (sublist with optional separator). Direct
806 /// port of zsh/Src/parse.c:633-695 `par_event`. Returns true if
807 /// an event was successfully parsed, false on EOF / endtok.
808 ///
809 /// zshrs port note: the C version emits wordcodes via ecadd/
810 /// set_list_code; zshrs's parser builds AST nodes via
811 /// parse_sublist + parse_list. Same flow, different output.
812 pub fn par_event(&mut self, endtok: LexTok) -> bool {
813 // parse.c:639-643 — skip leading SEPERs.
814 while self.lexer.tok == LexTok::Seper {
815 // parse.c:640-641 — at top-level (endtok == ENDINPUT),
816 // a SEPER on a fresh line ends the event.
817 if self.lexer.isnewlin > 0 && endtok == LexTok::Endinput {
818 return false;
819 }
820 self.lexer.zshlex();
821 }
822 // parse.c:644-647 — terminate on EOF or matching close-token.
823 if self.lexer.tok == LexTok::Endinput {
824 return false;
825 }
826 if self.lexer.tok == endtok {
827 return true;
828 }
829 // parse.c:649-... — drive parse_sublist + handle terminator.
830 // zshrs's parse_sublist already builds the AST node directly.
831 match self.parse_sublist() {
832 Some(_) => {
833 // parse.c:651-693 — terminator handling. zshrs's
834 // parse_list wraps this; for parse_event we just
835 // confirm the sublist parsed.
836 true
837 }
838 None => false,
839 }
840 }
841
    /// Parse one list — non-recursing variant. Direct port of
    /// zsh/Src/parse.c:807-817 `par_list1`. Like par_list but
    /// doesn't recurse on the trailing-separator path; used by
    /// callers that only want one statement (e.g. each arm of a
    /// case body).
    ///
    /// Returns whatever `parse_sublist` returns, unchanged.
    pub fn par_list1(&mut self) -> Option<ZshSublist> {
        // parse.c:810-816 — body is a single par_sublist call wrapped
        // in the eu/ecused tracking that zshrs doesn't need (no
        // wordcode buffer).
        self.parse_sublist()
    }
853
    /// Placeholder for zsh/Src/parse.c:2347-2361 `setheredoc`, which wires
    /// a here-document body onto the redirection that requested it.
    ///
    /// zshrs port note: zsh's setheredoc patches the wordcode
    /// in-place via `pc[1] = ecstrcode(doc); pc[2] = ecstrcode(term);`.
    /// zshrs threads heredoc bodies through `HereDocInfo` structs
    /// that resolve_redir applies during the post-parse fill_in pass.
    ///
    /// This method is a no-op: the body is empty and every parameter is
    /// ignored. It exists only so the C symbol has a Rust counterpart.
    pub fn setheredoc(
        &mut self,
        _pc: usize,
        _redir_type: i32,
        _doc: &str,
        _term: &str,
        _munged_term: &str,
    ) {
        // zshrs's heredoc resolution happens in fill_in_command /
        // resolve_redir at parser.rs top. This stub exists for API
        // parity with the C signature; live wiring happens via
        // self.lexer.heredocs which the post-parse pass consumes.
    }
878
879 /// Parse a wordlist for `for ... in WORDS;`. Direct port of
880 /// zsh/Src/parse.c:2362-2378 `par_wordlist`. Reads STRING tokens
881 /// until the next SEPER / SEMI / NEWLIN.
882 pub fn par_wordlist(&mut self) -> Vec<String> {
883 let mut out = Vec::new();
884 // parse.c:2362-2378 — collect STRINGs into the wordlist.
885 while self.lexer.tok == LexTok::String {
886 if let Some(text) = self.lexer.tokstr.clone() {
887 out.push(text);
888 }
889 self.lexer.zshlex();
890 }
891 out
892 }
893
894 /// Parse a newline-separated wordlist. Direct port of
895 /// zsh/Src/parse.c:2379-2398 `par_nl_wordlist`. Like
896 /// par_wordlist but tolerates leading/trailing newlines.
897 pub fn par_nl_wordlist(&mut self) -> Vec<String> {
898 // parse.c:2380-2381 — skip leading newlines.
899 while self.lexer.tok == LexTok::Newlin {
900 self.lexer.zshlex();
901 }
902 let out = self.par_wordlist();
903 // parse.c:2395-2397 — skip trailing newlines.
904 while self.lexer.tok == LexTok::Newlin {
905 self.lexer.zshlex();
906 }
907 out
908 }
909
910 /// Get the integer value of the next token in a cond expression.
911 /// Direct port of zsh/Src/parse.c:2643-2658 `get_cond_num`.
912 /// Used for `[[ N OP M ]]` numeric tests where N/M are integer
913 /// literals or variable references.
914 pub fn get_cond_num(&mut self) -> Option<i64> {
915 if self.lexer.tok != LexTok::String {
916 return None;
917 }
918 let text = self.lexer.tokstr.as_ref()?.clone();
919 // parse.c:2647-2655 — parse as integer with optional sign.
920 let parsed = text.parse::<i64>().ok()?;
921 self.lexer.zshlex();
922 Some(parsed)
923 }
924
    /// Emit a parser-level error. Direct port of zsh/Src/parse.c
    /// 2733-2766 `yyerror`. C version fills a per-event error buffer
    /// and sets errflag. zshrs pushes onto self.errors which the
    /// caller drains via parse()'s Result return.
    ///
    /// Thin wrapper: forwards `msg` unchanged to `self.error`.
    pub fn yyerror(&mut self, msg: &str) {
        // parse.c:2735-2765 — zsh's yyerror collects the offending
        // token's literal text + line number. zshrs already does
        // this via self.error() with the lexer's toklineno.
        self.error(msg);
    }
935
936 // ============================================================
937 // Wordcode emission stubs (parse.c private helpers)
938 //
939 // The following functions are direct counterparts of zsh's
940 // private wordcode-emission helpers in parse.c. zsh uses these
941 // to write u32 opcodes into a flat `ecbuf` array; zshrs builds
942 // an AST tree and never emits wordcode at the parse layer.
943 // The implementations are documented stubs that preserve the
944 // function signatures + cite the C source. Real wordcode would
945 // be emitted later by compile_zsh.rs walking the AST.
946 //
947 // Listed for port-surface completeness so every parse.c symbol
948 // has a Rust counterpart even when the algorithm is moot in the
949 // AST architecture.
950 // ============================================================
951
/// No-op counterpart of `set_list_code` (zsh/Src/parse.c:736-749).
///
/// In C, an `ecadd(0)` placeholder is written before `par_sublist`
/// runs and this helper later rewrites that slot with
/// `WCB_LIST(type, distance)` once the sublist length is known.
///
/// The Rust parser builds `ZshList { sublist, flags }` nodes directly
/// with their final flags, so there is no slot to patch. All
/// arguments are ignored; the function exists only so that every
/// parse.c symbol has a Rust counterpart.
pub fn set_list_code(_p: usize, _type_code: i32, _cmplx: bool) {
    // Intentionally empty: wordcode patching (parse.c:740-748) has no
    // equivalent in the AST representation.
}
966
/// No-op counterpart of `set_sublist_code` (zsh/Src/parse.c:753-763).
///
/// The C helper patches a sublist placeholder wordcode with its real
/// opcode, playing the same role as `set_list_code` one level down.
/// The AST-based parser has nothing to patch, so every argument is
/// ignored.
pub fn set_sublist_code(_p: usize, _type_code: i32, _flags: i32, _skip: i32, _cmplx: bool) {
    // Intentionally empty: wordcode patching (parse.c:757-762) has no
    // equivalent in the AST representation.
}
973
/// No-op counterpart of `ecadd` (zsh/Src/parse.c:396-408).
///
/// The C helper appends one opcode to `ecbuf`, growing it on demand,
/// and returns the index of the new opcode. No wordcode buffer exists
/// here because the AST is built inline, so the argument is ignored
/// and the returned index is always `0`.
pub fn ecadd(_c: u32) -> usize {
    // Nothing is appended (parse.c:399-407 has no AST equivalent);
    // report a fixed index.
    let index: usize = 0;
    index
}
982
/// No-op counterpart of `ecdel` (zsh/Src/parse.c:412-421).
///
/// The C helper removes the wordcode at position `p` with a memmove
/// and decrements `ecused`. There is no wordcode buffer in this port,
/// so the position is ignored.
pub fn ecdel(_p: usize) {
    // Intentionally empty (parse.c:415-420 has no AST equivalent).
}
988
/// No-op counterpart of `ecstrcode` (zsh/Src/parse.c:425-471).
///
/// The C helper packs short strings (at most 4 chars) into a single
/// wordcode and stores longer ones in a binary tree (`Eccstr`),
/// de-duplicating them via hasher + strcmp. The AST keeps strings
/// directly, so nothing is encoded: the input is ignored and the
/// result is always `0`.
pub fn ecstrcode(_s: &str) -> u32 {
    // No encoding takes place (parse.c:432-470 has no AST
    // equivalent); report a fixed code.
    let code: u32 = 0;
    code
}
999
/// No-op counterpart of `ecispace` (zsh/Src/parse.c:371-388).
///
/// The C helper opens `n` empty wordcode slots at position `p`
/// (grow + memmove + heredoc pointer adjustment) to reserve room for
/// a forward-jump opcode that is patched once its target is known.
/// In this port jumps are resolved at compile_zsh time, so both
/// arguments are ignored.
pub fn ecispace(_p: usize, _n: usize) {
    // Intentionally empty (parse.c:376-387 has no AST equivalent).
}
1008
/// No-op counterpart of `ecadjusthere` (zsh/Src/parse.c:359-367).
///
/// The C helper walks the pending heredoc list and bumps each
/// recorded program counter by `d` after `ecispace` / `ecdel` shift
/// the buffer. Here heredocs are tracked by index in the lexer's Vec
/// rather than by absolute wordcode offset, so both arguments are
/// ignored.
pub fn ecadjusthere(_p: usize, _d: i32) {
    // Intentionally empty (parse.c:362-366 has no AST equivalent).
}
1017
1018 // ============================================================
1019 // Eprog runtime ops (parse.c:2767-2853)
1020 //
1021 // dupeprog / useeprog / freeeprog are zsh's reference-counting
1022 // helpers for executable programs. zshrs's AST is owned by
1023 // value (Rust ownership); cloning is a tree-deep copy via
1024 // Clone, "use" is a no-op (the executor borrows the AST), and
1025 // "free" is automatic on drop.
1026 // ============================================================
1027
1028 /// Duplicate an Eprog. Direct port of zsh/Src/parse.c:2767-2812
1029 /// `dupeprog`. C version deep-copies the wordcode array + string
1030 /// table + pattern progs. zshrs uses Clone on the AST.
1031 pub fn dupeprog(prog: &ZshProgram) -> ZshProgram {
1032 prog.clone()
1033 }
1034
1035 /// Increment an Eprog's reference count. Direct port of
1036 /// zsh/Src/parse.c:2813-2822 `useeprog`. zshrs no-op (Rust
1037 /// ownership).
1038 pub fn useeprog(_prog: &ZshProgram) {
1039 // parse.c:2815-2821 — `prog->nref++` if not heap-allocated.
1040 // zshrs no-op.
1041 }
1042
1043 /// Decrement / free an Eprog. Direct port of
1044 /// zsh/Src/parse.c:2823-2854 `freeeprog`. zshrs no-op (drop on
1045 /// scope-exit).
1046 pub fn freeeprog(_prog: ZshProgram) {
1047 // parse.c:2825-2853 — decrement nref, free if zero. zshrs
1048 // drops via Rust ownership.
1049 }
1050
1051 // ============================================================
1052 // Wordcode runtime getters (parse.c:2853-3060)
1053 //
1054 // These read packed wordcode out of a running Eprog at execution
1055 // time. zshrs's executor walks the AST directly so these are
1056 // stubs that preserve the C signatures + cite the source.
1057 // ============================================================
1058
/// No-op counterpart of `ecgetstr` (zsh/Src/parse.c:2853-2887).
///
/// The C helper unpacks 4-char inline strings from the wordcode
/// stream and indexes into the strs table for longer ones. The AST
/// stores strings directly, so there is nothing to read: `_dup` is
/// ignored and an empty string is returned.
pub fn ecgetstr(_dup: bool) -> String {
    // No wordcode stream to unpack (parse.c:2858-2886).
    String::default()
}
1067
/// No-op counterpart of `ecrawstr` (zsh/Src/parse.c:2890-2913).
///
/// The C helper reads a packed string without consuming the wordcode
/// pointer. With no wordcode stream in this port, the result is
/// always an empty string.
pub fn ecrawstr() -> String {
    String::default()
}
1074
/// No-op counterpart of `ecgetarr` (zsh/Src/parse.c:2916-2933).
///
/// The C helper reads a NUL-terminated string array out of wordcode.
/// With no wordcode stream in this port, both arguments are ignored
/// and the result is always an empty vector.
pub fn ecgetarr(_num: usize, _dup: bool) -> Vec<String> {
    let strings: Vec<String> = Vec::new();
    strings
}
1080
/// No-op counterpart of `ecgetlist` (zsh/Src/parse.c:2936-2955).
///
/// The C helper reads a linked list of strings out of wordcode.
/// With no wordcode stream in this port, both arguments are ignored
/// and the result is always an empty vector.
pub fn ecgetlist(_num: usize, _dup: bool) -> Vec<String> {
    let strings: Vec<String> = Vec::new();
    strings
}
1086
1087 /// Read a sequence of redirection wordcodes. Direct port of
1088 /// zsh/Src/parse.c:2958-2991 `ecgetredirs`. zshrs no-op
1089 /// (redirections live as AST ZshRedir nodes).
1090 pub fn ecgetredirs() -> Vec<ZshRedir> {
1091 Vec::new()
1092 }
1093
1094 /// Copy consecutive redirection wordcodes into a new Eprog.
1095 /// Direct port of zsh/Src/parse.c:3001-3060 `eccopyredirs`.
1096 /// zshrs no-op.
1097 pub fn eccopyredirs() -> Option<ZshProgram> {
1098 None
1099 }
1100
/// No-op counterpart of `init_eprog` (zsh/Src/parse.c:3068-3075).
///
/// The C helper sets up the placeholder Eprog
/// (`dummy_eprog_code = WCB_END()`). The AST has no such dummy node:
/// an empty program is simply `ZshProgram { lists: vec![] }`, so
/// there is nothing to initialize.
pub fn init_eprog() {
    // Intentionally empty (parse.c:3071-3074 has no AST equivalent).
}
1109
1110 /// Parse the complete input
1111 pub fn parse(&mut self) -> Result<ZshProgram, Vec<ParseError>> {
1112 self.lexer.zshlex();
1113
1114 let mut program = self.parse_program_until(None);
1115
1116 if !self.errors.is_empty() {
1117 return Err(std::mem::take(&mut self.errors));
1118 }
1119 // Surface lexer-level errors (unmatched quote/heredoc/etc.)
1120 // that the parser silently rolls past. zsh aborts with a
1121 // diagnostic in this case; mirror it.
1122 if let Some(msg) = self.lexer.error.clone() {
1123 return Err(vec![ParseError {
1124 message: msg,
1125 line: 1,
1126 }]);
1127 }
1128
1129 // Post-pass: wire heredoc bodies (collected by lexer.process_heredocs)
1130 // back into ZshRedir.heredoc fields via heredoc_idx.
1131 let bodies: Vec<HereDocInfo> = self
1132 .lexer
1133 .heredocs
1134 .iter()
1135 .map(|h| HereDocInfo {
1136 content: h.content.clone(),
1137 terminator: h.terminator.clone(),
1138 quoted: h.quoted,
1139 })
1140 .collect();
1141 if !bodies.is_empty() {
1142 fill_heredoc_bodies(&mut program, &bodies);
1143 }
1144
1145 Ok(program)
1146 }
1147
1148 /// Parse a program (list of lists)
1149 /// Parse a complete program (top-level entry). Calls
1150 /// parse_program_until with no end-token sentinel. Direct port of
1151 /// zsh/Src/parse.c:614-720 `parse_event` / `parse_list` /
1152 /// `par_event` flow. C distinguishes COND_EVENT (single command
1153 /// for here-string) from full event parse; zshrs's parse_program
1154 /// is the full-event entry.
1155 fn parse_program(&mut self) -> ZshProgram {
1156 self.parse_program_until(None)
1157 }
1158
1159 /// Parse a program until we hit an end token
1160 /// Parse a program until one of `end_tokens` is seen (or EOF).
1161 /// Drives parse_list in a loop. C equivalent: the body of par_event
1162 /// (parse.c:635-695) iterating par_list against the lexer.
1163 fn parse_program_until(&mut self, end_tokens: Option<&[LexTok]>) -> ZshProgram {
1164 let mut lists = Vec::new();
1165
1166 loop {
1167 if self.check_limit() {
1168 self.error("parser exceeded global iteration limit");
1169 break;
1170 }
1171
1172 // Skip separators
1173 while self.lexer.tok == LexTok::Seper || self.lexer.tok == LexTok::Newlin {
1174 if self.check_limit() {
1175 self.error("parser exceeded global iteration limit");
1176 return ZshProgram { lists };
1177 }
1178 self.lexer.zshlex();
1179 }
1180
1181 if self.lexer.tok == LexTok::Endinput || self.lexer.tok == LexTok::Lexerr {
1182 break;
1183 }
1184
1185 // Check for end tokens
1186 if let Some(end_toks) = end_tokens {
1187 if end_toks.contains(&self.lexer.tok) {
1188 break;
1189 }
1190 }
1191
1192 // Also stop at these tokens when not explicitly looking for them
1193 // Note: Else/Elif/Then are NOT here - they're handled by parse_if
1194 // to allow nested if statements inside case arms, loops, etc.
1195 match self.lexer.tok {
1196 LexTok::Outbrace
1197 | LexTok::Dsemi
1198 | LexTok::Semiamp
1199 | LexTok::Semibar
1200 | LexTok::Done
1201 | LexTok::Fi
1202 | LexTok::Esac
1203 | LexTok::Zend => break,
1204 _ => {}
1205 }
1206
1207 match self.parse_list() {
1208 Some(list) => {
1209 let detected = simple_name_with_inoutpar(&list);
1210 lists.push(list);
1211 // Synthesize a FuncDef for the `name() { body }` shape
1212 // at parse time so body_source is captured while the
1213 // lexer still has the input. The lexer port emits
1214 // `name(` as a single Word ending in `<INPAR><OUTPAR>`,
1215 // so the Simple list is followed by an Inbrace once
1216 // separators are skipped. For `name() cmd args` the
1217 // body has already been swallowed into the same
1218 // Simple's words tail — synthesize directly from there.
1219 if let Some((names, body_argv)) = detected {
1220 if !body_argv.is_empty() {
1221 // One-line body already in the Simple. Build
1222 // a Simple from body_argv as the function body.
1223 lists.pop();
1224 let body_simple = ZshCommand::Simple(ZshSimple {
1225 assigns: Vec::new(),
1226 words: body_argv,
1227 redirs: Vec::new(),
1228 });
1229 let body_list = ZshList {
1230 sublist: ZshSublist {
1231 pipe: ZshPipe {
1232 cmd: body_simple,
1233 next: None,
1234 lineno: self.lexer.lineno,
1235 merge_stderr: false,
1236 },
1237 next: None,
1238 flags: SublistFlags::default(),
1239 },
1240 flags: ListFlags::default(),
1241 };
1242 let funcdef = ZshCommand::FuncDef(ZshFuncDef {
1243 names,
1244 body: Box::new(ZshProgram {
1245 lists: vec![body_list],
1246 }),
1247 tracing: false,
1248 auto_call_args: None,
1249 body_source: None,
1250 });
1251 let synthetic = ZshList {
1252 sublist: ZshSublist {
1253 pipe: ZshPipe {
1254 cmd: funcdef,
1255 next: None,
1256 lineno: self.lexer.lineno,
1257 merge_stderr: false,
1258 },
1259 next: None,
1260 flags: SublistFlags::default(),
1261 },
1262 flags: ListFlags::default(),
1263 };
1264 lists.push(synthetic);
1265 continue;
1266 }
1267 // Else: words.len() == 1 (only the trailing `name()`
1268 // word), brace body follows. `names` may carry
1269 // multiple identifiers from the `fna fnb fnc()`
1270 // shorthand — all share the same brace body per
1271 // src/zsh/Src/parse.c:1666 par_funcdef wordlist.
1272 // Skip separators on the real lexer; safe because
1273 // parse_program's next iteration would also skip them.
1274 while self.lexer.tok == LexTok::Seper || self.lexer.tok == LexTok::Newlin {
1275 self.lexer.zshlex();
1276 }
1277 if self.lexer.tok == LexTok::Inbrace {
1278 // Capture body_start BEFORE the lexer
1279 // advances past the first body token. The
1280 // outer zshlex() consumed `{`; lexer.pos
1281 // is now right after `{`. The next
1282 // `zshlex()` would advance past `echo`,
1283 // making body_start land mid-body and
1284 // lose the first word — `typeset -f f`
1285 // printed `a; echo b` instead of
1286 // `echo a; echo b` for `f() { echo a;
1287 // echo b }`.
1288 let body_start = self.lexer.pos;
1289 self.lexer.zshlex();
1290 let body = self.parse_program();
1291 let body_end = if self.lexer.tok == LexTok::Outbrace {
1292 self.lexer.pos.saturating_sub(1)
1293 } else {
1294 self.lexer.pos
1295 };
1296 let body_source = self
1297 .lexer
1298 .input
1299 .get(body_start..body_end)
1300 .map(|s| s.trim().to_string())
1301 .filter(|s| !s.is_empty());
1302 if self.lexer.tok == LexTok::Outbrace {
1303 self.lexer.zshlex();
1304 }
1305 // Replace the Simple list with a FuncDef list.
1306 lists.pop();
1307 let funcdef = ZshCommand::FuncDef(ZshFuncDef {
1308 names,
1309 body: Box::new(body),
1310 tracing: false,
1311 auto_call_args: None,
1312 body_source,
1313 });
1314 let synthetic = ZshList {
1315 sublist: ZshSublist {
1316 pipe: ZshPipe {
1317 cmd: funcdef,
1318 next: None,
1319 lineno: self.lexer.lineno,
1320 merge_stderr: false,
1321 },
1322 next: None,
1323 flags: SublistFlags::default(),
1324 },
1325 flags: ListFlags::default(),
1326 };
1327 lists.push(synthetic);
1328 } else if !matches!(
1329 self.lexer.tok,
1330 LexTok::Endinput | LexTok::Outbrace | LexTok::Seper | LexTok::Newlin
1331 ) {
1332 // No-brace one-line body: `foo() echo hello`.
1333 // Parse a single command for the body.
1334 let body_cmd = self.parse_cmd();
1335 if let Some(cmd) = body_cmd {
1336 let body_list = ZshList {
1337 sublist: ZshSublist {
1338 pipe: ZshPipe {
1339 cmd,
1340 next: None,
1341 lineno: self.lexer.lineno,
1342 merge_stderr: false,
1343 },
1344 next: None,
1345 flags: SublistFlags::default(),
1346 },
1347 flags: ListFlags::default(),
1348 };
1349 lists.pop();
1350 let funcdef = ZshCommand::FuncDef(ZshFuncDef {
1351 names: names.clone(),
1352 body: Box::new(ZshProgram {
1353 lists: vec![body_list],
1354 }),
1355 tracing: false,
1356 auto_call_args: None,
1357 body_source: None,
1358 });
1359 let synthetic = ZshList {
1360 sublist: ZshSublist {
1361 pipe: ZshPipe {
1362 cmd: funcdef,
1363 next: None,
1364 lineno: self.lexer.lineno,
1365 merge_stderr: false,
1366 },
1367 next: None,
1368 flags: SublistFlags::default(),
1369 },
1370 flags: ListFlags::default(),
1371 };
1372 lists.push(synthetic);
1373 }
1374 }
1375 }
1376 }
1377 None => break,
1378 }
1379 }
1380
1381 ZshProgram { lists }
1382 }
1383
1384 /// Parse a list (sublist with optional & or ;).
1385 ///
1386 /// Direct port of zsh/Src/parse.c:771-804 `par_list` (and the
1387 /// par_list1 wrapper at parse.c:807-817).
1388 ///
1389 /// **Structural divergence**: zsh's parse.c emits flat wordcode
1390 /// into the `ecbuf` u32 array via `ecadd(0)` (placeholder),
1391 /// `set_list_code(p, code, complexity)`, `wc_bdata(Z_END)`. zshrs
1392 /// builds an AST node `ZshList { sublist, flags }` instead. The
1393 /// async/sync/disown discrimination at parse.c:785-790 maps to
1394 /// zshrs's `ListFlags { async_, disown }` field — Z_SYNC is the
1395 /// default (no flags), Z_ASYNC = `&` = `async_=true`, Z_DISOWN +
1396 /// Z_ASYNC = `&!`/`&|` = both true. Same semantics, different
1397 /// representation. This divergence is repository-wide: every
1398 /// `par_*` function emits wordcode in C, every `parse_*` builds
1399 /// AST in Rust. The compile_zsh module then traverses the AST to
1400 /// emit fusevm bytecode, which serves the same role as zsh's
1401 /// wordcode but with a different opcode set and execution model.
1402 fn parse_list(&mut self) -> Option<ZshList> {
1403 let sublist = self.parse_sublist()?;
1404
1405 let flags = match self.lexer.tok {
1406 LexTok::Amper => {
1407 self.lexer.zshlex();
1408 ListFlags {
1409 async_: true,
1410 disown: false,
1411 }
1412 }
1413 LexTok::Amperbang => {
1414 self.lexer.zshlex();
1415 ListFlags {
1416 async_: true,
1417 disown: true,
1418 }
1419 }
1420 LexTok::Seper | LexTok::Semi | LexTok::Newlin => {
1421 self.lexer.zshlex();
1422 ListFlags::default()
1423 }
1424 _ => ListFlags::default(),
1425 };
1426
1427 Some(ZshList { sublist, flags })
1428 }
1429
1430 /// Parse a sublist (pipelines connected by && or ||).
1431 ///
1432 /// Direct port of zsh/Src/parse.c:825-867 `par_sublist` and
1433 /// par_sublist2 at parse.c:869-892. par_sublist handles the
1434 /// && / || conjunction and emits WC_SUBLIST opcodes; par_sublist2
1435 /// handles the leading `!` negation and `coproc` keyword.
1436 ///
1437 /// AST mapping: ZshSublist { pipe, conj_chain }, where `conj_chain`
1438 /// is a Vec<(ConjOp, ZshSublist)> for chained && / ||. C uses
1439 /// flat wordcode with WC_SUBLIST_AND / WC_SUBLIST_OR markers.
1440 fn parse_sublist(&mut self) -> Option<ZshSublist> {
1441 self.recursion_depth += 1;
1442 if self.check_recursion() {
1443 self.error("parse_sublist: max recursion depth exceeded");
1444 self.recursion_depth -= 1;
1445 return None;
1446 }
1447
1448 let mut flags = SublistFlags::default();
1449
1450 // Handle coproc and !
1451 if self.lexer.tok == LexTok::Coproc {
1452 flags.coproc = true;
1453 self.lexer.zshlex();
1454 } else if self.lexer.tok == LexTok::Bang {
1455 flags.not = true;
1456 self.lexer.zshlex();
1457 }
1458
1459 let pipe = match self.parse_pipe() {
1460 Some(p) => p,
1461 None => {
1462 self.recursion_depth -= 1;
1463 return None;
1464 }
1465 };
1466
1467 // Check for && or ||
1468 let next = match self.lexer.tok {
1469 LexTok::Damper => {
1470 self.lexer.zshlex();
1471 self.skip_separators();
1472 self.parse_sublist().map(|s| (SublistOp::And, Box::new(s)))
1473 }
1474 LexTok::Dbar => {
1475 self.lexer.zshlex();
1476 self.skip_separators();
1477 self.parse_sublist().map(|s| (SublistOp::Or, Box::new(s)))
1478 }
1479 _ => None,
1480 };
1481
1482 self.recursion_depth -= 1;
1483 Some(ZshSublist { pipe, next, flags })
1484 }
1485
1486 /// Parse a pipeline
1487 /// Parse a pipeline (cmds joined by `|` / `|&`). Direct port of
1488 /// zsh/Src/parse.c:894-956 `par_pline`. AST: ZshPipe { cmds: Vec<ZshCommand> }.
1489 /// C emits WC_PIPE wordcodes per command; same flow.
1490 fn parse_pipe(&mut self) -> Option<ZshPipe> {
1491 self.recursion_depth += 1;
1492 if self.check_recursion() {
1493 self.error("parse_pipe: max recursion depth exceeded");
1494 self.recursion_depth -= 1;
1495 return None;
1496 }
1497
1498 let lineno = self.lexer.toklineno;
1499 let cmd = match self.parse_cmd() {
1500 Some(c) => c,
1501 None => {
1502 self.recursion_depth -= 1;
1503 return None;
1504 }
1505 };
1506
1507 // Check for | or |&
1508 let mut merge_stderr = false;
1509 let next = match self.lexer.tok {
1510 LexTok::Bar | LexTok::Baramp => {
1511 merge_stderr = self.lexer.tok == LexTok::Baramp;
1512 self.lexer.zshlex();
1513 self.skip_separators();
1514 self.parse_pipe().map(Box::new)
1515 }
1516 _ => None,
1517 };
1518
1519 self.recursion_depth -= 1;
1520 Some(ZshPipe {
1521 cmd,
1522 next,
1523 lineno,
1524 merge_stderr,
1525 })
1526 }
1527
1528 /// Parse a command
1529 /// Parse a command — dispatches by leading token (FOR / CASE /
1530 /// IF / WHILE / UNTIL / REPEAT / FUNC / DINBRACK / DINPAR /
1531 /// INPAR subshell / INBRACE current-shell / TIME / NOCORRECT,
1532 /// else simple). Direct port of zsh/Src/parse.c:958-1085 `par_cmd`.
1533 fn parse_cmd(&mut self) -> Option<ZshCommand> {
1534 // Parse leading redirections
1535 let mut redirs = Vec::new();
1536 while self.lexer.tok.is_redirop() {
1537 if let Some(redir) = self.parse_redir() {
1538 redirs.push(redir);
1539 }
1540 }
1541
1542 let cmd = match self.lexer.tok {
1543 LexTok::For | LexTok::Foreach => self.parse_for(),
1544 LexTok::Select => self.parse_select(),
1545 LexTok::Case => self.parse_case(),
1546 LexTok::If => self.parse_if(),
1547 LexTok::While => self.parse_while(false),
1548 LexTok::Until => self.parse_while(true),
1549 LexTok::Repeat => self.parse_repeat(),
1550 LexTok::Inpar => self.parse_subsh(),
1551 LexTok::Inoutpar => self.parse_anon_funcdef(),
1552 LexTok::Inbrace => self.parse_cursh(),
1553 LexTok::Func => self.parse_funcdef(),
1554 LexTok::Dinbrack => self.parse_cond(),
1555 LexTok::Dinpar => self.parse_arith(),
1556 LexTok::Time => self.parse_time(),
1557 _ => self.parse_simple(redirs),
1558 };
1559
1560 // Parse trailing redirections. For Simple commands the redirs were
1561 // already captured inside parse_simple; for compound forms (Cursh,
1562 // Subsh, If, While, etc.) we collect them here and wrap in
1563 // ZshCommand::Redirected so compile_zsh can scope-bracket them.
1564 if let Some(inner) = cmd {
1565 let mut trailing: Vec<ZshRedir> = Vec::new();
1566 while self.lexer.tok.is_redirop() {
1567 if let Some(redir) = self.parse_redir() {
1568 trailing.push(redir);
1569 }
1570 }
1571 if trailing.is_empty() {
1572 return Some(inner);
1573 }
1574 // Simple already absorbed its own redirs (compile path expects
1575 // them on ZshSimple), so don't double-wrap.
1576 if matches!(inner, ZshCommand::Simple(_)) {
1577 if let ZshCommand::Simple(mut s) = inner {
1578 s.redirs.extend(trailing);
1579 return Some(ZshCommand::Simple(s));
1580 }
1581 unreachable!()
1582 }
1583 return Some(ZshCommand::Redirected(Box::new(inner), trailing));
1584 }
1585
1586 None
1587 }
1588
1589 /// Parse a simple command
1590 /// Parse a simple command (assignments + words + redirections).
1591 /// Direct port of zsh/Src/parse.c:1836-2228 `par_simple` —
1592 /// the largest single function in parse.c. Handles ENVSTRING/
1593 /// ENVARRAY assignments at command head, intermixed redirs,
1594 /// typeset-style multi-assignment commands, and the trailing
1595 /// inout-par `()` that converts a simple command into an inline
1596 /// function definition.
1597 fn parse_simple(&mut self, mut redirs: Vec<ZshRedir>) -> Option<ZshCommand> {
1598 let mut assigns = Vec::new();
1599 let mut words = Vec::new();
1600 const MAX_ITERATIONS: usize = 10_000;
1601 let mut iterations = 0;
1602
1603 // Parse leading assignments
1604 while self.lexer.tok == LexTok::Envstring || self.lexer.tok == LexTok::Envarray {
1605 iterations += 1;
1606 if iterations > MAX_ITERATIONS {
1607 self.error("parse_simple: exceeded max iterations in assignments");
1608 return None;
1609 }
1610 if let Some(assign) = self.parse_assign() {
1611 assigns.push(assign);
1612 }
1613 self.lexer.zshlex();
1614 }
1615
1616 // Parse words and redirections
1617 loop {
1618 iterations += 1;
1619 if iterations > MAX_ITERATIONS {
1620 self.error("parse_simple: exceeded max iterations");
1621 return None;
1622 }
1623 match self.lexer.tok {
1624 LexTok::String | LexTok::Typeset => {
1625 let s = self.lexer.tokstr.clone();
1626 if let Some(s) = s {
1627 words.push(s);
1628 }
1629 self.lexer.zshlex();
1630 // Check for function definition foo() { ... }
1631 if words.len() == 1 && self.peek_inoutpar() {
1632 return self.parse_inline_funcdef(words.pop().unwrap());
1633 }
1634 // `{name}>file` named-fd redirect: the lexer doesn't
1635 // recognize this shape, so the bare word `{name}`
1636 // arrives as a String. If it matches `{IDENT}` and
1637 // the NEXT token is a redirop, pop it off as the
1638 // varid for that redir.
1639 if !words.is_empty() && self.lexer.tok.is_redirop() {
1640 let last = words.last().unwrap();
1641 let untoked = crate::lexer::untokenize(last);
1642 if untoked.starts_with('{') && untoked.ends_with('}') && untoked.len() > 2 {
1643 let name = &untoked[1..untoked.len() - 1];
1644 if !name.is_empty()
1645 && name.chars().all(|c| c == '_' || c.is_ascii_alphanumeric())
1646 && name
1647 .chars()
1648 .next()
1649 .map(|c| c == '_' || c.is_ascii_alphabetic())
1650 .unwrap_or(false)
1651 {
1652 let varid = name.to_string();
1653 words.pop();
1654 if let Some(mut redir) = self.parse_redir() {
1655 redir.varid = Some(varid);
1656 redirs.push(redir);
1657 }
1658 continue;
1659 }
1660 }
1661 }
1662 }
1663 _ if self.lexer.tok.is_redirop() => {
1664 match self.parse_redir() {
1665 Some(redir) => redirs.push(redir),
1666 None => break, // Error in redir parsing, stop
1667 }
1668 }
1669 LexTok::Inoutpar if !words.is_empty() => {
1670 // foo() { ... } style function
1671 return self.parse_inline_funcdef(words.pop().unwrap());
1672 }
1673 _ => break,
1674 }
1675 }
1676
1677 if assigns.is_empty() && words.is_empty() && redirs.is_empty() {
1678 return None;
1679 }
1680
1681 Some(ZshCommand::Simple(ZshSimple {
1682 assigns,
1683 words,
1684 redirs,
1685 }))
1686 }
1687
1688 /// Parse an assignment
1689 /// Parse an assignment word `NAME=value` or `NAME=(arr items)`.
1690 /// Sub-routine of parse_simple. The C source handles assignments
1691 /// inline in par_simple via the ENVSTRING/ENVARRAY token paths
1692 /// (parse.c:1842-2000ish); zshrs splits it out to a dedicated
1693 /// helper for clarity.
1694 fn parse_assign(&mut self) -> Option<ZshAssign> {
1695 use crate::tokens::char_tokens;
1696
1697 let tokstr = self.lexer.tokstr.as_ref()?;
1698
1699 // Parse name=value or name+=value.
1700 let (name, value_str, append) = if self.lexer.tok == LexTok::Envarray {
1701 let (name, append) = if let Some(stripped) = tokstr.strip_suffix('+') {
1702 (stripped, true)
1703 } else {
1704 (tokstr.as_str(), false)
1705 };
1706 (name.to_string(), String::new(), append)
1707 } else if let Some(pos) = tokstr.find(char_tokens::EQUALS) {
1708 let name_part = &tokstr[..pos];
1709 let (name, append) = if let Some(stripped) = name_part.strip_suffix('+') {
1710 (stripped, true)
1711 } else {
1712 (name_part, false)
1713 };
1714 (
1715 name.to_string(),
1716 tokstr[pos + char_tokens::EQUALS.len_utf8()..].to_string(),
1717 append,
1718 )
1719 } else if let Some(pos) = tokstr.find('=') {
1720 // Fallback to literal '=' for compatibility
1721 let name_part = &tokstr[..pos];
1722 let (name, append) = if let Some(stripped) = name_part.strip_suffix('+') {
1723 (stripped, true)
1724 } else {
1725 (name_part, false)
1726 };
1727 (name.to_string(), tokstr[pos + 1..].to_string(), append)
1728 } else {
1729 return None;
1730 };
1731
1732 let value = if self.lexer.tok == LexTok::Envarray {
1733 // Array assignment: name=(...)
1734 let mut elements = Vec::new();
1735 self.lexer.zshlex(); // skip past token
1736
1737 let mut arr_iters = 0;
1738 const MAX_ARRAY_ELEMENTS: usize = 10_000;
1739 while matches!(
1740 self.lexer.tok,
1741 LexTok::String | LexTok::Seper | LexTok::Newlin
1742 ) {
1743 arr_iters += 1;
1744 if arr_iters > MAX_ARRAY_ELEMENTS {
1745 self.error("array assignment exceeded maximum elements");
1746 break;
1747 }
1748 if self.lexer.tok == LexTok::String {
1749 if let Some(ref s) = self.lexer.tokstr {
1750 elements.push(s.clone());
1751 }
1752 }
1753 self.lexer.zshlex();
1754 }
1755
1756 // The closing OUTPAR is consumed here. The outer parse_simple
1757 // loop will then `zshlex()` past whatever follows (typically
1758 // a separator or the next word) — calling zshlex twice in
1759 // tandem (here AND in parse_simple) over-advances and merges
1760 // a following `name() { … }` funcdef into the same Simple.
1761 // We only consume Outpar; let the caller handle the rest.
1762 // Without this guard `g=(o1); f() { :; }` parsed as one
1763 // Simple with assigns=[g] and words=["f()"] (one token).
1764 if self.lexer.tok == LexTok::Outpar {
1765 // Note: do NOT zshlex() here. parse_simple's `self.lexer
1766 // .zshlex()` after `parse_assign` returns advances past
1767 // the Outpar onto the next significant token.
1768 //
1769 // Force `incmdpos=true` so the next zshlex() recognizes
1770 // a follow-up `b=(...)` / `b=val` as Envarray/Envstring.
1771 // The lexer flips incmdpos to false on bare Outpar (which
1772 // is correct for subshell-close context), but for an
1773 // array-assignment close more assigns/words may follow.
1774 self.lexer.incmdpos = true;
1775 }
1776
1777 ZshAssignValue::Array(elements)
1778 } else {
1779 ZshAssignValue::Scalar(value_str)
1780 };
1781
1782 Some(ZshAssign {
1783 name,
1784 value,
1785 append,
1786 })
1787 }
1788
1789 /// Parse a redirection
1790 /// Parse a redirection (>file, <file, >>file, <<HEREDOC, etc.).
1791 /// Direct port of zsh/Src/parse.c:2229-2346 `par_redir`. Returns
1792 /// a ZshRedir node carrying the operator type, fd, target word
1793 /// (or here-doc body / pipe-redir command), and any `{var}` style
1794 /// fd-binding parameter.
1795 fn parse_redir(&mut self) -> Option<ZshRedir> {
1796 let rtype = match self.lexer.tok {
1797 LexTok::Outang => RedirType::Write,
1798 LexTok::Outangbang => RedirType::Writenow,
1799 LexTok::Doutang => RedirType::Append,
1800 LexTok::Doutangbang => RedirType::Appendnow,
1801 LexTok::Inang => RedirType::Read,
1802 LexTok::Inoutang => RedirType::ReadWrite,
1803 LexTok::Dinang => RedirType::Heredoc,
1804 LexTok::Dinangdash => RedirType::HeredocDash,
1805 LexTok::Trinang => RedirType::Herestr,
1806 LexTok::Inangamp => RedirType::MergeIn,
1807 LexTok::Outangamp => RedirType::MergeOut,
1808 LexTok::Ampoutang => RedirType::ErrWrite,
1809 LexTok::Outangampbang => RedirType::ErrWritenow,
1810 LexTok::Doutangamp => RedirType::ErrAppend,
1811 LexTok::Doutangampbang => RedirType::ErrAppendnow,
1812 _ => return None,
1813 };
1814
1815 let fd = if self.lexer.tokfd >= 0 {
1816 self.lexer.tokfd
1817 } else if matches!(
1818 rtype,
1819 RedirType::Read
1820 | RedirType::ReadWrite
1821 | RedirType::MergeIn
1822 | RedirType::Heredoc
1823 | RedirType::HeredocDash
1824 | RedirType::Herestr
1825 ) {
1826 0
1827 } else {
1828 1
1829 };
1830
1831 self.lexer.zshlex();
1832
1833 let name = match self.lexer.tok {
1834 LexTok::String | LexTok::Envstring => {
1835 let n = self.lexer.tokstr.clone().unwrap_or_default();
1836 self.lexer.zshlex();
1837 n
1838 }
1839 _ => {
1840 self.error("expected word after redirection");
1841 return None;
1842 }
1843 };
1844
1845 // Heredoc body capture: when reading the terminator above, the
1846 // lexer pushed a HereDoc to self.lexer.heredocs[]. Record the
1847 // index so fill_heredoc_bodies() can wire content back after
1848 // process_heredocs() has run.
1849 let heredoc_idx = if matches!(rtype, RedirType::Heredoc | RedirType::HeredocDash) {
1850 if !self.lexer.heredocs.is_empty() {
1851 Some(self.lexer.heredocs.len() - 1)
1852 } else {
1853 None
1854 }
1855 } else {
1856 None
1857 };
1858
1859 Some(ZshRedir {
1860 rtype,
1861 fd,
1862 name,
1863 heredoc: None,
1864 varid: None,
1865 heredoc_idx,
1866 })
1867 }
1868
1869 /// Parse for/foreach loop
1870 /// Parse `for NAME in WORDS; do BODY; done` (foreach style) AND
1871 /// `for ((init; cond; incr)) do BODY done` (c-style). Direct port
1872 /// of zsh/Src/parse.c:1087-1207 `par_for`. parse_for_cstyle is the
1873 /// inner branch for the `((...))` arithmetic-header variant
1874 /// (parse.c:1100-1140 inside par_for).
1875 fn parse_for(&mut self) -> Option<ZshCommand> {
1876 let is_foreach = self.lexer.tok == LexTok::Foreach;
1877 self.lexer.zshlex();
1878
1879 // Check for C-style: for (( init; cond; step ))
1880 if self.lexer.tok == LexTok::Dinpar {
1881 return self.parse_for_cstyle();
1882 }
1883
1884 // Get variable name(s). zsh parse.c par_for accepts multiple
1885 // identifier tokens before `in`/`(`/newline — `for k v in ...`
1886 // assigns each iteration's pair of values to k and v in turn.
1887 // We store the names space-joined since variable identifiers
1888 // can't contain whitespace.
1889 let mut names: Vec<String> = Vec::new();
1890 while self.lexer.tok == LexTok::String {
1891 let v = self.lexer.tokstr.clone().unwrap_or_default();
1892 if v == "in" {
1893 break;
1894 }
1895 names.push(v);
1896 self.lexer.zshlex();
1897 }
1898 if names.is_empty() {
1899 self.error("expected variable name in for");
1900 return None;
1901 }
1902 let var = names.join(" ");
1903
1904 // Skip newlines
1905 self.skip_separators();
1906
1907 // Get list. The lexer-port quirk: `for x (a b c)` arrives as a
1908 // single String token with the parens lexed-as-content
1909 // (`<INPAR>a b c<OUTPAR>`) instead of as separate Inpar/String/
1910 // Outpar tokens. Detect that shape and split it manually.
1911 let list = if self.lexer.tok == LexTok::String
1912 && self
1913 .lexer
1914 .tokstr
1915 .as_ref()
1916 .map(|s| s.starts_with('\u{88}') && s.ends_with('\u{8a}'))
1917 .unwrap_or(false)
1918 {
1919 let raw = self.lexer.tokstr.clone().unwrap_or_default();
1920 // Strip leading INPAR + trailing OUTPAR, then untokenize the
1921 // inner content and split on whitespace for the word list.
1922 let inner = &raw[raw.char_indices().nth(1).map(|(i, _)| i).unwrap_or(0)
1923 ..raw
1924 .char_indices()
1925 .last()
1926 .map(|(i, _)| i)
1927 .unwrap_or(raw.len())];
1928 let cleaned = crate::lexer::untokenize(inner);
1929 let words: Vec<String> = cleaned.split_whitespace().map(|s| s.to_string()).collect();
1930 self.lexer.zshlex();
1931 ForList::Words(words)
1932 } else if self.lexer.tok == LexTok::String {
1933 let s = self.lexer.tokstr.as_ref();
1934 if s.map(|s| s == "in").unwrap_or(false) {
1935 self.lexer.zshlex();
1936 let mut words = Vec::new();
1937 let mut word_count = 0;
1938 while self.lexer.tok == LexTok::String {
1939 word_count += 1;
1940 if word_count > 500 || self.check_limit() {
1941 self.error("for: too many words");
1942 return None;
1943 }
1944 if let Some(ref s) = self.lexer.tokstr {
1945 words.push(s.clone());
1946 }
1947 self.lexer.zshlex();
1948 }
1949 ForList::Words(words)
1950 } else {
1951 ForList::Positional
1952 }
1953 } else if self.lexer.tok == LexTok::Inpar {
1954 // for var (...)
1955 self.lexer.zshlex();
1956 let mut words = Vec::new();
1957 let mut word_count = 0;
1958 while self.lexer.tok == LexTok::String || self.lexer.tok == LexTok::Seper {
1959 word_count += 1;
1960 if word_count > 500 || self.check_limit() {
1961 self.error("for: too many words in parens");
1962 return None;
1963 }
1964 if self.lexer.tok == LexTok::String {
1965 if let Some(ref s) = self.lexer.tokstr {
1966 words.push(s.clone());
1967 }
1968 }
1969 self.lexer.zshlex();
1970 }
1971 if self.lexer.tok == LexTok::Outpar {
1972 // After the `)` of a for-list, the next token is the
1973 // body opener — `do`/`{`. zsh's lexer needs incmdpos
1974 // set so `{` lexes as Inbrace (not as a literal). C
1975 // analogue: parse.c::par_for sets `incmdpos = 1`
1976 // after consuming the OUTPAR before the body parse.
1977 self.lexer.incmdpos = true;
1978 self.lexer.zshlex();
1979 }
1980 ForList::Words(words)
1981 } else {
1982 ForList::Positional
1983 };
1984
1985 // Skip to body
1986 self.skip_separators();
1987
1988 // Parse body
1989 let body = self.parse_loop_body(is_foreach)?;
1990
1991 Some(ZshCommand::For(ZshFor {
1992 var,
1993 list,
1994 body: Box::new(body),
1995 is_select: false,
1996 }))
1997 }
1998
1999 /// Parse C-style for loop: for (( init; cond; step ))
2000 /// Parse the c-style `for ((init; cond; incr)) do BODY done`.
2001 /// Inner branch of zsh/Src/parse.c:1100-1140 inside par_for.
2002 /// Recognized when the token after FOR is DINPAR (the `((`
2003 /// detected by gettok via dbparens setup).
2004 fn parse_for_cstyle(&mut self) -> Option<ZshCommand> {
2005 // We're at (( (Dinpar None) - the opening ((
2006 // Lexer returns:
2007 // Dinpar None - opening ((
2008 // Dinpar "init" - init expression, semicolon consumed
2009 // Dinpar "cond" - cond expression, semicolon consumed
2010 // Doutpar "step" - step expression, closing )) consumed
2011
2012 self.lexer.zshlex(); // Get init: Dinpar "i=0"
2013
2014 if self.lexer.tok != LexTok::Dinpar {
2015 self.error("expected init expression in for ((");
2016 return None;
2017 }
2018 let init = self.lexer.tokstr.clone().unwrap_or_default();
2019
2020 self.lexer.zshlex(); // Get cond: Dinpar "i<10"
2021
2022 if self.lexer.tok != LexTok::Dinpar {
2023 self.error("expected condition in for ((");
2024 return None;
2025 }
2026 let cond = self.lexer.tokstr.clone().unwrap_or_default();
2027
2028 self.lexer.zshlex(); // Get step: Doutpar "i++"
2029
2030 if self.lexer.tok != LexTok::Doutpar {
2031 self.error("expected )) in for");
2032 return None;
2033 }
2034 let step = self.lexer.tokstr.clone().unwrap_or_default();
2035
2036 self.lexer.zshlex(); // Move past ))
2037
2038 self.skip_separators();
2039 let body = self.parse_loop_body(false)?;
2040
2041 Some(ZshCommand::For(ZshFor {
2042 var: String::new(),
2043 list: ForList::CStyle { init, cond, step },
2044 body: Box::new(body),
2045 is_select: false,
2046 }))
2047 }
2048
2049 /// Parse select loop (same syntax as for)
2050 /// Parse `select NAME in WORDS; do BODY; done`. Same shape as
2051 /// `for NAME in WORDS; do ...` but with menu-prompt semantics in
2052 /// the executor. C equivalent: the SELECT case in par_for at
2053 /// parse.c:1087-1207 (selects share parser flow with foreach).
2054 fn parse_select(&mut self) -> Option<ZshCommand> {
2055 // `select` shares parse_for's grammar (var, words, body) but the
2056 // compile path is different (interactive prompt loop).
2057 match self.parse_for()? {
2058 ZshCommand::For(mut f) => {
2059 f.is_select = true;
2060 Some(ZshCommand::For(f))
2061 }
2062 other => Some(other),
2063 }
2064 }
2065
2066 /// Parse case statement
2067 /// Parse `case WORD in PATTERN) BODY ;; ... esac`. Direct port
2068 /// of zsh/Src/parse.c:1209-1409 `par_case`. Each case arm is a
2069 /// (pattern_list, body, terminator) tuple where terminator is
2070 /// `;;` (default), `;&` (fallthrough), or `;|` (continue testing).
2071 fn parse_case(&mut self) -> Option<ZshCommand> {
2072 self.lexer.zshlex(); // skip 'case'
2073
2074 let word = match self.lexer.tok {
2075 LexTok::String => {
2076 let w = self.lexer.tokstr.clone().unwrap_or_default();
2077 self.lexer.zshlex();
2078 w
2079 }
2080 _ => {
2081 self.error("expected word after case");
2082 return None;
2083 }
2084 };
2085
2086 self.skip_separators();
2087
2088 // Expect 'in' or {
2089 let use_brace = self.lexer.tok == LexTok::Inbrace;
2090 if self.lexer.tok == LexTok::String {
2091 let s = self.lexer.tokstr.as_ref();
2092 if s.map(|s| s != "in").unwrap_or(true) {
2093 self.error("expected 'in' in case");
2094 return None;
2095 }
2096 } else if !use_brace {
2097 self.error("expected 'in' or '{' in case");
2098 return None;
2099 }
2100 // Set incasepat=1 BEFORE consuming "in" so the next token (which
2101 // could be a leading `(` of a paren-prefixed pattern like
2102 // `case foo in (a|b) …`) is lexed as Inpar, not as a glob-token.
2103 // Without this the `(` got swallowed into a gettokstr('(', false)
2104 // call and produced a String like "(foo)" — the parser then saw
2105 // the `)` inside a string instead of as a separate Outpar.
2106 self.lexer.incasepat = 1;
2107 self.lexer.zshlex();
2108
2109 let mut arms = Vec::new();
2110 const MAX_ARMS: usize = 10_000;
2111
2112 loop {
2113 if arms.len() > MAX_ARMS {
2114 self.error("parse_case: too many arms");
2115 break;
2116 }
2117
2118 // Set incasepat BEFORE skipping separators so lexer knows we're in case pattern context
2119 // This affects how [ and | are lexed
2120 self.lexer.incasepat = 1;
2121
2122 self.skip_separators();
2123
2124 // Check for end
2125 // Note: 'esac' might be String "esac" if incasepat > 0 prevents reserved word recognition
2126 let is_esac = self.lexer.tok == LexTok::Esac
2127 || (self.lexer.tok == LexTok::String
2128 && self
2129 .lexer
2130 .tokstr
2131 .as_ref()
2132 .map(|s| s == "esac")
2133 .unwrap_or(false));
2134 if (use_brace && self.lexer.tok == LexTok::Outbrace) || (!use_brace && is_esac) {
2135 self.lexer.incasepat = 0;
2136 self.lexer.zshlex();
2137 break;
2138 }
2139
2140 // Also break on EOF
2141 if self.lexer.tok == LexTok::Endinput || self.lexer.tok == LexTok::Lexerr {
2142 self.lexer.incasepat = 0;
2143 break;
2144 }
2145
2146 // Skip optional `(`. zsh's case grammar: `case W in (P)…)`.
2147 // The leading `(` is paired with a matching `)` that closes
2148 // the pattern itself; the arm-close `)` follows separately.
2149 // Track whether we consumed it so we can skip the matching
2150 // `)` after pattern parsing — otherwise the arm-close would
2151 // be interpreted as the pattern-close and the actual body
2152 // would get the leftover `)`.
2153 let had_leading_paren = self.lexer.tok == LexTok::Inpar;
2154 if had_leading_paren {
2155 self.lexer.zshlex();
2156 }
2157
2158 // incasepat is already set above
2159 let mut patterns = Vec::new();
2160 let mut pattern_iterations = 0;
2161 loop {
2162 pattern_iterations += 1;
2163 if pattern_iterations > 1000 {
2164 self.error("parse_case: too many pattern iterations");
2165 self.lexer.incasepat = 0;
2166 return None;
2167 }
2168
2169 if self.lexer.tok == LexTok::String {
2170 let s = self.lexer.tokstr.as_ref();
2171 if s.map(|s| s == "esac").unwrap_or(false) {
2172 break;
2173 }
2174 patterns.push(self.lexer.tokstr.clone().unwrap_or_default());
2175 // After first pattern token, set incasepat=2 so ( is treated as part of pattern
2176 self.lexer.incasepat = 2;
2177 self.lexer.zshlex();
2178 } else if self.lexer.tok != LexTok::Bar {
2179 break;
2180 }
2181
2182 if self.lexer.tok == LexTok::Bar {
2183 // Reset to 1 (start of next alternative pattern)
2184 self.lexer.incasepat = 1;
2185 self.lexer.zshlex();
2186 } else {
2187 break;
2188 }
2189 }
2190 self.lexer.incasepat = 0;
2191
2192 // zsh's `(P)` form (parse.c:1320-1360 hack) treats the entire
2193 // parenthesized contents as ONE zsh pattern with internal `|`
2194 // as the literal alternation operator — NOT as multiple
2195 // case-arm alternatives. Without a leading `(`, the bare
2196 // `P1|P2)` form splits into multiple alts. Mirror that here:
2197 // when a leading `(` was consumed, fold the |-separated
2198 // pieces back into a single pattern string.
2199 if had_leading_paren && patterns.len() > 1 {
2200 let joined = patterns.join("|");
2201 patterns = vec![joined];
2202 }
2203
2204 // Expect ). Also handle the `(P))` wrapped-pattern form:
2205 // when a leading `(` was consumed, accept an extra `)` —
2206 // the inner `)` closes the optional-paren wrapper, the
2207 // outer `)` is the arm-close. zsh accepts BOTH `(P) BODY`
2208 // (bare pattern, leading-paren is just the opt-marker, the
2209 // close is arm-close) and `(P)) BODY` (paren-wrapped
2210 // pattern, then arm-close). The first form is unambiguous
2211 // when the bare pattern was simple; the second is needed
2212 // when the body starts with `(`.
2213 if self.lexer.tok != LexTok::Outpar {
2214 self.error("expected ')' in case pattern");
2215 return None;
2216 }
2217 // Port of Src/parse.c:1310-1313 — when the case pattern
2218 // closes with `)`, set `incmdpos = 1` BEFORE consuming
2219 // the token so the first word of the arm body is lexed
2220 // in command position. Without this, `case X in X) c1=v ;;`
2221 // lexes `c1=v` as a plain STRING rather than an assignment
2222 // word, and exec treats it as a command name (yielding
2223 // "command not found: c1=v"). Subsequent statements after
2224 // `;` parse correctly because the `;` separator restores
2225 // command position; only the FIRST body word was broken.
2226 self.lexer.incmdpos = true;
2227 self.lexer.zshlex();
2228 if had_leading_paren && self.lexer.tok == LexTok::Outpar {
2229 self.lexer.incmdpos = true;
2230 self.lexer.zshlex();
2231 }
2232
2233 // Parse body
2234 let body = self.parse_program();
2235
2236 // Get terminator. Set incasepat=1 BEFORE the zshlex
2237 // advance so the next token (the next arm's pattern, like
2238 // `[a-z]`) gets tokenized in pattern context. Without
2239 // this, a `[`-prefixed pattern after the FIRST arm became
2240 // Inbrack instead of String and the pattern-loop bailed
2241 // out with "expected ')' in case pattern".
2242 let terminator = match self.lexer.tok {
2243 LexTok::Dsemi => {
2244 self.lexer.incasepat = 1;
2245 self.lexer.zshlex();
2246 CaseTerm::Break
2247 }
2248 LexTok::Semiamp => {
2249 self.lexer.incasepat = 1;
2250 self.lexer.zshlex();
2251 CaseTerm::Continue
2252 }
2253 LexTok::Semibar => {
2254 self.lexer.incasepat = 1;
2255 self.lexer.zshlex();
2256 CaseTerm::TestNext
2257 }
2258 _ => CaseTerm::Break,
2259 };
2260
2261 if !patterns.is_empty() {
2262 arms.push(CaseArm {
2263 patterns,
2264 body,
2265 terminator,
2266 });
2267 }
2268 }
2269
2270 Some(ZshCommand::Case(ZshCase { word, arms }))
2271 }
2272
2273 /// Parse if statement
2274 /// Parse `if COND; then BODY; [elif COND; then BODY;]* [else BODY;] fi`.
2275 /// Direct port of zsh/Src/parse.c:1411-1519 `par_if`. The C source
2276 /// emits WC_IF wordcodes per arm; zshrs builds an AST chain of
2277 /// (cond, then_body) tuples plus an optional else_body.
2278 fn parse_if(&mut self) -> Option<ZshCommand> {
2279 self.lexer.zshlex(); // skip 'if'
2280
2281 // Parse condition - stops at 'then' or '{' (zsh allows { instead of then)
2282 let cond = Box::new(self.parse_program_until(Some(&[LexTok::Then, LexTok::Inbrace])));
2283
2284 self.skip_separators();
2285
2286 // Expect 'then' or {
2287 let use_brace = self.lexer.tok == LexTok::Inbrace;
2288 if self.lexer.tok != LexTok::Then && !use_brace {
2289 self.error("expected 'then' or '{' after if condition");
2290 return None;
2291 }
2292 self.lexer.zshlex();
2293
2294 // Parse then-body - stops at else/elif/fi, or } if using brace syntax
2295 let then = if use_brace {
2296 let body = self.parse_program_until(Some(&[LexTok::Outbrace]));
2297 if self.lexer.tok == LexTok::Outbrace {
2298 self.lexer.zshlex();
2299 }
2300 Box::new(body)
2301 } else {
2302 Box::new(self.parse_program_until(Some(&[LexTok::Else, LexTok::Elif, LexTok::Fi])))
2303 };
2304
2305 // Parse elif and else. zsh accepts the SAME elif/else
2306 // continuations for both classic `then/fi` AND the brace
2307 // form `{ ... } elif ... { ... } else { ... }`. Direct port
2308 // of zsh/Src/parse.c:1417-1500 par_if where the elif/else
2309 // arms are checked AFTER the body close regardless of which
2310 // delimiter style opened the block. Without this, zinit's
2311 // if [[ -z $sel ]] { ... } else { ... }
2312 // hung the parser — `else` was treated as an external
2313 // command following the if-statement, which the lexer state
2314 // mis-classified inside the still-open function body.
2315 //
2316 // For brace-form: skip the `fi` consumption at the end of
2317 // the loop (no `fi` after a brace block), and `else` may
2318 // arrive after a `}` close. Skip-separators between the
2319 // body close and the elif/else token.
2320 let mut elif = Vec::new();
2321 let mut else_ = None;
2322
2323 {
2324 loop {
2325 self.skip_separators();
2326
2327 match self.lexer.tok {
2328 LexTok::Elif => {
2329 self.lexer.zshlex();
2330 // elif condition stops at 'then' or '{'
2331 let econd =
2332 self.parse_program_until(Some(&[LexTok::Then, LexTok::Inbrace]));
2333 self.skip_separators();
2334
2335 let elif_use_brace = self.lexer.tok == LexTok::Inbrace;
2336 if self.lexer.tok != LexTok::Then && !elif_use_brace {
2337 self.error("expected 'then' after elif");
2338 return None;
2339 }
2340 self.lexer.zshlex();
2341
2342 // elif body stops at else/elif/fi or } if using braces
2343 let ebody = if elif_use_brace {
2344 let body = self.parse_program_until(Some(&[LexTok::Outbrace]));
2345 if self.lexer.tok == LexTok::Outbrace {
2346 self.lexer.zshlex();
2347 }
2348 body
2349 } else {
2350 self.parse_program_until(Some(&[
2351 LexTok::Else,
2352 LexTok::Elif,
2353 LexTok::Fi,
2354 ]))
2355 };
2356
2357 elif.push((econd, ebody));
2358 }
2359 LexTok::Else => {
2360 self.lexer.zshlex();
2361 self.skip_separators();
2362
2363 let else_use_brace = self.lexer.tok == LexTok::Inbrace;
2364 if else_use_brace {
2365 self.lexer.zshlex();
2366 }
2367
2368 // else body stops at 'fi' or '}'
2369 else_ = Some(Box::new(if else_use_brace {
2370 let body = self.parse_program_until(Some(&[LexTok::Outbrace]));
2371 if self.lexer.tok == LexTok::Outbrace {
2372 self.lexer.zshlex();
2373 }
2374 body
2375 } else {
2376 self.parse_program_until(Some(&[LexTok::Fi]))
2377 }));
2378
2379 // Consume the 'fi' if present (not for brace syntax)
2380 if !else_use_brace && self.lexer.tok == LexTok::Fi {
2381 self.lexer.zshlex();
2382 }
2383 break;
2384 }
2385 LexTok::Fi => {
2386 self.lexer.zshlex();
2387 break;
2388 }
2389 _ => break,
2390 }
2391 }
2392 }
2393
2394 Some(ZshCommand::If(ZshIf {
2395 cond,
2396 then,
2397 elif,
2398 else_,
2399 }))
2400 }
2401
2402 /// Parse while/until loop
2403 /// Parse `while COND; do BODY; done` and `until COND; do BODY; done`.
2404 /// Direct port of zsh/Src/parse.c:1521-1563 `par_while`. The
2405 /// `until` variant is the same loop with the condition negated.
2406 fn parse_while(&mut self, until: bool) -> Option<ZshCommand> {
2407 self.lexer.zshlex(); // skip while/until
2408
2409 let cond = Box::new(self.parse_program());
2410
2411 self.skip_separators();
2412 let body = self.parse_loop_body(false)?;
2413
2414 Some(ZshCommand::While(ZshWhile {
2415 cond,
2416 body: Box::new(body),
2417 until,
2418 }))
2419 }
2420
2421 /// Parse repeat loop
2422 /// Parse `repeat N; do BODY; done`. Direct port of
2423 /// zsh/Src/parse.c:1565-1617 `par_repeat`. The C source supports
2424 /// the SHORTLOOPS short-form `repeat N CMD` (no do/done) — zshrs's
2425 /// parser doesn't yet special-case that variant.
2426 fn parse_repeat(&mut self) -> Option<ZshCommand> {
2427 self.lexer.zshlex(); // skip 'repeat'
2428
2429 let count = match self.lexer.tok {
2430 LexTok::String => {
2431 let c = self.lexer.tokstr.clone().unwrap_or_default();
2432 self.lexer.zshlex();
2433 c
2434 }
2435 _ => {
2436 self.error("expected count after repeat");
2437 return None;
2438 }
2439 };
2440
2441 self.skip_separators();
2442 let body = self.parse_loop_body(false)?;
2443
2444 Some(ZshCommand::Repeat(ZshRepeat {
2445 count,
2446 body: Box::new(body),
2447 }))
2448 }
2449
2450 /// Parse loop body (do...done, {...}, or shortloop)
2451 /// Parse the `do BODY done` body of a for/while/until/select/
2452 /// repeat loop. Direct equivalent of zsh's parse.c handling
2453 /// inside the loop builders — they all consume DOLOOP, parse a
2454 /// list until DONE, and return the list. The `foreach_style`
2455 /// flag signals foreach (where short-form `for NAME in WORDS;
2456 /// CMD` may skip do/done) vs c-style (which always requires
2457 /// do/done).
2458 fn parse_loop_body(&mut self, foreach_style: bool) -> Option<ZshProgram> {
2459 if self.lexer.tok == LexTok::Doloop {
2460 self.lexer.zshlex();
2461 let body = self.parse_program();
2462 if self.lexer.tok == LexTok::Done {
2463 self.lexer.zshlex();
2464 }
2465 Some(body)
2466 } else if self.lexer.tok == LexTok::Inbrace {
2467 self.lexer.zshlex();
2468 let body = self.parse_program();
2469 if self.lexer.tok == LexTok::Outbrace {
2470 self.lexer.zshlex();
2471 }
2472 Some(body)
2473 } else if foreach_style {
2474 // foreach allows 'end' terminator
2475 let body = self.parse_program();
2476 if self.lexer.tok == LexTok::Zend {
2477 self.lexer.zshlex();
2478 }
2479 Some(body)
2480 } else {
2481 // Short loop - single command
2482 self.parse_list()
2483 .map(|list| ZshProgram { lists: vec![list] })
2484 }
2485 }
2486
2487 /// Parse (...) subshell
2488 /// Parse a subshell `( ... )`. Direct port of zsh/Src/parse.c:1619-1670
2489 /// `par_subsh`. Body parses as a normal list; the subshell wrapper
2490 /// fork-isolates execution in the executor.
2491 fn parse_subsh(&mut self) -> Option<ZshCommand> {
2492 self.lexer.zshlex(); // skip (
2493 let prog = self.parse_program();
2494 if self.lexer.tok == LexTok::Outpar {
2495 self.lexer.zshlex();
2496 }
2497 Some(ZshCommand::Subsh(Box::new(prog)))
2498 }
2499
2500 /// `() { body } arg1 arg2 …` — anonymous function. Defines a fresh
2501 /// function named `_zshrs_anon_N`, invokes it with the args, and the
2502 /// body runs with positional params set. Implemented as the desugared
2503 /// pair (FuncDef + Simple call) so the compile path doesn't need new
2504 /// machinery.
2505 /// Parse an anonymous function definition `() { BODY }` followed
2506 /// by call args. zsh treats `() { echo hi; } a b c` as defining
2507 /// and immediately calling an anon fn with args a/b/c. C
2508 /// equivalent: the INOUTPAR shape in par_simple at parse.c:1836+
2509 /// triggers an anon-funcdef path.
2510 fn parse_anon_funcdef(&mut self) -> Option<ZshCommand> {
2511 self.lexer.zshlex(); // skip ()
2512 self.skip_separators();
2513 // No `{` after `()` → bare empty subshell shape `()`. Fall back
2514 // to a Subsh with an empty program so the status is 0 (matches
2515 // zsh's `()` no-op behavior).
2516 if self.lexer.tok != LexTok::Inbrace {
2517 return Some(ZshCommand::Subsh(Box::new(ZshProgram {
2518 lists: Vec::new(),
2519 })));
2520 }
2521 self.lexer.zshlex(); // skip {
2522 let body = self.parse_program();
2523 if self.lexer.tok == LexTok::Outbrace {
2524 self.lexer.zshlex();
2525 }
2526 // Collect any trailing args until a separator. zsh's anon-fn form
2527 // `() { body } a b c` runs body with $1=a, $2=b, $3=c.
2528 let mut args = Vec::new();
2529 while self.lexer.tok == LexTok::String {
2530 if let Some(s) = self.lexer.tokstr.clone() {
2531 args.push(s);
2532 }
2533 self.lexer.zshlex();
2534 }
2535
2536 // Generate a unique name. Module-level static would be cleaner but
2537 // a thread-local atomic is enough — anonymous functions are
2538 // ephemeral and the name isn't user-visible.
2539 use std::sync::atomic::{AtomicUsize, Ordering};
2540 static ANON_COUNTER: AtomicUsize = AtomicUsize::new(0);
2541 let n = ANON_COUNTER.fetch_add(1, Ordering::Relaxed);
2542 let name = format!("_zshrs_anon_{}", n);
2543 Some(ZshCommand::FuncDef(ZshFuncDef {
2544 names: vec![name],
2545 body: Box::new(body),
2546 tracing: false,
2547 auto_call_args: Some(args),
2548 body_source: None,
2549 }))
2550 }
2551
2552 /// Parse {...} cursh
2553 /// Parse a current-shell brace block `{ BODY }`. C source
2554 /// par_cmd at parse.c:958-1085 handles INBRACE → emit WC_CURSH
2555 /// and recurses into the list. zshrs's parse_cursh extracts that
2556 /// arm into a dedicated method.
2557 fn parse_cursh(&mut self) -> Option<ZshCommand> {
2558 self.lexer.zshlex(); // skip {
2559 let prog = self.parse_program();
2560
2561 // Check for { ... } always { ... }. Direct port of zsh's
2562 // par_subsh at parse.c:1612-1660 — note the two `incmdpos = 1`
2563 // forces (parse.c:1632, 1637): after consuming the closing
2564 // OUTBRACE AND after matching the `always` keyword, the parser
2565 // explicitly resets command position so the next `{` lexes as
2566 // INBRACE. Without these resets the lexer's String-clears-cmdpos
2567 // rule (lex.rs:976-983) leaves the second `{` in word position,
2568 // turning `always { ... }` into a Simple `{` `echo` … and the
2569 // try/always pairing is silently lost.
2570 if self.lexer.tok == LexTok::Outbrace {
2571 self.lexer.incmdpos = true; // parse.c:1632 incmdpos = !zsh_construct
2572 self.lexer.zshlex();
2573
2574 // Check for 'always'
2575 if self.lexer.tok == LexTok::String {
2576 let s = self.lexer.tokstr.as_ref();
2577 if s.map(|s| s == "always").unwrap_or(false) {
2578 self.lexer.incmdpos = true; // parse.c:1637 incmdpos = 1
2579 self.lexer.zshlex();
2580 self.skip_separators();
2581
2582 if self.lexer.tok == LexTok::Inbrace {
2583 self.lexer.zshlex();
2584 let always = self.parse_program();
2585 if self.lexer.tok == LexTok::Outbrace {
2586 self.lexer.zshlex();
2587 }
2588 return Some(ZshCommand::Try(ZshTry {
2589 try_block: Box::new(prog),
2590 always: Box::new(always),
2591 }));
2592 }
2593 }
2594 }
2595 }
2596
2597 Some(ZshCommand::Cursh(Box::new(prog)))
2598 }
2599
2600 /// Parse function definition
2601 /// Parse `function NAME { BODY }` or `NAME () { BODY }`. Direct
2602 /// port of zsh/Src/parse.c:1672-1785 `par_funcdef`. zsh handles
2603 /// the multiple keyword shapes (function FOO, FOO (), function FOO ()),
2604 /// the optional `[fname1 fname2 ...]` for multi-name function defs,
2605 /// and the `function FOO () { ... }` traditional/POSIX hybrid form.
2606 fn parse_funcdef(&mut self) -> Option<ZshCommand> {
2607 self.lexer.zshlex(); // skip 'function'
2608
2609 let mut names = Vec::new();
2610 let mut tracing = false;
2611
2612 // Handle options like -T and function names. Two subtleties:
2613 //
2614 // 1. Flags: zsh's lexer encodes a leading `-` as
2615 // `char_tokens::DASH` (\u{9b}) inside the String tokstr.
2616 // The previous `s.starts_with('-')` check failed for
2617 // `\u{9b}T`, so `function -T NAME { body }` slipped the
2618 // `-T` token into `names` and the function got registered
2619 // as `T` plus the intended `NAME`.
2620 //
2621 // 2. Body opener: zsh's lexer emits the opening `{` as a
2622 // String (not LexTok::Inbrace) when it follows the String
2623 // NAME — the preceding name token resets incmdpos to
2624 // false, and only `{` immediately followed by `}` (the
2625 // empty-body case) gets promoted to Inbrace. The funcdef
2626 // parser must recognise the bare-`{` String as the body
2627 // opener; otherwise `function NAME { body }` falls through
2628 // to `_ => break`, no body parses, and the FuncDef never
2629 // lands in the AST. This is consistent with C zsh's
2630 // par_funcdef which knows it's in funcdef-header context
2631 // and accepts the brace either way.
2632 loop {
2633 match self.lexer.tok {
2634 LexTok::String => {
2635 let s = self.lexer.tokstr.as_ref()?;
2636 if s == "{" {
2637 // Funcdef body opener — break, body-parser branch handles it.
2638 break;
2639 }
2640 let first = s.chars().next();
2641 if matches!(first, Some('-') | Some('+'))
2642 || matches!(first, Some(c) if c == crate::tokens::char_tokens::DASH)
2643 {
2644 if s.contains('T') {
2645 tracing = true;
2646 }
2647 self.lexer.zshlex();
2648 continue;
2649 }
2650 names.push(s.clone());
2651 self.lexer.zshlex();
2652 }
2653 LexTok::Inbrace | LexTok::Inoutpar | LexTok::Seper | LexTok::Newlin => break,
2654 _ => break,
2655 }
2656 }
2657
2658 // Optional ()
2659 let saw_paren = self.lexer.tok == LexTok::Inoutpar;
2660 if saw_paren {
2661 self.lexer.zshlex();
2662 }
2663
2664 self.skip_separators();
2665
2666 // Body opener: real Inbrace OR a String("{") (the lexer emits
2667 // the latter after a String NAME — see comment above).
2668 let body_opener_is_string_brace = self.lexer.tok == LexTok::String
2669 && self.lexer.tokstr.as_deref() == Some("{");
2670 if self.lexer.tok == LexTok::Inbrace || body_opener_is_string_brace {
2671 // Capture body_start BEFORE the lexer advances past the
2672 // first body token. After the previous zshlex consumed
2673 // `{`, lexer.pos points just past `{` (which is where the
2674 // body source starts). The next `zshlex()` would advance
2675 // past the first token (`echo`), making body_start land
2676 // mid-body and lose the first word — `typeset -f f` would
2677 // print `a; echo b` for `{ echo a; echo b }`.
2678 let body_start = self.lexer.pos;
2679 self.lexer.zshlex();
2680 let body = self.parse_program();
2681 let body_end = if self.lexer.tok == LexTok::Outbrace {
2682 // Lexer has just consumed `}`; pos is past it. Body content
2683 // ends one byte before pos.
2684 self.lexer.pos.saturating_sub(1)
2685 } else {
2686 self.lexer.pos
2687 };
2688 let body_source = self
2689 .lexer
2690 .input
2691 .get(body_start..body_end)
2692 .map(|s| s.trim().to_string())
2693 .filter(|s| !s.is_empty());
2694 if self.lexer.tok == LexTok::Outbrace {
2695 self.lexer.zshlex();
2696 }
2697
2698 // Anonymous form `function () { body } a b c` (with `()`) or
2699 // `function { body } a b c` (zsh-only shorthand, no `()`). No
2700 // name was collected. Mirror parse_anon_funcdef: synthesize
2701 // `_zshrs_anon_N`, collect trailing args, set auto_call_args
2702 // so compile_funcdef registers + immediately calls the
2703 // function with the args as positional params.
2704 if names.is_empty() {
2705 let mut args = Vec::new();
2706 while self.lexer.tok == LexTok::String {
2707 if let Some(s) = self.lexer.tokstr.clone() {
2708 args.push(s);
2709 }
2710 self.lexer.zshlex();
2711 }
2712 use std::sync::atomic::{AtomicUsize, Ordering};
2713 static ANON_COUNTER: AtomicUsize = AtomicUsize::new(0);
2714 let n = ANON_COUNTER.fetch_add(1, Ordering::Relaxed);
2715 let name = format!("_zshrs_anon_kw_{}", n);
2716 return Some(ZshCommand::FuncDef(ZshFuncDef {
2717 names: vec![name],
2718 body: Box::new(body),
2719 tracing,
2720 auto_call_args: Some(args),
2721 body_source,
2722 }));
2723 }
2724
2725 Some(ZshCommand::FuncDef(ZshFuncDef {
2726 names,
2727 body: Box::new(body),
2728 tracing,
2729 auto_call_args: None,
2730 body_source,
2731 }))
2732 } else {
2733 // Short form
2734 self.parse_list().map(|list| {
2735 ZshCommand::FuncDef(ZshFuncDef {
2736 names,
2737 body: Box::new(ZshProgram { lists: vec![list] }),
2738 tracing,
2739 auto_call_args: None,
2740 body_source: None,
2741 })
2742 })
2743 }
2744 }
2745
2746 /// Parse inline function definition: name() { ... }
2747 /// Parse the inline form `NAME () { BODY }` (POSIX-style funcdef
2748 /// without the `function` keyword). The name has already been
2749 /// consumed and pushed by parse_simple before this method fires.
2750 /// C source: handled inline in par_simple's INOUTPAR-after-name
2751 /// arm (parse.c:1836-2228).
2752 fn parse_inline_funcdef(&mut self, name: String) -> Option<ZshCommand> {
2753 // Skip ()
2754 if self.lexer.tok == LexTok::Inoutpar {
2755 self.lexer.zshlex();
2756 }
2757
2758 self.skip_separators();
2759
2760 // Parse body
2761 if self.lexer.tok == LexTok::Inbrace {
2762 // Same body_start-before-zshlex fix as parse_funcdef.
2763 let body_start = self.lexer.pos;
2764 self.lexer.zshlex();
2765 let body = self.parse_program();
2766 let body_end = if self.lexer.tok == LexTok::Outbrace {
2767 self.lexer.pos.saturating_sub(1)
2768 } else {
2769 self.lexer.pos
2770 };
2771 let body_source = self
2772 .lexer
2773 .input
2774 .get(body_start..body_end)
2775 .map(|s| s.trim().to_string())
2776 .filter(|s| !s.is_empty());
2777 if self.lexer.tok == LexTok::Outbrace {
2778 self.lexer.zshlex();
2779 }
2780 Some(ZshCommand::FuncDef(ZshFuncDef {
2781 names: vec![name],
2782 body: Box::new(body),
2783 tracing: false,
2784 auto_call_args: None,
2785 body_source,
2786 }))
2787 } else {
2788 match self.parse_cmd() {
2789 Some(cmd) => {
2790 let list = ZshList {
2791 sublist: ZshSublist {
2792 pipe: ZshPipe {
2793 cmd,
2794 next: None,
2795 lineno: self.lexer.lineno,
2796 merge_stderr: false,
2797 },
2798 next: None,
2799 flags: SublistFlags::default(),
2800 },
2801 flags: ListFlags::default(),
2802 };
2803 Some(ZshCommand::FuncDef(ZshFuncDef {
2804 names: vec![name],
2805 body: Box::new(ZshProgram { lists: vec![list] }),
2806 tracing: false,
2807 auto_call_args: None,
2808 body_source: None,
2809 }))
2810 }
2811 None => None,
2812 }
2813 }
2814 }
2815
2816 /// Parse [[ ... ]] conditional
2817 /// Parse `[[ EXPR ]]` conditional expression. Direct port of
2818 /// zsh/Src/parse.c:2409-2731 `par_cond` (and helpers par_cond_1,
2819 /// par_cond_2, par_cond_double, par_cond_triple, par_cond_multi
2820 /// at parse.c:2434-2731). Expression operators: `||` `&&` `!`
2821 /// + unary tests (-f, -d, -n, -z, etc.) + binary tests (=, !=,
2822 /// <, >, ==, =~, -eq, -ne, -lt, -le, -gt, -ge, -nt, -ot, -ef).
2823 fn parse_cond(&mut self) -> Option<ZshCommand> {
2824 self.lexer.zshlex(); // skip [[
2825 // Empty cond `[[ ]]` is a parse error in zsh — emit the
2826 // diagnostic and return None so the caller produces a
2827 // non-zero exit. Without this, `[[ ]]` silently passed and
2828 // returned exit 0.
2829 if self.lexer.tok == LexTok::Doutbrack {
2830 self.error("parse error near `]]'");
2831 self.lexer.zshlex();
2832 return None;
2833 }
2834 let cond = self.parse_cond_expr();
2835
2836 if self.lexer.tok == LexTok::Doutbrack {
2837 self.lexer.zshlex();
2838 }
2839
2840 cond.map(ZshCommand::Cond)
2841 }
2842
2843 /// Parse conditional expression
2844 /// Top of `[[ ]]` cond-expression parsing — entry to recursive
2845 /// descent (or → and → not → primary). Direct port of zsh's
2846 /// par_cond_1 at parse.c:2434-2475.
2847 fn parse_cond_expr(&mut self) -> Option<ZshCond> {
2848 self.parse_cond_or()
2849 }
2850
2851 /// Cond-expression `||` level. C: inside par_cond_1 at
2852 /// parse.c:2434-2475 (the `cond_or` ladder).
2853 fn parse_cond_or(&mut self) -> Option<ZshCond> {
2854 self.recursion_depth += 1;
2855 if self.check_recursion() {
2856 self.error("parse_cond_or: max recursion depth exceeded");
2857 self.recursion_depth -= 1;
2858 return None;
2859 }
2860
2861 let left = match self.parse_cond_and() {
2862 Some(l) => l,
2863 None => {
2864 self.recursion_depth -= 1;
2865 return None;
2866 }
2867 };
2868
2869 self.skip_cond_separators();
2870
2871 let result = if self.lexer.tok == LexTok::Dbar {
2872 self.lexer.zshlex();
2873 self.skip_cond_separators();
2874 self.parse_cond_or()
2875 .map(|right| ZshCond::Or(Box::new(left), Box::new(right)))
2876 } else {
2877 Some(left)
2878 };
2879
2880 self.recursion_depth -= 1;
2881 result
2882 }
2883
2884 /// Cond-expression `&&` level. C: par_cond_2 at parse.c:2476-2625.
2885 fn parse_cond_and(&mut self) -> Option<ZshCond> {
2886 self.recursion_depth += 1;
2887 if self.check_recursion() {
2888 self.error("parse_cond_and: max recursion depth exceeded");
2889 self.recursion_depth -= 1;
2890 return None;
2891 }
2892
2893 let left = match self.parse_cond_not() {
2894 Some(l) => l,
2895 None => {
2896 self.recursion_depth -= 1;
2897 return None;
2898 }
2899 };
2900
2901 self.skip_cond_separators();
2902
2903 let result = if self.lexer.tok == LexTok::Damper {
2904 self.lexer.zshlex();
2905 self.skip_cond_separators();
2906 self.parse_cond_and()
2907 .map(|right| ZshCond::And(Box::new(left), Box::new(right)))
2908 } else {
2909 Some(left)
2910 };
2911
2912 self.recursion_depth -= 1;
2913 result
2914 }
2915
2916 /// Cond-expression `!` negation level. C: handled inside
2917 /// par_cond_2 at parse.c:2476-2625 via the BANG token check.
2918 fn parse_cond_not(&mut self) -> Option<ZshCond> {
2919 self.recursion_depth += 1;
2920 if self.check_recursion() {
2921 self.error("parse_cond_not: max recursion depth exceeded");
2922 self.recursion_depth -= 1;
2923 return None;
2924 }
2925
2926 self.skip_cond_separators();
2927
2928 // ! can be either LexTok::Bang or String "!"
2929 let is_not = self.lexer.tok == LexTok::Bang
2930 || (self.lexer.tok == LexTok::String
2931 && self
2932 .lexer
2933 .tokstr
2934 .as_ref()
2935 .map(|s| s == "!")
2936 .unwrap_or(false));
2937 if is_not {
2938 self.lexer.zshlex();
2939 let inner = match self.parse_cond_not() {
2940 Some(i) => i,
2941 None => {
2942 self.recursion_depth -= 1;
2943 return None;
2944 }
2945 };
2946 self.recursion_depth -= 1;
2947 return Some(ZshCond::Not(Box::new(inner)));
2948 }
2949
2950 if self.lexer.tok == LexTok::Inpar {
2951 self.lexer.zshlex();
2952 self.skip_cond_separators();
2953 let inner = match self.parse_cond_expr() {
2954 Some(i) => i,
2955 None => {
2956 self.recursion_depth -= 1;
2957 return None;
2958 }
2959 };
2960 self.skip_cond_separators();
2961 if self.lexer.tok == LexTok::Outpar {
2962 self.lexer.zshlex();
2963 }
2964 self.recursion_depth -= 1;
2965 return Some(inner);
2966 }
2967
2968 let result = self.parse_cond_primary();
2969 self.recursion_depth -= 1;
2970 result
2971 }
2972
2973 /// Cond-expression primary: unary tests (-f, -d, ...), binary
2974 /// tests (=, !=, <, >, ==, =~, -eq, -ne, ...), and parenthesized
2975 /// sub-expressions. Direct port of par_cond_double / par_cond_triple
2976 /// / par_cond_multi at parse.c:2626-2731 (chosen by arg count).
2977 fn parse_cond_primary(&mut self) -> Option<ZshCond> {
2978 let s1 = match self.lexer.tok {
2979 LexTok::String => {
2980 let s = self.lexer.tokstr.clone().unwrap_or_default();
2981 self.lexer.zshlex();
2982 s
2983 }
2984 _ => return None,
2985 };
2986
2987 self.skip_cond_separators();
2988
2989 // Check for unary operator. zsh's lexer tokenizes leading `-` as
2990 // `char_tokens::DASH` (\u{9b}) inside gettokstr (lex.c:1390-1400
2991 // LX2_DASH — `-` always becomes Dash, untokenized later). Match
2992 // either form here, and use char-count not byte-count since DASH
2993 // is 2 UTF-8 bytes (`\xc2\x9b`).
2994 let s1_chars: Vec<char> = s1.chars().collect();
2995 if s1_chars.len() == 2 && crate::tokens::is_dash(s1_chars[0]) {
2996 let s2 = match self.lexer.tok {
2997 LexTok::String => {
2998 let s = self.lexer.tokstr.clone().unwrap_or_default();
2999 self.lexer.zshlex();
3000 s
3001 }
3002 _ => return Some(ZshCond::Unary("-n".to_string(), s1)),
3003 };
3004 return Some(ZshCond::Unary(s1, s2));
3005 }
3006
3007 // Check for binary operator. Direct port of zsh/Src/parse.c:2601-2603:
3008 // incond++; /* parentheses do globbing */
3009 // do condlex(); while (COND_SEP());
3010 // incond--; /* parentheses do grouping */
3011 // The bump makes the lexer treat `(` as a literal character inside
3012 // the RHS word (e.g. `[[ x =~ (foo) ]]`) instead of returning INPAR
3013 // and splitting the regex into multiple tokens.
3014 let op = match self.lexer.tok {
3015 LexTok::String => {
3016 let s = self.lexer.tokstr.clone().unwrap_or_default();
3017 self.lexer.incond += 1;
3018 self.lexer.zshlex();
3019 self.lexer.incond -= 1;
3020 s
3021 }
3022 LexTok::Inang => {
3023 self.lexer.incond += 1;
3024 self.lexer.zshlex();
3025 self.lexer.incond -= 1;
3026 "<".to_string()
3027 }
3028 LexTok::Outang => {
3029 self.lexer.incond += 1;
3030 self.lexer.zshlex();
3031 self.lexer.incond -= 1;
3032 ">".to_string()
3033 }
3034 _ => return Some(ZshCond::Unary("-n".to_string(), s1)),
3035 };
3036
3037 self.skip_cond_separators();
3038
3039 let s2 = match self.lexer.tok {
3040 LexTok::String => {
3041 let s = self.lexer.tokstr.clone().unwrap_or_default();
3042 self.lexer.zshlex();
3043 s
3044 }
3045 _ => return Some(ZshCond::Binary(s1, op, String::new())),
3046 };
3047
3048 if op == "=~" {
3049 Some(ZshCond::Regex(s1, s2))
3050 } else {
3051 Some(ZshCond::Binary(s1, op, s2))
3052 }
3053 }
3054
3055 fn skip_cond_separators(&mut self) {
3056 while self.lexer.tok == LexTok::Seper && {
3057 let s = self.lexer.tokstr.as_ref();
3058 s.map(|s| !s.contains(';')).unwrap_or(true)
3059 } {
3060 self.lexer.zshlex();
3061 }
3062 }
3063
3064 /// Parse (( ... )) arithmetic command
3065 /// Parse `(( EXPR ))` arithmetic command. C source: parse.c:1810-1834
3066 /// `par_dinbrack` (despite the name; the function actually handles
3067 /// DINPAR `(( ))` blocks too).
3068 fn parse_arith(&mut self) -> Option<ZshCommand> {
3069 let expr = self.lexer.tokstr.clone().unwrap_or_default();
3070 self.lexer.zshlex();
3071 Some(ZshCommand::Arith(expr))
3072 }
3073
3074 /// Parse time command
3075 /// Parse `time CMD` (POSIX time keyword). Direct port of
3076 /// zsh/Src/parse.c:1787-1808 `par_time`. The `time` keyword
3077 /// times the execution of the following pipeline / cmd.
3078 fn parse_time(&mut self) -> Option<ZshCommand> {
3079 self.lexer.zshlex(); // skip 'time'
3080
3081 // Check if there's a pipeline to time
3082 if self.lexer.tok == LexTok::Seper
3083 || self.lexer.tok == LexTok::Newlin
3084 || self.lexer.tok == LexTok::Endinput
3085 {
3086 Some(ZshCommand::Time(None))
3087 } else {
3088 let sublist = self.parse_sublist();
3089 Some(ZshCommand::Time(sublist.map(Box::new)))
3090 }
3091 }
3092
3093 /// Check if next token is ()
3094 fn peek_inoutpar(&mut self) -> bool {
3095 self.lexer.tok == LexTok::Inoutpar
3096 }
3097
3098 /// Skip separator tokens
3099 fn skip_separators(&mut self) {
3100 let mut iterations = 0;
3101 while self.lexer.tok == LexTok::Seper || self.lexer.tok == LexTok::Newlin {
3102 iterations += 1;
3103 if iterations > 100_000 {
3104 self.error("skip_separators: too many iterations");
3105 return;
3106 }
3107 self.lexer.zshlex();
3108 }
3109 }
3110
3111 /// Record an error
3112 fn error(&mut self, msg: &str) {
3113 self.errors.push(ParseError {
3114 message: msg.to_string(),
3115 line: self.lexer.lineno,
3116 });
3117 }
3118}
3119
3120#[cfg(test)]
3121mod tests {
3122 use super::*;
3123
3124 fn parse(input: &str) -> Result<ZshProgram, Vec<ParseError>> {
3125 let mut parser = ZshParser::new(input);
3126 parser.parse()
3127 }
3128
3129 #[test]
3130 fn test_simple_command() {
3131 let prog = parse("echo hello world").unwrap();
3132 assert_eq!(prog.lists.len(), 1);
3133 match &prog.lists[0].sublist.pipe.cmd {
3134 ZshCommand::Simple(s) => {
3135 assert_eq!(s.words, vec!["echo", "hello", "world"]);
3136 }
3137 _ => panic!("expected simple command"),
3138 }
3139 }
3140
3141 #[test]
3142 fn test_pipeline() {
3143 let prog = parse("ls | grep foo | wc -l").unwrap();
3144 assert_eq!(prog.lists.len(), 1);
3145
3146 let pipe = &prog.lists[0].sublist.pipe;
3147 assert!(pipe.next.is_some());
3148
3149 let pipe2 = pipe.next.as_ref().unwrap();
3150 assert!(pipe2.next.is_some());
3151 }
3152
3153 #[test]
3154 fn test_and_or() {
3155 let prog = parse("cmd1 && cmd2 || cmd3").unwrap();
3156 let sublist = &prog.lists[0].sublist;
3157
3158 assert!(sublist.next.is_some());
3159 let (op, _) = sublist.next.as_ref().unwrap();
3160 assert_eq!(*op, SublistOp::And);
3161 }
3162
3163 #[test]
3164 fn test_if_then() {
3165 let prog = parse("if test -f foo; then echo yes; fi").unwrap();
3166 match &prog.lists[0].sublist.pipe.cmd {
3167 ZshCommand::If(_) => {}
3168 _ => panic!("expected if command"),
3169 }
3170 }
3171
3172 #[test]
3173 fn test_for_loop() {
3174 let prog = parse("for i in a b c; do echo $i; done").unwrap();
3175 match &prog.lists[0].sublist.pipe.cmd {
3176 ZshCommand::For(f) => {
3177 assert_eq!(f.var, "i");
3178 match &f.list {
3179 ForList::Words(w) => assert_eq!(w, &vec!["a", "b", "c"]),
3180 _ => panic!("expected word list"),
3181 }
3182 }
3183 _ => panic!("expected for command"),
3184 }
3185 }
3186
3187 #[test]
3188 fn test_case() {
3189 let prog = parse("case $x in a) echo a;; b) echo b;; esac").unwrap();
3190 match &prog.lists[0].sublist.pipe.cmd {
3191 ZshCommand::Case(c) => {
3192 assert_eq!(c.arms.len(), 2);
3193 }
3194 _ => panic!("expected case command"),
3195 }
3196 }
3197
3198 #[test]
3199 fn test_function() {
3200 // First test just parsing "function foo" to see what happens
3201 let prog = parse("function foo { }").unwrap();
3202 match &prog.lists[0].sublist.pipe.cmd {
3203 ZshCommand::FuncDef(f) => {
3204 assert_eq!(f.names, vec!["foo"]);
3205 }
3206 _ => panic!(
3207 "expected function, got {:?}",
3208 prog.lists[0].sublist.pipe.cmd
3209 ),
3210 }
3211 }
3212
3213 #[test]
3214 fn test_redirection() {
3215 let prog = parse("echo hello > file.txt").unwrap();
3216 match &prog.lists[0].sublist.pipe.cmd {
3217 ZshCommand::Simple(s) => {
3218 assert_eq!(s.redirs.len(), 1);
3219 assert_eq!(s.redirs[0].rtype, RedirType::Write);
3220 }
3221 _ => panic!("expected simple command"),
3222 }
3223 }
3224
3225 #[test]
3226 fn test_assignment() {
3227 let prog = parse("FOO=bar echo $FOO").unwrap();
3228 match &prog.lists[0].sublist.pipe.cmd {
3229 ZshCommand::Simple(s) => {
3230 assert_eq!(s.assigns.len(), 1);
3231 assert_eq!(s.assigns[0].name, "FOO");
3232 }
3233 _ => panic!("expected simple command"),
3234 }
3235 }
3236
3237 #[test]
3238 fn test_parse_completion_function() {
3239 let input = r#"_2to3_fixes() {
3240 local -a fixes
3241 fixes=( ${${(M)${(f)"$(2to3 --list-fixes 2>/dev/null)"}:#*}//[[:space:]]/} )
3242 (( ${#fixes} )) && _describe -t fixes 'fix' fixes
3243}"#;
3244 let result = parse(input);
3245 assert!(
3246 result.is_ok(),
3247 "Failed to parse completion function: {:?}",
3248 result.err()
3249 );
3250 let prog = result.unwrap();
3251 assert!(
3252 !prog.lists.is_empty(),
3253 "Expected at least one list in program"
3254 );
3255 }
3256
3257 #[test]
3258 fn test_parse_array_with_complex_elements() {
3259 let input = r#"arguments=(
3260 '(- * :)'{-h,--help}'[show this help message and exit]'
3261 {-d,--doctests_only}'[fix up doctests only]'
3262 '*:filename:_files'
3263)"#;
3264 let result = parse(input);
3265 assert!(
3266 result.is_ok(),
3267 "Failed to parse array assignment: {:?}",
3268 result.err()
3269 );
3270 }
3271
3272 #[test]
3273 fn test_parse_full_completion_file() {
3274 let input = r##"#compdef 2to3
3275
3276# zsh completions for '2to3'
3277
3278_2to3_fixes() {
3279 local -a fixes
3280 fixes=( ${${(M)${(f)"$(2to3 --list-fixes 2>/dev/null)"}:#*}//[[:space:]]/} )
3281 (( ${#fixes} )) && _describe -t fixes 'fix' fixes
3282}
3283
3284local -a arguments
3285
3286arguments=(
3287 '(- * :)'{-h,--help}'[show this help message and exit]'
3288 {-d,--doctests_only}'[fix up doctests only]'
3289 {-f,--fix}'[each FIX specifies a transformation; default: all]:fix name:_2to3_fixes'
3290 {-j,--processes}'[run 2to3 concurrently]:number: '
3291 {-x,--nofix}'[prevent a transformation from being run]:fix name:_2to3_fixes'
3292 {-l,--list-fixes}'[list available transformations]'
3293 {-p,--print-function}'[modify the grammar so that print() is a function]'
3294 {-v,--verbose}'[more verbose logging]'
3295 '--no-diffs[do not show diffs of the refactoring]'
3296 {-w,--write}'[write back modified files]'
3297 {-n,--nobackups}'[do not write backups for modified files]'
3298 {-o,--output-dir}'[put output files in this directory instead of overwriting]:directory:_directories'
3299 {-W,--write-unchanged-files}'[also write files even if no changes were required]'
3300 '--add-suffix[append this string to all output filenames]:suffix: '
3301 '*:filename:_files'
3302)
3303
3304_arguments -s -S $arguments
3305"##;
3306 let result = parse(input);
3307 assert!(
3308 result.is_ok(),
3309 "Failed to parse full completion file: {:?}",
3310 result.err()
3311 );
3312 let prog = result.unwrap();
3313 // Should have parsed successfully with at least one statement
3314 assert!(!prog.lists.is_empty(), "Expected at least one list");
3315 }
3316
3317 #[test]
3318 fn test_parse_logs_sh() {
3319 let input = r#"#!/usr/bin/env bash
3320shopt -s globstar
3321
3322if [[ $(uname) == Darwin ]]; then
3323 tail -f /var/log/**/*.log /var/log/**/*.out | lolcat
3324else
3325 if [[ $ZPWR_DISTRO_NAME == raspbian ]]; then
3326 tail -f /var/log/**/*.log | lolcat
3327 else
3328 printf "Unsupported...\n" >&2
3329 fi
3330fi
3331"#;
3332 let result = parse(input);
3333 assert!(
3334 result.is_ok(),
3335 "Failed to parse logs.sh: {:?}",
3336 result.err()
3337 );
3338 }
3339
3340 #[test]
3341 fn test_parse_case_with_glob() {
3342 let input = r#"case "$ZPWR_OS_TYPE" in
3343 darwin*) open_cmd='open'
3344 ;;
3345 cygwin*) open_cmd='cygstart'
3346 ;;
3347 linux*)
3348 open_cmd='xdg-open'
3349 ;;
3350esac"#;
3351 let result = parse(input);
3352 assert!(
3353 result.is_ok(),
3354 "Failed to parse case with glob: {:?}",
3355 result.err()
3356 );
3357 }
3358
3359 #[test]
3360 fn test_parse_case_with_nested_if() {
3361 // Test case with nested if and glob patterns
3362 let input = r##"function zpwrGetOpenCommand(){
3363 local open_cmd
3364 case "$ZPWR_OS_TYPE" in
3365 darwin*) open_cmd='open' ;;
3366 cygwin*) open_cmd='cygstart' ;;
3367 linux*)
3368 if [[ "$_zpwr_uname_r" != *icrosoft* ]];then
3369 open_cmd='nohup xdg-open'
3370 fi
3371 ;;
3372 esac
3373}"##;
3374 let result = parse(input);
3375 assert!(result.is_ok(), "Failed to parse: {:?}", result.err());
3376 }
3377
3378 #[test]
3379 fn test_parse_zpwr_scripts() {
3380 use std::fs;
3381 use std::path::Path;
3382 use std::sync::mpsc;
3383 use std::thread;
3384 use std::time::{Duration, Instant};
3385
3386 let scripts_dir = Path::new("/Users/wizard/.zpwr/scripts");
3387 if !scripts_dir.exists() {
3388 eprintln!("Skipping test: scripts directory not found");
3389 return;
3390 }
3391
3392 let mut total = 0;
3393 let mut passed = 0;
3394 let mut failed_files = Vec::new();
3395 let mut timeout_files = Vec::new();
3396
3397 for ext in &["sh", "zsh"] {
3398 let pattern = scripts_dir.join(format!("*.{}", ext));
3399 if let Ok(entries) = glob::glob(pattern.to_str().unwrap()) {
3400 for entry in entries.flatten() {
3401 total += 1;
3402 let file_path = entry.display().to_string();
3403 let content = match fs::read_to_string(&entry) {
3404 Ok(c) => c,
3405 Err(e) => {
3406 failed_files.push((file_path, format!("read error: {}", e)));
3407 continue;
3408 }
3409 };
3410
3411 // Parse with timeout
3412 let content_clone = content.clone();
3413 let (tx, rx) = mpsc::channel();
3414 let handle = thread::spawn(move || {
3415 let result = parse(&content_clone);
3416 let _ = tx.send(result);
3417 });
3418
3419 match rx.recv_timeout(Duration::from_secs(2)) {
3420 Ok(Ok(_)) => passed += 1,
3421 Ok(Err(errors)) => {
3422 let first_err = errors
3423 .first()
3424 .map(|e| format!("line {}: {}", e.line, e.message))
3425 .unwrap_or_default();
3426 failed_files.push((file_path, first_err));
3427 }
3428 Err(_) => {
3429 timeout_files.push(file_path);
3430 // Thread will be abandoned
3431 }
3432 }
3433 }
3434 }
3435 }
3436
3437 eprintln!("\n=== ZPWR Scripts Parse Results ===");
3438 eprintln!("Passed: {}/{}", passed, total);
3439
3440 if !timeout_files.is_empty() {
3441 eprintln!("\nTimeout files (>2s):");
3442 for file in &timeout_files {
3443 eprintln!(" {}", file);
3444 }
3445 }
3446
3447 if !failed_files.is_empty() {
3448 eprintln!("\nFailed files:");
3449 for (file, err) in &failed_files {
3450 eprintln!(" {} - {}", file, err);
3451 }
3452 }
3453
3454 // Allow some failures initially, but track progress
3455 let pass_rate = if total > 0 {
3456 (passed as f64 / total as f64) * 100.0
3457 } else {
3458 0.0
3459 };
3460 eprintln!("Pass rate: {:.1}%", pass_rate);
3461
3462 // Require at least 50% pass rate for now
3463 assert!(pass_rate >= 50.0, "Pass rate too low: {:.1}%", pass_rate);
3464 }
3465
3466 #[test]
3467 #[ignore] // Uses threads that can't be killed on timeout; use integration test instead
3468 fn test_parse_zsh_stdlib_functions() {
3469 use std::fs;
3470 use std::path::Path;
3471 use std::sync::mpsc;
3472 use std::thread;
3473 use std::time::Duration;
3474
3475 let functions_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("test_data/zsh_functions");
3476 if !functions_dir.exists() {
3477 eprintln!(
3478 "Skipping test: zsh_functions directory not found at {:?}",
3479 functions_dir
3480 );
3481 return;
3482 }
3483
3484 let mut total = 0;
3485 let mut passed = 0;
3486 let mut failed_files = Vec::new();
3487 let mut timeout_files = Vec::new();
3488
3489 if let Ok(entries) = fs::read_dir(&functions_dir) {
3490 for entry in entries.flatten() {
3491 let path = entry.path();
3492 if !path.is_file() {
3493 continue;
3494 }
3495
3496 total += 1;
3497 let file_path = path.display().to_string();
3498 let content = match fs::read_to_string(&path) {
3499 Ok(c) => c,
3500 Err(e) => {
3501 failed_files.push((file_path, format!("read error: {}", e)));
3502 continue;
3503 }
3504 };
3505
3506 // Parse with timeout
3507 let content_clone = content.clone();
3508 let (tx, rx) = mpsc::channel();
3509 thread::spawn(move || {
3510 let result = parse(&content_clone);
3511 let _ = tx.send(result);
3512 });
3513
3514 match rx.recv_timeout(Duration::from_secs(2)) {
3515 Ok(Ok(_)) => passed += 1,
3516 Ok(Err(errors)) => {
3517 let first_err = errors
3518 .first()
3519 .map(|e| format!("line {}: {}", e.line, e.message))
3520 .unwrap_or_default();
3521 failed_files.push((file_path, first_err));
3522 }
3523 Err(_) => {
3524 timeout_files.push(file_path);
3525 }
3526 }
3527 }
3528 }
3529
3530 eprintln!("\n=== Zsh Stdlib Functions Parse Results ===");
3531 eprintln!("Passed: {}/{}", passed, total);
3532
3533 if !timeout_files.is_empty() {
3534 eprintln!("\nTimeout files (>2s): {}", timeout_files.len());
3535 for file in timeout_files.iter().take(10) {
3536 eprintln!(" {}", file);
3537 }
3538 if timeout_files.len() > 10 {
3539 eprintln!(" ... and {} more", timeout_files.len() - 10);
3540 }
3541 }
3542
3543 if !failed_files.is_empty() {
3544 eprintln!("\nFailed files: {}", failed_files.len());
3545 for (file, err) in failed_files.iter().take(20) {
3546 let filename = Path::new(file)
3547 .file_name()
3548 .unwrap_or_default()
3549 .to_string_lossy();
3550 eprintln!(" {} - {}", filename, err);
3551 }
3552 if failed_files.len() > 20 {
3553 eprintln!(" ... and {} more", failed_files.len() - 20);
3554 }
3555 }
3556
3557 let pass_rate = if total > 0 {
3558 (passed as f64 / total as f64) * 100.0
3559 } else {
3560 0.0
3561 };
3562 eprintln!("Pass rate: {:.1}%", pass_rate);
3563
3564 // Require at least 50% pass rate
3565 assert!(pass_rate >= 50.0, "Pass rate too low: {:.1}%", pass_rate);
3566 }
3567}