zshrs_parse/parser.rs
1//! Zsh parser - Direct port from zsh/Src/parse.c
2//!
3//! This parser takes tokens from the ZshLexer and builds an AST.
4//! It follows the zsh grammar closely, producing structures that
5//! can be executed by the shell executor.
6
7use crate::lexer::ZshLexer;
8use crate::tokens::LexTok;
9use serde::{Deserialize, Serialize};
10
11/// AST node for a complete program (list of commands)
12#[derive(Debug, Clone, Serialize, Deserialize)]
13pub struct ZshProgram {
14 pub lists: Vec<ZshList>,
15}
16
17/// A list is a sequence of sublists separated by ; or & or newline
18#[derive(Debug, Clone, Serialize, Deserialize)]
19pub struct ZshList {
20 pub sublist: ZshSublist,
21 pub flags: ListFlags,
22}
23
24#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
25pub struct ListFlags {
26 /// Run asynchronously (&)
27 pub async_: bool,
28 /// Disown after running (&| or &!)
29 pub disown: bool,
30}
31
32/// A sublist is pipelines connected by && or ||
33#[derive(Debug, Clone, Serialize, Deserialize)]
34pub struct ZshSublist {
35 pub pipe: ZshPipe,
36 pub next: Option<(SublistOp, Box<ZshSublist>)>,
37 pub flags: SublistFlags,
38}
39
40#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
41pub enum SublistOp {
42 And, // &&
43 Or, // ||
44}
45
46#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
47pub struct SublistFlags {
48 /// Coproc
49 pub coproc: bool,
50 /// Negated with !
51 pub not: bool,
52}
53
54/// A pipeline is commands connected by |
55#[derive(Debug, Clone, Serialize, Deserialize)]
56pub struct ZshPipe {
57 pub cmd: ZshCommand,
58 pub next: Option<Box<ZshPipe>>,
59 pub lineno: u64,
60 /// `|&` between this stage and the next — merge stderr into the
61 /// pipe so the next stage's stdin sees both stdout AND stderr from
62 /// this stage. When `next` is None this flag is meaningless.
63 #[serde(default)]
64 pub merge_stderr: bool,
65}
66
67/// A command
68#[derive(Debug, Clone, Serialize, Deserialize)]
69pub enum ZshCommand {
70 Simple(ZshSimple),
71 Subsh(Box<ZshProgram>), // (list)
72 Cursh(Box<ZshProgram>), // {list}
73 For(ZshFor),
74 Case(ZshCase),
75 If(ZshIf),
76 While(ZshWhile),
77 Until(ZshWhile),
78 Repeat(ZshRepeat),
79 FuncDef(ZshFuncDef),
80 Time(Option<Box<ZshSublist>>),
81 Cond(ZshCond), // [[ ... ]]
82 Arith(String), // (( ... ))
83 Try(ZshTry), // { ... } always { ... }
84 /// Compound command with trailing redirects:
85 /// `{ cmd } 2>&1`, `(...) >file`, `if ...; fi >file`, etc.
86 /// Simple commands carry redirects in their own struct; this wrapper
87 /// is only used for compound forms.
88 Redirected(Box<ZshCommand>, Vec<ZshRedir>),
89}
90
91/// A simple command (assignments, words, redirections)
92#[derive(Debug, Clone, Serialize, Deserialize)]
93pub struct ZshSimple {
94 pub assigns: Vec<ZshAssign>,
95 pub words: Vec<String>,
96 pub redirs: Vec<ZshRedir>,
97}
98
99/// An assignment
100#[derive(Debug, Clone, Serialize, Deserialize)]
101pub struct ZshAssign {
102 pub name: String,
103 pub value: ZshAssignValue,
104 pub append: bool, // +=
105}
106
107#[derive(Debug, Clone, Serialize, Deserialize)]
108pub enum ZshAssignValue {
109 Scalar(String),
110 Array(Vec<String>),
111}
112
113/// A redirection
114#[derive(Debug, Clone, Serialize, Deserialize)]
115pub struct ZshRedir {
116 pub rtype: RedirType,
117 pub fd: i32,
118 pub name: String,
119 pub heredoc: Option<HereDocInfo>,
120 pub varid: Option<String>, // {var}>file
121 /// Index into ZshLexer.heredocs[] for body lookup. Filled in by
122 /// `parse_redirection` for Heredoc/HeredocDash, then resolved into
123 /// `heredoc.content` by `fill_heredoc_bodies` after process_heredocs
124 /// has run for the line.
125 #[serde(skip)]
126 pub heredoc_idx: Option<usize>,
127}
128
129#[derive(Debug, Clone, Serialize, Deserialize)]
130pub struct HereDocInfo {
131 pub content: String,
132 pub terminator: String,
133 /// Originally-quoted terminator (`<<'EOF'`, `<<"EOF"`). When true the
134 /// body is passed verbatim — no `$var` / `$(cmd)` / `$((expr))`
135 /// expansion. Plain `<<EOF` runs all expansions.
136 #[serde(default)]
137 pub quoted: bool,
138}
139
140/// Redirection type
141#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
142pub enum RedirType {
143 Write, // >
144 Writenow, // >|
145 Append, // >>
146 Appendnow, // >>|
147 Read, // <
148 ReadWrite, // <>
149 Heredoc, // <<
150 HeredocDash, // <<-
151 Herestr, // <<<
152 MergeIn, // <&
153 MergeOut, // >&
154 ErrWrite, // &>
155 ErrWritenow, // &>|
156 ErrAppend, // >>&
157 ErrAppendnow, // >>&|
158 InPipe, // < <(...)
159 OutPipe, // > >(...)
160}
161
162/// For loop
163#[derive(Debug, Clone, Serialize, Deserialize)]
164pub struct ZshFor {
165 pub var: String,
166 pub list: ForList,
167 pub body: Box<ZshProgram>,
168 /// True if this was parsed as `select` rather than `for`. Both share
169 /// the same parser, so the compiler routes on this flag.
170 #[serde(default)]
171 pub is_select: bool,
172}
173
174#[derive(Debug, Clone, Serialize, Deserialize)]
175pub enum ForList {
176 Words(Vec<String>),
177 CStyle {
178 init: String,
179 cond: String,
180 step: String,
181 },
182 Positional,
183}
184
185/// Case statement
186#[derive(Debug, Clone, Serialize, Deserialize)]
187pub struct ZshCase {
188 pub word: String,
189 pub arms: Vec<CaseArm>,
190}
191
192#[derive(Debug, Clone, Serialize, Deserialize)]
193pub struct CaseArm {
194 pub patterns: Vec<String>,
195 pub body: ZshProgram,
196 pub terminator: CaseTerm,
197}
198
199#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
200pub enum CaseTerm {
201 Break, // ;;
202 Continue, // ;&
203 TestNext, // ;|
204}
205
206/// If statement
207#[derive(Debug, Clone, Serialize, Deserialize)]
208pub struct ZshIf {
209 pub cond: Box<ZshProgram>,
210 pub then: Box<ZshProgram>,
211 pub elif: Vec<(ZshProgram, ZshProgram)>,
212 pub else_: Option<Box<ZshProgram>>,
213}
214
215/// While/Until loop
216#[derive(Debug, Clone, Serialize, Deserialize)]
217pub struct ZshWhile {
218 pub cond: Box<ZshProgram>,
219 pub body: Box<ZshProgram>,
220 pub until: bool,
221}
222
223/// Repeat loop
224#[derive(Debug, Clone, Serialize, Deserialize)]
225pub struct ZshRepeat {
226 pub count: String,
227 pub body: Box<ZshProgram>,
228}
229
230/// Function definition
231#[derive(Debug, Clone, Serialize, Deserialize)]
232pub struct ZshFuncDef {
233 pub names: Vec<String>,
234 pub body: Box<ZshProgram>,
235 pub tracing: bool,
236 /// Anonymous-function call args. `() { body } a b` parses as a
237 /// FuncDef (auto-named) with `auto_call_args = Some(vec!["a", "b"])`.
238 /// compile_funcdef registers the function then emits a Simple call
239 /// with these args.
240 #[serde(default)]
241 pub auto_call_args: Option<Vec<String>>,
242 /// Original source text of the function body (the bytes between
243 /// `{` and `}`, without the braces themselves), captured at parse
244 /// time. Populated for `function name { body }` and `function name() { body }`
245 /// forms; left None for the synthesized inline-funcdef recovery
246 /// path. ZshCompiler::compile_funcdef forwards it to
247 /// `BUILTIN_REGISTER_COMPILED_FN` so introspection (`whence`, `which`,
248 /// `${functions[name]}`) has canonical source text.
249 #[serde(default)]
250 pub body_source: Option<String>,
251}
252
253/// Conditional expression [[ ... ]]
254#[derive(Debug, Clone, Serialize, Deserialize)]
255pub enum ZshCond {
256 Not(Box<ZshCond>),
257 And(Box<ZshCond>, Box<ZshCond>),
258 Or(Box<ZshCond>, Box<ZshCond>),
259 Unary(String, String), // -f file, -n str, etc.
260 Binary(String, String, String), // str = pat, a -eq b, etc.
261 Regex(String, String), // str =~ regex
262}
263
264/// Try/always block
265#[derive(Debug, Clone, Serialize, Deserialize)]
266pub struct ZshTry {
267 pub try_block: Box<ZshProgram>,
268 pub always: Box<ZshProgram>,
269}
270
271/// Zsh parameter expansion flags
272#[derive(Debug, Clone, Serialize, Deserialize)]
273pub enum ZshParamFlag {
274 Lower, // L - lowercase
275 Upper, // U - uppercase
276 Capitalize, // C - capitalize words
277 Join(String), // j:sep: - join array with separator
278 JoinNewline, // F - join with newlines
279 Split(String), // s:sep: - split string into array
280 SplitLines, // f - split on newlines
281 SplitWords, // z - split into words (shell parsing)
282 Type, // t - type of variable
283 Words, // w - word splitting
284 Quote, // qq - single-quote always
285 QuoteIfNeeded, // q+ - single-quote only if needed
286 DoubleQuote, // qqq - double-quote
287 DollarQuote, // qqqq - $'...' style
288 QuoteBackslash, // q / b / B - backslash-escape special chars
289 Unique, // u - unique elements only
290 Reverse, // O - reverse sort
291 Sort, // o - sort
292 NumericSort, // n - numeric sort
293 IndexSort, // a - sort in array index order
294 Keys, // k - associative array keys
295 Values, // v - associative array values
296 Length, // # - length (character codes)
297 CountChars, // c - count total characters
298 Expand, // e - perform shell expansions
299 PromptExpand, // % - expand prompt escapes
300 PromptExpandFull, // %% - full prompt expansion
301 Visible, // V - make non-printable chars visible
302 Directory, // D - substitute directory names
303 Head(usize), // [1,n] - first n elements
304 Tail(usize), // [-n,-1] - last n elements
305 PadLeft(usize, char), // l:len:fill: - pad left
306 PadRight(usize, char), // r:len:fill: - pad right
307 Width(usize), // m - use width for padding
308 Match, // M - include matched portion
309 Remove, // R - include non-matched portion (complement of M)
310 Subscript, // S - subscript scanning
311 Parameter, // P - use value as parameter name (indirection)
312 Glob, // ~ - glob patterns in pattern
313 /// `@` flag — force array-context behavior even inside DQ. zsh's
314 /// `"${(@o)arr}"` keeps the sort active and splices each element as
315 /// its own word. Without this, the array-only flags became no-ops
316 /// in DQ.
317 At,
318}
319
320/// List operator (for shell command lists)
321#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
322pub enum ListOp {
323 And, // &&
324 Or, // ||
325 Semi, // ;
326 Amp, // &
327 Newline, // \n
328}
329
330/// Shell word - can be simple literal or complex expansion
331#[derive(Debug, Clone, Serialize, Deserialize)]
332pub enum ShellWord {
333 /// Plain text token. Most ZWC-decoded words land here. Goes through
334 /// `expand_string` (plus glob/tilde/etc. as text-level transforms) for
335 /// final output.
336 Literal(String),
337 /// Concatenation of sub-words. ZWC array decoding produces this with
338 /// child Literals; nothing else constructs it now that the legacy
339 /// hand-rolled parser is gone.
340 Concat(Vec<ShellWord>),
341}
342
343/// Variable modifier for parameter expansion
344#[derive(Debug, Clone, Serialize, Deserialize)]
345pub enum VarModifier {
346 Default(ShellWord),
347 DefaultAssign(ShellWord),
348 Error(ShellWord),
349 Alternate(ShellWord),
350 Length,
351 Substring(i64, Option<i64>),
352 RemovePrefix(ShellWord),
353 RemovePrefixLong(ShellWord),
354 RemoveSuffix(ShellWord),
355 RemoveSuffixLong(ShellWord),
356 Replace(ShellWord, ShellWord),
357 ReplaceAll(ShellWord, ShellWord),
358 Upper,
359 Lower,
360}
361
362/// Shell command - the old shell_ast compatible type
363#[derive(Debug, Clone, Serialize, Deserialize)]
364pub enum ShellCommand {
365 Simple(SimpleCommand),
366 Pipeline(Vec<ShellCommand>, bool),
367 List(Vec<(ShellCommand, ListOp)>),
368 Compound(CompoundCommand),
369 FunctionDef(String, Box<ShellCommand>),
370}
371
372/// Simple command with assignments, words, and redirects
373#[derive(Debug, Clone, Serialize, Deserialize)]
374pub struct SimpleCommand {
375 pub assignments: Vec<(String, ShellWord, bool)>,
376 pub words: Vec<ShellWord>,
377 pub redirects: Vec<Redirect>,
378}
379
380/// Redirect
381#[derive(Debug, Clone, Serialize, Deserialize)]
382pub struct Redirect {
383 pub fd: Option<i32>,
384 pub op: RedirectOp,
385 pub target: ShellWord,
386 pub heredoc_content: Option<String>,
387 pub fd_var: Option<String>,
388}
389
390/// Redirect operator
391#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
392pub enum RedirectOp {
393 Write,
394 Append,
395 Read,
396 ReadWrite,
397 Clobber,
398 DupRead,
399 DupWrite,
400 HereDoc,
401 HereString,
402 WriteBoth,
403 AppendBoth,
404}
405
406/// Compound command
407#[derive(Debug, Clone, Serialize, Deserialize)]
408pub enum CompoundCommand {
409 BraceGroup(Vec<ShellCommand>),
410 Subshell(Vec<ShellCommand>),
411 If {
412 conditions: Vec<(Vec<ShellCommand>, Vec<ShellCommand>)>,
413 else_part: Option<Vec<ShellCommand>>,
414 },
415 For {
416 var: String,
417 words: Option<Vec<ShellWord>>,
418 body: Vec<ShellCommand>,
419 },
420 ForArith {
421 init: String,
422 cond: String,
423 step: String,
424 body: Vec<ShellCommand>,
425 },
426 While {
427 condition: Vec<ShellCommand>,
428 body: Vec<ShellCommand>,
429 },
430 Until {
431 condition: Vec<ShellCommand>,
432 body: Vec<ShellCommand>,
433 },
434 Case {
435 word: ShellWord,
436 cases: Vec<(Vec<ShellWord>, Vec<ShellCommand>, CaseTerminator)>,
437 },
438 Select {
439 var: String,
440 words: Option<Vec<ShellWord>>,
441 body: Vec<ShellCommand>,
442 },
443 Coproc {
444 name: Option<String>,
445 body: Box<ShellCommand>,
446 },
447 /// repeat N do ... done
448 Repeat {
449 count: String,
450 body: Vec<ShellCommand>,
451 },
452 /// { try-block } always { always-block }
453 Try {
454 try_body: Vec<ShellCommand>,
455 always_body: Vec<ShellCommand>,
456 },
457 Arith(String),
458 WithRedirects(Box<ShellCommand>, Vec<Redirect>),
459}
460
461/// Case terminator
462#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
463pub enum CaseTerminator {
464 Break,
465 Fallthrough,
466 Continue,
467}
468
469/// Parse errors
470#[derive(Debug, Clone, Serialize, Deserialize)]
471pub struct ParseError {
472 pub message: String,
473 pub line: u64,
474}
475
476impl std::fmt::Display for ParseError {
477 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
478 write!(f, "parse error at line {}: {}", self.line, self.message)
479 }
480}
481
482impl std::error::Error for ParseError {}
483
484/// The Zsh Parser
485pub struct ZshParser<'a> {
486 lexer: ZshLexer<'a>,
487 errors: Vec<ParseError>,
488 /// Global iteration counter to prevent infinite loops
489 global_iterations: usize,
490 /// Recursion depth counter to prevent stack overflow
491 recursion_depth: usize,
492}
493
494const MAX_RECURSION_DEPTH: usize = 500;
495
/// Snapshot of the parser's per-call state, modelled on zsh's
/// `struct parse_stack` (declared in zsh/Src/zsh.h, used by
/// `parse_context_save` / `parse_context_restore` at parse.c:295-355).
/// Saving it before a nested parse — for instance inside a command
/// substitution — keeps that parse from clobbering the outer one.
///
/// zshrs port note: the C struct records wordcode-buffer state (ecbuf,
/// eclen, ecused, ecnpats, ecstrs, ecsoffs, ecssub, ecnfunc). zshrs
/// builds an AST instead, so all of that reduces to the two counters
/// below. Lexer-side fields (incmdpos, incond, …) live on ZshLexer and
/// are saved through the lexer's own `LexStack`, not duplicated here.
#[derive(Debug, Default, Clone)]
pub struct ParseStack {
    /// Saved value of the parser's recursion depth.
    pub recursion_depth: usize,
    /// Saved value of the parser's global iteration counter.
    pub global_iterations: usize,
}
513
514/// Walk every ZshRedir in the program and, for any with a `heredoc_idx`,
515/// pull the body+terminator out of `bodies` and stuff into `heredoc`.
516/// `bodies[i]` corresponds to the i-th heredoc registered by the lexer
517/// during scanning (in source order).
518fn fill_heredoc_bodies(prog: &mut ZshProgram, bodies: &[HereDocInfo]) {
519 for list in &mut prog.lists {
520 fill_in_sublist(&mut list.sublist, bodies);
521 }
522}
523
524fn fill_in_sublist(sub: &mut ZshSublist, bodies: &[HereDocInfo]) {
525 fill_in_pipe(&mut sub.pipe, bodies);
526 if let Some(next) = &mut sub.next {
527 fill_in_sublist(&mut next.1, bodies);
528 }
529}
530
531fn fill_in_pipe(pipe: &mut ZshPipe, bodies: &[HereDocInfo]) {
532 fill_in_command(&mut pipe.cmd, bodies);
533 if let Some(next) = &mut pipe.next {
534 fill_in_pipe(next, bodies);
535 }
536}
537
538fn fill_in_command(cmd: &mut ZshCommand, bodies: &[HereDocInfo]) {
539 match cmd {
540 ZshCommand::Simple(s) => {
541 for r in &mut s.redirs {
542 resolve_redir(r, bodies);
543 }
544 }
545 ZshCommand::Subsh(p) | ZshCommand::Cursh(p) => fill_heredoc_bodies(p, bodies),
546 ZshCommand::FuncDef(f) => fill_heredoc_bodies(&mut f.body, bodies),
547 ZshCommand::If(i) => {
548 fill_heredoc_bodies(&mut i.cond, bodies);
549 fill_heredoc_bodies(&mut i.then, bodies);
550 for (c, b) in &mut i.elif {
551 fill_heredoc_bodies(c, bodies);
552 fill_heredoc_bodies(b, bodies);
553 }
554 if let Some(e) = &mut i.else_ {
555 fill_heredoc_bodies(e, bodies);
556 }
557 }
558 ZshCommand::While(w) | ZshCommand::Until(w) => {
559 fill_heredoc_bodies(&mut w.cond, bodies);
560 fill_heredoc_bodies(&mut w.body, bodies);
561 }
562 ZshCommand::For(f) => fill_heredoc_bodies(&mut f.body, bodies),
563 ZshCommand::Case(c) => {
564 for arm in &mut c.arms {
565 fill_heredoc_bodies(&mut arm.body, bodies);
566 }
567 }
568 ZshCommand::Repeat(r) => fill_heredoc_bodies(&mut r.body, bodies),
569 ZshCommand::Time(Some(sublist)) => fill_in_sublist(sublist, bodies),
570 ZshCommand::Try(t) => {
571 fill_heredoc_bodies(&mut t.try_block, bodies);
572 fill_heredoc_bodies(&mut t.always, bodies);
573 }
574 ZshCommand::Redirected(inner, redirs) => {
575 for r in redirs {
576 resolve_redir(r, bodies);
577 }
578 fill_in_command(inner, bodies);
579 }
580 ZshCommand::Time(None) | ZshCommand::Cond(_) | ZshCommand::Arith(_) => {}
581 }
582}
583
584fn resolve_redir(r: &mut ZshRedir, bodies: &[HereDocInfo]) {
585 if let Some(idx) = r.heredoc_idx {
586 if let Some(info) = bodies.get(idx) {
587 r.heredoc = Some(info.clone());
588 }
589 }
590}
591
592/// If `list` is a Simple containing one word that ends in the
593/// `<INPAR><OUTPAR>` token pair (the lexer-port encoding of `()`),
594/// return the bare name. Used by `parse_program_until` to detect
595/// `name() {body}` style function definitions where the lexer
596/// hasn't split the `()` from the name.
597/// Detect the `name() …` shape inside a Simple. Returns the function
598/// name and (when the body was already inlined into the same Simple,
599/// e.g. `foo() echo hi`) the rest of the words as the body's argv.
600/// Returns None for non-funcdef shapes.
601fn simple_name_with_inoutpar(list: &ZshList) -> Option<(Vec<String>, Vec<String>)> {
602 if list.flags.async_ || list.sublist.next.is_some() {
603 return None;
604 }
605 let pipe = &list.sublist.pipe;
606 if pipe.next.is_some() {
607 return None;
608 }
609 let simple = match &pipe.cmd {
610 ZshCommand::Simple(s) => s,
611 _ => return None,
612 };
613 if simple.words.is_empty() || !simple.assigns.is_empty() {
614 return None;
615 }
616 let suffix = "\u{88}\u{8a}"; // INPAR + OUTPAR
617 // Find the FIRST word ending in `()`. zsh accepts the
618 // multi-name shorthand `fna fnb fnc() { body }` (parse.c:
619 // par_funcdef wordlist) — words[0..i-1] are extra names,
620 // words[i] is `lastname()`. Words after are the body argv
621 // (one-line shorthand, `name() cmd args`).
622 let par_idx = simple.words.iter().position(|w| w.ends_with(suffix))?;
623 let mut names: Vec<String> = Vec::with_capacity(par_idx + 1);
624 for w in &simple.words[..par_idx] {
625 // Earlier names must be bare identifiers, NOT contain
626 // tokens that imply they're not function names (no `()`,
627 // no quotes, no expansions). zsh's lexer enforces this
628 // at the wordlist level; we approximate by requiring the
629 // word be an identifier-shaped token after untokenize.
630 let bare = crate::lexer::untokenize(w);
631 let valid = !bare.is_empty()
632 && bare
633 .chars()
634 .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-' || c == '.' || c == '$');
635 if !valid {
636 return None;
637 }
638 names.push(bare);
639 }
640 let last = &simple.words[par_idx];
641 let bare = &last[..last.len() - suffix.len()];
642 if bare.is_empty() {
643 return None;
644 }
645 names.push(crate::lexer::untokenize(bare));
646 let rest = simple.words[par_idx + 1..].to_vec();
647 Some((names, rest))
648}
649
650impl<'a> ZshParser<'a> {
651 /// Create a new parser
652 pub fn new(input: &'a str) -> Self {
653 ZshParser {
654 lexer: ZshLexer::new(input),
655 errors: Vec::new(),
656 global_iterations: 0,
657 recursion_depth: 0,
658 }
659 }
660
661 /// Check iteration limit; returns true if exceeded
662 #[inline]
663 fn check_limit(&mut self) -> bool {
664 self.global_iterations += 1;
665 self.global_iterations > 10_000
666 }
667
668 /// Check recursion depth; returns true if exceeded
669 #[inline]
670 fn check_recursion(&mut self) -> bool {
671 self.recursion_depth > MAX_RECURSION_DEPTH
672 }
673
674 /// Save parse context onto a `ParseStack`. Direct port of
675 /// zsh/Src/parse.c:295-320 `parse_context_save`. Pushes
676 /// recursion_depth + global_iterations and resets to zero so
677 /// a nested parse can't trigger the outer parse's limits.
678 /// Lexer-side state (incmdpos / incond / etc.) saves via the
679 /// lexer's own `LexStack` since those fields live on ZshLexer.
680 pub fn parse_context_save(&mut self, ps: &mut ParseStack) {
681 // parse.c:299-317 — save parser state. zshrs collapses zsh's
682 // wordcode-buffer fields (ecbuf/eclen/ecused/ecnpats/ecstrs/
683 // ecsoffs/ecssub/ecnfunc) into the recursion+iteration pair
684 // since the AST builder doesn't use a flat wordcode buffer.
685 ps.recursion_depth = self.recursion_depth;
686 ps.global_iterations = self.global_iterations;
687 // parse.c:318-319 — clear the buffer + heredoc list so a
688 // nested parse starts from a clean slate.
689 self.recursion_depth = 0;
690 self.global_iterations = 0;
691 }
692
693 /// Restore parse context from a `ParseStack`. Direct port of
694 /// zsh/Src/parse.c:326-355 `parse_context_restore`. Inverse of
695 /// `parse_context_save`. Also clears any half-built AST state
696 /// to prevent leaking into the outer parse.
697 pub fn parse_context_restore(&mut self, ps: &ParseStack) {
698 // parse.c:330-331 — free any in-progress wordcode buffer.
699 // zshrs has no equivalent — AST nodes are owned by their
700 // parent so dropping the parser frees them.
701
702 // parse.c:333-352 — restore saved state.
703 self.recursion_depth = ps.recursion_depth;
704 self.global_iterations = ps.global_iterations;
705
706 // parse.c:354 — `errflag &= ~ERRFLAG_ERROR;` — clear the
707 // error flag so the outer parse sees a clean state. zshrs
708 // tracks errors per-parser; clearing means dropping any
709 // partial errors collected during the nested parse.
710 self.errors.clear();
711 }
712
713 /// Initialize parser status. Direct port of zsh/Src/parse.c:489-503
714 /// `init_parse_status`. Clears the per-parse-call lexer flags
715 /// so a fresh parse starts from cmd-position with no nesting
716 /// state inherited from a prior parse.
717 pub fn init_parse_status(&mut self) {
718 // parse.c:500-502 — `incasepat = incond = inredir = infor =
719 // intypeset = 0; inrepeat_ = 0; incmdpos = 1;`
720 self.lexer.incasepat = 0;
721 self.lexer.incond = 0;
722 self.lexer.inredir = false;
723 self.lexer.infor = 0;
724 self.lexer.intypeset = false;
725 self.lexer.incmdpos = true;
726 }
727
728 /// Initialize parser for a fresh parse. Direct port of
729 /// zsh/Src/parse.c:507-525 `init_parse`. C source allocates a
730 /// fresh wordcode buffer (ecbuf) sized EC_INIT_SIZE, resets the
731 /// per-parse-call counters, and calls init_parse_status. zshrs
732 /// has no flat wordcode buffer (AST is built inline) so this
733 /// function reduces to init_parse_status + recursion_depth/
734 /// global_iterations clear.
735 pub fn init_parse(&mut self) {
736 // parse.c:513-520 — init wordcode buffer. zshrs no-op.
737 self.recursion_depth = 0;
738 self.global_iterations = 0;
739 // parse.c:522 — `init_parse_status();`
740 self.init_parse_status();
741 }
742
743 /// Check whether the parsed program is empty. Direct port of
744 /// zsh/Src/parse.c:583-587 `empty_eprog`. C version checks
745 /// `*p->prog == WCB_END()` (single end-of-wordcode marker).
746 /// zshrs version checks the AST node count.
747 pub fn empty_eprog(prog: &ZshProgram) -> bool {
748 prog.lists.is_empty()
749 }
750
751 /// Clear pending here-document list. Direct port of
752 /// zsh/Src/parse.c:589-600 `clear_hdocs`. The C version walks
753 /// the global `hdocs` linked list and frees each node. zshrs
754 /// stores pending heredocs on the lexer's `heredocs` Vec —
755 /// truncating it has the same effect.
756 pub fn clear_hdocs(&mut self) {
757 self.lexer.heredocs.clear();
758 }
759
760 /// Top-level parse-event entry. Direct port of zsh/Src/parse.c:
761 /// 612-631 `parse_event`. Reads one event from the lexer (a
762 /// sublist optionally followed by SEPER/AMPER/AMPERBANG) and
763 /// returns the resulting ZshProgram.
764 ///
765 /// `endtok` is the token that terminates the event — usually
766 /// ENDINPUT, but for command-style substitutions the closing
767 /// `)` (zsh's CMD_SUBST_CLOSE).
768 ///
769 /// zshrs port note: zsh's parse_event returns an `Eprog` (heap-
770 /// allocated wordcode program). zshrs returns a `ZshProgram`
771 /// (AST root). Same role at the parse-output boundary.
772 pub fn parse_event(&mut self, endtok: LexTok) -> Option<ZshProgram> {
773 // parse.c:616-619 — reset state and prime the lexer.
774 self.lexer.tok = LexTok::Endinput;
775 self.lexer.incmdpos = true;
776 self.lexer.zshlex();
777 // parse.c:620 — `init_parse();`
778 self.init_parse();
779
780 // parse.c:622-625 — drive par_event; on failure clear hdocs.
781 if !self.par_event(endtok) {
782 self.clear_hdocs();
783 return None;
784 }
785 // parse.c:626-628 — if endtok != ENDINPUT, this is a sub-
786 // parse for a substitution that doesn't need its own eprog.
787 // zshrs returns an empty program in that case (caller
788 // discards).
789 if endtok != LexTok::Endinput {
790 return Some(ZshProgram { lists: Vec::new() });
791 }
792 // parse.c:630 — `bld_eprog(1);` — build the final eprog.
793 // zshrs has already built the AST via parse_program_until,
794 // but parse_event uses par_event directly so we need to
795 // collect what par_event accumulated.
796 Some(self.parse_program_until(None))
797 }
798
799 /// Parse one event (sublist with optional separator). Direct
800 /// port of zsh/Src/parse.c:633-695 `par_event`. Returns true if
801 /// an event was successfully parsed, false on EOF / endtok.
802 ///
803 /// zshrs port note: the C version emits wordcodes via ecadd/
804 /// set_list_code; zshrs's parser builds AST nodes via
805 /// parse_sublist + parse_list. Same flow, different output.
806 pub fn par_event(&mut self, endtok: LexTok) -> bool {
807 // parse.c:639-643 — skip leading SEPERs.
808 while self.lexer.tok == LexTok::Seper {
809 // parse.c:640-641 — at top-level (endtok == ENDINPUT),
810 // a SEPER on a fresh line ends the event.
811 if self.lexer.isnewlin > 0 && endtok == LexTok::Endinput {
812 return false;
813 }
814 self.lexer.zshlex();
815 }
816 // parse.c:644-647 — terminate on EOF or matching close-token.
817 if self.lexer.tok == LexTok::Endinput {
818 return false;
819 }
820 if self.lexer.tok == endtok {
821 return true;
822 }
823 // parse.c:649-... — drive parse_sublist + handle terminator.
824 // zshrs's parse_sublist already builds the AST node directly.
825 match self.parse_sublist() {
826 Some(_) => {
827 // parse.c:651-693 — terminator handling. zshrs's
828 // parse_list wraps this; for parse_event we just
829 // confirm the sublist parsed.
830 true
831 }
832 None => false,
833 }
834 }
835
836 /// Parse one list — non-recursing variant. Direct port of
837 /// zsh/Src/parse.c:807-817 `par_list1`. Like par_list but
838 /// doesn't recurse on the trailing-separator path; used by
839 /// callers that only want one statement (e.g. each arm of a
840 /// case body).
841 pub fn par_list1(&mut self) -> Option<ZshSublist> {
842 // parse.c:810-816 — body is a single par_sublist call wrapped
843 // in the eu/ecused tracking that zshrs doesn't need (no
844 // wordcode buffer).
845 self.parse_sublist()
846 }
847
848 /// Wire a here-document body onto the redirection token that
849 /// requested it. Direct port of zsh/Src/parse.c:2347-2361
850 /// `setheredoc`. Called when a heredoc terminator has been
851 /// matched and the body is ready to be attached to the redir.
852 ///
853 /// zshrs port note: zsh's setheredoc patches the wordcode
854 /// in-place via `pc[1] = ecstrcode(doc); pc[2] = ecstrcode(term);`.
855 /// zshrs threads heredoc bodies through `HereDocInfo` structs
856 /// that resolve_redir applies during the post-parse fill_in pass.
857 /// This method is the AST-side equivalent: writes back to the
858 /// matching redir node by index.
859 pub fn setheredoc(
860 &mut self,
861 _pc: usize,
862 _redir_type: i32,
863 _doc: &str,
864 _term: &str,
865 _munged_term: &str,
866 ) {
867 // zshrs's heredoc resolution happens in fill_in_command /
868 // resolve_redir at parser.rs top. This stub exists for API
869 // parity with the C signature; live wiring happens via
870 // self.lexer.heredocs which the post-parse pass consumes.
871 }
872
873 /// Parse a wordlist for `for ... in WORDS;`. Direct port of
874 /// zsh/Src/parse.c:2362-2378 `par_wordlist`. Reads STRING tokens
875 /// until the next SEPER / SEMI / NEWLIN.
876 pub fn par_wordlist(&mut self) -> Vec<String> {
877 let mut out = Vec::new();
878 // parse.c:2362-2378 — collect STRINGs into the wordlist.
879 while self.lexer.tok == LexTok::String {
880 if let Some(text) = self.lexer.tokstr.clone() {
881 out.push(text);
882 }
883 self.lexer.zshlex();
884 }
885 out
886 }
887
888 /// Parse a newline-separated wordlist. Direct port of
889 /// zsh/Src/parse.c:2379-2398 `par_nl_wordlist`. Like
890 /// par_wordlist but tolerates leading/trailing newlines.
891 pub fn par_nl_wordlist(&mut self) -> Vec<String> {
892 // parse.c:2380-2381 — skip leading newlines.
893 while self.lexer.tok == LexTok::Newlin {
894 self.lexer.zshlex();
895 }
896 let out = self.par_wordlist();
897 // parse.c:2395-2397 — skip trailing newlines.
898 while self.lexer.tok == LexTok::Newlin {
899 self.lexer.zshlex();
900 }
901 out
902 }
903
904 /// Get the integer value of the next token in a cond expression.
905 /// Direct port of zsh/Src/parse.c:2643-2658 `get_cond_num`.
906 /// Used for `[[ N OP M ]]` numeric tests where N/M are integer
907 /// literals or variable references.
908 pub fn get_cond_num(&mut self) -> Option<i64> {
909 if self.lexer.tok != LexTok::String {
910 return None;
911 }
912 let text = self.lexer.tokstr.as_ref()?.clone();
913 // parse.c:2647-2655 — parse as integer with optional sign.
914 let parsed = text.parse::<i64>().ok()?;
915 self.lexer.zshlex();
916 Some(parsed)
917 }
918
919 /// Emit a parser-level error. Direct port of zsh/Src/parse.c:
920 /// 2733-2766 `yyerror`. C version fills a per-event error buffer
921 /// + sets errflag. zshrs pushes onto self.errors which the
922 /// caller drains via parse()'s Result return.
923 pub fn yyerror(&mut self, msg: &str) {
924 // parse.c:2735-2765 — zsh's yyerror collects the offending
925 // token's literal text + line number. zshrs already does
926 // this via self.error() with the lexer's toklineno.
927 self.error(msg);
928 }
929
930 // ============================================================
931 // Wordcode emission stubs (parse.c private helpers)
932 //
933 // The following functions are direct counterparts of zsh's
934 // private wordcode-emission helpers in parse.c. zsh uses these
935 // to write u32 opcodes into a flat `ecbuf` array; zshrs builds
936 // an AST tree and never emits wordcode at the parse layer.
937 // The implementations are documented stubs that preserve the
938 // function signatures + cite the C source. Real wordcode would
939 // be emitted later by compile_zsh.rs walking the AST.
940 //
941 // Listed for port-surface completeness so every parse.c symbol
942 // has a Rust counterpart even when the algorithm is moot in the
943 // AST architecture.
944 // ============================================================
945
/// No-op counterpart of `set_list_code` (zsh/Src/parse.c:736-749).
///
/// In C, an `ecadd(0)` placeholder is emitted before par_sublist runs
/// and this helper later rewrites that slot with
/// `WCB_LIST(type, distance)` once the sublist length is known.
///
/// zshrs builds `ZshList { sublist, flags }` nodes directly with the
/// right flags, so there is no placeholder to patch. All three
/// arguments are ignored; the function exists only so the parse.c
/// symbol has a Rust counterpart.
pub fn set_list_code(_p: usize, _type_code: i32, _cmplx: bool) {
    // Intentionally empty: wordcode patching (parse.c:740-748) has no
    // AST equivalent.
}
960
/// No-op counterpart of `set_sublist_code` (zsh/Src/parse.c:753-763).
///
/// Plays the same role as `set_list_code`, one level down: in C it
/// rewrites a sublist placeholder with its real opcode. Every argument
/// is ignored here because no wordcode is emitted by this parser.
pub fn set_sublist_code(_p: usize, _type_code: i32, _flags: i32, _skip: i32, _cmplx: bool) {
    // Intentionally empty: wordcode patching (parse.c:757-762) has no
    // AST equivalent.
}
967
/// No-op counterpart of `ecadd` (zsh/Src/parse.c:396-408).
///
/// The C function appends one opcode to `ecbuf`, growing it on demand,
/// and returns the index of the new slot. Here nothing is stored: the
/// argument is ignored and the returned index is always `0`.
pub fn ecadd(_c: u32) -> usize {
    // parse.c:399-407 appends to ecbuf; there is no buffer on this side.
    const NO_SLOT: usize = 0;
    NO_SLOT
}
976
/// No-op counterpart of `ecdel` (zsh/Src/parse.c:412-421), which removes
/// the wordcode at position `p` in C. The argument is ignored here.
pub fn ecdel(_p: usize) {
    // Intentionally empty: parse.c:415-420 does a memmove and decrements
    // ecused; there is no wordcode buffer on this side.
}
982
/// No-op counterpart of `ecstrcode` (zsh/Src/parse.c:425-471).
///
/// In C, short strings (≤4 chars) are packed into a single wordcode and
/// longer ones go into a binary tree (Eccstr), de-duplicated via hasher
/// plus strcmp. The AST stores strings directly, so the input is
/// ignored and the returned code is always `0`.
pub fn ecstrcode(_s: &str) -> u32 {
    // parse.c:432-470 holds the real encoding logic; nothing to encode here.
    const NO_CODE: u32 = 0;
    NO_CODE
}
993
/// No-op counterpart of `ecispace` (zsh/Src/parse.c:371-388).
///
/// The C function opens up `n` empty wordcode slots at position `p`,
/// reserving room for a forward-jump opcode that is patched once the
/// target is known. Both arguments are ignored here; jumps are resolved
/// later, at compile_zsh time.
pub fn ecispace(_p: usize, _n: usize) {
    // Intentionally empty: parse.c:376-387 grows the buffer, memmoves and
    // adjusts hdocs; none of that applies to the AST.
}
1002
/// No-op counterpart of `ecadjusthere` (zsh/Src/parse.c:359-367).
///
/// In C this is called by ecispace / ecdel after they shift the buffer,
/// to fix up pending heredoc pointers. Here heredocs are tracked by
/// index in the lexer's Vec rather than by absolute wordcode offset, so
/// both arguments are ignored.
pub fn ecadjusthere(_p: usize, _d: i32) {
    // Intentionally empty: parse.c:362-366 walks the hdocs list and bumps
    // pc by d.
}
1011
1012 // ============================================================
1013 // Eprog runtime ops (parse.c:2767-2853)
1014 //
1015 // dupeprog / useeprog / freeeprog are zsh's reference-counting
1016 // helpers for executable programs. zshrs's AST is owned by
1017 // value (Rust ownership); cloning is a tree-deep copy via
1018 // Clone, "use" is a no-op (the executor borrows the AST), and
1019 // "free" is automatic on drop.
1020 // ============================================================
1021
1022 /// Duplicate an Eprog. Direct port of zsh/Src/parse.c:2767-2812
1023 /// `dupeprog`. C version deep-copies the wordcode array + string
1024 /// table + pattern progs. zshrs uses Clone on the AST.
1025 pub fn dupeprog(prog: &ZshProgram) -> ZshProgram {
1026 prog.clone()
1027 }
1028
1029 /// Increment an Eprog's reference count. Direct port of
1030 /// zsh/Src/parse.c:2813-2822 `useeprog`. zshrs no-op (Rust
1031 /// ownership).
1032 pub fn useeprog(_prog: &ZshProgram) {
1033 // parse.c:2815-2821 — `prog->nref++` if not heap-allocated.
1034 // zshrs no-op.
1035 }
1036
1037 /// Decrement / free an Eprog. Direct port of
1038 /// zsh/Src/parse.c:2823-2854 `freeeprog`. zshrs no-op (drop on
1039 /// scope-exit).
1040 pub fn freeeprog(_prog: ZshProgram) {
1041 // parse.c:2825-2853 — decrement nref, free if zero. zshrs
1042 // drops via Rust ownership.
1043 }
1044
1045 // ============================================================
1046 // Wordcode runtime getters (parse.c:2853-3060)
1047 //
1048 // These read packed wordcode out of a running Eprog at execution
1049 // time. zshrs's executor walks the AST directly so these are
1050 // stubs that preserve the C signatures + cite the source.
1051 // ============================================================
1052
/// No-op counterpart of `ecgetstr` (zsh/Src/parse.c:2853-2887).
///
/// The C function unpacks 4-char inline strings and indexes into the
/// strs table for longer ones. No wordcode stream exists here, so `_dup`
/// is ignored and an empty `String` is always returned.
pub fn ecgetstr(_dup: bool) -> String {
    // parse.c:2858-2886 holds the unpack logic; the AST stores strings
    // directly.
    String::default()
}
1061
/// No-op counterpart of `ecrawstr` (zsh/Src/parse.c:2890-2913), which in
/// C reads a packed string without consuming the wordcode pointer.
/// Always returns an empty `String` here.
pub fn ecrawstr() -> String {
    String::default()
}
1068
/// No-op counterpart of `ecgetarr` (zsh/Src/parse.c:2916-2933), which in
/// C reads a NUL-terminated string array from wordcode. Both arguments
/// are ignored and the result is always an empty vector.
pub fn ecgetarr(_num: usize, _dup: bool) -> Vec<String> {
    Vec::default()
}
1074
/// No-op counterpart of `ecgetlist` (zsh/Src/parse.c:2936-2955), which in
/// C reads a linked list of strings from wordcode. Both arguments are
/// ignored and the result is always an empty vector.
pub fn ecgetlist(_num: usize, _dup: bool) -> Vec<String> {
    Vec::default()
}
1080
1081 /// Read a sequence of redirection wordcodes. Direct port of
1082 /// zsh/Src/parse.c:2958-2991 `ecgetredirs`. zshrs no-op
1083 /// (redirections live as AST ZshRedir nodes).
1084 pub fn ecgetredirs() -> Vec<ZshRedir> {
1085 Vec::new()
1086 }
1087
1088 /// Copy consecutive redirection wordcodes into a new Eprog.
1089 /// Direct port of zsh/Src/parse.c:3001-3060 `eccopyredirs`.
1090 /// zshrs no-op.
1091 pub fn eccopyredirs() -> Option<ZshProgram> {
1092 None
1093 }
1094
/// No-op counterpart of `init_eprog` (zsh/Src/parse.c:3068-3075).
///
/// In C this sets up the dummy Eprog used as a placeholder. The AST has
/// no such dummy node; an empty program is just
/// `ZshProgram { lists: vec![] }`, so there is nothing to initialize.
pub fn init_eprog() {
    // Intentionally empty: parse.c:3071-3074 sets
    // dummy_eprog_code = WCB_END().
}
1103
1104 /// Parse the complete input
1105 pub fn parse(&mut self) -> Result<ZshProgram, Vec<ParseError>> {
1106 self.lexer.zshlex();
1107
1108 let mut program = self.parse_program_until(None);
1109
1110 if !self.errors.is_empty() {
1111 return Err(std::mem::take(&mut self.errors));
1112 }
1113 // Surface lexer-level errors (unmatched quote/heredoc/etc.)
1114 // that the parser silently rolls past. zsh aborts with a
1115 // diagnostic in this case; mirror it.
1116 if let Some(msg) = self.lexer.error.clone() {
1117 return Err(vec![ParseError {
1118 message: msg,
1119 line: 1,
1120 }]);
1121 }
1122
1123 // Post-pass: wire heredoc bodies (collected by lexer.process_heredocs)
1124 // back into ZshRedir.heredoc fields via heredoc_idx.
1125 let bodies: Vec<HereDocInfo> = self
1126 .lexer
1127 .heredocs
1128 .iter()
1129 .map(|h| HereDocInfo {
1130 content: h.content.clone(),
1131 terminator: h.terminator.clone(),
1132 quoted: h.quoted,
1133 })
1134 .collect();
1135 if !bodies.is_empty() {
1136 fill_heredoc_bodies(&mut program, &bodies);
1137 }
1138
1139 Ok(program)
1140 }
1141
1142 /// Parse a program (list of lists)
1143 /// Parse a complete program (top-level entry). Calls
1144 /// parse_program_until with no end-token sentinel. Direct port of
1145 /// zsh/Src/parse.c:614-720 `parse_event` / `parse_list` /
1146 /// `par_event` flow. C distinguishes COND_EVENT (single command
1147 /// for here-string) from full event parse; zshrs's parse_program
1148 /// is the full-event entry.
1149 fn parse_program(&mut self) -> ZshProgram {
1150 self.parse_program_until(None)
1151 }
1152
1153 /// Parse a program until we hit an end token
1154 /// Parse a program until one of `end_tokens` is seen (or EOF).
1155 /// Drives parse_list in a loop. C equivalent: the body of par_event
1156 /// (parse.c:635-695) iterating par_list against the lexer.
1157 fn parse_program_until(&mut self, end_tokens: Option<&[LexTok]>) -> ZshProgram {
1158 let mut lists = Vec::new();
1159
1160 loop {
1161 if self.check_limit() {
1162 self.error("parser exceeded global iteration limit");
1163 break;
1164 }
1165
1166 // Skip separators
1167 while self.lexer.tok == LexTok::Seper || self.lexer.tok == LexTok::Newlin {
1168 if self.check_limit() {
1169 self.error("parser exceeded global iteration limit");
1170 return ZshProgram { lists };
1171 }
1172 self.lexer.zshlex();
1173 }
1174
1175 if self.lexer.tok == LexTok::Endinput || self.lexer.tok == LexTok::Lexerr {
1176 break;
1177 }
1178
1179 // Check for end tokens
1180 if let Some(end_toks) = end_tokens {
1181 if end_toks.contains(&self.lexer.tok) {
1182 break;
1183 }
1184 }
1185
1186 // Also stop at these tokens when not explicitly looking for them
1187 // Note: Else/Elif/Then are NOT here - they're handled by parse_if
1188 // to allow nested if statements inside case arms, loops, etc.
1189 match self.lexer.tok {
1190 LexTok::Outbrace
1191 | LexTok::Dsemi
1192 | LexTok::Semiamp
1193 | LexTok::Semibar
1194 | LexTok::Done
1195 | LexTok::Fi
1196 | LexTok::Esac
1197 | LexTok::Zend => break,
1198 _ => {}
1199 }
1200
1201 match self.parse_list() {
1202 Some(list) => {
1203 let detected = simple_name_with_inoutpar(&list);
1204 lists.push(list);
1205 // Synthesize a FuncDef for the `name() { body }` shape
1206 // at parse time so body_source is captured while the
1207 // lexer still has the input. The lexer port emits
1208 // `name(` as a single Word ending in `<INPAR><OUTPAR>`,
1209 // so the Simple list is followed by an Inbrace once
1210 // separators are skipped. For `name() cmd args` the
1211 // body has already been swallowed into the same
1212 // Simple's words tail — synthesize directly from there.
1213 if let Some((names, body_argv)) = detected {
1214 if !body_argv.is_empty() {
1215 // One-line body already in the Simple. Build
1216 // a Simple from body_argv as the function body.
1217 lists.pop();
1218 let body_simple = ZshCommand::Simple(ZshSimple {
1219 assigns: Vec::new(),
1220 words: body_argv,
1221 redirs: Vec::new(),
1222 });
1223 let body_list = ZshList {
1224 sublist: ZshSublist {
1225 pipe: ZshPipe {
1226 cmd: body_simple,
1227 next: None,
1228 lineno: self.lexer.lineno,
1229 merge_stderr: false,
1230 },
1231 next: None,
1232 flags: SublistFlags::default(),
1233 },
1234 flags: ListFlags::default(),
1235 };
1236 let funcdef = ZshCommand::FuncDef(ZshFuncDef {
1237 names,
1238 body: Box::new(ZshProgram {
1239 lists: vec![body_list],
1240 }),
1241 tracing: false,
1242 auto_call_args: None,
1243 body_source: None,
1244 });
1245 let synthetic = ZshList {
1246 sublist: ZshSublist {
1247 pipe: ZshPipe {
1248 cmd: funcdef,
1249 next: None,
1250 lineno: self.lexer.lineno,
1251 merge_stderr: false,
1252 },
1253 next: None,
1254 flags: SublistFlags::default(),
1255 },
1256 flags: ListFlags::default(),
1257 };
1258 lists.push(synthetic);
1259 continue;
1260 }
1261 // Else: words.len() == 1 (only the trailing `name()`
1262 // word), brace body follows. `names` may carry
1263 // multiple identifiers from the `fna fnb fnc()`
1264 // shorthand — all share the same brace body per
1265 // src/zsh/Src/parse.c:1666 par_funcdef wordlist.
1266 // Skip separators on the real lexer; safe because
1267 // parse_program's next iteration would also skip them.
1268 while self.lexer.tok == LexTok::Seper || self.lexer.tok == LexTok::Newlin {
1269 self.lexer.zshlex();
1270 }
1271 if self.lexer.tok == LexTok::Inbrace {
1272 // Capture body_start BEFORE the lexer
1273 // advances past the first body token. The
1274 // outer zshlex() consumed `{`; lexer.pos
1275 // is now right after `{`. The next
1276 // `zshlex()` would advance past `echo`,
1277 // making body_start land mid-body and
1278 // lose the first word — `typeset -f f`
1279 // printed `a; echo b` instead of
1280 // `echo a; echo b` for `f() { echo a;
1281 // echo b }`.
1282 let body_start = self.lexer.pos;
1283 self.lexer.zshlex();
1284 let body = self.parse_program();
1285 let body_end = if self.lexer.tok == LexTok::Outbrace {
1286 self.lexer.pos.saturating_sub(1)
1287 } else {
1288 self.lexer.pos
1289 };
1290 let body_source = self
1291 .lexer
1292 .input
1293 .get(body_start..body_end)
1294 .map(|s| s.trim().to_string())
1295 .filter(|s| !s.is_empty());
1296 if self.lexer.tok == LexTok::Outbrace {
1297 self.lexer.zshlex();
1298 }
1299 // Replace the Simple list with a FuncDef list.
1300 lists.pop();
1301 let funcdef = ZshCommand::FuncDef(ZshFuncDef {
1302 names,
1303 body: Box::new(body),
1304 tracing: false,
1305 auto_call_args: None,
1306 body_source,
1307 });
1308 let synthetic = ZshList {
1309 sublist: ZshSublist {
1310 pipe: ZshPipe {
1311 cmd: funcdef,
1312 next: None,
1313 lineno: self.lexer.lineno,
1314 merge_stderr: false,
1315 },
1316 next: None,
1317 flags: SublistFlags::default(),
1318 },
1319 flags: ListFlags::default(),
1320 };
1321 lists.push(synthetic);
1322 } else if !matches!(
1323 self.lexer.tok,
1324 LexTok::Endinput | LexTok::Outbrace | LexTok::Seper | LexTok::Newlin
1325 ) {
1326 // No-brace one-line body: `foo() echo hello`.
1327 // Parse a single command for the body.
1328 let body_cmd = self.parse_cmd();
1329 if let Some(cmd) = body_cmd {
1330 let body_list = ZshList {
1331 sublist: ZshSublist {
1332 pipe: ZshPipe {
1333 cmd,
1334 next: None,
1335 lineno: self.lexer.lineno,
1336 merge_stderr: false,
1337 },
1338 next: None,
1339 flags: SublistFlags::default(),
1340 },
1341 flags: ListFlags::default(),
1342 };
1343 lists.pop();
1344 let funcdef = ZshCommand::FuncDef(ZshFuncDef {
1345 names: names.clone(),
1346 body: Box::new(ZshProgram {
1347 lists: vec![body_list],
1348 }),
1349 tracing: false,
1350 auto_call_args: None,
1351 body_source: None,
1352 });
1353 let synthetic = ZshList {
1354 sublist: ZshSublist {
1355 pipe: ZshPipe {
1356 cmd: funcdef,
1357 next: None,
1358 lineno: self.lexer.lineno,
1359 merge_stderr: false,
1360 },
1361 next: None,
1362 flags: SublistFlags::default(),
1363 },
1364 flags: ListFlags::default(),
1365 };
1366 lists.push(synthetic);
1367 }
1368 }
1369 }
1370 }
1371 None => break,
1372 }
1373 }
1374
1375 ZshProgram { lists }
1376 }
1377
1378 /// Parse a list (sublist with optional & or ;).
1379 ///
1380 /// Direct port of zsh/Src/parse.c:771-804 `par_list` (and the
1381 /// par_list1 wrapper at parse.c:807-817).
1382 ///
1383 /// **Structural divergence**: zsh's parse.c emits flat wordcode
1384 /// into the `ecbuf` u32 array via `ecadd(0)` (placeholder),
1385 /// `set_list_code(p, code, complexity)`, `wc_bdata(Z_END)`. zshrs
1386 /// builds an AST node `ZshList { sublist, flags }` instead. The
1387 /// async/sync/disown discrimination at parse.c:785-790 maps to
1388 /// zshrs's `ListFlags { async_, disown }` field — Z_SYNC is the
1389 /// default (no flags), Z_ASYNC = `&` = `async_=true`, Z_DISOWN +
1390 /// Z_ASYNC = `&!`/`&|` = both true. Same semantics, different
1391 /// representation. This divergence is repository-wide: every
1392 /// `par_*` function emits wordcode in C, every `parse_*` builds
1393 /// AST in Rust. The compile_zsh module then traverses the AST to
1394 /// emit fusevm bytecode, which serves the same role as zsh's
1395 /// wordcode but with a different opcode set and execution model.
1396 fn parse_list(&mut self) -> Option<ZshList> {
1397 let sublist = self.parse_sublist()?;
1398
1399 let flags = match self.lexer.tok {
1400 LexTok::Amper => {
1401 self.lexer.zshlex();
1402 ListFlags {
1403 async_: true,
1404 disown: false,
1405 }
1406 }
1407 LexTok::Amperbang => {
1408 self.lexer.zshlex();
1409 ListFlags {
1410 async_: true,
1411 disown: true,
1412 }
1413 }
1414 LexTok::Seper | LexTok::Semi | LexTok::Newlin => {
1415 self.lexer.zshlex();
1416 ListFlags::default()
1417 }
1418 _ => ListFlags::default(),
1419 };
1420
1421 Some(ZshList { sublist, flags })
1422 }
1423
1424 /// Parse a sublist (pipelines connected by && or ||).
1425 ///
1426 /// Direct port of zsh/Src/parse.c:825-867 `par_sublist` and
1427 /// par_sublist2 at parse.c:869-892. par_sublist handles the
1428 /// && / || conjunction and emits WC_SUBLIST opcodes; par_sublist2
1429 /// handles the leading `!` negation and `coproc` keyword.
1430 ///
1431 /// AST mapping: ZshSublist { pipe, conj_chain }, where `conj_chain`
1432 /// is a Vec<(ConjOp, ZshSublist)> for chained && / ||. C uses
1433 /// flat wordcode with WC_SUBLIST_AND / WC_SUBLIST_OR markers.
1434 fn parse_sublist(&mut self) -> Option<ZshSublist> {
1435 self.recursion_depth += 1;
1436 if self.check_recursion() {
1437 self.error("parse_sublist: max recursion depth exceeded");
1438 self.recursion_depth -= 1;
1439 return None;
1440 }
1441
1442 let mut flags = SublistFlags::default();
1443
1444 // Handle coproc and !
1445 if self.lexer.tok == LexTok::Coproc {
1446 flags.coproc = true;
1447 self.lexer.zshlex();
1448 } else if self.lexer.tok == LexTok::Bang {
1449 flags.not = true;
1450 self.lexer.zshlex();
1451 }
1452
1453 let pipe = match self.parse_pipe() {
1454 Some(p) => p,
1455 None => {
1456 self.recursion_depth -= 1;
1457 return None;
1458 }
1459 };
1460
1461 // Check for && or ||
1462 let next = match self.lexer.tok {
1463 LexTok::Damper => {
1464 self.lexer.zshlex();
1465 self.skip_separators();
1466 self.parse_sublist().map(|s| (SublistOp::And, Box::new(s)))
1467 }
1468 LexTok::Dbar => {
1469 self.lexer.zshlex();
1470 self.skip_separators();
1471 self.parse_sublist().map(|s| (SublistOp::Or, Box::new(s)))
1472 }
1473 _ => None,
1474 };
1475
1476 self.recursion_depth -= 1;
1477 Some(ZshSublist { pipe, next, flags })
1478 }
1479
1480 /// Parse a pipeline
1481 /// Parse a pipeline (cmds joined by `|` / `|&`). Direct port of
1482 /// zsh/Src/parse.c:894-956 `par_pline`. AST: ZshPipe { cmds: Vec<ZshCommand> }.
1483 /// C emits WC_PIPE wordcodes per command; same flow.
1484 fn parse_pipe(&mut self) -> Option<ZshPipe> {
1485 self.recursion_depth += 1;
1486 if self.check_recursion() {
1487 self.error("parse_pipe: max recursion depth exceeded");
1488 self.recursion_depth -= 1;
1489 return None;
1490 }
1491
1492 let lineno = self.lexer.toklineno;
1493 let cmd = match self.parse_cmd() {
1494 Some(c) => c,
1495 None => {
1496 self.recursion_depth -= 1;
1497 return None;
1498 }
1499 };
1500
1501 // Check for | or |&
1502 let mut merge_stderr = false;
1503 let next = match self.lexer.tok {
1504 LexTok::Bar | LexTok::Baramp => {
1505 merge_stderr = self.lexer.tok == LexTok::Baramp;
1506 self.lexer.zshlex();
1507 self.skip_separators();
1508 self.parse_pipe().map(Box::new)
1509 }
1510 _ => None,
1511 };
1512
1513 self.recursion_depth -= 1;
1514 Some(ZshPipe {
1515 cmd,
1516 next,
1517 lineno,
1518 merge_stderr,
1519 })
1520 }
1521
1522 /// Parse a command
1523 /// Parse a command — dispatches by leading token (FOR / CASE /
1524 /// IF / WHILE / UNTIL / REPEAT / FUNC / DINBRACK / DINPAR /
1525 /// INPAR subshell / INBRACE current-shell / TIME / NOCORRECT,
1526 /// else simple). Direct port of zsh/Src/parse.c:958-1085 `par_cmd`.
1527 fn parse_cmd(&mut self) -> Option<ZshCommand> {
1528 // Parse leading redirections
1529 let mut redirs = Vec::new();
1530 while self.lexer.tok.is_redirop() {
1531 if let Some(redir) = self.parse_redir() {
1532 redirs.push(redir);
1533 }
1534 }
1535
1536 let cmd = match self.lexer.tok {
1537 LexTok::For | LexTok::Foreach => self.parse_for(),
1538 LexTok::Select => self.parse_select(),
1539 LexTok::Case => self.parse_case(),
1540 LexTok::If => self.parse_if(),
1541 LexTok::While => self.parse_while(false),
1542 LexTok::Until => self.parse_while(true),
1543 LexTok::Repeat => self.parse_repeat(),
1544 LexTok::Inpar => self.parse_subsh(),
1545 LexTok::Inoutpar => self.parse_anon_funcdef(),
1546 LexTok::Inbrace => self.parse_cursh(),
1547 LexTok::Func => self.parse_funcdef(),
1548 LexTok::Dinbrack => self.parse_cond(),
1549 LexTok::Dinpar => self.parse_arith(),
1550 LexTok::Time => self.parse_time(),
1551 _ => self.parse_simple(redirs),
1552 };
1553
1554 // Parse trailing redirections. For Simple commands the redirs were
1555 // already captured inside parse_simple; for compound forms (Cursh,
1556 // Subsh, If, While, etc.) we collect them here and wrap in
1557 // ZshCommand::Redirected so compile_zsh can scope-bracket them.
1558 if let Some(inner) = cmd {
1559 let mut trailing: Vec<ZshRedir> = Vec::new();
1560 while self.lexer.tok.is_redirop() {
1561 if let Some(redir) = self.parse_redir() {
1562 trailing.push(redir);
1563 }
1564 }
1565 if trailing.is_empty() {
1566 return Some(inner);
1567 }
1568 // Simple already absorbed its own redirs (compile path expects
1569 // them on ZshSimple), so don't double-wrap.
1570 if matches!(inner, ZshCommand::Simple(_)) {
1571 if let ZshCommand::Simple(mut s) = inner {
1572 s.redirs.extend(trailing);
1573 return Some(ZshCommand::Simple(s));
1574 }
1575 unreachable!()
1576 }
1577 return Some(ZshCommand::Redirected(Box::new(inner), trailing));
1578 }
1579
1580 None
1581 }
1582
1583 /// Parse a simple command
1584 /// Parse a simple command (assignments + words + redirections).
1585 /// Direct port of zsh/Src/parse.c:1836-2228 `par_simple` —
1586 /// the largest single function in parse.c. Handles ENVSTRING/
1587 /// ENVARRAY assignments at command head, intermixed redirs,
1588 /// typeset-style multi-assignment commands, and the trailing
1589 /// inout-par `()` that converts a simple command into an inline
1590 /// function definition.
1591 fn parse_simple(&mut self, mut redirs: Vec<ZshRedir>) -> Option<ZshCommand> {
1592 let mut assigns = Vec::new();
1593 let mut words = Vec::new();
1594 const MAX_ITERATIONS: usize = 10_000;
1595 let mut iterations = 0;
1596
1597 // Parse leading assignments
1598 while self.lexer.tok == LexTok::Envstring || self.lexer.tok == LexTok::Envarray {
1599 iterations += 1;
1600 if iterations > MAX_ITERATIONS {
1601 self.error("parse_simple: exceeded max iterations in assignments");
1602 return None;
1603 }
1604 if let Some(assign) = self.parse_assign() {
1605 assigns.push(assign);
1606 }
1607 self.lexer.zshlex();
1608 }
1609
1610 // Parse words and redirections
1611 loop {
1612 iterations += 1;
1613 if iterations > MAX_ITERATIONS {
1614 self.error("parse_simple: exceeded max iterations");
1615 return None;
1616 }
1617 match self.lexer.tok {
1618 LexTok::String | LexTok::Typeset => {
1619 let s = self.lexer.tokstr.clone();
1620 if let Some(s) = s {
1621 words.push(s);
1622 }
1623 self.lexer.zshlex();
1624 // Check for function definition foo() { ... }
1625 if words.len() == 1 && self.peek_inoutpar() {
1626 return self.parse_inline_funcdef(words.pop().unwrap());
1627 }
1628 // `{name}>file` named-fd redirect: the lexer doesn't
1629 // recognize this shape, so the bare word `{name}`
1630 // arrives as a String. If it matches `{IDENT}` and
1631 // the NEXT token is a redirop, pop it off as the
1632 // varid for that redir.
1633 if !words.is_empty() && self.lexer.tok.is_redirop() {
1634 let last = words.last().unwrap();
1635 let untoked = crate::lexer::untokenize(last);
1636 if untoked.starts_with('{') && untoked.ends_with('}') && untoked.len() > 2 {
1637 let name = &untoked[1..untoked.len() - 1];
1638 if !name.is_empty()
1639 && name.chars().all(|c| c == '_' || c.is_ascii_alphanumeric())
1640 && name
1641 .chars()
1642 .next()
1643 .map(|c| c == '_' || c.is_ascii_alphabetic())
1644 .unwrap_or(false)
1645 {
1646 let varid = name.to_string();
1647 words.pop();
1648 if let Some(mut redir) = self.parse_redir() {
1649 redir.varid = Some(varid);
1650 redirs.push(redir);
1651 }
1652 continue;
1653 }
1654 }
1655 }
1656 }
1657 _ if self.lexer.tok.is_redirop() => {
1658 match self.parse_redir() {
1659 Some(redir) => redirs.push(redir),
1660 None => break, // Error in redir parsing, stop
1661 }
1662 }
1663 LexTok::Inoutpar if !words.is_empty() => {
1664 // foo() { ... } style function
1665 return self.parse_inline_funcdef(words.pop().unwrap());
1666 }
1667 _ => break,
1668 }
1669 }
1670
1671 if assigns.is_empty() && words.is_empty() && redirs.is_empty() {
1672 return None;
1673 }
1674
1675 Some(ZshCommand::Simple(ZshSimple {
1676 assigns,
1677 words,
1678 redirs,
1679 }))
1680 }
1681
1682 /// Parse an assignment
1683 /// Parse an assignment word `NAME=value` or `NAME=(arr items)`.
1684 /// Sub-routine of parse_simple. The C source handles assignments
1685 /// inline in par_simple via the ENVSTRING/ENVARRAY token paths
1686 /// (parse.c:1842-2000ish); zshrs splits it out to a dedicated
1687 /// helper for clarity.
1688 fn parse_assign(&mut self) -> Option<ZshAssign> {
1689 use crate::tokens::char_tokens;
1690
1691 let tokstr = self.lexer.tokstr.as_ref()?;
1692
1693 // Parse name=value or name+=value
1694 // The '=' is encoded as char_tokens::EQUALS in the token string
1695 let (name, value_str, append) = if let Some(pos) = tokstr.find(char_tokens::EQUALS) {
1696 let name_part = &tokstr[..pos];
1697 let (name, append) = if name_part.ends_with('+') {
1698 (&name_part[..name_part.len() - 1], true)
1699 } else {
1700 (name_part, false)
1701 };
1702 (
1703 name.to_string(),
1704 tokstr[pos + char_tokens::EQUALS.len_utf8()..].to_string(),
1705 append,
1706 )
1707 } else if let Some(pos) = tokstr.find('=') {
1708 // Fallback to literal '=' for compatibility
1709 let name_part = &tokstr[..pos];
1710 let (name, append) = if name_part.ends_with('+') {
1711 (&name_part[..name_part.len() - 1], true)
1712 } else {
1713 (name_part, false)
1714 };
1715 (name.to_string(), tokstr[pos + 1..].to_string(), append)
1716 } else {
1717 return None;
1718 };
1719
1720 let value = if self.lexer.tok == LexTok::Envarray {
1721 // Array assignment: name=(...)
1722 let mut elements = Vec::new();
1723 self.lexer.zshlex(); // skip past token
1724
1725 let mut arr_iters = 0;
1726 const MAX_ARRAY_ELEMENTS: usize = 10_000;
1727 while matches!(
1728 self.lexer.tok,
1729 LexTok::String | LexTok::Seper | LexTok::Newlin
1730 ) {
1731 arr_iters += 1;
1732 if arr_iters > MAX_ARRAY_ELEMENTS {
1733 self.error("array assignment exceeded maximum elements");
1734 break;
1735 }
1736 if self.lexer.tok == LexTok::String {
1737 if let Some(ref s) = self.lexer.tokstr {
1738 elements.push(s.clone());
1739 }
1740 }
1741 self.lexer.zshlex();
1742 }
1743
1744 // The closing OUTPAR is consumed here. The outer parse_simple
1745 // loop will then `zshlex()` past whatever follows (typically
1746 // a separator or the next word) — calling zshlex twice in
1747 // tandem (here AND in parse_simple) over-advances and merges
1748 // a following `name() { … }` funcdef into the same Simple.
1749 // We only consume Outpar; let the caller handle the rest.
1750 // Without this guard `g=(o1); f() { :; }` parsed as one
1751 // Simple with assigns=[g] and words=["f()"] (one token).
1752 if self.lexer.tok == LexTok::Outpar {
1753 // Note: do NOT zshlex() here. parse_simple's `self.lexer
1754 // .zshlex()` after `parse_assign` returns advances past
1755 // the Outpar onto the next significant token.
1756 //
1757 // Force `incmdpos=true` so the next zshlex() recognizes
1758 // a follow-up `b=(...)` / `b=val` as Envarray/Envstring.
1759 // The lexer flips incmdpos to false on bare Outpar (which
1760 // is correct for subshell-close context), but for an
1761 // array-assignment close more assigns/words may follow.
1762 self.lexer.incmdpos = true;
1763 }
1764
1765 ZshAssignValue::Array(elements)
1766 } else {
1767 ZshAssignValue::Scalar(value_str)
1768 };
1769
1770 Some(ZshAssign {
1771 name,
1772 value,
1773 append,
1774 })
1775 }
1776
1777 /// Parse a redirection
1778 /// Parse a redirection (>file, <file, >>file, <<HEREDOC, etc.).
1779 /// Direct port of zsh/Src/parse.c:2229-2346 `par_redir`. Returns
1780 /// a ZshRedir node carrying the operator type, fd, target word
1781 /// (or here-doc body / pipe-redir command), and any `{var}` style
1782 /// fd-binding parameter.
1783 fn parse_redir(&mut self) -> Option<ZshRedir> {
1784 let rtype = match self.lexer.tok {
1785 LexTok::Outang => RedirType::Write,
1786 LexTok::Outangbang => RedirType::Writenow,
1787 LexTok::Doutang => RedirType::Append,
1788 LexTok::Doutangbang => RedirType::Appendnow,
1789 LexTok::Inang => RedirType::Read,
1790 LexTok::Inoutang => RedirType::ReadWrite,
1791 LexTok::Dinang => RedirType::Heredoc,
1792 LexTok::Dinangdash => RedirType::HeredocDash,
1793 LexTok::Trinang => RedirType::Herestr,
1794 LexTok::Inangamp => RedirType::MergeIn,
1795 LexTok::Outangamp => RedirType::MergeOut,
1796 LexTok::Ampoutang => RedirType::ErrWrite,
1797 LexTok::Outangampbang => RedirType::ErrWritenow,
1798 LexTok::Doutangamp => RedirType::ErrAppend,
1799 LexTok::Doutangampbang => RedirType::ErrAppendnow,
1800 _ => return None,
1801 };
1802
1803 let fd = if self.lexer.tokfd >= 0 {
1804 self.lexer.tokfd
1805 } else if matches!(
1806 rtype,
1807 RedirType::Read
1808 | RedirType::ReadWrite
1809 | RedirType::MergeIn
1810 | RedirType::Heredoc
1811 | RedirType::HeredocDash
1812 | RedirType::Herestr
1813 ) {
1814 0
1815 } else {
1816 1
1817 };
1818
1819 self.lexer.zshlex();
1820
1821 let name = match self.lexer.tok {
1822 LexTok::String | LexTok::Envstring => {
1823 let n = self.lexer.tokstr.clone().unwrap_or_default();
1824 self.lexer.zshlex();
1825 n
1826 }
1827 _ => {
1828 self.error("expected word after redirection");
1829 return None;
1830 }
1831 };
1832
1833 // Heredoc body capture: when reading the terminator above, the
1834 // lexer pushed a HereDoc to self.lexer.heredocs[]. Record the
1835 // index so fill_heredoc_bodies() can wire content back after
1836 // process_heredocs() has run.
1837 let heredoc_idx = if matches!(rtype, RedirType::Heredoc | RedirType::HeredocDash) {
1838 if !self.lexer.heredocs.is_empty() {
1839 Some(self.lexer.heredocs.len() - 1)
1840 } else {
1841 None
1842 }
1843 } else {
1844 None
1845 };
1846
1847 Some(ZshRedir {
1848 rtype,
1849 fd,
1850 name,
1851 heredoc: None,
1852 varid: None,
1853 heredoc_idx,
1854 })
1855 }
1856
1857 /// Parse for/foreach loop
1858 /// Parse `for NAME in WORDS; do BODY; done` (foreach style) AND
1859 /// `for ((init; cond; incr)) do BODY done` (c-style). Direct port
1860 /// of zsh/Src/parse.c:1087-1207 `par_for`. parse_for_cstyle is the
1861 /// inner branch for the `((...))` arithmetic-header variant
1862 /// (parse.c:1100-1140 inside par_for).
1863 fn parse_for(&mut self) -> Option<ZshCommand> {
1864 let is_foreach = self.lexer.tok == LexTok::Foreach;
1865 self.lexer.zshlex();
1866
1867 // Check for C-style: for (( init; cond; step ))
1868 if self.lexer.tok == LexTok::Dinpar {
1869 return self.parse_for_cstyle();
1870 }
1871
1872 // Get variable name(s). zsh parse.c par_for accepts multiple
1873 // identifier tokens before `in`/`(`/newline — `for k v in ...`
1874 // assigns each iteration's pair of values to k and v in turn.
1875 // We store the names space-joined since variable identifiers
1876 // can't contain whitespace.
1877 let mut names: Vec<String> = Vec::new();
1878 loop {
1879 match self.lexer.tok {
1880 LexTok::String => {
1881 let v = self.lexer.tokstr.clone().unwrap_or_default();
1882 if v == "in" {
1883 break;
1884 }
1885 names.push(v);
1886 self.lexer.zshlex();
1887 }
1888 _ => break,
1889 }
1890 }
1891 if names.is_empty() {
1892 self.error("expected variable name in for");
1893 return None;
1894 }
1895 let var = names.join(" ");
1896
1897 // Skip newlines
1898 self.skip_separators();
1899
1900 // Get list. The lexer-port quirk: `for x (a b c)` arrives as a
1901 // single String token with the parens lexed-as-content
1902 // (`<INPAR>a b c<OUTPAR>`) instead of as separate Inpar/String/
1903 // Outpar tokens. Detect that shape and split it manually.
1904 let list = if self.lexer.tok == LexTok::String
1905 && self
1906 .lexer
1907 .tokstr
1908 .as_ref()
1909 .map(|s| s.starts_with('\u{88}') && s.ends_with('\u{8a}'))
1910 .unwrap_or(false)
1911 {
1912 let raw = self.lexer.tokstr.clone().unwrap_or_default();
1913 // Strip leading INPAR + trailing OUTPAR, then untokenize the
1914 // inner content and split on whitespace for the word list.
1915 let inner = &raw[raw.char_indices().nth(1).map(|(i, _)| i).unwrap_or(0)
1916 ..raw
1917 .char_indices()
1918 .last()
1919 .map(|(i, _)| i)
1920 .unwrap_or(raw.len())];
1921 let cleaned = crate::lexer::untokenize(inner);
1922 let words: Vec<String> = cleaned.split_whitespace().map(|s| s.to_string()).collect();
1923 self.lexer.zshlex();
1924 ForList::Words(words)
1925 } else if self.lexer.tok == LexTok::String {
1926 let s = self.lexer.tokstr.as_ref();
1927 if s.map(|s| s == "in").unwrap_or(false) {
1928 self.lexer.zshlex();
1929 let mut words = Vec::new();
1930 let mut word_count = 0;
1931 while self.lexer.tok == LexTok::String {
1932 word_count += 1;
1933 if word_count > 500 || self.check_limit() {
1934 self.error("for: too many words");
1935 return None;
1936 }
1937 if let Some(ref s) = self.lexer.tokstr {
1938 words.push(s.clone());
1939 }
1940 self.lexer.zshlex();
1941 }
1942 ForList::Words(words)
1943 } else {
1944 ForList::Positional
1945 }
1946 } else if self.lexer.tok == LexTok::Inpar {
1947 // for var (...)
1948 self.lexer.zshlex();
1949 let mut words = Vec::new();
1950 let mut word_count = 0;
1951 while self.lexer.tok == LexTok::String || self.lexer.tok == LexTok::Seper {
1952 word_count += 1;
1953 if word_count > 500 || self.check_limit() {
1954 self.error("for: too many words in parens");
1955 return None;
1956 }
1957 if self.lexer.tok == LexTok::String {
1958 if let Some(ref s) = self.lexer.tokstr {
1959 words.push(s.clone());
1960 }
1961 }
1962 self.lexer.zshlex();
1963 }
1964 if self.lexer.tok == LexTok::Outpar {
1965 self.lexer.zshlex();
1966 }
1967 ForList::Words(words)
1968 } else {
1969 ForList::Positional
1970 };
1971
1972 // Skip to body
1973 self.skip_separators();
1974
1975 // Parse body
1976 let body = self.parse_loop_body(is_foreach)?;
1977
1978 Some(ZshCommand::For(ZshFor {
1979 var,
1980 list,
1981 body: Box::new(body),
1982 is_select: false,
1983 }))
1984 }
1985
1986 /// Parse C-style for loop: for (( init; cond; step ))
1987 /// Parse the c-style `for ((init; cond; incr)) do BODY done`.
1988 /// Inner branch of zsh/Src/parse.c:1100-1140 inside par_for.
1989 /// Recognized when the token after FOR is DINPAR (the `((`
1990 /// detected by gettok via dbparens setup).
1991 fn parse_for_cstyle(&mut self) -> Option<ZshCommand> {
1992 // We're at (( (Dinpar None) - the opening ((
1993 // Lexer returns:
1994 // Dinpar None - opening ((
1995 // Dinpar "init" - init expression, semicolon consumed
1996 // Dinpar "cond" - cond expression, semicolon consumed
1997 // Doutpar "step" - step expression, closing )) consumed
1998
1999 self.lexer.zshlex(); // Get init: Dinpar "i=0"
2000
2001 if self.lexer.tok != LexTok::Dinpar {
2002 self.error("expected init expression in for ((");
2003 return None;
2004 }
2005 let init = self.lexer.tokstr.clone().unwrap_or_default();
2006
2007 self.lexer.zshlex(); // Get cond: Dinpar "i<10"
2008
2009 if self.lexer.tok != LexTok::Dinpar {
2010 self.error("expected condition in for ((");
2011 return None;
2012 }
2013 let cond = self.lexer.tokstr.clone().unwrap_or_default();
2014
2015 self.lexer.zshlex(); // Get step: Doutpar "i++"
2016
2017 if self.lexer.tok != LexTok::Doutpar {
2018 self.error("expected )) in for");
2019 return None;
2020 }
2021 let step = self.lexer.tokstr.clone().unwrap_or_default();
2022
2023 self.lexer.zshlex(); // Move past ))
2024
2025 self.skip_separators();
2026 let body = self.parse_loop_body(false)?;
2027
2028 Some(ZshCommand::For(ZshFor {
2029 var: String::new(),
2030 list: ForList::CStyle { init, cond, step },
2031 body: Box::new(body),
2032 is_select: false,
2033 }))
2034 }
2035
2036 /// Parse select loop (same syntax as for)
2037 /// Parse `select NAME in WORDS; do BODY; done`. Same shape as
2038 /// `for NAME in WORDS; do ...` but with menu-prompt semantics in
2039 /// the executor. C equivalent: the SELECT case in par_for at
2040 /// parse.c:1087-1207 (selects share parser flow with foreach).
2041 fn parse_select(&mut self) -> Option<ZshCommand> {
2042 // `select` shares parse_for's grammar (var, words, body) but the
2043 // compile path is different (interactive prompt loop).
2044 match self.parse_for()? {
2045 ZshCommand::For(mut f) => {
2046 f.is_select = true;
2047 Some(ZshCommand::For(f))
2048 }
2049 other => Some(other),
2050 }
2051 }
2052
2053 /// Parse case statement
2054 /// Parse `case WORD in PATTERN) BODY ;; ... esac`. Direct port
2055 /// of zsh/Src/parse.c:1209-1409 `par_case`. Each case arm is a
2056 /// (pattern_list, body, terminator) tuple where terminator is
2057 /// `;;` (default), `;&` (fallthrough), or `;|` (continue testing).
2058 fn parse_case(&mut self) -> Option<ZshCommand> {
2059 self.lexer.zshlex(); // skip 'case'
2060
2061 let word = match self.lexer.tok {
2062 LexTok::String => {
2063 let w = self.lexer.tokstr.clone().unwrap_or_default();
2064 self.lexer.zshlex();
2065 w
2066 }
2067 _ => {
2068 self.error("expected word after case");
2069 return None;
2070 }
2071 };
2072
2073 self.skip_separators();
2074
2075 // Expect 'in' or {
2076 let use_brace = self.lexer.tok == LexTok::Inbrace;
2077 if self.lexer.tok == LexTok::String {
2078 let s = self.lexer.tokstr.as_ref();
2079 if s.map(|s| s != "in").unwrap_or(true) {
2080 self.error("expected 'in' in case");
2081 return None;
2082 }
2083 } else if !use_brace {
2084 self.error("expected 'in' or '{' in case");
2085 return None;
2086 }
2087 // Set incasepat=1 BEFORE consuming "in" so the next token (which
2088 // could be a leading `(` of a paren-prefixed pattern like
2089 // `case foo in (a|b) …`) is lexed as Inpar, not as a glob-token.
2090 // Without this the `(` got swallowed into a gettokstr('(', false)
2091 // call and produced a String like "(foo)" — the parser then saw
2092 // the `)` inside a string instead of as a separate Outpar.
2093 self.lexer.incasepat = 1;
2094 self.lexer.zshlex();
2095
2096 let mut arms = Vec::new();
2097 const MAX_ARMS: usize = 10_000;
2098
2099 loop {
2100 if arms.len() > MAX_ARMS {
2101 self.error("parse_case: too many arms");
2102 break;
2103 }
2104
2105 // Set incasepat BEFORE skipping separators so lexer knows we're in case pattern context
2106 // This affects how [ and | are lexed
2107 self.lexer.incasepat = 1;
2108
2109 self.skip_separators();
2110
2111 // Check for end
2112 // Note: 'esac' might be String "esac" if incasepat > 0 prevents reserved word recognition
2113 let is_esac = self.lexer.tok == LexTok::Esac
2114 || (self.lexer.tok == LexTok::String
2115 && self
2116 .lexer
2117 .tokstr
2118 .as_ref()
2119 .map(|s| s == "esac")
2120 .unwrap_or(false));
2121 if (use_brace && self.lexer.tok == LexTok::Outbrace) || (!use_brace && is_esac) {
2122 self.lexer.incasepat = 0;
2123 self.lexer.zshlex();
2124 break;
2125 }
2126
2127 // Also break on EOF
2128 if self.lexer.tok == LexTok::Endinput || self.lexer.tok == LexTok::Lexerr {
2129 self.lexer.incasepat = 0;
2130 break;
2131 }
2132
2133 // Skip optional `(`. zsh's case grammar: `case W in (P)…)`.
2134 // The leading `(` is paired with a matching `)` that closes
2135 // the pattern itself; the arm-close `)` follows separately.
2136 // Track whether we consumed it so we can skip the matching
2137 // `)` after pattern parsing — otherwise the arm-close would
2138 // be interpreted as the pattern-close and the actual body
2139 // would get the leftover `)`.
2140 let had_leading_paren = self.lexer.tok == LexTok::Inpar;
2141 if had_leading_paren {
2142 self.lexer.zshlex();
2143 }
2144
2145 // incasepat is already set above
2146 let mut patterns = Vec::new();
2147 let mut pattern_iterations = 0;
2148 loop {
2149 pattern_iterations += 1;
2150 if pattern_iterations > 1000 {
2151 self.error("parse_case: too many pattern iterations");
2152 self.lexer.incasepat = 0;
2153 return None;
2154 }
2155
2156 if self.lexer.tok == LexTok::String {
2157 let s = self.lexer.tokstr.as_ref();
2158 if s.map(|s| s == "esac").unwrap_or(false) {
2159 break;
2160 }
2161 patterns.push(self.lexer.tokstr.clone().unwrap_or_default());
2162 // After first pattern token, set incasepat=2 so ( is treated as part of pattern
2163 self.lexer.incasepat = 2;
2164 self.lexer.zshlex();
2165 } else if self.lexer.tok != LexTok::Bar {
2166 break;
2167 }
2168
2169 if self.lexer.tok == LexTok::Bar {
2170 // Reset to 1 (start of next alternative pattern)
2171 self.lexer.incasepat = 1;
2172 self.lexer.zshlex();
2173 } else {
2174 break;
2175 }
2176 }
2177 self.lexer.incasepat = 0;
2178
2179 // Expect ). Also handle the `(P))` wrapped-pattern form:
2180 // when a leading `(` was consumed, accept an extra `)` —
2181 // the inner `)` closes the optional-paren wrapper, the
2182 // outer `)` is the arm-close. zsh accepts BOTH `(P) BODY`
2183 // (bare pattern, leading-paren is just the opt-marker, the
2184 // close is arm-close) and `(P)) BODY` (paren-wrapped
2185 // pattern, then arm-close). The first form is unambiguous
2186 // when the bare pattern was simple; the second is needed
2187 // when the body starts with `(`.
2188 if self.lexer.tok != LexTok::Outpar {
2189 self.error("expected ')' in case pattern");
2190 return None;
2191 }
2192 self.lexer.zshlex();
2193 if had_leading_paren && self.lexer.tok == LexTok::Outpar {
2194 self.lexer.zshlex();
2195 }
2196
2197 // Parse body
2198 let body = self.parse_program();
2199
2200 // Get terminator. Set incasepat=1 BEFORE the zshlex
2201 // advance so the next token (the next arm's pattern, like
2202 // `[a-z]`) gets tokenized in pattern context. Without
2203 // this, a `[`-prefixed pattern after the FIRST arm became
2204 // Inbrack instead of String and the pattern-loop bailed
2205 // out with "expected ')' in case pattern".
2206 let terminator = match self.lexer.tok {
2207 LexTok::Dsemi => {
2208 self.lexer.incasepat = 1;
2209 self.lexer.zshlex();
2210 CaseTerm::Break
2211 }
2212 LexTok::Semiamp => {
2213 self.lexer.incasepat = 1;
2214 self.lexer.zshlex();
2215 CaseTerm::Continue
2216 }
2217 LexTok::Semibar => {
2218 self.lexer.incasepat = 1;
2219 self.lexer.zshlex();
2220 CaseTerm::TestNext
2221 }
2222 _ => CaseTerm::Break,
2223 };
2224
2225 if !patterns.is_empty() {
2226 arms.push(CaseArm {
2227 patterns,
2228 body,
2229 terminator,
2230 });
2231 }
2232 }
2233
2234 Some(ZshCommand::Case(ZshCase { word, arms }))
2235 }
2236
2237 /// Parse if statement
2238 /// Parse `if COND; then BODY; [elif COND; then BODY;]* [else BODY;] fi`.
2239 /// Direct port of zsh/Src/parse.c:1411-1519 `par_if`. The C source
2240 /// emits WC_IF wordcodes per arm; zshrs builds an AST chain of
2241 /// (cond, then_body) tuples plus an optional else_body.
2242 fn parse_if(&mut self) -> Option<ZshCommand> {
2243 self.lexer.zshlex(); // skip 'if'
2244
2245 // Parse condition - stops at 'then' or '{' (zsh allows { instead of then)
2246 let cond = Box::new(self.parse_program_until(Some(&[LexTok::Then, LexTok::Inbrace])));
2247
2248 self.skip_separators();
2249
2250 // Expect 'then' or {
2251 let use_brace = self.lexer.tok == LexTok::Inbrace;
2252 if self.lexer.tok != LexTok::Then && !use_brace {
2253 self.error("expected 'then' or '{' after if condition");
2254 return None;
2255 }
2256 self.lexer.zshlex();
2257
2258 // Parse then-body - stops at else/elif/fi, or } if using brace syntax
2259 let then = if use_brace {
2260 let body = self.parse_program_until(Some(&[LexTok::Outbrace]));
2261 if self.lexer.tok == LexTok::Outbrace {
2262 self.lexer.zshlex();
2263 }
2264 Box::new(body)
2265 } else {
2266 Box::new(self.parse_program_until(Some(&[LexTok::Else, LexTok::Elif, LexTok::Fi])))
2267 };
2268
2269 // Parse elif and else (only for then/fi syntax, not brace syntax)
2270 let mut elif = Vec::new();
2271 let mut else_ = None;
2272
2273 if !use_brace {
2274 loop {
2275 self.skip_separators();
2276
2277 match self.lexer.tok {
2278 LexTok::Elif => {
2279 self.lexer.zshlex();
2280 // elif condition stops at 'then' or '{'
2281 let econd =
2282 self.parse_program_until(Some(&[LexTok::Then, LexTok::Inbrace]));
2283 self.skip_separators();
2284
2285 let elif_use_brace = self.lexer.tok == LexTok::Inbrace;
2286 if self.lexer.tok != LexTok::Then && !elif_use_brace {
2287 self.error("expected 'then' after elif");
2288 return None;
2289 }
2290 self.lexer.zshlex();
2291
2292 // elif body stops at else/elif/fi or } if using braces
2293 let ebody = if elif_use_brace {
2294 let body = self.parse_program_until(Some(&[LexTok::Outbrace]));
2295 if self.lexer.tok == LexTok::Outbrace {
2296 self.lexer.zshlex();
2297 }
2298 body
2299 } else {
2300 self.parse_program_until(Some(&[
2301 LexTok::Else,
2302 LexTok::Elif,
2303 LexTok::Fi,
2304 ]))
2305 };
2306
2307 elif.push((econd, ebody));
2308 }
2309 LexTok::Else => {
2310 self.lexer.zshlex();
2311 self.skip_separators();
2312
2313 let else_use_brace = self.lexer.tok == LexTok::Inbrace;
2314 if else_use_brace {
2315 self.lexer.zshlex();
2316 }
2317
2318 // else body stops at 'fi' or '}'
2319 else_ = Some(Box::new(if else_use_brace {
2320 let body = self.parse_program_until(Some(&[LexTok::Outbrace]));
2321 if self.lexer.tok == LexTok::Outbrace {
2322 self.lexer.zshlex();
2323 }
2324 body
2325 } else {
2326 self.parse_program_until(Some(&[LexTok::Fi]))
2327 }));
2328
2329 // Consume the 'fi' if present (not for brace syntax)
2330 if !else_use_brace && self.lexer.tok == LexTok::Fi {
2331 self.lexer.zshlex();
2332 }
2333 break;
2334 }
2335 LexTok::Fi => {
2336 self.lexer.zshlex();
2337 break;
2338 }
2339 _ => break,
2340 }
2341 }
2342 }
2343
2344 Some(ZshCommand::If(ZshIf {
2345 cond,
2346 then,
2347 elif,
2348 else_,
2349 }))
2350 }
2351
2352 /// Parse while/until loop
2353 /// Parse `while COND; do BODY; done` and `until COND; do BODY; done`.
2354 /// Direct port of zsh/Src/parse.c:1521-1563 `par_while`. The
2355 /// `until` variant is the same loop with the condition negated.
2356 fn parse_while(&mut self, until: bool) -> Option<ZshCommand> {
2357 self.lexer.zshlex(); // skip while/until
2358
2359 let cond = Box::new(self.parse_program());
2360
2361 self.skip_separators();
2362 let body = self.parse_loop_body(false)?;
2363
2364 Some(ZshCommand::While(ZshWhile {
2365 cond,
2366 body: Box::new(body),
2367 until,
2368 }))
2369 }
2370
2371 /// Parse repeat loop
2372 /// Parse `repeat N; do BODY; done`. Direct port of
2373 /// zsh/Src/parse.c:1565-1617 `par_repeat`. The C source supports
2374 /// the SHORTLOOPS short-form `repeat N CMD` (no do/done) — zshrs's
2375 /// parser doesn't yet special-case that variant.
2376 fn parse_repeat(&mut self) -> Option<ZshCommand> {
2377 self.lexer.zshlex(); // skip 'repeat'
2378
2379 let count = match self.lexer.tok {
2380 LexTok::String => {
2381 let c = self.lexer.tokstr.clone().unwrap_or_default();
2382 self.lexer.zshlex();
2383 c
2384 }
2385 _ => {
2386 self.error("expected count after repeat");
2387 return None;
2388 }
2389 };
2390
2391 self.skip_separators();
2392 let body = self.parse_loop_body(false)?;
2393
2394 Some(ZshCommand::Repeat(ZshRepeat {
2395 count,
2396 body: Box::new(body),
2397 }))
2398 }
2399
2400 /// Parse loop body (do...done, {...}, or shortloop)
2401 /// Parse the `do BODY done` body of a for/while/until/select/
2402 /// repeat loop. Direct equivalent of zsh's parse.c handling
2403 /// inside the loop builders — they all consume DOLOOP, parse a
2404 /// list until DONE, and return the list. The `foreach_style`
2405 /// flag signals foreach (where short-form `for NAME in WORDS;
2406 /// CMD` may skip do/done) vs c-style (which always requires
2407 /// do/done).
2408 fn parse_loop_body(&mut self, foreach_style: bool) -> Option<ZshProgram> {
2409 if self.lexer.tok == LexTok::Doloop {
2410 self.lexer.zshlex();
2411 let body = self.parse_program();
2412 if self.lexer.tok == LexTok::Done {
2413 self.lexer.zshlex();
2414 }
2415 Some(body)
2416 } else if self.lexer.tok == LexTok::Inbrace {
2417 self.lexer.zshlex();
2418 let body = self.parse_program();
2419 if self.lexer.tok == LexTok::Outbrace {
2420 self.lexer.zshlex();
2421 }
2422 Some(body)
2423 } else if foreach_style {
2424 // foreach allows 'end' terminator
2425 let body = self.parse_program();
2426 if self.lexer.tok == LexTok::Zend {
2427 self.lexer.zshlex();
2428 }
2429 Some(body)
2430 } else {
2431 // Short loop - single command
2432 self.parse_list()
2433 .map(|list| ZshProgram { lists: vec![list] })
2434 }
2435 }
2436
2437 /// Parse (...) subshell
2438 /// Parse a subshell `( ... )`. Direct port of zsh/Src/parse.c:1619-1670
2439 /// `par_subsh`. Body parses as a normal list; the subshell wrapper
2440 /// fork-isolates execution in the executor.
2441 fn parse_subsh(&mut self) -> Option<ZshCommand> {
2442 self.lexer.zshlex(); // skip (
2443 let prog = self.parse_program();
2444 if self.lexer.tok == LexTok::Outpar {
2445 self.lexer.zshlex();
2446 }
2447 Some(ZshCommand::Subsh(Box::new(prog)))
2448 }
2449
2450 /// `() { body } arg1 arg2 …` — anonymous function. Defines a fresh
2451 /// function named `_zshrs_anon_N`, invokes it with the args, and the
2452 /// body runs with positional params set. Implemented as the desugared
2453 /// pair (FuncDef + Simple call) so the compile path doesn't need new
2454 /// machinery.
2455 /// Parse an anonymous function definition `() { BODY }` followed
2456 /// by call args. zsh treats `() { echo hi; } a b c` as defining
2457 /// and immediately calling an anon fn with args a/b/c. C
2458 /// equivalent: the INOUTPAR shape in par_simple at parse.c:1836+
2459 /// triggers an anon-funcdef path.
2460 fn parse_anon_funcdef(&mut self) -> Option<ZshCommand> {
2461 self.lexer.zshlex(); // skip ()
2462 self.skip_separators();
2463 // No `{` after `()` → bare empty subshell shape `()`. Fall back
2464 // to a Subsh with an empty program so the status is 0 (matches
2465 // zsh's `()` no-op behavior).
2466 if self.lexer.tok != LexTok::Inbrace {
2467 return Some(ZshCommand::Subsh(Box::new(ZshProgram {
2468 lists: Vec::new(),
2469 })));
2470 }
2471 self.lexer.zshlex(); // skip {
2472 let body = self.parse_program();
2473 if self.lexer.tok == LexTok::Outbrace {
2474 self.lexer.zshlex();
2475 }
2476 // Collect any trailing args until a separator. zsh's anon-fn form
2477 // `() { body } a b c` runs body with $1=a, $2=b, $3=c.
2478 let mut args = Vec::new();
2479 while self.lexer.tok == LexTok::String {
2480 if let Some(s) = self.lexer.tokstr.clone() {
2481 args.push(s);
2482 }
2483 self.lexer.zshlex();
2484 }
2485
2486 // Generate a unique name. Module-level static would be cleaner but
2487 // a thread-local atomic is enough — anonymous functions are
2488 // ephemeral and the name isn't user-visible.
2489 use std::sync::atomic::{AtomicUsize, Ordering};
2490 static ANON_COUNTER: AtomicUsize = AtomicUsize::new(0);
2491 let n = ANON_COUNTER.fetch_add(1, Ordering::Relaxed);
2492 let name = format!("_zshrs_anon_{}", n);
2493 Some(ZshCommand::FuncDef(ZshFuncDef {
2494 names: vec![name],
2495 body: Box::new(body),
2496 tracing: false,
2497 auto_call_args: Some(args),
2498 body_source: None,
2499 }))
2500 }
2501
2502 /// Parse {...} cursh
2503 /// Parse a current-shell brace block `{ BODY }`. C source:
2504 /// par_cmd at parse.c:958-1085 handles INBRACE → emit WC_CURSH
2505 /// + recurse into list. zshrs's parse_cursh extracts that arm
2506 /// into a dedicated method.
2507 fn parse_cursh(&mut self) -> Option<ZshCommand> {
2508 self.lexer.zshlex(); // skip {
2509 let prog = self.parse_program();
2510
2511 // Check for { ... } always { ... }
2512 if self.lexer.tok == LexTok::Outbrace {
2513 self.lexer.zshlex();
2514
2515 // Check for 'always'
2516 if self.lexer.tok == LexTok::String {
2517 let s = self.lexer.tokstr.as_ref();
2518 if s.map(|s| s == "always").unwrap_or(false) {
2519 self.lexer.zshlex();
2520 self.skip_separators();
2521
2522 if self.lexer.tok == LexTok::Inbrace {
2523 self.lexer.zshlex();
2524 let always = self.parse_program();
2525 if self.lexer.tok == LexTok::Outbrace {
2526 self.lexer.zshlex();
2527 }
2528 return Some(ZshCommand::Try(ZshTry {
2529 try_block: Box::new(prog),
2530 always: Box::new(always),
2531 }));
2532 }
2533 }
2534 }
2535 }
2536
2537 Some(ZshCommand::Cursh(Box::new(prog)))
2538 }
2539
2540 /// Parse function definition
2541 /// Parse `function NAME { BODY }` or `NAME () { BODY }`. Direct
2542 /// port of zsh/Src/parse.c:1672-1785 `par_funcdef`. zsh handles
2543 /// the multiple keyword shapes (function FOO, FOO (), function FOO ()),
2544 /// the optional `[fname1 fname2 ...]` for multi-name function defs,
2545 /// and the `function FOO () { ... }` traditional/POSIX hybrid form.
2546 fn parse_funcdef(&mut self) -> Option<ZshCommand> {
2547 self.lexer.zshlex(); // skip 'function'
2548
2549 let mut names = Vec::new();
2550 let mut tracing = false;
2551
2552 // Handle options like -T and function names
2553 loop {
2554 match self.lexer.tok {
2555 LexTok::String => {
2556 let s = self.lexer.tokstr.as_ref()?;
2557 if s.starts_with('-') {
2558 if s.contains('T') {
2559 tracing = true;
2560 }
2561 self.lexer.zshlex();
2562 continue;
2563 }
2564 names.push(s.clone());
2565 self.lexer.zshlex();
2566 }
2567 LexTok::Inbrace | LexTok::Inoutpar | LexTok::Seper | LexTok::Newlin => break,
2568 _ => break,
2569 }
2570 }
2571
2572 // Optional ()
2573 let saw_paren = self.lexer.tok == LexTok::Inoutpar;
2574 if saw_paren {
2575 self.lexer.zshlex();
2576 }
2577
2578 self.skip_separators();
2579
2580 // Parse body
2581 if self.lexer.tok == LexTok::Inbrace {
2582 // Capture body_start BEFORE the lexer advances past the
2583 // first body token. After the previous zshlex consumed
2584 // `{`, lexer.pos points just past `{` (which is where the
2585 // body source starts). The next `zshlex()` would advance
2586 // past the first token (`echo`), making body_start land
2587 // mid-body and lose the first word — `typeset -f f` would
2588 // print `a; echo b` for `{ echo a; echo b }`.
2589 let body_start = self.lexer.pos;
2590 self.lexer.zshlex();
2591 let body = self.parse_program();
2592 let body_end = if self.lexer.tok == LexTok::Outbrace {
2593 // Lexer has just consumed `}`; pos is past it. Body content
2594 // ends one byte before pos.
2595 self.lexer.pos.saturating_sub(1)
2596 } else {
2597 self.lexer.pos
2598 };
2599 let body_source = self
2600 .lexer
2601 .input
2602 .get(body_start..body_end)
2603 .map(|s| s.trim().to_string())
2604 .filter(|s| !s.is_empty());
2605 if self.lexer.tok == LexTok::Outbrace {
2606 self.lexer.zshlex();
2607 }
2608
2609 // Anonymous form `function () { body } a b c` (with `()`) or
2610 // `function { body } a b c` (zsh-only shorthand, no `()`). No
2611 // name was collected. Mirror parse_anon_funcdef: synthesize
2612 // `_zshrs_anon_N`, collect trailing args, set auto_call_args
2613 // so compile_funcdef registers + immediately calls the
2614 // function with the args as positional params.
2615 if names.is_empty() {
2616 let mut args = Vec::new();
2617 while self.lexer.tok == LexTok::String {
2618 if let Some(s) = self.lexer.tokstr.clone() {
2619 args.push(s);
2620 }
2621 self.lexer.zshlex();
2622 }
2623 use std::sync::atomic::{AtomicUsize, Ordering};
2624 static ANON_COUNTER: AtomicUsize = AtomicUsize::new(0);
2625 let n = ANON_COUNTER.fetch_add(1, Ordering::Relaxed);
2626 let name = format!("_zshrs_anon_kw_{}", n);
2627 return Some(ZshCommand::FuncDef(ZshFuncDef {
2628 names: vec![name],
2629 body: Box::new(body),
2630 tracing,
2631 auto_call_args: Some(args),
2632 body_source,
2633 }));
2634 }
2635
2636 Some(ZshCommand::FuncDef(ZshFuncDef {
2637 names,
2638 body: Box::new(body),
2639 tracing,
2640 auto_call_args: None,
2641 body_source,
2642 }))
2643 } else {
2644 // Short form
2645 self.parse_list().map(|list| {
2646 ZshCommand::FuncDef(ZshFuncDef {
2647 names,
2648 body: Box::new(ZshProgram { lists: vec![list] }),
2649 tracing,
2650 auto_call_args: None,
2651 body_source: None,
2652 })
2653 })
2654 }
2655 }
2656
2657 /// Parse inline function definition: name() { ... }
2658 /// Parse the inline form `NAME () { BODY }` (POSIX-style funcdef
2659 /// without the `function` keyword). The name has already been
2660 /// consumed and pushed by parse_simple before this method fires.
2661 /// C source: handled inline in par_simple's INOUTPAR-after-name
2662 /// arm (parse.c:1836-2228).
2663 fn parse_inline_funcdef(&mut self, name: String) -> Option<ZshCommand> {
2664 // Skip ()
2665 if self.lexer.tok == LexTok::Inoutpar {
2666 self.lexer.zshlex();
2667 }
2668
2669 self.skip_separators();
2670
2671 // Parse body
2672 if self.lexer.tok == LexTok::Inbrace {
2673 // Same body_start-before-zshlex fix as parse_funcdef.
2674 let body_start = self.lexer.pos;
2675 self.lexer.zshlex();
2676 let body = self.parse_program();
2677 let body_end = if self.lexer.tok == LexTok::Outbrace {
2678 self.lexer.pos.saturating_sub(1)
2679 } else {
2680 self.lexer.pos
2681 };
2682 let body_source = self
2683 .lexer
2684 .input
2685 .get(body_start..body_end)
2686 .map(|s| s.trim().to_string())
2687 .filter(|s| !s.is_empty());
2688 if self.lexer.tok == LexTok::Outbrace {
2689 self.lexer.zshlex();
2690 }
2691 Some(ZshCommand::FuncDef(ZshFuncDef {
2692 names: vec![name],
2693 body: Box::new(body),
2694 tracing: false,
2695 auto_call_args: None,
2696 body_source,
2697 }))
2698 } else {
2699 match self.parse_cmd() {
2700 Some(cmd) => {
2701 let list = ZshList {
2702 sublist: ZshSublist {
2703 pipe: ZshPipe {
2704 cmd,
2705 next: None,
2706 lineno: self.lexer.lineno,
2707 merge_stderr: false,
2708 },
2709 next: None,
2710 flags: SublistFlags::default(),
2711 },
2712 flags: ListFlags::default(),
2713 };
2714 Some(ZshCommand::FuncDef(ZshFuncDef {
2715 names: vec![name],
2716 body: Box::new(ZshProgram { lists: vec![list] }),
2717 tracing: false,
2718 auto_call_args: None,
2719 body_source: None,
2720 }))
2721 }
2722 None => None,
2723 }
2724 }
2725 }
2726
2727 /// Parse [[ ... ]] conditional
2728 /// Parse `[[ EXPR ]]` conditional expression. Direct port of
2729 /// zsh/Src/parse.c:2409-2731 `par_cond` (and helpers par_cond_1,
2730 /// par_cond_2, par_cond_double, par_cond_triple, par_cond_multi
2731 /// at parse.c:2434-2731). Expression operators: `||` `&&` `!`
2732 /// + unary tests (-f, -d, -n, -z, etc.) + binary tests (=, !=,
2733 /// <, >, ==, =~, -eq, -ne, -lt, -le, -gt, -ge, -nt, -ot, -ef).
2734 fn parse_cond(&mut self) -> Option<ZshCommand> {
2735 self.lexer.zshlex(); // skip [[
2736 // Empty cond `[[ ]]` is a parse error in zsh — emit the
2737 // diagnostic and return None so the caller produces a
2738 // non-zero exit. Without this, `[[ ]]` silently passed and
2739 // returned exit 0.
2740 if self.lexer.tok == LexTok::Doutbrack {
2741 self.error("parse error near `]]'");
2742 self.lexer.zshlex();
2743 return None;
2744 }
2745 let cond = self.parse_cond_expr();
2746
2747 if self.lexer.tok == LexTok::Doutbrack {
2748 self.lexer.zshlex();
2749 }
2750
2751 cond.map(ZshCommand::Cond)
2752 }
2753
2754 /// Parse conditional expression
2755 /// Top of `[[ ]]` cond-expression parsing — entry to recursive
2756 /// descent (or → and → not → primary). Direct port of zsh's
2757 /// par_cond_1 at parse.c:2434-2475.
2758 fn parse_cond_expr(&mut self) -> Option<ZshCond> {
2759 self.parse_cond_or()
2760 }
2761
2762 /// Cond-expression `||` level. C: inside par_cond_1 at
2763 /// parse.c:2434-2475 (the `cond_or` ladder).
2764 fn parse_cond_or(&mut self) -> Option<ZshCond> {
2765 self.recursion_depth += 1;
2766 if self.check_recursion() {
2767 self.error("parse_cond_or: max recursion depth exceeded");
2768 self.recursion_depth -= 1;
2769 return None;
2770 }
2771
2772 let left = match self.parse_cond_and() {
2773 Some(l) => l,
2774 None => {
2775 self.recursion_depth -= 1;
2776 return None;
2777 }
2778 };
2779
2780 self.skip_cond_separators();
2781
2782 let result = if self.lexer.tok == LexTok::Dbar {
2783 self.lexer.zshlex();
2784 self.skip_cond_separators();
2785 self.parse_cond_or()
2786 .map(|right| ZshCond::Or(Box::new(left), Box::new(right)))
2787 } else {
2788 Some(left)
2789 };
2790
2791 self.recursion_depth -= 1;
2792 result
2793 }
2794
2795 /// Cond-expression `&&` level. C: par_cond_2 at parse.c:2476-2625.
2796 fn parse_cond_and(&mut self) -> Option<ZshCond> {
2797 self.recursion_depth += 1;
2798 if self.check_recursion() {
2799 self.error("parse_cond_and: max recursion depth exceeded");
2800 self.recursion_depth -= 1;
2801 return None;
2802 }
2803
2804 let left = match self.parse_cond_not() {
2805 Some(l) => l,
2806 None => {
2807 self.recursion_depth -= 1;
2808 return None;
2809 }
2810 };
2811
2812 self.skip_cond_separators();
2813
2814 let result = if self.lexer.tok == LexTok::Damper {
2815 self.lexer.zshlex();
2816 self.skip_cond_separators();
2817 self.parse_cond_and()
2818 .map(|right| ZshCond::And(Box::new(left), Box::new(right)))
2819 } else {
2820 Some(left)
2821 };
2822
2823 self.recursion_depth -= 1;
2824 result
2825 }
2826
2827 /// Cond-expression `!` negation level. C: handled inside
2828 /// par_cond_2 at parse.c:2476-2625 via the BANG token check.
2829 fn parse_cond_not(&mut self) -> Option<ZshCond> {
2830 self.recursion_depth += 1;
2831 if self.check_recursion() {
2832 self.error("parse_cond_not: max recursion depth exceeded");
2833 self.recursion_depth -= 1;
2834 return None;
2835 }
2836
2837 self.skip_cond_separators();
2838
2839 // ! can be either LexTok::Bang or String "!"
2840 let is_not = self.lexer.tok == LexTok::Bang
2841 || (self.lexer.tok == LexTok::String
2842 && self
2843 .lexer
2844 .tokstr
2845 .as_ref()
2846 .map(|s| s == "!")
2847 .unwrap_or(false));
2848 if is_not {
2849 self.lexer.zshlex();
2850 let inner = match self.parse_cond_not() {
2851 Some(i) => i,
2852 None => {
2853 self.recursion_depth -= 1;
2854 return None;
2855 }
2856 };
2857 self.recursion_depth -= 1;
2858 return Some(ZshCond::Not(Box::new(inner)));
2859 }
2860
2861 if self.lexer.tok == LexTok::Inpar {
2862 self.lexer.zshlex();
2863 self.skip_cond_separators();
2864 let inner = match self.parse_cond_expr() {
2865 Some(i) => i,
2866 None => {
2867 self.recursion_depth -= 1;
2868 return None;
2869 }
2870 };
2871 self.skip_cond_separators();
2872 if self.lexer.tok == LexTok::Outpar {
2873 self.lexer.zshlex();
2874 }
2875 self.recursion_depth -= 1;
2876 return Some(inner);
2877 }
2878
2879 let result = self.parse_cond_primary();
2880 self.recursion_depth -= 1;
2881 result
2882 }
2883
2884 /// Cond-expression primary: unary tests (-f, -d, ...), binary
2885 /// tests (=, !=, <, >, ==, =~, -eq, -ne, ...), and parenthesized
2886 /// sub-expressions. Direct port of par_cond_double / par_cond_triple
2887 /// / par_cond_multi at parse.c:2626-2731 (chosen by arg count).
2888 fn parse_cond_primary(&mut self) -> Option<ZshCond> {
2889 let s1 = match self.lexer.tok {
2890 LexTok::String => {
2891 let s = self.lexer.tokstr.clone().unwrap_or_default();
2892 self.lexer.zshlex();
2893 s
2894 }
2895 _ => return None,
2896 };
2897
2898 self.skip_cond_separators();
2899
2900 // Check for unary operator
2901 if s1.starts_with('-') && s1.len() == 2 {
2902 let s2 = match self.lexer.tok {
2903 LexTok::String => {
2904 let s = self.lexer.tokstr.clone().unwrap_or_default();
2905 self.lexer.zshlex();
2906 s
2907 }
2908 _ => return Some(ZshCond::Unary("-n".to_string(), s1)),
2909 };
2910 return Some(ZshCond::Unary(s1, s2));
2911 }
2912
2913 // Check for binary operator
2914 let op = match self.lexer.tok {
2915 LexTok::String => {
2916 let s = self.lexer.tokstr.clone().unwrap_or_default();
2917 self.lexer.zshlex();
2918 s
2919 }
2920 LexTok::Inang => {
2921 self.lexer.zshlex();
2922 "<".to_string()
2923 }
2924 LexTok::Outang => {
2925 self.lexer.zshlex();
2926 ">".to_string()
2927 }
2928 _ => return Some(ZshCond::Unary("-n".to_string(), s1)),
2929 };
2930
2931 self.skip_cond_separators();
2932
2933 let s2 = match self.lexer.tok {
2934 LexTok::String => {
2935 let s = self.lexer.tokstr.clone().unwrap_or_default();
2936 self.lexer.zshlex();
2937 s
2938 }
2939 _ => return Some(ZshCond::Binary(s1, op, String::new())),
2940 };
2941
2942 if op == "=~" {
2943 Some(ZshCond::Regex(s1, s2))
2944 } else {
2945 Some(ZshCond::Binary(s1, op, s2))
2946 }
2947 }
2948
2949 fn skip_cond_separators(&mut self) {
2950 while self.lexer.tok == LexTok::Seper && {
2951 let s = self.lexer.tokstr.as_ref();
2952 s.map(|s| !s.contains(';')).unwrap_or(true)
2953 } {
2954 self.lexer.zshlex();
2955 }
2956 }
2957
2958 /// Parse (( ... )) arithmetic command
2959 /// Parse `(( EXPR ))` arithmetic command. C source: parse.c:1810-1834
2960 /// `par_dinbrack` (despite the name; the function actually handles
2961 /// DINPAR `(( ))` blocks too).
2962 fn parse_arith(&mut self) -> Option<ZshCommand> {
2963 let expr = self.lexer.tokstr.clone().unwrap_or_default();
2964 self.lexer.zshlex();
2965 Some(ZshCommand::Arith(expr))
2966 }
2967
2968 /// Parse time command
2969 /// Parse `time CMD` (POSIX time keyword). Direct port of
2970 /// zsh/Src/parse.c:1787-1808 `par_time`. The `time` keyword
2971 /// times the execution of the following pipeline / cmd.
2972 fn parse_time(&mut self) -> Option<ZshCommand> {
2973 self.lexer.zshlex(); // skip 'time'
2974
2975 // Check if there's a pipeline to time
2976 if self.lexer.tok == LexTok::Seper
2977 || self.lexer.tok == LexTok::Newlin
2978 || self.lexer.tok == LexTok::Endinput
2979 {
2980 Some(ZshCommand::Time(None))
2981 } else {
2982 let sublist = self.parse_sublist();
2983 Some(ZshCommand::Time(sublist.map(Box::new)))
2984 }
2985 }
2986
2987 /// Check if next token is ()
2988 fn peek_inoutpar(&mut self) -> bool {
2989 self.lexer.tok == LexTok::Inoutpar
2990 }
2991
2992 /// Skip separator tokens
2993 fn skip_separators(&mut self) {
2994 let mut iterations = 0;
2995 while self.lexer.tok == LexTok::Seper || self.lexer.tok == LexTok::Newlin {
2996 iterations += 1;
2997 if iterations > 100_000 {
2998 self.error("skip_separators: too many iterations");
2999 return;
3000 }
3001 self.lexer.zshlex();
3002 }
3003 }
3004
3005 /// Record an error
3006 fn error(&mut self, msg: &str) {
3007 self.errors.push(ParseError {
3008 message: msg.to_string(),
3009 line: self.lexer.lineno,
3010 });
3011 }
3012}
3013
3014#[cfg(test)]
3015mod tests {
3016 use super::*;
3017
3018 fn parse(input: &str) -> Result<ZshProgram, Vec<ParseError>> {
3019 let mut parser = ZshParser::new(input);
3020 parser.parse()
3021 }
3022
#[test]
fn test_simple_command() {
    let program = parse("echo hello world").unwrap();
    assert_eq!(program.lists.len(), 1);

    // A bare command line must come back as a simple command with its words intact.
    if let ZshCommand::Simple(simple) = &program.lists[0].sublist.pipe.cmd {
        assert_eq!(simple.words, vec!["echo", "hello", "world"]);
    } else {
        panic!("expected simple command");
    }
}
3034
#[test]
fn test_pipeline() {
    let program = parse("ls | grep foo | wc -l").unwrap();
    assert_eq!(program.lists.len(), 1);

    // Three stages: the first two must each link to a following stage.
    let first_stage = &program.lists[0].sublist.pipe;
    assert!(first_stage.next.is_some());

    let second_stage = first_stage.next.as_ref().unwrap();
    assert!(second_stage.next.is_some());
}
3046
#[test]
fn test_and_or() {
    let program = parse("cmd1 && cmd2 || cmd3").unwrap();
    let head = &program.lists[0].sublist;

    // The first connector in the chain is the `&&`.
    assert!(head.next.is_some());
    let first_op = head.next.as_ref().map(|(op, _)| *op).unwrap();
    assert_eq!(first_op, SublistOp::And);
}
3056
#[test]
fn test_if_then() {
    let program = parse("if test -f foo; then echo yes; fi").unwrap();
    let cmd = &program.lists[0].sublist.pipe.cmd;

    // Only the command kind is checked here, not the branch contents.
    if !matches!(cmd, ZshCommand::If(_)) {
        panic!("expected if command");
    }
}
3065
#[test]
fn test_for_loop() {
    let program = parse("for i in a b c; do echo $i; done").unwrap();

    // Expect a `for` over an explicit word list bound to variable `i`.
    if let ZshCommand::For(for_cmd) = &program.lists[0].sublist.pipe.cmd {
        assert_eq!(for_cmd.var, "i");
        if let ForList::Words(words) = &for_cmd.list {
            assert_eq!(words, &vec!["a", "b", "c"]);
        } else {
            panic!("expected word list");
        }
    } else {
        panic!("expected for command");
    }
}
3080
#[test]
fn test_case() {
    let program = parse("case $x in a) echo a;; b) echo b;; esac").unwrap();

    // Two `pattern) ... ;;` arms were written, so two must be parsed.
    if let ZshCommand::Case(case_cmd) = &program.lists[0].sublist.pipe.cmd {
        assert_eq!(case_cmd.arms.len(), 2);
    } else {
        panic!("expected case command");
    }
}
3091
#[test]
fn test_function() {
    // An empty-bodied `function NAME { }` should yield a FuncDef for that name.
    let program = parse("function foo { }").unwrap();
    let cmd = &program.lists[0].sublist.pipe.cmd;

    if let ZshCommand::FuncDef(def) = cmd {
        assert_eq!(def.names, vec!["foo"]);
    } else {
        panic!("expected function, got {:?}", cmd);
    }
}
3106
#[test]
fn test_redirection() {
    let program = parse("echo hello > file.txt").unwrap();

    // `>` must be recorded as a single write redirection on the command.
    if let ZshCommand::Simple(simple) = &program.lists[0].sublist.pipe.cmd {
        assert_eq!(simple.redirs.len(), 1);
        assert_eq!(simple.redirs[0].rtype, RedirType::Write);
    } else {
        panic!("expected simple command");
    }
}
3118
#[test]
fn test_assignment() {
    let program = parse("FOO=bar echo $FOO").unwrap();

    // The leading `FOO=bar` is an assignment attached to the command.
    if let ZshCommand::Simple(simple) = &program.lists[0].sublist.pipe.cmd {
        assert_eq!(simple.assigns.len(), 1);
        assert_eq!(simple.assigns[0].name, "FOO");
    } else {
        panic!("expected simple command");
    }
}
3130
#[test]
fn test_parse_completion_function() {
    // A completion helper mixing nested parameter expansion, command
    // substitution and an arithmetic guard.
    const SCRIPT: &str = r#"_2to3_fixes() {
 local -a fixes
 fixes=( ${${(M)${(f)"$(2to3 --list-fixes 2>/dev/null)"}:#*}//[[:space:]]/} )
 (( ${#fixes} )) && _describe -t fixes 'fix' fixes
}"#;

    let outcome = parse(SCRIPT);
    assert!(
        outcome.is_ok(),
        "Failed to parse completion function: {:?}",
        outcome.err()
    );

    let program = outcome.unwrap();
    assert!(
        !program.lists.is_empty(),
        "Expected at least one list in program"
    );
}
3150
#[test]
fn test_parse_array_with_complex_elements() {
    // Multi-line array assignment whose elements combine quoting and
    // brace expansion.
    const SCRIPT: &str = r#"arguments=(
 '(- * :)'{-h,--help}'[show this help message and exit]'
 {-d,--doctests_only}'[fix up doctests only]'
 '*:filename:_files'
)"#;

    let outcome = parse(SCRIPT);
    assert!(
        outcome.is_ok(),
        "Failed to parse array assignment: {:?}",
        outcome.err()
    );
}
3165
#[test]
fn test_parse_full_completion_file() {
    // A whole `#compdef` file: comments, a helper function, a large
    // array assignment and the final `_arguments` call.
    const SCRIPT: &str = r##"#compdef 2to3

# zsh completions for '2to3'

_2to3_fixes() {
 local -a fixes
 fixes=( ${${(M)${(f)"$(2to3 --list-fixes 2>/dev/null)"}:#*}//[[:space:]]/} )
 (( ${#fixes} )) && _describe -t fixes 'fix' fixes
}

local -a arguments

arguments=(
 '(- * :)'{-h,--help}'[show this help message and exit]'
 {-d,--doctests_only}'[fix up doctests only]'
 {-f,--fix}'[each FIX specifies a transformation; default: all]:fix name:_2to3_fixes'
 {-j,--processes}'[run 2to3 concurrently]:number: '
 {-x,--nofix}'[prevent a transformation from being run]:fix name:_2to3_fixes'
 {-l,--list-fixes}'[list available transformations]'
 {-p,--print-function}'[modify the grammar so that print() is a function]'
 {-v,--verbose}'[more verbose logging]'
 '--no-diffs[do not show diffs of the refactoring]'
 {-w,--write}'[write back modified files]'
 {-n,--nobackups}'[do not write backups for modified files]'
 {-o,--output-dir}'[put output files in this directory instead of overwriting]:directory:_directories'
 {-W,--write-unchanged-files}'[also write files even if no changes were required]'
 '--add-suffix[append this string to all output filenames]:suffix: '
 '*:filename:_files'
)

_arguments -s -S $arguments
"##;

    let outcome = parse(SCRIPT);
    assert!(
        outcome.is_ok(),
        "Failed to parse full completion file: {:?}",
        outcome.err()
    );

    // A successful parse must have produced at least one statement.
    let program = outcome.unwrap();
    assert!(!program.lists.is_empty(), "Expected at least one list");
}
3210
#[test]
fn test_parse_logs_sh() {
    // Nested if/else with `[[ ]]` conditions, recursive globs and a
    // pipeline in each branch.
    const SCRIPT: &str = r#"#!/usr/bin/env bash
shopt -s globstar

if [[ $(uname) == Darwin ]]; then
 tail -f /var/log/**/*.log /var/log/**/*.out | lolcat
else
 if [[ $ZPWR_DISTRO_NAME == raspbian ]]; then
 tail -f /var/log/**/*.log | lolcat
 else
 printf "Unsupported...\n" >&2
 fi
fi
"#;

    let outcome = parse(SCRIPT);
    assert!(
        outcome.is_ok(),
        "Failed to parse logs.sh: {:?}",
        outcome.err()
    );
}
3233
#[test]
fn test_parse_case_with_glob() {
    // `case` arms with glob patterns, with the body either on the
    // pattern line or on the line after it.
    const SCRIPT: &str = r#"case "$ZPWR_OS_TYPE" in
 darwin*) open_cmd='open'
 ;;
 cygwin*) open_cmd='cygstart'
 ;;
 linux*)
 open_cmd='xdg-open'
 ;;
esac"#;

    let outcome = parse(SCRIPT);
    assert!(
        outcome.is_ok(),
        "Failed to parse case with glob: {:?}",
        outcome.err()
    );
}
3252
#[test]
fn test_parse_case_with_nested_if() {
    // A function whose `case` has glob patterns and an `if [[ ]]`
    // nested inside one arm.
    const SCRIPT: &str = r##"function zpwrGetOpenCommand(){
 local open_cmd
 case "$ZPWR_OS_TYPE" in
 darwin*) open_cmd='open' ;;
 cygwin*) open_cmd='cygstart' ;;
 linux*)
 if [[ "$_zpwr_uname_r" != *icrosoft* ]];then
 open_cmd='nohup xdg-open'
 fi
 ;;
 esac
}"##;

    let outcome = parse(SCRIPT);
    assert!(outcome.is_ok(), "Failed to parse: {:?}", outcome.err());
}
3271
/// Parse every `*.sh` / `*.zsh` script in a local zpwr checkout and
/// require that at least half of them parse.
///
/// Each file is parsed on its own thread with a 2-second budget so a
/// hanging parse is reported as a timeout instead of stalling the run.
/// The test skips itself when the directory is missing or holds no
/// matching scripts.
#[test]
fn test_parse_zpwr_scripts() {
    use std::fs;
    use std::path::Path;
    use std::sync::mpsc;
    use std::thread;
    use std::time::Duration;

    // NOTE(review): machine-specific location; on other machines the
    // test takes the skip path below.
    let scripts_dir = Path::new("/Users/wizard/.zpwr/scripts");
    if !scripts_dir.exists() {
        eprintln!("Skipping test: scripts directory not found");
        return;
    }

    let mut total = 0;
    let mut passed = 0;
    let mut failed_files = Vec::new();
    let mut timeout_files = Vec::new();

    for ext in &["sh", "zsh"] {
        let pattern = scripts_dir.join(format!("*.{}", ext));
        if let Ok(entries) = glob::glob(pattern.to_str().unwrap()) {
            for entry in entries.flatten() {
                total += 1;
                let file_path = entry.display().to_string();
                let content = match fs::read_to_string(&entry) {
                    Ok(c) => c,
                    Err(e) => {
                        failed_files.push((file_path, format!("read error: {}", e)));
                        continue;
                    }
                };

                // Parse with timeout. `content` is moved into the worker;
                // it is not needed on this side afterwards.
                let (tx, rx) = mpsc::channel();
                thread::spawn(move || {
                    // The receiver may already be gone after a timeout.
                    let _ = tx.send(parse(&content));
                });

                match rx.recv_timeout(Duration::from_secs(2)) {
                    Ok(Ok(_)) => passed += 1,
                    Ok(Err(errors)) => {
                        let first_err = errors
                            .first()
                            .map(|e| format!("line {}: {}", e.line, e.message))
                            .unwrap_or_default();
                        failed_files.push((file_path, first_err));
                    }
                    Err(_) => {
                        // The worker thread is abandoned; it cannot be cancelled.
                        timeout_files.push(file_path);
                    }
                }
            }
        }
    }

    // Nothing matched: there is no pass rate to judge, so skip instead
    // of failing with a 0.0% rate.
    if total == 0 {
        eprintln!(
            "Skipping test: no scripts found in {}",
            scripts_dir.display()
        );
        return;
    }

    eprintln!("\n=== ZPWR Scripts Parse Results ===");
    eprintln!("Passed: {}/{}", passed, total);

    if !timeout_files.is_empty() {
        eprintln!("\nTimeout files (>2s):");
        for file in &timeout_files {
            eprintln!("  {}", file);
        }
    }

    if !failed_files.is_empty() {
        eprintln!("\nFailed files:");
        for (file, err) in &failed_files {
            eprintln!("  {} - {}", file, err);
        }
    }

    // Allow some failures initially, but track progress.
    let pass_rate = (passed as f64 / total as f64) * 100.0;
    eprintln!("Pass rate: {:.1}%", pass_rate);

    // Require at least 50% pass rate for now
    assert!(pass_rate >= 50.0, "Pass rate too low: {:.1}%", pass_rate);
}
3359
/// Parse every file under `test_data/zsh_functions` (relative to the
/// crate manifest directory) and require that at least half parse.
///
/// Each file is parsed on its own thread with a 2-second budget. The
/// test skips itself when the directory is missing or contains no
/// regular files.
#[test]
#[ignore] // Uses threads that can't be killed on timeout; use integration test instead
fn test_parse_zsh_stdlib_functions() {
    use std::fs;
    use std::path::Path;
    use std::sync::mpsc;
    use std::thread;
    use std::time::Duration;

    let functions_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("test_data/zsh_functions");
    if !functions_dir.exists() {
        eprintln!(
            "Skipping test: zsh_functions directory not found at {:?}",
            functions_dir
        );
        return;
    }

    let mut total = 0;
    let mut passed = 0;
    let mut failed_files = Vec::new();
    let mut timeout_files = Vec::new();

    if let Ok(entries) = fs::read_dir(&functions_dir) {
        for entry in entries.flatten() {
            let path = entry.path();
            if !path.is_file() {
                continue;
            }

            total += 1;
            let file_path = path.display().to_string();
            let content = match fs::read_to_string(&path) {
                Ok(c) => c,
                Err(e) => {
                    failed_files.push((file_path, format!("read error: {}", e)));
                    continue;
                }
            };

            // Parse with timeout. `content` is moved into the worker;
            // it is not needed on this side afterwards.
            let (tx, rx) = mpsc::channel();
            thread::spawn(move || {
                // The receiver may already be gone after a timeout.
                let _ = tx.send(parse(&content));
            });

            match rx.recv_timeout(Duration::from_secs(2)) {
                Ok(Ok(_)) => passed += 1,
                Ok(Err(errors)) => {
                    let first_err = errors
                        .first()
                        .map(|e| format!("line {}: {}", e.line, e.message))
                        .unwrap_or_default();
                    failed_files.push((file_path, first_err));
                }
                Err(_) => {
                    timeout_files.push(file_path);
                }
            }
        }
    }

    // Nothing to parse: there is no pass rate to judge, so skip instead
    // of failing with a 0.0% rate.
    if total == 0 {
        eprintln!(
            "Skipping test: no function files found in {:?}",
            functions_dir
        );
        return;
    }

    eprintln!("\n=== Zsh Stdlib Functions Parse Results ===");
    eprintln!("Passed: {}/{}", passed, total);

    if !timeout_files.is_empty() {
        eprintln!("\nTimeout files (>2s): {}", timeout_files.len());
        for file in timeout_files.iter().take(10) {
            eprintln!("  {}", file);
        }
        if timeout_files.len() > 10 {
            eprintln!("  ... and {} more", timeout_files.len() - 10);
        }
    }

    if !failed_files.is_empty() {
        eprintln!("\nFailed files: {}", failed_files.len());
        for (file, err) in failed_files.iter().take(20) {
            // Print only the file name to keep the report compact.
            let filename = Path::new(file)
                .file_name()
                .unwrap_or_default()
                .to_string_lossy();
            eprintln!("  {} - {}", filename, err);
        }
        if failed_files.len() > 20 {
            eprintln!("  ... and {} more", failed_files.len() - 20);
        }
    }

    let pass_rate = (passed as f64 / total as f64) * 100.0;
    eprintln!("Pass rate: {:.1}%", pass_rate);

    // Require at least 50% pass rate
    assert!(pass_rate >= 50.0, "Pass rate too low: {:.1}%", pass_rate);
}
3461}