zshrs_parse/parser.rs
1//! Zsh parser - Direct port from zsh/Src/parse.c
2//!
3//! This parser takes tokens from the ZshLexer and builds an AST.
4//! It follows the zsh grammar closely, producing structures that
5//! can be executed by the shell executor.
6
7use crate::lexer::ZshLexer;
8use crate::tokens::LexTok;
9use serde::{Deserialize, Serialize};
10
/// AST node for a complete program (list of commands).
///
/// This is the root of the tree; nested constructs (subshells, loop
/// bodies, function bodies, ...) hold their own `ZshProgram`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshProgram {
    /// The lists making up the program, in order.
    pub lists: Vec<ZshList>,
}
16
/// A list is a sequence of sublists separated by `;` or `&` or newline.
///
/// Each `ZshList` node carries exactly one sublist plus the flags that
/// describe how it is run.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshList {
    /// The `&&` / `||` chain this list entry runs.
    pub sublist: ZshSublist,
    /// Async / disown markers for this entry.
    pub flags: ListFlags,
}
23
/// Execution flags attached to a [`ZshList`]. Both default to `false`.
#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
pub struct ListFlags {
    /// Run asynchronously (`&`).
    pub async_: bool,
    /// Disown after running (`&|` or `&!`).
    pub disown: bool,
}
31
/// A sublist is pipelines connected by `&&` or `||`.
///
/// Stored as a singly linked chain: `pipe` is the first pipeline and
/// `next` holds the connecting operator together with the rest of the chain.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshSublist {
    /// The pipeline at this position of the chain.
    pub pipe: ZshPipe,
    /// Operator and remainder of the chain; `None` for the last pipeline.
    pub next: Option<(SublistOp, Box<ZshSublist>)>,
    /// Coproc / negation markers.
    pub flags: SublistFlags,
}
39
/// Operator joining two pipelines inside a [`ZshSublist`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum SublistOp {
    /// `&&`
    And,
    /// `||`
    Or,
}
45
/// Flags attached to a [`ZshSublist`]. Both default to `false`.
#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
pub struct SublistFlags {
    /// Coproc
    pub coproc: bool,
    /// Negated with `!`
    pub not: bool,
}
53
/// A pipeline is commands connected by `|`.
///
/// Stored as a singly linked chain of stages: `cmd` is this stage and
/// `next` is the stage that follows it, if any.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshPipe {
    /// The command run at this stage.
    pub cmd: ZshCommand,
    /// The following stage; `None` for the last stage.
    pub next: Option<Box<ZshPipe>>,
    /// Source line number recorded for this stage.
    pub lineno: u64,
    /// `|&` between this stage and the next — merge stderr into the
    /// pipe so the next stage's stdin sees both stdout AND stderr from
    /// this stage. When `next` is None this flag is meaningless.
    /// Missing in serialized input it deserializes as `false`.
    #[serde(default)]
    pub merge_stderr: bool,
}
66
/// A command: either a simple command or one of the compound forms.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ZshCommand {
    /// Simple command (assignments, words, redirections).
    Simple(ZshSimple),
    /// `(list)`
    Subsh(Box<ZshProgram>),
    /// `{list}`
    Cursh(Box<ZshProgram>),
    /// `for` loop (also used for `select`, see `ZshFor::is_select`).
    For(ZshFor),
    /// `case` statement.
    Case(ZshCase),
    /// `if` statement.
    If(ZshIf),
    /// `while` loop.
    While(ZshWhile),
    /// `until` loop; shares its payload type with `While`.
    Until(ZshWhile),
    /// `repeat` loop.
    Repeat(ZshRepeat),
    /// Function definition.
    FuncDef(ZshFuncDef),
    /// `time`, with the timed sublist when one is present.
    Time(Option<Box<ZshSublist>>),
    /// `[[ ... ]]`
    Cond(ZshCond),
    /// `(( ... ))`
    Arith(String),
    /// `{ ... } always { ... }`
    Try(ZshTry),
    /// Compound command with trailing redirects:
    /// `{ cmd } 2>&1`, `(...) >file`, `if ...; fi >file`, etc.
    /// Simple commands carry redirects in their own struct; this wrapper
    /// is only used for compound forms.
    Redirected(Box<ZshCommand>, Vec<ZshRedir>),
}
90
/// A simple command (assignments, words, redirections).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshSimple {
    /// Assignments attached to the command.
    pub assigns: Vec<ZshAssign>,
    /// Command words, kept as raw token strings.
    pub words: Vec<String>,
    /// Redirections attached to the command.
    pub redirs: Vec<ZshRedir>,
}
98
/// An assignment (`name=value` or `name+=value`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshAssign {
    /// Name being assigned to.
    pub name: String,
    /// Scalar or array value.
    pub value: ZshAssignValue,
    /// True for the appending form `+=`.
    pub append: bool,
}
106
/// Right-hand side of a [`ZshAssign`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ZshAssignValue {
    /// Single string value.
    Scalar(String),
    /// List of element strings.
    Array(Vec<String>),
}
112
/// A redirection.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshRedir {
    /// Which redirection operator was used.
    pub rtype: RedirType,
    /// File descriptor number associated with the redirection.
    pub fd: i32,
    /// Word following the operator (presumably the target file name or
    /// heredoc terminator — confirm against `parse_redirection`).
    pub name: String,
    /// Here-document body and terminator, once resolved.
    pub heredoc: Option<HereDocInfo>,
    /// Variable name for the `{var}>file` form.
    pub varid: Option<String>,
    /// Index into ZshLexer.heredocs[] for body lookup. Filled in by
    /// `parse_redirection` for Heredoc/HeredocDash, then resolved into
    /// `heredoc.content` by `fill_heredoc_bodies` after process_heredocs
    /// has run for the line. Never serialized.
    #[serde(skip)]
    pub heredoc_idx: Option<usize>,
}
128
/// Body and terminator of a here-document.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HereDocInfo {
    /// Text of the here-document body.
    pub content: String,
    /// Terminator word that ends the body.
    pub terminator: String,
    /// Originally-quoted terminator (`<<'EOF'`, `<<"EOF"`). When true the
    /// body is passed verbatim — no `$var` / `$(cmd)` / `$((expr))`
    /// expansion. Plain `<<EOF` runs all expansions.
    /// Missing in serialized input it deserializes as `false`.
    #[serde(default)]
    pub quoted: bool,
}
139
/// Redirection type, one variant per redirection operator.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum RedirType {
    /// `>`
    Write,
    /// `>|`
    Writenow,
    /// `>>`
    Append,
    /// `>>|`
    Appendnow,
    /// `<`
    Read,
    /// `<>`
    ReadWrite,
    /// `<<`
    Heredoc,
    /// `<<-`
    HeredocDash,
    /// `<<<`
    Herestr,
    /// `<&`
    MergeIn,
    /// `>&`
    MergeOut,
    /// `&>`
    ErrWrite,
    /// `&>|`
    ErrWritenow,
    /// `>>&`
    ErrAppend,
    /// `>>&|`
    ErrAppendnow,
    /// `< <(...)`
    InPipe,
    /// `> >(...)`
    OutPipe,
}
161
/// For loop (also carries `select`, see `is_select`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshFor {
    /// Name of the loop variable.
    pub var: String,
    /// What the loop iterates over.
    pub list: ForList,
    /// Loop body.
    pub body: Box<ZshProgram>,
    /// True if this was parsed as `select` rather than `for`. Both share
    /// the same parser, so the compiler routes on this flag.
    /// Missing in serialized input it deserializes as `false`.
    #[serde(default)]
    pub is_select: bool,
}
173
/// Iteration source of a [`ZshFor`] loop.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ForList {
    /// Explicit word list.
    Words(Vec<String>),
    /// C-style header with three expression strings
    /// (presumably `for ((init; cond; step))` — confirm against the parser).
    CStyle {
        init: String,
        cond: String,
        step: String,
    },
    /// No list given (presumably iterates the positional parameters —
    /// confirm against the compiler).
    Positional,
}
184
/// Case statement.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshCase {
    /// The word being matched.
    pub word: String,
    /// The arms, in source order.
    pub arms: Vec<CaseArm>,
}
191
/// One arm of a [`ZshCase`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CaseArm {
    /// Patterns guarding this arm.
    pub patterns: Vec<String>,
    /// Commands run by this arm.
    pub body: ZshProgram,
    /// How the arm ends (`;;`, `;&` or `;|`).
    pub terminator: CaseTerm,
}
198
/// Terminator of a [`CaseArm`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum CaseTerm {
    /// `;;`
    Break,
    /// `;&`
    Continue,
    /// `;|`
    TestNext,
}
205
/// If statement.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshIf {
    /// Condition of the leading `if`.
    pub cond: Box<ZshProgram>,
    /// Body run for the leading `if`.
    pub then: Box<ZshProgram>,
    /// `elif` branches as (condition, body) pairs, in source order.
    pub elif: Vec<(ZshProgram, ZshProgram)>,
    /// `else` body, when present.
    pub else_: Option<Box<ZshProgram>>,
}
214
/// While/Until loop. Used as the payload of both `ZshCommand::While`
/// and `ZshCommand::Until`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshWhile {
    /// Loop condition.
    pub cond: Box<ZshProgram>,
    /// Loop body.
    pub body: Box<ZshProgram>,
    /// Marks the `until` form.
    pub until: bool,
}
222
/// Repeat loop.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshRepeat {
    /// Repetition count, kept as an unevaluated string.
    pub count: String,
    /// Loop body.
    pub body: Box<ZshProgram>,
}
229
/// Function definition.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshFuncDef {
    /// Names the body is defined under (more than one for the
    /// multi-name form).
    pub names: Vec<String>,
    /// Parsed function body.
    pub body: Box<ZshProgram>,
    /// Tracing flag for the function (NOTE(review): the syntax that sets
    /// it is not visible in this part of the file).
    pub tracing: bool,
    /// Anonymous-function call args. `() { body } a b` parses as a
    /// FuncDef (auto-named) with `auto_call_args = Some(vec!["a", "b"])`.
    /// compile_funcdef registers the function then emits a Simple call
    /// with these args.
    #[serde(default)]
    pub auto_call_args: Option<Vec<String>>,
    /// Original source text of the function body (the bytes between
    /// `{` and `}`, without the braces themselves), captured at parse
    /// time. Populated for `function name { body }` and `function name() { body }`
    /// forms; left None for the synthesized inline-funcdef recovery
    /// path. ZshCompiler::compile_funcdef forwards it to
    /// `BUILTIN_REGISTER_COMPILED_FN` so introspection (`whence`, `which`,
    /// `${functions[name]}`) has canonical source text.
    #[serde(default)]
    pub body_source: Option<String>,
}
252
/// Conditional expression `[[ ... ]]`, as a recursive expression tree.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ZshCond {
    /// Negation of the inner expression.
    Not(Box<ZshCond>),
    /// Conjunction of two expressions.
    And(Box<ZshCond>, Box<ZshCond>),
    /// Disjunction of two expressions.
    Or(Box<ZshCond>, Box<ZshCond>),
    /// Unary test: `-f file`, `-n str`, etc.
    Unary(String, String),
    /// Binary test: `str = pat`, `a -eq b`, etc.
    Binary(String, String, String),
    /// Regex match: `str =~ regex`.
    Regex(String, String),
}
263
/// Try/always block: `{ ... } always { ... }`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ZshTry {
    /// The first (try) block.
    pub try_block: Box<ZshProgram>,
    /// The `always` block.
    pub always: Box<ZshProgram>,
}
270
/// Zsh parameter expansion flags (the letters inside `${(...)name}`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ZshParamFlag {
    /// `L` - lowercase
    Lower,
    /// `U` - uppercase
    Upper,
    /// `C` - capitalize words
    Capitalize,
    /// `j:sep:` - join array with separator
    Join(String),
    /// `F` - join with newlines
    JoinNewline,
    /// `s:sep:` - split string into array
    Split(String),
    /// `f` - split on newlines
    SplitLines,
    /// `z` - split into words (shell parsing)
    SplitWords,
    /// `t` - type of variable
    Type,
    /// `w` - word splitting
    Words,
    /// `qq` - single-quote always
    Quote,
    /// `q+` - single-quote only if needed
    QuoteIfNeeded,
    /// `qqq` - double-quote
    DoubleQuote,
    /// `qqqq` - `$'...'` style
    DollarQuote,
    /// `q` / `b` / `B` - backslash-escape special chars
    QuoteBackslash,
    /// `u` - unique elements only
    Unique,
    /// `O` - reverse sort
    Reverse,
    /// `o` - sort
    Sort,
    /// `n` - numeric sort
    NumericSort,
    /// `a` - sort in array index order
    IndexSort,
    /// `k` - associative array keys
    Keys,
    /// `v` - associative array values
    Values,
    /// `#` - length (character codes)
    Length,
    /// `c` - count total characters
    CountChars,
    /// `e` - perform shell expansions
    Expand,
    /// `%` - expand prompt escapes
    PromptExpand,
    /// `%%` - full prompt expansion
    PromptExpandFull,
    /// `V` - make non-printable chars visible
    Visible,
    /// `D` - substitute directory names
    Directory,
    /// `[1,n]` - first n elements
    Head(usize),
    /// `[-n,-1]` - last n elements
    Tail(usize),
    /// `l:len:fill:` - pad left
    PadLeft(usize, char),
    /// `r:len:fill:` - pad right
    PadRight(usize, char),
    /// `m` - use width for padding
    Width(usize),
    /// `M` - include matched portion
    Match,
    /// `R` - include non-matched portion (complement of M)
    Remove,
    /// `S` - subscript scanning
    Subscript,
    /// `P` - use value as parameter name (indirection)
    Parameter,
    /// `~` - glob patterns in pattern
    Glob,
    /// `@` flag — force array-context behavior even inside DQ. zsh's
    /// `"${(@o)arr}"` keeps the sort active and splices each element as
    /// its own word. Without this, the array-only flags became no-ops
    /// in DQ.
    At,
}
319
/// List operator (for shell command lists); pairs with a command in
/// `ShellCommand::List`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum ListOp {
    /// `&&`
    And,
    /// `||`
    Or,
    /// `;`
    Semi,
    /// `&`
    Amp,
    /// `\n`
    Newline,
}
329
/// Shell word - can be simple literal or complex expansion.
///
/// Only two shapes remain: a plain text token, or a concatenation of
/// nested words.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ShellWord {
    /// Plain text token. Most ZWC-decoded words land here. Goes through
    /// `expand_string` (plus glob/tilde/etc. as text-level transforms) for
    /// final output.
    Literal(String),
    /// Concatenation of sub-words. ZWC array decoding produces this with
    /// child Literals; nothing else constructs it now that the legacy
    /// hand-rolled parser is gone.
    Concat(Vec<ShellWord>),
}
342
/// Variable modifier for parameter expansion.
///
/// NOTE(review): the exact syntax each variant corresponds to is not
/// visible in this part of the file; the per-variant notes below are
/// inferred from the names and payloads — confirm against the consumer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum VarModifier {
    /// Presumably "use default word" (`${v:-word}` style).
    Default(ShellWord),
    /// Presumably "assign default word" (`${v:=word}` style).
    DefaultAssign(ShellWord),
    /// Presumably "error with word" (`${v:?word}` style).
    Error(ShellWord),
    /// Presumably "alternate word" (`${v:+word}` style).
    Alternate(ShellWord),
    /// Length of the value.
    Length,
    /// Substring with an offset and an optional second bound.
    Substring(i64, Option<i64>),
    /// Remove a matching prefix.
    RemovePrefix(ShellWord),
    /// Remove a matching prefix, long form.
    RemovePrefixLong(ShellWord),
    /// Remove a matching suffix.
    RemoveSuffix(ShellWord),
    /// Remove a matching suffix, long form.
    RemoveSuffixLong(ShellWord),
    /// Replace (pattern word, replacement word).
    Replace(ShellWord, ShellWord),
    /// Replace-all (pattern word, replacement word).
    ReplaceAll(ShellWord, ShellWord),
    /// Uppercase the value.
    Upper,
    /// Lowercase the value.
    Lower,
}
361
/// Shell command - the old shell_ast compatible type.
///
/// This is a second, independent command representation; it does not
/// reference the `Zsh*` AST types declared earlier in this file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ShellCommand {
    /// Simple command.
    Simple(SimpleCommand),
    /// Pipeline stages plus a flag (NOTE(review): the flag's meaning is
    /// not visible here — presumably negation; confirm against the consumer).
    Pipeline(Vec<ShellCommand>, bool),
    /// Commands, each paired with a [`ListOp`].
    List(Vec<(ShellCommand, ListOp)>),
    /// Compound command.
    Compound(CompoundCommand),
    /// Function definition: name and body.
    FunctionDef(String, Box<ShellCommand>),
}
371
/// Simple command with assignments, words, and redirects
/// (payload of `ShellCommand::Simple`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SimpleCommand {
    /// Assignments as (name, value, flag) triples. NOTE(review): the bool
    /// presumably marks the `+=` append form, mirroring `ZshAssign::append`
    /// — confirm.
    pub assignments: Vec<(String, ShellWord, bool)>,
    /// Command words.
    pub words: Vec<ShellWord>,
    /// Redirections attached to the command.
    pub redirects: Vec<Redirect>,
}
379
/// Redirect, as used by [`SimpleCommand`] and
/// `CompoundCommand::WithRedirects`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Redirect {
    /// Explicit file descriptor, when one was given.
    pub fd: Option<i32>,
    /// Redirection operator.
    pub op: RedirectOp,
    /// Word the redirection points at.
    pub target: ShellWord,
    /// Here-document body, when present.
    pub heredoc_content: Option<String>,
    /// Variable name for a file-descriptor variable (presumably the
    /// `{var}>file` form, mirroring `ZshRedir::varid` — confirm).
    pub fd_var: Option<String>,
}
389
/// Redirect operator for [`Redirect`].
///
/// NOTE(review): the operator spelling for each variant is not shown in
/// this part of the file; see `RedirType` for the spelled-out operator
/// set of the primary AST and confirm the mapping against the consumer.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum RedirectOp {
    Write,
    Append,
    Read,
    ReadWrite,
    Clobber,
    DupRead,
    DupWrite,
    HereDoc,
    HereString,
    WriteBoth,
    AppendBoth,
}
405
/// Compound command (payload of `ShellCommand::Compound`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum CompoundCommand {
    /// Brace group holding a command list.
    BraceGroup(Vec<ShellCommand>),
    /// Subshell holding a command list.
    Subshell(Vec<ShellCommand>),
    /// `if` statement: (condition, body) pairs plus an optional else part.
    If {
        conditions: Vec<(Vec<ShellCommand>, Vec<ShellCommand>)>,
        else_part: Option<Vec<ShellCommand>>,
    },
    /// `for` loop over an optional word list.
    For {
        var: String,
        words: Option<Vec<ShellWord>>,
        body: Vec<ShellCommand>,
    },
    /// Arithmetic `for` loop with three expression strings.
    ForArith {
        init: String,
        cond: String,
        step: String,
        body: Vec<ShellCommand>,
    },
    /// `while` loop.
    While {
        condition: Vec<ShellCommand>,
        body: Vec<ShellCommand>,
    },
    /// `until` loop.
    Until {
        condition: Vec<ShellCommand>,
        body: Vec<ShellCommand>,
    },
    /// `case` statement: each entry is (patterns, body, terminator).
    Case {
        word: ShellWord,
        cases: Vec<(Vec<ShellWord>, Vec<ShellCommand>, CaseTerminator)>,
    },
    /// `select` loop over an optional word list.
    Select {
        var: String,
        words: Option<Vec<ShellWord>>,
        body: Vec<ShellCommand>,
    },
    /// Coprocess with an optional name.
    Coproc {
        name: Option<String>,
        body: Box<ShellCommand>,
    },
    /// repeat N do ... done
    Repeat {
        count: String,
        body: Vec<ShellCommand>,
    },
    /// { try-block } always { always-block }
    Try {
        try_body: Vec<ShellCommand>,
        always_body: Vec<ShellCommand>,
    },
    /// Arithmetic command holding the expression text.
    Arith(String),
    /// A command together with the redirects attached to it.
    WithRedirects(Box<ShellCommand>, Vec<Redirect>),
}
460
/// Case terminator used by `CompoundCommand::Case`.
///
/// NOTE(review): which of `;;`, `;&`, `;|` each variant stands for is
/// not shown here, and the names do not line up one-to-one with
/// `CaseTerm` (`Break` / `Continue` / `TestNext`) — confirm the mapping
/// before converting between the two.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum CaseTerminator {
    Break,
    Fallthrough,
    Continue,
}
468
/// Parse errors. Rendered by `Display` as
/// `parse error at line {line}: {message}`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ParseError {
    /// Human-readable description of the problem.
    pub message: String,
    /// Line number the error is reported at.
    pub line: u64,
}
475
476impl std::fmt::Display for ParseError {
477 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
478 write!(f, "parse error at line {}: {}", self.line, self.message)
479 }
480}
481
// Marker impl: `ParseError` relies on the default `Error` methods.
impl std::error::Error for ParseError {}
483
/// The Zsh Parser.
///
/// Owns the lexer it pulls tokens from, the errors recorded so far, and
/// two counters used as safety limits.
pub struct ZshParser<'a> {
    /// Token source; the parser reads and writes its state fields directly.
    lexer: ZshLexer<'a>,
    /// Errors collected while parsing.
    errors: Vec<ParseError>,
    /// Global iteration counter to prevent infinite loops
    global_iterations: usize,
    /// Recursion depth counter to prevent stack overflow
    recursion_depth: usize,
}
493
/// Upper bound on `ZshParser::recursion_depth`; `check_recursion`
/// reports the limit as exceeded once the depth is greater than this.
const MAX_RECURSION_DEPTH: usize = 500;
495
/// Saved parse context. Direct port of zsh's `struct parse_stack`
/// declared in zsh/Src/zsh.h and used by parse.c:295-355
/// (`parse_context_save` / `parse_context_restore`). Pushes per-
/// parse-call state so a nested parse (e.g. inside command
/// substitution) doesn't clobber the outer parse.
///
/// zshrs port note: zsh's parse_stack tracks wordcode-buffer state
/// (ecbuf, eclen, ecused, ecnpats, ecstrs, ecsoffs, ecssub, ecnfunc).
/// zshrs builds AST trees instead so those fields collapse to a
/// recursion_depth + global_iterations save. The lexer-side fields
/// (incmdpos, incond, etc.) live on ZshLexer here so they get saved
/// via the lexer's own `LexStack` rather than being duplicated here.
///
/// Note that the parser's error list is not part of the saved state.
#[derive(Debug, Default, Clone)]
pub struct ParseStack {
    /// Saved copy of the parser's recursion depth counter.
    pub recursion_depth: usize,
    /// Saved copy of the parser's iteration counter.
    pub global_iterations: usize,
}
513
514/// Walk every ZshRedir in the program and, for any with a `heredoc_idx`,
515/// pull the body+terminator out of `bodies` and stuff into `heredoc`.
516/// `bodies[i]` corresponds to the i-th heredoc registered by the lexer
517/// during scanning (in source order).
518fn fill_heredoc_bodies(prog: &mut ZshProgram, bodies: &[HereDocInfo]) {
519 for list in &mut prog.lists {
520 fill_in_sublist(&mut list.sublist, bodies);
521 }
522}
523
524fn fill_in_sublist(sub: &mut ZshSublist, bodies: &[HereDocInfo]) {
525 fill_in_pipe(&mut sub.pipe, bodies);
526 if let Some(next) = &mut sub.next {
527 fill_in_sublist(&mut next.1, bodies);
528 }
529}
530
531fn fill_in_pipe(pipe: &mut ZshPipe, bodies: &[HereDocInfo]) {
532 fill_in_command(&mut pipe.cmd, bodies);
533 if let Some(next) = &mut pipe.next {
534 fill_in_pipe(next, bodies);
535 }
536}
537
538fn fill_in_command(cmd: &mut ZshCommand, bodies: &[HereDocInfo]) {
539 match cmd {
540 ZshCommand::Simple(s) => {
541 for r in &mut s.redirs {
542 resolve_redir(r, bodies);
543 }
544 }
545 ZshCommand::Subsh(p) | ZshCommand::Cursh(p) => fill_heredoc_bodies(p, bodies),
546 ZshCommand::FuncDef(f) => fill_heredoc_bodies(&mut f.body, bodies),
547 ZshCommand::If(i) => {
548 fill_heredoc_bodies(&mut i.cond, bodies);
549 fill_heredoc_bodies(&mut i.then, bodies);
550 for (c, b) in &mut i.elif {
551 fill_heredoc_bodies(c, bodies);
552 fill_heredoc_bodies(b, bodies);
553 }
554 if let Some(e) = &mut i.else_ {
555 fill_heredoc_bodies(e, bodies);
556 }
557 }
558 ZshCommand::While(w) | ZshCommand::Until(w) => {
559 fill_heredoc_bodies(&mut w.cond, bodies);
560 fill_heredoc_bodies(&mut w.body, bodies);
561 }
562 ZshCommand::For(f) => fill_heredoc_bodies(&mut f.body, bodies),
563 ZshCommand::Case(c) => {
564 for arm in &mut c.arms {
565 fill_heredoc_bodies(&mut arm.body, bodies);
566 }
567 }
568 ZshCommand::Repeat(r) => fill_heredoc_bodies(&mut r.body, bodies),
569 ZshCommand::Time(Some(sublist)) => fill_in_sublist(sublist, bodies),
570 ZshCommand::Try(t) => {
571 fill_heredoc_bodies(&mut t.try_block, bodies);
572 fill_heredoc_bodies(&mut t.always, bodies);
573 }
574 ZshCommand::Redirected(inner, redirs) => {
575 for r in redirs {
576 resolve_redir(r, bodies);
577 }
578 fill_in_command(inner, bodies);
579 }
580 ZshCommand::Time(None) | ZshCommand::Cond(_) | ZshCommand::Arith(_) => {}
581 }
582}
583
584fn resolve_redir(r: &mut ZshRedir, bodies: &[HereDocInfo]) {
585 if let Some(idx) = r.heredoc_idx {
586 if let Some(info) = bodies.get(idx) {
587 r.heredoc = Some(info.clone());
588 }
589 }
590}
591
592/// If `list` is a Simple containing one word that ends in the
593/// `<INPAR><OUTPAR>` token pair (the lexer-port encoding of `()`),
594/// return the bare name. Used by `parse_program_until` to detect
595/// `name() {body}` style function definitions where the lexer
596/// hasn't split the `()` from the name.
597/// Detect the `name() …` shape inside a Simple. Returns the function
598/// name and (when the body was already inlined into the same Simple,
599/// e.g. `foo() echo hi`) the rest of the words as the body's argv.
600/// Returns None for non-funcdef shapes.
601fn simple_name_with_inoutpar(list: &ZshList) -> Option<(Vec<String>, Vec<String>)> {
602 if list.flags.async_ || list.sublist.next.is_some() {
603 return None;
604 }
605 let pipe = &list.sublist.pipe;
606 if pipe.next.is_some() {
607 return None;
608 }
609 let simple = match &pipe.cmd {
610 ZshCommand::Simple(s) => s,
611 _ => return None,
612 };
613 if simple.words.is_empty() || !simple.assigns.is_empty() {
614 return None;
615 }
616 let suffix = "\u{88}\u{8a}"; // INPAR + OUTPAR
617 // Find the FIRST word ending in `()`. zsh accepts the
618 // multi-name shorthand `fna fnb fnc() { body }` (parse.c:
619 // par_funcdef wordlist) — words[0..i-1] are extra names,
620 // words[i] is `lastname()`. Words after are the body argv
621 // (one-line shorthand, `name() cmd args`).
622 let par_idx = simple.words.iter().position(|w| w.ends_with(suffix))?;
623 let mut names: Vec<String> = Vec::with_capacity(par_idx + 1);
624 for w in &simple.words[..par_idx] {
625 // Earlier names must be bare identifiers, NOT contain
626 // tokens that imply they're not function names (no `()`,
627 // no quotes, no expansions). zsh's lexer enforces this
628 // at the wordlist level; we approximate by requiring the
629 // word be an identifier-shaped token after untokenize.
630 let bare = crate::lexer::untokenize(w);
631 let valid = !bare.is_empty()
632 && bare
633 .chars()
634 .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-' || c == '.' || c == '$');
635 if !valid {
636 return None;
637 }
638 names.push(bare);
639 }
640 let last = &simple.words[par_idx];
641 let bare = &last[..last.len() - suffix.len()];
642 if bare.is_empty() {
643 return None;
644 }
645 names.push(crate::lexer::untokenize(bare));
646 let rest = simple.words[par_idx + 1..].to_vec();
647 Some((names, rest))
648}
649
650impl<'a> ZshParser<'a> {
651 /// Create a new parser
652 pub fn new(input: &'a str) -> Self {
653 ZshParser {
654 lexer: ZshLexer::new(input),
655 errors: Vec::new(),
656 global_iterations: 0,
657 recursion_depth: 0,
658 }
659 }
660
661 /// Check iteration limit; returns true if exceeded
662 #[inline]
663 fn check_limit(&mut self) -> bool {
664 self.global_iterations += 1;
665 self.global_iterations > 10_000
666 }
667
668 /// Check recursion depth; returns true if exceeded
669 #[inline]
670 fn check_recursion(&mut self) -> bool {
671 self.recursion_depth > MAX_RECURSION_DEPTH
672 }
673
674 /// Save parse context onto a `ParseStack`. Direct port of
675 /// zsh/Src/parse.c:295-320 `parse_context_save`. Pushes
676 /// recursion_depth + global_iterations and resets to zero so
677 /// a nested parse can't trigger the outer parse's limits.
678 /// Lexer-side state (incmdpos / incond / etc.) saves via the
679 /// lexer's own `LexStack` since those fields live on ZshLexer.
680 pub fn parse_context_save(&mut self, ps: &mut ParseStack) {
681 // parse.c:299-317 — save parser state. zshrs collapses zsh's
682 // wordcode-buffer fields (ecbuf/eclen/ecused/ecnpats/ecstrs/
683 // ecsoffs/ecssub/ecnfunc) into the recursion+iteration pair
684 // since the AST builder doesn't use a flat wordcode buffer.
685 ps.recursion_depth = self.recursion_depth;
686 ps.global_iterations = self.global_iterations;
687 // parse.c:318-319 — clear the buffer + heredoc list so a
688 // nested parse starts from a clean slate.
689 self.recursion_depth = 0;
690 self.global_iterations = 0;
691 }
692
693 /// Restore parse context from a `ParseStack`. Direct port of
694 /// zsh/Src/parse.c:326-355 `parse_context_restore`. Inverse of
695 /// `parse_context_save`. Also clears any half-built AST state
696 /// to prevent leaking into the outer parse.
697 pub fn parse_context_restore(&mut self, ps: &ParseStack) {
698 // parse.c:330-331 — free any in-progress wordcode buffer.
699 // zshrs has no equivalent — AST nodes are owned by their
700 // parent so dropping the parser frees them.
701
702 // parse.c:333-352 — restore saved state.
703 self.recursion_depth = ps.recursion_depth;
704 self.global_iterations = ps.global_iterations;
705
706 // parse.c:354 — `errflag &= ~ERRFLAG_ERROR;` — clear the
707 // error flag so the outer parse sees a clean state. zshrs
708 // tracks errors per-parser; clearing means dropping any
709 // partial errors collected during the nested parse.
710 self.errors.clear();
711 }
712
713 /// Initialize parser status. Direct port of zsh/Src/parse.c:489-503
714 /// `init_parse_status`. Clears the per-parse-call lexer flags
715 /// so a fresh parse starts from cmd-position with no nesting
716 /// state inherited from a prior parse.
717 pub fn init_parse_status(&mut self) {
718 // parse.c:500-502 — `incasepat = incond = inredir = infor =
719 // intypeset = 0; inrepeat_ = 0; incmdpos = 1;`
720 self.lexer.incasepat = 0;
721 self.lexer.incond = 0;
722 self.lexer.inredir = false;
723 self.lexer.infor = 0;
724 self.lexer.intypeset = false;
725 self.lexer.incmdpos = true;
726 }
727
728 /// Initialize parser for a fresh parse. Direct port of
729 /// zsh/Src/parse.c:507-525 `init_parse`. C source allocates a
730 /// fresh wordcode buffer (ecbuf) sized EC_INIT_SIZE, resets the
731 /// per-parse-call counters, and calls init_parse_status. zshrs
732 /// has no flat wordcode buffer (AST is built inline) so this
733 /// function reduces to init_parse_status + recursion_depth/
734 /// global_iterations clear.
735 pub fn init_parse(&mut self) {
736 // parse.c:513-520 — init wordcode buffer. zshrs no-op.
737 self.recursion_depth = 0;
738 self.global_iterations = 0;
739 // parse.c:522 — `init_parse_status();`
740 self.init_parse_status();
741 }
742
743 /// Check whether the parsed program is empty. Direct port of
744 /// zsh/Src/parse.c:583-587 `empty_eprog`. C version checks
745 /// `*p->prog == WCB_END()` (single end-of-wordcode marker).
746 /// zshrs version checks the AST node count.
747 pub fn empty_eprog(prog: &ZshProgram) -> bool {
748 prog.lists.is_empty()
749 }
750
751 /// Clear pending here-document list. Direct port of
752 /// zsh/Src/parse.c:589-600 `clear_hdocs`. The C version walks
753 /// the global `hdocs` linked list and frees each node. zshrs
754 /// stores pending heredocs on the lexer's `heredocs` Vec —
755 /// truncating it has the same effect.
756 pub fn clear_hdocs(&mut self) {
757 self.lexer.heredocs.clear();
758 }
759
760 /// Top-level parse-event entry. Direct port of zsh/Src/parse.c:
761 /// 612-631 `parse_event`. Reads one event from the lexer (a
762 /// sublist optionally followed by SEPER/AMPER/AMPERBANG) and
763 /// returns the resulting ZshProgram.
764 ///
765 /// `endtok` is the token that terminates the event — usually
766 /// ENDINPUT, but for command-style substitutions the closing
767 /// `)` (zsh's CMD_SUBST_CLOSE).
768 ///
769 /// zshrs port note: zsh's parse_event returns an `Eprog` (heap-
770 /// allocated wordcode program). zshrs returns a `ZshProgram`
771 /// (AST root). Same role at the parse-output boundary.
772 pub fn parse_event(&mut self, endtok: LexTok) -> Option<ZshProgram> {
773 // parse.c:616-619 — reset state and prime the lexer.
774 self.lexer.tok = LexTok::Endinput;
775 self.lexer.incmdpos = true;
776 self.lexer.zshlex();
777 // parse.c:620 — `init_parse();`
778 self.init_parse();
779
780 // parse.c:622-625 — drive par_event; on failure clear hdocs.
781 if !self.par_event(endtok) {
782 self.clear_hdocs();
783 return None;
784 }
785 // parse.c:626-628 — if endtok != ENDINPUT, this is a sub-
786 // parse for a substitution that doesn't need its own eprog.
787 // zshrs returns an empty program in that case (caller
788 // discards).
789 if endtok != LexTok::Endinput {
790 return Some(ZshProgram { lists: Vec::new() });
791 }
792 // parse.c:630 — `bld_eprog(1);` — build the final eprog.
793 // zshrs has already built the AST via parse_program_until,
794 // but parse_event uses par_event directly so we need to
795 // collect what par_event accumulated.
796 Some(self.parse_program_until(None))
797 }
798
799 /// Parse one event (sublist with optional separator). Direct
800 /// port of zsh/Src/parse.c:633-695 `par_event`. Returns true if
801 /// an event was successfully parsed, false on EOF / endtok.
802 ///
803 /// zshrs port note: the C version emits wordcodes via ecadd/
804 /// set_list_code; zshrs's parser builds AST nodes via
805 /// parse_sublist + parse_list. Same flow, different output.
806 pub fn par_event(&mut self, endtok: LexTok) -> bool {
807 // parse.c:639-643 — skip leading SEPERs.
808 while self.lexer.tok == LexTok::Seper {
809 // parse.c:640-641 — at top-level (endtok == ENDINPUT),
810 // a SEPER on a fresh line ends the event.
811 if self.lexer.isnewlin > 0 && endtok == LexTok::Endinput {
812 return false;
813 }
814 self.lexer.zshlex();
815 }
816 // parse.c:644-647 — terminate on EOF or matching close-token.
817 if self.lexer.tok == LexTok::Endinput {
818 return false;
819 }
820 if self.lexer.tok == endtok {
821 return true;
822 }
823 // parse.c:649-... — drive parse_sublist + handle terminator.
824 // zshrs's parse_sublist already builds the AST node directly.
825 match self.parse_sublist() {
826 Some(_) => {
827 // parse.c:651-693 — terminator handling. zshrs's
828 // parse_list wraps this; for parse_event we just
829 // confirm the sublist parsed.
830 true
831 }
832 None => false,
833 }
834 }
835
836 /// Parse one list — non-recursing variant. Direct port of
837 /// zsh/Src/parse.c:807-817 `par_list1`. Like par_list but
838 /// doesn't recurse on the trailing-separator path; used by
839 /// callers that only want one statement (e.g. each arm of a
840 /// case body).
841 pub fn par_list1(&mut self) -> Option<ZshSublist> {
842 // parse.c:810-816 — body is a single par_sublist call wrapped
843 // in the eu/ecused tracking that zshrs doesn't need (no
844 // wordcode buffer).
845 self.parse_sublist()
846 }
847
848 /// Wire a here-document body onto the redirection token that
849 /// requested it. Direct port of zsh/Src/parse.c:2347-2361
850 /// `setheredoc`. Called when a heredoc terminator has been
851 /// matched and the body is ready to be attached to the redir.
852 ///
853 /// zshrs port note: zsh's setheredoc patches the wordcode
854 /// in-place via `pc[1] = ecstrcode(doc); pc[2] = ecstrcode(term);`.
855 /// zshrs threads heredoc bodies through `HereDocInfo` structs
856 /// that resolve_redir applies during the post-parse fill_in pass.
857 /// This method is the AST-side equivalent: writes back to the
858 /// matching redir node by index.
859 pub fn setheredoc(
860 &mut self,
861 _pc: usize,
862 _redir_type: i32,
863 _doc: &str,
864 _term: &str,
865 _munged_term: &str,
866 ) {
867 // zshrs's heredoc resolution happens in fill_in_command /
868 // resolve_redir at parser.rs top. This stub exists for API
869 // parity with the C signature; live wiring happens via
870 // self.lexer.heredocs which the post-parse pass consumes.
871 }
872
873 /// Parse a wordlist for `for ... in WORDS;`. Direct port of
874 /// zsh/Src/parse.c:2362-2378 `par_wordlist`. Reads STRING tokens
875 /// until the next SEPER / SEMI / NEWLIN.
876 pub fn par_wordlist(&mut self) -> Vec<String> {
877 let mut out = Vec::new();
878 // parse.c:2362-2378 — collect STRINGs into the wordlist.
879 while self.lexer.tok == LexTok::String {
880 if let Some(text) = self.lexer.tokstr.clone() {
881 out.push(text);
882 }
883 self.lexer.zshlex();
884 }
885 out
886 }
887
888 /// Parse a newline-separated wordlist. Direct port of
889 /// zsh/Src/parse.c:2379-2398 `par_nl_wordlist`. Like
890 /// par_wordlist but tolerates leading/trailing newlines.
891 pub fn par_nl_wordlist(&mut self) -> Vec<String> {
892 // parse.c:2380-2381 — skip leading newlines.
893 while self.lexer.tok == LexTok::Newlin {
894 self.lexer.zshlex();
895 }
896 let out = self.par_wordlist();
897 // parse.c:2395-2397 — skip trailing newlines.
898 while self.lexer.tok == LexTok::Newlin {
899 self.lexer.zshlex();
900 }
901 out
902 }
903
904 /// Get the integer value of the next token in a cond expression.
905 /// Direct port of zsh/Src/parse.c:2643-2658 `get_cond_num`.
906 /// Used for `[[ N OP M ]]` numeric tests where N/M are integer
907 /// literals or variable references.
908 pub fn get_cond_num(&mut self) -> Option<i64> {
909 if self.lexer.tok != LexTok::String {
910 return None;
911 }
912 let text = self.lexer.tokstr.as_ref()?.clone();
913 // parse.c:2647-2655 — parse as integer with optional sign.
914 let parsed = text.parse::<i64>().ok()?;
915 self.lexer.zshlex();
916 Some(parsed)
917 }
918
919 /// Emit a parser-level error. Direct port of zsh/Src/parse.c:
920 /// 2733-2766 `yyerror`. C version fills a per-event error buffer
921 /// + sets errflag. zshrs pushes onto self.errors which the
922 /// caller drains via parse()'s Result return.
923 pub fn yyerror(&mut self, msg: &str) {
924 // parse.c:2735-2765 — zsh's yyerror collects the offending
925 // token's literal text + line number. zshrs already does
926 // this via self.error() with the lexer's toklineno.
927 self.error(msg);
928 }
929
930 // ============================================================
931 // Wordcode emission stubs (parse.c private helpers)
932 //
933 // The following functions are direct counterparts of zsh's
934 // private wordcode-emission helpers in parse.c. zsh uses these
935 // to write u32 opcodes into a flat `ecbuf` array; zshrs builds
936 // an AST tree and never emits wordcode at the parse layer.
937 // The implementations are documented stubs that preserve the
938 // function signatures + cite the C source. Real wordcode would
939 // be emitted later by compile_zsh.rs walking the AST.
940 //
941 // Listed for port-surface completeness so every parse.c symbol
942 // has a Rust counterpart even when the algorithm is moot in the
943 // AST architecture.
944 // ============================================================
945
/// Stub for zsh/Src/parse.c:736-749 `set_list_code`.
///
/// In C this rewrites a previously reserved list placeholder slot
/// with WCB_LIST(type, distance) once the sublist length is known.
/// zshrs builds `ZshList { sublist, flags }` nodes directly, so
/// there is no slot to patch: every argument is ignored and the
/// function does nothing. Kept for port-surface completeness.
pub fn set_list_code(_p: usize, _type_code: i32, _cmplx: bool) {
    // Intentionally empty — parse.c:740-748 wordcode patching has
    // no AST equivalent.
}
960
/// Stub for zsh/Src/parse.c:753-763 `set_sublist_code`, the
/// sublist-level sibling of `set_list_code`.
///
/// All arguments are ignored and nothing happens; zshrs has no
/// wordcode placeholder to rewrite.
pub fn set_sublist_code(_p: usize, _type_code: i32, _flags: i32, _skip: i32, _cmplx: bool) {
    // Intentionally empty — parse.c:757-762 wordcode patching has
    // no AST equivalent.
}
967
/// Stub for zsh/Src/parse.c:396-408 `ecadd`, which appends one
/// wordcode to the buffer and returns its index.
///
/// zshrs builds the AST inline and keeps no wordcode buffer, so
/// the argument is ignored and the returned index is always `0`.
pub fn ecadd(_c: u32) -> usize {
    // parse.c:399-407 (append with grow-on-demand) is not ported.
    0
}
976
/// Stub for zsh/Src/parse.c:412-421 `ecdel`, which removes the
/// wordcode at position `p`. Does nothing in zshrs.
pub fn ecdel(_p: usize) {
    // Intentionally empty — the memmove + ecused decrement of
    // parse.c:415-420 has no AST equivalent.
}
982
/// Stub for zsh/Src/parse.c:425-471 `ecstrcode`.
///
/// The C routine packs short strings (≤4 chars) into a single
/// wordcode and stores longer ones in a de-duplicated binary tree
/// (Eccstr). The zshrs AST holds strings directly, so the input
/// is ignored and the result is always `0`.
pub fn ecstrcode(_s: &str) -> u32 {
    // parse.c:432-470 encoding logic is not ported.
    0
}
993
/// Stub for zsh/Src/parse.c:371-388 `ecispace`, which opens up
/// `n` empty wordcode slots at position `p` so a forward jump can
/// be patched in later.
///
/// Does nothing in zshrs; both arguments are ignored.
pub fn ecispace(_p: usize, _n: usize) {
    // Intentionally empty — grow + memmove + heredoc adjustment
    // (parse.c:376-387) has no AST equivalent.
}
1002
/// Stub for zsh/Src/parse.c:359-367 `ecadjusthere`, which shifts
/// pending heredoc pointers after the wordcode buffer moves.
///
/// Does nothing in zshrs; both arguments are ignored.
pub fn ecadjusthere(_p: usize, _d: i32) {
    // Intentionally empty — parse.c:362-366 walks the hdocs list
    // and bumps each pc by d; there is no such list to walk here.
}
1011
1012 // ============================================================
1013 // Eprog runtime ops (parse.c:2767-2853)
1014 //
1015 // dupeprog / useeprog / freeeprog are zsh's reference-counting
1016 // helpers for executable programs. zshrs's AST is owned by
1017 // value (Rust ownership); cloning is a tree-deep copy via
1018 // Clone, "use" is a no-op (the executor borrows the AST), and
1019 // "free" is automatic on drop.
1020 // ============================================================
1021
1022 /// Duplicate an Eprog. Direct port of zsh/Src/parse.c:2767-2812
1023 /// `dupeprog`. C version deep-copies the wordcode array + string
1024 /// table + pattern progs. zshrs uses Clone on the AST.
1025 pub fn dupeprog(prog: &ZshProgram) -> ZshProgram {
1026 prog.clone()
1027 }
1028
1029 /// Increment an Eprog's reference count. Direct port of
1030 /// zsh/Src/parse.c:2813-2822 `useeprog`. zshrs no-op (Rust
1031 /// ownership).
1032 pub fn useeprog(_prog: &ZshProgram) {
1033 // parse.c:2815-2821 — `prog->nref++` if not heap-allocated.
1034 // zshrs no-op.
1035 }
1036
1037 /// Decrement / free an Eprog. Direct port of
1038 /// zsh/Src/parse.c:2823-2854 `freeeprog`. zshrs no-op (drop on
1039 /// scope-exit).
1040 pub fn freeeprog(_prog: ZshProgram) {
1041 // parse.c:2825-2853 — decrement nref, free if zero. zshrs
1042 // drops via Rust ownership.
1043 }
1044
1045 // ============================================================
1046 // Wordcode runtime getters (parse.c:2853-3060)
1047 //
1048 // These read packed wordcode out of a running Eprog at execution
1049 // time. zshrs's executor walks the AST directly so these are
1050 // stubs that preserve the C signatures + cite the source.
1051 // ============================================================
1052
/// Stub for zsh/Src/parse.c:2853-2887 `ecgetstr`, which unpacks a
/// string from the wordcode stream (inline 4-char strings or an
/// index into the strs table).
///
/// zshrs has no wordcode stream; the flag is ignored and an empty
/// `String` is always returned.
pub fn ecgetstr(_dup: bool) -> String {
    // parse.c:2858-2886 unpack logic is not ported.
    String::default()
}
1061
/// Stub for zsh/Src/parse.c:2890-2913 `ecrawstr`, the
/// non-consuming variant of `ecgetstr`. Always returns an empty
/// `String` in zshrs.
pub fn ecrawstr() -> String {
    String::default()
}
1068
/// Stub for zsh/Src/parse.c:2916-2933 `ecgetarr`, which reads a
/// string array out of wordcode. Both arguments are ignored and
/// an empty vector is always returned.
pub fn ecgetarr(_num: usize, _dup: bool) -> Vec<String> {
    vec![]
}
1074
/// Stub for zsh/Src/parse.c:2936-2955 `ecgetlist`, which reads a
/// linked list of strings out of wordcode. Both arguments are
/// ignored and an empty vector is always returned.
pub fn ecgetlist(_num: usize, _dup: bool) -> Vec<String> {
    vec![]
}
1080
1081 /// Read a sequence of redirection wordcodes. Direct port of
1082 /// zsh/Src/parse.c:2958-2991 `ecgetredirs`. zshrs no-op
1083 /// (redirections live as AST ZshRedir nodes).
1084 pub fn ecgetredirs() -> Vec<ZshRedir> {
1085 Vec::new()
1086 }
1087
1088 /// Copy consecutive redirection wordcodes into a new Eprog.
1089 /// Direct port of zsh/Src/parse.c:3001-3060 `eccopyredirs`.
1090 /// zshrs no-op.
1091 pub fn eccopyredirs() -> Option<ZshProgram> {
1092 None
1093 }
1094
/// Stub for zsh/Src/parse.c:3068-3075 `init_eprog`, which sets up
/// the dummy placeholder Eprog in C.
///
/// Does nothing in zshrs: an empty program is simply
/// `ZshProgram { lists: vec![] }`, so no dummy node is needed.
pub fn init_eprog() {
    // Intentionally empty — parse.c:3071-3074 assigns
    // dummy_eprog_code = WCB_END(); nothing comparable exists here.
}
1103
1104 /// Parse the complete input
1105 pub fn parse(&mut self) -> Result<ZshProgram, Vec<ParseError>> {
1106 self.lexer.zshlex();
1107
1108 let mut program = self.parse_program_until(None);
1109
1110 if !self.errors.is_empty() {
1111 return Err(std::mem::take(&mut self.errors));
1112 }
1113 // Surface lexer-level errors (unmatched quote/heredoc/etc.)
1114 // that the parser silently rolls past. zsh aborts with a
1115 // diagnostic in this case; mirror it.
1116 if let Some(msg) = self.lexer.error.clone() {
1117 return Err(vec![ParseError {
1118 message: msg,
1119 line: 1,
1120 }]);
1121 }
1122
1123 // Post-pass: wire heredoc bodies (collected by lexer.process_heredocs)
1124 // back into ZshRedir.heredoc fields via heredoc_idx.
1125 let bodies: Vec<HereDocInfo> = self
1126 .lexer
1127 .heredocs
1128 .iter()
1129 .map(|h| HereDocInfo {
1130 content: h.content.clone(),
1131 terminator: h.terminator.clone(),
1132 quoted: h.quoted,
1133 })
1134 .collect();
1135 if !bodies.is_empty() {
1136 fill_heredoc_bodies(&mut program, &bodies);
1137 }
1138
1139 Ok(program)
1140 }
1141
1142 /// Parse a program (list of lists)
1143 /// Parse a complete program (top-level entry). Calls
1144 /// parse_program_until with no end-token sentinel. Direct port of
1145 /// zsh/Src/parse.c:614-720 `parse_event` / `parse_list` /
1146 /// `par_event` flow. C distinguishes COND_EVENT (single command
1147 /// for here-string) from full event parse; zshrs's parse_program
1148 /// is the full-event entry.
1149 fn parse_program(&mut self) -> ZshProgram {
1150 self.parse_program_until(None)
1151 }
1152
1153 /// Parse a program until we hit an end token
1154 /// Parse a program until one of `end_tokens` is seen (or EOF).
1155 /// Drives parse_list in a loop. C equivalent: the body of par_event
1156 /// (parse.c:635-695) iterating par_list against the lexer.
1157 fn parse_program_until(&mut self, end_tokens: Option<&[LexTok]>) -> ZshProgram {
1158 let mut lists = Vec::new();
1159
1160 loop {
1161 if self.check_limit() {
1162 self.error("parser exceeded global iteration limit");
1163 break;
1164 }
1165
1166 // Skip separators
1167 while self.lexer.tok == LexTok::Seper || self.lexer.tok == LexTok::Newlin {
1168 if self.check_limit() {
1169 self.error("parser exceeded global iteration limit");
1170 return ZshProgram { lists };
1171 }
1172 self.lexer.zshlex();
1173 }
1174
1175 if self.lexer.tok == LexTok::Endinput || self.lexer.tok == LexTok::Lexerr {
1176 break;
1177 }
1178
1179 // Check for end tokens
1180 if let Some(end_toks) = end_tokens {
1181 if end_toks.contains(&self.lexer.tok) {
1182 break;
1183 }
1184 }
1185
1186 // Also stop at these tokens when not explicitly looking for them
1187 // Note: Else/Elif/Then are NOT here - they're handled by parse_if
1188 // to allow nested if statements inside case arms, loops, etc.
1189 match self.lexer.tok {
1190 LexTok::Outbrace
1191 | LexTok::Dsemi
1192 | LexTok::Semiamp
1193 | LexTok::Semibar
1194 | LexTok::Done
1195 | LexTok::Fi
1196 | LexTok::Esac
1197 | LexTok::Zend => break,
1198 _ => {}
1199 }
1200
1201 match self.parse_list() {
1202 Some(list) => {
1203 let detected = simple_name_with_inoutpar(&list);
1204 lists.push(list);
1205 // Synthesize a FuncDef for the `name() { body }` shape
1206 // at parse time so body_source is captured while the
1207 // lexer still has the input. The lexer port emits
1208 // `name(` as a single Word ending in `<INPAR><OUTPAR>`,
1209 // so the Simple list is followed by an Inbrace once
1210 // separators are skipped. For `name() cmd args` the
1211 // body has already been swallowed into the same
1212 // Simple's words tail — synthesize directly from there.
1213 if let Some((names, body_argv)) = detected {
1214 if !body_argv.is_empty() {
1215 // One-line body already in the Simple. Build
1216 // a Simple from body_argv as the function body.
1217 lists.pop();
1218 let body_simple = ZshCommand::Simple(ZshSimple {
1219 assigns: Vec::new(),
1220 words: body_argv,
1221 redirs: Vec::new(),
1222 });
1223 let body_list = ZshList {
1224 sublist: ZshSublist {
1225 pipe: ZshPipe {
1226 cmd: body_simple,
1227 next: None,
1228 lineno: self.lexer.lineno,
1229 merge_stderr: false,
1230 },
1231 next: None,
1232 flags: SublistFlags::default(),
1233 },
1234 flags: ListFlags::default(),
1235 };
1236 let funcdef = ZshCommand::FuncDef(ZshFuncDef {
1237 names,
1238 body: Box::new(ZshProgram {
1239 lists: vec![body_list],
1240 }),
1241 tracing: false,
1242 auto_call_args: None,
1243 body_source: None,
1244 });
1245 let synthetic = ZshList {
1246 sublist: ZshSublist {
1247 pipe: ZshPipe {
1248 cmd: funcdef,
1249 next: None,
1250 lineno: self.lexer.lineno,
1251 merge_stderr: false,
1252 },
1253 next: None,
1254 flags: SublistFlags::default(),
1255 },
1256 flags: ListFlags::default(),
1257 };
1258 lists.push(synthetic);
1259 continue;
1260 }
1261 // Else: words.len() == 1 (only the trailing `name()`
1262 // word), brace body follows. `names` may carry
1263 // multiple identifiers from the `fna fnb fnc()`
1264 // shorthand — all share the same brace body per
1265 // src/zsh/Src/parse.c:1666 par_funcdef wordlist.
1266 // Skip separators on the real lexer; safe because
1267 // parse_program's next iteration would also skip them.
1268 while self.lexer.tok == LexTok::Seper || self.lexer.tok == LexTok::Newlin {
1269 self.lexer.zshlex();
1270 }
1271 if self.lexer.tok == LexTok::Inbrace {
1272 // Capture body_start BEFORE the lexer
1273 // advances past the first body token. The
1274 // outer zshlex() consumed `{`; lexer.pos
1275 // is now right after `{`. The next
1276 // `zshlex()` would advance past `echo`,
1277 // making body_start land mid-body and
1278 // lose the first word — `typeset -f f`
1279 // printed `a; echo b` instead of
1280 // `echo a; echo b` for `f() { echo a;
1281 // echo b }`.
1282 let body_start = self.lexer.pos;
1283 self.lexer.zshlex();
1284 let body = self.parse_program();
1285 let body_end = if self.lexer.tok == LexTok::Outbrace {
1286 self.lexer.pos.saturating_sub(1)
1287 } else {
1288 self.lexer.pos
1289 };
1290 let body_source = self
1291 .lexer
1292 .input
1293 .get(body_start..body_end)
1294 .map(|s| s.trim().to_string())
1295 .filter(|s| !s.is_empty());
1296 if self.lexer.tok == LexTok::Outbrace {
1297 self.lexer.zshlex();
1298 }
1299 // Replace the Simple list with a FuncDef list.
1300 lists.pop();
1301 let funcdef = ZshCommand::FuncDef(ZshFuncDef {
1302 names,
1303 body: Box::new(body),
1304 tracing: false,
1305 auto_call_args: None,
1306 body_source,
1307 });
1308 let synthetic = ZshList {
1309 sublist: ZshSublist {
1310 pipe: ZshPipe {
1311 cmd: funcdef,
1312 next: None,
1313 lineno: self.lexer.lineno,
1314 merge_stderr: false,
1315 },
1316 next: None,
1317 flags: SublistFlags::default(),
1318 },
1319 flags: ListFlags::default(),
1320 };
1321 lists.push(synthetic);
1322 } else if !matches!(
1323 self.lexer.tok,
1324 LexTok::Endinput | LexTok::Outbrace | LexTok::Seper | LexTok::Newlin
1325 ) {
1326 // No-brace one-line body: `foo() echo hello`.
1327 // Parse a single command for the body.
1328 let body_cmd = self.parse_cmd();
1329 if let Some(cmd) = body_cmd {
1330 let body_list = ZshList {
1331 sublist: ZshSublist {
1332 pipe: ZshPipe {
1333 cmd,
1334 next: None,
1335 lineno: self.lexer.lineno,
1336 merge_stderr: false,
1337 },
1338 next: None,
1339 flags: SublistFlags::default(),
1340 },
1341 flags: ListFlags::default(),
1342 };
1343 lists.pop();
1344 let funcdef = ZshCommand::FuncDef(ZshFuncDef {
1345 names: names.clone(),
1346 body: Box::new(ZshProgram {
1347 lists: vec![body_list],
1348 }),
1349 tracing: false,
1350 auto_call_args: None,
1351 body_source: None,
1352 });
1353 let synthetic = ZshList {
1354 sublist: ZshSublist {
1355 pipe: ZshPipe {
1356 cmd: funcdef,
1357 next: None,
1358 lineno: self.lexer.lineno,
1359 merge_stderr: false,
1360 },
1361 next: None,
1362 flags: SublistFlags::default(),
1363 },
1364 flags: ListFlags::default(),
1365 };
1366 lists.push(synthetic);
1367 }
1368 }
1369 }
1370 }
1371 None => break,
1372 }
1373 }
1374
1375 ZshProgram { lists }
1376 }
1377
1378 /// Parse a list (sublist with optional & or ;).
1379 ///
1380 /// Direct port of zsh/Src/parse.c:771-804 `par_list` (and the
1381 /// par_list1 wrapper at parse.c:807-817).
1382 ///
1383 /// **Structural divergence**: zsh's parse.c emits flat wordcode
1384 /// into the `ecbuf` u32 array via `ecadd(0)` (placeholder),
1385 /// `set_list_code(p, code, complexity)`, `wc_bdata(Z_END)`. zshrs
1386 /// builds an AST node `ZshList { sublist, flags }` instead. The
1387 /// async/sync/disown discrimination at parse.c:785-790 maps to
1388 /// zshrs's `ListFlags { async_, disown }` field — Z_SYNC is the
1389 /// default (no flags), Z_ASYNC = `&` = `async_=true`, Z_DISOWN +
1390 /// Z_ASYNC = `&!`/`&|` = both true. Same semantics, different
1391 /// representation. This divergence is repository-wide: every
1392 /// `par_*` function emits wordcode in C, every `parse_*` builds
1393 /// AST in Rust. The compile_zsh module then traverses the AST to
1394 /// emit fusevm bytecode, which serves the same role as zsh's
1395 /// wordcode but with a different opcode set and execution model.
1396 fn parse_list(&mut self) -> Option<ZshList> {
1397 let sublist = self.parse_sublist()?;
1398
1399 let flags = match self.lexer.tok {
1400 LexTok::Amper => {
1401 self.lexer.zshlex();
1402 ListFlags {
1403 async_: true,
1404 disown: false,
1405 }
1406 }
1407 LexTok::Amperbang => {
1408 self.lexer.zshlex();
1409 ListFlags {
1410 async_: true,
1411 disown: true,
1412 }
1413 }
1414 LexTok::Seper | LexTok::Semi | LexTok::Newlin => {
1415 self.lexer.zshlex();
1416 ListFlags::default()
1417 }
1418 _ => ListFlags::default(),
1419 };
1420
1421 Some(ZshList { sublist, flags })
1422 }
1423
1424 /// Parse a sublist (pipelines connected by && or ||).
1425 ///
1426 /// Direct port of zsh/Src/parse.c:825-867 `par_sublist` and
1427 /// par_sublist2 at parse.c:869-892. par_sublist handles the
1428 /// && / || conjunction and emits WC_SUBLIST opcodes; par_sublist2
1429 /// handles the leading `!` negation and `coproc` keyword.
1430 ///
1431 /// AST mapping: ZshSublist { pipe, conj_chain }, where `conj_chain`
1432 /// is a Vec<(ConjOp, ZshSublist)> for chained && / ||. C uses
1433 /// flat wordcode with WC_SUBLIST_AND / WC_SUBLIST_OR markers.
1434 fn parse_sublist(&mut self) -> Option<ZshSublist> {
1435 self.recursion_depth += 1;
1436 if self.check_recursion() {
1437 self.error("parse_sublist: max recursion depth exceeded");
1438 self.recursion_depth -= 1;
1439 return None;
1440 }
1441
1442 let mut flags = SublistFlags::default();
1443
1444 // Handle coproc and !
1445 if self.lexer.tok == LexTok::Coproc {
1446 flags.coproc = true;
1447 self.lexer.zshlex();
1448 } else if self.lexer.tok == LexTok::Bang {
1449 flags.not = true;
1450 self.lexer.zshlex();
1451 }
1452
1453 let pipe = match self.parse_pipe() {
1454 Some(p) => p,
1455 None => {
1456 self.recursion_depth -= 1;
1457 return None;
1458 }
1459 };
1460
1461 // Check for && or ||
1462 let next = match self.lexer.tok {
1463 LexTok::Damper => {
1464 self.lexer.zshlex();
1465 self.skip_separators();
1466 self.parse_sublist().map(|s| (SublistOp::And, Box::new(s)))
1467 }
1468 LexTok::Dbar => {
1469 self.lexer.zshlex();
1470 self.skip_separators();
1471 self.parse_sublist().map(|s| (SublistOp::Or, Box::new(s)))
1472 }
1473 _ => None,
1474 };
1475
1476 self.recursion_depth -= 1;
1477 Some(ZshSublist { pipe, next, flags })
1478 }
1479
1480 /// Parse a pipeline
1481 /// Parse a pipeline (cmds joined by `|` / `|&`). Direct port of
1482 /// zsh/Src/parse.c:894-956 `par_pline`. AST: ZshPipe { cmds: Vec<ZshCommand> }.
1483 /// C emits WC_PIPE wordcodes per command; same flow.
1484 fn parse_pipe(&mut self) -> Option<ZshPipe> {
1485 self.recursion_depth += 1;
1486 if self.check_recursion() {
1487 self.error("parse_pipe: max recursion depth exceeded");
1488 self.recursion_depth -= 1;
1489 return None;
1490 }
1491
1492 let lineno = self.lexer.toklineno;
1493 let cmd = match self.parse_cmd() {
1494 Some(c) => c,
1495 None => {
1496 self.recursion_depth -= 1;
1497 return None;
1498 }
1499 };
1500
1501 // Check for | or |&
1502 let mut merge_stderr = false;
1503 let next = match self.lexer.tok {
1504 LexTok::Bar | LexTok::Baramp => {
1505 merge_stderr = self.lexer.tok == LexTok::Baramp;
1506 self.lexer.zshlex();
1507 self.skip_separators();
1508 self.parse_pipe().map(Box::new)
1509 }
1510 _ => None,
1511 };
1512
1513 self.recursion_depth -= 1;
1514 Some(ZshPipe {
1515 cmd,
1516 next,
1517 lineno,
1518 merge_stderr,
1519 })
1520 }
1521
1522 /// Parse a command
1523 /// Parse a command — dispatches by leading token (FOR / CASE /
1524 /// IF / WHILE / UNTIL / REPEAT / FUNC / DINBRACK / DINPAR /
1525 /// INPAR subshell / INBRACE current-shell / TIME / NOCORRECT,
1526 /// else simple). Direct port of zsh/Src/parse.c:958-1085 `par_cmd`.
1527 fn parse_cmd(&mut self) -> Option<ZshCommand> {
1528 // Parse leading redirections
1529 let mut redirs = Vec::new();
1530 while self.lexer.tok.is_redirop() {
1531 if let Some(redir) = self.parse_redir() {
1532 redirs.push(redir);
1533 }
1534 }
1535
1536 let cmd = match self.lexer.tok {
1537 LexTok::For | LexTok::Foreach => self.parse_for(),
1538 LexTok::Select => self.parse_select(),
1539 LexTok::Case => self.parse_case(),
1540 LexTok::If => self.parse_if(),
1541 LexTok::While => self.parse_while(false),
1542 LexTok::Until => self.parse_while(true),
1543 LexTok::Repeat => self.parse_repeat(),
1544 LexTok::Inpar => self.parse_subsh(),
1545 LexTok::Inoutpar => self.parse_anon_funcdef(),
1546 LexTok::Inbrace => self.parse_cursh(),
1547 LexTok::Func => self.parse_funcdef(),
1548 LexTok::Dinbrack => self.parse_cond(),
1549 LexTok::Dinpar => self.parse_arith(),
1550 LexTok::Time => self.parse_time(),
1551 _ => self.parse_simple(redirs),
1552 };
1553
1554 // Parse trailing redirections. For Simple commands the redirs were
1555 // already captured inside parse_simple; for compound forms (Cursh,
1556 // Subsh, If, While, etc.) we collect them here and wrap in
1557 // ZshCommand::Redirected so compile_zsh can scope-bracket them.
1558 if let Some(inner) = cmd {
1559 let mut trailing: Vec<ZshRedir> = Vec::new();
1560 while self.lexer.tok.is_redirop() {
1561 if let Some(redir) = self.parse_redir() {
1562 trailing.push(redir);
1563 }
1564 }
1565 if trailing.is_empty() {
1566 return Some(inner);
1567 }
1568 // Simple already absorbed its own redirs (compile path expects
1569 // them on ZshSimple), so don't double-wrap.
1570 if matches!(inner, ZshCommand::Simple(_)) {
1571 if let ZshCommand::Simple(mut s) = inner {
1572 s.redirs.extend(trailing);
1573 return Some(ZshCommand::Simple(s));
1574 }
1575 unreachable!()
1576 }
1577 return Some(ZshCommand::Redirected(Box::new(inner), trailing));
1578 }
1579
1580 None
1581 }
1582
1583 /// Parse a simple command
1584 /// Parse a simple command (assignments + words + redirections).
1585 /// Direct port of zsh/Src/parse.c:1836-2228 `par_simple` —
1586 /// the largest single function in parse.c. Handles ENVSTRING/
1587 /// ENVARRAY assignments at command head, intermixed redirs,
1588 /// typeset-style multi-assignment commands, and the trailing
1589 /// inout-par `()` that converts a simple command into an inline
1590 /// function definition.
1591 fn parse_simple(&mut self, mut redirs: Vec<ZshRedir>) -> Option<ZshCommand> {
1592 let mut assigns = Vec::new();
1593 let mut words = Vec::new();
1594 const MAX_ITERATIONS: usize = 10_000;
1595 let mut iterations = 0;
1596
1597 // Parse leading assignments
1598 while self.lexer.tok == LexTok::Envstring || self.lexer.tok == LexTok::Envarray {
1599 iterations += 1;
1600 if iterations > MAX_ITERATIONS {
1601 self.error("parse_simple: exceeded max iterations in assignments");
1602 return None;
1603 }
1604 if let Some(assign) = self.parse_assign() {
1605 assigns.push(assign);
1606 }
1607 self.lexer.zshlex();
1608 }
1609
1610 // Parse words and redirections
1611 loop {
1612 iterations += 1;
1613 if iterations > MAX_ITERATIONS {
1614 self.error("parse_simple: exceeded max iterations");
1615 return None;
1616 }
1617 match self.lexer.tok {
1618 LexTok::String | LexTok::Typeset => {
1619 let s = self.lexer.tokstr.clone();
1620 if let Some(s) = s {
1621 words.push(s);
1622 }
1623 self.lexer.zshlex();
1624 // Check for function definition foo() { ... }
1625 if words.len() == 1 && self.peek_inoutpar() {
1626 return self.parse_inline_funcdef(words.pop().unwrap());
1627 }
1628 // `{name}>file` named-fd redirect: the lexer doesn't
1629 // recognize this shape, so the bare word `{name}`
1630 // arrives as a String. If it matches `{IDENT}` and
1631 // the NEXT token is a redirop, pop it off as the
1632 // varid for that redir.
1633 if !words.is_empty() && self.lexer.tok.is_redirop() {
1634 let last = words.last().unwrap();
1635 let untoked = crate::lexer::untokenize(last);
1636 if untoked.starts_with('{') && untoked.ends_with('}') && untoked.len() > 2 {
1637 let name = &untoked[1..untoked.len() - 1];
1638 if !name.is_empty()
1639 && name.chars().all(|c| c == '_' || c.is_ascii_alphanumeric())
1640 && name
1641 .chars()
1642 .next()
1643 .map(|c| c == '_' || c.is_ascii_alphabetic())
1644 .unwrap_or(false)
1645 {
1646 let varid = name.to_string();
1647 words.pop();
1648 if let Some(mut redir) = self.parse_redir() {
1649 redir.varid = Some(varid);
1650 redirs.push(redir);
1651 }
1652 continue;
1653 }
1654 }
1655 }
1656 }
1657 _ if self.lexer.tok.is_redirop() => {
1658 match self.parse_redir() {
1659 Some(redir) => redirs.push(redir),
1660 None => break, // Error in redir parsing, stop
1661 }
1662 }
1663 LexTok::Inoutpar if !words.is_empty() => {
1664 // foo() { ... } style function
1665 return self.parse_inline_funcdef(words.pop().unwrap());
1666 }
1667 _ => break,
1668 }
1669 }
1670
1671 if assigns.is_empty() && words.is_empty() && redirs.is_empty() {
1672 return None;
1673 }
1674
1675 Some(ZshCommand::Simple(ZshSimple {
1676 assigns,
1677 words,
1678 redirs,
1679 }))
1680 }
1681
1682 /// Parse an assignment
1683 /// Parse an assignment word `NAME=value` or `NAME=(arr items)`.
1684 /// Sub-routine of parse_simple. The C source handles assignments
1685 /// inline in par_simple via the ENVSTRING/ENVARRAY token paths
1686 /// (parse.c:1842-2000ish); zshrs splits it out to a dedicated
1687 /// helper for clarity.
1688 fn parse_assign(&mut self) -> Option<ZshAssign> {
1689 use crate::tokens::char_tokens;
1690
1691 let tokstr = self.lexer.tokstr.as_ref()?;
1692
1693 // Parse name=value or name+=value.
1694 let (name, value_str, append) = if self.lexer.tok == LexTok::Envarray {
1695 let (name, append) = if let Some(stripped) = tokstr.strip_suffix('+') {
1696 (stripped, true)
1697 } else {
1698 (tokstr.as_str(), false)
1699 };
1700 (name.to_string(), String::new(), append)
1701 } else if let Some(pos) = tokstr.find(char_tokens::EQUALS) {
1702 let name_part = &tokstr[..pos];
1703 let (name, append) = if name_part.ends_with('+') {
1704 (&name_part[..name_part.len() - 1], true)
1705 } else {
1706 (name_part, false)
1707 };
1708 (
1709 name.to_string(),
1710 tokstr[pos + char_tokens::EQUALS.len_utf8()..].to_string(),
1711 append,
1712 )
1713 } else if let Some(pos) = tokstr.find('=') {
1714 // Fallback to literal '=' for compatibility
1715 let name_part = &tokstr[..pos];
1716 let (name, append) = if name_part.ends_with('+') {
1717 (&name_part[..name_part.len() - 1], true)
1718 } else {
1719 (name_part, false)
1720 };
1721 (name.to_string(), tokstr[pos + 1..].to_string(), append)
1722 } else {
1723 return None;
1724 };
1725
1726 let value = if self.lexer.tok == LexTok::Envarray {
1727 // Array assignment: name=(...)
1728 let mut elements = Vec::new();
1729 self.lexer.zshlex(); // skip past token
1730
1731 let mut arr_iters = 0;
1732 const MAX_ARRAY_ELEMENTS: usize = 10_000;
1733 while matches!(
1734 self.lexer.tok,
1735 LexTok::String | LexTok::Seper | LexTok::Newlin
1736 ) {
1737 arr_iters += 1;
1738 if arr_iters > MAX_ARRAY_ELEMENTS {
1739 self.error("array assignment exceeded maximum elements");
1740 break;
1741 }
1742 if self.lexer.tok == LexTok::String {
1743 if let Some(ref s) = self.lexer.tokstr {
1744 elements.push(s.clone());
1745 }
1746 }
1747 self.lexer.zshlex();
1748 }
1749
1750 // The closing OUTPAR is consumed here. The outer parse_simple
1751 // loop will then `zshlex()` past whatever follows (typically
1752 // a separator or the next word) — calling zshlex twice in
1753 // tandem (here AND in parse_simple) over-advances and merges
1754 // a following `name() { … }` funcdef into the same Simple.
1755 // We only consume Outpar; let the caller handle the rest.
1756 // Without this guard `g=(o1); f() { :; }` parsed as one
1757 // Simple with assigns=[g] and words=["f()"] (one token).
1758 if self.lexer.tok == LexTok::Outpar {
1759 // Note: do NOT zshlex() here. parse_simple's `self.lexer
1760 // .zshlex()` after `parse_assign` returns advances past
1761 // the Outpar onto the next significant token.
1762 //
1763 // Force `incmdpos=true` so the next zshlex() recognizes
1764 // a follow-up `b=(...)` / `b=val` as Envarray/Envstring.
1765 // The lexer flips incmdpos to false on bare Outpar (which
1766 // is correct for subshell-close context), but for an
1767 // array-assignment close more assigns/words may follow.
1768 self.lexer.incmdpos = true;
1769 }
1770
1771 ZshAssignValue::Array(elements)
1772 } else {
1773 ZshAssignValue::Scalar(value_str)
1774 };
1775
1776 Some(ZshAssign {
1777 name,
1778 value,
1779 append,
1780 })
1781 }
1782
1783 /// Parse a redirection
1784 /// Parse a redirection (>file, <file, >>file, <<HEREDOC, etc.).
1785 /// Direct port of zsh/Src/parse.c:2229-2346 `par_redir`. Returns
1786 /// a ZshRedir node carrying the operator type, fd, target word
1787 /// (or here-doc body / pipe-redir command), and any `{var}` style
1788 /// fd-binding parameter.
1789 fn parse_redir(&mut self) -> Option<ZshRedir> {
1790 let rtype = match self.lexer.tok {
1791 LexTok::Outang => RedirType::Write,
1792 LexTok::Outangbang => RedirType::Writenow,
1793 LexTok::Doutang => RedirType::Append,
1794 LexTok::Doutangbang => RedirType::Appendnow,
1795 LexTok::Inang => RedirType::Read,
1796 LexTok::Inoutang => RedirType::ReadWrite,
1797 LexTok::Dinang => RedirType::Heredoc,
1798 LexTok::Dinangdash => RedirType::HeredocDash,
1799 LexTok::Trinang => RedirType::Herestr,
1800 LexTok::Inangamp => RedirType::MergeIn,
1801 LexTok::Outangamp => RedirType::MergeOut,
1802 LexTok::Ampoutang => RedirType::ErrWrite,
1803 LexTok::Outangampbang => RedirType::ErrWritenow,
1804 LexTok::Doutangamp => RedirType::ErrAppend,
1805 LexTok::Doutangampbang => RedirType::ErrAppendnow,
1806 _ => return None,
1807 };
1808
1809 let fd = if self.lexer.tokfd >= 0 {
1810 self.lexer.tokfd
1811 } else if matches!(
1812 rtype,
1813 RedirType::Read
1814 | RedirType::ReadWrite
1815 | RedirType::MergeIn
1816 | RedirType::Heredoc
1817 | RedirType::HeredocDash
1818 | RedirType::Herestr
1819 ) {
1820 0
1821 } else {
1822 1
1823 };
1824
1825 self.lexer.zshlex();
1826
1827 let name = match self.lexer.tok {
1828 LexTok::String | LexTok::Envstring => {
1829 let n = self.lexer.tokstr.clone().unwrap_or_default();
1830 self.lexer.zshlex();
1831 n
1832 }
1833 _ => {
1834 self.error("expected word after redirection");
1835 return None;
1836 }
1837 };
1838
1839 // Heredoc body capture: when reading the terminator above, the
1840 // lexer pushed a HereDoc to self.lexer.heredocs[]. Record the
1841 // index so fill_heredoc_bodies() can wire content back after
1842 // process_heredocs() has run.
1843 let heredoc_idx = if matches!(rtype, RedirType::Heredoc | RedirType::HeredocDash) {
1844 if !self.lexer.heredocs.is_empty() {
1845 Some(self.lexer.heredocs.len() - 1)
1846 } else {
1847 None
1848 }
1849 } else {
1850 None
1851 };
1852
1853 Some(ZshRedir {
1854 rtype,
1855 fd,
1856 name,
1857 heredoc: None,
1858 varid: None,
1859 heredoc_idx,
1860 })
1861 }
1862
1863 /// Parse for/foreach loop
1864 /// Parse `for NAME in WORDS; do BODY; done` (foreach style) AND
1865 /// `for ((init; cond; incr)) do BODY done` (c-style). Direct port
1866 /// of zsh/Src/parse.c:1087-1207 `par_for`. parse_for_cstyle is the
1867 /// inner branch for the `((...))` arithmetic-header variant
1868 /// (parse.c:1100-1140 inside par_for).
1869 fn parse_for(&mut self) -> Option<ZshCommand> {
1870 let is_foreach = self.lexer.tok == LexTok::Foreach;
1871 self.lexer.zshlex();
1872
1873 // Check for C-style: for (( init; cond; step ))
1874 if self.lexer.tok == LexTok::Dinpar {
1875 return self.parse_for_cstyle();
1876 }
1877
1878 // Get variable name(s). zsh parse.c par_for accepts multiple
1879 // identifier tokens before `in`/`(`/newline — `for k v in ...`
1880 // assigns each iteration's pair of values to k and v in turn.
1881 // We store the names space-joined since variable identifiers
1882 // can't contain whitespace.
1883 let mut names: Vec<String> = Vec::new();
1884 loop {
1885 match self.lexer.tok {
1886 LexTok::String => {
1887 let v = self.lexer.tokstr.clone().unwrap_or_default();
1888 if v == "in" {
1889 break;
1890 }
1891 names.push(v);
1892 self.lexer.zshlex();
1893 }
1894 _ => break,
1895 }
1896 }
1897 if names.is_empty() {
1898 self.error("expected variable name in for");
1899 return None;
1900 }
1901 let var = names.join(" ");
1902
1903 // Skip newlines
1904 self.skip_separators();
1905
1906 // Get list. The lexer-port quirk: `for x (a b c)` arrives as a
1907 // single String token with the parens lexed-as-content
1908 // (`<INPAR>a b c<OUTPAR>`) instead of as separate Inpar/String/
1909 // Outpar tokens. Detect that shape and split it manually.
1910 let list = if self.lexer.tok == LexTok::String
1911 && self
1912 .lexer
1913 .tokstr
1914 .as_ref()
1915 .map(|s| s.starts_with('\u{88}') && s.ends_with('\u{8a}'))
1916 .unwrap_or(false)
1917 {
1918 let raw = self.lexer.tokstr.clone().unwrap_or_default();
1919 // Strip leading INPAR + trailing OUTPAR, then untokenize the
1920 // inner content and split on whitespace for the word list.
1921 let inner = &raw[raw.char_indices().nth(1).map(|(i, _)| i).unwrap_or(0)
1922 ..raw
1923 .char_indices()
1924 .last()
1925 .map(|(i, _)| i)
1926 .unwrap_or(raw.len())];
1927 let cleaned = crate::lexer::untokenize(inner);
1928 let words: Vec<String> = cleaned.split_whitespace().map(|s| s.to_string()).collect();
1929 self.lexer.zshlex();
1930 ForList::Words(words)
1931 } else if self.lexer.tok == LexTok::String {
1932 let s = self.lexer.tokstr.as_ref();
1933 if s.map(|s| s == "in").unwrap_or(false) {
1934 self.lexer.zshlex();
1935 let mut words = Vec::new();
1936 let mut word_count = 0;
1937 while self.lexer.tok == LexTok::String {
1938 word_count += 1;
1939 if word_count > 500 || self.check_limit() {
1940 self.error("for: too many words");
1941 return None;
1942 }
1943 if let Some(ref s) = self.lexer.tokstr {
1944 words.push(s.clone());
1945 }
1946 self.lexer.zshlex();
1947 }
1948 ForList::Words(words)
1949 } else {
1950 ForList::Positional
1951 }
1952 } else if self.lexer.tok == LexTok::Inpar {
1953 // for var (...)
1954 self.lexer.zshlex();
1955 let mut words = Vec::new();
1956 let mut word_count = 0;
1957 while self.lexer.tok == LexTok::String || self.lexer.tok == LexTok::Seper {
1958 word_count += 1;
1959 if word_count > 500 || self.check_limit() {
1960 self.error("for: too many words in parens");
1961 return None;
1962 }
1963 if self.lexer.tok == LexTok::String {
1964 if let Some(ref s) = self.lexer.tokstr {
1965 words.push(s.clone());
1966 }
1967 }
1968 self.lexer.zshlex();
1969 }
1970 if self.lexer.tok == LexTok::Outpar {
1971 self.lexer.zshlex();
1972 }
1973 ForList::Words(words)
1974 } else {
1975 ForList::Positional
1976 };
1977
1978 // Skip to body
1979 self.skip_separators();
1980
1981 // Parse body
1982 let body = self.parse_loop_body(is_foreach)?;
1983
1984 Some(ZshCommand::For(ZshFor {
1985 var,
1986 list,
1987 body: Box::new(body),
1988 is_select: false,
1989 }))
1990 }
1991
1992 /// Parse C-style for loop: for (( init; cond; step ))
1993 /// Parse the c-style `for ((init; cond; incr)) do BODY done`.
1994 /// Inner branch of zsh/Src/parse.c:1100-1140 inside par_for.
1995 /// Recognized when the token after FOR is DINPAR (the `((`
1996 /// detected by gettok via dbparens setup).
1997 fn parse_for_cstyle(&mut self) -> Option<ZshCommand> {
1998 // We're at (( (Dinpar None) - the opening ((
1999 // Lexer returns:
2000 // Dinpar None - opening ((
2001 // Dinpar "init" - init expression, semicolon consumed
2002 // Dinpar "cond" - cond expression, semicolon consumed
2003 // Doutpar "step" - step expression, closing )) consumed
2004
2005 self.lexer.zshlex(); // Get init: Dinpar "i=0"
2006
2007 if self.lexer.tok != LexTok::Dinpar {
2008 self.error("expected init expression in for ((");
2009 return None;
2010 }
2011 let init = self.lexer.tokstr.clone().unwrap_or_default();
2012
2013 self.lexer.zshlex(); // Get cond: Dinpar "i<10"
2014
2015 if self.lexer.tok != LexTok::Dinpar {
2016 self.error("expected condition in for ((");
2017 return None;
2018 }
2019 let cond = self.lexer.tokstr.clone().unwrap_or_default();
2020
2021 self.lexer.zshlex(); // Get step: Doutpar "i++"
2022
2023 if self.lexer.tok != LexTok::Doutpar {
2024 self.error("expected )) in for");
2025 return None;
2026 }
2027 let step = self.lexer.tokstr.clone().unwrap_or_default();
2028
2029 self.lexer.zshlex(); // Move past ))
2030
2031 self.skip_separators();
2032 let body = self.parse_loop_body(false)?;
2033
2034 Some(ZshCommand::For(ZshFor {
2035 var: String::new(),
2036 list: ForList::CStyle { init, cond, step },
2037 body: Box::new(body),
2038 is_select: false,
2039 }))
2040 }
2041
2042 /// Parse select loop (same syntax as for)
2043 /// Parse `select NAME in WORDS; do BODY; done`. Same shape as
2044 /// `for NAME in WORDS; do ...` but with menu-prompt semantics in
2045 /// the executor. C equivalent: the SELECT case in par_for at
2046 /// parse.c:1087-1207 (selects share parser flow with foreach).
2047 fn parse_select(&mut self) -> Option<ZshCommand> {
2048 // `select` shares parse_for's grammar (var, words, body) but the
2049 // compile path is different (interactive prompt loop).
2050 match self.parse_for()? {
2051 ZshCommand::For(mut f) => {
2052 f.is_select = true;
2053 Some(ZshCommand::For(f))
2054 }
2055 other => Some(other),
2056 }
2057 }
2058
2059 /// Parse case statement
2060 /// Parse `case WORD in PATTERN) BODY ;; ... esac`. Direct port
2061 /// of zsh/Src/parse.c:1209-1409 `par_case`. Each case arm is a
2062 /// (pattern_list, body, terminator) tuple where terminator is
2063 /// `;;` (default), `;&` (fallthrough), or `;|` (continue testing).
2064 fn parse_case(&mut self) -> Option<ZshCommand> {
2065 self.lexer.zshlex(); // skip 'case'
2066
2067 let word = match self.lexer.tok {
2068 LexTok::String => {
2069 let w = self.lexer.tokstr.clone().unwrap_or_default();
2070 self.lexer.zshlex();
2071 w
2072 }
2073 _ => {
2074 self.error("expected word after case");
2075 return None;
2076 }
2077 };
2078
2079 self.skip_separators();
2080
2081 // Expect 'in' or {
2082 let use_brace = self.lexer.tok == LexTok::Inbrace;
2083 if self.lexer.tok == LexTok::String {
2084 let s = self.lexer.tokstr.as_ref();
2085 if s.map(|s| s != "in").unwrap_or(true) {
2086 self.error("expected 'in' in case");
2087 return None;
2088 }
2089 } else if !use_brace {
2090 self.error("expected 'in' or '{' in case");
2091 return None;
2092 }
2093 // Set incasepat=1 BEFORE consuming "in" so the next token (which
2094 // could be a leading `(` of a paren-prefixed pattern like
2095 // `case foo in (a|b) …`) is lexed as Inpar, not as a glob-token.
2096 // Without this the `(` got swallowed into a gettokstr('(', false)
2097 // call and produced a String like "(foo)" — the parser then saw
2098 // the `)` inside a string instead of as a separate Outpar.
2099 self.lexer.incasepat = 1;
2100 self.lexer.zshlex();
2101
2102 let mut arms = Vec::new();
2103 const MAX_ARMS: usize = 10_000;
2104
2105 loop {
2106 if arms.len() > MAX_ARMS {
2107 self.error("parse_case: too many arms");
2108 break;
2109 }
2110
2111 // Set incasepat BEFORE skipping separators so lexer knows we're in case pattern context
2112 // This affects how [ and | are lexed
2113 self.lexer.incasepat = 1;
2114
2115 self.skip_separators();
2116
2117 // Check for end
2118 // Note: 'esac' might be String "esac" if incasepat > 0 prevents reserved word recognition
2119 let is_esac = self.lexer.tok == LexTok::Esac
2120 || (self.lexer.tok == LexTok::String
2121 && self
2122 .lexer
2123 .tokstr
2124 .as_ref()
2125 .map(|s| s == "esac")
2126 .unwrap_or(false));
2127 if (use_brace && self.lexer.tok == LexTok::Outbrace) || (!use_brace && is_esac) {
2128 self.lexer.incasepat = 0;
2129 self.lexer.zshlex();
2130 break;
2131 }
2132
2133 // Also break on EOF
2134 if self.lexer.tok == LexTok::Endinput || self.lexer.tok == LexTok::Lexerr {
2135 self.lexer.incasepat = 0;
2136 break;
2137 }
2138
2139 // Skip optional `(`. zsh's case grammar: `case W in (P)…)`.
2140 // The leading `(` is paired with a matching `)` that closes
2141 // the pattern itself; the arm-close `)` follows separately.
2142 // Track whether we consumed it so we can skip the matching
2143 // `)` after pattern parsing — otherwise the arm-close would
2144 // be interpreted as the pattern-close and the actual body
2145 // would get the leftover `)`.
2146 let had_leading_paren = self.lexer.tok == LexTok::Inpar;
2147 if had_leading_paren {
2148 self.lexer.zshlex();
2149 }
2150
2151 // incasepat is already set above
2152 let mut patterns = Vec::new();
2153 let mut pattern_iterations = 0;
2154 loop {
2155 pattern_iterations += 1;
2156 if pattern_iterations > 1000 {
2157 self.error("parse_case: too many pattern iterations");
2158 self.lexer.incasepat = 0;
2159 return None;
2160 }
2161
2162 if self.lexer.tok == LexTok::String {
2163 let s = self.lexer.tokstr.as_ref();
2164 if s.map(|s| s == "esac").unwrap_or(false) {
2165 break;
2166 }
2167 patterns.push(self.lexer.tokstr.clone().unwrap_or_default());
2168 // After first pattern token, set incasepat=2 so ( is treated as part of pattern
2169 self.lexer.incasepat = 2;
2170 self.lexer.zshlex();
2171 } else if self.lexer.tok != LexTok::Bar {
2172 break;
2173 }
2174
2175 if self.lexer.tok == LexTok::Bar {
2176 // Reset to 1 (start of next alternative pattern)
2177 self.lexer.incasepat = 1;
2178 self.lexer.zshlex();
2179 } else {
2180 break;
2181 }
2182 }
2183 self.lexer.incasepat = 0;
2184
2185 // Expect ). Also handle the `(P))` wrapped-pattern form:
2186 // when a leading `(` was consumed, accept an extra `)` —
2187 // the inner `)` closes the optional-paren wrapper, the
2188 // outer `)` is the arm-close. zsh accepts BOTH `(P) BODY`
2189 // (bare pattern, leading-paren is just the opt-marker, the
2190 // close is arm-close) and `(P)) BODY` (paren-wrapped
2191 // pattern, then arm-close). The first form is unambiguous
2192 // when the bare pattern was simple; the second is needed
2193 // when the body starts with `(`.
2194 if self.lexer.tok != LexTok::Outpar {
2195 self.error("expected ')' in case pattern");
2196 return None;
2197 }
2198 self.lexer.zshlex();
2199 if had_leading_paren && self.lexer.tok == LexTok::Outpar {
2200 self.lexer.zshlex();
2201 }
2202
2203 // Parse body
2204 let body = self.parse_program();
2205
2206 // Get terminator. Set incasepat=1 BEFORE the zshlex
2207 // advance so the next token (the next arm's pattern, like
2208 // `[a-z]`) gets tokenized in pattern context. Without
2209 // this, a `[`-prefixed pattern after the FIRST arm became
2210 // Inbrack instead of String and the pattern-loop bailed
2211 // out with "expected ')' in case pattern".
2212 let terminator = match self.lexer.tok {
2213 LexTok::Dsemi => {
2214 self.lexer.incasepat = 1;
2215 self.lexer.zshlex();
2216 CaseTerm::Break
2217 }
2218 LexTok::Semiamp => {
2219 self.lexer.incasepat = 1;
2220 self.lexer.zshlex();
2221 CaseTerm::Continue
2222 }
2223 LexTok::Semibar => {
2224 self.lexer.incasepat = 1;
2225 self.lexer.zshlex();
2226 CaseTerm::TestNext
2227 }
2228 _ => CaseTerm::Break,
2229 };
2230
2231 if !patterns.is_empty() {
2232 arms.push(CaseArm {
2233 patterns,
2234 body,
2235 terminator,
2236 });
2237 }
2238 }
2239
2240 Some(ZshCommand::Case(ZshCase { word, arms }))
2241 }
2242
2243 /// Parse if statement
2244 /// Parse `if COND; then BODY; [elif COND; then BODY;]* [else BODY;] fi`.
2245 /// Direct port of zsh/Src/parse.c:1411-1519 `par_if`. The C source
2246 /// emits WC_IF wordcodes per arm; zshrs builds an AST chain of
2247 /// (cond, then_body) tuples plus an optional else_body.
2248 fn parse_if(&mut self) -> Option<ZshCommand> {
2249 self.lexer.zshlex(); // skip 'if'
2250
2251 // Parse condition - stops at 'then' or '{' (zsh allows { instead of then)
2252 let cond = Box::new(self.parse_program_until(Some(&[LexTok::Then, LexTok::Inbrace])));
2253
2254 self.skip_separators();
2255
2256 // Expect 'then' or {
2257 let use_brace = self.lexer.tok == LexTok::Inbrace;
2258 if self.lexer.tok != LexTok::Then && !use_brace {
2259 self.error("expected 'then' or '{' after if condition");
2260 return None;
2261 }
2262 self.lexer.zshlex();
2263
2264 // Parse then-body - stops at else/elif/fi, or } if using brace syntax
2265 let then = if use_brace {
2266 let body = self.parse_program_until(Some(&[LexTok::Outbrace]));
2267 if self.lexer.tok == LexTok::Outbrace {
2268 self.lexer.zshlex();
2269 }
2270 Box::new(body)
2271 } else {
2272 Box::new(self.parse_program_until(Some(&[LexTok::Else, LexTok::Elif, LexTok::Fi])))
2273 };
2274
2275 // Parse elif and else (only for then/fi syntax, not brace syntax)
2276 let mut elif = Vec::new();
2277 let mut else_ = None;
2278
2279 if !use_brace {
2280 loop {
2281 self.skip_separators();
2282
2283 match self.lexer.tok {
2284 LexTok::Elif => {
2285 self.lexer.zshlex();
2286 // elif condition stops at 'then' or '{'
2287 let econd =
2288 self.parse_program_until(Some(&[LexTok::Then, LexTok::Inbrace]));
2289 self.skip_separators();
2290
2291 let elif_use_brace = self.lexer.tok == LexTok::Inbrace;
2292 if self.lexer.tok != LexTok::Then && !elif_use_brace {
2293 self.error("expected 'then' after elif");
2294 return None;
2295 }
2296 self.lexer.zshlex();
2297
2298 // elif body stops at else/elif/fi or } if using braces
2299 let ebody = if elif_use_brace {
2300 let body = self.parse_program_until(Some(&[LexTok::Outbrace]));
2301 if self.lexer.tok == LexTok::Outbrace {
2302 self.lexer.zshlex();
2303 }
2304 body
2305 } else {
2306 self.parse_program_until(Some(&[
2307 LexTok::Else,
2308 LexTok::Elif,
2309 LexTok::Fi,
2310 ]))
2311 };
2312
2313 elif.push((econd, ebody));
2314 }
2315 LexTok::Else => {
2316 self.lexer.zshlex();
2317 self.skip_separators();
2318
2319 let else_use_brace = self.lexer.tok == LexTok::Inbrace;
2320 if else_use_brace {
2321 self.lexer.zshlex();
2322 }
2323
2324 // else body stops at 'fi' or '}'
2325 else_ = Some(Box::new(if else_use_brace {
2326 let body = self.parse_program_until(Some(&[LexTok::Outbrace]));
2327 if self.lexer.tok == LexTok::Outbrace {
2328 self.lexer.zshlex();
2329 }
2330 body
2331 } else {
2332 self.parse_program_until(Some(&[LexTok::Fi]))
2333 }));
2334
2335 // Consume the 'fi' if present (not for brace syntax)
2336 if !else_use_brace && self.lexer.tok == LexTok::Fi {
2337 self.lexer.zshlex();
2338 }
2339 break;
2340 }
2341 LexTok::Fi => {
2342 self.lexer.zshlex();
2343 break;
2344 }
2345 _ => break,
2346 }
2347 }
2348 }
2349
2350 Some(ZshCommand::If(ZshIf {
2351 cond,
2352 then,
2353 elif,
2354 else_,
2355 }))
2356 }
2357
2358 /// Parse while/until loop
2359 /// Parse `while COND; do BODY; done` and `until COND; do BODY; done`.
2360 /// Direct port of zsh/Src/parse.c:1521-1563 `par_while`. The
2361 /// `until` variant is the same loop with the condition negated.
2362 fn parse_while(&mut self, until: bool) -> Option<ZshCommand> {
2363 self.lexer.zshlex(); // skip while/until
2364
2365 let cond = Box::new(self.parse_program());
2366
2367 self.skip_separators();
2368 let body = self.parse_loop_body(false)?;
2369
2370 Some(ZshCommand::While(ZshWhile {
2371 cond,
2372 body: Box::new(body),
2373 until,
2374 }))
2375 }
2376
2377 /// Parse repeat loop
2378 /// Parse `repeat N; do BODY; done`. Direct port of
2379 /// zsh/Src/parse.c:1565-1617 `par_repeat`. The C source supports
2380 /// the SHORTLOOPS short-form `repeat N CMD` (no do/done) — zshrs's
2381 /// parser doesn't yet special-case that variant.
2382 fn parse_repeat(&mut self) -> Option<ZshCommand> {
2383 self.lexer.zshlex(); // skip 'repeat'
2384
2385 let count = match self.lexer.tok {
2386 LexTok::String => {
2387 let c = self.lexer.tokstr.clone().unwrap_or_default();
2388 self.lexer.zshlex();
2389 c
2390 }
2391 _ => {
2392 self.error("expected count after repeat");
2393 return None;
2394 }
2395 };
2396
2397 self.skip_separators();
2398 let body = self.parse_loop_body(false)?;
2399
2400 Some(ZshCommand::Repeat(ZshRepeat {
2401 count,
2402 body: Box::new(body),
2403 }))
2404 }
2405
2406 /// Parse loop body (do...done, {...}, or shortloop)
2407 /// Parse the `do BODY done` body of a for/while/until/select/
2408 /// repeat loop. Direct equivalent of zsh's parse.c handling
2409 /// inside the loop builders — they all consume DOLOOP, parse a
2410 /// list until DONE, and return the list. The `foreach_style`
2411 /// flag signals foreach (where short-form `for NAME in WORDS;
2412 /// CMD` may skip do/done) vs c-style (which always requires
2413 /// do/done).
2414 fn parse_loop_body(&mut self, foreach_style: bool) -> Option<ZshProgram> {
2415 if self.lexer.tok == LexTok::Doloop {
2416 self.lexer.zshlex();
2417 let body = self.parse_program();
2418 if self.lexer.tok == LexTok::Done {
2419 self.lexer.zshlex();
2420 }
2421 Some(body)
2422 } else if self.lexer.tok == LexTok::Inbrace {
2423 self.lexer.zshlex();
2424 let body = self.parse_program();
2425 if self.lexer.tok == LexTok::Outbrace {
2426 self.lexer.zshlex();
2427 }
2428 Some(body)
2429 } else if foreach_style {
2430 // foreach allows 'end' terminator
2431 let body = self.parse_program();
2432 if self.lexer.tok == LexTok::Zend {
2433 self.lexer.zshlex();
2434 }
2435 Some(body)
2436 } else {
2437 // Short loop - single command
2438 self.parse_list()
2439 .map(|list| ZshProgram { lists: vec![list] })
2440 }
2441 }
2442
2443 /// Parse (...) subshell
2444 /// Parse a subshell `( ... )`. Direct port of zsh/Src/parse.c:1619-1670
2445 /// `par_subsh`. Body parses as a normal list; the subshell wrapper
2446 /// fork-isolates execution in the executor.
2447 fn parse_subsh(&mut self) -> Option<ZshCommand> {
2448 self.lexer.zshlex(); // skip (
2449 let prog = self.parse_program();
2450 if self.lexer.tok == LexTok::Outpar {
2451 self.lexer.zshlex();
2452 }
2453 Some(ZshCommand::Subsh(Box::new(prog)))
2454 }
2455
2456 /// `() { body } arg1 arg2 …` — anonymous function. Defines a fresh
2457 /// function named `_zshrs_anon_N`, invokes it with the args, and the
2458 /// body runs with positional params set. Implemented as the desugared
2459 /// pair (FuncDef + Simple call) so the compile path doesn't need new
2460 /// machinery.
2461 /// Parse an anonymous function definition `() { BODY }` followed
2462 /// by call args. zsh treats `() { echo hi; } a b c` as defining
2463 /// and immediately calling an anon fn with args a/b/c. C
2464 /// equivalent: the INOUTPAR shape in par_simple at parse.c:1836+
2465 /// triggers an anon-funcdef path.
2466 fn parse_anon_funcdef(&mut self) -> Option<ZshCommand> {
2467 self.lexer.zshlex(); // skip ()
2468 self.skip_separators();
2469 // No `{` after `()` → bare empty subshell shape `()`. Fall back
2470 // to a Subsh with an empty program so the status is 0 (matches
2471 // zsh's `()` no-op behavior).
2472 if self.lexer.tok != LexTok::Inbrace {
2473 return Some(ZshCommand::Subsh(Box::new(ZshProgram {
2474 lists: Vec::new(),
2475 })));
2476 }
2477 self.lexer.zshlex(); // skip {
2478 let body = self.parse_program();
2479 if self.lexer.tok == LexTok::Outbrace {
2480 self.lexer.zshlex();
2481 }
2482 // Collect any trailing args until a separator. zsh's anon-fn form
2483 // `() { body } a b c` runs body with $1=a, $2=b, $3=c.
2484 let mut args = Vec::new();
2485 while self.lexer.tok == LexTok::String {
2486 if let Some(s) = self.lexer.tokstr.clone() {
2487 args.push(s);
2488 }
2489 self.lexer.zshlex();
2490 }
2491
2492 // Generate a unique name. Module-level static would be cleaner but
2493 // a thread-local atomic is enough — anonymous functions are
2494 // ephemeral and the name isn't user-visible.
2495 use std::sync::atomic::{AtomicUsize, Ordering};
2496 static ANON_COUNTER: AtomicUsize = AtomicUsize::new(0);
2497 let n = ANON_COUNTER.fetch_add(1, Ordering::Relaxed);
2498 let name = format!("_zshrs_anon_{}", n);
2499 Some(ZshCommand::FuncDef(ZshFuncDef {
2500 names: vec![name],
2501 body: Box::new(body),
2502 tracing: false,
2503 auto_call_args: Some(args),
2504 body_source: None,
2505 }))
2506 }
2507
2508 /// Parse {...} cursh
2509 /// Parse a current-shell brace block `{ BODY }`. C source:
2510 /// par_cmd at parse.c:958-1085 handles INBRACE → emit WC_CURSH
2511 /// + recurse into list. zshrs's parse_cursh extracts that arm
2512 /// into a dedicated method.
2513 fn parse_cursh(&mut self) -> Option<ZshCommand> {
2514 self.lexer.zshlex(); // skip {
2515 let prog = self.parse_program();
2516
2517 // Check for { ... } always { ... }
2518 if self.lexer.tok == LexTok::Outbrace {
2519 self.lexer.zshlex();
2520
2521 // Check for 'always'
2522 if self.lexer.tok == LexTok::String {
2523 let s = self.lexer.tokstr.as_ref();
2524 if s.map(|s| s == "always").unwrap_or(false) {
2525 self.lexer.zshlex();
2526 self.skip_separators();
2527
2528 if self.lexer.tok == LexTok::Inbrace {
2529 self.lexer.zshlex();
2530 let always = self.parse_program();
2531 if self.lexer.tok == LexTok::Outbrace {
2532 self.lexer.zshlex();
2533 }
2534 return Some(ZshCommand::Try(ZshTry {
2535 try_block: Box::new(prog),
2536 always: Box::new(always),
2537 }));
2538 }
2539 }
2540 }
2541 }
2542
2543 Some(ZshCommand::Cursh(Box::new(prog)))
2544 }
2545
2546 /// Parse function definition
2547 /// Parse `function NAME { BODY }` or `NAME () { BODY }`. Direct
2548 /// port of zsh/Src/parse.c:1672-1785 `par_funcdef`. zsh handles
2549 /// the multiple keyword shapes (function FOO, FOO (), function FOO ()),
2550 /// the optional `[fname1 fname2 ...]` for multi-name function defs,
2551 /// and the `function FOO () { ... }` traditional/POSIX hybrid form.
2552 fn parse_funcdef(&mut self) -> Option<ZshCommand> {
2553 self.lexer.zshlex(); // skip 'function'
2554
2555 let mut names = Vec::new();
2556 let mut tracing = false;
2557
2558 // Handle options like -T and function names. Two subtleties:
2559 //
2560 // 1. Flags: zsh's lexer encodes a leading `-` as
2561 // `char_tokens::DASH` (\u{9b}) inside the String tokstr.
2562 // The previous `s.starts_with('-')` check failed for
2563 // `\u{9b}T`, so `function -T NAME { body }` slipped the
2564 // `-T` token into `names` and the function got registered
2565 // as `T` plus the intended `NAME`.
2566 //
2567 // 2. Body opener: zsh's lexer emits the opening `{` as a
2568 // String (not LexTok::Inbrace) when it follows the String
2569 // NAME — the preceding name token resets incmdpos to
2570 // false, and only `{` immediately followed by `}` (the
2571 // empty-body case) gets promoted to Inbrace. The funcdef
2572 // parser must recognise the bare-`{` String as the body
2573 // opener; otherwise `function NAME { body }` falls through
2574 // to `_ => break`, no body parses, and the FuncDef never
2575 // lands in the AST. This is consistent with C zsh's
2576 // par_funcdef which knows it's in funcdef-header context
2577 // and accepts the brace either way.
2578 loop {
2579 match self.lexer.tok {
2580 LexTok::String => {
2581 let s = self.lexer.tokstr.as_ref()?;
2582 if s == "{" {
2583 // Funcdef body opener — break, body-parser branch handles it.
2584 break;
2585 }
2586 let first = s.chars().next();
2587 if matches!(first, Some('-') | Some('+'))
2588 || matches!(first, Some(c) if c == crate::tokens::char_tokens::DASH)
2589 {
2590 if s.contains('T') {
2591 tracing = true;
2592 }
2593 self.lexer.zshlex();
2594 continue;
2595 }
2596 names.push(s.clone());
2597 self.lexer.zshlex();
2598 }
2599 LexTok::Inbrace | LexTok::Inoutpar | LexTok::Seper | LexTok::Newlin => break,
2600 _ => break,
2601 }
2602 }
2603
2604 // Optional ()
2605 let saw_paren = self.lexer.tok == LexTok::Inoutpar;
2606 if saw_paren {
2607 self.lexer.zshlex();
2608 }
2609
2610 self.skip_separators();
2611
2612 // Body opener: real Inbrace OR a String("{") (the lexer emits
2613 // the latter after a String NAME — see comment above).
2614 let body_opener_is_string_brace = self.lexer.tok == LexTok::String
2615 && self.lexer.tokstr.as_deref() == Some("{");
2616 if self.lexer.tok == LexTok::Inbrace || body_opener_is_string_brace {
2617 // Capture body_start BEFORE the lexer advances past the
2618 // first body token. After the previous zshlex consumed
2619 // `{`, lexer.pos points just past `{` (which is where the
2620 // body source starts). The next `zshlex()` would advance
2621 // past the first token (`echo`), making body_start land
2622 // mid-body and lose the first word — `typeset -f f` would
2623 // print `a; echo b` for `{ echo a; echo b }`.
2624 let body_start = self.lexer.pos;
2625 self.lexer.zshlex();
2626 let body = self.parse_program();
2627 let body_end = if self.lexer.tok == LexTok::Outbrace {
2628 // Lexer has just consumed `}`; pos is past it. Body content
2629 // ends one byte before pos.
2630 self.lexer.pos.saturating_sub(1)
2631 } else {
2632 self.lexer.pos
2633 };
2634 let body_source = self
2635 .lexer
2636 .input
2637 .get(body_start..body_end)
2638 .map(|s| s.trim().to_string())
2639 .filter(|s| !s.is_empty());
2640 if self.lexer.tok == LexTok::Outbrace {
2641 self.lexer.zshlex();
2642 }
2643
2644 // Anonymous form `function () { body } a b c` (with `()`) or
2645 // `function { body } a b c` (zsh-only shorthand, no `()`). No
2646 // name was collected. Mirror parse_anon_funcdef: synthesize
2647 // `_zshrs_anon_N`, collect trailing args, set auto_call_args
2648 // so compile_funcdef registers + immediately calls the
2649 // function with the args as positional params.
2650 if names.is_empty() {
2651 let mut args = Vec::new();
2652 while self.lexer.tok == LexTok::String {
2653 if let Some(s) = self.lexer.tokstr.clone() {
2654 args.push(s);
2655 }
2656 self.lexer.zshlex();
2657 }
2658 use std::sync::atomic::{AtomicUsize, Ordering};
2659 static ANON_COUNTER: AtomicUsize = AtomicUsize::new(0);
2660 let n = ANON_COUNTER.fetch_add(1, Ordering::Relaxed);
2661 let name = format!("_zshrs_anon_kw_{}", n);
2662 return Some(ZshCommand::FuncDef(ZshFuncDef {
2663 names: vec![name],
2664 body: Box::new(body),
2665 tracing,
2666 auto_call_args: Some(args),
2667 body_source,
2668 }));
2669 }
2670
2671 Some(ZshCommand::FuncDef(ZshFuncDef {
2672 names,
2673 body: Box::new(body),
2674 tracing,
2675 auto_call_args: None,
2676 body_source,
2677 }))
2678 } else {
2679 // Short form
2680 self.parse_list().map(|list| {
2681 ZshCommand::FuncDef(ZshFuncDef {
2682 names,
2683 body: Box::new(ZshProgram { lists: vec![list] }),
2684 tracing,
2685 auto_call_args: None,
2686 body_source: None,
2687 })
2688 })
2689 }
2690 }
2691
2692 /// Parse inline function definition: name() { ... }
2693 /// Parse the inline form `NAME () { BODY }` (POSIX-style funcdef
2694 /// without the `function` keyword). The name has already been
2695 /// consumed and pushed by parse_simple before this method fires.
2696 /// C source: handled inline in par_simple's INOUTPAR-after-name
2697 /// arm (parse.c:1836-2228).
2698 fn parse_inline_funcdef(&mut self, name: String) -> Option<ZshCommand> {
2699 // Skip ()
2700 if self.lexer.tok == LexTok::Inoutpar {
2701 self.lexer.zshlex();
2702 }
2703
2704 self.skip_separators();
2705
2706 // Parse body
2707 if self.lexer.tok == LexTok::Inbrace {
2708 // Same body_start-before-zshlex fix as parse_funcdef.
2709 let body_start = self.lexer.pos;
2710 self.lexer.zshlex();
2711 let body = self.parse_program();
2712 let body_end = if self.lexer.tok == LexTok::Outbrace {
2713 self.lexer.pos.saturating_sub(1)
2714 } else {
2715 self.lexer.pos
2716 };
2717 let body_source = self
2718 .lexer
2719 .input
2720 .get(body_start..body_end)
2721 .map(|s| s.trim().to_string())
2722 .filter(|s| !s.is_empty());
2723 if self.lexer.tok == LexTok::Outbrace {
2724 self.lexer.zshlex();
2725 }
2726 Some(ZshCommand::FuncDef(ZshFuncDef {
2727 names: vec![name],
2728 body: Box::new(body),
2729 tracing: false,
2730 auto_call_args: None,
2731 body_source,
2732 }))
2733 } else {
2734 match self.parse_cmd() {
2735 Some(cmd) => {
2736 let list = ZshList {
2737 sublist: ZshSublist {
2738 pipe: ZshPipe {
2739 cmd,
2740 next: None,
2741 lineno: self.lexer.lineno,
2742 merge_stderr: false,
2743 },
2744 next: None,
2745 flags: SublistFlags::default(),
2746 },
2747 flags: ListFlags::default(),
2748 };
2749 Some(ZshCommand::FuncDef(ZshFuncDef {
2750 names: vec![name],
2751 body: Box::new(ZshProgram { lists: vec![list] }),
2752 tracing: false,
2753 auto_call_args: None,
2754 body_source: None,
2755 }))
2756 }
2757 None => None,
2758 }
2759 }
2760 }
2761
2762 /// Parse [[ ... ]] conditional
2763 /// Parse `[[ EXPR ]]` conditional expression. Direct port of
2764 /// zsh/Src/parse.c:2409-2731 `par_cond` (and helpers par_cond_1,
2765 /// par_cond_2, par_cond_double, par_cond_triple, par_cond_multi
2766 /// at parse.c:2434-2731). Expression operators: `||` `&&` `!`
2767 /// + unary tests (-f, -d, -n, -z, etc.) + binary tests (=, !=,
2768 /// <, >, ==, =~, -eq, -ne, -lt, -le, -gt, -ge, -nt, -ot, -ef).
2769 fn parse_cond(&mut self) -> Option<ZshCommand> {
2770 self.lexer.zshlex(); // skip [[
2771 // Empty cond `[[ ]]` is a parse error in zsh — emit the
2772 // diagnostic and return None so the caller produces a
2773 // non-zero exit. Without this, `[[ ]]` silently passed and
2774 // returned exit 0.
2775 if self.lexer.tok == LexTok::Doutbrack {
2776 self.error("parse error near `]]'");
2777 self.lexer.zshlex();
2778 return None;
2779 }
2780 let cond = self.parse_cond_expr();
2781
2782 if self.lexer.tok == LexTok::Doutbrack {
2783 self.lexer.zshlex();
2784 }
2785
2786 cond.map(ZshCommand::Cond)
2787 }
2788
/// Parse a complete `[[ ]]` condition expression.
///
/// Entry point of the recursive descent; it simply delegates to
/// `parse_cond_or`, the lowest-precedence level. `parse_cond_not` calls
/// back into this method for parenthesised sub-expressions.
///
/// Returns whatever `parse_cond_or` returns (`None` on failure).
fn parse_cond_expr(&mut self) -> Option<ZshCond> {
    self.parse_cond_or()
}
2796
2797 /// Cond-expression `||` level. C: inside par_cond_1 at
2798 /// parse.c:2434-2475 (the `cond_or` ladder).
2799 fn parse_cond_or(&mut self) -> Option<ZshCond> {
2800 self.recursion_depth += 1;
2801 if self.check_recursion() {
2802 self.error("parse_cond_or: max recursion depth exceeded");
2803 self.recursion_depth -= 1;
2804 return None;
2805 }
2806
2807 let left = match self.parse_cond_and() {
2808 Some(l) => l,
2809 None => {
2810 self.recursion_depth -= 1;
2811 return None;
2812 }
2813 };
2814
2815 self.skip_cond_separators();
2816
2817 let result = if self.lexer.tok == LexTok::Dbar {
2818 self.lexer.zshlex();
2819 self.skip_cond_separators();
2820 self.parse_cond_or()
2821 .map(|right| ZshCond::Or(Box::new(left), Box::new(right)))
2822 } else {
2823 Some(left)
2824 };
2825
2826 self.recursion_depth -= 1;
2827 result
2828 }
2829
2830 /// Cond-expression `&&` level. C: par_cond_2 at parse.c:2476-2625.
2831 fn parse_cond_and(&mut self) -> Option<ZshCond> {
2832 self.recursion_depth += 1;
2833 if self.check_recursion() {
2834 self.error("parse_cond_and: max recursion depth exceeded");
2835 self.recursion_depth -= 1;
2836 return None;
2837 }
2838
2839 let left = match self.parse_cond_not() {
2840 Some(l) => l,
2841 None => {
2842 self.recursion_depth -= 1;
2843 return None;
2844 }
2845 };
2846
2847 self.skip_cond_separators();
2848
2849 let result = if self.lexer.tok == LexTok::Damper {
2850 self.lexer.zshlex();
2851 self.skip_cond_separators();
2852 self.parse_cond_and()
2853 .map(|right| ZshCond::And(Box::new(left), Box::new(right)))
2854 } else {
2855 Some(left)
2856 };
2857
2858 self.recursion_depth -= 1;
2859 result
2860 }
2861
2862 /// Cond-expression `!` negation level. C: handled inside
2863 /// par_cond_2 at parse.c:2476-2625 via the BANG token check.
2864 fn parse_cond_not(&mut self) -> Option<ZshCond> {
2865 self.recursion_depth += 1;
2866 if self.check_recursion() {
2867 self.error("parse_cond_not: max recursion depth exceeded");
2868 self.recursion_depth -= 1;
2869 return None;
2870 }
2871
2872 self.skip_cond_separators();
2873
2874 // ! can be either LexTok::Bang or String "!"
2875 let is_not = self.lexer.tok == LexTok::Bang
2876 || (self.lexer.tok == LexTok::String
2877 && self
2878 .lexer
2879 .tokstr
2880 .as_ref()
2881 .map(|s| s == "!")
2882 .unwrap_or(false));
2883 if is_not {
2884 self.lexer.zshlex();
2885 let inner = match self.parse_cond_not() {
2886 Some(i) => i,
2887 None => {
2888 self.recursion_depth -= 1;
2889 return None;
2890 }
2891 };
2892 self.recursion_depth -= 1;
2893 return Some(ZshCond::Not(Box::new(inner)));
2894 }
2895
2896 if self.lexer.tok == LexTok::Inpar {
2897 self.lexer.zshlex();
2898 self.skip_cond_separators();
2899 let inner = match self.parse_cond_expr() {
2900 Some(i) => i,
2901 None => {
2902 self.recursion_depth -= 1;
2903 return None;
2904 }
2905 };
2906 self.skip_cond_separators();
2907 if self.lexer.tok == LexTok::Outpar {
2908 self.lexer.zshlex();
2909 }
2910 self.recursion_depth -= 1;
2911 return Some(inner);
2912 }
2913
2914 let result = self.parse_cond_primary();
2915 self.recursion_depth -= 1;
2916 result
2917 }
2918
2919 /// Cond-expression primary: unary tests (-f, -d, ...), binary
2920 /// tests (=, !=, <, >, ==, =~, -eq, -ne, ...), and parenthesized
2921 /// sub-expressions. Direct port of par_cond_double / par_cond_triple
2922 /// / par_cond_multi at parse.c:2626-2731 (chosen by arg count).
2923 fn parse_cond_primary(&mut self) -> Option<ZshCond> {
2924 let s1 = match self.lexer.tok {
2925 LexTok::String => {
2926 let s = self.lexer.tokstr.clone().unwrap_or_default();
2927 self.lexer.zshlex();
2928 s
2929 }
2930 _ => return None,
2931 };
2932
2933 self.skip_cond_separators();
2934
2935 // Check for unary operator
2936 if s1.starts_with('-') && s1.len() == 2 {
2937 let s2 = match self.lexer.tok {
2938 LexTok::String => {
2939 let s = self.lexer.tokstr.clone().unwrap_or_default();
2940 self.lexer.zshlex();
2941 s
2942 }
2943 _ => return Some(ZshCond::Unary("-n".to_string(), s1)),
2944 };
2945 return Some(ZshCond::Unary(s1, s2));
2946 }
2947
2948 // Check for binary operator
2949 let op = match self.lexer.tok {
2950 LexTok::String => {
2951 let s = self.lexer.tokstr.clone().unwrap_or_default();
2952 self.lexer.zshlex();
2953 s
2954 }
2955 LexTok::Inang => {
2956 self.lexer.zshlex();
2957 "<".to_string()
2958 }
2959 LexTok::Outang => {
2960 self.lexer.zshlex();
2961 ">".to_string()
2962 }
2963 _ => return Some(ZshCond::Unary("-n".to_string(), s1)),
2964 };
2965
2966 self.skip_cond_separators();
2967
2968 let s2 = match self.lexer.tok {
2969 LexTok::String => {
2970 let s = self.lexer.tokstr.clone().unwrap_or_default();
2971 self.lexer.zshlex();
2972 s
2973 }
2974 _ => return Some(ZshCond::Binary(s1, op, String::new())),
2975 };
2976
2977 if op == "=~" {
2978 Some(ZshCond::Regex(s1, s2))
2979 } else {
2980 Some(ZshCond::Binary(s1, op, s2))
2981 }
2982 }
2983
2984 fn skip_cond_separators(&mut self) {
2985 while self.lexer.tok == LexTok::Seper && {
2986 let s = self.lexer.tokstr.as_ref();
2987 s.map(|s| !s.contains(';')).unwrap_or(true)
2988 } {
2989 self.lexer.zshlex();
2990 }
2991 }
2992
2993 /// Parse (( ... )) arithmetic command
2994 /// Parse `(( EXPR ))` arithmetic command. C source: parse.c:1810-1834
2995 /// `par_dinbrack` (despite the name; the function actually handles
2996 /// DINPAR `(( ))` blocks too).
2997 fn parse_arith(&mut self) -> Option<ZshCommand> {
2998 let expr = self.lexer.tokstr.clone().unwrap_or_default();
2999 self.lexer.zshlex();
3000 Some(ZshCommand::Arith(expr))
3001 }
3002
3003 /// Parse time command
3004 /// Parse `time CMD` (POSIX time keyword). Direct port of
3005 /// zsh/Src/parse.c:1787-1808 `par_time`. The `time` keyword
3006 /// times the execution of the following pipeline / cmd.
3007 fn parse_time(&mut self) -> Option<ZshCommand> {
3008 self.lexer.zshlex(); // skip 'time'
3009
3010 // Check if there's a pipeline to time
3011 if self.lexer.tok == LexTok::Seper
3012 || self.lexer.tok == LexTok::Newlin
3013 || self.lexer.tok == LexTok::Endinput
3014 {
3015 Some(ZshCommand::Time(None))
3016 } else {
3017 let sublist = self.parse_sublist();
3018 Some(ZshCommand::Time(sublist.map(Box::new)))
3019 }
3020 }
3021
3022 /// Check if next token is ()
3023 fn peek_inoutpar(&mut self) -> bool {
3024 self.lexer.tok == LexTok::Inoutpar
3025 }
3026
3027 /// Skip separator tokens
3028 fn skip_separators(&mut self) {
3029 let mut iterations = 0;
3030 while self.lexer.tok == LexTok::Seper || self.lexer.tok == LexTok::Newlin {
3031 iterations += 1;
3032 if iterations > 100_000 {
3033 self.error("skip_separators: too many iterations");
3034 return;
3035 }
3036 self.lexer.zshlex();
3037 }
3038 }
3039
3040 /// Record an error
3041 fn error(&mut self, msg: &str) {
3042 self.errors.push(ParseError {
3043 message: msg.to_string(),
3044 line: self.lexer.lineno,
3045 });
3046 }
3047}
3048
3049#[cfg(test)]
3050mod tests {
3051 use super::*;
3052
3053 fn parse(input: &str) -> Result<ZshProgram, Vec<ParseError>> {
3054 let mut parser = ZshParser::new(input);
3055 parser.parse()
3056 }
3057
/// A plain command line yields one list holding a simple command whose words
/// are the whitespace-separated arguments.
#[test]
fn test_simple_command() {
    let program = parse("echo hello world").unwrap();
    assert_eq!(program.lists.len(), 1);

    if let ZshCommand::Simple(simple) = &program.lists[0].sublist.pipe.cmd {
        assert_eq!(simple.words, vec!["echo", "hello", "world"]);
    } else {
        panic!("expected simple command");
    }
}
3069
/// A three-stage pipeline parses into one list whose pipe chain has at least
/// two `next` links.
#[test]
fn test_pipeline() {
    let program = parse("ls | grep foo | wc -l").unwrap();
    assert_eq!(program.lists.len(), 1);

    let first_stage = &program.lists[0].sublist.pipe;
    assert!(first_stage.next.is_some());

    let second_stage = first_stage.next.as_ref().unwrap();
    assert!(second_stage.next.is_some());
}
3081
/// In `a && b || c` the first sublist is linked to the rest with `And`.
#[test]
fn test_and_or() {
    let program = parse("cmd1 && cmd2 || cmd3").unwrap();
    let head = &program.lists[0].sublist;

    assert!(head.next.is_some());
    let connector = head.next.as_ref().map(|(op, _)| *op);
    assert_eq!(connector, Some(SublistOp::And));
}
3091
/// An `if ...; then ...; fi` line parses to an `If` command.
#[test]
fn test_if_then() {
    let program = parse("if test -f foo; then echo yes; fi").unwrap();
    let command = &program.lists[0].sublist.pipe.cmd;
    assert!(matches!(command, ZshCommand::If(_)), "expected if command");
}
3100
/// `for VAR in WORDS` records the loop variable and the word list.
#[test]
fn test_for_loop() {
    let program = parse("for i in a b c; do echo $i; done").unwrap();

    let for_cmd = match &program.lists[0].sublist.pipe.cmd {
        ZshCommand::For(inner) => inner,
        _ => panic!("expected for command"),
    };
    assert_eq!(for_cmd.var, "i");

    if let ForList::Words(words) = &for_cmd.list {
        assert_eq!(words, &vec!["a", "b", "c"]);
    } else {
        panic!("expected word list");
    }
}
3115
/// A two-arm `case` statement parses to a `Case` command with two arms.
#[test]
fn test_case() {
    let program = parse("case $x in a) echo a;; b) echo b;; esac").unwrap();

    if let ZshCommand::Case(case_cmd) = &program.lists[0].sublist.pipe.cmd {
        assert_eq!(case_cmd.arms.len(), 2);
    } else {
        panic!("expected case command");
    }
}
3126
/// `function NAME { }` with an empty body parses to a `FuncDef` naming the
/// function.
#[test]
fn test_function() {
    let program = parse("function foo { }").unwrap();
    let command = &program.lists[0].sublist.pipe.cmd;

    if let ZshCommand::FuncDef(def) = command {
        assert_eq!(def.names, vec!["foo"]);
    } else {
        panic!("expected function, got {:?}", command);
    }
}
3141
/// `> file` attaches a single `Write` redirection to the simple command.
#[test]
fn test_redirection() {
    let program = parse("echo hello > file.txt").unwrap();

    if let ZshCommand::Simple(simple) = &program.lists[0].sublist.pipe.cmd {
        assert_eq!(simple.redirs.len(), 1);
        assert_eq!(simple.redirs[0].rtype, RedirType::Write);
    } else {
        panic!("expected simple command");
    }
}
3153
/// A leading `NAME=value` is recorded as an assignment on the simple command.
#[test]
fn test_assignment() {
    let program = parse("FOO=bar echo $FOO").unwrap();

    if let ZshCommand::Simple(simple) = &program.lists[0].sublist.pipe.cmd {
        assert_eq!(simple.assigns.len(), 1);
        assert_eq!(simple.assigns[0].name, "FOO");
    } else {
        panic!("expected simple command");
    }
}
3165
/// Smoke test: a completion helper function containing nested parameter
/// expansion flags, command substitution and an arithmetic guard must parse
/// without errors and yield at least one list.
#[test]
fn test_parse_completion_function() {
    // Fixture is a raw string so `$`, quotes and braces reach the parser
    // unescaped.
    let input = r#"_2to3_fixes() {
 local -a fixes
 fixes=( ${${(M)${(f)"$(2to3 --list-fixes 2>/dev/null)"}:#*}//[[:space:]]/} )
 (( ${#fixes} )) && _describe -t fixes 'fix' fixes
}"#;
    let result = parse(input);
    assert!(
        result.is_ok(),
        "Failed to parse completion function: {:?}",
        result.err()
    );
    let prog = result.unwrap();
    assert!(
        !prog.lists.is_empty(),
        "Expected at least one list in program"
    );
}
3185
/// Smoke test: a multi-line array assignment whose elements mix quoted
/// strings with brace expansions must parse without errors.
#[test]
fn test_parse_array_with_complex_elements() {
    let input = r#"arguments=(
 '(- * :)'{-h,--help}'[show this help message and exit]'
 {-d,--doctests_only}'[fix up doctests only]'
 '*:filename:_files'
)"#;
    let result = parse(input);
    assert!(
        result.is_ok(),
        "Failed to parse array assignment: {:?}",
        result.err()
    );
}
3200
/// Smoke test: a complete `#compdef` completion file (comments, a helper
/// function, a `local` declaration, a large array assignment and a final
/// command) must parse without errors and yield at least one list.
#[test]
fn test_parse_full_completion_file() {
    // `r##` delimiters are used for the fixture, which contains both `"` and
    // `#` characters.
    let input = r##"#compdef 2to3

# zsh completions for '2to3'

_2to3_fixes() {
 local -a fixes
 fixes=( ${${(M)${(f)"$(2to3 --list-fixes 2>/dev/null)"}:#*}//[[:space:]]/} )
 (( ${#fixes} )) && _describe -t fixes 'fix' fixes
}

local -a arguments

arguments=(
 '(- * :)'{-h,--help}'[show this help message and exit]'
 {-d,--doctests_only}'[fix up doctests only]'
 {-f,--fix}'[each FIX specifies a transformation; default: all]:fix name:_2to3_fixes'
 {-j,--processes}'[run 2to3 concurrently]:number: '
 {-x,--nofix}'[prevent a transformation from being run]:fix name:_2to3_fixes'
 {-l,--list-fixes}'[list available transformations]'
 {-p,--print-function}'[modify the grammar so that print() is a function]'
 {-v,--verbose}'[more verbose logging]'
 '--no-diffs[do not show diffs of the refactoring]'
 {-w,--write}'[write back modified files]'
 {-n,--nobackups}'[do not write backups for modified files]'
 {-o,--output-dir}'[put output files in this directory instead of overwriting]:directory:_directories'
 {-W,--write-unchanged-files}'[also write files even if no changes were required]'
 '--add-suffix[append this string to all output filenames]:suffix: '
 '*:filename:_files'
)

_arguments -s -S $arguments
"##;
    let result = parse(input);
    assert!(
        result.is_ok(),
        "Failed to parse full completion file: {:?}",
        result.err()
    );
    let prog = result.unwrap();
    // Should have parsed successfully with at least one statement
    assert!(!prog.lists.is_empty(), "Expected at least one list");
}
3245
/// Smoke test: a small script with a shebang, nested `if`/`else` blocks using
/// `[[ ... ]]` tests, recursive globs, a pipeline and a `>&2` redirection must
/// parse without errors.
#[test]
fn test_parse_logs_sh() {
    let input = r#"#!/usr/bin/env bash
shopt -s globstar

if [[ $(uname) == Darwin ]]; then
 tail -f /var/log/**/*.log /var/log/**/*.out | lolcat
else
 if [[ $ZPWR_DISTRO_NAME == raspbian ]]; then
 tail -f /var/log/**/*.log | lolcat
 else
 printf "Unsupported...\n" >&2
 fi
fi
"#;
    let result = parse(input);
    assert!(
        result.is_ok(),
        "Failed to parse logs.sh: {:?}",
        result.err()
    );
}
3268
/// Smoke test: a `case` statement whose patterns end in `*` and whose `;;`
/// terminators sit on their own lines must parse without errors.
#[test]
fn test_parse_case_with_glob() {
    let input = r#"case "$ZPWR_OS_TYPE" in
 darwin*) open_cmd='open'
 ;;
 cygwin*) open_cmd='cygstart'
 ;;
 linux*)
 open_cmd='xdg-open'
 ;;
esac"#;
    let result = parse(input);
    assert!(
        result.is_ok(),
        "Failed to parse case with glob: {:?}",
        result.err()
    );
}
3287
/// Smoke test: a function body containing a `case` statement, one arm of
/// which holds an `if [[ ... ]]` block, must parse without errors.
#[test]
fn test_parse_case_with_nested_if() {
    // Test case with nested if and glob patterns
    let input = r##"function zpwrGetOpenCommand(){
 local open_cmd
 case "$ZPWR_OS_TYPE" in
 darwin*) open_cmd='open' ;;
 cygwin*) open_cmd='cygstart' ;;
 linux*)
 if [[ "$_zpwr_uname_r" != *icrosoft* ]];then
 open_cmd='nohup xdg-open'
 fi
 ;;
 esac
}"##;
    let result = parse(input);
    assert!(result.is_ok(), "Failed to parse: {:?}", result.err());
}
3306
/// Bulk smoke test over a local checkout of zpwr shell scripts.
///
/// Every `*.sh` / `*.zsh` file in the scripts directory is parsed on its own
/// thread with a 2-second budget, and a summary is printed to stderr. The
/// test passes when at least 50% of the files parse without errors. It is
/// skipped (returns early) when the directory is missing or holds no matching
/// scripts, so an empty directory is not reported as a 0% pass rate.
#[test]
fn test_parse_zpwr_scripts() {
    use std::fs;
    use std::path::Path;
    use std::sync::mpsc;
    use std::thread;
    use std::time::Duration;

    // NOTE(review): machine-specific absolute path; on any other machine this
    // test silently skips.
    let scripts_dir = Path::new("/Users/wizard/.zpwr/scripts");
    if !scripts_dir.exists() {
        eprintln!("Skipping test: scripts directory not found");
        return;
    }

    let mut total = 0;
    let mut passed = 0;
    let mut failed_files = Vec::new();
    let mut timeout_files = Vec::new();

    for ext in &["sh", "zsh"] {
        let pattern = scripts_dir.join(format!("*.{}", ext));
        if let Ok(entries) = glob::glob(pattern.to_str().unwrap()) {
            for entry in entries.flatten() {
                total += 1;
                let file_path = entry.display().to_string();
                let content = match fs::read_to_string(&entry) {
                    Ok(c) => c,
                    Err(e) => {
                        failed_files.push((file_path, format!("read error: {}", e)));
                        continue;
                    }
                };

                // Parse with a timeout. The script text is moved into the
                // worker, so no copy is needed.
                let (tx, rx) = mpsc::channel();
                thread::spawn(move || {
                    let _ = tx.send(parse(&content));
                });

                match rx.recv_timeout(Duration::from_secs(2)) {
                    Ok(Ok(_)) => passed += 1,
                    Ok(Err(errors)) => {
                        let first_err = errors
                            .first()
                            .map(|e| format!("line {}: {}", e.line, e.message))
                            .unwrap_or_default();
                        failed_files.push((file_path, first_err));
                    }
                    Err(_) => {
                        // The worker thread cannot be cancelled; it is left
                        // detached.
                        timeout_files.push(file_path);
                    }
                }
            }
        }
    }

    // Nothing to measure: skip rather than fail on a 0/0 pass rate.
    if total == 0 {
        eprintln!("Skipping test: no .sh/.zsh scripts found in scripts directory");
        return;
    }

    eprintln!("\n=== ZPWR Scripts Parse Results ===");
    eprintln!("Passed: {}/{}", passed, total);

    if !timeout_files.is_empty() {
        eprintln!("\nTimeout files (>2s):");
        for file in &timeout_files {
            eprintln!("  {}", file);
        }
    }

    if !failed_files.is_empty() {
        eprintln!("\nFailed files:");
        for (file, err) in &failed_files {
            eprintln!("  {} - {}", file, err);
        }
    }

    // Allow some failures initially, but track progress
    let pass_rate = (passed as f64 / total as f64) * 100.0;
    eprintln!("Pass rate: {:.1}%", pass_rate);

    // Require at least 50% pass rate for now
    assert!(pass_rate >= 50.0, "Pass rate too low: {:.1}%", pass_rate);
}
3394
/// Bulk smoke test over the zsh function files under
/// `test_data/zsh_functions` in the crate directory.
///
/// Each regular file is parsed on its own thread with a 2-second budget and a
/// truncated summary is printed to stderr. The test passes when at least 50%
/// of the files parse without errors. It is skipped (returns early) when the
/// directory is missing or contains no files, so an empty directory is not
/// reported as a 0% pass rate.
#[test]
#[ignore] // Uses threads that can't be killed on timeout; use integration test instead
fn test_parse_zsh_stdlib_functions() {
    use std::fs;
    use std::path::Path;
    use std::sync::mpsc;
    use std::thread;
    use std::time::Duration;

    let functions_dir = Path::new(env!("CARGO_MANIFEST_DIR")).join("test_data/zsh_functions");
    if !functions_dir.exists() {
        eprintln!(
            "Skipping test: zsh_functions directory not found at {:?}",
            functions_dir
        );
        return;
    }

    let mut total = 0;
    let mut passed = 0;
    let mut failed_files = Vec::new();
    let mut timeout_files = Vec::new();

    if let Ok(entries) = fs::read_dir(&functions_dir) {
        for entry in entries.flatten() {
            let path = entry.path();
            if !path.is_file() {
                continue;
            }

            total += 1;
            let file_path = path.display().to_string();
            let content = match fs::read_to_string(&path) {
                Ok(c) => c,
                Err(e) => {
                    failed_files.push((file_path, format!("read error: {}", e)));
                    continue;
                }
            };

            // Parse with a timeout. The file text is moved into the worker,
            // so no copy is needed.
            let (tx, rx) = mpsc::channel();
            thread::spawn(move || {
                let _ = tx.send(parse(&content));
            });

            match rx.recv_timeout(Duration::from_secs(2)) {
                Ok(Ok(_)) => passed += 1,
                Ok(Err(errors)) => {
                    let first_err = errors
                        .first()
                        .map(|e| format!("line {}: {}", e.line, e.message))
                        .unwrap_or_default();
                    failed_files.push((file_path, first_err));
                }
                Err(_) => {
                    timeout_files.push(file_path);
                }
            }
        }
    }

    // Nothing to measure: skip rather than fail on a 0/0 pass rate.
    if total == 0 {
        eprintln!(
            "Skipping test: no files found in {:?}",
            functions_dir
        );
        return;
    }

    eprintln!("\n=== Zsh Stdlib Functions Parse Results ===");
    eprintln!("Passed: {}/{}", passed, total);

    if !timeout_files.is_empty() {
        eprintln!("\nTimeout files (>2s): {}", timeout_files.len());
        for file in timeout_files.iter().take(10) {
            eprintln!("  {}", file);
        }
        if timeout_files.len() > 10 {
            eprintln!("  ... and {} more", timeout_files.len() - 10);
        }
    }

    if !failed_files.is_empty() {
        eprintln!("\nFailed files: {}", failed_files.len());
        for (file, err) in failed_files.iter().take(20) {
            // Only the file name is shown to keep the report compact.
            let filename = Path::new(file)
                .file_name()
                .unwrap_or_default()
                .to_string_lossy();
            eprintln!("  {} - {}", filename, err);
        }
        if failed_files.len() > 20 {
            eprintln!("  ... and {} more", failed_files.len() - 20);
        }
    }

    let pass_rate = (passed as f64 / total as f64) * 100.0;
    eprintln!("Pass rate: {:.1}%", pass_rate);

    // Require at least 50% pass rate
    assert!(pass_rate >= 50.0, "Pass rate too low: {:.1}%", pass_rate);
}
3496}