Skip to main content

zsh/extensions/
zsh_ast.rs

//! Zsh AST types — Rust-only, NOT in zsh C.
//!
//! zsh C does NOT have an AST. Its parser emits a flat wordcode
//! stream (`Wordcode ecbuf[]`) directly via `par_event` → `par_list` →
//! `par_sublist` → `par_pline` → `par_cmd` → `par_simple` / `par_redir`
//! (Src/parse.c:485-3000). The wordcode is consumed by `execlist` /
//! `execpline` / `execcmd` in `Src/exec.c`, which walk `ecbuf` using the
//! `WC_KIND`/`wc_code`/`wc_data` macros.
//!
//! zshrs built an AST as an intermediate step on the way to
//! wordcode. This file holds those Rust-only AST node types.
//! They originally lived in `src/ported/parse.rs` but were relocated
//! here for P9e of the PORT_PLAN.md migration to make their
//! non-C-faithful nature explicit.
//!
//! Phase 9c (par_* wordcode emission) + Phase 9d (exec.rs wordcode
//! consumer rewrite) will eventually retire these types entirely —
//! the parser will emit wordcode directly and the executor will read
//! wordcode directly, matching the C pipeline. Until then, the AST
//! is the working IR.
21
22use serde::{Deserialize, Serialize};
23pub use crate::extensions::heredoc_ast::HereDocInfo;
24
25/// AST node for a complete program (list of commands)
26#[derive(Debug, Clone, Serialize, Deserialize)]
27pub struct ZshProgram {
28    pub lists: Vec<ZshList>,
29}
30
31/// A list is a sequence of sublists separated by ; or & or newline
32#[derive(Debug, Clone, Serialize, Deserialize)]
33pub struct ZshList {
34    pub sublist: ZshSublist,
35    pub flags: ListFlags,
36}
37
38#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
39pub struct ListFlags {
40    /// Run asynchronously (&)
41    pub async_: bool,
42    /// Disown after running (&| or &!)
43    pub disown: bool,
44}
45
46/// A sublist is pipelines connected by && or ||
47#[derive(Debug, Clone, Serialize, Deserialize)]
48pub struct ZshSublist {
49    pub pipe: ZshPipe,
50    pub next: Option<(SublistOp, Box<ZshSublist>)>,
51    pub flags: SublistFlags,
52}
53
54#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
55pub enum SublistOp {
56    And, // &&
57    Or,  // ||
58}
59
60#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
61pub struct SublistFlags {
62    /// Coproc
63    pub coproc: bool,
64    /// Negated with !
65    pub not: bool,
66}
67
68/// A pipeline is commands connected by |
69#[derive(Debug, Clone, Serialize, Deserialize)]
70pub struct ZshPipe {
71    pub cmd: ZshCommand,
72    pub next: Option<Box<ZshPipe>>,
73    pub lineno: u64,
74    /// `|&` between this stage and the next — merge stderr into the
75    /// pipe so the next stage's stdin sees both stdout AND stderr from
76    /// this stage. When `next` is None this flag is meaningless.
77    #[serde(default)]
78    pub merge_stderr: bool,
79}
80
81/// A command
82#[derive(Debug, Clone, Serialize, Deserialize)]
83pub enum ZshCommand {
84    Simple(ZshSimple),
85    Subsh(Box<ZshProgram>), // (list)
86    Cursh(Box<ZshProgram>), // {list}
87    For(ZshFor),
88    Case(ZshCase),
89    If(ZshIf),
90    While(ZshWhile),
91    Until(ZshWhile),
92    Repeat(ZshRepeat),
93    FuncDef(ZshFuncDef),
94    Time(Option<Box<ZshSublist>>),
95    Cond(ZshCond), // [[ ... ]]
96    Arith(String), // (( ... ))
97    Try(ZshTry),   // { ... } always { ... }
98    /// Compound command with trailing redirects:
99    /// `{ cmd } 2>&1`, `(...) >file`, `if ...; fi >file`, etc.
100    /// Simple commands carry redirects in their own struct; this wrapper
101    /// is only used for compound forms.
102    Redirected(Box<ZshCommand>, Vec<ZshRedir>),
103}
104
105/// A simple command (assignments, words, redirections)
106#[derive(Debug, Clone, Serialize, Deserialize)]
107pub struct ZshSimple {
108    pub assigns: Vec<ZshAssign>,
109    pub words: Vec<String>,
110    pub redirs: Vec<ZshRedir>,
111}
112
113/// An assignment
114#[derive(Debug, Clone, Serialize, Deserialize)]
115pub struct ZshAssign {
116    pub name: String,
117    pub value: ZshAssignValue,
118    pub append: bool, // +=
119}
120
121#[derive(Debug, Clone, Serialize, Deserialize)]
122pub enum ZshAssignValue {
123    Scalar(String),
124    Array(Vec<String>),
125}
126
127/// A redirection
128#[derive(Debug, Clone, Serialize, Deserialize)]
129pub struct ZshRedir {
130    pub rtype: i32,
131    pub fd: i32,
132    pub name: String,
133    pub heredoc: Option<HereDocInfo>,
134    pub varid: Option<String>, // {var}>file
135    /// Index into the lexer-side `HEREDOCS` thread_local for body lookup. Filled in by
136    /// `parse_redirection` for Heredoc/HeredocDash, then resolved into
137    /// `heredoc.content` by `fill_heredoc_bodies` after process_heredocs
138    /// has run for the line.
139    #[serde(skip)]
140    pub heredoc_idx: Option<usize>,
141}
142
143/// For loop
144#[derive(Debug, Clone, Serialize, Deserialize)]
145pub struct ZshFor {
146    pub var: String,
147    pub list: ForList,
148    pub body: Box<ZshProgram>,
149    /// True if this was parsed as `select` rather than `for`. Both share
150    /// the same parser, so the compiler routes on this flag.
151    #[serde(default)]
152    pub is_select: bool,
153}
154
155#[derive(Debug, Clone, Serialize, Deserialize)]
156pub enum ForList {
157    Words(Vec<String>),
158    CStyle {
159        init: String,
160        cond: String,
161        step: String,
162    },
163    Positional,
164}
165
166/// Case statement
167#[derive(Debug, Clone, Serialize, Deserialize)]
168pub struct ZshCase {
169    pub word: String,
170    pub arms: Vec<CaseArm>,
171}
172
173#[derive(Debug, Clone, Serialize, Deserialize)]
174pub struct CaseArm {
175    pub patterns: Vec<String>,
176    pub body: ZshProgram,
177    pub terminator: CaseTerm,
178}
179
180#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
181pub enum CaseTerm {
182    Break,    // ;;
183    Continue, // ;&
184    TestNext, // ;|
185}
186
187/// If statement
188#[derive(Debug, Clone, Serialize, Deserialize)]
189pub struct ZshIf {
190    pub cond: Box<ZshProgram>,
191    pub then: Box<ZshProgram>,
192    pub elif: Vec<(ZshProgram, ZshProgram)>,
193    pub else_: Option<Box<ZshProgram>>,
194}
195
196/// While/Until loop
197#[derive(Debug, Clone, Serialize, Deserialize)]
198pub struct ZshWhile {
199    pub cond: Box<ZshProgram>,
200    pub body: Box<ZshProgram>,
201    pub until: bool,
202}
203
204/// Repeat loop
205#[derive(Debug, Clone, Serialize, Deserialize)]
206pub struct ZshRepeat {
207    pub count: String,
208    pub body: Box<ZshProgram>,
209}
210
211/// Function definition
212#[derive(Debug, Clone, Serialize, Deserialize)]
213pub struct ZshFuncDef {
214    pub names: Vec<String>,
215    pub body: Box<ZshProgram>,
216    pub tracing: bool,
217    /// Anonymous-function call args. `() { body } a b` parses as a
218    /// FuncDef (auto-named) with `auto_call_args = Some(vec!["a", "b"])`.
219    /// compile_funcdef registers the function then emits a Simple call
220    /// with these args.
221    #[serde(default)]
222    pub auto_call_args: Option<Vec<String>>,
223    /// Original source text of the function body (the bytes between
224    /// `{` and `}`, without the braces themselves), captured at parse
225    /// time. Populated for `function name { body }` and `function name() { body }`
226    /// forms; left None for the synthesized inline-funcdef recovery
227    /// path. ZshCompiler::compile_funcdef forwards it to
228    /// `BUILTIN_REGISTER_COMPILED_FN` so introspection (`whence`, `which`,
229    /// `${functions[name]}`) has canonical source text.
230    #[serde(default)]
231    pub body_source: Option<String>,
232}
233
234/// Conditional expression [[ ... ]]
235#[derive(Debug, Clone, Serialize, Deserialize)]
236pub enum ZshCond {
237    Not(Box<ZshCond>),
238    And(Box<ZshCond>, Box<ZshCond>),
239    Or(Box<ZshCond>, Box<ZshCond>),
240    Unary(String, String),          // -f file, -n str, etc.
241    Binary(String, String, String), // str = pat, a -eq b, etc.
242    Regex(String, String),          // str =~ regex
243}
244
245/// Try/always block
246#[derive(Debug, Clone, Serialize, Deserialize)]
247pub struct ZshTry {
248    pub try_block: Box<ZshProgram>,
249    pub always: Box<ZshProgram>,
250}
251
252/// Zsh parameter expansion flags
253#[derive(Debug, Clone, Serialize, Deserialize)]
254pub enum ZshParamFlag {
255    Lower,                 // L - lowercase
256    Upper,                 // U - uppercase
257    Capitalize,            // C - capitalize words
258    Join(String),          // j:sep: - join array with separator
259    JoinNewline,           // F - join with newlines
260    Split(String),         // s:sep: - split string into array
261    SplitLines,            // f - split on newlines
262    SplitWords,            // z - split into words (shell parsing)
263    Type,                  // t - type of variable
264    Words,                 // w - word splitting
265    Quote,                 // qq - single-quote always
266    QuoteIfNeeded,         // q+ - single-quote only if needed
267    DoubleQuote,           // qqq - double-quote
268    DollarQuote,           // qqqq - $'...' style
269    QuoteBackslash,        // q / b / B - backslash-escape special chars
270    Unique,                // u - unique elements only
271    Reverse,               // O - reverse sort
272    Sort,                  // o - sort
273    NumericSort,           // n - numeric sort
274    IndexSort,             // a - sort in array index order
275    Keys,                  // k - associative array keys
276    Values,                // v - associative array values
277    Length,                // # - length (character codes)
278    CountChars,            // c - count total characters
279    Expand,                // e - perform shell expansions
280    PromptExpand,          // % - expand prompt escapes
281    PromptExpandFull,      // %% - full prompt expansion
282    Visible,               // V - make non-printable chars visible
283    Directory,             // D - substitute directory names
284    Head(usize),           // [1,n] - first n elements
285    Tail(usize),           // [-n,-1] - last n elements
286    PadLeft(usize, char),  // l:len:fill: - pad left
287    PadRight(usize, char), // r:len:fill: - pad right
288    Width(usize),          // m - use width for padding
289    Match,                 // M - include matched portion
290    Remove,                // R - include non-matched portion (complement of M)
291    Subscript,             // S - subscript scanning
292    Parameter,             // P - use value as parameter name (indirection)
293    Glob,                  // ~ - glob patterns in pattern
294    /// `@` flag — force array-context behavior even inside DQ. zsh's
295    /// `"${(@o)arr}"` keeps the sort active and splices each element as
296    /// its own word. Without this, the array-only flags became no-ops
297    /// in DQ.
298    At,
299}
300
301/// List operator (for shell command lists)
302#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
303pub enum ListOp {
304    And,     // &&
305    Or,      // ||
306    Semi,    // ;
307    Amp,     // &
308    Newline, // \n
309}
310
311/// Shell word - can be simple literal or complex expansion
312#[derive(Debug, Clone, Serialize, Deserialize)]
313pub enum ShellWord {
314    /// Plain text token. Most ZWC-decoded words land here. Goes through
315    /// `expand_string` (plus glob/tilde/etc. as text-level transforms) for
316    /// final output.
317    Literal(String),
318    /// Concatenation of sub-words. ZWC array decoding produces this with
319    /// child Literals; nothing else constructs it now that the legacy
320    /// hand-rolled parser is gone.
321    Concat(Vec<ShellWord>),
322}
323
324/// Variable modifier for parameter expansion
325#[derive(Debug, Clone, Serialize, Deserialize)]
326pub enum VarModifier {
327    Default(ShellWord),
328    DefaultAssign(ShellWord),
329    Error(ShellWord),
330    Alternate(ShellWord),
331    Length,
332    Substring(i64, Option<i64>),
333    RemovePrefix(ShellWord),
334    RemovePrefixLong(ShellWord),
335    RemoveSuffix(ShellWord),
336    RemoveSuffixLong(ShellWord),
337    Replace(ShellWord, ShellWord),
338    ReplaceAll(ShellWord, ShellWord),
339    /// `${var/#pat/repl}` — anchored at start (prefix only).
340    /// Per Src/subst.c paramsubst's `/`-arm with SUB_START.
341    ReplacePrefix(ShellWord, ShellWord),
342    /// `${var/%pat/repl}` — anchored at end (suffix only).
343    /// Per Src/subst.c paramsubst's `/`-arm with SUB_END.
344    ReplaceSuffix(ShellWord, ShellWord),
345    Upper,
346    Lower,
347}
348
349/// Shell command - the old shell_ast compatible type
350#[derive(Debug, Clone, Serialize, Deserialize)]
351pub enum ShellCommand {
352    Simple(SimpleCommand),
353    Pipeline(Vec<ShellCommand>, bool),
354    List(Vec<(ShellCommand, ListOp)>),
355    Compound(CompoundCommand),
356    FunctionDef(String, Box<ShellCommand>),
357}
358
359/// Simple command with assignments, words, and redirects
360#[derive(Debug, Clone, Serialize, Deserialize)]
361pub struct SimpleCommand {
362    pub assignments: Vec<(String, ShellWord, bool)>,
363    pub words: Vec<ShellWord>,
364    pub redirects: Vec<Redirect>,
365}
366
367/// Redirect
368#[derive(Debug, Clone, Serialize, Deserialize)]
369pub struct Redirect {
370    pub fd: Option<i32>,
371    pub op: RedirectOp,
372    pub target: ShellWord,
373    pub heredoc_content: Option<String>,
374    pub fd_var: Option<String>,
375}
376
377/// Redirect operator
378#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
379pub enum RedirectOp {
380    Write,
381    Append,
382    Read,
383    ReadWrite,
384    Clobber,
385    DupRead,
386    DupWrite,
387    HereDoc,
388    HereString,
389    WriteBoth,
390    AppendBoth,
391}
392
393/// Compound command
394#[derive(Debug, Clone, Serialize, Deserialize)]
395pub enum CompoundCommand {
396    BraceGroup(Vec<ShellCommand>),
397    Subshell(Vec<ShellCommand>),
398    If {
399        conditions: Vec<(Vec<ShellCommand>, Vec<ShellCommand>)>,
400        else_part: Option<Vec<ShellCommand>>,
401    },
402    For {
403        var: String,
404        words: Option<Vec<ShellWord>>,
405        body: Vec<ShellCommand>,
406    },
407    ForArith {
408        init: String,
409        cond: String,
410        step: String,
411        body: Vec<ShellCommand>,
412    },
413    While {
414        condition: Vec<ShellCommand>,
415        body: Vec<ShellCommand>,
416    },
417    Until {
418        condition: Vec<ShellCommand>,
419        body: Vec<ShellCommand>,
420    },
421    Case {
422        word: ShellWord,
423        cases: Vec<(Vec<ShellWord>, Vec<ShellCommand>, CaseTerminator)>,
424    },
425    Select {
426        var: String,
427        words: Option<Vec<ShellWord>>,
428        body: Vec<ShellCommand>,
429    },
430    Coproc {
431        name: Option<String>,
432        body: Box<ShellCommand>,
433    },
434    /// repeat N do ... done
435    Repeat {
436        count: String,
437        body: Vec<ShellCommand>,
438    },
439    /// { try-block } always { always-block }
440    Try {
441        try_body: Vec<ShellCommand>,
442        always_body: Vec<ShellCommand>,
443    },
444    Arith(String),
445    WithRedirects(Box<ShellCommand>, Vec<Redirect>),
446}
447
448/// Case terminator
449#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
450pub enum CaseTerminator {
451    Break,
452    Fallthrough,
453    Continue,
454}