Skip to main content

reef/
ast.rs

1//! Abstract syntax tree for bash commands.
2//!
3//! AST nodes borrow their text from the input string (`&'a str`) — zero-copy, except `Atom::Escaped`, whose `Cow` may own.
4//! The parser produces a `Vec<Cmd<'a>>` representing the top-level command list.
5
6use std::borrow::Cow;
7
8// ---------------------------------------------------------------------------
9// Commands
10// ---------------------------------------------------------------------------
11
12/// A complete command: an and-or list run in the foreground or as a background job.
///
/// Both variants wrap the same [`AndOrList`]; only the variant tag distinguishes them.
13#[derive(Debug, Clone, PartialEq, Eq)]
14#[non_exhaustive]
15pub enum Cmd<'a> {
16    /// A foreground command list.
17    List(AndOrList<'a>),
18    /// A background job (`cmd &`).
19    Job(AndOrList<'a>),
20}
21
22/// A chain of pipelines connected by `&&` and `||`.
///
/// Stored as a mandatory head plus a tail, so the chain always holds at least one pipeline.
23#[derive(Debug, Clone, PartialEq, Eq)]
24pub struct AndOrList<'a> {
25    /// The first pipeline in the chain.
26    pub first: Pipeline<'a>,
27    /// Subsequent pipelines, each tagged with the `&&` / `||` operator that introduces it.
28    pub rest: Vec<AndOr<'a>>,
29}
30
31/// A single `&&` or `||` link in an and-or chain: the operator (variant) plus the pipeline after it.
32#[derive(Debug, Clone, PartialEq, Eq)]
33#[non_exhaustive]
34pub enum AndOr<'a> {
35    /// `&& pipeline` — run if the previous pipeline succeeded.
36    And(Pipeline<'a>),
37    /// `|| pipeline` — run if the previous pipeline failed.
38    Or(Pipeline<'a>),
39}
40
41/// A pipeline: one or more commands connected by `|`.
42#[derive(Debug, Clone, PartialEq, Eq)]
43#[non_exhaustive]
44pub enum Pipeline<'a> {
45    /// A single command (no pipe).
46    Single(Executable<'a>),
47    /// `[!] cmd1 | cmd2 | ...` — the `bool` is `true` if the pipeline is negated with `!`;
    /// the `Vec` holds the piped commands.
    // NOTE(review): `Single` carries no negation flag, so a negated lone command (`! cmd`)
    // presumably has to be represented as `Pipe(true, ..)` — confirm against the parser.
48    Pipe(bool, Vec<Executable<'a>>),
49}
50
51/// An executable unit: simple command, compound command, or function definition.
52#[derive(Debug, Clone, PartialEq, Eq)]
53#[non_exhaustive]
54pub enum Executable<'a> {
55    /// A simple command (possibly with assignments and redirections).
56    Simple(SimpleCmd<'a>),
57    /// A compound command (`if`, `for`, `while`, etc.).
58    Compound(CompoundCmd<'a>),
59    /// A function definition: `name() { body; }` — fields are the function name and its body.
    /// The body is a full [`CompoundCmd`], so it carries its own redirection list.
60    FuncDef(&'a str, CompoundCmd<'a>),
61}
62
63// ---------------------------------------------------------------------------
64// Simple command
65// ---------------------------------------------------------------------------
66
67/// A simple command with optional prefix assignments and suffix words/redirects.
///
/// There is no dedicated command-name field in this struct.
// NOTE(review): presumably the command name is the first `CmdSuffix::Word` in `suffix` —
// confirm against the parser.
68#[derive(Debug, Clone, PartialEq, Eq)]
69pub struct SimpleCmd<'a> {
70    /// Assignments and redirections before the command name.
71    pub prefix: Vec<CmdPrefix<'a>>,
72    /// Arguments and redirections after the command name.
73    pub suffix: Vec<CmdSuffix<'a>>,
74}
75
76/// A prefix element of a simple command: variable assignment or redirection.
77#[derive(Debug, Clone, PartialEq, Eq)]
78#[non_exhaustive]
79pub enum CmdPrefix<'a> {
80    /// `NAME=value` — scalar assignment: variable name and optional value word.
    // NOTE(review): `None` presumably represents an empty right-hand side (`NAME=`) — confirm.
81    Assign(&'a str, Option<Word<'a>>),
82    /// `arr=(word ...)` — array assignment: array name and element words.
83    ArrayAssign(&'a str, Vec<Word<'a>>),
84    /// `arr+=(word ...)` — array append: array name and the words to append.
85    ArrayAppend(&'a str, Vec<Word<'a>>),
86    /// An I/O redirection.
87    Redirect(Redir<'a>),
88}
89
90/// A suffix element of a simple command: argument word or redirection.
///
/// Words and redirections share this one type, so a list of `CmdSuffix` can hold both kinds.
91#[derive(Debug, Clone, PartialEq, Eq)]
92#[non_exhaustive]
93pub enum CmdSuffix<'a> {
94    /// A regular argument word.
95    Word(Word<'a>),
96    /// An I/O redirection.
97    Redirect(Redir<'a>),
98}
99
100// ---------------------------------------------------------------------------
101// Compound commands
102// ---------------------------------------------------------------------------
103
104/// A compound command with optional trailing redirections.
105#[derive(Debug, Clone, PartialEq, Eq)]
106pub struct CompoundCmd<'a> {
107    /// The compound command body (which construct it is, plus its parts).
108    pub kind: CompoundKind<'a>,
109    /// Redirections applied to the entire compound command (a plain `Vec`, so possibly empty).
110    pub redirects: Vec<Redir<'a>>,
111}
112
113/// The body of a compound command: one variant per shell construct.
114#[derive(Debug, Clone, PartialEq, Eq)]
115#[non_exhaustive]
116pub enum CompoundKind<'a> {
117    /// `for var [in words]; do body; done`
118    For {
119        /// Loop variable name.
120        var: &'a str,
121        /// Word list to iterate over (`None` = iterate over `"$@"`).
122        words: Option<Vec<Word<'a>>>,
123        /// Loop body commands.
124        body: Vec<Cmd<'a>>,
125    },
126    /// `while guard; do body; done`
127    While(GuardBody<'a>),
128    /// `until guard; do body; done` — same payload shape as `While`.
129    Until(GuardBody<'a>),
130    /// `if cond; then body; [elif ...;] [else ...;] fi`
131    If {
132        /// Condition–body pairs (first is `if`, rest are `elif`).
133        conditionals: Vec<GuardBody<'a>>,
134        /// Optional `else` branch (`None` when there is no `else`).
135        else_branch: Option<Vec<Cmd<'a>>>,
136    },
137    /// `case word in pattern) body;; ... esac`
138    Case {
139        /// The word being matched.
140        word: Word<'a>,
141        /// Pattern–body arms.
142        arms: Vec<CaseArm<'a>>,
143    },
144    /// C-style for loop: `for (( init; cond; step )); do body; done`
    ///
    /// Each of the three header expressions is independently optional.
145    CFor {
146        /// Initialization expression.
147        init: Option<Arith<'a>>,
148        /// Condition expression.
149        cond: Option<Arith<'a>>,
150        /// Step expression.
151        step: Option<Arith<'a>>,
152        /// Loop body commands.
153        body: Vec<Cmd<'a>>,
154    },
155    /// `{ body; }` — brace group.
156    Brace(Vec<Cmd<'a>>),
157    /// `( body )` — subshell.
158    Subshell(Vec<Cmd<'a>>),
159    /// `[[ expression ]]` — extended test command.
    // NOTE(review): the expression is stored as a command list (`Vec<Cmd>`) rather than a
    // dedicated test-expression type — confirm with the parser how `[[ ]]` operands are encoded.
160    DoubleBracket(Vec<Cmd<'a>>),
161    /// `(( expression ))` — arithmetic command.
162    Arithmetic(Arith<'a>),
163}
164
165/// A guard (condition) and body pair, used by `while`, `until`, and `if`.
///
/// Both halves are command lists.
166#[derive(Debug, Clone, PartialEq, Eq)]
167pub struct GuardBody<'a> {
168    /// The condition commands.
169    pub guard: Vec<Cmd<'a>>,
170    /// The body commands.
171    pub body: Vec<Cmd<'a>>,
172}
173
174/// A single arm in a `case` statement: one or more patterns sharing one body.
175#[derive(Debug, Clone, PartialEq, Eq)]
176pub struct CaseArm<'a> {
177    /// Patterns to match against (separated by `|` in the source).
178    pub patterns: Vec<Word<'a>>,
179    /// Commands to execute if a pattern matches.
180    pub body: Vec<Cmd<'a>>,
181}
182
183// ---------------------------------------------------------------------------
184// Words
185// ---------------------------------------------------------------------------
186
187/// A shell word: either a single part or a concatenation of parts.
// NOTE(review): presumably the parser emits `Simple` for one-part words and `Concat` only for
// two or more parts; nothing in the type enforces that — confirm before relying on it.
188#[derive(Debug, Clone, PartialEq, Eq)]
189#[non_exhaustive]
190pub enum Word<'a> {
191    /// A word consisting of a single part.
192    Simple(WordPart<'a>),
193    /// A word formed by concatenating multiple parts (e.g., `"hello"$var`).
194    Concat(Vec<WordPart<'a>>),
195}
196
197/// A fragment of a word: bare text, quoted text, or a substitution.
198#[derive(Debug, Clone, PartialEq, Eq)]
199#[non_exhaustive]
200pub enum WordPart<'a> {
201    /// Unquoted content — exactly one [`Atom`].
202    Bare(Atom<'a>),
203    /// Double-quoted content (may contain expansions) — a sequence of atoms.
204    DQuoted(Vec<Atom<'a>>),
205    /// Single-quoted content (literal text, no expansions) — stored as one string slice.
206    SQuoted(&'a str),
207}
208
209/// An atomic element within a word: literal text, expansion, or glob.
210#[derive(Debug, Clone, PartialEq, Eq)]
211#[non_exhaustive]
212pub enum Atom<'a> {
213    /// Literal text.
214    Lit(&'a str),
215    /// Backslash-escaped character. Held in a `Cow`, so the text may be borrowed or owned.
216    Escaped(Cow<'a, str>),
217    /// Parameter reference (`$var`, `$1`, `$@`, etc.).
218    Param(Param<'a>),
219    /// Substitution (`$(cmd)`, `${var...}`, `$((expr))`). Boxed, so the payload lives on the heap.
220    Subst(Box<Subst<'a>>),
221    /// `*` glob wildcard.
222    Star,
223    /// `?` glob wildcard.
224    Question,
225    /// `[` glob bracket open.
226    SquareOpen,
227    /// `]` glob bracket close.
228    SquareClose,
229    /// `~` tilde expansion.
230    Tilde,
231    /// `<(cmd)` — process substitution (input); the payload is the inner command list.
    // NOTE(review): no variant for the output form `>(cmd)` is declared in this enum.
232    ProcSubIn(Vec<Cmd<'a>>),
233    /// ANSI-C `$'...'` — raw content between the quotes (escape sequences unresolved).
234    AnsiCQuoted(&'a str),
235    /// Brace range expansion: `{start..end[..step]}`. All three fields are string slices.
236    BraceRange {
237        /// Range start value.
238        start: &'a str,
239        /// Range end value.
240        end: &'a str,
241        /// Optional step value (`None` when no `..step` is given).
242        step: Option<&'a str>,
243    },
244}
245
246// ---------------------------------------------------------------------------
247// Parameters
248// ---------------------------------------------------------------------------
249
250/// A shell parameter reference.
///
/// Unlike the other AST node types in this file, this one also derives `Copy` and `Hash`.
251#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
252#[non_exhaustive]
253pub enum Param<'a> {
254    /// Named variable (`$var`); the payload is the variable name.
255    Var(&'a str),
256    /// Positional parameter (`$1`, `$2`, ...); the payload is the position as a `u32`.
257    Positional(u32),
258    /// `$@` — all positional parameters (separate words).
259    At,
260    /// `$*` — all positional parameters (single word).
261    Star,
262    /// `$#` — number of positional parameters.
263    Pound,
264    /// `$?` — exit status of last command.
265    Status,
266    /// `$$` — process ID.
267    Pid,
268    /// `$!` — PID of last background process.
269    Bang,
270    /// `$-` — current shell option flags.
271    Dash,
272}
273
274// ---------------------------------------------------------------------------
275// Substitutions
276// ---------------------------------------------------------------------------
277
278/// A substitution or parameter expansion.
///
/// Most `${...}` variants take a [`Param`] first; a few take a bare variable name (`&'a str`).
// NOTE(review): where a doc line lists both a colon and a non-colon form (`:-` / `-`, etc.),
// the variant has no field recording which one was written — confirm that is intended.
279#[derive(Debug, Clone, PartialEq, Eq)]
280#[non_exhaustive]
281pub enum Subst<'a> {
282    /// Command substitution: `$(cmd)` or `` `cmd` ``; the payload is the inner command list.
283    Cmd(Vec<Cmd<'a>>),
284    /// Arithmetic expansion: `$((expr))`; the expression is optional.
285    Arith(Option<Arith<'a>>),
286    /// String length: `${#var}`.
287    Len(Param<'a>),
288    /// `${!var}` — indirect variable expansion; the payload is the variable name.
289    Indirect(&'a str),
290    /// `${!prefix*}` / `${!prefix@}` — list variables matching prefix (only the prefix is stored).
291    PrefixList(&'a str),
292    /// `${var@Q}` — parameter transformation (quoting): variable name plus a `u8`.
    // NOTE(review): the `u8` is presumably the operator character after `@` (e.g. `Q`) — confirm.
293    Transform(&'a str, u8),
294    /// `${var:-word}` / `${var-word}` — default value.
295    Default(Param<'a>, Option<Word<'a>>),
296    /// `${var:=word}` / `${var=word}` — assign default.
297    Assign(Param<'a>, Option<Word<'a>>),
298    /// `${var:?word}` / `${var?word}` — error if unset.
299    Error(Param<'a>, Option<Word<'a>>),
300    /// `${var:+word}` / `${var+word}` — alternate value.
301    Alt(Param<'a>, Option<Word<'a>>),
302    /// `${var%pattern}` — remove shortest suffix match.
303    TrimSuffixSmall(Param<'a>, Option<Word<'a>>),
304    /// `${var%%pattern}` — remove longest suffix match.
305    TrimSuffixLarge(Param<'a>, Option<Word<'a>>),
306    /// `${var#pattern}` — remove shortest prefix match.
307    TrimPrefixSmall(Param<'a>, Option<Word<'a>>),
308    /// `${var##pattern}` — remove longest prefix match.
309    TrimPrefixLarge(Param<'a>, Option<Word<'a>>),
310    /// `${var/pattern/replacement}` — replace first match; pattern and replacement are both optional.
311    Replace(Param<'a>, Option<Word<'a>>, Option<Word<'a>>),
312    /// `${var//pattern/replacement}` — replace all matches.
313    ReplaceAll(Param<'a>, Option<Word<'a>>, Option<Word<'a>>),
314    /// `${var/#pattern/replacement}` — replace prefix match.
315    ReplacePrefix(Param<'a>, Option<Word<'a>>, Option<Word<'a>>),
316    /// `${var/%pattern/replacement}` — replace suffix match.
317    ReplaceSuffix(Param<'a>, Option<Word<'a>>, Option<Word<'a>>),
318    /// `${var:offset:length}` — substring extraction; offset and optional length are string slices.
319    Substring(Param<'a>, &'a str, Option<&'a str>),
320    /// `${var^}` / `${var^^}` — uppercase (bool: all if true).
321    Upper(bool, Param<'a>),
322    /// `${var,}` / `${var,,}` — lowercase (bool: all if true).
323    Lower(bool, Param<'a>),
324    /// `${arr[index]}` — array element access (index is a Word for $((expr)) support).
325    ArrayElement(&'a str, Word<'a>),
326    /// `${arr[@]}` or `${arr[*]}` — all array elements; only the array name is stored.
    // NOTE(review): the `[@]` vs `[*]` spelling is not recorded in this variant — confirm intended.
327    ArrayAll(&'a str),
328    /// `${#arr[@]}` — array length.
329    ArrayLen(&'a str),
330    /// `${arr[@]:offset:length}` — array slice: array name, offset, optional length (string slices).
331    ArraySlice(&'a str, &'a str, Option<&'a str>),
332}
333
334// ---------------------------------------------------------------------------
335// Arithmetic
336// ---------------------------------------------------------------------------
337
338/// An arithmetic expression node (used in `$(( ))`, `(( ))`, and C-style for).
///
/// Sub-expressions are boxed, which is what lets this enum refer to itself. Binary variants hold
/// two operands; they are written left-then-right here on the assumption that this matches
/// source order.
339#[derive(Debug, Clone, PartialEq, Eq)]
340#[non_exhaustive]
341pub enum Arith<'a> {
342    /// Variable reference, by name.
343    Var(&'a str),
344    /// Integer literal, stored as an `i64`.
345    Lit(i64),
346
347    /// Addition.
348    Add(Box<Arith<'a>>, Box<Arith<'a>>),
349    /// Subtraction.
350    Sub(Box<Arith<'a>>, Box<Arith<'a>>),
351    /// Multiplication.
352    Mul(Box<Arith<'a>>, Box<Arith<'a>>),
353    /// Division.
354    Div(Box<Arith<'a>>, Box<Arith<'a>>),
355    /// Modulo.
356    Rem(Box<Arith<'a>>, Box<Arith<'a>>),
357    /// Exponentiation.
358    Pow(Box<Arith<'a>>, Box<Arith<'a>>),
359
360    /// Less than.
361    Lt(Box<Arith<'a>>, Box<Arith<'a>>),
362    /// Less than or equal.
363    Le(Box<Arith<'a>>, Box<Arith<'a>>),
364    /// Greater than.
365    Gt(Box<Arith<'a>>, Box<Arith<'a>>),
366    /// Greater than or equal.
367    Ge(Box<Arith<'a>>, Box<Arith<'a>>),
368    /// Equal.
369    Eq(Box<Arith<'a>>, Box<Arith<'a>>),
370    /// Not equal.
371    Ne(Box<Arith<'a>>, Box<Arith<'a>>),
372
373    /// Bitwise AND.
374    BitAnd(Box<Arith<'a>>, Box<Arith<'a>>),
375    /// Bitwise OR.
376    BitOr(Box<Arith<'a>>, Box<Arith<'a>>),
377    /// Bitwise XOR.
378    BitXor(Box<Arith<'a>>, Box<Arith<'a>>),
379    /// Logical AND.
380    LogAnd(Box<Arith<'a>>, Box<Arith<'a>>),
381    /// Logical OR.
382    LogOr(Box<Arith<'a>>, Box<Arith<'a>>),
383    /// Left shift.
384    Shl(Box<Arith<'a>>, Box<Arith<'a>>),
385    /// Right shift.
386    Shr(Box<Arith<'a>>, Box<Arith<'a>>),
387
388    /// Unary plus.
389    Pos(Box<Arith<'a>>),
390    /// Unary minus.
391    Neg(Box<Arith<'a>>),
392    /// Logical NOT.
393    LogNot(Box<Arith<'a>>),
394    /// Bitwise NOT.
395    BitNot(Box<Arith<'a>>),
396
397    /// Pre-increment (`++var`); the operand is a variable name, not a sub-expression.
398    PreInc(&'a str),
399    /// Post-increment (`var++`).
400    PostInc(&'a str),
401    /// Pre-decrement (`--var`).
402    PreDec(&'a str),
403    /// Post-decrement (`var--`).
404    PostDec(&'a str),
405
406    /// Ternary operator (`cond ? then : else`); fields are condition, then-value, else-value.
407    Ternary(Box<Arith<'a>>, Box<Arith<'a>>, Box<Arith<'a>>),
408    /// Assignment (`var = expr`): target variable name and the assigned expression.
409    Assign(&'a str, Box<Arith<'a>>),
410}
411
412// ---------------------------------------------------------------------------
413// Heredoc body
414// ---------------------------------------------------------------------------
415
416/// The body of a heredoc (here-document).
///
/// The variant records whether the body is expanded, which depends on how the delimiter was quoted.
417#[derive(Debug, Clone, PartialEq, Eq)]
418#[non_exhaustive]
419pub enum HeredocBody<'a> {
420    /// Quoted delimiter — no expansion (literal text), stored as one string slice.
421    Literal(&'a str),
422    /// Unquoted delimiter — variable and command expansion; stored as a sequence of atoms.
423    Interpolated(Vec<Atom<'a>>),
424}
425
426// ---------------------------------------------------------------------------
427// Redirects
428// ---------------------------------------------------------------------------
429
430/// An I/O redirection.
///
/// Where a variant has an `Option<u16>` field, it is the optional file-descriptor number written
/// as `[n]` in the syntax shown; the `Word` is the redirection target.
431#[derive(Debug, Clone, PartialEq, Eq)]
432#[non_exhaustive]
433pub enum Redir<'a> {
434    /// `[n]< word` — read from file.
435    Read(Option<u16>, Word<'a>),
436    /// `[n]> word` — write to file.
437    Write(Option<u16>, Word<'a>),
438    /// `[n]>> word` — append to file.
439    Append(Option<u16>, Word<'a>),
440    /// `[n]<> word` — open for reading and writing.
441    ReadWrite(Option<u16>, Word<'a>),
442    /// `[n]>| word` — write, overriding noclobber.
443    Clobber(Option<u16>, Word<'a>),
444    /// `[n]<& word` — duplicate input fd.
445    DupRead(Option<u16>, Word<'a>),
446    /// `[n]>& word` — duplicate output fd.
447    DupWrite(Option<u16>, Word<'a>),
448    /// `<<< word` — here-string (no fd field).
449    HereString(Word<'a>),
450    /// `<< [-]DELIM ... DELIM` — here-document; only the body is stored.
    // NOTE(review): neither an fd, the delimiter, nor the `-` flag is a field of this variant —
    // confirm whether tab stripping for `<<-` is handled before the body is built.
451    Heredoc(HeredocBody<'a>),
452    /// `&> word` — redirect both stdout and stderr (no fd field).
453    WriteAll(Word<'a>),
454    /// `&>> word` — append both stdout and stderr (no fd field).
455    AppendAll(Word<'a>),
456}