reef/ast.rs
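//! Abstract syntax tree for bash commands.
//!
//! AST nodes borrow from the input string (`&'a str`) wherever possible, so building the
//! tree is largely zero-copy. The exception visible in this module is `Atom::Escaped`,
//! whose `Cow<'a, str>` payload is able to hold owned text as well as a borrowed slice.
//! The parser produces a `Vec<Cmd<'a>>` representing the top-level command list.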
5
6use std::borrow::Cow;
7
8// ---------------------------------------------------------------------------
9// Commands
10// ---------------------------------------------------------------------------
11
12/// A complete command — foreground or background.
13#[derive(Debug, Clone, PartialEq, Eq)]
14#[non_exhaustive]
15pub enum Cmd<'a> {
16 /// A foreground command list.
17 List(AndOrList<'a>),
18 /// A background job (`cmd &`).
19 Job(AndOrList<'a>),
20}
21
22/// A chain of commands connected by `&&` and `||`.
23#[derive(Debug, Clone, PartialEq, Eq)]
24pub struct AndOrList<'a> {
25 /// The first pipeline in the chain.
26 pub first: Pipeline<'a>,
27 /// Subsequent `&&` / `||` pipelines.
28 pub rest: Vec<AndOr<'a>>,
29}
30
31/// A single `&&` or `||` link in an and-or chain.
32#[derive(Debug, Clone, PartialEq, Eq)]
33#[non_exhaustive]
34pub enum AndOr<'a> {
35 /// `&&` — run if the previous succeeded.
36 And(Pipeline<'a>),
37 /// `||` — run if the previous failed.
38 Or(Pipeline<'a>),
39}
40
41/// A pipeline: one or more commands connected by `|`.
42#[derive(Debug, Clone, PartialEq, Eq)]
43#[non_exhaustive]
44pub enum Pipeline<'a> {
45 /// A single command (no pipe).
46 Single(Executable<'a>),
47 /// `[!] cmd1 | cmd2 | ...` — bool is true if negated.
48 Pipe(bool, Vec<Executable<'a>>),
49}
50
51/// An executable unit: simple command, compound command, or function definition.
52#[derive(Debug, Clone, PartialEq, Eq)]
53#[non_exhaustive]
54pub enum Executable<'a> {
55 /// A simple command (possibly with assignments and redirections).
56 Simple(SimpleCmd<'a>),
57 /// A compound command (`if`, `for`, `while`, etc.).
58 Compound(CompoundCmd<'a>),
59 /// A function definition: `name() { body; }`.
60 FuncDef(&'a str, CompoundCmd<'a>),
61}
62
63// ---------------------------------------------------------------------------
64// Simple command
65// ---------------------------------------------------------------------------
66
67/// A simple command with optional prefix assignments and suffix words/redirects.
68#[derive(Debug, Clone, PartialEq, Eq)]
69pub struct SimpleCmd<'a> {
70 /// Assignments and redirections before the command name.
71 pub prefix: Vec<CmdPrefix<'a>>,
72 /// Arguments and redirections after the command name.
73 pub suffix: Vec<CmdSuffix<'a>>,
74}
75
76/// A prefix element: variable assignment or redirection.
77#[derive(Debug, Clone, PartialEq, Eq)]
78#[non_exhaustive]
79pub enum CmdPrefix<'a> {
80 /// `NAME=value` — scalar assignment.
81 Assign(&'a str, Option<Word<'a>>),
82 /// `arr=(word ...)` — array assignment.
83 ArrayAssign(&'a str, Vec<Word<'a>>),
84 /// `arr+=(word ...)` — array append.
85 ArrayAppend(&'a str, Vec<Word<'a>>),
86 /// An I/O redirection.
87 Redirect(Redir<'a>),
88}
89
90/// A suffix element: argument word or redirection.
91#[derive(Debug, Clone, PartialEq, Eq)]
92#[non_exhaustive]
93pub enum CmdSuffix<'a> {
94 /// A regular argument word.
95 Word(Word<'a>),
96 /// An I/O redirection.
97 Redirect(Redir<'a>),
98}
99
100// ---------------------------------------------------------------------------
101// Compound commands
102// ---------------------------------------------------------------------------
103
104/// A compound command with optional trailing redirections.
105#[derive(Debug, Clone, PartialEq, Eq)]
106pub struct CompoundCmd<'a> {
107 /// The compound command body.
108 pub kind: CompoundKind<'a>,
109 /// Redirections applied to the entire compound command.
110 pub redirects: Vec<Redir<'a>>,
111}
112
113/// The body of a compound command.
114#[derive(Debug, Clone, PartialEq, Eq)]
115#[non_exhaustive]
116pub enum CompoundKind<'a> {
117 /// `for var [in words]; do body; done`
118 For {
119 /// Loop variable name.
120 var: &'a str,
121 /// Word list (None = `"$@"`).
122 words: Option<Vec<Word<'a>>>,
123 /// Loop body commands.
124 body: Vec<Cmd<'a>>,
125 },
126 /// `while guard; do body; done`
127 While(GuardBody<'a>),
128 /// `until guard; do body; done`
129 Until(GuardBody<'a>),
130 /// `if cond; then body; [elif ...;] [else ...;] fi`
131 If {
132 /// Condition–body pairs (first is `if`, rest are `elif`).
133 conditionals: Vec<GuardBody<'a>>,
134 /// Optional `else` branch.
135 else_branch: Option<Vec<Cmd<'a>>>,
136 },
137 /// `case word in pattern) body;; ... esac`
138 Case {
139 /// The word being matched.
140 word: Word<'a>,
141 /// Pattern–body arms.
142 arms: Vec<CaseArm<'a>>,
143 },
144 /// C-style for loop: `for (( init; cond; step )); do body; done`
145 CFor {
146 /// Initialization expression.
147 init: Option<Arith<'a>>,
148 /// Condition expression.
149 cond: Option<Arith<'a>>,
150 /// Step expression.
151 step: Option<Arith<'a>>,
152 /// Loop body commands.
153 body: Vec<Cmd<'a>>,
154 },
155 /// `{ body; }` — brace group.
156 Brace(Vec<Cmd<'a>>),
157 /// `( body )` — subshell.
158 Subshell(Vec<Cmd<'a>>),
159 /// `[[ expression ]]` — extended test command.
160 DoubleBracket(Vec<Cmd<'a>>),
161 /// `(( expression ))` — arithmetic command.
162 Arithmetic(Arith<'a>),
163}
164
165/// A guard (condition) and body pair, used by `while`, `until`, and `if`.
166#[derive(Debug, Clone, PartialEq, Eq)]
167pub struct GuardBody<'a> {
168 /// The condition commands.
169 pub guard: Vec<Cmd<'a>>,
170 /// The body commands.
171 pub body: Vec<Cmd<'a>>,
172}
173
174/// A single arm in a `case` statement.
175#[derive(Debug, Clone, PartialEq, Eq)]
176pub struct CaseArm<'a> {
177 /// Patterns to match against (separated by `|`).
178 pub patterns: Vec<Word<'a>>,
179 /// Commands to execute if a pattern matches.
180 pub body: Vec<Cmd<'a>>,
181}
182
183// ---------------------------------------------------------------------------
184// Words
185// ---------------------------------------------------------------------------
186
187/// A shell word: either a single part or a concatenation of parts.
188#[derive(Debug, Clone, PartialEq, Eq)]
189#[non_exhaustive]
190pub enum Word<'a> {
191 /// A word consisting of a single part.
192 Simple(WordPart<'a>),
193 /// A word formed by concatenating multiple parts (e.g., `"hello"$var`).
194 Concat(Vec<WordPart<'a>>),
195}
196
197/// A fragment of a word: bare text, quoted text, or a substitution.
198#[derive(Debug, Clone, PartialEq, Eq)]
199#[non_exhaustive]
200pub enum WordPart<'a> {
201 /// Unquoted content.
202 Bare(Atom<'a>),
203 /// Double-quoted content (may contain expansions).
204 DQuoted(Vec<Atom<'a>>),
205 /// Single-quoted content (literal text, no expansions).
206 SQuoted(&'a str),
207}
208
209/// An atomic element within a word: literal text, expansion, or glob.
210#[derive(Debug, Clone, PartialEq, Eq)]
211#[non_exhaustive]
212pub enum Atom<'a> {
213 /// Literal text.
214 Lit(&'a str),
215 /// Backslash-escaped character.
216 Escaped(Cow<'a, str>),
217 /// Parameter reference (`$var`, `$1`, `$@`, etc.).
218 Param(Param<'a>),
219 /// Substitution (`$(cmd)`, `${var...}`, `$((expr))`).
220 Subst(Box<Subst<'a>>),
221 /// `*` glob wildcard.
222 Star,
223 /// `?` glob wildcard.
224 Question,
225 /// `[` glob bracket open.
226 SquareOpen,
227 /// `]` glob bracket close.
228 SquareClose,
229 /// `~` tilde expansion.
230 Tilde,
231 /// `<(cmd)` — process substitution (input).
232 ProcSubIn(Vec<Cmd<'a>>),
233 /// ANSI-C `$'...'` — raw content between the quotes (escape sequences unresolved).
234 AnsiCQuoted(&'a str),
235 /// Brace range expansion: `{start..end[..step]}`.
236 BraceRange {
237 /// Range start value.
238 start: &'a str,
239 /// Range end value.
240 end: &'a str,
241 /// Optional step value.
242 step: Option<&'a str>,
243 },
244}
245
246// ---------------------------------------------------------------------------
247// Parameters
248// ---------------------------------------------------------------------------
249
/// A reference to a shell parameter: a named variable, a positional, or a special parameter.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
#[non_exhaustive]
pub enum Param<'a> {
    /// A named variable, as in `$var`.
    Var(&'a str),
    /// A positional parameter, as in `$1`, `$2`, ...
    Positional(u32),
    /// `$@` — every positional parameter, as separate words.
    At,
    /// `$*` — every positional parameter, as a single word.
    Star,
    /// `$#` — how many positional parameters there are.
    Pound,
    /// `$?` — the exit status of the last command.
    Status,
    /// `$$` — the process ID.
    Pid,
    /// `$!` — the PID of the last background process.
    Bang,
    /// `$-` — the current shell option flags.
    Dash,
}
273
274// ---------------------------------------------------------------------------
275// Substitutions
276// ---------------------------------------------------------------------------
277
278/// A substitution or parameter expansion.
279#[derive(Debug, Clone, PartialEq, Eq)]
280#[non_exhaustive]
281pub enum Subst<'a> {
282 /// Command substitution: `$(cmd)` or `` `cmd` ``.
283 Cmd(Vec<Cmd<'a>>),
284 /// Arithmetic expansion: `$((expr))`.
285 Arith(Option<Arith<'a>>),
286 /// String length: `${#var}`.
287 Len(Param<'a>),
288 /// `${!var}` — indirect variable expansion.
289 Indirect(&'a str),
290 /// `${!prefix*}` / `${!prefix@}` — list variables matching prefix.
291 PrefixList(&'a str),
292 /// `${var@Q}` — parameter transformation (quoting).
293 Transform(&'a str, u8),
294 /// `${var:-word}` / `${var-word}` — default value.
295 Default(Param<'a>, Option<Word<'a>>),
296 /// `${var:=word}` / `${var=word}` — assign default.
297 Assign(Param<'a>, Option<Word<'a>>),
298 /// `${var:?word}` / `${var?word}` — error if unset.
299 Error(Param<'a>, Option<Word<'a>>),
300 /// `${var:+word}` / `${var+word}` — alternate value.
301 Alt(Param<'a>, Option<Word<'a>>),
302 /// `${var%pattern}` — remove shortest suffix match.
303 TrimSuffixSmall(Param<'a>, Option<Word<'a>>),
304 /// `${var%%pattern}` — remove longest suffix match.
305 TrimSuffixLarge(Param<'a>, Option<Word<'a>>),
306 /// `${var#pattern}` — remove shortest prefix match.
307 TrimPrefixSmall(Param<'a>, Option<Word<'a>>),
308 /// `${var##pattern}` — remove longest prefix match.
309 TrimPrefixLarge(Param<'a>, Option<Word<'a>>),
310 /// `${var/pattern/replacement}` — replace first match.
311 Replace(Param<'a>, Option<Word<'a>>, Option<Word<'a>>),
312 /// `${var//pattern/replacement}` — replace all matches.
313 ReplaceAll(Param<'a>, Option<Word<'a>>, Option<Word<'a>>),
314 /// `${var/#pattern/replacement}` — replace prefix match.
315 ReplacePrefix(Param<'a>, Option<Word<'a>>, Option<Word<'a>>),
316 /// `${var/%pattern/replacement}` — replace suffix match.
317 ReplaceSuffix(Param<'a>, Option<Word<'a>>, Option<Word<'a>>),
318 /// `${var:offset:length}` — substring extraction.
319 Substring(Param<'a>, &'a str, Option<&'a str>),
320 /// `${var^}` / `${var^^}` — uppercase (bool: all if true).
321 Upper(bool, Param<'a>),
322 /// `${var,}` / `${var,,}` — lowercase (bool: all if true).
323 Lower(bool, Param<'a>),
324 /// `${arr[index]}` — array element access (index is a Word for $((expr)) support).
325 ArrayElement(&'a str, Word<'a>),
326 /// `${arr[@]}` or `${arr[*]}` — all array elements.
327 ArrayAll(&'a str),
328 /// `${#arr[@]}` — array length.
329 ArrayLen(&'a str),
330 /// `${arr[@]:offset:length}` — array slice.
331 ArraySlice(&'a str, &'a str, Option<&'a str>),
332}
333
334// ---------------------------------------------------------------------------
335// Arithmetic
336// ---------------------------------------------------------------------------
337
/// A node of an arithmetic expression tree, as found in `$(( ))`, `(( ))`, and
/// C-style `for` loops.
///
/// Operands of binary, unary, and ternary operators are boxed sub-expressions.
#[derive(Clone, Debug, Eq, PartialEq)]
#[non_exhaustive]
pub enum Arith<'a> {
    /// A reference to a variable.
    Var(&'a str),
    /// An integer literal.
    Lit(i64),

    /// Addition.
    Add(Box<Arith<'a>>, Box<Arith<'a>>),
    /// Subtraction.
    Sub(Box<Arith<'a>>, Box<Arith<'a>>),
    /// Multiplication.
    Mul(Box<Arith<'a>>, Box<Arith<'a>>),
    /// Division.
    Div(Box<Arith<'a>>, Box<Arith<'a>>),
    /// Modulo.
    Rem(Box<Arith<'a>>, Box<Arith<'a>>),
    /// Exponentiation.
    Pow(Box<Arith<'a>>, Box<Arith<'a>>),

    /// Less-than comparison.
    Lt(Box<Arith<'a>>, Box<Arith<'a>>),
    /// Less-than-or-equal comparison.
    Le(Box<Arith<'a>>, Box<Arith<'a>>),
    /// Greater-than comparison.
    Gt(Box<Arith<'a>>, Box<Arith<'a>>),
    /// Greater-than-or-equal comparison.
    Ge(Box<Arith<'a>>, Box<Arith<'a>>),
    /// Equality comparison.
    Eq(Box<Arith<'a>>, Box<Arith<'a>>),
    /// Inequality comparison.
    Ne(Box<Arith<'a>>, Box<Arith<'a>>),

    /// Bitwise AND.
    BitAnd(Box<Arith<'a>>, Box<Arith<'a>>),
    /// Bitwise OR.
    BitOr(Box<Arith<'a>>, Box<Arith<'a>>),
    /// Bitwise XOR.
    BitXor(Box<Arith<'a>>, Box<Arith<'a>>),
    /// Logical AND.
    LogAnd(Box<Arith<'a>>, Box<Arith<'a>>),
    /// Logical OR.
    LogOr(Box<Arith<'a>>, Box<Arith<'a>>),
    /// Left shift.
    Shl(Box<Arith<'a>>, Box<Arith<'a>>),
    /// Right shift.
    Shr(Box<Arith<'a>>, Box<Arith<'a>>),

    /// Unary plus.
    Pos(Box<Arith<'a>>),
    /// Unary minus.
    Neg(Box<Arith<'a>>),
    /// Logical NOT.
    LogNot(Box<Arith<'a>>),
    /// Bitwise NOT.
    BitNot(Box<Arith<'a>>),

    /// `++var` — pre-increment of the named variable.
    PreInc(&'a str),
    /// `var++` — post-increment of the named variable.
    PostInc(&'a str),
    /// `--var` — pre-decrement of the named variable.
    PreDec(&'a str),
    /// `var--` — post-decrement of the named variable.
    PostDec(&'a str),

    /// `cond ? then : else` — the ternary operator, operands in that order.
    Ternary(Box<Arith<'a>>, Box<Arith<'a>>, Box<Arith<'a>>),
    /// `var = expr` — assignment to the named variable.
    Assign(&'a str, Box<Arith<'a>>),
}
411
412// ---------------------------------------------------------------------------
413// Heredoc body
414// ---------------------------------------------------------------------------
415
416/// The body of a heredoc (here-document).
417#[derive(Debug, Clone, PartialEq, Eq)]
418#[non_exhaustive]
419pub enum HeredocBody<'a> {
420 /// Quoted delimiter — no expansion (literal text).
421 Literal(&'a str),
422 /// Unquoted delimiter — variable and command expansion.
423 Interpolated(Vec<Atom<'a>>),
424}
425
426// ---------------------------------------------------------------------------
427// Redirects
428// ---------------------------------------------------------------------------
429
430/// An I/O redirection.
431#[derive(Debug, Clone, PartialEq, Eq)]
432#[non_exhaustive]
433pub enum Redir<'a> {
434 /// `[n]< word` — read from file.
435 Read(Option<u16>, Word<'a>),
436 /// `[n]> word` — write to file.
437 Write(Option<u16>, Word<'a>),
438 /// `[n]>> word` — append to file.
439 Append(Option<u16>, Word<'a>),
440 /// `[n]<> word` — open for reading and writing.
441 ReadWrite(Option<u16>, Word<'a>),
442 /// `[n]>| word` — write, overriding noclobber.
443 Clobber(Option<u16>, Word<'a>),
444 /// `[n]<& word` — duplicate input fd.
445 DupRead(Option<u16>, Word<'a>),
446 /// `[n]>& word` — duplicate output fd.
447 DupWrite(Option<u16>, Word<'a>),
448 /// `<<< word` — here-string.
449 HereString(Word<'a>),
450 /// `<< [-]DELIM ... DELIM` — here-document.
451 Heredoc(HeredocBody<'a>),
452 /// `&> word` — redirect both stdout and stderr.
453 WriteAll(Word<'a>),
454 /// `&>> word` — append both stdout and stderr.
455 AppendAll(Word<'a>),
456}