conch_parser/ast/builder/mod.rs
//! Defines the interfaces used to receive parse data and construct ASTs.
//!
//! This allows the parser to remain agnostic of the required source
//! representation, and frees up the library user to substitute their own.
//! If one does not require a custom AST representation, this module offers
//! a reasonable default builder implementation.
//!
//! If a custom AST representation is required you will need to implement
//! the `Builder` trait for your AST. Otherwise you can provide the `DefaultBuilder`
//! struct to the parser if you wish to use the default AST implementation.
11
12use ast::{AndOr, DefaultArithmetic, DefaultParameter, RedirectOrCmdWord, RedirectOrEnvVar};
13
14mod default_builder;
15mod empty_builder;
16
17pub use self::default_builder::*;
18pub use self::empty_builder::EmptyBuilder;
19
/// Tells the builder which delimiter terminated a complete command.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SeparatorKind {
    /// The command was followed by a semicolon, which normally denotes
    /// sequential execution.
    Semi,
    /// The command was followed by an ampersand, which normally denotes an
    /// asynchronous job.
    Amp,
    /// The command was followed by a newline (possibly preceded by a comment)
    /// before the next command.
    Newline,
    /// The command ended at some other token (e.g. a compound command
    /// delimiter) or at the end of input, and is *not* followed by another
    /// sequential command.
    Other,
}
33
/// Tells the builder which flavor of loop command was parsed: `while` or `until`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum LoopKind {
    /// A `while` loop: the body should normally keep running for as long as
    /// the guard exits successfully.
    While,
    /// An `until` loop: the body should normally keep running until the
    /// guard's exit status becomes successful.
    Until,
}
44
45/// A grouping of a list of commands and any comments trailing after the commands.
46#[derive(Debug, PartialEq, Eq, Clone)]
47pub struct CommandGroup<C> {
48 /// The sequential list of commands.
49 pub commands: Vec<C>,
50 /// Any trailing comments appearing on the next line after the last command.
51 pub trailing_comments: Vec<Newline>,
52}
53
54/// A grouping of guard and body commands, and any comments they may have.
55#[derive(Debug, PartialEq, Eq, Clone)]
56pub struct GuardBodyPairGroup<C> {
57 /// The guard commands, which if successful, should lead to the
58 /// execution of the body commands.
59 pub guard: CommandGroup<C>,
60 /// The body commands to execute if the guard is successful.
61 pub body: CommandGroup<C>,
62}
63
64/// Parsed fragments relating to a shell `if` command.
65#[derive(Debug, PartialEq, Eq, Clone)]
66pub struct IfFragments<C> {
67 /// A list of conditionals branches.
68 pub conditionals: Vec<GuardBodyPairGroup<C>>,
69 /// The `else` branch, if any,
70 pub else_branch: Option<CommandGroup<C>>,
71}
72
73/// Parsed fragments relating to a shell `for` command.
74#[derive(Debug, PartialEq, Eq, Clone)]
75pub struct ForFragments<W, C> {
76 /// The name of the variable to which each of the words will be bound.
77 pub var: String,
78 /// A comment that begins on the same line as the variable declaration.
79 pub var_comment: Option<Newline>,
80 /// Any comments after the variable declaration, a group of words to
81 /// iterator over, and comment defined on the same line as the words.
82 pub words: Option<(Vec<Newline>, Vec<W>, Option<Newline>)>,
83 /// Any comments that appear after the `words` declaration (if it exists),
84 /// but before the body of commands.
85 pub pre_body_comments: Vec<Newline>,
86 /// The body to be invoked for every iteration.
87 pub body: CommandGroup<C>,
88}
89
90/// Parsed fragments relating to a shell `case` command.
91#[derive(Debug, PartialEq, Eq, Clone)]
92pub struct CaseFragments<W, C> {
93 /// The word to be matched against.
94 pub word: W,
95 /// The comments appearing after the word to match but before the `in` reserved word.
96 pub post_word_comments: Vec<Newline>,
97 /// A comment appearing immediately after the `in` reserved word,
98 /// yet still on the same line.
99 pub in_comment: Option<Newline>,
100 /// All the possible branches of the `case` command.
101 pub arms: Vec<CaseArm<W, C>>,
102 /// The comments appearing after the last arm but before the `esac` reserved word.
103 pub post_arms_comments: Vec<Newline>,
104}
105
106/// An individual "unit of execution" within a `case` command.
107///
108/// Each arm has a number of pattern alternatives, and a body
109/// of commands to run if any pattern matches.
110#[derive(Debug, PartialEq, Eq, Clone)]
111pub struct CaseArm<W, C> {
112 /// The patterns which correspond to this case arm.
113 pub patterns: CasePatternFragments<W>,
114 /// The body of commands to run if any pattern matches.
115 pub body: CommandGroup<C>,
116 /// A comment appearing at the end of the arm declaration,
117 /// i.e. after `;;` but on the same line.
118 pub arm_comment: Option<Newline>,
119}
120
121/// Parsed fragments relating to patterns in a shell `case` command.
122#[derive(Debug, PartialEq, Eq, Clone)]
123pub struct CasePatternFragments<W> {
124 /// Comments appearing after a previous arm, but before the start of a pattern.
125 pub pre_pattern_comments: Vec<Newline>,
126 /// Pattern alternatives which all correspond to the same case arm.
127 pub pattern_alternatives: Vec<W>,
128 /// A comment appearing at the end of the pattern declaration on the same line.
129 pub pattern_comment: Option<Newline>,
130}
131
132/// An indicator to the builder what kind of complex word was parsed.
133#[derive(Debug, PartialEq, Eq, Clone)]
134pub enum ComplexWordKind<C> {
135 /// Several distinct words concatenated together.
136 Concat(Vec<WordKind<C>>),
137 /// A regular word.
138 Single(WordKind<C>),
139}
140
141/// An indicator to the builder what kind of word was parsed.
142#[derive(Debug, PartialEq, Eq, Clone)]
143pub enum WordKind<C> {
144 /// A regular word.
145 Simple(SimpleWordKind<C>),
146 /// List of words concatenated within double quotes.
147 DoubleQuoted(Vec<SimpleWordKind<C>>),
148 /// List of words concatenated within single quotes. Virtually
149 /// identical as a literal, but makes a distinction between the two.
150 SingleQuoted(String),
151}
152
153/// An indicator to the builder what kind of simple word was parsed.
154#[derive(Debug, PartialEq, Eq, Clone)]
155pub enum SimpleWordKind<C> {
156 /// A non-special literal word.
157 Literal(String),
158 /// Access of a value inside a parameter, e.g. `$foo` or `$$`.
159 Param(DefaultParameter),
160 /// A parameter substitution, e.g. `${param-word}`.
161 Subst(Box<ParameterSubstitutionKind<ComplexWordKind<C>, C>>),
162 /// Represents the standard output of some command, e.g. \`echo foo\`.
163 CommandSubst(CommandGroup<C>),
164 /// A token which normally has a special meaning is treated as a literal
165 /// because it was escaped, typically with a backslash, e.g. `\"`.
166 Escaped(String),
167 /// Represents `*`, useful for handling pattern expansions.
168 Star,
169 /// Represents `?`, useful for handling pattern expansions.
170 Question,
171 /// Represents `[`, useful for handling pattern expansions.
172 SquareOpen,
173 /// Represents `]`, useful for handling pattern expansions.
174 SquareClose,
175 /// Represents `~`, useful for handling tilde expansions.
176 Tilde,
177 /// Represents `:`, useful for handling tilde expansions.
178 Colon,
179}
180
/// Represents redirecting a command's file descriptors.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum RedirectKind<W> {
    /// Open a file for reading, e.g. `[n]< file`.
    Read(Option<u16>, W),
    /// Open a file for writing after truncating, e.g. `[n]> file`.
    Write(Option<u16>, W),
    /// Open a file for reading and writing, e.g. `[n]<> file`.
    ReadWrite(Option<u16>, W),
    /// Open a file for writing, appending to the end, e.g. `[n]>> file`.
    Append(Option<u16>, W),
    /// Open a file for writing after truncating, even if the `noclobber`
    /// shell option is set, e.g. `[n]>| file`.
    Clobber(Option<u16>, W),
    /// Lines contained in the source that should be provided as input to a
    /// file descriptor.
    Heredoc(Option<u16>, W),
    /// Duplicate a file descriptor for reading, e.g. `[n]<& [n|-]`.
    DupRead(Option<u16>, W),
    /// Duplicate a file descriptor for writing, e.g. `[n]>& [n|-]`.
    DupWrite(Option<u16>, W),
}
201
202/// Represents the type of parameter that was parsed
203#[derive(Debug, PartialEq, Eq, Clone)]
204pub enum ParameterSubstitutionKind<W, C> {
205 /// Returns the standard output of running a command, e.g. `$(cmd)`
206 Command(CommandGroup<C>),
207 /// Returns the length of the value of a parameter, e.g. ${#param}
208 Len(DefaultParameter),
209 /// Returns the resulting value of an arithmetic subsitution, e.g. `$(( x++ ))`
210 Arith(Option<DefaultArithmetic>),
211 /// Use a provided value if the parameter is null or unset, e.g.
212 /// `${param:-[word]}`.
213 /// The boolean indicates the presence of a `:`, and that if the parameter has
214 /// a null value, that situation should be treated as if the parameter is unset.
215 Default(bool, DefaultParameter, Option<W>),
216 /// Assign a provided value to the parameter if it is null or unset,
217 /// e.g. `${param:=[word]}`.
218 /// The boolean indicates the presence of a `:`, and that if the parameter has
219 /// a null value, that situation should be treated as if the parameter is unset.
220 Assign(bool, DefaultParameter, Option<W>),
221 /// If the parameter is null or unset, an error should result with the provided
222 /// message, e.g. `${param:?[word]}`.
223 /// The boolean indicates the presence of a `:`, and that if the parameter has
224 /// a null value, that situation should be treated as if the parameter is unset.
225 Error(bool, DefaultParameter, Option<W>),
226 /// If the parameter is NOT null or unset, a provided word will be used,
227 /// e.g. `${param:+[word]}`.
228 /// The boolean indicates the presence of a `:`, and that if the parameter has
229 /// a null value, that situation should be treated as if the parameter is unset.
230 Alternative(bool, DefaultParameter, Option<W>),
231 /// Remove smallest suffix pattern, e.g. `${param%pattern}`
232 RemoveSmallestSuffix(DefaultParameter, Option<W>),
233 /// Remove largest suffix pattern, e.g. `${param%%pattern}`
234 RemoveLargestSuffix(DefaultParameter, Option<W>),
235 /// Remove smallest prefix pattern, e.g. `${param#pattern}`
236 RemoveSmallestPrefix(DefaultParameter, Option<W>),
237 /// Remove largest prefix pattern, e.g. `${param##pattern}`
238 RemoveLargestPrefix(DefaultParameter, Option<W>),
239}
240
/// A parsed newline, or more precisely, the presence of a comment
/// immediately preceding that newline.
///
/// Shell comments are usually treated as a newline, so they may show up
/// anywhere a newline may. When comments need to be retained, they can
/// optionally be attached to the parsed newline.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Newline(pub Option<String>);
249
250/// A trait which defines an interface which the parser defined in the `parse` module
251/// uses to delegate Abstract Syntax Tree creation. The methods defined here correspond
252/// to their respectively named methods on the parser, and accept the relevant data for
253/// each shell command type.
254pub trait Builder {
255 /// The type which represents a complete, top-level command.
256 type Command;
257 /// The type which represents an and/or list of commands.
258 type CommandList;
259 /// The type which represents a command that can be used in an and/or command list.
260 type ListableCommand;
261 /// The type which represents a command that can be used in a pipeline.
262 type PipeableCommand;
263 /// The type which represents compound commands like `if`, `case`, `for`, etc.
264 type CompoundCommand;
265 /// The type which represents shell words, which can be command names or arguments.
266 type Word;
267 /// The type which represents a file descriptor redirection.
268 type Redirect;
269 /// A type for returning custom parse/build errors.
270 type Error;
271
272 /// Invoked once a complete command is found. That is, a command delimited by a
273 /// newline, semicolon, ampersand, or the end of input.
274 ///
275 /// # Arguments
276 /// * pre_cmd_comments: any comments that appear before the start of the command
277 /// * list: an and/or list of commands previously generated by the same builder
278 /// * separator: indicates how the command was delimited
279 /// * cmd_comment: a comment that appears at the end of the command
280 fn complete_command(&mut self,
281 pre_cmd_comments: Vec<Newline>,
282 list: Self::CommandList,
283 separator: SeparatorKind,
284 cmd_comment: Option<Newline>)
285 -> Result<Self::Command, Self::Error>;
286
287 /// Invoked when multiple commands are parsed which are separated by `&&` or `||`.
288 /// Typically after the first command is run, each of the following commands may or
289 /// may not be executed, depending on the exit status of the previously executed command.
290 ///
291 /// # Arguments
292 /// * first: the first command before any `&&` or `||` separator
293 /// * rest: A collection of comments after the last separator and the next command.
294 fn and_or_list(&mut self,
295 first: Self::ListableCommand,
296 rest: Vec<(Vec<Newline>, AndOr<Self::ListableCommand>)>)
297 -> Result<Self::CommandList, Self::Error>;
298
299 /// Invoked when a pipeline of commands is parsed.
300 /// A pipeline is one or more commands where the standard output of the previous
301 /// typically becomes the standard input of the next.
302 ///
303 /// # Arguments
304 /// * bang: the presence of a `!` at the start of the pipeline, typically indicating
305 /// that the pipeline's exit status should be logically inverted.
306 /// * cmds: a collection of tuples which are any comments appearing after a pipe token, followed
307 /// by the command itself, all in the order they were parsed
308 fn pipeline(&mut self,
309 bang: bool,
310 cmds: Vec<(Vec<Newline>, Self::PipeableCommand)>)
311 -> Result<Self::ListableCommand, Self::Error>;
312
313 /// Invoked when the "simplest" possible command is parsed: an executable with arguments.
314 ///
315 /// # Arguments
316 /// * redirects_or_env_vars: redirections or environment variables that occur before any command
317 /// * redirects_or_cmd_words: redirections or any command or argument
318 fn simple_command(
319 &mut self,
320 redirects_or_env_vars: Vec<RedirectOrEnvVar<Self::Redirect, String, Self::Word>>,
321 redirects_or_cmd_words: Vec<RedirectOrCmdWord<Self::Redirect, Self::Word>>
322 ) -> Result<Self::PipeableCommand, Self::Error>;
323
324 /// Invoked when a non-zero number of commands were parsed between balanced curly braces.
325 /// Typically these commands should run within the current shell environment.
326 ///
327 /// # Arguments
328 /// * cmds: the commands that were parsed between braces
329 /// * redirects: any redirects to be applied over the **entire** group of commands
330 fn brace_group(&mut self,
331 cmds: CommandGroup<Self::Command>,
332 redirects: Vec<Self::Redirect>)
333 -> Result<Self::CompoundCommand, Self::Error>;
334
335 /// Invoked when a non-zero number of commands were parsed between balanced parentheses.
336 /// Typically these commands should run within their own environment without affecting
337 /// the shell's global environment.
338 ///
339 /// # Arguments
340 /// * cmds: the commands that were parsed between parens
341 /// * redirects: any redirects to be applied over the **entire** group of commands
342 fn subshell(&mut self,
343 cmds: CommandGroup<Self::Command>,
344 redirects: Vec<Self::Redirect>)
345 -> Result<Self::CompoundCommand, Self::Error>;
346
347 /// Invoked when a loop command like `while` or `until` is parsed.
348 /// Typically these commands will execute their body based on the exit status of their guard.
349 ///
350 /// # Arguments
351 /// * kind: the type of the loop: `while` or `until`
352 /// * guard: commands that determine how long the loop will run for
353 /// * body: commands to be run every iteration of the loop
354 /// * redirects: any redirects to be applied over **all** commands part of the loop
355 fn loop_command(&mut self,
356 kind: LoopKind,
357 guard_body_pair: GuardBodyPairGroup<Self::Command>,
358 redirects: Vec<Self::Redirect>)
359 -> Result<Self::CompoundCommand, Self::Error>;
360
361 /// Invoked when an `if` conditional command is parsed.
362 /// Typically an `if` command is made up of one or more guard-body pairs, where the body
363 /// of the first successful corresponding guard is executed. There can also be an optional
364 /// `else` part to be run if no guard is successful.
365 ///
366 /// # Arguments
367 /// * fragments: parsed fragments relating to a shell `if` command.
368 /// * redirects: any redirects to be applied over **all** commands within the `if` command
369 fn if_command(&mut self,
370 fragments: IfFragments<Self::Command>,
371 redirects: Vec<Self::Redirect>)
372 -> Result<Self::CompoundCommand, Self::Error>;
373
374 /// Invoked when a `for` command is parsed.
375 /// Typically a `for` command binds a variable to each member in a group of words and
376 /// invokes its body with that variable present in the environment. If no words are
377 /// specified, the command will iterate over the arguments to the script or enclosing function.
378 ///
379 /// # Arguments
380 /// * fragments: parsed fragments relating to a shell `for` command.
381 /// * redirects: any redirects to be applied over **all** commands within the `for` command
382 fn for_command(&mut self,
383 fragments: ForFragments<Self::Word, Self::Command>,
384 redirects: Vec<Self::Redirect>)
385 -> Result<Self::CompoundCommand, Self::Error>;
386
387 /// Invoked when a `case` command is parsed.
388 /// Typically this command will execute certain commands when a given word matches a pattern.
389 ///
390 /// # Arguments
391 /// * fragments: parsed fragments relating to a shell `case` command.
392 /// * redirects: any redirects to be applied over **all** commands part of the `case` block
393 fn case_command(&mut self,
394 fragments: CaseFragments<Self::Word, Self::Command>,
395 redirects: Vec<Self::Redirect>)
396 -> Result<Self::CompoundCommand, Self::Error>;
397
398 /// Bridges the gap between a `PipeableCommand` and a `CompoundCommand` since
399 /// `CompoundCommand`s are typically `PipeableCommand`s as well.
400 ///
401 /// # Arguments
402 /// cmd: The `CompoundCommand` to convert into a `PipeableCommand`
403 fn compound_command_into_pipeable(&mut self,
404 cmd: Self::CompoundCommand)
405 -> Result<Self::PipeableCommand, Self::Error>;
406
407 /// Invoked when a function declaration is parsed.
408 /// Typically a function declaration overwrites any previously defined function
409 /// within the current environment.
410 ///
411 /// # Arguments
412 /// * name: the name of the function to be created
413 /// * post_name_comments: any comments appearing after the function name but before the body
414 /// * body: commands to be run when the function is invoked
415 fn function_declaration(&mut self,
416 name: String,
417 post_name_comments: Vec<Newline>,
418 body: Self::CompoundCommand)
419 -> Result<Self::PipeableCommand, Self::Error>;
420
421 /// Invoked when only comments are parsed with no commands following.
422 /// This can occur if an entire shell script is commented out or if there
423 /// are comments present at the end of the script.
424 ///
425 /// # Arguments
426 /// * comments: the parsed comments
427 fn comments(&mut self,
428 comments: Vec<Newline>)
429 -> Result<(), Self::Error>;
430
431 /// Invoked when a word is parsed.
432 ///
433 /// # Arguments
434 /// * kind: the type of word that was parsed
435 fn word(&mut self,
436 kind: ComplexWordKind<Self::Command>)
437 -> Result<Self::Word, Self::Error>;
438
439 /// Invoked when a redirect is parsed.
440 ///
441 /// # Arguments
442 /// * kind: the type of redirect that was parsed
443 fn redirect(&mut self,
444 kind: RedirectKind<Self::Word>)
445 -> Result<Self::Redirect, Self::Error>;
446}
447
// Expands to the full body of a `Builder` impl for a pointer-like wrapper
// around `$T`: each associated type is taken straight from `$T`, and each
// method hands its arguments, unchanged, to the `$T` value reached by
// dereferencing `self` twice.
macro_rules! impl_builder_body {
    ($T:ident) => {
        type Command = $T::Command;
        type CommandList = $T::CommandList;
        type ListableCommand = $T::ListableCommand;
        type PipeableCommand = $T::PipeableCommand;
        type CompoundCommand = $T::CompoundCommand;
        type Word = $T::Word;
        type Redirect = $T::Redirect;
        type Error = $T::Error;

        fn complete_command(
            &mut self,
            pre_cmd_comments: Vec<Newline>,
            list: Self::CommandList,
            separator: SeparatorKind,
            cmd_comment: Option<Newline>,
        ) -> Result<Self::Command, Self::Error> {
            $T::complete_command(&mut **self, pre_cmd_comments, list, separator, cmd_comment)
        }

        fn and_or_list(
            &mut self,
            first: Self::ListableCommand,
            rest: Vec<(Vec<Newline>, AndOr<Self::ListableCommand>)>,
        ) -> Result<Self::CommandList, Self::Error> {
            $T::and_or_list(&mut **self, first, rest)
        }

        fn pipeline(
            &mut self,
            bang: bool,
            cmds: Vec<(Vec<Newline>, Self::PipeableCommand)>,
        ) -> Result<Self::ListableCommand, Self::Error> {
            $T::pipeline(&mut **self, bang, cmds)
        }

        fn simple_command(
            &mut self,
            redirects_or_env_vars: Vec<RedirectOrEnvVar<Self::Redirect, String, Self::Word>>,
            redirects_or_cmd_words: Vec<RedirectOrCmdWord<Self::Redirect, Self::Word>>,
        ) -> Result<Self::PipeableCommand, Self::Error> {
            $T::simple_command(&mut **self, redirects_or_env_vars, redirects_or_cmd_words)
        }

        fn brace_group(
            &mut self,
            cmds: CommandGroup<Self::Command>,
            redirects: Vec<Self::Redirect>,
        ) -> Result<Self::CompoundCommand, Self::Error> {
            $T::brace_group(&mut **self, cmds, redirects)
        }

        fn subshell(
            &mut self,
            cmds: CommandGroup<Self::Command>,
            redirects: Vec<Self::Redirect>,
        ) -> Result<Self::CompoundCommand, Self::Error> {
            $T::subshell(&mut **self, cmds, redirects)
        }

        fn loop_command(
            &mut self,
            kind: LoopKind,
            guard_body_pair: GuardBodyPairGroup<Self::Command>,
            redirects: Vec<Self::Redirect>,
        ) -> Result<Self::CompoundCommand, Self::Error> {
            $T::loop_command(&mut **self, kind, guard_body_pair, redirects)
        }

        fn if_command(
            &mut self,
            fragments: IfFragments<Self::Command>,
            redirects: Vec<Self::Redirect>,
        ) -> Result<Self::CompoundCommand, Self::Error> {
            $T::if_command(&mut **self, fragments, redirects)
        }

        fn for_command(
            &mut self,
            fragments: ForFragments<Self::Word, Self::Command>,
            redirects: Vec<Self::Redirect>,
        ) -> Result<Self::CompoundCommand, Self::Error> {
            $T::for_command(&mut **self, fragments, redirects)
        }

        fn case_command(
            &mut self,
            fragments: CaseFragments<Self::Word, Self::Command>,
            redirects: Vec<Self::Redirect>,
        ) -> Result<Self::CompoundCommand, Self::Error> {
            $T::case_command(&mut **self, fragments, redirects)
        }

        fn compound_command_into_pipeable(
            &mut self,
            cmd: Self::CompoundCommand,
        ) -> Result<Self::PipeableCommand, Self::Error> {
            $T::compound_command_into_pipeable(&mut **self, cmd)
        }

        fn function_declaration(
            &mut self,
            name: String,
            post_name_comments: Vec<Newline>,
            body: Self::CompoundCommand,
        ) -> Result<Self::PipeableCommand, Self::Error> {
            $T::function_declaration(&mut **self, name, post_name_comments, body)
        }

        fn comments(&mut self, comments: Vec<Newline>) -> Result<(), Self::Error> {
            $T::comments(&mut **self, comments)
        }

        fn word(
            &mut self,
            kind: ComplexWordKind<Self::Command>,
        ) -> Result<Self::Word, Self::Error> {
            $T::word(&mut **self, kind)
        }

        fn redirect(
            &mut self,
            kind: RedirectKind<Self::Word>,
        ) -> Result<Self::Redirect, Self::Error> {
            $T::redirect(&mut **self, kind)
        }
    };
}
581
582impl<'a, T: Builder + ?Sized> Builder for &'a mut T {
583 impl_builder_body!(T);
584}
585
586impl<T: Builder + ?Sized> Builder for Box<T> {
587 impl_builder_body!(T);
588}