brush_parser/
parser.rs

1use crate::ast::{self, SeparatorOperator};
2use crate::error;
3use crate::tokenizer::{Token, TokenEndReason, Tokenizer, TokenizerOptions, Tokens};
4
/// Options used to control the behavior of the parser.
// N.B. `Debug` is derived so these options can be logged/inspected when
// diagnosing parse behavior; all fields are plain `bool`s, so the derive
// is free and backward compatible.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct ParserOptions {
    /// Whether or not to enable extended globbing (a.k.a. `extglob`).
    pub enable_extended_globbing: bool,
    /// Whether or not to enable POSIX compliance mode.
    pub posix_mode: bool,
    /// Whether or not to enable maximal compatibility with the `sh` shell.
    pub sh_mode: bool,
    /// Whether or not to perform tilde expansion.
    pub tilde_expansion: bool,
}
17
18impl Default for ParserOptions {
19    fn default() -> Self {
20        Self {
21            enable_extended_globbing: true,
22            posix_mode: false,
23            sh_mode: false,
24            tilde_expansion: true,
25        }
26    }
27}
28
29impl ParserOptions {
30    /// Returns the tokenizer options implied by these parser options.
31    pub fn tokenizer_options(&self) -> TokenizerOptions {
32        TokenizerOptions {
33            enable_extended_globbing: self.enable_extended_globbing,
34            posix_mode: self.posix_mode,
35            sh_mode: self.sh_mode,
36        }
37    }
38}
39
/// Implements parsing for shell programs.
pub struct Parser<R> {
    // The input source tokens are read from.
    reader: R,
    // Options controlling tokenization and parsing behavior.
    options: ParserOptions,
    // Information about the origin of the input being parsed.
    source_info: SourceInfo,
}
46
47impl<R: std::io::BufRead> Parser<R> {
48    /// Returns a new parser instance.
49    ///
50    /// # Arguments
51    ///
52    /// * `reader` - The reader to use for input.
53    /// * `options` - The options to use when parsing.
54    /// * `source_info` - Information about the source of the tokens.
55    pub fn new(reader: R, options: &ParserOptions, source_info: &SourceInfo) -> Self {
56        Parser {
57            reader,
58            options: options.clone(),
59            source_info: source_info.clone(),
60        }
61    }
62
63    /// Parses the input into an abstract syntax tree (AST) of a shell program.
64    pub fn parse_program(&mut self) -> Result<ast::Program, error::ParseError> {
65        //
66        // References:
67        //   * https://www.gnu.org/software/bash/manual/bash.html#Shell-Syntax
68        //   * https://mywiki.wooledge.org/BashParser
69        //   * https://aosabook.org/en/v1/bash.html
70        //   * https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html
71        //
72
73        let tokens = self.tokenize()?;
74        parse_tokens(&tokens, &self.options, &self.source_info)
75    }
76
77    /// Parses a function definition body from the input. The body is expected to be
78    /// preceded by "()", but no function name.
79    pub fn parse_function_parens_and_body(
80        &mut self,
81    ) -> Result<ast::FunctionBody, error::ParseError> {
82        let tokens = self.tokenize()?;
83        let parse_result = token_parser::function_parens_and_body(
84            &Tokens { tokens: &tokens },
85            &self.options,
86            &self.source_info,
87        );
88        parse_result_to_error(parse_result, &tokens)
89    }
90
91    fn tokenize(&mut self) -> Result<Vec<Token>, error::ParseError> {
92        // First we tokenize the input, according to the policy implied by provided options.
93        let mut tokenizer = Tokenizer::new(&mut self.reader, &self.options.tokenizer_options());
94
95        tracing::debug!(target: "tokenize", "Tokenizing...");
96
97        let mut tokens = vec![];
98        loop {
99            let result = match tokenizer.next_token() {
100                Ok(result) => result,
101                Err(e) => {
102                    return Err(error::ParseError::Tokenizing {
103                        inner: e,
104                        position: tokenizer.current_location(),
105                    });
106                }
107            };
108
109            let reason = result.reason;
110            if let Some(token) = result.token {
111                tracing::debug!(target: "tokenize", "TOKEN {}: {:?} {reason:?}", tokens.len(), token);
112                tokens.push(token);
113            }
114
115            if matches!(reason, TokenEndReason::EndOfInput) {
116                break;
117            }
118        }
119
120        tracing::debug!(target: "tokenize", "  => {} token(s)", tokens.len());
121
122        Ok(tokens)
123    }
124}
125
126/// Parses a sequence of tokens into the abstract syntax tree (AST) of a shell program.
127///
128/// # Arguments
129///
130/// * `tokens` - The tokens to parse.
131/// * `options` - The options to use when parsing.
132/// * `source_info` - Information about the source of the tokens.
133pub fn parse_tokens(
134    tokens: &Vec<Token>,
135    options: &ParserOptions,
136    source_info: &SourceInfo,
137) -> Result<ast::Program, error::ParseError> {
138    let parse_result = token_parser::program(&Tokens { tokens }, options, source_info);
139    parse_result_to_error(parse_result, tokens)
140}
141
142fn parse_result_to_error<R>(
143    parse_result: Result<R, peg::error::ParseError<usize>>,
144    tokens: &Vec<Token>,
145) -> Result<R, error::ParseError>
146where
147    R: std::fmt::Debug,
148{
149    let result = match parse_result {
150        Ok(program) => {
151            tracing::debug!(target: "parse", "PROG: {:?}", program);
152            Ok(program)
153        }
154        Err(parse_error) => {
155            tracing::debug!(target: "parse", "Parse error: {:?}", parse_error);
156            Err(error::convert_peg_parse_error(
157                &parse_error,
158                tokens.as_slice(),
159            ))
160        }
161    };
162
163    result
164}
165
166impl peg::Parse for Tokens<'_> {
167    type PositionRepr = usize;
168
169    #[inline]
170    fn start(&self) -> usize {
171        0
172    }
173
174    #[inline]
175    fn is_eof(&self, p: usize) -> bool {
176        p >= self.tokens.len()
177    }
178
179    #[inline]
180    fn position_repr(&self, p: usize) -> Self::PositionRepr {
181        p
182    }
183}
184
185impl<'a> peg::ParseElem<'a> for Tokens<'a> {
186    type Element = &'a Token;
187
188    #[inline]
189    fn parse_elem(&'a self, pos: usize) -> peg::RuleResult<Self::Element> {
190        match self.tokens.get(pos) {
191            Some(c) => peg::RuleResult::Matched(pos + 1, c),
192            None => peg::RuleResult::Failed,
193        }
194    }
195}
196
197impl<'a> peg::ParseSlice<'a> for Tokens<'a> {
198    type Slice = String;
199
200    fn parse_slice(&'a self, start: usize, end: usize) -> Self::Slice {
201        let mut result = String::new();
202        let mut last_token_was_word = false;
203
204        for token in &self.tokens[start..end] {
205            match token {
206                Token::Operator(s, _) => {
207                    result.push_str(s);
208                    last_token_was_word = false;
209                }
210                Token::Word(s, _) => {
211                    // Place spaces between adjacent words.
212                    if last_token_was_word {
213                        result.push(' ');
214                    }
215
216                    result.push_str(s);
217                    last_token_was_word = true;
218                }
219            }
220        }
221
222        result
223    }
224}
225
/// Information about the source of tokens.
// N.B. `Debug` is derived so source information can be included in logs and
// error diagnostics; it is backward compatible with existing derives.
#[derive(Clone, Debug, Default)]
pub struct SourceInfo {
    /// The source of the tokens.
    pub source: String,
}
232
233peg::parser! {
234    grammar token_parser<'a>(parser_options: &ParserOptions, source_info: &SourceInfo) for Tokens<'a> {
235        pub(crate) rule program() -> ast::Program =
236            linebreak() c:complete_commands() linebreak() { ast::Program { complete_commands: c } } /
237            linebreak() { ast::Program { complete_commands: vec![] } }
238
239        rule complete_commands() -> Vec<ast::CompleteCommand> =
240            c:complete_command() ++ newline_list()
241
242        rule complete_command() -> ast::CompleteCommand =
243            first:and_or() remainder:(s:separator_op() l:and_or() { (s, l) })* last_sep:separator_op()? {
244                let mut and_ors = vec![first];
245                let mut seps = vec![];
246
247                for (sep, ao) in remainder {
248                    seps.push(sep);
249                    and_ors.push(ao);
250                }
251
252                // N.B. We default to synchronous if no separator op is given.
253                seps.push(last_sep.unwrap_or(SeparatorOperator::Sequence));
254
255                let mut items = vec![];
256                for (i, ao) in and_ors.into_iter().enumerate() {
257                    items.push(ast::CompoundListItem(ao, seps[i].clone()));
258                }
259
260                ast::CompoundList(items)
261            }
262
263        rule and_or() -> ast::AndOrList =
264            first:pipeline() additional:_and_or_item()* { ast::AndOrList { first, additional } }
265
266        rule _and_or_item() -> ast::AndOr =
267            op:_and_or_op() linebreak() p:pipeline() { op(p) }
268
269        rule _and_or_op() -> fn(ast::Pipeline) -> ast::AndOr =
270            specific_operator("&&") { ast::AndOr::And } /
271            specific_operator("||") { ast::AndOr::Or }
272
273        rule pipeline() -> ast::Pipeline =
274            timed:pipeline_timed()? bang:bang()? seq:pipe_sequence() { ast::Pipeline { timed, bang: bang.is_some(), seq } }
275
276        rule pipeline_timed() -> ast::PipelineTimed =
277            non_posix_extensions_enabled() specific_word("time") posix_output:specific_word("-p")? {
278                if posix_output.is_some() {
279                    ast::PipelineTimed::TimedWithPosixOutput
280                } else {
281                    ast::PipelineTimed::Timed
282                }
283            }
284
285        rule bang() -> bool = specific_word("!") { true }
286
287        pub(crate) rule pipe_sequence() -> Vec<ast::Command> =
288            c:(c:command() r:&pipe_extension_redirection()? {? // check for `|&` without consuming the stream.
289                let mut c = c;
290                if r.is_some() {
291                    add_pipe_extension_redirection(&mut c)?;
292                }
293                Ok(c)
294            }) ++ (pipe_operator() linebreak()) {
295            c
296        }
297        rule pipe_operator() =
298            specific_operator("|") /
299            pipe_extension_redirection()
300
301        rule pipe_extension_redirection() -> &'input Token  =
302            non_posix_extensions_enabled() p:specific_operator("|&") { p }
303
304        // N.B. We needed to move the function definition branch up to avoid conflicts with array assignment syntax.
305        rule command() -> ast::Command =
306            f:function_definition() { ast::Command::Function(f) } /
307            c:simple_command() { ast::Command::Simple(c) } /
308            c:compound_command() r:redirect_list()? { ast::Command::Compound(c, r) } /
309            // N.B. Extended test commands are bash extensions.
310            non_posix_extensions_enabled() c:extended_test_command() { ast::Command::ExtendedTest(c) } /
311            expected!("command")
312
313        // N.B. The arithmetic command is a non-sh extension.
314        // N.B. The arithmetic for clause command is a non-sh extension.
315        pub(crate) rule compound_command() -> ast::CompoundCommand =
316            non_posix_extensions_enabled() a:arithmetic_command() { ast::CompoundCommand::Arithmetic(a) } /
317            b:brace_group() { ast::CompoundCommand::BraceGroup(b) } /
318            s:subshell() { ast::CompoundCommand::Subshell(s) } /
319            f:for_clause() { ast::CompoundCommand::ForClause(f) } /
320            c:case_clause() { ast::CompoundCommand::CaseClause(c) } /
321            i:if_clause() { ast::CompoundCommand::IfClause(i) } /
322            w:while_clause() { ast::CompoundCommand::WhileClause(w) } /
323            u:until_clause() { ast::CompoundCommand::UntilClause(u) } /
324            non_posix_extensions_enabled() c:arithmetic_for_clause() { ast::CompoundCommand::ArithmeticForClause(c) } /
325            expected!("compound command")
326
327        pub(crate) rule arithmetic_command() -> ast::ArithmeticCommand =
328            specific_operator("(") specific_operator("(") expr:arithmetic_expression() specific_operator(")") specific_operator(")") {
329                ast::ArithmeticCommand { expr }
330            }
331
332        pub(crate) rule arithmetic_expression() -> ast::UnexpandedArithmeticExpr =
333            raw_expr:$(arithmetic_expression_piece()*) { ast::UnexpandedArithmeticExpr { value: raw_expr } }
334
335        rule arithmetic_expression_piece() =
336            specific_operator("(") (!specific_operator(")") arithmetic_expression_piece())* specific_operator(")") {} /
337            !arithmetic_end() [_] {}
338
339        // TODO: evaluate arithmetic end; the semicolon is used in arithmetic for loops.
340        rule arithmetic_end() -> () =
341            specific_operator(")") specific_operator(")") {} /
342            specific_operator(";") {}
343
344        rule subshell() -> ast::SubshellCommand =
345            specific_operator("(") c:compound_list() specific_operator(")") { ast::SubshellCommand(c) }
346
347        rule compound_list() -> ast::CompoundList =
348            linebreak() first:and_or() remainder:(s:separator() l:and_or() { (s, l) })* last_sep:separator()? {
349                let mut and_ors = vec![first];
350                let mut seps = vec![];
351
352                for (sep, ao) in remainder {
353                    seps.push(sep.unwrap_or(SeparatorOperator::Sequence));
354                    and_ors.push(ao);
355                }
356
357                // N.B. We default to synchronous if no separator op is given.
358                let last_sep = last_sep.unwrap_or(None);
359                seps.push(last_sep.unwrap_or(SeparatorOperator::Sequence));
360
361                let mut items = vec![];
362                for (i, ao) in and_ors.into_iter().enumerate() {
363                    items.push(ast::CompoundListItem(ao, seps[i].clone()));
364                }
365
366                ast::CompoundList(items)
367            }
368
369        rule for_clause() -> ast::ForClauseCommand =
370            specific_word("for") n:name() linebreak() _in() w:wordlist()? sequential_sep() d:do_group() {
371                ast::ForClauseCommand { variable_name: n.to_owned(), values: w, body: d }
372            } /
373            specific_word("for") n:name() sequential_sep()? d:do_group() {
374                ast::ForClauseCommand { variable_name: n.to_owned(), values: None, body: d }
375            }
376
377        // N.B. The arithmetic for loop is a non-sh extension.
378        rule arithmetic_for_clause() -> ast::ArithmeticForClauseCommand =
379            specific_word("for")
380            specific_operator("(") specific_operator("(")
381                initializer:arithmetic_expression()? specific_operator(";")
382                condition:arithmetic_expression()? specific_operator(";")
383                updater:arithmetic_expression()?
384            specific_operator(")") specific_operator(")")
385            sequential_sep()
386            body:do_group() {
387                ast::ArithmeticForClauseCommand { initializer, condition, updater, body }
388            }
389
390        rule extended_test_command() -> ast::ExtendedTestExpr =
391            specific_word("[[") linebreak() e:extended_test_expression() linebreak() specific_word("]]") { e }
392
393        rule extended_test_expression() -> ast::ExtendedTestExpr = precedence! {
394            left:(@) linebreak() specific_operator("||") linebreak() right:@ { ast::ExtendedTestExpr::Or(Box::from(left), Box::from(right)) }
395            --
396            left:(@) linebreak() specific_operator("&&") linebreak() right:@ { ast::ExtendedTestExpr::And(Box::from(left), Box::from(right)) }
397            --
398            specific_word("!") e:@ { ast::ExtendedTestExpr::Not(Box::from(e)) }
399            --
400            specific_operator("(") e:extended_test_expression() specific_operator(")") { ast::ExtendedTestExpr::Parenthesized(Box::from(e)) }
401            --
402            // Arithmetic operators
403            left:word() specific_word("-eq") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::ArithmeticEqualTo, ast::Word::from(left), ast::Word::from(right)) }
404            left:word() specific_word("-ne") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::ArithmeticNotEqualTo, ast::Word::from(left), ast::Word::from(right)) }
405            left:word() specific_word("-lt") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::ArithmeticLessThan, ast::Word::from(left), ast::Word::from(right)) }
406            left:word() specific_word("-le") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::ArithmeticLessThanOrEqualTo, ast::Word::from(left), ast::Word::from(right)) }
407            left:word() specific_word("-gt") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::ArithmeticGreaterThan, ast::Word::from(left), ast::Word::from(right)) }
408            left:word() specific_word("-ge") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::ArithmeticGreaterThanOrEqualTo, ast::Word::from(left), ast::Word::from(right)) }
409            // Non-arithmetic binary operators
410            left:word() specific_word("-ef") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::FilesReferToSameDeviceAndInodeNumbers, ast::Word::from(left), ast::Word::from(right)) }
411            left:word() specific_word("-nt") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::LeftFileIsNewerOrExistsWhenRightDoesNot, ast::Word::from(left), ast::Word::from(right)) }
412            left:word() specific_word("-ot") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::LeftFileIsOlderOrDoesNotExistWhenRightDoes, ast::Word::from(left), ast::Word::from(right)) }
413            left:word() (specific_word("==") / specific_word("=")) right:word()  { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::StringExactlyMatchesPattern, ast::Word::from(left), ast::Word::from(right)) }
414            left:word() specific_word("!=") right:word()  { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::StringDoesNotExactlyMatchPattern, ast::Word::from(left), ast::Word::from(right)) }
415            left:word() specific_word("=~") right:regex_word()  {
416                if right.value.starts_with(['\'', '\"']) {
417                    // TODO: Confirm it ends with that too?
418                    ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::StringContainsSubstring, ast::Word::from(left), right)
419                } else {
420                    ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::StringMatchesRegex, ast::Word::from(left), right)
421                }
422            }
423            left:word() specific_operator("<") right:word()   { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::LeftSortsBeforeRight, ast::Word::from(left), ast::Word::from(right)) }
424            left:word() specific_operator(">") right:word()   { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::LeftSortsAfterRight, ast::Word::from(left), ast::Word::from(right)) }
425            --
426            p:extended_unary_predicate() f:word() { ast::ExtendedTestExpr::UnaryTest(p, ast::Word::from(f)) }
427            --
428            w:word() { ast::ExtendedTestExpr::UnaryTest(ast::UnaryPredicate::StringHasNonZeroLength, ast::Word::from(w)) }
429        }
430
431        rule extended_unary_predicate() -> ast::UnaryPredicate =
432            specific_word("-a") { ast::UnaryPredicate::FileExists } /
433            specific_word("-b") { ast::UnaryPredicate::FileExistsAndIsBlockSpecialFile } /
434            specific_word("-c") { ast::UnaryPredicate::FileExistsAndIsCharSpecialFile } /
435            specific_word("-d") { ast::UnaryPredicate::FileExistsAndIsDir } /
436            specific_word("-e") { ast::UnaryPredicate::FileExists } /
437            specific_word("-f") { ast::UnaryPredicate::FileExistsAndIsRegularFile } /
438            specific_word("-g") { ast::UnaryPredicate::FileExistsAndIsSetgid } /
439            specific_word("-h") { ast::UnaryPredicate::FileExistsAndIsSymlink } /
440            specific_word("-k") { ast::UnaryPredicate::FileExistsAndHasStickyBit } /
441            specific_word("-n") { ast::UnaryPredicate::StringHasNonZeroLength } /
442            specific_word("-o") { ast::UnaryPredicate::ShellOptionEnabled } /
443            specific_word("-p") { ast::UnaryPredicate::FileExistsAndIsFifo } /
444            specific_word("-r") { ast::UnaryPredicate::FileExistsAndIsReadable } /
445            specific_word("-s") { ast::UnaryPredicate::FileExistsAndIsNotZeroLength } /
446            specific_word("-t") { ast::UnaryPredicate::FdIsOpenTerminal } /
447            specific_word("-u") { ast::UnaryPredicate::FileExistsAndIsSetuid } /
448            specific_word("-v") { ast::UnaryPredicate::ShellVariableIsSetAndAssigned } /
449            specific_word("-w") { ast::UnaryPredicate::FileExistsAndIsWritable } /
450            specific_word("-x") { ast::UnaryPredicate::FileExistsAndIsExecutable } /
451            specific_word("-z") { ast::UnaryPredicate::StringHasZeroLength } /
452            specific_word("-G") { ast::UnaryPredicate::FileExistsAndOwnedByEffectiveGroupId } /
453            specific_word("-L") { ast::UnaryPredicate::FileExistsAndIsSymlink } /
454            specific_word("-N") { ast::UnaryPredicate::FileExistsAndModifiedSinceLastRead } /
455            specific_word("-O") { ast::UnaryPredicate::FileExistsAndOwnedByEffectiveUserId } /
456            specific_word("-R") { ast::UnaryPredicate::ShellVariableIsSetAndNameRef } /
457            specific_word("-S") { ast::UnaryPredicate::FileExistsAndIsSocket }
458
459        // N.B. For some reason we seem to need to allow a select subset
460        // of unescaped operators in regex words.
461        rule regex_word() -> ast::Word =
462            value:$((!specific_word("]]") regex_word_piece())+) {
463                ast::Word { value }
464            }
465
466        rule regex_word_piece() =
467            word() {} /
468            specific_operator("|") {} /
469            specific_operator("(") parenthesized_regex_word()* specific_operator(")") {}
470
471        rule parenthesized_regex_word() =
472            regex_word_piece() /
473            !specific_operator(")") !specific_operator("]]") [_]
474
475        rule name() -> &'input str =
476            w:[Token::Word(_, _)] { w.to_str() }
477
478        rule _in() -> () =
479            specific_word("in") { }
480
481        // TODO: validate if this should call non_reserved_word() or word()
482        rule wordlist() -> Vec<ast::Word> =
483            (w:non_reserved_word() { ast::Word::from(w) })+
484
485        // TODO: validate if this should call non_reserved_word() or word()
486        pub(crate) rule case_clause() -> ast::CaseClauseCommand =
487            specific_word("case") w:non_reserved_word() linebreak() _in() linebreak() first_items:case_item()* last_item:case_item_ns()? specific_word("esac") {
488                let mut cases = first_items;
489
490                if let Some(last_item) = last_item {
491                    cases.push(last_item);
492                }
493
494                ast::CaseClauseCommand { value: ast::Word::from(w), cases }
495            }
496
497        pub(crate) rule case_item_ns() -> ast::CaseItem =
498            specific_operator("(")? p:pattern() specific_operator(")") c:compound_list() {
499                ast::CaseItem { patterns: p, cmd: Some(c), post_action: ast::CaseItemPostAction::ExitCase }
500            } /
501            specific_operator("(")? p:pattern() specific_operator(")") linebreak() {
502                ast::CaseItem { patterns: p, cmd: None, post_action: ast::CaseItemPostAction::ExitCase }
503            }
504
505        pub(crate) rule case_item() -> ast::CaseItem =
506            specific_operator("(")? p:pattern() specific_operator(")") linebreak() post_action:case_item_post_action() linebreak() {
507                ast::CaseItem { patterns: p, cmd: None, post_action }
508            } /
509            specific_operator("(")? p:pattern() specific_operator(")") c:compound_list() post_action:case_item_post_action() linebreak() {
510                ast::CaseItem { patterns: p, cmd: Some(c), post_action }
511            }
512
513        rule case_item_post_action() -> ast::CaseItemPostAction =
514            specific_operator(";;") {
515                ast::CaseItemPostAction::ExitCase
516            } /
517            non_posix_extensions_enabled() specific_operator(";;&") {
518                ast::CaseItemPostAction::ContinueEvaluatingCases
519            } /
520            non_posix_extensions_enabled() specific_operator(";&") {
521                ast::CaseItemPostAction::UnconditionallyExecuteNextCaseItem
522            }
523
524        // TODO: validate if this should call non_reserved_word() or word()
525        rule pattern() -> Vec<ast::Word> =
526            (w:word() { ast::Word::from(w) }) ++ specific_operator("|")
527
528        rule if_clause() -> ast::IfClauseCommand =
529            specific_word("if") condition:compound_list() specific_word("then") then:compound_list() elses:else_part()? specific_word("fi") {
530                ast::IfClauseCommand {
531                    condition,
532                    then,
533                    elses,
534                }
535            }
536
537        rule else_part() -> Vec<ast::ElseClause> =
538            cs:_conditional_else_part()+ u:_unconditional_else_part()? {
539                let mut parts = vec![];
540                for c in cs {
541                    parts.push(c);
542                }
543
544                if let Some(uncond) = u {
545                    parts.push(uncond);
546                }
547
548                parts
549            } /
550            e:_unconditional_else_part() { vec![e] }
551
552        rule _conditional_else_part() -> ast::ElseClause =
553            specific_word("elif") condition:compound_list() specific_word("then") body:compound_list() {
554                ast::ElseClause { condition: Some(condition), body }
555            }
556
557        rule _unconditional_else_part() -> ast::ElseClause =
558            specific_word("else") body:compound_list() {
559                ast::ElseClause { condition: None, body }
560             }
561
562        rule while_clause() -> ast::WhileOrUntilClauseCommand =
563            specific_word("while") c:compound_list() d:do_group() { ast::WhileOrUntilClauseCommand(c, d) }
564
565        rule until_clause() -> ast::WhileOrUntilClauseCommand =
566            specific_word("until") c:compound_list() d:do_group() { ast::WhileOrUntilClauseCommand(c, d) }
567
568        // N.B. Non-sh extensions allows use of the 'function' word to indicate a function definition.
569        rule function_definition() -> ast::FunctionDefinition =
570            specific_word("function")? fname:fname() body:function_parens_and_body() {
571                ast::FunctionDefinition { fname: fname.to_owned(), body, source: source_info.source.clone() }
572            } /
573            specific_word("function") fname:fname() linebreak() body:function_body() {
574                ast::FunctionDefinition { fname: fname.to_owned(), body, source: source_info.source.clone() }
575            } /
576            expected!("function definition")
577
578        pub(crate) rule function_parens_and_body() -> ast::FunctionBody =
579            specific_operator("(") specific_operator(")") linebreak() body:function_body() { body }
580
581        rule function_body() -> ast::FunctionBody =
582            c:compound_command() r:redirect_list()? { ast::FunctionBody(c, r) }
583
584        rule fname() -> &'input str =
585            // Special-case: don't allow it to end with an equals sign, to avoid the challenge of
586            // misinterpreting certain declaration assignments as function definitions.
587            // TODO: Find a way to make this still work without requiring this targeted exception.
588            w:[Token::Word(word, _) if !word.ends_with('=')] { w.to_str() }
589
590        rule brace_group() -> ast::BraceGroupCommand =
591            specific_word("{") c:compound_list() specific_word("}") { ast::BraceGroupCommand(c) }
592
593        rule do_group() -> ast::DoGroupCommand =
594            specific_word("do") c:compound_list() specific_word("done") { ast::DoGroupCommand(c) }
595
596        rule simple_command() -> ast::SimpleCommand =
597            prefix:cmd_prefix() word_and_suffix:(word_or_name:cmd_word() suffix:cmd_suffix()? { (word_or_name, suffix) })? {
598                match word_and_suffix {
599                    Some((word_or_name, suffix)) => {
600                        ast::SimpleCommand { prefix: Some(prefix), word_or_name: Some(ast::Word::from(word_or_name)), suffix }
601                    }
602                    None => {
603                        ast::SimpleCommand { prefix: Some(prefix), word_or_name: None, suffix: None }
604                    }
605                }
606            } /
607            word_or_name:cmd_name() suffix:cmd_suffix()? {
608                ast::SimpleCommand { prefix: None, word_or_name: Some(ast::Word::from(word_or_name)), suffix } } /
609            expected!("simple command")
610
611        rule cmd_name() -> &'input Token =
612            non_reserved_word()
613
614        rule cmd_word() -> &'input Token =
615            !assignment_word() w:non_reserved_word() { w }
616
617        rule cmd_prefix() -> ast::CommandPrefix =
618            p:(
619                i:io_redirect() { ast::CommandPrefixOrSuffixItem::IoRedirect(i) } /
620                assignment_and_word:assignment_word() {
621                    let (assignment, word) = assignment_and_word;
622                    ast::CommandPrefixOrSuffixItem::AssignmentWord(assignment, word)
623                }
624            )+ { ast::CommandPrefix(p) }
625
626        rule cmd_suffix() -> ast::CommandSuffix =
627            s:(
628                non_posix_extensions_enabled() sub:process_substitution() {
629                    let (kind, subshell) = sub;
630                    ast::CommandPrefixOrSuffixItem::ProcessSubstitution(kind, subshell)
631                } /
632                i:io_redirect() {
633                    ast::CommandPrefixOrSuffixItem::IoRedirect(i)
634                } /
635                assignment_and_word:assignment_word() {
636                    let (assignment, word) = assignment_and_word;
637                    ast::CommandPrefixOrSuffixItem::AssignmentWord(assignment, word)
638                } /
639                w:word() {
640                    ast::CommandPrefixOrSuffixItem::Word(ast::Word::from(w))
641                }
642            )+ { ast::CommandSuffix(s) }
643
        // One or more I/O redirections; `expected!` registers a friendly
        // expectation name used in parse error reports.
        rule redirect_list() -> ast::RedirectList =
            r:io_redirect()+ { ast::RedirectList(r) } /
            expected!("redirect list")

        // N.B. here strings are extensions to the POSIX standard.
        // A single I/O redirection, optionally preceded by a file descriptor
        // number (e.g., the `2` in `2>err.txt`).
        // N.B. `&>>` must be tried before `&>` so the longer operator wins.
        rule io_redirect() -> ast::IoRedirect =
            n:io_number()? f:io_file() {
                    let (kind, target) = f;
                    ast::IoRedirect::File(n, kind, target)
                } /
            non_posix_extensions_enabled() specific_operator("&>>") target:filename() { ast::IoRedirect::OutputAndError(ast::Word::from(target), true) } /
            non_posix_extensions_enabled() specific_operator("&>") target:filename() { ast::IoRedirect::OutputAndError(ast::Word::from(target), false) } /
            non_posix_extensions_enabled() n:io_number()? specific_operator("<<<") w:word() { ast::IoRedirect::HereString(n, ast::Word::from(w)) } /
            n:io_number()? h:io_here() { ast::IoRedirect::HereDocument(n, h) } /
            expected!("I/O redirect")
659
        // N.B. Process substitution forms are extensions to the POSIX standard.
        // Maps a redirection operator to its kind plus the target that follows it.
        rule io_file() -> (ast::IoFileRedirectKind, ast::IoFileRedirectTarget) =
            specific_operator("<")  f:io_filename() { (ast::IoFileRedirectKind::Read, f) } /
            specific_operator("<&") f:io_filename_or_fd() { (ast::IoFileRedirectKind::DuplicateInput, f) } /
            specific_operator(">")  f:io_filename() { (ast::IoFileRedirectKind::Write, f) } /
            specific_operator(">&") f:io_filename_or_fd() { (ast::IoFileRedirectKind::DuplicateOutput, f) } /
            specific_operator(">>") f:io_filename() { (ast::IoFileRedirectKind::Append, f) } /
            specific_operator("<>") f:io_filename() { (ast::IoFileRedirectKind::ReadAndWrite, f) } /
            specific_operator(">|") f:io_filename() { (ast::IoFileRedirectKind::Clobber, f) }

        // A redirection target that may be a numeric file descriptor
        // (as in `2>&1`) or a filename.
        rule io_filename_or_fd() -> ast::IoFileRedirectTarget =
            fd:io_fd() { ast::IoFileRedirectTarget::Fd(fd) } /
            io_filename()

        // A word that parses as a u32 file descriptor number; fails the rule
        // (rather than panicking) if the word isn't a valid u32.
        rule io_fd() -> u32 =
            w:[Token::Word(_, _)] {? w.to_str().parse().or(Err("io_fd u32")) }

        // A redirection target: a process substitution (non-POSIX) or a filename.
        rule io_filename() -> ast::IoFileRedirectTarget =
            non_posix_extensions_enabled() sub:process_substitution() {
                let (kind, subshell) = sub;
                ast::IoFileRedirectTarget::ProcessSubstitution(kind, subshell)
            } /
            f:filename() { ast::IoFileRedirectTarget::Filename(ast::Word::from(f)) }

        // A filename is just a word; expansion happens later at evaluation time.
        rule filename() -> &'input Token =
            word()
686
        // A here-document: `<<-` strips leading tabs from each line, `<<`
        // does not. If any part of the tag is quoted (contains a quote or
        // backslash character), the document body is taken literally and is
        // not subject to expansion — matching standard shell behavior.
        // NOTE(review): the `closing_tag` bindings below are never used;
        // consider dropping the binding (`here_tag()` alone) to avoid
        // unused-variable noise.
        pub(crate) rule io_here() -> ast::IoHereDocument =
           specific_operator("<<-") here_tag:here_tag() doc:[_] closing_tag:here_tag() {
                let requires_expansion = !here_tag.to_str().contains(['\'', '"', '\\']);
                ast::IoHereDocument {
                    remove_tabs: true,
                    requires_expansion,
                    here_end: ast::Word::from(here_tag),
                    doc: ast::Word::from(doc)
                }
            } /
            specific_operator("<<") here_tag:here_tag() doc:[_] closing_tag:here_tag() {
                let requires_expansion = !here_tag.to_str().contains(['\'', '"', '\\']);
                ast::IoHereDocument {
                    remove_tabs: false,
                    requires_expansion,
                    here_end: ast::Word::from(here_tag),
                    doc: ast::Word::from(doc)
                }
            }

        // The delimiter word of a here-document (e.g., `EOF`).
        rule here_tag() -> &'input Token =
            word()
709
        // Process substitution: `<(...)` reads from the subshell's output,
        // `>(...)` writes to the subshell's input. The operator token is
        // matched here; the parenthesized body is handled by `subshell()`.
        rule process_substitution() -> (ast::ProcessSubstitutionKind, ast::SubshellCommand) =
            specific_operator("<") s:subshell() { (ast::ProcessSubstitutionKind::Read, s) } /
            specific_operator(">") s:subshell() { (ast::ProcessSubstitutionKind::Write, s) }

        // One or more newline tokens.
        rule newline_list() -> () =
            newline()+ {}

        // Zero or more newline tokens; `quiet!` keeps these out of
        // "expected ..." error messages.
        rule linebreak() -> () =
            quiet! {
                newline()* {}
            }

        // `&` runs the preceding pipeline asynchronously; `;` sequences it.
        rule separator_op() -> ast::SeparatorOperator =
            specific_operator("&") { ast::SeparatorOperator::Async } /
            specific_operator(";") { ast::SeparatorOperator::Sequence }

        // A separator between commands; a bare newline list implies
        // sequential execution (hence `None`).
        rule separator() -> Option<ast::SeparatorOperator> =
            s:separator_op() linebreak() { Some(s) } /
            newline_list() { None }

        // A separator that never means "async": `;` or newline(s).
        rule sequential_sep() -> () =
            specific_operator(";") linebreak() /
            newline_list()
733
734        //
735        // Token interpretation
736        //
737
738        rule non_reserved_word() -> &'input Token =
739            !reserved_word() w:word() { w }
740
741        rule word() -> &'input Token =
742            [Token::Word(_, _)]
743
744        rule reserved_word() -> &'input Token =
745            [Token::Word(w, _) if matches!(w.as_str(),
746                "!" |
747                "{" |
748                "}" |
749                "case" |
750                "do" |
751                "done" |
752                "elif" |
753                "else" |
754                "esac" |
755                "fi" |
756                "for" |
757                "if" |
758                "in" |
759                "then" |
760                "until" |
761                "while"
762            )] /
763
764            // N.B. bash also treats the following as reserved.
765            non_posix_extensions_enabled() token:non_posix_reserved_word_token() { token }
766
767        rule non_posix_reserved_word_token() -> &'input Token =
768            specific_word("[[") /
769            specific_word("]]") /
770            specific_word("function") /
771            specific_word("select")
772
773        rule newline() -> () = quiet! {
774            specific_operator("\n") {}
775        }
776
777        pub(crate) rule assignment_word() -> (ast::Assignment, ast::Word) =
778            non_posix_extensions_enabled() [Token::Word(w, _)] specific_operator("(") elements:array_elements() specific_operator(")") {?
779                let parsed = parse_array_assignment(w.as_str(), elements.as_slice())?;
780
781                let mut all_as_word = w.to_owned();
782                all_as_word.push('(');
783                for (i, e) in elements.iter().enumerate() {
784                    if i > 0 {
785                        all_as_word.push(' ');
786                    }
787                    all_as_word.push_str(e);
788                }
789                all_as_word.push(')');
790
791                Ok((parsed, ast::Word { value: all_as_word }))
792            } /
793            [Token::Word(w, _)] {?
794                let parsed = parse_assignment_word(w.as_str())?;
795                Ok((parsed, ast::Word { value: w.to_owned() }))
796            }
797
798        rule array_elements() -> Vec<&'input String> =
799             linebreak() e:array_element()* { e }
800
801        rule array_element() -> &'input String =
802            linebreak() [Token::Word(e, _)] linebreak() { e }
803
804        // N.B. An I/O number must be a string of only digits, and it must be
805        // followed by a '<' or '>' character (but not consume them). We also
806        // need to make sure that there was no space between the number and the
807        // redirection operator; unfortunately we don't have the space anymore
808        // but we can infer it by looking at the tokens' locations.
809        rule io_number() -> u32 =
810            [Token::Word(w, num_loc) if w.chars().all(|c: char| c.is_ascii_digit())]
811            &([Token::Operator(o, redir_loc) if
812                    o.starts_with(['<', '>']) &&
813                    locations_are_contiguous(num_loc, redir_loc)]) {
814
815                w.parse().unwrap()
816            }
817
818        //
819        // Helpers
820        //
821        rule specific_operator(expected: &str) -> &'input Token =
822            [Token::Operator(w, _) if w.as_str() == expected]
823
824        rule specific_word(expected: &str) -> &'input Token =
825            [Token::Word(w, _) if w.as_str() == expected]
826
827        rule non_posix_extensions_enabled() -> () =
828            &[_] {? if !parser_options.sh_mode { Ok(()) } else { Err("posix") } }
829    }
830}
831
// Character-level grammar for dissecting assignment words that the token
// grammar has already isolated (e.g. `name=value`, `name+=value`,
// `name[idx]=value`, and array element literals like `[key]=value`).
peg::parser! {
    grammar assignments() for str {
        // A full scalar assignment: a name (with optional `+`) followed by
        // `=` and the value text.
        pub(crate) rule name_and_scalar_value() -> ast::Assignment =
            nae:name_and_equals() value:scalar_value() {
                let (name, append) = nae;
                ast::Assignment { name, value, append }
            }

        // The left-hand side of an assignment; the bool is true for the
        // appending form (`+=`).
        pub(crate) rule name_and_equals() -> (ast::AssignmentName, bool) =
            name:name() append:("+"?) "=" {
                (name, append.is_some())
            }

        // An array literal element: either keyed (`[key]=value`, returning
        // `Some(key)`) or a bare value (returning `None` for the key).
        pub(crate) rule literal_array_element() -> (Option<String>, String) =
            "[" inner:$((!"]" [_])*) "]=" value:$([_]*) {
                (Some(inner.to_owned()), value.to_owned())
            } /
            value:$([_]+) {
                (None, value.to_owned())
            }

        // An assignment name: either an array element reference
        // (`name[index]`) or a plain variable name.
        rule name() -> ast::AssignmentName =
            aen:array_element_name() {
                let (name, index) = aen;
                ast::AssignmentName::ArrayElementName(name.to_owned(), index.to_owned())
            } /
            name:scalar_name() {
                ast::AssignmentName::VariableName(name.to_owned())
            }

        // `name[index]` — returns the name and the raw (unevaluated) index text.
        rule array_element_name() -> (&'input str, &'input str) =
            name:scalar_name() "[" ai:array_index() "]" { (name, ai) }

        // Everything up to (but excluding) the closing `]`.
        rule array_index() -> &'input str =
            $((![']'] [_])*)

        // A variable name: letter or underscore, then letters/digits/underscores.
        rule scalar_name() -> &'input str =
            $(alpha_or_underscore() non_first_variable_char()*)

        rule non_first_variable_char() -> () =
            ['_' | '0'..='9' | 'a'..='z' | 'A'..='Z'] {}

        rule alpha_or_underscore() -> () =
            ['_' | 'a'..='z' | 'A'..='Z'] {}

        // The remainder of the word is the (uninterpreted) scalar value.
        rule scalar_value() -> ast::AssignmentValue =
            v:$([_]*) { ast::AssignmentValue::Scalar(ast::Word { value: v.to_owned() }) }
    }
}
881
882fn parse_assignment_word(word: &str) -> Result<ast::Assignment, &'static str> {
883    let parse_result = assignments::name_and_scalar_value(word);
884    parse_result.map_err(|_| "not assignment word")
885}
886
887// add `2>&1` to the command if the pipeline is `|&`
888fn add_pipe_extension_redirection(c: &mut ast::Command) -> Result<(), &'static str> {
889    fn add_to_redirect_list(l: &mut Option<ast::RedirectList>, r: ast::IoRedirect) {
890        if let Some(l) = l {
891            l.0.push(r);
892        } else {
893            let v = vec![r];
894            *l = Some(ast::RedirectList(v));
895        }
896    }
897
898    let r = ast::IoRedirect::File(
899        Some(2),
900        ast::IoFileRedirectKind::DuplicateOutput,
901        ast::IoFileRedirectTarget::Fd(1),
902    );
903
904    match c {
905        ast::Command::Simple(c) => {
906            let r = ast::CommandPrefixOrSuffixItem::IoRedirect(r);
907            if let Some(l) = &mut c.suffix {
908                l.0.push(r);
909            } else {
910                c.suffix = Some(ast::CommandSuffix(vec![r]));
911            }
912        }
913        ast::Command::Compound(_, l) => add_to_redirect_list(l, r),
914        ast::Command::Function(f) => add_to_redirect_list(&mut f.body.1, r),
915        ast::Command::ExtendedTest(_) => return Err("|& unimplemented for extended tests"),
916    }
917
918    Ok(())
919}
920
921fn locations_are_contiguous(
922    loc_left: &crate::TokenLocation,
923    loc_right: &crate::TokenLocation,
924) -> bool {
925    loc_left.end.index == loc_right.start.index
926}
927
928fn parse_array_assignment(
929    word: &str,
930    elements: &[&String],
931) -> Result<ast::Assignment, &'static str> {
932    let (assignment_name, append) =
933        assignments::name_and_equals(word).map_err(|_| "not array assignment word")?;
934
935    let elements = elements
936        .iter()
937        .map(|element| assignments::literal_array_element(element))
938        .collect::<Result<Vec<_>, _>>()
939        .map_err(|_| "invalid array element in literal")?;
940
941    let elements_as_words = elements
942        .into_iter()
943        .map(|(key, value)| {
944            (
945                key.map(|k| ast::Word::new(k.as_str())),
946                ast::Word::new(value.as_str()),
947            )
948        })
949        .collect();
950
951    Ok(ast::Assignment {
952        name: assignment_name,
953        value: ast::AssignmentValue::Array(elements_as_words),
954        append,
955    })
956}
957
#[cfg(test)]
#[allow(clippy::panic_in_result_fn)]
mod tests {

    use super::*;
    use crate::tokenizer::tokenize_str;
    use anyhow::Result;
    use assert_matches::assert_matches;

    // Parses a `case` clause whose single arm ends with the standard `;;`
    // separator. The backslash-newlines in the raw string are shell line
    // continuations, not Rust escapes.
    #[test]
    fn parse_case() -> Result<()> {
        let input = r"\
case x in
x)
    echo y;;
esac\
";

        let tokens = tokenize_str(input)?;
        let command = super::token_parser::case_clause(
            &Tokens {
                tokens: tokens.as_slice(),
            },
            &ParserOptions::default(),
            &SourceInfo::default(),
        )?;

        assert_eq!(command.cases.len(), 1);
        assert_eq!(command.cases[0].patterns.len(), 1);
        assert_eq!(command.cases[0].patterns[0].flatten(), "x");

        Ok(())
    }

    // Same as `parse_case`, but the final arm omits the `;;` separator
    // (permitted immediately before `esac`).
    #[test]
    fn parse_case_ns() -> Result<()> {
        let input = r"\
case x in
x)
    echo y
esac\
";

        let tokens = tokenize_str(input)?;
        let command = super::token_parser::case_clause(
            &Tokens {
                tokens: tokens.as_slice(),
            },
            &ParserOptions::default(),
            &SourceInfo::default(),
        )?;

        assert_eq!(command.cases.len(), 1);
        assert_eq!(command.cases[0].patterns.len(), 1);
        assert_eq!(command.cases[0].patterns[0].flatten(), "x");

        Ok(())
    }

    // The `|&` pipe extension should inject a `2>&1` redirection into the
    // suffix of the command on the left-hand side of the pipe.
    #[test]
    fn parse_redirection() -> Result<()> {
        let input = r"echo |& wc";

        let tokens = tokenize_str(input)?;
        let seq = super::token_parser::pipe_sequence(
            &Tokens {
                tokens: tokens.as_slice(),
            },
            &ParserOptions::default(),
            &SourceInfo::default(),
        )?;

        assert_eq!(seq.len(), 2);
        assert_matches!(seq[0], ast::Command::Simple(..));
        if let ast::Command::Simple(c) = &seq[0] {
            let c = c.suffix.as_ref().unwrap();
            assert_matches!(
                c.0[0],
                ast::CommandPrefixOrSuffixItem::IoRedirect(ast::IoRedirect::File(
                    Some(2),
                    ast::IoFileRedirectKind::DuplicateOutput,
                    ast::IoFileRedirectTarget::Fd(1)
                ))
            );
        }
        Ok(())
    }

    // For a function definition piped with either explicit `2>&1 |` or `|&`,
    // the redirection should land on the function body's redirect list.
    #[test]
    fn parse_function_with_pipe_redirection() -> Result<()> {
        let inputs = [r"foo() { echo 1; } 2>&1 | cat", r"foo() { echo 1; } |& cat"];

        for input in inputs {
            let tokens = tokenize_str(input)?;
            let seq = super::token_parser::pipe_sequence(
                &Tokens {
                    tokens: tokens.as_slice(),
                },
                &ParserOptions::default(),
                &SourceInfo::default(),
            )?;
            assert_eq!(seq.len(), 2);
            assert_matches!(seq[0], ast::Command::Function(..));
            if let ast::Command::Function(f) = &seq[0] {
                let l = &f.body.1;
                assert!(l.is_some());
                assert_matches!(
                    l.as_ref().unwrap().0[0],
                    ast::IoRedirect::File(
                        Some(2),
                        ast::IoFileRedirectKind::DuplicateOutput,
                        ast::IoFileRedirectTarget::Fd(1)
                    )
                );
            }
        }
        Ok(())
    }

    // End-to-end test: a full program (shebang, comments, a for-loop with a
    // redirection) parses into the exact expected AST.
    #[test]
    fn test_parse_program() -> Result<()> {
        use ast::*;

        let input = r#"

#!/usr/bin/env bash

for f in A B C; do

    # sdfsdf
    echo "${f@L}" >&2

   done

"#;
        let expected = Program {
            complete_commands: vec![CompoundList(vec![CompoundListItem(
                AndOrList {
                    first: Pipeline {
                        timed: None,
                        bang: false,
                        seq: vec![Command::Compound(
                            CompoundCommand::ForClause(ForClauseCommand {
                                variable_name: "f".into(),
                                values: Some(vec![Word::new("A"), Word::new("B"), Word::new("C")]),
                                body: DoGroupCommand(CompoundList(vec![CompoundListItem(
                                    AndOrList {
                                        first: Pipeline {
                                            timed: None,
                                            bang: false,
                                            seq: vec![Command::Simple(SimpleCommand {
                                                prefix: None,
                                                word_or_name: Some(Word::new("echo")),
                                                suffix: Some(CommandSuffix(vec![
                                                    CommandPrefixOrSuffixItem::Word(Word::new(
                                                        r#""${f@L}""#,
                                                    )),
                                                    CommandPrefixOrSuffixItem::IoRedirect(
                                                        IoRedirect::File(
                                                            None,
                                                            IoFileRedirectKind::DuplicateOutput,
                                                            IoFileRedirectTarget::Fd(2),
                                                        ),
                                                    ),
                                                ])),
                                            })],
                                        },
                                        additional: vec![],
                                    },
                                    SeparatorOperator::Sequence,
                                )])),
                            }),
                            None,
                        )],
                    },
                    additional: vec![],
                },
                SeparatorOperator::Sequence,
            )])],
        };

        let tokens = tokenize_str(input)?;
        let result = super::token_parser::program(
            &Tokens {
                tokens: tokens.as_slice(),
            },
            &ParserOptions::default(),
            &SourceInfo::default(),
        )?;

        assert_eq!(result, expected);

        Ok(())
    }
}