brush_parser/parser.rs

use crate::ast::{self, SeparatorOperator};
use crate::error;
use crate::tokenizer::{Token, TokenEndReason, Tokenizer, TokenizerOptions, Tokens};

/// Options used to control the behavior of the parser.
#[derive(Clone, Eq, Hash, PartialEq)]
pub struct ParserOptions {
    /// Whether or not to enable extended globbing (a.k.a. `extglob`).
    pub enable_extended_globbing: bool,
    /// Whether or not to enable POSIX compliance mode.
    pub posix_mode: bool,
    /// Whether or not to enable maximal compatibility with the `sh` shell.
    pub sh_mode: bool,
    /// Whether or not to perform tilde expansion.
    pub tilde_expansion: bool,
}

impl Default for ParserOptions {
    fn default() -> Self {
        Self {
            enable_extended_globbing: true,
            posix_mode: false,
            sh_mode: false,
            tilde_expansion: true,
        }
    }
}

impl ParserOptions {
    /// Returns the tokenizer options implied by these parser options.
    pub fn tokenizer_options(&self) -> TokenizerOptions {
        TokenizerOptions {
            enable_extended_globbing: self.enable_extended_globbing,
            posix_mode: self.posix_mode,
            sh_mode: self.sh_mode,
        }
    }
}

/// Implements parsing for shell programs.
pub struct Parser<R> {
    reader: R,
    options: ParserOptions,
    source_info: SourceInfo,
}

impl<R: std::io::BufRead> Parser<R> {
    /// Returns a new parser instance.
    ///
    /// # Arguments
    ///
    /// * `reader` - The reader to use for input.
    /// * `options` - The options to use when parsing.
    /// * `source_info` - Information about the source of the tokens.
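    ///
    /// # Example
    ///
    /// A minimal usage sketch (marked `ignore`, since the exact paths under which these
    /// types are re-exported from the crate are assumed here):
    ///
    /// ```ignore
    /// // Parse a small script from an in-memory reader.
    /// let mut parser = Parser::new(
    ///     std::io::Cursor::new("echo hello\n"),
    ///     &ParserOptions::default(),
    ///     &SourceInfo::default(),
    /// );
    /// let program = parser.parse().expect("expected the program to parse");
    /// ```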
    pub fn new(reader: R, options: &ParserOptions, source_info: &SourceInfo) -> Self {
        Parser {
            reader,
            options: options.clone(),
            source_info: source_info.clone(),
        }
    }

    /// Parses the input into an abstract syntax tree (AST) of a shell program.
    pub fn parse(&mut self) -> Result<ast::Program, error::ParseError> {
        //
        // References:
        //   * https://www.gnu.org/software/bash/manual/bash.html#Shell-Syntax
        //   * https://mywiki.wooledge.org/BashParser
        //   * https://aosabook.org/en/v1/bash.html
        //   * https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html
        //

        // First we tokenize the input, according to the policy implied by provided options.
        let mut tokenizer = Tokenizer::new(&mut self.reader, &self.options.tokenizer_options());

        tracing::debug!(target: "tokenize", "Tokenizing...");

        let mut tokens = vec![];
        loop {
            let result = match tokenizer.next_token() {
                Ok(result) => result,
                Err(e) => {
                    return Err(error::ParseError::Tokenizing {
                        inner: e,
                        position: tokenizer.current_location(),
                    });
                }
            };

            let reason = result.reason;
            if let Some(token) = result.token {
                tracing::debug!(target: "tokenize", "TOKEN {}: {:?} {reason:?}", tokens.len(), token);
                tokens.push(token);
            }

            if matches!(reason, TokenEndReason::EndOfInput) {
                break;
            }
        }

        tracing::debug!(target: "tokenize", "  => {} token(s)", tokens.len());

        parse_tokens(&tokens, &self.options, &self.source_info)
    }
}

/// Parses a sequence of tokens into the abstract syntax tree (AST) of a shell program.
///
/// # Arguments
///
/// * `tokens` - The tokens to parse.
/// * `options` - The options to use when parsing.
/// * `source_info` - Information about the source of the tokens.
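///
/// # Example
///
/// A minimal sketch (marked `ignore`; it assumes the tokens were produced by this crate's
/// tokenizer, e.g. via `tokenize_str` as the unit tests below do):
///
/// ```ignore
/// let tokens = tokenize_str("echo hello")?;
/// let program = parse_tokens(&tokens, &ParserOptions::default(), &SourceInfo::default())?;
/// ```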
pub fn parse_tokens(
    tokens: &Vec<Token>,
    options: &ParserOptions,
    source_info: &SourceInfo,
) -> Result<ast::Program, error::ParseError> {
    let parse_result = token_parser::program(&Tokens { tokens }, options, source_info);

    let result = match parse_result {
        Ok(program) => {
            tracing::debug!(target: "parse", "PROG: {:?}", program);
            Ok(program)
        }
        Err(parse_error) => {
            tracing::debug!(target: "parse", "Parse error: {:?}", parse_error);
            Err(error::convert_peg_parse_error(
                parse_error,
                tokens.as_slice(),
            ))
        }
    };

    result
}

impl peg::Parse for Tokens<'_> {
    type PositionRepr = usize;

    #[inline]
    fn start(&self) -> usize {
        0
    }

    #[inline]
    fn is_eof(&self, p: usize) -> bool {
        p >= self.tokens.len()
    }

    #[inline]
    fn position_repr(&self, p: usize) -> Self::PositionRepr {
        p
    }
}

impl<'a> peg::ParseElem<'a> for Tokens<'a> {
    type Element = &'a Token;

    #[inline]
    fn parse_elem(&'a self, pos: usize) -> peg::RuleResult<Self::Element> {
        match self.tokens.get(pos) {
            Some(c) => peg::RuleResult::Matched(pos + 1, c),
            None => peg::RuleResult::Failed,
        }
    }
}

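// Reassembles the matched tokens into a single string; this is what backs the grammar's
// `$( ... )` captures below (e.g. the raw text of arithmetic expressions and regex words).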
impl<'a> peg::ParseSlice<'a> for Tokens<'a> {
    type Slice = String;

    fn parse_slice(&'a self, start: usize, end: usize) -> Self::Slice {
        let mut result = String::new();
        let mut last_token_was_word = false;

        for token in &self.tokens[start..end] {
            match token {
                Token::Operator(s, _) => {
                    result.push_str(s);
                    last_token_was_word = false;
                }
                Token::Word(s, _) => {
                    // Place spaces between adjacent words.
                    if last_token_was_word {
                        result.push(' ');
                    }

                    result.push_str(s);
                    last_token_was_word = true;
                }
            }
        }

        result
    }
}

/// Information about the source of tokens.
#[derive(Clone, Default)]
pub struct SourceInfo {
    /// The source of the tokens.
    pub source: String,
}

peg::parser! {
    grammar token_parser<'a>(parser_options: &ParserOptions, source_info: &SourceInfo) for Tokens<'a> {
        pub(crate) rule program() -> ast::Program =
            linebreak() c:complete_commands() linebreak() { ast::Program { complete_commands: c } } /
            linebreak() { ast::Program { complete_commands: vec![] } }

        rule complete_commands() -> Vec<ast::CompleteCommand> =
            c:complete_command() ++ newline_list()

        rule complete_command() -> ast::CompleteCommand =
            first:and_or() remainder:(s:separator_op() l:and_or() { (s, l) })* last_sep:separator_op()? {
                let mut and_ors = vec![first];
                let mut seps = vec![];

                for (sep, ao) in remainder.into_iter() {
                    seps.push(sep);
                    and_ors.push(ao);
                }

                // N.B. We default to synchronous if no separator op is given.
                seps.push(last_sep.unwrap_or(SeparatorOperator::Sequence));

                let mut items = vec![];
                for (i, ao) in and_ors.into_iter().enumerate() {
                    items.push(ast::CompoundListItem(ao, seps[i].clone()));
                }

                ast::CompoundList(items)
            }

        rule and_or() -> ast::AndOrList =
            first:pipeline() additional:_and_or_item()* { ast::AndOrList { first, additional } }

        rule _and_or_item() -> ast::AndOr =
            op:_and_or_op() linebreak() p:pipeline() { op(p) }

        rule _and_or_op() -> fn(ast::Pipeline) -> ast::AndOr =
            specific_operator("&&") { ast::AndOr::And } /
            specific_operator("||") { ast::AndOr::Or }

        rule pipeline() -> ast::Pipeline =
            timed:pipeline_timed()? bang:bang()? seq:pipe_sequence() { ast::Pipeline { timed, bang: bang.is_some(), seq } }

        rule pipeline_timed() -> ast::PipelineTimed =
            non_posix_extensions_enabled() specific_word("time") posix_output:specific_word("-p")? {
                if posix_output.is_some() {
                    ast::PipelineTimed::TimedWithPosixOutput
                } else {
                    ast::PipelineTimed::Timed
                }
            }

        rule bang() -> bool = specific_word("!") { true }

        pub(crate) rule pipe_sequence() -> Vec<ast::Command> =
            c:(c:command() r:&pipe_extension_redirection()? {? // check for `|&` without consuming the stream.
                let mut c = c;
                if r.is_some() {
                    add_pipe_extension_redirection(&mut c)?;
                }
                Ok(c)
            }) ++ (pipe_operator() linebreak()) {
            c
        }
        rule pipe_operator() =
            specific_operator("|") /
            pipe_extension_redirection()

        rule pipe_extension_redirection() -> &'input Token =
            non_posix_extensions_enabled() p:specific_operator("|&") { p }

        // N.B. We needed to move the function definition branch up to avoid conflicts with array assignment syntax.
        rule command() -> ast::Command =
            f:function_definition() { ast::Command::Function(f) } /
            c:simple_command() { ast::Command::Simple(c) } /
            c:compound_command() r:redirect_list()? { ast::Command::Compound(c, r) } /
            // N.B. Extended test commands are bash extensions.
            non_posix_extensions_enabled() c:extended_test_command() { ast::Command::ExtendedTest(c) } /
            expected!("command")

        // N.B. The arithmetic command is a non-sh extension.
        // N.B. The arithmetic for clause command is a non-sh extension.
        pub(crate) rule compound_command() -> ast::CompoundCommand =
            non_posix_extensions_enabled() a:arithmetic_command() { ast::CompoundCommand::Arithmetic(a) } /
            b:brace_group() { ast::CompoundCommand::BraceGroup(b) } /
            s:subshell() { ast::CompoundCommand::Subshell(s) } /
            f:for_clause() { ast::CompoundCommand::ForClause(f) } /
            c:case_clause() { ast::CompoundCommand::CaseClause(c) } /
            i:if_clause() { ast::CompoundCommand::IfClause(i) } /
            w:while_clause() { ast::CompoundCommand::WhileClause(w) } /
            u:until_clause() { ast::CompoundCommand::UntilClause(u) } /
            non_posix_extensions_enabled() c:arithmetic_for_clause() { ast::CompoundCommand::ArithmeticForClause(c) } /
            expected!("compound command")

        pub(crate) rule arithmetic_command() -> ast::ArithmeticCommand =
            specific_operator("(") specific_operator("(") expr:arithmetic_expression() specific_operator(")") specific_operator(")") {
                ast::ArithmeticCommand { expr }
            }

        pub(crate) rule arithmetic_expression() -> ast::UnexpandedArithmeticExpr =
            raw_expr:$(arithmetic_expression_piece()*) { ast::UnexpandedArithmeticExpr { value: raw_expr } }

        rule arithmetic_expression_piece() =
            specific_operator("(") (!specific_operator(")") arithmetic_expression_piece())* specific_operator(")") {} /
            !arithmetic_end() [_] {}

        // TODO: evaluate arithmetic end; the semicolon is used in arithmetic for loops.
        rule arithmetic_end() -> () =
            specific_operator(")") specific_operator(")") {} /
            specific_operator(";") {}

        rule subshell() -> ast::SubshellCommand =
            specific_operator("(") c:compound_list() specific_operator(")") { ast::SubshellCommand(c) }

        rule compound_list() -> ast::CompoundList =
            linebreak() first:and_or() remainder:(s:separator() l:and_or() { (s, l) })* last_sep:separator()? {
                let mut and_ors = vec![first];
                let mut seps = vec![];

                for (sep, ao) in remainder.into_iter() {
                    seps.push(sep.unwrap_or(SeparatorOperator::Sequence));
                    and_ors.push(ao);
                }

                // N.B. We default to synchronous if no separator op is given.
                let last_sep = last_sep.unwrap_or(None);
                seps.push(last_sep.unwrap_or(SeparatorOperator::Sequence));

                let mut items = vec![];
                for (i, ao) in and_ors.into_iter().enumerate() {
                    items.push(ast::CompoundListItem(ao, seps[i].clone()));
                }

                ast::CompoundList(items)
            }

        rule for_clause() -> ast::ForClauseCommand =
            specific_word("for") n:name() linebreak() _in() w:wordlist()? sequential_sep() d:do_group() {
                ast::ForClauseCommand { variable_name: n.to_owned(), values: w, body: d }
            } /
            specific_word("for") n:name() sequential_sep()? d:do_group() {
                ast::ForClauseCommand { variable_name: n.to_owned(), values: None, body: d }
            }

        // N.B. The arithmetic for loop is a non-sh extension.
        rule arithmetic_for_clause() -> ast::ArithmeticForClauseCommand =
            specific_word("for")
            specific_operator("(") specific_operator("(")
                initializer:arithmetic_expression()? specific_operator(";")
                condition:arithmetic_expression()? specific_operator(";")
                updater:arithmetic_expression()?
            specific_operator(")") specific_operator(")")
            sequential_sep()
            body:do_group() {
                ast::ArithmeticForClauseCommand { initializer, condition, updater, body }
            }

        rule extended_test_command() -> ast::ExtendedTestExpr =
            specific_word("[[") linebreak() e:extended_test_expression() linebreak() specific_word("]]") { e }

        rule extended_test_expression() -> ast::ExtendedTestExpr = precedence! {
            left:(@) linebreak() specific_operator("||") linebreak() right:@ { ast::ExtendedTestExpr::Or(Box::from(left), Box::from(right)) }
            --
            left:(@) linebreak() specific_operator("&&") linebreak() right:@ { ast::ExtendedTestExpr::And(Box::from(left), Box::from(right)) }
            --
            specific_word("!") e:@ { ast::ExtendedTestExpr::Not(Box::from(e)) }
            --
            specific_operator("(") e:extended_test_expression() specific_operator(")") { ast::ExtendedTestExpr::Parenthesized(Box::from(e)) }
            --
            // Arithmetic operators
            left:word() specific_word("-eq") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::ArithmeticEqualTo, ast::Word::from(left), ast::Word::from(right)) }
            left:word() specific_word("-ne") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::ArithmeticNotEqualTo, ast::Word::from(left), ast::Word::from(right)) }
            left:word() specific_word("-lt") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::ArithmeticLessThan, ast::Word::from(left), ast::Word::from(right)) }
            left:word() specific_word("-le") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::ArithmeticLessThanOrEqualTo, ast::Word::from(left), ast::Word::from(right)) }
            left:word() specific_word("-gt") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::ArithmeticGreaterThan, ast::Word::from(left), ast::Word::from(right)) }
            left:word() specific_word("-ge") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::ArithmeticGreaterThanOrEqualTo, ast::Word::from(left), ast::Word::from(right)) }
            // Non-arithmetic binary operators
            left:word() specific_word("-ef") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::FilesReferToSameDeviceAndInodeNumbers, ast::Word::from(left), ast::Word::from(right)) }
            left:word() specific_word("-nt") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::LeftFileIsNewerOrExistsWhenRightDoesNot, ast::Word::from(left), ast::Word::from(right)) }
            left:word() specific_word("-ot") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::LeftFileIsOlderOrDoesNotExistWhenRightDoes, ast::Word::from(left), ast::Word::from(right)) }
            left:word() (specific_word("==") / specific_word("=")) right:word()  { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::StringExactlyMatchesPattern, ast::Word::from(left), ast::Word::from(right)) }
            left:word() specific_word("!=") right:word()  { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::StringDoesNotExactlyMatchPattern, ast::Word::from(left), ast::Word::from(right)) }
            left:word() specific_word("=~") right:regex_word()  {
                if right.value.starts_with(['\'', '\"']) {
                    // TODO: Confirm it ends with that too?
                    ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::StringContainsSubstring, ast::Word::from(left), right)
                } else {
                    ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::StringMatchesRegex, ast::Word::from(left), right)
                }
            }
            left:word() specific_operator("<") right:word()   { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::LeftSortsBeforeRight, ast::Word::from(left), ast::Word::from(right)) }
            left:word() specific_operator(">") right:word()   { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::LeftSortsAfterRight, ast::Word::from(left), ast::Word::from(right)) }
            --
            p:extended_unary_predicate() f:word() { ast::ExtendedTestExpr::UnaryTest(p, ast::Word::from(f)) }
            --
            w:word() { ast::ExtendedTestExpr::UnaryTest(ast::UnaryPredicate::StringHasNonZeroLength, ast::Word::from(w)) }
        }

        rule extended_unary_predicate() -> ast::UnaryPredicate =
            specific_word("-a") { ast::UnaryPredicate::FileExists } /
            specific_word("-b") { ast::UnaryPredicate::FileExistsAndIsBlockSpecialFile } /
            specific_word("-c") { ast::UnaryPredicate::FileExistsAndIsCharSpecialFile } /
            specific_word("-d") { ast::UnaryPredicate::FileExistsAndIsDir } /
            specific_word("-e") { ast::UnaryPredicate::FileExists } /
            specific_word("-f") { ast::UnaryPredicate::FileExistsAndIsRegularFile } /
            specific_word("-g") { ast::UnaryPredicate::FileExistsAndIsSetgid } /
            specific_word("-h") { ast::UnaryPredicate::FileExistsAndIsSymlink } /
            specific_word("-k") { ast::UnaryPredicate::FileExistsAndHasStickyBit } /
            specific_word("-n") { ast::UnaryPredicate::StringHasNonZeroLength } /
            specific_word("-o") { ast::UnaryPredicate::ShellOptionEnabled } /
            specific_word("-p") { ast::UnaryPredicate::FileExistsAndIsFifo } /
            specific_word("-r") { ast::UnaryPredicate::FileExistsAndIsReadable } /
            specific_word("-s") { ast::UnaryPredicate::FileExistsAndIsNotZeroLength } /
            specific_word("-t") { ast::UnaryPredicate::FdIsOpenTerminal } /
            specific_word("-u") { ast::UnaryPredicate::FileExistsAndIsSetuid } /
            specific_word("-v") { ast::UnaryPredicate::ShellVariableIsSetAndAssigned } /
            specific_word("-w") { ast::UnaryPredicate::FileExistsAndIsWritable } /
            specific_word("-x") { ast::UnaryPredicate::FileExistsAndIsExecutable } /
            specific_word("-z") { ast::UnaryPredicate::StringHasZeroLength } /
            specific_word("-G") { ast::UnaryPredicate::FileExistsAndOwnedByEffectiveGroupId } /
            specific_word("-L") { ast::UnaryPredicate::FileExistsAndIsSymlink } /
            specific_word("-N") { ast::UnaryPredicate::FileExistsAndModifiedSinceLastRead } /
            specific_word("-O") { ast::UnaryPredicate::FileExistsAndOwnedByEffectiveUserId } /
            specific_word("-R") { ast::UnaryPredicate::ShellVariableIsSetAndNameRef } /
            specific_word("-S") { ast::UnaryPredicate::FileExistsAndIsSocket }

        // N.B. For some reason we seem to need to allow a select subset
        // of unescaped operators in regex words.
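        // For example, in `[[ $x =~ (foo|bar) ]]` the `(`, `|`, and `)` arrive here as
        // separate operator tokens but still belong to the regex being matched.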
        rule regex_word() -> ast::Word =
            value:$((!specific_word("]]") regex_word_piece())+) {
                ast::Word { value }
            }

        rule regex_word_piece() =
            word() {} /
            specific_operator("|") {} /
            specific_operator("(") parenthesized_regex_word()* specific_operator(")") {}

        rule parenthesized_regex_word() =
            regex_word_piece() /
            !specific_operator(")") !specific_operator("]]") [_]

        rule name() -> &'input str =
            w:[Token::Word(_, _)] { w.to_str() }

        rule _in() -> () =
            specific_word("in") { }

        // TODO: validate if this should call non_reserved_word() or word()
        rule wordlist() -> Vec<ast::Word> =
            (w:non_reserved_word() { ast::Word::from(w) })+

        // TODO: validate if this should call non_reserved_word() or word()
        pub(crate) rule case_clause() -> ast::CaseClauseCommand =
            specific_word("case") w:non_reserved_word() linebreak() _in() linebreak() first_items:case_item()* last_item:case_item_ns()? specific_word("esac") {
                let mut cases = first_items;

                if let Some(last_item) = last_item {
                    cases.push(last_item);
                }

                ast::CaseClauseCommand { value: ast::Word::from(w), cases }
            }

        pub(crate) rule case_item_ns() -> ast::CaseItem =
            specific_operator("(")? p:pattern() specific_operator(")") c:compound_list() {
                ast::CaseItem { patterns: p, cmd: Some(c), post_action: ast::CaseItemPostAction::ExitCase }
            } /
            specific_operator("(")? p:pattern() specific_operator(")") linebreak() {
                ast::CaseItem { patterns: p, cmd: None, post_action: ast::CaseItemPostAction::ExitCase }
            }

        pub(crate) rule case_item() -> ast::CaseItem =
            specific_operator("(")? p:pattern() specific_operator(")") linebreak() post_action:case_item_post_action() linebreak() {
                ast::CaseItem { patterns: p, cmd: None, post_action }
            } /
            specific_operator("(")? p:pattern() specific_operator(")") c:compound_list() post_action:case_item_post_action() linebreak() {
                ast::CaseItem { patterns: p, cmd: Some(c), post_action }
            }

        rule case_item_post_action() -> ast::CaseItemPostAction =
            specific_operator(";;") {
                ast::CaseItemPostAction::ExitCase
            } /
            non_posix_extensions_enabled() specific_operator(";;&") {
                ast::CaseItemPostAction::ContinueEvaluatingCases
            } /
            non_posix_extensions_enabled() specific_operator(";&") {
                ast::CaseItemPostAction::UnconditionallyExecuteNextCaseItem
            }

        // TODO: validate if this should call non_reserved_word() or word()
        rule pattern() -> Vec<ast::Word> =
            (w:word() { ast::Word::from(w) }) ++ specific_operator("|")

        rule if_clause() -> ast::IfClauseCommand =
            specific_word("if") condition:compound_list() specific_word("then") then:compound_list() elses:else_part()? specific_word("fi") {
                ast::IfClauseCommand {
                    condition,
                    then,
                    elses,
                }
            }

        rule else_part() -> Vec<ast::ElseClause> =
            cs:_conditional_else_part()+ u:_unconditional_else_part()? {
                let mut parts = vec![];
                for c in cs.into_iter() {
                    parts.push(c);
                }

                if let Some(uncond) = u {
                    parts.push(uncond);
                }

                parts
            } /
            e:_unconditional_else_part() { vec![e] }

        rule _conditional_else_part() -> ast::ElseClause =
            specific_word("elif") condition:compound_list() specific_word("then") body:compound_list() {
                ast::ElseClause { condition: Some(condition), body }
            }

        rule _unconditional_else_part() -> ast::ElseClause =
            specific_word("else") body:compound_list() {
                ast::ElseClause { condition: None, body }
            }

        rule while_clause() -> ast::WhileOrUntilClauseCommand =
            specific_word("while") c:compound_list() d:do_group() { ast::WhileOrUntilClauseCommand(c, d) }

        rule until_clause() -> ast::WhileOrUntilClauseCommand =
            specific_word("until") c:compound_list() d:do_group() { ast::WhileOrUntilClauseCommand(c, d) }

        // N.B. Non-sh extensions allow use of the 'function' word to indicate a function definition.
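        // For example, `foo() { ...; }`, `function foo() { ...; }`, and `function foo { ...; }`
        // are all accepted by the branches below.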
        rule function_definition() -> ast::FunctionDefinition =
            specific_word("function")? fname:fname() specific_operator("(") specific_operator(")") linebreak() body:function_body() {
                ast::FunctionDefinition { fname: fname.to_owned(), body, source: source_info.source.clone() }
            } /
            specific_word("function") fname:fname() linebreak() body:function_body() {
                ast::FunctionDefinition { fname: fname.to_owned(), body, source: source_info.source.clone() }
            } /
            expected!("function definition")

        rule function_body() -> ast::FunctionBody =
            c:compound_command() r:redirect_list()? { ast::FunctionBody(c, r) }

        rule fname() -> &'input str =
            // Special-case: don't allow it to end with an equals sign, to avoid the challenge of
            // misinterpreting certain declaration assignments as function definitions.
            // TODO: Find a way to make this still work without requiring this targeted exception.
            w:[Token::Word(word, _) if !word.ends_with('=')] { w.to_str() }

        rule brace_group() -> ast::BraceGroupCommand =
            specific_word("{") c:compound_list() specific_word("}") { ast::BraceGroupCommand(c) }

        rule do_group() -> ast::DoGroupCommand =
            specific_word("do") c:compound_list() specific_word("done") { ast::DoGroupCommand(c) }

        rule simple_command() -> ast::SimpleCommand =
            prefix:cmd_prefix() word_and_suffix:(word_or_name:cmd_word() suffix:cmd_suffix()? { (word_or_name, suffix) })? {
                match word_and_suffix {
                    Some((word_or_name, suffix)) => {
                        ast::SimpleCommand { prefix: Some(prefix), word_or_name: Some(ast::Word::from(word_or_name)), suffix }
                    }
                    None => {
                        ast::SimpleCommand { prefix: Some(prefix), word_or_name: None, suffix: None }
                    }
                }
            } /
            word_or_name:cmd_name() suffix:cmd_suffix()? {
                ast::SimpleCommand { prefix: None, word_or_name: Some(ast::Word::from(word_or_name)), suffix } } /
            expected!("simple command")

        rule cmd_name() -> &'input Token =
            non_reserved_word()

        rule cmd_word() -> &'input Token =
            !assignment_word() w:non_reserved_word() { w }

        rule cmd_prefix() -> ast::CommandPrefix =
            p:(
                i:io_redirect() { ast::CommandPrefixOrSuffixItem::IoRedirect(i) } /
                assignment_and_word:assignment_word() {
                    let (assignment, word) = assignment_and_word;
                    ast::CommandPrefixOrSuffixItem::AssignmentWord(assignment, word)
                }
            )+ { ast::CommandPrefix(p) }

        rule cmd_suffix() -> ast::CommandSuffix =
            s:(
                non_posix_extensions_enabled() sub:process_substitution() {
                    let (kind, subshell) = sub;
                    ast::CommandPrefixOrSuffixItem::ProcessSubstitution(kind, subshell)
                } /
                i:io_redirect() {
                    ast::CommandPrefixOrSuffixItem::IoRedirect(i)
                } /
                assignment_and_word:assignment_word() {
                    let (assignment, word) = assignment_and_word;
                    ast::CommandPrefixOrSuffixItem::AssignmentWord(assignment, word)
                } /
                w:word() {
                    ast::CommandPrefixOrSuffixItem::Word(ast::Word::from(w))
                }
            )+ { ast::CommandSuffix(s) }

        rule redirect_list() -> ast::RedirectList =
            r:io_redirect()+ { ast::RedirectList(r) } /
            expected!("redirect list")

        // N.B. here strings are extensions to the POSIX standard.
        rule io_redirect() -> ast::IoRedirect =
            n:io_number()? f:io_file() {
                    let (kind, target) = f;
                    ast::IoRedirect::File(n, kind, target)
                } /
            non_posix_extensions_enabled() specific_operator("&>>") target:filename() { ast::IoRedirect::OutputAndError(ast::Word::from(target), true) } /
            non_posix_extensions_enabled() specific_operator("&>") target:filename() { ast::IoRedirect::OutputAndError(ast::Word::from(target), false) } /
            non_posix_extensions_enabled() n:io_number()? specific_operator("<<<") w:word() { ast::IoRedirect::HereString(n, ast::Word::from(w)) } /
            n:io_number()? h:io_here() { ast::IoRedirect::HereDocument(n, h) } /
            expected!("I/O redirect")

        // N.B. Process substitution forms are extensions to the POSIX standard.
        rule io_file() -> (ast::IoFileRedirectKind, ast::IoFileRedirectTarget) =
            specific_operator("<")  f:io_filename() { (ast::IoFileRedirectKind::Read, f) } /
            specific_operator("<&") f:io_filename_or_fd() { (ast::IoFileRedirectKind::DuplicateInput, f) } /
            specific_operator(">")  f:io_filename() { (ast::IoFileRedirectKind::Write, f) } /
            specific_operator(">&") f:io_filename_or_fd() { (ast::IoFileRedirectKind::DuplicateOutput, f) } /
            specific_operator(">>") f:io_filename() { (ast::IoFileRedirectKind::Append, f) } /
            specific_operator("<>") f:io_filename() { (ast::IoFileRedirectKind::ReadAndWrite, f) } /
            specific_operator(">|") f:io_filename() { (ast::IoFileRedirectKind::Clobber, f) }

        rule io_filename_or_fd() -> ast::IoFileRedirectTarget =
            fd:io_fd() { ast::IoFileRedirectTarget::Fd(fd) } /
            io_filename()

        rule io_fd() -> u32 =
            w:[Token::Word(_, _)] {? w.to_str().parse().or(Err("io_fd u32")) }

        rule io_filename() -> ast::IoFileRedirectTarget =
            non_posix_extensions_enabled() sub:process_substitution() {
                let (kind, subshell) = sub;
                ast::IoFileRedirectTarget::ProcessSubstitution(kind, subshell)
            } /
            f:filename() { ast::IoFileRedirectTarget::Filename(ast::Word::from(f)) }

        rule filename() -> &'input Token =
            word()

        pub(crate) rule io_here() -> ast::IoHereDocument =
            specific_operator("<<-") here_tag:here_tag() doc:[_] closing_tag:here_tag() {
                let requires_expansion = !here_tag.to_str().contains(['\'', '"', '\\']);
                ast::IoHereDocument {
                    remove_tabs: true,
                    requires_expansion,
                    here_end: ast::Word::from(here_tag),
                    doc: ast::Word::from(doc)
                }
            } /
            specific_operator("<<") here_tag:here_tag() doc:[_] closing_tag:here_tag() {
                let requires_expansion = !here_tag.to_str().contains(['\'', '"', '\\']);
                ast::IoHereDocument {
                    remove_tabs: false,
                    requires_expansion,
                    here_end: ast::Word::from(here_tag),
                    doc: ast::Word::from(doc)
                }
            }

        rule here_tag() -> &'input Token =
            word()

        rule process_substitution() -> (ast::ProcessSubstitutionKind, ast::SubshellCommand) =
            specific_operator("<") s:subshell() { (ast::ProcessSubstitutionKind::Read, s) } /
            specific_operator(">") s:subshell() { (ast::ProcessSubstitutionKind::Write, s) }

        rule newline_list() -> () =
            newline()+ {}

        rule linebreak() -> () =
            quiet! {
                newline()* {}
            }

        rule separator_op() -> ast::SeparatorOperator =
            specific_operator("&") { ast::SeparatorOperator::Async } /
            specific_operator(";") { ast::SeparatorOperator::Sequence }

        rule separator() -> Option<ast::SeparatorOperator> =
            s:separator_op() linebreak() { Some(s) } /
            newline_list() { None }

        rule sequential_sep() -> () =
            specific_operator(";") linebreak() /
            newline_list()

        //
        // Token interpretation
        //

        rule non_reserved_word() -> &'input Token =
            !reserved_word() w:word() { w }

        rule word() -> &'input Token =
            [Token::Word(_, _)]

        rule reserved_word() -> &'input Token =
            [Token::Word(w, _) if matches!(w.as_str(),
                "!" |
                "{" |
                "}" |
                "case" |
                "do" |
                "done" |
                "elif" |
                "else" |
                "esac" |
                "fi" |
                "for" |
                "if" |
                "in" |
                "then" |
                "until" |
                "while"
            )] /

            // N.B. bash also treats the following as reserved.
            non_posix_extensions_enabled() token:non_posix_reserved_word_token() { token }

        rule non_posix_reserved_word_token() -> &'input Token =
            specific_word("[[") /
            specific_word("]]") /
            specific_word("function") /
            specific_word("select")

        rule newline() -> () = quiet! {
            specific_operator("\n") {}
        }

        pub(crate) rule assignment_word() -> (ast::Assignment, ast::Word) =
            non_posix_extensions_enabled() [Token::Word(w, _)] specific_operator("(") elements:array_elements() specific_operator(")") {?
                let parsed = parse_array_assignment(w.as_str(), elements.as_slice())?;

                let mut all_as_word = w.to_owned();
                all_as_word.push('(');
                for (i, e) in elements.iter().enumerate() {
                    if i > 0 {
                        all_as_word.push(' ');
                    }
                    all_as_word.push_str(e);
                }
                all_as_word.push(')');

                Ok((parsed, ast::Word { value: all_as_word }))
            } /
            [Token::Word(w, _)] {?
                let parsed = parse_assignment_word(w.as_str())?;
                Ok((parsed, ast::Word { value: w.to_owned() }))
            }

        rule array_elements() -> Vec<&'input String> =
            linebreak() e:array_element()* { e }

        rule array_element() -> &'input String =
            linebreak() [Token::Word(e, _)] linebreak() { e }

        // N.B. An I/O number must be a string of only digits, and it must be
        // followed by a '<' or '>' character (without consuming it). We also
        // need to make sure there was no whitespace between the number and the
        // redirection operator; the whitespace itself is gone by the time we
        // have tokens, but its absence can be inferred from the tokens' locations.
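        // For example, in `2>err.txt` the `2` is an I/O number, while in `echo 2 >err.txt`
        // the `2` is an ordinary word because whitespace separates it from `>`.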
        rule io_number() -> u32 =
            [Token::Word(w, num_loc) if w.chars().all(|c: char| c.is_ascii_digit())]
            &([Token::Operator(o, redir_loc) if
                    o.starts_with(['<', '>']) &&
                    locations_are_contiguous(num_loc, redir_loc)]) {

                w.parse().unwrap()
            }

        //
        // Helpers
        //
        rule specific_operator(expected: &str) -> &'input Token =
            [Token::Operator(w, _) if w.as_str() == expected]

        rule specific_word(expected: &str) -> &'input Token =
            [Token::Word(w, _) if w.as_str() == expected]

        rule non_posix_extensions_enabled() -> () =
            &[_] {? if !parser_options.sh_mode { Ok(()) } else { Err("posix") } }
    }
}

peg::parser! {
    grammar assignments() for str {
        pub(crate) rule name_and_scalar_value() -> ast::Assignment =
            nae:name_and_equals() value:scalar_value() {
                let (name, append) = nae;
                ast::Assignment { name, value, append }
            }

        pub(crate) rule name_and_equals() -> (ast::AssignmentName, bool) =
            name:name() append:("+"?) "=" {
                (name, append.is_some())
            }

        pub(crate) rule literal_array_element() -> (Option<String>, String) =
            "[" inner:$((!"]" [_])*) "]=" value:$([_]*) {
                (Some(inner.to_owned()), value.to_owned())
            } /
            value:$([_]+) {
                (None, value.to_owned())
            }

        rule name() -> ast::AssignmentName =
            aen:array_element_name() {
                let (name, index) = aen;
                ast::AssignmentName::ArrayElementName(name.to_owned(), index.to_owned())
            } /
            name:scalar_name() {
                ast::AssignmentName::VariableName(name.to_owned())
            }

        rule array_element_name() -> (&'input str, &'input str) =
            name:scalar_name() "[" ai:array_index() "]" { (name, ai) }

        rule array_index() -> &'input str =
            $((![']'] [_])*)

        rule scalar_name() -> &'input str =
            $(alpha_or_underscore() non_first_variable_char()*)

        rule non_first_variable_char() -> () =
            ['_' | '0'..='9' | 'a'..='z' | 'A'..='Z'] {}

        rule alpha_or_underscore() -> () =
            ['_' | 'a'..='z' | 'A'..='Z'] {}

        rule scalar_value() -> ast::AssignmentValue =
            v:$([_]*) { ast::AssignmentValue::Scalar(ast::Word { value: v.to_owned() }) }
    }
}

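// Parses a scalar assignment word (e.g. `FOO=bar`, `FOO+=bar`, or `arr[1]=x`) into an AST assignment.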
fn parse_assignment_word(word: &str) -> Result<ast::Assignment, &'static str> {
    let parse_result = assignments::name_and_scalar_value(word);
    parse_result.map_err(|_| "not assignment word")
}

// Adds a `2>&1` redirection to the given command; used when a pipeline stage is joined with `|&`.
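// For example, `ls |& wc` is parsed as if it had been written `ls 2>&1 | wc`.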
fn add_pipe_extension_redirection(c: &mut ast::Command) -> Result<(), &'static str> {
    let r = ast::IoRedirect::File(
        Some(2),
        ast::IoFileRedirectKind::DuplicateOutput,
        ast::IoFileRedirectTarget::Fd(1),
    );

    fn add_to_redirect_list(l: &mut Option<ast::RedirectList>, r: ast::IoRedirect) {
        if let Some(l) = l {
            l.0.push(r);
        } else {
            let v = vec![r];
            *l = Some(ast::RedirectList(v));
        }
    }

    match c {
        ast::Command::Simple(c) => {
            let r = ast::CommandPrefixOrSuffixItem::IoRedirect(r);
            if let Some(l) = &mut c.suffix {
                l.0.push(r);
            } else {
                c.suffix = Some(ast::CommandSuffix(vec![r]));
            }
        }
        ast::Command::Compound(_, l) => add_to_redirect_list(l, r),
        ast::Command::Function(f) => add_to_redirect_list(&mut f.body.1, r),
        ast::Command::ExtendedTest(_) => return Err("|& unimplemented for extended tests"),
    };

    Ok(())
}

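// Returns true if the left token ends exactly where the right token begins, i.e., there
// was no intervening whitespace in the source text.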
fn locations_are_contiguous(
    loc_left: &crate::TokenLocation,
    loc_right: &crate::TokenLocation,
) -> bool {
    loc_left.end.index == loc_right.start.index
}

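// Parses an array assignment of the form `name=(element ...)` (or `name+=(...)`), where each
// element may optionally be written as `[index]=value`.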
fn parse_array_assignment(
    word: &str,
    elements: &[&String],
) -> Result<ast::Assignment, &'static str> {
    let (assignment_name, append) =
        assignments::name_and_equals(word).map_err(|_| "not array assignment word")?;

    let elements = elements
        .iter()
        .map(|element| assignments::literal_array_element(element))
        .collect::<Result<Vec<_>, _>>()
        .map_err(|_| "invalid array element in literal")?;

    let elements_as_words = elements
        .into_iter()
        .map(|(key, value)| {
            (
                key.map(|k| ast::Word::new(k.as_str())),
                ast::Word::new(value.as_str()),
            )
        })
        .collect();

    Ok(ast::Assignment {
        name: assignment_name,
        value: ast::AssignmentValue::Array(elements_as_words),
        append,
    })
}

#[cfg(test)]
mod tests {

    use super::*;
    use crate::tokenizer::tokenize_str;
    use anyhow::Result;
    use assert_matches::assert_matches;

    #[test]
    fn parse_case() -> Result<()> {
        let input = r"\
case x in
x)
    echo y;;
esac\
";

        let tokens = tokenize_str(input)?;
        let command = super::token_parser::case_clause(
            &Tokens {
                tokens: tokens.as_slice(),
            },
            &ParserOptions::default(),
            &SourceInfo::default(),
        )?;

        assert_eq!(command.cases.len(), 1);
        assert_eq!(command.cases[0].patterns.len(), 1);
        assert_eq!(command.cases[0].patterns[0].flatten(), "x");

        Ok(())
    }

    #[test]
    fn parse_case_ns() -> Result<()> {
        let input = r"\
case x in
x)
    echo y
esac\
";

        let tokens = tokenize_str(input)?;
        let command = super::token_parser::case_clause(
            &Tokens {
                tokens: tokens.as_slice(),
            },
            &ParserOptions::default(),
            &SourceInfo::default(),
        )?;

        assert_eq!(command.cases.len(), 1);
        assert_eq!(command.cases[0].patterns.len(), 1);
        assert_eq!(command.cases[0].patterns[0].flatten(), "x");

        Ok(())
    }

    #[test]
    fn parse_redirection() -> Result<()> {
        let input = r"echo |& wc";

        let tokens = tokenize_str(input)?;
        let seq = super::token_parser::pipe_sequence(
            &Tokens {
                tokens: tokens.as_slice(),
            },
            &ParserOptions::default(),
            &SourceInfo::default(),
        )?;

        assert_eq!(seq.len(), 2);
        assert_matches!(seq[0], ast::Command::Simple(..));
        if let ast::Command::Simple(c) = &seq[0] {
            let c = c.suffix.as_ref().unwrap();
            assert_matches!(
                c.0[0],
                ast::CommandPrefixOrSuffixItem::IoRedirect(ast::IoRedirect::File(
                    Some(2),
                    ast::IoFileRedirectKind::DuplicateOutput,
                    ast::IoFileRedirectTarget::Fd(1)
                ))
            )
        }
        Ok(())
    }

    #[test]
    fn parse_function_with_pipe_redirection() -> Result<()> {
        let inputs = [r"foo() { echo 1; } 2>&1 | cat", r"foo() { echo 1; } |& cat"];

        for input in inputs {
            let tokens = tokenize_str(input)?;
            let seq = super::token_parser::pipe_sequence(
                &Tokens {
                    tokens: tokens.as_slice(),
                },
                &ParserOptions::default(),
                &SourceInfo::default(),
            )?;
            assert_eq!(seq.len(), 2);
            assert_matches!(seq[0], ast::Command::Function(..));
            if let ast::Command::Function(f) = &seq[0] {
                let l = &f.body.1;
                assert!(l.is_some());
                assert_matches!(
                    l.as_ref().unwrap().0[0],
                    ast::IoRedirect::File(
                        Some(2),
                        ast::IoFileRedirectKind::DuplicateOutput,
                        ast::IoFileRedirectTarget::Fd(1)
                    )
                )
            }
        }
        Ok(())
    }

    #[test]
    fn test_parse_program() -> Result<()> {
        let input = r#"

#!/usr/bin/env bash

for f in A B C; do

    # sdfsdf
    echo "${f@L}" >&2

   done

"#;
        use ast::*;
        let expected = Program {
            complete_commands: vec![CompoundList(vec![CompoundListItem(
                AndOrList {
                    first: Pipeline {
                        timed: None,
                        bang: false,
                        seq: vec![Command::Compound(
                            CompoundCommand::ForClause(ForClauseCommand {
                                variable_name: "f".into(),
                                values: Some(vec![Word::new("A"), Word::new("B"), Word::new("C")]),
                                body: DoGroupCommand(CompoundList(vec![CompoundListItem(
                                    AndOrList {
                                        first: Pipeline {
                                            timed: None,
                                            bang: false,
                                            seq: vec![Command::Simple(SimpleCommand {
                                                prefix: None,
                                                word_or_name: Some(Word::new("echo")),
                                                suffix: Some(CommandSuffix(vec![
                                                    CommandPrefixOrSuffixItem::Word(Word::new(
                                                        r#""${f@L}""#,
                                                    )),
                                                    CommandPrefixOrSuffixItem::IoRedirect(
                                                        IoRedirect::File(
                                                            None,
                                                            IoFileRedirectKind::DuplicateOutput,
                                                            IoFileRedirectTarget::Fd(2),
                                                        ),
                                                    ),
                                                ])),
                                            })],
                                        },
                                        additional: vec![],
                                    },
                                    SeparatorOperator::Sequence,
                                )])),
                            }),
                            None,
                        )],
                    },
                    additional: vec![],
                },
                SeparatorOperator::Sequence,
            )])],
        };

        let tokens = tokenize_str(input)?;
        let result = super::token_parser::program(
            &Tokens {
                tokens: tokens.as_slice(),
            },
            &ParserOptions::default(),
            &SourceInfo::default(),
        )?;

        assert_eq!(result, expected);

        Ok(())
    }
}