// brush_parser/parser.rs

1use crate::ast::{self, SeparatorOperator};
2use crate::error;
3use crate::tokenizer::{Token, TokenEndReason, Tokenizer, TokenizerOptions, Tokens};
4
/// Options used to control the behavior of the parser.
//
// N.B. All fields are plain `bool`s, so the type is trivially `Copy`; `Debug`
// is derived so option sets can appear in diagnostics and logs.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub struct ParserOptions {
    /// Whether or not to enable extended globbing (a.k.a. `extglob`).
    pub enable_extended_globbing: bool,
    /// Whether or not to enable POSIX compliance mode.
    pub posix_mode: bool,
    /// Whether or not to enable maximal compatibility with the `sh` shell.
    pub sh_mode: bool,
    /// Whether or not to perform tilde expansion.
    pub tilde_expansion: bool,
}
17
18impl Default for ParserOptions {
19    fn default() -> Self {
20        Self {
21            enable_extended_globbing: true,
22            posix_mode: false,
23            sh_mode: false,
24            tilde_expansion: true,
25        }
26    }
27}
28
29impl ParserOptions {
30    /// Returns the tokenizer options implied by these parser options.
31    pub const fn tokenizer_options(&self) -> TokenizerOptions {
32        TokenizerOptions {
33            enable_extended_globbing: self.enable_extended_globbing,
34            posix_mode: self.posix_mode,
35            sh_mode: self.sh_mode,
36        }
37    }
38}
39
/// Implements parsing for shell programs.
pub struct Parser<R> {
    // Reader providing the raw input text to tokenize and parse.
    reader: R,
    // Options controlling tokenization and parsing behavior.
    options: ParserOptions,
    // Metadata about where the input came from; propagated into parsed
    // function definitions by the grammar.
    source_info: SourceInfo,
}
46
47impl<R: std::io::BufRead> Parser<R> {
48    /// Returns a new parser instance.
49    ///
50    /// # Arguments
51    ///
52    /// * `reader` - The reader to use for input.
53    /// * `options` - The options to use when parsing.
54    /// * `source_info` - Information about the source of the tokens.
55    pub fn new(reader: R, options: &ParserOptions, source_info: &SourceInfo) -> Self {
56        Self {
57            reader,
58            options: options.clone(),
59            source_info: source_info.clone(),
60        }
61    }
62
63    /// Parses the input into an abstract syntax tree (AST) of a shell program.
64    pub fn parse_program(&mut self) -> Result<ast::Program, error::ParseError> {
65        //
66        // References:
67        //   * https://www.gnu.org/software/bash/manual/bash.html#Shell-Syntax
68        //   * https://mywiki.wooledge.org/BashParser
69        //   * https://aosabook.org/en/v1/bash.html
70        //   * https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html
71        //
72
73        let tokens = self.tokenize()?;
74        parse_tokens(&tokens, &self.options, &self.source_info)
75    }
76
77    /// Parses a function definition body from the input. The body is expected to be
78    /// preceded by "()", but no function name.
79    pub fn parse_function_parens_and_body(
80        &mut self,
81    ) -> Result<ast::FunctionBody, error::ParseError> {
82        let tokens = self.tokenize()?;
83        let parse_result = token_parser::function_parens_and_body(
84            &Tokens { tokens: &tokens },
85            &self.options,
86            &self.source_info,
87        );
88        parse_result_to_error(parse_result, &tokens)
89    }
90
91    fn tokenize(&mut self) -> Result<Vec<Token>, error::ParseError> {
92        // First we tokenize the input, according to the policy implied by provided options.
93        let mut tokenizer = Tokenizer::new(&mut self.reader, &self.options.tokenizer_options());
94
95        tracing::debug!(target: "tokenize", "Tokenizing...");
96
97        let mut tokens = vec![];
98        loop {
99            let result = match tokenizer.next_token() {
100                Ok(result) => result,
101                Err(e) => {
102                    return Err(error::ParseError::Tokenizing {
103                        inner: e,
104                        position: tokenizer.current_location(),
105                    });
106                }
107            };
108
109            let reason = result.reason;
110            if let Some(token) = result.token {
111                tracing::debug!(target: "tokenize", "TOKEN {}: {:?} {reason:?}", tokens.len(), token);
112                tokens.push(token);
113            }
114
115            if matches!(reason, TokenEndReason::EndOfInput) {
116                break;
117            }
118        }
119
120        tracing::debug!(target: "tokenize", "  => {} token(s)", tokens.len());
121
122        Ok(tokens)
123    }
124}
125
126/// Parses a sequence of tokens into the abstract syntax tree (AST) of a shell program.
127///
128/// # Arguments
129///
130/// * `tokens` - The tokens to parse.
131/// * `options` - The options to use when parsing.
132/// * `source_info` - Information about the source of the tokens.
133pub fn parse_tokens(
134    tokens: &Vec<Token>,
135    options: &ParserOptions,
136    source_info: &SourceInfo,
137) -> Result<ast::Program, error::ParseError> {
138    let parse_result = token_parser::program(&Tokens { tokens }, options, source_info);
139    parse_result_to_error(parse_result, tokens)
140}
141
142fn parse_result_to_error<R>(
143    parse_result: Result<R, peg::error::ParseError<usize>>,
144    tokens: &Vec<Token>,
145) -> Result<R, error::ParseError>
146where
147    R: std::fmt::Debug,
148{
149    match parse_result {
150        Ok(program) => {
151            tracing::debug!(target: "parse", "PROG: {:?}", program);
152            Ok(program)
153        }
154        Err(parse_error) => {
155            tracing::debug!(target: "parse", "Parse error: {:?}", parse_error);
156            Err(error::convert_peg_parse_error(
157                &parse_error,
158                tokens.as_slice(),
159            ))
160        }
161    }
162}
163
164impl peg::Parse for Tokens<'_> {
165    type PositionRepr = usize;
166
167    #[inline]
168    fn start(&self) -> usize {
169        0
170    }
171
172    #[inline]
173    fn is_eof(&self, p: usize) -> bool {
174        p >= self.tokens.len()
175    }
176
177    #[inline]
178    fn position_repr(&self, p: usize) -> Self::PositionRepr {
179        p
180    }
181}
182
183impl<'a> peg::ParseElem<'a> for Tokens<'a> {
184    type Element = &'a Token;
185
186    #[inline]
187    fn parse_elem(&'a self, pos: usize) -> peg::RuleResult<Self::Element> {
188        match self.tokens.get(pos) {
189            Some(c) => peg::RuleResult::Matched(pos + 1, c),
190            None => peg::RuleResult::Failed,
191        }
192    }
193}
194
195impl<'a> peg::ParseSlice<'a> for Tokens<'a> {
196    type Slice = String;
197
198    fn parse_slice(&'a self, start: usize, end: usize) -> Self::Slice {
199        let mut result = String::new();
200        let mut last_token_was_word = false;
201
202        for token in &self.tokens[start..end] {
203            match token {
204                Token::Operator(s, _) => {
205                    result.push_str(s);
206                    last_token_was_word = false;
207                }
208                Token::Word(s, _) => {
209                    // Place spaces between adjacent words.
210                    if last_token_was_word {
211                        result.push(' ');
212                    }
213
214                    result.push_str(s);
215                    last_token_was_word = true;
216                }
217            }
218        }
219
220        result
221    }
222}
223
/// Information about the source of tokens.
//
// N.B. `Debug` is derived so source metadata can appear in diagnostics;
// public types should almost always be `Debug`.
#[derive(Clone, Debug, Default)]
pub struct SourceInfo {
    /// The source of the tokens.
    pub source: String,
}
230
231peg::parser! {
232    grammar token_parser<'a>(parser_options: &ParserOptions, source_info: &SourceInfo) for Tokens<'a> {
233        pub(crate) rule program() -> ast::Program =
234            linebreak() c:complete_commands() linebreak() { ast::Program { complete_commands: c } } /
235            linebreak() { ast::Program { complete_commands: vec![] } }
236
237        rule complete_commands() -> Vec<ast::CompleteCommand> =
238            c:complete_command() ++ newline_list()
239
240        rule complete_command() -> ast::CompleteCommand =
241            first:and_or() remainder:(s:separator_op() l:and_or() { (s, l) })* last_sep:separator_op()? {
242                let mut and_ors = vec![first];
243                let mut seps = vec![];
244
245                for (sep, ao) in remainder {
246                    seps.push(sep);
247                    and_ors.push(ao);
248                }
249
250                // N.B. We default to synchronous if no separator op is given.
251                seps.push(last_sep.unwrap_or(SeparatorOperator::Sequence));
252
253                let mut items = vec![];
254                for (i, ao) in and_ors.into_iter().enumerate() {
255                    items.push(ast::CompoundListItem(ao, seps[i].clone()));
256                }
257
258                ast::CompoundList(items)
259            }
260
261        rule and_or() -> ast::AndOrList =
262            first:pipeline() additional:_and_or_item()* { ast::AndOrList { first, additional } }
263
264        rule _and_or_item() -> ast::AndOr =
265            op:_and_or_op() linebreak() p:pipeline() { op(p) }
266
267        rule _and_or_op() -> fn(ast::Pipeline) -> ast::AndOr =
268            specific_operator("&&") { ast::AndOr::And } /
269            specific_operator("||") { ast::AndOr::Or }
270
271        rule pipeline() -> ast::Pipeline =
272            timed:pipeline_timed()? bang:bang()? seq:pipe_sequence() { ast::Pipeline { timed, bang: bang.is_some(), seq } }
273
274        rule pipeline_timed() -> ast::PipelineTimed =
275            non_posix_extensions_enabled() specific_word("time") posix_output:specific_word("-p")? {
276                if posix_output.is_some() {
277                    ast::PipelineTimed::TimedWithPosixOutput
278                } else {
279                    ast::PipelineTimed::Timed
280                }
281            }
282
283        rule bang() -> bool = specific_word("!") { true }
284
285        pub(crate) rule pipe_sequence() -> Vec<ast::Command> =
286            c:(c:command() r:&pipe_extension_redirection()? {? // check for `|&` without consuming the stream.
287                let mut c = c;
288                if r.is_some() {
289                    add_pipe_extension_redirection(&mut c)?;
290                }
291                Ok(c)
292            }) ++ (pipe_operator() linebreak()) {
293            c
294        }
295        rule pipe_operator() =
296            specific_operator("|") /
297            pipe_extension_redirection()
298
299        rule pipe_extension_redirection() -> &'input Token  =
300            non_posix_extensions_enabled() p:specific_operator("|&") { p }
301
302        // N.B. We needed to move the function definition branch up to avoid conflicts with array assignment syntax.
303        rule command() -> ast::Command =
304            f:function_definition() { ast::Command::Function(f) } /
305            c:simple_command() { ast::Command::Simple(c) } /
306            c:compound_command() r:redirect_list()? { ast::Command::Compound(c, r) } /
307            // N.B. Extended test commands are bash extensions.
308            non_posix_extensions_enabled() c:extended_test_command() { ast::Command::ExtendedTest(c) } /
309            expected!("command")
310
311        // N.B. The arithmetic command is a non-sh extension.
312        // N.B. The arithmetic for clause command is a non-sh extension.
313        pub(crate) rule compound_command() -> ast::CompoundCommand =
314            non_posix_extensions_enabled() a:arithmetic_command() { ast::CompoundCommand::Arithmetic(a) } /
315            b:brace_group() { ast::CompoundCommand::BraceGroup(b) } /
316            s:subshell() { ast::CompoundCommand::Subshell(s) } /
317            f:for_clause() { ast::CompoundCommand::ForClause(f) } /
318            c:case_clause() { ast::CompoundCommand::CaseClause(c) } /
319            i:if_clause() { ast::CompoundCommand::IfClause(i) } /
320            w:while_clause() { ast::CompoundCommand::WhileClause(w) } /
321            u:until_clause() { ast::CompoundCommand::UntilClause(u) } /
322            non_posix_extensions_enabled() c:arithmetic_for_clause() { ast::CompoundCommand::ArithmeticForClause(c) } /
323            expected!("compound command")
324
325        pub(crate) rule arithmetic_command() -> ast::ArithmeticCommand =
326            specific_operator("(") specific_operator("(") expr:arithmetic_expression() specific_operator(")") specific_operator(")") {
327                ast::ArithmeticCommand { expr }
328            }
329
330        pub(crate) rule arithmetic_expression() -> ast::UnexpandedArithmeticExpr =
331            raw_expr:$(arithmetic_expression_piece()*) { ast::UnexpandedArithmeticExpr { value: raw_expr } }
332
333        rule arithmetic_expression_piece() =
334            specific_operator("(") (!specific_operator(")") arithmetic_expression_piece())* specific_operator(")") {} /
335            !arithmetic_end() [_] {}
336
337        // TODO: evaluate arithmetic end; the semicolon is used in arithmetic for loops.
338        rule arithmetic_end() -> () =
339            specific_operator(")") specific_operator(")") {} /
340            specific_operator(";") {}
341
342        rule subshell() -> ast::SubshellCommand =
343            specific_operator("(") c:compound_list() specific_operator(")") { ast::SubshellCommand(c) }
344
345        rule compound_list() -> ast::CompoundList =
346            linebreak() first:and_or() remainder:(s:separator() l:and_or() { (s, l) })* last_sep:separator()? {
347                let mut and_ors = vec![first];
348                let mut seps = vec![];
349
350                for (sep, ao) in remainder {
351                    seps.push(sep.unwrap_or(SeparatorOperator::Sequence));
352                    and_ors.push(ao);
353                }
354
355                // N.B. We default to synchronous if no separator op is given.
356                let last_sep = last_sep.unwrap_or(None);
357                seps.push(last_sep.unwrap_or(SeparatorOperator::Sequence));
358
359                let mut items = vec![];
360                for (i, ao) in and_ors.into_iter().enumerate() {
361                    items.push(ast::CompoundListItem(ao, seps[i].clone()));
362                }
363
364                ast::CompoundList(items)
365            }
366
367        rule for_clause() -> ast::ForClauseCommand =
368            specific_word("for") n:name() linebreak() _in() w:wordlist()? sequential_sep() d:do_group() {
369                ast::ForClauseCommand { variable_name: n.to_owned(), values: w, body: d }
370            } /
371            specific_word("for") n:name() sequential_sep()? d:do_group() {
372                ast::ForClauseCommand { variable_name: n.to_owned(), values: None, body: d }
373            }
374
375        // N.B. The arithmetic for loop is a non-sh extension.
376        rule arithmetic_for_clause() -> ast::ArithmeticForClauseCommand =
377            specific_word("for")
378            specific_operator("(") specific_operator("(")
379                initializer:arithmetic_expression()? specific_operator(";")
380                condition:arithmetic_expression()? specific_operator(";")
381                updater:arithmetic_expression()?
382            specific_operator(")") specific_operator(")")
383            sequential_sep()
384            body:do_group() {
385                ast::ArithmeticForClauseCommand { initializer, condition, updater, body }
386            }
387
388        rule extended_test_command() -> ast::ExtendedTestExpr =
389            specific_word("[[") linebreak() e:extended_test_expression() linebreak() specific_word("]]") { e }
390
391        rule extended_test_expression() -> ast::ExtendedTestExpr = precedence! {
392            left:(@) linebreak() specific_operator("||") linebreak() right:@ { ast::ExtendedTestExpr::Or(Box::from(left), Box::from(right)) }
393            --
394            left:(@) linebreak() specific_operator("&&") linebreak() right:@ { ast::ExtendedTestExpr::And(Box::from(left), Box::from(right)) }
395            --
396            specific_word("!") e:@ { ast::ExtendedTestExpr::Not(Box::from(e)) }
397            --
398            specific_operator("(") e:extended_test_expression() specific_operator(")") { ast::ExtendedTestExpr::Parenthesized(Box::from(e)) }
399            --
400            // Arithmetic operators
401            left:word() specific_word("-eq") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::ArithmeticEqualTo, ast::Word::from(left), ast::Word::from(right)) }
402            left:word() specific_word("-ne") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::ArithmeticNotEqualTo, ast::Word::from(left), ast::Word::from(right)) }
403            left:word() specific_word("-lt") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::ArithmeticLessThan, ast::Word::from(left), ast::Word::from(right)) }
404            left:word() specific_word("-le") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::ArithmeticLessThanOrEqualTo, ast::Word::from(left), ast::Word::from(right)) }
405            left:word() specific_word("-gt") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::ArithmeticGreaterThan, ast::Word::from(left), ast::Word::from(right)) }
406            left:word() specific_word("-ge") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::ArithmeticGreaterThanOrEqualTo, ast::Word::from(left), ast::Word::from(right)) }
407            // Non-arithmetic binary operators
408            left:word() specific_word("-ef") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::FilesReferToSameDeviceAndInodeNumbers, ast::Word::from(left), ast::Word::from(right)) }
409            left:word() specific_word("-nt") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::LeftFileIsNewerOrExistsWhenRightDoesNot, ast::Word::from(left), ast::Word::from(right)) }
410            left:word() specific_word("-ot") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::LeftFileIsOlderOrDoesNotExistWhenRightDoes, ast::Word::from(left), ast::Word::from(right)) }
411            left:word() (specific_word("==") / specific_word("=")) right:word()  { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::StringExactlyMatchesPattern, ast::Word::from(left), ast::Word::from(right)) }
412            left:word() specific_word("!=") right:word()  { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::StringDoesNotExactlyMatchPattern, ast::Word::from(left), ast::Word::from(right)) }
413            left:word() specific_word("=~") right:regex_word()  {
414                if right.value.starts_with(['\'', '\"']) {
415                    // TODO: Confirm it ends with that too?
416                    ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::StringContainsSubstring, ast::Word::from(left), right)
417                } else {
418                    ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::StringMatchesRegex, ast::Word::from(left), right)
419                }
420            }
421            left:word() specific_operator("<") right:word()   { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::LeftSortsBeforeRight, ast::Word::from(left), ast::Word::from(right)) }
422            left:word() specific_operator(">") right:word()   { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::LeftSortsAfterRight, ast::Word::from(left), ast::Word::from(right)) }
423            --
424            p:extended_unary_predicate() f:word() { ast::ExtendedTestExpr::UnaryTest(p, ast::Word::from(f)) }
425            --
426            w:word() { ast::ExtendedTestExpr::UnaryTest(ast::UnaryPredicate::StringHasNonZeroLength, ast::Word::from(w)) }
427        }
428
429        rule extended_unary_predicate() -> ast::UnaryPredicate =
430            specific_word("-a") { ast::UnaryPredicate::FileExists } /
431            specific_word("-b") { ast::UnaryPredicate::FileExistsAndIsBlockSpecialFile } /
432            specific_word("-c") { ast::UnaryPredicate::FileExistsAndIsCharSpecialFile } /
433            specific_word("-d") { ast::UnaryPredicate::FileExistsAndIsDir } /
434            specific_word("-e") { ast::UnaryPredicate::FileExists } /
435            specific_word("-f") { ast::UnaryPredicate::FileExistsAndIsRegularFile } /
436            specific_word("-g") { ast::UnaryPredicate::FileExistsAndIsSetgid } /
437            specific_word("-h") { ast::UnaryPredicate::FileExistsAndIsSymlink } /
438            specific_word("-k") { ast::UnaryPredicate::FileExistsAndHasStickyBit } /
439            specific_word("-n") { ast::UnaryPredicate::StringHasNonZeroLength } /
440            specific_word("-o") { ast::UnaryPredicate::ShellOptionEnabled } /
441            specific_word("-p") { ast::UnaryPredicate::FileExistsAndIsFifo } /
442            specific_word("-r") { ast::UnaryPredicate::FileExistsAndIsReadable } /
443            specific_word("-s") { ast::UnaryPredicate::FileExistsAndIsNotZeroLength } /
444            specific_word("-t") { ast::UnaryPredicate::FdIsOpenTerminal } /
445            specific_word("-u") { ast::UnaryPredicate::FileExistsAndIsSetuid } /
446            specific_word("-v") { ast::UnaryPredicate::ShellVariableIsSetAndAssigned } /
447            specific_word("-w") { ast::UnaryPredicate::FileExistsAndIsWritable } /
448            specific_word("-x") { ast::UnaryPredicate::FileExistsAndIsExecutable } /
449            specific_word("-z") { ast::UnaryPredicate::StringHasZeroLength } /
450            specific_word("-G") { ast::UnaryPredicate::FileExistsAndOwnedByEffectiveGroupId } /
451            specific_word("-L") { ast::UnaryPredicate::FileExistsAndIsSymlink } /
452            specific_word("-N") { ast::UnaryPredicate::FileExistsAndModifiedSinceLastRead } /
453            specific_word("-O") { ast::UnaryPredicate::FileExistsAndOwnedByEffectiveUserId } /
454            specific_word("-R") { ast::UnaryPredicate::ShellVariableIsSetAndNameRef } /
455            specific_word("-S") { ast::UnaryPredicate::FileExistsAndIsSocket }
456
457        // N.B. For some reason we seem to need to allow a select subset
458        // of unescaped operators in regex words.
459        rule regex_word() -> ast::Word =
460            value:$((!specific_word("]]") regex_word_piece())+) {
461                ast::Word { value }
462            }
463
464        rule regex_word_piece() =
465            word() {} /
466            specific_operator("|") {} /
467            specific_operator("(") parenthesized_regex_word()* specific_operator(")") {}
468
469        rule parenthesized_regex_word() =
470            regex_word_piece() /
471            !specific_operator(")") !specific_operator("]]") [_]
472
473        rule name() -> &'input str =
474            w:[Token::Word(_, _)] { w.to_str() }
475
476        rule _in() -> () =
477            specific_word("in") { }
478
479        rule wordlist() -> Vec<ast::Word> =
480            (w:word() { ast::Word::from(w) })+
481
482        pub(crate) rule case_clause() -> ast::CaseClauseCommand =
483            specific_word("case") w:word() linebreak() _in() linebreak() first_items:case_item()* last_item:case_item_ns()? specific_word("esac") {
484                let mut cases = first_items;
485
486                if let Some(last_item) = last_item {
487                    cases.push(last_item);
488                }
489
490                ast::CaseClauseCommand { value: ast::Word::from(w), cases }
491            }
492
493        pub(crate) rule case_item_ns() -> ast::CaseItem =
494            specific_operator("(")? p:pattern() specific_operator(")") c:compound_list() {
495                ast::CaseItem { patterns: p, cmd: Some(c), post_action: ast::CaseItemPostAction::ExitCase }
496            } /
497            specific_operator("(")? p:pattern() specific_operator(")") linebreak() {
498                ast::CaseItem { patterns: p, cmd: None, post_action: ast::CaseItemPostAction::ExitCase }
499            }
500
501        pub(crate) rule case_item() -> ast::CaseItem =
502            specific_operator("(")? p:pattern() specific_operator(")") linebreak() post_action:case_item_post_action() linebreak() {
503                ast::CaseItem { patterns: p, cmd: None, post_action }
504            } /
505            specific_operator("(")? p:pattern() specific_operator(")") c:compound_list() post_action:case_item_post_action() linebreak() {
506                ast::CaseItem { patterns: p, cmd: Some(c), post_action }
507            }
508
509        rule case_item_post_action() -> ast::CaseItemPostAction =
510            specific_operator(";;") {
511                ast::CaseItemPostAction::ExitCase
512            } /
513            non_posix_extensions_enabled() specific_operator(";;&") {
514                ast::CaseItemPostAction::ContinueEvaluatingCases
515            } /
516            non_posix_extensions_enabled() specific_operator(";&") {
517                ast::CaseItemPostAction::UnconditionallyExecuteNextCaseItem
518            }
519
520        rule pattern() -> Vec<ast::Word> =
521            (w:word() { ast::Word::from(w) }) ++ specific_operator("|")
522
523        rule if_clause() -> ast::IfClauseCommand =
524            specific_word("if") condition:compound_list() specific_word("then") then:compound_list() elses:else_part()? specific_word("fi") {
525                ast::IfClauseCommand {
526                    condition,
527                    then,
528                    elses,
529                }
530            }
531
532        rule else_part() -> Vec<ast::ElseClause> =
533            cs:_conditional_else_part()+ u:_unconditional_else_part()? {
534                let mut parts = vec![];
535                for c in cs {
536                    parts.push(c);
537                }
538
539                if let Some(uncond) = u {
540                    parts.push(uncond);
541                }
542
543                parts
544            } /
545            e:_unconditional_else_part() { vec![e] }
546
547        rule _conditional_else_part() -> ast::ElseClause =
548            specific_word("elif") condition:compound_list() specific_word("then") body:compound_list() {
549                ast::ElseClause { condition: Some(condition), body }
550            }
551
552        rule _unconditional_else_part() -> ast::ElseClause =
553            specific_word("else") body:compound_list() {
554                ast::ElseClause { condition: None, body }
555             }
556
557        rule while_clause() -> ast::WhileOrUntilClauseCommand =
558            specific_word("while") c:compound_list() d:do_group() { ast::WhileOrUntilClauseCommand(c, d) }
559
560        rule until_clause() -> ast::WhileOrUntilClauseCommand =
561            specific_word("until") c:compound_list() d:do_group() { ast::WhileOrUntilClauseCommand(c, d) }
562
563        // N.B. Non-sh extensions allows use of the 'function' word to indicate a function definition.
564        rule function_definition() -> ast::FunctionDefinition =
565            specific_word("function")? fname:fname() body:function_parens_and_body() {
566                ast::FunctionDefinition { fname: fname.to_owned(), body, source: source_info.source.clone() }
567            } /
568            specific_word("function") fname:fname() linebreak() body:function_body() {
569                ast::FunctionDefinition { fname: fname.to_owned(), body, source: source_info.source.clone() }
570            } /
571            expected!("function definition")
572
573        pub(crate) rule function_parens_and_body() -> ast::FunctionBody =
574            specific_operator("(") specific_operator(")") linebreak() body:function_body() { body }
575
576        rule function_body() -> ast::FunctionBody =
577            c:compound_command() r:redirect_list()? { ast::FunctionBody(c, r) }
578
579        rule fname() -> &'input str =
580            // Special-case: don't allow it to end with an equals sign, to avoid the challenge of
581            // misinterpreting certain declaration assignments as function definitions.
582            // TODO: Find a way to make this still work without requiring this targeted exception.
583            w:[Token::Word(word, _) if !word.ends_with('=')] { w.to_str() }
584
585        rule brace_group() -> ast::BraceGroupCommand =
586            specific_word("{") c:compound_list() specific_word("}") { ast::BraceGroupCommand(c) }
587
588        rule do_group() -> ast::DoGroupCommand =
589            specific_word("do") c:compound_list() specific_word("done") { ast::DoGroupCommand(c) }
590
591        rule simple_command() -> ast::SimpleCommand =
592            prefix:cmd_prefix() word_and_suffix:(word_or_name:cmd_word() suffix:cmd_suffix()? { (word_or_name, suffix) })? {
593                match word_and_suffix {
594                    Some((word_or_name, suffix)) => {
595                        ast::SimpleCommand { prefix: Some(prefix), word_or_name: Some(ast::Word::from(word_or_name)), suffix }
596                    }
597                    None => {
598                        ast::SimpleCommand { prefix: Some(prefix), word_or_name: None, suffix: None }
599                    }
600                }
601            } /
602            word_or_name:cmd_name() suffix:cmd_suffix()? {
603                ast::SimpleCommand { prefix: None, word_or_name: Some(ast::Word::from(word_or_name)), suffix } } /
604            expected!("simple command")
605
606        rule cmd_name() -> &'input Token =
607            non_reserved_word()
608
609        rule cmd_word() -> &'input Token =
610            !assignment_word() w:non_reserved_word() { w }
611
612        rule cmd_prefix() -> ast::CommandPrefix =
613            p:(
614                i:io_redirect() { ast::CommandPrefixOrSuffixItem::IoRedirect(i) } /
615                assignment_and_word:assignment_word() {
616                    let (assignment, word) = assignment_and_word;
617                    ast::CommandPrefixOrSuffixItem::AssignmentWord(assignment, word)
618                }
619            )+ { ast::CommandPrefix(p) }
620
621        rule cmd_suffix() -> ast::CommandSuffix =
622            s:(
623                non_posix_extensions_enabled() sub:process_substitution() {
624                    let (kind, subshell) = sub;
625                    ast::CommandPrefixOrSuffixItem::ProcessSubstitution(kind, subshell)
626                } /
627                i:io_redirect() {
628                    ast::CommandPrefixOrSuffixItem::IoRedirect(i)
629                } /
630                assignment_and_word:assignment_word() {
631                    let (assignment, word) = assignment_and_word;
632                    ast::CommandPrefixOrSuffixItem::AssignmentWord(assignment, word)
633                } /
634                w:word() {
635                    ast::CommandPrefixOrSuffixItem::Word(ast::Word::from(w))
636                }
637            )+ { ast::CommandSuffix(s) }
638
639        rule redirect_list() -> ast::RedirectList =
640            r:io_redirect()+ { ast::RedirectList(r) } /
641            expected!("redirect list")
642
        // N.B. here strings are extensions to the POSIX standard.
        // Parses a single I/O redirect, with an optional leading fd number
        // where the form allows one. The `&>>`/`&>` forms (redirect stdout
        // and stderr together) and `<<<` here strings are non-POSIX
        // extensions; here documents are tried last.
        rule io_redirect() -> ast::IoRedirect =
            n:io_number()? f:io_file() {
                    let (kind, target) = f;
                    ast::IoRedirect::File(n, kind, target)
                } /
            non_posix_extensions_enabled() specific_operator("&>>") target:filename() { ast::IoRedirect::OutputAndError(ast::Word::from(target), true) } /
            non_posix_extensions_enabled() specific_operator("&>") target:filename() { ast::IoRedirect::OutputAndError(ast::Word::from(target), false) } /
            non_posix_extensions_enabled() n:io_number()? specific_operator("<<<") w:word() { ast::IoRedirect::HereString(n, ast::Word::from(w)) } /
            n:io_number()? h:io_here() { ast::IoRedirect::HereDocument(n, h) } /
            expected!("I/O redirect")
654
        // N.B. Process substitution forms are extensions to the POSIX standard.
        // Maps each file-redirection operator to its redirect kind plus the
        // parsed target that follows it.
        rule io_file() -> (ast::IoFileRedirectKind, ast::IoFileRedirectTarget) =
            specific_operator("<")  f:io_filename() { (ast::IoFileRedirectKind::Read, f) } /
            specific_operator("<&") f:io_fd_duplication_source() { (ast::IoFileRedirectKind::DuplicateInput, f) } /
            specific_operator(">")  f:io_filename() { (ast::IoFileRedirectKind::Write, f) } /
            specific_operator(">&") f:io_fd_duplication_source() { (ast::IoFileRedirectKind::DuplicateOutput, f) } /
            specific_operator(">>") f:io_filename() { (ast::IoFileRedirectKind::Append, f) } /
            specific_operator("<>") f:io_filename() { (ast::IoFileRedirectKind::ReadAndWrite, f) } /
            specific_operator(">|") f:io_filename() { (ast::IoFileRedirectKind::Clobber, f) }
664
        // Parses the word following `<&`/`>&`, wrapping it as a
        // fd-duplication target (e.g. the `1` in `2>&1`).
        rule io_fd_duplication_source() -> ast::IoFileRedirectTarget =
            w:word() { ast::IoFileRedirectTarget::Duplicate(ast::Word::from(w)) }

        // Parses a word token as a numeric file descriptor; the fallible
        // action makes the rule fail (not panic) when the text isn't a
        // valid u32.
        rule io_fd() -> u32 =
            w:[Token::Word(_, _)] {? w.to_str().parse().or(Err("io_fd u32")) }
670
        // The target of a file redirect: a process substitution (non-POSIX
        // extension) or a plain filename.
        rule io_filename() -> ast::IoFileRedirectTarget =
            non_posix_extensions_enabled() sub:process_substitution() {
                let (kind, subshell) = sub;
                ast::IoFileRedirectTarget::ProcessSubstitution(kind, subshell)
            } /
            f:filename() { ast::IoFileRedirectTarget::Filename(ast::Word::from(f)) }

        // A filename is any word token.
        rule filename() -> &'input Token =
            word()
680
        // Parses a here document: `<<-` (tab-stripping) or `<<`, then the
        // tag, the document body (a single token), and the closing tag.
        // Expansion of the body is suppressed when the tag contains any
        // quoting character (', ", or \). The `closing_tag` binding only
        // consumes the trailing tag token; its value is unused.
        pub(crate) rule io_here() -> ast::IoHereDocument =
           specific_operator("<<-") here_tag:here_tag() doc:[_] closing_tag:here_tag() {
                let requires_expansion = !here_tag.to_str().contains(['\'', '"', '\\']);
                ast::IoHereDocument {
                    remove_tabs: true,
                    requires_expansion,
                    here_end: ast::Word::from(here_tag),
                    doc: ast::Word::from(doc)
                }
            } /
            specific_operator("<<") here_tag:here_tag() doc:[_] closing_tag:here_tag() {
                let requires_expansion = !here_tag.to_str().contains(['\'', '"', '\\']);
                ast::IoHereDocument {
                    remove_tabs: false,
                    requires_expansion,
                    here_end: ast::Word::from(here_tag),
                    doc: ast::Word::from(doc)
                }
            }

        // The tag delimiting a here document is a word token.
        rule here_tag() -> &'input Token =
            word()
703
        // Parses a process substitution: `<(...)` reads from the subshell,
        // `>(...)` writes to it. (The `<`/`>` and `(` arrive as separate
        // tokens; the subshell rule consumes the parenthesized body.)
        rule process_substitution() -> (ast::ProcessSubstitutionKind, ast::SubshellCommand) =
            specific_operator("<") s:subshell() { (ast::ProcessSubstitutionKind::Read, s) } /
            specific_operator(">") s:subshell() { (ast::ProcessSubstitutionKind::Write, s) }
707
        // One or more newline tokens.
        rule newline_list() -> () =
            newline()+ {}

        // Zero or more newline tokens (quiet: never reported in error
        // expectations).
        rule linebreak() -> () =
            quiet! {
                newline()* {}
            }

        // A command separator operator: `&` runs the preceding command
        // asynchronously; `;` runs it sequentially.
        rule separator_op() -> ast::SeparatorOperator =
            specific_operator("&") { ast::SeparatorOperator::Async } /
            specific_operator(";") { ast::SeparatorOperator::Sequence }

        // A separator operator (optionally followed by newlines), or a bare
        // run of newlines (in which case no operator is yielded).
        rule separator() -> Option<ast::SeparatorOperator> =
            s:separator_op() linebreak() { Some(s) } /
            newline_list() { None }

        // A `;` (optionally followed by newlines), or a run of newlines.
        rule sequential_sep() -> () =
            specific_operator(";") linebreak() /
            newline_list()
727
728        //
729        // Token interpretation
730        //
731
732        rule non_reserved_word() -> &'input Token =
733            !reserved_word() w:word() { w }
734
735        rule word() -> &'input Token =
736            [Token::Word(_, _)]
737
738        rule reserved_word() -> &'input Token =
739            [Token::Word(w, _) if matches!(w.as_str(),
740                "!" |
741                "{" |
742                "}" |
743                "case" |
744                "do" |
745                "done" |
746                "elif" |
747                "else" |
748                "esac" |
749                "fi" |
750                "for" |
751                "if" |
752                "in" |
753                "then" |
754                "until" |
755                "while"
756            )] /
757
758            // N.B. bash also treats the following as reserved.
759            non_posix_extensions_enabled() token:non_posix_reserved_word_token() { token }
760
761        rule non_posix_reserved_word_token() -> &'input Token =
762            specific_word("[[") /
763            specific_word("]]") /
764            specific_word("function") /
765            specific_word("select")
766
767        rule newline() -> () = quiet! {
768            specific_operator("\n") {}
769        }
770
        // Parses an assignment word, yielding both the parsed assignment and
        // the full original text as a word. The array form `name=(elem ...)`
        // is a non-POSIX extension and is tried first.
        pub(crate) rule assignment_word() -> (ast::Assignment, ast::Word) =
            non_posix_extensions_enabled() [Token::Word(w, _)] specific_operator("(") elements:array_elements() specific_operator(")") {?
                let parsed = parse_array_assignment(w.as_str(), elements.as_slice())?;

                // Reconstruct the assignment's full text, since the tokenizer
                // split it across several tokens.
                let mut all_as_word = w.to_owned();
                all_as_word.push('(');
                for (i, e) in elements.iter().enumerate() {
                    if i > 0 {
                        all_as_word.push(' ');
                    }
                    all_as_word.push_str(e);
                }
                all_as_word.push(')');

                Ok((parsed, ast::Word { value: all_as_word }))
            } /
            [Token::Word(w, _)] {?
                let parsed = parse_assignment_word(w.as_str())?;
                Ok((parsed, ast::Word { value: w.to_owned() }))
            }

        // Zero or more array elements, allowing surrounding newlines.
        rule array_elements() -> Vec<&'input String> =
             linebreak() e:array_element()* { e }

        // A single array element: a word token, allowing surrounding newlines.
        rule array_element() -> &'input String =
            linebreak() [Token::Word(e, _)] linebreak() { e }
797
798        // N.B. An I/O number must be a string of only digits, and it must be
799        // followed by a '<' or '>' character (but not consume them). We also
800        // need to make sure that there was no space between the number and the
801        // redirection operator; unfortunately we don't have the space anymore
802        // but we can infer it by looking at the tokens' locations.
803        rule io_number() -> u32 =
804            [Token::Word(w, num_loc) if w.chars().all(|c: char| c.is_ascii_digit())]
805            &([Token::Operator(o, redir_loc) if
806                    o.starts_with(['<', '>']) &&
807                    locations_are_contiguous(num_loc, redir_loc)]) {
808
809                w.parse().unwrap()
810            }
811
812        //
813        // Helpers
814        //
815        rule specific_operator(expected: &str) -> &'input Token =
816            [Token::Operator(w, _) if w.as_str() == expected]
817
818        rule specific_word(expected: &str) -> &'input Token =
819            [Token::Word(w, _) if w.as_str() == expected]
820
821        rule non_posix_extensions_enabled() -> () =
822            &[_] {? if !parser_options.sh_mode { Ok(()) } else { Err("posix") } }
823    }
824}
825
peg::parser! {
    // Character-level grammar for parsing the text of assignment words
    // (e.g. `name=value`, `name+=value`, `name[idx]=value`, `[key]=value`).
    grammar assignments() for str {
        // Parses a full scalar assignment: name, `=` or `+=`, then the value.
        pub(crate) rule name_and_scalar_value() -> ast::Assignment =
            nae:name_and_equals() value:scalar_value() {
                let (name, append) = nae;
                ast::Assignment { name, value, append }
            }

        // Parses the name plus `=`/`+=` portion of an assignment, reporting
        // whether the append (`+=`) form was used.
        pub(crate) rule name_and_equals() -> (ast::AssignmentName, bool) =
            name:name() append:("+"?) "=" {
                (name, append.is_some())
            }

        // Parses one literal array element: either the keyed form
        // `[key]=value` or a bare (unkeyed) value.
        pub(crate) rule literal_array_element() -> (Option<String>, String) =
            "[" inner:$((!"]" [_])*) "]=" value:$([_]*) {
                (Some(inner.to_owned()), value.to_owned())
            } /
            value:$([_]+) {
                (None, value.to_owned())
            }

        // An assignment name: an array element reference (`name[idx]`) or a
        // plain variable name.
        rule name() -> ast::AssignmentName =
            aen:array_element_name() {
                let (name, index) = aen;
                ast::AssignmentName::ArrayElementName(name.to_owned(), index.to_owned())
            } /
            name:scalar_name() {
                ast::AssignmentName::VariableName(name.to_owned())
            }

        rule array_element_name() -> (&'input str, &'input str) =
            name:scalar_name() "[" ai:array_index() "]" { (name, ai) }

        // The index is any run of characters up to the closing bracket.
        rule array_index() -> &'input str =
            $((![']'] [_])*)

        // A valid variable name: letter or underscore, then any mix of
        // letters, digits, and underscores.
        rule scalar_name() -> &'input str =
            $(alpha_or_underscore() non_first_variable_char()*)

        rule non_first_variable_char() -> () =
            ['_' | '0'..='9' | 'a'..='z' | 'A'..='Z'] {}

        rule alpha_or_underscore() -> () =
            ['_' | 'a'..='z' | 'A'..='Z'] {}

        // Everything remaining after the `=` is the scalar value.
        rule scalar_value() -> ast::AssignmentValue =
            v:$([_]*) { ast::AssignmentValue::Scalar(ast::Word { value: v.to_owned() }) }
    }
}
875
876fn parse_assignment_word(word: &str) -> Result<ast::Assignment, &'static str> {
877    let parse_result = assignments::name_and_scalar_value(word);
878    parse_result.map_err(|_| "not assignment word")
879}
880
881// add `2>&1` to the command if the pipeline is `|&`
882fn add_pipe_extension_redirection(c: &mut ast::Command) -> Result<(), &'static str> {
883    fn add_to_redirect_list(l: &mut Option<ast::RedirectList>, r: ast::IoRedirect) {
884        if let Some(l) = l {
885            l.0.push(r);
886        } else {
887            let v = vec![r];
888            *l = Some(ast::RedirectList(v));
889        }
890    }
891
892    let r = ast::IoRedirect::File(
893        Some(2),
894        ast::IoFileRedirectKind::DuplicateOutput,
895        ast::IoFileRedirectTarget::Fd(1),
896    );
897
898    match c {
899        ast::Command::Simple(c) => {
900            let r = ast::CommandPrefixOrSuffixItem::IoRedirect(r);
901            if let Some(l) = &mut c.suffix {
902                l.0.push(r);
903            } else {
904                c.suffix = Some(ast::CommandSuffix(vec![r]));
905            }
906        }
907        ast::Command::Compound(_, l) => add_to_redirect_list(l, r),
908        ast::Command::Function(f) => add_to_redirect_list(&mut f.body.1, r),
909        ast::Command::ExtendedTest(_) => return Err("|& unimplemented for extended tests"),
910    }
911
912    Ok(())
913}
914
/// Returns whether two token locations are adjacent in the source text,
/// i.e., the left token ends exactly at the index where the right token
/// starts (no intervening whitespace).
const fn locations_are_contiguous(
    loc_left: &crate::TokenLocation,
    loc_right: &crate::TokenLocation,
) -> bool {
    loc_left.end.index == loc_right.start.index
}
921
922fn parse_array_assignment(
923    word: &str,
924    elements: &[&String],
925) -> Result<ast::Assignment, &'static str> {
926    let (assignment_name, append) =
927        assignments::name_and_equals(word).map_err(|_| "not array assignment word")?;
928
929    let elements = elements
930        .iter()
931        .map(|element| assignments::literal_array_element(element))
932        .collect::<Result<Vec<_>, _>>()
933        .map_err(|_| "invalid array element in literal")?;
934
935    let elements_as_words = elements
936        .into_iter()
937        .map(|(key, value)| {
938            (
939                key.map(|k| ast::Word::new(k.as_str())),
940                ast::Word::new(value.as_str()),
941            )
942        })
943        .collect();
944
945    Ok(ast::Assignment {
946        name: assignment_name,
947        value: ast::AssignmentValue::Array(elements_as_words),
948        append,
949    })
950}
951
#[cfg(test)]
#[allow(clippy::panic_in_result_fn)]
mod tests {

    use super::*;
    use crate::tokenizer::tokenize_str;
    use anyhow::Result;
    use insta::assert_ron_snapshot;

    // Pairs a test's input text with its parse result so both are recorded
    // in the snapshot.
    #[derive(serde::Serialize)]
    struct ParseResult<'a, T> {
        input: &'a str,
        result: &'a T,
    }

    // Case clause whose arm is terminated by `;;`.
    #[test]
    fn parse_case() -> Result<()> {
        let input = r"\
case x in
x)
    echo y;;
esac\
";

        let tokens = tokenize_str(input)?;
        let command = super::token_parser::case_clause(
            &Tokens {
                tokens: tokens.as_slice(),
            },
            &ParserOptions::default(),
            &SourceInfo::default(),
        )?;

        assert_ron_snapshot!(ParseResult {
            input,
            result: &command
        });

        Ok(())
    }

    // Case clause whose final arm has no `;;` terminator.
    #[test]
    fn parse_case_ns() -> Result<()> {
        let input = r"\
case x in
x)
    echo y
esac\
";

        let tokens = tokenize_str(input)?;
        let command = super::token_parser::case_clause(
            &Tokens {
                tokens: tokens.as_slice(),
            },
            &ParserOptions::default(),
            &SourceInfo::default(),
        )?;

        assert_ron_snapshot!(ParseResult {
            input,
            result: &command
        });

        Ok(())
    }

    // Pipeline using the non-POSIX `|&` operator.
    #[test]
    fn parse_redirection() -> Result<()> {
        let input = r"echo |& wc";

        let tokens = tokenize_str(input)?;
        let seq = super::token_parser::pipe_sequence(
            &Tokens {
                tokens: tokens.as_slice(),
            },
            &ParserOptions::default(),
            &SourceInfo::default(),
        )?;

        assert_ron_snapshot!(ParseResult {
            input,
            result: &seq
        });

        Ok(())
    }

    // Here document whose closing tag is not followed by a newline.
    #[test]
    fn parse_here_doc_with_no_trailing_newline() -> Result<()> {
        let input = r"cat <<EOF
Something
EOF";

        let tokens = tokenize_str(input)?;
        let result = super::token_parser::program(
            &Tokens {
                tokens: tokens.as_slice(),
            },
            &ParserOptions::default(),
            &SourceInfo::default(),
        )?;

        assert_ron_snapshot!(ParseResult {
            input,
            result: &result
        });

        Ok(())
    }

    // Function definition with trailing redirection in a pipeline, in both
    // the explicit `2>&1 |` form and the `|&` shorthand.
    #[test]
    fn parse_function_with_pipe_redirection() -> Result<()> {
        let inputs = [r"foo() { echo 1; } 2>&1 | cat", r"foo() { echo 1; } |& cat"];

        for input in inputs {
            let tokens = tokenize_str(input)?;
            let seq = super::token_parser::pipe_sequence(
                &Tokens {
                    tokens: tokens.as_slice(),
                },
                &ParserOptions::default(),
                &SourceInfo::default(),
            )?;

            assert_ron_snapshot!(ParseResult {
                input,
                result: &seq
            });
        }

        Ok(())
    }

    // Small program with blank lines, comments, and a for loop.
    #[test]
    fn test_parse_program() -> Result<()> {
        let input = r#"

#!/usr/bin/env bash

for f in A B C; do

    # sdfsdf
    echo "${f@L}" >&2

   done

"#;

        let tokens = tokenize_str(input)?;
        let result = super::token_parser::program(
            &Tokens {
                tokens: tokens.as_slice(),
            },
            &ParserOptions::default(),
            &SourceInfo::default(),
        )?;

        assert_ron_snapshot!(ParseResult {
            input,
            result: &result
        });

        Ok(())
    }
}