// brush_parser/parser.rs

1use crate::ast::{self, SeparatorOperator};
2use crate::error;
3use crate::tokenizer::{Token, TokenEndReason, Tokenizer, TokenizerOptions, Tokens};
4
/// Options used to control the behavior of the parser.
//
// N.B. All fields are plain `bool`s, so the type is trivially `Copy`; `Debug`
// is derived so the options can be logged/inspected during diagnostics.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub struct ParserOptions {
    /// Whether or not to enable extended globbing (a.k.a. `extglob`).
    pub enable_extended_globbing: bool,
    /// Whether or not to enable POSIX compliance mode.
    pub posix_mode: bool,
    /// Whether or not to enable maximal compatibility with the `sh` shell.
    pub sh_mode: bool,
    /// Whether or not to perform tilde expansion.
    pub tilde_expansion: bool,
}
17
18impl Default for ParserOptions {
19    fn default() -> Self {
20        Self {
21            enable_extended_globbing: true,
22            posix_mode: false,
23            sh_mode: false,
24            tilde_expansion: true,
25        }
26    }
27}
28
29impl ParserOptions {
30    /// Returns the tokenizer options implied by these parser options.
31    pub const fn tokenizer_options(&self) -> TokenizerOptions {
32        TokenizerOptions {
33            enable_extended_globbing: self.enable_extended_globbing,
34            posix_mode: self.posix_mode,
35            sh_mode: self.sh_mode,
36        }
37    }
38}
39
/// Implements parsing for shell programs.
pub struct Parser<R> {
    /// The reader from which input is drawn during tokenization.
    reader: R,
    /// Options controlling tokenization and parsing behavior.
    options: ParserOptions,
    /// Information about the source of the input (used to tag parsed
    /// function definitions with their origin).
    source_info: SourceInfo,
}
46
47impl<R: std::io::BufRead> Parser<R> {
48    /// Returns a new parser instance.
49    ///
50    /// # Arguments
51    ///
52    /// * `reader` - The reader to use for input.
53    /// * `options` - The options to use when parsing.
54    /// * `source_info` - Information about the source of the tokens.
55    pub fn new(reader: R, options: &ParserOptions, source_info: &SourceInfo) -> Self {
56        Self {
57            reader,
58            options: options.clone(),
59            source_info: source_info.clone(),
60        }
61    }
62
63    /// Parses the input into an abstract syntax tree (AST) of a shell program.
64    pub fn parse_program(&mut self) -> Result<ast::Program, error::ParseError> {
65        //
66        // References:
67        //   * https://www.gnu.org/software/bash/manual/bash.html#Shell-Syntax
68        //   * https://mywiki.wooledge.org/BashParser
69        //   * https://aosabook.org/en/v1/bash.html
70        //   * https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html
71        //
72
73        let tokens = self.tokenize()?;
74        parse_tokens(&tokens, &self.options, &self.source_info)
75    }
76
77    /// Parses a function definition body from the input. The body is expected to be
78    /// preceded by "()", but no function name.
79    pub fn parse_function_parens_and_body(
80        &mut self,
81    ) -> Result<ast::FunctionBody, error::ParseError> {
82        let tokens = self.tokenize()?;
83        let parse_result = token_parser::function_parens_and_body(
84            &Tokens { tokens: &tokens },
85            &self.options,
86            &self.source_info,
87        );
88        parse_result_to_error(parse_result, &tokens)
89    }
90
91    fn tokenize(&mut self) -> Result<Vec<Token>, error::ParseError> {
92        // First we tokenize the input, according to the policy implied by provided options.
93        let mut tokenizer = Tokenizer::new(&mut self.reader, &self.options.tokenizer_options());
94
95        tracing::debug!(target: "tokenize", "Tokenizing...");
96
97        let mut tokens = vec![];
98        loop {
99            let result = match tokenizer.next_token() {
100                Ok(result) => result,
101                Err(e) => {
102                    return Err(error::ParseError::Tokenizing {
103                        inner: e,
104                        position: tokenizer.current_location(),
105                    });
106                }
107            };
108
109            let reason = result.reason;
110            if let Some(token) = result.token {
111                tracing::debug!(target: "tokenize", "TOKEN {}: {:?} {reason:?}", tokens.len(), token);
112                tokens.push(token);
113            }
114
115            if matches!(reason, TokenEndReason::EndOfInput) {
116                break;
117            }
118        }
119
120        tracing::debug!(target: "tokenize", "  => {} token(s)", tokens.len());
121
122        Ok(tokens)
123    }
124}
125
126/// Parses a sequence of tokens into the abstract syntax tree (AST) of a shell program.
127///
128/// # Arguments
129///
130/// * `tokens` - The tokens to parse.
131/// * `options` - The options to use when parsing.
132/// * `source_info` - Information about the source of the tokens.
133pub fn parse_tokens(
134    tokens: &Vec<Token>,
135    options: &ParserOptions,
136    source_info: &SourceInfo,
137) -> Result<ast::Program, error::ParseError> {
138    let parse_result = token_parser::program(&Tokens { tokens }, options, source_info);
139    parse_result_to_error(parse_result, tokens)
140}
141
142fn parse_result_to_error<R>(
143    parse_result: Result<R, peg::error::ParseError<usize>>,
144    tokens: &Vec<Token>,
145) -> Result<R, error::ParseError>
146where
147    R: std::fmt::Debug,
148{
149    match parse_result {
150        Ok(program) => {
151            tracing::debug!(target: "parse", "PROG: {:?}", program);
152            Ok(program)
153        }
154        Err(parse_error) => {
155            tracing::debug!(target: "parse", "Parse error: {:?}", parse_error);
156            Err(error::convert_peg_parse_error(
157                &parse_error,
158                tokens.as_slice(),
159            ))
160        }
161    }
162}
163
164impl peg::Parse for Tokens<'_> {
165    type PositionRepr = usize;
166
167    #[inline]
168    fn start(&self) -> usize {
169        0
170    }
171
172    #[inline]
173    fn is_eof(&self, p: usize) -> bool {
174        p >= self.tokens.len()
175    }
176
177    #[inline]
178    fn position_repr(&self, p: usize) -> Self::PositionRepr {
179        p
180    }
181}
182
183impl<'a> peg::ParseElem<'a> for Tokens<'a> {
184    type Element = &'a Token;
185
186    #[inline]
187    fn parse_elem(&'a self, pos: usize) -> peg::RuleResult<Self::Element> {
188        match self.tokens.get(pos) {
189            Some(c) => peg::RuleResult::Matched(pos + 1, c),
190            None => peg::RuleResult::Failed,
191        }
192    }
193}
194
195impl<'a> peg::ParseSlice<'a> for Tokens<'a> {
196    type Slice = String;
197
198    fn parse_slice(&'a self, start: usize, end: usize) -> Self::Slice {
199        let mut result = String::new();
200        let mut last_token_was_word = false;
201
202        for token in &self.tokens[start..end] {
203            match token {
204                Token::Operator(s, _) => {
205                    result.push_str(s);
206                    last_token_was_word = false;
207                }
208                Token::Word(s, _) => {
209                    // Place spaces between adjacent words.
210                    if last_token_was_word {
211                        result.push(' ');
212                    }
213
214                    result.push_str(s);
215                    last_token_was_word = true;
216                }
217            }
218        }
219
220        result
221    }
222}
223
/// Information about the source of tokens.
//
// N.B. `Debug` is derived so the type can appear in diagnostics; the
// comparison/hashing derives match those on `ParserOptions`.
#[derive(Clone, Debug, Default, Eq, Hash, PartialEq)]
pub struct SourceInfo {
    /// The source of the tokens.
    pub source: String,
}
230
231peg::parser! {
232    grammar token_parser<'a>(parser_options: &ParserOptions, source_info: &SourceInfo) for Tokens<'a> {
233        pub(crate) rule program() -> ast::Program =
234            linebreak() c:complete_commands() linebreak() { ast::Program { complete_commands: c } } /
235            linebreak() { ast::Program { complete_commands: vec![] } }
236
237        rule complete_commands() -> Vec<ast::CompleteCommand> =
238            c:complete_command() ++ newline_list()
239
240        rule complete_command() -> ast::CompleteCommand =
241            first:and_or() remainder:(s:separator_op() l:and_or() { (s, l) })* last_sep:separator_op()? {
242                let mut and_ors = vec![first];
243                let mut seps = vec![];
244
245                for (sep, ao) in remainder {
246                    seps.push(sep);
247                    and_ors.push(ao);
248                }
249
250                // N.B. We default to synchronous if no separator op is given.
251                seps.push(last_sep.unwrap_or(SeparatorOperator::Sequence));
252
253                let mut items = vec![];
254                for (i, ao) in and_ors.into_iter().enumerate() {
255                    items.push(ast::CompoundListItem(ao, seps[i].clone()));
256                }
257
258                ast::CompoundList(items)
259            }
260
261        rule and_or() -> ast::AndOrList =
262            first:pipeline() additional:_and_or_item()* { ast::AndOrList { first, additional } }
263
264        rule _and_or_item() -> ast::AndOr =
265            op:_and_or_op() linebreak() p:pipeline() { op(p) }
266
267        rule _and_or_op() -> fn(ast::Pipeline) -> ast::AndOr =
268            specific_operator("&&") { ast::AndOr::And } /
269            specific_operator("||") { ast::AndOr::Or }
270
271        rule pipeline() -> ast::Pipeline =
272            timed:pipeline_timed()? bang:bang()? seq:pipe_sequence() { ast::Pipeline { timed, bang: bang.is_some(), seq } }
273
274        rule pipeline_timed() -> ast::PipelineTimed =
275            non_posix_extensions_enabled() specific_word("time") posix_output:specific_word("-p")? {
276                if posix_output.is_some() {
277                    ast::PipelineTimed::TimedWithPosixOutput
278                } else {
279                    ast::PipelineTimed::Timed
280                }
281            }
282
283        rule bang() -> bool = specific_word("!") { true }
284
285        pub(crate) rule pipe_sequence() -> Vec<ast::Command> =
286            c:(c:command() r:&pipe_extension_redirection()? {? // check for `|&` without consuming the stream.
287                let mut c = c;
288                if r.is_some() {
289                    add_pipe_extension_redirection(&mut c)?;
290                }
291                Ok(c)
292            }) ++ (pipe_operator() linebreak()) {
293            c
294        }
295        rule pipe_operator() =
296            specific_operator("|") /
297            pipe_extension_redirection()
298
299        rule pipe_extension_redirection() -> &'input Token  =
300            non_posix_extensions_enabled() p:specific_operator("|&") { p }
301
302        // N.B. We needed to move the function definition branch up to avoid conflicts with array assignment syntax.
303        rule command() -> ast::Command =
304            f:function_definition() { ast::Command::Function(f) } /
305            c:simple_command() { ast::Command::Simple(c) } /
306            c:compound_command() r:redirect_list()? { ast::Command::Compound(c, r) } /
307            // N.B. Extended test commands are bash extensions.
308            non_posix_extensions_enabled() c:extended_test_command() { ast::Command::ExtendedTest(c) } /
309            expected!("command")
310
311        // N.B. The arithmetic command is a non-sh extension.
312        // N.B. The arithmetic for clause command is a non-sh extension.
313        pub(crate) rule compound_command() -> ast::CompoundCommand =
314            non_posix_extensions_enabled() a:arithmetic_command() { ast::CompoundCommand::Arithmetic(a) } /
315            b:brace_group() { ast::CompoundCommand::BraceGroup(b) } /
316            s:subshell() { ast::CompoundCommand::Subshell(s) } /
317            f:for_clause() { ast::CompoundCommand::ForClause(f) } /
318            c:case_clause() { ast::CompoundCommand::CaseClause(c) } /
319            i:if_clause() { ast::CompoundCommand::IfClause(i) } /
320            w:while_clause() { ast::CompoundCommand::WhileClause(w) } /
321            u:until_clause() { ast::CompoundCommand::UntilClause(u) } /
322            non_posix_extensions_enabled() c:arithmetic_for_clause() { ast::CompoundCommand::ArithmeticForClause(c) } /
323            expected!("compound command")
324
325        pub(crate) rule arithmetic_command() -> ast::ArithmeticCommand =
326            specific_operator("(") specific_operator("(") expr:arithmetic_expression() specific_operator(")") specific_operator(")") {
327                ast::ArithmeticCommand { expr }
328            }
329
330        pub(crate) rule arithmetic_expression() -> ast::UnexpandedArithmeticExpr =
331            raw_expr:$(arithmetic_expression_piece()*) { ast::UnexpandedArithmeticExpr { value: raw_expr } }
332
333        rule arithmetic_expression_piece() =
334            // Allow a parenthesized expression (with matching opening and closing parens).
335            specific_operator("(") (!specific_operator(")") arithmetic_expression_piece())* specific_operator(")") {} /
336            // Otherwise consume any token that's neither the normal end of the entire arithmetic expression, nor an
337            // unexpected mismatched closing parenthesis. In the latter case, it may be that this really was never an
338            // arithmetic expression in the first place and we need to backtrack and instead try parsing as a subshell
339            // command instead.
340            !arithmetic_end() !specific_operator(")") [_] {}
341
342        // TODO: evaluate arithmetic end; the semicolon is used in arithmetic for loops.
343        rule arithmetic_end() -> () =
344            specific_operator(")") specific_operator(")") {} /
345            specific_operator(";") {}
346
347        rule subshell() -> ast::SubshellCommand =
348            specific_operator("(") c:compound_list() specific_operator(")") { ast::SubshellCommand(c) }
349
350        rule compound_list() -> ast::CompoundList =
351            linebreak() first:and_or() remainder:(s:separator() l:and_or() { (s, l) })* last_sep:separator()? {
352                let mut and_ors = vec![first];
353                let mut seps = vec![];
354
355                for (sep, ao) in remainder {
356                    seps.push(sep.unwrap_or(SeparatorOperator::Sequence));
357                    and_ors.push(ao);
358                }
359
360                // N.B. We default to synchronous if no separator op is given.
361                let last_sep = last_sep.unwrap_or(None);
362                seps.push(last_sep.unwrap_or(SeparatorOperator::Sequence));
363
364                let mut items = vec![];
365                for (i, ao) in and_ors.into_iter().enumerate() {
366                    items.push(ast::CompoundListItem(ao, seps[i].clone()));
367                }
368
369                ast::CompoundList(items)
370            }
371
372        rule for_clause() -> ast::ForClauseCommand =
373            specific_word("for") n:name() linebreak() _in() w:wordlist()? sequential_sep() d:do_group() {
374                ast::ForClauseCommand { variable_name: n.to_owned(), values: w, body: d }
375            } /
376            specific_word("for") n:name() sequential_sep()? d:do_group() {
377                ast::ForClauseCommand { variable_name: n.to_owned(), values: None, body: d }
378            }
379
380        // N.B. The arithmetic for loop is a non-sh extension.
381        rule arithmetic_for_clause() -> ast::ArithmeticForClauseCommand =
382            specific_word("for")
383            specific_operator("(") specific_operator("(")
384                initializer:arithmetic_expression()? specific_operator(";")
385                condition:arithmetic_expression()? specific_operator(";")
386                updater:arithmetic_expression()?
387            specific_operator(")") specific_operator(")")
388            sequential_sep()
389            body:do_group() {
390                ast::ArithmeticForClauseCommand { initializer, condition, updater, body }
391            }
392
393        rule extended_test_command() -> ast::ExtendedTestExpr =
394            specific_word("[[") linebreak() e:extended_test_expression() linebreak() specific_word("]]") { e }
395
396        rule extended_test_expression() -> ast::ExtendedTestExpr = precedence! {
397            left:(@) linebreak() specific_operator("||") linebreak() right:@ { ast::ExtendedTestExpr::Or(Box::from(left), Box::from(right)) }
398            --
399            left:(@) linebreak() specific_operator("&&") linebreak() right:@ { ast::ExtendedTestExpr::And(Box::from(left), Box::from(right)) }
400            --
401            specific_word("!") e:@ { ast::ExtendedTestExpr::Not(Box::from(e)) }
402            --
403            specific_operator("(") e:extended_test_expression() specific_operator(")") { ast::ExtendedTestExpr::Parenthesized(Box::from(e)) }
404            --
405            // Arithmetic operators
406            left:word() specific_word("-eq") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::ArithmeticEqualTo, ast::Word::from(left), ast::Word::from(right)) }
407            left:word() specific_word("-ne") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::ArithmeticNotEqualTo, ast::Word::from(left), ast::Word::from(right)) }
408            left:word() specific_word("-lt") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::ArithmeticLessThan, ast::Word::from(left), ast::Word::from(right)) }
409            left:word() specific_word("-le") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::ArithmeticLessThanOrEqualTo, ast::Word::from(left), ast::Word::from(right)) }
410            left:word() specific_word("-gt") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::ArithmeticGreaterThan, ast::Word::from(left), ast::Word::from(right)) }
411            left:word() specific_word("-ge") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::ArithmeticGreaterThanOrEqualTo, ast::Word::from(left), ast::Word::from(right)) }
412            // Non-arithmetic binary operators
413            left:word() specific_word("-ef") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::FilesReferToSameDeviceAndInodeNumbers, ast::Word::from(left), ast::Word::from(right)) }
414            left:word() specific_word("-nt") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::LeftFileIsNewerOrExistsWhenRightDoesNot, ast::Word::from(left), ast::Word::from(right)) }
415            left:word() specific_word("-ot") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::LeftFileIsOlderOrDoesNotExistWhenRightDoes, ast::Word::from(left), ast::Word::from(right)) }
416            left:word() (specific_word("==") / specific_word("=")) right:word()  { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::StringExactlyMatchesPattern, ast::Word::from(left), ast::Word::from(right)) }
417            left:word() specific_word("!=") right:word()  { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::StringDoesNotExactlyMatchPattern, ast::Word::from(left), ast::Word::from(right)) }
418            left:word() specific_word("=~") right:regex_word()  {
419                if right.value.starts_with(['\'', '\"']) {
420                    // TODO: Confirm it ends with that too?
421                    ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::StringContainsSubstring, ast::Word::from(left), right)
422                } else {
423                    ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::StringMatchesRegex, ast::Word::from(left), right)
424                }
425            }
426            left:word() specific_operator("<") right:word()   { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::LeftSortsBeforeRight, ast::Word::from(left), ast::Word::from(right)) }
427            left:word() specific_operator(">") right:word()   { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::LeftSortsAfterRight, ast::Word::from(left), ast::Word::from(right)) }
428            --
429            p:extended_unary_predicate() f:word() { ast::ExtendedTestExpr::UnaryTest(p, ast::Word::from(f)) }
430            --
431            w:word() { ast::ExtendedTestExpr::UnaryTest(ast::UnaryPredicate::StringHasNonZeroLength, ast::Word::from(w)) }
432        }
433
434        rule extended_unary_predicate() -> ast::UnaryPredicate =
435            specific_word("-a") { ast::UnaryPredicate::FileExists } /
436            specific_word("-b") { ast::UnaryPredicate::FileExistsAndIsBlockSpecialFile } /
437            specific_word("-c") { ast::UnaryPredicate::FileExistsAndIsCharSpecialFile } /
438            specific_word("-d") { ast::UnaryPredicate::FileExistsAndIsDir } /
439            specific_word("-e") { ast::UnaryPredicate::FileExists } /
440            specific_word("-f") { ast::UnaryPredicate::FileExistsAndIsRegularFile } /
441            specific_word("-g") { ast::UnaryPredicate::FileExistsAndIsSetgid } /
442            specific_word("-h") { ast::UnaryPredicate::FileExistsAndIsSymlink } /
443            specific_word("-k") { ast::UnaryPredicate::FileExistsAndHasStickyBit } /
444            specific_word("-n") { ast::UnaryPredicate::StringHasNonZeroLength } /
445            specific_word("-o") { ast::UnaryPredicate::ShellOptionEnabled } /
446            specific_word("-p") { ast::UnaryPredicate::FileExistsAndIsFifo } /
447            specific_word("-r") { ast::UnaryPredicate::FileExistsAndIsReadable } /
448            specific_word("-s") { ast::UnaryPredicate::FileExistsAndIsNotZeroLength } /
449            specific_word("-t") { ast::UnaryPredicate::FdIsOpenTerminal } /
450            specific_word("-u") { ast::UnaryPredicate::FileExistsAndIsSetuid } /
451            specific_word("-v") { ast::UnaryPredicate::ShellVariableIsSetAndAssigned } /
452            specific_word("-w") { ast::UnaryPredicate::FileExistsAndIsWritable } /
453            specific_word("-x") { ast::UnaryPredicate::FileExistsAndIsExecutable } /
454            specific_word("-z") { ast::UnaryPredicate::StringHasZeroLength } /
455            specific_word("-G") { ast::UnaryPredicate::FileExistsAndOwnedByEffectiveGroupId } /
456            specific_word("-L") { ast::UnaryPredicate::FileExistsAndIsSymlink } /
457            specific_word("-N") { ast::UnaryPredicate::FileExistsAndModifiedSinceLastRead } /
458            specific_word("-O") { ast::UnaryPredicate::FileExistsAndOwnedByEffectiveUserId } /
459            specific_word("-R") { ast::UnaryPredicate::ShellVariableIsSetAndNameRef } /
460            specific_word("-S") { ast::UnaryPredicate::FileExistsAndIsSocket }
461
462        // N.B. For some reason we seem to need to allow a select subset
463        // of unescaped operators in regex words.
464        rule regex_word() -> ast::Word =
465            value:$((!specific_word("]]") regex_word_piece())+) {
466                ast::Word { value }
467            }
468
469        rule regex_word_piece() =
470            word() {} /
471            specific_operator("|") {} /
472            specific_operator("(") parenthesized_regex_word()* specific_operator(")") {}
473
474        rule parenthesized_regex_word() =
475            regex_word_piece() /
476            !specific_operator(")") !specific_operator("]]") [_]
477
478        rule name() -> &'input str =
479            w:[Token::Word(_, _)] { w.to_str() }
480
481        rule _in() -> () =
482            specific_word("in") { }
483
484        rule wordlist() -> Vec<ast::Word> =
485            (w:word() { ast::Word::from(w) })+
486
487        pub(crate) rule case_clause() -> ast::CaseClauseCommand =
488            specific_word("case") w:word() linebreak() _in() linebreak() first_items:case_item()* last_item:case_item_ns()? specific_word("esac") {
489                let mut cases = first_items;
490
491                if let Some(last_item) = last_item {
492                    cases.push(last_item);
493                }
494
495                ast::CaseClauseCommand { value: ast::Word::from(w), cases }
496            }
497
498        pub(crate) rule case_item_ns() -> ast::CaseItem =
499            specific_operator("(")? p:pattern() specific_operator(")") c:compound_list() {
500                ast::CaseItem { patterns: p, cmd: Some(c), post_action: ast::CaseItemPostAction::ExitCase }
501            } /
502            specific_operator("(")? p:pattern() specific_operator(")") linebreak() {
503                ast::CaseItem { patterns: p, cmd: None, post_action: ast::CaseItemPostAction::ExitCase }
504            }
505
506        pub(crate) rule case_item() -> ast::CaseItem =
507            specific_operator("(")? p:pattern() specific_operator(")") linebreak() post_action:case_item_post_action() linebreak() {
508                ast::CaseItem { patterns: p, cmd: None, post_action }
509            } /
510            specific_operator("(")? p:pattern() specific_operator(")") c:compound_list() post_action:case_item_post_action() linebreak() {
511                ast::CaseItem { patterns: p, cmd: Some(c), post_action }
512            }
513
514        rule case_item_post_action() -> ast::CaseItemPostAction =
515            specific_operator(";;") {
516                ast::CaseItemPostAction::ExitCase
517            } /
518            non_posix_extensions_enabled() specific_operator(";;&") {
519                ast::CaseItemPostAction::ContinueEvaluatingCases
520            } /
521            non_posix_extensions_enabled() specific_operator(";&") {
522                ast::CaseItemPostAction::UnconditionallyExecuteNextCaseItem
523            }
524
525        rule pattern() -> Vec<ast::Word> =
526            (w:word() { ast::Word::from(w) }) ++ specific_operator("|")
527
528        rule if_clause() -> ast::IfClauseCommand =
529            specific_word("if") condition:compound_list() specific_word("then") then:compound_list() elses:else_part()? specific_word("fi") {
530                ast::IfClauseCommand {
531                    condition,
532                    then,
533                    elses,
534                }
535            }
536
537        rule else_part() -> Vec<ast::ElseClause> =
538            cs:_conditional_else_part()+ u:_unconditional_else_part()? {
539                let mut parts = vec![];
540                for c in cs {
541                    parts.push(c);
542                }
543
544                if let Some(uncond) = u {
545                    parts.push(uncond);
546                }
547
548                parts
549            } /
550            e:_unconditional_else_part() { vec![e] }
551
552        rule _conditional_else_part() -> ast::ElseClause =
553            specific_word("elif") condition:compound_list() specific_word("then") body:compound_list() {
554                ast::ElseClause { condition: Some(condition), body }
555            }
556
557        rule _unconditional_else_part() -> ast::ElseClause =
558            specific_word("else") body:compound_list() {
559                ast::ElseClause { condition: None, body }
560             }
561
562        rule while_clause() -> ast::WhileOrUntilClauseCommand =
563            specific_word("while") c:compound_list() d:do_group() { ast::WhileOrUntilClauseCommand(c, d) }
564
565        rule until_clause() -> ast::WhileOrUntilClauseCommand =
566            specific_word("until") c:compound_list() d:do_group() { ast::WhileOrUntilClauseCommand(c, d) }
567
568        // N.B. Non-sh extensions allows use of the 'function' word to indicate a function definition.
569        rule function_definition() -> ast::FunctionDefinition =
570            specific_word("function")? fname:fname() body:function_parens_and_body() {
571                ast::FunctionDefinition { fname: fname.to_owned(), body, source: source_info.source.clone() }
572            } /
573            specific_word("function") fname:fname() linebreak() body:function_body() {
574                ast::FunctionDefinition { fname: fname.to_owned(), body, source: source_info.source.clone() }
575            } /
576            expected!("function definition")
577
578        pub(crate) rule function_parens_and_body() -> ast::FunctionBody =
579            specific_operator("(") specific_operator(")") linebreak() body:function_body() { body }
580
581        rule function_body() -> ast::FunctionBody =
582            c:compound_command() r:redirect_list()? { ast::FunctionBody(c, r) }
583
584        rule fname() -> &'input str =
585            // Special-case: don't allow it to end with an equals sign, to avoid the challenge of
586            // misinterpreting certain declaration assignments as function definitions.
587            // TODO: Find a way to make this still work without requiring this targeted exception.
588            w:[Token::Word(word, _) if !word.ends_with('=')] { w.to_str() }
589
590        rule brace_group() -> ast::BraceGroupCommand =
591            specific_word("{") c:compound_list() specific_word("}") { ast::BraceGroupCommand(c) }
592
593        rule do_group() -> ast::DoGroupCommand =
594            specific_word("do") c:compound_list() specific_word("done") { ast::DoGroupCommand(c) }
595
596        rule simple_command() -> ast::SimpleCommand =
597            prefix:cmd_prefix() word_and_suffix:(word_or_name:cmd_word() suffix:cmd_suffix()? { (word_or_name, suffix) })? {
598                match word_and_suffix {
599                    Some((word_or_name, suffix)) => {
600                        ast::SimpleCommand { prefix: Some(prefix), word_or_name: Some(ast::Word::from(word_or_name)), suffix }
601                    }
602                    None => {
603                        ast::SimpleCommand { prefix: Some(prefix), word_or_name: None, suffix: None }
604                    }
605                }
606            } /
607            word_or_name:cmd_name() suffix:cmd_suffix()? {
608                ast::SimpleCommand { prefix: None, word_or_name: Some(ast::Word::from(word_or_name)), suffix } } /
609            expected!("simple command")
610
611        rule cmd_name() -> &'input Token =
612            non_reserved_word()
613
614        rule cmd_word() -> &'input Token =
615            !assignment_word() w:non_reserved_word() { w }
616
617        rule cmd_prefix() -> ast::CommandPrefix =
618            p:(
619                i:io_redirect() { ast::CommandPrefixOrSuffixItem::IoRedirect(i) } /
620                assignment_and_word:assignment_word() {
621                    let (assignment, word) = assignment_and_word;
622                    ast::CommandPrefixOrSuffixItem::AssignmentWord(assignment, word)
623                }
624            )+ { ast::CommandPrefix(p) }
625
626        rule cmd_suffix() -> ast::CommandSuffix =
627            s:(
628                non_posix_extensions_enabled() sub:process_substitution() {
629                    let (kind, subshell) = sub;
630                    ast::CommandPrefixOrSuffixItem::ProcessSubstitution(kind, subshell)
631                } /
632                i:io_redirect() {
633                    ast::CommandPrefixOrSuffixItem::IoRedirect(i)
634                } /
635                assignment_and_word:assignment_word() {
636                    let (assignment, word) = assignment_and_word;
637                    ast::CommandPrefixOrSuffixItem::AssignmentWord(assignment, word)
638                } /
639                w:word() {
640                    ast::CommandPrefixOrSuffixItem::Word(ast::Word::from(w))
641                }
642            )+ { ast::CommandSuffix(s) }
643
        // One or more consecutive I/O redirects.
        rule redirect_list() -> ast::RedirectList =
            r:io_redirect()+ { ast::RedirectList(r) } /
            expected!("redirect list")

        // A single I/O redirection, with an optional leading fd number.
        // Covers file redirects, `&>`/`&>>` (redirect stdout+stderr),
        // here strings (`<<<`), and here documents.
        // N.B. here strings are extensions to the POSIX standard.
        rule io_redirect() -> ast::IoRedirect =
            n:io_number()? f:io_file() {
                    let (kind, target) = f;
                    ast::IoRedirect::File(n, kind, target)
                } /
            non_posix_extensions_enabled() specific_operator("&>>") target:filename() { ast::IoRedirect::OutputAndError(ast::Word::from(target), true) } /
            non_posix_extensions_enabled() specific_operator("&>") target:filename() { ast::IoRedirect::OutputAndError(ast::Word::from(target), false) } /
            non_posix_extensions_enabled() n:io_number()? specific_operator("<<<") w:word() { ast::IoRedirect::HereString(n, ast::Word::from(w)) } /
            n:io_number()? h:io_here() { ast::IoRedirect::HereDocument(n, h) } /
            expected!("I/O redirect")

        // Maps a file-redirection operator to its kind plus target.
        // N.B. Process substitution forms are extensions to the POSIX standard.
        rule io_file() -> (ast::IoFileRedirectKind, ast::IoFileRedirectTarget) =
            specific_operator("<")  f:io_filename() { (ast::IoFileRedirectKind::Read, f) } /
            specific_operator("<&") f:io_fd_duplication_source() { (ast::IoFileRedirectKind::DuplicateInput, f) } /
            specific_operator(">")  f:io_filename() { (ast::IoFileRedirectKind::Write, f) } /
            specific_operator(">&") f:io_fd_duplication_source() { (ast::IoFileRedirectKind::DuplicateOutput, f) } /
            specific_operator(">>") f:io_filename() { (ast::IoFileRedirectKind::Append, f) } /
            specific_operator("<>") f:io_filename() { (ast::IoFileRedirectKind::ReadAndWrite, f) } /
            specific_operator(">|") f:io_filename() { (ast::IoFileRedirectKind::Clobber, f) }

        // The source of an fd duplication (`<&`/`>&`), kept as a word so it
        // can be resolved later (e.g. it may require expansion first).
        rule io_fd_duplication_source() -> ast::IoFileRedirectTarget =
            w:word() { ast::IoFileRedirectTarget::Duplicate(ast::Word::from(w)) }

        // A word that parses as an unsigned file descriptor number.
        rule io_fd() -> u32 =
            w:[Token::Word(_, _)] {? w.to_str().parse().or(Err("io_fd u32")) }

        // A redirection target: a process substitution (non-POSIX) or a
        // plain filename word.
        rule io_filename() -> ast::IoFileRedirectTarget =
            non_posix_extensions_enabled() sub:process_substitution() {
                let (kind, subshell) = sub;
                ast::IoFileRedirectTarget::ProcessSubstitution(kind, subshell)
            } /
            f:filename() { ast::IoFileRedirectTarget::Filename(ast::Word::from(f)) }

        rule filename() -> &'input Token =
            word()

        // A here document: `<<-` strips leading tabs from the body, `<<`
        // does not. Expansion of the body is suppressed when the tag contains
        // any quoting character (', ", or \).
        // NOTE(review): `closing_tag` is matched but never used — presumably
        // just consumed to advance past the terminator token; confirm.
        pub(crate) rule io_here() -> ast::IoHereDocument =
           specific_operator("<<-") here_tag:here_tag() doc:[_] closing_tag:here_tag() {
                let requires_expansion = !here_tag.to_str().contains(['\'', '"', '\\']);
                ast::IoHereDocument {
                    remove_tabs: true,
                    requires_expansion,
                    here_end: ast::Word::from(here_tag),
                    doc: ast::Word::from(doc)
                }
            } /
            specific_operator("<<") here_tag:here_tag() doc:[_] closing_tag:here_tag() {
                let requires_expansion = !here_tag.to_str().contains(['\'', '"', '\\']);
                ast::IoHereDocument {
                    remove_tabs: false,
                    requires_expansion,
                    here_end: ast::Word::from(here_tag),
                    doc: ast::Word::from(doc)
                }
            }

        rule here_tag() -> &'input Token =
            word()

        // Process substitution: `<(...)` reads from the subshell's output,
        // `>(...)` writes to the subshell's input.
        rule process_substitution() -> (ast::ProcessSubstitutionKind, ast::SubshellCommand) =
            specific_operator("<") s:subshell() { (ast::ProcessSubstitutionKind::Read, s) } /
            specific_operator(">") s:subshell() { (ast::ProcessSubstitutionKind::Write, s) }
712
        // One or more newline tokens.
        rule newline_list() -> () =
            newline()+ {}

        // Zero or more newline tokens; `quiet!` keeps it out of error
        // expectation reporting.
        rule linebreak() -> () =
            quiet! {
                newline()* {}
            }

        // A command separator operator: `&` runs asynchronously, `;` runs
        // sequentially.
        rule separator_op() -> ast::SeparatorOperator =
            specific_operator("&") { ast::SeparatorOperator::Async } /
            specific_operator(";") { ast::SeparatorOperator::Sequence }

        // A separator: an explicit operator (returned as `Some`), or one or
        // more bare newlines (returned as `None`).
        rule separator() -> Option<ast::SeparatorOperator> =
            s:separator_op() linebreak() { Some(s) } /
            newline_list() { None }

        // A sequential separator: `;` (optionally followed by newlines) or
        // newlines alone.
        rule sequential_sep() -> () =
            specific_operator(";") linebreak() /
            newline_list()
732
733        //
734        // Token interpretation
735        //
736
737        rule non_reserved_word() -> &'input Token =
738            !reserved_word() w:word() { w }
739
740        rule word() -> &'input Token =
741            [Token::Word(_, _)]
742
743        rule reserved_word() -> &'input Token =
744            [Token::Word(w, _) if matches!(w.as_str(),
745                "!" |
746                "{" |
747                "}" |
748                "case" |
749                "do" |
750                "done" |
751                "elif" |
752                "else" |
753                "esac" |
754                "fi" |
755                "for" |
756                "if" |
757                "in" |
758                "then" |
759                "until" |
760                "while"
761            )] /
762
763            // N.B. bash also treats the following as reserved.
764            non_posix_extensions_enabled() token:non_posix_reserved_word_token() { token }
765
766        rule non_posix_reserved_word_token() -> &'input Token =
767            specific_word("[[") /
768            specific_word("]]") /
769            specific_word("function") /
770            specific_word("select")
771
772        rule newline() -> () = quiet! {
773            specific_operator("\n") {}
774        }
775
        // An assignment word: either an array assignment `name=(e1 e2 ...)`
        // (a non-POSIX extension) or a scalar assignment `name=value`.
        // Returns the parsed assignment together with the full original text
        // as a word.
        pub(crate) rule assignment_word() -> (ast::Assignment, ast::Word) =
            non_posix_extensions_enabled() [Token::Word(w, _)] specific_operator("(") elements:array_elements() specific_operator(")") {?
                let parsed = parse_array_assignment(w.as_str(), elements.as_slice())?;

                // Reconstruct the textual form `name=(e1 e2 ...)` so the AST
                // retains the assignment as a single word.
                let mut all_as_word = w.to_owned();
                all_as_word.push('(');
                for (i, e) in elements.iter().enumerate() {
                    if i > 0 {
                        all_as_word.push(' ');
                    }
                    all_as_word.push_str(e);
                }
                all_as_word.push(')');

                Ok((parsed, ast::Word { value: all_as_word }))
            } /
            [Token::Word(w, _)] {?
                let parsed = parse_assignment_word(w.as_str())?;
                Ok((parsed, ast::Word { value: w.to_owned() }))
            }

        // Zero or more array elements, allowing leading line breaks.
        rule array_elements() -> Vec<&'input String> =
             linebreak() e:array_element()* { e }

        // A single array element word, allowing surrounding line breaks.
        rule array_element() -> &'input String =
            linebreak() [Token::Word(e, _)] linebreak() { e }

        // N.B. An I/O number must be a string of only digits, and it must be
        // followed by a '<' or '>' character (but not consume them). We also
        // need to make sure that there was no space between the number and the
        // redirection operator; unfortunately we don't have the space anymore
        // but we can infer it by looking at the tokens' locations.
        rule io_number() -> u32 =
            [Token::Word(w, num_loc) if w.chars().all(|c: char| c.is_ascii_digit())]
            &([Token::Operator(o, redir_loc) if
                    o.starts_with(['<', '>']) &&
                    locations_are_contiguous(num_loc, redir_loc)]) {

                w.parse().unwrap()
            }
816
817        //
818        // Helpers
819        //
820        rule specific_operator(expected: &str) -> &'input Token =
821            [Token::Operator(w, _) if w.as_str() == expected]
822
823        rule specific_word(expected: &str) -> &'input Token =
824            [Token::Word(w, _) if w.as_str() == expected]
825
826        rule non_posix_extensions_enabled() -> () =
827            &[_] {? if !parser_options.sh_mode { Ok(()) } else { Err("posix") } }
828    }
829}
830
// Character-level grammar for parsing the interior of assignment words
// (e.g. `name=value`, `name+=value`, `[key]=value`).
peg::parser! {
    grammar assignments() for str {
        // A full scalar assignment: a name, optional `+`, `=`, then the value.
        pub(crate) rule name_and_scalar_value() -> ast::Assignment =
            nae:name_and_equals() value:scalar_value() {
                let (name, append) = nae;
                ast::Assignment { name, value, append }
            }

        // The `name=` or `name+=` portion; the bool indicates append (`+=`).
        pub(crate) rule name_and_equals() -> (ast::AssignmentName, bool) =
            name:name() append:("+"?) "=" {
                (name, append.is_some())
            }

        // An array element literal: either `[key]=value` (keyed) or a bare
        // value (unkeyed). The key may not contain `]`.
        pub(crate) rule literal_array_element() -> (Option<String>, String) =
            "[" inner:$((!"]" [_])*) "]=" value:$([_]*) {
                (Some(inner.to_owned()), value.to_owned())
            } /
            value:$([_]+) {
                (None, value.to_owned())
            }

        // An assignment name: an array element reference `name[index]` or a
        // plain variable name. The array form is tried first since the plain
        // form is a prefix of it.
        rule name() -> ast::AssignmentName =
            aen:array_element_name() {
                let (name, index) = aen;
                ast::AssignmentName::ArrayElementName(name.to_owned(), index.to_owned())
            } /
            name:scalar_name() {
                ast::AssignmentName::VariableName(name.to_owned())
            }

        // `name[index]`; returns the name and the raw index text.
        rule array_element_name() -> (&'input str, &'input str) =
            name:scalar_name() "[" ai:array_index() "]" { (name, ai) }

        // The raw index text: everything up to the closing `]`.
        rule array_index() -> &'input str =
            $((![']'] [_])*)

        // A variable name: a letter or underscore followed by alphanumerics
        // or underscores.
        rule scalar_name() -> &'input str =
            $(alpha_or_underscore() non_first_variable_char()*)

        rule non_first_variable_char() -> () =
            ['_' | '0'..='9' | 'a'..='z' | 'A'..='Z'] {}

        rule alpha_or_underscore() -> () =
            ['_' | 'a'..='z' | 'A'..='Z'] {}

        // The value: the remainder of the input, taken verbatim.
        rule scalar_value() -> ast::AssignmentValue =
            v:$([_]*) { ast::AssignmentValue::Scalar(ast::Word { value: v.to_owned() }) }
    }
}
880
881fn parse_assignment_word(word: &str) -> Result<ast::Assignment, &'static str> {
882    let parse_result = assignments::name_and_scalar_value(word);
883    parse_result.map_err(|_| "not assignment word")
884}
885
886// add `2>&1` to the command if the pipeline is `|&`
887fn add_pipe_extension_redirection(c: &mut ast::Command) -> Result<(), &'static str> {
888    fn add_to_redirect_list(l: &mut Option<ast::RedirectList>, r: ast::IoRedirect) {
889        if let Some(l) = l {
890            l.0.push(r);
891        } else {
892            let v = vec![r];
893            *l = Some(ast::RedirectList(v));
894        }
895    }
896
897    let r = ast::IoRedirect::File(
898        Some(2),
899        ast::IoFileRedirectKind::DuplicateOutput,
900        ast::IoFileRedirectTarget::Fd(1),
901    );
902
903    match c {
904        ast::Command::Simple(c) => {
905            let r = ast::CommandPrefixOrSuffixItem::IoRedirect(r);
906            if let Some(l) = &mut c.suffix {
907                l.0.push(r);
908            } else {
909                c.suffix = Some(ast::CommandSuffix(vec![r]));
910            }
911        }
912        ast::Command::Compound(_, l) => add_to_redirect_list(l, r),
913        ast::Command::Function(f) => add_to_redirect_list(&mut f.body.1, r),
914        ast::Command::ExtendedTest(_) => return Err("|& unimplemented for extended tests"),
915    }
916
917    Ok(())
918}
919
/// Returns whether two token locations are contiguous in the source text,
/// i.e., the left token ends exactly at the index where the right token
/// starts (no intervening whitespace).
const fn locations_are_contiguous(
    loc_left: &crate::TokenLocation,
    loc_right: &crate::TokenLocation,
) -> bool {
    loc_left.end.index == loc_right.start.index
}
926
927fn parse_array_assignment(
928    word: &str,
929    elements: &[&String],
930) -> Result<ast::Assignment, &'static str> {
931    let (assignment_name, append) =
932        assignments::name_and_equals(word).map_err(|_| "not array assignment word")?;
933
934    let elements = elements
935        .iter()
936        .map(|element| assignments::literal_array_element(element))
937        .collect::<Result<Vec<_>, _>>()
938        .map_err(|_| "invalid array element in literal")?;
939
940    let elements_as_words = elements
941        .into_iter()
942        .map(|(key, value)| {
943            (
944                key.map(|k| ast::Word::new(k.as_str())),
945                ast::Word::new(value.as_str()),
946            )
947        })
948        .collect();
949
950    Ok(ast::Assignment {
951        name: assignment_name,
952        value: ast::AssignmentValue::Array(elements_as_words),
953        append,
954    })
955}
956
#[cfg(test)]
mod tests {

    use super::*;
    use crate::tokenizer::tokenize_str;
    use anyhow::Result;
    use insta::assert_ron_snapshot;

    // Pairs an input string with its parse result so snapshots capture both.
    #[derive(serde::Serialize)]
    struct ParseResult<'a, T> {
        input: &'a str,
        result: &'a T,
    }

    // Parses a case clause whose arm ends with `;;`.
    #[test]
    fn parse_case() -> Result<()> {
        let input = r"\
case x in
x)
    echo y;;
esac\
";

        let tokens = tokenize_str(input)?;
        let command = super::token_parser::case_clause(
            &Tokens {
                tokens: tokens.as_slice(),
            },
            &ParserOptions::default(),
            &SourceInfo::default(),
        )?;

        assert_ron_snapshot!(ParseResult {
            input,
            result: &command
        });

        Ok(())
    }

    // Parses a case clause whose last arm has no `;;` terminator.
    #[test]
    fn parse_case_ns() -> Result<()> {
        let input = r"\
case x in
x)
    echo y
esac\
";

        let tokens = tokenize_str(input)?;
        let command = super::token_parser::case_clause(
            &Tokens {
                tokens: tokens.as_slice(),
            },
            &ParserOptions::default(),
            &SourceInfo::default(),
        )?;

        assert_ron_snapshot!(ParseResult {
            input,
            result: &command
        });

        Ok(())
    }

    // Disambiguates arithmetic `(( ... ))` from nested subshell parens.
    #[test]
    fn parse_arith_and_non_arith_parens() -> Result<()> {
        let input = r"( : && ( (( 0 )) || : ) )";

        let tokens = tokenize_str(input)?;
        let result = super::token_parser::program(
            &Tokens {
                tokens: tokens.as_slice(),
            },
            &ParserOptions::default(),
            &SourceInfo::default(),
        )?;

        assert_ron_snapshot!(ParseResult {
            input,
            result: &result
        });

        Ok(())
    }

    // Parses the `|&` pipe operator (stdout+stderr pipe).
    #[test]
    fn parse_redirection() -> Result<()> {
        let input = r"echo |& wc";

        let tokens = tokenize_str(input)?;
        let seq = super::token_parser::pipe_sequence(
            &Tokens {
                tokens: tokens.as_slice(),
            },
            &ParserOptions::default(),
            &SourceInfo::default(),
        )?;

        assert_ron_snapshot!(ParseResult {
            input,
            result: &seq
        });

        Ok(())
    }

    // A here document terminated by EOF rather than a trailing newline.
    #[test]
    fn parse_here_doc_with_no_trailing_newline() -> Result<()> {
        let input = r"cat <<EOF
Something
EOF";

        let tokens = tokenize_str(input)?;
        let result = super::token_parser::program(
            &Tokens {
                tokens: tokens.as_slice(),
            },
            &ParserOptions::default(),
            &SourceInfo::default(),
        )?;

        assert_ron_snapshot!(ParseResult {
            input,
            result: &result
        });

        Ok(())
    }

    // A function body's redirects must compose with both explicit `2>&1 |`
    // and the `|&` shorthand.
    #[test]
    fn parse_function_with_pipe_redirection() -> Result<()> {
        let inputs = [r"foo() { echo 1; } 2>&1 | cat", r"foo() { echo 1; } |& cat"];

        for input in inputs {
            let tokens = tokenize_str(input)?;
            let seq = super::token_parser::pipe_sequence(
                &Tokens {
                    tokens: tokens.as_slice(),
                },
                &ParserOptions::default(),
                &SourceInfo::default(),
            )?;

            assert_ron_snapshot!(ParseResult {
                input,
                result: &seq
            });
        }

        Ok(())
    }

    // A fuller program: shebang line, comments, blank lines, a for loop,
    // a parameter transformation, and an fd redirect.
    #[test]
    fn test_parse_program() -> Result<()> {
        let input = r#"

#!/usr/bin/env bash

for f in A B C; do

    # sdfsdf
    echo "${f@L}" >&2

   done

"#;

        let tokens = tokenize_str(input)?;
        let result = super::token_parser::program(
            &Tokens {
                tokens: tokens.as_slice(),
            },
            &ParserOptions::default(),
            &SourceInfo::default(),
        )?;

        assert_ron_snapshot!(ParseResult {
            input,
            result: &result
        });

        Ok(())
    }
}