1use crate::ast::{self, SeparatorOperator};
2use crate::error;
3use crate::tokenizer::{Token, TokenEndReason, Tokenizer, TokenizerOptions, Tokens};
4
/// Options controlling the behavior of the shell parser (and, transitively,
/// the tokenizer it drives — see `tokenizer_options`).
#[derive(Clone, Eq, Hash, PartialEq)]
pub struct ParserOptions {
    /// Whether or not extended globbing patterns (a.k.a. `extglob`) are enabled.
    pub enable_extended_globbing: bool,
    /// Whether or not to parse in strict POSIX compliance mode.
    pub posix_mode: bool,
    /// Whether or not to parse in `sh` compatibility mode; when set, the
    /// grammar's non-POSIX extensions are disabled.
    pub sh_mode: bool,
    /// Whether or not tilde expansion is enabled.
    // NOTE(review): this flag is not consulted anywhere in this file;
    // presumably read by expansion code elsewhere — confirm.
    pub tilde_expansion: bool,
}
17
18impl Default for ParserOptions {
19 fn default() -> Self {
20 Self {
21 enable_extended_globbing: true,
22 posix_mode: false,
23 sh_mode: false,
24 tilde_expansion: true,
25 }
26 }
27}
28
29impl ParserOptions {
30 pub fn tokenizer_options(&self) -> TokenizerOptions {
32 TokenizerOptions {
33 enable_extended_globbing: self.enable_extended_globbing,
34 posix_mode: self.posix_mode,
35 sh_mode: self.sh_mode,
36 }
37 }
38}
39
/// A shell-script parser: pulls bytes from a reader, tokenizes them, and
/// parses the resulting token stream into the crate's AST types.
pub struct Parser<R> {
    // Source of input bytes; must implement `std::io::BufRead`.
    reader: R,
    // Options controlling tokenization and parsing behavior.
    options: ParserOptions,
    // Describes where the input came from; recorded into parsed function
    // definitions and available for diagnostics.
    source_info: SourceInfo,
}
46
47impl<R: std::io::BufRead> Parser<R> {
48 pub fn new(reader: R, options: &ParserOptions, source_info: &SourceInfo) -> Self {
56 Parser {
57 reader,
58 options: options.clone(),
59 source_info: source_info.clone(),
60 }
61 }
62
63 pub fn parse_program(&mut self) -> Result<ast::Program, error::ParseError> {
65 let tokens = self.tokenize()?;
74 parse_tokens(&tokens, &self.options, &self.source_info)
75 }
76
77 pub fn parse_function_parens_and_body(
80 &mut self,
81 ) -> Result<ast::FunctionBody, error::ParseError> {
82 let tokens = self.tokenize()?;
83 let parse_result = token_parser::function_parens_and_body(
84 &Tokens { tokens: &tokens },
85 &self.options,
86 &self.source_info,
87 );
88 parse_result_to_error(parse_result, &tokens)
89 }
90
91 fn tokenize(&mut self) -> Result<Vec<Token>, error::ParseError> {
92 let mut tokenizer = Tokenizer::new(&mut self.reader, &self.options.tokenizer_options());
94
95 tracing::debug!(target: "tokenize", "Tokenizing...");
96
97 let mut tokens = vec![];
98 loop {
99 let result = match tokenizer.next_token() {
100 Ok(result) => result,
101 Err(e) => {
102 return Err(error::ParseError::Tokenizing {
103 inner: e,
104 position: tokenizer.current_location(),
105 });
106 }
107 };
108
109 let reason = result.reason;
110 if let Some(token) = result.token {
111 tracing::debug!(target: "tokenize", "TOKEN {}: {:?} {reason:?}", tokens.len(), token);
112 tokens.push(token);
113 }
114
115 if matches!(reason, TokenEndReason::EndOfInput) {
116 break;
117 }
118 }
119
120 tracing::debug!(target: "tokenize", " => {} token(s)", tokens.len());
121
122 Ok(tokens)
123 }
124}
125
126pub fn parse_tokens(
134 tokens: &Vec<Token>,
135 options: &ParserOptions,
136 source_info: &SourceInfo,
137) -> Result<ast::Program, error::ParseError> {
138 let parse_result = token_parser::program(&Tokens { tokens }, options, source_info);
139 parse_result_to_error(parse_result, tokens)
140}
141
142fn parse_result_to_error<R>(
143 parse_result: Result<R, peg::error::ParseError<usize>>,
144 tokens: &Vec<Token>,
145) -> Result<R, error::ParseError>
146where
147 R: std::fmt::Debug,
148{
149 let result = match parse_result {
150 Ok(program) => {
151 tracing::debug!(target: "parse", "PROG: {:?}", program);
152 Ok(program)
153 }
154 Err(parse_error) => {
155 tracing::debug!(target: "parse", "Parse error: {:?}", parse_error);
156 Err(error::convert_peg_parse_error(
157 &parse_error,
158 tokens.as_slice(),
159 ))
160 }
161 };
162
163 result
164}
165
166impl peg::Parse for Tokens<'_> {
167 type PositionRepr = usize;
168
169 #[inline]
170 fn start(&self) -> usize {
171 0
172 }
173
174 #[inline]
175 fn is_eof(&self, p: usize) -> bool {
176 p >= self.tokens.len()
177 }
178
179 #[inline]
180 fn position_repr(&self, p: usize) -> Self::PositionRepr {
181 p
182 }
183}
184
185impl<'a> peg::ParseElem<'a> for Tokens<'a> {
186 type Element = &'a Token;
187
188 #[inline]
189 fn parse_elem(&'a self, pos: usize) -> peg::RuleResult<Self::Element> {
190 match self.tokens.get(pos) {
191 Some(c) => peg::RuleResult::Matched(pos + 1, c),
192 None => peg::RuleResult::Failed,
193 }
194 }
195}
196
197impl<'a> peg::ParseSlice<'a> for Tokens<'a> {
198 type Slice = String;
199
200 fn parse_slice(&'a self, start: usize, end: usize) -> Self::Slice {
201 let mut result = String::new();
202 let mut last_token_was_word = false;
203
204 for token in &self.tokens[start..end] {
205 match token {
206 Token::Operator(s, _) => {
207 result.push_str(s);
208 last_token_was_word = false;
209 }
210 Token::Word(s, _) => {
211 if last_token_was_word {
213 result.push(' ');
214 }
215
216 result.push_str(s);
217 last_token_was_word = true;
218 }
219 }
220 }
221
222 result
223 }
224}
225
/// Metadata identifying where parser input originated (e.g., a file path or
/// other descriptive label); recorded into parsed function definitions.
#[derive(Clone, Default)]
pub struct SourceInfo {
    /// Name or description of the input source.
    pub source: String,
}
232
peg::parser! {
    // Grammar that parses a token stream into the shell AST. The rules mirror
    // the POSIX shell grammar; bash-style extensions are gated behind the
    // `non_posix_extensions_enabled()` rule (disabled when `sh_mode` is set).
    grammar token_parser<'a>(parser_options: &ParserOptions, source_info: &SourceInfo) for Tokens<'a> {
        // A program is an optional newline-separated list of complete commands.
        pub(crate) rule program() -> ast::Program =
            linebreak() c:complete_commands() linebreak() { ast::Program { complete_commands: c } } /
            linebreak() { ast::Program { complete_commands: vec![] } }

        rule complete_commands() -> Vec<ast::CompleteCommand> =
            c:complete_command() ++ newline_list()

        // One or more and-or lists joined by `;`/`&`; a missing trailing
        // separator defaults to sequential (synchronous) execution.
        rule complete_command() -> ast::CompleteCommand =
            first:and_or() remainder:(s:separator_op() l:and_or() { (s, l) })* last_sep:separator_op()? {
                let mut and_ors = vec![first];
                let mut seps = vec![];

                for (sep, ao) in remainder {
                    seps.push(sep);
                    and_ors.push(ao);
                }

                seps.push(last_sep.unwrap_or(SeparatorOperator::Sequence));

                // Pair each and-or list with its trailing separator.
                let mut items = vec![];
                for (i, ao) in and_ors.into_iter().enumerate() {
                    items.push(ast::CompoundListItem(ao, seps[i].clone()));
                }

                ast::CompoundList(items)
            }

        rule and_or() -> ast::AndOrList =
            first:pipeline() additional:_and_or_item()* { ast::AndOrList { first, additional } }

        rule _and_or_item() -> ast::AndOr =
            op:_and_or_op() linebreak() p:pipeline() { op(p) }

        // The operator is returned as a constructor function applied to the
        // pipeline that follows it.
        rule _and_or_op() -> fn(ast::Pipeline) -> ast::AndOr =
            specific_operator("&&") { ast::AndOr::And } /
            specific_operator("||") { ast::AndOr::Or }

        rule pipeline() -> ast::Pipeline =
            timed:pipeline_timed()? bang:bang()? seq:pipe_sequence() { ast::Pipeline { timed, bang: bang.is_some(), seq } }

        // Non-POSIX `time [-p]` prefix for a pipeline.
        rule pipeline_timed() -> ast::PipelineTimed =
            non_posix_extensions_enabled() specific_word("time") posix_output:specific_word("-p")? {
                if posix_output.is_some() {
                    ast::PipelineTimed::TimedWithPosixOutput
                } else {
                    ast::PipelineTimed::Timed
                }
            }

        rule bang() -> bool = specific_word("!") { true }

        // A `|`-separated command sequence. When a command is followed by the
        // `|&` extension, an implicit `2>&1` redirection is attached to it.
        pub(crate) rule pipe_sequence() -> Vec<ast::Command> =
            c:(c:command() r:&pipe_extension_redirection()? {? let mut c = c;
                if r.is_some() {
                    add_pipe_extension_redirection(&mut c)?;
                }
                Ok(c)
            }) ++ (pipe_operator() linebreak()) {
                c
            }
        rule pipe_operator() =
            specific_operator("|") /
            pipe_extension_redirection()

        rule pipe_extension_redirection() -> &'input Token =
            non_posix_extensions_enabled() p:specific_operator("|&") { p }

        // N.B. Ordering of alternatives matters here.
        rule command() -> ast::Command =
            f:function_definition() { ast::Command::Function(f) } /
            c:simple_command() { ast::Command::Simple(c) } /
            c:compound_command() r:redirect_list()? { ast::Command::Compound(c, r) } /
            non_posix_extensions_enabled() c:extended_test_command() { ast::Command::ExtendedTest(c) } /
            expected!("command")

        pub(crate) rule compound_command() -> ast::CompoundCommand =
            non_posix_extensions_enabled() a:arithmetic_command() { ast::CompoundCommand::Arithmetic(a) } /
            b:brace_group() { ast::CompoundCommand::BraceGroup(b) } /
            s:subshell() { ast::CompoundCommand::Subshell(s) } /
            f:for_clause() { ast::CompoundCommand::ForClause(f) } /
            c:case_clause() { ast::CompoundCommand::CaseClause(c) } /
            i:if_clause() { ast::CompoundCommand::IfClause(i) } /
            w:while_clause() { ast::CompoundCommand::WhileClause(w) } /
            u:until_clause() { ast::CompoundCommand::UntilClause(u) } /
            non_posix_extensions_enabled() c:arithmetic_for_clause() { ast::CompoundCommand::ArithmeticForClause(c) } /
            expected!("compound command")

        // `(( expr ))` — the expression itself is captured unexpanded and
        // evaluated later.
        pub(crate) rule arithmetic_command() -> ast::ArithmeticCommand =
            specific_operator("(") specific_operator("(") expr:arithmetic_expression() specific_operator(")") specific_operator(")") {
                ast::ArithmeticCommand { expr }
            }

        pub(crate) rule arithmetic_expression() -> ast::UnexpandedArithmeticExpr =
            raw_expr:$(arithmetic_expression_piece()*) { ast::UnexpandedArithmeticExpr { value: raw_expr } }

        // Consumes balanced parentheses or any single token that does not
        // terminate the arithmetic expression.
        rule arithmetic_expression_piece() =
            specific_operator("(") (!specific_operator(")") arithmetic_expression_piece())* specific_operator(")") {} /
            !arithmetic_end() [_] {}

        // N.B. This isn't supposed to consume anything; it's used as a
        // lookahead for where the expression ends.
        rule arithmetic_end() -> () =
            specific_operator(")") specific_operator(")") {} /
            specific_operator(";") {}

        rule subshell() -> ast::SubshellCommand =
            specific_operator("(") c:compound_list() specific_operator(")") { ast::SubshellCommand(c) }

        // Like `complete_command`, but separators may also be bare newlines.
        rule compound_list() -> ast::CompoundList =
            linebreak() first:and_or() remainder:(s:separator() l:and_or() { (s, l) })* last_sep:separator()? {
                let mut and_ors = vec![first];
                let mut seps = vec![];

                for (sep, ao) in remainder {
                    seps.push(sep.unwrap_or(SeparatorOperator::Sequence));
                    and_ors.push(ao);
                }

                // `separator()` yields `None` for a newline-only separator;
                // both a missing and a newline separator default to Sequence.
                let last_sep = last_sep.unwrap_or(None);
                seps.push(last_sep.unwrap_or(SeparatorOperator::Sequence));

                let mut items = vec![];
                for (i, ao) in and_ors.into_iter().enumerate() {
                    items.push(ast::CompoundListItem(ao, seps[i].clone()));
                }

                ast::CompoundList(items)
            }

        // `for name [in words...] ; do ... done`; without `in`, the loop
        // implicitly iterates over the positional parameters.
        rule for_clause() -> ast::ForClauseCommand =
            specific_word("for") n:name() linebreak() _in() w:wordlist()? sequential_sep() d:do_group() {
                ast::ForClauseCommand { variable_name: n.to_owned(), values: w, body: d }
            } /
            specific_word("for") n:name() sequential_sep()? d:do_group() {
                ast::ForClauseCommand { variable_name: n.to_owned(), values: None, body: d }
            }

        // Non-POSIX C-style loop: `for (( init ; cond ; update )) ; do ... done`.
        rule arithmetic_for_clause() -> ast::ArithmeticForClauseCommand =
            specific_word("for")
            specific_operator("(") specific_operator("(")
            initializer:arithmetic_expression()? specific_operator(";")
            condition:arithmetic_expression()? specific_operator(";")
            updater:arithmetic_expression()?
            specific_operator(")") specific_operator(")")
            sequential_sep()
            body:do_group() {
                ast::ArithmeticForClauseCommand { initializer, condition, updater, body }
            }

        // Non-POSIX `[[ ... ]]` conditional expression.
        rule extended_test_command() -> ast::ExtendedTestExpr =
            specific_word("[[") linebreak() e:extended_test_expression() linebreak() specific_word("]]") { e }

        // Precedence climbing: `||` binds loosest, then `&&`, then `!`, then
        // parentheses, then the binary/unary predicates.
        rule extended_test_expression() -> ast::ExtendedTestExpr = precedence! {
            left:(@) linebreak() specific_operator("||") linebreak() right:@ { ast::ExtendedTestExpr::Or(Box::from(left), Box::from(right)) }
            --
            left:(@) linebreak() specific_operator("&&") linebreak() right:@ { ast::ExtendedTestExpr::And(Box::from(left), Box::from(right)) }
            --
            specific_word("!") e:@ { ast::ExtendedTestExpr::Not(Box::from(e)) }
            --
            specific_operator("(") e:extended_test_expression() specific_operator(")") { ast::ExtendedTestExpr::Parenthesized(Box::from(e)) }
            --
            left:word() specific_word("-eq") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::ArithmeticEqualTo, ast::Word::from(left), ast::Word::from(right)) }
            left:word() specific_word("-ne") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::ArithmeticNotEqualTo, ast::Word::from(left), ast::Word::from(right)) }
            left:word() specific_word("-lt") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::ArithmeticLessThan, ast::Word::from(left), ast::Word::from(right)) }
            left:word() specific_word("-le") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::ArithmeticLessThanOrEqualTo, ast::Word::from(left), ast::Word::from(right)) }
            left:word() specific_word("-gt") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::ArithmeticGreaterThan, ast::Word::from(left), ast::Word::from(right)) }
            left:word() specific_word("-ge") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::ArithmeticGreaterThanOrEqualTo, ast::Word::from(left), ast::Word::from(right)) }
            left:word() specific_word("-ef") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::FilesReferToSameDeviceAndInodeNumbers, ast::Word::from(left), ast::Word::from(right)) }
            left:word() specific_word("-nt") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::LeftFileIsNewerOrExistsWhenRightDoesNot, ast::Word::from(left), ast::Word::from(right)) }
            left:word() specific_word("-ot") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::LeftFileIsOlderOrDoesNotExistWhenRightDoes, ast::Word::from(left), ast::Word::from(right)) }
            left:word() (specific_word("==") / specific_word("=")) right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::StringExactlyMatchesPattern, ast::Word::from(left), ast::Word::from(right)) }
            left:word() specific_word("!=") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::StringDoesNotExactlyMatchPattern, ast::Word::from(left), ast::Word::from(right)) }
            left:word() specific_word("=~") right:regex_word() {
                // A quoted right-hand side is matched literally as a substring
                // rather than being treated as a regular expression.
                if right.value.starts_with(['\'', '\"']) {
                    ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::StringContainsSubstring, ast::Word::from(left), right)
                } else {
                    ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::StringMatchesRegex, ast::Word::from(left), right)
                }
            }
            left:word() specific_operator("<") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::LeftSortsBeforeRight, ast::Word::from(left), ast::Word::from(right)) }
            left:word() specific_operator(">") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::LeftSortsAfterRight, ast::Word::from(left), ast::Word::from(right)) }
            --
            p:extended_unary_predicate() f:word() { ast::ExtendedTestExpr::UnaryTest(p, ast::Word::from(f)) }
            --
            w:word() { ast::ExtendedTestExpr::UnaryTest(ast::UnaryPredicate::StringHasNonZeroLength, ast::Word::from(w)) }
        }

        rule extended_unary_predicate() -> ast::UnaryPredicate =
            specific_word("-a") { ast::UnaryPredicate::FileExists } /
            specific_word("-b") { ast::UnaryPredicate::FileExistsAndIsBlockSpecialFile } /
            specific_word("-c") { ast::UnaryPredicate::FileExistsAndIsCharSpecialFile } /
            specific_word("-d") { ast::UnaryPredicate::FileExistsAndIsDir } /
            specific_word("-e") { ast::UnaryPredicate::FileExists } /
            specific_word("-f") { ast::UnaryPredicate::FileExistsAndIsRegularFile } /
            specific_word("-g") { ast::UnaryPredicate::FileExistsAndIsSetgid } /
            specific_word("-h") { ast::UnaryPredicate::FileExistsAndIsSymlink } /
            specific_word("-k") { ast::UnaryPredicate::FileExistsAndHasStickyBit } /
            specific_word("-n") { ast::UnaryPredicate::StringHasNonZeroLength } /
            specific_word("-o") { ast::UnaryPredicate::ShellOptionEnabled } /
            specific_word("-p") { ast::UnaryPredicate::FileExistsAndIsFifo } /
            specific_word("-r") { ast::UnaryPredicate::FileExistsAndIsReadable } /
            specific_word("-s") { ast::UnaryPredicate::FileExistsAndIsNotZeroLength } /
            specific_word("-t") { ast::UnaryPredicate::FdIsOpenTerminal } /
            specific_word("-u") { ast::UnaryPredicate::FileExistsAndIsSetuid } /
            specific_word("-v") { ast::UnaryPredicate::ShellVariableIsSetAndAssigned } /
            specific_word("-w") { ast::UnaryPredicate::FileExistsAndIsWritable } /
            specific_word("-x") { ast::UnaryPredicate::FileExistsAndIsExecutable } /
            specific_word("-z") { ast::UnaryPredicate::StringHasZeroLength } /
            specific_word("-G") { ast::UnaryPredicate::FileExistsAndOwnedByEffectiveGroupId } /
            specific_word("-L") { ast::UnaryPredicate::FileExistsAndIsSymlink } /
            specific_word("-N") { ast::UnaryPredicate::FileExistsAndModifiedSinceLastRead } /
            specific_word("-O") { ast::UnaryPredicate::FileExistsAndOwnedByEffectiveUserId } /
            specific_word("-R") { ast::UnaryPredicate::ShellVariableIsSetAndNameRef } /
            specific_word("-S") { ast::UnaryPredicate::FileExistsAndIsSocket }

        // Collects the raw text of a regex operand, stopping at `]]`.
        rule regex_word() -> ast::Word =
            value:$((!specific_word("]]") regex_word_piece())+) {
                ast::Word { value }
            }

        rule regex_word_piece() =
            word() {} /
            specific_operator("|") {} /
            specific_operator("(") parenthesized_regex_word()* specific_operator(")") {}

        rule parenthesized_regex_word() =
            regex_word_piece() /
            !specific_operator(")") !specific_operator("]]") [_]

        rule name() -> &'input str =
            w:[Token::Word(_, _)] { w.to_str() }

        rule _in() -> () =
            specific_word("in") { }

        rule wordlist() -> Vec<ast::Word> =
            (w:non_reserved_word() { ast::Word::from(w) })+

        // `case word in items... esac`; the final item may omit its `;;`.
        pub(crate) rule case_clause() -> ast::CaseClauseCommand =
            specific_word("case") w:non_reserved_word() linebreak() _in() linebreak() first_items:case_item()* last_item:case_item_ns()? specific_word("esac") {
                let mut cases = first_items;

                if let Some(last_item) = last_item {
                    cases.push(last_item);
                }

                ast::CaseClauseCommand { value: ast::Word::from(w), cases }
            }

        // A case item with no trailing separator (only valid as the last item).
        pub(crate) rule case_item_ns() -> ast::CaseItem =
            specific_operator("(")? p:pattern() specific_operator(")") c:compound_list() {
                ast::CaseItem { patterns: p, cmd: Some(c), post_action: ast::CaseItemPostAction::ExitCase }
            } /
            specific_operator("(")? p:pattern() specific_operator(")") linebreak() {
                ast::CaseItem { patterns: p, cmd: None, post_action: ast::CaseItemPostAction::ExitCase }
            }

        pub(crate) rule case_item() -> ast::CaseItem =
            specific_operator("(")? p:pattern() specific_operator(")") linebreak() post_action:case_item_post_action() linebreak() {
                ast::CaseItem { patterns: p, cmd: None, post_action }
            } /
            specific_operator("(")? p:pattern() specific_operator(")") c:compound_list() post_action:case_item_post_action() linebreak() {
                ast::CaseItem { patterns: p, cmd: Some(c), post_action }
            }

        // `;;` ends the case; `;;&` and `;&` are non-POSIX fall-through forms.
        rule case_item_post_action() -> ast::CaseItemPostAction =
            specific_operator(";;") {
                ast::CaseItemPostAction::ExitCase
            } /
            non_posix_extensions_enabled() specific_operator(";;&") {
                ast::CaseItemPostAction::ContinueEvaluatingCases
            } /
            non_posix_extensions_enabled() specific_operator(";&") {
                ast::CaseItemPostAction::UnconditionallyExecuteNextCaseItem
            }

        rule pattern() -> Vec<ast::Word> =
            (w:word() { ast::Word::from(w) }) ++ specific_operator("|")

        rule if_clause() -> ast::IfClauseCommand =
            specific_word("if") condition:compound_list() specific_word("then") then:compound_list() elses:else_part()? specific_word("fi") {
                ast::IfClauseCommand {
                    condition,
                    then,
                    elses,
                }
            }

        // Zero or more `elif` clauses, optionally followed by a final `else`.
        rule else_part() -> Vec<ast::ElseClause> =
            cs:_conditional_else_part()+ u:_unconditional_else_part()? {
                let mut parts = vec![];
                for c in cs {
                    parts.push(c);
                }

                if let Some(uncond) = u {
                    parts.push(uncond);
                }

                parts
            } /
            e:_unconditional_else_part() { vec![e] }

        rule _conditional_else_part() -> ast::ElseClause =
            specific_word("elif") condition:compound_list() specific_word("then") body:compound_list() {
                ast::ElseClause { condition: Some(condition), body }
            }

        rule _unconditional_else_part() -> ast::ElseClause =
            specific_word("else") body:compound_list() {
                ast::ElseClause { condition: None, body }
            }

        rule while_clause() -> ast::WhileOrUntilClauseCommand =
            specific_word("while") c:compound_list() d:do_group() { ast::WhileOrUntilClauseCommand(c, d) }

        rule until_clause() -> ast::WhileOrUntilClauseCommand =
            specific_word("until") c:compound_list() d:do_group() { ast::WhileOrUntilClauseCommand(c, d) }

        // Either `[function] name() body` or (non-POSIX form) `function name body`
        // without parentheses; the originating source is recorded alongside.
        rule function_definition() -> ast::FunctionDefinition =
            specific_word("function")? fname:fname() body:function_parens_and_body() {
                ast::FunctionDefinition { fname: fname.to_owned(), body, source: source_info.source.clone() }
            } /
            specific_word("function") fname:fname() linebreak() body:function_body() {
                ast::FunctionDefinition { fname: fname.to_owned(), body, source: source_info.source.clone() }
            } /
            expected!("function definition")

        pub(crate) rule function_parens_and_body() -> ast::FunctionBody =
            specific_operator("(") specific_operator(")") linebreak() body:function_body() { body }

        rule function_body() -> ast::FunctionBody =
            c:compound_command() r:redirect_list()? { ast::FunctionBody(c, r) }

        // A function name is any word not ending in `=` (which would be an
        // assignment).
        rule fname() -> &'input str =
            w:[Token::Word(word, _) if !word.ends_with('=')] { w.to_str() }

        rule brace_group() -> ast::BraceGroupCommand =
            specific_word("{") c:compound_list() specific_word("}") { ast::BraceGroupCommand(c) }

        rule do_group() -> ast::DoGroupCommand =
            specific_word("do") c:compound_list() specific_word("done") { ast::DoGroupCommand(c) }

        // A simple command: optional assignment/redirect prefix, optional
        // command word, optional suffix of words/assignments/redirects.
        rule simple_command() -> ast::SimpleCommand =
            prefix:cmd_prefix() word_and_suffix:(word_or_name:cmd_word() suffix:cmd_suffix()? { (word_or_name, suffix) })? {
                match word_and_suffix {
                    Some((word_or_name, suffix)) => {
                        ast::SimpleCommand { prefix: Some(prefix), word_or_name: Some(ast::Word::from(word_or_name)), suffix }
                    }
                    None => {
                        ast::SimpleCommand { prefix: Some(prefix), word_or_name: None, suffix: None }
                    }
                }
            } /
            word_or_name:cmd_name() suffix:cmd_suffix()? {
                ast::SimpleCommand { prefix: None, word_or_name: Some(ast::Word::from(word_or_name)), suffix } } /
            expected!("simple command")

        rule cmd_name() -> &'input Token =
            non_reserved_word()

        rule cmd_word() -> &'input Token =
            !assignment_word() w:non_reserved_word() { w }

        rule cmd_prefix() -> ast::CommandPrefix =
            p:(
                i:io_redirect() { ast::CommandPrefixOrSuffixItem::IoRedirect(i) } /
                assignment_and_word:assignment_word() {
                    let (assignment, word) = assignment_and_word;
                    ast::CommandPrefixOrSuffixItem::AssignmentWord(assignment, word)
                }
            )+ { ast::CommandPrefix(p) }

        rule cmd_suffix() -> ast::CommandSuffix =
            s:(
                non_posix_extensions_enabled() sub:process_substitution() {
                    let (kind, subshell) = sub;
                    ast::CommandPrefixOrSuffixItem::ProcessSubstitution(kind, subshell)
                } /
                i:io_redirect() {
                    ast::CommandPrefixOrSuffixItem::IoRedirect(i)
                } /
                assignment_and_word:assignment_word() {
                    let (assignment, word) = assignment_and_word;
                    ast::CommandPrefixOrSuffixItem::AssignmentWord(assignment, word)
                } /
                w:word() {
                    ast::CommandPrefixOrSuffixItem::Word(ast::Word::from(w))
                }
            )+ { ast::CommandSuffix(s) }

        rule redirect_list() -> ast::RedirectList =
            r:io_redirect()+ { ast::RedirectList(r) } /
            expected!("redirect list")

        // `&>`/`&>>`/`<<<` are non-POSIX extensions.
        rule io_redirect() -> ast::IoRedirect =
            n:io_number()? f:io_file() {
                let (kind, target) = f;
                ast::IoRedirect::File(n, kind, target)
            } /
            non_posix_extensions_enabled() specific_operator("&>>") target:filename() { ast::IoRedirect::OutputAndError(ast::Word::from(target), true) } /
            non_posix_extensions_enabled() specific_operator("&>") target:filename() { ast::IoRedirect::OutputAndError(ast::Word::from(target), false) } /
            non_posix_extensions_enabled() n:io_number()? specific_operator("<<<") w:word() { ast::IoRedirect::HereString(n, ast::Word::from(w)) } /
            n:io_number()? h:io_here() { ast::IoRedirect::HereDocument(n, h) } /
            expected!("I/O redirect")

        rule io_file() -> (ast::IoFileRedirectKind, ast::IoFileRedirectTarget) =
            specific_operator("<") f:io_filename() { (ast::IoFileRedirectKind::Read, f) } /
            specific_operator("<&") f:io_filename_or_fd() { (ast::IoFileRedirectKind::DuplicateInput, f) } /
            specific_operator(">") f:io_filename() { (ast::IoFileRedirectKind::Write, f) } /
            specific_operator(">&") f:io_filename_or_fd() { (ast::IoFileRedirectKind::DuplicateOutput, f) } /
            specific_operator(">>") f:io_filename() { (ast::IoFileRedirectKind::Append, f) } /
            specific_operator("<>") f:io_filename() { (ast::IoFileRedirectKind::ReadAndWrite, f) } /
            specific_operator(">|") f:io_filename() { (ast::IoFileRedirectKind::Clobber, f) }

        rule io_filename_or_fd() -> ast::IoFileRedirectTarget =
            fd:io_fd() { ast::IoFileRedirectTarget::Fd(fd) } /
            io_filename()

        rule io_fd() -> u32 =
            w:[Token::Word(_, _)] {? w.to_str().parse().or(Err("io_fd u32")) }

        rule io_filename() -> ast::IoFileRedirectTarget =
            non_posix_extensions_enabled() sub:process_substitution() {
                let (kind, subshell) = sub;
                ast::IoFileRedirectTarget::ProcessSubstitution(kind, subshell)
            } /
            f:filename() { ast::IoFileRedirectTarget::Filename(ast::Word::from(f)) }

        rule filename() -> &'input Token =
            word()

        // Here-documents: the tokenizer has already captured the body as a
        // single token between the opening and closing tags. A quoted or
        // escaped tag suppresses expansion of the document body.
        pub(crate) rule io_here() -> ast::IoHereDocument =
            specific_operator("<<-") here_tag:here_tag() doc:[_] closing_tag:here_tag() {
                let requires_expansion = !here_tag.to_str().contains(['\'', '"', '\\']);
                ast::IoHereDocument {
                    remove_tabs: true,
                    requires_expansion,
                    here_end: ast::Word::from(here_tag),
                    doc: ast::Word::from(doc)
                }
            } /
            specific_operator("<<") here_tag:here_tag() doc:[_] closing_tag:here_tag() {
                let requires_expansion = !here_tag.to_str().contains(['\'', '"', '\\']);
                ast::IoHereDocument {
                    remove_tabs: false,
                    requires_expansion,
                    here_end: ast::Word::from(here_tag),
                    doc: ast::Word::from(doc)
                }
            }

        rule here_tag() -> &'input Token =
            word()

        rule process_substitution() -> (ast::ProcessSubstitutionKind, ast::SubshellCommand) =
            specific_operator("<") s:subshell() { (ast::ProcessSubstitutionKind::Read, s) } /
            specific_operator(">") s:subshell() { (ast::ProcessSubstitutionKind::Write, s) }

        rule newline_list() -> () =
            newline()+ {}

        // Zero or more newlines.
        rule linebreak() -> () =
            quiet! {
                newline()* {}
            }

        rule separator_op() -> ast::SeparatorOperator =
            specific_operator("&") { ast::SeparatorOperator::Async } /
            specific_operator(";") { ast::SeparatorOperator::Sequence }

        // Returns `None` when the separator is only a run of newlines.
        rule separator() -> Option<ast::SeparatorOperator> =
            s:separator_op() linebreak() { Some(s) } /
            newline_list() { None }

        rule sequential_sep() -> () =
            specific_operator(";") linebreak() /
            newline_list()

        rule non_reserved_word() -> &'input Token =
            !reserved_word() w:word() { w }

        rule word() -> &'input Token =
            [Token::Word(_, _)]

        rule reserved_word() -> &'input Token =
            [Token::Word(w, _) if matches!(w.as_str(),
                "!" |
                "{" |
                "}" |
                "case" |
                "do" |
                "done" |
                "elif" |
                "else" |
                "esac" |
                "fi" |
                "for" |
                "if" |
                "in" |
                "then" |
                "until" |
                "while"
            )] /

            non_posix_extensions_enabled() token:non_posix_reserved_word_token() { token }

        rule non_posix_reserved_word_token() -> &'input Token =
            specific_word("[[") /
            specific_word("]]") /
            specific_word("function") /
            specific_word("select")

        rule newline() -> () = quiet! {
            specific_operator("\n") {}
        }

        // An assignment, either scalar (`name=value`) or — as a non-POSIX
        // extension — an array literal (`name=(elem ...)`). The second tuple
        // element is the entire assignment re-rendered as a single word.
        pub(crate) rule assignment_word() -> (ast::Assignment, ast::Word) =
            non_posix_extensions_enabled() [Token::Word(w, _)] specific_operator("(") elements:array_elements() specific_operator(")") {?
                let parsed = parse_array_assignment(w.as_str(), elements.as_slice())?;

                let mut all_as_word = w.to_owned();
                all_as_word.push('(');
                for (i, e) in elements.iter().enumerate() {
                    if i > 0 {
                        all_as_word.push(' ');
                    }
                    all_as_word.push_str(e);
                }
                all_as_word.push(')');

                Ok((parsed, ast::Word { value: all_as_word }))
            } /
            [Token::Word(w, _)] {?
                let parsed = parse_assignment_word(w.as_str())?;
                Ok((parsed, ast::Word { value: w.to_owned() }))
            }

        rule array_elements() -> Vec<&'input String> =
            linebreak() e:array_element()* { e }

        rule array_element() -> &'input String =
            linebreak() [Token::Word(e, _)] linebreak() { e }

        // A file-descriptor number immediately preceding a redirection
        // operator with no intervening whitespace (e.g., the `2` in `2>`).
        rule io_number() -> u32 =
            [Token::Word(w, num_loc) if w.chars().all(|c: char| c.is_ascii_digit())]
            &([Token::Operator(o, redir_loc) if
                o.starts_with(['<', '>']) &&
                locations_are_contiguous(num_loc, redir_loc)]) {

                w.parse().unwrap()
            }

        rule specific_operator(expected: &str) -> &'input Token =
            [Token::Operator(w, _) if w.as_str() == expected]

        rule specific_word(expected: &str) -> &'input Token =
            [Token::Word(w, _) if w.as_str() == expected]

        // Succeeds (consuming nothing) only when bash-style extensions are
        // allowed, i.e., when not running in `sh` compatibility mode.
        rule non_posix_extensions_enabled() -> () =
            &[_] {? if !parser_options.sh_mode { Ok(()) } else { Err("posix") } }
    }
}
831
peg::parser! {
    // Character-level grammar for dissecting assignment words (e.g.
    // `name=value`, `name+=value`, `arr[idx]=value`, `[key]=value`).
    grammar assignments() for str {
        pub(crate) rule name_and_scalar_value() -> ast::Assignment =
            nae:name_and_equals() value:scalar_value() {
                let (name, append) = nae;
                ast::Assignment { name, value, append }
            }

        // Parses `name=` or `name+=`; the boolean reports whether the
        // append form (`+=`) was used.
        pub(crate) rule name_and_equals() -> (ast::AssignmentName, bool) =
            name:name() append:("+"?) "=" {
                (name, append.is_some())
            }

        // A single element of an array literal: either `[key]=value` or a
        // bare value with no explicit key.
        pub(crate) rule literal_array_element() -> (Option<String>, String) =
            "[" inner:$((!"]" [_])*) "]=" value:$([_]*) {
                (Some(inner.to_owned()), value.to_owned())
            } /
            value:$([_]+) {
                (None, value.to_owned())
            }

        rule name() -> ast::AssignmentName =
            aen:array_element_name() {
                let (name, index) = aen;
                ast::AssignmentName::ArrayElementName(name.to_owned(), index.to_owned())
            } /
            name:scalar_name() {
                ast::AssignmentName::VariableName(name.to_owned())
            }

        rule array_element_name() -> (&'input str, &'input str) =
            name:scalar_name() "[" ai:array_index() "]" { (name, ai) }

        rule array_index() -> &'input str =
            $((![']'] [_])*)

        // A variable name: a letter or underscore followed by alphanumerics
        // or underscores.
        rule scalar_name() -> &'input str =
            $(alpha_or_underscore() non_first_variable_char()*)

        rule non_first_variable_char() -> () =
            ['_' | '0'..='9' | 'a'..='z' | 'A'..='Z'] {}

        rule alpha_or_underscore() -> () =
            ['_' | 'a'..='z' | 'A'..='Z'] {}

        // Everything after the `=` is the (unexpanded) scalar value.
        rule scalar_value() -> ast::AssignmentValue =
            v:$([_]*) { ast::AssignmentValue::Scalar(ast::Word { value: v.to_owned() }) }
    }
}
881
882fn parse_assignment_word(word: &str) -> Result<ast::Assignment, &'static str> {
883 let parse_result = assignments::name_and_scalar_value(word);
884 parse_result.map_err(|_| "not assignment word")
885}
886
887fn add_pipe_extension_redirection(c: &mut ast::Command) -> Result<(), &'static str> {
889 fn add_to_redirect_list(l: &mut Option<ast::RedirectList>, r: ast::IoRedirect) {
890 if let Some(l) = l {
891 l.0.push(r);
892 } else {
893 let v = vec![r];
894 *l = Some(ast::RedirectList(v));
895 }
896 }
897
898 let r = ast::IoRedirect::File(
899 Some(2),
900 ast::IoFileRedirectKind::DuplicateOutput,
901 ast::IoFileRedirectTarget::Fd(1),
902 );
903
904 match c {
905 ast::Command::Simple(c) => {
906 let r = ast::CommandPrefixOrSuffixItem::IoRedirect(r);
907 if let Some(l) = &mut c.suffix {
908 l.0.push(r);
909 } else {
910 c.suffix = Some(ast::CommandSuffix(vec![r]));
911 }
912 }
913 ast::Command::Compound(_, l) => add_to_redirect_list(l, r),
914 ast::Command::Function(f) => add_to_redirect_list(&mut f.body.1, r),
915 ast::Command::ExtendedTest(_) => return Err("|& unimplemented for extended tests"),
916 }
917
918 Ok(())
919}
920
921fn locations_are_contiguous(
922 loc_left: &crate::TokenLocation,
923 loc_right: &crate::TokenLocation,
924) -> bool {
925 loc_left.end.index == loc_right.start.index
926}
927
928fn parse_array_assignment(
929 word: &str,
930 elements: &[&String],
931) -> Result<ast::Assignment, &'static str> {
932 let (assignment_name, append) =
933 assignments::name_and_equals(word).map_err(|_| "not array assignment word")?;
934
935 let elements = elements
936 .iter()
937 .map(|element| assignments::literal_array_element(element))
938 .collect::<Result<Vec<_>, _>>()
939 .map_err(|_| "invalid array element in literal")?;
940
941 let elements_as_words = elements
942 .into_iter()
943 .map(|(key, value)| {
944 (
945 key.map(|k| ast::Word::new(k.as_str())),
946 ast::Word::new(value.as_str()),
947 )
948 })
949 .collect();
950
951 Ok(ast::Assignment {
952 name: assignment_name,
953 value: ast::AssignmentValue::Array(elements_as_words),
954 append,
955 })
956}
957
#[cfg(test)]
#[allow(clippy::panic_in_result_fn)]
mod tests {
    // NOTE: several test inputs below are raw strings whose leading/trailing
    // `\` before a newline is intentional — the tokenizer should treat
    // backslash-newline as a shell line continuation.

    use super::*;
    use crate::tokenizer::tokenize_str;
    use anyhow::Result;
    use assert_matches::assert_matches;

    /// Parses a `case` clause whose single arm is terminated with `;;`.
    #[test]
    fn parse_case() -> Result<()> {
        let input = r"\
case x in
x)
    echo y;;
esac\
";

        let tokens = tokenize_str(input)?;
        let command = super::token_parser::case_clause(
            &Tokens {
                tokens: tokens.as_slice(),
            },
            &ParserOptions::default(),
            &SourceInfo::default(),
        )?;

        // Exactly one arm with one pattern, matching literal `x`.
        assert_eq!(command.cases.len(), 1);
        assert_eq!(command.cases[0].patterns.len(), 1);
        assert_eq!(command.cases[0].patterns[0].flatten(), "x");

        Ok(())
    }

    /// Parses a `case` clause whose final arm has no `;;` separator before
    /// `esac` (the "ns" = no-separator variant).
    #[test]
    fn parse_case_ns() -> Result<()> {
        let input = r"\
case x in
x)
    echo y
esac\
";

        let tokens = tokenize_str(input)?;
        let command = super::token_parser::case_clause(
            &Tokens {
                tokens: tokens.as_slice(),
            },
            &ParserOptions::default(),
            &SourceInfo::default(),
        )?;

        // Same expectations as the `;;`-terminated variant above.
        assert_eq!(command.cases.len(), 1);
        assert_eq!(command.cases[0].patterns.len(), 1);
        assert_eq!(command.cases[0].patterns[0].flatten(), "x");

        Ok(())
    }

    /// Verifies that `|&` on a simple command is desugared into a `2>&1`
    /// redirection appended to the command's suffix.
    #[test]
    fn parse_redirection() -> Result<()> {
        let input = r"echo |& wc";

        let tokens = tokenize_str(input)?;
        let seq = super::token_parser::pipe_sequence(
            &Tokens {
                tokens: tokens.as_slice(),
            },
            &ParserOptions::default(),
            &SourceInfo::default(),
        )?;

        assert_eq!(seq.len(), 2);
        assert_matches!(seq[0], ast::Command::Simple(..));
        if let ast::Command::Simple(c) = &seq[0] {
            let c = c.suffix.as_ref().unwrap();
            // The injected redirection duplicates fd 2 onto fd 1.
            assert_matches!(
                c.0[0],
                ast::CommandPrefixOrSuffixItem::IoRedirect(ast::IoRedirect::File(
                    Some(2),
                    ast::IoFileRedirectKind::DuplicateOutput,
                    ast::IoFileRedirectTarget::Fd(1)
                ))
            );
        }
        Ok(())
    }

    /// Verifies that both the explicit `2>&1 |` form and the `|&` shorthand
    /// attach the same redirection to a function definition's body.
    #[test]
    fn parse_function_with_pipe_redirection() -> Result<()> {
        let inputs = [r"foo() { echo 1; } 2>&1 | cat", r"foo() { echo 1; } |& cat"];

        for input in inputs {
            let tokens = tokenize_str(input)?;
            let seq = super::token_parser::pipe_sequence(
                &Tokens {
                    tokens: tokens.as_slice(),
                },
                &ParserOptions::default(),
                &SourceInfo::default(),
            )?;
            assert_eq!(seq.len(), 2);
            assert_matches!(seq[0], ast::Command::Function(..));
            if let ast::Command::Function(f) = &seq[0] {
                // The redirection lands on the function body's redirect list.
                let l = &f.body.1;
                assert!(l.is_some());
                assert_matches!(
                    l.as_ref().unwrap().0[0],
                    ast::IoRedirect::File(
                        Some(2),
                        ast::IoFileRedirectKind::DuplicateOutput,
                        ast::IoFileRedirectTarget::Fd(1)
                    )
                );
            }
        }
        Ok(())
    }

    /// End-to-end parse of a small script (shebang, comments, blank lines,
    /// `for` loop, parameter transformation, fd redirection) against a fully
    /// spelled-out expected AST.
    #[test]
    fn test_parse_program() -> Result<()> {
        use ast::*;

        let input = r#"

#!/usr/bin/env bash

for f in A B C; do

    # sdfsdf
    echo "${f@L}" >&2

    done

"#;
        let expected = Program {
            complete_commands: vec![CompoundList(vec![CompoundListItem(
                AndOrList {
                    first: Pipeline {
                        timed: None,
                        bang: false,
                        seq: vec![Command::Compound(
                            CompoundCommand::ForClause(ForClauseCommand {
                                variable_name: "f".into(),
                                values: Some(vec![Word::new("A"), Word::new("B"), Word::new("C")]),
                                body: DoGroupCommand(CompoundList(vec![CompoundListItem(
                                    AndOrList {
                                        first: Pipeline {
                                            timed: None,
                                            bang: false,
                                            seq: vec![Command::Simple(SimpleCommand {
                                                prefix: None,
                                                word_or_name: Some(Word::new("echo")),
                                                suffix: Some(CommandSuffix(vec![
                                                    CommandPrefixOrSuffixItem::Word(Word::new(
                                                        r#""${f@L}""#,
                                                    )),
                                                    CommandPrefixOrSuffixItem::IoRedirect(
                                                        IoRedirect::File(
                                                            None,
                                                            IoFileRedirectKind::DuplicateOutput,
                                                            IoFileRedirectTarget::Fd(2),
                                                        ),
                                                    ),
                                                ])),
                                            })],
                                        },
                                        additional: vec![],
                                    },
                                    SeparatorOperator::Sequence,
                                )])),
                            }),
                            None,
                        )],
                    },
                    additional: vec![],
                },
                SeparatorOperator::Sequence,
            )])],
        };

        let tokens = tokenize_str(input)?;
        let result = super::token_parser::program(
            &Tokens {
                tokens: tokens.as_slice(),
            },
            &ParserOptions::default(),
            &SourceInfo::default(),
        )?;

        assert_eq!(result, expected);

        Ok(())
    }
}