1use crate::ast::{self, SeparatorOperator, SourceLocation, maybe_location};
2use crate::tokenizer::{Token, TokenEndReason, Tokenizer, TokenizerOptions, Tokens};
3use crate::{TokenLocation, error};
4
5use bon::Builder;
6
/// Options controlling the behavior of the parser.
#[derive(Clone, Eq, Hash, PartialEq)]
pub struct ParserOptions {
    /// Whether extended globbing syntax is enabled (forwarded to the tokenizer).
    pub enable_extended_globbing: bool,
    /// Whether to operate in POSIX compliance mode (forwarded to the tokenizer).
    pub posix_mode: bool,
    /// Whether to operate in `sh` emulation mode; when set, the grammar's
    /// non-POSIX extensions are disabled (see `non_posix_extensions_enabled`).
    pub sh_mode: bool,
    /// Whether tilde expansion is enabled.
    // NOTE(review): unlike the other flags, this one is not forwarded to
    // `TokenizerOptions`; confirm where it is consumed.
    pub tilde_expansion: bool,
}
19
20impl Default for ParserOptions {
21 fn default() -> Self {
22 Self {
23 enable_extended_globbing: true,
24 posix_mode: false,
25 sh_mode: false,
26 tilde_expansion: true,
27 }
28 }
29}
30
31impl ParserOptions {
32 pub const fn tokenizer_options(&self) -> TokenizerOptions {
34 TokenizerOptions {
35 enable_extended_globbing: self.enable_extended_globbing,
36 posix_mode: self.posix_mode,
37 sh_mode: self.sh_mode,
38 }
39 }
40}
41
/// A parser that tokenizes input from a reader and parses the tokens into an AST.
#[derive(Builder)]
pub struct Parser<R: std::io::BufRead> {
    /// The input stream tokens are read from.
    reader: R,
    /// Options controlling tokenizing and parsing behavior.
    #[builder(default)]
    options: ParserOptions,
    /// Information about the source being parsed; its `source` string is
    /// stored into parsed function definitions.
    #[builder(default)]
    source_info: SourceInfo,
}
54
55impl<R: std::io::BufRead> Parser<R> {
56 pub fn new(reader: R, options: &ParserOptions, source_info: &SourceInfo) -> Self {
64 Self {
65 reader,
66 options: options.clone(),
67 source_info: source_info.clone(),
68 }
69 }
70
71 pub fn parse_program(&mut self) -> Result<ast::Program, error::ParseError> {
73 let tokens = self.tokenize()?;
82 parse_tokens(&tokens, &self.options, &self.source_info)
83 }
84
85 pub fn parse_function_parens_and_body(
88 &mut self,
89 ) -> Result<ast::FunctionBody, error::ParseError> {
90 let tokens = self.tokenize()?;
91 let parse_result = token_parser::function_parens_and_body(
92 &Tokens { tokens: &tokens },
93 &self.options,
94 &self.source_info,
95 );
96 parse_result_to_error(parse_result, &tokens)
97 }
98
99 fn tokenize(&mut self) -> Result<Vec<Token>, error::ParseError> {
100 let mut tokenizer = Tokenizer::new(&mut self.reader, &self.options.tokenizer_options());
102
103 tracing::debug!(target: "tokenize", "Tokenizing...");
104
105 let mut tokens = vec![];
106 loop {
107 let result = match tokenizer.next_token() {
108 Ok(result) => result,
109 Err(e) => {
110 return Err(error::ParseError::Tokenizing {
111 inner: e,
112 position: tokenizer.current_location(),
113 });
114 }
115 };
116
117 let reason = result.reason;
118 if let Some(token) = result.token {
119 tracing::debug!(target: "tokenize", "TOKEN {}: {:?} {reason:?}", tokens.len(), token);
120 tokens.push(token);
121 }
122
123 if matches!(reason, TokenEndReason::EndOfInput) {
124 break;
125 }
126 }
127
128 tracing::debug!(target: "tokenize", " => {} token(s)", tokens.len());
129
130 Ok(tokens)
131 }
132}
133
134pub fn parse_tokens(
142 tokens: &Vec<Token>,
143 options: &ParserOptions,
144 source_info: &SourceInfo,
145) -> Result<ast::Program, error::ParseError> {
146 let parse_result = token_parser::program(&Tokens { tokens }, options, source_info);
147 parse_result_to_error(parse_result, tokens)
148}
149
150fn parse_result_to_error<R>(
151 parse_result: Result<R, peg::error::ParseError<usize>>,
152 tokens: &Vec<Token>,
153) -> Result<R, error::ParseError>
154where
155 R: std::fmt::Debug,
156{
157 match parse_result {
158 Ok(program) => {
159 tracing::debug!(target: "parse", "PROG: {:?}", program);
160 Ok(program)
161 }
162 Err(parse_error) => {
163 tracing::debug!(target: "parse", "Parse error: {:?}", parse_error);
164 Err(error::convert_peg_parse_error(
165 &parse_error,
166 tokens.as_slice(),
167 ))
168 }
169 }
170}
171
172impl peg::Parse for Tokens<'_> {
173 type PositionRepr = usize;
174
175 #[inline]
176 fn start(&self) -> usize {
177 0
178 }
179
180 #[inline]
181 fn is_eof(&self, p: usize) -> bool {
182 p >= self.tokens.len()
183 }
184
185 #[inline]
186 fn position_repr(&self, p: usize) -> Self::PositionRepr {
187 p
188 }
189}
190
191impl<'a> peg::ParseElem<'a> for Tokens<'a> {
192 type Element = &'a Token;
193
194 #[inline]
195 fn parse_elem(&'a self, pos: usize) -> peg::RuleResult<Self::Element> {
196 match self.tokens.get(pos) {
197 Some(c) => peg::RuleResult::Matched(pos + 1, c),
198 None => peg::RuleResult::Failed,
199 }
200 }
201}
202
203impl<'a> peg::ParseSlice<'a> for Tokens<'a> {
204 type Slice = String;
205
206 fn parse_slice(&'a self, start: usize, end: usize) -> Self::Slice {
207 let mut result = String::new();
208 let mut last_token_was_word = false;
209
210 for token in &self.tokens[start..end] {
211 match token {
212 Token::Operator(s, _) => {
213 result.push_str(s);
214 last_token_was_word = false;
215 }
216 Token::Word(s, _) => {
217 if last_token_was_word {
219 result.push(' ');
220 }
221
222 result.push_str(s);
223 last_token_was_word = true;
224 }
225 }
226 }
227
228 result
229 }
230}
231
/// Information identifying the source of parsed input.
#[derive(Clone, Debug, Default)]
pub struct SourceInfo {
    // Name/description of the input source; displayed verbatim by the
    // `Display` impl and recorded in parsed function definitions.
    pub source: String,
}
238
239impl std::fmt::Display for SourceInfo {
240 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
241 write!(f, "{}", self.source)
242 }
243}
244
245peg::parser! {
246 grammar token_parser<'a>(parser_options: &ParserOptions, source_info: &SourceInfo) for Tokens<'a> {
247 pub(crate) rule program() -> ast::Program =
248 linebreak() c:complete_commands() linebreak() { ast::Program { complete_commands: c } } /
249 linebreak() { ast::Program { complete_commands: vec![] } }
250
251 rule complete_commands() -> Vec<ast::CompleteCommand> =
252 c:complete_command() ++ newline_list()
253
254 rule complete_command() -> ast::CompleteCommand =
255 first:and_or() remainder:(s:separator_op() l:and_or() { (s, l) })* last_sep:separator_op()? {
256 let mut and_ors = vec![first];
257 let mut seps = vec![];
258
259 for (sep, ao) in remainder {
260 seps.push(sep);
261 and_ors.push(ao);
262 }
263
264 seps.push(last_sep.unwrap_or(SeparatorOperator::Sequence));
266
267 let mut items = vec![];
268 for (i, ao) in and_ors.into_iter().enumerate() {
269 items.push(ast::CompoundListItem(ao, seps[i].clone()));
270 }
271
272 ast::CompoundList(items)
273 }
274
275 rule and_or() -> ast::AndOrList =
276 first:pipeline() additional:_and_or_item()* { ast::AndOrList { first, additional } }
277
278 rule _and_or_item() -> ast::AndOr =
279 op:_and_or_op() linebreak() p:pipeline() { op(p) }
280
281 rule _and_or_op() -> fn(ast::Pipeline) -> ast::AndOr =
282 specific_operator("&&") { ast::AndOr::And } /
283 specific_operator("||") { ast::AndOr::Or }
284
285 rule pipeline() -> ast::Pipeline =
286 timed:pipeline_timed()? bang:bang()* seq:pipe_sequence() {?
287 if timed.is_none() && bang.is_empty() && seq.is_empty() {
288 Err("empty pipeline")
289 } else {
290 let invert = bang.len() % 2 == 1;
291 Ok(ast::Pipeline { timed, bang: invert, seq })
292 }
293 }
294
295 rule pipeline_timed() -> ast::PipelineTimed =
296 non_posix_extensions_enabled() s:specific_word("time") posix_output:specific_word("-p")? {
297 let start = s.location();
298 if let Some(end) = posix_output {
299 ast::PipelineTimed::TimedWithPosixOutput(TokenLocation::within(start, end.location()))
300 } else {
301 ast::PipelineTimed::Timed(start.to_owned())
302 }
303 }
304
305 rule bang() -> bool = specific_word("!") { true }
306
307 pub(crate) rule pipe_sequence() -> Vec<ast::Command> =
308 c:(c:command() r:&pipe_extension_redirection()? {? let mut c = c;
310 if r.is_some() {
311 add_pipe_extension_redirection(&mut c)?;
312 }
313 Ok(c)
314 }) ** (pipe_operator() linebreak()) {
315 c
316 }
317
318 rule pipe_operator() =
319 specific_operator("|") /
320 pipe_extension_redirection()
321
322 rule pipe_extension_redirection() -> &'input Token =
323 non_posix_extensions_enabled() p:specific_operator("|&") { p }
324
325 rule command() -> ast::Command =
327 f:function_definition() { ast::Command::Function(f) } /
328 c:simple_command() { ast::Command::Simple(c) } /
329 c:compound_command() r:redirect_list()? { ast::Command::Compound(c, r) } /
330 non_posix_extensions_enabled() c:extended_test_command() { ast::Command::ExtendedTest(c) } /
332 expected!("command")
333
334 pub(crate) rule compound_command() -> ast::CompoundCommand =
337 non_posix_extensions_enabled() a:arithmetic_command() { ast::CompoundCommand::Arithmetic(a) } /
338 b:brace_group() { ast::CompoundCommand::BraceGroup(b) } /
339 s:subshell() { ast::CompoundCommand::Subshell(s) } /
340 f:for_clause() { ast::CompoundCommand::ForClause(f) } /
341 c:case_clause() { ast::CompoundCommand::CaseClause(c) } /
342 i:if_clause() { ast::CompoundCommand::IfClause(i) } /
343 w:while_clause() { ast::CompoundCommand::WhileClause(w) } /
344 u:until_clause() { ast::CompoundCommand::UntilClause(u) } /
345 non_posix_extensions_enabled() c:arithmetic_for_clause() { ast::CompoundCommand::ArithmeticForClause(c) } /
346 expected!("compound command")
347
348 pub(crate) rule arithmetic_command() -> ast::ArithmeticCommand =
349 start:specific_operator("(") specific_operator("(") expr:arithmetic_expression() specific_operator(")") end:specific_operator(")") {
350 let loc = TokenLocation::within(
351 start.location(),
352 end.location()
353 );
354 ast::ArithmeticCommand { expr, loc }
355 }
356
357 pub(crate) rule arithmetic_expression() -> ast::UnexpandedArithmeticExpr =
358 raw_expr:$(arithmetic_expression_piece()*) { ast::UnexpandedArithmeticExpr { value: raw_expr } }
359
360 rule arithmetic_expression_piece() =
361 specific_operator("(") (!specific_operator(")") arithmetic_expression_piece())* specific_operator(")") {} /
363 !arithmetic_end() !specific_operator(")") [_] {}
368
369 rule arithmetic_end() -> () =
371 specific_operator(")") specific_operator(")") {} /
372 specific_operator(";") {}
373
374 rule subshell() -> ast::SubshellCommand =
375 start:specific_operator("(") list:compound_list() end:specific_operator(")") {
376 let loc = TokenLocation::within(start.location(), end.location());
377 ast::SubshellCommand { list, loc }
378 }
379
380 rule compound_list() -> ast::CompoundList =
381 linebreak() first:and_or() remainder:(s:separator() l:and_or() { (s, l) })* last_sep:separator()? {
382 let mut and_ors = vec![first];
383 let mut seps = vec![];
384
385 for (sep, ao) in remainder {
386 seps.push(sep.unwrap_or(SeparatorOperator::Sequence));
387 and_ors.push(ao);
388 }
389
390 let last_sep = last_sep.unwrap_or(None);
392 seps.push(last_sep.unwrap_or(SeparatorOperator::Sequence));
393
394 let mut items = vec![];
395 for (i, ao) in and_ors.into_iter().enumerate() {
396 items.push(ast::CompoundListItem(ao, seps[i].clone()));
397 }
398
399 ast::CompoundList(items)
400 }
401
402 rule for_clause() -> ast::ForClauseCommand =
403 s:specific_word("for") n:name() linebreak() _in() w:wordlist()? sequential_sep() d:do_group() {
404 let start = s.location();
405 let end = &d.loc;
406 let loc = TokenLocation::within(start, end);
407 ast::ForClauseCommand { variable_name: n.to_owned(), values: w, body: d, loc }
408 } /
409 s:specific_word("for") n:name() sequential_sep()? d:do_group() {
410 let start = s.location();
411 let end = &d.loc;
412 let loc = TokenLocation::within(start, end);
413 ast::ForClauseCommand { variable_name: n.to_owned(), values: None, body: d, loc }
414 }
415
416 rule arithmetic_for_clause() -> ast::ArithmeticForClauseCommand =
418 s:specific_word("for")
419 specific_operator("(") specific_operator("(")
420 initializer:arithmetic_expression()? specific_operator(";")
421 condition:arithmetic_expression()? specific_operator(";")
422 updater:arithmetic_expression()?
423 specific_operator(")") specific_operator(")")
424 body:arithmetic_for_body() {
425 let start = s.location();
426 let end = &body.loc;
427 let loc = TokenLocation::within(start, end);
428 ast::ArithmeticForClauseCommand { initializer, condition, updater, body, loc }
429 }
430
431 rule arithmetic_for_body() -> ast::DoGroupCommand =
432 sequential_sep() body:do_group() { body } /
433 body:brace_group() { ast::DoGroupCommand { list: body.list, loc: body.loc } }
434
435 rule extended_test_command() -> ast::ExtendedTestExprCommand =
436 s:specific_word("[[") linebreak() expr:extended_test_expression() linebreak() e:specific_word("]]") {
437 let start = s.location();
438 let end = e.location();
439 let loc = TokenLocation::within(start, end);
440
441 ast::ExtendedTestExprCommand { expr, loc }
442 }
443
444 rule extended_test_expression() -> ast::ExtendedTestExpr = precedence! {
445 left:(@) linebreak() specific_operator("||") linebreak() right:@ { ast::ExtendedTestExpr::Or(Box::from(left), Box::from(right)) }
446 --
447 left:(@) linebreak() specific_operator("&&") linebreak() right:@ { ast::ExtendedTestExpr::And(Box::from(left), Box::from(right)) }
448 --
449 specific_word("!") e:@ { ast::ExtendedTestExpr::Not(Box::from(e)) }
450 --
451 specific_operator("(") e:extended_test_expression() specific_operator(")") { ast::ExtendedTestExpr::Parenthesized(Box::from(e)) }
452 --
453 left:word() specific_word("-eq") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::ArithmeticEqualTo, ast::Word::from(left), ast::Word::from(right)) }
455 left:word() specific_word("-ne") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::ArithmeticNotEqualTo, ast::Word::from(left), ast::Word::from(right)) }
456 left:word() specific_word("-lt") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::ArithmeticLessThan, ast::Word::from(left), ast::Word::from(right)) }
457 left:word() specific_word("-le") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::ArithmeticLessThanOrEqualTo, ast::Word::from(left), ast::Word::from(right)) }
458 left:word() specific_word("-gt") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::ArithmeticGreaterThan, ast::Word::from(left), ast::Word::from(right)) }
459 left:word() specific_word("-ge") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::ArithmeticGreaterThanOrEqualTo, ast::Word::from(left), ast::Word::from(right)) }
460 left:word() specific_word("-ef") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::FilesReferToSameDeviceAndInodeNumbers, ast::Word::from(left), ast::Word::from(right)) }
462 left:word() specific_word("-nt") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::LeftFileIsNewerOrExistsWhenRightDoesNot, ast::Word::from(left), ast::Word::from(right)) }
463 left:word() specific_word("-ot") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::LeftFileIsOlderOrDoesNotExistWhenRightDoes, ast::Word::from(left), ast::Word::from(right)) }
464 left:word() (specific_word("==") / specific_word("=")) right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::StringExactlyMatchesPattern, ast::Word::from(left), ast::Word::from(right)) }
465 left:word() specific_word("!=") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::StringDoesNotExactlyMatchPattern, ast::Word::from(left), ast::Word::from(right)) }
466 left:word() specific_word("=~") right:regex_word() {
467 if right.value.starts_with(['\'', '\"']) {
468 ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::StringContainsSubstring, ast::Word::from(left), right)
470 } else {
471 ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::StringMatchesRegex, ast::Word::from(left), right)
472 }
473 }
474 left:word() specific_operator("<") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::LeftSortsBeforeRight, ast::Word::from(left), ast::Word::from(right)) }
475 left:word() specific_operator(">") right:word() { ast::ExtendedTestExpr::BinaryTest(ast::BinaryPredicate::LeftSortsAfterRight, ast::Word::from(left), ast::Word::from(right)) }
476 --
477 p:extended_unary_predicate() f:word() { ast::ExtendedTestExpr::UnaryTest(p, ast::Word::from(f)) }
478 --
479 w:word() { ast::ExtendedTestExpr::UnaryTest(ast::UnaryPredicate::StringHasNonZeroLength, ast::Word::from(w)) }
480 }
481
482 rule extended_unary_predicate() -> ast::UnaryPredicate =
483 specific_word("-a") { ast::UnaryPredicate::FileExists } /
484 specific_word("-b") { ast::UnaryPredicate::FileExistsAndIsBlockSpecialFile } /
485 specific_word("-c") { ast::UnaryPredicate::FileExistsAndIsCharSpecialFile } /
486 specific_word("-d") { ast::UnaryPredicate::FileExistsAndIsDir } /
487 specific_word("-e") { ast::UnaryPredicate::FileExists } /
488 specific_word("-f") { ast::UnaryPredicate::FileExistsAndIsRegularFile } /
489 specific_word("-g") { ast::UnaryPredicate::FileExistsAndIsSetgid } /
490 specific_word("-h") { ast::UnaryPredicate::FileExistsAndIsSymlink } /
491 specific_word("-k") { ast::UnaryPredicate::FileExistsAndHasStickyBit } /
492 specific_word("-n") { ast::UnaryPredicate::StringHasNonZeroLength } /
493 specific_word("-o") { ast::UnaryPredicate::ShellOptionEnabled } /
494 specific_word("-p") { ast::UnaryPredicate::FileExistsAndIsFifo } /
495 specific_word("-r") { ast::UnaryPredicate::FileExistsAndIsReadable } /
496 specific_word("-s") { ast::UnaryPredicate::FileExistsAndIsNotZeroLength } /
497 specific_word("-t") { ast::UnaryPredicate::FdIsOpenTerminal } /
498 specific_word("-u") { ast::UnaryPredicate::FileExistsAndIsSetuid } /
499 specific_word("-v") { ast::UnaryPredicate::ShellVariableIsSetAndAssigned } /
500 specific_word("-w") { ast::UnaryPredicate::FileExistsAndIsWritable } /
501 specific_word("-x") { ast::UnaryPredicate::FileExistsAndIsExecutable } /
502 specific_word("-z") { ast::UnaryPredicate::StringHasZeroLength } /
503 specific_word("-G") { ast::UnaryPredicate::FileExistsAndOwnedByEffectiveGroupId } /
504 specific_word("-L") { ast::UnaryPredicate::FileExistsAndIsSymlink } /
505 specific_word("-N") { ast::UnaryPredicate::FileExistsAndModifiedSinceLastRead } /
506 specific_word("-O") { ast::UnaryPredicate::FileExistsAndOwnedByEffectiveUserId } /
507 specific_word("-R") { ast::UnaryPredicate::ShellVariableIsSetAndNameRef } /
508 specific_word("-S") { ast::UnaryPredicate::FileExistsAndIsSocket }
509
510 rule regex_word() -> ast::Word =
513 value:$((!specific_word("]]") regex_word_piece())+) {
514 ast::Word::from(value)
515 }
516
517 rule regex_word_piece() =
518 word() {} /
519 specific_operator("|") {} /
520 specific_operator("(") parenthesized_regex_word()* specific_operator(")") {}
521
522 rule parenthesized_regex_word() =
523 regex_word_piece() /
524 !specific_operator(")") !specific_operator("]]") [_]
525
526 rule name() -> &'input str =
527 w:[Token::Word(_, _)] { w.to_str() }
528
529 rule _in() -> () =
530 specific_word("in") { }
531
532 rule wordlist() -> Vec<ast::Word> =
533 (w:word() { ast::Word::from(w) })+
534
535 pub(crate) rule case_clause() -> ast::CaseClauseCommand =
536 specific_word("case") w:word() linebreak() _in() linebreak() first_items:case_item()* last_item:case_item_ns()? specific_word("esac") {
537 let mut cases = first_items;
538
539 if let Some(last_item) = last_item {
540 cases.push(last_item);
541 }
542
543 ast::CaseClauseCommand { value: ast::Word::from(w), cases }
544 }
545
546 pub(crate) rule case_item_ns() -> ast::CaseItem =
547 s:specific_operator("(")? p:pattern() specific_operator(")") c:compound_list() {
548 let start = s.map(|s| s.location()).or_else(|| p.first().and_then(|w| w.loc.as_ref()));
549 let end = c.location();
550
551 let loc = maybe_location(start, end.as_ref());
552
553 ast::CaseItem { patterns: p, cmd: Some(c), post_action: ast::CaseItemPostAction::ExitCase, loc }
554 } /
555 s:specific_operator("(")? p:pattern() e:specific_operator(")") linebreak() {
556 let start = s.map(|s| s.location()).or_else(|| p.first().and_then(|w| w.loc.as_ref()));
557 let end = Some(e.location());
558
559 let loc = maybe_location(start, end);
560 ast::CaseItem { patterns: p, cmd: None, post_action: ast::CaseItemPostAction::ExitCase, loc }
561 }
562
563 pub(crate) rule case_item() -> ast::CaseItem =
564 s:specific_operator("(")? p:pattern() specific_operator(")") linebreak() post_action:case_item_post_action() linebreak() {
565 let start = s.map(|s| s.location()).or_else(|| p.first().and_then(|w| w.loc.as_ref()));
566 let end = Some(post_action.1);
567 let loc = maybe_location(start, end);
568 ast::CaseItem { patterns: p, cmd: None, post_action: post_action.0, loc }
569 } /
570 s:specific_operator("(")? p:pattern() specific_operator(")") c:compound_list() post_action:case_item_post_action() linebreak() {
571 let start = s.map(|s| s.location()).or_else(|| p.first().and_then(|w| w.loc.as_ref()));
572 let end = Some(post_action.1);
573 let loc = maybe_location(start, end);
574 ast::CaseItem { patterns: p, cmd: Some(c), post_action: post_action.0, loc }
575 }
576
577 rule case_item_post_action() -> (ast::CaseItemPostAction, &'input TokenLocation) =
578 s:specific_operator(";;") {
579 (ast::CaseItemPostAction::ExitCase, s.location())
580 } /
581 non_posix_extensions_enabled() s:specific_operator(";;&") {
582 (ast::CaseItemPostAction::ContinueEvaluatingCases, s.location())
583 } /
584 non_posix_extensions_enabled() s:specific_operator(";&") {
585 (ast::CaseItemPostAction::UnconditionallyExecuteNextCaseItem, s.location())
586 }
587
588 rule pattern() -> Vec<ast::Word> =
589 (w:word() { ast::Word::from(w) }) ++ specific_operator("|")
590
591 rule if_clause() -> ast::IfClauseCommand =
592 s:specific_word("if") condition:compound_list() specific_word("then") then:compound_list() elses:else_part()? e:specific_word("fi") {
593 let start = s.location();
594 let end = s.location();
595 let loc = TokenLocation::within(start, end);
596
597 ast::IfClauseCommand {
598 condition,
599 then,
600 elses,
601 loc
602 }
603 }
604
605 rule else_part() -> Vec<ast::ElseClause> =
606 cs:_conditional_else_part()+ u:_unconditional_else_part()? {
607 let mut parts = vec![];
608 for c in cs {
609 parts.push(c);
610 }
611
612 if let Some(uncond) = u {
613 parts.push(uncond);
614 }
615
616 parts
617 } /
618 e:_unconditional_else_part() { vec![e] }
619
620 rule _conditional_else_part() -> ast::ElseClause =
621 specific_word("elif") condition:compound_list() specific_word("then") body:compound_list() {
622 ast::ElseClause { condition: Some(condition), body }
623 }
624
625 rule _unconditional_else_part() -> ast::ElseClause =
626 specific_word("else") body:compound_list() {
627 ast::ElseClause { condition: None, body }
628 }
629
630 rule while_clause() -> ast::WhileOrUntilClauseCommand =
631 s:specific_word("while") c:compound_list() d:do_group() {
632 let start = s.location();
633 let end = &d.loc;
634 let loc = TokenLocation::within(start, end);
635
636 ast::WhileOrUntilClauseCommand(c, d, loc)
637 }
638
639 rule until_clause() -> ast::WhileOrUntilClauseCommand =
640 s:specific_word("until") c:compound_list() d:do_group() {
641 let start = s.location();
642 let end = &d.loc;
643 let loc = TokenLocation::within(start, end);
644
645 ast::WhileOrUntilClauseCommand(c, d, loc)
646 }
647
648 rule function_definition() -> ast::FunctionDefinition =
650 specific_word("function")? fname:fname() body:function_parens_and_body() {
651 ast::FunctionDefinition { fname, body, source: source_info.source.clone() }
652 } /
653 specific_word("function") fname:fname() linebreak() body:function_body() {
654 ast::FunctionDefinition { fname, body, source: source_info.source.clone() }
655 } /
656 expected!("function definition")
657
658 pub(crate) rule function_parens_and_body() -> ast::FunctionBody =
659 specific_operator("(") specific_operator(")") linebreak() body:function_body() { body }
660
661 rule function_body() -> ast::FunctionBody =
662 c:compound_command() r:redirect_list()? { ast::FunctionBody(c, r) }
663
664 rule fname() -> ast::Word =
665 w:[Token::Word(word, l) if !word.ends_with('=')] { ast::Word::with_location(word, l) }
669
670 rule brace_group() -> ast::BraceGroupCommand =
671 start:specific_word("{") list:compound_list() end:specific_word("}") {
672 let loc = TokenLocation::within(start.location(), end.location());
673 ast::BraceGroupCommand { list, loc }
674 }
675
676 rule do_group() -> ast::DoGroupCommand =
677 start:specific_word("do") list:compound_list() end:specific_word("done") {
678 let loc = TokenLocation::within(start.location(), end.location());
679 ast::DoGroupCommand { list, loc }
680 }
681
682 rule simple_command() -> ast::SimpleCommand =
683 prefix:cmd_prefix() word_and_suffix:(word_or_name:cmd_word() suffix:cmd_suffix()? { (word_or_name, suffix) })? {
684 match word_and_suffix {
685 Some((word_or_name, suffix)) => {
686 ast::SimpleCommand { prefix: Some(prefix), word_or_name: Some(ast::Word::from(word_or_name)), suffix }
687 }
688 None => {
689 ast::SimpleCommand { prefix: Some(prefix), word_or_name: None, suffix: None }
690 }
691 }
692 } /
693 word_or_name:cmd_name() suffix:cmd_suffix()? {
694 ast::SimpleCommand { prefix: None, word_or_name: Some(ast::Word::from(word_or_name)), suffix } } /
695 expected!("simple command")
696
697 rule cmd_name() -> &'input Token =
698 non_reserved_word()
699
700 rule cmd_word() -> &'input Token =
701 !assignment_word() w:non_reserved_word() { w }
702
703 rule cmd_prefix() -> ast::CommandPrefix =
704 p:(
705 i:io_redirect() { ast::CommandPrefixOrSuffixItem::IoRedirect(i) } /
706 assignment_and_word:assignment_word() {
707 let (assignment, word) = assignment_and_word;
708 ast::CommandPrefixOrSuffixItem::AssignmentWord(assignment, word)
709 }
710 )+ { ast::CommandPrefix(p) }
711
712 rule cmd_suffix() -> ast::CommandSuffix =
713 s:(
714 non_posix_extensions_enabled() sub:process_substitution() {
715 let (kind, subshell) = sub;
716 ast::CommandPrefixOrSuffixItem::ProcessSubstitution(kind, subshell)
717 } /
718 i:io_redirect() {
719 ast::CommandPrefixOrSuffixItem::IoRedirect(i)
720 } /
721 assignment_and_word:assignment_word() {
722 let (assignment, word) = assignment_and_word;
723 ast::CommandPrefixOrSuffixItem::AssignmentWord(assignment, word)
724 } /
725 w:word() {
726 ast::CommandPrefixOrSuffixItem::Word(ast::Word::from(w))
727 }
728 )+ { ast::CommandSuffix(s) }
729
730 rule redirect_list() -> ast::RedirectList =
731 r:io_redirect()+ { ast::RedirectList(r) } /
732 expected!("redirect list")
733
734 rule io_redirect() -> ast::IoRedirect =
736 n:io_number()? f:io_file() {
737 let (kind, target) = f;
738 ast::IoRedirect::File(n, kind, target)
739 } /
740 non_posix_extensions_enabled() specific_operator("&>>") target:filename() { ast::IoRedirect::OutputAndError(ast::Word::from(target), true) } /
741 non_posix_extensions_enabled() specific_operator("&>") target:filename() { ast::IoRedirect::OutputAndError(ast::Word::from(target), false) } /
742 non_posix_extensions_enabled() n:io_number()? specific_operator("<<<") w:word() { ast::IoRedirect::HereString(n, ast::Word::from(w)) } /
743 n:io_number()? h:io_here() { ast::IoRedirect::HereDocument(n, h) } /
744 expected!("I/O redirect")
745
746 rule io_file() -> (ast::IoFileRedirectKind, ast::IoFileRedirectTarget) =
748 specific_operator("<") f:io_filename() { (ast::IoFileRedirectKind::Read, f) } /
749 specific_operator("<&") f:io_fd_duplication_source() { (ast::IoFileRedirectKind::DuplicateInput, f) } /
750 specific_operator(">") f:io_filename() { (ast::IoFileRedirectKind::Write, f) } /
751 specific_operator(">&") f:io_fd_duplication_source() { (ast::IoFileRedirectKind::DuplicateOutput, f) } /
752 specific_operator(">>") f:io_filename() { (ast::IoFileRedirectKind::Append, f) } /
753 specific_operator("<>") f:io_filename() { (ast::IoFileRedirectKind::ReadAndWrite, f) } /
754 specific_operator(">|") f:io_filename() { (ast::IoFileRedirectKind::Clobber, f) }
755
756 rule io_fd_duplication_source() -> ast::IoFileRedirectTarget =
757 w:word() { ast::IoFileRedirectTarget::Duplicate(ast::Word::from(w)) }
758
759 rule io_fd() -> u32 =
760 w:[Token::Word(_, _)] {? w.to_str().parse().or(Err("io_fd u32")) }
761
762 rule io_filename() -> ast::IoFileRedirectTarget =
763 non_posix_extensions_enabled() sub:process_substitution() {
764 let (kind, subshell) = sub;
765 ast::IoFileRedirectTarget::ProcessSubstitution(kind, subshell)
766 } /
767 f:filename() { ast::IoFileRedirectTarget::Filename(ast::Word::from(f)) }
768
769 rule filename() -> &'input Token =
770 word()
771
772 pub(crate) rule io_here() -> ast::IoHereDocument =
773 specific_operator("<<-") here_tag:here_tag() doc:[_] closing_tag:here_tag() {
774 let requires_expansion = !here_tag.to_str().contains(['\'', '"', '\\']);
775 ast::IoHereDocument {
776 remove_tabs: true,
777 requires_expansion,
778 here_end: ast::Word::from(here_tag),
779 doc: ast::Word::from(doc)
780 }
781 } /
782 specific_operator("<<") here_tag:here_tag() doc:[_] closing_tag:here_tag() {
783 let requires_expansion = !here_tag.to_str().contains(['\'', '"', '\\']);
784 ast::IoHereDocument {
785 remove_tabs: false,
786 requires_expansion,
787 here_end: ast::Word::from(here_tag),
788 doc: ast::Word::from(doc)
789 }
790 }
791
792 rule here_tag() -> &'input Token =
793 word()
794
795 rule process_substitution() -> (ast::ProcessSubstitutionKind, ast::SubshellCommand) =
796 specific_operator("<") s:subshell() { (ast::ProcessSubstitutionKind::Read, s) } /
797 specific_operator(">") s:subshell() { (ast::ProcessSubstitutionKind::Write, s) }
798
799 rule newline_list() -> () =
800 newline()+ {}
801
802 rule linebreak() -> () =
803 quiet! {
804 newline()* {}
805 }
806
807 rule separator_op() -> ast::SeparatorOperator =
808 specific_operator("&") { ast::SeparatorOperator::Async } /
809 specific_operator(";") { ast::SeparatorOperator::Sequence }
810
811 rule separator() -> Option<ast::SeparatorOperator> =
812 s:separator_op() linebreak() { Some(s) } /
813 newline_list() { None }
814
815 rule sequential_sep() -> () =
816 specific_operator(";") linebreak() /
817 newline_list()
818
819 rule non_reserved_word() -> &'input Token =
824 !reserved_word() w:word() { w }
825
826 rule word() -> &'input Token =
827 [Token::Word(_, _)]
828
829 rule reserved_word() -> &'input Token =
830 [Token::Word(w, _) if matches!(w.as_str(),
831 "!" |
832 "{" |
833 "}" |
834 "case" |
835 "do" |
836 "done" |
837 "elif" |
838 "else" |
839 "esac" |
840 "fi" |
841 "for" |
842 "if" |
843 "in" |
844 "then" |
845 "until" |
846 "while"
847 )] /
848
849 non_posix_extensions_enabled() token:non_posix_reserved_word_token() { token }
851
852 rule non_posix_reserved_word_token() -> &'input Token =
853 specific_word("[[") /
854 specific_word("]]") /
855 specific_word("function") /
856 specific_word("select")
857
858 rule newline() -> () = quiet! {
859 specific_operator("\n") {}
860 }
861
862 pub(crate) rule assignment_word() -> (ast::Assignment, ast::Word) =
863 non_posix_extensions_enabled() [Token::Word(w, l)] specific_operator("(") elements:array_elements() end:specific_operator(")") {?
864 let mut parsed = parse_array_assignment(w.as_str(), elements.as_slice())?;
865
866 let mut all_as_word = w.to_owned();
867 all_as_word.push('(');
868 for (i, e) in elements.iter().enumerate() {
869 if i > 0 {
870 all_as_word.push(' ');
871 }
872 all_as_word.push_str(e);
873 }
874 all_as_word.push(')');
875
876 let loc = TokenLocation::within(l, end.location());
877 parsed.loc = loc.clone();
878 Ok((parsed, ast::Word::with_location(&all_as_word, &loc)))
879 } /
880 [Token::Word(w, l)] {?
881 let mut parsed = parse_assignment_word(w.as_str())?;
882 parsed.loc = l.clone();
883 Ok((parsed, ast::Word::with_location(w, l)))
884 }
885
886 rule array_elements() -> Vec<&'input String> =
887 linebreak() e:array_element()* { e }
888
889 rule array_element() -> &'input String =
890 linebreak() [Token::Word(e, _)] linebreak() { e }
891
892 rule io_number() -> ast::IoFd =
898 [Token::Word(w, num_loc) if w.chars().all(|c: char| c.is_ascii_digit())]
899 &([Token::Operator(o, redir_loc) if
900 o.starts_with(['<', '>']) &&
901 locations_are_contiguous(num_loc, redir_loc)]) {
902
903 w.parse().unwrap()
904 }
905
906 rule specific_operator(expected: &str) -> &'input Token =
910 [Token::Operator(w, _) if w.as_str() == expected]
911
912 rule specific_word(expected: &str) -> &'input Token =
913 [Token::Word(w, _) if w.as_str() == expected]
914
915 rule non_posix_extensions_enabled() -> () =
916 &[_] {? if !parser_options.sh_mode { Ok(()) } else { Err("posix") } }
917 }
918}
919
peg::parser! {
    // Character-level helper grammar for parsing the text of assignment
    // words (e.g. `name=value`, `name+=value`, `name[index]=value`).
    grammar assignments() for str {
        // Parses a complete scalar assignment: name, optional `+` (append),
        // `=`, and the remainder as the value. The location is defaulted;
        // the caller patches it in afterwards.
        pub(crate) rule name_and_scalar_value() -> ast::Assignment =
            nae:name_and_equals() value:scalar_value() {
                let (name, append) = nae;
                ast::Assignment { name, value, append, loc: TokenLocation::default() }
            }

        // Parses the name and `=` (or `+=`) portion; returns the name and
        // whether this is an append (`+=`) assignment.
        pub(crate) rule name_and_equals() -> (ast::AssignmentName, bool) =
            name:name() append:("+"?) "=" {
                (name, append.is_some())
            }

        // Parses a single array-literal element: either `[index]=value` or a
        // bare value (with no explicit index).
        pub(crate) rule literal_array_element() -> (Option<String>, String) =
            "[" inner:$((!"]" [_])*) "]=" value:$([_]*) {
                (Some(inner.to_owned()), value.to_owned())
            } /
            value:$([_]+) {
                (None, value.to_owned())
            }

        // An assignment target: either an indexed array element or a plain
        // variable name.
        rule name() -> ast::AssignmentName =
            aen:array_element_name() {
                let (name, index) = aen;
                ast::AssignmentName::ArrayElementName(name.to_owned(), index.to_owned())
            } /
            name:scalar_name() {
                ast::AssignmentName::VariableName(name.to_owned())
            }

        // `name[index]` — returns the name and the raw (unparsed) index text.
        rule array_element_name() -> (&'input str, &'input str) =
            name:scalar_name() "[" ai:array_index() "]" { (name, ai) }

        // Everything up to the closing `]`.
        rule array_index() -> &'input str =
            $((![']'] [_])*)

        // A variable name: leading alpha/underscore, then alphanumerics or
        // underscores.
        rule scalar_name() -> &'input str =
            $(alpha_or_underscore() non_first_variable_char()*)

        rule non_first_variable_char() -> () =
            ['_' | '0'..='9' | 'a'..='z' | 'A'..='Z'] {}

        rule alpha_or_underscore() -> () =
            ['_' | 'a'..='z' | 'A'..='Z'] {}

        // The rest of the input, taken verbatim as a scalar value.
        rule scalar_value() -> ast::AssignmentValue =
            v:$([_]*) { ast::AssignmentValue::Scalar(ast::Word::from(v.to_owned())) }
    }
}
969
970fn parse_assignment_word(word: &str) -> Result<ast::Assignment, &'static str> {
971 let parse_result = assignments::name_and_scalar_value(word);
972 parse_result.map_err(|_| "not assignment word")
973}
974
975fn add_pipe_extension_redirection(c: &mut ast::Command) -> Result<(), &'static str> {
977 fn add_to_redirect_list(l: &mut Option<ast::RedirectList>, r: ast::IoRedirect) {
978 if let Some(l) = l {
979 l.0.push(r);
980 } else {
981 let v = vec![r];
982 *l = Some(ast::RedirectList(v));
983 }
984 }
985
986 let r = ast::IoRedirect::File(
987 Some(2),
988 ast::IoFileRedirectKind::DuplicateOutput,
989 ast::IoFileRedirectTarget::Fd(1),
990 );
991
992 match c {
993 ast::Command::Simple(c) => {
994 let r = ast::CommandPrefixOrSuffixItem::IoRedirect(r);
995 if let Some(l) = &mut c.suffix {
996 l.0.push(r);
997 } else {
998 c.suffix = Some(ast::CommandSuffix(vec![r]));
999 }
1000 }
1001 ast::Command::Compound(_, l) => add_to_redirect_list(l, r),
1002 ast::Command::Function(f) => add_to_redirect_list(&mut f.body.1, r),
1003 ast::Command::ExtendedTest(_) => return Err("|& unimplemented for extended tests"),
1004 }
1005
1006 Ok(())
1007}
1008
1009#[inline]
1010fn locations_are_contiguous(
1011 loc_left: &crate::TokenLocation,
1012 loc_right: &crate::TokenLocation,
1013) -> bool {
1014 loc_left.end.index == loc_right.start.index
1015}
1016
1017fn parse_array_assignment(
1018 word: &str,
1019 elements: &[&String],
1020) -> Result<ast::Assignment, &'static str> {
1021 let (assignment_name, append) =
1022 assignments::name_and_equals(word).map_err(|_| "not array assignment word")?;
1023
1024 let elements = elements
1025 .iter()
1026 .map(|element| assignments::literal_array_element(element))
1027 .collect::<Result<Vec<_>, _>>()
1028 .map_err(|_| "invalid array element in literal")?;
1029
1030 let elements_as_words = elements
1031 .into_iter()
1032 .map(|(key, value)| {
1033 (
1034 key.map(|k| ast::Word::new(k.as_str())),
1035 ast::Word::new(value.as_str()),
1036 )
1037 })
1038 .collect();
1039
1040 Ok(ast::Assignment {
1041 name: assignment_name,
1042 value: ast::AssignmentValue::Array(elements_as_words),
1043 append,
1044 loc: TokenLocation::default(),
1045 })
1046}
1047
#[cfg(test)]
mod tests {

    use super::*;
    use crate::tokenizer::tokenize_str;
    use anyhow::Result;
    use insta::assert_ron_snapshot;

    // Pairs a test's input text with its parse result so both are recorded
    // together in the RON snapshot.
    #[derive(serde::Serialize)]
    struct ParseResult<'a, T> {
        input: &'a str,
        result: &'a T,
    }

    // `case` statement whose arm is terminated by `;;`. The raw string
    // begins/ends with `\` + newline (shell line continuations).
    #[test]
    fn parse_case() -> Result<()> {
        let input = r"\
case x in
x)
    echo y;;
esac\
";

        let tokens = tokenize_str(input)?;
        let command = super::token_parser::case_clause(
            &Tokens {
                tokens: tokens.as_slice(),
            },
            &ParserOptions::default(),
            &SourceInfo::default(),
        )?;

        assert_ron_snapshot!(ParseResult {
            input,
            result: &command
        });

        Ok(())
    }

    // `case` arm with no `;;` terminator before `esac`.
    #[test]
    fn parse_case_ns() -> Result<()> {
        let input = r"\
case x in
x)
    echo y
esac\
";

        let tokens = tokenize_str(input)?;
        let command = super::token_parser::case_clause(
            &Tokens {
                tokens: tokens.as_slice(),
            },
            &ParserOptions::default(),
            &SourceInfo::default(),
        )?;

        assert_ron_snapshot!(ParseResult {
            input,
            result: &command
        });

        Ok(())
    }

    // Mixed parentheses: subshells containing an arithmetic `(( ... ))`,
    // exercising disambiguation between the two.
    #[test]
    fn parse_arith_and_non_arith_parens() -> Result<()> {
        let input = r"( : && ( (( 0 )) || : ) )";

        let tokens = tokenize_str(input)?;
        let result = super::token_parser::program(
            &Tokens {
                tokens: tokens.as_slice(),
            },
            &ParserOptions::default(),
            &SourceInfo::default(),
        )?;

        assert_ron_snapshot!(ParseResult {
            input,
            result: &result
        });

        Ok(())
    }

    // `|&` pipe extension on a simple command (should become a pipeline
    // with a `2>&1` redirection).
    #[test]
    fn parse_redirection() -> Result<()> {
        let input = r"echo |& wc";

        let tokens = tokenize_str(input)?;
        let seq = super::token_parser::pipe_sequence(
            &Tokens {
                tokens: tokens.as_slice(),
            },
            &ParserOptions::default(),
            &SourceInfo::default(),
        )?;

        assert_ron_snapshot!(ParseResult {
            input,
            result: &seq
        });

        Ok(())
    }

    // Here-document whose delimiter line lacks a trailing newline.
    #[test]
    fn parse_here_doc_with_no_trailing_newline() -> Result<()> {
        let input = r"cat <<EOF
Something
EOF";

        let tokens = tokenize_str(input)?;
        let result = super::token_parser::program(
            &Tokens {
                tokens: tokens.as_slice(),
            },
            &ParserOptions::default(),
            &SourceInfo::default(),
        )?;

        assert_ron_snapshot!(ParseResult {
            input,
            result: &result
        });

        Ok(())
    }

    // Function definitions followed by pipe redirections, both the explicit
    // `2>&1 |` form and the shorthand `|&` form.
    #[test]
    fn parse_function_with_pipe_redirection() -> Result<()> {
        let inputs = [r"foo() { echo 1; } 2>&1 | cat", r"foo() { echo 1; } |& cat"];

        for input in inputs {
            let tokens = tokenize_str(input)?;
            let seq = super::token_parser::pipe_sequence(
                &Tokens {
                    tokens: tokens.as_slice(),
                },
                &ParserOptions::default(),
                &SourceInfo::default(),
            )?;

            assert_ron_snapshot!(ParseResult {
                input,
                result: &seq
            });
        }

        Ok(())
    }

    // End-to-end program parse: shebang, comments, a for-loop, and a
    // parameter transformation (`${f@L}`) with redirection.
    #[test]
    fn test_parse_program() -> Result<()> {
        let input = r#"

#!/usr/bin/env bash

for f in A B C; do

    # sdfsdf
    echo "${f@L}" >&2

    done

"#;

        let tokens = tokenize_str(input)?;
        let result = super::token_parser::program(
            &Tokens {
                tokens: tokens.as_slice(),
            },
            &ParserOptions::default(),
            &SourceInfo::default(),
        )?;

        assert_ron_snapshot!(ParseResult {
            input,
            result: &result
        });

        Ok(())
    }
}