yash_syntax/parser/
list.rs

// This file is part of yash, an extended POSIX shell.
// Copyright (C) 2020 WATANABE Yuki
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program.  If not, see <https://www.gnu.org/licenses/>.

//! Syntax parser for list and compound list
18
19use super::core::Parser;
20use super::core::Rec;
21use super::core::Result;
22use super::error::Error;
23use super::error::SyntaxError;
24use super::lex::Operator::{And, Newline, Semicolon};
25use super::lex::TokenId::{self, EndOfInput, IoLocation, IoNumber, Operator, Token};
26use crate::syntax::Item;
27use crate::syntax::List;
28use std::pin::Pin;
29use std::rc::Rc;
30
31fn error_type_for_trailing_token_in_command_line(token_id: TokenId) -> Option<SyntaxError> {
32    use super::lex::Keyword::*;
33    use super::lex::Operator::*;
34    use SyntaxError::*;
35    match token_id {
36        EndOfInput => None,
37        Token(None) | IoNumber | IoLocation => Some(MissingSeparator),
38        Token(Some(keyword)) => match keyword {
39            Bang | OpenBracketBracket | Case | For | Function | If | Until | While | OpenBrace => {
40                Some(MissingSeparator)
41            }
42            Do => Some(UnopenedLoop),
43            Done => Some(UnopenedDoClause),
44            Elif | Else | Fi | Then => Some(UnopenedIf),
45            Esac => Some(UnopenedCase),
46            In => Some(InAsCommandName),
47            CloseBrace => Some(UnopenedGrouping),
48        },
49        Operator(operator) => match operator {
50            And | AndAnd | Semicolon | Bar | BarBar => Some(InvalidCommandToken),
51            OpenParen => Some(MissingSeparator),
52            CloseParen => Some(UnopenedSubshell),
53            SemicolonAnd | SemicolonSemicolon | SemicolonSemicolonAnd | SemicolonBar => {
54                Some(UnopenedCase)
55            }
56            Newline | Less | LessAnd | LessOpenParen | LessLess | LessLessDash | LessLessLess
57            | LessGreater | Greater | GreaterAnd | GreaterOpenParen | GreaterGreater
58            | GreaterGreaterBar | GreaterBar => unreachable!(),
59        },
60    }
61}
62
63impl Parser<'_, '_> {
64    // There is no function that parses a single item because it would not be
65    // very useful for parsing a list. An item requires a separator operator
66    // ('&' or ';') for it to be followed by another item. You cannot tell from
67    // the resultant item whether there was a separator operator.
68    // pub async fn item(&mut self) -> Result<Rec<Item>> { }
69
70    /// Parses a list.
71    ///
72    /// This function parses a sequence of and-or lists that are separated by `;`
73    /// or `&`. A newline token that delimits the list is not parsed.
74    ///
75    /// If there is no valid command at the current position, this function
76    /// returns a list with no items.
77    pub async fn list(&mut self) -> Result<Rec<List>> {
78        let mut items = vec![];
79
80        let mut result = match self.and_or_list().await? {
81            Rec::AliasSubstituted => return Ok(Rec::AliasSubstituted),
82            Rec::Parsed(result) => result,
83        };
84
85        while let Some(and_or) = result {
86            let token = self.peek_token().await?;
87            let (async_flag, next) = match token.id {
88                Operator(Semicolon) => (None, true),
89                Operator(And) => (Some(token.word.location.clone()), true),
90                _ => (None, false),
91            };
92
93            let and_or = Rc::new(and_or);
94            items.push(Item { and_or, async_flag });
95
96            if !next {
97                break;
98            }
99            self.take_token_raw().await?;
100
101            result = loop {
102                if let Rec::Parsed(result) = self.and_or_list().await? {
103                    break result;
104                }
105            };
106        }
107
108        Ok(Rec::Parsed(List(items)))
109    }
110
111    // TODO Consider returning Result<Result<(), &Token>, Error>
112    /// Parses an optional newline token and here-document contents.
113    ///
114    /// If the current token is a newline, it is consumed and any pending here-document contents
115    /// are read starting from the next line. Otherwise, this function returns `Ok(false)` without
116    /// any side effect.
117    pub async fn newline_and_here_doc_contents(&mut self) -> Result<bool> {
118        if self.peek_token().await?.id != Operator(Newline) {
119            return Ok(false);
120        }
121
122        self.take_token_raw().await?;
123        self.here_doc_contents().await?;
124        Ok(true)
125    }
126
127    /// Parses a complete command optionally delimited by a newline.
128    ///
129    /// A complete command is a minimal sequence of and-or lists that can be executed in the shell
130    /// environment. This function reads as many lines as needed to compose the complete command.
131    ///
132    /// If the current line is empty (or containing only whitespaces and comments), the result is
133    /// an empty list. If the first token of the current line is the end of input, the result is
134    /// `Ok(None)`.
135    pub async fn command_line(&mut self) -> Result<Option<List>> {
136        let list = loop {
137            if let Rec::Parsed(list) = self.list().await? {
138                break list;
139            }
140        };
141
142        if !self.newline_and_here_doc_contents().await? {
143            let next = self.peek_token().await?;
144            if let Some(syntax_error) = error_type_for_trailing_token_in_command_line(next.id) {
145                let cause = syntax_error.into();
146                let location = next.word.location.clone();
147                return Err(Error { cause, location });
148            }
149            if list.0.is_empty() {
150                return Ok(None);
151            }
152        }
153
154        self.ensure_no_unread_here_doc()?;
155        Ok(Some(list))
156    }
157
158    /// Parses an optional compound list.
159    ///
160    /// A compound list is a sequence of one or more and-or lists that are
161    /// separated by newlines and optionally preceded and/or followed by
162    /// newlines.
163    ///
164    /// This function stops parsing on encountering an unexpected token that
165    /// cannot be parsed as the beginning of an and-or list. If the token is a
166    /// possible [clause delimiter](super::lex::TokenId::is_clause_delimiter),
167    /// the result is a list of commands that have been parsed up to the token.
168    /// Otherwise, an `InvalidCommandToken` error is returned.
169    pub async fn maybe_compound_list(&mut self) -> Result<List> {
170        let mut items = vec![];
171
172        loop {
173            let list = loop {
174                if let Rec::Parsed(list) = self.list().await? {
175                    break list;
176                }
177            };
178            items.extend(list.0);
179
180            if !self.newline_and_here_doc_contents().await? {
181                break;
182            }
183        }
184
185        let next = self.peek_token().await?;
186        if next.id.is_clause_delimiter() {
187            Ok(List(items))
188        } else {
189            let cause = SyntaxError::InvalidCommandToken.into();
190            let location = next.word.location.clone();
191            Err(Error { cause, location })
192        }
193    }
194
195    /// Like [`maybe_compound_list`](Self::maybe_compound_list), but returns the future in a pinning box.
196    pub fn maybe_compound_list_boxed(
197        &mut self,
198    ) -> Pin<Box<dyn Future<Output = Result<List>> + '_>> {
199        Box::pin(self.maybe_compound_list())
200    }
201}
202
// Boolean fields are compared with `assert_eq!(.., false)` below for symmetry
// with the other field assertions.
#[allow(clippy::bool_assert_comparison)]
#[cfg(test)]
mod tests {
    use super::super::error::ErrorCause;
    use super::super::lex::Lexer;
    use super::*;
    use crate::source::Source;
    use crate::syntax::AndOrList;
    use crate::syntax::Command;
    use crate::syntax::Pipeline;
    use crate::syntax::RedirBody;
    use assert_matches::assert_matches;
    use futures_util::FutureExt;

    // Empty input yields a list without items.
    #[test]
    fn parser_list_eof() {
        let mut lexer = Lexer::with_code("");
        let mut parser = Parser::new(&mut lexer);

        let list = parser.list().now_or_never().unwrap().unwrap().unwrap();
        assert_eq!(list.0, vec![]);
    }

    // A single command without a separator is one synchronous item.
    #[test]
    fn parser_list_one_item_without_last_semicolon() {
        let mut lexer = Lexer::with_code("foo");
        let mut parser = Parser::new(&mut lexer);

        let list = parser.list().now_or_never().unwrap().unwrap().unwrap();
        assert_eq!(list.0.len(), 1);
        assert_eq!(list.0[0].async_flag, None);
        assert_eq!(list.0[0].and_or.to_string(), "foo");
    }

    // A trailing `;` does not produce an extra item.
    #[test]
    fn parser_list_one_item_with_last_semicolon() {
        let mut lexer = Lexer::with_code("foo;");
        let mut parser = Parser::new(&mut lexer);

        let list = parser.list().now_or_never().unwrap().unwrap().unwrap();
        assert_eq!(list.0.len(), 1);
        assert_eq!(list.0[0].async_flag, None);
        assert_eq!(list.0[0].and_or.to_string(), "foo");
    }

    // Items separated by `&` carry the location of the `&`; items separated
    // by `;` have no async flag.
    #[test]
    fn parser_list_many_items() {
        let mut lexer = Lexer::with_code("foo & bar ; baz&");
        let mut parser = Parser::new(&mut lexer);

        let list = parser.list().now_or_never().unwrap().unwrap().unwrap();
        assert_eq!(list.0.len(), 3);

        let location = list.0[0].async_flag.as_ref().unwrap();
        assert_eq!(*location.code.value.borrow(), "foo & bar ; baz&");
        assert_eq!(location.code.start_line_number.get(), 1);
        assert_eq!(*location.code.source, Source::Unknown);
        assert_eq!(location.range, 4..5);
        assert_eq!(list.0[0].and_or.to_string(), "foo");

        assert_eq!(list.0[1].async_flag, None);
        assert_eq!(list.0[1].and_or.to_string(), "bar");

        let location = list.0[2].async_flag.as_ref().unwrap();
        assert_eq!(*location.code.value.borrow(), "foo & bar ; baz&");
        assert_eq!(location.code.start_line_number.get(), 1);
        assert_eq!(*location.code.source, Source::Unknown);
        assert_eq!(location.range, 15..16);
        assert_eq!(list.0[2].and_or.to_string(), "baz");
    }

    // End of input at the start of a line yields `None`.
    #[test]
    fn parser_command_line_eof() {
        let mut lexer = Lexer::with_code("");
        let mut parser = Parser::new(&mut lexer);

        let result = parser.command_line().now_or_never().unwrap().unwrap();
        assert!(result.is_none());
    }

    // The newline after the command triggers reading of the here-document
    // content.
    #[test]
    fn parser_command_line_command_and_newline() {
        let mut lexer = Lexer::with_code("<<END\nfoo\nEND\n");
        let mut parser = Parser::new(&mut lexer);

        let result = parser.command_line().now_or_never().unwrap();
        let List(items) = result.unwrap().unwrap();
        assert_eq!(items.len(), 1);
        let item = items.first().unwrap();
        assert_eq!(item.async_flag, None);
        let AndOrList { first, rest } = &*item.and_or;
        assert!(rest.is_empty(), "expected empty rest: {rest:?}");
        let Pipeline { commands, negation } = first;
        assert_eq!(*negation, false);
        assert_eq!(commands.len(), 1);
        let cmd = assert_matches!(*commands[0], Command::Simple(ref c) => c);
        assert_eq!(cmd.words, []);
        assert_eq!(cmd.redirs.len(), 1);
        assert_eq!(cmd.redirs[0].fd, None);
        assert_matches!(cmd.redirs[0].body, RedirBody::HereDoc(ref here_doc) => {
            assert_eq!(here_doc.delimiter.to_string(), "END");
            assert_eq!(here_doc.remove_tabs, false);
            assert_eq!(here_doc.content.get().unwrap().to_string(), "foo\n");
        });
    }

    // A command may be terminated by the end of input instead of a newline.
    #[test]
    fn parser_command_line_command_without_newline() {
        let mut lexer = Lexer::with_code("foo");
        let mut parser = Parser::new(&mut lexer);

        let result = parser.command_line().now_or_never().unwrap();
        let list = result.unwrap().unwrap();
        assert_eq!(list.to_string(), "foo");
    }

    // An empty line is an empty list, not `None`.
    #[test]
    fn parser_command_line_newline_only() {
        let mut lexer = Lexer::with_code("\n");
        let mut parser = Parser::new(&mut lexer);

        let result = parser.command_line().now_or_never().unwrap();
        let list = result.unwrap().unwrap();
        assert_eq!(list.0, []);
    }

    // A here-document operator that is never followed by a newline is an
    // error located at the delimiter word.
    #[test]
    fn parser_command_line_here_doc_without_newline() {
        let mut lexer = Lexer::with_code("<<END");
        let mut parser = Parser::new(&mut lexer);

        let e = parser.command_line().now_or_never().unwrap().unwrap_err();
        assert_eq!(
            e.cause,
            ErrorCause::Syntax(SyntaxError::MissingHereDocContent)
        );
        assert_eq!(*e.location.code.value.borrow(), "<<END");
        assert_eq!(e.location.code.start_line_number.get(), 1);
        assert_eq!(*e.location.code.source, Source::Unknown);
        assert_eq!(e.location.range, 2..5);
    }

    // A stray `)` after a command is reported as an unopened subshell.
    #[test]
    fn parser_command_line_wrong_delimiter_1() {
        let mut lexer = Lexer::with_code("foo)");
        let mut parser = Parser::new(&mut lexer);

        let e = parser.command_line().now_or_never().unwrap().unwrap_err();
        assert_eq!(e.cause, ErrorCause::Syntax(SyntaxError::UnopenedSubshell));
        assert_eq!(*e.location.code.value.borrow(), "foo)");
        assert_eq!(e.location.code.start_line_number.get(), 1);
        assert_eq!(*e.location.code.source, Source::Unknown);
        assert_eq!(e.location.range, 3..4);
    }

    // A `(` after a command is reported as a missing separator.
    #[test]
    fn parser_command_line_wrong_delimiter_2() {
        let mut lexer = Lexer::with_code("foo bar (");
        let mut parser = Parser::new(&mut lexer);

        let e = parser.command_line().now_or_never().unwrap().unwrap_err();
        assert_eq!(e.cause, ErrorCause::Syntax(SyntaxError::MissingSeparator));
        assert_eq!(*e.location.code.value.borrow(), "foo bar (");
        assert_eq!(e.location.code.start_line_number.get(), 1);
        assert_eq!(*e.location.code.source, Source::Unknown);
        assert_eq!(e.location.range, 8..9);
    }

    // A second `;` with no command before it is an invalid command token.
    #[test]
    fn parser_command_line_wrong_delimiter_3() {
        let mut lexer = Lexer::with_code("foo bar; ;");
        let mut parser = Parser::new(&mut lexer);

        let e = parser.command_line().now_or_never().unwrap().unwrap_err();
        assert_eq!(
            e.cause,
            ErrorCause::Syntax(SyntaxError::InvalidCommandToken)
        );
        assert_eq!(*e.location.code.value.borrow(), "foo bar; ;");
        assert_eq!(e.location.code.start_line_number.get(), 1);
        assert_eq!(*e.location.code.source, Source::Unknown);
        assert_eq!(e.location.range, 9..10);
    }

    // Empty input is accepted and yields an empty list.
    #[test]
    fn parser_maybe_compound_list_empty() {
        let mut lexer = Lexer::with_code("");
        let mut parser = Parser::new(&mut lexer);

        let result = parser.maybe_compound_list().now_or_never().unwrap();
        let list = result.unwrap();
        assert_eq!(list.0, []);
    }

    // Several commands on one line are collected into one list.
    #[test]
    fn parser_maybe_compound_list_some_commands() {
        let mut lexer = Lexer::with_code("echo; ls& cat");
        let mut parser = Parser::new(&mut lexer);

        let result = parser.maybe_compound_list().now_or_never().unwrap();
        let list = result.unwrap();
        assert_eq!(list.to_string(), "echo; ls& cat");
    }

    // Commands on separate lines are merged, and blank lines are skipped.
    #[test]
    fn parser_maybe_compound_list_some_commands_with_newline() {
        let mut lexer = Lexer::with_code("echo& ls\n\ncat\n\n");
        let mut parser = Parser::new(&mut lexer);

        let result = parser.maybe_compound_list().now_or_never().unwrap();
        let list = result.unwrap();
        assert_eq!(list.to_string(), "echo& ls; cat");

        // The whole input (15 characters), including the trailing newlines,
        // has been consumed.
        assert_eq!(lexer.index(), 15);
    }

    // A clause delimiter right away yields an empty list without an error.
    #[test]
    fn parser_maybe_compound_list_empty_with_delimiter() {
        let mut lexer = Lexer::with_code("}");
        let mut parser = Parser::new(&mut lexer);

        let result = parser.maybe_compound_list().now_or_never().unwrap();
        let list = result.unwrap();
        assert_eq!(list.0, []);
    }

    // TODO Test maybe_compound_list with alias substitution

    // A token that is neither a command nor a clause delimiter is an error.
    #[test]
    fn parser_maybe_compound_list_empty_with_invalid_delimiter() {
        let mut lexer = Lexer::with_code(";");
        let mut parser = Parser::new(&mut lexer);

        let result = parser.maybe_compound_list().now_or_never().unwrap();
        let e = result.unwrap_err();
        assert_eq!(
            e.cause,
            ErrorCause::Syntax(SyntaxError::InvalidCommandToken)
        );
        assert_eq!(*e.location.code.value.borrow(), ";");
        assert_eq!(e.location.code.start_line_number.get(), 1);
        assert_eq!(*e.location.code.source, Source::Unknown);
        assert_eq!(e.location.range, 0..1);
    }

    // The error is located at the offending token even when it appears on a
    // later line.
    #[test]
    fn parser_maybe_compound_list_some_commands_with_invalid_delimiter() {
        let mut lexer = Lexer::with_code("echo; ls\n &");
        let mut parser = Parser::new(&mut lexer);

        let result = parser.maybe_compound_list().now_or_never().unwrap();
        let e = result.unwrap_err();
        assert_eq!(
            e.cause,
            ErrorCause::Syntax(SyntaxError::InvalidCommandToken)
        );
        assert_eq!(*e.location.code.value.borrow(), "echo; ls\n &");
        assert_eq!(e.location.code.start_line_number.get(), 1);
        assert_eq!(*e.location.code.source, Source::Unknown);
        assert_eq!(e.location.range, 10..11);
    }
}
464}