yash_syntax/parser/
list.rs

1// This file is part of yash, an extended POSIX shell.
2// Copyright (C) 2020 WATANABE Yuki
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17//! Syntax parser for list and compound list
18
19use super::core::Parser;
20use super::core::Rec;
21use super::core::Result;
22use super::error::Error;
23use super::error::SyntaxError;
24use super::lex::Operator::{And, Newline, Semicolon};
25use super::lex::TokenId::{self, EndOfInput, IoNumber, Operator, Token};
26use crate::syntax::Item;
27use crate::syntax::List;
28use std::future::Future;
29use std::pin::Pin;
30use std::rc::Rc;
31
32fn error_type_for_trailing_token_in_command_line(token_id: TokenId) -> Option<SyntaxError> {
33    use super::lex::Keyword::*;
34    use super::lex::Operator::*;
35    use SyntaxError::*;
36    match token_id {
37        EndOfInput => None,
38        Token(None) | IoNumber => Some(MissingSeparator),
39        Token(Some(keyword)) => match keyword {
40            Bang | OpenBracketBracket | Case | For | Function | If | Until | While | OpenBrace => {
41                Some(MissingSeparator)
42            }
43            Do => Some(UnopenedLoop),
44            Done => Some(UnopenedDoClause),
45            Elif | Else | Fi | Then => Some(UnopenedIf),
46            Esac => Some(UnopenedCase),
47            In => Some(InAsCommandName),
48            CloseBrace => Some(UnopenedGrouping),
49        },
50        Operator(operator) => match operator {
51            And | AndAnd | Semicolon | Bar | BarBar => Some(InvalidCommandToken),
52            OpenParen => Some(MissingSeparator),
53            CloseParen => Some(UnopenedSubshell),
54            SemicolonAnd | SemicolonSemicolon | SemicolonSemicolonAnd | SemicolonBar => {
55                Some(UnopenedCase)
56            }
57            Newline | Less | LessAnd | LessOpenParen | LessLess | LessLessDash | LessLessLess
58            | LessGreater | Greater | GreaterAnd | GreaterOpenParen | GreaterGreater
59            | GreaterGreaterBar | GreaterBar => unreachable!(),
60        },
61    }
62}
63
64impl Parser<'_, '_> {
65    // There is no function that parses a single item because it would not be
66    // very useful for parsing a list. An item requires a separator operator
67    // ('&' or ';') for it to be followed by another item. You cannot tell from
68    // the resultant item whether there was a separator operator.
69    // pub async fn item(&mut self) -> Result<Rec<Item>> { }
70
71    /// Parses a list.
72    ///
73    /// This function parses a sequence of and-or lists that are separated by `;`
74    /// or `&`. A newline token that delimits the list is not parsed.
75    ///
76    /// If there is no valid command at the current position, this function
77    /// returns a list with no items.
78    pub async fn list(&mut self) -> Result<Rec<List>> {
79        let mut items = vec![];
80
81        let mut result = match self.and_or_list().await? {
82            Rec::AliasSubstituted => return Ok(Rec::AliasSubstituted),
83            Rec::Parsed(result) => result,
84        };
85
86        while let Some(and_or) = result {
87            let token = self.peek_token().await?;
88            let (async_flag, next) = match token.id {
89                Operator(Semicolon) => (None, true),
90                Operator(And) => (Some(token.word.location.clone()), true),
91                _ => (None, false),
92            };
93
94            let and_or = Rc::new(and_or);
95            items.push(Item { and_or, async_flag });
96
97            if !next {
98                break;
99            }
100            self.take_token_raw().await?;
101
102            result = loop {
103                if let Rec::Parsed(result) = self.and_or_list().await? {
104                    break result;
105                }
106            };
107        }
108
109        Ok(Rec::Parsed(List(items)))
110    }
111
112    // TODO Consider returning Result<Result<(), &Token>, Error>
113    /// Parses an optional newline token and here-document contents.
114    ///
115    /// If the current token is a newline, it is consumed and any pending here-document contents
116    /// are read starting from the next line. Otherwise, this function returns `Ok(false)` without
117    /// any side effect.
118    pub async fn newline_and_here_doc_contents(&mut self) -> Result<bool> {
119        if self.peek_token().await?.id != Operator(Newline) {
120            return Ok(false);
121        }
122
123        self.take_token_raw().await?;
124        self.here_doc_contents().await?;
125        Ok(true)
126    }
127
128    /// Parses a complete command optionally delimited by a newline.
129    ///
130    /// A complete command is a minimal sequence of and-or lists that can be executed in the shell
131    /// environment. This function reads as many lines as needed to compose the complete command.
132    ///
133    /// If the current line is empty (or containing only whitespaces and comments), the result is
134    /// an empty list. If the first token of the current line is the end of input, the result is
135    /// `Ok(None)`.
136    pub async fn command_line(&mut self) -> Result<Option<List>> {
137        let list = loop {
138            if let Rec::Parsed(list) = self.list().await? {
139                break list;
140            }
141        };
142
143        if !self.newline_and_here_doc_contents().await? {
144            let next = self.peek_token().await?;
145            if let Some(syntax_error) = error_type_for_trailing_token_in_command_line(next.id) {
146                let cause = syntax_error.into();
147                let location = next.word.location.clone();
148                return Err(Error { cause, location });
149            }
150            if list.0.is_empty() {
151                return Ok(None);
152            }
153        }
154
155        self.ensure_no_unread_here_doc()?;
156        Ok(Some(list))
157    }
158
159    /// Parses an optional compound list.
160    ///
161    /// A compound list is a sequence of one or more and-or lists that are
162    /// separated by newlines and optionally preceded and/or followed by
163    /// newlines.
164    ///
165    /// This function stops parsing on encountering an unexpected token that
166    /// cannot be parsed as the beginning of an and-or list. If the token is a
167    /// possible [clause delimiter](super::lex::TokenId::is_clause_delimiter),
168    /// the result is a list of commands that have been parsed up to the token.
169    /// Otherwise, an `InvalidCommandToken` error is returned.
170    pub async fn maybe_compound_list(&mut self) -> Result<List> {
171        let mut items = vec![];
172
173        loop {
174            let list = loop {
175                if let Rec::Parsed(list) = self.list().await? {
176                    break list;
177                }
178            };
179            items.extend(list.0);
180
181            if !self.newline_and_here_doc_contents().await? {
182                break;
183            }
184        }
185
186        let next = self.peek_token().await?;
187        if next.id.is_clause_delimiter() {
188            Ok(List(items))
189        } else {
190            let cause = SyntaxError::InvalidCommandToken.into();
191            let location = next.word.location.clone();
192            Err(Error { cause, location })
193        }
194    }
195
196    /// Like [`maybe_compound_list`](Self::maybe_compound_list), but returns the future in a pinning box.
197    pub fn maybe_compound_list_boxed(
198        &mut self,
199    ) -> Pin<Box<dyn Future<Output = Result<List>> + '_>> {
200        Box::pin(self.maybe_compound_list())
201    }
202}
203
#[allow(clippy::bool_assert_comparison)]
#[cfg(test)]
mod tests {
    use super::super::error::ErrorCause;
    use super::super::lex::Lexer;
    use super::*;
    use crate::alias::EmptyGlossary;
    use crate::source::Source;
    use crate::syntax::AndOrList;
    use crate::syntax::Command;
    use crate::syntax::Pipeline;
    use crate::syntax::RedirBody;
    use assert_matches::assert_matches;
    use futures_util::FutureExt;

    /// Asserts that a location refers to the given range of the given code,
    /// which must start at line 1 and originate from `Source::Unknown`.
    macro_rules! assert_location {
        ($location:expr, $code:expr, $range:expr) => {{
            let location = &$location;
            assert_eq!(*location.code.value.borrow(), $code);
            assert_eq!(location.code.start_line_number.get(), 1);
            assert_eq!(*location.code.source, Source::Unknown);
            assert_eq!(location.range, $range);
        }};
    }

    #[test]
    fn parser_list_eof() {
        let mut lexer = Lexer::from_memory("", Source::Unknown);
        let mut parser = Parser::new(&mut lexer, &EmptyGlossary);

        let rec = parser.list().now_or_never().unwrap().unwrap();
        let List(items) = rec.unwrap();
        assert_eq!(items, vec![]);
    }

    #[test]
    fn parser_list_one_item_without_last_semicolon() {
        let mut lexer = Lexer::from_memory("foo", Source::Unknown);
        let mut parser = Parser::new(&mut lexer, &EmptyGlossary);

        let rec = parser.list().now_or_never().unwrap().unwrap();
        let List(items) = rec.unwrap();
        assert_eq!(items.len(), 1);
        let item = &items[0];
        assert_eq!(item.async_flag, None);
        assert_eq!(item.and_or.to_string(), "foo");
    }

    #[test]
    fn parser_list_one_item_with_last_semicolon() {
        let mut lexer = Lexer::from_memory("foo;", Source::Unknown);
        let mut parser = Parser::new(&mut lexer, &EmptyGlossary);

        let rec = parser.list().now_or_never().unwrap().unwrap();
        let List(items) = rec.unwrap();
        assert_eq!(items.len(), 1);
        let item = &items[0];
        assert_eq!(item.async_flag, None);
        assert_eq!(item.and_or.to_string(), "foo");
    }

    #[test]
    fn parser_list_many_items() {
        let code = "foo & bar ; baz&";
        let mut lexer = Lexer::from_memory(code, Source::Unknown);
        let mut parser = Parser::new(&mut lexer, &EmptyGlossary);

        let rec = parser.list().now_or_never().unwrap().unwrap();
        let List(items) = rec.unwrap();
        assert_eq!(items.len(), 3);
        let (first, second, third) = (&items[0], &items[1], &items[2]);

        // The first item is terminated by the `&` at index 4.
        assert_location!(first.async_flag.as_ref().unwrap(), code, 4..5);
        assert_eq!(first.and_or.to_string(), "foo");

        assert_eq!(second.async_flag, None);
        assert_eq!(second.and_or.to_string(), "bar");

        // The last item is terminated by the `&` at index 15.
        assert_location!(third.async_flag.as_ref().unwrap(), code, 15..16);
        assert_eq!(third.and_or.to_string(), "baz");
    }

    #[test]
    fn parser_command_line_eof() {
        let mut lexer = Lexer::from_memory("", Source::Unknown);
        let mut parser = Parser::new(&mut lexer, &EmptyGlossary);

        let outcome = parser.command_line().now_or_never().unwrap();
        let maybe_list = outcome.unwrap();
        assert!(maybe_list.is_none());
    }

    #[test]
    fn parser_command_line_command_and_newline() {
        let mut lexer = Lexer::from_memory("<<END\nfoo\nEND\n", Source::Unknown);
        let mut parser = Parser::new(&mut lexer, &EmptyGlossary);

        let outcome = parser.command_line().now_or_never().unwrap();
        let List(items) = outcome.unwrap().unwrap();
        assert_eq!(items.len(), 1);

        let item = items.first().unwrap();
        assert_eq!(item.async_flag, None);

        let AndOrList { first, rest } = &*item.and_or;
        assert!(rest.is_empty(), "expected empty rest: {rest:?}");

        let Pipeline { commands, negation } = first;
        assert_eq!(*negation, false);
        assert_eq!(commands.len(), 1);

        let simple = assert_matches!(*commands[0], Command::Simple(ref c) => c);
        assert_eq!(simple.words, []);
        assert_eq!(simple.redirs.len(), 1);

        let redir = &simple.redirs[0];
        assert_eq!(redir.fd, None);
        assert_matches!(redir.body, RedirBody::HereDoc(ref here_doc) => {
            assert_eq!(here_doc.delimiter.to_string(), "END");
            assert_eq!(here_doc.remove_tabs, false);
            assert_eq!(here_doc.content.get().unwrap().to_string(), "foo\n");
        });
    }

    #[test]
    fn parser_command_line_command_without_newline() {
        let mut lexer = Lexer::from_memory("foo", Source::Unknown);
        let mut parser = Parser::new(&mut lexer, &EmptyGlossary);

        let outcome = parser.command_line().now_or_never().unwrap();
        let parsed = outcome.unwrap().unwrap();
        assert_eq!(parsed.to_string(), "foo");
    }

    #[test]
    fn parser_command_line_newline_only() {
        let mut lexer = Lexer::from_memory("\n", Source::Unknown);
        let mut parser = Parser::new(&mut lexer, &EmptyGlossary);

        let outcome = parser.command_line().now_or_never().unwrap();
        let List(items) = outcome.unwrap().unwrap();
        assert_eq!(items, []);
    }

    #[test]
    fn parser_command_line_here_doc_without_newline() {
        let code = "<<END";
        let mut lexer = Lexer::from_memory(code, Source::Unknown);
        let mut parser = Parser::new(&mut lexer, &EmptyGlossary);

        let outcome = parser.command_line().now_or_never().unwrap();
        let error = outcome.unwrap_err();
        assert_eq!(
            error.cause,
            ErrorCause::Syntax(SyntaxError::MissingHereDocContent)
        );
        assert_location!(error.location, code, 2..5);
    }

    #[test]
    fn parser_command_line_wrong_delimiter_1() {
        let code = "foo)";
        let mut lexer = Lexer::from_memory(code, Source::Unknown);
        let mut parser = Parser::new(&mut lexer, &EmptyGlossary);

        let outcome = parser.command_line().now_or_never().unwrap();
        let error = outcome.unwrap_err();
        assert_eq!(
            error.cause,
            ErrorCause::Syntax(SyntaxError::UnopenedSubshell)
        );
        assert_location!(error.location, code, 3..4);
    }

    #[test]
    fn parser_command_line_wrong_delimiter_2() {
        let code = "foo bar (";
        let mut lexer = Lexer::from_memory(code, Source::Unknown);
        let mut parser = Parser::new(&mut lexer, &EmptyGlossary);

        let outcome = parser.command_line().now_or_never().unwrap();
        let error = outcome.unwrap_err();
        assert_eq!(
            error.cause,
            ErrorCause::Syntax(SyntaxError::MissingSeparator)
        );
        assert_location!(error.location, code, 8..9);
    }

    #[test]
    fn parser_command_line_wrong_delimiter_3() {
        let code = "foo bar; ;";
        let mut lexer = Lexer::from_memory(code, Source::Unknown);
        let mut parser = Parser::new(&mut lexer, &EmptyGlossary);

        let outcome = parser.command_line().now_or_never().unwrap();
        let error = outcome.unwrap_err();
        assert_eq!(
            error.cause,
            ErrorCause::Syntax(SyntaxError::InvalidCommandToken)
        );
        assert_location!(error.location, code, 9..10);
    }

    #[test]
    fn parser_maybe_compound_list_empty() {
        let mut lexer = Lexer::from_memory("", Source::Unknown);
        let mut parser = Parser::new(&mut lexer, &EmptyGlossary);

        let outcome = parser.maybe_compound_list().now_or_never().unwrap();
        let List(items) = outcome.unwrap();
        assert_eq!(items, []);
    }

    #[test]
    fn parser_maybe_compound_list_some_commands() {
        let mut lexer = Lexer::from_memory("echo; ls& cat", Source::Unknown);
        let mut parser = Parser::new(&mut lexer, &EmptyGlossary);

        let outcome = parser.maybe_compound_list().now_or_never().unwrap();
        let parsed = outcome.unwrap();
        assert_eq!(parsed.to_string(), "echo; ls& cat");
    }

    #[test]
    fn parser_maybe_compound_list_some_commands_with_newline() {
        let mut lexer = Lexer::from_memory("echo& ls\n\ncat\n\n", Source::Unknown);
        let mut parser = Parser::new(&mut lexer, &EmptyGlossary);

        let outcome = parser.maybe_compound_list().now_or_never().unwrap();
        let parsed = outcome.unwrap();
        assert_eq!(parsed.to_string(), "echo& ls; cat");

        // The whole input, including the trailing newlines, has been read.
        assert_eq!(lexer.index(), 15);
    }

    #[test]
    fn parser_maybe_compound_list_empty_with_delimiter() {
        let mut lexer = Lexer::from_memory("}", Source::Unknown);
        let mut parser = Parser::new(&mut lexer, &EmptyGlossary);

        let outcome = parser.maybe_compound_list().now_or_never().unwrap();
        let List(items) = outcome.unwrap();
        assert_eq!(items, []);
    }

    // TODO Test maybe_compound_list with alias substitution

    #[test]
    fn parser_maybe_compound_list_empty_with_invalid_delimiter() {
        let code = ";";
        let mut lexer = Lexer::from_memory(code, Source::Unknown);
        let mut parser = Parser::new(&mut lexer, &EmptyGlossary);

        let outcome = parser.maybe_compound_list().now_or_never().unwrap();
        let error = outcome.unwrap_err();
        assert_eq!(
            error.cause,
            ErrorCause::Syntax(SyntaxError::InvalidCommandToken)
        );
        assert_location!(error.location, code, 0..1);
    }

    #[test]
    fn parser_maybe_compound_list_some_commands_with_invalid_delimiter() {
        let code = "echo; ls\n &";
        let mut lexer = Lexer::from_memory(code, Source::Unknown);
        let mut parser = Parser::new(&mut lexer, &EmptyGlossary);

        let outcome = parser.maybe_compound_list().now_or_never().unwrap();
        let error = outcome.unwrap_err();
        assert_eq!(
            error.cause,
            ErrorCause::Syntax(SyntaxError::InvalidCommandToken)
        );
        assert_location!(error.location, code, 10..11);
    }
}
464        assert_eq!(e.location.range, 10..11);
465    }
466}