Skip to main content

yash_syntax/parser/
list.rs

1// This file is part of yash, an extended POSIX shell.
2// Copyright (C) 2020 WATANABE Yuki
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17//! Syntax parser for list and compound list
18
19use super::core::Parser;
20use super::core::Rec;
21use super::core::Result;
22use super::error::Error;
23use super::error::SyntaxError;
24use super::lex::Operator::{And, Newline, Semicolon};
25use super::lex::TokenId::{self, EndOfInput, IoLocation, IoNumber, Operator, Token};
26use crate::syntax::Item;
27use crate::syntax::List;
28use std::pin::Pin;
29use std::rc::Rc;
30
31fn error_type_for_trailing_token_in_command_line(token_id: TokenId) -> Option<SyntaxError> {
32    use super::lex::Keyword::*;
33    use super::lex::Operator::*;
34    use SyntaxError::*;
35    match token_id {
36        EndOfInput => None,
37        Token(None) | IoNumber | IoLocation => Some(MissingSeparator),
38        Token(Some(keyword)) => match keyword {
39            Bang | OpenBracketBracket | Case | For | Function | If | Until | While | OpenBrace => {
40                Some(MissingSeparator)
41            }
42            Do => Some(UnopenedLoop),
43            Done => Some(UnopenedDoClause),
44            Elif | Else | Fi | Then => Some(UnopenedIf),
45            Esac => Some(UnopenedCase),
46            In => Some(InAsCommandName),
47            CloseBrace => Some(UnopenedGrouping),
48        },
49        Operator(operator) => match operator {
50            And | AndAnd | Semicolon | Bar | BarBar => Some(InvalidCommandToken),
51            OpenParen => Some(MissingSeparator),
52            CloseParen => Some(UnopenedSubshell),
53            SemicolonAnd | SemicolonSemicolon | SemicolonSemicolonAnd | SemicolonBar => {
54                Some(UnopenedCase)
55            }
56            Newline | Less | LessAnd | LessOpenParen | LessLess | LessLessDash | LessLessLess
57            | LessGreater | Greater | GreaterAnd | GreaterOpenParen | GreaterGreater
58            | GreaterGreaterBar | GreaterBar => unreachable!(),
59        },
60    }
61}
62
63impl Parser<'_, '_> {
64    // There is no function that parses a single item because it would not be
65    // very useful for parsing a list. An item requires a separator operator
66    // ('&' or ';') for it to be followed by another item. You cannot tell from
67    // the resultant item whether there was a separator operator.
68    // pub async fn item(&mut self) -> Result<Rec<Item>> { }
69
70    /// Parses a list.
71    ///
72    /// This function parses a sequence of and-or lists that are separated by `;`
73    /// or `&`. A newline token that delimits the list is not parsed.
74    ///
75    /// If there is no valid command at the current position, this function
76    /// returns a list with no items.
77    pub async fn list(&mut self) -> Result<Rec<List>> {
78        let mut items = vec![];
79
80        let mut result = match self.and_or_list().await? {
81            Rec::AliasSubstituted => return Ok(Rec::AliasSubstituted),
82            Rec::Parsed(result) => result,
83        };
84
85        while let Some(and_or) = result {
86            let token = self.peek_token().await?;
87            let (async_flag, next) = match token.id {
88                Operator(Semicolon) => (None, true),
89                Operator(And) => (Some(token.word.location.clone()), true),
90                _ => (None, false),
91            };
92
93            let and_or = Rc::new(and_or);
94            items.push(Item { and_or, async_flag });
95
96            if !next {
97                break;
98            }
99            self.take_token_raw().await?;
100
101            result = loop {
102                if let Rec::Parsed(result) = self.and_or_list().await? {
103                    break result;
104                }
105            };
106        }
107
108        Ok(Rec::Parsed(List(items)))
109    }
110
111    // TODO Consider returning Result<Result<(), &Token>, Error>
112    /// Parses an optional newline token and here-document contents.
113    ///
114    /// If the current token is a newline, it is consumed and any pending here-document contents
115    /// are read starting from the next line. Otherwise, this function returns `Ok(false)` without
116    /// any side effect.
117    pub async fn newline_and_here_doc_contents(&mut self) -> Result<bool> {
118        if self.peek_token().await?.id != Operator(Newline) {
119            return Ok(false);
120        }
121
122        self.take_token_raw().await?;
123        self.here_doc_contents().await?;
124        Ok(true)
125    }
126
127    /// Parses a complete command optionally delimited by a newline.
128    ///
129    /// A complete command is a minimal sequence of and-or lists that can be executed in the shell
130    /// environment. This function reads as many lines as needed to compose the complete command.
131    ///
132    /// If the current line is empty (or containing only whitespaces and comments), the result is
133    /// an empty list. If the first token of the current line is the end of input, the result is
134    /// `Ok(None)`.
135    pub async fn command_line(&mut self) -> Result<Option<List>> {
136        let list = loop {
137            if let Rec::Parsed(list) = self.list().await? {
138                break list;
139            }
140        };
141
142        if !self.newline_and_here_doc_contents().await? {
143            let next = self.peek_token().await?;
144            if let Some(syntax_error) = error_type_for_trailing_token_in_command_line(next.id) {
145                let cause = syntax_error.into();
146                let location = next.word.location.clone();
147                return Err(Error { cause, location });
148            }
149            if list.0.is_empty() {
150                return Ok(None);
151            }
152        }
153
154        self.ensure_no_unread_here_doc()?;
155        Ok(Some(list))
156    }
157
158    /// Parses an optional compound list.
159    ///
160    /// A compound list is a sequence of one or more and-or lists that are
161    /// separated by newlines and optionally preceded and/or followed by
162    /// newlines.
163    ///
164    /// This function stops parsing on encountering an unexpected token that
165    /// cannot be parsed as the beginning of an and-or list. If the token is a
166    /// possible [clause delimiter](super::lex::TokenId::is_clause_delimiter),
167    /// the result is a list of commands that have been parsed up to the token.
168    /// Otherwise, an `InvalidCommandToken` error is returned.
169    pub async fn maybe_compound_list(&mut self) -> Result<List> {
170        let mut items = vec![];
171
172        loop {
173            let list = loop {
174                if let Rec::Parsed(list) = self.list().await? {
175                    break list;
176                }
177            };
178            items.extend(list.0);
179
180            if !self.newline_and_here_doc_contents().await? {
181                break;
182            }
183        }
184
185        let next = self.peek_token().await?;
186        if next.id.is_clause_delimiter() {
187            Ok(List(items))
188        } else {
189            let cause = SyntaxError::InvalidCommandToken.into();
190            let location = next.word.location.clone();
191            Err(Error { cause, location })
192        }
193    }
194
195    /// Like [`maybe_compound_list`](Self::maybe_compound_list), but returns the future in a pinning box.
196    pub fn maybe_compound_list_boxed(
197        &mut self,
198    ) -> Pin<Box<dyn Future<Output = Result<List>> + '_>> {
199        Box::pin(self.maybe_compound_list())
200    }
201}
202
#[allow(
    clippy::bool_assert_comparison,
    reason = "to make the expected values clearer"
)]
#[cfg(test)]
mod tests {
    use super::super::error::ErrorCause;
    use super::super::lex::Lexer;
    use super::*;
    use crate::source::Source;
    use crate::syntax::AndOrList;
    use crate::syntax::Command;
    use crate::syntax::Pipeline;
    use crate::syntax::RedirBody;
    use assert_matches::assert_matches;
    use futures_util::FutureExt as _;

    /// Asserts that a location refers to `$range` within the code `$code`,
    /// where the code starts at line 1 and comes from an unknown source.
    macro_rules! assert_location {
        ($location:expr, $code:expr, $range:expr $(,)?) => {{
            let location = &$location;
            assert_eq!(*location.code.value.borrow(), $code);
            assert_eq!(location.code.start_line_number.get(), 1);
            assert_eq!(*location.code.source, Source::Unknown);
            assert_eq!(location.range, $range);
        }};
    }

    // Tests for `Parser::list`

    #[test]
    fn parser_list_eof() {
        let mut lexer = Lexer::with_code("");
        let mut parser = Parser::new(&mut lexer);

        let parsed = parser.list().now_or_never().unwrap();
        let List(items) = parsed.unwrap().unwrap();
        assert_eq!(items, vec![]);
    }

    #[test]
    fn parser_list_one_item_without_last_semicolon() {
        let mut lexer = Lexer::with_code("foo");
        let mut parser = Parser::new(&mut lexer);

        let parsed = parser.list().now_or_never().unwrap();
        let List(items) = parsed.unwrap().unwrap();
        assert_eq!(items.len(), 1);
        let item = &items[0];
        assert_eq!(item.async_flag, None);
        assert_eq!(item.and_or.to_string(), "foo");
    }

    #[test]
    fn parser_list_one_item_with_last_semicolon() {
        let mut lexer = Lexer::with_code("foo;");
        let mut parser = Parser::new(&mut lexer);

        let parsed = parser.list().now_or_never().unwrap();
        let List(items) = parsed.unwrap().unwrap();
        assert_eq!(items.len(), 1);
        let item = &items[0];
        assert_eq!(item.async_flag, None);
        assert_eq!(item.and_or.to_string(), "foo");
    }

    #[test]
    fn parser_list_many_items() {
        let code = "foo & bar ; baz&";
        let mut lexer = Lexer::with_code(code);
        let mut parser = Parser::new(&mut lexer);

        let parsed = parser.list().now_or_never().unwrap();
        let List(items) = parsed.unwrap().unwrap();
        assert_eq!(items.len(), 3);

        let foo = &items[0];
        assert_location!(foo.async_flag.as_ref().unwrap(), code, 4..5);
        assert_eq!(foo.and_or.to_string(), "foo");

        let bar = &items[1];
        assert_eq!(bar.async_flag, None);
        assert_eq!(bar.and_or.to_string(), "bar");

        let baz = &items[2];
        assert_location!(baz.async_flag.as_ref().unwrap(), code, 15..16);
        assert_eq!(baz.and_or.to_string(), "baz");
    }

    // Tests for `Parser::command_line`

    #[test]
    fn parser_command_line_eof() {
        let mut lexer = Lexer::with_code("");
        let mut parser = Parser::new(&mut lexer);

        let parsed = parser.command_line().now_or_never().unwrap();
        let command = parsed.unwrap();
        assert!(command.is_none());
    }

    #[test]
    fn parser_command_line_command_and_newline() {
        let mut lexer = Lexer::with_code("<<END\nfoo\nEND\n");
        let mut parser = Parser::new(&mut lexer);

        let parsed = parser.command_line().now_or_never().unwrap();
        let List(items) = parsed.unwrap().unwrap();
        assert_eq!(items.len(), 1);

        let item = &items[0];
        assert_eq!(item.async_flag, None);

        let AndOrList { first, rest } = &*item.and_or;
        assert!(rest.is_empty(), "expected empty rest: {rest:?}");

        let Pipeline { commands, negation } = first;
        assert_eq!(*negation, false);
        assert_eq!(commands.len(), 1);

        let simple = assert_matches!(*commands[0], Command::Simple(ref c) => c);
        assert_eq!(simple.words, []);
        assert_eq!(simple.redirs.len(), 1);

        let redir = &simple.redirs[0];
        assert_eq!(redir.fd, None);
        assert_matches!(redir.body, RedirBody::HereDoc(ref doc) => {
            assert_eq!(doc.delimiter.to_string(), "END");
            assert_eq!(doc.remove_tabs, false);
            assert_eq!(doc.content.get().unwrap().to_string(), "foo\n");
        });
    }

    #[test]
    fn parser_command_line_command_without_newline() {
        let mut lexer = Lexer::with_code("foo");
        let mut parser = Parser::new(&mut lexer);

        let parsed = parser.command_line().now_or_never().unwrap();
        let command = parsed.unwrap().unwrap();
        assert_eq!(command.to_string(), "foo");
    }

    #[test]
    fn parser_command_line_newline_only() {
        let mut lexer = Lexer::with_code("\n");
        let mut parser = Parser::new(&mut lexer);

        let parsed = parser.command_line().now_or_never().unwrap();
        let List(items) = parsed.unwrap().unwrap();
        assert_eq!(items, []);
    }

    #[test]
    fn parser_command_line_here_doc_without_newline() {
        let code = "<<END";
        let mut lexer = Lexer::with_code(code);
        let mut parser = Parser::new(&mut lexer);

        let error = parser.command_line().now_or_never().unwrap().unwrap_err();
        assert_eq!(
            error.cause,
            ErrorCause::Syntax(SyntaxError::MissingHereDocContent)
        );
        assert_location!(error.location, code, 2..5);
    }

    #[test]
    fn parser_command_line_wrong_delimiter_1() {
        let code = "foo)";
        let mut lexer = Lexer::with_code(code);
        let mut parser = Parser::new(&mut lexer);

        let error = parser.command_line().now_or_never().unwrap().unwrap_err();
        assert_eq!(
            error.cause,
            ErrorCause::Syntax(SyntaxError::UnopenedSubshell)
        );
        assert_location!(error.location, code, 3..4);
    }

    #[test]
    fn parser_command_line_wrong_delimiter_2() {
        let code = "foo bar (";
        let mut lexer = Lexer::with_code(code);
        let mut parser = Parser::new(&mut lexer);

        let error = parser.command_line().now_or_never().unwrap().unwrap_err();
        assert_eq!(
            error.cause,
            ErrorCause::Syntax(SyntaxError::MissingSeparator)
        );
        assert_location!(error.location, code, 8..9);
    }

    #[test]
    fn parser_command_line_wrong_delimiter_3() {
        let code = "foo bar; ;";
        let mut lexer = Lexer::with_code(code);
        let mut parser = Parser::new(&mut lexer);

        let error = parser.command_line().now_or_never().unwrap().unwrap_err();
        assert_eq!(
            error.cause,
            ErrorCause::Syntax(SyntaxError::InvalidCommandToken)
        );
        assert_location!(error.location, code, 9..10);
    }

    // Tests for `Parser::maybe_compound_list`

    #[test]
    fn parser_maybe_compound_list_empty() {
        let mut lexer = Lexer::with_code("");
        let mut parser = Parser::new(&mut lexer);

        let parsed = parser.maybe_compound_list().now_or_never().unwrap();
        let List(items) = parsed.unwrap();
        assert_eq!(items, []);
    }

    #[test]
    fn parser_maybe_compound_list_some_commands() {
        let mut lexer = Lexer::with_code("echo; ls& cat");
        let mut parser = Parser::new(&mut lexer);

        let parsed = parser.maybe_compound_list().now_or_never().unwrap();
        let commands = parsed.unwrap();
        assert_eq!(commands.to_string(), "echo; ls& cat");
    }

    #[test]
    fn parser_maybe_compound_list_some_commands_with_newline() {
        let mut lexer = Lexer::with_code("echo& ls\n\ncat\n\n");
        let mut parser = Parser::new(&mut lexer);

        let parsed = parser.maybe_compound_list().now_or_never().unwrap();
        let commands = parsed.unwrap();
        assert_eq!(commands.to_string(), "echo& ls; cat");

        // The trailing newlines must have been consumed as well.
        assert_eq!(lexer.index(), 15);
    }

    #[test]
    fn parser_maybe_compound_list_empty_with_delimiter() {
        let mut lexer = Lexer::with_code("}");
        let mut parser = Parser::new(&mut lexer);

        let parsed = parser.maybe_compound_list().now_or_never().unwrap();
        let List(items) = parsed.unwrap();
        assert_eq!(items, []);
    }

    // TODO Test maybe_compound_list with alias substitution

    #[test]
    fn parser_maybe_compound_list_empty_with_invalid_delimiter() {
        let code = ";";
        let mut lexer = Lexer::with_code(code);
        let mut parser = Parser::new(&mut lexer);

        let parsed = parser.maybe_compound_list().now_or_never().unwrap();
        let error = parsed.unwrap_err();
        assert_eq!(
            error.cause,
            ErrorCause::Syntax(SyntaxError::InvalidCommandToken)
        );
        assert_location!(error.location, code, 0..1);
    }

    #[test]
    fn parser_maybe_compound_list_some_commands_with_invalid_delimiter() {
        let code = "echo; ls\n &";
        let mut lexer = Lexer::with_code(code);
        let mut parser = Parser::new(&mut lexer);

        let parsed = parser.maybe_compound_list().now_or_never().unwrap();
        let error = parsed.unwrap_err();
        assert_eq!(
            error.cause,
            ErrorCause::Syntax(SyntaxError::InvalidCommandToken)
        );
        assert_location!(error.location, code, 10..11);
    }
}