yash_syntax/parser/
error.rs

1// This file is part of yash, an extended POSIX shell.
2// Copyright (C) 2020 WATANABE Yuki
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17//! Definition of errors that happen in the parser.
18
19use crate::source::pretty::Annotation;
20use crate::source::pretty::AnnotationType;
21use crate::source::pretty::MessageBase;
22use crate::source::Location;
23use crate::syntax::AndOr;
24use std::borrow::Cow;
25use std::rc::Rc;
26use thiserror::Error;
27
28/// Types of syntax errors.
29#[derive(Clone, Debug, Eq, Error, PartialEq)]
30#[error("{}", self.message())]
31#[non_exhaustive]
32pub enum SyntaxError {
33    /// A backslash is at the end of the input.
34    IncompleteEscape,
35    /// A backslash is not followed by a character that makes a valid escape.
36    InvalidEscape,
37    /// A `(` lacks a closing `)`.
38    UnclosedParen { opening_location: Location },
39    /// A single quotation lacks a closing `'`.
40    UnclosedSingleQuote { opening_location: Location },
41    /// A double quotation lacks a closing `"`.
42    UnclosedDoubleQuote { opening_location: Location },
43    /// A `$'` lacks a closing `'`.
44    UnclosedDollarSingleQuote { opening_location: Location },
45    /// A parameter expansion lacks a closing `}`.
46    UnclosedParam { opening_location: Location },
47    /// A parameter expansion lacks a name.
48    EmptyParam,
49    /// A parameter expansion has an invalid name.
50    InvalidParam,
51    /// A modifier does not have a valid form in a parameter expansion.
52    InvalidModifier,
53    /// A braced parameter expansion has both a prefix and suffix modifier.
54    MultipleModifier,
55    /// A command substitution started with `$(` but lacks a closing `)`.
56    UnclosedCommandSubstitution { opening_location: Location },
57    /// A command substitution started with `` ` `` but lacks a closing `` ` ``.
58    UnclosedBackquote { opening_location: Location },
59    /// An arithmetic expansion lacks a closing `))`.
60    UnclosedArith { opening_location: Location },
61    /// A command begins with an inappropriate keyword or operator token.
62    InvalidCommandToken,
63    /// A separator is missing between commands.
64    MissingSeparator,
65    /// The file descriptor specified for a redirection cannot be used.
66    FdOutOfRange,
67    /// A redirection operator is missing its operand.
68    MissingRedirOperand,
69    /// A here-document operator is missing its delimiter token.
70    MissingHereDocDelimiter,
71    /// A here-document operator is missing its corresponding content.
72    MissingHereDocContent,
73    /// A here-document content is missing its delimiter.
74    UnclosedHereDocContent { redir_op_location: Location },
75    /// An array assignment started with `=(` but lacks a closing `)`.
76    UnclosedArrayValue { opening_location: Location },
77    /// A `}` appears without a matching `{`.
78    UnopenedGrouping,
79    /// A grouping is not closed.
80    UnclosedGrouping { opening_location: Location },
81    /// A grouping contains no commands.
82    EmptyGrouping,
83    /// A `)` appears without a matching `(`.
84    UnopenedSubshell,
85    /// A subshell is not closed.
86    UnclosedSubshell { opening_location: Location },
87    /// A subshell contains no commands.
88    EmptySubshell,
89    /// A `do` appears outside a loop.
90    UnopenedLoop,
91    /// A `done` appears outside a loop.
92    UnopenedDoClause,
93    /// A do clause is not closed.
94    UnclosedDoClause { opening_location: Location },
95    /// A do clause contains no commands.
96    EmptyDoClause,
97    /// The variable name is missing in a for loop.
98    MissingForName,
99    /// The variable name is not a valid word in a for loop.
100    InvalidForName,
101    /// A value is not a valid word in a for loop.
102    InvalidForValue,
103    /// A for loop is missing a do clause.
104    MissingForBody { opening_location: Location },
105    /// A while loop is missing a do clause.
106    UnclosedWhileClause { opening_location: Location },
107    /// A while loop's condition is empty.
108    EmptyWhileCondition,
109    /// An until loop is missing a do clause.
110    UnclosedUntilClause { opening_location: Location },
111    /// An until loop's condition is empty.
112    EmptyUntilCondition,
113    /// An if command is missing the then clause.
114    IfMissingThen { if_location: Location },
115    /// An if command's condition is empty.
116    EmptyIfCondition,
117    /// An if command's body is empty.
118    EmptyIfBody,
119    /// An elif clause is missing the then clause.
120    ElifMissingThen { elif_location: Location },
121    /// An elif clause's condition is empty.
122    EmptyElifCondition,
123    /// An elif clause's body is empty.
124    EmptyElifBody,
125    /// An else clause is empty.
126    EmptyElse,
127    /// An `elif`, `else`, `then`, or `fi` appears outside an if command.
128    UnopenedIf,
129    /// An if command is not closed.
130    UnclosedIf { opening_location: Location },
131    /// The case command is missing its subject.
132    MissingCaseSubject,
133    /// The subject of the case command is not a valid word.
134    InvalidCaseSubject,
135    /// The case command is missing `in` after the subject.
136    MissingIn { opening_location: Location },
137    /// The `)` is missing in a case item.
138    UnclosedPatternList,
139    /// The pattern is missing in a case item.
140    MissingPattern,
141    /// The pattern is not a valid word token.
142    InvalidPattern,
143    /// The first pattern of a case item is `esac`.
144    #[deprecated = "this error no longer occurs"]
145    EsacAsPattern,
146    /// An `esac` or `;;` appears outside a case command.
147    UnopenedCase,
148    /// A case command is not closed.
149    UnclosedCase { opening_location: Location },
150    /// The `(` is not followed by `)` in a function definition.
151    UnmatchedParenthesis,
152    /// The function body is missing in a function definition command.
153    MissingFunctionBody,
154    /// A function body is not a compound command.
155    InvalidFunctionBody,
156    /// The keyword `in` is used as a command name.
157    InAsCommandName,
158    /// A pipeline is missing after a `&&` or `||` token.
159    MissingPipeline(AndOr),
160    /// Two successive `!` tokens.
161    DoubleNegation,
162    /// A `|` token is followed by a `!`.
163    BangAfterBar,
164    /// A command is missing after a `!` token.
165    MissingCommandAfterBang,
166    /// A command is missing after a `|` token.
167    MissingCommandAfterBar,
168    /// There is a redundant token.
169    RedundantToken,
170    /// A control escape (`\c...`) is incomplete in a dollar-single-quoted string.
171    IncompleteControlEscape,
172    /// A control-backslash escape (`\c\\`) is incomplete in a dollar-single-quoted string.
173    IncompleteControlBackslashEscape,
174    /// A control escape (`\c...`) does not have a valid control character.
175    InvalidControlEscape,
176    /// An octal escape is out of range (greater than `\377`) in a dollar-single-quoted string.
177    OctalEscapeOutOfRange,
178    /// An hexadecimal escape (`\x...`) is incomplete in a dollar-single-quoted string.
179    IncompleteHexEscape,
180    /// A Unicode escape (`\u...`) is incomplete in a dollar-single-quoted string.
181    IncompleteShortUnicodeEscape,
182    /// A Unicode escape (`\U...`) is incomplete in a dollar-single-quoted string.
183    IncompleteLongUnicodeEscape,
184    /// A Unicode escape (`\u...` or `\U...`) is out of range in a dollar-single-quoted string.
185    UnicodeEscapeOutOfRange,
186}
187
188impl SyntaxError {
189    /// Returns an error message describing the error.
190    #[must_use]
191    pub fn message(&self) -> &'static str {
192        use SyntaxError::*;
193        match self {
194            IncompleteEscape => "the backslash is escaping nothing",
195            InvalidEscape => "the backslash escape is invalid",
196            UnclosedParen { .. } => "the parenthesis is not closed",
197            UnclosedSingleQuote { .. } => "the single quote is not closed",
198            UnclosedDoubleQuote { .. } => "the double quote is not closed",
199            UnclosedDollarSingleQuote { .. } => "the dollar single quote is not closed",
200            UnclosedParam { .. } => "the parameter expansion is not closed",
201            EmptyParam => "the parameter name is missing",
202            InvalidParam => "the parameter name is invalid",
203            InvalidModifier => "the parameter expansion contains a malformed modifier",
204            MultipleModifier => "a suffix modifier cannot be used together with a prefix modifier",
205            UnclosedCommandSubstitution { .. } => "the command substitution is not closed",
206            UnclosedBackquote { .. } => "the backquote is not closed",
207            UnclosedArith { .. } => "the arithmetic expansion is not closed",
208            InvalidCommandToken => "the command starts with an inappropriate token",
209            MissingSeparator => "a separator is missing between the commands",
210            FdOutOfRange => "the file descriptor is too large",
211            MissingRedirOperand => "the redirection operator is missing its operand",
212            MissingHereDocDelimiter => "the here-document operator is missing its delimiter",
213            MissingHereDocContent => "content of the here-document is missing",
214            UnclosedHereDocContent { .. } => {
215                "the delimiter to close the here-document content is missing"
216            }
217            UnclosedArrayValue { .. } => "the array assignment value is not closed",
218            UnopenedGrouping | UnopenedSubshell | UnopenedLoop | UnopenedDoClause | UnopenedIf
219            | UnopenedCase | InAsCommandName => "the compound command delimiter is unmatched",
220            UnclosedGrouping { .. } => "the grouping is not closed",
221            EmptyGrouping => "the grouping is missing its content",
222            UnclosedSubshell { .. } => "the subshell is not closed",
223            EmptySubshell => "the subshell is missing its content",
224            UnclosedDoClause { .. } => "the `do` clause is missing its closing `done`",
225            EmptyDoClause => "the `do` clause is missing its content",
226            MissingForName => "the variable name is missing in the `for` loop",
227            InvalidForName => "the variable name is invalid",
228            InvalidForValue => "the operator token is invalid in the word list of the `for` loop",
229            MissingForBody { .. } => "the `for` loop is missing its `do` clause",
230            UnclosedWhileClause { .. } => "the `while` loop is missing its `do` clause",
231            EmptyWhileCondition => "the `while` loop is missing its condition",
232            UnclosedUntilClause { .. } => "the `until` loop is missing its `do` clause",
233            EmptyUntilCondition => "the `until` loop is missing its condition",
234            IfMissingThen { .. } => "the `if` command is missing the `then` clause",
235            EmptyIfCondition => "the `if` command is missing its condition",
236            EmptyIfBody => "the `if` command is missing its body",
237            ElifMissingThen { .. } => "the `elif` clause is missing the `then` clause",
238            EmptyElifCondition => "the `elif` clause is missing its condition",
239            EmptyElifBody => "the `elif` clause is missing its body",
240            EmptyElse => "the `else` clause is missing its content",
241            UnclosedIf { .. } => "the `if` command is missing its closing `fi`",
242            MissingCaseSubject => "the subject is missing after `case`",
243            InvalidCaseSubject => "the `case` command subject is not a valid word",
244            MissingIn { .. } => "`in` is missing in the `case` command",
245            UnclosedPatternList => "the pattern list is not properly closed by a `)`",
246            MissingPattern => "a pattern is missing in the `case` command",
247            InvalidPattern => "the pattern is not a valid word token",
248            #[allow(deprecated)]
249            EsacAsPattern => "`esac` cannot be the first of a pattern list",
250            UnclosedCase { .. } => "the `case` command is missing its closing `esac`",
251            UnmatchedParenthesis => "`)` is missing after `(`",
252            MissingFunctionBody => "the function body is missing",
253            InvalidFunctionBody => "the function body must be a compound command",
254            MissingPipeline(AndOr::AndThen) => "a command is missing after `&&`",
255            MissingPipeline(AndOr::OrElse) => "a command is missing after `||`",
256            DoubleNegation => "`!` cannot be used twice in a row",
257            BangAfterBar => "`!` cannot be used in the middle of a pipeline",
258            MissingCommandAfterBang => "a command is missing after `!`",
259            MissingCommandAfterBar => "a command is missing after `|`",
260            RedundantToken => "there is a redundant token",
261            IncompleteControlEscape => "the control escape is incomplete",
262            IncompleteControlBackslashEscape => "the control-backslash escape is incomplete",
263            InvalidControlEscape => "the control escape is invalid",
264            OctalEscapeOutOfRange => "the octal escape is out of range",
265            IncompleteHexEscape => "the hexadecimal escape is incomplete",
266            IncompleteShortUnicodeEscape | IncompleteLongUnicodeEscape => {
267                "the Unicode escape is incomplete"
268            }
269            UnicodeEscapeOutOfRange => "the Unicode escape is out of range",
270        }
271    }
272
273    /// Returns a label for annotating the error location.
274    #[must_use]
275    pub fn label(&self) -> &'static str {
276        use SyntaxError::*;
277        match self {
278            IncompleteEscape => "expected an escaped character after the backslash",
279            InvalidEscape => "invalid escape sequence",
280            UnclosedParen { .. }
281            | UnclosedCommandSubstitution { .. }
282            | UnclosedArrayValue { .. }
283            | UnclosedSubshell { .. }
284            | UnclosedPatternList
285            | UnmatchedParenthesis => "expected `)`",
286            EmptyGrouping
287            | EmptySubshell
288            | EmptyDoClause
289            | EmptyWhileCondition
290            | EmptyUntilCondition
291            | EmptyIfCondition
292            | EmptyIfBody
293            | EmptyElifCondition
294            | EmptyElifBody
295            | EmptyElse
296            | MissingPipeline(_)
297            | MissingCommandAfterBang
298            | MissingCommandAfterBar => "expected a command",
299            InvalidForValue | MissingCaseSubject | InvalidCaseSubject | MissingPattern
300            | InvalidPattern => "expected a word",
301            UnclosedSingleQuote { .. } | UnclosedDollarSingleQuote { .. } => "expected `'`",
302            UnclosedDoubleQuote { .. } => "expected `\"`",
303            UnclosedParam { .. } | UnclosedGrouping { .. } => "expected `}`",
304            EmptyParam => "expected a parameter name",
305            InvalidParam => "not a valid named or positional parameter",
306            InvalidModifier => "broken modifier",
307            MultipleModifier => "conflicting modifier",
308            UnclosedBackquote { .. } => "expected '`'",
309            UnclosedArith { .. } => "expected `))`",
310            InvalidCommandToken => "does not begin a valid command",
311            MissingSeparator => "expected `;` or `&` before this token",
312            FdOutOfRange => "unsupported file descriptor",
313            MissingRedirOperand => "expected a redirection operand",
314            MissingHereDocDelimiter => "expected a delimiter word",
315            MissingHereDocContent => "content not found",
316            UnclosedHereDocContent { .. } => "missing delimiter",
317            UnopenedGrouping => "no grouping command to close",
318            UnopenedSubshell => "no subshell to close",
319            UnopenedLoop => "not in a loop",
320            UnopenedDoClause => "no `do` clause to close",
321            UnclosedDoClause { .. } => "expected `done`",
322            MissingForName => "expected a variable name",
323            InvalidForName => "not a valid variable name",
324            MissingForBody { .. } | UnclosedWhileClause { .. } | UnclosedUntilClause { .. } => {
325                "expected `do ... done`"
326            }
327            IfMissingThen { .. } | ElifMissingThen { .. } => "expected `then ... fi`",
328            UnopenedIf => "not in an `if` command",
329            UnclosedIf { .. } => "expected `fi`",
330            MissingIn { .. } => "expected `in`",
331            #[allow(deprecated)]
332            EsacAsPattern => "needs quoting",
333            UnopenedCase => "not in a `case` command",
334            UnclosedCase { .. } => "expected `esac`",
335            MissingFunctionBody | InvalidFunctionBody => "expected a compound command",
336            InAsCommandName => "cannot be used as a command name",
337            DoubleNegation => "only one `!` allowed",
338            BangAfterBar => "`!` not allowed here",
339            RedundantToken => "unexpected token",
340            IncompleteControlEscape => r"expected a control character after `\c`",
341            IncompleteControlBackslashEscape => r"expected another backslash after `\c\`",
342            InvalidControlEscape => "not a valid control character",
343            OctalEscapeOutOfRange => r"expected a value between \0 and \377",
344            IncompleteHexEscape => r"expected a hexadecimal digit after `\x`",
345            IncompleteShortUnicodeEscape => r"expected a hexadecimal digit after `\u`",
346            IncompleteLongUnicodeEscape => r"expected a hexadecimal digit after `\U`",
347            UnicodeEscapeOutOfRange => "not a valid Unicode scalar value",
348        }
349    }
350
351    /// Returns a location related with the error cause and a message describing
352    /// the location.
353    #[must_use]
354    pub fn related_location(&self) -> Option<(&Location, &'static str)> {
355        use SyntaxError::*;
356        match self {
357            UnclosedParen { opening_location }
358            | UnclosedSubshell { opening_location }
359            | UnclosedArrayValue { opening_location } => {
360                Some((opening_location, "the opening parenthesis was here"))
361            }
362            UnclosedSingleQuote { opening_location }
363            | UnclosedDoubleQuote { opening_location }
364            | UnclosedDollarSingleQuote { opening_location } => {
365                Some((opening_location, "the opening quote was here"))
366            }
367            UnclosedParam { opening_location } => {
368                Some((opening_location, "the parameter started here"))
369            }
370            UnclosedCommandSubstitution { opening_location } => {
371                Some((opening_location, "the command substitution started here"))
372            }
373            UnclosedBackquote { opening_location } => {
374                Some((opening_location, "the opening backquote was here"))
375            }
376            UnclosedArith { opening_location } => {
377                Some((opening_location, "the arithmetic expansion started here"))
378            }
379            UnclosedHereDocContent { redir_op_location } => {
380                Some((redir_op_location, "the redirection operator was here"))
381            }
382            UnclosedGrouping { opening_location } => {
383                Some((opening_location, "the opening brace was here"))
384            }
385            UnclosedDoClause { opening_location } => {
386                Some((opening_location, "the `do` clause started here"))
387            }
388            MissingForBody { opening_location } => {
389                Some((opening_location, "the `for` loop started here"))
390            }
391            UnclosedWhileClause { opening_location } => {
392                Some((opening_location, "the `while` loop started here"))
393            }
394            UnclosedUntilClause { opening_location } => {
395                Some((opening_location, "the `until` loop started here"))
396            }
397            IfMissingThen { if_location }
398            | UnclosedIf {
399                opening_location: if_location,
400            } => Some((if_location, "the `if` command started here")),
401            ElifMissingThen { elif_location } => {
402                Some((elif_location, "the `elif` clause started here"))
403            }
404            MissingIn { opening_location } | UnclosedCase { opening_location } => {
405                Some((opening_location, "the `case` command started here"))
406            }
407            _ => None,
408        }
409    }
410}
411
412/// Types of errors that may happen in parsing.
413#[derive(Clone, Debug, Error)]
414#[error("{}", self.message())]
415pub enum ErrorCause {
416    /// Error in an underlying input function.
417    Io(#[from] Rc<std::io::Error>),
418    /// Syntax error.
419    Syntax(#[from] SyntaxError),
420}
421
422impl PartialEq for ErrorCause {
423    fn eq(&self, other: &Self) -> bool {
424        match (self, other) {
425            (ErrorCause::Syntax(e1), ErrorCause::Syntax(e2)) => e1 == e2,
426            _ => false,
427        }
428    }
429}
430
431impl ErrorCause {
432    /// Returns an error message describing the error cause.
433    #[must_use]
434    pub fn message(&self) -> Cow<'static, str> {
435        use ErrorCause::*;
436        match self {
437            Io(e) => format!("cannot read commands: {e}").into(),
438            Syntax(e) => e.message().into(),
439        }
440    }
441
442    /// Returns a label for annotating the error location.
443    #[must_use]
444    pub fn label(&self) -> &'static str {
445        use ErrorCause::*;
446        match self {
447            Io(_) => "the command could be read up to here",
448            Syntax(e) => e.label(),
449        }
450    }
451
452    /// Returns a location related with the error cause and a message describing
453    /// the location.
454    #[must_use]
455    pub fn related_location(&self) -> Option<(&Location, &'static str)> {
456        use ErrorCause::*;
457        match self {
458            Io(_) => None,
459            Syntax(e) => e.related_location(),
460        }
461    }
462}
463
464impl From<std::io::Error> for ErrorCause {
465    fn from(e: std::io::Error) -> ErrorCause {
466        ErrorCause::from(Rc::new(e))
467    }
468}
469
470/// Explanation of a failure in parsing.
471#[derive(Clone, Debug, Error, PartialEq)]
472#[error("{cause}")]
473pub struct Error {
474    pub cause: ErrorCause,
475    pub location: Location,
476}
477
478impl MessageBase for Error {
479    fn message_title(&self) -> Cow<str> {
480        self.cause.message()
481    }
482
483    fn main_annotation(&self) -> Annotation {
484        Annotation::new(
485            AnnotationType::Error,
486            self.cause.label().into(),
487            &self.location,
488        )
489    }
490
491    fn additional_annotations<'a, T: Extend<Annotation<'a>>>(&'a self, results: &mut T) {
492        // TODO Use Extend::extend_one
493        if let Some((location, label)) = self.cause.related_location() {
494            results.extend(std::iter::once(Annotation::new(
495                AnnotationType::Info,
496                label.into(),
497                location,
498            )));
499        }
500        if let ErrorCause::Syntax(SyntaxError::BangAfterBar) = &self.cause {
501            results.extend(std::iter::once(Annotation::new(
502                AnnotationType::Help,
503                "surround this in a grouping: `{ ! ...; }`".into(),
504                &self.location,
505            )));
506        }
507    }
508}
509
#[cfg(test)]
mod tests {
    use super::*;
    use crate::source::pretty::Message;
    use crate::source::Code;
    use crate::source::Source;
    use std::num::NonZeroU64;
    use std::rc::Rc;

    /// Builds the fixture shared by the tests: a `MissingHereDocDelimiter`
    /// error located at range `0..42` of an empty code fragment from an
    /// unknown source.
    fn missing_delimiter_error() -> Error {
        let code = Rc::new(Code {
            value: "".to_string().into(),
            start_line_number: NonZeroU64::new(1).unwrap(),
            source: Source::Unknown.into(),
        });
        Error {
            cause: SyntaxError::MissingHereDocDelimiter.into(),
            location: Location { code, range: 0..42 },
        }
    }

    #[test]
    fn display_for_error() {
        let error = missing_delimiter_error();
        assert_eq!(
            error.to_string(),
            "the here-document operator is missing its delimiter"
        );
    }

    #[test]
    fn from_error_for_message() {
        let error = missing_delimiter_error();
        let message = Message::from(&error);
        assert_eq!(message.r#type, AnnotationType::Error);
        assert_eq!(
            message.title,
            "the here-document operator is missing its delimiter"
        );

        // This cause has no related location, so only the main annotation is
        // expected.
        assert_eq!(message.annotations.len(), 1);
        let main = &message.annotations[0];
        assert_eq!(main.r#type, AnnotationType::Error);
        assert_eq!(main.label, "expected a delimiter word");
        assert_eq!(main.location, &error.location);
    }
}