pomsky_syntax/
error.rs

1//! Module containing all the errors that can occur during parsing
2
3use std::{
4    fmt,
5    num::{IntErrorKind, ParseIntError},
6};
7
8use crate::{Span, lexer::Token};
9
10pub use crate::lexer::LexErrorMsg;
11
/// An error that can occur only during parsing
///
/// Pairs the description of what went wrong ([`ParseErrorKind`]) with the
/// [`Span`] the error is attributed to. A value can be built with
/// [`ParseErrorKind::at`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseError {
    /// What went wrong
    pub kind: ParseErrorKind,
    /// The span this error is attributed to
    pub span: Span,
}
18
19impl core::fmt::Display for ParseError {
20    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
21        if let Some(std::ops::Range { start, end }) = self.span.range() {
22            write!(f, "{}\n  at {start}..{end}", self.kind)
23        } else {
24            self.kind.fmt(f)
25        }
26    }
27}
28
/// An error kind (without a span) that can occur only during parsing
///
/// Use [`ParseErrorKind::at`] to attach a [`Span`] and obtain a
/// [`ParseError`]. The user-facing message of every variant is produced by
/// the `Display` implementation of this type.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum ParseErrorKind {
    /// An unknown token was encountered
    UnknownToken,
    /// A lexer error that carries its own message
    LexErrorWithMessage(LexErrorMsg),
    /// An unexpected keyword (going by the variant name, one found after
    /// `let`); holds the keyword
    KeywordAfterLet(String),
    /// An unexpected keyword (going by the variant name, one found after a
    /// colon); holds the keyword
    KeywordAfterColon(String),
    /// A group name contains a code point that is not ASCII; holds the
    /// offending code point
    NonAsciiIdentAfterColon(char),
    /// A group name is longer than the allowed 128 code points; holds the
    /// actual length in code points
    GroupNameTooLong(usize),
    /// A keyword was found where it is not expected; holds the keyword
    UnexpectedKeyword(String),

    /// A deprecated feature was used
    Deprecated(DeprecationError),

    /// Something else was expected; holds a description of what was expected
    Expected(&'static str),
    /// There are leftover tokens that couldn't be parsed
    LeftoverTokens,
    /// A specific token was expected; holds that token
    ExpectedToken(Token),
    /// The first number in a range is not smaller than the second
    RangeIsNotIncreasing,
    /// A range uses leading zeroes although its two numbers don't have the
    /// same number of digits
    RangeLeadingZeroesVariableLength,
    /// A code point or range was negated, which is not allowed
    UnallowedNot,
    /// A shorthand character class was negated more than once. The payload is
    /// not part of the message (presumably the number of negations; TODO
    /// confirm)
    UnallowedMultiNot(usize),
    /// A pipe is not followed by an expression
    LonePipe,
    /// A variable with the same name already exists in the scope
    LetBindingExists,
    /// A variable declaration doesn't start with the `let` keyword
    MissingLetKeyword,
    /// A string contains an unsupported escape sequence. The payload is not
    /// part of the message (presumably the position of the escape; TODO
    /// confirm)
    InvalidEscapeInStringAt(usize),
    /// An error that relates to a character string
    CharString(CharStringError),
    /// An error that relates to a character class
    CharClass(CharClassError),
    /// A code point is outside the allowed range
    InvalidCodePoint,
    /// An error that relates to parsing a number
    Number(NumberError),
    /// An invalid repetition
    Repetition(RepetitionError),
    /// A test case has multiple strings
    MultipleStringsInTestCase,

    /// The recursion limit was reached
    RecursionLimit,
}
63
64impl ParseErrorKind {
65    /// Creates a [`ParseError`] from this error kind, and a [`Span`] indicating
66    /// where the error occurred.
67    pub fn at(self, span: Span) -> ParseError {
68        ParseError { kind: self, span }
69    }
70}
71
72impl From<RepetitionError> for ParseErrorKind {
73    fn from(e: RepetitionError) -> Self {
74        ParseErrorKind::Repetition(e)
75    }
76}
77
78impl From<CharClassError> for ParseErrorKind {
79    fn from(e: CharClassError) -> Self {
80        ParseErrorKind::CharClass(e)
81    }
82}
83
84impl From<DeprecationError> for ParseErrorKind {
85    fn from(e: DeprecationError) -> Self {
86        ParseErrorKind::Deprecated(e)
87    }
88}
89
90impl From<NumberError> for ParseErrorKind {
91    fn from(e: NumberError) -> Self {
92        ParseErrorKind::Number(e)
93    }
94}
95
// Empty impl: nothing is overridden, only the trait's default methods apply.
impl std::error::Error for ParseErrorKind {}
97
98impl core::fmt::Display for ParseErrorKind {
99    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
100        match self {
101            ParseErrorKind::UnknownToken => write!(f, "Unknown token"),
102            ParseErrorKind::LexErrorWithMessage(msg) => msg.fmt(f),
103            ParseErrorKind::KeywordAfterLet(keyword)
104            | ParseErrorKind::UnexpectedKeyword(keyword)
105            | ParseErrorKind::KeywordAfterColon(keyword) => {
106                write!(f, "Unexpected keyword `{keyword}`")
107            }
108            &ParseErrorKind::NonAsciiIdentAfterColon(char) => {
109                let num = char as u32;
110                write!(
111                    f,
112                    "Group name contains illegal code point `{char}` (U+{num:04X}). Group names must be ASCII only."
113                )
114            }
115            &ParseErrorKind::GroupNameTooLong(len) => {
116                write!(
117                    f,
118                    "Group name is too long. It is {len} code points long, but must be at most 128 code points."
119                )
120            }
121
122            ParseErrorKind::Deprecated(deprecation) => deprecation.fmt(f),
123
124            ParseErrorKind::Expected(expected) => write!(f, "Expected {expected}"),
125            ParseErrorKind::LeftoverTokens => {
126                write!(f, "There are leftover tokens that couldn't be parsed")
127            }
128            ParseErrorKind::ExpectedToken(token) => write!(f, "Expected {token}"),
129            ParseErrorKind::RangeIsNotIncreasing => {
130                write!(f, "The first number in a range must be smaller than the second")
131            }
132            ParseErrorKind::RangeLeadingZeroesVariableLength => write!(
133                f,
134                "Leading zeroes are not allowed, unless both numbers have the same number of digits"
135            ),
136            ParseErrorKind::UnallowedNot => write!(f, "This code point or range can't be negated"),
137            ParseErrorKind::UnallowedMultiNot(_) => {
138                write!(f, "A shorthand character class can't be negated more than once")
139            }
140            ParseErrorKind::LonePipe => write!(f, "A pipe must be followed by an expression"),
141            ParseErrorKind::LetBindingExists => {
142                write!(f, "A variable with the same name already exists in this scope")
143            }
144            ParseErrorKind::MissingLetKeyword => {
145                write!(f, "A variable declaration must start with the `let` keyword")
146            }
147            ParseErrorKind::InvalidEscapeInStringAt(_) => {
148                write!(f, "Unsupported escape sequence in string")
149            }
150            ParseErrorKind::InvalidCodePoint => {
151                write!(f, "This code point is outside the allowed range")
152            }
153            ParseErrorKind::MultipleStringsInTestCase => {
154                write!(f, "Test cases can't have multiple strings")
155            }
156            ParseErrorKind::CharString(error) => error.fmt(f),
157            ParseErrorKind::CharClass(error) => error.fmt(f),
158            ParseErrorKind::Number(error) => error.fmt(f),
159            ParseErrorKind::Repetition(error) => error.fmt(f),
160
161            ParseErrorKind::RecursionLimit => write!(f, "Recursion limit reached"),
162        }
163    }
164}
165
/// An error that is returned when a deprecated feature is used
///
/// The enum has no variants at the moment, so no value of this type can be
/// constructed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DeprecationError {}

// Empty impl: nothing is overridden, only the trait's default methods apply.
impl std::error::Error for DeprecationError {}

impl core::fmt::Display for DeprecationError {
    /// Can never actually run, because there is no value of this variant-less
    /// enum to call it on; the formatter is therefore unused.
    fn fmt(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // A match without arms is exhaustive for an enum without variants.
        match *self {}
    }
}
177
/// An error concerning a string that is used as a bound of a code point
/// range within a character class
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum CharStringError {
    /// The string is empty, e.g. `[''-'z']`
    Empty,
    /// The string consists of more than one code point, e.g. `['abc'-'z']`
    TooManyCodePoints,
}

impl std::error::Error for CharStringError {}

impl core::fmt::Display for CharStringError {
    /// Writes the fixed, user-facing message of the variant.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Empty => f.write_str("Strings used in ranges can't be empty"),
            Self::TooManyCodePoints => {
                f.write_str("Strings used in ranges can only contain 1 code point")
            }
        }
    }
}
204
/// An error concerning a character class
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum CharClassError {
    /// The character class is empty, i.e. `[]`
    Empty,
    /// `[^` was encountered. In a regex this starts a negated character
    /// class, but pomsky uses the `![` syntax for that.
    CaretInGroup,
    /// A code point range that is not ascending, e.g. `['z'-'a']`; holds both
    /// bounds in the order they were written
    NonAscendingRange(char, char),
    /// A token that is invalid within a character class
    Invalid,
    /// The character class combines incompatible shorthands, e.g.
    /// `[. codepoint]`
    Unallowed,
    /// The shorthand character class or Unicode property is unknown
    UnknownNamedClass {
        found: Box<str>,
        extra_in_prefix: bool,
        #[cfg(feature = "suggestions")]
        similar: Option<Box<str>>,
    },
    /// The character class can't be negated, e.g. `[!ascii]`
    Negative,
    /// A prefix was given where none is expected, e.g. `[scx:w]`
    UnexpectedPrefix,
    /// The wrong prefix was given, e.g. `[sc:Basic_Latin]` (where the correct
    /// prefix would be `block:`)
    WrongPrefix { expected: &'static str, has_in_prefix: bool },
}

impl std::error::Error for CharClassError {}

impl core::fmt::Display for CharClassError {
    /// Writes the user-facing message of the variant.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        // Arms with a fixed message evaluate to that message and share the
        // single `write_str` at the bottom; interpolating arms return early.
        let message = match self {
            Self::Empty => "This character class is empty",
            Self::CaretInGroup => "`^` is not allowed here",
            Self::Invalid => "Expected string, range, code point or named character class",
            Self::Unallowed => "This combination of character classes is not allowed",
            Self::Negative => "This character class can't be negated",
            Self::UnexpectedPrefix => "This character class cannot have a prefix",

            Self::NonAscendingRange(first, last) => {
                let (lower, upper) = (*first as u32, *last as u32);
                return write!(
                    f,
                    "Character range must be in increasing order, but it is U+{:04X?} - U+{:04X?}",
                    lower, upper
                );
            }
            Self::UnknownNamedClass { found, extra_in_prefix, .. } => {
                if !*extra_in_prefix {
                    return write!(f, "Unknown character class `{found}`");
                }
                // The first `In` of the name is shown as the `blk:` prefix.
                let shown = found.replacen("In", "blk:", 1);
                return write!(f, "Unknown character class `{}`", shown);
            }
            Self::WrongPrefix { expected, has_in_prefix } => {
                if !*has_in_prefix {
                    return write!(
                        f,
                        "This character class has the wrong prefix; it should be {expected}"
                    );
                }
                return write!(
                    f,
                    "This character class has the wrong prefix; it should be {expected},\n\
                    and the `In` at the start should be removed"
                );
            }
        };

        f.write_str(message)
    }
}
278
/// An error concerning the parsing of a number
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum NumberError {
    /// The string to parse is empty
    Empty,
    /// The string to parse contains a character that is not a digit
    InvalidDigit,
    /// The number is too large for the target integer type
    TooLarge,
    /// The number is too small for the target integer type
    TooSmall,
    /// The number is zero, which the target number type can't represent
    Zero,
}

impl From<ParseIntError> for NumberError {
    /// Translates the kind of a standard library [`ParseIntError`] into the
    /// matching [`NumberError`] variant.
    ///
    /// # Panics
    ///
    /// Panics for any [`IntErrorKind`] other than the five handled below.
    fn from(e: ParseIntError) -> Self {
        let kind = e.kind();
        match kind {
            IntErrorKind::Zero => Self::Zero,
            IntErrorKind::NegOverflow => Self::TooSmall,
            IntErrorKind::PosOverflow => Self::TooLarge,
            IntErrorKind::InvalidDigit => Self::InvalidDigit,
            IntErrorKind::Empty => Self::Empty,
            // `IntErrorKind` is non-exhaustive, so a wildcard arm is required.
            // NOTE(review): this panics should the standard library ever
            // report a new kind; consider a non-panicking fallback.
            _ => unimplemented!(),
        }
    }
}

impl std::error::Error for NumberError {}

impl core::fmt::Display for NumberError {
    /// Writes the fixed, user-facing message of the variant.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Empty => f.write_str("cannot parse integer from empty string"),
            Self::InvalidDigit => f.write_str("invalid digit found in string"),
            Self::TooLarge => f.write_str("number too large"),
            Self::TooSmall => f.write_str("number too small"),
            Self::Zero => f.write_str("number would be zero for non-zero type"),
        }
    }
}
323
/// An error describing an invalid repetition, e.g. `x{4,2}`
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RepetitionError {
    /// The lower bound of the repetition is greater than its upper bound,
    /// e.g. `x{4,2}`
    NotAscending,
    /// A question mark follows a repetition, e.g. `x{3}?`
    QmSuffix,
    /// Several repetitions follow one another
    Multi,
}

impl std::error::Error for RepetitionError {}

impl core::fmt::Display for RepetitionError {
    /// Writes the fixed, user-facing message of the variant.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match *self {
            Self::NotAscending => "Lower bound can't be greater than the upper bound",
            Self::QmSuffix => "Unexpected `?` following a repetition",
            Self::Multi => "Only one repetition allowed",
        })
    }
}
347}