miden_assembly_syntax/parser/
error.rs

1use alloc::{
2    string::{String, ToString},
3    vec::Vec,
4};
5use core::{fmt, ops::Range};
6
7use miden_debug_types::{SourceId, SourceSpan};
8use miden_utils_diagnostics::{Diagnostic, miette};
9
10use super::ParseError;
11
12// LITERAL ERROR KIND
13// ================================================================================================
14
/// The reason a literal was rejected.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum LiteralErrorKind {
    /// Nothing was provided where a literal was expected
    Empty,
    /// A character that is not a valid digit was encountered
    InvalidDigit,
    /// The value is larger than `u32::MAX`
    U32Overflow,
    /// The value is not smaller than `Felt::MODULUS`
    FeltOverflow,
    /// The value was expected to be a bit size, i.e. a value < 63
    InvalidBitSize,
}

impl fmt::Display for LiteralErrorKind {
    /// Writes the fixed, human-readable description of this error kind.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let description = match *self {
            Self::Empty => "input was empty",
            Self::InvalidDigit => "invalid digit",
            Self::U32Overflow => "value overflowed the u32 range",
            Self::FeltOverflow => "value overflowed the field modulus",
            Self::InvalidBitSize => "expected value to be a valid bit size, e.g. 0..63",
        };
        f.write_str(description)
    }
}
42
43// HEX ERROR KIND
44// ================================================================================================
45
/// The reason a hex-encoded literal was rejected.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum HexErrorKind {
    /// Every byte requires two hex digits, but the input had fewer than that
    MissingDigits,
    /// The input consisted of an unsupported number of digits; the supported sizes are the
    /// ones listed in this variant's `Display` message (2, 4, 8, 16, or 64 digits).
    Invalid,
    /// The hex-encoded value overflows `Felt::MODULUS`, the maximum integral value
    Overflow,
    /// The hex-encoded value is > 64 digits
    TooLong,
}

impl fmt::Display for HexErrorKind {
    /// Writes the fixed, human-readable description of this error kind.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let description = match *self {
            Self::MissingDigits => "expected number of hex digits to be a multiple of 2",
            Self::Invalid => "expected 2, 4, 8, 16, or 64 hex digits",
            Self::Overflow => "value overflowed the field modulus",
            Self::TooLong => {
                "value has too many digits, long hex strings must contain exactly 64 digits"
            },
        };
        f.write_str(description)
    }
}
73
74// BINARY ERROR KIND
75// ================================================================================================
76
/// The reason a binary-encoded literal was rejected.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum BinErrorKind {
    /// The bin-encoded value is > 32 digits
    TooLong,
}

impl fmt::Display for BinErrorKind {
    /// Writes the fixed, human-readable description of this error kind.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let description = match *self {
            Self::TooLong => {
                "value has too many digits, binary string can contain no more than 32 digits"
            },
        };
        f.write_str(description)
    }
}
92
93// PARSING ERROR
94// ================================================================================================
95
/// The errors reported by the parser, rendered as diagnostics.
///
/// Most variants carry a [`SourceSpan`] that is used as the diagnostic label; the
/// user-facing message of each variant is given by its `#[error(...)]` attribute.
//
// NOTE: The `#[repr(u8)]` representation is relied upon by `ParsingError::tag`, which reads
// the discriminant directly from the start of the value.
#[derive(Debug, Default, thiserror::Error, Diagnostic)]
#[repr(u8)]
pub enum ParsingError {
    /// Generic failure that carries no source location; this is the `Default` variant.
    #[default]
    #[error("parsing failed due to unexpected input")]
    #[diagnostic()]
    Failed = 0,
    /// The input contains a byte sequence that is not valid UTF-8.
    #[error("expected input to be valid utf8, but invalid byte sequences were found")]
    #[diagnostic()]
    InvalidUtf8 {
        #[label("invalid byte sequence starts here")]
        span: SourceSpan,
    },
    /// The input ended partway through a UTF-8 codepoint.
    #[error(
        "expected input to be valid utf8, but end-of-file was reached before final codepoint was read"
    )]
    #[diagnostic()]
    IncompleteUtf8 {
        #[label("the codepoint starting here is incomplete")]
        span: SourceSpan,
    },
    /// The input at `span` is not valid syntax.
    #[error("invalid syntax")]
    #[diagnostic()]
    InvalidToken {
        #[label("occurs here")]
        span: SourceSpan,
    },
    /// A token was found where one of the `expected` tokens was required.
    #[error("invalid syntax")]
    #[diagnostic(help("expected {}", expected.as_slice().join(", or ")))]
    UnrecognizedToken {
        #[label("found a {token} here")]
        span: SourceSpan,
        token: String,
        expected: Vec<String>,
    },
    /// An unexpected trailing token was found.
    #[error("unexpected trailing tokens")]
    #[diagnostic()]
    ExtraToken {
        #[label("{token} was found here, but was not expected")]
        span: SourceSpan,
        token: String,
    },
    /// End of file was reached where one of the `expected` tokens was required.
    #[error("unexpected end of file")]
    #[diagnostic(help("expected {}", expected.as_slice().join(", or ")))]
    UnrecognizedEof {
        #[label("reached end of file here")]
        span: SourceSpan,
        expected: Vec<String>,
    },
    /// An identifier failed validation; `error` carries the underlying cause.
    #[error("{error}")]
    #[diagnostic(help(
        "bare identifiers must be lowercase alphanumeric with '_', quoted identifiers can include any graphical character"
    ))]
    InvalidIdentifier {
        #[source]
        #[diagnostic(source)]
        error: crate::ast::IdentError,
        #[label]
        span: SourceSpan,
    },
    /// A quoted identifier was opened at `start` but never closed.
    #[error("unclosed quoted identifier")]
    #[diagnostic()]
    UnclosedQuote {
        #[label("no match for quotation mark starting here")]
        start: SourceSpan,
    },
    /// A single code block contains too many instructions.
    #[error("too many instructions in a single code block")]
    #[diagnostic()]
    CodeBlockTooBig {
        #[label]
        span: SourceSpan,
    },
    /// A constant expression divides by zero.
    #[error("invalid constant expression: division by zero")]
    DivisionByZero {
        #[label]
        span: SourceSpan,
    },
    /// A string appeared in an arithmetic expression.
    #[error("unexpected string in an arithmetic expression")]
    #[diagnostic()]
    StringInArithmeticExpression {
        #[label]
        span: SourceSpan,
    },
    /// A doc comment exceeds the maximum supported length.
    #[error("doc comment is too large")]
    #[diagnostic(help("make sure it is less than u16::MAX bytes in length"))]
    DocsTooLarge {
        #[label]
        span: SourceSpan,
    },
    /// A literal was rejected for the reason given by `kind`.
    #[error("invalid literal: {}", kind)]
    #[diagnostic()]
    InvalidLiteral {
        #[label]
        span: SourceSpan,
        kind: LiteralErrorKind,
    },
    /// A literal was rejected for the hex-specific reason given by `kind`.
    #[error("invalid literal: {}", kind)]
    #[diagnostic()]
    InvalidHexLiteral {
        #[label]
        span: SourceSpan,
        kind: HexErrorKind,
    },
    /// A literal was rejected for the binary-specific reason given by `kind`.
    #[error("invalid literal: {}", kind)]
    #[diagnostic()]
    InvalidBinaryLiteral {
        #[label]
        span: SourceSpan,
        kind: BinErrorKind,
    },
    /// A MAST root literal is invalid.
    #[error("invalid MAST root literal")]
    InvalidMastRoot {
        #[label]
        span: SourceSpan,
    },
    /// A library path is invalid; `message` explains why.
    #[error("invalid library path: {}", message)]
    InvalidLibraryPath {
        #[label]
        span: SourceSpan,
        message: String,
    },
    /// An immediate value lies outside the permitted `range`.
    #[error("invalid immediate: value must be in the range {}..{} (exclusive)", range.start, range.end)]
    ImmediateOutOfRange {
        #[label]
        span: SourceSpan,
        range: Range<usize>,
    },
    /// The module defines too many procedures.
    #[error("too many procedures in this module")]
    #[diagnostic()]
    ModuleTooLarge {
        #[label]
        span: SourceSpan,
    },
    /// The module re-exports too many procedures.
    #[error("too many re-exported procedures in this module")]
    #[diagnostic()]
    ModuleTooManyReexports {
        #[label]
        span: SourceSpan,
    },
    /// A `push` was given `count` operands, more than the 16 allowed at one time.
    #[error(
        "too many operands for `push`: tried to push {} elements, but only 16 can be pushed at one time",
        count
    )]
    #[diagnostic()]
    PushOverflow {
        #[label]
        span: SourceSpan,
        count: usize,
    },
    /// An import did not use a fully-qualified module path.
    #[error("expected a fully-qualified module path, e.g. `std::u64`")]
    UnqualifiedImport {
        #[label]
        span: SourceSpan,
    },
    /// A procedure was re-exported by digest without being given a name.
    #[error(
        "re-exporting a procedure identified by digest requires giving it a name, e.g. `export.DIGEST->foo`"
    )]
    UnnamedReexportOfMastRoot {
        #[label]
        span: SourceSpan,
    },
    /// An attribute with the same name was already defined for the procedure definition.
    #[error("conflicting attributes for procedure definition")]
    #[diagnostic()]
    AttributeConflict {
        #[label(
            "conflict occurs because an attribute with the same name has already been defined"
        )]
        span: SourceSpan,
        #[label("previously defined here")]
        prev: SourceSpan,
    },
    /// A key was already set by a previous key-value attribute declaration.
    #[error("conflicting key-value attributes for procedure definition")]
    #[diagnostic()]
    AttributeKeyValueConflict {
        #[label(
            "conflict occurs because a key with the same name has already been set in a previous declaration"
        )]
        span: SourceSpan,
        #[label("previously defined here")]
        prev: SourceSpan,
    },
    /// An Advice Map key is not a valid word.
    #[error("invalid Advice Map key")]
    #[diagnostic()]
    InvalidAdvMapKey {
        #[label(
            "an Advice Map key must be a word, either in 64-character hex format or in array-like format `[f0,f1,f2,f3]`"
        )]
        span: SourceSpan,
    },
    /// A slice was taken over a constant that is not word-sized.
    #[error("invalid slice constant")]
    #[diagnostic()]
    InvalidSliceConstant {
        #[label("slices are only supported over word-sized constants")]
        span: SourceSpan,
    },
    /// The range used to slice a word constant is malformed.
    #[error("invalid slice: expected valid range")]
    #[diagnostic()]
    InvalidRange {
        #[label("range used for the word constant slice is malformed: `{range:?}`")]
        span: SourceSpan,
        range: Range<usize>,
    },
    /// The range used to slice a word constant is empty.
    #[error("invalid slice: expected non-empty range")]
    #[diagnostic()]
    EmptySlice {
        #[label("range used for the word constant slice is empty: `{range:?}`")]
        span: SourceSpan,
        range: Range<usize>,
    },
    /// A calling convention name was not recognized.
    #[error("unrecognized calling convention")]
    #[diagnostic(help("expected one of: 'fast', 'C', 'wasm', 'canon-lift', or 'canon-lower'"))]
    UnrecognizedCallConv {
        #[label]
        span: SourceSpan,
    },
    /// A struct annotation is invalid.
    #[error("invalid struct annotation")]
    #[diagnostic(help("expected one of: '@packed', '@transparent', '@bigendian', or '@align(N)'"))]
    InvalidStructAnnotation {
        #[label]
        span: SourceSpan,
    },
    /// A struct representation is invalid; `message` is used as the label text.
    #[error("invalid struct representation")]
    #[diagnostic()]
    InvalidStructRepr {
        #[label("{message}")]
        span: SourceSpan,
        message: String,
    },
}
325
326impl ParsingError {
327    fn tag(&self) -> u8 {
328        // SAFETY: This is safe because we have given this enum a
329        // primitive representation with #[repr(u8)], with the first
330        // field of the underlying union-of-structs the discriminant
331        //
332        // See the section on "accessing the numeric value of the discriminant"
333        // here: https://doc.rust-lang.org/std/mem/fn.discriminant.html
334        unsafe { *<*const _>::from(self).cast::<u8>() }
335    }
336}
337
338impl Eq for ParsingError {}
339
340impl PartialEq for ParsingError {
341    fn eq(&self, other: &Self) -> bool {
342        match (self, other) {
343            (Self::Failed, Self::Failed) => true,
344            (Self::InvalidLiteral { kind: l, .. }, Self::InvalidLiteral { kind: r, .. }) => l == r,
345            (Self::InvalidHexLiteral { kind: l, .. }, Self::InvalidHexLiteral { kind: r, .. }) => {
346                l == r
347            },
348            (
349                Self::InvalidLibraryPath { message: l, .. },
350                Self::InvalidLibraryPath { message: r, .. },
351            ) => l == r,
352            (
353                Self::ImmediateOutOfRange { range: l, .. },
354                Self::ImmediateOutOfRange { range: r, .. },
355            ) => l == r,
356            (Self::PushOverflow { count: l, .. }, Self::PushOverflow { count: r, .. }) => l == r,
357            (
358                Self::UnrecognizedToken { token: ltok, expected: lexpect, .. },
359                Self::UnrecognizedToken { token: rtok, expected: rexpect, .. },
360            ) => ltok == rtok && lexpect == rexpect,
361            (Self::ExtraToken { token: ltok, .. }, Self::ExtraToken { token: rtok, .. }) => {
362                ltok == rtok
363            },
364            (
365                Self::UnrecognizedEof { expected: lexpect, .. },
366                Self::UnrecognizedEof { expected: rexpect, .. },
367            ) => lexpect == rexpect,
368            (x, y) => x.tag() == y.tag(),
369        }
370    }
371}
372
373impl ParsingError {
374    pub fn from_utf8_error(source_id: SourceId, err: core::str::Utf8Error) -> Self {
375        let start = u32::try_from(err.valid_up_to()).ok().unwrap_or(u32::MAX);
376        match err.error_len() {
377            None => Self::IncompleteUtf8 { span: SourceSpan::at(source_id, start) },
378            Some(len) => Self::InvalidUtf8 {
379                span: SourceSpan::new(source_id, start..(start + len as u32)),
380            },
381        }
382    }
383
384    pub fn from_parse_error(source_id: SourceId, err: ParseError<'_>) -> Self {
385        use super::Token;
386
387        match err {
388            ParseError::InvalidToken { location: at } => {
389                Self::InvalidToken { span: SourceSpan::at(source_id, at) }
390            },
391            ParseError::UnrecognizedToken { token: (l, Token::Eof, r), expected } => {
392                Self::UnrecognizedEof {
393                    span: SourceSpan::new(source_id, l..r),
394                    expected: simplify_expected_tokens(expected),
395                }
396            },
397            ParseError::UnrecognizedToken { token: (l, tok, r), expected } => {
398                Self::UnrecognizedToken {
399                    span: SourceSpan::new(source_id, l..r),
400                    token: tok.to_string(),
401                    expected: simplify_expected_tokens(expected),
402                }
403            },
404            ParseError::ExtraToken { token: (l, tok, r) } => Self::ExtraToken {
405                span: SourceSpan::new(source_id, l..r),
406                token: tok.to_string(),
407            },
408            ParseError::UnrecognizedEof { location: at, expected } => Self::UnrecognizedEof {
409                span: SourceSpan::new(source_id, at..at),
410                expected: simplify_expected_tokens(expected),
411            },
412            ParseError::User { error } => error,
413        }
414    }
415}
416
417// HELPER FUNCTIONS
418// ================================================================================================
419
420// The parser generator will show every token that is expected in some scenarios, so to avoid
421// cluttering the diagnostic output with all of the instruction opcodes, we collapse them into a
422// single token.
423fn simplify_expected_tokens(expected: Vec<String>) -> Vec<String> {
424    use super::Token;
425    let mut has_instruction = false;
426    let mut has_ctrl = false;
427    let mut has_type = false;
428    expected
429        .into_iter()
430        .filter_map(|t| {
431            let tok = match t.as_str() {
432                "bare_ident" => return Some("identifier".to_string()),
433                "const_ident" => return Some("constant identifier".to_string()),
434                "quoted_ident" => return Some("quoted identifier".to_string()),
435                "doc_comment" => return Some("doc comment".to_string()),
436                "hex_value" => return Some("hex-encoded literal".to_string()),
437                "bin_value" => return Some("bin-encoded literal".to_string()),
438                "uint" => return Some("integer literal".to_string()),
439                "EOF" => return Some("end of file".to_string()),
440                other => other[1..].strip_suffix('"').and_then(Token::parse),
441            };
442            match tok {
443                Some(Token::If | Token::While | Token::Repeat) => {
444                    if !has_ctrl {
445                        has_ctrl = true;
446                        Some("control flow opcode (e.g. \"if.true\")".to_string())
447                    } else {
448                        None
449                    }
450                },
451                Some(tok) if tok.is_instruction() => {
452                    if !has_instruction {
453                        has_instruction = true;
454                        Some("primitive opcode (e.g. \"add\")".to_string())
455                    } else {
456                        None
457                    }
458                },
459                Some(tok) if tok.is_type_keyword() => {
460                    if !has_type {
461                        has_type = true;
462                        Some("type (e.g. \"felt\")".to_string())
463                    } else {
464                        None
465                    }
466                },
467                _ => Some(t),
468            }
469        })
470        .collect()
471}