miden_assembly_syntax/parser/
error.rs

1// Allow unused assignments - required by miette::Diagnostic derive macro
2#![allow(unused_assignments)]
3
4use alloc::{
5    string::{String, ToString},
6    vec::Vec,
7};
8use core::{fmt, ops::Range};
9
10use miden_debug_types::{SourceId, SourceSpan};
11use miden_utils_diagnostics::{Diagnostic, miette};
12
13use super::ParseError;
14
15// LITERAL ERROR KIND
16// ================================================================================================
17
/// The specific reason a literal was rejected.
///
/// Carried by `ParsingError::InvalidLiteral`, whose message is rendered as
/// `invalid literal: <kind>` using this type's `Display` implementation.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum LiteralErrorKind {
    /// There was no input to parse.
    Empty,
    /// A character in the input is not a valid digit.
    InvalidDigit,
    /// The value is larger than `u32::MAX`.
    U32Overflow,
    /// The value is larger than `Felt::MODULUS` permits.
    FeltOverflow,
    /// The value is not a valid bit size (the diagnostic text cites `0..63` as the expected
    /// range).
    InvalidBitSize,
}
31
32impl fmt::Display for LiteralErrorKind {
33    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
34        match self {
35            Self::Empty => f.write_str("input was empty"),
36            Self::InvalidDigit => f.write_str("invalid digit"),
37            Self::U32Overflow => f.write_str("value overflowed the u32 range"),
38            Self::FeltOverflow => f.write_str("value overflowed the field modulus"),
39            Self::InvalidBitSize => {
40                f.write_str("expected value to be a valid bit size, e.g. 0..63")
41            },
42        }
43    }
44}
45
46// HEX ERROR KIND
47// ================================================================================================
48
/// The specific reason a hex-encoded literal was rejected.
///
/// Carried by `ParsingError::InvalidHexLiteral`, whose message is rendered as
/// `invalid literal: <kind>` using this type's `Display` implementation.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum HexErrorKind {
    /// Every byte needs two hex digits, but the input had an odd number of digits.
    MissingDigits,
    /// The number of digits is not one of the accepted sizes; the diagnostic text lists
    /// 2, 4, 8, 16, or 64 hex digits as valid.
    Invalid,
    /// The decoded value is larger than `Felt::MODULUS` permits.
    Overflow,
    /// The input has more than 64 hex digits.
    TooLong,
}
61
62impl fmt::Display for HexErrorKind {
63    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
64        match self {
65            Self::MissingDigits => {
66                f.write_str("expected number of hex digits to be a multiple of 2")
67            },
68            Self::Invalid => f.write_str("expected 2, 4, 8, 16, or 64 hex digits"),
69            Self::Overflow => f.write_str("value overflowed the field modulus"),
70            Self::TooLong => f.write_str(
71                "value has too many digits, long hex strings must contain exactly 64 digits",
72            ),
73        }
74    }
75}
76
77// BINARY ERROR KIND
78// ================================================================================================
79
/// The specific reason a binary-encoded literal was rejected.
///
/// Carried by `ParsingError::InvalidBinaryLiteral`, whose message is rendered as
/// `invalid literal: <kind>` using this type's `Display` implementation.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum BinErrorKind {
    /// The input has more than 32 binary digits.
    TooLong,
}
85
86impl fmt::Display for BinErrorKind {
87    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
88        match self {
89            Self::TooLong => f.write_str(
90                "value has too many digits, binary string can contain no more than 32 digits",
91            ),
92        }
93    }
94}
95
96// PARSING ERROR
97// ================================================================================================
98
99#[derive(Debug, Default, thiserror::Error, Diagnostic)]
100#[repr(u8)]
101pub enum ParsingError {
102    #[default]
103    #[error("parsing failed due to unexpected input")]
104    #[diagnostic()]
105    Failed = 0,
106    #[error("expected input to be valid utf8, but invalid byte sequences were found")]
107    #[diagnostic()]
108    InvalidUtf8 {
109        #[label("invalid byte sequence starts here")]
110        span: SourceSpan,
111    },
112    #[error(
113        "expected input to be valid utf8, but end-of-file was reached before final codepoint was read"
114    )]
115    #[diagnostic()]
116    IncompleteUtf8 {
117        #[label("the codepoint starting here is incomplete")]
118        span: SourceSpan,
119    },
120    #[error("invalid syntax")]
121    #[diagnostic()]
122    InvalidToken {
123        #[label("occurs here")]
124        span: SourceSpan,
125    },
126    #[error("invalid syntax")]
127    #[diagnostic(help("expected {}", expected.as_slice().join(", or ")))]
128    UnrecognizedToken {
129        #[label("found a {token} here")]
130        span: SourceSpan,
131        token: String,
132        expected: Vec<String>,
133    },
134    #[error("unexpected trailing tokens")]
135    #[diagnostic()]
136    ExtraToken {
137        #[label("{token} was found here, but was not expected")]
138        span: SourceSpan,
139        token: String,
140    },
141    #[error("unexpected end of file")]
142    #[diagnostic(help("expected {}", expected.as_slice().join(", or ")))]
143    UnrecognizedEof {
144        #[label("reached end of file here")]
145        span: SourceSpan,
146        expected: Vec<String>,
147    },
148    #[error("{error}")]
149    #[diagnostic(help(
150        "bare identifiers must be lowercase alphanumeric with '_', quoted identifiers can include any graphical character"
151    ))]
152    InvalidIdentifier {
153        #[source]
154        #[diagnostic(source)]
155        error: crate::ast::IdentError,
156        #[label]
157        span: SourceSpan,
158    },
159    #[error("unclosed quoted identifier")]
160    #[diagnostic()]
161    UnclosedQuote {
162        #[label("no match for quotation mark starting here")]
163        start: SourceSpan,
164    },
165    #[error("too many instructions in a single code block")]
166    #[diagnostic()]
167    CodeBlockTooBig {
168        #[label]
169        span: SourceSpan,
170    },
171    #[error("invalid constant expression: division by zero")]
172    DivisionByZero {
173        #[label]
174        span: SourceSpan,
175    },
176    #[error("unexpected string in an arithmetic expression")]
177    #[diagnostic()]
178    StringInArithmeticExpression {
179        #[label]
180        span: SourceSpan,
181    },
182    #[error("doc comment is too large")]
183    #[diagnostic(help("make sure it is less than u16::MAX bytes in length"))]
184    DocsTooLarge {
185        #[label]
186        span: SourceSpan,
187    },
188    #[error("invalid literal: {}", kind)]
189    #[diagnostic()]
190    InvalidLiteral {
191        #[label]
192        span: SourceSpan,
193        kind: LiteralErrorKind,
194    },
195    #[error("invalid literal: {}", kind)]
196    #[diagnostic()]
197    InvalidHexLiteral {
198        #[label]
199        span: SourceSpan,
200        kind: HexErrorKind,
201    },
202    #[error("invalid literal: {}", kind)]
203    #[diagnostic()]
204    InvalidBinaryLiteral {
205        #[label]
206        span: SourceSpan,
207        kind: BinErrorKind,
208    },
209    #[error("invalid MAST root literal")]
210    InvalidMastRoot {
211        #[label]
212        span: SourceSpan,
213    },
214    #[error("invalid library path: {}", message)]
215    InvalidLibraryPath {
216        #[label]
217        span: SourceSpan,
218        message: String,
219    },
220    #[error("invalid immediate: value must be in the range {}..{} (exclusive)", range.start, range.end)]
221    ImmediateOutOfRange {
222        #[label]
223        span: SourceSpan,
224        range: Range<usize>,
225    },
226    #[error("too many procedures in this module")]
227    #[diagnostic()]
228    ModuleTooLarge {
229        #[label]
230        span: SourceSpan,
231    },
232    #[error("too many re-exported procedures in this module")]
233    #[diagnostic()]
234    ModuleTooManyReexports {
235        #[label]
236        span: SourceSpan,
237    },
238    #[error(
239        "too many operands for `push`: tried to push {} elements, but only 16 can be pushed at one time",
240        count
241    )]
242    #[diagnostic()]
243    PushOverflow {
244        #[label]
245        span: SourceSpan,
246        count: usize,
247    },
248    #[error("expected a fully-qualified module path, e.g. `std::u64`")]
249    UnqualifiedImport {
250        #[label]
251        span: SourceSpan,
252    },
253    #[error(
254        "re-exporting a procedure identified by digest requires giving it a name, e.g. `export.DIGEST->foo`"
255    )]
256    UnnamedReexportOfMastRoot {
257        #[label]
258        span: SourceSpan,
259    },
260    #[error("conflicting attributes for procedure definition")]
261    #[diagnostic()]
262    AttributeConflict {
263        #[label(
264            "conflict occurs because an attribute with the same name has already been defined"
265        )]
266        span: SourceSpan,
267        #[label("previously defined here")]
268        prev: SourceSpan,
269    },
270    #[error("conflicting key-value attributes for procedure definition")]
271    #[diagnostic()]
272    AttributeKeyValueConflict {
273        #[label(
274            "conflict occurs because a key with the same name has already been set in a previous declaration"
275        )]
276        span: SourceSpan,
277        #[label("previously defined here")]
278        prev: SourceSpan,
279    },
280    #[error("invalid Advice Map key")]
281    #[diagnostic()]
282    InvalidAdvMapKey {
283        #[label(
284            "an Advice Map key must be a word, either in 64-character hex format or in array-like format `[f0,f1,f2,f3]`"
285        )]
286        span: SourceSpan,
287    },
288    #[error("invalid slice constant")]
289    #[diagnostic()]
290    InvalidSliceConstant {
291        #[label("slices are only supported over word-sized constants")]
292        span: SourceSpan,
293    },
294    #[error("invalid slice: expected valid range")]
295    #[diagnostic()]
296    InvalidRange {
297        #[label("range used for the word constant slice is malformed: `{range:?}`")]
298        span: SourceSpan,
299        range: Range<usize>,
300    },
301    #[error("invalid slice: expected non-empty range")]
302    #[diagnostic()]
303    EmptySlice {
304        #[label("range used for the word constant slice is empty: `{range:?}`")]
305        span: SourceSpan,
306        range: Range<usize>,
307    },
308    #[error("unrecognized calling convention")]
309    #[diagnostic(help("expected one of: 'fast', 'C', 'wasm', 'canon-lift', or 'canon-lower'"))]
310    UnrecognizedCallConv {
311        #[label]
312        span: SourceSpan,
313    },
314    #[error("invalid struct annotation")]
315    #[diagnostic(help("expected one of: '@packed', '@transparent', '@bigendian', or '@align(N)'"))]
316    InvalidStructAnnotation {
317        #[label]
318        span: SourceSpan,
319    },
320    #[error("invalid struct representation")]
321    #[diagnostic()]
322    InvalidStructRepr {
323        #[label("{message}")]
324        span: SourceSpan,
325        message: String,
326    },
327}
328
329impl ParsingError {
330    fn tag(&self) -> u8 {
331        // SAFETY: This is safe because we have given this enum a
332        // primitive representation with #[repr(u8)], with the first
333        // field of the underlying union-of-structs the discriminant
334        //
335        // See the section on "accessing the numeric value of the discriminant"
336        // here: https://doc.rust-lang.org/std/mem/fn.discriminant.html
337        unsafe { *<*const _>::from(self).cast::<u8>() }
338    }
339}
340
341impl Eq for ParsingError {}
342
343impl PartialEq for ParsingError {
344    fn eq(&self, other: &Self) -> bool {
345        match (self, other) {
346            (Self::Failed, Self::Failed) => true,
347            (Self::InvalidLiteral { kind: l, .. }, Self::InvalidLiteral { kind: r, .. }) => l == r,
348            (Self::InvalidHexLiteral { kind: l, .. }, Self::InvalidHexLiteral { kind: r, .. }) => {
349                l == r
350            },
351            (
352                Self::InvalidLibraryPath { message: l, .. },
353                Self::InvalidLibraryPath { message: r, .. },
354            ) => l == r,
355            (
356                Self::ImmediateOutOfRange { range: l, .. },
357                Self::ImmediateOutOfRange { range: r, .. },
358            ) => l == r,
359            (Self::PushOverflow { count: l, .. }, Self::PushOverflow { count: r, .. }) => l == r,
360            (
361                Self::UnrecognizedToken { token: ltok, expected: lexpect, .. },
362                Self::UnrecognizedToken { token: rtok, expected: rexpect, .. },
363            ) => ltok == rtok && lexpect == rexpect,
364            (Self::ExtraToken { token: ltok, .. }, Self::ExtraToken { token: rtok, .. }) => {
365                ltok == rtok
366            },
367            (
368                Self::UnrecognizedEof { expected: lexpect, .. },
369                Self::UnrecognizedEof { expected: rexpect, .. },
370            ) => lexpect == rexpect,
371            (x, y) => x.tag() == y.tag(),
372        }
373    }
374}
375
376impl ParsingError {
377    pub fn from_utf8_error(source_id: SourceId, err: core::str::Utf8Error) -> Self {
378        let start = u32::try_from(err.valid_up_to()).ok().unwrap_or(u32::MAX);
379        match err.error_len() {
380            None => Self::IncompleteUtf8 { span: SourceSpan::at(source_id, start) },
381            Some(len) => Self::InvalidUtf8 {
382                span: SourceSpan::new(source_id, start..(start + len as u32)),
383            },
384        }
385    }
386
387    pub fn from_parse_error(source_id: SourceId, err: ParseError<'_>) -> Self {
388        use super::Token;
389
390        match err {
391            ParseError::InvalidToken { location: at } => {
392                Self::InvalidToken { span: SourceSpan::at(source_id, at) }
393            },
394            ParseError::UnrecognizedToken { token: (l, Token::Eof, r), expected } => {
395                Self::UnrecognizedEof {
396                    span: SourceSpan::new(source_id, l..r),
397                    expected: simplify_expected_tokens(expected),
398                }
399            },
400            ParseError::UnrecognizedToken { token: (l, tok, r), expected } => {
401                Self::UnrecognizedToken {
402                    span: SourceSpan::new(source_id, l..r),
403                    token: tok.to_string(),
404                    expected: simplify_expected_tokens(expected),
405                }
406            },
407            ParseError::ExtraToken { token: (l, tok, r) } => Self::ExtraToken {
408                span: SourceSpan::new(source_id, l..r),
409                token: tok.to_string(),
410            },
411            ParseError::UnrecognizedEof { location: at, expected } => Self::UnrecognizedEof {
412                span: SourceSpan::new(source_id, at..at),
413                expected: simplify_expected_tokens(expected),
414            },
415            ParseError::User { error } => error,
416        }
417    }
418}
419
420// HELPER FUNCTIONS
421// ================================================================================================
422
423// The parser generator will show every token that is expected in some scenarios, so to avoid
424// cluttering the diagnostic output with all of the instruction opcodes, we collapse them into a
425// single token.
426fn simplify_expected_tokens(expected: Vec<String>) -> Vec<String> {
427    use super::Token;
428    let mut has_instruction = false;
429    let mut has_ctrl = false;
430    let mut has_type = false;
431    expected
432        .into_iter()
433        .filter_map(|t| {
434            let tok = match t.as_str() {
435                "bare_ident" => return Some("identifier".to_string()),
436                "const_ident" => return Some("constant identifier".to_string()),
437                "quoted_ident" => return Some("quoted identifier".to_string()),
438                "doc_comment" => return Some("doc comment".to_string()),
439                "hex_value" => return Some("hex-encoded literal".to_string()),
440                "bin_value" => return Some("bin-encoded literal".to_string()),
441                "uint" => return Some("integer literal".to_string()),
442                "EOF" => return Some("end of file".to_string()),
443                other => other[1..].strip_suffix('"').and_then(Token::parse),
444            };
445            match tok {
446                Some(Token::If | Token::While | Token::Repeat) => {
447                    if !has_ctrl {
448                        has_ctrl = true;
449                        Some("control flow opcode (e.g. \"if.true\")".to_string())
450                    } else {
451                        None
452                    }
453                },
454                Some(tok) if tok.is_instruction() => {
455                    if !has_instruction {
456                        has_instruction = true;
457                        Some("primitive opcode (e.g. \"add\")".to_string())
458                    } else {
459                        None
460                    }
461                },
462                Some(tok) if tok.is_type_keyword() => {
463                    if !has_type {
464                        has_type = true;
465                        Some("type (e.g. \"felt\")".to_string())
466                    } else {
467                        None
468                    }
469                },
470                _ => Some(t),
471            }
472        })
473        .collect()
474}