miden_assembly_syntax/parser/
error.rs

1// Allow unused assignments - required by miette::Diagnostic derive macro
2#![allow(unused_assignments)]
3
4use alloc::{
5    string::{String, ToString},
6    vec::Vec,
7};
8use core::{fmt, ops::Range};
9
10use miden_debug_types::{SourceId, SourceSpan};
11use miden_utils_diagnostics::{Diagnostic, miette};
12
13use super::ParseError;
14
15// LITERAL ERROR KIND
16// ================================================================================================
17
/// The reasons a numeric literal can be rejected by the parser.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum LiteralErrorKind {
    /// Nothing was provided where a literal was expected
    Empty,
    /// A character that is not a valid digit was encountered
    InvalidDigit,
    /// The value does not fit in a `u32` (i.e. it exceeds `u32::MAX`)
    U32Overflow,
    /// The value does not fit in the field (i.e. it overflows `Felt::MODULUS`)
    FeltOverflow,
    /// The value was expected to be a valid bit size (the rendered message cites `0..63`)
    InvalidBitSize,
}

impl fmt::Display for LiteralErrorKind {
    /// Writes the human-readable description of this error kind.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Pick the message first, then emit it with a single write.
        let message = match *self {
            Self::Empty => "input was empty",
            Self::InvalidDigit => "invalid digit",
            Self::U32Overflow => "value overflowed the u32 range",
            Self::FeltOverflow => "value overflowed the field modulus",
            Self::InvalidBitSize => "expected value to be a valid bit size, e.g. 0..63",
        };
        f.write_str(message)
    }
}
45
46// HEX ERROR KIND
47// ================================================================================================
48
/// The reasons a hex-encoded literal can be rejected by the parser.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum HexErrorKind {
    /// Every byte requires two hex digits, but the input had an odd number of digits
    MissingDigits,
    /// Valid hex-encoded integers are expected to consist of 2, 4, 8, 16, or 64 digits,
    /// but the input had some other number of digits.
    Invalid,
    /// The hex-encoded value overflows `Felt::MODULUS`, the maximum integral value
    Overflow,
    /// The hex-encoded value has more than 64 digits
    TooLong,
}

impl fmt::Display for HexErrorKind {
    /// Writes the human-readable description of this error kind.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Pick the message first, then emit it with a single write.
        let message = match *self {
            Self::MissingDigits => "expected number of hex digits to be a multiple of 2",
            Self::Invalid => "expected 2, 4, 8, 16, or 64 hex digits",
            Self::Overflow => "value overflowed the field modulus",
            Self::TooLong => {
                "value has too many digits, long hex strings must contain exactly 64 digits"
            },
        };
        f.write_str(message)
    }
}
76
77// BINARY ERROR KIND
78// ================================================================================================
79
/// The reasons a binary-encoded literal can be rejected by the parser.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum BinErrorKind {
    /// The bin-encoded value has more than 32 digits
    TooLong,
}

impl fmt::Display for BinErrorKind {
    /// Writes the human-readable description of this error kind.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Kept as an exhaustive `match` so that adding a variant forces a new message here.
        let message = match *self {
            Self::TooLong => {
                "value has too many digits, binary string can contain no more than 32 digits"
            },
        };
        f.write_str(message)
    }
}
95
96// PARSING ERROR
97// ================================================================================================
98
/// The errors that can be reported while parsing.
///
/// Almost every variant carries one or more [`SourceSpan`]s, which are attached to the rendered
/// diagnostic as labels. The `#[error(...)]` attribute on each variant is its primary message, and
/// `#[diagnostic(help(...))]`, where present, supplies additional help text.
///
/// The enum is `#[repr(u8)]` so that its discriminant can be read directly as a `u8` (see the
/// private `tag` method in this file).
#[derive(Debug, Default, thiserror::Error, Diagnostic)]
#[repr(u8)]
pub enum ParsingError {
    /// Generic parse failure carrying no location; this is the `Default` variant.
    #[default]
    #[error("parsing failed due to unexpected input")]
    #[diagnostic()]
    Failed = 0,
    /// The input contains a byte sequence which is not valid UTF-8.
    #[error("expected input to be valid utf8, but invalid byte sequences were found")]
    #[diagnostic()]
    InvalidUtf8 {
        #[label("invalid byte sequence starts here")]
        span: SourceSpan,
    },
    /// The input ended before the final UTF-8 codepoint was complete.
    #[error(
        "expected input to be valid utf8, but end-of-file was reached before final codepoint was read"
    )]
    #[diagnostic()]
    IncompleteUtf8 {
        #[label("the codepoint starting here is incomplete")]
        span: SourceSpan,
    },
    /// Invalid syntax was encountered at `span`.
    #[error("invalid syntax")]
    #[diagnostic()]
    InvalidToken {
        #[label("occurs here")]
        span: SourceSpan,
    },
    /// `token` was found where something else was expected; the `expected` alternatives are
    /// joined with ", or " in the help text.
    #[error("invalid syntax")]
    #[diagnostic(help("expected {}", expected.as_slice().join(", or ")))]
    UnrecognizedToken {
        #[label("found a {token} here")]
        span: SourceSpan,
        token: String,
        expected: Vec<String>,
    },
    /// An unexpected trailing token was found.
    #[error("unexpected trailing tokens")]
    #[diagnostic()]
    ExtraToken {
        #[label("{token} was found here, but was not expected")]
        span: SourceSpan,
        token: String,
    },
    /// End of file was reached unexpectedly; the `expected` alternatives are joined with
    /// ", or " in the help text.
    #[error("unexpected end of file")]
    #[diagnostic(help("expected {}", expected.as_slice().join(", or ")))]
    UnrecognizedEof {
        #[label("reached end of file here")]
        span: SourceSpan,
        expected: Vec<String>,
    },
    /// An identifier was rejected; the message is that of the wrapped `IdentError`.
    #[error("{error}")]
    #[diagnostic(help(
        "bare identifiers must be lowercase alphanumeric with '_', quoted identifiers can include any graphical character"
    ))]
    InvalidIdentifier {
        #[source]
        #[diagnostic(source)]
        error: crate::ast::IdentError,
        #[label]
        span: SourceSpan,
    },
    /// A quoted identifier was opened at `start` but never closed.
    #[error("unclosed quoted identifier")]
    #[diagnostic()]
    UnclosedQuote {
        #[label("no match for quotation mark starting here")]
        start: SourceSpan,
    },
    /// A single code block contains too many instructions.
    #[error("too many instructions in a single code block")]
    #[diagnostic()]
    CodeBlockTooBig {
        #[label]
        span: SourceSpan,
    },
    /// A constant expression divides by zero.
    #[error("invalid constant expression: division by zero")]
    DivisionByZero {
        #[label]
        span: SourceSpan,
    },
    /// A string appeared inside an arithmetic expression.
    #[error("unexpected string in an arithmetic expression")]
    #[diagnostic()]
    StringInArithmeticExpression {
        #[label]
        span: SourceSpan,
    },
    /// A doc comment exceeds the permitted size (see the help text for the limit).
    #[error("doc comment is too large")]
    #[diagnostic(help("make sure it is less than u16::MAX bytes in length"))]
    DocsTooLarge {
        #[label]
        span: SourceSpan,
    },
    /// A literal was rejected for the reason given by `kind`.
    #[error("invalid literal: {}", kind)]
    #[diagnostic()]
    InvalidLiteral {
        #[label]
        span: SourceSpan,
        kind: LiteralErrorKind,
    },
    /// A hex-encoded literal was rejected for the reason given by `kind`.
    #[error("invalid literal: {}", kind)]
    #[diagnostic()]
    InvalidHexLiteral {
        #[label]
        span: SourceSpan,
        kind: HexErrorKind,
    },
    /// A binary-encoded literal was rejected for the reason given by `kind`.
    #[error("invalid literal: {}", kind)]
    #[diagnostic()]
    InvalidBinaryLiteral {
        #[label]
        span: SourceSpan,
        kind: BinErrorKind,
    },
    /// A MAST root literal is invalid.
    #[error("invalid MAST root literal")]
    InvalidMastRoot {
        #[label]
        span: SourceSpan,
    },
    /// A library path is invalid; `message` describes why.
    #[error("invalid library path: {}", message)]
    InvalidLibraryPath {
        #[label]
        span: SourceSpan,
        message: String,
    },
    /// An immediate value falls outside the permitted `range`.
    #[error("invalid immediate: value must be in the range {}..{} (exclusive)", range.start, range.end)]
    ImmediateOutOfRange {
        #[label]
        span: SourceSpan,
        range: Range<usize>,
    },
    /// The module defines too many procedures.
    #[error("too many procedures in this module")]
    #[diagnostic()]
    ModuleTooLarge {
        #[label]
        span: SourceSpan,
    },
    /// The module re-exports too many procedures.
    #[error("too many re-exported procedures in this module")]
    #[diagnostic()]
    ModuleTooManyReexports {
        #[label]
        span: SourceSpan,
    },
    /// A `push` was given `count` operands, which is more than the 16 allowed at one time.
    #[error(
        "too many operands for `push`: tried to push {} elements, but only 16 can be pushed at one time",
        count
    )]
    #[diagnostic()]
    PushOverflow {
        #[label]
        span: SourceSpan,
        count: usize,
    },
    /// An import did not use a fully-qualified module path.
    #[error("expected a fully-qualified module path, e.g. `std::u64`")]
    UnqualifiedImport {
        #[label]
        span: SourceSpan,
    },
    /// A procedure identified by digest was re-exported without being given a name.
    #[error(
        "re-exporting a procedure identified by digest requires giving it a name, e.g. `pub use DIGEST->foo`"
    )]
    UnnamedReexportOfMastRoot {
        #[label]
        span: SourceSpan,
    },
    /// An attribute at `span` conflicts with a same-named attribute previously defined at `prev`.
    #[error("conflicting attributes for procedure definition")]
    #[diagnostic()]
    AttributeConflict {
        #[label(
            "conflict occurs because an attribute with the same name has already been defined"
        )]
        span: SourceSpan,
        #[label("previously defined here")]
        prev: SourceSpan,
    },
    /// A key-value attribute at `span` sets a key that was already set at `prev`.
    #[error("conflicting key-value attributes for procedure definition")]
    #[diagnostic()]
    AttributeKeyValueConflict {
        #[label(
            "conflict occurs because a key with the same name has already been set in a previous declaration"
        )]
        span: SourceSpan,
        #[label("previously defined here")]
        prev: SourceSpan,
    },
    /// An Advice Map key is not a valid word (see the label for the accepted formats).
    #[error("invalid Advice Map key")]
    #[diagnostic()]
    InvalidAdvMapKey {
        #[label(
            "an Advice Map key must be a word, either in 64-character hex format or in array-like format `[f0,f1,f2,f3]`"
        )]
        span: SourceSpan,
    },
    /// A slice was taken over a constant that is not word-sized.
    #[error("invalid slice constant")]
    #[diagnostic()]
    InvalidSliceConstant {
        #[label("slices are only supported over word-sized constants")]
        span: SourceSpan,
    },
    /// The `range` used to slice a word constant is malformed.
    #[error("invalid slice: expected valid range")]
    #[diagnostic()]
    InvalidRange {
        #[label("range used for the word constant slice is malformed: `{range:?}`")]
        span: SourceSpan,
        range: Range<usize>,
    },
    /// The `range` used to slice a word constant is empty.
    #[error("invalid slice: expected non-empty range")]
    #[diagnostic()]
    EmptySlice {
        #[label("range used for the word constant slice is empty: `{range:?}`")]
        span: SourceSpan,
        range: Range<usize>,
    },
    /// A calling convention name was not recognized (see the help text for the accepted names).
    #[error("unrecognized calling convention")]
    #[diagnostic(help("expected one of: 'fast', 'C', 'wasm', 'canon-lift', or 'canon-lower'"))]
    UnrecognizedCallConv {
        #[label]
        span: SourceSpan,
    },
    /// A struct annotation was not recognized (see the help text for the accepted annotations).
    #[error("invalid struct annotation")]
    #[diagnostic(help("expected one of: '@packed', '@transparent', '@bigendian', or '@align(N)'"))]
    InvalidStructAnnotation {
        #[label]
        span: SourceSpan,
    },
    /// A struct representation is invalid; `message` is used as the label text.
    #[error("invalid struct representation")]
    #[diagnostic()]
    InvalidStructRepr {
        #[label("{message}")]
        span: SourceSpan,
        message: String,
    },
    /// A removed `instruction` was used; the help text suggests `replacement`.
    #[error("deprecated instruction: `{instruction}` has been removed")]
    #[diagnostic(help("use `{}` instead", replacement))]
    DeprecatedInstruction {
        #[label("this instruction is no longer supported")]
        span: SourceSpan,
        instruction: String,
        replacement: String,
    },
    /// A procedure `@locals` attribute is invalid; `message` is used as the label text.
    #[error("invalid procedure @locals attribute")]
    #[diagnostic()]
    InvalidLocalsAttr {
        #[label("{message}")]
        span: SourceSpan,
        message: String,
    },
    /// The `padding` given to `adv.push_mapvaln` is not one of the accepted values
    /// (see the help text).
    #[error("invalid padding value for the `adv.push_mapvaln` instruction: {padding}")]
    #[diagnostic(help("valid padding values are 0, 4, and 8"))]
    InvalidPadValue {
        #[label]
        span: SourceSpan,
        padding: u8,
    },
}
350
351impl ParsingError {
352    fn tag(&self) -> u8 {
353        // SAFETY: This is safe because we have given this enum a
354        // primitive representation with #[repr(u8)], with the first
355        // field of the underlying union-of-structs the discriminant
356        //
357        // See the section on "accessing the numeric value of the discriminant"
358        // here: https://doc.rust-lang.org/std/mem/fn.discriminant.html
359        unsafe { *<*const _>::from(self).cast::<u8>() }
360    }
361}
362
363impl Eq for ParsingError {}
364
365impl PartialEq for ParsingError {
366    fn eq(&self, other: &Self) -> bool {
367        match (self, other) {
368            (Self::Failed, Self::Failed) => true,
369            (Self::InvalidLiteral { kind: l, .. }, Self::InvalidLiteral { kind: r, .. }) => l == r,
370            (Self::InvalidHexLiteral { kind: l, .. }, Self::InvalidHexLiteral { kind: r, .. }) => {
371                l == r
372            },
373            (
374                Self::InvalidLibraryPath { message: l, .. },
375                Self::InvalidLibraryPath { message: r, .. },
376            ) => l == r,
377            (
378                Self::ImmediateOutOfRange { range: l, .. },
379                Self::ImmediateOutOfRange { range: r, .. },
380            ) => l == r,
381            (Self::PushOverflow { count: l, .. }, Self::PushOverflow { count: r, .. }) => l == r,
382            (
383                Self::UnrecognizedToken { token: ltok, expected: lexpect, .. },
384                Self::UnrecognizedToken { token: rtok, expected: rexpect, .. },
385            ) => ltok == rtok && lexpect == rexpect,
386            (Self::ExtraToken { token: ltok, .. }, Self::ExtraToken { token: rtok, .. }) => {
387                ltok == rtok
388            },
389            (
390                Self::UnrecognizedEof { expected: lexpect, .. },
391                Self::UnrecognizedEof { expected: rexpect, .. },
392            ) => lexpect == rexpect,
393            (x, y) => x.tag() == y.tag(),
394        }
395    }
396}
397
398impl ParsingError {
399    pub fn from_utf8_error(source_id: SourceId, err: core::str::Utf8Error) -> Self {
400        let start = u32::try_from(err.valid_up_to()).ok().unwrap_or(u32::MAX);
401        match err.error_len() {
402            None => Self::IncompleteUtf8 { span: SourceSpan::at(source_id, start) },
403            Some(len) => Self::InvalidUtf8 {
404                span: SourceSpan::new(source_id, start..(start + len as u32)),
405            },
406        }
407    }
408
409    pub fn from_parse_error(source_id: SourceId, err: ParseError<'_>) -> Self {
410        use super::Token;
411
412        match err {
413            ParseError::InvalidToken { location: at } => {
414                Self::InvalidToken { span: SourceSpan::at(source_id, at) }
415            },
416            ParseError::UnrecognizedToken { token: (l, Token::Eof, r), expected } => {
417                Self::UnrecognizedEof {
418                    span: SourceSpan::new(source_id, l..r),
419                    expected: simplify_expected_tokens(expected),
420                }
421            },
422            ParseError::UnrecognizedToken { token: (l, tok, r), expected } => {
423                Self::UnrecognizedToken {
424                    span: SourceSpan::new(source_id, l..r),
425                    token: tok.to_string(),
426                    expected: simplify_expected_tokens(expected),
427                }
428            },
429            ParseError::ExtraToken { token: (l, tok, r) } => Self::ExtraToken {
430                span: SourceSpan::new(source_id, l..r),
431                token: tok.to_string(),
432            },
433            ParseError::UnrecognizedEof { location: at, expected } => Self::UnrecognizedEof {
434                span: SourceSpan::new(source_id, at..at),
435                expected: simplify_expected_tokens(expected),
436            },
437            ParseError::User { error } => error,
438        }
439    }
440}
441
442// HELPER FUNCTIONS
443// ================================================================================================
444
445// The parser generator will show every token that is expected in some scenarios, so to avoid
446// cluttering the diagnostic output with all of the instruction opcodes, we collapse them into a
447// single token.
448fn simplify_expected_tokens(expected: Vec<String>) -> Vec<String> {
449    use super::Token;
450    let mut has_instruction = false;
451    let mut has_ctrl = false;
452    let mut has_type = false;
453    expected
454        .into_iter()
455        .filter_map(|t| {
456            let tok = match t.as_str() {
457                "bare_ident" => return Some("identifier".to_string()),
458                "const_ident" => return Some("constant identifier".to_string()),
459                "quoted_ident" => return Some("quoted identifier".to_string()),
460                "doc_comment" => return Some("doc comment".to_string()),
461                "hex_value" => return Some("hex-encoded literal".to_string()),
462                "bin_value" => return Some("bin-encoded literal".to_string()),
463                "uint" => return Some("integer literal".to_string()),
464                "EOF" => return Some("end of file".to_string()),
465                other => other[1..].strip_suffix('"').and_then(Token::parse),
466            };
467            match tok {
468                Some(Token::If | Token::While | Token::Repeat) => {
469                    if !has_ctrl {
470                        has_ctrl = true;
471                        Some("control flow opcode (e.g. \"if.true\")".to_string())
472                    } else {
473                        None
474                    }
475                },
476                Some(tok) if tok.is_instruction() => {
477                    if !has_instruction {
478                        has_instruction = true;
479                        Some("primitive opcode (e.g. \"add\")".to_string())
480                    } else {
481                        None
482                    }
483                },
484                Some(tok) if tok.is_type_keyword() => {
485                    if !has_type {
486                        has_type = true;
487                        Some("type (e.g. \"felt\")".to_string())
488                    } else {
489                        None
490                    }
491                },
492                _ => Some(t),
493            }
494        })
495        .collect()
496}