Skip to main content

miden_assembly_syntax/parser/
error.rs

1// Allow unused assignments - required by miette::Diagnostic derive macro
2#![allow(unused_assignments)]
3
4use alloc::{
5    string::{String, ToString},
6    vec::Vec,
7};
8use core::{fmt, ops::Range};
9
10use miden_debug_types::{SourceId, SourceSpan};
11use miden_utils_diagnostics::{Diagnostic, miette};
12
13use super::ParseError;
14
15// LITERAL ERROR KIND
16// ================================================================================================
17
/// The reasons a numeric literal can be rejected.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum LiteralErrorKind {
    /// Nothing was provided where a literal was expected
    Empty,
    /// A character that is not a valid digit was encountered
    InvalidDigit,
    /// The value is larger than `u32::MAX`
    U32Overflow,
    /// The value is larger than `Felt::ORDER_U64`
    FeltOverflow,
    /// The value was expected to be a bit size (reported to the user as `0..63`)
    InvalidBitSize,
}

impl fmt::Display for LiteralErrorKind {
    /// Writes the human-readable description of this error kind.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Select the message first, then emit it with a single write.
        let description = match *self {
            Self::Empty => "input was empty",
            Self::InvalidDigit => "invalid digit",
            Self::U32Overflow => "value overflowed the u32 range",
            Self::FeltOverflow => "value overflowed the field modulus",
            Self::InvalidBitSize => "expected value to be a valid bit size, e.g. 0..63",
        };
        f.write_str(description)
    }
}
45
46// HEX ERROR KIND
47// ================================================================================================
48
/// The reasons a hex-encoded literal can be rejected.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum HexErrorKind {
    /// Every byte requires two hex digits, but the input had an odd number of digits
    MissingDigits,
    /// The input consisted of an unsupported number of digits; the message shown to the user
    /// lists 2, 4, 8, 16, or 64 digits as the accepted sizes.
    Invalid,
    /// The hex-encoded value is larger than `Felt::ORDER_U64`, the maximum integral value
    Overflow,
    /// The hex-encoded value has more than 64 digits
    TooLong,
}

impl fmt::Display for HexErrorKind {
    /// Writes the human-readable description of this error kind.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Select the message first, then emit it with a single write.
        let description = match *self {
            Self::MissingDigits => "expected number of hex digits to be a multiple of 2",
            Self::Invalid => "expected 2, 4, 8, 16, or 64 hex digits",
            Self::Overflow => "value overflowed the field modulus",
            Self::TooLong => {
                "value has too many digits, long hex strings must contain exactly 64 digits"
            },
        };
        f.write_str(description)
    }
}
76
77// BINARY ERROR KIND
78// ================================================================================================
79
/// The reasons a binary-encoded literal can be rejected.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum BinErrorKind {
    /// The bin-encoded value has more than 32 digits
    TooLong,
}

impl fmt::Display for BinErrorKind {
    /// Writes the human-readable description of this error kind.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Kept as a `match` so that adding a variant forces this message table to be updated.
        let description = match *self {
            Self::TooLong => {
                "value has too many digits, binary string can contain no more than 32 digits"
            },
        };
        f.write_str(description)
    }
}
95
96// PARSING ERROR
97// ================================================================================================
98
99#[derive(Debug, Default, thiserror::Error, Diagnostic)]
100#[repr(u8)]
101pub enum ParsingError {
102    #[default]
103    #[error("parsing failed due to unexpected input")]
104    #[diagnostic()]
105    Failed = 0,
106    #[error("expected input to be valid utf8, but invalid byte sequences were found")]
107    #[diagnostic()]
108    InvalidUtf8 {
109        #[label("invalid byte sequence starts here")]
110        span: SourceSpan,
111    },
112    #[error(
113        "expected input to be valid utf8, but end-of-file was reached before final codepoint was read"
114    )]
115    #[diagnostic()]
116    IncompleteUtf8 {
117        #[label("the codepoint starting here is incomplete")]
118        span: SourceSpan,
119    },
120    #[error("invalid syntax")]
121    #[diagnostic()]
122    InvalidToken {
123        #[label("occurs here")]
124        span: SourceSpan,
125    },
126    #[error("invalid syntax")]
127    #[diagnostic(help("expected {}", expected.as_slice().join(", or ")))]
128    UnrecognizedToken {
129        #[label("found a {token} here")]
130        span: SourceSpan,
131        token: String,
132        expected: Vec<String>,
133    },
134    #[error("unexpected trailing tokens")]
135    #[diagnostic()]
136    ExtraToken {
137        #[label("{token} was found here, but was not expected")]
138        span: SourceSpan,
139        token: String,
140    },
141    #[error("unexpected end of file")]
142    #[diagnostic(help("expected {}", expected.as_slice().join(", or ")))]
143    UnrecognizedEof {
144        #[label("reached end of file here")]
145        span: SourceSpan,
146        expected: Vec<String>,
147    },
148    #[error("{error}")]
149    #[diagnostic(help(
150        "bare identifiers must be lowercase alphanumeric with '_', quoted identifiers can include any graphical character"
151    ))]
152    InvalidIdentifier {
153        #[source]
154        #[diagnostic(source)]
155        error: crate::ast::IdentError,
156        #[label]
157        span: SourceSpan,
158    },
159    #[error("unclosed quoted identifier")]
160    #[diagnostic()]
161    UnclosedQuote {
162        #[label("no match for quotation mark starting here")]
163        start: SourceSpan,
164    },
165    #[error("too many instructions in a single code block")]
166    #[diagnostic()]
167    CodeBlockTooBig {
168        #[label]
169        span: SourceSpan,
170    },
171    #[error("invalid constant expression: division by zero")]
172    DivisionByZero {
173        #[label]
174        span: SourceSpan,
175    },
176    #[error("constant expression nesting depth exceeded")]
177    #[diagnostic(help("constant expression folding exceeded the maximum depth of {max_depth}"))]
178    ConstExprDepthExceeded {
179        #[label]
180        span: SourceSpan,
181        max_depth: usize,
182    },
183    #[error("invalid constant expression: value is larger than expected range")]
184    ConstantOverflow {
185        #[label]
186        span: SourceSpan,
187    },
188    #[error("unexpected string in an arithmetic expression")]
189    #[diagnostic()]
190    StringInArithmeticExpression {
191        #[label]
192        span: SourceSpan,
193    },
194    #[error("doc comment is too large")]
195    #[diagnostic(help("make sure it is less than u16::MAX bytes in length"))]
196    DocsTooLarge {
197        #[label]
198        span: SourceSpan,
199    },
200    #[error("invalid literal: {}", kind)]
201    #[diagnostic()]
202    InvalidLiteral {
203        #[label]
204        span: SourceSpan,
205        kind: LiteralErrorKind,
206    },
207    #[error("invalid literal: {}", kind)]
208    #[diagnostic()]
209    InvalidHexLiteral {
210        #[label]
211        span: SourceSpan,
212        kind: HexErrorKind,
213    },
214    #[error("invalid literal: {}", kind)]
215    #[diagnostic()]
216    InvalidBinaryLiteral {
217        #[label]
218        span: SourceSpan,
219        kind: BinErrorKind,
220    },
221    #[error("invalid MAST root literal")]
222    InvalidMastRoot {
223        #[label]
224        span: SourceSpan,
225    },
226    #[error("invalid library path: {}", message)]
227    InvalidLibraryPath {
228        #[label]
229        span: SourceSpan,
230        message: String,
231    },
232    #[error("invalid immediate: value must be in the range {}..{} (exclusive)", range.start, range.end)]
233    ImmediateOutOfRange {
234        #[label]
235        span: SourceSpan,
236        range: Range<usize>,
237    },
238    #[error("too many procedures in this module")]
239    #[diagnostic()]
240    ModuleTooLarge {
241        #[label]
242        span: SourceSpan,
243    },
244    #[error("too many re-exported procedures in this module")]
245    #[diagnostic()]
246    ModuleTooManyReexports {
247        #[label]
248        span: SourceSpan,
249    },
250    #[error(
251        "too many operands for `push`: tried to push {} elements, but only 16 can be pushed at one time",
252        count
253    )]
254    #[diagnostic()]
255    PushOverflow {
256        #[label]
257        span: SourceSpan,
258        count: usize,
259    },
260    #[error("expected a fully-qualified module path, e.g. `std::u64`")]
261    UnqualifiedImport {
262        #[label]
263        span: SourceSpan,
264    },
265    #[error(
266        "re-exporting a procedure identified by digest requires giving it a name, e.g. `pub use DIGEST->foo`"
267    )]
268    UnnamedReexportOfMastRoot {
269        #[label]
270        span: SourceSpan,
271    },
272    #[error("conflicting attributes for procedure definition")]
273    #[diagnostic()]
274    AttributeConflict {
275        #[label(
276            "conflict occurs because an attribute with the same name has already been defined"
277        )]
278        span: SourceSpan,
279        #[label("previously defined here")]
280        prev: SourceSpan,
281    },
282    #[error("conflicting key-value attributes for procedure definition")]
283    #[diagnostic()]
284    AttributeKeyValueConflict {
285        #[label(
286            "conflict occurs because a key with the same name has already been set in a previous declaration"
287        )]
288        span: SourceSpan,
289        #[label("previously defined here")]
290        prev: SourceSpan,
291    },
292    #[error("invalid Advice Map key")]
293    #[diagnostic()]
294    InvalidAdvMapKey {
295        #[label(
296            "an Advice Map key must be a word, either in 64-character hex format or in array-like format `[f0,f1,f2,f3]`"
297        )]
298        span: SourceSpan,
299    },
300    #[error("invalid slice constant")]
301    #[diagnostic()]
302    InvalidSliceConstant {
303        #[label("slices are only supported over word-sized constants")]
304        span: SourceSpan,
305    },
306    #[error("invalid slice: expected valid range")]
307    #[diagnostic()]
308    InvalidRange {
309        #[label("range used for the word constant slice is malformed: `{range:?}`")]
310        span: SourceSpan,
311        range: Range<usize>,
312    },
313    #[error("invalid slice: expected non-empty range")]
314    #[diagnostic()]
315    EmptySlice {
316        #[label("range used for the word constant slice is empty: `{range:?}`")]
317        span: SourceSpan,
318        range: Range<usize>,
319    },
320    #[error("unrecognized calling convention")]
321    #[diagnostic(help("expected one of: 'fast', 'C', 'wasm', 'canon-lift', or 'canon-lower'"))]
322    UnrecognizedCallConv {
323        #[label]
324        span: SourceSpan,
325    },
326    #[error("invalid struct annotation")]
327    #[diagnostic(help("expected one of: '@packed', '@transparent', '@bigendian', or '@align(N)'"))]
328    InvalidStructAnnotation {
329        #[label]
330        span: SourceSpan,
331    },
332    #[error("invalid struct representation")]
333    #[diagnostic()]
334    InvalidStructRepr {
335        #[label("{message}")]
336        span: SourceSpan,
337        message: String,
338    },
339    #[error("deprecated instruction: `{instruction}` has been removed")]
340    #[diagnostic(help("use `{}` instead", replacement))]
341    DeprecatedInstruction {
342        #[label("this instruction is no longer supported")]
343        span: SourceSpan,
344        instruction: String,
345        replacement: String,
346    },
347    #[error("invalid procedure @locals attribute")]
348    #[diagnostic()]
349    InvalidLocalsAttr {
350        #[label("{message}")]
351        span: SourceSpan,
352        message: String,
353    },
354    #[error("invalid padding value for the `adv.push_mapvaln` instruction: {padding}")]
355    #[diagnostic(help("valid padding values are 0, 4, and 8"))]
356    InvalidPadValue {
357        #[label]
358        span: SourceSpan,
359        padding: u8,
360    },
361}
362
363impl ParsingError {
364    fn tag(&self) -> u8 {
365        // SAFETY: This is safe because we have given this enum a
366        // primitive representation with #[repr(u8)], with the first
367        // field of the underlying union-of-structs the discriminant
368        //
369        // See the section on "accessing the numeric value of the discriminant"
370        // here: https://doc.rust-lang.org/std/mem/fn.discriminant.html
371        unsafe { *<*const _>::from(self).cast::<u8>() }
372    }
373}
374
375impl Eq for ParsingError {}
376
377impl PartialEq for ParsingError {
378    fn eq(&self, other: &Self) -> bool {
379        match (self, other) {
380            (Self::Failed, Self::Failed) => true,
381            (Self::InvalidLiteral { kind: l, .. }, Self::InvalidLiteral { kind: r, .. }) => l == r,
382            (Self::InvalidHexLiteral { kind: l, .. }, Self::InvalidHexLiteral { kind: r, .. }) => {
383                l == r
384            },
385            (
386                Self::InvalidLibraryPath { message: l, .. },
387                Self::InvalidLibraryPath { message: r, .. },
388            ) => l == r,
389            (
390                Self::ImmediateOutOfRange { range: l, .. },
391                Self::ImmediateOutOfRange { range: r, .. },
392            ) => l == r,
393            (Self::PushOverflow { count: l, .. }, Self::PushOverflow { count: r, .. }) => l == r,
394            (
395                Self::UnrecognizedToken { token: ltok, expected: lexpect, .. },
396                Self::UnrecognizedToken { token: rtok, expected: rexpect, .. },
397            ) => ltok == rtok && lexpect == rexpect,
398            (Self::ExtraToken { token: ltok, .. }, Self::ExtraToken { token: rtok, .. }) => {
399                ltok == rtok
400            },
401            (
402                Self::UnrecognizedEof { expected: lexpect, .. },
403                Self::UnrecognizedEof { expected: rexpect, .. },
404            ) => lexpect == rexpect,
405            (x, y) => x.tag() == y.tag(),
406        }
407    }
408}
409
410impl ParsingError {
411    pub fn from_utf8_error(source_id: SourceId, err: core::str::Utf8Error) -> Self {
412        let start = u32::try_from(err.valid_up_to()).ok().unwrap_or(u32::MAX);
413        match err.error_len() {
414            None => Self::IncompleteUtf8 { span: SourceSpan::at(source_id, start) },
415            Some(len) => Self::InvalidUtf8 {
416                span: SourceSpan::new(source_id, start..(start + len as u32)),
417            },
418        }
419    }
420
421    pub fn from_parse_error(source_id: SourceId, err: ParseError<'_>) -> Self {
422        use super::Token;
423
424        match err {
425            ParseError::InvalidToken { location: at } => {
426                Self::InvalidToken { span: SourceSpan::at(source_id, at) }
427            },
428            ParseError::UnrecognizedToken { token: (l, Token::Eof, r), expected } => {
429                Self::UnrecognizedEof {
430                    span: SourceSpan::new(source_id, l..r),
431                    expected: simplify_expected_tokens(expected),
432                }
433            },
434            ParseError::UnrecognizedToken { token: (l, tok, r), expected } => {
435                Self::UnrecognizedToken {
436                    span: SourceSpan::new(source_id, l..r),
437                    token: tok.to_string(),
438                    expected: simplify_expected_tokens(expected),
439                }
440            },
441            ParseError::ExtraToken { token: (l, tok, r) } => Self::ExtraToken {
442                span: SourceSpan::new(source_id, l..r),
443                token: tok.to_string(),
444            },
445            ParseError::UnrecognizedEof { location: at, expected } => Self::UnrecognizedEof {
446                span: SourceSpan::new(source_id, at..at),
447                expected: simplify_expected_tokens(expected),
448            },
449            ParseError::User { error } => error,
450        }
451    }
452}
453
454// HELPER FUNCTIONS
455// ================================================================================================
456
457// The parser generator will show every token that is expected in some scenarios, so to avoid
458// cluttering the diagnostic output with all of the instruction opcodes, we collapse them into a
459// single token.
460fn simplify_expected_tokens(expected: Vec<String>) -> Vec<String> {
461    use super::Token;
462    let mut has_instruction = false;
463    let mut has_ctrl = false;
464    let mut has_type = false;
465    expected
466        .into_iter()
467        .filter_map(|t| {
468            let tok = match t.as_str() {
469                "bare_ident" => return Some("identifier".to_string()),
470                "const_ident" => return Some("constant identifier".to_string()),
471                "quoted_ident" => return Some("quoted identifier".to_string()),
472                "doc_comment" => return Some("doc comment".to_string()),
473                "hex_value" => return Some("hex-encoded literal".to_string()),
474                "bin_value" => return Some("bin-encoded literal".to_string()),
475                "uint" => return Some("integer literal".to_string()),
476                "EOF" => return Some("end of file".to_string()),
477                other => other[1..].strip_suffix('"').and_then(Token::parse),
478            };
479            match tok {
480                Some(Token::If | Token::While | Token::Repeat) => {
481                    if !has_ctrl {
482                        has_ctrl = true;
483                        Some("control flow opcode (e.g. \"if.true\")".to_string())
484                    } else {
485                        None
486                    }
487                },
488                Some(tok) if tok.is_instruction() => {
489                    if !has_instruction {
490                        has_instruction = true;
491                        Some("primitive opcode (e.g. \"add\")".to_string())
492                    } else {
493                        None
494                    }
495                },
496                Some(tok) if tok.is_type_keyword() => {
497                    if !has_type {
498                        has_type = true;
499                        Some("type (e.g. \"felt\")".to_string())
500                    } else {
501                        None
502                    }
503                },
504                _ => Some(t),
505            }
506        })
507        .collect()
508}