1use alloc::{
2 string::{String, ToString},
3 vec::Vec,
4};
5use core::{fmt, ops::Range};
6
7use miden_debug_types::{SourceId, SourceSpan};
8use miden_utils_diagnostics::{Diagnostic, miette};
9
10use super::ParseError;
11
/// The reason a (non-hex, non-binary) literal was rejected.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum LiteralErrorKind {
    /// The input contained nothing to parse.
    Empty,
    /// The input contained a character that is not a valid digit.
    InvalidDigit,
    /// The value does not fit in the `u32` range.
    U32Overflow,
    /// The value overflows the field modulus.
    FeltOverflow,
    /// The value is not a valid bit size.
    InvalidBitSize,
}

impl fmt::Display for LiteralErrorKind {
    /// Writes the human-readable description of this error kind.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Select the message first so there is a single write site.
        let message = match *self {
            Self::Empty => "input was empty",
            Self::InvalidDigit => "invalid digit",
            Self::U32Overflow => "value overflowed the u32 range",
            Self::FeltOverflow => "value overflowed the field modulus",
            Self::InvalidBitSize => "expected value to be a valid bit size, e.g. 0..63",
        };
        f.write_str(message)
    }
}
42
/// The reason a hex-encoded literal was rejected.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum HexErrorKind {
    /// The number of hex digits is not a multiple of two.
    MissingDigits,
    /// The number of hex digits is not one of the accepted lengths.
    Invalid,
    /// The value overflows the field modulus.
    Overflow,
    /// The literal has too many digits.
    TooLong,
}

impl fmt::Display for HexErrorKind {
    /// Writes the human-readable description of this error kind.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Select the message first so there is a single write site.
        let message = match *self {
            Self::MissingDigits => "expected number of hex digits to be a multiple of 2",
            Self::Invalid => "expected 2, 4, 8, 16, or 64 hex digits",
            Self::Overflow => "value overflowed the field modulus",
            Self::TooLong => {
                "value has too many digits, long hex strings must contain exactly 64 digits"
            },
        };
        f.write_str(message)
    }
}
73
/// The reason a binary-encoded literal was rejected.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum BinErrorKind {
    /// The literal has too many digits.
    TooLong,
}

impl fmt::Display for BinErrorKind {
    /// Writes the human-readable description of this error kind.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // A `match` is kept (rather than a constant) so that adding a variant later forces this
        // function to be revisited.
        let message = match *self {
            Self::TooLong => {
                "value has too many digits, binary string can contain no more than 32 digits"
            },
        };
        f.write_str(message)
    }
}
92
93#[derive(Debug, Default, thiserror::Error, Diagnostic)]
97#[repr(u8)]
98pub enum ParsingError {
99 #[default]
100 #[error("parsing failed due to unexpected input")]
101 #[diagnostic()]
102 Failed = 0,
103 #[error("expected input to be valid utf8, but invalid byte sequences were found")]
104 #[diagnostic()]
105 InvalidUtf8 {
106 #[label("invalid byte sequence starts here")]
107 span: SourceSpan,
108 },
109 #[error(
110 "expected input to be valid utf8, but end-of-file was reached before final codepoint was read"
111 )]
112 #[diagnostic()]
113 IncompleteUtf8 {
114 #[label("the codepoint starting here is incomplete")]
115 span: SourceSpan,
116 },
117 #[error("invalid syntax")]
118 #[diagnostic()]
119 InvalidToken {
120 #[label("occurs here")]
121 span: SourceSpan,
122 },
123 #[error("invalid syntax")]
124 #[diagnostic(help("expected {}", expected.as_slice().join(", or ")))]
125 UnrecognizedToken {
126 #[label("found a {token} here")]
127 span: SourceSpan,
128 token: String,
129 expected: Vec<String>,
130 },
131 #[error("unexpected trailing tokens")]
132 #[diagnostic()]
133 ExtraToken {
134 #[label("{token} was found here, but was not expected")]
135 span: SourceSpan,
136 token: String,
137 },
138 #[error("unexpected end of file")]
139 #[diagnostic(help("expected {}", expected.as_slice().join(", or ")))]
140 UnrecognizedEof {
141 #[label("reached end of file here")]
142 span: SourceSpan,
143 expected: Vec<String>,
144 },
145 #[error("{error}")]
146 #[diagnostic(help(
147 "bare identifiers must be lowercase alphanumeric with '_', quoted identifiers can include any graphical character"
148 ))]
149 InvalidIdentifier {
150 #[source]
151 #[diagnostic(source)]
152 error: crate::ast::IdentError,
153 #[label]
154 span: SourceSpan,
155 },
156 #[error("unclosed quoted identifier")]
157 #[diagnostic()]
158 UnclosedQuote {
159 #[label("no match for quotation mark starting here")]
160 start: SourceSpan,
161 },
162 #[error("too many instructions in a single code block")]
163 #[diagnostic()]
164 CodeBlockTooBig {
165 #[label]
166 span: SourceSpan,
167 },
168 #[error("invalid constant expression: division by zero")]
169 DivisionByZero {
170 #[label]
171 span: SourceSpan,
172 },
173 #[error("unexpected string in an arithmetic expression")]
174 #[diagnostic()]
175 StringInArithmeticExpression {
176 #[label]
177 span: SourceSpan,
178 },
179 #[error("doc comment is too large")]
180 #[diagnostic(help("make sure it is less than u16::MAX bytes in length"))]
181 DocsTooLarge {
182 #[label]
183 span: SourceSpan,
184 },
185 #[error("invalid literal: {}", kind)]
186 #[diagnostic()]
187 InvalidLiteral {
188 #[label]
189 span: SourceSpan,
190 kind: LiteralErrorKind,
191 },
192 #[error("invalid literal: {}", kind)]
193 #[diagnostic()]
194 InvalidHexLiteral {
195 #[label]
196 span: SourceSpan,
197 kind: HexErrorKind,
198 },
199 #[error("invalid literal: {}", kind)]
200 #[diagnostic()]
201 InvalidBinaryLiteral {
202 #[label]
203 span: SourceSpan,
204 kind: BinErrorKind,
205 },
206 #[error("invalid MAST root literal")]
207 InvalidMastRoot {
208 #[label]
209 span: SourceSpan,
210 },
211 #[error("invalid library path: {}", message)]
212 InvalidLibraryPath {
213 #[label]
214 span: SourceSpan,
215 message: String,
216 },
217 #[error("invalid immediate: value must be in the range {}..{} (exclusive)", range.start, range.end)]
218 ImmediateOutOfRange {
219 #[label]
220 span: SourceSpan,
221 range: Range<usize>,
222 },
223 #[error("too many procedures in this module")]
224 #[diagnostic()]
225 ModuleTooLarge {
226 #[label]
227 span: SourceSpan,
228 },
229 #[error("too many re-exported procedures in this module")]
230 #[diagnostic()]
231 ModuleTooManyReexports {
232 #[label]
233 span: SourceSpan,
234 },
235 #[error(
236 "too many operands for `push`: tried to push {} elements, but only 16 can be pushed at one time",
237 count
238 )]
239 #[diagnostic()]
240 PushOverflow {
241 #[label]
242 span: SourceSpan,
243 count: usize,
244 },
245 #[error("expected a fully-qualified module path, e.g. `std::u64`")]
246 UnqualifiedImport {
247 #[label]
248 span: SourceSpan,
249 },
250 #[error(
251 "re-exporting a procedure identified by digest requires giving it a name, e.g. `export.DIGEST->foo`"
252 )]
253 UnnamedReexportOfMastRoot {
254 #[label]
255 span: SourceSpan,
256 },
257 #[error("conflicting attributes for procedure definition")]
258 #[diagnostic()]
259 AttributeConflict {
260 #[label(
261 "conflict occurs because an attribute with the same name has already been defined"
262 )]
263 span: SourceSpan,
264 #[label("previously defined here")]
265 prev: SourceSpan,
266 },
267 #[error("conflicting key-value attributes for procedure definition")]
268 #[diagnostic()]
269 AttributeKeyValueConflict {
270 #[label(
271 "conflict occurs because a key with the same name has already been set in a previous declaration"
272 )]
273 span: SourceSpan,
274 #[label("previously defined here")]
275 prev: SourceSpan,
276 },
277 #[error("invalid Advice Map key")]
278 #[diagnostic()]
279 InvalidAdvMapKey {
280 #[label(
281 "an Advice Map key must be a word, either in 64-character hex format or in array-like format `[f0,f1,f2,f3]`"
282 )]
283 #[label]
284 span: SourceSpan,
285 },
286}
287
288impl ParsingError {
289 fn tag(&self) -> u8 {
290 unsafe { *<*const _>::from(self).cast::<u8>() }
297 }
298}
299
300impl Eq for ParsingError {}
301
302impl PartialEq for ParsingError {
303 fn eq(&self, other: &Self) -> bool {
304 match (self, other) {
305 (Self::Failed, Self::Failed) => true,
306 (Self::InvalidLiteral { kind: l, .. }, Self::InvalidLiteral { kind: r, .. }) => l == r,
307 (Self::InvalidHexLiteral { kind: l, .. }, Self::InvalidHexLiteral { kind: r, .. }) => {
308 l == r
309 },
310 (
311 Self::InvalidLibraryPath { message: l, .. },
312 Self::InvalidLibraryPath { message: r, .. },
313 ) => l == r,
314 (
315 Self::ImmediateOutOfRange { range: l, .. },
316 Self::ImmediateOutOfRange { range: r, .. },
317 ) => l == r,
318 (Self::PushOverflow { count: l, .. }, Self::PushOverflow { count: r, .. }) => l == r,
319 (
320 Self::UnrecognizedToken { token: ltok, expected: lexpect, .. },
321 Self::UnrecognizedToken { token: rtok, expected: rexpect, .. },
322 ) => ltok == rtok && lexpect == rexpect,
323 (Self::ExtraToken { token: ltok, .. }, Self::ExtraToken { token: rtok, .. }) => {
324 ltok == rtok
325 },
326 (
327 Self::UnrecognizedEof { expected: lexpect, .. },
328 Self::UnrecognizedEof { expected: rexpect, .. },
329 ) => lexpect == rexpect,
330 (x, y) => x.tag() == y.tag(),
331 }
332 }
333}
334
335impl ParsingError {
336 pub fn from_utf8_error(source_id: SourceId, err: core::str::Utf8Error) -> Self {
337 let start = u32::try_from(err.valid_up_to()).ok().unwrap_or(u32::MAX);
338 match err.error_len() {
339 None => Self::IncompleteUtf8 { span: SourceSpan::at(source_id, start) },
340 Some(len) => Self::InvalidUtf8 {
341 span: SourceSpan::new(source_id, start..(start + len as u32)),
342 },
343 }
344 }
345
346 pub fn from_parse_error(source_id: SourceId, err: ParseError<'_>) -> Self {
347 use super::Token;
348
349 match err {
350 ParseError::InvalidToken { location: at } => {
351 Self::InvalidToken { span: SourceSpan::at(source_id, at) }
352 },
353 ParseError::UnrecognizedToken { token: (l, Token::Eof, r), expected } => {
354 Self::UnrecognizedEof {
355 span: SourceSpan::new(source_id, l..r),
356 expected: simplify_expected_tokens(expected),
357 }
358 },
359 ParseError::UnrecognizedToken { token: (l, tok, r), expected } => {
360 Self::UnrecognizedToken {
361 span: SourceSpan::new(source_id, l..r),
362 token: tok.to_string(),
363 expected: simplify_expected_tokens(expected),
364 }
365 },
366 ParseError::ExtraToken { token: (l, tok, r) } => Self::ExtraToken {
367 span: SourceSpan::new(source_id, l..r),
368 token: tok.to_string(),
369 },
370 ParseError::UnrecognizedEof { location: at, expected } => Self::UnrecognizedEof {
371 span: SourceSpan::new(source_id, at..at),
372 expected: simplify_expected_tokens(expected),
373 },
374 ParseError::User { error } => error,
375 }
376 }
377}
378
379fn simplify_expected_tokens(expected: Vec<String>) -> Vec<String> {
386 use super::Token;
387 let mut has_instruction = false;
388 let mut has_ctrl = false;
389 expected
390 .into_iter()
391 .filter_map(|t| {
392 let tok = match t.as_str() {
393 "bare_ident" => return Some("identifier".to_string()),
394 "const_ident" => return Some("constant identifier".to_string()),
395 "quoted_ident" => return Some("quoted identifier".to_string()),
396 "doc_comment" => return Some("doc comment".to_string()),
397 "hex_value" => return Some("hex-encoded literal".to_string()),
398 "bin_value" => return Some("bin-encoded literal".to_string()),
399 "uint" => return Some("integer literal".to_string()),
400 "EOF" => return Some("end of file".to_string()),
401 other => other[1..].strip_suffix('"').and_then(Token::parse),
402 };
403 match tok {
404 Some(Token::If | Token::While | Token::Repeat) => {
405 if !has_ctrl {
406 has_ctrl = true;
407 Some("control flow opcode (e.g. \"if.true\")".to_string())
408 } else {
409 None
410 }
411 },
412 Some(tok) if tok.is_instruction() => {
413 if !has_instruction {
414 has_instruction = true;
415 Some("primitive opcode (e.g. \"add\")".to_string())
416 } else {
417 None
418 }
419 },
420 _ => Some(t),
421 }
422 })
423 .collect()
424}