// Builds a `SourceSpan` for the source identified by the first argument.
//
// * `span!(id, start, end)` expands to `SourceSpan::new(id, start..end)`.
// * `span!(id, offset)` expands to `SourceSpan::at(id, offset)`.
macro_rules! span {
    ($source_id:expr, $start:expr, $end:expr) => {
        ::miden_debug_types::SourceSpan::new($source_id, $start..$end)
    };
    ($source_id:expr, $offset:expr) => {
        ::miden_debug_types::SourceSpan::at($source_id, $offset)
    };
}
10
// Declares the `grammar` module from the LALRPOP-generated parser source found at
// "/parser/grammar.rs". The `#[expect(...)]` attributes listed here are attached to that
// generated module, marking the named lints as expected in the generated code.
lalrpop_util::lalrpop_mod!(
    #[expect(clippy::all)]
    #[expect(clippy::redundant_closure_for_method_calls)]
    #[expect(clippy::trivially_copy_pass_by_ref)]
    #[expect(unused_lifetimes)]
    #[expect(unused_qualifications)]
    grammar,
    "/parser/grammar.rs"
);
20
21mod error;
22mod lexer;
23mod scanner;
24mod token;
25
26use alloc::{boxed::Box, collections::BTreeSet, string::ToString, sync::Arc, vec::Vec};
27
28use miden_debug_types::{SourceFile, SourceLanguage, SourceManager, Uri};
29use miden_utils_diagnostics::Report;
30
31pub use self::{
32 error::{BinErrorKind, HexErrorKind, LiteralErrorKind, ParsingError},
33 lexer::Lexer,
34 scanner::Scanner,
35 token::{BinEncodedValue, DocumentationType, IntValue, PushValue, Token, WordValue},
36};
37use crate::{Path, ast, sema};
38
/// Shorthand for the `lalrpop_util::ParseError` instantiation used by this parser: `u32`
/// locations, [`Token`] tokens, and [`ParsingError`] as the user-defined error type.
type ParseError<'a> = lalrpop_util::ParseError<u32, Token<'a>, ParsingError>;
43
/// Parses source text into an [`ast::Module`].
///
/// A `ModuleParser` carries the module kind and a warnings-as-errors flag, both of which are
/// forwarded to `sema::analyze`, plus a string set that is handed to the grammar on every parse.
#[derive(Default)]
pub struct ModuleParser {
    /// Kind of module being parsed; passed through to `sema::analyze`.
    kind: ast::ModuleKind,
    /// Set of shared strings passed mutably to the grammar on each parse. It is owned by the
    /// parser, so its contents persist across successive parses made with the same instance.
    ///
    /// NOTE(review): presumably used to deduplicate (intern) strings seen while parsing —
    /// confirm against the grammar.
    interned: BTreeSet<Arc<str>>,
    /// Passed through to `sema::analyze`.
    ///
    /// NOTE(review): presumably promotes warnings to errors during analysis — confirm.
    warnings_as_errors: bool,
}
76
77impl ModuleParser {
78 pub fn new(kind: ast::ModuleKind) -> Self {
80 Self {
81 kind,
82 interned: Default::default(),
83 warnings_as_errors: false,
84 }
85 }
86
87 pub fn set_warnings_as_errors(&mut self, yes: bool) {
89 self.warnings_as_errors = yes;
90 }
91
92 pub fn parse(
94 &mut self,
95 path: impl AsRef<Path>,
96 source: Arc<SourceFile>,
97 source_manager: Arc<dyn SourceManager>,
98 ) -> Result<Box<ast::Module>, Report> {
99 let path = path.as_ref();
100 if let Err(err) = Path::validate(path.as_str()) {
101 return Err(Report::msg(err.to_string()).with_source_code(source));
102 }
103 let forms = parse_forms_internal(source.clone(), &mut self.interned)
104 .map_err(|err| Report::new(err).with_source_code(source.clone()))?;
105 sema::analyze(source, self.kind, path, forms, self.warnings_as_errors, source_manager)
106 .map_err(Report::new)
107 }
108
109 #[cfg(feature = "std")]
111 pub fn parse_file<N, P>(
112 &mut self,
113 name: N,
114 path: P,
115 source_manager: Arc<dyn SourceManager>,
116 ) -> Result<Box<ast::Module>, Report>
117 where
118 N: AsRef<Path>,
119 P: AsRef<std::path::Path>,
120 {
121 use miden_debug_types::SourceManagerExt;
122 use miden_utils_diagnostics::{IntoDiagnostic, WrapErr};
123
124 let path = path.as_ref();
125 let source_file = source_manager
126 .load_file(path)
127 .into_diagnostic()
128 .wrap_err_with(|| format!("failed to load source file from '{}'", path.display()))?;
129 self.parse(name, source_file, source_manager)
130 }
131
132 pub fn parse_str(
134 &mut self,
135 name: impl AsRef<Path>,
136 source: impl ToString,
137 source_manager: Arc<dyn SourceManager>,
138 ) -> Result<Box<ast::Module>, Report> {
139 use miden_debug_types::SourceContent;
140
141 let name = name.as_ref();
142 let uri = Uri::from(name.as_str().to_string().into_boxed_str());
143 let content = SourceContent::new(
144 SourceLanguage::Masm,
145 uri.clone(),
146 source.to_string().into_boxed_str(),
147 );
148 let source_file = source_manager.load_from_raw_parts(uri, content);
149 self.parse(name, source_file, source_manager)
150 }
151}
152
/// Parses `source` into its top-level forms without running semantic analysis.
///
/// Only compiled for tests or when the `testing` feature is enabled. A fresh, empty string set
/// is used for each call.
#[cfg(any(test, feature = "testing"))]
pub fn parse_forms(source: Arc<SourceFile>) -> Result<Vec<ast::Form>, ParsingError> {
    parse_forms_internal(source, &mut BTreeSet::new())
}
162
163fn parse_forms_internal(
168 source: Arc<SourceFile>,
169 interned: &mut BTreeSet<Arc<str>>,
170) -> Result<Vec<ast::Form>, ParsingError> {
171 let source_id = source.id();
172 let scanner = Scanner::new(source.as_str());
173 let lexer = Lexer::new(source_id, scanner);
174 let felt_type = Arc::new(ast::types::ArrayType::new(ast::types::Type::Felt, 4));
175 grammar::FormsParser::new()
176 .parse(source_id, interned, &felt_type, core::marker::PhantomData, lexer)
177 .map_err(|err| ParsingError::from_parse_error(source_id, err))
178}
179
/// Walks `dir`, parses every module file found beneath it as a library module, and returns the
/// parsed modules ordered by module path.
///
/// Module paths are built by the walker from `namespace` plus each file's location relative to
/// `dir`. A path whose last component equals `ast::Module::ROOT` has that component removed.
///
/// # Errors
///
/// Returns a [`Report`] if:
///
/// * `dir` is not a directory,
/// * `dir` directly contains a file named `ast::Module::ROOT_FILENAME`,
/// * the directory tree cannot be read,
/// * any module fails to parse, or
/// * two files resolve to the same module path.
#[cfg(feature = "std")]
pub fn read_modules_from_dir(
    dir: impl AsRef<std::path::Path>,
    namespace: impl AsRef<Path>,
    source_manager: Arc<dyn SourceManager>,
    warnings_as_errors: bool,
) -> Result<impl Iterator<Item = Box<ast::Module>>, Report> {
    use std::collections::{BTreeMap, btree_map::Entry};

    use miden_utils_diagnostics::{IntoDiagnostic, WrapErr, report};
    use module_walker::{ModuleEntry, WalkModules};

    let root = dir.as_ref();
    if !root.is_dir() {
        return Err(report!("the provided path '{}' is not a valid directory", root.display()));
    }

    let root_module_file = root.join(ast::Module::ROOT_FILENAME);
    if root_module_file.exists() {
        return Err(report!("{} is not allowed in the root directory", ast::Module::ROOT_FILENAME));
    }

    let walker = WalkModules::new(namespace.as_ref().to_path_buf(), root)
        .into_diagnostic()
        .wrap_err_with(|| format!("failed to load modules from '{}'", root.display()))?;

    // Keyed by module path, so the returned iterator yields modules in path order.
    let mut modules = BTreeMap::new();
    for entry in walker {
        let ModuleEntry { name: mut module_path, source_path } = entry?;
        if module_path.last().unwrap() == ast::Module::ROOT {
            module_path.pop();
        }

        let mut parser = ModuleParser::new(ast::ModuleKind::Library);
        parser.set_warnings_as_errors(warnings_as_errors);
        let module = parser.parse_file(&module_path, &source_path, Arc::clone(&source_manager))?;

        match modules.entry(module_path) {
            Entry::Vacant(slot) => {
                slot.insert(module);
            },
            Entry::Occupied(ref existing) => {
                return Err(report!("duplicate module '{0}'", existing.key().clone()));
            },
        }
    }

    Ok(modules.into_values())
}
239
/// Directory traversal that discovers module source files and derives their module paths.
#[cfg(feature = "std")]
mod module_walker {
    use std::{
        ffi::OsStr,
        fs::{self, DirEntry, FileType},
        io,
        path::{Path, PathBuf},
    };

    use miden_utils_diagnostics::{IntoDiagnostic, Report, report};

    use crate::{Path as LibraryPath, PathBuf as LibraryPathBuf, ast::Module};

    /// A module file found by [`WalkModules`].
    pub struct ModuleEntry {
        /// Module path: the walker's namespace followed by the file's location relative to the
        /// walk root, with the file extension removed.
        pub name: LibraryPathBuf,
        /// Filesystem path of the module's source file.
        pub source_path: PathBuf,
    }

    /// Iterator over the module files found under a root directory.
    ///
    /// Directory entries are held in a queue; when a directory is encountered its entries are
    /// appended to the back of the queue.
    pub struct WalkModules<'a> {
        namespace: LibraryPathBuf,
        root: &'a Path,
        stack: alloc::collections::VecDeque<io::Result<DirEntry>>,
    }

    impl<'a> WalkModules<'a> {
        /// Starts a walk rooted at `path`, prefixing every module path with `namespace`.
        ///
        /// Fails if `path` cannot be read as a directory.
        pub fn new(namespace: LibraryPathBuf, path: &'a Path) -> io::Result<Self> {
            let stack = fs::read_dir(path)?.collect();
            Ok(Self { namespace, root: path, stack })
        }

        /// Processes one directory entry whose file type is `ty`.
        ///
        /// Returns `Ok(None)` for directories (after queueing their entries) and for files
        /// without the module file extension, and `Ok(Some(_))` for module files.
        fn next_entry(
            &mut self,
            entry: &DirEntry,
            ty: FileType,
        ) -> Result<Option<ModuleEntry>, Report> {
            let source_path = entry.path();

            if ty.is_dir() {
                let children = fs::read_dir(source_path).into_diagnostic()?;
                self.stack.extend(children);
                return Ok(None);
            }

            let has_module_extension = source_path
                .extension()
                .is_some_and(|ext| ext == AsRef::<OsStr>::as_ref(Module::FILE_EXTENSION));
            if !has_module_extension {
                return Ok(None);
            }

            // With the extension removed, the path must not collide with a sibling directory.
            let stem_path = source_path.with_extension("");
            if stem_path.is_dir() {
                return Err(report!(
                    "file and directory with same name are not allowed: {}",
                    stem_path.display()
                ));
            }
            let relative_path = stem_path
                .strip_prefix(self.root)
                .expect("expected path to be a child of the root directory");

            // Every path component is validated and appended to the namespace.
            let mut name = self.namespace.clone();
            for component in relative_path.iter() {
                let Some(component) = component.to_str() else {
                    return Err(report!("{} is an invalid directory entry", source_path.display()));
                };
                LibraryPath::validate(component).into_diagnostic()?;
                name.push(component);
            }
            Ok(Some(ModuleEntry { name, source_path }))
        }
    }

    impl Iterator for WalkModules<'_> {
        type Item = Result<ModuleEntry, Report>;

        /// Yields the next module file, or the next error, skipping entries that are not
        /// modules. Returns `None` once the queue is empty.
        fn next(&mut self) -> Option<Self::Item> {
            while let Some(queued) = self.stack.pop_front() {
                let probed = queued
                    .and_then(|entry| entry.file_type().map(|file_type| (entry, file_type)))
                    .into_diagnostic();
                let (entry, file_type) = match probed {
                    Ok(pair) => pair,
                    Err(err) => return Some(Err(err)),
                };

                match self.next_entry(&entry, file_type) {
                    Ok(None) => continue,
                    Ok(Some(module)) => return Some(Ok(module)),
                    Err(err) => return Some(Err(err)),
                }
            }
            None
        }
    }
}
343
// Unit tests covering the lexer's token and span output, plus one parser robustness check for
// an oversized path component.
#[cfg(test)]
mod tests {
    use miden_core::assert_matches;
    use miden_debug_types::SourceId;

    use super::*;

    // `exp.u9` is expected to lex as `Token::ExpU` followed by the integer `9`.
    #[test]
    fn lex_exp() {
        let source_id = SourceId::default();
        let scanner = Scanner::new("begin exp.u9 end");
        let mut lexer = Lexer::new(source_id, scanner).map(|result| result.map(|(_, t, _)| t));
        assert_matches!(lexer.next(), Some(Ok(Token::Begin)));
        assert_matches!(lexer.next(), Some(Ok(Token::ExpU)));
        assert_matches!(lexer.next(), Some(Ok(Token::Int(n))) if n == 9);
        assert_matches!(lexer.next(), Some(Ok(Token::End)));
    }

    // Checks the full token stream for a constant declaration followed by a block using
    // `u32assertw` both bare and with an `.err=` suffix (constant and literal forms).
    #[test]
    fn lex_block() {
        let source_id = SourceId::default();
        let scanner = Scanner::new(
            "\
const ERR1 = 1

begin
 u32assertw
 u32assertw.err=ERR1
 u32assertw.err=2
end
",
        );
        let mut lexer = Lexer::new(source_id, scanner).map(|result| result.map(|(_, t, _)| t));
        assert_matches!(lexer.next(), Some(Ok(Token::Const)));
        assert_matches!(lexer.next(), Some(Ok(Token::ConstantIdent("ERR1"))));
        assert_matches!(lexer.next(), Some(Ok(Token::Equal)));
        assert_matches!(lexer.next(), Some(Ok(Token::Int(1))));
        assert_matches!(lexer.next(), Some(Ok(Token::Begin)));
        assert_matches!(lexer.next(), Some(Ok(Token::U32Assertw)));
        assert_matches!(lexer.next(), Some(Ok(Token::U32Assertw)));
        assert_matches!(lexer.next(), Some(Ok(Token::Dot)));
        assert_matches!(lexer.next(), Some(Ok(Token::Err)));
        assert_matches!(lexer.next(), Some(Ok(Token::Equal)));
        assert_matches!(lexer.next(), Some(Ok(Token::ConstantIdent("ERR1"))));
        assert_matches!(lexer.next(), Some(Ok(Token::U32Assertw)));
        assert_matches!(lexer.next(), Some(Ok(Token::Dot)));
        assert_matches!(lexer.next(), Some(Ok(Token::Err)));
        assert_matches!(lexer.next(), Some(Ok(Token::Equal)));
        assert_matches!(lexer.next(), Some(Ok(Token::Int(2))));
        assert_matches!(lexer.next(), Some(Ok(Token::End)));
        assert_matches!(lexer.next(), Some(Ok(Token::Eof)));
    }

    // Checks the token stream for `emit.event("abc")`, including the quoted identifier.
    #[test]
    fn lex_emit() {
        let source_id = SourceId::default();
        let scanner = Scanner::new(
            "\
begin
 push.1
 emit.event(\"abc\")
end
",
        );
        let mut lexer = Lexer::new(source_id, scanner).map(|result| result.map(|(_, t, _)| t));
        assert_matches!(lexer.next(), Some(Ok(Token::Begin)));
        assert_matches!(lexer.next(), Some(Ok(Token::Push)));
        assert_matches!(lexer.next(), Some(Ok(Token::Dot)));
        assert_matches!(lexer.next(), Some(Ok(Token::Int(1))));
        assert_matches!(lexer.next(), Some(Ok(Token::Emit)));
        assert_matches!(lexer.next(), Some(Ok(Token::Dot)));
        assert_matches!(lexer.next(), Some(Ok(Token::Event)));
        assert_matches!(lexer.next(), Some(Ok(Token::Lparen)));
        assert_matches!(lexer.next(), Some(Ok(Token::QuotedIdent("abc"))));
        assert_matches!(lexer.next(), Some(Ok(Token::Rparen)));
        assert_matches!(lexer.next(), Some(Ok(Token::End)));
        assert_matches!(lexer.next(), Some(Ok(Token::Eof)));
    }

    // A U+0001 character following whitespace must yield `InvalidToken` whose span is 6..7,
    // i.e. the offset of that character rather than of the preceding whitespace.
    #[test]
    fn lex_invalid_token_after_whitespace_returns_error() {
        let source_id = SourceId::default();
        let scanner = Scanner::new("begin \u{0001}\nend\n");
        let mut lexer = Lexer::new(source_id, scanner).map(|result| result.map(|(_, t, _)| t));

        assert_matches!(lexer.next(), Some(Ok(Token::Begin)));
        assert_matches!(
            lexer.next(),
            Some(Err(ParsingError::InvalidToken { span })) if span.into_range() == (6..7)
        );
    }

    // For the input `_-`, the `InvalidToken` span must be 6..7, covering only the underscore.
    #[test]
    fn lex_invalid_underscore_token_span() {
        let source_id = SourceId::default();
        let scanner = Scanner::new("begin _-\nend\n");
        let mut lexer = Lexer::new(source_id, scanner).map(|result| result.map(|(_, t, _)| t));

        assert_matches!(lexer.next(), Some(Ok(Token::Begin)));
        assert_matches!(
            lexer.next(),
            Some(Err(ParsingError::InvalidToken { span })) if span.into_range() == (6..7)
        );
    }

    // Uses the raw lexer output (start, token, end) to check that a one-character token and a
    // one-character identifier each span exactly one byte.
    #[test]
    fn lex_single_char_token_and_ident_spans() {
        let source_id = SourceId::default();
        let scanner = Scanner::new("@\nA\n");
        let mut lexer = Lexer::new(source_id, scanner);

        assert_matches!(lexer.next(), Some(Ok((0, Token::At, 1))));
        assert_matches!(lexer.next(), Some(Ok((2, Token::ConstantIdent("A"), 3))));
    }

    // Parsing an `exec` target whose first path component is `u16::MAX` characters long must
    // return a diagnostic instead of panicking.
    #[test]
    fn overlong_path_component_is_rejected_without_panic() {
        use std::{
            panic::{AssertUnwindSafe, catch_unwind},
            sync::Arc,
        };

        use crate::{
            debuginfo::DefaultSourceManager,
            parse::{Parse, ParseOptions},
        };

        let big_component = "a".repeat(u16::MAX as usize);
        let source = format!("begin\n exec.{big_component}::x::foo\nend\n");

        let source_manager = Arc::new(DefaultSourceManager::default());
        // Catch any panic so the test fails with the message below rather than aborting.
        let parsed = catch_unwind(AssertUnwindSafe(|| {
            source.parse_with_options(source_manager, ParseOptions::default())
        }));

        assert!(parsed.is_ok(), "parsing panicked, expected a structured error");
        let err = parsed.unwrap().expect_err("parsing succeeded, expected an error");
        crate::assert_diagnostic!(err, "this reference is invalid without a corresponding import");
    }
}
487}