/// Convenience macro for constructing a [miden_debug_types::SourceSpan].
///
/// Two forms:
/// - `span!(id, l, r)` — a span over the byte range `l..r` in source `id`;
/// - `span!(id, i)` — a span at the single byte offset `i` in source `id`.
macro_rules! span {
    ($id:expr, $l:expr, $r:expr) => {
        ::miden_debug_types::SourceSpan::new($id, $l..$r)
    };
    ($id:expr, $i:expr) => {
        ::miden_debug_types::SourceSpan::at($id, $i)
    };
}
10
// Expand the LALRPOP-generated parser from "/parser/grammar.rs" into a `grammar`
// module. The `expect` attributes silence lints that fire in the generated code,
// which we do not control.
lalrpop_util::lalrpop_mod!(
    #[expect(clippy::all)]
    #[expect(unused_lifetimes)]
    grammar,
    "/parser/grammar.rs"
);
17
18mod error;
19mod lexer;
20mod scanner;
21mod token;
22
23use alloc::{boxed::Box, collections::BTreeSet, string::ToString, sync::Arc, vec::Vec};
24
25use miden_debug_types::{SourceFile, SourceLanguage, SourceManager, Uri};
26use miden_utils_diagnostics::Report;
27
28pub use self::{
29 error::{BinErrorKind, HexErrorKind, LiteralErrorKind, ParsingError},
30 lexer::Lexer,
31 scanner::Scanner,
32 token::{BinEncodedValue, DocumentationType, IntValue, PushValue, Token, WordValue},
33};
34use crate::{Path, ast, sema};
35
/// The error type produced by the generated grammar: locations are byte offsets
/// (`u32`), tokens borrow from the source text, and user errors are [ParsingError].
type ParseError<'a> = lalrpop_util::ParseError<u32, Token<'a>, ParsingError>;
40
/// A parser for Miden Assembly modules which also runs semantic analysis over the
/// parsed forms before returning the final [ast::Module].
#[derive(Default)]
pub struct ModuleParser {
    // The kind of module this parser produces.
    // NOTE(review): `ModuleKind` semantics (library vs executable vs kernel) inferred
    // from usage below — confirm against the `ast` module.
    kind: ast::ModuleKind,
    // Identifier strings interned during parsing; kept across `parse` calls so that
    // repeated identifiers share a single `Arc<str>` allocation.
    interned: BTreeSet<Arc<str>>,
    // When true, warnings raised during semantic analysis are treated as errors.
    warnings_as_errors: bool,
}
73
74impl ModuleParser {
75 pub fn new(kind: ast::ModuleKind) -> Self {
77 Self {
78 kind,
79 interned: Default::default(),
80 warnings_as_errors: false,
81 }
82 }
83
84 pub fn set_warnings_as_errors(&mut self, yes: bool) {
86 self.warnings_as_errors = yes;
87 }
88
89 pub fn parse(
91 &mut self,
92 path: impl AsRef<Path>,
93 source: Arc<SourceFile>,
94 source_manager: Arc<dyn SourceManager>,
95 ) -> Result<Box<ast::Module>, Report> {
96 let path = path.as_ref();
97 if let Err(err) = Path::validate(path.as_str()) {
98 return Err(Report::msg(err.to_string()).with_source_code(source.clone()));
99 }
100 let forms = parse_forms_internal(source.clone(), &mut self.interned)
101 .map_err(|err| Report::new(err).with_source_code(source.clone()))?;
102 sema::analyze(source, self.kind, path, forms, self.warnings_as_errors, source_manager)
103 .map_err(Report::new)
104 }
105
106 #[cfg(feature = "std")]
108 pub fn parse_file<N, P>(
109 &mut self,
110 name: N,
111 path: P,
112 source_manager: Arc<dyn SourceManager>,
113 ) -> Result<Box<ast::Module>, Report>
114 where
115 N: AsRef<Path>,
116 P: AsRef<std::path::Path>,
117 {
118 use miden_debug_types::SourceManagerExt;
119 use miden_utils_diagnostics::{IntoDiagnostic, WrapErr};
120
121 let path = path.as_ref();
122 let source_file = source_manager
123 .load_file(path)
124 .into_diagnostic()
125 .wrap_err_with(|| format!("failed to load source file from '{}'", path.display()))?;
126 self.parse(name, source_file, source_manager)
127 }
128
129 pub fn parse_str(
131 &mut self,
132 name: impl AsRef<Path>,
133 source: impl ToString,
134 source_manager: Arc<dyn SourceManager>,
135 ) -> Result<Box<ast::Module>, Report> {
136 use miden_debug_types::SourceContent;
137
138 let name = name.as_ref();
139 let uri = Uri::from(name.as_str().to_string().into_boxed_str());
140 let content = SourceContent::new(
141 SourceLanguage::Masm,
142 uri.clone(),
143 source.to_string().into_boxed_str(),
144 );
145 let source_file = source_manager.load_from_raw_parts(uri, content);
146 self.parse(name, source_file, source_manager)
147 }
148}
149
/// Parses `source` into a sequence of raw [ast::Form]s without running semantic analysis.
///
/// Only available to tests and with the `testing` feature enabled.
#[cfg(any(test, feature = "testing"))]
pub fn parse_forms(source: Arc<SourceFile>) -> Result<Vec<ast::Form>, ParsingError> {
    // A throwaway interner: callers of this entry point do not reuse interned strings.
    parse_forms_internal(source, &mut BTreeSet::default())
}
159
160fn parse_forms_internal(
165 source: Arc<SourceFile>,
166 interned: &mut BTreeSet<Arc<str>>,
167) -> Result<Vec<ast::Form>, ParsingError> {
168 let source_id = source.id();
169 let scanner = Scanner::new(source.as_str());
170 let lexer = Lexer::new(source_id, scanner);
171 let felt_type = Arc::new(ast::types::ArrayType::new(ast::types::Type::Felt, 4));
172 grammar::FormsParser::new()
173 .parse(source_id, interned, &felt_type, core::marker::PhantomData, lexer)
174 .map_err(|err| ParsingError::from_parse_error(source_id, err))
175}
176
/// Walks `dir` recursively and parses every Miden Assembly module file found, naming
/// each module by joining `namespace` with the file's path relative to `dir`.
///
/// Returns an iterator over the parsed modules in module-path order.
///
/// # Errors
/// - `dir` is not a directory, or contains a root module file at the top level;
/// - any file fails to load, parse, or pass semantic analysis;
/// - two files would map to the same module path.
#[cfg(feature = "std")]
pub fn read_modules_from_dir(
    dir: impl AsRef<std::path::Path>,
    namespace: impl AsRef<Path>,
    source_manager: Arc<dyn SourceManager>,
    warnings_as_errors: bool,
) -> Result<impl Iterator<Item = Box<ast::Module>>, Report> {
    use std::collections::{BTreeMap, btree_map::Entry};

    use miden_utils_diagnostics::{IntoDiagnostic, WrapErr, report};
    use module_walker::{ModuleEntry, WalkModules};

    let dir = dir.as_ref();
    if !dir.is_dir() {
        return Err(report!("the provided path '{}' is not a valid directory", dir.display()));
    }

    // A root module file in the top-level directory would be ambiguous with `namespace`.
    if dir.join(ast::Module::ROOT_FILENAME).exists() {
        return Err(report!("{} is not allowed in the root directory", ast::Module::ROOT_FILENAME));
    }

    let mut modules = BTreeMap::default();

    let walker = WalkModules::new(namespace.as_ref().to_path_buf(), dir)
        .into_diagnostic()
        .wrap_err_with(|| format!("failed to load modules from '{}'", dir.display()))?;

    // Reuse one parser for every module: `ModuleParser` keeps an interner across
    // `parse` calls, so shared identifier strings are allocated only once. The
    // previous code constructed a fresh parser per iteration, discarding that reuse.
    let mut parser = ModuleParser::new(ast::ModuleKind::Library);
    parser.set_warnings_as_errors(warnings_as_errors);

    for entry in walker {
        let ModuleEntry { mut name, source_path } = entry?;
        // A trailing root component is implied by its parent directory; drop it.
        if name.last().unwrap() == ast::Module::ROOT {
            name.pop();
        }

        let ast = parser.parse_file(&name, &source_path, source_manager.clone())?;
        match modules.entry(name) {
            Entry::Occupied(ref entry) => {
                // `&K: Display` when `K: Display`, so no clone is needed just to format.
                return Err(report!("duplicate module '{0}'", entry.key()));
            },
            Entry::Vacant(entry) => {
                entry.insert(ast);
            },
        }
    }

    Ok(modules.into_values())
}
236
#[cfg(feature = "std")]
mod module_walker {
    //! Internal helpers for recursively discovering Miden Assembly module files on disk.

    use std::{
        ffi::OsStr,
        fs::{self, DirEntry, FileType},
        io,
        path::{Path, PathBuf},
    };

    use miden_utils_diagnostics::{IntoDiagnostic, Report, report};

    use crate::{Path as LibraryPath, PathBuf as LibraryPathBuf, ast::Module};

    /// A module discovered during a filesystem walk.
    pub struct ModuleEntry {
        /// The fully-qualified library path of the module (namespace + relative path).
        pub name: LibraryPathBuf,
        /// The filesystem path of the module's source file.
        pub source_path: PathBuf,
    }

    /// An iterator that walks a directory tree breadth-first, yielding one
    /// [ModuleEntry] per module file found under the root.
    pub struct WalkModules<'a> {
        /// The namespace prefixed to every discovered module's library path.
        namespace: LibraryPathBuf,
        /// The root of the walk; discovered file paths are made relative to this.
        root: &'a Path,
        /// Directory entries still pending a visit (BFS queue); read errors are
        /// carried through so the iterator can surface them.
        stack: alloc::collections::VecDeque<io::Result<DirEntry>>,
    }

    impl<'a> WalkModules<'a> {
        /// Starts a walk over `path`, rooting all module names under `namespace`.
        pub fn new(namespace: LibraryPathBuf, path: &'a Path) -> io::Result<Self> {
            use alloc::collections::VecDeque;

            // Seed the queue with the immediate entries of the root directory.
            let stack = VecDeque::from_iter(fs::read_dir(path)?);

            Ok(Self { namespace, root: path, stack })
        }

        /// Processes a single directory entry.
        ///
        /// Returns `Ok(Some(..))` for a module file; `Ok(None)` for skipped entries
        /// (directories, whose contents are queued for later visits, and files
        /// without the module extension); `Err` for invalid layouts or names.
        fn next_entry(
            &mut self,
            entry: &DirEntry,
            ty: &FileType,
        ) -> Result<Option<ModuleEntry>, Report> {
            if ty.is_dir() {
                // Queue the directory's contents; they are visited on later calls.
                let dir = entry.path();
                self.stack.extend(fs::read_dir(dir).into_diagnostic()?);
                return Ok(None);
            }

            // Skip files that do not carry the Miden Assembly module extension.
            let mut file_path = entry.path();
            let is_module = file_path
                .extension()
                .map(|ext| ext == AsRef::<OsStr>::as_ref(Module::FILE_EXTENSION))
                .unwrap_or(false);
            if !is_module {
                return Ok(None);
            }

            // Drop the extension to obtain the module's path stem.
            file_path.set_extension("");
            // A directory with the same stem would map to the same module path as the
            // file, so reject that layout outright.
            if file_path.is_dir() {
                return Err(report!(
                    "file and directory with same name are not allowed: {}",
                    file_path.display()
                ));
            }
            let relative_path = file_path
                .strip_prefix(self.root)
                .expect("expected path to be a child of the root directory");

            // Build the module's library path by appending each validated path
            // component to the namespace prefix.
            let mut libpath = self.namespace.clone();
            for component in relative_path.iter() {
                let component = component.to_str().ok_or_else(|| {
                    let p = entry.path();
                    report!("{} is an invalid directory entry", p.display())
                })?;
                LibraryPath::validate(component).into_diagnostic()?;
                libpath.push(component);
            }
            Ok(Some(ModuleEntry { name: libpath, source_path: entry.path() }))
        }
    }

    impl Iterator for WalkModules<'_> {
        type Item = Result<ModuleEntry, Report>;

        fn next(&mut self) -> Option<Self::Item> {
            loop {
                // Pop the next queued entry and pair it with its file type; both the
                // queued read and the `file_type` lookup can fail with I/O errors.
                let entry = self
                    .stack
                    .pop_front()?
                    .and_then(|entry| entry.file_type().map(|ft| (entry, ft)))
                    .into_diagnostic();

                match entry {
                    Ok((ref entry, ref file_type)) => {
                        // `None` means the entry was skipped (or a directory was
                        // queued); keep looping until a module is found or the queue
                        // is exhausted.
                        match self.next_entry(entry, file_type).transpose() {
                            None => {},
                            result => break result,
                        }
                    },
                    Err(err) => break Some(Err(err)),
                }
            }
        }
    }
}
340
#[cfg(test)]
mod tests {
    use miden_core::assert_matches;
    use miden_debug_types::SourceId;

    use super::*;

    /// `exp.u9` lexes as an `exp.u` token followed by the integer exponent.
    #[test]
    fn lex_exp() {
        let source_id = SourceId::default();
        let scanner = Scanner::new("begin exp.u9 end");
        let mut lexer = Lexer::new(source_id, scanner).map(|result| result.map(|(_, t, _)| t));
        assert_matches!(lexer.next(), Some(Ok(Token::Begin)));
        assert_matches!(lexer.next(), Some(Ok(Token::ExpU)));
        assert_matches!(lexer.next(), Some(Ok(Token::Int(n))) if n == 9);
        assert_matches!(lexer.next(), Some(Ok(Token::End)));
    }

    /// A multi-line block lexes into the expected token sequence; note that
    /// `const X = 1` is NOT tokenized with a `Dot`/`Equal` split the way the
    /// dotted `u32assertw.err=...` instructions are, and the stream ends in `Eof`.
    #[test]
    fn lex_block() {
        let source_id = SourceId::default();
        let scanner = Scanner::new(
            "\
const ERR1 = 1

begin
    u32assertw
    u32assertw.err=ERR1
    u32assertw.err=2
end
",
        );
        let mut lexer = Lexer::new(source_id, scanner).map(|result| result.map(|(_, t, _)| t));
        assert_matches!(lexer.next(), Some(Ok(Token::Const)));
        assert_matches!(lexer.next(), Some(Ok(Token::ConstantIdent("ERR1"))));
        assert_matches!(lexer.next(), Some(Ok(Token::Equal)));
        assert_matches!(lexer.next(), Some(Ok(Token::Int(1))));
        assert_matches!(lexer.next(), Some(Ok(Token::Begin)));
        assert_matches!(lexer.next(), Some(Ok(Token::U32Assertw)));
        assert_matches!(lexer.next(), Some(Ok(Token::U32Assertw)));
        assert_matches!(lexer.next(), Some(Ok(Token::Dot)));
        assert_matches!(lexer.next(), Some(Ok(Token::Err)));
        assert_matches!(lexer.next(), Some(Ok(Token::Equal)));
        assert_matches!(lexer.next(), Some(Ok(Token::ConstantIdent("ERR1"))));
        assert_matches!(lexer.next(), Some(Ok(Token::U32Assertw)));
        assert_matches!(lexer.next(), Some(Ok(Token::Dot)));
        assert_matches!(lexer.next(), Some(Ok(Token::Err)));
        assert_matches!(lexer.next(), Some(Ok(Token::Equal)));
        assert_matches!(lexer.next(), Some(Ok(Token::Int(2))));
        assert_matches!(lexer.next(), Some(Ok(Token::End)));
        assert_matches!(lexer.next(), Some(Ok(Token::Eof)));
    }

    /// `emit.event("abc")` lexes into `Emit . Event ( QuotedIdent )`.
    #[test]
    fn lex_emit() {
        let source_id = SourceId::default();
        let scanner = Scanner::new(
            "\
begin
    push.1
    emit.event(\"abc\")
end
",
        );
        let mut lexer = Lexer::new(source_id, scanner).map(|result| result.map(|(_, t, _)| t));
        assert_matches!(lexer.next(), Some(Ok(Token::Begin)));
        assert_matches!(lexer.next(), Some(Ok(Token::Push)));
        assert_matches!(lexer.next(), Some(Ok(Token::Dot)));
        assert_matches!(lexer.next(), Some(Ok(Token::Int(1))));
        assert_matches!(lexer.next(), Some(Ok(Token::Emit)));
        assert_matches!(lexer.next(), Some(Ok(Token::Dot)));
        assert_matches!(lexer.next(), Some(Ok(Token::Event)));
        assert_matches!(lexer.next(), Some(Ok(Token::Lparen)));
        assert_matches!(lexer.next(), Some(Ok(Token::QuotedIdent("abc"))));
        assert_matches!(lexer.next(), Some(Ok(Token::Rparen)));
        assert_matches!(lexer.next(), Some(Ok(Token::End)));
        assert_matches!(lexer.next(), Some(Ok(Token::Eof)));
    }

    /// An unlexable control character yields `InvalidToken` with a span pointing
    /// at the offending byte (offset 6..7), not at the preceding whitespace.
    #[test]
    fn lex_invalid_token_after_whitespace_returns_error() {
        let source_id = SourceId::default();
        let scanner = Scanner::new("begin \u{0001}\nend\n");
        let mut lexer = Lexer::new(source_id, scanner).map(|result| result.map(|(_, t, _)| t));

        assert_matches!(lexer.next(), Some(Ok(Token::Begin)));
        assert_matches!(
            lexer.next(),
            Some(Err(ParsingError::InvalidToken { span })) if span.into_range() == (6..7)
        );
    }

    /// A bare `_` followed by a non-identifier char is rejected with a one-byte span.
    #[test]
    fn lex_invalid_underscore_token_span() {
        let source_id = SourceId::default();
        let scanner = Scanner::new("begin _-\nend\n");
        let mut lexer = Lexer::new(source_id, scanner).map(|result| result.map(|(_, t, _)| t));

        assert_matches!(lexer.next(), Some(Ok(Token::Begin)));
        assert_matches!(
            lexer.next(),
            Some(Err(ParsingError::InvalidToken { span })) if span.into_range() == (6..7)
        );
    }

    /// Single-char tokens carry exact (start, end) byte offsets; the newline
    /// between them is skipped but still counted in the offsets.
    #[test]
    fn lex_single_char_token_and_ident_spans() {
        let source_id = SourceId::default();
        let scanner = Scanner::new("@\nA\n");
        let mut lexer = Lexer::new(source_id, scanner);

        assert_matches!(lexer.next(), Some(Ok((0, Token::At, 1))));
        assert_matches!(lexer.next(), Some(Ok((2, Token::ConstantIdent("A"), 3))));
    }

    /// A path component longer than the allowed maximum must produce a structured
    /// diagnostic, not a panic (regression test — hence the `catch_unwind`).
    #[test]
    fn overlong_path_component_is_rejected_without_panic() {
        use std::{
            panic::{AssertUnwindSafe, catch_unwind},
            sync::Arc,
        };

        use crate::{
            debuginfo::DefaultSourceManager,
            parse::{Parse, ParseOptions},
        };

        let big_component = "a".repeat(256);
        let source = format!("begin\n exec.{big_component}::x::foo\nend\n");

        let source_manager = Arc::new(DefaultSourceManager::default());
        let parsed = catch_unwind(AssertUnwindSafe(|| {
            source.parse_with_options(source_manager, ParseOptions::default())
        }));

        assert!(parsed.is_ok(), "parsing panicked, expected a structured error");
        let err = parsed.unwrap().expect_err("parsing succeeded, expected an error");
        crate::assert_diagnostic!(err, "this reference is invalid without a corresponding import");
    }
}