/// Construct a `::miden_debug_types::SourceSpan`.
///
/// Two forms:
/// * `span!(id, l, r)` — a span covering the byte range `l..r` in source `id`.
/// * `span!(id, i)` — a span anchored at the single offset `i` in source `id`
///   (via `SourceSpan::at`).
macro_rules! span {
    // Range form: half-open byte range `l..r`.
    ($id:expr, $l:expr, $r:expr) => {
        ::miden_debug_types::SourceSpan::new($id, $l..$r)
    };
    // Point form: single offset `i`.
    ($id:expr, $i:expr) => {
        ::miden_debug_types::SourceSpan::at($id, $i)
    };
}
10
// Include the LALRPOP-generated parser as the `grammar` module (generated
// output lives at `/parser/grammar.rs` in the build directory). Clippy is
// silenced for the generated code, which we do not control.
lalrpop_util::lalrpop_mod!(
    #[expect(clippy::all)]
    grammar,
    "/parser/grammar.rs"
);
16
17mod error;
18mod lexer;
19mod scanner;
20mod token;
21
22use alloc::{boxed::Box, collections::BTreeSet, string::ToString, sync::Arc, vec::Vec};
23
24use miden_debug_types::{SourceFile, SourceLanguage, SourceManager, Uri};
25use miden_utils_diagnostics::Report;
26
27pub use self::{
28 error::{BinErrorKind, HexErrorKind, LiteralErrorKind, ParsingError},
29 lexer::Lexer,
30 scanner::Scanner,
31 token::{BinEncodedValue, DocumentationType, IntValue, PushValue, Token, WordValue},
32};
33use crate::{Path, ast, sema};
34
/// The raw error type produced by the LALRPOP-generated parser: locations are
/// `u32` byte offsets, tokens borrow from the source text, and user-defined
/// errors are [ParsingError].
type ParseError<'a> = lalrpop_util::ParseError<u32, Token<'a>, ParsingError>;
39
/// A reusable parser for Miden Assembly modules.
///
/// Reusing one instance across multiple parses shares the string-interning
/// cache between them.
#[derive(Default)]
pub struct ModuleParser {
    /// The kind of module this parser produces (e.g. `ast::ModuleKind::Library`).
    kind: ast::ModuleKind,
    /// Cache of interned strings shared across parses to deduplicate identifiers.
    interned: BTreeSet<Arc<str>>,
    /// When true, warnings raised during semantic analysis are promoted to errors.
    warnings_as_errors: bool,
}
72
73impl ModuleParser {
74 pub fn new(kind: ast::ModuleKind) -> Self {
76 Self {
77 kind,
78 interned: Default::default(),
79 warnings_as_errors: false,
80 }
81 }
82
83 pub fn set_warnings_as_errors(&mut self, yes: bool) {
85 self.warnings_as_errors = yes;
86 }
87
88 pub fn parse(
90 &mut self,
91 path: impl AsRef<Path>,
92 source: Arc<SourceFile>,
93 source_manager: Arc<dyn SourceManager>,
94 ) -> Result<Box<ast::Module>, Report> {
95 let path = path.as_ref();
96 if let Err(err) = Path::validate(path.as_str()) {
97 return Err(Report::msg(err.to_string()).with_source_code(source.clone()));
98 }
99 let forms = parse_forms_internal(source.clone(), &mut self.interned)
100 .map_err(|err| Report::new(err).with_source_code(source.clone()))?;
101 sema::analyze(source, self.kind, path, forms, self.warnings_as_errors, source_manager)
102 .map_err(Report::new)
103 }
104
105 #[cfg(feature = "std")]
107 pub fn parse_file<N, P>(
108 &mut self,
109 name: N,
110 path: P,
111 source_manager: Arc<dyn SourceManager>,
112 ) -> Result<Box<ast::Module>, Report>
113 where
114 N: AsRef<Path>,
115 P: AsRef<std::path::Path>,
116 {
117 use miden_debug_types::SourceManagerExt;
118 use miden_utils_diagnostics::{IntoDiagnostic, WrapErr};
119
120 let path = path.as_ref();
121 let source_file = source_manager
122 .load_file(path)
123 .into_diagnostic()
124 .wrap_err_with(|| format!("failed to load source file from '{}'", path.display()))?;
125 self.parse(name, source_file, source_manager)
126 }
127
128 pub fn parse_str(
130 &mut self,
131 name: impl AsRef<Path>,
132 source: impl ToString,
133 source_manager: Arc<dyn SourceManager>,
134 ) -> Result<Box<ast::Module>, Report> {
135 use miden_debug_types::SourceContent;
136
137 let name = name.as_ref();
138 let uri = Uri::from(name.as_str().to_string().into_boxed_str());
139 let content = SourceContent::new(
140 SourceLanguage::Masm,
141 uri.clone(),
142 source.to_string().into_boxed_str(),
143 );
144 let source_file = source_manager.load_from_raw_parts(uri, content);
145 self.parse(name, source_file, source_manager)
146 }
147}
148
/// Parse `source` into a sequence of raw top-level forms, using a throwaway
/// interning cache. Test-only convenience wrapper around the internal parser.
#[cfg(any(test, feature = "testing"))]
pub fn parse_forms(source: Arc<SourceFile>) -> Result<Vec<ast::Form>, ParsingError> {
    parse_forms_internal(source, &mut BTreeSet::default())
}
158
159fn parse_forms_internal(
164 source: Arc<SourceFile>,
165 interned: &mut BTreeSet<Arc<str>>,
166) -> Result<Vec<ast::Form>, ParsingError> {
167 let source_id = source.id();
168 let scanner = Scanner::new(source.as_str());
169 let lexer = Lexer::new(source_id, scanner);
170 let felt_type = Arc::new(ast::types::ArrayType::new(ast::types::Type::Felt, 4));
171 grammar::FormsParser::new()
172 .parse(source_id, interned, &felt_type, core::marker::PhantomData, lexer)
173 .map_err(|err| ParsingError::from_parse_error(source_id, err))
174}
175
/// Read and parse every Miden Assembly module found beneath `dir`, mapping
/// each file to a library path under `namespace`.
///
/// A file named `Module::ROOT_FILENAME` may not appear in the root directory
/// itself. Modules are returned in the (sorted) order of their library paths.
///
/// # Errors
/// Fails if `dir` is not a directory, the root filename rule is violated, the
/// walk encounters an I/O or naming problem, any module fails to parse, or two
/// entries resolve to the same module path.
#[cfg(feature = "std")]
pub fn read_modules_from_dir(
    dir: impl AsRef<std::path::Path>,
    namespace: impl AsRef<Path>,
    source_manager: Arc<dyn SourceManager>,
    warnings_as_errors: bool,
) -> Result<impl Iterator<Item = Box<ast::Module>>, Report> {
    use std::collections::{BTreeMap, btree_map::Entry};

    use miden_utils_diagnostics::{IntoDiagnostic, WrapErr, report};
    use module_walker::{ModuleEntry, WalkModules};

    let dir = dir.as_ref();
    if !dir.is_dir() {
        return Err(report!("the provided path '{}' is not a valid directory", dir.display()));
    }

    if dir.join(ast::Module::ROOT_FILENAME).exists() {
        return Err(report!("{} is not allowed in the root directory", ast::Module::ROOT_FILENAME));
    }

    let mut modules = BTreeMap::default();

    // Reuse a single parser for all modules so its string-interning cache is
    // shared across files, instead of rebuilding an empty cache per module.
    let mut parser = ModuleParser::new(ast::ModuleKind::Library);
    parser.set_warnings_as_errors(warnings_as_errors);

    let walker = WalkModules::new(namespace.as_ref().to_path_buf(), dir)
        .into_diagnostic()
        .wrap_err_with(|| format!("failed to load modules from '{}'", dir.display()))?;
    for entry in walker {
        let ModuleEntry { mut name, source_path } = entry?;
        // A file named after the module root represents its parent directory's
        // module, so drop the trailing component.
        if name.last().unwrap() == ast::Module::ROOT {
            name.pop();
        }

        let ast = parser.parse_file(&name, &source_path, source_manager.clone())?;
        match modules.entry(name) {
            Entry::Occupied(ref entry) => {
                // The key is only formatted, so borrow it instead of cloning.
                return Err(report!("duplicate module '{0}'", entry.key()));
            },
            Entry::Vacant(entry) => {
                entry.insert(ast);
            },
        }
    }

    Ok(modules.into_values())
}
235
#[cfg(feature = "std")]
mod module_walker {
    //! Filesystem traversal that discovers Miden Assembly module files under a
    //! root directory and maps each one to a library path in a caller-supplied
    //! namespace.
    use std::{
        ffi::OsStr,
        fs::{self, DirEntry, FileType},
        io,
        path::{Path, PathBuf},
    };

    use miden_utils_diagnostics::{IntoDiagnostic, Report, report};

    use crate::{Path as LibraryPath, PathBuf as LibraryPathBuf, ast::Module};

    /// A module file discovered during the walk.
    pub struct ModuleEntry {
        /// The library path derived from the namespace plus the file's
        /// location relative to the walk root (extension stripped).
        pub name: LibraryPathBuf,
        /// The filesystem path of the module's source file.
        pub source_path: PathBuf,
    }

    /// Iterator over module files beneath `root`, processing directory
    /// entries from a FIFO queue and descending into subdirectories as they
    /// are encountered.
    pub struct WalkModules<'a> {
        /// Namespace prefixed to every discovered module's library path.
        namespace: LibraryPathBuf,
        /// Root directory of the walk; file paths are made relative to it.
        root: &'a Path,
        /// Queue of directory entries (or read errors) yet to be examined.
        stack: alloc::collections::VecDeque<io::Result<DirEntry>>,
    }

    impl<'a> WalkModules<'a> {
        /// Begin a walk of `path`, yielding modules namespaced under `namespace`.
        pub fn new(namespace: LibraryPathBuf, path: &'a Path) -> io::Result<Self> {
            use alloc::collections::VecDeque;

            let stack = VecDeque::from_iter(fs::read_dir(path)?);

            Ok(Self { namespace, root: path, stack })
        }

        /// Examine a single directory entry.
        ///
        /// Returns `Ok(None)` for directories (whose children are enqueued)
        /// and for files without the module extension; `Ok(Some(_))` for a
        /// module file; `Err` for an invalid entry (file/directory name
        /// collision, non-UTF-8 component, or invalid path component).
        fn next_entry(
            &mut self,
            entry: &DirEntry,
            ty: &FileType,
        ) -> Result<Option<ModuleEntry>, Report> {
            if ty.is_dir() {
                // Descend by queueing the directory's children for later.
                let dir = entry.path();
                self.stack.extend(fs::read_dir(dir).into_diagnostic()?);
                return Ok(None);
            }

            // Only files carrying the module file extension are modules.
            let mut file_path = entry.path();
            let is_module = file_path
                .extension()
                .map(|ext| ext == AsRef::<OsStr>::as_ref(Module::FILE_EXTENSION))
                .unwrap_or(false);
            if !is_module {
                return Ok(None);
            }

            // Strip the extension so the stem names the module; a sibling
            // directory with the same stem would make the module path
            // ambiguous, so reject that case.
            file_path.set_extension("");
            if file_path.is_dir() {
                return Err(report!(
                    "file and directory with same name are not allowed: {}",
                    file_path.display()
                ));
            }
            let relative_path = file_path
                .strip_prefix(self.root)
                .expect("expected path to be a child of the root directory");

            // Build the library path: namespace, then each validated component
            // of the path relative to the walk root.
            let mut libpath = self.namespace.clone();
            for component in relative_path.iter() {
                let component = component.to_str().ok_or_else(|| {
                    let p = entry.path();
                    report!("{} is an invalid directory entry", p.display())
                })?;
                LibraryPath::validate(component).into_diagnostic()?;
                libpath.push(component);
            }
            Ok(Some(ModuleEntry { name: libpath, source_path: entry.path() }))
        }
    }

    impl Iterator for WalkModules<'_> {
        type Item = Result<ModuleEntry, Report>;

        fn next(&mut self) -> Option<Self::Item> {
            loop {
                // Pop the next pending entry, folding any I/O error from the
                // directory read or metadata lookup into a Report.
                let entry = self
                    .stack
                    .pop_front()?
                    .and_then(|entry| entry.file_type().map(|ft| (entry, ft)))
                    .into_diagnostic();

                match entry {
                    Ok((ref entry, ref file_type)) => {
                        // `Ok(None)` means "nothing to yield here" (directory
                        // or non-module file) — keep scanning.
                        match self.next_entry(entry, file_type).transpose() {
                            None => continue,
                            result => break result,
                        }
                    },
                    Err(err) => break Some(Err(err)),
                }
            }
        }
    }
}
339
340#[cfg(test)]
344mod tests {
345 use miden_core::assert_matches;
346 use miden_debug_types::SourceId;
347
348 use super::*;
349
    /// `exp.u9` lexes as a dedicated `exp.u` token followed by the integer 9.
    #[test]
    fn lex_exp() {
        let source_id = SourceId::default();
        let scanner = Scanner::new("begin exp.u9 end");
        // Keep only the token, dropping the start/end byte offsets.
        let mut lexer = Lexer::new(source_id, scanner).map(|result| result.map(|(_, t, _)| t));
        assert_matches!(lexer.next(), Some(Ok(Token::Begin)));
        assert_matches!(lexer.next(), Some(Ok(Token::ExpU)));
        assert_matches!(lexer.next(), Some(Ok(Token::Int(n))) if n == 9);
        assert_matches!(lexer.next(), Some(Ok(Token::End)));
    }
361
    /// A constant declaration plus a block: `u32assertw.err=X` lexes as the
    /// separate tokens `u32assertw`, `.`, `err`, `=`, and the value, and the
    /// stream terminates with an explicit `Eof` token.
    #[test]
    fn lex_block() {
        let source_id = SourceId::default();
        let scanner = Scanner::new(
            "\
const ERR1 = 1

begin
    u32assertw
    u32assertw.err=ERR1
    u32assertw.err=2
end
",
        );
        // Keep only the token, dropping the start/end byte offsets.
        let mut lexer = Lexer::new(source_id, scanner).map(|result| result.map(|(_, t, _)| t));
        assert_matches!(lexer.next(), Some(Ok(Token::Const)));
        assert_matches!(lexer.next(), Some(Ok(Token::ConstantIdent("ERR1"))));
        assert_matches!(lexer.next(), Some(Ok(Token::Equal)));
        assert_matches!(lexer.next(), Some(Ok(Token::Int(1))));
        assert_matches!(lexer.next(), Some(Ok(Token::Begin)));
        assert_matches!(lexer.next(), Some(Ok(Token::U32Assertw)));
        assert_matches!(lexer.next(), Some(Ok(Token::U32Assertw)));
        assert_matches!(lexer.next(), Some(Ok(Token::Dot)));
        assert_matches!(lexer.next(), Some(Ok(Token::Err)));
        assert_matches!(lexer.next(), Some(Ok(Token::Equal)));
        assert_matches!(lexer.next(), Some(Ok(Token::ConstantIdent("ERR1"))));
        assert_matches!(lexer.next(), Some(Ok(Token::U32Assertw)));
        assert_matches!(lexer.next(), Some(Ok(Token::Dot)));
        assert_matches!(lexer.next(), Some(Ok(Token::Err)));
        assert_matches!(lexer.next(), Some(Ok(Token::Equal)));
        assert_matches!(lexer.next(), Some(Ok(Token::Int(2))));
        assert_matches!(lexer.next(), Some(Ok(Token::End)));
        assert_matches!(lexer.next(), Some(Ok(Token::Eof)));
    }
396
    /// `emit.event("abc")` lexes as `emit`, `.`, `event`, `(`, a quoted
    /// identifier, and `)` — the event name is not fused into one token.
    #[test]
    fn lex_emit() {
        let source_id = SourceId::default();
        let scanner = Scanner::new(
            "\
begin
    push.1
    emit.event(\"abc\")
end
",
        );
        // Keep only the token, dropping the start/end byte offsets.
        let mut lexer = Lexer::new(source_id, scanner).map(|result| result.map(|(_, t, _)| t));
        assert_matches!(lexer.next(), Some(Ok(Token::Begin)));
        assert_matches!(lexer.next(), Some(Ok(Token::Push)));
        assert_matches!(lexer.next(), Some(Ok(Token::Dot)));
        assert_matches!(lexer.next(), Some(Ok(Token::Int(1))));
        assert_matches!(lexer.next(), Some(Ok(Token::Emit)));
        assert_matches!(lexer.next(), Some(Ok(Token::Dot)));
        assert_matches!(lexer.next(), Some(Ok(Token::Event)));
        assert_matches!(lexer.next(), Some(Ok(Token::Lparen)));
        assert_matches!(lexer.next(), Some(Ok(Token::QuotedIdent("abc"))));
        assert_matches!(lexer.next(), Some(Ok(Token::Rparen)));
        assert_matches!(lexer.next(), Some(Ok(Token::End)));
        assert_matches!(lexer.next(), Some(Ok(Token::Eof)));
    }
422
    /// An unlexable control character (U+0001) after whitespace yields
    /// `InvalidToken` with a span covering exactly that character (bytes 6..7).
    #[test]
    fn lex_invalid_token_after_whitespace_returns_error() {
        let source_id = SourceId::default();
        let scanner = Scanner::new("begin \u{0001}\nend\n");
        // Keep only the token, dropping the start/end byte offsets.
        let mut lexer = Lexer::new(source_id, scanner).map(|result| result.map(|(_, t, _)| t));

        assert_matches!(lexer.next(), Some(Ok(Token::Begin)));
        assert_matches!(
            lexer.next(),
            Some(Err(ParsingError::InvalidToken { span })) if span.into_range() == (6..7)
        );
    }
435
    /// A bare `_` (followed by `-`) is rejected as an invalid token, and its
    /// error span points at the underscore only (bytes 6..7).
    #[test]
    fn lex_invalid_underscore_token_span() {
        let source_id = SourceId::default();
        let scanner = Scanner::new("begin _-\nend\n");
        // Keep only the token, dropping the start/end byte offsets.
        let mut lexer = Lexer::new(source_id, scanner).map(|result| result.map(|(_, t, _)| t));

        assert_matches!(lexer.next(), Some(Ok(Token::Begin)));
        assert_matches!(
            lexer.next(),
            Some(Err(ParsingError::InvalidToken { span })) if span.into_range() == (6..7)
        );
    }
448
    /// Byte offsets for single-character tokens: `@` spans 0..1 and the
    /// one-letter constant ident `A` spans 2..3 (the newline is skipped).
    #[test]
    fn lex_single_char_token_and_ident_spans() {
        let source_id = SourceId::default();
        let scanner = Scanner::new("@\nA\n");
        // Keep the full (start, token, end) triples to inspect spans.
        let mut lexer = Lexer::new(source_id, scanner);

        assert_matches!(lexer.next(), Some(Ok((0, Token::At, 1))));
        assert_matches!(lexer.next(), Some(Ok((2, Token::ConstantIdent("A"), 3))));
    }
458
459 #[test]
460 fn overlong_path_component_is_rejected_without_panic() {
461 use std::{
462 panic::{AssertUnwindSafe, catch_unwind},
463 sync::Arc,
464 };
465
466 use crate::{
467 debuginfo::DefaultSourceManager,
468 parse::{Parse, ParseOptions},
469 };
470
471 let big_component = "a".repeat(256);
472 let source = format!("begin\n exec.{big_component}::x::foo\nend\n");
473
474 let source_manager = Arc::new(DefaultSourceManager::default());
475 let parsed = catch_unwind(AssertUnwindSafe(|| {
476 source.parse_with_options(source_manager, ParseOptions::default())
477 }));
478
479 assert!(parsed.is_ok(), "parsing panicked, expected a structured error");
480 let err = parsed.unwrap().expect_err("parsing succeeded, expected an error");
481 crate::assert_diagnostic!(err, "this reference is invalid without a corresponding import");
482 }
483}