// Shorthand for constructing a `SourceSpan` either from a half-open range
// (`span!(id, lo, hi)`) or from a single offset (`span!(id, at)`).
// NOTE(review): no call sites are visible in this file — presumably this is
// invoked from the lalrpop-generated grammar; confirm before removing.
macro_rules! span {
($id:expr, $l:expr, $r:expr) => {
::miden_debug_types::SourceSpan::new($id, $l..$r)
};
($id:expr, $i:expr) => {
::miden_debug_types::SourceSpan::at($id, $i)
};
}
// Pull in the lalrpop-generated parser as the `grammar` module.
// The lint expectations silence warnings we cannot fix in generated code.
lalrpop_util::lalrpop_mod!(
#[expect(clippy::all)]
#[expect(unused_lifetimes)]
grammar,
"/parser/grammar.rs"
);
mod error;
mod lexer;
mod scanner;
mod token;
use alloc::{boxed::Box, collections::BTreeSet, string::ToString, sync::Arc, vec::Vec};
use miden_debug_types::{SourceFile, SourceLanguage, SourceManager, Uri};
use miden_utils_diagnostics::Report;
pub use self::{
error::{BinErrorKind, HexErrorKind, LiteralErrorKind, ParsingError},
lexer::Lexer,
scanner::Scanner,
token::{BinEncodedValue, DocumentationType, IntValue, PushValue, Token, WordValue},
};
use crate::{Path, ast, sema};
// Error type produced by the generated parser; the `u32` location type matches
// the `(u32, Token, u32)` triples emitted by [Lexer] (see the lexer tests below).
type ParseError<'a> = lalrpop_util::ParseError<u32, Token<'a>, ParsingError>;
/// A reusable parser that turns Miden Assembly source text into [ast::Module]s.
#[derive(Default)]
pub struct ModuleParser {
/// The kind of module this parser produces (passed through to semantic analysis).
kind: ast::ModuleKind,
/// Cache of interned strings shared across parses to deduplicate identifiers.
interned: BTreeSet<Arc<str>>,
/// When true, warnings raised during analysis are promoted to errors.
warnings_as_errors: bool,
}
impl ModuleParser {
pub fn new(kind: ast::ModuleKind) -> Self {
Self {
kind,
interned: Default::default(),
warnings_as_errors: false,
}
}
pub fn set_warnings_as_errors(&mut self, yes: bool) {
self.warnings_as_errors = yes;
}
pub fn parse(
&mut self,
path: impl AsRef<Path>,
source: Arc<SourceFile>,
source_manager: Arc<dyn SourceManager>,
) -> Result<Box<ast::Module>, Report> {
let path = path.as_ref();
if let Err(err) = Path::validate(path.as_str()) {
return Err(Report::msg(err.to_string()).with_source_code(source.clone()));
}
let forms = parse_forms_internal(source.clone(), &mut self.interned)
.map_err(|err| Report::new(err).with_source_code(source.clone()))?;
sema::analyze(source, self.kind, path, forms, self.warnings_as_errors, source_manager)
.map_err(Report::new)
}
#[cfg(feature = "std")]
pub fn parse_file<N, P>(
&mut self,
name: N,
path: P,
source_manager: Arc<dyn SourceManager>,
) -> Result<Box<ast::Module>, Report>
where
N: AsRef<Path>,
P: AsRef<std::path::Path>,
{
use miden_debug_types::SourceManagerExt;
use miden_utils_diagnostics::{IntoDiagnostic, WrapErr};
let path = path.as_ref();
let source_file = source_manager
.load_file(path)
.into_diagnostic()
.wrap_err_with(|| format!("failed to load source file from '{}'", path.display()))?;
self.parse(name, source_file, source_manager)
}
pub fn parse_str(
&mut self,
name: impl AsRef<Path>,
source: impl ToString,
source_manager: Arc<dyn SourceManager>,
) -> Result<Box<ast::Module>, Report> {
use miden_debug_types::SourceContent;
let name = name.as_ref();
let uri = Uri::from(name.as_str().to_string().into_boxed_str());
let content = SourceContent::new(
SourceLanguage::Masm,
uri.clone(),
source.to_string().into_boxed_str(),
);
let source_file = source_manager.load_from_raw_parts(uri, content);
self.parse(name, source_file, source_manager)
}
}
/// Parse the raw top-level forms from `source`, using a throwaway intern cache.
///
/// Exposed for tests and the `testing` feature only.
#[cfg(any(test, feature = "testing"))]
pub fn parse_forms(source: Arc<SourceFile>) -> Result<Vec<ast::Form>, ParsingError> {
    parse_forms_internal(source, &mut BTreeSet::default())
}
/// Lex `source` and run the generated grammar over it, producing top-level forms.
///
/// Interned identifier strings are accumulated in `interned` so repeated parses
/// can share allocations.
fn parse_forms_internal(
    source: Arc<SourceFile>,
    interned: &mut BTreeSet<Arc<str>>,
) -> Result<Vec<ast::Form>, ParsingError> {
    let source_id = source.id();
    let lexer = Lexer::new(source_id, Scanner::new(source.as_str()));
    // Shared handle for the `[Felt; 4]` array type the grammar hands out.
    let felt_array_ty = Arc::new(ast::types::ArrayType::new(ast::types::Type::Felt, 4));
    grammar::FormsParser::new()
        .parse(source_id, interned, &felt_array_ty, core::marker::PhantomData, lexer)
        .map_err(|err| ParsingError::from_parse_error(source_id, err))
}
/// Walk `dir` recursively and parse every module file found, returning the parsed
/// modules ordered by their library path under `namespace`.
///
/// # Errors
///
/// Returns an error if `dir` is not a directory, if the root module file is present
/// at the top level, if any file fails to load or parse, or if two files map to the
/// same module path.
#[cfg(feature = "std")]
pub fn read_modules_from_dir(
    dir: impl AsRef<std::path::Path>,
    namespace: impl AsRef<Path>,
    source_manager: Arc<dyn SourceManager>,
    warnings_as_errors: bool,
) -> Result<impl Iterator<Item = Box<ast::Module>>, Report> {
    use std::collections::{BTreeMap, btree_map::Entry};

    use miden_utils_diagnostics::{IntoDiagnostic, WrapErr, report};

    use module_walker::{ModuleEntry, WalkModules};

    let dir = dir.as_ref();
    if !dir.is_dir() {
        return Err(report!("the provided path '{}' is not a valid directory", dir.display()));
    }
    if dir.join(ast::Module::ROOT_FILENAME).exists() {
        return Err(report!("{} is not allowed in the root directory", ast::Module::ROOT_FILENAME));
    }

    // Construct the parser once and reuse it for every file: `ModuleParser` keeps a
    // cache of interned identifier strings, and building it per-file (as before)
    // discarded that cache between modules for no benefit. The warnings flag is the
    // same for every file, so behavior is unchanged.
    let mut parser = ModuleParser::new(ast::ModuleKind::Library);
    parser.set_warnings_as_errors(warnings_as_errors);

    let mut modules = BTreeMap::default();
    let walker = WalkModules::new(namespace.as_ref().to_path_buf(), dir)
        .into_diagnostic()
        .wrap_err_with(|| format!("failed to load modules from '{}'", dir.display()))?;
    for entry in walker {
        let ModuleEntry { mut name, source_path } = entry?;
        // A root module file names its parent directory, not itself: drop the
        // trailing root component so the module path matches the directory.
        if name.last().unwrap() == ast::Module::ROOT {
            name.pop();
        }

        let ast = parser.parse_file(&name, &source_path, source_manager.clone())?;
        match modules.entry(name) {
            Entry::Occupied(ref entry) => {
                return Err(report!("duplicate module '{0}'", entry.key().clone()));
            },
            Entry::Vacant(entry) => {
                entry.insert(ast);
            },
        }
    }
    Ok(modules.into_values())
}
#[cfg(feature = "std")]
mod module_walker {
use std::{
ffi::OsStr,
fs::{self, DirEntry, FileType},
io,
path::{Path, PathBuf},
};
use miden_utils_diagnostics::{IntoDiagnostic, Report, report};
use crate::{Path as LibraryPath, PathBuf as LibraryPathBuf, ast::Module};
/// A module file discovered during the walk: the library path it maps to, and the
/// filesystem path of the file containing its source.
pub struct ModuleEntry {
pub name: LibraryPathBuf,
pub source_path: PathBuf,
}
/// An iterator over all module files under a root directory, yielding a
/// [ModuleEntry] per file, with module paths rooted at `namespace`.
pub struct WalkModules<'a> {
namespace: LibraryPathBuf,
root: &'a Path,
// Queue of directory entries still to visit; read errors are stored and
// surfaced lazily when the entry is popped.
stack: alloc::collections::VecDeque<io::Result<DirEntry>>,
}
impl<'a> WalkModules<'a> {
/// Start a walk of `path`, naming discovered modules under `namespace`.
pub fn new(namespace: LibraryPathBuf, path: &'a Path) -> io::Result<Self> {
use alloc::collections::VecDeque;
let stack = VecDeque::from_iter(fs::read_dir(path)?);
Ok(Self { namespace, root: path, stack })
}
/// Process a single directory entry.
///
/// Returns `Ok(None)` for entries that are skipped (directories, whose contents
/// are queued for later, and files without the module extension), `Ok(Some(..))`
/// for a module file, and an error for invalid layouts or unreadable names.
fn next_entry(
&mut self,
entry: &DirEntry,
ty: &FileType,
) -> Result<Option<ModuleEntry>, Report> {
if ty.is_dir() {
// Descend: queue the directory's children and move on.
let dir = entry.path();
self.stack.extend(fs::read_dir(dir).into_diagnostic()?);
return Ok(None);
}
let mut file_path = entry.path();
let is_module = file_path
.extension()
.map(|ext| ext == AsRef::<OsStr>::as_ref(Module::FILE_EXTENSION))
.unwrap_or(false);
if !is_module {
return Ok(None);
}
// Strip the extension so the remaining path components name the module.
file_path.set_extension("");
// A file `foo.<ext>` next to a directory `foo` would produce an ambiguous
// module path, so reject that layout outright.
if file_path.is_dir() {
return Err(report!(
"file and directory with same name are not allowed: {}",
file_path.display()
));
}
// Safe by construction: every queued entry came from a read_dir under root.
let relative_path = file_path
.strip_prefix(self.root)
.expect("expected path to be a child of the root directory");
// Build the library path by validating and appending each path component.
let mut libpath = self.namespace.clone();
for component in relative_path.iter() {
let component = component.to_str().ok_or_else(|| {
let p = entry.path();
report!("{} is an invalid directory entry", p.display())
})?;
LibraryPath::validate(component).into_diagnostic()?;
libpath.push(component);
}
Ok(Some(ModuleEntry { name: libpath, source_path: entry.path() }))
}
}
impl Iterator for WalkModules<'_> {
type Item = Result<ModuleEntry, Report>;
fn next(&mut self) -> Option<Self::Item> {
loop {
// Pop the next queued entry; pair it with its file type, converting
// either I/O failure into a diagnostic.
let entry = self
.stack
.pop_front()?
.and_then(|entry| entry.file_type().map(|ft| (entry, ft)))
.into_diagnostic();
match entry {
Ok((ref entry, ref file_type)) => {
// `transpose` turns Ok(None) (a skipped entry) into None, which
// keeps the loop going; anything else is yielded to the caller.
match self.next_entry(entry, file_type).transpose() {
None => {},
result => break result,
}
},
Err(err) => break Some(Err(err)),
}
}
}
}
}
#[cfg(test)]
mod tests {
use miden_core::assert_matches;
use miden_debug_types::SourceId;
use super::*;
// `exp.u9` lexes as the ExpU token followed by the exponent bit-width literal.
#[test]
fn lex_exp() {
let source_id = SourceId::default();
let scanner = Scanner::new("begin exp.u9 end");
let mut lexer = Lexer::new(source_id, scanner).map(|result| result.map(|(_, t, _)| t));
assert_matches!(lexer.next(), Some(Ok(Token::Begin)));
assert_matches!(lexer.next(), Some(Ok(Token::ExpU)));
assert_matches!(lexer.next(), Some(Ok(Token::Int(n))) if n == 9);
assert_matches!(lexer.next(), Some(Ok(Token::End)));
}
// A block mixing a constant declaration with `.err=` instruction suffixes lexes
// into the expected token sequence, terminated by an explicit Eof token.
#[test]
fn lex_block() {
let source_id = SourceId::default();
let scanner = Scanner::new(
"\
const ERR1 = 1
begin
u32assertw
u32assertw.err=ERR1
u32assertw.err=2
end
",
);
let mut lexer = Lexer::new(source_id, scanner).map(|result| result.map(|(_, t, _)| t));
assert_matches!(lexer.next(), Some(Ok(Token::Const)));
assert_matches!(lexer.next(), Some(Ok(Token::ConstantIdent("ERR1"))));
assert_matches!(lexer.next(), Some(Ok(Token::Equal)));
assert_matches!(lexer.next(), Some(Ok(Token::Int(1))));
assert_matches!(lexer.next(), Some(Ok(Token::Begin)));
assert_matches!(lexer.next(), Some(Ok(Token::U32Assertw)));
assert_matches!(lexer.next(), Some(Ok(Token::U32Assertw)));
assert_matches!(lexer.next(), Some(Ok(Token::Dot)));
assert_matches!(lexer.next(), Some(Ok(Token::Err)));
assert_matches!(lexer.next(), Some(Ok(Token::Equal)));
assert_matches!(lexer.next(), Some(Ok(Token::ConstantIdent("ERR1"))));
assert_matches!(lexer.next(), Some(Ok(Token::U32Assertw)));
assert_matches!(lexer.next(), Some(Ok(Token::Dot)));
assert_matches!(lexer.next(), Some(Ok(Token::Err)));
assert_matches!(lexer.next(), Some(Ok(Token::Equal)));
assert_matches!(lexer.next(), Some(Ok(Token::Int(2))));
assert_matches!(lexer.next(), Some(Ok(Token::End)));
assert_matches!(lexer.next(), Some(Ok(Token::Eof)));
}
// `emit.event("abc")` lexes into Emit/Dot/Event plus a parenthesized quoted ident.
#[test]
fn lex_emit() {
let source_id = SourceId::default();
let scanner = Scanner::new(
"\
begin
push.1
emit.event(\"abc\")
end
",
);
let mut lexer = Lexer::new(source_id, scanner).map(|result| result.map(|(_, t, _)| t));
assert_matches!(lexer.next(), Some(Ok(Token::Begin)));
assert_matches!(lexer.next(), Some(Ok(Token::Push)));
assert_matches!(lexer.next(), Some(Ok(Token::Dot)));
assert_matches!(lexer.next(), Some(Ok(Token::Int(1))));
assert_matches!(lexer.next(), Some(Ok(Token::Emit)));
assert_matches!(lexer.next(), Some(Ok(Token::Dot)));
assert_matches!(lexer.next(), Some(Ok(Token::Event)));
assert_matches!(lexer.next(), Some(Ok(Token::Lparen)));
assert_matches!(lexer.next(), Some(Ok(Token::QuotedIdent("abc"))));
assert_matches!(lexer.next(), Some(Ok(Token::Rparen)));
assert_matches!(lexer.next(), Some(Ok(Token::End)));
assert_matches!(lexer.next(), Some(Ok(Token::Eof)));
}
// An unlexable control character after whitespace yields InvalidToken whose span
// covers exactly that character (byte offsets 6..7 here).
#[test]
fn lex_invalid_token_after_whitespace_returns_error() {
let source_id = SourceId::default();
let scanner = Scanner::new("begin \u{0001}\nend\n");
let mut lexer = Lexer::new(source_id, scanner).map(|result| result.map(|(_, t, _)| t));
assert_matches!(lexer.next(), Some(Ok(Token::Begin)));
assert_matches!(
lexer.next(),
Some(Err(ParsingError::InvalidToken { span })) if span.into_range() == (6..7)
);
}
// A bare underscore is invalid; the error span points at the underscore itself.
#[test]
fn lex_invalid_underscore_token_span() {
let source_id = SourceId::default();
let scanner = Scanner::new("begin _-\nend\n");
let mut lexer = Lexer::new(source_id, scanner).map(|result| result.map(|(_, t, _)| t));
assert_matches!(lexer.next(), Some(Ok(Token::Begin)));
assert_matches!(
lexer.next(),
Some(Err(ParsingError::InvalidToken { span })) if span.into_range() == (6..7)
);
}
// The lexer's (start, token, end) triples carry correct byte offsets for
// single-character tokens and identifiers.
#[test]
fn lex_single_char_token_and_ident_spans() {
let source_id = SourceId::default();
let scanner = Scanner::new("@\nA\n");
let mut lexer = Lexer::new(source_id, scanner);
assert_matches!(lexer.next(), Some(Ok((0, Token::At, 1))));
assert_matches!(lexer.next(), Some(Ok((2, Token::ConstantIdent("A"), 3))));
}
// Regression test: a path component longer than the allowed maximum must produce
// a structured diagnostic, not a panic inside the parser.
#[test]
fn overlong_path_component_is_rejected_without_panic() {
use std::{
panic::{AssertUnwindSafe, catch_unwind},
sync::Arc,
};
use crate::{
debuginfo::DefaultSourceManager,
parse::{Parse, ParseOptions},
};
let big_component = "a".repeat(256);
let source = format!("begin\n exec.{big_component}::x::foo\nend\n");
let source_manager = Arc::new(DefaultSourceManager::default());
let parsed = catch_unwind(AssertUnwindSafe(|| {
source.parse_with_options(source_manager, ParseOptions::default())
}));
assert!(parsed.is_ok(), "parsing panicked, expected a structured error");
let err = parsed.unwrap().expect_err("parsing succeeded, expected an error");
crate::assert_diagnostic!(err, "this reference is invalid without a corresponding import");
}
}