Skip to main content

miden_assembly_syntax/parser/
mod.rs

1mod cst;
2mod error;
3#[cfg(test)]
4mod tests;
5mod value;
6
7use alloc::{boxed::Box, collections::BTreeSet, string::ToString, sync::Arc, vec::Vec};
8
9use miden_debug_types::{SourceFile, SourceLanguage, SourceManager, Uri};
10use miden_utils_diagnostics::{IntoDiagnostic, Report};
11
12pub use self::{
13    cst::parse_inline_masm,
14    error::{BinErrorKind, HexErrorKind, LiteralErrorKind, ParsingError},
15    value::{IntValue, PushValue, WordValue},
16};
17use crate::{Path, ast, sema};
18
19// MODULE PARSER
20// ================================================================================================
21
22/// This is a wrapper around the lower-level parser infrastructure which handles orchestrating all
23/// of the pieces needed to parse a [ast::Module] from source, and run semantic analysis on it.
24#[derive(Default)]
25pub struct ModuleParser {
26    /// The kind of module we're parsing, if known in advance.
27    ///
28    /// This is used when performing semantic analysis to detect when various invalid constructions
29    /// are encountered, such as use of the `syscall` instruction in a kernel module.
30    kind: Option<ast::ModuleKind>,
31    /// A set of interned strings allocated during parsing/semantic analysis.
32    ///
33    /// This is a very primitive and imprecise way of interning strings, but was the least invasive
34    /// at the time the new parser was implemented. In essence, we avoid duplicating allocations
35    /// for frequently occurring strings, by tracking which strings we've seen before, and
36    /// sharing a reference counted pointer instead.
37    ///
38    /// We may want to replace this eventually with a proper interner, so that we can also gain the
39    /// benefits commonly provided by interned string handles (e.g. cheap equality comparisons, no
40    /// ref- counting overhead, copyable and of smaller size).
41    ///
42    /// Note that [Ident], [ProcedureName], [LibraryPath] and others are all implemented in terms
43    /// of either the actual reference-counted string, e.g. `Arc<str>`, or in terms of [Ident],
44    /// which is essentially the former wrapped in a [SourceSpan]. If we ever replace this with
45    /// a better interner, we will also want to update those types to be in terms of whatever
46    /// the handle type of the interner is.
47    interned: BTreeSet<Arc<str>>,
48    /// When true, all warning diagnostics are promoted to error severity
49    warnings_as_errors: bool,
50}
51
52impl ModuleParser {
53    /// Construct a new parser for the given `kind` of [ast::Module].
54    pub fn new(kind: Option<ast::ModuleKind>) -> Self {
55        Self {
56            kind,
57            interned: Default::default(),
58            warnings_as_errors: false,
59        }
60    }
61
62    /// Configure this parser so that any warning diagnostics are promoted to errors.
63    pub fn set_warnings_as_errors(&mut self, yes: bool) {
64        self.warnings_as_errors = yes;
65    }
66
67    /// Parse a [ast::Module] from `source`, and give it the provided `path`.
68    ///
69    /// If `path` is unset, then it must be derivable in one of two ways:
70    ///
71    /// 1. From a `namespace` declaration in the module source
72    /// 2. Inferred as `$exec` from the presence of a `begin .. end` block in the module source
73    ///
74    /// If neither is present, then an error will be raised. It can be fixed by simply providing
75    /// `path` explicitly.
76    pub fn parse(
77        &mut self,
78        path: Option<&Path>,
79        source: Arc<SourceFile>,
80        source_manager: Arc<dyn SourceManager>,
81    ) -> Result<Box<ast::Module>, Report> {
82        use alloc::borrow::Cow;
83
84        let path = match path {
85            Some(path) => Some(Arc::<Path>::from(
86                path.canonicalize()
87                    .and_then(|p| p.to_absolute().map(Cow::into_owned))
88                    .into_diagnostic()?,
89            )),
90            None => None,
91        };
92        let forms = parse_forms_internal(source.clone(), &mut self.interned)?;
93        sema::analyze(
94            source,
95            self.kind,
96            path.as_deref(),
97            forms,
98            self.warnings_as_errors,
99            source_manager,
100        )
101        .map_err(Report::new)
102    }
103
104    /// Parse a [ast::Module], `name`, from `path`.
105    #[cfg(feature = "std")]
106    pub fn parse_file<P>(
107        &mut self,
108        path: Option<&Path>,
109        file_path: P,
110        source_manager: Arc<dyn SourceManager>,
111    ) -> Result<Box<ast::Module>, Report>
112    where
113        P: AsRef<std::path::Path>,
114    {
115        use miden_debug_types::SourceManagerExt;
116        use miden_utils_diagnostics::{IntoDiagnostic, WrapErr};
117
118        let file_path = file_path.as_ref();
119        let source_file =
120            source_manager.load_file(file_path).into_diagnostic().wrap_err_with(|| {
121                format!("failed to load source file from '{}'", file_path.display())
122            })?;
123        self.parse(path, source_file, source_manager)
124    }
125
126    /// Parse a [ast::Module], `name`, from `source`.
127    pub fn parse_str(
128        &mut self,
129        path: Option<&Path>,
130        source: impl ToString,
131        source_manager: Arc<dyn SourceManager>,
132    ) -> Result<Box<ast::Module>, Report> {
133        use miden_debug_types::SourceContent;
134
135        let source = source.to_string();
136        let source_file = match path {
137            Some(path) => {
138                let uri = Uri::from(path.as_str().to_string().into_boxed_str());
139                let content =
140                    SourceContent::new(SourceLanguage::Masm, uri.clone(), source.into_boxed_str());
141                source_manager.load_from_raw_parts(uri, content)
142            },
143            None => source_manager.load_anonymous(SourceLanguage::Masm, source),
144        };
145        self.parse(path, source_file, source_manager)
146    }
147}
148
/// Test helper which parses `source` into its raw [ast::Form]s, rather than into an
/// [ast::Module].
///
/// A fresh, empty set of interned strings is used for each call.
///
/// NOTE: This does _not_ run semantic analysis.
#[cfg(any(test, feature = "testing"))]
pub fn parse_forms(source: Arc<SourceFile>) -> Result<Vec<ast::Form>, Report> {
    parse_forms_internal(source, &mut BTreeSet::new())
}
158
159/// Parse `source` as a set of [ast::Form]s
160///
161/// Aside from catching syntax errors, this does little validation of the resulting forms, that is
162/// handled by semantic analysis, which the caller is expected to perform next.
163fn parse_forms_internal(
164    source: Arc<SourceFile>,
165    interned: &mut BTreeSet<Arc<str>>,
166) -> Result<Vec<ast::Form>, Report> {
167    cst::parse_forms(source, interned)
168}
169
170// DIRECTORY PARSER
171// ================================================================================================
172
/// Parse the module stored in the file `root`, along with every submodule reachable from it.
///
/// `root` must refer to an existing `.masm` file. It is parsed as a module of the given `kind`
/// (with `namespace` as its path, if provided), and the submodules it declares are then
/// resolved relative to its parent directory and parsed in turn via [walk_module_tree].
///
/// On success, returns the root module together with all of the submodules that were
/// discovered.
///
/// # Errors
///
/// An error is returned if:
///
/// * `root` cannot be canonicalized, lacks the expected file extension, is not a file, or has
///   no parent directory
/// * the root module, or any submodule, fails to load or parse
/// * two modules end up with the same module path
#[cfg(feature = "std")]
pub fn read_modules_from_root(
    root: impl AsRef<std::path::Path>,
    namespace: Option<Arc<Path>>,
    kind: Option<ast::ModuleKind>,
    source_manager: Arc<dyn SourceManager>,
    warnings_as_errors: bool,
) -> Result<(Box<ast::Module>, Vec<Box<ast::Module>>), Report> {
    use miden_utils_diagnostics::report;

    let given_root = root.as_ref();
    let root: Arc<std::path::Path> = match given_root.canonicalize() {
        Ok(canonical) => Arc::from(canonical.into_boxed_path()),
        Err(err) => {
            return Err(Report::msg(format!(
                "invalid root module path '{}': {err}",
                given_root.display()
            )));
        },
    };

    // The root module must have the expected file extension (compared case-insensitively)
    let has_module_extension = root
        .extension()
        .is_some_and(|ext| ext.eq_ignore_ascii_case(ast::Module::FILE_EXTENSION));
    if !has_module_extension {
        return Err(Report::msg(format!(
            "invalid root module path '{}': expected a .masm file",
            root.display()
        )));
    }

    // ...and it must actually be a file
    if !root.is_file() {
        return Err(Report::msg(format!(
            "invalid root module path '{}': not a file",
            root.display()
        )));
    }

    // Submodules of the root module are resolved relative to the directory containing it
    let Some(root_dir) = root.parent().map(std::path::Path::to_path_buf) else {
        return Err(Report::msg(format!(
            "invalid root module path '{}': expected path to have a parent directory",
            root.display()
        )));
    };

    let mut parser = ModuleParser::new(kind);
    parser.set_warnings_as_errors(warnings_as_errors);
    let root_ast = parser.parse_file(namespace.as_deref(), &root, source_manager.clone())?;

    // From here on, the namespace is whatever path the parsed root module ended up with
    let namespace = Arc::<Path>::from(root_ast.path().to_path_buf().into_boxed_path());
    let submodules = root_ast.submodules().to_vec();

    // Track the module paths seen so far, so that duplicates can be rejected
    let mut seen = BTreeSet::<Arc<Path>>::from_iter([namespace.clone()]);
    let mut modules = Vec::new();
    walk_module_tree(
        namespace,
        root,
        root_dir,
        submodules,
        source_manager,
        warnings_as_errors,
        |module| {
            if seen.insert(module.path().into()) {
                modules.push(module);
                Ok(())
            } else {
                Err(report!("duplicate module '{0}'", module.path()))
            }
        },
    )?;

    Ok((root_ast, modules))
}
258
/// Locate, parse, and visit every module transitively declared as a submodule of the module
/// stored at `root`.
///
/// `submodules` are the submodule declarations of the root module, `namespace` is the root
/// module's path, and `current_dir` is the directory in which its submodules are looked up.
///
/// A submodule named `name`, declared by a module whose submodule directory is `<dir>`, is
/// resolved as follows (with any `-` in `name` replaced by `_`):
///
/// * `<dir>/<name>.masm`, or
/// * `<dir>/<name>/mod.masm`
///
/// In either case, the submodules declared by that module are looked up in `<dir>/<name>/`.
///
/// Each submodule is parsed as a library module whose path is the parent's path joined with the
/// submodule name, and is then passed to `callback`. Modules are processed using a LIFO
/// worklist, so no particular visitation order between siblings should be relied upon.
///
/// # Errors
///
/// Traversal stops at the first error, which may be:
///
/// * a submodule that resolves to the file of the module declaring it
/// * a submodule for which both candidate files exist
/// * a submodule for which neither candidate file exists
/// * a source file that is reached by more than one submodule declaration
/// * any error raised while loading or parsing a submodule
/// * any error returned by `callback`
#[cfg(feature = "std")]
pub fn walk_module_tree<F>(
    namespace: Arc<Path>,
    root: Arc<std::path::Path>,
    current_dir: std::path::PathBuf,
    submodules: Vec<ast::SubmoduleDecl>,
    source_manager: Arc<dyn SourceManager>,
    warnings_as_errors: bool,
    mut callback: F,
) -> Result<(), Report>
where
    F: FnMut(Box<ast::Module>) -> Result<(), Report>,
{
    use miden_debug_types::{Spanned, Uri};

    /// A submodule declaration which has yet to be resolved and parsed
    struct ModuleEntry {
        /// The declared name of the submodule
        pub name: ast::Ident,
        /// The path of the module which declared this submodule
        pub namespace: Arc<Path>,
        /// The directory in which to look for this submodule
        pub directory: Arc<std::path::Path>,
        /// The source file of the module which declared this submodule
        pub parent: Arc<std::path::Path>,
    }

    // Gathers the span of a submodule name, and the source file containing it (if it can be
    // found), for use in diagnostics. Only called on error paths.
    let locate = |name: &ast::Ident| {
        let span = name.span();
        let source_file = source_manager.get(span.source_id()).ok();
        (span, source_file)
    };

    let current_dir = Arc::<std::path::Path>::from(current_dir.into_boxed_path());
    // Source files which have already been claimed by a module, starting with the root module
    let mut visited = BTreeSet::<Arc<std::path::Path>>::from_iter([root.clone()]);
    let mut worklist = submodules
        .iter()
        .map(|sm| ModuleEntry {
            name: sm.name.clone(),
            namespace: namespace.clone(),
            directory: current_dir.clone(),
            parent: root.clone(),
        })
        .collect::<Vec<_>>();

    while let Some(entry) = worklist.pop() {
        let basename = entry.name.replace('-', "_");
        let mod_dir = entry.directory.join(&basename);
        // NOTE: The extension is appended to the full basename, rather than set using
        // `with_extension`, because the latter replaces everything after the last `.` in the
        // file name. A basename containing a `.` would then resolve to the wrong file, and would
        // no longer agree with `mod_dir`.
        let mod_file = entry.directory.join(format!("{basename}.masm"));
        let mod_dir_mod_masm = mod_dir.join("mod.masm");

        // If the parent module is at `mod_file`, then the parent module and submodule have the
        // same name. We explicitly do not allow this, because what we should do is unclear. We
        // could attempt to add an extra level of nesting, e.g.
        // `<mod_dir>/<basename>/<basename>.masm` or `<mod_dir>/<basename>/<basename>/mod.masm`,
        // but that may not be intended.
        if mod_file.as_path() == &*entry.parent {
            let (span, source_file) = locate(&entry.name);
            return Err(ParsingError::SelfReferentialSubmodule {
                name: entry.name,
                parent_module_uri: Uri::from(entry.parent),
                span,
                source_file,
            }
            .into());
        }

        // Exactly one of the two candidate locations must exist
        let actual_path = match (mod_file.is_file(), mod_dir_mod_masm.is_file()) {
            (true, false) => mod_file,
            (false, true) => mod_dir_mod_masm,
            (true, true) => {
                let (span, source_file) = locate(&entry.name);
                return Err(ParsingError::AmbiguousSubmoduleLocation {
                    name: entry.name,
                    first: Uri::from(mod_file),
                    second: Uri::from(mod_dir_mod_masm),
                    span,
                    source_file,
                }
                .into());
            },
            (false, false) => {
                let (span, source_file) = locate(&entry.name);
                return Err(ParsingError::UndefinedSubmodule {
                    name: entry.name,
                    basename: basename.into_boxed_str(),
                    directory: Uri::from(mod_dir),
                    span,
                    source_file,
                }
                .into());
            },
        };

        // Each source file may back at most one module
        let actual_path = Arc::<std::path::Path>::from(actual_path);
        if !visited.insert(actual_path.clone()) {
            let (span, source_file) = locate(&entry.name);
            return Err(ParsingError::DuplicateSubmoduleSource {
                name: entry.name,
                module_uri: Uri::from(actual_path.as_ref()),
                span,
                source_file,
            }
            .into());
        }

        let mut parser = ModuleParser::new(Some(ast::ModuleKind::Library));
        parser.set_warnings_as_errors(warnings_as_errors);
        let module_path = Arc::<Path>::from(entry.namespace.join(&entry.name).into_boxed_path());
        let ast = parser.parse_file(Some(&module_path), &actual_path, source_manager.clone())?;

        // Queue up the submodules of this module; they are always resolved relative to
        // `mod_dir`, regardless of which of the two candidate files this module was loaded from.
        let directory = Arc::<std::path::Path>::from(mod_dir);
        worklist.extend(ast.submodules().iter().map(|sm| ModuleEntry {
            name: sm.name.clone(),
            namespace: module_path.clone(),
            directory: directory.clone(),
            parent: actual_path.clone(),
        }));

        callback(ast)?;
    }

    Ok(())
}