Skip to main content

miden_assembly_syntax/parser/cst/
mod.rs

1mod blocks;
2mod context;
3mod forms;
4mod fragments;
5mod instructions;
6
7use alloc::{collections::BTreeSet, string::String, sync::Arc, vec::Vec};
8
9use miden_debug_types::{SourceFile, SourceSpan};
10use miden_utils_diagnostics::LabeledSpan;
11
12use self::{context::LoweringContext, forms::lower_source_file};
13use crate::{
14    Report, ast,
15    diagnostics::{Diagnostic, Severity, miette, miette::MietteDiagnostic},
16};
17
18/// User-facing syntax diagnostics produced by the CST-backed parser entry point.
19///
20/// The CST parser itself can accumulate multiple recovery diagnostics; this wrapper converts those
21/// diagnostics into the severity/label structure used by the existing `miden-assembly-syntax`
22/// parser surface.
23#[derive(Debug, thiserror::Error, Diagnostic)]
24pub enum SyntaxError {
25    #[error("{message}")]
26    #[diagnostic(severity(Error))]
27    Error {
28        message: String,
29        #[label(collection)]
30        labels: Vec<LabeledSpan>,
31        #[help]
32        help: Option<String>,
33    },
34    #[error("{message}")]
35    #[diagnostic(severity(Warning))]
36    Warning {
37        message: String,
38        #[label(collection)]
39        labels: Vec<LabeledSpan>,
40        #[help]
41        help: Option<String>,
42    },
43    #[error("{message}")]
44    #[diagnostic(severity(Advice))]
45    Advice {
46        message: String,
47        #[label(collection)]
48        labels: Vec<LabeledSpan>,
49        #[help]
50        help: Option<String>,
51    },
52    #[error("invalid syntax")]
53    #[diagnostic(help("Multiple syntax errors were identified, see diagnostics for more details"))]
54    Multiple {
55        #[related]
56        diagnostics: Vec<SyntaxError>,
57    },
58}
59
60/// Parses zero or more AST forms from `source` using the CST-backed frontend.
61///
62/// This function is the public entry point for the CST backend. It first runs the lossless CST
63/// parser, converts any CST diagnostics into the existing parser-facing report surface, and only
64/// then lowers the recovered CST into the historic `Vec<Form>` boundary used by semantic analysis.
65pub fn parse_forms(
66    source: Arc<SourceFile>,
67    interned: &mut BTreeSet<Arc<str>>,
68) -> Result<Vec<ast::Form>, Report> {
69    let mut parse = miden_assembly_syntax_cst::parse_source_file(source.clone());
70    let diagnostics = parse.take_diagnostics();
71    if diagnostics.is_empty() {
72        let mut context = LoweringContext::new(parse, interned);
73        lower_source_file(&mut context).map_err(move |err| err.with_source_code(source))
74    } else {
75        Err(Report::from(SyntaxError::from(diagnostics)).with_source_code(source))
76    }
77}
78
79/// This is like `parse_forms`, but for parsing the content of inline MASM blocks in languages like
80/// Rust.
81///
82/// Inline MASM is parsed as an [ast::Block], as if it was the body of a procedure definition. This
83/// means that top-level items such as imports and constant declarations are not allowed.
84///
85/// An optional span can be provided, in which case only the contents of the span are parsed as the
86/// inline MASM.
87pub fn parse_inline_masm(
88    source: Arc<SourceFile>,
89    bounds: Option<SourceSpan>,
90    interned: &mut BTreeSet<Arc<str>>,
91) -> Result<ast::Block, Report> {
92    use miden_assembly_syntax_cst::ast::AstNode;
93    let mut parse = miden_assembly_syntax_cst::parse_inline_masm(source.clone(), bounds);
94    let diagnostics = parse.take_diagnostics();
95    if diagnostics.is_empty() {
96        let mut context = LoweringContext::new(parse, interned);
97        let cst_block = miden_assembly_syntax_cst::ast::Block::cast(context.parse().syntax())
98            .expect("inline masm root kind should always be Block");
99        blocks::lower_block(&mut context, &cst_block)
100            .map_err(move |err| Report::from(err).with_source_code(source))
101    } else {
102        Err(Report::from(SyntaxError::from(diagnostics)).with_source_code(source))
103    }
104}
105
106/// Converts recovered CST diagnostics into the user-facing syntax error surface.
107impl From<Vec<MietteDiagnostic>> for SyntaxError {
108    fn from(mut diagnostics: Vec<MietteDiagnostic>) -> Self {
109        if diagnostics.len() == 1 {
110            Self::from(diagnostics.pop().unwrap())
111        } else {
112            Self::Multiple {
113                diagnostics: diagnostics.into_iter().map(Self::from).collect(),
114            }
115        }
116    }
117}
118
119/// Converts a single CST diagnostic into the parser's severity-preserving syntax error wrapper.
120impl From<MietteDiagnostic> for SyntaxError {
121    fn from(value: MietteDiagnostic) -> Self {
122        let MietteDiagnostic {
123            message,
124            code: _,
125            severity,
126            help,
127            url: _,
128            labels,
129        } = value;
130
131        let severity = severity.unwrap_or(Severity::Error);
132        match severity {
133            Severity::Error => Self::Error {
134                message,
135                labels: labels.unwrap_or_default(),
136                help,
137            },
138            Severity::Warning => Self::Warning {
139                message,
140                labels: labels.unwrap_or_default(),
141                help,
142            },
143            Severity::Advice => Self::Advice {
144                message,
145                labels: labels.unwrap_or_default(),
146                help,
147            },
148        }
149    }
150}