Skip to main content

sysml_v2_parser/parser/
parse.rs

//! Public parse entry points.
2
3use super::collect_errors::{
4    collect_implicit_attribute_in_part_def_warnings, collect_recovery_errors,
5    collect_requirement_id_dialect_diagnostics,
6};
7use super::diagnostics::{
8    dedup_errors, extra_closing_brace_at_eof, fragment_to_found_snippet, has_unclosed_brace,
9    is_illegal_top_level_definition, missing_closing_brace_error,
10    missing_closing_brace_error_at_eof, nom_err_to_parse_error, root_body_recovery_error,
11    root_body_scope, suppress_diagnostic_cascades, suppress_redundant_closing_brace_errors,
12    trim_ascii_start, unexpected_closing_brace_parse_error,
13};
14use super::lex;
15use super::package;
16use crate::ast::RootNamespace;
17use crate::error::{DiagnosticCategory, DiagnosticSeverity, ParseError};
18use nom_locate::LocatedSpan;
19/// Result of parsing with error recovery: a (possibly partial) AST and zero or more diagnostics.
20#[derive(Debug, Clone)]
21pub struct ParseResult {
22    /// Root namespace; contains all successfully parsed top-level elements (partial when errors occurred).
23    pub root: RootNamespace,
24    /// All parse errors encountered (multiple when recovery is used).
25    pub errors: Vec<ParseError>,
26}
27
28impl ParseResult {
29    /// True if the document parsed fully with no errors.
30    pub fn is_ok(&self) -> bool {
31        self.errors.is_empty()
32    }
33}
34/// Parse full input; must consume entire input. Strips UTF-8 BOM if present.
35#[allow(clippy::result_large_err)]
36pub fn parse_root(input: &str) -> Result<RootNamespace, ParseError> {
37    let bytes = input
38        .strip_prefix('\u{FEFF}')
39        .map(str::as_bytes)
40        .unwrap_or_else(|| input.as_bytes());
41    let located = LocatedSpan::new(bytes);
42    match package::root_namespace(located) {
43        Ok((rest, root)) => {
44            if !rest.fragment().is_empty() && has_unclosed_brace(bytes) {
45                return Err(missing_closing_brace_error_at_eof(bytes));
46            }
47            if rest.fragment().is_empty() {
48                log::debug!("parse_root: success, {} top-level elements", root.elements.len());
49                Ok(root)
50            } else if trim_ascii_start(rest.fragment()).starts_with(b"}") {
51                Err(unexpected_closing_brace_parse_error(rest))
52            } else {
53                let offset = located.location_offset() + located.fragment().len() - rest.fragment().len();
54                let unconsumed = rest.fragment();
55                let first_80 = unconsumed.get(..80.min(unconsumed.len())).unwrap_or(unconsumed);
56                log::debug!(
57                    "parse_root: expected end of input; parsed {} elements; unconsumed len={}, offset={}, first 80 bytes: {:?}",
58                    root.elements.len(),
59                    unconsumed.len(),
60                    offset,
61                    first_80,
62                );
63                log::debug!(
64                    "parse_root: unconsumed as str: {:?}",
65                    String::from_utf8_lossy(first_80),
66                );
67                let (found_snippet, found_len) = fragment_to_found_snippet(rest.fragment());
68                let mut pe = ParseError::new("expected end of input")
69                    .with_location(offset, rest.location_line(), rest.get_column())
70                    .with_length(found_len.max(1))
71                    .with_code("expected_end_of_input")
72                    .with_category(DiagnosticCategory::ParseError);
73                if !found_snippet.is_empty() {
74                    pe = pe.with_found(found_snippet);
75                }
76                if root.elements.is_empty() && is_illegal_top_level_definition(rest.fragment()) {
77                    pe = pe
78                        .with_code("illegal_top_level_definition")
79                        .with_expected("'package', 'namespace', or 'import'")
80                        .with_suggestion(
81                            "Wrap this declaration in `package ... { ... }` or `namespace ... { ... }`.",
82                        );
83                    pe.message = "illegal top-level definition".to_string();
84                }
85                Err(pe)
86            }
87        }
88        Err(nom::Err::Error(e)) => Err(missing_closing_brace_error(bytes, e.input).unwrap_or_else(|| {
89            nom_err_to_parse_error(
90                &e,
91                None,
92                Some("'package', 'namespace', or 'import' at top level; or valid element in package body"),
93            )
94        })),
95        Err(nom::Err::Failure(e)) => Err(missing_closing_brace_error(bytes, e.input).unwrap_or_else(|| {
96            nom_err_to_parse_error(
97                &e,
98                None,
99                Some("'package', 'namespace', or 'import' at top level; or valid element in package body"),
100            )
101        })),
102        Err(nom::Err::Incomplete(_)) => Err(
103            ParseError::new("unexpected end of input")
104                .with_code("unexpected_eof")
105                .with_category(DiagnosticCategory::ParseError),
106        ),
107    }
108}
109
110const MAX_RECOVERY_ERRORS: usize = 100;
111
112/// Parse input with error recovery: collects multiple diagnostics and returns a partial AST when errors occur.
113/// Use this for language servers so the user sees all parse errors and features (e.g. hover) can use the partial AST.
114pub fn parse_with_diagnostics(input: &str) -> ParseResult {
115    let bytes = input
116        .strip_prefix('\u{FEFF}')
117        .map(str::as_bytes)
118        .unwrap_or_else(|| input.as_bytes());
119    let located = LocatedSpan::new(bytes);
120
121    let mut elements = Vec::new();
122    let mut errors = Vec::new();
123
124    let (mut input, _) = match lex::ws_and_comments(located) {
125        Ok(x) => x,
126        Err(_) => {
127            return ParseResult {
128                root: RootNamespace { elements: vec![] },
129                errors: vec![ParseError::new("invalid input")
130                    .with_code("invalid_input")
131                    .with_category(DiagnosticCategory::ParseError)],
132            };
133        }
134    };
135
136    while errors.len() < MAX_RECOVERY_ERRORS {
137        // Skip leading ws/comments; if nothing left, we're done (avoids parsing "" as root_element).
138        let (rest, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
139        input = rest;
140        if input.fragment().is_empty() {
141            break;
142        }
143        match package::root_element(input) {
144            Ok((rest, elem)) => {
145                elements.push(elem);
146                input = rest;
147            }
148            Err(nom::Err::Error(e)) | Err(nom::Err::Failure(e)) => {
149                let (trimmed, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
150                if trim_ascii_start(trimmed.fragment()).starts_with(b"}") {
151                    errors.push(unexpected_closing_brace_parse_error(trimmed));
152                    let skip_result = lex::skip_to_next_sync_point(trimmed);
153                    match skip_result {
154                        Ok((rest, _)) => input = rest,
155                        Err(_) => break,
156                    }
157                    continue;
158                }
159                if errors.is_empty()
160                    && has_unclosed_brace(bytes)
161                    && (lex::starts_with_keyword(trimmed.fragment(), b"package")
162                        || lex::starts_with_keyword(trimmed.fragment(), b"namespace")
163                        || lex::starts_with_keyword(trimmed.fragment(), b"library")
164                        || lex::starts_with_keyword(trimmed.fragment(), b"standard"))
165                {
166                    errors.push(missing_closing_brace_error_at_eof(bytes));
167                    break;
168                }
169                if let Some(scope) = root_body_scope(input.fragment()) {
170                    let (error_input, _) = lex::ws_and_comments(e.input).unwrap_or((e.input, ()));
171                    if error_input.fragment().starts_with(b"{") {
172                        errors.push(root_body_recovery_error(error_input, scope));
173                        match lex::skip_statement_or_block(error_input) {
174                            Ok((rest, _))
175                                if rest.location_offset() > error_input.location_offset() =>
176                            {
177                                input = rest;
178                                continue;
179                            }
180                            _ => {}
181                        }
182                    }
183                }
184                let pe = missing_closing_brace_error(bytes, e.input).unwrap_or_else(|| {
185                    nom_err_to_parse_error(&e, None, Some("'package', 'namespace', or 'import'"))
186                });
187                errors.push(pe);
188                let skip_result = lex::skip_to_next_sync_point(e.input);
189                match skip_result {
190                    Ok((rest, _)) => input = rest,
191                    Err(_) => break,
192                }
193            }
194            Err(nom::Err::Incomplete(_)) => {
195                errors.push(
196                    ParseError::new("unexpected end of input")
197                        .with_location(
198                            input.location_offset(),
199                            input.location_line(),
200                            input.get_column(),
201                        )
202                        .with_length(1)
203                        .with_code("unexpected_eof")
204                        .with_category(DiagnosticCategory::ParseError),
205                );
206                break;
207            }
208        }
209    }
210
211    let (input, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
212
213    if input.fragment().is_empty()
214        && !errors.iter().any(|e| {
215            matches!(
216                e.code.as_deref(),
217                Some("missing_closing_brace") | Some("unexpected_closing_brace")
218            )
219        })
220    {
221        if let Some(err) = extra_closing_brace_at_eof(bytes) {
222            errors.push(err);
223        } else if has_unclosed_brace(bytes) {
224            errors.push(missing_closing_brace_error_at_eof(bytes));
225        }
226    }
227
228    if !input.fragment().is_empty()
229        && !errors
230            .iter()
231            .any(|e| e.code.as_deref() == Some("missing_closing_brace"))
232    {
233        if trim_ascii_start(input.fragment()).starts_with(b"}") {
234            errors.push(unexpected_closing_brace_parse_error(input));
235        } else {
236            let (found_snippet, found_len) = fragment_to_found_snippet(input.fragment());
237            let mut pe = ParseError::new("expected end of input")
238                .with_location(
239                    input.location_offset(),
240                    input.location_line(),
241                    input.get_column(),
242                )
243                .with_length(found_len.max(1))
244                .with_code("expected_end_of_input")
245                .with_severity(DiagnosticSeverity::Error)
246                .with_category(DiagnosticCategory::ParseError);
247            if !found_snippet.is_empty() {
248                pe = pe.with_found(found_snippet);
249            }
250            errors.push(pe);
251        }
252    }
253
254    errors.extend(collect_recovery_errors(&RootNamespace {
255        elements: elements.clone(),
256    }));
257    errors.extend(collect_implicit_attribute_in_part_def_warnings(bytes));
258    errors.extend(collect_requirement_id_dialect_diagnostics(bytes));
259    errors = suppress_redundant_closing_brace_errors(errors);
260    errors = dedup_errors(errors);
261    errors = suppress_diagnostic_cascades(errors);
262
263    ParseResult {
264        root: RootNamespace { elements },
265        errors,
266    }
267}