Skip to main content

sysml_v2_parser/parser/
mod.rs

//! Nom-based parser for SysML v2 textual notation.
//!
//! Organized into submodules. The list below highlights a few of them and is not
//! exhaustive; see the `mod` declarations that follow for the complete set:
//! - [lex]: whitespace, comments, names, qualified names, skip helpers
//! - [attribute]: attribute definition and usage
//! - [import]: import and relationship body
//! - [part]: part definition and part usage
//! - [package]: package and root namespace
9
10mod action;
11mod alias;
12mod allocation;
13mod attribute;
14mod case;
15mod connection;
16mod constraint;
17mod dependency;
18mod enumeration;
19mod expr;
20mod flow;
21mod import;
22mod individual;
23mod interface;
24mod item;
25mod lex;
26mod metadata;
27mod metadata_annotation;
28mod occurrence;
29mod package;
30mod part;
31mod port;
32mod requirement;
33mod span;
34mod state;
35mod usecase;
36mod view;
37
38pub(crate) use span::{node_from_to, span_from_to, with_span, Input};
39
40use crate::ast::{
41    ActionDefBody, ActionDefBodyElement, ActionUsageBody, ActionUsageBodyElement, CalcDefBody,
42    CalcDefBodyElement, ConstraintDefBody, ConstraintDefBodyElement, PackageBody,
43    PackageBodyElement, ParseErrorNode, PartDefBody, PartDefBodyElement, PartUsageBody,
44    PartUsageBodyElement, RequirementDefBody, RequirementDefBodyElement, RootNamespace,
45    StateDefBody, StateDefBodyElement, UseCaseDefBody, UseCaseDefBodyElement, ViewBody,
46    ViewBodyElement, ViewDefBody, ViewDefBodyElement,
47};
48use crate::error::ParseError;
49use nom::error::Error;
50use nom::Parser;
51use nom_locate::LocatedSpan;
52
53/// Result of parsing with error recovery: a (possibly partial) AST and zero or more diagnostics.
54#[derive(Debug, Clone)]
55pub struct ParseResult {
56    /// Root namespace; contains all successfully parsed top-level elements (partial when errors occurred).
57    pub root: RootNamespace,
58    /// All parse errors encountered (multiple when recovery is used).
59    pub errors: Vec<ParseError>,
60}
61
62impl ParseResult {
63    /// True if the document parsed fully with no errors.
64    pub fn is_ok(&self) -> bool {
65        self.errors.is_empty()
66    }
67}
68
/// Upper bound, in bytes, on the "found" snippet built by `fragment_to_found_snippet`.
69const FOUND_SNIPPET_MAX_LEN: usize = 40;
/// Keywords that `is_illegal_top_level_definition` hands to `lex::starts_with_any_keyword`:
/// when the failing input starts with one of them, the error is reported as an
/// "illegal top-level definition" (a declaration that should be wrapped in a package or
/// namespace) instead of a generic parse error.
70const ILLEGAL_TOP_LEVEL_STARTERS: &[&[u8]] = &[
71    b"action",
72    b"actor",
73    b"alias",
74    b"allocate",
75    b"allocation",
76    b"attribute",
77    b"bind",
78    b"calc",
79    b"case",
80    b"concern",
81    b"connection",
82    b"constraint",
83    b"dependency",
84    b"enum",
85    b"flow",
86    b"interface",
87    b"item",
88    b"metadata",
89    b"occurrence",
90    b"part",
91    b"perform",
92    b"port",
93    b"ref",
94    b"require",
95    b"requirement",
96    b"satisfy",
97    b"state",
98    b"use",
99    b"verification",
100    b"view",
101    b"viewpoint",
102];
103
104/// Take a short snippet from the input at the error position for "found" display.
105/// Uses first line or first FOUND_SNIPPET_MAX_LEN bytes, UTF-8 with replacement char.
106fn fragment_to_found_snippet(fragment: &[u8]) -> (String, usize) {
107    let take = fragment
108        .iter()
109        .position(|&b| b == b'\n' || b == b'\r')
110        .map(|p| p.min(FOUND_SNIPPET_MAX_LEN))
111        .unwrap_or_else(|| fragment.len().min(FOUND_SNIPPET_MAX_LEN));
112    let slice = fragment.get(..take).unwrap_or(fragment);
113    let s = String::from_utf8_lossy(slice)
114        .replace('\n', "\\n")
115        .replace('\r', "\\r");
116    let len = slice.len();
117    (s.trim_end().to_string(), len)
118}
119
120pub(crate) fn recovery_found_snippet(input: Input<'_>) -> Option<String> {
121    let frag = input.fragment();
122    let take = frag
123        .iter()
124        .position(|&b| b == b'\n' || b == b'\r')
125        .unwrap_or(frag.len())
126        .min(60);
127    let snippet = String::from_utf8_lossy(&frag[..take]).trim().to_string();
128    if snippet.is_empty() {
129        None
130    } else {
131        Some(snippet)
132    }
133}
134
135/// Map nom error kind to a human-readable message for language server diagnostics.
136fn nom_error_kind_to_message(code: &nom::error::ErrorKind) -> &'static str {
137    use nom::error::ErrorKind;
138    match code {
139        ErrorKind::Tag => "expected keyword or token",
140        ErrorKind::Digit => "expected number",
141        ErrorKind::Alpha => "expected identifier",
142        ErrorKind::AlphaNumeric => "expected identifier",
143        ErrorKind::Space => "expected whitespace",
144        ErrorKind::MultiSpace => "expected whitespace",
145        ErrorKind::Eof => "unexpected end of input",
146        ErrorKind::TakeUntil => "expected terminator",
147        ErrorKind::TakeWhile1 => "expected token",
148        ErrorKind::Alt => {
149            "expected package, import, part, port, interface, alias, attribute, or action"
150        }
151        ErrorKind::Many0 | ErrorKind::Many1 => "expected list of elements",
152        _ => "parse error",
153    }
154}
155
156/// Map nom error kind to a specific code for LSP/quick fixes.
157fn nom_error_kind_to_code(code: &nom::error::ErrorKind) -> &'static str {
158    use nom::error::ErrorKind;
159    match code {
160        ErrorKind::Tag => "expected_keyword",
161        ErrorKind::Digit => "expected_number",
162        ErrorKind::Alpha | ErrorKind::AlphaNumeric => "expected_identifier",
163        ErrorKind::Space | ErrorKind::MultiSpace => "expected_whitespace",
164        ErrorKind::Eof => "unexpected_eof",
165        ErrorKind::TakeUntil => "expected_terminator",
166        ErrorKind::TakeWhile1 => "expected_token",
167        ErrorKind::Alt => "expected_alt",
168        ErrorKind::Many0 | ErrorKind::Many1 => "expected_list",
169        _ => "parse_error",
170    }
171}
172
173fn nom_err_to_parse_error(
174    e: &Error<Input<'_>>,
175    length_override: Option<usize>,
176    expected_context: Option<&'static str>,
177) -> ParseError {
178    let offset = e.input.location_offset();
179    let line = e.input.location_line();
180    let column = e.input.get_column();
181    let fragment = e.input.fragment();
182    let (found_snippet, found_len) = fragment_to_found_snippet(fragment);
183    let message = nom_error_kind_to_message(&e.code).to_string();
184    let span_len = length_override.unwrap_or(found_len).max(1);
185    let mut pe = ParseError::new(message)
186        .with_location(offset, line, column)
187        .with_length(span_len)
188        .with_code(nom_error_kind_to_code(&e.code));
189    if !found_snippet.is_empty() {
190        pe = pe.with_found(found_snippet);
191    }
192    if let Some(ctx) = expected_context {
193        pe = pe.with_expected(ctx);
194    }
195    let at_root = expected_context.is_some_and(|ctx| {
196        ctx.contains("'package', 'namespace', or 'import'") || ctx.contains("top level")
197    });
198    if at_root && is_illegal_top_level_definition(fragment) {
199        pe.message = "illegal top-level definition".to_string();
200        pe.code = Some("illegal_top_level_definition".to_string());
201        pe.expected = Some("'package', 'namespace', or 'import'".to_string());
202        pe.suggestion = Some(
203            "Wrap this declaration in `package ... { ... }` or `namespace ... { ... }`."
204                .to_string(),
205        );
206    }
207    pe
208}
209
210fn is_illegal_top_level_definition(fragment: &[u8]) -> bool {
211    let trimmed = trim_ascii_start(fragment);
212    !trimmed.starts_with(b"}")
213        && !trimmed.starts_with(b"//")
214        && !trimmed.starts_with(b"/*")
215        && lex::starts_with_any_keyword(trimmed, ILLEGAL_TOP_LEVEL_STARTERS)
216}
217
/// Returns `fragment` without its leading ASCII whitespace bytes (as defined by
/// `u8::is_ascii_whitespace`). The result borrows from the input; nothing is copied.
fn trim_ascii_start(fragment: &[u8]) -> &[u8] {
    let skip = fragment
        .iter()
        .take_while(|byte| byte.is_ascii_whitespace())
        .count();
    &fragment[skip..]
}
228
229fn starts_with_missing_name_after_keyword(
230    fragment: &[u8],
231    keyword: &[u8],
232    trailing_keywords: &[&[u8]],
233) -> bool {
234    let mut fragment = trim_ascii_start(fragment);
235    if !lex::starts_with_keyword(fragment, keyword) {
236        return false;
237    }
238    fragment = &fragment[keyword.len()..];
239    while let Some(first) = fragment.first() {
240        if first.is_ascii_whitespace() {
241            fragment = &fragment[1..];
242            continue;
243        }
244        break;
245    }
246    for trailing in trailing_keywords {
247        if lex::starts_with_keyword(fragment, trailing) {
248            fragment = &fragment[trailing.len()..];
249            while let Some(first) = fragment.first() {
250                if first.is_ascii_whitespace() {
251                    fragment = &fragment[1..];
252                    continue;
253                }
254                break;
255            }
256        }
257    }
258    fragment.starts_with(b":")
259}
260
261fn starts_with_missing_type_after_keyword(
262    fragment: &[u8],
263    keyword: &[u8],
264    trailing_keywords: &[&[u8]],
265) -> bool {
266    let mut fragment = trim_ascii_start(fragment);
267    if !lex::starts_with_keyword(fragment, keyword) {
268        return false;
269    }
270    fragment = &fragment[keyword.len()..];
271    while let Some(first) = fragment.first() {
272        if first.is_ascii_whitespace() {
273            fragment = &fragment[1..];
274            continue;
275        }
276        break;
277    }
278    for trailing in trailing_keywords {
279        if lex::starts_with_keyword(fragment, trailing) {
280            fragment = &fragment[trailing.len()..];
281            while let Some(first) = fragment.first() {
282                if first.is_ascii_whitespace() {
283                    fragment = &fragment[1..];
284                    continue;
285                }
286                break;
287            }
288        }
289    }
290
291    let mut name_len = 0usize;
292    while name_len < fragment.len()
293        && (fragment[name_len].is_ascii_alphanumeric() || fragment[name_len] == b'_')
294    {
295        name_len += 1;
296    }
297    if name_len == 0 {
298        return false;
299    }
300    fragment = &fragment[name_len..];
301    while let Some(first) = fragment.first() {
302        if first.is_ascii_whitespace() {
303            fragment = &fragment[1..];
304            continue;
305        }
306        break;
307    }
308    if !fragment.starts_with(b":") {
309        return false;
310    }
311    fragment = &fragment[1..];
312    while let Some(first) = fragment.first() {
313        if first.is_ascii_whitespace() {
314            fragment = &fragment[1..];
315            continue;
316        }
317        break;
318    }
319
320    fragment.is_empty()
321        || fragment.starts_with(b";")
322        || fragment.starts_with(b"{")
323        || fragment.starts_with(b"}")
324        || lex::starts_with_keyword(fragment, b"then")
325        || lex::starts_with_keyword(fragment, b"if")
326        || lex::starts_with_keyword(fragment, b"do")
327}
328
329fn missing_name_diagnostic(fragment: &[u8]) -> Option<(&'static str, String, String, String)> {
330    #[allow(clippy::type_complexity)]
331    let cases: &[(&[u8], &[&[u8]], &str, &str)] = &[
332        (
333            b"subject",
334            &[],
335            "subject name",
336            "Use `subject laptop: Laptop;`.",
337        ),
338        (b"actor", &[], "actor name", "Use `actor user: User;`."),
339        (b"state", &[], "state name", "Use `state ready: Mode;`."),
340        (b"part", &[], "part name", "Use `part wheel: Wheel;`."),
341        (b"ref", &[], "reference name", "Use `ref sensor: Sensor;`."),
342        (b"port", &[], "port name", "Use `port power: PowerPort;`."),
343        (
344            b"attribute",
345            &[],
346            "attribute name",
347            "Use `attribute mass: MassValue;`.",
348        ),
349        (b"in", &[], "input name", "Use `in speed: Real;`."),
350        (b"out", &[], "output name", "Use `out result: Real;`."),
351        (
352            b"perform",
353            &[b"action"],
354            "action name",
355            "Use `perform action run: Runner;`.",
356        ),
357        (
358            b"return",
359            &[],
360            "return name",
361            "Use `return result: Real;`.",
362        ),
363    ];
364
365    for (keyword, trailing, missing_what, suggestion) in cases {
366        if starts_with_missing_name_after_keyword(fragment, keyword, trailing) {
367            return Some((
368                "missing_member_name",
369                format!("expected {missing_what} before ':'"),
370                format!("{missing_what} before ':'"),
371                suggestion.to_string(),
372            ));
373        }
374    }
375    None
376}
377
378fn missing_type_diagnostic(fragment: &[u8]) -> Option<(&'static str, String, String, String)> {
379    #[allow(clippy::type_complexity)]
380    let cases: &[(&[u8], &[&[u8]], &str)] = &[
381        (b"subject", &[], "subject type"),
382        (b"actor", &[], "actor type"),
383        (b"state", &[], "state type"),
384        (b"part", &[], "part type"),
385        (b"ref", &[], "reference type"),
386        (b"port", &[], "port type"),
387        (b"attribute", &[], "attribute type"),
388        (b"in", &[], "input type"),
389        (b"out", &[], "output type"),
390        (b"perform", &[b"action"], "action type"),
391        (b"return", &[], "return type"),
392    ];
393
394    for &(keyword, trailing, missing_what) in cases {
395        if starts_with_missing_type_after_keyword(fragment, keyword, trailing) {
396            let keyword_label = String::from_utf8_lossy(keyword);
397            let sample_name = if keyword == &b"subject"[..] {
398                "laptop"
399            } else if keyword == &b"actor"[..] {
400                "user"
401            } else if keyword == &b"state"[..] {
402                "ready"
403            } else if keyword == &b"part"[..] {
404                "wheel"
405            } else if keyword == &b"ref"[..] {
406                "sensor"
407            } else if keyword == &b"port"[..] {
408                "power"
409            } else if keyword == &b"attribute"[..] {
410                "mass"
411            } else if keyword == &b"in"[..] {
412                "speed"
413            } else if keyword == &b"out"[..] {
414                "result"
415            } else if keyword == &b"perform"[..] {
416                "run"
417            } else if keyword == &b"return"[..] {
418                "result"
419            } else {
420                "member"
421            };
422            let sample_type = if keyword == &b"subject"[..] {
423                "Laptop"
424            } else if keyword == &b"actor"[..] {
425                "User"
426            } else if keyword == &b"state"[..] {
427                "Mode"
428            } else if keyword == &b"part"[..] {
429                "Wheel"
430            } else if keyword == &b"ref"[..] {
431                "Sensor"
432            } else if keyword == &b"port"[..] {
433                "PowerPort"
434            } else if keyword == &b"attribute"[..] {
435                "MassValue"
436            } else if keyword == &b"in"[..] || keyword == &b"out"[..] {
437                "Real"
438            } else if keyword == &b"perform"[..] {
439                "Runner"
440            } else if keyword == &b"return"[..] {
441                "Real"
442            } else {
443                "Type"
444            };
445            let suggestion = if keyword == &b"perform"[..] {
446                format!("Use `perform action {sample_name}: {sample_type};`.")
447            } else if keyword == &b"return"[..] {
448                format!("Use `return {sample_name}: {sample_type};`.")
449            } else {
450                format!("Use `{keyword_label} {sample_name}: {sample_type};`.")
451            };
452            return Some((
453                "missing_type_reference",
454                format!("expected {missing_what} after ':'"),
455                format!("{missing_what} after ':'"),
456                suggestion,
457            ));
458        }
459    }
460    None
461}
462
463fn missing_closing_brace_error(bytes: &[u8], input: Input<'_>) -> Option<ParseError> {
464    if !input.fragment().is_empty() {
465        return None;
466    }
467    let consumed = &bytes[..input.location_offset().min(bytes.len())];
468    let opens = consumed.iter().filter(|&&b| b == b'{').count();
469    let closes = consumed.iter().filter(|&&b| b == b'}').count();
470    if opens <= closes {
471        return None;
472    }
473    Some(missing_closing_brace_error_at_eof(consumed))
474}
475
476fn missing_closing_brace_error_at_eof(bytes: &[u8]) -> ParseError {
477    let (line, column) = eof_line_column(bytes);
478    ParseError::new("missing closing '}'")
479        .with_location(bytes.len(), line, column)
480        .with_length(1)
481        .with_code("missing_closing_brace")
482        .with_expected("'}'")
483        .with_suggestion("Add '}' to close the open body.")
484}
485
/// Reports whether `bytes` contains more `{` bytes than `}` bytes. This is a plain byte
/// count: ordering and nesting are not checked.
fn has_unclosed_brace(bytes: &[u8]) -> bool {
    let (opened, closed) = bytes
        .iter()
        .fold((0usize, 0usize), |(opened, closed), &byte| match byte {
            b'{' => (opened + 1, closed),
            b'}' => (opened, closed + 1),
            _ => (opened, closed),
        });
    opened > closed
}
491
/// Computes the 1-based `(line, column)` of the position just past the last byte of `bytes`.
///
/// Every `\n` starts a new line and resets the column to 1; any other byte (including `\r`)
/// advances the column by one, so columns are counted in bytes.
fn eof_line_column(bytes: &[u8]) -> (u32, usize) {
    bytes.iter().fold((1u32, 1usize), |(line, column), &byte| {
        if byte == b'\n' {
            (line + 1, 1)
        } else {
            (line, column + 1)
        }
    })
}
505
506pub(crate) fn build_recovery_error_node(
507    input: Input<'_>,
508    starters: &[&[u8]],
509    scope_label: &str,
510    generic_code: &str,
511) -> ParseErrorNode {
512    let trimmed = trim_ascii_start(input.fragment());
513
514    if let Some((code, message, expected, suggestion)) = missing_name_diagnostic(trimmed) {
515        return ParseErrorNode {
516            message,
517            code: code.to_string(),
518            expected: Some(expected),
519            found: recovery_found_snippet(input),
520            suggestion: Some(suggestion),
521        };
522    }
523
524    if let Some((code, message, expected, suggestion)) = missing_type_diagnostic(trimmed) {
525        return ParseErrorNode {
526            message,
527            code: code.to_string(),
528            expected: Some(expected),
529            found: recovery_found_snippet(input),
530            suggestion: Some(suggestion),
531        };
532    }
533
534    if lex::looks_like_missing_semicolon(input, starters) {
535        return ParseErrorNode {
536            message: "missing semicolon before next declaration".to_string(),
537            code: "missing_semicolon".to_string(),
538            expected: Some("';'".to_string()),
539            found: recovery_found_snippet(input),
540            suggestion: Some("Insert ';' before this declaration.".to_string()),
541        };
542    }
543
544    if lex::starts_with_keyword(trimmed, b"#") || lex::starts_with_keyword(trimmed, b"@") {
545        return ParseErrorNode {
546            message: format!("unsupported annotation syntax in {scope_label}"),
547            code: generic_code.to_string(),
548            expected: Some(format!("valid {scope_label} element")),
549            found: recovery_found_snippet(input),
550            suggestion: Some(
551                "Remove this annotation or extend the parser to support annotated declarations."
552                    .to_string(),
553            ),
554        };
555    }
556
557    ParseErrorNode {
558        message: format!("unexpected token in {scope_label}"),
559        code: generic_code.to_string(),
560        expected: Some(format!("valid {scope_label} element")),
561        found: recovery_found_snippet(input),
562        suggestion: Some(format!("Fix this {scope_label} member and re-run parsing.")),
563    }
564}
565
566fn is_only_trailing_closing_braces(mut input: Input<'_>) -> bool {
567    loop {
568        let (next, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
569        input = next;
570        if input.fragment().is_empty() {
571            return true;
572        }
573        if input.fragment().starts_with(b"}") {
574            match nom::bytes::complete::tag::<_, _, nom::error::Error<Input>>(&b"}"[..])
575                .parse(input)
576            {
577                Ok((next, _)) => {
578                    input = next;
579                    continue;
580                }
581                Err(_) => return false,
582            }
583        }
584        return false;
585    }
586}
587
588fn parse_error_from_recovery_node(span: &crate::ast::Span, node: &ParseErrorNode) -> ParseError {
589    let mut err = ParseError::new(node.message.clone())
590        .with_location(span.offset, span.line, span.column)
591        .with_length(span.len.max(1))
592        .with_code(node.code.clone());
593    if let Some(expected) = &node.expected {
594        err = err.with_expected(expected.clone());
595    }
596    if let Some(found) = &node.found {
597        err = err.with_found(found.clone());
598    }
599    if let Some(suggestion) = &node.suggestion {
600        err = err.with_suggestion(suggestion.clone());
601    }
602    err
603}
604
605fn collect_requirement_body_errors(body: &RequirementDefBody, errors: &mut Vec<ParseError>) {
606    if let RequirementDefBody::Brace { elements } = body {
607        for element in elements {
608            match &element.value {
609                RequirementDefBodyElement::Error(n) => {
610                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
611                }
612                RequirementDefBodyElement::Frame(n) => {
613                    collect_requirement_body_errors(&n.value.body, errors)
614                }
615                _ => {}
616            }
617        }
618    }
619}
620
621fn collect_action_def_body_errors(body: &ActionDefBody, errors: &mut Vec<ParseError>) {
622    if let ActionDefBody::Brace { elements } = body {
623        for element in elements {
624            if let ActionDefBodyElement::Error(n) = &element.value {
625                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
626            }
627        }
628    }
629}
630
631fn collect_action_usage_body_errors(body: &ActionUsageBody, errors: &mut Vec<ParseError>) {
632    if let ActionUsageBody::Brace { elements } = body {
633        for element in elements {
634            match &element.value {
635                ActionUsageBodyElement::Error(n) => {
636                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
637                }
638                ActionUsageBodyElement::ActionUsage(n) => {
639                    collect_action_usage_body_errors(&n.value.body, errors)
640                }
641                _ => {}
642            }
643        }
644    }
645}
646
647fn collect_state_body_errors(body: &StateDefBody, errors: &mut Vec<ParseError>) {
648    if let StateDefBody::Brace { elements } = body {
649        for element in elements {
650            match &element.value {
651                StateDefBodyElement::Error(n) => {
652                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
653                }
654                StateDefBodyElement::Entry(n) => collect_state_body_errors(&n.value.body, errors),
655                StateDefBodyElement::RequirementUsage(n) => {
656                    collect_requirement_body_errors(&n.value.body, errors)
657                }
658                StateDefBodyElement::StateUsage(n) => {
659                    collect_state_body_errors(&n.value.body, errors)
660                }
661                _ => {}
662            }
663        }
664    }
665}
666
667fn collect_use_case_body_errors(body: &UseCaseDefBody, errors: &mut Vec<ParseError>) {
668    if let UseCaseDefBody::Brace { elements } = body {
669        for element in elements {
670            if let UseCaseDefBodyElement::Error(n) = &element.value {
671                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
672            }
673        }
674    }
675}
676
677fn collect_constraint_body_errors(body: &ConstraintDefBody, errors: &mut Vec<ParseError>) {
678    if let ConstraintDefBody::Brace { elements } = body {
679        for element in elements {
680            if let ConstraintDefBodyElement::Error(n) = &element.value {
681                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
682            }
683        }
684    }
685}
686
687fn collect_calc_body_errors(body: &CalcDefBody, errors: &mut Vec<ParseError>) {
688    if let CalcDefBody::Brace { elements } = body {
689        for element in elements {
690            if let CalcDefBodyElement::Error(n) = &element.value {
691                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
692            }
693        }
694    }
695}
696
697fn collect_view_def_body_errors(body: &ViewDefBody, errors: &mut Vec<ParseError>) {
698    if let ViewDefBody::Brace { elements } = body {
699        for element in elements {
700            if let ViewDefBodyElement::Error(n) = &element.value {
701                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
702            }
703        }
704    }
705}
706
707fn collect_view_body_errors(body: &ViewBody, errors: &mut Vec<ParseError>) {
708    if let ViewBody::Brace { elements } = body {
709        for element in elements {
710            if let ViewBodyElement::Error(n) = &element.value {
711                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
712            }
713        }
714    }
715}
716
717fn collect_part_def_body_errors(body: &PartDefBody, errors: &mut Vec<ParseError>) {
718    if let PartDefBody::Brace { elements } = body {
719        for element in elements {
720            match &element.value {
721                PartDefBodyElement::Error(n) => {
722                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
723                }
724                PartDefBodyElement::PartUsage(n) => {
725                    collect_part_usage_body_errors(&n.value.body, errors)
726                }
727                PartDefBodyElement::Perform(n) => {
728                    collect_perform_body_errors(&n.value.body, errors)
729                }
730                _ => {}
731            }
732        }
733    }
734}
735
736fn collect_perform_body_errors(body: &crate::ast::PerformBody, _errors: &mut Vec<ParseError>) {
737    match body {
738        crate::ast::PerformBody::Semicolon => {}
739        crate::ast::PerformBody::Brace { .. } => {}
740    }
741}
742
743fn collect_part_usage_body_errors(body: &PartUsageBody, errors: &mut Vec<ParseError>) {
744    if let PartUsageBody::Brace { elements } = body {
745        for element in elements {
746            match &element.value {
747                PartUsageBodyElement::Error(n) => {
748                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
749                }
750                PartUsageBodyElement::PartUsage(n) => {
751                    collect_part_usage_body_errors(&n.value.body, errors)
752                }
753                PartUsageBodyElement::Perform(n) => {
754                    collect_perform_body_errors(&n.value.body, errors)
755                }
756                PartUsageBodyElement::StateUsage(n) => {
757                    collect_state_body_errors(&n.value.body, errors)
758                }
759                _ => {}
760            }
761        }
762    }
763}
764
765fn collect_package_body_errors(body: &PackageBody, errors: &mut Vec<ParseError>) {
766    if let PackageBody::Brace { elements } = body {
767        for element in elements {
768            match &element.value {
769                PackageBodyElement::Error(n) => {
770                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
771                }
772                PackageBodyElement::Package(n) => {
773                    collect_package_body_errors(&n.value.body, errors)
774                }
775                PackageBodyElement::LibraryPackage(n) => {
776                    collect_package_body_errors(&n.value.body, errors)
777                }
778                PackageBodyElement::PartDef(n) => {
779                    collect_part_def_body_errors(&n.value.body, errors)
780                }
781                PackageBodyElement::PartUsage(n) => {
782                    collect_part_usage_body_errors(&n.value.body, errors)
783                }
784                PackageBodyElement::ActionDef(n) => {
785                    collect_action_def_body_errors(&n.value.body, errors)
786                }
787                PackageBodyElement::ActionUsage(n) => {
788                    collect_action_usage_body_errors(&n.value.body, errors)
789                }
790                PackageBodyElement::RequirementDef(n) => {
791                    collect_requirement_body_errors(&n.value.body, errors)
792                }
793                PackageBodyElement::RequirementUsage(n) => {
794                    collect_requirement_body_errors(&n.value.body, errors)
795                }
796                PackageBodyElement::UseCaseDef(n) => {
797                    collect_use_case_body_errors(&n.value.body, errors)
798                }
799                PackageBodyElement::UseCaseUsage(n) => {
800                    collect_use_case_body_errors(&n.value.body, errors)
801                }
802                PackageBodyElement::ConcernUsage(n) => {
803                    collect_requirement_body_errors(&n.value.body, errors)
804                }
805                PackageBodyElement::StateDef(n) => collect_state_body_errors(&n.value.body, errors),
806                PackageBodyElement::StateUsage(n) => {
807                    collect_state_body_errors(&n.value.body, errors)
808                }
809                PackageBodyElement::ConstraintDef(n) => {
810                    collect_constraint_body_errors(&n.value.body, errors)
811                }
812                PackageBodyElement::CalcDef(n) => collect_calc_body_errors(&n.value.body, errors),
813                PackageBodyElement::ViewDef(n) => {
814                    collect_view_def_body_errors(&n.value.body, errors)
815                }
816                PackageBodyElement::ViewUsage(n) => collect_view_body_errors(&n.value.body, errors),
817                _ => {}
818            }
819        }
820    }
821}
822
823fn collect_recovery_errors(root: &RootNamespace) -> Vec<ParseError> {
824    let mut errors = Vec::new();
825    for element in &root.elements {
826        match &element.value {
827            crate::ast::RootElement::Package(n) => {
828                collect_package_body_errors(&n.value.body, &mut errors)
829            }
830            crate::ast::RootElement::LibraryPackage(n) => {
831                collect_package_body_errors(&n.value.body, &mut errors)
832            }
833            crate::ast::RootElement::Namespace(n) => {
834                collect_package_body_errors(&n.value.body, &mut errors)
835            }
836            crate::ast::RootElement::Import(_) => {}
837        }
838    }
839    errors
840}
841
842/// Parse full input; must consume entire input. Strips UTF-8 BOM if present.
843#[allow(clippy::result_large_err)]
844pub fn parse_root(input: &str) -> Result<RootNamespace, ParseError> {
845    let bytes = input
846        .strip_prefix('\u{FEFF}')
847        .map(str::as_bytes)
848        .unwrap_or_else(|| input.as_bytes());
849    let located = LocatedSpan::new(bytes);
850    match package::root_namespace(located) {
851        Ok((rest, root)) => {
852            if !rest.fragment().is_empty() && has_unclosed_brace(bytes) {
853                return Err(missing_closing_brace_error_at_eof(bytes));
854            }
855            if rest.fragment().is_empty() || is_only_trailing_closing_braces(rest) {
856                log::debug!("parse_root: success, {} top-level elements", root.elements.len());
857                Ok(root)
858            } else {
859                let offset = located.location_offset() + located.fragment().len() - rest.fragment().len();
860                let unconsumed = rest.fragment();
861                let first_80 = unconsumed.get(..80.min(unconsumed.len())).unwrap_or(unconsumed);
862                log::debug!(
863                    "parse_root: expected end of input; parsed {} elements; unconsumed len={}, offset={}, first 80 bytes: {:?}",
864                    root.elements.len(),
865                    unconsumed.len(),
866                    offset,
867                    first_80,
868                );
869                log::debug!(
870                    "parse_root: unconsumed as str: {:?}",
871                    String::from_utf8_lossy(first_80),
872                );
873                let (found_snippet, found_len) = fragment_to_found_snippet(rest.fragment());
874                let mut pe = ParseError::new("expected end of input")
875                    .with_location(offset, rest.location_line(), rest.get_column())
876                    .with_length(found_len.max(1))
877                    .with_code("expected_end_of_input");
878                if !found_snippet.is_empty() {
879                    pe = pe.with_found(found_snippet);
880                }
881                if root.elements.is_empty() && is_illegal_top_level_definition(rest.fragment()) {
882                    pe = pe
883                        .with_code("illegal_top_level_definition")
884                        .with_expected("'package', 'namespace', or 'import'")
885                        .with_suggestion(
886                            "Wrap this declaration in `package ... { ... }` or `namespace ... { ... }`.",
887                        );
888                    pe.message = "illegal top-level definition".to_string();
889                }
890                Err(pe)
891            }
892        }
893        Err(nom::Err::Error(e)) => Err(missing_closing_brace_error(bytes, e.input).unwrap_or_else(|| {
894            nom_err_to_parse_error(
895                &e,
896                None,
897                Some("'package', 'namespace', or 'import' at top level; or valid element in package body"),
898            )
899        })),
900        Err(nom::Err::Failure(e)) => Err(missing_closing_brace_error(bytes, e.input).unwrap_or_else(|| {
901            nom_err_to_parse_error(
902                &e,
903                None,
904                Some("'package', 'namespace', or 'import' at top level; or valid element in package body"),
905            )
906        })),
907        Err(nom::Err::Incomplete(_)) => Err(ParseError::new("unexpected end of input").with_code("unexpected_eof")),
908    }
909}
910
911const MAX_RECOVERY_ERRORS: usize = 100;
912
913/// Parse input with error recovery: collects multiple diagnostics and returns a partial AST when errors occur.
914/// Use this for language servers so the user sees all parse errors and features (e.g. hover) can use the partial AST.
915pub fn parse_with_diagnostics(input: &str) -> ParseResult {
916    let bytes = input
917        .strip_prefix('\u{FEFF}')
918        .map(str::as_bytes)
919        .unwrap_or_else(|| input.as_bytes());
920    let located = LocatedSpan::new(bytes);
921
922    let mut elements = Vec::new();
923    let mut errors = Vec::new();
924
925    let (mut input, _) = match lex::ws_and_comments(located) {
926        Ok(x) => x,
927        Err(_) => {
928            return ParseResult {
929                root: RootNamespace { elements: vec![] },
930                errors: vec![ParseError::new("invalid input").with_code("invalid_input")],
931            };
932        }
933    };
934
935    while errors.len() < MAX_RECOVERY_ERRORS {
936        // Skip leading ws/comments; if nothing left, we're done (avoids parsing "" as root_element).
937        let (rest, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
938        input = rest;
939        if input.fragment().is_empty() {
940            break;
941        }
942        match package::root_element(input) {
943            Ok((rest, elem)) => {
944                elements.push(elem);
945                input = rest;
946            }
947            Err(nom::Err::Error(e)) | Err(nom::Err::Failure(e)) => {
948                let pe = missing_closing_brace_error(bytes, e.input).unwrap_or_else(|| {
949                    nom_err_to_parse_error(&e, None, Some("'package', 'namespace', or 'import'"))
950                });
951                errors.push(pe);
952                let skip_result = lex::skip_to_next_sync_point(e.input);
953                match skip_result {
954                    Ok((rest, _)) => input = rest,
955                    Err(_) => break,
956                }
957            }
958            Err(nom::Err::Incomplete(_)) => {
959                errors.push(
960                    ParseError::new("unexpected end of input")
961                        .with_location(
962                            input.location_offset(),
963                            input.location_line(),
964                            input.get_column(),
965                        )
966                        .with_length(1)
967                        .with_code("unexpected_eof"),
968                );
969                break;
970            }
971        }
972    }
973
974    let (input, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
975
976    if input.fragment().is_empty()
977        && has_unclosed_brace(bytes)
978        && !errors
979            .iter()
980            .any(|e| e.code.as_deref() == Some("missing_closing_brace"))
981    {
982        errors.push(missing_closing_brace_error_at_eof(bytes));
983    }
984
985    if !input.fragment().is_empty() {
986        let (found_snippet, found_len) = fragment_to_found_snippet(input.fragment());
987        let mut pe = ParseError::new("expected end of input")
988            .with_location(
989                input.location_offset(),
990                input.location_line(),
991                input.get_column(),
992            )
993            .with_length(found_len.max(1))
994            .with_code("expected_end_of_input");
995        if !found_snippet.is_empty() {
996            pe = pe.with_found(found_snippet);
997        }
998        errors.push(pe);
999    }
1000
1001    errors.extend(collect_recovery_errors(&RootNamespace {
1002        elements: elements.clone(),
1003    }));
1004    errors.sort_by_key(|e| (e.offset.unwrap_or(usize::MAX), e.line.unwrap_or(u32::MAX)));
1005
1006    ParseResult {
1007        root: RootNamespace { elements },
1008        errors,
1009    }
1010}