Skip to main content

sysml_v2_parser/parser/
mod.rs

1//! Nom-based parser for SysML v2 textual notation.
2//!
//! Organized into modules (partial list; see the `mod` declarations below for the full set):
4//! - [lex]: whitespace, comments, names, qualified names, skip helpers
5//! - [attribute]: attribute definition and usage
6//! - [import]: import and relationship body
7//! - [part]: part definition and part usage
8//! - [package]: package and root namespace
9
10mod action;
11mod alias;
12mod allocation;
13mod attribute;
14mod case;
15mod connection;
16mod constraint;
17mod dependency;
18mod enumeration;
19mod expr;
20mod flow;
21mod import;
22mod individual;
23mod interface;
24mod item;
25mod lex;
26mod metadata;
27mod metadata_annotation;
28mod occurrence;
29mod package;
30mod part;
31mod port;
32mod requirement;
33mod span;
34mod state;
35mod usecase;
36mod view;
37
38pub(crate) use span::{node_from_to, span_from_to, with_span, Input};
39
40use crate::ast::{
41    ActionDefBody, ActionDefBodyElement, ActionUsageBody, ActionUsageBodyElement, CalcDefBody,
42    CalcDefBodyElement, ConstraintDefBody, ConstraintDefBodyElement, PackageBody,
43    PackageBodyElement, ParseErrorNode, PartDefBody, PartDefBodyElement, PartUsageBody,
44    PartUsageBodyElement, RequirementDefBody, RequirementDefBodyElement, RootNamespace,
45    StateDefBody, StateDefBodyElement, UseCaseDefBody, UseCaseDefBodyElement, ViewBody,
46    ViewBodyElement, ViewDefBody, ViewDefBodyElement,
47};
48use crate::error::{DiagnosticCategory, DiagnosticSeverity, ParseError};
49use nom::error::Error;
50use nom::Parser;
51use nom_locate::LocatedSpan;
52
/// Result of parsing with error recovery: a (possibly partial) AST and zero or more diagnostics.
///
/// Both fields are public; [`ParseResult::is_ok`] reports whether `errors` is empty.
#[derive(Debug, Clone)]
pub struct ParseResult {
    /// Root namespace; contains all successfully parsed top-level elements (partial when errors occurred).
    pub root: RootNamespace,
    /// All parse errors encountered (multiple when recovery is used).
    pub errors: Vec<ParseError>,
}
61
impl ParseResult {
    /// True if the document parsed fully with no errors.
    ///
    /// This only inspects `errors`; it returns `true` exactly when that vector is empty.
    pub fn is_ok(&self) -> bool {
        self.errors.is_empty()
    }
}
68
/// Upper bound, in bytes, on the input excerpt returned by `fragment_to_found_snippet`.
const FOUND_SNIPPET_MAX_LEN: usize = 40;
/// Keywords checked by `is_illegal_top_level_definition`: when a root-level parse failure
/// starts with one of these, `nom_err_to_parse_error` reports an
/// `illegal_top_level_definition` diagnostic that suggests wrapping the declaration in a
/// package or namespace.
const ILLEGAL_TOP_LEVEL_STARTERS: &[&[u8]] = &[
    b"action",
    b"actor",
    b"alias",
    b"allocate",
    b"allocation",
    b"attribute",
    b"bind",
    b"calc",
    b"case",
    b"concern",
    b"connection",
    b"constraint",
    b"dependency",
    b"enum",
    b"flow",
    b"interface",
    b"item",
    b"metadata",
    b"occurrence",
    b"part",
    b"perform",
    b"port",
    b"ref",
    b"require",
    b"requirement",
    b"satisfy",
    b"state",
    b"use",
    b"verification",
    b"view",
    b"viewpoint",
];
103
104/// Take a short snippet from the input at the error position for "found" display.
105/// Uses first line or first FOUND_SNIPPET_MAX_LEN bytes, UTF-8 with replacement char.
106fn fragment_to_found_snippet(fragment: &[u8]) -> (String, usize) {
107    let take = fragment
108        .iter()
109        .position(|&b| b == b'\n' || b == b'\r')
110        .map(|p| p.min(FOUND_SNIPPET_MAX_LEN))
111        .unwrap_or_else(|| fragment.len().min(FOUND_SNIPPET_MAX_LEN));
112    let slice = fragment.get(..take).unwrap_or(fragment);
113    let s = String::from_utf8_lossy(slice)
114        .replace('\n', "\\n")
115        .replace('\r', "\\r");
116    let len = slice.len();
117    (s.trim_end().to_string(), len)
118}
119
120pub(crate) fn recovery_found_snippet(input: Input<'_>) -> Option<String> {
121    let frag = input.fragment();
122    let take = frag
123        .iter()
124        .position(|&b| b == b'\n' || b == b'\r')
125        .unwrap_or(frag.len())
126        .min(60);
127    let snippet = String::from_utf8_lossy(&frag[..take]).trim().to_string();
128    if snippet.is_empty() {
129        None
130    } else {
131        Some(snippet)
132    }
133}
134
135fn recovery_found_snippet_from_span(input: Input<'_>, recovery_end: Input<'_>) -> Option<String> {
136    let consumed_len = recovery_end
137        .location_offset()
138        .saturating_sub(input.location_offset())
139        .min(input.fragment().len());
140    if consumed_len == 0 {
141        return recovery_found_snippet(input);
142    }
143    let frag = &input.fragment()[..consumed_len];
144    let take = frag
145        .iter()
146        .position(|&b| b == b'\n' || b == b'\r')
147        .unwrap_or(frag.len())
148        .min(60);
149    let snippet = String::from_utf8_lossy(&frag[..take]).trim().to_string();
150    if snippet.is_empty() {
151        recovery_found_snippet(input)
152    } else {
153        Some(snippet)
154    }
155}
156
157/// Map nom error kind to a human-readable message for language server diagnostics.
158fn nom_error_kind_to_message(code: &nom::error::ErrorKind) -> &'static str {
159    use nom::error::ErrorKind;
160    match code {
161        ErrorKind::Tag => "expected keyword or token",
162        ErrorKind::Digit => "expected number",
163        ErrorKind::Alpha => "expected identifier",
164        ErrorKind::AlphaNumeric => "expected identifier",
165        ErrorKind::Space => "expected whitespace",
166        ErrorKind::MultiSpace => "expected whitespace",
167        ErrorKind::Eof => "unexpected end of input",
168        ErrorKind::TakeUntil => "expected terminator",
169        ErrorKind::TakeWhile1 => "expected token",
170        ErrorKind::Alt => {
171            "expected package, import, part, port, interface, alias, attribute, or action"
172        }
173        ErrorKind::Many0 | ErrorKind::Many1 => "expected list of elements",
174        _ => "parse error",
175    }
176}
177
178/// Map nom error kind to a specific code for LSP/quick fixes.
179fn nom_error_kind_to_code(code: &nom::error::ErrorKind) -> &'static str {
180    use nom::error::ErrorKind;
181    match code {
182        ErrorKind::Tag => "expected_keyword",
183        ErrorKind::Digit => "expected_number",
184        ErrorKind::Alpha | ErrorKind::AlphaNumeric => "expected_identifier",
185        ErrorKind::Space | ErrorKind::MultiSpace => "expected_whitespace",
186        ErrorKind::Eof => "unexpected_eof",
187        ErrorKind::TakeUntil => "expected_terminator",
188        ErrorKind::TakeWhile1 => "expected_token",
189        ErrorKind::Alt => "expected_alt",
190        ErrorKind::Many0 | ErrorKind::Many1 => "expected_list",
191        _ => "parse_error",
192    }
193}
194
195fn nom_err_to_parse_error(
196    e: &Error<Input<'_>>,
197    length_override: Option<usize>,
198    expected_context: Option<&'static str>,
199) -> ParseError {
200    let offset = e.input.location_offset();
201    let line = e.input.location_line();
202    let column = e.input.get_column();
203    let fragment = e.input.fragment();
204    let (found_snippet, found_len) = fragment_to_found_snippet(fragment);
205    let message = nom_error_kind_to_message(&e.code).to_string();
206    let span_len = length_override.unwrap_or(found_len).max(1);
207    if trim_ascii_start(fragment).starts_with(b"}") {
208        return unexpected_closing_brace_parse_error(e.input);
209    }
210    let mut pe = ParseError::new(message)
211        .with_location(offset, line, column)
212        .with_length(span_len)
213        .with_code(nom_error_kind_to_code(&e.code))
214        .with_severity(DiagnosticSeverity::Error)
215        .with_category(DiagnosticCategory::ParseError);
216    if !found_snippet.is_empty() {
217        pe = pe.with_found(found_snippet);
218    }
219    if let Some(ctx) = expected_context {
220        pe = pe.with_expected(ctx);
221    }
222    let at_root = expected_context.is_some_and(|ctx| {
223        ctx.contains("'package', 'namespace', or 'import'") || ctx.contains("top level")
224    });
225    if at_root && is_illegal_top_level_definition(fragment) {
226        pe.message = "illegal top-level definition".to_string();
227        pe.code = Some("illegal_top_level_definition".to_string());
228        pe.expected = Some("'package', 'namespace', or 'import'".to_string());
229        pe.suggestion = Some(
230            "Wrap this declaration in `package ... { ... }` or `namespace ... { ... }`."
231                .to_string(),
232        );
233    }
234    pe
235}
236
237fn is_illegal_top_level_definition(fragment: &[u8]) -> bool {
238    let trimmed = trim_ascii_start(fragment);
239    !trimmed.starts_with(b"}")
240        && !trimmed.starts_with(b"//")
241        && !trimmed.starts_with(b"/*")
242        && lex::starts_with_any_keyword(trimmed, ILLEGAL_TOP_LEVEL_STARTERS)
243}
244
/// Return `fragment` with its leading ASCII whitespace bytes removed.
///
/// "Whitespace" follows `u8::is_ascii_whitespace`. An all-whitespace or empty input yields
/// an empty slice.
fn trim_ascii_start(fragment: &[u8]) -> &[u8] {
    let first_kept = fragment
        .iter()
        .position(|byte| !byte.is_ascii_whitespace())
        .unwrap_or(fragment.len());
    &fragment[first_kept..]
}
255
256fn starts_with_missing_name_after_keyword(
257    fragment: &[u8],
258    keyword: &[u8],
259    trailing_keywords: &[&[u8]],
260) -> bool {
261    let mut fragment = trim_ascii_start(fragment);
262    if !lex::starts_with_keyword(fragment, keyword) {
263        return false;
264    }
265    fragment = &fragment[keyword.len()..];
266    while let Some(first) = fragment.first() {
267        if first.is_ascii_whitespace() {
268            fragment = &fragment[1..];
269            continue;
270        }
271        break;
272    }
273    for trailing in trailing_keywords {
274        if lex::starts_with_keyword(fragment, trailing) {
275            fragment = &fragment[trailing.len()..];
276            while let Some(first) = fragment.first() {
277                if first.is_ascii_whitespace() {
278                    fragment = &fragment[1..];
279                    continue;
280                }
281                break;
282            }
283        }
284    }
285    fragment.starts_with(b":")
286}
287
288fn starts_with_missing_type_after_keyword(
289    fragment: &[u8],
290    keyword: &[u8],
291    trailing_keywords: &[&[u8]],
292) -> bool {
293    let mut fragment = trim_ascii_start(fragment);
294    if !lex::starts_with_keyword(fragment, keyword) {
295        return false;
296    }
297    fragment = &fragment[keyword.len()..];
298    while let Some(first) = fragment.first() {
299        if first.is_ascii_whitespace() {
300            fragment = &fragment[1..];
301            continue;
302        }
303        break;
304    }
305    for trailing in trailing_keywords {
306        if lex::starts_with_keyword(fragment, trailing) {
307            fragment = &fragment[trailing.len()..];
308            while let Some(first) = fragment.first() {
309                if first.is_ascii_whitespace() {
310                    fragment = &fragment[1..];
311                    continue;
312                }
313                break;
314            }
315        }
316    }
317
318    let mut name_len = 0usize;
319    while name_len < fragment.len()
320        && (fragment[name_len].is_ascii_alphanumeric() || fragment[name_len] == b'_')
321    {
322        name_len += 1;
323    }
324    if name_len == 0 {
325        return false;
326    }
327    fragment = &fragment[name_len..];
328    while let Some(first) = fragment.first() {
329        if first.is_ascii_whitespace() {
330            fragment = &fragment[1..];
331            continue;
332        }
333        break;
334    }
335    if !fragment.starts_with(b":") {
336        return false;
337    }
338    fragment = &fragment[1..];
339    while let Some(first) = fragment.first() {
340        if first.is_ascii_whitespace() {
341            fragment = &fragment[1..];
342            continue;
343        }
344        break;
345    }
346
347    fragment.is_empty()
348        || fragment.starts_with(b";")
349        || fragment.starts_with(b"{")
350        || fragment.starts_with(b"}")
351        || lex::starts_with_keyword(fragment, b"then")
352        || lex::starts_with_keyword(fragment, b"if")
353        || lex::starts_with_keyword(fragment, b"do")
354}
355
356fn missing_name_diagnostic(fragment: &[u8]) -> Option<(&'static str, String, String, String)> {
357    #[allow(clippy::type_complexity)]
358    let cases: &[(&[u8], &[&[u8]], &str, &str)] = &[
359        (
360            b"subject",
361            &[],
362            "subject name",
363            "Use `subject laptop: Laptop;`.",
364        ),
365        (b"actor", &[], "actor name", "Use `actor user: User;`."),
366        (b"state", &[], "state name", "Use `state ready: Mode;`."),
367        (b"part", &[], "part name", "Use `part wheel: Wheel;`."),
368        (b"ref", &[], "reference name", "Use `ref sensor: Sensor;`."),
369        (b"port", &[], "port name", "Use `port power: PowerPort;`."),
370        (
371            b"attribute",
372            &[],
373            "attribute name",
374            "Use `attribute mass: MassValue;`.",
375        ),
376        (b"in", &[], "input name", "Use `in speed: Real;`."),
377        (b"out", &[], "output name", "Use `out result: Real;`."),
378        (
379            b"perform",
380            &[b"action"],
381            "action name",
382            "Use `perform action run: Runner;`.",
383        ),
384        (b"return", &[], "return name", "Use `return result: Real;`."),
385    ];
386
387    for (keyword, trailing, missing_what, suggestion) in cases {
388        if starts_with_missing_name_after_keyword(fragment, keyword, trailing) {
389            return Some((
390                "missing_member_name",
391                format!("expected {missing_what} before ':'"),
392                format!("{missing_what} before ':'"),
393                suggestion.to_string(),
394            ));
395        }
396    }
397    None
398}
399
400fn missing_type_diagnostic(fragment: &[u8]) -> Option<(&'static str, String, String, String)> {
401    #[allow(clippy::type_complexity)]
402    let cases: &[(&[u8], &[&[u8]], &str)] = &[
403        (b"subject", &[], "subject type"),
404        (b"actor", &[], "actor type"),
405        (b"state", &[], "state type"),
406        (b"part", &[], "part type"),
407        (b"ref", &[], "reference type"),
408        (b"port", &[], "port type"),
409        (b"attribute", &[], "attribute type"),
410        (b"in", &[], "input type"),
411        (b"out", &[], "output type"),
412        (b"perform", &[b"action"], "action type"),
413        (b"return", &[], "return type"),
414    ];
415
416    for &(keyword, trailing, missing_what) in cases {
417        if starts_with_missing_type_after_keyword(fragment, keyword, trailing) {
418            let keyword_label = String::from_utf8_lossy(keyword);
419            let sample_name = if keyword == &b"subject"[..] {
420                "laptop"
421            } else if keyword == &b"actor"[..] {
422                "user"
423            } else if keyword == &b"state"[..] {
424                "ready"
425            } else if keyword == &b"part"[..] {
426                "wheel"
427            } else if keyword == &b"ref"[..] {
428                "sensor"
429            } else if keyword == &b"port"[..] {
430                "power"
431            } else if keyword == &b"attribute"[..] {
432                "mass"
433            } else if keyword == &b"in"[..] {
434                "speed"
435            } else if keyword == &b"out"[..] {
436                "result"
437            } else if keyword == &b"perform"[..] {
438                "run"
439            } else if keyword == &b"return"[..] {
440                "result"
441            } else {
442                "member"
443            };
444            let sample_type = if keyword == &b"subject"[..] {
445                "Laptop"
446            } else if keyword == &b"actor"[..] {
447                "User"
448            } else if keyword == &b"state"[..] {
449                "Mode"
450            } else if keyword == &b"part"[..] {
451                "Wheel"
452            } else if keyword == &b"ref"[..] {
453                "Sensor"
454            } else if keyword == &b"port"[..] {
455                "PowerPort"
456            } else if keyword == &b"attribute"[..] {
457                "MassValue"
458            } else if keyword == &b"in"[..] || keyword == &b"out"[..] {
459                "Real"
460            } else if keyword == &b"perform"[..] {
461                "Runner"
462            } else if keyword == &b"return"[..] {
463                "Real"
464            } else {
465                "Type"
466            };
467            let suggestion = if keyword == &b"perform"[..] {
468                format!("Use `perform action {sample_name}: {sample_type};`.")
469            } else if keyword == &b"return"[..] {
470                format!("Use `return {sample_name}: {sample_type};`.")
471            } else {
472                format!("Use `{keyword_label} {sample_name}: {sample_type};`.")
473            };
474            return Some((
475                "missing_type_reference",
476                format!("expected {missing_what} after ':'"),
477                format!("{missing_what} after ':'"),
478                suggestion,
479            ));
480        }
481    }
482    None
483}
484
485fn invalid_expose_separator_diagnostic(
486    fragment: &[u8],
487) -> Option<(&'static str, String, String, String)> {
488    let mut fragment = trim_ascii_start(fragment);
489    if !lex::starts_with_keyword(fragment, b"expose") {
490        return None;
491    }
492    fragment = &fragment[b"expose".len()..];
493    while let Some(first) = fragment.first() {
494        if first.is_ascii_whitespace() {
495            fragment = &fragment[1..];
496            continue;
497        }
498        break;
499    }
500    if fragment.is_empty() {
501        return None;
502    }
503
504    let mut saw_dot = false;
505    let mut in_quoted_name = false;
506    for &b in fragment {
507        if b == b'\'' {
508            in_quoted_name = !in_quoted_name;
509            continue;
510        }
511        if in_quoted_name {
512            continue;
513        }
514        if matches!(b, b';' | b'[' | b'{' | b'}' | b'\n' | b'\r') {
515            break;
516        }
517        if b == b'.' {
518            saw_dot = true;
519            break;
520        }
521    }
522    if !saw_dot {
523        return None;
524    }
525
526    Some((
527        "invalid_qualified_name_separator",
528        "invalid qualified name in expose target: use '::' instead of '.'".to_string(),
529        "qualified name segments separated by '::'".to_string(),
530        "Replace '.' with '::' in the expose target (example: `expose A::B;`).".to_string(),
531    ))
532}
533
534fn missing_semicolon_or_body_diagnostic(
535    fragment: &[u8],
536) -> Option<(&'static str, String, String, String)> {
537    let fragment = trim_ascii_start(fragment);
538    let cases: &[(&[u8], &str, &str)] = &[
539        (
540            b"action def",
541            "action definition",
542            "Use `action def Run;` or `action def Run { ... }`.",
543        ),
544        (
545            b"part def",
546            "part definition",
547            "Use `part def Wheel;` or `part def Wheel { ... }`.",
548        ),
549        (
550            b"requirement def",
551            "requirement definition",
552            "Use `requirement def R;` or `requirement def R { ... }`.",
553        ),
554        (
555            b"state def",
556            "state definition",
557            "Use `state def Ready;` or `state def Ready { ... }`.",
558        ),
559        (
560            b"view",
561            "view declaration",
562            "Use `view structure: GeneralView;` or `view structure: GeneralView { ... }`.",
563        ),
564        (
565            b"rendering def",
566            "rendering definition",
567            "Use `rendering def Diagram;` or `rendering def Diagram { ... }`.",
568        ),
569    ];
570
571    for (prefix, label, suggestion) in cases {
572        if fragment.starts_with(prefix) {
573            return Some((
574                "missing_body_or_semicolon",
575                format!("expected ';' or '{{' after {label} header"),
576                "';' or '{' after declaration header".to_string(),
577                suggestion.to_string(),
578            ));
579        }
580    }
581    None
582}
583
584fn invalid_typing_operator_diagnostic(
585    fragment: &[u8],
586) -> Option<(&'static str, String, String, String)> {
587    let fragment = trim_ascii_start(fragment);
588    let cases: &[(&[u8], &str, &str)] = &[
589        (
590            b"part def",
591            "part definition specialization",
592            "Use `part def Vehicle :> BaseVehicle;` when specializing a definition.",
593        ),
594        (
595            b"port def",
596            "port definition specialization",
597            "Use `port def PowerPort :> BasePort;` when specializing a definition.",
598        ),
599    ];
600
601    for (prefix, label, suggestion) in cases {
602        if fragment.starts_with(prefix) && fragment.windows(3).any(|w| w == b": ") {
603            return Some((
604                "invalid_typing_operator",
605                format!("invalid typing operator in {label}: use ':>' instead of ':'"),
606                "':>' specialization operator".to_string(),
607                suggestion.to_string(),
608            ));
609        }
610    }
611
612    if fragment.starts_with(b"part def")
613        && fragment.contains(&b':')
614        && !fragment.windows(2).any(|w| w == b":>")
615    {
616        return Some((
617            "invalid_typing_operator",
618            "invalid typing operator in part definition: use ':>' instead of ':'".to_string(),
619            "':>' specialization operator".to_string(),
620            "Use `part def Vehicle :> BaseVehicle;` when specializing a definition.".to_string(),
621        ));
622    }
623
624    None
625}
626
627fn missing_expression_after_operator_diagnostic(
628    fragment: &[u8],
629) -> Option<(&'static str, String, String, String)> {
630    let fragment = trim_ascii_start(fragment);
631    let cases: &[(&[u8], &str, &str)] = &[
632        (
633            b"bind",
634            "binding expression after '='",
635            "Use `bind x = y;`.",
636        ),
637        (
638            b"assign",
639            "assignment expression after ':='",
640            "Use `assign x := y;`.",
641        ),
642        (
643            b"first",
644            "target after 'then'",
645            "Use `first start then finish;`.",
646        ),
647        (
648            b"flow",
649            "target after 'to'",
650            "Use `flow source to target;`.",
651        ),
652        (
653            b"satisfy",
654            "target after 'by'",
655            "Use `satisfy Req by implementation;`.",
656        ),
657    ];
658
659    for (keyword, expected, suggestion) in cases {
660        if !lex::starts_with_keyword(fragment, keyword) {
661            continue;
662        }
663        let text = String::from_utf8_lossy(fragment);
664        if text.contains("= ;") || text.trim_end().ends_with('=') {
665            return Some((
666                "missing_expression_after_operator",
667                "expected expression after '='".to_string(),
668                expected.to_string(),
669                suggestion.to_string(),
670            ));
671        }
672        if text.contains(":= ;") || text.trim_end().ends_with(":=") {
673            return Some((
674                "missing_expression_after_operator",
675                "expected expression after ':='".to_string(),
676                expected.to_string(),
677                suggestion.to_string(),
678            ));
679        }
680        if text.contains(" then ;") || text.trim_end().ends_with(" then") {
681            return Some((
682                "missing_expression_after_operator",
683                "expected target after 'then'".to_string(),
684                expected.to_string(),
685                suggestion.to_string(),
686            ));
687        }
688        if text.contains(" to ;") || text.trim_end().ends_with(" to") {
689            return Some((
690                "missing_expression_after_operator",
691                "expected target after 'to'".to_string(),
692                expected.to_string(),
693                suggestion.to_string(),
694            ));
695        }
696        if text.contains(" by ;") || text.trim_end().ends_with(" by") {
697            return Some((
698                "missing_expression_after_operator",
699                "expected target after 'by'".to_string(),
700                expected.to_string(),
701                suggestion.to_string(),
702            ));
703        }
704    }
705    None
706}
707
708fn invalid_unit_reference_diagnostic(
709    fragment: &[u8],
710) -> Option<(&'static str, String, String, String)> {
711    let fragment = trim_ascii_start(fragment);
712    let text = String::from_utf8_lossy(fragment);
713    if !(text.contains('[') && text.contains(']')) {
714        return None;
715    }
716
717    if text.contains("[]") || text.contains("[ ]") {
718        return Some((
719            "invalid_unit_reference",
720            "expected unit name inside '[ ]'".to_string(),
721            "unit name inside '[ ]'".to_string(),
722            "Use a concrete unit such as `1750 [kg]`.".to_string(),
723        ));
724    }
725
726    if text.contains("[;")
727        || text.contains("[ ;")
728        || text.contains("[)")
729        || text.contains("[ ]")
730        || text.contains("[,")
731    {
732        return Some((
733            "invalid_unit_reference",
734            "invalid unit expression inside '[ ]'".to_string(),
735            "unit name inside '[ ]'".to_string(),
736            "Use a unit symbol or qualified unit name (example: `[kg]` or `[SI::kg]`).".to_string(),
737        ));
738    }
739
740    None
741}
742
743fn unexpected_keyword_in_scope_diagnostic(
744    fragment: &[u8],
745    starters: &[&[u8]],
746    scope_label: &str,
747) -> Option<(&'static str, String, String, String)> {
748    let fragment = trim_ascii_start(fragment);
749    if fragment.is_empty() || fragment.starts_with(b"#") || fragment.starts_with(b"@") {
750        return None;
751    }
752    let keyword_end = fragment
753        .iter()
754        .position(|b| !b.is_ascii_alphanumeric() && *b != b'_')
755        .unwrap_or(fragment.len());
756    if keyword_end == 0 {
757        return None;
758    }
759    let keyword = &fragment[..keyword_end];
760    if lex::starts_with_any_keyword(keyword, starters) {
761        return None;
762    }
763    let keyword_text = String::from_utf8_lossy(keyword);
764    Some((
765        "unexpected_keyword_in_scope",
766        format!("unexpected keyword `{keyword_text}` in {scope_label}"),
767        format!("valid {scope_label} element"),
768        format!("Replace `{keyword_text}` with a valid {scope_label} member or remove it."),
769    ))
770}
771
/// Build the `unexpected_closing_brace` diagnostic located at `input`'s current position.
///
/// The error uses `input`'s offset, line and column, spans a single byte, and carries a
/// suggestion to remove the brace or add the missing opening `{`.
fn unexpected_closing_brace_parse_error(input: Input<'_>) -> ParseError {
    ParseError::new("unexpected closing '}'")
        .with_location(
            input.location_offset(),
            input.location_line(),
            input.get_column(),
        )
        .with_length(1)
        .with_code("unexpected_closing_brace")
        .with_expected("valid declaration or end of current body")
        .with_found("}")
        .with_suggestion("Remove this '}' or add the missing opening '{' before it.")
        .with_severity(DiagnosticSeverity::Error)
        .with_category(DiagnosticCategory::ParseError)
}
787
788fn missing_closing_brace_error(bytes: &[u8], input: Input<'_>) -> Option<ParseError> {
789    if !input.fragment().is_empty() {
790        return None;
791    }
792    let consumed = &bytes[..input.location_offset().min(bytes.len())];
793    let opens = consumed.iter().filter(|&&b| b == b'{').count();
794    let closes = consumed.iter().filter(|&&b| b == b'}').count();
795    if opens <= closes {
796        return None;
797    }
798    Some(missing_closing_brace_error_at_eof(consumed))
799}
800
/// Build the `missing_closing_brace` diagnostic positioned at the end of `bytes`.
///
/// The offset is `bytes.len()` and the line/column come from [`eof_line_column`].
// NOTE(review): unlike `unexpected_closing_brace_parse_error`, no severity is set here;
// this presumably relies on `ParseError`'s default severity — confirm that is intended.
fn missing_closing_brace_error_at_eof(bytes: &[u8]) -> ParseError {
    let (line, column) = eof_line_column(bytes);
    ParseError::new("missing closing '}'")
        .with_location(bytes.len(), line, column)
        .with_length(1)
        .with_code("missing_closing_brace")
        .with_expected("'}'")
        .with_suggestion("Add '}' to close the open body.")
        .with_category(DiagnosticCategory::ParseError)
}
811
812fn category_from_code(code: &str) -> DiagnosticCategory {
813    if code == "unsupported_annotation_syntax" {
814        DiagnosticCategory::UnsupportedGrammarForm
815    } else if code == "unresolved_symbol" {
816        DiagnosticCategory::UnresolvedSymbol
817    } else {
818        DiagnosticCategory::ParseError
819    }
820}
821
/// Report whether `bytes` contains more `{` than `}`.
///
/// This is a raw byte count: it ignores ordering and does not skip braces inside strings
/// or comments.
fn has_unclosed_brace(bytes: &[u8]) -> bool {
    let balance: isize = bytes
        .iter()
        .map(|&byte| match byte {
            b'{' => 1,
            b'}' => -1,
            _ => 0,
        })
        .sum();
    balance > 0
}
827
/// Compute the 1-based `(line, column)` of the position just past the end of `bytes`.
///
/// Every `\n` starts a new line and resets the column to 1; every other byte (including
/// `\r`) advances the column by one, so columns are counted in bytes.
fn eof_line_column(bytes: &[u8]) -> (u32, usize) {
    bytes
        .iter()
        .fold((1u32, 1usize), |(line, column), &byte| match byte {
            b'\n' => (line + 1, 1),
            _ => (line, column + 1),
        })
}
841
/// Build a recovery error node for a failure at `input`.
///
/// Convenience wrapper around `build_recovery_error_node_from_span` that passes `input` as
/// both the start and the end position; the remaining arguments are forwarded unchanged.
pub(crate) fn build_recovery_error_node(
    input: Input<'_>,
    starters: &[&[u8]],
    scope_label: &str,
    generic_code: &str,
) -> ParseErrorNode {
    build_recovery_error_node_from_span(input, input, starters, scope_label, generic_code)
}
850
/// Outcome of classifying a region of input that failed to parse (see `classify_recovery`).
///
/// Variants with fields carry the pieces of a diagnostic: its `code`, human-readable
/// `message`, a description of what was `expected`, and a `suggestion` for fixing it.
enum RecoveryClassification {
    /// A member keyword is directly followed by `:`; built from `missing_name_diagnostic`.
    MissingMemberName {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// A member has a name and `:` but no type; built from `missing_type_diagnostic`.
    MissingTypeReference {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// An `expose` target uses `.` instead of `::`; built from
    /// `invalid_expose_separator_diagnostic`.
    InvalidQualifiedNameSeparator {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// A declaration header lacks `;` or a body; built from
    /// `missing_semicolon_or_body_diagnostic`.
    MissingBodyOrSemicolon {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// An operator has no operand after it; built from
    /// `missing_expression_after_operator_diagnostic`.
    MissingExpressionAfterOperator {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// A `[unit]` annotation is empty or malformed; built from
    /// `invalid_unit_reference_diagnostic`.
    InvalidUnitReference {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// A definition uses `:` where `:>` is required; built from
    /// `invalid_typing_operator_diagnostic`.
    InvalidTypingOperator {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    // NOTE(review): presumably built from `unexpected_keyword_in_scope_diagnostic`; the
    // construction site is not confirmed here.
    UnexpectedKeywordInScope {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    // The remaining variants carry no diagnostic payload of their own.
    MissingSemicolon,
    UnsupportedAnnotation,
    Unexpected,
}
904
/// Return `fragment` with its trailing ASCII whitespace bytes removed.
///
/// "Whitespace" follows `u8::is_ascii_whitespace`. An all-whitespace or empty input yields
/// an empty slice.
fn trim_ascii_end(fragment: &[u8]) -> &[u8] {
    let kept_len = fragment
        .iter()
        .rposition(|byte| !byte.is_ascii_whitespace())
        .map_or(0, |last_kept| last_kept + 1);
    &fragment[..kept_len]
}
915
916fn classify_recovery(
917    input: Input<'_>,
918    recovery_end: Input<'_>,
919    starters: &[&[u8]],
920    scope_label: &str,
921) -> RecoveryClassification {
922    let trimmed = trim_ascii_start(input.fragment());
923
924    if let Some((code, message, expected, suggestion)) = missing_name_diagnostic(trimmed) {
925        return RecoveryClassification::MissingMemberName {
926            code: code.to_string(),
927            message,
928            expected,
929            suggestion,
930        };
931    }
932
933    if let Some((code, message, expected, suggestion)) = missing_type_diagnostic(trimmed) {
934        return RecoveryClassification::MissingTypeReference {
935            code: code.to_string(),
936            message,
937            expected,
938            suggestion,
939        };
940    }
941
942    if let Some((code, message, expected, suggestion)) =
943        invalid_expose_separator_diagnostic(trimmed)
944    {
945        return RecoveryClassification::InvalidQualifiedNameSeparator {
946            code: code.to_string(),
947            message,
948            expected,
949            suggestion,
950        };
951    }
952
953    if let Some((code, message, expected, suggestion)) = invalid_typing_operator_diagnostic(trimmed)
954    {
955        return RecoveryClassification::InvalidTypingOperator {
956            code: code.to_string(),
957            message,
958            expected,
959            suggestion,
960        };
961    }
962
963    if let Some((code, message, expected, suggestion)) =
964        missing_expression_after_operator_diagnostic(trimmed)
965    {
966        return RecoveryClassification::MissingExpressionAfterOperator {
967            code: code.to_string(),
968            message,
969            expected,
970            suggestion,
971        };
972    }
973
974    if let Some((code, message, expected, suggestion)) = invalid_unit_reference_diagnostic(trimmed)
975    {
976        return RecoveryClassification::InvalidUnitReference {
977            code: code.to_string(),
978            message,
979            expected,
980            suggestion,
981        };
982    }
983
984    if let Some((code, message, expected, suggestion)) =
985        missing_semicolon_or_body_diagnostic(trimmed)
986    {
987        return RecoveryClassification::MissingBodyOrSemicolon {
988            code: code.to_string(),
989            message,
990            expected,
991            suggestion,
992        };
993    }
994
995    let consumed_len = recovery_end
996        .location_offset()
997        .saturating_sub(input.location_offset())
998        .min(input.fragment().len());
999    let raw_consumed = &input.fragment()[..consumed_len];
1000    let consumed = trim_ascii_end(raw_consumed);
1001    let recovered_to_boundary = recovery_end.location_offset() > input.location_offset() && {
1002        let (next, _) = lex::ws_and_comments(recovery_end).unwrap_or((recovery_end, ()));
1003        next.fragment().is_empty()
1004            || next.fragment().starts_with(b"}")
1005            || lex::starts_with_any_keyword(next.fragment(), starters)
1006    };
1007
1008    let consumed_has_newline = raw_consumed.contains(&b'\n') || raw_consumed.contains(&b'\r');
1009    let first_line_end = consumed
1010        .iter()
1011        .position(|b| matches!(*b, b'\n' | b'\r'))
1012        .unwrap_or(consumed.len());
1013    let first_line = trim_ascii_end(&consumed[..first_line_end]);
1014    let consumed_has_delimiters = consumed
1015        .iter()
1016        .any(|b| matches!(*b, b'{' | b'}' | b'(' | b')' | b'[' | b']'));
1017    let consumed_ends_incomplete = first_line.last().is_some_and(|b| {
1018        matches!(
1019            *b,
1020            b':' | b'=' | b',' | b'.' | b'+' | b'-' | b'*' | b'/' | b'>' | b'<' | b'|'
1021        )
1022    });
1023    let first_line_has_semicolon = first_line.contains(&b';');
1024    if recovered_to_boundary
1025        && lex::starts_with_any_keyword(trimmed, starters)
1026        && (consumed_has_newline || recovery_end.fragment().starts_with(b"}"))
1027        && !consumed.is_empty()
1028        && !consumed_has_delimiters
1029        && !consumed_ends_incomplete
1030        && !first_line_has_semicolon
1031    {
1032        return RecoveryClassification::MissingSemicolon;
1033    }
1034
1035    if lex::starts_with_keyword(trimmed, b"#") || lex::starts_with_keyword(trimmed, b"@") {
1036        return RecoveryClassification::UnsupportedAnnotation;
1037    }
1038
1039    if let Some((code, message, expected, suggestion)) =
1040        unexpected_keyword_in_scope_diagnostic(trimmed, starters, scope_label)
1041    {
1042        return RecoveryClassification::UnexpectedKeywordInScope {
1043            code: code.to_string(),
1044            message,
1045            expected,
1046            suggestion,
1047        };
1048    }
1049
1050    RecoveryClassification::Unexpected
1051}
1052
1053pub(crate) fn build_recovery_error_node_from_span(
1054    input: Input<'_>,
1055    recovery_end: Input<'_>,
1056    starters: &[&[u8]],
1057    scope_label: &str,
1058    generic_code: &str,
1059) -> ParseErrorNode {
1060    match classify_recovery(input, recovery_end, starters, scope_label) {
1061        RecoveryClassification::MissingMemberName {
1062            code,
1063            message,
1064            expected,
1065            suggestion,
1066        }
1067        | RecoveryClassification::MissingTypeReference {
1068            code,
1069            message,
1070            expected,
1071            suggestion,
1072        }
1073        | RecoveryClassification::InvalidQualifiedNameSeparator {
1074            code,
1075            message,
1076            expected,
1077            suggestion,
1078        }
1079        | RecoveryClassification::MissingBodyOrSemicolon {
1080            code,
1081            message,
1082            expected,
1083            suggestion,
1084        }
1085        | RecoveryClassification::MissingExpressionAfterOperator {
1086            code,
1087            message,
1088            expected,
1089            suggestion,
1090        }
1091        | RecoveryClassification::InvalidUnitReference {
1092            code,
1093            message,
1094            expected,
1095            suggestion,
1096        }
1097        | RecoveryClassification::InvalidTypingOperator {
1098            code,
1099            message,
1100            expected,
1101            suggestion,
1102        }
1103        | RecoveryClassification::UnexpectedKeywordInScope {
1104            code,
1105            message,
1106            expected,
1107            suggestion,
1108        } => ParseErrorNode {
1109            message,
1110            code,
1111            expected: Some(expected),
1112            found: recovery_found_snippet_from_span(input, recovery_end),
1113            suggestion: Some(suggestion),
1114            category: Some(DiagnosticCategory::ParseError),
1115        },
1116        RecoveryClassification::MissingSemicolon => ParseErrorNode {
1117            message: "missing semicolon before next declaration".to_string(),
1118            code: "missing_semicolon".to_string(),
1119            expected: Some("';'".to_string()),
1120            found: recovery_found_snippet_from_span(input, recovery_end),
1121            suggestion: Some("Insert ';' before this declaration.".to_string()),
1122            category: Some(DiagnosticCategory::ParseError),
1123        },
1124        RecoveryClassification::UnsupportedAnnotation => ParseErrorNode {
1125            message: format!("unsupported annotation syntax in {scope_label}"),
1126            code: "unsupported_annotation_syntax".to_string(),
1127            expected: Some(format!("valid {scope_label} element")),
1128            found: recovery_found_snippet_from_span(input, recovery_end),
1129            suggestion: Some(
1130                "Remove this annotation or extend the parser to support annotated declarations."
1131                    .to_string(),
1132            ),
1133            category: Some(DiagnosticCategory::UnsupportedGrammarForm),
1134        },
1135        RecoveryClassification::Unexpected => ParseErrorNode {
1136            message: format!("unexpected token in {scope_label}"),
1137            code: generic_code.to_string(),
1138            expected: Some(format!("valid {scope_label} element")),
1139            found: recovery_found_snippet_from_span(input, recovery_end),
1140            suggestion: Some(format!("Fix this {scope_label} member and re-run parsing.")),
1141            category: Some(DiagnosticCategory::ParseError),
1142        },
1143    }
1144}
1145
1146fn is_only_trailing_closing_braces(mut input: Input<'_>) -> bool {
1147    loop {
1148        let (next, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
1149        input = next;
1150        if input.fragment().is_empty() {
1151            return true;
1152        }
1153        if input.fragment().starts_with(b"}") {
1154            match nom::bytes::complete::tag::<_, _, nom::error::Error<Input>>(&b"}"[..])
1155                .parse(input)
1156            {
1157                Ok((next, _)) => {
1158                    input = next;
1159                    continue;
1160                }
1161                Err(_) => return false,
1162            }
1163        }
1164        return false;
1165    }
1166}
1167
1168fn parse_error_from_recovery_node(span: &crate::ast::Span, node: &ParseErrorNode) -> ParseError {
1169    let mut err = ParseError::new(node.message.clone())
1170        .with_location(span.offset, span.line, span.column)
1171        .with_length(span.len.max(1))
1172        .with_code(node.code.clone())
1173        .with_category(
1174            node.category
1175                .unwrap_or_else(|| category_from_code(node.code.as_str())),
1176        );
1177    let severity = if node.code == "unsupported_annotation_syntax" {
1178        DiagnosticSeverity::Warning
1179    } else {
1180        DiagnosticSeverity::Error
1181    };
1182    err = err.with_severity(severity);
1183    if let Some(expected) = &node.expected {
1184        err = err.with_expected(expected.clone());
1185    }
1186    if let Some(found) = &node.found {
1187        err = err.with_found(found.clone());
1188    }
1189    if let Some(suggestion) = &node.suggestion {
1190        err = err.with_suggestion(suggestion.clone());
1191    }
1192    err
1193}
1194
1195fn diagnostic_specificity(err: &ParseError) -> u8 {
1196    match err.code.as_deref() {
1197        Some("missing_member_name")
1198        | Some("missing_type_reference")
1199        | Some("invalid_qualified_name_separator")
1200        | Some("invalid_typing_operator")
1201        | Some("missing_expression_after_operator")
1202        | Some("invalid_unit_reference")
1203        | Some("missing_body_or_semicolon")
1204        | Some("missing_semicolon")
1205        | Some("unexpected_closing_brace")
1206        | Some("missing_closing_brace")
1207        | Some("unsupported_annotation_syntax")
1208        | Some("unexpected_keyword_in_scope") => 5,
1209        Some("illegal_top_level_definition") => 4,
1210        Some(code) if code.starts_with("recovered_") => 2,
1211        Some("expected_end_of_input") | Some("expected_keyword") => 1,
1212        _ => 3,
1213    }
1214}
1215
1216fn dedup_errors(mut errors: Vec<ParseError>) -> Vec<ParseError> {
1217    errors.sort_by_key(|e| {
1218        (
1219            e.offset.unwrap_or(usize::MAX),
1220            e.line.unwrap_or(u32::MAX),
1221            e.column.unwrap_or(usize::MAX),
1222            std::cmp::Reverse(diagnostic_specificity(e)),
1223        )
1224    });
1225
1226    let mut deduped = Vec::new();
1227    for err in errors {
1228        let duplicate = deduped.iter().any(|existing: &ParseError| {
1229            let same_start = existing.offset == err.offset
1230                && existing.line == err.line
1231                && existing.column == err.column;
1232            let same_found = existing.found == err.found;
1233            let existing_specificity = diagnostic_specificity(existing);
1234            let err_specificity = diagnostic_specificity(&err);
1235            same_start
1236                && (same_found || existing.code == err.code)
1237                && existing_specificity >= err_specificity
1238        });
1239        if !duplicate {
1240            deduped.push(err);
1241        }
1242    }
1243
1244    deduped.sort_by_key(|e| (e.offset.unwrap_or(usize::MAX), e.line.unwrap_or(u32::MAX)));
1245    deduped
1246}
1247
1248fn collect_requirement_body_errors(body: &RequirementDefBody, errors: &mut Vec<ParseError>) {
1249    if let RequirementDefBody::Brace { elements } = body {
1250        for element in elements {
1251            match &element.value {
1252                RequirementDefBodyElement::Error(n) => {
1253                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1254                }
1255                RequirementDefBodyElement::Frame(n) => {
1256                    collect_requirement_body_errors(&n.value.body, errors)
1257                }
1258                _ => {}
1259            }
1260        }
1261    }
1262}
1263
1264fn collect_action_def_body_errors(body: &ActionDefBody, errors: &mut Vec<ParseError>) {
1265    if let ActionDefBody::Brace { elements } = body {
1266        for element in elements {
1267            if let ActionDefBodyElement::Error(n) = &element.value {
1268                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1269            }
1270        }
1271    }
1272}
1273
1274fn collect_action_usage_body_errors(body: &ActionUsageBody, errors: &mut Vec<ParseError>) {
1275    if let ActionUsageBody::Brace { elements } = body {
1276        for element in elements {
1277            match &element.value {
1278                ActionUsageBodyElement::Error(n) => {
1279                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1280                }
1281                ActionUsageBodyElement::ActionUsage(n) => {
1282                    collect_action_usage_body_errors(&n.value.body, errors)
1283                }
1284                _ => {}
1285            }
1286        }
1287    }
1288}
1289
1290fn collect_state_body_errors(body: &StateDefBody, errors: &mut Vec<ParseError>) {
1291    if let StateDefBody::Brace { elements } = body {
1292        for element in elements {
1293            match &element.value {
1294                StateDefBodyElement::Error(n) => {
1295                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1296                }
1297                StateDefBodyElement::Entry(n) => collect_state_body_errors(&n.value.body, errors),
1298                StateDefBodyElement::RequirementUsage(n) => {
1299                    collect_requirement_body_errors(&n.value.body, errors)
1300                }
1301                StateDefBodyElement::StateUsage(n) => {
1302                    collect_state_body_errors(&n.value.body, errors)
1303                }
1304                _ => {}
1305            }
1306        }
1307    }
1308}
1309
1310fn collect_use_case_body_errors(body: &UseCaseDefBody, errors: &mut Vec<ParseError>) {
1311    if let UseCaseDefBody::Brace { elements } = body {
1312        for element in elements {
1313            if let UseCaseDefBodyElement::Error(n) = &element.value {
1314                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1315            }
1316        }
1317    }
1318}
1319
1320fn collect_constraint_body_errors(body: &ConstraintDefBody, errors: &mut Vec<ParseError>) {
1321    if let ConstraintDefBody::Brace { elements } = body {
1322        for element in elements {
1323            if let ConstraintDefBodyElement::Error(n) = &element.value {
1324                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1325            }
1326        }
1327    }
1328}
1329
1330fn collect_calc_body_errors(body: &CalcDefBody, errors: &mut Vec<ParseError>) {
1331    if let CalcDefBody::Brace { elements } = body {
1332        for element in elements {
1333            if let CalcDefBodyElement::Error(n) = &element.value {
1334                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1335            }
1336        }
1337    }
1338}
1339
1340fn collect_view_def_body_errors(body: &ViewDefBody, errors: &mut Vec<ParseError>) {
1341    if let ViewDefBody::Brace { elements } = body {
1342        for element in elements {
1343            if let ViewDefBodyElement::Error(n) = &element.value {
1344                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1345            }
1346        }
1347    }
1348}
1349
1350fn collect_view_body_errors(body: &ViewBody, errors: &mut Vec<ParseError>) {
1351    if let ViewBody::Brace { elements } = body {
1352        for element in elements {
1353            if let ViewBodyElement::Error(n) = &element.value {
1354                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1355            }
1356        }
1357    }
1358}
1359
1360fn collect_part_def_body_errors(body: &PartDefBody, errors: &mut Vec<ParseError>) {
1361    if let PartDefBody::Brace { elements } = body {
1362        for element in elements {
1363            match &element.value {
1364                PartDefBodyElement::Error(n) => {
1365                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1366                }
1367                PartDefBodyElement::PartUsage(n) => {
1368                    collect_part_usage_body_errors(&n.value.body, errors)
1369                }
1370                PartDefBodyElement::Perform(n) => {
1371                    collect_perform_body_errors(&n.value.body, errors)
1372                }
1373                _ => {}
1374            }
1375        }
1376    }
1377}
1378
1379fn collect_perform_body_errors(body: &crate::ast::PerformBody, _errors: &mut Vec<ParseError>) {
1380    match body {
1381        crate::ast::PerformBody::Semicolon => {}
1382        crate::ast::PerformBody::Brace { .. } => {}
1383    }
1384}
1385
1386fn collect_part_usage_body_errors(body: &PartUsageBody, errors: &mut Vec<ParseError>) {
1387    if let PartUsageBody::Brace { elements } = body {
1388        for element in elements {
1389            match &element.value {
1390                PartUsageBodyElement::Error(n) => {
1391                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1392                }
1393                PartUsageBodyElement::PartUsage(n) => {
1394                    collect_part_usage_body_errors(&n.value.body, errors)
1395                }
1396                PartUsageBodyElement::Perform(n) => {
1397                    collect_perform_body_errors(&n.value.body, errors)
1398                }
1399                PartUsageBodyElement::StateUsage(n) => {
1400                    collect_state_body_errors(&n.value.body, errors)
1401                }
1402                _ => {}
1403            }
1404        }
1405    }
1406}
1407
1408fn collect_package_body_errors(body: &PackageBody, errors: &mut Vec<ParseError>) {
1409    if let PackageBody::Brace { elements } = body {
1410        for element in elements {
1411            match &element.value {
1412                PackageBodyElement::Error(n) => {
1413                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1414                }
1415                PackageBodyElement::Package(n) => {
1416                    collect_package_body_errors(&n.value.body, errors)
1417                }
1418                PackageBodyElement::LibraryPackage(n) => {
1419                    collect_package_body_errors(&n.value.body, errors)
1420                }
1421                PackageBodyElement::PartDef(n) => {
1422                    collect_part_def_body_errors(&n.value.body, errors)
1423                }
1424                PackageBodyElement::PartUsage(n) => {
1425                    collect_part_usage_body_errors(&n.value.body, errors)
1426                }
1427                PackageBodyElement::ActionDef(n) => {
1428                    collect_action_def_body_errors(&n.value.body, errors)
1429                }
1430                PackageBodyElement::ActionUsage(n) => {
1431                    collect_action_usage_body_errors(&n.value.body, errors)
1432                }
1433                PackageBodyElement::RequirementDef(n) => {
1434                    collect_requirement_body_errors(&n.value.body, errors)
1435                }
1436                PackageBodyElement::RequirementUsage(n) => {
1437                    collect_requirement_body_errors(&n.value.body, errors)
1438                }
1439                PackageBodyElement::UseCaseDef(n) => {
1440                    collect_use_case_body_errors(&n.value.body, errors)
1441                }
1442                PackageBodyElement::UseCaseUsage(n) => {
1443                    collect_use_case_body_errors(&n.value.body, errors)
1444                }
1445                PackageBodyElement::ConcernUsage(n) => {
1446                    collect_requirement_body_errors(&n.value.body, errors)
1447                }
1448                PackageBodyElement::StateDef(n) => collect_state_body_errors(&n.value.body, errors),
1449                PackageBodyElement::StateUsage(n) => {
1450                    collect_state_body_errors(&n.value.body, errors)
1451                }
1452                PackageBodyElement::ConstraintDef(n) => {
1453                    collect_constraint_body_errors(&n.value.body, errors)
1454                }
1455                PackageBodyElement::CalcDef(n) => collect_calc_body_errors(&n.value.body, errors),
1456                PackageBodyElement::ViewDef(n) => {
1457                    collect_view_def_body_errors(&n.value.body, errors)
1458                }
1459                PackageBodyElement::ViewUsage(n) => collect_view_body_errors(&n.value.body, errors),
1460                _ => {}
1461            }
1462        }
1463    }
1464}
1465
1466fn collect_recovery_errors(root: &RootNamespace) -> Vec<ParseError> {
1467    let mut errors = Vec::new();
1468    for element in &root.elements {
1469        match &element.value {
1470            crate::ast::RootElement::Package(n) => {
1471                collect_package_body_errors(&n.value.body, &mut errors)
1472            }
1473            crate::ast::RootElement::LibraryPackage(n) => {
1474                collect_package_body_errors(&n.value.body, &mut errors)
1475            }
1476            crate::ast::RootElement::Namespace(n) => {
1477                collect_package_body_errors(&n.value.body, &mut errors)
1478            }
1479            crate::ast::RootElement::Import(_) => {}
1480        }
1481    }
1482    errors
1483}
1484
1485/// Parse full input; must consume entire input. Strips UTF-8 BOM if present.
1486#[allow(clippy::result_large_err)]
1487pub fn parse_root(input: &str) -> Result<RootNamespace, ParseError> {
1488    let bytes = input
1489        .strip_prefix('\u{FEFF}')
1490        .map(str::as_bytes)
1491        .unwrap_or_else(|| input.as_bytes());
1492    let located = LocatedSpan::new(bytes);
1493    match package::root_namespace(located) {
1494        Ok((rest, root)) => {
1495            if !rest.fragment().is_empty() && has_unclosed_brace(bytes) {
1496                return Err(missing_closing_brace_error_at_eof(bytes));
1497            }
1498            if rest.fragment().is_empty() || is_only_trailing_closing_braces(rest) {
1499                log::debug!("parse_root: success, {} top-level elements", root.elements.len());
1500                Ok(root)
1501            } else {
1502                let offset = located.location_offset() + located.fragment().len() - rest.fragment().len();
1503                let unconsumed = rest.fragment();
1504                let first_80 = unconsumed.get(..80.min(unconsumed.len())).unwrap_or(unconsumed);
1505                log::debug!(
1506                    "parse_root: expected end of input; parsed {} elements; unconsumed len={}, offset={}, first 80 bytes: {:?}",
1507                    root.elements.len(),
1508                    unconsumed.len(),
1509                    offset,
1510                    first_80,
1511                );
1512                log::debug!(
1513                    "parse_root: unconsumed as str: {:?}",
1514                    String::from_utf8_lossy(first_80),
1515                );
1516                let (found_snippet, found_len) = fragment_to_found_snippet(rest.fragment());
1517                let mut pe = ParseError::new("expected end of input")
1518                    .with_location(offset, rest.location_line(), rest.get_column())
1519                    .with_length(found_len.max(1))
1520                    .with_code("expected_end_of_input")
1521                    .with_category(DiagnosticCategory::ParseError);
1522                if !found_snippet.is_empty() {
1523                    pe = pe.with_found(found_snippet);
1524                }
1525                if root.elements.is_empty() && is_illegal_top_level_definition(rest.fragment()) {
1526                    pe = pe
1527                        .with_code("illegal_top_level_definition")
1528                        .with_expected("'package', 'namespace', or 'import'")
1529                        .with_suggestion(
1530                            "Wrap this declaration in `package ... { ... }` or `namespace ... { ... }`.",
1531                        );
1532                    pe.message = "illegal top-level definition".to_string();
1533                }
1534                Err(pe)
1535            }
1536        }
1537        Err(nom::Err::Error(e)) => Err(missing_closing_brace_error(bytes, e.input).unwrap_or_else(|| {
1538            nom_err_to_parse_error(
1539                &e,
1540                None,
1541                Some("'package', 'namespace', or 'import' at top level; or valid element in package body"),
1542            )
1543        })),
1544        Err(nom::Err::Failure(e)) => Err(missing_closing_brace_error(bytes, e.input).unwrap_or_else(|| {
1545            nom_err_to_parse_error(
1546                &e,
1547                None,
1548                Some("'package', 'namespace', or 'import' at top level; or valid element in package body"),
1549            )
1550        })),
1551        Err(nom::Err::Incomplete(_)) => Err(
1552            ParseError::new("unexpected end of input")
1553                .with_code("unexpected_eof")
1554                .with_category(DiagnosticCategory::ParseError),
1555        ),
1556    }
1557}
1558
1559const MAX_RECOVERY_ERRORS: usize = 100;
1560
1561/// Parse input with error recovery: collects multiple diagnostics and returns a partial AST when errors occur.
1562/// Use this for language servers so the user sees all parse errors and features (e.g. hover) can use the partial AST.
1563pub fn parse_with_diagnostics(input: &str) -> ParseResult {
1564    let bytes = input
1565        .strip_prefix('\u{FEFF}')
1566        .map(str::as_bytes)
1567        .unwrap_or_else(|| input.as_bytes());
1568    let located = LocatedSpan::new(bytes);
1569
1570    let mut elements = Vec::new();
1571    let mut errors = Vec::new();
1572
1573    let (mut input, _) = match lex::ws_and_comments(located) {
1574        Ok(x) => x,
1575        Err(_) => {
1576            return ParseResult {
1577                root: RootNamespace { elements: vec![] },
1578                errors: vec![ParseError::new("invalid input")
1579                    .with_code("invalid_input")
1580                    .with_category(DiagnosticCategory::ParseError)],
1581            };
1582        }
1583    };
1584
1585    while errors.len() < MAX_RECOVERY_ERRORS {
1586        // Skip leading ws/comments; if nothing left, we're done (avoids parsing "" as root_element).
1587        let (rest, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
1588        input = rest;
1589        if input.fragment().is_empty() {
1590            break;
1591        }
1592        match package::root_element(input) {
1593            Ok((rest, elem)) => {
1594                elements.push(elem);
1595                input = rest;
1596            }
1597            Err(nom::Err::Error(e)) | Err(nom::Err::Failure(e)) => {
1598                let (trimmed, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
1599                if trim_ascii_start(trimmed.fragment()).starts_with(b"}") {
1600                    errors.push(unexpected_closing_brace_parse_error(trimmed));
1601                    let skip_result = lex::skip_to_next_sync_point(trimmed);
1602                    match skip_result {
1603                        Ok((rest, _)) => input = rest,
1604                        Err(_) => break,
1605                    }
1606                    continue;
1607                }
1608                if errors.is_empty()
1609                    && has_unclosed_brace(bytes)
1610                    && (lex::starts_with_keyword(trimmed.fragment(), b"package")
1611                        || lex::starts_with_keyword(trimmed.fragment(), b"namespace")
1612                        || lex::starts_with_keyword(trimmed.fragment(), b"library")
1613                        || lex::starts_with_keyword(trimmed.fragment(), b"standard"))
1614                {
1615                    errors.push(missing_closing_brace_error_at_eof(bytes));
1616                    break;
1617                }
1618                let pe = missing_closing_brace_error(bytes, e.input).unwrap_or_else(|| {
1619                    nom_err_to_parse_error(&e, None, Some("'package', 'namespace', or 'import'"))
1620                });
1621                errors.push(pe);
1622                let skip_result = lex::skip_to_next_sync_point(e.input);
1623                match skip_result {
1624                    Ok((rest, _)) => input = rest,
1625                    Err(_) => break,
1626                }
1627            }
1628            Err(nom::Err::Incomplete(_)) => {
1629                errors.push(
1630                    ParseError::new("unexpected end of input")
1631                        .with_location(
1632                            input.location_offset(),
1633                            input.location_line(),
1634                            input.get_column(),
1635                        )
1636                        .with_length(1)
1637                        .with_code("unexpected_eof")
1638                        .with_category(DiagnosticCategory::ParseError),
1639                );
1640                break;
1641            }
1642        }
1643    }
1644
1645    let (input, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
1646
1647    if input.fragment().is_empty()
1648        && has_unclosed_brace(bytes)
1649        && !errors
1650            .iter()
1651            .any(|e| e.code.as_deref() == Some("missing_closing_brace"))
1652    {
1653        errors.push(missing_closing_brace_error_at_eof(bytes));
1654    }
1655
1656    if !input.fragment().is_empty()
1657        && !errors
1658            .iter()
1659            .any(|e| e.code.as_deref() == Some("missing_closing_brace"))
1660    {
1661        if trim_ascii_start(input.fragment()).starts_with(b"}") {
1662            errors.push(unexpected_closing_brace_parse_error(input));
1663        } else {
1664            let (found_snippet, found_len) = fragment_to_found_snippet(input.fragment());
1665            let mut pe = ParseError::new("expected end of input")
1666                .with_location(
1667                    input.location_offset(),
1668                    input.location_line(),
1669                    input.get_column(),
1670                )
1671                .with_length(found_len.max(1))
1672                .with_code("expected_end_of_input")
1673                .with_severity(DiagnosticSeverity::Error)
1674                .with_category(DiagnosticCategory::ParseError);
1675            if !found_snippet.is_empty() {
1676                pe = pe.with_found(found_snippet);
1677            }
1678            errors.push(pe);
1679        }
1680    }
1681
1682    errors.extend(collect_recovery_errors(&RootNamespace {
1683        elements: elements.clone(),
1684    }));
1685    errors = dedup_errors(errors);
1686
1687    ParseResult {
1688        root: RootNamespace { elements },
1689        errors,
1690    }
1691}