Skip to main content

sysml_v2_parser/parser/
mod.rs

1//! Nom-based parser for SysML v2 textual notation.
2//!
3//! Organized into modules:
4//! - [lex]: whitespace, comments, names, qualified names, skip helpers
5//! - [attribute]: attribute definition and usage
6//! - [import]: import and relationship body
7//! - [part]: part definition and part usage
8//! - [package]: package and root namespace
9
10mod action;
11mod alias;
12mod allocation;
13mod attribute;
14mod case;
15mod connection;
16mod constraint;
17mod dependency;
18mod enumeration;
19mod expr;
20mod flow;
21mod import;
22mod individual;
23mod interface;
24mod item;
25mod lex;
26mod metadata;
27mod metadata_annotation;
28mod occurrence;
29mod package;
30mod part;
31mod port;
32mod requirement;
33mod span;
34mod state;
35mod usecase;
36mod view;
37
38pub(crate) use span::{node_from_to, span_from_to, with_span, Input};
39
40use crate::ast::{
41    ActionDefBody, ActionDefBodyElement, ActionUsageBody, ActionUsageBodyElement, CalcDefBody,
42    CalcDefBodyElement, ConstraintDefBody, ConstraintDefBodyElement, PackageBody,
43    PackageBodyElement, ParseErrorNode, PartDefBody, PartDefBodyElement, PartUsageBody,
44    PartUsageBodyElement, RequirementDefBody, RequirementDefBodyElement, RootNamespace,
45    StateDefBody, StateDefBodyElement, UseCaseDefBody, UseCaseDefBodyElement, ViewBody,
46    ViewBodyElement, ViewDefBody, ViewDefBodyElement,
47};
48use crate::error::{DiagnosticSeverity, ParseError};
49use nom::error::Error;
50use nom::Parser;
51use nom_locate::LocatedSpan;
52
53/// Result of parsing with error recovery: a (possibly partial) AST and zero or more diagnostics.
54#[derive(Debug, Clone)]
55pub struct ParseResult {
56    /// Root namespace; contains all successfully parsed top-level elements (partial when errors occurred).
57    pub root: RootNamespace,
58    /// All parse errors encountered (multiple when recovery is used).
59    pub errors: Vec<ParseError>,
60}
61
62impl ParseResult {
63    /// True if the document parsed fully with no errors.
64    pub fn is_ok(&self) -> bool {
65        self.errors.is_empty()
66    }
67}
68
/// Upper bound, in bytes, on the input excerpt that `fragment_to_found_snippet` quotes as the
/// "found" text of a diagnostic.
69const FOUND_SNIPPET_MAX_LEN: usize = 40;
/// Keywords consulted by `is_illegal_top_level_definition`: when an error is reported in a
/// root-level context and the offending input starts with one of these, the diagnostic is
/// rewritten to `illegal_top_level_definition`.
70const ILLEGAL_TOP_LEVEL_STARTERS: &[&[u8]] = &[
71    b"action",
72    b"actor",
73    b"alias",
74    b"allocate",
75    b"allocation",
76    b"attribute",
77    b"bind",
78    b"calc",
79    b"case",
80    b"concern",
81    b"connection",
82    b"constraint",
83    b"dependency",
84    b"enum",
85    b"flow",
86    b"interface",
87    b"item",
88    b"metadata",
89    b"occurrence",
90    b"part",
91    b"perform",
92    b"port",
93    b"ref",
94    b"require",
95    b"requirement",
96    b"satisfy",
97    b"state",
98    b"use",
99    b"verification",
100    b"view",
101    b"viewpoint",
102];
103
104/// Take a short snippet from the input at the error position for "found" display.
105/// Uses first line or first FOUND_SNIPPET_MAX_LEN bytes, UTF-8 with replacement char.
106fn fragment_to_found_snippet(fragment: &[u8]) -> (String, usize) {
107    let take = fragment
108        .iter()
109        .position(|&b| b == b'\n' || b == b'\r')
110        .map(|p| p.min(FOUND_SNIPPET_MAX_LEN))
111        .unwrap_or_else(|| fragment.len().min(FOUND_SNIPPET_MAX_LEN));
112    let slice = fragment.get(..take).unwrap_or(fragment);
113    let s = String::from_utf8_lossy(slice)
114        .replace('\n', "\\n")
115        .replace('\r', "\\r");
116    let len = slice.len();
117    (s.trim_end().to_string(), len)
118}
119
120pub(crate) fn recovery_found_snippet(input: Input<'_>) -> Option<String> {
121    let frag = input.fragment();
122    let take = frag
123        .iter()
124        .position(|&b| b == b'\n' || b == b'\r')
125        .unwrap_or(frag.len())
126        .min(60);
127    let snippet = String::from_utf8_lossy(&frag[..take]).trim().to_string();
128    if snippet.is_empty() {
129        None
130    } else {
131        Some(snippet)
132    }
133}
134
135fn recovery_found_snippet_from_span(input: Input<'_>, recovery_end: Input<'_>) -> Option<String> {
136    let consumed_len = recovery_end
137        .location_offset()
138        .saturating_sub(input.location_offset())
139        .min(input.fragment().len());
140    if consumed_len == 0 {
141        return recovery_found_snippet(input);
142    }
143    let frag = &input.fragment()[..consumed_len];
144    let take = frag
145        .iter()
146        .position(|&b| b == b'\n' || b == b'\r')
147        .unwrap_or(frag.len())
148        .min(60);
149    let snippet = String::from_utf8_lossy(&frag[..take]).trim().to_string();
150    if snippet.is_empty() {
151        recovery_found_snippet(input)
152    } else {
153        Some(snippet)
154    }
155}
156
157/// Map nom error kind to a human-readable message for language server diagnostics.
158fn nom_error_kind_to_message(code: &nom::error::ErrorKind) -> &'static str {
159    use nom::error::ErrorKind;
160    match code {
161        ErrorKind::Tag => "expected keyword or token",
162        ErrorKind::Digit => "expected number",
163        ErrorKind::Alpha => "expected identifier",
164        ErrorKind::AlphaNumeric => "expected identifier",
165        ErrorKind::Space => "expected whitespace",
166        ErrorKind::MultiSpace => "expected whitespace",
167        ErrorKind::Eof => "unexpected end of input",
168        ErrorKind::TakeUntil => "expected terminator",
169        ErrorKind::TakeWhile1 => "expected token",
170        ErrorKind::Alt => {
171            "expected package, import, part, port, interface, alias, attribute, or action"
172        }
173        ErrorKind::Many0 | ErrorKind::Many1 => "expected list of elements",
174        _ => "parse error",
175    }
176}
177
178/// Map nom error kind to a specific code for LSP/quick fixes.
179fn nom_error_kind_to_code(code: &nom::error::ErrorKind) -> &'static str {
180    use nom::error::ErrorKind;
181    match code {
182        ErrorKind::Tag => "expected_keyword",
183        ErrorKind::Digit => "expected_number",
184        ErrorKind::Alpha | ErrorKind::AlphaNumeric => "expected_identifier",
185        ErrorKind::Space | ErrorKind::MultiSpace => "expected_whitespace",
186        ErrorKind::Eof => "unexpected_eof",
187        ErrorKind::TakeUntil => "expected_terminator",
188        ErrorKind::TakeWhile1 => "expected_token",
189        ErrorKind::Alt => "expected_alt",
190        ErrorKind::Many0 | ErrorKind::Many1 => "expected_list",
191        _ => "parse_error",
192    }
193}
194
195fn nom_err_to_parse_error(
196    e: &Error<Input<'_>>,
197    length_override: Option<usize>,
198    expected_context: Option<&'static str>,
199) -> ParseError {
200    let offset = e.input.location_offset();
201    let line = e.input.location_line();
202    let column = e.input.get_column();
203    let fragment = e.input.fragment();
204    let (found_snippet, found_len) = fragment_to_found_snippet(fragment);
205    let message = nom_error_kind_to_message(&e.code).to_string();
206    let span_len = length_override.unwrap_or(found_len).max(1);
207    if trim_ascii_start(fragment).starts_with(b"}") {
208        return unexpected_closing_brace_parse_error(e.input);
209    }
210    let mut pe = ParseError::new(message)
211        .with_location(offset, line, column)
212        .with_length(span_len)
213        .with_code(nom_error_kind_to_code(&e.code))
214        .with_severity(DiagnosticSeverity::Error);
215    if !found_snippet.is_empty() {
216        pe = pe.with_found(found_snippet);
217    }
218    if let Some(ctx) = expected_context {
219        pe = pe.with_expected(ctx);
220    }
221    let at_root = expected_context.is_some_and(|ctx| {
222        ctx.contains("'package', 'namespace', or 'import'") || ctx.contains("top level")
223    });
224    if at_root && is_illegal_top_level_definition(fragment) {
225        pe.message = "illegal top-level definition".to_string();
226        pe.code = Some("illegal_top_level_definition".to_string());
227        pe.expected = Some("'package', 'namespace', or 'import'".to_string());
228        pe.suggestion = Some(
229            "Wrap this declaration in `package ... { ... }` or `namespace ... { ... }`."
230                .to_string(),
231        );
232    }
233    pe
234}
235
236fn is_illegal_top_level_definition(fragment: &[u8]) -> bool {
237    let trimmed = trim_ascii_start(fragment);
238    !trimmed.starts_with(b"}")
239        && !trimmed.starts_with(b"//")
240        && !trimmed.starts_with(b"/*")
241        && lex::starts_with_any_keyword(trimmed, ILLEGAL_TOP_LEVEL_STARTERS)
242}
243
/// Strip leading ASCII whitespace (as defined by `u8::is_ascii_whitespace`) from `fragment`.
///
/// Returns an empty slice when the input is empty or consists only of whitespace.
fn trim_ascii_start(fragment: &[u8]) -> &[u8] {
    let start = fragment
        .iter()
        .position(|b| !b.is_ascii_whitespace())
        .unwrap_or(fragment.len());
    &fragment[start..]
}
254
255fn starts_with_missing_name_after_keyword(
256    fragment: &[u8],
257    keyword: &[u8],
258    trailing_keywords: &[&[u8]],
259) -> bool {
260    let mut fragment = trim_ascii_start(fragment);
261    if !lex::starts_with_keyword(fragment, keyword) {
262        return false;
263    }
264    fragment = &fragment[keyword.len()..];
265    while let Some(first) = fragment.first() {
266        if first.is_ascii_whitespace() {
267            fragment = &fragment[1..];
268            continue;
269        }
270        break;
271    }
272    for trailing in trailing_keywords {
273        if lex::starts_with_keyword(fragment, trailing) {
274            fragment = &fragment[trailing.len()..];
275            while let Some(first) = fragment.first() {
276                if first.is_ascii_whitespace() {
277                    fragment = &fragment[1..];
278                    continue;
279                }
280                break;
281            }
282        }
283    }
284    fragment.starts_with(b":")
285}
286
287fn starts_with_missing_type_after_keyword(
288    fragment: &[u8],
289    keyword: &[u8],
290    trailing_keywords: &[&[u8]],
291) -> bool {
292    let mut fragment = trim_ascii_start(fragment);
293    if !lex::starts_with_keyword(fragment, keyword) {
294        return false;
295    }
296    fragment = &fragment[keyword.len()..];
297    while let Some(first) = fragment.first() {
298        if first.is_ascii_whitespace() {
299            fragment = &fragment[1..];
300            continue;
301        }
302        break;
303    }
304    for trailing in trailing_keywords {
305        if lex::starts_with_keyword(fragment, trailing) {
306            fragment = &fragment[trailing.len()..];
307            while let Some(first) = fragment.first() {
308                if first.is_ascii_whitespace() {
309                    fragment = &fragment[1..];
310                    continue;
311                }
312                break;
313            }
314        }
315    }
316
317    let mut name_len = 0usize;
318    while name_len < fragment.len()
319        && (fragment[name_len].is_ascii_alphanumeric() || fragment[name_len] == b'_')
320    {
321        name_len += 1;
322    }
323    if name_len == 0 {
324        return false;
325    }
326    fragment = &fragment[name_len..];
327    while let Some(first) = fragment.first() {
328        if first.is_ascii_whitespace() {
329            fragment = &fragment[1..];
330            continue;
331        }
332        break;
333    }
334    if !fragment.starts_with(b":") {
335        return false;
336    }
337    fragment = &fragment[1..];
338    while let Some(first) = fragment.first() {
339        if first.is_ascii_whitespace() {
340            fragment = &fragment[1..];
341            continue;
342        }
343        break;
344    }
345
346    fragment.is_empty()
347        || fragment.starts_with(b";")
348        || fragment.starts_with(b"{")
349        || fragment.starts_with(b"}")
350        || lex::starts_with_keyword(fragment, b"then")
351        || lex::starts_with_keyword(fragment, b"if")
352        || lex::starts_with_keyword(fragment, b"do")
353}
354
355fn missing_name_diagnostic(fragment: &[u8]) -> Option<(&'static str, String, String, String)> {
356    #[allow(clippy::type_complexity)]
357    let cases: &[(&[u8], &[&[u8]], &str, &str)] = &[
358        (
359            b"subject",
360            &[],
361            "subject name",
362            "Use `subject laptop: Laptop;`.",
363        ),
364        (b"actor", &[], "actor name", "Use `actor user: User;`."),
365        (b"state", &[], "state name", "Use `state ready: Mode;`."),
366        (b"part", &[], "part name", "Use `part wheel: Wheel;`."),
367        (b"ref", &[], "reference name", "Use `ref sensor: Sensor;`."),
368        (b"port", &[], "port name", "Use `port power: PowerPort;`."),
369        (
370            b"attribute",
371            &[],
372            "attribute name",
373            "Use `attribute mass: MassValue;`.",
374        ),
375        (b"in", &[], "input name", "Use `in speed: Real;`."),
376        (b"out", &[], "output name", "Use `out result: Real;`."),
377        (
378            b"perform",
379            &[b"action"],
380            "action name",
381            "Use `perform action run: Runner;`.",
382        ),
383        (b"return", &[], "return name", "Use `return result: Real;`."),
384    ];
385
386    for (keyword, trailing, missing_what, suggestion) in cases {
387        if starts_with_missing_name_after_keyword(fragment, keyword, trailing) {
388            return Some((
389                "missing_member_name",
390                format!("expected {missing_what} before ':'"),
391                format!("{missing_what} before ':'"),
392                suggestion.to_string(),
393            ));
394        }
395    }
396    None
397}
398
399fn missing_type_diagnostic(fragment: &[u8]) -> Option<(&'static str, String, String, String)> {
400    #[allow(clippy::type_complexity)]
401    let cases: &[(&[u8], &[&[u8]], &str)] = &[
402        (b"subject", &[], "subject type"),
403        (b"actor", &[], "actor type"),
404        (b"state", &[], "state type"),
405        (b"part", &[], "part type"),
406        (b"ref", &[], "reference type"),
407        (b"port", &[], "port type"),
408        (b"attribute", &[], "attribute type"),
409        (b"in", &[], "input type"),
410        (b"out", &[], "output type"),
411        (b"perform", &[b"action"], "action type"),
412        (b"return", &[], "return type"),
413    ];
414
415    for &(keyword, trailing, missing_what) in cases {
416        if starts_with_missing_type_after_keyword(fragment, keyword, trailing) {
417            let keyword_label = String::from_utf8_lossy(keyword);
418            let sample_name = if keyword == &b"subject"[..] {
419                "laptop"
420            } else if keyword == &b"actor"[..] {
421                "user"
422            } else if keyword == &b"state"[..] {
423                "ready"
424            } else if keyword == &b"part"[..] {
425                "wheel"
426            } else if keyword == &b"ref"[..] {
427                "sensor"
428            } else if keyword == &b"port"[..] {
429                "power"
430            } else if keyword == &b"attribute"[..] {
431                "mass"
432            } else if keyword == &b"in"[..] {
433                "speed"
434            } else if keyword == &b"out"[..] {
435                "result"
436            } else if keyword == &b"perform"[..] {
437                "run"
438            } else if keyword == &b"return"[..] {
439                "result"
440            } else {
441                "member"
442            };
443            let sample_type = if keyword == &b"subject"[..] {
444                "Laptop"
445            } else if keyword == &b"actor"[..] {
446                "User"
447            } else if keyword == &b"state"[..] {
448                "Mode"
449            } else if keyword == &b"part"[..] {
450                "Wheel"
451            } else if keyword == &b"ref"[..] {
452                "Sensor"
453            } else if keyword == &b"port"[..] {
454                "PowerPort"
455            } else if keyword == &b"attribute"[..] {
456                "MassValue"
457            } else if keyword == &b"in"[..] || keyword == &b"out"[..] {
458                "Real"
459            } else if keyword == &b"perform"[..] {
460                "Runner"
461            } else if keyword == &b"return"[..] {
462                "Real"
463            } else {
464                "Type"
465            };
466            let suggestion = if keyword == &b"perform"[..] {
467                format!("Use `perform action {sample_name}: {sample_type};`.")
468            } else if keyword == &b"return"[..] {
469                format!("Use `return {sample_name}: {sample_type};`.")
470            } else {
471                format!("Use `{keyword_label} {sample_name}: {sample_type};`.")
472            };
473            return Some((
474                "missing_type_reference",
475                format!("expected {missing_what} after ':'"),
476                format!("{missing_what} after ':'"),
477                suggestion,
478            ));
479        }
480    }
481    None
482}
483
484fn invalid_expose_separator_diagnostic(
485    fragment: &[u8],
486) -> Option<(&'static str, String, String, String)> {
487    let mut fragment = trim_ascii_start(fragment);
488    if !lex::starts_with_keyword(fragment, b"expose") {
489        return None;
490    }
491    fragment = &fragment[b"expose".len()..];
492    while let Some(first) = fragment.first() {
493        if first.is_ascii_whitespace() {
494            fragment = &fragment[1..];
495            continue;
496        }
497        break;
498    }
499    if fragment.is_empty() {
500        return None;
501    }
502
503    let mut saw_dot = false;
504    let mut in_quoted_name = false;
505    for &b in fragment {
506        if b == b'\'' {
507            in_quoted_name = !in_quoted_name;
508            continue;
509        }
510        if in_quoted_name {
511            continue;
512        }
513        if matches!(b, b';' | b'[' | b'{' | b'}' | b'\n' | b'\r') {
514            break;
515        }
516        if b == b'.' {
517            saw_dot = true;
518            break;
519        }
520    }
521    if !saw_dot {
522        return None;
523    }
524
525    Some((
526        "invalid_qualified_name_separator",
527        "invalid qualified name in expose target: use '::' instead of '.'".to_string(),
528        "qualified name segments separated by '::'".to_string(),
529        "Replace '.' with '::' in the expose target (example: `expose A::B;`).".to_string(),
530    ))
531}
532
533fn missing_semicolon_or_body_diagnostic(
534    fragment: &[u8],
535) -> Option<(&'static str, String, String, String)> {
536    let fragment = trim_ascii_start(fragment);
537    let cases: &[(&[u8], &str, &str)] = &[
538        (
539            b"action def",
540            "action definition",
541            "Use `action def Run;` or `action def Run { ... }`.",
542        ),
543        (
544            b"part def",
545            "part definition",
546            "Use `part def Wheel;` or `part def Wheel { ... }`.",
547        ),
548        (
549            b"requirement def",
550            "requirement definition",
551            "Use `requirement def R;` or `requirement def R { ... }`.",
552        ),
553        (
554            b"state def",
555            "state definition",
556            "Use `state def Ready;` or `state def Ready { ... }`.",
557        ),
558        (
559            b"view",
560            "view declaration",
561            "Use `view structure: GeneralView;` or `view structure: GeneralView { ... }`.",
562        ),
563        (
564            b"rendering def",
565            "rendering definition",
566            "Use `rendering def Diagram;` or `rendering def Diagram { ... }`.",
567        ),
568    ];
569
570    for (prefix, label, suggestion) in cases {
571        if fragment.starts_with(prefix) {
572            return Some((
573                "missing_body_or_semicolon",
574                format!("expected ';' or '{{' after {label} header"),
575                "';' or '{' after declaration header".to_string(),
576                suggestion.to_string(),
577            ));
578        }
579    }
580    None
581}
582
583fn invalid_typing_operator_diagnostic(
584    fragment: &[u8],
585) -> Option<(&'static str, String, String, String)> {
586    let fragment = trim_ascii_start(fragment);
587    let cases: &[(&[u8], &str, &str)] = &[
588        (
589            b"part def",
590            "part definition specialization",
591            "Use `part def Vehicle :> BaseVehicle;` when specializing a definition.",
592        ),
593        (
594            b"port def",
595            "port definition specialization",
596            "Use `port def PowerPort :> BasePort;` when specializing a definition.",
597        ),
598    ];
599
600    for (prefix, label, suggestion) in cases {
601        if fragment.starts_with(prefix) && fragment.windows(3).any(|w| w == b": ") {
602            return Some((
603                "invalid_typing_operator",
604                format!("invalid typing operator in {label}: use ':>' instead of ':'"),
605                "':>' specialization operator".to_string(),
606                suggestion.to_string(),
607            ));
608        }
609    }
610
611    if fragment.starts_with(b"part def")
612        && fragment.contains(&b':')
613        && !fragment.windows(2).any(|w| w == b":>")
614    {
615        return Some((
616            "invalid_typing_operator",
617            "invalid typing operator in part definition: use ':>' instead of ':'".to_string(),
618            "':>' specialization operator".to_string(),
619            "Use `part def Vehicle :> BaseVehicle;` when specializing a definition.".to_string(),
620        ));
621    }
622
623    None
624}
625
626fn missing_expression_after_operator_diagnostic(
627    fragment: &[u8],
628) -> Option<(&'static str, String, String, String)> {
629    let fragment = trim_ascii_start(fragment);
630    let cases: &[(&[u8], &str, &str)] = &[
631        (
632            b"bind",
633            "binding expression after '='",
634            "Use `bind x = y;`.",
635        ),
636        (
637            b"assign",
638            "assignment expression after ':='",
639            "Use `assign x := y;`.",
640        ),
641        (
642            b"first",
643            "target after 'then'",
644            "Use `first start then finish;`.",
645        ),
646        (
647            b"flow",
648            "target after 'to'",
649            "Use `flow source to target;`.",
650        ),
651        (
652            b"satisfy",
653            "target after 'by'",
654            "Use `satisfy Req by implementation;`.",
655        ),
656    ];
657
658    for (keyword, expected, suggestion) in cases {
659        if !lex::starts_with_keyword(fragment, keyword) {
660            continue;
661        }
662        let text = String::from_utf8_lossy(fragment);
663        if text.contains("= ;") || text.trim_end().ends_with('=') {
664            return Some((
665                "missing_expression_after_operator",
666                "expected expression after '='".to_string(),
667                expected.to_string(),
668                suggestion.to_string(),
669            ));
670        }
671        if text.contains(":= ;") || text.trim_end().ends_with(":=") {
672            return Some((
673                "missing_expression_after_operator",
674                "expected expression after ':='".to_string(),
675                expected.to_string(),
676                suggestion.to_string(),
677            ));
678        }
679        if text.contains(" then ;") || text.trim_end().ends_with(" then") {
680            return Some((
681                "missing_expression_after_operator",
682                "expected target after 'then'".to_string(),
683                expected.to_string(),
684                suggestion.to_string(),
685            ));
686        }
687        if text.contains(" to ;") || text.trim_end().ends_with(" to") {
688            return Some((
689                "missing_expression_after_operator",
690                "expected target after 'to'".to_string(),
691                expected.to_string(),
692                suggestion.to_string(),
693            ));
694        }
695        if text.contains(" by ;") || text.trim_end().ends_with(" by") {
696            return Some((
697                "missing_expression_after_operator",
698                "expected target after 'by'".to_string(),
699                expected.to_string(),
700                suggestion.to_string(),
701            ));
702        }
703    }
704    None
705}
706
707fn unexpected_keyword_in_scope_diagnostic(
708    fragment: &[u8],
709    starters: &[&[u8]],
710    scope_label: &str,
711) -> Option<(&'static str, String, String, String)> {
712    let fragment = trim_ascii_start(fragment);
713    if fragment.is_empty() || fragment.starts_with(b"#") || fragment.starts_with(b"@") {
714        return None;
715    }
716    let keyword_end = fragment
717        .iter()
718        .position(|b| !b.is_ascii_alphanumeric() && *b != b'_')
719        .unwrap_or(fragment.len());
720    if keyword_end == 0 {
721        return None;
722    }
723    let keyword = &fragment[..keyword_end];
724    if lex::starts_with_any_keyword(keyword, starters) {
725        return None;
726    }
727    let keyword_text = String::from_utf8_lossy(keyword);
728    Some((
729        "unexpected_keyword_in_scope",
730        format!("unexpected keyword `{keyword_text}` in {scope_label}"),
731        format!("valid {scope_label} element"),
732        format!("Replace `{keyword_text}` with a valid {scope_label} member or remove it."),
733    ))
734}
735
736fn unexpected_closing_brace_parse_error(input: Input<'_>) -> ParseError {
737    ParseError::new("unexpected closing '}'")
738        .with_location(
739            input.location_offset(),
740            input.location_line(),
741            input.get_column(),
742        )
743        .with_length(1)
744        .with_code("unexpected_closing_brace")
745        .with_expected("valid declaration or end of current body")
746        .with_found("}")
747        .with_suggestion("Remove this '}' or add the missing opening '{' before it.")
748        .with_severity(DiagnosticSeverity::Error)
749}
750
751fn missing_closing_brace_error(bytes: &[u8], input: Input<'_>) -> Option<ParseError> {
752    if !input.fragment().is_empty() {
753        return None;
754    }
755    let consumed = &bytes[..input.location_offset().min(bytes.len())];
756    let opens = consumed.iter().filter(|&&b| b == b'{').count();
757    let closes = consumed.iter().filter(|&&b| b == b'}').count();
758    if opens <= closes {
759        return None;
760    }
761    Some(missing_closing_brace_error_at_eof(consumed))
762}
763
764fn missing_closing_brace_error_at_eof(bytes: &[u8]) -> ParseError {
765    let (line, column) = eof_line_column(bytes);
766    ParseError::new("missing closing '}'")
767        .with_location(bytes.len(), line, column)
768        .with_length(1)
769        .with_code("missing_closing_brace")
770        .with_expected("'}'")
771        .with_suggestion("Add '}' to close the open body.")
772}
773
/// Returns `true` when `bytes` contains more `{` than `}` bytes.
///
/// This is a plain byte count; it does not take nesting order into account.
fn has_unclosed_brace(bytes: &[u8]) -> bool {
    let (opens, closes) = bytes
        .iter()
        .fold((0usize, 0usize), |(opens, closes), &b| match b {
            b'{' => (opens + 1, closes),
            b'}' => (opens, closes + 1),
            _ => (opens, closes),
        });
    opens > closes
}
779
/// Compute the 1-based `(line, column)` of the position just past the last byte of `bytes`.
///
/// Each `\n` starts a new line and resets the column to 1; every other byte (including
/// `\r`) advances the column by one, so columns are counted in bytes.
fn eof_line_column(bytes: &[u8]) -> (u32, usize) {
    bytes.iter().fold((1u32, 1usize), |(line, column), &b| {
        if b == b'\n' {
            (line + 1, 1)
        } else {
            (line, column + 1)
        }
    })
}
793
794pub(crate) fn build_recovery_error_node(
795    input: Input<'_>,
796    starters: &[&[u8]],
797    scope_label: &str,
798    generic_code: &str,
799) -> ParseErrorNode {
800    build_recovery_error_node_from_span(input, input, starters, scope_label, generic_code)
801}
802
/// Classification of the input at a recovery point, as produced by `classify_recovery`.
///
/// The variants that carry data all hold the same four diagnostic texts: a stable `code`,
/// the `message`, what was `expected`, and a `suggestion` for fixing the input.
#[derive(Debug, Clone, PartialEq, Eq)]
enum RecoveryClassification {
    /// A member declaration has no name before `:` (from `missing_name_diagnostic`).
    MissingMemberName {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// A member declaration has no type after `:` (from `missing_type_diagnostic`).
    MissingTypeReference {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// An `expose` target uses `.` instead of `::`
    /// (from `invalid_expose_separator_diagnostic`).
    InvalidQualifiedNameSeparator {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// A declaration header matched `missing_semicolon_or_body_diagnostic`.
    MissingBodyOrSemicolon {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// An operator is not followed by an expression or target
    /// (from `missing_expression_after_operator_diagnostic`).
    MissingExpressionAfterOperator {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// A definition uses `:` where `:>` belongs (from `invalid_typing_operator_diagnostic`).
    InvalidTypingOperator {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// The leading word is not a valid element starter for the scope
    /// (from `unexpected_keyword_in_scope_diagnostic`).
    UnexpectedKeywordInScope {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// The skipped input looks like a statement that only lacks its terminating `;`.
    MissingSemicolon,
    /// The input starts with `#` or `@`.
    UnsupportedAnnotation,
    /// NOTE(review): presumably the catch-all when nothing more specific applies - confirm
    /// against `classify_recovery`.
    Unexpected,
}
850
/// Strip trailing ASCII whitespace (as defined by `u8::is_ascii_whitespace`) from `fragment`.
///
/// Returns an empty slice when the input is empty or consists only of whitespace.
fn trim_ascii_end(fragment: &[u8]) -> &[u8] {
    let end = fragment
        .iter()
        .rposition(|b| !b.is_ascii_whitespace())
        .map_or(0, |last| last + 1);
    &fragment[..end]
}
861
862fn classify_recovery(
863    input: Input<'_>,
864    recovery_end: Input<'_>,
865    starters: &[&[u8]],
866    scope_label: &str,
867) -> RecoveryClassification {
868    let trimmed = trim_ascii_start(input.fragment());
869
870    if let Some((code, message, expected, suggestion)) = missing_name_diagnostic(trimmed) {
871        return RecoveryClassification::MissingMemberName {
872            code: code.to_string(),
873            message,
874            expected,
875            suggestion,
876        };
877    }
878
879    if let Some((code, message, expected, suggestion)) = missing_type_diagnostic(trimmed) {
880        return RecoveryClassification::MissingTypeReference {
881            code: code.to_string(),
882            message,
883            expected,
884            suggestion,
885        };
886    }
887
888    if let Some((code, message, expected, suggestion)) =
889        invalid_expose_separator_diagnostic(trimmed)
890    {
891        return RecoveryClassification::InvalidQualifiedNameSeparator {
892            code: code.to_string(),
893            message,
894            expected,
895            suggestion,
896        };
897    }
898
899    if let Some((code, message, expected, suggestion)) = invalid_typing_operator_diagnostic(trimmed)
900    {
901        return RecoveryClassification::InvalidTypingOperator {
902            code: code.to_string(),
903            message,
904            expected,
905            suggestion,
906        };
907    }
908
909    if let Some((code, message, expected, suggestion)) =
910        missing_expression_after_operator_diagnostic(trimmed)
911    {
912        return RecoveryClassification::MissingExpressionAfterOperator {
913            code: code.to_string(),
914            message,
915            expected,
916            suggestion,
917        };
918    }
919
920    if let Some((code, message, expected, suggestion)) =
921        missing_semicolon_or_body_diagnostic(trimmed)
922    {
923        return RecoveryClassification::MissingBodyOrSemicolon {
924            code: code.to_string(),
925            message,
926            expected,
927            suggestion,
928        };
929    }
930
931    let consumed_len = recovery_end
932        .location_offset()
933        .saturating_sub(input.location_offset())
934        .min(input.fragment().len());
935    let raw_consumed = &input.fragment()[..consumed_len];
936    let consumed = trim_ascii_end(raw_consumed);
937    let recovered_to_boundary = recovery_end.location_offset() > input.location_offset() && {
938        let (next, _) = lex::ws_and_comments(recovery_end).unwrap_or((recovery_end, ()));
939        next.fragment().is_empty()
940            || next.fragment().starts_with(b"}")
941            || lex::starts_with_any_keyword(next.fragment(), starters)
942    };
943
944    let consumed_has_newline = raw_consumed.contains(&b'\n') || raw_consumed.contains(&b'\r');
945    let first_line_end = consumed
946        .iter()
947        .position(|b| matches!(*b, b'\n' | b'\r'))
948        .unwrap_or(consumed.len());
949    let first_line = trim_ascii_end(&consumed[..first_line_end]);
950    let consumed_has_delimiters = consumed
951        .iter()
952        .any(|b| matches!(*b, b'{' | b'}' | b'(' | b')' | b'[' | b']'));
953    let consumed_ends_incomplete = first_line.last().is_some_and(|b| {
954        matches!(
955            *b,
956            b':' | b'=' | b',' | b'.' | b'+' | b'-' | b'*' | b'/' | b'>' | b'<' | b'|'
957        )
958    });
959    let first_line_has_semicolon = first_line.contains(&b';');
960    if recovered_to_boundary
961        && lex::starts_with_any_keyword(trimmed, starters)
962        && (consumed_has_newline || recovery_end.fragment().starts_with(b"}"))
963        && !consumed.is_empty()
964        && !consumed_has_delimiters
965        && !consumed_ends_incomplete
966        && !first_line_has_semicolon
967    {
968        return RecoveryClassification::MissingSemicolon;
969    }
970
971    if lex::starts_with_keyword(trimmed, b"#") || lex::starts_with_keyword(trimmed, b"@") {
972        return RecoveryClassification::UnsupportedAnnotation;
973    }
974
975    if let Some((code, message, expected, suggestion)) =
976        unexpected_keyword_in_scope_diagnostic(trimmed, starters, scope_label)
977    {
978        return RecoveryClassification::UnexpectedKeywordInScope {
979            code: code.to_string(),
980            message,
981            expected,
982            suggestion,
983        };
984    }
985
986    RecoveryClassification::Unexpected
987}
988
989pub(crate) fn build_recovery_error_node_from_span(
990    input: Input<'_>,
991    recovery_end: Input<'_>,
992    starters: &[&[u8]],
993    scope_label: &str,
994    generic_code: &str,
995) -> ParseErrorNode {
996    match classify_recovery(input, recovery_end, starters, scope_label) {
997        RecoveryClassification::MissingMemberName {
998            code,
999            message,
1000            expected,
1001            suggestion,
1002        }
1003        | RecoveryClassification::MissingTypeReference {
1004            code,
1005            message,
1006            expected,
1007            suggestion,
1008        }
1009        | RecoveryClassification::InvalidQualifiedNameSeparator {
1010            code,
1011            message,
1012            expected,
1013            suggestion,
1014        }
1015        | RecoveryClassification::MissingBodyOrSemicolon {
1016            code,
1017            message,
1018            expected,
1019            suggestion,
1020        }
1021        | RecoveryClassification::MissingExpressionAfterOperator {
1022            code,
1023            message,
1024            expected,
1025            suggestion,
1026        }
1027        | RecoveryClassification::InvalidTypingOperator {
1028            code,
1029            message,
1030            expected,
1031            suggestion,
1032        }
1033        | RecoveryClassification::UnexpectedKeywordInScope {
1034            code,
1035            message,
1036            expected,
1037            suggestion,
1038        } => ParseErrorNode {
1039            message,
1040            code,
1041            expected: Some(expected),
1042            found: recovery_found_snippet_from_span(input, recovery_end),
1043            suggestion: Some(suggestion),
1044        },
1045        RecoveryClassification::MissingSemicolon => ParseErrorNode {
1046            message: "missing semicolon before next declaration".to_string(),
1047            code: "missing_semicolon".to_string(),
1048            expected: Some("';'".to_string()),
1049            found: recovery_found_snippet_from_span(input, recovery_end),
1050            suggestion: Some("Insert ';' before this declaration.".to_string()),
1051        },
1052        RecoveryClassification::UnsupportedAnnotation => ParseErrorNode {
1053            message: format!("unsupported annotation syntax in {scope_label}"),
1054            code: "unsupported_annotation_syntax".to_string(),
1055            expected: Some(format!("valid {scope_label} element")),
1056            found: recovery_found_snippet_from_span(input, recovery_end),
1057            suggestion: Some(
1058                "Remove this annotation or extend the parser to support annotated declarations."
1059                    .to_string(),
1060            ),
1061        },
1062        RecoveryClassification::Unexpected => ParseErrorNode {
1063            message: format!("unexpected token in {scope_label}"),
1064            code: generic_code.to_string(),
1065            expected: Some(format!("valid {scope_label} element")),
1066            found: recovery_found_snippet_from_span(input, recovery_end),
1067            suggestion: Some(format!("Fix this {scope_label} member and re-run parsing.")),
1068        },
1069    }
1070}
1071
1072fn is_only_trailing_closing_braces(mut input: Input<'_>) -> bool {
1073    loop {
1074        let (next, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
1075        input = next;
1076        if input.fragment().is_empty() {
1077            return true;
1078        }
1079        if input.fragment().starts_with(b"}") {
1080            match nom::bytes::complete::tag::<_, _, nom::error::Error<Input>>(&b"}"[..])
1081                .parse(input)
1082            {
1083                Ok((next, _)) => {
1084                    input = next;
1085                    continue;
1086                }
1087                Err(_) => return false,
1088            }
1089        }
1090        return false;
1091    }
1092}
1093
1094fn parse_error_from_recovery_node(span: &crate::ast::Span, node: &ParseErrorNode) -> ParseError {
1095    let mut err = ParseError::new(node.message.clone())
1096        .with_location(span.offset, span.line, span.column)
1097        .with_length(span.len.max(1))
1098        .with_code(node.code.clone());
1099    let severity = if node.code == "unsupported_annotation_syntax" {
1100        DiagnosticSeverity::Warning
1101    } else {
1102        DiagnosticSeverity::Error
1103    };
1104    err = err.with_severity(severity);
1105    if let Some(expected) = &node.expected {
1106        err = err.with_expected(expected.clone());
1107    }
1108    if let Some(found) = &node.found {
1109        err = err.with_found(found.clone());
1110    }
1111    if let Some(suggestion) = &node.suggestion {
1112        err = err.with_suggestion(suggestion.clone());
1113    }
1114    err
1115}
1116
1117fn diagnostic_specificity(err: &ParseError) -> u8 {
1118    match err.code.as_deref() {
1119        Some("missing_member_name")
1120        | Some("missing_type_reference")
1121        | Some("invalid_qualified_name_separator")
1122        | Some("invalid_typing_operator")
1123        | Some("missing_expression_after_operator")
1124        | Some("missing_body_or_semicolon")
1125        | Some("missing_semicolon")
1126        | Some("unexpected_closing_brace")
1127        | Some("missing_closing_brace")
1128        | Some("unsupported_annotation_syntax")
1129        | Some("unexpected_keyword_in_scope") => 5,
1130        Some("illegal_top_level_definition") => 4,
1131        Some(code) if code.starts_with("recovered_") => 2,
1132        Some("expected_end_of_input") | Some("expected_keyword") => 1,
1133        _ => 3,
1134    }
1135}
1136
1137fn dedup_errors(mut errors: Vec<ParseError>) -> Vec<ParseError> {
1138    errors.sort_by_key(|e| {
1139        (
1140            e.offset.unwrap_or(usize::MAX),
1141            e.line.unwrap_or(u32::MAX),
1142            e.column.unwrap_or(usize::MAX),
1143            std::cmp::Reverse(diagnostic_specificity(e)),
1144        )
1145    });
1146
1147    let mut deduped = Vec::new();
1148    for err in errors {
1149        let duplicate = deduped.iter().any(|existing: &ParseError| {
1150            let same_start = existing.offset == err.offset
1151                && existing.line == err.line
1152                && existing.column == err.column;
1153            let same_found = existing.found == err.found;
1154            let existing_specificity = diagnostic_specificity(existing);
1155            let err_specificity = diagnostic_specificity(&err);
1156            same_start
1157                && (same_found || existing.code == err.code)
1158                && existing_specificity >= err_specificity
1159        });
1160        if !duplicate {
1161            deduped.push(err);
1162        }
1163    }
1164
1165    deduped.sort_by_key(|e| (e.offset.unwrap_or(usize::MAX), e.line.unwrap_or(u32::MAX)));
1166    deduped
1167}
1168
1169fn collect_requirement_body_errors(body: &RequirementDefBody, errors: &mut Vec<ParseError>) {
1170    if let RequirementDefBody::Brace { elements } = body {
1171        for element in elements {
1172            match &element.value {
1173                RequirementDefBodyElement::Error(n) => {
1174                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1175                }
1176                RequirementDefBodyElement::Frame(n) => {
1177                    collect_requirement_body_errors(&n.value.body, errors)
1178                }
1179                _ => {}
1180            }
1181        }
1182    }
1183}
1184
1185fn collect_action_def_body_errors(body: &ActionDefBody, errors: &mut Vec<ParseError>) {
1186    if let ActionDefBody::Brace { elements } = body {
1187        for element in elements {
1188            if let ActionDefBodyElement::Error(n) = &element.value {
1189                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1190            }
1191        }
1192    }
1193}
1194
1195fn collect_action_usage_body_errors(body: &ActionUsageBody, errors: &mut Vec<ParseError>) {
1196    if let ActionUsageBody::Brace { elements } = body {
1197        for element in elements {
1198            match &element.value {
1199                ActionUsageBodyElement::Error(n) => {
1200                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1201                }
1202                ActionUsageBodyElement::ActionUsage(n) => {
1203                    collect_action_usage_body_errors(&n.value.body, errors)
1204                }
1205                _ => {}
1206            }
1207        }
1208    }
1209}
1210
1211fn collect_state_body_errors(body: &StateDefBody, errors: &mut Vec<ParseError>) {
1212    if let StateDefBody::Brace { elements } = body {
1213        for element in elements {
1214            match &element.value {
1215                StateDefBodyElement::Error(n) => {
1216                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1217                }
1218                StateDefBodyElement::Entry(n) => collect_state_body_errors(&n.value.body, errors),
1219                StateDefBodyElement::RequirementUsage(n) => {
1220                    collect_requirement_body_errors(&n.value.body, errors)
1221                }
1222                StateDefBodyElement::StateUsage(n) => {
1223                    collect_state_body_errors(&n.value.body, errors)
1224                }
1225                _ => {}
1226            }
1227        }
1228    }
1229}
1230
1231fn collect_use_case_body_errors(body: &UseCaseDefBody, errors: &mut Vec<ParseError>) {
1232    if let UseCaseDefBody::Brace { elements } = body {
1233        for element in elements {
1234            if let UseCaseDefBodyElement::Error(n) = &element.value {
1235                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1236            }
1237        }
1238    }
1239}
1240
1241fn collect_constraint_body_errors(body: &ConstraintDefBody, errors: &mut Vec<ParseError>) {
1242    if let ConstraintDefBody::Brace { elements } = body {
1243        for element in elements {
1244            if let ConstraintDefBodyElement::Error(n) = &element.value {
1245                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1246            }
1247        }
1248    }
1249}
1250
1251fn collect_calc_body_errors(body: &CalcDefBody, errors: &mut Vec<ParseError>) {
1252    if let CalcDefBody::Brace { elements } = body {
1253        for element in elements {
1254            if let CalcDefBodyElement::Error(n) = &element.value {
1255                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1256            }
1257        }
1258    }
1259}
1260
1261fn collect_view_def_body_errors(body: &ViewDefBody, errors: &mut Vec<ParseError>) {
1262    if let ViewDefBody::Brace { elements } = body {
1263        for element in elements {
1264            if let ViewDefBodyElement::Error(n) = &element.value {
1265                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1266            }
1267        }
1268    }
1269}
1270
1271fn collect_view_body_errors(body: &ViewBody, errors: &mut Vec<ParseError>) {
1272    if let ViewBody::Brace { elements } = body {
1273        for element in elements {
1274            if let ViewBodyElement::Error(n) = &element.value {
1275                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1276            }
1277        }
1278    }
1279}
1280
1281fn collect_part_def_body_errors(body: &PartDefBody, errors: &mut Vec<ParseError>) {
1282    if let PartDefBody::Brace { elements } = body {
1283        for element in elements {
1284            match &element.value {
1285                PartDefBodyElement::Error(n) => {
1286                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1287                }
1288                PartDefBodyElement::PartUsage(n) => {
1289                    collect_part_usage_body_errors(&n.value.body, errors)
1290                }
1291                PartDefBodyElement::Perform(n) => {
1292                    collect_perform_body_errors(&n.value.body, errors)
1293                }
1294                _ => {}
1295            }
1296        }
1297    }
1298}
1299
1300fn collect_perform_body_errors(body: &crate::ast::PerformBody, _errors: &mut Vec<ParseError>) {
1301    match body {
1302        crate::ast::PerformBody::Semicolon => {}
1303        crate::ast::PerformBody::Brace { .. } => {}
1304    }
1305}
1306
1307fn collect_part_usage_body_errors(body: &PartUsageBody, errors: &mut Vec<ParseError>) {
1308    if let PartUsageBody::Brace { elements } = body {
1309        for element in elements {
1310            match &element.value {
1311                PartUsageBodyElement::Error(n) => {
1312                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1313                }
1314                PartUsageBodyElement::PartUsage(n) => {
1315                    collect_part_usage_body_errors(&n.value.body, errors)
1316                }
1317                PartUsageBodyElement::Perform(n) => {
1318                    collect_perform_body_errors(&n.value.body, errors)
1319                }
1320                PartUsageBodyElement::StateUsage(n) => {
1321                    collect_state_body_errors(&n.value.body, errors)
1322                }
1323                _ => {}
1324            }
1325        }
1326    }
1327}
1328
1329fn collect_package_body_errors(body: &PackageBody, errors: &mut Vec<ParseError>) {
1330    if let PackageBody::Brace { elements } = body {
1331        for element in elements {
1332            match &element.value {
1333                PackageBodyElement::Error(n) => {
1334                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1335                }
1336                PackageBodyElement::Package(n) => {
1337                    collect_package_body_errors(&n.value.body, errors)
1338                }
1339                PackageBodyElement::LibraryPackage(n) => {
1340                    collect_package_body_errors(&n.value.body, errors)
1341                }
1342                PackageBodyElement::PartDef(n) => {
1343                    collect_part_def_body_errors(&n.value.body, errors)
1344                }
1345                PackageBodyElement::PartUsage(n) => {
1346                    collect_part_usage_body_errors(&n.value.body, errors)
1347                }
1348                PackageBodyElement::ActionDef(n) => {
1349                    collect_action_def_body_errors(&n.value.body, errors)
1350                }
1351                PackageBodyElement::ActionUsage(n) => {
1352                    collect_action_usage_body_errors(&n.value.body, errors)
1353                }
1354                PackageBodyElement::RequirementDef(n) => {
1355                    collect_requirement_body_errors(&n.value.body, errors)
1356                }
1357                PackageBodyElement::RequirementUsage(n) => {
1358                    collect_requirement_body_errors(&n.value.body, errors)
1359                }
1360                PackageBodyElement::UseCaseDef(n) => {
1361                    collect_use_case_body_errors(&n.value.body, errors)
1362                }
1363                PackageBodyElement::UseCaseUsage(n) => {
1364                    collect_use_case_body_errors(&n.value.body, errors)
1365                }
1366                PackageBodyElement::ConcernUsage(n) => {
1367                    collect_requirement_body_errors(&n.value.body, errors)
1368                }
1369                PackageBodyElement::StateDef(n) => collect_state_body_errors(&n.value.body, errors),
1370                PackageBodyElement::StateUsage(n) => {
1371                    collect_state_body_errors(&n.value.body, errors)
1372                }
1373                PackageBodyElement::ConstraintDef(n) => {
1374                    collect_constraint_body_errors(&n.value.body, errors)
1375                }
1376                PackageBodyElement::CalcDef(n) => collect_calc_body_errors(&n.value.body, errors),
1377                PackageBodyElement::ViewDef(n) => {
1378                    collect_view_def_body_errors(&n.value.body, errors)
1379                }
1380                PackageBodyElement::ViewUsage(n) => collect_view_body_errors(&n.value.body, errors),
1381                _ => {}
1382            }
1383        }
1384    }
1385}
1386
1387fn collect_recovery_errors(root: &RootNamespace) -> Vec<ParseError> {
1388    let mut errors = Vec::new();
1389    for element in &root.elements {
1390        match &element.value {
1391            crate::ast::RootElement::Package(n) => {
1392                collect_package_body_errors(&n.value.body, &mut errors)
1393            }
1394            crate::ast::RootElement::LibraryPackage(n) => {
1395                collect_package_body_errors(&n.value.body, &mut errors)
1396            }
1397            crate::ast::RootElement::Namespace(n) => {
1398                collect_package_body_errors(&n.value.body, &mut errors)
1399            }
1400            crate::ast::RootElement::Import(_) => {}
1401        }
1402    }
1403    errors
1404}
1405
1406/// Parse full input; must consume entire input. Strips UTF-8 BOM if present.
1407#[allow(clippy::result_large_err)]
1408pub fn parse_root(input: &str) -> Result<RootNamespace, ParseError> {
1409    let bytes = input
1410        .strip_prefix('\u{FEFF}')
1411        .map(str::as_bytes)
1412        .unwrap_or_else(|| input.as_bytes());
1413    let located = LocatedSpan::new(bytes);
1414    match package::root_namespace(located) {
1415        Ok((rest, root)) => {
1416            if !rest.fragment().is_empty() && has_unclosed_brace(bytes) {
1417                return Err(missing_closing_brace_error_at_eof(bytes));
1418            }
1419            if rest.fragment().is_empty() || is_only_trailing_closing_braces(rest) {
1420                log::debug!("parse_root: success, {} top-level elements", root.elements.len());
1421                Ok(root)
1422            } else {
1423                let offset = located.location_offset() + located.fragment().len() - rest.fragment().len();
1424                let unconsumed = rest.fragment();
1425                let first_80 = unconsumed.get(..80.min(unconsumed.len())).unwrap_or(unconsumed);
1426                log::debug!(
1427                    "parse_root: expected end of input; parsed {} elements; unconsumed len={}, offset={}, first 80 bytes: {:?}",
1428                    root.elements.len(),
1429                    unconsumed.len(),
1430                    offset,
1431                    first_80,
1432                );
1433                log::debug!(
1434                    "parse_root: unconsumed as str: {:?}",
1435                    String::from_utf8_lossy(first_80),
1436                );
1437                let (found_snippet, found_len) = fragment_to_found_snippet(rest.fragment());
1438                let mut pe = ParseError::new("expected end of input")
1439                    .with_location(offset, rest.location_line(), rest.get_column())
1440                    .with_length(found_len.max(1))
1441                    .with_code("expected_end_of_input");
1442                if !found_snippet.is_empty() {
1443                    pe = pe.with_found(found_snippet);
1444                }
1445                if root.elements.is_empty() && is_illegal_top_level_definition(rest.fragment()) {
1446                    pe = pe
1447                        .with_code("illegal_top_level_definition")
1448                        .with_expected("'package', 'namespace', or 'import'")
1449                        .with_suggestion(
1450                            "Wrap this declaration in `package ... { ... }` or `namespace ... { ... }`.",
1451                        );
1452                    pe.message = "illegal top-level definition".to_string();
1453                }
1454                Err(pe)
1455            }
1456        }
1457        Err(nom::Err::Error(e)) => Err(missing_closing_brace_error(bytes, e.input).unwrap_or_else(|| {
1458            nom_err_to_parse_error(
1459                &e,
1460                None,
1461                Some("'package', 'namespace', or 'import' at top level; or valid element in package body"),
1462            )
1463        })),
1464        Err(nom::Err::Failure(e)) => Err(missing_closing_brace_error(bytes, e.input).unwrap_or_else(|| {
1465            nom_err_to_parse_error(
1466                &e,
1467                None,
1468                Some("'package', 'namespace', or 'import' at top level; or valid element in package body"),
1469            )
1470        })),
1471        Err(nom::Err::Incomplete(_)) => Err(ParseError::new("unexpected end of input").with_code("unexpected_eof")),
1472    }
1473}
1474
1475const MAX_RECOVERY_ERRORS: usize = 100;
1476
1477/// Parse input with error recovery: collects multiple diagnostics and returns a partial AST when errors occur.
1478/// Use this for language servers so the user sees all parse errors and features (e.g. hover) can use the partial AST.
1479pub fn parse_with_diagnostics(input: &str) -> ParseResult {
1480    let bytes = input
1481        .strip_prefix('\u{FEFF}')
1482        .map(str::as_bytes)
1483        .unwrap_or_else(|| input.as_bytes());
1484    let located = LocatedSpan::new(bytes);
1485
1486    let mut elements = Vec::new();
1487    let mut errors = Vec::new();
1488
1489    let (mut input, _) = match lex::ws_and_comments(located) {
1490        Ok(x) => x,
1491        Err(_) => {
1492            return ParseResult {
1493                root: RootNamespace { elements: vec![] },
1494                errors: vec![ParseError::new("invalid input").with_code("invalid_input")],
1495            };
1496        }
1497    };
1498
1499    while errors.len() < MAX_RECOVERY_ERRORS {
1500        // Skip leading ws/comments; if nothing left, we're done (avoids parsing "" as root_element).
1501        let (rest, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
1502        input = rest;
1503        if input.fragment().is_empty() {
1504            break;
1505        }
1506        match package::root_element(input) {
1507            Ok((rest, elem)) => {
1508                elements.push(elem);
1509                input = rest;
1510            }
1511            Err(nom::Err::Error(e)) | Err(nom::Err::Failure(e)) => {
1512                let (trimmed, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
1513                if trim_ascii_start(trimmed.fragment()).starts_with(b"}") {
1514                    errors.push(unexpected_closing_brace_parse_error(trimmed));
1515                    let skip_result = lex::skip_to_next_sync_point(trimmed);
1516                    match skip_result {
1517                        Ok((rest, _)) => input = rest,
1518                        Err(_) => break,
1519                    }
1520                    continue;
1521                }
1522                if errors.is_empty()
1523                    && has_unclosed_brace(bytes)
1524                    && (lex::starts_with_keyword(trimmed.fragment(), b"package")
1525                        || lex::starts_with_keyword(trimmed.fragment(), b"namespace")
1526                        || lex::starts_with_keyword(trimmed.fragment(), b"library")
1527                        || lex::starts_with_keyword(trimmed.fragment(), b"standard"))
1528                {
1529                    errors.push(missing_closing_brace_error_at_eof(bytes));
1530                    break;
1531                }
1532                let pe = missing_closing_brace_error(bytes, e.input).unwrap_or_else(|| {
1533                    nom_err_to_parse_error(&e, None, Some("'package', 'namespace', or 'import'"))
1534                });
1535                errors.push(pe);
1536                let skip_result = lex::skip_to_next_sync_point(e.input);
1537                match skip_result {
1538                    Ok((rest, _)) => input = rest,
1539                    Err(_) => break,
1540                }
1541            }
1542            Err(nom::Err::Incomplete(_)) => {
1543                errors.push(
1544                    ParseError::new("unexpected end of input")
1545                        .with_location(
1546                            input.location_offset(),
1547                            input.location_line(),
1548                            input.get_column(),
1549                        )
1550                        .with_length(1)
1551                        .with_code("unexpected_eof"),
1552                );
1553                break;
1554            }
1555        }
1556    }
1557
1558    let (input, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
1559
1560    if input.fragment().is_empty()
1561        && has_unclosed_brace(bytes)
1562        && !errors
1563            .iter()
1564            .any(|e| e.code.as_deref() == Some("missing_closing_brace"))
1565    {
1566        errors.push(missing_closing_brace_error_at_eof(bytes));
1567    }
1568
1569    if !input.fragment().is_empty()
1570        && !errors
1571            .iter()
1572            .any(|e| e.code.as_deref() == Some("missing_closing_brace"))
1573    {
1574        if trim_ascii_start(input.fragment()).starts_with(b"}") {
1575            errors.push(unexpected_closing_brace_parse_error(input));
1576        } else {
1577            let (found_snippet, found_len) = fragment_to_found_snippet(input.fragment());
1578            let mut pe = ParseError::new("expected end of input")
1579                .with_location(
1580                    input.location_offset(),
1581                    input.location_line(),
1582                    input.get_column(),
1583                )
1584                .with_length(found_len.max(1))
1585                .with_code("expected_end_of_input")
1586                .with_severity(DiagnosticSeverity::Error);
1587            if !found_snippet.is_empty() {
1588                pe = pe.with_found(found_snippet);
1589            }
1590            errors.push(pe);
1591        }
1592    }
1593
1594    errors.extend(collect_recovery_errors(&RootNamespace {
1595        elements: elements.clone(),
1596    }));
1597    errors = dedup_errors(errors);
1598
1599    ParseResult {
1600        root: RootNamespace { elements },
1601        errors,
1602    }
1603}