Skip to main content

sysml_v2_parser/parser/
mod.rs

1//! Nom-based parser for SysML v2 textual notation.
2//!
3//! Organized into modules:
4//! - [lex]: whitespace, comments, names, qualified names, skip helpers
5//! - [attribute]: attribute definition and usage
6//! - [import]: import and relationship body
7//! - [part]: part definition and part usage
8//! - [package]: package and root namespace
9
10mod action;
11mod alias;
12mod allocation;
13mod attribute;
14mod bnf_surface;
15mod body;
16mod case;
17mod connection;
18mod constraint;
19mod definition_prefix;
20mod dependency;
21mod enumeration;
22mod expr;
23mod flow;
24mod import;
25mod individual;
26mod interface;
27mod item;
28mod lex;
29mod metadata;
30mod metadata_annotation;
31mod occurrence;
32mod package;
33mod part;
34mod port;
35mod requirement;
36mod span;
37mod specialization;
38mod state;
39mod usage;
40mod usecase;
41mod view;
42
43use crate::ast::{
44    ActionDefBody, ActionDefBodyElement, ActionUsageBody, ActionUsageBodyElement, CalcDefBody,
45    CalcDefBodyElement, ConstraintDefBody, ConstraintDefBodyElement, PackageBody,
46    PackageBodyElement, ParseErrorNode, PartDefBody, PartDefBodyElement, PartUsageBody,
47    PartUsageBodyElement, RequirementDefBody, RequirementDefBodyElement, RootNamespace,
48    StateDefBody, StateDefBodyElement, UseCaseDefBody, UseCaseDefBodyElement, ViewBody,
49    ViewBodyElement, ViewDefBody, ViewDefBodyElement,
50};
51use crate::error::{DiagnosticCategory, DiagnosticSeverity, ParseError};
52use nom::error::Error;
53use nom_locate::LocatedSpan;
54pub(crate) use span::{node_from_to, span_from_to, with_span, Input};
55
56/// Result of parsing with error recovery: a (possibly partial) AST and zero or more diagnostics.
57#[derive(Debug, Clone)]
58pub struct ParseResult {
59    /// Root namespace; contains all successfully parsed top-level elements (partial when errors occurred).
60    pub root: RootNamespace,
61    /// All parse errors encountered (multiple when recovery is used).
62    pub errors: Vec<ParseError>,
63}
64
65impl ParseResult {
66    /// True if the document parsed fully with no errors.
67    pub fn is_ok(&self) -> bool {
68        self.errors.is_empty()
69    }
70}
71
/// Upper bound, in bytes, on the "found" snippet attached to a diagnostic.
const FOUND_SNIPPET_MAX_LEN: usize = 40;

/// Keywords that, when they start a fragment at the root, cause the error to be
/// reported as an illegal top-level definition (see `is_illegal_top_level_definition`).
const ILLEGAL_TOP_LEVEL_STARTERS: &[&[u8]] = &[
    b"action",
    b"actor",
    b"alias",
    b"allocate",
    b"allocation",
    b"attribute",
    b"bind",
    b"calc",
    b"case",
    b"concern",
    b"connection",
    b"constraint",
    b"dependency",
    b"enum",
    b"flow",
    b"interface",
    b"item",
    b"metadata",
    b"occurrence",
    b"part",
    b"perform",
    b"port",
    b"ref",
    b"require",
    b"requirement",
    b"satisfy",
    b"state",
    b"use",
    b"verification",
    b"view",
    b"viewpoint",
];
106
107/// Take a short snippet from the input at the error position for "found" display.
108/// Uses first line or first FOUND_SNIPPET_MAX_LEN bytes, UTF-8 with replacement char.
109fn fragment_to_found_snippet(fragment: &[u8]) -> (String, usize) {
110    let take = fragment
111        .iter()
112        .position(|&b| b == b'\n' || b == b'\r')
113        .map(|p| p.min(FOUND_SNIPPET_MAX_LEN))
114        .unwrap_or_else(|| fragment.len().min(FOUND_SNIPPET_MAX_LEN));
115    let slice = fragment.get(..take).unwrap_or(fragment);
116    let s = String::from_utf8_lossy(slice)
117        .replace('\n', "\\n")
118        .replace('\r', "\\r");
119    let len = slice.len();
120    (s.trim_end().to_string(), len)
121}
122
123pub(crate) fn recovery_found_snippet(input: Input<'_>) -> Option<String> {
124    let frag = input.fragment();
125    let take = frag
126        .iter()
127        .position(|&b| b == b'\n' || b == b'\r')
128        .unwrap_or(frag.len())
129        .min(60);
130    let snippet = String::from_utf8_lossy(&frag[..take]).trim().to_string();
131    if snippet.is_empty() {
132        None
133    } else {
134        Some(snippet)
135    }
136}
137
138fn recovery_found_snippet_from_span(input: Input<'_>, recovery_end: Input<'_>) -> Option<String> {
139    let consumed_len = recovery_end
140        .location_offset()
141        .saturating_sub(input.location_offset())
142        .min(input.fragment().len());
143    if consumed_len == 0 {
144        return recovery_found_snippet(input);
145    }
146    let frag = &input.fragment()[..consumed_len];
147    let take = frag
148        .iter()
149        .position(|&b| b == b'\n' || b == b'\r')
150        .unwrap_or(frag.len())
151        .min(60);
152    let snippet = String::from_utf8_lossy(&frag[..take]).trim().to_string();
153    if snippet.is_empty() {
154        recovery_found_snippet(input)
155    } else {
156        Some(snippet)
157    }
158}
159
160/// Map nom error kind to a human-readable message for language server diagnostics.
161fn nom_error_kind_to_message(code: &nom::error::ErrorKind) -> &'static str {
162    use nom::error::ErrorKind;
163    match code {
164        ErrorKind::Tag => "expected keyword or token",
165        ErrorKind::Digit => "expected number",
166        ErrorKind::Alpha => "expected identifier",
167        ErrorKind::AlphaNumeric => "expected identifier",
168        ErrorKind::Space => "expected whitespace",
169        ErrorKind::MultiSpace => "expected whitespace",
170        ErrorKind::Eof => "unexpected end of input",
171        ErrorKind::TakeUntil => "expected terminator",
172        ErrorKind::TakeWhile1 => "expected token",
173        ErrorKind::Alt => {
174            "expected package, import, part, port, interface, alias, attribute, or action"
175        }
176        ErrorKind::Many0 | ErrorKind::Many1 => "expected list of elements",
177        _ => "parse error",
178    }
179}
180
181/// Map nom error kind to a specific code for LSP/quick fixes.
182fn nom_error_kind_to_code(code: &nom::error::ErrorKind) -> &'static str {
183    use nom::error::ErrorKind;
184    match code {
185        ErrorKind::Tag => "expected_keyword",
186        ErrorKind::Digit => "expected_number",
187        ErrorKind::Alpha | ErrorKind::AlphaNumeric => "expected_identifier",
188        ErrorKind::Space | ErrorKind::MultiSpace => "expected_whitespace",
189        ErrorKind::Eof => "unexpected_eof",
190        ErrorKind::TakeUntil => "expected_terminator",
191        ErrorKind::TakeWhile1 => "expected_token",
192        ErrorKind::Alt => "expected_alt",
193        ErrorKind::Many0 | ErrorKind::Many1 => "expected_list",
194        _ => "parse_error",
195    }
196}
197
198fn nom_err_to_parse_error(
199    e: &Error<Input<'_>>,
200    length_override: Option<usize>,
201    expected_context: Option<&'static str>,
202) -> ParseError {
203    let offset = e.input.location_offset();
204    let line = e.input.location_line();
205    let column = e.input.get_column();
206    let fragment = e.input.fragment();
207    let (found_snippet, found_len) = fragment_to_found_snippet(fragment);
208    let message = nom_error_kind_to_message(&e.code).to_string();
209    let span_len = length_override.unwrap_or(found_len).max(1);
210    if trim_ascii_start(fragment).starts_with(b"}") {
211        return unexpected_closing_brace_parse_error(e.input);
212    }
213    let mut pe = ParseError::new(message)
214        .with_location(offset, line, column)
215        .with_length(span_len)
216        .with_code(nom_error_kind_to_code(&e.code))
217        .with_severity(DiagnosticSeverity::Error)
218        .with_category(DiagnosticCategory::ParseError);
219    if !found_snippet.is_empty() {
220        pe = pe.with_found(found_snippet);
221    }
222    if let Some(ctx) = expected_context {
223        pe = pe.with_expected(ctx);
224    }
225    let at_root = expected_context.is_some_and(|ctx| {
226        ctx.contains("'package', 'namespace', or 'import'") || ctx.contains("top level")
227    });
228    if at_root && is_illegal_top_level_definition(fragment) {
229        pe.message = "illegal top-level definition".to_string();
230        pe.code = Some("illegal_top_level_definition".to_string());
231        pe.expected = Some("'package', 'namespace', or 'import'".to_string());
232        pe.suggestion = Some(
233            "Wrap this declaration in `package ... { ... }` or `namespace ... { ... }`."
234                .to_string(),
235        );
236    }
237    pe
238}
239
240fn is_illegal_top_level_definition(fragment: &[u8]) -> bool {
241    let trimmed = trim_ascii_start(fragment);
242    !trimmed.starts_with(b"}")
243        && !trimmed.starts_with(b"//")
244        && !trimmed.starts_with(b"/*")
245        && lex::starts_with_any_keyword(trimmed, ILLEGAL_TOP_LEVEL_STARTERS)
246}
247
/// Return `fragment` with its leading ASCII whitespace bytes removed.
/// An all-whitespace (or empty) input yields an empty slice.
fn trim_ascii_start(fragment: &[u8]) -> &[u8] {
    let first_kept = fragment
        .iter()
        .position(|byte| !byte.is_ascii_whitespace())
        .unwrap_or(fragment.len());
    &fragment[first_kept..]
}
258
259fn starts_with_missing_name_after_keyword(
260    fragment: &[u8],
261    keyword: &[u8],
262    trailing_keywords: &[&[u8]],
263) -> bool {
264    let mut fragment = trim_ascii_start(fragment);
265    if !lex::starts_with_keyword(fragment, keyword) {
266        return false;
267    }
268    fragment = &fragment[keyword.len()..];
269    while let Some(first) = fragment.first() {
270        if first.is_ascii_whitespace() {
271            fragment = &fragment[1..];
272            continue;
273        }
274        break;
275    }
276    for trailing in trailing_keywords {
277        if lex::starts_with_keyword(fragment, trailing) {
278            fragment = &fragment[trailing.len()..];
279            while let Some(first) = fragment.first() {
280                if first.is_ascii_whitespace() {
281                    fragment = &fragment[1..];
282                    continue;
283                }
284                break;
285            }
286        }
287    }
288    fragment.starts_with(b":")
289        && !lex::starts_with_keyword(fragment, b":>>")
290        && !lex::starts_with_keyword(fragment, b":>")
291        && !lex::starts_with_keyword(fragment, b"::")
292}
293
294fn starts_with_missing_type_after_keyword(
295    fragment: &[u8],
296    keyword: &[u8],
297    trailing_keywords: &[&[u8]],
298) -> bool {
299    let mut fragment = trim_ascii_start(fragment);
300    if !lex::starts_with_keyword(fragment, keyword) {
301        return false;
302    }
303    fragment = &fragment[keyword.len()..];
304    while let Some(first) = fragment.first() {
305        if first.is_ascii_whitespace() {
306            fragment = &fragment[1..];
307            continue;
308        }
309        break;
310    }
311    for trailing in trailing_keywords {
312        if lex::starts_with_keyword(fragment, trailing) {
313            fragment = &fragment[trailing.len()..];
314            while let Some(first) = fragment.first() {
315                if first.is_ascii_whitespace() {
316                    fragment = &fragment[1..];
317                    continue;
318                }
319                break;
320            }
321        }
322    }
323
324    let mut name_len = 0usize;
325    while name_len < fragment.len()
326        && (fragment[name_len].is_ascii_alphanumeric() || fragment[name_len] == b'_')
327    {
328        name_len += 1;
329    }
330    if name_len == 0 {
331        return false;
332    }
333    fragment = &fragment[name_len..];
334    while let Some(first) = fragment.first() {
335        if first.is_ascii_whitespace() {
336            fragment = &fragment[1..];
337            continue;
338        }
339        break;
340    }
341    if fragment.starts_with(b":") {
342        fragment = &fragment[1..];
343    } else if lex::starts_with_keyword(fragment, b"defined") {
344        fragment = &fragment[b"defined".len()..];
345        fragment = trim_ascii_start(fragment);
346        if !lex::starts_with_keyword(fragment, b"by") {
347            return false;
348        }
349        fragment = &fragment[b"by".len()..];
350    } else if lex::starts_with_keyword(fragment, b"typed") {
351        fragment = &fragment[b"typed".len()..];
352        fragment = trim_ascii_start(fragment);
353        if !lex::starts_with_keyword(fragment, b"by") {
354            return false;
355        }
356        fragment = &fragment[b"by".len()..];
357    } else {
358        return false;
359    }
360    while let Some(first) = fragment.first() {
361        if first.is_ascii_whitespace() {
362            fragment = &fragment[1..];
363            continue;
364        }
365        break;
366    }
367
368    fragment.is_empty()
369        || fragment.starts_with(b";")
370        || fragment.starts_with(b"{")
371        || fragment.starts_with(b"}")
372        || lex::starts_with_keyword(fragment, b"then")
373        || lex::starts_with_keyword(fragment, b"if")
374        || lex::starts_with_keyword(fragment, b"do")
375}
376
377fn missing_name_diagnostic(
378    fragment: &[u8],
379    scope_label: &str,
380) -> Option<(&'static str, String, String, String)> {
381    #[allow(clippy::type_complexity)]
382    let cases: &[(&[u8], &[&[u8]], &str, &str)] = &[
383        (
384            b"subject",
385            &[],
386            "subject name",
387            "Use `subject laptop: Laptop;`.",
388        ),
389        (b"actor", &[], "actor name", "Use `actor user: User;`."),
390        (b"state", &[], "state name", "Use `state ready: Mode;`."),
391        (b"part", &[], "part name", "Use `part wheel: Wheel;`."),
392        (b"ref", &[], "reference name", "Use `ref sensor: Sensor;`."),
393        (b"port", &[], "port name", "Use `port power: PowerPort;`."),
394        (
395            b"attribute",
396            &[],
397            "attribute name",
398            "Use `attribute mass: MassValue;`.",
399        ),
400        (b"in", &[], "input name", "Use `in speed: Real;`."),
401        (b"out", &[], "output name", "Use `out result: Real;`."),
402        (
403            b"perform",
404            &[b"action"],
405            "action name",
406            "Use `perform action run: Runner;`.",
407        ),
408        (b"return", &[], "return name", "Use `return result: Real;`."),
409    ];
410
411    let allow_anonymous_requirement_params = scope_label == "requirement body";
412    for (keyword, trailing, missing_what, suggestion) in cases {
413        if allow_anonymous_requirement_params
414            && (keyword == b"subject" || keyword == b"actor")
415            && starts_with_missing_name_after_keyword(fragment, keyword, trailing)
416        {
417            // SysML allows unnamed subject/actor parameters: `actor : Battery;`
418            continue;
419        }
420        if starts_with_missing_name_after_keyword(fragment, keyword, trailing) {
421            return Some((
422                "missing_member_name",
423                format!("expected {missing_what} before ':'"),
424                format!("{missing_what} before ':'"),
425                suggestion.to_string(),
426            ));
427        }
428    }
429    None
430}
431
432fn missing_type_diagnostic(fragment: &[u8]) -> Option<(&'static str, String, String, String)> {
433    #[allow(clippy::type_complexity)]
434    let cases: &[(&[u8], &[&[u8]], &str)] = &[
435        (b"subject", &[], "subject type"),
436        (b"actor", &[], "actor type"),
437        (b"state", &[], "state type"),
438        (b"part", &[], "part type"),
439        (b"ref", &[], "reference type"),
440        (b"port", &[], "port type"),
441        (b"attribute", &[], "attribute type"),
442        (b"occurrence", &[], "occurrence type"),
443        (b"in", &[], "input type"),
444        (b"out", &[], "output type"),
445        (b"perform", &[b"action"], "action type"),
446        (b"return", &[], "return type"),
447    ];
448
449    for &(keyword, trailing, missing_what) in cases {
450        if starts_with_missing_type_after_keyword(fragment, keyword, trailing) {
451            let keyword_label = String::from_utf8_lossy(keyword);
452            let sample_name = if keyword == &b"subject"[..] {
453                "laptop"
454            } else if keyword == &b"actor"[..] {
455                "user"
456            } else if keyword == &b"state"[..] {
457                "ready"
458            } else if keyword == &b"part"[..] {
459                "wheel"
460            } else if keyword == &b"ref"[..] {
461                "sensor"
462            } else if keyword == &b"port"[..] {
463                "power"
464            } else if keyword == &b"attribute"[..] {
465                "mass"
466            } else if keyword == &b"occurrence"[..] {
467                "event"
468            } else if keyword == &b"in"[..] {
469                "speed"
470            } else if keyword == &b"out"[..] {
471                "result"
472            } else if keyword == &b"perform"[..] {
473                "run"
474            } else if keyword == &b"return"[..] {
475                "result"
476            } else {
477                "member"
478            };
479            let sample_type = if keyword == &b"subject"[..] {
480                "Laptop"
481            } else if keyword == &b"actor"[..] {
482                "User"
483            } else if keyword == &b"state"[..] {
484                "Mode"
485            } else if keyword == &b"part"[..] {
486                "Wheel"
487            } else if keyword == &b"ref"[..] {
488                "Sensor"
489            } else if keyword == &b"port"[..] {
490                "PowerPort"
491            } else if keyword == &b"attribute"[..] {
492                "MassValue"
493            } else if keyword == &b"occurrence"[..] {
494                "Event"
495            } else if keyword == &b"in"[..] || keyword == &b"out"[..] {
496                "Real"
497            } else if keyword == &b"perform"[..] {
498                "Runner"
499            } else if keyword == &b"return"[..] {
500                "Real"
501            } else {
502                "Type"
503            };
504            let suggestion = if keyword == &b"perform"[..] {
505                format!("Use `perform action {sample_name}: {sample_type};`.")
506            } else if keyword == &b"return"[..] {
507                format!("Use `return {sample_name}: {sample_type};`.")
508            } else {
509                format!("Use `{keyword_label} {sample_name}: {sample_type};`.")
510            };
511            return Some((
512                "missing_type_reference",
513                format!("expected {missing_what} after ':'"),
514                format!("{missing_what} after ':'"),
515                suggestion,
516            ));
517        }
518    }
519    None
520}
521
522fn invalid_expose_separator_diagnostic(
523    fragment: &[u8],
524) -> Option<(&'static str, String, String, String)> {
525    let mut fragment = trim_ascii_start(fragment);
526    if !lex::starts_with_keyword(fragment, b"expose") {
527        return None;
528    }
529    fragment = &fragment[b"expose".len()..];
530    while let Some(first) = fragment.first() {
531        if first.is_ascii_whitespace() {
532            fragment = &fragment[1..];
533            continue;
534        }
535        break;
536    }
537    if fragment.is_empty() {
538        return None;
539    }
540
541    let mut saw_dot = false;
542    let mut in_quoted_name = false;
543    for &b in fragment {
544        if b == b'\'' {
545            in_quoted_name = !in_quoted_name;
546            continue;
547        }
548        if in_quoted_name {
549            continue;
550        }
551        if matches!(b, b';' | b'[' | b'{' | b'}' | b'\n' | b'\r') {
552            break;
553        }
554        if b == b'.' {
555            saw_dot = true;
556            break;
557        }
558    }
559    if !saw_dot {
560        return None;
561    }
562
563    Some((
564        "invalid_qualified_name_separator",
565        "invalid qualified name in expose target: use '::' instead of '.'".to_string(),
566        "qualified name segments separated by '::'".to_string(),
567        "Replace '.' with '::' in the expose target (example: `expose A::B;`).".to_string(),
568    ))
569}
570
571fn invalid_requirement_short_name_syntax_diagnostic(
572    fragment: &[u8],
573) -> Option<(&'static str, String, String, String)> {
574    let fragment = trim_ascii_start(fragment);
575    if fragment.starts_with(b"requirement def") {
576        let mut rest = trim_ascii_start(&fragment[b"requirement def".len()..]);
577        if rest.starts_with(b"id") {
578            rest = trim_ascii_start(&rest[2..]);
579            if rest.first() == Some(&b'\'') || rest.first() == Some(&b'"') {
580                let quote = rest[0];
581                if let Some(close) = rest[1..].iter().position(|&b| b == quote) {
582                    let req_id = String::from_utf8_lossy(&rest[1..1 + close]);
583                    return Some((
584                        "invalid_requirement_short_name_syntax",
585                        format!(
586                            "requirement definition uses non-standard `id '{req_id}'` syntax; use a short name in angle brackets"
587                        ),
588                        "short name in angle brackets after `requirement def`".to_string(),
589                        format!(
590                            "Use `requirement def <'{req_id}'> ...` instead of `requirement def id '{req_id}' ...`."
591                        ),
592                    ));
593                }
594            }
595        }
596    }
597
598    // Header already consumed `id` as a name; recovery starts at the quoted requirement ID.
599    if fragment.first() == Some(&b'\'') || fragment.first() == Some(&b'"') {
600        let quote = fragment[0];
601        if let Some(close) = fragment[1..].iter().position(|&b| b == quote) {
602            let req_id = String::from_utf8_lossy(&fragment[1..1 + close]);
603            return Some((
604                "invalid_requirement_short_name_syntax",
605                format!(
606                    "requirement ID `'{req_id}'` should use short-name syntax in angle brackets, not a separate `id` keyword"
607                ),
608                "short name in angle brackets after `requirement def`".to_string(),
609                format!("Use `requirement def <'{req_id}'> ...` instead of `requirement def id '{req_id}' ...`."),
610            ));
611        }
612    }
613    None
614}
615
616fn bare_feature_declaration_in_part_def_diagnostic(
617    fragment: &[u8],
618) -> Option<(&'static str, String, String, String)> {
619    let fragment = trim_ascii_start(fragment);
620    let feature_keywords: &[&[u8]] = &[
621        b"attribute",
622        b"part",
623        b"port",
624        b"item",
625        b"ref",
626        b"bind",
627        b"connection",
628        b"interface",
629        b"action",
630        b"state",
631        b"import",
632        b"doc",
633        b"comment",
634        b"constraint",
635        b"calc",
636        b"perform",
637        b"enum",
638    ];
639    if lex::starts_with_any_keyword(fragment, feature_keywords) {
640        return None;
641    }
642    let ident_end = fragment
643        .iter()
644        .position(|b| !b.is_ascii_alphanumeric() && *b != b'_')
645        .unwrap_or(fragment.len());
646    if ident_end == 0 || !fragment[0].is_ascii_alphabetic() {
647        return None;
648    }
649    let ident = String::from_utf8_lossy(&fragment[..ident_end]);
650    let rest = trim_ascii_start(&fragment[ident_end..]);
651    if !rest.starts_with(b":") {
652        return None;
653    }
654    if fragment.windows(3).any(|w| w == b":>>" || w == b":> " || w == b"::>")
655        || fragment.windows(8).any(|w| w == b" connect")
656        || fragment.windows(4).any(|w| w == b" to ")
657    {
658        return None;
659    }
660    let rest = trim_ascii_start(&rest[1..]);
661    let type_end = rest
662        .iter()
663        .position(|b| matches!(*b, b';' | b'{' | b'}' | b'\n' | b'\r' | b'['))
664        .unwrap_or(rest.len());
665    if type_end == 0 {
666        return None;
667    }
668    let type_name = String::from_utf8_lossy(&rest[..type_end]).trim().to_string();
669    let sample_ident = ident.to_lowercase();
670    Some((
671        "bare_feature_declaration_in_part_def",
672        format!("bare feature `{ident} : {type_name}` is not valid in a part definition body"),
673        "feature kind keyword such as `attribute`, `part`, or `port`".to_string(),
674        format!("Use `attribute {sample_ident} : {type_name};` (or `item` / `port` as appropriate)."),
675    ))
676}
677
/// True when `fragment` begins with `prefix` and the prefix ends at a boundary: end of input,
/// ASCII whitespace, `<`, `;` or `{`.
fn starts_declaration_header(fragment: &[u8], prefix: &[u8]) -> bool {
    match fragment.strip_prefix(prefix) {
        None => false,
        Some(rest) => match rest.first() {
            None => true,
            Some(&next) => next.is_ascii_whitespace() || matches!(next, b'<' | b';' | b'{'),
        },
    }
}
689
690fn missing_semicolon_or_body_diagnostic(
691    fragment: &[u8],
692) -> Option<(&'static str, String, String, String)> {
693    if let Some(diag) = invalid_requirement_short_name_syntax_diagnostic(fragment) {
694        return Some(diag);
695    }
696    let fragment = trim_ascii_start(fragment);
697    let cases: &[(&[u8], &str, &str)] = &[
698        (
699            b"action def",
700            "action definition",
701            "Use `action def Run;` or `action def Run { ... }`.",
702        ),
703        (
704            b"part def",
705            "part definition",
706            "Use `part def Wheel;` or `part def Wheel { ... }`.",
707        ),
708        (
709            b"requirement def",
710            "requirement definition",
711            "Use `requirement def R;` or `requirement def R { ... }`.",
712        ),
713        (
714            b"state def",
715            "state definition",
716            "Use `state def Ready;` or `state def Ready { ... }`.",
717        ),
718        (
719            b"view",
720            "view declaration",
721            "Use `view structure: GeneralView;` or `view structure: GeneralView { ... }`.",
722        ),
723        (
724            b"rendering def",
725            "rendering definition",
726            "Use `rendering def Diagram;` or `rendering def Diagram { ... }`.",
727        ),
728    ];
729
730    for (prefix, label, suggestion) in cases {
731        if starts_declaration_header(fragment, prefix) {
732            return Some((
733                "missing_body_or_semicolon",
734                format!("expected ';' or '{{' after {label} header"),
735                "';' or '{' after declaration header".to_string(),
736                suggestion.to_string(),
737            ));
738        }
739    }
740    None
741}
742
743/// Declaration header only (stops at `{` or `;`) so body usages with `:` are not misclassified.
744fn definition_declaration_header(fragment: &[u8]) -> &[u8] {
745    let fragment = trim_ascii_start(fragment);
746    let end = fragment
747        .iter()
748        .position(|&b| b == b'{' || b == b';')
749        .unwrap_or(fragment.len());
750    trim_ascii_end(&fragment[..end])
751}
752
753/// True when a definition header uses `:` for subclassification instead of `:>`.
754fn definition_header_has_invalid_specialization_colon(header: &[u8]) -> bool {
755    let header = trim_ascii_start(header);
756    let prefixes: &[(&[u8], &str)] = &[(b"part def", "part def"), (b"port def", "port def")];
757    for (prefix, _) in prefixes {
758        if !header.starts_with(prefix) {
759            continue;
760        }
761        let mut rest = trim_ascii_start(&header[prefix.len()..]);
762        if rest.starts_with(b"<") {
763            if let Some(close) = rest[1..].iter().position(|&b| b == b'>') {
764                rest = trim_ascii_start(&rest[close + 2..]);
765            } else {
766                return false;
767            }
768        }
769        while !rest.is_empty() && !rest[0].is_ascii_whitespace() && rest[0] != b':' {
770            rest = &rest[1..];
771        }
772        rest = trim_ascii_start(rest);
773        if rest.starts_with(b":>") || rest.starts_with(b":>>") {
774            return false;
775        }
776        if rest.starts_with(b"specializes") {
777            return false;
778        }
779        if rest.first() == Some(&b':') {
780            return true;
781        }
782    }
783    false
784}
785
786fn invalid_typing_operator_diagnostic(
787    fragment: &[u8],
788) -> Option<(&'static str, String, String, String)> {
789    let header = definition_declaration_header(fragment);
790    if !definition_header_has_invalid_specialization_colon(header) {
791        return None;
792    }
793    let (label, suggestion) = if header.starts_with(b"port def") {
794        (
795            "port definition specialization",
796            "Use `port def PowerPort :> BasePort;` when specializing a definition.",
797        )
798    } else {
799        (
800            "part definition specialization",
801            "Use `part def Vehicle :> BaseVehicle;` when specializing a definition.",
802        )
803    };
804    Some((
805        "invalid_typing_operator",
806        format!("invalid typing operator in {label}: use ':>' instead of ':'"),
807        "':>' specialization operator".to_string(),
808        suggestion.to_string(),
809    ))
810}
811
812fn missing_expression_after_operator_diagnostic(
813    fragment: &[u8],
814) -> Option<(&'static str, String, String, String)> {
815    let fragment = trim_ascii_start(fragment);
816    let cases: &[(&[u8], &str, &str)] = &[
817        (
818            b"bind",
819            "binding expression after '='",
820            "Use `bind x = y;`.",
821        ),
822        (
823            b"assign",
824            "assignment expression after ':='",
825            "Use `assign x := y;`.",
826        ),
827        (
828            b"first",
829            "target after 'then'",
830            "Use `first start then finish;`.",
831        ),
832        (
833            b"flow",
834            "target after 'to'",
835            "Use `flow source to target;`.",
836        ),
837        (
838            b"satisfy",
839            "target after 'by'",
840            "Use `satisfy Req by implementation;`.",
841        ),
842    ];
843
844    for (keyword, expected, suggestion) in cases {
845        if !lex::starts_with_keyword(fragment, keyword) {
846            continue;
847        }
848        let text = String::from_utf8_lossy(fragment);
849        if text.contains("= ;") || text.trim_end().ends_with('=') {
850            return Some((
851                "missing_expression_after_operator",
852                "expected expression after '='".to_string(),
853                expected.to_string(),
854                suggestion.to_string(),
855            ));
856        }
857        if text.contains(":= ;") || text.trim_end().ends_with(":=") {
858            return Some((
859                "missing_expression_after_operator",
860                "expected expression after ':='".to_string(),
861                expected.to_string(),
862                suggestion.to_string(),
863            ));
864        }
865        if text.contains(" then ;") || text.trim_end().ends_with(" then") {
866            return Some((
867                "missing_expression_after_operator",
868                "expected target after 'then'".to_string(),
869                expected.to_string(),
870                suggestion.to_string(),
871            ));
872        }
873        if text.contains(" to ;") || text.trim_end().ends_with(" to") {
874            return Some((
875                "missing_expression_after_operator",
876                "expected target after 'to'".to_string(),
877                expected.to_string(),
878                suggestion.to_string(),
879            ));
880        }
881        if text.contains(" by ;") || text.trim_end().ends_with(" by") {
882            return Some((
883                "missing_expression_after_operator",
884                "expected target after 'by'".to_string(),
885                expected.to_string(),
886                suggestion.to_string(),
887            ));
888        }
889    }
890    None
891}
892
893fn invalid_unit_reference_diagnostic(
894    fragment: &[u8],
895) -> Option<(&'static str, String, String, String)> {
896    let fragment = trim_ascii_start(fragment);
897    let text = String::from_utf8_lossy(fragment);
898    if !(text.contains('[') && text.contains(']')) {
899        return None;
900    }
901
902    if text.contains("[]") || text.contains("[ ]") {
903        return Some((
904            "invalid_unit_reference",
905            "expected unit name inside '[ ]'".to_string(),
906            "unit name inside '[ ]'".to_string(),
907            "Use a concrete unit such as `1750 [kg]`.".to_string(),
908        ));
909    }
910
911    if text.contains("[;")
912        || text.contains("[ ;")
913        || text.contains("[)")
914        || text.contains("[ ]")
915        || text.contains("[,")
916    {
917        return Some((
918            "invalid_unit_reference",
919            "invalid unit expression inside '[ ]'".to_string(),
920            "unit name inside '[ ]'".to_string(),
921            "Use a unit symbol or qualified unit name (example: `[kg]` or `[SI::kg]`).".to_string(),
922        ));
923    }
924
925    None
926}
927
928fn unexpected_keyword_in_scope_diagnostic(
929    fragment: &[u8],
930    starters: &[&[u8]],
931    scope_label: &str,
932) -> Option<(&'static str, String, String, String)> {
933    let fragment = trim_ascii_start(fragment);
934    if fragment.is_empty() || fragment.starts_with(b"#") || fragment.starts_with(b"@") {
935        return None;
936    }
937    let keyword_end = fragment
938        .iter()
939        .position(|b| !b.is_ascii_alphanumeric() && *b != b'_')
940        .unwrap_or(fragment.len());
941    if keyword_end == 0 {
942        return None;
943    }
944    let keyword = &fragment[..keyword_end];
945    if lex::starts_with_any_keyword(keyword, starters) {
946        return None;
947    }
948    let keyword_text = String::from_utf8_lossy(keyword);
949    Some((
950        "unexpected_keyword_in_scope",
951        format!("unexpected keyword `{keyword_text}` in {scope_label}"),
952        format!("valid {scope_label} element"),
953        format!("Replace `{keyword_text}` with a valid {scope_label} member or remove it."),
954    ))
955}
956
957fn invalid_bare_identifier_in_body_diagnostic(
958    fragment: &[u8],
959    scope_label: &str,
960) -> Option<(&'static str, String, String, String)> {
961    let is_action = scope_label.contains("action body");
962    let is_state = scope_label.contains("state body");
963    if !is_action && !is_state {
964        return None;
965    }
966
967    let fragment = trim_ascii_start(fragment);
968    let ident_end = fragment
969        .iter()
970        .position(|b| !b.is_ascii_alphanumeric() && *b != b'_')
971        .unwrap_or(fragment.len());
972    if ident_end == 0 || !fragment[0].is_ascii_alphabetic() {
973        return None;
974    }
975
976    let ident = &fragment[..ident_end];
977    let rest = trim_ascii_start(&fragment[ident_end..]);
978    if !(rest.starts_with(b";")
979        || rest.starts_with(b"}")
980        || rest.starts_with(b"\n")
981        || rest.starts_with(b"\r"))
982    {
983        return None;
984    }
985
986    let ident_text = String::from_utf8_lossy(ident);
987    if is_action {
988        Some((
989            "invalid_bare_identifier_in_action_body",
990            format!("bare identifier `{ident_text}` is not a valid action body member"),
991            "action body member such as `perform`, `bind`, `in`, or `out`".to_string(),
992            format!(
993                "Use an explicit action-body form, for example `perform {ident_text};`, `bind ... = ...;`, or an `in`/`out` parameter declaration."
994            ),
995        ))
996    } else {
997        Some((
998            "invalid_bare_identifier_in_state_body",
999            format!("bare identifier `{ident_text}` is not a valid state body member"),
1000            "state body member such as `entry`, `transition`, `then`, `state`, or `ref`"
1001                .to_string(),
1002            format!(
1003                "Use an explicit state-body form, for example `then {ident_text};`, `transition ...;`, or a nested `state` member."
1004            ),
1005        ))
1006    }
1007}
1008
1009fn unexpected_closing_brace_parse_error(input: Input<'_>) -> ParseError {
1010    ParseError::new("unexpected closing '}'")
1011        .with_location(
1012            input.location_offset(),
1013            input.location_line(),
1014            input.get_column(),
1015        )
1016        .with_length(1)
1017        .with_code("unexpected_closing_brace")
1018        .with_expected("valid declaration or end of current body")
1019        .with_found("}")
1020        .with_suggestion("Remove this '}' or add the missing opening '{' before it.")
1021        .with_severity(DiagnosticSeverity::Error)
1022        .with_category(DiagnosticCategory::ParseError)
1023}
1024
1025fn missing_closing_brace_error(bytes: &[u8], input: Input<'_>) -> Option<ParseError> {
1026    if !input.fragment().is_empty() {
1027        return None;
1028    }
1029    let consumed = &bytes[..input.location_offset().min(bytes.len())];
1030    let opens = consumed.iter().filter(|&&b| b == b'{').count();
1031    let closes = consumed.iter().filter(|&&b| b == b'}').count();
1032    if opens <= closes {
1033        return None;
1034    }
1035    Some(missing_closing_brace_error_at_eof(consumed))
1036}
1037
1038fn missing_closing_brace_error_at_eof(bytes: &[u8]) -> ParseError {
1039    let (line, column) = eof_line_column(bytes);
1040    ParseError::new("missing closing '}'")
1041        .with_location(bytes.len(), line, column)
1042        .with_length(1)
1043        .with_code("missing_closing_brace")
1044        .with_expected("'}'")
1045        .with_suggestion("Add '}' to close the open body.")
1046        .with_category(DiagnosticCategory::ParseError)
1047}
1048
1049fn extra_closing_brace_at_eof(bytes: &[u8]) -> Option<ParseError> {
1050    let opens = bytes.iter().filter(|&&b| b == b'{').count();
1051    let closes = bytes.iter().filter(|&&b| b == b'}').count();
1052    if closes <= opens {
1053        return None;
1054    }
1055    let mut last_brace: Option<(usize, u32, usize)> = None;
1056    let mut line = 1u32;
1057    let mut column = 1usize;
1058    for (offset, &b) in bytes.iter().enumerate() {
1059        if b == b'}' {
1060            last_brace = Some((offset, line, column));
1061        }
1062        if b == b'\n' {
1063            line += 1;
1064            column = 1;
1065        } else {
1066            column += 1;
1067        }
1068    }
1069    let (offset, line, column) = last_brace?;
1070    Some(
1071        ParseError::new("unexpected closing '}' at end of file")
1072            .with_location(offset, line, column)
1073            .with_length(1)
1074            .with_code("unexpected_closing_brace")
1075            .with_expected("end of file or valid declaration")
1076            .with_found("}")
1077            .with_suggestion("Remove this extra '}' or add the missing opening '{' earlier in the file.")
1078            .with_category(DiagnosticCategory::ParseError),
1079    )
1080}
1081
1082fn category_from_code(code: &str) -> DiagnosticCategory {
1083    if code == "unsupported_annotation_syntax" {
1084        DiagnosticCategory::UnsupportedGrammarForm
1085    } else if code == "unresolved_symbol" {
1086        DiagnosticCategory::UnresolvedSymbol
1087    } else {
1088        DiagnosticCategory::ParseError
1089    }
1090}
1091
/// Returns `true` when `bytes` contains more `{` than `}`.
///
/// This is a raw byte count: braces are tallied wherever they appear in the buffer.
fn has_unclosed_brace(bytes: &[u8]) -> bool {
    // A slice never holds more than `isize::MAX` bytes, so the running balance cannot overflow.
    let balance = bytes.iter().fold(0isize, |balance, &byte| match byte {
        b'{' => balance + 1,
        b'}' => balance - 1,
        _ => balance,
    });
    balance > 0
}
1097
/// Computes the 1-based `(line, column)` reached after walking every byte of `bytes`.
///
/// Each `\n` starts a new line at column 1; every other byte (including `\r`) advances the
/// column by one.
fn eof_line_column(bytes: &[u8]) -> (u32, usize) {
    bytes
        .iter()
        .fold((1u32, 1usize), |(line, column), &byte| {
            if byte == b'\n' {
                (line + 1, 1)
            } else {
                (line, column + 1)
            }
        })
}
1111
1112pub(crate) fn build_recovery_error_node(
1113    input: Input<'_>,
1114    starters: &[&[u8]],
1115    scope_label: &str,
1116    generic_code: &str,
1117) -> ParseErrorNode {
1118    build_recovery_error_node_from_span(input, input, starters, scope_label, generic_code)
1119}
1120
/// Outcome of `classify_recovery`: which kind of diagnostic a skipped fragment maps to.
///
/// Variants with a payload carry the diagnostic `code`, human-readable `message`, the
/// `expected` description, and a `suggestion` for fixing the input. Unit variants have their
/// texts filled in by `build_recovery_error_node_from_span`.
enum RecoveryClassification {
    /// Produced from `missing_name_diagnostic`.
    MissingMemberName {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// Produced from `missing_type_diagnostic`.
    MissingTypeReference {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// Produced from `invalid_expose_separator_diagnostic`.
    InvalidQualifiedNameSeparator {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// Produced from `missing_semicolon_or_body_diagnostic`.
    MissingBodyOrSemicolon {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// Produced from `bare_feature_declaration_in_part_def_diagnostic`
    /// (only tried inside a part definition body).
    BareFeatureDeclarationInPartDef {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// Produced from `missing_expression_after_operator_diagnostic`.
    MissingExpressionAfterOperator {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// Produced from `invalid_unit_reference_diagnostic`.
    InvalidUnitReference {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// Produced from `invalid_typing_operator_diagnostic`.
    InvalidTypingOperator {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// Produced from `invalid_bare_identifier_in_body_diagnostic`.
    InvalidBareIdentifierInBody {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// Produced from `unexpected_keyword_in_scope_diagnostic`.
    UnexpectedKeywordInScope {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// A declaration that ran into the next one without a terminating `;`.
    MissingSemicolon,
    /// A fragment starting with `#` or `@`.
    UnsupportedAnnotation,
    /// Fallback when no more specific classification applies.
    Unexpected,
}
1186
/// Returns `fragment` with all trailing ASCII whitespace bytes removed.
fn trim_ascii_end(fragment: &[u8]) -> &[u8] {
    // Keep everything up to and including the last non-whitespace byte (nothing if none).
    let kept_len = fragment
        .iter()
        .rposition(|byte| !byte.is_ascii_whitespace())
        .map_or(0, |last_kept| last_kept + 1);
    &fragment[..kept_len]
}
1197
1198fn classify_recovery(
1199    input: Input<'_>,
1200    recovery_end: Input<'_>,
1201    starters: &[&[u8]],
1202    scope_label: &str,
1203) -> RecoveryClassification {
1204    let trimmed = trim_ascii_start(input.fragment());
1205
1206    if let Some((code, message, expected, suggestion)) = missing_name_diagnostic(trimmed, scope_label)
1207    {
1208        return RecoveryClassification::MissingMemberName {
1209            code: code.to_string(),
1210            message,
1211            expected,
1212            suggestion,
1213        };
1214    }
1215
1216    if let Some((code, message, expected, suggestion)) = missing_type_diagnostic(trimmed) {
1217        return RecoveryClassification::MissingTypeReference {
1218            code: code.to_string(),
1219            message,
1220            expected,
1221            suggestion,
1222        };
1223    }
1224
1225    if let Some((code, message, expected, suggestion)) =
1226        invalid_expose_separator_diagnostic(trimmed)
1227    {
1228        return RecoveryClassification::InvalidQualifiedNameSeparator {
1229            code: code.to_string(),
1230            message,
1231            expected,
1232            suggestion,
1233        };
1234    }
1235
1236    if let Some((code, message, expected, suggestion)) = invalid_typing_operator_diagnostic(trimmed)
1237    {
1238        return RecoveryClassification::InvalidTypingOperator {
1239            code: code.to_string(),
1240            message,
1241            expected,
1242            suggestion,
1243        };
1244    }
1245
1246    if let Some((code, message, expected, suggestion)) =
1247        missing_expression_after_operator_diagnostic(trimmed)
1248    {
1249        return RecoveryClassification::MissingExpressionAfterOperator {
1250            code: code.to_string(),
1251            message,
1252            expected,
1253            suggestion,
1254        };
1255    }
1256
1257    if let Some((code, message, expected, suggestion)) = invalid_unit_reference_diagnostic(trimmed)
1258    {
1259        return RecoveryClassification::InvalidUnitReference {
1260            code: code.to_string(),
1261            message,
1262            expected,
1263            suggestion,
1264        };
1265    }
1266
1267    if scope_label.contains("part definition body") {
1268        if let Some((code, message, expected, suggestion)) =
1269            bare_feature_declaration_in_part_def_diagnostic(trimmed)
1270        {
1271            return RecoveryClassification::BareFeatureDeclarationInPartDef {
1272                code: code.to_string(),
1273                message,
1274                expected,
1275                suggestion,
1276            };
1277        }
1278    }
1279
1280    if let Some((code, message, expected, suggestion)) =
1281        missing_semicolon_or_body_diagnostic(trimmed)
1282    {
1283        return RecoveryClassification::MissingBodyOrSemicolon {
1284            code: code.to_string(),
1285            message,
1286            expected,
1287            suggestion,
1288        };
1289    }
1290
1291    let consumed_len = recovery_end
1292        .location_offset()
1293        .saturating_sub(input.location_offset())
1294        .min(input.fragment().len());
1295    let raw_consumed = &input.fragment()[..consumed_len];
1296    let consumed = trim_ascii_end(raw_consumed);
1297    let recovered_to_boundary = recovery_end.location_offset() > input.location_offset() && {
1298        let (next, _) = lex::ws_and_comments(recovery_end).unwrap_or((recovery_end, ()));
1299        next.fragment().is_empty()
1300            || next.fragment().starts_with(b"}")
1301            || lex::starts_with_any_keyword(next.fragment(), starters)
1302    };
1303
1304    let consumed_has_newline = raw_consumed.contains(&b'\n') || raw_consumed.contains(&b'\r');
1305    let first_line_end = consumed
1306        .iter()
1307        .position(|b| matches!(*b, b'\n' | b'\r'))
1308        .unwrap_or(consumed.len());
1309    let first_line = trim_ascii_end(&consumed[..first_line_end]);
1310    let consumed_has_delimiters = consumed
1311        .iter()
1312        .any(|b| matches!(*b, b'{' | b'}' | b'(' | b')' | b'[' | b']'));
1313    let consumed_ends_incomplete = first_line.last().is_some_and(|b| {
1314        matches!(
1315            *b,
1316            b':' | b'=' | b',' | b'.' | b'+' | b'-' | b'*' | b'/' | b'>' | b'<' | b'|'
1317        )
1318    });
1319    let first_line_has_semicolon = first_line.contains(&b';');
1320    if recovered_to_boundary
1321        && lex::starts_with_any_keyword(trimmed, starters)
1322        && (consumed_has_newline || recovery_end.fragment().starts_with(b"}"))
1323        && !consumed.is_empty()
1324        && !consumed_has_delimiters
1325        && !consumed_ends_incomplete
1326        && !first_line_has_semicolon
1327    {
1328        return RecoveryClassification::MissingSemicolon;
1329    }
1330
1331    if lex::starts_with_keyword(trimmed, b"#") || lex::starts_with_keyword(trimmed, b"@") {
1332        return RecoveryClassification::UnsupportedAnnotation;
1333    }
1334
1335    if let Some((code, message, expected, suggestion)) =
1336        invalid_bare_identifier_in_body_diagnostic(trimmed, scope_label)
1337    {
1338        return RecoveryClassification::InvalidBareIdentifierInBody {
1339            code: code.to_string(),
1340            message,
1341            expected,
1342            suggestion,
1343        };
1344    }
1345
1346    if let Some((code, message, expected, suggestion)) =
1347        unexpected_keyword_in_scope_diagnostic(trimmed, starters, scope_label)
1348    {
1349        return RecoveryClassification::UnexpectedKeywordInScope {
1350            code: code.to_string(),
1351            message,
1352            expected,
1353            suggestion,
1354        };
1355    }
1356
1357    RecoveryClassification::Unexpected
1358}
1359
1360pub(crate) fn build_recovery_error_node_from_span(
1361    input: Input<'_>,
1362    recovery_end: Input<'_>,
1363    starters: &[&[u8]],
1364    scope_label: &str,
1365    generic_code: &str,
1366) -> ParseErrorNode {
1367    match classify_recovery(input, recovery_end, starters, scope_label) {
1368        RecoveryClassification::MissingMemberName {
1369            code,
1370            message,
1371            expected,
1372            suggestion,
1373        }
1374        | RecoveryClassification::MissingTypeReference {
1375            code,
1376            message,
1377            expected,
1378            suggestion,
1379        }
1380        | RecoveryClassification::InvalidQualifiedNameSeparator {
1381            code,
1382            message,
1383            expected,
1384            suggestion,
1385        }
1386        | RecoveryClassification::MissingBodyOrSemicolon {
1387            code,
1388            message,
1389            expected,
1390            suggestion,
1391        }
1392        | RecoveryClassification::BareFeatureDeclarationInPartDef {
1393            code,
1394            message,
1395            expected,
1396            suggestion,
1397        }
1398        | RecoveryClassification::MissingExpressionAfterOperator {
1399            code,
1400            message,
1401            expected,
1402            suggestion,
1403        }
1404        | RecoveryClassification::InvalidUnitReference {
1405            code,
1406            message,
1407            expected,
1408            suggestion,
1409        }
1410        | RecoveryClassification::InvalidTypingOperator {
1411            code,
1412            message,
1413            expected,
1414            suggestion,
1415        }
1416        | RecoveryClassification::InvalidBareIdentifierInBody {
1417            code,
1418            message,
1419            expected,
1420            suggestion,
1421        }
1422        | RecoveryClassification::UnexpectedKeywordInScope {
1423            code,
1424            message,
1425            expected,
1426            suggestion,
1427        } => ParseErrorNode {
1428            message,
1429            code,
1430            expected: Some(expected),
1431            found: recovery_found_snippet_from_span(input, recovery_end),
1432            suggestion: Some(suggestion),
1433            category: Some(DiagnosticCategory::ParseError),
1434        },
1435        RecoveryClassification::MissingSemicolon => ParseErrorNode {
1436            message: "missing semicolon before next declaration".to_string(),
1437            code: "missing_semicolon".to_string(),
1438            expected: Some("';'".to_string()),
1439            found: recovery_found_snippet_from_span(input, recovery_end),
1440            suggestion: Some("Insert ';' before this declaration.".to_string()),
1441            category: Some(DiagnosticCategory::ParseError),
1442        },
1443        RecoveryClassification::UnsupportedAnnotation => ParseErrorNode {
1444            message: format!("unsupported annotation syntax in {scope_label}"),
1445            code: "unsupported_annotation_syntax".to_string(),
1446            expected: Some(format!("valid {scope_label} element")),
1447            found: recovery_found_snippet_from_span(input, recovery_end),
1448            suggestion: Some(
1449                "Remove this annotation or extend the parser to support annotated declarations."
1450                    .to_string(),
1451            ),
1452            category: Some(DiagnosticCategory::UnsupportedGrammarForm),
1453        },
1454        RecoveryClassification::Unexpected => ParseErrorNode {
1455            message: format!("unexpected token in {scope_label}"),
1456            code: generic_code.to_string(),
1457            expected: Some(format!("valid {scope_label} element")),
1458            found: recovery_found_snippet_from_span(input, recovery_end),
1459            suggestion: Some(format!("Fix this {scope_label} member and re-run parsing.")),
1460            category: Some(DiagnosticCategory::ParseError),
1461        },
1462    }
1463}
1464
1465fn parse_error_from_recovery_node(span: &crate::ast::Span, node: &ParseErrorNode) -> ParseError {
1466    let mut err = ParseError::new(node.message.clone())
1467        .with_location(span.offset, span.line, span.column)
1468        .with_length(span.len.max(1))
1469        .with_code(node.code.clone())
1470        .with_category(
1471            node.category
1472                .unwrap_or_else(|| category_from_code(node.code.as_str())),
1473        );
1474    let severity = if node.code == "unsupported_annotation_syntax" {
1475        DiagnosticSeverity::Warning
1476    } else {
1477        DiagnosticSeverity::Error
1478    };
1479    err = err.with_severity(severity);
1480    if let Some(expected) = &node.expected {
1481        err = err.with_expected(expected.clone());
1482    }
1483    if let Some(found) = &node.found {
1484        err = err.with_found(found.clone());
1485    }
1486    if let Some(suggestion) = &node.suggestion {
1487        err = err.with_suggestion(suggestion.clone());
1488    }
1489    err
1490}
1491
1492fn diagnostic_specificity(err: &ParseError) -> u8 {
1493    match err.code.as_deref() {
1494        Some("missing_member_name")
1495        | Some("missing_type_reference")
1496        | Some("invalid_qualified_name_separator")
1497        | Some("invalid_typing_operator")
1498        | Some("missing_expression_after_operator")
1499        | Some("invalid_unit_reference")
1500        | Some("missing_body_or_semicolon")
1501        | Some("invalid_requirement_short_name_syntax")
1502        | Some("bare_feature_declaration_in_part_def")
1503        | Some("missing_semicolon")
1504        | Some("unexpected_closing_brace")
1505        | Some("missing_closing_brace")
1506        | Some("unsupported_annotation_syntax")
1507        | Some("invalid_bare_identifier_in_action_body")
1508        | Some("invalid_bare_identifier_in_state_body")
1509        | Some("recovery_cascade_suppressed")
1510        | Some("unexpected_keyword_in_scope") => 5,
1511        Some("illegal_top_level_definition") => 4,
1512        Some(code) if code.starts_with("recovered_") => 2,
1513        Some("expected_end_of_input") | Some("expected_keyword") => 1,
1514        _ => 3,
1515    }
1516}
1517
1518/// Drop `unexpected_closing_brace` on a line that already has a parse error for an
1519/// invalid statement block (e.g. `badstmt {} }` — the second `}` closes the package).
1520fn suppress_redundant_closing_brace_errors(errors: Vec<ParseError>) -> Vec<ParseError> {
1521    let lines_with_block_error: std::collections::HashSet<u32> = errors
1522        .iter()
1523        .filter(|e| e.code.as_deref() != Some("unexpected_closing_brace"))
1524        .filter_map(|e| e.line)
1525        .filter(|line| {
1526            errors.iter().any(|other| {
1527                other.line == Some(*line)
1528                    && other
1529                        .found
1530                        .as_deref()
1531                        .is_some_and(|f| f.contains('{') && f.contains('}'))
1532            })
1533        })
1534        .collect();
1535
1536    errors
1537        .into_iter()
1538        .filter(|e| {
1539            if e.code.as_deref() != Some("unexpected_closing_brace") {
1540                return true;
1541            }
1542            e.line
1543                .map(|line| !lines_with_block_error.contains(&line))
1544                .unwrap_or(true)
1545        })
1546        .collect()
1547}
1548
1549fn dedup_errors(mut errors: Vec<ParseError>) -> Vec<ParseError> {
1550    errors.sort_by_key(|e| {
1551        (
1552            e.offset.unwrap_or(usize::MAX),
1553            e.line.unwrap_or(u32::MAX),
1554            e.column.unwrap_or(usize::MAX),
1555            std::cmp::Reverse(diagnostic_specificity(e)),
1556        )
1557    });
1558
1559    let mut deduped = Vec::new();
1560    for err in errors {
1561        let duplicate = deduped.iter().any(|existing: &ParseError| {
1562            let same_start = existing.offset == err.offset
1563                && existing.line == err.line
1564                && existing.column == err.column;
1565            let same_found = existing.found == err.found;
1566            let existing_specificity = diagnostic_specificity(existing);
1567            let err_specificity = diagnostic_specificity(&err);
1568            same_start
1569                && (same_found || existing.code == err.code)
1570                && existing_specificity >= err_specificity
1571        });
1572        if !duplicate {
1573            deduped.push(err);
1574        }
1575    }
1576
1577    deduped.sort_by_key(|e| (e.offset.unwrap_or(usize::MAX), e.line.unwrap_or(u32::MAX)));
1578    deduped
1579}
1580
1581fn is_cascade_candidate(err: &ParseError) -> bool {
1582    matches!(
1583        err.code.as_deref(),
1584        Some("missing_semicolon") | Some("missing_body_or_semicolon")
1585    ) || err
1586        .code
1587        .as_deref()
1588        .is_some_and(|code| code.starts_with("recovered_"))
1589}
1590
1591fn cascade_family(err: &ParseError) -> Option<&str> {
1592    match err.code.as_deref() {
1593        Some("missing_semicolon") => Some("missing_semicolon"),
1594        Some("missing_body_or_semicolon") => Some("missing_body_or_semicolon"),
1595        Some(code) if code.starts_with("recovered_") => Some("recovered"),
1596        _ => None,
1597    }
1598}
1599
/// Largest line gap between two consecutive diagnostics that still counts as the same cascade
/// run in `suppress_diagnostic_cascades`.
const MAX_CASCADE_LINE_DISTANCE: u32 = 50;
1601
1602fn make_cascade_summary(run: &[ParseError]) -> Option<ParseError> {
1603    let summary_anchor = run.first()?;
1604    let suppressed = run.len().saturating_sub(1);
1605    let family = cascade_family(summary_anchor).unwrap_or("recovery");
1606    let mut err = ParseError::new(format!(
1607        "suppressed {suppressed} cascading {family} diagnostic{} after earlier recovery errors",
1608        if suppressed == 1 { "" } else { "s" }
1609    ))
1610    .with_location(
1611        summary_anchor.offset?,
1612        summary_anchor.line?,
1613        summary_anchor.column?,
1614    )
1615    .with_length(summary_anchor.length.unwrap_or(1).max(1))
1616    .with_code("recovery_cascade_suppressed")
1617    .with_expected("fix the first syntax error in this body")
1618    .with_suggestion(
1619        "Fix the earliest diagnostic in this body first; later syntax errors may be cascades.",
1620    )
1621    .with_severity(DiagnosticSeverity::Warning)
1622    .with_category(DiagnosticCategory::ParseError);
1623    if let Some(found) = &summary_anchor.found {
1624        err = err.with_found(found.clone());
1625    }
1626    Some(err)
1627}
1628
1629fn suppress_diagnostic_cascades(errors: Vec<ParseError>) -> Vec<ParseError> {
1630    const MAX_UNSUMMARIZED_CASCADE: usize = 1;
1631
1632    let mut output = Vec::new();
1633    let mut run: Vec<ParseError> = Vec::new();
1634
1635    let flush_run = |run: &mut Vec<ParseError>, output: &mut Vec<ParseError>| {
1636        if run.is_empty() {
1637            return;
1638        }
1639        if run.len() <= MAX_UNSUMMARIZED_CASCADE {
1640            output.append(run);
1641        } else {
1642            let primary_offset = run.first().and_then(|e| e.offset);
1643            if let Some(mut primary) = run.first().cloned() {
1644                primary.is_cascade = Some(false);
1645                output.push(primary);
1646            }
1647            for suppressed in run.iter().skip(MAX_UNSUMMARIZED_CASCADE) {
1648                let _ = primary_offset;
1649                let _ = suppressed;
1650            }
1651            if let Some(summary) = make_cascade_summary(run) {
1652                output.push(summary);
1653            }
1654            run.clear();
1655        }
1656    };
1657
1658    for err in errors {
1659        let continues_run = run.last().is_some_and(|previous| {
1660            is_cascade_candidate(&err)
1661                && cascade_family(previous) == cascade_family(&err)
1662                && previous
1663                    .line
1664                    .zip(err.line)
1665                    .is_some_and(|(a, b)| b <= a.saturating_add(MAX_CASCADE_LINE_DISTANCE))
1666        });
1667
1668        if is_cascade_candidate(&err) && (run.is_empty() || continues_run) {
1669            run.push(err);
1670        } else {
1671            flush_run(&mut run, &mut output);
1672            if is_cascade_candidate(&err) {
1673                run.push(err);
1674            } else {
1675                output.push(err);
1676            }
1677        }
1678    }
1679    flush_run(&mut run, &mut output);
1680    output.sort_by_key(|e| (e.offset.unwrap_or(usize::MAX), e.line.unwrap_or(u32::MAX)));
1681    output
1682}
1683
1684fn root_body_recovery_error(input: Input<'_>, scope: &str) -> ParseError {
1685    let (found, len) = fragment_to_found_snippet(input.fragment());
1686    let mut err = ParseError::new(format!(
1687        "could not parse {scope} body; skipped to next root element"
1688    ))
1689    .with_location(
1690        input.location_offset(),
1691        input.location_line(),
1692        input.get_column(),
1693    )
1694    .with_length(len.max(1))
1695    .with_code("recovered_root_body")
1696    .with_expected(format!("valid {scope} body"))
1697    .with_suggestion(
1698        "Fix the first syntax error in this body; later root-level diagnostics may be cascades.",
1699    )
1700    .with_severity(DiagnosticSeverity::Error)
1701    .with_category(DiagnosticCategory::ParseError);
1702    if !found.is_empty() {
1703        err = err.with_found(found);
1704    }
1705    err
1706}
1707
1708fn root_body_scope(fragment: &[u8]) -> Option<&'static str> {
1709    let fragment = trim_ascii_start(fragment);
1710    if lex::starts_with_keyword(fragment, b"package")
1711        || lex::starts_with_keyword(fragment, b"library")
1712        || lex::starts_with_keyword(fragment, b"standard")
1713    {
1714        Some("package")
1715    } else if lex::starts_with_keyword(fragment, b"namespace") {
1716        Some("namespace")
1717    } else {
1718        None
1719    }
1720}
1721
1722fn collect_requirement_body_errors(body: &RequirementDefBody, errors: &mut Vec<ParseError>) {
1723    if let RequirementDefBody::Brace { elements } = body {
1724        for element in elements {
1725            match &element.value {
1726                RequirementDefBodyElement::Error(n) => {
1727                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1728                }
1729                RequirementDefBodyElement::Frame(n) => {
1730                    collect_requirement_body_errors(&n.value.body, errors)
1731                }
1732                _ => {}
1733            }
1734        }
1735    }
1736}
1737
1738fn collect_action_def_body_errors(body: &ActionDefBody, errors: &mut Vec<ParseError>) {
1739    if let ActionDefBody::Brace { elements } = body {
1740        for element in elements {
1741            if let ActionDefBodyElement::Error(n) = &element.value {
1742                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1743            }
1744        }
1745    }
1746}
1747
1748fn collect_action_usage_body_errors(body: &ActionUsageBody, errors: &mut Vec<ParseError>) {
1749    if let ActionUsageBody::Brace { elements } = body {
1750        for element in elements {
1751            match &element.value {
1752                ActionUsageBodyElement::Error(n) => {
1753                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1754                }
1755                ActionUsageBodyElement::ActionUsage(n) => {
1756                    collect_action_usage_body_errors(&n.value.body, errors)
1757                }
1758                _ => {}
1759            }
1760        }
1761    }
1762}
1763
1764fn collect_state_body_errors(body: &StateDefBody, errors: &mut Vec<ParseError>) {
1765    if let StateDefBody::Brace { elements } = body {
1766        for element in elements {
1767            match &element.value {
1768                StateDefBodyElement::Error(n) => {
1769                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1770                }
1771                StateDefBodyElement::Entry(n) => collect_state_body_errors(&n.value.body, errors),
1772                StateDefBodyElement::RequirementUsage(n) => {
1773                    collect_requirement_body_errors(&n.value.body, errors)
1774                }
1775                StateDefBodyElement::StateUsage(n) => {
1776                    collect_state_body_errors(&n.value.body, errors)
1777                }
1778                _ => {}
1779            }
1780        }
1781    }
1782}
1783
1784fn collect_use_case_body_errors(body: &UseCaseDefBody, errors: &mut Vec<ParseError>) {
1785    if let UseCaseDefBody::Brace { elements } = body {
1786        for element in elements {
1787            if let UseCaseDefBodyElement::Error(n) = &element.value {
1788                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1789            }
1790        }
1791    }
1792}
1793
1794fn collect_constraint_body_errors(body: &ConstraintDefBody, errors: &mut Vec<ParseError>) {
1795    if let ConstraintDefBody::Brace { elements } = body {
1796        for element in elements {
1797            if let ConstraintDefBodyElement::Error(n) = &element.value {
1798                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1799            }
1800        }
1801    }
1802}
1803
1804fn collect_calc_body_errors(body: &CalcDefBody, errors: &mut Vec<ParseError>) {
1805    if let CalcDefBody::Brace { elements } = body {
1806        for element in elements {
1807            if let CalcDefBodyElement::Error(n) = &element.value {
1808                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1809            }
1810        }
1811    }
1812}
1813
1814fn collect_view_def_body_errors(body: &ViewDefBody, errors: &mut Vec<ParseError>) {
1815    if let ViewDefBody::Brace { elements } = body {
1816        for element in elements {
1817            if let ViewDefBodyElement::Error(n) = &element.value {
1818                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1819            }
1820        }
1821    }
1822}
1823
1824fn collect_view_body_errors(body: &ViewBody, errors: &mut Vec<ParseError>) {
1825    if let ViewBody::Brace { elements } = body {
1826        for element in elements {
1827            if let ViewBodyElement::Error(n) = &element.value {
1828                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1829            }
1830        }
1831    }
1832}
1833
1834fn collect_part_def_body_errors(body: &PartDefBody, errors: &mut Vec<ParseError>) {
1835    if let PartDefBody::Brace { elements } = body {
1836        for element in elements {
1837            match &element.value {
1838                PartDefBodyElement::Error(n) => {
1839                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1840                }
1841                PartDefBodyElement::PartUsage(n) => {
1842                    collect_part_usage_body_errors(&n.value.body, errors)
1843                }
1844                PartDefBodyElement::Perform(n) => {
1845                    collect_perform_body_errors(&n.value.body, errors)
1846                }
1847                _ => {}
1848            }
1849        }
1850    }
1851}
1852
1853fn collect_perform_body_errors(body: &crate::ast::PerformBody, _errors: &mut Vec<ParseError>) {
1854    match body {
1855        crate::ast::PerformBody::Semicolon => {}
1856        crate::ast::PerformBody::Brace { .. } => {}
1857    }
1858}
1859
1860fn collect_part_usage_body_errors(body: &PartUsageBody, errors: &mut Vec<ParseError>) {
1861    if let PartUsageBody::Brace { elements } = body {
1862        for element in elements {
1863            match &element.value {
1864                PartUsageBodyElement::Error(n) => {
1865                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1866                }
1867                PartUsageBodyElement::PartUsage(n) => {
1868                    collect_part_usage_body_errors(&n.value.body, errors)
1869                }
1870                PartUsageBodyElement::Perform(n) => {
1871                    collect_perform_body_errors(&n.value.body, errors)
1872                }
1873                PartUsageBodyElement::StateUsage(n) => {
1874                    collect_state_body_errors(&n.value.body, errors)
1875                }
1876                _ => {}
1877            }
1878        }
1879    }
1880}
1881
1882fn collect_package_body_errors(body: &PackageBody, errors: &mut Vec<ParseError>) {
1883    if let PackageBody::Brace { elements } = body {
1884        for element in elements {
1885            match &element.value {
1886                PackageBodyElement::Error(n) => {
1887                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1888                }
1889                PackageBodyElement::Package(n) => {
1890                    collect_package_body_errors(&n.value.body, errors)
1891                }
1892                PackageBodyElement::LibraryPackage(n) => {
1893                    collect_package_body_errors(&n.value.body, errors)
1894                }
1895                PackageBodyElement::PartDef(n) => {
1896                    collect_part_def_body_errors(&n.value.body, errors)
1897                }
1898                PackageBodyElement::PartUsage(n) => {
1899                    collect_part_usage_body_errors(&n.value.body, errors)
1900                }
1901                PackageBodyElement::ActionDef(n) => {
1902                    collect_action_def_body_errors(&n.value.body, errors)
1903                }
1904                PackageBodyElement::ActionUsage(n) => {
1905                    collect_action_usage_body_errors(&n.value.body, errors)
1906                }
1907                PackageBodyElement::RequirementDef(n) => {
1908                    collect_requirement_body_errors(&n.value.body, errors)
1909                }
1910                PackageBodyElement::RequirementUsage(n) => {
1911                    collect_requirement_body_errors(&n.value.body, errors)
1912                }
1913                PackageBodyElement::UseCaseDef(n) => {
1914                    collect_use_case_body_errors(&n.value.body, errors)
1915                }
1916                PackageBodyElement::UseCaseUsage(n) => {
1917                    collect_use_case_body_errors(&n.value.body, errors)
1918                }
1919                PackageBodyElement::ConcernUsage(n) => {
1920                    collect_requirement_body_errors(&n.value.body, errors)
1921                }
1922                PackageBodyElement::StateDef(n) => collect_state_body_errors(&n.value.body, errors),
1923                PackageBodyElement::StateUsage(n) => {
1924                    collect_state_body_errors(&n.value.body, errors)
1925                }
1926                PackageBodyElement::ConstraintDef(n) => {
1927                    collect_constraint_body_errors(&n.value.body, errors)
1928                }
1929                PackageBodyElement::CalcDef(n) => collect_calc_body_errors(&n.value.body, errors),
1930                PackageBodyElement::ViewDef(n) => {
1931                    collect_view_def_body_errors(&n.value.body, errors)
1932                }
1933                PackageBodyElement::ViewUsage(n) => collect_view_body_errors(&n.value.body, errors),
1934                _ => {}
1935            }
1936        }
1937    }
1938}
1939
1940fn collect_implicit_attribute_in_part_def_warnings(bytes: &[u8]) -> Vec<ParseError> {
1941    let text = String::from_utf8_lossy(bytes);
1942    let mut errors = Vec::new();
1943    let mut in_part_def_body = false;
1944    let mut brace_depth = 0i32;
1945    let mut offset = 0usize;
1946    for (line_idx, line) in text.lines().enumerate() {
1947        let trimmed = line.trim();
1948        if trimmed.starts_with("part def") {
1949            in_part_def_body = false;
1950            brace_depth = 0;
1951        }
1952        if trimmed.contains('{') {
1953            if in_part_def_body || trimmed.starts_with("part def") {
1954                in_part_def_body = true;
1955            }
1956            brace_depth += trimmed.chars().filter(|&c| c == '{').count() as i32;
1957        }
1958        if trimmed.contains('}') {
1959            brace_depth -= trimmed.chars().filter(|&c| c == '}').count() as i32;
1960            if brace_depth <= 0 {
1961                in_part_def_body = false;
1962            }
1963        }
1964        if in_part_def_body && brace_depth > 0 {
1965            let skip = trimmed.starts_with("attribute")
1966                || trimmed.starts_with("part ")
1967                || trimmed.starts_with("port ")
1968                || trimmed.starts_with("interface")
1969                || trimmed.starts_with("connect")
1970                || trimmed.contains(":>")
1971                || trimmed.contains("::>")
1972                || trimmed.is_empty()
1973                || trimmed.starts_with("//")
1974                || trimmed.starts_with("/*")
1975                || trimmed.starts_with("doc ");
1976            if !skip {
1977                if let Some((code, message, expected, suggestion)) =
1978                    bare_feature_declaration_in_part_def_diagnostic(trimmed.as_bytes())
1979                {
1980                    let line_no = (line_idx + 1) as u32;
1981                    let column = line.find(trimmed).unwrap_or(0) + 1;
1982                    let line_offset = offset + line.find(trimmed).unwrap_or(0);
1983                    errors.push(
1984                        ParseError::new(message)
1985                            .with_location(line_offset, line_no, column)
1986                            .with_length(trimmed.len().max(1))
1987                            .with_code(code)
1988                            .with_expected(expected)
1989                            .with_suggestion(suggestion)
1990                            .with_severity(DiagnosticSeverity::Warning)
1991                            .with_category(DiagnosticCategory::ParseError),
1992                    );
1993                }
1994            }
1995        }
1996        offset += line.len() + 1;
1997    }
1998    errors
1999}
2000
2001fn collect_requirement_id_dialect_diagnostics(bytes: &[u8]) -> Vec<ParseError> {
2002    let pattern = b"requirement def id ";
2003    let mut errors = Vec::new();
2004    let mut search_from = 0usize;
2005    while search_from < bytes.len() {
2006        let Some(rel) = bytes[search_from..]
2007            .windows(pattern.len())
2008            .position(|window| window == pattern)
2009        else {
2010            break;
2011        };
2012        let offset = search_from + rel;
2013        let after = trim_ascii_start(&bytes[offset + pattern.len()..]);
2014        if after.first() != Some(&b'\'') && after.first() != Some(&b'"') {
2015            search_from = offset + 1;
2016            continue;
2017        }
2018        let quote = after[0];
2019        let Some(close) = after[1..].iter().position(|&b| b == quote) else {
2020            search_from = offset + 1;
2021            continue;
2022        };
2023        let req_id = String::from_utf8_lossy(&after[1..1 + close]);
2024        let (line, column) = offset_to_line_column(bytes, offset);
2025        errors.push(
2026            ParseError::new(format!(
2027                "requirement definition uses non-standard `id '{req_id}'` syntax; use a short name in angle brackets"
2028            ))
2029            .with_location(offset, line, column)
2030            .with_length(pattern.len().max(1))
2031            .with_code("invalid_requirement_short_name_syntax")
2032            .with_expected("short name in angle brackets after `requirement def`".to_string())
2033            .with_suggestion(format!(
2034                "Use `requirement def <'{req_id}'> ...` instead of `requirement def id '{req_id}' ...`."
2035            ))
2036            .with_category(DiagnosticCategory::ParseError),
2037        );
2038        search_from = offset + pattern.len();
2039    }
2040    errors
2041}
2042
/// Converts a byte `offset` into a 1-based `(line, column)` pair.
///
/// Only `\n` starts a new line; every other byte (including `\r`) advances
/// the column by one, so columns are counted in bytes. An `offset` past the
/// end of `bytes` yields the position just after the last byte.
fn offset_to_line_column(bytes: &[u8], offset: usize) -> (u32, usize) {
    bytes
        .iter()
        .take(offset)
        .fold((1u32, 1usize), |(line, column), &byte| {
            if byte == b'\n' {
                (line + 1, 1)
            } else {
                (line, column + 1)
            }
        })
}
2059
2060fn collect_recovery_errors(root: &RootNamespace) -> Vec<ParseError> {
2061    let mut errors = Vec::new();
2062    for element in &root.elements {
2063        match &element.value {
2064            crate::ast::RootElement::Package(n) => {
2065                collect_package_body_errors(&n.value.body, &mut errors)
2066            }
2067            crate::ast::RootElement::LibraryPackage(n) => {
2068                collect_package_body_errors(&n.value.body, &mut errors)
2069            }
2070            crate::ast::RootElement::Namespace(n) => {
2071                collect_package_body_errors(&n.value.body, &mut errors)
2072            }
2073            crate::ast::RootElement::Import(_) => {}
2074        }
2075    }
2076    errors
2077}
2078
2079/// Parse full input; must consume entire input. Strips UTF-8 BOM if present.
2080#[allow(clippy::result_large_err)]
2081pub fn parse_root(input: &str) -> Result<RootNamespace, ParseError> {
2082    let bytes = input
2083        .strip_prefix('\u{FEFF}')
2084        .map(str::as_bytes)
2085        .unwrap_or_else(|| input.as_bytes());
2086    let located = LocatedSpan::new(bytes);
2087    match package::root_namespace(located) {
2088        Ok((rest, root)) => {
2089            if !rest.fragment().is_empty() && has_unclosed_brace(bytes) {
2090                return Err(missing_closing_brace_error_at_eof(bytes));
2091            }
2092            if rest.fragment().is_empty() {
2093                log::debug!("parse_root: success, {} top-level elements", root.elements.len());
2094                Ok(root)
2095            } else if trim_ascii_start(rest.fragment()).starts_with(b"}") {
2096                Err(unexpected_closing_brace_parse_error(rest))
2097            } else {
2098                let offset = located.location_offset() + located.fragment().len() - rest.fragment().len();
2099                let unconsumed = rest.fragment();
2100                let first_80 = unconsumed.get(..80.min(unconsumed.len())).unwrap_or(unconsumed);
2101                log::debug!(
2102                    "parse_root: expected end of input; parsed {} elements; unconsumed len={}, offset={}, first 80 bytes: {:?}",
2103                    root.elements.len(),
2104                    unconsumed.len(),
2105                    offset,
2106                    first_80,
2107                );
2108                log::debug!(
2109                    "parse_root: unconsumed as str: {:?}",
2110                    String::from_utf8_lossy(first_80),
2111                );
2112                let (found_snippet, found_len) = fragment_to_found_snippet(rest.fragment());
2113                let mut pe = ParseError::new("expected end of input")
2114                    .with_location(offset, rest.location_line(), rest.get_column())
2115                    .with_length(found_len.max(1))
2116                    .with_code("expected_end_of_input")
2117                    .with_category(DiagnosticCategory::ParseError);
2118                if !found_snippet.is_empty() {
2119                    pe = pe.with_found(found_snippet);
2120                }
2121                if root.elements.is_empty() && is_illegal_top_level_definition(rest.fragment()) {
2122                    pe = pe
2123                        .with_code("illegal_top_level_definition")
2124                        .with_expected("'package', 'namespace', or 'import'")
2125                        .with_suggestion(
2126                            "Wrap this declaration in `package ... { ... }` or `namespace ... { ... }`.",
2127                        );
2128                    pe.message = "illegal top-level definition".to_string();
2129                }
2130                Err(pe)
2131            }
2132        }
2133        Err(nom::Err::Error(e)) => Err(missing_closing_brace_error(bytes, e.input).unwrap_or_else(|| {
2134            nom_err_to_parse_error(
2135                &e,
2136                None,
2137                Some("'package', 'namespace', or 'import' at top level; or valid element in package body"),
2138            )
2139        })),
2140        Err(nom::Err::Failure(e)) => Err(missing_closing_brace_error(bytes, e.input).unwrap_or_else(|| {
2141            nom_err_to_parse_error(
2142                &e,
2143                None,
2144                Some("'package', 'namespace', or 'import' at top level; or valid element in package body"),
2145            )
2146        })),
2147        Err(nom::Err::Incomplete(_)) => Err(
2148            ParseError::new("unexpected end of input")
2149                .with_code("unexpected_eof")
2150                .with_category(DiagnosticCategory::ParseError),
2151        ),
2152    }
2153}
2154
/// Upper bound on the number of diagnostics the recovery loop in
/// `parse_with_diagnostics` accumulates before it stops parsing further
/// root elements.
const MAX_RECOVERY_ERRORS: usize = 100;
2156
2157/// Parse input with error recovery: collects multiple diagnostics and returns a partial AST when errors occur.
2158/// Use this for language servers so the user sees all parse errors and features (e.g. hover) can use the partial AST.
2159pub fn parse_with_diagnostics(input: &str) -> ParseResult {
2160    let bytes = input
2161        .strip_prefix('\u{FEFF}')
2162        .map(str::as_bytes)
2163        .unwrap_or_else(|| input.as_bytes());
2164    let located = LocatedSpan::new(bytes);
2165
2166    let mut elements = Vec::new();
2167    let mut errors = Vec::new();
2168
2169    let (mut input, _) = match lex::ws_and_comments(located) {
2170        Ok(x) => x,
2171        Err(_) => {
2172            return ParseResult {
2173                root: RootNamespace { elements: vec![] },
2174                errors: vec![ParseError::new("invalid input")
2175                    .with_code("invalid_input")
2176                    .with_category(DiagnosticCategory::ParseError)],
2177            };
2178        }
2179    };
2180
2181    while errors.len() < MAX_RECOVERY_ERRORS {
2182        // Skip leading ws/comments; if nothing left, we're done (avoids parsing "" as root_element).
2183        let (rest, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
2184        input = rest;
2185        if input.fragment().is_empty() {
2186            break;
2187        }
2188        match package::root_element(input) {
2189            Ok((rest, elem)) => {
2190                elements.push(elem);
2191                input = rest;
2192            }
2193            Err(nom::Err::Error(e)) | Err(nom::Err::Failure(e)) => {
2194                let (trimmed, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
2195                if trim_ascii_start(trimmed.fragment()).starts_with(b"}") {
2196                    errors.push(unexpected_closing_brace_parse_error(trimmed));
2197                    let skip_result = lex::skip_to_next_sync_point(trimmed);
2198                    match skip_result {
2199                        Ok((rest, _)) => input = rest,
2200                        Err(_) => break,
2201                    }
2202                    continue;
2203                }
2204                if errors.is_empty()
2205                    && has_unclosed_brace(bytes)
2206                    && (lex::starts_with_keyword(trimmed.fragment(), b"package")
2207                        || lex::starts_with_keyword(trimmed.fragment(), b"namespace")
2208                        || lex::starts_with_keyword(trimmed.fragment(), b"library")
2209                        || lex::starts_with_keyword(trimmed.fragment(), b"standard"))
2210                {
2211                    errors.push(missing_closing_brace_error_at_eof(bytes));
2212                    break;
2213                }
2214                if let Some(scope) = root_body_scope(input.fragment()) {
2215                    let (error_input, _) = lex::ws_and_comments(e.input).unwrap_or((e.input, ()));
2216                    if error_input.fragment().starts_with(b"{") {
2217                        errors.push(root_body_recovery_error(error_input, scope));
2218                        match lex::skip_statement_or_block(error_input) {
2219                            Ok((rest, _))
2220                                if rest.location_offset() > error_input.location_offset() =>
2221                            {
2222                                input = rest;
2223                                continue;
2224                            }
2225                            _ => {}
2226                        }
2227                    }
2228                }
2229                let pe = missing_closing_brace_error(bytes, e.input).unwrap_or_else(|| {
2230                    nom_err_to_parse_error(&e, None, Some("'package', 'namespace', or 'import'"))
2231                });
2232                errors.push(pe);
2233                let skip_result = lex::skip_to_next_sync_point(e.input);
2234                match skip_result {
2235                    Ok((rest, _)) => input = rest,
2236                    Err(_) => break,
2237                }
2238            }
2239            Err(nom::Err::Incomplete(_)) => {
2240                errors.push(
2241                    ParseError::new("unexpected end of input")
2242                        .with_location(
2243                            input.location_offset(),
2244                            input.location_line(),
2245                            input.get_column(),
2246                        )
2247                        .with_length(1)
2248                        .with_code("unexpected_eof")
2249                        .with_category(DiagnosticCategory::ParseError),
2250                );
2251                break;
2252            }
2253        }
2254    }
2255
2256    let (input, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
2257
2258    if input.fragment().is_empty()
2259        && !errors.iter().any(|e| {
2260            matches!(
2261                e.code.as_deref(),
2262                Some("missing_closing_brace") | Some("unexpected_closing_brace")
2263            )
2264        })
2265    {
2266        if let Some(err) = extra_closing_brace_at_eof(bytes) {
2267            errors.push(err);
2268        } else if has_unclosed_brace(bytes) {
2269            errors.push(missing_closing_brace_error_at_eof(bytes));
2270        }
2271    }
2272
2273    if !input.fragment().is_empty()
2274        && !errors
2275            .iter()
2276            .any(|e| e.code.as_deref() == Some("missing_closing_brace"))
2277    {
2278        if trim_ascii_start(input.fragment()).starts_with(b"}") {
2279            errors.push(unexpected_closing_brace_parse_error(input));
2280        } else {
2281            let (found_snippet, found_len) = fragment_to_found_snippet(input.fragment());
2282            let mut pe = ParseError::new("expected end of input")
2283                .with_location(
2284                    input.location_offset(),
2285                    input.location_line(),
2286                    input.get_column(),
2287                )
2288                .with_length(found_len.max(1))
2289                .with_code("expected_end_of_input")
2290                .with_severity(DiagnosticSeverity::Error)
2291                .with_category(DiagnosticCategory::ParseError);
2292            if !found_snippet.is_empty() {
2293                pe = pe.with_found(found_snippet);
2294            }
2295            errors.push(pe);
2296        }
2297    }
2298
2299    errors.extend(collect_recovery_errors(&RootNamespace {
2300        elements: elements.clone(),
2301    }));
2302    errors.extend(collect_implicit_attribute_in_part_def_warnings(bytes));
2303    errors.extend(collect_requirement_id_dialect_diagnostics(bytes));
2304    errors = suppress_redundant_closing_brace_errors(errors);
2305    errors = dedup_errors(errors);
2306    errors = suppress_diagnostic_cascades(errors);
2307
2308    ParseResult {
2309        root: RootNamespace { elements },
2310        errors,
2311    }
2312}