Skip to main content

sysml_v2_parser/parser/
mod.rs

1//! Nom-based parser for SysML v2 textual notation.
2//!
3//! Organized into modules, including (non-exhaustive list):
4//! - [lex]: whitespace, comments, names, qualified names, skip helpers
5//! - [attribute]: attribute definition and usage
6//! - [import]: import and relationship body
7//! - [part]: part definition and part usage
8//! - [package]: package and root namespace
9
10mod action;
11mod alias;
12mod allocation;
13mod attribute;
14mod case;
15mod connection;
16mod body;
17mod constraint;
18mod definition_prefix;
19mod dependency;
20mod enumeration;
21mod expr;
22mod flow;
23mod import;
24mod individual;
25mod interface;
26mod item;
27mod lex;
28mod metadata;
29mod metadata_annotation;
30mod occurrence;
31mod package;
32mod part;
33mod port;
34mod requirement;
35mod specialization;
36mod span;
37mod state;
38mod usecase;
39mod view;
40
41pub(crate) use span::{node_from_to, span_from_to, with_span, Input};
42use crate::ast::{
43    ActionDefBody, ActionDefBodyElement, ActionUsageBody, ActionUsageBodyElement, CalcDefBody,
44    CalcDefBodyElement, ConstraintDefBody, ConstraintDefBodyElement, PackageBody,
45    PackageBodyElement, ParseErrorNode, PartDefBody, PartDefBodyElement, PartUsageBody,
46    PartUsageBodyElement, RequirementDefBody, RequirementDefBodyElement, RootNamespace,
47    StateDefBody, StateDefBodyElement, UseCaseDefBody, UseCaseDefBodyElement, ViewBody,
48    ViewBodyElement, ViewDefBody, ViewDefBodyElement,
49};
50use crate::error::{DiagnosticCategory, DiagnosticSeverity, ParseError};
51use nom::error::Error;
52use nom_locate::LocatedSpan;
53
54/// Result of parsing with error recovery: a (possibly partial) AST and zero or more diagnostics.
55#[derive(Debug, Clone)]
56pub struct ParseResult {
57    /// Root namespace; contains all successfully parsed top-level elements (partial when errors occurred).
58    pub root: RootNamespace,
59    /// All parse errors encountered (multiple when recovery is used).
60    pub errors: Vec<ParseError>,
61}
62
63impl ParseResult {
64    /// True if the document parsed fully with no errors.
65    pub fn is_ok(&self) -> bool {
66        self.errors.is_empty()
67    }
68}
69
/// Upper bound, in bytes, on the input echoed by `fragment_to_found_snippet`.
const FOUND_SNIPPET_MAX_LEN: usize = 40;

/// Keywords that `is_illegal_top_level_definition` looks for at the start of a fragment.
/// A root-level error starting with one of them is reported as an illegal top-level
/// definition. Kept in alphabetical order.
const ILLEGAL_TOP_LEVEL_STARTERS: &[&[u8]] = &[
    b"action",
    b"actor",
    b"alias",
    b"allocate",
    b"allocation",
    b"attribute",
    b"bind",
    b"calc",
    b"case",
    b"concern",
    b"connection",
    b"constraint",
    b"dependency",
    b"enum",
    b"flow",
    b"interface",
    b"item",
    b"metadata",
    b"occurrence",
    b"part",
    b"perform",
    b"port",
    b"ref",
    b"require",
    b"requirement",
    b"satisfy",
    b"state",
    b"use",
    b"verification",
    b"view",
    b"viewpoint",
];
104
105/// Take a short snippet from the input at the error position for "found" display.
106/// Uses first line or first FOUND_SNIPPET_MAX_LEN bytes, UTF-8 with replacement char.
107fn fragment_to_found_snippet(fragment: &[u8]) -> (String, usize) {
108    let take = fragment
109        .iter()
110        .position(|&b| b == b'\n' || b == b'\r')
111        .map(|p| p.min(FOUND_SNIPPET_MAX_LEN))
112        .unwrap_or_else(|| fragment.len().min(FOUND_SNIPPET_MAX_LEN));
113    let slice = fragment.get(..take).unwrap_or(fragment);
114    let s = String::from_utf8_lossy(slice)
115        .replace('\n', "\\n")
116        .replace('\r', "\\r");
117    let len = slice.len();
118    (s.trim_end().to_string(), len)
119}
120
121pub(crate) fn recovery_found_snippet(input: Input<'_>) -> Option<String> {
122    let frag = input.fragment();
123    let take = frag
124        .iter()
125        .position(|&b| b == b'\n' || b == b'\r')
126        .unwrap_or(frag.len())
127        .min(60);
128    let snippet = String::from_utf8_lossy(&frag[..take]).trim().to_string();
129    if snippet.is_empty() {
130        None
131    } else {
132        Some(snippet)
133    }
134}
135
136fn recovery_found_snippet_from_span(input: Input<'_>, recovery_end: Input<'_>) -> Option<String> {
137    let consumed_len = recovery_end
138        .location_offset()
139        .saturating_sub(input.location_offset())
140        .min(input.fragment().len());
141    if consumed_len == 0 {
142        return recovery_found_snippet(input);
143    }
144    let frag = &input.fragment()[..consumed_len];
145    let take = frag
146        .iter()
147        .position(|&b| b == b'\n' || b == b'\r')
148        .unwrap_or(frag.len())
149        .min(60);
150    let snippet = String::from_utf8_lossy(&frag[..take]).trim().to_string();
151    if snippet.is_empty() {
152        recovery_found_snippet(input)
153    } else {
154        Some(snippet)
155    }
156}
157
158/// Map nom error kind to a human-readable message for language server diagnostics.
159fn nom_error_kind_to_message(code: &nom::error::ErrorKind) -> &'static str {
160    use nom::error::ErrorKind;
161    match code {
162        ErrorKind::Tag => "expected keyword or token",
163        ErrorKind::Digit => "expected number",
164        ErrorKind::Alpha => "expected identifier",
165        ErrorKind::AlphaNumeric => "expected identifier",
166        ErrorKind::Space => "expected whitespace",
167        ErrorKind::MultiSpace => "expected whitespace",
168        ErrorKind::Eof => "unexpected end of input",
169        ErrorKind::TakeUntil => "expected terminator",
170        ErrorKind::TakeWhile1 => "expected token",
171        ErrorKind::Alt => {
172            "expected package, import, part, port, interface, alias, attribute, or action"
173        }
174        ErrorKind::Many0 | ErrorKind::Many1 => "expected list of elements",
175        _ => "parse error",
176    }
177}
178
179/// Map nom error kind to a specific code for LSP/quick fixes.
180fn nom_error_kind_to_code(code: &nom::error::ErrorKind) -> &'static str {
181    use nom::error::ErrorKind;
182    match code {
183        ErrorKind::Tag => "expected_keyword",
184        ErrorKind::Digit => "expected_number",
185        ErrorKind::Alpha | ErrorKind::AlphaNumeric => "expected_identifier",
186        ErrorKind::Space | ErrorKind::MultiSpace => "expected_whitespace",
187        ErrorKind::Eof => "unexpected_eof",
188        ErrorKind::TakeUntil => "expected_terminator",
189        ErrorKind::TakeWhile1 => "expected_token",
190        ErrorKind::Alt => "expected_alt",
191        ErrorKind::Many0 | ErrorKind::Many1 => "expected_list",
192        _ => "parse_error",
193    }
194}
195
196fn nom_err_to_parse_error(
197    e: &Error<Input<'_>>,
198    length_override: Option<usize>,
199    expected_context: Option<&'static str>,
200) -> ParseError {
201    let offset = e.input.location_offset();
202    let line = e.input.location_line();
203    let column = e.input.get_column();
204    let fragment = e.input.fragment();
205    let (found_snippet, found_len) = fragment_to_found_snippet(fragment);
206    let message = nom_error_kind_to_message(&e.code).to_string();
207    let span_len = length_override.unwrap_or(found_len).max(1);
208    if trim_ascii_start(fragment).starts_with(b"}") {
209        return unexpected_closing_brace_parse_error(e.input);
210    }
211    let mut pe = ParseError::new(message)
212        .with_location(offset, line, column)
213        .with_length(span_len)
214        .with_code(nom_error_kind_to_code(&e.code))
215        .with_severity(DiagnosticSeverity::Error)
216        .with_category(DiagnosticCategory::ParseError);
217    if !found_snippet.is_empty() {
218        pe = pe.with_found(found_snippet);
219    }
220    if let Some(ctx) = expected_context {
221        pe = pe.with_expected(ctx);
222    }
223    let at_root = expected_context.is_some_and(|ctx| {
224        ctx.contains("'package', 'namespace', or 'import'") || ctx.contains("top level")
225    });
226    if at_root && is_illegal_top_level_definition(fragment) {
227        pe.message = "illegal top-level definition".to_string();
228        pe.code = Some("illegal_top_level_definition".to_string());
229        pe.expected = Some("'package', 'namespace', or 'import'".to_string());
230        pe.suggestion = Some(
231            "Wrap this declaration in `package ... { ... }` or `namespace ... { ... }`."
232                .to_string(),
233        );
234    }
235    pe
236}
237
238fn is_illegal_top_level_definition(fragment: &[u8]) -> bool {
239    let trimmed = trim_ascii_start(fragment);
240    !trimmed.starts_with(b"}")
241        && !trimmed.starts_with(b"//")
242        && !trimmed.starts_with(b"/*")
243        && lex::starts_with_any_keyword(trimmed, ILLEGAL_TOP_LEVEL_STARTERS)
244}
245
/// Return `fragment` without its leading ASCII whitespace (as defined by
/// `u8::is_ascii_whitespace`). An all-whitespace input yields an empty slice.
fn trim_ascii_start(fragment: &[u8]) -> &[u8] {
    let first_kept = fragment
        .iter()
        .position(|b| !b.is_ascii_whitespace())
        .unwrap_or(fragment.len());
    &fragment[first_kept..]
}
256
257fn starts_with_missing_name_after_keyword(
258    fragment: &[u8],
259    keyword: &[u8],
260    trailing_keywords: &[&[u8]],
261) -> bool {
262    let mut fragment = trim_ascii_start(fragment);
263    if !lex::starts_with_keyword(fragment, keyword) {
264        return false;
265    }
266    fragment = &fragment[keyword.len()..];
267    while let Some(first) = fragment.first() {
268        if first.is_ascii_whitespace() {
269            fragment = &fragment[1..];
270            continue;
271        }
272        break;
273    }
274    for trailing in trailing_keywords {
275        if lex::starts_with_keyword(fragment, trailing) {
276            fragment = &fragment[trailing.len()..];
277            while let Some(first) = fragment.first() {
278                if first.is_ascii_whitespace() {
279                    fragment = &fragment[1..];
280                    continue;
281                }
282                break;
283            }
284        }
285    }
286    fragment.starts_with(b":")
287        && !lex::starts_with_keyword(fragment, b":>>")
288        && !lex::starts_with_keyword(fragment, b":>")
289        && !lex::starts_with_keyword(fragment, b"::")
290}
291
292fn starts_with_missing_type_after_keyword(
293    fragment: &[u8],
294    keyword: &[u8],
295    trailing_keywords: &[&[u8]],
296) -> bool {
297    let mut fragment = trim_ascii_start(fragment);
298    if !lex::starts_with_keyword(fragment, keyword) {
299        return false;
300    }
301    fragment = &fragment[keyword.len()..];
302    while let Some(first) = fragment.first() {
303        if first.is_ascii_whitespace() {
304            fragment = &fragment[1..];
305            continue;
306        }
307        break;
308    }
309    for trailing in trailing_keywords {
310        if lex::starts_with_keyword(fragment, trailing) {
311            fragment = &fragment[trailing.len()..];
312            while let Some(first) = fragment.first() {
313                if first.is_ascii_whitespace() {
314                    fragment = &fragment[1..];
315                    continue;
316                }
317                break;
318            }
319        }
320    }
321
322    let mut name_len = 0usize;
323    while name_len < fragment.len()
324        && (fragment[name_len].is_ascii_alphanumeric() || fragment[name_len] == b'_')
325    {
326        name_len += 1;
327    }
328    if name_len == 0 {
329        return false;
330    }
331    fragment = &fragment[name_len..];
332    while let Some(first) = fragment.first() {
333        if first.is_ascii_whitespace() {
334            fragment = &fragment[1..];
335            continue;
336        }
337        break;
338    }
339    if !fragment.starts_with(b":") {
340        return false;
341    }
342    fragment = &fragment[1..];
343    while let Some(first) = fragment.first() {
344        if first.is_ascii_whitespace() {
345            fragment = &fragment[1..];
346            continue;
347        }
348        break;
349    }
350
351    fragment.is_empty()
352        || fragment.starts_with(b";")
353        || fragment.starts_with(b"{")
354        || fragment.starts_with(b"}")
355        || lex::starts_with_keyword(fragment, b"then")
356        || lex::starts_with_keyword(fragment, b"if")
357        || lex::starts_with_keyword(fragment, b"do")
358}
359
360fn missing_name_diagnostic(fragment: &[u8]) -> Option<(&'static str, String, String, String)> {
361    #[allow(clippy::type_complexity)]
362    let cases: &[(&[u8], &[&[u8]], &str, &str)] = &[
363        (
364            b"subject",
365            &[],
366            "subject name",
367            "Use `subject laptop: Laptop;`.",
368        ),
369        (b"actor", &[], "actor name", "Use `actor user: User;`."),
370        (b"state", &[], "state name", "Use `state ready: Mode;`."),
371        (b"part", &[], "part name", "Use `part wheel: Wheel;`."),
372        (b"ref", &[], "reference name", "Use `ref sensor: Sensor;`."),
373        (b"port", &[], "port name", "Use `port power: PowerPort;`."),
374        (
375            b"attribute",
376            &[],
377            "attribute name",
378            "Use `attribute mass: MassValue;`.",
379        ),
380        (b"in", &[], "input name", "Use `in speed: Real;`."),
381        (b"out", &[], "output name", "Use `out result: Real;`."),
382        (
383            b"perform",
384            &[b"action"],
385            "action name",
386            "Use `perform action run: Runner;`.",
387        ),
388        (b"return", &[], "return name", "Use `return result: Real;`."),
389    ];
390
391    for (keyword, trailing, missing_what, suggestion) in cases {
392        if starts_with_missing_name_after_keyword(fragment, keyword, trailing) {
393            return Some((
394                "missing_member_name",
395                format!("expected {missing_what} before ':'"),
396                format!("{missing_what} before ':'"),
397                suggestion.to_string(),
398            ));
399        }
400    }
401    None
402}
403
404fn missing_type_diagnostic(fragment: &[u8]) -> Option<(&'static str, String, String, String)> {
405    #[allow(clippy::type_complexity)]
406    let cases: &[(&[u8], &[&[u8]], &str)] = &[
407        (b"subject", &[], "subject type"),
408        (b"actor", &[], "actor type"),
409        (b"state", &[], "state type"),
410        (b"part", &[], "part type"),
411        (b"ref", &[], "reference type"),
412        (b"port", &[], "port type"),
413        (b"attribute", &[], "attribute type"),
414        (b"in", &[], "input type"),
415        (b"out", &[], "output type"),
416        (b"perform", &[b"action"], "action type"),
417        (b"return", &[], "return type"),
418    ];
419
420    for &(keyword, trailing, missing_what) in cases {
421        if starts_with_missing_type_after_keyword(fragment, keyword, trailing) {
422            let keyword_label = String::from_utf8_lossy(keyword);
423            let sample_name = if keyword == &b"subject"[..] {
424                "laptop"
425            } else if keyword == &b"actor"[..] {
426                "user"
427            } else if keyword == &b"state"[..] {
428                "ready"
429            } else if keyword == &b"part"[..] {
430                "wheel"
431            } else if keyword == &b"ref"[..] {
432                "sensor"
433            } else if keyword == &b"port"[..] {
434                "power"
435            } else if keyword == &b"attribute"[..] {
436                "mass"
437            } else if keyword == &b"in"[..] {
438                "speed"
439            } else if keyword == &b"out"[..] {
440                "result"
441            } else if keyword == &b"perform"[..] {
442                "run"
443            } else if keyword == &b"return"[..] {
444                "result"
445            } else {
446                "member"
447            };
448            let sample_type = if keyword == &b"subject"[..] {
449                "Laptop"
450            } else if keyword == &b"actor"[..] {
451                "User"
452            } else if keyword == &b"state"[..] {
453                "Mode"
454            } else if keyword == &b"part"[..] {
455                "Wheel"
456            } else if keyword == &b"ref"[..] {
457                "Sensor"
458            } else if keyword == &b"port"[..] {
459                "PowerPort"
460            } else if keyword == &b"attribute"[..] {
461                "MassValue"
462            } else if keyword == &b"in"[..] || keyword == &b"out"[..] {
463                "Real"
464            } else if keyword == &b"perform"[..] {
465                "Runner"
466            } else if keyword == &b"return"[..] {
467                "Real"
468            } else {
469                "Type"
470            };
471            let suggestion = if keyword == &b"perform"[..] {
472                format!("Use `perform action {sample_name}: {sample_type};`.")
473            } else if keyword == &b"return"[..] {
474                format!("Use `return {sample_name}: {sample_type};`.")
475            } else {
476                format!("Use `{keyword_label} {sample_name}: {sample_type};`.")
477            };
478            return Some((
479                "missing_type_reference",
480                format!("expected {missing_what} after ':'"),
481                format!("{missing_what} after ':'"),
482                suggestion,
483            ));
484        }
485    }
486    None
487}
488
489fn invalid_expose_separator_diagnostic(
490    fragment: &[u8],
491) -> Option<(&'static str, String, String, String)> {
492    let mut fragment = trim_ascii_start(fragment);
493    if !lex::starts_with_keyword(fragment, b"expose") {
494        return None;
495    }
496    fragment = &fragment[b"expose".len()..];
497    while let Some(first) = fragment.first() {
498        if first.is_ascii_whitespace() {
499            fragment = &fragment[1..];
500            continue;
501        }
502        break;
503    }
504    if fragment.is_empty() {
505        return None;
506    }
507
508    let mut saw_dot = false;
509    let mut in_quoted_name = false;
510    for &b in fragment {
511        if b == b'\'' {
512            in_quoted_name = !in_quoted_name;
513            continue;
514        }
515        if in_quoted_name {
516            continue;
517        }
518        if matches!(b, b';' | b'[' | b'{' | b'}' | b'\n' | b'\r') {
519            break;
520        }
521        if b == b'.' {
522            saw_dot = true;
523            break;
524        }
525    }
526    if !saw_dot {
527        return None;
528    }
529
530    Some((
531        "invalid_qualified_name_separator",
532        "invalid qualified name in expose target: use '::' instead of '.'".to_string(),
533        "qualified name segments separated by '::'".to_string(),
534        "Replace '.' with '::' in the expose target (example: `expose A::B;`).".to_string(),
535    ))
536}
537
538fn missing_semicolon_or_body_diagnostic(
539    fragment: &[u8],
540) -> Option<(&'static str, String, String, String)> {
541    let fragment = trim_ascii_start(fragment);
542    let cases: &[(&[u8], &str, &str)] = &[
543        (
544            b"action def",
545            "action definition",
546            "Use `action def Run;` or `action def Run { ... }`.",
547        ),
548        (
549            b"part def",
550            "part definition",
551            "Use `part def Wheel;` or `part def Wheel { ... }`.",
552        ),
553        (
554            b"requirement def",
555            "requirement definition",
556            "Use `requirement def R;` or `requirement def R { ... }`.",
557        ),
558        (
559            b"state def",
560            "state definition",
561            "Use `state def Ready;` or `state def Ready { ... }`.",
562        ),
563        (
564            b"view",
565            "view declaration",
566            "Use `view structure: GeneralView;` or `view structure: GeneralView { ... }`.",
567        ),
568        (
569            b"rendering def",
570            "rendering definition",
571            "Use `rendering def Diagram;` or `rendering def Diagram { ... }`.",
572        ),
573    ];
574
575    for (prefix, label, suggestion) in cases {
576        if fragment.starts_with(prefix) {
577            return Some((
578                "missing_body_or_semicolon",
579                format!("expected ';' or '{{' after {label} header"),
580                "';' or '{' after declaration header".to_string(),
581                suggestion.to_string(),
582            ));
583        }
584    }
585    None
586}
587
588fn invalid_typing_operator_diagnostic(
589    fragment: &[u8],
590) -> Option<(&'static str, String, String, String)> {
591    let fragment = trim_ascii_start(fragment);
592    let cases: &[(&[u8], &str, &str)] = &[
593        (
594            b"part def",
595            "part definition specialization",
596            "Use `part def Vehicle :> BaseVehicle;` when specializing a definition.",
597        ),
598        (
599            b"port def",
600            "port definition specialization",
601            "Use `port def PowerPort :> BasePort;` when specializing a definition.",
602        ),
603    ];
604
605    for (prefix, label, suggestion) in cases {
606        if fragment.starts_with(prefix) && fragment.windows(3).any(|w| w == b": ") {
607            return Some((
608                "invalid_typing_operator",
609                format!("invalid typing operator in {label}: use ':>' instead of ':'"),
610                "':>' specialization operator".to_string(),
611                suggestion.to_string(),
612            ));
613        }
614    }
615
616    if fragment.starts_with(b"part def")
617        && fragment.contains(&b':')
618        && !fragment.windows(2).any(|w| w == b":>")
619    {
620        return Some((
621            "invalid_typing_operator",
622            "invalid typing operator in part definition: use ':>' instead of ':'".to_string(),
623            "':>' specialization operator".to_string(),
624            "Use `part def Vehicle :> BaseVehicle;` when specializing a definition.".to_string(),
625        ));
626    }
627
628    None
629}
630
631fn missing_expression_after_operator_diagnostic(
632    fragment: &[u8],
633) -> Option<(&'static str, String, String, String)> {
634    let fragment = trim_ascii_start(fragment);
635    let cases: &[(&[u8], &str, &str)] = &[
636        (
637            b"bind",
638            "binding expression after '='",
639            "Use `bind x = y;`.",
640        ),
641        (
642            b"assign",
643            "assignment expression after ':='",
644            "Use `assign x := y;`.",
645        ),
646        (
647            b"first",
648            "target after 'then'",
649            "Use `first start then finish;`.",
650        ),
651        (
652            b"flow",
653            "target after 'to'",
654            "Use `flow source to target;`.",
655        ),
656        (
657            b"satisfy",
658            "target after 'by'",
659            "Use `satisfy Req by implementation;`.",
660        ),
661    ];
662
663    for (keyword, expected, suggestion) in cases {
664        if !lex::starts_with_keyword(fragment, keyword) {
665            continue;
666        }
667        let text = String::from_utf8_lossy(fragment);
668        if text.contains("= ;") || text.trim_end().ends_with('=') {
669            return Some((
670                "missing_expression_after_operator",
671                "expected expression after '='".to_string(),
672                expected.to_string(),
673                suggestion.to_string(),
674            ));
675        }
676        if text.contains(":= ;") || text.trim_end().ends_with(":=") {
677            return Some((
678                "missing_expression_after_operator",
679                "expected expression after ':='".to_string(),
680                expected.to_string(),
681                suggestion.to_string(),
682            ));
683        }
684        if text.contains(" then ;") || text.trim_end().ends_with(" then") {
685            return Some((
686                "missing_expression_after_operator",
687                "expected target after 'then'".to_string(),
688                expected.to_string(),
689                suggestion.to_string(),
690            ));
691        }
692        if text.contains(" to ;") || text.trim_end().ends_with(" to") {
693            return Some((
694                "missing_expression_after_operator",
695                "expected target after 'to'".to_string(),
696                expected.to_string(),
697                suggestion.to_string(),
698            ));
699        }
700        if text.contains(" by ;") || text.trim_end().ends_with(" by") {
701            return Some((
702                "missing_expression_after_operator",
703                "expected target after 'by'".to_string(),
704                expected.to_string(),
705                suggestion.to_string(),
706            ));
707        }
708    }
709    None
710}
711
712fn invalid_unit_reference_diagnostic(
713    fragment: &[u8],
714) -> Option<(&'static str, String, String, String)> {
715    let fragment = trim_ascii_start(fragment);
716    let text = String::from_utf8_lossy(fragment);
717    if !(text.contains('[') && text.contains(']')) {
718        return None;
719    }
720
721    if text.contains("[]") || text.contains("[ ]") {
722        return Some((
723            "invalid_unit_reference",
724            "expected unit name inside '[ ]'".to_string(),
725            "unit name inside '[ ]'".to_string(),
726            "Use a concrete unit such as `1750 [kg]`.".to_string(),
727        ));
728    }
729
730    if text.contains("[;")
731        || text.contains("[ ;")
732        || text.contains("[)")
733        || text.contains("[ ]")
734        || text.contains("[,")
735    {
736        return Some((
737            "invalid_unit_reference",
738            "invalid unit expression inside '[ ]'".to_string(),
739            "unit name inside '[ ]'".to_string(),
740            "Use a unit symbol or qualified unit name (example: `[kg]` or `[SI::kg]`).".to_string(),
741        ));
742    }
743
744    None
745}
746
747fn unexpected_keyword_in_scope_diagnostic(
748    fragment: &[u8],
749    starters: &[&[u8]],
750    scope_label: &str,
751) -> Option<(&'static str, String, String, String)> {
752    let fragment = trim_ascii_start(fragment);
753    if fragment.is_empty() || fragment.starts_with(b"#") || fragment.starts_with(b"@") {
754        return None;
755    }
756    let keyword_end = fragment
757        .iter()
758        .position(|b| !b.is_ascii_alphanumeric() && *b != b'_')
759        .unwrap_or(fragment.len());
760    if keyword_end == 0 {
761        return None;
762    }
763    let keyword = &fragment[..keyword_end];
764    if lex::starts_with_any_keyword(keyword, starters) {
765        return None;
766    }
767    let keyword_text = String::from_utf8_lossy(keyword);
768    Some((
769        "unexpected_keyword_in_scope",
770        format!("unexpected keyword `{keyword_text}` in {scope_label}"),
771        format!("valid {scope_label} element"),
772        format!("Replace `{keyword_text}` with a valid {scope_label} member or remove it."),
773    ))
774}
775
776fn invalid_bare_identifier_in_body_diagnostic(
777    fragment: &[u8],
778    scope_label: &str,
779) -> Option<(&'static str, String, String, String)> {
780    let is_action = scope_label.contains("action body");
781    let is_state = scope_label.contains("state body");
782    if !is_action && !is_state {
783        return None;
784    }
785
786    let fragment = trim_ascii_start(fragment);
787    let ident_end = fragment
788        .iter()
789        .position(|b| !b.is_ascii_alphanumeric() && *b != b'_')
790        .unwrap_or(fragment.len());
791    if ident_end == 0 || !fragment[0].is_ascii_alphabetic() {
792        return None;
793    }
794
795    let ident = &fragment[..ident_end];
796    let rest = trim_ascii_start(&fragment[ident_end..]);
797    if !(rest.starts_with(b";")
798        || rest.starts_with(b"}")
799        || rest.starts_with(b"\n")
800        || rest.starts_with(b"\r"))
801    {
802        return None;
803    }
804
805    let ident_text = String::from_utf8_lossy(ident);
806    if is_action {
807        Some((
808            "invalid_bare_identifier_in_action_body",
809            format!("bare identifier `{ident_text}` is not a valid action body member"),
810            "action body member such as `perform`, `bind`, `in`, or `out`".to_string(),
811            format!(
812                "Use an explicit action-body form, for example `perform {ident_text};`, `bind ... = ...;`, or an `in`/`out` parameter declaration."
813            ),
814        ))
815    } else {
816        Some((
817            "invalid_bare_identifier_in_state_body",
818            format!("bare identifier `{ident_text}` is not a valid state body member"),
819            "state body member such as `entry`, `transition`, `then`, `state`, or `ref`"
820                .to_string(),
821            format!(
822                "Use an explicit state-body form, for example `then {ident_text};`, `transition ...;`, or a nested `state` member."
823            ),
824        ))
825    }
826}
827
828fn unexpected_closing_brace_parse_error(input: Input<'_>) -> ParseError {
829    ParseError::new("unexpected closing '}'")
830        .with_location(
831            input.location_offset(),
832            input.location_line(),
833            input.get_column(),
834        )
835        .with_length(1)
836        .with_code("unexpected_closing_brace")
837        .with_expected("valid declaration or end of current body")
838        .with_found("}")
839        .with_suggestion("Remove this '}' or add the missing opening '{' before it.")
840        .with_severity(DiagnosticSeverity::Error)
841        .with_category(DiagnosticCategory::ParseError)
842}
843
844fn missing_closing_brace_error(bytes: &[u8], input: Input<'_>) -> Option<ParseError> {
845    if !input.fragment().is_empty() {
846        return None;
847    }
848    let consumed = &bytes[..input.location_offset().min(bytes.len())];
849    let opens = consumed.iter().filter(|&&b| b == b'{').count();
850    let closes = consumed.iter().filter(|&&b| b == b'}').count();
851    if opens <= closes {
852        return None;
853    }
854    Some(missing_closing_brace_error_at_eof(consumed))
855}
856
857fn missing_closing_brace_error_at_eof(bytes: &[u8]) -> ParseError {
858    let (line, column) = eof_line_column(bytes);
859    ParseError::new("missing closing '}'")
860        .with_location(bytes.len(), line, column)
861        .with_length(1)
862        .with_code("missing_closing_brace")
863        .with_expected("'}'")
864        .with_suggestion("Add '}' to close the open body.")
865        .with_category(DiagnosticCategory::ParseError)
866}
867
868fn category_from_code(code: &str) -> DiagnosticCategory {
869    if code == "unsupported_annotation_syntax" {
870        DiagnosticCategory::UnsupportedGrammarForm
871    } else if code == "unresolved_symbol" {
872        DiagnosticCategory::UnresolvedSymbol
873    } else {
874        DiagnosticCategory::ParseError
875    }
876}
877
/// Returns `true` when `bytes` contains more `{` than `}`. The count is purely byte based
/// and does not consider the order in which the braces appear.
fn has_unclosed_brace(bytes: &[u8]) -> bool {
    let (opens, closes) = bytes
        .iter()
        .fold((0usize, 0usize), |(opens, closes), &byte| match byte {
            b'{' => (opens + 1, closes),
            b'}' => (opens, closes + 1),
            _ => (opens, closes),
        });
    opens > closes
}
883
/// Compute the 1-based `(line, column)` of the position just past the end of `bytes`.
///
/// Every `\n` starts a new line and resets the column to 1; every other byte (including
/// `\r`) advances the column by one, so columns are counted in bytes.
fn eof_line_column(bytes: &[u8]) -> (u32, usize) {
    bytes
        .iter()
        .fold((1u32, 1usize), |(line, column), &byte| {
            if byte == b'\n' {
                (line + 1, 1)
            } else {
                (line, column + 1)
            }
        })
}
897
898pub(crate) fn build_recovery_error_node(
899    input: Input<'_>,
900    starters: &[&[u8]],
901    scope_label: &str,
902    generic_code: &str,
903) -> ParseErrorNode {
904    build_recovery_error_node_from_span(input, input, starters, scope_label, generic_code)
905}
906
/// Outcome of [`classify_recovery`]: which kind of problem a recovered region represents.
///
/// The struct-like variants all carry the same four fields, which
/// [`build_recovery_error_node_from_span`] copies into the resulting `ParseErrorNode`:
/// `code` (diagnostic code), `message`, `expected` and `suggestion`.
/// The unit variants have their texts filled in by that same function.
enum RecoveryClassification {
    /// Produced when `missing_name_diagnostic` recognizes the region.
    MissingMemberName {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// Produced when `missing_type_diagnostic` recognizes the region.
    MissingTypeReference {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// Produced when `invalid_expose_separator_diagnostic` recognizes the region.
    InvalidQualifiedNameSeparator {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// Produced when `missing_semicolon_or_body_diagnostic` recognizes the region.
    MissingBodyOrSemicolon {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// Produced when `missing_expression_after_operator_diagnostic` recognizes the region.
    MissingExpressionAfterOperator {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// Produced when `invalid_unit_reference_diagnostic` recognizes the region.
    InvalidUnitReference {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// Produced when `invalid_typing_operator_diagnostic` recognizes the region.
    InvalidTypingOperator {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// Produced when `invalid_bare_identifier_in_body_diagnostic` recognizes the region.
    InvalidBareIdentifierInBody {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// Produced when `unexpected_keyword_in_scope_diagnostic` recognizes the region.
    UnexpectedKeywordInScope {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// The skipped text looks like a declaration that lacks its terminating `;`
    /// (reported with code `missing_semicolon`).
    MissingSemicolon,
    /// The region starts with `#` or `@` (reported with code `unsupported_annotation_syntax`).
    UnsupportedAnnotation,
    /// Fallback when no more specific classification applies; reported with the
    /// caller-supplied generic code.
    Unexpected,
}
966
/// Returns `fragment` without its trailing ASCII whitespace bytes.
///
/// An empty or all-whitespace input yields an empty slice.
fn trim_ascii_end(fragment: &[u8]) -> &[u8] {
    // Index one past the last non-whitespace byte, or 0 when there is none.
    let kept_len = fragment
        .iter()
        .rposition(|byte| !byte.is_ascii_whitespace())
        .map_or(0, |last_kept| last_kept + 1);
    &fragment[..kept_len]
}
977
/// Decides which [`RecoveryClassification`] best describes a region that failed to parse.
///
/// * `input` - position where the failing element starts.
/// * `recovery_end` - position where recovery stopped skipping.
/// * `starters` - keywords that may begin an element in the current scope.
/// * `scope_label` - scope name, forwarded to the scope-specific diagnostics.
///
/// The checks run in a fixed order and the first one that matches wins, so the
/// order below is significant. When nothing matches, `Unexpected` is returned.
fn classify_recovery(
    input: Input<'_>,
    recovery_end: Input<'_>,
    starters: &[&[u8]],
    scope_label: &str,
) -> RecoveryClassification {
    // All pattern-based diagnostics look at the failing text with leading whitespace removed.
    let trimmed = trim_ascii_start(input.fragment());

    if let Some((code, message, expected, suggestion)) = missing_name_diagnostic(trimmed) {
        return RecoveryClassification::MissingMemberName {
            code: code.to_string(),
            message,
            expected,
            suggestion,
        };
    }

    if let Some((code, message, expected, suggestion)) = missing_type_diagnostic(trimmed) {
        return RecoveryClassification::MissingTypeReference {
            code: code.to_string(),
            message,
            expected,
            suggestion,
        };
    }

    if let Some((code, message, expected, suggestion)) =
        invalid_expose_separator_diagnostic(trimmed)
    {
        return RecoveryClassification::InvalidQualifiedNameSeparator {
            code: code.to_string(),
            message,
            expected,
            suggestion,
        };
    }

    if let Some((code, message, expected, suggestion)) = invalid_typing_operator_diagnostic(trimmed)
    {
        return RecoveryClassification::InvalidTypingOperator {
            code: code.to_string(),
            message,
            expected,
            suggestion,
        };
    }

    if let Some((code, message, expected, suggestion)) =
        missing_expression_after_operator_diagnostic(trimmed)
    {
        return RecoveryClassification::MissingExpressionAfterOperator {
            code: code.to_string(),
            message,
            expected,
            suggestion,
        };
    }

    if let Some((code, message, expected, suggestion)) = invalid_unit_reference_diagnostic(trimmed)
    {
        return RecoveryClassification::InvalidUnitReference {
            code: code.to_string(),
            message,
            expected,
            suggestion,
        };
    }

    if let Some((code, message, expected, suggestion)) =
        missing_semicolon_or_body_diagnostic(trimmed)
    {
        return RecoveryClassification::MissingBodyOrSemicolon {
            code: code.to_string(),
            message,
            expected,
            suggestion,
        };
    }

    // Bytes skipped by recovery: the offset difference, clamped so the slice
    // below can never run past the end of `input`'s fragment.
    let consumed_len = recovery_end
        .location_offset()
        .saturating_sub(input.location_offset())
        .min(input.fragment().len());
    let raw_consumed = &input.fragment()[..consumed_len];
    let consumed = trim_ascii_end(raw_consumed);
    // True when recovery moved forward and what follows `recovery_end` (after
    // `lex::ws_and_comments`, or `recovery_end` itself if that fails) is end of
    // input, a closing brace, or one of the `starters` keywords.
    let recovered_to_boundary = recovery_end.location_offset() > input.location_offset() && {
        let (next, _) = lex::ws_and_comments(recovery_end).unwrap_or((recovery_end, ()));
        next.fragment().is_empty()
            || next.fragment().starts_with(b"}")
            || lex::starts_with_any_keyword(next.fragment(), starters)
    };

    // The newline test uses the untrimmed bytes so a trailing line break still counts.
    let consumed_has_newline = raw_consumed.contains(&b'\n') || raw_consumed.contains(&b'\r');
    let first_line_end = consumed
        .iter()
        .position(|b| matches!(*b, b'\n' | b'\r'))
        .unwrap_or(consumed.len());
    let first_line = trim_ascii_end(&consumed[..first_line_end]);
    let consumed_has_delimiters = consumed
        .iter()
        .any(|b| matches!(*b, b'{' | b'}' | b'(' | b')' | b'[' | b']'));
    // A first line ending in one of these punctuation bytes is treated as an
    // unfinished construct rather than a declaration missing its `;`.
    let consumed_ends_incomplete = first_line.last().is_some_and(|b| {
        matches!(
            *b,
            b':' | b'=' | b',' | b'.' | b'+' | b'-' | b'*' | b'/' | b'>' | b'<' | b'|'
        )
    });
    let first_line_has_semicolon = first_line.contains(&b';');
    // Missing-semicolon heuristic: the region starts with a scope keyword,
    // recovery reached a boundary, the skipped text spans a line break (or
    // `recovery_end` sits directly on `}`), and the skipped text is non-empty
    // with no brackets, no dangling punctuation and no `;` on its first line.
    if recovered_to_boundary
        && lex::starts_with_any_keyword(trimmed, starters)
        && (consumed_has_newline || recovery_end.fragment().starts_with(b"}"))
        && !consumed.is_empty()
        && !consumed_has_delimiters
        && !consumed_ends_incomplete
        && !first_line_has_semicolon
    {
        return RecoveryClassification::MissingSemicolon;
    }

    // Regions introduced by `#` or `@` are reported as unsupported annotation syntax.
    if lex::starts_with_keyword(trimmed, b"#") || lex::starts_with_keyword(trimmed, b"@") {
        return RecoveryClassification::UnsupportedAnnotation;
    }

    if let Some((code, message, expected, suggestion)) =
        invalid_bare_identifier_in_body_diagnostic(trimmed, scope_label)
    {
        return RecoveryClassification::InvalidBareIdentifierInBody {
            code: code.to_string(),
            message,
            expected,
            suggestion,
        };
    }

    if let Some((code, message, expected, suggestion)) =
        unexpected_keyword_in_scope_diagnostic(trimmed, starters, scope_label)
    {
        return RecoveryClassification::UnexpectedKeywordInScope {
            code: code.to_string(),
            message,
            expected,
            suggestion,
        };
    }

    // Nothing more specific applied.
    RecoveryClassification::Unexpected
}
1125
1126pub(crate) fn build_recovery_error_node_from_span(
1127    input: Input<'_>,
1128    recovery_end: Input<'_>,
1129    starters: &[&[u8]],
1130    scope_label: &str,
1131    generic_code: &str,
1132) -> ParseErrorNode {
1133    match classify_recovery(input, recovery_end, starters, scope_label) {
1134        RecoveryClassification::MissingMemberName {
1135            code,
1136            message,
1137            expected,
1138            suggestion,
1139        }
1140        | RecoveryClassification::MissingTypeReference {
1141            code,
1142            message,
1143            expected,
1144            suggestion,
1145        }
1146        | RecoveryClassification::InvalidQualifiedNameSeparator {
1147            code,
1148            message,
1149            expected,
1150            suggestion,
1151        }
1152        | RecoveryClassification::MissingBodyOrSemicolon {
1153            code,
1154            message,
1155            expected,
1156            suggestion,
1157        }
1158        | RecoveryClassification::MissingExpressionAfterOperator {
1159            code,
1160            message,
1161            expected,
1162            suggestion,
1163        }
1164        | RecoveryClassification::InvalidUnitReference {
1165            code,
1166            message,
1167            expected,
1168            suggestion,
1169        }
1170        | RecoveryClassification::InvalidTypingOperator {
1171            code,
1172            message,
1173            expected,
1174            suggestion,
1175        }
1176        | RecoveryClassification::InvalidBareIdentifierInBody {
1177            code,
1178            message,
1179            expected,
1180            suggestion,
1181        }
1182        | RecoveryClassification::UnexpectedKeywordInScope {
1183            code,
1184            message,
1185            expected,
1186            suggestion,
1187        } => ParseErrorNode {
1188            message,
1189            code,
1190            expected: Some(expected),
1191            found: recovery_found_snippet_from_span(input, recovery_end),
1192            suggestion: Some(suggestion),
1193            category: Some(DiagnosticCategory::ParseError),
1194        },
1195        RecoveryClassification::MissingSemicolon => ParseErrorNode {
1196            message: "missing semicolon before next declaration".to_string(),
1197            code: "missing_semicolon".to_string(),
1198            expected: Some("';'".to_string()),
1199            found: recovery_found_snippet_from_span(input, recovery_end),
1200            suggestion: Some("Insert ';' before this declaration.".to_string()),
1201            category: Some(DiagnosticCategory::ParseError),
1202        },
1203        RecoveryClassification::UnsupportedAnnotation => ParseErrorNode {
1204            message: format!("unsupported annotation syntax in {scope_label}"),
1205            code: "unsupported_annotation_syntax".to_string(),
1206            expected: Some(format!("valid {scope_label} element")),
1207            found: recovery_found_snippet_from_span(input, recovery_end),
1208            suggestion: Some(
1209                "Remove this annotation or extend the parser to support annotated declarations."
1210                    .to_string(),
1211            ),
1212            category: Some(DiagnosticCategory::UnsupportedGrammarForm),
1213        },
1214        RecoveryClassification::Unexpected => ParseErrorNode {
1215            message: format!("unexpected token in {scope_label}"),
1216            code: generic_code.to_string(),
1217            expected: Some(format!("valid {scope_label} element")),
1218            found: recovery_found_snippet_from_span(input, recovery_end),
1219            suggestion: Some(format!("Fix this {scope_label} member and re-run parsing.")),
1220            category: Some(DiagnosticCategory::ParseError),
1221        },
1222    }
1223}
1224
1225fn parse_error_from_recovery_node(span: &crate::ast::Span, node: &ParseErrorNode) -> ParseError {
1226    let mut err = ParseError::new(node.message.clone())
1227        .with_location(span.offset, span.line, span.column)
1228        .with_length(span.len.max(1))
1229        .with_code(node.code.clone())
1230        .with_category(
1231            node.category
1232                .unwrap_or_else(|| category_from_code(node.code.as_str())),
1233        );
1234    let severity = if node.code == "unsupported_annotation_syntax" {
1235        DiagnosticSeverity::Warning
1236    } else {
1237        DiagnosticSeverity::Error
1238    };
1239    err = err.with_severity(severity);
1240    if let Some(expected) = &node.expected {
1241        err = err.with_expected(expected.clone());
1242    }
1243    if let Some(found) = &node.found {
1244        err = err.with_found(found.clone());
1245    }
1246    if let Some(suggestion) = &node.suggestion {
1247        err = err.with_suggestion(suggestion.clone());
1248    }
1249    err
1250}
1251
1252fn diagnostic_specificity(err: &ParseError) -> u8 {
1253    match err.code.as_deref() {
1254        Some("missing_member_name")
1255        | Some("missing_type_reference")
1256        | Some("invalid_qualified_name_separator")
1257        | Some("invalid_typing_operator")
1258        | Some("missing_expression_after_operator")
1259        | Some("invalid_unit_reference")
1260        | Some("missing_body_or_semicolon")
1261        | Some("missing_semicolon")
1262        | Some("unexpected_closing_brace")
1263        | Some("missing_closing_brace")
1264        | Some("unsupported_annotation_syntax")
1265        | Some("invalid_bare_identifier_in_action_body")
1266        | Some("invalid_bare_identifier_in_state_body")
1267        | Some("recovery_cascade_suppressed")
1268        | Some("unexpected_keyword_in_scope") => 5,
1269        Some("illegal_top_level_definition") => 4,
1270        Some(code) if code.starts_with("recovered_") => 2,
1271        Some("expected_end_of_input") | Some("expected_keyword") => 1,
1272        _ => 3,
1273    }
1274}
1275
1276fn dedup_errors(mut errors: Vec<ParseError>) -> Vec<ParseError> {
1277    errors.sort_by_key(|e| {
1278        (
1279            e.offset.unwrap_or(usize::MAX),
1280            e.line.unwrap_or(u32::MAX),
1281            e.column.unwrap_or(usize::MAX),
1282            std::cmp::Reverse(diagnostic_specificity(e)),
1283        )
1284    });
1285
1286    let mut deduped = Vec::new();
1287    for err in errors {
1288        let duplicate = deduped.iter().any(|existing: &ParseError| {
1289            let same_start = existing.offset == err.offset
1290                && existing.line == err.line
1291                && existing.column == err.column;
1292            let same_found = existing.found == err.found;
1293            let existing_specificity = diagnostic_specificity(existing);
1294            let err_specificity = diagnostic_specificity(&err);
1295            same_start
1296                && (same_found || existing.code == err.code)
1297                && existing_specificity >= err_specificity
1298        });
1299        if !duplicate {
1300            deduped.push(err);
1301        }
1302    }
1303
1304    deduped.sort_by_key(|e| (e.offset.unwrap_or(usize::MAX), e.line.unwrap_or(u32::MAX)));
1305    deduped
1306}
1307
1308fn is_cascade_candidate(err: &ParseError) -> bool {
1309    matches!(err.code.as_deref(), Some("missing_semicolon"))
1310        || err
1311            .code
1312            .as_deref()
1313            .is_some_and(|code| code.starts_with("recovered_"))
1314}
1315
1316fn cascade_family(err: &ParseError) -> Option<&str> {
1317    if matches!(err.code.as_deref(), Some("missing_semicolon")) {
1318        Some("missing_semicolon")
1319    } else if err
1320        .code
1321        .as_deref()
1322        .is_some_and(|code| code.starts_with("recovered_"))
1323    {
1324        Some("recovered")
1325    } else {
1326        None
1327    }
1328}
1329
1330fn make_cascade_summary(run: &[ParseError]) -> Option<ParseError> {
1331    let summary_anchor = run.first()?;
1332    let suppressed = run.len().saturating_sub(3);
1333    let family = cascade_family(summary_anchor).unwrap_or("recovery");
1334    let mut err = ParseError::new(format!(
1335        "suppressed {suppressed} cascading {family} diagnostic{} after earlier recovery errors",
1336        if suppressed == 1 { "" } else { "s" }
1337    ))
1338    .with_location(
1339        summary_anchor.offset?,
1340        summary_anchor.line?,
1341        summary_anchor.column?,
1342    )
1343    .with_length(summary_anchor.length.unwrap_or(1).max(1))
1344    .with_code("recovery_cascade_suppressed")
1345    .with_expected("fix the first syntax error in this body")
1346    .with_suggestion(
1347        "Fix the earliest diagnostic in this body first; later syntax errors may be cascades.",
1348    )
1349    .with_severity(DiagnosticSeverity::Warning)
1350    .with_category(DiagnosticCategory::ParseError);
1351    if let Some(found) = &summary_anchor.found {
1352        err = err.with_found(found.clone());
1353    }
1354    Some(err)
1355}
1356
1357fn suppress_diagnostic_cascades(errors: Vec<ParseError>) -> Vec<ParseError> {
1358    const MAX_UNSUMMARIZED_CASCADE: usize = 3;
1359
1360    let mut output = Vec::new();
1361    let mut run: Vec<ParseError> = Vec::new();
1362
1363    let flush_run = |run: &mut Vec<ParseError>, output: &mut Vec<ParseError>| {
1364        if run.len() <= MAX_UNSUMMARIZED_CASCADE {
1365            output.append(run);
1366        } else {
1367            output.extend(run.drain(..MAX_UNSUMMARIZED_CASCADE));
1368            if let Some(summary) = make_cascade_summary(run) {
1369                output.push(summary);
1370            }
1371            run.clear();
1372        }
1373    };
1374
1375    for err in errors {
1376        let continues_run = run.last().is_some_and(|previous| {
1377            is_cascade_candidate(&err)
1378                && cascade_family(previous) == cascade_family(&err)
1379                && previous.line.zip(err.line).is_some_and(|(a, b)| b <= a + 1)
1380        });
1381
1382        if is_cascade_candidate(&err) && (run.is_empty() || continues_run) {
1383            run.push(err);
1384        } else {
1385            flush_run(&mut run, &mut output);
1386            if is_cascade_candidate(&err) {
1387                run.push(err);
1388            } else {
1389                output.push(err);
1390            }
1391        }
1392    }
1393    flush_run(&mut run, &mut output);
1394    output.sort_by_key(|e| (e.offset.unwrap_or(usize::MAX), e.line.unwrap_or(u32::MAX)));
1395    output
1396}
1397
1398fn root_body_recovery_error(input: Input<'_>, scope: &str) -> ParseError {
1399    let (found, len) = fragment_to_found_snippet(input.fragment());
1400    let mut err = ParseError::new(format!(
1401        "could not parse {scope} body; skipped to next root element"
1402    ))
1403    .with_location(
1404        input.location_offset(),
1405        input.location_line(),
1406        input.get_column(),
1407    )
1408    .with_length(len.max(1))
1409    .with_code("recovered_root_body")
1410    .with_expected(format!("valid {scope} body"))
1411    .with_suggestion(
1412        "Fix the first syntax error in this body; later root-level diagnostics may be cascades.",
1413    )
1414    .with_severity(DiagnosticSeverity::Error)
1415    .with_category(DiagnosticCategory::ParseError);
1416    if !found.is_empty() {
1417        err = err.with_found(found);
1418    }
1419    err
1420}
1421
1422fn root_body_scope(fragment: &[u8]) -> Option<&'static str> {
1423    let fragment = trim_ascii_start(fragment);
1424    if lex::starts_with_keyword(fragment, b"package")
1425        || lex::starts_with_keyword(fragment, b"library")
1426        || lex::starts_with_keyword(fragment, b"standard")
1427    {
1428        Some("package")
1429    } else if lex::starts_with_keyword(fragment, b"namespace") {
1430        Some("namespace")
1431    } else {
1432        None
1433    }
1434}
1435
1436fn collect_requirement_body_errors(body: &RequirementDefBody, errors: &mut Vec<ParseError>) {
1437    if let RequirementDefBody::Brace { elements } = body {
1438        for element in elements {
1439            match &element.value {
1440                RequirementDefBodyElement::Error(n) => {
1441                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1442                }
1443                RequirementDefBodyElement::Frame(n) => {
1444                    collect_requirement_body_errors(&n.value.body, errors)
1445                }
1446                _ => {}
1447            }
1448        }
1449    }
1450}
1451
1452fn collect_action_def_body_errors(body: &ActionDefBody, errors: &mut Vec<ParseError>) {
1453    if let ActionDefBody::Brace { elements } = body {
1454        for element in elements {
1455            if let ActionDefBodyElement::Error(n) = &element.value {
1456                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1457            }
1458        }
1459    }
1460}
1461
1462fn collect_action_usage_body_errors(body: &ActionUsageBody, errors: &mut Vec<ParseError>) {
1463    if let ActionUsageBody::Brace { elements } = body {
1464        for element in elements {
1465            match &element.value {
1466                ActionUsageBodyElement::Error(n) => {
1467                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1468                }
1469                ActionUsageBodyElement::ActionUsage(n) => {
1470                    collect_action_usage_body_errors(&n.value.body, errors)
1471                }
1472                _ => {}
1473            }
1474        }
1475    }
1476}
1477
1478fn collect_state_body_errors(body: &StateDefBody, errors: &mut Vec<ParseError>) {
1479    if let StateDefBody::Brace { elements } = body {
1480        for element in elements {
1481            match &element.value {
1482                StateDefBodyElement::Error(n) => {
1483                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1484                }
1485                StateDefBodyElement::Entry(n) => collect_state_body_errors(&n.value.body, errors),
1486                StateDefBodyElement::RequirementUsage(n) => {
1487                    collect_requirement_body_errors(&n.value.body, errors)
1488                }
1489                StateDefBodyElement::StateUsage(n) => {
1490                    collect_state_body_errors(&n.value.body, errors)
1491                }
1492                _ => {}
1493            }
1494        }
1495    }
1496}
1497
1498fn collect_use_case_body_errors(body: &UseCaseDefBody, errors: &mut Vec<ParseError>) {
1499    if let UseCaseDefBody::Brace { elements } = body {
1500        for element in elements {
1501            if let UseCaseDefBodyElement::Error(n) = &element.value {
1502                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1503            }
1504        }
1505    }
1506}
1507
1508fn collect_constraint_body_errors(body: &ConstraintDefBody, errors: &mut Vec<ParseError>) {
1509    if let ConstraintDefBody::Brace { elements } = body {
1510        for element in elements {
1511            if let ConstraintDefBodyElement::Error(n) = &element.value {
1512                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1513            }
1514        }
1515    }
1516}
1517
1518fn collect_calc_body_errors(body: &CalcDefBody, errors: &mut Vec<ParseError>) {
1519    if let CalcDefBody::Brace { elements } = body {
1520        for element in elements {
1521            if let CalcDefBodyElement::Error(n) = &element.value {
1522                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1523            }
1524        }
1525    }
1526}
1527
1528fn collect_view_def_body_errors(body: &ViewDefBody, errors: &mut Vec<ParseError>) {
1529    if let ViewDefBody::Brace { elements } = body {
1530        for element in elements {
1531            if let ViewDefBodyElement::Error(n) = &element.value {
1532                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1533            }
1534        }
1535    }
1536}
1537
1538fn collect_view_body_errors(body: &ViewBody, errors: &mut Vec<ParseError>) {
1539    if let ViewBody::Brace { elements } = body {
1540        for element in elements {
1541            if let ViewBodyElement::Error(n) = &element.value {
1542                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1543            }
1544        }
1545    }
1546}
1547
1548fn collect_part_def_body_errors(body: &PartDefBody, errors: &mut Vec<ParseError>) {
1549    if let PartDefBody::Brace { elements } = body {
1550        for element in elements {
1551            match &element.value {
1552                PartDefBodyElement::Error(n) => {
1553                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1554                }
1555                PartDefBodyElement::PartUsage(n) => {
1556                    collect_part_usage_body_errors(&n.value.body, errors)
1557                }
1558                PartDefBodyElement::Perform(n) => {
1559                    collect_perform_body_errors(&n.value.body, errors)
1560                }
1561                _ => {}
1562            }
1563        }
1564    }
1565}
1566
1567fn collect_perform_body_errors(body: &crate::ast::PerformBody, _errors: &mut Vec<ParseError>) {
1568    match body {
1569        crate::ast::PerformBody::Semicolon => {}
1570        crate::ast::PerformBody::Brace { .. } => {}
1571    }
1572}
1573
1574fn collect_part_usage_body_errors(body: &PartUsageBody, errors: &mut Vec<ParseError>) {
1575    if let PartUsageBody::Brace { elements } = body {
1576        for element in elements {
1577            match &element.value {
1578                PartUsageBodyElement::Error(n) => {
1579                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1580                }
1581                PartUsageBodyElement::PartUsage(n) => {
1582                    collect_part_usage_body_errors(&n.value.body, errors)
1583                }
1584                PartUsageBodyElement::Perform(n) => {
1585                    collect_perform_body_errors(&n.value.body, errors)
1586                }
1587                PartUsageBodyElement::StateUsage(n) => {
1588                    collect_state_body_errors(&n.value.body, errors)
1589                }
1590                _ => {}
1591            }
1592        }
1593    }
1594}
1595
1596fn collect_package_body_errors(body: &PackageBody, errors: &mut Vec<ParseError>) {
1597    if let PackageBody::Brace { elements } = body {
1598        for element in elements {
1599            match &element.value {
1600                PackageBodyElement::Error(n) => {
1601                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1602                }
1603                PackageBodyElement::Package(n) => {
1604                    collect_package_body_errors(&n.value.body, errors)
1605                }
1606                PackageBodyElement::LibraryPackage(n) => {
1607                    collect_package_body_errors(&n.value.body, errors)
1608                }
1609                PackageBodyElement::PartDef(n) => {
1610                    collect_part_def_body_errors(&n.value.body, errors)
1611                }
1612                PackageBodyElement::PartUsage(n) => {
1613                    collect_part_usage_body_errors(&n.value.body, errors)
1614                }
1615                PackageBodyElement::ActionDef(n) => {
1616                    collect_action_def_body_errors(&n.value.body, errors)
1617                }
1618                PackageBodyElement::ActionUsage(n) => {
1619                    collect_action_usage_body_errors(&n.value.body, errors)
1620                }
1621                PackageBodyElement::RequirementDef(n) => {
1622                    collect_requirement_body_errors(&n.value.body, errors)
1623                }
1624                PackageBodyElement::RequirementUsage(n) => {
1625                    collect_requirement_body_errors(&n.value.body, errors)
1626                }
1627                PackageBodyElement::UseCaseDef(n) => {
1628                    collect_use_case_body_errors(&n.value.body, errors)
1629                }
1630                PackageBodyElement::UseCaseUsage(n) => {
1631                    collect_use_case_body_errors(&n.value.body, errors)
1632                }
1633                PackageBodyElement::ConcernUsage(n) => {
1634                    collect_requirement_body_errors(&n.value.body, errors)
1635                }
1636                PackageBodyElement::StateDef(n) => collect_state_body_errors(&n.value.body, errors),
1637                PackageBodyElement::StateUsage(n) => {
1638                    collect_state_body_errors(&n.value.body, errors)
1639                }
1640                PackageBodyElement::ConstraintDef(n) => {
1641                    collect_constraint_body_errors(&n.value.body, errors)
1642                }
1643                PackageBodyElement::CalcDef(n) => collect_calc_body_errors(&n.value.body, errors),
1644                PackageBodyElement::ViewDef(n) => {
1645                    collect_view_def_body_errors(&n.value.body, errors)
1646                }
1647                PackageBodyElement::ViewUsage(n) => collect_view_body_errors(&n.value.body, errors),
1648                _ => {}
1649            }
1650        }
1651    }
1652}
1653
1654fn collect_recovery_errors(root: &RootNamespace) -> Vec<ParseError> {
1655    let mut errors = Vec::new();
1656    for element in &root.elements {
1657        match &element.value {
1658            crate::ast::RootElement::Package(n) => {
1659                collect_package_body_errors(&n.value.body, &mut errors)
1660            }
1661            crate::ast::RootElement::LibraryPackage(n) => {
1662                collect_package_body_errors(&n.value.body, &mut errors)
1663            }
1664            crate::ast::RootElement::Namespace(n) => {
1665                collect_package_body_errors(&n.value.body, &mut errors)
1666            }
1667            crate::ast::RootElement::Import(_) => {}
1668        }
1669    }
1670    errors
1671}
1672
1673/// Parse full input; must consume entire input. Strips UTF-8 BOM if present.
1674#[allow(clippy::result_large_err)]
1675pub fn parse_root(input: &str) -> Result<RootNamespace, ParseError> {
1676    let bytes = input
1677        .strip_prefix('\u{FEFF}')
1678        .map(str::as_bytes)
1679        .unwrap_or_else(|| input.as_bytes());
1680    let located = LocatedSpan::new(bytes);
1681    match package::root_namespace(located) {
1682        Ok((rest, root)) => {
1683            if !rest.fragment().is_empty() && has_unclosed_brace(bytes) {
1684                return Err(missing_closing_brace_error_at_eof(bytes));
1685            }
1686            if rest.fragment().is_empty() {
1687                log::debug!("parse_root: success, {} top-level elements", root.elements.len());
1688                Ok(root)
1689            } else if trim_ascii_start(rest.fragment()).starts_with(b"}") {
1690                Err(unexpected_closing_brace_parse_error(rest))
1691            } else {
1692                let offset = located.location_offset() + located.fragment().len() - rest.fragment().len();
1693                let unconsumed = rest.fragment();
1694                let first_80 = unconsumed.get(..80.min(unconsumed.len())).unwrap_or(unconsumed);
1695                log::debug!(
1696                    "parse_root: expected end of input; parsed {} elements; unconsumed len={}, offset={}, first 80 bytes: {:?}",
1697                    root.elements.len(),
1698                    unconsumed.len(),
1699                    offset,
1700                    first_80,
1701                );
1702                log::debug!(
1703                    "parse_root: unconsumed as str: {:?}",
1704                    String::from_utf8_lossy(first_80),
1705                );
1706                let (found_snippet, found_len) = fragment_to_found_snippet(rest.fragment());
1707                let mut pe = ParseError::new("expected end of input")
1708                    .with_location(offset, rest.location_line(), rest.get_column())
1709                    .with_length(found_len.max(1))
1710                    .with_code("expected_end_of_input")
1711                    .with_category(DiagnosticCategory::ParseError);
1712                if !found_snippet.is_empty() {
1713                    pe = pe.with_found(found_snippet);
1714                }
1715                if root.elements.is_empty() && is_illegal_top_level_definition(rest.fragment()) {
1716                    pe = pe
1717                        .with_code("illegal_top_level_definition")
1718                        .with_expected("'package', 'namespace', or 'import'")
1719                        .with_suggestion(
1720                            "Wrap this declaration in `package ... { ... }` or `namespace ... { ... }`.",
1721                        );
1722                    pe.message = "illegal top-level definition".to_string();
1723                }
1724                Err(pe)
1725            }
1726        }
1727        Err(nom::Err::Error(e)) => Err(missing_closing_brace_error(bytes, e.input).unwrap_or_else(|| {
1728            nom_err_to_parse_error(
1729                &e,
1730                None,
1731                Some("'package', 'namespace', or 'import' at top level; or valid element in package body"),
1732            )
1733        })),
1734        Err(nom::Err::Failure(e)) => Err(missing_closing_brace_error(bytes, e.input).unwrap_or_else(|| {
1735            nom_err_to_parse_error(
1736                &e,
1737                None,
1738                Some("'package', 'namespace', or 'import' at top level; or valid element in package body"),
1739            )
1740        })),
1741        Err(nom::Err::Incomplete(_)) => Err(
1742            ParseError::new("unexpected end of input")
1743                .with_code("unexpected_eof")
1744                .with_category(DiagnosticCategory::ParseError),
1745        ),
1746    }
1747}
1748
1749const MAX_RECOVERY_ERRORS: usize = 100;
1750
1751/// Parse input with error recovery: collects multiple diagnostics and returns a partial AST when errors occur.
1752/// Use this for language servers so the user sees all parse errors and features (e.g. hover) can use the partial AST.
1753pub fn parse_with_diagnostics(input: &str) -> ParseResult {
1754    let bytes = input
1755        .strip_prefix('\u{FEFF}')
1756        .map(str::as_bytes)
1757        .unwrap_or_else(|| input.as_bytes());
1758    let located = LocatedSpan::new(bytes);
1759
1760    let mut elements = Vec::new();
1761    let mut errors = Vec::new();
1762
1763    let (mut input, _) = match lex::ws_and_comments(located) {
1764        Ok(x) => x,
1765        Err(_) => {
1766            return ParseResult {
1767                root: RootNamespace { elements: vec![] },
1768                errors: vec![ParseError::new("invalid input")
1769                    .with_code("invalid_input")
1770                    .with_category(DiagnosticCategory::ParseError)],
1771            };
1772        }
1773    };
1774
1775    while errors.len() < MAX_RECOVERY_ERRORS {
1776        // Skip leading ws/comments; if nothing left, we're done (avoids parsing "" as root_element).
1777        let (rest, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
1778        input = rest;
1779        if input.fragment().is_empty() {
1780            break;
1781        }
1782        match package::root_element(input) {
1783            Ok((rest, elem)) => {
1784                elements.push(elem);
1785                input = rest;
1786            }
1787            Err(nom::Err::Error(e)) | Err(nom::Err::Failure(e)) => {
1788                let (trimmed, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
1789                if trim_ascii_start(trimmed.fragment()).starts_with(b"}") {
1790                    errors.push(unexpected_closing_brace_parse_error(trimmed));
1791                    let skip_result = lex::skip_to_next_sync_point(trimmed);
1792                    match skip_result {
1793                        Ok((rest, _)) => input = rest,
1794                        Err(_) => break,
1795                    }
1796                    continue;
1797                }
1798                if errors.is_empty()
1799                    && has_unclosed_brace(bytes)
1800                    && (lex::starts_with_keyword(trimmed.fragment(), b"package")
1801                        || lex::starts_with_keyword(trimmed.fragment(), b"namespace")
1802                        || lex::starts_with_keyword(trimmed.fragment(), b"library")
1803                        || lex::starts_with_keyword(trimmed.fragment(), b"standard"))
1804                {
1805                    errors.push(missing_closing_brace_error_at_eof(bytes));
1806                    break;
1807                }
1808                if let Some(scope) = root_body_scope(input.fragment()) {
1809                    let (error_input, _) = lex::ws_and_comments(e.input).unwrap_or((e.input, ()));
1810                    if error_input.fragment().starts_with(b"{") {
1811                        errors.push(root_body_recovery_error(error_input, scope));
1812                        match lex::skip_statement_or_block(error_input) {
1813                            Ok((rest, _))
1814                                if rest.location_offset() > error_input.location_offset() =>
1815                            {
1816                                input = rest;
1817                                continue;
1818                            }
1819                            _ => {}
1820                        }
1821                    }
1822                }
1823                let pe = missing_closing_brace_error(bytes, e.input).unwrap_or_else(|| {
1824                    nom_err_to_parse_error(&e, None, Some("'package', 'namespace', or 'import'"))
1825                });
1826                errors.push(pe);
1827                let skip_result = lex::skip_to_next_sync_point(e.input);
1828                match skip_result {
1829                    Ok((rest, _)) => input = rest,
1830                    Err(_) => break,
1831                }
1832            }
1833            Err(nom::Err::Incomplete(_)) => {
1834                errors.push(
1835                    ParseError::new("unexpected end of input")
1836                        .with_location(
1837                            input.location_offset(),
1838                            input.location_line(),
1839                            input.get_column(),
1840                        )
1841                        .with_length(1)
1842                        .with_code("unexpected_eof")
1843                        .with_category(DiagnosticCategory::ParseError),
1844                );
1845                break;
1846            }
1847        }
1848    }
1849
1850    let (input, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
1851
1852    if input.fragment().is_empty()
1853        && has_unclosed_brace(bytes)
1854        && !errors
1855            .iter()
1856            .any(|e| e.code.as_deref() == Some("missing_closing_brace"))
1857    {
1858        errors.push(missing_closing_brace_error_at_eof(bytes));
1859    }
1860
1861    if !input.fragment().is_empty()
1862        && !errors
1863            .iter()
1864            .any(|e| e.code.as_deref() == Some("missing_closing_brace"))
1865    {
1866        if trim_ascii_start(input.fragment()).starts_with(b"}") {
1867            errors.push(unexpected_closing_brace_parse_error(input));
1868        } else {
1869            let (found_snippet, found_len) = fragment_to_found_snippet(input.fragment());
1870            let mut pe = ParseError::new("expected end of input")
1871                .with_location(
1872                    input.location_offset(),
1873                    input.location_line(),
1874                    input.get_column(),
1875                )
1876                .with_length(found_len.max(1))
1877                .with_code("expected_end_of_input")
1878                .with_severity(DiagnosticSeverity::Error)
1879                .with_category(DiagnosticCategory::ParseError);
1880            if !found_snippet.is_empty() {
1881                pe = pe.with_found(found_snippet);
1882            }
1883            errors.push(pe);
1884        }
1885    }
1886
1887    errors.extend(collect_recovery_errors(&RootNamespace {
1888        elements: elements.clone(),
1889    }));
1890    errors = dedup_errors(errors);
1891    errors = suppress_diagnostic_cascades(errors);
1892
1893    ParseResult {
1894        root: RootNamespace { elements },
1895        errors,
1896    }
1897}