Skip to main content

sysml_v2_parser/parser/
mod.rs

1//! Nom-based parser for SysML v2 textual notation.
2//!
3//! Organized into modules:
4//! - [lex]: whitespace, comments, names, qualified names, skip helpers
5//! - [attribute]: attribute definition and usage
6//! - [import]: import and relationship body
7//! - [part]: part definition and part usage
8//! - [package]: package and root namespace
9
10mod action;
11mod alias;
12mod allocation;
13mod attribute;
14mod case;
15mod connection;
16mod constraint;
17mod dependency;
18mod enumeration;
19mod expr;
20mod flow;
21mod import;
22mod individual;
23mod interface;
24mod item;
25mod lex;
26mod metadata;
27mod metadata_annotation;
28mod occurrence;
29mod package;
30mod part;
31mod port;
32mod requirement;
33mod span;
34mod state;
35mod usecase;
36mod view;
37
38pub(crate) use span::{node_from_to, span_from_to, with_span, Input};
39
40use crate::ast::{
41    ActionDefBody, ActionDefBodyElement, ActionUsageBody, ActionUsageBodyElement, CalcDefBody,
42    CalcDefBodyElement, ConstraintDefBody, ConstraintDefBodyElement, PackageBody,
43    PackageBodyElement, ParseErrorNode, PartDefBody, PartDefBodyElement, PartUsageBody,
44    PartUsageBodyElement, RequirementDefBody, RequirementDefBodyElement, RootNamespace,
45    StateDefBody, StateDefBodyElement, UseCaseDefBody, UseCaseDefBodyElement, ViewBody,
46    ViewBodyElement, ViewDefBody, ViewDefBodyElement,
47};
48use crate::error::{DiagnosticCategory, DiagnosticSeverity, ParseError};
49use nom::error::Error;
50use nom_locate::LocatedSpan;
51
52/// Result of parsing with error recovery: a (possibly partial) AST and zero or more diagnostics.
53#[derive(Debug, Clone)]
54pub struct ParseResult {
55    /// Root namespace; contains all successfully parsed top-level elements (partial when errors occurred).
56    pub root: RootNamespace,
57    /// All parse errors encountered (multiple when recovery is used).
58    pub errors: Vec<ParseError>,
59}
60
61impl ParseResult {
62    /// True if the document parsed fully with no errors.
63    pub fn is_ok(&self) -> bool {
64        self.errors.is_empty()
65    }
66}
67
/// Upper bound, in bytes, on the snippet produced by `fragment_to_found_snippet`.
const FOUND_SNIPPET_MAX_LEN: usize = 40;

/// Leading keywords that `is_illegal_top_level_definition` treats as the start of an
/// illegal top-level definition.
const ILLEGAL_TOP_LEVEL_STARTERS: &[&[u8]] = &[
    b"action",
    b"actor",
    b"alias",
    b"allocate",
    b"allocation",
    b"attribute",
    b"bind",
    b"calc",
    b"case",
    b"concern",
    b"connection",
    b"constraint",
    b"dependency",
    b"enum",
    b"flow",
    b"interface",
    b"item",
    b"metadata",
    b"occurrence",
    b"part",
    b"perform",
    b"port",
    b"ref",
    b"require",
    b"requirement",
    b"satisfy",
    b"state",
    b"use",
    b"verification",
    b"view",
    b"viewpoint",
];

/// Extracts a short "found" snippet from the input at an error position.
///
/// The snippet covers the first line of `fragment`, capped at `FOUND_SNIPPET_MAX_LEN`
/// bytes, decoded as UTF-8 with replacement characters for invalid sequences.
///
/// Returns the snippet with trailing whitespace removed, plus the number of input bytes the
/// snippet was taken from (measured before trimming).
fn fragment_to_found_snippet(fragment: &[u8]) -> (String, usize) {
    let line_end = fragment
        .iter()
        .position(|&byte| matches!(byte, b'\n' | b'\r'))
        .unwrap_or(fragment.len());
    // `line_end` never exceeds `fragment.len()`, so this slice is always in bounds.
    let shown = &fragment[..line_end.min(FOUND_SNIPPET_MAX_LEN)];
    let text = String::from_utf8_lossy(shown)
        .replace('\n', "\\n")
        .replace('\r', "\\r");
    (text.trim_end().to_string(), shown.len())
}
118
119pub(crate) fn recovery_found_snippet(input: Input<'_>) -> Option<String> {
120    let frag = input.fragment();
121    let take = frag
122        .iter()
123        .position(|&b| b == b'\n' || b == b'\r')
124        .unwrap_or(frag.len())
125        .min(60);
126    let snippet = String::from_utf8_lossy(&frag[..take]).trim().to_string();
127    if snippet.is_empty() {
128        None
129    } else {
130        Some(snippet)
131    }
132}
133
134fn recovery_found_snippet_from_span(input: Input<'_>, recovery_end: Input<'_>) -> Option<String> {
135    let consumed_len = recovery_end
136        .location_offset()
137        .saturating_sub(input.location_offset())
138        .min(input.fragment().len());
139    if consumed_len == 0 {
140        return recovery_found_snippet(input);
141    }
142    let frag = &input.fragment()[..consumed_len];
143    let take = frag
144        .iter()
145        .position(|&b| b == b'\n' || b == b'\r')
146        .unwrap_or(frag.len())
147        .min(60);
148    let snippet = String::from_utf8_lossy(&frag[..take]).trim().to_string();
149    if snippet.is_empty() {
150        recovery_found_snippet(input)
151    } else {
152        Some(snippet)
153    }
154}
155
156/// Map nom error kind to a human-readable message for language server diagnostics.
157fn nom_error_kind_to_message(code: &nom::error::ErrorKind) -> &'static str {
158    use nom::error::ErrorKind;
159    match code {
160        ErrorKind::Tag => "expected keyword or token",
161        ErrorKind::Digit => "expected number",
162        ErrorKind::Alpha => "expected identifier",
163        ErrorKind::AlphaNumeric => "expected identifier",
164        ErrorKind::Space => "expected whitespace",
165        ErrorKind::MultiSpace => "expected whitespace",
166        ErrorKind::Eof => "unexpected end of input",
167        ErrorKind::TakeUntil => "expected terminator",
168        ErrorKind::TakeWhile1 => "expected token",
169        ErrorKind::Alt => {
170            "expected package, import, part, port, interface, alias, attribute, or action"
171        }
172        ErrorKind::Many0 | ErrorKind::Many1 => "expected list of elements",
173        _ => "parse error",
174    }
175}
176
177/// Map nom error kind to a specific code for LSP/quick fixes.
178fn nom_error_kind_to_code(code: &nom::error::ErrorKind) -> &'static str {
179    use nom::error::ErrorKind;
180    match code {
181        ErrorKind::Tag => "expected_keyword",
182        ErrorKind::Digit => "expected_number",
183        ErrorKind::Alpha | ErrorKind::AlphaNumeric => "expected_identifier",
184        ErrorKind::Space | ErrorKind::MultiSpace => "expected_whitespace",
185        ErrorKind::Eof => "unexpected_eof",
186        ErrorKind::TakeUntil => "expected_terminator",
187        ErrorKind::TakeWhile1 => "expected_token",
188        ErrorKind::Alt => "expected_alt",
189        ErrorKind::Many0 | ErrorKind::Many1 => "expected_list",
190        _ => "parse_error",
191    }
192}
193
194fn nom_err_to_parse_error(
195    e: &Error<Input<'_>>,
196    length_override: Option<usize>,
197    expected_context: Option<&'static str>,
198) -> ParseError {
199    let offset = e.input.location_offset();
200    let line = e.input.location_line();
201    let column = e.input.get_column();
202    let fragment = e.input.fragment();
203    let (found_snippet, found_len) = fragment_to_found_snippet(fragment);
204    let message = nom_error_kind_to_message(&e.code).to_string();
205    let span_len = length_override.unwrap_or(found_len).max(1);
206    if trim_ascii_start(fragment).starts_with(b"}") {
207        return unexpected_closing_brace_parse_error(e.input);
208    }
209    let mut pe = ParseError::new(message)
210        .with_location(offset, line, column)
211        .with_length(span_len)
212        .with_code(nom_error_kind_to_code(&e.code))
213        .with_severity(DiagnosticSeverity::Error)
214        .with_category(DiagnosticCategory::ParseError);
215    if !found_snippet.is_empty() {
216        pe = pe.with_found(found_snippet);
217    }
218    if let Some(ctx) = expected_context {
219        pe = pe.with_expected(ctx);
220    }
221    let at_root = expected_context.is_some_and(|ctx| {
222        ctx.contains("'package', 'namespace', or 'import'") || ctx.contains("top level")
223    });
224    if at_root && is_illegal_top_level_definition(fragment) {
225        pe.message = "illegal top-level definition".to_string();
226        pe.code = Some("illegal_top_level_definition".to_string());
227        pe.expected = Some("'package', 'namespace', or 'import'".to_string());
228        pe.suggestion = Some(
229            "Wrap this declaration in `package ... { ... }` or `namespace ... { ... }`."
230                .to_string(),
231        );
232    }
233    pe
234}
235
236fn is_illegal_top_level_definition(fragment: &[u8]) -> bool {
237    let trimmed = trim_ascii_start(fragment);
238    !trimmed.starts_with(b"}")
239        && !trimmed.starts_with(b"//")
240        && !trimmed.starts_with(b"/*")
241        && lex::starts_with_any_keyword(trimmed, ILLEGAL_TOP_LEVEL_STARTERS)
242}
243
/// Returns `fragment` with its leading ASCII whitespace removed.
///
/// "Whitespace" follows `u8::is_ascii_whitespace`. An all-whitespace input yields an empty
/// slice.
fn trim_ascii_start(fragment: &[u8]) -> &[u8] {
    let start = fragment
        .iter()
        .position(|byte| !byte.is_ascii_whitespace())
        .unwrap_or(fragment.len());
    &fragment[start..]
}
254
255fn starts_with_missing_name_after_keyword(
256    fragment: &[u8],
257    keyword: &[u8],
258    trailing_keywords: &[&[u8]],
259) -> bool {
260    let mut fragment = trim_ascii_start(fragment);
261    if !lex::starts_with_keyword(fragment, keyword) {
262        return false;
263    }
264    fragment = &fragment[keyword.len()..];
265    while let Some(first) = fragment.first() {
266        if first.is_ascii_whitespace() {
267            fragment = &fragment[1..];
268            continue;
269        }
270        break;
271    }
272    for trailing in trailing_keywords {
273        if lex::starts_with_keyword(fragment, trailing) {
274            fragment = &fragment[trailing.len()..];
275            while let Some(first) = fragment.first() {
276                if first.is_ascii_whitespace() {
277                    fragment = &fragment[1..];
278                    continue;
279                }
280                break;
281            }
282        }
283    }
284    fragment.starts_with(b":")
285}
286
287fn starts_with_missing_type_after_keyword(
288    fragment: &[u8],
289    keyword: &[u8],
290    trailing_keywords: &[&[u8]],
291) -> bool {
292    let mut fragment = trim_ascii_start(fragment);
293    if !lex::starts_with_keyword(fragment, keyword) {
294        return false;
295    }
296    fragment = &fragment[keyword.len()..];
297    while let Some(first) = fragment.first() {
298        if first.is_ascii_whitespace() {
299            fragment = &fragment[1..];
300            continue;
301        }
302        break;
303    }
304    for trailing in trailing_keywords {
305        if lex::starts_with_keyword(fragment, trailing) {
306            fragment = &fragment[trailing.len()..];
307            while let Some(first) = fragment.first() {
308                if first.is_ascii_whitespace() {
309                    fragment = &fragment[1..];
310                    continue;
311                }
312                break;
313            }
314        }
315    }
316
317    let mut name_len = 0usize;
318    while name_len < fragment.len()
319        && (fragment[name_len].is_ascii_alphanumeric() || fragment[name_len] == b'_')
320    {
321        name_len += 1;
322    }
323    if name_len == 0 {
324        return false;
325    }
326    fragment = &fragment[name_len..];
327    while let Some(first) = fragment.first() {
328        if first.is_ascii_whitespace() {
329            fragment = &fragment[1..];
330            continue;
331        }
332        break;
333    }
334    if !fragment.starts_with(b":") {
335        return false;
336    }
337    fragment = &fragment[1..];
338    while let Some(first) = fragment.first() {
339        if first.is_ascii_whitespace() {
340            fragment = &fragment[1..];
341            continue;
342        }
343        break;
344    }
345
346    fragment.is_empty()
347        || fragment.starts_with(b";")
348        || fragment.starts_with(b"{")
349        || fragment.starts_with(b"}")
350        || lex::starts_with_keyword(fragment, b"then")
351        || lex::starts_with_keyword(fragment, b"if")
352        || lex::starts_with_keyword(fragment, b"do")
353}
354
355fn missing_name_diagnostic(fragment: &[u8]) -> Option<(&'static str, String, String, String)> {
356    #[allow(clippy::type_complexity)]
357    let cases: &[(&[u8], &[&[u8]], &str, &str)] = &[
358        (
359            b"subject",
360            &[],
361            "subject name",
362            "Use `subject laptop: Laptop;`.",
363        ),
364        (b"actor", &[], "actor name", "Use `actor user: User;`."),
365        (b"state", &[], "state name", "Use `state ready: Mode;`."),
366        (b"part", &[], "part name", "Use `part wheel: Wheel;`."),
367        (b"ref", &[], "reference name", "Use `ref sensor: Sensor;`."),
368        (b"port", &[], "port name", "Use `port power: PowerPort;`."),
369        (
370            b"attribute",
371            &[],
372            "attribute name",
373            "Use `attribute mass: MassValue;`.",
374        ),
375        (b"in", &[], "input name", "Use `in speed: Real;`."),
376        (b"out", &[], "output name", "Use `out result: Real;`."),
377        (
378            b"perform",
379            &[b"action"],
380            "action name",
381            "Use `perform action run: Runner;`.",
382        ),
383        (b"return", &[], "return name", "Use `return result: Real;`."),
384    ];
385
386    for (keyword, trailing, missing_what, suggestion) in cases {
387        if starts_with_missing_name_after_keyword(fragment, keyword, trailing) {
388            return Some((
389                "missing_member_name",
390                format!("expected {missing_what} before ':'"),
391                format!("{missing_what} before ':'"),
392                suggestion.to_string(),
393            ));
394        }
395    }
396    None
397}
398
399fn missing_type_diagnostic(fragment: &[u8]) -> Option<(&'static str, String, String, String)> {
400    #[allow(clippy::type_complexity)]
401    let cases: &[(&[u8], &[&[u8]], &str)] = &[
402        (b"subject", &[], "subject type"),
403        (b"actor", &[], "actor type"),
404        (b"state", &[], "state type"),
405        (b"part", &[], "part type"),
406        (b"ref", &[], "reference type"),
407        (b"port", &[], "port type"),
408        (b"attribute", &[], "attribute type"),
409        (b"in", &[], "input type"),
410        (b"out", &[], "output type"),
411        (b"perform", &[b"action"], "action type"),
412        (b"return", &[], "return type"),
413    ];
414
415    for &(keyword, trailing, missing_what) in cases {
416        if starts_with_missing_type_after_keyword(fragment, keyword, trailing) {
417            let keyword_label = String::from_utf8_lossy(keyword);
418            let sample_name = if keyword == &b"subject"[..] {
419                "laptop"
420            } else if keyword == &b"actor"[..] {
421                "user"
422            } else if keyword == &b"state"[..] {
423                "ready"
424            } else if keyword == &b"part"[..] {
425                "wheel"
426            } else if keyword == &b"ref"[..] {
427                "sensor"
428            } else if keyword == &b"port"[..] {
429                "power"
430            } else if keyword == &b"attribute"[..] {
431                "mass"
432            } else if keyword == &b"in"[..] {
433                "speed"
434            } else if keyword == &b"out"[..] {
435                "result"
436            } else if keyword == &b"perform"[..] {
437                "run"
438            } else if keyword == &b"return"[..] {
439                "result"
440            } else {
441                "member"
442            };
443            let sample_type = if keyword == &b"subject"[..] {
444                "Laptop"
445            } else if keyword == &b"actor"[..] {
446                "User"
447            } else if keyword == &b"state"[..] {
448                "Mode"
449            } else if keyword == &b"part"[..] {
450                "Wheel"
451            } else if keyword == &b"ref"[..] {
452                "Sensor"
453            } else if keyword == &b"port"[..] {
454                "PowerPort"
455            } else if keyword == &b"attribute"[..] {
456                "MassValue"
457            } else if keyword == &b"in"[..] || keyword == &b"out"[..] {
458                "Real"
459            } else if keyword == &b"perform"[..] {
460                "Runner"
461            } else if keyword == &b"return"[..] {
462                "Real"
463            } else {
464                "Type"
465            };
466            let suggestion = if keyword == &b"perform"[..] {
467                format!("Use `perform action {sample_name}: {sample_type};`.")
468            } else if keyword == &b"return"[..] {
469                format!("Use `return {sample_name}: {sample_type};`.")
470            } else {
471                format!("Use `{keyword_label} {sample_name}: {sample_type};`.")
472            };
473            return Some((
474                "missing_type_reference",
475                format!("expected {missing_what} after ':'"),
476                format!("{missing_what} after ':'"),
477                suggestion,
478            ));
479        }
480    }
481    None
482}
483
484fn invalid_expose_separator_diagnostic(
485    fragment: &[u8],
486) -> Option<(&'static str, String, String, String)> {
487    let mut fragment = trim_ascii_start(fragment);
488    if !lex::starts_with_keyword(fragment, b"expose") {
489        return None;
490    }
491    fragment = &fragment[b"expose".len()..];
492    while let Some(first) = fragment.first() {
493        if first.is_ascii_whitespace() {
494            fragment = &fragment[1..];
495            continue;
496        }
497        break;
498    }
499    if fragment.is_empty() {
500        return None;
501    }
502
503    let mut saw_dot = false;
504    let mut in_quoted_name = false;
505    for &b in fragment {
506        if b == b'\'' {
507            in_quoted_name = !in_quoted_name;
508            continue;
509        }
510        if in_quoted_name {
511            continue;
512        }
513        if matches!(b, b';' | b'[' | b'{' | b'}' | b'\n' | b'\r') {
514            break;
515        }
516        if b == b'.' {
517            saw_dot = true;
518            break;
519        }
520    }
521    if !saw_dot {
522        return None;
523    }
524
525    Some((
526        "invalid_qualified_name_separator",
527        "invalid qualified name in expose target: use '::' instead of '.'".to_string(),
528        "qualified name segments separated by '::'".to_string(),
529        "Replace '.' with '::' in the expose target (example: `expose A::B;`).".to_string(),
530    ))
531}
532
533fn missing_semicolon_or_body_diagnostic(
534    fragment: &[u8],
535) -> Option<(&'static str, String, String, String)> {
536    let fragment = trim_ascii_start(fragment);
537    let cases: &[(&[u8], &str, &str)] = &[
538        (
539            b"action def",
540            "action definition",
541            "Use `action def Run;` or `action def Run { ... }`.",
542        ),
543        (
544            b"part def",
545            "part definition",
546            "Use `part def Wheel;` or `part def Wheel { ... }`.",
547        ),
548        (
549            b"requirement def",
550            "requirement definition",
551            "Use `requirement def R;` or `requirement def R { ... }`.",
552        ),
553        (
554            b"state def",
555            "state definition",
556            "Use `state def Ready;` or `state def Ready { ... }`.",
557        ),
558        (
559            b"view",
560            "view declaration",
561            "Use `view structure: GeneralView;` or `view structure: GeneralView { ... }`.",
562        ),
563        (
564            b"rendering def",
565            "rendering definition",
566            "Use `rendering def Diagram;` or `rendering def Diagram { ... }`.",
567        ),
568    ];
569
570    for (prefix, label, suggestion) in cases {
571        if fragment.starts_with(prefix) {
572            return Some((
573                "missing_body_or_semicolon",
574                format!("expected ';' or '{{' after {label} header"),
575                "';' or '{' after declaration header".to_string(),
576                suggestion.to_string(),
577            ));
578        }
579    }
580    None
581}
582
583fn invalid_typing_operator_diagnostic(
584    fragment: &[u8],
585) -> Option<(&'static str, String, String, String)> {
586    let fragment = trim_ascii_start(fragment);
587    let cases: &[(&[u8], &str, &str)] = &[
588        (
589            b"part def",
590            "part definition specialization",
591            "Use `part def Vehicle :> BaseVehicle;` when specializing a definition.",
592        ),
593        (
594            b"port def",
595            "port definition specialization",
596            "Use `port def PowerPort :> BasePort;` when specializing a definition.",
597        ),
598    ];
599
600    for (prefix, label, suggestion) in cases {
601        if fragment.starts_with(prefix) && fragment.windows(3).any(|w| w == b": ") {
602            return Some((
603                "invalid_typing_operator",
604                format!("invalid typing operator in {label}: use ':>' instead of ':'"),
605                "':>' specialization operator".to_string(),
606                suggestion.to_string(),
607            ));
608        }
609    }
610
611    if fragment.starts_with(b"part def")
612        && fragment.contains(&b':')
613        && !fragment.windows(2).any(|w| w == b":>")
614    {
615        return Some((
616            "invalid_typing_operator",
617            "invalid typing operator in part definition: use ':>' instead of ':'".to_string(),
618            "':>' specialization operator".to_string(),
619            "Use `part def Vehicle :> BaseVehicle;` when specializing a definition.".to_string(),
620        ));
621    }
622
623    None
624}
625
626fn missing_expression_after_operator_diagnostic(
627    fragment: &[u8],
628) -> Option<(&'static str, String, String, String)> {
629    let fragment = trim_ascii_start(fragment);
630    let cases: &[(&[u8], &str, &str)] = &[
631        (
632            b"bind",
633            "binding expression after '='",
634            "Use `bind x = y;`.",
635        ),
636        (
637            b"assign",
638            "assignment expression after ':='",
639            "Use `assign x := y;`.",
640        ),
641        (
642            b"first",
643            "target after 'then'",
644            "Use `first start then finish;`.",
645        ),
646        (
647            b"flow",
648            "target after 'to'",
649            "Use `flow source to target;`.",
650        ),
651        (
652            b"satisfy",
653            "target after 'by'",
654            "Use `satisfy Req by implementation;`.",
655        ),
656    ];
657
658    for (keyword, expected, suggestion) in cases {
659        if !lex::starts_with_keyword(fragment, keyword) {
660            continue;
661        }
662        let text = String::from_utf8_lossy(fragment);
663        if text.contains("= ;") || text.trim_end().ends_with('=') {
664            return Some((
665                "missing_expression_after_operator",
666                "expected expression after '='".to_string(),
667                expected.to_string(),
668                suggestion.to_string(),
669            ));
670        }
671        if text.contains(":= ;") || text.trim_end().ends_with(":=") {
672            return Some((
673                "missing_expression_after_operator",
674                "expected expression after ':='".to_string(),
675                expected.to_string(),
676                suggestion.to_string(),
677            ));
678        }
679        if text.contains(" then ;") || text.trim_end().ends_with(" then") {
680            return Some((
681                "missing_expression_after_operator",
682                "expected target after 'then'".to_string(),
683                expected.to_string(),
684                suggestion.to_string(),
685            ));
686        }
687        if text.contains(" to ;") || text.trim_end().ends_with(" to") {
688            return Some((
689                "missing_expression_after_operator",
690                "expected target after 'to'".to_string(),
691                expected.to_string(),
692                suggestion.to_string(),
693            ));
694        }
695        if text.contains(" by ;") || text.trim_end().ends_with(" by") {
696            return Some((
697                "missing_expression_after_operator",
698                "expected target after 'by'".to_string(),
699                expected.to_string(),
700                suggestion.to_string(),
701            ));
702        }
703    }
704    None
705}
706
707fn invalid_unit_reference_diagnostic(
708    fragment: &[u8],
709) -> Option<(&'static str, String, String, String)> {
710    let fragment = trim_ascii_start(fragment);
711    let text = String::from_utf8_lossy(fragment);
712    if !(text.contains('[') && text.contains(']')) {
713        return None;
714    }
715
716    if text.contains("[]") || text.contains("[ ]") {
717        return Some((
718            "invalid_unit_reference",
719            "expected unit name inside '[ ]'".to_string(),
720            "unit name inside '[ ]'".to_string(),
721            "Use a concrete unit such as `1750 [kg]`.".to_string(),
722        ));
723    }
724
725    if text.contains("[;")
726        || text.contains("[ ;")
727        || text.contains("[)")
728        || text.contains("[ ]")
729        || text.contains("[,")
730    {
731        return Some((
732            "invalid_unit_reference",
733            "invalid unit expression inside '[ ]'".to_string(),
734            "unit name inside '[ ]'".to_string(),
735            "Use a unit symbol or qualified unit name (example: `[kg]` or `[SI::kg]`).".to_string(),
736        ));
737    }
738
739    None
740}
741
742fn unexpected_keyword_in_scope_diagnostic(
743    fragment: &[u8],
744    starters: &[&[u8]],
745    scope_label: &str,
746) -> Option<(&'static str, String, String, String)> {
747    let fragment = trim_ascii_start(fragment);
748    if fragment.is_empty() || fragment.starts_with(b"#") || fragment.starts_with(b"@") {
749        return None;
750    }
751    let keyword_end = fragment
752        .iter()
753        .position(|b| !b.is_ascii_alphanumeric() && *b != b'_')
754        .unwrap_or(fragment.len());
755    if keyword_end == 0 {
756        return None;
757    }
758    let keyword = &fragment[..keyword_end];
759    if lex::starts_with_any_keyword(keyword, starters) {
760        return None;
761    }
762    let keyword_text = String::from_utf8_lossy(keyword);
763    Some((
764        "unexpected_keyword_in_scope",
765        format!("unexpected keyword `{keyword_text}` in {scope_label}"),
766        format!("valid {scope_label} element"),
767        format!("Replace `{keyword_text}` with a valid {scope_label} member or remove it."),
768    ))
769}
770
771fn invalid_bare_identifier_in_body_diagnostic(
772    fragment: &[u8],
773    scope_label: &str,
774) -> Option<(&'static str, String, String, String)> {
775    let is_action = scope_label.contains("action body");
776    let is_state = scope_label.contains("state body");
777    if !is_action && !is_state {
778        return None;
779    }
780
781    let fragment = trim_ascii_start(fragment);
782    let ident_end = fragment
783        .iter()
784        .position(|b| !b.is_ascii_alphanumeric() && *b != b'_')
785        .unwrap_or(fragment.len());
786    if ident_end == 0 || !fragment[0].is_ascii_alphabetic() {
787        return None;
788    }
789
790    let ident = &fragment[..ident_end];
791    let rest = trim_ascii_start(&fragment[ident_end..]);
792    if !(rest.starts_with(b";")
793        || rest.starts_with(b"}")
794        || rest.starts_with(b"\n")
795        || rest.starts_with(b"\r"))
796    {
797        return None;
798    }
799
800    let ident_text = String::from_utf8_lossy(ident);
801    if is_action {
802        Some((
803            "invalid_bare_identifier_in_action_body",
804            format!("bare identifier `{ident_text}` is not a valid action body member"),
805            "action body member such as `perform`, `bind`, `in`, or `out`".to_string(),
806            format!(
807                "Use an explicit action-body form, for example `perform {ident_text};`, `bind ... = ...;`, or an `in`/`out` parameter declaration."
808            ),
809        ))
810    } else {
811        Some((
812            "invalid_bare_identifier_in_state_body",
813            format!("bare identifier `{ident_text}` is not a valid state body member"),
814            "state body member such as `entry`, `transition`, `then`, `state`, or `ref`"
815                .to_string(),
816            format!(
817                "Use an explicit state-body form, for example `then {ident_text};`, `transition ...;`, or a nested `state` member."
818            ),
819        ))
820    }
821}
822
823fn unexpected_closing_brace_parse_error(input: Input<'_>) -> ParseError {
824    ParseError::new("unexpected closing '}'")
825        .with_location(
826            input.location_offset(),
827            input.location_line(),
828            input.get_column(),
829        )
830        .with_length(1)
831        .with_code("unexpected_closing_brace")
832        .with_expected("valid declaration or end of current body")
833        .with_found("}")
834        .with_suggestion("Remove this '}' or add the missing opening '{' before it.")
835        .with_severity(DiagnosticSeverity::Error)
836        .with_category(DiagnosticCategory::ParseError)
837}
838
839fn missing_closing_brace_error(bytes: &[u8], input: Input<'_>) -> Option<ParseError> {
840    if !input.fragment().is_empty() {
841        return None;
842    }
843    let consumed = &bytes[..input.location_offset().min(bytes.len())];
844    let opens = consumed.iter().filter(|&&b| b == b'{').count();
845    let closes = consumed.iter().filter(|&&b| b == b'}').count();
846    if opens <= closes {
847        return None;
848    }
849    Some(missing_closing_brace_error_at_eof(consumed))
850}
851
852fn missing_closing_brace_error_at_eof(bytes: &[u8]) -> ParseError {
853    let (line, column) = eof_line_column(bytes);
854    ParseError::new("missing closing '}'")
855        .with_location(bytes.len(), line, column)
856        .with_length(1)
857        .with_code("missing_closing_brace")
858        .with_expected("'}'")
859        .with_suggestion("Add '}' to close the open body.")
860        .with_category(DiagnosticCategory::ParseError)
861}
862
863fn category_from_code(code: &str) -> DiagnosticCategory {
864    if code == "unsupported_annotation_syntax" {
865        DiagnosticCategory::UnsupportedGrammarForm
866    } else if code == "unresolved_symbol" {
867        DiagnosticCategory::UnresolvedSymbol
868    } else {
869        DiagnosticCategory::ParseError
870    }
871}
872
/// Returns `true` when `bytes` contains more `{` than `}`.
///
/// Braces are counted byte-wise, with no awareness of comments or string literals.
fn has_unclosed_brace(bytes: &[u8]) -> bool {
    let (opened, closed) = bytes
        .iter()
        .fold((0usize, 0usize), |(opened, closed), &byte| match byte {
            b'{' => (opened + 1, closed),
            b'}' => (opened, closed + 1),
            _ => (opened, closed),
        });
    opened > closed
}
878
/// Computes the 1-based `(line, column)` of the position just past the end of `bytes`.
///
/// Each `\n` starts a new line and resets the column to 1; every other byte (including
/// `\r`) advances the column by one, so columns are counted in bytes.
fn eof_line_column(bytes: &[u8]) -> (u32, usize) {
    bytes.iter().fold((1u32, 1usize), |(line, column), &byte| {
        if byte == b'\n' {
            (line + 1, 1)
        } else {
            (line, column + 1)
        }
    })
}
892
893pub(crate) fn build_recovery_error_node(
894    input: Input<'_>,
895    starters: &[&[u8]],
896    scope_label: &str,
897    generic_code: &str,
898) -> ParseErrorNode {
899    build_recovery_error_node_from_span(input, input, starters, scope_label, generic_code)
900}
901
/// Classification assigned to an input region that had to be skipped during error recovery.
///
/// Every variant with fields carries the same four strings: the diagnostic `code`, the
/// `message`, a description of what was `expected`, and a `suggestion`. The unit variants
/// carry no payload.
enum RecoveryClassification {
    /// A member keyword was directly followed by `:` with no name in between.
    MissingMemberName {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    MissingTypeReference {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    InvalidQualifiedNameSeparator {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    MissingBodyOrSemicolon {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    MissingExpressionAfterOperator {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    InvalidUnitReference {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    InvalidTypingOperator {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    InvalidBareIdentifierInBody {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    UnexpectedKeywordInScope {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    MissingSemicolon,
    UnsupportedAnnotation,
    Unexpected,
}
961
/// Returns `fragment` without its trailing ASCII whitespace bytes.
///
/// "Whitespace" is whatever `u8::is_ascii_whitespace` accepts; an all-whitespace or empty
/// input yields an empty slice.
fn trim_ascii_end(fragment: &[u8]) -> &[u8] {
    // Index one past the last non-whitespace byte, or 0 when there is none.
    let kept_len = fragment
        .iter()
        .rposition(|byte| !byte.is_ascii_whitespace())
        .map_or(0, |last_kept| last_kept + 1);
    &fragment[..kept_len]
}
972
/// Decides which [`RecoveryClassification`] best describes a region skipped by recovery.
///
/// * `input` - position where the element that failed to parse starts.
/// * `recovery_end` - position where recovery stopped skipping.
/// * `starters` - keywords checked with `lex::starts_with_any_keyword`.
/// * `scope_label` - scope name passed to the scope-aware helpers.
///
/// The checks run in a fixed order and the first one that matches wins, so reordering them
/// changes which diagnostic is reported.
fn classify_recovery(
    input: Input<'_>,
    recovery_end: Input<'_>,
    starters: &[&[u8]],
    scope_label: &str,
) -> RecoveryClassification {
    // NOTE(review): `trim_ascii_start` is defined outside this view; presumably it strips
    // leading ASCII whitespace.
    let trimmed = trim_ascii_start(input.fragment());

    // Targeted detectors that look only at the start of the failed element.
    if let Some((code, message, expected, suggestion)) = missing_name_diagnostic(trimmed) {
        return RecoveryClassification::MissingMemberName {
            code: code.to_string(),
            message,
            expected,
            suggestion,
        };
    }

    if let Some((code, message, expected, suggestion)) = missing_type_diagnostic(trimmed) {
        return RecoveryClassification::MissingTypeReference {
            code: code.to_string(),
            message,
            expected,
            suggestion,
        };
    }

    if let Some((code, message, expected, suggestion)) =
        invalid_expose_separator_diagnostic(trimmed)
    {
        return RecoveryClassification::InvalidQualifiedNameSeparator {
            code: code.to_string(),
            message,
            expected,
            suggestion,
        };
    }

    if let Some((code, message, expected, suggestion)) = invalid_typing_operator_diagnostic(trimmed)
    {
        return RecoveryClassification::InvalidTypingOperator {
            code: code.to_string(),
            message,
            expected,
            suggestion,
        };
    }

    if let Some((code, message, expected, suggestion)) =
        missing_expression_after_operator_diagnostic(trimmed)
    {
        return RecoveryClassification::MissingExpressionAfterOperator {
            code: code.to_string(),
            message,
            expected,
            suggestion,
        };
    }

    if let Some((code, message, expected, suggestion)) = invalid_unit_reference_diagnostic(trimmed)
    {
        return RecoveryClassification::InvalidUnitReference {
            code: code.to_string(),
            message,
            expected,
            suggestion,
        };
    }

    if let Some((code, message, expected, suggestion)) =
        missing_semicolon_or_body_diagnostic(trimmed)
    {
        return RecoveryClassification::MissingBodyOrSemicolon {
            code: code.to_string(),
            message,
            expected,
            suggestion,
        };
    }

    // Bytes skipped by recovery: the offset distance from `input` to `recovery_end`, clamped
    // to the fragment length so the slice below cannot go out of bounds.
    let consumed_len = recovery_end
        .location_offset()
        .saturating_sub(input.location_offset())
        .min(input.fragment().len());
    let raw_consumed = &input.fragment()[..consumed_len];
    let consumed = trim_ascii_end(raw_consumed);
    // True when recovery moved forward and what follows (after `lex::ws_and_comments`, or
    // `recovery_end` itself if that parser fails) is end of input, a '}', or a starter keyword.
    let recovered_to_boundary = recovery_end.location_offset() > input.location_offset() && {
        let (next, _) = lex::ws_and_comments(recovery_end).unwrap_or((recovery_end, ()));
        next.fragment().is_empty()
            || next.fragment().starts_with(b"}")
            || lex::starts_with_any_keyword(next.fragment(), starters)
    };

    // Newline detection uses the untrimmed region, so a trailing line break still counts.
    let consumed_has_newline = raw_consumed.contains(&b'\n') || raw_consumed.contains(&b'\r');
    let first_line_end = consumed
        .iter()
        .position(|b| matches!(*b, b'\n' | b'\r'))
        .unwrap_or(consumed.len());
    let first_line = trim_ascii_end(&consumed[..first_line_end]);
    let consumed_has_delimiters = consumed
        .iter()
        .any(|b| matches!(*b, b'{' | b'}' | b'(' | b')' | b'[' | b']'));
    // The first line ends in an operator or separator byte, i.e. it looks unfinished.
    let consumed_ends_incomplete = first_line.last().is_some_and(|b| {
        matches!(
            *b,
            b':' | b'=' | b',' | b'.' | b'+' | b'-' | b'*' | b'/' | b'>' | b'<' | b'|'
        )
    });
    let first_line_has_semicolon = first_line.contains(&b';');
    // Missing-semicolon heuristic: the element starts with a starter keyword, recovery
    // reached a boundary across a line break (or stopped directly at '}'), and the skipped
    // text is non-empty, has no brackets, does not look unfinished, and its first line has
    // no ';'.
    if recovered_to_boundary
        && lex::starts_with_any_keyword(trimmed, starters)
        && (consumed_has_newline || recovery_end.fragment().starts_with(b"}"))
        && !consumed.is_empty()
        && !consumed_has_delimiters
        && !consumed_ends_incomplete
        && !first_line_has_semicolon
    {
        return RecoveryClassification::MissingSemicolon;
    }

    // Elements introduced by '#' or '@' are reported as unsupported annotation syntax.
    if lex::starts_with_keyword(trimmed, b"#") || lex::starts_with_keyword(trimmed, b"@") {
        return RecoveryClassification::UnsupportedAnnotation;
    }

    // Scope-aware detectors run last, just before the generic fallback.
    if let Some((code, message, expected, suggestion)) =
        invalid_bare_identifier_in_body_diagnostic(trimmed, scope_label)
    {
        return RecoveryClassification::InvalidBareIdentifierInBody {
            code: code.to_string(),
            message,
            expected,
            suggestion,
        };
    }

    if let Some((code, message, expected, suggestion)) =
        unexpected_keyword_in_scope_diagnostic(trimmed, starters, scope_label)
    {
        return RecoveryClassification::UnexpectedKeywordInScope {
            code: code.to_string(),
            message,
            expected,
            suggestion,
        };
    }

    RecoveryClassification::Unexpected
}
1120
1121pub(crate) fn build_recovery_error_node_from_span(
1122    input: Input<'_>,
1123    recovery_end: Input<'_>,
1124    starters: &[&[u8]],
1125    scope_label: &str,
1126    generic_code: &str,
1127) -> ParseErrorNode {
1128    match classify_recovery(input, recovery_end, starters, scope_label) {
1129        RecoveryClassification::MissingMemberName {
1130            code,
1131            message,
1132            expected,
1133            suggestion,
1134        }
1135        | RecoveryClassification::MissingTypeReference {
1136            code,
1137            message,
1138            expected,
1139            suggestion,
1140        }
1141        | RecoveryClassification::InvalidQualifiedNameSeparator {
1142            code,
1143            message,
1144            expected,
1145            suggestion,
1146        }
1147        | RecoveryClassification::MissingBodyOrSemicolon {
1148            code,
1149            message,
1150            expected,
1151            suggestion,
1152        }
1153        | RecoveryClassification::MissingExpressionAfterOperator {
1154            code,
1155            message,
1156            expected,
1157            suggestion,
1158        }
1159        | RecoveryClassification::InvalidUnitReference {
1160            code,
1161            message,
1162            expected,
1163            suggestion,
1164        }
1165        | RecoveryClassification::InvalidTypingOperator {
1166            code,
1167            message,
1168            expected,
1169            suggestion,
1170        }
1171        | RecoveryClassification::InvalidBareIdentifierInBody {
1172            code,
1173            message,
1174            expected,
1175            suggestion,
1176        }
1177        | RecoveryClassification::UnexpectedKeywordInScope {
1178            code,
1179            message,
1180            expected,
1181            suggestion,
1182        } => ParseErrorNode {
1183            message,
1184            code,
1185            expected: Some(expected),
1186            found: recovery_found_snippet_from_span(input, recovery_end),
1187            suggestion: Some(suggestion),
1188            category: Some(DiagnosticCategory::ParseError),
1189        },
1190        RecoveryClassification::MissingSemicolon => ParseErrorNode {
1191            message: "missing semicolon before next declaration".to_string(),
1192            code: "missing_semicolon".to_string(),
1193            expected: Some("';'".to_string()),
1194            found: recovery_found_snippet_from_span(input, recovery_end),
1195            suggestion: Some("Insert ';' before this declaration.".to_string()),
1196            category: Some(DiagnosticCategory::ParseError),
1197        },
1198        RecoveryClassification::UnsupportedAnnotation => ParseErrorNode {
1199            message: format!("unsupported annotation syntax in {scope_label}"),
1200            code: "unsupported_annotation_syntax".to_string(),
1201            expected: Some(format!("valid {scope_label} element")),
1202            found: recovery_found_snippet_from_span(input, recovery_end),
1203            suggestion: Some(
1204                "Remove this annotation or extend the parser to support annotated declarations."
1205                    .to_string(),
1206            ),
1207            category: Some(DiagnosticCategory::UnsupportedGrammarForm),
1208        },
1209        RecoveryClassification::Unexpected => ParseErrorNode {
1210            message: format!("unexpected token in {scope_label}"),
1211            code: generic_code.to_string(),
1212            expected: Some(format!("valid {scope_label} element")),
1213            found: recovery_found_snippet_from_span(input, recovery_end),
1214            suggestion: Some(format!("Fix this {scope_label} member and re-run parsing.")),
1215            category: Some(DiagnosticCategory::ParseError),
1216        },
1217    }
1218}
1219
1220fn parse_error_from_recovery_node(span: &crate::ast::Span, node: &ParseErrorNode) -> ParseError {
1221    let mut err = ParseError::new(node.message.clone())
1222        .with_location(span.offset, span.line, span.column)
1223        .with_length(span.len.max(1))
1224        .with_code(node.code.clone())
1225        .with_category(
1226            node.category
1227                .unwrap_or_else(|| category_from_code(node.code.as_str())),
1228        );
1229    let severity = if node.code == "unsupported_annotation_syntax" {
1230        DiagnosticSeverity::Warning
1231    } else {
1232        DiagnosticSeverity::Error
1233    };
1234    err = err.with_severity(severity);
1235    if let Some(expected) = &node.expected {
1236        err = err.with_expected(expected.clone());
1237    }
1238    if let Some(found) = &node.found {
1239        err = err.with_found(found.clone());
1240    }
1241    if let Some(suggestion) = &node.suggestion {
1242        err = err.with_suggestion(suggestion.clone());
1243    }
1244    err
1245}
1246
1247fn diagnostic_specificity(err: &ParseError) -> u8 {
1248    match err.code.as_deref() {
1249        Some("missing_member_name")
1250        | Some("missing_type_reference")
1251        | Some("invalid_qualified_name_separator")
1252        | Some("invalid_typing_operator")
1253        | Some("missing_expression_after_operator")
1254        | Some("invalid_unit_reference")
1255        | Some("missing_body_or_semicolon")
1256        | Some("missing_semicolon")
1257        | Some("unexpected_closing_brace")
1258        | Some("missing_closing_brace")
1259        | Some("unsupported_annotation_syntax")
1260        | Some("invalid_bare_identifier_in_action_body")
1261        | Some("invalid_bare_identifier_in_state_body")
1262        | Some("recovery_cascade_suppressed")
1263        | Some("unexpected_keyword_in_scope") => 5,
1264        Some("illegal_top_level_definition") => 4,
1265        Some(code) if code.starts_with("recovered_") => 2,
1266        Some("expected_end_of_input") | Some("expected_keyword") => 1,
1267        _ => 3,
1268    }
1269}
1270
1271fn dedup_errors(mut errors: Vec<ParseError>) -> Vec<ParseError> {
1272    errors.sort_by_key(|e| {
1273        (
1274            e.offset.unwrap_or(usize::MAX),
1275            e.line.unwrap_or(u32::MAX),
1276            e.column.unwrap_or(usize::MAX),
1277            std::cmp::Reverse(diagnostic_specificity(e)),
1278        )
1279    });
1280
1281    let mut deduped = Vec::new();
1282    for err in errors {
1283        let duplicate = deduped.iter().any(|existing: &ParseError| {
1284            let same_start = existing.offset == err.offset
1285                && existing.line == err.line
1286                && existing.column == err.column;
1287            let same_found = existing.found == err.found;
1288            let existing_specificity = diagnostic_specificity(existing);
1289            let err_specificity = diagnostic_specificity(&err);
1290            same_start
1291                && (same_found || existing.code == err.code)
1292                && existing_specificity >= err_specificity
1293        });
1294        if !duplicate {
1295            deduped.push(err);
1296        }
1297    }
1298
1299    deduped.sort_by_key(|e| (e.offset.unwrap_or(usize::MAX), e.line.unwrap_or(u32::MAX)));
1300    deduped
1301}
1302
1303fn is_cascade_candidate(err: &ParseError) -> bool {
1304    matches!(err.code.as_deref(), Some("missing_semicolon"))
1305        || err
1306            .code
1307            .as_deref()
1308            .is_some_and(|code| code.starts_with("recovered_"))
1309}
1310
1311fn cascade_family(err: &ParseError) -> Option<&str> {
1312    if matches!(err.code.as_deref(), Some("missing_semicolon")) {
1313        Some("missing_semicolon")
1314    } else if err
1315        .code
1316        .as_deref()
1317        .is_some_and(|code| code.starts_with("recovered_"))
1318    {
1319        Some("recovered")
1320    } else {
1321        None
1322    }
1323}
1324
1325fn make_cascade_summary(run: &[ParseError]) -> Option<ParseError> {
1326    let summary_anchor = run.first()?;
1327    let suppressed = run.len().saturating_sub(3);
1328    let family = cascade_family(summary_anchor).unwrap_or("recovery");
1329    let mut err = ParseError::new(format!(
1330        "suppressed {suppressed} cascading {family} diagnostic{} after earlier recovery errors",
1331        if suppressed == 1 { "" } else { "s" }
1332    ))
1333    .with_location(
1334        summary_anchor.offset?,
1335        summary_anchor.line?,
1336        summary_anchor.column?,
1337    )
1338    .with_length(summary_anchor.length.unwrap_or(1).max(1))
1339    .with_code("recovery_cascade_suppressed")
1340    .with_expected("fix the first syntax error in this body")
1341    .with_suggestion(
1342        "Fix the earliest diagnostic in this body first; later syntax errors may be cascades.",
1343    )
1344    .with_severity(DiagnosticSeverity::Warning)
1345    .with_category(DiagnosticCategory::ParseError);
1346    if let Some(found) = &summary_anchor.found {
1347        err = err.with_found(found.clone());
1348    }
1349    Some(err)
1350}
1351
1352fn suppress_diagnostic_cascades(errors: Vec<ParseError>) -> Vec<ParseError> {
1353    const MAX_UNSUMMARIZED_CASCADE: usize = 3;
1354
1355    let mut output = Vec::new();
1356    let mut run: Vec<ParseError> = Vec::new();
1357
1358    let flush_run = |run: &mut Vec<ParseError>, output: &mut Vec<ParseError>| {
1359        if run.len() <= MAX_UNSUMMARIZED_CASCADE {
1360            output.append(run);
1361        } else {
1362            output.extend(run.drain(..MAX_UNSUMMARIZED_CASCADE));
1363            if let Some(summary) = make_cascade_summary(run) {
1364                output.push(summary);
1365            }
1366            run.clear();
1367        }
1368    };
1369
1370    for err in errors {
1371        let continues_run = run.last().is_some_and(|previous| {
1372            is_cascade_candidate(&err)
1373                && cascade_family(previous) == cascade_family(&err)
1374                && previous.line.zip(err.line).is_some_and(|(a, b)| b <= a + 1)
1375        });
1376
1377        if is_cascade_candidate(&err) && (run.is_empty() || continues_run) {
1378            run.push(err);
1379        } else {
1380            flush_run(&mut run, &mut output);
1381            if is_cascade_candidate(&err) {
1382                run.push(err);
1383            } else {
1384                output.push(err);
1385            }
1386        }
1387    }
1388    flush_run(&mut run, &mut output);
1389    output.sort_by_key(|e| (e.offset.unwrap_or(usize::MAX), e.line.unwrap_or(u32::MAX)));
1390    output
1391}
1392
1393fn root_body_recovery_error(input: Input<'_>, scope: &str) -> ParseError {
1394    let (found, len) = fragment_to_found_snippet(input.fragment());
1395    let mut err = ParseError::new(format!(
1396        "could not parse {scope} body; skipped to next root element"
1397    ))
1398    .with_location(
1399        input.location_offset(),
1400        input.location_line(),
1401        input.get_column(),
1402    )
1403    .with_length(len.max(1))
1404    .with_code("recovered_root_body")
1405    .with_expected(format!("valid {scope} body"))
1406    .with_suggestion(
1407        "Fix the first syntax error in this body; later root-level diagnostics may be cascades.",
1408    )
1409    .with_severity(DiagnosticSeverity::Error)
1410    .with_category(DiagnosticCategory::ParseError);
1411    if !found.is_empty() {
1412        err = err.with_found(found);
1413    }
1414    err
1415}
1416
1417fn root_body_scope(fragment: &[u8]) -> Option<&'static str> {
1418    let fragment = trim_ascii_start(fragment);
1419    if lex::starts_with_keyword(fragment, b"package")
1420        || lex::starts_with_keyword(fragment, b"library")
1421        || lex::starts_with_keyword(fragment, b"standard")
1422    {
1423        Some("package")
1424    } else if lex::starts_with_keyword(fragment, b"namespace") {
1425        Some("namespace")
1426    } else {
1427        None
1428    }
1429}
1430
1431fn collect_requirement_body_errors(body: &RequirementDefBody, errors: &mut Vec<ParseError>) {
1432    if let RequirementDefBody::Brace { elements } = body {
1433        for element in elements {
1434            match &element.value {
1435                RequirementDefBodyElement::Error(n) => {
1436                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1437                }
1438                RequirementDefBodyElement::Frame(n) => {
1439                    collect_requirement_body_errors(&n.value.body, errors)
1440                }
1441                _ => {}
1442            }
1443        }
1444    }
1445}
1446
1447fn collect_action_def_body_errors(body: &ActionDefBody, errors: &mut Vec<ParseError>) {
1448    if let ActionDefBody::Brace { elements } = body {
1449        for element in elements {
1450            if let ActionDefBodyElement::Error(n) = &element.value {
1451                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1452            }
1453        }
1454    }
1455}
1456
1457fn collect_action_usage_body_errors(body: &ActionUsageBody, errors: &mut Vec<ParseError>) {
1458    if let ActionUsageBody::Brace { elements } = body {
1459        for element in elements {
1460            match &element.value {
1461                ActionUsageBodyElement::Error(n) => {
1462                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1463                }
1464                ActionUsageBodyElement::ActionUsage(n) => {
1465                    collect_action_usage_body_errors(&n.value.body, errors)
1466                }
1467                _ => {}
1468            }
1469        }
1470    }
1471}
1472
1473fn collect_state_body_errors(body: &StateDefBody, errors: &mut Vec<ParseError>) {
1474    if let StateDefBody::Brace { elements } = body {
1475        for element in elements {
1476            match &element.value {
1477                StateDefBodyElement::Error(n) => {
1478                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1479                }
1480                StateDefBodyElement::Entry(n) => collect_state_body_errors(&n.value.body, errors),
1481                StateDefBodyElement::RequirementUsage(n) => {
1482                    collect_requirement_body_errors(&n.value.body, errors)
1483                }
1484                StateDefBodyElement::StateUsage(n) => {
1485                    collect_state_body_errors(&n.value.body, errors)
1486                }
1487                _ => {}
1488            }
1489        }
1490    }
1491}
1492
1493fn collect_use_case_body_errors(body: &UseCaseDefBody, errors: &mut Vec<ParseError>) {
1494    if let UseCaseDefBody::Brace { elements } = body {
1495        for element in elements {
1496            if let UseCaseDefBodyElement::Error(n) = &element.value {
1497                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1498            }
1499        }
1500    }
1501}
1502
1503fn collect_constraint_body_errors(body: &ConstraintDefBody, errors: &mut Vec<ParseError>) {
1504    if let ConstraintDefBody::Brace { elements } = body {
1505        for element in elements {
1506            if let ConstraintDefBodyElement::Error(n) = &element.value {
1507                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1508            }
1509        }
1510    }
1511}
1512
1513fn collect_calc_body_errors(body: &CalcDefBody, errors: &mut Vec<ParseError>) {
1514    if let CalcDefBody::Brace { elements } = body {
1515        for element in elements {
1516            if let CalcDefBodyElement::Error(n) = &element.value {
1517                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1518            }
1519        }
1520    }
1521}
1522
1523fn collect_view_def_body_errors(body: &ViewDefBody, errors: &mut Vec<ParseError>) {
1524    if let ViewDefBody::Brace { elements } = body {
1525        for element in elements {
1526            if let ViewDefBodyElement::Error(n) = &element.value {
1527                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1528            }
1529        }
1530    }
1531}
1532
1533fn collect_view_body_errors(body: &ViewBody, errors: &mut Vec<ParseError>) {
1534    if let ViewBody::Brace { elements } = body {
1535        for element in elements {
1536            if let ViewBodyElement::Error(n) = &element.value {
1537                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1538            }
1539        }
1540    }
1541}
1542
1543fn collect_part_def_body_errors(body: &PartDefBody, errors: &mut Vec<ParseError>) {
1544    if let PartDefBody::Brace { elements } = body {
1545        for element in elements {
1546            match &element.value {
1547                PartDefBodyElement::Error(n) => {
1548                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1549                }
1550                PartDefBodyElement::PartUsage(n) => {
1551                    collect_part_usage_body_errors(&n.value.body, errors)
1552                }
1553                PartDefBodyElement::Perform(n) => {
1554                    collect_perform_body_errors(&n.value.body, errors)
1555                }
1556                _ => {}
1557            }
1558        }
1559    }
1560}
1561
1562fn collect_perform_body_errors(body: &crate::ast::PerformBody, _errors: &mut Vec<ParseError>) {
1563    match body {
1564        crate::ast::PerformBody::Semicolon => {}
1565        crate::ast::PerformBody::Brace { .. } => {}
1566    }
1567}
1568
1569fn collect_part_usage_body_errors(body: &PartUsageBody, errors: &mut Vec<ParseError>) {
1570    if let PartUsageBody::Brace { elements } = body {
1571        for element in elements {
1572            match &element.value {
1573                PartUsageBodyElement::Error(n) => {
1574                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1575                }
1576                PartUsageBodyElement::PartUsage(n) => {
1577                    collect_part_usage_body_errors(&n.value.body, errors)
1578                }
1579                PartUsageBodyElement::Perform(n) => {
1580                    collect_perform_body_errors(&n.value.body, errors)
1581                }
1582                PartUsageBodyElement::StateUsage(n) => {
1583                    collect_state_body_errors(&n.value.body, errors)
1584                }
1585                _ => {}
1586            }
1587        }
1588    }
1589}
1590
1591fn collect_package_body_errors(body: &PackageBody, errors: &mut Vec<ParseError>) {
1592    if let PackageBody::Brace { elements } = body {
1593        for element in elements {
1594            match &element.value {
1595                PackageBodyElement::Error(n) => {
1596                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1597                }
1598                PackageBodyElement::Package(n) => {
1599                    collect_package_body_errors(&n.value.body, errors)
1600                }
1601                PackageBodyElement::LibraryPackage(n) => {
1602                    collect_package_body_errors(&n.value.body, errors)
1603                }
1604                PackageBodyElement::PartDef(n) => {
1605                    collect_part_def_body_errors(&n.value.body, errors)
1606                }
1607                PackageBodyElement::PartUsage(n) => {
1608                    collect_part_usage_body_errors(&n.value.body, errors)
1609                }
1610                PackageBodyElement::ActionDef(n) => {
1611                    collect_action_def_body_errors(&n.value.body, errors)
1612                }
1613                PackageBodyElement::ActionUsage(n) => {
1614                    collect_action_usage_body_errors(&n.value.body, errors)
1615                }
1616                PackageBodyElement::RequirementDef(n) => {
1617                    collect_requirement_body_errors(&n.value.body, errors)
1618                }
1619                PackageBodyElement::RequirementUsage(n) => {
1620                    collect_requirement_body_errors(&n.value.body, errors)
1621                }
1622                PackageBodyElement::UseCaseDef(n) => {
1623                    collect_use_case_body_errors(&n.value.body, errors)
1624                }
1625                PackageBodyElement::UseCaseUsage(n) => {
1626                    collect_use_case_body_errors(&n.value.body, errors)
1627                }
1628                PackageBodyElement::ConcernUsage(n) => {
1629                    collect_requirement_body_errors(&n.value.body, errors)
1630                }
1631                PackageBodyElement::StateDef(n) => collect_state_body_errors(&n.value.body, errors),
1632                PackageBodyElement::StateUsage(n) => {
1633                    collect_state_body_errors(&n.value.body, errors)
1634                }
1635                PackageBodyElement::ConstraintDef(n) => {
1636                    collect_constraint_body_errors(&n.value.body, errors)
1637                }
1638                PackageBodyElement::CalcDef(n) => collect_calc_body_errors(&n.value.body, errors),
1639                PackageBodyElement::ViewDef(n) => {
1640                    collect_view_def_body_errors(&n.value.body, errors)
1641                }
1642                PackageBodyElement::ViewUsage(n) => collect_view_body_errors(&n.value.body, errors),
1643                _ => {}
1644            }
1645        }
1646    }
1647}
1648
1649fn collect_recovery_errors(root: &RootNamespace) -> Vec<ParseError> {
1650    let mut errors = Vec::new();
1651    for element in &root.elements {
1652        match &element.value {
1653            crate::ast::RootElement::Package(n) => {
1654                collect_package_body_errors(&n.value.body, &mut errors)
1655            }
1656            crate::ast::RootElement::LibraryPackage(n) => {
1657                collect_package_body_errors(&n.value.body, &mut errors)
1658            }
1659            crate::ast::RootElement::Namespace(n) => {
1660                collect_package_body_errors(&n.value.body, &mut errors)
1661            }
1662            crate::ast::RootElement::Import(_) => {}
1663        }
1664    }
1665    errors
1666}
1667
1668/// Parse full input; must consume entire input. Strips UTF-8 BOM if present.
1669#[allow(clippy::result_large_err)]
1670pub fn parse_root(input: &str) -> Result<RootNamespace, ParseError> {
1671    let bytes = input
1672        .strip_prefix('\u{FEFF}')
1673        .map(str::as_bytes)
1674        .unwrap_or_else(|| input.as_bytes());
1675    let located = LocatedSpan::new(bytes);
1676    match package::root_namespace(located) {
1677        Ok((rest, root)) => {
1678            if !rest.fragment().is_empty() && has_unclosed_brace(bytes) {
1679                return Err(missing_closing_brace_error_at_eof(bytes));
1680            }
1681            if rest.fragment().is_empty() {
1682                log::debug!("parse_root: success, {} top-level elements", root.elements.len());
1683                Ok(root)
1684            } else if trim_ascii_start(rest.fragment()).starts_with(b"}") {
1685                Err(unexpected_closing_brace_parse_error(rest))
1686            } else {
1687                let offset = located.location_offset() + located.fragment().len() - rest.fragment().len();
1688                let unconsumed = rest.fragment();
1689                let first_80 = unconsumed.get(..80.min(unconsumed.len())).unwrap_or(unconsumed);
1690                log::debug!(
1691                    "parse_root: expected end of input; parsed {} elements; unconsumed len={}, offset={}, first 80 bytes: {:?}",
1692                    root.elements.len(),
1693                    unconsumed.len(),
1694                    offset,
1695                    first_80,
1696                );
1697                log::debug!(
1698                    "parse_root: unconsumed as str: {:?}",
1699                    String::from_utf8_lossy(first_80),
1700                );
1701                let (found_snippet, found_len) = fragment_to_found_snippet(rest.fragment());
1702                let mut pe = ParseError::new("expected end of input")
1703                    .with_location(offset, rest.location_line(), rest.get_column())
1704                    .with_length(found_len.max(1))
1705                    .with_code("expected_end_of_input")
1706                    .with_category(DiagnosticCategory::ParseError);
1707                if !found_snippet.is_empty() {
1708                    pe = pe.with_found(found_snippet);
1709                }
1710                if root.elements.is_empty() && is_illegal_top_level_definition(rest.fragment()) {
1711                    pe = pe
1712                        .with_code("illegal_top_level_definition")
1713                        .with_expected("'package', 'namespace', or 'import'")
1714                        .with_suggestion(
1715                            "Wrap this declaration in `package ... { ... }` or `namespace ... { ... }`.",
1716                        );
1717                    pe.message = "illegal top-level definition".to_string();
1718                }
1719                Err(pe)
1720            }
1721        }
1722        Err(nom::Err::Error(e)) => Err(missing_closing_brace_error(bytes, e.input).unwrap_or_else(|| {
1723            nom_err_to_parse_error(
1724                &e,
1725                None,
1726                Some("'package', 'namespace', or 'import' at top level; or valid element in package body"),
1727            )
1728        })),
1729        Err(nom::Err::Failure(e)) => Err(missing_closing_brace_error(bytes, e.input).unwrap_or_else(|| {
1730            nom_err_to_parse_error(
1731                &e,
1732                None,
1733                Some("'package', 'namespace', or 'import' at top level; or valid element in package body"),
1734            )
1735        })),
1736        Err(nom::Err::Incomplete(_)) => Err(
1737            ParseError::new("unexpected end of input")
1738                .with_code("unexpected_eof")
1739                .with_category(DiagnosticCategory::ParseError),
1740        ),
1741    }
1742}
1743
1744const MAX_RECOVERY_ERRORS: usize = 100;
1745
1746/// Parse input with error recovery: collects multiple diagnostics and returns a partial AST when errors occur.
1747/// Use this for language servers so the user sees all parse errors and features (e.g. hover) can use the partial AST.
1748pub fn parse_with_diagnostics(input: &str) -> ParseResult {
1749    let bytes = input
1750        .strip_prefix('\u{FEFF}')
1751        .map(str::as_bytes)
1752        .unwrap_or_else(|| input.as_bytes());
1753    let located = LocatedSpan::new(bytes);
1754
1755    let mut elements = Vec::new();
1756    let mut errors = Vec::new();
1757
1758    let (mut input, _) = match lex::ws_and_comments(located) {
1759        Ok(x) => x,
1760        Err(_) => {
1761            return ParseResult {
1762                root: RootNamespace { elements: vec![] },
1763                errors: vec![ParseError::new("invalid input")
1764                    .with_code("invalid_input")
1765                    .with_category(DiagnosticCategory::ParseError)],
1766            };
1767        }
1768    };
1769
1770    while errors.len() < MAX_RECOVERY_ERRORS {
1771        // Skip leading ws/comments; if nothing left, we're done (avoids parsing "" as root_element).
1772        let (rest, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
1773        input = rest;
1774        if input.fragment().is_empty() {
1775            break;
1776        }
1777        match package::root_element(input) {
1778            Ok((rest, elem)) => {
1779                elements.push(elem);
1780                input = rest;
1781            }
1782            Err(nom::Err::Error(e)) | Err(nom::Err::Failure(e)) => {
1783                let (trimmed, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
1784                if trim_ascii_start(trimmed.fragment()).starts_with(b"}") {
1785                    errors.push(unexpected_closing_brace_parse_error(trimmed));
1786                    let skip_result = lex::skip_to_next_sync_point(trimmed);
1787                    match skip_result {
1788                        Ok((rest, _)) => input = rest,
1789                        Err(_) => break,
1790                    }
1791                    continue;
1792                }
1793                if errors.is_empty()
1794                    && has_unclosed_brace(bytes)
1795                    && (lex::starts_with_keyword(trimmed.fragment(), b"package")
1796                        || lex::starts_with_keyword(trimmed.fragment(), b"namespace")
1797                        || lex::starts_with_keyword(trimmed.fragment(), b"library")
1798                        || lex::starts_with_keyword(trimmed.fragment(), b"standard"))
1799                {
1800                    errors.push(missing_closing_brace_error_at_eof(bytes));
1801                    break;
1802                }
1803                if let Some(scope) = root_body_scope(input.fragment()) {
1804                    let (error_input, _) = lex::ws_and_comments(e.input).unwrap_or((e.input, ()));
1805                    if error_input.fragment().starts_with(b"{") {
1806                        errors.push(root_body_recovery_error(error_input, scope));
1807                        match lex::skip_statement_or_block(error_input) {
1808                            Ok((rest, _))
1809                                if rest.location_offset() > error_input.location_offset() =>
1810                            {
1811                                input = rest;
1812                                continue;
1813                            }
1814                            _ => {}
1815                        }
1816                    }
1817                }
1818                let pe = missing_closing_brace_error(bytes, e.input).unwrap_or_else(|| {
1819                    nom_err_to_parse_error(&e, None, Some("'package', 'namespace', or 'import'"))
1820                });
1821                errors.push(pe);
1822                let skip_result = lex::skip_to_next_sync_point(e.input);
1823                match skip_result {
1824                    Ok((rest, _)) => input = rest,
1825                    Err(_) => break,
1826                }
1827            }
1828            Err(nom::Err::Incomplete(_)) => {
1829                errors.push(
1830                    ParseError::new("unexpected end of input")
1831                        .with_location(
1832                            input.location_offset(),
1833                            input.location_line(),
1834                            input.get_column(),
1835                        )
1836                        .with_length(1)
1837                        .with_code("unexpected_eof")
1838                        .with_category(DiagnosticCategory::ParseError),
1839                );
1840                break;
1841            }
1842        }
1843    }
1844
1845    let (input, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
1846
1847    if input.fragment().is_empty()
1848        && has_unclosed_brace(bytes)
1849        && !errors
1850            .iter()
1851            .any(|e| e.code.as_deref() == Some("missing_closing_brace"))
1852    {
1853        errors.push(missing_closing_brace_error_at_eof(bytes));
1854    }
1855
1856    if !input.fragment().is_empty()
1857        && !errors
1858            .iter()
1859            .any(|e| e.code.as_deref() == Some("missing_closing_brace"))
1860    {
1861        if trim_ascii_start(input.fragment()).starts_with(b"}") {
1862            errors.push(unexpected_closing_brace_parse_error(input));
1863        } else {
1864            let (found_snippet, found_len) = fragment_to_found_snippet(input.fragment());
1865            let mut pe = ParseError::new("expected end of input")
1866                .with_location(
1867                    input.location_offset(),
1868                    input.location_line(),
1869                    input.get_column(),
1870                )
1871                .with_length(found_len.max(1))
1872                .with_code("expected_end_of_input")
1873                .with_severity(DiagnosticSeverity::Error)
1874                .with_category(DiagnosticCategory::ParseError);
1875            if !found_snippet.is_empty() {
1876                pe = pe.with_found(found_snippet);
1877            }
1878            errors.push(pe);
1879        }
1880    }
1881
1882    errors.extend(collect_recovery_errors(&RootNamespace {
1883        elements: elements.clone(),
1884    }));
1885    errors = dedup_errors(errors);
1886    errors = suppress_diagnostic_cascades(errors);
1887
1888    ParseResult {
1889        root: RootNamespace { elements },
1890        errors,
1891    }
1892}