Skip to main content

sysml_v2_parser/parser/
mod.rs

1//! Nom-based parser for SysML v2 textual notation.
2//!
3//! Organized into modules:
4//! - [lex]: whitespace, comments, names, qualified names, skip helpers
5//! - [attribute]: attribute definition and usage
6//! - [import]: import and relationship body
7//! - [part]: part definition and part usage
8//! - [package]: package and root namespace
9
10mod action;
11mod alias;
12mod allocation;
13mod attribute;
14mod bnf_surface;
15mod body;
16mod case;
17mod connection;
18mod constraint;
19mod definition_prefix;
20mod dependency;
21mod enumeration;
22mod expr;
23mod flow;
24mod import;
25mod individual;
26mod interface;
27mod item;
28mod lex;
29mod metadata;
30mod metadata_annotation;
31mod occurrence;
32mod package;
33mod part;
34mod port;
35mod requirement;
36mod span;
37mod specialization;
38mod state;
39mod usage;
40mod usecase;
41mod view;
42
43use crate::ast::{
44    ActionDefBody, ActionDefBodyElement, ActionUsageBody, ActionUsageBodyElement, CalcDefBody,
45    CalcDefBodyElement, ConstraintDefBody, ConstraintDefBodyElement, PackageBody,
46    PackageBodyElement, ParseErrorNode, PartDefBody, PartDefBodyElement, PartUsageBody,
47    PartUsageBodyElement, RequirementDefBody, RequirementDefBodyElement, RootNamespace,
48    StateDefBody, StateDefBodyElement, UseCaseDefBody, UseCaseDefBodyElement, ViewBody,
49    ViewBodyElement, ViewDefBody, ViewDefBodyElement,
50};
51use crate::error::{DiagnosticCategory, DiagnosticSeverity, ParseError};
52use nom::error::Error;
53use nom_locate::LocatedSpan;
54pub(crate) use span::{node_from_to, span_from_to, with_span, Input};
55
/// Result of parsing with error recovery: a (possibly partial) AST and zero or more diagnostics.
///
/// `root` and `errors` are independent: `root` may hold elements even when `errors` is
/// non-empty. Use [`ParseResult::is_ok`] to check whether any diagnostics were recorded.
#[derive(Debug, Clone)]
pub struct ParseResult {
    /// Root namespace; contains all successfully parsed top-level elements (partial when errors occurred).
    pub root: RootNamespace,
    /// All parse errors encountered (multiple when recovery is used).
    pub errors: Vec<ParseError>,
}
64
impl ParseResult {
    /// True if the document parsed fully with no errors.
    ///
    /// Only `errors` is inspected: the result is `true` exactly when that list is empty.
    pub fn is_ok(&self) -> bool {
        self.errors.is_empty()
    }
}
71
/// Upper bound, in bytes, on the input excerpt taken by `fragment_to_found_snippet`.
const FOUND_SNIPPET_MAX_LEN: usize = 40;
/// Keywords consulted by `is_illegal_top_level_definition`.
///
/// When an error occurs at root level and the remaining input starts with one of these
/// keywords, the diagnostic is reported as an "illegal top-level definition" with a hint to
/// wrap the declaration in a package or namespace.
const ILLEGAL_TOP_LEVEL_STARTERS: &[&[u8]] = &[
    b"action",
    b"actor",
    b"alias",
    b"allocate",
    b"allocation",
    b"attribute",
    b"bind",
    b"calc",
    b"case",
    b"concern",
    b"connection",
    b"constraint",
    b"dependency",
    b"enum",
    b"flow",
    b"interface",
    b"item",
    b"metadata",
    b"occurrence",
    b"part",
    b"perform",
    b"port",
    b"ref",
    b"require",
    b"requirement",
    b"satisfy",
    b"state",
    b"use",
    b"verification",
    b"view",
    b"viewpoint",
];
106
107/// Take a short snippet from the input at the error position for "found" display.
108/// Uses first line or first FOUND_SNIPPET_MAX_LEN bytes, UTF-8 with replacement char.
109fn fragment_to_found_snippet(fragment: &[u8]) -> (String, usize) {
110    let take = fragment
111        .iter()
112        .position(|&b| b == b'\n' || b == b'\r')
113        .map(|p| p.min(FOUND_SNIPPET_MAX_LEN))
114        .unwrap_or_else(|| fragment.len().min(FOUND_SNIPPET_MAX_LEN));
115    let slice = fragment.get(..take).unwrap_or(fragment);
116    let s = String::from_utf8_lossy(slice)
117        .replace('\n', "\\n")
118        .replace('\r', "\\r");
119    let len = slice.len();
120    (s.trim_end().to_string(), len)
121}
122
123pub(crate) fn recovery_found_snippet(input: Input<'_>) -> Option<String> {
124    let frag = input.fragment();
125    let take = frag
126        .iter()
127        .position(|&b| b == b'\n' || b == b'\r')
128        .unwrap_or(frag.len())
129        .min(60);
130    let snippet = String::from_utf8_lossy(&frag[..take]).trim().to_string();
131    if snippet.is_empty() {
132        None
133    } else {
134        Some(snippet)
135    }
136}
137
138fn recovery_found_snippet_from_span(input: Input<'_>, recovery_end: Input<'_>) -> Option<String> {
139    let consumed_len = recovery_end
140        .location_offset()
141        .saturating_sub(input.location_offset())
142        .min(input.fragment().len());
143    if consumed_len == 0 {
144        return recovery_found_snippet(input);
145    }
146    let frag = &input.fragment()[..consumed_len];
147    let take = frag
148        .iter()
149        .position(|&b| b == b'\n' || b == b'\r')
150        .unwrap_or(frag.len())
151        .min(60);
152    let snippet = String::from_utf8_lossy(&frag[..take]).trim().to_string();
153    if snippet.is_empty() {
154        recovery_found_snippet(input)
155    } else {
156        Some(snippet)
157    }
158}
159
160/// Map nom error kind to a human-readable message for language server diagnostics.
161fn nom_error_kind_to_message(code: &nom::error::ErrorKind) -> &'static str {
162    use nom::error::ErrorKind;
163    match code {
164        ErrorKind::Tag => "expected keyword or token",
165        ErrorKind::Digit => "expected number",
166        ErrorKind::Alpha => "expected identifier",
167        ErrorKind::AlphaNumeric => "expected identifier",
168        ErrorKind::Space => "expected whitespace",
169        ErrorKind::MultiSpace => "expected whitespace",
170        ErrorKind::Eof => "unexpected end of input",
171        ErrorKind::TakeUntil => "expected terminator",
172        ErrorKind::TakeWhile1 => "expected token",
173        ErrorKind::Alt => {
174            "expected package, import, part, port, interface, alias, attribute, or action"
175        }
176        ErrorKind::Many0 | ErrorKind::Many1 => "expected list of elements",
177        _ => "parse error",
178    }
179}
180
181/// Map nom error kind to a specific code for LSP/quick fixes.
182fn nom_error_kind_to_code(code: &nom::error::ErrorKind) -> &'static str {
183    use nom::error::ErrorKind;
184    match code {
185        ErrorKind::Tag => "expected_keyword",
186        ErrorKind::Digit => "expected_number",
187        ErrorKind::Alpha | ErrorKind::AlphaNumeric => "expected_identifier",
188        ErrorKind::Space | ErrorKind::MultiSpace => "expected_whitespace",
189        ErrorKind::Eof => "unexpected_eof",
190        ErrorKind::TakeUntil => "expected_terminator",
191        ErrorKind::TakeWhile1 => "expected_token",
192        ErrorKind::Alt => "expected_alt",
193        ErrorKind::Many0 | ErrorKind::Many1 => "expected_list",
194        _ => "parse_error",
195    }
196}
197
198fn nom_err_to_parse_error(
199    e: &Error<Input<'_>>,
200    length_override: Option<usize>,
201    expected_context: Option<&'static str>,
202) -> ParseError {
203    let offset = e.input.location_offset();
204    let line = e.input.location_line();
205    let column = e.input.get_column();
206    let fragment = e.input.fragment();
207    let (found_snippet, found_len) = fragment_to_found_snippet(fragment);
208    let message = nom_error_kind_to_message(&e.code).to_string();
209    let span_len = length_override.unwrap_or(found_len).max(1);
210    if trim_ascii_start(fragment).starts_with(b"}") {
211        return unexpected_closing_brace_parse_error(e.input);
212    }
213    let mut pe = ParseError::new(message)
214        .with_location(offset, line, column)
215        .with_length(span_len)
216        .with_code(nom_error_kind_to_code(&e.code))
217        .with_severity(DiagnosticSeverity::Error)
218        .with_category(DiagnosticCategory::ParseError);
219    if !found_snippet.is_empty() {
220        pe = pe.with_found(found_snippet);
221    }
222    if let Some(ctx) = expected_context {
223        pe = pe.with_expected(ctx);
224    }
225    let at_root = expected_context.is_some_and(|ctx| {
226        ctx.contains("'package', 'namespace', or 'import'") || ctx.contains("top level")
227    });
228    if at_root && is_illegal_top_level_definition(fragment) {
229        pe.message = "illegal top-level definition".to_string();
230        pe.code = Some("illegal_top_level_definition".to_string());
231        pe.expected = Some("'package', 'namespace', or 'import'".to_string());
232        pe.suggestion = Some(
233            "Wrap this declaration in `package ... { ... }` or `namespace ... { ... }`."
234                .to_string(),
235        );
236    }
237    pe
238}
239
240fn is_illegal_top_level_definition(fragment: &[u8]) -> bool {
241    let trimmed = trim_ascii_start(fragment);
242    !trimmed.starts_with(b"}")
243        && !trimmed.starts_with(b"//")
244        && !trimmed.starts_with(b"/*")
245        && lex::starts_with_any_keyword(trimmed, ILLEGAL_TOP_LEVEL_STARTERS)
246}
247
/// Return `fragment` without its leading ASCII whitespace bytes.
///
/// Whitespace is whatever `u8::is_ascii_whitespace` accepts (space, tab, line feed, form feed,
/// carriage return). An all-whitespace or empty input yields an empty slice.
fn trim_ascii_start(fragment: &[u8]) -> &[u8] {
    let first_kept = fragment
        .iter()
        .position(|byte| !byte.is_ascii_whitespace())
        .unwrap_or(fragment.len());
    &fragment[first_kept..]
}
258
259fn starts_with_missing_name_after_keyword(
260    fragment: &[u8],
261    keyword: &[u8],
262    trailing_keywords: &[&[u8]],
263) -> bool {
264    let mut fragment = trim_ascii_start(fragment);
265    if !lex::starts_with_keyword(fragment, keyword) {
266        return false;
267    }
268    fragment = &fragment[keyword.len()..];
269    while let Some(first) = fragment.first() {
270        if first.is_ascii_whitespace() {
271            fragment = &fragment[1..];
272            continue;
273        }
274        break;
275    }
276    for trailing in trailing_keywords {
277        if lex::starts_with_keyword(fragment, trailing) {
278            fragment = &fragment[trailing.len()..];
279            while let Some(first) = fragment.first() {
280                if first.is_ascii_whitespace() {
281                    fragment = &fragment[1..];
282                    continue;
283                }
284                break;
285            }
286        }
287    }
288    fragment.starts_with(b":")
289        && !lex::starts_with_keyword(fragment, b":>>")
290        && !lex::starts_with_keyword(fragment, b":>")
291        && !lex::starts_with_keyword(fragment, b"::")
292}
293
294fn starts_with_missing_type_after_keyword(
295    fragment: &[u8],
296    keyword: &[u8],
297    trailing_keywords: &[&[u8]],
298) -> bool {
299    let mut fragment = trim_ascii_start(fragment);
300    if !lex::starts_with_keyword(fragment, keyword) {
301        return false;
302    }
303    fragment = &fragment[keyword.len()..];
304    while let Some(first) = fragment.first() {
305        if first.is_ascii_whitespace() {
306            fragment = &fragment[1..];
307            continue;
308        }
309        break;
310    }
311    for trailing in trailing_keywords {
312        if lex::starts_with_keyword(fragment, trailing) {
313            fragment = &fragment[trailing.len()..];
314            while let Some(first) = fragment.first() {
315                if first.is_ascii_whitespace() {
316                    fragment = &fragment[1..];
317                    continue;
318                }
319                break;
320            }
321        }
322    }
323
324    let mut name_len = 0usize;
325    while name_len < fragment.len()
326        && (fragment[name_len].is_ascii_alphanumeric() || fragment[name_len] == b'_')
327    {
328        name_len += 1;
329    }
330    if name_len == 0 {
331        return false;
332    }
333    fragment = &fragment[name_len..];
334    while let Some(first) = fragment.first() {
335        if first.is_ascii_whitespace() {
336            fragment = &fragment[1..];
337            continue;
338        }
339        break;
340    }
341    if fragment.starts_with(b":") {
342        fragment = &fragment[1..];
343    } else if lex::starts_with_keyword(fragment, b"defined") {
344        fragment = &fragment[b"defined".len()..];
345        fragment = trim_ascii_start(fragment);
346        if !lex::starts_with_keyword(fragment, b"by") {
347            return false;
348        }
349        fragment = &fragment[b"by".len()..];
350    } else if lex::starts_with_keyword(fragment, b"typed") {
351        fragment = &fragment[b"typed".len()..];
352        fragment = trim_ascii_start(fragment);
353        if !lex::starts_with_keyword(fragment, b"by") {
354            return false;
355        }
356        fragment = &fragment[b"by".len()..];
357    } else {
358        return false;
359    }
360    while let Some(first) = fragment.first() {
361        if first.is_ascii_whitespace() {
362            fragment = &fragment[1..];
363            continue;
364        }
365        break;
366    }
367
368    fragment.is_empty()
369        || fragment.starts_with(b";")
370        || fragment.starts_with(b"{")
371        || fragment.starts_with(b"}")
372        || lex::starts_with_keyword(fragment, b"then")
373        || lex::starts_with_keyword(fragment, b"if")
374        || lex::starts_with_keyword(fragment, b"do")
375}
376
377fn missing_name_diagnostic(fragment: &[u8]) -> Option<(&'static str, String, String, String)> {
378    #[allow(clippy::type_complexity)]
379    let cases: &[(&[u8], &[&[u8]], &str, &str)] = &[
380        (
381            b"subject",
382            &[],
383            "subject name",
384            "Use `subject laptop: Laptop;`.",
385        ),
386        (b"actor", &[], "actor name", "Use `actor user: User;`."),
387        (b"state", &[], "state name", "Use `state ready: Mode;`."),
388        (b"part", &[], "part name", "Use `part wheel: Wheel;`."),
389        (b"ref", &[], "reference name", "Use `ref sensor: Sensor;`."),
390        (b"port", &[], "port name", "Use `port power: PowerPort;`."),
391        (
392            b"attribute",
393            &[],
394            "attribute name",
395            "Use `attribute mass: MassValue;`.",
396        ),
397        (b"in", &[], "input name", "Use `in speed: Real;`."),
398        (b"out", &[], "output name", "Use `out result: Real;`."),
399        (
400            b"perform",
401            &[b"action"],
402            "action name",
403            "Use `perform action run: Runner;`.",
404        ),
405        (b"return", &[], "return name", "Use `return result: Real;`."),
406    ];
407
408    for (keyword, trailing, missing_what, suggestion) in cases {
409        if starts_with_missing_name_after_keyword(fragment, keyword, trailing) {
410            return Some((
411                "missing_member_name",
412                format!("expected {missing_what} before ':'"),
413                format!("{missing_what} before ':'"),
414                suggestion.to_string(),
415            ));
416        }
417    }
418    None
419}
420
421fn missing_type_diagnostic(fragment: &[u8]) -> Option<(&'static str, String, String, String)> {
422    #[allow(clippy::type_complexity)]
423    let cases: &[(&[u8], &[&[u8]], &str)] = &[
424        (b"subject", &[], "subject type"),
425        (b"actor", &[], "actor type"),
426        (b"state", &[], "state type"),
427        (b"part", &[], "part type"),
428        (b"ref", &[], "reference type"),
429        (b"port", &[], "port type"),
430        (b"attribute", &[], "attribute type"),
431        (b"occurrence", &[], "occurrence type"),
432        (b"in", &[], "input type"),
433        (b"out", &[], "output type"),
434        (b"perform", &[b"action"], "action type"),
435        (b"return", &[], "return type"),
436    ];
437
438    for &(keyword, trailing, missing_what) in cases {
439        if starts_with_missing_type_after_keyword(fragment, keyword, trailing) {
440            let keyword_label = String::from_utf8_lossy(keyword);
441            let sample_name = if keyword == &b"subject"[..] {
442                "laptop"
443            } else if keyword == &b"actor"[..] {
444                "user"
445            } else if keyword == &b"state"[..] {
446                "ready"
447            } else if keyword == &b"part"[..] {
448                "wheel"
449            } else if keyword == &b"ref"[..] {
450                "sensor"
451            } else if keyword == &b"port"[..] {
452                "power"
453            } else if keyword == &b"attribute"[..] {
454                "mass"
455            } else if keyword == &b"occurrence"[..] {
456                "event"
457            } else if keyword == &b"in"[..] {
458                "speed"
459            } else if keyword == &b"out"[..] {
460                "result"
461            } else if keyword == &b"perform"[..] {
462                "run"
463            } else if keyword == &b"return"[..] {
464                "result"
465            } else {
466                "member"
467            };
468            let sample_type = if keyword == &b"subject"[..] {
469                "Laptop"
470            } else if keyword == &b"actor"[..] {
471                "User"
472            } else if keyword == &b"state"[..] {
473                "Mode"
474            } else if keyword == &b"part"[..] {
475                "Wheel"
476            } else if keyword == &b"ref"[..] {
477                "Sensor"
478            } else if keyword == &b"port"[..] {
479                "PowerPort"
480            } else if keyword == &b"attribute"[..] {
481                "MassValue"
482            } else if keyword == &b"occurrence"[..] {
483                "Event"
484            } else if keyword == &b"in"[..] || keyword == &b"out"[..] {
485                "Real"
486            } else if keyword == &b"perform"[..] {
487                "Runner"
488            } else if keyword == &b"return"[..] {
489                "Real"
490            } else {
491                "Type"
492            };
493            let suggestion = if keyword == &b"perform"[..] {
494                format!("Use `perform action {sample_name}: {sample_type};`.")
495            } else if keyword == &b"return"[..] {
496                format!("Use `return {sample_name}: {sample_type};`.")
497            } else {
498                format!("Use `{keyword_label} {sample_name}: {sample_type};`.")
499            };
500            return Some((
501                "missing_type_reference",
502                format!("expected {missing_what} after ':'"),
503                format!("{missing_what} after ':'"),
504                suggestion,
505            ));
506        }
507    }
508    None
509}
510
511fn invalid_expose_separator_diagnostic(
512    fragment: &[u8],
513) -> Option<(&'static str, String, String, String)> {
514    let mut fragment = trim_ascii_start(fragment);
515    if !lex::starts_with_keyword(fragment, b"expose") {
516        return None;
517    }
518    fragment = &fragment[b"expose".len()..];
519    while let Some(first) = fragment.first() {
520        if first.is_ascii_whitespace() {
521            fragment = &fragment[1..];
522            continue;
523        }
524        break;
525    }
526    if fragment.is_empty() {
527        return None;
528    }
529
530    let mut saw_dot = false;
531    let mut in_quoted_name = false;
532    for &b in fragment {
533        if b == b'\'' {
534            in_quoted_name = !in_quoted_name;
535            continue;
536        }
537        if in_quoted_name {
538            continue;
539        }
540        if matches!(b, b';' | b'[' | b'{' | b'}' | b'\n' | b'\r') {
541            break;
542        }
543        if b == b'.' {
544            saw_dot = true;
545            break;
546        }
547    }
548    if !saw_dot {
549        return None;
550    }
551
552    Some((
553        "invalid_qualified_name_separator",
554        "invalid qualified name in expose target: use '::' instead of '.'".to_string(),
555        "qualified name segments separated by '::'".to_string(),
556        "Replace '.' with '::' in the expose target (example: `expose A::B;`).".to_string(),
557    ))
558}
559
560fn missing_semicolon_or_body_diagnostic(
561    fragment: &[u8],
562) -> Option<(&'static str, String, String, String)> {
563    let fragment = trim_ascii_start(fragment);
564    let cases: &[(&[u8], &str, &str)] = &[
565        (
566            b"action def",
567            "action definition",
568            "Use `action def Run;` or `action def Run { ... }`.",
569        ),
570        (
571            b"part def",
572            "part definition",
573            "Use `part def Wheel;` or `part def Wheel { ... }`.",
574        ),
575        (
576            b"requirement def",
577            "requirement definition",
578            "Use `requirement def R;` or `requirement def R { ... }`.",
579        ),
580        (
581            b"state def",
582            "state definition",
583            "Use `state def Ready;` or `state def Ready { ... }`.",
584        ),
585        (
586            b"view",
587            "view declaration",
588            "Use `view structure: GeneralView;` or `view structure: GeneralView { ... }`.",
589        ),
590        (
591            b"rendering def",
592            "rendering definition",
593            "Use `rendering def Diagram;` or `rendering def Diagram { ... }`.",
594        ),
595    ];
596
597    for (prefix, label, suggestion) in cases {
598        if fragment.starts_with(prefix) {
599            return Some((
600                "missing_body_or_semicolon",
601                format!("expected ';' or '{{' after {label} header"),
602                "';' or '{' after declaration header".to_string(),
603                suggestion.to_string(),
604            ));
605        }
606    }
607    None
608}
609
610fn invalid_typing_operator_diagnostic(
611    fragment: &[u8],
612) -> Option<(&'static str, String, String, String)> {
613    let fragment = trim_ascii_start(fragment);
614    let cases: &[(&[u8], &str, &str)] = &[
615        (
616            b"part def",
617            "part definition specialization",
618            "Use `part def Vehicle :> BaseVehicle;` when specializing a definition.",
619        ),
620        (
621            b"port def",
622            "port definition specialization",
623            "Use `port def PowerPort :> BasePort;` when specializing a definition.",
624        ),
625    ];
626
627    for (prefix, label, suggestion) in cases {
628        if fragment.starts_with(prefix) && fragment.windows(3).any(|w| w == b": ") {
629            return Some((
630                "invalid_typing_operator",
631                format!("invalid typing operator in {label}: use ':>' instead of ':'"),
632                "':>' specialization operator".to_string(),
633                suggestion.to_string(),
634            ));
635        }
636    }
637
638    if fragment.starts_with(b"part def")
639        && fragment.contains(&b':')
640        && !fragment.windows(2).any(|w| w == b":>")
641    {
642        return Some((
643            "invalid_typing_operator",
644            "invalid typing operator in part definition: use ':>' instead of ':'".to_string(),
645            "':>' specialization operator".to_string(),
646            "Use `part def Vehicle :> BaseVehicle;` when specializing a definition.".to_string(),
647        ));
648    }
649
650    None
651}
652
653fn missing_expression_after_operator_diagnostic(
654    fragment: &[u8],
655) -> Option<(&'static str, String, String, String)> {
656    let fragment = trim_ascii_start(fragment);
657    let cases: &[(&[u8], &str, &str)] = &[
658        (
659            b"bind",
660            "binding expression after '='",
661            "Use `bind x = y;`.",
662        ),
663        (
664            b"assign",
665            "assignment expression after ':='",
666            "Use `assign x := y;`.",
667        ),
668        (
669            b"first",
670            "target after 'then'",
671            "Use `first start then finish;`.",
672        ),
673        (
674            b"flow",
675            "target after 'to'",
676            "Use `flow source to target;`.",
677        ),
678        (
679            b"satisfy",
680            "target after 'by'",
681            "Use `satisfy Req by implementation;`.",
682        ),
683    ];
684
685    for (keyword, expected, suggestion) in cases {
686        if !lex::starts_with_keyword(fragment, keyword) {
687            continue;
688        }
689        let text = String::from_utf8_lossy(fragment);
690        if text.contains("= ;") || text.trim_end().ends_with('=') {
691            return Some((
692                "missing_expression_after_operator",
693                "expected expression after '='".to_string(),
694                expected.to_string(),
695                suggestion.to_string(),
696            ));
697        }
698        if text.contains(":= ;") || text.trim_end().ends_with(":=") {
699            return Some((
700                "missing_expression_after_operator",
701                "expected expression after ':='".to_string(),
702                expected.to_string(),
703                suggestion.to_string(),
704            ));
705        }
706        if text.contains(" then ;") || text.trim_end().ends_with(" then") {
707            return Some((
708                "missing_expression_after_operator",
709                "expected target after 'then'".to_string(),
710                expected.to_string(),
711                suggestion.to_string(),
712            ));
713        }
714        if text.contains(" to ;") || text.trim_end().ends_with(" to") {
715            return Some((
716                "missing_expression_after_operator",
717                "expected target after 'to'".to_string(),
718                expected.to_string(),
719                suggestion.to_string(),
720            ));
721        }
722        if text.contains(" by ;") || text.trim_end().ends_with(" by") {
723            return Some((
724                "missing_expression_after_operator",
725                "expected target after 'by'".to_string(),
726                expected.to_string(),
727                suggestion.to_string(),
728            ));
729        }
730    }
731    None
732}
733
734fn invalid_unit_reference_diagnostic(
735    fragment: &[u8],
736) -> Option<(&'static str, String, String, String)> {
737    let fragment = trim_ascii_start(fragment);
738    let text = String::from_utf8_lossy(fragment);
739    if !(text.contains('[') && text.contains(']')) {
740        return None;
741    }
742
743    if text.contains("[]") || text.contains("[ ]") {
744        return Some((
745            "invalid_unit_reference",
746            "expected unit name inside '[ ]'".to_string(),
747            "unit name inside '[ ]'".to_string(),
748            "Use a concrete unit such as `1750 [kg]`.".to_string(),
749        ));
750    }
751
752    if text.contains("[;")
753        || text.contains("[ ;")
754        || text.contains("[)")
755        || text.contains("[ ]")
756        || text.contains("[,")
757    {
758        return Some((
759            "invalid_unit_reference",
760            "invalid unit expression inside '[ ]'".to_string(),
761            "unit name inside '[ ]'".to_string(),
762            "Use a unit symbol or qualified unit name (example: `[kg]` or `[SI::kg]`).".to_string(),
763        ));
764    }
765
766    None
767}
768
769fn unexpected_keyword_in_scope_diagnostic(
770    fragment: &[u8],
771    starters: &[&[u8]],
772    scope_label: &str,
773) -> Option<(&'static str, String, String, String)> {
774    let fragment = trim_ascii_start(fragment);
775    if fragment.is_empty() || fragment.starts_with(b"#") || fragment.starts_with(b"@") {
776        return None;
777    }
778    let keyword_end = fragment
779        .iter()
780        .position(|b| !b.is_ascii_alphanumeric() && *b != b'_')
781        .unwrap_or(fragment.len());
782    if keyword_end == 0 {
783        return None;
784    }
785    let keyword = &fragment[..keyword_end];
786    if lex::starts_with_any_keyword(keyword, starters) {
787        return None;
788    }
789    let keyword_text = String::from_utf8_lossy(keyword);
790    Some((
791        "unexpected_keyword_in_scope",
792        format!("unexpected keyword `{keyword_text}` in {scope_label}"),
793        format!("valid {scope_label} element"),
794        format!("Replace `{keyword_text}` with a valid {scope_label} member or remove it."),
795    ))
796}
797
798fn invalid_bare_identifier_in_body_diagnostic(
799    fragment: &[u8],
800    scope_label: &str,
801) -> Option<(&'static str, String, String, String)> {
802    let is_action = scope_label.contains("action body");
803    let is_state = scope_label.contains("state body");
804    if !is_action && !is_state {
805        return None;
806    }
807
808    let fragment = trim_ascii_start(fragment);
809    let ident_end = fragment
810        .iter()
811        .position(|b| !b.is_ascii_alphanumeric() && *b != b'_')
812        .unwrap_or(fragment.len());
813    if ident_end == 0 || !fragment[0].is_ascii_alphabetic() {
814        return None;
815    }
816
817    let ident = &fragment[..ident_end];
818    let rest = trim_ascii_start(&fragment[ident_end..]);
819    if !(rest.starts_with(b";")
820        || rest.starts_with(b"}")
821        || rest.starts_with(b"\n")
822        || rest.starts_with(b"\r"))
823    {
824        return None;
825    }
826
827    let ident_text = String::from_utf8_lossy(ident);
828    if is_action {
829        Some((
830            "invalid_bare_identifier_in_action_body",
831            format!("bare identifier `{ident_text}` is not a valid action body member"),
832            "action body member such as `perform`, `bind`, `in`, or `out`".to_string(),
833            format!(
834                "Use an explicit action-body form, for example `perform {ident_text};`, `bind ... = ...;`, or an `in`/`out` parameter declaration."
835            ),
836        ))
837    } else {
838        Some((
839            "invalid_bare_identifier_in_state_body",
840            format!("bare identifier `{ident_text}` is not a valid state body member"),
841            "state body member such as `entry`, `transition`, `then`, `state`, or `ref`"
842                .to_string(),
843            format!(
844                "Use an explicit state-body form, for example `then {ident_text};`, `transition ...;`, or a nested `state` member."
845            ),
846        ))
847    }
848}
849
850fn unexpected_closing_brace_parse_error(input: Input<'_>) -> ParseError {
851    ParseError::new("unexpected closing '}'")
852        .with_location(
853            input.location_offset(),
854            input.location_line(),
855            input.get_column(),
856        )
857        .with_length(1)
858        .with_code("unexpected_closing_brace")
859        .with_expected("valid declaration or end of current body")
860        .with_found("}")
861        .with_suggestion("Remove this '}' or add the missing opening '{' before it.")
862        .with_severity(DiagnosticSeverity::Error)
863        .with_category(DiagnosticCategory::ParseError)
864}
865
866fn missing_closing_brace_error(bytes: &[u8], input: Input<'_>) -> Option<ParseError> {
867    if !input.fragment().is_empty() {
868        return None;
869    }
870    let consumed = &bytes[..input.location_offset().min(bytes.len())];
871    let opens = consumed.iter().filter(|&&b| b == b'{').count();
872    let closes = consumed.iter().filter(|&&b| b == b'}').count();
873    if opens <= closes {
874        return None;
875    }
876    Some(missing_closing_brace_error_at_eof(consumed))
877}
878
879fn missing_closing_brace_error_at_eof(bytes: &[u8]) -> ParseError {
880    let (line, column) = eof_line_column(bytes);
881    ParseError::new("missing closing '}'")
882        .with_location(bytes.len(), line, column)
883        .with_length(1)
884        .with_code("missing_closing_brace")
885        .with_expected("'}'")
886        .with_suggestion("Add '}' to close the open body.")
887        .with_category(DiagnosticCategory::ParseError)
888}
889
890fn category_from_code(code: &str) -> DiagnosticCategory {
891    if code == "unsupported_annotation_syntax" {
892        DiagnosticCategory::UnsupportedGrammarForm
893    } else if code == "unresolved_symbol" {
894        DiagnosticCategory::UnresolvedSymbol
895    } else {
896        DiagnosticCategory::ParseError
897    }
898}
899
/// Reports whether `bytes` contains more `{` bytes than `}` bytes.
///
/// This is a raw byte count: braces inside comments or literals are counted too.
fn has_unclosed_brace(bytes: &[u8]) -> bool {
    let (opens, closes) = bytes
        .iter()
        .fold((0usize, 0usize), |(opens, closes), &byte| match byte {
            b'{' => (opens + 1, closes),
            b'}' => (opens, closes + 1),
            _ => (opens, closes),
        });
    closes < opens
}
905
/// Computes the 1-based `(line, column)` of the position just past the end of `bytes`.
///
/// Only `\n` starts a new line; every other byte (including `\r`) advances the column by one.
fn eof_line_column(bytes: &[u8]) -> (u32, usize) {
    bytes
        .iter()
        .fold((1u32, 1usize), |(line, column), &byte| {
            if byte == b'\n' {
                (line + 1, 1)
            } else {
                (line, column + 1)
            }
        })
}
919
920pub(crate) fn build_recovery_error_node(
921    input: Input<'_>,
922    starters: &[&[u8]],
923    scope_label: &str,
924    generic_code: &str,
925) -> ParseErrorNode {
926    build_recovery_error_node_from_span(input, input, starters, scope_label, generic_code)
927}
928
/// Outcome of `classify_recovery`: which kind of syntax problem a skipped region looks like.
///
/// Variants that carry fields hold the diagnostic text produced by the matching
/// `*_diagnostic` helper; `build_recovery_error_node_from_span` copies those fields into a
/// `ParseErrorNode` unchanged. The field-less variants get their text assigned there.
enum RecoveryClassification {
    /// `missing_name_diagnostic` matched the start of the region.
    MissingMemberName {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// `missing_type_diagnostic` matched the start of the region.
    MissingTypeReference {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// `invalid_expose_separator_diagnostic` matched the start of the region.
    InvalidQualifiedNameSeparator {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// `missing_semicolon_or_body_diagnostic` matched the start of the region.
    MissingBodyOrSemicolon {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// `missing_expression_after_operator_diagnostic` matched the start of the region.
    MissingExpressionAfterOperator {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// `invalid_unit_reference_diagnostic` matched the start of the region.
    InvalidUnitReference {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// `invalid_typing_operator_diagnostic` matched the start of the region.
    InvalidTypingOperator {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// `invalid_bare_identifier_in_body_diagnostic` matched for the current scope.
    InvalidBareIdentifierInBody {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// `unexpected_keyword_in_scope_diagnostic` matched for the current scope.
    UnexpectedKeywordInScope {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// The skipped text looks like a declaration that ran into the next one without a `;`.
    MissingSemicolon,
    /// The region starts with `#` or `@`.
    UnsupportedAnnotation,
    /// Fallback when no more specific classification applies.
    Unexpected,
}
988
/// Returns `fragment` with trailing ASCII whitespace bytes removed.
///
/// "Whitespace" is whatever `u8::is_ascii_whitespace` accepts; an all-whitespace input
/// yields an empty slice.
fn trim_ascii_end(fragment: &[u8]) -> &[u8] {
    let keep = fragment
        .iter()
        .rposition(|byte| !byte.is_ascii_whitespace())
        .map_or(0, |last_kept| last_kept + 1);
    &fragment[..keep]
}
999
/// Classifies the region that error recovery skipped so a specific diagnostic can be built.
///
/// * `input` - position where the unparseable member starts.
/// * `recovery_end` - position where recovery stopped skipping.
/// * `starters` - keywords that may begin a member in the current scope.
/// * `scope_label` - human-readable scope name forwarded to the scope-aware helpers.
///
/// The checks run in a fixed priority order and the first one that matches wins; the
/// fallback is `RecoveryClassification::Unexpected`.
fn classify_recovery(
    input: Input<'_>,
    recovery_end: Input<'_>,
    starters: &[&[u8]],
    scope_label: &str,
) -> RecoveryClassification {
    // All pattern helpers look at the remaining input with leading whitespace removed.
    let trimmed = trim_ascii_start(input.fragment());

    if let Some((code, message, expected, suggestion)) = missing_name_diagnostic(trimmed) {
        return RecoveryClassification::MissingMemberName {
            code: code.to_string(),
            message,
            expected,
            suggestion,
        };
    }

    if let Some((code, message, expected, suggestion)) = missing_type_diagnostic(trimmed) {
        return RecoveryClassification::MissingTypeReference {
            code: code.to_string(),
            message,
            expected,
            suggestion,
        };
    }

    if let Some((code, message, expected, suggestion)) =
        invalid_expose_separator_diagnostic(trimmed)
    {
        return RecoveryClassification::InvalidQualifiedNameSeparator {
            code: code.to_string(),
            message,
            expected,
            suggestion,
        };
    }

    if let Some((code, message, expected, suggestion)) = invalid_typing_operator_diagnostic(trimmed)
    {
        return RecoveryClassification::InvalidTypingOperator {
            code: code.to_string(),
            message,
            expected,
            suggestion,
        };
    }

    if let Some((code, message, expected, suggestion)) =
        missing_expression_after_operator_diagnostic(trimmed)
    {
        return RecoveryClassification::MissingExpressionAfterOperator {
            code: code.to_string(),
            message,
            expected,
            suggestion,
        };
    }

    if let Some((code, message, expected, suggestion)) = invalid_unit_reference_diagnostic(trimmed)
    {
        return RecoveryClassification::InvalidUnitReference {
            code: code.to_string(),
            message,
            expected,
            suggestion,
        };
    }

    if let Some((code, message, expected, suggestion)) =
        missing_semicolon_or_body_diagnostic(trimmed)
    {
        return RecoveryClassification::MissingBodyOrSemicolon {
            code: code.to_string(),
            message,
            expected,
            suggestion,
        };
    }

    // Bytes skipped by recovery: the offset difference, clamped to the available fragment
    // so the slice below cannot go out of bounds (and 0 if `recovery_end` is not ahead).
    let consumed_len = recovery_end
        .location_offset()
        .saturating_sub(input.location_offset())
        .min(input.fragment().len());
    let raw_consumed = &input.fragment()[..consumed_len];
    let consumed = trim_ascii_end(raw_consumed);
    // True only if recovery made progress and, after `lex::ws_and_comments` (falling back to
    // `recovery_end` unchanged if that parser errors), what follows is end of input, a
    // closing brace, or one of the scope's starter keywords.
    let recovered_to_boundary = recovery_end.location_offset() > input.location_offset() && {
        let (next, _) = lex::ws_and_comments(recovery_end).unwrap_or((recovery_end, ()));
        next.fragment().is_empty()
            || next.fragment().starts_with(b"}")
            || lex::starts_with_any_keyword(next.fragment(), starters)
    };

    // The newline test uses the untrimmed bytes, so a trailing line break still counts.
    let consumed_has_newline = raw_consumed.contains(&b'\n') || raw_consumed.contains(&b'\r');
    // First line of the skipped text (up to the first `\n` or `\r`), without trailing whitespace.
    let first_line_end = consumed
        .iter()
        .position(|b| matches!(*b, b'\n' | b'\r'))
        .unwrap_or(consumed.len());
    let first_line = trim_ascii_end(&consumed[..first_line_end]);
    let consumed_has_delimiters = consumed
        .iter()
        .any(|b| matches!(*b, b'{' | b'}' | b'(' | b')' | b'[' | b']'));
    // A first line ending in an operator or separator is treated as an unfinished
    // construct rather than a statement that merely lacks its `;`.
    let consumed_ends_incomplete = first_line.last().is_some_and(|b| {
        matches!(
            *b,
            b':' | b'=' | b',' | b'.' | b'+' | b'-' | b'*' | b'/' | b'>' | b'<' | b'|'
        )
    });
    let first_line_has_semicolon = first_line.contains(&b';');
    // Missing-semicolon heuristic: the region starts with a starter keyword, recovery stopped
    // at a boundary, and the skipped text is a delimiter-free, semicolon-free run that either
    // spans a line break or ends directly at `}`.
    if recovered_to_boundary
        && lex::starts_with_any_keyword(trimmed, starters)
        && (consumed_has_newline || recovery_end.fragment().starts_with(b"}"))
        && !consumed.is_empty()
        && !consumed_has_delimiters
        && !consumed_ends_incomplete
        && !first_line_has_semicolon
    {
        return RecoveryClassification::MissingSemicolon;
    }

    if lex::starts_with_keyword(trimmed, b"#") || lex::starts_with_keyword(trimmed, b"@") {
        return RecoveryClassification::UnsupportedAnnotation;
    }

    if let Some((code, message, expected, suggestion)) =
        invalid_bare_identifier_in_body_diagnostic(trimmed, scope_label)
    {
        return RecoveryClassification::InvalidBareIdentifierInBody {
            code: code.to_string(),
            message,
            expected,
            suggestion,
        };
    }

    if let Some((code, message, expected, suggestion)) =
        unexpected_keyword_in_scope_diagnostic(trimmed, starters, scope_label)
    {
        return RecoveryClassification::UnexpectedKeywordInScope {
            code: code.to_string(),
            message,
            expected,
            suggestion,
        };
    }

    RecoveryClassification::Unexpected
}
1147
1148pub(crate) fn build_recovery_error_node_from_span(
1149    input: Input<'_>,
1150    recovery_end: Input<'_>,
1151    starters: &[&[u8]],
1152    scope_label: &str,
1153    generic_code: &str,
1154) -> ParseErrorNode {
1155    match classify_recovery(input, recovery_end, starters, scope_label) {
1156        RecoveryClassification::MissingMemberName {
1157            code,
1158            message,
1159            expected,
1160            suggestion,
1161        }
1162        | RecoveryClassification::MissingTypeReference {
1163            code,
1164            message,
1165            expected,
1166            suggestion,
1167        }
1168        | RecoveryClassification::InvalidQualifiedNameSeparator {
1169            code,
1170            message,
1171            expected,
1172            suggestion,
1173        }
1174        | RecoveryClassification::MissingBodyOrSemicolon {
1175            code,
1176            message,
1177            expected,
1178            suggestion,
1179        }
1180        | RecoveryClassification::MissingExpressionAfterOperator {
1181            code,
1182            message,
1183            expected,
1184            suggestion,
1185        }
1186        | RecoveryClassification::InvalidUnitReference {
1187            code,
1188            message,
1189            expected,
1190            suggestion,
1191        }
1192        | RecoveryClassification::InvalidTypingOperator {
1193            code,
1194            message,
1195            expected,
1196            suggestion,
1197        }
1198        | RecoveryClassification::InvalidBareIdentifierInBody {
1199            code,
1200            message,
1201            expected,
1202            suggestion,
1203        }
1204        | RecoveryClassification::UnexpectedKeywordInScope {
1205            code,
1206            message,
1207            expected,
1208            suggestion,
1209        } => ParseErrorNode {
1210            message,
1211            code,
1212            expected: Some(expected),
1213            found: recovery_found_snippet_from_span(input, recovery_end),
1214            suggestion: Some(suggestion),
1215            category: Some(DiagnosticCategory::ParseError),
1216        },
1217        RecoveryClassification::MissingSemicolon => ParseErrorNode {
1218            message: "missing semicolon before next declaration".to_string(),
1219            code: "missing_semicolon".to_string(),
1220            expected: Some("';'".to_string()),
1221            found: recovery_found_snippet_from_span(input, recovery_end),
1222            suggestion: Some("Insert ';' before this declaration.".to_string()),
1223            category: Some(DiagnosticCategory::ParseError),
1224        },
1225        RecoveryClassification::UnsupportedAnnotation => ParseErrorNode {
1226            message: format!("unsupported annotation syntax in {scope_label}"),
1227            code: "unsupported_annotation_syntax".to_string(),
1228            expected: Some(format!("valid {scope_label} element")),
1229            found: recovery_found_snippet_from_span(input, recovery_end),
1230            suggestion: Some(
1231                "Remove this annotation or extend the parser to support annotated declarations."
1232                    .to_string(),
1233            ),
1234            category: Some(DiagnosticCategory::UnsupportedGrammarForm),
1235        },
1236        RecoveryClassification::Unexpected => ParseErrorNode {
1237            message: format!("unexpected token in {scope_label}"),
1238            code: generic_code.to_string(),
1239            expected: Some(format!("valid {scope_label} element")),
1240            found: recovery_found_snippet_from_span(input, recovery_end),
1241            suggestion: Some(format!("Fix this {scope_label} member and re-run parsing.")),
1242            category: Some(DiagnosticCategory::ParseError),
1243        },
1244    }
1245}
1246
1247fn parse_error_from_recovery_node(span: &crate::ast::Span, node: &ParseErrorNode) -> ParseError {
1248    let mut err = ParseError::new(node.message.clone())
1249        .with_location(span.offset, span.line, span.column)
1250        .with_length(span.len.max(1))
1251        .with_code(node.code.clone())
1252        .with_category(
1253            node.category
1254                .unwrap_or_else(|| category_from_code(node.code.as_str())),
1255        );
1256    let severity = if node.code == "unsupported_annotation_syntax" {
1257        DiagnosticSeverity::Warning
1258    } else {
1259        DiagnosticSeverity::Error
1260    };
1261    err = err.with_severity(severity);
1262    if let Some(expected) = &node.expected {
1263        err = err.with_expected(expected.clone());
1264    }
1265    if let Some(found) = &node.found {
1266        err = err.with_found(found.clone());
1267    }
1268    if let Some(suggestion) = &node.suggestion {
1269        err = err.with_suggestion(suggestion.clone());
1270    }
1271    err
1272}
1273
1274fn diagnostic_specificity(err: &ParseError) -> u8 {
1275    match err.code.as_deref() {
1276        Some("missing_member_name")
1277        | Some("missing_type_reference")
1278        | Some("invalid_qualified_name_separator")
1279        | Some("invalid_typing_operator")
1280        | Some("missing_expression_after_operator")
1281        | Some("invalid_unit_reference")
1282        | Some("missing_body_or_semicolon")
1283        | Some("missing_semicolon")
1284        | Some("unexpected_closing_brace")
1285        | Some("missing_closing_brace")
1286        | Some("unsupported_annotation_syntax")
1287        | Some("invalid_bare_identifier_in_action_body")
1288        | Some("invalid_bare_identifier_in_state_body")
1289        | Some("recovery_cascade_suppressed")
1290        | Some("unexpected_keyword_in_scope") => 5,
1291        Some("illegal_top_level_definition") => 4,
1292        Some(code) if code.starts_with("recovered_") => 2,
1293        Some("expected_end_of_input") | Some("expected_keyword") => 1,
1294        _ => 3,
1295    }
1296}
1297
1298fn dedup_errors(mut errors: Vec<ParseError>) -> Vec<ParseError> {
1299    errors.sort_by_key(|e| {
1300        (
1301            e.offset.unwrap_or(usize::MAX),
1302            e.line.unwrap_or(u32::MAX),
1303            e.column.unwrap_or(usize::MAX),
1304            std::cmp::Reverse(diagnostic_specificity(e)),
1305        )
1306    });
1307
1308    let mut deduped = Vec::new();
1309    for err in errors {
1310        let duplicate = deduped.iter().any(|existing: &ParseError| {
1311            let same_start = existing.offset == err.offset
1312                && existing.line == err.line
1313                && existing.column == err.column;
1314            let same_found = existing.found == err.found;
1315            let existing_specificity = diagnostic_specificity(existing);
1316            let err_specificity = diagnostic_specificity(&err);
1317            same_start
1318                && (same_found || existing.code == err.code)
1319                && existing_specificity >= err_specificity
1320        });
1321        if !duplicate {
1322            deduped.push(err);
1323        }
1324    }
1325
1326    deduped.sort_by_key(|e| (e.offset.unwrap_or(usize::MAX), e.line.unwrap_or(u32::MAX)));
1327    deduped
1328}
1329
1330fn is_cascade_candidate(err: &ParseError) -> bool {
1331    matches!(err.code.as_deref(), Some("missing_semicolon"))
1332        || err
1333            .code
1334            .as_deref()
1335            .is_some_and(|code| code.starts_with("recovered_"))
1336}
1337
1338fn cascade_family(err: &ParseError) -> Option<&str> {
1339    if matches!(err.code.as_deref(), Some("missing_semicolon")) {
1340        Some("missing_semicolon")
1341    } else if err
1342        .code
1343        .as_deref()
1344        .is_some_and(|code| code.starts_with("recovered_"))
1345    {
1346        Some("recovered")
1347    } else {
1348        None
1349    }
1350}
1351
1352fn make_cascade_summary(run: &[ParseError]) -> Option<ParseError> {
1353    let summary_anchor = run.first()?;
1354    let suppressed = run.len().saturating_sub(3);
1355    let family = cascade_family(summary_anchor).unwrap_or("recovery");
1356    let mut err = ParseError::new(format!(
1357        "suppressed {suppressed} cascading {family} diagnostic{} after earlier recovery errors",
1358        if suppressed == 1 { "" } else { "s" }
1359    ))
1360    .with_location(
1361        summary_anchor.offset?,
1362        summary_anchor.line?,
1363        summary_anchor.column?,
1364    )
1365    .with_length(summary_anchor.length.unwrap_or(1).max(1))
1366    .with_code("recovery_cascade_suppressed")
1367    .with_expected("fix the first syntax error in this body")
1368    .with_suggestion(
1369        "Fix the earliest diagnostic in this body first; later syntax errors may be cascades.",
1370    )
1371    .with_severity(DiagnosticSeverity::Warning)
1372    .with_category(DiagnosticCategory::ParseError);
1373    if let Some(found) = &summary_anchor.found {
1374        err = err.with_found(found.clone());
1375    }
1376    Some(err)
1377}
1378
1379fn suppress_diagnostic_cascades(errors: Vec<ParseError>) -> Vec<ParseError> {
1380    const MAX_UNSUMMARIZED_CASCADE: usize = 3;
1381
1382    let mut output = Vec::new();
1383    let mut run: Vec<ParseError> = Vec::new();
1384
1385    let flush_run = |run: &mut Vec<ParseError>, output: &mut Vec<ParseError>| {
1386        if run.len() <= MAX_UNSUMMARIZED_CASCADE {
1387            output.append(run);
1388        } else {
1389            output.extend(run.drain(..MAX_UNSUMMARIZED_CASCADE));
1390            if let Some(summary) = make_cascade_summary(run) {
1391                output.push(summary);
1392            }
1393            run.clear();
1394        }
1395    };
1396
1397    for err in errors {
1398        let continues_run = run.last().is_some_and(|previous| {
1399            is_cascade_candidate(&err)
1400                && cascade_family(previous) == cascade_family(&err)
1401                && previous.line.zip(err.line).is_some_and(|(a, b)| b <= a + 1)
1402        });
1403
1404        if is_cascade_candidate(&err) && (run.is_empty() || continues_run) {
1405            run.push(err);
1406        } else {
1407            flush_run(&mut run, &mut output);
1408            if is_cascade_candidate(&err) {
1409                run.push(err);
1410            } else {
1411                output.push(err);
1412            }
1413        }
1414    }
1415    flush_run(&mut run, &mut output);
1416    output.sort_by_key(|e| (e.offset.unwrap_or(usize::MAX), e.line.unwrap_or(u32::MAX)));
1417    output
1418}
1419
1420fn root_body_recovery_error(input: Input<'_>, scope: &str) -> ParseError {
1421    let (found, len) = fragment_to_found_snippet(input.fragment());
1422    let mut err = ParseError::new(format!(
1423        "could not parse {scope} body; skipped to next root element"
1424    ))
1425    .with_location(
1426        input.location_offset(),
1427        input.location_line(),
1428        input.get_column(),
1429    )
1430    .with_length(len.max(1))
1431    .with_code("recovered_root_body")
1432    .with_expected(format!("valid {scope} body"))
1433    .with_suggestion(
1434        "Fix the first syntax error in this body; later root-level diagnostics may be cascades.",
1435    )
1436    .with_severity(DiagnosticSeverity::Error)
1437    .with_category(DiagnosticCategory::ParseError);
1438    if !found.is_empty() {
1439        err = err.with_found(found);
1440    }
1441    err
1442}
1443
1444fn root_body_scope(fragment: &[u8]) -> Option<&'static str> {
1445    let fragment = trim_ascii_start(fragment);
1446    if lex::starts_with_keyword(fragment, b"package")
1447        || lex::starts_with_keyword(fragment, b"library")
1448        || lex::starts_with_keyword(fragment, b"standard")
1449    {
1450        Some("package")
1451    } else if lex::starts_with_keyword(fragment, b"namespace") {
1452        Some("namespace")
1453    } else {
1454        None
1455    }
1456}
1457
1458fn collect_requirement_body_errors(body: &RequirementDefBody, errors: &mut Vec<ParseError>) {
1459    if let RequirementDefBody::Brace { elements } = body {
1460        for element in elements {
1461            match &element.value {
1462                RequirementDefBodyElement::Error(n) => {
1463                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1464                }
1465                RequirementDefBodyElement::Frame(n) => {
1466                    collect_requirement_body_errors(&n.value.body, errors)
1467                }
1468                _ => {}
1469            }
1470        }
1471    }
1472}
1473
1474fn collect_action_def_body_errors(body: &ActionDefBody, errors: &mut Vec<ParseError>) {
1475    if let ActionDefBody::Brace { elements } = body {
1476        for element in elements {
1477            if let ActionDefBodyElement::Error(n) = &element.value {
1478                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1479            }
1480        }
1481    }
1482}
1483
1484fn collect_action_usage_body_errors(body: &ActionUsageBody, errors: &mut Vec<ParseError>) {
1485    if let ActionUsageBody::Brace { elements } = body {
1486        for element in elements {
1487            match &element.value {
1488                ActionUsageBodyElement::Error(n) => {
1489                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1490                }
1491                ActionUsageBodyElement::ActionUsage(n) => {
1492                    collect_action_usage_body_errors(&n.value.body, errors)
1493                }
1494                _ => {}
1495            }
1496        }
1497    }
1498}
1499
1500fn collect_state_body_errors(body: &StateDefBody, errors: &mut Vec<ParseError>) {
1501    if let StateDefBody::Brace { elements } = body {
1502        for element in elements {
1503            match &element.value {
1504                StateDefBodyElement::Error(n) => {
1505                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1506                }
1507                StateDefBodyElement::Entry(n) => collect_state_body_errors(&n.value.body, errors),
1508                StateDefBodyElement::RequirementUsage(n) => {
1509                    collect_requirement_body_errors(&n.value.body, errors)
1510                }
1511                StateDefBodyElement::StateUsage(n) => {
1512                    collect_state_body_errors(&n.value.body, errors)
1513                }
1514                _ => {}
1515            }
1516        }
1517    }
1518}
1519
1520fn collect_use_case_body_errors(body: &UseCaseDefBody, errors: &mut Vec<ParseError>) {
1521    if let UseCaseDefBody::Brace { elements } = body {
1522        for element in elements {
1523            if let UseCaseDefBodyElement::Error(n) = &element.value {
1524                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1525            }
1526        }
1527    }
1528}
1529
1530fn collect_constraint_body_errors(body: &ConstraintDefBody, errors: &mut Vec<ParseError>) {
1531    if let ConstraintDefBody::Brace { elements } = body {
1532        for element in elements {
1533            if let ConstraintDefBodyElement::Error(n) = &element.value {
1534                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1535            }
1536        }
1537    }
1538}
1539
1540fn collect_calc_body_errors(body: &CalcDefBody, errors: &mut Vec<ParseError>) {
1541    if let CalcDefBody::Brace { elements } = body {
1542        for element in elements {
1543            if let CalcDefBodyElement::Error(n) = &element.value {
1544                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1545            }
1546        }
1547    }
1548}
1549
1550fn collect_view_def_body_errors(body: &ViewDefBody, errors: &mut Vec<ParseError>) {
1551    if let ViewDefBody::Brace { elements } = body {
1552        for element in elements {
1553            if let ViewDefBodyElement::Error(n) = &element.value {
1554                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1555            }
1556        }
1557    }
1558}
1559
1560fn collect_view_body_errors(body: &ViewBody, errors: &mut Vec<ParseError>) {
1561    if let ViewBody::Brace { elements } = body {
1562        for element in elements {
1563            if let ViewBodyElement::Error(n) = &element.value {
1564                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1565            }
1566        }
1567    }
1568}
1569
1570fn collect_part_def_body_errors(body: &PartDefBody, errors: &mut Vec<ParseError>) {
1571    if let PartDefBody::Brace { elements } = body {
1572        for element in elements {
1573            match &element.value {
1574                PartDefBodyElement::Error(n) => {
1575                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1576                }
1577                PartDefBodyElement::PartUsage(n) => {
1578                    collect_part_usage_body_errors(&n.value.body, errors)
1579                }
1580                PartDefBodyElement::Perform(n) => {
1581                    collect_perform_body_errors(&n.value.body, errors)
1582                }
1583                _ => {}
1584            }
1585        }
1586    }
1587}
1588
1589fn collect_perform_body_errors(body: &crate::ast::PerformBody, _errors: &mut Vec<ParseError>) {
1590    match body {
1591        crate::ast::PerformBody::Semicolon => {}
1592        crate::ast::PerformBody::Brace { .. } => {}
1593    }
1594}
1595
1596fn collect_part_usage_body_errors(body: &PartUsageBody, errors: &mut Vec<ParseError>) {
1597    if let PartUsageBody::Brace { elements } = body {
1598        for element in elements {
1599            match &element.value {
1600                PartUsageBodyElement::Error(n) => {
1601                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1602                }
1603                PartUsageBodyElement::PartUsage(n) => {
1604                    collect_part_usage_body_errors(&n.value.body, errors)
1605                }
1606                PartUsageBodyElement::Perform(n) => {
1607                    collect_perform_body_errors(&n.value.body, errors)
1608                }
1609                PartUsageBodyElement::StateUsage(n) => {
1610                    collect_state_body_errors(&n.value.body, errors)
1611                }
1612                _ => {}
1613            }
1614        }
1615    }
1616}
1617
1618fn collect_package_body_errors(body: &PackageBody, errors: &mut Vec<ParseError>) {
1619    if let PackageBody::Brace { elements } = body {
1620        for element in elements {
1621            match &element.value {
1622                PackageBodyElement::Error(n) => {
1623                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1624                }
1625                PackageBodyElement::Package(n) => {
1626                    collect_package_body_errors(&n.value.body, errors)
1627                }
1628                PackageBodyElement::LibraryPackage(n) => {
1629                    collect_package_body_errors(&n.value.body, errors)
1630                }
1631                PackageBodyElement::PartDef(n) => {
1632                    collect_part_def_body_errors(&n.value.body, errors)
1633                }
1634                PackageBodyElement::PartUsage(n) => {
1635                    collect_part_usage_body_errors(&n.value.body, errors)
1636                }
1637                PackageBodyElement::ActionDef(n) => {
1638                    collect_action_def_body_errors(&n.value.body, errors)
1639                }
1640                PackageBodyElement::ActionUsage(n) => {
1641                    collect_action_usage_body_errors(&n.value.body, errors)
1642                }
1643                PackageBodyElement::RequirementDef(n) => {
1644                    collect_requirement_body_errors(&n.value.body, errors)
1645                }
1646                PackageBodyElement::RequirementUsage(n) => {
1647                    collect_requirement_body_errors(&n.value.body, errors)
1648                }
1649                PackageBodyElement::UseCaseDef(n) => {
1650                    collect_use_case_body_errors(&n.value.body, errors)
1651                }
1652                PackageBodyElement::UseCaseUsage(n) => {
1653                    collect_use_case_body_errors(&n.value.body, errors)
1654                }
1655                PackageBodyElement::ConcernUsage(n) => {
1656                    collect_requirement_body_errors(&n.value.body, errors)
1657                }
1658                PackageBodyElement::StateDef(n) => collect_state_body_errors(&n.value.body, errors),
1659                PackageBodyElement::StateUsage(n) => {
1660                    collect_state_body_errors(&n.value.body, errors)
1661                }
1662                PackageBodyElement::ConstraintDef(n) => {
1663                    collect_constraint_body_errors(&n.value.body, errors)
1664                }
1665                PackageBodyElement::CalcDef(n) => collect_calc_body_errors(&n.value.body, errors),
1666                PackageBodyElement::ViewDef(n) => {
1667                    collect_view_def_body_errors(&n.value.body, errors)
1668                }
1669                PackageBodyElement::ViewUsage(n) => collect_view_body_errors(&n.value.body, errors),
1670                _ => {}
1671            }
1672        }
1673    }
1674}
1675
1676fn collect_recovery_errors(root: &RootNamespace) -> Vec<ParseError> {
1677    let mut errors = Vec::new();
1678    for element in &root.elements {
1679        match &element.value {
1680            crate::ast::RootElement::Package(n) => {
1681                collect_package_body_errors(&n.value.body, &mut errors)
1682            }
1683            crate::ast::RootElement::LibraryPackage(n) => {
1684                collect_package_body_errors(&n.value.body, &mut errors)
1685            }
1686            crate::ast::RootElement::Namespace(n) => {
1687                collect_package_body_errors(&n.value.body, &mut errors)
1688            }
1689            crate::ast::RootElement::Import(_) => {}
1690        }
1691    }
1692    errors
1693}
1694
1695/// Parse full input; must consume entire input. Strips UTF-8 BOM if present.
1696#[allow(clippy::result_large_err)]
1697pub fn parse_root(input: &str) -> Result<RootNamespace, ParseError> {
1698    let bytes = input
1699        .strip_prefix('\u{FEFF}')
1700        .map(str::as_bytes)
1701        .unwrap_or_else(|| input.as_bytes());
1702    let located = LocatedSpan::new(bytes);
1703    match package::root_namespace(located) {
1704        Ok((rest, root)) => {
1705            if !rest.fragment().is_empty() && has_unclosed_brace(bytes) {
1706                return Err(missing_closing_brace_error_at_eof(bytes));
1707            }
1708            if rest.fragment().is_empty() {
1709                log::debug!("parse_root: success, {} top-level elements", root.elements.len());
1710                Ok(root)
1711            } else if trim_ascii_start(rest.fragment()).starts_with(b"}") {
1712                Err(unexpected_closing_brace_parse_error(rest))
1713            } else {
1714                let offset = located.location_offset() + located.fragment().len() - rest.fragment().len();
1715                let unconsumed = rest.fragment();
1716                let first_80 = unconsumed.get(..80.min(unconsumed.len())).unwrap_or(unconsumed);
1717                log::debug!(
1718                    "parse_root: expected end of input; parsed {} elements; unconsumed len={}, offset={}, first 80 bytes: {:?}",
1719                    root.elements.len(),
1720                    unconsumed.len(),
1721                    offset,
1722                    first_80,
1723                );
1724                log::debug!(
1725                    "parse_root: unconsumed as str: {:?}",
1726                    String::from_utf8_lossy(first_80),
1727                );
1728                let (found_snippet, found_len) = fragment_to_found_snippet(rest.fragment());
1729                let mut pe = ParseError::new("expected end of input")
1730                    .with_location(offset, rest.location_line(), rest.get_column())
1731                    .with_length(found_len.max(1))
1732                    .with_code("expected_end_of_input")
1733                    .with_category(DiagnosticCategory::ParseError);
1734                if !found_snippet.is_empty() {
1735                    pe = pe.with_found(found_snippet);
1736                }
1737                if root.elements.is_empty() && is_illegal_top_level_definition(rest.fragment()) {
1738                    pe = pe
1739                        .with_code("illegal_top_level_definition")
1740                        .with_expected("'package', 'namespace', or 'import'")
1741                        .with_suggestion(
1742                            "Wrap this declaration in `package ... { ... }` or `namespace ... { ... }`.",
1743                        );
1744                    pe.message = "illegal top-level definition".to_string();
1745                }
1746                Err(pe)
1747            }
1748        }
1749        Err(nom::Err::Error(e)) => Err(missing_closing_brace_error(bytes, e.input).unwrap_or_else(|| {
1750            nom_err_to_parse_error(
1751                &e,
1752                None,
1753                Some("'package', 'namespace', or 'import' at top level; or valid element in package body"),
1754            )
1755        })),
1756        Err(nom::Err::Failure(e)) => Err(missing_closing_brace_error(bytes, e.input).unwrap_or_else(|| {
1757            nom_err_to_parse_error(
1758                &e,
1759                None,
1760                Some("'package', 'namespace', or 'import' at top level; or valid element in package body"),
1761            )
1762        })),
1763        Err(nom::Err::Incomplete(_)) => Err(
1764            ParseError::new("unexpected end of input")
1765                .with_code("unexpected_eof")
1766                .with_category(DiagnosticCategory::ParseError),
1767        ),
1768    }
1769}
1770
1771const MAX_RECOVERY_ERRORS: usize = 100;
1772
1773/// Parse input with error recovery: collects multiple diagnostics and returns a partial AST when errors occur.
1774/// Use this for language servers so the user sees all parse errors and features (e.g. hover) can use the partial AST.
1775pub fn parse_with_diagnostics(input: &str) -> ParseResult {
1776    let bytes = input
1777        .strip_prefix('\u{FEFF}')
1778        .map(str::as_bytes)
1779        .unwrap_or_else(|| input.as_bytes());
1780    let located = LocatedSpan::new(bytes);
1781
1782    let mut elements = Vec::new();
1783    let mut errors = Vec::new();
1784
1785    let (mut input, _) = match lex::ws_and_comments(located) {
1786        Ok(x) => x,
1787        Err(_) => {
1788            return ParseResult {
1789                root: RootNamespace { elements: vec![] },
1790                errors: vec![ParseError::new("invalid input")
1791                    .with_code("invalid_input")
1792                    .with_category(DiagnosticCategory::ParseError)],
1793            };
1794        }
1795    };
1796
1797    while errors.len() < MAX_RECOVERY_ERRORS {
1798        // Skip leading ws/comments; if nothing left, we're done (avoids parsing "" as root_element).
1799        let (rest, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
1800        input = rest;
1801        if input.fragment().is_empty() {
1802            break;
1803        }
1804        match package::root_element(input) {
1805            Ok((rest, elem)) => {
1806                elements.push(elem);
1807                input = rest;
1808            }
1809            Err(nom::Err::Error(e)) | Err(nom::Err::Failure(e)) => {
1810                let (trimmed, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
1811                if trim_ascii_start(trimmed.fragment()).starts_with(b"}") {
1812                    errors.push(unexpected_closing_brace_parse_error(trimmed));
1813                    let skip_result = lex::skip_to_next_sync_point(trimmed);
1814                    match skip_result {
1815                        Ok((rest, _)) => input = rest,
1816                        Err(_) => break,
1817                    }
1818                    continue;
1819                }
1820                if errors.is_empty()
1821                    && has_unclosed_brace(bytes)
1822                    && (lex::starts_with_keyword(trimmed.fragment(), b"package")
1823                        || lex::starts_with_keyword(trimmed.fragment(), b"namespace")
1824                        || lex::starts_with_keyword(trimmed.fragment(), b"library")
1825                        || lex::starts_with_keyword(trimmed.fragment(), b"standard"))
1826                {
1827                    errors.push(missing_closing_brace_error_at_eof(bytes));
1828                    break;
1829                }
1830                if let Some(scope) = root_body_scope(input.fragment()) {
1831                    let (error_input, _) = lex::ws_and_comments(e.input).unwrap_or((e.input, ()));
1832                    if error_input.fragment().starts_with(b"{") {
1833                        errors.push(root_body_recovery_error(error_input, scope));
1834                        match lex::skip_statement_or_block(error_input) {
1835                            Ok((rest, _))
1836                                if rest.location_offset() > error_input.location_offset() =>
1837                            {
1838                                input = rest;
1839                                continue;
1840                            }
1841                            _ => {}
1842                        }
1843                    }
1844                }
1845                let pe = missing_closing_brace_error(bytes, e.input).unwrap_or_else(|| {
1846                    nom_err_to_parse_error(&e, None, Some("'package', 'namespace', or 'import'"))
1847                });
1848                errors.push(pe);
1849                let skip_result = lex::skip_to_next_sync_point(e.input);
1850                match skip_result {
1851                    Ok((rest, _)) => input = rest,
1852                    Err(_) => break,
1853                }
1854            }
1855            Err(nom::Err::Incomplete(_)) => {
1856                errors.push(
1857                    ParseError::new("unexpected end of input")
1858                        .with_location(
1859                            input.location_offset(),
1860                            input.location_line(),
1861                            input.get_column(),
1862                        )
1863                        .with_length(1)
1864                        .with_code("unexpected_eof")
1865                        .with_category(DiagnosticCategory::ParseError),
1866                );
1867                break;
1868            }
1869        }
1870    }
1871
1872    let (input, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
1873
1874    if input.fragment().is_empty()
1875        && has_unclosed_brace(bytes)
1876        && !errors
1877            .iter()
1878            .any(|e| e.code.as_deref() == Some("missing_closing_brace"))
1879    {
1880        errors.push(missing_closing_brace_error_at_eof(bytes));
1881    }
1882
1883    if !input.fragment().is_empty()
1884        && !errors
1885            .iter()
1886            .any(|e| e.code.as_deref() == Some("missing_closing_brace"))
1887    {
1888        if trim_ascii_start(input.fragment()).starts_with(b"}") {
1889            errors.push(unexpected_closing_brace_parse_error(input));
1890        } else {
1891            let (found_snippet, found_len) = fragment_to_found_snippet(input.fragment());
1892            let mut pe = ParseError::new("expected end of input")
1893                .with_location(
1894                    input.location_offset(),
1895                    input.location_line(),
1896                    input.get_column(),
1897                )
1898                .with_length(found_len.max(1))
1899                .with_code("expected_end_of_input")
1900                .with_severity(DiagnosticSeverity::Error)
1901                .with_category(DiagnosticCategory::ParseError);
1902            if !found_snippet.is_empty() {
1903                pe = pe.with_found(found_snippet);
1904            }
1905            errors.push(pe);
1906        }
1907    }
1908
1909    errors.extend(collect_recovery_errors(&RootNamespace {
1910        elements: elements.clone(),
1911    }));
1912    errors = dedup_errors(errors);
1913    errors = suppress_diagnostic_cascades(errors);
1914
1915    ParseResult {
1916        root: RootNamespace { elements },
1917        errors,
1918    }
1919}