Skip to main content

sysml_v2_parser/parser/
mod.rs

//! Nom-based parser for SysML v2 textual notation.
//!
//! Organized into one submodule per language construct (see the `mod` declarations below),
//! including:
//! - [lex]: whitespace, comments, names, qualified names, skip helpers
//! - [attribute]: attribute definition and usage
//! - [import]: import and relationship body
//! - [part]: part definition and part usage
//! - [package]: package and root namespace
9
10mod action;
11mod alias;
12mod allocation;
13mod attribute;
14mod case;
15mod connection;
16mod constraint;
17mod dependency;
18mod enumeration;
19mod expr;
20mod flow;
21mod import;
22mod individual;
23mod interface;
24mod item;
25mod lex;
26mod metadata;
27mod metadata_annotation;
28mod occurrence;
29mod package;
30mod part;
31mod port;
32mod requirement;
33mod span;
34mod state;
35mod usecase;
36mod view;
37
38pub(crate) use span::{node_from_to, span_from_to, with_span, Input};
39
40use crate::ast::{
41    ActionDefBody, ActionDefBodyElement, ActionUsageBody, ActionUsageBodyElement, CalcDefBody,
42    CalcDefBodyElement, ConstraintDefBody, ConstraintDefBodyElement, PackageBody,
43    PackageBodyElement, ParseErrorNode, PartDefBody, PartDefBodyElement, PartUsageBody,
44    PartUsageBodyElement, RequirementDefBody, RequirementDefBodyElement, RootNamespace,
45    StateDefBody, StateDefBodyElement, UseCaseDefBody, UseCaseDefBodyElement, ViewBody,
46    ViewBodyElement, ViewDefBody, ViewDefBodyElement,
47};
48use crate::error::ParseError;
49use nom::error::Error;
50use nom::Parser;
51use nom_locate::LocatedSpan;
52
53/// Result of parsing with error recovery: a (possibly partial) AST and zero or more diagnostics.
54#[derive(Debug, Clone)]
55pub struct ParseResult {
56    /// Root namespace; contains all successfully parsed top-level elements (partial when errors occurred).
57    pub root: RootNamespace,
58    /// All parse errors encountered (multiple when recovery is used).
59    pub errors: Vec<ParseError>,
60}
61
62impl ParseResult {
63    /// True if the document parsed fully with no errors.
64    pub fn is_ok(&self) -> bool {
65        self.errors.is_empty()
66    }
67}
68
/// Upper bound, in bytes, on the "found" snippet produced by `fragment_to_found_snippet`.
const FOUND_SNIPPET_MAX_LEN: usize = 40;

/// Keywords checked by `is_illegal_top_level_definition`: a root-level fragment that starts
/// with one of them is reported as an `illegal_top_level_definition`.
const ILLEGAL_TOP_LEVEL_STARTERS: &[&[u8]] = &[
    b"action",
    b"actor",
    b"alias",
    b"allocate",
    b"allocation",
    b"attribute",
    b"bind",
    b"calc",
    b"case",
    b"concern",
    b"connection",
    b"constraint",
    b"dependency",
    b"enum",
    b"flow",
    b"interface",
    b"item",
    b"metadata",
    b"occurrence",
    b"part",
    b"perform",
    b"port",
    b"ref",
    b"require",
    b"requirement",
    b"satisfy",
    b"state",
    b"use",
    b"verification",
    b"view",
    b"viewpoint",
];
103
104/// Take a short snippet from the input at the error position for "found" display.
105/// Uses first line or first FOUND_SNIPPET_MAX_LEN bytes, UTF-8 with replacement char.
106fn fragment_to_found_snippet(fragment: &[u8]) -> (String, usize) {
107    let take = fragment
108        .iter()
109        .position(|&b| b == b'\n' || b == b'\r')
110        .map(|p| p.min(FOUND_SNIPPET_MAX_LEN))
111        .unwrap_or_else(|| fragment.len().min(FOUND_SNIPPET_MAX_LEN));
112    let slice = fragment.get(..take).unwrap_or(fragment);
113    let s = String::from_utf8_lossy(slice)
114        .replace('\n', "\\n")
115        .replace('\r', "\\r");
116    let len = slice.len();
117    (s.trim_end().to_string(), len)
118}
119
120pub(crate) fn recovery_found_snippet(input: Input<'_>) -> Option<String> {
121    let frag = input.fragment();
122    let take = frag
123        .iter()
124        .position(|&b| b == b'\n' || b == b'\r')
125        .unwrap_or(frag.len())
126        .min(60);
127    let snippet = String::from_utf8_lossy(&frag[..take]).trim().to_string();
128    if snippet.is_empty() {
129        None
130    } else {
131        Some(snippet)
132    }
133}
134
135/// Map nom error kind to a human-readable message for language server diagnostics.
136fn nom_error_kind_to_message(code: &nom::error::ErrorKind) -> &'static str {
137    use nom::error::ErrorKind;
138    match code {
139        ErrorKind::Tag => "expected keyword or token",
140        ErrorKind::Digit => "expected number",
141        ErrorKind::Alpha => "expected identifier",
142        ErrorKind::AlphaNumeric => "expected identifier",
143        ErrorKind::Space => "expected whitespace",
144        ErrorKind::MultiSpace => "expected whitespace",
145        ErrorKind::Eof => "unexpected end of input",
146        ErrorKind::TakeUntil => "expected terminator",
147        ErrorKind::TakeWhile1 => "expected token",
148        ErrorKind::Alt => {
149            "expected package, import, part, port, interface, alias, attribute, or action"
150        }
151        ErrorKind::Many0 | ErrorKind::Many1 => "expected list of elements",
152        _ => "parse error",
153    }
154}
155
156/// Map nom error kind to a specific code for LSP/quick fixes.
157fn nom_error_kind_to_code(code: &nom::error::ErrorKind) -> &'static str {
158    use nom::error::ErrorKind;
159    match code {
160        ErrorKind::Tag => "expected_keyword",
161        ErrorKind::Digit => "expected_number",
162        ErrorKind::Alpha | ErrorKind::AlphaNumeric => "expected_identifier",
163        ErrorKind::Space | ErrorKind::MultiSpace => "expected_whitespace",
164        ErrorKind::Eof => "unexpected_eof",
165        ErrorKind::TakeUntil => "expected_terminator",
166        ErrorKind::TakeWhile1 => "expected_token",
167        ErrorKind::Alt => "expected_alt",
168        ErrorKind::Many0 | ErrorKind::Many1 => "expected_list",
169        _ => "parse_error",
170    }
171}
172
173fn nom_err_to_parse_error(
174    e: &Error<Input<'_>>,
175    length_override: Option<usize>,
176    expected_context: Option<&'static str>,
177) -> ParseError {
178    let offset = e.input.location_offset();
179    let line = e.input.location_line();
180    let column = e.input.get_column();
181    let fragment = e.input.fragment();
182    let (found_snippet, found_len) = fragment_to_found_snippet(fragment);
183    let message = nom_error_kind_to_message(&e.code).to_string();
184    let span_len = length_override.unwrap_or(found_len).max(1);
185    let mut pe = ParseError::new(message)
186        .with_location(offset, line, column)
187        .with_length(span_len)
188        .with_code(nom_error_kind_to_code(&e.code));
189    if !found_snippet.is_empty() {
190        pe = pe.with_found(found_snippet);
191    }
192    if let Some(ctx) = expected_context {
193        pe = pe.with_expected(ctx);
194    }
195    let at_root = expected_context.is_some_and(|ctx| {
196        ctx.contains("'package', 'namespace', or 'import'") || ctx.contains("top level")
197    });
198    if at_root && is_illegal_top_level_definition(fragment) {
199        pe.message = "illegal top-level definition".to_string();
200        pe.code = Some("illegal_top_level_definition".to_string());
201        pe.expected = Some("'package', 'namespace', or 'import'".to_string());
202        pe.suggestion = Some(
203            "Wrap this declaration in `package ... { ... }` or `namespace ... { ... }`."
204                .to_string(),
205        );
206    }
207    pe
208}
209
210fn is_illegal_top_level_definition(fragment: &[u8]) -> bool {
211    let trimmed = trim_ascii_start(fragment);
212    !trimmed.starts_with(b"}")
213        && !trimmed.starts_with(b"//")
214        && !trimmed.starts_with(b"/*")
215        && lex::starts_with_any_keyword(trimmed, ILLEGAL_TOP_LEVEL_STARTERS)
216}
217
/// Returns the sub-slice of `fragment` that begins at its first byte which is not ASCII
/// whitespace (per `u8::is_ascii_whitespace`); the result is empty when every byte is
/// whitespace.
fn trim_ascii_start(fragment: &[u8]) -> &[u8] {
    let start = fragment
        .iter()
        .position(|byte| !byte.is_ascii_whitespace())
        .unwrap_or(fragment.len());
    &fragment[start..]
}
228
229fn starts_with_missing_name_after_keyword(
230    fragment: &[u8],
231    keyword: &[u8],
232    trailing_keywords: &[&[u8]],
233) -> bool {
234    let mut fragment = trim_ascii_start(fragment);
235    if !lex::starts_with_keyword(fragment, keyword) {
236        return false;
237    }
238    fragment = &fragment[keyword.len()..];
239    while let Some(first) = fragment.first() {
240        if first.is_ascii_whitespace() {
241            fragment = &fragment[1..];
242            continue;
243        }
244        break;
245    }
246    for trailing in trailing_keywords {
247        if lex::starts_with_keyword(fragment, trailing) {
248            fragment = &fragment[trailing.len()..];
249            while let Some(first) = fragment.first() {
250                if first.is_ascii_whitespace() {
251                    fragment = &fragment[1..];
252                    continue;
253                }
254                break;
255            }
256        }
257    }
258    fragment.starts_with(b":")
259}
260
261fn starts_with_missing_type_after_keyword(
262    fragment: &[u8],
263    keyword: &[u8],
264    trailing_keywords: &[&[u8]],
265) -> bool {
266    let mut fragment = trim_ascii_start(fragment);
267    if !lex::starts_with_keyword(fragment, keyword) {
268        return false;
269    }
270    fragment = &fragment[keyword.len()..];
271    while let Some(first) = fragment.first() {
272        if first.is_ascii_whitespace() {
273            fragment = &fragment[1..];
274            continue;
275        }
276        break;
277    }
278    for trailing in trailing_keywords {
279        if lex::starts_with_keyword(fragment, trailing) {
280            fragment = &fragment[trailing.len()..];
281            while let Some(first) = fragment.first() {
282                if first.is_ascii_whitespace() {
283                    fragment = &fragment[1..];
284                    continue;
285                }
286                break;
287            }
288        }
289    }
290
291    let mut name_len = 0usize;
292    while name_len < fragment.len()
293        && (fragment[name_len].is_ascii_alphanumeric() || fragment[name_len] == b'_')
294    {
295        name_len += 1;
296    }
297    if name_len == 0 {
298        return false;
299    }
300    fragment = &fragment[name_len..];
301    while let Some(first) = fragment.first() {
302        if first.is_ascii_whitespace() {
303            fragment = &fragment[1..];
304            continue;
305        }
306        break;
307    }
308    if !fragment.starts_with(b":") {
309        return false;
310    }
311    fragment = &fragment[1..];
312    while let Some(first) = fragment.first() {
313        if first.is_ascii_whitespace() {
314            fragment = &fragment[1..];
315            continue;
316        }
317        break;
318    }
319
320    fragment.is_empty()
321        || fragment.starts_with(b";")
322        || fragment.starts_with(b"{")
323        || fragment.starts_with(b"}")
324        || lex::starts_with_keyword(fragment, b"then")
325        || lex::starts_with_keyword(fragment, b"if")
326        || lex::starts_with_keyword(fragment, b"do")
327}
328
329fn missing_name_diagnostic(fragment: &[u8]) -> Option<(&'static str, String, String, String)> {
330    #[allow(clippy::type_complexity)]
331    let cases: &[(&[u8], &[&[u8]], &str, &str)] = &[
332        (
333            b"subject",
334            &[],
335            "subject name",
336            "Use `subject laptop: Laptop;`.",
337        ),
338        (b"actor", &[], "actor name", "Use `actor user: User;`."),
339        (b"state", &[], "state name", "Use `state ready: Mode;`."),
340        (b"part", &[], "part name", "Use `part wheel: Wheel;`."),
341        (b"ref", &[], "reference name", "Use `ref sensor: Sensor;`."),
342        (b"port", &[], "port name", "Use `port power: PowerPort;`."),
343        (
344            b"attribute",
345            &[],
346            "attribute name",
347            "Use `attribute mass: MassValue;`.",
348        ),
349        (b"in", &[], "input name", "Use `in speed: Real;`."),
350        (b"out", &[], "output name", "Use `out result: Real;`."),
351        (
352            b"perform",
353            &[b"action"],
354            "action name",
355            "Use `perform action run: Runner;`.",
356        ),
357        (
358            b"return",
359            &[],
360            "return name",
361            "Use `return result: Real;`.",
362        ),
363    ];
364
365    for (keyword, trailing, missing_what, suggestion) in cases {
366        if starts_with_missing_name_after_keyword(fragment, keyword, trailing) {
367            return Some((
368                "missing_member_name",
369                format!("expected {missing_what} before ':'"),
370                format!("{missing_what} before ':'"),
371                suggestion.to_string(),
372            ));
373        }
374    }
375    None
376}
377
378fn missing_type_diagnostic(fragment: &[u8]) -> Option<(&'static str, String, String, String)> {
379    #[allow(clippy::type_complexity)]
380    let cases: &[(&[u8], &[&[u8]], &str)] = &[
381        (b"subject", &[], "subject type"),
382        (b"actor", &[], "actor type"),
383        (b"state", &[], "state type"),
384        (b"part", &[], "part type"),
385        (b"ref", &[], "reference type"),
386        (b"port", &[], "port type"),
387        (b"attribute", &[], "attribute type"),
388        (b"in", &[], "input type"),
389        (b"out", &[], "output type"),
390        (b"perform", &[b"action"], "action type"),
391        (b"return", &[], "return type"),
392    ];
393
394    for &(keyword, trailing, missing_what) in cases {
395        if starts_with_missing_type_after_keyword(fragment, keyword, trailing) {
396            let keyword_label = String::from_utf8_lossy(keyword);
397            let sample_name = if keyword == &b"subject"[..] {
398                "laptop"
399            } else if keyword == &b"actor"[..] {
400                "user"
401            } else if keyword == &b"state"[..] {
402                "ready"
403            } else if keyword == &b"part"[..] {
404                "wheel"
405            } else if keyword == &b"ref"[..] {
406                "sensor"
407            } else if keyword == &b"port"[..] {
408                "power"
409            } else if keyword == &b"attribute"[..] {
410                "mass"
411            } else if keyword == &b"in"[..] {
412                "speed"
413            } else if keyword == &b"out"[..] {
414                "result"
415            } else if keyword == &b"perform"[..] {
416                "run"
417            } else if keyword == &b"return"[..] {
418                "result"
419            } else {
420                "member"
421            };
422            let sample_type = if keyword == &b"subject"[..] {
423                "Laptop"
424            } else if keyword == &b"actor"[..] {
425                "User"
426            } else if keyword == &b"state"[..] {
427                "Mode"
428            } else if keyword == &b"part"[..] {
429                "Wheel"
430            } else if keyword == &b"ref"[..] {
431                "Sensor"
432            } else if keyword == &b"port"[..] {
433                "PowerPort"
434            } else if keyword == &b"attribute"[..] {
435                "MassValue"
436            } else if keyword == &b"in"[..] || keyword == &b"out"[..] {
437                "Real"
438            } else if keyword == &b"perform"[..] {
439                "Runner"
440            } else if keyword == &b"return"[..] {
441                "Real"
442            } else {
443                "Type"
444            };
445            let suggestion = if keyword == &b"perform"[..] {
446                format!("Use `perform action {sample_name}: {sample_type};`.")
447            } else if keyword == &b"return"[..] {
448                format!("Use `return {sample_name}: {sample_type};`.")
449            } else {
450                format!("Use `{keyword_label} {sample_name}: {sample_type};`.")
451            };
452            return Some((
453                "missing_type_reference",
454                format!("expected {missing_what} after ':'"),
455                format!("{missing_what} after ':'"),
456                suggestion,
457            ));
458        }
459    }
460    None
461}
462
463fn missing_closing_brace_error(bytes: &[u8], input: Input<'_>) -> Option<ParseError> {
464    if !input.fragment().is_empty() {
465        return None;
466    }
467    let consumed = &bytes[..input.location_offset().min(bytes.len())];
468    let opens = consumed.iter().filter(|&&b| b == b'{').count();
469    let closes = consumed.iter().filter(|&&b| b == b'}').count();
470    if opens <= closes {
471        return None;
472    }
473    Some(missing_closing_brace_error_at_eof(consumed))
474}
475
476fn missing_closing_brace_error_at_eof(bytes: &[u8]) -> ParseError {
477    let (line, column) = eof_line_column(bytes);
478    ParseError::new("missing closing '}'")
479        .with_location(bytes.len(), line, column)
480        .with_length(1)
481        .with_code("missing_closing_brace")
482        .with_expected("'}'")
483        .with_suggestion("Add '}' to close the open body.")
484}
485
/// Returns `true` when `bytes` contains more `{` bytes than `}` bytes. The count is purely
/// byte-wise; it does not look at where the braces occur.
fn has_unclosed_brace(bytes: &[u8]) -> bool {
    let (mut opens, mut closes) = (0usize, 0usize);
    for &byte in bytes {
        match byte {
            b'{' => opens += 1,
            b'}' => closes += 1,
            _ => {}
        }
    }
    opens > closes
}
491
/// Computes the 1-based `(line, column)` of the position just past the last byte of `bytes`.
///
/// Every `\n` starts a new line and resets the column to 1; every other byte (including
/// `\r` and each byte of a multi-byte character) advances the column by one.
fn eof_line_column(bytes: &[u8]) -> (u32, usize) {
    bytes
        .iter()
        .fold((1u32, 1usize), |(line, column), &byte| match byte {
            b'\n' => (line + 1, 1),
            _ => (line, column + 1),
        })
}
505
506pub(crate) fn build_recovery_error_node(
507    input: Input<'_>,
508    starters: &[&[u8]],
509    scope_label: &str,
510    generic_code: &str,
511) -> ParseErrorNode {
512    let trimmed = trim_ascii_start(input.fragment());
513
514    if let Some((code, message, expected, suggestion)) = missing_name_diagnostic(trimmed) {
515        return ParseErrorNode {
516            message,
517            code: code.to_string(),
518            expected: Some(expected),
519            found: recovery_found_snippet(input),
520            suggestion: Some(suggestion),
521        };
522    }
523
524    if let Some((code, message, expected, suggestion)) = missing_type_diagnostic(trimmed) {
525        return ParseErrorNode {
526            message,
527            code: code.to_string(),
528            expected: Some(expected),
529            found: recovery_found_snippet(input),
530            suggestion: Some(suggestion),
531        };
532    }
533
534    if lex::looks_like_missing_semicolon(input, starters) {
535        return ParseErrorNode {
536            message: "missing semicolon before next declaration".to_string(),
537            code: "missing_semicolon".to_string(),
538            expected: Some("';'".to_string()),
539            found: recovery_found_snippet(input),
540            suggestion: Some("Insert ';' before this declaration.".to_string()),
541        };
542    }
543
544    if lex::starts_with_keyword(trimmed, b"#") || lex::starts_with_keyword(trimmed, b"@") {
545        return ParseErrorNode {
546            message: format!("unsupported annotation syntax in {scope_label}"),
547            code: generic_code.to_string(),
548            expected: Some(format!("valid {scope_label} element")),
549            found: recovery_found_snippet(input),
550            suggestion: Some(
551                "Remove this annotation or extend the parser to support annotated declarations."
552                    .to_string(),
553            ),
554        };
555    }
556
557    ParseErrorNode {
558        message: format!("unexpected token in {scope_label}"),
559        code: generic_code.to_string(),
560        expected: Some(format!("valid {scope_label} element")),
561        found: recovery_found_snippet(input),
562        suggestion: Some(format!("Fix this {scope_label} member and re-run parsing.")),
563    }
564}
565
566fn is_only_trailing_closing_braces(mut input: Input<'_>) -> bool {
567    loop {
568        let (next, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
569        input = next;
570        if input.fragment().is_empty() {
571            return true;
572        }
573        if input.fragment().starts_with(b"}") {
574            match nom::bytes::complete::tag::<_, _, nom::error::Error<Input>>(&b"}"[..])
575                .parse(input)
576            {
577                Ok((next, _)) => {
578                    input = next;
579                    continue;
580                }
581                Err(_) => return false,
582            }
583        }
584        return false;
585    }
586}
587
588fn parse_error_from_recovery_node(span: &crate::ast::Span, node: &ParseErrorNode) -> ParseError {
589    let mut err = ParseError::new(node.message.clone())
590        .with_location(span.offset, span.line, span.column)
591        .with_length(span.len.max(1))
592        .with_code(node.code.clone());
593    if let Some(expected) = &node.expected {
594        err = err.with_expected(expected.clone());
595    }
596    if let Some(found) = &node.found {
597        err = err.with_found(found.clone());
598    }
599    if let Some(suggestion) = &node.suggestion {
600        err = err.with_suggestion(suggestion.clone());
601    }
602    err
603}
604
605fn collect_requirement_body_errors(body: &RequirementDefBody, errors: &mut Vec<ParseError>) {
606    if let RequirementDefBody::Brace { elements } = body {
607        for element in elements {
608            match &element.value {
609                RequirementDefBodyElement::Error(n) => {
610                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
611                }
612                RequirementDefBodyElement::Frame(n) => {
613                    collect_requirement_body_errors(&n.value.body, errors)
614                }
615                _ => {}
616            }
617        }
618    }
619}
620
621fn collect_action_def_body_errors(body: &ActionDefBody, errors: &mut Vec<ParseError>) {
622    if let ActionDefBody::Brace { elements } = body {
623        for element in elements {
624            if let ActionDefBodyElement::Error(n) = &element.value {
625                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
626            }
627        }
628    }
629}
630
631fn collect_action_usage_body_errors(body: &ActionUsageBody, errors: &mut Vec<ParseError>) {
632    if let ActionUsageBody::Brace { elements } = body {
633        for element in elements {
634            match &element.value {
635                ActionUsageBodyElement::Error(n) => {
636                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
637                }
638                ActionUsageBodyElement::ActionUsage(n) => {
639                    collect_action_usage_body_errors(&n.value.body, errors)
640                }
641                _ => {}
642            }
643        }
644    }
645}
646
647fn collect_state_body_errors(body: &StateDefBody, errors: &mut Vec<ParseError>) {
648    if let StateDefBody::Brace { elements } = body {
649        for element in elements {
650            match &element.value {
651                StateDefBodyElement::Error(n) => {
652                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
653                }
654                StateDefBodyElement::Entry(n) => collect_state_body_errors(&n.value.body, errors),
655                StateDefBodyElement::StateUsage(n) => {
656                    collect_state_body_errors(&n.value.body, errors)
657                }
658                _ => {}
659            }
660        }
661    }
662}
663
664fn collect_use_case_body_errors(body: &UseCaseDefBody, errors: &mut Vec<ParseError>) {
665    if let UseCaseDefBody::Brace { elements } = body {
666        for element in elements {
667            if let UseCaseDefBodyElement::Error(n) = &element.value {
668                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
669            }
670        }
671    }
672}
673
674fn collect_constraint_body_errors(body: &ConstraintDefBody, errors: &mut Vec<ParseError>) {
675    if let ConstraintDefBody::Brace { elements } = body {
676        for element in elements {
677            if let ConstraintDefBodyElement::Error(n) = &element.value {
678                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
679            }
680        }
681    }
682}
683
684fn collect_calc_body_errors(body: &CalcDefBody, errors: &mut Vec<ParseError>) {
685    if let CalcDefBody::Brace { elements } = body {
686        for element in elements {
687            if let CalcDefBodyElement::Error(n) = &element.value {
688                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
689            }
690        }
691    }
692}
693
694fn collect_view_def_body_errors(body: &ViewDefBody, errors: &mut Vec<ParseError>) {
695    if let ViewDefBody::Brace { elements } = body {
696        for element in elements {
697            if let ViewDefBodyElement::Error(n) = &element.value {
698                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
699            }
700        }
701    }
702}
703
704fn collect_view_body_errors(body: &ViewBody, errors: &mut Vec<ParseError>) {
705    if let ViewBody::Brace { elements } = body {
706        for element in elements {
707            if let ViewBodyElement::Error(n) = &element.value {
708                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
709            }
710        }
711    }
712}
713
714fn collect_part_def_body_errors(body: &PartDefBody, errors: &mut Vec<ParseError>) {
715    if let PartDefBody::Brace { elements } = body {
716        for element in elements {
717            match &element.value {
718                PartDefBodyElement::Error(n) => {
719                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
720                }
721                PartDefBodyElement::PartUsage(n) => {
722                    collect_part_usage_body_errors(&n.value.body, errors)
723                }
724                PartDefBodyElement::Perform(n) => {
725                    collect_perform_body_errors(&n.value.body, errors)
726                }
727                _ => {}
728            }
729        }
730    }
731}
732
733fn collect_perform_body_errors(body: &crate::ast::PerformBody, _errors: &mut Vec<ParseError>) {
734    match body {
735        crate::ast::PerformBody::Semicolon => {}
736        crate::ast::PerformBody::Brace { .. } => {}
737    }
738}
739
740fn collect_part_usage_body_errors(body: &PartUsageBody, errors: &mut Vec<ParseError>) {
741    if let PartUsageBody::Brace { elements } = body {
742        for element in elements {
743            match &element.value {
744                PartUsageBodyElement::Error(n) => {
745                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
746                }
747                PartUsageBodyElement::PartUsage(n) => {
748                    collect_part_usage_body_errors(&n.value.body, errors)
749                }
750                PartUsageBodyElement::Perform(n) => {
751                    collect_perform_body_errors(&n.value.body, errors)
752                }
753                PartUsageBodyElement::StateUsage(n) => {
754                    collect_state_body_errors(&n.value.body, errors)
755                }
756                _ => {}
757            }
758        }
759    }
760}
761
762fn collect_package_body_errors(body: &PackageBody, errors: &mut Vec<ParseError>) {
763    if let PackageBody::Brace { elements } = body {
764        for element in elements {
765            match &element.value {
766                PackageBodyElement::Error(n) => {
767                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
768                }
769                PackageBodyElement::Package(n) => {
770                    collect_package_body_errors(&n.value.body, errors)
771                }
772                PackageBodyElement::LibraryPackage(n) => {
773                    collect_package_body_errors(&n.value.body, errors)
774                }
775                PackageBodyElement::PartDef(n) => {
776                    collect_part_def_body_errors(&n.value.body, errors)
777                }
778                PackageBodyElement::PartUsage(n) => {
779                    collect_part_usage_body_errors(&n.value.body, errors)
780                }
781                PackageBodyElement::ActionDef(n) => {
782                    collect_action_def_body_errors(&n.value.body, errors)
783                }
784                PackageBodyElement::ActionUsage(n) => {
785                    collect_action_usage_body_errors(&n.value.body, errors)
786                }
787                PackageBodyElement::RequirementDef(n) => {
788                    collect_requirement_body_errors(&n.value.body, errors)
789                }
790                PackageBodyElement::RequirementUsage(n) => {
791                    collect_requirement_body_errors(&n.value.body, errors)
792                }
793                PackageBodyElement::UseCaseDef(n) => {
794                    collect_use_case_body_errors(&n.value.body, errors)
795                }
796                PackageBodyElement::UseCaseUsage(n) => {
797                    collect_use_case_body_errors(&n.value.body, errors)
798                }
799                PackageBodyElement::ConcernUsage(n) => {
800                    collect_requirement_body_errors(&n.value.body, errors)
801                }
802                PackageBodyElement::StateDef(n) => collect_state_body_errors(&n.value.body, errors),
803                PackageBodyElement::StateUsage(n) => {
804                    collect_state_body_errors(&n.value.body, errors)
805                }
806                PackageBodyElement::ConstraintDef(n) => {
807                    collect_constraint_body_errors(&n.value.body, errors)
808                }
809                PackageBodyElement::CalcDef(n) => collect_calc_body_errors(&n.value.body, errors),
810                PackageBodyElement::ViewDef(n) => {
811                    collect_view_def_body_errors(&n.value.body, errors)
812                }
813                PackageBodyElement::ViewUsage(n) => collect_view_body_errors(&n.value.body, errors),
814                _ => {}
815            }
816        }
817    }
818}
819
820fn collect_recovery_errors(root: &RootNamespace) -> Vec<ParseError> {
821    let mut errors = Vec::new();
822    for element in &root.elements {
823        match &element.value {
824            crate::ast::RootElement::Package(n) => {
825                collect_package_body_errors(&n.value.body, &mut errors)
826            }
827            crate::ast::RootElement::LibraryPackage(n) => {
828                collect_package_body_errors(&n.value.body, &mut errors)
829            }
830            crate::ast::RootElement::Namespace(n) => {
831                collect_package_body_errors(&n.value.body, &mut errors)
832            }
833            crate::ast::RootElement::Import(_) => {}
834        }
835    }
836    errors
837}
838
839/// Parse full input; must consume entire input. Strips UTF-8 BOM if present.
840#[allow(clippy::result_large_err)]
841pub fn parse_root(input: &str) -> Result<RootNamespace, ParseError> {
842    let bytes = input
843        .strip_prefix('\u{FEFF}')
844        .map(str::as_bytes)
845        .unwrap_or_else(|| input.as_bytes());
846    let located = LocatedSpan::new(bytes);
847    match package::root_namespace(located) {
848        Ok((rest, root)) => {
849            if !rest.fragment().is_empty() && has_unclosed_brace(bytes) {
850                return Err(missing_closing_brace_error_at_eof(bytes));
851            }
852            if rest.fragment().is_empty() || is_only_trailing_closing_braces(rest) {
853                log::debug!("parse_root: success, {} top-level elements", root.elements.len());
854                Ok(root)
855            } else {
856                let offset = located.location_offset() + located.fragment().len() - rest.fragment().len();
857                let unconsumed = rest.fragment();
858                let first_80 = unconsumed.get(..80.min(unconsumed.len())).unwrap_or(unconsumed);
859                log::debug!(
860                    "parse_root: expected end of input; parsed {} elements; unconsumed len={}, offset={}, first 80 bytes: {:?}",
861                    root.elements.len(),
862                    unconsumed.len(),
863                    offset,
864                    first_80,
865                );
866                log::debug!(
867                    "parse_root: unconsumed as str: {:?}",
868                    String::from_utf8_lossy(first_80),
869                );
870                let (found_snippet, found_len) = fragment_to_found_snippet(rest.fragment());
871                let mut pe = ParseError::new("expected end of input")
872                    .with_location(offset, rest.location_line(), rest.get_column())
873                    .with_length(found_len.max(1))
874                    .with_code("expected_end_of_input");
875                if !found_snippet.is_empty() {
876                    pe = pe.with_found(found_snippet);
877                }
878                if root.elements.is_empty() && is_illegal_top_level_definition(rest.fragment()) {
879                    pe = pe
880                        .with_code("illegal_top_level_definition")
881                        .with_expected("'package', 'namespace', or 'import'")
882                        .with_suggestion(
883                            "Wrap this declaration in `package ... { ... }` or `namespace ... { ... }`.",
884                        );
885                    pe.message = "illegal top-level definition".to_string();
886                }
887                Err(pe)
888            }
889        }
890        Err(nom::Err::Error(e)) => Err(missing_closing_brace_error(bytes, e.input).unwrap_or_else(|| {
891            nom_err_to_parse_error(
892                &e,
893                None,
894                Some("'package', 'namespace', or 'import' at top level; or valid element in package body"),
895            )
896        })),
897        Err(nom::Err::Failure(e)) => Err(missing_closing_brace_error(bytes, e.input).unwrap_or_else(|| {
898            nom_err_to_parse_error(
899                &e,
900                None,
901                Some("'package', 'namespace', or 'import' at top level; or valid element in package body"),
902            )
903        })),
904        Err(nom::Err::Incomplete(_)) => Err(ParseError::new("unexpected end of input").with_code("unexpected_eof")),
905    }
906}
907
908const MAX_RECOVERY_ERRORS: usize = 100;
909
910/// Parse input with error recovery: collects multiple diagnostics and returns a partial AST when errors occur.
911/// Use this for language servers so the user sees all parse errors and features (e.g. hover) can use the partial AST.
912pub fn parse_with_diagnostics(input: &str) -> ParseResult {
913    let bytes = input
914        .strip_prefix('\u{FEFF}')
915        .map(str::as_bytes)
916        .unwrap_or_else(|| input.as_bytes());
917    let located = LocatedSpan::new(bytes);
918
919    let mut elements = Vec::new();
920    let mut errors = Vec::new();
921
922    let (mut input, _) = match lex::ws_and_comments(located) {
923        Ok(x) => x,
924        Err(_) => {
925            return ParseResult {
926                root: RootNamespace { elements: vec![] },
927                errors: vec![ParseError::new("invalid input").with_code("invalid_input")],
928            };
929        }
930    };
931
932    while errors.len() < MAX_RECOVERY_ERRORS {
933        // Skip leading ws/comments; if nothing left, we're done (avoids parsing "" as root_element).
934        let (rest, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
935        input = rest;
936        if input.fragment().is_empty() {
937            break;
938        }
939        match package::root_element(input) {
940            Ok((rest, elem)) => {
941                elements.push(elem);
942                input = rest;
943            }
944            Err(nom::Err::Error(e)) | Err(nom::Err::Failure(e)) => {
945                let pe = missing_closing_brace_error(bytes, e.input).unwrap_or_else(|| {
946                    nom_err_to_parse_error(&e, None, Some("'package', 'namespace', or 'import'"))
947                });
948                errors.push(pe);
949                let skip_result = lex::skip_to_next_sync_point(e.input);
950                match skip_result {
951                    Ok((rest, _)) => input = rest,
952                    Err(_) => break,
953                }
954            }
955            Err(nom::Err::Incomplete(_)) => {
956                errors.push(
957                    ParseError::new("unexpected end of input")
958                        .with_location(
959                            input.location_offset(),
960                            input.location_line(),
961                            input.get_column(),
962                        )
963                        .with_length(1)
964                        .with_code("unexpected_eof"),
965                );
966                break;
967            }
968        }
969    }
970
971    let (input, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
972
973    if input.fragment().is_empty()
974        && has_unclosed_brace(bytes)
975        && !errors
976            .iter()
977            .any(|e| e.code.as_deref() == Some("missing_closing_brace"))
978    {
979        errors.push(missing_closing_brace_error_at_eof(bytes));
980    }
981
982    if !input.fragment().is_empty() {
983        let (found_snippet, found_len) = fragment_to_found_snippet(input.fragment());
984        let mut pe = ParseError::new("expected end of input")
985            .with_location(
986                input.location_offset(),
987                input.location_line(),
988                input.get_column(),
989            )
990            .with_length(found_len.max(1))
991            .with_code("expected_end_of_input");
992        if !found_snippet.is_empty() {
993            pe = pe.with_found(found_snippet);
994        }
995        errors.push(pe);
996    }
997
998    errors.extend(collect_recovery_errors(&RootNamespace {
999        elements: elements.clone(),
1000    }));
1001    errors.sort_by_key(|e| (e.offset.unwrap_or(usize::MAX), e.line.unwrap_or(u32::MAX)));
1002
1003    ParseResult {
1004        root: RootNamespace { elements },
1005        errors,
1006    }
1007}