Skip to main content

sysml_v2_parser/parser/
mod.rs

1//! Nom-based parser for SysML v2 textual notation.
2//!
//! Organized into one submodule per language construct (see the `mod` declarations below), including:
4//! - [lex]: whitespace, comments, names, qualified names, skip helpers
5//! - [attribute]: attribute definition and usage
6//! - [import]: import and relationship body
7//! - [part]: part definition and part usage
8//! - [package]: package and root namespace
9
10mod action;
11mod alias;
12mod allocation;
13mod attribute;
14mod case;
15mod connection;
16mod constraint;
17mod dependency;
18mod enumeration;
19mod expr;
20mod flow;
21mod import;
22mod individual;
23mod interface;
24mod item;
25mod lex;
26mod metadata;
27mod metadata_annotation;
28mod occurrence;
29mod package;
30mod part;
31mod port;
32mod requirement;
33mod span;
34mod state;
35mod usecase;
36mod view;
37
38pub(crate) use span::{node_from_to, span_from_to, with_span, Input};
39
40use crate::ast::{
41    ActionDefBody, ActionDefBodyElement, ActionUsageBody, ActionUsageBodyElement, CalcDefBody,
42    CalcDefBodyElement, ConstraintDefBody, ConstraintDefBodyElement, PackageBody,
43    PackageBodyElement, ParseErrorNode, PartDefBody, PartDefBodyElement, PartUsageBody,
44    PartUsageBodyElement, RequirementDefBody, RequirementDefBodyElement, RootNamespace,
45    StateDefBody, StateDefBodyElement, UseCaseDefBody, UseCaseDefBodyElement, ViewBody,
46    ViewBodyElement, ViewDefBody, ViewDefBodyElement,
47};
48use crate::error::ParseError;
49use nom::error::Error;
50use nom::Parser;
51use nom_locate::LocatedSpan;
52
/// Result of parsing with error recovery: a (possibly partial) AST and zero or more diagnostics.
///
/// Both fields are always present: `root` carries whatever was parsed and `errors` lists the
/// problems found along the way. An empty `errors` vector means the parse was clean
/// (see [`ParseResult::is_ok`]).
#[derive(Debug, Clone)]
pub struct ParseResult {
    /// Root namespace; contains all successfully parsed top-level elements (partial when errors occurred).
    pub root: RootNamespace,
    /// All parse errors encountered (multiple when recovery is used).
    pub errors: Vec<ParseError>,
}
61
impl ParseResult {
    /// True if the document parsed fully with no errors.
    ///
    /// This only inspects `errors`; it returns `true` exactly when that vector is empty.
    pub fn is_ok(&self) -> bool {
        self.errors.is_empty()
    }
}
68
/// Upper bound, in bytes, on the "found" snippet produced by `fragment_to_found_snippet`.
const FOUND_SNIPPET_MAX_LEN: usize = 40;
/// Keywords that, when they start the text at a root-level parse failure, cause
/// `nom_err_to_parse_error` to report an "illegal top-level definition" (the declaration has to
/// be wrapped in a package or namespace). Matching is delegated to
/// `lex::starts_with_any_keyword`.
const ILLEGAL_TOP_LEVEL_STARTERS: &[&[u8]] = &[
    b"action",
    b"actor",
    b"alias",
    b"allocate",
    b"allocation",
    b"attribute",
    b"bind",
    b"calc",
    b"case",
    b"concern",
    b"connection",
    b"constraint",
    b"dependency",
    b"enum",
    b"flow",
    b"interface",
    b"item",
    b"metadata",
    b"occurrence",
    b"part",
    b"perform",
    b"port",
    b"ref",
    b"require",
    b"requirement",
    b"satisfy",
    b"state",
    b"use",
    b"verification",
    b"view",
    b"viewpoint",
];
103
104/// Take a short snippet from the input at the error position for "found" display.
105/// Uses first line or first FOUND_SNIPPET_MAX_LEN bytes, UTF-8 with replacement char.
106fn fragment_to_found_snippet(fragment: &[u8]) -> (String, usize) {
107    let take = fragment
108        .iter()
109        .position(|&b| b == b'\n' || b == b'\r')
110        .map(|p| p.min(FOUND_SNIPPET_MAX_LEN))
111        .unwrap_or_else(|| fragment.len().min(FOUND_SNIPPET_MAX_LEN));
112    let slice = fragment.get(..take).unwrap_or(fragment);
113    let s = String::from_utf8_lossy(slice)
114        .replace('\n', "\\n")
115        .replace('\r', "\\r");
116    let len = slice.len();
117    (s.trim_end().to_string(), len)
118}
119
120pub(crate) fn recovery_found_snippet(input: Input<'_>) -> Option<String> {
121    let frag = input.fragment();
122    let take = frag
123        .iter()
124        .position(|&b| b == b'\n' || b == b'\r')
125        .unwrap_or(frag.len())
126        .min(60);
127    let snippet = String::from_utf8_lossy(&frag[..take]).trim().to_string();
128    if snippet.is_empty() {
129        None
130    } else {
131        Some(snippet)
132    }
133}
134
135/// Map nom error kind to a human-readable message for language server diagnostics.
136fn nom_error_kind_to_message(code: &nom::error::ErrorKind) -> &'static str {
137    use nom::error::ErrorKind;
138    match code {
139        ErrorKind::Tag => "expected keyword or token",
140        ErrorKind::Digit => "expected number",
141        ErrorKind::Alpha => "expected identifier",
142        ErrorKind::AlphaNumeric => "expected identifier",
143        ErrorKind::Space => "expected whitespace",
144        ErrorKind::MultiSpace => "expected whitespace",
145        ErrorKind::Eof => "unexpected end of input",
146        ErrorKind::TakeUntil => "expected terminator",
147        ErrorKind::TakeWhile1 => "expected token",
148        ErrorKind::Alt => {
149            "expected package, import, part, port, interface, alias, attribute, or action"
150        }
151        ErrorKind::Many0 | ErrorKind::Many1 => "expected list of elements",
152        _ => "parse error",
153    }
154}
155
156/// Map nom error kind to a specific code for LSP/quick fixes.
157fn nom_error_kind_to_code(code: &nom::error::ErrorKind) -> &'static str {
158    use nom::error::ErrorKind;
159    match code {
160        ErrorKind::Tag => "expected_keyword",
161        ErrorKind::Digit => "expected_number",
162        ErrorKind::Alpha | ErrorKind::AlphaNumeric => "expected_identifier",
163        ErrorKind::Space | ErrorKind::MultiSpace => "expected_whitespace",
164        ErrorKind::Eof => "unexpected_eof",
165        ErrorKind::TakeUntil => "expected_terminator",
166        ErrorKind::TakeWhile1 => "expected_token",
167        ErrorKind::Alt => "expected_alt",
168        ErrorKind::Many0 | ErrorKind::Many1 => "expected_list",
169        _ => "parse_error",
170    }
171}
172
173fn nom_err_to_parse_error(
174    e: &Error<Input<'_>>,
175    length_override: Option<usize>,
176    expected_context: Option<&'static str>,
177) -> ParseError {
178    let offset = e.input.location_offset();
179    let line = e.input.location_line();
180    let column = e.input.get_column();
181    let fragment = e.input.fragment();
182    let (found_snippet, found_len) = fragment_to_found_snippet(fragment);
183    let message = nom_error_kind_to_message(&e.code).to_string();
184    let span_len = length_override.unwrap_or(found_len).max(1);
185    let mut pe = ParseError::new(message)
186        .with_location(offset, line, column)
187        .with_length(span_len)
188        .with_code(nom_error_kind_to_code(&e.code));
189    if !found_snippet.is_empty() {
190        pe = pe.with_found(found_snippet);
191    }
192    if let Some(ctx) = expected_context {
193        pe = pe.with_expected(ctx);
194    }
195    let at_root = expected_context.is_some_and(|ctx| {
196        ctx.contains("'package', 'namespace', or 'import'") || ctx.contains("top level")
197    });
198    if at_root && is_illegal_top_level_definition(fragment) {
199        pe.message = "illegal top-level definition".to_string();
200        pe.code = Some("illegal_top_level_definition".to_string());
201        pe.expected = Some("'package', 'namespace', or 'import'".to_string());
202        pe.suggestion = Some(
203            "Wrap this declaration in `package ... { ... }` or `namespace ... { ... }`."
204                .to_string(),
205        );
206    }
207    pe
208}
209
210fn is_illegal_top_level_definition(fragment: &[u8]) -> bool {
211    let trimmed = trim_ascii_start(fragment);
212    !trimmed.starts_with(b"}")
213        && !trimmed.starts_with(b"//")
214        && !trimmed.starts_with(b"/*")
215        && lex::starts_with_any_keyword(trimmed, ILLEGAL_TOP_LEVEL_STARTERS)
216}
217
/// Return `fragment` without its leading ASCII whitespace bytes
/// (as classified by `u8::is_ascii_whitespace`).
fn trim_ascii_start(fragment: &[u8]) -> &[u8] {
    let skip = fragment
        .iter()
        .take_while(|byte| byte.is_ascii_whitespace())
        .count();
    &fragment[skip..]
}
228
229fn starts_with_missing_name_after_keyword(
230    fragment: &[u8],
231    keyword: &[u8],
232    trailing_keywords: &[&[u8]],
233) -> bool {
234    let mut fragment = trim_ascii_start(fragment);
235    if !lex::starts_with_keyword(fragment, keyword) {
236        return false;
237    }
238    fragment = &fragment[keyword.len()..];
239    while let Some(first) = fragment.first() {
240        if first.is_ascii_whitespace() {
241            fragment = &fragment[1..];
242            continue;
243        }
244        break;
245    }
246    for trailing in trailing_keywords {
247        if lex::starts_with_keyword(fragment, trailing) {
248            fragment = &fragment[trailing.len()..];
249            while let Some(first) = fragment.first() {
250                if first.is_ascii_whitespace() {
251                    fragment = &fragment[1..];
252                    continue;
253                }
254                break;
255            }
256        }
257    }
258    fragment.starts_with(b":")
259}
260
261fn starts_with_missing_type_after_keyword(
262    fragment: &[u8],
263    keyword: &[u8],
264    trailing_keywords: &[&[u8]],
265) -> bool {
266    let mut fragment = trim_ascii_start(fragment);
267    if !lex::starts_with_keyword(fragment, keyword) {
268        return false;
269    }
270    fragment = &fragment[keyword.len()..];
271    while let Some(first) = fragment.first() {
272        if first.is_ascii_whitespace() {
273            fragment = &fragment[1..];
274            continue;
275        }
276        break;
277    }
278    for trailing in trailing_keywords {
279        if lex::starts_with_keyword(fragment, trailing) {
280            fragment = &fragment[trailing.len()..];
281            while let Some(first) = fragment.first() {
282                if first.is_ascii_whitespace() {
283                    fragment = &fragment[1..];
284                    continue;
285                }
286                break;
287            }
288        }
289    }
290
291    let mut name_len = 0usize;
292    while name_len < fragment.len()
293        && (fragment[name_len].is_ascii_alphanumeric() || fragment[name_len] == b'_')
294    {
295        name_len += 1;
296    }
297    if name_len == 0 {
298        return false;
299    }
300    fragment = &fragment[name_len..];
301    while let Some(first) = fragment.first() {
302        if first.is_ascii_whitespace() {
303            fragment = &fragment[1..];
304            continue;
305        }
306        break;
307    }
308    if !fragment.starts_with(b":") {
309        return false;
310    }
311    fragment = &fragment[1..];
312    while let Some(first) = fragment.first() {
313        if first.is_ascii_whitespace() {
314            fragment = &fragment[1..];
315            continue;
316        }
317        break;
318    }
319
320    fragment.is_empty()
321        || fragment.starts_with(b";")
322        || fragment.starts_with(b"{")
323        || fragment.starts_with(b"}")
324        || lex::starts_with_keyword(fragment, b"then")
325        || lex::starts_with_keyword(fragment, b"if")
326        || lex::starts_with_keyword(fragment, b"do")
327}
328
329fn missing_name_diagnostic(fragment: &[u8]) -> Option<(&'static str, String, String, String)> {
330    #[allow(clippy::type_complexity)]
331    let cases: &[(&[u8], &[&[u8]], &str, &str)] = &[
332        (
333            b"subject",
334            &[],
335            "subject name",
336            "Use `subject laptop: Laptop;`.",
337        ),
338        (b"actor", &[], "actor name", "Use `actor user: User;`."),
339        (b"state", &[], "state name", "Use `state ready: Mode;`."),
340        (b"part", &[], "part name", "Use `part wheel: Wheel;`."),
341        (b"ref", &[], "reference name", "Use `ref sensor: Sensor;`."),
342        (b"port", &[], "port name", "Use `port power: PowerPort;`."),
343        (
344            b"attribute",
345            &[],
346            "attribute name",
347            "Use `attribute mass: MassValue;`.",
348        ),
349        (b"in", &[], "input name", "Use `in speed: Real;`."),
350        (b"out", &[], "output name", "Use `out result: Real;`."),
351        (
352            b"perform",
353            &[b"action"],
354            "action name",
355            "Use `perform action run: Runner;`.",
356        ),
357        (
358            b"return",
359            &[],
360            "return name",
361            "Use `return result: Real;`.",
362        ),
363    ];
364
365    for (keyword, trailing, missing_what, suggestion) in cases {
366        if starts_with_missing_name_after_keyword(fragment, keyword, trailing) {
367            return Some((
368                "missing_member_name",
369                format!("expected {missing_what} before ':'"),
370                format!("{missing_what} before ':'"),
371                suggestion.to_string(),
372            ));
373        }
374    }
375    None
376}
377
378fn missing_type_diagnostic(fragment: &[u8]) -> Option<(&'static str, String, String, String)> {
379    #[allow(clippy::type_complexity)]
380    let cases: &[(&[u8], &[&[u8]], &str)] = &[
381        (b"subject", &[], "subject type"),
382        (b"actor", &[], "actor type"),
383        (b"state", &[], "state type"),
384        (b"part", &[], "part type"),
385        (b"ref", &[], "reference type"),
386        (b"port", &[], "port type"),
387        (b"attribute", &[], "attribute type"),
388        (b"in", &[], "input type"),
389        (b"out", &[], "output type"),
390        (b"perform", &[b"action"], "action type"),
391        (b"return", &[], "return type"),
392    ];
393
394    for &(keyword, trailing, missing_what) in cases {
395        if starts_with_missing_type_after_keyword(fragment, keyword, trailing) {
396            let keyword_label = String::from_utf8_lossy(keyword);
397            let sample_name = if keyword == &b"subject"[..] {
398                "laptop"
399            } else if keyword == &b"actor"[..] {
400                "user"
401            } else if keyword == &b"state"[..] {
402                "ready"
403            } else if keyword == &b"part"[..] {
404                "wheel"
405            } else if keyword == &b"ref"[..] {
406                "sensor"
407            } else if keyword == &b"port"[..] {
408                "power"
409            } else if keyword == &b"attribute"[..] {
410                "mass"
411            } else if keyword == &b"in"[..] {
412                "speed"
413            } else if keyword == &b"out"[..] {
414                "result"
415            } else if keyword == &b"perform"[..] {
416                "run"
417            } else if keyword == &b"return"[..] {
418                "result"
419            } else {
420                "member"
421            };
422            let sample_type = if keyword == &b"subject"[..] {
423                "Laptop"
424            } else if keyword == &b"actor"[..] {
425                "User"
426            } else if keyword == &b"state"[..] {
427                "Mode"
428            } else if keyword == &b"part"[..] {
429                "Wheel"
430            } else if keyword == &b"ref"[..] {
431                "Sensor"
432            } else if keyword == &b"port"[..] {
433                "PowerPort"
434            } else if keyword == &b"attribute"[..] {
435                "MassValue"
436            } else if keyword == &b"in"[..] || keyword == &b"out"[..] {
437                "Real"
438            } else if keyword == &b"perform"[..] {
439                "Runner"
440            } else if keyword == &b"return"[..] {
441                "Real"
442            } else {
443                "Type"
444            };
445            let suggestion = if keyword == &b"perform"[..] {
446                format!("Use `perform action {sample_name}: {sample_type};`.")
447            } else if keyword == &b"return"[..] {
448                format!("Use `return {sample_name}: {sample_type};`.")
449            } else {
450                format!("Use `{keyword_label} {sample_name}: {sample_type};`.")
451            };
452            return Some((
453                "missing_type_reference",
454                format!("expected {missing_what} after ':'"),
455                format!("{missing_what} after ':'"),
456                suggestion,
457            ));
458        }
459    }
460    None
461}
462
463fn invalid_expose_separator_diagnostic(
464    fragment: &[u8],
465) -> Option<(&'static str, String, String, String)> {
466    let mut fragment = trim_ascii_start(fragment);
467    if !lex::starts_with_keyword(fragment, b"expose") {
468        return None;
469    }
470    fragment = &fragment[b"expose".len()..];
471    while let Some(first) = fragment.first() {
472        if first.is_ascii_whitespace() {
473            fragment = &fragment[1..];
474            continue;
475        }
476        break;
477    }
478    if fragment.is_empty() {
479        return None;
480    }
481
482    let mut saw_dot = false;
483    let mut in_quoted_name = false;
484    for &b in fragment {
485        if b == b'\'' {
486            in_quoted_name = !in_quoted_name;
487            continue;
488        }
489        if in_quoted_name {
490            continue;
491        }
492        if matches!(b, b';' | b'[' | b'{' | b'}' | b'\n' | b'\r') {
493            break;
494        }
495        if b == b'.' {
496            saw_dot = true;
497            break;
498        }
499    }
500    if !saw_dot {
501        return None;
502    }
503
504    Some((
505        "invalid_qualified_name_separator",
506        "invalid qualified name in expose target: use '::' instead of '.'".to_string(),
507        "qualified name segments separated by '::'".to_string(),
508        "Replace '.' with '::' in the expose target (example: `expose A::B;`).".to_string(),
509    ))
510}
511
512fn missing_closing_brace_error(bytes: &[u8], input: Input<'_>) -> Option<ParseError> {
513    if !input.fragment().is_empty() {
514        return None;
515    }
516    let consumed = &bytes[..input.location_offset().min(bytes.len())];
517    let opens = consumed.iter().filter(|&&b| b == b'{').count();
518    let closes = consumed.iter().filter(|&&b| b == b'}').count();
519    if opens <= closes {
520        return None;
521    }
522    Some(missing_closing_brace_error_at_eof(consumed))
523}
524
525fn missing_closing_brace_error_at_eof(bytes: &[u8]) -> ParseError {
526    let (line, column) = eof_line_column(bytes);
527    ParseError::new("missing closing '}'")
528        .with_location(bytes.len(), line, column)
529        .with_length(1)
530        .with_code("missing_closing_brace")
531        .with_expected("'}'")
532        .with_suggestion("Add '}' to close the open body.")
533}
534
/// True when `bytes` contains more `{` bytes than `}` bytes.
///
/// This is a plain byte count; the order of the braces is not considered.
fn has_unclosed_brace(bytes: &[u8]) -> bool {
    let balance = bytes.iter().fold(0isize, |depth, &byte| match byte {
        b'{' => depth + 1,
        b'}' => depth - 1,
        _ => depth,
    });
    balance > 0
}
540
/// Compute the 1-based `(line, column)` of the position just past the last byte of `bytes`.
///
/// Every `\n` starts a new line and resets the column to 1; every other byte advances the
/// column by one, so columns are counted in bytes.
fn eof_line_column(bytes: &[u8]) -> (u32, usize) {
    bytes
        .iter()
        .fold((1u32, 1usize), |(line, column), &byte| {
            if byte == b'\n' {
                (line + 1, 1)
            } else {
                (line, column + 1)
            }
        })
}
554
555pub(crate) fn build_recovery_error_node(
556    input: Input<'_>,
557    starters: &[&[u8]],
558    scope_label: &str,
559    generic_code: &str,
560) -> ParseErrorNode {
561    let trimmed = trim_ascii_start(input.fragment());
562
563    if let Some((code, message, expected, suggestion)) = missing_name_diagnostic(trimmed) {
564        return ParseErrorNode {
565            message,
566            code: code.to_string(),
567            expected: Some(expected),
568            found: recovery_found_snippet(input),
569            suggestion: Some(suggestion),
570        };
571    }
572
573    if let Some((code, message, expected, suggestion)) = missing_type_diagnostic(trimmed) {
574        return ParseErrorNode {
575            message,
576            code: code.to_string(),
577            expected: Some(expected),
578            found: recovery_found_snippet(input),
579            suggestion: Some(suggestion),
580        };
581    }
582
583    if let Some((code, message, expected, suggestion)) = invalid_expose_separator_diagnostic(trimmed)
584    {
585        return ParseErrorNode {
586            message,
587            code: code.to_string(),
588            expected: Some(expected),
589            found: recovery_found_snippet(input),
590            suggestion: Some(suggestion),
591        };
592    }
593
594    if lex::looks_like_missing_semicolon(input, starters) {
595        return ParseErrorNode {
596            message: "missing semicolon before next declaration".to_string(),
597            code: "missing_semicolon".to_string(),
598            expected: Some("';'".to_string()),
599            found: recovery_found_snippet(input),
600            suggestion: Some("Insert ';' before this declaration.".to_string()),
601        };
602    }
603
604    if lex::starts_with_keyword(trimmed, b"#") || lex::starts_with_keyword(trimmed, b"@") {
605        return ParseErrorNode {
606            message: format!("unsupported annotation syntax in {scope_label}"),
607            code: generic_code.to_string(),
608            expected: Some(format!("valid {scope_label} element")),
609            found: recovery_found_snippet(input),
610            suggestion: Some(
611                "Remove this annotation or extend the parser to support annotated declarations."
612                    .to_string(),
613            ),
614        };
615    }
616
617    ParseErrorNode {
618        message: format!("unexpected token in {scope_label}"),
619        code: generic_code.to_string(),
620        expected: Some(format!("valid {scope_label} element")),
621        found: recovery_found_snippet(input),
622        suggestion: Some(format!("Fix this {scope_label} member and re-run parsing.")),
623    }
624}
625
626fn is_only_trailing_closing_braces(mut input: Input<'_>) -> bool {
627    loop {
628        let (next, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
629        input = next;
630        if input.fragment().is_empty() {
631            return true;
632        }
633        if input.fragment().starts_with(b"}") {
634            match nom::bytes::complete::tag::<_, _, nom::error::Error<Input>>(&b"}"[..])
635                .parse(input)
636            {
637                Ok((next, _)) => {
638                    input = next;
639                    continue;
640                }
641                Err(_) => return false,
642            }
643        }
644        return false;
645    }
646}
647
648fn parse_error_from_recovery_node(span: &crate::ast::Span, node: &ParseErrorNode) -> ParseError {
649    let mut err = ParseError::new(node.message.clone())
650        .with_location(span.offset, span.line, span.column)
651        .with_length(span.len.max(1))
652        .with_code(node.code.clone());
653    if let Some(expected) = &node.expected {
654        err = err.with_expected(expected.clone());
655    }
656    if let Some(found) = &node.found {
657        err = err.with_found(found.clone());
658    }
659    if let Some(suggestion) = &node.suggestion {
660        err = err.with_suggestion(suggestion.clone());
661    }
662    err
663}
664
665fn collect_requirement_body_errors(body: &RequirementDefBody, errors: &mut Vec<ParseError>) {
666    if let RequirementDefBody::Brace { elements } = body {
667        for element in elements {
668            match &element.value {
669                RequirementDefBodyElement::Error(n) => {
670                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
671                }
672                RequirementDefBodyElement::Frame(n) => {
673                    collect_requirement_body_errors(&n.value.body, errors)
674                }
675                _ => {}
676            }
677        }
678    }
679}
680
681fn collect_action_def_body_errors(body: &ActionDefBody, errors: &mut Vec<ParseError>) {
682    if let ActionDefBody::Brace { elements } = body {
683        for element in elements {
684            if let ActionDefBodyElement::Error(n) = &element.value {
685                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
686            }
687        }
688    }
689}
690
691fn collect_action_usage_body_errors(body: &ActionUsageBody, errors: &mut Vec<ParseError>) {
692    if let ActionUsageBody::Brace { elements } = body {
693        for element in elements {
694            match &element.value {
695                ActionUsageBodyElement::Error(n) => {
696                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
697                }
698                ActionUsageBodyElement::ActionUsage(n) => {
699                    collect_action_usage_body_errors(&n.value.body, errors)
700                }
701                _ => {}
702            }
703        }
704    }
705}
706
707fn collect_state_body_errors(body: &StateDefBody, errors: &mut Vec<ParseError>) {
708    if let StateDefBody::Brace { elements } = body {
709        for element in elements {
710            match &element.value {
711                StateDefBodyElement::Error(n) => {
712                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
713                }
714                StateDefBodyElement::Entry(n) => collect_state_body_errors(&n.value.body, errors),
715                StateDefBodyElement::RequirementUsage(n) => {
716                    collect_requirement_body_errors(&n.value.body, errors)
717                }
718                StateDefBodyElement::StateUsage(n) => {
719                    collect_state_body_errors(&n.value.body, errors)
720                }
721                _ => {}
722            }
723        }
724    }
725}
726
727fn collect_use_case_body_errors(body: &UseCaseDefBody, errors: &mut Vec<ParseError>) {
728    if let UseCaseDefBody::Brace { elements } = body {
729        for element in elements {
730            if let UseCaseDefBodyElement::Error(n) = &element.value {
731                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
732            }
733        }
734    }
735}
736
737fn collect_constraint_body_errors(body: &ConstraintDefBody, errors: &mut Vec<ParseError>) {
738    if let ConstraintDefBody::Brace { elements } = body {
739        for element in elements {
740            if let ConstraintDefBodyElement::Error(n) = &element.value {
741                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
742            }
743        }
744    }
745}
746
747fn collect_calc_body_errors(body: &CalcDefBody, errors: &mut Vec<ParseError>) {
748    if let CalcDefBody::Brace { elements } = body {
749        for element in elements {
750            if let CalcDefBodyElement::Error(n) = &element.value {
751                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
752            }
753        }
754    }
755}
756
757fn collect_view_def_body_errors(body: &ViewDefBody, errors: &mut Vec<ParseError>) {
758    if let ViewDefBody::Brace { elements } = body {
759        for element in elements {
760            if let ViewDefBodyElement::Error(n) = &element.value {
761                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
762            }
763        }
764    }
765}
766
767fn collect_view_body_errors(body: &ViewBody, errors: &mut Vec<ParseError>) {
768    if let ViewBody::Brace { elements } = body {
769        for element in elements {
770            if let ViewBodyElement::Error(n) = &element.value {
771                errors.push(parse_error_from_recovery_node(&element.span, &n.value));
772            }
773        }
774    }
775}
776
777fn collect_part_def_body_errors(body: &PartDefBody, errors: &mut Vec<ParseError>) {
778    if let PartDefBody::Brace { elements } = body {
779        for element in elements {
780            match &element.value {
781                PartDefBodyElement::Error(n) => {
782                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
783                }
784                PartDefBodyElement::PartUsage(n) => {
785                    collect_part_usage_body_errors(&n.value.body, errors)
786                }
787                PartDefBodyElement::Perform(n) => {
788                    collect_perform_body_errors(&n.value.body, errors)
789                }
790                _ => {}
791            }
792        }
793    }
794}
795
796fn collect_perform_body_errors(body: &crate::ast::PerformBody, _errors: &mut Vec<ParseError>) {
797    match body {
798        crate::ast::PerformBody::Semicolon => {}
799        crate::ast::PerformBody::Brace { .. } => {}
800    }
801}
802
803fn collect_part_usage_body_errors(body: &PartUsageBody, errors: &mut Vec<ParseError>) {
804    if let PartUsageBody::Brace { elements } = body {
805        for element in elements {
806            match &element.value {
807                PartUsageBodyElement::Error(n) => {
808                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
809                }
810                PartUsageBodyElement::PartUsage(n) => {
811                    collect_part_usage_body_errors(&n.value.body, errors)
812                }
813                PartUsageBodyElement::Perform(n) => {
814                    collect_perform_body_errors(&n.value.body, errors)
815                }
816                PartUsageBodyElement::StateUsage(n) => {
817                    collect_state_body_errors(&n.value.body, errors)
818                }
819                _ => {}
820            }
821        }
822    }
823}
824
825fn collect_package_body_errors(body: &PackageBody, errors: &mut Vec<ParseError>) {
826    if let PackageBody::Brace { elements } = body {
827        for element in elements {
828            match &element.value {
829                PackageBodyElement::Error(n) => {
830                    errors.push(parse_error_from_recovery_node(&element.span, &n.value));
831                }
832                PackageBodyElement::Package(n) => {
833                    collect_package_body_errors(&n.value.body, errors)
834                }
835                PackageBodyElement::LibraryPackage(n) => {
836                    collect_package_body_errors(&n.value.body, errors)
837                }
838                PackageBodyElement::PartDef(n) => {
839                    collect_part_def_body_errors(&n.value.body, errors)
840                }
841                PackageBodyElement::PartUsage(n) => {
842                    collect_part_usage_body_errors(&n.value.body, errors)
843                }
844                PackageBodyElement::ActionDef(n) => {
845                    collect_action_def_body_errors(&n.value.body, errors)
846                }
847                PackageBodyElement::ActionUsage(n) => {
848                    collect_action_usage_body_errors(&n.value.body, errors)
849                }
850                PackageBodyElement::RequirementDef(n) => {
851                    collect_requirement_body_errors(&n.value.body, errors)
852                }
853                PackageBodyElement::RequirementUsage(n) => {
854                    collect_requirement_body_errors(&n.value.body, errors)
855                }
856                PackageBodyElement::UseCaseDef(n) => {
857                    collect_use_case_body_errors(&n.value.body, errors)
858                }
859                PackageBodyElement::UseCaseUsage(n) => {
860                    collect_use_case_body_errors(&n.value.body, errors)
861                }
862                PackageBodyElement::ConcernUsage(n) => {
863                    collect_requirement_body_errors(&n.value.body, errors)
864                }
865                PackageBodyElement::StateDef(n) => collect_state_body_errors(&n.value.body, errors),
866                PackageBodyElement::StateUsage(n) => {
867                    collect_state_body_errors(&n.value.body, errors)
868                }
869                PackageBodyElement::ConstraintDef(n) => {
870                    collect_constraint_body_errors(&n.value.body, errors)
871                }
872                PackageBodyElement::CalcDef(n) => collect_calc_body_errors(&n.value.body, errors),
873                PackageBodyElement::ViewDef(n) => {
874                    collect_view_def_body_errors(&n.value.body, errors)
875                }
876                PackageBodyElement::ViewUsage(n) => collect_view_body_errors(&n.value.body, errors),
877                _ => {}
878            }
879        }
880    }
881}
882
883fn collect_recovery_errors(root: &RootNamespace) -> Vec<ParseError> {
884    let mut errors = Vec::new();
885    for element in &root.elements {
886        match &element.value {
887            crate::ast::RootElement::Package(n) => {
888                collect_package_body_errors(&n.value.body, &mut errors)
889            }
890            crate::ast::RootElement::LibraryPackage(n) => {
891                collect_package_body_errors(&n.value.body, &mut errors)
892            }
893            crate::ast::RootElement::Namespace(n) => {
894                collect_package_body_errors(&n.value.body, &mut errors)
895            }
896            crate::ast::RootElement::Import(_) => {}
897        }
898    }
899    errors
900}
901
902/// Parse full input; must consume entire input. Strips UTF-8 BOM if present.
903#[allow(clippy::result_large_err)]
904pub fn parse_root(input: &str) -> Result<RootNamespace, ParseError> {
905    let bytes = input
906        .strip_prefix('\u{FEFF}')
907        .map(str::as_bytes)
908        .unwrap_or_else(|| input.as_bytes());
909    let located = LocatedSpan::new(bytes);
910    match package::root_namespace(located) {
911        Ok((rest, root)) => {
912            if !rest.fragment().is_empty() && has_unclosed_brace(bytes) {
913                return Err(missing_closing_brace_error_at_eof(bytes));
914            }
915            if rest.fragment().is_empty() || is_only_trailing_closing_braces(rest) {
916                log::debug!("parse_root: success, {} top-level elements", root.elements.len());
917                Ok(root)
918            } else {
919                let offset = located.location_offset() + located.fragment().len() - rest.fragment().len();
920                let unconsumed = rest.fragment();
921                let first_80 = unconsumed.get(..80.min(unconsumed.len())).unwrap_or(unconsumed);
922                log::debug!(
923                    "parse_root: expected end of input; parsed {} elements; unconsumed len={}, offset={}, first 80 bytes: {:?}",
924                    root.elements.len(),
925                    unconsumed.len(),
926                    offset,
927                    first_80,
928                );
929                log::debug!(
930                    "parse_root: unconsumed as str: {:?}",
931                    String::from_utf8_lossy(first_80),
932                );
933                let (found_snippet, found_len) = fragment_to_found_snippet(rest.fragment());
934                let mut pe = ParseError::new("expected end of input")
935                    .with_location(offset, rest.location_line(), rest.get_column())
936                    .with_length(found_len.max(1))
937                    .with_code("expected_end_of_input");
938                if !found_snippet.is_empty() {
939                    pe = pe.with_found(found_snippet);
940                }
941                if root.elements.is_empty() && is_illegal_top_level_definition(rest.fragment()) {
942                    pe = pe
943                        .with_code("illegal_top_level_definition")
944                        .with_expected("'package', 'namespace', or 'import'")
945                        .with_suggestion(
946                            "Wrap this declaration in `package ... { ... }` or `namespace ... { ... }`.",
947                        );
948                    pe.message = "illegal top-level definition".to_string();
949                }
950                Err(pe)
951            }
952        }
953        Err(nom::Err::Error(e)) => Err(missing_closing_brace_error(bytes, e.input).unwrap_or_else(|| {
954            nom_err_to_parse_error(
955                &e,
956                None,
957                Some("'package', 'namespace', or 'import' at top level; or valid element in package body"),
958            )
959        })),
960        Err(nom::Err::Failure(e)) => Err(missing_closing_brace_error(bytes, e.input).unwrap_or_else(|| {
961            nom_err_to_parse_error(
962                &e,
963                None,
964                Some("'package', 'namespace', or 'import' at top level; or valid element in package body"),
965            )
966        })),
967        Err(nom::Err::Incomplete(_)) => Err(ParseError::new("unexpected end of input").with_code("unexpected_eof")),
968    }
969}
970
971const MAX_RECOVERY_ERRORS: usize = 100;
972
973/// Parse input with error recovery: collects multiple diagnostics and returns a partial AST when errors occur.
974/// Use this for language servers so the user sees all parse errors and features (e.g. hover) can use the partial AST.
975pub fn parse_with_diagnostics(input: &str) -> ParseResult {
976    let bytes = input
977        .strip_prefix('\u{FEFF}')
978        .map(str::as_bytes)
979        .unwrap_or_else(|| input.as_bytes());
980    let located = LocatedSpan::new(bytes);
981
982    let mut elements = Vec::new();
983    let mut errors = Vec::new();
984
985    let (mut input, _) = match lex::ws_and_comments(located) {
986        Ok(x) => x,
987        Err(_) => {
988            return ParseResult {
989                root: RootNamespace { elements: vec![] },
990                errors: vec![ParseError::new("invalid input").with_code("invalid_input")],
991            };
992        }
993    };
994
995    while errors.len() < MAX_RECOVERY_ERRORS {
996        // Skip leading ws/comments; if nothing left, we're done (avoids parsing "" as root_element).
997        let (rest, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
998        input = rest;
999        if input.fragment().is_empty() {
1000            break;
1001        }
1002        match package::root_element(input) {
1003            Ok((rest, elem)) => {
1004                elements.push(elem);
1005                input = rest;
1006            }
1007            Err(nom::Err::Error(e)) | Err(nom::Err::Failure(e)) => {
1008                let pe = missing_closing_brace_error(bytes, e.input).unwrap_or_else(|| {
1009                    nom_err_to_parse_error(&e, None, Some("'package', 'namespace', or 'import'"))
1010                });
1011                errors.push(pe);
1012                let skip_result = lex::skip_to_next_sync_point(e.input);
1013                match skip_result {
1014                    Ok((rest, _)) => input = rest,
1015                    Err(_) => break,
1016                }
1017            }
1018            Err(nom::Err::Incomplete(_)) => {
1019                errors.push(
1020                    ParseError::new("unexpected end of input")
1021                        .with_location(
1022                            input.location_offset(),
1023                            input.location_line(),
1024                            input.get_column(),
1025                        )
1026                        .with_length(1)
1027                        .with_code("unexpected_eof"),
1028                );
1029                break;
1030            }
1031        }
1032    }
1033
1034    let (input, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
1035
1036    if input.fragment().is_empty()
1037        && has_unclosed_brace(bytes)
1038        && !errors
1039            .iter()
1040            .any(|e| e.code.as_deref() == Some("missing_closing_brace"))
1041    {
1042        errors.push(missing_closing_brace_error_at_eof(bytes));
1043    }
1044
1045    if !input.fragment().is_empty() {
1046        let (found_snippet, found_len) = fragment_to_found_snippet(input.fragment());
1047        let mut pe = ParseError::new("expected end of input")
1048            .with_location(
1049                input.location_offset(),
1050                input.location_line(),
1051                input.get_column(),
1052            )
1053            .with_length(found_len.max(1))
1054            .with_code("expected_end_of_input");
1055        if !found_snippet.is_empty() {
1056            pe = pe.with_found(found_snippet);
1057        }
1058        errors.push(pe);
1059    }
1060
1061    errors.extend(collect_recovery_errors(&RootNamespace {
1062        elements: elements.clone(),
1063    }));
1064    errors.sort_by_key(|e| (e.offset.unwrap_or(usize::MAX), e.line.unwrap_or(u32::MAX)));
1065
1066    ParseResult {
1067        root: RootNamespace { elements },
1068        errors,
1069    }
1070}