1mod action;
11mod alias;
12mod allocation;
13mod attribute;
14mod case;
15mod connection;
16mod constraint;
17mod dependency;
18mod enumeration;
19mod expr;
20mod flow;
21mod import;
22mod individual;
23mod interface;
24mod item;
25mod lex;
26mod metadata;
27mod metadata_annotation;
28mod occurrence;
29mod package;
30mod part;
31mod port;
32mod requirement;
33mod span;
34mod state;
35mod usecase;
36mod view;
37
38pub(crate) use span::{node_from_to, span_from_to, with_span, Input};
39
40use crate::ast::{
41 ActionDefBody, ActionDefBodyElement, ActionUsageBody, ActionUsageBodyElement, CalcDefBody,
42 CalcDefBodyElement, ConstraintDefBody, ConstraintDefBodyElement, PackageBody,
43 PackageBodyElement, ParseErrorNode, PartDefBody, PartDefBodyElement, PartUsageBody,
44 PartUsageBodyElement, RequirementDefBody, RequirementDefBodyElement, RootNamespace,
45 StateDefBody, StateDefBodyElement, UseCaseDefBody, UseCaseDefBodyElement, ViewBody,
46 ViewBodyElement, ViewDefBody, ViewDefBodyElement,
47};
48use crate::error::{DiagnosticCategory, DiagnosticSeverity, ParseError};
49use nom::error::Error;
50use nom_locate::LocatedSpan;
51
52#[derive(Debug, Clone)]
54pub struct ParseResult {
55 pub root: RootNamespace,
57 pub errors: Vec<ParseError>,
59}
60
61impl ParseResult {
62 pub fn is_ok(&self) -> bool {
64 self.errors.is_empty()
65 }
66}
67
/// Upper bound, in bytes, on the "found" snippet built by
/// `fragment_to_found_snippet`.
const FOUND_SNIPPET_MAX_LEN: usize = 40;

/// Keywords that `is_illegal_top_level_definition` looks for at the start of a
/// fragment to report a declaration that must be wrapped in a package or
/// namespace.
#[rustfmt::skip]
const ILLEGAL_TOP_LEVEL_STARTERS: &[&[u8]] = &[
    b"action", b"actor", b"alias", b"allocate", b"allocation", b"attribute",
    b"bind", b"calc", b"case", b"concern", b"connection", b"constraint",
    b"dependency", b"enum", b"flow", b"interface", b"item", b"metadata",
    b"occurrence", b"part", b"perform", b"port", b"ref", b"require",
    b"requirement", b"satisfy", b"state", b"use", b"verification", b"view",
    b"viewpoint",
];
102
103fn fragment_to_found_snippet(fragment: &[u8]) -> (String, usize) {
106 let take = fragment
107 .iter()
108 .position(|&b| b == b'\n' || b == b'\r')
109 .map(|p| p.min(FOUND_SNIPPET_MAX_LEN))
110 .unwrap_or_else(|| fragment.len().min(FOUND_SNIPPET_MAX_LEN));
111 let slice = fragment.get(..take).unwrap_or(fragment);
112 let s = String::from_utf8_lossy(slice)
113 .replace('\n', "\\n")
114 .replace('\r', "\\r");
115 let len = slice.len();
116 (s.trim_end().to_string(), len)
117}
118
119pub(crate) fn recovery_found_snippet(input: Input<'_>) -> Option<String> {
120 let frag = input.fragment();
121 let take = frag
122 .iter()
123 .position(|&b| b == b'\n' || b == b'\r')
124 .unwrap_or(frag.len())
125 .min(60);
126 let snippet = String::from_utf8_lossy(&frag[..take]).trim().to_string();
127 if snippet.is_empty() {
128 None
129 } else {
130 Some(snippet)
131 }
132}
133
134fn recovery_found_snippet_from_span(input: Input<'_>, recovery_end: Input<'_>) -> Option<String> {
135 let consumed_len = recovery_end
136 .location_offset()
137 .saturating_sub(input.location_offset())
138 .min(input.fragment().len());
139 if consumed_len == 0 {
140 return recovery_found_snippet(input);
141 }
142 let frag = &input.fragment()[..consumed_len];
143 let take = frag
144 .iter()
145 .position(|&b| b == b'\n' || b == b'\r')
146 .unwrap_or(frag.len())
147 .min(60);
148 let snippet = String::from_utf8_lossy(&frag[..take]).trim().to_string();
149 if snippet.is_empty() {
150 recovery_found_snippet(input)
151 } else {
152 Some(snippet)
153 }
154}
155
156fn nom_error_kind_to_message(code: &nom::error::ErrorKind) -> &'static str {
158 use nom::error::ErrorKind;
159 match code {
160 ErrorKind::Tag => "expected keyword or token",
161 ErrorKind::Digit => "expected number",
162 ErrorKind::Alpha => "expected identifier",
163 ErrorKind::AlphaNumeric => "expected identifier",
164 ErrorKind::Space => "expected whitespace",
165 ErrorKind::MultiSpace => "expected whitespace",
166 ErrorKind::Eof => "unexpected end of input",
167 ErrorKind::TakeUntil => "expected terminator",
168 ErrorKind::TakeWhile1 => "expected token",
169 ErrorKind::Alt => {
170 "expected package, import, part, port, interface, alias, attribute, or action"
171 }
172 ErrorKind::Many0 | ErrorKind::Many1 => "expected list of elements",
173 _ => "parse error",
174 }
175}
176
177fn nom_error_kind_to_code(code: &nom::error::ErrorKind) -> &'static str {
179 use nom::error::ErrorKind;
180 match code {
181 ErrorKind::Tag => "expected_keyword",
182 ErrorKind::Digit => "expected_number",
183 ErrorKind::Alpha | ErrorKind::AlphaNumeric => "expected_identifier",
184 ErrorKind::Space | ErrorKind::MultiSpace => "expected_whitespace",
185 ErrorKind::Eof => "unexpected_eof",
186 ErrorKind::TakeUntil => "expected_terminator",
187 ErrorKind::TakeWhile1 => "expected_token",
188 ErrorKind::Alt => "expected_alt",
189 ErrorKind::Many0 | ErrorKind::Many1 => "expected_list",
190 _ => "parse_error",
191 }
192}
193
194fn nom_err_to_parse_error(
195 e: &Error<Input<'_>>,
196 length_override: Option<usize>,
197 expected_context: Option<&'static str>,
198) -> ParseError {
199 let offset = e.input.location_offset();
200 let line = e.input.location_line();
201 let column = e.input.get_column();
202 let fragment = e.input.fragment();
203 let (found_snippet, found_len) = fragment_to_found_snippet(fragment);
204 let message = nom_error_kind_to_message(&e.code).to_string();
205 let span_len = length_override.unwrap_or(found_len).max(1);
206 if trim_ascii_start(fragment).starts_with(b"}") {
207 return unexpected_closing_brace_parse_error(e.input);
208 }
209 let mut pe = ParseError::new(message)
210 .with_location(offset, line, column)
211 .with_length(span_len)
212 .with_code(nom_error_kind_to_code(&e.code))
213 .with_severity(DiagnosticSeverity::Error)
214 .with_category(DiagnosticCategory::ParseError);
215 if !found_snippet.is_empty() {
216 pe = pe.with_found(found_snippet);
217 }
218 if let Some(ctx) = expected_context {
219 pe = pe.with_expected(ctx);
220 }
221 let at_root = expected_context.is_some_and(|ctx| {
222 ctx.contains("'package', 'namespace', or 'import'") || ctx.contains("top level")
223 });
224 if at_root && is_illegal_top_level_definition(fragment) {
225 pe.message = "illegal top-level definition".to_string();
226 pe.code = Some("illegal_top_level_definition".to_string());
227 pe.expected = Some("'package', 'namespace', or 'import'".to_string());
228 pe.suggestion = Some(
229 "Wrap this declaration in `package ... { ... }` or `namespace ... { ... }`."
230 .to_string(),
231 );
232 }
233 pe
234}
235
236fn is_illegal_top_level_definition(fragment: &[u8]) -> bool {
237 let trimmed = trim_ascii_start(fragment);
238 !trimmed.starts_with(b"}")
239 && !trimmed.starts_with(b"//")
240 && !trimmed.starts_with(b"/*")
241 && lex::starts_with_any_keyword(trimmed, ILLEGAL_TOP_LEVEL_STARTERS)
242}
243
/// Returns `fragment` without its leading ASCII whitespace bytes.
fn trim_ascii_start(fragment: &[u8]) -> &[u8] {
    let start = fragment
        .iter()
        .position(|byte| !byte.is_ascii_whitespace())
        .unwrap_or(fragment.len());
    &fragment[start..]
}
254
255fn starts_with_missing_name_after_keyword(
256 fragment: &[u8],
257 keyword: &[u8],
258 trailing_keywords: &[&[u8]],
259) -> bool {
260 let mut fragment = trim_ascii_start(fragment);
261 if !lex::starts_with_keyword(fragment, keyword) {
262 return false;
263 }
264 fragment = &fragment[keyword.len()..];
265 while let Some(first) = fragment.first() {
266 if first.is_ascii_whitespace() {
267 fragment = &fragment[1..];
268 continue;
269 }
270 break;
271 }
272 for trailing in trailing_keywords {
273 if lex::starts_with_keyword(fragment, trailing) {
274 fragment = &fragment[trailing.len()..];
275 while let Some(first) = fragment.first() {
276 if first.is_ascii_whitespace() {
277 fragment = &fragment[1..];
278 continue;
279 }
280 break;
281 }
282 }
283 }
284 fragment.starts_with(b":")
285}
286
287fn starts_with_missing_type_after_keyword(
288 fragment: &[u8],
289 keyword: &[u8],
290 trailing_keywords: &[&[u8]],
291) -> bool {
292 let mut fragment = trim_ascii_start(fragment);
293 if !lex::starts_with_keyword(fragment, keyword) {
294 return false;
295 }
296 fragment = &fragment[keyword.len()..];
297 while let Some(first) = fragment.first() {
298 if first.is_ascii_whitespace() {
299 fragment = &fragment[1..];
300 continue;
301 }
302 break;
303 }
304 for trailing in trailing_keywords {
305 if lex::starts_with_keyword(fragment, trailing) {
306 fragment = &fragment[trailing.len()..];
307 while let Some(first) = fragment.first() {
308 if first.is_ascii_whitespace() {
309 fragment = &fragment[1..];
310 continue;
311 }
312 break;
313 }
314 }
315 }
316
317 let mut name_len = 0usize;
318 while name_len < fragment.len()
319 && (fragment[name_len].is_ascii_alphanumeric() || fragment[name_len] == b'_')
320 {
321 name_len += 1;
322 }
323 if name_len == 0 {
324 return false;
325 }
326 fragment = &fragment[name_len..];
327 while let Some(first) = fragment.first() {
328 if first.is_ascii_whitespace() {
329 fragment = &fragment[1..];
330 continue;
331 }
332 break;
333 }
334 if !fragment.starts_with(b":") {
335 return false;
336 }
337 fragment = &fragment[1..];
338 while let Some(first) = fragment.first() {
339 if first.is_ascii_whitespace() {
340 fragment = &fragment[1..];
341 continue;
342 }
343 break;
344 }
345
346 fragment.is_empty()
347 || fragment.starts_with(b";")
348 || fragment.starts_with(b"{")
349 || fragment.starts_with(b"}")
350 || lex::starts_with_keyword(fragment, b"then")
351 || lex::starts_with_keyword(fragment, b"if")
352 || lex::starts_with_keyword(fragment, b"do")
353}
354
355fn missing_name_diagnostic(fragment: &[u8]) -> Option<(&'static str, String, String, String)> {
356 #[allow(clippy::type_complexity)]
357 let cases: &[(&[u8], &[&[u8]], &str, &str)] = &[
358 (
359 b"subject",
360 &[],
361 "subject name",
362 "Use `subject laptop: Laptop;`.",
363 ),
364 (b"actor", &[], "actor name", "Use `actor user: User;`."),
365 (b"state", &[], "state name", "Use `state ready: Mode;`."),
366 (b"part", &[], "part name", "Use `part wheel: Wheel;`."),
367 (b"ref", &[], "reference name", "Use `ref sensor: Sensor;`."),
368 (b"port", &[], "port name", "Use `port power: PowerPort;`."),
369 (
370 b"attribute",
371 &[],
372 "attribute name",
373 "Use `attribute mass: MassValue;`.",
374 ),
375 (b"in", &[], "input name", "Use `in speed: Real;`."),
376 (b"out", &[], "output name", "Use `out result: Real;`."),
377 (
378 b"perform",
379 &[b"action"],
380 "action name",
381 "Use `perform action run: Runner;`.",
382 ),
383 (b"return", &[], "return name", "Use `return result: Real;`."),
384 ];
385
386 for (keyword, trailing, missing_what, suggestion) in cases {
387 if starts_with_missing_name_after_keyword(fragment, keyword, trailing) {
388 return Some((
389 "missing_member_name",
390 format!("expected {missing_what} before ':'"),
391 format!("{missing_what} before ':'"),
392 suggestion.to_string(),
393 ));
394 }
395 }
396 None
397}
398
399fn missing_type_diagnostic(fragment: &[u8]) -> Option<(&'static str, String, String, String)> {
400 #[allow(clippy::type_complexity)]
401 let cases: &[(&[u8], &[&[u8]], &str)] = &[
402 (b"subject", &[], "subject type"),
403 (b"actor", &[], "actor type"),
404 (b"state", &[], "state type"),
405 (b"part", &[], "part type"),
406 (b"ref", &[], "reference type"),
407 (b"port", &[], "port type"),
408 (b"attribute", &[], "attribute type"),
409 (b"in", &[], "input type"),
410 (b"out", &[], "output type"),
411 (b"perform", &[b"action"], "action type"),
412 (b"return", &[], "return type"),
413 ];
414
415 for &(keyword, trailing, missing_what) in cases {
416 if starts_with_missing_type_after_keyword(fragment, keyword, trailing) {
417 let keyword_label = String::from_utf8_lossy(keyword);
418 let sample_name = if keyword == &b"subject"[..] {
419 "laptop"
420 } else if keyword == &b"actor"[..] {
421 "user"
422 } else if keyword == &b"state"[..] {
423 "ready"
424 } else if keyword == &b"part"[..] {
425 "wheel"
426 } else if keyword == &b"ref"[..] {
427 "sensor"
428 } else if keyword == &b"port"[..] {
429 "power"
430 } else if keyword == &b"attribute"[..] {
431 "mass"
432 } else if keyword == &b"in"[..] {
433 "speed"
434 } else if keyword == &b"out"[..] {
435 "result"
436 } else if keyword == &b"perform"[..] {
437 "run"
438 } else if keyword == &b"return"[..] {
439 "result"
440 } else {
441 "member"
442 };
443 let sample_type = if keyword == &b"subject"[..] {
444 "Laptop"
445 } else if keyword == &b"actor"[..] {
446 "User"
447 } else if keyword == &b"state"[..] {
448 "Mode"
449 } else if keyword == &b"part"[..] {
450 "Wheel"
451 } else if keyword == &b"ref"[..] {
452 "Sensor"
453 } else if keyword == &b"port"[..] {
454 "PowerPort"
455 } else if keyword == &b"attribute"[..] {
456 "MassValue"
457 } else if keyword == &b"in"[..] || keyword == &b"out"[..] {
458 "Real"
459 } else if keyword == &b"perform"[..] {
460 "Runner"
461 } else if keyword == &b"return"[..] {
462 "Real"
463 } else {
464 "Type"
465 };
466 let suggestion = if keyword == &b"perform"[..] {
467 format!("Use `perform action {sample_name}: {sample_type};`.")
468 } else if keyword == &b"return"[..] {
469 format!("Use `return {sample_name}: {sample_type};`.")
470 } else {
471 format!("Use `{keyword_label} {sample_name}: {sample_type};`.")
472 };
473 return Some((
474 "missing_type_reference",
475 format!("expected {missing_what} after ':'"),
476 format!("{missing_what} after ':'"),
477 suggestion,
478 ));
479 }
480 }
481 None
482}
483
484fn invalid_expose_separator_diagnostic(
485 fragment: &[u8],
486) -> Option<(&'static str, String, String, String)> {
487 let mut fragment = trim_ascii_start(fragment);
488 if !lex::starts_with_keyword(fragment, b"expose") {
489 return None;
490 }
491 fragment = &fragment[b"expose".len()..];
492 while let Some(first) = fragment.first() {
493 if first.is_ascii_whitespace() {
494 fragment = &fragment[1..];
495 continue;
496 }
497 break;
498 }
499 if fragment.is_empty() {
500 return None;
501 }
502
503 let mut saw_dot = false;
504 let mut in_quoted_name = false;
505 for &b in fragment {
506 if b == b'\'' {
507 in_quoted_name = !in_quoted_name;
508 continue;
509 }
510 if in_quoted_name {
511 continue;
512 }
513 if matches!(b, b';' | b'[' | b'{' | b'}' | b'\n' | b'\r') {
514 break;
515 }
516 if b == b'.' {
517 saw_dot = true;
518 break;
519 }
520 }
521 if !saw_dot {
522 return None;
523 }
524
525 Some((
526 "invalid_qualified_name_separator",
527 "invalid qualified name in expose target: use '::' instead of '.'".to_string(),
528 "qualified name segments separated by '::'".to_string(),
529 "Replace '.' with '::' in the expose target (example: `expose A::B;`).".to_string(),
530 ))
531}
532
533fn missing_semicolon_or_body_diagnostic(
534 fragment: &[u8],
535) -> Option<(&'static str, String, String, String)> {
536 let fragment = trim_ascii_start(fragment);
537 let cases: &[(&[u8], &str, &str)] = &[
538 (
539 b"action def",
540 "action definition",
541 "Use `action def Run;` or `action def Run { ... }`.",
542 ),
543 (
544 b"part def",
545 "part definition",
546 "Use `part def Wheel;` or `part def Wheel { ... }`.",
547 ),
548 (
549 b"requirement def",
550 "requirement definition",
551 "Use `requirement def R;` or `requirement def R { ... }`.",
552 ),
553 (
554 b"state def",
555 "state definition",
556 "Use `state def Ready;` or `state def Ready { ... }`.",
557 ),
558 (
559 b"view",
560 "view declaration",
561 "Use `view structure: GeneralView;` or `view structure: GeneralView { ... }`.",
562 ),
563 (
564 b"rendering def",
565 "rendering definition",
566 "Use `rendering def Diagram;` or `rendering def Diagram { ... }`.",
567 ),
568 ];
569
570 for (prefix, label, suggestion) in cases {
571 if fragment.starts_with(prefix) {
572 return Some((
573 "missing_body_or_semicolon",
574 format!("expected ';' or '{{' after {label} header"),
575 "';' or '{' after declaration header".to_string(),
576 suggestion.to_string(),
577 ));
578 }
579 }
580 None
581}
582
583fn invalid_typing_operator_diagnostic(
584 fragment: &[u8],
585) -> Option<(&'static str, String, String, String)> {
586 let fragment = trim_ascii_start(fragment);
587 let cases: &[(&[u8], &str, &str)] = &[
588 (
589 b"part def",
590 "part definition specialization",
591 "Use `part def Vehicle :> BaseVehicle;` when specializing a definition.",
592 ),
593 (
594 b"port def",
595 "port definition specialization",
596 "Use `port def PowerPort :> BasePort;` when specializing a definition.",
597 ),
598 ];
599
600 for (prefix, label, suggestion) in cases {
601 if fragment.starts_with(prefix) && fragment.windows(3).any(|w| w == b": ") {
602 return Some((
603 "invalid_typing_operator",
604 format!("invalid typing operator in {label}: use ':>' instead of ':'"),
605 "':>' specialization operator".to_string(),
606 suggestion.to_string(),
607 ));
608 }
609 }
610
611 if fragment.starts_with(b"part def")
612 && fragment.contains(&b':')
613 && !fragment.windows(2).any(|w| w == b":>")
614 {
615 return Some((
616 "invalid_typing_operator",
617 "invalid typing operator in part definition: use ':>' instead of ':'".to_string(),
618 "':>' specialization operator".to_string(),
619 "Use `part def Vehicle :> BaseVehicle;` when specializing a definition.".to_string(),
620 ));
621 }
622
623 None
624}
625
626fn missing_expression_after_operator_diagnostic(
627 fragment: &[u8],
628) -> Option<(&'static str, String, String, String)> {
629 let fragment = trim_ascii_start(fragment);
630 let cases: &[(&[u8], &str, &str)] = &[
631 (
632 b"bind",
633 "binding expression after '='",
634 "Use `bind x = y;`.",
635 ),
636 (
637 b"assign",
638 "assignment expression after ':='",
639 "Use `assign x := y;`.",
640 ),
641 (
642 b"first",
643 "target after 'then'",
644 "Use `first start then finish;`.",
645 ),
646 (
647 b"flow",
648 "target after 'to'",
649 "Use `flow source to target;`.",
650 ),
651 (
652 b"satisfy",
653 "target after 'by'",
654 "Use `satisfy Req by implementation;`.",
655 ),
656 ];
657
658 for (keyword, expected, suggestion) in cases {
659 if !lex::starts_with_keyword(fragment, keyword) {
660 continue;
661 }
662 let text = String::from_utf8_lossy(fragment);
663 if text.contains("= ;") || text.trim_end().ends_with('=') {
664 return Some((
665 "missing_expression_after_operator",
666 "expected expression after '='".to_string(),
667 expected.to_string(),
668 suggestion.to_string(),
669 ));
670 }
671 if text.contains(":= ;") || text.trim_end().ends_with(":=") {
672 return Some((
673 "missing_expression_after_operator",
674 "expected expression after ':='".to_string(),
675 expected.to_string(),
676 suggestion.to_string(),
677 ));
678 }
679 if text.contains(" then ;") || text.trim_end().ends_with(" then") {
680 return Some((
681 "missing_expression_after_operator",
682 "expected target after 'then'".to_string(),
683 expected.to_string(),
684 suggestion.to_string(),
685 ));
686 }
687 if text.contains(" to ;") || text.trim_end().ends_with(" to") {
688 return Some((
689 "missing_expression_after_operator",
690 "expected target after 'to'".to_string(),
691 expected.to_string(),
692 suggestion.to_string(),
693 ));
694 }
695 if text.contains(" by ;") || text.trim_end().ends_with(" by") {
696 return Some((
697 "missing_expression_after_operator",
698 "expected target after 'by'".to_string(),
699 expected.to_string(),
700 suggestion.to_string(),
701 ));
702 }
703 }
704 None
705}
706
707fn invalid_unit_reference_diagnostic(
708 fragment: &[u8],
709) -> Option<(&'static str, String, String, String)> {
710 let fragment = trim_ascii_start(fragment);
711 let text = String::from_utf8_lossy(fragment);
712 if !(text.contains('[') && text.contains(']')) {
713 return None;
714 }
715
716 if text.contains("[]") || text.contains("[ ]") {
717 return Some((
718 "invalid_unit_reference",
719 "expected unit name inside '[ ]'".to_string(),
720 "unit name inside '[ ]'".to_string(),
721 "Use a concrete unit such as `1750 [kg]`.".to_string(),
722 ));
723 }
724
725 if text.contains("[;")
726 || text.contains("[ ;")
727 || text.contains("[)")
728 || text.contains("[ ]")
729 || text.contains("[,")
730 {
731 return Some((
732 "invalid_unit_reference",
733 "invalid unit expression inside '[ ]'".to_string(),
734 "unit name inside '[ ]'".to_string(),
735 "Use a unit symbol or qualified unit name (example: `[kg]` or `[SI::kg]`).".to_string(),
736 ));
737 }
738
739 None
740}
741
742fn unexpected_keyword_in_scope_diagnostic(
743 fragment: &[u8],
744 starters: &[&[u8]],
745 scope_label: &str,
746) -> Option<(&'static str, String, String, String)> {
747 let fragment = trim_ascii_start(fragment);
748 if fragment.is_empty() || fragment.starts_with(b"#") || fragment.starts_with(b"@") {
749 return None;
750 }
751 let keyword_end = fragment
752 .iter()
753 .position(|b| !b.is_ascii_alphanumeric() && *b != b'_')
754 .unwrap_or(fragment.len());
755 if keyword_end == 0 {
756 return None;
757 }
758 let keyword = &fragment[..keyword_end];
759 if lex::starts_with_any_keyword(keyword, starters) {
760 return None;
761 }
762 let keyword_text = String::from_utf8_lossy(keyword);
763 Some((
764 "unexpected_keyword_in_scope",
765 format!("unexpected keyword `{keyword_text}` in {scope_label}"),
766 format!("valid {scope_label} element"),
767 format!("Replace `{keyword_text}` with a valid {scope_label} member or remove it."),
768 ))
769}
770
771fn invalid_bare_identifier_in_body_diagnostic(
772 fragment: &[u8],
773 scope_label: &str,
774) -> Option<(&'static str, String, String, String)> {
775 let is_action = scope_label.contains("action body");
776 let is_state = scope_label.contains("state body");
777 if !is_action && !is_state {
778 return None;
779 }
780
781 let fragment = trim_ascii_start(fragment);
782 let ident_end = fragment
783 .iter()
784 .position(|b| !b.is_ascii_alphanumeric() && *b != b'_')
785 .unwrap_or(fragment.len());
786 if ident_end == 0 || !fragment[0].is_ascii_alphabetic() {
787 return None;
788 }
789
790 let ident = &fragment[..ident_end];
791 let rest = trim_ascii_start(&fragment[ident_end..]);
792 if !(rest.starts_with(b";")
793 || rest.starts_with(b"}")
794 || rest.starts_with(b"\n")
795 || rest.starts_with(b"\r"))
796 {
797 return None;
798 }
799
800 let ident_text = String::from_utf8_lossy(ident);
801 if is_action {
802 Some((
803 "invalid_bare_identifier_in_action_body",
804 format!("bare identifier `{ident_text}` is not a valid action body member"),
805 "action body member such as `perform`, `bind`, `in`, or `out`".to_string(),
806 format!(
807 "Use an explicit action-body form, for example `perform {ident_text};`, `bind ... = ...;`, or an `in`/`out` parameter declaration."
808 ),
809 ))
810 } else {
811 Some((
812 "invalid_bare_identifier_in_state_body",
813 format!("bare identifier `{ident_text}` is not a valid state body member"),
814 "state body member such as `entry`, `transition`, `then`, `state`, or `ref`"
815 .to_string(),
816 format!(
817 "Use an explicit state-body form, for example `then {ident_text};`, `transition ...;`, or a nested `state` member."
818 ),
819 ))
820 }
821}
822
823fn unexpected_closing_brace_parse_error(input: Input<'_>) -> ParseError {
824 ParseError::new("unexpected closing '}'")
825 .with_location(
826 input.location_offset(),
827 input.location_line(),
828 input.get_column(),
829 )
830 .with_length(1)
831 .with_code("unexpected_closing_brace")
832 .with_expected("valid declaration or end of current body")
833 .with_found("}")
834 .with_suggestion("Remove this '}' or add the missing opening '{' before it.")
835 .with_severity(DiagnosticSeverity::Error)
836 .with_category(DiagnosticCategory::ParseError)
837}
838
839fn missing_closing_brace_error(bytes: &[u8], input: Input<'_>) -> Option<ParseError> {
840 if !input.fragment().is_empty() {
841 return None;
842 }
843 let consumed = &bytes[..input.location_offset().min(bytes.len())];
844 let opens = consumed.iter().filter(|&&b| b == b'{').count();
845 let closes = consumed.iter().filter(|&&b| b == b'}').count();
846 if opens <= closes {
847 return None;
848 }
849 Some(missing_closing_brace_error_at_eof(consumed))
850}
851
852fn missing_closing_brace_error_at_eof(bytes: &[u8]) -> ParseError {
853 let (line, column) = eof_line_column(bytes);
854 ParseError::new("missing closing '}'")
855 .with_location(bytes.len(), line, column)
856 .with_length(1)
857 .with_code("missing_closing_brace")
858 .with_expected("'}'")
859 .with_suggestion("Add '}' to close the open body.")
860 .with_category(DiagnosticCategory::ParseError)
861}
862
863fn category_from_code(code: &str) -> DiagnosticCategory {
864 if code == "unsupported_annotation_syntax" {
865 DiagnosticCategory::UnsupportedGrammarForm
866 } else if code == "unresolved_symbol" {
867 DiagnosticCategory::UnresolvedSymbol
868 } else {
869 DiagnosticCategory::ParseError
870 }
871}
872
/// Returns `true` when `bytes` contains more `{` than `}` bytes. The count is
/// purely byte-wise.
fn has_unclosed_brace(bytes: &[u8]) -> bool {
    let (opens, closes) = bytes
        .iter()
        .fold((0usize, 0usize), |(opens, closes), &byte| match byte {
            b'{' => (opens + 1, closes),
            b'}' => (opens, closes + 1),
            _ => (opens, closes),
        });
    opens > closes
}
878
/// Computes the 1-based `(line, column)` of the position just past the end of
/// `bytes`.
///
/// Each `\n` starts a new line and resets the column to 1; every other byte
/// (including `\r`) advances the column by one.
fn eof_line_column(bytes: &[u8]) -> (u32, usize) {
    bytes
        .iter()
        .fold((1u32, 1usize), |(line, column), &byte| {
            if byte == b'\n' {
                (line + 1, 1)
            } else {
                (line, column + 1)
            }
        })
}
892
893pub(crate) fn build_recovery_error_node(
894 input: Input<'_>,
895 starters: &[&[u8]],
896 scope_label: &str,
897 generic_code: &str,
898) -> ParseErrorNode {
899 build_recovery_error_node_from_span(input, input, starters, scope_label, generic_code)
900}
901
/// Result of `classify_recovery`: which kind of problem the skipped input
/// most likely represents.
///
/// The struct-like variants all carry the same four texts (diagnostic code,
/// message, expected text and suggestion) as produced by the matching
/// `*_diagnostic` helper; the unit variants carry no data.
enum RecoveryClassification {
    /// A member keyword is followed directly by `:` (see `missing_name_diagnostic`).
    MissingMemberName {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// A member declaration has no type after `:` (see `missing_type_diagnostic`).
    MissingTypeReference {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// An `expose` target uses `.` instead of `::`.
    InvalidQualifiedNameSeparator {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// A declaration header is not followed by `;` or `{`.
    MissingBodyOrSemicolon {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// An operator such as `=` or `:=` has nothing after it.
    MissingExpressionAfterOperator {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// A unit bracket `[ ]` is empty or malformed.
    InvalidUnitReference {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// A definition uses `:` where `:>` is required.
    InvalidTypingOperator {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// A lone identifier appears as a member of an action or state body.
    InvalidBareIdentifierInBody {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// The leading word is not a valid starter for the current scope.
    UnexpectedKeywordInScope {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// A declaration appears to be missing its terminating `;`.
    MissingSemicolon,
    /// The input starts with `#` or `@`.
    UnsupportedAnnotation,
    /// None of the specific classifications applied.
    Unexpected,
}
961
/// Returns `fragment` without its trailing ASCII whitespace bytes.
fn trim_ascii_end(fragment: &[u8]) -> &[u8] {
    let end = fragment
        .iter()
        .rposition(|byte| !byte.is_ascii_whitespace())
        .map_or(0, |last| last + 1);
    &fragment[..end]
}
972
973fn classify_recovery(
974 input: Input<'_>,
975 recovery_end: Input<'_>,
976 starters: &[&[u8]],
977 scope_label: &str,
978) -> RecoveryClassification {
979 let trimmed = trim_ascii_start(input.fragment());
980
981 if let Some((code, message, expected, suggestion)) = missing_name_diagnostic(trimmed) {
982 return RecoveryClassification::MissingMemberName {
983 code: code.to_string(),
984 message,
985 expected,
986 suggestion,
987 };
988 }
989
990 if let Some((code, message, expected, suggestion)) = missing_type_diagnostic(trimmed) {
991 return RecoveryClassification::MissingTypeReference {
992 code: code.to_string(),
993 message,
994 expected,
995 suggestion,
996 };
997 }
998
999 if let Some((code, message, expected, suggestion)) =
1000 invalid_expose_separator_diagnostic(trimmed)
1001 {
1002 return RecoveryClassification::InvalidQualifiedNameSeparator {
1003 code: code.to_string(),
1004 message,
1005 expected,
1006 suggestion,
1007 };
1008 }
1009
1010 if let Some((code, message, expected, suggestion)) = invalid_typing_operator_diagnostic(trimmed)
1011 {
1012 return RecoveryClassification::InvalidTypingOperator {
1013 code: code.to_string(),
1014 message,
1015 expected,
1016 suggestion,
1017 };
1018 }
1019
1020 if let Some((code, message, expected, suggestion)) =
1021 missing_expression_after_operator_diagnostic(trimmed)
1022 {
1023 return RecoveryClassification::MissingExpressionAfterOperator {
1024 code: code.to_string(),
1025 message,
1026 expected,
1027 suggestion,
1028 };
1029 }
1030
1031 if let Some((code, message, expected, suggestion)) = invalid_unit_reference_diagnostic(trimmed)
1032 {
1033 return RecoveryClassification::InvalidUnitReference {
1034 code: code.to_string(),
1035 message,
1036 expected,
1037 suggestion,
1038 };
1039 }
1040
1041 if let Some((code, message, expected, suggestion)) =
1042 missing_semicolon_or_body_diagnostic(trimmed)
1043 {
1044 return RecoveryClassification::MissingBodyOrSemicolon {
1045 code: code.to_string(),
1046 message,
1047 expected,
1048 suggestion,
1049 };
1050 }
1051
1052 let consumed_len = recovery_end
1053 .location_offset()
1054 .saturating_sub(input.location_offset())
1055 .min(input.fragment().len());
1056 let raw_consumed = &input.fragment()[..consumed_len];
1057 let consumed = trim_ascii_end(raw_consumed);
1058 let recovered_to_boundary = recovery_end.location_offset() > input.location_offset() && {
1059 let (next, _) = lex::ws_and_comments(recovery_end).unwrap_or((recovery_end, ()));
1060 next.fragment().is_empty()
1061 || next.fragment().starts_with(b"}")
1062 || lex::starts_with_any_keyword(next.fragment(), starters)
1063 };
1064
1065 let consumed_has_newline = raw_consumed.contains(&b'\n') || raw_consumed.contains(&b'\r');
1066 let first_line_end = consumed
1067 .iter()
1068 .position(|b| matches!(*b, b'\n' | b'\r'))
1069 .unwrap_or(consumed.len());
1070 let first_line = trim_ascii_end(&consumed[..first_line_end]);
1071 let consumed_has_delimiters = consumed
1072 .iter()
1073 .any(|b| matches!(*b, b'{' | b'}' | b'(' | b')' | b'[' | b']'));
1074 let consumed_ends_incomplete = first_line.last().is_some_and(|b| {
1075 matches!(
1076 *b,
1077 b':' | b'=' | b',' | b'.' | b'+' | b'-' | b'*' | b'/' | b'>' | b'<' | b'|'
1078 )
1079 });
1080 let first_line_has_semicolon = first_line.contains(&b';');
1081 if recovered_to_boundary
1082 && lex::starts_with_any_keyword(trimmed, starters)
1083 && (consumed_has_newline || recovery_end.fragment().starts_with(b"}"))
1084 && !consumed.is_empty()
1085 && !consumed_has_delimiters
1086 && !consumed_ends_incomplete
1087 && !first_line_has_semicolon
1088 {
1089 return RecoveryClassification::MissingSemicolon;
1090 }
1091
1092 if lex::starts_with_keyword(trimmed, b"#") || lex::starts_with_keyword(trimmed, b"@") {
1093 return RecoveryClassification::UnsupportedAnnotation;
1094 }
1095
1096 if let Some((code, message, expected, suggestion)) =
1097 invalid_bare_identifier_in_body_diagnostic(trimmed, scope_label)
1098 {
1099 return RecoveryClassification::InvalidBareIdentifierInBody {
1100 code: code.to_string(),
1101 message,
1102 expected,
1103 suggestion,
1104 };
1105 }
1106
1107 if let Some((code, message, expected, suggestion)) =
1108 unexpected_keyword_in_scope_diagnostic(trimmed, starters, scope_label)
1109 {
1110 return RecoveryClassification::UnexpectedKeywordInScope {
1111 code: code.to_string(),
1112 message,
1113 expected,
1114 suggestion,
1115 };
1116 }
1117
1118 RecoveryClassification::Unexpected
1119}
1120
1121pub(crate) fn build_recovery_error_node_from_span(
1122 input: Input<'_>,
1123 recovery_end: Input<'_>,
1124 starters: &[&[u8]],
1125 scope_label: &str,
1126 generic_code: &str,
1127) -> ParseErrorNode {
1128 match classify_recovery(input, recovery_end, starters, scope_label) {
1129 RecoveryClassification::MissingMemberName {
1130 code,
1131 message,
1132 expected,
1133 suggestion,
1134 }
1135 | RecoveryClassification::MissingTypeReference {
1136 code,
1137 message,
1138 expected,
1139 suggestion,
1140 }
1141 | RecoveryClassification::InvalidQualifiedNameSeparator {
1142 code,
1143 message,
1144 expected,
1145 suggestion,
1146 }
1147 | RecoveryClassification::MissingBodyOrSemicolon {
1148 code,
1149 message,
1150 expected,
1151 suggestion,
1152 }
1153 | RecoveryClassification::MissingExpressionAfterOperator {
1154 code,
1155 message,
1156 expected,
1157 suggestion,
1158 }
1159 | RecoveryClassification::InvalidUnitReference {
1160 code,
1161 message,
1162 expected,
1163 suggestion,
1164 }
1165 | RecoveryClassification::InvalidTypingOperator {
1166 code,
1167 message,
1168 expected,
1169 suggestion,
1170 }
1171 | RecoveryClassification::InvalidBareIdentifierInBody {
1172 code,
1173 message,
1174 expected,
1175 suggestion,
1176 }
1177 | RecoveryClassification::UnexpectedKeywordInScope {
1178 code,
1179 message,
1180 expected,
1181 suggestion,
1182 } => ParseErrorNode {
1183 message,
1184 code,
1185 expected: Some(expected),
1186 found: recovery_found_snippet_from_span(input, recovery_end),
1187 suggestion: Some(suggestion),
1188 category: Some(DiagnosticCategory::ParseError),
1189 },
1190 RecoveryClassification::MissingSemicolon => ParseErrorNode {
1191 message: "missing semicolon before next declaration".to_string(),
1192 code: "missing_semicolon".to_string(),
1193 expected: Some("';'".to_string()),
1194 found: recovery_found_snippet_from_span(input, recovery_end),
1195 suggestion: Some("Insert ';' before this declaration.".to_string()),
1196 category: Some(DiagnosticCategory::ParseError),
1197 },
1198 RecoveryClassification::UnsupportedAnnotation => ParseErrorNode {
1199 message: format!("unsupported annotation syntax in {scope_label}"),
1200 code: "unsupported_annotation_syntax".to_string(),
1201 expected: Some(format!("valid {scope_label} element")),
1202 found: recovery_found_snippet_from_span(input, recovery_end),
1203 suggestion: Some(
1204 "Remove this annotation or extend the parser to support annotated declarations."
1205 .to_string(),
1206 ),
1207 category: Some(DiagnosticCategory::UnsupportedGrammarForm),
1208 },
1209 RecoveryClassification::Unexpected => ParseErrorNode {
1210 message: format!("unexpected token in {scope_label}"),
1211 code: generic_code.to_string(),
1212 expected: Some(format!("valid {scope_label} element")),
1213 found: recovery_found_snippet_from_span(input, recovery_end),
1214 suggestion: Some(format!("Fix this {scope_label} member and re-run parsing.")),
1215 category: Some(DiagnosticCategory::ParseError),
1216 },
1217 }
1218}
1219
1220fn parse_error_from_recovery_node(span: &crate::ast::Span, node: &ParseErrorNode) -> ParseError {
1221 let mut err = ParseError::new(node.message.clone())
1222 .with_location(span.offset, span.line, span.column)
1223 .with_length(span.len.max(1))
1224 .with_code(node.code.clone())
1225 .with_category(
1226 node.category
1227 .unwrap_or_else(|| category_from_code(node.code.as_str())),
1228 );
1229 let severity = if node.code == "unsupported_annotation_syntax" {
1230 DiagnosticSeverity::Warning
1231 } else {
1232 DiagnosticSeverity::Error
1233 };
1234 err = err.with_severity(severity);
1235 if let Some(expected) = &node.expected {
1236 err = err.with_expected(expected.clone());
1237 }
1238 if let Some(found) = &node.found {
1239 err = err.with_found(found.clone());
1240 }
1241 if let Some(suggestion) = &node.suggestion {
1242 err = err.with_suggestion(suggestion.clone());
1243 }
1244 err
1245}
1246
1247fn diagnostic_specificity(err: &ParseError) -> u8 {
1248 match err.code.as_deref() {
1249 Some("missing_member_name")
1250 | Some("missing_type_reference")
1251 | Some("invalid_qualified_name_separator")
1252 | Some("invalid_typing_operator")
1253 | Some("missing_expression_after_operator")
1254 | Some("invalid_unit_reference")
1255 | Some("missing_body_or_semicolon")
1256 | Some("missing_semicolon")
1257 | Some("unexpected_closing_brace")
1258 | Some("missing_closing_brace")
1259 | Some("unsupported_annotation_syntax")
1260 | Some("invalid_bare_identifier_in_action_body")
1261 | Some("invalid_bare_identifier_in_state_body")
1262 | Some("recovery_cascade_suppressed")
1263 | Some("unexpected_keyword_in_scope") => 5,
1264 Some("illegal_top_level_definition") => 4,
1265 Some(code) if code.starts_with("recovered_") => 2,
1266 Some("expected_end_of_input") | Some("expected_keyword") => 1,
1267 _ => 3,
1268 }
1269}
1270
1271fn dedup_errors(mut errors: Vec<ParseError>) -> Vec<ParseError> {
1272 errors.sort_by_key(|e| {
1273 (
1274 e.offset.unwrap_or(usize::MAX),
1275 e.line.unwrap_or(u32::MAX),
1276 e.column.unwrap_or(usize::MAX),
1277 std::cmp::Reverse(diagnostic_specificity(e)),
1278 )
1279 });
1280
1281 let mut deduped = Vec::new();
1282 for err in errors {
1283 let duplicate = deduped.iter().any(|existing: &ParseError| {
1284 let same_start = existing.offset == err.offset
1285 && existing.line == err.line
1286 && existing.column == err.column;
1287 let same_found = existing.found == err.found;
1288 let existing_specificity = diagnostic_specificity(existing);
1289 let err_specificity = diagnostic_specificity(&err);
1290 same_start
1291 && (same_found || existing.code == err.code)
1292 && existing_specificity >= err_specificity
1293 });
1294 if !duplicate {
1295 deduped.push(err);
1296 }
1297 }
1298
1299 deduped.sort_by_key(|e| (e.offset.unwrap_or(usize::MAX), e.line.unwrap_or(u32::MAX)));
1300 deduped
1301}
1302
1303fn is_cascade_candidate(err: &ParseError) -> bool {
1304 matches!(err.code.as_deref(), Some("missing_semicolon"))
1305 || err
1306 .code
1307 .as_deref()
1308 .is_some_and(|code| code.starts_with("recovered_"))
1309}
1310
1311fn cascade_family(err: &ParseError) -> Option<&str> {
1312 if matches!(err.code.as_deref(), Some("missing_semicolon")) {
1313 Some("missing_semicolon")
1314 } else if err
1315 .code
1316 .as_deref()
1317 .is_some_and(|code| code.starts_with("recovered_"))
1318 {
1319 Some("recovered")
1320 } else {
1321 None
1322 }
1323}
1324
1325fn make_cascade_summary(run: &[ParseError]) -> Option<ParseError> {
1326 let summary_anchor = run.first()?;
1327 let suppressed = run.len().saturating_sub(3);
1328 let family = cascade_family(summary_anchor).unwrap_or("recovery");
1329 let mut err = ParseError::new(format!(
1330 "suppressed {suppressed} cascading {family} diagnostic{} after earlier recovery errors",
1331 if suppressed == 1 { "" } else { "s" }
1332 ))
1333 .with_location(
1334 summary_anchor.offset?,
1335 summary_anchor.line?,
1336 summary_anchor.column?,
1337 )
1338 .with_length(summary_anchor.length.unwrap_or(1).max(1))
1339 .with_code("recovery_cascade_suppressed")
1340 .with_expected("fix the first syntax error in this body")
1341 .with_suggestion(
1342 "Fix the earliest diagnostic in this body first; later syntax errors may be cascades.",
1343 )
1344 .with_severity(DiagnosticSeverity::Warning)
1345 .with_category(DiagnosticCategory::ParseError);
1346 if let Some(found) = &summary_anchor.found {
1347 err = err.with_found(found.clone());
1348 }
1349 Some(err)
1350}
1351
1352fn suppress_diagnostic_cascades(errors: Vec<ParseError>) -> Vec<ParseError> {
1353 const MAX_UNSUMMARIZED_CASCADE: usize = 3;
1354
1355 let mut output = Vec::new();
1356 let mut run: Vec<ParseError> = Vec::new();
1357
1358 let flush_run = |run: &mut Vec<ParseError>, output: &mut Vec<ParseError>| {
1359 if run.len() <= MAX_UNSUMMARIZED_CASCADE {
1360 output.append(run);
1361 } else {
1362 output.extend(run.drain(..MAX_UNSUMMARIZED_CASCADE));
1363 if let Some(summary) = make_cascade_summary(run) {
1364 output.push(summary);
1365 }
1366 run.clear();
1367 }
1368 };
1369
1370 for err in errors {
1371 let continues_run = run.last().is_some_and(|previous| {
1372 is_cascade_candidate(&err)
1373 && cascade_family(previous) == cascade_family(&err)
1374 && previous.line.zip(err.line).is_some_and(|(a, b)| b <= a + 1)
1375 });
1376
1377 if is_cascade_candidate(&err) && (run.is_empty() || continues_run) {
1378 run.push(err);
1379 } else {
1380 flush_run(&mut run, &mut output);
1381 if is_cascade_candidate(&err) {
1382 run.push(err);
1383 } else {
1384 output.push(err);
1385 }
1386 }
1387 }
1388 flush_run(&mut run, &mut output);
1389 output.sort_by_key(|e| (e.offset.unwrap_or(usize::MAX), e.line.unwrap_or(u32::MAX)));
1390 output
1391}
1392
1393fn root_body_recovery_error(input: Input<'_>, scope: &str) -> ParseError {
1394 let (found, len) = fragment_to_found_snippet(input.fragment());
1395 let mut err = ParseError::new(format!(
1396 "could not parse {scope} body; skipped to next root element"
1397 ))
1398 .with_location(
1399 input.location_offset(),
1400 input.location_line(),
1401 input.get_column(),
1402 )
1403 .with_length(len.max(1))
1404 .with_code("recovered_root_body")
1405 .with_expected(format!("valid {scope} body"))
1406 .with_suggestion(
1407 "Fix the first syntax error in this body; later root-level diagnostics may be cascades.",
1408 )
1409 .with_severity(DiagnosticSeverity::Error)
1410 .with_category(DiagnosticCategory::ParseError);
1411 if !found.is_empty() {
1412 err = err.with_found(found);
1413 }
1414 err
1415}
1416
1417fn root_body_scope(fragment: &[u8]) -> Option<&'static str> {
1418 let fragment = trim_ascii_start(fragment);
1419 if lex::starts_with_keyword(fragment, b"package")
1420 || lex::starts_with_keyword(fragment, b"library")
1421 || lex::starts_with_keyword(fragment, b"standard")
1422 {
1423 Some("package")
1424 } else if lex::starts_with_keyword(fragment, b"namespace") {
1425 Some("namespace")
1426 } else {
1427 None
1428 }
1429}
1430
1431fn collect_requirement_body_errors(body: &RequirementDefBody, errors: &mut Vec<ParseError>) {
1432 if let RequirementDefBody::Brace { elements } = body {
1433 for element in elements {
1434 match &element.value {
1435 RequirementDefBodyElement::Error(n) => {
1436 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1437 }
1438 RequirementDefBodyElement::Frame(n) => {
1439 collect_requirement_body_errors(&n.value.body, errors)
1440 }
1441 _ => {}
1442 }
1443 }
1444 }
1445}
1446
1447fn collect_action_def_body_errors(body: &ActionDefBody, errors: &mut Vec<ParseError>) {
1448 if let ActionDefBody::Brace { elements } = body {
1449 for element in elements {
1450 if let ActionDefBodyElement::Error(n) = &element.value {
1451 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1452 }
1453 }
1454 }
1455}
1456
1457fn collect_action_usage_body_errors(body: &ActionUsageBody, errors: &mut Vec<ParseError>) {
1458 if let ActionUsageBody::Brace { elements } = body {
1459 for element in elements {
1460 match &element.value {
1461 ActionUsageBodyElement::Error(n) => {
1462 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1463 }
1464 ActionUsageBodyElement::ActionUsage(n) => {
1465 collect_action_usage_body_errors(&n.value.body, errors)
1466 }
1467 _ => {}
1468 }
1469 }
1470 }
1471}
1472
1473fn collect_state_body_errors(body: &StateDefBody, errors: &mut Vec<ParseError>) {
1474 if let StateDefBody::Brace { elements } = body {
1475 for element in elements {
1476 match &element.value {
1477 StateDefBodyElement::Error(n) => {
1478 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1479 }
1480 StateDefBodyElement::Entry(n) => collect_state_body_errors(&n.value.body, errors),
1481 StateDefBodyElement::RequirementUsage(n) => {
1482 collect_requirement_body_errors(&n.value.body, errors)
1483 }
1484 StateDefBodyElement::StateUsage(n) => {
1485 collect_state_body_errors(&n.value.body, errors)
1486 }
1487 _ => {}
1488 }
1489 }
1490 }
1491}
1492
1493fn collect_use_case_body_errors(body: &UseCaseDefBody, errors: &mut Vec<ParseError>) {
1494 if let UseCaseDefBody::Brace { elements } = body {
1495 for element in elements {
1496 if let UseCaseDefBodyElement::Error(n) = &element.value {
1497 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1498 }
1499 }
1500 }
1501}
1502
1503fn collect_constraint_body_errors(body: &ConstraintDefBody, errors: &mut Vec<ParseError>) {
1504 if let ConstraintDefBody::Brace { elements } = body {
1505 for element in elements {
1506 if let ConstraintDefBodyElement::Error(n) = &element.value {
1507 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1508 }
1509 }
1510 }
1511}
1512
1513fn collect_calc_body_errors(body: &CalcDefBody, errors: &mut Vec<ParseError>) {
1514 if let CalcDefBody::Brace { elements } = body {
1515 for element in elements {
1516 if let CalcDefBodyElement::Error(n) = &element.value {
1517 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1518 }
1519 }
1520 }
1521}
1522
1523fn collect_view_def_body_errors(body: &ViewDefBody, errors: &mut Vec<ParseError>) {
1524 if let ViewDefBody::Brace { elements } = body {
1525 for element in elements {
1526 if let ViewDefBodyElement::Error(n) = &element.value {
1527 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1528 }
1529 }
1530 }
1531}
1532
1533fn collect_view_body_errors(body: &ViewBody, errors: &mut Vec<ParseError>) {
1534 if let ViewBody::Brace { elements } = body {
1535 for element in elements {
1536 if let ViewBodyElement::Error(n) = &element.value {
1537 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1538 }
1539 }
1540 }
1541}
1542
1543fn collect_part_def_body_errors(body: &PartDefBody, errors: &mut Vec<ParseError>) {
1544 if let PartDefBody::Brace { elements } = body {
1545 for element in elements {
1546 match &element.value {
1547 PartDefBodyElement::Error(n) => {
1548 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1549 }
1550 PartDefBodyElement::PartUsage(n) => {
1551 collect_part_usage_body_errors(&n.value.body, errors)
1552 }
1553 PartDefBodyElement::Perform(n) => {
1554 collect_perform_body_errors(&n.value.body, errors)
1555 }
1556 _ => {}
1557 }
1558 }
1559 }
1560}
1561
1562fn collect_perform_body_errors(body: &crate::ast::PerformBody, _errors: &mut Vec<ParseError>) {
1563 match body {
1564 crate::ast::PerformBody::Semicolon => {}
1565 crate::ast::PerformBody::Brace { .. } => {}
1566 }
1567}
1568
1569fn collect_part_usage_body_errors(body: &PartUsageBody, errors: &mut Vec<ParseError>) {
1570 if let PartUsageBody::Brace { elements } = body {
1571 for element in elements {
1572 match &element.value {
1573 PartUsageBodyElement::Error(n) => {
1574 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1575 }
1576 PartUsageBodyElement::PartUsage(n) => {
1577 collect_part_usage_body_errors(&n.value.body, errors)
1578 }
1579 PartUsageBodyElement::Perform(n) => {
1580 collect_perform_body_errors(&n.value.body, errors)
1581 }
1582 PartUsageBodyElement::StateUsage(n) => {
1583 collect_state_body_errors(&n.value.body, errors)
1584 }
1585 _ => {}
1586 }
1587 }
1588 }
1589}
1590
1591fn collect_package_body_errors(body: &PackageBody, errors: &mut Vec<ParseError>) {
1592 if let PackageBody::Brace { elements } = body {
1593 for element in elements {
1594 match &element.value {
1595 PackageBodyElement::Error(n) => {
1596 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1597 }
1598 PackageBodyElement::Package(n) => {
1599 collect_package_body_errors(&n.value.body, errors)
1600 }
1601 PackageBodyElement::LibraryPackage(n) => {
1602 collect_package_body_errors(&n.value.body, errors)
1603 }
1604 PackageBodyElement::PartDef(n) => {
1605 collect_part_def_body_errors(&n.value.body, errors)
1606 }
1607 PackageBodyElement::PartUsage(n) => {
1608 collect_part_usage_body_errors(&n.value.body, errors)
1609 }
1610 PackageBodyElement::ActionDef(n) => {
1611 collect_action_def_body_errors(&n.value.body, errors)
1612 }
1613 PackageBodyElement::ActionUsage(n) => {
1614 collect_action_usage_body_errors(&n.value.body, errors)
1615 }
1616 PackageBodyElement::RequirementDef(n) => {
1617 collect_requirement_body_errors(&n.value.body, errors)
1618 }
1619 PackageBodyElement::RequirementUsage(n) => {
1620 collect_requirement_body_errors(&n.value.body, errors)
1621 }
1622 PackageBodyElement::UseCaseDef(n) => {
1623 collect_use_case_body_errors(&n.value.body, errors)
1624 }
1625 PackageBodyElement::UseCaseUsage(n) => {
1626 collect_use_case_body_errors(&n.value.body, errors)
1627 }
1628 PackageBodyElement::ConcernUsage(n) => {
1629 collect_requirement_body_errors(&n.value.body, errors)
1630 }
1631 PackageBodyElement::StateDef(n) => collect_state_body_errors(&n.value.body, errors),
1632 PackageBodyElement::StateUsage(n) => {
1633 collect_state_body_errors(&n.value.body, errors)
1634 }
1635 PackageBodyElement::ConstraintDef(n) => {
1636 collect_constraint_body_errors(&n.value.body, errors)
1637 }
1638 PackageBodyElement::CalcDef(n) => collect_calc_body_errors(&n.value.body, errors),
1639 PackageBodyElement::ViewDef(n) => {
1640 collect_view_def_body_errors(&n.value.body, errors)
1641 }
1642 PackageBodyElement::ViewUsage(n) => collect_view_body_errors(&n.value.body, errors),
1643 _ => {}
1644 }
1645 }
1646 }
1647}
1648
1649fn collect_recovery_errors(root: &RootNamespace) -> Vec<ParseError> {
1650 let mut errors = Vec::new();
1651 for element in &root.elements {
1652 match &element.value {
1653 crate::ast::RootElement::Package(n) => {
1654 collect_package_body_errors(&n.value.body, &mut errors)
1655 }
1656 crate::ast::RootElement::LibraryPackage(n) => {
1657 collect_package_body_errors(&n.value.body, &mut errors)
1658 }
1659 crate::ast::RootElement::Namespace(n) => {
1660 collect_package_body_errors(&n.value.body, &mut errors)
1661 }
1662 crate::ast::RootElement::Import(_) => {}
1663 }
1664 }
1665 errors
1666}
1667
1668#[allow(clippy::result_large_err)]
1670pub fn parse_root(input: &str) -> Result<RootNamespace, ParseError> {
1671 let bytes = input
1672 .strip_prefix('\u{FEFF}')
1673 .map(str::as_bytes)
1674 .unwrap_or_else(|| input.as_bytes());
1675 let located = LocatedSpan::new(bytes);
1676 match package::root_namespace(located) {
1677 Ok((rest, root)) => {
1678 if !rest.fragment().is_empty() && has_unclosed_brace(bytes) {
1679 return Err(missing_closing_brace_error_at_eof(bytes));
1680 }
1681 if rest.fragment().is_empty() {
1682 log::debug!("parse_root: success, {} top-level elements", root.elements.len());
1683 Ok(root)
1684 } else if trim_ascii_start(rest.fragment()).starts_with(b"}") {
1685 Err(unexpected_closing_brace_parse_error(rest))
1686 } else {
1687 let offset = located.location_offset() + located.fragment().len() - rest.fragment().len();
1688 let unconsumed = rest.fragment();
1689 let first_80 = unconsumed.get(..80.min(unconsumed.len())).unwrap_or(unconsumed);
1690 log::debug!(
1691 "parse_root: expected end of input; parsed {} elements; unconsumed len={}, offset={}, first 80 bytes: {:?}",
1692 root.elements.len(),
1693 unconsumed.len(),
1694 offset,
1695 first_80,
1696 );
1697 log::debug!(
1698 "parse_root: unconsumed as str: {:?}",
1699 String::from_utf8_lossy(first_80),
1700 );
1701 let (found_snippet, found_len) = fragment_to_found_snippet(rest.fragment());
1702 let mut pe = ParseError::new("expected end of input")
1703 .with_location(offset, rest.location_line(), rest.get_column())
1704 .with_length(found_len.max(1))
1705 .with_code("expected_end_of_input")
1706 .with_category(DiagnosticCategory::ParseError);
1707 if !found_snippet.is_empty() {
1708 pe = pe.with_found(found_snippet);
1709 }
1710 if root.elements.is_empty() && is_illegal_top_level_definition(rest.fragment()) {
1711 pe = pe
1712 .with_code("illegal_top_level_definition")
1713 .with_expected("'package', 'namespace', or 'import'")
1714 .with_suggestion(
1715 "Wrap this declaration in `package ... { ... }` or `namespace ... { ... }`.",
1716 );
1717 pe.message = "illegal top-level definition".to_string();
1718 }
1719 Err(pe)
1720 }
1721 }
1722 Err(nom::Err::Error(e)) => Err(missing_closing_brace_error(bytes, e.input).unwrap_or_else(|| {
1723 nom_err_to_parse_error(
1724 &e,
1725 None,
1726 Some("'package', 'namespace', or 'import' at top level; or valid element in package body"),
1727 )
1728 })),
1729 Err(nom::Err::Failure(e)) => Err(missing_closing_brace_error(bytes, e.input).unwrap_or_else(|| {
1730 nom_err_to_parse_error(
1731 &e,
1732 None,
1733 Some("'package', 'namespace', or 'import' at top level; or valid element in package body"),
1734 )
1735 })),
1736 Err(nom::Err::Incomplete(_)) => Err(
1737 ParseError::new("unexpected end of input")
1738 .with_code("unexpected_eof")
1739 .with_category(DiagnosticCategory::ParseError),
1740 ),
1741 }
1742}
1743
1744const MAX_RECOVERY_ERRORS: usize = 100;
1745
1746pub fn parse_with_diagnostics(input: &str) -> ParseResult {
1749 let bytes = input
1750 .strip_prefix('\u{FEFF}')
1751 .map(str::as_bytes)
1752 .unwrap_or_else(|| input.as_bytes());
1753 let located = LocatedSpan::new(bytes);
1754
1755 let mut elements = Vec::new();
1756 let mut errors = Vec::new();
1757
1758 let (mut input, _) = match lex::ws_and_comments(located) {
1759 Ok(x) => x,
1760 Err(_) => {
1761 return ParseResult {
1762 root: RootNamespace { elements: vec![] },
1763 errors: vec![ParseError::new("invalid input")
1764 .with_code("invalid_input")
1765 .with_category(DiagnosticCategory::ParseError)],
1766 };
1767 }
1768 };
1769
1770 while errors.len() < MAX_RECOVERY_ERRORS {
1771 let (rest, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
1773 input = rest;
1774 if input.fragment().is_empty() {
1775 break;
1776 }
1777 match package::root_element(input) {
1778 Ok((rest, elem)) => {
1779 elements.push(elem);
1780 input = rest;
1781 }
1782 Err(nom::Err::Error(e)) | Err(nom::Err::Failure(e)) => {
1783 let (trimmed, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
1784 if trim_ascii_start(trimmed.fragment()).starts_with(b"}") {
1785 errors.push(unexpected_closing_brace_parse_error(trimmed));
1786 let skip_result = lex::skip_to_next_sync_point(trimmed);
1787 match skip_result {
1788 Ok((rest, _)) => input = rest,
1789 Err(_) => break,
1790 }
1791 continue;
1792 }
1793 if errors.is_empty()
1794 && has_unclosed_brace(bytes)
1795 && (lex::starts_with_keyword(trimmed.fragment(), b"package")
1796 || lex::starts_with_keyword(trimmed.fragment(), b"namespace")
1797 || lex::starts_with_keyword(trimmed.fragment(), b"library")
1798 || lex::starts_with_keyword(trimmed.fragment(), b"standard"))
1799 {
1800 errors.push(missing_closing_brace_error_at_eof(bytes));
1801 break;
1802 }
1803 if let Some(scope) = root_body_scope(input.fragment()) {
1804 let (error_input, _) = lex::ws_and_comments(e.input).unwrap_or((e.input, ()));
1805 if error_input.fragment().starts_with(b"{") {
1806 errors.push(root_body_recovery_error(error_input, scope));
1807 match lex::skip_statement_or_block(error_input) {
1808 Ok((rest, _))
1809 if rest.location_offset() > error_input.location_offset() =>
1810 {
1811 input = rest;
1812 continue;
1813 }
1814 _ => {}
1815 }
1816 }
1817 }
1818 let pe = missing_closing_brace_error(bytes, e.input).unwrap_or_else(|| {
1819 nom_err_to_parse_error(&e, None, Some("'package', 'namespace', or 'import'"))
1820 });
1821 errors.push(pe);
1822 let skip_result = lex::skip_to_next_sync_point(e.input);
1823 match skip_result {
1824 Ok((rest, _)) => input = rest,
1825 Err(_) => break,
1826 }
1827 }
1828 Err(nom::Err::Incomplete(_)) => {
1829 errors.push(
1830 ParseError::new("unexpected end of input")
1831 .with_location(
1832 input.location_offset(),
1833 input.location_line(),
1834 input.get_column(),
1835 )
1836 .with_length(1)
1837 .with_code("unexpected_eof")
1838 .with_category(DiagnosticCategory::ParseError),
1839 );
1840 break;
1841 }
1842 }
1843 }
1844
1845 let (input, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
1846
1847 if input.fragment().is_empty()
1848 && has_unclosed_brace(bytes)
1849 && !errors
1850 .iter()
1851 .any(|e| e.code.as_deref() == Some("missing_closing_brace"))
1852 {
1853 errors.push(missing_closing_brace_error_at_eof(bytes));
1854 }
1855
1856 if !input.fragment().is_empty()
1857 && !errors
1858 .iter()
1859 .any(|e| e.code.as_deref() == Some("missing_closing_brace"))
1860 {
1861 if trim_ascii_start(input.fragment()).starts_with(b"}") {
1862 errors.push(unexpected_closing_brace_parse_error(input));
1863 } else {
1864 let (found_snippet, found_len) = fragment_to_found_snippet(input.fragment());
1865 let mut pe = ParseError::new("expected end of input")
1866 .with_location(
1867 input.location_offset(),
1868 input.location_line(),
1869 input.get_column(),
1870 )
1871 .with_length(found_len.max(1))
1872 .with_code("expected_end_of_input")
1873 .with_severity(DiagnosticSeverity::Error)
1874 .with_category(DiagnosticCategory::ParseError);
1875 if !found_snippet.is_empty() {
1876 pe = pe.with_found(found_snippet);
1877 }
1878 errors.push(pe);
1879 }
1880 }
1881
1882 errors.extend(collect_recovery_errors(&RootNamespace {
1883 elements: elements.clone(),
1884 }));
1885 errors = dedup_errors(errors);
1886 errors = suppress_diagnostic_cascades(errors);
1887
1888 ParseResult {
1889 root: RootNamespace { elements },
1890 errors,
1891 }
1892}