1mod action;
11mod alias;
12mod allocation;
13mod attribute;
14mod bnf_surface;
15mod body;
16mod case;
17mod connection;
18mod constraint;
19mod definition_prefix;
20mod dependency;
21mod enumeration;
22mod expr;
23mod flow;
24mod import;
25mod individual;
26mod interface;
27mod item;
28mod lex;
29mod metadata;
30mod metadata_annotation;
31mod occurrence;
32mod package;
33mod part;
34mod port;
35mod requirement;
36mod span;
37mod specialization;
38mod state;
39mod usage;
40mod usecase;
41mod view;
42
43use crate::ast::{
44 ActionDefBody, ActionDefBodyElement, ActionUsageBody, ActionUsageBodyElement, CalcDefBody,
45 CalcDefBodyElement, ConstraintDefBody, ConstraintDefBodyElement, PackageBody,
46 PackageBodyElement, ParseErrorNode, PartDefBody, PartDefBodyElement, PartUsageBody,
47 PartUsageBodyElement, RequirementDefBody, RequirementDefBodyElement, RootNamespace,
48 StateDefBody, StateDefBodyElement, UseCaseDefBody, UseCaseDefBodyElement, ViewBody,
49 ViewBodyElement, ViewDefBody, ViewDefBodyElement,
50};
51use crate::error::{DiagnosticCategory, DiagnosticSeverity, ParseError};
52use nom::error::Error;
53use nom_locate::LocatedSpan;
54pub(crate) use span::{node_from_to, span_from_to, with_span, Input};
55
/// A parsed tree bundled with the parse errors that belong to it.
///
/// Use [`ParseResult::is_ok`] to check whether any errors are present.
#[derive(Debug, Clone)]
pub struct ParseResult {
    /// Root namespace node of the parsed tree.
    pub root: RootNamespace,
    /// Parse errors attached to this result; empty when there are none.
    pub errors: Vec<ParseError>,
}
64
impl ParseResult {
    /// Returns `true` when the `errors` list is empty.
    pub fn is_ok(&self) -> bool {
        self.errors.is_empty()
    }
}
71
/// Maximum number of bytes `fragment_to_found_snippet` takes from a fragment.
const FOUND_SNIPPET_MAX_LEN: usize = 40;
/// Keywords that `is_illegal_top_level_definition` looks for at the start of a
/// fragment. When one matches at root level, `nom_err_to_parse_error` reports
/// an "illegal top-level definition" and suggests wrapping the declaration in
/// a package or namespace.
const ILLEGAL_TOP_LEVEL_STARTERS: &[&[u8]] = &[
    b"action",
    b"actor",
    b"alias",
    b"allocate",
    b"allocation",
    b"attribute",
    b"bind",
    b"calc",
    b"case",
    b"concern",
    b"connection",
    b"constraint",
    b"dependency",
    b"enum",
    b"flow",
    b"interface",
    b"item",
    b"metadata",
    b"occurrence",
    b"part",
    b"perform",
    b"port",
    b"ref",
    b"require",
    b"requirement",
    b"satisfy",
    b"state",
    b"use",
    b"verification",
    b"view",
    b"viewpoint",
];
106
107fn fragment_to_found_snippet(fragment: &[u8]) -> (String, usize) {
110 let take = fragment
111 .iter()
112 .position(|&b| b == b'\n' || b == b'\r')
113 .map(|p| p.min(FOUND_SNIPPET_MAX_LEN))
114 .unwrap_or_else(|| fragment.len().min(FOUND_SNIPPET_MAX_LEN));
115 let slice = fragment.get(..take).unwrap_or(fragment);
116 let s = String::from_utf8_lossy(slice)
117 .replace('\n', "\\n")
118 .replace('\r', "\\r");
119 let len = slice.len();
120 (s.trim_end().to_string(), len)
121}
122
123pub(crate) fn recovery_found_snippet(input: Input<'_>) -> Option<String> {
124 let frag = input.fragment();
125 let take = frag
126 .iter()
127 .position(|&b| b == b'\n' || b == b'\r')
128 .unwrap_or(frag.len())
129 .min(60);
130 let snippet = String::from_utf8_lossy(&frag[..take]).trim().to_string();
131 if snippet.is_empty() {
132 None
133 } else {
134 Some(snippet)
135 }
136}
137
138fn recovery_found_snippet_from_span(input: Input<'_>, recovery_end: Input<'_>) -> Option<String> {
139 let consumed_len = recovery_end
140 .location_offset()
141 .saturating_sub(input.location_offset())
142 .min(input.fragment().len());
143 if consumed_len == 0 {
144 return recovery_found_snippet(input);
145 }
146 let frag = &input.fragment()[..consumed_len];
147 let take = frag
148 .iter()
149 .position(|&b| b == b'\n' || b == b'\r')
150 .unwrap_or(frag.len())
151 .min(60);
152 let snippet = String::from_utf8_lossy(&frag[..take]).trim().to_string();
153 if snippet.is_empty() {
154 recovery_found_snippet(input)
155 } else {
156 Some(snippet)
157 }
158}
159
160fn nom_error_kind_to_message(code: &nom::error::ErrorKind) -> &'static str {
162 use nom::error::ErrorKind;
163 match code {
164 ErrorKind::Tag => "expected keyword or token",
165 ErrorKind::Digit => "expected number",
166 ErrorKind::Alpha => "expected identifier",
167 ErrorKind::AlphaNumeric => "expected identifier",
168 ErrorKind::Space => "expected whitespace",
169 ErrorKind::MultiSpace => "expected whitespace",
170 ErrorKind::Eof => "unexpected end of input",
171 ErrorKind::TakeUntil => "expected terminator",
172 ErrorKind::TakeWhile1 => "expected token",
173 ErrorKind::Alt => {
174 "expected package, import, part, port, interface, alias, attribute, or action"
175 }
176 ErrorKind::Many0 | ErrorKind::Many1 => "expected list of elements",
177 _ => "parse error",
178 }
179}
180
181fn nom_error_kind_to_code(code: &nom::error::ErrorKind) -> &'static str {
183 use nom::error::ErrorKind;
184 match code {
185 ErrorKind::Tag => "expected_keyword",
186 ErrorKind::Digit => "expected_number",
187 ErrorKind::Alpha | ErrorKind::AlphaNumeric => "expected_identifier",
188 ErrorKind::Space | ErrorKind::MultiSpace => "expected_whitespace",
189 ErrorKind::Eof => "unexpected_eof",
190 ErrorKind::TakeUntil => "expected_terminator",
191 ErrorKind::TakeWhile1 => "expected_token",
192 ErrorKind::Alt => "expected_alt",
193 ErrorKind::Many0 | ErrorKind::Many1 => "expected_list",
194 _ => "parse_error",
195 }
196}
197
198fn nom_err_to_parse_error(
199 e: &Error<Input<'_>>,
200 length_override: Option<usize>,
201 expected_context: Option<&'static str>,
202) -> ParseError {
203 let offset = e.input.location_offset();
204 let line = e.input.location_line();
205 let column = e.input.get_column();
206 let fragment = e.input.fragment();
207 let (found_snippet, found_len) = fragment_to_found_snippet(fragment);
208 let message = nom_error_kind_to_message(&e.code).to_string();
209 let span_len = length_override.unwrap_or(found_len).max(1);
210 if trim_ascii_start(fragment).starts_with(b"}") {
211 return unexpected_closing_brace_parse_error(e.input);
212 }
213 let mut pe = ParseError::new(message)
214 .with_location(offset, line, column)
215 .with_length(span_len)
216 .with_code(nom_error_kind_to_code(&e.code))
217 .with_severity(DiagnosticSeverity::Error)
218 .with_category(DiagnosticCategory::ParseError);
219 if !found_snippet.is_empty() {
220 pe = pe.with_found(found_snippet);
221 }
222 if let Some(ctx) = expected_context {
223 pe = pe.with_expected(ctx);
224 }
225 let at_root = expected_context.is_some_and(|ctx| {
226 ctx.contains("'package', 'namespace', or 'import'") || ctx.contains("top level")
227 });
228 if at_root && is_illegal_top_level_definition(fragment) {
229 pe.message = "illegal top-level definition".to_string();
230 pe.code = Some("illegal_top_level_definition".to_string());
231 pe.expected = Some("'package', 'namespace', or 'import'".to_string());
232 pe.suggestion = Some(
233 "Wrap this declaration in `package ... { ... }` or `namespace ... { ... }`."
234 .to_string(),
235 );
236 }
237 pe
238}
239
240fn is_illegal_top_level_definition(fragment: &[u8]) -> bool {
241 let trimmed = trim_ascii_start(fragment);
242 !trimmed.starts_with(b"}")
243 && !trimmed.starts_with(b"//")
244 && !trimmed.starts_with(b"/*")
245 && lex::starts_with_any_keyword(trimmed, ILLEGAL_TOP_LEVEL_STARTERS)
246}
247
/// Returns `fragment` without its leading ASCII whitespace.
///
/// The result is a sub-slice of the input; it is empty when the input is
/// empty or consists only of whitespace.
fn trim_ascii_start(fragment: &[u8]) -> &[u8] {
    let first_visible = fragment
        .iter()
        .position(|byte| !byte.is_ascii_whitespace())
        .unwrap_or(fragment.len());
    &fragment[first_visible..]
}
258
259fn starts_with_missing_name_after_keyword(
260 fragment: &[u8],
261 keyword: &[u8],
262 trailing_keywords: &[&[u8]],
263) -> bool {
264 let mut fragment = trim_ascii_start(fragment);
265 if !lex::starts_with_keyword(fragment, keyword) {
266 return false;
267 }
268 fragment = &fragment[keyword.len()..];
269 while let Some(first) = fragment.first() {
270 if first.is_ascii_whitespace() {
271 fragment = &fragment[1..];
272 continue;
273 }
274 break;
275 }
276 for trailing in trailing_keywords {
277 if lex::starts_with_keyword(fragment, trailing) {
278 fragment = &fragment[trailing.len()..];
279 while let Some(first) = fragment.first() {
280 if first.is_ascii_whitespace() {
281 fragment = &fragment[1..];
282 continue;
283 }
284 break;
285 }
286 }
287 }
288 fragment.starts_with(b":")
289 && !lex::starts_with_keyword(fragment, b":>>")
290 && !lex::starts_with_keyword(fragment, b":>")
291 && !lex::starts_with_keyword(fragment, b"::")
292}
293
294fn starts_with_missing_type_after_keyword(
295 fragment: &[u8],
296 keyword: &[u8],
297 trailing_keywords: &[&[u8]],
298) -> bool {
299 let mut fragment = trim_ascii_start(fragment);
300 if !lex::starts_with_keyword(fragment, keyword) {
301 return false;
302 }
303 fragment = &fragment[keyword.len()..];
304 while let Some(first) = fragment.first() {
305 if first.is_ascii_whitespace() {
306 fragment = &fragment[1..];
307 continue;
308 }
309 break;
310 }
311 for trailing in trailing_keywords {
312 if lex::starts_with_keyword(fragment, trailing) {
313 fragment = &fragment[trailing.len()..];
314 while let Some(first) = fragment.first() {
315 if first.is_ascii_whitespace() {
316 fragment = &fragment[1..];
317 continue;
318 }
319 break;
320 }
321 }
322 }
323
324 let mut name_len = 0usize;
325 while name_len < fragment.len()
326 && (fragment[name_len].is_ascii_alphanumeric() || fragment[name_len] == b'_')
327 {
328 name_len += 1;
329 }
330 if name_len == 0 {
331 return false;
332 }
333 fragment = &fragment[name_len..];
334 while let Some(first) = fragment.first() {
335 if first.is_ascii_whitespace() {
336 fragment = &fragment[1..];
337 continue;
338 }
339 break;
340 }
341 if fragment.starts_with(b":") {
342 fragment = &fragment[1..];
343 } else if lex::starts_with_keyword(fragment, b"defined") {
344 fragment = &fragment[b"defined".len()..];
345 fragment = trim_ascii_start(fragment);
346 if !lex::starts_with_keyword(fragment, b"by") {
347 return false;
348 }
349 fragment = &fragment[b"by".len()..];
350 } else if lex::starts_with_keyword(fragment, b"typed") {
351 fragment = &fragment[b"typed".len()..];
352 fragment = trim_ascii_start(fragment);
353 if !lex::starts_with_keyword(fragment, b"by") {
354 return false;
355 }
356 fragment = &fragment[b"by".len()..];
357 } else {
358 return false;
359 }
360 while let Some(first) = fragment.first() {
361 if first.is_ascii_whitespace() {
362 fragment = &fragment[1..];
363 continue;
364 }
365 break;
366 }
367
368 fragment.is_empty()
369 || fragment.starts_with(b";")
370 || fragment.starts_with(b"{")
371 || fragment.starts_with(b"}")
372 || lex::starts_with_keyword(fragment, b"then")
373 || lex::starts_with_keyword(fragment, b"if")
374 || lex::starts_with_keyword(fragment, b"do")
375}
376
377fn missing_name_diagnostic(fragment: &[u8]) -> Option<(&'static str, String, String, String)> {
378 #[allow(clippy::type_complexity)]
379 let cases: &[(&[u8], &[&[u8]], &str, &str)] = &[
380 (
381 b"subject",
382 &[],
383 "subject name",
384 "Use `subject laptop: Laptop;`.",
385 ),
386 (b"actor", &[], "actor name", "Use `actor user: User;`."),
387 (b"state", &[], "state name", "Use `state ready: Mode;`."),
388 (b"part", &[], "part name", "Use `part wheel: Wheel;`."),
389 (b"ref", &[], "reference name", "Use `ref sensor: Sensor;`."),
390 (b"port", &[], "port name", "Use `port power: PowerPort;`."),
391 (
392 b"attribute",
393 &[],
394 "attribute name",
395 "Use `attribute mass: MassValue;`.",
396 ),
397 (b"in", &[], "input name", "Use `in speed: Real;`."),
398 (b"out", &[], "output name", "Use `out result: Real;`."),
399 (
400 b"perform",
401 &[b"action"],
402 "action name",
403 "Use `perform action run: Runner;`.",
404 ),
405 (b"return", &[], "return name", "Use `return result: Real;`."),
406 ];
407
408 for (keyword, trailing, missing_what, suggestion) in cases {
409 if starts_with_missing_name_after_keyword(fragment, keyword, trailing) {
410 return Some((
411 "missing_member_name",
412 format!("expected {missing_what} before ':'"),
413 format!("{missing_what} before ':'"),
414 suggestion.to_string(),
415 ));
416 }
417 }
418 None
419}
420
421fn missing_type_diagnostic(fragment: &[u8]) -> Option<(&'static str, String, String, String)> {
422 #[allow(clippy::type_complexity)]
423 let cases: &[(&[u8], &[&[u8]], &str)] = &[
424 (b"subject", &[], "subject type"),
425 (b"actor", &[], "actor type"),
426 (b"state", &[], "state type"),
427 (b"part", &[], "part type"),
428 (b"ref", &[], "reference type"),
429 (b"port", &[], "port type"),
430 (b"attribute", &[], "attribute type"),
431 (b"occurrence", &[], "occurrence type"),
432 (b"in", &[], "input type"),
433 (b"out", &[], "output type"),
434 (b"perform", &[b"action"], "action type"),
435 (b"return", &[], "return type"),
436 ];
437
438 for &(keyword, trailing, missing_what) in cases {
439 if starts_with_missing_type_after_keyword(fragment, keyword, trailing) {
440 let keyword_label = String::from_utf8_lossy(keyword);
441 let sample_name = if keyword == &b"subject"[..] {
442 "laptop"
443 } else if keyword == &b"actor"[..] {
444 "user"
445 } else if keyword == &b"state"[..] {
446 "ready"
447 } else if keyword == &b"part"[..] {
448 "wheel"
449 } else if keyword == &b"ref"[..] {
450 "sensor"
451 } else if keyword == &b"port"[..] {
452 "power"
453 } else if keyword == &b"attribute"[..] {
454 "mass"
455 } else if keyword == &b"occurrence"[..] {
456 "event"
457 } else if keyword == &b"in"[..] {
458 "speed"
459 } else if keyword == &b"out"[..] {
460 "result"
461 } else if keyword == &b"perform"[..] {
462 "run"
463 } else if keyword == &b"return"[..] {
464 "result"
465 } else {
466 "member"
467 };
468 let sample_type = if keyword == &b"subject"[..] {
469 "Laptop"
470 } else if keyword == &b"actor"[..] {
471 "User"
472 } else if keyword == &b"state"[..] {
473 "Mode"
474 } else if keyword == &b"part"[..] {
475 "Wheel"
476 } else if keyword == &b"ref"[..] {
477 "Sensor"
478 } else if keyword == &b"port"[..] {
479 "PowerPort"
480 } else if keyword == &b"attribute"[..] {
481 "MassValue"
482 } else if keyword == &b"occurrence"[..] {
483 "Event"
484 } else if keyword == &b"in"[..] || keyword == &b"out"[..] {
485 "Real"
486 } else if keyword == &b"perform"[..] {
487 "Runner"
488 } else if keyword == &b"return"[..] {
489 "Real"
490 } else {
491 "Type"
492 };
493 let suggestion = if keyword == &b"perform"[..] {
494 format!("Use `perform action {sample_name}: {sample_type};`.")
495 } else if keyword == &b"return"[..] {
496 format!("Use `return {sample_name}: {sample_type};`.")
497 } else {
498 format!("Use `{keyword_label} {sample_name}: {sample_type};`.")
499 };
500 return Some((
501 "missing_type_reference",
502 format!("expected {missing_what} after ':'"),
503 format!("{missing_what} after ':'"),
504 suggestion,
505 ));
506 }
507 }
508 None
509}
510
511fn invalid_expose_separator_diagnostic(
512 fragment: &[u8],
513) -> Option<(&'static str, String, String, String)> {
514 let mut fragment = trim_ascii_start(fragment);
515 if !lex::starts_with_keyword(fragment, b"expose") {
516 return None;
517 }
518 fragment = &fragment[b"expose".len()..];
519 while let Some(first) = fragment.first() {
520 if first.is_ascii_whitespace() {
521 fragment = &fragment[1..];
522 continue;
523 }
524 break;
525 }
526 if fragment.is_empty() {
527 return None;
528 }
529
530 let mut saw_dot = false;
531 let mut in_quoted_name = false;
532 for &b in fragment {
533 if b == b'\'' {
534 in_quoted_name = !in_quoted_name;
535 continue;
536 }
537 if in_quoted_name {
538 continue;
539 }
540 if matches!(b, b';' | b'[' | b'{' | b'}' | b'\n' | b'\r') {
541 break;
542 }
543 if b == b'.' {
544 saw_dot = true;
545 break;
546 }
547 }
548 if !saw_dot {
549 return None;
550 }
551
552 Some((
553 "invalid_qualified_name_separator",
554 "invalid qualified name in expose target: use '::' instead of '.'".to_string(),
555 "qualified name segments separated by '::'".to_string(),
556 "Replace '.' with '::' in the expose target (example: `expose A::B;`).".to_string(),
557 ))
558}
559
560fn missing_semicolon_or_body_diagnostic(
561 fragment: &[u8],
562) -> Option<(&'static str, String, String, String)> {
563 let fragment = trim_ascii_start(fragment);
564 let cases: &[(&[u8], &str, &str)] = &[
565 (
566 b"action def",
567 "action definition",
568 "Use `action def Run;` or `action def Run { ... }`.",
569 ),
570 (
571 b"part def",
572 "part definition",
573 "Use `part def Wheel;` or `part def Wheel { ... }`.",
574 ),
575 (
576 b"requirement def",
577 "requirement definition",
578 "Use `requirement def R;` or `requirement def R { ... }`.",
579 ),
580 (
581 b"state def",
582 "state definition",
583 "Use `state def Ready;` or `state def Ready { ... }`.",
584 ),
585 (
586 b"view",
587 "view declaration",
588 "Use `view structure: GeneralView;` or `view structure: GeneralView { ... }`.",
589 ),
590 (
591 b"rendering def",
592 "rendering definition",
593 "Use `rendering def Diagram;` or `rendering def Diagram { ... }`.",
594 ),
595 ];
596
597 for (prefix, label, suggestion) in cases {
598 if fragment.starts_with(prefix) {
599 return Some((
600 "missing_body_or_semicolon",
601 format!("expected ';' or '{{' after {label} header"),
602 "';' or '{' after declaration header".to_string(),
603 suggestion.to_string(),
604 ));
605 }
606 }
607 None
608}
609
610fn invalid_typing_operator_diagnostic(
611 fragment: &[u8],
612) -> Option<(&'static str, String, String, String)> {
613 let fragment = trim_ascii_start(fragment);
614 let cases: &[(&[u8], &str, &str)] = &[
615 (
616 b"part def",
617 "part definition specialization",
618 "Use `part def Vehicle :> BaseVehicle;` when specializing a definition.",
619 ),
620 (
621 b"port def",
622 "port definition specialization",
623 "Use `port def PowerPort :> BasePort;` when specializing a definition.",
624 ),
625 ];
626
627 for (prefix, label, suggestion) in cases {
628 if fragment.starts_with(prefix) && fragment.windows(3).any(|w| w == b": ") {
629 return Some((
630 "invalid_typing_operator",
631 format!("invalid typing operator in {label}: use ':>' instead of ':'"),
632 "':>' specialization operator".to_string(),
633 suggestion.to_string(),
634 ));
635 }
636 }
637
638 if fragment.starts_with(b"part def")
639 && fragment.contains(&b':')
640 && !fragment.windows(2).any(|w| w == b":>")
641 {
642 return Some((
643 "invalid_typing_operator",
644 "invalid typing operator in part definition: use ':>' instead of ':'".to_string(),
645 "':>' specialization operator".to_string(),
646 "Use `part def Vehicle :> BaseVehicle;` when specializing a definition.".to_string(),
647 ));
648 }
649
650 None
651}
652
653fn missing_expression_after_operator_diagnostic(
654 fragment: &[u8],
655) -> Option<(&'static str, String, String, String)> {
656 let fragment = trim_ascii_start(fragment);
657 let cases: &[(&[u8], &str, &str)] = &[
658 (
659 b"bind",
660 "binding expression after '='",
661 "Use `bind x = y;`.",
662 ),
663 (
664 b"assign",
665 "assignment expression after ':='",
666 "Use `assign x := y;`.",
667 ),
668 (
669 b"first",
670 "target after 'then'",
671 "Use `first start then finish;`.",
672 ),
673 (
674 b"flow",
675 "target after 'to'",
676 "Use `flow source to target;`.",
677 ),
678 (
679 b"satisfy",
680 "target after 'by'",
681 "Use `satisfy Req by implementation;`.",
682 ),
683 ];
684
685 for (keyword, expected, suggestion) in cases {
686 if !lex::starts_with_keyword(fragment, keyword) {
687 continue;
688 }
689 let text = String::from_utf8_lossy(fragment);
690 if text.contains("= ;") || text.trim_end().ends_with('=') {
691 return Some((
692 "missing_expression_after_operator",
693 "expected expression after '='".to_string(),
694 expected.to_string(),
695 suggestion.to_string(),
696 ));
697 }
698 if text.contains(":= ;") || text.trim_end().ends_with(":=") {
699 return Some((
700 "missing_expression_after_operator",
701 "expected expression after ':='".to_string(),
702 expected.to_string(),
703 suggestion.to_string(),
704 ));
705 }
706 if text.contains(" then ;") || text.trim_end().ends_with(" then") {
707 return Some((
708 "missing_expression_after_operator",
709 "expected target after 'then'".to_string(),
710 expected.to_string(),
711 suggestion.to_string(),
712 ));
713 }
714 if text.contains(" to ;") || text.trim_end().ends_with(" to") {
715 return Some((
716 "missing_expression_after_operator",
717 "expected target after 'to'".to_string(),
718 expected.to_string(),
719 suggestion.to_string(),
720 ));
721 }
722 if text.contains(" by ;") || text.trim_end().ends_with(" by") {
723 return Some((
724 "missing_expression_after_operator",
725 "expected target after 'by'".to_string(),
726 expected.to_string(),
727 suggestion.to_string(),
728 ));
729 }
730 }
731 None
732}
733
/// Detects an empty or malformed unit reference written in square brackets.
///
/// Only fragments containing both `[` and `]` are considered. Returns
/// `(code, message, expected, suggestion)`:
///
/// * `[]` or `[ ]` - the unit name is missing;
/// * `[;`, `[ ;`, `[)` or `[,` - the bracket is followed by something that
///   cannot start a unit expression.
///
/// Leading whitespace does not affect any of these substring checks, so the
/// fragment is inspected as given.
fn invalid_unit_reference_diagnostic(
    fragment: &[u8],
) -> Option<(&'static str, String, String, String)> {
    const MALFORMED_OPENERS: [&str; 4] = ["[;", "[ ;", "[)", "[,"];

    let text = String::from_utf8_lossy(fragment);
    if !(text.contains('[') && text.contains(']')) {
        return None;
    }

    let (message, suggestion) = if text.contains("[]") || text.contains("[ ]") {
        (
            "expected unit name inside '[ ]'",
            "Use a concrete unit such as `1750 [kg]`.",
        )
    } else if MALFORMED_OPENERS.iter().any(|opener| text.contains(*opener)) {
        (
            "invalid unit expression inside '[ ]'",
            "Use a unit symbol or qualified unit name (example: `[kg]` or `[SI::kg]`).",
        )
    } else {
        return None;
    };

    Some((
        "invalid_unit_reference",
        message.to_string(),
        "unit name inside '[ ]'".to_string(),
        suggestion.to_string(),
    ))
}
768
769fn unexpected_keyword_in_scope_diagnostic(
770 fragment: &[u8],
771 starters: &[&[u8]],
772 scope_label: &str,
773) -> Option<(&'static str, String, String, String)> {
774 let fragment = trim_ascii_start(fragment);
775 if fragment.is_empty() || fragment.starts_with(b"#") || fragment.starts_with(b"@") {
776 return None;
777 }
778 let keyword_end = fragment
779 .iter()
780 .position(|b| !b.is_ascii_alphanumeric() && *b != b'_')
781 .unwrap_or(fragment.len());
782 if keyword_end == 0 {
783 return None;
784 }
785 let keyword = &fragment[..keyword_end];
786 if lex::starts_with_any_keyword(keyword, starters) {
787 return None;
788 }
789 let keyword_text = String::from_utf8_lossy(keyword);
790 Some((
791 "unexpected_keyword_in_scope",
792 format!("unexpected keyword `{keyword_text}` in {scope_label}"),
793 format!("valid {scope_label} element"),
794 format!("Replace `{keyword_text}` with a valid {scope_label} member or remove it."),
795 ))
796}
797
798fn invalid_bare_identifier_in_body_diagnostic(
799 fragment: &[u8],
800 scope_label: &str,
801) -> Option<(&'static str, String, String, String)> {
802 let is_action = scope_label.contains("action body");
803 let is_state = scope_label.contains("state body");
804 if !is_action && !is_state {
805 return None;
806 }
807
808 let fragment = trim_ascii_start(fragment);
809 let ident_end = fragment
810 .iter()
811 .position(|b| !b.is_ascii_alphanumeric() && *b != b'_')
812 .unwrap_or(fragment.len());
813 if ident_end == 0 || !fragment[0].is_ascii_alphabetic() {
814 return None;
815 }
816
817 let ident = &fragment[..ident_end];
818 let rest = trim_ascii_start(&fragment[ident_end..]);
819 if !(rest.starts_with(b";")
820 || rest.starts_with(b"}")
821 || rest.starts_with(b"\n")
822 || rest.starts_with(b"\r"))
823 {
824 return None;
825 }
826
827 let ident_text = String::from_utf8_lossy(ident);
828 if is_action {
829 Some((
830 "invalid_bare_identifier_in_action_body",
831 format!("bare identifier `{ident_text}` is not a valid action body member"),
832 "action body member such as `perform`, `bind`, `in`, or `out`".to_string(),
833 format!(
834 "Use an explicit action-body form, for example `perform {ident_text};`, `bind ... = ...;`, or an `in`/`out` parameter declaration."
835 ),
836 ))
837 } else {
838 Some((
839 "invalid_bare_identifier_in_state_body",
840 format!("bare identifier `{ident_text}` is not a valid state body member"),
841 "state body member such as `entry`, `transition`, `then`, `state`, or `ref`"
842 .to_string(),
843 format!(
844 "Use an explicit state-body form, for example `then {ident_text};`, `transition ...;`, or a nested `state` member."
845 ),
846 ))
847 }
848}
849
850fn unexpected_closing_brace_parse_error(input: Input<'_>) -> ParseError {
851 ParseError::new("unexpected closing '}'")
852 .with_location(
853 input.location_offset(),
854 input.location_line(),
855 input.get_column(),
856 )
857 .with_length(1)
858 .with_code("unexpected_closing_brace")
859 .with_expected("valid declaration or end of current body")
860 .with_found("}")
861 .with_suggestion("Remove this '}' or add the missing opening '{' before it.")
862 .with_severity(DiagnosticSeverity::Error)
863 .with_category(DiagnosticCategory::ParseError)
864}
865
866fn missing_closing_brace_error(bytes: &[u8], input: Input<'_>) -> Option<ParseError> {
867 if !input.fragment().is_empty() {
868 return None;
869 }
870 let consumed = &bytes[..input.location_offset().min(bytes.len())];
871 let opens = consumed.iter().filter(|&&b| b == b'{').count();
872 let closes = consumed.iter().filter(|&&b| b == b'}').count();
873 if opens <= closes {
874 return None;
875 }
876 Some(missing_closing_brace_error_at_eof(consumed))
877}
878
879fn missing_closing_brace_error_at_eof(bytes: &[u8]) -> ParseError {
880 let (line, column) = eof_line_column(bytes);
881 ParseError::new("missing closing '}'")
882 .with_location(bytes.len(), line, column)
883 .with_length(1)
884 .with_code("missing_closing_brace")
885 .with_expected("'}'")
886 .with_suggestion("Add '}' to close the open body.")
887 .with_category(DiagnosticCategory::ParseError)
888}
889
890fn category_from_code(code: &str) -> DiagnosticCategory {
891 if code == "unsupported_annotation_syntax" {
892 DiagnosticCategory::UnsupportedGrammarForm
893 } else if code == "unresolved_symbol" {
894 DiagnosticCategory::UnresolvedSymbol
895 } else {
896 DiagnosticCategory::ParseError
897 }
898}
899
/// Reports whether `bytes` contains more `{` than `}`.
///
/// This is a byte count only; nesting order and context are not considered.
fn has_unclosed_brace(bytes: &[u8]) -> bool {
    let (opened, closed) = bytes
        .iter()
        .fold((0usize, 0usize), |(opened, closed), &byte| match byte {
            b'{' => (opened + 1, closed),
            b'}' => (opened, closed + 1),
            _ => (opened, closed),
        });
    opened > closed
}
905
/// Computes the 1-based `(line, column)` of the position just past the end of
/// `bytes`.
///
/// Each `\n` starts a new line and resets the column to 1; every other byte
/// (including `\r`) advances the column by one.
fn eof_line_column(bytes: &[u8]) -> (u32, usize) {
    bytes
        .iter()
        .fold((1u32, 1usize), |(line, column), &byte| {
            if byte == b'\n' {
                (line + 1, 1)
            } else {
                (line, column + 1)
            }
        })
}
919
920pub(crate) fn build_recovery_error_node(
921 input: Input<'_>,
922 starters: &[&[u8]],
923 scope_label: &str,
924 generic_code: &str,
925) -> ParseErrorNode {
926 build_recovery_error_node_from_span(input, input, starters, scope_label, generic_code)
927}
928
/// Outcome of `classify_recovery`: the kind of problem recognised at a
/// recovery point.
///
/// Variants with fields carry the ready-made diagnostic texts
/// (`code`, `message`, `expected`, `suggestion`) returned by the matching
/// `*_diagnostic` helper. The unit variants carry no texts of their own.
enum RecoveryClassification {
    /// From `missing_name_diagnostic`: a member keyword followed directly by `:`.
    MissingMemberName {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// From `missing_type_diagnostic`: a typing operator with no type after it.
    MissingTypeReference {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// From `invalid_expose_separator_diagnostic`: `.` used in an `expose` target.
    InvalidQualifiedNameSeparator {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// From `missing_semicolon_or_body_diagnostic`: a declaration header
    /// without `;` or `{`.
    MissingBodyOrSemicolon {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// From `missing_expression_after_operator_diagnostic`: a dangling
    /// operator such as `=` or `then`.
    MissingExpressionAfterOperator {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// From `invalid_unit_reference_diagnostic`: an empty or malformed `[ ]` unit.
    InvalidUnitReference {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// From `invalid_typing_operator_diagnostic`: `:` used where `:>` is required.
    InvalidTypingOperator {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// From `invalid_bare_identifier_in_body_diagnostic`: a lone identifier in
    /// an action or state body.
    InvalidBareIdentifierInBody {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// From `unexpected_keyword_in_scope_diagnostic`: a leading word that is
    /// not one of the scope's starters.
    UnexpectedKeywordInScope {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// The skipped text looks like a declaration that only lacks its `;`.
    MissingSemicolon,
    /// The text starts with `#` or `@`.
    UnsupportedAnnotation,
    /// None of the more specific classifications applied.
    Unexpected,
}
988
/// Returns `fragment` without its trailing ASCII whitespace.
///
/// The result is a sub-slice of the input; it is empty when the input is
/// empty or consists only of whitespace.
fn trim_ascii_end(fragment: &[u8]) -> &[u8] {
    let visible_len = fragment
        .iter()
        .rposition(|byte| !byte.is_ascii_whitespace())
        .map_or(0, |last_visible| last_visible + 1);
    &fragment[..visible_len]
}
999
/// Chooses the most specific `RecoveryClassification` for the text at `input`.
///
/// * `input` - position whose fragment is inspected.
/// * `recovery_end` - position used to measure the consumed text (the bytes
///   between the two offsets); it is expected to be at or after `input`.
/// * `starters` - keywords that may begin an element of the current scope.
/// * `scope_label` - scope description forwarded to the scope-sensitive helpers.
///
/// The checks run in a fixed order and the first one that matches wins, so
/// the order below is significant.
fn classify_recovery(
    input: Input<'_>,
    recovery_end: Input<'_>,
    starters: &[&[u8]],
    scope_label: &str,
) -> RecoveryClassification {
    let trimmed = trim_ascii_start(input.fragment());

    // 1. Pattern-based diagnostics that only look at the text at `input`.
    if let Some((code, message, expected, suggestion)) = missing_name_diagnostic(trimmed) {
        return RecoveryClassification::MissingMemberName {
            code: code.to_string(),
            message,
            expected,
            suggestion,
        };
    }

    if let Some((code, message, expected, suggestion)) = missing_type_diagnostic(trimmed) {
        return RecoveryClassification::MissingTypeReference {
            code: code.to_string(),
            message,
            expected,
            suggestion,
        };
    }

    if let Some((code, message, expected, suggestion)) =
        invalid_expose_separator_diagnostic(trimmed)
    {
        return RecoveryClassification::InvalidQualifiedNameSeparator {
            code: code.to_string(),
            message,
            expected,
            suggestion,
        };
    }

    if let Some((code, message, expected, suggestion)) = invalid_typing_operator_diagnostic(trimmed)
    {
        return RecoveryClassification::InvalidTypingOperator {
            code: code.to_string(),
            message,
            expected,
            suggestion,
        };
    }

    if let Some((code, message, expected, suggestion)) =
        missing_expression_after_operator_diagnostic(trimmed)
    {
        return RecoveryClassification::MissingExpressionAfterOperator {
            code: code.to_string(),
            message,
            expected,
            suggestion,
        };
    }

    if let Some((code, message, expected, suggestion)) = invalid_unit_reference_diagnostic(trimmed)
    {
        return RecoveryClassification::InvalidUnitReference {
            code: code.to_string(),
            message,
            expected,
            suggestion,
        };
    }

    if let Some((code, message, expected, suggestion)) =
        missing_semicolon_or_body_diagnostic(trimmed)
    {
        return RecoveryClassification::MissingBodyOrSemicolon {
            code: code.to_string(),
            message,
            expected,
            suggestion,
        };
    }

    // 2. Missing-semicolon heuristic, based on the text between `input` and
    //    `recovery_end`.
    let consumed_len = recovery_end
        .location_offset()
        .saturating_sub(input.location_offset())
        .min(input.fragment().len());
    let raw_consumed = &input.fragment()[..consumed_len];
    let consumed = trim_ascii_end(raw_consumed);
    // True when some text was consumed and what follows `recovery_end` (after
    // whitespace/comments) is end of input, a `}`, or a scope starter keyword.
    let recovered_to_boundary = recovery_end.location_offset() > input.location_offset() && {
        let (next, _) = lex::ws_and_comments(recovery_end).unwrap_or((recovery_end, ()));
        next.fragment().is_empty()
            || next.fragment().starts_with(b"}")
            || lex::starts_with_any_keyword(next.fragment(), starters)
    };

    let consumed_has_newline = raw_consumed.contains(&b'\n') || raw_consumed.contains(&b'\r');
    let first_line_end = consumed
        .iter()
        .position(|b| matches!(*b, b'\n' | b'\r'))
        .unwrap_or(consumed.len());
    let first_line = trim_ascii_end(&consumed[..first_line_end]);
    let consumed_has_delimiters = consumed
        .iter()
        .any(|b| matches!(*b, b'{' | b'}' | b'(' | b')' | b'[' | b']'));
    // A first line ending in an operator or separator is not treated as a
    // statement that merely lacks its `;`.
    let consumed_ends_incomplete = first_line.last().is_some_and(|b| {
        matches!(
            *b,
            b':' | b'=' | b',' | b'.' | b'+' | b'-' | b'*' | b'/' | b'>' | b'<' | b'|'
        )
    });
    let first_line_has_semicolon = first_line.contains(&b';');
    if recovered_to_boundary
        && lex::starts_with_any_keyword(trimmed, starters)
        && (consumed_has_newline || recovery_end.fragment().starts_with(b"}"))
        && !consumed.is_empty()
        && !consumed_has_delimiters
        && !consumed_ends_incomplete
        && !first_line_has_semicolon
    {
        return RecoveryClassification::MissingSemicolon;
    }

    // 3. Text starting with `#` or `@`.
    if lex::starts_with_keyword(trimmed, b"#") || lex::starts_with_keyword(trimmed, b"@") {
        return RecoveryClassification::UnsupportedAnnotation;
    }

    // 4. Scope-sensitive fallbacks, from more to less specific.
    if let Some((code, message, expected, suggestion)) =
        invalid_bare_identifier_in_body_diagnostic(trimmed, scope_label)
    {
        return RecoveryClassification::InvalidBareIdentifierInBody {
            code: code.to_string(),
            message,
            expected,
            suggestion,
        };
    }

    if let Some((code, message, expected, suggestion)) =
        unexpected_keyword_in_scope_diagnostic(trimmed, starters, scope_label)
    {
        return RecoveryClassification::UnexpectedKeywordInScope {
            code: code.to_string(),
            message,
            expected,
            suggestion,
        };
    }

    RecoveryClassification::Unexpected
}
1147
1148pub(crate) fn build_recovery_error_node_from_span(
1149 input: Input<'_>,
1150 recovery_end: Input<'_>,
1151 starters: &[&[u8]],
1152 scope_label: &str,
1153 generic_code: &str,
1154) -> ParseErrorNode {
1155 match classify_recovery(input, recovery_end, starters, scope_label) {
1156 RecoveryClassification::MissingMemberName {
1157 code,
1158 message,
1159 expected,
1160 suggestion,
1161 }
1162 | RecoveryClassification::MissingTypeReference {
1163 code,
1164 message,
1165 expected,
1166 suggestion,
1167 }
1168 | RecoveryClassification::InvalidQualifiedNameSeparator {
1169 code,
1170 message,
1171 expected,
1172 suggestion,
1173 }
1174 | RecoveryClassification::MissingBodyOrSemicolon {
1175 code,
1176 message,
1177 expected,
1178 suggestion,
1179 }
1180 | RecoveryClassification::MissingExpressionAfterOperator {
1181 code,
1182 message,
1183 expected,
1184 suggestion,
1185 }
1186 | RecoveryClassification::InvalidUnitReference {
1187 code,
1188 message,
1189 expected,
1190 suggestion,
1191 }
1192 | RecoveryClassification::InvalidTypingOperator {
1193 code,
1194 message,
1195 expected,
1196 suggestion,
1197 }
1198 | RecoveryClassification::InvalidBareIdentifierInBody {
1199 code,
1200 message,
1201 expected,
1202 suggestion,
1203 }
1204 | RecoveryClassification::UnexpectedKeywordInScope {
1205 code,
1206 message,
1207 expected,
1208 suggestion,
1209 } => ParseErrorNode {
1210 message,
1211 code,
1212 expected: Some(expected),
1213 found: recovery_found_snippet_from_span(input, recovery_end),
1214 suggestion: Some(suggestion),
1215 category: Some(DiagnosticCategory::ParseError),
1216 },
1217 RecoveryClassification::MissingSemicolon => ParseErrorNode {
1218 message: "missing semicolon before next declaration".to_string(),
1219 code: "missing_semicolon".to_string(),
1220 expected: Some("';'".to_string()),
1221 found: recovery_found_snippet_from_span(input, recovery_end),
1222 suggestion: Some("Insert ';' before this declaration.".to_string()),
1223 category: Some(DiagnosticCategory::ParseError),
1224 },
1225 RecoveryClassification::UnsupportedAnnotation => ParseErrorNode {
1226 message: format!("unsupported annotation syntax in {scope_label}"),
1227 code: "unsupported_annotation_syntax".to_string(),
1228 expected: Some(format!("valid {scope_label} element")),
1229 found: recovery_found_snippet_from_span(input, recovery_end),
1230 suggestion: Some(
1231 "Remove this annotation or extend the parser to support annotated declarations."
1232 .to_string(),
1233 ),
1234 category: Some(DiagnosticCategory::UnsupportedGrammarForm),
1235 },
1236 RecoveryClassification::Unexpected => ParseErrorNode {
1237 message: format!("unexpected token in {scope_label}"),
1238 code: generic_code.to_string(),
1239 expected: Some(format!("valid {scope_label} element")),
1240 found: recovery_found_snippet_from_span(input, recovery_end),
1241 suggestion: Some(format!("Fix this {scope_label} member and re-run parsing.")),
1242 category: Some(DiagnosticCategory::ParseError),
1243 },
1244 }
1245}
1246
1247fn parse_error_from_recovery_node(span: &crate::ast::Span, node: &ParseErrorNode) -> ParseError {
1248 let mut err = ParseError::new(node.message.clone())
1249 .with_location(span.offset, span.line, span.column)
1250 .with_length(span.len.max(1))
1251 .with_code(node.code.clone())
1252 .with_category(
1253 node.category
1254 .unwrap_or_else(|| category_from_code(node.code.as_str())),
1255 );
1256 let severity = if node.code == "unsupported_annotation_syntax" {
1257 DiagnosticSeverity::Warning
1258 } else {
1259 DiagnosticSeverity::Error
1260 };
1261 err = err.with_severity(severity);
1262 if let Some(expected) = &node.expected {
1263 err = err.with_expected(expected.clone());
1264 }
1265 if let Some(found) = &node.found {
1266 err = err.with_found(found.clone());
1267 }
1268 if let Some(suggestion) = &node.suggestion {
1269 err = err.with_suggestion(suggestion.clone());
1270 }
1271 err
1272}
1273
1274fn diagnostic_specificity(err: &ParseError) -> u8 {
1275 match err.code.as_deref() {
1276 Some("missing_member_name")
1277 | Some("missing_type_reference")
1278 | Some("invalid_qualified_name_separator")
1279 | Some("invalid_typing_operator")
1280 | Some("missing_expression_after_operator")
1281 | Some("invalid_unit_reference")
1282 | Some("missing_body_or_semicolon")
1283 | Some("missing_semicolon")
1284 | Some("unexpected_closing_brace")
1285 | Some("missing_closing_brace")
1286 | Some("unsupported_annotation_syntax")
1287 | Some("invalid_bare_identifier_in_action_body")
1288 | Some("invalid_bare_identifier_in_state_body")
1289 | Some("recovery_cascade_suppressed")
1290 | Some("unexpected_keyword_in_scope") => 5,
1291 Some("illegal_top_level_definition") => 4,
1292 Some(code) if code.starts_with("recovered_") => 2,
1293 Some("expected_end_of_input") | Some("expected_keyword") => 1,
1294 _ => 3,
1295 }
1296}
1297
1298fn dedup_errors(mut errors: Vec<ParseError>) -> Vec<ParseError> {
1299 errors.sort_by_key(|e| {
1300 (
1301 e.offset.unwrap_or(usize::MAX),
1302 e.line.unwrap_or(u32::MAX),
1303 e.column.unwrap_or(usize::MAX),
1304 std::cmp::Reverse(diagnostic_specificity(e)),
1305 )
1306 });
1307
1308 let mut deduped = Vec::new();
1309 for err in errors {
1310 let duplicate = deduped.iter().any(|existing: &ParseError| {
1311 let same_start = existing.offset == err.offset
1312 && existing.line == err.line
1313 && existing.column == err.column;
1314 let same_found = existing.found == err.found;
1315 let existing_specificity = diagnostic_specificity(existing);
1316 let err_specificity = diagnostic_specificity(&err);
1317 same_start
1318 && (same_found || existing.code == err.code)
1319 && existing_specificity >= err_specificity
1320 });
1321 if !duplicate {
1322 deduped.push(err);
1323 }
1324 }
1325
1326 deduped.sort_by_key(|e| (e.offset.unwrap_or(usize::MAX), e.line.unwrap_or(u32::MAX)));
1327 deduped
1328}
1329
1330fn is_cascade_candidate(err: &ParseError) -> bool {
1331 matches!(err.code.as_deref(), Some("missing_semicolon"))
1332 || err
1333 .code
1334 .as_deref()
1335 .is_some_and(|code| code.starts_with("recovered_"))
1336}
1337
1338fn cascade_family(err: &ParseError) -> Option<&str> {
1339 if matches!(err.code.as_deref(), Some("missing_semicolon")) {
1340 Some("missing_semicolon")
1341 } else if err
1342 .code
1343 .as_deref()
1344 .is_some_and(|code| code.starts_with("recovered_"))
1345 {
1346 Some("recovered")
1347 } else {
1348 None
1349 }
1350}
1351
1352fn make_cascade_summary(run: &[ParseError]) -> Option<ParseError> {
1353 let summary_anchor = run.first()?;
1354 let suppressed = run.len().saturating_sub(3);
1355 let family = cascade_family(summary_anchor).unwrap_or("recovery");
1356 let mut err = ParseError::new(format!(
1357 "suppressed {suppressed} cascading {family} diagnostic{} after earlier recovery errors",
1358 if suppressed == 1 { "" } else { "s" }
1359 ))
1360 .with_location(
1361 summary_anchor.offset?,
1362 summary_anchor.line?,
1363 summary_anchor.column?,
1364 )
1365 .with_length(summary_anchor.length.unwrap_or(1).max(1))
1366 .with_code("recovery_cascade_suppressed")
1367 .with_expected("fix the first syntax error in this body")
1368 .with_suggestion(
1369 "Fix the earliest diagnostic in this body first; later syntax errors may be cascades.",
1370 )
1371 .with_severity(DiagnosticSeverity::Warning)
1372 .with_category(DiagnosticCategory::ParseError);
1373 if let Some(found) = &summary_anchor.found {
1374 err = err.with_found(found.clone());
1375 }
1376 Some(err)
1377}
1378
1379fn suppress_diagnostic_cascades(errors: Vec<ParseError>) -> Vec<ParseError> {
1380 const MAX_UNSUMMARIZED_CASCADE: usize = 3;
1381
1382 let mut output = Vec::new();
1383 let mut run: Vec<ParseError> = Vec::new();
1384
1385 let flush_run = |run: &mut Vec<ParseError>, output: &mut Vec<ParseError>| {
1386 if run.len() <= MAX_UNSUMMARIZED_CASCADE {
1387 output.append(run);
1388 } else {
1389 output.extend(run.drain(..MAX_UNSUMMARIZED_CASCADE));
1390 if let Some(summary) = make_cascade_summary(run) {
1391 output.push(summary);
1392 }
1393 run.clear();
1394 }
1395 };
1396
1397 for err in errors {
1398 let continues_run = run.last().is_some_and(|previous| {
1399 is_cascade_candidate(&err)
1400 && cascade_family(previous) == cascade_family(&err)
1401 && previous.line.zip(err.line).is_some_and(|(a, b)| b <= a + 1)
1402 });
1403
1404 if is_cascade_candidate(&err) && (run.is_empty() || continues_run) {
1405 run.push(err);
1406 } else {
1407 flush_run(&mut run, &mut output);
1408 if is_cascade_candidate(&err) {
1409 run.push(err);
1410 } else {
1411 output.push(err);
1412 }
1413 }
1414 }
1415 flush_run(&mut run, &mut output);
1416 output.sort_by_key(|e| (e.offset.unwrap_or(usize::MAX), e.line.unwrap_or(u32::MAX)));
1417 output
1418}
1419
1420fn root_body_recovery_error(input: Input<'_>, scope: &str) -> ParseError {
1421 let (found, len) = fragment_to_found_snippet(input.fragment());
1422 let mut err = ParseError::new(format!(
1423 "could not parse {scope} body; skipped to next root element"
1424 ))
1425 .with_location(
1426 input.location_offset(),
1427 input.location_line(),
1428 input.get_column(),
1429 )
1430 .with_length(len.max(1))
1431 .with_code("recovered_root_body")
1432 .with_expected(format!("valid {scope} body"))
1433 .with_suggestion(
1434 "Fix the first syntax error in this body; later root-level diagnostics may be cascades.",
1435 )
1436 .with_severity(DiagnosticSeverity::Error)
1437 .with_category(DiagnosticCategory::ParseError);
1438 if !found.is_empty() {
1439 err = err.with_found(found);
1440 }
1441 err
1442}
1443
1444fn root_body_scope(fragment: &[u8]) -> Option<&'static str> {
1445 let fragment = trim_ascii_start(fragment);
1446 if lex::starts_with_keyword(fragment, b"package")
1447 || lex::starts_with_keyword(fragment, b"library")
1448 || lex::starts_with_keyword(fragment, b"standard")
1449 {
1450 Some("package")
1451 } else if lex::starts_with_keyword(fragment, b"namespace") {
1452 Some("namespace")
1453 } else {
1454 None
1455 }
1456}
1457
1458fn collect_requirement_body_errors(body: &RequirementDefBody, errors: &mut Vec<ParseError>) {
1459 if let RequirementDefBody::Brace { elements } = body {
1460 for element in elements {
1461 match &element.value {
1462 RequirementDefBodyElement::Error(n) => {
1463 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1464 }
1465 RequirementDefBodyElement::Frame(n) => {
1466 collect_requirement_body_errors(&n.value.body, errors)
1467 }
1468 _ => {}
1469 }
1470 }
1471 }
1472}
1473
1474fn collect_action_def_body_errors(body: &ActionDefBody, errors: &mut Vec<ParseError>) {
1475 if let ActionDefBody::Brace { elements } = body {
1476 for element in elements {
1477 if let ActionDefBodyElement::Error(n) = &element.value {
1478 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1479 }
1480 }
1481 }
1482}
1483
1484fn collect_action_usage_body_errors(body: &ActionUsageBody, errors: &mut Vec<ParseError>) {
1485 if let ActionUsageBody::Brace { elements } = body {
1486 for element in elements {
1487 match &element.value {
1488 ActionUsageBodyElement::Error(n) => {
1489 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1490 }
1491 ActionUsageBodyElement::ActionUsage(n) => {
1492 collect_action_usage_body_errors(&n.value.body, errors)
1493 }
1494 _ => {}
1495 }
1496 }
1497 }
1498}
1499
1500fn collect_state_body_errors(body: &StateDefBody, errors: &mut Vec<ParseError>) {
1501 if let StateDefBody::Brace { elements } = body {
1502 for element in elements {
1503 match &element.value {
1504 StateDefBodyElement::Error(n) => {
1505 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1506 }
1507 StateDefBodyElement::Entry(n) => collect_state_body_errors(&n.value.body, errors),
1508 StateDefBodyElement::RequirementUsage(n) => {
1509 collect_requirement_body_errors(&n.value.body, errors)
1510 }
1511 StateDefBodyElement::StateUsage(n) => {
1512 collect_state_body_errors(&n.value.body, errors)
1513 }
1514 _ => {}
1515 }
1516 }
1517 }
1518}
1519
1520fn collect_use_case_body_errors(body: &UseCaseDefBody, errors: &mut Vec<ParseError>) {
1521 if let UseCaseDefBody::Brace { elements } = body {
1522 for element in elements {
1523 if let UseCaseDefBodyElement::Error(n) = &element.value {
1524 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1525 }
1526 }
1527 }
1528}
1529
1530fn collect_constraint_body_errors(body: &ConstraintDefBody, errors: &mut Vec<ParseError>) {
1531 if let ConstraintDefBody::Brace { elements } = body {
1532 for element in elements {
1533 if let ConstraintDefBodyElement::Error(n) = &element.value {
1534 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1535 }
1536 }
1537 }
1538}
1539
1540fn collect_calc_body_errors(body: &CalcDefBody, errors: &mut Vec<ParseError>) {
1541 if let CalcDefBody::Brace { elements } = body {
1542 for element in elements {
1543 if let CalcDefBodyElement::Error(n) = &element.value {
1544 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1545 }
1546 }
1547 }
1548}
1549
1550fn collect_view_def_body_errors(body: &ViewDefBody, errors: &mut Vec<ParseError>) {
1551 if let ViewDefBody::Brace { elements } = body {
1552 for element in elements {
1553 if let ViewDefBodyElement::Error(n) = &element.value {
1554 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1555 }
1556 }
1557 }
1558}
1559
1560fn collect_view_body_errors(body: &ViewBody, errors: &mut Vec<ParseError>) {
1561 if let ViewBody::Brace { elements } = body {
1562 for element in elements {
1563 if let ViewBodyElement::Error(n) = &element.value {
1564 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1565 }
1566 }
1567 }
1568}
1569
1570fn collect_part_def_body_errors(body: &PartDefBody, errors: &mut Vec<ParseError>) {
1571 if let PartDefBody::Brace { elements } = body {
1572 for element in elements {
1573 match &element.value {
1574 PartDefBodyElement::Error(n) => {
1575 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1576 }
1577 PartDefBodyElement::PartUsage(n) => {
1578 collect_part_usage_body_errors(&n.value.body, errors)
1579 }
1580 PartDefBodyElement::Perform(n) => {
1581 collect_perform_body_errors(&n.value.body, errors)
1582 }
1583 _ => {}
1584 }
1585 }
1586 }
1587}
1588
1589fn collect_perform_body_errors(body: &crate::ast::PerformBody, _errors: &mut Vec<ParseError>) {
1590 match body {
1591 crate::ast::PerformBody::Semicolon => {}
1592 crate::ast::PerformBody::Brace { .. } => {}
1593 }
1594}
1595
1596fn collect_part_usage_body_errors(body: &PartUsageBody, errors: &mut Vec<ParseError>) {
1597 if let PartUsageBody::Brace { elements } = body {
1598 for element in elements {
1599 match &element.value {
1600 PartUsageBodyElement::Error(n) => {
1601 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1602 }
1603 PartUsageBodyElement::PartUsage(n) => {
1604 collect_part_usage_body_errors(&n.value.body, errors)
1605 }
1606 PartUsageBodyElement::Perform(n) => {
1607 collect_perform_body_errors(&n.value.body, errors)
1608 }
1609 PartUsageBodyElement::StateUsage(n) => {
1610 collect_state_body_errors(&n.value.body, errors)
1611 }
1612 _ => {}
1613 }
1614 }
1615 }
1616}
1617
1618fn collect_package_body_errors(body: &PackageBody, errors: &mut Vec<ParseError>) {
1619 if let PackageBody::Brace { elements } = body {
1620 for element in elements {
1621 match &element.value {
1622 PackageBodyElement::Error(n) => {
1623 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1624 }
1625 PackageBodyElement::Package(n) => {
1626 collect_package_body_errors(&n.value.body, errors)
1627 }
1628 PackageBodyElement::LibraryPackage(n) => {
1629 collect_package_body_errors(&n.value.body, errors)
1630 }
1631 PackageBodyElement::PartDef(n) => {
1632 collect_part_def_body_errors(&n.value.body, errors)
1633 }
1634 PackageBodyElement::PartUsage(n) => {
1635 collect_part_usage_body_errors(&n.value.body, errors)
1636 }
1637 PackageBodyElement::ActionDef(n) => {
1638 collect_action_def_body_errors(&n.value.body, errors)
1639 }
1640 PackageBodyElement::ActionUsage(n) => {
1641 collect_action_usage_body_errors(&n.value.body, errors)
1642 }
1643 PackageBodyElement::RequirementDef(n) => {
1644 collect_requirement_body_errors(&n.value.body, errors)
1645 }
1646 PackageBodyElement::RequirementUsage(n) => {
1647 collect_requirement_body_errors(&n.value.body, errors)
1648 }
1649 PackageBodyElement::UseCaseDef(n) => {
1650 collect_use_case_body_errors(&n.value.body, errors)
1651 }
1652 PackageBodyElement::UseCaseUsage(n) => {
1653 collect_use_case_body_errors(&n.value.body, errors)
1654 }
1655 PackageBodyElement::ConcernUsage(n) => {
1656 collect_requirement_body_errors(&n.value.body, errors)
1657 }
1658 PackageBodyElement::StateDef(n) => collect_state_body_errors(&n.value.body, errors),
1659 PackageBodyElement::StateUsage(n) => {
1660 collect_state_body_errors(&n.value.body, errors)
1661 }
1662 PackageBodyElement::ConstraintDef(n) => {
1663 collect_constraint_body_errors(&n.value.body, errors)
1664 }
1665 PackageBodyElement::CalcDef(n) => collect_calc_body_errors(&n.value.body, errors),
1666 PackageBodyElement::ViewDef(n) => {
1667 collect_view_def_body_errors(&n.value.body, errors)
1668 }
1669 PackageBodyElement::ViewUsage(n) => collect_view_body_errors(&n.value.body, errors),
1670 _ => {}
1671 }
1672 }
1673 }
1674}
1675
1676fn collect_recovery_errors(root: &RootNamespace) -> Vec<ParseError> {
1677 let mut errors = Vec::new();
1678 for element in &root.elements {
1679 match &element.value {
1680 crate::ast::RootElement::Package(n) => {
1681 collect_package_body_errors(&n.value.body, &mut errors)
1682 }
1683 crate::ast::RootElement::LibraryPackage(n) => {
1684 collect_package_body_errors(&n.value.body, &mut errors)
1685 }
1686 crate::ast::RootElement::Namespace(n) => {
1687 collect_package_body_errors(&n.value.body, &mut errors)
1688 }
1689 crate::ast::RootElement::Import(_) => {}
1690 }
1691 }
1692 errors
1693}
1694
1695#[allow(clippy::result_large_err)]
1697pub fn parse_root(input: &str) -> Result<RootNamespace, ParseError> {
1698 let bytes = input
1699 .strip_prefix('\u{FEFF}')
1700 .map(str::as_bytes)
1701 .unwrap_or_else(|| input.as_bytes());
1702 let located = LocatedSpan::new(bytes);
1703 match package::root_namespace(located) {
1704 Ok((rest, root)) => {
1705 if !rest.fragment().is_empty() && has_unclosed_brace(bytes) {
1706 return Err(missing_closing_brace_error_at_eof(bytes));
1707 }
1708 if rest.fragment().is_empty() {
1709 log::debug!("parse_root: success, {} top-level elements", root.elements.len());
1710 Ok(root)
1711 } else if trim_ascii_start(rest.fragment()).starts_with(b"}") {
1712 Err(unexpected_closing_brace_parse_error(rest))
1713 } else {
1714 let offset = located.location_offset() + located.fragment().len() - rest.fragment().len();
1715 let unconsumed = rest.fragment();
1716 let first_80 = unconsumed.get(..80.min(unconsumed.len())).unwrap_or(unconsumed);
1717 log::debug!(
1718 "parse_root: expected end of input; parsed {} elements; unconsumed len={}, offset={}, first 80 bytes: {:?}",
1719 root.elements.len(),
1720 unconsumed.len(),
1721 offset,
1722 first_80,
1723 );
1724 log::debug!(
1725 "parse_root: unconsumed as str: {:?}",
1726 String::from_utf8_lossy(first_80),
1727 );
1728 let (found_snippet, found_len) = fragment_to_found_snippet(rest.fragment());
1729 let mut pe = ParseError::new("expected end of input")
1730 .with_location(offset, rest.location_line(), rest.get_column())
1731 .with_length(found_len.max(1))
1732 .with_code("expected_end_of_input")
1733 .with_category(DiagnosticCategory::ParseError);
1734 if !found_snippet.is_empty() {
1735 pe = pe.with_found(found_snippet);
1736 }
1737 if root.elements.is_empty() && is_illegal_top_level_definition(rest.fragment()) {
1738 pe = pe
1739 .with_code("illegal_top_level_definition")
1740 .with_expected("'package', 'namespace', or 'import'")
1741 .with_suggestion(
1742 "Wrap this declaration in `package ... { ... }` or `namespace ... { ... }`.",
1743 );
1744 pe.message = "illegal top-level definition".to_string();
1745 }
1746 Err(pe)
1747 }
1748 }
1749 Err(nom::Err::Error(e)) => Err(missing_closing_brace_error(bytes, e.input).unwrap_or_else(|| {
1750 nom_err_to_parse_error(
1751 &e,
1752 None,
1753 Some("'package', 'namespace', or 'import' at top level; or valid element in package body"),
1754 )
1755 })),
1756 Err(nom::Err::Failure(e)) => Err(missing_closing_brace_error(bytes, e.input).unwrap_or_else(|| {
1757 nom_err_to_parse_error(
1758 &e,
1759 None,
1760 Some("'package', 'namespace', or 'import' at top level; or valid element in package body"),
1761 )
1762 })),
1763 Err(nom::Err::Incomplete(_)) => Err(
1764 ParseError::new("unexpected end of input")
1765 .with_code("unexpected_eof")
1766 .with_category(DiagnosticCategory::ParseError),
1767 ),
1768 }
1769}
1770
1771const MAX_RECOVERY_ERRORS: usize = 100;
1772
1773pub fn parse_with_diagnostics(input: &str) -> ParseResult {
1776 let bytes = input
1777 .strip_prefix('\u{FEFF}')
1778 .map(str::as_bytes)
1779 .unwrap_or_else(|| input.as_bytes());
1780 let located = LocatedSpan::new(bytes);
1781
1782 let mut elements = Vec::new();
1783 let mut errors = Vec::new();
1784
1785 let (mut input, _) = match lex::ws_and_comments(located) {
1786 Ok(x) => x,
1787 Err(_) => {
1788 return ParseResult {
1789 root: RootNamespace { elements: vec![] },
1790 errors: vec![ParseError::new("invalid input")
1791 .with_code("invalid_input")
1792 .with_category(DiagnosticCategory::ParseError)],
1793 };
1794 }
1795 };
1796
1797 while errors.len() < MAX_RECOVERY_ERRORS {
1798 let (rest, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
1800 input = rest;
1801 if input.fragment().is_empty() {
1802 break;
1803 }
1804 match package::root_element(input) {
1805 Ok((rest, elem)) => {
1806 elements.push(elem);
1807 input = rest;
1808 }
1809 Err(nom::Err::Error(e)) | Err(nom::Err::Failure(e)) => {
1810 let (trimmed, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
1811 if trim_ascii_start(trimmed.fragment()).starts_with(b"}") {
1812 errors.push(unexpected_closing_brace_parse_error(trimmed));
1813 let skip_result = lex::skip_to_next_sync_point(trimmed);
1814 match skip_result {
1815 Ok((rest, _)) => input = rest,
1816 Err(_) => break,
1817 }
1818 continue;
1819 }
1820 if errors.is_empty()
1821 && has_unclosed_brace(bytes)
1822 && (lex::starts_with_keyword(trimmed.fragment(), b"package")
1823 || lex::starts_with_keyword(trimmed.fragment(), b"namespace")
1824 || lex::starts_with_keyword(trimmed.fragment(), b"library")
1825 || lex::starts_with_keyword(trimmed.fragment(), b"standard"))
1826 {
1827 errors.push(missing_closing_brace_error_at_eof(bytes));
1828 break;
1829 }
1830 if let Some(scope) = root_body_scope(input.fragment()) {
1831 let (error_input, _) = lex::ws_and_comments(e.input).unwrap_or((e.input, ()));
1832 if error_input.fragment().starts_with(b"{") {
1833 errors.push(root_body_recovery_error(error_input, scope));
1834 match lex::skip_statement_or_block(error_input) {
1835 Ok((rest, _))
1836 if rest.location_offset() > error_input.location_offset() =>
1837 {
1838 input = rest;
1839 continue;
1840 }
1841 _ => {}
1842 }
1843 }
1844 }
1845 let pe = missing_closing_brace_error(bytes, e.input).unwrap_or_else(|| {
1846 nom_err_to_parse_error(&e, None, Some("'package', 'namespace', or 'import'"))
1847 });
1848 errors.push(pe);
1849 let skip_result = lex::skip_to_next_sync_point(e.input);
1850 match skip_result {
1851 Ok((rest, _)) => input = rest,
1852 Err(_) => break,
1853 }
1854 }
1855 Err(nom::Err::Incomplete(_)) => {
1856 errors.push(
1857 ParseError::new("unexpected end of input")
1858 .with_location(
1859 input.location_offset(),
1860 input.location_line(),
1861 input.get_column(),
1862 )
1863 .with_length(1)
1864 .with_code("unexpected_eof")
1865 .with_category(DiagnosticCategory::ParseError),
1866 );
1867 break;
1868 }
1869 }
1870 }
1871
1872 let (input, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
1873
1874 if input.fragment().is_empty()
1875 && has_unclosed_brace(bytes)
1876 && !errors
1877 .iter()
1878 .any(|e| e.code.as_deref() == Some("missing_closing_brace"))
1879 {
1880 errors.push(missing_closing_brace_error_at_eof(bytes));
1881 }
1882
1883 if !input.fragment().is_empty()
1884 && !errors
1885 .iter()
1886 .any(|e| e.code.as_deref() == Some("missing_closing_brace"))
1887 {
1888 if trim_ascii_start(input.fragment()).starts_with(b"}") {
1889 errors.push(unexpected_closing_brace_parse_error(input));
1890 } else {
1891 let (found_snippet, found_len) = fragment_to_found_snippet(input.fragment());
1892 let mut pe = ParseError::new("expected end of input")
1893 .with_location(
1894 input.location_offset(),
1895 input.location_line(),
1896 input.get_column(),
1897 )
1898 .with_length(found_len.max(1))
1899 .with_code("expected_end_of_input")
1900 .with_severity(DiagnosticSeverity::Error)
1901 .with_category(DiagnosticCategory::ParseError);
1902 if !found_snippet.is_empty() {
1903 pe = pe.with_found(found_snippet);
1904 }
1905 errors.push(pe);
1906 }
1907 }
1908
1909 errors.extend(collect_recovery_errors(&RootNamespace {
1910 elements: elements.clone(),
1911 }));
1912 errors = dedup_errors(errors);
1913 errors = suppress_diagnostic_cascades(errors);
1914
1915 ParseResult {
1916 root: RootNamespace { elements },
1917 errors,
1918 }
1919}