1mod action;
11mod alias;
12mod allocation;
13mod attribute;
14mod case;
15mod connection;
16mod body;
17mod constraint;
18mod definition_prefix;
19mod dependency;
20mod enumeration;
21mod expr;
22mod flow;
23mod import;
24mod individual;
25mod interface;
26mod item;
27mod lex;
28mod metadata;
29mod metadata_annotation;
30mod occurrence;
31mod package;
32mod part;
33mod port;
34mod requirement;
35mod specialization;
36mod span;
37mod state;
38mod usecase;
39mod view;
40
41pub(crate) use span::{node_from_to, span_from_to, with_span, Input};
42use crate::ast::{
43 ActionDefBody, ActionDefBodyElement, ActionUsageBody, ActionUsageBodyElement, CalcDefBody,
44 CalcDefBodyElement, ConstraintDefBody, ConstraintDefBodyElement, PackageBody,
45 PackageBodyElement, ParseErrorNode, PartDefBody, PartDefBodyElement, PartUsageBody,
46 PartUsageBodyElement, RequirementDefBody, RequirementDefBodyElement, RootNamespace,
47 StateDefBody, StateDefBodyElement, UseCaseDefBody, UseCaseDefBodyElement, ViewBody,
48 ViewBodyElement, ViewDefBody, ViewDefBodyElement,
49};
50use crate::error::{DiagnosticCategory, DiagnosticSeverity, ParseError};
51use nom::error::Error;
52use nom_locate::LocatedSpan;
53
/// Pairs a [`RootNamespace`] with the list of [`ParseError`]s that goes with it.
#[derive(Debug, Clone)]
pub struct ParseResult {
    /// The root namespace carried by this result.
    pub root: RootNamespace,
    /// Errors carried by this result; `is_ok` reports whether the list is empty.
    pub errors: Vec<ParseError>,
}
62
impl ParseResult {
    /// Returns `true` when `errors` is empty.
    pub fn is_ok(&self) -> bool {
        self.errors.is_empty()
    }
}
69
/// Upper bound, in bytes, on the snippet that `fragment_to_found_snippet`
/// takes from the unparsed input.
const FOUND_SNIPPET_MAX_LEN: usize = 40;
/// Keywords that `is_illegal_top_level_definition` looks for at the start of
/// the input (via `lex::starts_with_any_keyword`) to decide whether an error
/// is reported as an "illegal top-level definition".
const ILLEGAL_TOP_LEVEL_STARTERS: &[&[u8]] = &[
    b"action",
    b"actor",
    b"alias",
    b"allocate",
    b"allocation",
    b"attribute",
    b"bind",
    b"calc",
    b"case",
    b"concern",
    b"connection",
    b"constraint",
    b"dependency",
    b"enum",
    b"flow",
    b"interface",
    b"item",
    b"metadata",
    b"occurrence",
    b"part",
    b"perform",
    b"port",
    b"ref",
    b"require",
    b"requirement",
    b"satisfy",
    b"state",
    b"use",
    b"verification",
    b"view",
    b"viewpoint",
];
104
105fn fragment_to_found_snippet(fragment: &[u8]) -> (String, usize) {
108 let take = fragment
109 .iter()
110 .position(|&b| b == b'\n' || b == b'\r')
111 .map(|p| p.min(FOUND_SNIPPET_MAX_LEN))
112 .unwrap_or_else(|| fragment.len().min(FOUND_SNIPPET_MAX_LEN));
113 let slice = fragment.get(..take).unwrap_or(fragment);
114 let s = String::from_utf8_lossy(slice)
115 .replace('\n', "\\n")
116 .replace('\r', "\\r");
117 let len = slice.len();
118 (s.trim_end().to_string(), len)
119}
120
121pub(crate) fn recovery_found_snippet(input: Input<'_>) -> Option<String> {
122 let frag = input.fragment();
123 let take = frag
124 .iter()
125 .position(|&b| b == b'\n' || b == b'\r')
126 .unwrap_or(frag.len())
127 .min(60);
128 let snippet = String::from_utf8_lossy(&frag[..take]).trim().to_string();
129 if snippet.is_empty() {
130 None
131 } else {
132 Some(snippet)
133 }
134}
135
136fn recovery_found_snippet_from_span(input: Input<'_>, recovery_end: Input<'_>) -> Option<String> {
137 let consumed_len = recovery_end
138 .location_offset()
139 .saturating_sub(input.location_offset())
140 .min(input.fragment().len());
141 if consumed_len == 0 {
142 return recovery_found_snippet(input);
143 }
144 let frag = &input.fragment()[..consumed_len];
145 let take = frag
146 .iter()
147 .position(|&b| b == b'\n' || b == b'\r')
148 .unwrap_or(frag.len())
149 .min(60);
150 let snippet = String::from_utf8_lossy(&frag[..take]).trim().to_string();
151 if snippet.is_empty() {
152 recovery_found_snippet(input)
153 } else {
154 Some(snippet)
155 }
156}
157
158fn nom_error_kind_to_message(code: &nom::error::ErrorKind) -> &'static str {
160 use nom::error::ErrorKind;
161 match code {
162 ErrorKind::Tag => "expected keyword or token",
163 ErrorKind::Digit => "expected number",
164 ErrorKind::Alpha => "expected identifier",
165 ErrorKind::AlphaNumeric => "expected identifier",
166 ErrorKind::Space => "expected whitespace",
167 ErrorKind::MultiSpace => "expected whitespace",
168 ErrorKind::Eof => "unexpected end of input",
169 ErrorKind::TakeUntil => "expected terminator",
170 ErrorKind::TakeWhile1 => "expected token",
171 ErrorKind::Alt => {
172 "expected package, import, part, port, interface, alias, attribute, or action"
173 }
174 ErrorKind::Many0 | ErrorKind::Many1 => "expected list of elements",
175 _ => "parse error",
176 }
177}
178
179fn nom_error_kind_to_code(code: &nom::error::ErrorKind) -> &'static str {
181 use nom::error::ErrorKind;
182 match code {
183 ErrorKind::Tag => "expected_keyword",
184 ErrorKind::Digit => "expected_number",
185 ErrorKind::Alpha | ErrorKind::AlphaNumeric => "expected_identifier",
186 ErrorKind::Space | ErrorKind::MultiSpace => "expected_whitespace",
187 ErrorKind::Eof => "unexpected_eof",
188 ErrorKind::TakeUntil => "expected_terminator",
189 ErrorKind::TakeWhile1 => "expected_token",
190 ErrorKind::Alt => "expected_alt",
191 ErrorKind::Many0 | ErrorKind::Many1 => "expected_list",
192 _ => "parse_error",
193 }
194}
195
196fn nom_err_to_parse_error(
197 e: &Error<Input<'_>>,
198 length_override: Option<usize>,
199 expected_context: Option<&'static str>,
200) -> ParseError {
201 let offset = e.input.location_offset();
202 let line = e.input.location_line();
203 let column = e.input.get_column();
204 let fragment = e.input.fragment();
205 let (found_snippet, found_len) = fragment_to_found_snippet(fragment);
206 let message = nom_error_kind_to_message(&e.code).to_string();
207 let span_len = length_override.unwrap_or(found_len).max(1);
208 if trim_ascii_start(fragment).starts_with(b"}") {
209 return unexpected_closing_brace_parse_error(e.input);
210 }
211 let mut pe = ParseError::new(message)
212 .with_location(offset, line, column)
213 .with_length(span_len)
214 .with_code(nom_error_kind_to_code(&e.code))
215 .with_severity(DiagnosticSeverity::Error)
216 .with_category(DiagnosticCategory::ParseError);
217 if !found_snippet.is_empty() {
218 pe = pe.with_found(found_snippet);
219 }
220 if let Some(ctx) = expected_context {
221 pe = pe.with_expected(ctx);
222 }
223 let at_root = expected_context.is_some_and(|ctx| {
224 ctx.contains("'package', 'namespace', or 'import'") || ctx.contains("top level")
225 });
226 if at_root && is_illegal_top_level_definition(fragment) {
227 pe.message = "illegal top-level definition".to_string();
228 pe.code = Some("illegal_top_level_definition".to_string());
229 pe.expected = Some("'package', 'namespace', or 'import'".to_string());
230 pe.suggestion = Some(
231 "Wrap this declaration in `package ... { ... }` or `namespace ... { ... }`."
232 .to_string(),
233 );
234 }
235 pe
236}
237
238fn is_illegal_top_level_definition(fragment: &[u8]) -> bool {
239 let trimmed = trim_ascii_start(fragment);
240 !trimmed.starts_with(b"}")
241 && !trimmed.starts_with(b"//")
242 && !trimmed.starts_with(b"/*")
243 && lex::starts_with_any_keyword(trimmed, ILLEGAL_TOP_LEVEL_STARTERS)
244}
245
/// Returns `fragment` without its leading ASCII whitespace bytes.
///
/// "Whitespace" is whatever `u8::is_ascii_whitespace` accepts. The result is
/// empty when the input is empty or consists only of whitespace.
fn trim_ascii_start(fragment: &[u8]) -> &[u8] {
    let skip = fragment
        .iter()
        .take_while(|byte| byte.is_ascii_whitespace())
        .count();
    &fragment[skip..]
}
256
257fn starts_with_missing_name_after_keyword(
258 fragment: &[u8],
259 keyword: &[u8],
260 trailing_keywords: &[&[u8]],
261) -> bool {
262 let mut fragment = trim_ascii_start(fragment);
263 if !lex::starts_with_keyword(fragment, keyword) {
264 return false;
265 }
266 fragment = &fragment[keyword.len()..];
267 while let Some(first) = fragment.first() {
268 if first.is_ascii_whitespace() {
269 fragment = &fragment[1..];
270 continue;
271 }
272 break;
273 }
274 for trailing in trailing_keywords {
275 if lex::starts_with_keyword(fragment, trailing) {
276 fragment = &fragment[trailing.len()..];
277 while let Some(first) = fragment.first() {
278 if first.is_ascii_whitespace() {
279 fragment = &fragment[1..];
280 continue;
281 }
282 break;
283 }
284 }
285 }
286 fragment.starts_with(b":")
287 && !lex::starts_with_keyword(fragment, b":>>")
288 && !lex::starts_with_keyword(fragment, b":>")
289 && !lex::starts_with_keyword(fragment, b"::")
290}
291
292fn starts_with_missing_type_after_keyword(
293 fragment: &[u8],
294 keyword: &[u8],
295 trailing_keywords: &[&[u8]],
296) -> bool {
297 let mut fragment = trim_ascii_start(fragment);
298 if !lex::starts_with_keyword(fragment, keyword) {
299 return false;
300 }
301 fragment = &fragment[keyword.len()..];
302 while let Some(first) = fragment.first() {
303 if first.is_ascii_whitespace() {
304 fragment = &fragment[1..];
305 continue;
306 }
307 break;
308 }
309 for trailing in trailing_keywords {
310 if lex::starts_with_keyword(fragment, trailing) {
311 fragment = &fragment[trailing.len()..];
312 while let Some(first) = fragment.first() {
313 if first.is_ascii_whitespace() {
314 fragment = &fragment[1..];
315 continue;
316 }
317 break;
318 }
319 }
320 }
321
322 let mut name_len = 0usize;
323 while name_len < fragment.len()
324 && (fragment[name_len].is_ascii_alphanumeric() || fragment[name_len] == b'_')
325 {
326 name_len += 1;
327 }
328 if name_len == 0 {
329 return false;
330 }
331 fragment = &fragment[name_len..];
332 while let Some(first) = fragment.first() {
333 if first.is_ascii_whitespace() {
334 fragment = &fragment[1..];
335 continue;
336 }
337 break;
338 }
339 if !fragment.starts_with(b":") {
340 return false;
341 }
342 fragment = &fragment[1..];
343 while let Some(first) = fragment.first() {
344 if first.is_ascii_whitespace() {
345 fragment = &fragment[1..];
346 continue;
347 }
348 break;
349 }
350
351 fragment.is_empty()
352 || fragment.starts_with(b";")
353 || fragment.starts_with(b"{")
354 || fragment.starts_with(b"}")
355 || lex::starts_with_keyword(fragment, b"then")
356 || lex::starts_with_keyword(fragment, b"if")
357 || lex::starts_with_keyword(fragment, b"do")
358}
359
360fn missing_name_diagnostic(fragment: &[u8]) -> Option<(&'static str, String, String, String)> {
361 #[allow(clippy::type_complexity)]
362 let cases: &[(&[u8], &[&[u8]], &str, &str)] = &[
363 (
364 b"subject",
365 &[],
366 "subject name",
367 "Use `subject laptop: Laptop;`.",
368 ),
369 (b"actor", &[], "actor name", "Use `actor user: User;`."),
370 (b"state", &[], "state name", "Use `state ready: Mode;`."),
371 (b"part", &[], "part name", "Use `part wheel: Wheel;`."),
372 (b"ref", &[], "reference name", "Use `ref sensor: Sensor;`."),
373 (b"port", &[], "port name", "Use `port power: PowerPort;`."),
374 (
375 b"attribute",
376 &[],
377 "attribute name",
378 "Use `attribute mass: MassValue;`.",
379 ),
380 (b"in", &[], "input name", "Use `in speed: Real;`."),
381 (b"out", &[], "output name", "Use `out result: Real;`."),
382 (
383 b"perform",
384 &[b"action"],
385 "action name",
386 "Use `perform action run: Runner;`.",
387 ),
388 (b"return", &[], "return name", "Use `return result: Real;`."),
389 ];
390
391 for (keyword, trailing, missing_what, suggestion) in cases {
392 if starts_with_missing_name_after_keyword(fragment, keyword, trailing) {
393 return Some((
394 "missing_member_name",
395 format!("expected {missing_what} before ':'"),
396 format!("{missing_what} before ':'"),
397 suggestion.to_string(),
398 ));
399 }
400 }
401 None
402}
403
404fn missing_type_diagnostic(fragment: &[u8]) -> Option<(&'static str, String, String, String)> {
405 #[allow(clippy::type_complexity)]
406 let cases: &[(&[u8], &[&[u8]], &str)] = &[
407 (b"subject", &[], "subject type"),
408 (b"actor", &[], "actor type"),
409 (b"state", &[], "state type"),
410 (b"part", &[], "part type"),
411 (b"ref", &[], "reference type"),
412 (b"port", &[], "port type"),
413 (b"attribute", &[], "attribute type"),
414 (b"in", &[], "input type"),
415 (b"out", &[], "output type"),
416 (b"perform", &[b"action"], "action type"),
417 (b"return", &[], "return type"),
418 ];
419
420 for &(keyword, trailing, missing_what) in cases {
421 if starts_with_missing_type_after_keyword(fragment, keyword, trailing) {
422 let keyword_label = String::from_utf8_lossy(keyword);
423 let sample_name = if keyword == &b"subject"[..] {
424 "laptop"
425 } else if keyword == &b"actor"[..] {
426 "user"
427 } else if keyword == &b"state"[..] {
428 "ready"
429 } else if keyword == &b"part"[..] {
430 "wheel"
431 } else if keyword == &b"ref"[..] {
432 "sensor"
433 } else if keyword == &b"port"[..] {
434 "power"
435 } else if keyword == &b"attribute"[..] {
436 "mass"
437 } else if keyword == &b"in"[..] {
438 "speed"
439 } else if keyword == &b"out"[..] {
440 "result"
441 } else if keyword == &b"perform"[..] {
442 "run"
443 } else if keyword == &b"return"[..] {
444 "result"
445 } else {
446 "member"
447 };
448 let sample_type = if keyword == &b"subject"[..] {
449 "Laptop"
450 } else if keyword == &b"actor"[..] {
451 "User"
452 } else if keyword == &b"state"[..] {
453 "Mode"
454 } else if keyword == &b"part"[..] {
455 "Wheel"
456 } else if keyword == &b"ref"[..] {
457 "Sensor"
458 } else if keyword == &b"port"[..] {
459 "PowerPort"
460 } else if keyword == &b"attribute"[..] {
461 "MassValue"
462 } else if keyword == &b"in"[..] || keyword == &b"out"[..] {
463 "Real"
464 } else if keyword == &b"perform"[..] {
465 "Runner"
466 } else if keyword == &b"return"[..] {
467 "Real"
468 } else {
469 "Type"
470 };
471 let suggestion = if keyword == &b"perform"[..] {
472 format!("Use `perform action {sample_name}: {sample_type};`.")
473 } else if keyword == &b"return"[..] {
474 format!("Use `return {sample_name}: {sample_type};`.")
475 } else {
476 format!("Use `{keyword_label} {sample_name}: {sample_type};`.")
477 };
478 return Some((
479 "missing_type_reference",
480 format!("expected {missing_what} after ':'"),
481 format!("{missing_what} after ':'"),
482 suggestion,
483 ));
484 }
485 }
486 None
487}
488
489fn invalid_expose_separator_diagnostic(
490 fragment: &[u8],
491) -> Option<(&'static str, String, String, String)> {
492 let mut fragment = trim_ascii_start(fragment);
493 if !lex::starts_with_keyword(fragment, b"expose") {
494 return None;
495 }
496 fragment = &fragment[b"expose".len()..];
497 while let Some(first) = fragment.first() {
498 if first.is_ascii_whitespace() {
499 fragment = &fragment[1..];
500 continue;
501 }
502 break;
503 }
504 if fragment.is_empty() {
505 return None;
506 }
507
508 let mut saw_dot = false;
509 let mut in_quoted_name = false;
510 for &b in fragment {
511 if b == b'\'' {
512 in_quoted_name = !in_quoted_name;
513 continue;
514 }
515 if in_quoted_name {
516 continue;
517 }
518 if matches!(b, b';' | b'[' | b'{' | b'}' | b'\n' | b'\r') {
519 break;
520 }
521 if b == b'.' {
522 saw_dot = true;
523 break;
524 }
525 }
526 if !saw_dot {
527 return None;
528 }
529
530 Some((
531 "invalid_qualified_name_separator",
532 "invalid qualified name in expose target: use '::' instead of '.'".to_string(),
533 "qualified name segments separated by '::'".to_string(),
534 "Replace '.' with '::' in the expose target (example: `expose A::B;`).".to_string(),
535 ))
536}
537
538fn missing_semicolon_or_body_diagnostic(
539 fragment: &[u8],
540) -> Option<(&'static str, String, String, String)> {
541 let fragment = trim_ascii_start(fragment);
542 let cases: &[(&[u8], &str, &str)] = &[
543 (
544 b"action def",
545 "action definition",
546 "Use `action def Run;` or `action def Run { ... }`.",
547 ),
548 (
549 b"part def",
550 "part definition",
551 "Use `part def Wheel;` or `part def Wheel { ... }`.",
552 ),
553 (
554 b"requirement def",
555 "requirement definition",
556 "Use `requirement def R;` or `requirement def R { ... }`.",
557 ),
558 (
559 b"state def",
560 "state definition",
561 "Use `state def Ready;` or `state def Ready { ... }`.",
562 ),
563 (
564 b"view",
565 "view declaration",
566 "Use `view structure: GeneralView;` or `view structure: GeneralView { ... }`.",
567 ),
568 (
569 b"rendering def",
570 "rendering definition",
571 "Use `rendering def Diagram;` or `rendering def Diagram { ... }`.",
572 ),
573 ];
574
575 for (prefix, label, suggestion) in cases {
576 if fragment.starts_with(prefix) {
577 return Some((
578 "missing_body_or_semicolon",
579 format!("expected ';' or '{{' after {label} header"),
580 "';' or '{' after declaration header".to_string(),
581 suggestion.to_string(),
582 ));
583 }
584 }
585 None
586}
587
588fn invalid_typing_operator_diagnostic(
589 fragment: &[u8],
590) -> Option<(&'static str, String, String, String)> {
591 let fragment = trim_ascii_start(fragment);
592 let cases: &[(&[u8], &str, &str)] = &[
593 (
594 b"part def",
595 "part definition specialization",
596 "Use `part def Vehicle :> BaseVehicle;` when specializing a definition.",
597 ),
598 (
599 b"port def",
600 "port definition specialization",
601 "Use `port def PowerPort :> BasePort;` when specializing a definition.",
602 ),
603 ];
604
605 for (prefix, label, suggestion) in cases {
606 if fragment.starts_with(prefix) && fragment.windows(3).any(|w| w == b": ") {
607 return Some((
608 "invalid_typing_operator",
609 format!("invalid typing operator in {label}: use ':>' instead of ':'"),
610 "':>' specialization operator".to_string(),
611 suggestion.to_string(),
612 ));
613 }
614 }
615
616 if fragment.starts_with(b"part def")
617 && fragment.contains(&b':')
618 && !fragment.windows(2).any(|w| w == b":>")
619 {
620 return Some((
621 "invalid_typing_operator",
622 "invalid typing operator in part definition: use ':>' instead of ':'".to_string(),
623 "':>' specialization operator".to_string(),
624 "Use `part def Vehicle :> BaseVehicle;` when specializing a definition.".to_string(),
625 ));
626 }
627
628 None
629}
630
631fn missing_expression_after_operator_diagnostic(
632 fragment: &[u8],
633) -> Option<(&'static str, String, String, String)> {
634 let fragment = trim_ascii_start(fragment);
635 let cases: &[(&[u8], &str, &str)] = &[
636 (
637 b"bind",
638 "binding expression after '='",
639 "Use `bind x = y;`.",
640 ),
641 (
642 b"assign",
643 "assignment expression after ':='",
644 "Use `assign x := y;`.",
645 ),
646 (
647 b"first",
648 "target after 'then'",
649 "Use `first start then finish;`.",
650 ),
651 (
652 b"flow",
653 "target after 'to'",
654 "Use `flow source to target;`.",
655 ),
656 (
657 b"satisfy",
658 "target after 'by'",
659 "Use `satisfy Req by implementation;`.",
660 ),
661 ];
662
663 for (keyword, expected, suggestion) in cases {
664 if !lex::starts_with_keyword(fragment, keyword) {
665 continue;
666 }
667 let text = String::from_utf8_lossy(fragment);
668 if text.contains("= ;") || text.trim_end().ends_with('=') {
669 return Some((
670 "missing_expression_after_operator",
671 "expected expression after '='".to_string(),
672 expected.to_string(),
673 suggestion.to_string(),
674 ));
675 }
676 if text.contains(":= ;") || text.trim_end().ends_with(":=") {
677 return Some((
678 "missing_expression_after_operator",
679 "expected expression after ':='".to_string(),
680 expected.to_string(),
681 suggestion.to_string(),
682 ));
683 }
684 if text.contains(" then ;") || text.trim_end().ends_with(" then") {
685 return Some((
686 "missing_expression_after_operator",
687 "expected target after 'then'".to_string(),
688 expected.to_string(),
689 suggestion.to_string(),
690 ));
691 }
692 if text.contains(" to ;") || text.trim_end().ends_with(" to") {
693 return Some((
694 "missing_expression_after_operator",
695 "expected target after 'to'".to_string(),
696 expected.to_string(),
697 suggestion.to_string(),
698 ));
699 }
700 if text.contains(" by ;") || text.trim_end().ends_with(" by") {
701 return Some((
702 "missing_expression_after_operator",
703 "expected target after 'by'".to_string(),
704 expected.to_string(),
705 suggestion.to_string(),
706 ));
707 }
708 }
709 None
710}
711
712fn invalid_unit_reference_diagnostic(
713 fragment: &[u8],
714) -> Option<(&'static str, String, String, String)> {
715 let fragment = trim_ascii_start(fragment);
716 let text = String::from_utf8_lossy(fragment);
717 if !(text.contains('[') && text.contains(']')) {
718 return None;
719 }
720
721 if text.contains("[]") || text.contains("[ ]") {
722 return Some((
723 "invalid_unit_reference",
724 "expected unit name inside '[ ]'".to_string(),
725 "unit name inside '[ ]'".to_string(),
726 "Use a concrete unit such as `1750 [kg]`.".to_string(),
727 ));
728 }
729
730 if text.contains("[;")
731 || text.contains("[ ;")
732 || text.contains("[)")
733 || text.contains("[ ]")
734 || text.contains("[,")
735 {
736 return Some((
737 "invalid_unit_reference",
738 "invalid unit expression inside '[ ]'".to_string(),
739 "unit name inside '[ ]'".to_string(),
740 "Use a unit symbol or qualified unit name (example: `[kg]` or `[SI::kg]`).".to_string(),
741 ));
742 }
743
744 None
745}
746
747fn unexpected_keyword_in_scope_diagnostic(
748 fragment: &[u8],
749 starters: &[&[u8]],
750 scope_label: &str,
751) -> Option<(&'static str, String, String, String)> {
752 let fragment = trim_ascii_start(fragment);
753 if fragment.is_empty() || fragment.starts_with(b"#") || fragment.starts_with(b"@") {
754 return None;
755 }
756 let keyword_end = fragment
757 .iter()
758 .position(|b| !b.is_ascii_alphanumeric() && *b != b'_')
759 .unwrap_or(fragment.len());
760 if keyword_end == 0 {
761 return None;
762 }
763 let keyword = &fragment[..keyword_end];
764 if lex::starts_with_any_keyword(keyword, starters) {
765 return None;
766 }
767 let keyword_text = String::from_utf8_lossy(keyword);
768 Some((
769 "unexpected_keyword_in_scope",
770 format!("unexpected keyword `{keyword_text}` in {scope_label}"),
771 format!("valid {scope_label} element"),
772 format!("Replace `{keyword_text}` with a valid {scope_label} member or remove it."),
773 ))
774}
775
776fn invalid_bare_identifier_in_body_diagnostic(
777 fragment: &[u8],
778 scope_label: &str,
779) -> Option<(&'static str, String, String, String)> {
780 let is_action = scope_label.contains("action body");
781 let is_state = scope_label.contains("state body");
782 if !is_action && !is_state {
783 return None;
784 }
785
786 let fragment = trim_ascii_start(fragment);
787 let ident_end = fragment
788 .iter()
789 .position(|b| !b.is_ascii_alphanumeric() && *b != b'_')
790 .unwrap_or(fragment.len());
791 if ident_end == 0 || !fragment[0].is_ascii_alphabetic() {
792 return None;
793 }
794
795 let ident = &fragment[..ident_end];
796 let rest = trim_ascii_start(&fragment[ident_end..]);
797 if !(rest.starts_with(b";")
798 || rest.starts_with(b"}")
799 || rest.starts_with(b"\n")
800 || rest.starts_with(b"\r"))
801 {
802 return None;
803 }
804
805 let ident_text = String::from_utf8_lossy(ident);
806 if is_action {
807 Some((
808 "invalid_bare_identifier_in_action_body",
809 format!("bare identifier `{ident_text}` is not a valid action body member"),
810 "action body member such as `perform`, `bind`, `in`, or `out`".to_string(),
811 format!(
812 "Use an explicit action-body form, for example `perform {ident_text};`, `bind ... = ...;`, or an `in`/`out` parameter declaration."
813 ),
814 ))
815 } else {
816 Some((
817 "invalid_bare_identifier_in_state_body",
818 format!("bare identifier `{ident_text}` is not a valid state body member"),
819 "state body member such as `entry`, `transition`, `then`, `state`, or `ref`"
820 .to_string(),
821 format!(
822 "Use an explicit state-body form, for example `then {ident_text};`, `transition ...;`, or a nested `state` member."
823 ),
824 ))
825 }
826}
827
828fn unexpected_closing_brace_parse_error(input: Input<'_>) -> ParseError {
829 ParseError::new("unexpected closing '}'")
830 .with_location(
831 input.location_offset(),
832 input.location_line(),
833 input.get_column(),
834 )
835 .with_length(1)
836 .with_code("unexpected_closing_brace")
837 .with_expected("valid declaration or end of current body")
838 .with_found("}")
839 .with_suggestion("Remove this '}' or add the missing opening '{' before it.")
840 .with_severity(DiagnosticSeverity::Error)
841 .with_category(DiagnosticCategory::ParseError)
842}
843
844fn missing_closing_brace_error(bytes: &[u8], input: Input<'_>) -> Option<ParseError> {
845 if !input.fragment().is_empty() {
846 return None;
847 }
848 let consumed = &bytes[..input.location_offset().min(bytes.len())];
849 let opens = consumed.iter().filter(|&&b| b == b'{').count();
850 let closes = consumed.iter().filter(|&&b| b == b'}').count();
851 if opens <= closes {
852 return None;
853 }
854 Some(missing_closing_brace_error_at_eof(consumed))
855}
856
857fn missing_closing_brace_error_at_eof(bytes: &[u8]) -> ParseError {
858 let (line, column) = eof_line_column(bytes);
859 ParseError::new("missing closing '}'")
860 .with_location(bytes.len(), line, column)
861 .with_length(1)
862 .with_code("missing_closing_brace")
863 .with_expected("'}'")
864 .with_suggestion("Add '}' to close the open body.")
865 .with_category(DiagnosticCategory::ParseError)
866}
867
868fn category_from_code(code: &str) -> DiagnosticCategory {
869 if code == "unsupported_annotation_syntax" {
870 DiagnosticCategory::UnsupportedGrammarForm
871 } else if code == "unresolved_symbol" {
872 DiagnosticCategory::UnresolvedSymbol
873 } else {
874 DiagnosticCategory::ParseError
875 }
876}
877
/// Returns `true` when `bytes` contains more `{` than `}` bytes.
///
/// Every brace byte is counted, wherever it appears.
fn has_unclosed_brace(bytes: &[u8]) -> bool {
    let balance = bytes.iter().fold(0isize, |depth, &byte| match byte {
        b'{' => depth + 1,
        b'}' => depth - 1,
        _ => depth,
    });
    balance > 0
}
883
/// Computes the 1-based `(line, column)` of the position just past `bytes`.
///
/// Each `\n` starts a new line and resets the column to 1; every other byte
/// (including `\r`) advances the column by one, so columns count bytes.
fn eof_line_column(bytes: &[u8]) -> (u32, usize) {
    bytes
        .iter()
        .fold((1u32, 1usize), |(line, column), &byte| {
            if byte == b'\n' {
                (line + 1, 1)
            } else {
                (line, column + 1)
            }
        })
}
897
898pub(crate) fn build_recovery_error_node(
899 input: Input<'_>,
900 starters: &[&[u8]],
901 scope_label: &str,
902 generic_code: &str,
903) -> ParseErrorNode {
904 build_recovery_error_node_from_span(input, input, starters, scope_label, generic_code)
905}
906
/// Result of `classify_recovery`: the kind of problem found at a recovery
/// point.
///
/// Variants with fields carry the `code`, `message`, `expected` and
/// `suggestion` produced by the matching diagnostic helper.
enum RecoveryClassification {
    /// Produced from `missing_name_diagnostic`.
    MissingMemberName {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// Produced from `missing_type_diagnostic`.
    MissingTypeReference {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// Produced from `invalid_expose_separator_diagnostic`.
    InvalidQualifiedNameSeparator {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// Produced from `missing_semicolon_or_body_diagnostic`.
    MissingBodyOrSemicolon {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// Produced from `missing_expression_after_operator_diagnostic`.
    MissingExpressionAfterOperator {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// Produced from `invalid_unit_reference_diagnostic`.
    InvalidUnitReference {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// Produced from `invalid_typing_operator_diagnostic`.
    InvalidTypingOperator {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// Produced from `invalid_bare_identifier_in_body_diagnostic`.
    InvalidBareIdentifierInBody {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// Produced from `unexpected_keyword_in_scope_diagnostic`.
    UnexpectedKeywordInScope {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// The skipped text looks like a declaration that lacks its `;`.
    MissingSemicolon,
    /// The input starts with `#` or `@`.
    UnsupportedAnnotation,
    /// Fallback when no other classification applies.
    Unexpected,
}
966
/// Returns `fragment` without its trailing ASCII whitespace bytes.
///
/// "Whitespace" is whatever `u8::is_ascii_whitespace` accepts. The result is
/// empty when the input is empty or consists only of whitespace.
fn trim_ascii_end(fragment: &[u8]) -> &[u8] {
    let keep = fragment
        .iter()
        .rposition(|byte| !byte.is_ascii_whitespace())
        .map_or(0, |last| last + 1);
    &fragment[..keep]
}
977
978fn classify_recovery(
979 input: Input<'_>,
980 recovery_end: Input<'_>,
981 starters: &[&[u8]],
982 scope_label: &str,
983) -> RecoveryClassification {
984 let trimmed = trim_ascii_start(input.fragment());
985
986 if let Some((code, message, expected, suggestion)) = missing_name_diagnostic(trimmed) {
987 return RecoveryClassification::MissingMemberName {
988 code: code.to_string(),
989 message,
990 expected,
991 suggestion,
992 };
993 }
994
995 if let Some((code, message, expected, suggestion)) = missing_type_diagnostic(trimmed) {
996 return RecoveryClassification::MissingTypeReference {
997 code: code.to_string(),
998 message,
999 expected,
1000 suggestion,
1001 };
1002 }
1003
1004 if let Some((code, message, expected, suggestion)) =
1005 invalid_expose_separator_diagnostic(trimmed)
1006 {
1007 return RecoveryClassification::InvalidQualifiedNameSeparator {
1008 code: code.to_string(),
1009 message,
1010 expected,
1011 suggestion,
1012 };
1013 }
1014
1015 if let Some((code, message, expected, suggestion)) = invalid_typing_operator_diagnostic(trimmed)
1016 {
1017 return RecoveryClassification::InvalidTypingOperator {
1018 code: code.to_string(),
1019 message,
1020 expected,
1021 suggestion,
1022 };
1023 }
1024
1025 if let Some((code, message, expected, suggestion)) =
1026 missing_expression_after_operator_diagnostic(trimmed)
1027 {
1028 return RecoveryClassification::MissingExpressionAfterOperator {
1029 code: code.to_string(),
1030 message,
1031 expected,
1032 suggestion,
1033 };
1034 }
1035
1036 if let Some((code, message, expected, suggestion)) = invalid_unit_reference_diagnostic(trimmed)
1037 {
1038 return RecoveryClassification::InvalidUnitReference {
1039 code: code.to_string(),
1040 message,
1041 expected,
1042 suggestion,
1043 };
1044 }
1045
1046 if let Some((code, message, expected, suggestion)) =
1047 missing_semicolon_or_body_diagnostic(trimmed)
1048 {
1049 return RecoveryClassification::MissingBodyOrSemicolon {
1050 code: code.to_string(),
1051 message,
1052 expected,
1053 suggestion,
1054 };
1055 }
1056
1057 let consumed_len = recovery_end
1058 .location_offset()
1059 .saturating_sub(input.location_offset())
1060 .min(input.fragment().len());
1061 let raw_consumed = &input.fragment()[..consumed_len];
1062 let consumed = trim_ascii_end(raw_consumed);
1063 let recovered_to_boundary = recovery_end.location_offset() > input.location_offset() && {
1064 let (next, _) = lex::ws_and_comments(recovery_end).unwrap_or((recovery_end, ()));
1065 next.fragment().is_empty()
1066 || next.fragment().starts_with(b"}")
1067 || lex::starts_with_any_keyword(next.fragment(), starters)
1068 };
1069
1070 let consumed_has_newline = raw_consumed.contains(&b'\n') || raw_consumed.contains(&b'\r');
1071 let first_line_end = consumed
1072 .iter()
1073 .position(|b| matches!(*b, b'\n' | b'\r'))
1074 .unwrap_or(consumed.len());
1075 let first_line = trim_ascii_end(&consumed[..first_line_end]);
1076 let consumed_has_delimiters = consumed
1077 .iter()
1078 .any(|b| matches!(*b, b'{' | b'}' | b'(' | b')' | b'[' | b']'));
1079 let consumed_ends_incomplete = first_line.last().is_some_and(|b| {
1080 matches!(
1081 *b,
1082 b':' | b'=' | b',' | b'.' | b'+' | b'-' | b'*' | b'/' | b'>' | b'<' | b'|'
1083 )
1084 });
1085 let first_line_has_semicolon = first_line.contains(&b';');
1086 if recovered_to_boundary
1087 && lex::starts_with_any_keyword(trimmed, starters)
1088 && (consumed_has_newline || recovery_end.fragment().starts_with(b"}"))
1089 && !consumed.is_empty()
1090 && !consumed_has_delimiters
1091 && !consumed_ends_incomplete
1092 && !first_line_has_semicolon
1093 {
1094 return RecoveryClassification::MissingSemicolon;
1095 }
1096
1097 if lex::starts_with_keyword(trimmed, b"#") || lex::starts_with_keyword(trimmed, b"@") {
1098 return RecoveryClassification::UnsupportedAnnotation;
1099 }
1100
1101 if let Some((code, message, expected, suggestion)) =
1102 invalid_bare_identifier_in_body_diagnostic(trimmed, scope_label)
1103 {
1104 return RecoveryClassification::InvalidBareIdentifierInBody {
1105 code: code.to_string(),
1106 message,
1107 expected,
1108 suggestion,
1109 };
1110 }
1111
1112 if let Some((code, message, expected, suggestion)) =
1113 unexpected_keyword_in_scope_diagnostic(trimmed, starters, scope_label)
1114 {
1115 return RecoveryClassification::UnexpectedKeywordInScope {
1116 code: code.to_string(),
1117 message,
1118 expected,
1119 suggestion,
1120 };
1121 }
1122
1123 RecoveryClassification::Unexpected
1124}
1125
1126pub(crate) fn build_recovery_error_node_from_span(
1127 input: Input<'_>,
1128 recovery_end: Input<'_>,
1129 starters: &[&[u8]],
1130 scope_label: &str,
1131 generic_code: &str,
1132) -> ParseErrorNode {
1133 match classify_recovery(input, recovery_end, starters, scope_label) {
1134 RecoveryClassification::MissingMemberName {
1135 code,
1136 message,
1137 expected,
1138 suggestion,
1139 }
1140 | RecoveryClassification::MissingTypeReference {
1141 code,
1142 message,
1143 expected,
1144 suggestion,
1145 }
1146 | RecoveryClassification::InvalidQualifiedNameSeparator {
1147 code,
1148 message,
1149 expected,
1150 suggestion,
1151 }
1152 | RecoveryClassification::MissingBodyOrSemicolon {
1153 code,
1154 message,
1155 expected,
1156 suggestion,
1157 }
1158 | RecoveryClassification::MissingExpressionAfterOperator {
1159 code,
1160 message,
1161 expected,
1162 suggestion,
1163 }
1164 | RecoveryClassification::InvalidUnitReference {
1165 code,
1166 message,
1167 expected,
1168 suggestion,
1169 }
1170 | RecoveryClassification::InvalidTypingOperator {
1171 code,
1172 message,
1173 expected,
1174 suggestion,
1175 }
1176 | RecoveryClassification::InvalidBareIdentifierInBody {
1177 code,
1178 message,
1179 expected,
1180 suggestion,
1181 }
1182 | RecoveryClassification::UnexpectedKeywordInScope {
1183 code,
1184 message,
1185 expected,
1186 suggestion,
1187 } => ParseErrorNode {
1188 message,
1189 code,
1190 expected: Some(expected),
1191 found: recovery_found_snippet_from_span(input, recovery_end),
1192 suggestion: Some(suggestion),
1193 category: Some(DiagnosticCategory::ParseError),
1194 },
1195 RecoveryClassification::MissingSemicolon => ParseErrorNode {
1196 message: "missing semicolon before next declaration".to_string(),
1197 code: "missing_semicolon".to_string(),
1198 expected: Some("';'".to_string()),
1199 found: recovery_found_snippet_from_span(input, recovery_end),
1200 suggestion: Some("Insert ';' before this declaration.".to_string()),
1201 category: Some(DiagnosticCategory::ParseError),
1202 },
1203 RecoveryClassification::UnsupportedAnnotation => ParseErrorNode {
1204 message: format!("unsupported annotation syntax in {scope_label}"),
1205 code: "unsupported_annotation_syntax".to_string(),
1206 expected: Some(format!("valid {scope_label} element")),
1207 found: recovery_found_snippet_from_span(input, recovery_end),
1208 suggestion: Some(
1209 "Remove this annotation or extend the parser to support annotated declarations."
1210 .to_string(),
1211 ),
1212 category: Some(DiagnosticCategory::UnsupportedGrammarForm),
1213 },
1214 RecoveryClassification::Unexpected => ParseErrorNode {
1215 message: format!("unexpected token in {scope_label}"),
1216 code: generic_code.to_string(),
1217 expected: Some(format!("valid {scope_label} element")),
1218 found: recovery_found_snippet_from_span(input, recovery_end),
1219 suggestion: Some(format!("Fix this {scope_label} member and re-run parsing.")),
1220 category: Some(DiagnosticCategory::ParseError),
1221 },
1222 }
1223}
1224
1225fn parse_error_from_recovery_node(span: &crate::ast::Span, node: &ParseErrorNode) -> ParseError {
1226 let mut err = ParseError::new(node.message.clone())
1227 .with_location(span.offset, span.line, span.column)
1228 .with_length(span.len.max(1))
1229 .with_code(node.code.clone())
1230 .with_category(
1231 node.category
1232 .unwrap_or_else(|| category_from_code(node.code.as_str())),
1233 );
1234 let severity = if node.code == "unsupported_annotation_syntax" {
1235 DiagnosticSeverity::Warning
1236 } else {
1237 DiagnosticSeverity::Error
1238 };
1239 err = err.with_severity(severity);
1240 if let Some(expected) = &node.expected {
1241 err = err.with_expected(expected.clone());
1242 }
1243 if let Some(found) = &node.found {
1244 err = err.with_found(found.clone());
1245 }
1246 if let Some(suggestion) = &node.suggestion {
1247 err = err.with_suggestion(suggestion.clone());
1248 }
1249 err
1250}
1251
1252fn diagnostic_specificity(err: &ParseError) -> u8 {
1253 match err.code.as_deref() {
1254 Some("missing_member_name")
1255 | Some("missing_type_reference")
1256 | Some("invalid_qualified_name_separator")
1257 | Some("invalid_typing_operator")
1258 | Some("missing_expression_after_operator")
1259 | Some("invalid_unit_reference")
1260 | Some("missing_body_or_semicolon")
1261 | Some("missing_semicolon")
1262 | Some("unexpected_closing_brace")
1263 | Some("missing_closing_brace")
1264 | Some("unsupported_annotation_syntax")
1265 | Some("invalid_bare_identifier_in_action_body")
1266 | Some("invalid_bare_identifier_in_state_body")
1267 | Some("recovery_cascade_suppressed")
1268 | Some("unexpected_keyword_in_scope") => 5,
1269 Some("illegal_top_level_definition") => 4,
1270 Some(code) if code.starts_with("recovered_") => 2,
1271 Some("expected_end_of_input") | Some("expected_keyword") => 1,
1272 _ => 3,
1273 }
1274}
1275
1276fn dedup_errors(mut errors: Vec<ParseError>) -> Vec<ParseError> {
1277 errors.sort_by_key(|e| {
1278 (
1279 e.offset.unwrap_or(usize::MAX),
1280 e.line.unwrap_or(u32::MAX),
1281 e.column.unwrap_or(usize::MAX),
1282 std::cmp::Reverse(diagnostic_specificity(e)),
1283 )
1284 });
1285
1286 let mut deduped = Vec::new();
1287 for err in errors {
1288 let duplicate = deduped.iter().any(|existing: &ParseError| {
1289 let same_start = existing.offset == err.offset
1290 && existing.line == err.line
1291 && existing.column == err.column;
1292 let same_found = existing.found == err.found;
1293 let existing_specificity = diagnostic_specificity(existing);
1294 let err_specificity = diagnostic_specificity(&err);
1295 same_start
1296 && (same_found || existing.code == err.code)
1297 && existing_specificity >= err_specificity
1298 });
1299 if !duplicate {
1300 deduped.push(err);
1301 }
1302 }
1303
1304 deduped.sort_by_key(|e| (e.offset.unwrap_or(usize::MAX), e.line.unwrap_or(u32::MAX)));
1305 deduped
1306}
1307
1308fn is_cascade_candidate(err: &ParseError) -> bool {
1309 matches!(err.code.as_deref(), Some("missing_semicolon"))
1310 || err
1311 .code
1312 .as_deref()
1313 .is_some_and(|code| code.starts_with("recovered_"))
1314}
1315
1316fn cascade_family(err: &ParseError) -> Option<&str> {
1317 if matches!(err.code.as_deref(), Some("missing_semicolon")) {
1318 Some("missing_semicolon")
1319 } else if err
1320 .code
1321 .as_deref()
1322 .is_some_and(|code| code.starts_with("recovered_"))
1323 {
1324 Some("recovered")
1325 } else {
1326 None
1327 }
1328}
1329
1330fn make_cascade_summary(run: &[ParseError]) -> Option<ParseError> {
1331 let summary_anchor = run.first()?;
1332 let suppressed = run.len().saturating_sub(3);
1333 let family = cascade_family(summary_anchor).unwrap_or("recovery");
1334 let mut err = ParseError::new(format!(
1335 "suppressed {suppressed} cascading {family} diagnostic{} after earlier recovery errors",
1336 if suppressed == 1 { "" } else { "s" }
1337 ))
1338 .with_location(
1339 summary_anchor.offset?,
1340 summary_anchor.line?,
1341 summary_anchor.column?,
1342 )
1343 .with_length(summary_anchor.length.unwrap_or(1).max(1))
1344 .with_code("recovery_cascade_suppressed")
1345 .with_expected("fix the first syntax error in this body")
1346 .with_suggestion(
1347 "Fix the earliest diagnostic in this body first; later syntax errors may be cascades.",
1348 )
1349 .with_severity(DiagnosticSeverity::Warning)
1350 .with_category(DiagnosticCategory::ParseError);
1351 if let Some(found) = &summary_anchor.found {
1352 err = err.with_found(found.clone());
1353 }
1354 Some(err)
1355}
1356
1357fn suppress_diagnostic_cascades(errors: Vec<ParseError>) -> Vec<ParseError> {
1358 const MAX_UNSUMMARIZED_CASCADE: usize = 3;
1359
1360 let mut output = Vec::new();
1361 let mut run: Vec<ParseError> = Vec::new();
1362
1363 let flush_run = |run: &mut Vec<ParseError>, output: &mut Vec<ParseError>| {
1364 if run.len() <= MAX_UNSUMMARIZED_CASCADE {
1365 output.append(run);
1366 } else {
1367 output.extend(run.drain(..MAX_UNSUMMARIZED_CASCADE));
1368 if let Some(summary) = make_cascade_summary(run) {
1369 output.push(summary);
1370 }
1371 run.clear();
1372 }
1373 };
1374
1375 for err in errors {
1376 let continues_run = run.last().is_some_and(|previous| {
1377 is_cascade_candidate(&err)
1378 && cascade_family(previous) == cascade_family(&err)
1379 && previous.line.zip(err.line).is_some_and(|(a, b)| b <= a + 1)
1380 });
1381
1382 if is_cascade_candidate(&err) && (run.is_empty() || continues_run) {
1383 run.push(err);
1384 } else {
1385 flush_run(&mut run, &mut output);
1386 if is_cascade_candidate(&err) {
1387 run.push(err);
1388 } else {
1389 output.push(err);
1390 }
1391 }
1392 }
1393 flush_run(&mut run, &mut output);
1394 output.sort_by_key(|e| (e.offset.unwrap_or(usize::MAX), e.line.unwrap_or(u32::MAX)));
1395 output
1396}
1397
1398fn root_body_recovery_error(input: Input<'_>, scope: &str) -> ParseError {
1399 let (found, len) = fragment_to_found_snippet(input.fragment());
1400 let mut err = ParseError::new(format!(
1401 "could not parse {scope} body; skipped to next root element"
1402 ))
1403 .with_location(
1404 input.location_offset(),
1405 input.location_line(),
1406 input.get_column(),
1407 )
1408 .with_length(len.max(1))
1409 .with_code("recovered_root_body")
1410 .with_expected(format!("valid {scope} body"))
1411 .with_suggestion(
1412 "Fix the first syntax error in this body; later root-level diagnostics may be cascades.",
1413 )
1414 .with_severity(DiagnosticSeverity::Error)
1415 .with_category(DiagnosticCategory::ParseError);
1416 if !found.is_empty() {
1417 err = err.with_found(found);
1418 }
1419 err
1420}
1421
1422fn root_body_scope(fragment: &[u8]) -> Option<&'static str> {
1423 let fragment = trim_ascii_start(fragment);
1424 if lex::starts_with_keyword(fragment, b"package")
1425 || lex::starts_with_keyword(fragment, b"library")
1426 || lex::starts_with_keyword(fragment, b"standard")
1427 {
1428 Some("package")
1429 } else if lex::starts_with_keyword(fragment, b"namespace") {
1430 Some("namespace")
1431 } else {
1432 None
1433 }
1434}
1435
1436fn collect_requirement_body_errors(body: &RequirementDefBody, errors: &mut Vec<ParseError>) {
1437 if let RequirementDefBody::Brace { elements } = body {
1438 for element in elements {
1439 match &element.value {
1440 RequirementDefBodyElement::Error(n) => {
1441 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1442 }
1443 RequirementDefBodyElement::Frame(n) => {
1444 collect_requirement_body_errors(&n.value.body, errors)
1445 }
1446 _ => {}
1447 }
1448 }
1449 }
1450}
1451
1452fn collect_action_def_body_errors(body: &ActionDefBody, errors: &mut Vec<ParseError>) {
1453 if let ActionDefBody::Brace { elements } = body {
1454 for element in elements {
1455 if let ActionDefBodyElement::Error(n) = &element.value {
1456 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1457 }
1458 }
1459 }
1460}
1461
1462fn collect_action_usage_body_errors(body: &ActionUsageBody, errors: &mut Vec<ParseError>) {
1463 if let ActionUsageBody::Brace { elements } = body {
1464 for element in elements {
1465 match &element.value {
1466 ActionUsageBodyElement::Error(n) => {
1467 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1468 }
1469 ActionUsageBodyElement::ActionUsage(n) => {
1470 collect_action_usage_body_errors(&n.value.body, errors)
1471 }
1472 _ => {}
1473 }
1474 }
1475 }
1476}
1477
1478fn collect_state_body_errors(body: &StateDefBody, errors: &mut Vec<ParseError>) {
1479 if let StateDefBody::Brace { elements } = body {
1480 for element in elements {
1481 match &element.value {
1482 StateDefBodyElement::Error(n) => {
1483 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1484 }
1485 StateDefBodyElement::Entry(n) => collect_state_body_errors(&n.value.body, errors),
1486 StateDefBodyElement::RequirementUsage(n) => {
1487 collect_requirement_body_errors(&n.value.body, errors)
1488 }
1489 StateDefBodyElement::StateUsage(n) => {
1490 collect_state_body_errors(&n.value.body, errors)
1491 }
1492 _ => {}
1493 }
1494 }
1495 }
1496}
1497
1498fn collect_use_case_body_errors(body: &UseCaseDefBody, errors: &mut Vec<ParseError>) {
1499 if let UseCaseDefBody::Brace { elements } = body {
1500 for element in elements {
1501 if let UseCaseDefBodyElement::Error(n) = &element.value {
1502 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1503 }
1504 }
1505 }
1506}
1507
1508fn collect_constraint_body_errors(body: &ConstraintDefBody, errors: &mut Vec<ParseError>) {
1509 if let ConstraintDefBody::Brace { elements } = body {
1510 for element in elements {
1511 if let ConstraintDefBodyElement::Error(n) = &element.value {
1512 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1513 }
1514 }
1515 }
1516}
1517
1518fn collect_calc_body_errors(body: &CalcDefBody, errors: &mut Vec<ParseError>) {
1519 if let CalcDefBody::Brace { elements } = body {
1520 for element in elements {
1521 if let CalcDefBodyElement::Error(n) = &element.value {
1522 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1523 }
1524 }
1525 }
1526}
1527
1528fn collect_view_def_body_errors(body: &ViewDefBody, errors: &mut Vec<ParseError>) {
1529 if let ViewDefBody::Brace { elements } = body {
1530 for element in elements {
1531 if let ViewDefBodyElement::Error(n) = &element.value {
1532 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1533 }
1534 }
1535 }
1536}
1537
1538fn collect_view_body_errors(body: &ViewBody, errors: &mut Vec<ParseError>) {
1539 if let ViewBody::Brace { elements } = body {
1540 for element in elements {
1541 if let ViewBodyElement::Error(n) = &element.value {
1542 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1543 }
1544 }
1545 }
1546}
1547
1548fn collect_part_def_body_errors(body: &PartDefBody, errors: &mut Vec<ParseError>) {
1549 if let PartDefBody::Brace { elements } = body {
1550 for element in elements {
1551 match &element.value {
1552 PartDefBodyElement::Error(n) => {
1553 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1554 }
1555 PartDefBodyElement::PartUsage(n) => {
1556 collect_part_usage_body_errors(&n.value.body, errors)
1557 }
1558 PartDefBodyElement::Perform(n) => {
1559 collect_perform_body_errors(&n.value.body, errors)
1560 }
1561 _ => {}
1562 }
1563 }
1564 }
1565}
1566
1567fn collect_perform_body_errors(body: &crate::ast::PerformBody, _errors: &mut Vec<ParseError>) {
1568 match body {
1569 crate::ast::PerformBody::Semicolon => {}
1570 crate::ast::PerformBody::Brace { .. } => {}
1571 }
1572}
1573
1574fn collect_part_usage_body_errors(body: &PartUsageBody, errors: &mut Vec<ParseError>) {
1575 if let PartUsageBody::Brace { elements } = body {
1576 for element in elements {
1577 match &element.value {
1578 PartUsageBodyElement::Error(n) => {
1579 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1580 }
1581 PartUsageBodyElement::PartUsage(n) => {
1582 collect_part_usage_body_errors(&n.value.body, errors)
1583 }
1584 PartUsageBodyElement::Perform(n) => {
1585 collect_perform_body_errors(&n.value.body, errors)
1586 }
1587 PartUsageBodyElement::StateUsage(n) => {
1588 collect_state_body_errors(&n.value.body, errors)
1589 }
1590 _ => {}
1591 }
1592 }
1593 }
1594}
1595
1596fn collect_package_body_errors(body: &PackageBody, errors: &mut Vec<ParseError>) {
1597 if let PackageBody::Brace { elements } = body {
1598 for element in elements {
1599 match &element.value {
1600 PackageBodyElement::Error(n) => {
1601 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1602 }
1603 PackageBodyElement::Package(n) => {
1604 collect_package_body_errors(&n.value.body, errors)
1605 }
1606 PackageBodyElement::LibraryPackage(n) => {
1607 collect_package_body_errors(&n.value.body, errors)
1608 }
1609 PackageBodyElement::PartDef(n) => {
1610 collect_part_def_body_errors(&n.value.body, errors)
1611 }
1612 PackageBodyElement::PartUsage(n) => {
1613 collect_part_usage_body_errors(&n.value.body, errors)
1614 }
1615 PackageBodyElement::ActionDef(n) => {
1616 collect_action_def_body_errors(&n.value.body, errors)
1617 }
1618 PackageBodyElement::ActionUsage(n) => {
1619 collect_action_usage_body_errors(&n.value.body, errors)
1620 }
1621 PackageBodyElement::RequirementDef(n) => {
1622 collect_requirement_body_errors(&n.value.body, errors)
1623 }
1624 PackageBodyElement::RequirementUsage(n) => {
1625 collect_requirement_body_errors(&n.value.body, errors)
1626 }
1627 PackageBodyElement::UseCaseDef(n) => {
1628 collect_use_case_body_errors(&n.value.body, errors)
1629 }
1630 PackageBodyElement::UseCaseUsage(n) => {
1631 collect_use_case_body_errors(&n.value.body, errors)
1632 }
1633 PackageBodyElement::ConcernUsage(n) => {
1634 collect_requirement_body_errors(&n.value.body, errors)
1635 }
1636 PackageBodyElement::StateDef(n) => collect_state_body_errors(&n.value.body, errors),
1637 PackageBodyElement::StateUsage(n) => {
1638 collect_state_body_errors(&n.value.body, errors)
1639 }
1640 PackageBodyElement::ConstraintDef(n) => {
1641 collect_constraint_body_errors(&n.value.body, errors)
1642 }
1643 PackageBodyElement::CalcDef(n) => collect_calc_body_errors(&n.value.body, errors),
1644 PackageBodyElement::ViewDef(n) => {
1645 collect_view_def_body_errors(&n.value.body, errors)
1646 }
1647 PackageBodyElement::ViewUsage(n) => collect_view_body_errors(&n.value.body, errors),
1648 _ => {}
1649 }
1650 }
1651 }
1652}
1653
1654fn collect_recovery_errors(root: &RootNamespace) -> Vec<ParseError> {
1655 let mut errors = Vec::new();
1656 for element in &root.elements {
1657 match &element.value {
1658 crate::ast::RootElement::Package(n) => {
1659 collect_package_body_errors(&n.value.body, &mut errors)
1660 }
1661 crate::ast::RootElement::LibraryPackage(n) => {
1662 collect_package_body_errors(&n.value.body, &mut errors)
1663 }
1664 crate::ast::RootElement::Namespace(n) => {
1665 collect_package_body_errors(&n.value.body, &mut errors)
1666 }
1667 crate::ast::RootElement::Import(_) => {}
1668 }
1669 }
1670 errors
1671}
1672
1673#[allow(clippy::result_large_err)]
1675pub fn parse_root(input: &str) -> Result<RootNamespace, ParseError> {
1676 let bytes = input
1677 .strip_prefix('\u{FEFF}')
1678 .map(str::as_bytes)
1679 .unwrap_or_else(|| input.as_bytes());
1680 let located = LocatedSpan::new(bytes);
1681 match package::root_namespace(located) {
1682 Ok((rest, root)) => {
1683 if !rest.fragment().is_empty() && has_unclosed_brace(bytes) {
1684 return Err(missing_closing_brace_error_at_eof(bytes));
1685 }
1686 if rest.fragment().is_empty() {
1687 log::debug!("parse_root: success, {} top-level elements", root.elements.len());
1688 Ok(root)
1689 } else if trim_ascii_start(rest.fragment()).starts_with(b"}") {
1690 Err(unexpected_closing_brace_parse_error(rest))
1691 } else {
1692 let offset = located.location_offset() + located.fragment().len() - rest.fragment().len();
1693 let unconsumed = rest.fragment();
1694 let first_80 = unconsumed.get(..80.min(unconsumed.len())).unwrap_or(unconsumed);
1695 log::debug!(
1696 "parse_root: expected end of input; parsed {} elements; unconsumed len={}, offset={}, first 80 bytes: {:?}",
1697 root.elements.len(),
1698 unconsumed.len(),
1699 offset,
1700 first_80,
1701 );
1702 log::debug!(
1703 "parse_root: unconsumed as str: {:?}",
1704 String::from_utf8_lossy(first_80),
1705 );
1706 let (found_snippet, found_len) = fragment_to_found_snippet(rest.fragment());
1707 let mut pe = ParseError::new("expected end of input")
1708 .with_location(offset, rest.location_line(), rest.get_column())
1709 .with_length(found_len.max(1))
1710 .with_code("expected_end_of_input")
1711 .with_category(DiagnosticCategory::ParseError);
1712 if !found_snippet.is_empty() {
1713 pe = pe.with_found(found_snippet);
1714 }
1715 if root.elements.is_empty() && is_illegal_top_level_definition(rest.fragment()) {
1716 pe = pe
1717 .with_code("illegal_top_level_definition")
1718 .with_expected("'package', 'namespace', or 'import'")
1719 .with_suggestion(
1720 "Wrap this declaration in `package ... { ... }` or `namespace ... { ... }`.",
1721 );
1722 pe.message = "illegal top-level definition".to_string();
1723 }
1724 Err(pe)
1725 }
1726 }
1727 Err(nom::Err::Error(e)) => Err(missing_closing_brace_error(bytes, e.input).unwrap_or_else(|| {
1728 nom_err_to_parse_error(
1729 &e,
1730 None,
1731 Some("'package', 'namespace', or 'import' at top level; or valid element in package body"),
1732 )
1733 })),
1734 Err(nom::Err::Failure(e)) => Err(missing_closing_brace_error(bytes, e.input).unwrap_or_else(|| {
1735 nom_err_to_parse_error(
1736 &e,
1737 None,
1738 Some("'package', 'namespace', or 'import' at top level; or valid element in package body"),
1739 )
1740 })),
1741 Err(nom::Err::Incomplete(_)) => Err(
1742 ParseError::new("unexpected end of input")
1743 .with_code("unexpected_eof")
1744 .with_category(DiagnosticCategory::ParseError),
1745 ),
1746 }
1747}
1748
1749const MAX_RECOVERY_ERRORS: usize = 100;
1750
1751pub fn parse_with_diagnostics(input: &str) -> ParseResult {
1754 let bytes = input
1755 .strip_prefix('\u{FEFF}')
1756 .map(str::as_bytes)
1757 .unwrap_or_else(|| input.as_bytes());
1758 let located = LocatedSpan::new(bytes);
1759
1760 let mut elements = Vec::new();
1761 let mut errors = Vec::new();
1762
1763 let (mut input, _) = match lex::ws_and_comments(located) {
1764 Ok(x) => x,
1765 Err(_) => {
1766 return ParseResult {
1767 root: RootNamespace { elements: vec![] },
1768 errors: vec![ParseError::new("invalid input")
1769 .with_code("invalid_input")
1770 .with_category(DiagnosticCategory::ParseError)],
1771 };
1772 }
1773 };
1774
1775 while errors.len() < MAX_RECOVERY_ERRORS {
1776 let (rest, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
1778 input = rest;
1779 if input.fragment().is_empty() {
1780 break;
1781 }
1782 match package::root_element(input) {
1783 Ok((rest, elem)) => {
1784 elements.push(elem);
1785 input = rest;
1786 }
1787 Err(nom::Err::Error(e)) | Err(nom::Err::Failure(e)) => {
1788 let (trimmed, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
1789 if trim_ascii_start(trimmed.fragment()).starts_with(b"}") {
1790 errors.push(unexpected_closing_brace_parse_error(trimmed));
1791 let skip_result = lex::skip_to_next_sync_point(trimmed);
1792 match skip_result {
1793 Ok((rest, _)) => input = rest,
1794 Err(_) => break,
1795 }
1796 continue;
1797 }
1798 if errors.is_empty()
1799 && has_unclosed_brace(bytes)
1800 && (lex::starts_with_keyword(trimmed.fragment(), b"package")
1801 || lex::starts_with_keyword(trimmed.fragment(), b"namespace")
1802 || lex::starts_with_keyword(trimmed.fragment(), b"library")
1803 || lex::starts_with_keyword(trimmed.fragment(), b"standard"))
1804 {
1805 errors.push(missing_closing_brace_error_at_eof(bytes));
1806 break;
1807 }
1808 if let Some(scope) = root_body_scope(input.fragment()) {
1809 let (error_input, _) = lex::ws_and_comments(e.input).unwrap_or((e.input, ()));
1810 if error_input.fragment().starts_with(b"{") {
1811 errors.push(root_body_recovery_error(error_input, scope));
1812 match lex::skip_statement_or_block(error_input) {
1813 Ok((rest, _))
1814 if rest.location_offset() > error_input.location_offset() =>
1815 {
1816 input = rest;
1817 continue;
1818 }
1819 _ => {}
1820 }
1821 }
1822 }
1823 let pe = missing_closing_brace_error(bytes, e.input).unwrap_or_else(|| {
1824 nom_err_to_parse_error(&e, None, Some("'package', 'namespace', or 'import'"))
1825 });
1826 errors.push(pe);
1827 let skip_result = lex::skip_to_next_sync_point(e.input);
1828 match skip_result {
1829 Ok((rest, _)) => input = rest,
1830 Err(_) => break,
1831 }
1832 }
1833 Err(nom::Err::Incomplete(_)) => {
1834 errors.push(
1835 ParseError::new("unexpected end of input")
1836 .with_location(
1837 input.location_offset(),
1838 input.location_line(),
1839 input.get_column(),
1840 )
1841 .with_length(1)
1842 .with_code("unexpected_eof")
1843 .with_category(DiagnosticCategory::ParseError),
1844 );
1845 break;
1846 }
1847 }
1848 }
1849
1850 let (input, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
1851
1852 if input.fragment().is_empty()
1853 && has_unclosed_brace(bytes)
1854 && !errors
1855 .iter()
1856 .any(|e| e.code.as_deref() == Some("missing_closing_brace"))
1857 {
1858 errors.push(missing_closing_brace_error_at_eof(bytes));
1859 }
1860
1861 if !input.fragment().is_empty()
1862 && !errors
1863 .iter()
1864 .any(|e| e.code.as_deref() == Some("missing_closing_brace"))
1865 {
1866 if trim_ascii_start(input.fragment()).starts_with(b"}") {
1867 errors.push(unexpected_closing_brace_parse_error(input));
1868 } else {
1869 let (found_snippet, found_len) = fragment_to_found_snippet(input.fragment());
1870 let mut pe = ParseError::new("expected end of input")
1871 .with_location(
1872 input.location_offset(),
1873 input.location_line(),
1874 input.get_column(),
1875 )
1876 .with_length(found_len.max(1))
1877 .with_code("expected_end_of_input")
1878 .with_severity(DiagnosticSeverity::Error)
1879 .with_category(DiagnosticCategory::ParseError);
1880 if !found_snippet.is_empty() {
1881 pe = pe.with_found(found_snippet);
1882 }
1883 errors.push(pe);
1884 }
1885 }
1886
1887 errors.extend(collect_recovery_errors(&RootNamespace {
1888 elements: elements.clone(),
1889 }));
1890 errors = dedup_errors(errors);
1891 errors = suppress_diagnostic_cascades(errors);
1892
1893 ParseResult {
1894 root: RootNamespace { elements },
1895 errors,
1896 }
1897}