1mod action;
11mod alias;
12mod allocation;
13mod attribute;
14mod bnf_surface;
15mod body;
16mod case;
17mod connection;
18mod constraint;
19mod definition_prefix;
20mod dependency;
21mod enumeration;
22mod expr;
23mod flow;
24mod import;
25mod individual;
26mod interface;
27mod item;
28mod lex;
29mod metadata;
30mod metadata_annotation;
31mod occurrence;
32mod package;
33mod part;
34mod port;
35mod requirement;
36mod span;
37mod specialization;
38mod state;
39mod usage;
40mod usecase;
41mod view;
42
43use crate::ast::{
44 ActionDefBody, ActionDefBodyElement, ActionUsageBody, ActionUsageBodyElement, CalcDefBody,
45 CalcDefBodyElement, ConstraintDefBody, ConstraintDefBodyElement, PackageBody,
46 PackageBodyElement, ParseErrorNode, PartDefBody, PartDefBodyElement, PartUsageBody,
47 PartUsageBodyElement, RequirementDefBody, RequirementDefBodyElement, RootNamespace,
48 StateDefBody, StateDefBodyElement, UseCaseDefBody, UseCaseDefBodyElement, ViewBody,
49 ViewBodyElement, ViewDefBody, ViewDefBodyElement,
50};
51use crate::error::{DiagnosticCategory, DiagnosticSeverity, ParseError};
52use nom::error::Error;
53use nom_locate::LocatedSpan;
54pub(crate) use span::{node_from_to, span_from_to, with_span, Input};
55
/// Result of a parse: the root namespace together with the diagnostics gathered for it.
#[derive(Debug, Clone)]
pub struct ParseResult {
    /// Root namespace of the parsed input.
    /// NOTE(review): presumably still populated when `errors` is non-empty (recovery mode) —
    /// confirm against the parse entry point.
    pub root: RootNamespace,
    /// Diagnostics recorded for this parse; `is_ok` reports whether this list is empty.
    pub errors: Vec<ParseError>,
}
64
impl ParseResult {
    /// Returns `true` when `errors` is empty, i.e. no diagnostic was recorded.
    pub fn is_ok(&self) -> bool {
        self.errors.is_empty()
    }
}
71
/// Maximum number of input bytes that `fragment_to_found_snippet` keeps for a "found" snippet.
const FOUND_SNIPPET_MAX_LEN: usize = 40;

/// Keywords that `is_illegal_top_level_definition` tests the start of a fragment against.
const ILLEGAL_TOP_LEVEL_STARTERS: &[&[u8]] = &[
    b"action",
    b"actor",
    b"alias",
    b"allocate",
    b"allocation",
    b"attribute",
    b"bind",
    b"calc",
    b"case",
    b"concern",
    b"connection",
    b"constraint",
    b"dependency",
    b"enum",
    b"flow",
    b"interface",
    b"item",
    b"metadata",
    b"occurrence",
    b"part",
    b"perform",
    b"port",
    b"ref",
    b"require",
    b"requirement",
    b"satisfy",
    b"state",
    b"use",
    b"verification",
    b"view",
    b"viewpoint",
];

/// Extracts a short, single-line snippet from the start of `fragment`.
///
/// The snippet stops at the first `\n` or `\r` and is capped at
/// `FOUND_SNIPPET_MAX_LEN` bytes. When the cap would fall inside a multi-byte
/// UTF-8 character, the cut is moved back to the start of that character so the
/// snippet never ends in a U+FFFD replacement character.
///
/// Returns the snippet text (trailing whitespace removed) and the number of
/// input bytes it was taken from (measured before trimming).
fn fragment_to_found_snippet(fragment: &[u8]) -> (String, usize) {
    let line_end = fragment
        .iter()
        .position(|&b| b == b'\n' || b == b'\r')
        .unwrap_or(fragment.len());
    let mut take = line_end.min(FOUND_SNIPPET_MAX_LEN);

    // Only a cut caused by the length cap can split a character; a cut at a line
    // break or at the end of the input is always on a boundary.
    if take < line_end {
        // A UTF-8 character is at most 4 bytes, so its lead byte is at most 3 bytes
        // before any of its continuation bytes (`10xxxxxx`). If no lead byte is found
        // in that window the input is not valid UTF-8 and the cut is left alone.
        let lead = (take.saturating_sub(3)..=take)
            .rev()
            .find(|&i| (fragment[i] & 0xC0) != 0x80);
        if let Some(boundary) = lead {
            take = boundary;
        }
    }

    // The slice ends before the first line break, so no newline escaping is needed.
    let slice = &fragment[..take];
    let snippet = String::from_utf8_lossy(slice).trim_end().to_string();
    (snippet, slice.len())
}
122
123pub(crate) fn recovery_found_snippet(input: Input<'_>) -> Option<String> {
124 let frag = input.fragment();
125 let take = frag
126 .iter()
127 .position(|&b| b == b'\n' || b == b'\r')
128 .unwrap_or(frag.len())
129 .min(60);
130 let snippet = String::from_utf8_lossy(&frag[..take]).trim().to_string();
131 if snippet.is_empty() {
132 None
133 } else {
134 Some(snippet)
135 }
136}
137
138fn recovery_found_snippet_from_span(input: Input<'_>, recovery_end: Input<'_>) -> Option<String> {
139 let consumed_len = recovery_end
140 .location_offset()
141 .saturating_sub(input.location_offset())
142 .min(input.fragment().len());
143 if consumed_len == 0 {
144 return recovery_found_snippet(input);
145 }
146 let frag = &input.fragment()[..consumed_len];
147 let take = frag
148 .iter()
149 .position(|&b| b == b'\n' || b == b'\r')
150 .unwrap_or(frag.len())
151 .min(60);
152 let snippet = String::from_utf8_lossy(&frag[..take]).trim().to_string();
153 if snippet.is_empty() {
154 recovery_found_snippet(input)
155 } else {
156 Some(snippet)
157 }
158}
159
160fn nom_error_kind_to_message(code: &nom::error::ErrorKind) -> &'static str {
162 use nom::error::ErrorKind;
163 match code {
164 ErrorKind::Tag => "expected keyword or token",
165 ErrorKind::Digit => "expected number",
166 ErrorKind::Alpha => "expected identifier",
167 ErrorKind::AlphaNumeric => "expected identifier",
168 ErrorKind::Space => "expected whitespace",
169 ErrorKind::MultiSpace => "expected whitespace",
170 ErrorKind::Eof => "unexpected end of input",
171 ErrorKind::TakeUntil => "expected terminator",
172 ErrorKind::TakeWhile1 => "expected token",
173 ErrorKind::Alt => {
174 "expected package, import, part, port, interface, alias, attribute, or action"
175 }
176 ErrorKind::Many0 | ErrorKind::Many1 => "expected list of elements",
177 _ => "parse error",
178 }
179}
180
181fn nom_error_kind_to_code(code: &nom::error::ErrorKind) -> &'static str {
183 use nom::error::ErrorKind;
184 match code {
185 ErrorKind::Tag => "expected_keyword",
186 ErrorKind::Digit => "expected_number",
187 ErrorKind::Alpha | ErrorKind::AlphaNumeric => "expected_identifier",
188 ErrorKind::Space | ErrorKind::MultiSpace => "expected_whitespace",
189 ErrorKind::Eof => "unexpected_eof",
190 ErrorKind::TakeUntil => "expected_terminator",
191 ErrorKind::TakeWhile1 => "expected_token",
192 ErrorKind::Alt => "expected_alt",
193 ErrorKind::Many0 | ErrorKind::Many1 => "expected_list",
194 _ => "parse_error",
195 }
196}
197
198fn nom_err_to_parse_error(
199 e: &Error<Input<'_>>,
200 length_override: Option<usize>,
201 expected_context: Option<&'static str>,
202) -> ParseError {
203 let offset = e.input.location_offset();
204 let line = e.input.location_line();
205 let column = e.input.get_column();
206 let fragment = e.input.fragment();
207 let (found_snippet, found_len) = fragment_to_found_snippet(fragment);
208 let message = nom_error_kind_to_message(&e.code).to_string();
209 let span_len = length_override.unwrap_or(found_len).max(1);
210 if trim_ascii_start(fragment).starts_with(b"}") {
211 return unexpected_closing_brace_parse_error(e.input);
212 }
213 let mut pe = ParseError::new(message)
214 .with_location(offset, line, column)
215 .with_length(span_len)
216 .with_code(nom_error_kind_to_code(&e.code))
217 .with_severity(DiagnosticSeverity::Error)
218 .with_category(DiagnosticCategory::ParseError);
219 if !found_snippet.is_empty() {
220 pe = pe.with_found(found_snippet);
221 }
222 if let Some(ctx) = expected_context {
223 pe = pe.with_expected(ctx);
224 }
225 let at_root = expected_context.is_some_and(|ctx| {
226 ctx.contains("'package', 'namespace', or 'import'") || ctx.contains("top level")
227 });
228 if at_root && is_illegal_top_level_definition(fragment) {
229 pe.message = "illegal top-level definition".to_string();
230 pe.code = Some("illegal_top_level_definition".to_string());
231 pe.expected = Some("'package', 'namespace', or 'import'".to_string());
232 pe.suggestion = Some(
233 "Wrap this declaration in `package ... { ... }` or `namespace ... { ... }`."
234 .to_string(),
235 );
236 }
237 pe
238}
239
240fn is_illegal_top_level_definition(fragment: &[u8]) -> bool {
241 let trimmed = trim_ascii_start(fragment);
242 !trimmed.starts_with(b"}")
243 && !trimmed.starts_with(b"//")
244 && !trimmed.starts_with(b"/*")
245 && lex::starts_with_any_keyword(trimmed, ILLEGAL_TOP_LEVEL_STARTERS)
246}
247
/// Returns `fragment` with its leading ASCII whitespace removed.
///
/// The result is empty when `fragment` consists solely of whitespace.
fn trim_ascii_start(fragment: &[u8]) -> &[u8] {
    let first_kept = fragment
        .iter()
        .position(|byte| !byte.is_ascii_whitespace())
        .unwrap_or(fragment.len());
    &fragment[first_kept..]
}
258
259fn starts_with_missing_name_after_keyword(
260 fragment: &[u8],
261 keyword: &[u8],
262 trailing_keywords: &[&[u8]],
263) -> bool {
264 let mut fragment = trim_ascii_start(fragment);
265 if !lex::starts_with_keyword(fragment, keyword) {
266 return false;
267 }
268 fragment = &fragment[keyword.len()..];
269 while let Some(first) = fragment.first() {
270 if first.is_ascii_whitespace() {
271 fragment = &fragment[1..];
272 continue;
273 }
274 break;
275 }
276 for trailing in trailing_keywords {
277 if lex::starts_with_keyword(fragment, trailing) {
278 fragment = &fragment[trailing.len()..];
279 while let Some(first) = fragment.first() {
280 if first.is_ascii_whitespace() {
281 fragment = &fragment[1..];
282 continue;
283 }
284 break;
285 }
286 }
287 }
288 fragment.starts_with(b":")
289 && !lex::starts_with_keyword(fragment, b":>>")
290 && !lex::starts_with_keyword(fragment, b":>")
291 && !lex::starts_with_keyword(fragment, b"::")
292}
293
294fn starts_with_missing_type_after_keyword(
295 fragment: &[u8],
296 keyword: &[u8],
297 trailing_keywords: &[&[u8]],
298) -> bool {
299 let mut fragment = trim_ascii_start(fragment);
300 if !lex::starts_with_keyword(fragment, keyword) {
301 return false;
302 }
303 fragment = &fragment[keyword.len()..];
304 while let Some(first) = fragment.first() {
305 if first.is_ascii_whitespace() {
306 fragment = &fragment[1..];
307 continue;
308 }
309 break;
310 }
311 for trailing in trailing_keywords {
312 if lex::starts_with_keyword(fragment, trailing) {
313 fragment = &fragment[trailing.len()..];
314 while let Some(first) = fragment.first() {
315 if first.is_ascii_whitespace() {
316 fragment = &fragment[1..];
317 continue;
318 }
319 break;
320 }
321 }
322 }
323
324 let mut name_len = 0usize;
325 while name_len < fragment.len()
326 && (fragment[name_len].is_ascii_alphanumeric() || fragment[name_len] == b'_')
327 {
328 name_len += 1;
329 }
330 if name_len == 0 {
331 return false;
332 }
333 fragment = &fragment[name_len..];
334 while let Some(first) = fragment.first() {
335 if first.is_ascii_whitespace() {
336 fragment = &fragment[1..];
337 continue;
338 }
339 break;
340 }
341 if fragment.starts_with(b":") {
342 fragment = &fragment[1..];
343 } else if lex::starts_with_keyword(fragment, b"defined") {
344 fragment = &fragment[b"defined".len()..];
345 fragment = trim_ascii_start(fragment);
346 if !lex::starts_with_keyword(fragment, b"by") {
347 return false;
348 }
349 fragment = &fragment[b"by".len()..];
350 } else if lex::starts_with_keyword(fragment, b"typed") {
351 fragment = &fragment[b"typed".len()..];
352 fragment = trim_ascii_start(fragment);
353 if !lex::starts_with_keyword(fragment, b"by") {
354 return false;
355 }
356 fragment = &fragment[b"by".len()..];
357 } else {
358 return false;
359 }
360 while let Some(first) = fragment.first() {
361 if first.is_ascii_whitespace() {
362 fragment = &fragment[1..];
363 continue;
364 }
365 break;
366 }
367
368 fragment.is_empty()
369 || fragment.starts_with(b";")
370 || fragment.starts_with(b"{")
371 || fragment.starts_with(b"}")
372 || lex::starts_with_keyword(fragment, b"then")
373 || lex::starts_with_keyword(fragment, b"if")
374 || lex::starts_with_keyword(fragment, b"do")
375}
376
377fn missing_name_diagnostic(
378 fragment: &[u8],
379 scope_label: &str,
380) -> Option<(&'static str, String, String, String)> {
381 #[allow(clippy::type_complexity)]
382 let cases: &[(&[u8], &[&[u8]], &str, &str)] = &[
383 (
384 b"subject",
385 &[],
386 "subject name",
387 "Use `subject laptop: Laptop;`.",
388 ),
389 (b"actor", &[], "actor name", "Use `actor user: User;`."),
390 (b"state", &[], "state name", "Use `state ready: Mode;`."),
391 (b"part", &[], "part name", "Use `part wheel: Wheel;`."),
392 (b"ref", &[], "reference name", "Use `ref sensor: Sensor;`."),
393 (b"port", &[], "port name", "Use `port power: PowerPort;`."),
394 (
395 b"attribute",
396 &[],
397 "attribute name",
398 "Use `attribute mass: MassValue;`.",
399 ),
400 (b"in", &[], "input name", "Use `in speed: Real;`."),
401 (b"out", &[], "output name", "Use `out result: Real;`."),
402 (
403 b"perform",
404 &[b"action"],
405 "action name",
406 "Use `perform action run: Runner;`.",
407 ),
408 (b"return", &[], "return name", "Use `return result: Real;`."),
409 ];
410
411 let allow_anonymous_requirement_params = scope_label == "requirement body";
412 for (keyword, trailing, missing_what, suggestion) in cases {
413 if allow_anonymous_requirement_params
414 && (keyword == b"subject" || keyword == b"actor")
415 && starts_with_missing_name_after_keyword(fragment, keyword, trailing)
416 {
417 continue;
419 }
420 if starts_with_missing_name_after_keyword(fragment, keyword, trailing) {
421 return Some((
422 "missing_member_name",
423 format!("expected {missing_what} before ':'"),
424 format!("{missing_what} before ':'"),
425 suggestion.to_string(),
426 ));
427 }
428 }
429 None
430}
431
432fn missing_type_diagnostic(fragment: &[u8]) -> Option<(&'static str, String, String, String)> {
433 #[allow(clippy::type_complexity)]
434 let cases: &[(&[u8], &[&[u8]], &str)] = &[
435 (b"subject", &[], "subject type"),
436 (b"actor", &[], "actor type"),
437 (b"state", &[], "state type"),
438 (b"part", &[], "part type"),
439 (b"ref", &[], "reference type"),
440 (b"port", &[], "port type"),
441 (b"attribute", &[], "attribute type"),
442 (b"occurrence", &[], "occurrence type"),
443 (b"in", &[], "input type"),
444 (b"out", &[], "output type"),
445 (b"perform", &[b"action"], "action type"),
446 (b"return", &[], "return type"),
447 ];
448
449 for &(keyword, trailing, missing_what) in cases {
450 if starts_with_missing_type_after_keyword(fragment, keyword, trailing) {
451 let keyword_label = String::from_utf8_lossy(keyword);
452 let sample_name = if keyword == &b"subject"[..] {
453 "laptop"
454 } else if keyword == &b"actor"[..] {
455 "user"
456 } else if keyword == &b"state"[..] {
457 "ready"
458 } else if keyword == &b"part"[..] {
459 "wheel"
460 } else if keyword == &b"ref"[..] {
461 "sensor"
462 } else if keyword == &b"port"[..] {
463 "power"
464 } else if keyword == &b"attribute"[..] {
465 "mass"
466 } else if keyword == &b"occurrence"[..] {
467 "event"
468 } else if keyword == &b"in"[..] {
469 "speed"
470 } else if keyword == &b"out"[..] {
471 "result"
472 } else if keyword == &b"perform"[..] {
473 "run"
474 } else if keyword == &b"return"[..] {
475 "result"
476 } else {
477 "member"
478 };
479 let sample_type = if keyword == &b"subject"[..] {
480 "Laptop"
481 } else if keyword == &b"actor"[..] {
482 "User"
483 } else if keyword == &b"state"[..] {
484 "Mode"
485 } else if keyword == &b"part"[..] {
486 "Wheel"
487 } else if keyword == &b"ref"[..] {
488 "Sensor"
489 } else if keyword == &b"port"[..] {
490 "PowerPort"
491 } else if keyword == &b"attribute"[..] {
492 "MassValue"
493 } else if keyword == &b"occurrence"[..] {
494 "Event"
495 } else if keyword == &b"in"[..] || keyword == &b"out"[..] {
496 "Real"
497 } else if keyword == &b"perform"[..] {
498 "Runner"
499 } else if keyword == &b"return"[..] {
500 "Real"
501 } else {
502 "Type"
503 };
504 let suggestion = if keyword == &b"perform"[..] {
505 format!("Use `perform action {sample_name}: {sample_type};`.")
506 } else if keyword == &b"return"[..] {
507 format!("Use `return {sample_name}: {sample_type};`.")
508 } else {
509 format!("Use `{keyword_label} {sample_name}: {sample_type};`.")
510 };
511 return Some((
512 "missing_type_reference",
513 format!("expected {missing_what} after ':'"),
514 format!("{missing_what} after ':'"),
515 suggestion,
516 ));
517 }
518 }
519 None
520}
521
522fn invalid_expose_separator_diagnostic(
523 fragment: &[u8],
524) -> Option<(&'static str, String, String, String)> {
525 let mut fragment = trim_ascii_start(fragment);
526 if !lex::starts_with_keyword(fragment, b"expose") {
527 return None;
528 }
529 fragment = &fragment[b"expose".len()..];
530 while let Some(first) = fragment.first() {
531 if first.is_ascii_whitespace() {
532 fragment = &fragment[1..];
533 continue;
534 }
535 break;
536 }
537 if fragment.is_empty() {
538 return None;
539 }
540
541 let mut saw_dot = false;
542 let mut in_quoted_name = false;
543 for &b in fragment {
544 if b == b'\'' {
545 in_quoted_name = !in_quoted_name;
546 continue;
547 }
548 if in_quoted_name {
549 continue;
550 }
551 if matches!(b, b';' | b'[' | b'{' | b'}' | b'\n' | b'\r') {
552 break;
553 }
554 if b == b'.' {
555 saw_dot = true;
556 break;
557 }
558 }
559 if !saw_dot {
560 return None;
561 }
562
563 Some((
564 "invalid_qualified_name_separator",
565 "invalid qualified name in expose target: use '::' instead of '.'".to_string(),
566 "qualified name segments separated by '::'".to_string(),
567 "Replace '.' with '::' in the expose target (example: `expose A::B;`).".to_string(),
568 ))
569}
570
571fn invalid_requirement_short_name_syntax_diagnostic(
572 fragment: &[u8],
573) -> Option<(&'static str, String, String, String)> {
574 let fragment = trim_ascii_start(fragment);
575 if fragment.starts_with(b"requirement def") {
576 let mut rest = trim_ascii_start(&fragment[b"requirement def".len()..]);
577 if rest.starts_with(b"id") {
578 rest = trim_ascii_start(&rest[2..]);
579 if rest.first() == Some(&b'\'') || rest.first() == Some(&b'"') {
580 let quote = rest[0];
581 if let Some(close) = rest[1..].iter().position(|&b| b == quote) {
582 let req_id = String::from_utf8_lossy(&rest[1..1 + close]);
583 return Some((
584 "invalid_requirement_short_name_syntax",
585 format!(
586 "requirement definition uses non-standard `id '{req_id}'` syntax; use a short name in angle brackets"
587 ),
588 "short name in angle brackets after `requirement def`".to_string(),
589 format!(
590 "Use `requirement def <'{req_id}'> ...` instead of `requirement def id '{req_id}' ...`."
591 ),
592 ));
593 }
594 }
595 }
596 }
597
598 if fragment.first() == Some(&b'\'') || fragment.first() == Some(&b'"') {
600 let quote = fragment[0];
601 if let Some(close) = fragment[1..].iter().position(|&b| b == quote) {
602 let req_id = String::from_utf8_lossy(&fragment[1..1 + close]);
603 return Some((
604 "invalid_requirement_short_name_syntax",
605 format!(
606 "requirement ID `'{req_id}'` should use short-name syntax in angle brackets, not a separate `id` keyword"
607 ),
608 "short name in angle brackets after `requirement def`".to_string(),
609 format!("Use `requirement def <'{req_id}'> ...` instead of `requirement def id '{req_id}' ...`."),
610 ));
611 }
612 }
613 None
614}
615
616fn bare_feature_declaration_in_part_def_diagnostic(
617 fragment: &[u8],
618) -> Option<(&'static str, String, String, String)> {
619 let fragment = trim_ascii_start(fragment);
620 let feature_keywords: &[&[u8]] = &[
621 b"attribute",
622 b"part",
623 b"port",
624 b"item",
625 b"ref",
626 b"bind",
627 b"connection",
628 b"interface",
629 b"action",
630 b"state",
631 b"import",
632 b"doc",
633 b"comment",
634 b"constraint",
635 b"calc",
636 b"perform",
637 b"enum",
638 ];
639 if lex::starts_with_any_keyword(fragment, feature_keywords) {
640 return None;
641 }
642 let ident_end = fragment
643 .iter()
644 .position(|b| !b.is_ascii_alphanumeric() && *b != b'_')
645 .unwrap_or(fragment.len());
646 if ident_end == 0 || !fragment[0].is_ascii_alphabetic() {
647 return None;
648 }
649 let ident = String::from_utf8_lossy(&fragment[..ident_end]);
650 let rest = trim_ascii_start(&fragment[ident_end..]);
651 if !rest.starts_with(b":") {
652 return None;
653 }
654 if fragment.windows(3).any(|w| w == b":>>" || w == b":> " || w == b"::>")
655 || fragment.windows(8).any(|w| w == b" connect")
656 || fragment.windows(4).any(|w| w == b" to ")
657 {
658 return None;
659 }
660 let rest = trim_ascii_start(&rest[1..]);
661 let type_end = rest
662 .iter()
663 .position(|b| matches!(*b, b';' | b'{' | b'}' | b'\n' | b'\r' | b'['))
664 .unwrap_or(rest.len());
665 if type_end == 0 {
666 return None;
667 }
668 let type_name = String::from_utf8_lossy(&rest[..type_end]).trim().to_string();
669 let sample_ident = ident.to_lowercase();
670 Some((
671 "bare_feature_declaration_in_part_def",
672 format!("bare feature `{ident} : {type_name}` is not valid in a part definition body"),
673 "feature kind keyword such as `attribute`, `part`, or `port`".to_string(),
674 format!("Use `attribute {sample_ident} : {type_name};` (or `item` / `port` as appropriate)."),
675 ))
676}
677
/// Reports whether `fragment` begins with `prefix` as a complete header token.
///
/// The byte after `prefix` must be absent, ASCII whitespace, `<`, `;` or `{`, so
/// `view` matches `view x` but not `viewpoint`. No whitespace is skipped first.
fn starts_declaration_header(fragment: &[u8], prefix: &[u8]) -> bool {
    match fragment.strip_prefix(prefix) {
        None => false,
        Some(rest) => match rest.first() {
            None => true,
            Some(&next) => next.is_ascii_whitespace() || matches!(next, b'<' | b';' | b'{'),
        },
    }
}
689
690fn missing_semicolon_or_body_diagnostic(
691 fragment: &[u8],
692) -> Option<(&'static str, String, String, String)> {
693 if let Some(diag) = invalid_requirement_short_name_syntax_diagnostic(fragment) {
694 return Some(diag);
695 }
696 let fragment = trim_ascii_start(fragment);
697 let cases: &[(&[u8], &str, &str)] = &[
698 (
699 b"action def",
700 "action definition",
701 "Use `action def Run;` or `action def Run { ... }`.",
702 ),
703 (
704 b"part def",
705 "part definition",
706 "Use `part def Wheel;` or `part def Wheel { ... }`.",
707 ),
708 (
709 b"requirement def",
710 "requirement definition",
711 "Use `requirement def R;` or `requirement def R { ... }`.",
712 ),
713 (
714 b"state def",
715 "state definition",
716 "Use `state def Ready;` or `state def Ready { ... }`.",
717 ),
718 (
719 b"view",
720 "view declaration",
721 "Use `view structure: GeneralView;` or `view structure: GeneralView { ... }`.",
722 ),
723 (
724 b"rendering def",
725 "rendering definition",
726 "Use `rendering def Diagram;` or `rendering def Diagram { ... }`.",
727 ),
728 ];
729
730 for (prefix, label, suggestion) in cases {
731 if starts_declaration_header(fragment, prefix) {
732 return Some((
733 "missing_body_or_semicolon",
734 format!("expected ';' or '{{' after {label} header"),
735 "';' or '{' after declaration header".to_string(),
736 suggestion.to_string(),
737 ));
738 }
739 }
740 None
741}
742
743fn definition_declaration_header(fragment: &[u8]) -> &[u8] {
745 let fragment = trim_ascii_start(fragment);
746 let end = fragment
747 .iter()
748 .position(|&b| b == b'{' || b == b';')
749 .unwrap_or(fragment.len());
750 trim_ascii_end(&fragment[..end])
751}
752
753fn definition_header_has_invalid_specialization_colon(header: &[u8]) -> bool {
755 let header = trim_ascii_start(header);
756 let prefixes: &[(&[u8], &str)] = &[(b"part def", "part def"), (b"port def", "port def")];
757 for (prefix, _) in prefixes {
758 if !header.starts_with(prefix) {
759 continue;
760 }
761 let mut rest = trim_ascii_start(&header[prefix.len()..]);
762 if rest.starts_with(b"<") {
763 if let Some(close) = rest[1..].iter().position(|&b| b == b'>') {
764 rest = trim_ascii_start(&rest[close + 2..]);
765 } else {
766 return false;
767 }
768 }
769 while !rest.is_empty() && !rest[0].is_ascii_whitespace() && rest[0] != b':' {
770 rest = &rest[1..];
771 }
772 rest = trim_ascii_start(rest);
773 if rest.starts_with(b":>") || rest.starts_with(b":>>") {
774 return false;
775 }
776 if rest.starts_with(b"specializes") {
777 return false;
778 }
779 if rest.first() == Some(&b':') {
780 return true;
781 }
782 }
783 false
784}
785
786fn invalid_typing_operator_diagnostic(
787 fragment: &[u8],
788) -> Option<(&'static str, String, String, String)> {
789 let header = definition_declaration_header(fragment);
790 if !definition_header_has_invalid_specialization_colon(header) {
791 return None;
792 }
793 let (label, suggestion) = if header.starts_with(b"port def") {
794 (
795 "port definition specialization",
796 "Use `port def PowerPort :> BasePort;` when specializing a definition.",
797 )
798 } else {
799 (
800 "part definition specialization",
801 "Use `part def Vehicle :> BaseVehicle;` when specializing a definition.",
802 )
803 };
804 Some((
805 "invalid_typing_operator",
806 format!("invalid typing operator in {label}: use ':>' instead of ':'"),
807 "':>' specialization operator".to_string(),
808 suggestion.to_string(),
809 ))
810}
811
812fn missing_expression_after_operator_diagnostic(
813 fragment: &[u8],
814) -> Option<(&'static str, String, String, String)> {
815 let fragment = trim_ascii_start(fragment);
816 let cases: &[(&[u8], &str, &str)] = &[
817 (
818 b"bind",
819 "binding expression after '='",
820 "Use `bind x = y;`.",
821 ),
822 (
823 b"assign",
824 "assignment expression after ':='",
825 "Use `assign x := y;`.",
826 ),
827 (
828 b"first",
829 "target after 'then'",
830 "Use `first start then finish;`.",
831 ),
832 (
833 b"flow",
834 "target after 'to'",
835 "Use `flow source to target;`.",
836 ),
837 (
838 b"satisfy",
839 "target after 'by'",
840 "Use `satisfy Req by implementation;`.",
841 ),
842 ];
843
844 for (keyword, expected, suggestion) in cases {
845 if !lex::starts_with_keyword(fragment, keyword) {
846 continue;
847 }
848 let text = String::from_utf8_lossy(fragment);
849 if text.contains("= ;") || text.trim_end().ends_with('=') {
850 return Some((
851 "missing_expression_after_operator",
852 "expected expression after '='".to_string(),
853 expected.to_string(),
854 suggestion.to_string(),
855 ));
856 }
857 if text.contains(":= ;") || text.trim_end().ends_with(":=") {
858 return Some((
859 "missing_expression_after_operator",
860 "expected expression after ':='".to_string(),
861 expected.to_string(),
862 suggestion.to_string(),
863 ));
864 }
865 if text.contains(" then ;") || text.trim_end().ends_with(" then") {
866 return Some((
867 "missing_expression_after_operator",
868 "expected target after 'then'".to_string(),
869 expected.to_string(),
870 suggestion.to_string(),
871 ));
872 }
873 if text.contains(" to ;") || text.trim_end().ends_with(" to") {
874 return Some((
875 "missing_expression_after_operator",
876 "expected target after 'to'".to_string(),
877 expected.to_string(),
878 suggestion.to_string(),
879 ));
880 }
881 if text.contains(" by ;") || text.trim_end().ends_with(" by") {
882 return Some((
883 "missing_expression_after_operator",
884 "expected target after 'by'".to_string(),
885 expected.to_string(),
886 suggestion.to_string(),
887 ));
888 }
889 }
890 None
891}
892
/// Produces an `invalid_unit_reference` diagnostic for malformed `[unit]` brackets.
///
/// Nothing is reported unless the fragment contains both `[` and `]`. An empty
/// bracket pair (`[]` or `[ ]`) gets the "expected unit name" message. A `[` directly
/// followed by `;`, ` ;`, `)` or `,` gets the "invalid unit expression" message.
///
/// The scan works on raw bytes: every pattern is ASCII, so decoding the fragment is
/// unnecessary, and leading whitespace cannot affect a substring search.
///
/// Returns `(code, message, expected, suggestion)`.
fn invalid_unit_reference_diagnostic(
    fragment: &[u8],
) -> Option<(&'static str, String, String, String)> {
    /// Reports whether `needle` (non-empty) occurs anywhere in `haystack`.
    fn has_sequence(haystack: &[u8], needle: &[u8]) -> bool {
        haystack.windows(needle.len()).any(|window| window == needle)
    }

    if !(fragment.contains(&b'[') && fragment.contains(&b']')) {
        return None;
    }

    if has_sequence(fragment, b"[]") || has_sequence(fragment, b"[ ]") {
        return Some((
            "invalid_unit_reference",
            "expected unit name inside '[ ]'".to_string(),
            "unit name inside '[ ]'".to_string(),
            "Use a concrete unit such as `1750 [kg]`.".to_string(),
        ));
    }

    // `[ ]` was already handled above, so it is not tested again here.
    let malformed_open = has_sequence(fragment, b"[;")
        || has_sequence(fragment, b"[ ;")
        || has_sequence(fragment, b"[)")
        || has_sequence(fragment, b"[,");
    if malformed_open {
        return Some((
            "invalid_unit_reference",
            "invalid unit expression inside '[ ]'".to_string(),
            "unit name inside '[ ]'".to_string(),
            "Use a unit symbol or qualified unit name (example: `[kg]` or `[SI::kg]`).".to_string(),
        ));
    }

    None
}
927
928fn unexpected_keyword_in_scope_diagnostic(
929 fragment: &[u8],
930 starters: &[&[u8]],
931 scope_label: &str,
932) -> Option<(&'static str, String, String, String)> {
933 let fragment = trim_ascii_start(fragment);
934 if fragment.is_empty() || fragment.starts_with(b"#") || fragment.starts_with(b"@") {
935 return None;
936 }
937 let keyword_end = fragment
938 .iter()
939 .position(|b| !b.is_ascii_alphanumeric() && *b != b'_')
940 .unwrap_or(fragment.len());
941 if keyword_end == 0 {
942 return None;
943 }
944 let keyword = &fragment[..keyword_end];
945 if lex::starts_with_any_keyword(keyword, starters) {
946 return None;
947 }
948 let keyword_text = String::from_utf8_lossy(keyword);
949 Some((
950 "unexpected_keyword_in_scope",
951 format!("unexpected keyword `{keyword_text}` in {scope_label}"),
952 format!("valid {scope_label} element"),
953 format!("Replace `{keyword_text}` with a valid {scope_label} member or remove it."),
954 ))
955}
956
957fn invalid_bare_identifier_in_body_diagnostic(
958 fragment: &[u8],
959 scope_label: &str,
960) -> Option<(&'static str, String, String, String)> {
961 let is_action = scope_label.contains("action body");
962 let is_state = scope_label.contains("state body");
963 if !is_action && !is_state {
964 return None;
965 }
966
967 let fragment = trim_ascii_start(fragment);
968 let ident_end = fragment
969 .iter()
970 .position(|b| !b.is_ascii_alphanumeric() && *b != b'_')
971 .unwrap_or(fragment.len());
972 if ident_end == 0 || !fragment[0].is_ascii_alphabetic() {
973 return None;
974 }
975
976 let ident = &fragment[..ident_end];
977 let rest = trim_ascii_start(&fragment[ident_end..]);
978 if !(rest.starts_with(b";")
979 || rest.starts_with(b"}")
980 || rest.starts_with(b"\n")
981 || rest.starts_with(b"\r"))
982 {
983 return None;
984 }
985
986 let ident_text = String::from_utf8_lossy(ident);
987 if is_action {
988 Some((
989 "invalid_bare_identifier_in_action_body",
990 format!("bare identifier `{ident_text}` is not a valid action body member"),
991 "action body member such as `perform`, `bind`, `in`, or `out`".to_string(),
992 format!(
993 "Use an explicit action-body form, for example `perform {ident_text};`, `bind ... = ...;`, or an `in`/`out` parameter declaration."
994 ),
995 ))
996 } else {
997 Some((
998 "invalid_bare_identifier_in_state_body",
999 format!("bare identifier `{ident_text}` is not a valid state body member"),
1000 "state body member such as `entry`, `transition`, `then`, `state`, or `ref`"
1001 .to_string(),
1002 format!(
1003 "Use an explicit state-body form, for example `then {ident_text};`, `transition ...;`, or a nested `state` member."
1004 ),
1005 ))
1006 }
1007}
1008
1009fn unexpected_closing_brace_parse_error(input: Input<'_>) -> ParseError {
1010 ParseError::new("unexpected closing '}'")
1011 .with_location(
1012 input.location_offset(),
1013 input.location_line(),
1014 input.get_column(),
1015 )
1016 .with_length(1)
1017 .with_code("unexpected_closing_brace")
1018 .with_expected("valid declaration or end of current body")
1019 .with_found("}")
1020 .with_suggestion("Remove this '}' or add the missing opening '{' before it.")
1021 .with_severity(DiagnosticSeverity::Error)
1022 .with_category(DiagnosticCategory::ParseError)
1023}
1024
1025fn missing_closing_brace_error(bytes: &[u8], input: Input<'_>) -> Option<ParseError> {
1026 if !input.fragment().is_empty() {
1027 return None;
1028 }
1029 let consumed = &bytes[..input.location_offset().min(bytes.len())];
1030 let opens = consumed.iter().filter(|&&b| b == b'{').count();
1031 let closes = consumed.iter().filter(|&&b| b == b'}').count();
1032 if opens <= closes {
1033 return None;
1034 }
1035 Some(missing_closing_brace_error_at_eof(consumed))
1036}
1037
1038fn missing_closing_brace_error_at_eof(bytes: &[u8]) -> ParseError {
1039 let (line, column) = eof_line_column(bytes);
1040 ParseError::new("missing closing '}'")
1041 .with_location(bytes.len(), line, column)
1042 .with_length(1)
1043 .with_code("missing_closing_brace")
1044 .with_expected("'}'")
1045 .with_suggestion("Add '}' to close the open body.")
1046 .with_category(DiagnosticCategory::ParseError)
1047}
1048
1049fn extra_closing_brace_at_eof(bytes: &[u8]) -> Option<ParseError> {
1050 let opens = bytes.iter().filter(|&&b| b == b'{').count();
1051 let closes = bytes.iter().filter(|&&b| b == b'}').count();
1052 if closes <= opens {
1053 return None;
1054 }
1055 let mut last_brace: Option<(usize, u32, usize)> = None;
1056 let mut line = 1u32;
1057 let mut column = 1usize;
1058 for (offset, &b) in bytes.iter().enumerate() {
1059 if b == b'}' {
1060 last_brace = Some((offset, line, column));
1061 }
1062 if b == b'\n' {
1063 line += 1;
1064 column = 1;
1065 } else {
1066 column += 1;
1067 }
1068 }
1069 let (offset, line, column) = last_brace?;
1070 Some(
1071 ParseError::new("unexpected closing '}' at end of file")
1072 .with_location(offset, line, column)
1073 .with_length(1)
1074 .with_code("unexpected_closing_brace")
1075 .with_expected("end of file or valid declaration")
1076 .with_found("}")
1077 .with_suggestion("Remove this extra '}' or add the missing opening '{' earlier in the file.")
1078 .with_category(DiagnosticCategory::ParseError),
1079 )
1080}
1081
1082fn category_from_code(code: &str) -> DiagnosticCategory {
1083 if code == "unsupported_annotation_syntax" {
1084 DiagnosticCategory::UnsupportedGrammarForm
1085 } else if code == "unresolved_symbol" {
1086 DiagnosticCategory::UnresolvedSymbol
1087 } else {
1088 DiagnosticCategory::ParseError
1089 }
1090}
1091
/// Reports whether `bytes` contains more `{` than `}`.
///
/// Only the totals are compared; the order of the braces is not considered.
fn has_unclosed_brace(bytes: &[u8]) -> bool {
    let (mut opens, mut closes) = (0usize, 0usize);
    for &byte in bytes {
        match byte {
            b'{' => opens += 1,
            b'}' => closes += 1,
            _ => {}
        }
    }
    opens > closes
}
1097
/// Computes the 1-based `(line, column)` of the position just past the last byte.
///
/// Each `\n` starts a new line and resets the column to 1; every other byte
/// (including `\r`) advances the column by one.
fn eof_line_column(bytes: &[u8]) -> (u32, usize) {
    bytes
        .iter()
        .fold((1u32, 1usize), |(line, column), &b| match b {
            b'\n' => (line + 1, 1),
            _ => (line, column + 1),
        })
}
1111
1112pub(crate) fn build_recovery_error_node(
1113 input: Input<'_>,
1114 starters: &[&[u8]],
1115 scope_label: &str,
1116 generic_code: &str,
1117) -> ParseErrorNode {
1118 build_recovery_error_node_from_span(input, input, starters, scope_label, generic_code)
1119}
1120
/// Outcome of classifying a region that body-level recovery skipped over.
///
/// The struct-like variants all carry the same four fields, filled from the
/// tuple returned by the matching `*_diagnostic` helper in `classify_recovery`:
/// a diagnostic `code`, a human-readable `message`, a description of what was
/// `expected`, and a fix `suggestion`. The unit variants have fixed texts that
/// are supplied when the error node is built.
enum RecoveryClassification {
    /// A declaration whose member name is missing.
    MissingMemberName {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// A typing with no type reference.
    MissingTypeReference {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// A qualified name written with an invalid separator.
    InvalidQualifiedNameSeparator {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// A declaration that ends without a body or a semicolon.
    MissingBodyOrSemicolon {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// A bare feature declaration; only produced for part definition body scopes.
    BareFeatureDeclarationInPartDef {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// An operator with no expression following it.
    MissingExpressionAfterOperator {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// An invalid unit reference.
    InvalidUnitReference {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// An invalid typing operator.
    InvalidTypingOperator {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// A bare identifier that is not valid in the enclosing body.
    InvalidBareIdentifierInBody {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// A keyword that is not expected in the current scope.
    UnexpectedKeywordInScope {
        code: String,
        message: String,
        expected: String,
        suggestion: String,
    },
    /// A declaration that ran into the next one without a terminating `;`.
    MissingSemicolon,
    /// Input starting with `#` or `@`, which is reported as unsupported annotation syntax.
    UnsupportedAnnotation,
    /// Fallback when no more specific classification applies.
    Unexpected,
}
1186
/// Returns `fragment` with trailing ASCII whitespace removed.
///
/// Whitespace is defined by `u8::is_ascii_whitespace`. An all-whitespace or
/// empty input yields an empty slice.
fn trim_ascii_end(fragment: &[u8]) -> &[u8] {
    // One past the last non-whitespace byte, or 0 when there is none.
    let keep = fragment
        .iter()
        .rposition(|b| !b.is_ascii_whitespace())
        .map_or(0, |last| last + 1);
    &fragment[..keep]
}
1197
1198fn classify_recovery(
1199 input: Input<'_>,
1200 recovery_end: Input<'_>,
1201 starters: &[&[u8]],
1202 scope_label: &str,
1203) -> RecoveryClassification {
1204 let trimmed = trim_ascii_start(input.fragment());
1205
1206 if let Some((code, message, expected, suggestion)) = missing_name_diagnostic(trimmed, scope_label)
1207 {
1208 return RecoveryClassification::MissingMemberName {
1209 code: code.to_string(),
1210 message,
1211 expected,
1212 suggestion,
1213 };
1214 }
1215
1216 if let Some((code, message, expected, suggestion)) = missing_type_diagnostic(trimmed) {
1217 return RecoveryClassification::MissingTypeReference {
1218 code: code.to_string(),
1219 message,
1220 expected,
1221 suggestion,
1222 };
1223 }
1224
1225 if let Some((code, message, expected, suggestion)) =
1226 invalid_expose_separator_diagnostic(trimmed)
1227 {
1228 return RecoveryClassification::InvalidQualifiedNameSeparator {
1229 code: code.to_string(),
1230 message,
1231 expected,
1232 suggestion,
1233 };
1234 }
1235
1236 if let Some((code, message, expected, suggestion)) = invalid_typing_operator_diagnostic(trimmed)
1237 {
1238 return RecoveryClassification::InvalidTypingOperator {
1239 code: code.to_string(),
1240 message,
1241 expected,
1242 suggestion,
1243 };
1244 }
1245
1246 if let Some((code, message, expected, suggestion)) =
1247 missing_expression_after_operator_diagnostic(trimmed)
1248 {
1249 return RecoveryClassification::MissingExpressionAfterOperator {
1250 code: code.to_string(),
1251 message,
1252 expected,
1253 suggestion,
1254 };
1255 }
1256
1257 if let Some((code, message, expected, suggestion)) = invalid_unit_reference_diagnostic(trimmed)
1258 {
1259 return RecoveryClassification::InvalidUnitReference {
1260 code: code.to_string(),
1261 message,
1262 expected,
1263 suggestion,
1264 };
1265 }
1266
1267 if scope_label.contains("part definition body") {
1268 if let Some((code, message, expected, suggestion)) =
1269 bare_feature_declaration_in_part_def_diagnostic(trimmed)
1270 {
1271 return RecoveryClassification::BareFeatureDeclarationInPartDef {
1272 code: code.to_string(),
1273 message,
1274 expected,
1275 suggestion,
1276 };
1277 }
1278 }
1279
1280 if let Some((code, message, expected, suggestion)) =
1281 missing_semicolon_or_body_diagnostic(trimmed)
1282 {
1283 return RecoveryClassification::MissingBodyOrSemicolon {
1284 code: code.to_string(),
1285 message,
1286 expected,
1287 suggestion,
1288 };
1289 }
1290
1291 let consumed_len = recovery_end
1292 .location_offset()
1293 .saturating_sub(input.location_offset())
1294 .min(input.fragment().len());
1295 let raw_consumed = &input.fragment()[..consumed_len];
1296 let consumed = trim_ascii_end(raw_consumed);
1297 let recovered_to_boundary = recovery_end.location_offset() > input.location_offset() && {
1298 let (next, _) = lex::ws_and_comments(recovery_end).unwrap_or((recovery_end, ()));
1299 next.fragment().is_empty()
1300 || next.fragment().starts_with(b"}")
1301 || lex::starts_with_any_keyword(next.fragment(), starters)
1302 };
1303
1304 let consumed_has_newline = raw_consumed.contains(&b'\n') || raw_consumed.contains(&b'\r');
1305 let first_line_end = consumed
1306 .iter()
1307 .position(|b| matches!(*b, b'\n' | b'\r'))
1308 .unwrap_or(consumed.len());
1309 let first_line = trim_ascii_end(&consumed[..first_line_end]);
1310 let consumed_has_delimiters = consumed
1311 .iter()
1312 .any(|b| matches!(*b, b'{' | b'}' | b'(' | b')' | b'[' | b']'));
1313 let consumed_ends_incomplete = first_line.last().is_some_and(|b| {
1314 matches!(
1315 *b,
1316 b':' | b'=' | b',' | b'.' | b'+' | b'-' | b'*' | b'/' | b'>' | b'<' | b'|'
1317 )
1318 });
1319 let first_line_has_semicolon = first_line.contains(&b';');
1320 if recovered_to_boundary
1321 && lex::starts_with_any_keyword(trimmed, starters)
1322 && (consumed_has_newline || recovery_end.fragment().starts_with(b"}"))
1323 && !consumed.is_empty()
1324 && !consumed_has_delimiters
1325 && !consumed_ends_incomplete
1326 && !first_line_has_semicolon
1327 {
1328 return RecoveryClassification::MissingSemicolon;
1329 }
1330
1331 if lex::starts_with_keyword(trimmed, b"#") || lex::starts_with_keyword(trimmed, b"@") {
1332 return RecoveryClassification::UnsupportedAnnotation;
1333 }
1334
1335 if let Some((code, message, expected, suggestion)) =
1336 invalid_bare_identifier_in_body_diagnostic(trimmed, scope_label)
1337 {
1338 return RecoveryClassification::InvalidBareIdentifierInBody {
1339 code: code.to_string(),
1340 message,
1341 expected,
1342 suggestion,
1343 };
1344 }
1345
1346 if let Some((code, message, expected, suggestion)) =
1347 unexpected_keyword_in_scope_diagnostic(trimmed, starters, scope_label)
1348 {
1349 return RecoveryClassification::UnexpectedKeywordInScope {
1350 code: code.to_string(),
1351 message,
1352 expected,
1353 suggestion,
1354 };
1355 }
1356
1357 RecoveryClassification::Unexpected
1358}
1359
1360pub(crate) fn build_recovery_error_node_from_span(
1361 input: Input<'_>,
1362 recovery_end: Input<'_>,
1363 starters: &[&[u8]],
1364 scope_label: &str,
1365 generic_code: &str,
1366) -> ParseErrorNode {
1367 match classify_recovery(input, recovery_end, starters, scope_label) {
1368 RecoveryClassification::MissingMemberName {
1369 code,
1370 message,
1371 expected,
1372 suggestion,
1373 }
1374 | RecoveryClassification::MissingTypeReference {
1375 code,
1376 message,
1377 expected,
1378 suggestion,
1379 }
1380 | RecoveryClassification::InvalidQualifiedNameSeparator {
1381 code,
1382 message,
1383 expected,
1384 suggestion,
1385 }
1386 | RecoveryClassification::MissingBodyOrSemicolon {
1387 code,
1388 message,
1389 expected,
1390 suggestion,
1391 }
1392 | RecoveryClassification::BareFeatureDeclarationInPartDef {
1393 code,
1394 message,
1395 expected,
1396 suggestion,
1397 }
1398 | RecoveryClassification::MissingExpressionAfterOperator {
1399 code,
1400 message,
1401 expected,
1402 suggestion,
1403 }
1404 | RecoveryClassification::InvalidUnitReference {
1405 code,
1406 message,
1407 expected,
1408 suggestion,
1409 }
1410 | RecoveryClassification::InvalidTypingOperator {
1411 code,
1412 message,
1413 expected,
1414 suggestion,
1415 }
1416 | RecoveryClassification::InvalidBareIdentifierInBody {
1417 code,
1418 message,
1419 expected,
1420 suggestion,
1421 }
1422 | RecoveryClassification::UnexpectedKeywordInScope {
1423 code,
1424 message,
1425 expected,
1426 suggestion,
1427 } => ParseErrorNode {
1428 message,
1429 code,
1430 expected: Some(expected),
1431 found: recovery_found_snippet_from_span(input, recovery_end),
1432 suggestion: Some(suggestion),
1433 category: Some(DiagnosticCategory::ParseError),
1434 },
1435 RecoveryClassification::MissingSemicolon => ParseErrorNode {
1436 message: "missing semicolon before next declaration".to_string(),
1437 code: "missing_semicolon".to_string(),
1438 expected: Some("';'".to_string()),
1439 found: recovery_found_snippet_from_span(input, recovery_end),
1440 suggestion: Some("Insert ';' before this declaration.".to_string()),
1441 category: Some(DiagnosticCategory::ParseError),
1442 },
1443 RecoveryClassification::UnsupportedAnnotation => ParseErrorNode {
1444 message: format!("unsupported annotation syntax in {scope_label}"),
1445 code: "unsupported_annotation_syntax".to_string(),
1446 expected: Some(format!("valid {scope_label} element")),
1447 found: recovery_found_snippet_from_span(input, recovery_end),
1448 suggestion: Some(
1449 "Remove this annotation or extend the parser to support annotated declarations."
1450 .to_string(),
1451 ),
1452 category: Some(DiagnosticCategory::UnsupportedGrammarForm),
1453 },
1454 RecoveryClassification::Unexpected => ParseErrorNode {
1455 message: format!("unexpected token in {scope_label}"),
1456 code: generic_code.to_string(),
1457 expected: Some(format!("valid {scope_label} element")),
1458 found: recovery_found_snippet_from_span(input, recovery_end),
1459 suggestion: Some(format!("Fix this {scope_label} member and re-run parsing.")),
1460 category: Some(DiagnosticCategory::ParseError),
1461 },
1462 }
1463}
1464
1465fn parse_error_from_recovery_node(span: &crate::ast::Span, node: &ParseErrorNode) -> ParseError {
1466 let mut err = ParseError::new(node.message.clone())
1467 .with_location(span.offset, span.line, span.column)
1468 .with_length(span.len.max(1))
1469 .with_code(node.code.clone())
1470 .with_category(
1471 node.category
1472 .unwrap_or_else(|| category_from_code(node.code.as_str())),
1473 );
1474 let severity = if node.code == "unsupported_annotation_syntax" {
1475 DiagnosticSeverity::Warning
1476 } else {
1477 DiagnosticSeverity::Error
1478 };
1479 err = err.with_severity(severity);
1480 if let Some(expected) = &node.expected {
1481 err = err.with_expected(expected.clone());
1482 }
1483 if let Some(found) = &node.found {
1484 err = err.with_found(found.clone());
1485 }
1486 if let Some(suggestion) = &node.suggestion {
1487 err = err.with_suggestion(suggestion.clone());
1488 }
1489 err
1490}
1491
1492fn diagnostic_specificity(err: &ParseError) -> u8 {
1493 match err.code.as_deref() {
1494 Some("missing_member_name")
1495 | Some("missing_type_reference")
1496 | Some("invalid_qualified_name_separator")
1497 | Some("invalid_typing_operator")
1498 | Some("missing_expression_after_operator")
1499 | Some("invalid_unit_reference")
1500 | Some("missing_body_or_semicolon")
1501 | Some("invalid_requirement_short_name_syntax")
1502 | Some("bare_feature_declaration_in_part_def")
1503 | Some("missing_semicolon")
1504 | Some("unexpected_closing_brace")
1505 | Some("missing_closing_brace")
1506 | Some("unsupported_annotation_syntax")
1507 | Some("invalid_bare_identifier_in_action_body")
1508 | Some("invalid_bare_identifier_in_state_body")
1509 | Some("recovery_cascade_suppressed")
1510 | Some("unexpected_keyword_in_scope") => 5,
1511 Some("illegal_top_level_definition") => 4,
1512 Some(code) if code.starts_with("recovered_") => 2,
1513 Some("expected_end_of_input") | Some("expected_keyword") => 1,
1514 _ => 3,
1515 }
1516}
1517
1518fn suppress_redundant_closing_brace_errors(errors: Vec<ParseError>) -> Vec<ParseError> {
1521 let lines_with_block_error: std::collections::HashSet<u32> = errors
1522 .iter()
1523 .filter(|e| e.code.as_deref() != Some("unexpected_closing_brace"))
1524 .filter_map(|e| e.line)
1525 .filter(|line| {
1526 errors.iter().any(|other| {
1527 other.line == Some(*line)
1528 && other
1529 .found
1530 .as_deref()
1531 .is_some_and(|f| f.contains('{') && f.contains('}'))
1532 })
1533 })
1534 .collect();
1535
1536 errors
1537 .into_iter()
1538 .filter(|e| {
1539 if e.code.as_deref() != Some("unexpected_closing_brace") {
1540 return true;
1541 }
1542 e.line
1543 .map(|line| !lines_with_block_error.contains(&line))
1544 .unwrap_or(true)
1545 })
1546 .collect()
1547}
1548
1549fn dedup_errors(mut errors: Vec<ParseError>) -> Vec<ParseError> {
1550 errors.sort_by_key(|e| {
1551 (
1552 e.offset.unwrap_or(usize::MAX),
1553 e.line.unwrap_or(u32::MAX),
1554 e.column.unwrap_or(usize::MAX),
1555 std::cmp::Reverse(diagnostic_specificity(e)),
1556 )
1557 });
1558
1559 let mut deduped = Vec::new();
1560 for err in errors {
1561 let duplicate = deduped.iter().any(|existing: &ParseError| {
1562 let same_start = existing.offset == err.offset
1563 && existing.line == err.line
1564 && existing.column == err.column;
1565 let same_found = existing.found == err.found;
1566 let existing_specificity = diagnostic_specificity(existing);
1567 let err_specificity = diagnostic_specificity(&err);
1568 same_start
1569 && (same_found || existing.code == err.code)
1570 && existing_specificity >= err_specificity
1571 });
1572 if !duplicate {
1573 deduped.push(err);
1574 }
1575 }
1576
1577 deduped.sort_by_key(|e| (e.offset.unwrap_or(usize::MAX), e.line.unwrap_or(u32::MAX)));
1578 deduped
1579}
1580
1581fn is_cascade_candidate(err: &ParseError) -> bool {
1582 matches!(
1583 err.code.as_deref(),
1584 Some("missing_semicolon") | Some("missing_body_or_semicolon")
1585 ) || err
1586 .code
1587 .as_deref()
1588 .is_some_and(|code| code.starts_with("recovered_"))
1589}
1590
1591fn cascade_family(err: &ParseError) -> Option<&str> {
1592 match err.code.as_deref() {
1593 Some("missing_semicolon") => Some("missing_semicolon"),
1594 Some("missing_body_or_semicolon") => Some("missing_body_or_semicolon"),
1595 Some(code) if code.starts_with("recovered_") => Some("recovered"),
1596 _ => None,
1597 }
1598}
1599
/// Maximum number of lines a diagnostic may lie after the previous one in a
/// run and still be treated as a continuation of the same cascade.
const MAX_CASCADE_LINE_DISTANCE: u32 = 50;
1601
1602fn make_cascade_summary(run: &[ParseError]) -> Option<ParseError> {
1603 let summary_anchor = run.first()?;
1604 let suppressed = run.len().saturating_sub(1);
1605 let family = cascade_family(summary_anchor).unwrap_or("recovery");
1606 let mut err = ParseError::new(format!(
1607 "suppressed {suppressed} cascading {family} diagnostic{} after earlier recovery errors",
1608 if suppressed == 1 { "" } else { "s" }
1609 ))
1610 .with_location(
1611 summary_anchor.offset?,
1612 summary_anchor.line?,
1613 summary_anchor.column?,
1614 )
1615 .with_length(summary_anchor.length.unwrap_or(1).max(1))
1616 .with_code("recovery_cascade_suppressed")
1617 .with_expected("fix the first syntax error in this body")
1618 .with_suggestion(
1619 "Fix the earliest diagnostic in this body first; later syntax errors may be cascades.",
1620 )
1621 .with_severity(DiagnosticSeverity::Warning)
1622 .with_category(DiagnosticCategory::ParseError);
1623 if let Some(found) = &summary_anchor.found {
1624 err = err.with_found(found.clone());
1625 }
1626 Some(err)
1627}
1628
1629fn suppress_diagnostic_cascades(errors: Vec<ParseError>) -> Vec<ParseError> {
1630 const MAX_UNSUMMARIZED_CASCADE: usize = 1;
1631
1632 let mut output = Vec::new();
1633 let mut run: Vec<ParseError> = Vec::new();
1634
1635 let flush_run = |run: &mut Vec<ParseError>, output: &mut Vec<ParseError>| {
1636 if run.is_empty() {
1637 return;
1638 }
1639 if run.len() <= MAX_UNSUMMARIZED_CASCADE {
1640 output.append(run);
1641 } else {
1642 let primary_offset = run.first().and_then(|e| e.offset);
1643 if let Some(mut primary) = run.first().cloned() {
1644 primary.is_cascade = Some(false);
1645 output.push(primary);
1646 }
1647 for suppressed in run.iter().skip(MAX_UNSUMMARIZED_CASCADE) {
1648 let _ = primary_offset;
1649 let _ = suppressed;
1650 }
1651 if let Some(summary) = make_cascade_summary(run) {
1652 output.push(summary);
1653 }
1654 run.clear();
1655 }
1656 };
1657
1658 for err in errors {
1659 let continues_run = run.last().is_some_and(|previous| {
1660 is_cascade_candidate(&err)
1661 && cascade_family(previous) == cascade_family(&err)
1662 && previous
1663 .line
1664 .zip(err.line)
1665 .is_some_and(|(a, b)| b <= a.saturating_add(MAX_CASCADE_LINE_DISTANCE))
1666 });
1667
1668 if is_cascade_candidate(&err) && (run.is_empty() || continues_run) {
1669 run.push(err);
1670 } else {
1671 flush_run(&mut run, &mut output);
1672 if is_cascade_candidate(&err) {
1673 run.push(err);
1674 } else {
1675 output.push(err);
1676 }
1677 }
1678 }
1679 flush_run(&mut run, &mut output);
1680 output.sort_by_key(|e| (e.offset.unwrap_or(usize::MAX), e.line.unwrap_or(u32::MAX)));
1681 output
1682}
1683
1684fn root_body_recovery_error(input: Input<'_>, scope: &str) -> ParseError {
1685 let (found, len) = fragment_to_found_snippet(input.fragment());
1686 let mut err = ParseError::new(format!(
1687 "could not parse {scope} body; skipped to next root element"
1688 ))
1689 .with_location(
1690 input.location_offset(),
1691 input.location_line(),
1692 input.get_column(),
1693 )
1694 .with_length(len.max(1))
1695 .with_code("recovered_root_body")
1696 .with_expected(format!("valid {scope} body"))
1697 .with_suggestion(
1698 "Fix the first syntax error in this body; later root-level diagnostics may be cascades.",
1699 )
1700 .with_severity(DiagnosticSeverity::Error)
1701 .with_category(DiagnosticCategory::ParseError);
1702 if !found.is_empty() {
1703 err = err.with_found(found);
1704 }
1705 err
1706}
1707
1708fn root_body_scope(fragment: &[u8]) -> Option<&'static str> {
1709 let fragment = trim_ascii_start(fragment);
1710 if lex::starts_with_keyword(fragment, b"package")
1711 || lex::starts_with_keyword(fragment, b"library")
1712 || lex::starts_with_keyword(fragment, b"standard")
1713 {
1714 Some("package")
1715 } else if lex::starts_with_keyword(fragment, b"namespace") {
1716 Some("namespace")
1717 } else {
1718 None
1719 }
1720}
1721
1722fn collect_requirement_body_errors(body: &RequirementDefBody, errors: &mut Vec<ParseError>) {
1723 if let RequirementDefBody::Brace { elements } = body {
1724 for element in elements {
1725 match &element.value {
1726 RequirementDefBodyElement::Error(n) => {
1727 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1728 }
1729 RequirementDefBodyElement::Frame(n) => {
1730 collect_requirement_body_errors(&n.value.body, errors)
1731 }
1732 _ => {}
1733 }
1734 }
1735 }
1736}
1737
1738fn collect_action_def_body_errors(body: &ActionDefBody, errors: &mut Vec<ParseError>) {
1739 if let ActionDefBody::Brace { elements } = body {
1740 for element in elements {
1741 if let ActionDefBodyElement::Error(n) = &element.value {
1742 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1743 }
1744 }
1745 }
1746}
1747
1748fn collect_action_usage_body_errors(body: &ActionUsageBody, errors: &mut Vec<ParseError>) {
1749 if let ActionUsageBody::Brace { elements } = body {
1750 for element in elements {
1751 match &element.value {
1752 ActionUsageBodyElement::Error(n) => {
1753 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1754 }
1755 ActionUsageBodyElement::ActionUsage(n) => {
1756 collect_action_usage_body_errors(&n.value.body, errors)
1757 }
1758 _ => {}
1759 }
1760 }
1761 }
1762}
1763
1764fn collect_state_body_errors(body: &StateDefBody, errors: &mut Vec<ParseError>) {
1765 if let StateDefBody::Brace { elements } = body {
1766 for element in elements {
1767 match &element.value {
1768 StateDefBodyElement::Error(n) => {
1769 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1770 }
1771 StateDefBodyElement::Entry(n) => collect_state_body_errors(&n.value.body, errors),
1772 StateDefBodyElement::RequirementUsage(n) => {
1773 collect_requirement_body_errors(&n.value.body, errors)
1774 }
1775 StateDefBodyElement::StateUsage(n) => {
1776 collect_state_body_errors(&n.value.body, errors)
1777 }
1778 _ => {}
1779 }
1780 }
1781 }
1782}
1783
1784fn collect_use_case_body_errors(body: &UseCaseDefBody, errors: &mut Vec<ParseError>) {
1785 if let UseCaseDefBody::Brace { elements } = body {
1786 for element in elements {
1787 if let UseCaseDefBodyElement::Error(n) = &element.value {
1788 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1789 }
1790 }
1791 }
1792}
1793
1794fn collect_constraint_body_errors(body: &ConstraintDefBody, errors: &mut Vec<ParseError>) {
1795 if let ConstraintDefBody::Brace { elements } = body {
1796 for element in elements {
1797 if let ConstraintDefBodyElement::Error(n) = &element.value {
1798 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1799 }
1800 }
1801 }
1802}
1803
1804fn collect_calc_body_errors(body: &CalcDefBody, errors: &mut Vec<ParseError>) {
1805 if let CalcDefBody::Brace { elements } = body {
1806 for element in elements {
1807 if let CalcDefBodyElement::Error(n) = &element.value {
1808 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1809 }
1810 }
1811 }
1812}
1813
1814fn collect_view_def_body_errors(body: &ViewDefBody, errors: &mut Vec<ParseError>) {
1815 if let ViewDefBody::Brace { elements } = body {
1816 for element in elements {
1817 if let ViewDefBodyElement::Error(n) = &element.value {
1818 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1819 }
1820 }
1821 }
1822}
1823
1824fn collect_view_body_errors(body: &ViewBody, errors: &mut Vec<ParseError>) {
1825 if let ViewBody::Brace { elements } = body {
1826 for element in elements {
1827 if let ViewBodyElement::Error(n) = &element.value {
1828 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1829 }
1830 }
1831 }
1832}
1833
1834fn collect_part_def_body_errors(body: &PartDefBody, errors: &mut Vec<ParseError>) {
1835 if let PartDefBody::Brace { elements } = body {
1836 for element in elements {
1837 match &element.value {
1838 PartDefBodyElement::Error(n) => {
1839 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1840 }
1841 PartDefBodyElement::PartUsage(n) => {
1842 collect_part_usage_body_errors(&n.value.body, errors)
1843 }
1844 PartDefBodyElement::Perform(n) => {
1845 collect_perform_body_errors(&n.value.body, errors)
1846 }
1847 _ => {}
1848 }
1849 }
1850 }
1851}
1852
1853fn collect_perform_body_errors(body: &crate::ast::PerformBody, _errors: &mut Vec<ParseError>) {
1854 match body {
1855 crate::ast::PerformBody::Semicolon => {}
1856 crate::ast::PerformBody::Brace { .. } => {}
1857 }
1858}
1859
1860fn collect_part_usage_body_errors(body: &PartUsageBody, errors: &mut Vec<ParseError>) {
1861 if let PartUsageBody::Brace { elements } = body {
1862 for element in elements {
1863 match &element.value {
1864 PartUsageBodyElement::Error(n) => {
1865 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1866 }
1867 PartUsageBodyElement::PartUsage(n) => {
1868 collect_part_usage_body_errors(&n.value.body, errors)
1869 }
1870 PartUsageBodyElement::Perform(n) => {
1871 collect_perform_body_errors(&n.value.body, errors)
1872 }
1873 PartUsageBodyElement::StateUsage(n) => {
1874 collect_state_body_errors(&n.value.body, errors)
1875 }
1876 _ => {}
1877 }
1878 }
1879 }
1880}
1881
1882fn collect_package_body_errors(body: &PackageBody, errors: &mut Vec<ParseError>) {
1883 if let PackageBody::Brace { elements } = body {
1884 for element in elements {
1885 match &element.value {
1886 PackageBodyElement::Error(n) => {
1887 errors.push(parse_error_from_recovery_node(&element.span, &n.value));
1888 }
1889 PackageBodyElement::Package(n) => {
1890 collect_package_body_errors(&n.value.body, errors)
1891 }
1892 PackageBodyElement::LibraryPackage(n) => {
1893 collect_package_body_errors(&n.value.body, errors)
1894 }
1895 PackageBodyElement::PartDef(n) => {
1896 collect_part_def_body_errors(&n.value.body, errors)
1897 }
1898 PackageBodyElement::PartUsage(n) => {
1899 collect_part_usage_body_errors(&n.value.body, errors)
1900 }
1901 PackageBodyElement::ActionDef(n) => {
1902 collect_action_def_body_errors(&n.value.body, errors)
1903 }
1904 PackageBodyElement::ActionUsage(n) => {
1905 collect_action_usage_body_errors(&n.value.body, errors)
1906 }
1907 PackageBodyElement::RequirementDef(n) => {
1908 collect_requirement_body_errors(&n.value.body, errors)
1909 }
1910 PackageBodyElement::RequirementUsage(n) => {
1911 collect_requirement_body_errors(&n.value.body, errors)
1912 }
1913 PackageBodyElement::UseCaseDef(n) => {
1914 collect_use_case_body_errors(&n.value.body, errors)
1915 }
1916 PackageBodyElement::UseCaseUsage(n) => {
1917 collect_use_case_body_errors(&n.value.body, errors)
1918 }
1919 PackageBodyElement::ConcernUsage(n) => {
1920 collect_requirement_body_errors(&n.value.body, errors)
1921 }
1922 PackageBodyElement::StateDef(n) => collect_state_body_errors(&n.value.body, errors),
1923 PackageBodyElement::StateUsage(n) => {
1924 collect_state_body_errors(&n.value.body, errors)
1925 }
1926 PackageBodyElement::ConstraintDef(n) => {
1927 collect_constraint_body_errors(&n.value.body, errors)
1928 }
1929 PackageBodyElement::CalcDef(n) => collect_calc_body_errors(&n.value.body, errors),
1930 PackageBodyElement::ViewDef(n) => {
1931 collect_view_def_body_errors(&n.value.body, errors)
1932 }
1933 PackageBodyElement::ViewUsage(n) => collect_view_body_errors(&n.value.body, errors),
1934 _ => {}
1935 }
1936 }
1937 }
1938}
1939
1940fn collect_implicit_attribute_in_part_def_warnings(bytes: &[u8]) -> Vec<ParseError> {
1941 let text = String::from_utf8_lossy(bytes);
1942 let mut errors = Vec::new();
1943 let mut in_part_def_body = false;
1944 let mut brace_depth = 0i32;
1945 let mut offset = 0usize;
1946 for (line_idx, line) in text.lines().enumerate() {
1947 let trimmed = line.trim();
1948 if trimmed.starts_with("part def") {
1949 in_part_def_body = false;
1950 brace_depth = 0;
1951 }
1952 if trimmed.contains('{') {
1953 if in_part_def_body || trimmed.starts_with("part def") {
1954 in_part_def_body = true;
1955 }
1956 brace_depth += trimmed.chars().filter(|&c| c == '{').count() as i32;
1957 }
1958 if trimmed.contains('}') {
1959 brace_depth -= trimmed.chars().filter(|&c| c == '}').count() as i32;
1960 if brace_depth <= 0 {
1961 in_part_def_body = false;
1962 }
1963 }
1964 if in_part_def_body && brace_depth > 0 {
1965 let skip = trimmed.starts_with("attribute")
1966 || trimmed.starts_with("part ")
1967 || trimmed.starts_with("port ")
1968 || trimmed.starts_with("interface")
1969 || trimmed.starts_with("connect")
1970 || trimmed.contains(":>")
1971 || trimmed.contains("::>")
1972 || trimmed.is_empty()
1973 || trimmed.starts_with("//")
1974 || trimmed.starts_with("/*")
1975 || trimmed.starts_with("doc ");
1976 if !skip {
1977 if let Some((code, message, expected, suggestion)) =
1978 bare_feature_declaration_in_part_def_diagnostic(trimmed.as_bytes())
1979 {
1980 let line_no = (line_idx + 1) as u32;
1981 let column = line.find(trimmed).unwrap_or(0) + 1;
1982 let line_offset = offset + line.find(trimmed).unwrap_or(0);
1983 errors.push(
1984 ParseError::new(message)
1985 .with_location(line_offset, line_no, column)
1986 .with_length(trimmed.len().max(1))
1987 .with_code(code)
1988 .with_expected(expected)
1989 .with_suggestion(suggestion)
1990 .with_severity(DiagnosticSeverity::Warning)
1991 .with_category(DiagnosticCategory::ParseError),
1992 );
1993 }
1994 }
1995 }
1996 offset += line.len() + 1;
1997 }
1998 errors
1999}
2000
2001fn collect_requirement_id_dialect_diagnostics(bytes: &[u8]) -> Vec<ParseError> {
2002 let pattern = b"requirement def id ";
2003 let mut errors = Vec::new();
2004 let mut search_from = 0usize;
2005 while search_from < bytes.len() {
2006 let Some(rel) = bytes[search_from..]
2007 .windows(pattern.len())
2008 .position(|window| window == pattern)
2009 else {
2010 break;
2011 };
2012 let offset = search_from + rel;
2013 let after = trim_ascii_start(&bytes[offset + pattern.len()..]);
2014 if after.first() != Some(&b'\'') && after.first() != Some(&b'"') {
2015 search_from = offset + 1;
2016 continue;
2017 }
2018 let quote = after[0];
2019 let Some(close) = after[1..].iter().position(|&b| b == quote) else {
2020 search_from = offset + 1;
2021 continue;
2022 };
2023 let req_id = String::from_utf8_lossy(&after[1..1 + close]);
2024 let (line, column) = offset_to_line_column(bytes, offset);
2025 errors.push(
2026 ParseError::new(format!(
2027 "requirement definition uses non-standard `id '{req_id}'` syntax; use a short name in angle brackets"
2028 ))
2029 .with_location(offset, line, column)
2030 .with_length(pattern.len().max(1))
2031 .with_code("invalid_requirement_short_name_syntax")
2032 .with_expected("short name in angle brackets after `requirement def`".to_string())
2033 .with_suggestion(format!(
2034 "Use `requirement def <'{req_id}'> ...` instead of `requirement def id '{req_id}' ...`."
2035 ))
2036 .with_category(DiagnosticCategory::ParseError),
2037 );
2038 search_from = offset + pattern.len();
2039 }
2040 errors
2041}
2042
/// Converts a byte `offset` into a 1-based `(line, column)` pair.
///
/// Only the bytes before `offset` are examined; each `\n` starts a new line
/// and every other byte advances the column. An offset past the end behaves
/// like the end of `bytes`.
fn offset_to_line_column(bytes: &[u8], offset: usize) -> (u32, usize) {
    bytes
        .iter()
        .take(offset)
        .fold((1u32, 1usize), |(line, column), &byte| {
            if byte == b'\n' {
                (line + 1, 1)
            } else {
                (line, column + 1)
            }
        })
}
2059
2060fn collect_recovery_errors(root: &RootNamespace) -> Vec<ParseError> {
2061 let mut errors = Vec::new();
2062 for element in &root.elements {
2063 match &element.value {
2064 crate::ast::RootElement::Package(n) => {
2065 collect_package_body_errors(&n.value.body, &mut errors)
2066 }
2067 crate::ast::RootElement::LibraryPackage(n) => {
2068 collect_package_body_errors(&n.value.body, &mut errors)
2069 }
2070 crate::ast::RootElement::Namespace(n) => {
2071 collect_package_body_errors(&n.value.body, &mut errors)
2072 }
2073 crate::ast::RootElement::Import(_) => {}
2074 }
2075 }
2076 errors
2077}
2078
2079#[allow(clippy::result_large_err)]
2081pub fn parse_root(input: &str) -> Result<RootNamespace, ParseError> {
2082 let bytes = input
2083 .strip_prefix('\u{FEFF}')
2084 .map(str::as_bytes)
2085 .unwrap_or_else(|| input.as_bytes());
2086 let located = LocatedSpan::new(bytes);
2087 match package::root_namespace(located) {
2088 Ok((rest, root)) => {
2089 if !rest.fragment().is_empty() && has_unclosed_brace(bytes) {
2090 return Err(missing_closing_brace_error_at_eof(bytes));
2091 }
2092 if rest.fragment().is_empty() {
2093 log::debug!("parse_root: success, {} top-level elements", root.elements.len());
2094 Ok(root)
2095 } else if trim_ascii_start(rest.fragment()).starts_with(b"}") {
2096 Err(unexpected_closing_brace_parse_error(rest))
2097 } else {
2098 let offset = located.location_offset() + located.fragment().len() - rest.fragment().len();
2099 let unconsumed = rest.fragment();
2100 let first_80 = unconsumed.get(..80.min(unconsumed.len())).unwrap_or(unconsumed);
2101 log::debug!(
2102 "parse_root: expected end of input; parsed {} elements; unconsumed len={}, offset={}, first 80 bytes: {:?}",
2103 root.elements.len(),
2104 unconsumed.len(),
2105 offset,
2106 first_80,
2107 );
2108 log::debug!(
2109 "parse_root: unconsumed as str: {:?}",
2110 String::from_utf8_lossy(first_80),
2111 );
2112 let (found_snippet, found_len) = fragment_to_found_snippet(rest.fragment());
2113 let mut pe = ParseError::new("expected end of input")
2114 .with_location(offset, rest.location_line(), rest.get_column())
2115 .with_length(found_len.max(1))
2116 .with_code("expected_end_of_input")
2117 .with_category(DiagnosticCategory::ParseError);
2118 if !found_snippet.is_empty() {
2119 pe = pe.with_found(found_snippet);
2120 }
2121 if root.elements.is_empty() && is_illegal_top_level_definition(rest.fragment()) {
2122 pe = pe
2123 .with_code("illegal_top_level_definition")
2124 .with_expected("'package', 'namespace', or 'import'")
2125 .with_suggestion(
2126 "Wrap this declaration in `package ... { ... }` or `namespace ... { ... }`.",
2127 );
2128 pe.message = "illegal top-level definition".to_string();
2129 }
2130 Err(pe)
2131 }
2132 }
2133 Err(nom::Err::Error(e)) => Err(missing_closing_brace_error(bytes, e.input).unwrap_or_else(|| {
2134 nom_err_to_parse_error(
2135 &e,
2136 None,
2137 Some("'package', 'namespace', or 'import' at top level; or valid element in package body"),
2138 )
2139 })),
2140 Err(nom::Err::Failure(e)) => Err(missing_closing_brace_error(bytes, e.input).unwrap_or_else(|| {
2141 nom_err_to_parse_error(
2142 &e,
2143 None,
2144 Some("'package', 'namespace', or 'import' at top level; or valid element in package body"),
2145 )
2146 })),
2147 Err(nom::Err::Incomplete(_)) => Err(
2148 ParseError::new("unexpected end of input")
2149 .with_code("unexpected_eof")
2150 .with_category(DiagnosticCategory::ParseError),
2151 ),
2152 }
2153}
2154
/// Error count at which the recovery loop in `parse_with_diagnostics` stops trying to
/// parse further root elements (the loop runs only while fewer errors are recorded).
const MAX_RECOVERY_ERRORS: usize = 100;
2156
2157pub fn parse_with_diagnostics(input: &str) -> ParseResult {
2160 let bytes = input
2161 .strip_prefix('\u{FEFF}')
2162 .map(str::as_bytes)
2163 .unwrap_or_else(|| input.as_bytes());
2164 let located = LocatedSpan::new(bytes);
2165
2166 let mut elements = Vec::new();
2167 let mut errors = Vec::new();
2168
2169 let (mut input, _) = match lex::ws_and_comments(located) {
2170 Ok(x) => x,
2171 Err(_) => {
2172 return ParseResult {
2173 root: RootNamespace { elements: vec![] },
2174 errors: vec![ParseError::new("invalid input")
2175 .with_code("invalid_input")
2176 .with_category(DiagnosticCategory::ParseError)],
2177 };
2178 }
2179 };
2180
2181 while errors.len() < MAX_RECOVERY_ERRORS {
2182 let (rest, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
2184 input = rest;
2185 if input.fragment().is_empty() {
2186 break;
2187 }
2188 match package::root_element(input) {
2189 Ok((rest, elem)) => {
2190 elements.push(elem);
2191 input = rest;
2192 }
2193 Err(nom::Err::Error(e)) | Err(nom::Err::Failure(e)) => {
2194 let (trimmed, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
2195 if trim_ascii_start(trimmed.fragment()).starts_with(b"}") {
2196 errors.push(unexpected_closing_brace_parse_error(trimmed));
2197 let skip_result = lex::skip_to_next_sync_point(trimmed);
2198 match skip_result {
2199 Ok((rest, _)) => input = rest,
2200 Err(_) => break,
2201 }
2202 continue;
2203 }
2204 if errors.is_empty()
2205 && has_unclosed_brace(bytes)
2206 && (lex::starts_with_keyword(trimmed.fragment(), b"package")
2207 || lex::starts_with_keyword(trimmed.fragment(), b"namespace")
2208 || lex::starts_with_keyword(trimmed.fragment(), b"library")
2209 || lex::starts_with_keyword(trimmed.fragment(), b"standard"))
2210 {
2211 errors.push(missing_closing_brace_error_at_eof(bytes));
2212 break;
2213 }
2214 if let Some(scope) = root_body_scope(input.fragment()) {
2215 let (error_input, _) = lex::ws_and_comments(e.input).unwrap_or((e.input, ()));
2216 if error_input.fragment().starts_with(b"{") {
2217 errors.push(root_body_recovery_error(error_input, scope));
2218 match lex::skip_statement_or_block(error_input) {
2219 Ok((rest, _))
2220 if rest.location_offset() > error_input.location_offset() =>
2221 {
2222 input = rest;
2223 continue;
2224 }
2225 _ => {}
2226 }
2227 }
2228 }
2229 let pe = missing_closing_brace_error(bytes, e.input).unwrap_or_else(|| {
2230 nom_err_to_parse_error(&e, None, Some("'package', 'namespace', or 'import'"))
2231 });
2232 errors.push(pe);
2233 let skip_result = lex::skip_to_next_sync_point(e.input);
2234 match skip_result {
2235 Ok((rest, _)) => input = rest,
2236 Err(_) => break,
2237 }
2238 }
2239 Err(nom::Err::Incomplete(_)) => {
2240 errors.push(
2241 ParseError::new("unexpected end of input")
2242 .with_location(
2243 input.location_offset(),
2244 input.location_line(),
2245 input.get_column(),
2246 )
2247 .with_length(1)
2248 .with_code("unexpected_eof")
2249 .with_category(DiagnosticCategory::ParseError),
2250 );
2251 break;
2252 }
2253 }
2254 }
2255
2256 let (input, _) = lex::ws_and_comments(input).unwrap_or((input, ()));
2257
2258 if input.fragment().is_empty()
2259 && !errors.iter().any(|e| {
2260 matches!(
2261 e.code.as_deref(),
2262 Some("missing_closing_brace") | Some("unexpected_closing_brace")
2263 )
2264 })
2265 {
2266 if let Some(err) = extra_closing_brace_at_eof(bytes) {
2267 errors.push(err);
2268 } else if has_unclosed_brace(bytes) {
2269 errors.push(missing_closing_brace_error_at_eof(bytes));
2270 }
2271 }
2272
2273 if !input.fragment().is_empty()
2274 && !errors
2275 .iter()
2276 .any(|e| e.code.as_deref() == Some("missing_closing_brace"))
2277 {
2278 if trim_ascii_start(input.fragment()).starts_with(b"}") {
2279 errors.push(unexpected_closing_brace_parse_error(input));
2280 } else {
2281 let (found_snippet, found_len) = fragment_to_found_snippet(input.fragment());
2282 let mut pe = ParseError::new("expected end of input")
2283 .with_location(
2284 input.location_offset(),
2285 input.location_line(),
2286 input.get_column(),
2287 )
2288 .with_length(found_len.max(1))
2289 .with_code("expected_end_of_input")
2290 .with_severity(DiagnosticSeverity::Error)
2291 .with_category(DiagnosticCategory::ParseError);
2292 if !found_snippet.is_empty() {
2293 pe = pe.with_found(found_snippet);
2294 }
2295 errors.push(pe);
2296 }
2297 }
2298
2299 errors.extend(collect_recovery_errors(&RootNamespace {
2300 elements: elements.clone(),
2301 }));
2302 errors.extend(collect_implicit_attribute_in_part_def_warnings(bytes));
2303 errors.extend(collect_requirement_id_dialect_diagnostics(bytes));
2304 errors = suppress_redundant_closing_brace_errors(errors);
2305 errors = dedup_errors(errors);
2306 errors = suppress_diagnostic_cascades(errors);
2307
2308 ParseResult {
2309 root: RootNamespace { elements },
2310 errors,
2311 }
2312}