perl_parser_core/syntax/error/
classifier.rs1use super::ParseError;
33use perl_ast::Node;
34
/// Category assigned to a parse error so that tooling can show a targeted
/// message, suggestion and explanation for it.
///
/// `Eq` is derived alongside `PartialEq` because every payload is a `String`,
/// which lets the kind be used wherever total equality is required.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParseErrorKind {
    /// A token other than the expected one was encountered.
    UnexpectedToken {
        /// Description of what was expected at this position.
        expected: String,
        /// Description of what was actually found.
        found: String,
    },
    /// A string literal is missing its closing quote.
    UnclosedString,
    /// A regular expression is missing its closing delimiter.
    UnclosedRegex,
    /// A code block is missing its closing `}`.
    UnclosedBlock,
    /// A statement is missing its terminating `;`.
    MissingSemicolon,
    /// Fallback used when no more specific kind applies.
    InvalidSyntax,
    /// A `(` has no matching `)`.
    UnclosedParenthesis,
    /// A `[` has no matching `]`.
    UnclosedBracket,
    /// A `{` has no matching `}`.
    UnclosedBrace,
    /// A heredoc is missing its terminator marker.
    UnterminatedHeredoc,
    /// A variable name does not follow identifier rules.
    InvalidVariableName,
    /// A subroutine name does not follow identifier rules.
    InvalidSubroutineName,
    /// An operator is missing between two operands.
    MissingOperator,
    /// An operand is missing after an operator.
    MissingOperand,
    /// The input ended before the construct was complete.
    UnexpectedEof,
}
77
/// Stateless helper that maps parse-error nodes to a [`ParseErrorKind`] and
/// produces human-readable messages, suggestions and explanations for them.
///
/// The type has no fields, so it is trivially `Copy` and cheap to create.
#[derive(Debug, Clone, Copy)]
pub struct ErrorClassifier;
83
84impl Default for ErrorClassifier {
85 fn default() -> Self {
86 Self::new()
87 }
88}
89
90impl ErrorClassifier {
91 pub fn new() -> Self {
97 ErrorClassifier
98 }
99
100 pub fn classify(&self, error_node: &Node, source: &str) -> ParseErrorKind {
114 let error_text = {
116 let start = error_node.location.start;
117 let end = (start + 10).min(source.len()); if start < source.len() && end <= source.len() && start <= end {
119 &source[start..end]
120 } else {
121 ""
122 }
123 };
124
125 let quote_count = source.matches('"').count();
127 let single_quote_count = source.matches('\'').count();
128
129 if !quote_count.is_multiple_of(2) {
131 return ParseErrorKind::UnclosedString;
132 }
133 if !single_quote_count.is_multiple_of(2) {
134 return ParseErrorKind::UnclosedString;
135 }
136
137 if error_text.starts_with('"') && !error_text.ends_with('"') {
139 return ParseErrorKind::UnclosedString;
140 }
141
142 if error_text.starts_with('\'') && !error_text.ends_with('\'') {
143 return ParseErrorKind::UnclosedString;
144 }
145
146 if error_text.starts_with('/') && !error_text.contains("//") {
147 if !error_text[1..].contains('/') {
149 return ParseErrorKind::UnclosedRegex;
150 }
151 }
152
153 {
155 let pos = error_node.location.start;
156 let line_start = source[..pos].rfind('\n').map(|i| i + 1).unwrap_or(0);
157 let line_end = source[pos..].find('\n').map(|i| pos + i).unwrap_or(source.len());
158
159 let line = &source[line_start..line_end];
160
161 if !line.trim().is_empty()
163 && !line.trim().ends_with(';')
164 && !line.trim().ends_with('{')
165 && !line.trim().ends_with('}')
166 {
167 if line.contains("my ")
169 || line.contains("our ")
170 || line.contains("local ")
171 || line.contains("print ")
172 || line.contains("say ")
173 || line.contains("return ")
174 {
175 return ParseErrorKind::MissingSemicolon;
176 }
177 }
178
179 let open_parens = line.matches('(').count();
181 let close_parens = line.matches(')').count();
182 if open_parens > close_parens {
183 return ParseErrorKind::UnclosedParenthesis;
184 }
185
186 let open_brackets = line.matches('[').count();
187 let close_brackets = line.matches(']').count();
188 if open_brackets > close_brackets {
189 return ParseErrorKind::UnclosedBracket;
190 }
191
192 let open_braces = line.matches('{').count();
193 let close_braces = line.matches('}').count();
194 if open_braces > close_braces {
195 return ParseErrorKind::UnclosedBrace;
196 }
197 }
198
199 if error_node.location.start >= source.len() - 1 {
201 return ParseErrorKind::UnexpectedEof;
202 }
203
204 ParseErrorKind::InvalidSyntax
206 }
207
208 pub fn get_diagnostic_message(&self, kind: &ParseErrorKind) -> String {
221 match kind {
222 ParseErrorKind::UnexpectedToken { expected, found } => {
223 format!("Expected {} but found {}", expected, found)
224 }
225 ParseErrorKind::UnclosedString => "Unclosed string literal".to_string(),
226 ParseErrorKind::UnclosedRegex => "Unclosed regular expression".to_string(),
227 ParseErrorKind::UnclosedBlock => "Unclosed code block - missing '}'".to_string(),
228 ParseErrorKind::MissingSemicolon => "Missing semicolon at end of statement".to_string(),
229 ParseErrorKind::InvalidSyntax => "Invalid syntax".to_string(),
230 ParseErrorKind::UnclosedParenthesis => "Unclosed parenthesis - missing ')'".to_string(),
231 ParseErrorKind::UnclosedBracket => "Unclosed bracket - missing ']'".to_string(),
232 ParseErrorKind::UnclosedBrace => "Unclosed brace - missing '}'".to_string(),
233 ParseErrorKind::UnterminatedHeredoc => "Unterminated heredoc".to_string(),
234 ParseErrorKind::InvalidVariableName => "Invalid variable name".to_string(),
235 ParseErrorKind::InvalidSubroutineName => "Invalid subroutine name".to_string(),
236 ParseErrorKind::MissingOperator => "Missing operator".to_string(),
237 ParseErrorKind::MissingOperand => "Missing operand".to_string(),
238 ParseErrorKind::UnexpectedEof => "Unexpected end of file".to_string(),
239 }
240 }
241
242 pub fn get_suggestion(&self, kind: &ParseErrorKind) -> Option<String> {
255 match kind {
256 ParseErrorKind::MissingSemicolon => {
257 Some("Add a semicolon ';' at the end of the statement".to_string())
258 }
259 ParseErrorKind::UnclosedString => {
260 Some("Add a closing quote to terminate the string".to_string())
261 }
262 ParseErrorKind::UnclosedParenthesis => {
263 Some("Add a closing parenthesis ')' to match the opening '('".to_string())
264 }
265 ParseErrorKind::UnclosedBracket => {
266 Some("Add a closing bracket ']' to match the opening '['".to_string())
267 }
268 ParseErrorKind::UnclosedBrace => {
269 Some("Add a closing brace '}' to match the opening '{'".to_string())
270 }
271 ParseErrorKind::UnclosedBlock => {
272 Some("Add a closing brace '}' to complete the code block".to_string())
273 }
274 ParseErrorKind::UnclosedRegex => {
275 Some("Add a closing delimiter to terminate the regex pattern".to_string())
276 }
277 ParseErrorKind::UnterminatedHeredoc => {
278 Some("Add the heredoc terminator marker on its own line".to_string())
279 }
280 ParseErrorKind::InvalidVariableName => {
281 Some("Variable names must start with a letter or underscore, followed by alphanumeric characters or underscores".to_string())
282 }
283 ParseErrorKind::InvalidSubroutineName => {
284 Some("Subroutine names must start with a letter or underscore, followed by alphanumeric characters or underscores".to_string())
285 }
286 ParseErrorKind::MissingOperator => {
287 Some("Add an operator between operands (e.g., +, -, *, /, ., ==, !=)".to_string())
288 }
289 ParseErrorKind::MissingOperand => {
290 Some("Add a value or expression after the operator".to_string())
291 }
292 ParseErrorKind::UnexpectedEof => {
293 Some("The file ended unexpectedly - check for unclosed blocks, strings, or parentheses".to_string())
294 }
295 ParseErrorKind::UnexpectedToken { expected, found: _ } => {
296 Some(format!("Expected {} at this location", expected))
297 }
298 ParseErrorKind::InvalidSyntax => None,
299 }
300 }
301
302 pub fn get_explanation(&self, kind: &ParseErrorKind) -> Option<String> {
315 match kind {
316 ParseErrorKind::MissingSemicolon => {
317 Some("In Perl, most statements must end with a semicolon. The only exceptions are the last statement in a block and statements that end with a block (like if, while, sub, etc.).".to_string())
318 }
319 ParseErrorKind::UnclosedString => {
320 Some("String literals must be properly terminated with a matching quote. Use double quotes (\") for interpolated strings or single quotes (') for literal strings.".to_string())
321 }
322 ParseErrorKind::UnclosedRegex => {
323 Some("Regular expressions must be properly delimited. Common forms include /pattern/, m/pattern/, s/old/new/, and qr/pattern/.".to_string())
324 }
325 ParseErrorKind::UnterminatedHeredoc => {
326 Some("Heredoc blocks must have their terminator marker appear on a line by itself with no leading or trailing whitespace (unless using <<~MARKER for indented heredocs).".to_string())
327 }
328 ParseErrorKind::InvalidVariableName => {
329 Some("Perl variable names (after the sigil) must follow identifier rules: start with a letter (a-z, A-Z) or underscore (_), followed by any combination of letters, digits, or underscores.".to_string())
330 }
331 ParseErrorKind::UnclosedBlock => {
332 Some("Code blocks must have matching braces. Each opening '{' needs a corresponding closing '}'.".to_string())
333 }
334 _ => None,
335 }
336 }
337}
338
/// Counters describing how much of a parse result came from error recovery.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct RecoverySalvageMetrics {
    /// Number of diagnostics that were reported as recovered.
    pub recovered_node_count: usize,
    /// Number of diagnostics that were not reported as recovered.
    pub unrecovered_diagnostic_count: usize,
    /// Number of error nodes present in the tree.
    pub error_node_count: usize,
    /// Message of the error node with the smallest start offset, if any.
    pub first_unrecovered_error_node: Option<String>,
}

impl RecoverySalvageMetrics {
    /// Returns `true` when any of the three counters is non-zero.
    pub fn is_dirty(&self) -> bool {
        let counters = [
            self.error_node_count,
            self.recovered_node_count,
            self.unrecovered_diagnostic_count,
        ];
        counters.iter().any(|&count| count != 0)
    }

    /// Returns `true` when something was recovered and there are neither
    /// error nodes nor unrecovered diagnostics.
    pub fn is_structured_recovery_only(&self) -> bool {
        let recovered_something = self.recovered_node_count != 0;
        let left_unrecovered =
            self.error_node_count != 0 || self.unrecovered_diagnostic_count != 0;
        recovered_something && !left_unrecovered
    }
}
380
381pub fn classify_recovery_salvage(ast: &Node, diagnostics: &[ParseError]) -> RecoverySalvageMetrics {
387 let mut error_node_count = 0usize;
388 let mut first_start = usize::MAX;
389 let mut first_unrecovered_error_node: Option<String> = None;
390
391 fn walk(
392 node: &Node,
393 error_node_count: &mut usize,
394 first_start: &mut usize,
395 first_unrecovered_error_node: &mut Option<String>,
396 ) {
397 if let perl_ast::NodeKind::Error { message, .. } = &node.kind {
398 *error_node_count = error_node_count.saturating_add(1);
399 if node.location.start < *first_start {
400 *first_start = node.location.start;
401 *first_unrecovered_error_node = Some(message.clone());
402 }
403 }
404 node.for_each_child(|child| {
405 walk(child, error_node_count, first_start, first_unrecovered_error_node);
406 });
407 }
408 walk(ast, &mut error_node_count, &mut first_start, &mut first_unrecovered_error_node);
409
410 let recovered_node_count =
411 diagnostics.iter().filter(|e| matches!(e, ParseError::Recovered { .. })).count();
412 let unrecovered_diagnostic_count = diagnostics.len().saturating_sub(recovered_node_count);
413
414 RecoverySalvageMetrics {
415 recovered_node_count,
416 unrecovered_diagnostic_count,
417 error_node_count,
418 first_unrecovered_error_node,
419 }
420}
421
#[cfg(test)]
mod tests {
    use super::*;
    use perl_ast::{Node, NodeKind, SourceLocation};

    /// Builds an error node with the given message and byte span and no
    /// expected/found/partial details.
    fn make_error_node(message: &str, start: usize, end: usize) -> Node {
        let kind = NodeKind::Error {
            message: message.to_string(),
            expected: vec![],
            found: None,
            partial: None,
        };
        Node::new(kind, SourceLocation { start, end })
    }

    /// Builds a program node spanning bytes 0..100 with `children` as its statements.
    fn make_program_node(children: Vec<Node>) -> Node {
        let span = SourceLocation { start: 0, end: 100 };
        Node::new(NodeKind::Program { statements: children }, span)
    }

    // A lone opening double quote in the source is classified as an unclosed string.
    #[test]
    fn test_classify_unclosed_string() {
        let source = "my $x = \"hello";
        let error_node = make_error_node("Unclosed string", 9, 15);

        let kind = ErrorClassifier::new().classify(&error_node, source);

        assert_eq!(kind, ParseErrorKind::UnclosedString);
    }

    // A `my` declaration line without a trailing `;` is classified as a missing semicolon.
    #[test]
    fn test_classify_missing_semicolon() {
        let source = "my $x = 42\nmy $y = 10";
        let error = make_error_node("Unexpected token", 10, 11);

        let kind = ErrorClassifier::new().classify(&error, source);

        assert_eq!(kind, ParseErrorKind::MissingSemicolon);
    }

    // No error nodes and no diagnostics: every counter is zero.
    #[test]
    fn clean_parse_produces_zero_metrics() {
        let metrics = classify_recovery_salvage(&make_program_node(vec![]), &[]);

        assert_eq!(metrics.recovered_node_count, 0);
        assert_eq!(metrics.unrecovered_diagnostic_count, 0);
        assert_eq!(metrics.error_node_count, 0);
        assert!(metrics.first_unrecovered_error_node.is_none());
        assert!(!metrics.is_dirty());
        assert!(!metrics.is_structured_recovery_only());
    }

    // An error node with no diagnostics is dirty, but not structured-recovery-only.
    #[test]
    fn error_node_without_diagnostics_is_dirty_but_not_structured_recovery() {
        let root = make_program_node(vec![make_error_node("unexpected token", 5, 10)]);

        let metrics = classify_recovery_salvage(&root, &[]);

        assert_eq!(metrics.error_node_count, 1);
        assert_eq!(metrics.recovered_node_count, 0);
        assert_eq!(metrics.unrecovered_diagnostic_count, 0);
        assert!(metrics.is_dirty(), "error node alone makes result dirty");
        assert!(
            !metrics.is_structured_recovery_only(),
            "no recovery diagnostics — not structured-recovery-only"
        );
        assert_eq!(metrics.first_unrecovered_error_node.as_deref(), Some("unexpected token"));
    }

    // The reported message belongs to the error node with the smallest start
    // offset, regardless of the order of the children.
    #[test]
    fn multiple_error_nodes_reports_earliest_by_start_offset() {
        let children = vec![
            make_error_node("later error", 50, 60),
            make_error_node("earlier error", 10, 20),
        ];

        let metrics = classify_recovery_salvage(&make_program_node(children), &[]);

        assert_eq!(metrics.error_node_count, 2);
        assert_eq!(
            metrics.first_unrecovered_error_node.as_deref(),
            Some("earlier error"),
            "earliest by start offset must win"
        );
    }
}