use perl_ast::Node;
/// Categories of syntax error that can be attached to a parser error node.
///
/// Besides `Debug`, `Clone` and `PartialEq`, the type is `Eq` and `Hash`, so a
/// kind can be used as a key in hash-based collections (for example to count
/// or de-duplicate diagnostics by category).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ParseErrorKind {
    /// A token other than the expected one was encountered.
    UnexpectedToken {
        /// Description of what was expected at this position.
        expected: String,
        /// Description of what was actually found.
        found: String,
    },
    /// A string literal is missing its closing quote.
    UnclosedString,
    /// A regular expression is missing its closing delimiter.
    UnclosedRegex,
    /// A code block is missing its closing `}`.
    UnclosedBlock,
    /// A statement is missing its terminating `;`.
    MissingSemicolon,
    /// Generic syntax error that fits no more specific category.
    InvalidSyntax,
    /// A `(` has no matching `)`.
    UnclosedParenthesis,
    /// A `[` has no matching `]`.
    UnclosedBracket,
    /// A `{` has no matching `}`.
    UnclosedBrace,
    /// A heredoc has no terminator marker.
    UnterminatedHeredoc,
    /// A variable name is not a valid identifier.
    InvalidVariableName,
    /// A subroutine name is not a valid identifier.
    InvalidSubroutineName,
    /// An operator is missing between operands.
    MissingOperator,
    /// An operand is missing after an operator.
    MissingOperand,
    /// The input ended before the construct was complete.
    UnexpectedEof,
}
/// Stateless classifier that maps parser error nodes to a [`ParseErrorKind`]
/// and provides human-readable messages for each kind.
///
/// The type carries no data, so it is freely copyable.
#[derive(Debug, Clone, Copy)]
pub struct ErrorClassifier;
impl Default for ErrorClassifier {
fn default() -> Self {
Self::new()
}
}
impl ErrorClassifier {
pub fn new() -> Self {
ErrorClassifier
}
pub fn classify(&self, error_node: &Node, source: &str) -> ParseErrorKind {
let error_text = {
let start = error_node.location.start;
let end = (start + 10).min(source.len()); if start < source.len() && end <= source.len() && start <= end {
&source[start..end]
} else {
""
}
};
let quote_count = source.matches('"').count();
let single_quote_count = source.matches('\'').count();
if !quote_count.is_multiple_of(2) {
return ParseErrorKind::UnclosedString;
}
if !single_quote_count.is_multiple_of(2) {
return ParseErrorKind::UnclosedString;
}
if error_text.starts_with('"') && !error_text.ends_with('"') {
return ParseErrorKind::UnclosedString;
}
if error_text.starts_with('\'') && !error_text.ends_with('\'') {
return ParseErrorKind::UnclosedString;
}
if error_text.starts_with('/') && !error_text.contains("//") {
if !error_text[1..].contains('/') {
return ParseErrorKind::UnclosedRegex;
}
}
{
let pos = error_node.location.start;
let line_start = source[..pos].rfind('\n').map(|i| i + 1).unwrap_or(0);
let line_end = source[pos..].find('\n').map(|i| pos + i).unwrap_or(source.len());
let line = &source[line_start..line_end];
if !line.trim().is_empty()
&& !line.trim().ends_with(';')
&& !line.trim().ends_with('{')
&& !line.trim().ends_with('}')
{
if line.contains("my ")
|| line.contains("our ")
|| line.contains("local ")
|| line.contains("print ")
|| line.contains("say ")
|| line.contains("return ")
{
return ParseErrorKind::MissingSemicolon;
}
}
let open_parens = line.matches('(').count();
let close_parens = line.matches(')').count();
if open_parens > close_parens {
return ParseErrorKind::UnclosedParenthesis;
}
let open_brackets = line.matches('[').count();
let close_brackets = line.matches(']').count();
if open_brackets > close_brackets {
return ParseErrorKind::UnclosedBracket;
}
let open_braces = line.matches('{').count();
let close_braces = line.matches('}').count();
if open_braces > close_braces {
return ParseErrorKind::UnclosedBrace;
}
}
if error_node.location.start >= source.len() - 1 {
return ParseErrorKind::UnexpectedEof;
}
ParseErrorKind::InvalidSyntax
}
pub fn get_diagnostic_message(&self, kind: &ParseErrorKind) -> String {
match kind {
ParseErrorKind::UnexpectedToken { expected, found } => {
format!("Expected {} but found {}", expected, found)
}
ParseErrorKind::UnclosedString => "Unclosed string literal".to_string(),
ParseErrorKind::UnclosedRegex => "Unclosed regular expression".to_string(),
ParseErrorKind::UnclosedBlock => "Unclosed code block - missing '}'".to_string(),
ParseErrorKind::MissingSemicolon => "Missing semicolon at end of statement".to_string(),
ParseErrorKind::InvalidSyntax => "Invalid syntax".to_string(),
ParseErrorKind::UnclosedParenthesis => "Unclosed parenthesis - missing ')'".to_string(),
ParseErrorKind::UnclosedBracket => "Unclosed bracket - missing ']'".to_string(),
ParseErrorKind::UnclosedBrace => "Unclosed brace - missing '}'".to_string(),
ParseErrorKind::UnterminatedHeredoc => "Unterminated heredoc".to_string(),
ParseErrorKind::InvalidVariableName => "Invalid variable name".to_string(),
ParseErrorKind::InvalidSubroutineName => "Invalid subroutine name".to_string(),
ParseErrorKind::MissingOperator => "Missing operator".to_string(),
ParseErrorKind::MissingOperand => "Missing operand".to_string(),
ParseErrorKind::UnexpectedEof => "Unexpected end of file".to_string(),
}
}
pub fn get_suggestion(&self, kind: &ParseErrorKind) -> Option<String> {
match kind {
ParseErrorKind::MissingSemicolon => {
Some("Add a semicolon ';' at the end of the statement".to_string())
}
ParseErrorKind::UnclosedString => {
Some("Add a closing quote to terminate the string".to_string())
}
ParseErrorKind::UnclosedParenthesis => {
Some("Add a closing parenthesis ')' to match the opening '('".to_string())
}
ParseErrorKind::UnclosedBracket => {
Some("Add a closing bracket ']' to match the opening '['".to_string())
}
ParseErrorKind::UnclosedBrace => {
Some("Add a closing brace '}' to match the opening '{'".to_string())
}
ParseErrorKind::UnclosedBlock => {
Some("Add a closing brace '}' to complete the code block".to_string())
}
ParseErrorKind::UnclosedRegex => {
Some("Add a closing delimiter to terminate the regex pattern".to_string())
}
ParseErrorKind::UnterminatedHeredoc => {
Some("Add the heredoc terminator marker on its own line".to_string())
}
ParseErrorKind::InvalidVariableName => {
Some("Variable names must start with a letter or underscore, followed by alphanumeric characters or underscores".to_string())
}
ParseErrorKind::InvalidSubroutineName => {
Some("Subroutine names must start with a letter or underscore, followed by alphanumeric characters or underscores".to_string())
}
ParseErrorKind::MissingOperator => {
Some("Add an operator between operands (e.g., +, -, *, /, ., ==, !=)".to_string())
}
ParseErrorKind::MissingOperand => {
Some("Add a value or expression after the operator".to_string())
}
ParseErrorKind::UnexpectedEof => {
Some("The file ended unexpectedly - check for unclosed blocks, strings, or parentheses".to_string())
}
ParseErrorKind::UnexpectedToken { expected, found: _ } => {
Some(format!("Expected {} at this location", expected))
}
ParseErrorKind::InvalidSyntax => None,
}
}
pub fn get_explanation(&self, kind: &ParseErrorKind) -> Option<String> {
match kind {
ParseErrorKind::MissingSemicolon => {
Some("In Perl, most statements must end with a semicolon. The only exceptions are the last statement in a block and statements that end with a block (like if, while, sub, etc.).".to_string())
}
ParseErrorKind::UnclosedString => {
Some("String literals must be properly terminated with a matching quote. Use double quotes (\") for interpolated strings or single quotes (') for literal strings.".to_string())
}
ParseErrorKind::UnclosedRegex => {
Some("Regular expressions must be properly delimited. Common forms include /pattern/, m/pattern/, s/old/new/, and qr/pattern/.".to_string())
}
ParseErrorKind::UnterminatedHeredoc => {
Some("Heredoc blocks must have their terminator marker appear on a line by itself with no leading or trailing whitespace (unless using <<~MARKER for indented heredocs).".to_string())
}
ParseErrorKind::InvalidVariableName => {
Some("Perl variable names (after the sigil) must follow identifier rules: start with a letter (a-z, A-Z) or underscore (_), followed by any combination of letters, digits, or underscores.".to_string())
}
ParseErrorKind::UnclosedBlock => {
Some("Code blocks must have matching braces. Each opening '{' needs a corresponding closing '}'.".to_string())
}
_ => None,
}
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use perl_ast::{Node, NodeKind, SourceLocation};

    /// Builds an error node with the given message covering bytes `start..end`.
    fn error_node(message: &str, start: usize, end: usize) -> Node {
        let kind = NodeKind::Error {
            message: message.to_string(),
            expected: vec![],
            found: None,
            partial: None,
        };
        Node::new(kind, SourceLocation { start, end })
    }

    /// A lone opening double quote is reported as an unclosed string.
    #[test]
    fn test_classify_unclosed_string() {
        let source = r#"my $x = "hello"#;
        let node = error_node("Unclosed string", 9, 15);

        let classified = ErrorClassifier::new().classify(&node, source);

        assert_eq!(classified, ParseErrorKind::UnclosedString);
    }

    /// A `my` declaration that runs into the next line without `;` is
    /// reported as a missing semicolon.
    #[test]
    fn test_classify_missing_semicolon() {
        let source = "my $x = 42\nmy $y = 10";
        let node = error_node("Unexpected token", 10, 11);

        let classified = ErrorClassifier::new().classify(&node, source);

        assert_eq!(classified, ParseErrorKind::MissingSemicolon);
    }
}