1use perl_ast::Node;
33
/// Category of a parse error, as produced by [`ErrorClassifier::classify`] and
/// consumed by the message, suggestion and explanation lookups.
///
/// `Eq` and `Hash` are derived in addition to `PartialEq` so that kinds can be
/// used as keys in sets and maps (for example to count errors per category).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ParseErrorKind {
    /// A token other than the expected one was encountered.
    UnexpectedToken {
        /// Description of what was expected at this position.
        expected: String,
        /// Description of what was found instead.
        found: String,
    },
    /// A string literal is missing its closing quote.
    UnclosedString,
    /// A regular expression is missing its closing delimiter.
    UnclosedRegex,
    /// A code block is missing its closing `}`.
    UnclosedBlock,
    /// A statement is missing its terminating `;`.
    MissingSemicolon,
    /// Fallback for errors that match no more specific category.
    InvalidSyntax,
    /// A `(` has no matching `)`.
    UnclosedParenthesis,
    /// A `[` has no matching `]`.
    UnclosedBracket,
    /// A `{` has no matching `}`.
    UnclosedBrace,
    /// A heredoc is missing its terminator marker.
    UnterminatedHeredoc,
    /// A variable name does not follow identifier rules.
    InvalidVariableName,
    /// A subroutine name does not follow identifier rules.
    InvalidSubroutineName,
    /// An operator is missing between operands.
    MissingOperator,
    /// An operand is missing after an operator.
    MissingOperand,
    /// The input ended before the construct was complete.
    UnexpectedEof,
}
76
/// Stateless classifier that maps parse-error nodes to a [`ParseErrorKind`]
/// and provides human-readable messages for each kind.
///
/// The type carries no data, so it is trivially `Copy`; `Debug` is derived so
/// that structs embedding a classifier can themselves derive `Debug`.
#[derive(Debug, Clone, Copy)]
pub struct ErrorClassifier;
82
83impl Default for ErrorClassifier {
84 fn default() -> Self {
85 Self::new()
86 }
87}
88
89impl ErrorClassifier {
90 pub fn new() -> Self {
96 ErrorClassifier
97 }
98
99 pub fn classify(&self, error_node: &Node, source: &str) -> ParseErrorKind {
113 let error_text = {
115 let start = error_node.location.start;
116 let end = (start + 10).min(source.len()); if start < source.len() && end <= source.len() && start <= end {
118 &source[start..end]
119 } else {
120 ""
121 }
122 };
123
124 let quote_count = source.matches('"').count();
126 let single_quote_count = source.matches('\'').count();
127
128 if !quote_count.is_multiple_of(2) {
130 return ParseErrorKind::UnclosedString;
131 }
132 if !single_quote_count.is_multiple_of(2) {
133 return ParseErrorKind::UnclosedString;
134 }
135
136 if error_text.starts_with('"') && !error_text.ends_with('"') {
138 return ParseErrorKind::UnclosedString;
139 }
140
141 if error_text.starts_with('\'') && !error_text.ends_with('\'') {
142 return ParseErrorKind::UnclosedString;
143 }
144
145 if error_text.starts_with('/') && !error_text.contains("//") {
146 if !error_text[1..].contains('/') {
148 return ParseErrorKind::UnclosedRegex;
149 }
150 }
151
152 {
154 let pos = error_node.location.start;
155 let line_start = source[..pos].rfind('\n').map(|i| i + 1).unwrap_or(0);
156 let line_end = source[pos..].find('\n').map(|i| pos + i).unwrap_or(source.len());
157
158 let line = &source[line_start..line_end];
159
160 if !line.trim().is_empty()
162 && !line.trim().ends_with(';')
163 && !line.trim().ends_with('{')
164 && !line.trim().ends_with('}')
165 {
166 if line.contains("my ")
168 || line.contains("our ")
169 || line.contains("local ")
170 || line.contains("print ")
171 || line.contains("say ")
172 || line.contains("return ")
173 {
174 return ParseErrorKind::MissingSemicolon;
175 }
176 }
177
178 let open_parens = line.matches('(').count();
180 let close_parens = line.matches(')').count();
181 if open_parens > close_parens {
182 return ParseErrorKind::UnclosedParenthesis;
183 }
184
185 let open_brackets = line.matches('[').count();
186 let close_brackets = line.matches(']').count();
187 if open_brackets > close_brackets {
188 return ParseErrorKind::UnclosedBracket;
189 }
190
191 let open_braces = line.matches('{').count();
192 let close_braces = line.matches('}').count();
193 if open_braces > close_braces {
194 return ParseErrorKind::UnclosedBrace;
195 }
196 }
197
198 if error_node.location.start >= source.len() - 1 {
200 return ParseErrorKind::UnexpectedEof;
201 }
202
203 ParseErrorKind::InvalidSyntax
205 }
206
207 pub fn get_diagnostic_message(&self, kind: &ParseErrorKind) -> String {
220 match kind {
221 ParseErrorKind::UnexpectedToken { expected, found } => {
222 format!("Expected {} but found {}", expected, found)
223 }
224 ParseErrorKind::UnclosedString => "Unclosed string literal".to_string(),
225 ParseErrorKind::UnclosedRegex => "Unclosed regular expression".to_string(),
226 ParseErrorKind::UnclosedBlock => "Unclosed code block - missing '}'".to_string(),
227 ParseErrorKind::MissingSemicolon => "Missing semicolon at end of statement".to_string(),
228 ParseErrorKind::InvalidSyntax => "Invalid syntax".to_string(),
229 ParseErrorKind::UnclosedParenthesis => "Unclosed parenthesis - missing ')'".to_string(),
230 ParseErrorKind::UnclosedBracket => "Unclosed bracket - missing ']'".to_string(),
231 ParseErrorKind::UnclosedBrace => "Unclosed brace - missing '}'".to_string(),
232 ParseErrorKind::UnterminatedHeredoc => "Unterminated heredoc".to_string(),
233 ParseErrorKind::InvalidVariableName => "Invalid variable name".to_string(),
234 ParseErrorKind::InvalidSubroutineName => "Invalid subroutine name".to_string(),
235 ParseErrorKind::MissingOperator => "Missing operator".to_string(),
236 ParseErrorKind::MissingOperand => "Missing operand".to_string(),
237 ParseErrorKind::UnexpectedEof => "Unexpected end of file".to_string(),
238 }
239 }
240
241 pub fn get_suggestion(&self, kind: &ParseErrorKind) -> Option<String> {
254 match kind {
255 ParseErrorKind::MissingSemicolon => {
256 Some("Add a semicolon ';' at the end of the statement".to_string())
257 }
258 ParseErrorKind::UnclosedString => {
259 Some("Add a closing quote to terminate the string".to_string())
260 }
261 ParseErrorKind::UnclosedParenthesis => {
262 Some("Add a closing parenthesis ')' to match the opening '('".to_string())
263 }
264 ParseErrorKind::UnclosedBracket => {
265 Some("Add a closing bracket ']' to match the opening '['".to_string())
266 }
267 ParseErrorKind::UnclosedBrace => {
268 Some("Add a closing brace '}' to match the opening '{'".to_string())
269 }
270 ParseErrorKind::UnclosedBlock => {
271 Some("Add a closing brace '}' to complete the code block".to_string())
272 }
273 ParseErrorKind::UnclosedRegex => {
274 Some("Add a closing delimiter to terminate the regex pattern".to_string())
275 }
276 ParseErrorKind::UnterminatedHeredoc => {
277 Some("Add the heredoc terminator marker on its own line".to_string())
278 }
279 ParseErrorKind::InvalidVariableName => {
280 Some("Variable names must start with a letter or underscore, followed by alphanumeric characters or underscores".to_string())
281 }
282 ParseErrorKind::InvalidSubroutineName => {
283 Some("Subroutine names must start with a letter or underscore, followed by alphanumeric characters or underscores".to_string())
284 }
285 ParseErrorKind::MissingOperator => {
286 Some("Add an operator between operands (e.g., +, -, *, /, ., ==, !=)".to_string())
287 }
288 ParseErrorKind::MissingOperand => {
289 Some("Add a value or expression after the operator".to_string())
290 }
291 ParseErrorKind::UnexpectedEof => {
292 Some("The file ended unexpectedly - check for unclosed blocks, strings, or parentheses".to_string())
293 }
294 ParseErrorKind::UnexpectedToken { expected, found: _ } => {
295 Some(format!("Expected {} at this location", expected))
296 }
297 ParseErrorKind::InvalidSyntax => None,
298 }
299 }
300
301 pub fn get_explanation(&self, kind: &ParseErrorKind) -> Option<String> {
314 match kind {
315 ParseErrorKind::MissingSemicolon => {
316 Some("In Perl, most statements must end with a semicolon. The only exceptions are the last statement in a block and statements that end with a block (like if, while, sub, etc.).".to_string())
317 }
318 ParseErrorKind::UnclosedString => {
319 Some("String literals must be properly terminated with a matching quote. Use double quotes (\") for interpolated strings or single quotes (') for literal strings.".to_string())
320 }
321 ParseErrorKind::UnclosedRegex => {
322 Some("Regular expressions must be properly delimited. Common forms include /pattern/, m/pattern/, s/old/new/, and qr/pattern/.".to_string())
323 }
324 ParseErrorKind::UnterminatedHeredoc => {
325 Some("Heredoc blocks must have their terminator marker appear on a line by itself with no leading or trailing whitespace (unless using <<~MARKER for indented heredocs).".to_string())
326 }
327 ParseErrorKind::InvalidVariableName => {
328 Some("Perl variable names (after the sigil) must follow identifier rules: start with a letter (a-z, A-Z) or underscore (_), followed by any combination of letters, digits, or underscores.".to_string())
329 }
330 ParseErrorKind::UnclosedBlock => {
331 Some("Code blocks must have matching braces. Each opening '{' needs a corresponding closing '}'.".to_string())
332 }
333 _ => None,
334 }
335 }
336}
337
#[cfg(test)]
mod tests {
    use super::*;
    use perl_ast::{Node, NodeKind, SourceLocation};

    /// Builds an error node carrying `message` and spanning `start..end`,
    /// with no expected tokens, no found token and no partial node.
    fn error_node(message: &str, start: usize, end: usize) -> Node {
        let kind = NodeKind::Error {
            message: message.to_string(),
            expected: vec![],
            found: None,
            partial: None,
        };
        Node::new(kind, SourceLocation { start, end })
    }

    /// A source with a single, unmatched double quote is an unclosed string.
    #[test]
    fn test_classify_unclosed_string() {
        let source = r#"my $x = "hello"#;
        let node = error_node("Unclosed string", 9, 15);

        let classified = ErrorClassifier::new().classify(&node, source);

        assert_eq!(classified, ParseErrorKind::UnclosedString);
    }

    /// An error at the end of a `my` statement lacking `;` is a missing semicolon.
    #[test]
    fn test_classify_missing_semicolon() {
        let source = "my $x = 42\nmy $y = 10";
        let node = error_node("Unexpected token", 10, 11);

        let classified = ErrorClassifier::new().classify(&node, source);

        assert_eq!(classified, ParseErrorKind::MissingSemicolon);
    }
}