Skip to main content

perl_error/
classifier.rs

//! Error classification and diagnostic generation for Perl parsing workflows
//!
//! This module provides intelligent error classification for parsing failures in Perl scripts,
//! offering specific error types and recovery suggestions for LSP workflow operations.
//!
//! # LSP Workflow Integration
//!
//! Error classification supports robust Perl parsing across LSP workflow stages:
//! - **Parse**: Classify syntax errors during parser construction
//! - **Index**: Provide error context for symbol extraction and indexing
//! - **Navigate**: Surface recovery hints for definition and reference resolution
//! - **Complete**: Enable error-tolerant completion and quick fixes
//! - **Analyze**: Drive diagnostics and remediation guidance
//!
//! # Usage Examples
//!
//! ```ignore
//! use perl_parser::error_classifier::{ErrorClassifier, ParseErrorKind};
//! use perl_parser::{Parser, ast::Node};
//!
//! let classifier = ErrorClassifier::new();
//! let source = "my $value = \"unclosed string...";
//! let mut parser = Parser::new(source);
//! let _result = parser.parse(); // This will fail due to unclosed string
//!
//! // Classify parsing errors for better user feedback
//! // let error_kind = classifier.classify(&error_node, source);
//! // let message = classifier.get_diagnostic_message(&error_kind);
//! // let suggestion = classifier.get_suggestion(&error_kind);
//! ```
31
32use perl_ast::Node;
33
/// Specific types of parse errors found in Perl script content
///
/// Provides detailed categorization of parsing failures to enable targeted
/// error recovery strategies during LSP workflows.
///
/// Every payload is a plain `String`, so the type is totally comparable and
/// hashable; it can be used directly as a `HashSet`/`HashMap` key (for example
/// to de-duplicate diagnostics).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ParseErrorKind {
    /// Parser encountered unexpected token during Perl script analysis
    UnexpectedToken {
        /// Token type that was expected during parsing
        expected: String,
        /// Actual token found in Perl script content
        found: String,
    },
    /// String literal not properly closed in Perl script
    UnclosedString,
    /// Regular expression pattern not properly closed
    UnclosedRegex,
    /// Code block (braces) not properly closed
    UnclosedBlock,
    /// Required semicolon missing in Perl script
    MissingSemicolon,
    /// General syntax error in Perl parsing code
    InvalidSyntax,
    /// Parenthesis not properly closed in expression
    UnclosedParenthesis,
    /// Array or hash bracket not properly closed
    UnclosedBracket,
    /// Hash or block brace not properly closed
    UnclosedBrace,
    /// Heredoc block not properly terminated
    UnterminatedHeredoc,
    /// Variable name does not follow Perl naming rules
    InvalidVariableName,
    /// Subroutine name does not follow Perl naming rules
    InvalidSubroutineName,
    /// Required operator missing in expression
    MissingOperator,
    /// Required operand missing in expression
    MissingOperand,
    /// Unexpected end of file during parsing
    UnexpectedEof,
}
76
/// Perl script error classification engine for LSP workflow operations
///
/// Analyzes parsing errors and provides specific error types with recovery suggestions
/// for robust Perl parsing workflows within enterprise LSP environments.
///
/// The classifier is a stateless unit type, so it is trivially copyable and
/// printable for logging.
#[derive(Debug, Clone, Copy)]
pub struct ErrorClassifier;
82
83impl Default for ErrorClassifier {
84    fn default() -> Self {
85        Self::new()
86    }
87}
88
89impl ErrorClassifier {
90    /// Create new error classifier for Perl script analysis
91    ///
92    /// # Returns
93    ///
94    /// Configured classifier ready for LSP workflow error analysis
95    pub fn new() -> Self {
96        ErrorClassifier
97    }
98
99    /// Classify parsing error based on AST node and source context
100    ///
101    /// Analyzes error patterns in Perl script content to provide specific
102    /// error types for targeted recovery strategies during LSP workflow.
103    ///
104    /// # Arguments
105    ///
106    /// * `error_node` - AST node where error occurred
107    /// * `source` - Complete Perl script source code for context analysis
108    ///
109    /// # Returns
110    ///
111    /// Specific error type for targeted recovery during Perl parsing
112    pub fn classify(&self, error_node: &Node, source: &str) -> ParseErrorKind {
113        // Get the error text if available based on location
114        let error_text = {
115            let start = error_node.location.start;
116            let end = (start + 10).min(source.len()); // Look at next 10 chars
117            if start < source.len() && end <= source.len() && start <= end {
118                &source[start..end]
119            } else {
120                ""
121            }
122        };
123
124        // Check for common patterns - check the entire source for unclosed quotes
125        let quote_count = source.matches('"').count();
126        let single_quote_count = source.matches('\'').count();
127
128        // Check if we have unclosed quotes
129        if !quote_count.is_multiple_of(2) {
130            return ParseErrorKind::UnclosedString;
131        }
132        if !single_quote_count.is_multiple_of(2) {
133            return ParseErrorKind::UnclosedString;
134        }
135
136        // Also check the error text itself
137        if error_text.starts_with('"') && !error_text.ends_with('"') {
138            return ParseErrorKind::UnclosedString;
139        }
140
141        if error_text.starts_with('\'') && !error_text.ends_with('\'') {
142            return ParseErrorKind::UnclosedString;
143        }
144
145        if error_text.starts_with('/') && !error_text.contains("//") {
146            // Could be unclosed regex
147            if !error_text[1..].contains('/') {
148                return ParseErrorKind::UnclosedRegex;
149            }
150        }
151
152        // Check context around error
153        {
154            let pos = error_node.location.start;
155            let line_start = source[..pos].rfind('\n').map(|i| i + 1).unwrap_or(0);
156            let line_end = source[pos..].find('\n').map(|i| pos + i).unwrap_or(source.len());
157
158            let line = &source[line_start..line_end];
159
160            // Check for missing semicolon
161            if !line.trim().is_empty()
162                && !line.trim().ends_with(';')
163                && !line.trim().ends_with('{')
164                && !line.trim().ends_with('}')
165            {
166                // Look for common statement patterns
167                if line.contains("my ")
168                    || line.contains("our ")
169                    || line.contains("local ")
170                    || line.contains("print ")
171                    || line.contains("say ")
172                    || line.contains("return ")
173                {
174                    return ParseErrorKind::MissingSemicolon;
175                }
176            }
177
178            // Check for unclosed delimiters
179            let open_parens = line.matches('(').count();
180            let close_parens = line.matches(')').count();
181            if open_parens > close_parens {
182                return ParseErrorKind::UnclosedParenthesis;
183            }
184
185            let open_brackets = line.matches('[').count();
186            let close_brackets = line.matches(']').count();
187            if open_brackets > close_brackets {
188                return ParseErrorKind::UnclosedBracket;
189            }
190
191            let open_braces = line.matches('{').count();
192            let close_braces = line.matches('}').count();
193            if open_braces > close_braces {
194                return ParseErrorKind::UnclosedBrace;
195            }
196        }
197
198        // Check if we're at EOF
199        if error_node.location.start >= source.len() - 1 {
200            return ParseErrorKind::UnexpectedEof;
201        }
202
203        // Default to invalid syntax
204        ParseErrorKind::InvalidSyntax
205    }
206
207    /// Generate user-friendly diagnostic message for classified error
208    ///
209    /// Converts error classification into readable message for Perl script developers
210    /// during LSP workflow processing and debugging operations.
211    ///
212    /// # Arguments
213    ///
214    /// * `kind` - Classified error type from Perl script analysis
215    ///
216    /// # Returns
217    ///
218    /// Human-readable error message describing the parsing issue
219    pub fn get_diagnostic_message(&self, kind: &ParseErrorKind) -> String {
220        match kind {
221            ParseErrorKind::UnexpectedToken { expected, found } => {
222                format!("Expected {} but found {}", expected, found)
223            }
224            ParseErrorKind::UnclosedString => "Unclosed string literal".to_string(),
225            ParseErrorKind::UnclosedRegex => "Unclosed regular expression".to_string(),
226            ParseErrorKind::UnclosedBlock => "Unclosed code block - missing '}'".to_string(),
227            ParseErrorKind::MissingSemicolon => "Missing semicolon at end of statement".to_string(),
228            ParseErrorKind::InvalidSyntax => "Invalid syntax".to_string(),
229            ParseErrorKind::UnclosedParenthesis => "Unclosed parenthesis - missing ')'".to_string(),
230            ParseErrorKind::UnclosedBracket => "Unclosed bracket - missing ']'".to_string(),
231            ParseErrorKind::UnclosedBrace => "Unclosed brace - missing '}'".to_string(),
232            ParseErrorKind::UnterminatedHeredoc => "Unterminated heredoc".to_string(),
233            ParseErrorKind::InvalidVariableName => "Invalid variable name".to_string(),
234            ParseErrorKind::InvalidSubroutineName => "Invalid subroutine name".to_string(),
235            ParseErrorKind::MissingOperator => "Missing operator".to_string(),
236            ParseErrorKind::MissingOperand => "Missing operand".to_string(),
237            ParseErrorKind::UnexpectedEof => "Unexpected end of file".to_string(),
238        }
239    }
240
241    /// Generate recovery suggestion for classified parsing error
242    ///
243    /// Provides actionable recovery suggestions for Perl script developers
244    /// to resolve parsing issues during LSP workflow development.
245    ///
246    /// # Arguments
247    ///
248    /// * `kind` - Classified error type requiring recovery suggestion
249    ///
250    /// # Returns
251    ///
252    /// Optional recovery suggestion or None if no specific suggestion available
253    pub fn get_suggestion(&self, kind: &ParseErrorKind) -> Option<String> {
254        match kind {
255            ParseErrorKind::MissingSemicolon => {
256                Some("Add a semicolon ';' at the end of the statement".to_string())
257            }
258            ParseErrorKind::UnclosedString => {
259                Some("Add a closing quote to terminate the string".to_string())
260            }
261            ParseErrorKind::UnclosedParenthesis => {
262                Some("Add a closing parenthesis ')' to match the opening '('".to_string())
263            }
264            ParseErrorKind::UnclosedBracket => {
265                Some("Add a closing bracket ']' to match the opening '['".to_string())
266            }
267            ParseErrorKind::UnclosedBrace => {
268                Some("Add a closing brace '}' to match the opening '{'".to_string())
269            }
270            ParseErrorKind::UnclosedBlock => {
271                Some("Add a closing brace '}' to complete the code block".to_string())
272            }
273            ParseErrorKind::UnclosedRegex => {
274                Some("Add a closing delimiter to terminate the regex pattern".to_string())
275            }
276            ParseErrorKind::UnterminatedHeredoc => {
277                Some("Add the heredoc terminator marker on its own line".to_string())
278            }
279            ParseErrorKind::InvalidVariableName => {
280                Some("Variable names must start with a letter or underscore, followed by alphanumeric characters or underscores".to_string())
281            }
282            ParseErrorKind::InvalidSubroutineName => {
283                Some("Subroutine names must start with a letter or underscore, followed by alphanumeric characters or underscores".to_string())
284            }
285            ParseErrorKind::MissingOperator => {
286                Some("Add an operator between operands (e.g., +, -, *, /, ., ==, !=)".to_string())
287            }
288            ParseErrorKind::MissingOperand => {
289                Some("Add a value or expression after the operator".to_string())
290            }
291            ParseErrorKind::UnexpectedEof => {
292                Some("The file ended unexpectedly - check for unclosed blocks, strings, or parentheses".to_string())
293            }
294            ParseErrorKind::UnexpectedToken { expected, found: _ } => {
295                Some(format!("Expected {} at this location", expected))
296            }
297            ParseErrorKind::InvalidSyntax => None,
298        }
299    }
300
301    /// Get a detailed explanation for the error kind
302    ///
303    /// Provides additional context and explanation beyond the basic diagnostic message
304    /// to help developers understand the root cause of the error.
305    ///
306    /// # Arguments
307    ///
308    /// * `kind` - Classified error type
309    ///
310    /// # Returns
311    ///
312    /// Optional detailed explanation
313    pub fn get_explanation(&self, kind: &ParseErrorKind) -> Option<String> {
314        match kind {
315            ParseErrorKind::MissingSemicolon => {
316                Some("In Perl, most statements must end with a semicolon. The only exceptions are the last statement in a block and statements that end with a block (like if, while, sub, etc.).".to_string())
317            }
318            ParseErrorKind::UnclosedString => {
319                Some("String literals must be properly terminated with a matching quote. Use double quotes (\") for interpolated strings or single quotes (') for literal strings.".to_string())
320            }
321            ParseErrorKind::UnclosedRegex => {
322                Some("Regular expressions must be properly delimited. Common forms include /pattern/, m/pattern/, s/old/new/, and qr/pattern/.".to_string())
323            }
324            ParseErrorKind::UnterminatedHeredoc => {
325                Some("Heredoc blocks must have their terminator marker appear on a line by itself with no leading or trailing whitespace (unless using <<~MARKER for indented heredocs).".to_string())
326            }
327            ParseErrorKind::InvalidVariableName => {
328                Some("Perl variable names (after the sigil) must follow identifier rules: start with a letter (a-z, A-Z) or underscore (_), followed by any combination of letters, digits, or underscores.".to_string())
329            }
330            ParseErrorKind::UnclosedBlock => {
331                Some("Code blocks must have matching braces. Each opening '{' needs a corresponding closing '}'.".to_string())
332            }
333            _ => None,
334        }
335    }
336}
337
#[cfg(test)]
mod tests {
    use super::*;
    use perl_ast::{Node, NodeKind, SourceLocation};

    /// Builds a bare parser error node spanning bytes `start..end`, with no
    /// expected/found/partial recovery data attached.
    fn error_node(message: &str, start: usize, end: usize) -> Node {
        Node::new(
            NodeKind::Error {
                message: message.to_string(),
                expected: vec![],
                found: None,
                partial: None,
            },
            SourceLocation { start, end },
        )
    }

    #[test]
    fn test_classify_unclosed_string() {
        let classifier = ErrorClassifier::new();
        let source = r#"my $x = "hello"#;

        // Byte offsets:  m  y  _  $  x  _  =  _  "  h  e  l  l  o
        //                0  1  2  3  4  5  6  7  8  9 10 11 12 13
        // The opening quote sits at byte 8 and the source is 14 bytes long, so
        // the unterminated literal `"hello` spans 8..14.
        let node = error_node("Unclosed string", 8, 14);

        let kind = classifier.classify(&node, source);
        assert_eq!(kind, ParseErrorKind::UnclosedString);
    }

    #[test]
    fn test_classify_missing_semicolon() {
        let classifier = ErrorClassifier::new();
        let source = "my $x = 42\nmy $y = 10";

        // Simulate an error node at the end of the first line (the newline char).
        let node = error_node("Unexpected token", 10, 11);

        let kind = classifier.classify(&node, source);
        assert_eq!(kind, ParseErrorKind::MissingSemicolon);
    }

    #[test]
    fn test_invalid_syntax_has_message_but_no_suggestion() {
        let classifier = ErrorClassifier::new();
        let kind = ParseErrorKind::InvalidSyntax;

        assert_eq!(classifier.get_diagnostic_message(&kind), "Invalid syntax");
        assert_eq!(classifier.get_suggestion(&kind), None);
        assert_eq!(classifier.get_explanation(&kind), None);
    }
}