perl_error/
lib.rs

1//! Error types for the Perl parser within the Perl parsing workflow pipeline
2//!
3//! This module defines comprehensive error handling for Perl parsing operations that occur
//! throughout the Perl parsing workflow: Parse → Index → Navigate → Complete → Analyze.
5//!
6//! # Error Recovery Strategy
7//!
8//! When parsing errors occur during Perl parsing:
9//! 1. **Parse stage**: Parsing failures indicate corrupted or malformed Perl source
//! 2. **Index stage**: Syntax errors suggest script inconsistencies requiring fallback processing
11//! 3. **Navigate stage**: Parse failures can break thread analysis - graceful degradation applies
12//! 4. **Complete stage**: Errors impact output generation but preserve original content
13//! 5. **Analyze stage**: Parse failures affect search indexing but maintain basic metadata
14//!
15//! # Performance Context
16//!
17//! Error handling is optimized for large Perl codebase processing scenarios with minimal memory overhead
18//! and fast recovery paths to maintain enterprise-scale performance targets.
19//!
20//! # Usage Examples
21//!
22//! ## Basic Error Handling
23//!
24//! ```ignore
25//! use perl_parser::{Parser, ParseError, ParseResult};
26//!
27//! fn parse_with_error_handling(code: &str) -> ParseResult<()> {
28//!     let mut parser = Parser::new(code);
29//!     match parser.parse() {
30//!         Ok(ast) => {
31//!             println!("Parsing successful");
32//!             Ok(())
33//!         }
34//!         Err(ParseError::UnexpectedEof) => {
35//!             eprintln!("Incomplete code: unexpected end of input");
36//!             Err(ParseError::UnexpectedEof)
37//!         }
38//!         Err(ParseError::UnexpectedToken { found, expected, location }) => {
39//!             eprintln!("Syntax error at position {}: found '{}', expected '{}'",
40//!                      location, found, expected);
41//!             Err(ParseError::UnexpectedToken { found, expected, location })
42//!         }
43//!         Err(e) => {
44//!             eprintln!("Parse error: {}", e);
45//!             Err(e)
46//!         }
47//!     }
48//! }
49//! ```
50//!
51//! ## Error Recovery in LSP Context
52//!
53//! ```ignore
54//! use perl_parser::{Parser, ParseError, error_recovery::ErrorRecovery};
55//!
56//! fn parse_with_recovery(code: &str) -> Vec<String> {
57//!     let mut parser = Parser::new(code);
58//!     let mut errors = Vec::new();
59//!
60//!     match parser.parse() {
61//!         Ok(_) => println!("Parse successful"),
62//!         Err(err) => {
63//!             // Log error for diagnostics
64//!             errors.push(format!("Parse error: {}", err));
65//!
66//!             // Attempt error recovery for LSP
67//!             match err {
68//!                 ParseError::UnexpectedToken { .. } => {
69//!                     // Continue parsing from next statement
70//!                     println!("Attempting recovery...");
71//!                 }
72//!                 ParseError::RecursionLimit => {
73//!                     // Use iterative parsing approach
74//!                     println!("Switching to iterative parsing...");
75//!                 }
76//!                 _ => {
77//!                     // Use fallback parsing strategy
78//!                     println!("Using fallback parsing...");
79//!                 }
80//!             }
81//!         }
82//!     }
83//!     errors
84//! }
85//! ```
86//!
87//! ## Comprehensive Error Context
88//!
89//! ```
90//! use perl_error::ParseError;
91//!
92//! fn create_detailed_error() -> ParseError {
93//!     ParseError::UnexpectedToken {
94//!         found: "number".to_string(),
95//!         expected: "identifier".to_string(),
96//!         location: 10, // byte position 10
97//!     }
98//! }
99//!
100//! fn handle_error_with_context(error: &ParseError) {
101//!     match error {
102//!         ParseError::UnexpectedToken { found, expected, location } => {
103//!             println!("Syntax error at byte position {}: found '{}', expected '{}'",
104//!                     location, found, expected);
105//!         }
106//!         ParseError::UnexpectedEof => {
107//!             println!("Incomplete input: unexpected end of file");
108//!         }
109//!         _ => {
110//!             println!("Parse error: {}", error);
111//!         }
112//!     }
113//! }
114//! ```
115
116use perl_position_tracking::LineIndex;
117use thiserror::Error;
118
119#[derive(Debug, Clone)]
120/// Rich error context with source line and fix suggestions
121pub struct ErrorContext {
122    /// The original parse error
123    pub error: ParseError,
124    /// Line number (0-indexed)
125    pub line: usize,
126    /// Column number (0-indexed)
127    pub column: usize,
128    /// The actual source line text
129    pub source_line: String,
130    /// Optional fix suggestion
131    pub suggestion: Option<String>,
132}
133
134impl From<perl_regex::RegexError> for ParseError {
135    fn from(err: perl_regex::RegexError) -> Self {
136        match err {
137            perl_regex::RegexError::Syntax { message, offset } => {
138                ParseError::syntax(message, offset)
139            }
140        }
141    }
142}
143
/// Where in the parse tree a recovery was performed.
///
/// Used by [`ParseError::Recovered`] to describe the syntactic context in which
/// the parser applied a recovery strategy. LSP providers use this to decide
/// which features can still be offered after a recovery.
///
/// The enum is fieldless, so it implements `Eq` and `Hash` in addition to
/// `PartialEq`; this lets sites be used as keys in sets and maps (for example
/// to tally recoveries per site).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum RecoverySite {
    /// Inside a parenthesised argument list `(...)`.
    ArgList,
    /// Inside an array subscript `[...]`.
    ArraySubscript,
    /// Inside a hash subscript `{...}`.
    HashSubscript,
    /// After a `->` dereference arrow (postfix chain).
    PostfixChain,
    /// After a binary infix operator (right-hand side missing).
    InfixRhs,
}
162
/// What kind of recovery was applied at a [`RecoverySite`].
///
/// Pairs with [`RecoverySite`] in [`ParseError::Recovered`] to describe the
/// exact repair the parser made. This information lets consumers (e.g. LSP
/// providers) understand the confidence level of the resulting AST region.
///
/// The enum is fieldless, so it implements `Eq` and `Hash` in addition to
/// `PartialEq`; this lets kinds be used as keys in sets and maps.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum RecoveryKind {
    /// A synthetic closing delimiter (`)` or `]`) was inferred.
    InsertedCloser,
    /// A `NodeKind::MissingExpression` placeholder (see [`perl_ast::NodeKind`]) was inserted.
    MissingOperand,
    /// A postfix chain was cut short due to a missing continuation.
    TruncatedChain,
    /// A statement boundary (`;`) was inferred from context.
    InferredSemicolon,
}
179
/// Upper bounds on the work a single parse is allowed to perform.
///
/// The limits keep the parser terminating in bounded time on malformed or
/// adversarial input. Every field has a default (see the `Default`
/// implementation) intended to suit most real-world Perl code.
///
/// # Usage
///
/// ```
/// use perl_error::ParseBudget;
///
/// // Use defaults for normal parsing
/// let budget = ParseBudget::default();
///
/// // Stricter limits for untrusted input
/// let strict = ParseBudget {
///     max_errors: 10,
///     max_depth: 64,
///     max_tokens_skipped: 100,
///     max_recoveries: 50,
/// };
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ParseBudget {
    /// How many errors may be collected before parsing gives up.
    /// Stopping at this limit avoids flooding diagnostics.
    /// Default: 100
    pub max_errors: usize,

    /// Deepest allowed nesting of recursive constructs (blocks, expressions).
    /// Guards against stack overflow on deeply nested input.
    /// Default: 256
    pub max_depth: usize,

    /// How many tokens a single recovery attempt may skip.
    /// Guards against infinite loops when no sync point can be found.
    /// Default: 1000
    pub max_tokens_skipped: usize,

    /// How many recovery attempts one parse may make.
    /// Bounds total recovery work in pathological cases.
    /// Default: 500
    pub max_recoveries: usize,
}
224
225impl Default for ParseBudget {
226    fn default() -> Self {
227        Self { max_errors: 100, max_depth: 256, max_tokens_skipped: 1000, max_recoveries: 500 }
228    }
229}
230
231impl ParseBudget {
232    /// Create a budget suitable for IDE/LSP usage with generous limits.
233    pub fn for_ide() -> Self {
234        Self::default()
235    }
236
237    /// Create a strict budget for parsing untrusted input.
238    pub fn strict() -> Self {
239        Self { max_errors: 10, max_depth: 64, max_tokens_skipped: 100, max_recoveries: 50 }
240    }
241
242    /// Create an unlimited budget (use with caution).
243    pub fn unlimited() -> Self {
244        Self {
245            max_errors: usize::MAX,
246            max_depth: usize::MAX,
247            max_tokens_skipped: usize::MAX,
248            max_recoveries: usize::MAX,
249        }
250    }
251}
252
/// Running tally of how much of a [`ParseBudget`] a parse has consumed.
///
/// All counters start at zero. The accompanying methods compare the counters
/// against a budget and update them as parsing proceeds.
#[derive(Debug, Clone, Default)]
pub struct BudgetTracker {
    /// Errors emitted so far.
    pub errors_emitted: usize,
    /// Nesting depth the parser is currently at.
    pub current_depth: usize,
    /// Deepest nesting level reached during the parse.
    pub max_depth_reached: usize,
    /// Tokens skipped, summed over all recovery attempts.
    pub tokens_skipped: usize,
    /// Recovery attempts made so far.
    pub recoveries_attempted: usize,
}
270
271impl BudgetTracker {
272    /// Create a new budget tracker.
273    pub fn new() -> Self {
274        Self::default()
275    }
276
277    /// Check if error budget is exhausted.
278    pub fn errors_exhausted(&self, budget: &ParseBudget) -> bool {
279        self.errors_emitted >= budget.max_errors
280    }
281
282    /// Check if depth budget would be exceeded by going one level deeper.
283    pub fn depth_would_exceed(&self, budget: &ParseBudget) -> bool {
284        self.current_depth >= budget.max_depth
285    }
286
287    /// Check if skip budget would be exceeded by skipping `count` more tokens.
288    pub fn skip_would_exceed(&self, budget: &ParseBudget, count: usize) -> bool {
289        self.tokens_skipped.saturating_add(count) > budget.max_tokens_skipped
290    }
291
292    /// Check if recovery budget is exhausted.
293    pub fn recoveries_exhausted(&self, budget: &ParseBudget) -> bool {
294        self.recoveries_attempted >= budget.max_recoveries
295    }
296
297    /// Begin a recovery attempt, checking budget first.
298    ///
299    /// Returns `false` if another recovery attempt would exceed the budget.
300    /// If this returns `true`, the recovery attempt has been recorded.
301    pub fn begin_recovery(&mut self, budget: &ParseBudget) -> bool {
302        if self.recoveries_attempted >= budget.max_recoveries {
303            return false;
304        }
305        self.recoveries_attempted = self.recoveries_attempted.saturating_add(1);
306        true
307    }
308
309    /// Check if skipping `additional` more tokens would stay within budget.
310    ///
311    /// This considers both already-skipped tokens and the proposed additional count.
312    pub fn can_skip_more(&self, budget: &ParseBudget, additional: usize) -> bool {
313        self.tokens_skipped.saturating_add(additional) <= budget.max_tokens_skipped
314    }
315
316    /// Record an error emission.
317    pub fn record_error(&mut self) {
318        self.errors_emitted = self.errors_emitted.saturating_add(1);
319    }
320
321    /// Enter a deeper nesting level.
322    pub fn enter_depth(&mut self) {
323        self.current_depth = self.current_depth.saturating_add(1);
324        if self.current_depth > self.max_depth_reached {
325            self.max_depth_reached = self.current_depth;
326        }
327    }
328
329    /// Exit a nesting level.
330    pub fn exit_depth(&mut self) {
331        self.current_depth = self.current_depth.saturating_sub(1);
332    }
333
334    /// Record tokens skipped during recovery.
335    pub fn record_skip(&mut self, count: usize) {
336        self.tokens_skipped = self.tokens_skipped.saturating_add(count);
337    }
338
339    /// Record a recovery attempt.
340    pub fn record_recovery(&mut self) {
341        self.recoveries_attempted = self.recoveries_attempted.saturating_add(1);
342    }
343}
344
/// Result type for parser operations in the Perl parsing workflow pipeline
///
/// Shorthand for [`Result`] with [`ParseError`] as the error type, so that every stage of
/// the Parse → Index → Navigate → Complete → Analyze workflow reports failures through the
/// same error type.
pub type ParseResult<T> = Result<T, ParseError>;
351
#[derive(Error, Debug, Clone, PartialEq)]
/// Comprehensive error types that can occur during Perl parsing workflows
///
/// These errors describe parsing failures that occur during Perl code analysis, script
/// processing, and metadata extraction. The `UnexpectedToken`, `SyntaxError`, and `Recovered`
/// variants carry a byte-offset `location` field; the remaining variants have no position
/// attached.
///
/// The user-facing message of each variant is defined by its `#[error(...)]` attribute.
///
/// # Error Recovery Patterns
///
/// - **Syntax Errors**: Attempt fallback parsing or skip problematic content sections
/// - **Lexer Errors**: Re-tokenize with relaxed rules or binary content detection
/// - **Recursion Limits**: Flatten deeply nested structures or process iteratively
/// - **String Handling**: Apply encoding detection and normalization workflows
///
/// # Enterprise Scale Considerations
///
/// Error handling is optimized for large Perl files and multi-file workspaces, ensuring
/// memory-efficient error propagation and logging.
pub enum ParseError {
    /// Parser encountered unexpected end of input during Perl code analysis
    ///
    /// This occurs when processing truncated Perl scripts or incomplete Perl source during
    /// the Parse stage. Recovery strategy: attempt partial parsing and preserve available content.
    #[error("Unexpected end of input")]
    UnexpectedEof,

    /// Parser found an unexpected token during Perl parsing workflow
    ///
    /// Common during Analyze stage when Perl scripts contain syntax variations or encoding issues.
    /// Recovery strategy: skip problematic tokens and attempt continued parsing with relaxed rules.
    #[error("expected {expected}, found {found} at position {location}")]
    UnexpectedToken {
        /// Token type that was expected during Perl script parsing
        expected: String,
        /// Actual token found in Perl script content
        found: String,
        /// Byte position where unexpected token was encountered
        location: usize,
    },

    /// General syntax error occurred during Perl code parsing
    ///
    /// This encompasses malformed Perl constructs found in Perl scripts during Navigate stage analysis.
    /// Recovery strategy: isolate syntax error scope and continue processing surrounding content.
    #[error("Invalid syntax at position {location}: {message}")]
    SyntaxError {
        /// Descriptive error message explaining the syntax issue
        message: String,
        /// Byte position where syntax error occurred in Perl script
        location: usize,
    },

    /// Lexical analysis failure during Perl script tokenization
    ///
    /// Indicates character encoding issues or binary content mixed with text during Parse stage.
    /// Recovery strategy: apply encoding detection and re-attempt tokenization with binary fallbacks.
    #[error("Lexer error: {message}")]
    LexerError {
        /// Detailed lexer error message describing tokenization failure
        message: String,
    },

    /// Parser recursion depth exceeded during complex Perl script analysis
    ///
    /// Occurs with deeply nested structures in Perl code during Complete stage processing.
    /// Recovery strategy: flatten recursive structures and process iteratively to maintain performance.
    #[error("Maximum recursion depth exceeded")]
    RecursionLimit,

    /// Invalid numeric literal found in Perl script content
    ///
    /// Common when processing malformed configuration values during Analyze stage analysis.
    /// Recovery strategy: substitute default values and log for manual review.
    #[error("Invalid number literal: {literal}")]
    InvalidNumber {
        /// The malformed numeric literal found in Perl script content
        literal: String,
    },

    /// Malformed string literal in Perl parsing workflow
    ///
    /// Indicates quote mismatches or encoding issues in Perl script strings during parsing.
    /// Recovery strategy: attempt string repair and normalization before re-parsing.
    #[error("Invalid string literal")]
    InvalidString,

    /// Unclosed delimiter detected during Perl code parsing
    ///
    /// Commonly found in truncated or corrupted Perl script content during Parse stage.
    /// Recovery strategy: auto-close delimiters and continue parsing with synthetic boundaries.
    #[error("Unclosed delimiter: {delimiter}")]
    UnclosedDelimiter {
        /// The delimiter character that was left unclosed
        delimiter: char,
    },

    /// Invalid regular expression syntax in Perl parsing workflow
    ///
    /// Occurs when parsing regex patterns in data filters during Navigate stage analysis.
    /// Recovery strategy: fallback to literal string matching and preserve original pattern.
    #[error("Invalid regex: {message}")]
    InvalidRegex {
        /// Specific error message describing regex syntax issue
        message: String,
    },

    /// Nesting depth limit exceeded for recursive structures
    ///
    /// The message reports both values as `{depth} > {max_depth}`.
    #[error("Nesting depth limit exceeded: {depth} > {max_depth}")]
    NestingTooDeep {
        /// Current nesting depth
        depth: usize,
        /// Maximum allowed depth
        max_depth: usize,
    },

    /// Parsing was cancelled by an external cancellation token
    #[error("Parsing cancelled")]
    Cancelled,

    /// A syntax error was recovered from — parsing continued with a synthetic node.
    ///
    /// This variant is emitted alongside the partial AST node that was produced
    /// by the recovery. LSP providers iterate `parser.errors()` and count
    /// `Recovered` variants to determine confidence for gating features.
    ///
    /// The message renders `kind` and `site` with their `Debug` formatting.
    #[error("Recovered from {kind:?} at {site:?} (position {location})")]
    Recovered {
        /// Where in the parse tree the recovery occurred.
        site: RecoverySite,
        /// What kind of repair was applied.
        kind: RecoveryKind,
        /// Byte offset of the recovery point in the source.
        location: usize,
    },
}
486
487/// Error classification and diagnostic generation for parsed Perl code.
488pub mod classifier;
489/// Error recovery strategies and traits for the Perl parser.
490pub mod recovery;
491
492use perl_ast::Node;
493
494/// Structured output from parsing, combining AST with all diagnostics.
495///
496/// This type replaces the simple `Result<Node, ParseError>` pattern to enable
497/// error recovery. Even when errors occur, parsing continues and produces a
498/// partial AST alongside collected diagnostics.
499///
500/// # Usage
501///
502/// ```ignore
503/// use perl_parser::{Parser, ParseOutput};
504///
505/// let mut parser = Parser::new("my $x = ;");
506/// let output = parser.parse_with_recovery();
507///
508/// // AST is always available (may contain error nodes)
509/// println!("Statements: {:?}", output.ast);
510///
511/// // Diagnostics are collected separately
512/// for error in &output.diagnostics {
513///     println!("Error: {}", error);
514/// }
515///
516/// // Budget tracking shows resource usage
517/// println!("Errors: {}", output.budget_usage.errors_emitted);
518/// ```
519#[derive(Debug, Clone)]
520pub struct ParseOutput {
521    /// The parsed AST. Always present, but may contain error nodes
522    /// if parsing encountered recoverable errors.
523    pub ast: Node,
524
525    /// All diagnostics (errors and warnings) collected during parsing.
526    /// These are ordered by source position.
527    pub diagnostics: Vec<ParseError>,
528
529    /// Budget consumption during this parse.
530    /// Useful for diagnosing pathological inputs.
531    pub budget_usage: BudgetTracker,
532
533    /// Whether parsing completed normally or was terminated early
534    /// due to budget exhaustion.
535    pub terminated_early: bool,
536
537    /// Number of recovery operations applied during this parse.
538    ///
539    /// Counts the [`ParseError::Recovered`] variants in `diagnostics`.
540    /// LSP providers use this as a confidence signal: `0` means a clean parse,
541    /// `> 0` means at least one synthetic repair was made.
542    pub recovered_count: usize,
543}
544
545impl ParseOutput {
546    /// Create a successful parse output with no errors.
547    pub fn success(ast: Node) -> Self {
548        Self {
549            ast,
550            diagnostics: Vec::new(),
551            budget_usage: BudgetTracker::new(),
552            terminated_early: false,
553            recovered_count: 0,
554        }
555    }
556
557    /// Create a parse output with errors.
558    ///
559    /// Note: This re-derives budget_usage from diagnostics count.
560    /// For accurate budget tracking, use `finish()` instead.
561    pub fn with_errors(ast: Node, diagnostics: Vec<ParseError>) -> Self {
562        let mut budget_usage = BudgetTracker::new();
563        budget_usage.errors_emitted = diagnostics.len();
564        let recovered_count =
565            diagnostics.iter().filter(|e| matches!(e, ParseError::Recovered { .. })).count();
566        Self { ast, diagnostics, budget_usage, terminated_early: false, recovered_count }
567    }
568
569    /// Create a parse output with full budget tracking.
570    ///
571    /// This is the preferred constructor when the actual BudgetTracker
572    /// from parsing is available, as it preserves accurate metrics.
573    pub fn finish(
574        ast: Node,
575        diagnostics: Vec<ParseError>,
576        budget_usage: BudgetTracker,
577        terminated_early: bool,
578    ) -> Self {
579        let recovered_count =
580            diagnostics.iter().filter(|e| matches!(e, ParseError::Recovered { .. })).count();
581        Self { ast, diagnostics, budget_usage, terminated_early, recovered_count }
582    }
583
584    /// Check if parse completed without any errors.
585    pub fn is_ok(&self) -> bool {
586        self.diagnostics.is_empty()
587    }
588
589    /// Check if parse had errors.
590    pub fn has_errors(&self) -> bool {
591        !self.diagnostics.is_empty()
592    }
593
594    /// Get the error count.
595    pub fn error_count(&self) -> usize {
596        self.diagnostics.len()
597    }
598}
599
600impl ParseError {
601    /// Create a new syntax error for Perl parsing workflow failures
602    ///
603    /// # Arguments
604    ///
605    /// * `message` - Descriptive error message with context about the syntax issue
606    /// * `location` - Character position within the Perl code where error occurred
607    ///
608    /// # Returns
609    ///
610    /// A [`ParseError::SyntaxError`] variant with embedded location context for recovery strategies
611    ///
612    /// # Examples
613    ///
614    /// ```rust
615    /// use perl_error::ParseError;
616    ///
617    /// let error = ParseError::syntax("Missing semicolon in Perl script", 42);
618    /// assert!(matches!(error, ParseError::SyntaxError { .. }));
619    /// ```
620    pub fn syntax(message: impl Into<String>, location: usize) -> Self {
621        ParseError::SyntaxError { message: message.into(), location }
622    }
623
624    /// Create a new unexpected token error during Perl script parsing
625    ///
626    /// # Arguments
627    ///
628    /// * `expected` - Token type that was expected by the parser
629    /// * `found` - Actual token type that was encountered
630    /// * `location` - Character position where the unexpected token was found
631    ///
632    /// # Returns
633    ///
634    /// A [`ParseError::UnexpectedToken`] variant with detailed token mismatch information
635    ///
636    /// # Examples
637    ///
638    /// ```rust
639    /// use perl_error::ParseError;
640    ///
641    /// let error = ParseError::unexpected("semicolon", "comma", 15);
642    /// assert!(matches!(error, ParseError::UnexpectedToken { .. }));
643    /// ```
644    ///
645    /// # Email Processing Context
646    ///
647    /// This is commonly used during the Analyze stage when Perl scripts contain
648    /// syntax variations that require token-level recovery strategies.
649    pub fn unexpected(
650        expected: impl Into<String>,
651        found: impl Into<String>,
652        location: usize,
653    ) -> Self {
654        ParseError::UnexpectedToken { expected: expected.into(), found: found.into(), location }
655    }
656
657    /// Get the byte location of the error if available
658    pub fn location(&self) -> Option<usize> {
659        match self {
660            ParseError::UnexpectedToken { location, .. } => Some(*location),
661            ParseError::SyntaxError { location, .. } => Some(*location),
662            ParseError::Recovered { location, .. } => Some(*location),
663            _ => None,
664        }
665    }
666
667    /// Generate a fix suggestion based on the error type
668    pub fn suggestion(&self) -> Option<String> {
669        match self {
670            ParseError::UnexpectedToken { expected, found, .. } => {
671                // Check for common missing delimiters
672                if expected.contains(';') {
673                    return Some("add a semicolon ';' at the end of the statement".to_string());
674                }
675                if expected.contains('}') {
676                    return Some("add a closing brace '}' to end the block".to_string());
677                }
678                if expected.contains(')') {
679                    return Some("add a closing parenthesis ')' to end the group".to_string());
680                }
681                if expected.contains(']') {
682                    return Some("add a closing bracket ']' to end the array".to_string());
683                }
684                // Fat arrow found where expression expected — likely a missing value
685                // before a hash pair separator
686                if expected.contains("expression") && found.contains("=>") {
687                    return Some(
688                        "'=>' (fat arrow) is not valid here; \
689                         did you forget a value before it?"
690                            .to_string(),
691                    );
692                }
693                // Arrow found where expression expected
694                if expected.contains("expression") && found.contains("->") {
695                    return Some(
696                        "'->' (arrow) is not valid here; \
697                         did you forget the object or reference before it?"
698                            .to_string(),
699                    );
700                }
701                // Expected a variable (e.g. after my/our/local/state)
702                if expected.to_lowercase().contains("variable") {
703                    return Some(
704                        "expected a variable like $foo, @bar, or %hash after the declaration keyword"
705                            .to_string(),
706                    );
707                }
708                None
709            }
710            ParseError::UnclosedDelimiter { delimiter } => {
711                Some(format!("add closing '{}' to complete the literal", delimiter))
712            }
713            _ => None,
714        }
715    }
716}
717
718/// Enrich a list of errors with source context
719pub fn get_error_contexts(errors: &[ParseError], source: &str) -> Vec<ErrorContext> {
720    let index = LineIndex::new(source.to_string());
721
722    errors
723        .iter()
724        .map(|error| {
725            let loc = error.location().unwrap_or(source.len());
726            // Handle EOF/out-of-bounds safely
727            let safe_loc = std::cmp::min(loc, source.len());
728
729            let (line_u32, col_u32) = index.offset_to_position(safe_loc);
730            let line = line_u32 as usize;
731            let col = col_u32 as usize;
732
733            let source_line = source.lines().nth(line).unwrap_or("").to_string();
734
735            ErrorContext {
736                error: error.clone(),
737                line,
738                column: col,
739                source_line,
740                suggestion: error.suggestion(),
741            }
742        })
743        .collect()
744}
745
746#[cfg(test)]
747mod tests {
748    use super::*;
749
750    #[test]
751    fn test_parse_budget_defaults() {
752        let budget = ParseBudget::default();
753        assert_eq!(budget.max_errors, 100);
754        assert_eq!(budget.max_depth, 256);
755        assert_eq!(budget.max_tokens_skipped, 1000);
756        assert_eq!(budget.max_recoveries, 500);
757    }
758
759    #[test]
760    fn test_parse_budget_strict() {
761        let budget = ParseBudget::strict();
762        assert_eq!(budget.max_errors, 10);
763        assert_eq!(budget.max_depth, 64);
764        assert_eq!(budget.max_tokens_skipped, 100);
765        assert_eq!(budget.max_recoveries, 50);
766    }
767
768    #[test]
769    fn test_budget_tracker_errors() {
770        let budget = ParseBudget { max_errors: 3, ..Default::default() };
771        let mut tracker = BudgetTracker::new();
772
773        assert!(!tracker.errors_exhausted(&budget));
774
775        tracker.record_error();
776        tracker.record_error();
777        assert!(!tracker.errors_exhausted(&budget));
778
779        tracker.record_error();
780        assert!(tracker.errors_exhausted(&budget));
781    }
782
783    #[test]
784    fn test_budget_tracker_depth() {
785        let budget = ParseBudget { max_depth: 2, ..Default::default() };
786        let mut tracker = BudgetTracker::new();
787
788        assert!(!tracker.depth_would_exceed(&budget));
789
790        tracker.enter_depth();
791        assert!(!tracker.depth_would_exceed(&budget));
792
793        tracker.enter_depth();
794        assert!(tracker.depth_would_exceed(&budget));
795
796        tracker.exit_depth();
797        assert!(!tracker.depth_would_exceed(&budget));
798    }
799
800    #[test]
801    fn test_budget_tracker_skip() {
802        let budget = ParseBudget { max_tokens_skipped: 5, ..Default::default() };
803        let mut tracker = BudgetTracker::new();
804
805        assert!(!tracker.skip_would_exceed(&budget, 3));
806        tracker.record_skip(3);
807
808        assert!(!tracker.skip_would_exceed(&budget, 2));
809        assert!(tracker.skip_would_exceed(&budget, 3));
810    }
811
812    #[test]
813    fn test_budget_tracker_recoveries() {
814        let budget = ParseBudget { max_recoveries: 2, ..Default::default() };
815        let mut tracker = BudgetTracker::new();
816
817        assert!(!tracker.recoveries_exhausted(&budget));
818
819        tracker.record_recovery();
820        assert!(!tracker.recoveries_exhausted(&budget));
821
822        tracker.record_recovery();
823        assert!(tracker.recoveries_exhausted(&budget));
824    }
825
826    #[test]
827    fn test_parse_output_success() {
828        use perl_ast::{Node, NodeKind, SourceLocation};
829
830        let ast = Node::new(
831            NodeKind::Program { statements: vec![] },
832            SourceLocation { start: 0, end: 0 },
833        );
834        let output = ParseOutput::success(ast);
835
836        assert!(output.is_ok());
837        assert!(!output.has_errors());
838        assert_eq!(output.error_count(), 0);
839        assert!(!output.terminated_early);
840    }
841
842    #[test]
843    fn test_parse_output_with_errors() {
844        use perl_ast::{Node, NodeKind, SourceLocation};
845
846        let ast = Node::new(
847            NodeKind::Program { statements: vec![] },
848            SourceLocation { start: 0, end: 0 },
849        );
850        let errors = vec![ParseError::syntax("error 1", 0), ParseError::syntax("error 2", 5)];
851        let output = ParseOutput::with_errors(ast, errors);
852
853        assert!(!output.is_ok());
854        assert!(output.has_errors());
855        assert_eq!(output.error_count(), 2);
856    }
857
858    #[test]
859    fn test_parse_output_finish_preserves_tracker() {
860        use perl_ast::{Node, NodeKind, SourceLocation};
861
862        let ast = Node::new(
863            NodeKind::Program { statements: vec![] },
864            SourceLocation { start: 0, end: 0 },
865        );
866        let errors = vec![ParseError::syntax("error 1", 0)];
867
868        // Create a tracker with specific values
869        let mut tracker = BudgetTracker::new();
870        tracker.errors_emitted = 5;
871        tracker.tokens_skipped = 42;
872        tracker.recoveries_attempted = 3;
873        tracker.max_depth_reached = 10;
874
875        let output = ParseOutput::finish(ast, errors, tracker, true);
876
877        // Verify all tracker values are preserved
878        assert_eq!(output.budget_usage.errors_emitted, 5);
879        assert_eq!(output.budget_usage.tokens_skipped, 42);
880        assert_eq!(output.budget_usage.recoveries_attempted, 3);
881        assert_eq!(output.budget_usage.max_depth_reached, 10);
882        assert!(output.terminated_early);
883        assert_eq!(output.error_count(), 1);
884    }
885
886    #[test]
887    fn test_begin_recovery_checks_budget_first() {
888        let budget = ParseBudget { max_recoveries: 0, ..Default::default() };
889        let mut tracker = BudgetTracker::new();
890
891        // Should fail immediately - budget is 0
892        assert!(!tracker.begin_recovery(&budget));
893        assert_eq!(tracker.recoveries_attempted, 0);
894    }
895
896    #[test]
897    fn test_can_skip_more_boundary_conditions() {
898        let budget = ParseBudget { max_tokens_skipped: 10, ..Default::default() };
899        let mut tracker = BudgetTracker::new();
900
901        // At 0 skipped, can skip up to 10
902        assert!(tracker.can_skip_more(&budget, 10));
903        assert!(!tracker.can_skip_more(&budget, 11));
904
905        // Skip 5
906        tracker.record_skip(5);
907
908        // At 5 skipped, can skip up to 5 more
909        assert!(tracker.can_skip_more(&budget, 5));
910        assert!(!tracker.can_skip_more(&budget, 6));
911
912        // Skip 5 more to reach limit
913        tracker.record_skip(5);
914
915        // At limit, cannot skip any more
916        assert!(!tracker.can_skip_more(&budget, 1));
917        assert!(tracker.can_skip_more(&budget, 0));
918    }
919
920    #[test]
921    fn test_error_context_enrichment() {
922        let source = "line1\nline2;\nline3";
923        // 'e' of line1 is at 4. 5 is newline.
924        let errors = vec![ParseError::unexpected("';'", "newline", 5)];
925
926        let contexts = get_error_contexts(&errors, source);
927        assert_eq!(contexts.len(), 1);
928        assert_eq!(contexts[0].line, 0); // line1 is line 0
929        assert_eq!(contexts[0].source_line, "line1");
930        let suggestion = contexts[0].suggestion.as_deref().unwrap_or("");
931        assert!(suggestion.contains("semicolon"));
932    }
933
934    #[test]
935    fn test_recovery_site_and_kind_variants() {
936        // Verify all RecoverySite and RecoveryKind variants are constructible and comparable.
937        let sites = [
938            RecoverySite::ArgList,
939            RecoverySite::ArraySubscript,
940            RecoverySite::HashSubscript,
941            RecoverySite::PostfixChain,
942            RecoverySite::InfixRhs,
943        ];
944        let kinds = [
945            RecoveryKind::InsertedCloser,
946            RecoveryKind::MissingOperand,
947            RecoveryKind::TruncatedChain,
948            RecoveryKind::InferredSemicolon,
949        ];
950        // Each site and kind is debug-formattable and clone-able.
951        for s in &sites {
952            let _ = format!("{s:?}");
953            let _ = s.clone();
954        }
955        for k in &kinds {
956            let _ = format!("{k:?}");
957            let _ = k.clone();
958        }
959        // PartialEq works.
960        assert_eq!(RecoverySite::ArgList, RecoverySite::ArgList);
961        assert_ne!(RecoverySite::ArgList, RecoverySite::PostfixChain);
962        assert_eq!(RecoveryKind::InsertedCloser, RecoveryKind::InsertedCloser);
963        assert_ne!(RecoveryKind::InsertedCloser, RecoveryKind::MissingOperand);
964    }
965
966    #[test]
967    fn test_parse_error_recovered_variant() {
968        let err = ParseError::Recovered {
969            site: RecoverySite::ArgList,
970            kind: RecoveryKind::InsertedCloser,
971            location: 42,
972        };
973        // location() returns Some for Recovered variant.
974        assert_eq!(err.location(), Some(42));
975        // suggestion() returns None for Recovered.
976        assert!(err.suggestion().is_none());
977        // Display works (via thiserror).
978        let s = format!("{err}");
979        assert!(s.contains("Recovered") || s.contains("position 42"));
980    }
981
982    #[test]
983    fn test_parse_output_recovered_count_with_errors() {
984        use perl_ast::{Node, NodeKind, SourceLocation};
985
986        let ast = Node::new(
987            NodeKind::Program { statements: vec![] },
988            SourceLocation { start: 0, end: 0 },
989        );
990        let errors = vec![
991            ParseError::syntax("error 1", 0),
992            ParseError::Recovered {
993                site: RecoverySite::ArgList,
994                kind: RecoveryKind::MissingOperand,
995                location: 10,
996            },
997            ParseError::Recovered {
998                site: RecoverySite::PostfixChain,
999                kind: RecoveryKind::TruncatedChain,
1000                location: 20,
1001            },
1002        ];
1003        let output = ParseOutput::with_errors(ast, errors);
1004
1005        assert_eq!(output.error_count(), 3);
1006        assert_eq!(output.recovered_count, 2);
1007    }
1008
1009    #[test]
1010    fn test_parse_output_success_has_zero_recovered_count() {
1011        use perl_ast::{Node, NodeKind, SourceLocation};
1012
1013        let ast = Node::new(
1014            NodeKind::Program { statements: vec![] },
1015            SourceLocation { start: 0, end: 0 },
1016        );
1017        let output = ParseOutput::success(ast);
1018        assert_eq!(output.recovered_count, 0);
1019    }
1020
1021    #[test]
1022    fn test_parse_output_finish_recovered_count() {
1023        use perl_ast::{Node, NodeKind, SourceLocation};
1024
1025        let ast = Node::new(
1026            NodeKind::Program { statements: vec![] },
1027            SourceLocation { start: 0, end: 0 },
1028        );
1029        let errors = vec![
1030            ParseError::syntax("error", 0),
1031            ParseError::Recovered {
1032                site: RecoverySite::InfixRhs,
1033                kind: RecoveryKind::InferredSemicolon,
1034                location: 5,
1035            },
1036        ];
1037        let tracker = BudgetTracker::new();
1038        let output = ParseOutput::finish(ast, errors, tracker, false);
1039
1040        assert_eq!(output.recovered_count, 1);
1041        assert!(!output.terminated_early);
1042    }
1043}
perl_error/lib.rs

perl_error/
lib.rs