perl_error/lib.rs
1//! Error types for the Perl parser within the Perl parsing workflow pipeline
2//!
3//! This module defines comprehensive error handling for Perl parsing operations that occur
//! throughout the Perl parsing workflow: Parse → Index → Navigate → Complete → Analyze.
5//!
6//! # Error Recovery Strategy
7//!
8//! When parsing errors occur during Perl parsing:
9//! 1. **Parse stage**: Parsing failures indicate corrupted or malformed Perl source
//! 2. **Index stage**: Syntax errors suggest script inconsistencies requiring fallback processing
11//! 3. **Navigate stage**: Parse failures can break thread analysis - graceful degradation applies
12//! 4. **Complete stage**: Errors impact output generation but preserve original content
13//! 5. **Analyze stage**: Parse failures affect search indexing but maintain basic metadata
14//!
15//! # Performance Context
16//!
17//! Error handling is optimized for large Perl codebase processing scenarios with minimal memory overhead
18//! and fast recovery paths to maintain enterprise-scale performance targets.
19//!
20//! # Usage Examples
21//!
22//! ## Basic Error Handling
23//!
24//! ```ignore
25//! use perl_parser::{Parser, ParseError, ParseResult};
26//!
27//! fn parse_with_error_handling(code: &str) -> ParseResult<()> {
28//! let mut parser = Parser::new(code);
29//! match parser.parse() {
30//! Ok(ast) => {
31//! println!("Parsing successful");
32//! Ok(())
33//! }
34//! Err(ParseError::UnexpectedEof) => {
35//! eprintln!("Incomplete code: unexpected end of input");
36//! Err(ParseError::UnexpectedEof)
37//! }
38//! Err(ParseError::UnexpectedToken { found, expected, location }) => {
39//! eprintln!("Syntax error at position {}: found '{}', expected '{}'",
40//! location, found, expected);
41//! Err(ParseError::UnexpectedToken { found, expected, location })
42//! }
43//! Err(e) => {
44//! eprintln!("Parse error: {}", e);
45//! Err(e)
46//! }
47//! }
48//! }
49//! ```
50//!
51//! ## Error Recovery in LSP Context
52//!
53//! ```ignore
54//! use perl_parser::{Parser, ParseError, error_recovery::ErrorRecovery};
55//!
56//! fn parse_with_recovery(code: &str) -> Vec<String> {
57//! let mut parser = Parser::new(code);
58//! let mut errors = Vec::new();
59//!
60//! match parser.parse() {
61//! Ok(_) => println!("Parse successful"),
62//! Err(err) => {
63//! // Log error for diagnostics
64//! errors.push(format!("Parse error: {}", err));
65//!
66//! // Attempt error recovery for LSP
67//! match err {
68//! ParseError::UnexpectedToken { .. } => {
69//! // Continue parsing from next statement
70//! println!("Attempting recovery...");
71//! }
72//! ParseError::RecursionLimit => {
73//! // Use iterative parsing approach
74//! println!("Switching to iterative parsing...");
75//! }
76//! _ => {
77//! // Use fallback parsing strategy
78//! println!("Using fallback parsing...");
79//! }
80//! }
81//! }
82//! }
83//! errors
84//! }
85//! ```
86//!
87//! ## Comprehensive Error Context
88//!
89//! ```
90//! use perl_error::ParseError;
91//!
92//! fn create_detailed_error() -> ParseError {
93//! ParseError::UnexpectedToken {
94//! found: "number".to_string(),
95//! expected: "identifier".to_string(),
96//! location: 10, // byte position 10
97//! }
98//! }
99//!
100//! fn handle_error_with_context(error: &ParseError) {
101//! match error {
102//! ParseError::UnexpectedToken { found, expected, location } => {
103//! println!("Syntax error at byte position {}: found '{}', expected '{}'",
104//! location, found, expected);
105//! }
106//! ParseError::UnexpectedEof => {
107//! println!("Incomplete input: unexpected end of file");
108//! }
109//! _ => {
110//! println!("Parse error: {}", error);
111//! }
112//! }
113//! }
114//! ```
115
116use perl_position_tracking::LineIndex;
117use thiserror::Error;
118
119#[derive(Debug, Clone)]
120/// Rich error context with source line and fix suggestions
121pub struct ErrorContext {
122 /// The original parse error
123 pub error: ParseError,
124 /// Line number (0-indexed)
125 pub line: usize,
126 /// Column number (0-indexed)
127 pub column: usize,
128 /// The actual source line text
129 pub source_line: String,
130 /// Optional fix suggestion
131 pub suggestion: Option<String>,
132}
133
134impl From<perl_regex::RegexError> for ParseError {
135 fn from(err: perl_regex::RegexError) -> Self {
136 match err {
137 perl_regex::RegexError::Syntax { message, offset } => {
138 ParseError::syntax(message, offset)
139 }
140 }
141 }
142}
143
/// Where in the parse tree a recovery was performed.
///
/// Used by [`ParseError::Recovered`] to describe the syntactic context in which
/// the parser applied a recovery strategy. LSP providers use this to decide
/// which features can still be offered after a recovery.
///
/// The enum is fieldless, so it derives `Eq` and `Hash` in addition to
/// `PartialEq`; this lets sites be used as keys in sets and maps (for example
/// to count recoveries per site).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum RecoverySite {
    /// Inside a parenthesised argument list `(...)`.
    ArgList,
    /// Inside an array subscript `[...]`.
    ArraySubscript,
    /// Inside a hash subscript `{...}`.
    HashSubscript,
    /// After a `->` dereference arrow (postfix chain).
    PostfixChain,
    /// After a binary infix operator (right-hand side missing).
    InfixRhs,
}
162
/// What kind of recovery was applied at a [`RecoverySite`].
///
/// Pairs with [`RecoverySite`] in [`ParseError::Recovered`] to describe the
/// exact repair the parser made. This information lets consumers (e.g. LSP
/// providers) understand the confidence level of the resulting AST region.
///
/// The enum is fieldless, so it derives `Eq` and `Hash` in addition to
/// `PartialEq`; this lets kinds be used as keys in sets and maps.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum RecoveryKind {
    /// A synthetic closing delimiter (`)` or `]`) was inferred.
    InsertedCloser,
    /// A [`NodeKind::MissingExpression`](perl_ast::NodeKind::MissingExpression)
    /// placeholder was inserted.
    MissingOperand,
    /// A postfix chain was cut short due to a missing continuation.
    TruncatedChain,
    /// A statement boundary (`;`) was inferred from context.
    InferredSemicolon,
}
179
/// Upper bounds that keep a single parse from running away.
///
/// Every field caps one kind of work the parser may do, so that parsing
/// terminates in bounded time even on malformed or adversarial input. The
/// [`Default`] values are intended to suit most real-world Perl code.
///
/// # Usage
///
/// ```
/// use perl_error::ParseBudget;
///
/// // Use defaults for normal parsing
/// let budget = ParseBudget::default();
///
/// // Stricter limits for untrusted input
/// let strict = ParseBudget {
///     max_errors: 10,
///     max_depth: 64,
///     max_tokens_skipped: 100,
///     max_recoveries: 50,
/// };
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ParseBudget {
    /// Cap on the number of errors collected before the parser gives up,
    /// so diagnostics are not flooded.
    /// Default: 100
    pub max_errors: usize,

    /// Cap on nesting depth for recursive constructs (blocks, expressions),
    /// guarding against stack overflow on deeply nested input.
    /// Default: 256
    pub max_depth: usize,

    /// Cap on skipped tokens, guarding against endless skipping when
    /// recovery cannot find a sync point.
    /// Default: 1000
    pub max_tokens_skipped: usize,

    /// Cap on the number of recovery attempts per parse, bounding the total
    /// recovery work on pathological input.
    /// Default: 500
    pub max_recoveries: usize,
}

impl Default for ParseBudget {
    /// The standard limits: 100 errors, depth 256, 1000 skipped tokens,
    /// 500 recoveries.
    fn default() -> Self {
        Self {
            max_errors: 100,
            max_depth: 256,
            max_tokens_skipped: 1000,
            max_recoveries: 500,
        }
    }
}

impl ParseBudget {
    /// Budget for IDE/LSP usage; currently identical to [`ParseBudget::default`].
    pub fn for_ide() -> Self {
        <Self as Default>::default()
    }

    /// Tight budget intended for parsing untrusted input.
    pub fn strict() -> Self {
        Self {
            max_errors: 10,
            max_depth: 64,
            max_tokens_skipped: 100,
            max_recoveries: 50,
        }
    }

    /// Budget with every limit set to `usize::MAX` (use with caution).
    pub fn unlimited() -> Self {
        let no_limit = usize::MAX;
        Self {
            max_errors: no_limit,
            max_depth: no_limit,
            max_tokens_skipped: no_limit,
            max_recoveries: no_limit,
        }
    }
}

/// Running totals of how much of a [`ParseBudget`] a parse has consumed.
///
/// The tracker only stores counters; the limits are supplied by the caller on
/// each check. All counters use saturating arithmetic, so they never wrap.
#[derive(Debug, Clone, Default)]
pub struct BudgetTracker {
    /// Number of errors emitted so far.
    pub errors_emitted: usize,
    /// Current nesting depth.
    pub current_depth: usize,
    /// Deepest nesting level reached so far.
    pub max_depth_reached: usize,
    /// Total tokens skipped across all recovery attempts.
    pub tokens_skipped: usize,
    /// Number of recovery attempts made.
    pub recoveries_attempted: usize,
}

impl BudgetTracker {
    /// Create a tracker with every counter at zero.
    pub fn new() -> Self {
        Default::default()
    }

    /// `true` once the number of emitted errors has reached `budget.max_errors`.
    pub fn errors_exhausted(&self, budget: &ParseBudget) -> bool {
        budget.max_errors <= self.errors_emitted
    }

    /// `true` when the current depth has already reached `budget.max_depth`,
    /// i.e. going one level deeper would exceed it.
    pub fn depth_would_exceed(&self, budget: &ParseBudget) -> bool {
        budget.max_depth <= self.current_depth
    }

    /// `true` when skipping `count` more tokens would push the running total
    /// past `budget.max_tokens_skipped`. Exact negation of
    /// [`BudgetTracker::can_skip_more`].
    pub fn skip_would_exceed(&self, budget: &ParseBudget, count: usize) -> bool {
        !self.can_skip_more(budget, count)
    }

    /// `true` once the number of recovery attempts has reached
    /// `budget.max_recoveries`.
    pub fn recoveries_exhausted(&self, budget: &ParseBudget) -> bool {
        budget.max_recoveries <= self.recoveries_attempted
    }

    /// Check the recovery budget and, if there is room, record one attempt.
    ///
    /// Returns `false` (recording nothing) when the recovery budget is already
    /// exhausted; returns `true` after incrementing the attempt counter.
    pub fn begin_recovery(&mut self, budget: &ParseBudget) -> bool {
        let allowed = !self.recoveries_exhausted(budget);
        if allowed {
            self.record_recovery();
        }
        allowed
    }

    /// `true` when the tokens skipped so far plus `additional` still fit within
    /// `budget.max_tokens_skipped`.
    pub fn can_skip_more(&self, budget: &ParseBudget, additional: usize) -> bool {
        let projected = self.tokens_skipped.saturating_add(additional);
        projected <= budget.max_tokens_skipped
    }

    /// Count one more emitted error.
    pub fn record_error(&mut self) {
        self.errors_emitted = self.errors_emitted.saturating_add(1);
    }

    /// Step one nesting level deeper and update the high-water mark.
    pub fn enter_depth(&mut self) {
        self.current_depth = self.current_depth.saturating_add(1);
        self.max_depth_reached = self.max_depth_reached.max(self.current_depth);
    }

    /// Step one nesting level back out; stays at zero if already at zero.
    pub fn exit_depth(&mut self) {
        self.current_depth = self.current_depth.saturating_sub(1);
    }

    /// Add `count` to the total of skipped tokens.
    pub fn record_skip(&mut self, count: usize) {
        self.tokens_skipped = self.tokens_skipped.saturating_add(count);
    }

    /// Count one more recovery attempt without checking the budget.
    pub fn record_recovery(&mut self) {
        self.recoveries_attempted = self.recoveries_attempted.saturating_add(1);
    }
}
344
345/// Result type for parser operations in the Perl parsing workflow pipeline
346///
347/// This type encapsulates success/failure outcomes throughout the Parse → Index →
348/// Navigate → Complete → Analyze workflow, enabling consistent error propagation and recovery
349/// strategies across all pipeline stages.
350pub type ParseResult<T> = Result<T, ParseError>;
351
352#[derive(Error, Debug, Clone, PartialEq)]
353/// Comprehensive error types that can occur during Perl parsing workflows
354///
355/// These errors are designed to provide detailed context about parsing failures that occur during
356/// Perl code analysis, script processing, and metadata extraction. Each error variant includes
357/// location information to enable precise recovery strategies in large Perl file processing scenarios.
358///
359/// # Error Recovery Patterns
360///
361/// - **Syntax Errors**: Attempt fallback parsing or skip problematic content sections
362/// - **Lexer Errors**: Re-tokenize with relaxed rules or binary content detection
363/// - **Recursion Limits**: Flatten deeply nested structures or process iteratively
364/// - **String Handling**: Apply encoding detection and normalization workflows
365///
366/// # Enterprise Scale Considerations
367///
368/// Error handling is optimized for large Perl files and multi-file workspaces, ensuring
369/// memory-efficient error propagation and logging.
370pub enum ParseError {
371 /// Parser encountered unexpected end of input during Perl code analysis
372 ///
373 /// This occurs when processing truncated Perl scripts or incomplete Perl source during
374 /// the Parse stage. Recovery strategy: attempt partial parsing and preserve available content.
375 #[error("Unexpected end of input")]
376 UnexpectedEof,
377
378 /// Parser found an unexpected token during Perl parsing workflow
379 ///
380 /// Common during Analyze stage when Perl scripts contain syntax variations or encoding issues.
381 /// Recovery strategy: skip problematic tokens and attempt continued parsing with relaxed rules.
382 #[error("expected {expected}, found {found} at position {location}")]
383 UnexpectedToken {
384 /// Token type that was expected during Perl script parsing
385 expected: String,
386 /// Actual token found in Perl script content
387 found: String,
388 /// Byte position where unexpected token was encountered
389 location: usize,
390 },
391
392 /// General syntax error occurred during Perl code parsing
393 ///
394 /// This encompasses malformed Perl constructs found in Perl scripts during Navigate stage analysis.
395 /// Recovery strategy: isolate syntax error scope and continue processing surrounding content.
396 #[error("Invalid syntax at position {location}: {message}")]
397 SyntaxError {
398 /// Descriptive error message explaining the syntax issue
399 message: String,
400 /// Byte position where syntax error occurred in Perl script
401 location: usize,
402 },
403
404 /// Lexical analysis failure during Perl script tokenization
405 ///
406 /// Indicates character encoding issues or binary content mixed with text during Parse stage.
407 /// Recovery strategy: apply encoding detection and re-attempt tokenization with binary fallbacks.
408 #[error("Lexer error: {message}")]
409 LexerError {
410 /// Detailed lexer error message describing tokenization failure
411 message: String,
412 },
413
414 /// Parser recursion depth exceeded during complex Perl script analysis
415 ///
416 /// Occurs with deeply nested structures in Perl code during Complete stage processing.
417 /// Recovery strategy: flatten recursive structures and process iteratively to maintain performance.
418 #[error("Maximum recursion depth exceeded")]
419 RecursionLimit,
420
421 /// Invalid numeric literal found in Perl script content
422 ///
423 /// Common when processing malformed configuration values during Analyze stage analysis.
424 /// Recovery strategy: substitute default values and log for manual review.
425 #[error("Invalid number literal: {literal}")]
426 InvalidNumber {
427 /// The malformed numeric literal found in Perl script content
428 literal: String,
429 },
430
431 /// Malformed string literal in Perl parsing workflow
432 ///
433 /// Indicates quote mismatches or encoding issues in Perl script strings during parsing.
434 /// Recovery strategy: attempt string repair and normalization before re-parsing.
435 #[error("Invalid string literal")]
436 InvalidString,
437
438 /// Unclosed delimiter detected during Perl code parsing
439 ///
440 /// Commonly found in truncated or corrupted Perl script content during Parse stage.
441 /// Recovery strategy: auto-close delimiters and continue parsing with synthetic boundaries.
442 #[error("Unclosed delimiter: {delimiter}")]
443 UnclosedDelimiter {
444 /// The delimiter character that was left unclosed
445 delimiter: char,
446 },
447
448 /// Invalid regular expression syntax in Perl parsing workflow
449 ///
450 /// Occurs when parsing regex patterns in data filters during Navigate stage analysis.
451 /// Recovery strategy: fallback to literal string matching and preserve original pattern.
452 #[error("Invalid regex: {message}")]
453 InvalidRegex {
454 /// Specific error message describing regex syntax issue
455 message: String,
456 },
457
458 /// Nesting depth limit exceeded for recursive structures
459 #[error("Nesting depth limit exceeded: {depth} > {max_depth}")]
460 NestingTooDeep {
461 /// Current nesting depth
462 depth: usize,
463 /// Maximum allowed depth
464 max_depth: usize,
465 },
466
467 /// Parsing was cancelled by an external cancellation token
468 #[error("Parsing cancelled")]
469 Cancelled,
470
471 /// A syntax error was recovered from — parsing continued with a synthetic node.
472 ///
473 /// This variant is emitted alongside the partial AST node that was produced
474 /// by the recovery. LSP providers iterate `parser.errors()` and count
475 /// `Recovered` variants to determine confidence for gating features.
476 #[error("Recovered from {kind:?} at {site:?} (position {location})")]
477 Recovered {
478 /// Where in the parse tree the recovery occurred.
479 site: RecoverySite,
480 /// What kind of repair was applied.
481 kind: RecoveryKind,
482 /// Byte offset of the recovery point in the source.
483 location: usize,
484 },
485}
486
487/// Error classification and diagnostic generation for parsed Perl code.
488pub mod classifier;
489/// Error recovery strategies and traits for the Perl parser.
490pub mod recovery;
491
492use perl_ast::Node;
493
494/// Structured output from parsing, combining AST with all diagnostics.
495///
496/// This type replaces the simple `Result<Node, ParseError>` pattern to enable
497/// error recovery. Even when errors occur, parsing continues and produces a
498/// partial AST alongside collected diagnostics.
499///
500/// # Usage
501///
502/// ```ignore
503/// use perl_parser::{Parser, ParseOutput};
504///
505/// let mut parser = Parser::new("my $x = ;");
506/// let output = parser.parse_with_recovery();
507///
508/// // AST is always available (may contain error nodes)
509/// println!("Statements: {:?}", output.ast);
510///
511/// // Diagnostics are collected separately
512/// for error in &output.diagnostics {
513/// println!("Error: {}", error);
514/// }
515///
516/// // Budget tracking shows resource usage
517/// println!("Errors: {}", output.budget_usage.errors_emitted);
518/// ```
519#[derive(Debug, Clone)]
520pub struct ParseOutput {
521 /// The parsed AST. Always present, but may contain error nodes
522 /// if parsing encountered recoverable errors.
523 pub ast: Node,
524
525 /// All diagnostics (errors and warnings) collected during parsing.
526 /// These are ordered by source position.
527 pub diagnostics: Vec<ParseError>,
528
529 /// Budget consumption during this parse.
530 /// Useful for diagnosing pathological inputs.
531 pub budget_usage: BudgetTracker,
532
533 /// Whether parsing completed normally or was terminated early
534 /// due to budget exhaustion.
535 pub terminated_early: bool,
536
537 /// Number of recovery operations applied during this parse.
538 ///
539 /// Counts the [`ParseError::Recovered`] variants in `diagnostics`.
540 /// LSP providers use this as a confidence signal: `0` means a clean parse,
541 /// `> 0` means at least one synthetic repair was made.
542 pub recovered_count: usize,
543}
544
545impl ParseOutput {
546 /// Create a successful parse output with no errors.
547 pub fn success(ast: Node) -> Self {
548 Self {
549 ast,
550 diagnostics: Vec::new(),
551 budget_usage: BudgetTracker::new(),
552 terminated_early: false,
553 recovered_count: 0,
554 }
555 }
556
557 /// Create a parse output with errors.
558 ///
559 /// Note: This re-derives budget_usage from diagnostics count.
560 /// For accurate budget tracking, use `finish()` instead.
561 pub fn with_errors(ast: Node, diagnostics: Vec<ParseError>) -> Self {
562 let mut budget_usage = BudgetTracker::new();
563 budget_usage.errors_emitted = diagnostics.len();
564 let recovered_count =
565 diagnostics.iter().filter(|e| matches!(e, ParseError::Recovered { .. })).count();
566 Self { ast, diagnostics, budget_usage, terminated_early: false, recovered_count }
567 }
568
569 /// Create a parse output with full budget tracking.
570 ///
571 /// This is the preferred constructor when the actual BudgetTracker
572 /// from parsing is available, as it preserves accurate metrics.
573 pub fn finish(
574 ast: Node,
575 diagnostics: Vec<ParseError>,
576 budget_usage: BudgetTracker,
577 terminated_early: bool,
578 ) -> Self {
579 let recovered_count =
580 diagnostics.iter().filter(|e| matches!(e, ParseError::Recovered { .. })).count();
581 Self { ast, diagnostics, budget_usage, terminated_early, recovered_count }
582 }
583
584 /// Check if parse completed without any errors.
585 pub fn is_ok(&self) -> bool {
586 self.diagnostics.is_empty()
587 }
588
589 /// Check if parse had errors.
590 pub fn has_errors(&self) -> bool {
591 !self.diagnostics.is_empty()
592 }
593
594 /// Get the error count.
595 pub fn error_count(&self) -> usize {
596 self.diagnostics.len()
597 }
598}
599
600impl ParseError {
601 /// Create a new syntax error for Perl parsing workflow failures
602 ///
603 /// # Arguments
604 ///
605 /// * `message` - Descriptive error message with context about the syntax issue
606 /// * `location` - Character position within the Perl code where error occurred
607 ///
608 /// # Returns
609 ///
610 /// A [`ParseError::SyntaxError`] variant with embedded location context for recovery strategies
611 ///
612 /// # Examples
613 ///
614 /// ```rust
615 /// use perl_error::ParseError;
616 ///
617 /// let error = ParseError::syntax("Missing semicolon in Perl script", 42);
618 /// assert!(matches!(error, ParseError::SyntaxError { .. }));
619 /// ```
620 pub fn syntax(message: impl Into<String>, location: usize) -> Self {
621 ParseError::SyntaxError { message: message.into(), location }
622 }
623
624 /// Create a new unexpected token error during Perl script parsing
625 ///
626 /// # Arguments
627 ///
628 /// * `expected` - Token type that was expected by the parser
629 /// * `found` - Actual token type that was encountered
630 /// * `location` - Character position where the unexpected token was found
631 ///
632 /// # Returns
633 ///
634 /// A [`ParseError::UnexpectedToken`] variant with detailed token mismatch information
635 ///
636 /// # Examples
637 ///
638 /// ```rust
639 /// use perl_error::ParseError;
640 ///
641 /// let error = ParseError::unexpected("semicolon", "comma", 15);
642 /// assert!(matches!(error, ParseError::UnexpectedToken { .. }));
643 /// ```
644 ///
645 /// # Email Processing Context
646 ///
647 /// This is commonly used during the Analyze stage when Perl scripts contain
648 /// syntax variations that require token-level recovery strategies.
649 pub fn unexpected(
650 expected: impl Into<String>,
651 found: impl Into<String>,
652 location: usize,
653 ) -> Self {
654 ParseError::UnexpectedToken { expected: expected.into(), found: found.into(), location }
655 }
656
657 /// Get the byte location of the error if available
658 pub fn location(&self) -> Option<usize> {
659 match self {
660 ParseError::UnexpectedToken { location, .. } => Some(*location),
661 ParseError::SyntaxError { location, .. } => Some(*location),
662 ParseError::Recovered { location, .. } => Some(*location),
663 _ => None,
664 }
665 }
666
667 /// Generate a fix suggestion based on the error type
668 pub fn suggestion(&self) -> Option<String> {
669 match self {
670 ParseError::UnexpectedToken { expected, found, .. } => {
671 // Check for common missing delimiters
672 if expected.contains(';') {
673 return Some("add a semicolon ';' at the end of the statement".to_string());
674 }
675 if expected.contains('}') {
676 return Some("add a closing brace '}' to end the block".to_string());
677 }
678 if expected.contains(')') {
679 return Some("add a closing parenthesis ')' to end the group".to_string());
680 }
681 if expected.contains(']') {
682 return Some("add a closing bracket ']' to end the array".to_string());
683 }
684 // Fat arrow found where expression expected — likely a missing value
685 // before a hash pair separator
686 if expected.contains("expression") && found.contains("=>") {
687 return Some(
688 "'=>' (fat arrow) is not valid here; \
689 did you forget a value before it?"
690 .to_string(),
691 );
692 }
693 // Arrow found where expression expected
694 if expected.contains("expression") && found.contains("->") {
695 return Some(
696 "'->' (arrow) is not valid here; \
697 did you forget the object or reference before it?"
698 .to_string(),
699 );
700 }
701 // Expected a variable (e.g. after my/our/local/state)
702 if expected.to_lowercase().contains("variable") {
703 return Some(
704 "expected a variable like $foo, @bar, or %hash after the declaration keyword"
705 .to_string(),
706 );
707 }
708 None
709 }
710 ParseError::UnclosedDelimiter { delimiter } => {
711 Some(format!("add closing '{}' to complete the literal", delimiter))
712 }
713 _ => None,
714 }
715 }
716}
717
718/// Enrich a list of errors with source context
719pub fn get_error_contexts(errors: &[ParseError], source: &str) -> Vec<ErrorContext> {
720 let index = LineIndex::new(source.to_string());
721
722 errors
723 .iter()
724 .map(|error| {
725 let loc = error.location().unwrap_or(source.len());
726 // Handle EOF/out-of-bounds safely
727 let safe_loc = std::cmp::min(loc, source.len());
728
729 let (line_u32, col_u32) = index.offset_to_position(safe_loc);
730 let line = line_u32 as usize;
731 let col = col_u32 as usize;
732
733 let source_line = source.lines().nth(line).unwrap_or("").to_string();
734
735 ErrorContext {
736 error: error.clone(),
737 line,
738 column: col,
739 source_line,
740 suggestion: error.suggestion(),
741 }
742 })
743 .collect()
744}
745
#[cfg(test)]
mod tests {
    use super::*;
    use perl_ast::{Node, NodeKind, SourceLocation};

    /// Build an empty `Program` node spanning offsets 0..0, used as the AST
    /// fixture by every `ParseOutput` test.
    fn empty_program() -> Node {
        Node::new(
            NodeKind::Program { statements: vec![] },
            SourceLocation { start: 0, end: 0 },
        )
    }

    #[test]
    fn test_parse_budget_defaults() {
        let budget = ParseBudget::default();
        assert_eq!(budget.max_errors, 100);
        assert_eq!(budget.max_depth, 256);
        assert_eq!(budget.max_tokens_skipped, 1000);
        assert_eq!(budget.max_recoveries, 500);
    }

    #[test]
    fn test_parse_budget_strict() {
        let budget = ParseBudget::strict();
        assert_eq!(budget.max_errors, 10);
        assert_eq!(budget.max_depth, 64);
        assert_eq!(budget.max_tokens_skipped, 100);
        assert_eq!(budget.max_recoveries, 50);
    }

    #[test]
    fn test_budget_tracker_errors() {
        let budget = ParseBudget { max_errors: 3, ..Default::default() };
        let mut tracker = BudgetTracker::new();

        assert!(!tracker.errors_exhausted(&budget));

        tracker.record_error();
        tracker.record_error();
        assert!(!tracker.errors_exhausted(&budget));

        tracker.record_error();
        assert!(tracker.errors_exhausted(&budget));
    }

    #[test]
    fn test_budget_tracker_depth() {
        let budget = ParseBudget { max_depth: 2, ..Default::default() };
        let mut tracker = BudgetTracker::new();

        assert!(!tracker.depth_would_exceed(&budget));

        tracker.enter_depth();
        assert!(!tracker.depth_would_exceed(&budget));

        tracker.enter_depth();
        assert!(tracker.depth_would_exceed(&budget));

        tracker.exit_depth();
        assert!(!tracker.depth_would_exceed(&budget));
    }

    #[test]
    fn test_budget_tracker_skip() {
        let budget = ParseBudget { max_tokens_skipped: 5, ..Default::default() };
        let mut tracker = BudgetTracker::new();

        assert!(!tracker.skip_would_exceed(&budget, 3));
        tracker.record_skip(3);

        assert!(!tracker.skip_would_exceed(&budget, 2));
        assert!(tracker.skip_would_exceed(&budget, 3));
    }

    #[test]
    fn test_budget_tracker_recoveries() {
        let budget = ParseBudget { max_recoveries: 2, ..Default::default() };
        let mut tracker = BudgetTracker::new();

        assert!(!tracker.recoveries_exhausted(&budget));

        tracker.record_recovery();
        assert!(!tracker.recoveries_exhausted(&budget));

        tracker.record_recovery();
        assert!(tracker.recoveries_exhausted(&budget));
    }

    #[test]
    fn test_parse_output_success() {
        let output = ParseOutput::success(empty_program());

        assert!(output.is_ok());
        assert!(!output.has_errors());
        assert_eq!(output.error_count(), 0);
        assert!(!output.terminated_early);
        assert_eq!(output.budget_usage.errors_emitted, 0);
    }

    #[test]
    fn test_parse_output_with_errors() {
        let errors = vec![ParseError::syntax("error 1", 0), ParseError::syntax("error 2", 5)];
        let output = ParseOutput::with_errors(empty_program(), errors);

        assert!(!output.is_ok());
        assert!(output.has_errors());
        assert_eq!(output.error_count(), 2);
        // `with_errors` derives the tracker's error count from the diagnostics.
        assert_eq!(output.budget_usage.errors_emitted, 2);
        assert_eq!(output.recovered_count, 0);
        assert!(!output.terminated_early);
    }

    #[test]
    fn test_parse_output_finish_preserves_tracker() {
        let errors = vec![ParseError::syntax("error 1", 0)];

        // Create a tracker with specific values
        let mut tracker = BudgetTracker::new();
        tracker.errors_emitted = 5;
        tracker.tokens_skipped = 42;
        tracker.recoveries_attempted = 3;
        tracker.max_depth_reached = 10;

        let output = ParseOutput::finish(empty_program(), errors, tracker, true);

        // Verify all tracker values are preserved
        assert_eq!(output.budget_usage.errors_emitted, 5);
        assert_eq!(output.budget_usage.tokens_skipped, 42);
        assert_eq!(output.budget_usage.recoveries_attempted, 3);
        assert_eq!(output.budget_usage.max_depth_reached, 10);
        assert!(output.terminated_early);
        assert_eq!(output.error_count(), 1);
    }

    #[test]
    fn test_begin_recovery_checks_budget_first() {
        let budget = ParseBudget { max_recoveries: 0, ..Default::default() };
        let mut tracker = BudgetTracker::new();

        // Should fail immediately - budget is 0
        assert!(!tracker.begin_recovery(&budget));
        assert_eq!(tracker.recoveries_attempted, 0);
    }

    #[test]
    fn test_can_skip_more_boundary_conditions() {
        let budget = ParseBudget { max_tokens_skipped: 10, ..Default::default() };
        let mut tracker = BudgetTracker::new();

        // At 0 skipped, can skip up to 10
        assert!(tracker.can_skip_more(&budget, 10));
        assert!(!tracker.can_skip_more(&budget, 11));

        // Skip 5
        tracker.record_skip(5);

        // At 5 skipped, can skip up to 5 more
        assert!(tracker.can_skip_more(&budget, 5));
        assert!(!tracker.can_skip_more(&budget, 6));

        // Skip 5 more to reach limit
        tracker.record_skip(5);

        // At limit, cannot skip any more
        assert!(!tracker.can_skip_more(&budget, 1));
        assert!(tracker.can_skip_more(&budget, 0));
    }

    #[test]
    fn test_error_context_enrichment() {
        let source = "line1\nline2;\nline3";
        // 'e' of line1 is at 4. 5 is newline.
        let errors = vec![ParseError::unexpected("';'", "newline", 5)];

        let contexts = get_error_contexts(&errors, source);
        assert_eq!(contexts.len(), 1);
        assert_eq!(contexts[0].line, 0); // line1 is line 0
        assert_eq!(contexts[0].source_line, "line1");
        let suggestion = contexts[0].suggestion.as_deref().unwrap_or("");
        assert!(suggestion.contains("semicolon"));
    }

    #[test]
    fn test_recovery_site_and_kind_variants() {
        // Verify all RecoverySite and RecoveryKind variants are constructible and comparable.
        let sites = [
            RecoverySite::ArgList,
            RecoverySite::ArraySubscript,
            RecoverySite::HashSubscript,
            RecoverySite::PostfixChain,
            RecoverySite::InfixRhs,
        ];
        let kinds = [
            RecoveryKind::InsertedCloser,
            RecoveryKind::MissingOperand,
            RecoveryKind::TruncatedChain,
            RecoveryKind::InferredSemicolon,
        ];
        // Each site and kind is debug-formattable and clone-able.
        for s in &sites {
            let _ = format!("{s:?}");
            let _ = s.clone();
        }
        for k in &kinds {
            let _ = format!("{k:?}");
            let _ = k.clone();
        }
        // PartialEq works.
        assert_eq!(RecoverySite::ArgList, RecoverySite::ArgList);
        assert_ne!(RecoverySite::ArgList, RecoverySite::PostfixChain);
        assert_eq!(RecoveryKind::InsertedCloser, RecoveryKind::InsertedCloser);
        assert_ne!(RecoveryKind::InsertedCloser, RecoveryKind::MissingOperand);
    }

    #[test]
    fn test_parse_error_recovered_variant() {
        let err = ParseError::Recovered {
            site: RecoverySite::ArgList,
            kind: RecoveryKind::InsertedCloser,
            location: 42,
        };
        // location() returns Some for Recovered variant.
        assert_eq!(err.location(), Some(42));
        // suggestion() returns None for Recovered.
        assert!(err.suggestion().is_none());
        // Display (via thiserror) must include the kind, the site and the
        // position; an `||` of substring checks would let a missing part
        // slip through.
        assert_eq!(err.to_string(), "Recovered from InsertedCloser at ArgList (position 42)");
    }

    #[test]
    fn test_parse_output_recovered_count_with_errors() {
        let errors = vec![
            ParseError::syntax("error 1", 0),
            ParseError::Recovered {
                site: RecoverySite::ArgList,
                kind: RecoveryKind::MissingOperand,
                location: 10,
            },
            ParseError::Recovered {
                site: RecoverySite::PostfixChain,
                kind: RecoveryKind::TruncatedChain,
                location: 20,
            },
        ];
        let output = ParseOutput::with_errors(empty_program(), errors);

        assert_eq!(output.error_count(), 3);
        assert_eq!(output.recovered_count, 2);
    }

    #[test]
    fn test_parse_output_success_has_zero_recovered_count() {
        let output = ParseOutput::success(empty_program());
        assert_eq!(output.recovered_count, 0);
    }

    #[test]
    fn test_parse_output_finish_recovered_count() {
        let errors = vec![
            ParseError::syntax("error", 0),
            ParseError::Recovered {
                site: RecoverySite::InfixRhs,
                kind: RecoveryKind::InferredSemicolon,
                location: 5,
            },
        ];
        let tracker = BudgetTracker::new();
        let output = ParseOutput::finish(empty_program(), errors, tracker, false);

        assert_eq!(output.recovered_count, 1);
        assert!(!output.terminated_early);
    }
}