perl_parser_core/syntax/error/mod.rs
1//! Error types for the Perl parser within the Perl parsing workflow pipeline
2//!
3//! This module defines comprehensive error handling for Perl parsing operations that occur
4//! throughout the Perl parsing workflow: Parse → Index → Navigate → Complete → Analyze.
5//!
6//! # Error Recovery Strategy
7//!
8//! When parsing errors occur during Perl parsing:
9//! 1. **Parse stage**: Parsing failures indicate corrupted or malformed Perl source
10//! 2. **Analyze stage**: Syntax errors suggest script inconsistencies requiring fallback processing
11//! 3. **Navigate stage**: Parse failures can break thread analysis - graceful degradation applies
12//! 4. **Complete stage**: Errors impact output generation but preserve original content
13//! 5. **Analyze stage**: Parse failures affect search indexing but maintain basic metadata
14//!
15//! # Performance Context
16//!
17//! Error handling is optimized for large Perl codebase processing scenarios with minimal memory overhead
18//! and fast recovery paths to maintain enterprise-scale performance targets.
19//!
20//! # Usage Examples
21//!
22//! ## Basic Error Handling
23//!
24//! ```ignore
25//! use perl_parser::{Parser, ParseError, ParseResult};
26//!
27//! fn parse_with_error_handling(code: &str) -> ParseResult<()> {
28//! let mut parser = Parser::new(code);
29//! match parser.parse() {
30//! Ok(ast) => {
31//! println!("Parsing successful");
32//! Ok(())
33//! }
34//! Err(ParseError::UnexpectedEof) => {
35//! eprintln!("Incomplete code: unexpected end of input");
36//! Err(ParseError::UnexpectedEof)
37//! }
38//! Err(ParseError::UnexpectedToken { found, expected, location }) => {
39//! eprintln!("Syntax error at position {}: found '{}', expected '{}'",
40//! location, found, expected);
41//! Err(ParseError::UnexpectedToken { found, expected, location })
42//! }
43//! Err(e) => {
44//! eprintln!("Parse error: {}", e);
45//! Err(e)
46//! }
47//! }
48//! }
49//! ```
50//!
51//! ## Error Recovery in LSP Context
52//!
53//! ```ignore
54//! use perl_parser::{Parser, ParseError, error_recovery::ErrorRecovery};
55//!
56//! fn parse_with_recovery(code: &str) -> Vec<String> {
57//! let mut parser = Parser::new(code);
58//! let mut errors = Vec::new();
59//!
60//! match parser.parse() {
61//! Ok(_) => println!("Parse successful"),
62//! Err(err) => {
63//! // Log error for diagnostics
64//! errors.push(format!("Parse error: {}", err));
65//!
66//! // Attempt error recovery for LSP
67//! match err {
68//! ParseError::UnexpectedToken { .. } => {
69//! // Continue parsing from next statement
70//! println!("Attempting recovery...");
71//! }
72//! ParseError::RecursionLimit => {
73//! // Use iterative parsing approach
74//! println!("Switching to iterative parsing...");
75//! }
76//! _ => {
77//! // Use fallback parsing strategy
78//! println!("Using fallback parsing...");
79//! }
80//! }
81//! }
82//! }
83//! errors
84//! }
85//! ```
86//!
87//! ## Comprehensive Error Context
88//!
89//! ```
90//! use perl_error::ParseError;
91//!
92//! fn create_detailed_error() -> ParseError {
93//! ParseError::UnexpectedToken {
94//! found: "number".to_string(),
95//! expected: "identifier".to_string(),
96//! location: 10, // byte position 10
97//! }
98//! }
99//!
100//! fn handle_error_with_context(error: &ParseError) {
101//! match error {
102//! ParseError::UnexpectedToken { found, expected, location } => {
103//! println!("Syntax error at byte position {}: found '{}', expected '{}'",
104//! location, found, expected);
105//! }
106//! ParseError::UnexpectedEof => {
107//! println!("Incomplete input: unexpected end of file");
108//! }
109//! _ => {
110//! println!("Parse error: {}", error);
111//! }
112//! }
113//! }
114//! ```
115
116use perl_position_tracking::LineIndex;
117use thiserror::Error;
118
119#[derive(Debug, Clone)]
120/// Rich error context with source line and fix suggestions
121pub struct ErrorContext {
122 /// The original parse error
123 pub error: ParseError,
124 /// Line number (0-indexed)
125 pub line: usize,
126 /// Column number (0-indexed)
127 pub column: usize,
128 /// The actual source line text
129 pub source_line: String,
130 /// Optional fix suggestion
131 pub suggestion: Option<String>,
132}
133
134impl From<perl_regex::RegexError> for ParseError {
135 fn from(err: perl_regex::RegexError) -> Self {
136 match err {
137 perl_regex::RegexError::Syntax { message, offset } => {
138 ParseError::syntax(message, offset)
139 }
140 }
141 }
142}
143
/// Where in the parse tree a recovery was performed.
///
/// Used by [`ParseError::Recovered`] to describe the syntactic context in which
/// the parser applied a recovery strategy. LSP providers use this to decide
/// which features can still be offered after a recovery.
///
/// The enum carries no data, so it implements `Eq` and `Hash` in addition to
/// `PartialEq`; this lets consumers group or count recoveries per site with a
/// `HashMap`/`HashSet`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum RecoverySite {
    /// Inside a parenthesised argument list `(...)`.
    ArgList,
    /// Inside an array subscript `[...]`.
    ArraySubscript,
    /// Inside a hash subscript `{...}`.
    HashSubscript,
    /// After a `->` dereference arrow (postfix chain).
    PostfixChain,
    /// After a binary infix operator (right-hand side missing).
    InfixRhs,
}
162
/// What kind of recovery was applied at a [`RecoverySite`].
///
/// Pairs with [`RecoverySite`] in [`ParseError::Recovered`] to describe the
/// exact repair the parser made. This information lets consumers (e.g. LSP
/// providers) understand the confidence level of the resulting AST region.
///
/// The enum carries no data, so it implements `Eq` and `Hash` in addition to
/// `PartialEq`; this lets consumers group or count recoveries per kind with a
/// `HashMap`/`HashSet`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum RecoveryKind {
    /// A synthetic closing delimiter (`)` or `]`) was inferred.
    InsertedCloser,
    /// A `NodeKind::MissingExpression` placeholder was inserted.
    // Plain code formatting rather than an intra-doc link: `NodeKind` is not
    // imported by name in this module, so a link would not resolve.
    MissingOperand,
    /// A postfix chain was cut short due to a missing continuation.
    TruncatedChain,
    /// A statement boundary (`;`) was inferred from context.
    InferredSemicolon,
}
179
/// Upper bounds on the work a single parse is allowed to perform.
///
/// The limits exist so that parsing terminates in bounded time even on
/// malformed or adversarial input. [`ParseBudget::default`] supplies values
/// intended for ordinary Perl code; the other constructors offer stricter or
/// unbounded presets.
///
/// # Usage
///
/// ```
/// use perl_error::ParseBudget;
///
/// // Use defaults for normal parsing
/// let budget = ParseBudget::default();
///
/// // Stricter limits for untrusted input
/// let strict = ParseBudget {
///     max_errors: 10,
///     max_depth: 64,
///     max_tokens_skipped: 100,
///     max_recoveries: 50,
/// };
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ParseBudget {
    /// Number of errors to collect before giving up, so diagnostics are not
    /// flooded. Default: 100.
    pub max_errors: usize,

    /// Deepest allowed nesting of recursive constructs (blocks, expressions);
    /// guards against stack overflow on deeply nested input. Default: 256.
    pub max_depth: usize,

    /// Token-skipping allowance for recovery; keeps recovery from looping
    /// forever when no synchronisation point is found. Default: 1000.
    pub max_tokens_skipped: usize,

    /// Number of recovery attempts allowed per parse; bounds the total
    /// recovery work. Default: 500.
    pub max_recoveries: usize,
}

impl Default for ParseBudget {
    /// The standard budget: 100 errors, depth 256, 1000 skipped tokens,
    /// 500 recoveries.
    fn default() -> Self {
        Self {
            max_errors: 100,
            max_depth: 256,
            max_tokens_skipped: 1000,
            max_recoveries: 500,
        }
    }
}

impl ParseBudget {
    /// Budget for IDE/LSP usage; currently identical to the default budget.
    pub fn for_ide() -> Self {
        <Self as Default>::default()
    }

    /// Tight budget for parsing untrusted input: 10 errors, depth 64,
    /// 100 skipped tokens, 50 recoveries.
    pub fn strict() -> Self {
        Self {
            max_errors: 10,
            max_depth: 64,
            max_tokens_skipped: 100,
            max_recoveries: 50,
        }
    }

    /// Budget with every limit set to `usize::MAX` (use with caution).
    pub fn unlimited() -> Self {
        let no_limit = usize::MAX;
        Self {
            max_errors: no_limit,
            max_depth: no_limit,
            max_tokens_skipped: no_limit,
            max_recoveries: no_limit,
        }
    }
}
252
253/// Tracks budget consumption during parsing.
254///
255/// This struct monitors how much of the parse budget has been used
256/// and provides methods to check and consume budget atomically.
257#[derive(Debug, Clone, Default)]
258pub struct BudgetTracker {
259 /// Number of errors emitted so far.
260 pub errors_emitted: usize,
261 /// Current nesting depth.
262 pub current_depth: usize,
263 /// Maximum depth reached during parse.
264 pub max_depth_reached: usize,
265 /// Total tokens skipped across all recovery attempts.
266 pub tokens_skipped: usize,
267 /// Number of recovery attempts made.
268 pub recoveries_attempted: usize,
269}
270
271impl BudgetTracker {
272 /// Create a new budget tracker.
273 pub fn new() -> Self {
274 Self::default()
275 }
276
277 /// Check if error budget is exhausted.
278 pub fn errors_exhausted(&self, budget: &ParseBudget) -> bool {
279 self.errors_emitted >= budget.max_errors
280 }
281
282 /// Check if depth budget would be exceeded by going one level deeper.
283 pub fn depth_would_exceed(&self, budget: &ParseBudget) -> bool {
284 self.current_depth >= budget.max_depth
285 }
286
287 /// Check if skip budget would be exceeded by skipping `count` more tokens.
288 pub fn skip_would_exceed(&self, budget: &ParseBudget, count: usize) -> bool {
289 self.tokens_skipped.saturating_add(count) > budget.max_tokens_skipped
290 }
291
292 /// Check if recovery budget is exhausted.
293 pub fn recoveries_exhausted(&self, budget: &ParseBudget) -> bool {
294 self.recoveries_attempted >= budget.max_recoveries
295 }
296
297 /// Begin a recovery attempt, checking budget first.
298 ///
299 /// Returns `false` if another recovery attempt would exceed the budget.
300 /// If this returns `true`, the recovery attempt has been recorded.
301 pub fn begin_recovery(&mut self, budget: &ParseBudget) -> bool {
302 if self.recoveries_attempted >= budget.max_recoveries {
303 return false;
304 }
305 self.recoveries_attempted = self.recoveries_attempted.saturating_add(1);
306 true
307 }
308
309 /// Check if skipping `additional` more tokens would stay within budget.
310 ///
311 /// This considers both already-skipped tokens and the proposed additional count.
312 pub fn can_skip_more(&self, budget: &ParseBudget, additional: usize) -> bool {
313 self.tokens_skipped.saturating_add(additional) <= budget.max_tokens_skipped
314 }
315
316 /// Record an error emission.
317 pub fn record_error(&mut self) {
318 self.errors_emitted = self.errors_emitted.saturating_add(1);
319 }
320
321 /// Enter a deeper nesting level.
322 pub fn enter_depth(&mut self) {
323 self.current_depth = self.current_depth.saturating_add(1);
324 if self.current_depth > self.max_depth_reached {
325 self.max_depth_reached = self.current_depth;
326 }
327 }
328
329 /// Exit a nesting level.
330 pub fn exit_depth(&mut self) {
331 self.current_depth = self.current_depth.saturating_sub(1);
332 }
333
334 /// Record tokens skipped during recovery.
335 pub fn record_skip(&mut self, count: usize) {
336 self.tokens_skipped = self.tokens_skipped.saturating_add(count);
337 }
338
339 /// Record a recovery attempt.
340 pub fn record_recovery(&mut self) {
341 self.recoveries_attempted = self.recoveries_attempted.saturating_add(1);
342 }
343}
344
345/// Result type for parser operations in the Perl parsing workflow pipeline
346///
347/// This type encapsulates success/failure outcomes throughout the Parse → Index →
348/// Navigate → Complete → Analyze workflow, enabling consistent error propagation and recovery
349/// strategies across all pipeline stages.
350pub type ParseResult<T> = Result<T, ParseError>;
351
352#[derive(Error, Debug, Clone, PartialEq)]
353/// Comprehensive error types that can occur during Perl parsing workflows
354///
355/// These errors are designed to provide detailed context about parsing failures that occur during
356/// Perl code analysis, script processing, and metadata extraction. Each error variant includes
357/// location information to enable precise recovery strategies in large Perl file processing scenarios.
358///
359/// # Error Recovery Patterns
360///
361/// - **Syntax Errors**: Attempt fallback parsing or skip problematic content sections
362/// - **Lexer Errors**: Re-tokenize with relaxed rules or binary content detection
363/// - **Recursion Limits**: Flatten deeply nested structures or process iteratively
364/// - **String Handling**: Apply encoding detection and normalization workflows
365///
366/// # Enterprise Scale Considerations
367///
368/// Error handling is optimized for large Perl files and multi-file workspaces, ensuring
369/// memory-efficient error propagation and logging.
370pub enum ParseError {
371 /// Parser encountered unexpected end of input during Perl code analysis
372 ///
373 /// This occurs when processing truncated Perl scripts or incomplete Perl source during
374 /// the Parse stage. Recovery strategy: attempt partial parsing and preserve available content.
375 #[error("Unexpected end of input")]
376 UnexpectedEof,
377
378 /// Parser found an unexpected token during Perl parsing workflow
379 ///
380 /// Common during Analyze stage when Perl scripts contain syntax variations or encoding issues.
381 /// Recovery strategy: skip problematic tokens and attempt continued parsing with relaxed rules.
382 #[error("expected {expected}, found {found} at position {location}")]
383 UnexpectedToken {
384 /// Token type that was expected during Perl script parsing
385 expected: String,
386 /// Actual token found in Perl script content
387 found: String,
388 /// Byte position where unexpected token was encountered
389 location: usize,
390 },
391
392 /// General syntax error occurred during Perl code parsing
393 ///
394 /// This encompasses malformed Perl constructs found in Perl scripts during Navigate stage analysis.
395 /// Recovery strategy: isolate syntax error scope and continue processing surrounding content.
396 #[error("Invalid syntax at position {location}: {message}")]
397 SyntaxError {
398 /// Descriptive error message explaining the syntax issue
399 message: String,
400 /// Byte position where syntax error occurred in Perl script
401 location: usize,
402 },
403
404 /// Lexical analysis failure during Perl script tokenization
405 ///
406 /// Indicates character encoding issues or binary content mixed with text during Parse stage.
407 /// Recovery strategy: apply encoding detection and re-attempt tokenization with binary fallbacks.
408 #[error("Lexer error: {message}")]
409 LexerError {
410 /// Detailed lexer error message describing tokenization failure
411 message: String,
412 },
413
414 /// Parser recursion depth exceeded during complex Perl script analysis
415 ///
416 /// Occurs with deeply nested structures in Perl code during Complete stage processing.
417 /// Recovery strategy: flatten recursive structures and process iteratively to maintain performance.
418 #[error("Maximum recursion depth exceeded")]
419 RecursionLimit,
420
421 /// Invalid numeric literal found in Perl script content
422 ///
423 /// Common when processing malformed configuration values during Analyze stage analysis.
424 /// Recovery strategy: substitute default values and log for manual review.
425 #[error("Invalid number literal: {literal}")]
426 InvalidNumber {
427 /// The malformed numeric literal found in Perl script content
428 literal: String,
429 },
430
431 /// Malformed string literal in Perl parsing workflow
432 ///
433 /// Indicates quote mismatches or encoding issues in Perl script strings during parsing.
434 /// Recovery strategy: attempt string repair and normalization before re-parsing.
435 #[error("Invalid string literal")]
436 InvalidString,
437
438 /// Unclosed delimiter detected during Perl code parsing
439 ///
440 /// Commonly found in truncated or corrupted Perl script content during Parse stage.
441 /// Recovery strategy: auto-close delimiters and continue parsing with synthetic boundaries.
442 #[error("Unclosed delimiter: {delimiter}")]
443 UnclosedDelimiter {
444 /// The delimiter character that was left unclosed
445 delimiter: char,
446 },
447
448 /// Invalid regular expression syntax in Perl parsing workflow
449 ///
450 /// Occurs when parsing regex patterns in data filters during Navigate stage analysis.
451 /// Recovery strategy: fallback to literal string matching and preserve original pattern.
452 #[error("Invalid regex: {message}")]
453 InvalidRegex {
454 /// Specific error message describing regex syntax issue
455 message: String,
456 },
457
458 /// Nesting depth limit exceeded for recursive structures
459 #[error("Nesting depth limit exceeded: {depth} > {max_depth}")]
460 NestingTooDeep {
461 /// Current nesting depth
462 depth: usize,
463 /// Maximum allowed depth
464 max_depth: usize,
465 },
466
467 /// Parsing was cancelled by an external cancellation token
468 #[error("Parsing cancelled")]
469 Cancelled,
470
471 /// A syntax error was recovered from — parsing continued with a synthetic node.
472 ///
473 /// This variant is emitted alongside the partial AST node that was produced
474 /// by the recovery. LSP providers iterate `parser.errors()` and count
475 /// `Recovered` variants to determine confidence for gating features.
476 #[error("Recovered from {kind:?} at {site:?} (position {location})")]
477 Recovered {
478 /// Where in the parse tree the recovery occurred.
479 site: RecoverySite,
480 /// What kind of repair was applied.
481 kind: RecoveryKind,
482 /// Byte offset of the recovery point in the source.
483 location: usize,
484 },
485}
486
487/// Error classification and diagnostic generation for parsed Perl code.
488pub mod classifier;
489/// Error recovery strategies and traits for the Perl parser.
490pub mod recovery;
491
492use perl_ast::Node;
493
494/// Structured output from parsing, combining AST with all diagnostics.
495///
496/// This type replaces the simple `Result<Node, ParseError>` pattern to enable
497/// error recovery. Even when errors occur, parsing continues and produces a
498/// partial AST alongside collected diagnostics.
499///
500/// # Usage
501///
502/// ```ignore
503/// use perl_parser::{Parser, ParseOutput};
504///
505/// let mut parser = Parser::new("my $x = ;");
506/// let output = parser.parse_with_recovery();
507///
508/// // AST is always available (may contain error nodes)
509/// println!("Statements: {:?}", output.ast);
510///
511/// // Diagnostics are collected separately
512/// for error in &output.diagnostics {
513/// println!("Error: {}", error);
514/// }
515///
516/// // Budget tracking shows resource usage
517/// println!("Errors: {}", output.budget_usage.errors_emitted);
518/// ```
519#[derive(Debug, Clone)]
520pub struct ParseOutput {
521 /// The parsed AST. Always present, but may contain error nodes
522 /// if parsing encountered recoverable errors.
523 pub ast: Node,
524
525 /// All diagnostics (errors and warnings) collected during parsing.
526 /// These are ordered by source position.
527 pub diagnostics: Vec<ParseError>,
528
529 /// Budget consumption during this parse.
530 /// Useful for diagnosing pathological inputs.
531 pub budget_usage: BudgetTracker,
532
533 /// Whether parsing completed normally or was terminated early
534 /// due to budget exhaustion.
535 pub terminated_early: bool,
536
537 /// Number of recovery operations applied during this parse.
538 ///
539 /// Counts the [`ParseError::Recovered`] variants in `diagnostics`.
540 /// LSP providers use this as a confidence signal: `0` means a clean parse,
541 /// `> 0` means at least one synthetic repair was made.
542 pub recovered_count: usize,
543}
544
/// Coarse outcome category for one parsed file.
///
/// Corpus-level reports use it to tell structured recovery apart from
/// unrecovered parser damage and from outright failure.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RecoverySalvageClass {
    /// Neither diagnostics nor `ERROR` AST nodes were produced.
    Clean,
    /// Structured recovery diagnostics were emitted, but no `ERROR` nodes.
    StructuredRecoveryOnly,
    /// The AST contains at least one `ERROR` node.
    ErrorNodesPresent,
    /// The parse failed catastrophically (`parse()` returned `Err`).
    CatastrophicFailure,
}
560
561/// Per-file recovery/salvage summary.
562#[derive(Debug, Clone, PartialEq, Eq)]
563pub struct RecoverySalvageProfile {
564 /// Whether this parse was a catastrophic failure.
565 pub catastrophic: bool,
566 /// Number of `ParseError::Recovered` diagnostics observed.
567 pub recovered_count: usize,
568 /// Number of `NodeKind::Error` nodes observed in the AST.
569 pub error_node_count: usize,
570 /// Message from the earliest unrecovered `ERROR` node, if any.
571 pub first_unrecovered_error_node: Option<String>,
572 /// Coarse classification used by corpus closeout reports.
573 pub class: RecoverySalvageClass,
574}
575
576impl RecoverySalvageProfile {
577 /// Build a recovery/salvage profile for one parsed file.
578 pub fn from_parse(ast: &Node, diagnostics: &[ParseError], catastrophic: bool) -> Self {
579 let mut error_node_count = 0usize;
580 let mut first_start = usize::MAX;
581 let mut first_unrecovered_error_node: Option<String> = None;
582
583 fn walk(
584 node: &Node,
585 error_node_count: &mut usize,
586 first_start: &mut usize,
587 first_unrecovered_error_node: &mut Option<String>,
588 ) {
589 if let perl_ast::NodeKind::Error { message, .. } = &node.kind {
590 *error_node_count = error_node_count.saturating_add(1);
591 if node.location.start < *first_start {
592 *first_start = node.location.start;
593 *first_unrecovered_error_node = Some(message.clone());
594 }
595 }
596 node.for_each_child(|child| {
597 walk(child, error_node_count, first_start, first_unrecovered_error_node);
598 });
599 }
600 walk(ast, &mut error_node_count, &mut first_start, &mut first_unrecovered_error_node);
601
602 let recovered_count =
603 diagnostics.iter().filter(|e| matches!(e, ParseError::Recovered { .. })).count();
604
605 let class = if catastrophic {
606 RecoverySalvageClass::CatastrophicFailure
607 } else if error_node_count > 0 {
608 RecoverySalvageClass::ErrorNodesPresent
609 } else if recovered_count > 0 {
610 RecoverySalvageClass::StructuredRecoveryOnly
611 } else {
612 RecoverySalvageClass::Clean
613 };
614
615 Self {
616 catastrophic,
617 recovered_count,
618 error_node_count,
619 first_unrecovered_error_node,
620 class,
621 }
622 }
623}
624
625impl ParseOutput {
626 /// Create a successful parse output with no errors.
627 pub fn success(ast: Node) -> Self {
628 Self {
629 ast,
630 diagnostics: Vec::new(),
631 budget_usage: BudgetTracker::new(),
632 terminated_early: false,
633 recovered_count: 0,
634 }
635 }
636
637 /// Create a parse output with errors.
638 ///
639 /// Note: This re-derives budget_usage from diagnostics count.
640 /// For accurate budget tracking, use `finish()` instead.
641 pub fn with_errors(ast: Node, diagnostics: Vec<ParseError>) -> Self {
642 let mut budget_usage = BudgetTracker::new();
643 budget_usage.errors_emitted = diagnostics.len();
644 let recovered_count =
645 diagnostics.iter().filter(|e| matches!(e, ParseError::Recovered { .. })).count();
646 Self { ast, diagnostics, budget_usage, terminated_early: false, recovered_count }
647 }
648
649 /// Create a parse output with full budget tracking.
650 ///
651 /// This is the preferred constructor when the actual BudgetTracker
652 /// from parsing is available, as it preserves accurate metrics.
653 pub fn finish(
654 ast: Node,
655 diagnostics: Vec<ParseError>,
656 budget_usage: BudgetTracker,
657 terminated_early: bool,
658 ) -> Self {
659 let recovered_count =
660 diagnostics.iter().filter(|e| matches!(e, ParseError::Recovered { .. })).count();
661 Self { ast, diagnostics, budget_usage, terminated_early, recovered_count }
662 }
663
664 /// Check if parse completed without any errors.
665 pub fn is_ok(&self) -> bool {
666 self.diagnostics.is_empty()
667 }
668
669 /// Check if parse had errors.
670 pub fn has_errors(&self) -> bool {
671 !self.diagnostics.is_empty()
672 }
673
674 /// Get the error count.
675 pub fn error_count(&self) -> usize {
676 self.diagnostics.len()
677 }
678}
679
680impl ParseError {
681 /// Create a new syntax error for Perl parsing workflow failures
682 ///
683 /// # Arguments
684 ///
685 /// * `message` - Descriptive error message with context about the syntax issue
686 /// * `location` - Character position within the Perl code where error occurred
687 ///
688 /// # Returns
689 ///
690 /// A [`ParseError::SyntaxError`] variant with embedded location context for recovery strategies
691 ///
692 /// # Examples
693 ///
694 /// ```rust
695 /// use perl_error::ParseError;
696 ///
697 /// let error = ParseError::syntax("Missing semicolon in Perl script", 42);
698 /// assert!(matches!(error, ParseError::SyntaxError { .. }));
699 /// ```
700 pub fn syntax(message: impl Into<String>, location: usize) -> Self {
701 ParseError::SyntaxError { message: message.into(), location }
702 }
703
704 /// Create a new unexpected token error during Perl script parsing
705 ///
706 /// # Arguments
707 ///
708 /// * `expected` - Token type that was expected by the parser
709 /// * `found` - Actual token type that was encountered
710 /// * `location` - Character position where the unexpected token was found
711 ///
712 /// # Returns
713 ///
714 /// A [`ParseError::UnexpectedToken`] variant with detailed token mismatch information
715 ///
716 /// # Examples
717 ///
718 /// ```rust
719 /// use perl_error::ParseError;
720 ///
721 /// let error = ParseError::unexpected("semicolon", "comma", 15);
722 /// assert!(matches!(error, ParseError::UnexpectedToken { .. }));
723 /// ```
724 ///
725 /// # Email Processing Context
726 ///
727 /// This is commonly used during the Analyze stage when Perl scripts contain
728 /// syntax variations that require token-level recovery strategies.
729 pub fn unexpected(
730 expected: impl Into<String>,
731 found: impl Into<String>,
732 location: usize,
733 ) -> Self {
734 ParseError::UnexpectedToken { expected: expected.into(), found: found.into(), location }
735 }
736
737 /// Get the byte location of the error if available
738 pub fn location(&self) -> Option<usize> {
739 match self {
740 ParseError::UnexpectedToken { location, .. } => Some(*location),
741 ParseError::SyntaxError { location, .. } => Some(*location),
742 ParseError::Recovered { location, .. } => Some(*location),
743 _ => None,
744 }
745 }
746
747 /// Generate a fix suggestion based on the error type
748 pub fn suggestion(&self) -> Option<String> {
749 match self {
750 ParseError::UnexpectedToken { expected, found, .. } => {
751 // Check for common missing delimiters
752 if expected.contains(';') {
753 return Some("add a semicolon ';' at the end of the statement".to_string());
754 }
755 if expected.contains('}') {
756 return Some("add a closing brace '}' to end the block".to_string());
757 }
758 if expected.contains(')') {
759 return Some("add a closing parenthesis ')' to end the group".to_string());
760 }
761 if expected.contains(']') {
762 return Some("add a closing bracket ']' to end the array".to_string());
763 }
764 // Fat arrow found where expression expected — likely a missing value
765 // before a hash pair separator
766 if expected.contains("expression") && found.contains("=>") {
767 return Some(
768 "'=>' (fat arrow) is not valid here; \
769 did you forget a value before it?"
770 .to_string(),
771 );
772 }
773 // Arrow found where expression expected
774 if expected.contains("expression") && found.contains("->") {
775 return Some(
776 "'->' (arrow) is not valid here; \
777 did you forget the object or reference before it?"
778 .to_string(),
779 );
780 }
781 // Expected a variable (e.g. after my/our/local/state)
782 if expected.to_lowercase().contains("variable") {
783 return Some(
784 "expected a variable like $foo, @bar, or %hash after the declaration keyword"
785 .to_string(),
786 );
787 }
788 None
789 }
790 ParseError::UnclosedDelimiter { delimiter } => {
791 Some(format!("add closing '{}' to complete the literal", delimiter))
792 }
793 _ => None,
794 }
795 }
796}
797
798/// Enrich a list of errors with source context
799pub fn get_error_contexts(errors: &[ParseError], source: &str) -> Vec<ErrorContext> {
800 let index = LineIndex::new(source.to_string());
801
802 errors
803 .iter()
804 .map(|error| {
805 let loc = error.location().unwrap_or(source.len());
806 // Handle EOF/out-of-bounds safely
807 let safe_loc = std::cmp::min(loc, source.len());
808
809 let (line_u32, col_u32) = index.offset_to_position(safe_loc);
810 let line = line_u32 as usize;
811 let col = col_u32 as usize;
812
813 let source_line = source.lines().nth(line).unwrap_or("").to_string();
814
815 ErrorContext {
816 error: error.clone(),
817 line,
818 column: col,
819 source_line,
820 suggestion: error.suggestion(),
821 }
822 })
823 .collect()
824}
825
826#[cfg(test)]
827mod tests {
828 use super::*;
829
830 #[test]
831 fn test_parse_budget_defaults() {
832 let budget = ParseBudget::default();
833 assert_eq!(budget.max_errors, 100);
834 assert_eq!(budget.max_depth, 256);
835 assert_eq!(budget.max_tokens_skipped, 1000);
836 assert_eq!(budget.max_recoveries, 500);
837 }
838
839 #[test]
840 fn test_parse_budget_strict() {
841 let budget = ParseBudget::strict();
842 assert_eq!(budget.max_errors, 10);
843 assert_eq!(budget.max_depth, 64);
844 assert_eq!(budget.max_tokens_skipped, 100);
845 assert_eq!(budget.max_recoveries, 50);
846 }
847
848 #[test]
849 fn test_budget_tracker_errors() {
850 let budget = ParseBudget { max_errors: 3, ..Default::default() };
851 let mut tracker = BudgetTracker::new();
852
853 assert!(!tracker.errors_exhausted(&budget));
854
855 tracker.record_error();
856 tracker.record_error();
857 assert!(!tracker.errors_exhausted(&budget));
858
859 tracker.record_error();
860 assert!(tracker.errors_exhausted(&budget));
861 }
862
863 #[test]
864 fn test_budget_tracker_depth() {
865 let budget = ParseBudget { max_depth: 2, ..Default::default() };
866 let mut tracker = BudgetTracker::new();
867
868 assert!(!tracker.depth_would_exceed(&budget));
869
870 tracker.enter_depth();
871 assert!(!tracker.depth_would_exceed(&budget));
872
873 tracker.enter_depth();
874 assert!(tracker.depth_would_exceed(&budget));
875
876 tracker.exit_depth();
877 assert!(!tracker.depth_would_exceed(&budget));
878 }
879
880 #[test]
881 fn test_budget_tracker_skip() {
882 let budget = ParseBudget { max_tokens_skipped: 5, ..Default::default() };
883 let mut tracker = BudgetTracker::new();
884
885 assert!(!tracker.skip_would_exceed(&budget, 3));
886 tracker.record_skip(3);
887
888 assert!(!tracker.skip_would_exceed(&budget, 2));
889 assert!(tracker.skip_would_exceed(&budget, 3));
890 }
891
892 #[test]
893 fn test_budget_tracker_recoveries() {
894 let budget = ParseBudget { max_recoveries: 2, ..Default::default() };
895 let mut tracker = BudgetTracker::new();
896
897 assert!(!tracker.recoveries_exhausted(&budget));
898
899 tracker.record_recovery();
900 assert!(!tracker.recoveries_exhausted(&budget));
901
902 tracker.record_recovery();
903 assert!(tracker.recoveries_exhausted(&budget));
904 }
905
906 #[test]
907 fn test_parse_output_success() {
908 use perl_ast::{Node, NodeKind, SourceLocation};
909
910 let ast = Node::new(
911 NodeKind::Program { statements: vec![] },
912 SourceLocation { start: 0, end: 0 },
913 );
914 let output = ParseOutput::success(ast);
915
916 assert!(output.is_ok());
917 assert!(!output.has_errors());
918 assert_eq!(output.error_count(), 0);
919 assert!(!output.terminated_early);
920 }
921
#[test]
fn test_parse_output_with_errors() {
    use perl_ast::{Node, NodeKind, SourceLocation};

    // An empty program node with a zero-width location at offset 0.
    let program = Node::new(
        NodeKind::Program { statements: Vec::new() },
        SourceLocation { start: 0, end: 0 },
    );
    // Two syntax errors at offsets 0 and 5.
    let problems = vec![ParseError::syntax("error 1", 0), ParseError::syntax("error 2", 5)];
    let parsed = ParseOutput::with_errors(program, problems);

    // Both errors must be reflected in the status helpers and the count.
    assert!(!parsed.is_ok());
    assert!(parsed.has_errors());
    assert_eq!(parsed.error_count(), 2);
}
937
#[test]
fn test_parse_output_finish_preserves_tracker() {
    use perl_ast::{Node, NodeKind, SourceLocation};

    // Seed a tracker with distinct, recognizable values for every counter
    // that the assertions below read back.
    let mut usage = BudgetTracker::new();
    usage.errors_emitted = 5;
    usage.tokens_skipped = 42;
    usage.recoveries_attempted = 3;
    usage.max_depth_reached = 10;

    // An empty program node plus a single syntax error.
    let program = Node::new(
        NodeKind::Program { statements: Vec::new() },
        SourceLocation { start: 0, end: 0 },
    );
    let problems = vec![ParseError::syntax("error 1", 0)];

    let parsed = ParseOutput::finish(program, problems, usage, true);

    // Each seeded counter must come back unchanged through `budget_usage`.
    let reported = &parsed.budget_usage;
    assert_eq!(reported.errors_emitted, 5);
    assert_eq!(reported.tokens_skipped, 42);
    assert_eq!(reported.recoveries_attempted, 3);
    assert_eq!(reported.max_depth_reached, 10);

    // The early-termination flag and the error list are carried over too.
    assert!(parsed.terminated_early);
    assert_eq!(parsed.error_count(), 1);
}
965
#[test]
fn test_begin_recovery_checks_budget_first() {
    // A budget that permits no recoveries at all.
    let limits = ParseBudget { max_recoveries: 0, ..ParseBudget::default() };
    let mut usage = BudgetTracker::new();

    // The attempt must be refused, and the refused attempt must not be counted.
    let started = usage.begin_recovery(&limits);
    assert!(!started);
    assert_eq!(usage.recoveries_attempted, 0);
}
975
#[test]
fn test_can_skip_more_boundary_conditions() {
    // Cap skipped tokens at ten; every other budget field keeps its default.
    let limits = ParseBudget { max_tokens_skipped: 10, ..ParseBudget::default() };
    let mut usage = BudgetTracker::new();

    // Nothing skipped yet: exactly 10 fits, 11 does not.
    assert!(usage.can_skip_more(&limits, 10));
    assert!(!usage.can_skip_more(&limits, 11));

    // Halfway (5 recorded): exactly 5 more fits, 6 does not.
    usage.record_skip(5);
    assert!(usage.can_skip_more(&limits, 5));
    assert!(!usage.can_skip_more(&limits, 6));

    // At the cap (10 recorded): one more is refused, a zero-length skip is allowed.
    usage.record_skip(5);
    assert!(!usage.can_skip_more(&limits, 1));
    assert!(usage.can_skip_more(&limits, 0));
}
999
#[test]
fn test_error_context_enrichment() {
    let source = "line1\nline2;\nline3";
    // "line1" occupies byte offsets 0..=4 ('1' is at 4); offset 5 is the
    // newline that ends the first line.
    let errors = vec![ParseError::unexpected("';'", "newline", 5)];

    let contexts = get_error_contexts(&errors, source);
    assert_eq!(contexts.len(), 1);

    // The error at the newline is expected to be attributed to the first
    // line, which is reported with a zero-based index.
    let first = &contexts[0];
    assert_eq!(first.line, 0);
    assert_eq!(first.source_line, "line1");

    // A missing suggestion is treated as empty text, so the check below fails.
    let hint = first.suggestion.as_deref().unwrap_or("");
    assert!(hint.contains("semicolon"));
}
1013
#[test]
fn test_recovery_site_and_kind_variants() {
    // Construct the listed `RecoverySite` and `RecoveryKind` variants so the
    // test stops compiling if one is renamed or removed.
    let sites = [
        RecoverySite::ArgList,
        RecoverySite::ArraySubscript,
        RecoverySite::HashSubscript,
        RecoverySite::PostfixChain,
        RecoverySite::InfixRhs,
    ];
    let kinds = [
        RecoveryKind::InsertedCloser,
        RecoveryKind::MissingOperand,
        RecoveryKind::TruncatedChain,
        RecoveryKind::InferredSemicolon,
    ];

    // Exercise `Debug` formatting and `.clone()` on every site and kind.
    sites.iter().for_each(|site| {
        let _ = format!("{site:?}");
        let _ = site.clone();
    });
    kinds.iter().for_each(|kind| {
        let _ = format!("{kind:?}");
        let _ = kind.clone();
    });

    // Equality: a variant equals itself and differs from another variant.
    assert_eq!(RecoverySite::ArgList, RecoverySite::ArgList);
    assert_ne!(RecoverySite::ArgList, RecoverySite::PostfixChain);
    assert_eq!(RecoveryKind::InsertedCloser, RecoveryKind::InsertedCloser);
    assert_ne!(RecoveryKind::InsertedCloser, RecoveryKind::MissingOperand);
}
1045
#[test]
fn test_parse_error_recovered_variant() {
    let recovered = ParseError::Recovered {
        site: RecoverySite::ArgList,
        kind: RecoveryKind::InsertedCloser,
        location: 42,
    };

    // The `Recovered` variant is expected to expose its location...
    assert_eq!(recovered.location(), Some(42));
    // ...and to offer no suggestion.
    assert!(recovered.suggestion().is_none());

    // The `Display` rendering must contain either the variant name or the
    // position; the exact wording is deliberately not pinned here.
    let rendered = recovered.to_string();
    let accepted = ["Recovered", "position 42"];
    assert!(accepted.iter().any(|needle| rendered.contains(*needle)));
}
1061
#[test]
fn test_parse_output_recovered_count_with_errors() {
    use perl_ast::{Node, NodeKind, SourceLocation};

    // One plain syntax error followed by two `Recovered` entries.
    let hard_error = ParseError::syntax("error 1", 0);
    let missing_operand = ParseError::Recovered {
        site: RecoverySite::ArgList,
        kind: RecoveryKind::MissingOperand,
        location: 10,
    };
    let truncated_chain = ParseError::Recovered {
        site: RecoverySite::PostfixChain,
        kind: RecoveryKind::TruncatedChain,
        location: 20,
    };
    let problems = vec![hard_error, missing_operand, truncated_chain];

    // An empty program node with a zero-width location at offset 0.
    let program = Node::new(
        NodeKind::Program { statements: Vec::new() },
        SourceLocation { start: 0, end: 0 },
    );
    let parsed = ParseOutput::with_errors(program, problems);

    // All three entries count as errors; only the two `Recovered` ones are
    // expected in `recovered_count`.
    assert_eq!(parsed.error_count(), 3);
    assert_eq!(parsed.recovered_count, 2);
}
1088
#[test]
fn test_parse_output_success_has_zero_recovered_count() {
    use perl_ast::{Node, NodeKind, SourceLocation};

    // An empty program node with a zero-width location at offset 0.
    let program = Node::new(
        NodeKind::Program { statements: Vec::new() },
        SourceLocation { start: 0, end: 0 },
    );

    // A success output carries no errors, so no recoveries are expected either.
    let parsed = ParseOutput::success(program);
    assert_eq!(parsed.recovered_count, 0);
}
1100
#[test]
fn test_parse_output_finish_recovered_count() {
    use perl_ast::{Node, NodeKind, SourceLocation};

    // One plain syntax error plus a single `Recovered` entry.
    let inferred_semicolon = ParseError::Recovered {
        site: RecoverySite::InfixRhs,
        kind: RecoveryKind::InferredSemicolon,
        location: 5,
    };
    let problems = vec![ParseError::syntax("error", 0), inferred_semicolon];

    // An empty program node with a zero-width location at offset 0.
    let program = Node::new(
        NodeKind::Program { statements: Vec::new() },
        SourceLocation { start: 0, end: 0 },
    );

    // Finish with a fresh tracker and the early-termination flag cleared.
    let parsed = ParseOutput::finish(program, problems, BudgetTracker::new(), false);

    // Only the `Recovered` entry is expected in `recovered_count`.
    assert_eq!(parsed.recovered_count, 1);
    assert!(!parsed.terminated_early);
}
1123}