perl_parser_core/syntax/error/
mod.rs

1//! Error types for the Perl parser within the Perl parsing workflow pipeline
2//!
3//! This module defines comprehensive error handling for Perl parsing operations that occur
//! throughout the Perl parsing workflow: Parse → Index → Navigate → Complete → Analyze.
5//!
6//! # Error Recovery Strategy
7//!
8//! When parsing errors occur during Perl parsing:
9//! 1. **Parse stage**: Parsing failures indicate corrupted or malformed Perl source
//! 2. **Index stage**: Syntax errors suggest script inconsistencies requiring fallback processing
11//! 3. **Navigate stage**: Parse failures can break thread analysis - graceful degradation applies
12//! 4. **Complete stage**: Errors impact output generation but preserve original content
13//! 5. **Analyze stage**: Parse failures affect search indexing but maintain basic metadata
14//!
15//! # Performance Context
16//!
17//! Error handling is optimized for large Perl codebase processing scenarios with minimal memory overhead
18//! and fast recovery paths to maintain enterprise-scale performance targets.
19//!
20//! # Usage Examples
21//!
22//! ## Basic Error Handling
23//!
24//! ```ignore
25//! use perl_parser::{Parser, ParseError, ParseResult};
26//!
27//! fn parse_with_error_handling(code: &str) -> ParseResult<()> {
28//!     let mut parser = Parser::new(code);
29//!     match parser.parse() {
30//!         Ok(ast) => {
31//!             println!("Parsing successful");
32//!             Ok(())
33//!         }
34//!         Err(ParseError::UnexpectedEof) => {
35//!             eprintln!("Incomplete code: unexpected end of input");
36//!             Err(ParseError::UnexpectedEof)
37//!         }
38//!         Err(ParseError::UnexpectedToken { found, expected, location }) => {
39//!             eprintln!("Syntax error at position {}: found '{}', expected '{}'",
40//!                      location, found, expected);
41//!             Err(ParseError::UnexpectedToken { found, expected, location })
42//!         }
43//!         Err(e) => {
44//!             eprintln!("Parse error: {}", e);
45//!             Err(e)
46//!         }
47//!     }
48//! }
49//! ```
50//!
51//! ## Error Recovery in LSP Context
52//!
53//! ```ignore
54//! use perl_parser::{Parser, ParseError, error_recovery::ErrorRecovery};
55//!
56//! fn parse_with_recovery(code: &str) -> Vec<String> {
57//!     let mut parser = Parser::new(code);
58//!     let mut errors = Vec::new();
59//!
60//!     match parser.parse() {
61//!         Ok(_) => println!("Parse successful"),
62//!         Err(err) => {
63//!             // Log error for diagnostics
64//!             errors.push(format!("Parse error: {}", err));
65//!
66//!             // Attempt error recovery for LSP
67//!             match err {
68//!                 ParseError::UnexpectedToken { .. } => {
69//!                     // Continue parsing from next statement
70//!                     println!("Attempting recovery...");
71//!                 }
72//!                 ParseError::RecursionLimit => {
73//!                     // Use iterative parsing approach
74//!                     println!("Switching to iterative parsing...");
75//!                 }
76//!                 _ => {
77//!                     // Use fallback parsing strategy
78//!                     println!("Using fallback parsing...");
79//!                 }
80//!             }
81//!         }
82//!     }
83//!     errors
84//! }
85//! ```
86//!
87//! ## Comprehensive Error Context
88//!
89//! ```
90//! use perl_error::ParseError;
91//!
92//! fn create_detailed_error() -> ParseError {
93//!     ParseError::UnexpectedToken {
94//!         found: "number".to_string(),
95//!         expected: "identifier".to_string(),
96//!         location: 10, // byte position 10
97//!     }
98//! }
99//!
100//! fn handle_error_with_context(error: &ParseError) {
101//!     match error {
102//!         ParseError::UnexpectedToken { found, expected, location } => {
103//!             println!("Syntax error at byte position {}: found '{}', expected '{}'",
104//!                     location, found, expected);
105//!         }
106//!         ParseError::UnexpectedEof => {
107//!             println!("Incomplete input: unexpected end of file");
108//!         }
109//!         _ => {
110//!             println!("Parse error: {}", error);
111//!         }
112//!     }
113//! }
114//! ```
115
116use perl_position_tracking::LineIndex;
117use thiserror::Error;
118
119#[derive(Debug, Clone)]
120/// Rich error context with source line and fix suggestions
121pub struct ErrorContext {
122    /// The original parse error
123    pub error: ParseError,
124    /// Line number (0-indexed)
125    pub line: usize,
126    /// Column number (0-indexed)
127    pub column: usize,
128    /// The actual source line text
129    pub source_line: String,
130    /// Optional fix suggestion
131    pub suggestion: Option<String>,
132}
133
134impl From<perl_regex::RegexError> for ParseError {
135    fn from(err: perl_regex::RegexError) -> Self {
136        match err {
137            perl_regex::RegexError::Syntax { message, offset } => {
138                ParseError::syntax(message, offset)
139            }
140        }
141    }
142}
143
/// Where in the parse tree a recovery was performed.
///
/// Used by [`ParseError::Recovered`] to describe the syntactic context in which
/// the parser applied a recovery strategy. LSP providers use this to decide
/// which features can still be offered after a recovery.
///
/// The type is `Eq + Hash`, so it can be used directly as a `HashMap`/`HashSet`
/// key (for example to tally recoveries per site).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum RecoverySite {
    /// Inside a parenthesised argument list `(...)`.
    ArgList,
    /// Inside an array subscript `[...]`.
    ArraySubscript,
    /// Inside a hash subscript `{...}`.
    HashSubscript,
    /// After a `->` dereference arrow (postfix chain).
    PostfixChain,
    /// After a binary infix operator (right-hand side missing).
    InfixRhs,
}
162
/// What kind of recovery was applied at a [`RecoverySite`].
///
/// Pairs with [`RecoverySite`] in [`ParseError::Recovered`] to describe the
/// exact repair the parser made. This information lets consumers (e.g. LSP
/// providers) understand the confidence level of the resulting AST region.
///
/// The type is `Eq + Hash`, so it can be used directly as a `HashMap`/`HashSet`
/// key (for example to tally recoveries per kind).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum RecoveryKind {
    /// A synthetic closing delimiter (`)` or `]`) was inferred.
    InsertedCloser,
    /// A [`perl_ast::NodeKind::MissingExpression`] placeholder was inserted.
    MissingOperand,
    /// A postfix chain was cut short due to a missing continuation.
    TruncatedChain,
    /// A statement boundary (`;`) was inferred from context.
    InferredSemicolon,
}
179
/// Upper bounds on the work a single parse is allowed to perform.
///
/// The limits exist so that parsing terminates in bounded time even on
/// malformed or adversarial input. [`ParseBudget::default`] supplies values
/// intended for ordinary Perl code; the other constructors provide tighter or
/// looser presets.
///
/// # Usage
///
/// ```
/// use perl_error::ParseBudget;
///
/// // Use defaults for normal parsing
/// let budget = ParseBudget::default();
///
/// // Stricter limits for untrusted input
/// let strict = ParseBudget {
///     max_errors: 10,
///     max_depth: 64,
///     max_tokens_skipped: 100,
///     max_recoveries: 50,
/// };
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ParseBudget {
    /// Number of errors to collect before giving up, so diagnostics are not
    /// flooded.
    /// Default: 100
    pub max_errors: usize,

    /// Deepest permitted nesting of recursive constructs (blocks,
    /// expressions); guards against stack overflow on deeply nested input.
    /// Default: 256
    pub max_depth: usize,

    /// Token-skip allowance used while searching for a recovery sync point.
    /// Default: 1000
    pub max_tokens_skipped: usize,

    /// Number of recovery attempts permitted per parse; bounds total recovery
    /// work on pathological input.
    /// Default: 500
    pub max_recoveries: usize,
}

impl Default for ParseBudget {
    /// Standard limits: 100 errors, depth 256, 1000 skipped tokens, 500 recoveries.
    fn default() -> Self {
        Self {
            max_errors: 100,
            max_depth: 256,
            max_tokens_skipped: 1000,
            max_recoveries: 500,
        }
    }
}

impl ParseBudget {
    /// Budget for IDE/LSP usage; currently the same values as [`Default`].
    pub fn for_ide() -> Self {
        <Self as Default>::default()
    }

    /// Tight budget for parsing untrusted input:
    /// 10 errors, depth 64, 100 skipped tokens, 50 recoveries.
    pub fn strict() -> Self {
        Self {
            max_errors: 10,
            max_depth: 64,
            max_tokens_skipped: 100,
            max_recoveries: 50,
        }
    }

    /// Budget with every limit set to `usize::MAX` (use with caution).
    pub fn unlimited() -> Self {
        let no_limit = usize::MAX;
        Self {
            max_errors: no_limit,
            max_depth: no_limit,
            max_tokens_skipped: no_limit,
            max_recoveries: no_limit,
        }
    }
}
252
253/// Tracks budget consumption during parsing.
254///
255/// This struct monitors how much of the parse budget has been used
256/// and provides methods to check and consume budget atomically.
257#[derive(Debug, Clone, Default)]
258pub struct BudgetTracker {
259    /// Number of errors emitted so far.
260    pub errors_emitted: usize,
261    /// Current nesting depth.
262    pub current_depth: usize,
263    /// Maximum depth reached during parse.
264    pub max_depth_reached: usize,
265    /// Total tokens skipped across all recovery attempts.
266    pub tokens_skipped: usize,
267    /// Number of recovery attempts made.
268    pub recoveries_attempted: usize,
269}
270
271impl BudgetTracker {
272    /// Create a new budget tracker.
273    pub fn new() -> Self {
274        Self::default()
275    }
276
277    /// Check if error budget is exhausted.
278    pub fn errors_exhausted(&self, budget: &ParseBudget) -> bool {
279        self.errors_emitted >= budget.max_errors
280    }
281
282    /// Check if depth budget would be exceeded by going one level deeper.
283    pub fn depth_would_exceed(&self, budget: &ParseBudget) -> bool {
284        self.current_depth >= budget.max_depth
285    }
286
287    /// Check if skip budget would be exceeded by skipping `count` more tokens.
288    pub fn skip_would_exceed(&self, budget: &ParseBudget, count: usize) -> bool {
289        self.tokens_skipped.saturating_add(count) > budget.max_tokens_skipped
290    }
291
292    /// Check if recovery budget is exhausted.
293    pub fn recoveries_exhausted(&self, budget: &ParseBudget) -> bool {
294        self.recoveries_attempted >= budget.max_recoveries
295    }
296
297    /// Begin a recovery attempt, checking budget first.
298    ///
299    /// Returns `false` if another recovery attempt would exceed the budget.
300    /// If this returns `true`, the recovery attempt has been recorded.
301    pub fn begin_recovery(&mut self, budget: &ParseBudget) -> bool {
302        if self.recoveries_attempted >= budget.max_recoveries {
303            return false;
304        }
305        self.recoveries_attempted = self.recoveries_attempted.saturating_add(1);
306        true
307    }
308
309    /// Check if skipping `additional` more tokens would stay within budget.
310    ///
311    /// This considers both already-skipped tokens and the proposed additional count.
312    pub fn can_skip_more(&self, budget: &ParseBudget, additional: usize) -> bool {
313        self.tokens_skipped.saturating_add(additional) <= budget.max_tokens_skipped
314    }
315
316    /// Record an error emission.
317    pub fn record_error(&mut self) {
318        self.errors_emitted = self.errors_emitted.saturating_add(1);
319    }
320
321    /// Enter a deeper nesting level.
322    pub fn enter_depth(&mut self) {
323        self.current_depth = self.current_depth.saturating_add(1);
324        if self.current_depth > self.max_depth_reached {
325            self.max_depth_reached = self.current_depth;
326        }
327    }
328
329    /// Exit a nesting level.
330    pub fn exit_depth(&mut self) {
331        self.current_depth = self.current_depth.saturating_sub(1);
332    }
333
334    /// Record tokens skipped during recovery.
335    pub fn record_skip(&mut self, count: usize) {
336        self.tokens_skipped = self.tokens_skipped.saturating_add(count);
337    }
338
339    /// Record a recovery attempt.
340    pub fn record_recovery(&mut self) {
341        self.recoveries_attempted = self.recoveries_attempted.saturating_add(1);
342    }
343}
344
345/// Result type for parser operations in the Perl parsing workflow pipeline
346///
347/// This type encapsulates success/failure outcomes throughout the Parse → Index →
348/// Navigate → Complete → Analyze workflow, enabling consistent error propagation and recovery
349/// strategies across all pipeline stages.
350pub type ParseResult<T> = Result<T, ParseError>;
351
352#[derive(Error, Debug, Clone, PartialEq)]
353/// Comprehensive error types that can occur during Perl parsing workflows
354///
355/// These errors are designed to provide detailed context about parsing failures that occur during
356/// Perl code analysis, script processing, and metadata extraction. Each error variant includes
357/// location information to enable precise recovery strategies in large Perl file processing scenarios.
358///
359/// # Error Recovery Patterns
360///
361/// - **Syntax Errors**: Attempt fallback parsing or skip problematic content sections
362/// - **Lexer Errors**: Re-tokenize with relaxed rules or binary content detection
363/// - **Recursion Limits**: Flatten deeply nested structures or process iteratively
364/// - **String Handling**: Apply encoding detection and normalization workflows
365///
366/// # Enterprise Scale Considerations
367///
368/// Error handling is optimized for large Perl files and multi-file workspaces, ensuring
369/// memory-efficient error propagation and logging.
370pub enum ParseError {
371    /// Parser encountered unexpected end of input during Perl code analysis
372    ///
373    /// This occurs when processing truncated Perl scripts or incomplete Perl source during
374    /// the Parse stage. Recovery strategy: attempt partial parsing and preserve available content.
375    #[error("Unexpected end of input")]
376    UnexpectedEof,
377
378    /// Parser found an unexpected token during Perl parsing workflow
379    ///
380    /// Common during Analyze stage when Perl scripts contain syntax variations or encoding issues.
381    /// Recovery strategy: skip problematic tokens and attempt continued parsing with relaxed rules.
382    #[error("expected {expected}, found {found} at position {location}")]
383    UnexpectedToken {
384        /// Token type that was expected during Perl script parsing
385        expected: String,
386        /// Actual token found in Perl script content
387        found: String,
388        /// Byte position where unexpected token was encountered
389        location: usize,
390    },
391
392    /// General syntax error occurred during Perl code parsing
393    ///
394    /// This encompasses malformed Perl constructs found in Perl scripts during Navigate stage analysis.
395    /// Recovery strategy: isolate syntax error scope and continue processing surrounding content.
396    #[error("Invalid syntax at position {location}: {message}")]
397    SyntaxError {
398        /// Descriptive error message explaining the syntax issue
399        message: String,
400        /// Byte position where syntax error occurred in Perl script
401        location: usize,
402    },
403
404    /// Lexical analysis failure during Perl script tokenization
405    ///
406    /// Indicates character encoding issues or binary content mixed with text during Parse stage.
407    /// Recovery strategy: apply encoding detection and re-attempt tokenization with binary fallbacks.
408    #[error("Lexer error: {message}")]
409    LexerError {
410        /// Detailed lexer error message describing tokenization failure
411        message: String,
412    },
413
414    /// Parser recursion depth exceeded during complex Perl script analysis
415    ///
416    /// Occurs with deeply nested structures in Perl code during Complete stage processing.
417    /// Recovery strategy: flatten recursive structures and process iteratively to maintain performance.
418    #[error("Maximum recursion depth exceeded")]
419    RecursionLimit,
420
421    /// Invalid numeric literal found in Perl script content
422    ///
423    /// Common when processing malformed configuration values during Analyze stage analysis.
424    /// Recovery strategy: substitute default values and log for manual review.
425    #[error("Invalid number literal: {literal}")]
426    InvalidNumber {
427        /// The malformed numeric literal found in Perl script content
428        literal: String,
429    },
430
431    /// Malformed string literal in Perl parsing workflow
432    ///
433    /// Indicates quote mismatches or encoding issues in Perl script strings during parsing.
434    /// Recovery strategy: attempt string repair and normalization before re-parsing.
435    #[error("Invalid string literal")]
436    InvalidString,
437
438    /// Unclosed delimiter detected during Perl code parsing
439    ///
440    /// Commonly found in truncated or corrupted Perl script content during Parse stage.
441    /// Recovery strategy: auto-close delimiters and continue parsing with synthetic boundaries.
442    #[error("Unclosed delimiter: {delimiter}")]
443    UnclosedDelimiter {
444        /// The delimiter character that was left unclosed
445        delimiter: char,
446    },
447
448    /// Invalid regular expression syntax in Perl parsing workflow
449    ///
450    /// Occurs when parsing regex patterns in data filters during Navigate stage analysis.
451    /// Recovery strategy: fallback to literal string matching and preserve original pattern.
452    #[error("Invalid regex: {message}")]
453    InvalidRegex {
454        /// Specific error message describing regex syntax issue
455        message: String,
456    },
457
458    /// Nesting depth limit exceeded for recursive structures
459    #[error("Nesting depth limit exceeded: {depth} > {max_depth}")]
460    NestingTooDeep {
461        /// Current nesting depth
462        depth: usize,
463        /// Maximum allowed depth
464        max_depth: usize,
465    },
466
467    /// Parsing was cancelled by an external cancellation token
468    #[error("Parsing cancelled")]
469    Cancelled,
470
471    /// A syntax error was recovered from — parsing continued with a synthetic node.
472    ///
473    /// This variant is emitted alongside the partial AST node that was produced
474    /// by the recovery. LSP providers iterate `parser.errors()` and count
475    /// `Recovered` variants to determine confidence for gating features.
476    #[error("Recovered from {kind:?} at {site:?} (position {location})")]
477    Recovered {
478        /// Where in the parse tree the recovery occurred.
479        site: RecoverySite,
480        /// What kind of repair was applied.
481        kind: RecoveryKind,
482        /// Byte offset of the recovery point in the source.
483        location: usize,
484    },
485}
486
487/// Error classification and diagnostic generation for parsed Perl code.
488pub mod classifier;
489/// Error recovery strategies and traits for the Perl parser.
490pub mod recovery;
491
492use perl_ast::Node;
493
494/// Structured output from parsing, combining AST with all diagnostics.
495///
496/// This type replaces the simple `Result<Node, ParseError>` pattern to enable
497/// error recovery. Even when errors occur, parsing continues and produces a
498/// partial AST alongside collected diagnostics.
499///
500/// # Usage
501///
502/// ```ignore
503/// use perl_parser::{Parser, ParseOutput};
504///
505/// let mut parser = Parser::new("my $x = ;");
506/// let output = parser.parse_with_recovery();
507///
508/// // AST is always available (may contain error nodes)
509/// println!("Statements: {:?}", output.ast);
510///
511/// // Diagnostics are collected separately
512/// for error in &output.diagnostics {
513///     println!("Error: {}", error);
514/// }
515///
516/// // Budget tracking shows resource usage
517/// println!("Errors: {}", output.budget_usage.errors_emitted);
518/// ```
519#[derive(Debug, Clone)]
520pub struct ParseOutput {
521    /// The parsed AST. Always present, but may contain error nodes
522    /// if parsing encountered recoverable errors.
523    pub ast: Node,
524
525    /// All diagnostics (errors and warnings) collected during parsing.
526    /// These are ordered by source position.
527    pub diagnostics: Vec<ParseError>,
528
529    /// Budget consumption during this parse.
530    /// Useful for diagnosing pathological inputs.
531    pub budget_usage: BudgetTracker,
532
533    /// Whether parsing completed normally or was terminated early
534    /// due to budget exhaustion.
535    pub terminated_early: bool,
536
537    /// Number of recovery operations applied during this parse.
538    ///
539    /// Counts the [`ParseError::Recovered`] variants in `diagnostics`.
540    /// LSP providers use this as a confidence signal: `0` means a clean parse,
541    /// `> 0` means at least one synthetic repair was made.
542    pub recovered_count: usize,
543}
544
/// Coarse outcome category assigned to one parsed file.
///
/// Corpus-level reports use it to tell successful structured recovery apart
/// from unrecovered parser damage and from catastrophic failures.
///
/// NOTE(review): `RecoverySalvageProfile::from_parse` looks only at
/// `ParseError::Recovered` diagnostics and `ERROR` nodes, so a file whose
/// diagnostics are all of other kinds and which has no `ERROR` node is
/// classified as [`RecoverySalvageClass::Clean`]. Confirm that is intended.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RecoverySalvageClass {
    /// No diagnostics and no `ERROR` AST nodes.
    Clean,
    /// Structured recovery diagnostics were emitted, but no `ERROR` nodes.
    StructuredRecoveryOnly,
    /// The AST contains at least one `ERROR` node.
    ErrorNodesPresent,
    /// The parse failed outright (`parse()` returned `Err`).
    CatastrophicFailure,
}
560
561/// Per-file recovery/salvage summary.
562#[derive(Debug, Clone, PartialEq, Eq)]
563pub struct RecoverySalvageProfile {
564    /// Whether this parse was a catastrophic failure.
565    pub catastrophic: bool,
566    /// Number of `ParseError::Recovered` diagnostics observed.
567    pub recovered_count: usize,
568    /// Number of `NodeKind::Error` nodes observed in the AST.
569    pub error_node_count: usize,
570    /// Message from the earliest unrecovered `ERROR` node, if any.
571    pub first_unrecovered_error_node: Option<String>,
572    /// Coarse classification used by corpus closeout reports.
573    pub class: RecoverySalvageClass,
574}
575
576impl RecoverySalvageProfile {
577    /// Build a recovery/salvage profile for one parsed file.
578    pub fn from_parse(ast: &Node, diagnostics: &[ParseError], catastrophic: bool) -> Self {
579        let mut error_node_count = 0usize;
580        let mut first_start = usize::MAX;
581        let mut first_unrecovered_error_node: Option<String> = None;
582
583        fn walk(
584            node: &Node,
585            error_node_count: &mut usize,
586            first_start: &mut usize,
587            first_unrecovered_error_node: &mut Option<String>,
588        ) {
589            if let perl_ast::NodeKind::Error { message, .. } = &node.kind {
590                *error_node_count = error_node_count.saturating_add(1);
591                if node.location.start < *first_start {
592                    *first_start = node.location.start;
593                    *first_unrecovered_error_node = Some(message.clone());
594                }
595            }
596            node.for_each_child(|child| {
597                walk(child, error_node_count, first_start, first_unrecovered_error_node);
598            });
599        }
600        walk(ast, &mut error_node_count, &mut first_start, &mut first_unrecovered_error_node);
601
602        let recovered_count =
603            diagnostics.iter().filter(|e| matches!(e, ParseError::Recovered { .. })).count();
604
605        let class = if catastrophic {
606            RecoverySalvageClass::CatastrophicFailure
607        } else if error_node_count > 0 {
608            RecoverySalvageClass::ErrorNodesPresent
609        } else if recovered_count > 0 {
610            RecoverySalvageClass::StructuredRecoveryOnly
611        } else {
612            RecoverySalvageClass::Clean
613        };
614
615        Self {
616            catastrophic,
617            recovered_count,
618            error_node_count,
619            first_unrecovered_error_node,
620            class,
621        }
622    }
623}
624
625impl ParseOutput {
626    /// Create a successful parse output with no errors.
627    pub fn success(ast: Node) -> Self {
628        Self {
629            ast,
630            diagnostics: Vec::new(),
631            budget_usage: BudgetTracker::new(),
632            terminated_early: false,
633            recovered_count: 0,
634        }
635    }
636
637    /// Create a parse output with errors.
638    ///
639    /// Note: This re-derives budget_usage from diagnostics count.
640    /// For accurate budget tracking, use `finish()` instead.
641    pub fn with_errors(ast: Node, diagnostics: Vec<ParseError>) -> Self {
642        let mut budget_usage = BudgetTracker::new();
643        budget_usage.errors_emitted = diagnostics.len();
644        let recovered_count =
645            diagnostics.iter().filter(|e| matches!(e, ParseError::Recovered { .. })).count();
646        Self { ast, diagnostics, budget_usage, terminated_early: false, recovered_count }
647    }
648
649    /// Create a parse output with full budget tracking.
650    ///
651    /// This is the preferred constructor when the actual BudgetTracker
652    /// from parsing is available, as it preserves accurate metrics.
653    pub fn finish(
654        ast: Node,
655        diagnostics: Vec<ParseError>,
656        budget_usage: BudgetTracker,
657        terminated_early: bool,
658    ) -> Self {
659        let recovered_count =
660            diagnostics.iter().filter(|e| matches!(e, ParseError::Recovered { .. })).count();
661        Self { ast, diagnostics, budget_usage, terminated_early, recovered_count }
662    }
663
664    /// Check if parse completed without any errors.
665    pub fn is_ok(&self) -> bool {
666        self.diagnostics.is_empty()
667    }
668
669    /// Check if parse had errors.
670    pub fn has_errors(&self) -> bool {
671        !self.diagnostics.is_empty()
672    }
673
674    /// Get the error count.
675    pub fn error_count(&self) -> usize {
676        self.diagnostics.len()
677    }
678}
679
680impl ParseError {
681    /// Create a new syntax error for Perl parsing workflow failures
682    ///
683    /// # Arguments
684    ///
685    /// * `message` - Descriptive error message with context about the syntax issue
686    /// * `location` - Character position within the Perl code where error occurred
687    ///
688    /// # Returns
689    ///
690    /// A [`ParseError::SyntaxError`] variant with embedded location context for recovery strategies
691    ///
692    /// # Examples
693    ///
694    /// ```rust
695    /// use perl_error::ParseError;
696    ///
697    /// let error = ParseError::syntax("Missing semicolon in Perl script", 42);
698    /// assert!(matches!(error, ParseError::SyntaxError { .. }));
699    /// ```
700    pub fn syntax(message: impl Into<String>, location: usize) -> Self {
701        ParseError::SyntaxError { message: message.into(), location }
702    }
703
704    /// Create a new unexpected token error during Perl script parsing
705    ///
706    /// # Arguments
707    ///
708    /// * `expected` - Token type that was expected by the parser
709    /// * `found` - Actual token type that was encountered
710    /// * `location` - Character position where the unexpected token was found
711    ///
712    /// # Returns
713    ///
714    /// A [`ParseError::UnexpectedToken`] variant with detailed token mismatch information
715    ///
716    /// # Examples
717    ///
718    /// ```rust
719    /// use perl_error::ParseError;
720    ///
721    /// let error = ParseError::unexpected("semicolon", "comma", 15);
722    /// assert!(matches!(error, ParseError::UnexpectedToken { .. }));
723    /// ```
724    ///
725    /// # Email Processing Context
726    ///
727    /// This is commonly used during the Analyze stage when Perl scripts contain
728    /// syntax variations that require token-level recovery strategies.
729    pub fn unexpected(
730        expected: impl Into<String>,
731        found: impl Into<String>,
732        location: usize,
733    ) -> Self {
734        ParseError::UnexpectedToken { expected: expected.into(), found: found.into(), location }
735    }
736
737    /// Get the byte location of the error if available
738    pub fn location(&self) -> Option<usize> {
739        match self {
740            ParseError::UnexpectedToken { location, .. } => Some(*location),
741            ParseError::SyntaxError { location, .. } => Some(*location),
742            ParseError::Recovered { location, .. } => Some(*location),
743            _ => None,
744        }
745    }
746
747    /// Generate a fix suggestion based on the error type
748    pub fn suggestion(&self) -> Option<String> {
749        match self {
750            ParseError::UnexpectedToken { expected, found, .. } => {
751                // Check for common missing delimiters
752                if expected.contains(';') {
753                    return Some("add a semicolon ';' at the end of the statement".to_string());
754                }
755                if expected.contains('}') {
756                    return Some("add a closing brace '}' to end the block".to_string());
757                }
758                if expected.contains(')') {
759                    return Some("add a closing parenthesis ')' to end the group".to_string());
760                }
761                if expected.contains(']') {
762                    return Some("add a closing bracket ']' to end the array".to_string());
763                }
764                // Fat arrow found where expression expected — likely a missing value
765                // before a hash pair separator
766                if expected.contains("expression") && found.contains("=>") {
767                    return Some(
768                        "'=>' (fat arrow) is not valid here; \
769                         did you forget a value before it?"
770                            .to_string(),
771                    );
772                }
773                // Arrow found where expression expected
774                if expected.contains("expression") && found.contains("->") {
775                    return Some(
776                        "'->' (arrow) is not valid here; \
777                         did you forget the object or reference before it?"
778                            .to_string(),
779                    );
780                }
781                // Expected a variable (e.g. after my/our/local/state)
782                if expected.to_lowercase().contains("variable") {
783                    return Some(
784                        "expected a variable like $foo, @bar, or %hash after the declaration keyword"
785                            .to_string(),
786                    );
787                }
788                None
789            }
790            ParseError::UnclosedDelimiter { delimiter } => {
791                Some(format!("add closing '{}' to complete the literal", delimiter))
792            }
793            _ => None,
794        }
795    }
796}
797
798/// Enrich a list of errors with source context
799pub fn get_error_contexts(errors: &[ParseError], source: &str) -> Vec<ErrorContext> {
800    let index = LineIndex::new(source.to_string());
801
802    errors
803        .iter()
804        .map(|error| {
805            let loc = error.location().unwrap_or(source.len());
806            // Handle EOF/out-of-bounds safely
807            let safe_loc = std::cmp::min(loc, source.len());
808
809            let (line_u32, col_u32) = index.offset_to_position(safe_loc);
810            let line = line_u32 as usize;
811            let col = col_u32 as usize;
812
813            let source_line = source.lines().nth(line).unwrap_or("").to_string();
814
815            ErrorContext {
816                error: error.clone(),
817                line,
818                column: col,
819                source_line,
820                suggestion: error.suggestion(),
821            }
822        })
823        .collect()
824}
825
#[cfg(test)]
mod tests {
    use super::*;

    /// Build the minimal AST used as a fixture by the `ParseOutput` tests:
    /// a `Program` node with no statements, spanning offsets 0..0.
    fn empty_program() -> perl_ast::Node {
        use perl_ast::{Node, NodeKind, SourceLocation};

        Node::new(
            NodeKind::Program { statements: vec![] },
            SourceLocation { start: 0, end: 0 },
        )
    }

    /// `ParseBudget::default()` yields the limits 100 / 256 / 1000 / 500.
    #[test]
    fn test_parse_budget_defaults() {
        let budget = ParseBudget::default();
        assert_eq!(budget.max_errors, 100);
        assert_eq!(budget.max_depth, 256);
        assert_eq!(budget.max_tokens_skipped, 1000);
        assert_eq!(budget.max_recoveries, 500);
    }

    /// `ParseBudget::strict()` yields the limits 10 / 64 / 100 / 50.
    #[test]
    fn test_parse_budget_strict() {
        let budget = ParseBudget::strict();
        assert_eq!(budget.max_errors, 10);
        assert_eq!(budget.max_depth, 64);
        assert_eq!(budget.max_tokens_skipped, 100);
        assert_eq!(budget.max_recoveries, 50);
    }

    /// With `max_errors: 3`, the error budget is exhausted by the third
    /// recorded error and not before.
    #[test]
    fn test_budget_tracker_errors() {
        let budget = ParseBudget { max_errors: 3, ..Default::default() };
        let mut tracker = BudgetTracker::new();

        assert!(!tracker.errors_exhausted(&budget));

        tracker.record_error();
        tracker.record_error();
        assert!(!tracker.errors_exhausted(&budget));

        tracker.record_error();
        assert!(tracker.errors_exhausted(&budget));
    }

    /// With `max_depth: 2`, `depth_would_exceed` turns true after two
    /// `enter_depth` calls and false again after one `exit_depth`.
    #[test]
    fn test_budget_tracker_depth() {
        let budget = ParseBudget { max_depth: 2, ..Default::default() };
        let mut tracker = BudgetTracker::new();

        assert!(!tracker.depth_would_exceed(&budget));

        tracker.enter_depth();
        assert!(!tracker.depth_would_exceed(&budget));

        tracker.enter_depth();
        assert!(tracker.depth_would_exceed(&budget));

        tracker.exit_depth();
        assert!(!tracker.depth_would_exceed(&budget));
    }

    /// With `max_tokens_skipped: 5` and 3 tokens already skipped, skipping 2
    /// more is allowed while skipping 3 more would exceed the budget.
    #[test]
    fn test_budget_tracker_skip() {
        let budget = ParseBudget { max_tokens_skipped: 5, ..Default::default() };
        let mut tracker = BudgetTracker::new();

        assert!(!tracker.skip_would_exceed(&budget, 3));
        tracker.record_skip(3);

        assert!(!tracker.skip_would_exceed(&budget, 2));
        assert!(tracker.skip_would_exceed(&budget, 3));
    }

    /// With `max_recoveries: 2`, the recovery budget is exhausted by the
    /// second recorded recovery and not before.
    #[test]
    fn test_budget_tracker_recoveries() {
        let budget = ParseBudget { max_recoveries: 2, ..Default::default() };
        let mut tracker = BudgetTracker::new();

        assert!(!tracker.recoveries_exhausted(&budget));

        tracker.record_recovery();
        assert!(!tracker.recoveries_exhausted(&budget));

        tracker.record_recovery();
        assert!(tracker.recoveries_exhausted(&budget));
    }

    /// `ParseOutput::success` reports no errors and no early termination.
    #[test]
    fn test_parse_output_success() {
        let output = ParseOutput::success(empty_program());

        assert!(output.is_ok());
        assert!(!output.has_errors());
        assert_eq!(output.error_count(), 0);
        assert!(!output.terminated_early);
    }

    /// `ParseOutput::with_errors` is not ok and counts every supplied error.
    #[test]
    fn test_parse_output_with_errors() {
        let errors = vec![ParseError::syntax("error 1", 0), ParseError::syntax("error 2", 5)];
        let output = ParseOutput::with_errors(empty_program(), errors);

        assert!(!output.is_ok());
        assert!(output.has_errors());
        assert_eq!(output.error_count(), 2);
    }

    /// `ParseOutput::finish` carries the tracker's counters through unchanged
    /// in `budget_usage`, along with the `terminated_early` flag.
    #[test]
    fn test_parse_output_finish_preserves_tracker() {
        let errors = vec![ParseError::syntax("error 1", 0)];

        // Create a tracker with specific values
        let mut tracker = BudgetTracker::new();
        tracker.errors_emitted = 5;
        tracker.tokens_skipped = 42;
        tracker.recoveries_attempted = 3;
        tracker.max_depth_reached = 10;

        let output = ParseOutput::finish(empty_program(), errors, tracker, true);

        // Verify all tracker values are preserved
        assert_eq!(output.budget_usage.errors_emitted, 5);
        assert_eq!(output.budget_usage.tokens_skipped, 42);
        assert_eq!(output.budget_usage.recoveries_attempted, 3);
        assert_eq!(output.budget_usage.max_depth_reached, 10);
        assert!(output.terminated_early);
        assert_eq!(output.error_count(), 1);
    }

    /// With `max_recoveries: 0`, `begin_recovery` refuses and leaves
    /// `recoveries_attempted` at zero.
    #[test]
    fn test_begin_recovery_checks_budget_first() {
        let budget = ParseBudget { max_recoveries: 0, ..Default::default() };
        let mut tracker = BudgetTracker::new();

        // Should fail immediately - budget is 0
        assert!(!tracker.begin_recovery(&budget));
        assert_eq!(tracker.recoveries_attempted, 0);
    }

    /// `can_skip_more` accepts a request that lands exactly on the limit,
    /// rejects one token beyond it, and always accepts a zero-token request.
    #[test]
    fn test_can_skip_more_boundary_conditions() {
        let budget = ParseBudget { max_tokens_skipped: 10, ..Default::default() };
        let mut tracker = BudgetTracker::new();

        // At 0 skipped, can skip up to 10
        assert!(tracker.can_skip_more(&budget, 10));
        assert!(!tracker.can_skip_more(&budget, 11));

        // Skip 5
        tracker.record_skip(5);

        // At 5 skipped, can skip up to 5 more
        assert!(tracker.can_skip_more(&budget, 5));
        assert!(!tracker.can_skip_more(&budget, 6));

        // Skip 5 more to reach limit
        tracker.record_skip(5);

        // At limit, cannot skip any more
        assert!(!tracker.can_skip_more(&budget, 1));
        assert!(tracker.can_skip_more(&budget, 0));
    }

    /// `get_error_contexts` maps an error at the first newline to line 0,
    /// captures that line's text, and surfaces the semicolon suggestion.
    #[test]
    fn test_error_context_enrichment() {
        let source = "line1\nline2;\nline3";
        // "line1" occupies offsets 0..=4 ('1' is at 4); offset 5 is the newline.
        let errors = vec![ParseError::unexpected("';'", "newline", 5)];

        let contexts = get_error_contexts(&errors, source);
        assert_eq!(contexts.len(), 1);
        assert_eq!(contexts[0].line, 0); // line1 is line 0
        assert_eq!(contexts[0].source_line, "line1");
        let suggestion = contexts[0].suggestion.as_deref().unwrap_or("");
        assert!(suggestion.contains("semicolon"));
    }

    /// The listed `RecoverySite` and `RecoveryKind` variants can be
    /// constructed, debug-formatted, cloned, and compared.
    #[test]
    fn test_recovery_site_and_kind_variants() {
        // Verify all RecoverySite and RecoveryKind variants are constructible and comparable.
        let sites = [
            RecoverySite::ArgList,
            RecoverySite::ArraySubscript,
            RecoverySite::HashSubscript,
            RecoverySite::PostfixChain,
            RecoverySite::InfixRhs,
        ];
        let kinds = [
            RecoveryKind::InsertedCloser,
            RecoveryKind::MissingOperand,
            RecoveryKind::TruncatedChain,
            RecoveryKind::InferredSemicolon,
        ];
        // Each site and kind is debug-formattable and clone-able.
        for s in &sites {
            let _ = format!("{s:?}");
            let _ = s.clone();
        }
        for k in &kinds {
            let _ = format!("{k:?}");
            let _ = k.clone();
        }
        // PartialEq works.
        assert_eq!(RecoverySite::ArgList, RecoverySite::ArgList);
        assert_ne!(RecoverySite::ArgList, RecoverySite::PostfixChain);
        assert_eq!(RecoveryKind::InsertedCloser, RecoveryKind::InsertedCloser);
        assert_ne!(RecoveryKind::InsertedCloser, RecoveryKind::MissingOperand);
    }

    /// A `ParseError::Recovered` value exposes its location, offers no
    /// suggestion, and can be rendered with `Display`.
    #[test]
    fn test_parse_error_recovered_variant() {
        let err = ParseError::Recovered {
            site: RecoverySite::ArgList,
            kind: RecoveryKind::InsertedCloser,
            location: 42,
        };
        // location() returns Some for Recovered variant.
        assert_eq!(err.location(), Some(42));
        // suggestion() returns None for Recovered.
        assert!(err.suggestion().is_none());
        // Display works (via thiserror).
        let s = format!("{err}");
        assert!(s.contains("Recovered") || s.contains("position 42"));
    }

    /// `with_errors` counts every error in `error_count()` and only the
    /// `Recovered` ones in `recovered_count`.
    #[test]
    fn test_parse_output_recovered_count_with_errors() {
        let errors = vec![
            ParseError::syntax("error 1", 0),
            ParseError::Recovered {
                site: RecoverySite::ArgList,
                kind: RecoveryKind::MissingOperand,
                location: 10,
            },
            ParseError::Recovered {
                site: RecoverySite::PostfixChain,
                kind: RecoveryKind::TruncatedChain,
                location: 20,
            },
        ];
        let output = ParseOutput::with_errors(empty_program(), errors);

        assert_eq!(output.error_count(), 3);
        assert_eq!(output.recovered_count, 2);
    }

    /// `ParseOutput::success` starts with a `recovered_count` of zero.
    #[test]
    fn test_parse_output_success_has_zero_recovered_count() {
        let output = ParseOutput::success(empty_program());
        assert_eq!(output.recovered_count, 0);
    }

    /// `ParseOutput::finish` counts `Recovered` errors in `recovered_count`
    /// and passes a `false` early-termination flag through.
    #[test]
    fn test_parse_output_finish_recovered_count() {
        let errors = vec![
            ParseError::syntax("error", 0),
            ParseError::Recovered {
                site: RecoverySite::InfixRhs,
                kind: RecoveryKind::InferredSemicolon,
                location: 5,
            },
        ];
        let tracker = BudgetTracker::new();
        let output = ParseOutput::finish(empty_program(), errors, tracker, false);

        assert_eq!(output.recovered_count, 1);
        assert!(!output.terminated_early);
    }
}
perl_parser_core/syntax/error/mod.rs

perl_parser_core/syntax/error/
mod.rs