use perl_position_tracking::LineIndex;
use thiserror::Error;
/// A parse error bundled with the position and source text needed to display it.
///
/// Values are produced by `get_error_contexts`, one per input error.
#[derive(Debug, Clone)]
pub struct ErrorContext {
/// The error this context describes (a clone of the caller's error).
pub error: ParseError,
/// Zero-based line number; it is used as an index into `source.lines()`.
pub line: usize,
/// Column as reported by `LineIndex::offset_to_position`.
/// NOTE(review): the unit (bytes, chars, UTF-16) is not visible in this file — confirm.
pub column: usize,
/// Text of the line at `line`, or an empty string when that line does not exist.
pub source_line: String,
/// Hint produced by `ParseError::suggestion`, if the error has one.
pub suggestion: Option<String>,
}
impl From<perl_regex::RegexError> for ParseError {
fn from(err: perl_regex::RegexError) -> Self {
match err {
perl_regex::RegexError::Syntax { message, offset } => {
ParseError::syntax(message, offset)
}
}
}
}
/// The syntactic position at which a recovery took place.
///
/// Carried by `ParseError::Recovered` and printed with `{:?}` in its message.
/// NOTE(review): the per-variant descriptions below are read from the variant
/// names; the parser code that emits them is not in this file — confirm.
#[derive(Debug, Clone, PartialEq)]
pub enum RecoverySite {
/// Presumably inside an argument list.
ArgList,
/// Presumably inside an array subscript.
ArraySubscript,
/// Presumably inside a hash subscript.
HashSubscript,
/// Presumably within a chain of postfix operations.
PostfixChain,
/// Presumably on the right-hand side of an infix operator.
InfixRhs,
}
/// The kind of repair that was applied during a recovery.
///
/// Carried by `ParseError::Recovered` and printed with `{:?}` in its message.
/// NOTE(review): the per-variant descriptions below are read from the variant
/// names; the parser code that emits them is not in this file — confirm.
#[derive(Debug, Clone, PartialEq)]
pub enum RecoveryKind {
/// Presumably a missing closing delimiter was inserted.
InsertedCloser,
/// Presumably an operand was missing.
MissingOperand,
/// Presumably a chain was cut short.
TruncatedChain,
/// Presumably a statement terminator was inferred.
InferredSemicolon,
}
/// Upper bounds on the work a single parse may perform.
///
/// Every field is a ceiling; [`BudgetTracker`] compares its running counters
/// against these values.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ParseBudget {
    /// Ceiling on the number of errors emitted.
    pub max_errors: usize,
    /// Ceiling on nesting depth.
    pub max_depth: usize,
    /// Ceiling on the total number of tokens skipped.
    pub max_tokens_skipped: usize,
    /// Ceiling on the number of recovery attempts.
    pub max_recoveries: usize,
}

impl Default for ParseBudget {
    /// Default limits: 100 errors, depth 256, 1000 skipped tokens, 500 recoveries.
    fn default() -> Self {
        ParseBudget {
            max_recoveries: 500,
            max_tokens_skipped: 1000,
            max_depth: 256,
            max_errors: 100,
        }
    }
}

impl ParseBudget {
    /// Preset named for IDE use; returns exactly the [`Default`] limits.
    pub fn for_ide() -> Self {
        <Self as Default>::default()
    }

    /// Tight preset: 10 errors, depth 64, 100 skipped tokens, 50 recoveries.
    pub fn strict() -> Self {
        ParseBudget {
            max_recoveries: 50,
            max_tokens_skipped: 100,
            max_depth: 64,
            max_errors: 10,
        }
    }

    /// Preset in which every limit is `usize::MAX`.
    pub fn unlimited() -> Self {
        const NO_LIMIT: usize = usize::MAX;

        ParseBudget {
            max_recoveries: NO_LIMIT,
            max_tokens_skipped: NO_LIMIT,
            max_depth: NO_LIMIT,
            max_errors: NO_LIMIT,
        }
    }
}
/// Running counters that are checked against a [`ParseBudget`].
///
/// All counters start at zero and every update uses saturating arithmetic, so
/// none of them can wrap around.
#[derive(Debug, Clone, Default)]
pub struct BudgetTracker {
    /// Number of errors recorded so far.
    pub errors_emitted: usize,
    /// Current nesting depth.
    pub current_depth: usize,
    /// Largest value `current_depth` has reached.
    pub max_depth_reached: usize,
    /// Total number of tokens skipped so far.
    pub tokens_skipped: usize,
    /// Number of recoveries recorded so far.
    pub recoveries_attempted: usize,
}

impl BudgetTracker {
    /// Creates a tracker with every counter at zero.
    pub fn new() -> Self {
        Default::default()
    }

    /// Returns `true` once the error count has reached `budget.max_errors`.
    pub fn errors_exhausted(&self, budget: &ParseBudget) -> bool {
        budget.max_errors <= self.errors_emitted
    }

    /// Returns `true` when the current depth has reached `budget.max_depth`.
    pub fn depth_would_exceed(&self, budget: &ParseBudget) -> bool {
        budget.max_depth <= self.current_depth
    }

    /// Returns `true` when skipping `count` more tokens would pass
    /// `budget.max_tokens_skipped`; the exact negation of [`Self::can_skip_more`].
    pub fn skip_would_exceed(&self, budget: &ParseBudget, count: usize) -> bool {
        !self.can_skip_more(budget, count)
    }

    /// Returns `true` once the recovery count has reached `budget.max_recoveries`.
    pub fn recoveries_exhausted(&self, budget: &ParseBudget) -> bool {
        budget.max_recoveries <= self.recoveries_attempted
    }

    /// Records a recovery if the budget still allows one.
    ///
    /// Returns `false`, leaving the counter untouched, when the recovery budget
    /// is already used up; otherwise increments the counter and returns `true`.
    pub fn begin_recovery(&mut self, budget: &ParseBudget) -> bool {
        let allowed = !self.recoveries_exhausted(budget);
        if allowed {
            self.record_recovery();
        }
        allowed
    }

    /// Returns `true` when `additional` more skipped tokens still fit within
    /// `budget.max_tokens_skipped` (the limit itself is allowed).
    pub fn can_skip_more(&self, budget: &ParseBudget, additional: usize) -> bool {
        let projected = self.tokens_skipped.saturating_add(additional);
        projected <= budget.max_tokens_skipped
    }

    /// Counts one more emitted error.
    pub fn record_error(&mut self) {
        let bumped = self.errors_emitted.saturating_add(1);
        self.errors_emitted = bumped;
    }

    /// Goes one level deeper and updates the high-water mark.
    pub fn enter_depth(&mut self) {
        let depth = self.current_depth.saturating_add(1);
        self.current_depth = depth;
        self.max_depth_reached = self.max_depth_reached.max(depth);
    }

    /// Goes one level up; stays at zero when already at zero.
    pub fn exit_depth(&mut self) {
        let depth = self.current_depth.saturating_sub(1);
        self.current_depth = depth;
    }

    /// Adds `count` to the number of skipped tokens.
    pub fn record_skip(&mut self, count: usize) {
        let total = self.tokens_skipped.saturating_add(count);
        self.tokens_skipped = total;
    }

    /// Counts one more recovery without consulting any budget.
    pub fn record_recovery(&mut self) {
        let bumped = self.recoveries_attempted.saturating_add(1);
        self.recoveries_attempted = bumped;
    }
}
/// Shorthand for a `Result` whose error type is [`ParseError`].
pub type ParseResult<T> = Result<T, ParseError>;
/// Errors and diagnostics produced while parsing.
///
/// The `Display` text of each variant is given by its `#[error(...)]` attribute.
/// `location` fields are offsets into the source; `get_error_contexts` clamps
/// them to `source.len()`, so they are presumably byte offsets — confirm.
#[derive(Error, Debug, Clone, PartialEq)]
pub enum ParseError {
/// The input ended unexpectedly.
#[error("Unexpected end of input")]
UnexpectedEof,
/// Something other than the expected token was found.
#[error("expected {expected}, found {found} at position {location}")]
UnexpectedToken {
/// Description of what was expected; `suggestion` inspects this text.
expected: String,
/// Description of what was found; `suggestion` inspects this text too.
found: String,
/// Source offset reported for the error.
location: usize,
},
/// Invalid syntax, described by a free-form message.
#[error("Invalid syntax at position {location}: {message}")]
SyntaxError {
/// Human-readable description of the problem.
message: String,
/// Source offset reported for the error.
location: usize,
},
/// An error reported by the lexer.
#[error("Lexer error: {message}")]
LexerError {
/// Human-readable description of the problem.
message: String,
},
/// The maximum recursion depth was exceeded.
#[error("Maximum recursion depth exceeded")]
RecursionLimit,
/// A number literal could not be accepted.
#[error("Invalid number literal: {literal}")]
InvalidNumber {
/// The offending literal text.
literal: String,
},
/// A string literal could not be accepted.
#[error("Invalid string literal")]
InvalidString,
/// A delimiter was opened but never closed.
#[error("Unclosed delimiter: {delimiter}")]
UnclosedDelimiter {
/// The delimiter character; `suggestion` proposes adding it as the closer.
delimiter: char,
},
/// A regular expression could not be accepted.
#[error("Invalid regex: {message}")]
InvalidRegex {
/// Human-readable description of the problem.
message: String,
},
/// Nesting went deeper than the allowed maximum.
#[error("Nesting depth limit exceeded: {depth} > {max_depth}")]
NestingTooDeep {
/// The depth that was reached.
depth: usize,
/// The limit that was exceeded.
max_depth: usize,
},
/// Parsing was cancelled.
#[error("Parsing cancelled")]
Cancelled,
/// Diagnostic noting that a recovery was performed; `ParseOutput` and
/// `RecoverySalvageProfile` count entries of this variant.
#[error("Recovered from {kind:?} at {site:?} (position {location})")]
Recovered {
/// Where the recovery happened.
site: RecoverySite,
/// What kind of repair was applied.
kind: RecoveryKind,
/// Source offset reported for the recovery.
location: usize,
},
}
pub mod classifier;
pub mod recovery;
use perl_ast::Node;
/// The result of a parse: a tree plus everything reported while building it.
#[derive(Debug, Clone)]
pub struct ParseOutput {
/// The syntax tree that was produced.
pub ast: Node,
/// Every diagnostic collected, including `ParseError::Recovered` entries.
pub diagnostics: Vec<ParseError>,
/// Budget counters associated with this parse.
pub budget_usage: BudgetTracker,
/// Flag supplied by the caller of `finish`; `success` and `with_errors` set it to `false`.
pub terminated_early: bool,
/// Number of `ParseError::Recovered` entries in `diagnostics`, as computed by the constructors.
pub recovered_count: usize,
}
/// Coarse classification of a parse, as assigned by `RecoverySalvageProfile::from_parse`.
///
/// The variants are listed from best to worst; `from_parse` checks them in the
/// opposite order, so the worst applicable class wins.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RecoverySalvageClass {
/// No error nodes, no `Recovered` diagnostics, and not flagged catastrophic.
Clean,
/// No error nodes in the tree, but at least one `Recovered` diagnostic.
StructuredRecoveryOnly,
/// The tree contains at least one `NodeKind::Error` node.
ErrorNodesPresent,
/// The caller flagged the parse as catastrophic.
CatastrophicFailure,
}
/// Summary of how much of a parse was salvaged; built by `from_parse`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct RecoverySalvageProfile {
/// The `catastrophic` flag exactly as passed to `from_parse`.
pub catastrophic: bool,
/// Number of `ParseError::Recovered` entries among the diagnostics.
pub recovered_count: usize,
/// Number of `NodeKind::Error` nodes found in the tree.
pub error_node_count: usize,
/// Message of the error node with the smallest start offset, if any was found.
pub first_unrecovered_error_node: Option<String>,
/// Overall classification derived from the fields above.
pub class: RecoverySalvageClass,
}
impl RecoverySalvageProfile {
pub fn from_parse(ast: &Node, diagnostics: &[ParseError], catastrophic: bool) -> Self {
let mut error_node_count = 0usize;
let mut first_start = usize::MAX;
let mut first_unrecovered_error_node: Option<String> = None;
fn walk(
node: &Node,
error_node_count: &mut usize,
first_start: &mut usize,
first_unrecovered_error_node: &mut Option<String>,
) {
if let perl_ast::NodeKind::Error { message, .. } = &node.kind {
*error_node_count = error_node_count.saturating_add(1);
if node.location.start < *first_start {
*first_start = node.location.start;
*first_unrecovered_error_node = Some(message.clone());
}
}
node.for_each_child(|child| {
walk(child, error_node_count, first_start, first_unrecovered_error_node);
});
}
walk(ast, &mut error_node_count, &mut first_start, &mut first_unrecovered_error_node);
let recovered_count =
diagnostics.iter().filter(|e| matches!(e, ParseError::Recovered { .. })).count();
let class = if catastrophic {
RecoverySalvageClass::CatastrophicFailure
} else if error_node_count > 0 {
RecoverySalvageClass::ErrorNodesPresent
} else if recovered_count > 0 {
RecoverySalvageClass::StructuredRecoveryOnly
} else {
RecoverySalvageClass::Clean
};
Self {
catastrophic,
recovered_count,
error_node_count,
first_unrecovered_error_node,
class,
}
}
}
impl ParseOutput {
pub fn success(ast: Node) -> Self {
Self {
ast,
diagnostics: Vec::new(),
budget_usage: BudgetTracker::new(),
terminated_early: false,
recovered_count: 0,
}
}
pub fn with_errors(ast: Node, diagnostics: Vec<ParseError>) -> Self {
let mut budget_usage = BudgetTracker::new();
budget_usage.errors_emitted = diagnostics.len();
let recovered_count =
diagnostics.iter().filter(|e| matches!(e, ParseError::Recovered { .. })).count();
Self { ast, diagnostics, budget_usage, terminated_early: false, recovered_count }
}
pub fn finish(
ast: Node,
diagnostics: Vec<ParseError>,
budget_usage: BudgetTracker,
terminated_early: bool,
) -> Self {
let recovered_count =
diagnostics.iter().filter(|e| matches!(e, ParseError::Recovered { .. })).count();
Self { ast, diagnostics, budget_usage, terminated_early, recovered_count }
}
pub fn is_ok(&self) -> bool {
self.diagnostics.is_empty()
}
pub fn has_errors(&self) -> bool {
!self.diagnostics.is_empty()
}
pub fn error_count(&self) -> usize {
self.diagnostics.len()
}
}
impl ParseError {
    /// Builds a [`ParseError::SyntaxError`] from a message and a source offset.
    pub fn syntax(message: impl Into<String>, location: usize) -> Self {
        let message = message.into();
        ParseError::SyntaxError { message, location }
    }

    /// Builds a [`ParseError::UnexpectedToken`] from descriptions of what was
    /// expected and found, plus a source offset.
    pub fn unexpected(
        expected: impl Into<String>,
        found: impl Into<String>,
        location: usize,
    ) -> Self {
        let (expected, found) = (expected.into(), found.into());
        ParseError::UnexpectedToken { expected, found, location }
    }

    /// Returns the source offset carried by the error, if its variant has one.
    ///
    /// Only `UnexpectedToken`, `SyntaxError` and `Recovered` carry an offset.
    pub fn location(&self) -> Option<usize> {
        match *self {
            ParseError::UnexpectedToken { location, .. }
            | ParseError::SyntaxError { location, .. }
            | ParseError::Recovered { location, .. } => Some(location),
            _ => None,
        }
    }

    /// Returns a fix-it hint for the error, when one is known.
    ///
    /// Hints exist for `UnclosedDelimiter` and for `UnexpectedToken`. For the
    /// latter the first matching rule wins, in this order: an expected `;`, `}`,
    /// `)` or `]`; an expected expression where `=>` or `->` was found; an
    /// expected text that mentions "variable" (case-insensitive).
    pub fn suggestion(&self) -> Option<String> {
        // Checked in order, so `;` takes precedence over the closing delimiters.
        const CLOSER_HINTS: [(char, &str); 4] = [
            (';', "add a semicolon ';' at the end of the statement"),
            ('}', "add a closing brace '}' to end the block"),
            (')', "add a closing parenthesis ')' to end the group"),
            (']', "add a closing bracket ']' to end the array"),
        ];
        const FAT_ARROW_HINT: &str = "'=>' (fat arrow) is not valid here; \
            did you forget a value before it?";
        const ARROW_HINT: &str = "'->' (arrow) is not valid here; \
            did you forget the object or reference before it?";
        const VARIABLE_HINT: &str =
            "expected a variable like $foo, @bar, or %hash after the declaration keyword";

        let (expected, found) = match self {
            ParseError::UnclosedDelimiter { delimiter } => {
                return Some(format!("add closing '{}' to complete the literal", delimiter));
            }
            ParseError::UnexpectedToken { expected, found, .. } => (expected, found),
            _ => return None,
        };

        for &(closer, hint) in CLOSER_HINTS.iter() {
            if expected.contains(closer) {
                return Some(hint.to_string());
            }
        }

        if expected.contains("expression") {
            if found.contains("=>") {
                return Some(FAT_ARROW_HINT.to_string());
            }
            if found.contains("->") {
                return Some(ARROW_HINT.to_string());
            }
        }

        if expected.to_lowercase().contains("variable") {
            return Some(VARIABLE_HINT.to_string());
        }

        None
    }
}
/// Enriches each error with its line, column, source line and suggestion.
///
/// The result has one [`ErrorContext`] per entry of `errors`, in the same order.
/// An error without a location is attributed to the end of `source`, and a
/// location past the end is clamped to `source.len()`. `source_line` is the
/// text of the reported line as split by `str::lines`, or an empty string when
/// the reported line number has no corresponding line.
pub fn get_error_contexts(errors: &[ParseError], source: &str) -> Vec<ErrorContext> {
    // Nothing to enrich: skip copying the source into a line index.
    if errors.is_empty() {
        return Vec::new();
    }

    let index = LineIndex::new(source.to_string());
    // Split once up front; calling `source.lines().nth(..)` per error would
    // rescan the source from the start for every error.
    let source_lines: Vec<&str> = source.lines().collect();
    let end_of_source = source.len();

    errors
        .iter()
        .map(|error| {
            let offset = error.location().map_or(end_of_source, |loc| loc.min(end_of_source));
            let (line, column) = index.offset_to_position(offset);
            let line = line as usize;
            let column = column as usize;
            let source_line = source_lines.get(line).copied().unwrap_or("").to_string();
            ErrorContext {
                error: error.clone(),
                line,
                column,
                source_line,
                suggestion: error.suggestion(),
            }
        })
        .collect()
}
// Unit tests for the budget types, parse output constructors, error helpers
// and error-context enrichment defined in this file.
#[cfg(test)]
mod tests {
use super::*;
// The default budget exposes the documented limits.
#[test]
fn test_parse_budget_defaults() {
let budget = ParseBudget::default();
assert_eq!(budget.max_errors, 100);
assert_eq!(budget.max_depth, 256);
assert_eq!(budget.max_tokens_skipped, 1000);
assert_eq!(budget.max_recoveries, 500);
}
// The strict preset uses the tighter limits.
#[test]
fn test_parse_budget_strict() {
let budget = ParseBudget::strict();
assert_eq!(budget.max_errors, 10);
assert_eq!(budget.max_depth, 64);
assert_eq!(budget.max_tokens_skipped, 100);
assert_eq!(budget.max_recoveries, 50);
}
// The error budget counts as exhausted exactly when the limit is reached.
#[test]
fn test_budget_tracker_errors() {
let budget = ParseBudget { max_errors: 3, ..Default::default() };
let mut tracker = BudgetTracker::new();
assert!(!tracker.errors_exhausted(&budget));
tracker.record_error();
tracker.record_error();
assert!(!tracker.errors_exhausted(&budget));
tracker.record_error();
assert!(tracker.errors_exhausted(&budget));
}
// The depth check trips at the limit and clears again after leaving a level.
#[test]
fn test_budget_tracker_depth() {
let budget = ParseBudget { max_depth: 2, ..Default::default() };
let mut tracker = BudgetTracker::new();
assert!(!tracker.depth_would_exceed(&budget));
tracker.enter_depth();
assert!(!tracker.depth_would_exceed(&budget));
tracker.enter_depth();
assert!(tracker.depth_would_exceed(&budget));
tracker.exit_depth();
assert!(!tracker.depth_would_exceed(&budget));
}
// Skipping up to the limit is allowed; one token past it is not.
#[test]
fn test_budget_tracker_skip() {
let budget = ParseBudget { max_tokens_skipped: 5, ..Default::default() };
let mut tracker = BudgetTracker::new();
assert!(!tracker.skip_would_exceed(&budget, 3));
tracker.record_skip(3);
assert!(!tracker.skip_would_exceed(&budget, 2));
assert!(tracker.skip_would_exceed(&budget, 3));
}
// The recovery budget counts as exhausted exactly when the limit is reached.
#[test]
fn test_budget_tracker_recoveries() {
let budget = ParseBudget { max_recoveries: 2, ..Default::default() };
let mut tracker = BudgetTracker::new();
assert!(!tracker.recoveries_exhausted(&budget));
tracker.record_recovery();
assert!(!tracker.recoveries_exhausted(&budget));
tracker.record_recovery();
assert!(tracker.recoveries_exhausted(&budget));
}
// `success` yields an output with no diagnostics and no early termination.
#[test]
fn test_parse_output_success() {
use perl_ast::{Node, NodeKind, SourceLocation};
let ast = Node::new(
NodeKind::Program { statements: vec![] },
SourceLocation { start: 0, end: 0 },
);
let output = ParseOutput::success(ast);
assert!(output.is_ok());
assert!(!output.has_errors());
assert_eq!(output.error_count(), 0);
assert!(!output.terminated_early);
}
// `with_errors` reports every diagnostic it was given.
#[test]
fn test_parse_output_with_errors() {
use perl_ast::{Node, NodeKind, SourceLocation};
let ast = Node::new(
NodeKind::Program { statements: vec![] },
SourceLocation { start: 0, end: 0 },
);
let errors = vec![ParseError::syntax("error 1", 0), ParseError::syntax("error 2", 5)];
let output = ParseOutput::with_errors(ast, errors);
assert!(!output.is_ok());
assert!(output.has_errors());
assert_eq!(output.error_count(), 2);
}
// `finish` keeps the caller's tracker as-is, even when it disagrees with the diagnostics.
#[test]
fn test_parse_output_finish_preserves_tracker() {
use perl_ast::{Node, NodeKind, SourceLocation};
let ast = Node::new(
NodeKind::Program { statements: vec![] },
SourceLocation { start: 0, end: 0 },
);
let errors = vec![ParseError::syntax("error 1", 0)];
let mut tracker = BudgetTracker::new();
tracker.errors_emitted = 5;
tracker.tokens_skipped = 42;
tracker.recoveries_attempted = 3;
tracker.max_depth_reached = 10;
let output = ParseOutput::finish(ast, errors, tracker, true);
assert_eq!(output.budget_usage.errors_emitted, 5);
assert_eq!(output.budget_usage.tokens_skipped, 42);
assert_eq!(output.budget_usage.recoveries_attempted, 3);
assert_eq!(output.budget_usage.max_depth_reached, 10);
assert!(output.terminated_early);
assert_eq!(output.error_count(), 1);
}
// With a zero recovery budget, `begin_recovery` refuses and leaves the counter untouched.
#[test]
fn test_begin_recovery_checks_budget_first() {
let budget = ParseBudget { max_recoveries: 0, ..Default::default() };
let mut tracker = BudgetTracker::new();
assert!(!tracker.begin_recovery(&budget));
assert_eq!(tracker.recoveries_attempted, 0);
}
// `can_skip_more` accepts totals up to and including the limit, and always accepts zero.
#[test]
fn test_can_skip_more_boundary_conditions() {
let budget = ParseBudget { max_tokens_skipped: 10, ..Default::default() };
let mut tracker = BudgetTracker::new();
assert!(tracker.can_skip_more(&budget, 10));
assert!(!tracker.can_skip_more(&budget, 11));
tracker.record_skip(5);
assert!(tracker.can_skip_more(&budget, 5));
assert!(!tracker.can_skip_more(&budget, 6));
tracker.record_skip(5);
assert!(!tracker.can_skip_more(&budget, 1));
assert!(tracker.can_skip_more(&budget, 0));
}
// An error at offset 5 (the newline ending the first line) is attributed to
// line 0 and receives the semicolon suggestion.
#[test]
fn test_error_context_enrichment() {
let source = "line1\nline2;\nline3";
let errors = vec![ParseError::unexpected("';'", "newline", 5)];
let contexts = get_error_contexts(&errors, source);
assert_eq!(contexts.len(), 1);
assert_eq!(contexts[0].line, 0); assert_eq!(contexts[0].source_line, "line1");
let suggestion = contexts[0].suggestion.as_deref().unwrap_or("");
assert!(suggestion.contains("semicolon"));
}
// Every site and kind variant supports `Debug`, `Clone` and equality.
#[test]
fn test_recovery_site_and_kind_variants() {
let sites = [
RecoverySite::ArgList,
RecoverySite::ArraySubscript,
RecoverySite::HashSubscript,
RecoverySite::PostfixChain,
RecoverySite::InfixRhs,
];
let kinds = [
RecoveryKind::InsertedCloser,
RecoveryKind::MissingOperand,
RecoveryKind::TruncatedChain,
RecoveryKind::InferredSemicolon,
];
for s in &sites {
let _ = format!("{s:?}");
let _ = s.clone();
}
for k in &kinds {
let _ = format!("{k:?}");
let _ = k.clone();
}
assert_eq!(RecoverySite::ArgList, RecoverySite::ArgList);
assert_ne!(RecoverySite::ArgList, RecoverySite::PostfixChain);
assert_eq!(RecoveryKind::InsertedCloser, RecoveryKind::InsertedCloser);
assert_ne!(RecoveryKind::InsertedCloser, RecoveryKind::MissingOperand);
}
// A `Recovered` diagnostic exposes its location, has no suggestion, and
// mentions the recovery or its position in its display text.
#[test]
fn test_parse_error_recovered_variant() {
let err = ParseError::Recovered {
site: RecoverySite::ArgList,
kind: RecoveryKind::InsertedCloser,
location: 42,
};
assert_eq!(err.location(), Some(42));
assert!(err.suggestion().is_none());
let s = format!("{err}");
assert!(s.contains("Recovered") || s.contains("position 42"));
}
// `with_errors` counts only the `Recovered` entries in `recovered_count`.
#[test]
fn test_parse_output_recovered_count_with_errors() {
use perl_ast::{Node, NodeKind, SourceLocation};
let ast = Node::new(
NodeKind::Program { statements: vec![] },
SourceLocation { start: 0, end: 0 },
);
let errors = vec![
ParseError::syntax("error 1", 0),
ParseError::Recovered {
site: RecoverySite::ArgList,
kind: RecoveryKind::MissingOperand,
location: 10,
},
ParseError::Recovered {
site: RecoverySite::PostfixChain,
kind: RecoveryKind::TruncatedChain,
location: 20,
},
];
let output = ParseOutput::with_errors(ast, errors);
assert_eq!(output.error_count(), 3);
assert_eq!(output.recovered_count, 2);
}
// A successful output reports zero recoveries.
#[test]
fn test_parse_output_success_has_zero_recovered_count() {
use perl_ast::{Node, NodeKind, SourceLocation};
let ast = Node::new(
NodeKind::Program { statements: vec![] },
SourceLocation { start: 0, end: 0 },
);
let output = ParseOutput::success(ast);
assert_eq!(output.recovered_count, 0);
}
// `finish` derives `recovered_count` from the diagnostics it is given.
#[test]
fn test_parse_output_finish_recovered_count() {
use perl_ast::{Node, NodeKind, SourceLocation};
let ast = Node::new(
NodeKind::Program { statements: vec![] },
SourceLocation { start: 0, end: 0 },
);
let errors = vec![
ParseError::syntax("error", 0),
ParseError::Recovered {
site: RecoverySite::InfixRhs,
kind: RecoveryKind::InferredSemicolon,
location: 5,
},
];
let tracker = BudgetTracker::new();
let output = ParseOutput::finish(ast, errors, tracker, false);
assert_eq!(output.recovered_count, 1);
assert!(!output.terminated_early);
}
}