use crate::{
error::{BudgetTracker, ParseBudget},
error_recovery::ParseError,
position::{Position, Range},
token_wrapper::TokenWithPosition,
};
use perl_ast_v2::NodeIdGenerator;
use perl_lexer::TokenType;
use perl_position_tracking::LineStartsCache;
use std::collections::VecDeque;
/// Shared state for a single parse pass: the token stream (lexed eagerly up
/// front in `new`), a cursor into it, accumulated recoverable errors, and the
/// parse-budget machinery that bounds error count and nesting depth.
pub struct ParserContext {
    // All tokens for the source, annotated with start/end positions.
    tokens: VecDeque<TokenWithPosition>,
    // Index of the current token within `tokens`; clamped to `tokens.len()`.
    current: usize,
    /// Generator for unique AST node ids.
    pub id_generator: NodeIdGenerator,
    // Errors collected so far; drained by `take_errors`.
    errors: Vec<ParseError>,
    // Full source text, used for `source_slice` lookups.
    source: String,
    // Retained after construction but currently unused (hence the underscore).
    _position_tracker: PositionTracker,
    // Limits on errors/depth for this parse.
    budget: ParseBudget,
    // Running consumption measured against `budget`.
    budget_tracker: BudgetTracker,
}
/// Maps byte offsets in the source to line/column `Position`s using a
/// precomputed cache of line-start offsets.
struct PositionTracker {
    line_cache: LineStartsCache,
    // Owned copy of the source the cache was built from.
    source: String,
}
impl PositionTracker {
    /// Builds a tracker over `source`, precomputing its line-start offsets.
    fn new(source: String) -> Self {
        Self {
            line_cache: LineStartsCache::new(&source),
            source,
        }
    }

    /// Translates a byte offset into a `Position`, shifting the cache's
    /// 0-based line/character pair to the 1-based convention used here.
    fn byte_to_position(&self, byte_offset: usize) -> Position {
        let (line_zero, col_zero) =
            self.line_cache.offset_to_position(&self.source, byte_offset);
        Position::new(byte_offset, line_zero + 1, col_zero + 1)
    }
}
impl ParserContext {
    /// Eagerly lexes `source` into position-annotated tokens and builds a
    /// context with default budget settings.
    pub fn new(source: String) -> Self {
        let position_tracker = PositionTracker::new(source.clone());
        let mut lexer = perl_lexer::PerlLexer::new(&source);
        let mut tokens = VecDeque::new();
        while let Some(token) = lexer.next_token() {
            // The EOF token marks the end of the stream; it is not stored.
            if matches!(token.token_type, TokenType::EOF) {
                break;
            }
            let start_pos = position_tracker.byte_to_position(token.start);
            let end_pos = position_tracker.byte_to_position(token.end);
            tokens.push_back(TokenWithPosition::new(token, start_pos, end_pos));
        }
        Self {
            tokens,
            current: 0,
            id_generator: NodeIdGenerator::new(),
            errors: Vec::new(),
            source,
            _position_tracker: position_tracker,
            budget: ParseBudget::default(),
            budget_tracker: BudgetTracker::new(),
        }
    }

    /// Like `new`, but installs a caller-supplied parse budget.
    pub fn with_budget(source: String, budget: ParseBudget) -> Self {
        Self {
            budget,
            ..Self::new(source)
        }
    }

    /// Read-only access to the configured parse budget.
    pub fn budget(&self) -> &ParseBudget {
        &self.budget
    }

    /// Read-only access to the budget consumption tracker.
    pub fn budget_tracker(&self) -> &BudgetTracker {
        &self.budget_tracker
    }

    /// Mutable access to the budget consumption tracker.
    pub fn budget_tracker_mut(&mut self) -> &mut BudgetTracker {
        &mut self.budget_tracker
    }

    /// True once the error budget has been used up.
    pub fn errors_exhausted(&self) -> bool {
        self.budget_tracker.errors_exhausted(&self.budget)
    }

    /// True when descending one more level would exceed the depth budget.
    pub fn depth_would_exceed(&self) -> bool {
        self.budget_tracker.depth_would_exceed(&self.budget)
    }

    /// Records one level of nesting; returns `false` (recording nothing)
    /// when the depth budget would be exceeded.
    pub fn enter_depth(&mut self) -> bool {
        if self.depth_would_exceed() {
            false
        } else {
            self.budget_tracker.enter_depth();
            true
        }
    }

    /// Leaves one level of nesting.
    pub fn exit_depth(&mut self) {
        self.budget_tracker.exit_depth();
    }

    /// The token under the cursor, if any.
    pub fn current_token(&self) -> Option<&TokenWithPosition> {
        self.tokens.get(self.current)
    }

    /// The token `offset` places past the cursor, if any.
    pub fn peek_token(&self, offset: usize) -> Option<&TokenWithPosition> {
        self.tokens.get(self.current + offset)
    }

    /// Moves the cursor one token forward (saturating at end of stream)
    /// and returns the token now under it.
    pub fn advance(&mut self) -> Option<&TokenWithPosition> {
        if !self.is_eof() {
            self.current += 1;
        }
        self.current_token()
    }

    /// True once the cursor has moved past the last token.
    pub fn is_eof(&self) -> bool {
        self.current_token().is_none()
    }

    /// Position of the cursor: the start of the current token, the end of
    /// the final token once the stream is exhausted, or the origin (1:1)
    /// for empty input.
    pub fn current_position(&self) -> Position {
        match (self.current_token(), self.tokens.back()) {
            (Some(tok), _) => tok.range().start,
            (None, Some(last)) => last.range().end,
            (None, None) => Position::new(0, 1, 1),
        }
    }

    /// Range of the current token, or an empty range at the cursor position.
    pub fn current_position_range(&self) -> Range {
        match self.current_token() {
            Some(tok) => tok.range(),
            None => {
                let here = self.current_position();
                Range::new(here, here)
            }
        }
    }

    /// Records `error` unless the error budget is exhausted; returns whether
    /// the error was accepted.
    pub fn add_error(&mut self, error: ParseError) -> bool {
        if self.errors_exhausted() {
            return false;
        }
        self.add_error_unchecked(error);
        true
    }

    /// Records `error` without consulting the budget.
    pub fn add_error_unchecked(&mut self, error: ParseError) {
        self.errors.push(error);
        self.budget_tracker.record_error();
    }

    /// Drains and returns every accumulated error, leaving the list empty.
    pub fn take_errors(&mut self) -> Vec<ParseError> {
        std::mem::take(&mut self.errors)
    }

    /// Current cursor index, suitable for later `set_index` backtracking.
    pub fn current_index(&self) -> usize {
        self.current
    }

    /// Moves the cursor to `index`, clamped to the token count.
    pub fn set_index(&mut self, index: usize) {
        self.current = std::cmp::min(index, self.tokens.len());
    }

    /// Consumes and returns the current token when it matches `expected`;
    /// otherwise leaves the cursor in place and returns a diagnostic error.
    pub fn expect(&mut self, expected: TokenType) -> Result<&TokenWithPosition, ParseError> {
        match self.current_token() {
            Some(found) if found.token.token_type == expected => {
                // Remember where the matching token sits before advancing.
                let idx = self.current;
                self.advance();
                Ok(&self.tokens[idx])
            }
            Some(found) => Err(ParseError::new(
                format!("Expected {:?}, found {:?}", expected, found.token.token_type),
                found.range(),
            )
            .with_expected(vec![format!("{:?}", expected)])
            .with_found(format!("{:?}", found.token.token_type))),
            None => Err(ParseError::new(
                format!("Expected {:?}, found end of file", expected),
                self.current_position_range(),
            )
            .with_expected(vec![format!("{:?}", expected)])
            .with_found("EOF".to_string())),
        }
    }

    /// True when the current token is of `token_type`.
    pub fn check(&self, token_type: &TokenType) -> bool {
        matches!(self.current_token(), Some(t) if &t.token.token_type == token_type)
    }

    /// Advances past the current token when it is of `token_type`; reports
    /// whether anything was consumed.
    pub fn consume(&mut self, token_type: &TokenType) -> bool {
        let matched = self.check(token_type);
        if matched {
            self.advance();
        }
        matched
    }

    /// The slice of the original source text covered by `range`.
    ///
    /// Panics (like any slice index) if the byte offsets fall outside the
    /// source or off a char boundary.
    pub fn source_slice(&self, range: &Range) -> &str {
        &self.source[range.start.byte..range.end.byte]
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use perl_tdd_support::must_some;

    #[test]
    fn test_parser_context_creation() {
        // A non-empty program yields at least one token and starts before EOF.
        let ctx = ParserContext::new("my $x = 42;".to_string());
        assert!(!ctx.tokens.is_empty());
        assert!(!ctx.is_eof());
    }

    #[test]
    fn test_token_advancement() {
        let mut ctx = ParserContext::new("my $x".to_string());
        // The first token is the `my` keyword...
        let first = ctx.current_token().map(|t| &t.token.token_type);
        assert!(matches!(first, Some(TokenType::Keyword(k)) if k.as_ref() == "my"));
        // ...and advancing lands on a following token.
        ctx.advance();
        assert!(ctx.current_token().is_some());
    }

    #[test]
    fn test_error_accumulation() {
        let mut ctx = ParserContext::new("test".to_string());
        let first = ParseError::new("Error 1".to_string(), ctx.current_position_range());
        let second = ParseError::new("Error 2".to_string(), ctx.current_position_range());
        ctx.add_error(first);
        ctx.add_error(second);
        // take_errors drains both, preserving insertion order.
        let collected = ctx.take_errors();
        assert_eq!(collected.len(), 2);
        assert_eq!(collected[0].message, "Error 1");
        assert_eq!(collected[1].message, "Error 2");
    }

    #[test]
    fn test_multiline_positions() {
        let source = "my $x = 42;\nmy $y = 43;".to_string();
        let ctx = ParserContext::new(source.clone());
        // Locate each `my` by byte offset, then check its line/column mapping.
        let head_at = must_some(source.find("my"));
        let tail_at = must_some(source.rfind("my"));
        let head = must_some(ctx.tokens.iter().find(|t| t.range().start.byte == head_at));
        assert_eq!((head.range().start.line, head.range().start.column), (1, 1));
        assert_eq!((head.range().end.line, head.range().end.column), (1, 3));
        let tail = must_some(ctx.tokens.iter().find(|t| t.range().start.byte == tail_at));
        assert_eq!((tail.range().start.line, tail.range().start.column), (2, 1));
        assert_eq!((tail.range().end.line, tail.range().end.column), (2, 3));
    }

    #[test]
    fn test_multiline_string_token_positions() {
        let source = "my $s = \"a\nb\";".to_string();
        let ctx = ParserContext::new(source.clone());
        // The string literal spans a newline, so its end must land on line 2.
        let quote_at = must_some(source.find('"'));
        let tok = must_some(ctx.tokens.iter().find(|t| t.range().start.byte == quote_at));
        assert_eq!((tok.range().start.line, tok.range().start.column), (1, 9));
        assert_eq!((tok.range().end.line, tok.range().end.column), (2, 3));
    }

    #[test]
    fn test_utf16_position_mapping() {
        let source = "my $emoji = 😀;".to_string();
        let ctx = ParserContext::new(source.clone());
        // Multi-byte content before `=` must not break position mapping.
        let eq_at = must_some(source.find('='));
        let eq_tok = must_some(ctx.tokens.iter().find(|t| t.range().start.byte == eq_at));
        assert_eq!(eq_tok.range().start.line, 1);
        assert!(eq_tok.range().start.column > 0);
    }

    #[test]
    fn test_crlf_line_endings() {
        let source = "my $x = 42;\r\nmy $y = 43;".to_string();
        let ctx = ParserContext::new(source.clone());
        // CRLF terminators must still advance the line counter exactly once.
        let head_at = must_some(source.find("my"));
        let tail_at = must_some(source.rfind("my"));
        let head = must_some(ctx.tokens.iter().find(|t| t.range().start.byte == head_at));
        assert_eq!((head.range().start.line, head.range().start.column), (1, 1));
        let tail = must_some(ctx.tokens.iter().find(|t| t.range().start.byte == tail_at));
        assert_eq!((tail.range().start.line, tail.range().start.column), (2, 1));
    }

    #[test]
    fn test_empty_source() {
        // No source, no tokens: the context starts (and stays) at EOF.
        let ctx = ParserContext::new("".to_string());
        assert!(ctx.is_eof());
        assert!(ctx.tokens.is_empty());
    }

    #[test]
    fn test_single_token() {
        let ctx = ParserContext::new("42".to_string());
        assert_eq!(ctx.tokens.len(), 1);
        let range = ctx.tokens[0].range();
        // Byte offsets are 0-based; lines and columns are 1-based.
        assert_eq!((range.start.byte, range.start.line, range.start.column), (0, 1, 1));
        assert_eq!((range.end.byte, range.end.line, range.end.column), (2, 1, 3));
    }
}