use crate::Token;
use perl_position_tracking::Position;
/// A [`Token`] bundled with the [`Position`]s recorded for its two ends.
///
/// `PositionTracker::wrap_token` builds these by resolving the token's
/// `start` and `end` byte offsets into positions.
#[derive(Debug, Clone)]
pub struct TokenWithPosition {
/// The wrapped token.
pub token: Token,
/// Position recorded for the token's start.
pub start_pos: Position,
/// Position recorded for the token's end.
pub end_pos: Position,
}
impl TokenWithPosition {
    /// Bundles `token` with the already-computed positions of its two ends.
    pub fn new(token: Token, start_pos: Position, end_pos: Position) -> Self {
        Self {
            token,
            start_pos,
            end_pos,
        }
    }

    /// Borrows the type of the wrapped token.
    pub fn kind(&self) -> &crate::TokenType {
        let inner = &self.token;
        &inner.token_type
    }

    /// Borrows the text of the wrapped token.
    pub fn text(&self) -> &str {
        let text: &str = &self.token.text;
        text
    }

    /// Returns the wrapped token's `(start, end)` byte offsets.
    pub fn byte_range(&self) -> (usize, usize) {
        let inner = &self.token;
        (inner.start, inner.end)
    }

    /// Builds a `perl_position_tracking::Range` running from `start_pos` to `end_pos`.
    pub fn range(&self) -> perl_position_tracking::Range {
        let (from, to) = (self.start_pos, self.end_pos);
        perl_position_tracking::Range::new(from, to)
    }
}
/// Resolves byte offsets in a borrowed source string to line/column positions.
///
/// The tracker keeps a table of line-start offsets so that a lookup only has
/// to binary-search that table instead of rescanning the source from the top.
///
/// Derives `Debug` and `Clone` so it can be logged and duplicated like the
/// other public types in this module.
#[derive(Debug, Clone)]
pub struct PositionTracker<'a> {
    /// The source text that byte offsets are resolved against.
    source: &'a str,
    /// Byte offset at which each line begins, in ascending order. The first
    /// entry is always `0`; every later entry is the byte right after a `'\n'`.
    line_starts: Vec<usize>,
}
impl<'a> PositionTracker<'a> {
    /// Builds a tracker for `source`, recording the byte offset of every line start.
    ///
    /// The first line starts at byte 0; each `'\n'` opens a new line at the
    /// byte immediately after it.
    pub fn new(source: &'a str) -> Self {
        let after_each_newline = source.match_indices('\n').map(|(idx, _)| idx + 1);
        let line_starts: Vec<usize> = std::iter::once(0).chain(after_each_newline).collect();
        Self {
            source,
            line_starts,
        }
    }

    /// Converts a byte offset into a [`Position`] with a 1-based line and column.
    ///
    /// The offset itself is passed through to the position unchanged; only the
    /// column computation clamps it into the source.
    pub fn byte_to_position(&self, byte: usize) -> Position {
        // An exact hit means `byte` is the first byte of that line. A miss
        // yields the insertion point, which is one past the containing line.
        let line = self
            .line_starts
            .binary_search(&byte)
            .unwrap_or_else(|insert_at| insert_at.saturating_sub(1));
        let column = self.calculate_column(self.line_starts[line], byte);
        let line_number = (line + 1) as u32;
        Position::new(byte, line_number, column)
    }

    /// Returns the 1-based column of `byte`, counted in `char`s from `line_start`.
    fn calculate_column(&self, line_start: usize, byte: usize) -> u32 {
        // Slicing a `str` off a char boundary panics, so snap the end down first.
        let end = self.clamp_to_char_boundary(byte);
        let chars_before = self.source[line_start..end].chars().count();
        (chars_before + 1) as u32
    }

    /// Returns the largest char boundary that is `<= byte` and `<= source.len()`.
    fn clamp_to_char_boundary(&self, byte: usize) -> usize {
        let upper = byte.min(self.source.len());
        // Offset 0 is always a char boundary, so the search cannot come up empty.
        (0..=upper)
            .rev()
            .find(|&idx| self.source.is_char_boundary(idx))
            .unwrap_or(0)
    }

    /// Wraps `token` together with the positions of its `start` and `end` byte offsets.
    pub fn wrap_token(&self, token: Token) -> TokenWithPosition {
        let (start, end) = (token.start, token.end);
        TokenWithPosition::new(
            token,
            self.byte_to_position(start),
            self.byte_to_position(end),
        )
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{Token, TokenType};
    use std::sync::Arc;

    /// Offsets on the first and second line resolve to 1-based line/column pairs.
    #[test]
    fn test_position_tracker() {
        let tracker = PositionTracker::new("hello\nworld\n");
        let locate = |byte| {
            let pos = tracker.byte_to_position(byte);
            (pos.line, pos.column)
        };

        // Start of the input.
        assert_eq!(locate(0), (1, 1));
        // Middle of the first line.
        assert_eq!(locate(3), (1, 4));
        // First byte after the first newline.
        assert_eq!(locate(6), (2, 1));
    }

    /// Wrapping a token resolves both of its byte offsets.
    #[test]
    fn test_token_wrapping() {
        let tracker = PositionTracker::new("my $x");
        let keyword = Token::new(TokenType::Keyword(Arc::from("my")), Arc::from("my"), 0, 2);

        let TokenWithPosition {
            start_pos, end_pos, ..
        } = tracker.wrap_token(keyword);

        assert_eq!((start_pos.line, start_pos.column), (1, 1));
        assert_eq!(end_pos.column, 3);
    }

    /// An offset inside a multi-byte character must not panic and resolves to
    /// the column of the character it falls in.
    #[test]
    fn test_byte_to_position_handles_non_char_boundary_offsets() {
        // 'é' occupies bytes 0..2, so byte 1 is not a char boundary.
        let tracker = PositionTracker::new("éa\n");
        let locate = |byte| {
            let pos = tracker.byte_to_position(byte);
            (pos.line, pos.column)
        };

        assert_eq!(locate(1), (1, 1));
        assert_eq!(locate(2), (1, 2));
    }
}