// perl-lexer 0.13.2
// High-performance Perl lexer with context-aware tokenization.
// Documentation:
//! Token wrapper with enhanced position tracking
//!
//! This module provides a wrapper around lexer tokens that adds
//! line and column information for incremental parsing support.

use crate::Token;
use perl_position_tracking::Position;

/// A lexer [`Token`] augmented with line/column [`Position`]s for both ends.
///
/// The underlying token carries only byte offsets (`start`/`end`); this
/// wrapper adds the human-readable coordinates needed for incremental
/// parsing and diagnostics. Produced by [`PositionTracker::wrap_token`].
#[derive(Debug, Clone)]
pub struct TokenWithPosition {
    /// The original token
    pub token: Token,
    /// Start position with line/column
    pub start_pos: Position,
    /// End position with line/column
    pub end_pos: Position,
}

impl TokenWithPosition {
    /// Bundle a token together with its start and end positions.
    pub fn new(token: Token, start_pos: Position, end_pos: Position) -> Self {
        Self { token, start_pos, end_pos }
    }

    /// Type of the wrapped token.
    pub fn kind(&self) -> &crate::TokenType {
        &self.token.token_type
    }

    /// Source text of the wrapped token.
    pub fn text(&self) -> &str {
        &self.token.text
    }

    /// The token's `(start, end)` byte offsets.
    pub fn byte_range(&self) -> (usize, usize) {
        let start = self.token.start;
        let end = self.token.end;
        (start, end)
    }

    /// The token's span as a line/column range.
    pub fn range(&self) -> perl_position_tracking::Range {
        perl_position_tracking::Range::new(self.start_pos, self.end_pos)
    }
}

/// Position tracker for converting byte offsets to line/column
///
/// Built once per source string; precomputes a table of line-start byte
/// offsets so each lookup is a binary search rather than a rescan.
pub struct PositionTracker<'a> {
    // Borrowed source text; used to count chars when computing columns.
    source: &'a str,
    // line_starts[i] is the byte offset where line i (0-based) begins.
    // Always non-empty: index 0 holds offset 0 for the first line.
    line_starts: Vec<usize>,
}

impl<'a> PositionTracker<'a> {
    /// Build a tracker by recording the byte offset of each line start.
    pub fn new(source: &'a str) -> Self {
        // Line 0 always begins at offset 0; every '\n' opens a new line
        // at the byte immediately following it.
        let line_starts: Vec<usize> = std::iter::once(0)
            .chain(
                source
                    .char_indices()
                    .filter(|&(_, ch)| ch == '\n')
                    .map(|(idx, _)| idx + 1),
            )
            .collect();

        PositionTracker { source, line_starts }
    }

    /// Convert a byte offset to a Position
    pub fn byte_to_position(&self, byte: usize) -> Position {
        // An exact hit means `byte` is itself a line start; otherwise the
        // search yields the insertion point, and the containing line is
        // the entry just before it.
        let line = self
            .line_starts
            .binary_search(&byte)
            .unwrap_or_else(|insert_at| insert_at.saturating_sub(1));

        let column = self.calculate_column(self.line_starts[line], byte);

        // Positions are 1-based for lines and columns.
        Position::new(byte, (line + 1) as u32, column)
    }

    /// Column (1-based, counted in chars) of `byte` on the line that
    /// starts at byte offset `line_start`.
    fn calculate_column(&self, line_start: usize, byte: usize) -> u32 {
        let end = self.clamp_to_char_boundary(byte);
        let chars_before = self.source[line_start..end].chars().count();
        (chars_before + 1) as u32
    }

    /// Round `byte` down to the source length and then to the nearest
    /// UTF-8 char boundary, so slicing never panics mid-codepoint.
    fn clamp_to_char_boundary(&self, byte: usize) -> usize {
        let mut bounded = byte.min(self.source.len());
        while bounded > 0 && !self.source.is_char_boundary(bounded) {
            bounded -= 1;
        }
        bounded
    }

    /// Wrap a token with position information
    pub fn wrap_token(&self, token: Token) -> TokenWithPosition {
        let (start_pos, end_pos) = (
            self.byte_to_position(token.start),
            self.byte_to_position(token.end),
        );
        TokenWithPosition::new(token, start_pos, end_pos)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::{Token, TokenType};
    use std::sync::Arc;

    #[test]
    fn test_position_tracker() {
        let tracker = PositionTracker::new("hello\nworld\n");

        // (byte offset, expected line, expected column)
        let cases = [(0, 1, 1), (3, 1, 4), (6, 2, 1)];
        for (byte, line, column) in cases {
            let pos = tracker.byte_to_position(byte);
            assert_eq!((pos.line, pos.column), (line, column));
        }
    }

    #[test]
    fn test_token_wrapping() {
        let tracker = PositionTracker::new("my $x");
        let keyword = Token::new(TokenType::Keyword(Arc::from("my")), Arc::from("my"), 0, 2);

        let wrapped = tracker.wrap_token(keyword);
        assert_eq!((wrapped.start_pos.line, wrapped.start_pos.column), (1, 1));
        assert_eq!(wrapped.end_pos.column, 3);
    }

    #[test]
    fn test_byte_to_position_handles_non_char_boundary_offsets() {
        // 'é' occupies two bytes, so offset 1 falls inside the codepoint
        // and must be clamped back to its start.
        let tracker = PositionTracker::new("éa\n");

        let inside = tracker.byte_to_position(1);
        assert_eq!((inside.line, inside.column), (1, 1));

        let after = tracker.byte_to_position(2);
        assert_eq!((after.line, after.column), (1, 2));
    }
}