Skip to main content

perl_tokenizer/
token_wrapper.rs

1//! Token wrapper with enhanced position tracking
2//!
3//! This module provides a wrapper around lexer tokens that adds
4//! line and column information for incremental parsing support.
5
6use perl_lexer::Token;
7use perl_position_tracking::Position;
8
/// Token with full position information
///
/// Pairs a lexer [`Token`] with a [`Position`] for each of its two ends.
/// The wrapped token keeps its own byte offsets (`token.start` / `token.end`);
/// `PositionTracker::wrap_token` resolves those two offsets into `start_pos`
/// and `end_pos` respectively.
#[derive(Debug, Clone)]
pub struct TokenWithPosition {
    /// The original token, exactly as it was handed to the constructor
    pub token: Token,
    /// Start position with line/column
    pub start_pos: Position,
    /// End position with line/column
    // NOTE(review): presumably one past the last byte of the token (the unit
    // test wraps "my" as 0..2) — confirm against the lexer's span convention.
    pub end_pos: Position,
}
19
20impl TokenWithPosition {
21    /// Create a new token with position
22    pub fn new(token: Token, start_pos: Position, end_pos: Position) -> Self {
23        TokenWithPosition { token, start_pos, end_pos }
24    }
25
26    /// Get the token type
27    pub fn kind(&self) -> &perl_lexer::TokenType {
28        &self.token.token_type
29    }
30
31    /// Get the token text
32    pub fn text(&self) -> &str {
33        &self.token.text
34    }
35
36    /// Get byte range
37    pub fn byte_range(&self) -> (usize, usize) {
38        (self.token.start, self.token.end)
39    }
40
41    /// Get the position range
42    pub fn range(&self) -> perl_position_tracking::Range {
43        perl_position_tracking::Range::new(self.start_pos, self.end_pos)
44    }
45}
46
/// Position tracker for converting byte offsets to line/column
///
/// Borrows the source text and keeps a table of line-start offsets that is
/// built once by `PositionTracker::new`, so each later lookup is a binary
/// search over that table plus a character count within a single line.
pub struct PositionTracker<'a> {
    /// The source text that byte offsets refer to.
    source: &'a str,
    /// Byte offset at which each line begins, in ascending order. The first
    /// entry is always 0; every further entry is the offset just past a `\n`.
    line_starts: Vec<usize>,
}
52
53impl<'a> PositionTracker<'a> {
54    /// Create a new position tracker for the given source
55    pub fn new(source: &'a str) -> Self {
56        let mut line_starts = vec![0];
57
58        for (i, ch) in source.char_indices() {
59            if ch == '\n' {
60                line_starts.push(i + 1);
61            }
62        }
63
64        PositionTracker { source, line_starts }
65    }
66
67    /// Convert a byte offset to a Position
68    pub fn byte_to_position(&self, byte: usize) -> Position {
69        // Binary search for the line
70        let line = match self.line_starts.binary_search(&byte) {
71            Ok(line) => line,
72            Err(line) => line.saturating_sub(1),
73        };
74
75        let line_start = self.line_starts[line];
76        let column = self.calculate_column(line_start, byte);
77
78        Position::new(byte, (line + 1) as u32, column)
79    }
80
81    /// Calculate column number accounting for UTF-8
82    fn calculate_column(&self, line_start: usize, byte: usize) -> u32 {
83        let line_slice = &self.source[line_start..byte.min(self.source.len())];
84        (line_slice.chars().count() + 1) as u32
85    }
86
87    /// Wrap a token with position information
88    pub fn wrap_token(&self, token: Token) -> TokenWithPosition {
89        let start_pos = self.byte_to_position(token.start);
90        let end_pos = self.byte_to_position(token.end);
91        TokenWithPosition::new(token, start_pos, end_pos)
92    }
93}
94
#[cfg(test)]
mod tests {
    use super::*;
    use perl_lexer::{Token, TokenType};
    use std::sync::Arc;

    /// Offsets on both lines of a two-line source map to 1-based line/column.
    #[test]
    fn test_position_tracker() {
        let tracker = PositionTracker::new("hello\nworld\n");

        // (byte offset, expected line, expected column)
        let expectations = [
            (0, 1, 1), // first byte of the first line
            (3, 1, 4), // inside the first line
            (6, 2, 1), // first byte of the second line
        ];

        for &(byte, line, column) in &expectations {
            let pos = tracker.byte_to_position(byte);
            assert_eq!(pos.line, line);
            assert_eq!(pos.column, column);
        }
    }

    /// Wrapping a token resolves both of its byte offsets to positions.
    #[test]
    fn test_token_wrapping() {
        let tracker = PositionTracker::new("my $x");

        let kind = TokenType::Keyword(Arc::from("my"));
        let token = Token::new(kind, Arc::from("my"), 0, 2);

        let TokenWithPosition { start_pos, end_pos, .. } = tracker.wrap_token(token);
        assert_eq!(start_pos.line, 1);
        assert_eq!(start_pos.column, 1);
        assert_eq!(end_pos.column, 3);
    }
}