// perl_lexer/tokenizer/token_wrapper.rs
1//! Token wrapper with enhanced position tracking
2//!
3//! This module provides a wrapper around lexer tokens that adds
4//! line and column information for incremental parsing support.
5
6use crate::Token;
7use perl_position_tracking::Position;
8
/// A lexer token paired with resolved line/column positions.
///
/// Wraps a raw [`Token`] (which only carries byte offsets) together with
/// start/end [`Position`] values, supporting the incremental parsing use
/// case described in the module docs.
#[derive(Debug, Clone)]
pub struct TokenWithPosition {
    /// The original token
    pub token: Token,
    /// Start position with line/column
    pub start_pos: Position,
    /// End position with line/column
    pub end_pos: Position,
}
19
20impl TokenWithPosition {
21    /// Create a new token with position
22    pub fn new(token: Token, start_pos: Position, end_pos: Position) -> Self {
23        TokenWithPosition { token, start_pos, end_pos }
24    }
25
26    /// Get the token type
27    pub fn kind(&self) -> &crate::TokenType {
28        &self.token.token_type
29    }
30
31    /// Get the token text
32    pub fn text(&self) -> &str {
33        &self.token.text
34    }
35
36    /// Get byte range
37    pub fn byte_range(&self) -> (usize, usize) {
38        (self.token.start, self.token.end)
39    }
40
41    /// Get the position range
42    pub fn range(&self) -> perl_position_tracking::Range {
43        perl_position_tracking::Range::new(self.start_pos, self.end_pos)
44    }
45}
46
/// Position tracker for converting byte offsets to line/column
pub struct PositionTracker<'a> {
    /// The full source text; column calculation slices into this.
    source: &'a str,
    /// Byte offset of the start of each line. Always contains 0 as its
    /// first element and is sorted ascending, so a line can be located
    /// with a binary search over byte offsets.
    line_starts: Vec<usize>,
}
52
53impl<'a> PositionTracker<'a> {
54    /// Create a new position tracker for the given source
55    pub fn new(source: &'a str) -> Self {
56        let mut line_starts = vec![0];
57
58        for (i, ch) in source.char_indices() {
59            if ch == '\n' {
60                line_starts.push(i + 1);
61            }
62        }
63
64        PositionTracker { source, line_starts }
65    }
66
67    /// Convert a byte offset to a Position
68    pub fn byte_to_position(&self, byte: usize) -> Position {
69        // Binary search for the line
70        let line = match self.line_starts.binary_search(&byte) {
71            Ok(line) => line,
72            Err(line) => line.saturating_sub(1),
73        };
74
75        let line_start = self.line_starts[line];
76        let column = self.calculate_column(line_start, byte);
77
78        Position::new(byte, (line + 1) as u32, column)
79    }
80
81    /// Calculate column number accounting for UTF-8
82    fn calculate_column(&self, line_start: usize, byte: usize) -> u32 {
83        let byte = self.clamp_to_char_boundary(byte);
84        let line_slice = &self.source[line_start..byte];
85        (line_slice.chars().count() + 1) as u32
86    }
87
88    fn clamp_to_char_boundary(&self, byte: usize) -> usize {
89        let mut clamped = byte.min(self.source.len());
90        while clamped > 0 && !self.source.is_char_boundary(clamped) {
91            clamped -= 1;
92        }
93        clamped
94    }
95
96    /// Wrap a token with position information
97    pub fn wrap_token(&self, token: Token) -> TokenWithPosition {
98        let start_pos = self.byte_to_position(token.start);
99        let end_pos = self.byte_to_position(token.end);
100        TokenWithPosition::new(token, start_pos, end_pos)
101    }
102}
103
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{Token, TokenType};
    use std::sync::Arc;

    #[test]
    fn test_position_tracker() {
        let tracker = PositionTracker::new("hello\nworld\n");

        // (byte offset, expected line, expected column)
        let cases = [(0, 1, 1), (3, 1, 4), (6, 2, 1)];
        for (byte, line, column) in cases {
            let pos = tracker.byte_to_position(byte);
            assert_eq!((pos.line, pos.column), (line, column));
        }
    }

    #[test]
    fn test_token_wrapping() {
        let tracker = PositionTracker::new("my $x");
        let tok = Token::new(TokenType::Keyword(Arc::from("my")), Arc::from("my"), 0, 2);

        let wrapped = tracker.wrap_token(tok);
        assert_eq!((wrapped.start_pos.line, wrapped.start_pos.column), (1, 1));
        assert_eq!(wrapped.end_pos.column, 3);
    }

    #[test]
    fn test_byte_to_position_handles_non_char_boundary_offsets() {
        // 'é' occupies bytes 0..2; offset 1 lands mid-codepoint and must
        // clamp back to the character's start column.
        let tracker = PositionTracker::new("éa\n");

        let mid = tracker.byte_to_position(1);
        assert_eq!((mid.line, mid.column), (1, 1));

        let after = tracker.byte_to_position(2);
        assert_eq!((after.line, after.column), (1, 2));
    }
}