// perl_lexer/tokenizer/token_wrapper.rs
use crate::Token;
use perl_position_tracking::Position;

/// A lexer [`Token`] augmented with the source positions of its first byte
/// and of the byte just past its end.
#[derive(Debug, Clone)]
pub struct TokenWithPosition {
    /// The underlying token produced by the lexer.
    pub token: Token,
    /// Position of the token's first byte (line/column appear to be 1-based
    /// as produced by `PositionTracker` below).
    pub start_pos: Position,
    /// Position of the byte one past the token's end.
    pub end_pos: Position,
}
19
20impl TokenWithPosition {
21 pub fn new(token: Token, start_pos: Position, end_pos: Position) -> Self {
23 TokenWithPosition { token, start_pos, end_pos }
24 }
25
26 pub fn kind(&self) -> &crate::TokenType {
28 &self.token.token_type
29 }
30
31 pub fn text(&self) -> &str {
33 &self.token.text
34 }
35
36 pub fn byte_range(&self) -> (usize, usize) {
38 (self.token.start, self.token.end)
39 }
40
41 pub fn range(&self) -> perl_position_tracking::Range {
43 perl_position_tracking::Range::new(self.start_pos, self.end_pos)
44 }
45}
46
/// Maps byte offsets within a source string to line/column [`Position`]s.
pub struct PositionTracker<'a> {
    // The text positions are computed against.
    source: &'a str,
    // Byte offset at which each line begins; index 0 is always 0, and each
    // later entry is the offset just past a '\n'.
    line_starts: Vec<usize>,
}
52
53impl<'a> PositionTracker<'a> {
54 pub fn new(source: &'a str) -> Self {
56 let mut line_starts = vec![0];
57
58 for (i, ch) in source.char_indices() {
59 if ch == '\n' {
60 line_starts.push(i + 1);
61 }
62 }
63
64 PositionTracker { source, line_starts }
65 }
66
67 pub fn byte_to_position(&self, byte: usize) -> Position {
69 let line = match self.line_starts.binary_search(&byte) {
71 Ok(line) => line,
72 Err(line) => line.saturating_sub(1),
73 };
74
75 let line_start = self.line_starts[line];
76 let column = self.calculate_column(line_start, byte);
77
78 Position::new(byte, (line + 1) as u32, column)
79 }
80
81 fn calculate_column(&self, line_start: usize, byte: usize) -> u32 {
83 let byte = self.clamp_to_char_boundary(byte);
84 let line_slice = &self.source[line_start..byte];
85 (line_slice.chars().count() + 1) as u32
86 }
87
88 fn clamp_to_char_boundary(&self, byte: usize) -> usize {
89 let mut clamped = byte.min(self.source.len());
90 while clamped > 0 && !self.source.is_char_boundary(clamped) {
91 clamped -= 1;
92 }
93 clamped
94 }
95
96 pub fn wrap_token(&self, token: Token) -> TokenWithPosition {
98 let start_pos = self.byte_to_position(token.start);
99 let end_pos = self.byte_to_position(token.end);
100 TokenWithPosition::new(token, start_pos, end_pos)
101 }
102}
103
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{Token, TokenType};
    use std::sync::Arc;

    #[test]
    fn test_position_tracker() {
        let tracker = PositionTracker::new("hello\nworld\n");

        let start = tracker.byte_to_position(0);
        assert_eq!((start.line, start.column), (1, 1));

        let mid_first_line = tracker.byte_to_position(3);
        assert_eq!((mid_first_line.line, mid_first_line.column), (1, 4));

        let second_line = tracker.byte_to_position(6);
        assert_eq!((second_line.line, second_line.column), (2, 1));
    }

    #[test]
    fn test_token_wrapping() {
        let tracker = PositionTracker::new("my $x");
        let keyword = Token::new(TokenType::Keyword(Arc::from("my")), Arc::from("my"), 0, 2);

        let wrapped = tracker.wrap_token(keyword);
        assert_eq!(wrapped.start_pos.line, 1);
        assert_eq!(wrapped.start_pos.column, 1);
        assert_eq!(wrapped.end_pos.column, 3);
    }

    #[test]
    fn test_byte_to_position_handles_non_char_boundary_offsets() {
        // 'é' occupies bytes 0..2, so offset 1 falls inside a code point and
        // must be snapped back to the start of the character.
        let tracker = PositionTracker::new("éa\n");

        let inside_char = tracker.byte_to_position(1);
        assert_eq!((inside_char.line, inside_char.column), (1, 1));

        let after_char = tracker.byte_to_position(2);
        assert_eq!((after_char.line, after_char.column), (1, 2));
    }
}