// lex_core/lex/token/normalization/utilities.rs
use crate::lex::token::core::Token;
use std::ops::Range as ByteRange;

#[allow(dead_code)] // NOTE(review): presumably kept for callers outside this file — confirm before removing
/// Abstraction over any value that can expose the raw `(Token, byte range)`
/// pairs backing it in the original source text.
pub trait SourceTokenProvider {
    /// Borrows the underlying tokens together with their byte ranges.
    fn source_tokens(&self) -> &[(Token, ByteRange<usize>)];
}

33#[allow(dead_code)]
47pub fn unroll<T: SourceTokenProvider>(tokens: &[T]) -> Vec<(Token, ByteRange<usize>)> {
48 tokens
49 .iter()
50 .flat_map(|t| t.source_tokens().iter().cloned())
51 .collect()
52}
53
54#[allow(dead_code)]
69pub fn flatten_token_vecs(
70 token_vecs: &[Vec<(Token, ByteRange<usize>)>],
71) -> Vec<(Token, ByteRange<usize>)> {
72 token_vecs.iter().flat_map(|v| v.iter().cloned()).collect()
73}
74
75pub fn compute_bounding_box(tokens: &[(Token, ByteRange<usize>)]) -> ByteRange<usize> {
92 if tokens.is_empty() {
93 return 0..0;
94 }
95
96 let min_start = tokens
97 .iter()
98 .map(|(_, range)| range.start)
99 .min()
100 .unwrap_or(0);
101 let max_end = tokens.iter().map(|(_, range)| range.end).max().unwrap_or(0);
102
103 min_start..max_end
104}
105
/// Returns the substring of `source` covered by `range`, as an owned `String`.
///
/// # Panics
///
/// Panics if `range` is out of bounds for `source` or does not fall on UTF-8
/// character boundaries (standard string slice-indexing behavior).
pub fn extract_text(range: ByteRange<usize>, source: &str) -> String {
    let slice: &str = &source[range];
    slice.to_owned()
}
/// Returns the zero-based byte column of `offset` within its line in `source`:
/// the byte distance from the most recent `'\n'` strictly before `offset`, or
/// from the start of `source` if there is none.
pub fn compute_column(offset: usize, source: &str) -> usize {
    let mut line_start = 0;
    // Scan only characters that begin before `offset`; each newline seen moves
    // the line start to the byte right after it.
    for (idx, ch) in source.char_indices().take_while(|&(idx, _)| idx < offset) {
        if ch == '\n' {
            line_start = idx + 1;
        }
    }
    offset - line_start
}

151#[allow(dead_code)]
159pub fn tokens_to_text(tokens: &[(Token, ByteRange<usize>)], source: &str) -> String {
160 let range = compute_bounding_box(tokens);
161 extract_text(range, source)
162}
163
#[cfg(test)]
mod tests {
    use super::*;

    // Minimal SourceTokenProvider implementation backed by an owned vector,
    // used to exercise `unroll` without a real lexer.
    struct MockToken {
        tokens: Vec<(Token, ByteRange<usize>)>,
    }

    impl SourceTokenProvider for MockToken {
        fn source_tokens(&self) -> &[(Token, ByteRange<usize>)] {
            &self.tokens
        }
    }

    #[test]
    fn test_compute_bounding_box_single_token() {
        // One token: the bounding box is exactly that token's range.
        let tokens = vec![(
            Token::Text("hello".to_string()),
            ByteRange { start: 0, end: 5 },
        )];
        let bbox = compute_bounding_box(&tokens);
        assert_eq!(bbox, 0..5);
    }

    #[test]
    fn test_compute_bounding_box_multiple_contiguous() {
        // Adjacent tokens: box runs from the first start to the last end.
        let tokens = vec![
            (
                Token::Text("hello".to_string()),
                ByteRange { start: 0, end: 5 },
            ),
            (Token::Whitespace(1), ByteRange { start: 5, end: 6 }),
            (
                Token::Text("world".to_string()),
                ByteRange { start: 6, end: 11 },
            ),
        ];
        let bbox = compute_bounding_box(&tokens);
        assert_eq!(bbox, 0..11);
    }

    #[test]
    fn test_compute_bounding_box_non_contiguous() {
        // Gap between tokens (bytes 5..10) is spanned by the bounding box.
        let tokens = vec![
            (
                Token::Text("hello".to_string()),
                ByteRange { start: 0, end: 5 },
            ),
            (
                Token::Text("world".to_string()),
                ByteRange { start: 10, end: 15 },
            ),
        ];
        let bbox = compute_bounding_box(&tokens);
        assert_eq!(bbox, 0..15);
    }

    #[test]
    fn test_compute_bounding_box_empty_returns_zero_range() {
        // Documented sentinel for the empty case.
        let tokens: Vec<(Token, ByteRange<usize>)> = vec![];
        assert_eq!(compute_bounding_box(&tokens), 0..0);
    }

    #[test]
    fn test_extract_text_simple() {
        let source = "hello world";
        assert_eq!(
            extract_text(ByteRange { start: 0, end: 5 }, source),
            "hello"
        );
        assert_eq!(
            extract_text(ByteRange { start: 6, end: 11 }, source),
            "world"
        );
    }

    #[test]
    fn test_extract_text_multiline() {
        // Ranges are raw byte offsets; newlines are ordinary bytes.
        let source = "line one\nline two\nline three";
        assert_eq!(
            extract_text(ByteRange { start: 0, end: 8 }, source),
            "line one"
        );
        assert_eq!(
            extract_text(ByteRange { start: 9, end: 17 }, source),
            "line two"
        );
    }

    #[test]
    fn test_extract_text_unicode() {
        // "世界" occupies bytes 6..12 (3 bytes per character in UTF-8).
        let source = "hello 世界";
        let text = extract_text(ByteRange { start: 6, end: 12 }, source);
        assert_eq!(text, "世界");
    }

    #[test]
    fn test_unroll_single_token() {
        let mock = MockToken {
            tokens: vec![(
                Token::Text("hello".to_string()),
                ByteRange { start: 0, end: 5 },
            )],
        };
        let unrolled = unroll(&[mock]);
        assert_eq!(unrolled.len(), 1);
        assert_eq!(unrolled[0].1, 0..5);
    }

    #[test]
    fn test_unroll_multiple_tokens() {
        // Providers' token lists are concatenated in provider order.
        let mock1 = MockToken {
            tokens: vec![(
                Token::Text("hello".to_string()),
                ByteRange { start: 0, end: 5 },
            )],
        };
        let mock2 = MockToken {
            tokens: vec![
                (Token::Whitespace(1), ByteRange { start: 5, end: 6 }),
                (
                    Token::Text("world".to_string()),
                    ByteRange { start: 6, end: 11 },
                ),
            ],
        };
        let unrolled = unroll(&[mock1, mock2]);
        assert_eq!(unrolled.len(), 3);
        assert_eq!(unrolled[0].1, 0..5);
        assert_eq!(unrolled[1].1, 5..6);
        assert_eq!(unrolled[2].1, 6..11);
    }

    #[test]
    fn test_tokens_to_text_convenience() {
        // Bounding box 0..6 over the source yields "hello " (with the space).
        let source = "hello world";
        let tokens = vec![
            (
                Token::Text("hello".to_string()),
                ByteRange { start: 0, end: 5 },
            ),
            (Token::Whitespace(1), ByteRange { start: 5, end: 6 }),
        ];
        let text = tokens_to_text(&tokens, source);
        assert_eq!(text, "hello ");
    }

    #[test]
    fn test_flatten_token_vecs_empty() {
        let vecs: Vec<Vec<(Token, ByteRange<usize>)>> = vec![];
        let flattened = flatten_token_vecs(&vecs);
        assert_eq!(flattened.len(), 0);
    }

    #[test]
    fn test_flatten_token_vecs_single() {
        let vecs = vec![vec![
            (
                Token::Text("hello".to_string()),
                ByteRange { start: 0, end: 5 },
            ),
            (Token::Whitespace(1), ByteRange { start: 5, end: 6 }),
        ]];
        let flattened = flatten_token_vecs(&vecs);
        assert_eq!(flattened.len(), 2);
        assert_eq!(flattened[0].1, 0..5);
        assert_eq!(flattened[1].1, 5..6);
    }

    #[test]
    fn test_flatten_token_vecs_multiple() {
        // Inner vectors are concatenated in order, preserving element order.
        let vecs = vec![
            vec![(
                Token::Text("hello".to_string()),
                ByteRange { start: 0, end: 5 },
            )],
            vec![
                (Token::Whitespace(1), ByteRange { start: 5, end: 6 }),
                (
                    Token::Text("world".to_string()),
                    ByteRange { start: 6, end: 11 },
                ),
            ],
        ];
        let flattened = flatten_token_vecs(&vecs);
        assert_eq!(flattened.len(), 3);
        assert_eq!(flattened[0].1, 0..5);
        assert_eq!(flattened[1].1, 5..6);
        assert_eq!(flattened[2].1, 6..11);
    }
}