// lex_core/lex/token/normalization/utilities.rs

use crate::lex::token::core::Token;
use std::ops::Range as ByteRange;

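/// Provides access to the underlying source tokens and their byte ranges.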
#[allow(dead_code)]
pub trait SourceTokenProvider {
    fn source_tokens(&self) -> &[(Token, ByteRange<usize>)];
}

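/// Flattens a slice of providers into a single list of `(Token, ByteRange)`
/// pairs, preserving order.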
#[allow(dead_code)]
pub fn unroll<T: SourceTokenProvider>(tokens: &[T]) -> Vec<(Token, ByteRange<usize>)> {
    tokens
        .iter()
        .flat_map(|t| t.source_tokens().iter().cloned())
        .collect()
}

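/// Concatenates several token vectors into a single vector, preserving order.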
#[allow(dead_code)]
pub fn flatten_token_vecs(
    token_vecs: &[Vec<(Token, ByteRange<usize>)>],
) -> Vec<(Token, ByteRange<usize>)> {
    token_vecs.iter().flat_map(|v| v.iter().cloned()).collect()
}

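/// Returns the smallest byte range covering every token in `tokens`.
///
/// # Panics
///
/// Panics if `tokens` is empty.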
pub fn compute_bounding_box(tokens: &[(Token, ByteRange<usize>)]) -> ByteRange<usize> {
    assert!(
        !tokens.is_empty(),
        "Cannot compute bounding box from empty token list"
    );

    let min_start = tokens
        .iter()
        .map(|(_, range)| range.start)
        .min()
        .unwrap_or(0);
    let max_end = tokens.iter().map(|(_, range)| range.end).max().unwrap_or(0);

    min_start..max_end
}

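/// Returns the source text covered by `range`.
///
/// # Panics
///
/// Panics if `range` is out of bounds or does not lie on UTF-8 character
/// boundaries.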
pub fn extract_text(range: ByteRange<usize>, source: &str) -> String {
    source[range].to_string()
}

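/// Returns the zero-based byte column of `offset` within its line, i.e. the
/// number of bytes between the most recent newline and `offset`.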
pub fn compute_column(offset: usize, source: &str) -> usize {
    let mut last_newline = 0;
    for (i, c) in source.char_indices() {
        if i >= offset {
            break;
        }
        if c == '\n' {
            last_newline = i + 1;
        }
    }
    offset - last_newline
}

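/// Convenience wrapper: extracts the source text covered by the bounding box
/// of `tokens`.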
#[allow(dead_code)]
pub fn tokens_to_text(tokens: &[(Token, ByteRange<usize>)], source: &str) -> String {
    let range = compute_bounding_box(tokens);
    extract_text(range, source)
}

#[cfg(test)]
mod tests {
    use super::*;

    struct MockToken {
        tokens: Vec<(Token, ByteRange<usize>)>,
    }

    impl SourceTokenProvider for MockToken {
        fn source_tokens(&self) -> &[(Token, ByteRange<usize>)] {
            &self.tokens
        }
    }

    #[test]
    fn test_compute_bounding_box_single_token() {
        let tokens = vec![(
            Token::Text("hello".to_string()),
            ByteRange { start: 0, end: 5 },
        )];
        let bbox = compute_bounding_box(&tokens);
        assert_eq!(bbox, 0..5);
    }

    #[test]
    fn test_compute_bounding_box_multiple_contiguous() {
        let tokens = vec![
            (
                Token::Text("hello".to_string()),
                ByteRange { start: 0, end: 5 },
            ),
            (Token::Whitespace(1), ByteRange { start: 5, end: 6 }),
            (
                Token::Text("world".to_string()),
                ByteRange { start: 6, end: 11 },
            ),
        ];
        let bbox = compute_bounding_box(&tokens);
        assert_eq!(bbox, 0..11);
    }

    #[test]
    fn test_compute_bounding_box_non_contiguous() {
        let tokens = vec![
            (
                Token::Text("hello".to_string()),
                ByteRange { start: 0, end: 5 },
            ),
            (
                Token::Text("world".to_string()),
                ByteRange { start: 10, end: 15 },
            ),
        ];
        let bbox = compute_bounding_box(&tokens);
        assert_eq!(bbox, 0..15);
    }

    #[test]
    #[should_panic(expected = "Cannot compute bounding box from empty token list")]
    fn test_compute_bounding_box_empty_panics() {
        let tokens: Vec<(Token, ByteRange<usize>)> = vec![];
        compute_bounding_box(&tokens);
    }

    #[test]
    fn test_extract_text_simple() {
        let source = "hello world";
        assert_eq!(
            extract_text(ByteRange { start: 0, end: 5 }, source),
            "hello"
        );
        assert_eq!(
            extract_text(ByteRange { start: 6, end: 11 }, source),
            "world"
        );
    }

    #[test]
    fn test_extract_text_multiline() {
        let source = "line one\nline two\nline three";
        assert_eq!(
            extract_text(ByteRange { start: 0, end: 8 }, source),
            "line one"
        );
        assert_eq!(
            extract_text(ByteRange { start: 9, end: 17 }, source),
            "line two"
        );
    }

    #[test]
    fn test_extract_text_unicode() {
        let source = "hello 世界";
        let text = extract_text(ByteRange { start: 6, end: 12 }, source);
        assert_eq!(text, "世界");
    }

    #[test]
    fn test_unroll_single_token() {
        let mock = MockToken {
            tokens: vec![(
                Token::Text("hello".to_string()),
                ByteRange { start: 0, end: 5 },
            )],
        };
        let unrolled = unroll(&[mock]);
        assert_eq!(unrolled.len(), 1);
        assert_eq!(unrolled[0].1, 0..5);
    }

    #[test]
    fn test_unroll_multiple_tokens() {
        let mock1 = MockToken {
            tokens: vec![(
                Token::Text("hello".to_string()),
                ByteRange { start: 0, end: 5 },
            )],
        };
        let mock2 = MockToken {
            tokens: vec![
                (Token::Whitespace(1), ByteRange { start: 5, end: 6 }),
                (
                    Token::Text("world".to_string()),
                    ByteRange { start: 6, end: 11 },
                ),
            ],
        };
        let unrolled = unroll(&[mock1, mock2]);
        assert_eq!(unrolled.len(), 3);
        assert_eq!(unrolled[0].1, 0..5);
        assert_eq!(unrolled[1].1, 5..6);
        assert_eq!(unrolled[2].1, 6..11);
    }

    #[test]
    fn test_tokens_to_text_convenience() {
        let source = "hello world";
        let tokens = vec![
            (
                Token::Text("hello".to_string()),
                ByteRange { start: 0, end: 5 },
            ),
            (Token::Whitespace(1), ByteRange { start: 5, end: 6 }),
        ];
        let text = tokens_to_text(&tokens, source);
        assert_eq!(text, "hello ");
    }

    #[test]
    fn test_flatten_token_vecs_empty() {
        let vecs: Vec<Vec<(Token, ByteRange<usize>)>> = vec![];
        let flattened = flatten_token_vecs(&vecs);
        assert_eq!(flattened.len(), 0);
    }

    #[test]
    fn test_flatten_token_vecs_single() {
        let vecs = vec![vec![
            (
                Token::Text("hello".to_string()),
                ByteRange { start: 0, end: 5 },
            ),
            (Token::Whitespace(1), ByteRange { start: 5, end: 6 }),
        ]];
        let flattened = flatten_token_vecs(&vecs);
        assert_eq!(flattened.len(), 2);
        assert_eq!(flattened[0].1, 0..5);
        assert_eq!(flattened[1].1, 5..6);
    }

    #[test]
    fn test_flatten_token_vecs_multiple() {
        let vecs = vec![
            vec![(
                Token::Text("hello".to_string()),
                ByteRange { start: 0, end: 5 },
            )],
            vec![
                (Token::Whitespace(1), ByteRange { start: 5, end: 6 }),
                (
                    Token::Text("world".to_string()),
                    ByteRange { start: 6, end: 11 },
                ),
            ],
        ];
        let flattened = flatten_token_vecs(&vecs);
        assert_eq!(flattened.len(), 3);
        assert_eq!(flattened[0].1, 0..5);
        assert_eq!(flattened[1].1, 5..6);
        assert_eq!(flattened[2].1, 6..11);
    }
}