1use alloc::boxed::Box;
2use alloc::vec::Vec;
3
4mod build;
5
6use crate::range_set::RangeSet;
7
8#[derive(Clone, Debug)]
10pub struct SourceSnippet {
11 start_line: usize,
12 lines: Vec<SourceLine>,
13 line_map: Vec<usize>,
14 metas: Vec<SourceUnitMeta>,
15}
16
17#[derive(Clone, Debug, PartialEq, Eq)]
18pub(crate) struct SourceLine {
19 pub(crate) text: Box<str>,
20 pub(crate) alts: RangeSet<usize>,
21 width: usize,
22}
23
/// Per-unit metadata packed into 16 bits.
///
/// Layout of `inner`:
/// * bits 0..=6  — display width of the unit,
/// * bits 7..=13 — UTF-8 length contributed by the unit,
/// * bit 15      — "extra" flag.
///
/// An "extra" entry carries no width or length of its own; `convert_span`
/// moves span boundaries off such entries.
#[derive(Clone, PartialEq, Eq)]
struct SourceUnitMeta {
    inner: u16,
}

impl core::fmt::Debug for SourceUnitMeta {
    /// Prints the decoded fields rather than the raw packed integer.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        if !self.is_extra() {
            return f
                .debug_struct("SourceUnitMeta")
                .field("width", &self.width())
                .field("utf8_len", &self.utf8_len())
                .finish();
        }
        f.write_str("SourceUnitMeta::extra()")
    }
}

impl SourceUnitMeta {
    /// Bit that marks an "extra" entry.
    const EXTRA_FLAG: u16 = 0x8000;
    /// Mask selecting one 7-bit field once it has been shifted into place.
    const FIELD_MASK: u16 = 0x7F;
    /// Offset of the UTF-8 length field inside `inner`.
    const UTF8_LEN_SHIFT: u32 = 7;

    /// Creates an "extra" entry: only the flag bit is set, so both `width()`
    /// and `utf8_len()` report zero.
    #[inline]
    fn extra() -> Self {
        Self {
            inner: Self::EXTRA_FLAG,
        }
    }

    /// Creates a regular entry with the given display width and UTF-8 length.
    ///
    /// # Panics
    ///
    /// Panics if either value does not fit in 7 bits (is greater than `0x7F`).
    #[inline]
    fn new(width: usize, utf8_len: usize) -> Self {
        assert!(width <= 0x7F);
        assert!(utf8_len <= 0x7F);
        // The asserts above make both narrowing casts lossless.
        let width_bits = width as u16;
        let utf8_len_bits = (utf8_len as u16) << Self::UTF8_LEN_SHIFT;
        Self {
            inner: width_bits | utf8_len_bits,
        }
    }

    /// Whether this is an "extra" entry.
    #[inline]
    fn is_extra(&self) -> bool {
        (self.inner & Self::EXTRA_FLAG) == Self::EXTRA_FLAG
    }

    /// Display width stored in the low 7 bits.
    #[inline]
    fn width(&self) -> usize {
        let bits = self.inner & Self::FIELD_MASK;
        usize::from(bits)
    }

    /// UTF-8 length stored in bits 7..=13.
    #[inline]
    fn utf8_len(&self) -> usize {
        let bits = (self.inner >> Self::UTF8_LEN_SHIFT) & Self::FIELD_MASK;
        usize::from(bits)
    }
}
72
/// A span expressed in line-relative coordinates, as produced by
/// `SourceSnippet::convert_span`.
///
/// For each endpoint, `*_col` is the sum of the display widths and `*_utf8`
/// the sum of the UTF-8 lengths of the units between the start of the
/// endpoint's line and the endpoint itself.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct SourceSpan {
    /// Index of the line containing the start of the span.
    pub(crate) start_line: usize,
    /// Column of the span start within `start_line`.
    pub(crate) start_col: usize,
    /// UTF-8 offset of the span start within `start_line`.
    pub(crate) start_utf8: usize,
    /// Index of the line containing the end of the span.
    pub(crate) end_line: usize,
    /// Column of the span end within `end_line`.
    pub(crate) end_col: usize,
    /// UTF-8 offset of the span end within `end_line`.
    pub(crate) end_utf8: usize,
}
82
83impl SourceSnippet {
84 pub fn get_line_col(&self, pos: usize) -> (usize, usize) {
85 let line = match self.line_map.binary_search(&pos) {
86 Ok(i) => i + 1,
87 Err(i) => i,
88 };
89 let line_start = if line == 0 {
90 0
91 } else {
92 self.line_map[line - 1]
93 };
94 let col = self.metas[line_start..pos]
95 .iter()
96 .map(SourceUnitMeta::width)
97 .sum();
98
99 (line, col)
100 }
101
102 #[inline]
103 pub(crate) fn start_line(&self) -> usize {
104 self.start_line
105 }
106
107 #[inline]
108 pub(crate) fn line(&self, i: usize) -> &SourceLine {
109 &self.lines[i]
110 }
111
112 pub(crate) fn convert_span(&self, mut start: usize, mut end: usize) -> SourceSpan {
113 end = end.max(start);
114
115 while self.metas.get(start).is_some_and(SourceUnitMeta::is_extra) {
116 start -= 1;
117 }
118 while self.metas.get(end).is_some_and(SourceUnitMeta::is_extra) {
119 end += 1;
120 }
121 start = start.min(self.metas.len());
122 end = end.min(self.metas.len());
123
124 let start_line = match self.line_map.binary_search(&start) {
125 Ok(i) => i + 1,
126 Err(i) => i,
127 };
128 let start_line_start = if start_line == 0 {
129 0
130 } else {
131 self.line_map[start_line - 1]
132 };
133 let mut start_col = 0;
134 let mut start_utf8 = 0;
135 for meta in self.metas[start_line_start..start].iter() {
136 start_col += meta.width();
137 start_utf8 += meta.utf8_len();
138 }
139
140 let end_line;
141 let mut end_col;
142 let mut end_utf8;
143 if end == start {
144 end_line = start_line;
145 end_col = start_col;
146 end_utf8 = start_utf8;
147 } else {
148 end_line = match self.line_map.binary_search(&end) {
149 Ok(i) => i,
150 Err(i) => i,
151 };
152 let end_line_start = if end_line == 0 {
153 0
154 } else {
155 self.line_map[end_line - 1]
156 };
157 end_col = 0;
158 end_utf8 = 0;
159 for meta in self.metas[end_line_start..end].iter() {
160 end_col += meta.width();
161 end_utf8 += meta.utf8_len();
162 }
163 }
164
165 SourceSpan {
166 start_line,
167 start_col,
168 start_utf8,
169 end_line,
170 end_col,
171 end_utf8,
172 }
173 }
174}
175
#[cfg(test)]
mod tests {
    use super::{SourceSnippet, SourceSpan};

    /// Builds an expected span from the `(line, col, utf8)` triples of its
    /// start and end.
    fn span(start: (usize, usize, usize), end: (usize, usize, usize)) -> SourceSpan {
        let (start_line, start_col, start_utf8) = start;
        let (end_line, end_col, end_utf8) = end;
        SourceSpan {
            start_line,
            start_col,
            start_utf8,
            end_line,
            end_col,
            end_utf8,
        }
    }

    /// Checks, in order, that every `(start, end)` pair converts to the span
    /// listed next to it.
    fn assert_spans(snippet: &SourceSnippet, cases: &[((usize, usize), SourceSpan)]) {
        for ((start, end), expected) in cases {
            assert_eq!(
                &snippet.convert_span(*start, *end),
                expected,
                "convert_span({start}, {end})",
            );
        }
    }

    #[test]
    fn test_get_line_col() {
        let snippet = SourceSnippet::build_from_utf8(0, b"123\n456", 4);

        // Expected `(line, col)` for positions 0, 1, 2, ... in that order.
        let expected = [(0, 0), (0, 1), (0, 2), (0, 3), (1, 0), (1, 1), (1, 2)];
        for (pos, want) in expected.iter().enumerate() {
            assert_eq!(snippet.get_line_col(pos), *want, "get_line_col({pos})");
        }
    }

    #[test]
    fn test_convert_span_simple() {
        let snippet = SourceSnippet::build_from_utf8(0, b"123\n456", 4);

        assert_spans(
            &snippet,
            &[
                ((0, 0), span((0, 0, 0), (0, 0, 0))),
                ((0, 1), span((0, 0, 0), (0, 1, 1))),
                ((1, 2), span((0, 1, 1), (0, 2, 2))),
                ((2, 3), span((0, 2, 2), (0, 3, 3))),
                // The newline adds a column but no UTF-8 length.
                ((3, 4), span((0, 3, 3), (0, 4, 3))),
                ((4, 5), span((1, 0, 0), (1, 1, 1))),
                ((4, 4), span((1, 0, 0), (1, 0, 0))),
                ((5, 6), span((1, 1, 1), (1, 2, 2))),
                ((6, 7), span((1, 2, 2), (1, 3, 3))),
                // Positions past the end are clamped to the end of the input.
                ((7, 8), span((1, 3, 3), (1, 3, 3))),
                ((8, 9), span((1, 3, 3), (1, 3, 3))),
            ],
        );
    }

    #[test]
    fn test_convert_span_multi_byte() {
        // `\xEF\xBC\x92` is a single 3-byte character occupying two columns.
        let snippet = SourceSnippet::build_from_utf8(0, b"1\xEF\xBC\x923\n456", 4);

        assert_spans(
            &snippet,
            &[
                ((0, 1), span((0, 0, 0), (0, 1, 1))),
                // Every range touching the 3-byte sequence widens to cover
                // the whole character.
                ((1, 2), span((0, 1, 1), (0, 3, 4))),
                ((1, 3), span((0, 1, 1), (0, 3, 4))),
                ((1, 4), span((0, 1, 1), (0, 3, 4))),
                ((2, 3), span((0, 1, 1), (0, 3, 4))),
                ((2, 4), span((0, 1, 1), (0, 3, 4))),
                ((3, 4), span((0, 1, 1), (0, 3, 4))),
                ((4, 5), span((0, 3, 4), (0, 4, 5))),
                ((6, 7), span((1, 0, 0), (1, 1, 1))),
            ],
        );
    }

    #[test]
    fn test_convert_span_invalid_utf8() {
        let snippet = SourceSnippet::build_from_utf8(0, b"1\xFF2\n3", 4);

        assert_spans(
            &snippet,
            &[
                ((0, 1), span((0, 0, 0), (0, 1, 1))),
                // The invalid byte accounts for four columns and four UTF-8
                // bytes in the expected coordinates.
                ((1, 2), span((0, 1, 1), (0, 5, 5))),
                ((2, 3), span((0, 5, 5), (0, 6, 6))),
                ((4, 5), span((1, 0, 0), (1, 1, 1))),
            ],
        );
    }
}