sourceannot/snippet/
mod.rs

1use alloc::boxed::Box;
2use alloc::vec::Vec;
3
4mod build;
5
6use crate::range_set::RangeSet;
7
8/// A snippet of source code.
9#[derive(Clone, Debug)]
10pub struct SourceSnippet {
11    start_line: usize,
12    lines: Vec<SourceLine>,
13    line_map: Vec<usize>,
14    metas: Vec<SourceUnitMeta>,
15}
16
17#[derive(Clone, Debug, PartialEq, Eq)]
18pub(crate) struct SourceLine {
19    pub(crate) text: Box<str>,
20    pub(crate) alts: RangeSet<usize>,
21    width: usize,
22}
23
/// Metadata for one input position, packed into 16 bits.
///
/// Layout of `inner`:
///
/// * bits 0..=6: width
/// * bits 7..=13: UTF-8 length
/// * bit 15: "extra" flag
///
/// A value created with [`SourceUnitMeta::extra`] has only the flag set, so
/// both its width and its UTF-8 length read back as zero.
#[derive(Clone, PartialEq, Eq)]
struct SourceUnitMeta {
    inner: u16,
}

impl SourceUnitMeta {
    /// Bit that marks an "extra" unit.
    const EXTRA_FLAG: u16 = 0x8000;
    /// Mask covering one 7-bit field.
    const FIELD_MASK: u16 = 0x7F;
    /// Position of the UTF-8 length field inside `inner`.
    const UTF8_LEN_SHIFT: u32 = 7;

    /// Creates the marker value for an "extra" unit.
    #[inline]
    fn extra() -> Self {
        Self {
            inner: Self::EXTRA_FLAG,
        }
    }

    /// Creates metadata carrying `width` and `utf8_len`.
    ///
    /// # Panics
    ///
    /// Panics if either argument does not fit in 7 bits.
    #[inline]
    fn new(width: usize, utf8_len: usize) -> Self {
        assert!(width <= 0x7F);
        assert!(utf8_len <= 0x7F);
        // The assertions above guarantee that neither cast truncates.
        let width_bits = width as u16;
        let utf8_len_bits = (utf8_len as u16) << Self::UTF8_LEN_SHIFT;
        Self {
            inner: width_bits | utf8_len_bits,
        }
    }

    /// Returns `true` if this is the marker created by [`Self::extra`].
    #[inline]
    fn is_extra(&self) -> bool {
        (self.inner & Self::EXTRA_FLAG) != 0
    }

    /// Returns the stored width (zero for an "extra" unit).
    #[inline]
    fn width(&self) -> usize {
        let bits = self.inner & Self::FIELD_MASK;
        usize::from(bits)
    }

    /// Returns the stored UTF-8 length (zero for an "extra" unit).
    #[inline]
    fn utf8_len(&self) -> usize {
        let bits = (self.inner >> Self::UTF8_LEN_SHIFT) & Self::FIELD_MASK;
        usize::from(bits)
    }
}

impl core::fmt::Debug for SourceUnitMeta {
    /// Prints the unpacked fields, or a fixed marker for an "extra" unit.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        if !self.is_extra() {
            return f
                .debug_struct("SourceUnitMeta")
                .field("width", &self.width())
                .field("utf8_len", &self.utf8_len())
                .finish();
        }
        f.write_str("SourceUnitMeta::extra()")
    }
}
72
/// Both ends of a span, expressed relative to the lines of a snippet.
///
/// Produced by `SourceSnippet::convert_span`. For each end, the column is the
/// sum of the widths of the units that precede it on its line, and the UTF-8
/// offset is the sum of their UTF-8 lengths.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct SourceSpan {
    /// Index of the line on which the span starts.
    pub(crate) start_line: usize,
    /// Column of the span's start within `start_line`.
    pub(crate) start_col: usize,
    /// UTF-8 offset of the span's start within `start_line`.
    pub(crate) start_utf8: usize,
    /// Index of the line on which the span ends.
    pub(crate) end_line: usize,
    /// Column of the span's end within `end_line`.
    pub(crate) end_col: usize,
    /// UTF-8 offset of the span's end within `end_line`.
    pub(crate) end_utf8: usize,
}
82
83impl SourceSnippet {
84    pub fn get_line_col(&self, pos: usize) -> (usize, usize) {
85        let line = match self.line_map.binary_search(&pos) {
86            Ok(i) => i + 1,
87            Err(i) => i,
88        };
89        let line_start = if line == 0 {
90            0
91        } else {
92            self.line_map[line - 1]
93        };
94        let col = self.metas[line_start..pos]
95            .iter()
96            .map(SourceUnitMeta::width)
97            .sum();
98
99        (line, col)
100    }
101
102    #[inline]
103    pub(crate) fn start_line(&self) -> usize {
104        self.start_line
105    }
106
107    #[inline]
108    pub(crate) fn line(&self, i: usize) -> &SourceLine {
109        &self.lines[i]
110    }
111
112    pub(crate) fn convert_span(&self, mut start: usize, mut end: usize) -> SourceSpan {
113        end = end.max(start);
114
115        while self.metas.get(start).is_some_and(SourceUnitMeta::is_extra) {
116            start -= 1;
117        }
118        while self.metas.get(end).is_some_and(SourceUnitMeta::is_extra) {
119            end += 1;
120        }
121        start = start.min(self.metas.len());
122        end = end.min(self.metas.len());
123
124        let start_line = match self.line_map.binary_search(&start) {
125            Ok(i) => i + 1,
126            Err(i) => i,
127        };
128        let start_line_start = if start_line == 0 {
129            0
130        } else {
131            self.line_map[start_line - 1]
132        };
133        let mut start_col = 0;
134        let mut start_utf8 = 0;
135        for meta in self.metas[start_line_start..start].iter() {
136            start_col += meta.width();
137            start_utf8 += meta.utf8_len();
138        }
139
140        let end_line;
141        let mut end_col;
142        let mut end_utf8;
143        if end == start {
144            end_line = start_line;
145            end_col = start_col;
146            end_utf8 = start_utf8;
147        } else {
148            end_line = match self.line_map.binary_search(&end) {
149                Ok(i) => i,
150                Err(i) => i,
151            };
152            let end_line_start = if end_line == 0 {
153                0
154            } else {
155                self.line_map[end_line - 1]
156            };
157            end_col = 0;
158            end_utf8 = 0;
159            for meta in self.metas[end_line_start..end].iter() {
160                end_col += meta.width();
161                end_utf8 += meta.utf8_len();
162            }
163        }
164
165        SourceSpan {
166            start_line,
167            start_col,
168            start_utf8,
169            end_line,
170            end_col,
171            end_utf8,
172        }
173    }
174}
175
#[cfg(test)]
mod tests {
    use super::{SourceSnippet, SourceSpan};

    /// Builds an expected span from `(line, col, utf8)` triples describing
    /// its start and its end.
    fn span(start: (usize, usize, usize), end: (usize, usize, usize)) -> SourceSpan {
        let (start_line, start_col, start_utf8) = start;
        let (end_line, end_col, end_utf8) = end;
        SourceSpan {
            start_line,
            start_col,
            start_utf8,
            end_line,
            end_col,
            end_utf8,
        }
    }

    /// Checks that every `(start, end)` input converts to the span paired
    /// with it.
    fn check_spans(snippet: &SourceSnippet, cases: &[((usize, usize), SourceSpan)]) {
        for ((start, end), expected) in cases {
            assert_eq!(
                &snippet.convert_span(*start, *end),
                expected,
                "convert_span({start}, {end})",
            );
        }
    }

    #[test]
    fn test_get_line_col() {
        let snippet = SourceSnippet::build_from_utf8(0, b"123\n456", 4);

        // Expected result for positions 0, 1, 2, ... in order.
        let expected = [(0, 0), (0, 1), (0, 2), (0, 3), (1, 0), (1, 1), (1, 2)];
        for (pos, line_col) in expected.iter().enumerate() {
            assert_eq!(
                snippet.get_line_col(pos),
                *line_col,
                "get_line_col({pos})",
            );
        }
    }

    #[test]
    fn test_convert_span_simple() {
        let snippet = SourceSnippet::build_from_utf8(0, b"123\n456", 4);

        check_spans(
            &snippet,
            &[
                ((0, 0), span((0, 0, 0), (0, 0, 0))),
                ((0, 1), span((0, 0, 0), (0, 1, 1))),
                ((1, 2), span((0, 1, 1), (0, 2, 2))),
                ((2, 3), span((0, 2, 2), (0, 3, 3))),
                // The end sits right after the line break.
                ((3, 4), span((0, 3, 3), (0, 4, 3))),
                ((4, 5), span((1, 0, 0), (1, 1, 1))),
                ((4, 4), span((1, 0, 0), (1, 0, 0))),
                ((5, 6), span((1, 1, 1), (1, 2, 2))),
                ((6, 7), span((1, 2, 2), (1, 3, 3))),
                // Positions past the end are clamped.
                ((7, 8), span((1, 3, 3), (1, 3, 3))),
                ((8, 9), span((1, 3, 3), (1, 3, 3))),
            ],
        );
    }

    #[test]
    fn test_convert_span_multi_byte() {
        let snippet = SourceSnippet::build_from_utf8(0, b"1\xEF\xBC\x923\n456", 4);

        check_spans(
            &snippet,
            &[
                ((0, 1), span((0, 0, 0), (0, 1, 1))),
                // Any range touching the three-byte character covers all of it.
                ((1, 2), span((0, 1, 1), (0, 3, 4))),
                ((1, 3), span((0, 1, 1), (0, 3, 4))),
                ((1, 4), span((0, 1, 1), (0, 3, 4))),
                ((2, 3), span((0, 1, 1), (0, 3, 4))),
                ((2, 4), span((0, 1, 1), (0, 3, 4))),
                ((3, 4), span((0, 1, 1), (0, 3, 4))),
                ((4, 5), span((0, 3, 4), (0, 4, 5))),
                ((6, 7), span((1, 0, 0), (1, 1, 1))),
            ],
        );
    }

    #[test]
    fn test_convert_span_invalid_utf8() {
        let snippet = SourceSnippet::build_from_utf8(0, b"1\xFF2\n3", 4);

        check_spans(
            &snippet,
            &[
                ((0, 1), span((0, 0, 0), (0, 1, 1))),
                // The invalid byte occupies four columns and four UTF-8 bytes.
                ((1, 2), span((0, 1, 1), (0, 5, 5))),
                ((2, 3), span((0, 5, 5), (0, 6, 6))),
                ((4, 5), span((1, 0, 0), (1, 1, 1))),
            ],
        );
    }
}