Skip to main content

patch_prolog_frontend/
source_map.rs

//! Maps byte offsets (the stable position in a `Span`) back to human-readable
//! line/column positions. Built on demand from the source text a consumer
//! already holds — positions are not threaded through parser return types.
4
/// Line/column resolver for a single source buffer.
///
/// Borrows the source text and records where each line begins, so a byte
/// offset is resolved with one binary search plus a scan of a single line.
#[derive(Debug, Clone)]
pub struct SourceMap<'a> {
    src: &'a str,
    /// Byte offset of the start of each line (line 0 starts at 0).
    /// Never empty and strictly increasing, which `line_index` relies on.
    line_starts: Vec<u32>,
}

impl<'a> SourceMap<'a> {
    /// Builds the line-start table for `src`.
    ///
    /// A line starts at offset 0 and immediately after every `\n` byte.
    /// Line starts that do not fit in a `u32` are not recorded: offsets are
    /// `u32`, so such lines could never be addressed, and a wrapping cast
    /// would break the sorted order the lookup depends on.
    pub fn new(src: &'a str) -> Self {
        let after_newlines = src
            .bytes()
            .enumerate()
            .filter(|&(_, byte)| byte == b'\n')
            .map(|(i, _)| i + 1)
            .take_while(|&start| start <= u32::MAX as usize)
            .map(|start| start as u32);
        let line_starts = std::iter::once(0).chain(after_newlines).collect();
        SourceMap { src, line_starts }
    }

    /// 1-based line and column for human-facing `file:line:col` rendering.
    /// The column counts **characters** from the line start, not bytes — so
    /// tooling that expects byte columns will mismatch on multibyte/emoji
    /// source. (LSP wire positions use `utf16_position` instead.)
    ///
    /// An `offset` past the end of the buffer resolves to the end of the
    /// buffer; an `offset` inside a multi-byte character resolves to the
    /// start of that character.
    pub fn line_col(&self, offset: u32) -> (usize, usize) {
        let (line, prefix) = self.locate(offset);
        (line + 1, prefix.chars().count() + 1)
    }

    /// 0-based line and 0-based UTF-16 column — an LSP `Position`.
    ///
    /// Out-of-range and mid-character offsets are handled as in `line_col`.
    pub fn utf16_position(&self, offset: u32) -> (u32, u32) {
        let (line, prefix) = self.locate(offset);
        // `len_utf16` is 1 or 2, and the prefix is shorter than `u32::MAX`
        // bytes, so neither the cast nor the sum can overflow.
        let col: u32 = prefix.chars().map(|c| c.len_utf16() as u32).sum();
        // Line starts are distinct `u32` values, so the index fits in `u32`.
        (line as u32, col)
    }

    /// Returns the 0-based line containing `offset` together with the text
    /// between that line's start and `offset`.
    ///
    /// The end of the slice is clamped to the buffer length and moved back to
    /// the nearest character boundary, so the column is always measured over
    /// valid text instead of silently collapsing to zero.
    fn locate(&self, offset: u32) -> (usize, &str) {
        let line = self.line_index(offset);
        let start = self.line_starts[line] as usize;

        let mut end = (offset as usize).min(self.src.len());
        // Offset 0 is always a boundary, so this loop terminates.
        while !self.src.is_char_boundary(end) {
            end -= 1;
        }

        // A line start is itself a character boundary no greater than
        // `offset`, so `start <= end` holds; `get` just keeps this panic-free.
        (line, self.src.get(start..end).unwrap_or(""))
    }

    /// Index of the last line whose start is `<= offset`.
    fn line_index(&self, offset: u32) -> usize {
        // `line_starts[0] == 0` satisfies the predicate for every offset, so
        // the partition point is at least 1 and the subtraction cannot wrap.
        self.line_starts.partition_point(|&start| start <= offset) - 1
    }
}
57
#[cfg(test)]
mod tests {
    use super::SourceMap;

    /// Byte offsets in a three-line ASCII buffer map to 1-based line/column.
    #[test]
    fn resolves_line_and_col() {
        let map = SourceMap::new("ab\ncde\nf");
        let expected: [(u32, (usize, usize)); 5] = [
            (0, (1, 1)), // 'a'
            (1, (1, 2)), // 'b'
            (3, (2, 1)), // 'c'
            (5, (2, 3)), // 'e'
            (7, (3, 1)), // 'f'
        ];
        for &(offset, position) in expected.iter() {
            assert_eq!(map.line_col(offset), position, "offset {}", offset);
        }
    }

    /// LSP columns are measured in UTF-16 code units, not bytes or chars.
    #[test]
    fn utf16_columns_count_code_units() {
        // '😀' is 4 UTF-8 bytes, 2 UTF-16 units.
        let map = SourceMap::new("😀x");
        let before_emoji = map.utf16_position(0);
        let at_x = map.utf16_position(4); // past the emoji's 2 UTF-16 units
        assert_eq!(before_emoji, (0, 0));
        assert_eq!(at_x, (0, 2));
    }
}