Skip to main content

oak_vfs/
line_map.rs

1use oak_core::source::Source;
2
/// An index of line start offsets in a source file for efficient coordinate conversion.
///
/// `LineMap` provides methods to convert between byte offsets and (line, column) coordinates.
/// It is built once per source and can then serve any number of conversions without
/// rescanning the text for line breaks.
#[derive(Debug, Clone)]
pub struct LineMap {
    /// The byte offset of the start of each line, in ascending order.
    ///
    /// The first entry is always `0`; every further entry is the offset just past a `\n`.
    line_starts: Vec<usize>,
    /// The total length of the source in bytes.
    len: usize,
}
14
15impl LineMap {
16    /// Creates a new `LineMap` from a source.
17    ///
18    /// This will scan the entire source to find line endings (`\n`).
19    ///
20    /// # Examples
21    ///
22    /// ```
23    /// # use oak_core::source::SourceText;
24    /// # use oak_vfs::LineMap;
25    /// let source = SourceText::new("hello\nworld");
26    /// let line_map = LineMap::from_source(&source);
27    /// assert_eq!(line_map.line_count(), 2);
28    /// ```
29    pub fn from_source<S: Source + ?Sized>(source: &S) -> Self {
30        let len = source.length();
31        let mut line_starts = Vec::new();
32        line_starts.push(0);
33
34        let mut offset = 0usize;
35        while offset < len {
36            let chunk = source.chunk_at(offset);
37            let text = chunk.slice_from(offset);
38            for (i, b) in text.as_bytes().iter().enumerate() {
39                if *b == b'\n' {
40                    let next = offset + i + 1;
41                    if next <= len {
42                        line_starts.push(next)
43                    }
44                }
45            }
46            offset = chunk.end()
47        }
48
49        Self { line_starts, len }
50    }
51
52    /// Returns the total number of lines in the source.
53    pub fn line_count(&self) -> usize {
54        self.line_starts.len()
55    }
56
57    /// Returns the byte offset of the start of the given line (0-indexed).
58    pub fn line_start(&self, line: u32) -> Option<usize> {
59        self.line_starts.get(line as usize).copied()
60    }
61
62    /// Returns the byte offset of the end of the given line (0-indexed).
63    ///
64    /// The end of the line includes the line ending character(s) if present,
65    /// except for the last line which ends at the end of the source.
66    pub fn line_end(&self, line: u32) -> Option<usize> {
67        let idx = line as usize;
68        let start = *self.line_starts.get(idx)?;
69        let next = self.line_starts.get(idx + 1).copied().unwrap_or(self.len);
70        Some(next.max(start))
71    }
72
73    /// Converts a byte offset to (line, column) coordinates using UTF-16 for the column.
74    ///
75    /// This is useful for LSP integration where positions are typically specified in UTF-16.
76    ///
77    /// # Examples
78    ///
79    /// ```
80    /// # use oak_core::source::SourceText;
81    /// # use oak_vfs::LineMap;
82    /// let source = SourceText::new("hello\nworld");
83    /// let line_map = LineMap::from_source(&source);
84    /// let (line, col) = line_map.offset_to_line_col_utf16(&source, 7);
85    /// assert_eq!(line, 1);
86    /// assert_eq!(col, 1);
87    /// ```
88    pub fn offset_to_line_col_utf16<S: Source + ?Sized>(&self, source: &S, offset: usize) -> (u32, u32) {
89        let offset = offset.min(self.len);
90        let line_idx = match self.line_starts.binary_search(&offset) {
91            Ok(i) => i,
92            Err(0) => 0,
93            Err(i) => i - 1,
94        };
95        let line_start = self.line_starts[line_idx];
96        let slice = source.get_text_in(core::range::Range { start: line_start, end: offset });
97        let col = slice.as_ref().encode_utf16().count() as u32;
98        (line_idx as u32, col)
99    }
100
101    /// Converts (line, column) coordinates (in UTF-16) to a byte offset.
102    ///
103    /// # Examples
104    ///
105    /// ```
106    /// # use oak_core::source::SourceText;
107    /// # use oak_vfs::LineMap;
108    /// let source = SourceText::new("hello\nworld");
109    /// let line_map = LineMap::from_source(&source);
110    /// let offset = line_map.line_col_utf16_to_offset(&source, 1, 1);
111    /// assert_eq!(offset, 7);
112    /// ```
113    pub fn line_col_utf16_to_offset<S: Source + ?Sized>(&self, source: &S, line: u32, col_utf16: u32) -> usize {
114        let Some(line_start) = self.line_start(line)
115        else {
116            return self.len;
117        };
118        let line_end = self.line_end(line).unwrap_or(self.len);
119        let slice = source.get_text_in(core::range::Range { start: line_start, end: line_end });
120        let text = slice.as_ref();
121        let target = col_utf16 as usize;
122
123        let mut utf16 = 0usize;
124        for (byte_idx, ch) in text.char_indices() {
125            if utf16 >= target {
126                return (line_start + byte_idx).min(self.len);
127            }
128            utf16 += ch.len_utf16()
129        }
130        line_end.min(self.len)
131    }
132}