oak_vfs/line_map.rs
1use oak_core::source::Source;
2
/// An index of line-start byte offsets for one source text.
///
/// A `LineMap` is built once per source and then answers conversions between
/// byte offsets and (line, column) coordinates, so repeated lookups against
/// the same source do not have to rescan it for line breaks.
#[derive(Debug, Clone)]
pub struct LineMap {
    /// Byte offset at which each line begins; entry `i` belongs to line `i` (0-indexed).
    line_starts: Vec<usize>,
    /// Total size of the source, in bytes.
    len: usize,
}
14
15impl LineMap {
16 /// Creates a new `LineMap` from a source.
17 ///
18 /// This will scan the entire source to find line endings (`\n`).
19 ///
20 /// # Examples
21 ///
22 /// ```
23 /// # use oak_core::source::SourceText;
24 /// # use oak_vfs::LineMap;
25 /// let source = SourceText::new("hello\nworld");
26 /// let line_map = LineMap::from_source(&source);
27 /// assert_eq!(line_map.line_count(), 2);
28 /// ```
29 pub fn from_source<S: Source + ?Sized>(source: &S) -> Self {
30 let len = source.length();
31 let mut line_starts = Vec::new();
32 line_starts.push(0);
33
34 let mut offset = 0usize;
35 while offset < len {
36 let chunk = source.chunk_at(offset);
37 let text = chunk.slice_from(offset);
38 for (i, b) in text.as_bytes().iter().enumerate() {
39 if *b == b'\n' {
40 let next = offset + i + 1;
41 if next <= len {
42 line_starts.push(next)
43 }
44 }
45 }
46 offset = chunk.end()
47 }
48
49 Self { line_starts, len }
50 }
51
52 /// Returns the total number of lines in the source.
53 pub fn line_count(&self) -> usize {
54 self.line_starts.len()
55 }
56
57 /// Returns the byte offset of the start of the given line (0-indexed).
58 pub fn line_start(&self, line: u32) -> Option<usize> {
59 self.line_starts.get(line as usize).copied()
60 }
61
62 /// Returns the byte offset of the end of the given line (0-indexed).
63 ///
64 /// The end of the line includes the line ending character(s) if present,
65 /// except for the last line which ends at the end of the source.
66 pub fn line_end(&self, line: u32) -> Option<usize> {
67 let idx = line as usize;
68 let start = *self.line_starts.get(idx)?;
69 let next = self.line_starts.get(idx + 1).copied().unwrap_or(self.len);
70 Some(next.max(start))
71 }
72
73 /// Converts a byte offset to (line, column) coordinates using UTF-16 for the column.
74 ///
75 /// This is useful for LSP integration where positions are typically specified in UTF-16.
76 ///
77 /// # Examples
78 ///
79 /// ```
80 /// # use oak_core::source::SourceText;
81 /// # use oak_vfs::LineMap;
82 /// let source = SourceText::new("hello\nworld");
83 /// let line_map = LineMap::from_source(&source);
84 /// let (line, col) = line_map.offset_to_line_col_utf16(&source, 7);
85 /// assert_eq!(line, 1);
86 /// assert_eq!(col, 1);
87 /// ```
88 pub fn offset_to_line_col_utf16<S: Source + ?Sized>(&self, source: &S, offset: usize) -> (u32, u32) {
89 let offset = offset.min(self.len);
90 let line_idx = match self.line_starts.binary_search(&offset) {
91 Ok(i) => i,
92 Err(0) => 0,
93 Err(i) => i - 1,
94 };
95 let line_start = self.line_starts[line_idx];
96 let slice = source.get_text_in(core::range::Range { start: line_start, end: offset });
97 let col = slice.as_ref().encode_utf16().count() as u32;
98 (line_idx as u32, col)
99 }
100
101 /// Converts (line, column) coordinates (in UTF-16) to a byte offset.
102 ///
103 /// # Examples
104 ///
105 /// ```
106 /// # use oak_core::source::SourceText;
107 /// # use oak_vfs::LineMap;
108 /// let source = SourceText::new("hello\nworld");
109 /// let line_map = LineMap::from_source(&source);
110 /// let offset = line_map.line_col_utf16_to_offset(&source, 1, 1);
111 /// assert_eq!(offset, 7);
112 /// ```
113 pub fn line_col_utf16_to_offset<S: Source + ?Sized>(&self, source: &S, line: u32, col_utf16: u32) -> usize {
114 let Some(line_start) = self.line_start(line)
115 else {
116 return self.len;
117 };
118 let line_end = self.line_end(line).unwrap_or(self.len);
119 let slice = source.get_text_in(core::range::Range { start: line_start, end: line_end });
120 let text = slice.as_ref();
121 let target = col_utf16 as usize;
122
123 let mut utf16 = 0usize;
124 for (byte_idx, ch) in text.char_indices() {
125 if utf16 >= target {
126 return (line_start + byte_idx).min(self.len);
127 }
128 utf16 += ch.len_utf16()
129 }
130 line_end.min(self.len)
131 }
132}