cyrs_syntax/
line_index.rs1use text_size::{TextRange, TextSize};
14
/// A position expressed as a zero-based line number plus a column measured
/// in UTF-8 bytes from the start of that line.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct LineCol {
    /// Zero-based line number.
    pub line: u32,
    /// Byte offset from the start of the line (UTF-8 code units).
    pub col: u32,
}
24
/// A position expressed as a zero-based line number plus a column measured
/// in UTF-16 code units from the start of that line.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct WideLineCol {
    /// Zero-based line number.
    pub line: u32,
    /// Offset from the start of the line in UTF-16 code units.
    pub col: u32,
}
35
36#[derive(Debug, Clone)]
38pub struct LineIndex {
39 newlines: Vec<TextSize>,
41 wide_chars: Vec<Vec<WideChar>>,
44}
45
46#[derive(Debug, Clone, Copy)]
47struct WideChar {
48 start: TextSize,
50 len: TextSize,
52}
53
54impl LineIndex {
55 #[must_use]
57 pub fn new(text: &str) -> Self {
58 let mut newlines = vec![TextSize::from(0)];
59 let mut wide_chars: Vec<Vec<WideChar>> = vec![Vec::new()];
60 let mut line_start = TextSize::from(0);
61
62 for (offset, ch) in text.char_indices() {
63 let offset = TextSize::try_from(offset).expect("source fits in u32 bytes");
64 if ch == '\n' {
65 let next_line_start = offset + TextSize::of('\n');
66 newlines.push(next_line_start);
67 wide_chars.push(Vec::new());
68 line_start = next_line_start;
69 continue;
70 }
71 if !ch.is_ascii() {
72 let start_in_line = offset - line_start;
73 wide_chars
74 .last_mut()
75 .expect("at least one line always exists")
76 .push(WideChar {
77 start: start_in_line,
78 len: TextSize::of(ch),
79 });
80 }
81 }
82
83 Self {
84 newlines,
85 wide_chars,
86 }
87 }
88
89 #[must_use]
100 pub fn line_col(&self, offset: TextSize) -> LineCol {
101 let line = self
102 .newlines
103 .partition_point(|&start| start <= offset)
104 .saturating_sub(1);
105 let line_start = self.newlines[line];
106 let col = offset - line_start;
107 LineCol {
108 line: u32::try_from(line).expect("line count fits in u32"),
109 col: u32::from(col),
110 }
111 }
112
113 #[must_use]
115 pub fn to_utf16(&self, pos: LineCol) -> WideLineCol {
116 let line = pos.line as usize;
117 if line >= self.wide_chars.len() || self.wide_chars[line].is_empty() {
118 return WideLineCol {
119 line: pos.line,
120 col: pos.col,
121 };
122 }
123 let mut col = pos.col;
124 for wc in &self.wide_chars[line] {
125 if u32::from(wc.start) >= pos.col {
126 break;
127 }
128 let utf8_len = u32::from(wc.len);
132 let utf16_len: u32 = if utf8_len == 4 { 2 } else { 1 };
133 col = col - utf8_len + utf16_len;
134 }
135 WideLineCol {
136 line: pos.line,
137 col,
138 }
139 }
140
141 #[must_use]
143 pub fn from_utf16(&self, pos: WideLineCol) -> LineCol {
144 let line = pos.line as usize;
145 if line >= self.wide_chars.len() || self.wide_chars[line].is_empty() {
146 return LineCol {
147 line: pos.line,
148 col: pos.col,
149 };
150 }
151 let mut utf16_seen: u32 = 0;
152 let mut col = pos.col;
153 for wc in &self.wide_chars[line] {
154 let wc_col_utf8 = u32::from(wc.start);
155 if wc_col_utf8 + utf16_seen >= col {
158 break;
159 }
160 let utf8_len = u32::from(wc.len);
161 let utf16_len: u32 = if utf8_len == 4 { 2 } else { 1 };
162 col = col + utf8_len - utf16_len;
165 utf16_seen += utf16_len;
166 }
167 LineCol {
168 line: pos.line,
169 col,
170 }
171 }
172
173 #[must_use]
182 pub fn line_range(&self, line: u32) -> Option<TextRange> {
183 let idx = line as usize;
184 let start = *self.newlines.get(idx)?;
185 let end = self
186 .newlines
187 .get(idx + 1)
188 .copied()
189 .unwrap_or(TextSize::from(u32::MAX));
190 Some(TextRange::new(start, end))
191 }
192
193 #[must_use]
196 pub fn line_count(&self) -> u32 {
197 u32::try_from(self.newlines.len()).expect("line count fits in u32")
198 }
199}
200
#[cfg(test)]
mod tests {
    use super::{LineCol, LineIndex, WideLineCol};
    use pretty_assertions::assert_eq;
    use text_size::{TextRange, TextSize};

    /// Shorthand for a byte offset.
    fn at(raw: u32) -> TextSize {
        TextSize::from(raw)
    }

    /// Shorthand for a UTF-8 line/column pair.
    fn lc(line: u32, col: u32) -> LineCol {
        LineCol { line, col }
    }

    #[test]
    fn empty_input_is_one_line() {
        let index = LineIndex::new("");
        assert_eq!(index.line_count(), 1);
        assert_eq!(index.line_col(at(0)), lc(0, 0));
    }

    #[test]
    fn ascii_single_line_line_col() {
        let index = LineIndex::new("abc");
        assert_eq!(index.line_count(), 1);
        assert_eq!(index.line_col(at(2)), lc(0, 2));
    }

    #[test]
    fn ascii_multi_line_line_col() {
        let index = LineIndex::new("ab\ncde\nf");
        assert_eq!(index.line_count(), 3);

        // (byte offset, expected line, expected column)
        let cases = [(0, 0, 0), (2, 0, 2), (3, 1, 0), (6, 1, 3), (7, 2, 0)];
        for &(offset, line, col) in cases.iter() {
            assert_eq!(index.line_col(at(offset)), lc(line, col));
        }
    }

    #[test]
    fn utf8_offset_is_bytes_not_chars() {
        let index = LineIndex::new("éllo");
        // Columns are byte offsets, even when they fall inside a character.
        for &offset in [2, 1].iter() {
            assert_eq!(index.line_col(at(offset)), lc(0, offset));
        }
    }

    #[test]
    fn utf16_round_trip_bmp() {
        let index = LineIndex::new("abé");
        let byte_pos = index.line_col(at(4));
        assert_eq!(byte_pos, lc(0, 4));

        let wide_pos = index.to_utf16(byte_pos);
        assert_eq!(wide_pos, WideLineCol { line: 0, col: 3 });
        assert_eq!(index.from_utf16(wide_pos), byte_pos);
    }

    #[test]
    fn utf16_round_trip_astral() {
        let index = LineIndex::new("a\u{1F600}b");
        let byte_pos = index.line_col(at(5));
        assert_eq!(byte_pos, lc(0, 5));

        let wide_pos = index.to_utf16(byte_pos);
        assert_eq!(wide_pos, WideLineCol { line: 0, col: 3 });
        assert_eq!(index.from_utf16(wide_pos), byte_pos);
    }

    #[test]
    fn line_range_last_line_open_ended() {
        let index = LineIndex::new("ab\ncd");
        // Indexed by line number; the final `None` is the first missing line.
        let expected = [
            Some(TextRange::new(at(0), at(3))),
            Some(TextRange::new(at(3), at(u32::MAX))),
            None,
        ];
        for (line, want) in (0u32..).zip(expected.iter()) {
            assert_eq!(index.line_range(line), *want);
        }
    }
}