1#![deny(unsafe_code)]
7#![warn(rust_2018_idioms)]
8#![warn(missing_docs)]
9
10#[derive(Clone, Debug)]
12pub struct LineIndex {
13 line_starts: Vec<usize>,
15 text_len: usize,
17}
18
19impl LineIndex {
20 #[must_use]
22 pub fn new(text: &str) -> Self {
23 let mut line_starts = vec![0];
24 for (idx, ch) in text.char_indices() {
25 if ch == '\n' {
26 line_starts.push(idx + 1);
27 }
28 }
29 Self { line_starts, text_len: text.len() }
30 }
31
32 #[must_use]
34 pub fn byte_to_position(&self, byte: usize) -> (usize, usize) {
35 let line = self.line_starts.binary_search(&byte).unwrap_or_else(|i| i.saturating_sub(1));
36 let column = byte - self.line_starts[line];
37 (line, column)
38 }
39
40 #[must_use]
46 pub fn position_to_byte(&self, line: usize, column: usize) -> Option<usize> {
47 let start = *self.line_starts.get(line)?;
48 let line_end = self
52 .line_starts
53 .get(line + 1)
54 .map_or(self.text_len, |next_start| next_start.saturating_sub(1));
55 let max_column = line_end.saturating_sub(start);
56
57 if column > max_column {
58 return None;
59 }
60
61 Some(start + column)
62 }
63
64 #[must_use]
71 pub fn position_to_byte_checked(&self, line: usize, column: usize) -> Option<usize> {
72 let start = *self.line_starts.get(line)?;
73 let line_end = self
76 .line_starts
77 .get(line + 1)
78 .map_or(self.text_len, |next_start| next_start.saturating_sub(1));
79 let max_column = line_end.saturating_sub(start);
80
81 if column > max_column {
82 return None;
83 }
84
85 Some(start + column)
86 }
87}
88
89#[cfg(test)]
90mod tests {
91 use super::*;
92
93 #[test]
94 fn empty_string_has_one_line() {
95 let idx = LineIndex::new("");
96 assert_eq!(idx.byte_to_position(0), (0, 0));
97 assert_eq!(idx.position_to_byte(0, 0), Some(0));
98 assert_eq!(idx.position_to_byte(1, 0), None);
99 }
100
101 #[test]
102 fn single_line_no_newline() {
103 let idx = LineIndex::new("hello");
104 assert_eq!(idx.byte_to_position(0), (0, 0));
105 assert_eq!(idx.byte_to_position(4), (0, 4));
106 assert_eq!(idx.position_to_byte(0, 0), Some(0));
107 assert_eq!(idx.position_to_byte(0, 4), Some(4));
108 assert_eq!(idx.position_to_byte(0, 5), Some(5));
109 assert_eq!(idx.position_to_byte(0, 6), None);
110 }
111
112 #[test]
113 fn two_lines_byte_to_position() {
114 let idx = LineIndex::new("ab\ncd");
116 assert_eq!(idx.byte_to_position(0), (0, 0));
117 assert_eq!(idx.byte_to_position(1), (0, 1));
118 assert_eq!(idx.byte_to_position(2), (0, 2)); assert_eq!(idx.byte_to_position(3), (1, 0));
120 assert_eq!(idx.byte_to_position(4), (1, 1));
121 }
122
123 #[test]
124 fn two_lines_position_to_byte() {
125 let idx = LineIndex::new("ab\ncd");
126 assert_eq!(idx.position_to_byte(0, 0), Some(0));
127 assert_eq!(idx.position_to_byte(0, 2), Some(2)); assert_eq!(idx.position_to_byte(1, 0), Some(3));
129 assert_eq!(idx.position_to_byte(1, 1), Some(4));
130 assert_eq!(idx.position_to_byte(1, 2), Some(5)); assert_eq!(idx.position_to_byte(1, 3), None); assert_eq!(idx.position_to_byte(2, 0), None); }
134
135 #[test]
136 fn position_to_byte_checked_excludes_newline_as_next_line_start() {
137 let idx = LineIndex::new("ab\ncd");
139 assert_eq!(idx.position_to_byte_checked(0, 2), Some(2));
141 assert_eq!(idx.position_to_byte_checked(0, 3), None);
143 assert_eq!(idx.position_to_byte_checked(1, 0), Some(3));
144 assert_eq!(idx.position_to_byte_checked(2, 0), None);
145 }
146
147 #[test]
148 fn trailing_newline_creates_empty_last_line() {
149 let idx = LineIndex::new("foo\n");
151 assert_eq!(idx.byte_to_position(3), (0, 3)); assert_eq!(idx.byte_to_position(4), (1, 0)); assert_eq!(idx.position_to_byte(1, 0), Some(4));
154 }
155
156 #[test]
157 fn multiple_lines_roundtrip() {
158 let text = "line0\nline1\nline2";
159 let idx = LineIndex::new(text);
160 for (byte, _) in text.char_indices() {
161 let (line, col) = idx.byte_to_position(byte);
162 assert_eq!(idx.position_to_byte(line, col), Some(byte));
163 }
164 }
165
166 #[test]
167 fn columns_are_byte_offsets_for_multibyte_chars() {
168 let text = "αβ\nγ";
171 let idx = LineIndex::new(text);
172 assert_eq!(idx.byte_to_position(0), (0, 0)); assert_eq!(idx.byte_to_position(2), (0, 2)); assert_eq!(idx.byte_to_position(4), (0, 4)); assert_eq!(idx.byte_to_position(5), (1, 0)); assert_eq!(idx.byte_to_position(6), (1, 1)); assert_eq!(idx.position_to_byte(0, 2), Some(2));
179 assert_eq!(idx.position_to_byte(1, 0), Some(5));
180 }
181
182 #[test]
183 fn four_byte_emoji_at_line_boundary() {
184 let text = "a\n😀b";
187 let idx = LineIndex::new(text);
188 assert_eq!(idx.byte_to_position(2), (1, 0)); assert_eq!(idx.byte_to_position(5), (1, 3)); assert_eq!(idx.byte_to_position(6), (1, 4)); assert_eq!(idx.position_to_byte(1, 0), Some(2));
192 assert_eq!(idx.position_to_byte(1, 4), Some(6));
193 }
194
195 #[test]
196 fn crlf_keeps_carriage_return_on_preceding_line() {
197 let text = "a\r\nb";
199 let idx = LineIndex::new(text);
200 assert_eq!(idx.byte_to_position(0), (0, 0));
201 assert_eq!(idx.byte_to_position(1), (0, 1)); assert_eq!(idx.byte_to_position(2), (0, 2)); assert_eq!(idx.byte_to_position(3), (1, 0)); assert_eq!(idx.position_to_byte_checked(0, 2), Some(2));
206 assert_eq!(idx.position_to_byte_checked(0, 3), None);
207 }
208
209 #[test]
210 fn consecutive_newlines_create_empty_lines() {
211 let text = "\n\n\n";
214 let idx = LineIndex::new(text);
215 assert_eq!(idx.byte_to_position(0), (0, 0)); assert_eq!(idx.byte_to_position(1), (1, 0)); assert_eq!(idx.byte_to_position(2), (2, 0)); assert_eq!(idx.byte_to_position(3), (3, 0)); assert_eq!(idx.position_to_byte(1, 0), Some(1));
221 assert_eq!(idx.position_to_byte(1, 1), None); assert_eq!(idx.position_to_byte(3, 0), Some(3));
223 assert_eq!(idx.position_to_byte(3, 1), None);
224 }
225
226 #[test]
227 fn single_newline_is_two_lines() {
228 let idx = LineIndex::new("\n");
230 assert_eq!(idx.byte_to_position(0), (0, 0));
231 assert_eq!(idx.byte_to_position(1), (1, 0));
232 assert_eq!(idx.position_to_byte(0, 0), Some(0));
233 assert_eq!(idx.position_to_byte(0, 1), None); assert_eq!(idx.position_to_byte(1, 0), Some(1));
235 assert_eq!(idx.position_to_byte_checked(0, 0), Some(0));
236 assert_eq!(idx.position_to_byte_checked(1, 0), Some(1));
237 }
238
239 #[test]
240 fn position_to_byte_checked_at_trailing_empty_line() {
241 let idx = LineIndex::new("foo\n");
243 assert_eq!(idx.position_to_byte_checked(1, 0), Some(4));
244 assert_eq!(idx.position_to_byte_checked(1, 1), None);
245 assert_eq!(idx.position_to_byte_checked(2, 0), None);
246 }
247
248 #[test]
249 fn unicode_roundtrip_at_every_char_boundary() {
250 let text = "héllo\n世界\n🎉end";
253 let idx = LineIndex::new(text);
254 for (byte, _) in text.char_indices() {
255 let (line, col) = idx.byte_to_position(byte);
256 assert_eq!(
257 idx.position_to_byte(line, col),
258 Some(byte),
259 "roundtrip failed at byte {byte}"
260 );
261 }
262 }
263
264 #[test]
265 fn checked_empty_string_origin_is_addressable() -> Result<(), Box<dyn std::error::Error>> {
266 let idx = LineIndex::new("");
267 assert_eq!(idx.position_to_byte_checked(0, 0), Some(0));
268 Ok(())
269 }
270
271 #[test]
272 fn checked_empty_string_col_one_out_of_range() -> Result<(), Box<dyn std::error::Error>> {
273 let idx = LineIndex::new("");
274 assert_eq!(idx.position_to_byte_checked(0, 1), None);
275 Ok(())
276 }
277
278 #[test]
279 fn checked_empty_string_line_one_out_of_range() -> Result<(), Box<dyn std::error::Error>> {
280 let idx = LineIndex::new("");
281 assert_eq!(idx.position_to_byte_checked(1, 0), None);
282 Ok(())
283 }
284
285 #[test]
286 fn checked_single_line_all_columns_in_range() -> Result<(), Box<dyn std::error::Error>> {
287 let idx = LineIndex::new("hello");
288 assert_eq!(idx.position_to_byte_checked(0, 0), Some(0));
289 assert_eq!(idx.position_to_byte_checked(0, 4), Some(4));
290 assert_eq!(idx.position_to_byte_checked(0, 5), Some(5));
291 Ok(())
292 }
293
294 #[test]
295 fn checked_single_line_col_beyond_text_len_is_none() -> Result<(), Box<dyn std::error::Error>> {
296 let idx = LineIndex::new("hello");
297 assert_eq!(idx.position_to_byte_checked(0, 6), None);
298 Ok(())
299 }
300
301 #[test]
302 fn checked_newline_byte_is_last_addressable_on_its_line()
303 -> Result<(), Box<dyn std::error::Error>> {
304 let idx = LineIndex::new("abc\ndef");
305 assert_eq!(idx.position_to_byte_checked(0, 3), Some(3));
306 Ok(())
307 }
308
309 #[test]
310 fn checked_next_line_start_is_not_addressable_on_current_line()
311 -> Result<(), Box<dyn std::error::Error>> {
312 let idx = LineIndex::new("abc\ndef");
313 assert_eq!(idx.position_to_byte_checked(0, 4), None);
314 assert_eq!(idx.position_to_byte_checked(0, 100), None);
315 Ok(())
316 }
317
318 #[test]
319 fn checked_trailing_newline_empty_final_line_origin() -> Result<(), Box<dyn std::error::Error>>
320 {
321 let idx = LineIndex::new("foo\n");
322 assert_eq!(idx.position_to_byte_checked(1, 0), Some(4));
323 Ok(())
324 }
325
326 #[test]
327 fn checked_trailing_newline_empty_final_line_col_one_is_none()
328 -> Result<(), Box<dyn std::error::Error>> {
329 let idx = LineIndex::new("foo\n");
330 assert_eq!(idx.position_to_byte_checked(1, 1), None);
331 Ok(())
332 }
333
334 #[test]
335 fn checked_crlf_cr_byte_addressable_on_its_line() -> Result<(), Box<dyn std::error::Error>> {
336 let idx = LineIndex::new("ab\r\ncd");
337 assert_eq!(idx.position_to_byte_checked(0, 2), Some(2));
338 assert_eq!(idx.position_to_byte_checked(0, 3), Some(3));
339 assert_eq!(idx.position_to_byte_checked(0, 4), None);
340 Ok(())
341 }
342
343 #[test]
344 fn checked_crlf_second_line() -> Result<(), Box<dyn std::error::Error>> {
345 let idx = LineIndex::new("ab\r\ncd");
346 assert_eq!(idx.position_to_byte_checked(1, 0), Some(4));
347 assert_eq!(idx.position_to_byte_checked(1, 1), Some(5));
348 assert_eq!(idx.position_to_byte_checked(1, 2), Some(6));
349 assert_eq!(idx.position_to_byte_checked(1, 3), None);
350 Ok(())
351 }
352
353 #[test]
354 fn checked_unicode_two_byte_char_boundary() -> Result<(), Box<dyn std::error::Error>> {
355 let idx = LineIndex::new("caf\u{00e9}");
356 assert_eq!(idx.position_to_byte_checked(0, 3), Some(3));
357 assert_eq!(idx.position_to_byte_checked(0, 4), Some(4));
358 assert_eq!(idx.position_to_byte_checked(0, 5), Some(5));
359 assert_eq!(idx.position_to_byte_checked(0, 6), None);
360 Ok(())
361 }
362
363 #[test]
364 fn checked_unicode_multiline_second_line_boundary() -> Result<(), Box<dyn std::error::Error>> {
365 let idx = LineIndex::new("a\u{00e9}\nb");
366 assert_eq!(idx.position_to_byte_checked(0, 3), Some(3));
367 assert_eq!(idx.position_to_byte_checked(0, 4), None);
368 assert_eq!(idx.position_to_byte_checked(1, 0), Some(4));
369 assert_eq!(idx.position_to_byte_checked(1, 1), Some(5));
370 assert_eq!(idx.position_to_byte_checked(1, 2), None);
371 Ok(())
372 }
373
374 #[test]
375 fn checked_and_unchecked_agree_on_final_line() -> Result<(), Box<dyn std::error::Error>> {
376 let idx = LineIndex::new("abc\nxyz");
377 for col in 0..=5 {
378 assert_eq!(
379 idx.position_to_byte(1, col),
380 idx.position_to_byte_checked(1, col),
381 "methods diverged at col {col} on final line"
382 );
383 }
384 Ok(())
385 }
386
387 #[test]
388 fn byte_to_position_at_text_len_single_line() -> Result<(), Box<dyn std::error::Error>> {
389 let idx = LineIndex::new("hi");
390 assert_eq!(idx.byte_to_position(2), (0, 2));
391 Ok(())
392 }
393
394 #[test]
395 fn byte_to_position_at_text_len_multiline() -> Result<(), Box<dyn std::error::Error>> {
396 let idx = LineIndex::new("a\nb");
397 assert_eq!(idx.byte_to_position(3), (1, 1));
398 Ok(())
399 }
400
401 #[test]
402 fn clone_preserves_index_state() -> Result<(), Box<dyn std::error::Error>> {
403 let idx = LineIndex::new("foo\nbar");
404 let cloned = idx.clone();
405 assert_eq!(idx.byte_to_position(4), cloned.byte_to_position(4));
406 assert_eq!(idx.position_to_byte(1, 0), cloned.position_to_byte(1, 0));
407 Ok(())
408 }
409
410 #[test]
411 fn debug_format_is_non_empty() -> Result<(), Box<dyn std::error::Error>> {
412 let idx = LineIndex::new("test\ndata");
413 let debug_str = format!("{idx:?}");
414 assert!(!debug_str.is_empty());
415 Ok(())
416 }
417
418 #[test]
419 fn consecutive_newlines_produce_empty_lines() -> Result<(), Box<dyn std::error::Error>> {
420 let idx = LineIndex::new("\n\n");
421 assert_eq!(idx.byte_to_position(0), (0, 0));
422 assert_eq!(idx.byte_to_position(1), (1, 0));
423 assert_eq!(idx.byte_to_position(2), (2, 0));
424 assert_eq!(idx.position_to_byte(0, 0), Some(0));
425 assert_eq!(idx.position_to_byte(1, 0), Some(1));
426 assert_eq!(idx.position_to_byte(2, 0), Some(2));
427 assert_eq!(idx.position_to_byte(3, 0), None);
428 Ok(())
429 }
430}