1#![deny(unsafe_code)]
7#![warn(rust_2018_idioms)]
8#![warn(missing_docs)]
9
10#[derive(Clone, Debug)]
12pub struct LineIndex {
13 line_starts: Vec<usize>,
15 text_len: usize,
17}
18
19impl LineIndex {
20 #[must_use]
22 pub fn new(text: &str) -> Self {
23 let mut line_starts = vec![0];
24 for (idx, ch) in text.char_indices() {
25 if ch == '\n' {
26 line_starts.push(idx + 1);
27 }
28 }
29 Self { line_starts, text_len: text.len() }
30 }
31
32 #[must_use]
34 pub fn byte_to_position(&self, byte: usize) -> (usize, usize) {
35 let line = self.line_starts.binary_search(&byte).unwrap_or_else(|i| i.saturating_sub(1));
36 let column = byte - self.line_starts[line];
37 (line, column)
38 }
39
40 #[must_use]
46 pub fn position_to_byte(&self, line: usize, column: usize) -> Option<usize> {
47 let start = *self.line_starts.get(line)?;
48 let line_end = self
52 .line_starts
53 .get(line + 1)
54 .map_or(self.text_len, |next_start| next_start.saturating_sub(1));
55 let max_column = line_end.saturating_sub(start);
56
57 if column > max_column {
58 return None;
59 }
60
61 Some(start + column)
62 }
63
64 #[must_use]
93 pub fn position_to_byte_utf16(&self, text: &str, line: usize, column: usize) -> Option<usize> {
94 let line_start = *self.line_starts.get(line)?;
95 let line_end = self.line_starts.get(line + 1).copied().unwrap_or(self.text_len);
99 let line_text = text.get(line_start..line_end)?;
100
101 let mut utf16_units: usize = 0;
103 for (byte_offset, ch) in line_text.char_indices() {
104 if utf16_units == column {
105 return Some(line_start + byte_offset);
106 }
107 let ch_utf16 = ch.len_utf16();
108 if utf16_units + ch_utf16 > column {
110 return None;
111 }
112 utf16_units += ch_utf16;
113 }
114
115 if utf16_units == column { Some(line_start + line_text.len()) } else { None }
118 }
119
120 #[must_use]
127 pub fn position_to_byte_checked(&self, line: usize, column: usize) -> Option<usize> {
128 let start = *self.line_starts.get(line)?;
129 let line_end = self
132 .line_starts
133 .get(line + 1)
134 .map_or(self.text_len, |next_start| next_start.saturating_sub(1));
135 let max_column = line_end.saturating_sub(start);
136
137 if column > max_column {
138 return None;
139 }
140
141 Some(start + column)
142 }
143}
144
145#[cfg(test)]
146mod tests {
147 use super::*;
148
149 #[test]
150 fn empty_string_has_one_line() {
151 let idx = LineIndex::new("");
152 assert_eq!(idx.byte_to_position(0), (0, 0));
153 assert_eq!(idx.position_to_byte(0, 0), Some(0));
154 assert_eq!(idx.position_to_byte(1, 0), None);
155 }
156
157 #[test]
158 fn single_line_no_newline() {
159 let idx = LineIndex::new("hello");
160 assert_eq!(idx.byte_to_position(0), (0, 0));
161 assert_eq!(idx.byte_to_position(4), (0, 4));
162 assert_eq!(idx.position_to_byte(0, 0), Some(0));
163 assert_eq!(idx.position_to_byte(0, 4), Some(4));
164 assert_eq!(idx.position_to_byte(0, 5), Some(5));
165 assert_eq!(idx.position_to_byte(0, 6), None);
166 }
167
168 #[test]
169 fn two_lines_byte_to_position() {
170 let idx = LineIndex::new("ab\ncd");
172 assert_eq!(idx.byte_to_position(0), (0, 0));
173 assert_eq!(idx.byte_to_position(1), (0, 1));
174 assert_eq!(idx.byte_to_position(2), (0, 2)); assert_eq!(idx.byte_to_position(3), (1, 0));
176 assert_eq!(idx.byte_to_position(4), (1, 1));
177 }
178
179 #[test]
180 fn two_lines_position_to_byte() {
181 let idx = LineIndex::new("ab\ncd");
182 assert_eq!(idx.position_to_byte(0, 0), Some(0));
183 assert_eq!(idx.position_to_byte(0, 2), Some(2)); assert_eq!(idx.position_to_byte(1, 0), Some(3));
185 assert_eq!(idx.position_to_byte(1, 1), Some(4));
186 assert_eq!(idx.position_to_byte(1, 2), Some(5)); assert_eq!(idx.position_to_byte(1, 3), None); assert_eq!(idx.position_to_byte(2, 0), None); }
190
191 #[test]
192 fn position_to_byte_checked_excludes_newline_as_next_line_start() {
193 let idx = LineIndex::new("ab\ncd");
195 assert_eq!(idx.position_to_byte_checked(0, 2), Some(2));
197 assert_eq!(idx.position_to_byte_checked(0, 3), None);
199 assert_eq!(idx.position_to_byte_checked(1, 0), Some(3));
200 assert_eq!(idx.position_to_byte_checked(2, 0), None);
201 }
202
203 #[test]
204 fn trailing_newline_creates_empty_last_line() {
205 let idx = LineIndex::new("foo\n");
207 assert_eq!(idx.byte_to_position(3), (0, 3)); assert_eq!(idx.byte_to_position(4), (1, 0)); assert_eq!(idx.position_to_byte(1, 0), Some(4));
210 }
211
212 #[test]
213 fn multiple_lines_roundtrip() {
214 let text = "line0\nline1\nline2";
215 let idx = LineIndex::new(text);
216 for (byte, _) in text.char_indices() {
217 let (line, col) = idx.byte_to_position(byte);
218 assert_eq!(idx.position_to_byte(line, col), Some(byte));
219 }
220 }
221
222 #[test]
223 fn columns_are_byte_offsets_for_multibyte_chars() {
224 let text = "αβ\nγ";
227 let idx = LineIndex::new(text);
228 assert_eq!(idx.byte_to_position(0), (0, 0)); assert_eq!(idx.byte_to_position(2), (0, 2)); assert_eq!(idx.byte_to_position(4), (0, 4)); assert_eq!(idx.byte_to_position(5), (1, 0)); assert_eq!(idx.byte_to_position(6), (1, 1)); assert_eq!(idx.position_to_byte(0, 2), Some(2));
235 assert_eq!(idx.position_to_byte(1, 0), Some(5));
236 }
237
238 #[test]
239 fn four_byte_emoji_at_line_boundary() {
240 let text = "a\n😀b";
243 let idx = LineIndex::new(text);
244 assert_eq!(idx.byte_to_position(2), (1, 0)); assert_eq!(idx.byte_to_position(5), (1, 3)); assert_eq!(idx.byte_to_position(6), (1, 4)); assert_eq!(idx.position_to_byte(1, 0), Some(2));
248 assert_eq!(idx.position_to_byte(1, 4), Some(6));
249 }
250
251 #[test]
252 fn crlf_keeps_carriage_return_on_preceding_line() {
253 let text = "a\r\nb";
255 let idx = LineIndex::new(text);
256 assert_eq!(idx.byte_to_position(0), (0, 0));
257 assert_eq!(idx.byte_to_position(1), (0, 1)); assert_eq!(idx.byte_to_position(2), (0, 2)); assert_eq!(idx.byte_to_position(3), (1, 0)); assert_eq!(idx.position_to_byte_checked(0, 2), Some(2));
262 assert_eq!(idx.position_to_byte_checked(0, 3), None);
263 }
264
265 #[test]
266 fn consecutive_newlines_create_empty_lines() {
267 let text = "\n\n\n";
270 let idx = LineIndex::new(text);
271 assert_eq!(idx.byte_to_position(0), (0, 0)); assert_eq!(idx.byte_to_position(1), (1, 0)); assert_eq!(idx.byte_to_position(2), (2, 0)); assert_eq!(idx.byte_to_position(3), (3, 0)); assert_eq!(idx.position_to_byte(1, 0), Some(1));
277 assert_eq!(idx.position_to_byte(1, 1), None); assert_eq!(idx.position_to_byte(3, 0), Some(3));
279 assert_eq!(idx.position_to_byte(3, 1), None);
280 }
281
282 #[test]
283 fn single_newline_is_two_lines() {
284 let idx = LineIndex::new("\n");
286 assert_eq!(idx.byte_to_position(0), (0, 0));
287 assert_eq!(idx.byte_to_position(1), (1, 0));
288 assert_eq!(idx.position_to_byte(0, 0), Some(0));
289 assert_eq!(idx.position_to_byte(0, 1), None); assert_eq!(idx.position_to_byte(1, 0), Some(1));
291 assert_eq!(idx.position_to_byte_checked(0, 0), Some(0));
292 assert_eq!(idx.position_to_byte_checked(1, 0), Some(1));
293 }
294
295 #[test]
296 fn position_to_byte_checked_at_trailing_empty_line() {
297 let idx = LineIndex::new("foo\n");
299 assert_eq!(idx.position_to_byte_checked(1, 0), Some(4));
300 assert_eq!(idx.position_to_byte_checked(1, 1), None);
301 assert_eq!(idx.position_to_byte_checked(2, 0), None);
302 }
303
304 #[test]
305 fn unicode_roundtrip_at_every_char_boundary() {
306 let text = "héllo\n世界\n🎉end";
309 let idx = LineIndex::new(text);
310 for (byte, _) in text.char_indices() {
311 let (line, col) = idx.byte_to_position(byte);
312 assert_eq!(
313 idx.position_to_byte(line, col),
314 Some(byte),
315 "roundtrip failed at byte {byte}"
316 );
317 }
318 }
319
320 #[test]
321 fn checked_empty_string_origin_is_addressable() -> Result<(), Box<dyn std::error::Error>> {
322 let idx = LineIndex::new("");
323 assert_eq!(idx.position_to_byte_checked(0, 0), Some(0));
324 Ok(())
325 }
326
327 #[test]
328 fn checked_empty_string_col_one_out_of_range() -> Result<(), Box<dyn std::error::Error>> {
329 let idx = LineIndex::new("");
330 assert_eq!(idx.position_to_byte_checked(0, 1), None);
331 Ok(())
332 }
333
334 #[test]
335 fn checked_empty_string_line_one_out_of_range() -> Result<(), Box<dyn std::error::Error>> {
336 let idx = LineIndex::new("");
337 assert_eq!(idx.position_to_byte_checked(1, 0), None);
338 Ok(())
339 }
340
341 #[test]
342 fn checked_single_line_all_columns_in_range() -> Result<(), Box<dyn std::error::Error>> {
343 let idx = LineIndex::new("hello");
344 assert_eq!(idx.position_to_byte_checked(0, 0), Some(0));
345 assert_eq!(idx.position_to_byte_checked(0, 4), Some(4));
346 assert_eq!(idx.position_to_byte_checked(0, 5), Some(5));
347 Ok(())
348 }
349
350 #[test]
351 fn checked_single_line_col_beyond_text_len_is_none() -> Result<(), Box<dyn std::error::Error>> {
352 let idx = LineIndex::new("hello");
353 assert_eq!(idx.position_to_byte_checked(0, 6), None);
354 Ok(())
355 }
356
357 #[test]
358 fn checked_newline_byte_is_last_addressable_on_its_line()
359 -> Result<(), Box<dyn std::error::Error>> {
360 let idx = LineIndex::new("abc\ndef");
361 assert_eq!(idx.position_to_byte_checked(0, 3), Some(3));
362 Ok(())
363 }
364
365 #[test]
366 fn checked_next_line_start_is_not_addressable_on_current_line()
367 -> Result<(), Box<dyn std::error::Error>> {
368 let idx = LineIndex::new("abc\ndef");
369 assert_eq!(idx.position_to_byte_checked(0, 4), None);
370 assert_eq!(idx.position_to_byte_checked(0, 100), None);
371 Ok(())
372 }
373
374 #[test]
375 fn checked_trailing_newline_empty_final_line_origin() -> Result<(), Box<dyn std::error::Error>>
376 {
377 let idx = LineIndex::new("foo\n");
378 assert_eq!(idx.position_to_byte_checked(1, 0), Some(4));
379 Ok(())
380 }
381
382 #[test]
383 fn checked_trailing_newline_empty_final_line_col_one_is_none()
384 -> Result<(), Box<dyn std::error::Error>> {
385 let idx = LineIndex::new("foo\n");
386 assert_eq!(idx.position_to_byte_checked(1, 1), None);
387 Ok(())
388 }
389
390 #[test]
391 fn checked_crlf_cr_byte_addressable_on_its_line() -> Result<(), Box<dyn std::error::Error>> {
392 let idx = LineIndex::new("ab\r\ncd");
393 assert_eq!(idx.position_to_byte_checked(0, 2), Some(2));
394 assert_eq!(idx.position_to_byte_checked(0, 3), Some(3));
395 assert_eq!(idx.position_to_byte_checked(0, 4), None);
396 Ok(())
397 }
398
399 #[test]
400 fn checked_crlf_second_line() -> Result<(), Box<dyn std::error::Error>> {
401 let idx = LineIndex::new("ab\r\ncd");
402 assert_eq!(idx.position_to_byte_checked(1, 0), Some(4));
403 assert_eq!(idx.position_to_byte_checked(1, 1), Some(5));
404 assert_eq!(idx.position_to_byte_checked(1, 2), Some(6));
405 assert_eq!(idx.position_to_byte_checked(1, 3), None);
406 Ok(())
407 }
408
409 #[test]
410 fn checked_unicode_two_byte_char_boundary() -> Result<(), Box<dyn std::error::Error>> {
411 let idx = LineIndex::new("caf\u{00e9}");
412 assert_eq!(idx.position_to_byte_checked(0, 3), Some(3));
413 assert_eq!(idx.position_to_byte_checked(0, 4), Some(4));
414 assert_eq!(idx.position_to_byte_checked(0, 5), Some(5));
415 assert_eq!(idx.position_to_byte_checked(0, 6), None);
416 Ok(())
417 }
418
419 #[test]
420 fn checked_unicode_multiline_second_line_boundary() -> Result<(), Box<dyn std::error::Error>> {
421 let idx = LineIndex::new("a\u{00e9}\nb");
422 assert_eq!(idx.position_to_byte_checked(0, 3), Some(3));
423 assert_eq!(idx.position_to_byte_checked(0, 4), None);
424 assert_eq!(idx.position_to_byte_checked(1, 0), Some(4));
425 assert_eq!(idx.position_to_byte_checked(1, 1), Some(5));
426 assert_eq!(idx.position_to_byte_checked(1, 2), None);
427 Ok(())
428 }
429
430 #[test]
431 fn checked_and_unchecked_agree_on_final_line() -> Result<(), Box<dyn std::error::Error>> {
432 let idx = LineIndex::new("abc\nxyz");
433 for col in 0..=5 {
434 assert_eq!(
435 idx.position_to_byte(1, col),
436 idx.position_to_byte_checked(1, col),
437 "methods diverged at col {col} on final line"
438 );
439 }
440 Ok(())
441 }
442
443 #[test]
444 fn byte_to_position_at_text_len_single_line() -> Result<(), Box<dyn std::error::Error>> {
445 let idx = LineIndex::new("hi");
446 assert_eq!(idx.byte_to_position(2), (0, 2));
447 Ok(())
448 }
449
450 #[test]
451 fn byte_to_position_at_text_len_multiline() -> Result<(), Box<dyn std::error::Error>> {
452 let idx = LineIndex::new("a\nb");
453 assert_eq!(idx.byte_to_position(3), (1, 1));
454 Ok(())
455 }
456
457 #[test]
458 fn clone_preserves_index_state() -> Result<(), Box<dyn std::error::Error>> {
459 let idx = LineIndex::new("foo\nbar");
460 let cloned = idx.clone();
461 assert_eq!(idx.byte_to_position(4), cloned.byte_to_position(4));
462 assert_eq!(idx.position_to_byte(1, 0), cloned.position_to_byte(1, 0));
463 Ok(())
464 }
465
466 #[test]
467 fn debug_format_is_non_empty() -> Result<(), Box<dyn std::error::Error>> {
468 let idx = LineIndex::new("test\ndata");
469 let debug_str = format!("{idx:?}");
470 assert!(!debug_str.is_empty());
471 Ok(())
472 }
473
474 #[test]
475 fn consecutive_newlines_produce_empty_lines() -> Result<(), Box<dyn std::error::Error>> {
476 let idx = LineIndex::new("\n\n");
477 assert_eq!(idx.byte_to_position(0), (0, 0));
478 assert_eq!(idx.byte_to_position(1), (1, 0));
479 assert_eq!(idx.byte_to_position(2), (2, 0));
480 assert_eq!(idx.position_to_byte(0, 0), Some(0));
481 assert_eq!(idx.position_to_byte(1, 0), Some(1));
482 assert_eq!(idx.position_to_byte(2, 0), Some(2));
483 assert_eq!(idx.position_to_byte(3, 0), None);
484 Ok(())
485 }
486}