Skip to main content

perl_line_index/
lib.rs

1//! Byte-oriented line/column indexing helpers.
2//!
3//! This crate has one responsibility: map byte offsets to `(line, column)`
4//! and back using cached line starts.
5
6#![deny(unsafe_code)]
7#![warn(rust_2018_idioms)]
8#![warn(missing_docs)]
9#![warn(clippy::all)]
10
11/// Line index for byte <-> (line, col) mapping.
12#[derive(Clone, Debug)]
13pub struct LineIndex {
14    /// Byte offset of each line start.
15    line_starts: Vec<usize>,
16    /// Total UTF-8 byte length of the indexed text.
17    text_len: usize,
18}
19
20impl LineIndex {
21    /// Build a line index from UTF-8 text.
22    #[must_use]
23    pub fn new(text: &str) -> Self {
24        let mut line_starts = vec![0];
25        for (idx, ch) in text.char_indices() {
26            if ch == '\n' {
27                line_starts.push(idx + 1);
28            }
29        }
30        Self { line_starts, text_len: text.len() }
31    }
32
33    /// Convert a byte offset to `(line, column)` using byte columns.
34    #[must_use]
35    pub fn byte_to_position(&self, byte: usize) -> (usize, usize) {
36        let line = self.line_starts.binary_search(&byte).unwrap_or_else(|i| i.saturating_sub(1));
37        let column = byte - self.line_starts[line];
38        (line, column)
39    }
40
41    /// Convert `(line, column)` back to byte offset.
42    ///
43    /// Returns `None` when the line is out of range or when the column extends
44    /// past the end of the line (including the newline character, but not the
45    /// start of the next line).
46    #[must_use]
47    pub fn position_to_byte(&self, line: usize, column: usize) -> Option<usize> {
48        let start = *self.line_starts.get(line)?;
49        // line_end is the last addressable byte on this line (the newline char for
50        // non-final lines, or text_len for the final line).  next_line_start itself
51        // belongs to the *next* line, so we subtract one.
52        let line_end = self
53            .line_starts
54            .get(line + 1)
55            .map_or(self.text_len, |next_start| next_start.saturating_sub(1));
56        let max_column = line_end.saturating_sub(start);
57
58        if column > max_column {
59            return None;
60        }
61
62        Some(start + column)
63    }
64
65    /// Convert `(line, column)` back to byte offset, returning `None` when
66    /// the column crosses the line boundary.
67    ///
68    /// The newline character at the end of a line is the last addressable
69    /// column on that line.  The byte at `next_line_start` belongs to the
70    /// *next* line and is therefore out of range.
71    #[must_use]
72    pub fn position_to_byte_checked(&self, line: usize, column: usize) -> Option<usize> {
73        let start = *self.line_starts.get(line)?;
74        // Subtract one from next_line_start so the newline byte is reachable
75        // but the first byte of the next line is not.
76        let line_end = self
77            .line_starts
78            .get(line + 1)
79            .map_or(self.text_len, |next_start| next_start.saturating_sub(1));
80        let max_column = line_end.saturating_sub(start);
81
82        if column > max_column {
83            return None;
84        }
85
86        Some(start + column)
87    }
88}
89
90#[cfg(test)]
91mod tests {
92    use super::*;
93
94    #[test]
95    fn empty_string_has_one_line() {
96        let idx = LineIndex::new("");
97        assert_eq!(idx.byte_to_position(0), (0, 0));
98        assert_eq!(idx.position_to_byte(0, 0), Some(0));
99        assert_eq!(idx.position_to_byte(1, 0), None);
100    }
101
102    #[test]
103    fn single_line_no_newline() {
104        let idx = LineIndex::new("hello");
105        assert_eq!(idx.byte_to_position(0), (0, 0));
106        assert_eq!(idx.byte_to_position(4), (0, 4));
107        assert_eq!(idx.position_to_byte(0, 0), Some(0));
108        assert_eq!(idx.position_to_byte(0, 4), Some(4));
109        assert_eq!(idx.position_to_byte(0, 5), Some(5));
110        assert_eq!(idx.position_to_byte(0, 6), None);
111    }
112
113    #[test]
114    fn two_lines_byte_to_position() {
115        // "ab\ncd"  bytes: a=0, b=1, \n=2, c=3, d=4
116        let idx = LineIndex::new("ab\ncd");
117        assert_eq!(idx.byte_to_position(0), (0, 0));
118        assert_eq!(idx.byte_to_position(1), (0, 1));
119        assert_eq!(idx.byte_to_position(2), (0, 2)); // the newline is on line 0
120        assert_eq!(idx.byte_to_position(3), (1, 0));
121        assert_eq!(idx.byte_to_position(4), (1, 1));
122    }
123
124    #[test]
125    fn two_lines_position_to_byte() {
126        let idx = LineIndex::new("ab\ncd");
127        assert_eq!(idx.position_to_byte(0, 0), Some(0));
128        assert_eq!(idx.position_to_byte(0, 2), Some(2)); // newline byte
129        assert_eq!(idx.position_to_byte(1, 0), Some(3));
130        assert_eq!(idx.position_to_byte(1, 1), Some(4));
131        assert_eq!(idx.position_to_byte(1, 2), Some(5)); // last line, end of text
132        assert_eq!(idx.position_to_byte(1, 3), None); // beyond text
133        assert_eq!(idx.position_to_byte(2, 0), None); // no third line
134    }
135
136    #[test]
137    fn position_to_byte_checked_excludes_newline_as_next_line_start() {
138        // "ab\ncd"
139        let idx = LineIndex::new("ab\ncd");
140        // Line 0 ends at the newline (byte 2); col 2 = newline byte is still on line 0
141        assert_eq!(idx.position_to_byte_checked(0, 2), Some(2));
142        // col 3 is the first byte of line 1 — out of range for line 0
143        assert_eq!(idx.position_to_byte_checked(0, 3), None);
144        assert_eq!(idx.position_to_byte_checked(1, 0), Some(3));
145        assert_eq!(idx.position_to_byte_checked(2, 0), None);
146    }
147
148    #[test]
149    fn trailing_newline_creates_empty_last_line() {
150        // "foo\n" — line 1 starts at byte 4 and is empty
151        let idx = LineIndex::new("foo\n");
152        assert_eq!(idx.byte_to_position(3), (0, 3)); // newline
153        assert_eq!(idx.byte_to_position(4), (1, 0)); // empty last line start
154        assert_eq!(idx.position_to_byte(1, 0), Some(4));
155    }
156
157    #[test]
158    fn multiple_lines_roundtrip() {
159        let text = "line0\nline1\nline2";
160        let idx = LineIndex::new(text);
161        for (byte, _) in text.char_indices() {
162            let (line, col) = idx.byte_to_position(byte);
163            assert_eq!(idx.position_to_byte(line, col), Some(byte));
164        }
165    }
166}