Skip to main content

perl_line_index/
lib.rs

//! Byte-oriented line/column indexing helpers.
//!
//! This crate has one responsibility: map byte offsets to `(line, column)`
//! and back using cached line starts.
5
6#![deny(unsafe_code)]
7#![warn(rust_2018_idioms)]
8#![warn(missing_docs)]
9
/// Line index for byte <-> (line, col) mapping.
///
/// Lines and columns are zero-based, and columns are measured in bytes from
/// the start of their line (not in characters).
#[derive(Clone, Debug)]
pub struct LineIndex {
    /// Byte offset of each line start, in ascending order.
    ///
    /// The first entry is always `0`; every later entry is the offset just
    /// after a `'\n'` byte.
    line_starts: Vec<usize>,
    /// Total UTF-8 byte length of the indexed text.
    text_len: usize,
}

impl LineIndex {
    /// Build a line index from UTF-8 text.
    ///
    /// A new line starts immediately after every `'\n'`, so text ending in a
    /// newline has an empty final line.
    #[must_use]
    pub fn new(text: &str) -> Self {
        // Line 0 always starts at offset 0; each newline opens another line
        // at the byte right after it.
        let line_starts = std::iter::once(0)
            .chain(text.match_indices('\n').map(|(newline_at, _)| newline_at + 1))
            .collect();
        Self { line_starts, text_len: text.len() }
    }

    /// Convert a byte offset to `(line, column)` using byte columns.
    ///
    /// The newline byte that terminates a line is reported as the last column
    /// of that line.  Offsets past the end of the text are not rejected: they
    /// are reported relative to the start of the last line.
    #[must_use]
    pub fn byte_to_position(&self, byte: usize) -> (usize, usize) {
        let line = match self.line_starts.binary_search(&byte) {
            // `byte` is exactly the first byte of a line.
            Ok(exact) => exact,
            // Otherwise the insertion point is one past the line containing it.
            Err(insertion) => insertion.saturating_sub(1),
        };
        (line, byte - self.line_starts[line])
    }

    /// Convert `(line, column)` back to byte offset.
    ///
    /// Returns `None` when the line is out of range or when the column extends
    /// past the end of the line (including the newline character, but not the
    /// start of the next line).  On the final line the end-of-text offset
    /// itself is still addressable.
    #[must_use]
    pub fn position_to_byte(&self, line: usize, column: usize) -> Option<usize> {
        let start = *self.line_starts.get(line)?;
        // Last addressable byte on this line: the newline for non-final lines
        // (the next line's start minus one), or `text_len` for the final line.
        let line_end = match self.line_starts.get(line + 1) {
            Some(next_start) => next_start.saturating_sub(1),
            None => self.text_len,
        };

        // `checked_add` turns an absurdly large column into `None` instead of
        // an arithmetic overflow.
        let offset = start.checked_add(column)?;
        if offset > line_end {
            return None;
        }
        Some(offset)
    }

    /// Convert `(line, column)` back to byte offset, returning `None` when
    /// the column crosses the line boundary.
    ///
    /// The newline character at the end of a line is the last addressable
    /// column on that line.  The byte at `next_line_start` belongs to the
    /// *next* line and is therefore out of range.
    ///
    /// This has the same contract as [`LineIndex::position_to_byte`] and
    /// delegates to it so the two entry points cannot drift apart.
    #[must_use]
    pub fn position_to_byte_checked(&self, line: usize, column: usize) -> Option<usize> {
        self.position_to_byte(line, column)
    }
}
88
#[cfg(test)]
mod tests {
    use super::*;

    /// Even empty text has exactly one (empty) line.
    #[test]
    fn empty_string_has_one_line() {
        let index = LineIndex::new("");
        assert_eq!(index.byte_to_position(0), (0, 0));
        for (line, expected) in [(0, Some(0)), (1, None)] {
            assert_eq!(index.position_to_byte(line, 0), expected);
        }
    }

    /// Text without a newline is a single line whose end-of-text offset is
    /// still addressable.
    #[test]
    fn single_line_no_newline() {
        let index = LineIndex::new("hello");
        for byte in [0, 4] {
            assert_eq!(index.byte_to_position(byte), (0, byte));
        }
        let expectations = [(0, Some(0)), (4, Some(4)), (5, Some(5)), (6, None)];
        for (column, expected) in expectations {
            assert_eq!(index.position_to_byte(0, column), expected);
        }
    }

    /// Offsets map onto the right line, with the newline byte staying on the
    /// line it terminates.
    #[test]
    fn two_lines_byte_to_position() {
        // "ab\ncd"  bytes: a=0, b=1, \n=2, c=3, d=4
        let index = LineIndex::new("ab\ncd");
        let expectations = [
            (0, (0, 0)),
            (1, (0, 1)),
            (2, (0, 2)), // the newline is on line 0
            (3, (1, 0)),
            (4, (1, 1)),
        ];
        for (byte, position) in expectations {
            assert_eq!(index.byte_to_position(byte), position);
        }
    }

    /// Positions map back to offsets and out-of-range positions are rejected.
    #[test]
    fn two_lines_position_to_byte() {
        let index = LineIndex::new("ab\ncd");
        let expectations = [
            ((0, 0), Some(0)),
            ((0, 2), Some(2)), // newline byte
            ((1, 0), Some(3)),
            ((1, 1), Some(4)),
            ((1, 2), Some(5)), // last line, end of text
            ((1, 3), None),    // beyond text
            ((2, 0), None),    // no third line
        ];
        for ((line, column), expected) in expectations {
            assert_eq!(index.position_to_byte(line, column), expected);
        }
    }

    /// The checked variant accepts the newline column but not the first byte
    /// of the following line.
    #[test]
    fn position_to_byte_checked_excludes_newline_as_next_line_start() {
        // "ab\ncd"
        let index = LineIndex::new("ab\ncd");
        let expectations = [
            // Line 0 ends at the newline (byte 2); col 2 is still on line 0.
            ((0, 2), Some(2)),
            // Col 3 is the first byte of line 1: out of range for line 0.
            ((0, 3), None),
            ((1, 0), Some(3)),
            ((2, 0), None),
        ];
        for ((line, column), expected) in expectations {
            assert_eq!(index.position_to_byte_checked(line, column), expected);
        }
    }

    /// A trailing newline opens an empty final line.
    #[test]
    fn trailing_newline_creates_empty_last_line() {
        // "foo\n": line 1 starts at byte 4 and is empty
        let index = LineIndex::new("foo\n");
        let newline_position = index.byte_to_position(3);
        let last_line_start = index.byte_to_position(4);
        assert_eq!(newline_position, (0, 3));
        assert_eq!(last_line_start, (1, 0));
        assert_eq!(index.position_to_byte(1, 0), Some(4));
    }

    /// Every character offset survives a byte -> position -> byte round trip.
    #[test]
    fn multiple_lines_roundtrip() {
        let text = "line0\nline1\nline2";
        let index = LineIndex::new(text);
        text.char_indices().map(|(byte, _)| byte).for_each(|byte| {
            let (line, col) = index.byte_to_position(byte);
            assert_eq!(index.position_to_byte(line, col), Some(byte));
        });
    }
}