fission_text_engine/line_index.rs
1//! Efficient mapping between byte offsets and line/column positions.
2//!
3//! [`LineIndex`] is built once from a snapshot of the buffer text and then
4//! queried many times. Rebuilding is cheap (single linear scan) and should
5//! be done after every edit batch.
6
7use ropey::Rope;
8
9/// A `(line, col)` pair — both 0-based, with `col` measured in **bytes**
10/// relative to the start of the line.
11#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
12pub struct LineCol {
13 /// 0-based line number.
14 pub line: usize,
15 /// 0-based column as a **byte** offset from the start of the line.
16 pub col: usize,
17}
18
19/// Precomputed index that maps between byte offsets and `(line, col)` pairs.
20///
21/// It stores the byte offset of the start of every line so that both directions
22/// of the mapping are O(log n) via binary search.
23#[derive(Debug, Clone)]
24pub struct LineIndex {
25 /// `line_starts[i]` is the byte offset of the first byte of line `i`.
26 line_starts: Vec<usize>,
27 /// A copy of the source text used for UTF-16 column translation. Storing
28 /// the full text is intentional: the index is short-lived (rebuilt after
29 /// each edit batch) and avoids lifetime coupling to the buffer.
30 source: String,
31}
32
33impl LineIndex {
34 /// Build a new index from a [`Rope`].
35 pub fn build(rope: &Rope) -> Self {
36 let source: String = rope.chunks().collect();
37 let mut line_starts = vec![0usize];
38 for (i, b) in source.as_bytes().iter().enumerate() {
39 if *b == b'\n' {
40 line_starts.push(i + 1);
41 }
42 }
43 Self {
44 line_starts,
45 source,
46 }
47 }
48
49 /// Build from a plain `&str` (convenience for testing).
50 pub fn build_from_str(text: &str) -> Self {
51 let rope = Rope::from_str(text);
52 Self::build(&rope)
53 }
54
55 // ── Queries ─────────────────────────────────────────────────────────
56
57 /// Number of lines in the indexed text.
58 pub fn line_count(&self) -> usize {
59 self.line_starts.len()
60 }
61
62 /// Convert a `LineCol` to an absolute byte offset.
63 ///
64 /// Returns `None` if the position is out of range.
65 pub fn line_col_to_byte(&self, lc: LineCol) -> Option<usize> {
66 let start = *self.line_starts.get(lc.line)?;
67 let offset = start + lc.col;
68 if offset > self.source.len() {
69 return None;
70 }
71 Some(offset)
72 }
73
74 /// Convert an absolute byte offset to a `LineCol`.
75 ///
76 /// Returns `None` if `byte_offset > source.len()`.
77 pub fn byte_to_line_col(&self, byte_offset: usize) -> Option<LineCol> {
78 if byte_offset > self.source.len() {
79 return None;
80 }
81 // Binary search: find the last line whose start <= byte_offset.
82 let line = match self.line_starts.binary_search(&byte_offset) {
83 Ok(exact) => exact,
84 Err(insert) => insert - 1,
85 };
86 let col = byte_offset - self.line_starts[line];
87 Some(LineCol { line, col })
88 }
89
90 /// Byte offset of the first byte of `line` (0-based).
91 ///
92 /// Returns `None` if `line >= line_count()`.
93 pub fn line_start_byte(&self, line: usize) -> Option<usize> {
94 self.line_starts.get(line).copied()
95 }
96
97 /// Byte offset one past the last byte of `line` (exclusive end).
98 ///
99 /// For the last line this equals `source.len()`. Returns `None` if
100 /// `line >= line_count()`.
101 pub fn line_end_byte(&self, line: usize) -> Option<usize> {
102 if line >= self.line_starts.len() {
103 return None;
104 }
105 if line + 1 < self.line_starts.len() {
106 Some(self.line_starts[line + 1])
107 } else {
108 Some(self.source.len())
109 }
110 }
111
112 /// Convert a **UTF-16 code-unit column** (as used by LSP) to a byte
113 /// offset within the document.
114 ///
115 /// `line` is 0-based. `utf16_col` is the number of UTF-16 code units
116 /// from the start of the line.
117 ///
118 /// Returns `None` if the position cannot be mapped (line out of range or
119 /// column past end of line).
120 pub fn utf16_col_to_byte(&self, line: usize, utf16_col: usize) -> Option<usize> {
121 let line_start = *self.line_starts.get(line)?;
122 let line_end = self.line_end_byte(line)?;
123 let line_text = &self.source[line_start..line_end];
124
125 let mut utf16_units = 0usize;
126 for (byte_idx, ch) in line_text.char_indices() {
127 if utf16_units == utf16_col {
128 return Some(line_start + byte_idx);
129 }
130 utf16_units += ch.len_utf16();
131 }
132 // Allow pointing one-past-end (cursor at EOL).
133 if utf16_units == utf16_col {
134 return Some(line_start + line_text.len());
135 }
136 None
137 }
138
139 /// Convert a byte offset to a **(line, utf16_col)** pair — the inverse of
140 /// [`utf16_col_to_byte`](Self::utf16_col_to_byte).
141 pub fn byte_to_utf16_col(&self, byte_offset: usize) -> Option<(usize, usize)> {
142 let lc = self.byte_to_line_col(byte_offset)?;
143 let line_start = self.line_starts[lc.line];
144 let prefix = &self.source[line_start..byte_offset];
145 let utf16_col: usize = prefix.chars().map(|c| c.len_utf16()).sum();
146 Some((lc.line, utf16_col))
147 }
148}