Skip to main content

fission_text_engine/
line_index.rs

1//! Efficient mapping between byte offsets and line/column positions.
2//!
3//! [`LineIndex`] is built once from a snapshot of the buffer text and then
4//! queried many times.  Rebuilding is cheap (single linear scan) and should
5//! be done after every edit batch.
6
7use ropey::Rope;
8
9/// A `(line, col)` pair — both 0-based, with `col` measured in **bytes**
10/// relative to the start of the line.
11#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
12pub struct LineCol {
13    /// 0-based line number.
14    pub line: usize,
15    /// 0-based column as a **byte** offset from the start of the line.
16    pub col: usize,
17}
18
19/// Precomputed index that maps between byte offsets and `(line, col)` pairs.
20///
21/// It stores the byte offset of the start of every line so that both directions
22/// of the mapping are O(log n) via binary search.
23#[derive(Debug, Clone)]
24pub struct LineIndex {
25    /// `line_starts[i]` is the byte offset of the first byte of line `i`.
26    line_starts: Vec<usize>,
27    /// A copy of the source text used for UTF-16 column translation.  Storing
28    /// the full text is intentional: the index is short-lived (rebuilt after
29    /// each edit batch) and avoids lifetime coupling to the buffer.
30    source: String,
31}
32
33impl LineIndex {
34    /// Build a new index from a [`Rope`].
35    pub fn build(rope: &Rope) -> Self {
36        let source: String = rope.chunks().collect();
37        let mut line_starts = vec![0usize];
38        for (i, b) in source.as_bytes().iter().enumerate() {
39            if *b == b'\n' {
40                line_starts.push(i + 1);
41            }
42        }
43        Self {
44            line_starts,
45            source,
46        }
47    }
48
49    /// Build from a plain `&str` (convenience for testing).
50    pub fn build_from_str(text: &str) -> Self {
51        let rope = Rope::from_str(text);
52        Self::build(&rope)
53    }
54
55    // ── Queries ─────────────────────────────────────────────────────────
56
57    /// Number of lines in the indexed text.
58    pub fn line_count(&self) -> usize {
59        self.line_starts.len()
60    }
61
62    /// Convert a `LineCol` to an absolute byte offset.
63    ///
64    /// Returns `None` if the position is out of range.
65    pub fn line_col_to_byte(&self, lc: LineCol) -> Option<usize> {
66        let start = *self.line_starts.get(lc.line)?;
67        let offset = start + lc.col;
68        if offset > self.source.len() {
69            return None;
70        }
71        Some(offset)
72    }
73
74    /// Convert an absolute byte offset to a `LineCol`.
75    ///
76    /// Returns `None` if `byte_offset > source.len()`.
77    pub fn byte_to_line_col(&self, byte_offset: usize) -> Option<LineCol> {
78        if byte_offset > self.source.len() {
79            return None;
80        }
81        // Binary search: find the last line whose start <= byte_offset.
82        let line = match self.line_starts.binary_search(&byte_offset) {
83            Ok(exact) => exact,
84            Err(insert) => insert - 1,
85        };
86        let col = byte_offset - self.line_starts[line];
87        Some(LineCol { line, col })
88    }
89
90    /// Byte offset of the first byte of `line` (0-based).
91    ///
92    /// Returns `None` if `line >= line_count()`.
93    pub fn line_start_byte(&self, line: usize) -> Option<usize> {
94        self.line_starts.get(line).copied()
95    }
96
97    /// Byte offset one past the last byte of `line` (exclusive end).
98    ///
99    /// For the last line this equals `source.len()`.  Returns `None` if
100    /// `line >= line_count()`.
101    pub fn line_end_byte(&self, line: usize) -> Option<usize> {
102        if line >= self.line_starts.len() {
103            return None;
104        }
105        if line + 1 < self.line_starts.len() {
106            Some(self.line_starts[line + 1])
107        } else {
108            Some(self.source.len())
109        }
110    }
111
112    /// Convert a **UTF-16 code-unit column** (as used by LSP) to a byte
113    /// offset within the document.
114    ///
115    /// `line` is 0-based.  `utf16_col` is the number of UTF-16 code units
116    /// from the start of the line.
117    ///
118    /// Returns `None` if the position cannot be mapped (line out of range or
119    /// column past end of line).
120    pub fn utf16_col_to_byte(&self, line: usize, utf16_col: usize) -> Option<usize> {
121        let line_start = *self.line_starts.get(line)?;
122        let line_end = self.line_end_byte(line)?;
123        let line_text = &self.source[line_start..line_end];
124
125        let mut utf16_units = 0usize;
126        for (byte_idx, ch) in line_text.char_indices() {
127            if utf16_units == utf16_col {
128                return Some(line_start + byte_idx);
129            }
130            utf16_units += ch.len_utf16();
131        }
132        // Allow pointing one-past-end (cursor at EOL).
133        if utf16_units == utf16_col {
134            return Some(line_start + line_text.len());
135        }
136        None
137    }
138
139    /// Convert a byte offset to a **(line, utf16_col)** pair — the inverse of
140    /// [`utf16_col_to_byte`](Self::utf16_col_to_byte).
141    pub fn byte_to_utf16_col(&self, byte_offset: usize) -> Option<(usize, usize)> {
142        let lc = self.byte_to_line_col(byte_offset)?;
143        let line_start = self.line_starts[lc.line];
144        let prefix = &self.source[line_start..byte_offset];
145        let utf16_col: usize = prefix.chars().map(|c| c.len_utf16()).sum();
146        Some((lc.line, utf16_col))
147    }
148}