Skip to main content

editor_core/
line_index.rs

//! Stage 2: Logical Line Index
//!
//! Provides efficient line indexing backed by a rope data structure, supporting O(log N) line access and editing.
4
5use crate::text_buffer::TextBuffer;
6
/// Metadata describing a single logical line.
///
/// A logical line is the line's content without its trailing `\n`
/// (see [`LineMetadata::from_text`]).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LineMetadata {
    /// Fast path flag: `true` when the line consists solely of ASCII bytes, in which case
    /// byte offsets and character offsets within the line coincide.
    pub is_pure_ascii: bool,
    /// Length of this line in UTF-8 bytes.
    pub byte_length: usize,
    /// Length of this line in characters (Unicode scalar values).
    pub char_count: usize,
}

impl LineMetadata {
    /// Create the metadata record for an empty line.
    ///
    /// An empty line contains no non-ASCII bytes, so `is_pure_ascii` is `true` and both
    /// lengths are zero. This is the same value `from_text("")` produces.
    pub fn new() -> Self {
        Self {
            is_pure_ascii: true,
            byte_length: 0,
            char_count: 0,
        }
    }

    /// Build line metadata for a single logical line (no trailing `\n`).
    ///
    /// `text` is measured as given; this function does not strip or look for line breaks.
    pub fn from_text(text: &str) -> Self {
        let byte_length = text.len();
        let is_pure_ascii = text.is_ascii();

        // Every ASCII character is exactly one byte, so the byte length already is the
        // character count; only non-ASCII lines need the per-character decoding pass.
        let char_count = if is_pure_ascii {
            byte_length
        } else {
            text.chars().count()
        };

        Self {
            is_pure_ascii,
            byte_length,
            char_count,
        }
    }
}

impl Default for LineMetadata {
    /// Same as [`LineMetadata::new`]: the metadata of an empty line.
    ///
    /// Implemented by hand because a derived `Default` would set `is_pure_ascii` to `false`.
    fn default() -> Self {
        Self::new()
    }
}
44
45/// Logical line index - implemented using Rope data structure
46///
47/// Rope provides O(log N) line access, insertion, and deletion performance, suitable for large file editing
48#[derive(Clone)]
49pub struct LineIndex {
50    /// Rope-backed text buffer that also provides line indexing primitives.
51    text_buffer: TextBuffer,
52}
53
54impl LineIndex {
55    /// Create a new line index
56    pub fn new() -> Self {
57        Self {
58            text_buffer: TextBuffer::new(),
59        }
60    }
61
62    /// Build line index from already-normalized internal text.
63    ///
64    /// This low-level constructor does not normalize line endings. Higher-level editor entry points
65    /// normalize CRLF and lone CR to LF before constructing `LineIndex`; direct CRLF input here keeps
66    /// `\r` as ordinary line content.
67    pub fn from_text(text: &str) -> Self {
68        Self {
69            text_buffer: TextBuffer::from_text(text),
70        }
71    }
72
73    /// Return the internal text buffer used as the canonical line-index backing store.
74    pub(crate) fn text_buffer(&self) -> &TextBuffer {
75        &self.text_buffer
76    }
77
78    /// Delete the specified line
79    pub fn delete_line(&mut self, line_number: usize) {
80        if line_number >= self.text_buffer.line_count() {
81            return;
82        }
83
84        let start_char = self.text_buffer.line_to_char(line_number);
85        let end_char = if line_number + 1 < self.text_buffer.line_count() {
86            self.text_buffer.line_to_char(line_number + 1)
87        } else {
88            self.text_buffer.len_chars()
89        };
90
91        self.text_buffer.delete(start_char, end_char - start_char);
92    }
93
94    /// Get metadata for the specified line number (simulated)
95    pub fn get_line(&self, line_number: usize) -> Option<LineMetadata> {
96        let text = self.text_buffer.get_line_text(line_number)?;
97        Some(LineMetadata::from_text(&text))
98    }
99
100    fn legacy_line_to_content_byte_offset(&self, line_number: usize) -> usize {
101        if line_number == 0 {
102            return 0;
103        }
104
105        if line_number >= self.text_buffer.line_count() {
106            // Return total bytes minus newline count
107            let newline_count = self.text_buffer.line_count().saturating_sub(1);
108            return self.text_buffer.len_bytes().saturating_sub(newline_count);
109        }
110
111        // Rope's line_to_byte includes all newlines from previous lines
112        // Subtract line_number newlines to match old behavior
113        self.text_buffer
114            .line_to_byte(line_number)
115            .saturating_sub(line_number)
116    }
117
118    /// Legacy byte offset for a line start, excluding previous LF separator bytes.
119    ///
120    /// Prefer [`LineIndex::position_to_char_offset`] plus
121    /// [`LineIndex::char_offset_to_byte_offset`] for offsets in the canonical document text. This
122    /// compatibility method preserves the older byte-offset convention where previous `\n` bytes are
123    /// not counted. If this index was built directly from CRLF text, `\r` remains line content and is
124    /// counted.
125    #[deprecated(
126        note = "legacy byte offset excludes previous LF separators; use position_to_char_offset plus char_offset_to_byte_offset"
127    )]
128    pub fn line_to_offset(&self, line_number: usize) -> usize {
129        self.legacy_line_to_content_byte_offset(line_number)
130    }
131
132    /// Legacy line lookup from a byte offset that excludes previous LF separator bytes.
133    ///
134    /// Prefer [`LineIndex::byte_offset_to_char_offset`] plus [`LineIndex::char_offset_to_position`]
135    /// for offsets in the canonical document text. This compatibility method uses the same legacy
136    /// convention as [`LineIndex::line_to_offset`].
137    #[deprecated(
138        note = "legacy byte offset excludes previous LF separators; use byte_offset_to_char_offset plus char_offset_to_position"
139    )]
140    pub fn offset_to_line(&self, offset: usize) -> usize {
141        if offset == 0 {
142            return 0;
143        }
144
145        // Need to add back newline count to get actual Rope byte offset
146        // Binary search to find the correct line
147        let mut low = 0;
148        let mut high = self.text_buffer.line_count();
149
150        while low < high {
151            let mid = (low + high) / 2;
152            let mid_offset = self.legacy_line_to_content_byte_offset(mid);
153
154            if mid_offset < offset {
155                low = mid + 1;
156            } else if mid_offset > offset {
157                high = mid;
158            } else {
159                return mid;
160            }
161        }
162
163        low.saturating_sub(1)
164            .min(self.text_buffer.line_count().saturating_sub(1))
165    }
166
167    /// Get line number and offset within line from character offset
168    pub fn char_offset_to_position(&self, char_offset: usize) -> (usize, usize) {
169        self.text_buffer.char_offset_to_position(char_offset)
170    }
171
172    /// Get character offset from line number and column number
173    pub fn position_to_char_offset(&self, line: usize, column: usize) -> usize {
174        self.text_buffer.position_to_char_offset(line, column)
175    }
176
177    /// Get total line count
178    pub fn line_count(&self) -> usize {
179        self.text_buffer.line_count()
180    }
181
182    /// Get total byte count
183    pub fn byte_count(&self) -> usize {
184        self.text_buffer.len_bytes()
185    }
186
187    /// Get total character count
188    pub fn char_count(&self) -> usize {
189        self.text_buffer.len_chars()
190    }
191
192    /// Get the character at the specified character offset (Unicode scalar index).
193    ///
194    /// Returns `None` if `char_offset` is out of bounds.
195    pub fn char_at(&self, char_offset: usize) -> Option<char> {
196        self.text_buffer.char_at(char_offset)
197    }
198
199    /// Convert a character offset (Unicode scalar values) to a UTF-8 byte offset.
200    ///
201    /// The returned byte offset is clamped to the document length.
202    pub fn char_offset_to_byte_offset(&self, char_offset: usize) -> usize {
203        self.text_buffer.char_offset_to_byte_offset(char_offset)
204    }
205
206    /// Convert a UTF-8 byte offset to a character offset (Unicode scalar values).
207    ///
208    /// The returned character offset is clamped to the document length.
209    pub fn byte_offset_to_char_offset(&self, byte_offset: usize) -> usize {
210        self.text_buffer.byte_offset_to_char_offset(byte_offset)
211    }
212
213    /// Convert a character offset to `(line, byte_column)` where `byte_column` is measured in UTF-8 bytes.
214    pub fn char_offset_to_line_byte_column(&self, char_offset: usize) -> (usize, usize) {
215        let char_offset = char_offset.min(self.text_buffer.len_chars());
216        let line = self.text_buffer.char_to_line(char_offset);
217        let line_start_char = self.text_buffer.line_to_char(line);
218
219        let line_start_byte = self.text_buffer.char_offset_to_byte_offset(line_start_char);
220        let byte_offset = self.text_buffer.char_offset_to_byte_offset(char_offset);
221        (line, byte_offset.saturating_sub(line_start_byte))
222    }
223
224    /// Insert text (at specified character offset)
225    pub fn insert(&mut self, char_offset: usize, text: &str) {
226        self.text_buffer.insert(char_offset, text);
227    }
228
229    /// Delete text range (character offset)
230    pub fn delete(&mut self, start_char: usize, len_chars: usize) {
231        self.text_buffer.delete(start_char, len_chars);
232    }
233
234    /// Get complete text
235    pub fn get_text(&self) -> String {
236        self.text_buffer.get_text()
237    }
238
239    /// Get text in the specified character range.
240    pub fn get_range(&self, start_char: usize, len_chars: usize) -> String {
241        self.text_buffer.get_range(start_char, len_chars)
242    }
243
244    /// Get text of the specified line (excluding newline)
245    pub fn get_line_text(&self, line_number: usize) -> Option<String> {
246        self.text_buffer.get_line_text(line_number)
247    }
248}
249
250impl Default for LineIndex {
251    fn default() -> Self {
252        Self::new()
253    }
254}
255
#[cfg(test)]
mod tests {
    use super::*;

    /// A fresh index is an empty document, which still reports a single line.
    #[test]
    fn test_new_line_index() {
        let empty = LineIndex::new();
        assert_eq!(empty.line_count(), 1); // Rope empty document has 1 line
        assert_eq!(empty.byte_count(), 0);
        assert_eq!(empty.char_count(), 0);
    }

    /// Line, byte, and character totals match the source text.
    #[test]
    fn test_from_text() {
        let source = "Line 1\nLine 2\nLine 3";
        let index = LineIndex::from_text(source);

        assert_eq!(index.line_count(), 3);
        assert_eq!(index.byte_count(), source.len());
        assert_eq!(index.char_count(), source.chars().count());
    }

    /// Legacy line-start offsets do not count the preceding `\n` bytes.
    #[test]
    #[allow(deprecated)]
    fn test_line_to_offset() {
        let index = LineIndex::from_text("First line\nSecond line\nThird line");

        // (line, expected legacy offset)
        let cases = [
            (0, 0),
            (1, 10), // "First line" (excluding \n)
            (2, 21), // "First line" (10) + "Second line" (11) = 21
        ];
        for (line, expected) in cases {
            assert_eq!(index.line_to_offset(line), expected);
        }
    }

    /// Legacy offsets map back to the line that contains them.
    #[test]
    #[allow(deprecated)]
    fn test_offset_to_line() {
        let index = LineIndex::from_text("First line\nSecond line\nThird line");

        // (legacy offset, expected line)
        let cases = [(0, 0), (5, 0), (11, 1), (23, 2)];
        for (offset, expected) in cases {
            assert_eq!(index.offset_to_line(offset), expected);
        }
    }

    /// Character offsets resolve to `(line, column)` positions.
    #[test]
    fn test_char_offset_to_position() {
        let index = LineIndex::from_text("ABC\nDEF\nGHI");

        // (char offset, expected position)
        let cases = [
            (0, (0, 0)), // A
            (2, (0, 2)), // C
            (4, (1, 0)), // D
            (8, (2, 0)), // G
        ];
        for (offset, expected) in cases {
            assert_eq!(index.char_offset_to_position(offset), expected);
        }
    }

    /// `(line, column)` positions resolve to character offsets.
    #[test]
    fn test_position_to_char_offset() {
        let index = LineIndex::from_text("ABC\nDEF\nGHI");

        // ((line, column), expected char offset)
        let cases = [
            ((0, 0), 0), // A
            ((0, 2), 2), // C
            ((1, 0), 4), // D
            ((2, 0), 8), // G
        ];
        for ((line, column), expected) in cases {
            assert_eq!(index.position_to_char_offset(line, column), expected);
        }
    }

    /// Multi-byte CJK text is indexed by character, not by byte.
    #[test]
    fn test_utf8_cjk() {
        let source = "你好\n世界";
        let index = LineIndex::from_text(source);

        assert_eq!(index.line_count(), 2);
        assert_eq!(index.byte_count(), source.len());
        assert_eq!(index.char_count(), 5); // 5 characters (你好\n世界)

        // First line: "你好"
        assert_eq!(index.char_offset_to_position(0), (0, 0));
        assert_eq!(index.char_offset_to_position(1), (0, 1));
        // Second line: "世界" (newline at character offset 2)
        assert_eq!(index.char_offset_to_position(3), (1, 0));
    }

    /// Metadata exists for a valid line and is absent for an out-of-range one.
    #[test]
    fn test_get_line() {
        let index = LineIndex::from_text("Line 1\nLine 2\nLine 3");

        let first = index.get_line(0).expect("line 0 should have metadata");
        assert!(first.is_pure_ascii);

        assert!(index.get_line(10).is_none());
    }

    /// Deleting the first of two lines leaves a single line.
    #[test]
    fn test_insert_delete_lines() {
        let mut index = LineIndex::from_text("Line 1\nLine 2");
        assert_eq!(index.line_count(), 2);

        index.delete_line(0);
        assert_eq!(index.line_count(), 1);
    }

    /// Mixed ASCII/CJK text has more bytes than characters.
    #[test]
    fn test_mixed_ascii_cjk() {
        let index = LineIndex::from_text("Hello 你好\nWorld 世界");

        assert_eq!(index.line_count(), 2);
        assert!(index.byte_count() > index.char_count());
    }

    /// A 10,000-line document reports the right line count and serves a middle line.
    #[test]
    fn test_large_document() {
        let source = (0..10000)
            .map(|i| format!("Line {}", i))
            .collect::<Vec<_>>()
            .join("\n");

        let index = LineIndex::from_text(&source);
        assert_eq!(index.line_count(), 10000);

        // Test accessing middle line
        assert!(index.get_line(5000).is_some());
    }

    /// Inserting at a character offset splices the text in place.
    #[test]
    fn test_insert_text() {
        let mut index = LineIndex::from_text("Hello World");

        index.insert(6, "Beautiful ");
        assert_eq!(index.get_text(), "Hello Beautiful World");
    }

    /// Deleting a character range removes exactly that range.
    #[test]
    fn test_delete_text() {
        let mut index = LineIndex::from_text("Hello Beautiful World");

        index.delete(6, 10); // Delete "Beautiful "
        assert_eq!(index.get_text(), "Hello World");
    }

    /// Char -> byte -> char round-trips, and byte columns agree with absolute byte offsets.
    #[test]
    fn test_char_byte_offset_roundtrip() {
        let index = LineIndex::from_text("a你好\n🌍b");

        for chars in 0..=index.char_count() {
            let bytes = index.char_offset_to_byte_offset(chars);
            assert_eq!(index.byte_offset_to_char_offset(bytes), chars);

            let (line, byte_col) = index.char_offset_to_line_byte_column(chars);
            let line_start_byte =
                index.char_offset_to_byte_offset(index.position_to_char_offset(line, 0));
            assert_eq!(line_start_byte + byte_col, bytes);
        }
    }
}