Skip to main content

editor_core/
line_index.rs

1//! Stage 2: Logical Line Index
2//!
3//! Provides efficient line indexing using Rope data structure, supporting O(log N) access and editing.
4
5use crate::storage::Piece;
6use ropey::Rope;
7
8/// Metadata for a logical line
9#[derive(Debug, Clone)]
10pub struct LineMetadata {
11    /// List of Pieces referenced by this line (fragments may span multiple pieces)
12    pub pieces: Vec<Piece>,
13    /// Fast path flag: whether this is pure ASCII
14    pub is_pure_ascii: bool,
15    /// Byte length of this line
16    pub byte_length: usize,
17    /// Character count of this line
18    pub char_count: usize,
19}
20
21impl LineMetadata {
22    /// Create an empty line metadata record.
23    pub fn new() -> Self {
24        Self {
25            pieces: Vec::new(),
26            is_pure_ascii: true,
27            byte_length: 0,
28            char_count: 0,
29        }
30    }
31
32    /// Build line metadata for a single logical line (no trailing `\n`).
33    pub fn from_text(text: &str) -> Self {
34        let is_pure_ascii = text.is_ascii();
35        Self {
36            pieces: Vec::new(),
37            is_pure_ascii,
38            byte_length: text.len(),
39            char_count: text.chars().count(),
40        }
41    }
42}
43
44impl Default for LineMetadata {
45    fn default() -> Self {
46        Self::new()
47    }
48}
49
50/// Logical line index - implemented using Rope data structure
51///
52/// Rope provides O(log N) line access, insertion, and deletion performance, suitable for large file editing
53#[derive(Clone)]
54pub struct LineIndex {
55    /// Rope data structure that automatically manages line indexing
56    rope: Rope,
57}
58
59impl LineIndex {
60    /// Create a new line index
61    pub fn new() -> Self {
62        Self { rope: Rope::new() }
63    }
64
65    /// Build line index from text
66    pub fn from_text(text: &str) -> Self {
67        Self {
68            rope: Rope::from_str(text),
69        }
70    }
71
72    /// Append a new line
73    pub fn append_line(&mut self, line: LineMetadata) {
74        // Reconstruct text from LineMetadata and append
75        let current_len = self.rope.len_chars();
76
77        // If not the first line, add a newline first
78        if current_len > 0 {
79            self.rope.insert(current_len, "\n");
80        }
81
82        // Add line content (LineMetadata doesn't store actual text, using placeholder here)
83        // Note: This is for backward compatibility, in actual use should call insert() directly
84        let placeholder = "x".repeat(line.char_count);
85        self.rope.insert(self.rope.len_chars(), &placeholder);
86    }
87
88    /// Insert a line at the specified position
89    pub fn insert_line(&mut self, line_number: usize, line: LineMetadata) {
90        if line_number >= self.rope.len_lines() {
91            self.append_line(line);
92            return;
93        }
94
95        // Get character offset at insertion position
96        let insert_pos = self.rope.line_to_char(line_number);
97
98        // Insert new line content
99        let placeholder = "x".repeat(line.char_count);
100        self.rope.insert(insert_pos, &placeholder);
101        self.rope.insert(insert_pos + line.char_count, "\n");
102    }
103
104    /// Delete the specified line
105    pub fn delete_line(&mut self, line_number: usize) {
106        if line_number >= self.rope.len_lines() {
107            return;
108        }
109
110        let start_char = self.rope.line_to_char(line_number);
111        let end_char = if line_number + 1 < self.rope.len_lines() {
112            self.rope.line_to_char(line_number + 1)
113        } else {
114            self.rope.len_chars()
115        };
116
117        self.rope.remove(start_char..end_char);
118    }
119
120    /// Get metadata for the specified line number (simulated)
121    pub fn get_line(&self, line_number: usize) -> Option<LineMetadata> {
122        if line_number >= self.rope.len_lines() {
123            return None;
124        }
125
126        let line = self.rope.line(line_number);
127        let mut text = line.to_string();
128
129        // Remove trailing newline (Rope's line() includes newline)
130        if text.ends_with('\n') {
131            text.pop();
132        }
133        if text.ends_with('\r') {
134            text.pop();
135        }
136
137        Some(LineMetadata::from_text(&text))
138    }
139
140    /// Get mutable reference (Rope doesn't need this method, kept for compatibility)
141    pub fn get_line_mut(&mut self, line_number: usize) -> Option<&mut LineMetadata> {
142        // Rope doesn't support getting mutable references directly, return None
143        let _line_num = line_number;
144        None
145    }
146
147    /// Get byte offset for line number (excluding newlines of previous lines)
148    pub fn line_to_offset(&self, line_number: usize) -> usize {
149        if line_number == 0 {
150            return 0;
151        }
152
153        if line_number >= self.rope.len_lines() {
154            // Return total bytes minus newline count
155            let newline_count = self.rope.len_lines().saturating_sub(1);
156            return self.rope.len_bytes().saturating_sub(newline_count);
157        }
158
159        // Rope's line_to_byte includes all newlines from previous lines
160        // Subtract line_number newlines to match old behavior
161        self.rope
162            .line_to_byte(line_number)
163            .saturating_sub(line_number)
164    }
165
166    /// Get line number from byte offset (offset excludes newlines)
167    pub fn offset_to_line(&self, offset: usize) -> usize {
168        if offset == 0 {
169            return 0;
170        }
171
172        // Need to add back newline count to get actual Rope byte offset
173        // Binary search to find the correct line
174        let mut low = 0;
175        let mut high = self.rope.len_lines();
176
177        while low < high {
178            let mid = (low + high) / 2;
179            let mid_offset = self.line_to_offset(mid);
180
181            if mid_offset < offset {
182                low = mid + 1;
183            } else if mid_offset > offset {
184                high = mid;
185            } else {
186                return mid;
187            }
188        }
189
190        low.saturating_sub(1)
191            .min(self.rope.len_lines().saturating_sub(1))
192    }
193
194    /// Get line number and offset within line from character offset
195    pub fn char_offset_to_position(&self, char_offset: usize) -> (usize, usize) {
196        let char_offset = char_offset.min(self.rope.len_chars());
197
198        let line_idx = self.rope.char_to_line(char_offset);
199        let line_start_char = self.rope.line_to_char(line_idx);
200        let char_in_line = char_offset - line_start_char;
201
202        (line_idx, char_in_line)
203    }
204
205    /// Get character offset from line number and column number
206    pub fn position_to_char_offset(&self, line: usize, column: usize) -> usize {
207        if line >= self.rope.len_lines() {
208            return self.rope.len_chars();
209        }
210
211        let line_start_char = self.rope.line_to_char(line);
212        let line_len = if line + 1 < self.rope.len_lines() {
213            self.rope.line_to_char(line + 1) - line_start_char - 1 // -1 for newline
214        } else {
215            self.rope.len_chars() - line_start_char
216        };
217
218        line_start_char + column.min(line_len)
219    }
220
221    /// Get total line count
222    pub fn line_count(&self) -> usize {
223        self.rope.len_lines()
224    }
225
226    /// Get total byte count
227    pub fn byte_count(&self) -> usize {
228        self.rope.len_bytes()
229    }
230
231    /// Get total character count
232    pub fn char_count(&self) -> usize {
233        self.rope.len_chars()
234    }
235
236    /// Convert a character offset (Unicode scalar values) to a UTF-8 byte offset.
237    ///
238    /// The returned byte offset is clamped to the document length.
239    pub fn char_offset_to_byte_offset(&self, char_offset: usize) -> usize {
240        let char_offset = char_offset.min(self.rope.len_chars());
241        self.rope.char_to_byte(char_offset)
242    }
243
244    /// Convert a UTF-8 byte offset to a character offset (Unicode scalar values).
245    ///
246    /// The returned character offset is clamped to the document length.
247    pub fn byte_offset_to_char_offset(&self, byte_offset: usize) -> usize {
248        let byte_offset = byte_offset.min(self.rope.len_bytes());
249        self.rope.byte_to_char(byte_offset)
250    }
251
252    /// Convert a character offset to `(line, byte_column)` where `byte_column` is measured in UTF-8 bytes.
253    pub fn char_offset_to_line_byte_column(&self, char_offset: usize) -> (usize, usize) {
254        let char_offset = char_offset.min(self.rope.len_chars());
255        let line = self.rope.char_to_line(char_offset);
256        let line_start_char = self.rope.line_to_char(line);
257
258        let line_start_byte = self.rope.char_to_byte(line_start_char);
259        let byte_offset = self.rope.char_to_byte(char_offset);
260        (line, byte_offset.saturating_sub(line_start_byte))
261    }
262
263    /// Insert text (at specified character offset)
264    pub fn insert(&mut self, char_offset: usize, text: &str) {
265        let char_offset = char_offset.min(self.rope.len_chars());
266        self.rope.insert(char_offset, text);
267    }
268
269    /// Delete text range (character offset)
270    pub fn delete(&mut self, start_char: usize, len_chars: usize) {
271        let start_char = start_char.min(self.rope.len_chars());
272        let end_char = (start_char + len_chars).min(self.rope.len_chars());
273
274        if start_char < end_char {
275            self.rope.remove(start_char..end_char);
276        }
277    }
278
279    /// Get complete text
280    pub fn get_text(&self) -> String {
281        self.rope.to_string()
282    }
283
284    /// Get text of the specified line (excluding newline)
285    pub fn get_line_text(&self, line_number: usize) -> Option<String> {
286        if line_number >= self.rope.len_lines() {
287            return None;
288        }
289
290        let mut text = self.rope.line(line_number).to_string();
291
292        // Remove trailing newline
293        if text.ends_with('\n') {
294            text.pop();
295        }
296
297        Some(text)
298    }
299}
300
301impl Default for LineIndex {
302    fn default() -> Self {
303        Self::new()
304    }
305}
306
307#[cfg(test)]
308mod tests {
309    use super::*;
310
311    #[test]
312    fn test_new_line_index() {
313        let index = LineIndex::new();
314        assert_eq!(index.line_count(), 1); // Rope empty document has 1 line
315        assert_eq!(index.byte_count(), 0);
316        assert_eq!(index.char_count(), 0);
317    }
318
319    #[test]
320    fn test_from_text() {
321        let text = "Line 1\nLine 2\nLine 3";
322        let index = LineIndex::from_text(text);
323
324        assert_eq!(index.line_count(), 3);
325        assert_eq!(index.byte_count(), text.len());
326        assert_eq!(index.char_count(), text.chars().count());
327    }
328
329    #[test]
330    fn test_line_to_offset() {
331        let text = "First line\nSecond line\nThird line";
332        let index = LineIndex::from_text(text);
333
334        assert_eq!(index.line_to_offset(0), 0);
335        assert_eq!(index.line_to_offset(1), 10); // "First line" (excluding \n)
336        assert_eq!(index.line_to_offset(2), 21); // "First line" (10) + "Second line" (11) = 21
337    }
338
339    #[test]
340    fn test_offset_to_line() {
341        let text = "First line\nSecond line\nThird line";
342        let index = LineIndex::from_text(text);
343
344        assert_eq!(index.offset_to_line(0), 0);
345        assert_eq!(index.offset_to_line(5), 0);
346        assert_eq!(index.offset_to_line(11), 1);
347        assert_eq!(index.offset_to_line(23), 2);
348    }
349
350    #[test]
351    fn test_char_offset_to_position() {
352        let text = "ABC\nDEF\nGHI";
353        let index = LineIndex::from_text(text);
354
355        assert_eq!(index.char_offset_to_position(0), (0, 0)); // A
356        assert_eq!(index.char_offset_to_position(2), (0, 2)); // C
357        assert_eq!(index.char_offset_to_position(4), (1, 0)); // D
358        assert_eq!(index.char_offset_to_position(8), (2, 0)); // G
359    }
360
361    #[test]
362    fn test_position_to_char_offset() {
363        let text = "ABC\nDEF\nGHI";
364        let index = LineIndex::from_text(text);
365
366        assert_eq!(index.position_to_char_offset(0, 0), 0); // A
367        assert_eq!(index.position_to_char_offset(0, 2), 2); // C
368        assert_eq!(index.position_to_char_offset(1, 0), 4); // D
369        assert_eq!(index.position_to_char_offset(2, 0), 8); // G
370    }
371
372    #[test]
373    fn test_utf8_cjk() {
374        let text = "你好\n世界";
375        let index = LineIndex::from_text(text);
376
377        assert_eq!(index.line_count(), 2);
378        assert_eq!(index.byte_count(), text.len());
379        assert_eq!(index.char_count(), 5); // 5 characters (你好\n世界)
380
381        // First line: "你好"
382        assert_eq!(index.char_offset_to_position(0), (0, 0));
383        assert_eq!(index.char_offset_to_position(1), (0, 1));
384        // Second line: "世界" (newline at character offset 2)
385        assert_eq!(index.char_offset_to_position(3), (1, 0));
386    }
387
388    #[test]
389    fn test_get_line() {
390        let text = "Line 1\nLine 2\nLine 3";
391        let index = LineIndex::from_text(text);
392
393        let line0 = index.get_line(0);
394        assert!(line0.is_some());
395        let meta = line0.unwrap();
396        assert!(meta.is_pure_ascii);
397
398        let line_none = index.get_line(10);
399        assert!(line_none.is_none());
400    }
401
402    #[test]
403    fn test_insert_delete_lines() {
404        let mut index = LineIndex::from_text("Line 1\nLine 2");
405        assert_eq!(index.line_count(), 2);
406
407        index.delete_line(0);
408        assert_eq!(index.line_count(), 1);
409    }
410
411    #[test]
412    fn test_mixed_ascii_cjk() {
413        let text = "Hello 你好\nWorld 世界";
414        let index = LineIndex::from_text(text);
415
416        assert_eq!(index.line_count(), 2);
417        assert!(index.byte_count() > index.char_count());
418    }
419
420    #[test]
421    fn test_large_document() {
422        let mut lines = Vec::new();
423        for i in 0..10000 {
424            lines.push(format!("Line {}", i));
425        }
426        let text = lines.join("\n");
427
428        let index = LineIndex::from_text(&text);
429        assert_eq!(index.line_count(), 10000);
430
431        // Test accessing middle line
432        let line_5000 = index.get_line(5000);
433        assert!(line_5000.is_some());
434    }
435
436    #[test]
437    fn test_insert_text() {
438        let mut index = LineIndex::from_text("Hello World");
439
440        index.insert(6, "Beautiful ");
441        assert_eq!(index.get_text(), "Hello Beautiful World");
442    }
443
444    #[test]
445    fn test_delete_text() {
446        let mut index = LineIndex::from_text("Hello Beautiful World");
447
448        index.delete(6, 10); // Delete "Beautiful "
449        assert_eq!(index.get_text(), "Hello World");
450    }
451
452    #[test]
453    fn test_char_byte_offset_roundtrip() {
454        let text = "a你好\n🌍b";
455        let index = LineIndex::from_text(text);
456
457        for char_offset in 0..=index.char_count() {
458            let byte_offset = index.char_offset_to_byte_offset(char_offset);
459            let recovered = index.byte_offset_to_char_offset(byte_offset);
460            assert_eq!(recovered, char_offset);
461
462            let (line, byte_col) = index.char_offset_to_line_byte_column(char_offset);
463            let line_start_char = index.rope.line_to_char(line);
464            let line_start_byte = index.rope.char_to_byte(line_start_char);
465            assert_eq!(line_start_byte + byte_col, byte_offset);
466        }
467    }
468}