Skip to main content

perl_position_tracking/
line_index.rs

1//! Line index for efficient UTF-16 position calculations.
2use ropey::Rope;
3#[derive(Debug, Clone)]
4pub struct LineStartsCache {
5    line_starts: Vec<usize>,
6}
7impl LineStartsCache {
8    pub fn new(text: &str) -> Self {
9        let mut ls = vec![0];
10        let mut i = 0;
11        let b = text.as_bytes();
12        while i < b.len() {
13            if b[i] == b'\n' {
14                ls.push(i + 1);
15            } else if b[i] == b'\r' {
16                if i + 1 < b.len() && b[i + 1] == b'\n' {
17                    ls.push(i + 2);
18                    i += 1;
19                } else {
20                    ls.push(i + 1);
21                }
22            }
23            i += 1;
24        }
25        Self { line_starts: ls }
26    }
27    pub fn new_rope(rope: &Rope) -> Self {
28        let mut ls = vec![0];
29        for li in 0..rope.len_lines() {
30            if li > 0 {
31                ls.push(rope.line_to_byte(li));
32            }
33        }
34        Self { line_starts: ls }
35    }
36    pub fn offset_to_position(&self, text: &str, offset: usize) -> (u32, u32) {
37        let offset = offset.min(text.len());
38        let line = self.line_starts.binary_search(&offset).unwrap_or_else(|i| i.saturating_sub(1));
39        let ls = self.line_starts[line];
40        (line as u32, text[ls..offset].chars().map(|c| c.len_utf16()).sum::<usize>() as u32)
41    }
42    pub fn position_to_offset(&self, text: &str, line: u32, character: u32) -> usize {
43        let line = line as usize;
44        if line >= self.line_starts.len() {
45            return text.len();
46        }
47        let ls = self.line_starts[line];
48        let le = if line + 1 < self.line_starts.len() {
49            let ns = self.line_starts[line + 1];
50            let mut end = ns.saturating_sub(1);
51            let b = text.as_bytes();
52            while end > ls && (b[end] == b'\n' || b[end] == b'\r') {
53                end = end.saturating_sub(1);
54            }
55            end + 1
56        } else {
57            text.len()
58        };
59        let lt = &text[ls..le];
60        let mut uc = 0;
61        let mut bo = 0;
62        for ch in lt.chars() {
63            if uc >= character as usize {
64                break;
65            }
66            uc += ch.len_utf16();
67            bo += ch.len_utf8();
68        }
69        ls + bo.min(lt.len())
70    }
71    pub fn offset_to_position_rope(&self, rope: &Rope, offset: usize) -> (u32, u32) {
72        let offset = offset.min(rope.len_bytes());
73        let line = self.line_starts.binary_search(&offset).unwrap_or_else(|i| i.saturating_sub(1));
74        let ls = self.line_starts[line];
75        (
76            line as u32,
77            rope.byte_slice(ls..offset).chars().map(|c| c.len_utf16()).sum::<usize>() as u32,
78        )
79    }
80    pub fn position_to_offset_rope(&self, rope: &Rope, line: u32, character: u32) -> usize {
81        let line = line as usize;
82        if line >= self.line_starts.len() {
83            return rope.len_bytes();
84        }
85        let ls = self.line_starts[line];
86        let le = if line + 1 < self.line_starts.len() {
87            self.line_starts[line + 1]
88        } else {
89            rope.len_bytes()
90        };
91        let sl = rope.byte_slice(ls..le);
92        let mut uc = 0;
93        let mut bo = 0;
94        for ch in sl.chars() {
95            if uc >= character as usize {
96                break;
97            }
98            uc += ch.len_utf16();
99            bo += ch.len_utf8();
100        }
101        ls + bo
102    }
103}
104
/// Stores line information for efficient position lookups, owning the text.
///
/// Lines are delimited by `\n` only; a `\r` before the `\n` is counted as
/// part of the line's content.
#[derive(Debug, Clone)]
pub struct LineIndex {
    /// Byte offset of each line start; the first entry is always `0`.
    line_starts: Vec<usize>,
    /// The source text
    text: String,
}

impl LineIndex {
    /// Create a new LineIndex from source text.
    ///
    /// Records the byte offset following every `\n` as a line start.
    pub fn new(text: String) -> Self {
        let mut line_starts = vec![0];
        line_starts.extend(text.match_indices('\n').map(|(newline, _)| newline + 1));

        Self { line_starts, text }
    }

    /// Convert byte offset to position (0-based line and UTF-16 column).
    ///
    /// An offset past the end of the text is clamped to the end of the text,
    /// and an offset inside a multi-byte character is moved back to the start
    /// of that character, so this never panics on an arbitrary `offset`.
    pub fn offset_to_position(&self, offset: usize) -> (u32, u32) {
        let offset = self.snap_offset(offset);
        let line = match self.line_starts.binary_search(&offset) {
            Ok(line) => line,
            Err(insert_at) => insert_at.saturating_sub(1),
        };

        let line_start = self.line_starts[line];
        let column = self.utf16_column(line, offset - line_start);

        (line as u32, column as u32)
    }

    /// Convert position to byte offset.
    ///
    /// `character` is counted in UTF-16 code units from the start of `line`.
    /// The line's trailing `\n` is part of the addressable range, so the
    /// column just past the newline maps to the start of the next line.
    ///
    /// Returns `None` when `line` does not exist, when `character` lies
    /// beyond the end of the line, or when it falls between the two halves of
    /// a surrogate pair.
    pub fn position_to_offset(&self, line: u32, character: u32) -> Option<usize> {
        let line = line as usize;
        let line_start = *self.line_starts.get(line)?;
        // Don't subtract 1 - include the newline in the line
        let line_end = self.line_starts.get(line + 1).copied().unwrap_or(self.text.len());

        // Get the full line including newline
        let line_text = &self.text[line_start..line_end];

        // Find the byte offset for the UTF-16 character position
        let byte_offset = self.utf16_to_byte_offset(line_text, character as usize)?;

        Some(line_start + byte_offset)
    }

    /// Clamp `offset` to the text length and move it back to the nearest
    /// character boundary.
    fn snap_offset(&self, offset: usize) -> usize {
        let mut offset = offset.min(self.text.len());
        // Offset 0 is always a boundary, so this terminates.
        while !self.text.is_char_boundary(offset) {
            offset -= 1;
        }
        offset
    }

    /// Get UTF-16 column from byte offset within a line.
    ///
    /// `byte_offset` is relative to the start of `line`. Returns `0` if the
    /// resulting absolute offset lies past the end of the text.
    fn utf16_column(&self, line: usize, byte_offset: usize) -> usize {
        let line_start = self.line_starts[line];

        // Get the text from line start to the target byte offset
        let target_byte = line_start + byte_offset;
        if target_byte > self.text.len() {
            return 0;
        }

        // Count UTF-16 code units in the substring
        self.text[line_start..target_byte].chars().map(char::len_utf16).sum()
    }

    /// Convert UTF-16 offset to byte offset within a line.
    ///
    /// Returns `None` if `utf16_offset` is past the end of `line_text` or
    /// splits a character that needs two UTF-16 code units.
    fn utf16_to_byte_offset(&self, line_text: &str, utf16_offset: usize) -> Option<usize> {
        let mut current_utf16 = 0;

        for (byte_offset, ch) in line_text.char_indices() {
            if current_utf16 == utf16_offset {
                return Some(byte_offset);
            }
            current_utf16 += ch.len_utf16();
            if current_utf16 > utf16_offset {
                // UTF-16 offset is in the middle of a character
                return None;
            }
        }

        // Check if we're at the end of the line
        if current_utf16 == utf16_offset {
            Some(line_text.len())
        } else {
            None
        }
    }

    /// Create a range from byte offsets.
    ///
    /// Returns the `(line, column)` positions of `start` and `end`, each
    /// converted with [`LineIndex::offset_to_position`].
    pub fn range(&self, start: usize, end: usize) -> ((u32, u32), (u32, u32)) {
        (self.offset_to_position(start), self.offset_to_position(end))
    }
}