// perl_position_tracking/line_index.rs
use ropey::Rope;
3
/// Reports whether `b` is a UTF-8 continuation byte, i.e. its two most
/// significant bits are `10` (the range `0x80..=0xBF`).
#[inline]
fn is_utf8_continuation(b: u8) -> bool {
    matches!(b, 0x80..=0xBF)
}
9
10#[derive(Debug, Clone)]
12pub struct LineStartsCache {
13 line_starts: Vec<usize>,
14}
15impl LineStartsCache {
16 fn normalize_text_offset(text: &str, offset: usize) -> usize {
18 let mut normalized = offset.min(text.len());
19 while normalized > 0 && !text.is_char_boundary(normalized) {
20 normalized -= 1;
21 }
22 normalized
23 }
24
25 pub fn new(text: &str) -> Self {
27 let mut ls = vec![0];
28 let mut i = 0;
29 let b = text.as_bytes();
30 while i < b.len() {
31 if b[i] == b'\n' {
32 ls.push(i + 1);
33 } else if b[i] == b'\r' {
34 if i + 1 < b.len() && b[i + 1] == b'\n' {
35 ls.push(i + 2);
36 i += 1;
37 } else {
38 ls.push(i + 1);
39 }
40 }
41 i += 1;
42 }
43 Self { line_starts: ls }
44 }
45
46 pub fn new_rope(rope: &Rope) -> Self {
48 let mut ls = vec![0];
49 for li in 0..rope.len_lines() {
50 if li > 0 {
51 ls.push(rope.line_to_byte(li));
52 }
53 }
54 Self { line_starts: ls }
55 }
56
57 pub fn offset_to_position(&self, text: &str, offset: usize) -> (u32, u32) {
59 let offset = Self::normalize_text_offset(text, offset);
60 let line = self.line_starts.binary_search(&offset).unwrap_or_else(|i| i.saturating_sub(1));
61 let ls = self.line_starts[line];
62 (line as u32, text[ls..offset].chars().map(|c| c.len_utf16()).sum::<usize>() as u32)
63 }
64
65 pub fn position_to_offset(&self, text: &str, line: u32, character: u32) -> usize {
67 let line = line as usize;
68 if line >= self.line_starts.len() {
69 return text.len();
70 }
71 let ls = self.line_starts[line];
72 let le = if line + 1 < self.line_starts.len() {
73 let ns = self.line_starts[line + 1];
74 let mut end = ns.saturating_sub(1);
75 let b = text.as_bytes();
76 while end > ls && (b[end] == b'\n' || b[end] == b'\r') {
77 end = end.saturating_sub(1);
78 }
79 end + 1
80 } else {
81 text.len()
82 };
83 let lt = &text[ls..le];
84 let mut uc = 0;
85 let mut bo = 0;
86 for ch in lt.chars() {
87 if uc >= character as usize {
88 break;
89 }
90 uc += ch.len_utf16();
91 bo += ch.len_utf8();
92 }
93 ls + bo.min(lt.len())
94 }
95
96 pub fn offset_to_position_rope(&self, rope: &Rope, offset: usize) -> (u32, u32) {
98 let offset = Self::normalize_rope_offset(rope, offset);
99 let line = self.line_starts.binary_search(&offset).unwrap_or_else(|i| i.saturating_sub(1));
100 let ls = self.line_starts[line];
101 (
102 line as u32,
103 rope.byte_slice(ls..offset).chars().map(|c| c.len_utf16()).sum::<usize>() as u32,
104 )
105 }
106
107 fn normalize_rope_offset(rope: &Rope, offset: usize) -> usize {
116 let len = rope.len_bytes();
117 let mut normalized = offset.min(len);
118 while normalized > 0 && normalized < len && is_utf8_continuation(rope.byte(normalized)) {
119 normalized -= 1;
120 }
121 normalized
122 }
123
124 pub fn position_to_offset_rope(&self, rope: &Rope, line: u32, character: u32) -> usize {
126 let line = line as usize;
127 if line >= self.line_starts.len() {
128 return rope.len_bytes();
129 }
130 let ls = self.line_starts[line];
131 let le = if line + 1 < self.line_starts.len() {
132 self.line_starts[line + 1]
133 } else {
134 rope.len_bytes()
135 };
136 let sl = rope.byte_slice(ls..le);
137 let mut uc = 0;
138 let mut bo = 0;
139 for ch in sl.chars() {
140 if uc >= character as usize {
141 break;
142 }
143 uc += ch.len_utf16();
144 bo += ch.len_utf8();
145 }
146 ls + bo
147 }
148}
149
/// Owns a document's text together with the byte offset at which each of its
/// lines begins, and converts between byte offsets and
/// `(line, UTF-16 column)` positions.
#[derive(Debug, Clone)]
pub struct LineIndex {
    /// Byte offset of the first byte of each line; the first entry is `0`.
    line_starts: Vec<usize>,
    /// The indexed document.
    text: String,
}

impl LineIndex {
    /// Takes ownership of `text` and records where every line starts.
    ///
    /// `\n`, `\r\n` and a lone `\r` each end a line.
    pub fn new(text: String) -> Self {
        let line_starts = Self::scan_line_starts(text.as_bytes());
        Self { line_starts, text }
    }

    /// Collects the byte offset following each line terminator in `bytes`,
    /// preceded by `0` for the first line.
    fn scan_line_starts(bytes: &[u8]) -> Vec<usize> {
        let mut starts = vec![0];
        let mut cursor = bytes.iter().enumerate().peekable();
        while let Some((pos, &byte)) = cursor.next() {
            let terminator_len = match byte {
                b'\n' => 1,
                b'\r' if matches!(cursor.peek(), Some(&(_, &b'\n'))) => {
                    // Swallow the LF so CRLF counts as one terminator.
                    cursor.next();
                    2
                }
                b'\r' => 1,
                _ => continue,
            };
            starts.push(pos + terminator_len);
        }
        starts
    }

    /// Converts a byte `offset` into `(line, UTF-16 column)`.
    ///
    /// Offsets past the end of the text are clamped to its length, and offsets
    /// inside a multi-byte character are moved back to that character's start.
    pub fn offset_to_position(&self, offset: usize) -> (u32, u32) {
        let clamped = self.normalize_offset(offset);
        let line = match self.line_starts.binary_search(&clamped) {
            Ok(exact) => exact,
            // The insertion point is one past the line that contains the offset.
            Err(insertion) => insertion.saturating_sub(1),
        };

        let bytes_into_line = clamped - self.line_starts[line];
        (line as u32, self.utf16_column(line, bytes_into_line) as u32)
    }

    /// Converts `(line, character)` — `character` counted in UTF-16 code
    /// units — into a byte offset.
    ///
    /// Returns `None` when `line` does not exist, when `character` falls in
    /// the middle of a surrogate pair, or when it lies beyond the searched
    /// slice. That slice runs up to the start of the next line, so the line's
    /// terminator is part of it: a column just past the terminator resolves to
    /// the next line's first byte.
    pub fn position_to_offset(&self, line: u32, character: u32) -> Option<usize> {
        let line = line as usize;
        let line_start = *self.line_starts.get(line)?;
        let line_end = self
            .line_starts
            .get(line + 1)
            .copied()
            .unwrap_or(self.text.len());

        let line_text = &self.text[line_start..line_end];
        self.utf16_to_byte_offset(line_text, character as usize)
            .map(|within_line| line_start + within_line)
    }

    /// Number of UTF-16 code units spanned by the first `byte_offset` bytes of
    /// line `line`. Yields `0` if that span would run past the end of the text.
    fn utf16_column(&self, line: usize, byte_offset: usize) -> usize {
        let span_start = self.line_starts[line];
        let span_end = span_start + byte_offset;
        if span_end > self.text.len() {
            return 0;
        }

        self.text[span_start..span_end]
            .chars()
            .map(char::len_utf16)
            .sum()
    }

    /// Finds the byte offset within `line_text` that corresponds to exactly
    /// `utf16_offset` UTF-16 code units.
    ///
    /// Returns `None` if no character boundary matches, i.e. the column splits
    /// a surrogate pair or exceeds the length of `line_text`.
    fn utf16_to_byte_offset(&self, line_text: &str, utf16_offset: usize) -> Option<usize> {
        let mut units_seen = 0;

        for (byte_offset, ch) in line_text.char_indices() {
            match units_seen.cmp(&utf16_offset) {
                std::cmp::Ordering::Equal => return Some(byte_offset),
                // The previous character overshot the target column.
                std::cmp::Ordering::Greater => return None,
                std::cmp::Ordering::Less => units_seen += ch.len_utf16(),
            }
        }

        if units_seen == utf16_offset {
            Some(line_text.len())
        } else {
            None
        }
    }

    /// Clamps `offset` to the text length and returns the nearest `char`
    /// boundary at or before it.
    fn normalize_offset(&self, offset: usize) -> usize {
        let capped = offset.min(self.text.len());
        // Index 0 is always a boundary, so the search cannot come up empty.
        (0..=capped)
            .rev()
            .find(|&candidate| self.text.is_char_boundary(candidate))
            .unwrap_or(0)
    }

    /// Converts the byte span `start..end` into a pair of
    /// `(line, UTF-16 column)` positions, start first.
    pub fn range(&self, start: usize, end: usize) -> ((u32, u32), (u32, u32)) {
        (self.offset_to_position(start), self.offset_to_position(end))
    }
}