perl_position_tracking/
mapper.rs1use crate::WirePosition as Position;
10use ropey::Rope;
11use serde_json::Value;
12
13pub struct PositionMapper {
37 rope: Rope,
39 line_ending: LineEnding,
41}
42
/// Line-ending style detected in a piece of text.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum LineEnding {
    /// Only `\n` line breaks were found (also reported for text with no line breaks).
    Lf,
    /// Only `\r\n` line breaks were found.
    CrLf,
    /// Only bare `\r` line breaks were found.
    Cr,
    /// More than one kind of line break was found.
    Mixed,
}
55
56impl PositionMapper {
57 pub fn new(text: &str) -> Self {
73 let rope = Rope::from_str(text);
74 let line_ending = detect_line_ending(text);
75 Self { rope, line_ending }
76 }
77
78 pub fn update(&mut self, text: &str) {
80 self.rope = Rope::from_str(text);
81 self.line_ending = detect_line_ending(text);
82 }
83
84 pub fn apply_edit(&mut self, start_byte: usize, end_byte: usize, new_text: &str) {
86 let start_byte = start_byte.min(self.rope.len_bytes());
88 let end_byte = end_byte.min(self.rope.len_bytes());
89
90 let start_char = self.rope.byte_to_char(start_byte);
92 let end_char = self.rope.byte_to_char(end_byte);
93
94 if end_char > start_char {
96 self.rope.remove(start_char..end_char);
97 }
98
99 if !new_text.is_empty() {
101 self.rope.insert(start_char, new_text);
102 }
103
104 self.line_ending = detect_line_ending(&self.rope.to_string());
106 }
107
108 pub fn lsp_pos_to_byte(&self, pos: Position) -> Option<usize> {
124 let line_idx = pos.line as usize;
125 if line_idx >= self.rope.len_lines() {
126 return None;
127 }
128
129 let line_start_byte = self.rope.line_to_byte(line_idx);
130 let line = self.rope.line(line_idx);
131
132 let mut utf16_offset = 0u32;
134 let mut byte_offset = 0;
135
136 for ch in line.chars() {
137 if utf16_offset >= pos.character {
138 break;
139 }
140 let ch_utf16_len = if ch as u32 > 0xFFFF { 2 } else { 1 };
141 utf16_offset += ch_utf16_len;
142 byte_offset += ch.len_utf8();
143 }
144
145 Some(line_start_byte + byte_offset)
146 }
147
148 pub fn byte_to_lsp_pos(&self, byte_offset: usize) -> Position {
163 let byte_offset = byte_offset.min(self.rope.len_bytes());
164
165 let line_idx = self.rope.byte_to_line(byte_offset);
166 let line_start_byte = self.rope.line_to_byte(line_idx);
167 let byte_in_line = byte_offset - line_start_byte;
168
169 let line = self.rope.line(line_idx);
171 let mut utf16_offset = 0u32;
172 let mut current_byte = 0;
173
174 for ch in line.chars() {
175 if current_byte >= byte_in_line {
176 break;
177 }
178 let ch_len = ch.len_utf8();
179 if current_byte + ch_len > byte_in_line {
180 break;
182 }
183 current_byte += ch_len;
184 let ch_utf16_len = if ch as u32 > 0xFFFF { 2 } else { 1 };
185 utf16_offset += ch_utf16_len;
186 }
187
188 Position { line: line_idx as u32, character: utf16_offset }
189 }
190
191 pub fn text(&self) -> String {
193 self.rope.to_string()
194 }
195
196 pub fn slice(&self, start_byte: usize, end_byte: usize) -> String {
198 let start = start_byte.min(self.rope.len_bytes());
199 let end = end_byte.min(self.rope.len_bytes());
200 self.rope.slice(self.rope.byte_to_char(start)..self.rope.byte_to_char(end)).to_string()
201 }
202
203 pub fn len_bytes(&self) -> usize {
205 self.rope.len_bytes()
206 }
207
208 pub fn len_lines(&self) -> usize {
210 self.rope.len_lines()
211 }
212
213 pub fn lsp_pos_to_char(&self, pos: Position) -> Option<usize> {
215 self.lsp_pos_to_byte(pos).map(|byte| self.rope.byte_to_char(byte))
216 }
217
218 pub fn char_to_lsp_pos(&self, char_idx: usize) -> Position {
220 let byte_offset = self.rope.char_to_byte(char_idx);
221 self.byte_to_lsp_pos(byte_offset)
222 }
223
224 pub fn is_empty(&self) -> bool {
226 self.rope.len_bytes() == 0
227 }
228
229 pub fn line_ending(&self) -> LineEnding {
231 self.line_ending
232 }
233}
234
235pub fn json_to_position(pos: &Value) -> Option<Position> {
239 Some(Position {
240 line: pos["line"].as_u64()? as u32,
241 character: pos["character"].as_u64()? as u32,
242 })
243}
244
245pub fn position_to_json(pos: Position) -> Value {
249 serde_json::json!({
250 "line": pos.line,
251 "character": pos.character
252 })
253}
254
255fn detect_line_ending(text: &str) -> LineEnding {
257 let mut crlf_count = 0;
258 let mut lf_count = 0;
259 let mut cr_count = 0;
260
261 let bytes = text.as_bytes();
262 let mut i = 0;
263 while i < bytes.len() {
264 if i + 1 < bytes.len() && bytes[i] == b'\r' && bytes[i + 1] == b'\n' {
265 crlf_count += 1;
266 i += 2;
267 } else if bytes[i] == b'\n' {
268 lf_count += 1;
269 i += 1;
270 } else if bytes[i] == b'\r' {
271 cr_count += 1;
272 i += 1;
273 } else {
274 i += 1;
275 }
276 }
277
278 if crlf_count > 0 && lf_count == 0 && cr_count == 0 {
280 LineEnding::CrLf
281 } else if lf_count > 0 && crlf_count == 0 && cr_count == 0 {
282 LineEnding::Lf
283 } else if cr_count > 0 && crlf_count == 0 && lf_count == 0 {
284 LineEnding::Cr
285 } else if crlf_count > 0 || lf_count > 0 || cr_count > 0 {
286 LineEnding::Mixed
287 } else {
288 LineEnding::Lf }
290}
291
/// Replaces the bytes `start_byte..old_end_byte` of `text` with `replacement`.
///
/// The edit is silently skipped, leaving `text` untouched, when the range is invalid:
/// either offset is past the end of `text`, either offset is not on a UTF-8 character
/// boundary, or `start_byte` is greater than `old_end_byte`.
pub fn apply_edit_utf8(
    text: &mut String,
    start_byte: usize,
    old_end_byte: usize,
    replacement: &str,
) {
    // `is_char_boundary` is false for offsets past the end, so it covers the
    // out-of-bounds case too; the ordering check keeps `replace_range` from panicking
    // on an inverted range.
    let range_is_valid = start_byte <= old_end_byte
        && text.is_char_boundary(start_byte)
        && text.is_char_boundary(old_end_byte);
    if !range_is_valid {
        return;
    }

    text.replace_range(start_byte..old_end_byte, replacement);
}
307
/// Counts the `\n` characters in `text`.
pub fn newline_count(text: &str) -> usize {
    // `\n` is ASCII and its byte value never occurs inside a multi-byte UTF-8
    // sequence, so counting bytes gives the same result as counting chars.
    text.bytes().filter(|&byte| byte == b'\n').count()
}
314
/// Returns the length in bytes of the text after the last `\n` in `text`.
///
/// When `text` contains no `\n` this is the byte length of the whole string.
pub fn last_line_column_utf8(text: &str) -> u32 {
    // The last line starts just after the final `\n`, or at 0 when there is none.
    let last_line_start = text.rfind('\n').map_or(0, |newline_idx| newline_idx + 1);
    (text.len() - last_line_start) as u32
}
325
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for building an LSP position.
    fn at(line: u32, character: u32) -> Position {
        Position { line, character }
    }

    /// Asserts that the position maps to `byte` and that `byte` maps back to it.
    fn assert_round_trip(mapper: &PositionMapper, line: u32, character: u32, byte: usize) {
        assert_eq!(mapper.lsp_pos_to_byte(at(line, character)), Some(byte));
        assert_eq!(mapper.byte_to_lsp_pos(byte), at(line, character));
    }

    #[test]
    fn test_lf_positions() {
        let mapper = PositionMapper::new("line 1\nline 2\nline 3");

        // Start of document, middle of the first line, start of the second line.
        assert_round_trip(&mapper, 0, 0, 0);
        assert_round_trip(&mapper, 0, 3, 3);
        assert_round_trip(&mapper, 1, 0, 7);
    }

    #[test]
    fn test_crlf_positions() {
        let mapper = PositionMapper::new("line 1\r\nline 2\r\nline 3");

        assert_eq!(mapper.line_ending(), LineEnding::CrLf);

        // Each `\r\n` contributes two bytes before the next line starts.
        assert_round_trip(&mapper, 1, 0, 8);
        assert_round_trip(&mapper, 2, 0, 16);
    }

    #[test]
    fn test_utf16_positions() {
        let mapper = PositionMapper::new("hello 😀 world");

        // Just before the emoji.
        assert_eq!(mapper.lsp_pos_to_byte(at(0, 6)), Some(6));

        // Just after the emoji: two UTF-16 code units, four UTF-8 bytes.
        assert_round_trip(&mapper, 0, 8, 10);
    }

    #[test]
    fn test_mixed_line_endings() {
        let mapper = PositionMapper::new("line 1\r\nline 2\nline 3\rline 4");

        assert_eq!(mapper.line_ending(), LineEnding::Mixed);

        // Byte offset at which each successive line starts.
        let line_starts = [0usize, 8, 15, 22];
        for (line, byte) in line_starts.iter().enumerate() {
            assert_eq!(mapper.byte_to_lsp_pos(*byte), at(line as u32, 0));
        }
    }

    #[test]
    fn test_incremental_edit() {
        let mut mapper = PositionMapper::new("hello world");

        // Replace "world" with "Rust".
        mapper.apply_edit(6, 11, "Rust");
        assert_eq!(mapper.text(), "hello Rust");

        // Pure insertion.
        mapper.apply_edit(5, 5, " beautiful");
        assert_eq!(mapper.text(), "hello beautiful Rust");

        // Replace " beautiful " with a single space.
        mapper.apply_edit(5, 16, " ");
        assert_eq!(mapper.text(), "hello Rust");
    }
}