1use crate::WirePosition as Position;
10use ropey::Rope;
11use serde_json::Value;
12
13pub struct PositionMapper {
37 rope: Rope,
39 line_ending: LineEnding,
41}
42
/// Line-ending style found in a document.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum LineEnding {
    /// Only `\n` line breaks.
    Lf,
    /// Only `\r\n` line breaks.
    CrLf,
    /// Only lone `\r` line breaks.
    Cr,
    /// More than one kind of line break.
    Mixed,
}
55
56impl PositionMapper {
57 pub fn new(text: &str) -> Self {
73 let rope = Rope::from_str(text);
74 let line_ending = detect_line_ending(text);
75 Self { rope, line_ending }
76 }
77
78 pub fn update(&mut self, text: &str) {
80 self.rope = Rope::from_str(text);
81 self.line_ending = detect_line_ending(text);
82 }
83
84 pub fn apply_edit(&mut self, start_byte: usize, end_byte: usize, new_text: &str) {
86 let start_byte = start_byte.min(self.rope.len_bytes());
88 let end_byte = end_byte.min(self.rope.len_bytes());
89
90 let start_char = self.rope.byte_to_char(start_byte);
92 let end_char = self.rope.byte_to_char(end_byte);
93
94 if end_char > start_char {
96 self.rope.remove(start_char..end_char);
97 }
98
99 if !new_text.is_empty() {
101 self.rope.insert(start_char, new_text);
102 }
103
104 self.line_ending = detect_line_ending(&self.rope.to_string());
106 }
107
108 pub fn lsp_pos_to_byte(&self, pos: Position) -> Option<usize> {
124 let line_idx = pos.line as usize;
125 if line_idx >= self.rope.len_lines() {
126 return None;
127 }
128
129 let line_start_byte = self.rope.line_to_byte(line_idx);
130 let line = self.rope.line(line_idx);
131
132 let mut utf16_offset = 0u32;
134 let mut byte_offset = 0;
135
136 for ch in line.chars() {
137 if utf16_offset >= pos.character {
138 break;
139 }
140
141 let ch_utf16_len = if ch as u32 > 0xFFFF { 2 } else { 1 };
142 let next_utf16 = utf16_offset + ch_utf16_len;
143
144 if next_utf16 > pos.character {
147 break;
148 }
149
150 utf16_offset = next_utf16;
151 byte_offset += ch.len_utf8();
152 }
153
154 Some(line_start_byte + byte_offset)
155 }
156
157 pub fn byte_to_lsp_pos(&self, byte_offset: usize) -> Position {
172 let byte_offset = byte_offset.min(self.rope.len_bytes());
173
174 let line_idx = self.rope.byte_to_line(byte_offset);
175 let line_start_byte = self.rope.line_to_byte(line_idx);
176 let byte_in_line = byte_offset - line_start_byte;
177
178 let line = self.rope.line(line_idx);
180 let mut utf16_offset = 0u32;
181 let mut current_byte = 0;
182
183 for ch in line.chars() {
184 if current_byte >= byte_in_line {
185 break;
186 }
187 let ch_len = ch.len_utf8();
188 if current_byte + ch_len > byte_in_line {
189 break;
191 }
192 current_byte += ch_len;
193 let ch_utf16_len = if ch as u32 > 0xFFFF { 2 } else { 1 };
194 utf16_offset += ch_utf16_len;
195 }
196
197 Position { line: line_idx as u32, character: utf16_offset }
198 }
199
200 pub fn text(&self) -> String {
202 self.rope.to_string()
203 }
204
205 pub fn slice(&self, start_byte: usize, end_byte: usize) -> String {
207 let start = start_byte.min(self.rope.len_bytes());
208 let end = end_byte.min(self.rope.len_bytes());
209 self.rope.slice(self.rope.byte_to_char(start)..self.rope.byte_to_char(end)).to_string()
210 }
211
212 pub fn len_bytes(&self) -> usize {
214 self.rope.len_bytes()
215 }
216
217 pub fn len_lines(&self) -> usize {
219 self.rope.len_lines()
220 }
221
222 pub fn lsp_pos_to_char(&self, pos: Position) -> Option<usize> {
224 self.lsp_pos_to_byte(pos).map(|byte| self.rope.byte_to_char(byte))
225 }
226
227 pub fn char_to_lsp_pos(&self, char_idx: usize) -> Position {
229 let byte_offset = self.rope.char_to_byte(char_idx);
230 self.byte_to_lsp_pos(byte_offset)
231 }
232
233 pub fn is_empty(&self) -> bool {
235 self.rope.len_bytes() == 0
236 }
237
238 pub fn line_ending(&self) -> LineEnding {
240 self.line_ending
241 }
242}
243
244pub fn json_to_position(pos: &Value) -> Option<Position> {
248 Some(Position {
249 line: pos["line"].as_u64()? as u32,
250 character: pos["character"].as_u64()? as u32,
251 })
252}
253
254pub fn position_to_json(pos: Position) -> Value {
258 serde_json::json!({
259 "line": pos.line,
260 "character": pos.character
261 })
262}
263
264fn detect_line_ending(text: &str) -> LineEnding {
266 let mut crlf_count = 0;
267 let mut lf_count = 0;
268 let mut cr_count = 0;
269
270 let bytes = text.as_bytes();
271 let mut i = 0;
272 while i < bytes.len() {
273 if i + 1 < bytes.len() && bytes[i] == b'\r' && bytes[i + 1] == b'\n' {
274 crlf_count += 1;
275 i += 2;
276 } else if bytes[i] == b'\n' {
277 lf_count += 1;
278 i += 1;
279 } else if bytes[i] == b'\r' {
280 cr_count += 1;
281 i += 1;
282 } else {
283 i += 1;
284 }
285 }
286
287 if crlf_count > 0 && lf_count == 0 && cr_count == 0 {
289 LineEnding::CrLf
290 } else if lf_count > 0 && crlf_count == 0 && cr_count == 0 {
291 LineEnding::Lf
292 } else if cr_count > 0 && crlf_count == 0 && lf_count == 0 {
293 LineEnding::Cr
294 } else if crlf_count > 0 || lf_count > 0 || cr_count > 0 {
295 LineEnding::Mixed
296 } else {
297 LineEnding::Lf }
299}
300
/// Replaces `start_byte..old_end_byte` of `text` with `replacement`.
///
/// The edit is skipped, leaving `text` untouched, when either offset is not
/// on a UTF-8 character boundary (which includes offsets past the end of
/// `text`) or when the range is inverted.
pub fn apply_edit_utf8(
    text: &mut String,
    start_byte: usize,
    old_end_byte: usize,
    replacement: &str,
) {
    // `replace_range` panics on an inverted range or a non-boundary offset,
    // so all three conditions are checked up front.
    let range_is_valid = start_byte <= old_end_byte
        && text.is_char_boundary(start_byte)
        && text.is_char_boundary(old_end_byte);

    if range_is_valid {
        text.replace_range(start_byte..old_end_byte, replacement);
    }
}
316
/// Counts the `\n` characters in `text`.
pub fn newline_count(text: &str) -> usize {
    text.matches('\n').count()
}
323
/// Returns the byte length of whatever follows the last `\n` in `text`,
/// or of the whole text when it contains no `\n`.
pub fn last_line_column_utf8(text: &str) -> u32 {
    let last_line = text.rsplit_once('\n').map_or(text, |(_, tail)| tail);
    last_line.len() as u32
}
334
#[cfg(test)]
mod tests {
    use super::*;
    use perl_tdd_support::must_some;

    /// Shorthand for building a wire position.
    fn at(line: u32, character: u32) -> Position {
        Position { line, character }
    }

    /// Checks the byte offset produced for each `(column, byte)` pair on line 0.
    fn assert_line0_columns(mapper: &PositionMapper, expected: &[(u32, usize)]) {
        for &(col, byte) in expected {
            assert_eq!(mapper.lsp_pos_to_byte(at(0, col)), Some(byte), "line 0 col {col}");
        }
    }

    /// Checks the position produced for each `(byte, line, column)` triple.
    fn assert_byte_positions(mapper: &PositionMapper, expected: &[(usize, u32, u32)]) {
        for &(byte, line, col) in expected {
            assert_eq!(mapper.byte_to_lsp_pos(byte), at(line, col), "byte {byte}");
        }
    }

    #[test]
    fn test_lf_positions() {
        let mapper = PositionMapper::new("line 1\nline 2\nline 3");

        // (line, column, byte) triples checked in both directions.
        for (line, col, byte) in [(0u32, 0u32, 0usize), (0, 3, 3), (1, 0, 7)] {
            assert_eq!(mapper.lsp_pos_to_byte(at(line, col)), Some(byte));
            assert_eq!(mapper.byte_to_lsp_pos(byte), at(line, col));
        }
    }

    #[test]
    fn test_crlf_positions() {
        let mapper = PositionMapper::new("line 1\r\nline 2\r\nline 3");

        assert_eq!(mapper.line_ending(), LineEnding::CrLf);

        for (line, byte) in [(1u32, 8usize), (2, 16)] {
            assert_eq!(mapper.lsp_pos_to_byte(at(line, 0)), Some(byte));
            assert_eq!(mapper.byte_to_lsp_pos(byte), at(line, 0));
        }
    }

    #[test]
    fn test_utf16_positions() {
        let mapper = PositionMapper::new("hello 😀 world");

        assert_eq!(mapper.lsp_pos_to_byte(at(0, 6)), Some(6));

        assert_eq!(mapper.lsp_pos_to_byte(at(0, 8)), Some(10));
        assert_eq!(mapper.byte_to_lsp_pos(10), at(0, 8));
    }

    #[test]
    fn test_utf16_positions_clamp_mid_surrogate_to_char_start() {
        let mapper = PositionMapper::new("a😀b");

        assert_eq!(mapper.lsp_pos_to_byte(at(0, 2)), Some(1));
    }

    #[test]
    fn test_utf16_surrogate_pair_boundaries() {
        let mapper = PositionMapper::new("x💖y");

        assert_line0_columns(&mapper, &[(0, 0), (1, 1), (2, 1), (3, 5), (4, 6)]);
    }

    #[test]
    fn test_utf16_max_code_point() {
        let max_char = '\u{10FFFF}';
        let text = format!("a{max_char}b");
        let mapper = PositionMapper::new(&text);

        assert_line0_columns(&mapper, &[(0, 0), (1, 1), (2, 1), (3, 5), (4, 6)]);
        assert_byte_positions(&mapper, &[(0, 0, 0), (1, 0, 1), (5, 0, 3), (6, 0, 4)]);
    }

    #[test]
    fn test_utf16_mixed_bmp_and_supplementary_plane() {
        let mapper = PositionMapper::new("aé💖ñ🎉b");

        assert_line0_columns(
            &mapper,
            &[(0, 0), (1, 1), (2, 3), (3, 3), (4, 7), (5, 9), (6, 9), (7, 13), (8, 14)],
        );
    }

    #[test]
    fn test_utf16_zero_length_input() {
        let mapper = PositionMapper::new("");

        assert_line0_columns(&mapper, &[(0, 0), (5, 0)]);

        assert!(mapper.lsp_pos_to_byte(at(1, 0)).is_none());

        assert_eq!(mapper.byte_to_lsp_pos(0), at(0, 0));
    }

    #[test]
    fn test_utf16_consecutive_surrogate_pairs() {
        let mapper = PositionMapper::new("💖💖");

        assert_line0_columns(&mapper, &[(0, 0), (1, 0), (2, 4), (3, 4), (4, 8)]);
    }

    #[test]
    fn test_utf16_clamp_matches_convert_helper() {
        use crate::convert::utf16_line_col_to_offset;

        let text = "a😀b💖c\nx💡y";
        let mapper = PositionMapper::new(text);

        for col in 0..=7 {
            let mapper_byte = mapper.lsp_pos_to_byte(at(0, col)).unwrap_or(usize::MAX);
            let helper_byte = utf16_line_col_to_offset(text, 0, col);
            assert_eq!(
                mapper_byte, helper_byte,
                "disagreement at line 0 col {col}: mapper={mapper_byte} helper={helper_byte}"
            );
        }
    }

    #[test]
    fn test_mixed_line_endings() {
        let mapper = PositionMapper::new("line 1\r\nline 2\nline 3\rline 4");

        assert_eq!(mapper.line_ending(), LineEnding::Mixed);

        assert_byte_positions(&mapper, &[(0, 0, 0), (8, 1, 0), (15, 2, 0), (22, 3, 0)]);
    }

    #[test]
    fn test_incremental_edit() {
        let mut mapper = PositionMapper::new("hello world");

        // (start, end, replacement, text expected afterwards), applied in order.
        let steps = [
            (6, 11, "Rust", "hello Rust"),
            (5, 5, " beautiful", "hello beautiful Rust"),
            (5, 16, " ", "hello Rust"),
        ];
        for (start, end, replacement, expected) in steps {
            mapper.apply_edit(start, end, replacement);
            assert_eq!(mapper.text(), expected);
        }
    }

    #[test]
    fn test_multibyte_utf8_round_trip_byte_to_lsp_pos_to_byte() {
        let mapper = PositionMapper::new("aé🦀b");
        let cases = [(0usize, 0u32), (1, 1), (3, 2), (7, 4)];

        for (byte_offset, col) in cases {
            assert_eq!(mapper.byte_to_lsp_pos(byte_offset), at(0, col));
        }

        for (byte_offset, col) in cases {
            let pos = at(0, col);
            let got_byte = must_some(mapper.lsp_pos_to_byte(pos));
            assert_eq!(
                got_byte, byte_offset,
                "lsp_pos_to_byte for col {col} should be byte {byte_offset}"
            );
            assert_eq!(
                mapper.byte_to_lsp_pos(got_byte),
                pos,
                "byte_to_lsp_pos should round-trip for col {col}"
            );
        }
    }

    #[test]
    fn test_lsp_pos_to_char_and_char_to_lsp_pos_round_trip() {
        let mapper = PositionMapper::new("aéb");

        let char_idx = must_some(mapper.lsp_pos_to_char(at(0, 1)));
        assert_eq!(char_idx, 1, "char index of 'é' is 1");

        let pos = mapper.char_to_lsp_pos(char_idx);
        assert_eq!(pos, at(0, 1));

        let char_b = must_some(mapper.lsp_pos_to_char(at(0, 2)));
        assert_eq!(char_b, 2);
        assert_eq!(mapper.char_to_lsp_pos(char_b), at(0, 2));
    }

    #[test]
    fn test_crlf_lsp_pos_to_byte_per_line() {
        let mapper = PositionMapper::new("abc\r\ndef\r\nghi");
        assert_eq!(mapper.line_ending(), LineEnding::CrLf);

        for (line, col, byte) in [(0u32, 0u32, 0usize), (0, 2, 2), (1, 0, 5), (1, 2, 7), (2, 0, 10)]
        {
            assert_eq!(mapper.lsp_pos_to_byte(at(line, col)), Some(byte));
        }
    }

    #[test]
    fn test_out_of_bounds_line_returns_none() {
        let mapper = PositionMapper::new("one\ntwo\n");
        let past_end = at(3, 0);

        assert!(
            mapper.lsp_pos_to_byte(past_end).is_none(),
            "line past end of document should return None"
        );
        assert!(
            mapper.lsp_pos_to_char(past_end).is_none(),
            "line past end of document should return None for lsp_pos_to_char"
        );
    }

    #[test]
    fn test_out_of_bounds_column_clamps_to_line_end() {
        let mapper = PositionMapper::new("hello\nworld\n");

        let clamped = must_some(mapper.lsp_pos_to_byte(at(0, 9999)));

        assert!(clamped <= 6, "clamped byte {clamped} should not exceed end of line 0 (byte 6)");
    }
}