1use crate::WirePosition as Position;
10use ropey::Rope;
11use serde_json::Value;
12
13pub struct PositionMapper {
37 rope: Rope,
39 line_ending: LineEnding,
41}
42
/// Line-ending convention detected in a piece of text.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum LineEnding {
    /// Only `\n` line breaks were found (also reported when there are none).
    Lf,
    /// Only `\r\n` line breaks were found.
    CrLf,
    /// Only lone `\r` line breaks were found.
    Cr,
    /// More than one kind of line break was found.
    Mixed,
}
55
56impl PositionMapper {
57 pub fn new(text: &str) -> Self {
73 let rope = Rope::from_str(text);
74 let line_ending = detect_line_ending(text);
75 Self { rope, line_ending }
76 }
77
78 pub fn update(&mut self, text: &str) {
80 self.rope = Rope::from_str(text);
81 self.line_ending = detect_line_ending(text);
82 }
83
84 pub fn apply_edit(&mut self, start_byte: usize, end_byte: usize, new_text: &str) {
86 let start_byte = start_byte.min(self.rope.len_bytes());
88 let end_byte = end_byte.min(self.rope.len_bytes());
89
90 let start_char = self.rope.byte_to_char(start_byte);
92 let end_char = self.rope.byte_to_char(end_byte);
93
94 if end_char > start_char {
96 self.rope.remove(start_char..end_char);
97 }
98
99 if !new_text.is_empty() {
101 self.rope.insert(start_char, new_text);
102 }
103
104 self.line_ending = detect_line_ending(&self.rope.to_string());
106 }
107
108 pub fn lsp_pos_to_byte(&self, pos: Position) -> Option<usize> {
124 let line_idx = pos.line as usize;
125 if line_idx >= self.rope.len_lines() {
126 return None;
127 }
128
129 let line_start_byte = self.rope.line_to_byte(line_idx);
130 let line = self.rope.line(line_idx);
131
132 let mut utf16_offset = 0u32;
134 let mut byte_offset = 0;
135
136 for ch in line.chars() {
137 if utf16_offset >= pos.character {
138 break;
139 }
140
141 let ch_utf16_len = if ch as u32 > 0xFFFF { 2 } else { 1 };
142 let next_utf16 = utf16_offset + ch_utf16_len;
143
144 if next_utf16 > pos.character {
147 break;
148 }
149
150 utf16_offset = next_utf16;
151 byte_offset += ch.len_utf8();
152 }
153
154 Some(line_start_byte + byte_offset)
155 }
156
157 pub fn byte_to_lsp_pos(&self, byte_offset: usize) -> Position {
172 let byte_offset = byte_offset.min(self.rope.len_bytes());
173
174 let line_idx = self.rope.byte_to_line(byte_offset);
175 let line_start_byte = self.rope.line_to_byte(line_idx);
176 let byte_in_line = byte_offset - line_start_byte;
177
178 let line = self.rope.line(line_idx);
180 let mut utf16_offset = 0u32;
181 let mut current_byte = 0;
182
183 for ch in line.chars() {
184 if current_byte >= byte_in_line {
185 break;
186 }
187 let ch_len = ch.len_utf8();
188 if current_byte + ch_len > byte_in_line {
189 break;
191 }
192 current_byte += ch_len;
193 let ch_utf16_len = if ch as u32 > 0xFFFF { 2 } else { 1 };
194 utf16_offset += ch_utf16_len;
195 }
196
197 Position { line: line_idx as u32, character: utf16_offset }
198 }
199
200 pub fn text(&self) -> String {
202 self.rope.to_string()
203 }
204
205 pub fn slice(&self, start_byte: usize, end_byte: usize) -> String {
207 let start = start_byte.min(self.rope.len_bytes());
208 let end = end_byte.min(self.rope.len_bytes());
209 self.rope.slice(self.rope.byte_to_char(start)..self.rope.byte_to_char(end)).to_string()
210 }
211
212 pub fn len_bytes(&self) -> usize {
214 self.rope.len_bytes()
215 }
216
217 pub fn len_lines(&self) -> usize {
219 self.rope.len_lines()
220 }
221
222 pub fn lsp_pos_to_char(&self, pos: Position) -> Option<usize> {
224 self.lsp_pos_to_byte(pos).map(|byte| self.rope.byte_to_char(byte))
225 }
226
227 pub fn char_to_lsp_pos(&self, char_idx: usize) -> Position {
229 let byte_offset = self.rope.char_to_byte(char_idx);
230 self.byte_to_lsp_pos(byte_offset)
231 }
232
233 pub fn is_empty(&self) -> bool {
235 self.rope.len_bytes() == 0
236 }
237
238 pub fn line_ending(&self) -> LineEnding {
240 self.line_ending
241 }
242}
243
244pub fn json_to_position(pos: &Value) -> Option<Position> {
248 Some(Position {
249 line: pos["line"].as_u64()? as u32,
250 character: pos["character"].as_u64()? as u32,
251 })
252}
253
254pub fn position_to_json(pos: Position) -> Value {
258 serde_json::json!({
259 "line": pos.line,
260 "character": pos.character
261 })
262}
263
264fn detect_line_ending(text: &str) -> LineEnding {
266 let mut crlf_count = 0;
267 let mut lf_count = 0;
268 let mut cr_count = 0;
269
270 let bytes = text.as_bytes();
271 let mut i = 0;
272 while i < bytes.len() {
273 if i + 1 < bytes.len() && bytes[i] == b'\r' && bytes[i + 1] == b'\n' {
274 crlf_count += 1;
275 i += 2;
276 } else if bytes[i] == b'\n' {
277 lf_count += 1;
278 i += 1;
279 } else if bytes[i] == b'\r' {
280 cr_count += 1;
281 i += 1;
282 } else {
283 i += 1;
284 }
285 }
286
287 if crlf_count > 0 && lf_count == 0 && cr_count == 0 {
289 LineEnding::CrLf
290 } else if lf_count > 0 && crlf_count == 0 && cr_count == 0 {
291 LineEnding::Lf
292 } else if cr_count > 0 && crlf_count == 0 && lf_count == 0 {
293 LineEnding::Cr
294 } else if crlf_count > 0 || lf_count > 0 || cr_count > 0 {
295 LineEnding::Mixed
296 } else {
297 LineEnding::Lf }
299}
300
/// Replaces the bytes in `start_byte..old_end_byte` of `text` with
/// `replacement`.
///
/// The edit is silently skipped, leaving `text` untouched, when the range is
/// inverted, extends past the end of `text`, or has an endpoint that is not
/// on a UTF-8 character boundary.
pub fn apply_edit_utf8(
    text: &mut String,
    start_byte: usize,
    old_end_byte: usize,
    replacement: &str,
) {
    // `is_char_boundary` is false for indices beyond `text.len()`, so it also
    // covers the out-of-range case; the ordering check keeps `replace_range`
    // from panicking on an inverted range.
    let range_is_valid = start_byte <= old_end_byte
        && text.is_char_boundary(start_byte)
        && text.is_char_boundary(old_end_byte);

    if range_is_valid {
        text.replace_range(start_byte..old_end_byte, replacement);
    }
}
316
/// Counts the `\n` characters in `text`.
pub fn newline_count(text: &str) -> usize {
    // `\n` is a single ASCII byte and never occurs inside a multi-byte UTF-8
    // sequence, so scanning bytes gives the same count as scanning chars.
    text.bytes().filter(|&byte| byte == b'\n').count()
}
323
/// Returns the length in UTF-8 bytes of the text after the last `\n` in
/// `text`, or of the whole text when it contains no `\n`.
pub fn last_line_column_utf8(text: &str) -> u32 {
    // `rsplit` always yields at least one piece; the first is the tail that
    // follows the final `\n` (or the entire input when there is none).
    let tail_len = text.rsplit('\n').next().map_or(0, str::len);
    tail_len as u32
}
334
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for building a position.
    fn at(line: u32, character: u32) -> Position {
        Position { line, character }
    }

    /// Asserts that each `(column, byte)` pair on line 0 maps column -> byte.
    fn assert_line0_columns(mapper: &PositionMapper, cases: &[(u32, usize)]) {
        for &(col, expected) in cases {
            assert_eq!(mapper.lsp_pos_to_byte(at(0, col)), Some(expected), "line 0 col {col}");
        }
    }

    /// Asserts that each `(byte, line, column)` triple maps byte -> position.
    fn assert_byte_positions(mapper: &PositionMapper, cases: &[(usize, u32, u32)]) {
        for &(byte, line, col) in cases {
            assert_eq!(mapper.byte_to_lsp_pos(byte), at(line, col), "byte {byte}");
        }
    }

    /// Positions and byte offsets round-trip on LF-terminated ASCII text.
    #[test]
    fn test_lf_positions() {
        let mapper = PositionMapper::new("line 1\nline 2\nline 3");

        for (line, col, byte) in [(0, 0, 0), (0, 3, 3), (1, 0, 7)] {
            assert_eq!(mapper.lsp_pos_to_byte(at(line, col)), Some(byte));
            assert_eq!(mapper.byte_to_lsp_pos(byte), at(line, col));
        }
    }

    /// CRLF text is detected as such and line starts follow the two-byte break.
    #[test]
    fn test_crlf_positions() {
        let mapper = PositionMapper::new("line 1\r\nline 2\r\nline 3");

        assert_eq!(mapper.line_ending(), LineEnding::CrLf);

        for (line, byte) in [(1, 8), (2, 16)] {
            assert_eq!(mapper.lsp_pos_to_byte(at(line, 0)), Some(byte));
            assert_eq!(mapper.byte_to_lsp_pos(byte), at(line, 0));
        }
    }

    /// An emoji occupies two UTF-16 units but four UTF-8 bytes.
    #[test]
    fn test_utf16_positions() {
        let mapper = PositionMapper::new("hello 😀 world");

        assert_eq!(mapper.lsp_pos_to_byte(at(0, 6)), Some(6));

        assert_eq!(mapper.lsp_pos_to_byte(at(0, 8)), Some(10));
        assert_eq!(mapper.byte_to_lsp_pos(10), at(0, 8));
    }

    /// A column inside a surrogate pair resolves to the character's start.
    #[test]
    fn test_utf16_positions_clamp_mid_surrogate_to_char_start() {
        let mapper = PositionMapper::new("a😀b");

        assert_eq!(mapper.lsp_pos_to_byte(at(0, 2)), Some(1));
    }

    /// Every column around a single surrogate pair maps to the expected byte.
    #[test]
    fn test_utf16_surrogate_pair_boundaries() {
        let mapper = PositionMapper::new("x💖y");

        assert_line0_columns(&mapper, &[(0, 0), (1, 1), (2, 1), (3, 5), (4, 6)]);
    }

    /// The highest code point behaves like any other supplementary character.
    #[test]
    fn test_utf16_max_code_point() {
        let max_char = '\u{10FFFF}';
        let text = format!("a{max_char}b");
        let mapper = PositionMapper::new(&text);

        assert_line0_columns(&mapper, &[(0, 0), (1, 1), (2, 1), (3, 5), (4, 6)]);
        assert_byte_positions(&mapper, &[(0, 0, 0), (1, 0, 1), (5, 0, 3), (6, 0, 4)]);
    }

    /// One-, two- and four-byte characters mixed on a single line.
    #[test]
    fn test_utf16_mixed_bmp_and_supplementary_plane() {
        let mapper = PositionMapper::new("aé💖ñ🎉b");

        assert_line0_columns(
            &mapper,
            &[(0, 0), (1, 1), (2, 3), (3, 3), (4, 7), (5, 9), (6, 9), (7, 13), (8, 14)],
        );
    }

    /// An empty document has a single line on which every column maps to byte 0.
    #[test]
    fn test_utf16_zero_length_input() {
        let mapper = PositionMapper::new("");

        assert_eq!(mapper.lsp_pos_to_byte(at(0, 0)), Some(0));
        assert_eq!(mapper.lsp_pos_to_byte(at(0, 5)), Some(0));

        assert!(mapper.lsp_pos_to_byte(at(1, 0)).is_none());

        assert_eq!(mapper.byte_to_lsp_pos(0), at(0, 0));
    }

    /// Two adjacent surrogate pairs: odd columns fall back to the pair's start.
    #[test]
    fn test_utf16_consecutive_surrogate_pairs() {
        let mapper = PositionMapper::new("💖💖");

        assert_line0_columns(&mapper, &[(0, 0), (1, 0), (2, 4), (3, 4), (4, 8)]);
    }

    /// The mapper and the standalone conversion helper agree on line 0.
    #[test]
    fn test_utf16_clamp_matches_convert_helper() {
        use crate::convert::utf16_line_col_to_offset;

        let text = "a😀b💖c\nx💡y";
        let mapper = PositionMapper::new(text);

        for col in 0..=7 {
            let mapper_byte = mapper.lsp_pos_to_byte(at(0, col)).unwrap_or(usize::MAX);
            let helper_byte = utf16_line_col_to_offset(text, 0, col);
            assert_eq!(
                mapper_byte, helper_byte,
                "disagreement at line 0 col {col}: mapper={mapper_byte} helper={helper_byte}"
            );
        }
    }

    /// CRLF, LF and lone CR in one document: style is Mixed, each starts a line.
    #[test]
    fn test_mixed_line_endings() {
        let mapper = PositionMapper::new("line 1\r\nline 2\nline 3\rline 4");

        assert_eq!(mapper.line_ending(), LineEnding::Mixed);

        assert_byte_positions(&mapper, &[(0, 0, 0), (8, 1, 0), (15, 2, 0), (22, 3, 0)]);
    }

    /// Replacement, pure insertion and shrinking replacement via `apply_edit`.
    #[test]
    fn test_incremental_edit() {
        let mut mapper = PositionMapper::new("hello world");

        let steps = [
            (6, 11, "Rust", "hello Rust"),
            (5, 5, " beautiful", "hello beautiful Rust"),
            (5, 16, " ", "hello Rust"),
        ];
        for (start, end, replacement, expected) in steps {
            mapper.apply_edit(start, end, replacement);
            assert_eq!(mapper.text(), expected);
        }
    }
}