1use lsp_types::{Position, Range, TextDocumentContentChangeEvent};
2
3#[derive(Debug)]
4pub struct FullTextDocument {
5 language_id: String,
6 version: i32,
7 content: String,
8
9 line_offsets: Vec<u32>,
13}
14
15fn computed_line_offsets(text: &str, is_at_line_start: bool, text_offset: Option<u32>) -> Vec<u32> {
16 let text_offset = text_offset.unwrap_or(0);
17 let mut line_offsets = if is_at_line_start {
18 vec![text_offset]
19 } else {
20 vec![]
21 };
22
23 let mut chars = text.char_indices().peekable();
24 while let Some((idx, char)) = chars.next() {
25 let idx: u32 = idx
26 .try_into()
27 .expect("The length of the text involved in the calculation is too long");
28 if char == '\r' && chars.peek() == Some(&(idx as usize + 1, '\n')) {
29 chars.next();
30 line_offsets.push(text_offset + idx + 2);
31 } else if char == '\n' || char == '\r' {
32 line_offsets.push(text_offset + idx + 1);
33 }
34 }
35
36 line_offsets
37}
38
39fn line_offset_utf16(line: &str, offset: u32) -> u32 {
50 let mut c = 0;
51 for (idx, char) in line.char_indices() {
52 if idx + char.len_utf8() > offset as usize || idx == offset as usize {
53 break;
54 }
55 c += char.len_utf16() as u32;
56 }
57 c
58}
59
60impl FullTextDocument {
61 pub fn new(language_id: String, version: i32, content: String) -> Self {
62 let line_offsets = computed_line_offsets(&content, true, None);
63 Self {
64 language_id,
65 version,
66 content,
67 line_offsets,
68 }
69 }
70
71 pub fn update(&mut self, changes: &[TextDocumentContentChangeEvent], version: i32) {
72 for change in changes {
73 let TextDocumentContentChangeEvent { range, text, .. } = change;
74 match range {
75 Some(range) => {
76 let Range { start, end } = range;
78 let (start, start_offset) = self.find_canonical_position(start);
79 let (end, end_offset) = self.find_canonical_position(end);
80 assert!(
81 start_offset <= end_offset,
82 "Start offset must be less than end offset. {}:{} (offset {}) is not <= {}:{} (offset {})",
83 start.line, start.character, start_offset,
84 end.line, end.character, end_offset
85 );
86 self.content
87 .replace_range((start_offset as usize)..(end_offset as usize), text);
88
89 let (start_line, end_line) = (start.line, end.line);
90 assert!(start_line <= end_line);
91 let added_line_offsets = computed_line_offsets(text, false, Some(start_offset));
92 let num_added_line_offsets = added_line_offsets.len();
93
94 let splice_start = start_line as usize + 1;
95 let splice_end = std::cmp::min(end_line, self.line_count() - 1) as usize;
96 self.line_offsets
97 .splice(splice_start..=splice_end, added_line_offsets);
98
99 let diff =
100 (text.len() as i32).saturating_sub_unsigned(end_offset - start_offset);
101 if diff != 0 {
102 for i in
103 (splice_start + num_added_line_offsets)..(self.line_count() as usize)
104 {
105 self.line_offsets[i] = self.line_offsets[i].saturating_add_signed(diff);
106 }
107 }
108 }
109 None => {
110 self.line_offsets = computed_line_offsets(text, true, None);
113
114 self.content = text.to_owned();
116 }
117 }
118 }
119
120 self.version = version;
121 }
122
123 fn find_canonical_position(&self, position: &Position) -> (Position, u32) {
128 let offset = self.offset_at(*position);
129 if offset == 0 {
130 (
131 Position {
132 line: 0,
133 character: 0,
134 },
135 0,
136 )
137 } else if self.content.as_bytes().get(offset as usize - 1) == Some(&b'\n') {
138 if self.line_offsets[position.line as usize] == offset {
139 (*position, offset)
140 } else if self.line_offsets[position.line as usize + 1] == offset {
141 (
142 Position {
143 line: position.line + 1,
144 character: 0,
145 },
146 offset,
147 )
148 } else {
149 panic!(
150 "Could not determine canonical value for {position:?} in {:?}",
151 self.content
152 )
153 }
154 } else {
155 (*position, offset)
156 }
157 }
158
159 pub fn language_id(&self) -> &str {
161 &self.language_id
162 }
163
164 pub fn version(&self) -> i32 {
166 self.version
167 }
168
169 pub fn get_content(&self, range: Option<Range>) -> &str {
191 match range {
192 Some(Range { start, end }) => {
193 let start = self.offset_at(start);
194 let end = self.offset_at(end).min(self.content_len());
195 self.content.get(start as usize..end as usize).unwrap()
196 }
197 None => &self.content,
198 }
199 }
200
201 fn get_line_and_offset(&self, line: u32) -> Option<(&str, u32)> {
202 self.line_offsets.get(line as usize).map(|&line_offset| {
203 let len: u32 = self.content_len();
204 let eol_offset = self.line_offsets.get((line + 1) as usize).unwrap_or(&len);
205 let line = &self.content[line_offset as usize..*eol_offset as usize];
206 (line, line_offset)
207 })
208 }
209
210 fn get_line(&self, line: u32) -> Option<&str> {
211 self.get_line_and_offset(line).map(|(line, _)| line)
212 }
213
214 pub fn line_count(&self) -> u32 {
216 self.line_offsets
217 .len()
218 .try_into()
219 .expect("The number of lines of text passed in is too long")
220 }
221
222 pub fn content_len(&self) -> u32 {
224 self.content
225 .len()
226 .try_into()
227 .expect("The length of the text passed in is too long")
228 }
229
230 pub fn position_at(&self, offset: u32) -> Position {
236 let offset = offset.min(self.content_len());
237 let line_count = self.line_count();
238 if line_count == 1 {
239 return Position {
241 line: 0,
242 character: line_offset_utf16(self.get_line(0).unwrap(), offset),
243 };
244 }
245
246 let (mut low, mut high) = (0, line_count);
247 while low < high {
248 let mid = (low + high) / 2;
249 if offset
250 >= *self
251 .line_offsets
252 .get(mid as usize)
253 .expect("Unknown mid value")
254 {
255 low = mid + 1;
256 } else {
257 high = mid;
258 }
259 }
260
261 if low == 0 {
262 return Position {
264 line: 0,
265 character: line_offset_utf16(self.get_line(0).unwrap(), offset),
266 };
267 }
268
269 let line = low - 1;
270
271 Position {
272 line,
273 character: line_offset_utf16(
274 self.get_line(line).unwrap(),
275 offset - self.line_offsets[line as usize],
276 ),
277 }
278 }
279
280 pub fn offset_at(&self, position: Position) -> u32 {
283 let Position { line, character } = position;
284 match self.get_line_and_offset(line) {
285 Some((line, offset)) => {
286 let mut c = 0;
287 let iter = line.char_indices();
288 for (idx, char) in iter {
289 if c == character {
290 return offset + idx as u32;
291 }
292 c += char.len_utf16() as u32;
293 }
294 offset + line.len() as u32
295 }
296 None => {
297 if line >= self.line_count() {
298 self.content_len()
299 } else {
300 0
301 }
302 }
303 }
304 }
305}
306
307#[cfg(test)]
308mod tests {
309 use super::*;
310
311 fn full_text_document() -> FullTextDocument {
312 FullTextDocument::new(
313 "js".to_string(),
314 2,
315 "he\nllo\nworld\r\nfoo\rbar".to_string(),
316 )
317 }
318
319 #[test]
320 fn test_offset_at() {
321 let text_document = full_text_document();
322
323 let offset = text_document.offset_at(Position {
324 line: 1,
325 character: 1,
326 });
327 assert_eq!(offset, 4);
328
329 let offset = text_document.offset_at(Position {
330 line: 2,
331 character: 3,
332 });
333 assert_eq!(offset, 10);
334
335 let offset = text_document.offset_at(Position {
337 line: 3,
338 character: 1,
339 });
340 assert_eq!(offset, 15);
341 }
342
343 #[test]
345 fn test_offset_at_bmp() {
346 let text_document = FullTextDocument::new("js".to_string(), 2, "\u{20AC} euro".to_string());
348
349 let offset = text_document.offset_at(Position {
350 line: 0,
351 character: 2,
354 });
355 assert_eq!(offset, 4);
356 }
357
358 #[test]
360 fn test_offset_at_smp() {
361 let text_document = FullTextDocument::new("js".to_string(), 2, "\u{10437} yee".to_string());
363 let offset = text_document.offset_at(Position {
364 line: 0,
365 character: 3,
368 });
369 assert_eq!(offset, 5);
370 }
371
372 #[test]
374 fn test_offset_at_beyond_end_of_line() {
375 let text_document =
376 FullTextDocument::new("js".to_string(), 2, "\u{20AC} abc\nline 2".to_string());
377 assert_eq!(text_document.line_offsets, vec![0, 8]);
381
382 let offset = text_document.offset_at(Position {
383 line: 0,
384 character: 100,
385 });
386 assert_eq!(offset, 8);
387 }
388
389 #[test]
390 fn test_position_at() {
391 let text_document = full_text_document();
392
393 let position = text_document.position_at(5);
394 assert_eq!(
395 position,
396 Position {
397 line: 1,
398 character: 2
399 }
400 );
401
402 let position = text_document.position_at(11);
403 assert_eq!(
404 position,
405 Position {
406 line: 2,
407 character: 4,
408 }
409 );
410
411 let position = text_document.position_at(15);
412 assert_eq!(
413 position,
414 Position {
415 line: 3,
416 character: 1,
417 }
418 );
419
420 let position = text_document.position_at(0);
421 assert_eq!(
422 position,
423 Position {
424 line: 0,
425 character: 0,
426 }
427 );
428 }
429
430 #[test]
432 fn test_position_at_bmp() {
433 let text_document = FullTextDocument::new("js".to_string(), 2, "\u{20AC} euro".to_string());
435 let position = text_document.position_at(4);
436 assert_eq!(
437 position,
438 Position {
439 line: 0,
440 character: 2,
443 }
444 );
445
446 let text_document =
448 FullTextDocument::new("js".to_string(), 2, "\n\n\u{20AC} euro\n\n".to_string());
449 let position = text_document.position_at(6);
450 assert_eq!(
451 position,
452 Position {
453 line: 2,
454 character: 2,
457 }
458 );
459 }
460
461 #[test]
463 fn test_position_at_smp() {
464 let text_document = FullTextDocument::new("js".to_string(), 2, "\u{10437} yee".to_string());
466 assert_eq!(
467 text_document.position_at(5),
468 Position {
469 line: 0,
470 character: 3,
473 }
474 );
475
476 assert_eq!(
478 text_document.position_at(2),
479 Position {
480 line: 0,
481 character: 0,
482 }
483 );
484
485 let text_document =
487 FullTextDocument::new("js".to_string(), 2, "\n\n\u{10437} yee\n\n".to_string());
488 let position = text_document.position_at(7);
489 assert_eq!(
490 position,
491 Position {
492 line: 2,
493 character: 3,
496 }
497 );
498 }
499
500 #[test]
502 fn test_position_at_line_head() {
503 let text_document = FullTextDocument::new("js".to_string(), 2, "\nyee\n\n".to_string());
504 let position = text_document.position_at(1);
505 assert_eq!(
506 position,
507 Position {
508 line: 1,
509 character: 0,
510 }
511 );
512 }
513
514 #[test]
515 fn test_get_content() {
516 let text_document = full_text_document();
517
518 let start = Position {
519 line: 0,
520 character: 0,
521 };
522 let end = Position {
523 line: 1,
524 character: 2,
525 };
526 let range = Range { start, end };
527 let content = text_document.get_content(Some(range));
528 assert_eq!(content, "he\nll");
529
530 let end = Position {
531 line: 100,
532 character: 100,
533 };
534 let range = Range { start, end };
535 let content = text_document.get_content(Some(range));
536 assert_eq!(content, text_document.content);
537
538 let range = Range {
539 start: Position {
540 line: 1,
541 character: 0,
542 },
543 end: Position {
544 line: 2,
545 character: 3,
546 },
547 };
548 let content = text_document.get_content(Some(range));
549 assert_eq!(content, "llo\nwor");
550 }
551
552 #[test]
554 fn test_get_content_bmp() {
555 let text_document = FullTextDocument::new("js".to_string(), 2, "\u{20AC} euro".to_string());
557
558 let range = Range {
560 start: Position {
561 line: 0,
562 character: 0,
563 },
564 end: Position {
565 line: 0,
566 character: 1,
567 },
568 };
569 let content = text_document.get_content(Some(range));
570 assert_eq!(content, "\u{20AC}");
571
572 let range = Range {
575 start: Position {
576 line: 0,
577 character: 2,
578 },
579 end: Position {
580 line: 0,
581 character: 3,
582 },
583 };
584 let content = text_document.get_content(Some(range));
585 assert_eq!(content, "e");
586 }
587
588 #[test]
590 fn test_get_content_smp() {
591 let text_document = FullTextDocument::new("js".to_string(), 2, "\u{10437} yee".to_string());
593
594 let range = Range {
596 start: Position {
597 line: 0,
598 character: 0,
599 },
600 end: Position {
601 line: 0,
602 character: 2,
603 },
604 };
605 let content = text_document.get_content(Some(range));
606 assert_eq!(content, "\u{10437}");
607 }
608
609 #[test]
610 fn test_update_full_content() {
611 let mut text_document = full_text_document();
612 let new_text = "hello\n js!";
613
614 text_document.update(
615 &[TextDocumentContentChangeEvent {
616 text: new_text.to_string(),
617 range: None,
618 range_length: None,
619 }],
620 1,
621 );
622
623 assert_eq!(&text_document.content, new_text);
624 assert_eq!(text_document.line_offsets, vec![0, 6]);
625 }
626
627 #[test]
628 fn test_update_part_content() {
629 let mut text_document = full_text_document();
630 assert_eq!(text_document.version(), 2);
631 let new_text = String::from("xx\ny");
632 let range = Range {
633 start: Position {
634 line: 1,
635 character: 0,
636 },
637 end: Position {
638 line: 1,
639 character: 3,
640 },
641 };
642 text_document.update(
643 &[TextDocumentContentChangeEvent {
644 range: Some(range),
645 range_length: None,
646 text: new_text,
647 }],
648 1,
649 );
650
651 assert_eq!(&text_document.content, "he\nxx\ny\nworld\r\nfoo\rbar");
652 assert_eq!(text_document.line_offsets, vec![0, 3, 6, 8, 15, 19]);
653 assert_eq!(text_document.version(), 1)
654 }
655
656 #[test]
657 fn test_update_new_content_at_end() {
658 let mut text_document = full_text_document();
659 let new_text = String::from("bar\nbaz");
660
661 let range = Range {
662 start: Position {
663 line: 4,
664 character: 0,
665 },
666 end: Position {
667 line: 5,
668 character: 0,
669 },
670 };
671 text_document.update(
672 &[TextDocumentContentChangeEvent {
673 range: Some(range),
674 range_length: None,
675 text: new_text,
676 }],
677 1,
678 );
679
680 assert_eq!(&text_document.content, "he\nllo\nworld\r\nfoo\rbar\nbaz");
681 assert_eq!(text_document.line_offsets, vec![0, 3, 7, 14, 18, 22]);
682 }
683
684 #[test]
685 #[should_panic(
686 expected = "Start offset must be less than end offset. 2:0 (offset 7) is not <= 1:0 (offset 3)"
687 )]
688 fn test_update_invalid_range() {
689 let mut text_document = full_text_document();
690 let range = Range {
692 start: Position {
693 line: 2,
694 character: 0,
695 },
696 end: Position {
697 line: 1,
698 character: 0,
699 },
700 };
701 text_document.update(
702 &[TextDocumentContentChangeEvent {
703 text: String::from(""),
704 range: Some(range),
705 range_length: Some(0),
706 }],
707 1,
708 );
709 }
710
711 #[test]
714 fn test_multiple_position_same_offset() {
715 let text_document = full_text_document();
716 let end_of_first_line = Position {
717 line: 0,
718 character: 3,
719 };
720 let start_of_second_line = Position {
721 line: 1,
722 character: 0,
723 };
724 assert_eq!(
725 text_document.offset_at(end_of_first_line),
726 text_document.offset_at(start_of_second_line)
727 );
728
729 let beyond_end_of_first_line = Position {
730 line: 0,
731 character: 10_000,
732 };
733 assert_eq!(
734 text_document.offset_at(beyond_end_of_first_line),
735 text_document.offset_at(start_of_second_line)
736 );
737 }
738
739 #[test]
740 fn test_insert_using_positions_after_newline_at_end_of_line() {
741 let mut doc = FullTextDocument::new(
742 "text".to_string(),
743 0,
744 "0:1332533\n0:1332534\n0:1332535\n0:1332536\n".to_string(),
745 );
746 doc.update(
747 &[TextDocumentContentChangeEvent {
748 range: Some(Range {
749 start: Position {
751 line: 1,
752 character: 10,
753 },
754 end: Position {
756 line: 2,
757 character: 10,
758 },
759 }),
760 range_length: None,
761 text: "1:6188912\n1:6188913\n1:6188914\n".to_string(),
762 }],
763 1,
764 );
765 assert_eq!(
766 doc.get_content(None),
767 concat!(
768 "0:1332533\n0:1332534\n",
769 "1:6188912\n1:6188913\n1:6188914\n",
770 "0:1332536\n",
771 ),
772 );
773 assert_eq!(doc.line_offsets, vec!(0, 10, 20, 30, 40, 50, 60));
774 }
775
776 #[test]
777 fn test_line_offsets() {
778 let mut doc =
779 FullTextDocument::new("text".to_string(), 0, "123456789\n123456789\n".to_string());
780 assert_eq!(doc.line_offsets, vec!(0, 10, 20));
781 doc.update(
782 &[TextDocumentContentChangeEvent {
783 range: Some(Range {
784 start: Position {
785 line: 1,
786 character: 5,
787 },
788 end: Position {
789 line: 1,
790 character: 5,
791 },
792 }),
793 range_length: None,
794 text: "\nA\nB\nC\n".to_string(),
795 }],
796 1,
797 );
798 assert_eq!(doc.get_content(None), "123456789\n12345\nA\nB\nC\n6789\n",);
799 assert_eq!(doc.line_offsets, vec!(0, 10, 16, 18, 20, 22, 27));
800 }
801
802 #[test]
807 fn test_find_canonical_position_regression() {
808 let str = "\u{20AC}456789\n123456789\n";
812 let mut doc = FullTextDocument::new("text".to_string(), 0, str.to_string());
813
814 let pos = Position {
815 line: 0,
816 character: 6,
817 };
818 let offset = doc.offset_at(pos) as usize;
819 assert_ne!(str.as_bytes().get(offset - 1), Some(&b'\n'));
820 assert_eq!(str.chars().nth(offset - 1), Some('\n'));
821
822 doc.update(
823 &[TextDocumentContentChangeEvent {
824 range: Some(Range {
825 start: pos,
826 end: Position {
827 line: 0,
828 character: 7,
829 },
830 }),
831 range_length: None,
832 text: "X".to_string(),
833 }],
834 1,
835 );
836 assert_eq!(doc.get_content(None), "\u{20AC}45678X\n123456789\n",);
837 assert_eq!(doc.line_offsets, vec!(0, 10, 20));
838 }
839}