1#[derive(Debug, Clone, Copy, PartialEq, Eq)]
8pub enum BufferType {
9 Original,
11 Add,
13}
14
15#[derive(Debug, Clone)]
17pub struct Piece {
18 pub buffer_type: BufferType,
20 pub start: usize,
22 pub byte_length: usize,
24 pub char_count: usize,
26}
27
28impl Piece {
29 pub fn new(
31 buffer_type: BufferType,
32 start: usize,
33 byte_length: usize,
34 char_count: usize,
35 ) -> Self {
36 Self {
37 buffer_type,
38 start,
39 byte_length,
40 char_count,
41 }
42 }
43}
44
45pub struct PieceTable {
47 original_buffer: Vec<u8>,
49 add_buffer: Vec<u8>,
51 pieces: Vec<Piece>,
53 operation_count: usize,
55 gc_threshold: usize,
57}
58
59#[derive(Debug, Clone, PartialEq, Eq)]
61pub enum PieceTableError {
62 InvalidPieceRange {
64 buffer_type: BufferType,
66 start: usize,
68 byte_length: usize,
70 buffer_len: usize,
72 },
73 InvalidPieceUtf8 {
75 buffer_type: BufferType,
77 start: usize,
79 byte_length: usize,
81 },
82}
83
84impl std::fmt::Display for PieceTableError {
85 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
86 match self {
87 Self::InvalidPieceRange {
88 buffer_type,
89 start,
90 byte_length,
91 buffer_len,
92 } => write!(
93 f,
94 "invalid {:?} piece range {}..{} for buffer length {}",
95 buffer_type,
96 start,
97 start.saturating_add(*byte_length),
98 buffer_len
99 ),
100 Self::InvalidPieceUtf8 {
101 buffer_type,
102 start,
103 byte_length,
104 } => write!(
105 f,
106 "invalid UTF-8 in {:?} piece range {}..{}",
107 buffer_type,
108 start,
109 start.saturating_add(*byte_length)
110 ),
111 }
112 }
113}
114
115impl std::error::Error for PieceTableError {}
116
117fn piece_table_invariant_error<T>(error: PieceTableError, fallback: T) -> T {
118 let _ = &fallback;
119
120 #[cfg(debug_assertions)]
121 panic!("PieceTable invariant violated: {error}");
122
123 #[cfg(not(debug_assertions))]
124 {
125 let _ = error;
126 fallback
127 }
128}
129
130impl PieceTable {
131 pub fn new(text: &str) -> Self {
133 let bytes = text.as_bytes().to_vec();
134 let char_count = text.chars().count();
135 let byte_length = bytes.len();
136
137 let pieces = if byte_length > 0 {
138 vec![Piece::new(BufferType::Original, 0, byte_length, char_count)]
139 } else {
140 Vec::new()
141 };
142
143 Self {
144 original_buffer: bytes,
145 add_buffer: Vec::new(),
146 pieces,
147 operation_count: 0,
148 gc_threshold: 1000, }
150 }
151
152 pub fn empty() -> Self {
154 Self {
155 original_buffer: Vec::new(),
156 add_buffer: Vec::new(),
157 pieces: Vec::new(),
158 operation_count: 0,
159 gc_threshold: 1000,
160 }
161 }
162
163 pub fn insert(&mut self, offset: usize, text: &str) {
165 if let Err(error) = self.try_insert(offset, text) {
166 piece_table_invariant_error(error, ());
167 }
168 }
169
170 pub fn try_insert(&mut self, offset: usize, text: &str) -> Result<(), PieceTableError> {
172 if text.is_empty() {
173 return Ok(());
174 }
175
176 let text_bytes = text.as_bytes();
177 let text_char_count = text.chars().count();
178 let text_byte_length = text_bytes.len();
179 let add_start = self.add_buffer.len();
180
181 let (piece_index, char_offset_in_piece) = self.find_piece_at_offset(offset);
183
184 enum InsertPlan {
185 Insert { index: usize, piece: Piece },
186 Splice { index: usize, pieces: Vec<Piece> },
187 Push(Piece),
188 }
189
190 let new_piece = Piece::new(
191 BufferType::Add,
192 add_start,
193 text_byte_length,
194 text_char_count,
195 );
196
197 let plan = if let Some(piece_idx) = piece_index {
198 let piece = self.pieces[piece_idx].clone();
199
200 if char_offset_in_piece == 0 {
201 InsertPlan::Insert {
203 index: piece_idx,
204 piece: new_piece,
205 }
206 } else if char_offset_in_piece == piece.char_count {
207 InsertPlan::Insert {
209 index: piece_idx + 1,
210 piece: new_piece,
211 }
212 } else {
213 let (left_piece, right_piece) = self.split_piece(&piece, char_offset_in_piece)?;
215 InsertPlan::Splice {
216 index: piece_idx,
217 pieces: vec![left_piece, new_piece, right_piece],
218 }
219 }
220 } else {
221 InsertPlan::Push(new_piece)
223 };
224
225 self.add_buffer.extend_from_slice(text_bytes);
226 match plan {
227 InsertPlan::Insert { index, piece } => self.pieces.insert(index, piece),
228 InsertPlan::Splice { index, pieces } => {
229 self.pieces.splice(index..=index, pieces);
230 }
231 InsertPlan::Push(piece) => self.pieces.push(piece),
232 }
233
234 self.try_merge_adjacent_pieces();
236
237 self.try_check_gc()?;
239 Ok(())
240 }
241
242 pub fn delete(&mut self, start_offset: usize, length: usize) {
244 if let Err(error) = self.try_delete(start_offset, length) {
245 piece_table_invariant_error(error, ());
246 }
247 }
248
249 pub fn try_delete(
251 &mut self,
252 start_offset: usize,
253 length: usize,
254 ) -> Result<(), PieceTableError> {
255 if length == 0 {
256 return Ok(());
257 }
258
259 let end_offset = start_offset.saturating_add(length);
260
261 let (start_piece_idx, start_char_offset) = self.find_piece_at_offset(start_offset);
263 let (end_piece_idx, end_char_offset) = self.find_piece_at_offset(end_offset);
264
265 match (start_piece_idx, end_piece_idx) {
266 (Some(start_idx), Some(end_idx)) if start_idx == end_idx => {
267 let piece = self.pieces[start_idx].clone();
269
270 if start_char_offset == 0 && end_char_offset == piece.char_count {
271 self.pieces.remove(start_idx);
273 } else if start_char_offset == 0 {
274 let (_, right) = self.split_piece(&piece, end_char_offset)?;
276 self.pieces[start_idx] = right;
277 } else if end_char_offset == piece.char_count {
278 let (left, _) = self.split_piece(&piece, start_char_offset)?;
280 self.pieces[start_idx] = left;
281 } else {
282 let (left, temp) = self.split_piece(&piece, start_char_offset)?;
284 let (_, right) =
285 self.split_piece(&temp, end_char_offset - start_char_offset)?;
286 self.pieces.splice(start_idx..=start_idx, vec![left, right]);
287 }
288 }
289 (Some(start_idx), Some(end_idx)) => {
290 let start_piece = self.pieces[start_idx].clone();
292 let end_piece = self.pieces[end_idx].clone();
293
294 let mut new_pieces = Vec::new();
295
296 if start_char_offset > 0 {
298 let (left, _) = self.split_piece(&start_piece, start_char_offset)?;
299 new_pieces.push(left);
300 }
301
302 if end_char_offset < end_piece.char_count {
304 let (_, right) = self.split_piece(&end_piece, end_char_offset)?;
305 new_pieces.push(right);
306 }
307
308 self.pieces.splice(start_idx..=end_idx, new_pieces);
310 }
311 (None, None) => {
312 }
314 _ => {
315 if let Some(start_idx) = start_piece_idx {
317 let start_piece = self.pieces[start_idx].clone();
319 if start_char_offset == 0 {
320 self.pieces.truncate(start_idx);
321 } else {
322 let (left, _) = self.split_piece(&start_piece, start_char_offset)?;
323 self.pieces.truncate(start_idx);
324 self.pieces.push(left);
325 }
326 }
327 }
328 }
329
330 self.try_check_gc()?;
332 Ok(())
333 }
334
335 pub fn get_text(&self) -> String {
337 match self.try_get_text() {
338 Ok(text) => text,
339 Err(error) => piece_table_invariant_error(error, String::new()),
340 }
341 }
342
343 pub fn try_get_text(&self) -> Result<String, PieceTableError> {
345 let mut result = String::new();
346 for piece in &self.pieces {
347 result.push_str(self.piece_text(piece)?);
348 }
349 Ok(result)
350 }
351
352 pub fn get_range(&self, start_offset: usize, length: usize) -> String {
354 match self.try_get_range(start_offset, length) {
355 Ok(text) => text,
356 Err(error) => piece_table_invariant_error(error, String::new()),
357 }
358 }
359
360 pub fn try_get_range(
362 &self,
363 start_offset: usize,
364 length: usize,
365 ) -> Result<String, PieceTableError> {
366 let mut result = String::new();
367 let mut current_offset: usize = 0;
368 let end_offset = start_offset.saturating_add(length);
369
370 for piece in &self.pieces {
371 let piece_end = current_offset.saturating_add(piece.char_count);
372
373 if current_offset >= end_offset {
374 break;
375 }
376
377 if piece_end > start_offset {
378 let piece_text = self.piece_text(piece)?;
379
380 let skip_chars = start_offset.saturating_sub(current_offset);
381
382 let take_chars = if piece_end > end_offset {
383 end_offset - current_offset.max(start_offset)
384 } else {
385 piece.char_count - skip_chars
386 };
387
388 result.push_str(
389 &piece_text
390 .chars()
391 .skip(skip_chars)
392 .take(take_chars)
393 .collect::<String>(),
394 );
395 }
396
397 current_offset = piece_end;
398 }
399
400 Ok(result)
401 }
402
403 pub fn char_count(&self) -> usize {
405 self.pieces.iter().map(|p| p.char_count).sum()
406 }
407
408 pub fn byte_count(&self) -> usize {
410 self.pieces.iter().map(|p| p.byte_length).sum()
411 }
412
413 pub fn add_buffer_size(&self) -> usize {
415 self.add_buffer.len()
416 }
417
418 fn buffer_for_piece(&self, piece: &Piece) -> &[u8] {
419 match piece.buffer_type {
420 BufferType::Original => &self.original_buffer,
421 BufferType::Add => &self.add_buffer,
422 }
423 }
424
425 fn piece_bytes(&self, piece: &Piece) -> Result<&[u8], PieceTableError> {
426 let buffer = self.buffer_for_piece(piece);
427 let end = piece.start.checked_add(piece.byte_length).ok_or(
428 PieceTableError::InvalidPieceRange {
429 buffer_type: piece.buffer_type,
430 start: piece.start,
431 byte_length: piece.byte_length,
432 buffer_len: buffer.len(),
433 },
434 )?;
435
436 buffer
437 .get(piece.start..end)
438 .ok_or(PieceTableError::InvalidPieceRange {
439 buffer_type: piece.buffer_type,
440 start: piece.start,
441 byte_length: piece.byte_length,
442 buffer_len: buffer.len(),
443 })
444 }
445
446 fn piece_text(&self, piece: &Piece) -> Result<&str, PieceTableError> {
447 std::str::from_utf8(self.piece_bytes(piece)?).map_err(|_| {
448 PieceTableError::InvalidPieceUtf8 {
449 buffer_type: piece.buffer_type,
450 start: piece.start,
451 byte_length: piece.byte_length,
452 }
453 })
454 }
455
456 fn find_piece_at_offset(&self, offset: usize) -> (Option<usize>, usize) {
459 let mut current_offset: usize = 0;
460
461 for (idx, piece) in self.pieces.iter().enumerate() {
462 let next_offset = current_offset.saturating_add(piece.char_count);
463 if offset <= next_offset {
464 return (Some(idx), offset - current_offset);
465 }
466 current_offset = next_offset;
467 }
468
469 match self.pieces.last() {
470 Some(piece) => (Some(self.pieces.len() - 1), piece.char_count),
471 None => (None, 0),
472 }
473 }
474
475 fn split_piece(
478 &self,
479 piece: &Piece,
480 char_offset: usize,
481 ) -> Result<(Piece, Piece), PieceTableError> {
482 let piece_text = self.piece_text(piece)?;
483 let char_offset = char_offset.min(piece.char_count);
484
485 let byte_offset = piece_text
488 .char_indices()
489 .nth(char_offset)
490 .map(|(i, _)| i)
491 .unwrap_or(piece.byte_length);
492
493 let left = Piece::new(piece.buffer_type, piece.start, byte_offset, char_offset);
494
495 let right = Piece::new(
496 piece.buffer_type,
497 piece.start + byte_offset,
498 piece.byte_length - byte_offset,
499 piece.char_count - char_offset,
500 );
501
502 Ok((left, right))
503 }
504
505 fn can_merge(&self, p1: &Piece, p2: &Piece) -> bool {
507 p1.buffer_type == p2.buffer_type &&
508 p1.buffer_type == BufferType::Add && p1.start.saturating_add(p1.byte_length) == p2.start
510 }
511
512 fn merge_pieces(&self, p1: &Piece, p2: &Piece) -> Piece {
514 Piece::new(
515 p1.buffer_type,
516 p1.start,
517 p1.byte_length.saturating_add(p2.byte_length),
518 p1.char_count.saturating_add(p2.char_count),
519 )
520 }
521
522 fn try_merge_adjacent_pieces(&mut self) {
524 let mut i = 0;
525 while i + 1 < self.pieces.len() {
526 if self.can_merge(&self.pieces[i], &self.pieces[i + 1]) {
527 let merged = self.merge_pieces(&self.pieces[i], &self.pieces[i + 1]);
528 self.pieces.splice(i..=i + 1, vec![merged]);
529 } else {
530 i += 1;
531 }
532 }
533 }
534
535 pub fn gc(&mut self) {
537 if let Err(error) = self.try_gc() {
538 piece_table_invariant_error(error, ());
539 }
540 }
541
542 pub fn try_gc(&mut self) -> Result<(), PieceTableError> {
544 let mut referenced_ranges: Vec<(usize, usize)> = Vec::new();
546 for piece in self
547 .pieces
548 .iter()
549 .filter(|p| p.buffer_type == BufferType::Add)
550 {
551 let end = piece.start.checked_add(piece.byte_length).ok_or(
552 PieceTableError::InvalidPieceRange {
553 buffer_type: piece.buffer_type,
554 start: piece.start,
555 byte_length: piece.byte_length,
556 buffer_len: self.add_buffer.len(),
557 },
558 )?;
559 if self.add_buffer.get(piece.start..end).is_none() {
560 return Err(PieceTableError::InvalidPieceRange {
561 buffer_type: piece.buffer_type,
562 start: piece.start,
563 byte_length: piece.byte_length,
564 buffer_len: self.add_buffer.len(),
565 });
566 }
567 referenced_ranges.push((piece.start, end));
568 }
569
570 if referenced_ranges.is_empty() {
571 self.add_buffer.clear();
573 return Ok(());
574 }
575
576 referenced_ranges.sort_by_key(|r| r.0);
578
579 let mut merged_ranges = vec![referenced_ranges[0]];
581 for range in referenced_ranges.iter().skip(1) {
582 let last_idx = merged_ranges.len() - 1;
583 if range.0 <= merged_ranges[last_idx].1 {
584 merged_ranges[last_idx].1 = merged_ranges[last_idx].1.max(range.1);
586 } else {
587 merged_ranges.push(*range);
588 }
589 }
590
591 let mut new_add_buffer = Vec::new();
593 let mut mappings: Vec<(usize, usize, usize)> = Vec::new(); for (old_start, old_end) in merged_ranges {
596 let new_start = new_add_buffer.len();
597 let slice = self.add_buffer.get(old_start..old_end).ok_or(
598 PieceTableError::InvalidPieceRange {
599 buffer_type: BufferType::Add,
600 start: old_start,
601 byte_length: old_end.saturating_sub(old_start),
602 buffer_len: self.add_buffer.len(),
603 },
604 )?;
605 new_add_buffer.extend_from_slice(slice);
606 mappings.push((old_start, old_end, new_start));
607 }
608
609 for piece in &mut self.pieces {
611 if piece.buffer_type != BufferType::Add {
612 continue;
613 }
614
615 let idx = match mappings.binary_search_by_key(&piece.start, |(s, _, _)| *s) {
617 Ok(exact) => exact,
618 Err(insert_pos) => insert_pos.saturating_sub(1),
619 };
620
621 if let Some((old_start, old_end, new_start)) = mappings.get(idx).copied()
622 && piece.start < old_end
623 {
624 piece.start = new_start + (piece.start - old_start);
625 }
626 }
627
628 self.add_buffer = new_add_buffer;
629 self.operation_count = 0; Ok(())
631 }
632
633 fn try_check_gc(&mut self) -> Result<(), PieceTableError> {
634 self.operation_count += 1;
635 if self.operation_count >= self.gc_threshold {
636 self.try_gc()?;
637 }
638 Ok(())
639 }
640
641 pub fn set_gc_threshold(&mut self, threshold: usize) {
643 self.gc_threshold = threshold;
644 }
645}
646
647#[cfg(test)]
648mod tests {
649 use super::*;
650
651 #[test]
652 fn test_new_piece_table() {
653 let pt = PieceTable::new("Hello, World!");
654 assert_eq!(pt.get_text(), "Hello, World!");
655 assert_eq!(pt.char_count(), 13);
656 }
657
658 #[test]
659 fn test_empty_piece_table() {
660 let pt = PieceTable::empty();
661 assert_eq!(pt.get_text(), "");
662 assert_eq!(pt.char_count(), 0);
663 }
664
665 #[test]
666 fn test_insert_at_start() {
667 let mut pt = PieceTable::new("World");
668 pt.insert(0, "Hello, ");
669 assert_eq!(pt.get_text(), "Hello, World");
670 }
671
672 #[test]
673 fn test_insert_at_end() {
674 let mut pt = PieceTable::new("Hello");
675 pt.insert(5, ", World");
676 assert_eq!(pt.get_text(), "Hello, World");
677 }
678
679 #[test]
680 fn test_insert_in_middle() {
681 let mut pt = PieceTable::new("Hlo");
682 pt.insert(1, "el");
683 assert_eq!(pt.get_text(), "Hello");
684 }
685
686 #[test]
687 fn test_delete_at_start() {
688 let mut pt = PieceTable::new("Hello, World");
689 pt.delete(0, 7);
690 assert_eq!(pt.get_text(), "World");
691 }
692
693 #[test]
694 fn test_delete_at_end() {
695 let mut pt = PieceTable::new("Hello, World");
696 pt.delete(5, 7);
697 assert_eq!(pt.get_text(), "Hello");
698 }
699
700 #[test]
701 fn test_delete_in_middle() {
702 let mut pt = PieceTable::new("Hello, World");
703 pt.delete(5, 2);
704 assert_eq!(pt.get_text(), "HelloWorld");
705 }
706
707 #[test]
708 fn test_multiple_operations() {
709 let mut pt = PieceTable::new("Hello");
710 pt.insert(5, " World");
711 pt.insert(5, ",");
712 pt.delete(0, 7);
713 pt.insert(0, "Hi, ");
714 assert_eq!(pt.get_text(), "Hi, World");
715 }
716
717 #[test]
718 fn test_utf8_chinese() {
719 let mut pt = PieceTable::new("你好");
720 assert_eq!(pt.char_count(), 2);
721 assert_eq!(pt.byte_count(), 6);
722
723 pt.insert(1, "们");
724 assert_eq!(pt.get_text(), "你们好");
725 assert_eq!(pt.char_count(), 3);
726 }
727
728 #[test]
729 fn test_utf8_emoji() {
730 let mut pt = PieceTable::new("Hello 👋");
731 pt.insert(6, "World ");
732 assert_eq!(pt.get_text(), "Hello World 👋");
733 }
734
735 #[test]
736 fn test_get_range() {
737 let pt = PieceTable::new("Hello, World!");
738 assert_eq!(pt.get_range(0, 5), "Hello");
739 assert_eq!(pt.get_range(7, 5), "World");
740 assert_eq!(pt.get_range(0, 13), "Hello, World!");
741 }
742
743 #[test]
744 fn test_try_get_text_reports_invalid_piece_range_without_panic() {
745 let mut pt = PieceTable::new("abc");
746 pt.pieces[0].byte_length = usize::MAX;
747
748 let result = std::panic::catch_unwind(|| pt.try_get_text());
749 assert!(matches!(
750 result,
751 Ok(Err(PieceTableError::InvalidPieceRange { .. }))
752 ));
753 }
754
755 #[test]
756 fn test_try_get_text_reports_invalid_utf8_without_panic() {
757 let mut pt = PieceTable::empty();
758 pt.add_buffer.push(0xff);
759 pt.pieces.push(Piece::new(BufferType::Add, 0, 1, 1));
760
761 let result = std::panic::catch_unwind(|| pt.try_get_text());
762 assert!(matches!(
763 result,
764 Ok(Err(PieceTableError::InvalidPieceUtf8 { .. }))
765 ));
766 }
767
768 #[test]
769 fn test_try_insert_reports_split_piece_invariant_failure() {
770 let mut pt = PieceTable::empty();
771 pt.add_buffer.push(0xff);
772 pt.pieces.push(Piece::new(BufferType::Add, 0, 1, 2));
773
774 let result =
775 std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| pt.try_insert(1, "x")));
776 assert!(matches!(
777 result,
778 Ok(Err(PieceTableError::InvalidPieceUtf8 { .. }))
779 ));
780 assert_eq!(pt.add_buffer, vec![0xff]);
781 }
782
783 #[test]
784 fn test_piece_merging() {
785 let mut pt = PieceTable::new("Hello");
786
787 let initial_pieces = pt.pieces.len();
789 pt.insert(5, " ");
790 pt.insert(6, "World");
791
792 assert_eq!(pt.get_text(), "Hello World");
794 assert!(pt.pieces.len() <= initial_pieces + 1);
796 }
797
798 #[test]
799 fn test_gc_basic() {
800 let mut pt = PieceTable::new("Hello");
801
802 pt.insert(5, " World");
804 pt.insert(11, "!");
805
806 let add_buffer_size_before = pt.add_buffer.len();
807
808 pt.delete(5, 6); pt.gc();
813
814 assert_eq!(pt.get_text(), "Hello!");
816
817 assert!(pt.add_buffer.len() < add_buffer_size_before);
819 }
820
821 #[test]
822 fn test_gc_multiple_references() {
823 let mut pt = PieceTable::new("ABC");
824
825 pt.insert(1, "1");
827 pt.insert(3, "2");
828 pt.insert(5, "3");
829
830 assert_eq!(pt.get_text(), "A1B2C3");
831
832 pt.gc();
834
835 assert_eq!(pt.get_text(), "A1B2C3");
837
838 assert!(!pt.add_buffer.is_empty());
840 }
841
842 #[test]
843 fn test_auto_gc_trigger() {
844 let mut pt = PieceTable::new("Test");
845 pt.set_gc_threshold(5); for i in 0..6 {
849 pt.insert(4 + i, "x");
850 }
851
852 assert!(pt.operation_count < 6);
854 }
855}