1#[derive(Debug, Clone, Copy, PartialEq, Eq)]
8pub enum BufferType {
9 Original,
11 Add,
13}
14
15#[derive(Debug, Clone)]
17pub struct Piece {
18 pub buffer_type: BufferType,
20 pub start: usize,
22 pub byte_length: usize,
24 pub char_count: usize,
26}
27
28impl Piece {
29 pub fn new(
31 buffer_type: BufferType,
32 start: usize,
33 byte_length: usize,
34 char_count: usize,
35 ) -> Self {
36 Self {
37 buffer_type,
38 start,
39 byte_length,
40 char_count,
41 }
42 }
43}
44
45pub struct PieceTable {
47 original_buffer: Vec<u8>,
49 add_buffer: Vec<u8>,
51 pieces: Vec<Piece>,
53 operation_count: usize,
55 gc_threshold: usize,
57}
58
59impl PieceTable {
60 pub fn new(text: &str) -> Self {
62 let bytes = text.as_bytes().to_vec();
63 let char_count = text.chars().count();
64 let byte_length = bytes.len();
65
66 let pieces = if byte_length > 0 {
67 vec![Piece::new(BufferType::Original, 0, byte_length, char_count)]
68 } else {
69 Vec::new()
70 };
71
72 Self {
73 original_buffer: bytes,
74 add_buffer: Vec::new(),
75 pieces,
76 operation_count: 0,
77 gc_threshold: 1000, }
79 }
80
81 pub fn empty() -> Self {
83 Self {
84 original_buffer: Vec::new(),
85 add_buffer: Vec::new(),
86 pieces: Vec::new(),
87 operation_count: 0,
88 gc_threshold: 1000,
89 }
90 }
91
92 pub fn insert(&mut self, offset: usize, text: &str) {
94 if text.is_empty() {
95 return;
96 }
97
98 let text_bytes = text.as_bytes();
99 let text_char_count = text.chars().count();
100 let text_byte_length = text_bytes.len();
101
102 let add_start = self.add_buffer.len();
104 self.add_buffer.extend_from_slice(text_bytes);
105
106 let (piece_index, char_offset_in_piece) = self.find_piece_at_offset(offset);
108
109 if let Some(piece_idx) = piece_index {
110 let piece = &self.pieces[piece_idx];
111
112 if char_offset_in_piece == 0 {
113 let new_piece = Piece::new(
115 BufferType::Add,
116 add_start,
117 text_byte_length,
118 text_char_count,
119 );
120 self.pieces.insert(piece_idx, new_piece);
121 } else if char_offset_in_piece == piece.char_count {
122 let new_piece = Piece::new(
124 BufferType::Add,
125 add_start,
126 text_byte_length,
127 text_char_count,
128 );
129 self.pieces.insert(piece_idx + 1, new_piece);
130 } else {
131 let (left_piece, right_piece) = self.split_piece(piece, char_offset_in_piece);
133 let new_piece = Piece::new(
134 BufferType::Add,
135 add_start,
136 text_byte_length,
137 text_char_count,
138 );
139
140 self.pieces.splice(
142 piece_idx..=piece_idx,
143 vec![left_piece, new_piece, right_piece],
144 );
145 }
146 } else {
147 let new_piece = Piece::new(
149 BufferType::Add,
150 add_start,
151 text_byte_length,
152 text_char_count,
153 );
154 self.pieces.push(new_piece);
155 }
156
157 self.try_merge_adjacent_pieces();
159
160 self.check_gc();
162 }
163
164 pub fn delete(&mut self, start_offset: usize, length: usize) {
166 if length == 0 {
167 return;
168 }
169
170 let end_offset = start_offset + length;
171
172 let (start_piece_idx, start_char_offset) = self.find_piece_at_offset(start_offset);
174 let (end_piece_idx, end_char_offset) = self.find_piece_at_offset(end_offset);
175
176 match (start_piece_idx, end_piece_idx) {
177 (Some(start_idx), Some(end_idx)) if start_idx == end_idx => {
178 let piece = &self.pieces[start_idx];
180
181 if start_char_offset == 0 && end_char_offset == piece.char_count {
182 self.pieces.remove(start_idx);
184 } else if start_char_offset == 0 {
185 let (_, right) = self.split_piece(piece, end_char_offset);
187 self.pieces[start_idx] = right;
188 } else if end_char_offset == piece.char_count {
189 let (left, _) = self.split_piece(piece, start_char_offset);
191 self.pieces[start_idx] = left;
192 } else {
193 let (left, temp) = self.split_piece(piece, start_char_offset);
195 let (_, right) = self.split_piece(&temp, end_char_offset - start_char_offset);
196 self.pieces.splice(start_idx..=start_idx, vec![left, right]);
197 }
198 }
199 (Some(start_idx), Some(end_idx)) => {
200 let start_piece = &self.pieces[start_idx];
202 let end_piece = &self.pieces[end_idx];
203
204 let mut new_pieces = Vec::new();
205
206 if start_char_offset > 0 {
208 let (left, _) = self.split_piece(start_piece, start_char_offset);
209 new_pieces.push(left);
210 }
211
212 if end_char_offset < end_piece.char_count {
214 let (_, right) = self.split_piece(end_piece, end_char_offset);
215 new_pieces.push(right);
216 }
217
218 self.pieces.splice(start_idx..=end_idx, new_pieces);
220 }
221 (None, None) => {
222 }
224 _ => {
225 if let Some(start_idx) = start_piece_idx {
227 let start_piece = &self.pieces[start_idx];
229 if start_char_offset == 0 {
230 self.pieces.truncate(start_idx);
231 } else {
232 let (left, _) = self.split_piece(start_piece, start_char_offset);
233 self.pieces.truncate(start_idx);
234 self.pieces.push(left);
235 }
236 }
237 }
238 }
239
240 self.check_gc();
242 }
243
244 pub fn get_text(&self) -> String {
246 let mut result = String::new();
247 for piece in &self.pieces {
248 let buffer = match piece.buffer_type {
249 BufferType::Original => &self.original_buffer,
250 BufferType::Add => &self.add_buffer,
251 };
252 let slice = &buffer[piece.start..piece.start + piece.byte_length];
253 result.push_str(std::str::from_utf8(slice).unwrap());
254 }
255 result
256 }
257
258 pub fn get_range(&self, start_offset: usize, length: usize) -> String {
260 let mut result = String::new();
261 let mut current_offset = 0;
262 let end_offset = start_offset + length;
263
264 for piece in &self.pieces {
265 let piece_end = current_offset + piece.char_count;
266
267 if current_offset >= end_offset {
268 break;
269 }
270
271 if piece_end > start_offset {
272 let buffer = match piece.buffer_type {
273 BufferType::Original => &self.original_buffer,
274 BufferType::Add => &self.add_buffer,
275 };
276
277 let piece_text =
278 std::str::from_utf8(&buffer[piece.start..piece.start + piece.byte_length])
279 .unwrap();
280
281 let skip_chars = start_offset.saturating_sub(current_offset);
282
283 let take_chars = if piece_end > end_offset {
284 end_offset - current_offset.max(start_offset)
285 } else {
286 piece.char_count - skip_chars
287 };
288
289 result.push_str(
290 &piece_text
291 .chars()
292 .skip(skip_chars)
293 .take(take_chars)
294 .collect::<String>(),
295 );
296 }
297
298 current_offset = piece_end;
299 }
300
301 result
302 }
303
304 pub fn char_count(&self) -> usize {
306 self.pieces.iter().map(|p| p.char_count).sum()
307 }
308
309 pub fn byte_count(&self) -> usize {
311 self.pieces.iter().map(|p| p.byte_length).sum()
312 }
313
314 pub fn add_buffer_size(&self) -> usize {
316 self.add_buffer.len()
317 }
318
319 fn find_piece_at_offset(&self, offset: usize) -> (Option<usize>, usize) {
322 let mut current_offset = 0;
323
324 for (idx, piece) in self.pieces.iter().enumerate() {
325 let next_offset = current_offset + piece.char_count;
326 if offset <= next_offset {
327 return (Some(idx), offset - current_offset);
328 }
329 current_offset = next_offset;
330 }
331
332 if self.pieces.is_empty() {
333 (None, 0)
334 } else {
335 (
336 Some(self.pieces.len() - 1),
337 self.pieces.last().unwrap().char_count,
338 )
339 }
340 }
341
342 fn split_piece(&self, piece: &Piece, char_offset: usize) -> (Piece, Piece) {
345 let buffer = match piece.buffer_type {
346 BufferType::Original => &self.original_buffer,
347 BufferType::Add => &self.add_buffer,
348 };
349
350 let piece_text =
351 std::str::from_utf8(&buffer[piece.start..piece.start + piece.byte_length]).unwrap();
352
353 let byte_offset = piece_text
356 .char_indices()
357 .nth(char_offset)
358 .map(|(i, _)| i)
359 .unwrap_or(piece.byte_length);
360
361 let left = Piece::new(piece.buffer_type, piece.start, byte_offset, char_offset);
362
363 let right = Piece::new(
364 piece.buffer_type,
365 piece.start + byte_offset,
366 piece.byte_length - byte_offset,
367 piece.char_count - char_offset,
368 );
369
370 (left, right)
371 }
372
373 fn can_merge(&self, p1: &Piece, p2: &Piece) -> bool {
375 p1.buffer_type == p2.buffer_type &&
376 p1.buffer_type == BufferType::Add && p1.start + p1.byte_length == p2.start
378 }
379
380 fn merge_pieces(&self, p1: &Piece, p2: &Piece) -> Piece {
382 Piece::new(
383 p1.buffer_type,
384 p1.start,
385 p1.byte_length + p2.byte_length,
386 p1.char_count + p2.char_count,
387 )
388 }
389
390 fn try_merge_adjacent_pieces(&mut self) {
392 let mut i = 0;
393 while i + 1 < self.pieces.len() {
394 if self.can_merge(&self.pieces[i], &self.pieces[i + 1]) {
395 let merged = self.merge_pieces(&self.pieces[i], &self.pieces[i + 1]);
396 self.pieces.splice(i..=i + 1, vec![merged]);
397 } else {
398 i += 1;
399 }
400 }
401 }
402
403 pub fn gc(&mut self) {
405 let mut referenced_ranges: Vec<(usize, usize)> = self
407 .pieces
408 .iter()
409 .filter(|p| p.buffer_type == BufferType::Add)
410 .map(|p| (p.start, p.start + p.byte_length))
411 .collect();
412
413 if referenced_ranges.is_empty() {
414 self.add_buffer.clear();
416 return;
417 }
418
419 referenced_ranges.sort_by_key(|r| r.0);
421
422 let mut merged_ranges = vec![referenced_ranges[0]];
424 for range in referenced_ranges.iter().skip(1) {
425 let last_idx = merged_ranges.len() - 1;
426 if range.0 <= merged_ranges[last_idx].1 {
427 merged_ranges[last_idx].1 = merged_ranges[last_idx].1.max(range.1);
429 } else {
430 merged_ranges.push(*range);
431 }
432 }
433
434 let mut new_add_buffer = Vec::new();
436 let mut mappings: Vec<(usize, usize, usize)> = Vec::new(); for (old_start, old_end) in merged_ranges {
439 let new_start = new_add_buffer.len();
440 new_add_buffer.extend_from_slice(&self.add_buffer[old_start..old_end]);
441 mappings.push((old_start, old_end, new_start));
442 }
443
444 for piece in &mut self.pieces {
446 if piece.buffer_type != BufferType::Add {
447 continue;
448 }
449
450 let idx = match mappings.binary_search_by_key(&piece.start, |(s, _, _)| *s) {
452 Ok(exact) => exact,
453 Err(insert_pos) => insert_pos.saturating_sub(1),
454 };
455
456 if let Some((old_start, old_end, new_start)) = mappings.get(idx).copied()
457 && piece.start < old_end
458 {
459 piece.start = new_start + (piece.start - old_start);
460 }
461 }
462
463 self.add_buffer = new_add_buffer;
464 self.operation_count = 0; }
466
467 fn check_gc(&mut self) {
469 self.operation_count += 1;
470 if self.operation_count >= self.gc_threshold {
471 self.gc();
472 }
473 }
474
475 pub fn set_gc_threshold(&mut self, threshold: usize) {
477 self.gc_threshold = threshold;
478 }
479}
480
481#[cfg(test)]
482mod tests {
483 use super::*;
484
485 #[test]
486 fn test_new_piece_table() {
487 let pt = PieceTable::new("Hello, World!");
488 assert_eq!(pt.get_text(), "Hello, World!");
489 assert_eq!(pt.char_count(), 13);
490 }
491
492 #[test]
493 fn test_empty_piece_table() {
494 let pt = PieceTable::empty();
495 assert_eq!(pt.get_text(), "");
496 assert_eq!(pt.char_count(), 0);
497 }
498
499 #[test]
500 fn test_insert_at_start() {
501 let mut pt = PieceTable::new("World");
502 pt.insert(0, "Hello, ");
503 assert_eq!(pt.get_text(), "Hello, World");
504 }
505
506 #[test]
507 fn test_insert_at_end() {
508 let mut pt = PieceTable::new("Hello");
509 pt.insert(5, ", World");
510 assert_eq!(pt.get_text(), "Hello, World");
511 }
512
513 #[test]
514 fn test_insert_in_middle() {
515 let mut pt = PieceTable::new("Hlo");
516 pt.insert(1, "el");
517 assert_eq!(pt.get_text(), "Hello");
518 }
519
520 #[test]
521 fn test_delete_at_start() {
522 let mut pt = PieceTable::new("Hello, World");
523 pt.delete(0, 7);
524 assert_eq!(pt.get_text(), "World");
525 }
526
527 #[test]
528 fn test_delete_at_end() {
529 let mut pt = PieceTable::new("Hello, World");
530 pt.delete(5, 7);
531 assert_eq!(pt.get_text(), "Hello");
532 }
533
534 #[test]
535 fn test_delete_in_middle() {
536 let mut pt = PieceTable::new("Hello, World");
537 pt.delete(5, 2);
538 assert_eq!(pt.get_text(), "HelloWorld");
539 }
540
541 #[test]
542 fn test_multiple_operations() {
543 let mut pt = PieceTable::new("Hello");
544 pt.insert(5, " World");
545 pt.insert(5, ",");
546 pt.delete(0, 7);
547 pt.insert(0, "Hi, ");
548 assert_eq!(pt.get_text(), "Hi, World");
549 }
550
551 #[test]
552 fn test_utf8_chinese() {
553 let mut pt = PieceTable::new("你好");
554 assert_eq!(pt.char_count(), 2);
555 assert_eq!(pt.byte_count(), 6);
556
557 pt.insert(1, "们");
558 assert_eq!(pt.get_text(), "你们好");
559 assert_eq!(pt.char_count(), 3);
560 }
561
562 #[test]
563 fn test_utf8_emoji() {
564 let mut pt = PieceTable::new("Hello 👋");
565 pt.insert(6, "World ");
566 assert_eq!(pt.get_text(), "Hello World 👋");
567 }
568
569 #[test]
570 fn test_get_range() {
571 let pt = PieceTable::new("Hello, World!");
572 assert_eq!(pt.get_range(0, 5), "Hello");
573 assert_eq!(pt.get_range(7, 5), "World");
574 assert_eq!(pt.get_range(0, 13), "Hello, World!");
575 }
576
577 #[test]
578 fn test_piece_merging() {
579 let mut pt = PieceTable::new("Hello");
580
581 let initial_pieces = pt.pieces.len();
583 pt.insert(5, " ");
584 pt.insert(6, "World");
585
586 assert_eq!(pt.get_text(), "Hello World");
588 assert!(pt.pieces.len() <= initial_pieces + 1);
590 }
591
592 #[test]
593 fn test_gc_basic() {
594 let mut pt = PieceTable::new("Hello");
595
596 pt.insert(5, " World");
598 pt.insert(11, "!");
599
600 let add_buffer_size_before = pt.add_buffer.len();
601
602 pt.delete(5, 6); pt.gc();
607
608 assert_eq!(pt.get_text(), "Hello!");
610
611 assert!(pt.add_buffer.len() < add_buffer_size_before);
613 }
614
615 #[test]
616 fn test_gc_multiple_references() {
617 let mut pt = PieceTable::new("ABC");
618
619 pt.insert(1, "1");
621 pt.insert(3, "2");
622 pt.insert(5, "3");
623
624 assert_eq!(pt.get_text(), "A1B2C3");
625
626 pt.gc();
628
629 assert_eq!(pt.get_text(), "A1B2C3");
631
632 assert!(!pt.add_buffer.is_empty());
634 }
635
636 #[test]
637 fn test_auto_gc_trigger() {
638 let mut pt = PieceTable::new("Test");
639 pt.set_gc_threshold(5); for i in 0..6 {
643 pt.insert(4 + i, "x");
644 }
645
646 assert!(pt.operation_count < 6);
648 }
649}