1#![feature(test)]
7
8mod buffer;
9mod piece;
10
11use str_indices::chars::count as count_chars;
12use str_indices::chars::to_byte_idx as char_to_byte;
13
14use buffer::{Buffer, Buffers};
15use piece::Piece;
16
17#[derive(Debug)]
18pub struct PieceTable<'b> {
19 pieces: Vec<Piece>,
20 buffers: Buffers<'b>,
21
22 len_chars: usize,
23 len_bytes: usize,
24
25 #[cfg(feature = "contiguous-inserts")]
34 last_insert: Option<(usize, usize)>,
35}
36
37impl<'b> PieceTable<'b> {
38 pub fn new(initial: &'b str) -> Self {
48 let initial_piece = Piece {
49 buffer: Buffer::Original,
50 start: 0,
51 len_bytes: initial.len(),
52 len_chars: count_chars(initial),
53 };
54
55 Self {
56 pieces: vec![initial_piece],
57 buffers: Buffers::from_initial(initial),
58 len_chars: count_chars(initial),
59 len_bytes: initial.len(),
60 #[cfg(feature = "contiguous-inserts")]
61 last_insert: None,
62 }
63 }
64
65 pub fn text(&self) -> String {
79 let mut text = String::with_capacity(self.len_bytes);
80
81 for piece in &self.pieces {
82 let end = piece.start + piece.len_bytes;
83 text.push_str(&self.buffers[piece.buffer][piece.start..end]);
84 }
85
86 debug_assert_eq!(text.len(), self.len_bytes);
87 debug_assert_eq!(count_chars(&text), self.len_chars);
88
89 text
90 }
91
92 pub fn remove<R>(&mut self, range: R)
119 where
120 R: std::ops::RangeBounds<usize>,
121 {
122 let (start, end) = self.simplify_range_bounds(range);
123 if start >= end {
124 return; }
126
127 #[cfg(feature = "contiguous-inserts")]
128 if self.last_insert.is_some_and(|(i, _piece)| i >= start) {
129 self.last_insert = None;
132 }
133
134 let (start_piece_idx, start_char_idx) = self.piece_at_char(start);
135 let (end_piece_idx, end_char_idx) = self.piece_at_char(end);
136
137 if start_piece_idx == end_piece_idx {
138 let piece_idx = start_piece_idx;
139 self.remove_within_piece(piece_idx, start_char_idx, end_char_idx);
140 return;
141 }
142
143 self.trim_piece_start(end_piece_idx, end_char_idx);
144 self.remove_pieces(start_piece_idx + 1..end_piece_idx);
145 self.trim_piece_end(start_piece_idx, start_char_idx);
146 }
147
148 pub fn insert(&mut self, char_idx: usize, text: &str) {
170 let len_chars = count_chars(text);
171
172 self.len_chars += len_chars;
173 self.len_bytes += text.len();
174
175 #[cfg(feature = "contiguous-inserts")]
176 if let Some((ref mut i, piece_idx)) = self.last_insert
177 && *i == char_idx
178 {
179 *i += len_chars;
180 self.extend_piece(text, len_chars, piece_idx);
181 return;
182 }
183
184 let (piece_idx, relative_char_idx) = self.piece_at_char(char_idx);
185
186 if relative_char_idx == 0 {
187 self.insert_piece(piece_idx, text);
188 } else if relative_char_idx == self.pieces[piece_idx].len_chars {
189 self.insert_piece(piece_idx + 1, text);
190 } else {
191 self.split_piece_and_insert(piece_idx, relative_char_idx, text);
194 }
195
196 #[cfg(feature = "contiguous-inserts")]
197 {
198 let piece_idx =
199 if relative_char_idx == 0 { piece_idx } else { piece_idx + 1 };
200 self.last_insert = Some((char_idx + len_chars, piece_idx));
201 }
202 }
203
204 #[inline(always)]
216 pub fn len_chars(&self) -> usize {
217 self.len_chars
218 }
219
220 #[inline(always)]
232 pub fn len_bytes(&self) -> usize {
233 self.len_bytes
234 }
235
236 fn split_piece_and_insert(
237 &mut self,
238 piece_idx: usize,
239 char_idx: usize,
240 text: &str,
241 ) {
242 let piece = &mut self.pieces[piece_idx];
243
244 let piece_text = &self.buffers[piece.buffer][piece.start..];
246 let byte_idx = char_to_byte(piece_text, char_idx);
247 let after = Piece {
248 buffer: piece.buffer,
249 start: piece.start + byte_idx,
250 len_bytes: piece.len_bytes - byte_idx,
251 len_chars: piece.len_chars - char_idx,
252 };
253
254 piece.len_bytes = byte_idx;
256 piece.len_chars = char_idx;
257
258 self.insert_piece(piece_idx + 1, text);
260 self.pieces.insert(piece_idx + 2, after);
261 }
262
263 pub fn iter(&self) -> impl Iterator<Item = &str> {
274 self.pieces.iter().map(|p| &self.buffers[p.buffer][p.byte_range()])
275 }
276
277 fn insert_piece(&mut self, index: usize, text: &str) {
280 let piece = Piece {
281 buffer: Buffer::Add,
282 start: self.buffers.add.len(),
283 len_chars: text.chars().count(),
284 len_bytes: text.len(),
285 };
286
287 self.buffers.add.push_str(text);
288 self.pieces.insert(index, piece);
289 }
290
291 fn piece_at_char(&self, char_idx: usize) -> (usize, usize) {
292 assert!(char_idx <= self.len_chars, "index out of bounds");
293
294 let mut offset = 0;
295 for (i, piece) in self.pieces.iter().enumerate() {
296 offset += piece.len_chars;
297
298 if offset >= char_idx {
299 let relative_idx = char_idx - (offset - piece.len_chars);
300 return (i, relative_idx);
301 }
302 }
303
304 unreachable!(
305 "this code will be ran only if `index` is larger than the total \
306 size len all the pieces together, but this was already asserted"
307 )
308 }
309
310 fn simplify_range_bounds<R>(&mut self, range: R) -> (usize, usize)
311 where
312 R: std::ops::RangeBounds<usize>,
313 {
314 let start = match range.start_bound() {
315 std::ops::Bound::Included(&i) => i,
316 std::ops::Bound::Excluded(&i) => i + 1,
317 std::ops::Bound::Unbounded => 0,
318 };
319 let end = match range.end_bound() {
320 std::ops::Bound::Included(&i) => i + 1,
321 std::ops::Bound::Excluded(&i) => i,
322 std::ops::Bound::Unbounded => self.len_chars,
323 };
324 (start, end)
325 }
326
327 fn trim_piece_end(&mut self, piece_idx: usize, start_char_idx: usize) {
328 let piece = &mut self.pieces[piece_idx];
329 let text = &self.buffers[piece.buffer][piece.byte_range()];
330
331 if start_char_idx == 0 {
332 self.pieces.remove(piece_idx);
333 } else if start_char_idx < piece.len_chars {
334 let byte_idx = char_to_byte(text, start_char_idx);
335 self.len_chars -= piece.len_chars - start_char_idx;
336 self.len_bytes -= piece.len_bytes - byte_idx;
337 piece.len_bytes = byte_idx;
338 piece.len_chars = start_char_idx;
339 }
340 }
341
342 fn trim_piece_start(&mut self, piece_idx: usize, end_char_idx: usize) {
343 let piece = &mut self.pieces[piece_idx];
344 let text = &self.buffers[piece.buffer][piece.byte_range()];
345
346 if end_char_idx == piece.len_chars {
347 self.pieces.remove(piece_idx);
348 } else if end_char_idx > 0 {
349 let byte_idx = char_to_byte(text, end_char_idx);
350 piece.start += byte_idx;
351 piece.len_bytes -= byte_idx;
352 piece.len_chars -= end_char_idx;
353 self.len_chars -= end_char_idx;
354 self.len_bytes -= byte_idx;
355 }
356 }
357
358 fn remove_within_piece(
359 &mut self,
360 piece_idx: usize,
361 start_char_idx: usize,
362 end_char_idx: usize,
363 ) {
364 let piece = &mut self.pieces[piece_idx];
365 let text = &self.buffers[piece.buffer][piece.byte_range()];
366
367 if start_char_idx == 0 && end_char_idx == piece.len_chars {
369 let piece = &self.pieces[piece_idx];
370 self.len_bytes -= piece.len_bytes;
371 self.len_chars -= piece.len_chars;
372 self.pieces.remove(piece_idx);
373 return;
374 }
375
376 let start_offset = char_to_byte(text, start_char_idx);
377 let end_offset = char_to_byte(text, end_char_idx);
378
379 let new_len_bytes = end_offset - start_offset;
380 let new_len_chars = end_char_idx - start_char_idx;
381
382 piece.start += start_offset;
383 piece.len_bytes = new_len_bytes;
384 piece.len_chars = new_len_chars;
385
386 let removed_bytes = piece.len_bytes - new_len_bytes;
387 self.len_bytes -= removed_bytes;
388 let removed_chars = piece.len_chars - new_len_chars;
389 self.len_chars -= removed_chars;
390 }
391
392 fn remove_pieces(&mut self, range: std::ops::Range<usize>) {
393 self.pieces.drain(range).for_each(|p| {
394 self.len_chars -= p.len_chars;
395 self.len_bytes -= p.len_bytes;
396 });
397 }
398
399 #[cfg(feature = "contiguous-inserts")]
403 fn extend_piece(
404 &mut self,
405 text: &str,
406 text_len_chars: usize,
407 piece_idx: usize,
408 ) {
409 let piece = &mut self.pieces[piece_idx];
410
411 debug_assert_eq!(piece.buffer, Buffer::Add);
412 debug_assert_eq!(self.buffers.add.len(), piece.byte_range().end);
413
414 piece.len_bytes += text.len();
415 piece.len_chars += text_len_chars;
416
417 self.buffers.add.push_str(text);
418 }
419}
420
421impl std::fmt::Display for PieceTable<'_> {
422 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
423 self.iter().try_for_each(|p| write!(f, "{p}"))
424 }
425}
426
427#[cfg(test)]
428mod tests {
429 use super::*;
430
431 #[test]
432 fn contiguous_insertion() {
433 let mut pt = PieceTable::new("ag");
434
435 let letters = ('b'..='f').map(|ch| ch.to_string());
436 letters.enumerate().for_each(|(i, ch)| pt.insert(i + 1, &ch));
437
438 assert_eq!(pt.text(), "abcdefg");
439
440 if cfg!(feature = "contiguous-inserts") {
441 assert_eq!(pt.pieces.len(), 3);
442 } else {
443 assert_eq!(pt.pieces.len(), 7);
444 }
445 }
446}
447
448#[cfg(test)]
449mod benches {
450 extern crate test;
451
452 use self::test::Bencher;
453 use std::process::Termination;
454
455 use super::*;
456
457 #[bench]
458 fn bench_sequential_inserts(b: &mut Bencher) -> impl Termination {
459 b.iter(|| {
460 const CH: &str = "a";
461 let mut pt = PieceTable::new("asdfjlkajslkdfjlkajsldkfjlkasjdlkfj");
462 for i in 10..10000 {
463 pt.insert(i, CH);
464 }
465 pt.insert(2, CH);
466 pt.remove(4..294);
467 for i in 3..5531 {
468 pt.insert(i, CH);
469 }
470 });
471 }
472}