peace_table/
lib.rs

1//! A UTF-8, char oriented, text editing optimized, [Piece Table]
2//! implementation.
3//!
4//! [Piece Table]: https://en.wikipedia.org/wiki/Piece_table
5
6#![feature(test)]
7
8mod buffer;
9mod piece;
10
11use str_indices::chars::count as count_chars;
12use str_indices::chars::to_byte_idx as char_to_byte;
13
14use buffer::{Buffer, Buffers};
15use piece::Piece;
16
17#[derive(Debug)]
18pub struct PieceTable<'b> {
19    pieces: Vec<Piece>,
20    buffers: Buffers<'b>,
21
22    len_chars: usize,
23    len_bytes: usize,
24
25    /// The char index after the last insertion, and the piece the last
26    /// insertion was inserting to (i.e., `(char, piece)`). If there is no last
27    /// insertion, or the last edit is not an insertion (thus invalidating
28    /// the `last_insert` value), it will contain a [`None`].
29    ///
30    /// This is used as an optimization, so that instead of creating a new
31    /// piece when inserting contiguous text (for every insert), we will just
32    /// expand the last piece.
33    #[cfg(feature = "contiguous-inserts")]
34    last_insert: Option<(usize, usize)>,
35}
36
37impl<'b> PieceTable<'b> {
38    /// Create a new [`PieceTable`] with the initial contents set to `initial`.
39    ///
40    /// # Examples
41    ///
42    /// ```
43    /// # use peace_table::PieceTable;
44    /// let pt = PieceTable::new("initial");
45    /// assert_eq!(pt.text(), "initial");
46    /// ```
47    pub fn new(initial: &'b str) -> Self {
48        let initial_piece = Piece {
49            buffer: Buffer::Original,
50            start: 0,
51            len_bytes: initial.len(),
52            len_chars: count_chars(initial),
53        };
54
55        Self {
56            pieces: vec![initial_piece],
57            buffers: Buffers::from_initial(initial),
58            len_chars: count_chars(initial),
59            len_bytes: initial.len(),
60            #[cfg(feature = "contiguous-inserts")]
61            last_insert: None,
62        }
63    }
64
65    /// Collect the text from the piece table.
66    ///
67    /// This function allocates a new string. You can use [`PieceTable::iter`]
68    /// to iterate over `&str` chunks without allocations.
69    ///
70    /// # Examples
71    ///
72    /// ```
73    /// # use peace_table::PieceTable;
74    /// let mut pt = PieceTable::new("content");
75    /// pt.insert(0, "abcd, ");
76    /// assert_eq!(pt.text(), "abcd, content");
77    /// ```
78    pub fn text(&self) -> String {
79        let mut text = String::with_capacity(self.len_bytes);
80
81        for piece in &self.pieces {
82            let end = piece.start + piece.len_bytes;
83            text.push_str(&self.buffers[piece.buffer][piece.start..end]);
84        }
85
86        debug_assert_eq!(text.len(), self.len_bytes);
87        debug_assert_eq!(count_chars(&text), self.len_chars);
88
89        text
90    }
91
92    /// Removes the text in the given char index range.
93    ///
94    /// # Examples
95    ///
96    /// ```
97    /// # use peace_table::PieceTable;
98    /// let mut pt = PieceTable::new("hello_there");
99    /// pt.insert(5, "  ");
100    /// pt.insert(7, " ");
101    /// pt.remove(6..=8);
102    /// assert_eq!(pt.text(), "hello there");
103    /// ```
104    ///
105    /// ```
106    /// # use peace_table::PieceTable;
107    /// let mut pt = PieceTable::new("012345");
108    /// pt.remove(0..=5);
109    /// assert_eq!(pt.text(), "");
110    /// ```
111    ///
112    /// ```
113    /// # use peace_table::PieceTable;
114    /// let mut pt = PieceTable::new("012345");
115    /// pt.remove(5..0);
116    /// assert_eq!(pt.text(), "012345"); // unchanged
117    /// ```
118    pub fn remove<R>(&mut self, range: R)
119    where
120        R: std::ops::RangeBounds<usize>,
121    {
122        let (start, end) = self.simplify_range_bounds(range);
123        if start >= end {
124            return; // the range is empty
125        }
126
127        #[cfg(feature = "contiguous-inserts")]
128        if self.last_insert.is_some_and(|(i, _piece)| i >= start) {
129            // If the removal is _after_ the index of the last insert, it does
130            // not affect it.
131            self.last_insert = None;
132        }
133
134        let (start_piece_idx, start_char_idx) = self.piece_at_char(start);
135        let (end_piece_idx, end_char_idx) = self.piece_at_char(end);
136
137        if start_piece_idx == end_piece_idx {
138            let piece_idx = start_piece_idx;
139            self.remove_within_piece(piece_idx, start_char_idx, end_char_idx);
140            return;
141        }
142
143        self.trim_piece_start(end_piece_idx, end_char_idx);
144        self.remove_pieces(start_piece_idx + 1..end_piece_idx);
145        self.trim_piece_end(start_piece_idx, start_char_idx);
146    }
147
148    /// Insert `content` at position `index`.
149    ///
150    /// # Examples
151    ///
152    /// ```
153    /// # use peace_table::PieceTable;
154    /// let mut pt = PieceTable::new("rld");
155    /// pt.insert(0, "hellowo");
156    /// pt.insert(5, " ");
157    /// assert_eq!(pt.text(), "hello world");
158    /// ```
159    ///
160    /// # Panics
161    ///
162    /// Will panic if index is larger than the size of the contents.
163    ///
164    /// ```should_panic
165    /// # use peace_table::PieceTable;
166    /// let mut pt = PieceTable::new("012");
167    /// pt.insert(4, " "); // will panic
168    /// ```
169    pub fn insert(&mut self, char_idx: usize, text: &str) {
170        let len_chars = count_chars(text);
171
172        self.len_chars += len_chars;
173        self.len_bytes += text.len();
174
175        #[cfg(feature = "contiguous-inserts")]
176        if let Some((ref mut i, piece_idx)) = self.last_insert
177            && *i == char_idx
178        {
179            *i += len_chars;
180            self.extend_piece(text, len_chars, piece_idx);
181            return;
182        }
183
184        let (piece_idx, relative_char_idx) = self.piece_at_char(char_idx);
185
186        if relative_char_idx == 0 {
187            self.insert_piece(piece_idx, text);
188        } else if relative_char_idx == self.pieces[piece_idx].len_chars {
189            self.insert_piece(piece_idx + 1, text);
190        } else {
191            // This is guarenteed to be a valid char index inside the piece, due
192            // to an earlier assertion in `piece_at_char`.
193            self.split_piece_and_insert(piece_idx, relative_char_idx, text);
194        }
195
196        #[cfg(feature = "contiguous-inserts")]
197        {
198            let piece_idx =
199                if relative_char_idx == 0 { piece_idx } else { piece_idx + 1 };
200            self.last_insert = Some((char_idx + len_chars, piece_idx));
201        }
202    }
203
204    /// Total number of chars in the piece table.
205    ///
206    /// Runs in `O(1)`.
207    ///
208    /// # Examples
209    ///
210    /// ```
211    /// # use peace_table::PieceTable;
212    /// let mut pt = PieceTable::new("123456");
213    /// assert_eq!(pt.len_chars(), 6);
214    /// ```
215    #[inline(always)]
216    pub fn len_chars(&self) -> usize {
217        self.len_chars
218    }
219
220    /// Total number of bytes in the piece table.
221    ///
222    /// Runs in `O(1)`.
223    ///
224    /// # Examples
225    ///
226    /// ```
227    /// # use peace_table::PieceTable;
228    /// let mut pt = PieceTable::new("1234⑤");
229    /// assert_eq!(pt.len_bytes(), 7); // the 5 takes 3 bytes
230    /// ```
231    #[inline(always)]
232    pub fn len_bytes(&self) -> usize {
233        self.len_bytes
234    }
235
236    fn split_piece_and_insert(
237        &mut self,
238        piece_idx: usize,
239        char_idx: usize,
240        text: &str,
241    ) {
242        let piece = &mut self.pieces[piece_idx];
243
244        // Create the `after` piece, before modifying `piece`.
245        let piece_text = &self.buffers[piece.buffer][piece.start..];
246        let byte_idx = char_to_byte(piece_text, char_idx);
247        let after = Piece {
248            buffer: piece.buffer,
249            start: piece.start + byte_idx,
250            len_bytes: piece.len_bytes - byte_idx,
251            len_chars: piece.len_chars - char_idx,
252        };
253
254        // Modify `piece` inplace to be the `before` piece.
255        piece.len_bytes = byte_idx;
256        piece.len_chars = char_idx;
257
258        // Insert the new and the `after` piece.
259        self.insert_piece(piece_idx + 1, text);
260        self.pieces.insert(piece_idx + 2, after);
261    }
262
263    /// Returns an iterator over all the `&str` chunks in the table.
264    ///
265    /// # Examples
266    ///
267    /// ```
268    /// # use peace_table::PieceTable;
269    /// let mut pt = PieceTable::new("hithere");
270    /// pt.insert(2, ", and hello, ");
271    /// assert_eq!(pt.iter().collect::<String>(), "hi, and hello, there");
272    /// ```
273    pub fn iter(&self) -> impl Iterator<Item = &str> {
274        self.pieces.iter().map(|p| &self.buffers[p.buffer][p.byte_range()])
275    }
276
277    /// Create a new "add" piece with `content`, and insert that piece at
278    /// `index`.
279    fn insert_piece(&mut self, index: usize, text: &str) {
280        let piece = Piece {
281            buffer: Buffer::Add,
282            start: self.buffers.add.len(),
283            len_chars: text.chars().count(),
284            len_bytes: text.len(),
285        };
286
287        self.buffers.add.push_str(text);
288        self.pieces.insert(index, piece);
289    }
290
291    fn piece_at_char(&self, char_idx: usize) -> (usize, usize) {
292        assert!(char_idx <= self.len_chars, "index out of bounds");
293
294        let mut offset = 0;
295        for (i, piece) in self.pieces.iter().enumerate() {
296            offset += piece.len_chars;
297
298            if offset >= char_idx {
299                let relative_idx = char_idx - (offset - piece.len_chars);
300                return (i, relative_idx);
301            }
302        }
303
304        unreachable!(
305            "this code will be ran only if `index` is larger than the total \
306             size len all the pieces together, but this was already asserted"
307        )
308    }
309
310    fn simplify_range_bounds<R>(&mut self, range: R) -> (usize, usize)
311    where
312        R: std::ops::RangeBounds<usize>,
313    {
314        let start = match range.start_bound() {
315            std::ops::Bound::Included(&i) => i,
316            std::ops::Bound::Excluded(&i) => i + 1,
317            std::ops::Bound::Unbounded => 0,
318        };
319        let end = match range.end_bound() {
320            std::ops::Bound::Included(&i) => i + 1,
321            std::ops::Bound::Excluded(&i) => i,
322            std::ops::Bound::Unbounded => self.len_chars,
323        };
324        (start, end)
325    }
326
327    fn trim_piece_end(&mut self, piece_idx: usize, start_char_idx: usize) {
328        let piece = &mut self.pieces[piece_idx];
329        let text = &self.buffers[piece.buffer][piece.byte_range()];
330
331        if start_char_idx == 0 {
332            self.pieces.remove(piece_idx);
333        } else if start_char_idx < piece.len_chars {
334            let byte_idx = char_to_byte(text, start_char_idx);
335            self.len_chars -= piece.len_chars - start_char_idx;
336            self.len_bytes -= piece.len_bytes - byte_idx;
337            piece.len_bytes = byte_idx;
338            piece.len_chars = start_char_idx;
339        }
340    }
341
342    fn trim_piece_start(&mut self, piece_idx: usize, end_char_idx: usize) {
343        let piece = &mut self.pieces[piece_idx];
344        let text = &self.buffers[piece.buffer][piece.byte_range()];
345
346        if end_char_idx == piece.len_chars {
347            self.pieces.remove(piece_idx);
348        } else if end_char_idx > 0 {
349            let byte_idx = char_to_byte(text, end_char_idx);
350            piece.start += byte_idx;
351            piece.len_bytes -= byte_idx;
352            piece.len_chars -= end_char_idx;
353            self.len_chars -= end_char_idx;
354            self.len_bytes -= byte_idx;
355        }
356    }
357
358    fn remove_within_piece(
359        &mut self,
360        piece_idx: usize,
361        start_char_idx: usize,
362        end_char_idx: usize,
363    ) {
364        let piece = &mut self.pieces[piece_idx];
365        let text = &self.buffers[piece.buffer][piece.byte_range()];
366
367        // If the range describes an entire piece, remove it.
368        if start_char_idx == 0 && end_char_idx == piece.len_chars {
369            let piece = &self.pieces[piece_idx];
370            self.len_bytes -= piece.len_bytes;
371            self.len_chars -= piece.len_chars;
372            self.pieces.remove(piece_idx);
373            return;
374        }
375
376        let start_offset = char_to_byte(text, start_char_idx);
377        let end_offset = char_to_byte(text, end_char_idx);
378
379        let new_len_bytes = end_offset - start_offset;
380        let new_len_chars = end_char_idx - start_char_idx;
381
382        piece.start += start_offset;
383        piece.len_bytes = new_len_bytes;
384        piece.len_chars = new_len_chars;
385
386        let removed_bytes = piece.len_bytes - new_len_bytes;
387        self.len_bytes -= removed_bytes;
388        let removed_chars = piece.len_chars - new_len_chars;
389        self.len_chars -= removed_chars;
390    }
391
392    fn remove_pieces(&mut self, range: std::ops::Range<usize>) {
393        self.pieces.drain(range).for_each(|p| {
394            self.len_chars -= p.len_chars;
395            self.len_bytes -= p.len_bytes;
396        });
397    }
398
399    /// Extend a piece's end, and inserts the text to the end of the `add`
400    /// buffer. This function assumes that the last insert to the table was to
401    /// the end of the piece.
402    #[cfg(feature = "contiguous-inserts")]
403    fn extend_piece(
404        &mut self,
405        text: &str,
406        text_len_chars: usize,
407        piece_idx: usize,
408    ) {
409        let piece = &mut self.pieces[piece_idx];
410
411        debug_assert_eq!(piece.buffer, Buffer::Add);
412        debug_assert_eq!(self.buffers.add.len(), piece.byte_range().end);
413
414        piece.len_bytes += text.len();
415        piece.len_chars += text_len_chars;
416
417        self.buffers.add.push_str(text);
418    }
419}
420
421impl std::fmt::Display for PieceTable<'_> {
422    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
423        self.iter().try_for_each(|p| write!(f, "{p}"))
424    }
425}
426
427#[cfg(test)]
428mod tests {
429    use super::*;
430
431    #[test]
432    fn contiguous_insertion() {
433        let mut pt = PieceTable::new("ag");
434
435        let letters = ('b'..='f').map(|ch| ch.to_string());
436        letters.enumerate().for_each(|(i, ch)| pt.insert(i + 1, &ch));
437
438        assert_eq!(pt.text(), "abcdefg");
439
440        if cfg!(feature = "contiguous-inserts") {
441            assert_eq!(pt.pieces.len(), 3);
442        } else {
443            assert_eq!(pt.pieces.len(), 7);
444        }
445    }
446}
447
448#[cfg(test)]
449mod benches {
450    extern crate test;
451
452    use self::test::Bencher;
453    use std::process::Termination;
454
455    use super::*;
456
457    #[bench]
458    fn bench_sequential_inserts(b: &mut Bencher) -> impl Termination {
459        b.iter(|| {
460            const CH: &str = "a";
461            let mut pt = PieceTable::new("asdfjlkajslkdfjlkajsldkfjlkasjdlkfj");
462            for i in 10..10000 {
463                pt.insert(i, CH);
464            }
465            pt.insert(2, CH);
466            pt.remove(4..294);
467            for i in 3..5531 {
468                pt.insert(i, CH);
469            }
470        });
471    }
472}