#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BufferType {
Original,
Add,
}
#[derive(Debug, Clone)]
pub struct Piece {
pub buffer_type: BufferType,
pub start: usize,
pub byte_length: usize,
pub char_count: usize,
}
impl Piece {
pub fn new(
buffer_type: BufferType,
start: usize,
byte_length: usize,
char_count: usize,
) -> Self {
Self {
buffer_type,
start,
byte_length,
char_count,
}
}
}
pub struct PieceTable {
original_buffer: Vec<u8>,
add_buffer: Vec<u8>,
pieces: Vec<Piece>,
operation_count: usize,
gc_threshold: usize,
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum PieceTableError {
InvalidPieceRange {
buffer_type: BufferType,
start: usize,
byte_length: usize,
buffer_len: usize,
},
InvalidPieceUtf8 {
buffer_type: BufferType,
start: usize,
byte_length: usize,
},
}
impl std::fmt::Display for PieceTableError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::InvalidPieceRange {
buffer_type,
start,
byte_length,
buffer_len,
} => write!(
f,
"invalid {:?} piece range {}..{} for buffer length {}",
buffer_type,
start,
start.saturating_add(*byte_length),
buffer_len
),
Self::InvalidPieceUtf8 {
buffer_type,
start,
byte_length,
} => write!(
f,
"invalid UTF-8 in {:?} piece range {}..{}",
buffer_type,
start,
start.saturating_add(*byte_length)
),
}
}
}
impl std::error::Error for PieceTableError {}
fn piece_table_invariant_error<T>(error: PieceTableError, fallback: T) -> T {
let _ = &fallback;
#[cfg(debug_assertions)]
panic!("PieceTable invariant violated: {error}");
#[cfg(not(debug_assertions))]
{
let _ = error;
fallback
}
}
impl PieceTable {
pub fn new(text: &str) -> Self {
let bytes = text.as_bytes().to_vec();
let char_count = text.chars().count();
let byte_length = bytes.len();
let pieces = if byte_length > 0 {
vec![Piece::new(BufferType::Original, 0, byte_length, char_count)]
} else {
Vec::new()
};
Self {
original_buffer: bytes,
add_buffer: Vec::new(),
pieces,
operation_count: 0,
gc_threshold: 1000, }
}
pub fn empty() -> Self {
Self {
original_buffer: Vec::new(),
add_buffer: Vec::new(),
pieces: Vec::new(),
operation_count: 0,
gc_threshold: 1000,
}
}
pub fn insert(&mut self, offset: usize, text: &str) {
if let Err(error) = self.try_insert(offset, text) {
piece_table_invariant_error(error, ());
}
}
pub fn try_insert(&mut self, offset: usize, text: &str) -> Result<(), PieceTableError> {
if text.is_empty() {
return Ok(());
}
let text_bytes = text.as_bytes();
let text_char_count = text.chars().count();
let text_byte_length = text_bytes.len();
let add_start = self.add_buffer.len();
let (piece_index, char_offset_in_piece) = self.find_piece_at_offset(offset);
enum InsertPlan {
Insert { index: usize, piece: Piece },
Splice { index: usize, pieces: Vec<Piece> },
Push(Piece),
}
let new_piece = Piece::new(
BufferType::Add,
add_start,
text_byte_length,
text_char_count,
);
let plan = if let Some(piece_idx) = piece_index {
let piece = self.pieces[piece_idx].clone();
if char_offset_in_piece == 0 {
InsertPlan::Insert {
index: piece_idx,
piece: new_piece,
}
} else if char_offset_in_piece == piece.char_count {
InsertPlan::Insert {
index: piece_idx + 1,
piece: new_piece,
}
} else {
let (left_piece, right_piece) = self.split_piece(&piece, char_offset_in_piece)?;
InsertPlan::Splice {
index: piece_idx,
pieces: vec![left_piece, new_piece, right_piece],
}
}
} else {
InsertPlan::Push(new_piece)
};
self.add_buffer.extend_from_slice(text_bytes);
match plan {
InsertPlan::Insert { index, piece } => self.pieces.insert(index, piece),
InsertPlan::Splice { index, pieces } => {
self.pieces.splice(index..=index, pieces);
}
InsertPlan::Push(piece) => self.pieces.push(piece),
}
self.try_merge_adjacent_pieces();
self.try_check_gc()?;
Ok(())
}
pub fn delete(&mut self, start_offset: usize, length: usize) {
if let Err(error) = self.try_delete(start_offset, length) {
piece_table_invariant_error(error, ());
}
}
pub fn try_delete(
&mut self,
start_offset: usize,
length: usize,
) -> Result<(), PieceTableError> {
if length == 0 {
return Ok(());
}
let end_offset = start_offset.saturating_add(length);
let (start_piece_idx, start_char_offset) = self.find_piece_at_offset(start_offset);
let (end_piece_idx, end_char_offset) = self.find_piece_at_offset(end_offset);
match (start_piece_idx, end_piece_idx) {
(Some(start_idx), Some(end_idx)) if start_idx == end_idx => {
let piece = self.pieces[start_idx].clone();
if start_char_offset == 0 && end_char_offset == piece.char_count {
self.pieces.remove(start_idx);
} else if start_char_offset == 0 {
let (_, right) = self.split_piece(&piece, end_char_offset)?;
self.pieces[start_idx] = right;
} else if end_char_offset == piece.char_count {
let (left, _) = self.split_piece(&piece, start_char_offset)?;
self.pieces[start_idx] = left;
} else {
let (left, temp) = self.split_piece(&piece, start_char_offset)?;
let (_, right) =
self.split_piece(&temp, end_char_offset - start_char_offset)?;
self.pieces.splice(start_idx..=start_idx, vec![left, right]);
}
}
(Some(start_idx), Some(end_idx)) => {
let start_piece = self.pieces[start_idx].clone();
let end_piece = self.pieces[end_idx].clone();
let mut new_pieces = Vec::new();
if start_char_offset > 0 {
let (left, _) = self.split_piece(&start_piece, start_char_offset)?;
new_pieces.push(left);
}
if end_char_offset < end_piece.char_count {
let (_, right) = self.split_piece(&end_piece, end_char_offset)?;
new_pieces.push(right);
}
self.pieces.splice(start_idx..=end_idx, new_pieces);
}
(None, None) => {
}
_ => {
if let Some(start_idx) = start_piece_idx {
let start_piece = self.pieces[start_idx].clone();
if start_char_offset == 0 {
self.pieces.truncate(start_idx);
} else {
let (left, _) = self.split_piece(&start_piece, start_char_offset)?;
self.pieces.truncate(start_idx);
self.pieces.push(left);
}
}
}
}
self.try_check_gc()?;
Ok(())
}
pub fn get_text(&self) -> String {
match self.try_get_text() {
Ok(text) => text,
Err(error) => piece_table_invariant_error(error, String::new()),
}
}
pub fn try_get_text(&self) -> Result<String, PieceTableError> {
let mut result = String::new();
for piece in &self.pieces {
result.push_str(self.piece_text(piece)?);
}
Ok(result)
}
pub fn get_range(&self, start_offset: usize, length: usize) -> String {
match self.try_get_range(start_offset, length) {
Ok(text) => text,
Err(error) => piece_table_invariant_error(error, String::new()),
}
}
pub fn try_get_range(
&self,
start_offset: usize,
length: usize,
) -> Result<String, PieceTableError> {
let mut result = String::new();
let mut current_offset: usize = 0;
let end_offset = start_offset.saturating_add(length);
for piece in &self.pieces {
let piece_end = current_offset.saturating_add(piece.char_count);
if current_offset >= end_offset {
break;
}
if piece_end > start_offset {
let piece_text = self.piece_text(piece)?;
let skip_chars = start_offset.saturating_sub(current_offset);
let take_chars = if piece_end > end_offset {
end_offset - current_offset.max(start_offset)
} else {
piece.char_count - skip_chars
};
result.push_str(
&piece_text
.chars()
.skip(skip_chars)
.take(take_chars)
.collect::<String>(),
);
}
current_offset = piece_end;
}
Ok(result)
}
pub fn char_count(&self) -> usize {
self.pieces.iter().map(|p| p.char_count).sum()
}
pub fn byte_count(&self) -> usize {
self.pieces.iter().map(|p| p.byte_length).sum()
}
pub fn add_buffer_size(&self) -> usize {
self.add_buffer.len()
}
fn buffer_for_piece(&self, piece: &Piece) -> &[u8] {
match piece.buffer_type {
BufferType::Original => &self.original_buffer,
BufferType::Add => &self.add_buffer,
}
}
fn piece_bytes(&self, piece: &Piece) -> Result<&[u8], PieceTableError> {
let buffer = self.buffer_for_piece(piece);
let end = piece.start.checked_add(piece.byte_length).ok_or(
PieceTableError::InvalidPieceRange {
buffer_type: piece.buffer_type,
start: piece.start,
byte_length: piece.byte_length,
buffer_len: buffer.len(),
},
)?;
buffer
.get(piece.start..end)
.ok_or(PieceTableError::InvalidPieceRange {
buffer_type: piece.buffer_type,
start: piece.start,
byte_length: piece.byte_length,
buffer_len: buffer.len(),
})
}
fn piece_text(&self, piece: &Piece) -> Result<&str, PieceTableError> {
std::str::from_utf8(self.piece_bytes(piece)?).map_err(|_| {
PieceTableError::InvalidPieceUtf8 {
buffer_type: piece.buffer_type,
start: piece.start,
byte_length: piece.byte_length,
}
})
}
fn find_piece_at_offset(&self, offset: usize) -> (Option<usize>, usize) {
let mut current_offset: usize = 0;
for (idx, piece) in self.pieces.iter().enumerate() {
let next_offset = current_offset.saturating_add(piece.char_count);
if offset <= next_offset {
return (Some(idx), offset - current_offset);
}
current_offset = next_offset;
}
match self.pieces.last() {
Some(piece) => (Some(self.pieces.len() - 1), piece.char_count),
None => (None, 0),
}
}
fn split_piece(
&self,
piece: &Piece,
char_offset: usize,
) -> Result<(Piece, Piece), PieceTableError> {
let piece_text = self.piece_text(piece)?;
let char_offset = char_offset.min(piece.char_count);
let byte_offset = piece_text
.char_indices()
.nth(char_offset)
.map(|(i, _)| i)
.unwrap_or(piece.byte_length);
let left = Piece::new(piece.buffer_type, piece.start, byte_offset, char_offset);
let right = Piece::new(
piece.buffer_type,
piece.start + byte_offset,
piece.byte_length - byte_offset,
piece.char_count - char_offset,
);
Ok((left, right))
}
fn can_merge(&self, p1: &Piece, p2: &Piece) -> bool {
p1.buffer_type == p2.buffer_type &&
p1.buffer_type == BufferType::Add && p1.start.saturating_add(p1.byte_length) == p2.start
}
fn merge_pieces(&self, p1: &Piece, p2: &Piece) -> Piece {
Piece::new(
p1.buffer_type,
p1.start,
p1.byte_length.saturating_add(p2.byte_length),
p1.char_count.saturating_add(p2.char_count),
)
}
fn try_merge_adjacent_pieces(&mut self) {
let mut i = 0;
while i + 1 < self.pieces.len() {
if self.can_merge(&self.pieces[i], &self.pieces[i + 1]) {
let merged = self.merge_pieces(&self.pieces[i], &self.pieces[i + 1]);
self.pieces.splice(i..=i + 1, vec![merged]);
} else {
i += 1;
}
}
}
pub fn gc(&mut self) {
if let Err(error) = self.try_gc() {
piece_table_invariant_error(error, ());
}
}
pub fn try_gc(&mut self) -> Result<(), PieceTableError> {
let mut referenced_ranges: Vec<(usize, usize)> = Vec::new();
for piece in self
.pieces
.iter()
.filter(|p| p.buffer_type == BufferType::Add)
{
let end = piece.start.checked_add(piece.byte_length).ok_or(
PieceTableError::InvalidPieceRange {
buffer_type: piece.buffer_type,
start: piece.start,
byte_length: piece.byte_length,
buffer_len: self.add_buffer.len(),
},
)?;
if self.add_buffer.get(piece.start..end).is_none() {
return Err(PieceTableError::InvalidPieceRange {
buffer_type: piece.buffer_type,
start: piece.start,
byte_length: piece.byte_length,
buffer_len: self.add_buffer.len(),
});
}
referenced_ranges.push((piece.start, end));
}
if referenced_ranges.is_empty() {
self.add_buffer.clear();
return Ok(());
}
referenced_ranges.sort_by_key(|r| r.0);
let mut merged_ranges = vec![referenced_ranges[0]];
for range in referenced_ranges.iter().skip(1) {
let last_idx = merged_ranges.len() - 1;
if range.0 <= merged_ranges[last_idx].1 {
merged_ranges[last_idx].1 = merged_ranges[last_idx].1.max(range.1);
} else {
merged_ranges.push(*range);
}
}
let mut new_add_buffer = Vec::new();
let mut mappings: Vec<(usize, usize, usize)> = Vec::new();
for (old_start, old_end) in merged_ranges {
let new_start = new_add_buffer.len();
let slice = self.add_buffer.get(old_start..old_end).ok_or(
PieceTableError::InvalidPieceRange {
buffer_type: BufferType::Add,
start: old_start,
byte_length: old_end.saturating_sub(old_start),
buffer_len: self.add_buffer.len(),
},
)?;
new_add_buffer.extend_from_slice(slice);
mappings.push((old_start, old_end, new_start));
}
for piece in &mut self.pieces {
if piece.buffer_type != BufferType::Add {
continue;
}
let idx = match mappings.binary_search_by_key(&piece.start, |(s, _, _)| *s) {
Ok(exact) => exact,
Err(insert_pos) => insert_pos.saturating_sub(1),
};
if let Some((old_start, old_end, new_start)) = mappings.get(idx).copied()
&& piece.start < old_end
{
piece.start = new_start + (piece.start - old_start);
}
}
self.add_buffer = new_add_buffer;
self.operation_count = 0; Ok(())
}
fn try_check_gc(&mut self) -> Result<(), PieceTableError> {
self.operation_count += 1;
if self.operation_count >= self.gc_threshold {
self.try_gc()?;
}
Ok(())
}
pub fn set_gc_threshold(&mut self, threshold: usize) {
self.gc_threshold = threshold;
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_new_piece_table() {
let pt = PieceTable::new("Hello, World!");
assert_eq!(pt.get_text(), "Hello, World!");
assert_eq!(pt.char_count(), 13);
}
#[test]
fn test_empty_piece_table() {
let pt = PieceTable::empty();
assert_eq!(pt.get_text(), "");
assert_eq!(pt.char_count(), 0);
}
#[test]
fn test_insert_at_start() {
let mut pt = PieceTable::new("World");
pt.insert(0, "Hello, ");
assert_eq!(pt.get_text(), "Hello, World");
}
#[test]
fn test_insert_at_end() {
let mut pt = PieceTable::new("Hello");
pt.insert(5, ", World");
assert_eq!(pt.get_text(), "Hello, World");
}
#[test]
fn test_insert_in_middle() {
let mut pt = PieceTable::new("Hlo");
pt.insert(1, "el");
assert_eq!(pt.get_text(), "Hello");
}
#[test]
fn test_delete_at_start() {
let mut pt = PieceTable::new("Hello, World");
pt.delete(0, 7);
assert_eq!(pt.get_text(), "World");
}
#[test]
fn test_delete_at_end() {
let mut pt = PieceTable::new("Hello, World");
pt.delete(5, 7);
assert_eq!(pt.get_text(), "Hello");
}
#[test]
fn test_delete_in_middle() {
let mut pt = PieceTable::new("Hello, World");
pt.delete(5, 2);
assert_eq!(pt.get_text(), "HelloWorld");
}
#[test]
fn test_multiple_operations() {
let mut pt = PieceTable::new("Hello");
pt.insert(5, " World");
pt.insert(5, ",");
pt.delete(0, 7);
pt.insert(0, "Hi, ");
assert_eq!(pt.get_text(), "Hi, World");
}
#[test]
fn test_utf8_chinese() {
let mut pt = PieceTable::new("你好");
assert_eq!(pt.char_count(), 2);
assert_eq!(pt.byte_count(), 6);
pt.insert(1, "们");
assert_eq!(pt.get_text(), "你们好");
assert_eq!(pt.char_count(), 3);
}
#[test]
fn test_utf8_emoji() {
let mut pt = PieceTable::new("Hello 👋");
pt.insert(6, "World ");
assert_eq!(pt.get_text(), "Hello World 👋");
}
#[test]
fn test_get_range() {
let pt = PieceTable::new("Hello, World!");
assert_eq!(pt.get_range(0, 5), "Hello");
assert_eq!(pt.get_range(7, 5), "World");
assert_eq!(pt.get_range(0, 13), "Hello, World!");
}
#[test]
fn test_try_get_text_reports_invalid_piece_range_without_panic() {
let mut pt = PieceTable::new("abc");
pt.pieces[0].byte_length = usize::MAX;
let result = std::panic::catch_unwind(|| pt.try_get_text());
assert!(matches!(
result,
Ok(Err(PieceTableError::InvalidPieceRange { .. }))
));
}
#[test]
fn test_try_get_text_reports_invalid_utf8_without_panic() {
let mut pt = PieceTable::empty();
pt.add_buffer.push(0xff);
pt.pieces.push(Piece::new(BufferType::Add, 0, 1, 1));
let result = std::panic::catch_unwind(|| pt.try_get_text());
assert!(matches!(
result,
Ok(Err(PieceTableError::InvalidPieceUtf8 { .. }))
));
}
#[test]
fn test_try_insert_reports_split_piece_invariant_failure() {
let mut pt = PieceTable::empty();
pt.add_buffer.push(0xff);
pt.pieces.push(Piece::new(BufferType::Add, 0, 1, 2));
let result =
std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| pt.try_insert(1, "x")));
assert!(matches!(
result,
Ok(Err(PieceTableError::InvalidPieceUtf8 { .. }))
));
assert_eq!(pt.add_buffer, vec![0xff]);
}
#[test]
fn test_piece_merging() {
let mut pt = PieceTable::new("Hello");
let initial_pieces = pt.pieces.len();
pt.insert(5, " ");
pt.insert(6, "World");
assert_eq!(pt.get_text(), "Hello World");
assert!(pt.pieces.len() <= initial_pieces + 1);
}
#[test]
fn test_gc_basic() {
let mut pt = PieceTable::new("Hello");
pt.insert(5, " World");
pt.insert(11, "!");
let add_buffer_size_before = pt.add_buffer.len();
pt.delete(5, 6);
pt.gc();
assert_eq!(pt.get_text(), "Hello!");
assert!(pt.add_buffer.len() < add_buffer_size_before);
}
#[test]
fn test_gc_multiple_references() {
let mut pt = PieceTable::new("ABC");
pt.insert(1, "1");
pt.insert(3, "2");
pt.insert(5, "3");
assert_eq!(pt.get_text(), "A1B2C3");
pt.gc();
assert_eq!(pt.get_text(), "A1B2C3");
assert!(!pt.add_buffer.is_empty());
}
#[test]
fn test_auto_gc_trigger() {
let mut pt = PieceTable::new("Test");
pt.set_gc_threshold(5);
for i in 0..6 {
pt.insert(4 + i, "x");
}
assert!(pt.operation_count < 6);
}
}