use std::{
cmp::{max, min, Ordering},
collections::BTreeMap,
fmt,
};
const MIN_GAP: usize = 32;
const MIN_GAP_GROW: usize = 64;
const MAX_GAP_GROW: usize = 1024 * 8;
#[inline]
fn clamp_gap_size(len: usize, min_gap: usize) -> usize {
min(max(len / 20, min_gap), MAX_GAP_GROW)
}
#[inline]
fn count_chars(bytes: &[u8]) -> usize {
if bytes.is_empty() {
return 0;
}
let mut n_chars = 0;
let mut cur = 0;
while cur < bytes.len() {
let ch = unsafe { decode_char_at(cur, bytes) };
cur += ch.len_utf8();
n_chars += 1;
}
n_chars
}
type ByteOffset = usize;
type CharOffset = usize;
#[derive(Default, Debug, Clone, PartialEq, Eq)]
pub struct GapBuffer {
data: Box<[u8]>,
cap: usize,
gap_start: usize,
gap_end: usize,
next_gap: usize,
line_endings: BTreeMap<ByteOffset, CharOffset>,
n_chars: usize,
}
fn compute_line_endings(s: &str) -> (usize, BTreeMap<ByteOffset, CharOffset>) {
let mut n_chars = 0;
let mut line_endings = BTreeMap::new();
for (line_chars, (idx, ch)) in s.char_indices().enumerate() {
n_chars += 1;
if ch == '\n' {
line_endings.insert(idx, line_chars);
}
}
(n_chars, line_endings)
}
impl From<String> for GapBuffer {
fn from(s: String) -> Self {
let gap_start = s.len();
let next_gap = clamp_gap_size(gap_start, MIN_GAP_GROW);
let cap = gap_start + next_gap;
let (n_chars, line_endings) = compute_line_endings(&s);
let mut v = s.into_bytes();
v.resize(cap, 0);
let mut gb = Self {
data: v.into_boxed_slice(),
cap,
gap_start,
gap_end: cap,
next_gap,
n_chars,
line_endings,
};
gb.move_gap_to(0);
gb
}
}
impl From<&str> for GapBuffer {
fn from(s: &str) -> Self {
let gap_start = s.len();
let next_gap = clamp_gap_size(gap_start, MIN_GAP_GROW);
let cap = gap_start + next_gap;
let (n_chars, line_endings) = compute_line_endings(s);
let mut v = Vec::with_capacity(cap);
v.extend_from_slice(s.as_bytes());
v.resize(cap, 0);
let mut gb = Self {
data: v.into_boxed_slice(),
cap,
gap_start,
gap_end: cap,
next_gap,
n_chars,
line_endings,
};
gb.move_gap_to(0);
gb
}
}
impl fmt::Display for GapBuffer {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match String::from_utf8(self.bytes()) {
Ok(s) => write!(f, "{s}"),
Err(_) => Err(fmt::Error),
}
}
}
macro_rules! assert_line_endings {
($self:expr) => {{
let true_endings: Vec<usize> = $self
.data
.iter()
.enumerate()
.filter(|&(i, &b)| b == b'\n' && (i < $self.gap_start || i >= $self.gap_end))
.map(|(i, _)| i)
.collect();
let tracked_line_endings: Vec<usize> = $self.line_endings.keys().copied().collect();
assert_eq!(
tracked_line_endings, true_endings,
"incorrect byte positions for line endings with gap_start={} gap_end={}",
$self.gap_start, $self.gap_end
);
let true_endings: Vec<usize> = $self
.to_string()
.chars()
.enumerate()
.filter(|&(_, c)| c == '\n')
.map(|(i, _)| i)
.collect();
let tracked_line_endings: Vec<usize> = $self.line_endings.values().copied().collect();
assert_eq!(
tracked_line_endings, true_endings,
"incorrect character positions for line endings with gap_start={} gap_end={}",
$self.gap_start, $self.gap_end
);
}};
}
impl GapBuffer {
#[inline]
fn gap(&self) -> usize {
self.gap_end - self.gap_start
}
#[inline]
pub fn len(&self) -> usize {
self.cap - self.gap()
}
#[inline]
pub fn is_empty(&self) -> bool {
self.cap == self.gap()
}
pub fn bytes(&self) -> Vec<u8> {
let mut v = Vec::with_capacity(self.len());
v.extend(&self.data[..self.gap_start]);
v.extend(&self.data[self.gap_end..]);
v
}
pub fn iter_lines(&self) -> impl Iterator<Item = Slice<'_>> {
let mut line_idx = 0;
std::iter::from_fn(move || {
if line_idx == self.len_lines() {
return None;
}
let slice = self.line(line_idx);
line_idx += 1;
Some(slice)
})
}
#[inline]
pub fn len_lines(&self) -> usize {
match self.line_endings.last_key_value() {
Some((&raw_idx, _)) => {
let n = self.line_endings.len();
let byte_idx = if raw_idx > self.gap_start {
raw_idx - self.gap_end
} else {
raw_idx
};
if byte_idx < self.len() {
n + 1
} else {
n
}
}
None => 1,
}
}
#[inline]
pub fn len_chars(&self) -> usize {
self.n_chars
}
pub fn byte_line_endings(&self) -> Vec<usize> {
let mut endings: Vec<_> = self
.line_endings
.keys()
.map(|i| self.raw_byte_to_byte(*i))
.collect();
let eob = self.len();
match endings.last() {
Some(&idx) if idx == eob => (),
_ => endings.push(eob),
}
endings
}
pub fn clear(&mut self) {
self.move_gap_to(0);
self.gap_end = self.cap;
self.line_endings.clear();
self.n_chars = 0;
#[cfg(test)]
assert_line_endings!(self);
}
#[inline]
pub fn char(&self, char_idx: usize) -> char {
let byte_idx = self.char_to_raw_byte(char_idx);
unsafe { decode_char_at(byte_idx, &self.data) }
}
#[inline]
pub fn get_char(&self, char_idx: usize) -> Option<char> {
if char_idx < self.n_chars {
Some(self.char(char_idx))
} else {
None
}
}
#[inline]
fn char_len(&self, byte_idx: usize) -> usize {
unsafe { decode_char_at(byte_idx, &self.data) }.len_utf8()
}
#[inline]
pub fn line(&self, line_idx: usize) -> Slice<'_> {
if line_idx >= self.len_lines() {
assert_line_endings!(self);
panic!(
"line index was {line_idx} but buffer has {} lines",
self.len_lines()
)
}
let to = match self.line_endings.iter().nth(line_idx) {
Some((&idx, _)) => idx + self.char_len(idx),
None => self.cap,
};
let from = if line_idx == 0 {
0
} else {
let idx = *self.line_endings.iter().nth(line_idx - 1).unwrap().0;
idx + 1
};
Slice::from_raw_offsets(from, to, self)
}
#[inline]
pub fn line_len_chars(&self, line_idx: usize) -> usize {
if line_idx >= self.len_lines() {
assert_line_endings!(self);
panic!(
"line index was {line_idx} but buffer has {} lines",
self.len_lines()
)
}
let chars_to = match self.line_endings.iter().nth(line_idx) {
Some((_, &char_idx)) => char_idx + 1,
None if line_idx == 0 => return self.n_chars,
None => self.n_chars,
};
let chars_from = if line_idx == 0 {
0
} else {
*self.line_endings.iter().nth(line_idx - 1).unwrap().1 + 1
};
chars_to - chars_from
}
pub fn maximal_slice_from_offset(&self, byte_offset: usize) -> &[u8] {
if byte_offset > self.len() {
&[]
} else {
let i = self.byte_to_raw_byte(byte_offset);
match i.cmp(&self.gap_start) {
Ordering::Less => &self.data[i..self.gap_start],
Ordering::Equal => &self.data[self.gap_end..],
Ordering::Greater => &self.data[i..],
}
}
}
pub fn slice_from_byte_offsets(&self, byte_from: usize, byte_to: usize) -> Slice<'_> {
let from = self.byte_to_raw_byte(byte_from);
let to = self.byte_to_raw_byte(byte_to);
Slice::from_raw_offsets(from, to, self)
}
pub fn slice(&self, char_from: usize, char_to: usize) -> Slice<'_> {
let from = self.char_to_raw_byte(char_from);
let to = self.offset_char_to_raw_byte(char_to, from, char_from);
Slice::from_raw_offsets(from, to, self)
}
fn chars_in_raw_range(&self, raw_from: usize, raw_to: usize) -> usize {
if raw_to <= self.gap_start || raw_from >= self.gap_end {
count_chars(&self.data[raw_from..raw_to])
} else {
count_chars(&self.data[raw_from..self.gap_start])
+ count_chars(&self.data[self.gap_end..raw_to])
}
}
pub fn raw_byte_to_char(&self, byte_idx: usize) -> usize {
self.chars_in_raw_range(0, byte_idx)
}
pub fn char_to_line(&self, char_idx: usize) -> usize {
match self.try_char_to_line(char_idx) {
Some(line_idx) => line_idx,
None => {
assert_line_endings!(self);
panic!(
"char index was {char_idx} but the buffer char length is {}",
self.len_chars()
);
}
}
}
pub fn try_char_to_line(&self, char_idx: usize) -> Option<usize> {
match char_idx.cmp(&self.n_chars) {
Ordering::Less => {
for (i, &char_offset) in self.line_endings.values().enumerate() {
if char_idx <= char_offset {
return Some(i);
}
}
Some(self.len_lines() - 1)
}
Ordering::Equal => Some(self.len_lines() - 1),
Ordering::Greater => None,
}
}
pub fn line_to_char(&self, line_idx: usize) -> usize {
match self.try_line_to_char(line_idx) {
Some(char_idx) => char_idx,
None => {
assert_line_endings!(self);
panic!(
"line index was {line_idx} but the buffer has {} lines",
self.len_lines()
);
}
}
}
pub fn try_line_to_char(&self, line_idx: usize) -> Option<usize> {
if line_idx > self.len_lines() - 1 {
return None;
}
if line_idx == 0 {
Some(0)
} else {
let k = *self.line_endings.iter().nth(line_idx - 1).unwrap().1;
Some(k + 1)
}
}
pub fn insert_char(&mut self, char_idx: usize, ch: char) {
let len = ch.len_utf8();
if self.gap().saturating_sub(len) < MIN_GAP {
self.grow_gap(len);
}
let idx = self.char_to_byte(char_idx);
self.move_gap_to(idx);
ch.encode_utf8(&mut self.data[self.gap_start..]);
self.gap_start += len;
self.n_chars += 1;
if ch == '\n' {
self.line_endings.insert(idx, char_idx);
}
self.update_line_endings(|(&bidx, &cidx)| {
if bidx > idx {
(bidx, cidx + 1)
} else {
(bidx, cidx)
}
});
#[cfg(test)]
assert_line_endings!(self);
}
pub fn insert_str(&mut self, char_idx: usize, s: &str) {
let len = s.len();
let len_chars = s.chars().count();
if self.gap().saturating_sub(len) < MIN_GAP {
self.grow_gap(len);
}
let idx = self.char_to_byte(char_idx);
self.move_gap_to(idx);
self.data[self.gap_start..self.gap_start + len].copy_from_slice(s.as_bytes());
self.gap_start += len;
self.n_chars += s.chars().count();
for (i, (offset, ch)) in s.char_indices().enumerate() {
if ch == '\n' {
self.line_endings.insert(idx + offset, char_idx + i);
}
}
self.update_line_endings(|(&bidx, &cidx)| {
if bidx >= idx + len {
(bidx, cidx + len_chars)
} else {
(bidx, cidx)
}
});
#[cfg(test)]
assert_line_endings!(self);
}
pub fn remove_char(&mut self, char_idx: usize) {
let idx = self.char_to_byte(char_idx);
let len = self.char_len(self.char_to_raw_byte(char_idx));
if idx != self.gap_start {
self.move_gap_to(idx);
}
self.gap_end += len;
self.n_chars -= 1;
if self.data[self.gap_end - 1] == b'\n' {
self.line_endings.remove(&(self.gap_end - 1));
}
for (_, count) in self.line_endings.iter_mut() {
if *count >= char_idx {
*count -= 1;
}
}
#[cfg(test)]
assert_line_endings!(self);
}
pub fn remove_range(&mut self, char_from: usize, char_to: usize) {
if char_from == char_to {
return;
}
assert!(
char_from < char_to,
"invalid range: from={char_from} > to={char_to}"
);
let raw_from = self.char_to_raw_byte(char_from);
let from = self.raw_byte_to_byte(raw_from);
let to = self.offset_char_to_byte(char_to, raw_from, char_from);
debug_assert!(from < to, "invalid byte range: from={from} > to={to}");
self.move_gap_to(from);
let n_bytes = to - from;
let n_chars = char_to - char_from;
self.gap_end += n_bytes;
self.n_chars -= n_chars;
self.line_endings
.retain(|idx, _| !((self.gap_end - n_bytes)..(self.gap_end)).contains(idx));
for (_, count) in self.line_endings.iter_mut() {
if *count >= char_to {
*count -= n_chars;
} else if *count > char_from {
*count = char_from;
}
}
#[cfg(test)]
assert_line_endings!(self);
}
fn update_line_endings<F>(&mut self, f: F)
where
F: Fn((&usize, &usize)) -> (usize, usize),
{
self.line_endings = self.line_endings.iter().map(f).collect();
}
fn grow_gap(&mut self, n: usize) {
if n >= self.next_gap {
self.next_gap = clamp_gap_size(self.len() + n, n.next_power_of_two());
}
let gap_increase = self.next_gap + n - self.gap();
let cap = self.cap + self.next_gap + n;
let mut buf = Vec::with_capacity(cap);
buf.extend_from_slice(&self.data[..self.gap_start]); buf.resize(buf.len() + self.next_gap + n, 0); buf.extend_from_slice(&self.data[self.gap_end..]);
let start = self.gap_start;
self.update_line_endings(|(&bidx, &cidx)| {
if bidx > start {
(bidx + gap_increase, cidx)
} else {
(bidx, cidx)
}
});
self.next_gap = clamp_gap_size(self.len(), self.next_gap * 2);
self.data = buf.into_boxed_slice();
self.gap_end += gap_increase;
self.cap = cap;
#[cfg(test)]
assert_line_endings!(self);
}
fn move_gap_to(&mut self, byte_idx: usize) {
assert!(
byte_idx <= self.len(),
"index out of bounds: {byte_idx} > {}",
self.len()
);
let gap = self.gap();
let (src, dest) = match byte_idx.cmp(&self.gap_start) {
Ordering::Equal => return,
Ordering::Less => {
let start = self.gap_start;
self.update_line_endings(|(&bidx, &cidx)| {
if bidx >= byte_idx && bidx <= start {
(bidx + gap, cidx)
} else {
(bidx, cidx)
}
});
(byte_idx..self.gap_start, byte_idx + gap)
}
Ordering::Greater => {
let end = self.gap_end;
self.update_line_endings(|(&bidx, &cidx)| {
if bidx >= end && bidx < byte_idx + gap {
(bidx - gap, cidx)
} else {
(bidx, cidx)
}
});
(self.gap_end..byte_idx + gap, self.gap_start)
}
};
self.data.copy_within(src, dest);
self.gap_end = byte_idx + gap;
self.gap_start = byte_idx;
#[cfg(test)]
assert_line_endings!(self);
}
#[inline]
pub fn char_to_byte(&self, char_idx: usize) -> usize {
self.offset_char_to_byte(char_idx, 0, 0)
}
#[inline]
fn raw_byte_to_byte(&self, raw: usize) -> usize {
if raw > self.gap_start {
raw - self.gap()
} else {
raw
}
}
#[inline]
pub fn byte_to_raw_byte(&self, byte: usize) -> usize {
if byte > self.gap_start {
byte + self.gap()
} else {
byte
}
}
#[inline]
fn offset_char_to_byte(
&self,
char_idx: usize,
byte_offset: usize,
char_offset: usize,
) -> usize {
let raw = self.offset_char_to_raw_byte(char_idx, byte_offset, char_offset);
self.raw_byte_to_byte(raw)
}
#[inline]
fn char_to_raw_byte(&self, char_idx: usize) -> usize {
self.offset_char_to_raw_byte(char_idx, 0, 0)
}
fn offset_char_to_raw_byte(
&self,
char_idx: usize,
mut byte_offset: usize,
mut char_offset: usize,
) -> usize {
let mut to = usize::MAX;
for (&b, &c) in self
.line_endings
.iter()
.skip_while(move |(b, _)| **b < byte_offset)
{
match c.cmp(&char_idx) {
Ordering::Less => (byte_offset, char_offset) = (b, c),
Ordering::Equal => return b,
Ordering::Greater => {
to = b;
break;
}
}
}
let slice = Slice::from_raw_offsets(byte_offset, to, self);
let mut chars = slice.chars();
let mut cur = byte_offset;
for _ in 0..(char_idx - char_offset) {
chars.next();
cur = chars.cur + byte_offset;
}
if cur > self.gap_start && cur < self.gap_end {
cur += self.gap()
}
cur
}
}
#[derive(Default, Debug, Copy, Clone, PartialEq, Eq)]
pub struct Slice<'a> {
from: usize,
left: &'a [u8],
right: &'a [u8],
}
impl<'a> Slice<'a> {
#[inline]
fn from_raw_offsets(from: usize, to: usize, gb: &'a GapBuffer) -> Slice<'a> {
let to = min(to, gb.data.len());
if to <= gb.gap_start || from >= gb.gap_end {
return Slice {
from,
left: &gb.data[from..to],
right: &[],
};
}
debug_assert!(from <= gb.gap_start, "line offset sits in gap");
Slice {
from,
left: &gb.data[from..gb.gap_start],
right: &gb.data[gb.gap_end..to],
}
}
pub fn from_byte(&self) -> usize {
self.from
}
pub fn len_utf8(&self) -> usize {
self.chars().count()
}
pub fn as_strs(&self) -> (&str, &str) {
unsafe {
(
std::str::from_utf8_unchecked(self.left),
std::str::from_utf8_unchecked(self.right),
)
}
}
pub fn slice_iter(self) -> SliceIter<'a> {
SliceIter {
inner: self,
pos: Some(false),
}
}
pub fn chars(self) -> Chars<'a> {
Chars { s: self, cur: 0 }
}
pub fn indexed_chars(self, from: usize, rev: bool) -> IdxChars<'a> {
let (cur, idx) = if rev {
(
self.left.len() + self.right.len(),
from + count_chars(self.left) + count_chars(self.right),
)
} else {
(0, from)
};
IdxChars {
s: self,
cur,
idx,
rev,
}
}
fn cur_and_data(&self, cur: usize) -> (usize, &[u8]) {
if cur < self.left.len() {
(cur, self.left)
} else {
(cur - self.left.len(), self.right)
}
}
}
impl fmt::Display for Slice<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let mut v = Vec::with_capacity(self.left.len() + self.right.len());
v.extend_from_slice(self.left);
v.extend_from_slice(self.right);
match String::from_utf8(v) {
Ok(s) => write!(f, "{s}"),
Err(_) => Err(fmt::Error),
}
}
}
impl<'b> PartialEq<&'b str> for Slice<'_> {
fn eq(&self, other: &&'b str) -> bool {
let b = other.as_bytes();
&b[..self.left.len()] == self.left && &b[self.left.len()..] == self.right
}
}
impl PartialEq<String> for Slice<'_> {
fn eq(&self, other: &String) -> bool {
*self == other.as_str()
}
}
#[derive(Debug)]
pub struct SliceIter<'a> {
inner: Slice<'a>,
pos: Option<bool>,
}
impl<'a> Iterator for SliceIter<'a> {
type Item = &'a [u8];
fn next(&mut self) -> Option<Self::Item> {
match self.pos? {
false => {
self.pos = Some(true);
Some(self.inner.left)
}
true => {
self.pos = None;
Some(self.inner.right)
}
}
}
}
#[derive(Default, Debug, Copy, Clone, PartialEq, Eq)]
pub struct Chars<'a> {
s: Slice<'a>,
cur: usize,
}
impl Iterator for Chars<'_> {
type Item = char;
fn next(&mut self) -> Option<Self::Item> {
if self.cur >= self.s.left.len() + self.s.right.len() {
return None;
}
let (cur, data) = self.s.cur_and_data(self.cur);
let ch = unsafe { decode_char_at(cur, data) };
let len = ch.len_utf8();
self.cur += len;
Some(ch)
}
}
#[derive(Default, Debug, Copy, Clone, PartialEq, Eq)]
pub struct IdxChars<'a> {
s: Slice<'a>,
cur: usize,
idx: usize,
rev: bool,
}
impl Iterator for IdxChars<'_> {
type Item = (usize, char);
fn next(&mut self) -> Option<Self::Item> {
if (!self.rev && self.cur >= self.s.left.len() + self.s.right.len())
|| (self.rev && self.cur == 0)
{
return None;
}
if self.rev {
let (cur, data) = self.s.cur_and_data(self.cur - 1);
let ch = unsafe { decode_char_ending_at(cur, data) };
let len = ch.len_utf8();
self.idx -= 1;
self.cur -= len;
Some((self.idx, ch))
} else {
let (cur, data) = self.s.cur_and_data(self.cur);
let ch = unsafe { decode_char_at(cur, data) };
let len = ch.len_utf8();
let res = Some((self.idx, ch));
self.cur += len;
self.idx += 1;
res
}
}
}
const CONT_MASK: u8 = 0b0011_1111;
#[inline]
const fn utf8_first_byte(byte: u8, width: u32) -> u32 {
(byte & (0x7F >> width)) as u32
}
#[inline]
const fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
(ch << 6) | (byte & CONT_MASK) as u32
}
#[inline]
const fn utf8_is_cont_byte(byte: u8) -> bool {
(byte as i8) < -64
}
#[inline]
unsafe fn decode_char_at(start: usize, bytes: &[u8]) -> char {
let x = bytes[start];
if x < 128 {
return char::from_u32_unchecked(x as u32);
}
let init = utf8_first_byte(x, 2);
let y = bytes[start + 1];
let mut ch = utf8_acc_cont_byte(init, y);
if x >= 0xE0 {
let z = bytes[start + 2];
let y_z = utf8_acc_cont_byte((y & CONT_MASK) as u32, z);
ch = init << 12 | y_z;
if x >= 0xF0 {
let w = bytes[start + 3];
ch = (init & 7) << 18 | utf8_acc_cont_byte(y_z, w);
}
}
char::from_u32_unchecked(ch)
}
#[inline]
unsafe fn decode_char_ending_at(end: usize, bytes: &[u8]) -> char {
let w = match bytes[end] {
b if b < 128 => return char::from_u32_unchecked(b as u32),
b => b,
};
let mut ch;
let z = bytes[end - 1];
ch = utf8_first_byte(z, 2);
if utf8_is_cont_byte(z) {
let y = bytes[end - 2];
ch = utf8_first_byte(y, 3);
if utf8_is_cont_byte(y) {
let x = bytes[end - 3];
ch = utf8_first_byte(x, 4);
ch = utf8_acc_cont_byte(ch, y);
}
ch = utf8_acc_cont_byte(ch, z);
}
ch = utf8_acc_cont_byte(ch, w);
char::from_u32_unchecked(ch)
}
#[cfg(test)]
mod tests {
use super::*;
use simple_test_case::test_case;
fn debug_buffer_content(gb: &GapBuffer) -> String {
let mut v = gb.data.to_vec();
v[gb.gap_start..gb.gap_end].copy_from_slice("_".repeat(gb.gap()).as_bytes());
String::from_utf8(v).expect("valid utf8")
}
fn raw_debug_buffer_content(gb: &GapBuffer) -> String {
let mut v = gb.data.to_vec();
for b in v[gb.gap_start..gb.gap_end].iter_mut() {
if *b == b'\0' {
*b = b'_';
}
}
v.insert(gb.gap_end, b']');
v.insert(gb.gap_start, b'[');
String::from_utf8(v).expect("valid utf8")
}
#[test]
fn from_string_matches_from_str() {
let s = "this is a test";
let gb1 = GapBuffer::from(s.to_string());
let gb2 = GapBuffer::from(s);
assert_eq!(gb1, gb2);
}
#[test]
fn to_string_works() {
let s = "this is a test";
let gb = GapBuffer::from(s.to_string());
assert_eq!(gb.to_string(), s);
}
#[test]
fn insert_into_empty_string_initial_gb_works() {
let mut gb = GapBuffer::from(String::new());
gb.insert_char(0, 'a');
assert_eq!(gb.to_string(), "a");
}
#[test_case("fooβfooβfoo"; "interleaved multibyte and ascii")]
#[test_case("hello, δΈη!"; "blocks of multibyte and ascii")]
#[test_case("hello, world!"; "just ascii")]
#[test]
fn len_chars_works(s: &str) {
let mut gb = GapBuffer::from(s);
let len_s = s.chars().count();
println!("initial: {:?}", raw_debug_buffer_content(&gb));
assert_eq!(gb.len_chars(), len_s);
assert_eq!(
gb.len_chars(),
gb.to_string().chars().count(),
"char iter len != len_chars"
);
assert_eq!(
gb.line(0).chars().count(),
gb.line_len_chars(0),
"line_len_chars != len_chars"
);
gb.insert_char(5, 'X');
println!("after insert X: {:?}", raw_debug_buffer_content(&gb));
assert_eq!(gb.len_chars(), len_s + 1);
assert_eq!(
gb.len_chars(),
gb.to_string().chars().count(),
"char iter len != len_chars"
);
gb.insert_char(3, 'η');
println!("after insert η: {:?}", raw_debug_buffer_content(&gb));
assert_eq!(gb.len_chars(), len_s + 2);
assert_eq!(
gb.len_chars(),
gb.to_string().chars().count(),
"char iter len != len_chars"
);
assert_eq!(gb.char(3), 'η');
gb.remove_char(3);
println!("after remove η: {:?}", raw_debug_buffer_content(&gb));
assert_eq!(gb.len_chars(), len_s + 1);
assert_eq!(
gb.len_chars(),
gb.to_string().chars().count(),
"char iter len != len_chars"
);
assert_eq!(gb.char(5), 'X');
gb.remove_char(5);
println!("after remove X: {:?}", debug_buffer_content(&gb));
assert_eq!(gb.len_chars(), len_s);
assert_eq!(
gb.len_chars(),
gb.to_string().chars().count(),
"char iter len != len_chars"
);
assert_eq!(gb.to_string(), s);
}
#[test_case("fooβfooβfoo"; "interleaved multibyte and ascii")]
#[test_case("hello, δΈη!"; "blocks of multibyte and ascii")]
#[test_case("hello, world!"; "just ascii")]
#[test]
fn move_gap_to_maintains_content(s: &str) {
let mut gb = GapBuffer::from(s);
for i in 0..gb.len_chars() {
let idx = gb.char_to_byte(i);
gb.move_gap_to(idx);
let (s1, s2) = (
std::str::from_utf8(&gb.data[..gb.gap_start]).unwrap(),
std::str::from_utf8(&gb.data[gb.gap_end..]).unwrap(),
);
assert_eq!(format!("{s1}{s2}"), s, "idx={idx}");
}
}
#[test]
fn move_gap_to_maintains_line_content() {
let s = "hello, world!\nhow are you?\nthis is a test";
let mut gb = GapBuffer::from(s);
assert_eq!(gb.len_lines(), 3);
for i in 0..gb.len_chars() {
let idx = gb.char_to_byte(i);
gb.move_gap_to(idx);
assert_eq!(gb.len_lines(), 3);
assert_eq!(gb.line(0).to_string(), "hello, world!\n", "idx={idx}");
assert_eq!(gb.line(1).to_string(), "how are you?\n", "idx={idx}");
assert_eq!(gb.line(2).to_string(), "this is a test", "idx={idx}");
}
}
#[test_case(0, 0, 0; "BOF cur at BOF")]
#[test_case(27, 0, 0; "BOF cur at EOF")]
#[test_case(27, 5, 5; "in the buffer cur at EOF")]
#[test_case(5, 5, 5; "in the buffer cur at gap")]
#[test_case(5, 3, 3; "in the buffer cur before gap")]
#[test_case(5, 11, 15; "in the buffer cur after gap")]
#[test_case(5, 7, 7; "multi byte 1")]
#[test_case(5, 8, 10; "multi byte 2")]
#[test]
fn char_to_byte_works(cur: usize, char_idx: usize, expected: usize) {
let s = "hello, δΈη!\nhow are you?";
let mut gb = GapBuffer::from(s);
assert_eq!(s.len(), 27, "EOF case is not 0..s.len()");
assert_eq!("δΈ".len(), 3);
gb.move_gap_to(cur);
let byte_idx = gb.char_to_byte(char_idx);
assert_eq!(byte_idx, expected, "{:?}", debug_buffer_content(&gb));
}
#[test_case(0, 0, 0; "BOF cur at BOF")]
#[test_case(27, 0, 0; "BOF cur at EOF")]
#[test_case(27, 5, 5; "in the buffer cur at EOF")]
#[test_case(5, 5, 5; "in the buffer cur at gap")]
#[test_case(5, 3, 3; "in the buffer cur before gap")]
#[test_case(5, 11, 79; "in the buffer cur after gap")]
#[test_case(5, 7, 71; "multi byte 1")]
#[test_case(5, 8, 74; "multi byte 2")]
#[test]
fn char_to_raw_byte_works(cur: usize, char_idx: usize, expected: usize) {
let s = "hello, δΈη!\nhow are you?";
let mut gb = GapBuffer::from(s);
assert_eq!(s.len(), 27, "EOF case is not 0..s.len()");
assert_eq!("δΈ".len(), 3);
gb.move_gap_to(cur);
let char_idx = gb.char_to_raw_byte(char_idx);
assert_eq!(char_idx, expected, "{:?}", debug_buffer_content(&gb));
}
#[test_case(0, 0, "hello, world!\n"; "first line cur at BOF")]
#[test_case(0, 1, "how are you?"; "second line cur at BOF")]
#[test_case(26, 0, "hello, world!\n"; "first line cur at EOF")]
#[test_case(26, 1, "how are you?"; "second line cur at EOF")]
#[test_case(10, 0, "hello, world!\n"; "first line cur in line")]
#[test_case(10, 1, "how are you?"; "second line cur in line")]
#[test]
fn slice_to_string_works(cur: usize, line: usize, expected: &str) {
let mut gb = GapBuffer::from("hello, world!\nhow are you?");
gb.move_gap_to(cur);
assert_eq!(gb.line(line).to_string(), expected);
}
#[test]
fn line_to_char_works() {
let l1 = "hello, world!\n";
let l2 = "how are you?\n";
let l3 = "this is a test";
let gb = GapBuffer::from(format!("{l1}{l2}{l3}"));
assert_eq!(gb.line_to_char(0), 0);
assert_eq!(gb.line_to_char(1), l1.chars().count());
assert_eq!(gb.line_to_char(2), l1.chars().count() + l2.chars().count());
}
#[test_case(0, 0; "start of first line")]
#[test_case(5, 0; "in first line")]
#[test_case(13, 0; "end of first line")]
#[test_case(14, 1; "start of second line")]
#[test_case(20, 1; "in second line")]
#[test_case(26, 1; "end of second line")]
#[test_case(27, 2; "start of third line")]
#[test_case(30, 2; "in third line")]
#[test_case(40, 2; "end of third line")]
#[test]
fn char_to_line_works(char_idx: usize, line_idx: usize) {
let l1 = "hello, world!\n";
let l2 = "how are you?\n";
let l3 = "this is a test";
let gb = GapBuffer::from(format!("{l1}{l2}{l3}"));
assert_eq!(gb.char_to_line(char_idx), line_idx);
}
#[test_case(&[(0, 'h')], "hello world"; "insert front")]
#[test_case(&[(4, ',')], "ello, world"; "insert inner")]
#[test_case(&[(10, '!')], "ello world!"; "insert back")]
#[test_case(&[(4, ','), (11, '!')], "ello, world!"; "insert inner then back")]
#[test_case(&[(4, ','), (0, 'h')], "hello, world"; "insert inner then front")]
#[test_case(&[(0, 'h'), (5, ','),], "hello, world"; "insert front then inner")]
#[test_case(&[(10, '!'), (0, 'h'), (5, ',')], "hello, world!"; "insert all")]
#[test]
fn insert_char(inserts: &[(usize, char)], expected: &str) {
let mut gb = GapBuffer::from("ello world");
for &(idx, ch) in inserts {
gb.insert_char(idx, ch);
}
assert_eq!(gb.to_string(), expected, "{:?}", debug_buffer_content(&gb))
}
#[test]
fn insert_char_with_moving_cur() {
let mut gb = GapBuffer::from("hello ");
gb.insert_char(6, 'w');
gb.insert_char(7, 'o');
gb.insert_char(8, 'r');
gb.insert_char(9, 'l');
gb.insert_char(10, 'd');
gb.insert_char(11, '!');
gb.insert_char(5, ',');
assert_eq!(
gb.to_string(),
"hello, world!",
"{:?}",
debug_buffer_content(&gb)
)
}
#[test]
fn insert_newline_char_is_tracked_correctly() {
let s = "hello, world!\nhow are you?";
let mut gb = GapBuffer::from(s);
assert_eq!(gb.len_lines(), 2);
println!("initial: {:?}", raw_debug_buffer_content(&gb));
gb.insert_char(6, '\n');
println!("insert: {:?}", raw_debug_buffer_content(&gb));
assert_eq!(gb.len_lines(), 3);
assert_eq!(gb.line(0).to_string(), "hello,\n");
assert_eq!(gb.line(1).to_string(), " world!\n");
assert_eq!(gb.line(2).to_string(), "how are you?");
for idx in 0..=gb.len_chars() {
gb.move_gap_to(idx);
assert_eq!(gb.len_lines(), 3);
assert_eq!(gb.line(0).to_string(), "hello,\n", "idx={idx}");
assert_eq!(gb.line(1).to_string(), " world!\n", "idx={idx}");
assert_eq!(gb.line(2).to_string(), "how are you?", "idx={idx}");
}
}
#[test_case(&[(0, "hell")], "helloworl"; "insert front")]
#[test_case(&[(1, ", ")], "o, worl"; "insert inner")]
#[test_case(&[(5, "d!")], "oworld!"; "insert back")]
#[test_case(&[(5, "d!"), (0, "hell"), (5, ", ")], "hello, world!"; "insert all")]
#[test_case(&[(5, "d!"), (0, "hell"), (5, ",\n")], "hello,\nworld!"; "insert all w newline")]
#[test]
fn insert_str(inserts: &[(usize, &str)], expected: &str) {
let mut gb = GapBuffer::from("oworl");
for &(idx, s) in inserts {
gb.insert_str(idx, s);
}
assert_eq!(gb.to_string(), expected, "{:?}", debug_buffer_content(&gb))
}
#[test]
fn insert_newline_in_str_is_tracked_correctly() {
let s = "hello, world!\nhow are you?";
let mut gb = GapBuffer::from(s);
assert_eq!(gb.len_lines(), 2);
let s2 = " sailor\nisn't this fun?\nwhat a wonderful\n";
gb.insert_str(6, s2);
for idx in 0..=gb.len_chars() {
gb.move_gap_to(idx);
assert_eq!(gb.len_lines(), 5);
assert_eq!(gb.line(0).to_string(), "hello, sailor\n", "idx={idx}");
assert_eq!(gb.line(1).to_string(), "isn't this fun?\n", "idx={idx}");
assert_eq!(gb.line(2).to_string(), "what a wonderful\n", "idx={idx}");
assert_eq!(gb.line(3).to_string(), " world!\n", "idx={idx}");
assert_eq!(gb.line(4).to_string(), "how are you?", "idx={idx}");
}
}
#[test_case(6, "hello,world!"; "at gap start")]
#[test_case(7, "hello, orld!"; "at gap end")]
#[test_case(12, "hello, world"; "after gap")]
#[test_case(0, "ello, world!"; "before gap")]
#[test]
fn remove_char(idx: usize, expected: &str) {
let mut gb = GapBuffer::from("hello, world!");
gb.move_gap_to(6); gb.remove_char(idx);
assert_eq!(gb.to_string(), expected, "{:?}", debug_buffer_content(&gb))
}
#[test]
fn remove_newline_char_is_tracked_correctly() {
let s = "hello, world!\nhow are you?";
let mut gb = GapBuffer::from(s);
assert_eq!(gb.len_lines(), 2);
gb.remove_char(13);
assert_eq!(gb.len_lines(), 1);
assert_eq!(gb.line(0).to_string(), "hello, world!how are you?");
}
#[test_case(6, 9, "hello,rld!"; "at gap start")]
#[test_case(7, 10, "hello, ld!"; "at gap end")]
#[test_case(10, 13, "hello, wor"; "after gap")]
#[test_case(0, 5, ", world!"; "before gap")]
#[test_case(0, 13, ""; "remove all")]
#[test]
fn remove_range_works(from: usize, to: usize, expected: &str) {
let s = "hello, world!";
assert_eq!(s.len(), 13, "remove all case is not 0..s.len()");
let mut gb = GapBuffer::from(s);
gb.move_gap_to(6); gb.remove_range(from, to);
assert_eq!(gb.to_string(), expected, "{:?}", debug_buffer_content(&gb))
}
#[test]
fn remove_range_w_multibyte_chars_works() {
let s = "fooβfooβfoo";
let mut gb = GapBuffer::from(s);
gb.remove_range(0, 3);
assert_eq!(gb.to_string(), "βfooβfoo");
assert_eq!(gb.len_chars(), 8);
gb.remove_range(1, 4);
assert_eq!(gb.to_string(), "ββfoo");
assert_eq!(gb.len_chars(), 5);
gb.remove_range(2, 5);
assert_eq!(gb.to_string(), "ββ");
assert_eq!(gb.len_chars(), 2);
}
#[test]
fn remove_range_for_last_line_works() {
let s = "hello, world!\nthis is the last line";
let mut gb = GapBuffer::from(s);
gb.remove_range(14, s.len());
assert_eq!(gb.to_string(), "hello, world!\n");
assert_eq!(gb.len_lines(), 2);
}
#[test_case(10, 15, "hello, worow are you?"; "spanning newline")]
#[test_case(7, 14, "hello, how are you?"; "ending on newline")]
#[test_case(13, 26, "hello, world!"; "starting on newline")]
#[test]
fn remove_newline_in_str_is_tracked_correctly(from: usize, to: usize, expected: &str) {
let s = "hello, world!\nhow are you?";
let mut gb = GapBuffer::from(s);
assert_eq!(gb.len_lines(), 2);
gb.remove_range(from, to);
assert_eq!(gb.len_lines(), 1);
assert_eq!(gb.to_string(), expected);
assert_eq!(gb.line(0).to_string(), expected);
}
#[test_case('X'; "ascii")]
#[test_case('η'; "multi-byte")]
#[test]
fn insert_remove_char_is_idempotent(ch: char) {
let s = "hello, world!";
let mut gb = GapBuffer::from(s);
gb.insert_char(6, ch);
gb.remove_char(6);
assert_eq!(gb.to_string(), s, "{:?}", debug_buffer_content(&gb))
}
#[test_case("TEST", 1; "without trailing newline")]
#[test_case("TEST\n", 2; "with trailing newline")]
#[test_case("TEST\nTEST", 2; "with internal newline")]
#[test]
fn insert_remove_str_is_idempotent(edit: &str, expected_lines: usize) {
let s = "hello, world!";
let mut gb = GapBuffer::from(s);
println!("initial: {:?}", raw_debug_buffer_content(&gb));
for n in 0..gb.len_lines() {
println!("{:?}", gb.line(n).to_string());
}
gb.insert_str(6, edit);
assert_eq!(gb.len_lines(), expected_lines);
println!("insert: {:?}", raw_debug_buffer_content(&gb));
for n in 0..gb.len_lines() {
println!("{:?}", gb.line(n).to_string());
}
gb.remove_range(6, 6 + edit.len());
println!("remove: {:?}", raw_debug_buffer_content(&gb));
for n in 0..gb.len_lines() {
println!("{:?}", gb.line(n).to_string());
}
assert_eq!(gb.to_string(), s);
}
#[test]
fn chars_work() {
let s1 = "hello, world!\n";
let s2 = "how are you?";
let gb = GapBuffer::from(format!("{s1}{s2}"));
let l1_chars: String = gb.line(0).chars().collect();
assert_eq!(l1_chars, s1);
let l2_chars: String = gb.line(1).chars().collect();
assert_eq!(l2_chars, s2);
}
#[test_case(
"hello, δΈη!", false,
&[(0, 'h'), (1, 'e'), (2, 'l'), (3, 'l'), (4, 'o'),
(5, ','), (6, ' '), (7, 'δΈ'), (8, 'η'), (9, '!')];
"multi-byte block forward"
)]
#[test_case(
"hello, δΈη!", true,
&[(9, '!'), (8, 'η'), (7, 'δΈ'), (6, ' '), (5, ','),
(4, 'o'), (3, 'l'), (2, 'l'), (1, 'e'), (0, 'h')];
"multi-byte block reversed"
)]
#[test_case(
"fooβfooβfoo", false,
&[(0, 'f'), (1, 'o'), (2, 'o'), (3, 'β'), (4, 'f'), (5, 'o'),
(6, 'o'), (7, 'β'), (8, 'f'), (9, 'o'), (10, 'o')];
"interleaved forward"
)]
#[test_case(
"fooβfooβfoo", true,
&[(10, 'o'), (9, 'o'), (8, 'f'), (7, 'β'), (6, 'o'), (5, 'o'),
(4, 'f'), (3, 'β'), (2, 'o'), (1, 'o'), (0, 'f')];
"interleaved reversed"
)]
#[test]
fn indexed_chars_works(s: &str, rev: bool, expected: &[(usize, char)]) {
let mut gb = GapBuffer::from(s);
let v: Vec<(usize, char)> = gb.line(0).indexed_chars(0, rev).collect();
assert_eq!(&v, expected);
for i in 0..gb.len_chars() {
let idx = gb.char_to_byte(i);
gb.move_gap_to(idx);
let v: Vec<(usize, char)> = gb.line(0).indexed_chars(0, rev).collect();
assert_eq!(&v, expected, "idx={idx}");
}
}
#[test_case("fooβfooβfoo"; "interleaved multibyte and ascii")]
#[test_case("hello, δΈη!"; "blocks of multibyte and ascii")]
#[test]
fn chars_works(s: &str) {
let mut gb = GapBuffer::from(s);
let chars: String = gb.line(0).chars().collect();
assert_eq!(chars, s);
for i in 0..gb.len_chars() {
let idx = gb.char_to_byte(i);
gb.move_gap_to(idx);
let chars: String = gb.line(0).chars().collect();
assert_eq!(chars, s, "idx={idx}");
}
}
#[test]
fn slice_works() {
let mut gb = GapBuffer::from("hello, world!\nhow are you?");
let slice = Slice::from_raw_offsets(0, gb.cap, &gb);
let (s1, s2) = slice.as_strs();
assert_eq!(s1, "");
assert_eq!(s2, "hello, world!\nhow are you?");
let slice = gb.slice(6, 17);
let (s1, s2) = slice.as_strs();
assert_eq!(s1, " world!\nhow");
assert_eq!(s2, "");
gb.move_gap_to(12);
println!("after move: {:?}", raw_debug_buffer_content(&gb));
let slice = gb.slice(6, 17);
let (s1, s2) = slice.as_strs();
assert_eq!(s1, " world");
assert_eq!(s2, "!\nhow");
}
#[test]
fn slice_eq_str_works() {
let mut gb = GapBuffer::from("hello, world!\nhow are you?");
gb.move_gap_to(3);
let slice = gb.slice(0, 5);
assert_eq!(slice, "hello");
}
#[test]
fn chars_in_raw_range_works() {
let mut gb = GapBuffer::from("hello, world!\nhow are you?");
let char_from = 7;
let char_to = 12;
for i in 0..gb.len_chars() {
let idx = gb.char_to_byte(i);
gb.move_gap_to(idx);
let byte_from = gb.char_to_raw_byte(char_from);
let byte_to = gb.char_to_raw_byte(char_to);
let n_chars = gb.chars_in_raw_range(byte_from, byte_to);
assert_eq!(n_chars, char_to - char_from, "gap at {i}");
let n_chars = gb.chars_in_raw_range(0, gb.char_to_raw_byte(gb.n_chars));
assert_eq!(n_chars, gb.n_chars, "gap at {i}");
}
}
fn _insert_chars(gb: &mut GapBuffer, s: &str) {
for (idx, ch) in s.chars().enumerate() {
gb.insert_char(idx + 4, ch);
}
}
fn _insert_str(gb: &mut GapBuffer, s: &str) {
gb.insert_str(4, s);
}
#[test_case(_insert_chars; "individual chars")]
#[test_case(_insert_str; "whole string")]
#[test]
fn insert_with_multibyte_chars_preserves_line_endings(insert: fn(&mut GapBuffer, &str)) {
let slice_str = |gb: &GapBuffer| gb.slice(0, gb.len_chars()).to_string();
let mut gb = GapBuffer::from("foo\nbar\nbaz\n");
let s = "δΈ\nη π¦\n ";
insert(&mut gb, s);
assert_eq!(slice_str(&gb), "foo\nδΈ\nη π¦\n bar\nbaz\n");
assert_eq!(gb.char(8), 'π¦');
gb.remove_char(8);
assert_eq!(slice_str(&gb), "foo\nδΈ\nη \n bar\nbaz\n");
}
#[test]
fn char_works_with_multibyte_characters() {
let s = "δΈ\nη π¦\n ";
let gb = GapBuffer::from(s);
for (idx, ch) in s.chars().enumerate() {
assert_eq!(gb.char(idx), ch);
}
}
}