use std::{
borrow::Cow,
cell::UnsafeCell,
cmp::{Ordering, max, min},
collections::HashMap,
fmt,
};
const MIN_GAP: usize = 32;
const MIN_GAP_GROW: usize = 64;
const MAX_GAP_GROW: usize = 1024 * 8;
#[inline]
fn clamp_gap_size(len: usize, min_gap: usize) -> usize {
min(max(len / 20, min_gap), MAX_GAP_GROW)
}
#[inline]
fn count_chars(bytes: &[u8]) -> usize {
if bytes.is_empty() {
return 0;
}
let mut n_chars = 0;
let mut cur = 0;
while cur < bytes.len() {
let ch = unsafe { decode_char_at(cur, bytes) };
cur += ch.len_utf8();
n_chars += 1;
}
n_chars
}
type ByteOffset = usize;
type CharOffset = usize;
#[derive(Debug)]
struct CharToByteCache(UnsafeCell<HashMap<CharOffset, ByteOffset>>);
impl CharToByteCache {
fn clear(&self) {
unsafe { (&mut *self.0.get()).clear() }
}
fn get(&self, char_idx: CharOffset) -> Option<ByteOffset> {
unsafe { (&*self.0.get()).get(&char_idx).copied() }
}
fn insert(&self, char_idx: CharOffset, byte_idx: ByteOffset) {
unsafe {
(&mut *self.0.get()).insert(char_idx, byte_idx);
}
}
}
impl Clone for CharToByteCache {
fn clone(&self) -> Self {
unsafe { Self(UnsafeCell::new((&*self.0.get()).clone())) }
}
}
impl PartialEq for CharToByteCache {
fn eq(&self, _: &Self) -> bool {
true }
}
impl Eq for CharToByteCache {}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GapBuffer {
data: Box<[u8]>,
cap: usize,
gap_start: usize,
gap_end: usize,
next_gap: usize,
n_chars: usize,
line_endings: Vec<(ByteOffset, CharOffset)>,
char_to_byte_cache: CharToByteCache,
}
impl Default for GapBuffer {
fn default() -> Self {
Self::new()
}
}
fn compute_line_endings(s: &str) -> (usize, Vec<(ByteOffset, CharOffset)>) {
let mut n_chars = 0;
let mut line_endings = Vec::new();
for (line_chars, (idx, ch)) in s.char_indices().enumerate() {
n_chars += 1;
if ch == '\n' {
line_endings.push((idx, line_chars));
}
}
(n_chars, line_endings)
}
impl From<String> for GapBuffer {
fn from(s: String) -> Self {
let gap_start = s.len();
let next_gap = clamp_gap_size(gap_start, MIN_GAP_GROW);
let cap = gap_start + next_gap;
let (n_chars, line_endings) = compute_line_endings(&s);
let mut v = s.into_bytes();
v.resize(cap, 0);
let mut gb = Self {
data: v.into_boxed_slice(),
cap,
gap_start,
gap_end: cap,
next_gap,
n_chars,
line_endings,
char_to_byte_cache: CharToByteCache(UnsafeCell::new(HashMap::new())),
};
gb.move_gap_to(0);
gb
}
}
impl From<&str> for GapBuffer {
fn from(s: &str) -> Self {
let gap_start = s.len();
let next_gap = clamp_gap_size(gap_start, MIN_GAP_GROW);
let cap = gap_start + next_gap;
let (n_chars, line_endings) = compute_line_endings(s);
let mut v = Vec::with_capacity(cap);
v.extend_from_slice(s.as_bytes());
v.resize(cap, 0);
let mut gb = Self {
data: v.into_boxed_slice(),
cap,
gap_start,
gap_end: cap,
next_gap,
n_chars,
line_endings,
char_to_byte_cache: CharToByteCache(UnsafeCell::new(HashMap::new())),
};
gb.move_gap_to(0);
gb
}
}
impl fmt::Display for GapBuffer {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let (s1, s2) = self.as_strs();
write!(f, "{s1}{s2}")
}
}
impl<'a> PartialEq<&'a str> for GapBuffer {
fn eq(&self, other: &&'a str) -> bool {
let b = other.as_bytes();
if b.len() != self.len() {
return false;
}
b[..self.gap_start] == self.data[..self.gap_start]
&& b[self.gap_start..] == self.data[self.gap_end..]
}
}
impl PartialEq<String> for GapBuffer {
fn eq(&self, other: &String) -> bool {
*self == other.as_str()
}
}
macro_rules! assert_line_endings {
($self:expr) => {{
let true_endings: Vec<usize> = $self
.data
.iter()
.enumerate()
.filter(|&(i, &b)| b == b'\n' && (i < $self.gap_start || i >= $self.gap_end))
.map(|(i, _)| i)
.collect();
let tracked_line_endings: Vec<usize> = $self.line_endings.iter().map(|(n, _)| *n).collect();
assert_eq!(
tracked_line_endings, true_endings,
"incorrect byte positions for line endings with gap_start={} gap_end={}",
$self.gap_start, $self.gap_end
);
let true_endings: Vec<usize> = $self
.to_string()
.chars()
.enumerate()
.filter(|&(_, c)| c == '\n')
.map(|(i, _)| i)
.collect();
let tracked_line_endings: Vec<usize> = $self.line_endings.iter().map(|(_, n)| *n).collect();
assert_eq!(
tracked_line_endings, true_endings,
"incorrect character positions for line endings with gap_start={} gap_end={}",
$self.gap_start, $self.gap_end
);
}};
}
impl GapBuffer {
pub fn new() -> Self {
Self::from("")
}
#[inline]
fn gap(&self) -> usize {
self.gap_end - self.gap_start
}
#[inline]
pub fn len(&self) -> usize {
self.cap - self.gap()
}
#[inline]
pub fn is_empty(&self) -> bool {
self.cap == self.gap()
}
pub fn make_contiguous(&mut self) -> &[u8] {
self.move_gap_to(0);
&self.data[self.gap_end..]
}
pub fn is_contiguous(&self) -> bool {
self.gap_start == 0 || self.gap_end == self.cap
}
pub fn as_str(&mut self) -> &str {
let raw = self.make_contiguous();
unsafe { std::str::from_utf8_unchecked(raw) }
}
pub unsafe fn substr_from(&self, byte_offset: usize) -> &str {
unsafe { std::str::from_utf8_unchecked(&self.data[self.gap_end + byte_offset..]) }
}
pub unsafe fn assume_contiguous_bytes(&self) -> &[u8] {
&self.data[self.gap_end..]
}
pub unsafe fn assume_contiguous_str(&self) -> &str {
unsafe { std::str::from_utf8_unchecked(&self.data[self.gap_end..]) }
}
pub fn as_strs(&self) -> (&str, &str) {
let (left, right) = self.as_byte_slices();
unsafe {
(
std::str::from_utf8_unchecked(left),
std::str::from_utf8_unchecked(right),
)
}
}
pub fn as_byte_slices(&self) -> (&[u8], &[u8]) {
(&self.data[0..self.gap_start], &self.data[self.gap_end..])
}
pub fn has_trailing_newline(&self) -> bool {
let (l, r) = self.as_byte_slices();
if r.is_empty() {
l.ends_with(b"\n")
} else {
r.ends_with(b"\n")
}
}
pub fn chars(&self) -> Chars<'_> {
self.as_slice().chars()
}
pub fn iter_lines(&self) -> impl Iterator<Item = Slice<'_>> {
let mut line_idx = 0;
std::iter::from_fn(move || {
if line_idx == self.len_lines() {
return None;
}
let slice = self.line(line_idx);
line_idx += 1;
Some(slice)
})
}
#[inline]
pub fn len_lines(&self) -> usize {
match self.line_endings.last() {
Some(&(raw_idx, _)) => {
let n = self.line_endings.len();
let byte_idx = if raw_idx > self.gap_start {
raw_idx - self.gap_end
} else {
raw_idx
};
if byte_idx < self.len() { n + 1 } else { n }
}
None => 1,
}
}
#[inline]
pub fn len_chars(&self) -> usize {
self.n_chars
}
#[inline]
pub fn line_end_byte(&self, line_idx: usize) -> usize {
if line_idx < self.line_endings.len() {
self.raw_byte_to_byte(self.line_endings[line_idx].0)
} else {
self.len()
}
}
pub fn lines_before_byte_offset(&self, byte_offset: usize) -> usize {
let raw_offset = self.byte_to_raw_byte(byte_offset);
self.line_endings.partition_point(|(b, _)| *b < raw_offset)
}
pub fn clear(&mut self) {
self.move_gap_to(0);
self.gap_end = self.cap;
self.line_endings.clear();
self.n_chars = 0;
#[cfg(test)]
assert_line_endings!(self);
}
#[inline]
pub fn char(&self, char_idx: usize) -> char {
let byte_idx = self.char_to_raw_byte(char_idx);
unsafe { decode_char_at(byte_idx, &self.data) }
}
#[inline]
pub fn get_char(&self, char_idx: usize) -> Option<char> {
if char_idx < self.n_chars {
Some(self.char(char_idx))
} else {
None
}
}
#[inline]
pub fn char_at(&self, byte_idx: usize) -> char {
let byte_idx = self.byte_to_raw_byte(byte_idx);
unsafe { decode_char_at(byte_idx, &self.data) }
}
#[inline]
pub fn get_char_at(&self, byte_idx: usize) -> Option<char> {
if byte_idx < self.len() {
Some(self.char_at(byte_idx))
} else {
None
}
}
#[inline]
fn char_len(&self, byte_idx: usize) -> usize {
unsafe { decode_char_at(byte_idx, &self.data) }.len_utf8()
}
#[inline]
pub fn line(&self, line_idx: usize) -> Slice<'_> {
if line_idx >= self.len_lines() {
assert_line_endings!(self);
panic!(
"line index was {line_idx} but buffer has {} lines",
self.len_lines()
)
}
let to = if line_idx < self.line_endings.len() {
let idx = self.line_endings[line_idx].0;
idx + self.char_len(idx)
} else {
self.cap
};
let from = if line_idx == 0 {
0
} else {
let idx = self.line_endings[line_idx - 1].0;
idx + 1
};
if from == to {
Slice::NULL
} else {
Slice::from_raw_offsets(from, to, self)
}
}
pub fn line_is_blank(&self, line_idx: usize) -> bool {
matches!(
self.line(line_idx).as_strs(),
("", "") | ("", "\n") | ("\n", "")
)
}
#[inline]
pub fn line_len_chars(&self, line_idx: usize) -> usize {
if line_idx >= self.len_lines() {
assert_line_endings!(self);
panic!(
"line index was {line_idx} but buffer has {} lines",
self.len_lines()
)
}
let chars_to = if self.line_endings.is_empty() {
return self.n_chars;
} else if line_idx < self.line_endings.len() {
self.line_endings[line_idx].1 + 1
} else {
self.n_chars
};
let chars_from = if line_idx == 0 {
0
} else {
self.line_endings[line_idx - 1].1 + 1
};
chars_to - chars_from
}
pub fn maximal_slice_from_offset(&self, byte_offset: usize) -> &[u8] {
if byte_offset > self.len() {
&[]
} else {
let i = self.byte_to_raw_byte(byte_offset);
match i.cmp(&self.gap_start) {
Ordering::Less => &self.data[i..self.gap_start],
Ordering::Equal => &self.data[self.gap_end..],
Ordering::Greater => &self.data[i..],
}
}
}
pub fn slice_from_byte_offsets(&self, byte_from: usize, byte_to: usize) -> Slice<'_> {
if byte_from == byte_to {
return Slice::NULL;
}
let from = self.byte_to_raw_byte(byte_from);
let to = self.byte_to_raw_byte(byte_to);
Slice::from_raw_offsets(from, to, self)
}
pub fn slice(&self, char_from: usize, char_to: usize) -> Slice<'_> {
if char_from == char_to {
return Slice::NULL;
}
let byte_from = self.char_to_raw_byte(char_from);
let byte_to = self.offset_char_to_raw_byte(char_to, byte_from, char_from);
Slice::from_raw_offsets(byte_from, byte_to, self)
}
pub fn as_slice(&self) -> Slice<'_> {
self.slice(0, self.len_chars())
}
pub fn chars_in_raw_range(&self, raw_from: usize, raw_to: usize) -> usize {
if raw_to <= self.gap_start || raw_from >= self.gap_end {
count_chars(&self.data[raw_from..raw_to])
} else {
count_chars(&self.data[raw_from..self.gap_start])
+ count_chars(&self.data[self.gap_end..raw_to])
}
}
pub fn char_to_line(&self, char_idx: usize) -> usize {
match self.try_char_to_line(char_idx) {
Some(line_idx) => line_idx,
None => {
assert_line_endings!(self);
panic!(
"char index was {char_idx} but the buffer char length is {}",
self.len_chars()
);
}
}
}
pub fn try_char_to_line(&self, char_idx: usize) -> Option<usize> {
match char_idx.cmp(&self.n_chars) {
Ordering::Less => {
if char_idx < self.n_chars {
Some(self.line_endings.partition_point(|(_, i)| *i < char_idx))
} else {
Some(self.len_lines() - 1)
}
}
Ordering::Equal => Some(self.len_lines() - 1),
Ordering::Greater => None,
}
}
pub fn line_to_char(&self, line_idx: usize) -> usize {
match self.try_line_to_char(line_idx) {
Some(char_idx) => char_idx,
None => {
assert_line_endings!(self);
panic!(
"line index was {line_idx} but the buffer has {} lines",
self.len_lines()
);
}
}
}
pub fn try_line_to_char(&self, line_idx: usize) -> Option<usize> {
if line_idx > self.len_lines() - 1 {
return None;
}
if line_idx == 0 {
Some(0)
} else {
let k = self.line_endings[line_idx - 1].1;
Some(k + 1)
}
}
pub fn line_to_byte(&self, line_idx: usize) -> usize {
let raw = if line_idx == 0 {
0
} else {
self.line_endings[line_idx - 1].0 + 1
};
self.raw_byte_to_byte(raw)
}
pub fn insert_char(&mut self, char_idx: usize, ch: char) {
let len = ch.len_utf8();
if self.gap().saturating_sub(len) < MIN_GAP {
self.grow_gap(len);
}
let idx = self.char_to_byte(char_idx);
self.move_gap_to(idx);
ch.encode_utf8(&mut self.data[self.gap_start..]);
self.gap_start += len;
self.n_chars += 1;
let endings: &[(ByteOffset, CharOffset)] =
if ch == '\n' { &[(idx, char_idx)] } else { &[] };
self.add_line_endings(idx, 1, endings);
self.char_to_byte_cache.clear();
#[cfg(test)]
assert_line_endings!(self);
}
pub fn insert_str(&mut self, char_idx: usize, s: &str) {
let len = s.len();
let len_chars = s.chars().count();
if self.gap().saturating_sub(len) < MIN_GAP {
self.grow_gap(len);
}
let idx = self.char_to_byte(char_idx);
self.move_gap_to(idx);
self.data[self.gap_start..self.gap_start + len].copy_from_slice(s.as_bytes());
self.gap_start += len;
self.n_chars += len_chars;
let endings: Vec<_> = s
.char_indices()
.enumerate()
.filter(|(_, (_, ch))| *ch == '\n')
.map(|(i, (offset, _))| (idx + offset, char_idx + i))
.collect();
self.add_line_endings(idx, len_chars, &endings);
self.char_to_byte_cache.clear();
#[cfg(test)]
assert_line_endings!(self);
}
pub fn remove_char(&mut self, char_idx: usize) {
let raw_idx = self.char_to_raw_byte(char_idx);
let idx = self.raw_byte_to_byte(raw_idx);
let len = self.char_len(raw_idx);
if idx != self.gap_start {
self.move_gap_to(idx);
} else {
self.char_to_byte_cache.clear();
}
self.gap_end += len;
self.n_chars -= 1;
self.remove_line_endings(idx, idx + len, 1);
self.char_to_byte_cache.clear();
#[cfg(test)]
assert_line_endings!(self);
}
pub fn remove_range(&mut self, char_from: usize, char_to: usize) {
if char_from == char_to {
return;
}
assert!(
char_from < char_to,
"invalid range: from={char_from} > to={char_to}"
);
let raw_from = self.char_to_raw_byte(char_from);
let from = self.raw_byte_to_byte(raw_from);
let to = self.offset_char_to_byte(char_to, raw_from, char_from);
debug_assert!(from < to, "invalid byte range: from={from} > to={to}");
self.move_gap_to(from);
let n_bytes = to - from;
let n_chars = char_to - char_from;
self.remove_line_endings(from, self.byte_to_raw_byte(to), n_chars);
self.gap_end += n_bytes;
self.n_chars -= n_chars;
self.char_to_byte_cache.clear();
#[cfg(test)]
assert_line_endings!(self);
}
fn add_line_endings(
&mut self,
byte_from: usize,
n_chars: usize,
new_endings: &[(ByteOffset, CharOffset)],
) {
let idx = self.line_endings.partition_point(|(b, _)| *b < byte_from);
for (_, c) in &mut self.line_endings[idx..] {
*c += n_chars;
}
self.line_endings
.splice(idx..idx, new_endings.iter().copied());
}
fn remove_line_endings(&mut self, byte_from: usize, byte_to: usize, n_chars: usize) {
self.line_endings
.retain(|(b, _)| *b < self.gap_start || *b >= self.gap_end);
if self.line_endings.is_empty()
|| self
.line_endings
.last()
.map(|(b, _)| *b < byte_from)
.unwrap_or(false)
{
return;
}
let line_from = self.line_endings.partition_point(|(b, _)| *b < byte_from);
let line_to = self.line_endings.partition_point(|(b, _)| *b < byte_to);
if line_from == line_to {
let (b, _) = self.line_endings[line_from];
if b >= byte_from && b < byte_to {
self.line_endings.remove(line_from);
}
} else {
self.line_endings.drain(line_from..line_to);
}
for (_, c) in &mut self.line_endings[line_from..] {
*c -= n_chars;
}
}
fn grow_gap(&mut self, n: usize) {
if n >= self.next_gap {
self.next_gap = clamp_gap_size(self.len() + n, n.next_power_of_two());
}
let gap_increase = self.next_gap + n;
let cap = self.cap + self.next_gap + n;
let mut buf = Vec::with_capacity(cap);
buf.extend_from_slice(&self.data[..self.gap_start]); buf.resize(buf.len() + self.next_gap + self.gap() + n, 0); buf.extend_from_slice(&self.data[self.gap_end..]);
let start = self.gap_start;
let from = self.line_endings.partition_point(|(b, _)| *b <= start);
for (b, _) in &mut self.line_endings[from..] {
*b += gap_increase;
}
self.next_gap = clamp_gap_size(self.len(), self.next_gap * 2);
self.data = buf.into_boxed_slice();
self.gap_end += gap_increase;
self.cap = cap;
self.char_to_byte_cache.clear();
#[cfg(test)]
assert_line_endings!(self);
}
fn move_gap_to(&mut self, byte_idx: usize) {
assert!(
byte_idx <= self.len(),
"index out of bounds: {byte_idx} > {}",
self.len()
);
let gap = self.gap();
let (src, dest) = match byte_idx.cmp(&self.gap_start) {
Ordering::Equal => return,
Ordering::Less => {
let start = self.gap_start;
let from = self.line_endings.partition_point(|(b, _)| *b < byte_idx);
let to = self.line_endings.partition_point(|(b, _)| *b <= start);
for (b, _) in &mut self.line_endings[from..to] {
*b += gap;
}
(byte_idx..self.gap_start, byte_idx + gap)
}
Ordering::Greater => {
let end = self.gap_end;
let from = self.line_endings.partition_point(|(b, _)| *b < end);
let to = self
.line_endings
.partition_point(|(b, _)| *b < byte_idx + gap);
for (b, _) in &mut self.line_endings[from..to] {
*b -= gap;
}
(self.gap_end..byte_idx + gap, self.gap_start)
}
};
self.data.copy_within(src, dest);
self.gap_end = byte_idx + gap;
self.gap_start = byte_idx;
self.char_to_byte_cache.clear();
#[cfg(test)]
assert_line_endings!(self);
}
pub fn byte_to_char(&self, byte_idx: usize) -> usize {
let mut to = usize::MAX;
let raw_byte_idx = self.byte_to_raw_byte(byte_idx);
let (mut raw_byte_offset, mut char_offset) = (0, 0);
for &(b, c) in self.line_endings.iter() {
match b.cmp(&raw_byte_idx) {
Ordering::Less => (raw_byte_offset, char_offset) = (b, c),
Ordering::Equal => {
return c;
}
Ordering::Greater => {
to = b;
break;
}
}
}
let slice = Slice::from_raw_offsets(raw_byte_offset, to, self);
let mut byte_cur = self.raw_byte_to_byte(raw_byte_offset);
let mut cur = char_offset;
for ch in slice.chars() {
if byte_cur == byte_idx {
break;
}
byte_cur += ch.len_utf8();
cur += 1;
}
cur
}
#[inline]
pub fn char_to_byte(&self, char_idx: usize) -> usize {
self.offset_char_to_byte(char_idx, 0, 0)
}
pub fn char_range_to_byte_range(&self, ch_from: usize, ch_to: usize) -> (usize, usize) {
let raw_byte_from = self.char_to_raw_byte(ch_from);
let byte_from = self.raw_byte_to_byte(raw_byte_from);
let byte_to = self.offset_char_to_byte(ch_to, raw_byte_from, ch_from);
(byte_from, byte_to)
}
#[inline]
fn raw_byte_to_byte(&self, raw: usize) -> usize {
if raw > self.gap_start {
raw - self.gap()
} else {
raw
}
}
#[inline]
pub fn byte_to_raw_byte(&self, byte: usize) -> usize {
if byte > self.gap_start {
byte + self.gap()
} else {
byte
}
}
#[inline]
pub(crate) fn offset_char_to_byte(
&self,
char_idx: usize,
byte_offset: usize,
char_offset: usize,
) -> usize {
let raw = self.offset_char_to_raw_byte(char_idx, byte_offset, char_offset);
self.raw_byte_to_byte(raw)
}
#[inline]
fn char_to_raw_byte(&self, char_idx: usize) -> usize {
self.offset_char_to_raw_byte(char_idx, 0, 0)
}
fn offset_char_to_raw_byte(
&self,
char_idx: usize,
mut byte_offset: usize,
mut char_offset: usize,
) -> usize {
if let Some(i) = self.char_to_byte_cache.get(char_idx) {
return i;
}
if self.n_chars == 0 {
assert!(
char_idx == 0 || char_idx == 1,
"char index should be 0 or 1 for an empty buffer"
);
return self.gap_start;
}
if self.line_endings.is_empty() && char_idx == self.len_chars().saturating_sub(1) {
if self.gap_end == self.cap && self.gap_start > 0 {
let ch_end = self.gap_start - 1;
let ch = unsafe { decode_char_ending_at(ch_end, &self.data) };
let i = self.gap_start.saturating_sub(ch.len_utf8());
self.char_to_byte_cache.insert(char_idx, i);
return i;
} else {
let ch = unsafe { decode_char_ending_at(self.cap - 1, &self.data) };
let i = self.cap.saturating_sub(ch.len_utf8());
self.char_to_byte_cache.insert(char_idx, i);
return i;
};
}
let mut to = usize::MAX;
let line_from = self.line_endings.partition_point(|(b, _)| *b < byte_offset);
let line_endings = &self.line_endings[line_from..];
match line_endings.binary_search_by_key(&char_idx, |&(_, c)| c) {
Ok(i) => {
let b = line_endings[i].0;
self.char_to_byte_cache.insert(char_idx, b);
return b;
}
Err(i) => {
if i > 0 {
let (b, c) = line_endings[i - 1];
byte_offset = b;
char_offset = c;
}
if i < line_endings.len() {
to = line_endings[i].0;
}
}
}
let slice = Slice::from_raw_offsets(byte_offset, to, self);
let mut chars = slice.chars();
let mut cur = byte_offset;
for _ in 0..(char_idx - char_offset) {
chars.next();
cur = chars.cur + byte_offset;
}
let slice_enclosed_gap = self.gap_start >= byte_offset && self.gap_end <= to;
if cur >= self.gap_start && (cur < self.gap_end || slice_enclosed_gap) {
cur += self.gap();
}
self.char_to_byte_cache.insert(char_idx, cur);
cur
}
#[cfg(test)]
fn shred_gap(&mut self) {
for b in self.data[self.gap_start..self.gap_end].iter_mut() {
*b = b'\0';
}
}
}
#[derive(Default, Debug, Copy, Clone, PartialEq, Eq)]
pub struct Slice<'a> {
from: usize,
left: &'a [u8],
right: &'a [u8],
}
impl<'a> From<Slice<'a>> for Cow<'a, str> {
fn from(s: Slice<'a>) -> Self {
if s.left.is_empty() {
Cow::Borrowed(unsafe { std::str::from_utf8_unchecked(s.right) })
} else if s.right.is_empty() {
Cow::Borrowed(unsafe { std::str::from_utf8_unchecked(s.left) })
} else {
Cow::Owned(s.to_string())
}
}
}
impl<'a> Slice<'a> {
const NULL: Slice<'a> = Slice {
from: 0,
left: &[],
right: &[],
};
pub fn is_contiguous(&self) -> bool {
self.left.is_empty() || self.right.is_empty()
}
pub fn from(&self) -> usize {
self.from
}
#[inline]
fn from_raw_offsets(from: usize, to: usize, gb: &'a GapBuffer) -> Slice<'a> {
let to = min(to, gb.data.len());
let logical_from = gb.raw_byte_to_byte(from);
if to <= gb.gap_start || from >= gb.gap_end {
return Slice {
from: logical_from,
left: &gb.data[from..to],
right: &[],
};
}
debug_assert!(from <= gb.gap_start, "line offset sits in gap");
Slice {
from: logical_from,
left: &gb.data[from..gb.gap_start],
right: &gb.data[gb.gap_end..to],
}
}
pub fn subslice_from_byte_offsets(&self, from: usize, to: usize) -> Slice<'_> {
if from == to {
return Slice::NULL;
}
let to = min(to, self.len());
let logical_from = self.from + from;
if to <= self.left.len() {
Slice {
from: logical_from,
left: &self.left[from..to],
right: &[],
}
} else if from >= self.left.len() {
Slice {
from: logical_from,
left: &[],
right: &self.right[from..to],
}
} else {
Slice {
from: logical_from,
left: &self.left[from..],
right: &self.right[..to],
}
}
}
pub fn len_utf8(&self) -> usize {
self.chars().count()
}
pub fn len(&self) -> usize {
self.left.len() + self.right.len()
}
pub fn is_empty(&self) -> bool {
self.len() == 0
}
pub fn as_strs(&self) -> (&str, &str) {
unsafe {
(
std::str::from_utf8_unchecked(self.left),
std::str::from_utf8_unchecked(self.right),
)
}
}
pub fn as_slices(&self) -> (&[u8], &[u8]) {
(self.left, self.right)
}
pub fn slice_iter(self) -> SliceIter<'a> {
SliceIter {
inner: self,
pos: Some(false),
}
}
pub fn chars(self) -> Chars<'a> {
Chars { s: self, cur: 0 }
}
pub fn rev_chars(self) -> RevChars<'a> {
RevChars {
s: self,
cur: self.left.len() + self.right.len(),
}
}
pub fn indexed_chars(self, from: usize, rev: bool) -> IdxChars<'a> {
let (cur, idx) = if rev {
(
self.left.len() + self.right.len(),
from + count_chars(self.left) + count_chars(self.right),
)
} else {
(0, from)
};
IdxChars {
s: self,
cur,
idx,
rev,
}
}
fn cur_and_data(&self, cur: usize) -> (usize, &[u8]) {
if cur < self.left.len() {
(cur, self.left)
} else {
(cur - self.left.len(), self.right)
}
}
}
impl fmt::Display for Slice<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let mut v = Vec::with_capacity(self.left.len() + self.right.len());
v.extend_from_slice(self.left);
v.extend_from_slice(self.right);
match String::from_utf8(v) {
Ok(s) => write!(f, "{s}"),
Err(_) => Err(fmt::Error),
}
}
}
impl<'a> PartialEq<&'a str> for Slice<'_> {
fn eq(&self, other: &&'a str) -> bool {
let b = other.as_bytes();
if b.len() != self.left.len() + self.right.len() {
return false;
}
&b[..self.left.len()] == self.left && &b[self.left.len()..] == self.right
}
}
impl PartialEq<String> for Slice<'_> {
fn eq(&self, other: &String) -> bool {
*self == other.as_str()
}
}
#[derive(Debug)]
pub struct SliceIter<'a> {
inner: Slice<'a>,
pos: Option<bool>,
}
impl<'a> Iterator for SliceIter<'a> {
type Item = &'a [u8];
fn next(&mut self) -> Option<Self::Item> {
match self.pos? {
false => {
self.pos = Some(true);
Some(self.inner.left)
}
true => {
self.pos = None;
Some(self.inner.right)
}
}
}
}
#[derive(Default, Debug, Copy, Clone, PartialEq, Eq)]
pub struct Chars<'a> {
s: Slice<'a>,
cur: usize,
}
impl Iterator for Chars<'_> {
type Item = char;
fn next(&mut self) -> Option<Self::Item> {
if self.cur >= self.s.left.len() + self.s.right.len() {
return None;
}
let (cur, data) = self.s.cur_and_data(self.cur);
let ch = unsafe { decode_char_at(cur, data) };
self.cur += ch.len_utf8();
Some(ch)
}
}
#[derive(Default, Debug, Copy, Clone, PartialEq, Eq)]
pub struct RevChars<'a> {
s: Slice<'a>,
cur: usize,
}
impl Iterator for RevChars<'_> {
type Item = char;
fn next(&mut self) -> Option<Self::Item> {
if self.cur == 0 {
return None;
}
let (cur, data) = self.s.cur_and_data(self.cur - 1);
let ch = unsafe { decode_char_ending_at(cur, data) };
self.cur -= ch.len_utf8();
Some(ch)
}
}
#[derive(Default, Debug, Copy, Clone, PartialEq, Eq)]
pub struct IdxChars<'a> {
s: Slice<'a>,
cur: usize,
idx: usize,
rev: bool,
}
impl Iterator for IdxChars<'_> {
type Item = (usize, char);
fn next(&mut self) -> Option<Self::Item> {
if (!self.rev && self.cur >= self.s.left.len() + self.s.right.len())
|| (self.rev && self.cur == 0)
{
return None;
}
if self.rev {
let (cur, data) = self.s.cur_and_data(self.cur - 1);
let ch = unsafe { decode_char_ending_at(cur, data) };
let len = ch.len_utf8();
self.idx -= 1;
self.cur -= len;
Some((self.idx, ch))
} else {
let (cur, data) = self.s.cur_and_data(self.cur);
let ch = unsafe { decode_char_at(cur, data) };
let len = ch.len_utf8();
let res = Some((self.idx, ch));
self.cur += len;
self.idx += 1;
res
}
}
}
const CONT_MASK: u8 = 0b0011_1111;
#[inline]
const fn utf8_first_byte(byte: u8, width: u32) -> u32 {
(byte & (0x7F >> width)) as u32
}
#[inline]
const fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
(ch << 6) | (byte & CONT_MASK) as u32
}
#[inline]
const fn utf8_is_cont_byte(byte: u8) -> bool {
(byte as i8) < -64
}
#[inline]
unsafe fn decode_char_at(start: usize, bytes: &[u8]) -> char {
let x = bytes[start];
if x < 128 {
return unsafe { char::from_u32_unchecked(x as u32) };
}
let init = utf8_first_byte(x, 2);
let y = bytes[start + 1];
let mut ch = utf8_acc_cont_byte(init, y);
if x >= 0xE0 {
let z = bytes[start + 2];
let y_z = utf8_acc_cont_byte((y & CONT_MASK) as u32, z);
ch = (init << 12) | y_z;
if x >= 0xF0 {
let w = bytes[start + 3];
ch = ((init & 7) << 18) | utf8_acc_cont_byte(y_z, w);
}
}
unsafe { char::from_u32_unchecked(ch) }
}
#[inline]
unsafe fn decode_char_ending_at(end: usize, bytes: &[u8]) -> char {
let w = match bytes[end] {
b if b < 128 => return unsafe { char::from_u32_unchecked(b as u32) },
b => b,
};
let mut ch;
let z = bytes[end - 1];
ch = utf8_first_byte(z, 2);
if utf8_is_cont_byte(z) {
let y = bytes[end - 2];
ch = utf8_first_byte(y, 3);
if utf8_is_cont_byte(y) {
let x = bytes[end - 3];
ch = utf8_first_byte(x, 4);
ch = utf8_acc_cont_byte(ch, y);
}
ch = utf8_acc_cont_byte(ch, z);
}
ch = utf8_acc_cont_byte(ch, w);
unsafe { char::from_u32_unchecked(ch) }
}
#[cfg(test)]
mod tests {
use super::*;
use simple_test_case::test_case;
fn debug_buffer_content(gb: &GapBuffer) -> String {
let mut v = gb.data.to_vec();
v[gb.gap_start..gb.gap_end].copy_from_slice("_".repeat(gb.gap()).as_bytes());
String::from_utf8(v).expect("valid utf8")
}
fn raw_debug_buffer_content(gb: &GapBuffer) -> String {
let mut v = gb.data.to_vec();
for b in v[gb.gap_start..gb.gap_end].iter_mut() {
if *b == b'\0' {
*b = b'_';
}
}
v.insert(gb.gap_end, b']');
v.insert(gb.gap_start, b'[');
String::from_utf8(v).expect("valid utf8")
}
#[test]
fn from_string_matches_from_str() {
let s = "this is a test";
let gb1 = GapBuffer::from(s.to_string());
let gb2 = GapBuffer::from(s);
assert_eq!(gb1, gb2);
}
#[test]
fn to_string_works() {
let s = "this is a test";
let gb = GapBuffer::from(s.to_string());
assert_eq!(gb.to_string(), s);
}
#[test]
fn insert_into_empty_string_initial_gb_works() {
let mut gb = GapBuffer::from(String::new());
gb.insert_char(0, 'a');
gb.shred_gap();
assert_eq!(gb.to_string(), "a");
}
#[test_case("fooβfooβfoo"; "interleaved multibyte and ascii")]
#[test_case("hello, δΈη!"; "blocks of multibyte and ascii")]
#[test_case("hello, world!"; "just ascii")]
#[test]
fn len_chars_works(s: &str) {
let mut gb = GapBuffer::from(s);
let len_s = s.chars().count();
println!("initial: {:?}", raw_debug_buffer_content(&gb));
assert_eq!(gb.len_chars(), len_s);
assert_eq!(
gb.len_chars(),
gb.to_string().chars().count(),
"char iter len != len_chars"
);
assert_eq!(
gb.line(0).chars().count(),
gb.line_len_chars(0),
"line_len_chars != len_chars"
);
gb.insert_char(5, 'X');
gb.shred_gap();
println!("after insert X: {:?}", raw_debug_buffer_content(&gb));
assert_eq!(gb.len_chars(), len_s + 1);
assert_eq!(
gb.len_chars(),
gb.to_string().chars().count(),
"char iter len != len_chars"
);
gb.insert_char(3, 'η');
gb.shred_gap();
println!("after insert η: {:?}", raw_debug_buffer_content(&gb));
assert_eq!(gb.len_chars(), len_s + 2);
assert_eq!(
gb.len_chars(),
gb.to_string().chars().count(),
"char iter len != len_chars"
);
assert_eq!(gb.char(3), 'η');
gb.remove_char(3);
gb.shred_gap();
println!("after remove η: {:?}", raw_debug_buffer_content(&gb));
assert_eq!(gb.len_chars(), len_s + 1);
assert_eq!(
gb.len_chars(),
gb.to_string().chars().count(),
"char iter len != len_chars"
);
assert_eq!(gb.char(5), 'X');
gb.remove_char(5);
gb.shred_gap();
println!("after remove X: {:?}", debug_buffer_content(&gb));
assert_eq!(gb.len_chars(), len_s);
assert_eq!(
gb.len_chars(),
gb.to_string().chars().count(),
"char iter len != len_chars"
);
assert_eq!(gb.to_string(), s);
}
#[test_case(0, 14; "first line")]
#[test_case(1, 13; "second line")]
#[test_case(2, 14; "last line no trailing newline")]
#[test]
fn line_len_chars_works(line_idx: usize, expected: usize) {
let gb = GapBuffer::from("hello, world!\nhow are you?\nthis is a test");
assert_eq!(gb.line_len_chars(line_idx), expected);
}
#[test_case("fooβfooβfoo"; "interleaved multibyte and ascii")]
#[test_case("hello, δΈη!"; "blocks of multibyte and ascii")]
#[test_case("hello, world!"; "just ascii")]
#[test]
fn move_gap_to_maintains_content(s: &str) {
let mut gb = GapBuffer::from(s);
for i in 0..gb.len_chars() {
let idx = gb.char_to_byte(i);
gb.move_gap_to(idx);
gb.shred_gap();
let (s1, s2) = (
std::str::from_utf8(&gb.data[..gb.gap_start]).unwrap(),
std::str::from_utf8(&gb.data[gb.gap_end..]).unwrap(),
);
assert_eq!(format!("{s1}{s2}"), s, "idx={idx}");
}
}
#[test]
fn move_gap_to_maintains_line_content() {
let s = "hello, world!\nhow are you?\nthis is a test";
let mut gb = GapBuffer::from(s);
assert_eq!(gb.len_lines(), 3);
for i in 0..gb.len_chars() {
let idx = gb.char_to_byte(i);
gb.move_gap_to(idx);
gb.shred_gap();
assert_eq!(gb.len_lines(), 3);
assert_eq!(gb.line(0).to_string(), "hello, world!\n", "idx={idx}");
assert_eq!(gb.line(1).to_string(), "how are you?\n", "idx={idx}");
assert_eq!(gb.line(2).to_string(), "this is a test", "idx={idx}");
}
}
#[test_case(0, 0, 0; "BOF cur at BOF")]
#[test_case(27, 0, 0; "BOF cur at EOF")]
#[test_case(27, 5, 5; "in the buffer cur at EOF")]
#[test_case(5, 5, 5; "in the buffer cur at gap")]
#[test_case(5, 3, 3; "in the buffer cur before gap")]
#[test_case(5, 11, 15; "in the buffer cur after gap")]
#[test_case(5, 7, 7; "multi byte 1")]
#[test_case(5, 8, 10; "multi byte 2")]
#[test]
fn char_to_byte_works(cur: usize, char_idx: usize, expected: usize) {
let s = "hello, δΈη!\nhow are you?";
let mut gb = GapBuffer::from(s);
assert_eq!(s.len(), 27, "EOF case is not 0..s.len()");
assert_eq!("δΈ".len(), 3);
gb.move_gap_to(cur);
gb.shred_gap();
let byte_idx = gb.char_to_byte(char_idx);
assert_eq!(byte_idx, expected, "{:?}", debug_buffer_content(&gb));
}
#[test_case("β"; "one multi-byte char")]
#[test_case("β β"; "two multi-byte chars")]
#[test_case("Ψ foo"; "one multi-byte char followed by ascii")]
#[test_case("δΈη abc"; "two multi-byte chars followed by ascii")]
#[test]
fn char_to_byte_works_with_leading_multibyte_char(s: &str) {
let mut gb = GapBuffer::new();
gb.insert_str(0, s);
gb.shred_gap();
let byte_idx = gb.char_to_byte(0);
assert_eq!(byte_idx, 0, "{:?}", debug_buffer_content(&gb));
}
#[test_case("β", 0; "one multi-byte char")]
#[test_case("β β", 3; "two multi-byte chars")]
#[test_case("foo Ψ", 4; "ascii followed by one multi-byte char")]
#[test_case("abc δΈη", 7; "ascii followed by two multi-byte chars")]
#[test_case("Hello, world!β", 13; "error case from issue 140")]
#[test]
fn char_to_byte_works_with_single_line_buffers_ending_in_a_multibyte_char(
s: &str,
expected: usize,
) {
let mut gb = GapBuffer::new();
gb.insert_str(0, s);
gb.shred_gap();
let last_char = s.chars().count() - 1;
let byte_idx = gb.char_to_byte(last_char);
assert_eq!(byte_idx, expected, "{:?}", debug_buffer_content(&gb));
}
#[test_case(0, 0, 64, 'h'; "BOF cur at BOF")]
#[test_case(27, 0, 0, 'h'; "BOF cur at EOF")]
#[test_case(27, 5, 5, ','; "in the buffer cur at EOF")]
#[test_case(5, 5, 69, ','; "in the buffer cur at gap")]
#[test_case(5, 3, 3, 'l'; "in the buffer cur after gap")]
#[test_case(5, 11, 79, 'h'; "in the buffer cur before gap")]
#[test_case(5, 7, 71, 'δΈ'; "multi byte 1")]
#[test_case(5, 8, 74, 'η'; "multi byte 2")]
#[test]
fn char_to_raw_byte_works(cur: usize, char_idx: usize, expected: usize, expected_ch: char) {
let s = "hello, δΈη!\nhow are you?";
let mut gb = GapBuffer::from(s);
assert_eq!(s.len(), 27, "EOF case is not 0..s.len()");
assert_eq!("δΈ".len(), 3);
gb.move_gap_to(cur);
gb.shred_gap();
let byte_idx = gb.char_to_raw_byte(char_idx);
assert_eq!(byte_idx, expected, "{:?}", debug_buffer_content(&gb));
let ch = unsafe { decode_char_at(byte_idx, &gb.data) };
assert_eq!(ch, expected_ch);
}
#[test_case("hello, world! this is the last line"; "ascii no newline")]
#[test_case("hello, world!\nthis is the last line"; "ascii single newline")]
#[test_case("fooβfooβfoo"; "mixed width no newlines")]
#[test_case("hello, δΈη!\nhow are you?"; "mixed width single newline")]
#[test]
fn byte_to_char_works(s: &str) {
let mut gb = GapBuffer::from(s);
for pos in 0..gb.len_chars() - 1 {
gb.move_gap_to(gb.char_to_byte(pos));
gb.shred_gap();
for (ch_idx, (byte_idx, _ch)) in s.char_indices().enumerate() {
let idx = gb.byte_to_char(byte_idx);
assert_eq!(idx, ch_idx, "gap@{pos}");
}
}
}
#[test_case(0, 0, "hello, world!\n"; "first line cur at BOF")]
#[test_case(0, 1, "how are you?"; "second line cur at BOF")]
#[test_case(26, 0, "hello, world!\n"; "first line cur at EOF")]
#[test_case(26, 1, "how are you?"; "second line cur at EOF")]
#[test_case(10, 0, "hello, world!\n"; "first line cur in line")]
#[test_case(10, 1, "how are you?"; "second line cur in line")]
#[test]
fn slice_to_string_works(cur: usize, line: usize, expected: &str) {
let mut gb = GapBuffer::from("hello, world!\nhow are you?");
gb.move_gap_to(cur);
gb.shred_gap();
assert_eq!(gb.line(line).to_string(), expected);
}
#[test]
fn line_to_char_works() {
let l1 = "hello, world!\n";
let l2 = "how are you?\n";
let l3 = "this is a test";
let gb = GapBuffer::from(format!("{l1}{l2}{l3}"));
assert_eq!(gb.line_to_char(0), 0);
assert_eq!(gb.line_to_char(1), l1.chars().count());
assert_eq!(gb.line_to_char(2), l1.chars().count() + l2.chars().count());
}
#[test_case(0, 0; "start of first line")]
#[test_case(5, 0; "in first line")]
#[test_case(13, 0; "end of first line")]
#[test_case(14, 1; "start of second line")]
#[test_case(20, 1; "in second line")]
#[test_case(26, 1; "end of second line")]
#[test_case(27, 2; "start of third line")]
#[test_case(30, 2; "in third line")]
#[test_case(40, 2; "end of third line")]
#[test]
fn char_to_line_works(char_idx: usize, line_idx: usize) {
let l1 = "hello, world!\n";
let l2 = "how are you?\n";
let l3 = "this is a test";
let gb = GapBuffer::from(format!("{l1}{l2}{l3}"));
assert_eq!(gb.char_to_line(char_idx), line_idx);
}
#[test_case(&[(0, 'h')], "hello world"; "insert front")]
#[test_case(&[(4, ',')], "ello, world"; "insert inner")]
#[test_case(&[(10, '!')], "ello world!"; "insert back")]
#[test_case(&[(4, ','), (11, '!')], "ello, world!"; "insert inner then back")]
#[test_case(&[(4, ','), (0, 'h')], "hello, world"; "insert inner then front")]
#[test_case(&[(0, 'h'), (5, ','),], "hello, world"; "insert front then inner")]
#[test_case(&[(10, '!'), (0, 'h'), (5, ',')], "hello, world!"; "insert all")]
#[test]
fn insert_char(inserts: &[(usize, char)], expected: &str) {
let mut gb = GapBuffer::from("ello world");
for &(idx, ch) in inserts {
gb.insert_char(idx, ch);
gb.shred_gap();
}
assert_eq!(gb.to_string(), expected, "{:?}", debug_buffer_content(&gb))
}
#[test]
fn insert_char_with_moving_cur() {
let mut gb = GapBuffer::from("hello ");
gb.insert_char(6, 'w');
gb.shred_gap();
gb.insert_char(7, 'o');
gb.shred_gap();
gb.insert_char(8, 'r');
gb.shred_gap();
gb.insert_char(9, 'l');
gb.shred_gap();
gb.insert_char(10, 'd');
gb.shred_gap();
gb.insert_char(11, '!');
gb.shred_gap();
gb.insert_char(5, ',');
gb.shred_gap();
assert_eq!(
gb.to_string(),
"hello, world!",
"{:?}",
debug_buffer_content(&gb)
)
}
#[test]
fn insert_newline_char_is_tracked_correctly() {
let s = "hello, world!\nhow are you?";
let mut gb = GapBuffer::from(s);
assert_eq!(gb.len_lines(), 2);
println!("initial: {:?}", raw_debug_buffer_content(&gb));
gb.insert_char(6, '\n');
gb.shred_gap();
println!("insert: {:?}", raw_debug_buffer_content(&gb));
assert_eq!(gb.len_lines(), 3);
assert_eq!(gb.line(0).to_string(), "hello,\n");
assert_eq!(gb.line(1).to_string(), " world!\n");
assert_eq!(gb.line(2).to_string(), "how are you?");
for idx in 0..=gb.len_chars() {
gb.move_gap_to(idx);
gb.shred_gap();
assert_eq!(gb.len_lines(), 3);
assert_eq!(gb.line(0).to_string(), "hello,\n", "idx={idx}");
assert_eq!(gb.line(1).to_string(), " world!\n", "idx={idx}");
assert_eq!(gb.line(2).to_string(), "how are you?", "idx={idx}");
}
}
#[test_case(&[(0, "hell")], "helloworl"; "insert front")]
#[test_case(&[(1, ", ")], "o, worl"; "insert inner")] #[test_case(&[(5, "d!")], "oworld!"; "insert back")]
#[test_case(&[(5, "d!"), (0, "hell"), (5, ", ")], "hello, world!"; "insert all")]
#[test_case(&[(5, "d!"), (0, "hell"), (5, ",\n")], "hello,\nworld!"; "insert all w newline")]
#[test]
fn insert_str(inserts: &[(usize, &str)], expected: &str) {
let mut gb = GapBuffer::from("oworl");
for &(idx, s) in inserts {
gb.insert_str(idx, s);
gb.shred_gap();
}
assert_eq!(gb.to_string(), expected, "{:?}", debug_buffer_content(&gb))
}
#[test]
fn insert_newline_in_str_is_tracked_correctly() {
let s = "hello, world!\nhow are you?";
let mut gb = GapBuffer::from(s);
assert_eq!(gb.len_lines(), 2);
let s2 = " sailor\nisn't this fun?\nwhat a wonderful\n";
gb.insert_str(6, s2);
gb.shred_gap();
for idx in 0..=gb.len_chars() {
gb.move_gap_to(idx);
gb.shred_gap();
assert_eq!(gb.len_lines(), 5);
assert_eq!(gb.line(0).to_string(), "hello, sailor\n", "idx={idx}");
assert_eq!(gb.line(1).to_string(), "isn't this fun?\n", "idx={idx}");
assert_eq!(gb.line(2).to_string(), "what a wonderful\n", "idx={idx}");
assert_eq!(gb.line(3).to_string(), " world!\n", "idx={idx}");
assert_eq!(gb.line(4).to_string(), "how are you?", "idx={idx}");
}
}
#[test_case(6, "hello,world!"; "at gap start")]
#[test_case(7, "hello, orld!"; "at gap end")]
#[test_case(12, "hello, world"; "after gap")]
#[test_case(0, "ello, world!"; "before gap")]
#[test]
fn remove_char(idx: usize, expected: &str) {
let mut gb = GapBuffer::from("hello, world!");
gb.move_gap_to(6); gb.shred_gap();
gb.remove_char(idx);
gb.shred_gap();
assert_eq!(gb.to_string(), expected, "{:?}", debug_buffer_content(&gb))
}
#[test]
fn remove_newline_char_is_tracked_correctly() {
let s = "hello, world!\nhow are you?";
let mut gb = GapBuffer::from(s);
assert_eq!(gb.len_lines(), 2);
gb.remove_char(13);
gb.shred_gap();
assert_eq!(gb.len_lines(), 1);
assert_eq!(gb.line(0).to_string(), "hello, world!how are you?");
}
#[test_case(6, 9, "hello,rld!"; "at gap start")]
#[test_case(7, 10, "hello, ld!"; "at gap end")]
#[test_case(10, 13, "hello, wor"; "after gap")]
#[test_case(0, 5, ", world!"; "before gap")]
#[test_case(0, 13, ""; "remove all")]
#[test]
fn remove_range_works(from: usize, to: usize, expected: &str) {
let s = "hello, world!";
assert_eq!(s.len(), 13, "remove all case is not 0..s.len()");
let mut gb = GapBuffer::from(s);
gb.move_gap_to(6); gb.shred_gap();
gb.remove_range(from, to);
gb.shred_gap();
assert_eq!(gb.to_string(), expected, "{:?}", debug_buffer_content(&gb))
}
#[test]
fn remove_range_w_multibyte_chars_works() {
let s = "fooβfooβfoo";
let mut gb = GapBuffer::from(s);
gb.remove_range(0, 3);
gb.shred_gap();
assert_eq!(gb.to_string(), "βfooβfoo");
assert_eq!(gb.len_chars(), 8);
gb.remove_range(1, 4);
gb.shred_gap();
assert_eq!(gb.to_string(), "ββfoo");
assert_eq!(gb.len_chars(), 5);
gb.remove_range(2, 5);
gb.shred_gap();
assert_eq!(gb.to_string(), "ββ");
assert_eq!(gb.len_chars(), 2);
}
#[test]
fn remove_range_for_last_line_works() {
let s = "hello, world!\nthis is the last line";
let mut gb = GapBuffer::from(s);
gb.remove_range(14, s.len());
gb.shred_gap();
assert_eq!(gb.to_string(), "hello, world!\n");
assert_eq!(gb.len_lines(), 2);
}
#[test_case(10, 15, "hello, worow are you?"; "spanning newline")]
#[test_case(7, 14, "hello, how are you?"; "ending on newline")]
#[test_case(13, 26, "hello, world!"; "starting on newline")]
#[test]
fn remove_newline_in_str_is_tracked_correctly(from: usize, to: usize, expected: &str) {
let s = "hello, world!\nhow are you?";
let mut gb = GapBuffer::from(s);
assert_eq!(gb.len_lines(), 2);
gb.remove_range(from, to);
gb.shred_gap();
assert_eq!(gb.len_lines(), 1);
assert_eq!(gb.to_string(), expected);
assert_eq!(gb.line(0).to_string(), expected);
}
#[test_case('X'; "ascii")]
#[test_case('η'; "multi-byte")]
#[test]
fn insert_remove_char_is_idempotent(ch: char) {
let s = "hello, world!";
let mut gb = GapBuffer::from(s);
gb.insert_char(6, ch);
gb.shred_gap();
gb.remove_char(6);
gb.shred_gap();
assert_eq!(gb.to_string(), s, "{:?}", debug_buffer_content(&gb))
}
#[test_case("TEST", 1; "without trailing newline")]
#[test_case("TEST\n", 2; "with trailing newline")]
#[test_case("TEST\nTEST", 2; "with internal newline")]
#[test]
fn insert_remove_str_is_idempotent(edit: &str, expected_lines: usize) {
let s = "hello, world!";
let mut gb = GapBuffer::from(s);
println!("initial: {:?}", raw_debug_buffer_content(&gb));
for n in 0..gb.len_lines() {
println!("{:?}", gb.line(n).to_string());
}
gb.insert_str(6, edit);
gb.shred_gap();
assert_eq!(gb.len_lines(), expected_lines);
println!("insert: {:?}", raw_debug_buffer_content(&gb));
for n in 0..gb.len_lines() {
println!("{:?}", gb.line(n).to_string());
}
gb.remove_range(6, 6 + edit.len());
gb.shred_gap();
println!("remove: {:?}", raw_debug_buffer_content(&gb));
for n in 0..gb.len_lines() {
println!("{:?}", gb.line(n).to_string());
}
assert_eq!(gb.to_string(), s);
}
#[test]
fn chars_work() {
let s1 = "hello, world!\n";
let s2 = "how are you?";
let gb = GapBuffer::from(format!("{s1}{s2}"));
let l1_chars: String = gb.line(0).chars().collect();
assert_eq!(l1_chars, s1);
let l2_chars: String = gb.line(1).chars().collect();
assert_eq!(l2_chars, s2);
}
#[test_case(
"hello, δΈη!", false,
&[(0, 'h'), (1, 'e'), (2, 'l'), (3, 'l'), (4, 'o'),
(5, ','), (6, ' '), (7, 'δΈ'), (8, 'η'), (9, '!')];
"multi-byte block forward"
)]
#[test_case(
"hello, δΈη!", true,
&[(9, '!'), (8, 'η'), (7, 'δΈ'), (6, ' '), (5, ','),
(4, 'o'), (3, 'l'), (2, 'l'), (1, 'e'), (0, 'h')];
"multi-byte block reversed"
)]
#[test_case(
"fooβfooβfoo", false,
&[(0, 'f'), (1, 'o'), (2, 'o'), (3, 'β'), (4, 'f'), (5, 'o'),
(6, 'o'), (7, 'β'), (8, 'f'), (9, 'o'), (10, 'o')];
"interleaved forward"
)]
#[test_case(
"fooβfooβfoo", true,
&[(10, 'o'), (9, 'o'), (8, 'f'), (7, 'β'), (6, 'o'), (5, 'o'),
(4, 'f'), (3, 'β'), (2, 'o'), (1, 'o'), (0, 'f')];
"interleaved reversed"
)]
#[test]
fn indexed_chars_works(s: &str, rev: bool, expected: &[(usize, char)]) {
let mut gb = GapBuffer::from(s);
let v: Vec<(usize, char)> = gb.line(0).indexed_chars(0, rev).collect();
assert_eq!(&v, expected);
for i in 0..gb.len_chars() {
let idx = gb.char_to_byte(i);
gb.move_gap_to(idx);
gb.shred_gap();
let v: Vec<(usize, char)> = gb.line(0).indexed_chars(0, rev).collect();
assert_eq!(&v, expected, "idx={idx}");
}
}
#[test_case("fooβfooβfoo"; "interleaved multibyte and ascii")]
#[test_case("hello, δΈη!"; "blocks of multibyte and ascii")]
#[test]
fn chars_works(s: &str) {
let mut gb = GapBuffer::from(s);
let chars: String = gb.line(0).chars().collect();
assert_eq!(chars, s);
for i in 0..gb.len_chars() {
let idx = gb.char_to_byte(i);
gb.move_gap_to(idx);
gb.shred_gap();
let chars: String = gb.line(0).chars().collect();
assert_eq!(chars, s, "idx={idx}");
}
}
#[test]
fn slice_works() {
let mut gb = GapBuffer::from("hello, world!\nhow are you?");
let slice = Slice::from_raw_offsets(0, gb.cap, &gb);
let (s1, s2) = slice.as_strs();
assert_eq!(s1, "");
assert_eq!(s2, "hello, world!\nhow are you?");
let slice = gb.slice(6, 17);
let (s1, s2) = slice.as_strs();
assert_eq!(s1, " world!\nhow");
assert_eq!(s2, "");
gb.move_gap_to(12);
gb.shred_gap();
println!("after move: {:?}", raw_debug_buffer_content(&gb));
let slice = gb.slice(6, 17);
let (s1, s2) = slice.as_strs();
assert_eq!(s1, " world");
assert_eq!(s2, "!\nhow");
}
#[test]
fn null_slice_is_empty() {
let mut gb = GapBuffer::from("hello, world!\nhow are you?");
for i in 0..gb.len_chars() {
let idx = gb.char_to_byte(i);
gb.move_gap_to(idx);
gb.shred_gap();
let slice = gb.slice(0, 0);
assert!(slice.left.is_empty(), "{i} slice left: {:?}", slice.left);
assert!(slice.right.is_empty(), "{i} slice right: {:?}", slice.right);
}
}
#[test]
fn slice_eq_str_works() {
let mut gb = GapBuffer::from("hello, world!\nhow are you?");
gb.move_gap_to(3);
gb.shred_gap();
let slice = gb.slice(0, 5);
assert_eq!(slice, "hello");
}
#[test]
fn chars_in_raw_range_works() {
let mut gb = GapBuffer::from("hello, world!\nhow are you?");
let char_from = 7;
let char_to = 12;
for i in 0..gb.len_chars() {
let idx = gb.char_to_byte(i);
gb.move_gap_to(idx);
gb.shred_gap();
let byte_from = gb.char_to_raw_byte(char_from);
let byte_to = gb.char_to_raw_byte(char_to);
let n_chars = gb.chars_in_raw_range(byte_from, byte_to);
assert_eq!(n_chars, char_to - char_from, "gap at {i}");
let n_chars = gb.chars_in_raw_range(0, gb.char_to_raw_byte(gb.n_chars));
assert_eq!(n_chars, gb.n_chars, "gap at {i}");
}
}
#[test_case(0, "\n", 2; "insert newline at start")]
#[test_case(6, "\n", 2; "insert newline at end of first line")]
#[test_case(3, "\n", 2; "insert newline in middle of line")]
#[test_case(0, "new\n", 2; "insert line at start")]
#[test_case(6, "\nnew", 2; "insert line at end")]
#[test_case(3, "X\nY\n", 3; "insert multiple newlines in middle")]
#[test_case(0, "a\nb\nc\n", 4; "insert multiple lines at start")]
#[test_case(6, "\na\nb\nc", 4; "insert multiple lines at end")]
#[test]
fn insert_str_tracks_line_endings_correctly(at: usize, insert: &str, expected_lines: usize) {
let s = "line 1";
let mut gb = GapBuffer::from(s);
assert_eq!(gb.len_lines(), 1);
gb.insert_str(at, insert);
gb.shred_gap();
assert_eq!(gb.len_lines(), expected_lines);
}
#[test_case(0, 7, "line 2\nline 3", 2; "remove first line from start")]
#[test_case(0, 14, "line 3", 1; "remove first two lines from start")]
#[test_case(7, 14, "line 1\nline 3", 2; "remove middle line")]
#[test_case(14, 20, "line 1\nline 2\n", 3; "remove last line")]
#[test_case(6, 14, "line 1line 3", 1; "remove spanning first newline")]
#[test]
fn remove_range_tracks_line_endings_correctly(
from: usize,
to: usize,
expected: &str,
expected_lines: usize,
) {
let s = "line 1\nline 2\nline 3";
let mut gb = GapBuffer::from(s);
assert_eq!(gb.len_lines(), 3);
gb.remove_range(from, to);
gb.shred_gap();
assert_eq!(gb.to_string(), expected);
assert_eq!(gb.len_lines(), expected_lines);
}
fn _insert_chars(gb: &mut GapBuffer, s: &str) {
for (idx, ch) in s.chars().enumerate() {
gb.insert_char(idx + 4, ch);
gb.shred_gap();
}
}
fn _insert_str(gb: &mut GapBuffer, s: &str) {
gb.insert_str(4, s);
gb.shred_gap();
}
#[test_case(_insert_chars; "individual chars")]
#[test_case(_insert_str; "whole string")]
#[test]
fn insert_with_multibyte_chars_preserves_line_endings(insert: fn(&mut GapBuffer, &str)) {
let slice_str = |gb: &GapBuffer| gb.slice(0, gb.len_chars()).to_string();
let mut gb = GapBuffer::from("foo\nbar\nbaz\n");
let s = "δΈ\nη π¦\n ";
insert(&mut gb, s);
assert_eq!(slice_str(&gb), "foo\nδΈ\nη π¦\n bar\nbaz\n");
assert_eq!(gb.char(8), 'π¦');
gb.remove_char(8);
gb.shred_gap();
assert_eq!(slice_str(&gb), "foo\nδΈ\nη \n bar\nbaz\n");
}
#[test]
fn char_works_with_multibyte_characters() {
let s = "δΈ\nη π¦\n ";
let gb = GapBuffer::from(s);
for (idx, ch) in s.chars().enumerate() {
assert_eq!(gb.char(idx), ch);
}
}
#[test]
fn char_to_raw_byte_line_end_with_no_newlines() {
let mut gb =
GapBuffer::from("// does it need to be a doc comment? that is a long enough line to");
gb.move_gap_to(0);
gb.shred_gap();
assert_eq!(gb.char_to_raw_byte(65), 129);
gb.move_gap_to(10);
gb.shred_gap();
assert_eq!(gb.char_to_raw_byte(65), 129);
gb.move_gap_to(66);
gb.shred_gap();
assert_eq!(gb.char_to_raw_byte(65), 65);
gb.insert_char(66, '\n');
gb.shred_gap();
assert_eq!(
gb.to_string(),
"// does it need to be a doc comment? that is a long enough line to\n"
);
}
#[test_case(0, "", "foo bar"; "gap at start")]
#[test_case(3, "foo", " bar"; "gap in between")]
#[test]
fn as_strs_works(byte_idx: usize, left: &str, right: &str) {
let mut gb = GapBuffer::from("foo bar");
gb.move_gap_to(byte_idx);
gb.shred_gap();
let (l, r) = gb.as_strs();
assert_eq!((l, r), (left, right));
}
#[test_case(0; "gap at start")]
#[test_case(3; "gap in between")]
#[test]
fn as_str_works(byte_idx: usize) {
let mut gb = GapBuffer::from("foo bar");
gb.move_gap_to(byte_idx);
gb.shred_gap();
let s = gb.as_str();
assert_eq!(s, "foo bar");
assert_eq!(gb.gap_start, 0);
}
}