ad_editor/buffer/
internal.rs

1//! Internal data structures and helpers for maintaining buffer state.
2//!
3//! When working at the implementation of this datastructure it is important to keep in mind that
4//! there are several related, but distinct, pairs of concepts:
5//! - The "logical" buffer state as presented to users of the API, vs the "raw" buffer state that
6//!   is actually stored. The logical buffer is guaranteed to be valid utf-8 while the raw buffer
7//!   is allowed to contain arbitrary byte sequences within the current "gap" region.
8//! - Character offsets vs byte offsets. Character offsets only ever apply to the logical buffer
9//!   state while byte offsets can be both logical and raw.
10//!
11//! ### References
12//! - <https://www.cs.unm.edu/~crowley/papers/sds.pdf>
13//! - <http://doc.cat-v.org/plan_9/4th_edition/papers/sam/>
14//! - <https://www.averylaird.com/programming/piece-table/2018/05/10/insertions-piece-table>
15//! - <https://www.staff.city.ac.uk/~ross/papers/FingerTree.pdf>
16//! - <https://nullprogram.com/blog/2017/09/07/>
17//! - <https://coredumped.dev/2023/08/09/text-showdown-gap-buffers-vs-ropes/>
18//! - <https://code.visualstudio.com/blogs/2018/03/23/text-buffer-reimplementation>
19use std::{
20    borrow::Cow,
21    cell::UnsafeCell,
22    cmp::{Ordering, max, min},
23    collections::HashMap,
24    fmt,
25};
26
27// The internal data is `[u8]` so the values here are in terms of bytes
28
29const MIN_GAP: usize = 32;
30const MIN_GAP_GROW: usize = 64;
31const MAX_GAP_GROW: usize = 1024 * 8;
32
33/// For a given buffer length, calculate the new size of the gap we need when reallocating.
34/// This is set to 5% of the length of our data buffer but bounded by MIN_GAP and MAX_GAP.
35#[inline]
36fn clamp_gap_size(len: usize, min_gap: usize) -> usize {
37    min(max(len / 20, min_gap), MAX_GAP_GROW)
38}
39
40#[inline]
41fn count_chars(bytes: &[u8]) -> usize {
42    if bytes.is_empty() {
43        return 0;
44    }
45
46    let mut n_chars = 0;
47    let mut cur = 0;
48
49    while cur < bytes.len() {
50        // SAFETY: we know we are in bounds and that we contain valid utf-8 data
51        let ch = unsafe { decode_char_at(cur, bytes) };
52        cur += ch.len_utf8();
53        n_chars += 1;
54    }
55
56    n_chars
57}
58
59type ByteOffset = usize;
60type CharOffset = usize;
61
62/// A simple [HashMap] based cache of mappings from character positions to byte positions within
63/// the buffer since the last time it was modified (either by altering the logical buffer content,
64/// moving the gap or resizing the buffer).
65#[derive(Debug)]
66struct CharToByteCache(UnsafeCell<HashMap<CharOffset, ByteOffset>>);
67
68impl CharToByteCache {
69    fn clear(&self) {
70        // SAFETY: Only called internally as part of methods that have mutable access to the buffer
71        unsafe { (&mut *self.0.get()).clear() }
72    }
73
74    fn get(&self, char_idx: CharOffset) -> Option<ByteOffset> {
75        // SAFETY: Only called in offset_char_to_raw_byte when used to return cached values
76        unsafe { (&*self.0.get()).get(&char_idx).copied() }
77    }
78
79    fn insert(&self, char_idx: CharOffset, byte_idx: ByteOffset) {
80        // SAFETY: Only called in offset_char_to_raw_byte when used to cache values
81        unsafe {
82            (&mut *self.0.get()).insert(char_idx, byte_idx);
83        }
84    }
85}
86
87impl Clone for CharToByteCache {
88    fn clone(&self) -> Self {
89        // SAFETY: We only reach in to the inner map to clone it and wrap in a new UnsafeCell
90        unsafe { Self(UnsafeCell::new((&*self.0.get()).clone())) }
91    }
92}
93impl PartialEq for CharToByteCache {
94    fn eq(&self, _: &Self) -> bool {
95        true // we don't care about the cache for GapBuffer equality
96    }
97}
98impl Eq for CharToByteCache {}
99
100/// An implementation of a gap buffer that tracks internal meta-data to help with accessing
101/// sub-regions of the text such as character ranges and lines.
102#[derive(Debug, Clone, PartialEq, Eq)]
103pub struct GapBuffer {
104    /// the raw data being stored (both buffer content and the gap)
105    data: Box<[u8]>,
106    /// current size of the allocation for data
107    cap: usize,
108    /// raw byte offset to the first character in the gap
109    gap_start: usize,
110    /// raw byte offset to the last character in the gap
111    gap_end: usize,
112    /// size in bytes for the next gap when re-allocating
113    next_gap: usize,
114    /// total number of characters in the buffer
115    /// this is != line_endings.last() if there is no trailing newline
116    n_chars: usize,
117    /// line ending raw byte offset -> char offset
118    line_endings: Vec<(ByteOffset, CharOffset)>,
119    /// Simple cache of computed char->byte mappings since the last time the buffer was modified
120    char_to_byte_cache: CharToByteCache,
121}
122
123impl Default for GapBuffer {
124    fn default() -> Self {
125        Self::new()
126    }
127}
128
129fn compute_line_endings(s: &str) -> (usize, Vec<(ByteOffset, CharOffset)>) {
130    let mut n_chars = 0;
131    let mut line_endings = Vec::new();
132
133    for (line_chars, (idx, ch)) in s.char_indices().enumerate() {
134        n_chars += 1;
135        if ch == '\n' {
136            line_endings.push((idx, line_chars));
137        }
138    }
139
140    (n_chars, line_endings)
141}
142
143impl From<String> for GapBuffer {
144    fn from(s: String) -> Self {
145        let gap_start = s.len();
146        let next_gap = clamp_gap_size(gap_start, MIN_GAP_GROW);
147        let cap = gap_start + next_gap;
148        let (n_chars, line_endings) = compute_line_endings(&s);
149        let mut v = s.into_bytes();
150        v.resize(cap, 0);
151
152        let mut gb = Self {
153            data: v.into_boxed_slice(),
154            cap,
155            gap_start,
156            gap_end: cap,
157            next_gap,
158            n_chars,
159            line_endings,
160            char_to_byte_cache: CharToByteCache(UnsafeCell::new(HashMap::new())),
161        };
162
163        gb.move_gap_to(0);
164        gb
165    }
166}
167
168impl From<&str> for GapBuffer {
169    fn from(s: &str) -> Self {
170        let gap_start = s.len();
171        let next_gap = clamp_gap_size(gap_start, MIN_GAP_GROW);
172        let cap = gap_start + next_gap;
173        let (n_chars, line_endings) = compute_line_endings(s);
174        let mut v = Vec::with_capacity(cap);
175        v.extend_from_slice(s.as_bytes());
176        v.resize(cap, 0);
177
178        let mut gb = Self {
179            data: v.into_boxed_slice(),
180            cap,
181            gap_start,
182            gap_end: cap,
183            next_gap,
184            n_chars,
185            line_endings,
186            char_to_byte_cache: CharToByteCache(UnsafeCell::new(HashMap::new())),
187        };
188
189        gb.move_gap_to(0);
190        gb
191    }
192}
193
194impl fmt::Display for GapBuffer {
195    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
196        let (s1, s2) = self.as_strs();
197        write!(f, "{s1}{s2}")
198    }
199}
200
201impl<'a> PartialEq<&'a str> for GapBuffer {
202    fn eq(&self, other: &&'a str) -> bool {
203        let b = other.as_bytes();
204        if b.len() != self.len() {
205            return false;
206        }
207
208        b[..self.gap_start] == self.data[..self.gap_start]
209            && b[self.gap_start..] == self.data[self.gap_end..]
210    }
211}
212
213impl PartialEq<String> for GapBuffer {
214    fn eq(&self, other: &String) -> bool {
215        *self == other.as_str()
216    }
217}
218
219/// One of the most common "hard to find" bugs I encounter around the GapBuffer is detecting
220/// when and where the tracking of line endings becomes corrupted. This macro is called at
221/// points where the line_endings map is modified guarded by #[cfg(test)] so that it does not
222/// affect the performance of the editor when it is in use. It is also called in situations
223/// where we are already panicking in order to check to see if the reason for the panic was
224/// because there is a bug around line endings that the current test suite didn't catch.
225macro_rules! assert_line_endings {
226    ($self:expr) => {{
227        let true_endings: Vec<usize> = $self
228            .data
229            .iter()
230            .enumerate()
231            .filter(|&(i, &b)| b == b'\n' && (i < $self.gap_start || i >= $self.gap_end))
232            .map(|(i, _)| i)
233            .collect();
234        let tracked_line_endings: Vec<usize> = $self.line_endings.iter().map(|(n, _)| *n).collect();
235
236        assert_eq!(
237            tracked_line_endings, true_endings,
238            "incorrect byte positions for line endings with gap_start={} gap_end={}",
239            $self.gap_start, $self.gap_end
240        );
241
242        let true_endings: Vec<usize> = $self
243            .to_string()
244            .chars()
245            .enumerate()
246            .filter(|&(_, c)| c == '\n')
247            .map(|(i, _)| i)
248            .collect();
249        let tracked_line_endings: Vec<usize> = $self.line_endings.iter().map(|(_, n)| *n).collect();
250
251        assert_eq!(
252            tracked_line_endings, true_endings,
253            "incorrect character positions for line endings with gap_start={} gap_end={}",
254            $self.gap_start, $self.gap_end
255        );
256    }};
257}
258
259impl GapBuffer {
260    /// Construct a new empty GapBuffer
261    pub fn new() -> Self {
262        Self::from("")
263    }
264
265    /// Number of bytes in the gap
266    #[inline]
267    fn gap(&self) -> usize {
268        self.gap_end - self.gap_start
269    }
270
271    /// The current length of "active" data in the buffer (i.e. not including the gap)
272    #[inline]
273    pub fn len(&self) -> usize {
274        self.cap - self.gap()
275    }
276
277    /// Whether or not the visible buffer contents are empty or not.
278    /// This can return true while there is "deleted" data in the gap.
279    #[inline]
280    pub fn is_empty(&self) -> bool {
281        self.cap == self.gap()
282    }
283
284    /// Rearrange the internal storage of this buffer so that the active data is in a single
285    /// contiguous slice which is then returned.
286    pub fn make_contiguous(&mut self) -> &[u8] {
287        self.move_gap_to(0);
288        &self.data[self.gap_end..]
289    }
290
291    /// Whether or not the full data within the buffer is contiguous (all on one side of the gap).
292    pub fn is_contiguous(&self) -> bool {
293        self.gap_start == 0 || self.gap_end == self.cap
294    }
295
296    /// The contents of the buffer as a single `&str`.
297    ///
298    /// This method requires a mutable reference as we need to move the gap in order to ensure that
299    /// all of the active data within the buffer is contiguous.
300    pub fn as_str(&mut self) -> &str {
301        let raw = self.make_contiguous();
302
303        // SAFETY: we know we have valid utf-8 data internally and make_contiguous moves the gap so
304        // that `raw` contains all of the live data within the buffer.
305        unsafe { std::str::from_utf8_unchecked(raw) }
306    }
307
308    /// A contiguous substring of the buffer from the give byte offset.
309    ///
310    /// # Safety
311    /// You must call [GapBuffer::make_contiguous] before calling this method.
312    pub unsafe fn substr_from(&self, byte_offset: usize) -> &str {
313        // SAFETY: See above
314        unsafe { std::str::from_utf8_unchecked(&self.data[self.gap_end + byte_offset..]) }
315    }
316
317    /// Assume that the gap is at 0 and return the full contents of the inner buffer as a slice of
318    /// bytes.
319    ///
320    /// # Safety
321    /// You must call [GapBuffer::make_contiguous] before calling this method.
322    pub unsafe fn assume_contiguous_bytes(&self) -> &[u8] {
323        &self.data[self.gap_end..]
324    }
325
326    /// Assume that the gap is at 0 and return the full contents of the inner buffer as a &str
327    ///
328    /// # Safety
329    /// You must call [GapBuffer::make_contiguous] before calling this method.
330    pub unsafe fn assume_contiguous_str(&self) -> &str {
331        // SAFETY: See above
332        unsafe { std::str::from_utf8_unchecked(&self.data[self.gap_end..]) }
333    }
334
335    /// The contents of the buffer either side of the current gap as &str slices.
336    ///
337    /// If [make_contiguous][GapBuffer::make_contiguous] was previously called, the first `&str`
338    /// will be empty and the full content of the buffer will be in the second `&str`.
339    pub fn as_strs(&self) -> (&str, &str) {
340        let (left, right) = self.as_byte_slices();
341
342        // SAFETY: we know that we have valid utf8 data internally and that the position of the gap
343        // does not split any utf-8 codepoints.
344        unsafe {
345            (
346                std::str::from_utf8_unchecked(left),
347                std::str::from_utf8_unchecked(right),
348            )
349        }
350    }
351
352    /// The contents of the buffer either side of the current gap as byte slices.
353    ///
354    /// If [make_contiguous][GapBuffer::make_contiguous] was previously called, the first slice
355    /// will be empty and the full content of the buffer will be in the second slice.
356    pub fn as_byte_slices(&self) -> (&[u8], &[u8]) {
357        (&self.data[0..self.gap_start], &self.data[self.gap_end..])
358    }
359
360    /// Whether or not the contents of the buffer end with a final newline character.
361    ///
362    /// POSIX semantics define a line as "A sequence of zero or more non-newline characters plus
363    /// a terminating newline character."
364    ///
365    /// See: <https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap03.html#tag_03_206>
366    pub fn has_trailing_newline(&self) -> bool {
367        let (l, r) = self.as_byte_slices();
368
369        if r.is_empty() {
370            l.ends_with(b"\n")
371        } else {
372            r.ends_with(b"\n")
373        }
374    }
375
376    /// Iterate over the characters of the buffer
377    pub fn chars(&self) -> Chars<'_> {
378        self.as_slice().chars()
379    }
380
381    /// Iterate over the lines of the buffer
382    pub fn iter_lines(&self) -> impl Iterator<Item = Slice<'_>> {
383        let mut line_idx = 0;
384
385        std::iter::from_fn(move || {
386            if line_idx == self.len_lines() {
387                return None;
388            }
389            let slice = self.line(line_idx);
390            line_idx += 1;
391
392            Some(slice)
393        })
394    }
395
396    /// The number of lines within the buffer
397    #[inline]
398    pub fn len_lines(&self) -> usize {
399        match self.line_endings.last() {
400            Some(&(raw_idx, _)) => {
401                let n = self.line_endings.len();
402                let byte_idx = if raw_idx > self.gap_start {
403                    raw_idx - self.gap_end
404                } else {
405                    raw_idx
406                };
407
408                if byte_idx < self.len() { n + 1 } else { n }
409            }
410
411            None => 1,
412        }
413    }
414
415    /// The number of characters in the buffer
416    #[inline]
417    pub fn len_chars(&self) -> usize {
418        self.n_chars
419    }
420
421    /// The byte offset of the end of the given line.
422    ///
423    /// For lines with a trailing newline this is the position of the newline character,
424    /// for the final line (if there is no trailing newline) this is the end of the buffer.
425    #[inline]
426    pub fn line_end_byte(&self, line_idx: usize) -> usize {
427        if line_idx < self.line_endings.len() {
428            self.raw_byte_to_byte(self.line_endings[line_idx].0)
429        } else {
430            self.len()
431        }
432    }
433
434    pub fn lines_before_byte_offset(&self, byte_offset: usize) -> usize {
435        let raw_offset = self.byte_to_raw_byte(byte_offset);
436        self.line_endings.partition_point(|(b, _)| *b < raw_offset)
437    }
438
439    /// Clear the contents of the buffer.
440    ///
441    /// # Note
442    /// This does not actually zero out the data currently within the buffer or truncate the
443    /// allocation in any way. It simply resets internal state so that it behaves like an empty
444    /// initial buffer.
445    pub fn clear(&mut self) {
446        self.move_gap_to(0);
447        self.gap_end = self.cap;
448        self.line_endings.clear();
449        self.n_chars = 0;
450
451        #[cfg(test)]
452        assert_line_endings!(self);
453    }
454
455    /// The character at the specified character index.
456    ///
457    /// # Panics
458    /// This method will panic if the given character index is out of bounds
459    #[inline]
460    pub fn char(&self, char_idx: usize) -> char {
461        let byte_idx = self.char_to_raw_byte(char_idx);
462
463        // SAFETY: we know that we have valid utf8 data internally
464        unsafe { decode_char_at(byte_idx, &self.data) }
465    }
466
467    /// The character at the specified character index.
468    #[inline]
469    pub fn get_char(&self, char_idx: usize) -> Option<char> {
470        if char_idx < self.n_chars {
471            Some(self.char(char_idx))
472        } else {
473            None
474        }
475    }
476
477    /// The character at the specified byte index.
478    ///
479    /// # Panics
480    /// This method will panic if the given byte index is out of bounds
481    #[inline]
482    pub fn char_at(&self, byte_idx: usize) -> char {
483        let byte_idx = self.byte_to_raw_byte(byte_idx);
484
485        // SAFETY: we know that we have valid utf8 data internally
486        unsafe { decode_char_at(byte_idx, &self.data) }
487    }
488
489    /// The character at the specified byte index.
490    #[inline]
491    pub fn get_char_at(&self, byte_idx: usize) -> Option<char> {
492        if byte_idx < self.len() {
493            Some(self.char_at(byte_idx))
494        } else {
495            None
496        }
497    }
498
499    #[inline]
500    fn char_len(&self, byte_idx: usize) -> usize {
501        // SAFETY: we know that we have valid utf8 data internally
502        unsafe { decode_char_at(byte_idx, &self.data) }.len_utf8()
503    }
504
505    /// The requested line as a [Slice].
506    ///
507    /// # Panics
508    /// This method will panic if the given line index is out of bounds
509    #[inline]
510    pub fn line(&self, line_idx: usize) -> Slice<'_> {
511        if line_idx >= self.len_lines() {
512            assert_line_endings!(self);
513            panic!(
514                "line index was {line_idx} but buffer has {} lines",
515                self.len_lines()
516            )
517        }
518
519        let to = if line_idx < self.line_endings.len() {
520            let idx = self.line_endings[line_idx].0;
521            idx + self.char_len(idx)
522        } else {
523            self.cap
524        };
525
526        let from = if line_idx == 0 {
527            0
528        } else {
529            let idx = self.line_endings[line_idx - 1].0;
530            idx + 1
531        };
532
533        if from == to {
534            Slice::NULL
535        } else {
536            Slice::from_raw_offsets(from, to, self)
537        }
538    }
539
540    /// Returns true if the requested line is empty or only contains a single trailing newline.
541    ///
542    /// See [line][Self::line] for panic details.
543    pub fn line_is_blank(&self, line_idx: usize) -> bool {
544        matches!(
545            self.line(line_idx).as_strs(),
546            ("", "") | ("", "\n") | ("\n", "")
547        )
548    }
549
550    /// The number of characters in the requested line.
551    ///
552    /// # Panics
553    /// This method will panic if the given line index is out of bounds
554    #[inline]
555    pub fn line_len_chars(&self, line_idx: usize) -> usize {
556        if line_idx >= self.len_lines() {
557            assert_line_endings!(self);
558            panic!(
559                "line index was {line_idx} but buffer has {} lines",
560                self.len_lines()
561            )
562        }
563
564        let chars_to = if self.line_endings.is_empty() {
565            return self.n_chars;
566        } else if line_idx < self.line_endings.len() {
567            self.line_endings[line_idx].1 + 1
568        } else {
569            self.n_chars
570        };
571
572        let chars_from = if line_idx == 0 {
573            0
574        } else {
575            self.line_endings[line_idx - 1].1 + 1
576        };
577
578        chars_to - chars_from
579    }
580
581    /// Primarily intended for supplying contiguous ranges of bytes to tree-sitter when
582    /// parsing. Returns a byte slice from the underlying data buffer without entering
583    /// the gap.
584    pub fn maximal_slice_from_offset(&self, byte_offset: usize) -> &[u8] {
585        if byte_offset > self.len() {
586            &[]
587        } else {
588            let i = self.byte_to_raw_byte(byte_offset);
589            match i.cmp(&self.gap_start) {
590                Ordering::Less => &self.data[i..self.gap_start],
591                Ordering::Equal => &self.data[self.gap_end..],
592                Ordering::Greater => &self.data[i..],
593            }
594        }
595    }
596
597    /// An exclusive range of characters from the buffer
598    pub fn slice_from_byte_offsets(&self, byte_from: usize, byte_to: usize) -> Slice<'_> {
599        if byte_from == byte_to {
600            return Slice::NULL;
601        }
602
603        let from = self.byte_to_raw_byte(byte_from);
604        let to = self.byte_to_raw_byte(byte_to);
605
606        Slice::from_raw_offsets(from, to, self)
607    }
608
609    /// An exclusive range of characters from the buffer
610    pub fn slice(&self, char_from: usize, char_to: usize) -> Slice<'_> {
611        if char_from == char_to {
612            return Slice::NULL;
613        }
614
615        let byte_from = self.char_to_raw_byte(char_from);
616        let byte_to = self.offset_char_to_raw_byte(char_to, byte_from, char_from);
617
618        Slice::from_raw_offsets(byte_from, byte_to, self)
619    }
620
621    pub fn as_slice(&self) -> Slice<'_> {
622        self.slice(0, self.len_chars())
623    }
624
625    pub fn chars_in_raw_range(&self, raw_from: usize, raw_to: usize) -> usize {
626        if raw_to <= self.gap_start || raw_from >= self.gap_end {
627            count_chars(&self.data[raw_from..raw_to])
628        } else {
629            count_chars(&self.data[raw_from..self.gap_start])
630                + count_chars(&self.data[self.gap_end..raw_to])
631        }
632    }
633
634    /// Convert a character index to the index of the line containing it
635    ///
636    /// # Panics
637    /// This method will panic if the given char index is out of bounds
638    pub fn char_to_line(&self, char_idx: usize) -> usize {
639        match self.try_char_to_line(char_idx) {
640            Some(line_idx) => line_idx,
641            None => {
642                assert_line_endings!(self);
643                panic!(
644                    "char index was {char_idx} but the buffer char length is {}",
645                    self.len_chars()
646                );
647            }
648        }
649    }
650
651    /// Convert a character index to the index of the line containing it
652    pub fn try_char_to_line(&self, char_idx: usize) -> Option<usize> {
653        match char_idx.cmp(&self.n_chars) {
654            Ordering::Less => {
655                if char_idx < self.n_chars {
656                    Some(self.line_endings.partition_point(|(_, i)| *i < char_idx))
657                } else {
658                    Some(self.len_lines() - 1)
659                }
660            }
661
662            // We allow setting the cursor to the end of the buffer for inserts
663            Ordering::Equal => Some(self.len_lines() - 1),
664
665            Ordering::Greater => None,
666        }
667    }
668
669    /// Convert a line index to the character index of its first character
670    ///
671    /// # Panics
672    /// This method will panic if the given char index is out of bounds
673    pub fn line_to_char(&self, line_idx: usize) -> usize {
674        match self.try_line_to_char(line_idx) {
675            Some(char_idx) => char_idx,
676            None => {
677                assert_line_endings!(self);
678                panic!(
679                    "line index was {line_idx} but the buffer has {} lines",
680                    self.len_lines()
681                );
682            }
683        }
684    }
685
686    /// Convert a line index to the character index of its first character
687    pub fn try_line_to_char(&self, line_idx: usize) -> Option<usize> {
688        if line_idx > self.len_lines() - 1 {
689            return None;
690        }
691
692        if line_idx == 0 {
693            Some(0)
694        } else {
695            let k = self.line_endings[line_idx - 1].1;
696            Some(k + 1)
697        }
698    }
699
700    /// Convert a line index to the byte index of its first character
701    ///
702    /// # Panics
703    /// This method will panic if the given byte index is out of bounds
704    pub fn line_to_byte(&self, line_idx: usize) -> usize {
705        let raw = if line_idx == 0 {
706            0
707        } else {
708            self.line_endings[line_idx - 1].0 + 1
709        };
710
711        self.raw_byte_to_byte(raw)
712    }
713
714    /// Insert a single character at the specifified character index.
715    ///
716    /// This is O(1) if idx is at the current gap start and the gap is large enough to accommodate
717    /// the new text, otherwise data will need to be copied in order to relocate the gap.
718    pub fn insert_char(&mut self, char_idx: usize, ch: char) {
719        let len = ch.len_utf8();
720        if self.gap().saturating_sub(len) < MIN_GAP {
721            self.grow_gap(len);
722        }
723
724        let idx = self.char_to_byte(char_idx);
725        self.move_gap_to(idx);
726
727        ch.encode_utf8(&mut self.data[self.gap_start..]);
728        self.gap_start += len;
729        self.n_chars += 1;
730
731        let endings: &[(ByteOffset, CharOffset)] =
732            if ch == '\n' { &[(idx, char_idx)] } else { &[] };
733
734        self.add_line_endings(idx, 1, endings);
735        self.char_to_byte_cache.clear();
736
737        #[cfg(test)]
738        assert_line_endings!(self);
739    }
740
741    /// Insert a string at the specifified character index.
742    ///
743    /// This is O(1) if idx is at the current gap start and the gap is large enough to accommodate
744    /// the new text, otherwise data will need to be copied in order to relocate the gap.
745    pub fn insert_str(&mut self, char_idx: usize, s: &str) {
746        let len = s.len();
747        let len_chars = s.chars().count();
748        if self.gap().saturating_sub(len) < MIN_GAP {
749            self.grow_gap(len);
750        }
751
752        let idx = self.char_to_byte(char_idx);
753        self.move_gap_to(idx);
754
755        self.data[self.gap_start..self.gap_start + len].copy_from_slice(s.as_bytes());
756        self.gap_start += len;
757        self.n_chars += len_chars;
758
759        let endings: Vec<_> = s
760            .char_indices()
761            .enumerate()
762            .filter(|(_, (_, ch))| *ch == '\n')
763            .map(|(i, (offset, _))| (idx + offset, char_idx + i))
764            .collect();
765
766        self.add_line_endings(idx, len_chars, &endings);
767        self.char_to_byte_cache.clear();
768
769        #[cfg(test)]
770        assert_line_endings!(self);
771    }
772
773    /// Remove the requested character index from the visible region of the buffer
774    pub fn remove_char(&mut self, char_idx: usize) {
775        let raw_idx = self.char_to_raw_byte(char_idx);
776        let idx = self.raw_byte_to_byte(raw_idx);
777        let len = self.char_len(raw_idx);
778
779        if idx != self.gap_start {
780            self.move_gap_to(idx);
781        } else {
782            self.char_to_byte_cache.clear();
783        }
784
785        self.gap_end += len;
786        self.n_chars -= 1;
787
788        self.remove_line_endings(idx, idx + len, 1);
789        self.char_to_byte_cache.clear();
790
791        #[cfg(test)]
792        assert_line_endings!(self);
793    }
794
795    /// Remove the requested range (from..to) from the visible region of the buffer.
796    ///
797    /// # Panics
798    /// This method will panic if `char_from > char_to` or if the requested range is out of bounds.
799    pub fn remove_range(&mut self, char_from: usize, char_to: usize) {
800        if char_from == char_to {
801            return;
802        }
803
804        assert!(
805            char_from < char_to,
806            "invalid range: from={char_from} > to={char_to}"
807        );
808
809        let raw_from = self.char_to_raw_byte(char_from);
810        let from = self.raw_byte_to_byte(raw_from);
811        let to = self.offset_char_to_byte(char_to, raw_from, char_from);
812        debug_assert!(from < to, "invalid byte range: from={from} > to={to}");
813        self.move_gap_to(from);
814
815        let n_bytes = to - from;
816        let n_chars = char_to - char_from;
817
818        self.remove_line_endings(from, self.byte_to_raw_byte(to), n_chars);
819
820        self.gap_end += n_bytes;
821        self.n_chars -= n_chars;
822        self.char_to_byte_cache.clear();
823
824        #[cfg(test)]
825        assert_line_endings!(self);
826    }
827
828    fn add_line_endings(
829        &mut self,
830        byte_from: usize,
831        n_chars: usize,
832        new_endings: &[(ByteOffset, CharOffset)],
833    ) {
834        let idx = self.line_endings.partition_point(|(b, _)| *b < byte_from);
835        for (_, c) in &mut self.line_endings[idx..] {
836            *c += n_chars;
837        }
838
839        self.line_endings
840            .splice(idx..idx, new_endings.iter().copied());
841    }
842
843    fn remove_line_endings(&mut self, byte_from: usize, byte_to: usize, n_chars: usize) {
844        self.line_endings
845            .retain(|(b, _)| *b < self.gap_start || *b >= self.gap_end);
846
847        // If we have no line endings at all, or the removal occurred past the end of the last line
848        // endings we have tracked then there's nothing to do.
849        // > In the second case, _not_ early returning here results in the calls to partition_point
850        //   below being invalid.
851        if self.line_endings.is_empty()
852            || self
853                .line_endings
854                .last()
855                .map(|(b, _)| *b < byte_from)
856                .unwrap_or(false)
857        {
858            return;
859        }
860
861        let line_from = self.line_endings.partition_point(|(b, _)| *b < byte_from);
862        let line_to = self.line_endings.partition_point(|(b, _)| *b < byte_to);
863
864        if line_from == line_to {
865            let (b, _) = self.line_endings[line_from];
866            if b >= byte_from && b < byte_to {
867                self.line_endings.remove(line_from);
868            }
869        } else {
870            self.line_endings.drain(line_from..line_to);
871        }
872
873        for (_, c) in &mut self.line_endings[line_from..] {
874            *c -= n_chars;
875        }
876    }
877
878    fn grow_gap(&mut self, n: usize) {
879        if n >= self.next_gap {
880            self.next_gap = clamp_gap_size(self.len() + n, n.next_power_of_two());
881        }
882
883        let gap_increase = self.next_gap + n;
884        let cap = self.cap + self.next_gap + n;
885        let mut buf = Vec::with_capacity(cap);
886
887        buf.extend_from_slice(&self.data[..self.gap_start]); // data to gap
888        buf.resize(buf.len() + self.next_gap + self.gap() + n, 0); // the new gap (zeroed)
889        buf.extend_from_slice(&self.data[self.gap_end..]); // data after gap
890
891        let start = self.gap_start;
892        let from = self.line_endings.partition_point(|(b, _)| *b <= start);
893        for (b, _) in &mut self.line_endings[from..] {
894            *b += gap_increase;
895        }
896
897        self.next_gap = clamp_gap_size(self.len(), self.next_gap * 2);
898        self.data = buf.into_boxed_slice();
899        self.gap_end += gap_increase;
900        self.cap = cap;
901        self.char_to_byte_cache.clear();
902
903        #[cfg(test)]
904        assert_line_endings!(self);
905    }
906
907    /// The byte_idx argument here is an absolute position within the "live" buffer which will mark
908    /// the first byte of the gap region following the move.
909    ///
910    /// We do not require that the data within the gap region is valid utf-8 so it is fine for this
911    /// offset to land in the middle of existing multi-byte characters so long as the regions
912    /// outside of the gap stay valid utf-8.
913    ///
914    /// # Panics
915    /// This method will panic if the given index is out of bounds
916    fn move_gap_to(&mut self, byte_idx: usize) {
917        // we need space to fit the current gap size
918        assert!(
919            byte_idx <= self.len(),
920            "index out of bounds: {byte_idx} > {}",
921            self.len()
922        );
923
924        let gap = self.gap();
925
926        let (src, dest) = match byte_idx.cmp(&self.gap_start) {
927            Ordering::Equal => return,
928
929            // Gap moving left
930            Ordering::Less => {
931                let start = self.gap_start;
932                let from = self.line_endings.partition_point(|(b, _)| *b < byte_idx);
933                let to = self.line_endings.partition_point(|(b, _)| *b <= start);
934                for (b, _) in &mut self.line_endings[from..to] {
935                    *b += gap;
936                }
937
938                (byte_idx..self.gap_start, byte_idx + gap)
939            }
940
941            // Gap moving right
942            Ordering::Greater => {
943                let end = self.gap_end;
944                let from = self.line_endings.partition_point(|(b, _)| *b < end);
945                let to = self
946                    .line_endings
947                    .partition_point(|(b, _)| *b < byte_idx + gap);
948                for (b, _) in &mut self.line_endings[from..to] {
949                    *b -= gap;
950                }
951
952                (self.gap_end..byte_idx + gap, self.gap_start)
953            }
954        };
955
956        self.data.copy_within(src, dest);
957        self.gap_end = byte_idx + gap;
958        self.gap_start = byte_idx;
959        self.char_to_byte_cache.clear();
960
961        #[cfg(test)]
962        assert_line_endings!(self);
963    }
964
965    /// Convert a logical byte offset into a character offset within the buffer.
966    ///
967    /// This is primarily used to create substrings via the [GapBuffer::substr_from] method when
968    /// running regular expressions over a gap buffer.
969    ///
970    /// This is a simplified version of the equivalent logic in offset_char_to_raw_byte without the
971    /// cache and fast search on single line buffers. This is not typically in the hot path for
972    /// general editor functionality so we don't mind being a little slower in order to keep the
973    /// logic easier to reason about
974    pub fn byte_to_char(&self, byte_idx: usize) -> usize {
975        let mut to = usize::MAX;
976        let raw_byte_idx = self.byte_to_raw_byte(byte_idx);
977        let (mut raw_byte_offset, mut char_offset) = (0, 0);
978
979        // Determine which line the character lies in based on the byte index, skipping all
980        // lines that are before the byte offset we were given.
981        for &(b, c) in self.line_endings.iter() {
982            match b.cmp(&raw_byte_idx) {
983                Ordering::Less => (raw_byte_offset, char_offset) = (b, c),
984                Ordering::Equal => {
985                    return c;
986                }
987                Ordering::Greater => {
988                    to = b;
989                    break;
990                }
991            }
992        }
993
994        let slice = Slice::from_raw_offsets(raw_byte_offset, to, self);
995        let mut byte_cur = self.raw_byte_to_byte(raw_byte_offset);
996        let mut cur = char_offset;
997
998        for ch in slice.chars() {
999            if byte_cur == byte_idx {
1000                break;
1001            }
1002            byte_cur += ch.len_utf8();
1003            cur += 1;
1004        }
1005
1006        cur
1007    }
1008
1009    /// Convert a character offset within the logical buffer to a byte offset
1010    /// within the logical buffer. This is used to account for multi-byte characters
1011    /// within the buffer and is treated as a String-like index but it does not
1012    /// account for the position of the gap.
1013    #[inline]
1014    pub fn char_to_byte(&self, char_idx: usize) -> usize {
1015        self.offset_char_to_byte(char_idx, 0, 0)
1016    }
1017
1018    /// Convert a character range to a byte range
1019    pub fn char_range_to_byte_range(&self, ch_from: usize, ch_to: usize) -> (usize, usize) {
1020        let raw_byte_from = self.char_to_raw_byte(ch_from);
1021        let byte_from = self.raw_byte_to_byte(raw_byte_from);
1022        let byte_to = self.offset_char_to_byte(ch_to, raw_byte_from, ch_from);
1023
1024        (byte_from, byte_to)
1025    }
1026
1027    #[inline]
1028    fn raw_byte_to_byte(&self, raw: usize) -> usize {
1029        if raw > self.gap_start {
1030            raw - self.gap()
1031        } else {
1032            raw
1033        }
1034    }
1035
1036    #[inline]
1037    pub fn byte_to_raw_byte(&self, byte: usize) -> usize {
1038        if byte > self.gap_start {
1039            byte + self.gap()
1040        } else {
1041            byte
1042        }
1043    }
1044
1045    #[inline]
1046    pub(crate) fn offset_char_to_byte(
1047        &self,
1048        char_idx: usize,
1049        byte_offset: usize,
1050        char_offset: usize,
1051    ) -> usize {
1052        let raw = self.offset_char_to_raw_byte(char_idx, byte_offset, char_offset);
1053
1054        self.raw_byte_to_byte(raw)
1055    }
1056
1057    /// Convert a character offset within the logical buffer to a raw byte offset
1058    /// within the underlying allocation we maintain. This is an absolute index
1059    /// into our allocated array that accounts for the position of the gap.
1060    #[inline]
1061    fn char_to_raw_byte(&self, char_idx: usize) -> usize {
1062        self.offset_char_to_raw_byte(char_idx, 0, 0)
1063    }
1064
1065    /// Allow for skipping to a given byte index before starting the search as an optimisation when we
1066    /// are searching for multiple positions in sequence (e.g. the start and end of a range).
1067    fn offset_char_to_raw_byte(
1068        &self,
1069        char_idx: usize,
1070        mut byte_offset: usize,
1071        mut char_offset: usize,
1072    ) -> usize {
1073        if let Some(i) = self.char_to_byte_cache.get(char_idx) {
1074            return i;
1075        }
1076
1077        // Empty buffer is a special case: the only valid char indices are 0 or 1 which maps to gap_start/EOF
1078        if self.n_chars == 0 {
1079            assert!(
1080                char_idx == 0 || char_idx == 1,
1081                "char index should be 0 or 1 for an empty buffer"
1082            );
1083            return self.gap_start;
1084        }
1085
1086        // When looking for the last character of a buffer containing a single line we can decode
1087        // backwards from either the start of the gap or the end of the raw buffer depending on
1088        // where the gap currently lies.
1089        if self.line_endings.is_empty() && char_idx == self.len_chars().saturating_sub(1) {
1090            if self.gap_end == self.cap && self.gap_start > 0 {
1091                let ch_end = self.gap_start - 1;
1092                // SAFETY: we know that we have valid utf-8 data immediately before the gap
1093                let ch = unsafe { decode_char_ending_at(ch_end, &self.data) };
1094
1095                let i = self.gap_start.saturating_sub(ch.len_utf8());
1096                self.char_to_byte_cache.insert(char_idx, i);
1097                return i;
1098            } else {
1099                // SAFETY: we know that we have valid data at the end of the buffer as
1100                // self.gap_end != self.cap, so decoding the final character is valid
1101                let ch = unsafe { decode_char_ending_at(self.cap - 1, &self.data) };
1102
1103                let i = self.cap.saturating_sub(ch.len_utf8());
1104                self.char_to_byte_cache.insert(char_idx, i);
1105                return i;
1106            };
1107        }
1108
1109        let mut to = usize::MAX;
1110
1111        // Determine which line the character lies in based on the character index, skipping all
1112        // lines that are before the byte offset we were given.
1113        let line_from = self.line_endings.partition_point(|(b, _)| *b < byte_offset);
1114        let line_endings = &self.line_endings[line_from..];
1115
1116        match line_endings.binary_search_by_key(&char_idx, |&(_, c)| c) {
1117            Ok(i) => {
1118                let b = line_endings[i].0;
1119                self.char_to_byte_cache.insert(char_idx, b);
1120                return b;
1121            }
1122            Err(i) => {
1123                if i > 0 {
1124                    let (b, c) = line_endings[i - 1];
1125                    byte_offset = b;
1126                    char_offset = c;
1127                }
1128                if i < line_endings.len() {
1129                    to = line_endings[i].0;
1130                }
1131            }
1132        }
1133
1134        let slice = Slice::from_raw_offsets(byte_offset, to, self);
1135        let mut chars = slice.chars();
1136        let mut cur = byte_offset;
1137        for _ in 0..(char_idx - char_offset) {
1138            chars.next();
1139            cur = chars.cur + byte_offset;
1140        }
1141
1142        let slice_enclosed_gap = self.gap_start >= byte_offset && self.gap_end <= to;
1143        // Cur landed inside the gap or we counted over the gap while iterating the slice above
1144        if cur >= self.gap_start && (cur < self.gap_end || slice_enclosed_gap) {
1145            cur += self.gap();
1146        }
1147
1148        self.char_to_byte_cache.insert(char_idx, cur);
1149        cur
1150    }
1151
1152    /// Used in tests to ensure that the data inside of the gap can't accidentally be read into as
1153    /// valid buffer content due to there not making sufficient edits to the buffer state during the
1154    /// test.
1155    #[cfg(test)]
1156    fn shred_gap(&mut self) {
1157        for b in self.data[self.gap_start..self.gap_end].iter_mut() {
1158            *b = b'\0';
1159        }
1160    }
1161}
1162
1163/// A view on a region of the GapBuffer.
1164///
1165/// Slices will become invalidated if the gap is moved from the position they were created with
1166#[derive(Default, Debug, Copy, Clone, PartialEq, Eq)]
1167pub struct Slice<'a> {
1168    /// The logical byte offset that this slice was taken from
1169    from: usize,
1170    left: &'a [u8],
1171    right: &'a [u8],
1172}
1173
1174impl<'a> From<Slice<'a>> for Cow<'a, str> {
1175    fn from(s: Slice<'a>) -> Self {
1176        if s.left.is_empty() {
1177            // SAFETY: we know that we have valid utf8 data internally
1178            Cow::Borrowed(unsafe { std::str::from_utf8_unchecked(s.right) })
1179        } else if s.right.is_empty() {
1180            // SAFETY: we know that we have valid utf8 data internally
1181            Cow::Borrowed(unsafe { std::str::from_utf8_unchecked(s.left) })
1182        } else {
1183            Cow::Owned(s.to_string())
1184        }
1185    }
1186}
1187
1188impl<'a> Slice<'a> {
1189    const NULL: Slice<'a> = Slice {
1190        from: 0,
1191        left: &[],
1192        right: &[],
1193    };
1194
1195    pub fn is_contiguous(&self) -> bool {
1196        self.left.is_empty() || self.right.is_empty()
1197    }
1198
1199    /// The logical byte offset that this slice was taken from.
1200    pub fn from(&self) -> usize {
1201        self.from
1202    }
1203
1204    #[inline]
1205    fn from_raw_offsets(from: usize, to: usize, gb: &'a GapBuffer) -> Slice<'a> {
1206        let to = min(to, gb.data.len());
1207        let logical_from = gb.raw_byte_to_byte(from);
1208
1209        if to <= gb.gap_start || from >= gb.gap_end {
1210            return Slice {
1211                from: logical_from,
1212                left: &gb.data[from..to],
1213                right: &[],
1214            };
1215        }
1216
1217        debug_assert!(from <= gb.gap_start, "line offset sits in gap");
1218
1219        Slice {
1220            from: logical_from,
1221            left: &gb.data[from..gb.gap_start],
1222            right: &gb.data[gb.gap_end..to],
1223        }
1224    }
1225
1226    pub fn subslice_from_byte_offsets(&self, from: usize, to: usize) -> Slice<'_> {
1227        if from == to {
1228            return Slice::NULL;
1229        }
1230
1231        let to = min(to, self.len());
1232        let logical_from = self.from + from;
1233
1234        if to <= self.left.len() {
1235            Slice {
1236                from: logical_from,
1237                left: &self.left[from..to],
1238                right: &[],
1239            }
1240        } else if from >= self.left.len() {
1241            Slice {
1242                from: logical_from,
1243                left: &[],
1244                right: &self.right[from..to],
1245            }
1246        } else {
1247            Slice {
1248                from: logical_from,
1249                left: &self.left[from..],
1250                right: &self.right[..to],
1251            }
1252        }
1253    }
1254
1255    /// The number of utf-8 characters within this slice.
1256    ///
1257    /// Calculating involves parsing the entire slice as utf-8.
1258    pub fn len_utf8(&self) -> usize {
1259        self.chars().count()
1260    }
1261
1262    pub fn len(&self) -> usize {
1263        self.left.len() + self.right.len()
1264    }
1265
1266    pub fn is_empty(&self) -> bool {
1267        self.len() == 0
1268    }
1269
1270    /// The two sides of this slice as &str references
1271    pub fn as_strs(&self) -> (&str, &str) {
1272        // SAFETY: we know that we have valid utf8 data internally
1273        unsafe {
1274            (
1275                std::str::from_utf8_unchecked(self.left),
1276                std::str::from_utf8_unchecked(self.right),
1277            )
1278        }
1279    }
1280
1281    /// The two sides of this slice as &[u8] slices
1282    pub fn as_slices(&self) -> (&[u8], &[u8]) {
1283        (self.left, self.right)
1284    }
1285
1286    /// Iterate over the contiguous &[u8] regions within this slice
1287    pub fn slice_iter(self) -> SliceIter<'a> {
1288        SliceIter {
1289            inner: self,
1290            pos: Some(false),
1291        }
1292    }
1293
1294    /// Iterate over the characters in this slice
1295    pub fn chars(self) -> Chars<'a> {
1296        Chars { s: self, cur: 0 }
1297    }
1298
1299    /// Iterate over the characters in this slice in reverse
1300    pub fn rev_chars(self) -> RevChars<'a> {
1301        RevChars {
1302            s: self,
1303            cur: self.left.len() + self.right.len(),
1304        }
1305    }
1306
1307    /// Iterate over the characters in this slice with their corresponding character indices
1308    pub fn indexed_chars(self, from: usize, rev: bool) -> IdxChars<'a> {
1309        let (cur, idx) = if rev {
1310            (
1311                self.left.len() + self.right.len(),
1312                from + count_chars(self.left) + count_chars(self.right),
1313            )
1314        } else {
1315            (0, from)
1316        };
1317
1318        IdxChars {
1319            s: self,
1320            cur,
1321            idx,
1322            rev,
1323        }
1324    }
1325
1326    fn cur_and_data(&self, cur: usize) -> (usize, &[u8]) {
1327        if cur < self.left.len() {
1328            (cur, self.left)
1329        } else {
1330            (cur - self.left.len(), self.right)
1331        }
1332    }
1333}
1334
1335impl fmt::Display for Slice<'_> {
1336    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1337        let mut v = Vec::with_capacity(self.left.len() + self.right.len());
1338        v.extend_from_slice(self.left);
1339        v.extend_from_slice(self.right);
1340
1341        match String::from_utf8(v) {
1342            Ok(s) => write!(f, "{s}"),
1343            Err(_) => Err(fmt::Error),
1344        }
1345    }
1346}
1347
1348impl<'a> PartialEq<&'a str> for Slice<'_> {
1349    fn eq(&self, other: &&'a str) -> bool {
1350        let b = other.as_bytes();
1351        if b.len() != self.left.len() + self.right.len() {
1352            return false;
1353        }
1354
1355        &b[..self.left.len()] == self.left && &b[self.left.len()..] == self.right
1356    }
1357}
1358
1359impl PartialEq<String> for Slice<'_> {
1360    fn eq(&self, other: &String) -> bool {
1361        *self == other.as_str()
1362    }
1363}
1364
1365#[derive(Debug)]
1366pub struct SliceIter<'a> {
1367    inner: Slice<'a>,
1368    pos: Option<bool>,
1369}
1370
1371impl<'a> Iterator for SliceIter<'a> {
1372    type Item = &'a [u8];
1373
1374    fn next(&mut self) -> Option<Self::Item> {
1375        match self.pos? {
1376            false => {
1377                self.pos = Some(true);
1378                Some(self.inner.left)
1379            }
1380
1381            true => {
1382                self.pos = None;
1383                Some(self.inner.right)
1384            }
1385        }
1386    }
1387}
1388
1389/// An iterator of characters from a [Slice]
1390#[derive(Default, Debug, Copy, Clone, PartialEq, Eq)]
1391pub struct Chars<'a> {
1392    s: Slice<'a>,
1393    cur: usize,
1394}
1395
1396impl Iterator for Chars<'_> {
1397    type Item = char;
1398
1399    fn next(&mut self) -> Option<Self::Item> {
1400        if self.cur >= self.s.left.len() + self.s.right.len() {
1401            return None;
1402        }
1403
1404        let (cur, data) = self.s.cur_and_data(self.cur);
1405        // SAFETY: we know we are in bounds and that we contain valid utf-8 data
1406        let ch = unsafe { decode_char_at(cur, data) };
1407        self.cur += ch.len_utf8();
1408
1409        Some(ch)
1410    }
1411}
1412
1413/// An iterator of characters from a [Slice]
1414#[derive(Default, Debug, Copy, Clone, PartialEq, Eq)]
1415pub struct RevChars<'a> {
1416    s: Slice<'a>,
1417    cur: usize,
1418}
1419
1420impl Iterator for RevChars<'_> {
1421    type Item = char;
1422
1423    fn next(&mut self) -> Option<Self::Item> {
1424        if self.cur == 0 {
1425            return None;
1426        }
1427
1428        let (cur, data) = self.s.cur_and_data(self.cur - 1);
1429        // SAFETY: we know we are in bounds and that we contain valid utf-8 data
1430        let ch = unsafe { decode_char_ending_at(cur, data) };
1431        self.cur -= ch.len_utf8();
1432
1433        Some(ch)
1434    }
1435}
1436
1437/// An iterator of characters and their indices from a [Slice]
1438#[derive(Default, Debug, Copy, Clone, PartialEq, Eq)]
1439pub struct IdxChars<'a> {
1440    s: Slice<'a>,
1441    cur: usize,
1442    idx: usize,
1443    rev: bool,
1444}
1445
1446impl Iterator for IdxChars<'_> {
1447    type Item = (usize, char);
1448
1449    fn next(&mut self) -> Option<Self::Item> {
1450        if (!self.rev && self.cur >= self.s.left.len() + self.s.right.len())
1451            || (self.rev && self.cur == 0)
1452        {
1453            return None;
1454        }
1455
1456        if self.rev {
1457            let (cur, data) = self.s.cur_and_data(self.cur - 1);
1458            // SAFETY: we know we are in bounds and that we contain valid utf-8 data
1459            let ch = unsafe { decode_char_ending_at(cur, data) };
1460            let len = ch.len_utf8();
1461            self.idx -= 1;
1462            self.cur -= len;
1463            Some((self.idx, ch))
1464        } else {
1465            let (cur, data) = self.s.cur_and_data(self.cur);
1466            // SAFETY: we know we are in bounds and that we contain valid utf-8 data
1467            let ch = unsafe { decode_char_at(cur, data) };
1468            let len = ch.len_utf8();
1469            let res = Some((self.idx, ch));
1470            self.cur += len;
1471            self.idx += 1;
1472            res
1473        }
1474    }
1475}
1476
1477// The following helper functions are adapted from nightly APIs in std::core::str
1478// -> https://doc.rust-lang.org/stable/src/core/str/validations.rs.html
1479
1480/// Mask of the value bits of a continuation byte.
1481const CONT_MASK: u8 = 0b0011_1111;
1482
1483/// Returns the initial codepoint accumulator for the first byte.
1484/// The first byte is special, only want bottom 5 bits for width 2, 4 bits
1485/// for width 3, and 3 bits for width 4.
1486#[inline]
1487const fn utf8_first_byte(byte: u8, width: u32) -> u32 {
1488    (byte & (0x7F >> width)) as u32
1489}
1490
1491/// Returns the value of `ch` updated with continuation byte `byte`.
1492#[inline]
1493const fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
1494    (ch << 6) | (byte & CONT_MASK) as u32
1495}
1496
1497/// Checks whether the byte is a UTF-8 continuation byte (i.e., starts with the
1498/// bits `10`).
1499#[inline]
1500const fn utf8_is_cont_byte(byte: u8) -> bool {
1501    (byte as i8) < -64
1502}
1503
1504/// Decode a utf-8 code point from `bytes` starting at `start`.
1505/// `bytes` must contain valid utf-8 data beginning at `start`
1506#[inline]
1507unsafe fn decode_char_at(start: usize, bytes: &[u8]) -> char {
1508    // Decode UTF-8
1509    // SAFETY: `bytes` contains UTF-8-like string data so we have the next byte,
1510    let x = bytes[start];
1511    if x < 128 {
1512        // SAFETY: x is in the correct range
1513        return unsafe { char::from_u32_unchecked(x as u32) };
1514    }
1515
1516    // Multibyte case follows
1517    // Decode from a byte combination out of: [[[x y] z] w]
1518    // NOTE: Performance is sensitive to the exact formulation here
1519    let init = utf8_first_byte(x, 2);
1520    // SAFETY: `bytes` contains UTF-8-like string data so we have the next byte,
1521    let y = bytes[start + 1];
1522    let mut ch = utf8_acc_cont_byte(init, y);
1523
1524    if x >= 0xE0 {
1525        // [[x y z] w] case
1526        // 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
1527        // SAFETY: `bytes` contains UTF-8-like string data so we have the next byte,
1528        let z = bytes[start + 2];
1529        let y_z = utf8_acc_cont_byte((y & CONT_MASK) as u32, z);
1530        ch = (init << 12) | y_z;
1531        if x >= 0xF0 {
1532            // [x y z w] case
1533            // use only the lower 3 bits of `init`
1534            // SAFETY: `bytes` contains UTF-8-like string data so we have the next byte,
1535            let w = bytes[start + 3];
1536            ch = ((init & 7) << 18) | utf8_acc_cont_byte(y_z, w);
1537        }
1538    }
1539
1540    // SAFETY: we know that ch is valid at this point
1541    unsafe { char::from_u32_unchecked(ch) }
1542}
1543
1544/// Decode a utf-8 code point from `bytes` ending at `end`.
1545/// `bytes` must contain valid utf-8 data ending at `end`
1546#[inline]
1547unsafe fn decode_char_ending_at(end: usize, bytes: &[u8]) -> char {
1548    // Decode UTF-8
1549    let w = match bytes[end] {
1550        // SAFETY: b is in range
1551        b if b < 128 => return unsafe { char::from_u32_unchecked(b as u32) },
1552        b => b,
1553    };
1554
1555    // Multibyte case follows
1556    // Decode from a byte combination out of: [x [y [z w]]]
1557    let mut ch;
1558    // SAFETY: `bytes` contains UTF-8-like string data so we have the next byte,
1559    let z = bytes[end - 1];
1560    ch = utf8_first_byte(z, 2);
1561    if utf8_is_cont_byte(z) {
1562        // SAFETY: `bytes` contains UTF-8-like string data so we have the next byte,
1563        let y = bytes[end - 2];
1564        ch = utf8_first_byte(y, 3);
1565        if utf8_is_cont_byte(y) {
1566            // SAFETY: `bytes` contains UTF-8-like string data so we have the next byte,
1567            let x = bytes[end - 3];
1568            ch = utf8_first_byte(x, 4);
1569            ch = utf8_acc_cont_byte(ch, y);
1570        }
1571        ch = utf8_acc_cont_byte(ch, z);
1572    }
1573    ch = utf8_acc_cont_byte(ch, w);
1574
1575    // SAFETY: we know that ch is valid at this point
1576    unsafe { char::from_u32_unchecked(ch) }
1577}
1578
1579#[cfg(test)]
1580mod tests {
1581    use super::*;
1582    use simple_test_case::test_case;
1583
1584    fn debug_buffer_content(gb: &GapBuffer) -> String {
1585        let mut v = gb.data.to_vec();
1586        v[gb.gap_start..gb.gap_end].copy_from_slice("_".repeat(gb.gap()).as_bytes());
1587        String::from_utf8(v).expect("valid utf8")
1588    }
1589
1590    fn raw_debug_buffer_content(gb: &GapBuffer) -> String {
1591        let mut v = gb.data.to_vec();
1592        for b in v[gb.gap_start..gb.gap_end].iter_mut() {
1593            if *b == b'\0' {
1594                *b = b'_';
1595            }
1596        }
1597        v.insert(gb.gap_end, b']');
1598        v.insert(gb.gap_start, b'[');
1599
1600        String::from_utf8(v).expect("valid utf8")
1601    }
1602
1603    #[test]
1604    fn from_string_matches_from_str() {
1605        let s = "this is a test";
1606        let gb1 = GapBuffer::from(s.to_string());
1607        let gb2 = GapBuffer::from(s);
1608
1609        assert_eq!(gb1, gb2);
1610    }
1611
1612    #[test]
1613    fn to_string_works() {
1614        let s = "this is a test";
1615        let gb = GapBuffer::from(s.to_string());
1616        assert_eq!(gb.to_string(), s);
1617    }
1618
1619    #[test]
1620    fn insert_into_empty_string_initial_gb_works() {
1621        let mut gb = GapBuffer::from(String::new());
1622        gb.insert_char(0, 'a');
1623        gb.shred_gap();
1624
1625        assert_eq!(gb.to_string(), "a");
1626    }
1627
1628    #[test_case("foo│foo│foo"; "interleaved multibyte and ascii")]
1629    #[test_case("hello, 世界!"; "blocks of multibyte and ascii")]
1630    #[test_case("hello, world!"; "just ascii")]
1631    #[test]
1632    fn len_chars_works(s: &str) {
1633        let mut gb = GapBuffer::from(s);
1634        let len_s = s.chars().count();
1635
1636        println!("initial:          {:?}", raw_debug_buffer_content(&gb));
1637        assert_eq!(gb.len_chars(), len_s);
1638        assert_eq!(
1639            gb.len_chars(),
1640            gb.to_string().chars().count(),
1641            "char iter len != len_chars"
1642        );
1643        assert_eq!(
1644            gb.line(0).chars().count(),
1645            gb.line_len_chars(0),
1646            "line_len_chars != len_chars"
1647        );
1648
1649        gb.insert_char(5, 'X');
1650        gb.shred_gap();
1651
1652        println!("after insert X:   {:?}", raw_debug_buffer_content(&gb));
1653        assert_eq!(gb.len_chars(), len_s + 1);
1654        assert_eq!(
1655            gb.len_chars(),
1656            gb.to_string().chars().count(),
1657            "char iter len != len_chars"
1658        );
1659
1660        gb.insert_char(3, '界');
1661        gb.shred_gap();
1662        println!("after insert 界:  {:?}", raw_debug_buffer_content(&gb));
1663        assert_eq!(gb.len_chars(), len_s + 2);
1664        assert_eq!(
1665            gb.len_chars(),
1666            gb.to_string().chars().count(),
1667            "char iter len != len_chars"
1668        );
1669
1670        assert_eq!(gb.char(3), '界');
1671        gb.remove_char(3);
1672        gb.shred_gap();
1673        println!("after remove 界:  {:?}", raw_debug_buffer_content(&gb));
1674        assert_eq!(gb.len_chars(), len_s + 1);
1675        assert_eq!(
1676            gb.len_chars(),
1677            gb.to_string().chars().count(),
1678            "char iter len != len_chars"
1679        );
1680
1681        assert_eq!(gb.char(5), 'X');
1682        gb.remove_char(5);
1683        gb.shred_gap();
1684        println!("after remove X:   {:?}", debug_buffer_content(&gb));
1685        assert_eq!(gb.len_chars(), len_s);
1686        assert_eq!(
1687            gb.len_chars(),
1688            gb.to_string().chars().count(),
1689            "char iter len != len_chars"
1690        );
1691        assert_eq!(gb.to_string(), s);
1692    }
1693
1694    #[test_case(0, 14; "first line")]
1695    #[test_case(1, 13; "second line")]
1696    #[test_case(2, 14; "last line no trailing newline")]
1697    #[test]
1698    fn line_len_chars_works(line_idx: usize, expected: usize) {
1699        let gb = GapBuffer::from("hello, world!\nhow are you?\nthis is a test");
1700
1701        assert_eq!(gb.line_len_chars(line_idx), expected);
1702    }
1703
1704    #[test_case("foo│foo│foo"; "interleaved multibyte and ascii")]
1705    #[test_case("hello, 世界!"; "blocks of multibyte and ascii")]
1706    #[test_case("hello, world!"; "just ascii")]
1707    #[test]
1708    fn move_gap_to_maintains_content(s: &str) {
1709        let mut gb = GapBuffer::from(s);
1710
1711        for i in 0..gb.len_chars() {
1712            let idx = gb.char_to_byte(i);
1713            gb.move_gap_to(idx);
1714            gb.shred_gap();
1715
1716            // Splitting into the two sections like this allows us to verify that
1717            // we have valid utf-8 encoded text on either side of the gap.
1718            let (s1, s2) = (
1719                std::str::from_utf8(&gb.data[..gb.gap_start]).unwrap(),
1720                std::str::from_utf8(&gb.data[gb.gap_end..]).unwrap(),
1721            );
1722
1723            assert_eq!(format!("{s1}{s2}"), s, "idx={idx}");
1724        }
1725    }
1726
1727    #[test]
1728    fn move_gap_to_maintains_line_content() {
1729        let s = "hello, world!\nhow are you?\nthis is a test";
1730        let mut gb = GapBuffer::from(s);
1731        assert_eq!(gb.len_lines(), 3);
1732
1733        for i in 0..gb.len_chars() {
1734            let idx = gb.char_to_byte(i);
1735            gb.move_gap_to(idx);
1736            gb.shred_gap();
1737            assert_eq!(gb.len_lines(), 3);
1738
1739            assert_eq!(gb.line(0).to_string(), "hello, world!\n", "idx={idx}");
1740            assert_eq!(gb.line(1).to_string(), "how are you?\n", "idx={idx}");
1741            assert_eq!(gb.line(2).to_string(), "this is a test", "idx={idx}");
1742        }
1743    }
1744
1745    #[test_case(0, 0, 0; "BOF cur at BOF")]
1746    #[test_case(27, 0, 0; "BOF cur at EOF")]
1747    #[test_case(27, 5, 5; "in the buffer cur at EOF")]
1748    #[test_case(5, 5, 5; "in the buffer cur at gap")]
1749    #[test_case(5, 3, 3; "in the buffer cur before gap")]
1750    #[test_case(5, 11, 15; "in the buffer cur after gap")]
1751    #[test_case(5, 7, 7; "multi byte 1")]
1752    #[test_case(5, 8, 10; "multi byte 2")]
1753    #[test]
1754    fn char_to_byte_works(cur: usize, char_idx: usize, expected: usize) {
1755        let s = "hello, 世界!\nhow are you?";
1756        let mut gb = GapBuffer::from(s);
1757        assert_eq!(s.len(), 27, "EOF case is not 0..s.len()");
1758        assert_eq!("世".len(), 3);
1759        gb.move_gap_to(cur);
1760        gb.shred_gap();
1761
1762        let byte_idx = gb.char_to_byte(char_idx);
1763        assert_eq!(byte_idx, expected, "{:?}", debug_buffer_content(&gb));
1764    }
1765
1766    // Regression test for (gh#140)
1767    #[test_case("⍉"; "one multi-byte char")]
1768    #[test_case("⌠⌖"; "two multi-byte chars")]
1769    #[test_case("؏ foo"; "one multi-byte char followed by ascii")]
1770    #[test_case("世界 abc"; "two multi-byte chars followed by ascii")]
1771    #[test]
1772    fn char_to_byte_works_with_leading_multibyte_char(s: &str) {
1773        let mut gb = GapBuffer::new();
1774        gb.insert_str(0, s);
1775        gb.shred_gap();
1776
1777        let byte_idx = gb.char_to_byte(0);
1778        assert_eq!(byte_idx, 0, "{:?}", debug_buffer_content(&gb));
1779    }
1780
1781    // Regression test for (gh#140)
1782    #[test_case("⍉", 0; "one multi-byte char")]
1783    #[test_case("⌠⌖", 3; "two multi-byte chars")]
1784    #[test_case("foo ؏", 4; "ascii followed by one multi-byte char")]
1785    #[test_case("abc 世界", 7; "ascii followed by two multi-byte chars")]
1786    #[test_case("Hello, world!⍉", 13; "error case from issue 140")]
1787    #[test]
1788    fn char_to_byte_works_with_single_line_buffers_ending_in_a_multibyte_char(
1789        s: &str,
1790        expected: usize,
1791    ) {
1792        let mut gb = GapBuffer::new();
1793        gb.insert_str(0, s);
1794        gb.shred_gap();
1795        let last_char = s.chars().count() - 1;
1796
1797        let byte_idx = gb.char_to_byte(last_char);
1798        assert_eq!(byte_idx, expected, "{:?}", debug_buffer_content(&gb));
1799    }
1800
1801    #[test_case(0, 0, 64, 'h'; "BOF cur at BOF")]
1802    #[test_case(27, 0, 0, 'h'; "BOF cur at EOF")]
1803    #[test_case(27, 5, 5, ','; "in the buffer cur at EOF")]
1804    #[test_case(5, 5, 69, ','; "in the buffer cur at gap")]
1805    #[test_case(5, 3, 3, 'l'; "in the buffer cur after gap")]
1806    #[test_case(5, 11, 79, 'h'; "in the buffer cur before gap")]
1807    #[test_case(5, 7, 71, '世'; "multi byte 1")]
1808    #[test_case(5, 8, 74, '界'; "multi byte 2")]
1809    #[test]
1810    fn char_to_raw_byte_works(cur: usize, char_idx: usize, expected: usize, expected_ch: char) {
1811        let s = "hello, 世界!\nhow are you?";
1812        let mut gb = GapBuffer::from(s);
1813        assert_eq!(s.len(), 27, "EOF case is not 0..s.len()");
1814        assert_eq!("世".len(), 3);
1815        gb.move_gap_to(cur);
1816        gb.shred_gap();
1817
1818        let byte_idx = gb.char_to_raw_byte(char_idx);
1819        assert_eq!(byte_idx, expected, "{:?}", debug_buffer_content(&gb));
1820
1821        // SAFETY: safe if the test is passing
1822        let ch = unsafe { decode_char_at(byte_idx, &gb.data) };
1823        assert_eq!(ch, expected_ch);
1824    }
1825
1826    #[test_case("hello, world! this is the last line"; "ascii no newline")]
1827    #[test_case("hello, world!\nthis is the last line"; "ascii single newline")]
1828    #[test_case("foo│foo│foo"; "mixed width no newlines")]
1829    #[test_case("hello, 世界!\nhow are you?"; "mixed width single newline")]
1830    #[test]
1831    fn byte_to_char_works(s: &str) {
1832        let mut gb = GapBuffer::from(s);
1833
1834        for pos in 0..gb.len_chars() - 1 {
1835            gb.move_gap_to(gb.char_to_byte(pos));
1836            gb.shred_gap();
1837
1838            for (ch_idx, (byte_idx, _ch)) in s.char_indices().enumerate() {
1839                let idx = gb.byte_to_char(byte_idx);
1840                assert_eq!(idx, ch_idx, "gap@{pos}");
1841            }
1842        }
1843    }
1844
1845    #[test_case(0, 0, "hello, world!\n"; "first line cur at BOF")]
1846    #[test_case(0, 1, "how are you?"; "second line cur at BOF")]
1847    #[test_case(26, 0, "hello, world!\n"; "first line cur at EOF")]
1848    #[test_case(26, 1, "how are you?"; "second line cur at EOF")]
1849    #[test_case(10, 0, "hello, world!\n"; "first line cur in line")]
1850    #[test_case(10, 1, "how are you?"; "second line cur in line")]
1851    #[test]
1852    fn slice_to_string_works(cur: usize, line: usize, expected: &str) {
1853        let mut gb = GapBuffer::from("hello, world!\nhow are you?");
1854        gb.move_gap_to(cur);
1855        gb.shred_gap();
1856
1857        assert_eq!(gb.line(line).to_string(), expected);
1858    }
1859
1860    #[test]
1861    fn line_to_char_works() {
1862        let l1 = "hello, world!\n";
1863        let l2 = "how are you?\n";
1864        let l3 = "this is a test";
1865
1866        let gb = GapBuffer::from(format!("{l1}{l2}{l3}"));
1867
1868        assert_eq!(gb.line_to_char(0), 0);
1869        assert_eq!(gb.line_to_char(1), l1.chars().count());
1870        assert_eq!(gb.line_to_char(2), l1.chars().count() + l2.chars().count());
1871    }
1872
1873    #[test_case(0, 0; "start of first line")]
1874    #[test_case(5, 0; "in first line")]
1875    #[test_case(13, 0; "end of first line")]
1876    #[test_case(14, 1; "start of second line")]
1877    #[test_case(20, 1; "in second line")]
1878    #[test_case(26, 1; "end of second line")]
1879    #[test_case(27, 2; "start of third line")]
1880    #[test_case(30, 2; "in third line")]
1881    #[test_case(40, 2; "end of third line")]
1882    #[test]
1883    fn char_to_line_works(char_idx: usize, line_idx: usize) {
1884        let l1 = "hello, world!\n";
1885        let l2 = "how are you?\n";
1886        let l3 = "this is a test";
1887
1888        let gb = GapBuffer::from(format!("{l1}{l2}{l3}"));
1889
1890        assert_eq!(gb.char_to_line(char_idx), line_idx);
1891    }
1892
1893    #[test_case(&[(0, 'h')], "hello world"; "insert front")]
1894    #[test_case(&[(4, ',')], "ello, world"; "insert inner")]
1895    #[test_case(&[(10, '!')], "ello world!"; "insert back")]
1896    #[test_case(&[(4, ','), (11, '!')], "ello, world!"; "insert inner then back")]
1897    #[test_case(&[(4, ','), (0, 'h')], "hello, world"; "insert inner then front")]
1898    #[test_case(&[(0, 'h'), (5, ','),], "hello, world"; "insert front then inner")]
1899    #[test_case(&[(10, '!'), (0, 'h'), (5, ',')], "hello, world!"; "insert all")]
1900    #[test]
1901    fn insert_char(inserts: &[(usize, char)], expected: &str) {
1902        let mut gb = GapBuffer::from("ello world");
1903
1904        for &(idx, ch) in inserts {
1905            gb.insert_char(idx, ch);
1906            gb.shred_gap();
1907        }
1908
1909        assert_eq!(gb.to_string(), expected, "{:?}", debug_buffer_content(&gb))
1910    }
1911
1912    #[test]
1913    fn insert_char_with_moving_cur() {
1914        let mut gb = GapBuffer::from("hello ");
1915        gb.insert_char(6, 'w');
1916        gb.shred_gap();
1917        gb.insert_char(7, 'o');
1918        gb.shred_gap();
1919        gb.insert_char(8, 'r');
1920        gb.shred_gap();
1921        gb.insert_char(9, 'l');
1922        gb.shred_gap();
1923        gb.insert_char(10, 'd');
1924        gb.shred_gap();
1925        gb.insert_char(11, '!');
1926        gb.shred_gap();
1927        gb.insert_char(5, ',');
1928        gb.shred_gap();
1929
1930        assert_eq!(
1931            gb.to_string(),
1932            "hello, world!",
1933            "{:?}",
1934            debug_buffer_content(&gb)
1935        )
1936    }
1937
1938    #[test]
1939    fn insert_newline_char_is_tracked_correctly() {
1940        let s = "hello, world!\nhow are you?";
1941        let mut gb = GapBuffer::from(s);
1942        assert_eq!(gb.len_lines(), 2);
1943
1944        println!("initial: {:?}", raw_debug_buffer_content(&gb));
1945        gb.insert_char(6, '\n');
1946        gb.shred_gap();
1947        println!("insert:  {:?}", raw_debug_buffer_content(&gb));
1948
1949        assert_eq!(gb.len_lines(), 3);
1950        assert_eq!(gb.line(0).to_string(), "hello,\n");
1951        assert_eq!(gb.line(1).to_string(), " world!\n");
1952        assert_eq!(gb.line(2).to_string(), "how are you?");
1953
1954        for idx in 0..=gb.len_chars() {
1955            gb.move_gap_to(idx);
1956            gb.shred_gap();
1957            assert_eq!(gb.len_lines(), 3);
1958
1959            assert_eq!(gb.line(0).to_string(), "hello,\n", "idx={idx}");
1960            assert_eq!(gb.line(1).to_string(), " world!\n", "idx={idx}");
1961            assert_eq!(gb.line(2).to_string(), "how are you?", "idx={idx}");
1962        }
1963    }
1964
1965    #[test_case(&[(0, "hell")], "helloworl"; "insert front")]
1966    #[test_case(&[(1, ", ")], "o, worl"; "insert inner")] // typos:ignore
1967    #[test_case(&[(5, "d!")], "oworld!"; "insert back")]
1968    #[test_case(&[(5, "d!"), (0, "hell"), (5, ", ")], "hello, world!"; "insert all")]
1969    #[test_case(&[(5, "d!"), (0, "hell"), (5, ",\n")], "hello,\nworld!"; "insert all w newline")]
1970    #[test]
1971    fn insert_str(inserts: &[(usize, &str)], expected: &str) {
1972        let mut gb = GapBuffer::from("oworl");
1973        for &(idx, s) in inserts {
1974            gb.insert_str(idx, s);
1975            gb.shred_gap();
1976        }
1977
1978        assert_eq!(gb.to_string(), expected, "{:?}", debug_buffer_content(&gb))
1979    }
1980
1981    #[test]
1982    fn insert_newline_in_str_is_tracked_correctly() {
1983        let s = "hello, world!\nhow are you?";
1984        let mut gb = GapBuffer::from(s);
1985        assert_eq!(gb.len_lines(), 2);
1986
1987        let s2 = " sailor\nisn't this fun?\nwhat a wonderful\n";
1988        gb.insert_str(6, s2);
1989        gb.shred_gap();
1990
1991        for idx in 0..=gb.len_chars() {
1992            gb.move_gap_to(idx);
1993            gb.shred_gap();
1994            assert_eq!(gb.len_lines(), 5);
1995
1996            assert_eq!(gb.line(0).to_string(), "hello, sailor\n", "idx={idx}");
1997            assert_eq!(gb.line(1).to_string(), "isn't this fun?\n", "idx={idx}");
1998            assert_eq!(gb.line(2).to_string(), "what a wonderful\n", "idx={idx}");
1999            assert_eq!(gb.line(3).to_string(), " world!\n", "idx={idx}");
2000            assert_eq!(gb.line(4).to_string(), "how are you?", "idx={idx}");
2001        }
2002    }
2003
2004    #[test_case(6, "hello,world!"; "at gap start")]
2005    #[test_case(7, "hello, orld!"; "at gap end")]
2006    #[test_case(12, "hello, world"; "after gap")]
2007    #[test_case(0, "ello, world!"; "before gap")]
2008    #[test]
2009    fn remove_char(idx: usize, expected: &str) {
2010        let mut gb = GapBuffer::from("hello, world!");
2011        gb.move_gap_to(6); // space before world
2012        gb.shred_gap();
2013        gb.remove_char(idx);
2014        gb.shred_gap();
2015
2016        assert_eq!(gb.to_string(), expected, "{:?}", debug_buffer_content(&gb))
2017    }
2018
2019    #[test]
2020    fn remove_newline_char_is_tracked_correctly() {
2021        let s = "hello, world!\nhow are you?";
2022        let mut gb = GapBuffer::from(s);
2023        assert_eq!(gb.len_lines(), 2);
2024
2025        gb.remove_char(13);
2026        gb.shred_gap();
2027
2028        assert_eq!(gb.len_lines(), 1);
2029        assert_eq!(gb.line(0).to_string(), "hello, world!how are you?");
2030    }
2031
2032    #[test_case(6, 9, "hello,rld!"; "at gap start")]
2033    #[test_case(7, 10, "hello, ld!"; "at gap end")]
2034    #[test_case(10, 13, "hello, wor"; "after gap")]
2035    #[test_case(0, 5, ", world!"; "before gap")]
2036    #[test_case(0, 13, ""; "remove all")]
2037    #[test]
2038    fn remove_range_works(from: usize, to: usize, expected: &str) {
2039        let s = "hello, world!";
2040        assert_eq!(s.len(), 13, "remove all case is not 0..s.len()");
2041
2042        let mut gb = GapBuffer::from(s);
2043        gb.move_gap_to(6); // space before world
2044        gb.shred_gap();
2045        gb.remove_range(from, to);
2046        gb.shred_gap();
2047
2048        assert_eq!(gb.to_string(), expected, "{:?}", debug_buffer_content(&gb))
2049    }
2050
2051    #[test]
2052    fn remove_range_w_multibyte_chars_works() {
2053        let s = "foo│foo│foo";
2054        let mut gb = GapBuffer::from(s);
2055
2056        gb.remove_range(0, 3);
2057        gb.shred_gap();
2058        assert_eq!(gb.to_string(), "│foo│foo");
2059        assert_eq!(gb.len_chars(), 8);
2060
2061        gb.remove_range(1, 4);
2062        gb.shred_gap();
2063        assert_eq!(gb.to_string(), "││foo");
2064        assert_eq!(gb.len_chars(), 5);
2065
2066        gb.remove_range(2, 5);
2067        gb.shred_gap();
2068        assert_eq!(gb.to_string(), "││");
2069        assert_eq!(gb.len_chars(), 2);
2070    }
2071
2072    #[test]
2073    fn remove_range_for_last_line_works() {
2074        let s = "hello, world!\nthis is the last line";
2075        let mut gb = GapBuffer::from(s);
2076        gb.remove_range(14, s.len());
2077        gb.shred_gap();
2078        assert_eq!(gb.to_string(), "hello, world!\n");
2079        assert_eq!(gb.len_lines(), 2);
2080    }
2081
2082    #[test_case(10, 15, "hello, worow are you?"; "spanning newline")]
2083    #[test_case(7, 14, "hello, how are you?"; "ending on newline")]
2084    #[test_case(13, 26, "hello, world!"; "starting on newline")]
2085    #[test]
2086    fn remove_newline_in_str_is_tracked_correctly(from: usize, to: usize, expected: &str) {
2087        let s = "hello, world!\nhow are you?";
2088        let mut gb = GapBuffer::from(s);
2089        assert_eq!(gb.len_lines(), 2);
2090
2091        gb.remove_range(from, to);
2092        gb.shred_gap();
2093
2094        assert_eq!(gb.len_lines(), 1);
2095        assert_eq!(gb.to_string(), expected);
2096        assert_eq!(gb.line(0).to_string(), expected);
2097    }
2098
2099    #[test_case('X'; "ascii")]
2100    #[test_case('界'; "multi-byte")]
2101    #[test]
2102    fn insert_remove_char_is_idempotent(ch: char) {
2103        let s = "hello, world!";
2104        let mut gb = GapBuffer::from(s);
2105        gb.insert_char(6, ch);
2106        gb.shred_gap();
2107        gb.remove_char(6);
2108        gb.shred_gap();
2109
2110        assert_eq!(gb.to_string(), s, "{:?}", debug_buffer_content(&gb))
2111    }
2112
2113    #[test_case("TEST", 1; "without trailing newline")]
2114    #[test_case("TEST\n", 2; "with trailing newline")]
2115    #[test_case("TEST\nTEST", 2; "with internal newline")]
2116    #[test]
2117    fn insert_remove_str_is_idempotent(edit: &str, expected_lines: usize) {
2118        let s = "hello, world!";
2119        let mut gb = GapBuffer::from(s);
2120
2121        println!("initial: {:?}", raw_debug_buffer_content(&gb));
2122        for n in 0..gb.len_lines() {
2123            println!("{:?}", gb.line(n).to_string());
2124        }
2125
2126        gb.insert_str(6, edit);
2127        gb.shred_gap();
2128        assert_eq!(gb.len_lines(), expected_lines);
2129        println!("insert:  {:?}", raw_debug_buffer_content(&gb));
2130        for n in 0..gb.len_lines() {
2131            println!("{:?}", gb.line(n).to_string());
2132        }
2133
2134        gb.remove_range(6, 6 + edit.len());
2135        gb.shred_gap();
2136        println!("remove:  {:?}", raw_debug_buffer_content(&gb));
2137        for n in 0..gb.len_lines() {
2138            println!("{:?}", gb.line(n).to_string());
2139        }
2140
2141        assert_eq!(gb.to_string(), s);
2142    }
2143
2144    #[test]
2145    fn chars_work() {
2146        let s1 = "hello, world!\n";
2147        let s2 = "how are you?";
2148        let gb = GapBuffer::from(format!("{s1}{s2}"));
2149
2150        let l1_chars: String = gb.line(0).chars().collect();
2151        assert_eq!(l1_chars, s1);
2152
2153        let l2_chars: String = gb.line(1).chars().collect();
2154        assert_eq!(l2_chars, s2);
2155    }
2156
2157    #[test_case(
2158        "hello, 世界!", false,
2159        &[(0, 'h'), (1, 'e'), (2, 'l'), (3, 'l'), (4, 'o'),
2160          (5, ','), (6, ' '), (7, '世'), (8, '界'), (9, '!')];
2161        "multi-byte block forward"
2162    )]
2163    #[test_case(
2164        "hello, 世界!", true,
2165        &[(9, '!'), (8, '界'), (7, '世'), (6, ' '), (5, ','),
2166          (4, 'o'), (3, 'l'), (2, 'l'), (1, 'e'), (0, 'h')];
2167        "multi-byte block reversed"
2168    )]
2169    #[test_case(
2170        "foo│foo│foo", false,
2171        &[(0, 'f'), (1, 'o'), (2, 'o'), (3, '│'), (4, 'f'), (5, 'o'),
2172          (6, 'o'), (7, '│'), (8, 'f'), (9, 'o'), (10, 'o')];
2173        "interleaved forward"
2174    )]
2175    #[test_case(
2176        "foo│foo│foo", true,
2177        &[(10, 'o'), (9, 'o'), (8, 'f'), (7, '│'), (6, 'o'), (5, 'o'),
2178          (4, 'f'), (3, '│'), (2, 'o'), (1, 'o'), (0, 'f')];
2179        "interleaved reversed"
2180    )]
2181    #[test]
2182    fn indexed_chars_works(s: &str, rev: bool, expected: &[(usize, char)]) {
2183        let mut gb = GapBuffer::from(s);
2184        let v: Vec<(usize, char)> = gb.line(0).indexed_chars(0, rev).collect();
2185        assert_eq!(&v, expected);
2186
2187        for i in 0..gb.len_chars() {
2188            let idx = gb.char_to_byte(i);
2189            gb.move_gap_to(idx);
2190            gb.shred_gap();
2191
2192            let v: Vec<(usize, char)> = gb.line(0).indexed_chars(0, rev).collect();
2193            assert_eq!(&v, expected, "idx={idx}");
2194        }
2195    }
2196
2197    #[test_case("foo│foo│foo"; "interleaved multibyte and ascii")]
2198    #[test_case("hello, 世界!"; "blocks of multibyte and ascii")]
2199    #[test]
2200    fn chars_works(s: &str) {
2201        let mut gb = GapBuffer::from(s);
2202        let chars: String = gb.line(0).chars().collect();
2203        assert_eq!(chars, s);
2204
2205        for i in 0..gb.len_chars() {
2206            let idx = gb.char_to_byte(i);
2207            gb.move_gap_to(idx);
2208            gb.shred_gap();
2209
2210            let chars: String = gb.line(0).chars().collect();
2211            assert_eq!(chars, s, "idx={idx}");
2212        }
2213    }
2214
2215    #[test]
2216    fn slice_works() {
2217        let mut gb = GapBuffer::from("hello, world!\nhow are you?");
2218        let slice = Slice::from_raw_offsets(0, gb.cap, &gb);
2219        let (s1, s2) = slice.as_strs();
2220        assert_eq!(s1, "");
2221        assert_eq!(s2, "hello, world!\nhow are you?");
2222
2223        let slice = gb.slice(6, 17);
2224        let (s1, s2) = slice.as_strs();
2225        assert_eq!(s1, " world!\nhow");
2226        assert_eq!(s2, "");
2227
2228        gb.move_gap_to(12);
2229        gb.shred_gap();
2230        println!("after move:  {:?}", raw_debug_buffer_content(&gb));
2231
2232        let slice = gb.slice(6, 17);
2233        let (s1, s2) = slice.as_strs();
2234        assert_eq!(s1, " world");
2235        assert_eq!(s2, "!\nhow");
2236    }
2237
2238    #[test]
2239    fn null_slice_is_empty() {
2240        let mut gb = GapBuffer::from("hello, world!\nhow are you?");
2241
2242        for i in 0..gb.len_chars() {
2243            let idx = gb.char_to_byte(i);
2244            gb.move_gap_to(idx);
2245            gb.shred_gap();
2246
2247            let slice = gb.slice(0, 0);
2248
2249            assert!(slice.left.is_empty(), "{i} slice left: {:?}", slice.left);
2250            assert!(slice.right.is_empty(), "{i} slice right: {:?}", slice.right);
2251        }
2252    }
2253
2254    #[test]
2255    fn slice_eq_str_works() {
2256        let mut gb = GapBuffer::from("hello, world!\nhow are you?");
2257        gb.move_gap_to(3);
2258        gb.shred_gap();
2259        let slice = gb.slice(0, 5);
2260        assert_eq!(slice, "hello");
2261    }
2262
2263    #[test]
2264    fn chars_in_raw_range_works() {
2265        let mut gb = GapBuffer::from("hello, world!\nhow are you?");
2266        let char_from = 7;
2267        let char_to = 12;
2268
2269        for i in 0..gb.len_chars() {
2270            let idx = gb.char_to_byte(i);
2271            gb.move_gap_to(idx);
2272            gb.shred_gap();
2273
2274            let byte_from = gb.char_to_raw_byte(char_from);
2275            let byte_to = gb.char_to_raw_byte(char_to);
2276            let n_chars = gb.chars_in_raw_range(byte_from, byte_to);
2277            assert_eq!(n_chars, char_to - char_from, "gap at {i}");
2278
2279            let n_chars = gb.chars_in_raw_range(0, gb.char_to_raw_byte(gb.n_chars));
2280            assert_eq!(n_chars, gb.n_chars, "gap at {i}");
2281        }
2282    }
2283
2284    #[test_case(0, "\n", 2; "insert newline at start")]
2285    #[test_case(6, "\n", 2; "insert newline at end of first line")]
2286    #[test_case(3, "\n", 2; "insert newline in middle of line")]
2287    #[test_case(0, "new\n", 2; "insert line at start")]
2288    #[test_case(6, "\nnew", 2; "insert line at end")]
2289    #[test_case(3, "X\nY\n", 3; "insert multiple newlines in middle")]
2290    #[test_case(0, "a\nb\nc\n", 4; "insert multiple lines at start")]
2291    #[test_case(6, "\na\nb\nc", 4; "insert multiple lines at end")]
2292    #[test]
2293    fn insert_str_tracks_line_endings_correctly(at: usize, insert: &str, expected_lines: usize) {
2294        let s = "line 1";
2295        let mut gb = GapBuffer::from(s);
2296        assert_eq!(gb.len_lines(), 1);
2297
2298        gb.insert_str(at, insert);
2299        gb.shred_gap();
2300
2301        assert_eq!(gb.len_lines(), expected_lines);
2302    }
2303
2304    #[test_case(0, 7, "line 2\nline 3", 2; "remove first line from start")]
2305    #[test_case(0, 14, "line 3", 1; "remove first two lines from start")]
2306    #[test_case(7, 14, "line 1\nline 3", 2; "remove middle line")]
2307    #[test_case(14, 20, "line 1\nline 2\n", 3; "remove last line")]
2308    #[test_case(6, 14, "line 1line 3", 1; "remove spanning first newline")]
2309    #[test]
2310    fn remove_range_tracks_line_endings_correctly(
2311        from: usize,
2312        to: usize,
2313        expected: &str,
2314        expected_lines: usize,
2315    ) {
2316        let s = "line 1\nline 2\nline 3";
2317        let mut gb = GapBuffer::from(s);
2318        assert_eq!(gb.len_lines(), 3);
2319
2320        gb.remove_range(from, to);
2321        gb.shred_gap();
2322
2323        assert_eq!(gb.to_string(), expected);
2324        assert_eq!(gb.len_lines(), expected_lines);
2325    }
2326
2327    fn _insert_chars(gb: &mut GapBuffer, s: &str) {
2328        for (idx, ch) in s.chars().enumerate() {
2329            gb.insert_char(idx + 4, ch);
2330            gb.shred_gap();
2331        }
2332    }
2333
2334    fn _insert_str(gb: &mut GapBuffer, s: &str) {
2335        gb.insert_str(4, s);
2336        gb.shred_gap();
2337    }
2338
2339    #[test_case(_insert_chars; "individual chars")]
2340    #[test_case(_insert_str; "whole string")]
2341    #[test]
2342    fn insert_with_multibyte_chars_preserves_line_endings(insert: fn(&mut GapBuffer, &str)) {
2343        let slice_str = |gb: &GapBuffer| gb.slice(0, gb.len_chars()).to_string();
2344
2345        let mut gb = GapBuffer::from("foo\nbar\nbaz\n");
2346        let s = "世\n界 🦊\n ";
2347
2348        insert(&mut gb, s);
2349
2350        assert_eq!(slice_str(&gb), "foo\n世\n界 🦊\n bar\nbaz\n");
2351
2352        assert_eq!(gb.char(8), '🦊');
2353        gb.remove_char(8);
2354        gb.shred_gap();
2355
2356        assert_eq!(slice_str(&gb), "foo\n世\n界 \n bar\nbaz\n");
2357    }
2358
2359    #[test]
2360    fn char_works_with_multibyte_characters() {
2361        let s = "世\n界 🦊\n ";
2362        let gb = GapBuffer::from(s);
2363
2364        for (idx, ch) in s.chars().enumerate() {
2365            assert_eq!(gb.char(idx), ch);
2366        }
2367    }
2368
2369    #[test]
2370    fn char_to_raw_byte_line_end_with_no_newlines() {
2371        let mut gb =
2372            GapBuffer::from("// does it need to be a doc comment? that is a long enough line to");
2373        gb.move_gap_to(0);
2374        gb.shred_gap();
2375        assert_eq!(gb.char_to_raw_byte(65), 129);
2376        gb.move_gap_to(10);
2377        gb.shred_gap();
2378        assert_eq!(gb.char_to_raw_byte(65), 129);
2379        gb.move_gap_to(66);
2380        gb.shred_gap();
2381        assert_eq!(gb.char_to_raw_byte(65), 65);
2382        gb.insert_char(66, '\n');
2383        gb.shred_gap();
2384        assert_eq!(
2385            gb.to_string(),
2386            "// does it need to be a doc comment? that is a long enough line to\n"
2387        );
2388    }
2389
2390    #[test_case(0, "", "foo bar"; "gap at start")]
2391    #[test_case(3, "foo", " bar"; "gap in between")]
2392    #[test]
2393    fn as_strs_works(byte_idx: usize, left: &str, right: &str) {
2394        let mut gb = GapBuffer::from("foo bar");
2395        gb.move_gap_to(byte_idx);
2396        gb.shred_gap();
2397
2398        let (l, r) = gb.as_strs();
2399
2400        assert_eq!((l, r), (left, right));
2401    }
2402
2403    #[test_case(0; "gap at start")]
2404    #[test_case(3; "gap in between")]
2405    #[test]
2406    fn as_str_works(byte_idx: usize) {
2407        let mut gb = GapBuffer::from("foo bar");
2408        gb.move_gap_to(byte_idx);
2409        gb.shred_gap();
2410
2411        let s = gb.as_str();
2412
2413        assert_eq!(s, "foo bar");
2414        assert_eq!(gb.gap_start, 0);
2415    }
2416}
ad_editor/buffer/internal.rs

ad_editor/buffer/
internal.rs