ad_editor/buffer/
internal.rs

1//! Internal data structures and helpers for maintaining buffer state
2//!
3//! ### References
4//! - https://www.cs.unm.edu/~crowley/papers/sds.pdf
5//! - http://doc.cat-v.org/plan_9/4th_edition/papers/sam/
6//! - https://www.averylaird.com/programming/piece-table/2018/05/10/insertions-piece-table
7//! - https://www.staff.city.ac.uk/~ross/papers/FingerTree.pdf
8//! - https://nullprogram.com/blog/2017/09/07/
9//! - https://coredumped.dev/2023/08/09/text-showdown-gap-buffers-vs-ropes/
10//! - https://code.visualstudio.com/blogs/2018/03/23/text-buffer-reimplementation
11use std::{
12    cmp::{max, min, Ordering},
13    collections::BTreeMap,
14    fmt,
15};
16
17// The internal data is [u8] so the values here are in terms of bytes
18
19const MIN_GAP: usize = 32;
20const MIN_GAP_GROW: usize = 64;
21const MAX_GAP_GROW: usize = 1024 * 8;
22
23/// For a given buffer length, calculate the new size of the gap we need when reallocating.
24/// This is set to 5% of the length of our data buffer but bounded by MIN_GAP and MAX_GAP.
25#[inline]
26fn clamp_gap_size(len: usize, min_gap: usize) -> usize {
27    min(max(len / 20, min_gap), MAX_GAP_GROW)
28}
29
30#[inline]
31fn count_chars(bytes: &[u8]) -> usize {
32    if bytes.is_empty() {
33        return 0;
34    }
35
36    let mut n_chars = 0;
37    let mut cur = 0;
38
39    while cur < bytes.len() {
40        // SAFETY: we know we are in bounds and that we contain valid utf-8 data
41        let ch = unsafe { decode_char_at(cur, bytes) };
42        cur += ch.len_utf8();
43        n_chars += 1;
44    }
45
46    n_chars
47}
48
49type ByteOffset = usize;
50type CharOffset = usize;
51
52/// An implementation of a gap buffer that tracks internal meta-data to help with accessing
53/// sub-regions of the text such as character ranges and lines.
54#[derive(Default, Debug, Clone, PartialEq, Eq)]
55pub struct GapBuffer {
56    /// the raw data being stored (both buffer content and the gap)
57    data: Box<[u8]>,
58    /// current size of the allocation for data
59    cap: usize,
60    /// byte offset to the first character in the gap
61    gap_start: usize,
62    /// byte offset to the last character in the gap
63    gap_end: usize,
64    /// size in bytes for the next gap when re-allocating
65    next_gap: usize,
66    /// line ending byte offset -> char offset
67    line_endings: BTreeMap<ByteOffset, CharOffset>,
68    /// total number of characters in the buffer
69    /// this is != line_endings.last() if there is no trailing newline
70    n_chars: usize,
71}
72
73fn compute_line_endings(s: &str) -> (usize, BTreeMap<ByteOffset, CharOffset>) {
74    let mut n_chars = 0;
75    let mut line_endings = BTreeMap::new();
76
77    for (line_chars, (idx, ch)) in s.char_indices().enumerate() {
78        n_chars += 1;
79        if ch == '\n' {
80            line_endings.insert(idx, line_chars);
81        }
82    }
83
84    (n_chars, line_endings)
85}
86
87impl From<String> for GapBuffer {
88    fn from(s: String) -> Self {
89        let gap_start = s.len();
90        let next_gap = clamp_gap_size(gap_start, MIN_GAP_GROW);
91        let cap = gap_start + next_gap;
92        let (n_chars, line_endings) = compute_line_endings(&s);
93        let mut v = s.into_bytes();
94        v.resize(cap, 0);
95
96        let mut gb = Self {
97            data: v.into_boxed_slice(),
98            cap,
99            gap_start,
100            gap_end: cap,
101            next_gap,
102            n_chars,
103            line_endings,
104        };
105
106        gb.move_gap_to(0);
107        gb
108    }
109}
110
111impl From<&str> for GapBuffer {
112    fn from(s: &str) -> Self {
113        let gap_start = s.len();
114        let next_gap = clamp_gap_size(gap_start, MIN_GAP_GROW);
115        let cap = gap_start + next_gap;
116        let (n_chars, line_endings) = compute_line_endings(s);
117        let mut v = Vec::with_capacity(cap);
118        v.extend_from_slice(s.as_bytes());
119        v.resize(cap, 0);
120
121        let mut gb = Self {
122            data: v.into_boxed_slice(),
123            cap,
124            gap_start,
125            gap_end: cap,
126            next_gap,
127            n_chars,
128            line_endings,
129        };
130
131        gb.move_gap_to(0);
132        gb
133    }
134}
135
136impl fmt::Display for GapBuffer {
137    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
138        match String::from_utf8(self.bytes()) {
139            Ok(s) => write!(f, "{s}"),
140            Err(_) => Err(fmt::Error),
141        }
142    }
143}
144
145/// One of the most common "hard to find" bugs I encounter around the GapBuffer is detecting
146/// when and where the tracking of line endings becomes corrupted. This macro is called at
147/// points where the line_endings map is modified guarded by #[cfg(test)] so that it does not
148/// affect the performance of the editor when it is in use. It is also called in situations
149/// where we are already panicing in order to check to see if the reason for the panic was
150/// because there is a bug around line endings that the current test suite didn't catch.
151macro_rules! assert_line_endings {
152    ($self:expr) => {{
153        let true_endings: Vec<usize> = $self
154            .data
155            .iter()
156            .enumerate()
157            .filter(|&(i, &b)| b == b'\n' && (i < $self.gap_start || i >= $self.gap_end))
158            .map(|(i, _)| i)
159            .collect();
160        let tracked_line_endings: Vec<usize> = $self.line_endings.keys().copied().collect();
161
162        assert_eq!(
163            tracked_line_endings, true_endings,
164            "incorrect byte positions for line endings with gap_start={} gap_end={}",
165            $self.gap_start, $self.gap_end
166        );
167
168        let true_endings: Vec<usize> = $self
169            .to_string()
170            .chars()
171            .enumerate()
172            .filter(|&(_, c)| c == '\n')
173            .map(|(i, _)| i)
174            .collect();
175        let tracked_line_endings: Vec<usize> = $self.line_endings.values().copied().collect();
176
177        assert_eq!(
178            tracked_line_endings, true_endings,
179            "incorrect character positions for line endings with gap_start={} gap_end={}",
180            $self.gap_start, $self.gap_end
181        );
182    }};
183}
184
185impl GapBuffer {
186    /// Number of bytes in the gap
187    #[inline]
188    fn gap(&self) -> usize {
189        self.gap_end - self.gap_start
190    }
191
192    /// The current length of "active" data in the buffer (i.e. not including the gap)
193    #[inline]
194    pub fn len(&self) -> usize {
195        self.cap - self.gap()
196    }
197
198    /// Whether or not the visible buffer contents are empty or not.
199    /// This can return true while there is "deleted" data in the gap.
200    #[inline]
201    pub fn is_empty(&self) -> bool {
202        self.cap == self.gap()
203    }
204
205    /// The raw content of the buffer
206    pub fn bytes(&self) -> Vec<u8> {
207        let mut v = Vec::with_capacity(self.len());
208        v.extend(&self.data[..self.gap_start]);
209        v.extend(&self.data[self.gap_end..]);
210
211        v
212    }
213
214    /// Iterate over the lines of the buffer
215    pub fn iter_lines(&self) -> impl Iterator<Item = Slice<'_>> {
216        let mut line_idx = 0;
217
218        std::iter::from_fn(move || {
219            if line_idx == self.len_lines() {
220                return None;
221            }
222            let slice = self.line(line_idx);
223            line_idx += 1;
224
225            Some(slice)
226        })
227    }
228
229    /// The number of lines within the buffer
230    #[inline]
231    pub fn len_lines(&self) -> usize {
232        match self.line_endings.last_key_value() {
233            Some((&raw_idx, _)) => {
234                let n = self.line_endings.len();
235                let byte_idx = if raw_idx > self.gap_start {
236                    raw_idx - self.gap_end
237                } else {
238                    raw_idx
239                };
240
241                if byte_idx < self.len() {
242                    n + 1
243                } else {
244                    n
245                }
246            }
247
248            None => 1,
249        }
250    }
251
252    /// The number of characters in the buffer
253    #[inline]
254    pub fn len_chars(&self) -> usize {
255        self.n_chars
256    }
257
258    pub fn byte_line_endings(&self) -> Vec<usize> {
259        let mut endings: Vec<_> = self
260            .line_endings
261            .keys()
262            .map(|i| self.raw_byte_to_byte(*i))
263            .collect();
264        let eob = self.len();
265
266        match endings.last() {
267            Some(&idx) if idx == eob => (),
268            _ => endings.push(eob),
269        }
270
271        endings
272    }
273
274    /// Clear the contents of the buffer.
275    ///
276    /// # Note
277    /// This does not actually zero out the data currently within the buffer or truncate the
278    /// allocation in any way. It simply resets internal state so that it behaves like an empty
279    /// initial buffer.
280    pub fn clear(&mut self) {
281        self.move_gap_to(0);
282        self.gap_end = self.cap;
283        self.line_endings.clear();
284        self.n_chars = 0;
285
286        #[cfg(test)]
287        assert_line_endings!(self);
288    }
289
290    /// The character at the specified character index.
291    ///
292    /// # Panics
293    /// This method will panic if the given character index is out of bounds
294    #[inline]
295    pub fn char(&self, char_idx: usize) -> char {
296        let byte_idx = self.char_to_raw_byte(char_idx);
297
298        // SAFETY: we know that we have valid utf8 data internally
299        unsafe { decode_char_at(byte_idx, &self.data) }
300    }
301
302    /// The character at the specified character index.
303    #[inline]
304    pub fn get_char(&self, char_idx: usize) -> Option<char> {
305        if char_idx < self.n_chars {
306            Some(self.char(char_idx))
307        } else {
308            None
309        }
310    }
311
312    #[inline]
313    fn char_len(&self, byte_idx: usize) -> usize {
314        // SAFETY: we know that we have valid utf8 data internally
315        unsafe { decode_char_at(byte_idx, &self.data) }.len_utf8()
316    }
317
318    /// The requested line as a [Slice].
319    ///
320    /// # Panics
321    /// This method will panic if the given line index is out of bounds
322    #[inline]
323    pub fn line(&self, line_idx: usize) -> Slice<'_> {
324        if line_idx >= self.len_lines() {
325            assert_line_endings!(self);
326            panic!(
327                "line index was {line_idx} but buffer has {} lines",
328                self.len_lines()
329            )
330        }
331
332        let to = match self.line_endings.iter().nth(line_idx) {
333            Some((&idx, _)) => idx + self.char_len(idx),
334            None => self.cap,
335        };
336        let from = if line_idx == 0 {
337            0
338        } else {
339            let idx = *self.line_endings.iter().nth(line_idx - 1).unwrap().0;
340            idx + 1
341        };
342
343        Slice::from_raw_offsets(from, to, self)
344    }
345
346    /// The number of characters in the requested line.
347    ///
348    /// # Panics
349    /// This method will panic if the given line index is out of bounds
350    #[inline]
351    pub fn line_len_chars(&self, line_idx: usize) -> usize {
352        if line_idx >= self.len_lines() {
353            assert_line_endings!(self);
354            panic!(
355                "line index was {line_idx} but buffer has {} lines",
356                self.len_lines()
357            )
358        }
359
360        let chars_to = match self.line_endings.iter().nth(line_idx) {
361            Some((_, &char_idx)) => char_idx + 1,
362            None if line_idx == 0 => return self.n_chars,
363            None => self.n_chars,
364        };
365
366        let chars_from = if line_idx == 0 {
367            0
368        } else {
369            *self.line_endings.iter().nth(line_idx - 1).unwrap().1 + 1
370        };
371
372        chars_to - chars_from
373    }
374
375    /// Primarily intended for supplying contiguous ranges of bytes to tree-sitter when
376    /// parsing. Returns a byte slice from the underlying data buffer without entering
377    /// the gap.
378    pub fn maximal_slice_from_offset(&self, byte_offset: usize) -> &[u8] {
379        if byte_offset > self.len() {
380            &[]
381        } else {
382            let i = self.byte_to_raw_byte(byte_offset);
383            match i.cmp(&self.gap_start) {
384                Ordering::Less => &self.data[i..self.gap_start],
385                Ordering::Equal => &self.data[self.gap_end..],
386                Ordering::Greater => &self.data[i..],
387            }
388        }
389    }
390
391    /// An exclusive range of characters from the buffer
392    pub fn slice_from_byte_offsets(&self, byte_from: usize, byte_to: usize) -> Slice<'_> {
393        let from = self.byte_to_raw_byte(byte_from);
394        let to = self.byte_to_raw_byte(byte_to);
395
396        Slice::from_raw_offsets(from, to, self)
397    }
398
399    /// An exclusive range of characters from the buffer
400    pub fn slice(&self, char_from: usize, char_to: usize) -> Slice<'_> {
401        let from = self.char_to_raw_byte(char_from);
402        let to = self.offset_char_to_raw_byte(char_to, from, char_from);
403
404        Slice::from_raw_offsets(from, to, self)
405    }
406
407    fn chars_in_raw_range(&self, raw_from: usize, raw_to: usize) -> usize {
408        if raw_to <= self.gap_start || raw_from >= self.gap_end {
409            count_chars(&self.data[raw_from..raw_to])
410        } else {
411            count_chars(&self.data[raw_from..self.gap_start])
412                + count_chars(&self.data[self.gap_end..raw_to])
413        }
414    }
415
416    /// Convert a byte index to a character index
417    pub fn raw_byte_to_char(&self, byte_idx: usize) -> usize {
418        self.chars_in_raw_range(0, byte_idx)
419    }
420
421    /// Convert a character index to the index of the line containing it
422    ///
423    /// # Panics
424    /// This method will panic if the given char index is out of bounds
425    pub fn char_to_line(&self, char_idx: usize) -> usize {
426        match self.try_char_to_line(char_idx) {
427            Some(line_idx) => line_idx,
428            None => {
429                assert_line_endings!(self);
430                panic!(
431                    "char index was {char_idx} but the buffer char length is {}",
432                    self.len_chars()
433                );
434            }
435        }
436    }
437
438    /// Convert a character index to the index of the line containing it
439    pub fn try_char_to_line(&self, char_idx: usize) -> Option<usize> {
440        match char_idx.cmp(&self.n_chars) {
441            Ordering::Less => {
442                for (i, &char_offset) in self.line_endings.values().enumerate() {
443                    if char_idx <= char_offset {
444                        return Some(i);
445                    }
446                }
447                Some(self.len_lines() - 1)
448            }
449
450            // We allow setting the cursor to the end of the buffer for inserts
451            Ordering::Equal => Some(self.len_lines() - 1),
452
453            Ordering::Greater => None,
454        }
455    }
456
457    /// Convert a line index to the character index of its first character
458    ///
459    /// # Panics
460    /// This method will panic if the given char index is out of bounds
461    pub fn line_to_char(&self, line_idx: usize) -> usize {
462        match self.try_line_to_char(line_idx) {
463            Some(char_idx) => char_idx,
464            None => {
465                assert_line_endings!(self);
466                panic!(
467                    "line index was {line_idx} but the buffer has {} lines",
468                    self.len_lines()
469                );
470            }
471        }
472    }
473
474    /// Convert a line index to the character index of its first character
475    pub fn try_line_to_char(&self, line_idx: usize) -> Option<usize> {
476        if line_idx > self.len_lines() - 1 {
477            return None;
478        }
479
480        if line_idx == 0 {
481            Some(0)
482        } else {
483            let k = *self.line_endings.iter().nth(line_idx - 1).unwrap().1;
484            Some(k + 1)
485        }
486    }
487
488    /// Insert a single character at the specifified byte index.
489    ///
490    /// This is O(1) if idx is at the current gap start and the gap is large enough to accomodate
491    /// the new text, otherwise data will need to be copied in order to relocate the gap.
492    pub fn insert_char(&mut self, char_idx: usize, ch: char) {
493        let len = ch.len_utf8();
494        if self.gap().saturating_sub(len) < MIN_GAP {
495            self.grow_gap(len);
496        }
497
498        let idx = self.char_to_byte(char_idx);
499        self.move_gap_to(idx);
500
501        ch.encode_utf8(&mut self.data[self.gap_start..]);
502        self.gap_start += len;
503        self.n_chars += 1;
504
505        if ch == '\n' {
506            self.line_endings.insert(idx, char_idx);
507        }
508
509        self.update_line_endings(|(&bidx, &cidx)| {
510            if bidx > idx {
511                (bidx, cidx + 1)
512            } else {
513                (bidx, cidx)
514            }
515        });
516
517        #[cfg(test)]
518        assert_line_endings!(self);
519    }
520
521    /// Insert a string at the specifified byte index.
522    ///
523    /// This is O(1) if idx is at the current gap start and the gap is large enough to accomodate
524    /// the new text, otherwise data will need to be copied in order to relocate the gap.
525    pub fn insert_str(&mut self, char_idx: usize, s: &str) {
526        let len = s.len();
527        let len_chars = s.chars().count();
528        if self.gap().saturating_sub(len) < MIN_GAP {
529            self.grow_gap(len);
530        }
531
532        let idx = self.char_to_byte(char_idx);
533        self.move_gap_to(idx);
534
535        self.data[self.gap_start..self.gap_start + len].copy_from_slice(s.as_bytes());
536        self.gap_start += len;
537        self.n_chars += s.chars().count();
538
539        for (i, (offset, ch)) in s.char_indices().enumerate() {
540            if ch == '\n' {
541                self.line_endings.insert(idx + offset, char_idx + i);
542            }
543        }
544
545        self.update_line_endings(|(&bidx, &cidx)| {
546            if bidx >= idx + len {
547                (bidx, cidx + len_chars)
548            } else {
549                (bidx, cidx)
550            }
551        });
552
553        #[cfg(test)]
554        assert_line_endings!(self);
555    }
556
557    /// Remove the requested character index from the visible region of the buffer
558    pub fn remove_char(&mut self, char_idx: usize) {
559        let idx = self.char_to_byte(char_idx);
560        let len = self.char_len(self.char_to_raw_byte(char_idx));
561
562        if idx != self.gap_start {
563            self.move_gap_to(idx);
564        }
565
566        self.gap_end += len;
567        self.n_chars -= 1;
568
569        if self.data[self.gap_end - 1] == b'\n' {
570            self.line_endings.remove(&(self.gap_end - 1));
571        }
572
573        for (_, count) in self.line_endings.iter_mut() {
574            if *count >= char_idx {
575                *count -= 1;
576            }
577        }
578
579        #[cfg(test)]
580        assert_line_endings!(self);
581    }
582
583    /// Remove the requested range (from..to) from the visible region of the buffer.
584    ///
585    /// # Panics
586    /// This method will panic if `char_from < char_to`
587    pub fn remove_range(&mut self, char_from: usize, char_to: usize) {
588        if char_from == char_to {
589            return;
590        }
591
592        assert!(
593            char_from < char_to,
594            "invalid range: from={char_from} > to={char_to}"
595        );
596
597        let raw_from = self.char_to_raw_byte(char_from);
598        let from = self.raw_byte_to_byte(raw_from);
599        let to = self.offset_char_to_byte(char_to, raw_from, char_from);
600        debug_assert!(from < to, "invalid byte range: from={from} > to={to}");
601        self.move_gap_to(from);
602
603        let n_bytes = to - from;
604        let n_chars = char_to - char_from;
605
606        self.gap_end += n_bytes;
607        self.n_chars -= n_chars;
608
609        self.line_endings
610            .retain(|idx, _| !((self.gap_end - n_bytes)..(self.gap_end)).contains(idx));
611
612        for (_, count) in self.line_endings.iter_mut() {
613            if *count >= char_to {
614                *count -= n_chars;
615            } else if *count > char_from {
616                *count = char_from;
617            }
618        }
619
620        #[cfg(test)]
621        assert_line_endings!(self);
622    }
623
624    /// BTreeMap doesn't support iter_mut with mutable keys so we need to map over the existing
625    /// line endings and collect into a new map.
626    fn update_line_endings<F>(&mut self, f: F)
627    where
628        F: Fn((&usize, &usize)) -> (usize, usize),
629    {
630        self.line_endings = self.line_endings.iter().map(f).collect();
631    }
632
633    fn grow_gap(&mut self, n: usize) {
634        if n >= self.next_gap {
635            self.next_gap = clamp_gap_size(self.len() + n, n.next_power_of_two());
636        }
637
638        let gap_increase = self.next_gap + n - self.gap();
639        let cap = self.cap + self.next_gap + n;
640        let mut buf = Vec::with_capacity(cap);
641
642        buf.extend_from_slice(&self.data[..self.gap_start]); // data to gap
643        buf.resize(buf.len() + self.next_gap + n, 0); // the new gap (zeroed)
644        buf.extend_from_slice(&self.data[self.gap_end..]); // data after gap
645
646        let start = self.gap_start;
647        self.update_line_endings(|(&bidx, &cidx)| {
648            if bidx > start {
649                (bidx + gap_increase, cidx)
650            } else {
651                (bidx, cidx)
652            }
653        });
654
655        self.next_gap = clamp_gap_size(self.len(), self.next_gap * 2);
656        self.data = buf.into_boxed_slice();
657        self.gap_end += gap_increase;
658        self.cap = cap;
659
660        #[cfg(test)]
661        assert_line_endings!(self);
662    }
663
664    /// The byte_idx argument here is an absolute position within the "live" buffer which will mark
665    /// the first byte of the gap region following the move.
666    ///
667    /// We do not require that the data within the gap region is valid utf-8 so it is fine for this
668    /// offset to land in the middle of existing multi-byte characters so long as the regions
669    /// outside of the gap stay valid utf-8.
670    ///
671    /// # Panics
672    /// This method will panic if the given index is out of bounds
673    fn move_gap_to(&mut self, byte_idx: usize) {
674        // we need space to fit the current gap size
675        assert!(
676            byte_idx <= self.len(),
677            "index out of bounds: {byte_idx} > {}",
678            self.len()
679        );
680
681        let gap = self.gap();
682
683        let (src, dest) = match byte_idx.cmp(&self.gap_start) {
684            Ordering::Equal => return,
685
686            // Gap moving left
687            Ordering::Less => {
688                let start = self.gap_start;
689                self.update_line_endings(|(&bidx, &cidx)| {
690                    if bidx >= byte_idx && bidx <= start {
691                        (bidx + gap, cidx)
692                    } else {
693                        (bidx, cidx)
694                    }
695                });
696
697                (byte_idx..self.gap_start, byte_idx + gap)
698            }
699
700            // Gap moving right
701            Ordering::Greater => {
702                let end = self.gap_end;
703                self.update_line_endings(|(&bidx, &cidx)| {
704                    if bidx >= end && bidx < byte_idx + gap {
705                        (bidx - gap, cidx)
706                    } else {
707                        (bidx, cidx)
708                    }
709                });
710
711                (self.gap_end..byte_idx + gap, self.gap_start)
712            }
713        };
714
715        self.data.copy_within(src, dest);
716        self.gap_end = byte_idx + gap;
717        self.gap_start = byte_idx;
718
719        #[cfg(test)]
720        assert_line_endings!(self);
721    }
722
723    /// Convert a character offset within the logical buffer to a byte offset
724    /// within the logical buffer. This is used to account for multi-byte characters
725    /// within the buffer and is treated as a String-like index but it does not
726    /// account for the position of the gap.
727    #[inline]
728    pub fn char_to_byte(&self, char_idx: usize) -> usize {
729        self.offset_char_to_byte(char_idx, 0, 0)
730    }
731
732    #[inline]
733    fn raw_byte_to_byte(&self, raw: usize) -> usize {
734        if raw > self.gap_start {
735            raw - self.gap()
736        } else {
737            raw
738        }
739    }
740
741    #[inline]
742    pub fn byte_to_raw_byte(&self, byte: usize) -> usize {
743        if byte > self.gap_start {
744            byte + self.gap()
745        } else {
746            byte
747        }
748    }
749
750    #[inline]
751    fn offset_char_to_byte(
752        &self,
753        char_idx: usize,
754        byte_offset: usize,
755        char_offset: usize,
756    ) -> usize {
757        let raw = self.offset_char_to_raw_byte(char_idx, byte_offset, char_offset);
758
759        self.raw_byte_to_byte(raw)
760    }
761
762    /// Convert a character offset within the logical buffer to a raw byte offset
763    /// within the underlying allocation we maintain. This is an absolute index
764    /// into our allocated array that accounts for the position of the gap.
765    #[inline]
766    fn char_to_raw_byte(&self, char_idx: usize) -> usize {
767        self.offset_char_to_raw_byte(char_idx, 0, 0)
768    }
769
770    /// Allow for skipping to a given byte index before starting the search as an optimisation when we
771    /// are searching for multiple positions in sequence (e.g. the start and end of a range).
772    fn offset_char_to_raw_byte(
773        &self,
774        char_idx: usize,
775        mut byte_offset: usize,
776        mut char_offset: usize,
777    ) -> usize {
778        let mut to = usize::MAX;
779
780        for (&b, &c) in self
781            .line_endings
782            .iter()
783            .skip_while(move |(b, _)| **b < byte_offset)
784        {
785            match c.cmp(&char_idx) {
786                Ordering::Less => (byte_offset, char_offset) = (b, c),
787                Ordering::Equal => return b,
788                Ordering::Greater => {
789                    to = b;
790                    break;
791                }
792            }
793        }
794
795        let slice = Slice::from_raw_offsets(byte_offset, to, self);
796        let mut chars = slice.chars();
797        let mut cur = byte_offset;
798        for _ in 0..(char_idx - char_offset) {
799            chars.next();
800            cur = chars.cur + byte_offset;
801        }
802
803        if cur > self.gap_start && cur < self.gap_end {
804            cur += self.gap()
805        }
806
807        cur
808    }
809}
810
811/// A view on a region of the GapBuffer.
812///
813/// Slices will become invalidated if the gap is moved from the position they were created with
814#[derive(Default, Debug, Copy, Clone, PartialEq, Eq)]
815pub struct Slice<'a> {
816    from: usize,
817    left: &'a [u8],
818    right: &'a [u8],
819}
820
821impl<'a> Slice<'a> {
822    #[inline]
823    fn from_raw_offsets(from: usize, to: usize, gb: &'a GapBuffer) -> Slice<'a> {
824        let to = min(to, gb.data.len());
825
826        if to <= gb.gap_start || from >= gb.gap_end {
827            return Slice {
828                from,
829                left: &gb.data[from..to],
830                right: &[],
831            };
832        }
833
834        debug_assert!(from <= gb.gap_start, "line offset sits in gap");
835
836        Slice {
837            from,
838            left: &gb.data[from..gb.gap_start],
839            right: &gb.data[gb.gap_end..to],
840        }
841    }
842
843    /// The byte offset that this slice starts at within the parent [GapBuffer].
844    pub fn from_byte(&self) -> usize {
845        self.from
846    }
847
848    /// The number of utf-8 characters within this slice.
849    ///
850    /// Calculating involves parsing the entire slice as utf-8.
851    pub fn len_utf8(&self) -> usize {
852        self.chars().count()
853    }
854
855    /// The two sides of this slice as &str references
856    pub fn as_strs(&self) -> (&str, &str) {
857        // SAFETY: we know that we have valid utf8 data internally
858        unsafe {
859            (
860                std::str::from_utf8_unchecked(self.left),
861                std::str::from_utf8_unchecked(self.right),
862            )
863        }
864    }
865
866    /// Iterate over the contiguous &[u8] regions within this slice
867    pub fn slice_iter(self) -> SliceIter<'a> {
868        SliceIter {
869            inner: self,
870            pos: Some(false),
871        }
872    }
873
874    /// Iterate over the characters in this slice
875    pub fn chars(self) -> Chars<'a> {
876        Chars { s: self, cur: 0 }
877    }
878
879    /// Iterate over the characters in this slice with their corresponding character indices
880    pub fn indexed_chars(self, from: usize, rev: bool) -> IdxChars<'a> {
881        let (cur, idx) = if rev {
882            (
883                self.left.len() + self.right.len(),
884                from + count_chars(self.left) + count_chars(self.right),
885            )
886        } else {
887            (0, from)
888        };
889
890        IdxChars {
891            s: self,
892            cur,
893            idx,
894            rev,
895        }
896    }
897
898    fn cur_and_data(&self, cur: usize) -> (usize, &[u8]) {
899        if cur < self.left.len() {
900            (cur, self.left)
901        } else {
902            (cur - self.left.len(), self.right)
903        }
904    }
905}
906
907impl fmt::Display for Slice<'_> {
908    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
909        let mut v = Vec::with_capacity(self.left.len() + self.right.len());
910        v.extend_from_slice(self.left);
911        v.extend_from_slice(self.right);
912
913        match String::from_utf8(v) {
914            Ok(s) => write!(f, "{s}"),
915            Err(_) => Err(fmt::Error),
916        }
917    }
918}
919
920impl<'b> PartialEq<&'b str> for Slice<'_> {
921    fn eq(&self, other: &&'b str) -> bool {
922        let b = other.as_bytes();
923
924        &b[..self.left.len()] == self.left && &b[self.left.len()..] == self.right
925    }
926}
927
928impl PartialEq<String> for Slice<'_> {
929    fn eq(&self, other: &String) -> bool {
930        *self == other.as_str()
931    }
932}
933
934#[derive(Debug)]
935pub struct SliceIter<'a> {
936    inner: Slice<'a>,
937    pos: Option<bool>,
938}
939
940impl<'a> Iterator for SliceIter<'a> {
941    type Item = &'a [u8];
942
943    fn next(&mut self) -> Option<Self::Item> {
944        match self.pos? {
945            false => {
946                self.pos = Some(true);
947                Some(self.inner.left)
948            }
949
950            true => {
951                self.pos = None;
952                Some(self.inner.right)
953            }
954        }
955    }
956}
957
958/// An iterator of characters from a [Slice]
959#[derive(Default, Debug, Copy, Clone, PartialEq, Eq)]
960pub struct Chars<'a> {
961    s: Slice<'a>,
962    cur: usize,
963}
964
965impl Iterator for Chars<'_> {
966    type Item = char;
967
968    fn next(&mut self) -> Option<Self::Item> {
969        if self.cur >= self.s.left.len() + self.s.right.len() {
970            return None;
971        }
972
973        let (cur, data) = self.s.cur_and_data(self.cur);
974        // SAFETY: we know we are in bounds and that we contain valid utf-8 data
975        let ch = unsafe { decode_char_at(cur, data) };
976        let len = ch.len_utf8();
977        self.cur += len;
978
979        Some(ch)
980    }
981}
982
983/// An iterator of characters and their indices from a [Slice]
984#[derive(Default, Debug, Copy, Clone, PartialEq, Eq)]
985pub struct IdxChars<'a> {
986    s: Slice<'a>,
987    cur: usize,
988    idx: usize,
989    rev: bool,
990}
991
992impl Iterator for IdxChars<'_> {
993    type Item = (usize, char);
994
995    fn next(&mut self) -> Option<Self::Item> {
996        if (!self.rev && self.cur >= self.s.left.len() + self.s.right.len())
997            || (self.rev && self.cur == 0)
998        {
999            return None;
1000        }
1001
1002        if self.rev {
1003            let (cur, data) = self.s.cur_and_data(self.cur - 1);
1004            // SAFETY: we know we are in bounds and that we contain valid utf-8 data
1005            let ch = unsafe { decode_char_ending_at(cur, data) };
1006            let len = ch.len_utf8();
1007            self.idx -= 1;
1008            self.cur -= len;
1009            Some((self.idx, ch))
1010        } else {
1011            let (cur, data) = self.s.cur_and_data(self.cur);
1012            // SAFETY: we know we are in bounds and that we contain valid utf-8 data
1013            let ch = unsafe { decode_char_at(cur, data) };
1014            let len = ch.len_utf8();
1015            let res = Some((self.idx, ch));
1016            self.cur += len;
1017            self.idx += 1;
1018            res
1019        }
1020    }
1021}
1022
1023// The following helper functions are adapted from nightly APIs in std::core::str
1024// -> https://doc.rust-lang.org/stable/src/core/str/validations.rs.html
1025
1026/// Mask of the value bits of a continuation byte.
1027const CONT_MASK: u8 = 0b0011_1111;
1028
1029/// Returns the initial codepoint accumulator for the first byte.
1030/// The first byte is special, only want bottom 5 bits for width 2, 4 bits
1031/// for width 3, and 3 bits for width 4.
1032#[inline]
1033const fn utf8_first_byte(byte: u8, width: u32) -> u32 {
1034    (byte & (0x7F >> width)) as u32
1035}
1036
1037/// Returns the value of `ch` updated with continuation byte `byte`.
1038#[inline]
1039const fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
1040    (ch << 6) | (byte & CONT_MASK) as u32
1041}
1042
1043/// Checks whether the byte is a UTF-8 continuation byte (i.e., starts with the
1044/// bits `10`).
1045#[inline]
1046const fn utf8_is_cont_byte(byte: u8) -> bool {
1047    (byte as i8) < -64
1048}
1049
1050/// Decode a utf-8 code point from `bytes` starting at `start`.
1051/// `bytes` must contain valid utf-8 data beginning at `start`
1052#[inline]
1053unsafe fn decode_char_at(start: usize, bytes: &[u8]) -> char {
1054    // Decode UTF-8
1055    // SAFETY: `bytes` contains UTF-8-like string data so we have the next byte,
1056    let x = bytes[start];
1057    if x < 128 {
1058        return char::from_u32_unchecked(x as u32);
1059    }
1060
1061    // Multibyte case follows
1062    // Decode from a byte combination out of: [[[x y] z] w]
1063    // NOTE: Performance is sensitive to the exact formulation here
1064    let init = utf8_first_byte(x, 2);
1065    // SAFETY: `bytes` contains UTF-8-like string data so we have the next byte,
1066    let y = bytes[start + 1];
1067    let mut ch = utf8_acc_cont_byte(init, y);
1068
1069    if x >= 0xE0 {
1070        // [[x y z] w] case
1071        // 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
1072        // SAFETY: `bytes` contains UTF-8-like string data so we have the next byte,
1073        let z = bytes[start + 2];
1074        let y_z = utf8_acc_cont_byte((y & CONT_MASK) as u32, z);
1075        ch = init << 12 | y_z;
1076        if x >= 0xF0 {
1077            // [x y z w] case
1078            // use only the lower 3 bits of `init`
1079            // SAFETY: `bytes` contains UTF-8-like string data so we have the next byte,
1080            let w = bytes[start + 3];
1081            ch = (init & 7) << 18 | utf8_acc_cont_byte(y_z, w);
1082        }
1083    }
1084
1085    char::from_u32_unchecked(ch)
1086}
1087
1088/// Decode a utf-8 code point from `bytes` ending at `end`.
1089/// `bytes` must contain valid utf-8 data ending at `end`
1090#[inline]
1091unsafe fn decode_char_ending_at(end: usize, bytes: &[u8]) -> char {
1092    // Decode UTF-8
1093    let w = match bytes[end] {
1094        b if b < 128 => return char::from_u32_unchecked(b as u32),
1095        b => b,
1096    };
1097
1098    // Multibyte case follows
1099    // Decode from a byte combination out of: [x [y [z w]]]
1100    let mut ch;
1101    // SAFETY: `bytes` contains UTF-8-like string data so we have the next byte,
1102    let z = bytes[end - 1];
1103    ch = utf8_first_byte(z, 2);
1104    if utf8_is_cont_byte(z) {
1105        // SAFETY: `bytes` contains UTF-8-like string data so we have the next byte,
1106        let y = bytes[end - 2];
1107        ch = utf8_first_byte(y, 3);
1108        if utf8_is_cont_byte(y) {
1109            // SAFETY: `bytes` contains UTF-8-like string data so we have the next byte,
1110            let x = bytes[end - 3];
1111            ch = utf8_first_byte(x, 4);
1112            ch = utf8_acc_cont_byte(ch, y);
1113        }
1114        ch = utf8_acc_cont_byte(ch, z);
1115    }
1116    ch = utf8_acc_cont_byte(ch, w);
1117
1118    char::from_u32_unchecked(ch)
1119}
1120
1121#[cfg(test)]
1122mod tests {
1123    use super::*;
1124    use simple_test_case::test_case;
1125
1126    fn debug_buffer_content(gb: &GapBuffer) -> String {
1127        let mut v = gb.data.to_vec();
1128        v[gb.gap_start..gb.gap_end].copy_from_slice("_".repeat(gb.gap()).as_bytes());
1129        String::from_utf8(v).expect("valid utf8")
1130    }
1131
1132    fn raw_debug_buffer_content(gb: &GapBuffer) -> String {
1133        let mut v = gb.data.to_vec();
1134        for b in v[gb.gap_start..gb.gap_end].iter_mut() {
1135            if *b == b'\0' {
1136                *b = b'_';
1137            }
1138        }
1139        v.insert(gb.gap_end, b']');
1140        v.insert(gb.gap_start, b'[');
1141
1142        String::from_utf8(v).expect("valid utf8")
1143    }
1144
1145    #[test]
1146    fn from_string_matches_from_str() {
1147        let s = "this is a test";
1148        let gb1 = GapBuffer::from(s.to_string());
1149        let gb2 = GapBuffer::from(s);
1150
1151        assert_eq!(gb1, gb2);
1152    }
1153
1154    #[test]
1155    fn to_string_works() {
1156        let s = "this is a test";
1157        let gb = GapBuffer::from(s.to_string());
1158        assert_eq!(gb.to_string(), s);
1159    }
1160
1161    #[test]
1162    fn insert_into_empty_string_initial_gb_works() {
1163        let mut gb = GapBuffer::from(String::new());
1164        gb.insert_char(0, 'a');
1165        assert_eq!(gb.to_string(), "a");
1166    }
1167
1168    #[test_case("foo│foo│foo"; "interleaved multibyte and ascii")]
1169    #[test_case("hello, 世界!"; "blocks of multibyte and ascii")]
1170    #[test_case("hello, world!"; "just ascii")]
1171    #[test]
1172    fn len_chars_works(s: &str) {
1173        let mut gb = GapBuffer::from(s);
1174        let len_s = s.chars().count();
1175
1176        println!("initial:          {:?}", raw_debug_buffer_content(&gb));
1177        assert_eq!(gb.len_chars(), len_s);
1178        assert_eq!(
1179            gb.len_chars(),
1180            gb.to_string().chars().count(),
1181            "char iter len != len_chars"
1182        );
1183        assert_eq!(
1184            gb.line(0).chars().count(),
1185            gb.line_len_chars(0),
1186            "line_len_chars != len_chars"
1187        );
1188
1189        gb.insert_char(5, 'X');
1190        println!("after insert X:   {:?}", raw_debug_buffer_content(&gb));
1191        assert_eq!(gb.len_chars(), len_s + 1);
1192        assert_eq!(
1193            gb.len_chars(),
1194            gb.to_string().chars().count(),
1195            "char iter len != len_chars"
1196        );
1197
1198        gb.insert_char(3, '界');
1199        println!("after insert 界:  {:?}", raw_debug_buffer_content(&gb));
1200        assert_eq!(gb.len_chars(), len_s + 2);
1201        assert_eq!(
1202            gb.len_chars(),
1203            gb.to_string().chars().count(),
1204            "char iter len != len_chars"
1205        );
1206
1207        assert_eq!(gb.char(3), '界');
1208        gb.remove_char(3);
1209        println!("after remove 界:  {:?}", raw_debug_buffer_content(&gb));
1210        assert_eq!(gb.len_chars(), len_s + 1);
1211        assert_eq!(
1212            gb.len_chars(),
1213            gb.to_string().chars().count(),
1214            "char iter len != len_chars"
1215        );
1216
1217        assert_eq!(gb.char(5), 'X');
1218        gb.remove_char(5);
1219        println!("after remove X:   {:?}", debug_buffer_content(&gb));
1220        assert_eq!(gb.len_chars(), len_s);
1221        assert_eq!(
1222            gb.len_chars(),
1223            gb.to_string().chars().count(),
1224            "char iter len != len_chars"
1225        );
1226        assert_eq!(gb.to_string(), s);
1227    }
1228
1229    #[test_case("foo│foo│foo"; "interleaved multibyte and ascii")]
1230    #[test_case("hello, 世界!"; "blocks of multibyte and ascii")]
1231    #[test_case("hello, world!"; "just ascii")]
1232    #[test]
1233    fn move_gap_to_maintains_content(s: &str) {
1234        let mut gb = GapBuffer::from(s);
1235
1236        for i in 0..gb.len_chars() {
1237            let idx = gb.char_to_byte(i);
1238            gb.move_gap_to(idx);
1239
1240            // Splitting into the two sections like this allows us to verify that
1241            // we have valid utf-8 encoded text on either side of the gap.
1242            let (s1, s2) = (
1243                std::str::from_utf8(&gb.data[..gb.gap_start]).unwrap(),
1244                std::str::from_utf8(&gb.data[gb.gap_end..]).unwrap(),
1245            );
1246
1247            assert_eq!(format!("{s1}{s2}"), s, "idx={idx}");
1248        }
1249    }
1250
1251    #[test]
1252    fn move_gap_to_maintains_line_content() {
1253        let s = "hello, world!\nhow are you?\nthis is a test";
1254        let mut gb = GapBuffer::from(s);
1255        assert_eq!(gb.len_lines(), 3);
1256
1257        for i in 0..gb.len_chars() {
1258            let idx = gb.char_to_byte(i);
1259            gb.move_gap_to(idx);
1260            assert_eq!(gb.len_lines(), 3);
1261
1262            assert_eq!(gb.line(0).to_string(), "hello, world!\n", "idx={idx}");
1263            assert_eq!(gb.line(1).to_string(), "how are you?\n", "idx={idx}");
1264            assert_eq!(gb.line(2).to_string(), "this is a test", "idx={idx}");
1265        }
1266    }
1267
1268    #[test_case(0, 0, 0; "BOF cur at BOF")]
1269    #[test_case(27, 0, 0; "BOF cur at EOF")]
1270    #[test_case(27, 5, 5; "in the buffer cur at EOF")]
1271    #[test_case(5, 5, 5; "in the buffer cur at gap")]
1272    #[test_case(5, 3, 3; "in the buffer cur before gap")]
1273    #[test_case(5, 11, 15; "in the buffer cur after gap")]
1274    #[test_case(5, 7, 7; "multi byte 1")]
1275    #[test_case(5, 8, 10; "multi byte 2")]
1276    #[test]
1277    fn char_to_byte_works(cur: usize, char_idx: usize, expected: usize) {
1278        let s = "hello, 世界!\nhow are you?";
1279        let mut gb = GapBuffer::from(s);
1280        assert_eq!(s.len(), 27, "EOF case is not 0..s.len()");
1281        assert_eq!("世".len(), 3);
1282        gb.move_gap_to(cur);
1283
1284        let byte_idx = gb.char_to_byte(char_idx);
1285        assert_eq!(byte_idx, expected, "{:?}", debug_buffer_content(&gb));
1286    }
1287
1288    #[test_case(0, 0, 0; "BOF cur at BOF")]
1289    #[test_case(27, 0, 0; "BOF cur at EOF")]
1290    #[test_case(27, 5, 5; "in the buffer cur at EOF")]
1291    #[test_case(5, 5, 5; "in the buffer cur at gap")]
1292    #[test_case(5, 3, 3; "in the buffer cur before gap")]
1293    #[test_case(5, 11, 79; "in the buffer cur after gap")]
1294    #[test_case(5, 7, 71; "multi byte 1")]
1295    #[test_case(5, 8, 74; "multi byte 2")]
1296    #[test]
1297    fn char_to_raw_byte_works(cur: usize, char_idx: usize, expected: usize) {
1298        let s = "hello, 世界!\nhow are you?";
1299        let mut gb = GapBuffer::from(s);
1300        assert_eq!(s.len(), 27, "EOF case is not 0..s.len()");
1301        assert_eq!("世".len(), 3);
1302        gb.move_gap_to(cur);
1303
1304        let char_idx = gb.char_to_raw_byte(char_idx);
1305        assert_eq!(char_idx, expected, "{:?}", debug_buffer_content(&gb));
1306    }
1307
1308    #[test_case(0, 0, "hello, world!\n"; "first line cur at BOF")]
1309    #[test_case(0, 1, "how are you?"; "second line cur at BOF")]
1310    #[test_case(26, 0, "hello, world!\n"; "first line cur at EOF")]
1311    #[test_case(26, 1, "how are you?"; "second line cur at EOF")]
1312    #[test_case(10, 0, "hello, world!\n"; "first line cur in line")]
1313    #[test_case(10, 1, "how are you?"; "second line cur in line")]
1314    #[test]
1315    fn slice_to_string_works(cur: usize, line: usize, expected: &str) {
1316        let mut gb = GapBuffer::from("hello, world!\nhow are you?");
1317        gb.move_gap_to(cur);
1318
1319        assert_eq!(gb.line(line).to_string(), expected);
1320    }
1321
1322    #[test]
1323    fn line_to_char_works() {
1324        let l1 = "hello, world!\n";
1325        let l2 = "how are you?\n";
1326        let l3 = "this is a test";
1327
1328        let gb = GapBuffer::from(format!("{l1}{l2}{l3}"));
1329
1330        assert_eq!(gb.line_to_char(0), 0);
1331        assert_eq!(gb.line_to_char(1), l1.chars().count());
1332        assert_eq!(gb.line_to_char(2), l1.chars().count() + l2.chars().count());
1333    }
1334
1335    #[test_case(0, 0; "start of first line")]
1336    #[test_case(5, 0; "in first line")]
1337    #[test_case(13, 0; "end of first line")]
1338    #[test_case(14, 1; "start of second line")]
1339    #[test_case(20, 1; "in second line")]
1340    #[test_case(26, 1; "end of second line")]
1341    #[test_case(27, 2; "start of third line")]
1342    #[test_case(30, 2; "in third line")]
1343    #[test_case(40, 2; "end of third line")]
1344    #[test]
1345    fn char_to_line_works(char_idx: usize, line_idx: usize) {
1346        let l1 = "hello, world!\n";
1347        let l2 = "how are you?\n";
1348        let l3 = "this is a test";
1349
1350        let gb = GapBuffer::from(format!("{l1}{l2}{l3}"));
1351
1352        assert_eq!(gb.char_to_line(char_idx), line_idx);
1353    }
1354
1355    #[test_case(&[(0, 'h')], "hello world"; "insert front")]
1356    #[test_case(&[(4, ',')], "ello, world"; "insert inner")]
1357    #[test_case(&[(10, '!')], "ello world!"; "insert back")]
1358    #[test_case(&[(4, ','), (11, '!')], "ello, world!"; "insert inner then back")]
1359    #[test_case(&[(4, ','), (0, 'h')], "hello, world"; "insert inner then front")]
1360    #[test_case(&[(0, 'h'), (5, ','),], "hello, world"; "insert front then inner")]
1361    #[test_case(&[(10, '!'), (0, 'h'), (5, ',')], "hello, world!"; "insert all")]
1362    #[test]
1363    fn insert_char(inserts: &[(usize, char)], expected: &str) {
1364        let mut gb = GapBuffer::from("ello world");
1365
1366        for &(idx, ch) in inserts {
1367            gb.insert_char(idx, ch);
1368        }
1369
1370        assert_eq!(gb.to_string(), expected, "{:?}", debug_buffer_content(&gb))
1371    }
1372
1373    #[test]
1374    fn insert_char_with_moving_cur() {
1375        let mut gb = GapBuffer::from("hello ");
1376        gb.insert_char(6, 'w');
1377        gb.insert_char(7, 'o');
1378        gb.insert_char(8, 'r');
1379        gb.insert_char(9, 'l');
1380        gb.insert_char(10, 'd');
1381        gb.insert_char(11, '!');
1382        gb.insert_char(5, ',');
1383
1384        assert_eq!(
1385            gb.to_string(),
1386            "hello, world!",
1387            "{:?}",
1388            debug_buffer_content(&gb)
1389        )
1390    }
1391
1392    #[test]
1393    fn insert_newline_char_is_tracked_correctly() {
1394        let s = "hello, world!\nhow are you?";
1395        let mut gb = GapBuffer::from(s);
1396        assert_eq!(gb.len_lines(), 2);
1397
1398        println!("initial: {:?}", raw_debug_buffer_content(&gb));
1399        gb.insert_char(6, '\n');
1400        println!("insert:  {:?}", raw_debug_buffer_content(&gb));
1401
1402        assert_eq!(gb.len_lines(), 3);
1403        assert_eq!(gb.line(0).to_string(), "hello,\n");
1404        assert_eq!(gb.line(1).to_string(), " world!\n");
1405        assert_eq!(gb.line(2).to_string(), "how are you?");
1406
1407        for idx in 0..=gb.len_chars() {
1408            gb.move_gap_to(idx);
1409            assert_eq!(gb.len_lines(), 3);
1410
1411            assert_eq!(gb.line(0).to_string(), "hello,\n", "idx={idx}");
1412            assert_eq!(gb.line(1).to_string(), " world!\n", "idx={idx}");
1413            assert_eq!(gb.line(2).to_string(), "how are you?", "idx={idx}");
1414        }
1415    }
1416
1417    #[test_case(&[(0, "hell")], "helloworl"; "insert front")]
1418    #[test_case(&[(1, ", ")], "o, worl"; "insert inner")]
1419    #[test_case(&[(5, "d!")], "oworld!"; "insert back")]
1420    #[test_case(&[(5, "d!"), (0, "hell"), (5, ", ")], "hello, world!"; "insert all")]
1421    #[test_case(&[(5, "d!"), (0, "hell"), (5, ",\n")], "hello,\nworld!"; "insert all w newline")]
1422    #[test]
1423    fn insert_str(inserts: &[(usize, &str)], expected: &str) {
1424        let mut gb = GapBuffer::from("oworl");
1425        for &(idx, s) in inserts {
1426            gb.insert_str(idx, s);
1427        }
1428
1429        assert_eq!(gb.to_string(), expected, "{:?}", debug_buffer_content(&gb))
1430    }
1431
1432    #[test]
1433    fn insert_newline_in_str_is_tracked_correctly() {
1434        let s = "hello, world!\nhow are you?";
1435        let mut gb = GapBuffer::from(s);
1436        assert_eq!(gb.len_lines(), 2);
1437
1438        let s2 = " sailor\nisn't this fun?\nwhat a wonderful\n";
1439        gb.insert_str(6, s2);
1440
1441        for idx in 0..=gb.len_chars() {
1442            gb.move_gap_to(idx);
1443            assert_eq!(gb.len_lines(), 5);
1444
1445            assert_eq!(gb.line(0).to_string(), "hello, sailor\n", "idx={idx}");
1446            assert_eq!(gb.line(1).to_string(), "isn't this fun?\n", "idx={idx}");
1447            assert_eq!(gb.line(2).to_string(), "what a wonderful\n", "idx={idx}");
1448            assert_eq!(gb.line(3).to_string(), " world!\n", "idx={idx}");
1449            assert_eq!(gb.line(4).to_string(), "how are you?", "idx={idx}");
1450        }
1451    }
1452
1453    #[test_case(6, "hello,world!"; "at gap start")]
1454    #[test_case(7, "hello, orld!"; "at gap end")]
1455    #[test_case(12, "hello, world"; "after gap")]
1456    #[test_case(0, "ello, world!"; "before gap")]
1457    #[test]
1458    fn remove_char(idx: usize, expected: &str) {
1459        let mut gb = GapBuffer::from("hello, world!");
1460        gb.move_gap_to(6); // space before world
1461        gb.remove_char(idx);
1462
1463        assert_eq!(gb.to_string(), expected, "{:?}", debug_buffer_content(&gb))
1464    }
1465
1466    #[test]
1467    fn remove_newline_char_is_tracked_correctly() {
1468        let s = "hello, world!\nhow are you?";
1469        let mut gb = GapBuffer::from(s);
1470        assert_eq!(gb.len_lines(), 2);
1471
1472        gb.remove_char(13);
1473
1474        assert_eq!(gb.len_lines(), 1);
1475        assert_eq!(gb.line(0).to_string(), "hello, world!how are you?");
1476    }
1477
1478    #[test_case(6, 9, "hello,rld!"; "at gap start")]
1479    #[test_case(7, 10, "hello, ld!"; "at gap end")]
1480    #[test_case(10, 13, "hello, wor"; "after gap")]
1481    #[test_case(0, 5, ", world!"; "before gap")]
1482    #[test_case(0, 13, ""; "remove all")]
1483    #[test]
1484    fn remove_range_works(from: usize, to: usize, expected: &str) {
1485        let s = "hello, world!";
1486        assert_eq!(s.len(), 13, "remove all case is not 0..s.len()");
1487
1488        let mut gb = GapBuffer::from(s);
1489        gb.move_gap_to(6); // space before world
1490        gb.remove_range(from, to);
1491
1492        assert_eq!(gb.to_string(), expected, "{:?}", debug_buffer_content(&gb))
1493    }
1494
1495    #[test]
1496    fn remove_range_w_multibyte_chars_works() {
1497        let s = "foo│foo│foo";
1498        let mut gb = GapBuffer::from(s);
1499
1500        gb.remove_range(0, 3);
1501        assert_eq!(gb.to_string(), "│foo│foo");
1502        assert_eq!(gb.len_chars(), 8);
1503
1504        gb.remove_range(1, 4);
1505        assert_eq!(gb.to_string(), "││foo");
1506        assert_eq!(gb.len_chars(), 5);
1507
1508        gb.remove_range(2, 5);
1509        assert_eq!(gb.to_string(), "││");
1510        assert_eq!(gb.len_chars(), 2);
1511    }
1512
1513    #[test]
1514    fn remove_range_for_last_line_works() {
1515        let s = "hello, world!\nthis is the last line";
1516        let mut gb = GapBuffer::from(s);
1517        gb.remove_range(14, s.len());
1518        assert_eq!(gb.to_string(), "hello, world!\n");
1519        assert_eq!(gb.len_lines(), 2);
1520    }
1521
1522    #[test_case(10, 15, "hello, worow are you?"; "spanning newline")]
1523    #[test_case(7, 14, "hello, how are you?"; "ending on newline")]
1524    #[test_case(13, 26, "hello, world!"; "starting on newline")]
1525    #[test]
1526    fn remove_newline_in_str_is_tracked_correctly(from: usize, to: usize, expected: &str) {
1527        let s = "hello, world!\nhow are you?";
1528        let mut gb = GapBuffer::from(s);
1529        assert_eq!(gb.len_lines(), 2);
1530
1531        gb.remove_range(from, to);
1532
1533        assert_eq!(gb.len_lines(), 1);
1534        assert_eq!(gb.to_string(), expected);
1535        assert_eq!(gb.line(0).to_string(), expected);
1536    }
1537
1538    #[test_case('X'; "ascii")]
1539    #[test_case('界'; "multi-byte")]
1540    #[test]
1541    fn insert_remove_char_is_idempotent(ch: char) {
1542        let s = "hello, world!";
1543        let mut gb = GapBuffer::from(s);
1544        gb.insert_char(6, ch);
1545        gb.remove_char(6);
1546
1547        assert_eq!(gb.to_string(), s, "{:?}", debug_buffer_content(&gb))
1548    }
1549
1550    #[test_case("TEST", 1; "without trailing newline")]
1551    #[test_case("TEST\n", 2; "with trailing newline")]
1552    #[test_case("TEST\nTEST", 2; "with internal newline")]
1553    #[test]
1554    fn insert_remove_str_is_idempotent(edit: &str, expected_lines: usize) {
1555        let s = "hello, world!";
1556        let mut gb = GapBuffer::from(s);
1557
1558        println!("initial: {:?}", raw_debug_buffer_content(&gb));
1559        for n in 0..gb.len_lines() {
1560            println!("{:?}", gb.line(n).to_string());
1561        }
1562
1563        gb.insert_str(6, edit);
1564        assert_eq!(gb.len_lines(), expected_lines);
1565        println!("insert:  {:?}", raw_debug_buffer_content(&gb));
1566        for n in 0..gb.len_lines() {
1567            println!("{:?}", gb.line(n).to_string());
1568        }
1569
1570        gb.remove_range(6, 6 + edit.len());
1571        println!("remove:  {:?}", raw_debug_buffer_content(&gb));
1572        for n in 0..gb.len_lines() {
1573            println!("{:?}", gb.line(n).to_string());
1574        }
1575
1576        assert_eq!(gb.to_string(), s);
1577    }
1578
1579    #[test]
1580    fn chars_work() {
1581        let s1 = "hello, world!\n";
1582        let s2 = "how are you?";
1583        let gb = GapBuffer::from(format!("{s1}{s2}"));
1584
1585        let l1_chars: String = gb.line(0).chars().collect();
1586        assert_eq!(l1_chars, s1);
1587
1588        let l2_chars: String = gb.line(1).chars().collect();
1589        assert_eq!(l2_chars, s2);
1590    }
1591
1592    #[test_case(
1593        "hello, 世界!", false,
1594        &[(0, 'h'), (1, 'e'), (2, 'l'), (3, 'l'), (4, 'o'),
1595          (5, ','), (6, ' '), (7, '世'), (8, '界'), (9, '!')];
1596        "multi-byte block forward"
1597    )]
1598    #[test_case(
1599        "hello, 世界!", true,
1600        &[(9, '!'), (8, '界'), (7, '世'), (6, ' '), (5, ','),
1601          (4, 'o'), (3, 'l'), (2, 'l'), (1, 'e'), (0, 'h')];
1602        "multi-byte block reversed"
1603    )]
1604    #[test_case(
1605        "foo│foo│foo", false,
1606        &[(0, 'f'), (1, 'o'), (2, 'o'), (3, '│'), (4, 'f'), (5, 'o'),
1607          (6, 'o'), (7, '│'), (8, 'f'), (9, 'o'), (10, 'o')];
1608        "interleaved forward"
1609    )]
1610    #[test_case(
1611        "foo│foo│foo", true,
1612        &[(10, 'o'), (9, 'o'), (8, 'f'), (7, '│'), (6, 'o'), (5, 'o'),
1613          (4, 'f'), (3, '│'), (2, 'o'), (1, 'o'), (0, 'f')];
1614        "interleaved reversed"
1615    )]
1616    #[test]
1617    fn indexed_chars_works(s: &str, rev: bool, expected: &[(usize, char)]) {
1618        let mut gb = GapBuffer::from(s);
1619        let v: Vec<(usize, char)> = gb.line(0).indexed_chars(0, rev).collect();
1620        assert_eq!(&v, expected);
1621
1622        for i in 0..gb.len_chars() {
1623            let idx = gb.char_to_byte(i);
1624            gb.move_gap_to(idx);
1625
1626            let v: Vec<(usize, char)> = gb.line(0).indexed_chars(0, rev).collect();
1627            assert_eq!(&v, expected, "idx={idx}");
1628        }
1629    }
1630
1631    #[test_case("foo│foo│foo"; "interleaved multibyte and ascii")]
1632    #[test_case("hello, 世界!"; "blocks of multibyte and ascii")]
1633    #[test]
1634    fn chars_works(s: &str) {
1635        let mut gb = GapBuffer::from(s);
1636        let chars: String = gb.line(0).chars().collect();
1637        assert_eq!(chars, s);
1638
1639        for i in 0..gb.len_chars() {
1640            let idx = gb.char_to_byte(i);
1641            gb.move_gap_to(idx);
1642
1643            let chars: String = gb.line(0).chars().collect();
1644            assert_eq!(chars, s, "idx={idx}");
1645        }
1646    }
1647
1648    #[test]
1649    fn slice_works() {
1650        let mut gb = GapBuffer::from("hello, world!\nhow are you?");
1651        let slice = Slice::from_raw_offsets(0, gb.cap, &gb);
1652        let (s1, s2) = slice.as_strs();
1653        assert_eq!(s1, "");
1654        assert_eq!(s2, "hello, world!\nhow are you?");
1655
1656        let slice = gb.slice(6, 17);
1657        let (s1, s2) = slice.as_strs();
1658        assert_eq!(s1, " world!\nhow");
1659        assert_eq!(s2, "");
1660
1661        gb.move_gap_to(12);
1662        println!("after move:  {:?}", raw_debug_buffer_content(&gb));
1663
1664        let slice = gb.slice(6, 17);
1665        let (s1, s2) = slice.as_strs();
1666        assert_eq!(s1, " world");
1667        assert_eq!(s2, "!\nhow");
1668    }
1669
1670    #[test]
1671    fn slice_eq_str_works() {
1672        let mut gb = GapBuffer::from("hello, world!\nhow are you?");
1673        gb.move_gap_to(3);
1674        let slice = gb.slice(0, 5);
1675        assert_eq!(slice, "hello");
1676    }
1677
1678    #[test]
1679    fn chars_in_raw_range_works() {
1680        let mut gb = GapBuffer::from("hello, world!\nhow are you?");
1681        let char_from = 7;
1682        let char_to = 12;
1683
1684        for i in 0..gb.len_chars() {
1685            let idx = gb.char_to_byte(i);
1686            gb.move_gap_to(idx);
1687
1688            let byte_from = gb.char_to_raw_byte(char_from);
1689            let byte_to = gb.char_to_raw_byte(char_to);
1690            let n_chars = gb.chars_in_raw_range(byte_from, byte_to);
1691            assert_eq!(n_chars, char_to - char_from, "gap at {i}");
1692
1693            let n_chars = gb.chars_in_raw_range(0, gb.char_to_raw_byte(gb.n_chars));
1694            assert_eq!(n_chars, gb.n_chars, "gap at {i}");
1695        }
1696    }
1697
1698    fn _insert_chars(gb: &mut GapBuffer, s: &str) {
1699        for (idx, ch) in s.chars().enumerate() {
1700            gb.insert_char(idx + 4, ch);
1701        }
1702    }
1703
1704    fn _insert_str(gb: &mut GapBuffer, s: &str) {
1705        gb.insert_str(4, s);
1706    }
1707
1708    #[test_case(_insert_chars; "individual chars")]
1709    #[test_case(_insert_str; "whole string")]
1710    #[test]
1711    fn insert_with_multibyte_chars_preserves_line_endings(insert: fn(&mut GapBuffer, &str)) {
1712        let slice_str = |gb: &GapBuffer| gb.slice(0, gb.len_chars()).to_string();
1713
1714        let mut gb = GapBuffer::from("foo\nbar\nbaz\n");
1715        let s = "世\n界 🦊\n ";
1716
1717        insert(&mut gb, s);
1718
1719        assert_eq!(slice_str(&gb), "foo\n世\n界 🦊\n bar\nbaz\n");
1720
1721        assert_eq!(gb.char(8), '🦊');
1722        gb.remove_char(8);
1723
1724        assert_eq!(slice_str(&gb), "foo\n世\n界 \n bar\nbaz\n");
1725    }
1726
1727    #[test]
1728    fn char_works_with_multibyte_characters() {
1729        let s = "世\n界 🦊\n ";
1730        let gb = GapBuffer::from(s);
1731
1732        for (idx, ch) in s.chars().enumerate() {
1733            assert_eq!(gb.char(idx), ch);
1734        }
1735    }
1736}