ad_editor/buffer/
internal.rs

1//! Internal data structures and helpers for maintaining buffer state
2//!
3//! ### References
4//! - https://www.cs.unm.edu/~crowley/papers/sds.pdf
5//! - http://doc.cat-v.org/plan_9/4th_edition/papers/sam/
6//! - https://www.averylaird.com/programming/piece-table/2018/05/10/insertions-piece-table
7//! - https://www.staff.city.ac.uk/~ross/papers/FingerTree.pdf
8//! - https://nullprogram.com/blog/2017/09/07/
9//! - https://coredumped.dev/2023/08/09/text-showdown-gap-buffers-vs-ropes/
10//! - https://code.visualstudio.com/blogs/2018/03/23/text-buffer-reimplementation
11use std::{
12    cmp::{max, min, Ordering},
13    collections::BTreeMap,
14    fmt,
15};
16
17// The internal data is [u8] so the values here are in terms of bytes
18
19const MIN_GAP: usize = 32;
20const MIN_GAP_GROW: usize = 64;
21const MAX_GAP_GROW: usize = 1024 * 8;
22
23/// For a given buffer length, calculate the new size of the gap we need when reallocating.
24/// This is set to 5% of the length of our data buffer but bounded by MIN_GAP and MAX_GAP.
25#[inline]
26fn clamp_gap_size(len: usize, min_gap: usize) -> usize {
27    min(max(len / 20, min_gap), MAX_GAP_GROW)
28}
29
30#[inline]
31fn count_chars(bytes: &[u8]) -> usize {
32    if bytes.is_empty() {
33        return 0;
34    }
35
36    let mut n_chars = 0;
37    let mut cur = 0;
38
39    while cur < bytes.len() {
40        // SAFETY: we know we are in bounds and that we contain valid utf-8 data
41        let ch = unsafe { decode_char_at(cur, bytes) };
42        cur += ch.len_utf8();
43        n_chars += 1;
44    }
45
46    n_chars
47}
48
49type ByteOffset = usize;
50type CharOffset = usize;
51
52/// An implementation of a gap buffer that tracks internal meta-data to help with accessing
53/// sub-regions of the text such as character ranges and lines.
54#[derive(Default, Debug, Clone, PartialEq, Eq)]
55pub struct GapBuffer {
56    /// the raw data being stored (both buffer content and the gap)
57    data: Box<[u8]>,
58    /// current size of the allocation for data
59    cap: usize,
60    /// byte offset to the first character in the gap
61    gap_start: usize,
62    /// byte offset to the last character in the gap
63    gap_end: usize,
64    /// size in bytes for the next gap when re-allocating
65    next_gap: usize,
66    /// line ending byte offset -> char offset
67    line_endings: BTreeMap<ByteOffset, CharOffset>,
68    /// total number of characters in the buffer
69    /// this is != line_endings.last() if there is no trailing newline
70    n_chars: usize,
71}
72
73fn compute_line_endings(s: &str) -> (usize, BTreeMap<ByteOffset, CharOffset>) {
74    let mut n_chars = 0;
75    let mut line_endings = BTreeMap::new();
76
77    for (line_chars, (idx, ch)) in s.char_indices().enumerate() {
78        n_chars += 1;
79        if ch == '\n' {
80            line_endings.insert(idx, line_chars);
81        }
82    }
83
84    (n_chars, line_endings)
85}
86
87impl From<String> for GapBuffer {
88    fn from(s: String) -> Self {
89        let gap_start = s.len();
90        let next_gap = clamp_gap_size(gap_start, MIN_GAP_GROW);
91        let cap = gap_start + next_gap;
92        let (n_chars, line_endings) = compute_line_endings(&s);
93        let mut v = s.into_bytes();
94        v.resize(cap, 0);
95
96        let mut gb = Self {
97            data: v.into_boxed_slice(),
98            cap,
99            gap_start,
100            gap_end: cap,
101            next_gap,
102            n_chars,
103            line_endings,
104        };
105
106        gb.move_gap_to(0);
107        gb
108    }
109}
110
111impl From<&str> for GapBuffer {
112    fn from(s: &str) -> Self {
113        let gap_start = s.len();
114        let next_gap = clamp_gap_size(gap_start, MIN_GAP_GROW);
115        let cap = gap_start + next_gap;
116        let (n_chars, line_endings) = compute_line_endings(s);
117        let mut v = Vec::with_capacity(cap);
118        v.extend_from_slice(s.as_bytes());
119        v.resize(cap, 0);
120
121        let mut gb = Self {
122            data: v.into_boxed_slice(),
123            cap,
124            gap_start,
125            gap_end: cap,
126            next_gap,
127            n_chars,
128            line_endings,
129        };
130
131        gb.move_gap_to(0);
132        gb
133    }
134}
135
136impl fmt::Display for GapBuffer {
137    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
138        match String::from_utf8(self.bytes()) {
139            Ok(s) => write!(f, "{s}"),
140            Err(_) => Err(fmt::Error),
141        }
142    }
143}
144
145/// One of the most common "hard to find" bugs I encounter around the GapBuffer is detecting
146/// when and where the tracking of line endings becomes corrupted. This macro is called at
147/// points where the line_endings map is modified guarded by #[cfg(test)] so that it does not
148/// affect the performance of the editor when it is in use. It is also called in situations
149/// where we are already panicing in order to check to see if the reason for the panic was
150/// because there is a bug around line endings that the current test suite didn't catch.
151macro_rules! assert_line_endings {
152    ($self:expr) => {{
153        let true_endings: Vec<usize> = $self
154            .data
155            .iter()
156            .enumerate()
157            .filter(|&(i, &b)| b == b'\n' && (i < $self.gap_start || i >= $self.gap_end))
158            .map(|(i, _)| i)
159            .collect();
160        let tracked_line_endings: Vec<usize> = $self.line_endings.keys().copied().collect();
161
162        assert_eq!(
163            tracked_line_endings, true_endings,
164            "incorrect byte positions for line endings with gap_start={} gap_end={}",
165            $self.gap_start, $self.gap_end
166        );
167
168        let true_endings: Vec<usize> = $self
169            .to_string()
170            .chars()
171            .enumerate()
172            .filter(|&(_, c)| c == '\n')
173            .map(|(i, _)| i)
174            .collect();
175        let tracked_line_endings: Vec<usize> = $self.line_endings.values().copied().collect();
176
177        assert_eq!(
178            tracked_line_endings, true_endings,
179            "incorrect character positions for line endings with gap_start={} gap_end={}",
180            $self.gap_start, $self.gap_end
181        );
182    }};
183}
184
185impl GapBuffer {
186    /// Number of bytes in the gap
187    #[inline]
188    fn gap(&self) -> usize {
189        self.gap_end - self.gap_start
190    }
191
192    /// The current length of "active" data in the buffer (i.e. not including the gap)
193    #[inline]
194    pub fn len(&self) -> usize {
195        self.cap - self.gap()
196    }
197
198    /// Whether or not the visible buffer contents are empty or not.
199    /// This can return true while there is "deleted" data in the gap.
200    #[inline]
201    pub fn is_empty(&self) -> bool {
202        self.cap == self.gap()
203    }
204
205    /// The raw content of the buffer
206    pub fn bytes(&self) -> Vec<u8> {
207        let mut v = Vec::with_capacity(self.len());
208        v.extend(&self.data[..self.gap_start]);
209        v.extend(&self.data[self.gap_end..]);
210
211        v
212    }
213
214    /// Iterate over the lines of the buffer
215    pub fn iter_lines(&self) -> impl Iterator<Item = Slice<'_>> {
216        let mut line_idx = 0;
217
218        std::iter::from_fn(move || {
219            if line_idx == self.len_lines() {
220                return None;
221            }
222            let slice = self.line(line_idx);
223            line_idx += 1;
224
225            Some(slice)
226        })
227    }
228
229    /// The number of lines within the buffer
230    #[inline]
231    pub fn len_lines(&self) -> usize {
232        match self.line_endings.last_key_value() {
233            Some((&raw_idx, _)) => {
234                let n = self.line_endings.len();
235                let byte_idx = if raw_idx > self.gap_start {
236                    raw_idx - self.gap_end
237                } else {
238                    raw_idx
239                };
240
241                if byte_idx < self.len() {
242                    n + 1
243                } else {
244                    n
245                }
246            }
247
248            None => 1,
249        }
250    }
251
252    /// The number of characters in the buffer
253    #[inline]
254    pub fn len_chars(&self) -> usize {
255        self.n_chars
256    }
257
258    /// Clear the contents of the buffer.
259    ///
260    /// # Note
261    /// This does not actually zero out the data currently within the buffer or truncate the
262    /// allocation in any way. It simply resets internal state so that it behaves like an empty
263    /// initial buffer.
264    pub fn clear(&mut self) {
265        self.move_gap_to(0);
266        self.gap_end = self.cap;
267        self.line_endings.clear();
268        self.n_chars = 0;
269
270        #[cfg(test)]
271        assert_line_endings!(self);
272    }
273
274    /// The character at the specified character index.
275    ///
276    /// # Panics
277    /// This method will panic if the given character index is out of bounds
278    #[inline]
279    pub fn char(&self, char_idx: usize) -> char {
280        let byte_idx = self.char_to_raw_byte(char_idx);
281
282        // SAFETY: we know that we have valid utf8 data internally
283        unsafe { decode_char_at(byte_idx, &self.data) }
284    }
285
286    /// The character at the specified character index.
287    #[inline]
288    pub fn get_char(&self, char_idx: usize) -> Option<char> {
289        if char_idx < self.n_chars {
290            Some(self.char(char_idx))
291        } else {
292            None
293        }
294    }
295
296    #[inline]
297    fn char_len(&self, byte_idx: usize) -> usize {
298        // SAFETY: we know that we have valid utf8 data internally
299        unsafe { decode_char_at(byte_idx, &self.data) }.len_utf8()
300    }
301
302    /// The requested line as a [Slice].
303    ///
304    /// # Panics
305    /// This method will panic if the given line index is out of bounds
306    #[inline]
307    pub fn line(&self, line_idx: usize) -> Slice<'_> {
308        if line_idx >= self.len_lines() {
309            assert_line_endings!(self);
310            panic!(
311                "line index was {line_idx} but buffer has {} lines",
312                self.len_lines()
313            )
314        }
315
316        let to = match self.line_endings.iter().nth(line_idx) {
317            Some((&idx, _)) => idx + self.char_len(idx),
318            None => self.cap,
319        };
320        let from = if line_idx == 0 {
321            0
322        } else {
323            let idx = *self.line_endings.iter().nth(line_idx - 1).unwrap().0;
324            idx + 1
325        };
326
327        Slice::from_raw_offsets(from, to, self)
328    }
329
330    /// The number of characters in the requested line.
331    ///
332    /// # Panics
333    /// This method will panic if the given line index is out of bounds
334    #[inline]
335    pub fn line_len_chars(&self, line_idx: usize) -> usize {
336        if line_idx >= self.len_lines() {
337            assert_line_endings!(self);
338            panic!(
339                "line index was {line_idx} but buffer has {} lines",
340                self.len_lines()
341            )
342        }
343
344        let chars_to = match self.line_endings.iter().nth(line_idx) {
345            Some((_, &char_idx)) => char_idx + 1,
346            None if line_idx == 0 => return self.n_chars,
347            None => self.n_chars,
348        };
349
350        let chars_from = if line_idx == 0 {
351            0
352        } else {
353            *self.line_endings.iter().nth(line_idx - 1).unwrap().1 + 1
354        };
355
356        chars_to - chars_from
357    }
358
359    /// An exclusive range of characters from the buffer
360    pub fn slice(&self, char_from: usize, char_to: usize) -> Slice<'_> {
361        let from = self.char_to_raw_byte(char_from);
362        let to = self.offset_char_to_raw_byte(char_to, from, char_from);
363
364        Slice::from_raw_offsets(from, to, self)
365    }
366
367    fn chars_in_raw_range(&self, raw_from: usize, raw_to: usize) -> usize {
368        if raw_to <= self.gap_start || raw_from >= self.gap_end {
369            count_chars(&self.data[raw_from..raw_to])
370        } else {
371            count_chars(&self.data[raw_from..self.gap_start])
372                + count_chars(&self.data[self.gap_end..raw_to])
373        }
374    }
375
376    /// Convert a byte index to a character index
377    pub fn byte_to_char(&self, byte_idx: usize) -> usize {
378        self.chars_in_raw_range(0, byte_idx)
379    }
380
381    /// Convert a character index to the index of the line containing it
382    ///
383    /// # Panics
384    /// This method will panic if the given char index is out of bounds
385    pub fn char_to_line(&self, char_idx: usize) -> usize {
386        match self.try_char_to_line(char_idx) {
387            Some(line_idx) => line_idx,
388            None => {
389                assert_line_endings!(self);
390                panic!(
391                    "char index was {char_idx} but the buffer char length is {}",
392                    self.len_chars()
393                );
394            }
395        }
396    }
397
398    /// Convert a character index to the index of the line containing it
399    pub fn try_char_to_line(&self, char_idx: usize) -> Option<usize> {
400        match char_idx.cmp(&self.n_chars) {
401            Ordering::Less => {
402                for (i, &char_offset) in self.line_endings.values().enumerate() {
403                    if char_idx <= char_offset {
404                        return Some(i);
405                    }
406                }
407                Some(self.len_lines() - 1)
408            }
409
410            // We allow setting the cursor to the end of the buffer for inserts
411            Ordering::Equal => Some(self.len_lines() - 1),
412
413            Ordering::Greater => None,
414        }
415    }
416
417    /// Convert a line index to the character index of its first character
418    ///
419    /// # Panics
420    /// This method will panic if the given char index is out of bounds
421    pub fn line_to_char(&self, line_idx: usize) -> usize {
422        match self.try_line_to_char(line_idx) {
423            Some(char_idx) => char_idx,
424            None => {
425                assert_line_endings!(self);
426                panic!(
427                    "line index was {line_idx} but the buffer has {} lines",
428                    self.len_lines()
429                );
430            }
431        }
432    }
433
434    /// Convert a line index to the character index of its first character
435    pub fn try_line_to_char(&self, line_idx: usize) -> Option<usize> {
436        if line_idx > self.len_lines() - 1 {
437            return None;
438        }
439
440        if line_idx == 0 {
441            Some(0)
442        } else {
443            let k = *self.line_endings.iter().nth(line_idx - 1).unwrap().1;
444            Some(k + 1)
445        }
446    }
447
448    /// Insert a single character at the specifified byte index.
449    ///
450    /// This is O(1) if idx is at the current gap start and the gap is large enough to accomodate
451    /// the new text, otherwise data will need to be copied in order to relocate the gap.
452    pub fn insert_char(&mut self, char_idx: usize, ch: char) {
453        let len = ch.len_utf8();
454        if self.gap().saturating_sub(len) < MIN_GAP {
455            self.grow_gap(len);
456        }
457
458        let idx = self.char_to_byte(char_idx);
459        self.move_gap_to(idx);
460
461        ch.encode_utf8(&mut self.data[self.gap_start..]);
462        self.gap_start += len;
463        self.n_chars += 1;
464
465        if ch == '\n' {
466            self.line_endings.insert(idx, char_idx);
467        }
468
469        self.update_line_endings(|(&bidx, &cidx)| {
470            if bidx > idx {
471                (bidx, cidx + 1)
472            } else {
473                (bidx, cidx)
474            }
475        });
476
477        #[cfg(test)]
478        assert_line_endings!(self);
479    }
480
481    /// Insert a string at the specifified byte index.
482    ///
483    /// This is O(1) if idx is at the current gap start and the gap is large enough to accomodate
484    /// the new text, otherwise data will need to be copied in order to relocate the gap.
485    pub fn insert_str(&mut self, char_idx: usize, s: &str) {
486        let len = s.len();
487        let len_chars = s.chars().count();
488        if self.gap().saturating_sub(len) < MIN_GAP {
489            self.grow_gap(len);
490        }
491
492        let idx = self.char_to_byte(char_idx);
493        self.move_gap_to(idx);
494
495        self.data[self.gap_start..self.gap_start + len].copy_from_slice(s.as_bytes());
496        self.gap_start += len;
497        self.n_chars += s.chars().count();
498
499        for (i, (offset, ch)) in s.char_indices().enumerate() {
500            if ch == '\n' {
501                self.line_endings.insert(idx + offset, char_idx + i);
502            }
503        }
504
505        self.update_line_endings(|(&bidx, &cidx)| {
506            if bidx >= idx + len {
507                (bidx, cidx + len_chars)
508            } else {
509                (bidx, cidx)
510            }
511        });
512
513        #[cfg(test)]
514        assert_line_endings!(self);
515    }
516
517    /// Remove the requested character index from the visible region of the buffer
518    pub fn remove_char(&mut self, char_idx: usize) {
519        let idx = self.char_to_byte(char_idx);
520        let len = self.char_len(self.char_to_raw_byte(char_idx));
521
522        if idx != self.gap_start {
523            self.move_gap_to(idx);
524        }
525
526        self.gap_end += len;
527        self.n_chars -= 1;
528
529        if self.data[self.gap_end - 1] == b'\n' {
530            self.line_endings.remove(&(self.gap_end - 1));
531        }
532
533        for (_, count) in self.line_endings.iter_mut() {
534            if *count >= char_idx {
535                *count -= 1;
536            }
537        }
538
539        #[cfg(test)]
540        assert_line_endings!(self);
541    }
542
543    /// Remove the requested range (from..to) from the visible region of the buffer.
544    ///
545    /// # Panics
546    /// This method will panic if `char_from < char_to`
547    pub fn remove_range(&mut self, char_from: usize, char_to: usize) {
548        if char_from == char_to {
549            return;
550        }
551
552        assert!(
553            char_from < char_to,
554            "invalid range: from={char_from} > to={char_to}"
555        );
556
557        let raw_from = self.char_to_raw_byte(char_from);
558        let from = self.raw_byte_to_byte(raw_from);
559        let to = self.offset_char_to_byte(char_to, raw_from, char_from);
560        debug_assert!(from < to, "invalid byte range: from={from} > to={to}");
561        self.move_gap_to(from);
562
563        let n_bytes = to - from;
564        let n_chars = char_to - char_from;
565
566        self.gap_end += n_bytes;
567        self.n_chars -= n_chars;
568
569        self.line_endings
570            .retain(|idx, _| !((self.gap_end - n_bytes)..(self.gap_end)).contains(idx));
571
572        for (_, count) in self.line_endings.iter_mut() {
573            if *count >= char_to {
574                *count -= n_chars;
575            } else if *count > char_from {
576                *count = char_from;
577            }
578        }
579
580        #[cfg(test)]
581        assert_line_endings!(self);
582    }
583
584    /// BTreeMap doesn't support iter_mut with mutable keys so we need to map over the existing
585    /// line endings and collect into a new map.
586    fn update_line_endings<F>(&mut self, f: F)
587    where
588        F: Fn((&usize, &usize)) -> (usize, usize),
589    {
590        self.line_endings = self.line_endings.iter().map(f).collect();
591    }
592
593    fn grow_gap(&mut self, n: usize) {
594        if n >= self.next_gap {
595            self.next_gap = clamp_gap_size(self.len() + n, n.next_power_of_two());
596        }
597
598        let gap_increase = self.next_gap + n - self.gap();
599        let cap = self.cap + self.next_gap + n;
600        let mut buf = Vec::with_capacity(cap);
601
602        buf.extend_from_slice(&self.data[..self.gap_start]); // data to gap
603        buf.resize(buf.len() + self.next_gap + n, 0); // the new gap (zeroed)
604        buf.extend_from_slice(&self.data[self.gap_end..]); // data after gap
605
606        let start = self.gap_start;
607        self.update_line_endings(|(&bidx, &cidx)| {
608            if bidx > start {
609                (bidx + gap_increase, cidx)
610            } else {
611                (bidx, cidx)
612            }
613        });
614
615        self.next_gap = clamp_gap_size(self.len(), self.next_gap * 2);
616        self.data = buf.into_boxed_slice();
617        self.gap_end += gap_increase;
618        self.cap = cap;
619
620        #[cfg(test)]
621        assert_line_endings!(self);
622    }
623
624    /// The byte_idx argument here is an absolute position within the "live" buffer which will mark
625    /// the first byte of the gap region following the move.
626    ///
627    /// We do not require that the data within the gap region is valid utf-8 so it is fine for this
628    /// offset to land in the middle of existing multi-byte characters so long as the regions
629    /// outside of the gap stay valid utf-8.
630    ///
631    /// # Panics
632    /// This method will panic if the given index is out of bounds
633    fn move_gap_to(&mut self, byte_idx: usize) {
634        // we need space to fit the current gap size
635        assert!(
636            byte_idx <= self.len(),
637            "index out of bounds: {byte_idx} > {}",
638            self.len()
639        );
640
641        let gap = self.gap();
642
643        let (src, dest) = match byte_idx.cmp(&self.gap_start) {
644            Ordering::Equal => return,
645
646            // Gap moving left
647            Ordering::Less => {
648                let start = self.gap_start;
649                self.update_line_endings(|(&bidx, &cidx)| {
650                    if bidx >= byte_idx && bidx <= start {
651                        (bidx + gap, cidx)
652                    } else {
653                        (bidx, cidx)
654                    }
655                });
656
657                (byte_idx..self.gap_start, byte_idx + gap)
658            }
659
660            // Gap moving right
661            Ordering::Greater => {
662                let end = self.gap_end;
663                self.update_line_endings(|(&bidx, &cidx)| {
664                    if bidx >= end && bidx < byte_idx + gap {
665                        (bidx - gap, cidx)
666                    } else {
667                        (bidx, cidx)
668                    }
669                });
670
671                (self.gap_end..byte_idx + gap, self.gap_start)
672            }
673        };
674
675        self.data.copy_within(src, dest);
676        self.gap_end = byte_idx + gap;
677        self.gap_start = byte_idx;
678
679        #[cfg(test)]
680        assert_line_endings!(self);
681    }
682
683    /// Convert a character offset within the logical buffer to a byte offset
684    /// within the logical buffer. This is used to account for multi-byte characters
685    /// within the buffer and is treated as a String-like index but it does not
686    /// account for the position of the gap.
687    #[inline]
688    fn char_to_byte(&self, char_idx: usize) -> usize {
689        self.offset_char_to_byte(char_idx, 0, 0)
690    }
691
692    #[inline]
693    fn raw_byte_to_byte(&self, raw: usize) -> usize {
694        if raw > self.gap_start {
695            raw - self.gap()
696        } else {
697            raw
698        }
699    }
700
701    #[inline]
702    fn offset_char_to_byte(
703        &self,
704        char_idx: usize,
705        byte_offset: usize,
706        char_offset: usize,
707    ) -> usize {
708        let raw = self.offset_char_to_raw_byte(char_idx, byte_offset, char_offset);
709
710        self.raw_byte_to_byte(raw)
711    }
712
713    /// Convert a character offset within the logical buffer to a raw byte offset
714    /// within the underlying allocation we maintain. This is an absolute index
715    /// into our allocated array that accounts for the position of the gap.
716    #[inline]
717    fn char_to_raw_byte(&self, char_idx: usize) -> usize {
718        self.offset_char_to_raw_byte(char_idx, 0, 0)
719    }
720
721    /// Allow for skipping to a given byte index before starting the search as an optimisation when we
722    /// are searching for multiple positions in sequence (e.g. the start and end of a range).
723    fn offset_char_to_raw_byte(
724        &self,
725        char_idx: usize,
726        mut byte_offset: usize,
727        mut char_offset: usize,
728    ) -> usize {
729        let mut to = usize::MAX;
730
731        for (&b, &c) in self
732            .line_endings
733            .iter()
734            .skip_while(move |(b, _)| **b < byte_offset)
735        {
736            match c.cmp(&char_idx) {
737                Ordering::Less => (byte_offset, char_offset) = (b, c),
738                Ordering::Equal => return b,
739                Ordering::Greater => {
740                    to = b;
741                    break;
742                }
743            }
744        }
745
746        let slice = Slice::from_raw_offsets(byte_offset, to, self);
747        let mut chars = slice.chars();
748        let mut cur = byte_offset;
749        for _ in 0..(char_idx - char_offset) {
750            chars.next();
751            cur = chars.cur + byte_offset;
752        }
753
754        if cur > self.gap_start && cur < self.gap_end {
755            cur += self.gap()
756        }
757
758        cur
759    }
760}
761
762/// A view on a region of the GapBuffer.
763///
764/// Slices will become invalidated if the gap is moved from the position they were created with
765#[derive(Default, Debug, Copy, Clone, PartialEq, Eq)]
766pub struct Slice<'a> {
767    from: usize,
768    left: &'a [u8],
769    right: &'a [u8],
770}
771
772impl<'a> Slice<'a> {
773    #[inline]
774    fn from_raw_offsets(from: usize, to: usize, gb: &'a GapBuffer) -> Slice<'a> {
775        let to = min(to, gb.data.len());
776
777        if to <= gb.gap_start || from >= gb.gap_end {
778            return Slice {
779                from,
780                left: &gb.data[from..to],
781                right: &[],
782            };
783        }
784
785        debug_assert!(from <= gb.gap_start, "line offset sits in gap");
786
787        Slice {
788            from,
789            left: &gb.data[from..gb.gap_start],
790            right: &gb.data[gb.gap_end..to],
791        }
792    }
793
794    /// The two sides of this slice as &str references
795    pub fn as_strs(&self) -> (&str, &str) {
796        // SAFETY: we know that we have valid utf8 data internally
797        unsafe {
798            (
799                std::str::from_utf8_unchecked(self.left),
800                std::str::from_utf8_unchecked(self.right),
801            )
802        }
803    }
804
805    /// Iterate over the characters in this slice
806    pub fn chars(self) -> Chars<'a> {
807        Chars { s: self, cur: 0 }
808    }
809
810    /// Iterate over the characters in this slice with their corresponding character indices
811    pub fn indexed_chars(self, from: usize, rev: bool) -> IdxChars<'a> {
812        let (cur, idx) = if rev {
813            (
814                self.left.len() + self.right.len(),
815                from + count_chars(self.left) + count_chars(self.right),
816            )
817        } else {
818            (0, from)
819        };
820
821        IdxChars {
822            s: self,
823            cur,
824            idx,
825            rev,
826        }
827    }
828
829    fn cur_and_data(&self, cur: usize) -> (usize, &[u8]) {
830        if cur < self.left.len() {
831            (cur, self.left)
832        } else {
833            (cur - self.left.len(), self.right)
834        }
835    }
836}
837
838impl<'a> fmt::Display for Slice<'a> {
839    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
840        let mut v = Vec::with_capacity(self.left.len() + self.right.len());
841        v.extend_from_slice(self.left);
842        v.extend_from_slice(self.right);
843
844        match String::from_utf8(v) {
845            Ok(s) => write!(f, "{s}"),
846            Err(_) => Err(fmt::Error),
847        }
848    }
849}
850
851impl<'a, 'b> PartialEq<&'b str> for Slice<'a> {
852    fn eq(&self, other: &&'b str) -> bool {
853        let b = other.as_bytes();
854
855        &b[..self.left.len()] == self.left && &b[self.left.len()..] == self.right
856    }
857}
858
859impl<'a> PartialEq<String> for Slice<'a> {
860    fn eq(&self, other: &String) -> bool {
861        *self == other.as_str()
862    }
863}
864
865/// An iterator of characters from a [Slice]
866#[derive(Default, Debug, Copy, Clone, PartialEq, Eq)]
867pub struct Chars<'a> {
868    s: Slice<'a>,
869    cur: usize,
870}
871
872impl<'a> Iterator for Chars<'a> {
873    type Item = char;
874
875    fn next(&mut self) -> Option<Self::Item> {
876        if self.cur >= self.s.left.len() + self.s.right.len() {
877            return None;
878        }
879
880        let (cur, data) = self.s.cur_and_data(self.cur);
881        // SAFETY: we know we are in bounds and that we contain valid utf-8 data
882        let ch = unsafe { decode_char_at(cur, data) };
883        let len = ch.len_utf8();
884        self.cur += len;
885        Some(ch)
886    }
887}
888
889/// An iterator of characters and their indices from a [Slice]
890#[derive(Default, Debug, Copy, Clone, PartialEq, Eq)]
891pub struct IdxChars<'a> {
892    s: Slice<'a>,
893    cur: usize,
894    idx: usize,
895    rev: bool,
896}
897
898impl<'a> Iterator for IdxChars<'a> {
899    type Item = (usize, char);
900
901    fn next(&mut self) -> Option<Self::Item> {
902        if (!self.rev && self.cur >= self.s.left.len() + self.s.right.len())
903            || (self.rev && self.cur == 0)
904        {
905            return None;
906        }
907
908        if self.rev {
909            let (cur, data) = self.s.cur_and_data(self.cur - 1);
910            // SAFETY: we know we are in bounds and that we contain valid utf-8 data
911            let ch = unsafe { decode_char_ending_at(cur, data) };
912            let len = ch.len_utf8();
913            self.idx -= 1;
914            self.cur -= len;
915            Some((self.idx, ch))
916        } else {
917            let (cur, data) = self.s.cur_and_data(self.cur);
918            // SAFETY: we know we are in bounds and that we contain valid utf-8 data
919            let ch = unsafe { decode_char_at(cur, data) };
920            let len = ch.len_utf8();
921            let res = Some((self.idx, ch));
922            self.cur += len;
923            self.idx += 1;
924            res
925        }
926    }
927}
928
929// The following helper functions are adapted from nightly APIs in std::core::str
930// -> https://doc.rust-lang.org/stable/src/core/str/validations.rs.html
931
932/// Mask of the value bits of a continuation byte.
933const CONT_MASK: u8 = 0b0011_1111;
934
935/// Returns the initial codepoint accumulator for the first byte.
936/// The first byte is special, only want bottom 5 bits for width 2, 4 bits
937/// for width 3, and 3 bits for width 4.
938#[inline]
939const fn utf8_first_byte(byte: u8, width: u32) -> u32 {
940    (byte & (0x7F >> width)) as u32
941}
942
943/// Returns the value of `ch` updated with continuation byte `byte`.
944#[inline]
945const fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
946    (ch << 6) | (byte & CONT_MASK) as u32
947}
948
949/// Checks whether the byte is a UTF-8 continuation byte (i.e., starts with the
950/// bits `10`).
951#[inline]
952const fn utf8_is_cont_byte(byte: u8) -> bool {
953    (byte as i8) < -64
954}
955
956/// Decode a utf-8 code point from `bytes` starting at `start`.
957/// `bytes` must contain valid utf-8 data beginning at `start`
958#[inline]
959unsafe fn decode_char_at(start: usize, bytes: &[u8]) -> char {
960    // Decode UTF-8
961    // SAFETY: `bytes` contains UTF-8-like string data so we have the next byte,
962    let x = bytes[start];
963    if x < 128 {
964        return char::from_u32_unchecked(x as u32);
965    }
966
967    // Multibyte case follows
968    // Decode from a byte combination out of: [[[x y] z] w]
969    // NOTE: Performance is sensitive to the exact formulation here
970    let init = utf8_first_byte(x, 2);
971    // SAFETY: `bytes` contains UTF-8-like string data so we have the next byte,
972    let y = bytes[start + 1];
973    let mut ch = utf8_acc_cont_byte(init, y);
974
975    if x >= 0xE0 {
976        // [[x y z] w] case
977        // 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
978        // SAFETY: `bytes` contains UTF-8-like string data so we have the next byte,
979        let z = bytes[start + 2];
980        let y_z = utf8_acc_cont_byte((y & CONT_MASK) as u32, z);
981        ch = init << 12 | y_z;
982        if x >= 0xF0 {
983            // [x y z w] case
984            // use only the lower 3 bits of `init`
985            // SAFETY: `bytes` contains UTF-8-like string data so we have the next byte,
986            let w = bytes[start + 3];
987            ch = (init & 7) << 18 | utf8_acc_cont_byte(y_z, w);
988        }
989    }
990
991    char::from_u32_unchecked(ch)
992}
993
994/// Decode a utf-8 code point from `bytes` ending at `end`.
995/// `bytes` must contain valid utf-8 data ending at `end`
996#[inline]
997unsafe fn decode_char_ending_at(end: usize, bytes: &[u8]) -> char {
998    // Decode UTF-8
999    let w = match bytes[end] {
1000        b if b < 128 => return char::from_u32_unchecked(b as u32),
1001        b => b,
1002    };
1003
1004    // Multibyte case follows
1005    // Decode from a byte combination out of: [x [y [z w]]]
1006    let mut ch;
1007    // SAFETY: `bytes` contains UTF-8-like string data so we have the next byte,
1008    let z = bytes[end - 1];
1009    ch = utf8_first_byte(z, 2);
1010    if utf8_is_cont_byte(z) {
1011        // SAFETY: `bytes` contains UTF-8-like string data so we have the next byte,
1012        let y = bytes[end - 2];
1013        ch = utf8_first_byte(y, 3);
1014        if utf8_is_cont_byte(y) {
1015            // SAFETY: `bytes` contains UTF-8-like string data so we have the next byte,
1016            let x = bytes[end - 3];
1017            ch = utf8_first_byte(x, 4);
1018            ch = utf8_acc_cont_byte(ch, y);
1019        }
1020        ch = utf8_acc_cont_byte(ch, z);
1021    }
1022    ch = utf8_acc_cont_byte(ch, w);
1023
1024    char::from_u32_unchecked(ch)
1025}
1026
1027#[cfg(test)]
1028mod tests {
1029    use super::*;
1030    use simple_test_case::test_case;
1031
1032    fn debug_buffer_content(gb: &GapBuffer) -> String {
1033        let mut v = gb.data.to_vec();
1034        v[gb.gap_start..gb.gap_end].copy_from_slice("_".repeat(gb.gap()).as_bytes());
1035        String::from_utf8(v).expect("valid utf8")
1036    }
1037
1038    fn raw_debug_buffer_content(gb: &GapBuffer) -> String {
1039        let mut v = gb.data.to_vec();
1040        for b in v[gb.gap_start..gb.gap_end].iter_mut() {
1041            if *b == b'\0' {
1042                *b = b'_';
1043            }
1044        }
1045        v.insert(gb.gap_end, b']');
1046        v.insert(gb.gap_start, b'[');
1047
1048        String::from_utf8(v).expect("valid utf8")
1049    }
1050
1051    #[test]
1052    fn from_string_matches_from_str() {
1053        let s = "this is a test";
1054        let gb1 = GapBuffer::from(s.to_string());
1055        let gb2 = GapBuffer::from(s);
1056
1057        assert_eq!(gb1, gb2);
1058    }
1059
1060    #[test]
1061    fn to_string_works() {
1062        let s = "this is a test";
1063        let gb = GapBuffer::from(s.to_string());
1064        assert_eq!(gb.to_string(), s);
1065    }
1066
1067    #[test]
1068    fn insert_into_empty_string_initial_gb_works() {
1069        let mut gb = GapBuffer::from(String::new());
1070        gb.insert_char(0, 'a');
1071        assert_eq!(gb.to_string(), "a");
1072    }
1073
1074    #[test_case("foo│foo│foo"; "interleaved multibyte and ascii")]
1075    #[test_case("hello, 世界!"; "blocks of multibyte and ascii")]
1076    #[test_case("hello, world!"; "just ascii")]
1077    #[test]
1078    fn len_chars_works(s: &str) {
1079        let mut gb = GapBuffer::from(s);
1080        let len_s = s.chars().count();
1081
1082        println!("initial:          {:?}", raw_debug_buffer_content(&gb));
1083        assert_eq!(gb.len_chars(), len_s);
1084        assert_eq!(
1085            gb.len_chars(),
1086            gb.to_string().chars().count(),
1087            "char iter len != len_chars"
1088        );
1089        assert_eq!(
1090            gb.line(0).chars().count(),
1091            gb.line_len_chars(0),
1092            "line_len_chars != len_chars"
1093        );
1094
1095        gb.insert_char(5, 'X');
1096        println!("after insert X:   {:?}", raw_debug_buffer_content(&gb));
1097        assert_eq!(gb.len_chars(), len_s + 1);
1098        assert_eq!(
1099            gb.len_chars(),
1100            gb.to_string().chars().count(),
1101            "char iter len != len_chars"
1102        );
1103
1104        gb.insert_char(3, '界');
1105        println!("after insert 界:  {:?}", raw_debug_buffer_content(&gb));
1106        assert_eq!(gb.len_chars(), len_s + 2);
1107        assert_eq!(
1108            gb.len_chars(),
1109            gb.to_string().chars().count(),
1110            "char iter len != len_chars"
1111        );
1112
1113        assert_eq!(gb.char(3), '界');
1114        gb.remove_char(3);
1115        println!("after remove 界:  {:?}", raw_debug_buffer_content(&gb));
1116        assert_eq!(gb.len_chars(), len_s + 1);
1117        assert_eq!(
1118            gb.len_chars(),
1119            gb.to_string().chars().count(),
1120            "char iter len != len_chars"
1121        );
1122
1123        assert_eq!(gb.char(5), 'X');
1124        gb.remove_char(5);
1125        println!("after remove X:   {:?}", debug_buffer_content(&gb));
1126        assert_eq!(gb.len_chars(), len_s);
1127        assert_eq!(
1128            gb.len_chars(),
1129            gb.to_string().chars().count(),
1130            "char iter len != len_chars"
1131        );
1132        assert_eq!(gb.to_string(), s);
1133    }
1134
1135    #[test_case("foo│foo│foo"; "interleaved multibyte and ascii")]
1136    #[test_case("hello, 世界!"; "blocks of multibyte and ascii")]
1137    #[test_case("hello, world!"; "just ascii")]
1138    #[test]
1139    fn move_gap_to_maintains_content(s: &str) {
1140        let mut gb = GapBuffer::from(s);
1141
1142        for i in 0..gb.len_chars() {
1143            let idx = gb.char_to_byte(i);
1144            gb.move_gap_to(idx);
1145
1146            // Splitting into the two sections like this allows us to verify that
1147            // we have valid utf-8 encoded text on either side of the gap.
1148            let (s1, s2) = (
1149                std::str::from_utf8(&gb.data[..gb.gap_start]).unwrap(),
1150                std::str::from_utf8(&gb.data[gb.gap_end..]).unwrap(),
1151            );
1152
1153            assert_eq!(format!("{s1}{s2}"), s, "idx={idx}");
1154        }
1155    }
1156
1157    #[test]
1158    fn move_gap_to_maintains_line_content() {
1159        let s = "hello, world!\nhow are you?\nthis is a test";
1160        let mut gb = GapBuffer::from(s);
1161        assert_eq!(gb.len_lines(), 3);
1162
1163        for i in 0..gb.len_chars() {
1164            let idx = gb.char_to_byte(i);
1165            gb.move_gap_to(idx);
1166            assert_eq!(gb.len_lines(), 3);
1167
1168            assert_eq!(gb.line(0).to_string(), "hello, world!\n", "idx={idx}");
1169            assert_eq!(gb.line(1).to_string(), "how are you?\n", "idx={idx}");
1170            assert_eq!(gb.line(2).to_string(), "this is a test", "idx={idx}");
1171        }
1172    }
1173
1174    #[test_case(0, 0, 0; "BOF cur at BOF")]
1175    #[test_case(27, 0, 0; "BOF cur at EOF")]
1176    #[test_case(27, 5, 5; "in the buffer cur at EOF")]
1177    #[test_case(5, 5, 5; "in the buffer cur at gap")]
1178    #[test_case(5, 3, 3; "in the buffer cur before gap")]
1179    #[test_case(5, 11, 15; "in the buffer cur after gap")]
1180    #[test_case(5, 7, 7; "multi byte 1")]
1181    #[test_case(5, 8, 10; "multi byte 2")]
1182    #[test]
1183    fn char_to_byte_works(cur: usize, char_idx: usize, expected: usize) {
1184        let s = "hello, 世界!\nhow are you?";
1185        let mut gb = GapBuffer::from(s);
1186        assert_eq!(s.len(), 27, "EOF case is not 0..s.len()");
1187        assert_eq!("世".len(), 3);
1188        gb.move_gap_to(cur);
1189
1190        let byte_idx = gb.char_to_byte(char_idx);
1191        assert_eq!(byte_idx, expected, "{:?}", debug_buffer_content(&gb));
1192    }
1193
1194    #[test_case(0, 0, 0; "BOF cur at BOF")]
1195    #[test_case(27, 0, 0; "BOF cur at EOF")]
1196    #[test_case(27, 5, 5; "in the buffer cur at EOF")]
1197    #[test_case(5, 5, 5; "in the buffer cur at gap")]
1198    #[test_case(5, 3, 3; "in the buffer cur before gap")]
1199    #[test_case(5, 11, 79; "in the buffer cur after gap")]
1200    #[test_case(5, 7, 71; "multi byte 1")]
1201    #[test_case(5, 8, 74; "multi byte 2")]
1202    #[test]
1203    fn char_to_raw_byte_works(cur: usize, char_idx: usize, expected: usize) {
1204        let s = "hello, 世界!\nhow are you?";
1205        let mut gb = GapBuffer::from(s);
1206        assert_eq!(s.len(), 27, "EOF case is not 0..s.len()");
1207        assert_eq!("世".len(), 3);
1208        gb.move_gap_to(cur);
1209
1210        let char_idx = gb.char_to_raw_byte(char_idx);
1211        assert_eq!(char_idx, expected, "{:?}", debug_buffer_content(&gb));
1212    }
1213
1214    #[test_case(0, 0, "hello, world!\n"; "first line cur at BOF")]
1215    #[test_case(0, 1, "how are you?"; "second line cur at BOF")]
1216    #[test_case(26, 0, "hello, world!\n"; "first line cur at EOF")]
1217    #[test_case(26, 1, "how are you?"; "second line cur at EOF")]
1218    #[test_case(10, 0, "hello, world!\n"; "first line cur in line")]
1219    #[test_case(10, 1, "how are you?"; "second line cur in line")]
1220    #[test]
1221    fn slice_to_string_works(cur: usize, line: usize, expected: &str) {
1222        let mut gb = GapBuffer::from("hello, world!\nhow are you?");
1223        gb.move_gap_to(cur);
1224
1225        assert_eq!(gb.line(line).to_string(), expected);
1226    }
1227
1228    #[test]
1229    fn line_to_char_works() {
1230        let l1 = "hello, world!\n";
1231        let l2 = "how are you?\n";
1232        let l3 = "this is a test";
1233
1234        let gb = GapBuffer::from(format!("{l1}{l2}{l3}"));
1235
1236        assert_eq!(gb.line_to_char(0), 0);
1237        assert_eq!(gb.line_to_char(1), l1.chars().count());
1238        assert_eq!(gb.line_to_char(2), l1.chars().count() + l2.chars().count());
1239    }
1240
1241    #[test_case(0, 0; "start of first line")]
1242    #[test_case(5, 0; "in first line")]
1243    #[test_case(13, 0; "end of first line")]
1244    #[test_case(14, 1; "start of second line")]
1245    #[test_case(20, 1; "in second line")]
1246    #[test_case(26, 1; "end of second line")]
1247    #[test_case(27, 2; "start of third line")]
1248    #[test_case(30, 2; "in third line")]
1249    #[test_case(40, 2; "end of third line")]
1250    #[test]
1251    fn char_to_line_works(char_idx: usize, line_idx: usize) {
1252        let l1 = "hello, world!\n";
1253        let l2 = "how are you?\n";
1254        let l3 = "this is a test";
1255
1256        let gb = GapBuffer::from(format!("{l1}{l2}{l3}"));
1257
1258        assert_eq!(gb.char_to_line(char_idx), line_idx);
1259    }
1260
1261    #[test_case(&[(0, 'h')], "hello world"; "insert front")]
1262    #[test_case(&[(4, ',')], "ello, world"; "insert inner")]
1263    #[test_case(&[(10, '!')], "ello world!"; "insert back")]
1264    #[test_case(&[(4, ','), (11, '!')], "ello, world!"; "insert inner then back")]
1265    #[test_case(&[(4, ','), (0, 'h')], "hello, world"; "insert inner then front")]
1266    #[test_case(&[(0, 'h'), (5, ','),], "hello, world"; "insert front then inner")]
1267    #[test_case(&[(10, '!'), (0, 'h'), (5, ',')], "hello, world!"; "insert all")]
1268    #[test]
1269    fn insert_char(inserts: &[(usize, char)], expected: &str) {
1270        let mut gb = GapBuffer::from("ello world");
1271
1272        for &(idx, ch) in inserts {
1273            gb.insert_char(idx, ch);
1274        }
1275
1276        assert_eq!(gb.to_string(), expected, "{:?}", debug_buffer_content(&gb))
1277    }
1278
1279    #[test]
1280    fn insert_char_with_moving_cur() {
1281        let mut gb = GapBuffer::from("hello ");
1282        gb.insert_char(6, 'w');
1283        gb.insert_char(7, 'o');
1284        gb.insert_char(8, 'r');
1285        gb.insert_char(9, 'l');
1286        gb.insert_char(10, 'd');
1287        gb.insert_char(11, '!');
1288        gb.insert_char(5, ',');
1289
1290        assert_eq!(
1291            gb.to_string(),
1292            "hello, world!",
1293            "{:?}",
1294            debug_buffer_content(&gb)
1295        )
1296    }
1297
1298    #[test]
1299    fn insert_newline_char_is_tracked_correctly() {
1300        let s = "hello, world!\nhow are you?";
1301        let mut gb = GapBuffer::from(s);
1302        assert_eq!(gb.len_lines(), 2);
1303
1304        println!("initial: {:?}", raw_debug_buffer_content(&gb));
1305        gb.insert_char(6, '\n');
1306        println!("insert:  {:?}", raw_debug_buffer_content(&gb));
1307
1308        assert_eq!(gb.len_lines(), 3);
1309        assert_eq!(gb.line(0).to_string(), "hello,\n");
1310        assert_eq!(gb.line(1).to_string(), " world!\n");
1311        assert_eq!(gb.line(2).to_string(), "how are you?");
1312
1313        for idx in 0..=gb.len_chars() {
1314            gb.move_gap_to(idx);
1315            assert_eq!(gb.len_lines(), 3);
1316
1317            assert_eq!(gb.line(0).to_string(), "hello,\n", "idx={idx}");
1318            assert_eq!(gb.line(1).to_string(), " world!\n", "idx={idx}");
1319            assert_eq!(gb.line(2).to_string(), "how are you?", "idx={idx}");
1320        }
1321    }
1322
1323    #[test_case(&[(0, "hell")], "helloworl"; "insert front")]
1324    #[test_case(&[(1, ", ")], "o, worl"; "insert inner")]
1325    #[test_case(&[(5, "d!")], "oworld!"; "insert back")]
1326    #[test_case(&[(5, "d!"), (0, "hell"), (5, ", ")], "hello, world!"; "insert all")]
1327    #[test_case(&[(5, "d!"), (0, "hell"), (5, ",\n")], "hello,\nworld!"; "insert all w newline")]
1328    #[test]
1329    fn insert_str(inserts: &[(usize, &str)], expected: &str) {
1330        let mut gb = GapBuffer::from("oworl");
1331        for &(idx, s) in inserts {
1332            gb.insert_str(idx, s);
1333        }
1334
1335        assert_eq!(gb.to_string(), expected, "{:?}", debug_buffer_content(&gb))
1336    }
1337
1338    #[test]
1339    fn insert_newline_in_str_is_tracked_correctly() {
1340        let s = "hello, world!\nhow are you?";
1341        let mut gb = GapBuffer::from(s);
1342        assert_eq!(gb.len_lines(), 2);
1343
1344        let s2 = " sailor\nisn't this fun?\nwhat a wonderful\n";
1345        gb.insert_str(6, s2);
1346
1347        for idx in 0..=gb.len_chars() {
1348            gb.move_gap_to(idx);
1349            assert_eq!(gb.len_lines(), 5);
1350
1351            assert_eq!(gb.line(0).to_string(), "hello, sailor\n", "idx={idx}");
1352            assert_eq!(gb.line(1).to_string(), "isn't this fun?\n", "idx={idx}");
1353            assert_eq!(gb.line(2).to_string(), "what a wonderful\n", "idx={idx}");
1354            assert_eq!(gb.line(3).to_string(), " world!\n", "idx={idx}");
1355            assert_eq!(gb.line(4).to_string(), "how are you?", "idx={idx}");
1356        }
1357    }
1358
1359    #[test_case(6, "hello,world!"; "at gap start")]
1360    #[test_case(7, "hello, orld!"; "at gap end")]
1361    #[test_case(12, "hello, world"; "after gap")]
1362    #[test_case(0, "ello, world!"; "before gap")]
1363    #[test]
1364    fn remove_char(idx: usize, expected: &str) {
1365        let mut gb = GapBuffer::from("hello, world!");
1366        gb.move_gap_to(6); // space before world
1367        gb.remove_char(idx);
1368
1369        assert_eq!(gb.to_string(), expected, "{:?}", debug_buffer_content(&gb))
1370    }
1371
1372    #[test]
1373    fn remove_newline_char_is_tracked_correctly() {
1374        let s = "hello, world!\nhow are you?";
1375        let mut gb = GapBuffer::from(s);
1376        assert_eq!(gb.len_lines(), 2);
1377
1378        gb.remove_char(13);
1379
1380        assert_eq!(gb.len_lines(), 1);
1381        assert_eq!(gb.line(0).to_string(), "hello, world!how are you?");
1382    }
1383
1384    #[test_case(6, 9, "hello,rld!"; "at gap start")]
1385    #[test_case(7, 10, "hello, ld!"; "at gap end")]
1386    #[test_case(10, 13, "hello, wor"; "after gap")]
1387    #[test_case(0, 5, ", world!"; "before gap")]
1388    #[test_case(0, 13, ""; "remove all")]
1389    #[test]
1390    fn remove_range_works(from: usize, to: usize, expected: &str) {
1391        let s = "hello, world!";
1392        assert_eq!(s.len(), 13, "remove all case is not 0..s.len()");
1393
1394        let mut gb = GapBuffer::from(s);
1395        gb.move_gap_to(6); // space before world
1396        gb.remove_range(from, to);
1397
1398        assert_eq!(gb.to_string(), expected, "{:?}", debug_buffer_content(&gb))
1399    }
1400
1401    #[test]
1402    fn remove_range_w_multibyte_chars_works() {
1403        let s = "foo│foo│foo";
1404        let mut gb = GapBuffer::from(s);
1405
1406        gb.remove_range(0, 3);
1407        assert_eq!(gb.to_string(), "│foo│foo");
1408        assert_eq!(gb.len_chars(), 8);
1409
1410        gb.remove_range(1, 4);
1411        assert_eq!(gb.to_string(), "││foo");
1412        assert_eq!(gb.len_chars(), 5);
1413
1414        gb.remove_range(2, 5);
1415        assert_eq!(gb.to_string(), "││");
1416        assert_eq!(gb.len_chars(), 2);
1417    }
1418
1419    #[test]
1420    fn remove_range_for_last_line_works() {
1421        let s = "hello, world!\nthis is the last line";
1422        let mut gb = GapBuffer::from(s);
1423        gb.remove_range(14, s.len());
1424        assert_eq!(gb.to_string(), "hello, world!\n");
1425        assert_eq!(gb.len_lines(), 2);
1426    }
1427
1428    #[test_case(10, 15, "hello, worow are you?"; "spanning newline")]
1429    #[test_case(7, 14, "hello, how are you?"; "ending on newline")]
1430    #[test_case(13, 26, "hello, world!"; "starting on newline")]
1431    #[test]
1432    fn remove_newline_in_str_is_tracked_correctly(from: usize, to: usize, expected: &str) {
1433        let s = "hello, world!\nhow are you?";
1434        let mut gb = GapBuffer::from(s);
1435        assert_eq!(gb.len_lines(), 2);
1436
1437        gb.remove_range(from, to);
1438
1439        assert_eq!(gb.len_lines(), 1);
1440        assert_eq!(gb.to_string(), expected);
1441        assert_eq!(gb.line(0).to_string(), expected);
1442    }
1443
1444    #[test_case('X'; "ascii")]
1445    #[test_case('界'; "multi-byte")]
1446    #[test]
1447    fn insert_remove_char_is_idempotent(ch: char) {
1448        let s = "hello, world!";
1449        let mut gb = GapBuffer::from(s);
1450        gb.insert_char(6, ch);
1451        gb.remove_char(6);
1452
1453        assert_eq!(gb.to_string(), s, "{:?}", debug_buffer_content(&gb))
1454    }
1455
1456    #[test_case("TEST", 1; "without trailing newline")]
1457    #[test_case("TEST\n", 2; "with trailing newline")]
1458    #[test_case("TEST\nTEST", 2; "with internal newline")]
1459    #[test]
1460    fn insert_remove_str_is_idempotent(edit: &str, expected_lines: usize) {
1461        let s = "hello, world!";
1462        let mut gb = GapBuffer::from(s);
1463
1464        println!("initial: {:?}", raw_debug_buffer_content(&gb));
1465        for n in 0..gb.len_lines() {
1466            println!("{:?}", gb.line(n).to_string());
1467        }
1468
1469        gb.insert_str(6, edit);
1470        assert_eq!(gb.len_lines(), expected_lines);
1471        println!("insert:  {:?}", raw_debug_buffer_content(&gb));
1472        for n in 0..gb.len_lines() {
1473            println!("{:?}", gb.line(n).to_string());
1474        }
1475
1476        gb.remove_range(6, 6 + edit.len());
1477        println!("remove:  {:?}", raw_debug_buffer_content(&gb));
1478        for n in 0..gb.len_lines() {
1479            println!("{:?}", gb.line(n).to_string());
1480        }
1481
1482        assert_eq!(gb.to_string(), s);
1483    }
1484
1485    #[test]
1486    fn chars_work() {
1487        let s1 = "hello, world!\n";
1488        let s2 = "how are you?";
1489        let gb = GapBuffer::from(format!("{s1}{s2}"));
1490
1491        let l1_chars: String = gb.line(0).chars().collect();
1492        assert_eq!(l1_chars, s1);
1493
1494        let l2_chars: String = gb.line(1).chars().collect();
1495        assert_eq!(l2_chars, s2);
1496    }
1497
1498    #[test_case(
1499        "hello, 世界!", false,
1500        &[(0, 'h'), (1, 'e'), (2, 'l'), (3, 'l'), (4, 'o'),
1501          (5, ','), (6, ' '), (7, '世'), (8, '界'), (9, '!')];
1502        "multi-byte block forward"
1503    )]
1504    #[test_case(
1505        "hello, 世界!", true,
1506        &[(9, '!'), (8, '界'), (7, '世'), (6, ' '), (5, ','),
1507          (4, 'o'), (3, 'l'), (2, 'l'), (1, 'e'), (0, 'h')];
1508        "multi-byte block reversed"
1509    )]
1510    #[test_case(
1511        "foo│foo│foo", false,
1512        &[(0, 'f'), (1, 'o'), (2, 'o'), (3, '│'), (4, 'f'), (5, 'o'),
1513          (6, 'o'), (7, '│'), (8, 'f'), (9, 'o'), (10, 'o')];
1514        "interleaved forward"
1515    )]
1516    #[test_case(
1517        "foo│foo│foo", true,
1518        &[(10, 'o'), (9, 'o'), (8, 'f'), (7, '│'), (6, 'o'), (5, 'o'),
1519          (4, 'f'), (3, '│'), (2, 'o'), (1, 'o'), (0, 'f')];
1520        "interleaved reversed"
1521    )]
1522    #[test]
1523    fn indexed_chars_works(s: &str, rev: bool, expected: &[(usize, char)]) {
1524        let mut gb = GapBuffer::from(s);
1525        let v: Vec<(usize, char)> = gb.line(0).indexed_chars(0, rev).collect();
1526        assert_eq!(&v, expected);
1527
1528        for i in 0..gb.len_chars() {
1529            let idx = gb.char_to_byte(i);
1530            gb.move_gap_to(idx);
1531
1532            let v: Vec<(usize, char)> = gb.line(0).indexed_chars(0, rev).collect();
1533            assert_eq!(&v, expected, "idx={idx}");
1534        }
1535    }
1536
1537    #[test_case("foo│foo│foo"; "interleaved multibyte and ascii")]
1538    #[test_case("hello, 世界!"; "blocks of multibyte and ascii")]
1539    #[test]
1540    fn chars_works(s: &str) {
1541        let mut gb = GapBuffer::from(s);
1542        let chars: String = gb.line(0).chars().collect();
1543        assert_eq!(chars, s);
1544
1545        for i in 0..gb.len_chars() {
1546            let idx = gb.char_to_byte(i);
1547            gb.move_gap_to(idx);
1548
1549            let chars: String = gb.line(0).chars().collect();
1550            assert_eq!(chars, s, "idx={idx}");
1551        }
1552    }
1553
1554    #[test]
1555    fn slice_works() {
1556        let mut gb = GapBuffer::from("hello, world!\nhow are you?");
1557        let slice = Slice::from_raw_offsets(0, gb.cap, &gb);
1558        let (s1, s2) = slice.as_strs();
1559        assert_eq!(s1, "");
1560        assert_eq!(s2, "hello, world!\nhow are you?");
1561
1562        let slice = gb.slice(6, 17);
1563        let (s1, s2) = slice.as_strs();
1564        assert_eq!(s1, " world!\nhow");
1565        assert_eq!(s2, "");
1566
1567        gb.move_gap_to(12);
1568        println!("after move:  {:?}", raw_debug_buffer_content(&gb));
1569
1570        let slice = gb.slice(6, 17);
1571        let (s1, s2) = slice.as_strs();
1572        assert_eq!(s1, " world");
1573        assert_eq!(s2, "!\nhow");
1574    }
1575
1576    #[test]
1577    fn slice_eq_str_works() {
1578        let mut gb = GapBuffer::from("hello, world!\nhow are you?");
1579        gb.move_gap_to(3);
1580        let slice = gb.slice(0, 5);
1581        assert_eq!(slice, "hello");
1582    }
1583
1584    #[test]
1585    fn chars_in_raw_range_works() {
1586        let mut gb = GapBuffer::from("hello, world!\nhow are you?");
1587        let char_from = 7;
1588        let char_to = 12;
1589
1590        for i in 0..gb.len_chars() {
1591            let idx = gb.char_to_byte(i);
1592            gb.move_gap_to(idx);
1593
1594            let byte_from = gb.char_to_raw_byte(char_from);
1595            let byte_to = gb.char_to_raw_byte(char_to);
1596            let n_chars = gb.chars_in_raw_range(byte_from, byte_to);
1597            assert_eq!(n_chars, char_to - char_from, "gap at {i}");
1598
1599            let n_chars = gb.chars_in_raw_range(0, gb.char_to_raw_byte(gb.n_chars));
1600            assert_eq!(n_chars, gb.n_chars, "gap at {i}");
1601        }
1602    }
1603
1604    fn _insert_chars(gb: &mut GapBuffer, s: &str) {
1605        for (idx, ch) in s.chars().enumerate() {
1606            gb.insert_char(idx + 4, ch);
1607        }
1608    }
1609
1610    fn _insert_str(gb: &mut GapBuffer, s: &str) {
1611        gb.insert_str(4, s);
1612    }
1613
1614    #[test_case(_insert_chars; "individual chars")]
1615    #[test_case(_insert_str; "whole string")]
1616    #[test]
1617    fn insert_with_multibyte_chars_preserves_line_endings(insert: fn(&mut GapBuffer, &str)) {
1618        let slice_str = |gb: &GapBuffer| gb.slice(0, gb.len_chars()).to_string();
1619
1620        let mut gb = GapBuffer::from("foo\nbar\nbaz\n");
1621        let s = "世\n界 🦊\n ";
1622
1623        insert(&mut gb, s);
1624
1625        assert_eq!(slice_str(&gb), "foo\n世\n界 🦊\n bar\nbaz\n");
1626
1627        assert_eq!(gb.char(8), '🦊');
1628        gb.remove_char(8);
1629
1630        assert_eq!(slice_str(&gb), "foo\n世\n界 \n bar\nbaz\n");
1631    }
1632
1633    #[test]
1634    fn char_works_with_multibyte_characters() {
1635        let s = "世\n界 🦊\n ";
1636        let gb = GapBuffer::from(s);
1637
1638        for (idx, ch) in s.chars().enumerate() {
1639            assert_eq!(gb.char(idx), ch);
1640        }
1641    }
1642}