text_buffer/
buffer.rs

1#![warn(clippy::all, clippy::pedantic)]
2#![allow(clippy::must_use_candidate)]
3#![allow(clippy::missing_panics_doc)]
4use crate::{
5    metric::{BufferMetrics, Metric},
6    Position,
7};
8use get_size::GetSize;
9use std::{
10    borrow::Cow,
11    cmp,
12    fmt::{self, Debug, Display},
13    ops::{Bound, Deref, Range, RangeBounds},
14};
15use str_indices::chars;
16
17/// A Gap buffer. This represents the text of a buffer, and allows for
18/// efficient insertion and deletion of text.
19#[derive(Default, GetSize)]
20pub struct Buffer {
21    /// The buffer data
22    data: Box<[u8]>,
23    /// start of the gap. Both gap_start and gap_end are the same point, but
24    /// gap_start is never a valid byte index, and gap_end is always used
25    /// instead.
26    gap_start: usize,
27    /// The end of the gap in bytes
28    gap_end: usize,
29    /// The number of characters until the gap
30    gap_chars: usize,
31    /// The current cursor.
32    cursor: GapMetric,
33    total: Metric,
34    metrics: BufferMetrics,
35    new_gap_size: usize,
36}
37
38impl Debug for Buffer {
39    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
40        let start = self.to_str(..self.gap_start);
41        let end = self.to_str(self.gap_end..);
42        // repeat _ for the gap length
43        let gap = "_".repeat(self.gap_len());
44        f.debug_struct("Buffer")
45            .field("data", &format!("{start}{gap}{end}"))
46            .field("gap_start", &self.gap_start)
47            .field("gap_end", &self.gap_end)
48            .field("gap_chars", &self.gap_chars)
49            .field("cursor", &self.cursor)
50            .field("metrics", &self.metrics)
51            .field("total_chars", &self.total.chars)
52            .field("new_gap_size", &self.new_gap_size)
53            .finish()
54    }
55}
56
57impl Display for Buffer {
58    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
59        write!(f, "{}", self.read(0..self.len()))
60    }
61}
62
63const METRIC_SIZE: usize = crate::metric::MAX_LEAF;
64struct MetricBuilder<'a> {
65    slice: &'a str,
66    start: usize,
67    end: usize,
68}
69
70impl<'a> MetricBuilder<'a> {
71    fn new(slice: &'a str) -> Self {
72        Self { slice, start: 0, end: slice.len().min(METRIC_SIZE) }
73    }
74}
75
76impl<'a> Iterator for MetricBuilder<'a> {
77    type Item = Metric;
78
79    fn next(&mut self) -> Option<Self::Item> {
80        if self.start == self.slice.len() {
81            return None;
82        }
83        let mut end = self.end;
84        while !self.slice.is_char_boundary(end) {
85            end -= 1;
86        }
87        let slice = &self.slice[self.start..end];
88        self.start = end;
89        self.end = cmp::min(self.end + METRIC_SIZE, self.slice.len());
90        Some(metrics(slice))
91    }
92
93    fn size_hint(&self) -> (usize, Option<usize>) {
94        let len = self.slice.len() - self.start;
95        let extra = usize::from(len % METRIC_SIZE != 0);
96        let size = len / METRIC_SIZE;
97        (size + extra, None)
98    }
99}
100
101/// Metric with gap buffer accounted for
102#[derive(Debug, Default, Copy, Clone, Eq, GetSize)]
103struct GapMetric {
104    bytes: usize,
105    chars: usize,
106}
107
108impl std::ops::Sub for GapMetric {
109    type Output = Metric;
110
111    fn sub(self, rhs: Self) -> Self::Output {
112        Metric { bytes: self.bytes - rhs.bytes, chars: self.chars - rhs.chars }
113    }
114}
115
116impl Display for GapMetric {
117    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
118        write!(f, "{}", Metric { bytes: self.bytes, chars: self.chars })
119    }
120}
121
122impl PartialEq for GapMetric {
123    fn eq(&self, other: &Self) -> bool {
124        let eq = self.bytes == other.bytes;
125        if eq {
126            debug_assert_eq!(self.chars, other.chars);
127        } else {
128            debug_assert_ne!(self.chars, other.chars);
129        }
130        eq
131    }
132}
133
134fn calc_start_gap_size(len: usize) -> usize {
135    // The gap can be up to 5% of the total size, but at least 64 bytes
136    cmp::max(len / 20, Buffer::GAP_SIZE)
137}
138
139impl From<String> for Buffer {
140    #[inline]
141    fn from(data: String) -> Self {
142        // reuse the allocation from the string. This means we *might* have a
143        // gap of 0
144        let builder = MetricBuilder::new(&data);
145        let metrics = BufferMetrics::build(builder);
146        let (storage, len) = {
147            let len = data.len();
148            let mut vec: Vec<u8> = data.into_bytes();
149            vec.resize(vec.capacity(), 0);
150            debug_assert_eq!(vec.capacity(), vec.len());
151            (vec.into_boxed_slice(), len)
152        };
153        let gap_len = storage.len() - len;
154        let total = metrics.len();
155        Self {
156            data: storage,
157            gap_start: len,
158            gap_end: len + gap_len,
159            gap_chars: total.chars,
160            cursor: GapMetric::default(),
161            total,
162            metrics,
163            new_gap_size: calc_start_gap_size(len),
164        }
165    }
166}
167
168impl From<&str> for Buffer {
169    #[inline]
170    fn from(data: &str) -> Self {
171        let new_gap_size = calc_start_gap_size(data.len());
172        let storage = {
173            let capacity = data.len() + new_gap_size;
174            let mut storage = Vec::with_capacity(capacity);
175            storage.resize(new_gap_size, 0);
176            storage.extend_from_slice(data.as_bytes());
177            assert_eq!(storage.len(), capacity);
178            storage.into_boxed_slice()
179        };
180        let builder = MetricBuilder::new(data);
181        let metrics = BufferMetrics::build(builder);
182        Self {
183            data: storage,
184            gap_start: 0,
185            gap_end: new_gap_size,
186            gap_chars: 0,
187            cursor: GapMetric { bytes: new_gap_size, chars: 0 },
188            total: metrics.len(),
189            new_gap_size,
190            metrics,
191        }
192    }
193}
194
195impl<T> PartialEq<T> for Buffer
196where
197    T: Deref<Target = str>,
198{
199    fn eq(&self, other: &T) -> bool {
200        PartialEq::eq(self, Deref::deref(other))
201    }
202}
203
204impl PartialEq<str> for Buffer {
205    fn eq(&self, other: &str) -> bool {
206        if self.len() != other.len() {
207            return false;
208        }
209        self.to_str(..self.gap_start) == &other[..self.gap_start]
210            && self.to_str(self.gap_end..) == &other[self.gap_start..]
211    }
212}
213
214impl Buffer {
215    #[cfg(not(test))]
216    const GAP_SIZE: usize = 64;
217    #[cfg(test)]
218    const GAP_SIZE: usize = 5;
219    const MAX_GAP: usize = 1024 * 8;
220
221    #[must_use]
222    pub fn new() -> Self {
223        Self::with_gap(Self::GAP_SIZE)
224    }
225
226    #[must_use]
227    pub fn with_gap(gap: usize) -> Self {
228        Self { new_gap_size: gap, ..Self::default() }
229    }
230
231    /// Grow the buffer to accommodate the new slice. Moves the gap to the
232    /// cursor position at the same time.
233    fn grow(&mut self, slice: &str) {
234        // If the string being inserted is large, we want to grow the gap faster
235        if slice.len() >= self.new_gap_size {
236            let len = slice.len() + self.len();
237            self.new_gap_size =
238                cmp::max(len / 20, cmp::min(slice.len().next_power_of_two(), Self::MAX_GAP));
239        }
240        let new_capacity = {
241            let pre_gap = self.gap_start;
242            let post_gap = self.data.len() - self.gap_end;
243            pre_gap + slice.len() + self.new_gap_size + post_gap
244        };
245        let mut buffer = Vec::with_capacity(new_capacity);
246        let bytes;
247        #[allow(clippy::comparison_chain)]
248        if self.cursor.chars < self.gap_chars {
249            buffer.extend_from_slice(&self.data[..self.cursor.bytes]); // pre cursor
250            buffer.extend_from_slice(slice.as_bytes()); // new text
251            buffer.resize(buffer.len() + self.new_gap_size, 0); // new gap
252            bytes = buffer.len();
253            buffer.extend_from_slice(&self.data[self.cursor.bytes..self.gap_start]); // cursor to gap
254            buffer.extend_from_slice(&self.data[self.gap_end..]); // post gap
255        } else if self.cursor.chars > self.gap_chars {
256            buffer.extend_from_slice(&self.data[..self.gap_start]); // pre gap
257            buffer.extend_from_slice(&self.data[self.gap_end..self.cursor.bytes]); // gap to cursor
258            buffer.extend_from_slice(slice.as_bytes()); // new text
259            buffer.resize(buffer.len() + self.new_gap_size, 0); // new gap
260            bytes = buffer.len();
261            buffer.extend_from_slice(&self.data[self.cursor.bytes..]); // post cursor
262        } else {
263            // cursor is at gap
264            buffer.extend_from_slice(&self.data[..self.gap_start]); // pre gap
265            buffer.extend_from_slice(slice.as_bytes()); // new text
266            buffer.resize(buffer.len() + self.new_gap_size, 0); // new gap
267            bytes = buffer.len();
268            buffer.extend_from_slice(&self.data[self.gap_end..]); // post gap
269        }
270        self.cursor.bytes = bytes;
271        self.data = buffer.into_boxed_slice();
272        debug_assert_eq!(self.data.len(), new_capacity);
273        let new = metrics(slice);
274        self.cursor.chars += new.chars;
275        self.gap_chars = self.cursor.chars;
276        self.gap_end = self.cursor.bytes;
277        self.gap_start = self.gap_end - self.new_gap_size;
278        self.total += new;
279        self.new_gap_size =
280            cmp::max(self.len() / 20, cmp::min(self.new_gap_size * 2, Self::MAX_GAP));
281    }
282
283    #[inline]
284    pub fn insert_char(&mut self, chr: char) {
285        let buf = &mut [0; 4];
286        self.insert(chr.encode_utf8(buf));
287    }
288
289    #[inline]
290    pub fn insert(&mut self, slice: &str) {
291        if slice.is_empty() {
292            return;
293        }
294        self.metrics.insert(self.to_abs_pos(self.cursor), MetricBuilder::new(slice));
295        if self.gap_len() < slice.len() {
296            self.grow(slice);
297        } else {
298            // if gap is not at cursor, move it there
299            if self.gap_chars != self.cursor.chars {
300                self.move_gap(self.cursor);
301            }
302            let new_slice = &mut self.data[self.gap_start..(self.gap_start + slice.len())];
303            new_slice.copy_from_slice(slice.as_bytes());
304            self.gap_start += slice.len();
305            let new = metrics(slice);
306            self.gap_chars += new.chars;
307            self.cursor.chars += new.chars;
308            self.total += new;
309        }
310    }
311
312    #[inline]
313    pub fn delete_backwards(&mut self, size: usize) {
314        let size = size.min(self.cursor.chars);
315        self.delete_range(self.cursor.chars - size, self.cursor.chars);
316    }
317
318    #[inline]
319    pub fn delete_forwards(&mut self, size: usize) {
320        self.delete_range(self.cursor.chars, self.cursor.chars + size);
321    }
322
323    #[inline]
324    pub fn delete_range(&mut self, beg: usize, end: usize) {
325        if beg == end {
326            return;
327        }
328        let (mut beg_chars, mut end_chars) = (beg, end);
329        if beg_chars > end_chars {
330            (beg_chars, end_chars) = (end_chars, beg_chars);
331        }
332        end_chars = end_chars.min(self.total.chars);
333        beg_chars = beg_chars.min(self.total.chars);
334        let end_bytes = self.char_to_byte(end_chars);
335        let beg_bytes = self.char_to_byte(beg_chars);
336        if end_bytes != beg_bytes {
337            let beg = GapMetric { bytes: beg_bytes, chars: beg_chars };
338            let end = GapMetric { bytes: end_bytes, chars: end_chars };
339            self.metrics.delete(self.to_abs_pos(beg), self.to_abs_pos(end));
340            self.delete_byte_range(beg, end);
341        }
342    }
343
344    fn delete_byte_range(&mut self, beg: GapMetric, end: GapMetric) {
345        // TODO: optimize this so that we count the chars deleted when calculating position
346        assert!(beg.bytes <= end.bytes, "beg ({beg}) is greater then end ({end})");
347        assert!(end.bytes <= self.data.len(), "end out of bounds");
348        self.assert_char_boundary(beg.bytes);
349        self.assert_char_boundary(end.bytes);
350        if end.bytes < self.gap_start {
351            // delete before gap
352            //
353            // hello New York City||||||||||
354            //     ^      ^       ^         ^
355            //     beg    end     gap_start gap_end
356            //
357            // shift end..gap_start to the right
358            //
359            // hell|||||||||||||||||ork City
360            //     ^                ^
361            //     gap_start        gap_end
362
363            // update character count
364            let deleted = end - beg;
365            let delete_offset_chars = self.gap_chars - end.chars;
366            self.gap_chars -= deleted.chars + delete_offset_chars;
367            self.total -= deleted;
368            let new_end = self.gap_end - (self.gap_start - end.bytes);
369            // shift data
370            self.data.copy_within(end.bytes..self.gap_start, new_end);
371            // update cursor
372            self.update_cursor_chars(beg.bytes, end.bytes, deleted.chars);
373            if self.cursor.bytes < self.gap_start {
374                if self.cursor.bytes > end.bytes {
375                    self.cursor.bytes += self.gap_len();
376                } else if self.cursor.bytes >= beg.bytes {
377                    self.cursor.bytes = new_end;
378                }
379            }
380            // update gap position
381            self.gap_end = new_end;
382            self.gap_start = beg.bytes;
383        } else if beg.bytes >= self.gap_end {
384            // delete after gap
385            //
386            // ||||||||||hello New York City
387            // ^         ^         ^   ^
388            // gap_start gap_end   beg end
389            //
390            // shift gap_end..beg to the left
391            //
392            // hello New |||||||||||||| City
393            //           ^             ^
394            //           gap_start     gap_end
395
396            // update character count
397
398            let deleted = end - beg;
399            self.total -= deleted;
400            self.gap_chars += beg.chars - self.gap_chars;
401            // shift data
402            self.data.copy_within(self.gap_end..beg.bytes, self.gap_start);
403            // update cursor
404            self.update_cursor_chars(beg.bytes, end.bytes, deleted.chars);
405            if self.cursor.bytes >= self.gap_end {
406                if self.cursor.bytes < beg.bytes {
407                    self.cursor.bytes -= self.gap_len();
408                } else if self.cursor.bytes < end.bytes {
409                    self.cursor.bytes = end.bytes;
410                }
411            }
412            // update gap position
413            self.gap_start += beg.bytes - self.gap_end;
414            self.gap_end = end.bytes;
415        } else if beg.bytes < self.gap_start && end.bytes >= self.gap_end {
416            // delete spans gap
417            //
418            // hello|||||||||| New York City
419            //  ^   ^         ^       ^
420            //  beg gap_start gap_end end
421            //
422            // update start and end of gap
423            //
424            // h||||||||||||||||||||||k City
425            //  ^                     ^
426            //  gap_start             gap_end
427
428            // update character count
429            let gap_start = GapMetric { bytes: self.gap_start, chars: self.gap_chars };
430            let before = gap_start - beg;
431            let gap_end = GapMetric { bytes: self.gap_end, chars: self.gap_chars };
432            let after = end - gap_end;
433            self.gap_chars -= before.chars;
434            self.total -= before + after;
435            // update gap position
436            self.gap_start = beg.bytes;
437            self.gap_end = end.bytes;
438            self.update_cursor_chars(beg.bytes, end.bytes, before.chars + after.chars);
439            if (beg.bytes..end.bytes).contains(&self.cursor.bytes) {
440                self.cursor.bytes = end.bytes;
441            }
442        } else {
443            unreachable!(
444                "delete region inside gap -- gap: {}-{}, span: {beg}-{end}",
445                self.gap_start, self.gap_end
446            );
447        }
448    }
449
450    fn update_cursor_chars(&mut self, beg: usize, end: usize, size: usize) {
451        if self.cursor.bytes > beg {
452            if self.cursor.bytes > end {
453                self.cursor.chars -= size;
454            } else {
455                self.cursor.chars = self.gap_chars;
456            }
457        }
458    }
459
460    #[inline]
461    pub fn cursor(&self) -> Position {
462        Position::new(self.to_abs_pos(self.cursor))
463    }
464
465    #[inline]
466    pub fn char_at(&self, pos: usize) -> Option<char> {
467        if pos == self.len_chars() {
468            return None;
469        }
470        let byte = self.char_to_byte(pos);
471        let mut end = byte + 1;
472        // UTF-8 character can only be 4 bytes long
473        for _ in 0..3 {
474            if self.is_char_boundary(end) {
475                break;
476            }
477            end += 1;
478        }
479        debug_assert!(self.is_char_boundary(end));
480        self.to_str(byte..end).chars().next()
481    }
482
483    #[inline]
484    pub fn move_gap_out_of(&mut self, range: impl RangeBounds<usize>) {
485        if !range.contains(&self.gap_chars)
486            || range.start_bound() == Bound::Included(&self.gap_chars)
487        {
488            return;
489        }
490
491        let Range { start, end } = Self::bounds_to_range(range);
492
493        let pos = if self.gap_chars - start < end - self.gap_chars {
494            GapMetric { bytes: self.char_to_byte(start), chars: start }
495        } else {
496            GapMetric { bytes: self.char_to_byte(end), chars: end }
497        };
498        self.move_gap(pos);
499    }
500
501    fn bounds_to_range(bounds: impl RangeBounds<usize>) -> Range<usize> {
502        let start = match bounds.start_bound() {
503            Bound::Included(x) => *x,
504            Bound::Excluded(_) => unreachable!(),
505            Bound::Unbounded => 0,
506        };
507
508        let end = match bounds.end_bound() {
509            Bound::Included(_) => unimplemented!("inclusive end bound not supported"),
510            Bound::Excluded(x) => *x,
511            Bound::Unbounded => usize::MAX,
512        };
513
514        start..end
515    }
516
517    #[doc(hidden)]
518    #[inline]
519    pub fn benchmark_move_gap(&mut self) {
520        if self.gap_chars == 0 {
521            self.move_gap(GapMetric { bytes: self.data.len(), chars: self.len_chars() });
522        } else {
523            self.move_gap(GapMetric::default());
524        }
525    }
526
527    #[doc(hidden)]
528    #[inline]
529    pub fn benchmark_build_metrics(string: &str) -> usize {
530        let builder = MetricBuilder::new(string);
531        let metrics = BufferMetrics::build(builder);
532        metrics.len().bytes
533    }
534
535    fn move_gap(&mut self, pos: GapMetric) {
536        assert!(pos.bytes <= self.data.len(), "attempt to move gap out of bounds");
537        self.assert_char_boundary(pos.bytes);
538        if pos.bytes < self.gap_start {
539            // move gap backwards
540            let shift = GapMetric { bytes: self.gap_start, chars: self.gap_chars } - pos;
541            self.gap_chars -= shift.chars;
542
543            self.data.copy_within(pos.bytes..self.gap_start, self.gap_end - shift.bytes);
544            // if gap moves across cursor, update cursor position
545            if self.cursor.bytes < self.gap_start && self.cursor.bytes >= pos.bytes {
546                self.cursor.bytes += self.gap_len();
547            }
548            self.gap_start = pos.bytes;
549            self.gap_end -= shift.bytes;
550        } else if pos.bytes >= self.gap_end {
551            // move gap forwards
552            self.gap_chars += pos.chars - self.gap_chars;
553            self.data.copy_within(self.gap_end..pos.bytes, self.gap_start);
554            let size = pos.bytes - self.gap_end;
555            // if gap moves across cursor, update cursor position
556            if self.cursor.bytes >= self.gap_end && self.cursor.bytes < pos.bytes {
557                self.cursor.bytes -= self.gap_len();
558            }
559            self.gap_start += size;
560            self.gap_end = pos.bytes;
561        } else {
562            panic!(
563                "move gap position byte: ({pos}) inside gap ({}-{})",
564                self.gap_start, self.gap_end
565            );
566        }
567    }
568
569    #[inline]
570    pub fn set_cursor(&mut self, pos: usize) {
571        let pos = pos.min(self.total.chars);
572        let byte_pos = self.char_to_byte(pos);
573        self.cursor = GapMetric { bytes: byte_pos, chars: pos };
574    }
575
576    fn to_abs_pos(&self, pos: GapMetric) -> Metric {
577        let chars = pos.chars;
578        let bytes = if pos.bytes < self.gap_start {
579            pos.bytes
580        } else if pos.bytes >= self.gap_end {
581            pos.bytes - self.gap_len()
582        } else {
583            unreachable!()
584        };
585        Metric { bytes, chars }
586    }
587
588    fn to_gapped_pos(&self, pos: Metric) -> GapMetric {
589        let chars = pos.chars;
590        let bytes = if pos.bytes < self.gap_start {
591            pos.bytes
592        } else if pos.bytes >= self.gap_start {
593            pos.bytes + self.gap_len()
594        } else {
595            unreachable!()
596        };
597        GapMetric { bytes, chars }
598    }
599
600    #[inline]
601    pub fn len(&self) -> usize {
602        debug_assert_eq!(self.total.bytes + self.gap_len(), self.data.len());
603        self.total.bytes
604    }
605
606    #[inline]
607    pub const fn len_chars(&self) -> usize {
608        self.total.chars
609    }
610
611    #[inline]
612    pub const fn is_empty(&self) -> bool {
613        self.total.chars == 0
614    }
615
616    const fn gap_len(&self) -> usize {
617        self.gap_end - self.gap_start
618    }
619
620    fn char_to_byte(&self, pos: usize) -> usize {
621        if pos == self.gap_chars {
622            return self.gap_end;
623        }
624
625        if pos + 1 == self.gap_chars {
626            for i in 1..=4 {
627                let pos = self.gap_start - i;
628                if self.is_char_boundary(pos) {
629                    return pos;
630                }
631            }
632            unreachable!("couldn't find char boundary");
633        }
634
635        let (base, offset) = self.metrics.search_char(pos);
636        debug_assert_eq!(base.chars + offset, pos);
637
638        let base = self.to_gapped_pos(base);
639
640        if offset == 0 {
641            return base.bytes;
642        }
643
644        self.assert_char_boundary(base.bytes);
645
646        if base.chars < self.gap_chars {
647            if pos < self.gap_chars {
648                let string = self.to_str(base.bytes..self.gap_start);
649                chars::to_byte_idx(string, offset) + base.bytes
650            } else {
651                // the char crosses the gap
652                let string = self.to_str(self.gap_end..);
653                self.gap_end + chars::to_byte_idx(string, pos - self.gap_chars)
654            }
655        } else {
656            let string = self.to_str(base.bytes..);
657            chars::to_byte_idx(string, offset) + base.bytes
658        }
659    }
660
661    fn to_str(&self, range: impl std::slice::SliceIndex<[u8], Output = [u8]>) -> &str {
662        if cfg!(debug_assertions) {
663            std::str::from_utf8(&self.data[range]).unwrap()
664        } else {
665            unsafe { std::str::from_utf8_unchecked(&self.data[range]) }
666        }
667    }
668
669    #[inline]
670    pub fn as_str(&mut self) -> &str {
671        self.move_gap_out_of(..);
672        let slice = if self.gap_start == 0 {
673            self.to_str(self.gap_end..)
674        } else {
675            self.to_str(..self.gap_start)
676        };
677        assert_eq!(slice.len(), self.len());
678        slice
679    }
680
681    #[inline]
682    pub fn read(&self, bounds: impl RangeBounds<usize>) -> Cow<'_, str> {
683        // if past gap_start, add gap_len to range
684        let mut range = Self::bounds_to_range(bounds);
685        let orig_len = range.len();
686        if range.end >= self.total.bytes {
687            range.end = self.total.bytes;
688        }
689        if range.start >= self.gap_start {
690            range.start += self.gap_len();
691        }
692        if range.end >= self.gap_start {
693            range.end += self.gap_len();
694        }
695        assert!(range.start <= self.data.len(), "range start out of bounds");
696        for i in 0..4 {
697            if self.is_char_boundary(range.end - i) {
698                range.end -= i;
699                break;
700            }
701        }
702        for i in 0..4 {
703            if self.is_char_boundary(range.start + i) {
704                range.start += i;
705                break;
706            }
707        }
708        // assert the range does not overlap with the gap
709        assert!(range.start >= self.gap_end || range.start < self.gap_start);
710        assert!(range.end >= self.gap_end || range.end < self.gap_start);
711
712        // the range straddles the gap, so we need to copy the two halves
713        if range.start < self.gap_start && self.gap_start < range.end {
714            let mut string = String::with_capacity(range.len());
715            string += self.to_str(range.start..self.gap_start);
716            string += self.to_str(self.gap_end..range.end);
717            assert_eq!(string.len(), orig_len);
718            Cow::Owned(string)
719        } else {
720            Cow::Borrowed(self.to_str(range))
721        }
722    }
723
724    fn assert_char_boundary(&self, pos: usize) {
725        if cfg!(debug_assertions) {
726            if pos == self.gap_start {
727                return;
728            }
729            assert!(self.is_char_boundary(pos), "position ({pos}) not on utf8 boundary");
730        }
731    }
732
733    fn is_char_boundary(&self, pos: usize) -> bool {
734        match self.data.get(pos) {
735            Some(byte) => is_char_boundary(*byte),
736            None => pos == self.data.len(),
737        }
738    }
739}
740
741fn metrics(slice: &str) -> Metric {
742    let chars = chars::count(slice);
743    Metric { bytes: slice.len(), chars }
744}
745
746#[allow(clippy::cast_possible_wrap)]
747const fn is_char_boundary(byte: u8) -> bool {
748    // This is bit magic equivalent to: b < 128 || b >= 192
749    (byte as i8) >= -0x40
750}
751
752#[cfg(test)]
753mod test {
754    use super::*;
755
756    #[test]
757    fn create() {
758        let string = "hello buffer";
759        let buffer = Buffer::from(string);
760        assert_eq!(buffer.data.len(), string.len() + Buffer::GAP_SIZE);
761        assert_eq!(buffer.gap_end, Buffer::GAP_SIZE);
762        assert_eq!(buffer.gap_start, 0);
763
764        let string = String::from("hello buffer");
765        let buffer = Buffer::from(string);
766        assert_eq!(buffer, "hello buffer");
767    }
768
769    #[test]
770    fn test_empty() {
771        let mut buffer = Buffer::new();
772        assert_eq!(buffer.data.len(), 0);
773        assert_eq!(buffer.gap_len(), 0);
774        assert_eq!(buffer, "");
775        buffer.insert("hello");
776        assert_eq!(buffer, "hello");
777
778        let mut buffer = Buffer::new();
779        buffer.delete_range(0, 0);
780        assert_eq!(buffer, "");
781        buffer.delete_range(0, 5);
782        assert_eq!(buffer, "");
783    }
784
785    #[test]
786    fn insert() {
787        let string = "hello buffer";
788        let mut buffer = Buffer::from(string);
789        buffer.len();
790        buffer.insert_char('x');
791        buffer.len();
792        assert_eq!(buffer.data.len(), string.len() + Buffer::GAP_SIZE);
793        assert_eq!(buffer.gap_end, Buffer::GAP_SIZE);
794        assert_eq!(buffer.gap_start, 1);
795        assert_eq!(buffer, "xhello buffer");
796    }
797
798    #[test]
799    fn insert_slice() {
800        let string = String::from("world");
801        let new_string = "hi ";
802        let mut buffer = Buffer::from(string);
803        buffer.insert(new_string);
804        buffer.move_gap_out_of(..);
805        assert_eq!(buffer, "hi world");
806        buffer.insert("starting Θ text ");
807        assert_eq!(buffer, "hi starting Θ text world");
808        buffer.set_cursor(21);
809        buffer.insert("x");
810        assert_eq!(buffer, "hi starting Θ text woxrld");
811    }
812
813    #[test]
814    fn empty() {
815        let mut buffer = Buffer::from("");
816        assert_eq!(buffer, "");
817        buffer.delete_range(1, 2);
818        assert_eq!(buffer, "");
819    }
820
821    #[test]
822    fn test_delete() {
823        let world = "world";
824        let hello = "hello ";
825        let mut buffer = Buffer::from(world);
826        buffer.insert(hello);
827        buffer.delete_backwards(4);
828        assert_eq!(buffer.gap_start, hello.len() - 4);
829        buffer.move_gap_out_of(..);
830        buffer.move_gap_out_of(..);
831        buffer.move_gap(GapMetric { bytes: buffer.char_to_byte(7), chars: 7 });
832        buffer.move_gap_out_of(..);
833        assert_eq!(buffer, "heworld");
834    }
835
836    #[test]
837    fn delete_forwards() {
838        let world = "world";
839        let hello = "hello ";
840        let mut buffer = Buffer::from(world);
841        buffer.insert(hello);
842        buffer.delete_forwards(4);
843        buffer.move_gap_out_of(..);
844        assert_eq!(buffer, "hello d");
845    }
846
847    #[test]
848    fn test_delete_region() {
849        let mut buffer = Buffer::from("world");
850        buffer.insert("hello ");
851        buffer.delete_range(1, 3);
852        buffer.move_gap_out_of(..);
853        assert_eq!(buffer, "hlo world");
854        buffer.delete_range(4, 6);
855        buffer.move_gap_out_of(..);
856        assert_eq!(buffer, "hlo rld");
857    }
858
859    #[test]
860    fn test_char_at() {
861        let buffer = Buffer::from("aµ福");
862        assert_eq!(buffer.char_at(0), Some('a'));
863        assert_eq!(buffer.char_at(1), Some('µ'));
864        assert_eq!(buffer.char_at(2), Some('福'));
865        assert_eq!(buffer.char_at(3), None);
866    }
867
868    #[test]
869    fn test_delete_nothing() {
870        let mut buffer = Buffer::from("world");
871        buffer.insert("hello ");
872        buffer.delete_range(3, 3);
873        assert_eq!(buffer, "hello world");
874    }
875
876    // cases found during fuzzing
877    #[test]
878    fn edge_cases() {
879        let mut buffer = Buffer::from(":?abdix7");
880        assert_eq!(buffer.len(), 8);
881        buffer.delete_range(2, 5);
882        assert_eq!(buffer.len(), 5);
883        buffer.delete_range(5, 4);
884        assert_eq!(buffer.len(), 4);
885        buffer.delete_range(0, 3);
886
887        let mut buffer = Buffer::from("xyz");
888        buffer.insert("abc");
889        buffer.set_cursor(2);
890        buffer.delete_range(1, 4);
891        assert_eq!(buffer, "ayz");
892        buffer.insert("b");
893        assert_eq!(buffer, "abyz");
894
895        let mut buffer = Buffer::from("ƽaejcoeuz");
896        buffer.delete_range(5, 6);
897        buffer.delete_range(1, 8);
898        assert_eq!(buffer, "ƽ");
899    }
900
901    // from reference implementation
902    #[test]
903    fn test_delete_to_gap() {
904        let mut buffer = Buffer::from("\n\n\n\nAutomerge is too");
905        buffer.insert("per. Some graduate students in ");
906        buffer.set_cursor(10);
907        buffer.delete_forwards(21);
908        assert_eq!(buffer, "per. Some \n\n\n\nAutomerge is too");
909    }
910
911    // fuzzing
912    #[test]
913    fn test_bounds() {
914        let mut buffer = Buffer::from("world");
915        buffer.insert("hello ");
916        buffer.delete_range(3, 100);
917        assert_eq!(buffer, "hel");
918        buffer.delete_range(10, 1);
919        assert_eq!(buffer, "h");
920
921        let mut buffer = Buffer::from(",skeobg x");
922        buffer.delete_range(10, 10);
923
924        let mut buffer = Buffer::from("+skeocptv'eigp");
925        buffer.delete_range(30, 6);
926    }
927
928    #[test]
929    fn resize() {
930        let world = "world";
931        let hello = "hello ";
932        let mut buffer = Buffer::from(world);
933        buffer.insert(hello);
934        assert_eq!(buffer.gap_start, hello.len());
935        assert_eq!(buffer, "hello world");
936    }
937
938    #[test]
939    fn cursor() {
940        let string = "world";
941        let new_string = "hi ";
942        let mut buffer = Buffer::from(string);
943        buffer.insert(new_string);
944        assert_eq!(buffer.gap_chars, new_string.len());
945    }
946
947    #[test]
948    fn test_read() {
949        let mut buffer = Buffer::from("hello world");
950        assert_eq!(buffer.read(..), Cow::Borrowed("hello world"));
951        buffer.set_cursor(5);
952        assert_eq!(buffer.read(0..0), Cow::Borrowed(""));
953        assert_eq!(buffer.read(0..5), Cow::Borrowed("hello"));
954        assert_eq!(buffer.read(5..11), Cow::Borrowed(" world"));
955        assert_eq!(buffer.read(4..6), Cow::<str>::Owned(String::from("o ")));
956    }
957
958    #[test]
959    fn test_build_unicode() {
960        let string = "aaaaaaaaaՂaaaaaaaaa";
961        let _ = Buffer::from(string);
962    }
963
964    #[test]
965    fn test_append() {
966        let mut buffer = Buffer::from("aa");
967        buffer.set_cursor(3);
968        let string = "B\u{1b}BBBBBB\u{1b}\0\0\0\0\0\0BB\u{1b}\u{1b}\u{1b}\u{1b}\u{1b}\u{1b}B\u{7}BBBBBBBBBBB\u{1b}\u{1b}\u{1b}B\u{7}BBBBBBBBBBBB\u{1b}\u{1b}B\u{7}BBBBBBBBB";
969        buffer.insert(string);
970    }
971
972    #[test]
973    fn test_fuzzer() {
974        let mut buffer = Buffer::new();
975        buffer.set_cursor(1);
976        buffer.insert("Ղ\u{2}\u{2}\0\0\0");
977        buffer.set_cursor(4);
978        buffer.insert("&\0''''''''''''''''''''%'''''&\0''''''''''''''''''''%@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@''''''''''");
979        buffer.set_cursor(39);
980        buffer.insert("'\u{2}&\0''''''''''''''''''''%''''''''''''''''''''''''''''");
981        buffer.delete_range(184, 169);
982        buffer.set_cursor(127);
983        buffer.insert("00000000061288823:*********");
984        buffer.set_cursor(132);
985        buffer.insert("5''''''''''''''\0\0\0\0\0'''''''");
986        buffer.set_cursor(97);
987        buffer.insert("''?????????????????????z?????????????????????'''''\u{1d}\u{1d}\u{1d}\u{1d}\u{1d}\u{1d}\u{1d}\u{1d}\u{1d}\u{1d}\u{1d}\u{1d}\u{1d}\u{1d}\u{10}");
988        buffer.delete_range(13, 138);
989        buffer.set_cursor(25);
990        buffer
991            .insert("yyyyyyyyyyyyyy\u{2}\0\u{2}\0\0\u{1}\u{17}H\u{17}\u{17}\u{17}\u{17}\u{17}\0\0");
992        buffer.set_cursor(138);
993        buffer.insert("\u{17}?\u{17}\u{17}\u{17}\u{17}\u{17}\u{17}\u{17}\u{17}\u{17}\0\0\0\0\0\0\u{3}\0\0\0''''''''");
994        buffer.set_cursor(39);
995        buffer.insert("\0\0\0''''''''''");
996        buffer.delete_range(247, 45);
997    }
998
999    #[test]
1000    fn test_pos() {
1001        let mut buffer = Buffer::new();
1002        buffer.set_cursor(1);
1003        buffer.insert("AAAAAAAAAAAAAAAAAAA");
1004        buffer.set_cursor(10);
1005        buffer.insert("AAAAAA\0\0AAAAAA");
1006        buffer.set_cursor(26);
1007    }
1008}