im_rope/
lib.rs

1#![warn(missing_docs)]
2#![warn(clippy::pedantic)]
3/*! A Unicode string backed by an RRB vector.
4 *
5 * Similarly to the standard library [`String`] type, a [`Rope`] owns its
6 * storage and guarantees that its contents are valid Unicode. Unlike
7 * a [`String`], it is backed not by a [`Vec<u8>`] but by an
8 * [`im::Vector<u8>`]. These in turn are backed by a balanced tree structure
9 * known as an an RRB tree, which makes a wide variety of operations asymptotically
10 * efficient. In particular, ropes can be cloned in constant time, and can be
11 * split or concatenated in logarithmic time.
12 */
13
14pub mod accessor;
15pub mod pattern;
16mod validations;
17
18#[cfg(test)]
19mod test_utils;
20#[cfg(test)]
21mod tests;
22
23#[cfg(any(test, feature = "proptest"))]
24pub mod proptest;
25
26use accessor::{Accessor, BorrowingAccessor, OwningAccessor, PopVecBytes};
27use im::vector;
28use im::vector::Vector;
29use static_cow::{IntoOwning, ToOwning};
30use std::borrow::Borrow;
31use std::cmp::Ordering;
32use std::iter::{DoubleEndedIterator, FusedIterator, Iterator};
33use std::ops::{Deref, DerefMut, Range, RangeBounds};
34use std::panic::{AssertUnwindSafe, RefUnwindSafe};
35use validations::{
36    ends_on_utf8_boundary, next_code_point, next_code_point_reverse, run_utf8_validation,
37    starts_on_utf8_boundary, utf8_char_width, utf8_is_first_byte,
38};
39
40use pattern::Pattern;
41pub use validations::Utf8Error;
42
43/// Guards against panics during unsafe rope mutation.
44///
45/// `VectorGuard` is returned from [`Rope::as_mut_vector`]. It implements
46/// `DerefMut<Target=Vector<u8>>`. If a `VectorGuard` is dropped by panicking,
47/// it will clear the vector to prevent observation of a rope containing invalid
48/// UTF-8 after catching the panic.
49#[derive(Debug)]
50pub struct VectorGuard<'a>(&'a mut Vector<u8>);
51
52impl<'a> Deref for VectorGuard<'a> {
53    type Target = Vector<u8>;
54
55    fn deref(&self) -> &Self::Target {
56        self.0
57    }
58}
59
60impl<'a> DerefMut for VectorGuard<'a> {
61    fn deref_mut(&mut self) -> &mut Self::Target {
62        self.0
63    }
64}
65
66impl<'a> Drop for VectorGuard<'a> {
67    fn drop(&mut self) {
68        if std::thread::panicking()
69            && std::panic::catch_unwind(AssertUnwindSafe(|| self.0.clear())).is_err()
70        {
71            // It shouldn't be possible for clearing a `Vector<u8>` to panic,
72            // but if it happens then the only safe thing to do is abort.
73            std::process::abort()
74        }
75    }
76}
77
78/** A Unicode string backed by an RRB vector.
79 *
80 * See top-level crate documentation for a full introduction.
81 */
82#[repr(transparent)]
83#[derive(Clone, Default)]
84pub struct Rope {
85    inner: Vector<u8>,
86}
87
88impl Rope {
89    /// Constructs an empty rope.
90    #[must_use]
91    #[inline]
92    pub fn new() -> Self {
93        Rope {
94            inner: Vector::new(),
95        }
96    }
97
98    /// Constructs a rope from a `Vector<u8>` without validating it.
99    ///
100    /// If you are looking for a checked version of this method, use
101    /// the `TryFrom<Vector<u8>>` trait implementation.
102    ///
103    /// # Complexity
104    /// O(1) time and space, or O(log N) if debug assertions are enabled.
105    ///
106    /// # Safety
107    /// `v` must be valid UTF-8.
108    #[must_use]
109    #[inline]
110    pub unsafe fn from_vector_unchecked(v: Vector<u8>) -> Rope {
111        // Validating the whole rope and turning an O(1) call into O(N) is too
112        // much even for debug mode, but we can check the ends and this will hit
113        // a lot of common mistakes.
114        debug_assert!(starts_on_utf8_boundary(&v));
115        debug_assert!(ends_on_utf8_boundary(&v));
116        Rope { inner: v }
117    }
118
119    #[must_use]
120    #[inline]
121    /// Returns a guarded, mutable reference to the rope's underlying
122    /// `Vector<u8>`.
123    ///
124    /// # Complexity
125    /// O(1) time and space.
126    ///
127    /// # Safety
128    /// When the `VectorGuard` is dropped, the contents of the vector must be
129    /// valid UTF-8. The guard will clear the vector if it is dropped by
130    /// panicking, but the caller is still responsible for validity upon normal
131    /// return.
132    ///
133    /// # Examples
134    /// ```
135    /// # use im_rope::{Rope,VectorGuard};
136    /// # use im::vector::Vector;
137    /// # use std::ops::DerefMut;
138    /// # use std::panic::{catch_unwind,AssertUnwindSafe};
139    /// // Add a 😀, one byte at a time.
140    /// fn add_smiley(v: &mut Vector<u8>) {
141    ///     v.push_back(0xf0);
142    ///     v.push_back(0x9f);
143    ///     v.push_back(0x98);
144    ///     panic!("💩!");
145    ///     v.push_back(0x80);
146    /// }
147    ///
148    /// fn main() {
149    ///     let mut rope = Rope::from("My mood today is: ");
150    ///     match catch_unwind(AssertUnwindSafe(|| unsafe {
151    ///         add_smiley(rope.as_mut_vector().deref_mut());
152    ///     })) {
153    ///         Ok(_) => {
154    ///             unreachable!("We won't get here because we panicked.");
155    ///             // But if we hadn't, then we'd see:
156    ///             assert_eq!(rope, Rope::from("My mood today is: 😀"))
157    ///         },
158    ///         Err(_) => {
159    ///             // Phew! The guard saved us from being able to observe the invalid UTF-8,
160    ///             // and just gave us an empty rope instead. This is a weird result, but it's safe,
161    ///             // and we signed ourselves up for weirdness by using `AssertUnwindSafe`.
162    ///             assert_eq!(rope, Rope::new());
163    ///         }
164    ///     }
165    /// }
166    pub unsafe fn as_mut_vector(&mut self) -> VectorGuard<'_> {
167        VectorGuard(&mut self.inner)
168    }
169
170    /// Gets the length of a rope, in bytes.
171    ///
172    /// # Complexity
173    /// O(1) time and space.
174    #[must_use]
175    pub fn len(&self) -> usize {
176        self.as_ref().len()
177    }
178
179    /// Tests whether a rope is empty.
180    ///
181    /// # Complexity
182    /// O(1) time and space.
183    #[must_use]
184    pub fn is_empty(&self) -> bool {
185        self.as_ref().is_empty()
186    }
187
188    /// Tests whether two ropes refer to the same content in memory.
189    ///
190    /// # Complexity
191    /// O(1) time and space.
192    #[must_use]
193    pub fn ptr_eq(&self, other: &Self) -> bool {
194        self.as_ref().ptr_eq(&other.inner)
195    }
196
197    /// Converts a rope into its underlying `Vector<u8>`.
198    ///
199    /// # Complexity
200    /// O(1) time and space.
201    #[must_use]
202    pub fn into_inner(self) -> Vector<u8> {
203        self.inner
204    }
205
206    /// Clears the rope, making it empty.
207    ///
208    /// # Complexity
209    /// O(N) time and space.
210    #[inline]
211    pub fn clear(&mut self) {
212        // SAFETY: an empty rope is valid UTF-8.
213        unsafe {
214            self.as_mut_vector().clear();
215        }
216    }
217
218    /// Gets an iterator over the chars of a rope.
219    ///
220    /// # Complexity
221    ///
222    /// O(1) time and space to construct the iterator. Each call to `next()` is
223    /// O(1) space and amortized O(1) time, with a worst case of
224    /// O(log N).
225    ///
226    /// # Examples
227    /// ```
228    /// # use im_rope::Rope;
229    /// let hello = Rope::from("🦆🦢🦤");
230    /// let mut chars = hello.chars();
231    /// assert_eq!(chars.next(), Some('🦆'));
232    /// assert_eq!(chars.next(), Some('🦢'));
233    /// assert_eq!(chars.next(), Some('🦤'));
234    /// assert_eq!(chars.next(), None);
235    /// ```
236    #[must_use]
237    #[inline]
238    pub fn chars(&self) -> Chars<BorrowingAccessor<'_>> {
239        Chars::borrowed(self)
240    }
241
242    /// Converts a rope into an iterator over its chars.
243    ///
244    /// # Complexity
245    ///
246    /// O(1) time and space to construct the iterator. Each call to `next()` is
247    /// O(1) space and amortized O(1) time, with a worst case of
248    /// O(log N).
249    ///
250    /// # Examples
251    /// ```
252    /// # use im_rope::Rope;
253    /// let mut chars = Rope::from("🦨🦝🦊").into_chars();
254    /// assert_eq!(chars.next(), Some('🦨'));
255    /// assert_eq!(chars.next(), Some('🦝'));
256    /// assert_eq!(chars.next(), Some('🦊'));
257    /// assert_eq!(chars.next(), None);
258    /// ```
259
260    #[must_use]
261    #[inline]
262    pub fn into_chars(self) -> Chars<OwningAccessor> {
263        Chars::owned(self)
264    }
265
266    /// Gets an iterator over the bytes of a rope.
267    ///
268    /// # Complexity
269    ///
270    /// O(1) time and space to construct the iterator. Each call to `next()` is
271    /// O(1) space and amortized O(1) time, with a worst case of O(log N).
272    #[must_use]
273    #[inline]
274    pub fn bytes(&self) -> Bytes<BorrowingAccessor<'_>> {
275        Bytes::borrowed(self)
276    }
277
278    /// Converts a rope into an iterator over its chars.
279    ///
280    /// # Complexity
281    ///
282    /// O(1) time and space to construct the iterator. Each call to `next()` is
283    /// O(1) space and amortized O(1) time, with a worst case of O(log N).
284    #[must_use]
285    #[inline]
286    pub fn into_bytes(self) -> Bytes<OwningAccessor> {
287        Bytes::owned(self)
288    }
289
290    /// Gets an iterator over the chars of a rope and their indices.
291    ///
292    /// # Complexity
293    ///
294    /// O(1) time and space to construct the iterator. Each call to `next()` is
295    /// O(1) space and amortized O(1) time, with a worst case of O(log N).
296    /// # Examples
297    /// ```
298    /// # use im_rope::Rope;
299    /// let hello = Rope::from("🐎🦓🦄");
300    /// let mut chars = hello.char_indices();
301    /// assert_eq!(chars.next(), Some((0, '🐎')));
302    /// assert_eq!(chars.next(), Some((4, '🦓')));
303    /// assert_eq!(chars.next(), Some((8, '🦄')));
304    /// assert_eq!(chars.next(), None);
305    /// ```
306
307    #[must_use]
308    #[inline]
309    pub fn char_indices(&self) -> CharIndices<BorrowingAccessor<'_>> {
310        CharIndices::borrowed(self)
311    }
312
313    /// Converts a rope into an iterator over its chars and their indices.
314    ///
315    /// # Complexity
316    ///
317    /// O(1) time and space to construct the iterator. Each call to `next()` is
318    /// O(1) space and amortized O(1) time, with a worst case of O(log N).
319    /// # Examples
320    /// ```
321    /// # use im_rope::Rope;
322    /// let hello = Rope::from("🐎🦓🦄");
323    /// let mut chars = hello.into_char_indices();
324    /// assert_eq!(chars.next(), Some((0, '🐎')));
325    /// assert_eq!(chars.next(), Some((4, '🦓')));
326    /// assert_eq!(chars.next(), Some((8, '🦄')));
327    /// assert_eq!(chars.next(), None);
328    /// ```
329    #[must_use]
330    #[inline]
331    pub fn into_char_indices(self) -> CharIndices<OwningAccessor> {
332        CharIndices::owned(self)
333    }
334
335    /// Gets an iterator over contiguous chunks of a rope.
336    ///
337    /// The content of a rope is kept internally in 64-byte chunks. Each
338    /// [`Chunk`] provided by the iterator returned from this function contains
339    /// either a `&str` or a `char`. A returned `&str` is a string of all the
340    /// complete characters stored contiguously in a single chunk. A returned
341    /// `char` is a character whose UTF-8 encoded representation straddles two
342    /// chunks.
343    ///
344    /// # Complexity
345    ///
346    /// O(1) time and space to construct the iterator. Each call to `next()` is
347    /// O(1) space and amortized O(1) time, with a worst case of O(log N).
348    ///
349    /// # Examples
350    ///
351    /// In this example, 'x' takes one byte to encode while '🦀' takes four, so
352    /// the entire string takes up two chunks of storage, but the 16th crab is
353    /// split across the two.
354    ///
355    /// ```
356    /// # use im_rope::{Rope,Chunk};
357    /// let xx_31crabs_xx = "xx🦀🦀🦀🦀🦀🦀🦀🦀🦀🦀🦀🦀🦀🦀🦀🦀🦀🦀🦀🦀🦀🦀🦀🦀🦀🦀🦀🦀🦀🦀🦀xx";
358    /// let rope = Rope::from(xx_31crabs_xx);
359    /// let mut chunks = rope.chunks();
360    ///
361    /// assert_eq!(chunks.next(), Some(Chunk::Str("xx🦀🦀🦀🦀🦀🦀🦀🦀🦀🦀🦀🦀🦀🦀🦀")));
362    /// assert_eq!(chunks.next(), Some(Chunk::Char('🦀')));
363    /// assert_eq!(chunks.next(), Some(Chunk::Str("🦀🦀🦀🦀🦀🦀🦀🦀🦀🦀🦀🦀🦀🦀🦀xx")));
364    /// assert_eq!(chunks.next(), None);
365    /// ```
366    #[must_use]
367    #[inline]
368    pub fn chunks(&self) -> Chunks<'_> {
369        Chunks {
370            inner: self.as_ref().leaves(),
371            unconsumed_fwd: &[],
372            unconsumed_back: &[],
373        }
374    }
375
376    /// Checks that `index`-th byte is the first byte in a UTF-8 code point
377    /// sequence or the end of the rope.
378    ///
379    /// The start and end of the rope (when `index == self.len()`) are
380    /// considered to be boundaries.
381    ///
382    /// Returns `false` if `index` is greater than `self.len()`.
383    ///
384    /// # Complexity
385    /// O(log N) time, O(1) space.
386    ///
387    /// # Examples
388    ///
389    /// ```
390    /// # use im_rope::Rope;
391    /// let rope = Rope::from("x💩x");
392    ///
393    /// assert!(rope.is_char_boundary(0));
394    /// assert!(rope.is_char_boundary(1));
395    /// assert!(! rope.is_char_boundary(2));
396    /// assert!(! rope.is_char_boundary(3));
397    /// assert!(! rope.is_char_boundary(4));
398    /// assert!(rope.is_char_boundary(5));
399    /// assert!(rope.is_char_boundary(6));
400    /// assert!(! rope.is_char_boundary(42));
401    /// ```
402    #[must_use]
403    pub fn is_char_boundary(&self, index: usize) -> bool {
404        if index == 0 {
405            return true;
406        }
407
408        match self.inner.get(index) {
409            None => index == self.len(),
410            Some(&b) => utf8_is_first_byte(b),
411        }
412    }
413
414    /// Returns the first character of the rope, or `None` if the rope is empty.
415    #[must_use]
416    #[inline]
417    pub fn front(&self) -> Option<char> {
418        // SAFETY: we rely on `self` being valid UTF-8 so that
419        // `next_code_point` will return a valid codepoint to pass to
420        // `char_from_u32_debug`.
421        unsafe {
422            next_code_point(&mut self.as_ref().iter().copied()).map(|c| char_from_u32_debug(c))
423        }
424    }
425
426    /// Removes and returns the first character of the rope, or returns `None`
427    /// if the rope is empty.
428    #[inline]
429    pub fn pop_front(&mut self) -> Option<char> {
430        // SAFETY: we rely on the invariant that `self` contains valid UTF-8.
431        // `next_code_point` will keep popping bytes until it has read an
432        // entire character, so if the invariant holds before this call it also
433        // holds after.
434        unsafe {
435            let mut v = self.as_mut_vector();
436            next_code_point(&mut PopVecBytes(&mut v)).map(|c| char_from_u32_debug(c))
437        }
438    }
439
440    /// Prepends the character `ch` to the rope.
441    #[inline]
442    pub fn push_front(&mut self, ch: char) {
443        let mut buf: [u8; 4] = [0; 4];
444        let str = ch.encode_utf8(&mut buf);
445        // SAFETY: we must prepend valid UTF-8, which we trust
446        // `char::encode_utf8` to supply.
447        unsafe {
448            let mut v = self.as_mut_vector();
449            for byte in str.bytes().rev() {
450                v.push_front(byte);
451            }
452        }
453    }
454
455    /// Returns the last character of the rope, or `None` if the rope is empty.
456    #[must_use]
457    #[inline]
458    pub fn back(&self) -> Option<char> {
459        // SAFETY: we rely on `self` being valid UTF-8 so that
460        // `next_code_point_reverse` will return a valid codepoint to pass to
461        // `char_from_u32_debug`.
462        unsafe {
463            next_code_point_reverse(&mut self.as_ref().iter().copied())
464                .map(|c| char_from_u32_debug(c))
465        }
466    }
467
468    /// Removes and returns the last character of the rope, or returns `None`
469    /// if the rope is empty.
470    #[inline]
471    pub fn pop_back(&mut self) -> Option<char> {
472        // SAFETY: we rely on the invariant that `self` contains valid UTF-8.
473        // `next_code_point_reverse` will keep popping bytes until it has read an
474        // entire character, so if the invariant holds before this call it also
475        // holds after.
476        unsafe {
477            let mut v = self.as_mut_vector();
478            next_code_point_reverse(&mut PopVecBytes(&mut v)).map(|c| char_from_u32_debug(c))
479        }
480    }
481
482    /// Appends the character `ch` to the end of the rope.
483    #[inline]
484    pub fn push_back(&mut self, ch: char) {
485        let mut buf: [u8; 4] = [0; 4];
486        let str = ch.encode_utf8(&mut buf);
487        // SAFETY: we must append valid UTF-8, which we trust
488        // `char::encode_utf8` to supply.
489        unsafe {
490            let mut v = self.as_mut_vector();
491            for byte in str.bytes() {
492                v.push_back(byte);
493            }
494        }
495    }
496
497    /// Divides one rope into two at an index.
498    ///
499    /// The argument, `mid`, should be a byte offset from the start of the
500    /// string. It must also be on a UTF-8 character boundary.
501    ///
502    /// The two ropes returned go from the start of the rope to `mid`, and from
503    /// `mid` to the end of the rope.
504    ///
505    /// # Complexity
506    /// O(log N) time and space.
507    ///
508    /// # Safety
509    /// `mid` must be on a UTF-8 character boundary.
510    #[must_use]
511    pub unsafe fn split_at_unchecked(&self, mid: usize) -> (Rope, Rope) {
512        let (a, b) = self.as_ref().clone().split_at(mid);
513        debug_assert!(starts_on_utf8_boundary(&b));
514        (
515            Self::from_vector_unchecked(a),
516            Self::from_vector_unchecked(b),
517        )
518    }
519
520    /// Tries dividing one rope into two at an index.
521    ///
522    /// The argument, `mid`, should be a byte offset from the start of the
523    /// string.
524    ///
525    /// On success, the two ropes returned go from the start of the rope to
526    /// `mid`, and from `mid` to the end of the rope.
527    ///
528    /// # Complexity
529    /// O(log N) time and space.
530    ///
531    /// # Errors
532    /// Errors if `mid` is not on a UTF-8 character boundary.
533    ///
534    /// # Examples
535    /// ```
536    /// # use im_rope::Rope;
537    /// let rope = Rope::from("🟥🟩🟦");
538    /// assert_eq!(rope.try_split_at(0), Ok(("".into(),"🟥🟩🟦".into())));
539    /// assert_eq!(rope.try_split_at(4), Ok(("🟥".into(),"🟩🟦".into())));
540    /// assert_eq!(rope.try_split_at(8), Ok(("🟥🟩".into(),"🟦".into())));
541    /// assert_eq!(rope.try_split_at(12), Ok(("🟥🟩🟦".into(),"".into())));
542    ///
543    /// // Fails because it falls in middle of 🟥's encoding.
544    /// assert!(rope.try_split_at(2).is_err());
545    /// ```
546    pub fn try_split_at(&self, mid: usize) -> Result<(Rope, Rope), Utf8BoundaryError> {
547        if mid > self.len() {
548            return Err(Utf8BoundaryError(mid));
549        }
550
551        let (x, y) = self.as_ref().clone().split_at(mid);
552
553        if starts_on_utf8_boundary(&y) {
554            // SAFETY: `x` and `y` must each be valid UTF-8. Per the `Rope`
555            // invariant, the original string was valid, and we just verified
556            // that we've split it at a character boundary. Therefore, both
557            // subropes are also valid.
558            unsafe {
559                Ok((
560                    Self::from_vector_unchecked(x),
561                    Self::from_vector_unchecked(y),
562                ))
563            }
564        } else {
565            Err(Utf8BoundaryError(mid))
566        }
567    }
568
569    /// Divides one rope into two at an index.
570    ///
571    /// The argument, `mid`, should be a byte offset from the start of the
572    /// string.
573    ///
574    /// The two ropes returned go from the start of the rope to `mid`, and from
575    /// `mid` to the end of the rope.
576    ///
577    /// # Complexity
578    /// O(log N) time and space.
579    ///
580    /// # Panics
581    /// Panics if `mid` is not on a UTF-8 character boundary.
582    ///
583    /// # Examples
584    /// ```
585    /// # use im_rope::Rope;
586    /// let rope = Rope::from("🟥🟩🟦");
587    /// assert_eq!(rope.split_at(0), ("".into(), "🟥🟩🟦".into()));
588    /// assert_eq!(rope.split_at(4), ("🟥".into(), "🟩🟦".into()));
589    /// assert_eq!(rope.split_at(8), ("🟥🟩".into(), "🟦".into()));
590    /// assert_eq!(rope.split_at(12), ("🟥🟩🟦".into(),"".into()));
591    /// ```
592
593    #[must_use]
594    #[inline]
595    pub fn split_at(&self, mid: usize) -> (Rope, Rope) {
596        self.try_split_at(mid).unwrap()
597    }
598
599    /// Returns a subrope over the given range of bytes.
600    ///
601    /// # Complexity
602    /// O(log N) time and space.
603    ///
604    /// # Safety
605    /// Both sides of `range` must be UTF-8 character boundaries.
606    #[must_use]
607    pub unsafe fn subrope_unchecked<R: RangeBounds<usize>>(&self, range: R) -> Rope {
608        let (start, end) = to_range_tuple(&range, self.len());
609        let mut v = self.as_ref().skip(start);
610        if cfg!(debug_assertions) {
611            let junk = v.split_off(end - start);
612            debug_assert!(starts_on_utf8_boundary(&junk));
613        } else {
614            // As of im-15.1 `truncate` isn't actually any faster than
615            // `split_off`, but maybe this will get fixed at some point.
616            v.truncate(end - start);
617        }
618
619        Self::from_vector_unchecked(v)
620    }
621
622    /// Tries returning a subrope over the given range of bytes.
623    ///
624    /// # Complexity
625    /// O(log N) time and space.
626    ///
627    /// # Errors
628    /// Errors if either side of `range` is not on a UTF-8 character boundary.
629    pub fn try_subrope<R: RangeBounds<usize>>(&self, range: R) -> Result<Rope, Utf8BoundaryError> {
630        let (start, end) = to_range_tuple(&range, self.len());
631
632        if start > self.len() {
633            Err(Utf8BoundaryError(start))
634        } else if end > self.len() {
635            Err(Utf8BoundaryError(end))
636        } else if start >= end {
637            Ok(Rope::new())
638        } else {
639            let mut v = if start > 0 {
640                let v = self.as_ref().skip(start);
641                if !starts_on_utf8_boundary(&v) {
642                    return Err(Utf8BoundaryError(start));
643                }
644                v
645            } else {
646                self.as_ref().clone()
647            };
648
649            let sublen = end - start;
650
651            if sublen == v.len() {
652                // SAFETY: `v` must be valid UTF-8. Per the `Rope` invariant,
653                // `v` is cut from a valid rope. It has the same ending as the
654                // original rope, and we've just verified that it begins on a
655                // character boundary, so it is therefore valid.
656                unsafe { Ok(Self::from_vector_unchecked(v)) }
657            } else {
658                v.truncate(sublen + 1);
659                // SAFETY: We truncated `v` to `sublen + 1` which must be
660                // positive since `sublen` is unsigned. `v` is now in fact
661                // `sublen + 1` bytes long, because the `truncate` call would
662                // have panicked if the original vector were shorter than that.
663                // (In fact this panic is impossible because sublen is `end -
664                // start` and we already validated at the start of this method
665                // that `end` is not greater than the length of the rope).
666                // Therefore, `v` is non-empty an `pop_back` will never return
667                // `None`.
668                let back = unsafe { v.pop_back().unwrap_unchecked() };
669                if utf8_is_first_byte(back) {
670                    // SAFETY: `v` must be valid UTF-8. Per the `Rope`
671                    // invariant, `v` is cut from a valid rope. Previously, we
672                    // verified that `v` begins on a character boundary, and now
673                    // we've verified that it ends on one as well, so it is
674                    // therefore valid.
675                    unsafe { Ok(Self::from_vector_unchecked(v)) }
676                } else {
677                    Err(Utf8BoundaryError(end))
678                }
679            }
680        }
681    }
682
683    /// Returns a subrope over the given range of bytes.
684    ///
685    /// # Complexity
686    /// O(log N) time and space.
687    ///
688    /// # Panics
689    /// Panics if either side of `range` is not a UTF-8 character boundary.
690    #[must_use]
691    #[inline]
692    pub fn subrope<R: RangeBounds<usize>>(&self, range: R) -> Rope {
693        self.try_subrope(range)
694            .expect("Both sides of `range` must be character boundaries")
695    }
696
697    /// Removes the subrope given by `range` from `self` and returns it.
698    ///
699    /// # Complexity
700    /// O(log N) time and space.
701    ///
702    /// # Safety
703    /// Both ends of `range` must fall on UTF-8 character boundaries.
704    #[allow(clippy::return_self_not_must_use)]
705    pub unsafe fn extract_unchecked<R: RangeBounds<usize>>(&mut self, range: R) -> Rope {
706        let (start, end) = to_range_tuple(&range, self.len());
707        let mut v = self.as_mut_vector();
708
709        if start >= end {
710            Rope::new()
711        } else if end == v.len() {
712            let w = v.split_off(start);
713            Rope::from_vector_unchecked(w)
714        } else if start == 0 {
715            let mut w = v.split_off(end);
716            debug_assert!(starts_on_utf8_boundary(&w));
717            std::mem::swap(&mut *v, &mut w);
718            Rope::from_vector_unchecked(w)
719        } else {
720            let mut w = v.split_off(start);
721            debug_assert!(starts_on_utf8_boundary(&w));
722            let u = w.split_off(end - start);
723            debug_assert!(starts_on_utf8_boundary(&u));
724            v.append(u);
725            Rope::from_vector_unchecked(w)
726        }
727    }
728
729    /// Tries removing the subrope given by `range` from `self`, and returns it.
730    ///
731    /// # Complexity
732    /// O(log N) time and space.
733    ///
734    /// # Errors
735    /// Errors if either end of `range` does not fall on a UTF-8 character boundary.
736    /// On error, `self` is left unchanged.
737    pub fn try_extract<R: RangeBounds<usize>>(
738        &mut self,
739        range: R,
740    ) -> Result<Rope, Utf8BoundaryError> {
741        let (start, end) = to_range_tuple(&range, self.len());
742
743        if start > self.len() {
744            Err(Utf8BoundaryError(start))
745        } else if end > self.len() {
746            Err(Utf8BoundaryError(end))
747        } else if start >= end {
748            Ok(Rope::new())
749        } else if end == self.len() {
750            // SAFETY: On exit from this block `v` must be valid UTF-8, and on
751            // successful return `w` must be as well. The `Rope` invariant
752            // assures that `v` is initially valid. After removing `w`, we check
753            // whether it begins on a UTF-8 boundary; if so, this suffices that
754            // both `v` and `w` are valid. If not, then `w` is re-appended to
755            // `v`, restoring the original state which is valid by hypothesis.
756            //
757            // This technique provides a faster happy path than validating prior
758            // to splitting, because we only need do one lookup rather than two
759            // on the index we're splitting on.
760            unsafe {
761                let mut v = self.as_mut_vector();
762                let w = v.split_off(start);
763                if starts_on_utf8_boundary(&w) {
764                    Ok(Rope::from_vector_unchecked(w))
765                } else {
766                    v.append(w);
767                    Err(Utf8BoundaryError(start))
768                }
769            }
770        } else if start == 0 {
771            // SAFETY: same trick as above.
772            unsafe {
773                let mut v = self.as_mut_vector();
774                let mut w = v.split_off(end);
775
776                if starts_on_utf8_boundary(&w) {
777                    std::mem::swap(&mut *v, &mut w);
778                    Ok(Rope::from_vector_unchecked(w))
779                } else {
780                    v.append(w);
781                    Err(Utf8BoundaryError(end))
782                }
783            }
784        } else {
785            // SAFETY: same as above but now we have to do it on both boundaries.
786            unsafe {
787                let mut v = self.as_mut_vector();
788                let mut w = v.split_off(start);
789
790                if starts_on_utf8_boundary(&w) {
791                    let x = w.split_off(end - start);
792                    if starts_on_utf8_boundary(&x) {
793                        v.append(x);
794                        Ok(Rope::from_vector_unchecked(w))
795                    } else {
796                        w.append(x);
797                        v.append(w);
798                        Err(Utf8BoundaryError(end))
799                    }
800                } else {
801                    v.append(w);
802                    Err(Utf8BoundaryError(start))
803                }
804            }
805        }
806    }
807
808    /// Removes the subrope given by `range` from `self` and returns it.
809    ///
810    /// # Complexity
811    /// O(log N) time and space.
812    ///
813    /// # Panics
814    /// Panics if either end of `range` does not fall on a UTF-8 character boundary.
815    ///
816    /// # Examples
817    /// ```
818    /// # use im_rope::Rope;
819    /// let mut rope = Rope::from("🟥🟩🟦");
820    /// let extracted = rope.extract(4..8);
821    /// assert_eq!(extracted, "🟩");
822    /// assert_eq!(rope, "🟥🟦");
823    /// ```
824    #[inline]
825    #[allow(clippy::return_self_not_must_use)]
826    pub fn extract<R: RangeBounds<usize>>(&mut self, range: R) -> Rope {
827        self.try_extract(range).unwrap()
828    }
829
830    /// Appends `other` to the end of `self`.
831    ///
832    /// # Complexity
833    ///
834    /// If `other` is a `Rope`, O(log (N + M)) time and space. In general, O(M +
835    /// log N) time and space. Here `M` is `other.len()` and `N` is `self.len()`.
836    #[inline]
837    pub fn append<A: StrLike>(&mut self, other: A) {
838        // SAFETY: Upon exit from this block, `v` must be valid UTF-8. The
839        // `Rope` invariant assures that `v` is initially valid, and the
840        // `StrLike` invariant assures that we're apppending something valid.
841        // The concatenation of two valid ropes is valid.
842        unsafe {
843            let mut v = self.as_mut_vector();
844            v.append(other.into_vector());
845        }
846    }
847
848    /// Appends `other` to the end of `self`.
849    ///
850    /// # Complexity
851    ///
852    /// If `other` is a `Rope`, O(log (N + M)) time and space. In general, O(M +
853    /// log N) time and space. Here `M` is `other.len()` and `N` is `self.len()`.
854    ///
855    /// # Safety
856    ///
857    /// `other` must be valid UTF-8. Note that this function is only useful for
858    /// when you have an `other` which is [`BytesLike`] but not [`StrLike`]. If
859    /// you already have a `StrLike` type, there is no additional cost to using
860    /// the safe version of this function.
861    #[inline]
862    pub unsafe fn append_unchecked<O: BytesLike>(&mut self, other: O) {
863        let mut v = self.as_mut_vector();
864        v.append(other.into_vector());
865    }
866
867    /// Prepends `other` to the start of `self`.
868    ///
869    /// # Complexity
870    ///
871    /// If `other` is a `Rope`, O(log (N + M)) time and space. In general, O(M +
872    /// log N) time and space. Here `M` is `other.len()` and `N` is `self.len()`.
873    #[inline]
874    pub fn prepend<A: StrLike>(&mut self, other: A) {
875        let mut o = other.into_rope();
876        std::mem::swap(self, &mut o);
877        self.append(o);
878    }
879
880    /// Prepends `other` to the start of `self`.
881    ///
882    /// # Complexity
883    ///
884    /// If `other` is a `Rope`, O(log (N + M)) time and space. In general, O(M +
885    /// log N) time and space. Here `M` is `other.len()` and `N` is `self.len()`.
886    ///
887    /// # Safety
888    ///
889    /// `other` must be valid UTF-8. Note that this function is only useful for
890    /// when you have an `other` which is [`BytesLike`] but not [`StrLike`]. If
891    /// you already have a `StrLike` type, there is no additional cost to using
892    /// the safe version of this function.
893    #[inline]
894    pub unsafe fn prepend_unchecked<O: BytesLike>(&mut self, other: O) {
895        let mut o = Rope::from_vector_unchecked(other.into_vector());
896        std::mem::swap(self, &mut o);
897        self.append(o);
898    }
899
900    /// Inserts `other` into `self` at position `at`.
901    ///
902    /// # Complexity
903    ///
904    /// If `other` is a `Rope`, O(log (N + M)) time and space. In general, O(M +
905    /// log N) time and space. Here `M` is `other.len()` and `N` is `self.len()`.
906    ///
907    /// # Safety
908    ///
909    /// `other` must be valid UTF-8, and `at` must be a UTF-8 character boundary.
910    #[inline]
911    pub unsafe fn insert_unchecked<O: BytesLike>(&mut self, at: usize, other: O) {
912        let mut v = self.as_mut_vector();
913        let w = v.split_off(at);
914        debug_assert!(starts_on_utf8_boundary(&w));
915        v.append(other.into_vector());
916        v.append(w);
917    }
918
919    /// Tries inserting `other` into `self` at position `at`.
920    ///
921    /// # Complexity
922    ///
923    /// If `other` is a `Rope`, O(log (N + M)) time and space. In general, O(M +
924    /// log N) time and space. Here `M` is `other.len()` and `N` is `self.len()`.
925    ///
926    /// # Errors
927    /// Errors if `at` is not on a UTF-8 character boundary. On error, `self`
928    /// is left unchanged.
929    pub fn try_insert<A: StrLike>(&mut self, at: usize, other: A) -> Result<(), Utf8BoundaryError> {
930        // SAFETY: On exit from this block `v` must be valid UTF-8. The `Rope`
931        // invariant assures that `v` is initially valid. After splitting `w`,
932        // we check whether it begins on a UTF-8 boundary; if so, this suffices
933        // that both `v` and `w` are valid. If not, then `w` is re-appended to
934        // `v`, restoring the original state which is valid by hypothesis. In
935        // the sucessful case, we first append `other`, which is valid by the
936        // `StrLike` invariant, then we append `w`. Appending valid UTF-8
937        // strings to each other gives vaild UTF-8.
938        //
939        // This technique provides a faster happy path than validating prior to
940        // splitting, because we only need do one lookup rather than two on the
941        // index we're splitting on.
942        unsafe {
943            let mut v = self.as_mut_vector();
944
945            let w = v.split_off(at);
946            if starts_on_utf8_boundary(&w) {
947                v.append(other.into_vector());
948                v.append(w);
949                Ok(())
950            } else {
951                v.append(w);
952                Err(Utf8BoundaryError(at))
953            }
954        }
955    }
956
957    /// Inserts `other` into `self` at position `at`.
958    ///
959    /// # Complexity
960    ///
961    /// If `other` is a `Rope`, O(log (N + M)) time and space. In general, O(M +
962    /// log N) time and space. Here `M` is `other.len()` and `N` is `self.len()`.
963    ///
964    /// # Panics
965    ///
966    /// Panics if `at` is not a UTF-8 character boundary.
967    ///
968    /// # Examples
969    /// ```
970    /// # use im_rope::Rope;
971    /// # let mut rope = Rope::from("one three");
972    /// rope.insert(4, "two ");
973    /// assert_eq!(rope, Rope::from("one two three"));
974    /// ```
975    #[inline]
976    pub fn insert<A: StrLike>(&mut self, at: usize, other: A) {
977        self.try_insert(at, other).unwrap();
978    }
979
980    /// Searches forward for all occurences of `needle` within `self`.
981    ///
982    /// Returns an iterator over [`Range<usize>`], giving the locations of
983    /// matches.
984    ///
985    /// # Complexity
986    /// Dependent on the type of [`Pattern`] used; see its documentation for
987    /// more details.
988    #[inline]
989    #[must_use]
990    pub fn find_all<P>(&self, needle: P) -> FindAll<BorrowingAccessor<'_>, P>
991    where
992        P: Pattern,
993    {
994        FindAll {
995            inner: needle._find_all(BorrowingAccessor::new(self.as_ref())),
996        }
997    }
998
999    /// Searches backward for all occurences of `needle` within `self`.
1000    ///
1001    /// Returns an iterator over [`Range<usize>`], giving the locations of
1002    /// matches.
1003    ///
1004    /// # Complexity
1005    /// Dependent on the type of [`Pattern`] used; see its documentation for
1006    /// more details.
1007    #[inline]
1008    #[must_use]
1009    pub fn rfind_all<P>(&self, needle: P) -> RFindAll<BorrowingAccessor<'_>, P>
1010    where
1011        P: Pattern,
1012    {
1013        RFindAll {
1014            inner: needle._rfind_all(BorrowingAccessor::new(self.as_ref())),
1015        }
1016    }
1017
1018    /// Searches forward for the first occurence of `needle` within
1019    /// `self`.
1020    ///
1021    /// If any is found, returns a [`Range<usize>`] giving its location.
1022    ///
1023    /// # Complexity
1024    /// Dependent on the type of [`Pattern`] used; see its documentation for
1025    /// more details.
1026    #[inline]
1027    #[must_use]
1028    pub fn find<P>(&self, needle: P) -> Option<(Range<usize>, P::Output)>
1029    where
1030        P: Pattern,
1031    {
1032        self.find_all(needle).next()
1033    }
1034
1035    /// Searches backward for the first occurence of `needle` within
1036    /// `self`.
1037    ///
1038    /// If any is found, returns a [`Range<usize>`] giving its location.
1039    ///
1040    /// # Complexity
1041    /// Dependent on the type of [`Pattern`] used; see its documentation for
1042    /// more details.
1043    #[inline]
1044    #[must_use]
1045    pub fn rfind<P>(&self, needle: P) -> Option<(Range<usize>, P::Output)>
1046    where
1047        P: Pattern,
1048    {
1049        self.rfind_all(needle).next()
1050    }
1051
1052    /// Returns `true` iff `self` the beginning of `self` matches `needle`.
1053    ///
1054    /// # Complexity
1055    /// Dependent on the type of [`Pattern`] used; see its documentation for
1056    /// more details.
1057    #[inline]
1058    #[must_use]
1059    pub fn starts_with<P>(&self, needle: P) -> bool
1060    where
1061        P: Pattern,
1062    {
1063        needle._is_prefix(self)
1064    }
1065
1066    /// Returns `true` iff `self` the end of `self` matches `needle`.
1067    ///
1068    /// # Complexity
1069    /// Dependent on the type of [`Pattern`] used; see its documentation for
1070    /// more details.
1071    #[inline]
1072    #[must_use]
1073    pub fn ends_with<P>(&self, needle: P) -> bool
1074    where
1075        P: Pattern,
1076    {
1077        needle._is_suffix(self)
1078    }
1079
1080    /// Returns a forward iterator over subropes of `self` separated by
1081    /// `delimiter`.
1082    ///
1083    /// # Complexity
1084    /// Dependent on the type of [`Pattern`] used; see its documentation for
1085    /// more details.
1086    ///
1087    /// # Examples
1088    ///
1089    /// ```
1090    /// # use im_rope::Rope;
1091    /// let rope = Rope::from("one,two,three");
1092    /// let mut tokens = rope.split(',');
1093    /// assert_eq!(tokens.next(), Some("one".into()));
1094    /// assert_eq!(tokens.next(), Some("two".into()));
1095    /// assert_eq!(tokens.next(), Some("three".into()));
1096    /// assert_eq!(tokens.next(), None);
1097    /// ```
1098    #[inline]
1099    #[must_use]
1100    pub fn split<P>(&self, delimiter: P) -> Split<BorrowingAccessor<'_>, P>
1101    where
1102        P: Pattern,
1103    {
1104        Split::new(self, delimiter, 0)
1105    }
1106
1107    /// Returns a forward iterator over the first `limit` subropes of
1108    /// `self` separated by `delimiter`.
1109    ///
1110    /// After `limit` delimiters have been encountered, the remainder of the
1111    /// rope will be returned as a single subrope.
1112    ///
1113    /// # Complexity
1114    /// Dependent on the type of [`Pattern`] used; see its documentation for
1115    /// more details.
1116    ///
1117    /// # Examples
1118    ///
1119    /// ```
1120    /// # use im_rope::Rope;
1121    /// let rope = Rope::from("one,two,three,four");
1122    /// let mut tokens = rope.splitn(3, ',');
1123    /// assert_eq!(tokens.next(), Some("one".into()));
1124    /// assert_eq!(tokens.next(), Some("two".into()));
1125    /// assert_eq!(tokens.next(), Some("three,four".into()));
1126    /// assert_eq!(tokens.next(), None);
1127    /// ```
1128    #[inline]
1129    #[must_use]
1130    pub fn splitn<P>(&self, limit: usize, delimiter: P) -> SplitN<BorrowingAccessor<'_>, P>
1131    where
1132        P: Pattern,
1133    {
1134        SplitN::new(self, delimiter, limit)
1135    }
1136
1137    /// Returns a forward iterator over subropes of `self` separated or
1138    /// terminated by `terminator`.
1139    ///
1140    /// # Complexity
1141    /// Dependent on the type of [`Pattern`] used; see its documentation for
1142    /// more details.
1143    ///
1144    /// # Examples
1145    ///
1146    /// ```
1147    /// # use im_rope::Rope;
1148    /// let rope =  Rope::from("one;two;three;");
1149    /// let mut tokens = rope.split_terminator(';');
1150    /// assert_eq!(tokens.next(), Some("one".into()));
1151    /// assert_eq!(tokens.next(), Some("two".into()));
1152    /// assert_eq!(tokens.next(), Some("three".into()));
1153    /// assert_eq!(tokens.next(), None);
1154    /// ```
1155    #[inline]
1156    #[must_use]
1157    pub fn split_terminator<P>(&self, terminator: P) -> SplitTerminator<BorrowingAccessor<'_>, P>
1158    where
1159        P: Pattern,
1160    {
1161        SplitTerminator::new(self, terminator, 0)
1162    }
1163
1164    /// Returns a forward iterator over subropes of `self` separated or
1165    /// terminated by `delimiter`. The delimiter will be included in the
1166    /// returned subropes.
1167    ///
1168    /// # Complexity
1169    /// Dependent on the type of [`Pattern`] used; see its documentation for
1170    /// more details.
1171    ///
1172    /// # Examples
1173    ///
1174    /// ```
1175    /// # use im_rope::Rope;
1176    /// let rope = Rope::from("one;two;three;");
1177    /// let mut tokens = rope.split_inclusive(';');
1178    /// assert_eq!(tokens.next(), Some("one;".into()));
1179    /// assert_eq!(tokens.next(), Some("two;".into()));
1180    /// assert_eq!(tokens.next(), Some("three;".into()));
1181    /// assert_eq!(tokens.next(), None);
1182    /// ```
1183    #[inline]
1184    #[must_use]
1185    pub fn split_inclusive<P>(&self, delimiter: P) -> SplitInclusive<BorrowingAccessor<'_>, P>
1186    where
1187        P: Pattern,
1188    {
1189        SplitInclusive::new(self, delimiter, 0)
1190    }
1191
1192    /// Returns a backward iterator over subropes of `self` separated by
1193    /// `delimiter`.
1194    ///
1195    /// # Complexity
1196    /// Dependent on the type of [`Pattern`] used; see its documentation for
1197    /// more details.
1198    /// # Examples
1199    ///
1200    /// ```
1201    /// # use im_rope::Rope;
1202    /// let rope = Rope::from("one,two,three");
1203    /// let mut tokens = rope.rsplit(',');
1204    /// assert_eq!(tokens.next(), Some("three".into()));
1205    /// assert_eq!(tokens.next(), Some("two".into()));
1206    /// assert_eq!(tokens.next(), Some("one".into()));
1207    /// assert_eq!(tokens.next(), None);
1208    /// ```
1209    #[inline]
1210    #[must_use]
1211    pub fn rsplit<P>(&self, delimiter: P) -> RSplit<BorrowingAccessor<'_>, P>
1212    where
1213        P: Pattern,
1214    {
1215        RSplit::new(self, delimiter, 0)
1216    }
1217
1218    /// Returns a backward iterator over the first `limit` subropes of
1219    /// `self` separated by `delimiter`.
1220    ///
1221    /// After `limit` delimiters have been encountered, the remainder of the
1222    /// rope will be returned as a single subrope.
1223    ///
1224    /// # Complexity
1225    /// Dependent on the type of [`Pattern`] used; see its documentation for
1226    /// more details.
1227    ///
1228    /// # Examples
1229    ///
1230    /// ```
1231    /// # use im_rope::Rope;
1232    /// let rope = Rope::from("one,two,three,four");
1233    /// let mut tokens = rope.rsplitn(3, ',');
1234    /// assert_eq!(tokens.next(), Some("four".into()));
1235    /// assert_eq!(tokens.next(), Some("three".into()));
1236    /// assert_eq!(tokens.next(), Some("one,two".into()));
1237    /// assert_eq!(tokens.next(), None);
1238    /// ```
1239    #[inline]
1240    #[must_use]
1241    pub fn rsplitn<P>(&self, limit: usize, delimiter: P) -> RSplitN<BorrowingAccessor<'_>, P>
1242    where
1243        P: Pattern,
1244    {
1245        RSplitN::new(self, delimiter, limit)
1246    }
1247
1248    /// Returns a backward iterator over subropes of `self` separated or
1249    /// terminated by `terminator`.
1250    ///
1251    /// # Complexity
1252    /// Dependent on the type of [`Pattern`] used; see its documentation for
1253    /// more details.
1254    ///
1255    /// # Examples
1256    ///
1257    /// ```
1258    /// # use im_rope::Rope;
1259    /// let rope = Rope::from("one;two;three;");
1260    /// let mut tokens = rope.rsplit_terminator(';');
1261    /// assert_eq!(tokens.next(), Some("three".into()));
1262    /// assert_eq!(tokens.next(), Some("two".into()));
1263    /// assert_eq!(tokens.next(), Some("one".into()));
1264    /// assert_eq!(tokens.next(), None);
1265    /// ```
1266    #[inline]
1267    #[must_use]
1268    pub fn rsplit_terminator<P>(&self, terminator: P) -> RSplitTerminator<BorrowingAccessor<'_>, P>
1269    where
1270        P: Pattern,
1271    {
1272        RSplitTerminator::new(self, terminator, 0)
1273    }
1274
1275    /// Splits `self` into lines.
1276    ///
1277    /// Lines are ended with either a newline (\n) or a carriage return with a
1278    /// line feed (\r\n).
1279    ///
1280    /// The final line ending is optional. A string that ends with a final line
1281    /// ending will return the same lines as an otherwise identical string
1282    /// without a final line ending.
1283    ///
1284    /// # Complexity
1285    /// O(N log N) time for the search, O(log M) space to construct each line,
1286    /// where M is the length of the line.
1287    ///
1288    /// # Examples
1289    /// ```
1290    /// # use im_rope::Rope;
1291    /// let rope = Rope::from("one\ntwo\r\nthree");
1292    /// let mut lines = rope.lines();
1293    /// assert_eq!(lines.next(), Some("one".into()));
1294    /// assert_eq!(lines.next(), Some("two".into()));
1295    /// assert_eq!(lines.next(), Some("three".into()));
1296    /// assert_eq!(lines.next(), None);
1297    /// ```
1298    ///
1299    /// ```
1300    /// # use im_rope::Rope;
1301    /// let rope = Rope::from("one\ntwo\r\nthree\n");
1302    /// let mut lines = rope.lines();
1303    /// assert_eq!(lines.next(), Some("one".into()));
1304    /// assert_eq!(lines.next(), Some("two".into()));
1305    /// assert_eq!(lines.next(), Some("three".into()));
1306    /// assert_eq!(lines.next(), None);
1307    /// ```
1308    ///
1309    /// A bare carriage return at the end of the line will not be stripped. This
1310    /// is contrast to the behavior [`std::str::Lines`], which as of Rust 1.66
1311    /// still has this as a bug (which is expeccted to be fixed).
1312    /// ```
1313    /// # use im_rope::Rope;
1314    /// let rope = Rope::from("one\ntwo\r\nthree\r");
1315    /// let mut lines = rope.lines();
1316    /// assert_eq!(lines.next(), Some("one".into()));
1317    /// assert_eq!(lines.next(), Some("two".into()));
1318    /// assert_eq!(lines.next(), Some("three\r".into()));
1319    /// assert_eq!(lines.next(), None);
1320    /// ```
1321    #[inline]
1322    #[must_use]
1323    pub fn lines(&self) -> Lines<BorrowingAccessor<'_>> {
1324        Lines::borrowed(self)
1325    }
1326}
1327
1328macro_rules! reverse_cmp {
1329    ($ty:ty) => {
1330        impl PartialEq<Rope> for $ty {
1331            fn eq(&self, other: &Rope) -> bool {
1332                other.eq(self)
1333            }
1334        }
1335
1336        impl PartialOrd<Rope> for $ty {
1337            fn partial_cmp(&self, other: &Rope) -> Option<Ordering> {
1338                other.partial_cmp(self).map(|o| o.reverse())
1339            }
1340        }
1341    };
1342}
1343
1344impl PartialEq<[u8]> for Rope {
1345    fn eq(&self, mut other: &[u8]) -> bool {
1346        if self.len() != other.len() {
1347            return false;
1348        }
1349
1350        for chunk in self.as_ref().leaves() {
1351            if chunk.ne(&other[..chunk.len()]) {
1352                return false;
1353            }
1354            other = &other[chunk.len()..];
1355        }
1356
1357        true
1358    }
1359}
1360
1361impl PartialOrd<[u8]> for Rope {
1362    #[allow(clippy::redundant_else)]
1363    fn partial_cmp(&self, mut other: &[u8]) -> Option<Ordering> {
1364        for chunk in self.inner.leaves() {
1365            if chunk.len() > other.len() {
1366                match chunk[..other.len()].cmp(other) {
1367                    Ordering::Equal => return Some(Ordering::Greater),
1368                    ord => return Some(ord),
1369                }
1370            } else {
1371                match chunk.cmp(&other[..chunk.len()]) {
1372                    Ordering::Equal => {
1373                        other = &other[chunk.len()..];
1374                    }
1375                    ord => return Some(ord),
1376                }
1377            }
1378        }
1379        if other.is_empty() {
1380            Some(Ordering::Equal)
1381        } else {
1382            Some(Ordering::Less)
1383        }
1384    }
1385}
1386
1387reverse_cmp!([u8]);
1388
1389impl PartialEq<str> for Rope {
1390    fn eq(&self, other: &str) -> bool {
1391        self.eq(other.as_bytes())
1392    }
1393}
1394
1395impl PartialOrd<str> for Rope {
1396    fn partial_cmp(&self, other: &str) -> Option<Ordering> {
1397        self.partial_cmp(other.as_bytes())
1398    }
1399}
1400
1401reverse_cmp!(str);
1402
1403impl PartialEq<&str> for Rope {
1404    fn eq(&self, other: &&str) -> bool {
1405        self.eq(other.as_bytes())
1406    }
1407}
1408
1409impl PartialOrd<&str> for Rope {
1410    fn partial_cmp(&self, other: &&str) -> Option<Ordering> {
1411        self.partial_cmp(other.as_bytes())
1412    }
1413}
1414
1415reverse_cmp!(&str);
1416
1417impl PartialEq<Vec<u8>> for Rope {
1418    fn eq(&self, other: &Vec<u8>) -> bool {
1419        self.eq(other.as_slice())
1420    }
1421}
1422
1423impl PartialOrd<Vec<u8>> for Rope {
1424    fn partial_cmp(&self, other: &Vec<u8>) -> Option<Ordering> {
1425        self.partial_cmp(other.as_slice())
1426    }
1427}
1428
1429reverse_cmp!(Vec<u8>);
1430
1431impl PartialEq<String> for Rope {
1432    fn eq(&self, other: &String) -> bool {
1433        self.eq(other.as_bytes())
1434    }
1435}
1436
1437impl PartialOrd<String> for Rope {
1438    fn partial_cmp(&self, other: &String) -> Option<Ordering> {
1439        self.partial_cmp(other.as_bytes())
1440    }
1441}
1442
1443reverse_cmp!(String);
1444
1445// The PartialOrd and PartialEq implementations for Vector<A> just iterate
1446// over each element. Because we're specialized on u8, we can speed things
1447// up by comparing chunks at a time.
1448impl PartialOrd<Vector<u8>> for Rope {
1449    #[allow(clippy::redundant_else)]
1450    fn partial_cmp(&self, other: &Vector<u8>) -> Option<Ordering> {
1451        let mut self_iter = self.as_ref().leaves();
1452        let mut other_iter = other.leaves();
1453
1454        let mut self_chunk: &[u8] = &[];
1455        let mut other_chunk: &[u8] = &[];
1456
1457        loop {
1458            match self_chunk.len().cmp(&other_chunk.len()) {
1459                Ordering::Less => match self_chunk.cmp(&other_chunk[..self_chunk.len()]) {
1460                    Ordering::Equal => {
1461                        let self_len = self_chunk.len();
1462                        self_chunk = match next_nonempty(&mut self_iter) {
1463                            None => return Some(Ordering::Less),
1464                            Some(chunk) => chunk,
1465                        };
1466                        other_chunk = &other_chunk[self_len..];
1467                    }
1468                    ord => return Some(ord),
1469                },
1470                Ordering::Equal => match self_chunk.cmp(other_chunk) {
1471                    Ordering::Equal => {
1472                        self_chunk = match next_nonempty(&mut self_iter) {
1473                            None => {
1474                                if next_nonempty(&mut other_iter).is_some() {
1475                                    return Some(Ordering::Less);
1476                                } else {
1477                                    return Some(Ordering::Equal);
1478                                }
1479                            }
1480                            Some(chunk) => chunk,
1481                        };
1482
1483                        other_chunk = match next_nonempty(&mut other_iter) {
1484                            None => return Some(Ordering::Greater),
1485                            Some(chunk) => chunk,
1486                        }
1487                    }
1488
1489                    ord => return Some(ord),
1490                },
1491
1492                Ordering::Greater => match self_chunk[..other_chunk.len()].cmp(other_chunk) {
1493                    Ordering::Equal => {
1494                        self_chunk = &self_chunk[other_chunk.len()..];
1495                        other_chunk = match next_nonempty(&mut other_iter) {
1496                            None => return Some(Ordering::Greater),
1497                            Some(chunk) => chunk,
1498                        }
1499                    }
1500                    ord => return Some(ord),
1501                },
1502            }
1503        }
1504    }
1505}
1506
1507impl PartialEq<Vector<u8>> for Rope {
1508    fn eq(&self, other: &Vector<u8>) -> bool {
1509        if self.inner.ptr_eq(other) {
1510            true
1511        } else if self.inner.len() != other.len() {
1512            false
1513        } else {
1514            self.partial_cmp(other).unwrap() == Ordering::Equal
1515        }
1516    }
1517}
1518
1519reverse_cmp!(Vector<u8>);
1520
1521impl PartialEq<Rope> for Rope {
1522    fn eq(&self, other: &Rope) -> bool {
1523        self.eq(&other.inner)
1524    }
1525}
1526
1527impl Eq for Rope {}
1528
1529impl PartialOrd<Rope> for Rope {
1530    fn partial_cmp(&self, other: &Rope) -> Option<Ordering> {
1531        self.partial_cmp(&other.inner)
1532    }
1533}
1534
1535impl Ord for Rope {
1536    fn cmp(&self, other: &Self) -> Ordering {
1537        self.partial_cmp(other).unwrap()
1538    }
1539}
1540
1541impl std::hash::Hash for Rope {
1542    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
1543        state.write_usize(self.len());
1544        for b in self.bytes() {
1545            state.write_u8(b);
1546        }
1547    }
1548}
1549
1550impl From<String> for Rope {
1551    fn from(s: String) -> Self {
1552        //SAFETY: `String`s guarantee the UTF-8 validity of their contents.
1553        unsafe { Self::from_vector_unchecked(s.into_bytes().into()) }
1554    }
1555}
1556
1557impl From<&str> for Rope {
1558    fn from(s: &str) -> Self {
1559        //SAFETY: `str`s guarantee the UTF-8 validity of their contents.
1560        unsafe { Self::from_vector_unchecked(s.as_bytes().into()) }
1561    }
1562}
1563
1564impl From<&String> for Rope {
1565    fn from(s: &String) -> Self {
1566        //SAFETY: `String`s guarantee the UTF-8 validity of their contents.
1567        unsafe { Self::from_vector_unchecked(s.as_bytes().into()) }
1568    }
1569}
1570
1571impl From<char> for Rope {
1572    fn from(ch: char) -> Self {
1573        let mut buf: [u8; 4] = Default::default();
1574        let str = ch.encode_utf8(&mut buf);
1575        Rope::from(str as &str)
1576    }
1577}
1578
1579impl From<&char> for Rope {
1580    fn from(ch: &char) -> Self {
1581        Self::from(*ch)
1582    }
1583}
1584
1585impl TryFrom<Vector<u8>> for Rope {
1586    type Error = FromUtf8Error;
1587    fn try_from(v: Vector<u8>) -> Result<Self, Self::Error> {
1588        match run_utf8_validation(&v) {
1589            Ok(()) => unsafe {
1590                // SAFETY: `v` must be valid UTF-8, which we just finished
1591                // checking.
1592                Ok(Self::from_vector_unchecked(v))
1593            },
1594            Err(e) => Err(FromUtf8Error {
1595                vector: v,
1596                error: e,
1597            }),
1598        }
1599    }
1600}
1601
1602impl TryFrom<&Vector<u8>> for Rope {
1603    type Error = FromUtf8Error;
1604    fn try_from(v: &Vector<u8>) -> Result<Self, Self::Error> {
1605        Self::try_from(v.clone())
1606    }
1607}
1608
1609impl TryFrom<Vec<u8>> for Rope {
1610    type Error = std::string::FromUtf8Error;
1611    fn try_from(v: Vec<u8>) -> Result<Self, Self::Error> {
1612        Ok(Self::from(String::from_utf8(v)?))
1613    }
1614}
1615
1616impl TryFrom<&Vec<u8>> for Rope {
1617    type Error = Utf8Error;
1618    fn try_from(v: &Vec<u8>) -> Result<Self, Self::Error> {
1619        Self::try_from(v.as_slice())
1620    }
1621}
1622
1623impl TryFrom<&[u8]> for Rope {
1624    type Error = Utf8Error;
1625    fn try_from(v: &[u8]) -> Result<Self, Self::Error> {
1626        Ok(Self::from(std::str::from_utf8(v)?))
1627    }
1628}
1629
1630impl From<&Rope> for Vec<u8> {
1631    fn from(t: &Rope) -> Vec<u8> {
1632        let mut v: Vec<u8> = Vec::with_capacity(t.len());
1633        for chunk in t.as_ref().leaves() {
1634            v.extend_from_slice(chunk);
1635        }
1636        v
1637    }
1638}
1639
1640impl From<Rope> for Vec<u8> {
1641    fn from(t: Rope) -> Self {
1642        (&t).into()
1643    }
1644}
1645
1646impl From<&Rope> for Vector<u8> {
1647    fn from(value: &Rope) -> Self {
1648        value.clone().into_inner()
1649    }
1650}
1651
1652impl From<Rope> for Vector<u8> {
1653    fn from(value: Rope) -> Self {
1654        value.into_inner()
1655    }
1656}
1657
1658impl From<&Rope> for String {
1659    fn from(t: &Rope) -> String {
1660        // SAFETY: The `Rope` invariant guarantees that we have valid UTF-8.
1661        unsafe { string_from_utf8_debug(t.into()) }
1662    }
1663}
1664
1665impl From<Rope> for String {
1666    fn from(t: Rope) -> String {
1667        String::from(&t)
1668    }
1669}
1670
1671impl AsRef<Vector<u8>> for Rope {
1672    fn as_ref(&self) -> &Vector<u8> {
1673        &self.inner
1674    }
1675}
1676
1677impl Borrow<Vector<u8>> for Rope {
1678    fn borrow(&self) -> &Vector<u8> {
1679        &self.inner
1680    }
1681}
1682
1683impl<A> Extend<A> for Rope
1684where
1685    A: StrLike,
1686{
1687    fn extend<T: IntoIterator<Item = A>>(&mut self, iter: T) {
1688        for item in iter {
1689            self.append(item);
1690        }
1691    }
1692}
1693
1694impl<A> FromIterator<A> for Rope
1695where
1696    A: StrLike,
1697{
1698    fn from_iter<T: IntoIterator<Item = A>>(iter: T) -> Self {
1699        let mut rope = Rope::new();
1700        rope.extend(iter);
1701        rope
1702    }
1703}
1704
1705impl std::ops::Add<Rope> for Rope {
1706    type Output = Rope;
1707
1708    fn add(mut self, rhs: Rope) -> Self::Output {
1709        self.append(rhs);
1710        self
1711    }
1712}
1713
1714impl<'a> std::ops::Add<&'a Rope> for Rope {
1715    type Output = Rope;
1716
1717    fn add(mut self, rhs: &Rope) -> Self::Output {
1718        self.append(rhs);
1719        self
1720    }
1721}
1722
1723impl<'a> std::ops::Add<Rope> for &'a Rope {
1724    type Output = Rope;
1725
1726    fn add(self, rhs: Rope) -> Self::Output {
1727        let mut out = self.clone();
1728        out.append(rhs);
1729        out
1730    }
1731}
1732
1733impl<'a> std::ops::Add<&'a Rope> for &'a Rope {
1734    type Output = Rope;
1735
1736    fn add(self, rhs: &Rope) -> Self::Output {
1737        let mut out = self.clone();
1738        out.append(rhs);
1739        out
1740    }
1741}
1742
1743impl<T> std::ops::AddAssign<T> for Rope
1744where
1745    T: StrLike,
1746{
1747    fn add_assign(&mut self, rhs: T) {
1748        self.append(rhs);
1749    }
1750}
1751
1752impl std::fmt::Debug for Rope {
1753    // See <https://github.com/rust-lang/rust/issues/107035> for why this
1754    // method is annoyingly complicated.
1755    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1756        f.write_str("\"")?;
1757
1758        for chunk in self.chunks() {
1759            match chunk {
1760                Chunk::Str(str) => {
1761                    for ch in str.chars() {
1762                        if ch == '\'' {
1763                            f.write_str("'")?;
1764                        } else {
1765                            std::fmt::Display::fmt(&ch.escape_debug(), f)?;
1766                        }
1767                    }
1768                }
1769                Chunk::Char(ch) => {
1770                    if ch == '\'' {
1771                        f.write_str("'")?;
1772                    } else {
1773                        std::fmt::Display::fmt(&ch.escape_debug(), f)?;
1774                    }
1775                }
1776            }
1777        }
1778
1779        f.write_str("\"")
1780    }
1781}
1782
1783impl std::fmt::Display for Rope {
1784    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1785        for chunk in self.chunks() {
1786            match chunk {
1787                Chunk::Str(str) => str.fmt(f)?,
1788                Chunk::Char(c) => c.fmt(f)?,
1789            }
1790        }
1791        Ok(())
1792    }
1793}
1794
1795impl std::fmt::Write for Rope {
1796    fn write_str(&mut self, s: &str) -> std::fmt::Result {
1797        self.append(s);
1798        Ok(())
1799    }
1800}
1801
1802#[cfg(any(test, feature = "serde"))]
1803impl serde::Serialize for Rope {
1804    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
1805    where
1806        S: serde::Serializer,
1807    {
1808        serializer.collect_str(self)
1809    }
1810}
1811
1812#[cfg(any(test, feature = "serde"))]
1813impl<'de> serde::Deserialize<'de> for Rope {
1814    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
1815    where
1816        D: serde::Deserializer<'de>,
1817    {
1818        struct StrVisitor;
1819
1820        impl<'de> serde::de::Visitor<'de> for StrVisitor {
1821            type Value = Rope;
1822
1823            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
1824                formatter.write_str("a string")
1825            }
1826
1827            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
1828            where
1829                E: serde::de::Error,
1830            {
1831                Ok(Rope::from(v))
1832            }
1833        }
1834
1835        deserializer.deserialize_str(StrVisitor)
1836    }
1837}
1838
1839#[cfg(any(test, feature = "proptest"))]
1840impl ::proptest::arbitrary::Arbitrary for Rope {
1841    type Parameters = self::proptest::RopeParam;
1842    type Strategy = self::proptest::RopeStrategy;
1843
1844    fn arbitrary_with(args: Self::Parameters) -> Self::Strategy {
1845        self::proptest::RopeStrategy(args)
1846    }
1847}
1848
1849/// A trait for types that can be converted into a [`Vector<u8>`].
1850///
1851/// This trait acts as a bound for certain unsafe functions that construct or
1852/// insert into `Rope`s.
1853pub trait BytesLike: RefUnwindSafe + Sized {
1854    /// Convert `self` to a [`Vector<u8>`].
1855    fn into_vector(self) -> Vector<u8>;
1856}
1857
1858impl BytesLike for Vector<u8> {
1859    fn into_vector(self) -> Vector<u8> {
1860        self
1861    }
1862}
1863
1864impl<'a> BytesLike for &'a Vector<u8> {
1865    fn into_vector(self) -> Vector<u8> {
1866        self.clone()
1867    }
1868}
1869
1870impl BytesLike for Rope {
1871    fn into_vector(self) -> Vector<u8> {
1872        self.into_inner()
1873    }
1874}
1875
1876impl BytesLike for &Rope {
1877    fn into_vector(self) -> Vector<u8> {
1878        self.as_ref().clone()
1879    }
1880}
1881
1882impl<'a> BytesLike for &'a [u8] {
1883    fn into_vector(self) -> Vector<u8> {
1884        self.into()
1885    }
1886}
1887
1888impl BytesLike for Vec<u8> {
1889    fn into_vector(self) -> Vector<u8> {
1890        self.into()
1891    }
1892}
1893
1894impl BytesLike for &Vec<u8> {
1895    fn into_vector(self) -> Vector<u8> {
1896        self.into()
1897    }
1898}
1899
1900impl<'a> BytesLike for &'a str {
1901    fn into_vector(self) -> Vector<u8> {
1902        self.as_bytes().into_vector()
1903    }
1904}
1905
1906impl BytesLike for String {
1907    fn into_vector(self) -> Vector<u8> {
1908        self.as_bytes().into_vector()
1909    }
1910}
1911
1912impl<'a> BytesLike for &'a String {
1913    fn into_vector(self) -> Vector<u8> {
1914        self.as_bytes().into_vector()
1915    }
1916}
1917
1918impl BytesLike for char {
1919    fn into_vector(self) -> Vector<u8> {
1920        let mut buf: [u8; 4] = Default::default();
1921        self.encode_utf8(&mut buf).into_vector()
1922    }
1923}
1924
1925impl<'a> BytesLike for &'a char {
1926    fn into_vector(self) -> Vector<u8> {
1927        (*self).into_vector()
1928    }
1929}
1930/// A trait for types that can be converted into a [`Vector<u8>`] containing
1931/// valid UTF-8.
1932///
1933/// This trait acts as a bound for certain safe functions that construct or
1934/// insert into [`Rope`]s.
1935///
1936/// # Safety
1937/// This trait is `unsafe` because implementors must guarantee that their
1938/// implementation of [`BytesLike::into_vector`] always returns valid UTF-8.
1939pub unsafe trait StrLike: BytesLike {
1940    /// Convert `self` to a `Rope`.
1941    fn into_rope(self) -> Rope {
1942        unsafe { Rope::from_vector_unchecked(self.into_vector()) }
1943    }
1944}
1945
1946// SAFETY: each of these types guarantees the UTF-8 validity of its contents.
1947unsafe impl StrLike for Rope {}
1948unsafe impl<'a> StrLike for &'a Rope {}
1949unsafe impl<'a> StrLike for &'a str {}
1950unsafe impl StrLike for String {}
1951unsafe impl<'a> StrLike for &'a String {}
1952unsafe impl StrLike for char {}
1953unsafe impl<'a> StrLike for &'a char {}
1954
1955/// A possible error value when converting a [`Vector<u8>`] into a [`Rope`].
1956#[derive(Debug, Clone, PartialEq, Eq)]
1957pub struct FromUtf8Error {
1958    vector: Vector<u8>,
1959    error: Utf8Error,
1960}
1961
1962impl FromUtf8Error {
1963    /// Returns a reference to the vector that was being converted.
1964    #[inline]
1965    #[must_use]
1966    pub fn as_vector(&self) -> &Vector<u8> {
1967        &self.vector
1968    }
1969
1970    /// Returns the vector that was being converted.
1971    #[inline]
1972    #[must_use]
1973    pub fn into_vector(self) -> Vector<u8> {
1974        self.vector
1975    }
1976
1977    /// Returns the details of the conversion error.
1978    #[inline]
1979    #[must_use]
1980    pub fn utf8_error(&self) -> Utf8Error {
1981        self.error
1982    }
1983}
1984
1985impl std::fmt::Display for FromUtf8Error {
1986    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1987        std::fmt::Display::fmt(&self.error, f)
1988    }
1989}
1990
1991impl std::error::Error for FromUtf8Error {}
1992
1993/// An error returned when attempting to divide a [`Rope`] at a location that is
1994/// not a UTF-8 character boundary.
1995#[derive(Debug, Copy, Clone, PartialEq, Eq)]
1996pub struct Utf8BoundaryError(usize);
1997
1998impl Utf8BoundaryError {
1999    /// Returns the location where the split was attempted.
2000    #[inline]
2001    #[must_use]
2002    pub fn location(&self) -> usize {
2003        self.0
2004    }
2005}
2006
2007impl std::fmt::Display for Utf8BoundaryError {
2008    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
2009        write!(f, "Index {} is not at a UTF-8 character boundary", self.0)
2010    }
2011}
2012
2013impl std::error::Error for Utf8BoundaryError {}
2014
2015///An iterator over the bytes of a rope.
2016///
2017/// Call [`bytes`](Rope::bytes) or [`into_bytes`](Rope::into_bytes) to construct
2018/// one.
2019pub struct Bytes<A> {
2020    inner: A,
2021}
2022
2023impl<'a> Bytes<BorrowingAccessor<'a>> {
2024    fn borrowed(rope: &'a Rope) -> Bytes<BorrowingAccessor<'a>> {
2025        Bytes {
2026            inner: BorrowingAccessor::new(rope.as_ref()),
2027        }
2028    }
2029}
2030
2031impl Bytes<OwningAccessor> {
2032    fn owned(rope: Rope) -> Bytes<OwningAccessor> {
2033        Bytes {
2034            inner: OwningAccessor::new(rope.into()),
2035        }
2036    }
2037}
2038
2039impl<A> ToOwning for Bytes<A>
2040where
2041    A: Accessor,
2042{
2043    type Owning = Bytes<A::Owning>;
2044
2045    fn to_owning(&self) -> Self::Owning {
2046        Bytes {
2047            inner: self.inner.to_owning(),
2048        }
2049    }
2050}
2051
2052impl<A> IntoOwning for Bytes<A>
2053where
2054    A: Accessor,
2055{
2056    fn into_owning(self) -> Self::Owning {
2057        Bytes {
2058            inner: self.inner.into_owning(),
2059        }
2060    }
2061}
2062
2063impl<A> Iterator for Bytes<A>
2064where
2065    A: Accessor,
2066{
2067    type Item = u8;
2068
2069    fn next(&mut self) -> Option<Self::Item> {
2070        Some(self.inner.front_byte()?.1)
2071    }
2072
2073    fn size_hint(&self) -> (usize, Option<usize>) {
2074        let len = self.inner.back_index() - self.inner.front_index();
2075        (len, Some(len))
2076    }
2077
2078    fn last(mut self) -> Option<Self::Item> {
2079        self.next_back()
2080    }
2081}
2082
2083impl<A> DoubleEndedIterator for Bytes<A>
2084where
2085    A: Accessor,
2086{
2087    fn next_back(&mut self) -> Option<Self::Item> {
2088        Some(self.inner.back_byte()?.1)
2089    }
2090}
2091
2092impl<A> FusedIterator for Bytes<A> where A: Accessor {}
2093
2094/** An iterator over the [`char`]s of a [`Rope`].
2095 *
2096 * Call [`Rope::chars`] to construct one. */
2097pub struct Chars<A> {
2098    inner: A,
2099}
2100
2101impl<'a> Chars<BorrowingAccessor<'a>> {
2102    fn borrowed(rope: &'a Rope) -> Chars<BorrowingAccessor<'a>> {
2103        Chars {
2104            inner: BorrowingAccessor::new(rope.as_ref()),
2105        }
2106    }
2107}
2108
2109impl Chars<OwningAccessor> {
2110    fn owned(rope: Rope) -> Chars<OwningAccessor> {
2111        Chars {
2112            inner: OwningAccessor::new(rope.into()),
2113        }
2114    }
2115}
2116
2117impl<A> ToOwning for Chars<A>
2118where
2119    A: Accessor,
2120{
2121    type Owning = Chars<A::Owning>;
2122
2123    fn to_owning(&self) -> Self::Owning {
2124        Chars {
2125            inner: self.inner.to_owning(),
2126        }
2127    }
2128}
2129
2130impl<A> IntoOwning for Chars<A>
2131where
2132    A: Accessor,
2133{
2134    fn into_owning(self) -> Self::Owning {
2135        Chars {
2136            inner: self.inner.into_owning(),
2137        }
2138    }
2139}
2140
2141impl<A> Iterator for Chars<A>
2142where
2143    A: Accessor,
2144{
2145    type Item = char;
2146
2147    #[inline]
2148    fn next(&mut self) -> Option<char> {
2149        // SAFETY: `Rope` invariant says iteration is over a valid UTF-8 string and
2150        // the resulting `ch` is a valid Unicode Scalar Value.
2151        unsafe { next_code_point(&mut self.inner.byte_iter()).map(|ch| char_from_u32_debug(ch)) }
2152    }
2153
2154    #[inline]
2155    fn size_hint(&self) -> (usize, Option<usize>) {
2156        let len = self.inner.back_index() - self.inner.front_index();
2157        (len.saturating_add(3) / 4, Some(len))
2158    }
2159
2160    #[inline]
2161    fn last(mut self) -> Option<char> {
2162        // No need to go through the entire string.
2163        self.next_back()
2164    }
2165}
2166
2167impl<A> DoubleEndedIterator for Chars<A>
2168where
2169    A: Accessor,
2170{
2171    #[inline]
2172    fn next_back(&mut self) -> Option<char> {
2173        // SAFETY: `Rope` invariant says iteration is over a valid UTF-8 string and
2174        // the resulting `ch` is a valid Unicode Scalar Value.
2175        unsafe {
2176            next_code_point_reverse(&mut self.inner.byte_iter()).map(|ch| char_from_u32_debug(ch))
2177        }
2178    }
2179}
2180
2181impl<A> FusedIterator for Chars<A> where A: Accessor {}
2182/** An iterator over the [`char`]s of a [`Rope`] and their indices.
2183 *
2184 * Call [`Rope::char_indices`] to construct one.
2185 */
2186pub struct CharIndices<A> {
2187    inner: A,
2188}
2189
2190impl<A> CharIndices<A>
2191where
2192    A: Accessor,
2193{
2194    fn new(accessor: A) -> CharIndices<A> {
2195        CharIndices { inner: accessor }
2196    }
2197}
2198
2199impl<'a> CharIndices<BorrowingAccessor<'a>> {
2200    fn borrowed(rope: &'a Rope) -> CharIndices<BorrowingAccessor<'a>> {
2201        CharIndices {
2202            inner: BorrowingAccessor::new(rope.as_ref()),
2203        }
2204    }
2205}
2206
2207impl CharIndices<OwningAccessor> {
2208    fn owned(rope: Rope) -> CharIndices<OwningAccessor> {
2209        CharIndices {
2210            inner: OwningAccessor::new(rope.into()),
2211        }
2212    }
2213}
2214
2215impl<A> ToOwning for CharIndices<A>
2216where
2217    A: Accessor,
2218{
2219    type Owning = CharIndices<A::Owning>;
2220
2221    fn to_owning(&self) -> Self::Owning {
2222        CharIndices {
2223            inner: self.inner.to_owning(),
2224        }
2225    }
2226}
2227
2228impl<A> IntoOwning for CharIndices<A>
2229where
2230    A: Accessor,
2231{
2232    fn into_owning(self) -> Self::Owning {
2233        CharIndices {
2234            inner: self.inner.into_owning(),
2235        }
2236    }
2237}
2238
2239impl<A> Iterator for CharIndices<A>
2240where
2241    A: Accessor,
2242{
2243    type Item = (usize, char);
2244
2245    fn next(&mut self) -> Option<Self::Item> {
2246        let i = self.inner.front_index();
2247        // SAFETY: `Rope` invariant says iteration is over a valid UTF-8 string and
2248        // the resulting `ch` is a valid Unicode Scalar Value.
2249        unsafe {
2250            next_code_point(&mut self.inner.byte_iter()).map(|ch| (i, char_from_u32_debug(ch)))
2251        }
2252    }
2253
2254    #[inline]
2255    fn size_hint(&self) -> (usize, Option<usize>) {
2256        let len = self.inner.back_index() - self.inner.front_index();
2257        (len.saturating_add(3) / 4, Some(len))
2258    }
2259
2260    #[inline]
2261    fn last(mut self) -> Option<(usize, char)> {
2262        // No need to go through the entire string.
2263        self.next_back()
2264    }
2265}
2266
2267impl<A> DoubleEndedIterator for CharIndices<A>
2268where
2269    A: Accessor,
2270{
2271    #[inline]
2272    fn next_back(&mut self) -> Option<(usize, char)> {
2273        // SAFETY: `Rope` invariant says iteration is over a valid UTF-8 string and
2274        // the resulting `ch` is a valid Unicode Scalar Value.
2275        unsafe {
2276            next_code_point_reverse(&mut self.inner.byte_iter())
2277                .map(|ch| (self.inner.back_index(), char_from_u32_debug(ch)))
2278        }
2279    }
2280}
2281
2282impl<A> FusedIterator for CharIndices<A> where A: Accessor {}
2283
2284/** A chunk of a [`Rope`].
2285 *
2286 * These are returned from the [`Chunks`] iterator. Each chunk is either a substring
2287 * stored contiguously in a single leaf of the underlying RRB vector, or a single
2288 * character whose encoded representation straddles two leaves.
2289 */
2290#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
2291pub enum Chunk<'a> {
2292    /// A contiguously-stored substring
2293    Str(&'a str),
2294    /// A non-contiguously stored character
2295    Char(char),
2296}
2297/** An iterator over chunks of a [`Rope`].
2298 *
2299 * Call [`chunks`](Rope::chunks) to construct one.
2300 */
2301pub struct Chunks<'a> {
2302    /* Invariants:
2303    1. We are iterating over a Vector containing already-validated UTF-8.
2304    2. Anything already read from `self.inner` but not yet returned to the caller
2305       is stored in `self.unconsumed_fwd` or `self.unconsumed_back`.
2306    3. `self.unconsumed_fwd` always begins on a character boundary, but does not
2307       necessarily end on one.
2308    4. `self.unconsumed_back` always ends on a character boundary, but does not
2309       necessarily begin on one.
2310    */
2311    inner: vector::Chunks<'a, u8>,
2312    unconsumed_fwd: &'a [u8],
2313    unconsumed_back: &'a [u8],
2314}
2315
2316impl<'a> Iterator for Chunks<'a> {
2317    type Item = Chunk<'a>;
2318
2319    #[allow(clippy::redundant_else)]
2320    fn next(&mut self) -> Option<Self::Item> {
2321        while self.unconsumed_fwd.is_empty() {
2322            if let Some(unconsumed) = self.inner.next() {
2323                self.unconsumed_fwd = unconsumed;
2324            } else {
2325                if self.unconsumed_back.is_empty() {
2326                    return None;
2327                }
2328                self.unconsumed_fwd = self.unconsumed_back;
2329                self.unconsumed_back = &[];
2330            }
2331        }
2332
2333        let mut unconsumed = self.unconsumed_fwd;
2334
2335        let start_of_last_char_option =
2336            (0..unconsumed.len()).rfind(|&i| utf8_is_first_byte(unconsumed[i]));
2337
2338        let start_of_last_char = unsafe {
2339            // SAFETY: Per the invaraiant, unconsumed always begins on a
2340            // character boundary, and just above we ensured that unconsumed is
2341            // non-empty. Therefore, it must always contain a first byte, so the
2342            // search must always succeed.
2343            start_of_last_char_option.unwrap_unchecked()
2344        };
2345
2346        let last_char_width = utf8_char_width(unconsumed[start_of_last_char]);
2347
2348        if start_of_last_char + last_char_width == unconsumed.len() {
2349            // `unconsumed` ends on a character boundary, so we can return the
2350            // entire thing.
2351            let ret = unsafe {
2352                // SAFETY: Per the invariant, we have valid UTF-8 that begins on
2353                // a character boundary, and we've just ensured that it ends on
2354                // one as well.
2355                str_from_utf8_debug(unconsumed)
2356            };
2357            self.unconsumed_fwd = &[];
2358            return Some(Chunk::Str(ret));
2359        } else if start_of_last_char > 0 {
2360            // The last character is incomplete, but we have some complete ones
2361            // before it, so return those and leave the last character
2362            // unconsumed.
2363            let ret = unsafe {
2364                // SAFETY: Per the invariant, we have valid UTF-8 that begins on
2365                // a character boundary, and we're slicing it so that it ends on
2366                // one as well.
2367                str_from_utf8_debug(&unconsumed[..start_of_last_char])
2368            };
2369            self.unconsumed_fwd = &unconsumed[start_of_last_char..];
2370            return Some(Chunk::Str(ret));
2371        } else {
2372            // We don't have any complete character, so we need to keep reading
2373            // until we do.
2374            let mut bytes_available = unconsumed.len();
2375            // 4 is the maximum UTF-8 encoded character length
2376            let mut buf: [u8; 4] = Default::default();
2377
2378            buf[..bytes_available].copy_from_slice(unconsumed);
2379            unconsumed = &[];
2380
2381            while bytes_available < last_char_width {
2382                while unconsumed.is_empty() {
2383                    unconsumed = self.inner.next().unwrap_or_else(|| {
2384                        let ret = self.unconsumed_back;
2385                        self.unconsumed_back = &[];
2386                        ret
2387                    });
2388                }
2389                buf[bytes_available] = unconsumed[0];
2390                unconsumed = &unconsumed[1..];
2391                bytes_available += 1;
2392            }
2393
2394            // We now have a complete charater in `buf`. Save the unconsumed
2395            // output and then decode and return that character.
2396            self.unconsumed_fwd = unconsumed;
2397
2398            // SAFETY: `buf` now contains exactly one valid UTF-8 character.
2399            unsafe {
2400                let c = next_code_point(&mut buf.iter().copied()).unwrap_unchecked();
2401                return Some(Chunk::Char(char_from_u32_debug(c)));
2402            }
2403        }
2404    }
2405}
2406
2407impl<'a> DoubleEndedIterator for Chunks<'a> {
2408    #[allow(clippy::redundant_else)]
2409    fn next_back(&mut self) -> Option<Self::Item> {
2410        while self.unconsumed_back.is_empty() {
2411            if let Some(unconsumed) = self.inner.next_back() {
2412                self.unconsumed_back = unconsumed;
2413            } else {
2414                if self.unconsumed_fwd.is_empty() {
2415                    return None;
2416                }
2417                self.unconsumed_back = self.unconsumed_fwd;
2418                self.unconsumed_fwd = &[];
2419            }
2420        }
2421
2422        let mut unconsumed = self.unconsumed_back;
2423
2424        let start_of_first_char = (0..unconsumed.len())
2425            .find(|&i| utf8_is_first_byte(unconsumed[i]))
2426            .unwrap_or(unconsumed.len());
2427
2428        if start_of_first_char == 0 {
2429            // `unconsumed` starts on a character boundary, so we can return the
2430            // entire thing.
2431            let ret = unsafe {
2432                // SAFETY: Per the invariant, we have valid UTF-8 that ends on
2433                // a character boundary, and we've just ensured that it begins on
2434                // one as well.
2435                str_from_utf8_debug(unconsumed)
2436            };
2437            self.unconsumed_back = &[];
2438            return Some(Chunk::Str(ret));
2439        } else if start_of_first_char < unconsumed.len() {
2440            // The buffer begins with an incomplete character, but we have some
2441            // complete ones after it, so return those and leave the beginning
2442            // unconsumed.
2443
2444            let ret = unsafe {
2445                // SAFETY: Per the invariant, we have valid UTF-8 that begins on
2446                // a character boundary, and we're slicing it so that it ends on
2447                // one as well.
2448                str_from_utf8_debug(&unconsumed[start_of_first_char..])
2449            };
2450            self.unconsumed_back = &unconsumed[..start_of_first_char];
2451            return Some(Chunk::Str(ret));
2452        } else {
2453            // We don't have any complete character, so we need to keep reading
2454            // until we do.
2455            let mut bytes_available = unconsumed.len();
2456            // 4 is the maximum UTF-8 encoded character length
2457            let mut buf: [u8; 4] = Default::default();
2458
2459            buf[4 - bytes_available..].copy_from_slice(unconsumed);
2460            unconsumed = &[];
2461
2462            while !utf8_is_first_byte(buf[4 - bytes_available]) {
2463                while unconsumed.is_empty() {
2464                    unconsumed = self.inner.next_back().unwrap_or_else(|| {
2465                        let ret = self.unconsumed_fwd;
2466                        self.unconsumed_fwd = &[];
2467                        ret
2468                    });
2469                }
2470
2471                buf[3 - bytes_available] = unconsumed[unconsumed.len() - 1];
2472                unconsumed = &unconsumed[..unconsumed.len() - 1];
2473                bytes_available += 1;
2474            }
2475
2476            // We now have a complete charater in `buf`. Save the unconsumed
2477            // output and then decode and return that character.
2478            self.unconsumed_back = unconsumed;
2479            // SAFETY: `buf` now contains exactly one valid UTF-8 character.
2480            unsafe {
2481                let c = next_code_point_reverse(&mut buf.iter().copied()).unwrap_unchecked();
2482                return Some(Chunk::Char(char_from_u32_debug(c)));
2483            }
2484        }
2485    }
2486}
2487
2488impl<'a> FusedIterator for Chunks<'a> {}
2489///An iterator returned by [`find_all`](Rope::rfind_all).
2490pub struct FindAll<A, P>
2491where
2492    P: Pattern,
2493    A: Accessor,
2494{
2495    inner: P::FindAllImpl<A>,
2496}
2497
2498impl<A, P> FindAll<A, P>
2499where
2500    A: Accessor,
2501    P: Pattern,
2502{
2503    fn new(inner: P::FindAllImpl<A>) -> FindAll<A, P> {
2504        FindAll { inner }
2505    }
2506}
2507
2508impl<A, P> ToOwning for FindAll<A, P>
2509where
2510    P: Pattern,
2511    A: Accessor,
2512{
2513    type Owning = FindAll<OwningAccessor, P::Owned>;
2514
2515    fn to_owning(&self) -> Self::Owning {
2516        FindAll {
2517            inner: P::_convert_to_owning(&self.inner),
2518        }
2519    }
2520}
2521
2522impl<A, P> IntoOwning for FindAll<A, P>
2523where
2524    P: Pattern,
2525    A: Accessor,
2526{
2527    fn into_owning(self) -> Self::Owning {
2528        FindAll {
2529            inner: P::_convert_into_owning(self.inner),
2530        }
2531    }
2532}
2533
2534impl<A, P> Iterator for FindAll<A, P>
2535where
2536    P: Pattern,
2537    A: Accessor,
2538{
2539    type Item = (Range<usize>, P::Output);
2540
2541    fn next(&mut self) -> Option<Self::Item> {
2542        self.inner.next()
2543    }
2544
2545    fn size_hint(&self) -> (usize, Option<usize>) {
2546        self.inner.size_hint()
2547    }
2548}
2549
2550impl<A, P> FusedIterator for FindAll<A, P>
2551where
2552    P: Pattern,
2553    A: Accessor,
2554{
2555}
2556
2557impl<A, P> DoubleEndedIterator for FindAll<A, P>
2558where
2559    P: Pattern,
2560    A: Accessor,
2561    P::FindAllImpl<A>: DoubleEndedIterator,
2562{
2563    fn next_back(&mut self) -> Option<Self::Item> {
2564        self.inner.next_back()
2565    }
2566}
2567
2568///An iterator returned by [`rfind_all`](Rope::rfind_all).
2569pub struct RFindAll<A, P>
2570where
2571    P: Pattern,
2572    A: Accessor,
2573{
2574    inner: P::RFindAllImpl<A>,
2575}
2576
2577impl<A, P> RFindAll<A, P>
2578where
2579    A: Accessor,
2580    P: Pattern,
2581{
2582    fn new(inner: P::RFindAllImpl<A>) -> RFindAll<A, P> {
2583        RFindAll { inner }
2584    }
2585}
2586
2587impl<A, P> ToOwning for RFindAll<A, P>
2588where
2589    P: Pattern,
2590    A: Accessor,
2591{
2592    type Owning = RFindAll<OwningAccessor, P::Owned>;
2593
2594    fn to_owning(&self) -> Self::Owning {
2595        RFindAll {
2596            inner: P::_rconvert_to_owning(&self.inner),
2597        }
2598    }
2599}
2600
2601impl<A, P> IntoOwning for RFindAll<A, P>
2602where
2603    P: Pattern,
2604    A: Accessor,
2605{
2606    fn into_owning(self) -> Self::Owning {
2607        RFindAll {
2608            inner: P::_rconvert_into_owning(self.inner),
2609        }
2610    }
2611}
2612
2613impl<A, P> Iterator for RFindAll<A, P>
2614where
2615    P: Pattern,
2616    A: Accessor,
2617{
2618    type Item = (Range<usize>, P::Output);
2619
2620    fn next(&mut self) -> Option<Self::Item> {
2621        self.inner.next()
2622    }
2623
2624    fn size_hint(&self) -> (usize, Option<usize>) {
2625        self.inner.size_hint()
2626    }
2627}
2628
2629impl<A, P> DoubleEndedIterator for RFindAll<A, P>
2630where
2631    P: Pattern,
2632    A: Accessor,
2633    P::RFindAllImpl<A>: DoubleEndedIterator,
2634{
2635    fn next_back(&mut self) -> Option<Self::Item> {
2636        self.inner.next_back()
2637    }
2638}
2639
2640impl<A, P> FusedIterator for RFindAll<A, P>
2641where
2642    P: Pattern,
2643    A: Accessor,
2644{
2645}
2646
2647struct SplitImpl<A, M, const LIMITED: bool, const TERMINATED: bool, const INCLUSIVE: bool> {
2648    haystack: A,
2649    matcher: M,
2650    limit: usize,
2651    empty_at_back: bool,
2652}
2653
2654impl<A, P, const LIMITED: bool, const TERMINATED: bool, const INCLUSIVE: bool>
2655    SplitImpl<A, FindAll<A, P>, LIMITED, TERMINATED, INCLUSIVE>
2656where
2657    A: Accessor,
2658    P: Pattern,
2659{
2660    fn new_forward(
2661        haystack: A,
2662        needle: P,
2663        limit: usize,
2664    ) -> SplitImpl<A, FindAll<A, P>, LIMITED, TERMINATED, INCLUSIVE> {
2665        SplitImpl {
2666            matcher: FindAll::new(needle._find_all(haystack.shallow_clone())),
2667            haystack,
2668            limit,
2669            empty_at_back: !TERMINATED,
2670        }
2671    }
2672}
2673
2674impl<A, P, const LIMITED: bool, const TERMINATED: bool, const INCLUSIVE: bool>
2675    SplitImpl<A, RFindAll<A, P>, LIMITED, TERMINATED, INCLUSIVE>
2676where
2677    A: Accessor,
2678    P: Pattern,
2679{
2680    fn new_backward(
2681        haystack: A,
2682        needle: P,
2683        limit: usize,
2684    ) -> SplitImpl<A, RFindAll<A, P>, LIMITED, TERMINATED, INCLUSIVE> {
2685        SplitImpl {
2686            matcher: RFindAll::new(needle._rfind_all(haystack.shallow_clone())),
2687            haystack,
2688            limit,
2689            empty_at_back: !TERMINATED,
2690        }
2691    }
2692}
2693
2694impl<A, M, const LIMITED: bool, const TERMINATED: bool, const INCLUSIVE: bool>
2695    SplitImpl<A, M, LIMITED, TERMINATED, INCLUSIVE>
2696where
2697    A: Accessor,
2698{
2699    // SAFETY: ranges returned by `next_match` must have both ends on
2700    // UTF-8 character boundaries.
2701    unsafe fn forward<F: FnMut(&mut Self) -> Option<Range<usize>>>(
2702        &mut self,
2703        next_match: &mut F,
2704    ) -> Option<Rope> {
2705        if LIMITED && self.limit == 0 {
2706            return None;
2707        }
2708        match next_match(self) {
2709            Some(range) if !LIMITED || self.limit > 1 => {
2710                let mut v = self.haystack.take_front(range.end);
2711
2712                if !INCLUSIVE {
2713                    v.truncate(v.len() - (range.end - range.start));
2714                }
2715
2716                if LIMITED {
2717                    self.limit -= 1;
2718                }
2719                Some(Rope::from_vector_unchecked(v))
2720            }
2721            _ => {
2722                let ret = Rope::from_vector_unchecked(
2723                    self.haystack.take_back(self.haystack.front_index()),
2724                );
2725
2726                let empty_at_back = self.empty_at_back;
2727                self.empty_at_back = false;
2728
2729                if empty_at_back || !ret.is_empty() {
2730                    if LIMITED {
2731                        self.limit -= 1;
2732                    }
2733                    Some(ret)
2734                } else {
2735                    None
2736                }
2737            }
2738        }
2739    }
2740
2741    // SAFETY: ranges returned by `next_match` must have both ends on
2742    // UTF-8 character boundaries.
2743    unsafe fn backward<F: FnMut(&mut Self) -> Option<Range<usize>>>(
2744        &mut self,
2745        next_match: &mut F,
2746    ) -> Option<Rope> {
2747        if LIMITED && self.limit == 0 {
2748            return None;
2749        }
2750
2751        match next_match(self) {
2752            Some(range) if !LIMITED || self.limit > 1 => {
2753                let rope = Rope::from_vector_unchecked(self.haystack.take_back(range.end));
2754
2755                if !INCLUSIVE {
2756                    self.haystack.take_back(range.start);
2757                }
2758
2759                let empty_at_back = self.empty_at_back;
2760                self.empty_at_back = true;
2761
2762                if rope.is_empty() && !empty_at_back {
2763                    self.backward(next_match)
2764                } else {
2765                    if LIMITED {
2766                        self.limit -= 1;
2767                    }
2768                    Some(rope)
2769                }
2770            }
2771            _ => {
2772                let rope = Rope::from_vector_unchecked(
2773                    self.haystack.take_back(self.haystack.front_index()),
2774                );
2775
2776                if rope.is_empty() && !self.empty_at_back {
2777                    None
2778                } else {
2779                    self.empty_at_back = false;
2780                    if LIMITED {
2781                        self.limit -= 1;
2782                    }
2783                    Some(rope)
2784                }
2785            }
2786        }
2787    }
2788}
2789
2790impl<A, M, const LIMITED: bool, const TERMINATED: bool, const INCLUSIVE: bool> ToOwning
2791    for SplitImpl<A, M, LIMITED, TERMINATED, INCLUSIVE>
2792where
2793    A: Accessor,
2794    M: ToOwning,
2795{
2796    type Owning = SplitImpl<A::Owning, M::Owning, LIMITED, TERMINATED, INCLUSIVE>;
2797
2798    fn to_owning(&self) -> Self::Owning {
2799        SplitImpl {
2800            haystack: self.haystack.to_owning(),
2801            matcher: self.matcher.to_owning(),
2802            limit: self.limit.to_owning(),
2803            empty_at_back: self.empty_at_back.to_owning(),
2804        }
2805    }
2806}
2807
2808impl<A, M, const LIMITED: bool, const TERMINATED: bool, const INCLUSIVE: bool> IntoOwning
2809    for SplitImpl<A, M, LIMITED, TERMINATED, INCLUSIVE>
2810where
2811    A: Accessor,
2812    M: IntoOwning,
2813{
2814    fn into_owning(self) -> Self::Owning {
2815        SplitImpl {
2816            haystack: self.haystack.into_owning(),
2817            matcher: self.matcher.into_owning(),
2818            limit: self.limit.into_owning(),
2819            empty_at_back: self.empty_at_back.into_owning(),
2820        }
2821    }
2822}
2823
2824impl<A, P, const LIMITED: bool, const TERMINATED: bool, const INCLUSIVE: bool> Iterator
2825    for SplitImpl<A, FindAll<A, P>, LIMITED, TERMINATED, INCLUSIVE>
2826where
2827    A: Accessor,
2828    P: Pattern,
2829{
2830    type Item = Rope;
2831
2832    fn next(&mut self) -> Option<Self::Item> {
2833        let mut next_match = |s: &mut Self| {
2834            let (range, _) = s.matcher.next()?;
2835            Some(range)
2836        };
2837
2838        // SAFETY: next_match must yield ranges whose ends fall on UTF-8
2839        // character boundaries. These ranges come from the pattern's
2840        // `_find_all` method. All `Pattern` implementations in this crate assure
2841        // that this holds, and since `Pattern` is a sealed trait, the user
2842        // cannot supply other implementations.
2843        unsafe { self.forward(&mut next_match) }
2844    }
2845}
2846
2847impl<A, P, const TERMINATED: bool, const INCLUSIVE: bool> DoubleEndedIterator
2848    for SplitImpl<A, FindAll<A, P>, false, TERMINATED, INCLUSIVE>
2849where
2850    A: Accessor,
2851    P: Pattern,
2852    P::FindAllImpl<A>: DoubleEndedIterator,
2853{
2854    fn next_back(&mut self) -> Option<Self::Item> {
2855        let mut next_match = |s: &mut Self| {
2856            let (range, _) = s.matcher.next_back()?;
2857            Some(range)
2858        };
2859
2860        // SAFETY: next_match must yield ranges whose ends fall on UTF-8
2861        // character boundaries. These ranges come from the pattern's
2862        // `_find_all` method. All `Pattern` implementations in this crate assure
2863        // that this holds, and since `Pattern` is a sealed trait, the user
2864        // cannot supply other implementations.
2865        unsafe { self.backward(&mut next_match) }
2866    }
2867}
2868
2869impl<A, P, const LIMITED: bool, const TERMINATED: bool, const INCLUSIVE: bool> Iterator
2870    for SplitImpl<A, RFindAll<A, P>, LIMITED, TERMINATED, INCLUSIVE>
2871where
2872    A: Accessor,
2873    P: Pattern,
2874{
2875    type Item = Rope;
2876
2877    fn next(&mut self) -> Option<Self::Item> {
2878        let mut next_match = |s: &mut Self| {
2879            let (range, _) = s.matcher.next()?;
2880            Some(range)
2881        };
2882
2883        // SAFETY: next_match must yield ranges whose ends fall on UTF-8
2884        // character boundaries. These ranges come from the pattern's
2885        // `_rfind_all` method. All `Pattern` implementations in this crate assure
2886        // that this holds, and since `Pattern` is a sealed trait, the user
2887        // cannot supply other implementations.
2888        unsafe { self.backward(&mut next_match) }
2889    }
2890}
2891
2892impl<A, P, const TERMINATED: bool, const INCLUSIVE: bool> DoubleEndedIterator
2893    for SplitImpl<A, RFindAll<A, P>, false, TERMINATED, INCLUSIVE>
2894where
2895    A: Accessor,
2896    P: Pattern,
2897    P::RFindAllImpl<A>: DoubleEndedIterator,
2898{
2899    fn next_back(&mut self) -> Option<Self::Item> {
2900        let mut next_match = |s: &mut Self| {
2901            let (range, _) = s.matcher.next_back()?;
2902            Some(range)
2903        };
2904
2905        // SAFETY: next_match must yield ranges whose ends fall on UTF-8
2906        // character boundaries. These ranges come from the pattern's
2907        // `_rfind_all` method. All `Pattern` implementations in this crate assure
2908        // that this holds, and since `Pattern` is a sealed trait, the user
2909        // cannot supply other implementations.
2910        unsafe { self.forward(&mut next_match) }
2911    }
2912}
2913
2914macro_rules! def_split {
2915    ($name:ident, $doc: literal, $limited:expr, $terminated:expr, $inclusive:expr) => {
2916        #[doc = $doc]
2917        pub struct $name<A, P>
2918        where
2919            A: Accessor,
2920            P: Pattern,
2921        {
2922            inner: SplitImpl<A, FindAll<A, P>, $limited, $terminated, $inclusive>,
2923        }
2924
2925        impl<'h, P> $name<BorrowingAccessor<'h>, P>
2926        where
2927            P: Pattern,
2928        {
2929            fn new(haystack: &'h Rope, needle: P, limit: usize) -> $name<BorrowingAccessor<'h>, P> {
2930                let accessor = BorrowingAccessor::new(haystack.as_ref());
2931                $name {
2932                    inner: SplitImpl::new_forward(accessor, needle, limit),
2933                }
2934            }
2935        }
2936
2937        impl<A, P> ToOwning for $name<A, P>
2938        where
2939            A: Accessor,
2940            P: Pattern,
2941        {
2942            type Owning = $name<A::Owning, P::Owned>;
2943
2944            fn to_owning(&self) -> Self::Owning {
2945                $name {
2946                    inner: self.inner.to_owning(),
2947                }
2948            }
2949        }
2950
2951        impl<A, P> IntoOwning for $name<A, P>
2952        where
2953            A: Accessor,
2954            P: Pattern,
2955        {
2956            fn into_owning(self) -> Self::Owning {
2957                $name {
2958                    inner: self.inner.into_owning(),
2959                }
2960            }
2961        }
2962
2963        impl<A, P> Iterator for $name<A, P>
2964        where
2965            A: Accessor,
2966            P: Pattern,
2967        {
2968            type Item = Rope;
2969
2970            fn next(&mut self) -> Option<Self::Item> {
2971                self.inner.next()
2972            }
2973
2974            fn size_hint(&self) -> (usize, Option<usize>) {
2975                self.inner.size_hint()
2976            }
2977        }
2978
2979        impl<A, P> FusedIterator for $name<A, P>
2980        where
2981            A: Accessor,
2982            P: Pattern,
2983        {
2984        }
2985    };
2986}
2987
2988macro_rules! def_split_double {
2989    ($name:ident) => {
2990        impl<A, P> DoubleEndedIterator for $name<A, P>
2991        where
2992            A: Accessor,
2993            P: Pattern,
2994            <P as Pattern>::FindAllImpl<A>: DoubleEndedIterator,
2995        {
2996            fn next_back(&mut self) -> Option<Self::Item> {
2997                self.inner.next_back()
2998            }
2999        }
3000    };
3001}
3002
3003def_split!(
3004    Split,
3005    "An iterator returned by [`split`](Rope::split).",
3006    false,
3007    false,
3008    false
3009);
3010def_split!(
3011    SplitN,
3012    "An iterator returned by [`splitn`](Rope::splitn).",
3013    true,
3014    false,
3015    false
3016);
3017def_split!(
3018    SplitTerminator,
3019    "An iterator returned by [`split_terminator`](Rope::split_terminator).",
3020    false,
3021    true,
3022    false
3023);
3024def_split!(
3025    SplitInclusive,
3026    "An iterator returned by [`split_inclusive`](Rope::split_inclusive).",
3027    false,
3028    true,
3029    true
3030);
3031def_split_double!(Split);
3032def_split_double!(SplitTerminator);
3033def_split_double!(SplitInclusive);
3034
3035macro_rules! def_rsplit {
3036    ($name:ident, $doc: literal, $limited:expr, $terminated:expr, $inclusive:expr) => {
3037        #[doc = $doc]
3038        pub struct $name<A, P>
3039        where
3040            A: Accessor,
3041            P: Pattern,
3042        {
3043            inner: SplitImpl<A, RFindAll<A, P>, $limited, $terminated, $inclusive>,
3044        }
3045
3046        impl<'h, P> $name<BorrowingAccessor<'h>, P>
3047        where
3048            P: Pattern,
3049        {
3050            fn new(haystack: &'h Rope, needle: P, limit: usize) -> $name<BorrowingAccessor<'h>, P> {
3051                let accessor = BorrowingAccessor::new(haystack.as_ref());
3052                $name {
3053                    inner: SplitImpl::new_backward(accessor, needle, limit),
3054                }
3055            }
3056        }
3057
3058        impl<A, P> ToOwning for $name<A, P>
3059        where
3060            A: Accessor,
3061            P: Pattern,
3062        {
3063            type Owning = $name<A::Owning, P::Owned>;
3064
3065            fn to_owning(&self) -> Self::Owning {
3066                $name {
3067                    inner: self.inner.to_owning(),
3068                }
3069            }
3070        }
3071
3072        impl<A, P> IntoOwning for $name<A, P>
3073        where
3074            A: Accessor,
3075            P: Pattern,
3076        {
3077            fn into_owning(self) -> Self::Owning {
3078                $name {
3079                    inner: self.inner.into_owning(),
3080                }
3081            }
3082        }
3083
3084        impl<A, P> Iterator for $name<A, P>
3085        where
3086            A: Accessor,
3087            P: Pattern,
3088        {
3089            type Item = Rope;
3090
3091            fn next(&mut self) -> Option<Self::Item> {
3092                self.inner.next()
3093            }
3094
3095            fn size_hint(&self) -> (usize, Option<usize>) {
3096                self.inner.size_hint()
3097            }
3098        }
3099
3100        impl<A, P> FusedIterator for $name<A, P>
3101        where
3102            A: Accessor,
3103            P: Pattern,
3104        {
3105        }
3106    };
3107}
3108
3109macro_rules! def_rsplit_double {
3110    ($name:ident) => {
3111        impl<A, P> DoubleEndedIterator for $name<A, P>
3112        where
3113            A: Accessor,
3114            P: Pattern,
3115            <P as Pattern>::RFindAllImpl<A>: DoubleEndedIterator,
3116        {
3117            fn next_back(&mut self) -> Option<Self::Item> {
3118                self.inner.next_back()
3119            }
3120        }
3121    };
3122}
3123
3124def_rsplit!(
3125    RSplit,
3126    "An iterator returned by [`rsplit`](Rope::rsplit).",
3127    false,
3128    false,
3129    false
3130);
3131def_rsplit!(
3132    RSplitN,
3133    "An iterator returned by [`rsplitn`](Rope::rsplitn).",
3134    true,
3135    false,
3136    false
3137);
3138def_rsplit!(
3139    RSplitTerminator,
3140    "An iterator returned by [`rsplit_terminator`](Rope::rsplit_terminator).",
3141    false,
3142    true,
3143    false
3144);
3145
3146def_rsplit_double!(RSplit);
3147def_rsplit_double!(RSplitTerminator);
3148
3149/// An iterator returned by [`lines`](Rope::lines).
3150pub struct Lines<A>
3151where
3152    A: Accessor,
3153{
3154    inner: SplitInclusive<A, char>,
3155}
3156
3157impl<'h> Lines<BorrowingAccessor<'h>> {
3158    fn borrowed(haystack: &'h Rope) -> Lines<BorrowingAccessor<'h>> {
3159        Lines {
3160            inner: haystack.split_inclusive('\n'),
3161        }
3162    }
3163}
3164
3165impl<A> Lines<A>
3166where
3167    A: Accessor,
3168{
3169    fn strip_ending(line: &mut Rope) {
3170        if line.back() == Some('\n') {
3171            line.pop_back();
3172            if line.back() == Some('\r') {
3173                line.pop_back();
3174            }
3175        }
3176    }
3177}
3178
3179impl<A> ToOwning for Lines<A>
3180where
3181    A: Accessor,
3182{
3183    type Owning = Lines<A::Owning>;
3184
3185    fn to_owning(&self) -> Self::Owning {
3186        Lines {
3187            inner: self.inner.to_owning(),
3188        }
3189    }
3190}
3191
3192impl<A> IntoOwning for Lines<A>
3193where
3194    A: Accessor,
3195{
3196    fn into_owning(self) -> Self::Owning {
3197        Lines {
3198            inner: self.inner.into_owning(),
3199        }
3200    }
3201}
3202
3203impl<A> Iterator for Lines<A>
3204where
3205    A: Accessor,
3206{
3207    type Item = Rope;
3208
3209    fn next(&mut self) -> Option<Self::Item> {
3210        let mut line = self.inner.next()?;
3211        Self::strip_ending(&mut line);
3212        Some(line)
3213    }
3214
3215    fn size_hint(&self) -> (usize, Option<usize>) {
3216        self.inner.size_hint()
3217    }
3218
3219    fn last(mut self) -> Option<Self::Item> {
3220        self.next_back()
3221    }
3222}
3223
3224impl<A> DoubleEndedIterator for Lines<A>
3225where
3226    A: Accessor,
3227{
3228    fn next_back(&mut self) -> Option<Self::Item> {
3229        let mut line = self.inner.next_back()?;
3230        Self::strip_ending(&mut line);
3231        Some(line)
3232    }
3233}
3234
3235impl<A> FusedIterator for Lines<A> where A: Accessor {}
3236
3237#[inline]
3238fn to_range_tuple<R: RangeBounds<usize>>(r: &R, len: usize) -> (usize, usize) {
3239    (
3240        match r.start_bound() {
3241            std::ops::Bound::Included(&i) => i,
3242            std::ops::Bound::Excluded(&i) => i + 1,
3243            std::ops::Bound::Unbounded => 0,
3244        },
3245        match r.end_bound() {
3246            std::ops::Bound::Included(&i) => i + 1,
3247            std::ops::Bound::Excluded(&i) => i,
3248            std::ops::Bound::Unbounded => len,
3249        },
3250    )
3251}
3252
3253// Probably this function is unnecessary? I don't think vectors ever really
3254// return empty chunks, but there's no documented guarantee of this.
3255fn next_nonempty<'a, I, A>(iter: &mut I) -> Option<&'a [A]>
3256where
3257    I: Iterator<Item = &'a [A]>,
3258{
3259    loop {
3260        if let Some(chunk) = iter.next() {
3261            if chunk.is_empty() {
3262                continue;
3263            }
3264            return Some(chunk);
3265        }
3266        return None;
3267    }
3268}
3269
3270// SAFETY: `ch` must be a valid Unicode codepoint.
3271unsafe fn char_from_u32_debug(ch: u32) -> char {
3272    if cfg!(debug_assertions) {
3273        char::from_u32(ch).unwrap()
3274    } else {
3275        char::from_u32_unchecked(ch)
3276    }
3277}
3278
3279// SAFETY: `bytes` must be valid UTF-8.
3280unsafe fn str_from_utf8_debug(bytes: &[u8]) -> &str {
3281    if cfg!(debug_assertions) {
3282        std::str::from_utf8(bytes).unwrap()
3283    } else {
3284        std::str::from_utf8_unchecked(bytes)
3285    }
3286}
3287
3288// SAFETY: `bytes` must be valid UTF-8.
3289unsafe fn string_from_utf8_debug(bytes: Vec<u8>) -> String {
3290    if cfg!(debug_assertions) {
3291        String::from_utf8(bytes).unwrap()
3292    } else {
3293        String::from_utf8_unchecked(bytes)
3294    }
3295}