rat_text/
grapheme.rs

1use crate::text_store::SkipLine;
2use crate::{Cursor, TextError};
3use ropey::RopeSlice;
4use ropey::iter::Chunks;
5use std::borrow::Cow;
6use std::cmp;
7use std::fmt::Debug;
8use std::ops::Range;
9use unicode_segmentation::{GraphemeCursor, GraphemeIncomplete};
10
/// One grapheme together with the byte-range it covers.
///
/// The text is held as a [`Cow`], so a grapheme can either borrow from the
/// iterated text or own a copy of it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Grapheme<'a> {
    /// Text of the grapheme.
    grapheme: Cow<'a, str>,
    /// Byte-range of the grapheme. The iterators in this file store the
    /// position inside the iterated slice plus the offset of that slice
    /// in the complete text.
    text_bytes: Range<usize>,
}
19
20impl PartialEq<&str> for Grapheme<'_> {
21    fn eq(&self, other: &&str) -> bool {
22        self.grapheme.as_ref() == *other
23    }
24}
25
26impl PartialEq<str> for Grapheme<'_> {
27    fn eq(&self, other: &str) -> bool {
28        self.grapheme.as_ref() == other
29    }
30}
31
32impl PartialEq<&String> for Grapheme<'_> {
33    fn eq(&self, other: &&String) -> bool {
34        self.grapheme.as_ref() == *other
35    }
36}
37
38impl PartialEq<String> for Grapheme<'_> {
39    fn eq(&self, other: &String) -> bool {
40        self.grapheme.as_ref() == other
41    }
42}
43
44impl PartialEq<char> for Grapheme<'_> {
45    fn eq(&self, other: &char) -> bool {
46        let mut chars = self.grapheme.chars();
47        chars.next() == Some(*other)
48    }
49}
50
51impl<'a> Grapheme<'a> {
52    pub fn new(grapheme: Cow<'a, str>, text_bytes: Range<usize>) -> Self {
53        Self {
54            grapheme,
55            text_bytes,
56        }
57    }
58
59    /// First (only) char of the grapheme is a whitespace.
60    #[inline]
61    pub fn is_whitespace(&self) -> bool {
62        self.grapheme
63            .chars()
64            .next()
65            .map(|v| v.is_whitespace())
66            .unwrap_or(false)
67    }
68
69    /// First (only) char of the grapheme is a whitespace.
70    #[inline]
71    pub fn is_alphanumeric(&self) -> bool {
72        self.grapheme
73            .chars()
74            .next()
75            .map(|v| v.is_alphanumeric())
76            .unwrap_or(false)
77    }
78
79    /// Is a linebreak.
80    #[inline]
81    #[allow(clippy::nonminimal_bool)]
82    pub fn is_line_break(&self) -> bool {
83        if cfg!(feature = "cr_lines") {
84            self.grapheme == "\r" || self.grapheme == "\n" || self.grapheme == "\r\n"
85        } else if cfg!(feature = "unicode_lines") {
86            self.grapheme == "\r"
87                || self.grapheme == "\n"
88                || self.grapheme == "\r\n"
89                || self.grapheme == "\u{000D}"
90                || self.grapheme == "\u{000C}"
91                || self.grapheme == "\u{000B}"
92                || self.grapheme == "\u{0085}"
93                || self.grapheme == "\u{2028}"
94                || self.grapheme == "\u{2029}"
95        } else {
96            self.grapheme == "\n" || self.grapheme == "\r\n"
97        }
98    }
99
100    /// Get the grapheme.
101    #[inline]
102    pub fn grapheme(&'a self) -> &'a str {
103        self.grapheme.as_ref()
104    }
105
106    /// Destructure to the grapheme.
107    #[inline]
108    pub fn into_parts(self) -> (Cow<'a, str>, Range<usize>) {
109        (self.grapheme, self.text_bytes)
110    }
111
112    /// Get the byte-range as absolute range into the complete text.
113    #[inline]
114    pub fn text_bytes(&self) -> Range<usize> {
115        self.text_bytes.clone()
116    }
117}
118
/// A cursor over graphemes of a string.
///
/// Positions reported to the outside are the position inside `text`
/// plus `text_offset`.
#[derive(Debug, Clone)]
pub struct StrGraphemes<'a> {
    /// Offset of `text` in the complete text.
    text_offset: usize,
    /// The str-slice that is iterated.
    text: &'a str,
    /// Grapheme cursor; its position is relative to the start of `text`.
    cursor: GraphemeCursor,
}
126
127impl<'a> StrGraphemes<'a> {
128    /// Iterate the graphemes of a str-slice.
129    ///
130    /// * slice_offset - offset of the slice in the complete text.
131    /// * slice - slice
132    ///
133    pub(crate) fn new(slice_offset: usize, slice: &'a str) -> Self {
134        Self {
135            text_offset: slice_offset,
136            text: slice,
137            cursor: GraphemeCursor::new(0, slice.len(), true),
138        }
139    }
140
141    /// Iterate the graphemes of a str-slice.
142    ///
143    /// * slice_offset - offset of the slice in the complete text.
144    /// * slice - slice
145    /// * offset - relative offset into the slice
146    ///
147    pub(crate) fn new_offset(slice_offset: usize, slice: &'a str, offset: usize) -> Self {
148        Self {
149            text_offset: slice_offset,
150            text: slice,
151            cursor: GraphemeCursor::new(offset, slice.len(), true),
152        }
153    }
154}
155
156impl Cursor for StrGraphemes<'_> {
157    fn prev(&mut self) -> Option<Self::Item> {
158        let start = self.cursor.cur_cursor();
159        let prev = self.cursor.prev_boundary(self.text, 0).unwrap()?;
160        Some(Grapheme {
161            grapheme: Cow::Borrowed(&self.text[prev..start]),
162            text_bytes: self.text_offset + prev..self.text_offset + start,
163        })
164    }
165
166    fn text_offset(&self) -> usize {
167        self.text_offset + self.cursor.cur_cursor()
168    }
169}
170
171impl SkipLine for StrGraphemes<'_> {
172    fn skip_line(&mut self) -> Result<(), TextError> {
173        self.cursor.set_cursor(self.text.len());
174        Ok(())
175    }
176
177    fn skip_to(&mut self, byte_pos: usize) -> Result<(), TextError> {
178        assert!(byte_pos >= self.text_offset);
179        let offset = byte_pos - self.text_offset;
180        self.cursor.set_cursor(offset);
181        Ok(())
182    }
183}
184
185impl<'a> Iterator for StrGraphemes<'a> {
186    type Item = Grapheme<'a>;
187
188    #[inline]
189    fn next(&mut self) -> Option<Grapheme<'a>> {
190        let start = self.cursor.cur_cursor();
191        let next = self.cursor.next_boundary(self.text, 0).unwrap()?;
192        Some(Grapheme {
193            grapheme: Cow::Borrowed(&self.text[start..next]),
194            text_bytes: self.text_offset + start..self.text_offset + next,
195        })
196    }
197
198    #[inline]
199    fn size_hint(&self) -> (usize, Option<usize>) {
200        let slen = self.text.len() - self.cursor.cur_cursor();
201        (cmp::min(slen, 1), Some(slen))
202    }
203}
204
/// An implementation of a graphemes iterator, for iterating over
/// the graphemes of a RopeSlice.
///
/// Positions reported to the outside are the position inside `text`
/// plus `text_offset`.
#[derive(Debug, Clone)]
pub struct RopeGraphemes<'a> {
    /// Offset of `text` in the complete text.
    text_offset: usize,
    /// The rope-slice that is iterated.
    text: RopeSlice<'a>,
    /// Chunk iterator over `text`.
    chunks: Chunks<'a>,
    /// Direction of the last move of `chunks` that yielded a chunk:
    /// `Some(true)` after `next()`, `Some(false)` after `prev()`,
    /// `None` if the last call yielded no chunk.
    was_next: Option<bool>,
    /// The chunk currently handed to the grapheme cursor.
    cur_chunk: &'a str,
    /// Byte position of `cur_chunk`, relative to the start of `text`.
    cur_chunk_start: usize,
    /// Grapheme cursor; its position is relative to the start of `text`.
    cursor: GraphemeCursor,
}
217
218impl<'a> RopeGraphemes<'a> {
219    /// New grapheme iterator.
220    ///
221    /// * slice_offset - offset of the slice in the complete text.
222    /// * slice - slice of the complete text
223    pub(crate) fn new(slice_offset: usize, slice: RopeSlice<'a>) -> RopeGraphemes<'a> {
224        let mut chunks = slice.chunks();
225
226        // was_next is only useful, if there was a true next().
227        // otherwise it confuses the algorithm.
228        let (first_chunk, was_next) = match chunks.next() {
229            Some(v) => (v, Some(true)),
230            None => ("", None),
231        };
232
233        RopeGraphemes {
234            text_offset: slice_offset,
235            text: slice,
236            chunks,
237            was_next,
238            cur_chunk: first_chunk,
239            cur_chunk_start: 0,
240            cursor: GraphemeCursor::new(0, slice.len_bytes(), true),
241        }
242    }
243
244    /// New grapheme iterator.
245    ///
246    /// * slice_offset - offset of the slice in the complete text.
247    /// * slice - slice of the complete text
248    /// * offset - relative offset into the slice
249    ///
250    /// Offset must be a valid char boundary.
251    pub(crate) fn new_offset(
252        slice_offset: usize,
253        slice: RopeSlice<'a>,
254        offset: usize,
255    ) -> Result<RopeGraphemes<'a>, TextError> {
256        let Some((mut chunks, chunk_start, _, _)) = slice.get_chunks_at_byte(offset) else {
257            return Err(TextError::ByteIndexOutOfBounds(offset, slice.len_bytes()));
258        };
259
260        // was_next is only useful, if there was a true next().
261        // otherwise it confuses the algorithm.
262        let (first_chunk, was_next) = match chunks.next() {
263            Some(v) => (v, Some(true)),
264            None => ("", None),
265        };
266
267        Ok(RopeGraphemes {
268            text_offset: slice_offset,
269            text: slice,
270            chunks,
271            was_next,
272            cur_chunk: first_chunk,
273            cur_chunk_start: chunk_start,
274            cursor: GraphemeCursor::new(offset, slice.len_bytes(), true),
275        })
276    }
277}
278
279impl<'a> Cursor for RopeGraphemes<'a> {
280    #[inline]
281    fn prev(&mut self) -> Option<Grapheme<'a>> {
282        let a = self.cursor.cur_cursor();
283        let b;
284        loop {
285            match self
286                .cursor
287                .prev_boundary(self.cur_chunk, self.cur_chunk_start)
288            {
289                Ok(None) => {
290                    return None;
291                }
292                Ok(Some(n)) => {
293                    b = n;
294                    break;
295                }
296                Err(GraphemeIncomplete::PrevChunk) => {
297                    if self.was_next == Some(true) {
298                        // skip current
299                        self.chunks.prev();
300                    }
301                    (self.cur_chunk, self.was_next) = match self.chunks.prev() {
302                        Some(v) => (v, Some(false)),
303                        None => ("", None),
304                    };
305                    self.cur_chunk_start -= self.cur_chunk.len();
306                }
307                Err(GraphemeIncomplete::PreContext(idx)) => {
308                    let (chunk, byte_idx, _, _) = self.text.chunk_at_byte(idx.saturating_sub(1));
309                    self.cursor.provide_context(chunk, byte_idx);
310                }
311                _ => unreachable!(),
312            }
313        }
314
315        if a >= self.cur_chunk_start + self.cur_chunk.len() {
316            let a_char = self.text.byte_to_char(a);
317            let b_char = self.text.byte_to_char(b);
318
319            Some(Grapheme {
320                grapheme: Cow::Owned(self.text.slice(b_char..a_char).to_string()),
321                text_bytes: self.text_offset + b..self.text_offset + a,
322            })
323        } else {
324            let a2 = a - self.cur_chunk_start;
325            let b2 = b - self.cur_chunk_start;
326            Some(Grapheme {
327                grapheme: Cow::Borrowed(&self.cur_chunk[b2..a2]),
328                text_bytes: self.text_offset + b..self.text_offset + a,
329            })
330        }
331    }
332
333    fn text_offset(&self) -> usize {
334        self.text_offset + self.cursor.cur_cursor()
335    }
336}
337
338impl<'a> SkipLine for RopeGraphemes<'a> {
339    fn skip_line(&mut self) -> Result<(), TextError> {
340        let cursor = self.cursor.cur_cursor();
341        let line = self.text.try_byte_to_line(cursor)?;
342        let next_offset = self.text.try_line_to_byte(line + 1)?;
343
344        let Some((mut chunks, chunk_start, _, _)) = self.text.get_chunks_at_byte(next_offset)
345        else {
346            return Err(TextError::ByteIndexOutOfBounds(
347                next_offset,
348                self.text.len_bytes(),
349            ));
350        };
351
352        // was_next is only useful, if there was a true next().
353        // otherwise it confuses the algorithm.
354        let (first_chunk, _was_next) = match chunks.next() {
355            Some(v) => (v, Some(true)),
356            None => ("", None),
357        };
358
359        self.chunks = chunks;
360        self.cur_chunk = first_chunk;
361        self.cur_chunk_start = chunk_start;
362        self.cursor = GraphemeCursor::new(next_offset, self.text.len_bytes(), true);
363
364        Ok(())
365    }
366
367    fn skip_to(&mut self, byte_pos: usize) -> Result<(), TextError> {
368        assert!(byte_pos >= self.text_offset);
369        // byte_pos is absolute to all text, but everything here is
370        // relative to the slice.
371        let byte_pos = byte_pos - self.text_offset;
372
373        let Some((mut chunks, chunk_start, _, _)) = self.text.get_chunks_at_byte(byte_pos) else {
374            return Err(TextError::ByteIndexOutOfBounds(
375                byte_pos,
376                self.text.len_bytes(),
377            ));
378        };
379
380        // was_next is only useful, if there was a true next().
381        // otherwise it confuses the algorithm.
382        let (first_chunk, _was_next) = match chunks.next() {
383            Some(v) => (v, Some(true)),
384            None => ("", None),
385        };
386
387        self.chunks = chunks;
388        self.cur_chunk = first_chunk;
389        self.cur_chunk_start = chunk_start;
390        self.cursor = GraphemeCursor::new(byte_pos, self.text.len_bytes(), true);
391
392        Ok(())
393    }
394}
395
396impl<'a> Iterator for RopeGraphemes<'a> {
397    type Item = Grapheme<'a>;
398
399    #[inline]
400    fn next(&mut self) -> Option<Grapheme<'a>> {
401        let a = self.cursor.cur_cursor();
402        let b;
403        loop {
404            match self
405                .cursor
406                .next_boundary(self.cur_chunk, self.cur_chunk_start)
407            {
408                Ok(None) => {
409                    return None;
410                }
411                Ok(Some(n)) => {
412                    b = n;
413                    break;
414                }
415                Err(GraphemeIncomplete::NextChunk) => {
416                    self.cur_chunk_start += self.cur_chunk.len();
417                    if self.was_next == Some(false) {
418                        // skip current
419                        self.chunks.next();
420                    }
421                    (self.cur_chunk, self.was_next) = match self.chunks.next() {
422                        Some(v) => (v, Some(true)),
423                        None => ("", None),
424                    };
425                }
426                Err(GraphemeIncomplete::PreContext(idx)) => {
427                    let (chunk, byte_idx, _, _) = self.text.chunk_at_byte(idx.saturating_sub(1));
428                    self.cursor.provide_context(chunk, byte_idx);
429                }
430                _ => unreachable!(),
431            }
432        }
433
434        if a < self.cur_chunk_start {
435            let a_char = self.text.byte_to_char(a);
436            let b_char = self.text.byte_to_char(b);
437
438            Some(Grapheme {
439                grapheme: Cow::Owned(self.text.slice(a_char..b_char).to_string()),
440                text_bytes: self.text_offset + a..self.text_offset + b,
441            })
442        } else {
443            let a2 = a - self.cur_chunk_start;
444            let b2 = b - self.cur_chunk_start;
445            Some(Grapheme {
446                grapheme: Cow::Borrowed(&self.cur_chunk[a2..b2]),
447                text_bytes: self.text_offset + a..self.text_offset + b,
448            })
449        }
450    }
451}
452
#[cfg(test)]
mod test_str {
    use crate::Cursor;
    use crate::grapheme::StrGraphemes;

    /// "\r\n" is returned as one grapheme.
    #[test]
    fn test_str_graphemes0() {
        let text = String::from("\r\n");
        let mut iter = StrGraphemes::new(0, &text);
        let first = iter.next().unwrap();
        assert_eq!(first, "\r\n");
    }

    /// Basic graphemes, forward and backward.
    #[test]
    fn test_str_graphemes1() {
        let text = String::from("qwertz");

        // the complete string
        let expected = ["q", "w", "e", "r", "t", "z"];
        let mut iter = StrGraphemes::new(0, &text);
        for g in expected.iter() {
            assert_eq!(iter.next().unwrap(), *g);
        }
        assert!(iter.next().is_none());
        for g in expected.iter().rev() {
            assert_eq!(iter.prev().unwrap(), *g);
        }

        // a slice without the first and last char
        let expected = ["w", "e", "r", "t"];
        let mut iter = StrGraphemes::new(1, &text[1..text.len() - 1]);
        for g in expected.iter() {
            assert_eq!(iter.next().unwrap(), *g);
        }
        assert!(iter.next().is_none());
        for g in expected.iter().rev() {
            assert_eq!(iter.prev().unwrap(), *g);
        }

        // an empty slice
        let mut iter = StrGraphemes::new(3, &text[3..3]);
        assert!(iter.next().is_none());
        assert!(iter.prev().is_none());
    }

    /// Graphemes that consist of several chars, forward and backward.
    #[test]
    fn test_str_graphemes2() {
        let text = String::from("w🤷‍♂️xw🤷‍♀️xw🤦‍♂️xw❤️xw🤦‍♀️xw💕🙍🏿‍♀️x");
        let expected = [
            "w", "🤷‍♂️", "x", //
            "w", "🤷‍♀️", "x", //
            "w", "🤦‍♂️", "x", //
            "w", "❤️", "x", //
            "w", "🤦‍♀️", "x", //
            "w", "💕", "🙍🏿‍♀️", "x",
        ];

        let mut iter = StrGraphemes::new(0, &text);
        for g in expected.iter() {
            assert_eq!(iter.next().unwrap(), *g);
        }
        assert!(iter.next().is_none());
        for g in expected.iter().rev() {
            assert_eq!(iter.prev().unwrap(), *g);
        }
    }

    /// Start at an offset; check both the graphemes and their byte-ranges.
    #[test]
    fn test_str_graphemes3() {
        let text = String::from("qwertz");

        let mut iter = StrGraphemes::new_offset(0, &text, 3);
        assert_eq!(iter.next().unwrap(), "r");
        for g in ["r", "e"].iter() {
            assert_eq!(iter.prev().unwrap(), *g);
        }

        let mut iter = StrGraphemes::new_offset(0, &text, 3);
        assert_eq!(iter.next().unwrap().text_bytes(), 3..4);
        for range in [3..4, 2..3] {
            assert_eq!(iter.prev().unwrap().text_bytes(), range);
        }

        // offset 21 lies inside of a grapheme
        let text = String::from("w🤷‍♂️🤷‍♀️🤦‍♂️❤️🤦‍♀️💕🙍🏿‍♀️x");

        let mut iter = StrGraphemes::new_offset(0, &text, 21);
        for g in ["♀\u{fe0f}", "🤦\u{200d}♂\u{fe0f}"].iter() {
            assert_eq!(iter.next().unwrap(), *g);
        }
        for g in ["🤦\u{200d}♂\u{fe0f}", "🤷\u{200d}♀\u{fe0f}"].iter() {
            assert_eq!(iter.prev().unwrap(), *g);
        }

        let mut iter = StrGraphemes::new_offset(0, &text, 21);
        for range in [21..27, 27..40] {
            assert_eq!(iter.next().unwrap().text_bytes(), range);
        }
        for range in [27..40, 14..27] {
            assert_eq!(iter.prev().unwrap().text_bytes(), range);
        }
    }

    /// text_offset() for a partial slice, including steps past both ends.
    #[test]
    fn test_str_graphemes4() {
        let text = String::from("qwertz");
        let mut iter = StrGraphemes::new_offset(1, &text[1..5], 2);

        for offset in [4, 5, 5, 5] {
            let _ = iter.next();
            assert_eq!(iter.text_offset(), offset);
        }
        for offset in [4, 3, 2, 1, 1] {
            let _ = iter.prev();
            assert_eq!(iter.text_offset(), offset);
        }
    }
}
602
#[cfg(test)]
mod test_rope {
    use crate::Cursor;
    use crate::grapheme::{RopeGraphemes, StrGraphemes};
    use ropey::Rope;

    /// 100 bytes: digits, closed with ')'.
    const DIGITS: &str = "012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678)";
    /// 100 bytes: letters, closed with 'J'.
    const LETTERS: &str = "abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghiJ";
    /// Like LETTERS, but closed with a grapheme made of several chars.
    const LETTERS_EMOJI: &str = "abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghi🤷‍♂️";

    /// Basic graphemes, forward and backward.
    #[test]
    fn test_rope_graphemes1() {
        let rope = Rope::from("qwertz");

        // the complete rope
        let expected = ["q", "w", "e", "r", "t", "z"];
        let mut iter = RopeGraphemes::new(0, rope.byte_slice(..));
        for g in expected.iter() {
            assert_eq!(iter.next().unwrap(), *g);
        }
        assert!(iter.next().is_none());
        for g in expected.iter().rev() {
            assert_eq!(iter.prev().unwrap(), *g);
        }

        // a slice without the first and last char
        let expected = ["w", "e", "r", "t"];
        let mut iter = RopeGraphemes::new(1, rope.byte_slice(1..rope.len_bytes() - 1));
        for g in expected.iter() {
            assert_eq!(iter.next().unwrap(), *g);
        }
        assert!(iter.next().is_none());
        for g in expected.iter().rev() {
            assert_eq!(iter.prev().unwrap(), *g);
        }

        // an empty slice
        let mut iter = RopeGraphemes::new(3, rope.byte_slice(3..3));
        assert!(iter.next().is_none());
        assert!(iter.prev().is_none());
    }

    /// Graphemes that consist of several chars, forward and backward.
    #[test]
    fn test_rope_graphemes2() {
        let rope = Rope::from("w🤷‍♂️xw🤷‍♀️xw🤦‍♂️xw❤️xw🤦‍♀️xw💕🙍🏿‍♀️x");
        let expected = [
            "w", "🤷‍♂️", "x", //
            "w", "🤷‍♀️", "x", //
            "w", "🤦‍♂️", "x", //
            "w", "❤️", "x", //
            "w", "🤦‍♀️", "x", //
            "w", "💕", "🙍🏿‍♀️", "x",
        ];

        let mut iter = RopeGraphemes::new(0, rope.byte_slice(..));
        for g in expected.iter() {
            assert_eq!(iter.next().unwrap(), *g);
        }
        assert!(iter.next().is_none());
        for g in expected.iter().rev() {
            assert_eq!(iter.prev().unwrap(), *g);
        }
    }

    /// Start at an offset; check both the graphemes and their byte-ranges.
    #[test]
    fn test_rope_graphemes3() {
        let rope = Rope::from("qwertz");

        let mut iter = RopeGraphemes::new_offset(0, rope.byte_slice(..), 3).expect("fine");
        assert_eq!(iter.next().unwrap(), "r");
        for g in ["r", "e"].iter() {
            assert_eq!(iter.prev().unwrap(), *g);
        }

        let mut iter = RopeGraphemes::new_offset(0, rope.byte_slice(..), 3).expect("fine");
        assert_eq!(iter.next().unwrap().text_bytes(), 3..4);
        for range in [3..4, 2..3] {
            assert_eq!(iter.prev().unwrap().text_bytes(), range);
        }

        // offset 21 lies inside of a grapheme
        let rope = Rope::from("w🤷‍♂️🤷‍♀️🤦‍♂️❤️🤦‍♀️💕🙍🏿‍♀️x");

        let mut iter = RopeGraphemes::new_offset(0, rope.byte_slice(..), 21).expect("fine");
        for g in ["♀\u{fe0f}", "🤦\u{200d}♂\u{fe0f}"].iter() {
            assert_eq!(iter.next().unwrap(), *g);
        }
        for g in ["🤦\u{200d}♂\u{fe0f}", "🤷\u{200d}♀\u{fe0f}"].iter() {
            assert_eq!(iter.prev().unwrap(), *g);
        }

        let mut iter = RopeGraphemes::new_offset(0, rope.byte_slice(..), 21).expect("fine");
        for range in [21..27, 27..40] {
            assert_eq!(iter.next().unwrap().text_bytes(), range);
        }
        for range in [27..40, 14..27] {
            assert_eq!(iter.prev().unwrap().text_bytes(), range);
        }
    }

    /// text_offset() for a partial slice, including steps past both ends.
    #[test]
    fn test_rope_graphemes4() {
        let rope = Rope::from("qwertz");
        let mut iter = RopeGraphemes::new_offset(1, rope.byte_slice(1..5), 2).expect("fine");

        for offset in [4, 5, 5, 5] {
            let _ = iter.next();
            assert_eq!(iter.text_offset(), offset);
        }
        for offset in [4, 3, 2, 1, 1] {
            let _ = iter.prev();
            assert_eq!(iter.text_offset(), offset);
        }
    }

    /// Iterate across the middle of a 1200 byte rope in both directions.
    #[test]
    fn test_rope_graphemes6() {
        let text = [DIGITS, LETTERS].concat().repeat(6);
        let rope = Rope::from(text.as_str());
        assert_eq!(rope.len_bytes(), 1200);

        let mut iter = RopeGraphemes::new_offset(1, rope.byte_slice(1..1199), 0).expect("fine");
        assert_eq!(iter.nth(598).unwrap(), "J");

        for (g, offset) in [("0", 601), ("1", 602)] {
            assert_eq!(iter.next().unwrap(), g);
            assert_eq!(iter.text_offset(), offset);
        }
        for (g, offset) in [("1", 601), ("0", 600), ("J", 599)] {
            assert_eq!(iter.prev().unwrap(), g);
            assert_eq!(iter.text_offset(), offset);
        }
    }

    /// A grapheme made of several chars that crosses the end of the
    /// first chunk (606 bytes).
    #[test]
    fn test_rope_graphemes7() {
        let plain = [DIGITS, LETTERS].concat();
        let with_emoji = [DIGITS, LETTERS_EMOJI].concat();
        let text = [
            plain.as_str(),
            plain.as_str(),
            with_emoji.as_str(),
            plain.as_str(),
            plain.as_str(),
            plain.as_str(),
        ]
        .concat();
        let rope = Rope::from(text.as_str());
        assert_eq!(rope.len_bytes(), 1212);
        assert_eq!(rope.chunks().next().unwrap().len(), 606);

        let mut iter = RopeGraphemes::new_offset(1, rope.byte_slice(1..1199), 0).expect("fine");
        assert_eq!(iter.nth(598).unwrap(), "🤷‍♂️");

        for (g, offset) in [("0", 613), ("1", 614)] {
            assert_eq!(iter.next().unwrap(), g);
            assert_eq!(iter.text_offset(), offset);
        }
        for (g, offset) in [("1", 613), ("0", 612), ("🤷‍♂️", 599), ("i", 598)] {
            assert_eq!(iter.prev().unwrap(), g);
            assert_eq!(iter.text_offset(), offset);
        }
        for (g, offset) in [("i", 599), ("🤷‍♂️", 612), ("0", 613), ("1", 614)] {
            assert_eq!(iter.next().unwrap(), g);
            assert_eq!(iter.text_offset(), offset);
        }
    }

    /// Walk backwards from the end over mixed "\r" and "\n" sequences.
    #[test]
    fn test_rev_graphemes() {
        let cases: [(&str, &[&str]); 3] = [
            ("\r\n", &["\r\n"]),
            ("\r\r\n", &["\r\n", "\r"]),
            ("\r\r\n\n", &["\n", "\r\n", "\r"]),
        ];

        for (text, expected) in cases {
            let mut it = StrGraphemes::new_offset(0, text, text.len());
            for g in expected.iter() {
                assert_eq!(it.prev().unwrap(), *g);
            }
        }
    }
}