rat_text/
grapheme.rs

1use crate::text_store::SkipLine;
2use crate::{Cursor, TextError};
3use ropey::RopeSlice;
4use ropey::iter::Chunks;
5use std::borrow::Cow;
6use std::cmp;
7use std::fmt::Debug;
8use std::ops::Range;
9use unicode_segmentation::{GraphemeCursor, GraphemeIncomplete};
10
/// A single grapheme cluster plus the byte-range it was read from.
#[derive(Debug, PartialEq)]
pub struct Grapheme<'a> {
    /// Text of the grapheme, either borrowed from the source or owned.
    grapheme: Cow<'a, str>,
    /// Byte-range of the grapheme. The cursors in this file add the
    /// offset of their slice, so the range refers to the complete text.
    text_bytes: Range<usize>,
}

/// Compares only the grapheme text with anything string-like;
/// the byte-range takes no part in this comparison.
impl<R: AsRef<str>> PartialEq<R> for Grapheme<'_> {
    fn eq(&self, other: &R) -> bool {
        let lhs: &str = &self.grapheme;
        lhs == other.as_ref()
    }
}

impl<'a> Grapheme<'a> {
    /// Build a grapheme from its text and its byte-range.
    pub fn new(grapheme: Cow<'a, str>, text_bytes: Range<usize>) -> Self {
        Self {
            grapheme,
            text_bytes,
        }
    }

    /// True if the first char of the grapheme is whitespace.
    /// An empty grapheme is not whitespace.
    #[inline]
    pub fn is_whitespace(&self) -> bool {
        self.grapheme.chars().next().is_some_and(char::is_whitespace)
    }

    /// True if the grapheme is one of the recognized line breaks:
    /// CR, LF, CR LF, U+000B, U+000C, U+0085, U+2028 or U+2029.
    #[inline]
    pub fn is_line_break(&self) -> bool {
        matches!(
            &*self.grapheme,
            "\r" | "\n"
                | "\r\n"
                | "\u{000C}"
                | "\u{000B}"
                | "\u{0085}"
                | "\u{2028}"
                | "\u{2029}"
        )
    }

    /// Borrow the grapheme text.
    #[inline]
    pub fn grapheme(&'a self) -> &'a str {
        &self.grapheme
    }

    /// Take the grapheme apart into its text and its byte-range.
    #[inline]
    pub fn into_parts(self) -> (Cow<'a, str>, Range<usize>) {
        let Self {
            grapheme,
            text_bytes,
        } = self;
        (grapheme, text_bytes)
    }

    /// Copy of the stored byte-range.
    #[inline]
    pub fn text_bytes(&self) -> Range<usize> {
        self.text_bytes.start..self.text_bytes.end
    }
}
77
78/// A cursor over graphemes of a string.
79#[derive(Debug, Clone)]
80pub struct StrGraphemes<'a> {
81    text_offset: usize,
82    text: &'a str,
83    cursor: GraphemeCursor,
84}
85
86impl<'a> StrGraphemes<'a> {
87    /// Iterate the graphemes of a str-slice.
88    ///
89    /// * slice_offset - offset of the slice in the complete text.
90    /// * slice - slice
91    ///
92    pub(crate) fn new(slice_offset: usize, slice: &'a str) -> Self {
93        Self {
94            text_offset: slice_offset,
95            text: slice,
96            cursor: GraphemeCursor::new(0, slice.len(), true),
97        }
98    }
99
100    /// Iterate the graphemes of a str-slice.
101    ///
102    /// * slice_offset - offset of the slice in the complete text.
103    /// * slice - slice
104    /// * offset - relative offset into the slice
105    ///
106    pub(crate) fn new_offset(slice_offset: usize, slice: &'a str, offset: usize) -> Self {
107        Self {
108            text_offset: slice_offset,
109            text: slice,
110            cursor: GraphemeCursor::new(offset, slice.len(), true),
111        }
112    }
113}
114
115impl Cursor for StrGraphemes<'_> {
116    fn prev(&mut self) -> Option<Self::Item> {
117        let start = self.cursor.cur_cursor();
118        let prev = self.cursor.prev_boundary(self.text, 0).unwrap()?;
119        Some(Grapheme {
120            grapheme: Cow::Borrowed(&self.text[prev..start]),
121            text_bytes: self.text_offset + prev..self.text_offset + start,
122        })
123    }
124
125    fn rev_cursor(self) -> impl Cursor<Item = Self::Item> {
126        RevStrGraphemes { it: self }
127    }
128
129    fn text_offset(&self) -> usize {
130        self.text_offset + self.cursor.cur_cursor()
131    }
132}
133
134impl SkipLine for StrGraphemes<'_> {
135    fn skip_line(&mut self) -> Result<(), TextError> {
136        self.cursor.set_cursor(self.text.len());
137        Ok(())
138    }
139
140    fn skip_to(&mut self, byte_pos: usize) -> Result<(), TextError> {
141        assert!(byte_pos >= self.text_offset);
142        let offset = byte_pos - self.text_offset;
143        self.cursor.set_cursor(offset);
144        Ok(())
145    }
146}
147
148impl<'a> Iterator for StrGraphemes<'a> {
149    type Item = Grapheme<'a>;
150
151    #[inline]
152    fn next(&mut self) -> Option<Grapheme<'a>> {
153        let start = self.cursor.cur_cursor();
154        let next = self.cursor.next_boundary(self.text, 0).unwrap()?;
155        Some(Grapheme {
156            grapheme: Cow::Borrowed(&self.text[start..next]),
157            text_bytes: self.text_offset + start..self.text_offset + next,
158        })
159    }
160
161    #[inline]
162    fn size_hint(&self) -> (usize, Option<usize>) {
163        let slen = self.text.len() - self.cursor.cur_cursor();
164        (cmp::min(slen, 1), Some(slen))
165    }
166}
167
168#[derive(Debug)]
169pub(crate) struct RevStrGraphemes<'a> {
170    it: StrGraphemes<'a>,
171}
172
173impl<'a> Iterator for RevStrGraphemes<'a> {
174    type Item = Grapheme<'a>;
175
176    #[inline]
177    fn next(&mut self) -> Option<Self::Item> {
178        self.it.prev()
179    }
180}
181
182impl Cursor for RevStrGraphemes<'_> {
183    #[inline]
184    fn prev(&mut self) -> Option<Self::Item> {
185        self.it.next()
186    }
187
188    #[inline]
189    fn rev_cursor(self) -> impl Cursor<Item = Self::Item> {
190        self.it
191    }
192
193    fn text_offset(&self) -> usize {
194        self.it.text_offset()
195    }
196}
197
198impl SkipLine for RevStrGraphemes<'_> {
199    fn skip_line(&mut self) -> Result<(), TextError> {
200        unimplemented!("no skip_line()");
201    }
202
203    fn skip_to(&mut self, _byte_pos: usize) -> Result<(), TextError> {
204        unimplemented!("no skip_to()");
205    }
206}
207
208/// An implementation of a graphemes iterator, for iterating over
209/// the graphemes of a RopeSlice.
210#[derive(Debug, Clone)]
211pub struct RopeGraphemes<'a> {
212    text_offset: usize,
213    text: RopeSlice<'a>,
214    chunks: Chunks<'a>,
215    was_next: Option<bool>,
216    cur_chunk: &'a str,
217    cur_chunk_start: usize,
218    cursor: GraphemeCursor,
219}
220
221impl<'a> RopeGraphemes<'a> {
222    /// New grapheme iterator.
223    ///
224    /// * slice_offset - offset of the slice in the complete text.
225    /// * slice - slice of the complete text
226    pub(crate) fn new(slice_offset: usize, slice: RopeSlice<'a>) -> RopeGraphemes<'a> {
227        let mut chunks = slice.chunks();
228
229        // was_next is only useful, if there was a true next().
230        // otherwise it confuses the algorithm.
231        let (first_chunk, was_next) = match chunks.next() {
232            Some(v) => (v, Some(true)),
233            None => ("", None),
234        };
235
236        RopeGraphemes {
237            text_offset: slice_offset,
238            text: slice,
239            chunks,
240            was_next,
241            cur_chunk: first_chunk,
242            cur_chunk_start: 0,
243            cursor: GraphemeCursor::new(0, slice.len_bytes(), true),
244        }
245    }
246
247    /// New grapheme iterator.
248    ///
249    /// * slice_offset - offset of the slice in the complete text.
250    /// * slice - slice of the complete text
251    /// * offset - relative offset into the slice
252    ///
253    /// Offset must be a valid char boundary.
254    pub(crate) fn new_offset(
255        slice_offset: usize,
256        slice: RopeSlice<'a>,
257        offset: usize,
258    ) -> Result<RopeGraphemes<'a>, TextError> {
259        let Some((mut chunks, chunk_start, _, _)) = slice.get_chunks_at_byte(offset) else {
260            return Err(TextError::ByteIndexOutOfBounds(offset, slice.len_bytes()));
261        };
262
263        // was_next is only useful, if there was a true next().
264        // otherwise it confuses the algorithm.
265        let (first_chunk, was_next) = match chunks.next() {
266            Some(v) => (v, Some(true)),
267            None => ("", None),
268        };
269
270        Ok(RopeGraphemes {
271            text_offset: slice_offset,
272            text: slice,
273            chunks,
274            was_next,
275            cur_chunk: first_chunk,
276            cur_chunk_start: chunk_start,
277            cursor: GraphemeCursor::new(offset, slice.len_bytes(), true),
278        })
279    }
280}
281
282impl<'a> Cursor for RopeGraphemes<'a> {
283    #[inline]
284    fn prev(&mut self) -> Option<Grapheme<'a>> {
285        let a = self.cursor.cur_cursor();
286        let b;
287        loop {
288            match self
289                .cursor
290                .prev_boundary(self.cur_chunk, self.cur_chunk_start)
291            {
292                Ok(None) => {
293                    return None;
294                }
295                Ok(Some(n)) => {
296                    b = n;
297                    break;
298                }
299                Err(GraphemeIncomplete::PrevChunk) => {
300                    if self.was_next == Some(true) {
301                        // skip current
302                        self.chunks.prev();
303                    }
304                    (self.cur_chunk, self.was_next) = match self.chunks.prev() {
305                        Some(v) => (v, Some(false)),
306                        None => ("", None),
307                    };
308                    self.cur_chunk_start -= self.cur_chunk.len();
309                }
310                Err(GraphemeIncomplete::PreContext(idx)) => {
311                    let (chunk, byte_idx, _, _) = self.text.chunk_at_byte(idx.saturating_sub(1));
312                    self.cursor.provide_context(chunk, byte_idx);
313                }
314                _ => unreachable!(),
315            }
316        }
317
318        if a >= self.cur_chunk_start + self.cur_chunk.len() {
319            let a_char = self.text.byte_to_char(a);
320            let b_char = self.text.byte_to_char(b);
321
322            Some(Grapheme {
323                grapheme: Cow::Owned(self.text.slice(b_char..a_char).to_string()),
324                text_bytes: self.text_offset + b..self.text_offset + a,
325            })
326        } else {
327            let a2 = a - self.cur_chunk_start;
328            let b2 = b - self.cur_chunk_start;
329            Some(Grapheme {
330                grapheme: Cow::Borrowed(&self.cur_chunk[b2..a2]),
331                text_bytes: self.text_offset + b..self.text_offset + a,
332            })
333        }
334    }
335
336    fn rev_cursor(self) -> impl Cursor<Item = Self::Item> {
337        RevRopeGraphemes { it: self }
338    }
339
340    fn text_offset(&self) -> usize {
341        self.text_offset + self.cursor.cur_cursor()
342    }
343}
344
345impl<'a> SkipLine for RopeGraphemes<'a> {
346    fn skip_line(&mut self) -> Result<(), TextError> {
347        let cursor = self.cursor.cur_cursor();
348        let line = self.text.try_byte_to_line(cursor)?;
349        let next_offset = self.text.try_line_to_byte(line + 1)?;
350
351        let Some((mut chunks, chunk_start, _, _)) = self.text.get_chunks_at_byte(next_offset)
352        else {
353            return Err(TextError::ByteIndexOutOfBounds(
354                next_offset,
355                self.text.len_bytes(),
356            ));
357        };
358
359        // was_next is only useful, if there was a true next().
360        // otherwise it confuses the algorithm.
361        let (first_chunk, _was_next) = match chunks.next() {
362            Some(v) => (v, Some(true)),
363            None => ("", None),
364        };
365
366        self.chunks = chunks;
367        self.cur_chunk = first_chunk;
368        self.cur_chunk_start = chunk_start;
369        self.cursor = GraphemeCursor::new(next_offset, self.text.len_bytes(), true);
370
371        Ok(())
372    }
373
374    fn skip_to(&mut self, byte_pos: usize) -> Result<(), TextError> {
375        assert!(byte_pos >= self.text_offset);
376        // byte_pos is absolute to all text, but everything here is
377        // relative to the slice.
378        let byte_pos = byte_pos - self.text_offset;
379
380        let Some((mut chunks, chunk_start, _, _)) = self.text.get_chunks_at_byte(byte_pos) else {
381            return Err(TextError::ByteIndexOutOfBounds(
382                byte_pos,
383                self.text.len_bytes(),
384            ));
385        };
386
387        // was_next is only useful, if there was a true next().
388        // otherwise it confuses the algorithm.
389        let (first_chunk, _was_next) = match chunks.next() {
390            Some(v) => (v, Some(true)),
391            None => ("", None),
392        };
393
394        self.chunks = chunks;
395        self.cur_chunk = first_chunk;
396        self.cur_chunk_start = chunk_start;
397        self.cursor = GraphemeCursor::new(byte_pos, self.text.len_bytes(), true);
398
399        Ok(())
400    }
401}
402
403impl<'a> Iterator for RopeGraphemes<'a> {
404    type Item = Grapheme<'a>;
405
406    #[inline]
407    fn next(&mut self) -> Option<Grapheme<'a>> {
408        let a = self.cursor.cur_cursor();
409        let b;
410        loop {
411            match self
412                .cursor
413                .next_boundary(self.cur_chunk, self.cur_chunk_start)
414            {
415                Ok(None) => {
416                    return None;
417                }
418                Ok(Some(n)) => {
419                    b = n;
420                    break;
421                }
422                Err(GraphemeIncomplete::NextChunk) => {
423                    self.cur_chunk_start += self.cur_chunk.len();
424                    if self.was_next == Some(false) {
425                        // skip current
426                        self.chunks.next();
427                    }
428                    (self.cur_chunk, self.was_next) = match self.chunks.next() {
429                        Some(v) => (v, Some(true)),
430                        None => ("", None),
431                    };
432                }
433                Err(GraphemeIncomplete::PreContext(idx)) => {
434                    let (chunk, byte_idx, _, _) = self.text.chunk_at_byte(idx.saturating_sub(1));
435                    self.cursor.provide_context(chunk, byte_idx);
436                }
437                _ => unreachable!(),
438            }
439        }
440
441        if a < self.cur_chunk_start {
442            let a_char = self.text.byte_to_char(a);
443            let b_char = self.text.byte_to_char(b);
444
445            Some(Grapheme {
446                grapheme: Cow::Owned(self.text.slice(a_char..b_char).to_string()),
447                text_bytes: self.text_offset + a..self.text_offset + b,
448            })
449        } else {
450            let a2 = a - self.cur_chunk_start;
451            let b2 = b - self.cur_chunk_start;
452            Some(Grapheme {
453                grapheme: Cow::Borrowed(&self.cur_chunk[a2..b2]),
454                text_bytes: self.text_offset + a..self.text_offset + b,
455            })
456        }
457    }
458}
459
460#[derive(Debug)]
461pub(crate) struct RevRopeGraphemes<'a> {
462    it: RopeGraphemes<'a>,
463}
464
465impl<'a> Iterator for RevRopeGraphemes<'a> {
466    type Item = Grapheme<'a>;
467
468    #[inline]
469    fn next(&mut self) -> Option<Self::Item> {
470        self.it.prev()
471    }
472}
473
474impl Cursor for RevRopeGraphemes<'_> {
475    #[inline]
476    fn prev(&mut self) -> Option<Self::Item> {
477        self.it.next()
478    }
479
480    #[inline]
481    fn rev_cursor(self) -> impl Cursor<Item = Self::Item> {
482        self.it
483    }
484
485    fn text_offset(&self) -> usize {
486        self.it.text_offset()
487    }
488}
489
490impl SkipLine for RevRopeGraphemes<'_> {
491    fn skip_line(&mut self) -> Result<(), TextError> {
492        unimplemented!("no skip_line()")
493    }
494
495    fn skip_to(&mut self, _byte_pos: usize) -> Result<(), TextError> {
496        unimplemented!("no skip_to()")
497    }
498}
499
#[cfg(test)]
mod test_str {
    use crate::Cursor;
    use crate::grapheme::StrGraphemes;

    /// CR LF comes out as one grapheme.
    #[test]
    fn test_str_graphemes0() {
        let s = String::from("\r\n");
        let mut s0 = StrGraphemes::new(0, &s);
        assert_eq!(s0.next().unwrap(), "\r\n");
    }

    /// Plain ASCII: forward to the end, then all the way back.
    #[test]
    fn test_str_graphemes1() {
        let s = String::from("qwertz");

        // whole string
        let all = ["q", "w", "e", "r", "t", "z"];
        let mut s0 = StrGraphemes::new(0, &s);
        for expected in all.iter() {
            assert_eq!(s0.next().unwrap(), *expected);
        }
        assert!(s0.next().is_none());
        for expected in all.iter().rev() {
            assert_eq!(s0.prev().unwrap(), *expected);
        }

        // inner part of the string
        let inner = ["w", "e", "r", "t"];
        let mut s0 = StrGraphemes::new(1, &s[1..s.len() - 1]);
        for expected in inner.iter() {
            assert_eq!(s0.next().unwrap(), *expected);
        }
        assert!(s0.next().is_none());
        for expected in inner.iter().rev() {
            assert_eq!(s0.prev().unwrap(), *expected);
        }

        // empty slice
        let mut s0 = StrGraphemes::new(3, &s[3..3]);
        assert!(s0.next().is_none());
        assert!(s0.prev().is_none());
    }

    /// Multi-codepoint graphemes: forward to the end, then all the way back.
    #[test]
    fn test_str_graphemes2() {
        let s = String::from("w🤷‍♂️xw🤷‍♀️xw🤦‍♂️xw❤️xw🤦‍♀️xw💕🙍🏿‍♀️x");

        let all = [
            "w", "🤷‍♂️", "x", //
            "w", "🤷‍♀️", "x", //
            "w", "🤦‍♂️", "x", //
            "w", "❤️", "x", //
            "w", "🤦‍♀️", "x", //
            "w", "💕", "🙍🏿‍♀️", "x",
        ];

        let mut s0 = StrGraphemes::new(0, &s);
        for expected in all.iter() {
            assert_eq!(s0.next().unwrap(), *expected);
        }
        assert!(s0.next().is_none());
        for expected in all.iter().rev() {
            assert_eq!(s0.prev().unwrap(), *expected);
        }
    }

    /// Cursors that start at an offset inside the slice.
    #[test]
    fn test_str_graphemes3() {
        let s = String::from("qwertz");

        let mut s0 = StrGraphemes::new_offset(0, &s, 3);
        assert_eq!(s0.next().unwrap(), "r");
        for expected in ["r", "e"] {
            assert_eq!(s0.prev().unwrap(), expected);
        }

        let mut s0 = StrGraphemes::new_offset(0, &s, 3);
        assert_eq!(s0.next().unwrap().text_bytes(), 3..4);
        for expected in [3..4, 2..3] {
            assert_eq!(s0.prev().unwrap().text_bytes(), expected);
        }

        // offset 21 lies inside the second emoji
        let s = String::from("w🤷‍♂️🤷‍♀️🤦‍♂️❤️🤦‍♀️💕🙍🏿‍♀️x");

        let mut s0 = StrGraphemes::new_offset(0, &s, 21);
        for expected in ["♀\u{fe0f}", "🤦\u{200d}♂\u{fe0f}"] {
            assert_eq!(s0.next().unwrap(), expected);
        }
        for expected in ["🤦\u{200d}♂\u{fe0f}", "🤷\u{200d}♀\u{fe0f}"] {
            assert_eq!(s0.prev().unwrap(), expected);
        }

        let mut s0 = StrGraphemes::new_offset(0, &s, 21);
        for expected in [21..27, 27..40] {
            assert_eq!(s0.next().unwrap().text_bytes(), expected);
        }
        for expected in [27..40, 14..27] {
            assert_eq!(s0.prev().unwrap().text_bytes(), expected);
        }
    }

    /// text_offset() while stepping over both ends of a partial slice.
    #[test]
    fn test_str_graphemes4() {
        let s = String::from("qwertz");
        let mut s0 = StrGraphemes::new_offset(1, &s[1..5], 2);

        for expected in [4, 5, 5, 5] {
            s0.next();
            assert_eq!(s0.text_offset(), expected);
        }
        for expected in [4, 3, 2, 1, 1] {
            s0.prev();
            assert_eq!(s0.text_offset(), expected);
        }
    }

    /// Reversed cursor over a partial slice.
    #[test]
    fn test_str_graphemes5() {
        let s = String::from("qwertz");
        let mut s0 = StrGraphemes::new_offset(1, &s[1..5], 2).rev_cursor();

        for (expected, offset) in [("e", 2), ("w", 1)] {
            assert_eq!(s0.next().unwrap(), expected);
            assert_eq!(s0.text_offset(), offset);
        }
        for (expected, offset) in [("w", 2), ("e", 3), ("r", 4), ("t", 5)] {
            assert_eq!(s0.prev().unwrap(), expected);
            assert_eq!(s0.text_offset(), offset);
        }
    }
}
673
#[cfg(test)]
mod test_rope {
    use crate::Cursor;
    use crate::grapheme::{RopeGraphemes, StrGraphemes};
    use ropey::Rope;

    /// Plain ASCII: forward to the end, then all the way back.
    #[test]
    fn test_rope_graphemes1() {
        let s = Rope::from("qwertz");

        // whole rope
        let all = ["q", "w", "e", "r", "t", "z"];
        let mut s0 = RopeGraphemes::new(0, s.byte_slice(..));
        for expected in all.iter() {
            assert_eq!(s0.next().unwrap(), *expected);
        }
        assert!(s0.next().is_none());
        for expected in all.iter().rev() {
            assert_eq!(s0.prev().unwrap(), *expected);
        }

        // inner part of the rope
        let inner = ["w", "e", "r", "t"];
        let mut s0 = RopeGraphemes::new(1, s.byte_slice(1..s.len_bytes() - 1));
        for expected in inner.iter() {
            assert_eq!(s0.next().unwrap(), *expected);
        }
        assert!(s0.next().is_none());
        for expected in inner.iter().rev() {
            assert_eq!(s0.prev().unwrap(), *expected);
        }

        // empty slice
        let mut s0 = RopeGraphemes::new(3, s.byte_slice(3..3));
        assert!(s0.next().is_none());
        assert!(s0.prev().is_none());
    }

    /// Multi-codepoint graphemes: forward to the end, then all the way back.
    #[test]
    fn test_rope_graphemes2() {
        let s = Rope::from("w🤷‍♂️xw🤷‍♀️xw🤦‍♂️xw❤️xw🤦‍♀️xw💕🙍🏿‍♀️x");

        let all = [
            "w", "🤷‍♂️", "x", //
            "w", "🤷‍♀️", "x", //
            "w", "🤦‍♂️", "x", //
            "w", "❤️", "x", //
            "w", "🤦‍♀️", "x", //
            "w", "💕", "🙍🏿‍♀️", "x",
        ];

        let mut s0 = RopeGraphemes::new(0, s.byte_slice(..));
        for expected in all.iter() {
            assert_eq!(s0.next().unwrap(), *expected);
        }
        assert!(s0.next().is_none());
        for expected in all.iter().rev() {
            assert_eq!(s0.prev().unwrap(), *expected);
        }
    }

    /// Cursors that start at an offset inside the slice.
    #[test]
    fn test_rope_graphemes3() {
        let s = Rope::from("qwertz");

        let mut s0 = RopeGraphemes::new_offset(0, s.byte_slice(..), 3).expect("fine");
        assert_eq!(s0.next().unwrap(), "r");
        for expected in ["r", "e"] {
            assert_eq!(s0.prev().unwrap(), expected);
        }

        let mut s0 = RopeGraphemes::new_offset(0, s.byte_slice(..), 3).expect("fine");
        assert_eq!(s0.next().unwrap().text_bytes(), 3..4);
        for expected in [3..4, 2..3] {
            assert_eq!(s0.prev().unwrap().text_bytes(), expected);
        }

        // offset 21 lies inside the second emoji
        let s = Rope::from("w🤷‍♂️🤷‍♀️🤦‍♂️❤️🤦‍♀️💕🙍🏿‍♀️x");

        let mut s0 = RopeGraphemes::new_offset(0, s.byte_slice(..), 21).expect("fine");
        for expected in ["♀\u{fe0f}", "🤦\u{200d}♂\u{fe0f}"] {
            assert_eq!(s0.next().unwrap(), expected);
        }
        for expected in ["🤦\u{200d}♂\u{fe0f}", "🤷\u{200d}♀\u{fe0f}"] {
            assert_eq!(s0.prev().unwrap(), expected);
        }

        let mut s0 = RopeGraphemes::new_offset(0, s.byte_slice(..), 21).expect("fine");
        for expected in [21..27, 27..40] {
            assert_eq!(s0.next().unwrap().text_bytes(), expected);
        }
        for expected in [27..40, 14..27] {
            assert_eq!(s0.prev().unwrap().text_bytes(), expected);
        }
    }

    /// text_offset() while stepping over both ends of a partial slice.
    #[test]
    fn test_rope_graphemes4() {
        let s = Rope::from("qwertz");
        let mut s0 = RopeGraphemes::new_offset(1, s.byte_slice(1..5), 2).expect("fine");

        for expected in [4, 5, 5, 5] {
            s0.next();
            assert_eq!(s0.text_offset(), expected);
        }
        for expected in [4, 3, 2, 1, 1] {
            s0.prev();
            assert_eq!(s0.text_offset(), expected);
        }
    }

    /// Reversed cursor over a partial slice.
    #[test]
    fn test_rope_graphemes5() {
        let s = Rope::from("qwertz");
        let mut s0 = RopeGraphemes::new_offset(1, s.byte_slice(1..5), 2)
            .expect("fine")
            .rev_cursor();

        for (expected, offset) in [("e", 2), ("w", 1)] {
            assert_eq!(s0.next().unwrap(), expected);
            assert_eq!(s0.text_offset(), offset);
        }
        for (expected, offset) in [("w", 2), ("e", 3), ("r", 4), ("t", 5)] {
            assert_eq!(s0.prev().unwrap(), expected);
            assert_eq!(s0.text_offset(), offset);
        }
    }

    /// Stepping back and forth around the middle of a longer rope.
    #[test]
    fn test_rope_graphemes6() {
        // text rope boundary
        let s = Rope::from(
            "012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678)\
             abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghiJ\
             012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678)\
             abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghiJ\
             012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678)\
             abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghiJ\
             012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678)\
             abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghiJ\
             012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678)\
             abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghiJ\
             012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678)\
             abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghiJ\
             ",
        );
        assert_eq!(s.len_bytes(), 1200);

        let mut s0 = RopeGraphemes::new_offset(1, s.byte_slice(1..1199), 0).expect("fine");
        assert_eq!(s0.nth(598).unwrap(), "J");

        for (expected, offset) in [("0", 601), ("1", 602)] {
            assert_eq!(s0.next().unwrap(), expected);
            assert_eq!(s0.text_offset(), offset);
        }
        for (expected, offset) in [("1", 601), ("0", 600), ("J", 599)] {
            assert_eq!(s0.prev().unwrap(), expected);
            assert_eq!(s0.text_offset(), offset);
        }
    }

    /// Same walk with a multi-codepoint grapheme in the middle of the rope.
    #[test]
    fn test_rope_graphemes7() {
        // test complicated grapheme at rope boundary
        let s = Rope::from(
            "012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678)\
             abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghiJ\
             012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678)\
             abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghiJ\
             012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678)\
             abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghi🤷‍♂️\
             012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678)\
             abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghiJ\
             012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678)\
             abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghiJ\
             012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678)\
             abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghiJ\
             ",
        );
        assert_eq!(s.len_bytes(), 1212);
        assert_eq!(s.chunks().next().unwrap().len(), 606);

        let mut s0 = RopeGraphemes::new_offset(1, s.byte_slice(1..1199), 0).expect("fine");
        assert_eq!(s0.nth(598).unwrap(), "🤷‍♂️");

        for (expected, offset) in [("0", 613), ("1", 614)] {
            assert_eq!(s0.next().unwrap(), expected);
            assert_eq!(s0.text_offset(), offset);
        }
        for (expected, offset) in [("1", 613), ("0", 612), ("🤷‍♂️", 599), ("i", 598)] {
            assert_eq!(s0.prev().unwrap(), expected);
            assert_eq!(s0.text_offset(), offset);
        }
        for (expected, offset) in [("i", 599), ("🤷‍♂️", 612), ("0", 613), ("1", 614)] {
            assert_eq!(s0.next().unwrap(), expected);
            assert_eq!(s0.text_offset(), offset);
        }
    }

    /// Walking backwards over CR / LF combinations.
    #[test]
    fn test_rev_graphemes() {
        let mut it = StrGraphemes::new_offset(0, "\r\n", 2);
        assert_eq!(it.prev().unwrap(), "\r\n");

        let mut it = StrGraphemes::new_offset(0, "\r\r\n", 3);
        for expected in ["\r\n", "\r"] {
            assert_eq!(it.prev().unwrap(), expected);
        }

        let mut it = StrGraphemes::new_offset(0, "\r\r\n\n", 4);
        for expected in ["\n", "\r\n", "\r"] {
            assert_eq!(it.prev().unwrap(), expected);
        }
    }
}
935        assert_eq!(it.prev().unwrap(), "\r");
936    }
937}