rat_text/text_store/
text_rope.rs

1use crate::grapheme::{RopeGraphemes, StrGraphemes};
2use crate::text_store::{Cursor, TextStore};
3use crate::{TextError, TextPosition, TextRange, upos_type};
4use ropey::{Rope, RopeSlice};
5use std::borrow::Cow;
6use std::cell::Cell;
7use std::cmp::min;
8use std::ops::Range;
9use unicode_segmentation::UnicodeSegmentation;
10
11/// Text store with a rope.
12#[derive(Debug, Clone, Default)]
13pub struct TextRope {
14    text: Rope,
15    // minimum byte position changed since last reset.
16    min_changed: Cell<Option<usize>>,
17    // tmp buf
18    buf: String,
19}
20
21impl TextRope {
22    /// New empty.
23    pub fn new() -> Self {
24        Self::default()
25    }
26
27    /// New from string.
28    pub fn new_text(t: &str) -> Self {
29        Self {
30            text: Rope::from_str(t),
31            min_changed: Default::default(),
32            buf: Default::default(),
33        }
34    }
35
36    /// New from rope.
37    pub fn new_rope(r: Rope) -> Self {
38        Self {
39            text: r,
40            min_changed: Default::default(),
41            buf: Default::default(),
42        }
43    }
44
45    /// Borrow the rope
46    pub fn rope(&self) -> &Rope {
47        &self.text
48    }
49}
50
51impl TextRope {
52    fn invalidate(&self, byte_pos: usize) {
53        self.min_changed.update(|v| match v {
54            None => Some(byte_pos),
55            Some(w) => Some(min(byte_pos, w)),
56        });
57    }
58
59    fn normalize_row(&self, row: upos_type) -> Result<upos_type, TextError> {
60        let text_len = self.len_lines() as upos_type;
61        let rope_len = self.text.len_lines() as upos_type;
62
63        if row <= rope_len {
64            Ok(row)
65        } else if row <= text_len {
66            Ok(row - 1)
67        } else {
68            Err(TextError::LineIndexOutOfBounds(row, text_len))
69        }
70    }
71
72    fn normalize(&self, pos: TextPosition) -> Result<(TextPosition, usize), TextError> {
73        let len = self.len_lines();
74        if pos.y > len {
75            Err(TextError::LineIndexOutOfBounds(pos.y, len))
76        } else if pos.x > 0 && pos.y == len {
77            Err(TextError::ColumnIndexOutOfBounds(pos.x, 0))
78        } else if pos.x > 0 && pos.y == len - 1 && !self.has_final_newline() {
79            Err(TextError::ColumnIndexOutOfBounds(pos.x, 0))
80        } else if pos.x == 0 && pos.y == len {
81            let pos_byte = self.byte_range_at(pos)?;
82            Ok((
83                self.byte_to_pos(pos_byte.start).expect("valid-byte"),
84                pos_byte.start,
85            ))
86        } else if pos.x == 0 && pos.y == len - 1 && !self.has_final_newline() {
87            let pos_byte = self.byte_range_at(pos)?;
88            Ok((
89                self.byte_to_pos(pos_byte.start).expect("valid-byte"),
90                pos_byte.start,
91            ))
92        } else {
93            let pos_byte = self.byte_range_at(pos)?;
94            Ok((pos, pos_byte.start))
95        }
96    }
97}
98
99impl TextStore for TextRope {
100    type GraphemeIter<'a> = RopeGraphemes<'a>;
101
102    /// Can store multi-line content?
103    ///
104    /// If this returns false it is an error to call any function with
105    /// a row other than `0`.
106    fn is_multi_line(&self) -> bool {
107        true
108    }
109
110    /// Minimum byte position that has been changed
111    /// since the last call of min_changed().
112    ///
113    /// Used to invalidate caches.
114    fn cache_validity(&self) -> Option<usize> {
115        self.min_changed.take()
116    }
117
118    /// Content as string.
119    fn string(&self) -> String {
120        self.text.to_string()
121    }
122
123    /// Set content.
124    fn set_string(&mut self, t: &str) {
125        self.invalidate(0);
126        self.text = Rope::from_str(t);
127    }
128
129    /// Grapheme position to byte position.
130    /// This is the (start,end) position of the single grapheme after pos.
131    ///
132    /// * pos must be a valid position: row <= len_lines, col <= line_width of the row.
133    fn byte_range_at(&self, pos: TextPosition) -> Result<Range<usize>, TextError> {
134        let it_line = self.line_graphemes(pos.y)?;
135
136        let mut col = 0;
137        let mut byte_end = it_line.text_offset();
138        for grapheme in it_line {
139            if col == pos.x {
140                return Ok(grapheme.text_bytes());
141            }
142            col += 1;
143            byte_end = grapheme.text_bytes().end;
144        }
145        // one past the end is ok.
146        if col == pos.x {
147            Ok(byte_end..byte_end)
148        } else {
149            Err(TextError::ColumnIndexOutOfBounds(pos.x, col))
150        }
151    }
152
153    /// Grapheme range to byte range.
154    ///
155    /// * range must be a valid range. row <= len_lines, col <= line_width of the row.
156    fn byte_range(&self, range: TextRange) -> Result<Range<usize>, TextError> {
157        if range.start.y == range.end.y {
158            let it_line = self.line_graphemes(range.start.y)?;
159
160            let mut range_start = None;
161            let mut range_end = None;
162            let mut col = 0;
163            let mut byte_end = it_line.text_offset();
164            for grapheme in it_line {
165                if col == range.start.x {
166                    range_start = Some(grapheme.text_bytes().start);
167                }
168                if col == range.end.x {
169                    range_end = Some(grapheme.text_bytes().end);
170                }
171                if range_start.is_some() && range_end.is_some() {
172                    break;
173                }
174                col += 1;
175                byte_end = grapheme.text_bytes().end;
176            }
177            // one past the end is ok.
178            if col == range.start.x {
179                range_start = Some(byte_end);
180            }
181            if col == range.end.x {
182                range_end = Some(byte_end);
183            }
184
185            let Some(range_start) = range_start else {
186                return Err(TextError::ColumnIndexOutOfBounds(range.start.x, col));
187            };
188            let Some(range_end) = range_end else {
189                return Err(TextError::ColumnIndexOutOfBounds(range.end.x, col));
190            };
191
192            Ok(range_start..range_end)
193        } else {
194            let range_start = self.byte_range_at(range.start)?;
195            let range_end = self.byte_range_at(range.end)?;
196
197            Ok(range_start.start..range_end.start)
198        }
199    }
200
201    /// Byte position to grapheme position.
202    /// Returns the position that contains the given byte index.
203    ///
204    /// * byte must <= byte-len.
205    fn byte_to_pos(&self, byte_pos: usize) -> Result<TextPosition, TextError> {
206        let Ok(row) = self.text.try_byte_to_line(byte_pos) else {
207            return Err(TextError::ByteIndexOutOfBounds(
208                byte_pos,
209                self.text.len_bytes(),
210            ));
211        };
212        let row = row as upos_type;
213
214        let mut col = 0;
215        let it_line = self.line_graphemes(row)?;
216        for grapheme in it_line {
217            if byte_pos < grapheme.text_bytes().end {
218                break;
219            }
220            col += 1;
221        }
222
223        Ok(TextPosition::new(col, row))
224    }
225
226    /// Byte range to grapheme range.
227    ///
228    /// * byte must <= byte-len.
229    fn bytes_to_range(&self, bytes: Range<usize>) -> Result<TextRange, TextError> {
230        let Ok(start_row) = self.text.try_byte_to_line(bytes.start) else {
231            return Err(TextError::ByteIndexOutOfBounds(
232                bytes.start,
233                self.text.len_bytes(),
234            ));
235        };
236        let start_row = start_row as upos_type;
237        let Ok(end_row) = self.text.try_byte_to_line(bytes.end) else {
238            return Err(TextError::ByteIndexOutOfBounds(
239                bytes.end,
240                self.text.len_bytes(),
241            ));
242        };
243        let end_row = end_row as upos_type;
244
245        if start_row == end_row {
246            let mut col = 0;
247            let mut start = None;
248            let mut end = None;
249            let it_line = self.line_graphemes(start_row)?;
250            for grapheme in it_line {
251                if bytes.start < grapheme.text_bytes().end {
252                    if start.is_none() {
253                        start = Some(col);
254                    }
255                }
256                if bytes.end < grapheme.text_bytes().end {
257                    if end.is_none() {
258                        end = Some(col);
259                    }
260                }
261                if start.is_some() && end.is_some() {
262                    break;
263                }
264                col += 1;
265            }
266            if bytes.start == self.text.len_bytes() {
267                start = Some(col);
268            }
269            if bytes.end == self.text.len_bytes() {
270                end = Some(col);
271            }
272
273            let Some(start) = start else {
274                return Err(TextError::ByteIndexOutOfBounds(
275                    bytes.start,
276                    self.text.len_bytes(),
277                ));
278            };
279            let Some(end) = end else {
280                return Err(TextError::ByteIndexOutOfBounds(
281                    bytes.end,
282                    self.text.len_bytes(),
283                ));
284            };
285
286            Ok(TextRange::new((start, start_row), (end, end_row)))
287        } else {
288            let start = self.byte_to_pos(bytes.start)?;
289            let end = self.byte_to_pos(bytes.end)?;
290
291            Ok(TextRange::new(start, end))
292        }
293    }
294
295    /// A range of the text as `Cow<str>`.
296    ///
297    /// * range must be a valid range. row <= len_lines, col <= line_width of the row.
298    /// * pos must be inside of range.
299    fn str_slice(&self, range: TextRange) -> Result<Cow<'_, str>, TextError> {
300        let range = self.byte_range(range)?;
301        let v = self.text.byte_slice(range);
302        match v.as_str() {
303            Some(v) => Ok(Cow::Borrowed(v)),
304            None => Ok(Cow::Owned(v.to_string())),
305        }
306    }
307
308    /// A range of the text as `Cow<str>`.
309    ///
310    /// The byte-range must be a valid range.
311    fn str_slice_byte(&self, range: Range<usize>) -> Result<Cow<'_, str>, TextError> {
312        let Some(v) = self.text.get_byte_slice(range.clone()) else {
313            return Err(TextError::ByteRangeOutOfBounds(
314                Some(range.start),
315                Some(range.end),
316                self.text.len_bytes(),
317            ));
318        };
319        match v.as_str() {
320            Some(v) => Ok(Cow::Borrowed(v)),
321            None => Ok(Cow::Owned(v.to_string())),
322        }
323    }
324
325    /// Return a cursor over the graphemes of the range, start at the given position.
326    ///
327    /// * range must be a valid byte-range.
328    /// * pos must be inside of range.
329    fn graphemes_byte(
330        &self,
331        range: Range<usize>,
332        pos: usize,
333    ) -> Result<Self::GraphemeIter<'_>, TextError> {
334        if !range.contains(&pos) && range.end != pos {
335            return Err(TextError::ByteIndexOutOfBounds(pos, range.end));
336        }
337
338        let Some(s) = self.text.get_byte_slice(range.clone()) else {
339            return Err(TextError::ByteRangeInvalid(range.start, range.end));
340        };
341
342        let r = RopeGraphemes::new_offset(range.start, s, pos - range.start)?;
343
344        Ok(r)
345    }
346
347    /// Line as str.
348    ///
349    /// * row must be <= len_lines
350    fn line_at(&self, row: upos_type) -> Result<Cow<'_, str>, TextError> {
351        let len = self.len_lines() as upos_type;
352        if row < len {
353            if row < self.text.len_lines() as upos_type {
354                let v = self.text.get_line(row as usize).expect("valid_row");
355                match v.as_str() {
356                    Some(v) => Ok(Cow::Borrowed(v)),
357                    None => Ok(Cow::Owned(v.to_string())),
358                }
359            } else {
360                Ok(Cow::Borrowed(""))
361            }
362        } else {
363            Err(TextError::LineIndexOutOfBounds(row, len))
364        }
365    }
366
367    /// Iterate over text-lines, starting at line-offset.
368    ///
369    /// * row must be <= len_lines
370    fn lines_at(&self, row: upos_type) -> Result<impl Iterator<Item = Cow<'_, str>>, TextError> {
371        let len = self.len_lines() as upos_type;
372        if row < len {
373            let it = self.text.get_lines_at(row as usize).expect("valid_row");
374            Ok(it.map(|v| match v.as_str() {
375                Some(v) => Cow::Borrowed(v),
376                None => Cow::Owned(v.to_string()),
377            }))
378        } else {
379            Err(TextError::LineIndexOutOfBounds(row, len))
380        }
381    }
382
383    /// Return a line as an iterator over the graphemes.
384    /// This contains the '\n' at the end.
385    ///
386    /// * row must be <= len_lines
387    #[inline]
388    fn line_graphemes(&self, row: upos_type) -> Result<Self::GraphemeIter<'_>, TextError> {
389        let row = self.normalize_row(row)?;
390        let line_byte = self.text.try_line_to_byte(row as usize)?;
391        let line = if row < self.text.len_lines() as upos_type {
392            self.text.get_line(row as usize).expect("valid_row")
393        } else {
394            RopeSlice::from("")
395        };
396        Ok(RopeGraphemes::new(line_byte, line))
397    }
398
399    /// Line width as grapheme count.
400    /// Excludes the terminating '\n'.
401    ///
402    /// * row must be <= len_lines
403    #[inline]
404    fn line_width(&self, row: upos_type) -> Result<upos_type, TextError> {
405        let row = self.normalize_row(row)?;
406
407        if row < self.text.len_lines() as upos_type {
408            let r = self.text.get_line(row as usize).expect("valid_row");
409            let len = RopeGraphemes::new(0, r)
410                .filter(|g| !g.is_line_break())
411                .count() as upos_type;
412            Ok(len)
413        } else {
414            Ok(0)
415        }
416    }
417
418    #[inline]
419    #[allow(clippy::match_like_matches_macro)]
420    fn has_final_newline(&self) -> bool {
421        let len = self.text.len_bytes();
422        if len > 3 {
423            match (
424                self.text.get_byte(len - 3).expect("valid_pos"),
425                self.text.get_byte(len - 2).expect("valid_pos"),
426                self.text.get_byte(len - 1).expect("valid_pos"),
427            ) {
428                (_, _, b'\n')
429                | (_, _, b'\r')
430                | (_, _, 0x0c)
431                | (_, _, 0x0b)
432                | (_, _, 0x85)
433                | (0xE2, 0x80, 0xA8)
434                | (0xE2, 0x80, 0xA9) => true,
435                _ => false,
436            }
437        } else if len > 0 {
438            match self.text.get_byte(len - 1).expect("valid_pos") {
439                b'\n' | b'\r' | 0x0c | 0x0b | 0x85 => true,
440                _ => false,
441            }
442        } else {
443            false
444        }
445    }
446
447    #[inline]
448    fn len_bytes(&self) -> usize {
449        self.text.len_bytes()
450    }
451
452    #[inline]
453    fn len_lines(&self) -> upos_type {
454        match self.text.len_bytes() {
455            0 => 1,
456            _ => {
457                let l = self.text.len_lines();
458                let t = if self.has_final_newline() { 0 } else { 1 };
459                (l + t) as upos_type
460            }
461        }
462    }
463
464    /// Insert a char at the given position.
465    ///
466    /// * range must be a valid range. row <= len_lines, col <= line_width of the row.
467    fn insert_char(
468        &mut self,
469        mut pos: TextPosition,
470        ch: char,
471    ) -> Result<(TextRange, Range<usize>), TextError> {
472        // normalize the position (0, len_lines) to something sane.
473        let pos_byte;
474        (pos, pos_byte) = self.normalize(pos)?;
475
476        // invalidate cache
477        self.invalidate(pos_byte);
478
479        let mut it_gr =
480            RopeGraphemes::new_offset(0, self.text.slice(..), pos_byte).expect("valid_bytes");
481        let prev = it_gr.prev();
482        it_gr.next();
483        let next = it_gr.next();
484
485        let insert_range = if ch == '\n' {
486            if let Some(prev) = prev {
487                if prev == "\r" {
488                    TextRange::new(pos, pos)
489                } else {
490                    TextRange::new(pos, (0, pos.y + 1))
491                }
492            } else {
493                TextRange::new(pos, (0, pos.y + 1))
494            }
495        } else if ch == '\r' {
496            if let Some(next) = next {
497                if next == "\n" {
498                    TextRange::new(pos, pos)
499                } else {
500                    TextRange::new(pos, (0, pos.y + 1))
501                }
502            } else {
503                TextRange::new(pos, (0, pos.y + 1))
504            }
505        } else if cfg!(feature = "unicode_lines")
506            && (ch == '\u{000C}'
507                || ch == '\u{000B}'
508                || ch == '\u{0085}'
509                || ch == '\u{2028}'
510                || ch == '\u{2029}')
511        {
512            TextRange::new(pos, (0, pos.y + 1))
513        } else {
514            // test for combining codepoints.
515            let mut len = 0;
516            self.buf.clear();
517            if let Some(prev) = prev {
518                len += 1;
519                self.buf.push_str(prev.grapheme());
520            }
521            len += 1;
522            self.buf.push(ch);
523            if let Some(next) = next {
524                len += 1;
525                self.buf.push_str(next.grapheme());
526            }
527            let buf_len = self.buf.graphemes(true).count();
528
529            let n = len - buf_len;
530
531            if n == 0 {
532                TextRange::new(pos, (pos.x + 1, pos.y))
533            } else if n == 1 {
534                // combined some
535                TextRange::new(pos, pos)
536            } else if n == 2 {
537                // combined some
538                TextRange::new(pos, pos)
539            } else {
540                unreachable!("insert_char {:?}", self.buf);
541            }
542        };
543
544        let pos_char = self.text.try_byte_to_char(pos_byte).expect("valid_bytes");
545
546        self.text
547            .try_insert_char(pos_char, ch)
548            .expect("valid_chars");
549
550        Ok((insert_range, pos_byte..pos_byte + ch.len_utf8()))
551    }
552
553    /// Insert a text str at the given position.
554    ///
555    /// * range must be a valid range. row <= len_lines, col <= line_width of the row.
556    fn insert_str(
557        &mut self,
558        mut pos: TextPosition,
559        txt: &str,
560    ) -> Result<(TextRange, Range<usize>), TextError> {
561        // normalize the position (0, len_lines-1) to something sane.
562        let pos_byte;
563        (pos, pos_byte) = self.normalize(pos)?;
564
565        self.invalidate(pos_byte);
566
567        let pos_char = self.text.try_byte_to_char(pos_byte).expect("valid_bytes");
568
569        let mut line_count = 0;
570        let mut last_linebreak_idx = 0;
571        for c in StrGraphemes::new(0, txt) {
572            let test = if cfg!(feature = "cr_lines") {
573                c == "\r" || c == "\n" || c == "\r\n"
574            } else if cfg!(feature = "unicode_lines") {
575                c == "\r"
576                    || c == "\n"
577                    || c == "\r\n"
578                    || c == "\u{000C}"
579                    || c == "\u{000B}"
580                    || c == "\u{0085}"
581                    || c == "\u{2028}"
582                    || c == "\u{2029}"
583            } else {
584                c == "\n" || c == "\r\n"
585            };
586
587            if test {
588                line_count += 1;
589                last_linebreak_idx = c.text_bytes().end;
590            }
591        }
592
593        let insert_range = if line_count > 0 {
594            // the remainder of the line after pos extends the last line of
595            // the inserted text. they might combine in some way.
596
597            // Fill in the last line of the inserted text.
598            self.buf.clear();
599            self.buf.push_str(&txt[last_linebreak_idx..]);
600            let old_offset = self.buf.len();
601
602            // Fill in the remainder of the current text after the insert position.
603            let line_offset = self
604                .text
605                .try_line_to_byte(pos.y as usize)
606                .expect("valid-pos");
607            let split = self //
608                .byte_range_at(pos)
609                .expect("valid_pos")
610                .start
611                - line_offset;
612            let remainder = self
613                .text
614                .get_line(pos.y as usize)
615                .expect("valid-pos")
616                .get_byte_slice(split..)
617                .expect("valid-pos");
618            for cc in remainder.chars() {
619                self.buf.push(cc);
620            }
621            let new_len = self.buf.graphemes(true).count() as upos_type;
622            let old_len = self.buf[old_offset..].graphemes(true).count() as upos_type;
623
624            self.text.try_insert(pos_char, txt).expect("valid_pos");
625
626            TextRange::new(pos, (new_len - old_len, pos.y + line_count))
627        } else {
628            // no way to know if the insert text combines with a surrounding char.
629            // the difference of the grapheme len seems safe though.
630            let old_len = self.line_width(pos.y).expect("valid_line");
631            self.text.try_insert(pos_char, txt).expect("valid_pos");
632            let new_len = self.line_width(pos.y).expect("valid_line");
633
634            TextRange::new(pos, (pos.x + new_len - old_len, pos.y))
635        };
636
637        Ok((insert_range, pos_byte..pos_byte + txt.len()))
638    }
639
640    /// Remove the given text range.
641    ///
642    /// * range must be a valid range. row <= len_lines, col <= line_width of the row.
643    fn remove(
644        &mut self,
645        mut range: TextRange,
646    ) -> Result<(String, (TextRange, Range<usize>)), TextError> {
647        let start_byte_pos;
648        let end_byte_pos;
649
650        (range.start, start_byte_pos) = self.normalize(range.start)?;
651        (range.end, end_byte_pos) = self.normalize(range.end)?;
652
653        self.invalidate(start_byte_pos);
654
655        let old_text = self
656            .text
657            .get_byte_slice(start_byte_pos..end_byte_pos)
658            .expect("valid_bytes");
659        let old_text = old_text.to_string();
660
661        let start_pos = self
662            .text
663            .try_byte_to_char(start_byte_pos)
664            .expect("valid_bytes");
665        let end_pos = self
666            .text
667            .try_byte_to_char(end_byte_pos)
668            .expect("valid_bytes");
669
670        self.text.try_remove(start_pos..end_pos).expect("valid_pos");
671
672        Ok((old_text, (range, start_byte_pos..end_byte_pos)))
673    }
674
675    /// Insert a string at the given byte index.
676    /// Call this only for undo.
677    ///
678    /// byte_pos must be <= len bytes.
679    fn insert_b(&mut self, byte_pos: usize, t: &str) -> Result<(), TextError> {
680        let pos_char = self.text.try_byte_to_char(byte_pos)?;
681
682        self.invalidate(byte_pos);
683        self.text.try_insert(pos_char, t).expect("valid_pos");
684        Ok(())
685    }
686
687    /// Remove the given byte-range.
688    /// Call this only for undo.
689    ///
690    /// byte_pos must be <= len bytes.
691    fn remove_b(&mut self, byte_range: Range<usize>) -> Result<(), TextError> {
692        let start_char = self.text.try_byte_to_char(byte_range.start)?;
693        let end_char = self.text.try_byte_to_char(byte_range.end)?;
694
695        self.invalidate(byte_range.start);
696        self.text
697            .try_remove(start_char..end_char)
698            .expect("valid_range");
699        Ok(())
700    }
701}
702
703impl From<ropey::Error> for TextError {
704    fn from(err: ropey::Error) -> Self {
705        use ropey::Error;
706        match err {
707            Error::ByteIndexOutOfBounds(i, l) => TextError::ByteIndexOutOfBounds(i, l),
708            Error::CharIndexOutOfBounds(i, l) => TextError::CharIndexOutOfBounds(i, l),
709            Error::LineIndexOutOfBounds(i, l) => {
710                TextError::LineIndexOutOfBounds(i as upos_type, l as upos_type)
711            }
712            Error::Utf16IndexOutOfBounds(_, _) => {
713                unreachable!("{:?}", err)
714            }
715            Error::ByteIndexNotCharBoundary(i) => TextError::ByteIndexNotCharBoundary(i),
716            Error::ByteRangeNotCharBoundary(s, e) => TextError::ByteRangeNotCharBoundary(s, e),
717            Error::ByteRangeInvalid(s, e) => TextError::ByteRangeInvalid(s, e),
718            Error::CharRangeInvalid(s, e) => TextError::CharRangeInvalid(s, e),
719            Error::ByteRangeOutOfBounds(s, e, l) => TextError::ByteRangeOutOfBounds(s, e, l),
720            Error::CharRangeOutOfBounds(s, e, l) => TextError::CharRangeOutOfBounds(s, e, l),
721            _ => {
722                unreachable!("{:?}", err)
723            }
724        }
725    }
726}