rat_text/text_store/
text_string.rs

1use crate::grapheme::StrGraphemes;
2use crate::text_store::TextStore;
3use crate::{upos_type, TextError, TextPosition, TextRange};
4use std::borrow::Cow;
5use std::cell::Cell;
6use std::cmp::min;
7use std::iter::once;
8use std::mem;
9use std::ops::Range;
10use unicode_segmentation::UnicodeSegmentation;
11
12/// Single line text-store.
13#[derive(Debug, Default, Clone)]
14pub struct TextString {
15    // text
16    text: String,
17    // len as grapheme count
18    len: upos_type,
19    // minimum byte position changed since last reset.
20    min_changed: Cell<Option<usize>>,
21    // tmp buffer
22    buf: String,
23}
24
25/// Length as grapheme count, excluding line breaks.
26#[inline]
27fn str_len(s: &str) -> upos_type {
28    s.graphemes(true).count() as upos_type
29}
30
31impl TextString {
32    /// New empty.
33    pub fn new() -> Self {
34        Self {
35            text: Default::default(),
36            len: 0,
37            min_changed: Default::default(),
38            buf: Default::default(),
39        }
40    }
41
42    /// New from string.
43    pub fn new_text(t: &str) -> Self {
44        Self {
45            text: t.into(),
46            len: str_len(t),
47            min_changed: Default::default(),
48            buf: Default::default(),
49        }
50    }
51
52    /// New from string.
53    pub fn new_string(t: String) -> Self {
54        let len = str_len(&t);
55        Self {
56            text: t,
57            len,
58            min_changed: Default::default(),
59            buf: Default::default(),
60        }
61    }
62
63    /// str
64    pub fn as_str(&self) -> &str {
65        self.text.as_str()
66    }
67}
68
69impl TextString {
70    fn invalidate_cache(&self, byte_pos: usize) {
71        self.min_changed.update(|v| match v {
72            None => Some(byte_pos),
73            Some(w) => Some(min(byte_pos, w)),
74        });
75    }
76
77    // fn normalize_row(&self, row: upos_type) -> Result<upos_type, TextError> {
78    //     if row > 1 {
79    //         Err(TextError::LineIndexOutOfBounds(row, 1))
80    //     } else if row == 1 {
81    //         Ok(0)
82    //     } else {
83    //         Ok(row)
84    //     }
85    // }
86    //
87    // #[inline]
88    // fn normalize(&self, pos: TextPosition) -> Result<TextPosition, TextError> {
89    //     if pos.y > 1 {
90    //         Err(TextError::LineIndexOutOfBounds(pos.y, 1))
91    //     } else if pos.x > 0 && pos.y == 1 {
92    //         Err(TextError::ColumnIndexOutOfBounds(pos.x, 0))
93    //     } else if pos.x == 0 && pos.y == 1 {
94    //         Ok(TextPosition::new(self.len, 0))
95    //     } else {
96    //         Ok(pos)
97    //     }
98    // }
99}
100
101impl TextStore for TextString {
102    type GraphemeIter<'a> = StrGraphemes<'a>;
103
104    /// Can store multi-line content?
105    #[inline]
106    fn is_multi_line(&self) -> bool {
107        false
108    }
109
110    #[inline]
111    fn should_insert_newline(&self, _: TextPosition) -> bool {
112        false
113    }
114
115    /// Number of lines.
116    #[inline]
117    fn len_lines(&self) -> upos_type {
118        1
119    }
120
121    #[inline]
122    fn cache_validity(&self) -> Option<usize> {
123        self.min_changed.take()
124    }
125
126    /// Get content as string.
127    fn string(&self) -> String {
128        self.text.to_string()
129    }
130
131    /// Set content as string.
132    fn set_string(&mut self, t: &str) {
133        self.invalidate_cache(0);
134        self.text = t.to_string();
135        self.len = str_len(&self.text);
136    }
137
138    /// Grapheme position to byte position.
139    /// This is the (start,end) position of the single grapheme after pos.
140    ///
141    /// * pos must be a valid position: row <= len_lines, col <= line_width of the row.
142    fn byte_range_at(&self, pos: TextPosition) -> Result<Range<usize>, TextError> {
143        if pos == TextPosition::new(0, 1) {
144            let len = self.text.len();
145            return Ok(len..len);
146        }
147
148        if pos.y != 0 {
149            return Err(TextError::LineIndexOutOfBounds(pos.y, 1));
150        };
151
152        let mut byte_range = None;
153        for (cidx, (idx, c)) in self
154            .text
155            .grapheme_indices(true)
156            .chain(once((self.text.len(), "")))
157            .enumerate()
158        {
159            if cidx == pos.x as usize {
160                byte_range = Some(idx..idx + c.len());
161                break;
162            }
163        }
164
165        if let Some(byte_range) = byte_range {
166            Ok(byte_range)
167        } else {
168            Err(TextError::ColumnIndexOutOfBounds(
169                pos.x,
170                str_len(&self.text),
171            ))
172        }
173    }
174
175    /// Grapheme range to byte range.
176    ///
177    /// Allows the special text-position (0,1) as a substitute for EOL.
178    ///
179    /// * range must be a valid range. row <= len_lines, col <= line_width of the row.
180    fn byte_range(&self, range: TextRange) -> Result<Range<usize>, TextError> {
181        if range.start.y != 0 && range.start != TextPosition::new(0, 1) {
182            return Err(TextError::LineIndexOutOfBounds(range.start.y, 1));
183        };
184        if range.end.y != 0 && range.end != TextPosition::new(0, 1) {
185            return Err(TextError::LineIndexOutOfBounds(range.end.y, 1));
186        };
187
188        let mut byte_start = None;
189        let mut byte_end = None;
190
191        if range.start == TextPosition::new(0, 1) {
192            byte_start = Some(self.text.len());
193        }
194        if range.end == TextPosition::new(0, 1) {
195            byte_end = Some(self.text.len());
196        }
197
198        if byte_start.is_none() || byte_end.is_none() {
199            for (cidx, (idx, _)) in self
200                .text
201                .grapheme_indices(true)
202                .chain(once((self.text.len(), "")))
203                .enumerate()
204            {
205                if TextPosition::new(cidx as upos_type, 0) == range.start {
206                    byte_start = Some(idx);
207                }
208                if TextPosition::new(cidx as upos_type, 0) == range.end {
209                    byte_end = Some(idx);
210                }
211                if byte_start.is_some() && byte_end.is_some() {
212                    break;
213                }
214            }
215        }
216
217        let Some(byte_start) = byte_start else {
218            return Err(TextError::ColumnIndexOutOfBounds(
219                range.start.x,
220                str_len(&self.text),
221            ));
222        };
223        let Some(byte_end) = byte_end else {
224            return Err(TextError::ColumnIndexOutOfBounds(
225                range.end.x,
226                str_len(&self.text),
227            ));
228        };
229
230        Ok(byte_start..byte_end)
231    }
232
233    /// Byte position to grapheme position.
234    /// Returns the position that contains the given byte index.
235    ///
236    /// * byte must <= byte-len.
237    fn byte_to_pos(&self, byte_pos: usize) -> Result<TextPosition, TextError> {
238        let mut pos = None;
239
240        for (cidx, (c_start, c)) in self
241            .text
242            .grapheme_indices(true)
243            .chain(once((self.text.len(), " ")))
244            .enumerate()
245        {
246            if byte_pos < c_start + c.len() {
247                pos = Some(cidx);
248                break;
249            }
250        }
251
252        if let Some(pos) = pos {
253            Ok(TextPosition::new(pos as upos_type, 0))
254        } else {
255            Err(TextError::ByteIndexOutOfBounds(byte_pos, self.text.len()))
256        }
257    }
258
259    /// Byte range to grapheme range.
260    ///
261    /// * byte must <= byte-len.
262    fn bytes_to_range(&self, bytes: Range<usize>) -> Result<TextRange, TextError> {
263        let mut start = None;
264        let mut end = None;
265        for (cidx, (c_start, c)) in self
266            .text
267            .grapheme_indices(true)
268            .chain(once((self.text.len(), " ")))
269            .enumerate()
270        {
271            if bytes.start < c_start + c.len() {
272                if start.is_none() {
273                    start = Some(cidx as upos_type);
274                }
275            }
276            if bytes.end < c_start + c.len() {
277                if end.is_none() {
278                    end = Some(cidx as upos_type);
279                }
280            }
281            if start.is_some() && end.is_some() {
282                break;
283            }
284        }
285
286        let Some(start) = start else {
287            return Err(TextError::ByteIndexOutOfBounds(
288                bytes.start,
289                self.text.len(),
290            ));
291        };
292        let Some(end) = end else {
293            return Err(TextError::ByteIndexOutOfBounds(bytes.end, self.text.len()));
294        };
295
296        Ok(TextRange::new((start, 0), (end, 0)))
297    }
298
299    /// A range of the text as `Cow<str>`.
300    ///
301    /// * range must be a valid range. row <= len_lines, col <= line_width of the row.
302    /// * pos must be inside of range.
303    #[inline]
304    fn str_slice(&self, range: TextRange) -> Result<Cow<'_, str>, TextError> {
305        let range = self.byte_range(range)?;
306        Ok(Cow::Borrowed(&self.text[range.start..range.end]))
307    }
308
309    /// A range of the text as `Cow<str>`.
310    ///
311    /// * range must be valid
312    #[inline]
313    fn str_slice_byte(&self, range: Range<usize>) -> Result<Cow<'_, str>, TextError> {
314        Ok(Cow::Borrowed(&self.text[range.start..range.end]))
315    }
316
317    /// Return a cursor over the graphemes of the range, start at the given position.
318    ///
319    /// * range must be a valid range. row <= len_lines, col <= line_width of the row.
320    /// * pos must be inside of range.
321    fn graphemes(
322        &self,
323        range: TextRange,
324        pos: TextPosition,
325    ) -> Result<Self::GraphemeIter<'_>, TextError> {
326        let range_byte = self.byte_range(range)?;
327        let pos_byte = self.byte_range_at(pos)?;
328        Ok(StrGraphemes::new_offset(
329            range_byte.start,
330            &self.text[range_byte.clone()],
331            pos_byte.start - range_byte.start,
332        ))
333    }
334
335    fn graphemes_byte(
336        &self,
337        range: Range<usize>,
338        pos: usize,
339    ) -> Result<Self::GraphemeIter<'_>, TextError> {
340        if !range.contains(&pos) && range.end != pos {
341            return Err(TextError::ByteIndexOutOfBounds(pos, range.end));
342        }
343        if !self.text.is_char_boundary(range.start) || !self.text.is_char_boundary(range.end) {
344            return Err(TextError::ByteRangeNotCharBoundary(
345                Some(range.start),
346                Some(range.end),
347            ));
348        }
349        if !self.text.is_char_boundary(pos) {
350            return Err(TextError::ByteIndexNotCharBoundary(pos));
351        }
352
353        Ok(StrGraphemes::new_offset(
354            range.start,
355            &self.text[range.clone()],
356            pos - range.start,
357        ))
358    }
359
360    /// Line as str.
361    ///
362    /// * row must be <= len_lines
363    #[inline]
364    fn line_at(&self, row: upos_type) -> Result<Cow<'_, str>, TextError> {
365        if row == 0 {
366            Ok(Cow::Borrowed(&self.text))
367        } else if row == 1 {
368            Ok(Cow::Borrowed(""))
369        } else {
370            Err(TextError::LineIndexOutOfBounds(row, 1))
371        }
372    }
373
374    /// Iterate over text-lines, starting at line-offset.
375    ///
376    /// * row must be <= len_lines
377    #[inline]
378    fn lines_at(&self, row: upos_type) -> Result<impl Iterator<Item = Cow<'_, str>>, TextError> {
379        if row == 0 {
380            Ok(once(Cow::Borrowed(self.text.as_str())))
381        } else if row == 1 {
382            Ok(once(Cow::Borrowed("")))
383        } else {
384            Err(TextError::LineIndexOutOfBounds(row, 1))
385        }
386    }
387
388    /// Return a line as an iterator over the graphemes.
389    /// This contains the '\n' at the end.
390    ///
391    /// * row must be <= len_lines
392    #[inline]
393    fn line_graphemes(&self, row: upos_type) -> Result<Self::GraphemeIter<'_>, TextError> {
394        if row == 0 {
395            Ok(StrGraphemes::new(0, &self.text))
396        } else if row == 1 {
397            Ok(StrGraphemes::new(self.text.len(), ""))
398        } else {
399            Err(TextError::LineIndexOutOfBounds(row, 1))
400        }
401    }
402
403    /// Line width of row as grapheme count.
404    /// Excludes the terminating '\n'.
405    ///
406    /// * row must be <= len_lines
407    #[inline]
408    fn line_width(&self, row: upos_type) -> Result<upos_type, TextError> {
409        if row == 0 {
410            Ok(self.len)
411        } else if row == 1 {
412            Ok(0)
413        } else {
414            Err(TextError::LineIndexOutOfBounds(row, 1))
415        }
416    }
417
418    /// Insert a char at the given position.
419    ///
420    /// * range must be a valid range. row <= len_lines, col <= line_width of the row.
421    fn insert_char(
422        &mut self,
423        mut pos: TextPosition,
424        c: char,
425    ) -> Result<(TextRange, Range<usize>), TextError> {
426        if pos == TextPosition::new(0, 1) {
427            pos = TextPosition::new(self.len, 0);
428        }
429
430        if pos.y != 0 {
431            return Err(TextError::TextPositionOutOfBounds(pos));
432        }
433
434        let byte_pos = self.byte_range_at(pos)?;
435        let (before, after) = self.text.split_at(byte_pos.start);
436
437        self.invalidate_cache(byte_pos.start);
438
439        let old_len = self.len;
440        self.buf.clear();
441        self.buf.push_str(before);
442        self.buf.push(c);
443        self.buf.push_str(after);
444
445        let before_bytes = before.len();
446        let new_len = str_len(&self.buf);
447
448        mem::swap(&mut self.text, &mut self.buf);
449        self.len = new_len;
450
451        Ok((
452            TextRange::new((pos.x, 0), (pos.x + (new_len - old_len), 0)),
453            before_bytes..before_bytes + c.len_utf8(),
454        ))
455    }
456
457    /// Insert a str at position.
458    fn insert_str(
459        &mut self,
460        mut pos: TextPosition,
461        t: &str,
462    ) -> Result<(TextRange, Range<usize>), TextError> {
463        if pos == TextPosition::new(0, 1) {
464            pos = TextPosition::new(self.len, 0);
465        }
466
467        if pos.y != 0 {
468            return Err(TextError::TextPositionOutOfBounds(pos));
469        }
470
471        let byte_pos = self.byte_range_at(pos)?;
472        let (before, after) = self.text.split_at(byte_pos.start);
473
474        self.invalidate_cache(byte_pos.start);
475
476        let old_len = self.len;
477        self.buf.clear();
478        self.buf.push_str(before);
479        self.buf.push_str(t);
480        self.buf.push_str(after);
481
482        let before_bytes = before.len();
483        let new_len = str_len(&self.buf);
484
485        mem::swap(&mut self.text, &mut self.buf);
486        self.len = new_len;
487
488        Ok((
489            TextRange::new((pos.x, 0), (pos.x + (new_len - old_len), 0)),
490            before_bytes..before_bytes + t.len(),
491        ))
492    }
493
494    /// Remove a range.
495    fn remove(
496        &mut self,
497        mut range: TextRange,
498    ) -> Result<(String, (TextRange, Range<usize>)), TextError> {
499        if range.start == TextPosition::new(0, 1) {
500            range.start = TextPosition::new(self.len, 0);
501        }
502        if range.end == TextPosition::new(0, 1) {
503            range.end = TextPosition::new(self.len, 0);
504        }
505
506        if range.start.y != 0 {
507            return Err(TextError::TextRangeOutOfBounds(range));
508        }
509        if range.end.y != 0 {
510            return Err(TextError::TextRangeOutOfBounds(range));
511        }
512
513        let bytes = self.byte_range(range)?;
514
515        self.invalidate_cache(bytes.start);
516
517        let (before, remove, after) = (
518            &self.text[..bytes.start],
519            &self.text[bytes.start..bytes.end],
520            &self.text[bytes.end..],
521        );
522
523        self.buf.clear();
524        self.buf.push_str(before);
525        self.buf.push_str(after);
526
527        let remove_str = remove.to_string();
528        let before_bytes = before.len();
529        let remove_bytes = remove.len();
530        let new_len = str_len(&self.buf);
531
532        mem::swap(&mut self.text, &mut self.buf);
533        self.len = new_len;
534
535        Ok((
536            remove_str,
537            (range, before_bytes..before_bytes + remove_bytes),
538        ))
539    }
540
541    /// Insert a string at the given byte index.
542    fn insert_b(&mut self, byte_pos: usize, t: &str) -> Result<(), TextError> {
543        let Some((before, after)) = self.text.split_at_checked(byte_pos) else {
544            return Err(TextError::ByteIndexNotCharBoundary(byte_pos));
545        };
546
547        self.invalidate_cache(byte_pos);
548
549        self.buf.clear();
550        self.buf.push_str(before);
551        self.buf.push_str(t);
552        self.buf.push_str(after);
553        let new_len = str_len(&self.buf);
554
555        mem::swap(&mut self.text, &mut self.buf);
556        self.len = new_len;
557
558        Ok(())
559    }
560
561    /// Remove the given byte-range.
562    fn remove_b(&mut self, byte_range: Range<usize>) -> Result<(), TextError> {
563        let Some((before, after)) = self.text.split_at_checked(byte_range.start) else {
564            return Err(TextError::ByteIndexNotCharBoundary(byte_range.start));
565        };
566        let Some((_remove, after)) = after.split_at_checked(byte_range.end - byte_range.start)
567        else {
568            return Err(TextError::ByteIndexNotCharBoundary(byte_range.end));
569        };
570
571        self.invalidate_cache(byte_range.start);
572
573        self.buf.clear();
574        self.buf.push_str(before);
575        self.buf.push_str(after);
576        let new_len = str_len(&self.buf);
577
578        mem::swap(&mut self.text, &mut self.buf);
579        self.len = new_len;
580
581        Ok(())
582    }
583}