rat_text/text_store/
text_string.rs

1use crate::grapheme::StrGraphemes;
2use crate::text_store::TextStore;
3use crate::{TextError, TextPosition, TextRange, upos_type};
4use std::borrow::Cow;
5use std::cell::Cell;
6use std::cmp::min;
7use std::iter::once;
8use std::mem;
9use std::ops::Range;
10use unicode_segmentation::UnicodeSegmentation;
11
12/// Single line text-store.
13#[derive(Debug, Default, Clone)]
14pub struct TextString {
15    // text
16    text: String,
17    // len as grapheme count
18    len: upos_type,
19    // minimum byte position changed since last reset.
20    min_changed: Cell<Option<usize>>,
21    // tmp buffer
22    buf: String,
23}
24
25/// Length as grapheme count, excluding line breaks.
26#[inline]
27fn str_len(s: &str) -> upos_type {
28    s.graphemes(true).count() as upos_type
29}
30
31impl TextString {
32    /// New empty.
33    pub fn new() -> Self {
34        Self {
35            text: Default::default(),
36            len: 0,
37            min_changed: Default::default(),
38            buf: Default::default(),
39        }
40    }
41
42    /// New from string.
43    pub fn new_text(t: &str) -> Self {
44        Self {
45            text: t.into(),
46            len: str_len(t),
47            min_changed: Default::default(),
48            buf: Default::default(),
49        }
50    }
51
52    /// New from string.
53    pub fn new_string(t: String) -> Self {
54        let len = str_len(&t);
55        Self {
56            text: t,
57            len,
58            min_changed: Default::default(),
59            buf: Default::default(),
60        }
61    }
62
63    /// str
64    pub fn as_str(&self) -> &str {
65        self.text.as_str()
66    }
67}
68
69impl TextString {
70    fn invalidate_cache(&self, byte_pos: usize) {
71        self.min_changed.update(|v| match v {
72            None => Some(byte_pos),
73            Some(w) => Some(min(byte_pos, w)),
74        });
75    }
76}
77
78impl TextStore for TextString {
79    type GraphemeIter<'a> = StrGraphemes<'a>;
80
81    /// Can store multi-line content?
82    #[inline]
83    fn is_multi_line(&self) -> bool {
84        false
85    }
86
87    fn has_final_newline(&self) -> bool {
88        true
89    }
90
91    /// Number of lines.
92    #[inline]
93    fn len_lines(&self) -> upos_type {
94        1
95    }
96
97    /// Length in bytes.
98    #[inline]
99    fn len_bytes(&self) -> usize {
100        self.text.len()
101    }
102
103    #[inline]
104    fn cache_validity(&self) -> Option<usize> {
105        self.min_changed.take()
106    }
107
108    /// Get content as string.
109    fn string(&self) -> String {
110        self.text.to_string()
111    }
112
113    /// Set content as string.
114    fn set_string(&mut self, t: &str) {
115        self.invalidate_cache(0);
116        self.text = t.to_string();
117        self.len = str_len(&self.text);
118    }
119
120    /// Grapheme position to byte position.
121    /// This is the (start,end) position of the single grapheme after pos.
122    ///
123    /// * pos must be a valid position: row <= len_lines, col <= line_width of the row.
124    fn byte_range_at(&self, pos: TextPosition) -> Result<Range<usize>, TextError> {
125        if pos == TextPosition::new(0, 1) {
126            let len = self.text.len();
127            return Ok(len..len);
128        }
129
130        if pos.y != 0 {
131            return Err(TextError::LineIndexOutOfBounds(pos.y, 1));
132        };
133
134        let mut byte_range = None;
135        for (cidx, (idx, c)) in self
136            .text
137            .grapheme_indices(true)
138            .chain(once((self.text.len(), "")))
139            .enumerate()
140        {
141            if cidx == pos.x as usize {
142                byte_range = Some(idx..idx + c.len());
143                break;
144            }
145        }
146
147        if let Some(byte_range) = byte_range {
148            Ok(byte_range)
149        } else {
150            Err(TextError::ColumnIndexOutOfBounds(
151                pos.x,
152                str_len(&self.text),
153            ))
154        }
155    }
156
157    /// Grapheme range to byte range.
158    ///
159    /// Allows the special text-position (0,1) as a substitute for EOL.
160    ///
161    /// * range must be a valid range. row <= len_lines, col <= line_width of the row.
162    fn byte_range(&self, range: TextRange) -> Result<Range<usize>, TextError> {
163        if range.start.y != 0 && range.start != TextPosition::new(0, 1) {
164            return Err(TextError::LineIndexOutOfBounds(range.start.y, 1));
165        };
166        if range.end.y != 0 && range.end != TextPosition::new(0, 1) {
167            return Err(TextError::LineIndexOutOfBounds(range.end.y, 1));
168        };
169
170        let mut byte_start = None;
171        let mut byte_end = None;
172
173        if range.start == TextPosition::new(0, 1) {
174            byte_start = Some(self.text.len());
175        }
176        if range.end == TextPosition::new(0, 1) {
177            byte_end = Some(self.text.len());
178        }
179
180        if byte_start.is_none() || byte_end.is_none() {
181            for (cidx, (idx, _)) in self
182                .text
183                .grapheme_indices(true)
184                .chain(once((self.text.len(), "")))
185                .enumerate()
186            {
187                if TextPosition::new(cidx as upos_type, 0) == range.start {
188                    byte_start = Some(idx);
189                }
190                if TextPosition::new(cidx as upos_type, 0) == range.end {
191                    byte_end = Some(idx);
192                }
193                if byte_start.is_some() && byte_end.is_some() {
194                    break;
195                }
196            }
197        }
198
199        let Some(byte_start) = byte_start else {
200            return Err(TextError::ColumnIndexOutOfBounds(
201                range.start.x,
202                str_len(&self.text),
203            ));
204        };
205        let Some(byte_end) = byte_end else {
206            return Err(TextError::ColumnIndexOutOfBounds(
207                range.end.x,
208                str_len(&self.text),
209            ));
210        };
211
212        Ok(byte_start..byte_end)
213    }
214
215    /// Byte position to grapheme position.
216    /// Returns the position that contains the given byte index.
217    ///
218    /// * byte must <= byte-len.
219    fn byte_to_pos(&self, byte_pos: usize) -> Result<TextPosition, TextError> {
220        let mut pos = None;
221
222        for (cidx, (c_start, c)) in self
223            .text
224            .grapheme_indices(true)
225            .chain(once((self.text.len(), " ")))
226            .enumerate()
227        {
228            if byte_pos < c_start + c.len() {
229                pos = Some(cidx);
230                break;
231            }
232        }
233
234        if let Some(pos) = pos {
235            Ok(TextPosition::new(pos as upos_type, 0))
236        } else {
237            Err(TextError::ByteIndexOutOfBounds(byte_pos, self.text.len()))
238        }
239    }
240
241    /// Byte range to grapheme range.
242    ///
243    /// * byte must <= byte-len.
244    fn bytes_to_range(&self, bytes: Range<usize>) -> Result<TextRange, TextError> {
245        let mut start = None;
246        let mut end = None;
247        for (cidx, (c_start, c)) in self
248            .text
249            .grapheme_indices(true)
250            .chain(once((self.text.len(), " ")))
251            .enumerate()
252        {
253            if bytes.start < c_start + c.len() {
254                if start.is_none() {
255                    start = Some(cidx as upos_type);
256                }
257            }
258            if bytes.end < c_start + c.len() {
259                if end.is_none() {
260                    end = Some(cidx as upos_type);
261                }
262            }
263            if start.is_some() && end.is_some() {
264                break;
265            }
266        }
267
268        let Some(start) = start else {
269            return Err(TextError::ByteIndexOutOfBounds(
270                bytes.start,
271                self.text.len(),
272            ));
273        };
274        let Some(end) = end else {
275            return Err(TextError::ByteIndexOutOfBounds(bytes.end, self.text.len()));
276        };
277
278        Ok(TextRange::new((start, 0), (end, 0)))
279    }
280
281    /// A range of the text as `Cow<str>`.
282    ///
283    /// * range must be a valid range. row <= len_lines, col <= line_width of the row.
284    /// * pos must be inside of range.
285    #[inline]
286    fn str_slice(&self, range: TextRange) -> Result<Cow<'_, str>, TextError> {
287        let range = self.byte_range(range)?;
288        Ok(Cow::Borrowed(&self.text[range.start..range.end]))
289    }
290
291    /// A range of the text as `Cow<str>`.
292    ///
293    /// * range must be valid
294    #[inline]
295    fn str_slice_byte(&self, range: Range<usize>) -> Result<Cow<'_, str>, TextError> {
296        Ok(Cow::Borrowed(&self.text[range.start..range.end]))
297    }
298
299    fn graphemes_byte(
300        &self,
301        range: Range<usize>,
302        pos: usize,
303    ) -> Result<Self::GraphemeIter<'_>, TextError> {
304        if !range.contains(&pos) && range.end != pos {
305            return Err(TextError::ByteIndexOutOfBounds(pos, range.end));
306        }
307        if !self.text.is_char_boundary(range.start) || !self.text.is_char_boundary(range.end) {
308            return Err(TextError::ByteRangeNotCharBoundary(
309                Some(range.start),
310                Some(range.end),
311            ));
312        }
313        if !self.text.is_char_boundary(pos) {
314            return Err(TextError::ByteIndexNotCharBoundary(pos));
315        }
316
317        Ok(StrGraphemes::new_offset(
318            range.start,
319            &self.text[range.clone()],
320            pos - range.start,
321        ))
322    }
323
324    /// Line as str.
325    ///
326    /// * row must be <= len_lines
327    #[inline]
328    fn line_at(&self, row: upos_type) -> Result<Cow<'_, str>, TextError> {
329        if row == 0 {
330            Ok(Cow::Borrowed(&self.text))
331        } else if row == 1 {
332            Ok(Cow::Borrowed(""))
333        } else {
334            Err(TextError::LineIndexOutOfBounds(row, 1))
335        }
336    }
337
338    /// Iterate over text-lines, starting at line-offset.
339    ///
340    /// * row must be <= len_lines
341    #[inline]
342    fn lines_at(&self, row: upos_type) -> Result<impl Iterator<Item = Cow<'_, str>>, TextError> {
343        if row == 0 {
344            Ok(once(Cow::Borrowed(self.text.as_str())))
345        } else if row == 1 {
346            Ok(once(Cow::Borrowed("")))
347        } else {
348            Err(TextError::LineIndexOutOfBounds(row, 1))
349        }
350    }
351
352    /// Return a line as an iterator over the graphemes.
353    /// This contains the '\n' at the end.
354    ///
355    /// * row must be <= len_lines
356    #[inline]
357    fn line_graphemes(&self, row: upos_type) -> Result<Self::GraphemeIter<'_>, TextError> {
358        if row == 0 {
359            Ok(StrGraphemes::new(0, &self.text))
360        } else if row == 1 {
361            Ok(StrGraphemes::new(self.text.len(), ""))
362        } else {
363            Err(TextError::LineIndexOutOfBounds(row, 1))
364        }
365    }
366
367    /// Line width of row as grapheme count.
368    /// Excludes the terminating '\n'.
369    ///
370    /// * row must be <= len_lines
371    #[inline]
372    fn line_width(&self, row: upos_type) -> Result<upos_type, TextError> {
373        if row == 0 {
374            Ok(self.len)
375        } else if row == 1 {
376            Ok(0)
377        } else {
378            Err(TextError::LineIndexOutOfBounds(row, 1))
379        }
380    }
381
382    /// Insert a char at the given position.
383    ///
384    /// * range must be a valid range. row <= len_lines, col <= line_width of the row.
385    fn insert_char(
386        &mut self,
387        mut pos: TextPosition,
388        c: char,
389    ) -> Result<(TextRange, Range<usize>), TextError> {
390        if pos == TextPosition::new(0, 1) {
391            pos = TextPosition::new(self.len, 0);
392        }
393
394        if pos.y != 0 {
395            return Err(TextError::TextPositionOutOfBounds(pos));
396        }
397
398        let byte_pos = self.byte_range_at(pos)?;
399        let (before, after) = self.text.split_at(byte_pos.start);
400
401        self.invalidate_cache(byte_pos.start);
402
403        let old_len = self.len;
404        self.buf.clear();
405        self.buf.push_str(before);
406        self.buf.push(c);
407        self.buf.push_str(after);
408
409        let before_bytes = before.len();
410        let new_len = str_len(&self.buf);
411
412        mem::swap(&mut self.text, &mut self.buf);
413        self.len = new_len;
414
415        Ok((
416            TextRange::new((pos.x, 0), (pos.x + (new_len - old_len), 0)),
417            before_bytes..before_bytes + c.len_utf8(),
418        ))
419    }
420
421    /// Insert a str at position.
422    fn insert_str(
423        &mut self,
424        mut pos: TextPosition,
425        t: &str,
426    ) -> Result<(TextRange, Range<usize>), TextError> {
427        if pos == TextPosition::new(0, 1) {
428            pos = TextPosition::new(self.len, 0);
429        }
430
431        if pos.y != 0 {
432            return Err(TextError::TextPositionOutOfBounds(pos));
433        }
434
435        let byte_pos = self.byte_range_at(pos)?;
436        let (before, after) = self.text.split_at(byte_pos.start);
437
438        self.invalidate_cache(byte_pos.start);
439
440        let old_len = self.len;
441        self.buf.clear();
442        self.buf.push_str(before);
443        self.buf.push_str(t);
444        self.buf.push_str(after);
445
446        let before_bytes = before.len();
447        let new_len = str_len(&self.buf);
448
449        mem::swap(&mut self.text, &mut self.buf);
450        self.len = new_len;
451
452        Ok((
453            TextRange::new((pos.x, 0), (pos.x + (new_len - old_len), 0)),
454            before_bytes..before_bytes + t.len(),
455        ))
456    }
457
458    /// Remove a range.
459    fn remove(
460        &mut self,
461        mut range: TextRange,
462    ) -> Result<(String, (TextRange, Range<usize>)), TextError> {
463        if range.start == TextPosition::new(0, 1) {
464            range.start = TextPosition::new(self.len, 0);
465        }
466        if range.end == TextPosition::new(0, 1) {
467            range.end = TextPosition::new(self.len, 0);
468        }
469
470        if range.start.y != 0 {
471            return Err(TextError::TextRangeOutOfBounds(range));
472        }
473        if range.end.y != 0 {
474            return Err(TextError::TextRangeOutOfBounds(range));
475        }
476
477        let bytes = self.byte_range(range)?;
478
479        self.invalidate_cache(bytes.start);
480
481        let (before, remove, after) = (
482            &self.text[..bytes.start],
483            &self.text[bytes.start..bytes.end],
484            &self.text[bytes.end..],
485        );
486
487        self.buf.clear();
488        self.buf.push_str(before);
489        self.buf.push_str(after);
490
491        let remove_str = remove.to_string();
492        let before_bytes = before.len();
493        let remove_bytes = remove.len();
494        let new_len = str_len(&self.buf);
495
496        mem::swap(&mut self.text, &mut self.buf);
497        self.len = new_len;
498
499        Ok((
500            remove_str,
501            (range, before_bytes..before_bytes + remove_bytes),
502        ))
503    }
504
505    /// Insert a string at the given byte index.
506    fn insert_b(&mut self, byte_pos: usize, t: &str) -> Result<(), TextError> {
507        let Some((before, after)) = self.text.split_at_checked(byte_pos) else {
508            return Err(TextError::ByteIndexNotCharBoundary(byte_pos));
509        };
510
511        self.invalidate_cache(byte_pos);
512
513        self.buf.clear();
514        self.buf.push_str(before);
515        self.buf.push_str(t);
516        self.buf.push_str(after);
517        let new_len = str_len(&self.buf);
518
519        mem::swap(&mut self.text, &mut self.buf);
520        self.len = new_len;
521
522        Ok(())
523    }
524
525    /// Remove the given byte-range.
526    fn remove_b(&mut self, byte_range: Range<usize>) -> Result<(), TextError> {
527        let Some((before, after)) = self.text.split_at_checked(byte_range.start) else {
528            return Err(TextError::ByteIndexNotCharBoundary(byte_range.start));
529        };
530        let Some((_remove, after)) = after.split_at_checked(byte_range.end - byte_range.start)
531        else {
532            return Err(TextError::ByteIndexNotCharBoundary(byte_range.end));
533        };
534
535        self.invalidate_cache(byte_range.start);
536
537        self.buf.clear();
538        self.buf.push_str(before);
539        self.buf.push_str(after);
540        let new_len = str_len(&self.buf);
541
542        mem::swap(&mut self.text, &mut self.buf);
543        self.len = new_len;
544
545        Ok(())
546    }
547}