Skip to main content

mkutils/rope/
chunk.rs

1use crate::{
2    rope::{
3        chunk_summary::{ChunkSummary, Length, LengthExtendedGraphemes, LineLengthSummary},
4        extended_grapheme_iter::ExtendedGraphemeIter,
5    },
6    utils::Utils,
7};
8use arrayvec::{ArrayString, ArrayVec, CapacityError};
9use num::{Zero, traits::SaturatingSub};
10use std::{ops::Range, str::FromStr};
11use zed_sum_tree::{Item, Summary};
12
// NOTE: [Self: Clone] required for [Self: Item]
/// A fixed-capacity piece of text plus the bookkeeping needed to address it by extended grapheme and by line.
///
/// All three fields are inline, fixed-capacity containers sized by `Self::CAPACITY`.
#[derive(Clone, Default)]
pub struct Chunk {
    /// The chunk's text.
    string: ArrayString<{ Self::CAPACITY }>,
    /// One entry per pushed extended grapheme, in push order. Each entry starts at the byte length [string] had
    /// just before that grapheme was appended and is built from the grapheme's byte length, i.e. presumably the
    /// grapheme's byte range within [string] (depends on [range_from_len], which is defined elsewhere).
    // NOTE(review): the element type is [LengthExtendedGraphemes] although the values are byte indices
    extended_grapheme_byte_index_intervals: ArrayVec<Range<LengthExtendedGraphemes>, { Self::CAPACITY }>,
    /// For every pushed grapheme that is a newline, the number of graphemes that were already in the chunk when it
    /// was pushed. Entries are appended as graphemes are pushed, so they are in ascending order.
    newline_extended_grapheme_offsets: ArrayVec<LengthExtendedGraphemes, { Self::CAPACITY }>,
}
20
21impl Chunk {
22    const CAPACITY: usize = 256;
23
24    #[must_use]
25    pub fn empty() -> Self {
26        Self::default()
27    }
28
29    #[must_use]
30    pub fn len(&self) -> Length {
31        Length::new(
32            self.newline_extended_grapheme_offsets.len().into(),
33            self.extended_grapheme_byte_index_intervals.len().into(),
34        )
35    }
36
37    #[must_use]
38    pub const fn extended_grapheme_iter(&self) -> ExtendedGraphemeIter<'_> {
39        ExtendedGraphemeIter::new(self)
40    }
41
42    #[must_use]
43    pub fn as_str(&self) -> &str {
44        self.string.as_str()
45    }
46
47    #[must_use]
48    pub fn extended_grapheme_byte_index_intervals(&self) -> &[Range<LengthExtendedGraphemes>] {
49        &self.extended_grapheme_byte_index_intervals
50    }
51
52    #[must_use]
53    pub fn newline_extended_grapheme_offsets_geq(
54        &self,
55        extended_grapheme_offset: LengthExtendedGraphemes,
56    ) -> &[LengthExtendedGraphemes] {
57        // NOTE: [index] is the index of the first [newline_extended_grapheme_offset] that is greater than or equal to
58        // [extended_grapheme_offset]
59        let index = self
60            .newline_extended_grapheme_offsets
61            .partition_point(|newline_extended_grapheme_offset| {
62                newline_extended_grapheme_offset < &extended_grapheme_offset
63            });
64
65        &self.newline_extended_grapheme_offsets[index..]
66    }
67
68    pub fn try_push_extended_grapheme<'a>(
69        &mut self,
70        extended_grapheme: &'a str,
71    ) -> Result<&mut Self, CapacityError<&'a str>> {
72        let extended_grapheme_byte_index_interval_begin = self.string.len().convert::<LengthExtendedGraphemes>();
73        let extended_grapheme_offset = self.len().extended_graphemes();
74
75        // NOTE:
76        // - [self.string] will always be weakly longer than [self.extended_grapheme_byte_index_intervals] (with
77        //   equality only when each extended grapheme is a single byte) and because we only push to
78        //   [self.extended_grapheme_byte_index_intervals] after successfully pushing to [self.string], we will never
79        //   panic
80        // - same holds for [self.newline_extended_grapheme_offsets]
81        self.string.try_push_str(extended_grapheme)?;
82        extended_grapheme_byte_index_interval_begin
83            .range_from_len(extended_grapheme.len().into())
84            .push_to(&mut self.extended_grapheme_byte_index_intervals);
85
86        if extended_grapheme.is_newline() {
87            self.newline_extended_grapheme_offsets.push(extended_grapheme_offset);
88        }
89
90        self.ok()
91    }
92
93    pub fn try_push_extended_graphemes<'a>(&mut self, extended_graphemes: &'a str) -> Result<&mut Self, &'a str> {
94        for (byte_index, extended_grapheme) in extended_graphemes.extended_grapheme_and_byte_index_pairs() {
95            if self.try_push_extended_grapheme(extended_grapheme).is_err() {
96                return extended_graphemes[byte_index..].ref_immut().err();
97            }
98        }
99
100        self.ok()
101    }
102
103    #[must_use]
104    pub fn line_lengths(&self) -> LineLengthSummary {
105        let length = self.len().extended_graphemes();
106        let first_line_length = self
107            .newline_extended_grapheme_offsets
108            .first()
109            .copied()
110            .unwrap_or(length);
111        let last_line_length =
112            if let Some(last_newline_extended_grapheme_offset) = self.newline_extended_grapheme_offsets.last() {
113                length
114                    .saturating_sub(last_newline_extended_grapheme_offset)
115                    .decremented()
116            } else {
117                length
118            };
119        let mut max_line_length = first_line_length.max(last_line_length);
120
121        for window in self.newline_extended_grapheme_offsets.windows(2) {
122            let line_length = window[1].saturating_sub(&window[0]).decremented();
123
124            max_line_length.max_assign(line_length);
125        }
126
127        LineLengthSummary::new(first_line_length, last_line_length, max_line_length)
128    }
129
130    #[must_use]
131    pub fn chunk_summary(&self) -> ChunkSummary {
132        ChunkSummary::new(self.len(), self.line_lengths())
133    }
134
135    #[must_use]
136    #[allow(clippy::missing_panics_doc)]
137    pub fn split(&self, extended_grapheme_offset: LengthExtendedGraphemes) -> (Self, Self) {
138        if extended_grapheme_offset.is_zero() {
139            return Self::empty().pair(self.clone());
140        }
141
142        if self.len().extended_graphemes() <= extended_grapheme_offset {
143            return self.clone().pair(Self::empty());
144        }
145
146        let extended_grapheme_offset = extended_grapheme_offset.convert::<usize>();
147        let extended_grapheme_byte_index = self.extended_grapheme_byte_index_intervals[extended_grapheme_offset]
148            .start
149            .convert::<usize>();
150        let (left_str, right_str) = self.as_str().split_at(extended_grapheme_byte_index);
151        let left_chunk = left_str.parse::<Self>().unwrap();
152        let right_chunk = right_str.parse::<Self>().unwrap();
153
154        left_chunk.pair(right_chunk)
155    }
156}
157
158impl FromStr for Chunk {
159    type Err = CapacityError<()>;
160
161    fn from_str(extended_graphemes: &str) -> Result<Self, Self::Err> {
162        match Self::empty().try_push_extended_graphemes(extended_graphemes) {
163            Ok(chunk) => chunk.mem_take().ok(),
164            Err(_remaining_extended_graphemes) => CapacityError::new(()).err(),
165        }
166    }
167}
168
169impl Item for Chunk {
170    type Summary = ChunkSummary;
171
172    fn summary(&self, _context: <Self::Summary as Summary>::Context<'_>) -> Self::Summary {
173        self.chunk_summary()
174    }
175}