lexerus/
buffer.rs

1//! [Buffer] is a container for either (depending on the context) the parsed string or the unparsed
2//! string.
3use std::ops::Range;
4
5#[cfg(test)]
6mod tests;
7
8#[derive(Eq, Clone)]
9/// [Buffer] is a container for source code. It is represented as an `enum` because there are two
10/// forms of [Buffer] which can be created:
11/// - A _contiguous_ chunk of [str] will always be allocated as a [Buffer::Cont] and make use of the simple chunk container.
12/// - A _fragmented_ chunk will always result in a heap allocation ([Box]) of chunk to join two non-adjacent chunks to each other.
13pub enum Buffer<'code> {
14    Cont { chunk: Chunk<'code> },
15    Frag(Box<Buffer<'code>>, Box<Buffer<'code>>),
16}
17
18impl<'code> PartialEq for Buffer<'code> {
19    fn eq(&self, other: &Self) -> bool {
20        match (self, other) {
21            (
22                Buffer::Cont { chunk },
23                Buffer::Cont { chunk: other },
24            ) => chunk.eq(other),
25            _ => self.to_string() == other.to_string(),
26        }
27    }
28}
29
30impl<'code> ::std::hash::Hash for Buffer<'code> {
31    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
32        match self {
33            Buffer::Cont { chunk } => chunk.hash(state),
34            Buffer::Frag(lhs, rhs) => {
35                lhs.hash(state);
36                rhs.hash(state);
37            }
38        }
39    }
40}
41
42impl<'code> std::fmt::Debug for Buffer<'code> {
43    fn fmt(
44        &self,
45        f: &mut std::fmt::Formatter<'_>,
46    ) -> std::fmt::Result {
47        match self {
48            Self::Cont { chunk } => {
49                std::fmt::Debug::fmt(chunk, f)
50            }
51            Self::Frag(arg0, arg1) => f
52                .debug_set()
53                .entries([arg0, arg1].iter())
54                .finish(),
55        }
56    }
57}
58
59impl<'code> std::fmt::Display for Buffer<'code> {
60    fn fmt(
61        &self,
62        f: &mut std::fmt::Formatter<'_>,
63    ) -> std::fmt::Result {
64        match self {
65            Buffer::Cont { chunk } => f.write_str(chunk),
66            Buffer::Frag(lhs, rhs) => {
67                write!(f, "{}{}", lhs, rhs)
68            }
69        }
70    }
71}
72
73impl<'code> Buffer<'code> {
74    fn search_char_len(&self) -> usize {
75        match self {
76            Buffer::Cont { chunk } => chunk
77                .search_str()
78                .unwrap_or("")
79                .chars()
80                .count(),
81            Buffer::Frag(lhs, rhs) => {
82                lhs.search_char_len()
83                    + rhs.search_char_len()
84            }
85        }
86    }
87}
88
89impl<'code> std::ops::Add for Buffer<'code> {
90    type Output = Self;
91
92    fn add(self, rhs: Self) -> Self::Output {
93        match (self, rhs) {
94            (
95                Buffer::Cont { chunk: lhs },
96                Buffer::Cont { chunk: rhs },
97            ) => lhs + rhs,
98            (
99                Buffer::Cont { chunk: lhs },
100                Buffer::Frag(r_lhs, r_rhs),
101            ) => Self::Frag(
102                Box::new(Buffer::Cont { chunk: lhs }),
103                Box::new(Self::Frag(r_lhs, r_rhs)),
104            ),
105            (
106                Buffer::Frag(l_lhs, l_rhs),
107                Buffer::Cont { chunk: rhs },
108            ) => Self::Frag(
109                Box::new(Buffer::Frag(l_lhs, l_rhs)),
110                Box::new(Buffer::Cont { chunk: rhs }),
111            ),
112            (
113                Buffer::Frag(l_lhs, l_rhs),
114                Buffer::Frag(r_lhs, r_rhs),
115            ) => Self::Frag(
116                Box::new(Buffer::Frag(l_lhs, l_rhs)),
117                Box::new(Buffer::Frag(r_lhs, r_rhs)),
118            ),
119        }
120    }
121}
122
123impl<'code> std::ops::Add for Chunk<'code> {
124    type Output = Buffer<'code>;
125
126    fn add(mut self, rhs: Self) -> Self::Output {
127        match self.search_range() {
128            Some(lhs_range) => match rhs.search_range() {
129                Some(rhs_range) => {
130                    // Check if code same and boundaries
131                    // align
132                    if self.code == rhs.code
133                        && (lhs_range.start
134                            == rhs_range.end
135                            || lhs_range.end
136                                == rhs_range.start)
137                    {
138                        let start = lhs_range
139                            .start
140                            .min(rhs_range.start);
141                        let end = lhs_range
142                            .end
143                            .max(rhs_range.end);
144                        self.range = start..end;
145                        Buffer::from(self)
146                    }
147                    else {
148                        let mut return_value = Buffer::Frag(
149                            Box::new(Buffer::from(self)),
150                            Box::new(Buffer::from(rhs)),
151                        );
152                        return_value.defrag();
153                        return_value
154                    }
155                }
156                None => Buffer::from(self),
157            },
158            None => Buffer::from(rhs),
159        }
160    }
161}
162
163impl<'code> Buffer<'code> {
164    fn defrag(&mut self) {
165        // Remove blank nodes
166        if let Self::Frag(lhs, rhs) = &self {
167            if let Self::Cont { chunk } = lhs.as_ref() {
168                if chunk.search_range().is_none() {
169                    *self = *rhs.clone();
170                }
171            }
172            else if let Self::Cont { chunk } =
173                rhs.as_ref()
174            {
175                if chunk.search_range().is_none() {
176                    *self = *lhs.clone();
177                }
178            }
179        }
180    }
181}
182
183impl<'code> Buffer<'code> {
184    pub fn eat_word(&mut self, word: &str) -> Option<Self> {
185        match self {
186            Buffer::Cont { chunk } => chunk
187                .eat_word(word)
188                .map(|chunk| Self::Cont { chunk }),
189
190            Buffer::Frag(lhs, rhs) => {
191                // Construct LHS word
192                let lhs_word_end =
193                    word.len().min(lhs.search_len());
194                let lhs_word = &word[0..lhs_word_end];
195
196                // Attempt to eat on LHS
197                let mut lhs_temp = *lhs.clone();
198                let lhs_new =
199                    lhs_temp.eat_word(lhs_word)?;
200
201                // Construct RHS word
202                let rhs_word = &word[lhs_word_end..];
203                let rhs_word = (!rhs_word.is_empty())
204                    .then_some(rhs_word);
205
206                // Attempt to eat on RHS only if RHS has
207                // words to eat
208                let return_value = match rhs_word {
209                    Some(rhs_word) => {
210                        // Attempt to eat on RHS
211                        let mut rhs_temp = *rhs.clone();
212                        let rhs_new =
213                            rhs_temp.eat_word(rhs_word)?;
214
215                        // Reset self
216                        // LHS can be disregarded because it
217                        // has been consumed
218                        *self = rhs_temp;
219
220                        // Create return value and defrag
221                        let mut return_value =
222                            lhs_new + rhs_new;
223                        return_value.defrag();
224                        return_value
225                    }
226                    None => {
227                        *lhs = Box::new(lhs_temp);
228                        self.defrag();
229                        lhs_new
230                    }
231                };
232
233                // Defrag and return
234                Some(return_value)
235            }
236        }
237    }
238
239    pub fn advance_char(
240        &mut self,
241        by: usize,
242    ) -> Option<Self> {
243        match self {
244            Buffer::Cont { chunk } => chunk
245                .advance_char(by)
246                .map(|chunk| Self::Cont { chunk }),
247
248            Buffer::Frag(lhs, rhs) => {
249                // Construct LHS search length
250                let by_lhs = lhs.search_char_len().min(by);
251
252                // Attempt to advance on LHS
253                let mut lhs_temp = *lhs.clone();
254                let lhs_new =
255                    lhs_temp.advance_char(by_lhs)?;
256
257                // Construct RHS search length
258                let by_rhs = by
259                    .checked_sub(by_lhs)
260                    .and_then(|value| {
261                        (!value.eq(&0)).then_some(value)
262                    });
263
264                // Get return value
265                let return_value = match by_rhs {
266                    // If RHS word exists
267                    Some(by_rhs) => {
268                        // Construct RHS
269                        let mut rhs_temp = *rhs.clone();
270                        let rhs_new = rhs_temp
271                            .advance_char(by_rhs)?;
272
273                        // Reset self
274                        // LHS can be disregarded because it
275                        // has been consumed
276                        *self = rhs_temp;
277
278                        // Create return value and defrag
279                        let mut return_value =
280                            lhs_new + rhs_new;
281                        return_value.defrag();
282                        return_value
283                    }
284                    None => {
285                        *lhs = Box::new(lhs_temp);
286                        self.defrag();
287                        lhs_new
288                    }
289                };
290
291                // Defrag and return
292                Some(return_value)
293            }
294        }
295    }
296
297    pub fn search_len(&self) -> usize {
298        match self {
299            Self::Cont { chunk } => chunk.search_len(),
300            Self::Frag(lhs, rhs) => {
301                lhs.search_len() + rhs.search_len()
302            }
303        }
304    }
305}
306
307impl<'code> From<&'code str> for Buffer<'code> {
308    fn from(code: &'code str) -> Self {
309        Self::Cont {
310            chunk: Chunk::from(code),
311        }
312    }
313}
314
315impl<'code> From<Chunk<'code>> for Buffer<'code> {
316    fn from(chunk: Chunk<'code>) -> Self {
317        Self::Cont { chunk }
318    }
319}
320
321#[derive(Eq)]
322/// Basic container for [str]. This should never be called by itself.
323pub struct Chunk<'code> {
324    code: &'code str,
325    range: Range<usize>,
326}
327
328impl<'code> PartialEq for Chunk<'code> {
329    fn eq(&self, other: &Self) -> bool {
330        self.search_str() == other.search_str()
331    }
332}
333
334impl<'code> ::std::hash::Hash for Chunk<'code> {
335    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
336        for byte in self.as_bytes() {
337            state.write_u8(*byte);
338        }
339    }
340}
341
342impl<'code> ::std::ops::Deref for Chunk<'code> {
343    type Target = str;
344
345    fn deref(&self) -> &Self::Target {
346        let range = self.search_range().unwrap_or(0..0);
347        &self.code[range]
348    }
349}
350
351impl<'code> From<&'code str> for Chunk<'code> {
352    fn from(code: &'code str) -> Self {
353        Self {
354            code,
355            range: 0..code.len(),
356        }
357    }
358}
359
360impl<'code> std::fmt::Debug for Chunk<'code> {
361    fn fmt(
362        &self,
363        f: &mut std::fmt::Formatter<'_>,
364    ) -> std::fmt::Result {
365        match self.search_range() {
366            Some(range) => {
367                write!(
368                    f,
369                    "[{:?}] {}",
370                    range,
371                    &self.code[range.clone()],
372                )
373            }
374            None => f.write_str("None"),
375        }
376    }
377}
378
379impl<'code> std::fmt::Display for Chunk<'code> {
380    fn fmt(
381        &self,
382        f: &mut std::fmt::Formatter<'_>,
383    ) -> std::fmt::Result {
384        f.write_str(self.search_str().unwrap_or(""))
385    }
386}
387
388impl<'code> Chunk<'code> {
389    fn eat_word(&mut self, word: &str) -> Option<Self> {
390        self.search_str()?
391            .starts_with(word)
392            .then_some(())?;
393        self.advance_by(word.len())
394    }
395
396    fn advance_char(&mut self, by: usize) -> Option<Self> {
397        let search_str = self.search_str()?;
398        let by = by.checked_sub(1)?;
399        let mut len = None::<usize>;
400        for (count, character) in
401            search_str.chars().enumerate()
402        {
403            // Break if count exceeded
404            if count > by {
405                break;
406            }
407            let char_len = character.len_utf8();
408            len = match len {
409                Some(len) => Some(char_len + len),
410                None => Some(char_len),
411            }
412        }
413        let len = len?;
414        self.advance_by(len)
415    }
416
417    fn search_len(&self) -> usize {
418        let search_range =
419            self.search_range().unwrap_or(0..0);
420        search_range.end - search_range.start
421    }
422}
423
424impl<'code> Chunk<'code> {
425    pub fn range_end(&self) -> usize {
426        self.range.end.min(self.code.len())
427    }
428
429    pub fn search_range(&self) -> Option<Range<usize>> {
430        // Check for overflow
431        let search_start = (self.range.start
432            < self.code.len())
433        .then_some(self.range.start)?;
434
435        // Get range end
436        let search_end = self.range_end();
437
438        // Check if the end is larger than start
439        (search_end > search_start)
440            .then_some(search_start..search_end)
441    }
442
443    pub fn search_str(&self) -> Option<&str> {
444        // Basic sanity check
445        let search_range = self.search_range()?;
446        Some(&self.code[search_range])
447    }
448
449    pub fn advance_by(
450        &mut self,
451        by: usize,
452    ) -> Option<Self> {
453        // Recalculate by
454        let by = self.range.start + by;
455
456        // Return one
457        if by > self.range_end() {
458            None
459        }
460        // Return none if past code end
461        else {
462            // Create eaten token
463            let mut eaten = self.clone();
464            eaten.range.end = by;
465
466            // Move self
467            self.range.start = by;
468
469            // Return eaten token
470            Some(eaten)
471        }
472    }
473}
474
475impl<'code> Clone for Chunk<'code> {
476    fn clone(&self) -> Self {
477        Self {
478            code: self.code,
479            range: self.range.clone(),
480        }
481    }
482}