saphyr_parser/input/
buffered.rs

1use crate::char_traits::is_breakz;
2use crate::input::Input;
3
4use arraydeque::ArrayDeque;
5
/// The capacity, in characters, of the [`BufferedInput`] lookahead buffer.
///
/// The buffer has a fixed, statically known size so that consuming or pushing a character never
/// triggers a reallocation. As of now, almost all lookaheads are 4 characters maximum, except:
///   - Escape sequences parsing: some escape codes are 8 characters
///   - Scanning indent in scalars: this looks ahead `indent + 2` characters
///
/// This constant must be set to at least 8 (checked at compile time below). When scanning indent
/// in scalars, the lookahead is done in a single call if and only if the indent is
/// `BUFFER_LEN - 2` or less. If the indent is higher than that, the code will fall back to a loop
/// of lookaheads.
const BUFFER_LEN: usize = 16;

// Enforce the documented lower bound at build time: a smaller buffer would only surface as a
// runtime panic when a lookahead tries to push into an already full buffer.
const _: () = assert!(BUFFER_LEN >= 8, "`BUFFER_LEN` must be at least 8");
17
18/// A wrapper around an [`Iterator`] of [`char`]s with a buffer.
19///
20/// The YAML scanner often needs some lookahead. With fully allocated buffers such as `String` or
21/// `&str`, this is not an issue. However, with streams, we need to have a way of peeking multiple
22/// characters at a time and sometimes pushing some back into the stream.
23/// There is no "easy" way of doing this without itertools. In order to avoid pulling the entierty
24/// of itertools for one method, we use this structure.
25#[allow(clippy::module_name_repetitions)]
26pub struct BufferedInput<T: Iterator<Item = char>> {
27    /// The iterator source,
28    input: T,
29    /// Buffer for the next characters to consume.
30    buffer: ArrayDeque<char, BUFFER_LEN>,
31}
32
33impl<T: Iterator<Item = char>> BufferedInput<T> {
34    /// Create a new [`BufferedInput`] with the given input.
35    pub fn new(input: T) -> Self {
36        Self {
37            input,
38            buffer: ArrayDeque::default(),
39        }
40    }
41}
42
43impl<T: Iterator<Item = char>> Input for BufferedInput<T> {
44    #[inline]
45    fn lookahead(&mut self, count: usize) {
46        if self.buffer.len() >= count {
47            return;
48        }
49        for _ in 0..(count - self.buffer.len()) {
50            self.buffer
51                .push_back(self.input.next().unwrap_or('\0'))
52                .unwrap();
53        }
54    }
55
56    #[inline]
57    fn buflen(&self) -> usize {
58        self.buffer.len()
59    }
60
61    #[inline]
62    fn bufmaxlen(&self) -> usize {
63        BUFFER_LEN
64    }
65
66    #[inline]
67    fn raw_read_ch(&mut self) -> char {
68        self.input.next().unwrap_or('\0')
69    }
70
71    #[inline]
72    fn raw_read_non_breakz_ch(&mut self) -> Option<char> {
73        if let Some(c) = self.input.next() {
74            if is_breakz(c) {
75                self.buffer.push_back(c).unwrap();
76                None
77            } else {
78                Some(c)
79            }
80        } else {
81            None
82        }
83    }
84
85    #[inline]
86    fn skip(&mut self) {
87        self.buffer.pop_front();
88    }
89
90    #[inline]
91    fn skip_n(&mut self, count: usize) {
92        self.buffer.drain(0..count);
93    }
94
95    #[inline]
96    fn peek(&self) -> char {
97        self.buffer[0]
98    }
99
100    #[inline]
101    fn peek_nth(&self, n: usize) -> char {
102        self.buffer[n]
103    }
104}