saphyr_parser/input/buffered.rs
1use crate::char_traits::is_breakz;
2use crate::input::Input;
3
4use arraydeque::ArrayDeque;
5
/// Capacity, in characters, of the lookahead buffer held by a [`BufferedInput`].
///
/// The buffer has a fixed size so that consuming or pushing a character never creates the
/// conditions for a reallocation. As of now, almost all lookaheads request at most 4 characters,
/// with two exceptions:
///   - Escape sequence parsing: some escape codes are 8 characters long.
///   - Scanning the indent in scalars: this looks ahead `indent + 2` characters.
///
/// This constant must be set to at least 8; the requirement is checked at compile time right
/// below. When scanning the indent in scalars, the lookahead is done in a single call if and only
/// if the indent is `BUFFER_LEN - 2` or less. For a higher indent, the code falls back to a loop
/// of lookaheads.
const BUFFER_LEN: usize = 16;

// Fail the build, rather than panic while scanning, if the documented lower bound is violated.
const _: () = assert!(BUFFER_LEN >= 8, "BUFFER_LEN must be at least 8");
17
18/// A wrapper around an [`Iterator`] of [`char`]s with a buffer.
19///
20/// The YAML scanner often needs some lookahead. With fully allocated buffers such as `String` or
21/// `&str`, this is not an issue. However, with streams, we need to have a way of peeking multiple
22/// characters at a time and sometimes pushing some back into the stream.
23/// There is no "easy" way of doing this without itertools. In order to avoid pulling the entierty
24/// of itertools for one method, we use this structure.
25#[allow(clippy::module_name_repetitions)]
26pub struct BufferedInput<T: Iterator<Item = char>> {
27 /// The iterator source,
28 input: T,
29 /// Buffer for the next characters to consume.
30 buffer: ArrayDeque<char, BUFFER_LEN>,
31}
32
33impl<T: Iterator<Item = char>> BufferedInput<T> {
34 /// Create a new [`BufferedInput`] with the given input.
35 pub fn new(input: T) -> Self {
36 Self {
37 input,
38 buffer: ArrayDeque::default(),
39 }
40 }
41}
42
43impl<T: Iterator<Item = char>> Input for BufferedInput<T> {
44 #[inline]
45 fn lookahead(&mut self, count: usize) {
46 if self.buffer.len() >= count {
47 return;
48 }
49 for _ in 0..(count - self.buffer.len()) {
50 self.buffer
51 .push_back(self.input.next().unwrap_or('\0'))
52 .unwrap();
53 }
54 }
55
56 #[inline]
57 fn buflen(&self) -> usize {
58 self.buffer.len()
59 }
60
61 #[inline]
62 fn bufmaxlen(&self) -> usize {
63 BUFFER_LEN
64 }
65
66 #[inline]
67 fn raw_read_ch(&mut self) -> char {
68 self.input.next().unwrap_or('\0')
69 }
70
71 #[inline]
72 fn raw_read_non_breakz_ch(&mut self) -> Option<char> {
73 if let Some(c) = self.input.next() {
74 if is_breakz(c) {
75 self.buffer.push_back(c).unwrap();
76 None
77 } else {
78 Some(c)
79 }
80 } else {
81 None
82 }
83 }
84
85 #[inline]
86 fn skip(&mut self) {
87 self.buffer.pop_front();
88 }
89
90 #[inline]
91 fn skip_n(&mut self, count: usize) {
92 self.buffer.drain(0..count);
93 }
94
95 #[inline]
96 fn peek(&self) -> char {
97 self.buffer[0]
98 }
99
100 #[inline]
101 fn peek_nth(&self, n: usize) -> char {
102 self.buffer[n]
103 }
104}