Skip to main content

saphyr_parser_bw/input/
buffered.rs

1use crate::char_traits::is_breakz;
2use crate::input::{BorrowedInput, Input};
3
4use arraydeque::ArrayDeque;
5
/// The size of the [`BufferedInput`] buffer.
///
/// The buffer is statically allocated to avoid conditions for reallocations each time we
/// consume/push a character. As of now, almost all lookaheads are 4 characters maximum, except:
///   - Escape sequences parsing: some escape codes are 8 characters
///   - Scanning indent in scalars: this looks ahead `indent + 2` characters
///
/// This constant must be set to at least 8. When scanning indent in scalars, the lookahead is done
/// in a single call if and only if the indent is `BUFFER_LEN - 2` or less. If the indent is higher
/// than that, the code will fall back to a loop of lookaheads.
///
/// `lookahead` never buffers more than `BUFFER_LEN` characters (larger requests are clamped to
/// this value), and `bufmaxlen` reports this value.
const BUFFER_LEN: usize = 16;
17
/// A wrapper around an [`Iterator`] of [`char`]s with a buffer.
///
/// The YAML scanner often needs some lookahead. With fully allocated buffers such as `String` or
/// `&str`, this is not an issue. However, with streams, we need to have a way of peeking multiple
/// characters at a time and sometimes pushing some back into the stream.
/// There is no "easy" way of doing this without itertools. In order to avoid pulling the entirety
/// of itertools for one method, we use this structure.
#[allow(clippy::module_name_repetitions)]
pub struct BufferedInput<T: Iterator<Item = char>> {
    /// The iterator source.
    input: T,
    /// Buffer for the next characters to consume.
    ///
    /// Characters are appended at the back (by `lookahead`) and removed from the front (by
    /// `skip` / `skip_n`). It holds at most `BUFFER_LEN` characters.
    buffer: ArrayDeque<char, BUFFER_LEN>,
}
32
33impl<T: Iterator<Item = char>> BufferedInput<T> {
34    /// Create a new [`BufferedInput`] with the given input.
35    pub fn new(input: T) -> Self {
36        Self {
37            input,
38            buffer: ArrayDeque::default(),
39        }
40    }
41}
42
43impl<T: Iterator<Item = char>> Input for BufferedInput<T> {
44    #[inline]
45    fn lookahead(&mut self, count: usize) {
46        let target = count.min(BUFFER_LEN);
47
48        if self.buffer.len() >= target {
49            return;
50        }
51        for _ in 0..(target - self.buffer.len()) {
52            self.buffer
53                .push_back(self.input.next().unwrap_or('\0'))
54                .unwrap();
55        }
56    }
57
58    #[inline]
59    fn buflen(&self) -> usize {
60        self.buffer.len()
61    }
62
63    #[inline]
64    fn bufmaxlen(&self) -> usize {
65        BUFFER_LEN
66    }
67
68    #[inline]
69    fn raw_read_ch(&mut self) -> char {
70        self.input.next().unwrap_or('\0')
71    }
72
73    #[inline]
74    fn raw_read_non_breakz_ch(&mut self) -> Option<char> {
75        if let Some(c) = self.input.next() {
76            if is_breakz(c) {
77                self.buffer.push_back(c).unwrap();
78                None
79            } else {
80                Some(c)
81            }
82        } else {
83            None
84        }
85    }
86
87    #[inline]
88    fn skip(&mut self) {
89        self.buffer.pop_front();
90    }
91
92    #[inline]
93    fn skip_n(&mut self, count: usize) {
94        self.buffer.drain(0..count);
95    }
96
97    #[inline]
98    fn peek(&self) -> char {
99        self.buffer[0]
100    }
101
102    #[inline]
103    fn peek_nth(&self, n: usize) -> char {
104        self.buffer[n]
105    }
106}
107
108/// `BufferedInput` does not support zero-copy slicing since it's a streaming input
109/// without stable backing storage.
110impl<T: Iterator<Item = char>> BorrowedInput<'static> for BufferedInput<T> {
111    #[inline]
112    fn slice_borrowed(&self, _start: usize, _end: usize) -> Option<&'static str> {
113        None
114    }
115}
116
#[cfg(test)]
mod tests {
    use super::*;

    /// A lookahead request larger than the buffer fills the buffer to `BUFFER_LEN` only, and
    /// pads with `'\0'` once the source runs out.
    #[test]
    fn lookahead_larger_than_buffer_is_clamped() {
        // Three real characters, then end-of-input padding.
        let expected = ['a', 'b', 'c', '\0'];

        let mut input = BufferedInput::new("abc".chars());
        input.lookahead(BUFFER_LEN + 8);

        assert_eq!(input.buflen(), BUFFER_LEN);
        assert_eq!(input.peek(), expected[0]);
        for (offset, want) in expected.iter().enumerate().skip(1) {
            assert_eq!(input.peek_nth(offset), *want);
        }
    }
}