lssg_lib/
char_reader.rs

1use std::{
2    io::{BufRead, BufReader, Cursor, Read},
3    mem::transmute,
4    str::Chars,
5};
6
7use super::parse_error::ParseError;
8
9/// Character Reader with peeking functionality
10/// It buffers lines internally. So if you parse a stream with that never ends with \n it will all
11/// be put into memory
12pub struct CharReader<R> {
13    reader: BufReader<R>,
14    buffer: Vec<char>,
15    has_read: bool,
16}
17
18impl<R: Read> CharReader<R> {
19    pub fn new(input: R) -> CharReader<R> {
20        let reader = BufReader::new(input);
21        CharReader {
22            reader,
23            buffer: vec![],
24            has_read: false,
25        }
26    }
27
28    pub fn from_string<'n>(input: &String) -> CharReader<&'n [u8]> {
29        CharReader {
30            reader: BufReader::new(&[]),
31            buffer: input.chars().collect(),
32            has_read: false,
33        }
34    }
35
36    pub fn has_read(&self) -> bool {
37        self.has_read
38    }
39
40    /// Will try to fill the buffer until it is filled or eof is reached
41    fn try_fill(&mut self, min: usize) -> Result<(), ParseError> {
42        if min > self.buffer.len() {
43            let mut bytes = vec![];
44            while 0 != self.reader.read_until(b'\n', &mut bytes)? && min > self.buffer.len() {}
45            // println!("B {bytes:?}");
46            self.buffer.extend(String::from_utf8(bytes)?.chars());
47        }
48        Ok(())
49    }
50
51    /// Read a character. `pos` is 0 indexed
52    pub fn peek_char(&mut self, pos: usize) -> Result<Option<char>, ParseError> {
53        self.try_fill(pos + 1)?;
54        return Ok(self.buffer.get(pos).copied());
55    }
56
57    pub fn peek_string(&mut self, length: usize) -> Result<String, ParseError> {
58        return self.peek_string_from(0, length);
59    }
60
61    // TODO(perf): return a &str[], a slice of the characters in buf. Currently not possible
62    // because rust stores chars as 4 bytes meaning `a` looks like 0x6100, you can't have multiple
63    // zero bytes in utf-8 strings so needs to be converted. Possible fix by implementing a utf-8
64    // reader storing only bytes and iterating over it.
65    //
66    /// Try to fill string with `length` bytes
67    pub fn peek_string_from(&mut self, pos: usize, length: usize) -> Result<String, ParseError> {
68        self.try_fill(pos + length)?;
69        let stop = (pos + length).min(self.buffer.len());
70        let chars = &self.buffer[pos..stop];
71
72        // have to convert characters to utf-8 because by default each char has 4 bytes.
73        let mut bytes: Vec<u8> = Vec::with_capacity(chars.len() * 4);
74        for &c in chars {
75            bytes.extend(c.encode_utf8(&mut [0; 4]).bytes());
76        }
77        let string = unsafe { String::from_utf8_unchecked(bytes) };
78        return Ok(string);
79    }
80
81    // TODO should return usize?
82    pub fn peek_until(&mut self, op: fn(char) -> bool) -> Result<Option<String>, ParseError> {
83        return self.peek_until_from(0, op);
84    }
85
86    pub fn peek_until_from(
87        &mut self,
88        pos: usize,
89        op: fn(char) -> bool,
90    ) -> Result<Option<String>, ParseError> {
91        let mut i = pos;
92        loop {
93            match self.peek_char(i)? {
94                Some(c) => {
95                    if op(c) {
96                        break;
97                    }
98                }
99                None => return Ok(None),
100            }
101            i += 1;
102        }
103
104        let string = self.peek_string_from(pos, i - pos + 1)?;
105        return Ok(Some(string));
106    }
107
108    /// Peek until matches or return None when not found
109    pub fn peek_until_match_inclusive(
110        &mut self,
111        pattern: &str,
112    ) -> Result<Option<String>, ParseError> {
113        let chars: Vec<char> = pattern.chars().collect();
114
115        let mut char_i = 0;
116        let mut i = 0;
117        loop {
118            let c = match self.peek_char(i)? {
119                Some(c) => c,
120                None => return Ok(None), // eof
121            };
122
123            // iterate where we left off
124            if chars[char_i] == c {
125                char_i += 1;
126                if char_i == chars.len() {
127                    break;
128                }
129            } else {
130                char_i = 0;
131            }
132            i += 1;
133        }
134
135        let string = self.peek_string(i + 1)?;
136        return Ok(Some(string));
137    }
138
139    pub fn consume(&mut self, length: usize) -> Result<Option<()>, ParseError> {
140        self.has_read = true;
141        self.try_fill(length)?;
142        if self.buffer.len() == 0 {
143            return Ok(None);
144        }
145        self.buffer.drain(0..length);
146        Ok(Some(()))
147    }
148
149    pub fn consume_char(&mut self) -> Result<Option<char>, ParseError> {
150        self.has_read = true;
151        self.try_fill(1)?;
152        if self.buffer.len() == 0 {
153            Ok(None)
154        } else {
155            Ok(Some(self.buffer.drain(0..1).collect::<Vec<char>>()[0]))
156        }
157    }
158
159    /// Read {length} bytes returning a smaller string on EOF
160    pub fn consume_string(&mut self, length: usize) -> Result<String, ParseError> {
161        self.has_read = true;
162        self.try_fill(length)?;
163        return Ok(self
164            .buffer
165            .drain(0..length.min(self.buffer.len()))
166            .collect());
167    }
168
169    /// Will read until eof or `op` is true including the true match
170    pub fn consume_until_inclusive(&mut self, op: fn(char) -> bool) -> Result<String, ParseError> {
171        self.has_read = true;
172        let mut result = String::new();
173        loop {
174            match self.consume_char()? {
175                Some(c) => {
176                    result.push(c);
177                    if op(c) {
178                        break;
179                    }
180                }
181                None => {
182                    break;
183                }
184            };
185        }
186        return Ok(result);
187    }
188
189    /// will read until eof or `op` is true excluding the character that matched
190    pub fn consume_until_exclusive(&mut self, op: fn(char) -> bool) -> Result<String, ParseError> {
191        self.has_read = true;
192        let mut i = 0;
193        loop {
194            match self.peek_char(i)? {
195                Some(c) => {
196                    if op(c) {
197                        break;
198                    }
199                }
200                None => break,
201            };
202            i += 1;
203        }
204        return self.consume_string(i);
205    }
206
207    pub fn consume_until_match_inclusive(&mut self, pattern: &str) -> Result<String, ParseError> {
208        self.has_read = true;
209        // TODO refactor
210        let chars: Vec<char> = pattern.chars().collect();
211        let mut char_i = 0;
212
213        let mut result = String::new();
214        loop {
215            let c = match self.consume_char()? {
216                Some(c) => c,
217                None => break,
218            };
219            result.push(c);
220            if c == chars[char_i] {
221                char_i += 1;
222                if char_i == chars.len() {
223                    break;
224                }
225            } else {
226                char_i = 0;
227            }
228        }
229        return Ok(result);
230    }
231}
232
#[cfg(test)]
mod tests {
    use super::*;

    /// Peeking must never advance the reader, while consuming must; both have to stay
    /// consistent when interleaved.
    #[test]
    fn test_propegation() -> Result<(), ParseError> {
        let text = "This is a piece of text";
        let mut reader = CharReader::new(text.as_bytes());

        // Peeking leaves the position untouched.
        assert_eq!(reader.peek_string(4)?, "This");
        assert_eq!(reader.peek_char(3)?, Some('s'));

        // Consuming moves the position forward.
        assert_eq!(reader.consume_string(5)?, "This ");

        // Repeated peeks of different lengths start at the same position.
        assert_eq!(reader.peek_string(3)?, "is ");
        assert_eq!(reader.peek_string(2)?, "is");

        assert_eq!(reader.consume_string(11)?, "is a piece ");
        assert_eq!(reader.peek_string(3)?, "of ");
        assert_eq!(reader.peek_char(1)?, Some('f'));

        // After consuming a single character every peek offset shifts by one.
        assert_eq!(reader.consume_char()?, Some('o'));
        assert_eq!(reader.peek_char(1)?, Some(' '));

        Ok(())
    }

    /// Reads that span a line break must return characters from both lines.
    #[test]
    fn test_newline() -> Result<(), ParseError> {
        let text = "This is a
Very important test";
        let mut reader = CharReader::new(text.as_bytes());

        assert_eq!(reader.peek_string(11)?, "This is a\nV");
        assert_eq!(reader.consume_string(11)?, "This is a\nV");

        Ok(())
    }
}
268}