// Source file: endbasic_core/reader.rs

// EndBASIC
// Copyright 2020 Julio Merino
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License.  You may obtain a copy
// of the License at:
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
// License for the specific language governing permissions and limitations
// under the License.

//! Character-based reader for an input stream with position tracking.

use std::char;
use std::fmt;
use std::io::{self, BufRead};

/// Distance, in columns, between consecutive tab stops.
///
/// When a tab character is read, the column of the following character is moved to the next
/// position of the form `k * TAB_LENGTH + 1` (that is, columns 9, 17, 25, ... for a value of 8).
const TAB_LENGTH: usize = 8;

/// Representation of a position within a stream.
///
/// `CharReader` reports the first character of its input at line 1, column 1.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct LineCol {
    /// Line number.
    pub line: usize,

    /// Column number.
    pub col: usize,
}

35impl fmt::Display for LineCol {
36    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
37        write!(f, "{}:{}", self.line, self.col)
38    }
39}
40
41#[derive(Debug)]
42#[cfg_attr(test, derive(Eq, PartialEq))]
43pub struct CharSpan {
44    /// Character in this span.
45    pub(crate) ch: char,
46
47    /// Position where this character starts.
48    pub(crate) pos: LineCol,
49}
50
/// Possible types of buffered data in the reader.
enum Pending {
    /// Initial state of the reader where no data has been buffered yet.
    Unknown,

    /// Intermediate state where the reader holds a line of text, broken down by character, and an
    /// index to the character to return on the next read.
    Chars(Vec<char>, usize),

    /// Terminal state of the reader due to an EOF condition.
    Eof,

    /// Terminal state of the reader due to an error.  If not `None`, this contains the original
    /// error that caused the problem.  If `None`, that error was already handed to the caller, so
    /// reaching this case again indicates a problem in the caller and the reader reports an
    /// "invalid state" error instead.
    Error(Option<io::Error>),
}

69/// Wraps `io::Read` to offer an iterator over characters.
70pub struct CharReader<'a> {
71    /// The wrapper reader from which to reach characters.
72    reader: io::BufReader<&'a mut dyn io::Read>,
73
74    /// Current state of any buffered data.
75    pending: Pending,
76
77    /// If not none, contains the character read by `peek`, which will be consumed by the next call
78    /// to `read`.
79    peeked: Option<Option<io::Result<CharSpan>>>,
80
81    /// Line and column number of the next character to be read.
82    next_pos: LineCol,
83}
84
85impl<'a> CharReader<'a> {
86    /// Constructs a new character reader from an `io::Read`.
87    pub fn from(reader: &'a mut dyn io::Read) -> Self {
88        Self {
89            reader: io::BufReader::new(reader),
90            pending: Pending::Unknown,
91            peeked: None,
92            next_pos: LineCol { line: 1, col: 1 },
93        }
94    }
95
96    /// Replenishes `pending` with the next line to process.
97    fn refill_and_next(&mut self) -> Option<io::Result<CharSpan>> {
98        self.pending = {
99            let mut line = String::new();
100            match self.reader.read_line(&mut line) {
101                Ok(0) => Pending::Eof,
102                Ok(_) => Pending::Chars(line.chars().collect(), 0),
103                Err(e) => Pending::Error(Some(e)),
104            }
105        };
106        self.next()
107    }
108
109    /// Peeks into the next character without consuming it.
110    pub(crate) fn peek(&mut self) -> Option<&io::Result<CharSpan>> {
111        if self.peeked.is_none() {
112            let next = self.next();
113            self.peeked.replace(next);
114        }
115        self.peeked.as_ref().unwrap().as_ref()
116    }
117
118    /// Gets the current position of the read, which is the position that the next character will
119    /// carry.
120    pub(crate) fn next_pos(&self) -> LineCol {
121        self.next_pos
122    }
123}
124
125impl Iterator for CharReader<'_> {
126    type Item = io::Result<CharSpan>;
127
128    fn next(&mut self) -> Option<Self::Item> {
129        if let Some(peeked) = self.peeked.take() {
130            return peeked;
131        }
132
133        match &mut self.pending {
134            Pending::Unknown => self.refill_and_next(),
135            Pending::Eof => None,
136            Pending::Chars(chars, last) => {
137                if *last == chars.len() {
138                    self.refill_and_next()
139                } else {
140                    let ch = chars[*last];
141                    *last += 1;
142
143                    let pos = self.next_pos;
144                    match ch {
145                        '\n' => {
146                            self.next_pos.line += 1;
147                            self.next_pos.col = 1;
148                        }
149                        '\t' => {
150                            self.next_pos.col =
151                                (self.next_pos.col - 1 + TAB_LENGTH) / TAB_LENGTH * TAB_LENGTH + 1;
152                        }
153                        _ => {
154                            self.next_pos.col += 1;
155                        }
156                    }
157
158                    Some(Ok(CharSpan { ch, pos }))
159                }
160            }
161            Pending::Error(e) => match e.take() {
162                Some(e) => Some(Err(e)),
163                None => Some(Err(io::Error::other("Invalid state; error already consumed"))),
164            },
165        }
166    }
167}
168
#[cfg(test)]
mod tests {
    use super::*;

    /// Syntactic sugar to instantiate a `CharSpan` for testing.
    fn cs(ch: char, line: usize, col: usize) -> CharSpan {
        CharSpan { ch, pos: LineCol { line, col } }
    }

    #[test]
    fn test_empty() {
        let mut input = b"".as_ref();
        let mut reader = CharReader::from(&mut input);
        assert!(reader.next().is_none());
    }

    #[test]
    fn test_multibyte_chars() {
        let mut input = "Hi 훌리오".as_bytes();
        let mut reader = CharReader::from(&mut input);
        assert_eq!(cs('H', 1, 1), reader.next().unwrap().unwrap());
        assert_eq!(cs('i', 1, 2), reader.next().unwrap().unwrap());
        assert_eq!(cs(' ', 1, 3), reader.next().unwrap().unwrap());
        assert_eq!(cs('훌', 1, 4), reader.next().unwrap().unwrap());
        assert_eq!(cs('리', 1, 5), reader.next().unwrap().unwrap());
        assert_eq!(cs('오', 1, 6), reader.next().unwrap().unwrap());
        assert!(reader.next().is_none());
    }

    #[test]
    fn test_consecutive_newlines() {
        let mut input = b"a\n\nbc\n".as_ref();
        let mut reader = CharReader::from(&mut input);
        assert_eq!(cs('a', 1, 1), reader.next().unwrap().unwrap());
        assert_eq!(cs('\n', 1, 2), reader.next().unwrap().unwrap());
        assert_eq!(cs('\n', 2, 1), reader.next().unwrap().unwrap());
        assert_eq!(cs('b', 3, 1), reader.next().unwrap().unwrap());
        assert_eq!(cs('c', 3, 2), reader.next().unwrap().unwrap());
        assert_eq!(cs('\n', 3, 3), reader.next().unwrap().unwrap());
        assert!(reader.next().is_none());
    }

    #[test]
    fn test_tabs() {
        let mut input = "1\t9\n1234567\t8\n12345678\t9".as_bytes();
        let mut reader = CharReader::from(&mut input);
        assert_eq!(cs('1', 1, 1), reader.next().unwrap().unwrap());
        assert_eq!(cs('\t', 1, 2), reader.next().unwrap().unwrap());
        assert_eq!(cs('9', 1, 9), reader.next().unwrap().unwrap());
        assert_eq!(cs('\n', 1, 10), reader.next().unwrap().unwrap());
        assert_eq!(cs('1', 2, 1), reader.next().unwrap().unwrap());
        assert_eq!(cs('2', 2, 2), reader.next().unwrap().unwrap());
        assert_eq!(cs('3', 2, 3), reader.next().unwrap().unwrap());
        assert_eq!(cs('4', 2, 4), reader.next().unwrap().unwrap());
        assert_eq!(cs('5', 2, 5), reader.next().unwrap().unwrap());
        assert_eq!(cs('6', 2, 6), reader.next().unwrap().unwrap());
        assert_eq!(cs('7', 2, 7), reader.next().unwrap().unwrap());
        assert_eq!(cs('\t', 2, 8), reader.next().unwrap().unwrap());
        assert_eq!(cs('8', 2, 9), reader.next().unwrap().unwrap());
        assert_eq!(cs('\n', 2, 10), reader.next().unwrap().unwrap());
        assert_eq!(cs('1', 3, 1), reader.next().unwrap().unwrap());
        assert_eq!(cs('2', 3, 2), reader.next().unwrap().unwrap());
        assert_eq!(cs('3', 3, 3), reader.next().unwrap().unwrap());
        assert_eq!(cs('4', 3, 4), reader.next().unwrap().unwrap());
        assert_eq!(cs('5', 3, 5), reader.next().unwrap().unwrap());
        assert_eq!(cs('6', 3, 6), reader.next().unwrap().unwrap());
        assert_eq!(cs('7', 3, 7), reader.next().unwrap().unwrap());
        assert_eq!(cs('8', 3, 8), reader.next().unwrap().unwrap());
        assert_eq!(cs('\t', 3, 9), reader.next().unwrap().unwrap());
        assert_eq!(cs('9', 3, 17), reader.next().unwrap().unwrap());
        assert!(reader.next().is_none());
    }

    #[test]
    fn test_crlf() {
        let mut input = b"a\r\nb".as_ref();
        let mut reader = CharReader::from(&mut input);
        assert_eq!(cs('a', 1, 1), reader.next().unwrap().unwrap());
        assert_eq!(cs('\r', 1, 2), reader.next().unwrap().unwrap());
        assert_eq!(cs('\n', 1, 3), reader.next().unwrap().unwrap());
        assert_eq!(cs('b', 2, 1), reader.next().unwrap().unwrap());
        assert!(reader.next().is_none());
    }

    #[test]
    fn test_past_eof_returns_eof() {
        let mut input = b"a".as_ref();
        let mut reader = CharReader::from(&mut input);
        assert_eq!(cs('a', 1, 1), reader.next().unwrap().unwrap());
        assert!(reader.next().is_none());
        assert!(reader.next().is_none());
    }

    #[test]
    fn test_next_pos() {
        let mut input = "Hi".as_bytes();
        let mut reader = CharReader::from(&mut input);
        assert_eq!(LineCol { line: 1, col: 1 }, reader.next_pos());
        assert_eq!(cs('H', 1, 1), reader.next().unwrap().unwrap());
        assert_eq!(LineCol { line: 1, col: 2 }, reader.next_pos());
        assert_eq!(cs('i', 1, 2), reader.next().unwrap().unwrap());
        assert_eq!(LineCol { line: 1, col: 3 }, reader.next_pos());
        assert!(reader.next().is_none());
        assert_eq!(LineCol { line: 1, col: 3 }, reader.next_pos());
    }

    #[test]
    fn test_peek_does_not_consume() {
        let mut input = b"ab".as_ref();
        let mut reader = CharReader::from(&mut input);
        assert_eq!(&cs('a', 1, 1), reader.peek().unwrap().as_ref().unwrap());
        assert_eq!(&cs('a', 1, 1), reader.peek().unwrap().as_ref().unwrap());
        assert_eq!(cs('a', 1, 1), reader.next().unwrap().unwrap());
        assert_eq!(&cs('b', 1, 2), reader.peek().unwrap().as_ref().unwrap());
        assert_eq!(cs('b', 1, 2), reader.next().unwrap().unwrap());
        assert!(reader.next().is_none());
    }

    #[test]
    fn test_peek_at_eof() {
        let mut input = b"a".as_ref();
        let mut reader = CharReader::from(&mut input);
        assert_eq!(cs('a', 1, 1), reader.next().unwrap().unwrap());
        assert!(reader.peek().is_none());
        assert!(reader.peek().is_none());
        assert!(reader.next().is_none());
        assert!(reader.next().is_none());
    }

    /// A reader that generates an error only on the Nth read operation.
    ///
    /// All other reads return a line with a single character in them with the assumption that the
    /// `CharReader` issues a single read per line.  If that assumption changes, the tests here may
    /// start failing.
    struct FaultyReader {
        current_read: usize,
        fail_at_read: usize,
    }

    impl FaultyReader {
        /// Creates a new reader that will fail at the `fail_at_read`th operation.
        fn new(fail_at_read: usize) -> Self {
            let current_read = 0;
            FaultyReader { current_read, fail_at_read }
        }
    }

    impl io::Read for FaultyReader {
        fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
            // Decide before bumping the counter so that every call, failing or not, counts as
            // exactly one read operation.
            let fail = self.current_read == self.fail_at_read;
            self.current_read += 1;
            if fail {
                return Err(io::Error::from(io::ErrorKind::InvalidInput));
            }
            buf[0] = b'1';
            buf[1] = b'\n';
            Ok(2)
        }
    }

    #[test]
    fn test_errors_prevent_further_reads() {
        let mut reader = FaultyReader::new(2);
        let mut reader = CharReader::from(&mut reader);
        assert_eq!(cs('1', 1, 1), reader.next().unwrap().unwrap());
        assert_eq!(cs('\n', 1, 2), reader.next().unwrap().unwrap());
        assert_eq!(cs('1', 2, 1), reader.next().unwrap().unwrap());
        assert_eq!(cs('\n', 2, 2), reader.next().unwrap().unwrap());
        assert_eq!(io::ErrorKind::InvalidInput, reader.next().unwrap().unwrap_err().kind());
        assert_eq!(io::ErrorKind::Other, reader.next().unwrap().unwrap_err().kind());
        assert_eq!(io::ErrorKind::Other, reader.next().unwrap().unwrap_err().kind());
    }

    #[test]
    fn test_peek_caches_errors() {
        let mut reader = FaultyReader::new(0);
        let mut reader = CharReader::from(&mut reader);
        assert_eq!(
            io::ErrorKind::InvalidInput,
            reader.peek().unwrap().as_ref().unwrap_err().kind()
        );
        assert_eq!(
            io::ErrorKind::InvalidInput,
            reader.peek().unwrap().as_ref().unwrap_err().kind()
        );
        assert_eq!(io::ErrorKind::InvalidInput, reader.next().unwrap().unwrap_err().kind());
        assert_eq!(io::ErrorKind::Other, reader.next().unwrap().unwrap_err().kind());
    }
}