endbasic_core/
reader.rs

// EndBASIC
// Copyright 2020 Julio Merino
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License.  You may obtain a copy
// of the License at:
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
// License for the specific language governing permissions and limitations
// under the License.

//! Character-based reader for an input stream with position tracking.

use std::cell::RefCell;
use std::char;
use std::io::{self, BufRead};
use std::rc::Rc;

/// Distance, in columns, between consecutive tab stops.  Used to compute the column that follows
/// a tab character within a line.
const TAB_LENGTH: usize = 8;

/// Representation of a position within a stream.
///
/// Positions handed out by `CharReader` start at line 1, column 1.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct LineCol {
    /// Line number.
    pub line: usize,

    /// Column number.
    pub col: usize,
}

36#[derive(Debug)]
37#[cfg_attr(test, derive(Eq, PartialEq))]
38pub struct CharSpan {
39    /// Character in this span.
40    pub(crate) ch: char,
41
42    /// Position where this character starts.
43    pub(crate) pos: LineCol,
44}
45
/// Possible types of buffered data in the reader.
enum Pending {
    /// Initial state of the reader where no data has been buffered yet.
    Unknown,

    /// Intermediate state where the reader holds a line of text, broken down by character, and
    /// the index of the character to return on the next read.
    Chars(Vec<char>, usize),

    /// Terminal state of the reader due to an EOF condition.
    Eof,

    /// Terminal state of the reader due to an error.  If not `None`, this contains the original
    /// error that caused the problem and that has not been handed to the caller yet.  Otherwise,
    /// that error was already consumed (and thus reaching this case indicates a problem in the
    /// caller), so the reader reports an "invalid state" error instead.
    Error(Option<io::Error>),
}

64/// Wraps `io::Read` to offer an iterator over characters.
65pub struct CharReader<'a> {
66    /// The wrapper reader from which to reach characters.
67    reader: io::BufReader<&'a mut dyn io::Read>,
68
69    /// Current state of any buffered data.
70    pending: Pending,
71
72    /// Line and column number of the next character to be read.
73    next_pos: Rc<RefCell<LineCol>>,
74}
75
76impl<'a> CharReader<'a> {
77    /// Constructs a new character reader from an `io::Read`.
78    pub fn from(reader: &'a mut dyn io::Read) -> Self {
79        Self {
80            reader: io::BufReader::new(reader),
81            pending: Pending::Unknown,
82            next_pos: Rc::from(RefCell::from(LineCol { line: 1, col: 1 })),
83        }
84    }
85
86    /// Replenishes `pending` with the next line to process.
87    fn refill_and_next(&mut self) -> Option<io::Result<CharSpan>> {
88        self.pending = {
89            let mut line = String::new();
90            match self.reader.read_line(&mut line) {
91                Ok(0) => Pending::Eof,
92                Ok(_) => Pending::Chars(line.chars().collect(), 0),
93                Err(e) => Pending::Error(Some(e)),
94            }
95        };
96        self.next()
97    }
98
99    /// Obtains a view of the next position observed by this reader, which is necessary to compute
100    /// the location of EOF when the iterator is fully consumed.
101    pub(crate) fn next_pos_watcher(&self) -> Rc<RefCell<LineCol>> {
102        self.next_pos.clone()
103    }
104}
105
106impl<'a> Iterator for CharReader<'a> {
107    type Item = io::Result<CharSpan>;
108
109    fn next(&mut self) -> Option<Self::Item> {
110        match &mut self.pending {
111            Pending::Unknown => self.refill_and_next(),
112            Pending::Eof => None,
113            Pending::Chars(chars, last) => {
114                if *last == chars.len() {
115                    self.refill_and_next()
116                } else {
117                    let ch = chars[*last];
118                    *last += 1;
119
120                    let mut next_pos = self.next_pos.borrow_mut();
121                    let pos = *next_pos;
122                    match ch {
123                        '\n' => {
124                            next_pos.line += 1;
125                            next_pos.col = 1;
126                        }
127                        '\t' => {
128                            next_pos.col =
129                                (next_pos.col - 1 + TAB_LENGTH) / TAB_LENGTH * TAB_LENGTH + 1;
130                        }
131                        _ => {
132                            next_pos.col += 1;
133                        }
134                    }
135
136                    Some(Ok(CharSpan { ch, pos }))
137                }
138            }
139            Pending::Error(e) => match e.take() {
140                Some(e) => Some(Err(e)),
141                None => Some(Err(io::Error::new(
142                    io::ErrorKind::Other,
143                    "Invalid state; error already consumed",
144                ))),
145            },
146        }
147    }
148}
149
#[cfg(test)]
mod tests {
    use super::*;

    /// Syntactic sugar to instantiate a `CharSpan` for testing.
    fn cs(ch: char, line: usize, col: usize) -> CharSpan {
        CharSpan { ch, pos: LineCol { line, col } }
    }

    #[test]
    fn test_empty() {
        let mut input = b"".as_ref();
        let mut reader = CharReader::from(&mut input);
        assert!(reader.next().is_none());
    }

    #[test]
    fn test_multibyte_chars() {
        let mut input = "Hi 훌리오".as_bytes();
        let mut reader = CharReader::from(&mut input);
        assert_eq!(cs('H', 1, 1), reader.next().unwrap().unwrap());
        assert_eq!(cs('i', 1, 2), reader.next().unwrap().unwrap());
        assert_eq!(cs(' ', 1, 3), reader.next().unwrap().unwrap());
        assert_eq!(cs('훌', 1, 4), reader.next().unwrap().unwrap());
        assert_eq!(cs('리', 1, 5), reader.next().unwrap().unwrap());
        assert_eq!(cs('오', 1, 6), reader.next().unwrap().unwrap());
        assert!(reader.next().is_none());
    }

    #[test]
    fn test_consecutive_newlines() {
        let mut input = b"a\n\nbc\n".as_ref();
        let mut reader = CharReader::from(&mut input);
        assert_eq!(cs('a', 1, 1), reader.next().unwrap().unwrap());
        assert_eq!(cs('\n', 1, 2), reader.next().unwrap().unwrap());
        assert_eq!(cs('\n', 2, 1), reader.next().unwrap().unwrap());
        assert_eq!(cs('b', 3, 1), reader.next().unwrap().unwrap());
        assert_eq!(cs('c', 3, 2), reader.next().unwrap().unwrap());
        assert_eq!(cs('\n', 3, 3), reader.next().unwrap().unwrap());
        assert!(reader.next().is_none());
    }

    #[test]
    fn test_tabs() {
        let mut input = "1\t9\n1234567\t8\n12345678\t9".as_bytes();
        let mut reader = CharReader::from(&mut input);
        assert_eq!(cs('1', 1, 1), reader.next().unwrap().unwrap());
        assert_eq!(cs('\t', 1, 2), reader.next().unwrap().unwrap());
        assert_eq!(cs('9', 1, 9), reader.next().unwrap().unwrap());
        assert_eq!(cs('\n', 1, 10), reader.next().unwrap().unwrap());
        assert_eq!(cs('1', 2, 1), reader.next().unwrap().unwrap());
        assert_eq!(cs('2', 2, 2), reader.next().unwrap().unwrap());
        assert_eq!(cs('3', 2, 3), reader.next().unwrap().unwrap());
        assert_eq!(cs('4', 2, 4), reader.next().unwrap().unwrap());
        assert_eq!(cs('5', 2, 5), reader.next().unwrap().unwrap());
        assert_eq!(cs('6', 2, 6), reader.next().unwrap().unwrap());
        assert_eq!(cs('7', 2, 7), reader.next().unwrap().unwrap());
        assert_eq!(cs('\t', 2, 8), reader.next().unwrap().unwrap());
        assert_eq!(cs('8', 2, 9), reader.next().unwrap().unwrap());
        assert_eq!(cs('\n', 2, 10), reader.next().unwrap().unwrap());
        assert_eq!(cs('1', 3, 1), reader.next().unwrap().unwrap());
        assert_eq!(cs('2', 3, 2), reader.next().unwrap().unwrap());
        assert_eq!(cs('3', 3, 3), reader.next().unwrap().unwrap());
        assert_eq!(cs('4', 3, 4), reader.next().unwrap().unwrap());
        assert_eq!(cs('5', 3, 5), reader.next().unwrap().unwrap());
        assert_eq!(cs('6', 3, 6), reader.next().unwrap().unwrap());
        assert_eq!(cs('7', 3, 7), reader.next().unwrap().unwrap());
        assert_eq!(cs('8', 3, 8), reader.next().unwrap().unwrap());
        assert_eq!(cs('\t', 3, 9), reader.next().unwrap().unwrap());
        assert_eq!(cs('9', 3, 17), reader.next().unwrap().unwrap());
        assert!(reader.next().is_none());
    }

    #[test]
    fn test_crlf() {
        let mut input = b"a\r\nb".as_ref();
        let mut reader = CharReader::from(&mut input);
        assert_eq!(cs('a', 1, 1), reader.next().unwrap().unwrap());
        assert_eq!(cs('\r', 1, 2), reader.next().unwrap().unwrap());
        assert_eq!(cs('\n', 1, 3), reader.next().unwrap().unwrap());
        assert_eq!(cs('b', 2, 1), reader.next().unwrap().unwrap());
        assert!(reader.next().is_none());
    }

    #[test]
    fn test_past_eof_returns_eof() {
        let mut input = b"a".as_ref();
        let mut reader = CharReader::from(&mut input);
        assert_eq!(cs('a', 1, 1), reader.next().unwrap().unwrap());
        assert!(reader.next().is_none());
        assert!(reader.next().is_none());
    }

    #[test]
    fn test_next_pos_watcher() {
        let mut input = "Hi".as_bytes();
        let mut reader = CharReader::from(&mut input);
        let next_pos_watcher = reader.next_pos_watcher();
        assert_eq!(LineCol { line: 1, col: 1 }, *next_pos_watcher.borrow());
        assert_eq!(cs('H', 1, 1), reader.next().unwrap().unwrap());
        assert_eq!(LineCol { line: 1, col: 2 }, *next_pos_watcher.borrow());
        assert_eq!(cs('i', 1, 2), reader.next().unwrap().unwrap());
        assert_eq!(LineCol { line: 1, col: 3 }, *next_pos_watcher.borrow());
        assert!(reader.next().is_none());
        assert_eq!(LineCol { line: 1, col: 3 }, *next_pos_watcher.borrow());
    }

    /// A reader that generates an error only on the Nth read operation.
    ///
    /// All other reads return a line with a single character in them with the assumption that the
    /// `CharReader` issues a single read per line.  If that assumption changes, the tests here may
    /// start failing.
    struct FaultyReader {
        /// Zero-based index of the next read operation.
        current_read: usize,

        /// Zero-based index of the read operation that must fail.
        fail_at_read: usize,
    }

    impl FaultyReader {
        /// Creates a new reader that will fail at the `fail_at_read`th operation.
        fn new(fail_at_read: usize) -> Self {
            FaultyReader { current_read: 0, fail_at_read }
        }
    }

    impl io::Read for FaultyReader {
        fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
            // Every call counts as a read operation, whether it fails or not.
            let this_read = self.current_read;
            self.current_read += 1;

            if this_read == self.fail_at_read {
                return Err(io::Error::from(io::ErrorKind::InvalidInput));
            }

            // Assumes `buf` holds at least two bytes; indexing panics otherwise.
            buf[0] = b'1';
            buf[1] = b'\n';
            Ok(2)
        }
    }

    #[test]
    fn test_errors_prevent_further_reads() {
        let mut faulty = FaultyReader::new(2);
        let mut reader = CharReader::from(&mut faulty);
        assert_eq!(cs('1', 1, 1), reader.next().unwrap().unwrap());
        assert_eq!(cs('\n', 1, 2), reader.next().unwrap().unwrap());
        assert_eq!(cs('1', 2, 1), reader.next().unwrap().unwrap());
        assert_eq!(cs('\n', 2, 2), reader.next().unwrap().unwrap());
        assert_eq!(io::ErrorKind::InvalidInput, reader.next().unwrap().unwrap_err().kind());
        assert_eq!(io::ErrorKind::Other, reader.next().unwrap().unwrap_err().kind());
        assert_eq!(io::ErrorKind::Other, reader.next().unwrap().unwrap_err().kind());
    }
}