1use std::cell::RefCell;
19use std::char;
20use std::io::{self, BufRead};
21use std::rc::Rc;
22
/// Distance, in columns, between consecutive tab stops.
///
/// Used when a `'\t'` is read to move the column to the position right after the next tab stop.
const TAB_LENGTH: usize = 8;
25
/// A position within the input, expressed as a line and a column.
///
/// `CharReader` starts counting both values at 1.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct LineCol {
    /// Line number.  `CharReader` increments it after every `'\n'` it returns.
    pub line: usize,

    /// Column number within the line.  `CharReader` resets it to 1 after a `'\n'`, moves it past
    /// the next tab stop after a `'\t'`, and advances it by one after any other character.
    pub col: usize,
}
35
/// A character paired with the position at which it was read.
///
/// Equality is only derived in test builds, where it is used to compare against expected values.
#[derive(Debug)]
#[cfg_attr(test, derive(Eq, PartialEq))]
pub struct CharSpan {
    /// The character that was read.
    pub(crate) ch: char,

    /// Position of `ch` in the input.
    pub(crate) pos: LineCol,
}
45
/// What a `CharReader` currently has buffered, or the terminal condition it has reached.
enum Pending {
    /// Nothing has been read from the underlying reader yet.  This is the initial state.
    Unknown,

    /// The characters of the most recently read line, followed by the index of the next character
    /// to hand out.
    Chars(Vec<char>, usize),

    /// Reading a line returned zero bytes: the input is exhausted.
    Eof,

    /// Reading a line failed.  The error is kept here until it is returned to the caller, after
    /// which the slot is left as `None`.
    Error(Option<io::Error>),
}
63
/// Iterator over the characters of an `io::Read`, each tagged with its `LineCol` position.
pub struct CharReader<'a> {
    /// Buffered wrapper around the caller-provided reader; input is pulled from it a line at a
    /// time.
    reader: io::BufReader<&'a mut dyn io::Read>,

    /// Characters read but not yet returned, or the end-of-input/error condition reached.
    pending: Pending,

    /// Position that the next returned character will carry.  Kept behind `Rc<RefCell<..>>` so
    /// that handles given out by `next_pos_watcher` observe updates as characters are consumed.
    next_pos: Rc<RefCell<LineCol>>,
}
75
76impl<'a> CharReader<'a> {
77 pub fn from(reader: &'a mut dyn io::Read) -> Self {
79 Self {
80 reader: io::BufReader::new(reader),
81 pending: Pending::Unknown,
82 next_pos: Rc::from(RefCell::from(LineCol { line: 1, col: 1 })),
83 }
84 }
85
86 fn refill_and_next(&mut self) -> Option<io::Result<CharSpan>> {
88 self.pending = {
89 let mut line = String::new();
90 match self.reader.read_line(&mut line) {
91 Ok(0) => Pending::Eof,
92 Ok(_) => Pending::Chars(line.chars().collect(), 0),
93 Err(e) => Pending::Error(Some(e)),
94 }
95 };
96 self.next()
97 }
98
99 pub(crate) fn next_pos_watcher(&self) -> Rc<RefCell<LineCol>> {
102 self.next_pos.clone()
103 }
104}
105
106impl<'a> Iterator for CharReader<'a> {
107 type Item = io::Result<CharSpan>;
108
109 fn next(&mut self) -> Option<Self::Item> {
110 match &mut self.pending {
111 Pending::Unknown => self.refill_and_next(),
112 Pending::Eof => None,
113 Pending::Chars(chars, last) => {
114 if *last == chars.len() {
115 self.refill_and_next()
116 } else {
117 let ch = chars[*last];
118 *last += 1;
119
120 let mut next_pos = self.next_pos.borrow_mut();
121 let pos = *next_pos;
122 match ch {
123 '\n' => {
124 next_pos.line += 1;
125 next_pos.col = 1;
126 }
127 '\t' => {
128 next_pos.col =
129 (next_pos.col - 1 + TAB_LENGTH) / TAB_LENGTH * TAB_LENGTH + 1;
130 }
131 _ => {
132 next_pos.col += 1;
133 }
134 }
135
136 Some(Ok(CharSpan { ch, pos }))
137 }
138 }
139 Pending::Error(e) => match e.take() {
140 Some(e) => Some(Err(e)),
141 None => Some(Err(io::Error::new(
142 io::ErrorKind::Other,
143 "Invalid state; error already consumed",
144 ))),
145 },
146 }
147 }
148}
149
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for the `CharSpan` expected for `ch` at `line`:`col`.
    fn cs(ch: char, line: usize, col: usize) -> CharSpan {
        let pos = LineCol { line, col };
        CharSpan { ch, pos }
    }

    /// Pulls one item from `reader` per entry of `expected` and asserts that each one is a
    /// successfully read character matching the entry's `(char, line, col)`.
    fn expect_spans(reader: &mut CharReader<'_>, expected: &[(char, usize, usize)]) {
        for &(ch, line, col) in expected {
            assert_eq!(cs(ch, line, col), reader.next().unwrap().unwrap());
        }
    }

    /// Asserts that `reader` reports the end of the input.
    fn expect_eof(reader: &mut CharReader<'_>) {
        assert!(reader.next().is_none());
    }

    #[test]
    fn test_empty() {
        let mut input = "".as_bytes();
        let mut reader = CharReader::from(&mut input);
        expect_eof(&mut reader);
    }

    #[test]
    fn test_multibyte_chars() {
        let mut input = "Hi 훌리오".as_bytes();
        let mut reader = CharReader::from(&mut input);
        expect_spans(
            &mut reader,
            &[('H', 1, 1), ('i', 1, 2), (' ', 1, 3), ('훌', 1, 4), ('리', 1, 5), ('오', 1, 6)],
        );
        expect_eof(&mut reader);
    }

    #[test]
    fn test_consecutive_newlines() {
        let mut input = "a\n\nbc\n".as_bytes();
        let mut reader = CharReader::from(&mut input);
        expect_spans(
            &mut reader,
            &[('a', 1, 1), ('\n', 1, 2), ('\n', 2, 1), ('b', 3, 1), ('c', 3, 2), ('\n', 3, 3)],
        );
        expect_eof(&mut reader);
    }

    #[test]
    fn test_tabs() {
        let mut input = "1\t9\n1234567\t8\n12345678\t9".as_bytes();
        let mut reader = CharReader::from(&mut input);
        // First line: a tab early in the line jumps to the first tab stop.
        expect_spans(&mut reader, &[('1', 1, 1), ('\t', 1, 2), ('9', 1, 9), ('\n', 1, 10)]);
        // Second line: a tab in the last column before the tab stop advances a single column.
        expect_spans(
            &mut reader,
            &[
                ('1', 2, 1),
                ('2', 2, 2),
                ('3', 2, 3),
                ('4', 2, 4),
                ('5', 2, 5),
                ('6', 2, 6),
                ('7', 2, 7),
                ('\t', 2, 8),
                ('8', 2, 9),
                ('\n', 2, 10),
            ],
        );
        // Third line: a tab right after a tab stop jumps a full tab length.
        expect_spans(
            &mut reader,
            &[
                ('1', 3, 1),
                ('2', 3, 2),
                ('3', 3, 3),
                ('4', 3, 4),
                ('5', 3, 5),
                ('6', 3, 6),
                ('7', 3, 7),
                ('8', 3, 8),
                ('\t', 3, 9),
                ('9', 3, 17),
            ],
        );
        expect_eof(&mut reader);
    }

    #[test]
    fn test_crlf() {
        let mut input = "a\r\nb".as_bytes();
        let mut reader = CharReader::from(&mut input);
        expect_spans(&mut reader, &[('a', 1, 1), ('\r', 1, 2), ('\n', 1, 3), ('b', 2, 1)]);
        expect_eof(&mut reader);
    }

    #[test]
    fn test_past_eof_returns_eof() {
        let mut input = "a".as_bytes();
        let mut reader = CharReader::from(&mut input);
        expect_spans(&mut reader, &[('a', 1, 1)]);
        expect_eof(&mut reader);
        expect_eof(&mut reader);
    }

    #[test]
    fn test_next_pos_watcher() {
        let mut input = "Hi".as_bytes();
        let mut reader = CharReader::from(&mut input);
        let next_pos_watcher = reader.next_pos_watcher();
        assert_eq!(LineCol { line: 1, col: 1 }, *next_pos_watcher.borrow());
        expect_spans(&mut reader, &[('H', 1, 1)]);
        assert_eq!(LineCol { line: 1, col: 2 }, *next_pos_watcher.borrow());
        expect_spans(&mut reader, &[('i', 1, 2)]);
        assert_eq!(LineCol { line: 1, col: 3 }, *next_pos_watcher.borrow());
        // Hitting the end of the input must not move the position.
        expect_eof(&mut reader);
        assert_eq!(LineCol { line: 1, col: 3 }, *next_pos_watcher.borrow());
    }

    /// A reader that produces the two bytes `"1\n"` on every read except for read number
    /// `fail_at_read` (counting from zero), which fails with `InvalidInput`.
    struct FaultyReader {
        /// Number of `read` calls received so far.
        current_read: usize,
        /// Zero-based index of the `read` call that must fail.
        fail_at_read: usize,
    }

    impl FaultyReader {
        /// Creates a reader whose `fail_at_read`-th read (zero-based) returns an error.
        fn new(fail_at_read: usize) -> Self {
            FaultyReader { current_read: 0, fail_at_read }
        }
    }

    impl io::Read for FaultyReader {
        fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
            let attempt = self.current_read;
            self.current_read += 1;
            if attempt == self.fail_at_read {
                return Err(io::ErrorKind::InvalidInput.into());
            }
            buf[0] = b'1';
            buf[1] = b'\n';
            Ok(2)
        }
    }

    #[test]
    fn test_errors_prevent_further_reads() {
        let mut reader = FaultyReader::new(2);
        let mut reader = CharReader::from(&mut reader);
        expect_spans(&mut reader, &[('1', 1, 1), ('\n', 1, 2), ('1', 2, 1), ('\n', 2, 2)]);
        assert_eq!(io::ErrorKind::InvalidInput, reader.next().unwrap().unwrap_err().kind());
        // Once the real error has been handed out, the reader stays in a failed state.
        for _ in 0..2 {
            assert_eq!(io::ErrorKind::Other, reader.next().unwrap().unwrap_err().kind());
        }
    }
}