1use std::char;
19use std::fmt;
20use std::io::{self, BufRead};
21
/// Distance, in columns, between consecutive tab stops.  Used to compute the column that follows
/// a `'\t'` character.
const TAB_LENGTH: usize = 8;
24
/// A position within the input text, expressed as a line number and a column number.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct LineCol {
    /// Line component of the position.
    pub line: usize,

    /// Column component of the position.
    pub col: usize,
}
34
35impl fmt::Display for LineCol {
36 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
37 write!(f, "{}:{}", self.line, self.col)
38 }
39}
40
41#[derive(Debug)]
42#[cfg_attr(test, derive(Eq, PartialEq))]
43pub struct CharSpan {
44 pub(crate) ch: char,
46
47 pub(crate) pos: LineCol,
49}
50
/// State of the data that a `CharReader` has obtained from its underlying reader.
enum Pending {
    /// Nothing has been read from the underlying reader yet.
    Unknown,

    /// The characters of the most recently read line, plus the index of the next character to
    /// hand out.  An index equal to the vector's length means the line is exhausted.
    Chars(Vec<char>, usize),

    /// The underlying reader reported that there is no more data.
    Eof,

    /// Reading from the underlying reader failed.  Holds the error until it is handed out to the
    /// caller and `None` from then on.
    Error(Option<io::Error>),
}
68
69pub struct CharReader<'a> {
71 reader: io::BufReader<&'a mut dyn io::Read>,
73
74 pending: Pending,
76
77 peeked: Option<Option<io::Result<CharSpan>>>,
80
81 next_pos: LineCol,
83}
84
85impl<'a> CharReader<'a> {
86 pub fn from(reader: &'a mut dyn io::Read) -> Self {
88 Self {
89 reader: io::BufReader::new(reader),
90 pending: Pending::Unknown,
91 peeked: None,
92 next_pos: LineCol { line: 1, col: 1 },
93 }
94 }
95
96 fn refill_and_next(&mut self) -> Option<io::Result<CharSpan>> {
98 self.pending = {
99 let mut line = String::new();
100 match self.reader.read_line(&mut line) {
101 Ok(0) => Pending::Eof,
102 Ok(_) => Pending::Chars(line.chars().collect(), 0),
103 Err(e) => Pending::Error(Some(e)),
104 }
105 };
106 self.next()
107 }
108
109 pub(crate) fn peek(&mut self) -> Option<&io::Result<CharSpan>> {
111 if self.peeked.is_none() {
112 let next = self.next();
113 self.peeked.replace(next);
114 }
115 self.peeked.as_ref().unwrap().as_ref()
116 }
117
118 pub(crate) fn next_pos(&self) -> LineCol {
121 self.next_pos
122 }
123}
124
125impl Iterator for CharReader<'_> {
126 type Item = io::Result<CharSpan>;
127
128 fn next(&mut self) -> Option<Self::Item> {
129 if let Some(peeked) = self.peeked.take() {
130 return peeked;
131 }
132
133 match &mut self.pending {
134 Pending::Unknown => self.refill_and_next(),
135 Pending::Eof => None,
136 Pending::Chars(chars, last) => {
137 if *last == chars.len() {
138 self.refill_and_next()
139 } else {
140 let ch = chars[*last];
141 *last += 1;
142
143 let pos = self.next_pos;
144 match ch {
145 '\n' => {
146 self.next_pos.line += 1;
147 self.next_pos.col = 1;
148 }
149 '\t' => {
150 self.next_pos.col =
151 (self.next_pos.col - 1 + TAB_LENGTH) / TAB_LENGTH * TAB_LENGTH + 1;
152 }
153 _ => {
154 self.next_pos.col += 1;
155 }
156 }
157
158 Some(Ok(CharSpan { ch, pos }))
159 }
160 }
161 Pending::Error(e) => match e.take() {
162 Some(e) => Some(Err(e)),
163 None => Some(Err(io::Error::other("Invalid state; error already consumed"))),
164 },
165 }
166 }
167}
168
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the `CharSpan` expected for `ch` at position `line`:`col`.
    fn cs(ch: char, line: usize, col: usize) -> CharSpan {
        let pos = LineCol { line, col };
        CharSpan { ch, pos }
    }

    /// Pulls one item from `reader` per entry in `expected`, given as `(ch, line, col)` tuples,
    /// and asserts that each item is a successful read of that exact character and position.
    fn expect_spans(reader: &mut CharReader<'_>, expected: &[(char, usize, usize)]) {
        for &(ch, line, col) in expected {
            assert_eq!(cs(ch, line, col), reader.next().unwrap().unwrap());
        }
    }

    #[test]
    fn test_empty() {
        let mut input: &[u8] = b"";
        let mut reader = CharReader::from(&mut input);
        assert!(reader.next().is_none());
    }

    #[test]
    fn test_multibyte_chars() {
        let mut input = "Hi 훌리오".as_bytes();
        let mut reader = CharReader::from(&mut input);
        expect_spans(
            &mut reader,
            &[('H', 1, 1), ('i', 1, 2), (' ', 1, 3), ('훌', 1, 4), ('리', 1, 5), ('오', 1, 6)],
        );
        assert!(reader.next().is_none());
    }

    #[test]
    fn test_consecutive_newlines() {
        let mut input: &[u8] = b"a\n\nbc\n";
        let mut reader = CharReader::from(&mut input);
        expect_spans(
            &mut reader,
            &[('a', 1, 1), ('\n', 1, 2), ('\n', 2, 1), ('b', 3, 1), ('c', 3, 2), ('\n', 3, 3)],
        );
        assert!(reader.next().is_none());
    }

    #[test]
    fn test_tabs() {
        let mut input = "1\t9\n1234567\t8\n12345678\t9".as_bytes();
        let mut reader = CharReader::from(&mut input);

        // A tab right after the first column jumps to column 9.
        expect_spans(&mut reader, &[('1', 1, 1), ('\t', 1, 2), ('9', 1, 9), ('\n', 1, 10)]);

        // A tab in the last column before a tab stop advances by just one column.
        expect_spans(
            &mut reader,
            &[
                ('1', 2, 1),
                ('2', 2, 2),
                ('3', 2, 3),
                ('4', 2, 4),
                ('5', 2, 5),
                ('6', 2, 6),
                ('7', 2, 7),
                ('\t', 2, 8),
                ('8', 2, 9),
                ('\n', 2, 10),
            ],
        );

        // A tab sitting exactly on a tab stop jumps a full tab length.
        expect_spans(
            &mut reader,
            &[
                ('1', 3, 1),
                ('2', 3, 2),
                ('3', 3, 3),
                ('4', 3, 4),
                ('5', 3, 5),
                ('6', 3, 6),
                ('7', 3, 7),
                ('8', 3, 8),
                ('\t', 3, 9),
                ('9', 3, 17),
            ],
        );
        assert!(reader.next().is_none());
    }

    #[test]
    fn test_crlf() {
        let mut input: &[u8] = b"a\r\nb";
        let mut reader = CharReader::from(&mut input);
        expect_spans(&mut reader, &[('a', 1, 1), ('\r', 1, 2), ('\n', 1, 3), ('b', 2, 1)]);
        assert!(reader.next().is_none());
    }

    #[test]
    fn test_past_eof_returns_eof() {
        let mut input: &[u8] = b"a";
        let mut reader = CharReader::from(&mut input);
        expect_spans(&mut reader, &[('a', 1, 1)]);
        for _ in 0..2 {
            assert!(reader.next().is_none());
        }
    }

    #[test]
    fn test_next_pos() {
        let mut input = "Hi".as_bytes();
        let mut reader = CharReader::from(&mut input);
        assert_eq!(LineCol { line: 1, col: 1 }, reader.next_pos());
        expect_spans(&mut reader, &[('H', 1, 1)]);
        assert_eq!(LineCol { line: 1, col: 2 }, reader.next_pos());
        expect_spans(&mut reader, &[('i', 1, 2)]);
        assert_eq!(LineCol { line: 1, col: 3 }, reader.next_pos());
        assert!(reader.next().is_none());
        // Reaching the end of the input does not move the position.
        assert_eq!(LineCol { line: 1, col: 3 }, reader.next_pos());
    }

    /// Reader that produces the two bytes `1\n` on every `read` call except for one call, which
    /// fails with an `InvalidInput` error.
    struct FaultyReader {
        /// Number of `read` calls received so far.
        current_read: usize,

        /// Zero-based index of the `read` call that fails.
        fail_at_read: usize,
    }

    impl FaultyReader {
        /// Creates a reader whose `fail_at_read`-th call to `read` (counting from zero) fails.
        fn new(fail_at_read: usize) -> Self {
            FaultyReader { current_read: 0, fail_at_read }
        }
    }

    impl io::Read for FaultyReader {
        fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
            let this_read = self.current_read;
            self.current_read += 1;

            if this_read == self.fail_at_read {
                return Err(io::Error::from(io::ErrorKind::InvalidInput));
            }

            let line = b"1\n";
            buf[..line.len()].copy_from_slice(line);
            Ok(line.len())
        }
    }

    #[test]
    fn test_errors_prevent_further_reads() {
        let mut reader = FaultyReader::new(2);
        let mut reader = CharReader::from(&mut reader);
        expect_spans(&mut reader, &[('1', 1, 1), ('\n', 1, 2), ('1', 2, 1), ('\n', 2, 2)]);

        // The real error surfaces once; afterwards the reader keeps reporting a generic failure.
        assert_eq!(io::ErrorKind::InvalidInput, reader.next().unwrap().unwrap_err().kind());
        for _ in 0..2 {
            assert_eq!(io::ErrorKind::Other, reader.next().unwrap().unwrap_err().kind());
        }
    }
}