1#![allow(dead_code)]
42use chisel_common::char::coords::Coords;
43use chisel_common::char::span::Span;
44use std::fmt::{Display, Formatter};
45
46pub type ScannerResult<T> = Result<T, ScannerError>;
48
/// The specific conditions a scanner can report as an error.
#[derive(Clone, Debug, PartialEq)]
pub enum ScannerErrorDetails {
    /// No further characters were available from the input.
    EndOfInput,
}
54
55impl Display for ScannerErrorDetails {
57 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
58 match self {
59 ScannerErrorDetails::EndOfInput => write!(f, "end of input reached"),
60 }
61 }
62}
63
64#[derive(Debug, Clone)]
65pub struct ScannerError {
66 pub details: ScannerErrorDetails,
68 pub coords: Option<Coords>,
70}
71
72impl Display for ScannerError {
74 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
75 match self.coords {
76 Some(coords) => write!(f, "details: {}, coords: {}", self.details, coords),
77 None => write!(f, "details: {}", self.details),
78 }
79 }
80}
81
/// Builds an `Err(ScannerError { .. })` value.
///
/// * `scanner_error!(details, coords)` attaches `Some(coords)` to the error.
/// * `scanner_error!(details)` leaves the coordinates as `None`.
macro_rules! scanner_error {
    ($details:expr, $coords:expr) => {
        Err(ScannerError { details: $details, coords: Some($coords) })
    };
    ($details:expr) => {
        Err(ScannerError { details: $details, coords: None })
    };
}
97
98pub struct CharWithCoords {
100 pub ch: char,
101 pub coords: Coords,
102}
103
104pub struct StringWithSpan {
106 pub str: String,
107 pub span: Span,
108}
109
/// Produces a new `CharWithCoords` holding the same character and a clone of the
/// coordinates of `$src`.
///
/// `$src` is evaluated exactly once: it is bound through a `match` so an argument with side
/// effects, or one that is costly to compute, is not repeated for each field.
macro_rules! clone_char_with_coords {
    ($src:expr) => {
        match &$src {
            source => CharWithCoords {
                ch: source.ch,
                coords: source.coords.clone(),
            },
        }
    };
}
119
/// Builds a `CharWithCoords` from a character expression and a coordinates expression.
macro_rules! char_with_coords {
    ($ch:expr, $coords:expr) => {
        CharWithCoords { ch: $ch, coords: $coords }
    };
}
129
130#[derive()]
134pub struct Scanner<'a> {
135 source: &'a mut dyn Iterator<Item = char>,
137
138 accumulator: Vec<CharWithCoords>,
140
141 buffer: Vec<CharWithCoords>,
143
144 position: Coords,
146
147 newline: bool,
149}
150
151impl<'a> Scanner<'a> {
154 pub fn new(chars: &'a mut dyn Iterator<Item = char>) -> Self {
156 Scanner {
157 source: chars,
158 accumulator: vec![],
159 buffer: vec![],
160 position: Coords {
161 column: 0,
162 line: 1,
163 absolute: 0,
164 },
165 newline: false,
166 }
167 }
168
169 pub fn clear(&mut self) {
171 self.accumulator = vec![];
172 }
173
174 pub fn pushback(&mut self) {
177 if !self.accumulator.is_empty() {
178 self.buffer.push(self.accumulator.pop().unwrap())
179 }
180 }
181
182 pub fn position(&self) -> Coords {
184 self.position
185 }
186
187 pub fn front(&self) -> Option<CharWithCoords> {
189 return if !self.accumulator.is_empty() {
190 Some(clone_char_with_coords!(self.accumulator.last().unwrap()))
191 } else {
192 None
193 };
194 }
195
196 pub fn back(&self) -> Option<CharWithCoords> {
198 return if !self.accumulator.is_empty() {
199 Some(clone_char_with_coords!(self.accumulator.first().unwrap()))
200 } else {
201 None
202 };
203 }
204
205 pub fn advance(&mut self, skip_whitespace: bool) -> ScannerResult<()> {
207 loop {
208 match self.next() {
209 Some(cwc) => {
210 self.position.copy_from(&cwc.coords);
212
213 if skip_whitespace {
215 if !cwc.ch.is_whitespace() {
216 self.accumulator.push(cwc);
217 return Ok(());
218 }
219 } else {
220 self.accumulator.push(cwc);
221 return Ok(());
222 }
223 }
224 None => return scanner_error!(ScannerErrorDetails::EndOfInput),
225 }
226 }
227 }
228
229 pub fn try_lookahead(&mut self) -> Option<&CharWithCoords> {
231 return if !self.buffer.is_empty() {
232 self.buffer.last()
233 } else {
234 match self.next() {
235 Some(cwc) => {
236 self.buffer.push(cwc);
237 self.buffer.last()
238 }
239 None => None,
240 }
241 };
242 }
243
244 fn next(&mut self) -> Option<CharWithCoords> {
247 return if !self.buffer.is_empty() {
249 Some(self.buffer.pop().unwrap())
250 } else {
251 match self.source.next() {
253 Some(ch) => match ch {
254 '\n' => {
255 self.newline = true;
256 Some(char_with_coords!(ch, self.position.copy_increment()))
257 }
258 _ => {
259 if self.newline {
260 self.newline = false;
261 Some(char_with_coords!(
262 ch,
263 self.position.copy_increment_newline()
264 ))
265 } else {
266 Some(char_with_coords!(ch, self.position.copy_increment()))
267 }
268 }
269 },
270 None => None,
271 }
272 };
273 }
274
275 pub fn advance_n(&mut self, n: usize, skip_whitespace: bool) -> ScannerResult<()> {
279 for _ in 0..n {
280 self.advance(skip_whitespace)?;
281 }
282 Ok(())
283 }
284
285 pub fn buffer_as_string_with_span(&mut self) -> StringWithSpan {
288 return if !self.accumulator.is_empty() {
289 let mut s = String::with_capacity(self.accumulator.len());
290 self.accumulator.iter().for_each(|cwc| s.push(cwc.ch));
291 StringWithSpan {
292 str: s,
293 span: Span {
294 start: self.back().unwrap().coords,
295 end: self.front().unwrap().coords,
296 },
297 }
298 } else {
299 StringWithSpan {
300 str: String::new(),
301 span: Span {
302 start: self.position,
303 end: self.position,
304 },
305 }
306 };
307 }
308
309 pub fn buffer_as_char_array(&mut self) -> Vec<char> {
311 return if !self.accumulator.is_empty() {
312 let mut arr: Vec<char> = vec![];
313 self.accumulator.iter().for_each(|cwc| arr.push(cwc.ch));
314 arr
315 } else {
316 vec![]
317 };
318 }
319
320 pub fn buffer_as_byte_array(&self) -> Vec<u8> {
323 return if !self.accumulator.is_empty() {
324 self.accumulator.iter().map(|cwc| cwc.ch as u8).collect()
325 } else {
326 vec![]
327 };
328 }
329}
330
#[cfg(test)]
mod test {
    use crate::scanner::Scanner;
    use chisel_common::reader_from_bytes;
    use chisel_decoders::utf8::Utf8Decoder;
    use std::io::BufReader;

    /// A scanner can be constructed over a decoder without panicking.
    #[test]
    fn should_create_new() {
        let mut reader = reader_from_bytes!("{}[],:");
        let mut decoder = Utf8Decoder::new(&mut reader);
        let _ = Scanner::new(&mut decoder);
    }

    /// Advancing with and without whitespace skipping lands on the expected characters
    /// and columns.
    #[test]
    fn should_consume_single_lines_correctly() {
        let mut reader = reader_from_bytes!("this is a test line");
        let mut decoder = Utf8Decoder::new(&mut reader);
        let mut input = Scanner::new(&mut decoder);

        // The first advance picks up the leading 't'.
        assert!(input.advance(true).is_ok());
        assert_eq!(input.front().unwrap().ch, 't');

        // Four more whitespace-skipping advances: 'h', 'i', 's', then the 'i' of "is".
        for _ in 0..4 {
            assert!(input.advance(true).is_ok());
        }
        let front = input.front().unwrap();
        assert_eq!(front.ch, 'i');
        assert_eq!(front.coords.column, 6);

        // Without skipping, the fourth character after a clear is a space.
        input.clear();
        for _ in 0..4 {
            assert!(input.advance(false).is_ok());
        }
        let front = input.front().unwrap();
        assert_eq!(front.ch, ' ');
        assert_eq!(front.coords.column, 10)
    }

    /// A pushed-back character is removed from the accumulator and re-read by the next
    /// advance.
    #[test]
    fn should_handle_pushbacks_correctly() {
        let buffer: &[u8] = "let goodly sin and sunshine in".as_bytes();
        let mut reader = BufReader::new(buffer);
        let mut decoder = Utf8Decoder::new(&mut reader);
        let mut scanner = Scanner::new(&mut decoder);

        // Consume the first character.
        assert!(scanner.advance(true).is_ok());
        let front = scanner.front().unwrap();
        assert_eq!(front.ch, 'l');
        assert_eq!(front.coords.column, 1);

        // Consume a second character, then push it straight back.
        assert!(scanner.advance(true).is_ok());
        scanner.pushback();
        assert_eq!(scanner.front().unwrap().ch, 'l');

        // The pushed-back character is the next one produced.
        let _ = scanner.advance(true);
        assert_eq!(scanner.front().unwrap().ch, 'e');

        let contents = scanner.buffer_as_string_with_span();
        assert_eq!(contents.str, "le");

        // Clearing empties the accumulated contents.
        scanner.clear();
        assert!(scanner.buffer_as_string_with_span().str.is_empty());

        // Scanning resumes from the source after the buffered character was used.
        assert!(scanner.advance(true).is_ok());
        let front = scanner.front().unwrap();
        assert_eq!(front.ch, 't');
        assert_eq!(front.coords.column, 3);
    }
}