#![allow(unused_assignments)]
#![allow(unused_variables)]
#![allow(unreachable_code)]
#![allow(dead_code)]
#![allow(unused_imports)]

use std::fmt::{Display, Formatter};

use crate::json::numerics::LazyNumeric;
use chisel_common::char::coords::Coords;
use chisel_common::char::span::Span;

use crate::json::tokens::{PackedToken, Token};
use crate::scanner::{CharWithCoords, Scanner};

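/// Type alias for all results returned from the lexer.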
pub type LexerResult<T> = Result<T, LexerError>;

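/// Enumeration of the various error conditions that the lexer can report.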
#[derive(Debug, Clone, PartialEq)]
pub enum LexerErrorDetails {
    InvalidFile,
    ZeroLengthInput,
    EndOfInput,
    StreamFailure,
    NonUtf8InputDetected,
    UnexpectedToken(Token),
    PairExpected,
    InvalidRootObject,
    InvalidObject,
    InvalidArray,
    InvalidCharacter(char),
    MatchFailed(String, String),
    InvalidNumericRepresentation(String),
    InvalidEscapeSequence(String),
    InvalidUnicodeEscapeSequence(String),
}

impl Display for LexerErrorDetails {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        match self {
            LexerErrorDetails::InvalidFile => write!(f, "invalid file specified"),
            LexerErrorDetails::ZeroLengthInput => write!(f, "zero length input"),
            LexerErrorDetails::EndOfInput => write!(f, "end of input reached"),
            LexerErrorDetails::StreamFailure => write!(f, "failure in the underlying stream"),
            LexerErrorDetails::NonUtf8InputDetected => write!(f, "non-UTF8 input"),
            LexerErrorDetails::UnexpectedToken(token) => {
                write!(f, "unexpected token found: {}", token)
            }
            LexerErrorDetails::PairExpected => {
                write!(f, "pair expected, something else was found")
            }
            LexerErrorDetails::InvalidRootObject => write!(f, "invalid JSON"),
            LexerErrorDetails::InvalidObject => write!(f, "invalid object"),
            LexerErrorDetails::InvalidArray => write!(f, "invalid array"),
            LexerErrorDetails::InvalidCharacter(ch) => write!(f, "invalid character: \'{}\'", ch),
            LexerErrorDetails::MatchFailed(first, second) => write!(
                f,
                "a match failed. Looking for \"{}\", found \"{}\"",
                first, second
            ),
            LexerErrorDetails::InvalidNumericRepresentation(repr) => {
                write!(f, "invalid number representation: \"{}\"", repr)
            }
            LexerErrorDetails::InvalidEscapeSequence(seq) => {
                write!(f, "invalid escape sequence: \"{}\"", seq)
            }
            LexerErrorDetails::InvalidUnicodeEscapeSequence(seq) => {
                write!(f, "invalid unicode escape sequence: \"{}\"", seq)
            }
        }
    }
}

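/// An error raised by the lexer, combining the error details with the optional position in the
/// input at which the error occurred.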
#[derive(Debug, Clone)]
pub struct LexerError {
    /// The underlying error details
    pub details: LexerErrorDetails,
    /// Optional coordinates within the input at which the error occurred
    pub coords: Option<Coords>,
}

impl Display for LexerError {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        if self.coords.is_some() {
            write!(
                f,
                "details: {}, coords: {}",
                self.details,
                self.coords.unwrap()
            )
        } else {
            write!(f, "details: {}", self.details)
        }
    }
}

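/// Construct an `Err` variant containing a [LexerError], optionally tagged with the coordinates
/// at which the error occurred.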
macro_rules! wrapped_lexer_error {
    ($details: expr, $coords: expr) => {
        Err(LexerError {
            details: $details,
            coords: Some($coords),
        })
    };
    ($details: expr) => {
        Err(LexerError {
            details: $details,
            coords: None,
        })
    };
}

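/// Construct a bare [LexerError] value, optionally tagged with coordinates.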
macro_rules! lexer_error {
    ($details: expr, $coords: expr) => {
        LexerError {
            details: $details,
            coords: Some($coords),
        }
    };
    ($details: expr) => {
        LexerError {
            details: $details,
            coords: None,
        }
    };
}

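// Byte sequences for the JSON literals "null", "true" and "false"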
const NULL_ASCII: [u8; 4] = [0x6e, 0x75, 0x6c, 0x6c];
const TRUE_ASCII: [u8; 4] = [0x74, 0x72, 0x75, 0x65];
const FALSE_ASCII: [u8; 5] = [0x66, 0x61, 0x6c, 0x73, 0x65];

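/// Pack a [Token] and its [Span] into an `Ok` result; the single-coordinate form produces a
/// span whose start and end are the same coordinate.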
macro_rules! packed_token {
    ($t:expr, $s:expr, $e:expr) => {
        Ok(($t, Span { start: $s, end: $e }))
    };
    ($t:expr, $s:expr) => {
        Ok(($t, Span { start: $s, end: $s }))
    };
}

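// Macros which expand to character patterns, used within the match arms below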
macro_rules! match_zero {
    () => {
        '0'
    };
}

macro_rules! match_minus {
    () => {
        '-'
    };
}

macro_rules! match_plus_minus {
    () => {
        '+' | '-'
    };
}

macro_rules! match_digit {
    () => {
        '0'..='9'
    };
}

macro_rules! match_non_zero_digit {
    () => {
        '1'..='9'
    };
}

macro_rules! match_exponent {
    () => {
        'e' | 'E'
    };
}

macro_rules! match_period {
    () => {
        '.'
    };
}

macro_rules! match_numeric_terminator {
    () => {
        ']' | '}' | ','
    };
}

macro_rules! match_escape {
    () => {
        '\\'
    };
}

macro_rules! match_escape_non_unicode_suffix {
    () => {
        'n' | 't' | 'r' | '\\' | '/' | 'b' | 'f' | '\"'
    };
}

macro_rules! match_escape_unicode_suffix {
    () => {
        'u'
    };
}

macro_rules! match_quote {
    () => {
        '\"'
    };
}

macro_rules! match_newline {
    () => {
        '\n'
    };
}

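/// Compute a more precise error location for a failed literal match by walking the source and
/// target buffers together until the first non-whitespace mismatch.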
#[inline]
fn adjusted_error_coords(start_coords: &Coords, source: &[u8], target: &[u8]) -> Coords {
    let mut err_coords = Coords::from_coords(start_coords);
    // Bound the walk by the shorter of the two buffers to avoid indexing past the end of
    // either slice
    for i in 0..target.len().min(source.len()) {
        if !source[i].is_ascii_whitespace() && source[i] != target[i] {
            break;
        }
        err_coords.increment();
    }
    err_coords
}

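/// A lexer for JSON input, which consumes characters (and their coordinates) from an underlying
/// [Scanner] and assembles them into [Token]s, each packed together with its [Span].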
pub struct Lexer<'a> {
    input: Scanner<'a>,
}

impl<'a> Lexer<'a> {
    pub fn new(chars: &'a mut impl Iterator<Item = char>) -> Self {
        Lexer {
            input: Scanner::new(chars),
        }
    }

    fn front(&self) -> Option<CharWithCoords> {
        self.input.front()
    }

    fn back(&self) -> Option<CharWithCoords> {
        self.input.back()
    }

    #[inline]
    fn front_char(&self) -> char {
        self.input.front().unwrap().ch
    }

    #[inline]
    fn back_char(&self) -> char {
        self.input.back().unwrap().ch
    }

    #[inline]
    fn front_coords(&self) -> Coords {
        self.input.front().unwrap().coords
    }

    #[inline]
    fn back_coords(&self) -> Coords {
        self.input.back().unwrap().coords
    }

    #[inline]
    fn absolute_position(&self) -> Coords {
        self.input.position()
    }

    #[inline]
    fn advance(&mut self, skip_whitespace: bool) -> LexerResult<()> {
        self.input
            .advance(skip_whitespace)
            .map_err(|e| lexer_error!(LexerErrorDetails::EndOfInput))
    }

    fn advance_n(&mut self, n: usize, skip_whitespace: bool) -> LexerResult<()> {
        self.input
            .advance_n(n, skip_whitespace)
            .map_err(|e| lexer_error!(LexerErrorDetails::EndOfInput))
    }

    #[inline]
    fn current_string(&mut self) -> String {
        self.input.buffer_as_string_with_span().str
    }

    #[inline]
    fn current_chars(&mut self) -> Vec<char> {
        self.input.buffer_as_char_array()
    }

    #[inline]
    fn current_bytes(&mut self) -> Vec<u8> {
        self.input.buffer_as_byte_array()
    }

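    /// Consume the next token from the input. The scanner buffer is cleared before each call,
    /// and either a [PackedToken] (the token plus its span) or a [LexerError] is returned.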
    pub fn consume(&mut self) -> LexerResult<PackedToken> {
        self.input.clear();
        match self.advance(true) {
            Ok(_) => match self.input.front() {
                Some(CharWithCoords { ch: '{', coords }) => {
                    packed_token!(Token::StartObject, coords)
                }
                Some(CharWithCoords { ch: '}', coords }) => packed_token!(Token::EndObject, coords),
                Some(CharWithCoords { ch: '[', coords }) => {
                    packed_token!(Token::StartArray, coords)
                }
                Some(CharWithCoords { ch: ']', coords }) => packed_token!(Token::EndArray, coords),
                Some(CharWithCoords { ch: ':', coords }) => packed_token!(Token::Colon, coords),
                Some(CharWithCoords { ch: ',', coords }) => packed_token!(Token::Comma, coords),
                Some(CharWithCoords { ch: '\"', coords }) => self.match_string(),
                Some(CharWithCoords { ch: 'n', coords }) => self.match_null(),
                Some(CharWithCoords { ch: 't', coords }) => self.match_true(),
                Some(CharWithCoords { ch: 'f', coords }) => self.match_false(),
                Some(CharWithCoords { ch: '-', coords }) => self.match_number(),
                Some(CharWithCoords { ch: d, coords }) if d.is_ascii_digit() => self.match_number(),
                Some(CharWithCoords { ch, coords }) => wrapped_lexer_error!(
                    LexerErrorDetails::InvalidCharacter(ch.clone()),
                    coords.clone()
                ),
                None => {
                    wrapped_lexer_error!(LexerErrorDetails::EndOfInput, self.absolute_position())
                }
            },
            Err(err) => match err.details {
                LexerErrorDetails::EndOfInput => {
                    packed_token!(Token::EndOfInput, self.input.position())
                }
                _ => match err.coords {
                    Some(coords) => wrapped_lexer_error!(err.details, coords),
                    None => wrapped_lexer_error!(err.details, self.absolute_position()),
                },
            },
        }
    }

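    /// Match a string token, the opening quote having already been consumed. Any escape
    /// sequences encountered along the way are validated.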
    #[inline]
    fn match_string(&mut self) -> LexerResult<PackedToken> {
        loop {
            match self.advance(false) {
                Ok(_) => match self.front_char() {
                    match_escape!() => match self.input.advance(false) {
                        Ok(_) => match self.front_char() {
                            match_escape_non_unicode_suffix!() => (),
                            match_escape_unicode_suffix!() => self.check_unicode_sequence()?,
                            _ => {
                                return wrapped_lexer_error!(
                                    LexerErrorDetails::InvalidEscapeSequence(self.current_string()),
                                    self.back_coords()
                                );
                            }
                        },
                        Err(err) => {
                            return wrapped_lexer_error!(
                                LexerErrorDetails::EndOfInput,
                                err.coords.unwrap()
                            );
                        }
                    },
                    match_quote!() => {
                        return packed_token!(
                            Token::Str(self.current_string()),
                            self.back_coords(),
                            self.front_coords()
                        );
                    }
                    _ => (),
                },
                Err(err) => {
                    return match err.coords {
                        Some(_) => {
                            wrapped_lexer_error!(err.details, err.coords.unwrap())
                        }
                        None => wrapped_lexer_error!(err.details, self.absolute_position()),
                    }
                }
            }
        }
    }

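    /// Check that the next four characters form a valid unicode escape sequence, i.e. that they
    /// are all hexadecimal digits.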
    #[inline]
    fn check_unicode_sequence(&mut self) -> LexerResult<()> {
        let start_position = self.absolute_position();
        for i in 1..=4 {
            match self.advance(false) {
                Ok(_) => {
                    if !self.front_char().is_ascii_hexdigit() {
                        return wrapped_lexer_error!(
                            LexerErrorDetails::InvalidUnicodeEscapeSequence(self.current_string()),
                            start_position
                        );
                    }
                }
                Err(e) => {
                    return wrapped_lexer_error!(
                        LexerErrorDetails::EndOfInput,
                        self.absolute_position()
                    );
                }
            }
        }
        Ok(())
    }

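    /// Match a numeric token, the leading minus sign or digit having already been consumed.
    /// Tracks whether a decimal point or exponent has been seen in order to reject malformed
    /// numbers, and stops on whitespace or a structural terminator character.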
    #[inline]
    fn match_number(&mut self) -> LexerResult<PackedToken> {
        let mut have_exponent = false;
        let mut have_decimal = false;

        match self.match_valid_number_prefix() {
            Ok(integral) => {
                have_decimal = !integral;
                loop {
                    match self.advance(false) {
                        Ok(_) => match self.front_char() {
                            match_digit!() => (),
                            match_exponent!() => {
                                if !have_exponent {
                                    self.check_following_exponent()?;
                                    have_exponent = true;
                                } else {
                                    return wrapped_lexer_error!(
                                        LexerErrorDetails::InvalidNumericRepresentation(
                                            self.current_string()
                                        ),
                                        self.back_coords()
                                    );
                                }
                            }
                            match_period!() => {
                                if !have_decimal {
                                    have_decimal = true;
                                } else {
                                    return wrapped_lexer_error!(
                                        LexerErrorDetails::InvalidNumericRepresentation(
                                            self.current_string()
                                        ),
                                        self.back_coords()
                                    );
                                }
                            }
                            match_numeric_terminator!() => {
                                self.input.pushback();
                                break;
                            }
                            ch if ch.is_ascii_whitespace() => {
                                self.input.pushback();
                                break;
                            }
                            ch if ch.is_alphabetic() => {
                                return wrapped_lexer_error!(
                                    LexerErrorDetails::InvalidNumericRepresentation(
                                        self.current_string()
                                    ),
                                    self.back_coords()
                                );
                            }
                            _ => {
                                return wrapped_lexer_error!(
                                    LexerErrorDetails::InvalidNumericRepresentation(
                                        self.current_string()
                                    ),
                                    self.back_coords()
                                );
                            }
                        },
                        Err(err) => {
                            return match err.coords {
                                Some(coords) => wrapped_lexer_error!(err.details, coords),
                                None => wrapped_lexer_error!(err.details),
                            };
                        }
                    }
                }
            }
            Err(err) => {
                return match err.coords {
                    Some(coords) => wrapped_lexer_error!(err.details, coords),
                    None => wrapped_lexer_error!(err.details),
                };
            }
        }

        self.parse_numeric(!have_decimal)
    }

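    /// Check the character following an exponent marker, which must be an explicit sign
    /// ('+' or '-').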
    #[inline]
    fn check_following_exponent(&mut self) -> LexerResult<()> {
        self.advance(false).and_then(|_| {
            return match self.front_char() {
                match_plus_minus!() => Ok(()),
                _ => wrapped_lexer_error!(
                    LexerErrorDetails::InvalidNumericRepresentation(self.current_string()),
                    self.absolute_position()
                ),
            };
        })
    }

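    /// Parse the current scanner buffer into either an integer or a float token (eager parsing,
    /// used when the "lazy-numerics" feature is disabled).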
    #[cfg(not(feature = "lazy-numerics"))]
    #[inline]
    fn parse_numeric(&mut self, integral: bool) -> LexerResult<PackedToken> {
        if integral {
            packed_token!(
                Token::Integer(lexical::parse(self.input.buffer_as_byte_array()).unwrap()),
                self.back_coords(),
                self.front_coords()
            )
        } else {
            packed_token!(
                Token::Float(fast_float::parse(self.input.buffer_as_byte_array()).unwrap()),
                self.back_coords(),
                self.front_coords()
            )
        }
    }

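    /// Parse the current scanner buffer into a lazily evaluated numeric token (used when the
    /// "lazy-numerics" feature is enabled).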
    #[cfg(feature = "lazy-numerics")]
    #[inline]
    fn parse_numeric(&mut self, integral: bool) -> LexerResult<PackedToken> {
        packed_token!(
            Token::LazyNumeric(LazyNumeric::new(
                self.input.buffer_as_byte_array().as_slice()
            )),
            self.back_coords(),
            self.front_coords()
        )
    }

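    /// Check the leading minus sign or digit of a number, returning `Ok(true)` if the number
    /// still looks integral, or `Ok(false)` if a decimal point has already been detected.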
    #[inline]
    fn match_valid_number_prefix(&mut self) -> LexerResult<bool> {
        let ch = self.back_char();
        assert!(ch.is_ascii_digit() || ch == '-');
        match ch {
            match_minus!() => self
                .input
                .advance(false)
                .map_err(|e| lexer_error!(LexerErrorDetails::EndOfInput))
                .and_then(|_| self.check_following_minus()),
            match_zero!() => self
                .input
                .advance(false)
                .map_err(|e| lexer_error!(LexerErrorDetails::EndOfInput))
                .and_then(|_| self.check_following_zero()),
            _ => Ok(true),
        }
    }

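    /// Check the character following a leading zero: a further digit is an error, a decimal
    /// point switches to float parsing, and anything else is pushed back for the caller.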
    #[inline]
    fn check_following_zero(&mut self) -> LexerResult<bool> {
        match self.front_char() {
            match_period!() => Ok(false),
            match_digit!() => wrapped_lexer_error!(
                LexerErrorDetails::InvalidNumericRepresentation(self.current_string()),
                self.back_coords()
            ),
            match_newline!() => {
                self.input.pushback();
                Ok(true)
            }
            _ => {
                self.input.pushback();
                Ok(true)
            }
        }
    }

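    /// Check the character following a leading minus sign, rejecting prefixes which cannot
    /// start a valid number (for example a zero not followed by a decimal point).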
    #[inline]
    fn check_following_minus(&mut self) -> LexerResult<bool> {
        match self.front_char() {
            match_non_zero_digit!() => Ok(true),
            match_zero!() => self.advance(false).and_then(|_| {
                if self.front_char() != '.' {
                    return wrapped_lexer_error!(
                        LexerErrorDetails::InvalidNumericRepresentation(self.current_string()),
                        self.back_coords()
                    );
                }
                Ok(false)
            }),
            match_newline!() => {
                self.input.pushback();
                Ok(true)
            }
            _ => wrapped_lexer_error!(
                LexerErrorDetails::InvalidNumericRepresentation(self.current_string()),
                self.back_coords()
            ),
        }
    }

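    /// Attempt to match the `null` literal, comparing the buffered bytes against the expected
    /// sequence.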
    #[inline]
    fn match_null(&mut self) -> LexerResult<PackedToken> {
        self.input
            .advance_n(3, false)
            .map_err(|e| lexer_error!(LexerErrorDetails::EndOfInput, self.absolute_position()))
            .and_then(|_| {
                if self.current_bytes() == NULL_ASCII {
                    packed_token!(Token::Null, self.back_coords(), self.front_coords())
                } else {
                    wrapped_lexer_error!(
                        LexerErrorDetails::MatchFailed(String::from("null"), self.current_string()),
                        adjusted_error_coords(
                            &self.back_coords(),
                            &self.current_bytes().as_slice(),
                            &NULL_ASCII
                        )
                    )
                }
            })
    }

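    /// Attempt to match the `true` literal, comparing the buffered bytes against the expected
    /// sequence.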
    #[inline]
    fn match_true(&mut self) -> LexerResult<PackedToken> {
        self.advance_n(3, false)
            .map_err(|e| lexer_error!(LexerErrorDetails::EndOfInput, self.absolute_position()))
            .and_then(|_| {
                if self.current_bytes() == TRUE_ASCII {
                    packed_token!(
                        Token::Boolean(true),
                        self.back_coords(),
                        self.front_coords()
                    )
                } else {
                    wrapped_lexer_error!(
                        LexerErrorDetails::MatchFailed(String::from("true"), self.current_string()),
                        adjusted_error_coords(
                            &self.back_coords(),
                            &self.current_bytes().as_slice(),
                            &TRUE_ASCII
                        )
                    )
                }
            })
    }

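    /// Attempt to match the `false` literal, comparing the buffered bytes against the expected
    /// sequence.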
    #[inline]
    fn match_false(&mut self) -> LexerResult<PackedToken> {
        self.advance_n(4, false)
            .map_err(|e| lexer_error!(LexerErrorDetails::EndOfInput, self.absolute_position()))
            .and_then(|_| {
                if self.current_bytes() == FALSE_ASCII {
                    packed_token!(
                        Token::Boolean(false),
                        self.back_coords(),
                        self.front_coords()
                    )
                } else {
                    wrapped_lexer_error!(
                        LexerErrorDetails::MatchFailed(
                            String::from("false"),
                            self.current_string()
                        ),
                        adjusted_error_coords(
                            &self.back_coords(),
                            &self.current_bytes().as_slice(),
                            &FALSE_ASCII
                        )
                    )
                }
            })
    }
}

#[cfg(test)]
mod tests {
    use std::env;
    use std::fs::File;
    use std::io::{BufRead, BufReader};
    use std::time::Instant;

    use chisel_common::char::span::Span;
    use chisel_common::{lines_from_relative_file, reader_from_bytes};
    use chisel_decoders::utf8::Utf8Decoder;

    use crate::json::lexer::{Lexer, LexerError, LexerResult};
    use crate::json::tokens::{PackedToken, Token};

    #[test]
    fn should_report_position_of_eoi() {
        let input = String::from("\"this is a test");
        let mut reader = reader_from_bytes!(input);
        let mut decoder = Utf8Decoder::new(&mut reader);
        let mut lexer = Lexer::new(&mut decoder);
        let result = lexer.consume();
        match result {
            Err(err) => {
                assert!(err.coords.is_some());
                assert_eq!(err.coords.unwrap().column, input.len())
            }
            _ => assert!(false),
        }
    }

    #[test]
    fn should_parse_basic_tokens() {
        let mut reader = reader_from_bytes!("{}[],:");
        let mut decoder = Utf8Decoder::new(&mut reader);
        let mut lexer = Lexer::new(&mut decoder);
        let mut tokens: Vec<Token> = vec![];
        let mut spans: Vec<Span> = vec![];
        for _ in 1..=7 {
            let token = lexer.consume().unwrap();
            tokens.push(token.0);
            spans.push(token.1);
        }
        assert_eq!(
            tokens,
            [
                Token::StartObject,
                Token::EndObject,
                Token::StartArray,
                Token::EndArray,
                Token::Comma,
                Token::Colon,
                Token::EndOfInput
            ]
        );
    }

    #[test]
    fn should_parse_null_and_booleans() {
        let mut reader = reader_from_bytes!("null true falsetruefalse");
        let mut decoder = Utf8Decoder::new(&mut reader);
        let mut lexer = Lexer::new(&mut decoder);
        let mut tokens: Vec<Token> = vec![];
        let mut spans: Vec<Span> = vec![];
        for _ in 1..=6 {
            let token = lexer.consume().unwrap();
            tokens.push(token.0);
            spans.push(token.1);
        }
        assert_eq!(
            tokens,
            [
                Token::Null,
                Token::Boolean(true),
                Token::Boolean(false),
                Token::Boolean(true),
                Token::Boolean(false),
                Token::EndOfInput
            ]
        );
    }

    #[test]
    fn should_parse_strings() {
        let lines = lines_from_relative_file!("fixtures/utf-8/strings.txt");
        for l in lines.flatten() {
            if !l.is_empty() {
                let mut reader = reader_from_bytes!(l);
                let mut decoder = Utf8Decoder::new(&mut reader);
                let mut lexer = Lexer::new(&mut decoder);
                let token = lexer.consume().unwrap();
                match token.0 {
                    Token::Str(str) => {
                        assert_eq!(str, l)
                    }
                    _ => panic!(),
                }
            }
        }
    }

    #[test]
    fn should_report_correct_error_char_position() {
        let mut reader = reader_from_bytes!("{\"abc\" : \nd}");
        let mut decoder = Utf8Decoder::new(&mut reader);
        let mut lexer = Lexer::new(&mut decoder);
        let mut results = vec![];
        for _ in 0..4 {
            results.push(lexer.consume())
        }
        assert!(&results[3].is_err());
        let error = results[3].clone();
        let coords = results[3].clone().err().unwrap().coords.unwrap();
        assert_eq!(coords.absolute, 11);
        assert_eq!(coords.line, 2)
    }

    #[test]
    fn should_parse_numerics() {
        let start = Instant::now();
        let lines = lines_from_relative_file!("fixtures/utf-8/numbers.txt");
        for l in lines.flatten() {
            if !l.is_empty() {
                println!("Parsing {}", l);
                let mut reader = reader_from_bytes!(l);
                let mut decoder = Utf8Decoder::new(&mut reader);
                let mut lexer = Lexer::new(&mut decoder);
                let token = lexer.consume().unwrap();
                match token.0 {
                    Token::Integer(_) => {
                        assert_eq!(
                            token.0,
                            Token::Integer(l.replace(',', "").parse::<i64>().unwrap())
                        );
                    }
                    Token::Float(_) => {
                        assert_eq!(
                            token.0,
                            Token::Float(fast_float::parse(l.replace(',', "")).unwrap())
                        );
                    }
                    Token::LazyNumeric(lazy) => {
                        let value: f64 = lazy.into();
                        assert_eq!(
                            Token::Float(value),
                            Token::Float(fast_float::parse(l.replace(',', "")).unwrap())
                        );
                    }
                    _ => panic!(),
                }
            }
        }
        println!("Parsed numerics in {:?}", start.elapsed());
    }

    #[test]
    fn should_correctly_handle_invalid_numbers() {
        let lines = lines_from_relative_file!("fixtures/utf-8/invalid_numbers.txt");
        for l in lines.flatten() {
            if !l.is_empty() {
                let mut reader = reader_from_bytes!(l);
                let mut decoder = Utf8Decoder::new(&mut reader);
                let mut lexer = Lexer::new(&mut decoder);
                let token = lexer.consume();
                assert!(token.is_err());
            }
        }
    }

    #[test]
    fn should_correctly_identify_dodgy_strings() {
        let lines = lines_from_relative_file!("fixtures/utf-8/dodgy_strings.txt");
        for l in lines.flatten() {
            if !l.is_empty() {
                let mut reader = reader_from_bytes!(l);
                let mut decoder = Utf8Decoder::new(&mut reader);
                let mut lexer = Lexer::new(&mut decoder);
                let mut error_token: Option<LexerError> = None;
                loop {
                    let token = lexer.consume();
                    match token {
                        Ok(packed) => {
                            if packed.0 == Token::EndOfInput {
                                break;
                            }
                        }
                        Err(err) => {
                            error_token = Some(err.clone());
                            println!("Dodgy string found: {} : {}", l, err.coords.unwrap());
                            break;
                        }
                    }
                }
                assert!(error_token.is_some());
            }
        }
    }

    #[test]
    fn should_correctly_report_errors_for_booleans() {
        let mut reader = reader_from_bytes!("true farse");
        let mut decoder = Utf8Decoder::new(&mut reader);
        let mut lexer = Lexer::new(&mut decoder);
        let mut results: Vec<LexerResult<PackedToken>> = vec![];
        for _ in 1..=2 {
            results.push(lexer.consume());
        }

        assert!(results[0].is_ok());
        assert!(results[1].is_err());

        if let Ok(packed) = &results[0] {
            assert_eq!(packed.1.start.column, 1)
        }

        if let Err(err) = &results[1] {
            assert_eq!(err.coords.unwrap().column, 8)
        }

        println!("Parse error: {:?}", results[1]);
    }
}