1use std::{
2 collections::VecDeque,
3 fmt::Display,
4 io::{Cursor, Read},
5};
6use utf8::{self, BufReadDecoder, BufReadDecoderError};
7
/// Location of a token within the input stream.
///
/// Offsets and lengths are in *bytes* (they come from `str::char_indices`
/// and `String::len`); `line` and `column` are 0-based and refer to the
/// first character of the token.
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Span {
    /// Byte offset of the first byte of the token.
    pub lo: usize,
    /// Length of the token in bytes.
    pub len: usize,
    /// 0-based line on which the token starts.
    pub line: usize,
    /// 0-based column at which the token starts.
    pub column: usize,
}
16
/// A [`Token`] paired with its source location, when one is known.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct LocatedToken {
    /// Where the token occurred in the input, if known.
    pub span: Option<Span>,
    /// The token itself.
    pub token: Token,
}

impl LocatedToken {
    /// Borrow the wrapped token.
    pub fn token(&self) -> &Token {
        &self.token
    }
    /// The token's location, if known (`Span` is `Copy`, so returned by value).
    pub fn span(&self) -> Option<Span> {
        self.span
    }
}

/// Displays exactly like the wrapped token; the span is not printed.
impl Display for LocatedToken {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.token)
    }
}
38
/// A lexical token of the namelist format.
///
/// Fixed punctuation tokens carry no payload; textual tokens carry the
/// exact source text they were built from (quotes and comment markers
/// included), so the token stream round-trips through `Display`.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Token {
    LeftBracket,
    RightBracket,
    Equals,
    Colon,
    Comma,
    RightSlash,
    Ampersand,
    NewLine,
    QuotedStr(String),
    Bool(String),
    Whitespace(String),
    Identifier(String),
    Number(String),
    Comment(String),
}
impl Token {
    /// Whether this token only contributes to source locations (layout,
    /// separators, comments) rather than to namelist values/names.
    pub fn is_location_token(&self) -> bool {
        match self {
            Self::LeftBracket
            | Self::RightBracket
            | Self::Colon
            | Self::Comma
            | Self::NewLine
            | Self::Whitespace(_)
            | Self::Number(_)
            | Self::Comment(_) => true,
            Self::Equals
            | Self::RightSlash
            | Self::Ampersand
            | Self::Bool(_)
            | Self::QuotedStr(_)
            | Self::Identifier(_) => false,
        }
    }

    /// True for whitespace runs and bare newlines.
    pub fn is_whitespace(&self) -> bool {
        matches!(self, Self::Whitespace(_) | Self::NewLine)
    }

    /// True for comment tokens.
    pub fn is_comment(&self) -> bool {
        matches!(self, Self::Comment(_))
    }
}

/// Renders the token as the exact source text it represents.
impl Display for Token {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let text: &str = match self {
            Self::LeftBracket => "(",
            Self::RightBracket => ")",
            Self::Equals => "=",
            Self::Colon => ":",
            Self::Comma => ",",
            Self::RightSlash => "/",
            Self::Ampersand => "&",
            Self::NewLine => "\n",
            Self::QuotedStr(s)
            | Self::Bool(s)
            | Self::Whitespace(s)
            | Self::Identifier(s)
            | Self::Number(s)
            | Self::Comment(s) => s,
        };
        f.write_str(text)
    }
}
107
/// States of the tokenizer's state machine.
///
/// For every `In*`/`Comment` variant, `start` is the byte offset at which
/// the token began and `content` is the text accumulated so far.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum TokenizerState {
    /// Outside any namelist; most characters begin comment text here.
    Start,
    /// Inside a namelist (after `&`), between tokens.
    StartInNamelist,
    /// Inside a single-quoted string.
    InQuote { start: usize, content: String },
    /// Accumulating an identifier.
    InIdentifier { start: usize, content: String },
    /// Saw a leading `.`: could still become a number (`.2`) or a bool (`.TRUE.`).
    InBoolOrNumber { start: usize, content: String },
    /// Accumulating a numeric literal.
    InNumber { start: usize, content: String },
    /// Accumulating a `.`-delimited bool literal.
    InBool { start: usize, content: String },
    /// Accumulating a run of whitespace.
    InWhitespace { start: usize, content: String },
    /// Accumulating comment text (terminated by a newline).
    Comment { start: usize, content: String },
}
120
121pub struct CharDecoder<R: std::io::Read> {
122 iter: BufReadDecoder<std::io::BufReader<R>>,
123 offset: usize,
124 chars: VecDeque<(usize, char)>,
125}
126
127impl<R: Read> CharDecoder<R> {
128 pub fn new(input: R) -> Self {
129 Self {
130 iter: BufReadDecoder::new(std::io::BufReader::new(input)),
131 chars: VecDeque::new(),
132 offset: 0,
133 }
134 }
135}
136
137impl<R: Read> Iterator for CharDecoder<R> {
138 type Item = Result<(usize, char), CharDecodeError>;
139 fn next(&mut self) -> Option<Self::Item> {
140 loop {
141 if let Some(res) = self.chars.pop_front() {
142 return Some(Ok(res));
143 } else {
144 match self.iter.next_strict()? {
145 Ok(next_string) => {
146 let offset = self.offset;
147 for r in next_string.char_indices().map(|(i, c)| (i + offset, c)) {
148 self.chars.push_back(r);
149 }
150 self.offset += next_string.len();
151 }
152 Err(BufReadDecoderError::InvalidByteSequence(s)) => {
153 return Some(Err(CharDecodeError::DecodeError(s.into())))
154 }
155 Err(BufReadDecoderError::Io(err)) => {
156 return Some(Err(CharDecodeError::IoError(err)))
157 }
158 }
159 }
160 }
161 }
162}
163
/// Failure while decoding the input byte stream into characters.
#[derive(Debug)]
pub enum CharDecodeError {
    /// The underlying reader failed.
    IoError(std::io::Error),
    /// The input contained an invalid UTF-8 byte sequence (the offending
    /// bytes are carried in the payload).
    DecodeError(Vec<u8>),
}

impl std::fmt::Display for CharDecodeError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // BUG FIX: the messages previously said "Invalid bool or number" /
        // "Invalid character" — copy-pasted from TokenizerError and wrong
        // for both variants.
        match self {
            Self::IoError(err) => {
                write!(f, "I/O error: {err}")
            }
            Self::DecodeError(bytes) => {
                write!(f, "Invalid UTF-8 byte sequence: {bytes:?}")
            }
        }
    }
}

impl std::error::Error for CharDecodeError {
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            // Expose the wrapped io::Error so callers can walk the chain
            // (previously this returned None and dropped the cause).
            Self::IoError(err) => Some(err),
            Self::DecodeError(_) => None,
        }
    }
}
191
/// Streaming tokenizer: an iterator of [`LocatedToken`]s over a reader.
///
/// NOTE(review): `CharDecoder::new` already wraps its input in a
/// `BufReader`, so the `BufReader` in `iter`'s type double-buffers the
/// stream — harmless but redundant.
pub struct TokenIter<B: std::io::Read> {
    // Source of (byte_offset, char) pairs.
    iter: CharDecoder<std::io::BufReader<B>>,
    // One-character pushback: set when a character terminates the current
    // token but belongs to the next one.
    buf: Option<(usize, char)>,
    // Current state of the tokenizer state machine.
    state: TokenizerState,
    // 0-based line of the next unconsumed character.
    line: usize,
    // 0-based column of the next unconsumed character.
    column: usize,
}
199
200impl<R: std::io::Read> TokenIter<R> {
201 pub fn new(input: R) -> Self {
202 Self {
203 iter: CharDecoder::new(std::io::BufReader::new(input)),
204 buf: None,
205 state: TokenizerState::Start,
206 line: 0,
207 column: 0,
208 }
209 }
210 fn pos_advance_token(&mut self, token: &Token) {
211 match token {
212 Token::LeftBracket => self.pos_advance('('),
213 Token::RightBracket => self.pos_advance(')'),
214 Token::Equals => self.pos_advance('='),
215 Token::Colon => self.pos_advance(':'),
216 Token::Comma => self.pos_advance(','),
217 Token::RightSlash => self.pos_advance('/'),
218 Token::Ampersand => self.pos_advance('&'),
219 Token::NewLine => self.pos_advance('\n'),
220 Token::QuotedStr(s)
221 | Token::Bool(s)
222 | Token::Whitespace(s)
223 | Token::Identifier(s)
224 | Token::Number(s)
225 | Token::Comment(s) => {
226 for c in s.chars() {
227 self.pos_advance(c);
228 }
229 }
230 }
231 }
232 fn pos_advance(&mut self, c: char) {
233 if c == '\n' {
234 self.column = 0;
235 self.line += 1;
236 } else {
237 self.column += 1
238 }
239 }
240}
241
/// The tokenizer state machine.
///
/// Each call to `next` pulls `(byte_offset, char)` pairs — first from the
/// one-character pushback buffer `self.buf`, then from the decoder — and
/// feeds them through `self.state` until a token completes, an error
/// occurs, or the input ends. Line/column counters are advanced only
/// *after* a token is emitted (see the `pos_advance_token` call at the
/// bottom), so the `line`/`column` captured per character are those of the
/// start of the token being finished.
impl<R: std::io::Read> Iterator for TokenIter<R> {
    type Item = Result<LocatedToken, TokenizerError>;
    fn next(&mut self) -> Option<Self::Item> {
        let token = loop {
            // Prefer the pushed-back lookahead character, if any.
            match self.buf.take().map(Ok).or_else(|| self.iter.next()) {
                Some(Ok((i, c))) => {
                    let line = self.line;
                    let column = self.column;
                    match &mut self.state {
                        // Outside any namelist: '&' opens a namelist, '/'
                        // is emitted as-is, a bare newline becomes a
                        // one-character Comment token, and anything else
                        // starts comment text.
                        TokenizerState::Start => {
                            if c == '&' {
                                let token = Token::Ampersand;
                                let span = Some(Span {
                                    lo: i,
                                    len: 1,
                                    column,
                                    line,
                                });
                                let token = LocatedToken { span, token };
                                self.state = TokenizerState::StartInNamelist;
                                break Some(Ok(token));
                            } else if c == '/' {
                                let len = 1;
                                let token = LocatedToken {
                                    span: Some(Span {
                                        lo: i,
                                        len,
                                        column,
                                        line,
                                    }),
                                    token: Token::RightSlash,
                                };
                                self.state = TokenizerState::Start;
                                break Some(Ok(token));
                            } else if c == '\n' {
                                let len = 1;
                                let token = LocatedToken {
                                    span: Some(Span {
                                        lo: i,
                                        len,
                                        column,
                                        line,
                                    }),
                                    // NOTE(review): a lone newline outside a
                                    // namelist is emitted as Comment("\n"),
                                    // not NewLine — presumably deliberate so
                                    // everything outside namelists is
                                    // comment text; confirm.
                                    token: Token::Comment(c.to_string()),
                                };
                                self.state = TokenizerState::Start;
                                break Some(Ok(token));
                            } else {
                                let start = i;
                                let mut content = String::new();
                                content.push(c);
                                self.state = TokenizerState::Comment { start, content };
                            }
                        }
                        // Between tokens inside a namelist: dispatch on the
                        // first character of the next token.
                        TokenizerState::StartInNamelist => {
                            if c.is_whitespace() {
                                let start = i;
                                let mut content = String::new();
                                content.push(c);
                                self.state = TokenizerState::InWhitespace { start, content };
                            } else {
                                match c {
                                    '\'' => {
                                        let start = i;
                                        let mut content = String::new();
                                        content.push(c);
                                        self.state = TokenizerState::InQuote { start, content };
                                    }
                                    // A leading '.' is ambiguous: ".2" is a
                                    // number, ".TRUE." a bool.
                                    '.' => {
                                        let start = i;
                                        let mut content = String::new();
                                        content.push(c);
                                        self.state =
                                            TokenizerState::InBoolOrNumber { start, content };
                                    }
                                    '!' => {
                                        let start = i;
                                        let mut content = String::new();
                                        content.push(c);
                                        self.state = TokenizerState::Comment { start, content };
                                    }
                                    '=' => {
                                        let token = Token::Equals;
                                        let span = Some(Span {
                                            lo: i,
                                            len: 1,
                                            column,
                                            line,
                                        });
                                        let token = LocatedToken { span, token };
                                        self.state = TokenizerState::StartInNamelist;
                                        break Some(Ok(token));
                                    }
                                    '(' => {
                                        let token = Token::LeftBracket;
                                        let span = Some(Span {
                                            lo: i,
                                            len: 1,
                                            column,
                                            line,
                                        });
                                        let token = LocatedToken { span, token };
                                        self.state = TokenizerState::StartInNamelist;
                                        break Some(Ok(token));
                                    }
                                    ')' => {
                                        let token = Token::RightBracket;
                                        let span = Some(Span {
                                            lo: i,
                                            len: 1,
                                            column,
                                            line,
                                        });
                                        let token = LocatedToken { span, token };
                                        self.state = TokenizerState::StartInNamelist;
                                        break Some(Ok(token));
                                    }
                                    ':' => {
                                        let token = Token::Colon;
                                        let span = Some(Span {
                                            lo: i,
                                            len: 1,
                                            column,
                                            line,
                                        });
                                        let token = LocatedToken { span, token };
                                        self.state = TokenizerState::StartInNamelist;
                                        break Some(Ok(token));
                                    }
                                    ',' => {
                                        let token = Token::Comma;
                                        let span = Some(Span {
                                            lo: i,
                                            len: 1,
                                            column,
                                            line,
                                        });
                                        let token = LocatedToken { span, token };
                                        self.state = TokenizerState::StartInNamelist;
                                        break Some(Ok(token));
                                    }
                                    // '/' closes the namelist: back to Start.
                                    '/' => {
                                        let token = Token::RightSlash;
                                        let span = Some(Span {
                                            lo: i,
                                            len: 1,
                                            column,
                                            line,
                                        });
                                        let token = LocatedToken { span, token };
                                        self.state = TokenizerState::Start;
                                        break Some(Ok(token));
                                    }
                                    '&' => {
                                        let token = Token::Ampersand;
                                        let span = Some(Span {
                                            lo: i,
                                            len: 1,
                                            column,
                                            line,
                                        });
                                        let token = LocatedToken { span, token };
                                        self.state = TokenizerState::StartInNamelist;
                                        break Some(Ok(token));
                                    }
                                    _ => {
                                        if c.is_alphabetic() {
                                            let start = i;
                                            let mut content = String::new();
                                            content.push(c);
                                            self.state =
                                                TokenizerState::InIdentifier { start, content };
                                        } else if c.is_whitespace() {
                                            let start = i;
                                            let mut content = String::new();
                                            content.push(c);
                                            self.state =
                                                TokenizerState::InWhitespace { start, content };
                                        } else if c.is_ascii_digit() || c == '-' {
                                            let mut content = String::new();
                                            content.push(c);
                                            self.state =
                                                TokenizerState::InNumber { start: i, content };
                                        } else {
                                            // Any other character degrades
                                            // into comment text.
                                            let start = i;
                                            let mut content = String::new();
                                            content.push(c);
                                            self.state = TokenizerState::Comment { start, content };
                                        }
                                    }
                                }
                            }
                        }
                        // Inside a quoted string: a quote always terminates.
                        // NOTE(review): doubled-quote escapes ('') are not
                        // recognized here — confirm that is intended.
                        TokenizerState::InQuote { start, content } => match c {
                            '\'' => {
                                content.push(c);
                                let len = content.len();
                                let value = std::mem::take(content);
                                let token = LocatedToken {
                                    span: Some(Span {
                                        lo: *start,
                                        len,
                                        column,
                                        line,
                                    }),
                                    token: Token::QuotedStr(value),
                                };
                                self.state = TokenizerState::StartInNamelist;
                                break Some(Ok(token));
                            }
                            _ => {
                                content.push(c);
                            }
                        },
                        // After a leading '.': a digit commits to a number,
                        // T/t/F/f commits to a bool, anything else errors.
                        TokenizerState::InBoolOrNumber { start, content } => {
                            if c.is_ascii_digit() {
                                content.push(c);
                                let value = std::mem::take(content);
                                self.state = TokenizerState::InNumber {
                                    start: *start,
                                    content: value,
                                };
                            } else {
                                match c {
                                    'T' | 't' | 'F' | 'f' => {
                                        content.push(c);
                                        let value = std::mem::take(content);
                                        self.state = TokenizerState::InBool {
                                            start: *start,
                                            content: value,
                                        };
                                    }
                                    _ => {
                                        content.push(c);
                                        return Some(Err(TokenizerError::InvalidBoolOrNumber(
                                            Span {
                                                lo: *start,
                                                len: content.len(),
                                                line,
                                                column,
                                            },
                                        )));
                                    }
                                }
                            }
                        }
                        // Inside a bool literal: the closing '.' finishes it;
                        // any other character is accumulated unchecked.
                        TokenizerState::InBool { start, content } => match c {
                            '.' => {
                                content.push(c);
                                let len = content.len();
                                let value = std::mem::take(content);
                                let token = LocatedToken {
                                    span: Some(Span {
                                        lo: *start,
                                        len,
                                        column,
                                        line,
                                    }),
                                    token: Token::Bool(value),
                                };
                                self.state = TokenizerState::StartInNamelist;
                                break Some(Ok(token));
                            }
                            _ => {
                                content.push(c);
                            }
                        },
                        // Comments run to (and include) the newline.
                        TokenizerState::Comment { start, content } => match c {
                            '\n' => {
                                content.push(c);
                                let len = content.len();
                                let value = std::mem::take(content);
                                let token = LocatedToken {
                                    span: Some(Span {
                                        lo: *start,
                                        len,
                                        column,
                                        line,
                                    }),
                                    token: Token::Comment(value),
                                };
                                self.state = TokenizerState::Start;
                                break Some(Ok(token));
                            }
                            _ => {
                                content.push(c);
                            }
                        },
                        // Whitespace runs end at the first non-whitespace
                        // character, which is then dispatched to the next
                        // state (single-char tokens are pushed back).
                        TokenizerState::InWhitespace { start, content } => {
                            if c.is_whitespace() {
                                content.push(c);
                            } else {
                                let len = content.len();
                                let value = std::mem::take(content);
                                let token = LocatedToken {
                                    span: Some(Span {
                                        lo: *start,
                                        len,
                                        column,
                                        line,
                                    }),
                                    token: Token::Whitespace(value),
                                };
                                match c {
                                    '\'' => {
                                        let start = i;
                                        let mut content = String::new();
                                        content.push(c);
                                        self.state = TokenizerState::InQuote { start, content };
                                    }
                                    '=' | '(' | ')' | ':' | ',' | '/' | '&' => {
                                        self.buf.replace((i, c));
                                        self.state = TokenizerState::StartInNamelist;
                                    }
                                    _ => {
                                        if c.is_alphabetic() {
                                            let start = i;
                                            let mut content = String::new();
                                            content.push(c);
                                            self.state =
                                                TokenizerState::InIdentifier { start, content };
                                        } else if c.is_whitespace() {
                                            let start = i;
                                            let mut content = String::new();
                                            content.push(c);
                                            self.state =
                                                TokenizerState::InWhitespace { start, content };
                                        } else if c == '.' {
                                            let mut content = String::new();
                                            content.push(c);
                                            self.state = TokenizerState::InBoolOrNumber {
                                                start: i,
                                                content,
                                            };
                                        } else if c.is_ascii_digit()
                                            || c == 'e'
                                            || c == 'E'
                                            || c == '-'
                                            || c == '+'
                                        {
                                            let mut content = String::new();
                                            content.push(c);
                                            self.state =
                                                TokenizerState::InNumber { start: i, content };
                                        } else if c == '!' {
                                            let start = i;
                                            let mut content = String::new();
                                            content.push(c);
                                            self.state = TokenizerState::Comment { start, content };
                                        } else {
                                            // NOTE(review): this span points
                                            // at the *whitespace* run, not at
                                            // the offending character —
                                            // confirm that is intended.
                                            return Some(Err(TokenizerError::InvalidCharacter(
                                                Span {
                                                    lo: *start,
                                                    len: content.len(),
                                                    line,
                                                    column,
                                                },
                                            )));
                                        }
                                    }
                                }
                                break Some(Ok(token));
                            }
                        }
                        // Identifiers: alphanumerics and '_'; the terminator
                        // is pushed back for the next call.
                        TokenizerState::InIdentifier { start, content } => {
                            if c.is_alphanumeric() || c == '_' {
                                content.push(c);
                            } else {
                                let len = content.len();
                                let value = std::mem::take(content);
                                let span = Some(Span {
                                    lo: *start,
                                    len,
                                    column,
                                    line,
                                });
                                let token = Token::Identifier(value);
                                self.buf.replace((i, c));
                                self.state = TokenizerState::StartInNamelist;
                                let token = LocatedToken { span, token };
                                break Some(Ok(token));
                            }
                        }
                        // Numbers: digits plus '.', exponent letters and
                        // signs; the terminator dispatches the next state.
                        TokenizerState::InNumber { start, content } => {
                            if c.is_ascii_digit()
                                || c == '.'
                                || c == 'e'
                                || c == 'E'
                                || c == '-'
                                || c == '+'
                            {
                                content.push(c);
                            } else {
                                let len = content.len();
                                let value = std::mem::take(content);
                                let token = LocatedToken {
                                    span: Some(Span {
                                        lo: *start,
                                        len,
                                        column,
                                        line,
                                    }),
                                    token: Token::Number(value),
                                };
                                match c {
                                    '\'' => {
                                        let start = i;
                                        let mut content = String::new();
                                        content.push(c);
                                        self.state = TokenizerState::InQuote { start, content };
                                    }
                                    '=' | '(' | ')' | ':' | ',' | '&' | '!' => {
                                        self.buf.replace((i, c));
                                        self.state = TokenizerState::StartInNamelist;
                                    }
                                    // '/' after a number closes the namelist.
                                    '/' => {
                                        self.buf.replace((i, c));
                                        self.state = TokenizerState::Start;
                                    }
                                    _ => {
                                        if c.is_alphabetic() {
                                            let start = i;
                                            let mut content = String::new();
                                            content.push(c);
                                            self.state =
                                                TokenizerState::InIdentifier { start, content };
                                        } else if c.is_whitespace() {
                                            let start = i;
                                            let mut content = String::new();
                                            content.push(c);
                                            self.state =
                                                TokenizerState::InWhitespace { start, content };
                                        } else {
                                            return Some(Err(TokenizerError::InvalidCharacter(
                                                Span {
                                                    lo: *start,
                                                    len: content.len(),
                                                    line,
                                                    column,
                                                },
                                            )));
                                        }
                                    }
                                }
                                break Some(Ok(token));
                            }
                        }
                    }
                }
                // Decode failure: wrap it with the current position (the
                // byte span is unknown here, hence lo/len of 0).
                Some(Err(err)) => {
                    let line = self.line;
                    let column = self.column;
                    return Some(Err(TokenizerError::CharError(
                        Span {
                            lo: 0,
                            len: 0,
                            line,
                            column,
                        },
                        err,
                    )));
                }
                // End of input: flush or reject whatever state we are in.
                None => {
                    let line = self.line;
                    let column = self.column;
                    match &mut self.state {
                        TokenizerState::Start | TokenizerState::StartInNamelist => {
                            break None;
                        }
                        TokenizerState::InQuote { start, content } => {
                            return Some(Err(TokenizerError::UnclosedQuote(Span {
                                lo: *start,
                                len: content.len(),
                                line,
                                column,
                            })));
                        }
                        TokenizerState::InBool { start, content } => {
                            return Some(Err(TokenizerError::UnfinishedBool(Span {
                                lo: *start,
                                len: content.len(),
                                line,
                                column,
                            })));
                        }
                        TokenizerState::InBoolOrNumber { start, content } => {
                            return Some(Err(TokenizerError::UnfinishedBoolOrNumber(Span {
                                lo: *start,
                                len: content.len(),
                                line,
                                column,
                            })));
                        }
                        TokenizerState::InWhitespace { start, content } => {
                            let len = content.len();
                            let value = std::mem::take(content);
                            let token = Token::Whitespace(value);
                            let span = Some(Span {
                                lo: *start,
                                len,
                                column,
                                line,
                            });
                            self.state = TokenizerState::StartInNamelist;
                            let token = LocatedToken { span, token };
                            break Some(Ok(token));
                        }
                        TokenizerState::Comment { start, content } => {
                            let len = content.len();
                            let value = std::mem::take(content);
                            let token = Token::Comment(value);
                            let span = Some(Span {
                                lo: *start,
                                len,
                                column,
                                line,
                            });
                            self.state = TokenizerState::Start;
                            let token = LocatedToken { span, token };
                            break Some(Ok(token));
                        }
                        TokenizerState::InIdentifier { start, content } => {
                            let len = content.len();
                            let value = std::mem::take(content);
                            let token = Token::Identifier(value);
                            let span = Some(Span {
                                lo: *start,
                                len,
                                column,
                                line,
                            });
                            self.state = TokenizerState::StartInNamelist;
                            let token = LocatedToken { span, token };
                            break Some(Ok(token));
                        }
                        TokenizerState::InNumber { start, content } => {
                            let len = content.len();
                            let value = std::mem::take(content);
                            let token = Token::Number(value);
                            let span = Some(Span {
                                lo: *start,
                                len,
                                column,
                                line,
                            });
                            self.state = TokenizerState::StartInNamelist;
                            let token = LocatedToken { span, token };
                            break Some(Ok(token));
                        }
                    }
                }
            }
        };
        // Advance line/column over the emitted token so the next token's
        // span starts at the right position.
        if let Some(Ok(ref token)) = token {
            self.pos_advance_token(&token.token);
        }
        token
    }
}
808
809pub fn tokenize_reader<R: Read>(input: R) -> Result<Vec<LocatedToken>, TokenizerError> {
810 let mut tokens = vec![];
811 for token in TokenIter::new(input) {
812 tokens.push(token?);
813 }
814 Ok(tokens)
815}
816
817pub fn tokenize_str(input: &str) -> Result<Vec<LocatedToken>, TokenizerError> {
818 let input = Cursor::new(input);
819 let mut tokens = vec![];
820 for token in TokenIter::new(input) {
821 tokens.push(token?);
822 }
823 Ok(tokens)
824}
825
/// Errors produced while parsing a token stream into a namelist.
#[derive(Debug)]
pub enum NmlParseError {
    /// Tokenization itself failed.
    Tokenize(TokenizerError),
    /// The parameter name was not valid.
    InvalidParameterName(Option<Span>),
    /// The namelist did not begin with `&`.
    NoAmpersand(Option<Span>),
    /// The group name following `&` was not valid.
    InvalidGroupName(Option<Span>),
    /// No `=` followed the parameter name.
    NoEquals(Option<Span>),
    /// The token stream ended before enough tokens were seen.
    NoTokens,
}
837
838impl NmlParseError {
839 pub fn span(&self) -> Option<Span> {
840 match self {
841 Self::Tokenize(err) => Some(err.span()),
842 Self::InvalidParameterName(span) => *span,
843 Self::NoAmpersand(span) => *span,
844 Self::InvalidGroupName(span) => *span,
845 Self::NoEquals(span) => *span,
846 Self::NoTokens => None,
847 }
850 }
851}
852
853impl std::fmt::Display for NmlParseError {
854 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
855 match self {
856 Self::Tokenize(err) => {
857 write!(f, "{err}")
858 }
859 Self::InvalidParameterName(_) => {
860 write!(f, "invalid parameter name")
861 }
862 Self::NoAmpersand(_) => {
863 write!(f, "no ampersand at the beginning of namelist")
864 }
865 Self::InvalidGroupName(_) => {
866 write!(f, "no ampersand at the beginning of namelist")
867 }
868 Self::NoEquals(_) => {
869 write!(f, "no equals succeeding the parameter name")
870 }
871 Self::NoTokens => {
872 write!(f, "namelist ended early with insufficient tokens")
873 } }
880 }
881}
882
883impl std::error::Error for NmlParseError {
884 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
885 match self {
886 Self::Tokenize(err) => Some(err),
887 Self::InvalidParameterName(_) => None,
888 Self::NoAmpersand(_) => None,
889 Self::InvalidGroupName(_) => None,
890 Self::NoEquals(_) => None,
891 Self::NoTokens => None,
892 }
895 }
896}
897
/// Errors produced by the tokenizer itself; every variant carries the
/// [`Span`] at which tokenization failed.
#[derive(Debug)]
pub enum TokenizerError {
    /// A `.`-prefixed literal was neither a number nor a bool.
    InvalidBoolOrNumber(Span),
    /// An unexpected character was encountered.
    InvalidCharacter(Span),
    /// Input ended inside a bool literal (e.g. `.TR`).
    UnfinishedBool(Span),
    /// Input ended right after a leading `.`.
    UnfinishedBoolOrNumber(Span),
    /// Input ended inside a quoted string.
    UnclosedQuote(Span),
    /// The underlying byte stream failed to decode as UTF-8 (or to read).
    CharError(Span, CharDecodeError),
}
908
909impl TokenizerError {
910 pub fn span(&self) -> Span {
911 match self {
912 Self::InvalidBoolOrNumber(span) => *span,
913 Self::InvalidCharacter(span) => *span,
914 Self::UnfinishedBool(span) => *span,
915 Self::UnfinishedBoolOrNumber(span) => *span,
916 Self::UnclosedQuote(span) => *span,
917 Self::CharError(span, _) => *span,
918 }
919 }
920}
921
922impl std::fmt::Display for TokenizerError {
923 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
924 match self {
925 Self::InvalidBoolOrNumber(_) => {
926 write!(f, "Invalid bool or number")
927 }
928 Self::InvalidCharacter(_) => {
929 write!(f, "Invalid character")
930 }
931 Self::UnfinishedBool(_) => {
932 write!(f, "Unfinished bool")
933 }
934 Self::UnfinishedBoolOrNumber(_) => {
935 write!(f, "Unfinished booll or number")
936 }
937 Self::UnclosedQuote(_) => {
938 write!(f, "Unclosed quote")
939 }
940 Self::CharError(_, err) => {
941 write!(f, "UTF-8 decode error: {err}")
942 }
943 }
944 }
945}
946
947impl std::error::Error for TokenizerError {
948 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
949 match *self {
950 Self::InvalidBoolOrNumber(_) => None,
951 Self::InvalidCharacter(_) => None,
952 Self::UnfinishedBool(_) => None,
953 Self::UnfinishedBoolOrNumber(_) => None,
954 Self::UnclosedQuote(_) => None,
955 Self::CharError(_, _) => None,
956 }
957 }
958}
959
#[cfg(test)]
mod tests {
    use super::*;

    // Outside a namelist, a bare assignment is swallowed as one Comment token.
    #[test]
    fn trivial_tokens0() {
        let s = "abc=2";
        let tokens = tokenize_str(s).expect("test tokenization failed");
        assert_eq!(
            vec![LocatedToken {
                span: Some(Span {
                    lo: 0,
                    len: 5,
                    column: 0,
                    line: 0
                }),
                token: Token::Comment("abc=2".to_string()),
            },],
            tokens
        );
    }

    // Basic namelist: ampersand, group name, whitespace, assignment.
    #[test]
    fn trivial_tokens1() {
        let s = "&H abc=2";
        let tokens: Vec<_> = tokenize_str(s)
            .expect("test tokenization failed")
            .into_iter()
            .map(|x| x.token)
            .collect();
        assert_eq!(
            vec![
                Token::Ampersand,
                Token::Identifier("H".to_string()),
                Token::Whitespace(" ".to_string()),
                Token::Identifier("abc".to_string()),
                Token::Equals,
                Token::Number("2".to_string()),
            ],
            tokens
        );
    }

    // Whitespace after '=' is preserved as its own token.
    #[test]
    fn trivial_tokens2() {
        let s = "&H abc= 2";
        let tokens: Vec<_> = tokenize_str(s)
            .expect("test tokenization failed")
            .into_iter()
            .map(|x| x.token)
            .collect();
        assert_eq!(
            vec![
                Token::Ampersand,
                Token::Identifier("H".to_string()),
                Token::Whitespace(" ".to_string()),
                Token::Identifier("abc".to_string()),
                Token::Equals,
                Token::Whitespace(" ".to_string()),
                Token::Number("2".to_string()),
            ],
            tokens
        );
    }

    // A stray ')' inside the namelist is still tokenized as RightBracket.
    #[test]
    fn trivial_tokens3() {
        assert_eq!(
            tokenize_str("&H )=2")
                .expect("test tokenization failed")
                .into_iter()
                .map(|x| x.token)
                .collect::<Vec<_>>(),
            vec![
                Token::Ampersand,
                Token::Identifier("H".to_string()),
                Token::Whitespace(" ".to_string()),
                Token::RightBracket,
                Token::Equals,
                Token::Number("2".to_string()),
            ]
        );
    }

    // '/' directly after a number terminates the namelist.
    #[test]
    fn trivial_tokens4() {
        let s = "&abc=2/";
        let tokens: Vec<_> = tokenize_str(s)
            .expect("test tokenization failed")
            .into_iter()
            .map(|x| x.token)
            .collect();
        assert_eq!(
            vec![
                Token::Ampersand,
                Token::Identifier("abc".to_string()),
                Token::Equals,
                Token::Number("2".to_string()),
                Token::RightSlash,
            ],
            tokens
        );
    }

    // Leading-dot number: ".2" goes through InBoolOrNumber -> InNumber.
    #[test]
    fn trivial_tokens5() {
        let s = "&abc=.2/";
        let tokens: Vec<_> = tokenize_str(s)
            .expect("test tokenization failed")
            .into_iter()
            .map(|x| x.token)
            .collect();
        assert_eq!(
            vec![
                Token::Ampersand,
                Token::Identifier("abc".to_string()),
                Token::Equals,
                Token::Number(".2".to_string()),
                Token::RightSlash,
            ],
            tokens
        );
    }

    // Trailing-dot number: "2." stays a single Number token.
    #[test]
    fn trivial_tokens6() {
        let s = "&abc=2./";
        let tokens: Vec<_> = tokenize_str(s)
            .expect("test tokenization failed")
            .into_iter()
            .map(|x| x.token)
            .collect();
        assert_eq!(
            vec![
                Token::Ampersand,
                Token::Identifier("abc".to_string()),
                Token::Equals,
                Token::Number("2.".to_string()),
                Token::RightSlash,
            ],
            tokens
        );
    }
    // A newline inside a namelist is Whitespace, not a comment.
    #[test]
    fn trivial_tokens7() {
        let s = "&abc=2.\n/";
        let tokens: Vec<_> = tokenize_str(s)
            .expect("test tokenization failed")
            .into_iter()
            .map(|x| x.token)
            .collect();
        assert_eq!(
            vec![
                Token::Ampersand,
                Token::Identifier("abc".to_string()),
                Token::Equals,
                Token::Number("2.".to_string()),
                Token::Whitespace("\n".to_string()),
                Token::RightSlash,
            ],
            tokens
        );
    }
    // CRLF line endings are folded into a single Whitespace token.
    #[test]
    fn trivial_tokens8() {
        let s = "&abc=2.\r\n/";
        let tokens: Vec<_> = tokenize_str(s)
            .expect("test tokenization failed")
            .into_iter()
            .map(|x| x.token)
            .collect();
        assert_eq!(
            vec![
                Token::Ampersand,
                Token::Identifier("abc".to_string()),
                Token::Equals,
                Token::Number("2.".to_string()),
                Token::Whitespace("\r\n".to_string()),
                Token::RightSlash,
            ],
            tokens
        );
    }

    // An unterminated ".TR" at EOF must fail with UnfinishedBool and point
    // at the literal's start (byte 7, column 7, line 0).
    #[test]
    fn bad_tokens1() {
        let res = tokenize_str("&H abc=.TR");
        assert!(res.is_err());
        if let Err(TokenizerError::UnfinishedBool(span)) = res {
            assert_eq!(span.lo, 7);
            assert_eq!(span.len, 3);
            assert_eq!(span.line, 0);
            assert_eq!(span.column, 7);
        } else {
            panic!("Incorrect error type {:?}", res);
        }
    }

    // Quoted strings keep their quotes and embedded spaces.
    #[test]
    fn simple_tokens1() {
        let s = "&H abc=2,'ad c' (2,:)";
        let tokens: Vec<_> = tokenize_str(s)
            .expect("test tokenization failed")
            .into_iter()
            .map(|x| x.token)
            .collect();
        let expected = vec![
            Token::Ampersand,
            Token::Identifier("H".to_string()),
            Token::Whitespace(" ".to_string()),
            Token::Identifier("abc".to_string()),
            Token::Equals,
            Token::Number("2".to_string()),
            Token::Comma,
            Token::QuotedStr("'ad c'".to_string()),
            Token::Whitespace(" ".to_string()),
            Token::LeftBracket,
            Token::Number("2".to_string()),
            Token::Comma,
            Token::Colon,
            Token::RightBracket,
        ];
        assert_eq!(expected, tokens);
    }

    // Array-section syntax: brackets, colon, and decimal numbers.
    #[test]
    fn simple_tokens2() {
        assert_eq!(
            tokenize_str("&H TEMPERATURES(1:2)=273.15, 274")
                .expect("test tokenization failed")
                .into_iter()
                .map(|x| x.token)
                .collect::<Vec<_>>(),
            vec![
                Token::Ampersand,
                Token::Identifier("H".to_string()),
                Token::Whitespace(" ".to_string()),
                Token::Identifier("TEMPERATURES".to_string()),
                Token::LeftBracket,
                Token::Number("1".to_string()),
                Token::Colon,
                Token::Number("2".to_string()),
                Token::RightBracket,
                Token::Equals,
                Token::Number("273.15".to_string()),
                Token::Comma,
                Token::Whitespace(" ".to_string()),
                Token::Number("274".to_string()),
            ]
        );
    }

    // Whitespace spanning a newline remains one Whitespace token.
    #[test]
    fn simple_tokens3() {
        assert_eq!(
            tokenize_str("&H TEMPERATURES(1:2)=273.15, \n 274")
                .expect("test tokenization failed")
                .into_iter()
                .map(|x| x.token)
                .collect::<Vec<_>>(),
            vec![
                Token::Ampersand,
                Token::Identifier("H".to_string()),
                Token::Whitespace(" ".to_string()),
                Token::Identifier("TEMPERATURES".to_string()),
                Token::LeftBracket,
                Token::Number("1".to_string()),
                Token::Colon,
                Token::Number("2".to_string()),
                Token::RightBracket,
                Token::Equals,
                Token::Number("273.15".to_string()),
                Token::Comma,
                Token::Whitespace(" \n ".to_string()),
                Token::Number("274".to_string()),
            ]
        );
    }

    // Outside a namelist, each source line becomes its own Comment token
    // (newline included), regardless of its content.
    #[test]
    fn commented_tokens1() {
        let tokens: Vec<_> = tokenize_str("! hi\nTEMPERATURES(1:2)=273.15, \n 274")
            .expect("test tokenization failed")
            .into_iter()
            .map(|l_token| l_token.token().clone())
            .collect();
        let expected = vec![
            Token::Comment("! hi\n".to_string()),
            Token::Comment("TEMPERATURES(1:2)=273.15, \n".to_string()),
            Token::Comment(" 274".to_string()),
        ];
        assert_eq!(tokens, expected);
    }
    // A trailing '!' comment at EOF is flushed without a newline.
    #[test]
    fn commented_tokens2() {
        let tokens: Vec<_> = tokenize_str("! hi\nTEMPERATURES(1:2)=273.15, \n 274 ! hello")
            .expect("test tokenization failed")
            .into_iter()
            .map(|l_token| l_token.token().clone())
            .collect();
        let expected = vec![
            Token::Comment("! hi\n".to_string()),
            Token::Comment("TEMPERATURES(1:2)=273.15, \n".to_string()),
            Token::Comment(" 274 ! hello".to_string()),
        ];
        assert_eq!(tokens, expected);
    }
}