//! namelist/tokenizer.rs — tokenizer for namelist-format input.

1use std::{
2    collections::VecDeque,
3    fmt::Display,
4    io::{Cursor, Read},
5};
6use utf8::{self, BufReadDecoder, BufReadDecoderError};
7
/// Location of a token within the original input.
///
/// `lo` and `len` are byte-based (they come from `char_indices` offsets and
/// `String::len`), while `line` and `column` are zero-based and counted in
/// characters.
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Span {
    /// Byte offset of the token's first character in the input.
    pub lo: usize,
    /// Length of the token in bytes.
    pub len: usize,
    /// Zero-based line number where the token starts.
    pub line: usize,
    /// Zero-based column number where the token starts.
    pub column: usize,
}
16
/// A [`Token`] paired with its (optional) location in the source.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct LocatedToken {
    /// Location of the token in the original file or stream.
    pub span: Option<Span>,
    /// The token itself.
    pub token: Token,
}
23
24impl LocatedToken {
25    pub fn token(&self) -> &Token {
26        &self.token
27    }
28    pub fn span(&self) -> Option<Span> {
29        self.span
30    }
31}
32
33impl Display for LocatedToken {
34    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
35        write!(f, "{}", self.token)
36    }
37}
38
/// A lexical token of the namelist format.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Token {
    /// `(`
    LeftBracket,
    /// `)`
    RightBracket,
    /// `=`
    Equals,
    /// `:`
    Colon,
    /// `,`
    Comma,
    /// `/` — also terminates a namelist group.
    RightSlash,
    /// `&` — starts a namelist group.
    Ampersand,
    /// `\n`
    NewLine,
    /// A single-quoted string, stored including its surrounding quotes.
    QuotedStr(String),
    /// A `.`-delimited boolean literal (e.g. `.true.`), stored verbatim.
    Bool(String),
    /// A run of whitespace characters.
    Whitespace(String),
    /// An alphanumeric/underscore word (names, variables).
    /// NOTE(review): presumably this may also capture trailing digits — the
    /// tokenizer accepts alphanumerics after a leading letter.
    Identifier(String),
    /// The raw text of a numeric literal.
    Number(String),
    /// Comment or junk text, running up to (and including) the newline.
    Comment(String),
}
58impl Token {
59    pub fn is_location_token(&self) -> bool {
60        match self {
61            Self::LeftBracket => true,
62            Self::RightBracket => true,
63            Self::Equals => false,
64            Self::Colon => true,
65            Self::Comma => true,
66            Self::RightSlash => false,
67            Self::Ampersand => false,
68            Self::NewLine => true,
69            Self::Bool(_) => false,
70            Self::QuotedStr(_) => false,
71            Self::Whitespace(_) => true,
72            Self::Identifier(_) => false,
73            Self::Number(_) => true,
74            Self::Comment(_) => true,
75        }
76    }
77
78    pub fn is_whitespace(&self) -> bool {
79        matches!(self, Self::Whitespace(_) | Self::NewLine)
80    }
81
82    pub fn is_comment(&self) -> bool {
83        matches!(self, Self::Comment(_))
84    }
85}
86
87impl Display for Token {
88    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
89        match self {
90            Self::LeftBracket => write!(f, "("),
91            Self::RightBracket => write!(f, ")"),
92            Self::Equals => write!(f, "="),
93            Self::Colon => write!(f, ":"),
94            Self::Comma => write!(f, ","),
95            Self::RightSlash => write!(f, "/"),
96            Self::Ampersand => write!(f, "&"),
97            Self::NewLine => writeln!(f),
98            Self::Bool(s) => write!(f, "{s}"),
99            Self::QuotedStr(s) => write!(f, "{s}"),
100            Self::Whitespace(s) => write!(f, "{s}"),
101            Self::Identifier(s) => write!(f, "{s}"),
102            Self::Number(s) => write!(f, "{s}"),
103            Self::Comment(s) => write!(f, "{s}"),
104        }
105    }
106}
107
/// States of the tokenizer's state machine.
///
/// In every in-progress variant, `start` is the byte offset where the token
/// began and `content` accumulates its text so far.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum TokenizerState {
    /// Outside any namelist group; input is treated as comment/junk until `&`.
    Start,
    /// Inside a namelist group, between tokens.
    StartInNamelist,
    /// Inside a single-quoted string, waiting for the closing quote.
    InQuote { start: usize, content: String },
    /// Inside an alphanumeric/underscore identifier.
    InIdentifier { start: usize, content: String },
    /// Saw a leading `.`; the next char decides bool (`t/f`) vs number (digit).
    InBoolOrNumber { start: usize, content: String },
    /// Inside a numeric literal.
    InNumber { start: usize, content: String },
    /// Inside a boolean literal, waiting for the closing `.`.
    InBool { start: usize, content: String },
    /// Inside a run of whitespace.
    InWhitespace { start: usize, content: String },
    /// Inside a comment, running to end of line.
    Comment { start: usize, content: String },
}
120
/// Streaming UTF-8 decoder that yields `(byte_offset, char)` pairs from a reader.
pub struct CharDecoder<R: std::io::Read> {
    /// Incremental UTF-8 decoder over the buffered input.
    iter: BufReadDecoder<std::io::BufReader<R>>,
    /// Byte offset (in the whole stream) of the start of the next decoded chunk.
    offset: usize,
    /// Characters decoded from the current chunk, queued for the iterator.
    chars: VecDeque<(usize, char)>,
}
126
127impl<R: Read> CharDecoder<R> {
128    pub fn new(input: R) -> Self {
129        Self {
130            iter: BufReadDecoder::new(std::io::BufReader::new(input)),
131            chars: VecDeque::new(),
132            offset: 0,
133        }
134    }
135}
136
137impl<R: Read> Iterator for CharDecoder<R> {
138    type Item = Result<(usize, char), CharDecodeError>;
139    fn next(&mut self) -> Option<Self::Item> {
140        loop {
141            if let Some(res) = self.chars.pop_front() {
142                return Some(Ok(res));
143            } else {
144                match self.iter.next_strict()? {
145                    Ok(next_string) => {
146                        let offset = self.offset;
147                        for r in next_string.char_indices().map(|(i, c)| (i + offset, c)) {
148                            self.chars.push_back(r);
149                        }
150                        self.offset += next_string.len();
151                    }
152                    Err(BufReadDecoderError::InvalidByteSequence(s)) => {
153                        return Some(Err(CharDecodeError::DecodeError(s.into())))
154                    }
155                    Err(BufReadDecoderError::Io(err)) => {
156                        return Some(Err(CharDecodeError::IoError(err)))
157                    }
158                }
159            }
160        }
161    }
162}
163
/// Errors produced while decoding the input stream into characters.
#[derive(Debug)]
pub enum CharDecodeError {
    /// The underlying reader failed.
    IoError(std::io::Error),
    /// The input contained bytes that are not valid UTF-8.
    DecodeError(Vec<u8>),
}
169
170impl std::fmt::Display for CharDecodeError {
171    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
172        match *self {
173            Self::IoError(_) => {
174                write!(f, "Invalid bool or number")
175            }
176            Self::DecodeError(_) => {
177                write!(f, "Invalid character")
178            }
179        }
180    }
181}
182
183impl std::error::Error for CharDecodeError {
184    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
185        match *self {
186            Self::IoError(_) => None,
187            Self::DecodeError(_) => None,
188        }
189    }
190}
191
/// Iterator adapter that tokenizes a character stream into [`LocatedToken`]s.
pub struct TokenIter<B: std::io::Read> {
    /// Source of `(byte_offset, char)` pairs.
    /// NOTE(review): the input is wrapped in a `BufReader` both here and
    /// inside `CharDecoder::new` — the double buffering looks redundant;
    /// confirm before removing either layer.
    iter: CharDecoder<std::io::BufReader<B>>,
    /// One-character pushback buffer, filled when a token is terminated by a
    /// delimiter that must be re-examined on the next call.
    buf: Option<(usize, char)>,
    /// Current state of the tokenizer state machine.
    state: TokenizerState,
    /// Zero-based line of the current position.
    line: usize,
    /// Zero-based column of the current position.
    column: usize,
}
199
200impl<R: std::io::Read> TokenIter<R> {
201    pub fn new(input: R) -> Self {
202        Self {
203            iter: CharDecoder::new(std::io::BufReader::new(input)),
204            buf: None,
205            state: TokenizerState::Start,
206            line: 0,
207            column: 0,
208        }
209    }
210    fn pos_advance_token(&mut self, token: &Token) {
211        match token {
212            Token::LeftBracket => self.pos_advance('('),
213            Token::RightBracket => self.pos_advance(')'),
214            Token::Equals => self.pos_advance('='),
215            Token::Colon => self.pos_advance(':'),
216            Token::Comma => self.pos_advance(','),
217            Token::RightSlash => self.pos_advance('/'),
218            Token::Ampersand => self.pos_advance('&'),
219            Token::NewLine => self.pos_advance('\n'),
220            Token::QuotedStr(s)
221            | Token::Bool(s)
222            | Token::Whitespace(s)
223            | Token::Identifier(s)
224            | Token::Number(s)
225            | Token::Comment(s) => {
226                for c in s.chars() {
227                    self.pos_advance(c);
228                }
229            }
230        }
231    }
232    fn pos_advance(&mut self, c: char) {
233        if c == '\n' {
234            self.column = 0;
235            self.line += 1;
236        } else {
237            self.column += 1
238        }
239    }
240}
241
242impl<R: std::io::Read> Iterator for TokenIter<R> {
243    type Item = Result<LocatedToken, TokenizerError>;
244    fn next(&mut self) -> Option<Self::Item> {
245        let token = loop {
246            match self.buf.take().map(Ok).or_else(|| self.iter.next()) {
247                Some(Ok((i, c))) => {
248                    let line = self.line;
249                    let column = self.column;
250                    match &mut self.state {
251                        TokenizerState::Start => {
252                            if c == '&' {
253                                // If a new line starts with an ampersand we should
254                                // interpret what follows as a namelist.
255                                let token = Token::Ampersand;
256                                let span = Some(Span {
257                                    lo: i,
258                                    len: 1,
259                                    column,
260                                    line,
261                                });
262                                let token = LocatedToken { span, token };
263                                self.state = TokenizerState::StartInNamelist;
264                                break Some(Ok(token));
265                            } else if c == '/' {
266                                let len = 1;
267                                let token = LocatedToken {
268                                    span: Some(Span {
269                                        lo: i,
270                                        len,
271                                        column,
272                                        line,
273                                    }),
274                                    token: Token::RightSlash,
275                                };
276                                self.state = TokenizerState::Start;
277                                break Some(Ok(token));
278                            } else if c == '\n' {
279                                let len = 1;
280                                let token = LocatedToken {
281                                    span: Some(Span {
282                                        lo: i,
283                                        len,
284                                        column,
285                                        line,
286                                    }),
287                                    token: Token::Comment(c.to_string()),
288                                };
289                                self.state = TokenizerState::Start;
290                                break Some(Ok(token));
291                            } else {
292                                // Otherwise it is just 'junk' or comment.
293                                let start = i;
294                                let mut content = String::new();
295                                content.push(c);
296                                self.state = TokenizerState::Comment { start, content };
297                            }
298                        }
299                        TokenizerState::StartInNamelist => {
300                            if c.is_whitespace() {
301                                let start = i;
302                                let mut content = String::new();
303                                content.push(c);
304                                self.state = TokenizerState::InWhitespace { start, content };
305                            } else {
306                                match c {
307                                    '\'' => {
308                                        let start = i;
309                                        let mut content = String::new();
310                                        content.push(c);
311                                        self.state = TokenizerState::InQuote { start, content };
312                                    }
313                                    '.' => {
314                                        let start = i;
315                                        let mut content = String::new();
316                                        content.push(c);
317                                        self.state =
318                                            TokenizerState::InBoolOrNumber { start, content };
319                                    }
320                                    '!' => {
321                                        let start = i;
322                                        let mut content = String::new();
323                                        content.push(c);
324                                        self.state = TokenizerState::Comment { start, content };
325                                    }
326                                    '=' => {
327                                        let token = Token::Equals;
328                                        let span = Some(Span {
329                                            lo: i,
330                                            len: 1,
331                                            column,
332                                            line,
333                                        });
334                                        let token = LocatedToken { span, token };
335                                        self.state = TokenizerState::StartInNamelist;
336                                        break Some(Ok(token));
337                                    }
338                                    '(' => {
339                                        let token = Token::LeftBracket;
340                                        let span = Some(Span {
341                                            lo: i,
342                                            len: 1,
343                                            column,
344                                            line,
345                                        });
346                                        let token = LocatedToken { span, token };
347                                        self.state = TokenizerState::StartInNamelist;
348                                        break Some(Ok(token));
349                                    }
350                                    ')' => {
351                                        let token = Token::RightBracket;
352                                        let span = Some(Span {
353                                            lo: i,
354                                            len: 1,
355                                            column,
356                                            line,
357                                        });
358                                        let token = LocatedToken { span, token };
359                                        self.state = TokenizerState::StartInNamelist;
360                                        break Some(Ok(token));
361                                    }
362                                    ':' => {
363                                        let token = Token::Colon;
364                                        let span = Some(Span {
365                                            lo: i,
366                                            len: 1,
367                                            column,
368                                            line,
369                                        });
370                                        let token = LocatedToken { span, token };
371                                        self.state = TokenizerState::StartInNamelist;
372                                        break Some(Ok(token));
373                                    }
374                                    ',' => {
375                                        let token = Token::Comma;
376                                        let span = Some(Span {
377                                            lo: i,
378                                            len: 1,
379                                            column,
380                                            line,
381                                        });
382                                        let token = LocatedToken { span, token };
383                                        self.state = TokenizerState::StartInNamelist;
384                                        break Some(Ok(token));
385                                    }
386                                    '/' => {
387                                        let token = Token::RightSlash;
388                                        let span = Some(Span {
389                                            lo: i,
390                                            len: 1,
391                                            column,
392                                            line,
393                                        });
394                                        let token = LocatedToken { span, token };
395                                        self.state = TokenizerState::Start;
396                                        break Some(Ok(token));
397                                    }
398                                    '&' => {
399                                        let token = Token::Ampersand;
400                                        let span = Some(Span {
401                                            lo: i,
402                                            len: 1,
403                                            column,
404                                            line,
405                                        });
406                                        let token = LocatedToken { span, token };
407                                        self.state = TokenizerState::StartInNamelist;
408                                        break Some(Ok(token));
409                                    }
410                                    _ => {
411                                        if c.is_alphabetic() {
412                                            let start = i;
413                                            let mut content = String::new();
414                                            content.push(c);
415                                            self.state =
416                                                TokenizerState::InIdentifier { start, content };
417                                        } else if c.is_whitespace() {
418                                            let start = i;
419                                            let mut content = String::new();
420                                            content.push(c);
421                                            self.state =
422                                                TokenizerState::InWhitespace { start, content };
423                                        } else if c.is_ascii_digit() || c == '-' {
424                                            let mut content = String::new();
425                                            content.push(c);
426                                            self.state =
427                                                TokenizerState::InNumber { start: i, content };
428                                        } else {
429                                            let start = i;
430                                            let mut content = String::new();
431                                            content.push(c);
432                                            self.state = TokenizerState::Comment { start, content };
433                                        }
434                                    }
435                                }
436                            }
437                        }
438                        TokenizerState::InQuote { start, content } => match c {
439                            '\'' => {
440                                content.push(c);
441                                let len = content.len();
442                                let value = std::mem::take(content);
443                                let token = LocatedToken {
444                                    span: Some(Span {
445                                        lo: *start,
446                                        len,
447                                        column,
448                                        line,
449                                    }),
450                                    token: Token::QuotedStr(value),
451                                };
452                                self.state = TokenizerState::StartInNamelist;
453                                break Some(Ok(token));
454                            }
455                            _ => {
456                                content.push(c);
457                            }
458                        },
459                        TokenizerState::InBoolOrNumber { start, content } => {
460                            if c.is_ascii_digit() {
461                                content.push(c);
462                                let value = std::mem::take(content);
463                                self.state = TokenizerState::InNumber {
464                                    start: *start,
465                                    content: value,
466                                };
467                            } else {
468                                match c {
469                                    'T' | 't' | 'F' | 'f' => {
470                                        content.push(c);
471                                        let value = std::mem::take(content);
472                                        self.state = TokenizerState::InBool {
473                                            start: *start,
474                                            content: value,
475                                        };
476                                    }
477                                    _ => {
478                                        content.push(c);
479                                        return Some(Err(TokenizerError::InvalidBoolOrNumber(
480                                            Span {
481                                                lo: *start,
482                                                len: content.len(),
483                                                line,
484                                                column,
485                                            },
486                                        )));
487                                    }
488                                }
489                            }
490                        }
491                        TokenizerState::InBool { start, content } => match c {
492                            '.' => {
493                                content.push(c);
494                                let len = content.len();
495                                let value = std::mem::take(content);
496                                let token = LocatedToken {
497                                    span: Some(Span {
498                                        lo: *start,
499                                        len,
500                                        column,
501                                        line,
502                                    }),
503                                    token: Token::Bool(value),
504                                };
505                                self.state = TokenizerState::StartInNamelist;
506                                break Some(Ok(token));
507                            }
508                            _ => {
509                                content.push(c);
510                            }
511                        },
512                        TokenizerState::Comment { start, content } => match c {
513                            '\n' => {
514                                // If we come to a new line while processing
515                                // comments, we revert to start. Even an ampersand
516                                // does not break us out of a comment.
517                                content.push(c);
518                                let len = content.len();
519                                let value = std::mem::take(content);
520                                let token = LocatedToken {
521                                    span: Some(Span {
522                                        lo: *start,
523                                        len,
524                                        column,
525                                        line,
526                                    }),
527                                    token: Token::Comment(value),
528                                };
529                                self.state = TokenizerState::Start;
530                                break Some(Ok(token));
531                            }
532                            _ => {
533                                content.push(c);
534                            }
535                        },
536                        TokenizerState::InWhitespace { start, content } => {
537                            if c.is_whitespace() {
538                                content.push(c);
539                            } else {
540                                let len = content.len();
541                                let value = std::mem::take(content);
542                                let token = LocatedToken {
543                                    span: Some(Span {
544                                        lo: *start,
545                                        len,
546                                        column,
547                                        line,
548                                    }),
549                                    token: Token::Whitespace(value),
550                                };
551                                match c {
552                                    '\'' => {
553                                        let start = i;
554                                        let mut content = String::new();
555                                        content.push(c);
556                                        self.state = TokenizerState::InQuote { start, content };
557                                    }
558                                    '=' | '(' | ')' | ':' | ',' | '/' | '&' => {
559                                        self.buf.replace((i, c));
560                                        self.state = TokenizerState::StartInNamelist;
561                                    }
562                                    _ => {
563                                        if c.is_alphabetic() {
564                                            let start = i;
565                                            let mut content = String::new();
566                                            content.push(c);
567                                            self.state =
568                                                TokenizerState::InIdentifier { start, content };
569                                        } else if c.is_whitespace() {
570                                            let start = i;
571                                            let mut content = String::new();
572                                            content.push(c);
573                                            self.state =
574                                                TokenizerState::InWhitespace { start, content };
575                                        } else if c == '.' {
576                                            let mut content = String::new();
577                                            content.push(c);
578                                            self.state = TokenizerState::InBoolOrNumber {
579                                                start: i,
580                                                content,
581                                            };
582                                        } else if c.is_ascii_digit()
583                                            || c == 'e'
584                                            || c == 'E'
585                                            || c == '-'
586                                            || c == '+'
587                                        {
588                                            let mut content = String::new();
589                                            content.push(c);
590                                            self.state =
591                                                TokenizerState::InNumber { start: i, content };
592                                        } else if c == '!' {
593                                            let start = i;
594                                            let mut content = String::new();
595                                            content.push(c);
596                                            self.state = TokenizerState::Comment { start, content };
597                                        } else {
598                                            return Some(Err(TokenizerError::InvalidCharacter(
599                                                Span {
600                                                    lo: *start,
601                                                    len: content.len(),
602                                                    line,
603                                                    column,
604                                                },
605                                            )));
606                                        }
607                                    }
608                                }
609                                break Some(Ok(token));
610                            }
611                        }
612                        TokenizerState::InIdentifier { start, content } => {
613                            if c.is_alphanumeric() || c == '_' {
614                                content.push(c);
615                            } else {
616                                let len = content.len();
617                                let value = std::mem::take(content);
618                                let span = Some(Span {
619                                    lo: *start,
620                                    len,
621                                    column,
622                                    line,
623                                });
624                                let token = Token::Identifier(value);
625                                self.buf.replace((i, c));
626                                self.state = TokenizerState::StartInNamelist;
627                                let token = LocatedToken { span, token };
628                                break Some(Ok(token));
629                            }
630                        }
631                        TokenizerState::InNumber { start, content } => {
632                            if c.is_ascii_digit()
633                                || c == '.'
634                                || c == 'e'
635                                || c == 'E'
636                                || c == '-'
637                                || c == '+'
638                            {
639                                content.push(c);
640                            } else {
641                                let len = content.len();
642                                let value = std::mem::take(content);
643                                let token = LocatedToken {
644                                    span: Some(Span {
645                                        lo: *start,
646                                        len,
647                                        column,
648                                        line,
649                                    }),
650                                    token: Token::Number(value),
651                                };
652                                match c {
653                                    '\'' => {
654                                        let start = i;
655                                        let mut content = String::new();
656                                        content.push(c);
657                                        self.state = TokenizerState::InQuote { start, content };
658                                    }
659                                    '=' | '(' | ')' | ':' | ',' | '&' | '!' => {
660                                        self.buf.replace((i, c));
661                                        self.state = TokenizerState::StartInNamelist;
662                                    }
663                                    '/' => {
664                                        self.buf.replace((i, c));
665                                        self.state = TokenizerState::Start;
666                                    }
667                                    _ => {
668                                        if c.is_alphabetic() {
669                                            let start = i;
670                                            let mut content = String::new();
671                                            content.push(c);
672                                            self.state =
673                                                TokenizerState::InIdentifier { start, content };
674                                        } else if c.is_whitespace() {
675                                            let start = i;
676                                            let mut content = String::new();
677                                            content.push(c);
678                                            self.state =
679                                                TokenizerState::InWhitespace { start, content };
680                                        } else {
681                                            return Some(Err(TokenizerError::InvalidCharacter(
682                                                Span {
683                                                    lo: *start,
684                                                    len: content.len(),
685                                                    line,
686                                                    column,
687                                                },
688                                            )));
689                                        }
690                                    }
691                                }
692                                break Some(Ok(token));
693                            }
694                        }
695                    }
696                }
697                Some(Err(err)) => {
698                    let line = self.line;
699                    let column = self.column;
700                    return Some(Err(TokenizerError::CharError(
701                        Span {
702                            lo: 0,
703                            len: 0,
704                            line,
705                            column,
706                        },
707                        err,
708                    )));
709                }
710                None => {
711                    let line = self.line;
712                    let column = self.column;
713                    // We have reached EOF
714                    match &mut self.state {
715                        TokenizerState::Start | TokenizerState::StartInNamelist => {
716                            break None;
717                        }
718                        TokenizerState::InQuote { start, content } => {
719                            return Some(Err(TokenizerError::UnclosedQuote(Span {
720                                lo: *start,
721                                len: content.len(),
722                                line,
723                                column,
724                            })));
725                        }
726                        TokenizerState::InBool { start, content } => {
727                            return Some(Err(TokenizerError::UnfinishedBool(Span {
728                                lo: *start,
729                                len: content.len(),
730                                line,
731                                column,
732                            })));
733                        }
734                        TokenizerState::InBoolOrNumber { start, content } => {
735                            return Some(Err(TokenizerError::UnfinishedBoolOrNumber(Span {
736                                lo: *start,
737                                len: content.len(),
738                                line,
739                                column,
740                            })));
741                        }
742                        TokenizerState::InWhitespace { start, content } => {
743                            let len = content.len();
744                            let value = std::mem::take(content);
745                            let token = Token::Whitespace(value);
746                            let span = Some(Span {
747                                lo: *start,
748                                len,
749                                column,
750                                line,
751                            });
752                            self.state = TokenizerState::StartInNamelist;
753                            let token = LocatedToken { span, token };
754                            break Some(Ok(token));
755                        }
756                        TokenizerState::Comment { start, content } => {
757                            let len = content.len();
758                            let value = std::mem::take(content);
759                            let token = Token::Comment(value);
760                            let span = Some(Span {
761                                lo: *start,
762                                len,
763                                column,
764                                line,
765                            });
766                            self.state = TokenizerState::Start;
767                            let token = LocatedToken { span, token };
768                            break Some(Ok(token));
769                        }
770                        TokenizerState::InIdentifier { start, content } => {
771                            let len = content.len();
772                            let value = std::mem::take(content);
773                            let token = Token::Identifier(value);
774                            let span = Some(Span {
775                                lo: *start,
776                                len,
777                                column,
778                                line,
779                            });
780                            self.state = TokenizerState::StartInNamelist;
781                            let token = LocatedToken { span, token };
782                            break Some(Ok(token));
783                        }
784                        TokenizerState::InNumber { start, content } => {
785                            let len = content.len();
786                            let value = std::mem::take(content);
787                            let token = Token::Number(value);
788                            let span = Some(Span {
789                                lo: *start,
790                                len,
791                                column,
792                                line,
793                            });
794                            self.state = TokenizerState::StartInNamelist;
795                            let token = LocatedToken { span, token };
796                            break Some(Ok(token));
797                        }
798                    }
799                }
800            }
801        };
802        if let Some(Ok(ref token)) = token {
803            self.pos_advance_token(&token.token);
804        }
805        token
806    }
807}
808
809pub fn tokenize_reader<R: Read>(input: R) -> Result<Vec<LocatedToken>, TokenizerError> {
810    let mut tokens = vec![];
811    for token in TokenIter::new(input) {
812        tokens.push(token?);
813    }
814    Ok(tokens)
815}
816
817pub fn tokenize_str(input: &str) -> Result<Vec<LocatedToken>, TokenizerError> {
818    let input = Cursor::new(input);
819    let mut tokens = vec![];
820    for token in TokenIter::new(input) {
821        tokens.push(token?);
822    }
823    Ok(tokens)
824}
825
/// Errors that can occur while parsing a namelist: either the underlying
/// tokenizer failed, or the token stream did not form a valid namelist.
#[derive(Debug)]
pub enum NmlParseError {
    /// The tokenizer itself failed; wraps the underlying [`TokenizerError`].
    Tokenize(TokenizerError),
    /// A token where a parameter name was expected was not a valid name.
    InvalidParameterName(Option<Span>),
    /// The namelist did not begin with the required ampersand (`&`).
    NoAmpersand(Option<Span>),
    /// The token following the ampersand was not a valid group name.
    InvalidGroupName(Option<Span>),
    /// A parameter name was not followed by an equals sign (`=`).
    NoEquals(Option<Span>),
    /// The token stream ended before a complete namelist could be parsed.
    NoTokens,
    // Parse(Option<Span>, String),
    // Io(std::io::Error),
}
837
838impl NmlParseError {
839    pub fn span(&self) -> Option<Span> {
840        match self {
841            Self::Tokenize(err) => Some(err.span()),
842            Self::InvalidParameterName(span) => *span,
843            Self::NoAmpersand(span) => *span,
844            Self::InvalidGroupName(span) => *span,
845            Self::NoEquals(span) => *span,
846            Self::NoTokens => None,
847            // Self::Parse(span, _) => *span,
848            // Self::Io(_) => None,
849        }
850    }
851}
852
853impl std::fmt::Display for NmlParseError {
854    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
855        match self {
856            Self::Tokenize(err) => {
857                write!(f, "{err}")
858            }
859            Self::InvalidParameterName(_) => {
860                write!(f, "invalid parameter name")
861            }
862            Self::NoAmpersand(_) => {
863                write!(f, "no ampersand at the beginning of namelist")
864            }
865            Self::InvalidGroupName(_) => {
866                write!(f, "no ampersand at the beginning of namelist")
867            }
868            Self::NoEquals(_) => {
869                write!(f, "no equals succeeding the parameter name")
870            }
871            Self::NoTokens => {
872                write!(f, "namelist ended early with insufficient tokens")
873            } // Self::Parse(_, err) => {
874              //     write!(f, "{err}")
875              // }
876              // Self::Io(err) => {
877              //     write!(f, "{err}")
878              // }
879        }
880    }
881}
882
883impl std::error::Error for NmlParseError {
884    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
885        match self {
886            Self::Tokenize(err) => Some(err),
887            Self::InvalidParameterName(_) => None,
888            Self::NoAmpersand(_) => None,
889            Self::InvalidGroupName(_) => None,
890            Self::NoEquals(_) => None,
891            Self::NoTokens => None,
892            // Self::Parse(_, _) => None,
893            // Self::Io(err) => Some(err),
894        }
895    }
896}
897
/// Errors produced while turning a character stream into tokens. Each
/// variant carries the [`Span`] at which the problem was detected.
#[derive(Debug)]
pub enum TokenizerError {
    /// Token could not be interpreted as either a bool or a number.
    InvalidBoolOrNumber(Span),
    /// A character that is not valid at this point in the input.
    InvalidCharacter(Span),
    // InvalidParameterName(Span),
    /// Input ended while a bool literal was still incomplete.
    UnfinishedBool(Span),
    /// Input ended while a bool-or-number token was still ambiguous.
    UnfinishedBoolOrNumber(Span),
    /// Input ended inside a quoted string before the closing quote.
    UnclosedQuote(Span),
    /// The underlying byte stream failed to decode as UTF-8.
    CharError(Span, CharDecodeError),
}
908
909impl TokenizerError {
910    pub fn span(&self) -> Span {
911        match self {
912            Self::InvalidBoolOrNumber(span) => *span,
913            Self::InvalidCharacter(span) => *span,
914            Self::UnfinishedBool(span) => *span,
915            Self::UnfinishedBoolOrNumber(span) => *span,
916            Self::UnclosedQuote(span) => *span,
917            Self::CharError(span, _) => *span,
918        }
919    }
920}
921
922impl std::fmt::Display for TokenizerError {
923    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
924        match self {
925            Self::InvalidBoolOrNumber(_) => {
926                write!(f, "Invalid bool or number")
927            }
928            Self::InvalidCharacter(_) => {
929                write!(f, "Invalid character")
930            }
931            Self::UnfinishedBool(_) => {
932                write!(f, "Unfinished bool")
933            }
934            Self::UnfinishedBoolOrNumber(_) => {
935                write!(f, "Unfinished booll or number")
936            }
937            Self::UnclosedQuote(_) => {
938                write!(f, "Unclosed quote")
939            }
940            Self::CharError(_, err) => {
941                write!(f, "UTF-8 decode error: {err}")
942            }
943        }
944    }
945}
946
947impl std::error::Error for TokenizerError {
948    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
949        match *self {
950            Self::InvalidBoolOrNumber(_) => None,
951            Self::InvalidCharacter(_) => None,
952            Self::UnfinishedBool(_) => None,
953            Self::UnfinishedBoolOrNumber(_) => None,
954            Self::UnclosedQuote(_) => None,
955            Self::CharError(_, _) => None,
956        }
957    }
958}
959
#[cfg(test)]
mod tests {
    use super::*;

    // Input with no leading `&` lies outside any namelist, so the whole
    // line is consumed as a single Comment token covering all 5 chars.
    #[test]
    fn trivial_tokens0() {
        let s = "abc=2";
        let tokens = tokenize_str(s).expect("test tokenization failed");
        assert_eq!(
            vec![LocatedToken {
                span: Some(Span {
                    lo: 0,
                    len: 5,
                    column: 0,
                    line: 0
                }),
                token: Token::Comment("abc=2".to_string()),
            },],
            tokens
        );
    }

    // Minimal namelist: ampersand, group name, parameter, equals, number.
    #[test]
    fn trivial_tokens1() {
        let s = "&H abc=2";
        let tokens: Vec<_> = tokenize_str(s)
            .expect("test tokenization failed")
            .into_iter()
            .map(|x| x.token)
            .collect();
        assert_eq!(
            vec![
                Token::Ampersand,
                Token::Identifier("H".to_string()),
                Token::Whitespace(" ".to_string()),
                Token::Identifier("abc".to_string()),
                Token::Equals,
                Token::Number("2".to_string()),
            ],
            tokens
        );
    }

    // Whitespace between `=` and the value is emitted as its own token.
    #[test]
    fn trivial_tokens2() {
        let s = "&H abc= 2";
        let tokens: Vec<_> = tokenize_str(s)
            .expect("test tokenization failed")
            .into_iter()
            .map(|x| x.token)
            .collect();
        assert_eq!(
            vec![
                Token::Ampersand,
                Token::Identifier("H".to_string()),
                Token::Whitespace(" ".to_string()),
                Token::Identifier("abc".to_string()),
                Token::Equals,
                Token::Whitespace(" ".to_string()),
                Token::Number("2".to_string()),
            ],
            tokens
        );
    }

    // A bare `)` inside a namelist tokenizes as RightBracket.
    #[test]
    fn trivial_tokens3() {
        assert_eq!(
            tokenize_str("&H )=2")
                .expect("test tokenization failed")
                .into_iter()
                .map(|x| x.token)
                .collect::<Vec<_>>(),
            vec![
                Token::Ampersand,
                Token::Identifier("H".to_string()),
                Token::Whitespace(" ".to_string()),
                Token::RightBracket,
                Token::Equals,
                Token::Number("2".to_string()),
            ]
        );
    }

    // `/` directly after a number terminates the namelist (RightSlash).
    #[test]
    fn trivial_tokens4() {
        let s = "&abc=2/";
        let tokens: Vec<_> = tokenize_str(s)
            .expect("test tokenization failed")
            .into_iter()
            .map(|x| x.token)
            .collect();
        assert_eq!(
            vec![
                Token::Ampersand,
                Token::Identifier("abc".to_string()),
                Token::Equals,
                Token::Number("2".to_string()),
                Token::RightSlash,
            ],
            tokens
        );
    }

    // A number may start with a bare decimal point (".2").
    #[test]
    fn trivial_tokens5() {
        let s = "&abc=.2/";
        let tokens: Vec<_> = tokenize_str(s)
            .expect("test tokenization failed")
            .into_iter()
            .map(|x| x.token)
            .collect();
        assert_eq!(
            vec![
                Token::Ampersand,
                Token::Identifier("abc".to_string()),
                Token::Equals,
                Token::Number(".2".to_string()),
                Token::RightSlash,
            ],
            tokens
        );
    }

    // A number may end with a trailing decimal point ("2.") before `/`.
    #[test]
    fn trivial_tokens6() {
        let s = "&abc=2./";
        let tokens: Vec<_> = tokenize_str(s)
            .expect("test tokenization failed")
            .into_iter()
            .map(|x| x.token)
            .collect();
        assert_eq!(
            vec![
                Token::Ampersand,
                Token::Identifier("abc".to_string()),
                Token::Equals,
                Token::Number("2.".to_string()),
                Token::RightSlash,
            ],
            tokens
        );
    }
    // A newline after a value is folded into a Whitespace token
    // (not a separate NewLine token) in this position.
    #[test]
    fn trivial_tokens7() {
        let s = "&abc=2.\n/";
        let tokens: Vec<_> = tokenize_str(s)
            .expect("test tokenization failed")
            .into_iter()
            .map(|x| x.token)
            .collect();
        assert_eq!(
            vec![
                Token::Ampersand,
                Token::Identifier("abc".to_string()),
                Token::Equals,
                Token::Number("2.".to_string()),
                Token::Whitespace("\n".to_string()),
                Token::RightSlash,
            ],
            tokens
        );
    }
    // CRLF line endings are preserved verbatim inside the Whitespace token.
    #[test]
    fn trivial_tokens8() {
        let s = "&abc=2.\r\n/";
        let tokens: Vec<_> = tokenize_str(s)
            .expect("test tokenization failed")
            .into_iter()
            .map(|x| x.token)
            .collect();
        assert_eq!(
            vec![
                Token::Ampersand,
                Token::Identifier("abc".to_string()),
                Token::Equals,
                Token::Number("2.".to_string()),
                Token::Whitespace("\r\n".to_string()),
                Token::RightSlash,
            ],
            tokens
        );
    }

    // Input ending mid-bool (".TR") must yield UnfinishedBool with the
    // span of the partial token: ".TR" starts at byte/column 7, len 3.
    #[test]
    fn bad_tokens1() {
        let res = tokenize_str("&H abc=.TR");
        assert!(res.is_err());
        if let Err(TokenizerError::UnfinishedBool(span)) = res {
            assert_eq!(span.lo, 7);
            assert_eq!(span.len, 3);
            assert_eq!(span.line, 0);
            assert_eq!(span.column, 7);
        } else {
            panic!("Incorrect error type {:?}", res);
        }
    }

    // Mixed content: quoted string (quotes kept in the token text),
    // brackets, commas, and a colon.
    #[test]
    fn simple_tokens1() {
        let s = "&H abc=2,'ad c' (2,:)";
        let tokens: Vec<_> = tokenize_str(s)
            .expect("test tokenization failed")
            .into_iter()
            .map(|x| x.token)
            .collect();
        let expected = vec![
            Token::Ampersand,
            Token::Identifier("H".to_string()),
            Token::Whitespace(" ".to_string()),
            Token::Identifier("abc".to_string()),
            Token::Equals,
            Token::Number("2".to_string()),
            Token::Comma,
            Token::QuotedStr("'ad c'".to_string()),
            Token::Whitespace(" ".to_string()),
            Token::LeftBracket,
            Token::Number("2".to_string()),
            Token::Comma,
            Token::Colon,
            Token::RightBracket,
        ];
        assert_eq!(expected, tokens);
    }

    // Array-range subscript syntax: NAME(1:2)=v1, v2.
    #[test]
    fn simple_tokens2() {
        assert_eq!(
            tokenize_str("&H TEMPERATURES(1:2)=273.15, 274")
                .expect("test tokenization failed")
                .into_iter()
                .map(|x| x.token)
                .collect::<Vec<_>>(),
            vec![
                Token::Ampersand,
                Token::Identifier("H".to_string()),
                Token::Whitespace(" ".to_string()),
                Token::Identifier("TEMPERATURES".to_string()),
                Token::LeftBracket,
                Token::Number("1".to_string()),
                Token::Colon,
                Token::Number("2".to_string()),
                Token::RightBracket,
                Token::Equals,
                Token::Number("273.15".to_string()),
                Token::Comma,
                Token::Whitespace(" ".to_string()),
                Token::Number("274".to_string()),
            ]
        );
    }

    // Whitespace spanning a line break (" \n ") stays one Whitespace token.
    #[test]
    fn simple_tokens3() {
        assert_eq!(
            tokenize_str("&H TEMPERATURES(1:2)=273.15, \n 274")
                .expect("test tokenization failed")
                .into_iter()
                .map(|x| x.token)
                .collect::<Vec<_>>(),
            vec![
                Token::Ampersand,
                Token::Identifier("H".to_string()),
                Token::Whitespace(" ".to_string()),
                Token::Identifier("TEMPERATURES".to_string()),
                Token::LeftBracket,
                Token::Number("1".to_string()),
                Token::Colon,
                Token::Number("2".to_string()),
                Token::RightBracket,
                Token::Equals,
                Token::Number("273.15".to_string()),
                Token::Comma,
                Token::Whitespace(" \n ".to_string()),
                Token::Number("274".to_string()),
            ]
        );
    }

    // Outside a namelist, every line (including `!`-prefixed ones) becomes
    // its own Comment token, with the trailing newline included.
    #[test]
    fn commented_tokens1() {
        let tokens: Vec<_> = tokenize_str("! hi\nTEMPERATURES(1:2)=273.15, \n 274")
            .expect("test tokenization failed")
            .into_iter()
            .map(|l_token| l_token.token().clone())
            .collect();
        let expected = vec![
            Token::Comment("! hi\n".to_string()),
            Token::Comment("TEMPERATURES(1:2)=273.15, \n".to_string()),
            Token::Comment(" 274".to_string()),
        ];
        assert_eq!(tokens, expected);
    }
    // A trailing `! hello` on the last line is absorbed into that line's
    // Comment token rather than emitted separately.
    #[test]
    fn commented_tokens2() {
        let tokens: Vec<_> = tokenize_str("! hi\nTEMPERATURES(1:2)=273.15, \n 274 ! hello")
            .expect("test tokenization failed")
            .into_iter()
            .map(|l_token| l_token.token().clone())
            .collect();
        let expected = vec![
            Token::Comment("! hi\n".to_string()),
            Token::Comment("TEMPERATURES(1:2)=273.15, \n".to_string()),
            Token::Comment(" 274 ! hello".to_string()),
        ];
        assert_eq!(tokens, expected);
    }
}