kconfig_parser/lex/
lexer.rs

1/*
2 Cargo KConfig - KConfig parser
3 Copyright (C) 2022  Sjoerd van Leent
4
5--------------------------------------------------------------------------------
6
7Copyright Notice: Apache
8
9Licensed under the Apache License, Version 2.0 (the "License"); you may not use
10this file except in compliance with the License. You may obtain a copy of the
11License at
12
13   https://www.apache.org/licenses/LICENSE-2.0
14
15Unless required by applicable law or agreed to in writing, software distributed
16under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
17CONDITIONS OF ANY KIND, either express or implied. See the License for the
18specific language governing permissions and limitations under the License.
19
20--------------------------------------------------------------------------------
21
22Copyright Notice: GPLv2
23
24This program is free software: you can redistribute it and/or modify
25it under the terms of the GNU General Public License as published by
26the Free Software Foundation, either version 2 of the License, or
27(at your option) any later version.
28
29This program is distributed in the hope that it will be useful,
30but WITHOUT ANY WARRANTY; without even the implied warranty of
31MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
32GNU General Public License for more details.
33
34You should have received a copy of the GNU General Public License
35along with this program.  If not, see <https://www.gnu.org/licenses/>.
36
37--------------------------------------------------------------------------------
38
39Copyright Notice: MIT
40
41Permission is hereby granted, free of charge, to any person obtaining a copy of
42this software and associated documentation files (the “Software”), to deal in
43the Software without restriction, including without limitation the rights to
44use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
45the Software, and to permit persons to whom the Software is furnished to do so,
46subject to the following conditions:
47
48The above copyright notice and this permission notice shall be included in all
49copies or substantial portions of the Software.
50
51THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
52IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
53FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
54COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
55IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
56CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
57*/
58
59//! This file contains the lexer and the logic of the lexer, implementing
60//! the lexer machine.
61
62use std::io::{BufRead, BufReader, Read};
63
64use crate::lex::structs::EqualityOperator;
65
66use super::{
67    structs::{Keyword, Lexicon, Token},
68    LexerBase,
69};
70
71fn maybe_keyword(s: &str) -> Option<Keyword> {
72    match s {
73        "source" => Some(Keyword::Source),
74        "mainmenu" => Some(Keyword::Mainmenu),
75        "config" => Some(Keyword::Config),
76        "menuconfig" => Some(Keyword::Menuconfig),
77        "choice" => Some(Keyword::Choice),
78        "endchoice" => Some(Keyword::Endchoice),
79        "menu" => Some(Keyword::Menu),
80        "endmenu" => Some(Keyword::Endmenu),
81        "if" => Some(Keyword::If),
82        "endif" => Some(Keyword::Endif),
83        "bool" => Some(Keyword::Bool),
84        "def_bool" => Some(Keyword::DefBool),
85        "tristate" => Some(Keyword::Tristate),
86        "def_tristate" => Some(Keyword::DefTristate),
87        "string" => Some(Keyword::String),
88        "hex" => Some(Keyword::Hex),
89        "int" => Some(Keyword::Int),
90        "default" => Some(Keyword::Default),
91        "depends" => Some(Keyword::Depends),
92        "on" => Some(Keyword::On),
93        "select" => Some(Keyword::Select),
94        "imply" => Some(Keyword::Imply),
95        "visible" => Some(Keyword::Visible),
96        "range" => Some(Keyword::Range),
97        "prompt" => Some(Keyword::Prompt),
98        "comment" => Some(Keyword::Comment),
99        _ => None,
100    }
101}
102
103/// Trims help lines, removes whitespace which is in front and/or back
104/// of any given help line, and concatenates the lines with a new line
105/// symbol. Then returns the complete set of lines, trimmed at the
106/// front of the complete text, keeping white lines within the middle
107/// of the text (used for paragraph separation.)
108fn trim_help(s: &str) -> String {
109    let mut result = String::new();
110    for line in s.lines() {
111        let trimmed = line.trim();
112        result.push_str(trimmed);
113        result.push('\n');
114    }
115    result.trim().to_string()
116}
117
118/// Strips the last character from the given string, and returns
119/// a new String without that character.
120fn rstrip(s: &str) -> String {
121    s[..s.len() - 1].to_string()
122}
123
124/// This structure holds the data of the actual lexer. It is expected
125/// that this structure can be mutated by the `next_token` function,
126/// which advances to the next token, and returns the term found by
127/// the lexer.
128pub struct Lexer<T>
129where
130    T: Read,
131{
132    reader: BufReader<T>,
133    reading_line: Option<String>,
134    back: Option<char>,
135    open_macro: usize,
136    col: usize,
137    line: usize,
138}
139
140/// The lexer implementation allows to read the tokens from a given
141/// type implementing the Read trait. If an error occurs while reading,
142/// or EOT has been reached, the `next_token` function will return
143/// Error or EOT as its term.
144impl<T: Read> Lexer<T> {
145    /// Creates a new lexer from the given input, where input
146    /// supports the Read trait.
147    pub fn create(input: T) -> Self {
148        return Self {
149            reader: BufReader::new(input),
150            reading_line: None,
151            back: None,
152            open_macro: 0,
153            col: 0,
154            line: 1,
155        };
156    }
157
158    /// Finds the next character of the input, if available. If
159    /// available, the characters is returned as Some Option in
160    /// a Result. If the characters are depleted, then None will
161    /// be returned in a Result. If an I/O Error occurred, it will
162    /// be returned in the Result.
163    fn next_char(&mut self) -> std::io::Result<Option<char>> {
164        // If a character has been pushed back (can only occur once),
165        // return that character.
166        if let Some(c) = self.back {
167            self.back = None;
168            return Ok(Some(c));
169        }
170
171        if self.reading_line == None {
172            let mut new_line: String = String::new();
173            match self.reader.read_line(&mut new_line)? {
174                0 => (),
175                _ => self.reading_line = Some(new_line),
176            }
177        }
178
179        // If it is still None, then nothing has been read,
180        // this must be the end.
181        if let Some(s) = &self.reading_line {
182            // There will be a next character
183            let c = s.chars().nth(0).unwrap();
184            let new_string = s[1..].to_string();
185            match new_string.len() > 0 {
186                true => self.reading_line = Some(new_string),
187                false => self.reading_line = None,
188            }
189            Ok(Some(c))
190        } else {
191            Ok(None)
192        }
193    }
194
195    /// Pushes the last character back into the Lexer as this character
196    /// is not understood by the current token, but might be valid for
197    /// the next token. Typically, this will occur at the end of parsing
198    /// the current token.
199    fn push_back(&mut self, c: char) {
200        self.back = Some(c);
201        if self.col > 0 {
202            self.col -= 1;
203        }
204
205        if c == '\n' {
206            self.line -= 1;
207        }
208    }
209}
210
211impl<T: Read> LexerBase for Lexer<T> {
212    /// Finds the next token in a mutable self. If the next token
213    /// can be found, returns it into Token. If the file or stream
214    /// of data is found, the special term EOT is returned. If an
215    /// error occurred of some sort, the special term Error is
216    /// returned.
217    fn next_token(&mut self) -> Token {
218        // If whitespace is found before a token, swallow it
219
220        enum Machine {
221            Start,
222            MacroStart,
223            Ident,
224            Immediate,
225            Append,
226            String,
227            Lt,
228            Gt,
229            Eq,
230            Ne,
231            And,
232            Or,
233
234            // Help is weird, the way it needs to be parsed is rather destructive
235            Help,
236            ProperHelp,
237            // Currently running inside a macro
238            Macro,
239            MacroIdent,
240
241            // Deals with a comment line
242            MacroComment,
243            Comment,
244        }
245
246        // Set to true if an escape symbol has been found, and the next symbol
247        // should be interpreted as a char (where available).
248        let mut escape = false;
249
250        // When parsing a help section, is set to the indent of the current help
251        // token. Each line with a higher indent is interpreted as help text,
252        // otherwise it will revert to 'regular' tokenization.
253        let mut exp_help_indent = 0;
254
255        // The current text being parsed, which can be part of the resulting
256        // token.
257        let mut cs = String::default();
258
259        // The current column number
260        let mut column = self.col;
261
262        // The current line number
263        let mut line = self.line;
264
265        // When a comment has been opened, this is set and alters the way the
266        // parsing works until the comment is closed (typically by a newline symbol)
267        let mut open_comment = false;
268
269        // When the previous token either opened or remained in macro mode, the machine
270        // is set to Macro, otherwise to Start. Note that because macro mode can be
271        // nested, this is reflected in the open_macro numeric counter.
272        let mut m = match self.open_macro > 0 {
273            false => Machine::Start,
274            true => Machine::Macro,
275        };
276
277        // Loops and 'eats' every single character until a token has been
278        // properly read, EOT is reached or an Error is returned.
279        loop {
280            let result = self.next_char();
281            match result {
282                Ok(maybe) => match maybe {
283                    Some(c) => {
284                        self.col += 1;
285                        if c == '\n' {
286                            self.line += 1;
287                            self.col = 0;
288                        } else if c == '\r' {
289                            self.col -= 1;
290                            continue;
291                        }
292
293                        if (c != '#' || escape) && !open_comment {
294                            cs.push(c)
295                        }
296                        match m {
297                            // If in open_macro mode, there are only identifiers,
298                            // unless a comma is specified, another macro start is
299                            // identified or a close is identified.
300                            Machine::Macro => match c {
301                                ',' => return Token::create(Lexicon::Comma, column, line, &cs),
302                                ')' => {
303                                    self.open_macro -= 1;
304                                    return Token::create(Lexicon::Close, column, line, &cs);
305                                }
306                                '$' => m = Machine::MacroStart,
307                                '#' => {
308                                    m = Machine::MacroComment;
309                                    open_comment = true;
310                                }
311                                '\\' => {
312                                    escape = true;
313                                    m = Machine::MacroIdent;
314                                    cs = String::new();
315                                }
316                                '(' => {
317                                    return Token::create(
318                                        Lexicon::Error(cs.clone()),
319                                        column,
320                                        line,
321                                        &cs,
322                                    );
323                                }
324                                _ => m = Machine::MacroIdent,
325                            },
326                            Machine::MacroIdent => match c {
327                                '(' => {
328                                    if !escape {
329                                        return Token::create(
330                                            Lexicon::Error(cs.clone()),
331                                            column,
332                                            line,
333                                            &cs,
334                                        );
335                                    }
336                                    escape = false;
337                                }
338                                '$' | ',' | ')' => {
339                                    if !escape {
340                                        self.push_back(c);
341                                        let s = rstrip(&cs);
342                                        return Token::create(
343                                            Lexicon::Identifier(s),
344                                            column,
345                                            line,
346                                            &cs,
347                                        );
348                                    }
349                                    escape = false;
350                                }
351                                '\\' => {
352                                    escape = !escape;
353                                    if escape {
354                                        cs = cs[..cs.len() - 1].to_string();
355                                    }
356                                }
357                                '#' => {
358                                    if !escape {
359                                        self.push_back(c);
360                                        open_comment = true;
361                                        m = Machine::MacroComment;
362                                    }
363                                    escape = false;
364                                }
365                                _ => escape = false,
366                            },
367                            Machine::MacroComment => match c {
368                                '\n' => {
369                                    m = if cs.len() == 0 {
370                                        Machine::Macro
371                                    } else {
372                                        Machine::MacroIdent
373                                    };
374                                    open_comment = false;
375                                }
376                                _ => (),
377                            },
378                            Machine::Comment => match c {
379                                '\n' => {
380                                    cs = String::new();
381                                    m = Machine::Start;
382                                    line = self.line;
383                                    column = self.col;
384                                    open_comment = false;
385                                }
386                                _ => (),
387                            },
388                            Machine::Start => match c {
389                                // Skip control characters, they don't make sense
390                                '\u{0}'..='\u{0D}' | ' ' => {
391                                    cs = String::new();
392                                    line = self.line;
393                                    column = self.col;
394                                }
395                                // If the token is a comma, token data should be returned likewise
396                                '$' => m = Machine::MacroStart,
397                                '(' => return Token::create(Lexicon::Open, column, line, &cs),
398                                ')' => return Token::create(Lexicon::Close, column, line, &cs),
399                                ':' => m = Machine::Immediate,
400                                '+' => m = Machine::Append,
401                                'a'..='z' | 'A'..='Z' | '0'..='9' | '-' => m = Machine::Ident,
402                                '#' => {
403                                    open_comment = true;
404                                    m = Machine::Comment;
405                                }
406                                '"' => m = Machine::String,
407                                '<' => m = Machine::Lt,
408                                '>' => m = Machine::Gt,
409                                '=' => m = Machine::Eq,
410                                '!' => m = Machine::Ne,
411                                '&' => m = Machine::And,
412                                '|' => m = Machine::Or,
413                                _ => return Token::create_error(column, line, &cs),
414                            },
415                            Machine::Lt => match c {
416                                '=' => {
417                                    return Token::create(
418                                        Lexicon::EqualityOperator(EqualityOperator::Lte),
419                                        column,
420                                        line,
421                                        &cs,
422                                    )
423                                }
424                                _ => {
425                                    self.push_back(c);
426                                    return Token::create(
427                                        Lexicon::EqualityOperator(EqualityOperator::Lt),
428                                        column,
429                                        line,
430                                        &cs,
431                                    );
432                                }
433                            },
434                            Machine::Gt => match c {
435                                '=' => {
436                                    return Token::create(
437                                        Lexicon::EqualityOperator(EqualityOperator::Gte),
438                                        column,
439                                        line,
440                                        &cs,
441                                    )
442                                }
443                                _ => {
444                                    self.push_back(c);
445                                    return Token::create(
446                                        Lexicon::EqualityOperator(EqualityOperator::Gt),
447                                        column,
448                                        line,
449                                        &cs,
450                                    );
451                                }
452                            },
453                            Machine::Eq => match c {
454                                '=' => {
455                                    return Token::create(
456                                        Lexicon::EqualityOperator(EqualityOperator::Eq),
457                                        column,
458                                        line,
459                                        &cs,
460                                    )
461                                }
462                                _ => {
463                                    self.push_back(c);
464                                    return Token::create(Lexicon::Assignment, column, line, &cs);
465                                }
466                            },
467                            Machine::Ne => match c {
468                                '=' => {
469                                    return Token::create(
470                                        Lexicon::EqualityOperator(EqualityOperator::Ne),
471                                        column,
472                                        line,
473                                        &cs,
474                                    )
475                                }
476                                _ => {
477                                    self.push_back(c);
478                                    return Token::create(Lexicon::Not, column, line, &cs);
479                                }
480                            },
481                            Machine::And => match c {
482                                '&' => return Token::create(Lexicon::And, column, line, &cs),
483                                _ => {
484                                    self.push_back(c);
485                                    let s = rstrip(&cs);
486                                    return Token::create(Lexicon::Error(s), column, line, &cs);
487                                }
488                            },
489                            Machine::Or => match c {
490                                '|' => return Token::create(Lexicon::Or, column, line, &cs),
491                                _ => {
492                                    self.push_back(c);
493                                    let s = rstrip(&cs);
494                                    return Token::create(Lexicon::Error(s), column, line, &cs);
495                                }
496                            },
497                            Machine::String => match c {
498                                '\\' => escape = !escape,
499                                '"' => {
500                                    if !escape {
501                                        return Token::create(
502                                            Lexicon::String(cs.to_string()),
503                                            column,
504                                            line,
505                                            &cs,
506                                        );
507                                    }
508                                    escape = false;
509                                }
510                                _ => escape = false,
511                            },
512                            Machine::MacroStart => match c {
513                                // The next token of a macro should be (
514                                '(' => {
515                                    self.open_macro += 1;
516                                    return Token::create(Lexicon::MacroOpen, column, line, &cs);
517                                }
518                                _ => {
519                                    self.push_back(c);
520                                    let s = rstrip(&cs);
521                                    return Token::create_error(column, line, &s);
522                                }
523                            },
524
525                            Machine::Immediate => match c {
526                                '=' => {
527                                    return Token::create(
528                                        Lexicon::ImmediateAssignment,
529                                        column,
530                                        line,
531                                        &cs,
532                                    )
533                                }
534                                _ => {
535                                    self.push_back(c);
536                                    let s = rstrip(&cs);
537                                    return Token::create_error(column, line, &s);
538                                }
539                            },
540
541                            Machine::Append => match c {
542                                '=' => {
543                                    return Token::create(
544                                        Lexicon::AppendAssignment,
545                                        column,
546                                        line,
547                                        &cs,
548                                    )
549                                }
550                                _ => {
551                                    self.push_back(c);
552                                    let s = rstrip(&cs);
553                                    return Token::create_error(column, line, &s);
554                                }
555                            },
556
557                            Machine::Ident => match c {
558                                // The next token should be within range 'a' .. 'z',
559                                // 'A' .. 'Z', '0' .. '9', '-', '_',
560                                'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_' => m = Machine::Ident,
561                                _ => {
562                                    self.push_back(c);
563                                    let s = rstrip(&cs);
564                                    match maybe_keyword(&s) {
565                                        Some(k) => {
566                                            return Token::create(
567                                                Lexicon::Keyword(k),
568                                                column,
569                                                line,
570                                                &cs,
571                                            )
572                                        }
573                                        None => {
574                                            if s.eq("help") {
575                                                m = Machine::Help;
576                                                exp_help_indent = column + 1;
577                                                cs = String::new();
578                                            } else {
579                                                return Token::create(
580                                                    Lexicon::Identifier(s),
581                                                    column,
582                                                    line,
583                                                    &cs,
584                                                );
585                                            }
586                                        }
587                                    }
588                                }
589                            },
590                            Machine::Help => match c {
591                                // This is the weirdest token, it consists of an identifier Help, then
592                                // followed by a help text, which can not be parsed, other than the help
593                                // needs to be on the next line, and the next line needs to start further
594                                // indented from the current line.
595                                '\n' => {
596                                    m = Machine::ProperHelp;
597                                    open_comment = false;
598                                }
599                                '\u{0}'..='\u{0D}' => cs = String::new(),
600                                ' ' => cs = String::new(),
601                                '#' => open_comment = true,
602                                _ => {
603                                    if !open_comment {
604                                        return Token::create_error(self.col, line, &cs);
605                                    }
606                                }
607                            },
608                            Machine::ProperHelp => {
609                                if self.col <= exp_help_indent {
610                                    match c {
611                                        '\n' => (),
612                                        '\u{0}'..='\u{0D}' => cs = rstrip(&cs),
613                                        ' ' => cs = rstrip(&cs),
614                                        _ => {
615                                            self.push_back(c);
616                                            let s = rstrip(&cs);
617                                            let help_str = trim_help(&s);
618                                            if help_str.len() == 0 {
619                                                // If no help text was found, then it is an error
620                                                return Token::create(
621                                                    Lexicon::Error(s),
622                                                    column,
623                                                    line,
624                                                    &cs,
625                                                );
626                                            } else {
627                                                return Token::create(
628                                                    Lexicon::Help(trim_help(&s)),
629                                                    self.col,
630                                                    line,
631                                                    &cs,
632                                                );
633                                            }
634                                        }
635                                    }
636                                }
637                            }
638                        }
639                    }
640                    // If no data has been retrieved anymore, this is EOT if in the start state of the machine,
641                    // otherwise it could be an identifier
642                    None => {
643                        return match m {
644                            // Although ending when finding a Macro Identifier makes no sense to the AST, it is
645                            // validly lexed. Let the AST worry about the issue.
646                            Machine::MacroIdent => {
647                                let s = cs.to_string();
648                                return Token::create(Lexicon::Identifier(s), column, line, &cs);
649                            }
650                            Machine::Ident => match maybe_keyword(&cs) {
651                                Some(k) => {
652                                    return Token::create(Lexicon::Keyword(k), column, line, &cs)
653                                }
654                                None => {
655                                    let s = cs.to_string();
656                                    if s.eq("help") {
657                                        // In this case, a help keyword followed by EOT is an error and makes no
658                                        // sense.
659                                        return Token::create(Lexicon::Error(s), column, line, &cs);
660                                    } else {
661                                        return Token::create(
662                                            Lexicon::Identifier(s),
663                                            column,
664                                            line,
665                                            &cs,
666                                        );
667                                    }
668                                }
669                            },
670                            Machine::ProperHelp => {
671                                Token::create(Lexicon::Help(trim_help(&cs)), self.col, line, &cs)
672                            }
673
674                            // Eq can make a valid EOT if only assignment was set
675                            Machine::Eq => Token::create(Lexicon::Assignment, self.col, line, &cs),
676
677                            // Ne can make a valid EOT if only not was set
678                            Machine::Ne => Token::create(Lexicon::Not, self.col, line, &cs),
679
680                            // Any of these states can create a valid EOT
681                            Machine::Start | Machine::Comment | Machine::Macro => {
682                                Token::create_eot(self.col, line)
683                            }
684
685                            // Any other state generates an error. The next call will then create an EOT
686                            _ => Token::create(Lexicon::Error(cs.to_string()), column, line, &cs),
687                        };
688                    }
689                },
690                // If File I/O error occures, consider it also EOT
691                _ => return Token::create_eot(self.col, line),
692            }
693        }
694    }
695}
696
697#[cfg(test)]
698mod tests {
699    use super::super::LexerBase;
700    use super::*;
701
702    #[test]
703    fn test_keywords() {
704        let s = &mut "source mainmenu config menuconfig choice endchoice menu endmenu if endif bool def_bool \
705                      tristate def_tristate string hex int default depends on select imply visible range prompt comment".as_bytes();
706        let mut l = Lexer::create(s);
707        assert_eq!(Lexicon::Keyword(Keyword::Source), l.next_token().term());
708        assert_eq!(Lexicon::Keyword(Keyword::Mainmenu), l.next_token().term());
709        assert_eq!(Lexicon::Keyword(Keyword::Config), l.next_token().term());
710        assert_eq!(Lexicon::Keyword(Keyword::Menuconfig), l.next_token().term());
711        assert_eq!(Lexicon::Keyword(Keyword::Choice), l.next_token().term());
712        assert_eq!(Lexicon::Keyword(Keyword::Endchoice), l.next_token().term());
713        assert_eq!(Lexicon::Keyword(Keyword::Menu), l.next_token().term());
714        assert_eq!(Lexicon::Keyword(Keyword::Endmenu), l.next_token().term());
715        assert_eq!(Lexicon::Keyword(Keyword::If), l.next_token().term());
716        assert_eq!(Lexicon::Keyword(Keyword::Endif), l.next_token().term());
717        assert_eq!(Lexicon::Keyword(Keyword::Bool), l.next_token().term());
718        assert_eq!(Lexicon::Keyword(Keyword::DefBool), l.next_token().term());
719        assert_eq!(Lexicon::Keyword(Keyword::Tristate), l.next_token().term());
720        assert_eq!(
721            Lexicon::Keyword(Keyword::DefTristate),
722            l.next_token().term()
723        );
724        assert_eq!(Lexicon::Keyword(Keyword::String), l.next_token().term());
725        assert_eq!(Lexicon::Keyword(Keyword::Hex), l.next_token().term());
726        assert_eq!(Lexicon::Keyword(Keyword::Int), l.next_token().term());
727        assert_eq!(Lexicon::Keyword(Keyword::Default), l.next_token().term());
728        assert_eq!(Lexicon::Keyword(Keyword::Depends), l.next_token().term());
729        assert_eq!(Lexicon::Keyword(Keyword::On), l.next_token().term());
730        assert_eq!(Lexicon::Keyword(Keyword::Select), l.next_token().term());
731        assert_eq!(Lexicon::Keyword(Keyword::Imply), l.next_token().term());
732        assert_eq!(Lexicon::Keyword(Keyword::Visible), l.next_token().term());
733        assert_eq!(Lexicon::Keyword(Keyword::Range), l.next_token().term());
734        assert_eq!(Lexicon::Keyword(Keyword::Prompt), l.next_token().term());
735        assert_eq!(Lexicon::Keyword(Keyword::Comment), l.next_token().term());
736        assert_eq!(Lexicon::EOT, l.next_token().term());
737    }
738
739    #[test]
740    fn test_identifier() {
741        let s = &mut "foo BAR".as_bytes();
742        let mut l = Lexer::create(s);
743        assert_eq!(
744            Lexicon::Identifier("foo".to_string()),
745            l.next_token().term()
746        );
747        assert_eq!(
748            Lexicon::Identifier("BAR".to_string()),
749            l.next_token().term()
750        );
751        assert_eq!(Lexicon::EOT, l.next_token().term());
752    }
753
754    #[test]
755    fn test_string() {
756        let s = &mut "\"Hello \\\" World\"".as_bytes();
757        let mut l = Lexer::create(s);
758        assert_eq!(
759            Lexicon::String("\"Hello \\\" World\"".to_string()),
760            l.next_token().term()
761        );
762        assert_eq!(Lexicon::EOT, l.next_token().term());
763    }
764
765    #[test]
766    fn test_assignment() {
767        let s = &mut "=".as_bytes();
768        let mut l = Lexer::create(s);
769        assert_eq!(Lexicon::Assignment, l.next_token().term());
770        assert_eq!(Lexicon::EOT, l.next_token().term());
771    }
772
773    #[test]
774    fn test_assignment_with_space() {
775        let s = &mut "= ".as_bytes();
776        let mut l = Lexer::create(s);
777        assert_eq!(Lexicon::Assignment, l.next_token().term());
778        assert_eq!(Lexicon::EOT, l.next_token().term());
779    }
780
781    #[test]
782    fn test_immediate_assignment() {
783        let s = &mut ":=".as_bytes();
784        let mut l = Lexer::create(s);
785        assert_eq!(Lexicon::ImmediateAssignment, l.next_token().term());
786        assert_eq!(Lexicon::EOT, l.next_token().term());
787    }
788
789    #[test]
790    fn test_append_assignment() {
791        let s = &mut "+=".as_bytes();
792        let mut l = Lexer::create(s);
793        assert_eq!(Lexicon::AppendAssignment, l.next_token().term());
794        assert_eq!(Lexicon::EOT, l.next_token().term());
795    }
796
797    #[test]
798    fn test_eq_operators() {
799        let s = &mut "< > <= >= == !=".as_bytes();
800        let mut l = Lexer::create(s);
801        assert_eq!(
802            Lexicon::EqualityOperator(EqualityOperator::Lt),
803            l.next_token().term()
804        );
805        assert_eq!(
806            Lexicon::EqualityOperator(EqualityOperator::Gt),
807            l.next_token().term()
808        );
809        assert_eq!(
810            Lexicon::EqualityOperator(EqualityOperator::Lte),
811            l.next_token().term()
812        );
813        assert_eq!(
814            Lexicon::EqualityOperator(EqualityOperator::Gte),
815            l.next_token().term()
816        );
817        assert_eq!(
818            Lexicon::EqualityOperator(EqualityOperator::Eq),
819            l.next_token().term()
820        );
821        assert_eq!(
822            Lexicon::EqualityOperator(EqualityOperator::Ne),
823            l.next_token().term()
824        );
825        assert_eq!(Lexicon::EOT, l.next_token().term());
826    }
827
828    #[test]
829    fn test_binary_comparison() {
830        let s = &mut "&& ||".as_bytes();
831        let mut l = Lexer::create(s);
832        assert_eq!(Lexicon::And, l.next_token().term());
833        assert_eq!(Lexicon::Or, l.next_token().term());
834        assert_eq!(Lexicon::EOT, l.next_token().term());
835    }
836
837    #[test]
838    fn test_not() {
839        let s = &mut "!".as_bytes();
840        let mut l = Lexer::create(s);
841        assert_eq!(Lexicon::Not, l.next_token().term());
842        assert_eq!(Lexicon::EOT, l.next_token().term());
843    }
844
845    #[test]
846    fn test_not_with_space() {
847        let s = &mut "! ".as_bytes();
848        let mut l = Lexer::create(s);
849        assert_eq!(Lexicon::Not, l.next_token().term());
850        assert_eq!(Lexicon::EOT, l.next_token().term());
851    }
852
853    #[test]
854    fn test_open() {
855        let s = &mut "(".as_bytes();
856        let mut l = Lexer::create(s);
857        assert_eq!(Lexicon::Open, l.next_token().term());
858        assert_eq!(Lexicon::EOT, l.next_token().term());
859    }
860
861    #[test]
862    fn test_close() {
863        let s = &mut ")".as_bytes();
864        let mut l = Lexer::create(s);
865        assert_eq!(Lexicon::Close, l.next_token().term());
866        assert_eq!(Lexicon::EOT, l.next_token().term());
867    }
868
869    #[test]
870    fn test_macro_open() {
871        let s = &mut "$(".as_bytes();
872        let mut l = Lexer::create(s);
873        assert_eq!(Lexicon::MacroOpen, l.next_token().term());
874        assert_eq!(Lexicon::EOT, l.next_token().term());
875    }
876
877    #[test]
878    fn test_macro_ident_comma() {
879        let s = &mut "$(foo,bar)".as_bytes();
880        let mut l = Lexer::create(s);
881        assert_eq!(Lexicon::MacroOpen, l.next_token().term());
882        assert_eq!(
883            Lexicon::Identifier("foo".to_string()),
884            l.next_token().term()
885        );
886        assert_eq!(Lexicon::Comma, l.next_token().term());
887        assert_eq!(
888            Lexicon::Identifier("bar".to_string()),
889            l.next_token().term()
890        );
891        assert_eq!(Lexicon::Close, l.next_token().term());
892        assert_eq!(Lexicon::EOT, l.next_token().term());
893    }
894
895    #[test]
896    fn test_macro_ident_comma_spaces() {
897        let s = &mut "$(foo , bar)".as_bytes();
898        let mut l = Lexer::create(s);
899        assert_eq!(Lexicon::MacroOpen, l.next_token().term());
900        assert_eq!(
901            Lexicon::Identifier("foo ".to_string()),
902            l.next_token().term()
903        );
904        assert_eq!(Lexicon::Comma, l.next_token().term());
905        assert_eq!(
906            Lexicon::Identifier(" bar".to_string()),
907            l.next_token().term()
908        );
909        assert_eq!(Lexicon::Close, l.next_token().term());
910        assert_eq!(Lexicon::EOT, l.next_token().term());
911    }
912
913    #[test]
914    fn test_macro_ident_no_keyword() {
915        let s = &mut "$(config)".as_bytes();
916        let mut l = Lexer::create(s);
917        assert_eq!(Lexicon::MacroOpen, l.next_token().term());
918        assert_eq!(
919            Lexicon::Identifier("config".to_string()),
920            l.next_token().term()
921        );
922        assert_eq!(Lexicon::Close, l.next_token().term());
923        assert_eq!(Lexicon::EOT, l.next_token().term());
924    }
925
926    #[test]
927    fn test_macro_comma() {
928        let s = &mut "$(,)".as_bytes();
929        let mut l = Lexer::create(s);
930        assert_eq!(Lexicon::MacroOpen, l.next_token().term());
931        assert_eq!(Lexicon::Comma, l.next_token().term());
932        assert_eq!(Lexicon::Close, l.next_token().term());
933        assert_eq!(Lexicon::EOT, l.next_token().term());
934    }
935
936    #[test]
937    fn test_comment() {
938        let s = &mut "foo # Foo Bar\nbar".as_bytes();
939        let mut l = Lexer::create(s);
940        assert_eq!(
941            Lexicon::Identifier("foo".to_string()),
942            l.next_token().term()
943        );
944        assert_eq!(
945            Lexicon::Identifier("bar".to_string()),
946            l.next_token().term()
947        );
948        assert_eq!(Lexicon::EOT, l.next_token().term());
949    }
950
951    #[test]
952    fn test_macro_comment() {
953        let s = &mut "$(# Foo Bar\n)".as_bytes();
954        let mut l = Lexer::create(s);
955        assert_eq!(Lexicon::MacroOpen, l.next_token().term());
956        assert_eq!(Lexicon::Close, l.next_token().term());
957        assert_eq!(Lexicon::EOT, l.next_token().term());
958    }
959
960    /**
961     * This test not only tests comments within macros, but also
962     * tests if the identifier is properly surrounding the comment.
963     */
964    #[test]
965    fn test_macro_intermediate_comment() {
966        let s = &mut "$(foo # Foo Bar\nbar)".as_bytes();
967        let mut l = Lexer::create(s);
968        assert_eq!(Lexicon::MacroOpen, l.next_token().term());
969        assert_eq!(
970            Lexicon::Identifier("foo bar".to_string()),
971            l.next_token().term()
972        );
973        assert_eq!(Lexicon::Close, l.next_token().term());
974        assert_eq!(Lexicon::EOT, l.next_token().term());
975    }
976
977    /**
978     * This test if a macro comment and comma before the comment
979     * dutifully creates two identifiers
980     */
981    #[test]
982    fn test_macro_comment_with_comma_before() {
983        let s = &mut "$(foo, # Foo Bar\nbar)".as_bytes();
984        let mut l = Lexer::create(s);
985        assert_eq!(Lexicon::MacroOpen, l.next_token().term());
986        assert_eq!(
987            Lexicon::Identifier("foo".to_string()),
988            l.next_token().term()
989        );
990        assert_eq!(Lexicon::Comma, l.next_token().term());
991        assert_eq!(
992            Lexicon::Identifier(" bar".to_string()),
993            l.next_token().term()
994        );
995        assert_eq!(Lexicon::Close, l.next_token().term());
996        assert_eq!(Lexicon::EOT, l.next_token().term());
997    }
998
999    #[test]
1000    fn test_macro_comment_with_comma_after() {
1001        let s = &mut "$(foo# Foo Bar\n, bar)".as_bytes();
1002        let mut l = Lexer::create(s);
1003        assert_eq!(Lexicon::MacroOpen, l.next_token().term());
1004        assert_eq!(
1005            Lexicon::Identifier("foo".to_string()),
1006            l.next_token().term()
1007        );
1008        assert_eq!(Lexicon::Comma, l.next_token().term());
1009        assert_eq!(
1010            Lexicon::Identifier(" bar".to_string()),
1011            l.next_token().term()
1012        );
1013        assert_eq!(Lexicon::Close, l.next_token().term());
1014        assert_eq!(Lexicon::EOT, l.next_token().term());
1015    }
1016
1017    #[test]
1018    fn test_return_character_does_nothing() {
1019        let s = &mut "foo\rbar".as_bytes();
1020        let mut l = Lexer::create(s);
1021        assert_eq!(
1022            Lexicon::Identifier("foobar".to_string()),
1023            l.next_token().term()
1024        );
1025        assert_eq!(Lexicon::EOT, l.next_token().term());
1026    }
1027
1028    #[test]
1029    fn test_negative_integer() {
1030        let s = &mut "-123".as_bytes();
1031        let mut l = Lexer::create(s);
1032        assert_eq!(
1033            Lexicon::Identifier("-123".to_owned()),
1034            l.next_token().term()
1035        );
1036        assert_eq!(Lexicon::EOT, l.next_token().term());
1037    }
1038
1039    #[test]
1040    fn test_help() {
1041        let s = &mut "help\n  spam ham eggs".as_bytes();
1042        let mut l = Lexer::create(s);
1043        assert_eq!(
1044            Lexicon::Help("spam ham eggs".to_string()),
1045            l.next_token().term()
1046        );
1047        assert_eq!(Lexicon::EOT, l.next_token().term());
1048    }
1049
1050    #[test]
1051    fn test_multiline_help() {
1052        let s = &mut "help\n  spam\n  ham\n  eggs".as_bytes();
1053        let mut l = Lexer::create(s);
1054        assert_eq!(
1055            Lexicon::Help("spam\nham\neggs".to_string()),
1056            l.next_token().term()
1057        );
1058        assert_eq!(Lexicon::EOT, l.next_token().term());
1059    }
1060
1061    #[test]
1062    fn test_help_with_other_stuff() {
1063        let s = &mut "help\n  spam ham eggs\nfoo".as_bytes();
1064        let mut l = Lexer::create(s);
1065        assert_eq!(
1066            Lexicon::Help("spam ham eggs".to_string()),
1067            l.next_token().term()
1068        );
1069    }
1070
1071    #[test]
1072    fn test_multiline_help_with_other_stuff() {
1073        let s = &mut "help\n  spam\n  ham\n  eggs\nfoo".as_bytes();
1074        let mut l = Lexer::create(s);
1075        assert_eq!(
1076            Lexicon::Help("spam\nham\neggs".to_string()),
1077            l.next_token().term()
1078        );
1079    }
1080
1081    #[test]
1082    fn test_help_with_comment() {
1083        let s = &mut "help # silly comment\n  spam ham eggs".as_bytes();
1084        let mut l = Lexer::create(s);
1085        assert_eq!(
1086            Lexicon::Help("spam ham eggs".to_string()),
1087            l.next_token().term()
1088        );
1089        assert_eq!(Lexicon::EOT, l.next_token().term());
1090    }
1091
1092    #[test]
1093    fn start_macro_in_macro() {
1094        let s = &mut "$(hello$(world))".as_bytes();
1095        let mut l = Lexer::create(s);
1096        assert_eq!(Lexicon::MacroOpen, l.next_token().term());
1097        assert_eq!(
1098            Lexicon::Identifier("hello".to_string()),
1099            l.next_token().term()
1100        );
1101        assert_eq!(Lexicon::MacroOpen, l.next_token().term());
1102        assert_eq!(
1103            Lexicon::Identifier("world".to_string()),
1104            l.next_token().term()
1105        );
1106        assert_eq!(Lexicon::Close, l.next_token().term());
1107        assert_eq!(Lexicon::Close, l.next_token().term());
1108        assert_eq!(Lexicon::EOT, l.next_token().term());
1109    }
1110    #[test]
1111    fn test_escape_in_macro_identifier() {
1112        let s = &mut "$(hello\\$\\(world\\))".as_bytes();
1113        let mut l = Lexer::create(s);
1114        assert_eq!(Lexicon::MacroOpen, l.next_token().term());
1115        assert_eq!(
1116            Lexicon::Identifier("hello$(world)".to_string()),
1117            l.next_token().term()
1118        );
1119        assert_eq!(Lexicon::Close, l.next_token().term());
1120        assert_eq!(Lexicon::EOT, l.next_token().term());
1121    }
1122    #[test]
1123    fn test_escape_in_macro_start() {
1124        let s = &mut "$(\\$\\(world\\))".as_bytes();
1125        let mut l = Lexer::create(s);
1126        assert_eq!(Lexicon::MacroOpen, l.next_token().term());
1127        assert_eq!(
1128            Lexicon::Identifier("$(world)".to_string()),
1129            l.next_token().term()
1130        );
1131        assert_eq!(Lexicon::Close, l.next_token().term());
1132        assert_eq!(Lexicon::EOT, l.next_token().term());
1133    }
1134    #[test]
1135    fn test_escape_escape_in_macro() {
1136        let s = &mut "$(\\\\)".as_bytes();
1137        let mut l = Lexer::create(s);
1138        assert_eq!(Lexicon::MacroOpen, l.next_token().term());
1139        assert_eq!(Lexicon::Identifier("\\".to_string()), l.next_token().term());
1140        assert_eq!(Lexicon::Close, l.next_token().term());
1141        assert_eq!(Lexicon::EOT, l.next_token().term());
1142    }
1143    #[test]
1144    fn test_skip_control_characters() {
1145        let s = &mut "\tfoo".as_bytes();
1146        let mut l = Lexer::create(s);
1147        assert_eq!(
1148            Lexicon::Identifier("foo".to_string()),
1149            l.next_token().term()
1150        );
1151        assert_eq!(Lexicon::EOT, l.next_token().term());
1152    }
1153
1154    #[test]
1155    fn test_col() {
1156        let s = &mut "foo bar".as_bytes();
1157        let mut l = Lexer::create(s);
1158        let t1 = l.next_token();
1159        let t2 = l.next_token();
1160        let t3 = l.next_token();
1161
1162        assert_eq!(0, t1.column());
1163        assert_eq!(4, t2.column());
1164        assert_eq!(7, t3.column());
1165    }
1166
1167    #[test]
1168    fn test_line() {
1169        let s = &mut "\nfoo\nbar".as_bytes();
1170        let mut l = Lexer::create(s);
1171        let t1 = l.next_token();
1172        let t2 = l.next_token();
1173
1174        assert_eq!(2, t1.line());
1175        assert_eq!(3, t2.line());
1176    }
1177
1178    #[test]
1179    fn test_nonsense() {
1180        let s = &mut "/".as_bytes();
1181        let mut l = Lexer::create(s);
1182        let t = l.next_token();
1183        assert_eq!(Lexicon::Error("/".to_string()), t.term());
1184        assert_eq!(Lexicon::EOT, l.next_token().term());
1185    }
1186
1187    #[test]
1188    fn test_wrong_and() {
1189        let s = &mut "& ".as_bytes();
1190        let mut l = Lexer::create(s);
1191        let t = l.next_token();
1192        assert_eq!(Lexicon::Error("&".to_string()), t.term());
1193    }
1194
1195    #[test]
1196    fn test_wrong_or() {
1197        let s = &mut "| ".as_bytes();
1198        let mut l = Lexer::create(s);
1199        let t = l.next_token();
1200        assert_eq!(Lexicon::Error("|".to_string()), t.term());
1201    }
1202
1203    #[test]
1204    fn test_wrong_dollar() {
1205        let s = &mut "$ ".as_bytes();
1206        let mut l = Lexer::create(s);
1207        let t = l.next_token();
1208        assert_eq!(Lexicon::Error("$".to_string()), t.term());
1209    }
1210
1211    #[test]
1212    fn test_wrong_colon() {
1213        let s = &mut ": ".as_bytes();
1214        let mut l = Lexer::create(s);
1215        let t = l.next_token();
1216        assert_eq!(Lexicon::Error(":".to_string()), t.term());
1217    }
1218
1219    #[test]
1220    fn test_wrong_plus() {
1221        let s = &mut "+ ".as_bytes();
1222        let mut l = Lexer::create(s);
1223        let t = l.next_token();
1224        assert_eq!(Lexicon::Error("+".to_string()), t.term());
1225    }
1226
1227    #[test]
1228    fn test_wrong_help() {
1229        let s = &mut "help e".as_bytes();
1230        let mut l = Lexer::create(s);
1231        let t = l.next_token();
1232        assert_eq!(Lexicon::Error("e".to_string()), t.term());
1233    }
1234
1235    #[test]
1236    fn test_wrong_help_with_nothing() {
1237        let s = &mut "help".as_bytes();
1238        let mut l = Lexer::create(s);
1239        let t = l.next_token();
1240        assert_eq!(Lexicon::Error("help".to_string()), t.term());
1241    }
1242
1243    #[test]
1244    fn test_no_help_string() {
1245        let s = &mut "help\n  \nfoo".as_bytes();
1246        let mut l = Lexer::create(s);
1247        let t = l.next_token();
1248        assert_eq!(Lexicon::Error("\n \n".to_string()), t.term());
1249    }
1250
1251    #[test]
1252    fn test_macro_ident_at_end() {
1253        let s = &mut "$(foo".as_bytes();
1254        let mut l = Lexer::create(s);
1255        assert_eq!(Lexicon::MacroOpen, l.next_token().term());
1256        assert_eq!(
1257            Lexicon::Identifier("foo".to_string()),
1258            l.next_token().term()
1259        );
1260    }
1261
1262    #[test]
1263    fn test_end_half_way() {
1264        let s = &mut "$".as_bytes();
1265        let mut l = Lexer::create(s);
1266        assert_eq!(Lexicon::Error("$".to_string()), l.next_token().term());
1267    }
1268
1269    #[test]
1270    fn test_macro_paren_error() {
1271        let s = &mut "$((".as_bytes();
1272        let mut l = Lexer::create(s);
1273        assert_eq!(Lexicon::MacroOpen, l.next_token().term());
1274        assert_eq!(Lexicon::Error("(".to_string()), l.next_token().term());
1275    }
1276
1277    #[test]
1278    fn test_macro_ident_paren_error() {
1279        let s = &mut "$(foo(".as_bytes();
1280        let mut l = Lexer::create(s);
1281        assert_eq!(Lexicon::MacroOpen, l.next_token().term());
1282        assert_eq!(Lexicon::Error("foo(".to_string()), l.next_token().term());
1283    }
1284}