cas_parser/tokenizer/mod.rs

pub mod token;

use logos::{Lexer, Logos};
pub use token::{Token, TokenKind};

/// Returns an iterator over the token kinds produced by the tokenizer.
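///
/// A minimal usage sketch; the import path `cas_parser::tokenizer` is assumed from this file's
/// location, and `TokenKind::Int` is one of the variants exercised in the tests below.
///
/// ```
/// use cas_parser::tokenizer::{tokenize, TokenKind};
///
/// let mut lexer = tokenize("1 + 2");
/// // Each item is a `Result<TokenKind, _>`; `slice()` returns the matched text.
/// assert_eq!(lexer.next(), Some(Ok(TokenKind::Int)));
/// assert_eq!(lexer.slice(), "1");
/// ```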
pub fn tokenize(input: &str) -> Lexer<TokenKind> {
    TokenKind::lexer(input)
}

/// Returns an owned array containing all of the tokens produced by the tokenizer. This allows us
/// to backtrack in case of an error.
pub fn tokenize_complete(input: &str) -> Box<[Token]> {
    let mut lexer = tokenize(input);
    let mut tokens = Vec::new();

    // Collect tokens until the end of input or the first lexing error.
    while let Some(Ok(kind)) = lexer.next() {
        tokens.push(Token {
            span: lexer.span(),
            kind,
            lexeme: lexer.slice(),
        });
    }

    tokens.into_boxed_slice()
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Compares the tokens produced by the tokenizer against the expected `(kind, lexeme)` pairs,
    /// then asserts that no tokens remain.
    fn compare_tokens<'source, const N: usize>(
        input: &'source str,
        expected: [(TokenKind, &'source str); N],
    ) {
        let mut lexer = tokenize(input);

        for (expected_kind, expected_lexeme) in expected.into_iter() {
            assert_eq!(lexer.next(), Some(Ok(expected_kind)));
            assert_eq!(lexer.slice(), expected_lexeme);
        }

        assert_eq!(lexer.next(), None);
    }

    #[test]
    fn basic_expr() {
        compare_tokens(
            "1 + 2",
            [
                (TokenKind::Int, "1"),
                (TokenKind::Whitespace, " "),
                (TokenKind::Add, "+"),
                (TokenKind::Whitespace, " "),
                (TokenKind::Int, "2"),
            ],
        );
    }

    #[test]
    fn complex_expr() {
        compare_tokens(
            "3      x - 0xff + 0b101 * $",
            [
                (TokenKind::Int, "3"),
                (TokenKind::Whitespace, "      "),
                (TokenKind::Name, "x"),
                (TokenKind::Whitespace, " "),
                (TokenKind::Sub, "-"),
                (TokenKind::Whitespace, " "),
                (TokenKind::Hex, "0x"),
                (TokenKind::Name, "ff"),
                (TokenKind::Whitespace, " "),
                (TokenKind::Add, "+"),
                (TokenKind::Whitespace, " "),
                (TokenKind::Bin, "0b"),
                (TokenKind::Int, "101"),
                (TokenKind::Whitespace, " "),
                (TokenKind::Mul, "*"),
                (TokenKind::Whitespace, " "),
                (TokenKind::Symbol, "$"),
            ],
        );
    }
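
    // A sketch of a test exercising `tokenize_complete`, checking the kind, lexeme, and span
    // recorded for each token. It assumes the `Token` fields assigned in `tokenize_complete`
    // above are visible here and that `Lexer::span()` reports byte ranges into the input, as
    // logos does.
    #[test]
    fn complete_tokens() {
        let tokens = tokenize_complete("1 + 2");

        // Five tokens: `1`, ` `, `+`, ` `, `2` (whitespace is tokenized too).
        assert_eq!(tokens.len(), 5);

        assert_eq!(tokens[0].kind, TokenKind::Int);
        assert_eq!(tokens[0].lexeme, "1");
        assert_eq!(tokens[0].span, 0..1);

        assert_eq!(tokens[2].kind, TokenKind::Add);
        assert_eq!(tokens[2].lexeme, "+");
        assert_eq!(tokens[2].span, 2..3);
    }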
}