1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
use crate::pattern::Pattern;
use std::fmt::Debug;

/// Premade [Token] kinds, grouped by semantic role (the examples are illustrative, not
/// mandatory):
///
/// | `TokenKind`   | Explanation                        | Examples             |
/// |---------------|------------------------------------|----------------------|
/// | `KEYWORD`     | Reserved words                     | if, return, ...      |
/// | `DELIMITER`   | Paired delimiter symbols           | (), [], {}, ...      |
/// | `PUNCTUATION` | Punctuation symbols                | ;, ., ...            |
/// | `OPERATOR`    | Symbols that operate on arguments  | +, -, =, ...         |
/// | `COMMENT`     | Line or block comments             | //, /* ... */, ...   |
/// | `WHITESPACE`  | Non-printable characters           | -                    |
/// | `LITERAL`     | Numerical, logical, textual values | 1, true, "true", ... |
/// | `IDENTIFIER`  | Names assigned in a program        | x, temp, PRINT       |
///
/// Every kind except `IDENTIFIER` is constructed with a name that can be used to
/// differentiate tokens of the same kind.
///
/// Variants are declared from highest to lowest priority. The derived ordering follows
/// declaration order, so a higher-priority kind compares as *less than* a lower-priority
/// one; two values of the same variant are ordered by their name.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum TokenKind<'a> {
    /// Reserved word, tagged with a distinguishing name.
    KEYWORD(&'a str),
    /// Paired delimiter symbol, tagged with a distinguishing name.
    DELIMITER(&'a str),
    /// Punctuation symbol, tagged with a distinguishing name.
    PUNCTUATION(&'a str),
    /// Symbol that operates on arguments, tagged with a distinguishing name.
    OPERATOR(&'a str),
    /// Line or block comment, tagged with a distinguishing name.
    COMMENT(&'a str),
    /// Non-printable characters, tagged with a distinguishing name.
    WHITESPACE(&'a str),
    /// Numerical, logical or textual value, tagged with a distinguishing name.
    LITERAL(&'a str),
    /// Name assigned in a program; carries no distinguishing name.
    IDENTIFIER,
}

/// A lexical token: a piece of input text together with the kind it was classified as.
#[derive(Debug, Clone, PartialEq)]
pub struct Token<'a> {
    /// Kind assigned to this token (its category and, for most kinds, a name).
    pub kind: TokenKind<'a>,
    /// The text that was matched for this token, borrowed for `'a`.
    pub value: &'a str,
}

impl<'a> Token<'a> {
    /// Create a lexical token
    ///
    /// # Example
    /// ```rust
    /// use lexer::token::{TokenKind, Token};
    ///
    /// let tok = Token::new(TokenKind::OPERATOR("PLUS"), "+");
    /// ```
    pub fn new(kind: TokenKind<'a>, value: &'a str) -> Self {
        Self { kind, value }
    }
}

/// Produces a [Token] of a fixed kind whenever its [Pattern] matches.
///
/// Equality and ordering between tokenizers look only at `kind`; the pattern takes no
/// part in comparisons.
pub struct Tokenizer<'a> {
    /// Kind given to every token this tokenizer produces.
    kind: TokenKind<'a>,
    /// Pattern used to find the token's text; boxed so that any [Pattern]
    /// implementation can be stored.
    pattern: Box<dyn Pattern<'a>>,
}

impl<'a> Tokenizer<'a> {
    /// Create a Tokenizer
    ///
    /// # Example
    /// ```rust
    /// use regex::Regex;
    /// use lexer::token::{TokenKind, Tokenizer};
    /// // Create a token that matches variable names
    /// let id_regex = Regex::new(r"[a-zA-Z_$][a-zA-Z_$0-9]*").unwrap();
    /// let id = Tokenizer::new(TokenKind::IDENTIFIER, id_regex);
    /// ```
    pub fn new<P: Pattern<'a> + 'static>(kind: TokenKind<'a>, pat: P) -> Self {
        Self {
            kind,
            pattern: Box::new(pat),
        }
    }

    /// Return a [Token] from the given string if it find a match
    ///
    /// # Example
    /// ```rust
    /// use lexer::token::{TokenKind, Token, Tokenizer};
    ///
    /// let kind = TokenKind::KEYWORD("FUNC");
    /// let function = Tokenizer::new(kind.clone(), "fn");
    /// assert!(function.tokenize("test").is_none());
    /// assert_eq!(function.tokenize("fn"), Some(Token::new(kind, "fn")));
    /// ```
    pub fn tokenize(&self, value: &'a str) -> Option<Token<'a>> {
        self.pattern
            .find_one_prefix_in(value)
            .map(|mat| Token::new(self.kind.clone(), mat.as_str()))
    }
}

impl<'a> PartialEq for Tokenizer<'a> {
    /// Two tokenizers are equal when their kinds are equal; patterns are not compared.
    fn eq(&self, rhs: &Self) -> bool {
        self.kind == rhs.kind
    }
}

impl<'a> PartialOrd for Tokenizer<'a> {
    /// Order tokenizers by their kind alone; patterns are not compared.
    fn partial_cmp(&self, rhs: &Self) -> Option<std::cmp::Ordering> {
        PartialOrd::partial_cmp(&self.kind, &rhs.kind)
    }
}

impl<'a> Debug for Tokenizer<'a> {
    /// Print only the tokenizer's kind, using the kind's plain `{:?}` representation.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_fmt(format_args!("{:?}", self.kind))
    }
}