pattern_lexer/
token.rs

1use crate::pattern::Pattern;
2use std::fmt::Debug;
3
/// Premade [Token] kinds, grouped by semantic role (the examples are illustrative,
/// not mandatory):
///
/// | `TokenKind`   | Explanation                        | Examples                  |
/// |---------------|------------------------------------|---------------------------|
/// | `KEYWORD`     | Reserved words                     | `if` `return` `...`       |
/// | `DELIMITER`   | Paired delimiter symbols           | `()` `[]` `{}` `...`      |
/// | `PUNCTUATION` | Punctuation symbols                | `;` `.` `...`             |
/// | `OPERATOR`    | Symbols that operate on arguments  | `+` `-` `=` `...`         |
/// | `COMMENT`     | Line or block comments             | `//` `/* ... */` `...`    |
/// | `WHITESPACE`  | Non-printable characters           | space, tab, newline       |
/// | `LITERAL`     | Numerical, logical, textual values | `1` `true` `"true"` `...` |
/// | `IDENTIFIER`  | Names assigned in a program        | `x` `temp` `PRINT` `...`  |
///
/// Every kind except `IDENTIFIER` is constructed with a name that can be used to
/// differentiate tokens of the same kind (for example `OPERATOR("PLUS")` versus
/// `OPERATOR("MINUS")`).
///
/// # Ordering
/// The derived ordering follows the declaration order of the variants: a kind
/// declared earlier compares as *less than* a kind declared later, and two values
/// of the same kind are ordered by their name. Because the variants are declared
/// from high priority to low priority, sorting in ascending order puts the
/// highest-priority kinds first.
///
/// The type implements the total-order and hashing traits (`Eq`, `Ord`, `Hash`)
/// so that it can be sorted with `sort()` and used as a key in ordered or hashed
/// collections.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum TokenKind<'a> {
    // Ordered from high priority to low priority; the derived `PartialOrd`/`Ord`
    // rely on this declaration order, so do not reorder the variants casually.
    /// Reserved word, identified by the given name.
    KEYWORD(&'a str),
    /// Paired delimiter symbol, identified by the given name.
    DELIMITER(&'a str),
    /// Punctuation symbol, identified by the given name.
    PUNCTUATION(&'a str),
    /// Operator symbol, identified by the given name.
    OPERATOR(&'a str),
    /// Line or block comment, identified by the given name.
    COMMENT(&'a str),
    /// Whitespace, identified by the given name.
    WHITESPACE(&'a str),
    /// Literal value, identified by the given name.
    LITERAL(&'a str),
    /// Name assigned in a program; carries no distinguishing name.
    IDENTIFIER,
}
31
32/// A lexical token.
33#[derive(Debug, Clone, PartialEq)]
34pub struct Token<'a> {
35    /// Kind of the token
36    pub kind: TokenKind<'a>,
37    /// The value that matched the token
38    pub value: &'a str,
39}
40
41impl<'a> Token<'a> {
42    /// Create a lexical token
43    ///
44    /// # Example
45    /// ```rust
46    /// # use pattern_lexer::token::{TokenKind, Token};
47    /// #
48    /// let tok = Token::new(TokenKind::OPERATOR("PLUS"), "+");
49    /// ```
50    pub fn new(kind: TokenKind<'a>, value: &'a str) -> Self {
51        Self { kind, value }
52    }
53}
54
55/// Produce [Token] that match a [Pattern]
56pub struct Tokenizer<'a> {
57    /// Kind of the token
58    kind: TokenKind<'a>,
59    /// The pattern that should match the token
60    pattern: Box<dyn Pattern<'a>>,
61}
62
63impl<'a> Tokenizer<'a> {
64    /// Create a Tokenizer
65    ///
66    /// # Example
67    /// ```rust
68    /// # use regex::Regex;
69    /// # use pattern_lexer::token::{TokenKind, Tokenizer};
70    /// // Create a token that matches variable names
71    /// let id_regex = Regex::new(r"[a-zA-Z_$][a-zA-Z_$0-9]*").unwrap();
72    /// let id = Tokenizer::new(TokenKind::IDENTIFIER, id_regex);
73    /// ```
74    pub fn new<P: Pattern<'a> + 'static>(kind: TokenKind<'a>, pat: P) -> Self {
75        Self {
76            kind,
77            pattern: Box::new(pat),
78        }
79    }
80
81    /// Return a [Token] from the given `&str` if it find a match
82    ///
83    /// # Example
84    /// ```rust
85    /// # use pattern_lexer::token::{TokenKind, Token, Tokenizer};
86    /// #
87    /// let kind = TokenKind::KEYWORD("FUNC");
88    /// let function = Tokenizer::new(kind.clone(), "fn");
89    /// assert!(function.tokenize("test").is_none());
90    /// assert_eq!(function.tokenize("fn"), Some(Token::new(kind, "fn")));
91    /// ```
92    pub fn tokenize(&self, value: &'a str) -> Option<Token<'a>> {
93        self.pattern
94            .find_one_prefix_in(value)
95            .map(|mat| Token::new(self.kind.clone(), mat.as_str()))
96    }
97}
98
99impl<'a> PartialEq for Tokenizer<'a> {
100    fn eq(&self, other: &Self) -> bool {
101        self.kind.eq(&other.kind)
102    }
103}
104
105impl<'a> PartialOrd for Tokenizer<'a> {
106    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
107        self.kind.partial_cmp(&other.kind)
108    }
109}
110
111impl<'a> Debug for Tokenizer<'a> {
112    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
113        write!(f, "{:?}", self.kind)
114    }
115}