//! Lexical tokens and pattern-driven tokenizers (pattern_lexer/token.rs).
1use crate::pattern::Pattern;
2use std::fmt::Debug;
3
/// Premade [Token] kinds, listed from highest to lowest matching priority
/// (the example column is informative, not mandatory):
///
/// | ```TokenKind``` | Explanation | Examples |
/// |-------------------|------------------------------------|---------------------------|
/// | ```KEYWORD``` | Reserved words | `if` `return` `...` |
/// | ```DELIMITER``` | Paired delimiter symbols | `()` `[]` `{}` `...` |
/// | ```PUNCTUATION``` | Punctuation symbols | `;` `.` `...` |
/// | ```OPERATOR``` | Symbols that operate on arguments | `+` `-` `=` `...` |
/// | ```COMMENT``` | Line or block comments | `//` `/* ... */` `...` |
/// | ```WHITESPACE``` | Non-printable characters | `-` |
/// | ```LITERAL``` | Numerical, logical, textual values | `1` `true` `"true"` `...` |
/// | ```IDENTIFIER``` | Names assigned in a program | `x` `temp` `PRINT` `...` |
///
/// These token kinds (except ```IDENTIFIER```) should be constructed with a name that
/// can be used to differentiate tokens with the same kind.
//
// `Eq`, `Ord` and `Hash` are derivable here because every payload is `&str`;
// they let kinds be sorted by priority and used as map/set keys.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum TokenKind<'a> {
    // Variant order defines priority: the derived comparisons rank earlier
    // variants as smaller, i.e. higher priority.
    KEYWORD(&'a str),
    DELIMITER(&'a str),
    PUNCTUATION(&'a str),
    OPERATOR(&'a str),
    COMMENT(&'a str),
    WHITESPACE(&'a str),
    LITERAL(&'a str),
    IDENTIFIER,
}
31
/// A lexical token: a classification ([TokenKind]) paired with the exact
/// slice of the source text that matched it.
///
/// Both fields borrow from the input, so cloning a `Token` only copies
/// references and the token cannot outlive the source string `'a`.
#[derive(Debug, Clone, PartialEq)]
pub struct Token<'a> {
    /// Kind of the token
    pub kind: TokenKind<'a>,
    /// The value that matched the token
    pub value: &'a str,
}
40
41impl<'a> Token<'a> {
42 /// Create a lexical token
43 ///
44 /// # Example
45 /// ```rust
46 /// # use pattern_lexer::token::{TokenKind, Token};
47 /// #
48 /// let tok = Token::new(TokenKind::OPERATOR("PLUS"), "+");
49 /// ```
50 pub fn new(kind: TokenKind<'a>, value: &'a str) -> Self {
51 Self { kind, value }
52 }
53}
54
/// Produce [Token]s that match a [Pattern].
///
/// Pairs a token classification with a boxed, type-erased matching pattern;
/// see [Tokenizer::tokenize] for how a match is turned into a [Token].
pub struct Tokenizer<'a> {
    /// Kind assigned to every token this tokenizer produces
    kind: TokenKind<'a>,
    /// The pattern that should match the token
    pattern: Box<dyn Pattern<'a>>,
}
62
63impl<'a> Tokenizer<'a> {
64 /// Create a Tokenizer
65 ///
66 /// # Example
67 /// ```rust
68 /// # use regex::Regex;
69 /// # use pattern_lexer::token::{TokenKind, Tokenizer};
70 /// // Create a token that matches variable names
71 /// let id_regex = Regex::new(r"[a-zA-Z_$][a-zA-Z_$0-9]*").unwrap();
72 /// let id = Tokenizer::new(TokenKind::IDENTIFIER, id_regex);
73 /// ```
74 pub fn new<P: Pattern<'a> + 'static>(kind: TokenKind<'a>, pat: P) -> Self {
75 Self {
76 kind,
77 pattern: Box::new(pat),
78 }
79 }
80
81 /// Return a [Token] from the given `&str` if it find a match
82 ///
83 /// # Example
84 /// ```rust
85 /// # use pattern_lexer::token::{TokenKind, Token, Tokenizer};
86 /// #
87 /// let kind = TokenKind::KEYWORD("FUNC");
88 /// let function = Tokenizer::new(kind.clone(), "fn");
89 /// assert!(function.tokenize("test").is_none());
90 /// assert_eq!(function.tokenize("fn"), Some(Token::new(kind, "fn")));
91 /// ```
92 pub fn tokenize(&self, value: &'a str) -> Option<Token<'a>> {
93 self.pattern
94 .find_one_prefix_in(value)
95 .map(|mat| Token::new(self.kind.clone(), mat.as_str()))
96 }
97}
98
99impl<'a> PartialEq for Tokenizer<'a> {
100 fn eq(&self, other: &Self) -> bool {
101 self.kind.eq(&other.kind)
102 }
103}
104
105impl<'a> PartialOrd for Tokenizer<'a> {
106 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
107 self.kind.partial_cmp(&other.kind)
108 }
109}
110
111impl<'a> Debug for Tokenizer<'a> {
112 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
113 write!(f, "{:?}", self.kind)
114 }
115}