cas_parser/tokenizer/
token.rs

use logos::Logos;
use std::ops::Range;

/// The different kinds of tokens that can be produced by the tokenizer.
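///
/// A minimal usage sketch, assuming logos 0.13+ (where the derived lexer
/// yields `Result`) and assuming this module is reachable at
/// `cas_parser::tokenizer::token`:
///
/// ```
/// use logos::Logos;
/// use cas_parser::tokenizer::token::TokenKind;
///
/// let mut lexer = TokenKind::lexer("x + 2");
/// assert_eq!(lexer.next(), Some(Ok(TokenKind::Name)));
/// assert_eq!(lexer.next(), Some(Ok(TokenKind::Whitespace)));
/// assert_eq!(lexer.next(), Some(Ok(TokenKind::Add)));
/// assert_eq!(lexer.next(), Some(Ok(TokenKind::Whitespace)));
/// assert_eq!(lexer.next(), Some(Ok(TokenKind::Int)));
/// assert_eq!(lexer.next(), None);
/// ```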
#[derive(Logos, Clone, Copy, Debug, PartialEq, Eq)]
pub enum TokenKind {
    #[regex(r"[\n\r]+")]
    NewLine,

    #[regex(r"[ \t]+")]
    Whitespace,

    #[regex(r"//.*")]
    Comment,

    #[token("==")]
    Eq,

    #[token("!=")]
    NotEq,

    #[token("~==")]
    ApproxEq,

    #[token("~!=")]
    ApproxNotEq,

    #[token("+")]
    Add,

    #[token("-")]
    Sub,

    #[token("*")]
    Mul,

    #[token("/")]
    Div,

    #[token("%")]
    Mod,

    #[token("^")]
    Exp,

    #[token(">")]
    Greater,

    #[token(">=")]
    GreaterEq,

    #[token("<")]
    Less,

    #[token("<=")]
    LessEq,

    #[token("not")]
    Not,

    #[token("!")]
    Factorial,

    #[token("&&")]
    And,

    #[token("||")]
    Or,

    #[token("&")]
    BitAnd,

    #[token("|")]
    BitOr,

    #[token("~")]
    BitNot,

    #[token(">>")]
    BitRight,

    #[token("<<")]
    BitLeft,

    #[token("=")]
    Assign,

    #[token("+=")]
    AddAssign,

    #[token("-=")]
    SubAssign,

    #[token("*=")]
    MulAssign,

    #[token("/=")]
    DivAssign,

    #[token("%=")]
    ModAssign,

    #[token("^=")]
    ExpAssign,

    #[token("&&=")]
    AndAssign,

    #[token("||=")]
    OrAssign,

    #[token("&=")]
    BitAndAssign,

    #[token("|=")]
    BitOrAssign,

    #[token(">>=")]
    BitRightAssign,

    #[token("<<=")]
    BitLeftAssign,

    #[token("0b")]
    Bin,

    #[token("0o")]
    Oct,

    #[token("0x")]
    Hex,

    #[regex(r"([a-zA-Z_]+|atan2)")] // TODO: `atan2` is hard-coded so its trailing digit is lexed as part of the name; generalize this
    Name,

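    // Keywords also match the `Name` pattern above; logos breaks the tie by
    // pattern priority, which favors this longer, more specific alternation.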
136    #[regex(r"(if|then|else|loop|while|break|continue)")]
137    Keyword,
138
139    #[token(",")]
140    Comma,
141
142    #[token("(")]
143    OpenParen,
144
145    #[token(")")]
146    CloseParen,
147
148    #[token("{")]
149    OpenCurly,
150
151    #[token("}")]
152    CloseCurly,
153
154    #[token("[")]
155    OpenSquare,
156
157    #[token("]")]
158    CloseSquare,
159
160    #[token("'")]
161    Quote,
162
163    #[token(";")]
164    Semicolon,
165
166    #[regex(r"[0-9]+\.?")]
167    Int,
168
169    #[regex(r"([0-9]+\.[0-9]*|\.[0-9]+)")]
170    Float,
171
172    #[regex(r"(true|false)")]
173    Boolean,
174
175    #[token(".")]
176    Dot,
177
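    // Catch-all: matches any single character not handled above. `priority = 0`
    // ensures every other pattern wins ties against it.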
178    #[regex(r".", priority = 0)]
179    Symbol,
180}
181
182impl TokenKind {
    /// Returns true if this kind of token should be ignored by the parser.
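    ///
    /// A small doc-test sketch; the `cas_parser::tokenizer::token` path is an
    /// assumption based on this file's location:
    ///
    /// ```
    /// use cas_parser::tokenizer::token::TokenKind;
    ///
    /// assert!(TokenKind::Whitespace.is_ignore());
    /// assert!(TokenKind::Comment.is_ignore());
    /// assert!(!TokenKind::Add.is_ignore());
    /// ```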
    pub fn is_ignore(self) -> bool {
        matches!(self, TokenKind::Whitespace | TokenKind::NewLine | TokenKind::Comment)
    }

    /// Returns true if the token represents significant whitespace, i.e., a newline.
    pub fn is_significant_whitespace(self) -> bool {
        matches!(self, TokenKind::NewLine)
    }
}

/// A token produced by the tokenizer.
#[derive(Debug, Clone, PartialEq)]
pub struct Token<'source> {
    /// The region of the source code that this token originated from.
    pub span: Range<usize>,

    /// The kind of token.
    pub kind: TokenKind,

    /// The raw lexeme that was parsed into this token.
    pub lexeme: &'source str,
}

impl Token<'_> {
    /// Returns true if the token should be ignored by the parser.
    pub fn is_ignore(&self) -> bool {
        self.kind.is_ignore()
    }

    /// Returns true if the token represents significant whitespace, i.e., the token is a newline.
    pub fn is_significant_whitespace(&self) -> bool {
        self.kind.is_significant_whitespace()
    }
}
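
// A minimal test sketch, not part of the original file: it assumes logos 0.13+
// (where the derived lexer yields `Result<TokenKind, ()>`) and drives the lexer
// through a hypothetical `tokenize` helper that assembles `Token` values.
#[cfg(test)]
mod tests {
    use super::*;
    use logos::Logos;

    /// Collects every token in `source`, including whitespace and comments.
    fn tokenize(source: &str) -> Vec<Token<'_>> {
        let mut lexer = TokenKind::lexer(source);
        let mut tokens = Vec::new();
        while let Some(result) = lexer.next() {
            tokens.push(Token {
                span: lexer.span(),
                // `Symbol` is a catch-all, so lexing should never fail; fall
                // back to it defensively if it somehow does.
                kind: result.unwrap_or(TokenKind::Symbol),
                lexeme: lexer.slice(),
            });
        }
        tokens
    }

    #[test]
    fn keywords_beat_names() {
        let kinds: Vec<_> = tokenize("if x then y")
            .iter()
            .filter(|token| !token.is_ignore())
            .map(|token| token.kind)
            .collect();
        assert_eq!(kinds, [
            TokenKind::Keyword,
            TokenKind::Name,
            TokenKind::Keyword,
            TokenKind::Name,
        ]);
    }

    #[test]
    fn trivia_is_flagged_as_ignorable() {
        let tokens = tokenize("1 + 2 // comment");
        assert!(tokens
            .iter()
            .any(|token| token.kind == TokenKind::Comment && token.is_ignore()));
    }
}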