cas_parser/tokenizer/token.rs

use logos::Logos;
use std::ops::Range;

/// The different kinds of tokens that can be produced by the tokenizer.
#[derive(Logos, Clone, Copy, Debug, PartialEq, Eq)]
pub enum TokenKind {
    #[regex(r"[\n\r]+")]
    NewLine,

    #[regex(r"[ \t]+")]
    Whitespace,

    #[regex(r"//.*")]
    Comment,

    #[token("==")]
    Eq,

    #[token("!=")]
    NotEq,

    #[token("~==")]
    ApproxEq,

    #[token("~!=")]
    ApproxNotEq,

    #[token("+")]
    Add,

    #[token("-")]
    Sub,

    #[token("*")]
    Mul,

    #[token("/")]
    Div,

    #[token("%")]
    Mod,

    #[token("^")]
    Exp,

    #[token(">")]
    Greater,

    #[token(">=")]
    GreaterEq,

    #[token("<")]
    Less,

    #[token("<=")]
    LessEq,

    #[token("not")]
    Not,

    #[token("!")]
    Factorial,

    #[token("&&")]
    And,

    #[token("||")]
    Or,

    #[token("&")]
    BitAnd,

    #[token("|")]
    BitOr,

    #[token("~")]
    BitNot,

    #[token(">>")]
    BitRight,

    #[token("<<")]
    BitLeft,

    #[token("=")]
    Assign,

    #[token("+=")]
    AddAssign,

    #[token("-=")]
    SubAssign,

    #[token("*=")]
    MulAssign,

    #[token("/=")]
    DivAssign,

    #[token("%=")]
    ModAssign,

    #[token("^=")]
    ExpAssign,

    #[token("&&=")]
    AndAssign,

    #[token("||=")]
    OrAssign,

    #[token("&=")]
    BitAndAssign,

    #[token("|=")]
    BitOrAssign,

    #[token(">>=")]
    BitRightAssign,

    #[token("<<=")]
    BitLeftAssign,

    #[token("0b")]
    Bin,

    #[token("0o")]
    Oct,

    #[token("0x")]
    Hex,

    #[regex(r"[a-zA-Z_]+|atan2")] // TODO: includes horrible hard-coded test for atan2
    Name,

    #[regex(r"let|if|then|else|for|sum|product|in|of|loop|while|break|continue|return")]
    Keyword,

    #[token(",")]
    Comma,

    #[token("(")]
    OpenParen,

    #[token(")")]
    CloseParen,

    #[token("{")]
    OpenCurly,

    #[token("}")]
    CloseCurly,

    #[token("[")]
    OpenSquare,

    #[token("]")]
    CloseSquare,

    #[token("'")]
    Quote,

    #[token(";")]
    Semicolon,

    #[regex(r"\d+")]
    Int,

    #[regex(r"(true|false)")]
    Boolean,

    #[token(".")]
    Dot,

    #[token("..")]
    RangeHalfOpen,

    #[token("..=")]
    RangeClosed,

    #[regex(r".", priority = 0)]
    Symbol,
}
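
// A minimal sanity check, added here for illustration (not from the original
// crate). `#[derive(Logos)]` generates `TokenKind::lexer`; assuming logos
// 0.13+ (no `#[error]` variant), the lexer yields `Result<TokenKind, ()>`
// items. Longest-match resolution is what lets `..=` win over `..` and `.`,
// and `Name` win over the priority-0 catch-all `Symbol`.
#[cfg(test)]
mod token_kind_tests {
    use super::*;

    #[test]
    fn longest_match_wins() {
        let kinds: Vec<_> = TokenKind::lexer("1..=10")
            .collect::<Result<Vec<_>, _>>()
            .expect("every slice should lex");
        assert_eq!(kinds, [TokenKind::Int, TokenKind::RangeClosed, TokenKind::Int]);
    }
}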

impl TokenKind {
    /// Returns true if this kind of token should be ignored by the parser.
    pub fn is_ignore(self) -> bool {
        matches!(self, TokenKind::Whitespace | TokenKind::NewLine | TokenKind::Comment)
    }

    /// Returns true if this kind of token is significant whitespace, i.e. a newline.
    pub fn is_significant_whitespace(self) -> bool {
        matches!(self, TokenKind::NewLine)
    }
}

/// A token produced by the tokenizer.
#[derive(Debug, Clone, PartialEq)]
pub struct Token<'source> {
    /// The region of the source code that this token originated from.
    pub span: Range<usize>,

    /// The kind of token.
    pub kind: TokenKind,

    /// The raw lexeme that was parsed into this token.
    pub lexeme: &'source str,
}

impl Token<'_> {
    /// Returns true if the token should be ignored by the parser.
    pub fn is_ignore(&self) -> bool {
        self.kind.is_ignore()
    }

    /// Returns true if the token is significant whitespace, i.e. the token is a newline.
    pub fn is_significant_whitespace(&self) -> bool {
        self.kind.is_significant_whitespace()
    }
}
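
// A hedged sketch (added for illustration; `tokenize` is not an API of the
// original crate) of how `Token` values can be assembled from the derived
// lexer: `Lexer::span` and `Lexer::slice` supply `span` and `lexeme` for each
// `TokenKind`. An unrecognized slice is surfaced here as its raw span; the
// real parser may report errors differently. Callers that only want
// significant tokens can then drop those where `is_ignore` is true.
#[allow(dead_code)]
fn tokenize(source: &str) -> Result<Vec<Token<'_>>, Range<usize>> {
    let mut lexer = TokenKind::lexer(source);
    let mut tokens = Vec::new();
    while let Some(result) = lexer.next() {
        match result {
            // Pair the matched kind with its source region and raw text.
            Ok(kind) => tokens.push(Token {
                span: lexer.span(),
                kind,
                lexeme: lexer.slice(),
            }),
            // Report the span of the input the lexer could not match.
            Err(()) => return Err(lexer.span()),
        }
    }
    Ok(tokens)
}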