1use logos::Logos;
2use std::fmt::{Display, Formatter};
3
/// Lexical tokens of the language. The matching rules are supplied to the
/// `logos` derive through the `#[token]` / `#[regex]` attributes on each variant.
///
/// NOTE(review): the bracket variant names are unconventional — the `Brace*`
/// variants match `(` / `)` while the `Paren*` variants match `{` / `}`.
#[derive(Logos, Debug, Clone, PartialEq)]
// Skipped input: runs of whitespace and `;` characters.
#[logos(skip r"[ \t\n\f\r;]+")]
// Skipped input: `//` comments, up to but not including the next newline.
#[logos(skip r"//[^\n]*")]
// Skipped input: `/* ... */` block comments.
#[logos(skip r"/\*[^*]*\*+(?:[^/*][^*]*\*+)*/")]
pub enum Token {
    /// One of the reserved words listed in the attributes below; the payload
    /// is an owned copy of the matched text.
    #[token("if", |lex| lex.slice().to_owned())]
    #[token("else", |lex| lex.slice().to_owned())]
    #[token("while", |lex| lex.slice().to_owned())]
    #[token("fn", |lex| lex.slice().to_owned())]
    #[token("extern", |lex| lex.slice().to_owned())]
    #[token("struct", |lex| lex.slice().to_owned())]
    #[token("return", |lex| lex.slice().to_owned())]
    #[token("let", |lex| lex.slice().to_owned())]
    #[token("match", |lex| lex.slice().to_owned())]
    #[token("import", |lex| lex.slice().to_owned())]
    #[token("const", |lex| lex.slice().to_owned())]
    #[token("continue", |lex| lex.slice().to_owned())]
    #[token("break", |lex| lex.slice().to_owned())]
    #[token("enum", |lex| lex.slice().to_owned())]
    #[token("class", |lex| lex.slice().to_owned())]
    #[token("module", |lex| lex.slice().to_owned())]
    #[token("static", |lex| lex.slice().to_owned())]
    Keyword(String),
    /// The literal word `true`.
    #[token("true")]
    True,
    /// The literal word `false`.
    #[token("false")]
    False,

    /// A double-quoted string literal in which `\` escapes the following
    /// character. The payload is produced by `parse_string`, which drops the
    /// quotes and decodes the escapes.
    #[regex(r#""([^"\\]|\\.)*""#, |lex| parse_string(lex.slice()))]
    String(String),

    /// A string literal prefixed with `f` (`f"..."`). The payload is produced
    /// by `parse_format_string`, which drops the `f"` prefix and closing quote
    /// and decodes the escapes.
    #[regex(r#"f"([^"\\]|\\.)*""#, |lex| parse_format_string(lex.slice()))]
    FormatString(String),

    /// Integer literal with an `i8` suffix: optional `-`, decimal digits, an
    /// optional single `_`, then `i8`. Converted by `parse_i8`.
    #[regex(r"-?[0-9]+(_)?i8", |lex| parse_i8(lex.slice()))]
    Int8(i8),
    /// Integer literal with an `i16` suffix; same shape as `Int8`.
    #[regex(r"-?[0-9]+(_)?i16", |lex| parse_i16(lex.slice()))]
    Int16(i16),
    /// Integer literal with an `i32` suffix; same shape as `Int8`.
    #[regex(r"-?[0-9]+(_)?i32", |lex| parse_i32(lex.slice()))]
    Int32(i32),
    /// Integer literal with an `i64` suffix; same shape as `Int8`.
    #[regex(r"-?[0-9]+(_)?i64", |lex| parse_i64(lex.slice()))]
    Int64(i64),
    /// Unsuffixed integer literal (optional `-`, decimal digits), parsed as `i64`.
    #[regex(r"-?[0-9]+", |lex| lex.slice().parse::<i64>().ok())]
    Int(i64),

    /// Floating-point literal with an `f32` suffix: optional `-`, digits, `.`,
    /// digits, an optional single `_`, then `f32`. Converted by `parse_f32`.
    #[regex(r"-?[0-9]+\.[0-9]+(_)?f32", |lex| parse_f32(lex.slice()))]
    Float32(f32),
    /// Floating-point literal with an `f64` suffix; same shape as `Float32`.
    #[regex(r"-?[0-9]+\.[0-9]+(_)?f64", |lex| parse_f64(lex.slice()))]
    Float64(f64),
    /// Unsuffixed floating-point literal, parsed as `f64`.
    #[regex(r"-?[0-9]+\.[0-9]+", |lex| lex.slice().parse::<f64>().ok())]
    Float(f64),
    /// A name: an ASCII letter or `_` followed by ASCII letters, digits or `_`.
    /// The payload is an owned copy of the matched text.
    #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*", |lex| lex.slice().to_owned())]
    Identifier(String),

    /// `(` — note the name says "brace" although the symbol is a parenthesis.
    #[token("(")]
    BraceLeft,
    /// `)`
    #[token(")")]
    BraceRight,
    /// `[`
    #[token("[")]
    BracketLeft,
    /// `]`
    #[token("]")]
    BracketRight,
    /// `{` — note the name says "paren" although the symbol is a curly brace.
    #[token("{")]
    ParenLeft,
    /// `}`
    #[token("}")]
    ParenRight,
    /// `.`
    #[token(".")]
    Dot,
    /// `:`
    #[token(":")]
    Colon,
    /// `::`
    #[token("::")]
    ScopeSymbol,
    /// `=`
    #[token("=")]
    Assign,
    /// `,`
    #[token(",")]
    Comma,
    /// `+`
    #[token("+")]
    Plus,
    /// `-`
    #[token("-")]
    Minus,
    /// `*`
    #[token("*")]
    Star,
    /// `/`
    #[token("/")]
    Slash,
    /// `%`
    #[token("%")]
    Mod,
    /// `>`
    #[token(">")]
    Greater,
    /// `<`
    #[token("<")]
    Less,
    /// `<=`
    #[token("<=")]
    LessEqual,
    /// `>=`
    #[token(">=")]
    GreaterEqual,
    /// `==`
    #[token("==")]
    Equal,
    /// `!=`
    #[token("!=")]
    NotEqual,
    /// `->`
    #[token("->")]
    Arrow,
    /// `!`
    #[token("!")]
    Not,
    /// `&&`
    #[token("&&")]
    And,
    /// `||`
    #[token("||")]
    Or,
    /// `|`
    #[token("|")]
    Vertical,
    /// `@`
    #[token("@")]
    Annotation,
    /// `&`
    #[token("&")]
    BitAnd,

    /// A boolean value. This variant carries no `#[token]` / `#[regex]`
    /// attribute; it is built by `Token::from_keyword_or_identifier`.
    Boolean(bool),

    /// End-of-input marker. This variant carries no `#[token]` / `#[regex]`
    /// attribute; presumably it is appended by the code driving the lexer —
    /// TODO confirm against the caller.
    Eof,
}
134
135fn parse_string(s: &str) -> Option<String> {
137 let content = &s[1..s.len() - 1]; Some(unescape_string(content))
139}
140
141fn parse_format_string(s: &str) -> Option<String> {
143 let content = &s[2..s.len() - 1]; Some(unescape_string(content))
145}
146
/// Parses an `i8` literal lexeme such as `42i8` or `-7_i8`.
///
/// The `i8` suffix and at most one `_` directly before it are removed; the
/// rest is parsed as a decimal `i8`.
///
/// Returns `None` if the suffix is missing or the digits are not a valid `i8`.
fn parse_i8(s: &str) -> Option<i8> {
    // `strip_suffix` replaces hand-counted byte offsets, so a lexeme that is
    // too short or lacks the suffix gives `None` instead of panicking or
    // silently dropping trailing digits.
    let digits = s.strip_suffix("i8")?;
    let digits = digits.strip_suffix('_').unwrap_or(digits);
    digits.parse::<i8>().ok()
}
156
/// Parses an `i16` literal lexeme such as `1000i16` or `-7_i16`.
///
/// The `i16` suffix and at most one `_` directly before it are removed; the
/// rest is parsed as a decimal `i16`.
///
/// Returns `None` if the suffix is missing or the digits are not a valid `i16`.
fn parse_i16(s: &str) -> Option<i16> {
    // `strip_suffix` replaces hand-counted byte offsets, so a lexeme that is
    // too short or lacks the suffix gives `None` instead of panicking or
    // silently dropping trailing digits.
    let digits = s.strip_suffix("i16")?;
    let digits = digits.strip_suffix('_').unwrap_or(digits);
    digits.parse::<i16>().ok()
}
166
/// Parses an `i32` literal lexeme such as `42i32` or `-7_i32`.
///
/// The `i32` suffix and at most one `_` directly before it are removed; the
/// rest is parsed as a decimal `i32`.
///
/// Returns `None` if the suffix is missing or the digits are not a valid `i32`.
fn parse_i32(s: &str) -> Option<i32> {
    // `strip_suffix` replaces hand-counted byte offsets, so a lexeme that is
    // too short or lacks the suffix gives `None` instead of panicking or
    // silently dropping trailing digits.
    let digits = s.strip_suffix("i32")?;
    let digits = digits.strip_suffix('_').unwrap_or(digits);
    digits.parse::<i32>().ok()
}
176
/// Parses an `i64` literal lexeme such as `42i64` or `-7_i64`.
///
/// The `i64` suffix and at most one `_` directly before it are removed; the
/// rest is parsed as a decimal `i64`.
///
/// Returns `None` if the suffix is missing or the digits are not a valid `i64`.
fn parse_i64(s: &str) -> Option<i64> {
    // `strip_suffix` replaces hand-counted byte offsets, so a lexeme that is
    // too short or lacks the suffix gives `None` instead of panicking or
    // silently dropping trailing digits.
    let digits = s.strip_suffix("i64")?;
    let digits = digits.strip_suffix('_').unwrap_or(digits);
    digits.parse::<i64>().ok()
}
188
/// Parses an `f32` literal lexeme such as `1.5f32` or `-0.25_f32`.
///
/// The `f32` suffix and at most one `_` directly before it are removed; the
/// rest is parsed as an `f32`.
///
/// Returns `None` if the suffix is missing or the remainder is not a valid `f32`.
fn parse_f32(s: &str) -> Option<f32> {
    // `strip_suffix` replaces hand-counted byte offsets, so a lexeme that is
    // too short or lacks the suffix gives `None` instead of panicking or
    // silently dropping trailing digits.
    let digits = s.strip_suffix("f32")?;
    let digits = digits.strip_suffix('_').unwrap_or(digits);
    digits.parse::<f32>().ok()
}
198
/// Parses an `f64` literal lexeme such as `1.5f64` or `-0.25_f64`.
///
/// The `f64` suffix and at most one `_` directly before it are removed; the
/// rest is parsed as an `f64`.
///
/// Returns `None` if the suffix is missing or the remainder is not a valid `f64`.
fn parse_f64(s: &str) -> Option<f64> {
    // `strip_suffix` replaces hand-counted byte offsets, so a lexeme that is
    // too short or lacks the suffix gives `None` instead of panicking or
    // silently dropping trailing digits.
    let digits = s.strip_suffix("f64")?;
    let digits = digits.strip_suffix('_').unwrap_or(digits);
    digits.parse::<f64>().ok()
}
208
/// Decodes backslash escape sequences in `s` and returns the resulting text.
///
/// Recognised sequences are `\n`, `\t`, `\r`, `\\`, `\"` and `\0`. Any other
/// escaped character is kept verbatim together with its backslash, and a lone
/// backslash at the very end of the input is kept as well.
fn unescape_string(s: &str) -> String {
    let mut decoded = String::with_capacity(s.len());
    // Set after a backslash has been read and its partner is still pending.
    let mut after_backslash = false;

    for current in s.chars() {
        if !after_backslash {
            if current == '\\' {
                after_backslash = true;
            } else {
                decoded.push(current);
            }
            continue;
        }

        after_backslash = false;
        let replacement = match current {
            'n' => '\n',
            't' => '\t',
            'r' => '\r',
            '\\' => '\\',
            '"' => '"',
            '0' => '\0',
            unknown => {
                // Unknown escape: emit the backslash, then the character itself.
                decoded.push('\\');
                unknown
            }
        };
        decoded.push(replacement);
    }

    // The input ended right after a backslash: keep that backslash.
    if after_backslash {
        decoded.push('\\');
    }

    decoded
}
236
237impl Token {
238 pub fn from_keyword_or_identifier(s: String) -> Self {
240 match s.as_str() {
241 "true" => Token::Boolean(true),
242 "false" => Token::Boolean(false),
243 _ => Token::Identifier(s),
244 }
245 }
246
247 #[allow(unused)]
249 pub fn token_id(&self) -> i8 {
250 match self {
251 Token::String(_) => 0,
252 Token::Int(_) => 1,
253 Token::Int8(_) => 37,
254 Token::Int16(_) => 38,
255 Token::Int32(_) => 39,
256 Token::Int64(_) => 40,
257 Token::Float(_) => 2,
258 Token::Float32(_) => 41,
259 Token::Float64(_) => 42,
260 Token::Identifier(_) => 3,
261 Token::Keyword(_) => 4,
262 Token::BraceLeft => 5,
263 Token::BraceRight => 6,
264 Token::BracketLeft => 7,
265 Token::BracketRight => 8,
266 Token::ParenLeft => 9,
267 Token::ParenRight => 10,
268 Token::Dot => 11,
269 Token::Comma => 12,
270 Token::Eof => 13,
271 Token::Colon => 14,
272 Token::Assign => 15,
273 Token::Plus => 16,
274 Token::Star => 17,
275 Token::Greater => 18,
276 Token::Less => 19,
277 Token::Equal => 20,
278 Token::Minus => 21,
279 Token::Slash => 22,
280 Token::Mod => 23,
281 Token::ScopeSymbol => 24,
282 Token::NotEqual => 25,
283 Token::Arrow => 26,
284 Token::Not => 27,
285 Token::And => 28,
286 Token::Vertical => 29,
287 Token::Annotation => 30,
288 Token::Or => 31,
289 Token::Boolean(_) => 32,
290 Token::FormatString(_) => 33,
291 Token::LessEqual => 34,
292 Token::GreaterEqual => 35,
293 Token::BitAnd => 36,
294 _ => -1,
295 }
296 }
297
298 pub fn is_colon(&self) -> bool {
300 matches!(self, Token::Colon)
301 }
302
303 pub fn is_parenthesis_left(&self) -> bool {
305 matches!(self, Token::ParenLeft)
306 }
307
308 pub fn is_keyword(&self, keyword: &str) -> bool {
310 match self {
311 Token::Keyword(k) => k == keyword,
312 _ => false,
313 }
314 }
315
316 pub fn is_assign(&self) -> bool {
318 matches!(self, Token::Assign)
319 }
320
321 pub fn get_identifier_value(&self) -> &str {
323 match self {
324 Token::Identifier(s) => s.as_str(),
325 _ => "",
326 }
327 }
328}
329
330impl Display for Token {
331 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
332 match self {
333 Token::String(s) => write!(f, "String({s})"),
334 Token::FormatString(s) => write!(f, "FormatString({s})"),
335 Token::Int(i) => write!(f, "Int({i})"),
336 Token::Int8(i) => write!(f, "Int8({i})"),
337 Token::Int16(i) => write!(f, "Int16({i})"),
338 Token::Int32(i) => write!(f, "Int32({i})"),
339 Token::Int64(i) => write!(f, "Int64({i})"),
340 Token::Float(fl) => write!(f, "Float({fl})"),
341 Token::Float32(fl) => write!(f, "Float32({fl})"),
342 Token::Float64(fl) => write!(f, "Float64({fl})"),
343 Token::Identifier(i) => write!(f, "Identifier({i})"),
344 Token::Boolean(b) => write!(f, "Boolean({b})"),
345 Token::Keyword(kw) => write!(f, "Keyword({kw})"),
346 Token::True => write!(f, "Boolean(true)"),
347 Token::False => write!(f, "Boolean(false)"),
348 Token::BraceLeft => write!(f, "Symbol(()"),
349 Token::BraceRight => write!(f, "Symbol())"),
350 Token::BracketLeft => write!(f, "Symbol([)"),
351 Token::BracketRight => write!(f, "Symbol(])"),
352 Token::ParenLeft => write!(f, "Symbol({{)"),
353 Token::ParenRight => write!(f, "Symbol(}})"),
354 Token::Dot => write!(f, "Symbol(.)"),
355 Token::Colon => write!(f, "Symbol(:)"),
356 Token::ScopeSymbol => write!(f, "Symbol(::)"),
357 Token::Assign => write!(f, "Symbol(=)"),
358 Token::Comma => write!(f, "Symbol(,)"),
359 Token::Plus => write!(f, "Symbol(+)"),
360 Token::Minus => write!(f, "Symbol(-)"),
361 Token::Star => write!(f, "Symbol(*)"),
362 Token::Slash => write!(f, "Symbol(/)"),
363 Token::Mod => write!(f, "Symbol(%)"),
364 Token::Greater => write!(f, "Symbol(>)"),
365 Token::Less => write!(f, "Symbol(<)"),
366 Token::LessEqual => write!(f, "Symbol(<=)"),
367 Token::GreaterEqual => write!(f, "Symbol(>=)"),
368 Token::Equal => write!(f, "Symbol(==)"),
369 Token::NotEqual => write!(f, "Symbol(!=)"),
370 Token::Arrow => write!(f, "Symbol(->)"),
371 Token::Not => write!(f, "Symbol(!)"),
372 Token::And => write!(f, "Symbol(&&)"),
373 Token::Or => write!(f, "Symbol(||)"),
374 Token::Vertical => write!(f, "Symbol(|)"),
375 Token::Annotation => write!(f, "Symbol(@)"),
376 Token::BitAnd => write!(f, "Symbol(&)"),
377 Token::Eof => write!(f, "EOF"),
378 }
379 }
380}