1use logos::Logos;
2
3use num::BigUint;
4
/// The signedness/size suffix attached to an integer literal, if any.
///
/// The payload of the sized variants is the bit width from the suffix
/// (e.g. the `3` in `123u3`), parsed by [`parse_int`].
#[derive(Debug, PartialEq, Clone)]
pub enum LiteralKind {
    /// No suffix, e.g. `123`.
    Unsized,
    /// A signed-width suffix `i<width>`, e.g. `123i8`.
    Signed(BigUint),
    /// An unsigned-width suffix `u<width>`, e.g. `123u8`.
    Unsigned(BigUint),
}
11
12fn parse_int(slice: &str, radix: u32) -> (BigUint, LiteralKind) {
13 let lower = slice.to_ascii_lowercase().replace(['_'], "");
14
15 let (cleaned, kind) = if lower.contains("u") {
16 let split = lower.split("u").collect::<Vec<_>>();
17 let kind = LiteralKind::Unsigned(BigUint::parse_bytes(split[1].as_bytes(), 10).unwrap());
18 (split[0], kind)
19 } else if lower.contains("i") {
20 let split = lower.split("i").collect::<Vec<_>>();
21 let kind = LiteralKind::Signed(BigUint::parse_bytes(split[1].as_bytes(), 10).unwrap());
22 (split[0], kind)
23 } else {
24 (lower.as_str(), LiteralKind::Unsized)
25 };
26
27 (
28 BigUint::parse_bytes(cleaned.as_bytes(), radix).unwrap(),
29 kind,
30 )
31}
32
/// Every token recognised by the lexer, generated by the `logos` derive.
///
/// Data-carrying variants (`Identifier`, the integer literals, `String`,
/// and the documentation comments) capture their payload through the
/// callback on their `#[regex]` attribute.
#[derive(Logos, Debug, PartialEq, Clone)]
pub enum TokenKind {
    // An identifier: an XID-start character or `_`, XID-continue
    // characters, and an optional trailing `?`, `!`, `?!`, or `⁈`
    // (the escapes are U+3F `?`, U+21 `!`, U+2048 `⁈`).
    #[regex(r#"(?x:
    [\p{XID_Start}_]
    \p{XID_Continue}*
    (\u{3F} | \u{21} | (\u{3F}\u{21}) | \u{2048})? # ? ! ?! ⁈
    )"#, |lex| lex.slice().to_string())]
    Identifier(String),

    // Decimal integer literal with optional `_` separators and an optional
    // size suffix, e.g. `1_000u8`. The value/suffix pair comes from
    // `parse_int`.
    #[regex(r"[0-9][0-9_]*([uUiI][0-9]+)?", |lex| {
        parse_int(lex.slice(), 10)
    })]
    Integer((BigUint, LiteralKind)),
    // Hexadecimal literal, e.g. `0xff_00u16`; the `0x` prefix is sliced off
    // before parsing with radix 16.
    #[regex(r"0x[0-9A-Fa-f][0-9_A-Fa-f]*([uUiI][0-9]+)?", |lex| {
        parse_int(&lex.slice()[2..], 16)
    })]
    HexInteger((BigUint, LiteralKind)),
    // Binary literal, e.g. `0b1010_1111u8`; the `0b` prefix is sliced off
    // before parsing with radix 2.
    #[regex(r"0b[0-1][0-1_]*([uUiI][0-9]+)?", |lex| {
        parse_int(&lex.slice()[2..], 2)
    })]
    BinInteger((BigUint, LiteralKind)),

    // Boolean literals.
    #[token("true")]
    True,
    #[token("false")]
    False,

    // Logic-level literals.
    #[token("LOW")]
    Low,
    #[token("HIGH")]
    High,
    #[token("HIGHIMP")]
    HighImp,

    // Statement / declaration keywords.
    #[token("reg")]
    Reg,
    #[token("let")]
    Let,
    #[token("decl")]
    Decl,
    #[token("inst")]
    Instance,
    #[token("reset")]
    Reset,
    #[token("initial")]
    Initial,
    #[token("if")]
    If,
    #[token("else")]
    Else,
    #[token("match")]
    Match,
    #[token("set")]
    Set,

    // Item-level keywords.
    #[token("pipeline")]
    Pipeline,
    #[token("stage")]
    Stage,
    #[token("entity")]
    Entity,
    #[token("trait")]
    Trait,
    #[token("impl")]
    Impl,
    #[token("for")]
    For,
    #[token("fn")]
    Function,
    #[token("enum")]
    Enum,
    #[token("struct")]
    Struct,
    #[token("port")]
    Port,
    #[token("mod")]
    Mod,
    #[token("use")]
    Use,
    #[token("as")]
    As,
    #[token("assert")]
    Assert,
    #[token("mut")]
    Mut,
    #[token("inv")]
    Inv,
    #[token("where")]
    Where,

    #[token("gen")]
    Gen,

    #[token("extern")]
    Extern,
    #[token("unsafe")]
    Unsafe,

    // Operators. Longest-match wins, so e.g. `>>>` is tried before `>>`
    // and `>`.
    #[token("+")]
    Plus,
    #[token("-")]
    Minus,
    #[token("*")]
    Asterisk,
    #[token("/")]
    Slash,
    #[token("%")]
    Percentage,
    #[token("==")]
    Equals,
    #[token("!=")]
    NotEquals,
    #[token("<")]
    Lt,
    #[token(">")]
    Gt,
    #[token("<=")]
    Le,
    #[token(">=")]
    Ge,
    #[token(">>>")]
    ArithmeticRightShift,
    #[token(">>")]
    RightShift,
    #[token("<<")]
    LeftShift,
    #[token("||")]
    DoublePipe,
    #[token("&&")]
    LogicalAnd,
    #[token("^^")]
    LogicalXor,
    #[token("&")]
    Ampersand,
    #[token("|")]
    Pipe,
    #[token("!")]
    Not,
    #[token("^")]
    BitwiseXor,
    #[token("~")]
    Tilde,
    #[token("`")]
    InfixOperatorSeparator,
    #[token("'")]
    SingleQuote,

    #[token("=")]
    Assignment,

    // Delimiters.
    #[token("(")]
    OpenParen,
    #[token(")")]
    CloseParen,

    #[token("{")]
    OpenBrace,
    #[token("}")]
    CloseBrace,

    #[token("[")]
    OpenBracket,
    #[token("]")]
    CloseBracket,

    // Punctuation.
    #[token("=>")]
    FatArrow,
    #[token("->")]
    SlimArrow,
    #[token(",")]
    Comma,
    #[token(".")]
    Dot,
    #[token("..")]
    DotDot,
    #[token(";")]
    Semi,
    // NOTE(review): this pattern renders identically to `Semi` above;
    // presumably it is U+037E GREEK QUESTION MARK (which looks like `;`).
    // Confirm — a duplicate ASCII `;` pattern would be rejected by the
    // logos derive.
    #[token(";")]
    GreekQuestionMark,
    #[token(":")]
    Colon,
    #[token("::")]
    PathSeparator,
    #[token("#")]
    Hash,
    #[token("$")]
    Dollar,

    // String literal; the regex forbids embedded `"`, so the replace only
    // strips the two surrounding quotes. No escape sequences are handled.
    #[regex(r#""[^"]*""#, |lex| lex.slice().replace("\"", ""))]
    String(String),

    // Documentation comments; the payload is the line with the marker
    // (`///` or `//!`) removed.
    #[regex("///[^\n]*", |lex| lex.slice()[3..].to_string())]
    OutsideDocumentation(String),
    #[regex("//![^\n]*", |lex| lex.slice()[3..].to_string())]
    InsideDocumentation(String),

    // Whitespace is skipped, one character per match.
    // NOTE(review): `[ \t\n\r]+` would skip runs in a single match —
    // behaviourally identical, slightly fewer lexer iterations.
    #[regex("[ \t\n\r]", logos::skip)]
    Whitespace,

    // Line comment (non-documentation); not skipped, so downstream code
    // presumably filters or consumes these — confirm against the caller.
    #[regex("//[^\n]*")]
    Comment,

    // Block comment delimiters are emitted as tokens; the nesting/skipping
    // is presumably handled outside the lexer — confirm against the caller.
    #[token("/*")]
    BlockCommentStart,
    #[token("*/")]
    BlockCommentEnd,

    // Never produced by logos (no attribute); presumably appended by the
    // driver when input is exhausted — confirm against the caller.
    Eof,
}
247
impl TokenKind {
    /// A human-readable name for this token kind, for use in diagnostics.
    ///
    /// Keyword and punctuation tokens map back to their source spelling;
    /// data-carrying variants map to a descriptive noun ("identifier",
    /// "integer", ...). `GreekQuestionMark` gets a spelled-out name since
    /// its glyph is indistinguishable from `;`.
    pub fn as_str(&self) -> &'static str {
        match self {
            TokenKind::Identifier(_) => "identifier",
            TokenKind::Integer(_) => "integer",
            TokenKind::HexInteger(_) => "hexadecimal integer",
            TokenKind::BinInteger(_) => "binary integer",
            TokenKind::True => "true",
            TokenKind::False => "false",
            TokenKind::Low => "LOW",
            TokenKind::High => "HIGH",
            TokenKind::HighImp => "HIGHIMP",

            TokenKind::Let => "let",
            TokenKind::Reg => "reg",
            TokenKind::Decl => "decl",
            TokenKind::Entity => "entity",
            TokenKind::Pipeline => "pipeline",
            TokenKind::Stage => "stage",
            TokenKind::Instance => "inst",
            TokenKind::Reset => "reset",
            TokenKind::Initial => "initial",
            TokenKind::If => "if",
            TokenKind::Else => "else",
            TokenKind::Match => "match",
            TokenKind::Impl => "impl",
            TokenKind::Trait => "trait",
            TokenKind::For => "for",
            TokenKind::Function => "fn",
            TokenKind::Enum => "enum",
            TokenKind::Struct => "struct",
            TokenKind::Port => "port",
            TokenKind::Mod => "mod",
            TokenKind::As => "as",
            TokenKind::Use => "use",
            TokenKind::Assert => "assert",
            TokenKind::Set => "set",
            TokenKind::Mut => "mut",
            TokenKind::Inv => "inv",
            TokenKind::Where => "where",

            TokenKind::Gen => "gen",

            TokenKind::Extern => "extern",
            TokenKind::Unsafe => "unsafe",

            TokenKind::Assignment => "=",
            TokenKind::Plus => "+",
            TokenKind::Minus => "-",
            TokenKind::Asterisk => "*",
            TokenKind::Slash => "/",
            TokenKind::Percentage => "%",
            TokenKind::Equals => "==",
            TokenKind::NotEquals => "!=",
            TokenKind::Lt => "<",
            TokenKind::Gt => ">",
            TokenKind::Le => "<=",
            TokenKind::Ge => ">=",
            TokenKind::LeftShift => "<<",
            TokenKind::RightShift => ">>",
            TokenKind::ArithmeticRightShift => ">>>",
            TokenKind::DoublePipe => "||",
            TokenKind::LogicalAnd => "&&",
            TokenKind::LogicalXor => "^^",
            TokenKind::Ampersand => "&",
            TokenKind::Pipe => "|",
            TokenKind::Not => "!",
            TokenKind::Tilde => "~",
            TokenKind::BitwiseXor => "^",
            TokenKind::InfixOperatorSeparator => "`",

            TokenKind::OpenParen => "(",
            TokenKind::CloseParen => ")",
            TokenKind::OpenBrace => "{",
            TokenKind::CloseBrace => "}",
            TokenKind::OpenBracket => "[",
            TokenKind::CloseBracket => "]",

            TokenKind::FatArrow => "=>",
            TokenKind::SlimArrow => "->",
            TokenKind::Semi => ";",
            TokenKind::GreekQuestionMark => "GreekQuestionMark(;)",
            TokenKind::Colon => ":",
            TokenKind::Comma => ",",
            TokenKind::Dot => ".",
            TokenKind::DotDot => "..",
            TokenKind::PathSeparator => "::",
            TokenKind::SingleQuote => "'",

            TokenKind::Hash => "#",
            TokenKind::Dollar => "$",

            TokenKind::Eof => "end of file",

            TokenKind::String(_) => "string",

            TokenKind::OutsideDocumentation(_) => "///",
            TokenKind::InsideDocumentation(_) => "//!",

            TokenKind::Whitespace => "whitespace",
            TokenKind::Comment => "comment",

            TokenKind::BlockCommentStart => "/*",
            TokenKind::BlockCommentEnd => "*/",
        }
    }

    /// True if this token is an [`TokenKind::Identifier`].
    pub fn is_identifier(&self) -> bool {
        matches!(self, TokenKind::Identifier(_))
    }

    /// True if this token is a [`TokenKind::String`] literal.
    pub fn is_string(&self) -> bool {
        matches!(self, TokenKind::String(_))
    }

    /// True if this token is any integer literal (decimal, hex, or binary).
    pub fn is_integer(&self) -> bool {
        matches!(
            self,
            TokenKind::Integer(_) | TokenKind::HexInteger(_) | TokenKind::BinInteger(_)
        )
    }

    /// The numeric value of an integer literal token (any base), or `None`
    /// for every other token kind. The suffix ([`LiteralKind`]) is ignored.
    pub fn as_biguint(&self) -> Option<BigUint> {
        match self {
            TokenKind::Integer((i, _))
            | TokenKind::HexInteger((i, _))
            | TokenKind::BinInteger((i, _)) => Some(i.clone()),
            _ => None,
        }
    }
}
379
#[cfg(test)]
mod tests {
    use spade_common::num_ext::InfallibleToBigUint;

    use super::*;

    #[test]
    fn identifiers_work() {
        let mut lexer = TokenKind::lexer("abc123_");

        let expected = TokenKind::Identifier(String::from("abc123_"));
        assert_eq!(lexer.next(), Some(Ok(expected)));
    }

    #[test]
    fn integer_literals_work() {
        // Collect the whole token stream; equality with `expected` also
        // checks that nothing follows the literal.
        let tokens: Vec<_> = TokenKind::lexer("123").collect();

        let expected = vec![Ok(TokenKind::Integer((
            123_u32.to_biguint(),
            LiteralKind::Unsized,
        )))];
        assert_eq!(tokens, expected);
    }

    #[test]
    fn sized_uint_integer_literals_work() {
        let tokens: Vec<_> = TokenKind::lexer("123u3").collect();

        let expected = vec![Ok(TokenKind::Integer((
            123_u32.to_biguint(),
            LiteralKind::Unsigned(3_u32.to_biguint()),
        )))];
        assert_eq!(tokens, expected);
    }

    #[test]
    fn sized_int_integer_literals_work() {
        let tokens: Vec<_> = TokenKind::lexer("123i3").collect();

        let expected = vec![Ok(TokenKind::Integer((
            123_u32.to_biguint(),
            LiteralKind::Signed(3_u32.to_biguint()),
        )))];
        assert_eq!(tokens, expected);
    }

    #[test]
    fn hex_array() {
        let tokens: Vec<_> = TokenKind::lexer("[0x45]").collect();

        let expected = vec![
            Ok(TokenKind::OpenBracket),
            Ok(TokenKind::HexInteger((
                0x45_u32.to_biguint(),
                LiteralKind::Unsized,
            ))),
            Ok(TokenKind::CloseBracket),
        ];
        assert_eq!(tokens, expected);
    }

    #[test]
    fn invalid_hex_is_not_hex() {
        // `0xg` does not match the hex regex, so it lexes as the decimal
        // integer `0` followed by the identifier `xg`.
        let tokens: Vec<_> = TokenKind::lexer("0xg").collect();

        let expected = vec![
            Ok(TokenKind::Integer((
                0_u32.to_biguint(),
                LiteralKind::Unsized,
            ))),
            Ok(TokenKind::Identifier(String::from("xg"))),
        ];
        assert_eq!(tokens, expected);
    }

    #[test]
    fn doc_comments_slice_correctly() {
        // Only the three marker characters are stripped; leading spaces in
        // the comment body are preserved.
        let tokens: Vec<_> = TokenKind::lexer("//! Hello\n///G'day").collect();

        let expected = vec![
            Ok(TokenKind::InsideDocumentation(String::from(" Hello"))),
            Ok(TokenKind::OutsideDocumentation(String::from("G'day"))),
        ];
        assert_eq!(tokens, expected);
    }
}