1use logos::Logos;
2
3use num::BigUint;
4use spade_common::name::Identifier;
5
/// The size/signedness suffix (if any) attached to an integer literal token.
#[derive(Debug, PartialEq, Clone)]
pub enum LiteralKind {
    /// No suffix, e.g. `123`.
    Unsized,
    /// An `i<bits>` suffix; the payload is the bit width, e.g. `123i8`.
    Signed(BigUint),
    /// A `u<bits>` suffix; the payload is the bit width, e.g. `123u8`.
    Unsigned(BigUint),
}
12
13fn parse_int(slice: &str, radix: u32) -> (BigUint, LiteralKind) {
14 let lower = slice.to_ascii_lowercase().replace(['_'], "");
15
16 let (cleaned, kind) = if lower.contains("u") {
17 let split = lower.split("u").collect::<Vec<_>>();
18 let kind = LiteralKind::Unsigned(BigUint::parse_bytes(split[1].as_bytes(), 10).unwrap());
19 (split[0], kind)
20 } else if lower.contains("i") {
21 let split = lower.split("i").collect::<Vec<_>>();
22 let kind = LiteralKind::Signed(BigUint::parse_bytes(split[1].as_bytes(), 10).unwrap());
23 (split[0], kind)
24 } else {
25 (lower.as_str(), LiteralKind::Unsized)
26 };
27
28 (
29 BigUint::parse_bytes(cleaned.as_bytes(), radix).unwrap(),
30 kind,
31 )
32}
33
34fn process_ident(ident: &str) -> Identifier {
35 if ident.starts_with("r#") {
36 Identifier::intern(&ident[2..])
37 } else {
38 Identifier::intern(ident)
39 }
40}
41
/// Every token kind produced by the Spade lexer.
///
/// Derived with [`Logos`]; variants that carry a payload use callbacks to
/// slice and parse the matched text, while keywords and operators are plain
/// `#[token]`s.
#[derive(Logos, Debug, PartialEq, Clone)]
pub enum TokenKind {
    // An identifier: an XID-start character (or `_`), XID continuations, and
    // optionally one trailing `?`, `!`, `?!` or `⁈` (written as \u escapes;
    // the regex uses `(?x)` extended mode, so its internal whitespace and the
    // trailing `# ...` comment are ignored by the matcher). A leading `r#`
    // marks a raw identifier; `process_ident` strips it before interning.
    #[regex(r#"(r#)?(?x:
        [\p{XID_Start}_]
        \p{XID_Continue}*
        (\u{3F} | \u{21} | (\u{3F}\u{21}) | \u{2048})? # ? ! ?! ⁈
    )"#, |lex| process_ident(lex.slice()))]
    Identifier(Identifier),

    // Integer literals. All three radixes allow `_` separators and an
    // optional case-insensitive size suffix (`u<bits>`/`i<bits>`), which
    // `parse_int` turns into a `LiteralKind`.
    #[regex(r"[0-9][0-9_]*([uUiI][0-9]+)?", |lex| {
        parse_int(lex.slice(), 10)
    })]
    Integer((BigUint, LiteralKind)),
    #[regex(r"0x[0-9A-Fa-f][0-9_A-Fa-f]*([uUiI][0-9]+)?", |lex| {
        // [2..] skips the `0x` prefix before parsing.
        parse_int(&lex.slice()[2..], 16)
    })]
    HexInteger((BigUint, LiteralKind)),
    #[regex(r"0b[0-1][0-1_]*([uUiI][0-9]+)?", |lex| {
        // [2..] skips the `0b` prefix before parsing.
        parse_int(&lex.slice()[2..], 2)
    })]
    BinInteger((BigUint, LiteralKind)),

    // Boolean literals
    #[token("true")]
    True,
    #[token("false")]
    False,

    // Logic-level literals
    #[token("LOW")]
    Low,
    #[token("HIGH")]
    High,
    #[token("HIGHIMP")]
    HighImp,

    // Statement and declaration keywords
    #[token("reg")]
    Reg,
    #[token("let")]
    Let,
    #[token("decl")]
    Decl,
    #[token("inst")]
    Instance,
    #[token("reset")]
    Reset,
    #[token("initial")]
    Initial,
    #[token("if")]
    If,
    #[token("else")]
    Else,
    #[token("match")]
    Match,
    #[token("set")]
    Set,

    // Item-level keywords
    #[token("pipeline")]
    Pipeline,
    #[token("stage")]
    Stage,
    #[token("entity")]
    Entity,
    #[token("trait")]
    Trait,
    #[token("impl")]
    Impl,
    #[token("for")]
    For,
    #[token("fn")]
    Function,
    #[token("enum")]
    Enum,
    #[token("struct")]
    Struct,
    #[token("port")]
    Port,
    #[token("mod")]
    Mod,
    #[token("use")]
    Use,
    #[token("as")]
    As,
    #[token("assert")]
    Assert,
    #[token("mut")]
    Mut,
    #[token("inv")]
    Inv,
    #[token("pub")]
    Pub,
    #[token("where")]
    Where,

    #[token("gen")]
    Gen,

    #[token("extern")]
    Extern,
    #[token("unsafe")]
    Unsafe,

    // Operators. Longer spellings (`>>>`, `>>`, `<=`, …) win over their
    // prefixes because logos prefers the longest match.
    #[token("+")]
    Plus,
    #[token("-")]
    Minus,
    #[token("*")]
    Asterisk,
    #[token("/")]
    Slash,
    #[token("%")]
    Percentage,
    #[token("==")]
    Equals,
    #[token("!=")]
    NotEquals,
    #[token("<")]
    Lt,
    #[token(">")]
    Gt,
    #[token("<=")]
    Le,
    #[token(">=")]
    Ge,
    #[token(">>>")]
    ArithmeticRightShift,
    #[token(">>")]
    RightShift,
    #[token("<<")]
    LeftShift,
    #[token("||")]
    DoublePipe,
    #[token("&&")]
    LogicalAnd,
    #[token("^^")]
    LogicalXor,
    #[token("&")]
    Ampersand,
    #[token("|")]
    Pipe,
    #[token("!")]
    Not,
    #[token("^")]
    BitwiseXor,
    #[token("~")]
    Tilde,
    #[token("`")]
    InfixOperatorSeparator,

    #[token("=")]
    Assignment,

    // Delimiters
    #[token("(")]
    OpenParen,
    #[token(")")]
    CloseParen,

    #[token("{")]
    OpenBrace,
    #[token("}")]
    CloseBrace,

    #[token("[")]
    OpenBracket,
    #[token("]")]
    CloseBracket,

    // Punctuation
    #[token("=>")]
    FatArrow,
    #[token("->")]
    SlimArrow,
    #[token(",")]
    Comma,
    #[token(".")]
    Dot,
    #[token("..")]
    DotDot,
    #[token(";")]
    Semi,
    // U+037E GREEK QUESTION MARK — visually identical to `;` but a distinct
    // character. Lexed as its own kind, presumably so it can be diagnosed
    // separately from `Semi` — confirm against the parser's handling.
    #[token(";")]
    GreekQuestionMark,
    #[token(":")]
    Colon,
    #[token("::")]
    PathSeparator,
    #[token("#")]
    Hash,
    #[token("$")]
    Dollar,

    // `'label` — the leading `'` is dropped before interning.
    #[regex(r#"'[\p{XID_Start}_]\p{XID_Continue}*"#, |lex| Identifier::intern(&lex.slice()[1..]))]
    Label(Identifier),

    // `@label` reference — the leading `@` is dropped before interning.
    #[regex(r#"@[\p{XID_Start}_]\p{XID_Continue}*"#, |lex| Identifier::intern(&lex.slice()[1..]))]
    LabelRef(Identifier),

    // `b'…'` / `b"…"` — payload is the text between the `b'`/`b"` opener and
    // the closing quote, with backslash escapes left unprocessed.
    #[regex(r#"b'(\\.|[^\\'])*'"#, |lex| lex.slice()[2..(lex.slice().len() - 1)].to_string())]
    AsciiCharLiteral(String),
    #[regex(r#"b"(\\.|[^\\"])*""#, |lex| lex.slice()[2..(lex.slice().len() - 1)].to_string())]
    AsciiStringLiteral(String),
    // A single word character between single quotes; carries no payload.
    #[regex(r#"'\w'"#)]
    Utf8CharLiteral,
    // The interior cannot contain `"`, so this `replace` only strips the two
    // delimiting quotes.
    #[regex(r#""[^"]*""#, |lex| lex.slice().replace("\"", ""))]
    String(String),

    // Doc comments: payload is everything after the `///` / `//!` marker,
    // up to (not including) the newline.
    #[regex("///[^\n]*", |lex| lex.slice()[3..].to_string(), allow_greedy = true)]
    OutsideDocumentation(String),
    #[regex("//![^\n]*", |lex| lex.slice()[3..].to_string(), allow_greedy = true)]
    InsideDocumentation(String),

    // Matched one character at a time and skipped by logos.
    #[regex("[ \t\n\r]", logos::skip)]
    Whitespace,

    // Line comments are produced as tokens (not skipped).
    #[regex("//[^\n]*", allow_greedy = true)]
    Comment,

    // Block comment delimiters are lexed individually; nesting/pairing is
    // presumably handled by a later stage — confirm with the consumer.
    #[token("/*")]
    BlockCommentStart,
    #[token("*/")]
    BlockCommentEnd,

    // Never matched by logos; constructed manually at end of input.
    Eof,
}
270
impl TokenKind {
    /// A human-readable name for this token kind, for use in diagnostics.
    ///
    /// Keyword, operator and punctuation tokens return their source
    /// spelling; payload-carrying tokens return a descriptive noun
    /// ("identifier", "integer", …).
    pub fn as_str(&self) -> &'static str {
        match self {
            TokenKind::Identifier(_) => "identifier",
            TokenKind::Integer(_) => "integer",
            TokenKind::HexInteger(_) => "hexadecimal integer",
            TokenKind::BinInteger(_) => "binary integer",
            TokenKind::True => "true",
            TokenKind::False => "false",
            TokenKind::Low => "LOW",
            TokenKind::High => "HIGH",
            TokenKind::HighImp => "HIGHIMP",

            TokenKind::Let => "let",
            TokenKind::Reg => "reg",
            TokenKind::Decl => "decl",
            TokenKind::Entity => "entity",
            TokenKind::Pipeline => "pipeline",
            TokenKind::Stage => "stage",
            TokenKind::Instance => "inst",
            TokenKind::Reset => "reset",
            TokenKind::Initial => "initial",
            TokenKind::If => "if",
            TokenKind::Else => "else",
            TokenKind::Match => "match",
            TokenKind::Impl => "impl",
            TokenKind::Trait => "trait",
            TokenKind::For => "for",
            TokenKind::Function => "fn",
            TokenKind::Enum => "enum",
            TokenKind::Struct => "struct",
            TokenKind::Port => "port",
            TokenKind::Mod => "mod",
            TokenKind::As => "as",
            TokenKind::Use => "use",
            TokenKind::Assert => "assert",
            TokenKind::Set => "set",
            TokenKind::Mut => "mut",
            TokenKind::Inv => "inv",
            TokenKind::Pub => "pub",
            TokenKind::Where => "where",

            TokenKind::Gen => "gen",

            TokenKind::Extern => "extern",
            TokenKind::Unsafe => "unsafe",

            TokenKind::Assignment => "=",
            TokenKind::Plus => "+",
            TokenKind::Minus => "-",
            TokenKind::Asterisk => "*",
            TokenKind::Slash => "/",
            TokenKind::Percentage => "%",
            TokenKind::Equals => "==",
            TokenKind::NotEquals => "!=",
            TokenKind::Lt => "<",
            TokenKind::Gt => ">",
            TokenKind::Le => "<=",
            TokenKind::Ge => ">=",
            TokenKind::LeftShift => "<<",
            TokenKind::RightShift => ">>",
            TokenKind::ArithmeticRightShift => ">>>",
            TokenKind::DoublePipe => "||",
            TokenKind::LogicalAnd => "&&",
            TokenKind::LogicalXor => "^^",
            TokenKind::Ampersand => "&",
            TokenKind::Pipe => "|",
            TokenKind::Not => "!",
            TokenKind::Tilde => "~",
            TokenKind::BitwiseXor => "^",
            TokenKind::InfixOperatorSeparator => "`",

            TokenKind::OpenParen => "(",
            TokenKind::CloseParen => ")",
            TokenKind::OpenBrace => "{",
            TokenKind::CloseBrace => "}",
            TokenKind::OpenBracket => "[",
            TokenKind::CloseBracket => "]",

            TokenKind::FatArrow => "=>",
            TokenKind::SlimArrow => "->",
            TokenKind::Semi => ";",
            // Spelled out so the diagnostic makes the lookalike explicit.
            TokenKind::GreekQuestionMark => "GreekQuestionMark(;)",
            TokenKind::Colon => ":",
            TokenKind::Comma => ",",
            TokenKind::Dot => ".",
            TokenKind::DotDot => "..",
            TokenKind::PathSeparator => "::",

            TokenKind::Hash => "#",
            TokenKind::Dollar => "$",

            TokenKind::Eof => "end of file",

            TokenKind::Label(_) => "label",
            TokenKind::LabelRef(_) => "label ref",

            TokenKind::AsciiCharLiteral(_) => "ASCII char literal",
            TokenKind::AsciiStringLiteral(_) => "ASCII string literal",
            TokenKind::Utf8CharLiteral => "Unicode char literal",
            TokenKind::String(_) => "string",

            TokenKind::OutsideDocumentation(_) => "///",
            TokenKind::InsideDocumentation(_) => "//!",

            TokenKind::Whitespace => "whitespace",
            TokenKind::Comment => "comment",

            TokenKind::BlockCommentStart => "/*",
            TokenKind::BlockCommentEnd => "*/",
        }
    }

    /// True if this is an [`TokenKind::Identifier`] token.
    pub fn is_identifier(&self) -> bool {
        matches!(self, TokenKind::Identifier(_))
    }

    /// True if this is a [`TokenKind::String`] literal token.
    pub fn is_string(&self) -> bool {
        matches!(self, TokenKind::String(_))
    }

    /// True for an integer literal of any radix (decimal, hex or binary).
    pub fn is_integer(&self) -> bool {
        matches!(
            self,
            TokenKind::Integer(_) | TokenKind::HexInteger(_) | TokenKind::BinInteger(_)
        )
    }

    /// The numeric value of an integer literal token (any radix), or `None`
    /// for every other token kind. Any size suffix is discarded.
    pub fn as_biguint(&self) -> Option<BigUint> {
        match self {
            TokenKind::Integer((i, _))
            | TokenKind::HexInteger((i, _))
            | TokenKind::BinInteger((i, _)) => Some(i.clone()),
            _ => None,
        }
    }
}
408
#[cfg(test)]
mod tests {
    use spade_common::num_ext::InfallibleToBigUint;

    use super::*;

    /// Drives the lexer over `input` to exhaustion, returning every produced
    /// token and panicking if any span failed to lex.
    fn lex_all(input: &str) -> Vec<TokenKind> {
        TokenKind::lexer(input)
            .map(|tok| tok.expect("lexing failed"))
            .collect()
    }

    #[test]
    fn identifiers_work() {
        assert_eq!(
            lex_all("abc123_"),
            vec![TokenKind::Identifier(Identifier::intern("abc123_"))]
        );
    }

    #[test]
    fn integer_literals_work() {
        let expected = TokenKind::Integer((123_u32.to_biguint(), LiteralKind::Unsized));
        assert_eq!(lex_all("123"), vec![expected]);
    }

    #[test]
    fn sized_uint_integer_literals_work() {
        let expected = TokenKind::Integer((
            123_u32.to_biguint(),
            LiteralKind::Unsigned(3u32.to_biguint()),
        ));
        assert_eq!(lex_all("123u3"), vec![expected]);
    }

    #[test]
    fn sized_int_integer_literals_work() {
        let expected = TokenKind::Integer((
            123_u32.to_biguint(),
            LiteralKind::Signed(3u32.to_biguint()),
        ));
        assert_eq!(lex_all("123i3"), vec![expected]);
    }

    #[test]
    fn hex_array() {
        assert_eq!(
            lex_all("[0x45]"),
            vec![
                TokenKind::OpenBracket,
                TokenKind::HexInteger((0x45_u32.to_biguint(), LiteralKind::Unsized)),
                TokenKind::CloseBracket,
            ]
        );
    }

    #[test]
    fn invalid_hex_is_not_hex() {
        // `0xg` is not a hex literal: it lexes as the decimal integer `0`
        // followed by the identifier `xg`.
        assert_eq!(
            lex_all("0xg"),
            vec![
                TokenKind::Integer((0_u32.to_biguint(), LiteralKind::Unsized)),
                TokenKind::Identifier(Identifier::intern("xg")),
            ]
        );
    }

    #[test]
    fn doc_comments_slice_correctly() {
        // The `//!`/`///` markers are stripped; the newline between the two
        // comments is skipped as whitespace.
        assert_eq!(
            lex_all("//! Hello\n///G'day"),
            vec![
                TokenKind::InsideDocumentation(" Hello".to_string()),
                TokenKind::OutsideDocumentation("G'day".to_string()),
            ]
        );
    }
}