use logos::Logos;

use num::BigUint;

/// The size and signedness suffix of an integer literal: `123u8` gives
/// `Unsigned(8)`, `123i8` gives `Signed(8)`, and a bare `123` is `Unsized`.
#[derive(Debug, PartialEq, Clone)]
pub enum LiteralKind {
    Unsized,
    Signed(BigUint),
    Unsigned(BigUint),
}

/// Parse an integer literal in the given radix. `_` separators are stripped,
/// and an optional `u<size>`/`i<size>` suffix is split off into a [`LiteralKind`].
fn parse_int(slice: &str, radix: u32) -> (BigUint, LiteralKind) {
    let lower = slice.to_ascii_lowercase().replace(['_'], "");

    let (cleaned, kind) = if lower.contains("u") {
        let split = lower.split("u").collect::<Vec<_>>();
        let kind = LiteralKind::Unsigned(BigUint::parse_bytes(split[1].as_bytes(), 10).unwrap());
        (split[0], kind)
    } else if lower.contains("i") {
        let split = lower.split("i").collect::<Vec<_>>();
        let kind = LiteralKind::Signed(BigUint::parse_bytes(split[1].as_bytes(), 10).unwrap());
        (split[0], kind)
    } else {
        (lower.as_str(), LiteralKind::Unsized)
    };

    (
        BigUint::parse_bytes(cleaned.as_bytes(), radix).unwrap(),
        kind,
    )
}

/// Tokens produced by the lexer. Whitespace and `//` line comments are
/// skipped, while `/*` and `*/` are emitted as individual tokens.
#[derive(Logos, Debug, PartialEq, Clone)]
pub enum TokenKind {
    #[regex(r#"(?x:
        [\p{XID_Start}_]
        \p{XID_Continue}*
        (\u{3F} | \u{21} | (\u{3F}\u{21}) | \u{2048})? # ? ! ?! ⁈
    )"#, |lex| lex.slice().to_string())]
    Identifier(String),

    #[regex(r"[0-9][0-9_]*([uUiI][0-9]+)?", |lex| {
        parse_int(lex.slice(), 10)
    })]
    Integer((BigUint, LiteralKind)),
    #[regex(r"0x[0-9A-Fa-f][0-9_A-Fa-f]*([uUiI][0-9]+)?", |lex| {
        parse_int(&lex.slice()[2..], 16)
    })]
    HexInteger((BigUint, LiteralKind)),
    #[regex(r"0b[0-1][0-1_]*([uUiI][0-9]+)?", |lex| {
        parse_int(&lex.slice()[2..], 2)
    })]
    BinInteger((BigUint, LiteralKind)),

    #[token("true")]
    True,
    #[token("false")]
    False,

    #[token("LOW")]
    Low,
    #[token("HIGH")]
    High,
    #[token("HIGHIMP")]
    HighImp,

    #[token("reg")]
    Reg,
    #[token("let")]
    Let,
    #[token("decl")]
    Decl,
    #[token("inst")]
    Instance,
    #[token("reset")]
    Reset,
    #[token("initial")]
    Initial,
    #[token("if")]
    If,
    #[token("else")]
    Else,
    #[token("match")]
    Match,
    #[token("set")]
    Set,

    #[token("pipeline")]
    Pipeline,
    #[token("stage")]
    Stage,
    #[token("entity")]
    Entity,
    #[token("trait")]
    Trait,
    #[token("impl")]
    Impl,
    #[token("for")]
    For,
    #[token("fn")]
    Function,
    #[token("enum")]
    Enum,
    #[token("struct")]
    Struct,
    #[token("port")]
    Port,
    #[token("mod")]
    Mod,
    #[token("use")]
    Use,
    #[token("as")]
    As,
    #[token("assert")]
    Assert,
    #[token("mut")]
    Mut,
    #[token("inv")]
    Inv,
    #[token("where")]
    Where,

    #[token("gen")]
    Gen,

    #[token("extern")]
    Extern,

    #[token("+")]
    Plus,
    #[token("-")]
    Minus,
    #[token("*")]
    Asterisk,
    #[token("/")]
    Slash,
    #[token("%")]
    Percentage,
    #[token("==")]
    Equals,
    #[token("!=")]
    NotEquals,
    #[token("<")]
    Lt,
    #[token(">")]
    Gt,
    #[token("<=")]
    Le,
    #[token(">=")]
    Ge,
    #[token(">>>")]
    ArithmeticRightShift,
    #[token(">>")]
    RightShift,
    #[token("<<")]
    LeftShift,
    #[token("||")]
    LogicalOr,
    #[token("&&")]
    LogicalAnd,
    #[token("^^")]
    LogicalXor,
    #[token("&")]
    Ampersand,
    #[token("|")]
    BitwiseOr,
    #[token("!")]
    Not,
    #[token("^")]
    BitwiseXor,
    #[token("~")]
    Tilde,
    #[token("`")]
    InfixOperatorSeparator,
    #[token("'")]
    SingleQuote,

    #[token("=")]
    Assignment,

    #[token("(")]
    OpenParen,
    #[token(")")]
    CloseParen,

    #[token("{")]
    OpenBrace,
    #[token("}")]
    CloseBrace,

    #[token("[")]
    OpenBracket,
    #[token("]")]
    CloseBracket,

    #[token("=>")]
    FatArrow,
    #[token("->")]
    SlimArrow,
    #[token(",")]
    Comma,
    #[token(".")]
    Dot,
    #[token("..")]
    DotDot,
    #[token(";")]
    Semi,
    // U+037E GREEK QUESTION MARK, which renders identically to `;`
    #[token("\u{037E}")]
    GreekQuestionMark,
    #[token(":")]
    Colon,
    #[token("::")]
    PathSeparator,
    #[token("#")]
    Hash,
    #[token("$")]
    Dollar,

    #[regex(r#""[^"]*""#, |lex| lex.slice().replace("\"", ""))]
    String(String),

    #[regex("///[^\n]*", |lex| lex.slice()[3..].to_string())]
    OutsideDocumentation(String),
    #[regex("//![^\n]*", |lex| lex.slice()[3..].to_string())]
    InsideDocumentation(String),

    #[regex("[ \t\n\r]", logos::skip)]
    Whitespace,

    #[regex("//[^\n]*", logos::skip)]
    Comment,

    #[token("/*")]
    BlockCommentStart,
    #[token("*/")]
    BlockCommentEnd,

    Eof,
}

impl TokenKind {
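    /// A human-readable name for the token kind, suitable for use in
    /// diagnostics and error messages.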
    pub fn as_str(&self) -> &'static str {
        match self {
            TokenKind::Identifier(_) => "identifier",
            TokenKind::Integer(_) => "integer",
            TokenKind::HexInteger(_) => "hexadecimal integer",
            TokenKind::BinInteger(_) => "binary integer",
            TokenKind::True => "true",
            TokenKind::False => "false",
            TokenKind::Low => "LOW",
            TokenKind::High => "HIGH",
            TokenKind::HighImp => "HIGHIMP",

            TokenKind::Let => "let",
            TokenKind::Reg => "reg",
            TokenKind::Decl => "decl",
            TokenKind::Entity => "entity",
            TokenKind::Pipeline => "pipeline",
            TokenKind::Stage => "stage",
            TokenKind::Instance => "inst",
            TokenKind::Reset => "reset",
            TokenKind::Initial => "initial",
            TokenKind::If => "if",
            TokenKind::Else => "else",
            TokenKind::Match => "match",
            TokenKind::Impl => "impl",
            TokenKind::Trait => "trait",
            TokenKind::For => "for",
            TokenKind::Function => "fn",
            TokenKind::Enum => "enum",
            TokenKind::Struct => "struct",
            TokenKind::Port => "port",
            TokenKind::Mod => "mod",
            TokenKind::As => "as",
            TokenKind::Use => "use",
            TokenKind::Assert => "assert",
            TokenKind::Set => "set",
            TokenKind::Mut => "mut",
            TokenKind::Inv => "inv",
            TokenKind::Where => "where",

            TokenKind::Gen => "gen",

            TokenKind::Extern => "extern",

            TokenKind::Assignment => "=",
            TokenKind::Plus => "+",
            TokenKind::Minus => "-",
            TokenKind::Asterisk => "*",
            TokenKind::Slash => "/",
            TokenKind::Percentage => "%",
            TokenKind::Equals => "==",
            TokenKind::NotEquals => "!=",
            TokenKind::Lt => "<",
            TokenKind::Gt => ">",
            TokenKind::Le => "<=",
            TokenKind::Ge => ">=",
            TokenKind::LeftShift => "<<",
            TokenKind::RightShift => ">>",
            TokenKind::ArithmeticRightShift => ">>>",
            TokenKind::LogicalOr => "||",
            TokenKind::LogicalAnd => "&&",
            TokenKind::LogicalXor => "^^",
            TokenKind::Ampersand => "&",
            TokenKind::BitwiseOr => "|",
            TokenKind::Not => "!",
            TokenKind::Tilde => "~",
            TokenKind::BitwiseXor => "^",
            TokenKind::InfixOperatorSeparator => "`",

            TokenKind::OpenParen => "(",
            TokenKind::CloseParen => ")",
            TokenKind::OpenBrace => "{",
            TokenKind::CloseBrace => "}",
            TokenKind::OpenBracket => "[",
            TokenKind::CloseBracket => "]",

            TokenKind::FatArrow => "=>",
            TokenKind::SlimArrow => "->",
            TokenKind::Semi => ";",
            TokenKind::GreekQuestionMark => "GreekQuestionMark(;)",
            TokenKind::Colon => ":",
            TokenKind::Comma => ",",
            TokenKind::Dot => ".",
            TokenKind::DotDot => "..",
            TokenKind::PathSeparator => "::",
            TokenKind::SingleQuote => "'",

            TokenKind::Hash => "#",
            TokenKind::Dollar => "$",

            TokenKind::Eof => "end of file",

            TokenKind::String(_) => "string",

            TokenKind::OutsideDocumentation(_) => "///",
            TokenKind::InsideDocumentation(_) => "//!",

            TokenKind::Whitespace => "whitespace",
            TokenKind::Comment => "comment",

            TokenKind::BlockCommentStart => "/*",
            TokenKind::BlockCommentEnd => "*/",
        }
    }

    pub fn is_identifier(&self) -> bool {
        matches!(self, TokenKind::Identifier(_))
    }
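    /// True if the token is any integer literal (decimal, hexadecimal, or binary).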
    pub fn is_integer(&self) -> bool {
        matches!(
            self,
            TokenKind::Integer(_) | TokenKind::HexInteger(_) | TokenKind::BinInteger(_)
        )
    }

    /// The numeric value of the token, if it is any kind of integer literal.
    pub fn as_biguint(&self) -> Option<BigUint> {
        match self {
            TokenKind::Integer((i, _))
            | TokenKind::HexInteger((i, _))
            | TokenKind::BinInteger((i, _)) => Some(i.clone()),
            _ => None,
        }
    }
}

#[cfg(test)]
mod tests {
    use spade_common::num_ext::InfallibleToBigUint;

    use super::*;

    #[test]
    fn identifiers_work() {
        let mut lex = TokenKind::lexer("abc123_");

        assert_eq!(
            lex.next(),
            Some(Ok(TokenKind::Identifier("abc123_".to_string())))
        );
    }
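
    // Added check (not in the original test suite): the identifier regex also
    // allows a trailing `?`, `!`, `?!` or `⁈`, so `ready?` should lex as a
    // single identifier.
    #[test]
    fn identifiers_with_question_mark_work() {
        let mut lex = TokenKind::lexer("ready?");

        assert_eq!(
            lex.next(),
            Some(Ok(TokenKind::Identifier("ready?".to_string())))
        );
        assert_eq!(lex.next(), None);
    }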

    #[test]
    fn integer_literals_work() {
        let mut lex = TokenKind::lexer("123");

        assert_eq!(
            lex.next(),
            Some(Ok(TokenKind::Integer((
                123_u32.to_biguint(),
                LiteralKind::Unsized
            ))))
        );
        assert_eq!(lex.next(), None);
    }

    #[test]
    fn sized_uint_integer_literals_work() {
        let mut lex = TokenKind::lexer("123u3");

        assert_eq!(
            lex.next(),
            Some(Ok(TokenKind::Integer((
                123_u32.to_biguint(),
                LiteralKind::Unsigned(3u32.to_biguint())
            ))))
        );
        assert_eq!(lex.next(), None);
    }

    #[test]
    fn sized_int_integer_literals_work() {
        let mut lex = TokenKind::lexer("123i3");

        assert_eq!(
            lex.next(),
            Some(Ok(TokenKind::Integer((
                123_u32.to_biguint(),
                LiteralKind::Signed(3u32.to_biguint())
            ))))
        );
        assert_eq!(lex.next(), None);
    }

    #[test]
    fn hex_array() {
        let mut lex = TokenKind::lexer("[0x45]");
        assert_eq!(lex.next(), Some(Ok(TokenKind::OpenBracket)));
        assert_eq!(
            lex.next(),
            Some(Ok(TokenKind::HexInteger((
                0x45_u32.to_biguint(),
                LiteralKind::Unsized
            ))))
        );
        assert_eq!(lex.next(), Some(Ok(TokenKind::CloseBracket)));
        assert_eq!(lex.next(), None);
    }

    #[test]
    fn invalid_hex_is_not_hex() {
        let mut lex = TokenKind::lexer("0xg");
        assert_eq!(
            lex.next(),
            Some(Ok(TokenKind::Integer((
                0_u32.to_biguint(),
                LiteralKind::Unsized
            ))))
        );
        assert_eq!(
            lex.next(),
            Some(Ok(TokenKind::Identifier("xg".to_string())))
        );
        assert_eq!(lex.next(), None);
    }
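
    // Added check (not in the original test suite): string literals are lexed
    // with the surrounding quotes stripped by the callback.
    #[test]
    fn string_literals_work() {
        let mut lex = TokenKind::lexer(r#""hello""#);

        assert_eq!(
            lex.next(),
            Some(Ok(TokenKind::String("hello".to_string())))
        );
        assert_eq!(lex.next(), None);
    }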

    #[test]
    fn doc_comments_slice_correctly() {
        let mut lex = TokenKind::lexer("//! Hello\n///G'day");
        assert_eq!(
            lex.next(),
            Some(Ok(TokenKind::InsideDocumentation(" Hello".to_string())))
        );
        assert_eq!(
            lex.next(),
            Some(Ok(TokenKind::OutsideDocumentation("G'day".to_string())))
        );
        assert_eq!(lex.next(), None);
    }
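
    // Added check (not in the original test suite): sized binary literals go
    // through the same suffix handling as decimal ones.
    #[test]
    fn sized_binary_literals_work() {
        let mut lex = TokenKind::lexer("0b1010u4");

        assert_eq!(
            lex.next(),
            Some(Ok(TokenKind::BinInteger((
                0b1010_u32.to_biguint(),
                LiteralKind::Unsigned(4u32.to_biguint())
            ))))
        );
        assert_eq!(lex.next(), None);
    }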
}