1use logos::Logos;
2
3use num::BigUint;
4
5#[derive(Debug, PartialEq, Clone)]
6pub enum LiteralKind {
7 Unsized,
8 Signed(BigUint),
9 Unsigned(BigUint),
10}
11
12fn parse_int(slice: &str, radix: u32) -> (BigUint, LiteralKind) {
13 let lower = slice.to_ascii_lowercase().replace(['_'], "");
14
15 let (cleaned, kind) = if lower.contains("u") {
16 let split = lower.split("u").collect::<Vec<_>>();
17 let kind = LiteralKind::Unsigned(BigUint::parse_bytes(split[1].as_bytes(), 10).unwrap());
18 (split[0], kind)
19 } else if lower.contains("i") {
20 let split = lower.split("i").collect::<Vec<_>>();
21 let kind = LiteralKind::Signed(BigUint::parse_bytes(split[1].as_bytes(), 10).unwrap());
22 (split[0], kind)
23 } else {
24 (lower.as_str(), LiteralKind::Unsized)
25 };
26
27 (
28 BigUint::parse_bytes(cleaned.as_bytes(), radix).unwrap(),
29 kind,
30 )
31}
32
33#[derive(Logos, Debug, PartialEq, Clone)]
34pub enum TokenKind {
35 #[regex(r#"(?x:
37 [\p{XID_Start}_]
38 \p{XID_Continue}*
39 (\u{3F} | \u{21} | (\u{3F}\u{21}) | \u{2048})? # ? ! ?! ⁈
40 )"#, |lex| lex.slice().to_string())]
41 Identifier(String),
42
43 #[regex(r"[0-9][0-9_]*([uUiI][0-9]+)?", |lex| {
44 parse_int(lex.slice(), 10)
45 })]
46 Integer((BigUint, LiteralKind)),
47 #[regex(r"0x[0-9A-Fa-f][0-9_A-Fa-f]*([uUiI][0-9]+)?", |lex| {
48 parse_int(&lex.slice()[2..], 16)
49 })]
50 HexInteger((BigUint, LiteralKind)),
51 #[regex(r"0b[0-1][0-1_]*([uUiI][0-9]+)?", |lex| {
52 parse_int(&lex.slice()[2..], 2)
53 })]
54 BinInteger((BigUint, LiteralKind)),
55
56 #[token("true")]
57 True,
58 #[token("false")]
59 False,
60
61 #[token("LOW")]
62 Low,
63 #[token("HIGH")]
64 High,
65 #[token("HIGHIMP")]
66 HighImp,
67
68 #[token("reg")]
70 Reg,
71 #[token("let")]
72 Let,
73 #[token("decl")]
74 Decl,
75 #[token("inst")]
76 Instance,
77 #[token("reset")]
78 Reset,
79 #[token("initial")]
80 Initial,
81 #[token("if")]
82 If,
83 #[token("else")]
84 Else,
85 #[token("match")]
86 Match,
87 #[token("set")]
88 Set,
89
90 #[token("pipeline")]
91 Pipeline,
92 #[token("stage")]
93 Stage,
94 #[token("entity")]
95 Entity,
96 #[token("trait")]
97 Trait,
98 #[token("impl")]
99 Impl,
100 #[token("for")]
101 For,
102 #[token("fn")]
103 Function,
104 #[token("enum")]
105 Enum,
106 #[token("struct")]
107 Struct,
108 #[token("port")]
109 Port,
110 #[token("mod")]
111 Mod,
112 #[token("use")]
113 Use,
114 #[token("as")]
115 As,
116 #[token("assert")]
117 Assert,
118 #[token("mut")]
119 Mut,
120 #[token("inv")]
121 Inv,
122 #[token("where")]
123 Where,
124
125 #[token("gen")]
126 Gen,
127
128 #[token("extern")]
129 Extern,
130
131 #[token("+")]
133 Plus,
134 #[token("-")]
135 Minus,
136 #[token("*")]
137 Asterisk,
138 #[token("/")]
139 Slash,
140 #[token("%")]
141 Percentage,
142 #[token("==")]
143 Equals,
144 #[token("!=")]
145 NotEquals,
146 #[token("<")]
147 Lt,
148 #[token(">")]
149 Gt,
150 #[token("<=")]
151 Le,
152 #[token(">=")]
153 Ge,
154 #[token(">>>")]
155 ArithmeticRightShift,
156 #[token(">>")]
157 RightShift,
158 #[token("<<")]
159 LeftShift,
160 #[token("||")]
161 LogicalOr,
162 #[token("&&")]
163 LogicalAnd,
164 #[token("^^")]
165 LogicalXor,
166 #[token("&")]
167 Ampersand,
168 #[token("|")]
169 BitwiseOr,
170 #[token("!")]
171 Not,
172 #[token("^")]
173 BitwiseXor,
174 #[token("~")]
175 Tilde,
176 #[token("`")]
177 InfixOperatorSeparator,
178 #[token("'")]
179 SingleQuote,
180
181 #[token("=")]
183 Assignment,
184
185 #[token("(")]
186 OpenParen,
187 #[token(")")]
188 CloseParen,
189
190 #[token("{")]
191 OpenBrace,
192 #[token("}")]
193 CloseBrace,
194
195 #[token("[")]
196 OpenBracket,
197 #[token("]")]
198 CloseBracket,
199
200 #[token("=>")]
201 FatArrow,
202 #[token("->")]
203 SlimArrow,
204 #[token(",")]
205 Comma,
206 #[token(".")]
207 Dot,
208 #[token(";")]
209 Semi,
210 #[token(";")]
211 GreekQuestionMark,
212 #[token(":")]
213 Colon,
214 #[token("::")]
215 PathSeparator,
216 #[token("#")]
217 Hash,
218 #[token("$")]
219 Dollar,
220
221 #[regex("///[^\n]*", |lex| lex.slice()[3..].to_string())]
222 OutsideDocumentation(String),
223 #[regex("//![^\n]*", |lex| lex.slice()[3..].to_string())]
224 InsideDocumentation(String),
225
226 #[regex("[ \t\n\r]", logos::skip)]
228 Whitespace,
229
230 #[regex("//[^\n]*", logos::skip)]
231 Comment,
232
233 #[token("/*")]
234 BlockCommentStart,
235 #[token("*/")]
236 BlockCommentEnd,
237
238 Eof,
239}
240
241impl TokenKind {
242 pub fn as_str(&self) -> &'static str {
243 match self {
244 TokenKind::Identifier(_) => "identifier",
245 TokenKind::Integer(_) => "integer",
246 TokenKind::HexInteger(_) => "hexadecimal integer",
247 TokenKind::BinInteger(_) => "binary integer",
248 TokenKind::True => "true",
249 TokenKind::False => "false",
250 TokenKind::Low => "LOW",
251 TokenKind::High => "HIGH",
252 TokenKind::HighImp => "HIGHIMP",
253
254 TokenKind::Let => "let",
255 TokenKind::Reg => "reg",
256 TokenKind::Decl => "decl",
257 TokenKind::Entity => "entity",
258 TokenKind::Pipeline => "pipeline",
259 TokenKind::Stage => "stage",
260 TokenKind::Instance => "inst",
261 TokenKind::Reset => "reset",
262 TokenKind::Initial => "initial",
263 TokenKind::If => "if",
264 TokenKind::Else => "else",
265 TokenKind::Match => "match",
266 TokenKind::Impl => "impl",
267 TokenKind::Trait => "trait",
268 TokenKind::For => "for",
269 TokenKind::Function => "fn",
270 TokenKind::Enum => "enum",
271 TokenKind::Struct => "struct",
272 TokenKind::Port => "port",
273 TokenKind::Mod => "mod",
274 TokenKind::As => "as",
275 TokenKind::Use => "use",
276 TokenKind::Assert => "assert",
277 TokenKind::Set => "set",
278 TokenKind::Mut => "mut",
279 TokenKind::Inv => "inv",
280 TokenKind::Where => "where",
281
282 TokenKind::Gen => "gen",
283
284 TokenKind::Extern => "extern",
285
286 TokenKind::Assignment => "=",
287 TokenKind::Plus => "+",
288 TokenKind::Minus => "-",
289 TokenKind::Asterisk => "*",
290 TokenKind::Slash => "/",
291 TokenKind::Percentage => "%",
292 TokenKind::Equals => "==",
293 TokenKind::NotEquals => "!=",
294 TokenKind::Lt => "<",
295 TokenKind::Gt => ">",
296 TokenKind::Le => "<=",
297 TokenKind::Ge => ">=",
298 TokenKind::LeftShift => "<<",
299 TokenKind::RightShift => ">>",
300 TokenKind::ArithmeticRightShift => ">>>",
301 TokenKind::LogicalOr => "||",
302 TokenKind::LogicalAnd => "&&",
303 TokenKind::LogicalXor => "^^",
304 TokenKind::Ampersand => "&",
305 TokenKind::BitwiseOr => "|",
306 TokenKind::Not => "!",
307 TokenKind::Tilde => "~",
308 TokenKind::BitwiseXor => "^",
309 TokenKind::InfixOperatorSeparator => "`",
310
311 TokenKind::OpenParen => "(",
312 TokenKind::CloseParen => ")",
313 TokenKind::OpenBrace => "{",
314 TokenKind::CloseBrace => "}",
315 TokenKind::OpenBracket => "[",
316 TokenKind::CloseBracket => "]",
317
318 TokenKind::FatArrow => "=>",
319 TokenKind::SlimArrow => "->",
320 TokenKind::Semi => ";",
321 TokenKind::GreekQuestionMark => "GreekQuestionMark(;)",
322 TokenKind::Colon => ":",
323 TokenKind::Comma => ",",
324 TokenKind::Dot => ".",
325 TokenKind::PathSeparator => "::",
326 TokenKind::SingleQuote => "'",
327
328 TokenKind::Hash => "#",
329 TokenKind::Dollar => "$",
330
331 TokenKind::Eof => "end of file",
332
333 TokenKind::OutsideDocumentation(_) => "///",
334 TokenKind::InsideDocumentation(_) => "//!",
335
336 TokenKind::Whitespace => "whitespace",
337 TokenKind::Comment => "comment",
338
339 TokenKind::BlockCommentStart => "/*",
340 TokenKind::BlockCommentEnd => "*/",
341 }
342 }
343
344 pub fn is_identifier(&self) -> bool {
345 matches!(self, TokenKind::Identifier(_))
346 }
347 pub fn is_integer(&self) -> bool {
348 matches!(
349 self,
350 TokenKind::Integer(_) | TokenKind::HexInteger(_) | TokenKind::BinInteger(_)
351 )
352 }
353
354 pub fn as_biguint(&self) -> Option<BigUint> {
355 match self {
356 TokenKind::Integer((i, _))
357 | TokenKind::HexInteger((i, _))
358 | TokenKind::BinInteger((i, _)) => Some(i.clone()),
359 _ => None,
360 }
361 }
362}
363
#[cfg(test)]
mod tests {
    use spade_common::num_ext::InfallibleToBigUint;

    use super::*;

    /// Lexes `source` and checks that it produces exactly the tokens in
    /// `expected`, in order, followed by the end of the input.
    fn assert_tokens(source: &str, expected: Vec<TokenKind>) {
        let mut lexer = TokenKind::lexer(source);
        for token in expected {
            assert_eq!(lexer.next(), Some(Ok(token)));
        }
        assert_eq!(lexer.next(), None);
    }

    #[test]
    fn identifiers_work() {
        // Only the first token is checked here.
        let first = TokenKind::lexer("abc123_").next();

        assert_eq!(
            first,
            Some(Ok(TokenKind::Identifier(String::from("abc123_"))))
        );
    }

    #[test]
    fn integer_literals_work() {
        assert_tokens(
            "123",
            vec![TokenKind::Integer((
                123_u32.to_biguint(),
                LiteralKind::Unsized,
            ))],
        );
    }

    #[test]
    fn sized_uint_integer_literals_work() {
        assert_tokens(
            "123u3",
            vec![TokenKind::Integer((
                123_u32.to_biguint(),
                LiteralKind::Unsigned(3u32.to_biguint()),
            ))],
        );
    }

    #[test]
    fn sized_int_integer_literals_work() {
        assert_tokens(
            "123i3",
            vec![TokenKind::Integer((
                123_u32.to_biguint(),
                LiteralKind::Signed(3u32.to_biguint()),
            ))],
        );
    }

    #[test]
    fn hex_array() {
        assert_tokens(
            "[0x45]",
            vec![
                TokenKind::OpenBracket,
                TokenKind::HexInteger((0x45_u32.to_biguint(), LiteralKind::Unsized)),
                TokenKind::CloseBracket,
            ],
        );
    }

    #[test]
    fn invalid_hex_is_not_hex() {
        // `g` is not a hex digit, so this lexes as the integer `0` and then an identifier.
        assert_tokens(
            "0xg",
            vec![
                TokenKind::Integer((0_u32.to_biguint(), LiteralKind::Unsized)),
                TokenKind::Identifier(String::from("xg")),
            ],
        );
    }

    #[test]
    fn doc_comments_slice_correctly() {
        assert_tokens(
            "//! Hello\n///G'day",
            vec![
                TokenKind::InsideDocumentation(String::from(" Hello")),
                TokenKind::OutsideDocumentation(String::from("G'day")),
            ],
        );
    }
}