1use crate::ast::Int;
2use logos::{Lexer, Logos};
3use std::collections::VecDeque;
4use std::num::ParseIntError;
5
/// Errors that can be produced while lexing FIRRTL source text.
#[derive(Default, Debug, Clone, PartialEq)]
pub enum LexicalError {
    /// An integer literal failed to parse; wraps the underlying std error.
    InvalidInteger(ParseIntError),
    /// Catch-all for unrecognized input; `#[default]` so it can serve as the
    /// default error value.
    #[default]
    InvalidToken,
}
12
13impl From<ParseIntError> for LexicalError {
14 fn from(err: ParseIntError) -> Self {
15 LexicalError::InvalidInteger(err)
16 }
17}
18
/// All FIRRTL tokens. Matching rules use the logos 0.12-era API
/// (callbacks + explicit `priority`, plus the `#[error]` variant).
#[derive(Logos, Debug, Clone, PartialEq)]
pub enum Token {
    // --- Synthetic tokens: produced by FIRRTLLexer, never matched by logos. ---
    EOF,
    Indent,
    Dedent,
    Info(String),
    Annotations(String),
    ID(Int),

    // --- Whitespace; consumed by the mode handlers, not surfaced to callers. ---
    #[token(" ")]
    Space,

    #[token("\t")]
    Tab,

    #[token("\n")]
    Newline,

    // --- Literals. ---
    // Radix-prefixed integers (binary/octal/decimal/hex), kept as raw text.
    #[regex("0b[01]+|0o[0-7]+|0d[0-9]+|0h[0-9A-Fa-f]+", |lex| lex.slice().to_string(), priority = 2)]
    RadixInt(String),

    // Plain (optionally negative) decimal integer; highest priority so it
    // wins over Identifier/Symbol on overlapping input.
    #[regex("-?[0-9]+", |lex| Int::from_str(lex.slice()), priority = 3)]
    IntegerDec(Int),

    // Identifiers; lowest priority so keyword tokens below take precedence.
    #[regex("[_A-Za-z][_A-Za-z0-9]*", |lex| lex.slice().to_string(), priority = 1)]
    Identifier(String),

    // Double-quoted string with backslash escapes, including the quotes.
    #[regex(r#""([^"\\]|\\.)*""#, |lex| lex.slice().to_string())]
    String(String),

    // --- Punctuation. ---
    #[token("/")]
    Slash,

    #[token("[")]
    LeftSquare,

    #[token("]")]
    RightSquare,

    #[token("<")]
    LeftAngle,

    #[token(">")]
    RightAngle,

    #[token("{")]
    LeftBracket,

    #[token("}")]
    RightBracket,

    #[token("(")]
    LeftParenthesis,

    #[token(")")]
    RightParenthesis,

    // `@` introduces an `@[...]` source-info span (see FIRRTLLexer::info_mode).
    #[token("@")]
    AtSymbol,

    // Backticks quote integer identifiers (see FIRRTLLexer::intid_mode).
    #[token("`")]
    Backtick,

    // `%[` opens an annotations block (see FIRRTLLexer::anno_mode).
    #[token("%[")]
    AnnoStart,

    #[token("<<")]
    DoubleLeft,

    #[token(">>")]
    DoubleRight,

    // --- Type keywords. ---
    #[token("Clock")]
    Clock,

    #[token("Reset")]
    Reset,

    #[token("AsyncReset")]
    AsyncReset,

    #[token("UInt")]
    UInt,

    #[token("SInt")]
    SInt,

    #[token("probe")]
    ProbeType,

    #[token("Probe")]
    Probe,

    #[token("Analog")]
    Analog,

    #[token("Fixed")]
    Fixed,

    #[token("flip")]
    Flip,

    // --- Primitive operations, grouped by arity (E = expr args, I = int args). ---
    // Two-expression ops.
    #[regex("add|sub|mul|div|rem|lt|leq|gt|geq|eq|neq|dshl|dshr|and|or|xor|cat", |lex| lex.slice().to_string())]
    E2Op(String),

    // One-expression ops.
    #[regex("asUInt|asSInt|asClock|asAsyncReset|cvt|neg|not|andr|orr|xorr", |lex| lex.slice().to_string())]
    E1Op(String),

    // One expression + one integer ops.
    #[regex("pad|shl|shr|head|tail", |lex| lex.slice().to_string())]
    E1I1Op(String),

    // One expression + two integers; note this pattern also consumes the `(`.
    #[regex("bits[(]", |lex| lex.slice().to_string())]
    E1I2Op(String),

    #[token("mux")]
    Mux,

    #[token("validif")]
    ValidIf,

    // --- Statement / structure keywords. ---
    #[token("smem")]
    SMem,

    #[token("cmem")]
    CMem,

    #[token("write")]
    Write,

    #[token("read")]
    Read,

    #[token("infer")]
    Infer,

    #[token("mport")]
    Mport,

    #[token("data-type")]
    DataType,

    #[token("depth")]
    Depth,

    #[token("read-latency")]
    ReadLatency,

    #[token("write-latency")]
    WriteLatency,

    #[token("read-under-write")]
    ReadUnderWrite,

    #[token("reader")]
    Reader,

    #[token("writer")]
    Writer,

    #[token("readwriter")]
    Readwriter,

    #[token("wire")]
    Wire,

    #[token("reg")]
    Reg,

    #[token("regreset")]
    RegReset,

    #[token("inst")]
    Inst,

    #[token("of")]
    Of,

    #[token("node")]
    Node,

    #[token("invalidate")]
    Invalidate,

    #[token("attach")]
    Attach,

    #[token("when")]
    When,

    #[token("else")]
    Else,

    #[token("stop")]
    Stop,

    #[token("printf")]
    Printf,

    #[token("assert")]
    Assert,

    #[token("skip")]
    Skip,

    #[token("input")]
    Input,

    #[token("output")]
    Output,

    #[token("module")]
    Module,

    #[token("extmodule")]
    ExtModule,

    #[token("defname")]
    DefName,

    #[token("parameter")]
    Parameter,

    #[token("intmodule")]
    IntModule,

    #[token("intrinsic")]
    Intrinsic,

    #[token("FIRRTL")]
    FIRRTL,

    #[token("version")]
    Version,

    #[token("circuit")]
    Circuit,

    #[token("connect")]
    Connect,

    #[token("public")]
    Public,

    #[token("define")]
    Define,

    #[token("const")]
    Const,

    // Generic symbol fallback. NOTE(review): this class overlaps many of the
    // single-character tokens above; logos resolves same-length conflicts via
    // priority (explicit tokens win) — confirm the intended winners.
    #[regex(r"[.,:=@%<>()\[\]{}]", |lex| lex.slice().to_string())]
    Symbol(String),

    // `.` triggers dotted-ID handling (see FIRRTLLexer::dotid_mode).
    #[token(".")]
    Period,

    // logos 0.12-style error variant for unmatched input.
    #[error]
    Error
}
283
/// State machine modes for the post-processing lexer; each line starts in
/// `Indent` mode.
#[derive(Default, Debug, Clone)]
enum LexerMode {
    /// Measuring leading whitespace at the start of a line.
    #[default]
    Indent,
    /// Inside a backtick-quoted integer identifier.
    IntId,
    /// Inside an `@[...]` source-info span.
    Info,
    /// Just consumed a `.`; a following integer becomes an ID.
    DotId,
    /// Inside a `%[...]` annotations block.
    Anno,
    /// Ordinary token stream.
    Normal,
}
294
/// A token together with its source position and, for raw lexer tokens,
/// the exact matched text.
#[derive(Debug)]
pub struct TokenString {
    pub token: Token,
    /// 1-based line number at the time the token was queued.
    pub line: usize,
    /// Byte offset of the token's start in the input (from `Lexer::span`).
    pub start: usize,
    /// Raw matched slice; `None` for synthesized tokens (Indent, Dedent, ...).
    pub name: Option<String>,
}
302
303impl From<(Token, usize, usize)> for TokenString {
304 fn from(value: (Token, usize, usize)) -> Self {
305 Self {
306 token: value.0,
307 line: value.1,
308 start: value.2,
309 name: None
310 }
311 }
312}
313
314impl TokenString {
315 fn new(token: Token, line: usize, start: usize, name: String) -> Self {
316 Self {
317 token,
318 line,
319 start,
320 name: Some(name)
321 }
322 }
323}
324
/// FIRRTL lexer: wraps the raw logos token stream and layers on
/// indentation tracking (`Indent`/`Dedent`), `@[...]` info capture,
/// `%[...]` annotation capture, and backtick/dotted integer-ID handling.
#[derive(Debug)]
pub struct FIRRTLLexer<'input> {
    /// Underlying logos lexer.
    lexer: Lexer<'input, Token>,
    /// Lookahead queue of raw tokens awaiting mode processing.
    tokens: VecDeque<TokenString>,
    /// Active sub-lexer mode.
    mode: LexerMode,
    /// Stack of open indentation widths; initialized to `[0]`.
    indent_levels: Vec<u32>,
    /// Indentation width accumulated on the current line.
    cur_indent: u32,
    /// Text accumulated inside the current `@[...]` span.
    info_string: String,
    /// Text accumulated inside the current annotations block.
    anno_string: String,
    /// True if the previous annotation token was `]` (a second one closes).
    previous_right_square: bool,
    /// Open `<`/`>` nesting depth.
    angle_num: u32,
    /// Open `[`/`]` nesting depth.
    square_num: u32,
    /// Open `{`/`}` nesting depth.
    bracket_num: u32,
    /// Open `(`/`)` nesting depth.
    parenthesis_num: u32,
    /// Set once end-of-input handling has finished.
    returned_eof: bool,
    /// Current 1-based line number, advanced on newlines.
    lineno: usize,
}
342
343impl<'input> FIRRTLLexer<'input> {
344 const TAB_WIDTH: u32 = 2;
345
346 pub fn new(input: &'input str) -> Self {
347 Self {
348 lexer: Token::lexer(input),
349 tokens: VecDeque::new(),
350 indent_levels: vec![0],
351 mode: LexerMode::Indent,
352 cur_indent: 0,
353 info_string: String::default(),
354 anno_string: String::default(),
355 previous_right_square: false,
356 angle_num: 0,
357 square_num: 0,
358 bracket_num: 0,
359 parenthesis_num: 0,
360 returned_eof: false,
361 lineno: 1,
362 }
363 }
364
365 fn indent_mode(&mut self) -> Option<TokenString> {
366 let ts = self.tokens.pop_front().unwrap();
367 match ts.token {
368 Token::Space => {
369 self.cur_indent += 1;
370 None
371 }
372 Token::Tab => {
373 self.cur_indent = (self.cur_indent + Self::TAB_WIDTH) & !(Self::TAB_WIDTH - 1);
374 None
375 }
376 Token::Newline => {
377 self.lineno += 1;
378 self.cur_indent = 0;
379 None
380 }
381 _ => {
382 let start = ts.start;
383 self.tokens.push_front(ts);
384
385 let lvl = *self.indent_levels.last().unwrap();
386 if self.cur_indent > lvl {
387 self.mode = LexerMode::Normal;
388 self.indent_levels.push(self.cur_indent);
389 return Some(TokenString::from((Token::Indent, self.lineno, start)));
390 } else if self.cur_indent < lvl {
391 self.indent_levels.pop();
392 return Some(TokenString::from((Token::Dedent, self.lineno, start)));
393 } else {
394 self.mode = LexerMode::Normal;
395 None
396 }
397 }
398 }
399 }
400
401 fn info_mode(&mut self) -> Option<TokenString> {
402 let ts = self.tokens.pop_front().unwrap();
403 match ts.token {
404 Token::LeftSquare => {
405 self.info_string = String::default();
406 None
407 }
408 Token::RightSquare => {
409 self.mode = LexerMode::Normal;
410 Some(TokenString::from((Token::Info(self.info_string.clone()), ts.line, ts.start)))
411 }
412 _ => {
413 self.info_string.push_str(&ts.name.unwrap());
414 None
415 }
416 }
417 }
418
419 fn dotid_mode(&mut self) -> Option<TokenString> {
420 let ts = self.tokens.pop_front().unwrap();
421 match ts.token {
422 Token::IntegerDec(x) => {
423 self.mode = LexerMode::Normal;
424 Some(TokenString::from((Token::ID(x), ts.line, ts.start)))
425 }
426 Token::Backtick => {
427 self.mode = LexerMode::IntId;
428 None
429 }
430 _ => {
431 self.mode = LexerMode::Normal;
432 Some(ts)
433 }
434 }
435 }
436
437 fn intid_mode(&mut self) -> Option<TokenString> {
438 let ts = self.tokens.pop_front().unwrap();
439 match ts.token {
440 Token::IntegerDec(x) => {
441 Some(TokenString::from((Token::ID(x), ts.line, ts.start)))
442 }
443 Token::Backtick => {
444 self.mode = LexerMode::Normal;
445 None
446 }
447 _ => {
448 println!("{:?}", ts);
449 Some(TokenString::from((Token::Error, ts.line, ts.start)))
450 }
451 }
452 }
453
454 fn anno_mode(&mut self) -> Option<TokenString> {
455 let ts = self.tokens.pop_front().unwrap();
456 match ts.token {
457 Token::RightSquare => {
458 if self.previous_right_square {
459 self.mode = LexerMode::Normal;
460 Some(TokenString::from((Token::Annotations(self.anno_string.clone()), ts.line, ts.start)))
461 } else {
462 self.previous_right_square = true;
463 self.anno_string.push_str(&ts.name.unwrap());
464 None
465 }
466 }
467 _ => {
468 self.previous_right_square = false;
469 self.anno_string.push_str(&ts.name.unwrap());
470 None
471 }
472 }
473 }
474
475 fn eof_mode(&mut self) -> Option<TokenString> {
476 if *self.indent_levels.last().unwrap() != 0 {
477 self.indent_levels.pop();
478 return Some(TokenString {
479 token: Token::Dedent,
480 line: self.lineno,
481 start: 0,
482 name: None,
483
484 });
485 } else {
486 return None;
487 }
488 }
489
490 fn normal_mode(&mut self) -> Option<TokenString> {
491 let ts = self.tokens.pop_front().unwrap();
492 match &ts.token {
493 Token::Newline => {
494 self.lineno += 1;
495 self.cur_indent = 0;
496 self.mode = LexerMode::Indent;
497 None
498 }
499 Token::Space => {
500 None
501 }
502 Token::IntegerDec(x) => {
503 if self.angle_num == 0 &&
504 self.square_num == 0 &&
505 self.parenthesis_num == 0 &&
506 self.bracket_num != 0 {
507 Some(TokenString::from((Token::ID(x.clone()), ts.line, ts.start)))
508 } else {
509 Some(ts)
510 }
511 }
512 Token::AtSymbol => {
513 self.mode = LexerMode::Info;
514 None
515 }
516 Token::LeftAngle => {
517 self.angle_num += 1;
518 Some(ts)
519 }
520 Token::RightAngle => {
521 self.angle_num -= 1;
522 Some(ts)
523 }
524 Token::LeftBracket => {
525 self.bracket_num += 1;
526 Some(ts)
527 }
528 Token::RightBracket => {
529 self.bracket_num -= 1;
530 Some(ts)
531 }
532 Token::LeftParenthesis => {
533 self.parenthesis_num += 1;
534 Some(ts)
535 }
536 Token::RightParenthesis => {
537 self.parenthesis_num -= 1;
538 Some(ts)
539 }
540 Token::E1Op(_) |
541 Token::E2Op(_) |
542 Token::E1I1Op(_) |
543 Token::E1I2Op(_) => {
544 self.parenthesis_num += 1;
545 Some(ts)
546 }
547 Token::Backtick => {
548 self.mode = LexerMode::IntId;
549 None
550 }
551 Token::Period => {
552 self.mode = LexerMode::DotId;
553 Some(ts)
554 }
555 Token::AnnoStart => {
556 self.mode = LexerMode::Anno;
557 None
558 }
559 _ => {
560 Some(ts)
561 }
562 }
563 }
564
565 fn try_push(&mut self) {
566 match self.lexer.next() {
567 Some(token) => {
568 self.tokens.push_back(TokenString::new(
569 token,
570 self.lineno,
571 self.lexer.span().start,
572 self.lexer.slice().to_string()));
573 }
574 _ => { }
575 }
576 }
577
578 pub fn next_token(&mut self) -> Option<TokenString> {
579 self.try_push();
580
581 while !self.tokens.is_empty() {
582 let next_token_opt = match self.mode {
583 LexerMode::Indent => { self.indent_mode() }
584 LexerMode::IntId => { self.intid_mode() }
585 LexerMode::DotId => { self.dotid_mode() }
586 LexerMode::Info => { self.info_mode() }
587 LexerMode::Anno => { self.anno_mode() }
588 LexerMode::Normal => { self.normal_mode() }
589 };
590 match next_token_opt {
591 Some(ts) => {
592 return Some(ts)
593 }
594 _ => {
595 self.try_push();
596 continue;
597 }
598 }
599 }
600
601 if !self.returned_eof {
603 match self.eof_mode() {
604 Some(ts) => {
605 return Some(ts);
606 }
607 _ => {
608 self.returned_eof = true;
609 return None;
610 }
611 }
612 } else {
613 None
614 }
615 }
616}
617
/// Spanned token triple `(Loc, Tok, Loc)` in the shape parser generators
/// such as LALRPOP consume from an external lexer.
pub type Spanned<Tok, Loc, Error> = Result<(Loc, Tok, Loc), Error>;
619
620impl <'input> Iterator for FIRRTLLexer<'input> {
621 type Item = Spanned<Token, usize, LexicalError>;
622
623 fn next(&mut self) -> Option<Self::Item> {
624 self.next_token().map(|x| Ok((x.line, x.token, x.start)))
625 }
626}