chirrtl_parser/lexer.rs

1use crate::ast::Int;
2use logos::{Lexer, Logos};
3use std::collections::VecDeque;
4use std::num::ParseIntError;
5
/// Errors the lexer can report to the parser.
#[derive(Default, Debug, Clone, PartialEq)]
pub enum LexicalError {
    /// An integer literal failed to parse; wraps the std parse error.
    InvalidInteger(ParseIntError),
    /// Catch-all for input no token rule matches.
    #[default]
    InvalidToken,
}
12
13impl From<ParseIntError> for LexicalError {
14    fn from(err: ParseIntError) -> Self {
15        LexicalError::InvalidInteger(err)
16    }
17}
18
/// Tokens recognized by the logos-generated lexer for CHIRRTL/FIRRTL text.
///
/// Variants without a `#[token]`/`#[regex]` attribute (`EOF`, `Indent`,
/// `Dedent`, `Info`, `Annotations`, `ID`) are never matched by logos; they
/// are synthesized by `FIRRTLLexer` during post-processing.
#[derive(Logos, Debug, Clone, PartialEq)]
pub enum Token {
    EOF,
    Indent,
    Dedent,
    // Source-locator text gathered from a `@[...]` attribute.
    Info(String),
    // Raw annotation text gathered from a `%[ ... ]]` block.
    Annotations(String),
    // Integer used as an identifier (produced from backtick/dot contexts).
    ID(Int),

    // Whitespace is tokenized explicitly so indentation can be tracked.
    #[token(" ")]
    Space,

    #[token("\t")]
    Tab,

    #[token("\n")]
    Newline,

    // Radix-prefixed integer literal (0b/0o/0d/0h), kept as raw text.
    #[regex("0b[01]+|0o[0-7]+|0d[0-9]+|0h[0-9A-Fa-f]+", |lex| lex.slice().to_string(), priority = 2)]
    RadixInt(String),

    // Decimal integer, optionally negative; priority 3 so it beats the
    // radix and identifier rules on overlapping prefixes.
    #[regex("-?[0-9]+", |lex| Int::from_str(lex.slice()), priority = 3)]
    IntegerDec(Int),

    #[regex("[_A-Za-z][_A-Za-z0-9]*", |lex| lex.slice().to_string(), priority = 1)]
    Identifier(String),

    // Double-quoted string literal; a backslash escapes any character.
    #[regex(r#""([^"\\]|\\.)*""#, |lex| lex.slice().to_string())]
    String(String),

    // ---- Punctuation ----
    #[token("/")]
    Slash,

    #[token("[")]
    LeftSquare,

    #[token("]")]
    RightSquare,

    #[token("<")]
    LeftAngle,

    #[token(">")]
    RightAngle,

    #[token("{")]
    LeftBracket,

    #[token("}")]
    RightBracket,

    #[token("(")]
    LeftParenthesis,

    #[token(")")]
    RightParenthesis,

    // `@` introduces a `@[...]` info attribute (handled by Info mode).
    #[token("@")]
    AtSymbol,

    // Backticks quote integer identifiers (handled by IntId mode).
    #[token("`")]
    Backtick,

    // `%[` opens an annotation block (handled by Anno mode).
    #[token("%[")]
    AnnoStart,

// #[token("]]")]
// AnnoEnd,

    #[token("<<")]
    DoubleLeft,

    #[token(">>")]
    DoubleRight,

    // ---- Type keywords ----
    #[token("Clock")]
    Clock,

    #[token("Reset")]
    Reset,

    #[token("AsyncReset")]
    AsyncReset,

    #[token("UInt")]
    UInt,

    #[token("SInt")]
    SInt,

    #[token("probe")]
    ProbeType,

    #[token("Probe")]
    Probe,

    #[token("Analog")]
    Analog,

    #[token("Fixed")]
    Fixed,

    #[token("flip")]
    Flip,

    // ---- Primitive operations, grouped by arity ----
    // Two-expression ops.
    #[regex("add|sub|mul|div|rem|lt|leq|gt|geq|eq|neq|dshl|dshr|and|or|xor|cat", |lex| lex.slice().to_string())]
    E2Op(String),

    // One-expression ops.
    #[regex("asUInt|asSInt|asClock|asAsyncReset|cvt|neg|not|andr|orr|xorr", |lex| lex.slice().to_string())]
    E1Op(String),

    // One expression + one integer parameter.
    #[regex("pad|shl|shr|head|tail", |lex| lex.slice().to_string())]
    E1I1Op(String),

    // One expression + two integer parameters; the pattern swallows the `(`.
    #[regex("bits[(]", |lex| lex.slice().to_string())]
    E1I2Op(String),

    #[token("mux")]
    Mux,

    #[token("validif")]
    ValidIf,

// #[token("mem")]
// Mem,

    // ---- Memory and statement keywords ----
    #[token("smem")]
    SMem,

    #[token("cmem")]
    CMem,

    #[token("write")]
    Write,

    #[token("read")]
    Read,

    #[token("infer")]
    Infer,

    #[token("mport")]
    Mport,

    #[token("data-type")]
    DataType,

    #[token("depth")]
    Depth,

    #[token("read-latency")]
    ReadLatency,

    #[token("write-latency")]
    WriteLatency,

    #[token("read-under-write")]
    ReadUnderWrite,

    #[token("reader")]
    Reader,

    #[token("writer")]
    Writer,

    #[token("readwriter")]
    Readwriter,

    #[token("wire")]
    Wire,

    #[token("reg")]
    Reg,

    #[token("regreset")]
    RegReset,

    #[token("inst")]
    Inst,

    #[token("of")]
    Of,

    #[token("node")]
    Node,

    #[token("invalidate")]
    Invalidate,

    #[token("attach")]
    Attach,

    #[token("when")]
    When,

    #[token("else")]
    Else,

    #[token("stop")]
    Stop,

    #[token("printf")]
    Printf,

    #[token("assert")]
    Assert,

    #[token("skip")]
    Skip,

    // ---- Module / circuit structure keywords ----
    #[token("input")]
    Input,

    #[token("output")]
    Output,

    #[token("module")]
    Module,

    #[token("extmodule")]
    ExtModule,

    #[token("defname")]
    DefName,

    #[token("parameter")]
    Parameter,

    #[token("intmodule")]
    IntModule,

    #[token("intrinsic")]
    Intrinsic,

    #[token("FIRRTL")]
    FIRRTL,

    #[token("version")]
    Version,

    #[token("circuit")]
    Circuit,

    #[token("connect")]
    Connect,

    #[token("public")]
    Public,

    #[token("define")]
    Define,

    #[token("const")]
    Const,

    // Fallback single-character symbol (most of these also have dedicated
    // variants above, which logos prefers).
    #[regex(r"[.,:=@%<>()\[\]{}]", |lex| lex.slice().to_string())]
    Symbol(String),

    // `.` gets its own variant so `normal_mode` can enter DotId mode.
    #[token(".")]
    Period,

    // logos catch-all error variant.
    #[error]
    Error
}
283
/// Post-processing state machine; selects which filter `next_token` applies
/// to the raw logos token stream.
#[derive(Default, Debug, Clone)]
enum LexerMode {
    /// Start of a line: counting leading whitespace for Indent/Dedent.
    #[default]
    Indent,
    /// Inside a backtick-quoted integer identifier.
    IntId,
    /// Inside a `@[...]` source-locator attribute.
    Info,
    /// Just saw `.`; a following integer is a sub-field ID, not a literal.
    DotId,
    /// Inside a `%[ ... ]]` annotation block.
    Anno,
    /// Ordinary token stream.
    Normal,
}
294
/// A token plus the position it was matched at.
#[derive(Debug)]
pub struct TokenString {
    pub token: Token,
    pub line: usize,              // 1-based line number
    pub start: usize,             // byte offset of the match in the input
    pub name: Option<String>,     // raw matched text, when captured
}
302
303impl From<(Token, usize, usize)> for TokenString {
304    fn from(value: (Token, usize, usize)) -> Self {
305        Self {
306            token: value.0,
307            line: value.1,
308            start: value.2,
309            name: None
310        }
311    }
312}
313
314impl TokenString {
315    fn new(token: Token, line: usize, start: usize, name: String) -> Self {
316        Self {
317            token,
318            line,
319            start,
320            name: Some(name)
321        }
322    }
323}
324
/// Wraps the raw logos lexer with FIRRTL-specific post-processing:
/// indentation tracking (Indent/Dedent), `@[...]` info collection,
/// `%[...]]` annotation collection, and integer-identifier handling.
#[derive(Debug)]
pub struct FIRRTLLexer<'input> {
    lexer: Lexer<'input, Token>,       // underlying logos lexer
    tokens: VecDeque<TokenString>,     // queue of raw tokens awaiting filtering
    mode: LexerMode,                   // current post-processing mode
    indent_levels: Vec<u32>,           // open indentation levels; bottom is 0
    cur_indent: u32,                   // indentation counted on the current line
    info_string: String,               // accumulator for Info mode
    anno_string: String,               // accumulator for Anno mode
    previous_right_square: bool,       // saw one `]` of the closing `]]` in Anno mode
    angle_num: u32,                    // open `<` depth
    square_num: u32,                   // open `[` depth
    bracket_num: u32,                  // open `{` depth
    parenthesis_num: u32,              // open `(` depth
    returned_eof: bool,                // end-of-input already signalled
    lineno: usize,                     // 1-based current line
}
342
343impl<'input> FIRRTLLexer<'input> {
344    const TAB_WIDTH: u32 = 2;
345
346    pub fn new(input: &'input str) -> Self {
347        Self {
348            lexer: Token::lexer(input),
349            tokens: VecDeque::new(),
350            indent_levels: vec![0],
351            mode: LexerMode::Indent,
352            cur_indent: 0,
353            info_string: String::default(),
354            anno_string: String::default(),
355            previous_right_square: false,
356            angle_num: 0,
357            square_num: 0,
358            bracket_num: 0,
359            parenthesis_num: 0,
360            returned_eof: false,
361            lineno: 1,
362        }
363    }
364
365    fn indent_mode(&mut self) -> Option<TokenString> {
366        let ts = self.tokens.pop_front().unwrap();
367        match ts.token {
368            Token::Space => {
369                self.cur_indent += 1;
370                None
371            }
372            Token::Tab => {
373                self.cur_indent = (self.cur_indent + Self::TAB_WIDTH) & !(Self::TAB_WIDTH - 1);
374                None
375            }
376            Token::Newline => {
377                self.lineno += 1;
378                self.cur_indent = 0;
379                None
380            }
381            _ => {
382                let start = ts.start;
383                self.tokens.push_front(ts);
384
385                let lvl = *self.indent_levels.last().unwrap();
386                if self.cur_indent > lvl {
387                    self.mode = LexerMode::Normal;
388                    self.indent_levels.push(self.cur_indent);
389                    return Some(TokenString::from((Token::Indent, self.lineno, start)));
390                } else if self.cur_indent < lvl {
391                    self.indent_levels.pop();
392                    return Some(TokenString::from((Token::Dedent, self.lineno, start)));
393                } else {
394                    self.mode = LexerMode::Normal;
395                    None
396                }
397            }
398        }
399    }
400
401    fn info_mode(&mut self) -> Option<TokenString> {
402        let ts = self.tokens.pop_front().unwrap();
403        match ts.token {
404            Token::LeftSquare => {
405                self.info_string = String::default();
406                None
407            }
408            Token::RightSquare => {
409                self.mode = LexerMode::Normal;
410                Some(TokenString::from((Token::Info(self.info_string.clone()), ts.line, ts.start)))
411            }
412            _ => {
413                self.info_string.push_str(&ts.name.unwrap());
414                None
415            }
416        }
417    }
418
419    fn dotid_mode(&mut self) -> Option<TokenString> {
420        let ts = self.tokens.pop_front().unwrap();
421        match ts.token {
422            Token::IntegerDec(x) => {
423                self.mode = LexerMode::Normal;
424                Some(TokenString::from((Token::ID(x), ts.line, ts.start)))
425            }
426            Token::Backtick => {
427                self.mode = LexerMode::IntId;
428                None
429            }
430            _ => {
431                self.mode = LexerMode::Normal;
432                Some(ts)
433            }
434        }
435    }
436
437    fn intid_mode(&mut self) -> Option<TokenString> {
438        let ts = self.tokens.pop_front().unwrap();
439        match ts.token {
440            Token::IntegerDec(x) => {
441                Some(TokenString::from((Token::ID(x), ts.line, ts.start)))
442            }
443            Token::Backtick => {
444                self.mode = LexerMode::Normal;
445                None
446            }
447            _ => {
448                println!("{:?}", ts);
449                Some(TokenString::from((Token::Error, ts.line, ts.start)))
450            }
451        }
452    }
453
454    fn anno_mode(&mut self) -> Option<TokenString> {
455        let ts = self.tokens.pop_front().unwrap();
456        match ts.token {
457            Token::RightSquare => {
458                if self.previous_right_square {
459                    self.mode = LexerMode::Normal;
460                    Some(TokenString::from((Token::Annotations(self.anno_string.clone()), ts.line, ts.start)))
461                } else {
462                    self.previous_right_square = true;
463                    self.anno_string.push_str(&ts.name.unwrap());
464                    None
465                }
466            }
467            _ => {
468                self.previous_right_square = false;
469                self.anno_string.push_str(&ts.name.unwrap());
470                None
471            }
472        }
473    }
474
475    fn eof_mode(&mut self) -> Option<TokenString> {
476        if *self.indent_levels.last().unwrap() != 0 {
477            self.indent_levels.pop();
478            return Some(TokenString {
479                token: Token::Dedent,
480                line: self.lineno,
481                start: 0,
482                name: None,
483
484            });
485        } else {
486            return None;
487        }
488    }
489
490    fn normal_mode(&mut self) -> Option<TokenString> {
491        let ts = self.tokens.pop_front().unwrap();
492        match &ts.token {
493            Token::Newline => {
494                self.lineno += 1;
495                self.cur_indent = 0;
496                self.mode = LexerMode::Indent;
497                None
498            }
499            Token::Space => {
500                None
501            }
502            Token::IntegerDec(x) => {
503                if self.angle_num == 0 &&
504                    self.square_num == 0 &&
505                    self.parenthesis_num == 0 &&
506                    self.bracket_num != 0 {
507                    Some(TokenString::from((Token::ID(x.clone()), ts.line, ts.start)))
508                } else {
509                    Some(ts)
510                }
511            }
512            Token::AtSymbol => {
513                self.mode = LexerMode::Info;
514                None
515            }
516            Token::LeftAngle => {
517                self.angle_num += 1;
518                Some(ts)
519            }
520            Token::RightAngle => {
521                self.angle_num -= 1;
522                Some(ts)
523            }
524            Token::LeftBracket => {
525                self.bracket_num += 1;
526                Some(ts)
527            }
528            Token::RightBracket => {
529                self.bracket_num -= 1;
530                Some(ts)
531            }
532            Token::LeftParenthesis => {
533                self.parenthesis_num += 1;
534                Some(ts)
535            }
536            Token::RightParenthesis => {
537                self.parenthesis_num -= 1;
538                Some(ts)
539            }
540            Token::E1Op(_) |
541                Token::E2Op(_) |
542                Token::E1I1Op(_) |
543                Token::E1I2Op(_) => {
544                self.parenthesis_num += 1;
545                Some(ts)
546            }
547            Token::Backtick => {
548                self.mode = LexerMode::IntId;
549                None
550            }
551            Token::Period => {
552                self.mode = LexerMode::DotId;
553                Some(ts)
554            }
555            Token::AnnoStart => {
556                self.mode = LexerMode::Anno;
557                None
558            }
559            _ => {
560                Some(ts)
561            }
562        }
563    }
564
565    fn try_push(&mut self) {
566        match self.lexer.next() {
567            Some(token) => {
568                self.tokens.push_back(TokenString::new(
569                        token,
570                        self.lineno,
571                        self.lexer.span().start,
572                        self.lexer.slice().to_string()));
573            }
574            _ => { }
575        }
576    }
577
578    pub fn next_token(&mut self) -> Option<TokenString> {
579        self.try_push();
580
581        while !self.tokens.is_empty() {
582            let next_token_opt = match self.mode {
583                LexerMode::Indent => { self.indent_mode() }
584                LexerMode::IntId  => { self.intid_mode() }
585                LexerMode::DotId  => { self.dotid_mode() }
586                LexerMode::Info   => { self.info_mode() }
587                LexerMode::Anno   => { self.anno_mode() }
588                LexerMode::Normal => { self.normal_mode() }
589            };
590            match next_token_opt {
591                Some(ts) => {
592                    return Some(ts)
593                }
594                _ => {
595                    self.try_push();
596                    continue;
597                }
598            }
599        }
600
601         // Finished all the tokens
602        if !self.returned_eof {
603            match self.eof_mode() {
604                Some(ts) => {
605                    return Some(ts);
606                }
607                _ => {
608                    self.returned_eof = true;
609                    return None;
610                }
611            }
612        } else {
613            None
614        }
615    }
616}
617
/// lalrpop-style spanned item: `Ok((loc, token, loc))` or a lexical error.
/// Note: this lexer fills the two locations with (line number, byte offset).
pub type Spanned<Tok, Loc, Error> = Result<(Loc, Tok, Loc), Error>;
619
620impl <'input> Iterator for FIRRTLLexer<'input> {
621    type Item = Spanned<Token, usize, LexicalError>;
622
623    fn next(&mut self) -> Option<Self::Item> {
624        self.next_token().map(|x| Ok((x.line, x.token, x.start)))
625    }
626}