moore_svlog_syntax/
lexer.rs

1// Copyright (c) 2016-2021 Fabian Schuiki
2
3//! A lexical analyzer for SystemVerilog files, based on IEEE 1800-2009, section
4//! 5.
5
6use crate::cat::CatTokenKind;
7use crate::preproc::*;
8pub use crate::token::*;
9use moore_common::errors::*;
10use moore_common::name::*;
11use moore_common::source::*;
12
/// A categorized token as delivered by the preprocessor, paired with its span.
type CatTokenAndSpan = (CatTokenKind, Span);
/// A token produced by the lexer, paired with its span.
pub type TokenAndSpan = (Token, Span);
15
/// A lexical analyzer for SystemVerilog files.
///
/// Reads categorized tokens from a `Preprocessor` and combines them into
/// language-level tokens, using a lookahead window of four tokens.
pub struct Lexer<'a> {
    /// The preprocessor that supplies the categorized tokens.
    input: Preprocessor<'a>,
    /// The lookahead window; `peek[0]` is the token currently being examined.
    peek: [CatTokenAndSpan; 4],
}
21
22impl<'a> Lexer<'a> {
23    pub fn new(input: Preprocessor<'a>) -> Lexer {
24        Lexer {
25            input: input,
26            peek: [(CatTokenKind::Eof, INVALID_SPAN); 4],
27        }
28    }
29
30    pub fn bump(&mut self) -> DiagResult2<()> {
31        self.peek[0] = self.peek[1];
32        self.peek[1] = self.peek[2];
33        self.peek[2] = self.peek[3];
34        self.peek[3] = match self.input.next() {
35            Some(Err(e)) => return Err(e),
36            Some(Ok(x)) => x,
37            None => (CatTokenKind::Eof, self.peek[2].1),
38        };
39
40        Ok(())
41    }
42
43    pub fn next_token(&mut self) -> DiagResult2<TokenAndSpan> {
44        // Upon the first invocation the peek buffer is still empty. In that
45        // case we need to load the first batch of tokens.
46        if self.peek[0].0 == CatTokenKind::Eof {
47            self.bump()?;
48            self.bump()?;
49            self.bump()?;
50            self.bump()?;
51        }
52
53        let name_table = get_name_table();
54
55        loop {
56            self.skip_noise()?;
57
58            // Match 4-character symbols
59            if let (
60                CatTokenKind::Symbol(c0),
61                CatTokenKind::Symbol(c1),
62                CatTokenKind::Symbol(c2),
63                CatTokenKind::Symbol(c3),
64            ) = (
65                self.peek[0].0,
66                self.peek[1].0,
67                self.peek[2].0,
68                self.peek[3].0,
69            ) {
70                let sym = match (c0, c1, c2, c3) {
71                    // Assignment
72                    ('<', '<', '<', '=') => Some(Operator(Op::AssignArithShL)),
73                    ('>', '>', '>', '=') => Some(Operator(Op::AssignArithShR)),
74                    _ => None,
75                };
76                if let Some(tkn) = sym {
77                    let sp = Span::union(self.peek[0].1, self.peek[3].1);
78                    self.bump()?;
79                    self.bump()?;
80                    self.bump()?;
81                    self.bump()?;
82                    return Ok((tkn, sp));
83                }
84            }
85
86            // Match 3-character symbols
87            if let (CatTokenKind::Symbol(c0), CatTokenKind::Symbol(c1), CatTokenKind::Symbol(c2)) =
88                (self.peek[0].0, self.peek[1].0, self.peek[2].0)
89            {
90                let sym = match (c0, c1, c2) {
91                    // Assignment
92                    ('<', '<', '=') => Some(Operator(Op::AssignLogicShL)),
93                    ('>', '>', '=') => Some(Operator(Op::AssignLogicShR)),
94
95                    // Equality
96                    ('=', '=', '=') => Some(Operator(Op::CaseEq)),
97                    ('!', '=', '=') => Some(Operator(Op::CaseNeq)),
98                    ('=', '=', '?') => Some(Operator(Op::WildcardEq)),
99                    ('!', '=', '?') => Some(Operator(Op::WildcardNeq)),
100
101                    // Logic
102                    ('<', '-', '>') => Some(Operator(Op::LogicEquiv)),
103
104                    // Shift
105                    ('<', '<', '<') => Some(Operator(Op::ArithShL)),
106                    ('>', '>', '>') => Some(Operator(Op::ArithShR)),
107
108                    // Sequence
109                    ('|', '-', '>') => Some(Operator(Op::SeqImplOl)),
110                    ('|', '=', '>') => Some(Operator(Op::SeqImplNol)),
111                    ('#', '-', '#') => Some(Operator(Op::SeqFollowOl)),
112                    ('#', '=', '#') => Some(Operator(Op::SeqFollowNol)),
113                    _ => None,
114                };
115                if let Some(tkn) = sym {
116                    let sp = Span::union(self.peek[0].1, self.peek[2].1);
117                    self.bump()?;
118                    self.bump()?;
119                    self.bump()?;
120                    return Ok((tkn, sp));
121                }
122            }
123
124            // Match 2-character symbols
125            if let (CatTokenKind::Symbol(c0), CatTokenKind::Symbol(c1)) =
126                (self.peek[0].0, self.peek[1].0)
127            {
128                let sym = match (c0, c1) {
129                    // Assignment
130                    ('+', '=') => Some(Operator(Op::AssignAdd)),
131                    ('-', '=') => Some(Operator(Op::AssignSub)),
132                    ('*', '=') => Some(Operator(Op::AssignMul)),
133                    ('/', '=') => Some(Operator(Op::AssignDiv)),
134                    ('%', '=') => Some(Operator(Op::AssignMod)),
135                    ('&', '=') => Some(Operator(Op::AssignBitAnd)),
136                    ('|', '=') => Some(Operator(Op::AssignBitOr)),
137                    ('^', '=') => Some(Operator(Op::AssignBitXor)),
138
139                    // Arithmetic
140                    ('+', '+') => Some(Operator(Op::Inc)),
141                    ('-', '-') => Some(Operator(Op::Dec)),
142                    ('*', '*') => Some(Operator(Op::Pow)),
143
144                    // Relational
145                    ('<', '=') => Some(Operator(Op::Leq)),
146                    ('>', '=') => Some(Operator(Op::Geq)),
147
148                    // Logic
149                    ('=', '=') => Some(Operator(Op::LogicEq)),
150                    ('!', '=') => Some(Operator(Op::LogicNeq)),
151                    ('-', '>') => Some(Operator(Op::LogicImpl)),
152                    ('|', '|') => Some(Operator(Op::LogicOr)),
153                    ('&', '&') => Some(Operator(Op::LogicAnd)),
154
155                    // Bitwise
156                    ('~', '&') => Some(Operator(Op::BitNand)),
157                    ('~', '|') => Some(Operator(Op::BitNor)),
158                    ('~', '^') => Some(Operator(Op::BitNxor)),
159                    ('^', '~') => Some(Operator(Op::BitXnor)),
160
161                    // Shift
162                    ('<', '<') => Some(Operator(Op::LogicShL)),
163                    ('>', '>') => Some(Operator(Op::LogicShR)),
164
165                    // Others
166                    (':', ':') => Some(Namespace),
167                    ('+', ':') => Some(AddColon),
168                    ('-', ':') => Some(SubColon),
169                    ('#', '#') => Some(DoubleHashtag),
170                    _ => None,
171                };
172                if let Some(tkn) = sym {
173                    let sp = Span::union(self.peek[0].1, self.peek[1].1);
174                    self.bump()?;
175                    self.bump()?;
176                    return Ok((tkn, sp));
177                }
178            }
179
180            // Match 1-character symbols.
181            if let CatTokenKind::Symbol(c0) = self.peek[0].0 {
182                let sym = match c0 {
183                    // Assignment
184                    '=' => Some(Operator(Op::Assign)),
185
186                    // Arithmetic
187                    '+' => Some(Operator(Op::Add)),
188                    '-' => Some(Operator(Op::Sub)),
189                    '*' => Some(Operator(Op::Mul)),
190                    '/' => Some(Operator(Op::Div)),
191                    '%' => Some(Operator(Op::Mod)),
192
193                    // Relational
194                    '<' => Some(Operator(Op::Lt)),
195                    '>' => Some(Operator(Op::Gt)),
196
197                    // Logic
198                    '!' => Some(Operator(Op::LogicNot)),
199
200                    // Bitwise
201                    '~' => Some(Operator(Op::BitNot)),
202                    '&' => Some(Operator(Op::BitAnd)),
203                    '|' => Some(Operator(Op::BitOr)),
204                    '^' => Some(Operator(Op::BitXor)),
205
206                    // Others
207                    '(' => Some(OpenDelim(Paren)),
208                    ')' => Some(CloseDelim(Paren)),
209                    '[' => Some(OpenDelim(Brack)),
210                    ']' => Some(CloseDelim(Brack)),
211                    '{' => Some(OpenDelim(Brace)),
212                    '}' => Some(CloseDelim(Brace)),
213                    '#' => Some(Hashtag),
214                    ',' => Some(Comma),
215                    '.' => Some(Period),
216                    ':' => Some(Colon),
217                    ';' => Some(Semicolon),
218                    '?' => Some(Ternary),
219                    '@' => Some(At),
220                    _ => None,
221                };
222                if let Some(tkn) = sym {
223                    let sp = self.peek[0].1;
224                    self.bump()?;
225                    return Ok((tkn, sp));
226                }
227            }
228
229            match self.peek[0] {
230                // A text token either represents an identifier or a number,
231                // depending on whether it starts with a digit or a letter. In
232                // addition to that, underscores '_' also introduce an
233                // identifier. In case the identifier corresponds to a keyword,
234                // we emit a separate `Keyword(...)` token.
235                // IEEE 1800-2009 5.6 Identifiers
236                // IEEE 1800-2009 5.6.2 Keywords
237                (CatTokenKind::Text, _) | (CatTokenKind::Symbol('_'), _) => {
238                    let (m, msp) = self.match_ident()?;
239                    return match find_keyword(&m) {
240                        Some(Kw::Begin) => Ok((OpenDelim(Bgend), msp)),
241                        Some(Kw::End) => Ok((CloseDelim(Bgend), msp)),
242                        Some(kw) => Ok((Keyword(kw), msp)),
243                        None => Ok((Ident(name_table.intern(&m, true)), msp)),
244                    };
245                }
246
247                // System tasks and system functions start with the dollar sign
248                // '$', after which all regular identifier characters are
249                // allowed.
250                // IEEE 1800-2009 5.6.3 System tasks and system functions
251                (CatTokenKind::Symbol('$'), sp) => {
252                    self.bump()?;
253                    return match self.peek[0].0 {
254                        CatTokenKind::Text
255                        | CatTokenKind::Digits
256                        | CatTokenKind::Symbol('_')
257                        | CatTokenKind::Symbol('$') => {
258                            let (m, msp) = self.match_ident()?;
259                            Ok((SysIdent(name_table.intern(&m, true)), Span::union(sp, msp)))
260                        }
261                        _ => Ok((Dollar, sp)),
262                    };
263                }
264
265                // Escaped identifiers are introduced with a backslash and last
266                // until the next whitespace or newline character.
267                // IEEE 1800-2009 5.6.1 Escaped identifiers
268                (CatTokenKind::Symbol('\\'), mut sp) => {
269                    let mut s = String::new();
270                    loop {
271                        self.bump()?;
272                        if self.peek[0].0 == CatTokenKind::Whitespace
273                            || self.peek[0].0 == CatTokenKind::Newline
274                            || self.peek[0].0 == CatTokenKind::Eof
275                        {
276                            break;
277                        }
278                        sp.expand(self.peek[0].1);
279                        s.push_str(&self.peek[0].1.extract());
280                    }
281                    if s.is_empty() {
282                        return Err(DiagBuilder2::fatal(
283                            "Expected escaped identifier after backslash '\\'",
284                        )
285                        .span(sp));
286                    }
287                    return Ok((EscIdent(name_table.intern(&s, true)), sp));
288                }
289
290                // Numbers are either introduced by a set of digits in the case
291                // of a sized literal or unsigned number, or an apostrophe in
292                // the case of an unsized based number.
293                // IEEE 1800-2009 5.7 Numbers
294                (CatTokenKind::Symbol('\''), sp) => {
295                    self.bump()?; // eat the apostrophe
296                    return self.match_based_number(None, sp);
297                }
298                (CatTokenKind::Digits, mut sp) => {
299                    // Consume the leading digits. These represent either the
300                    // size of the literal if followed by an apostrophe and a
301                    // base specification, or the number itself otherwise.
302                    let value = {
303                        let mut s = String::new();
304                        s.push_str(&sp.extract());
305                        self.bump()?; // eat the digits that were pushed onto the string above
306                        self.eat_number_body_into(&mut s, &mut sp, false)?;
307                        name_table.intern(&s, true)
308                    };
309                    let frac = if self.peek[0].0 == CatTokenKind::Symbol('.') {
310                        let mut s = String::new();
311                        self.bump()?; // eat the period
312                        self.eat_number_body_into(&mut s, &mut sp, false)?;
313                        Some(name_table.intern(&s, true))
314                    } else {
315                        None
316                    };
317                    if let Some(unit) = self.try_time_unit() {
318                        sp.expand(self.peek[0].1);
319                        self.bump()?; // eat the unit
320                        return Ok((Literal(Time(value, frac, unit)), sp));
321                    }
322                    if self.peek[0].0 == CatTokenKind::Text {
323                        return Err(DiagBuilder2::fatal(format!(
324                            "number literal `{}` may not directly be followed by letters `{}`",
325                            sp.extract(),
326                            self.peek[0].1.extract(),
327                        ))
328                        .span(sp));
329                    }
330                    if frac.is_some() {
331                        return Ok((Literal(Number(value, frac)), sp));
332                    }
333                    self.skip_noise()?; // whitespace allowed after size indication
334                    match (self.peek[0].0, self.peek[1].0) {
335                        (CatTokenKind::Symbol('\''), CatTokenKind::Text)
336                        | (CatTokenKind::Symbol('\''), CatTokenKind::Digits) => {
337                            self.bump()?; // eat the apostrophe
338                            return self.match_based_number(Some(value), sp);
339                        }
340                        _ => return Ok((Literal(Number(value, None)), sp)),
341                    }
342                }
343
344                // IEEE 1800-2009 5.9 String literals
345                (CatTokenKind::Symbol('"'), mut span) => {
346                    self.bump()?;
347                    let mut s = String::new();
348                    loop {
349                        match self.peek[0] {
350                            (CatTokenKind::Symbol('"'), sp) => {
351                                span.expand(sp);
352                                self.bump()?;
353                                break;
354                            }
355                            (CatTokenKind::Symbol('\\'), sp) => {
356                                span.expand(sp);
357                                self.bump()?;
358                                match self.peek[0] {
359                                    (CatTokenKind::Symbol('\\'), sp) => {
360                                        span.expand(sp);
361                                        s.push('\\');
362                                    }
363                                    (CatTokenKind::Newline, sp) => {
364                                        span.expand(sp);
365                                    }
366                                    (CatTokenKind::Symbol('"'), sp) => {
367                                        span.expand(sp);
368                                        s.push('"');
369                                    }
370                                    (CatTokenKind::Text, sp) => {
371                                        span.expand(sp);
372                                        s.push_str(&sp.extract());
373                                    }
374                                    _ => {
375                                        return Err(DiagBuilder2::fatal(
376                                            "Unknown escape sequence in string",
377                                        )
378                                        .span(span))
379                                    }
380                                }
381                            }
382                            (CatTokenKind::Newline, sp) => {
383                                return Err(DiagBuilder2::fatal(
384                                    "String literals cannot contain unescaped newlines",
385                                )
386                                .span(sp))
387                            }
388                            (_, sp) => {
389                                span.expand(sp);
390                                s.push_str(&sp.extract());
391                            }
392                        }
393                        self.bump()?;
394                    }
395                    return Ok((Literal(Str(name_table.intern(&s, true))), span));
396                }
397
398                (CatTokenKind::Eof, sp) => return Ok((Eof, sp)),
399                (tkn, sp) => {
400                    return Err(DiagBuilder2::fatal(format!("Unknown token {:?}", tkn)).span(sp))
401                }
402            }
403        }
404    }
405
406    /// Skips all input tokens that are excluded from the language's syntax,
407    /// i.e. whitespace, newlines, and comments. Note that during lexical
408    /// analysis whitespace may still play a vital role, espceially when parsing
409    /// number literals or string constants.
410    fn skip_noise(&mut self) -> DiagResult2<()> {
411        loop {
412            match (self.peek[0].0, self.peek[1].0) {
413                // Single-line comment inserted by preprocessor.
414                (CatTokenKind::Symbol('/'), CatTokenKind::Symbol('/')) => {
415                    self.bump()?;
416                    self.bump()?;
417                    loop {
418                        match self.peek[0].0 {
419                            CatTokenKind::Eof => break,
420                            CatTokenKind::Newline => {
421                                self.bump()?;
422                                break;
423                            }
424                            _ => self.bump()?,
425                        }
426                    }
427                }
428                // Multi-line comment inserted by preprocessor.
429                (CatTokenKind::Symbol('/'), CatTokenKind::Symbol('*')) => {
430                    self.bump()?;
431                    self.bump()?;
432                    loop {
433                        match (self.peek[0].0, self.peek[1].0) {
434                            (CatTokenKind::Eof, _) => break,
435                            (CatTokenKind::Symbol('*'), CatTokenKind::Symbol('/')) => {
436                                self.bump()?;
437                                self.bump()?;
438                                break;
439                            }
440                            _ => self.bump()?,
441                        }
442                    }
443                }
444                // SystemVerilog Attributes
445                (CatTokenKind::Symbol('('), CatTokenKind::Symbol('*'))
446                    if self.peek[2].0 != CatTokenKind::Symbol(')') =>
447                {
448                    self.bump()?;
449                    self.bump()?;
450                    loop {
451                        match (self.peek[0].0, self.peek[1].0) {
452                            (CatTokenKind::Eof, _) => break,
453                            (CatTokenKind::Symbol('*'), CatTokenKind::Symbol(')')) => {
454                                self.bump()?;
455                                self.bump()?;
456                                break;
457                            }
458                            _ => self.bump()?,
459                        }
460                    }
461                }
462                _ => (),
463            }
464            match self.peek[0].0 {
465                CatTokenKind::Whitespace | CatTokenKind::Newline | CatTokenKind::Comment => {
466                    self.bump()?
467                }
468                _ => return Ok(()),
469            }
470        }
471    }
472
473    /// Matches an identifier. This consumes all tokens from the input that when
474    /// combined still make up a valid identifier and returns the consumed
475    /// characters as a String, alongside the span they covered. In
476    /// SystemVerilog upper- and lowercase characters, digits, underscores '_',
477    /// and dollar signs '$' are all valid within an identifier.
478    fn match_ident(&mut self) -> DiagResult2<(String, Span)> {
479        let mut s = String::new();
480        let mut sp = self.peek[0].1;
481        loop {
482            match self.peek[0] {
483                (CatTokenKind::Text, this_sp)
484                | (CatTokenKind::Digits, this_sp)
485                | (CatTokenKind::Symbol('_'), this_sp)
486                | (CatTokenKind::Symbol('$'), this_sp) => {
487                    s.push_str(&this_sp.extract());
488                    sp.expand(this_sp);
489                    self.bump()?;
490                }
491                _ => break,
492            }
493        }
494        if s.is_empty() {
495            return Err(DiagBuilder2::fatal("Could not match an identifier here").span(sp));
496        }
497        assert!(!s.is_empty());
498        Ok((s, sp))
499    }
500
501    /// This function assumes that we have just consumed the apostrophe `'`
502    /// before the base indication.
503    fn match_based_number(
504        &mut self,
505        size: Option<Name>,
506        mut span: Span,
507    ) -> DiagResult2<TokenAndSpan> {
508        match self.peek[0] {
509            (CatTokenKind::Text, sp) => {
510                self.bump()?;
511                let text = sp.extract();
512                span.expand(sp);
513                let mut chars = text.chars();
514                let mut c = chars.next();
515
516                // Consume the optional sign indicator or emit an unbased and
517                // unsized literal if the apostrophe is immediately followed by
518                // [zZxX].
519                let signed = match c {
520                    Some('s') | Some('S') => {
521                        c = chars.next();
522                        true
523                    }
524                    Some('z') | Some('Z') if text.len() == 1 => {
525                        return Ok((Literal(UnbasedUnsized('z')), span))
526                    }
527                    Some('x') | Some('X') if text.len() == 1 => {
528                        return Ok((Literal(UnbasedUnsized('x')), span))
529                    }
530                    _ => false,
531                };
532
533                // Consume the base of the number.
534                let base = match c {
535                    Some('d') | Some('D') => 'd',
536                    Some('b') | Some('B') => 'b',
537                    Some('o') | Some('O') => 'o',
538                    Some('h') | Some('H') => 'h',
539                    Some(x) => {
540                        return Err(DiagBuilder2::fatal(format!(
541                            "`{}` is not a valid number base",
542                            x
543                        ))
544                        .span(span))
545                    }
546                    None => return Err(DiagBuilder2::fatal("Missing number base").span(span)),
547                };
548                c = chars.next();
549
550                // If no more characters remain, a whitespace and subsequent
551                // digits may follow. Otherwise, the remaining characters are to
552                // be treated as part of the number body and no whitespace
553                // follows.
554                let mut body = String::new();
555                if let Some(c) = c {
556                    body.push(c);
557                    body.push_str(chars.as_str());
558                } else {
559                    self.skip_noise()?;
560                }
561                self.eat_number_body_into(&mut body, &mut span, true)?;
562
563                return Ok((
564                    Literal(BasedInteger(
565                        size,
566                        signed,
567                        base,
568                        get_name_table().intern(&body, true),
569                    )),
570                    span,
571                ));
572            }
573
574            (CatTokenKind::Digits, sp) if size.is_none() => {
575                self.bump()?;
576                let value = sp.extract();
577                span.expand(sp);
578                match value.chars().next() {
579                    Some('0') if value.len() == 1 => {
580                        return Ok((Literal(UnbasedUnsized('0')), span))
581                    }
582                    Some('1') if value.len() == 1 => {
583                        return Ok((Literal(UnbasedUnsized('1')), span))
584                    }
585                    _ => {
586                        return Err(DiagBuilder2::fatal(
587                            "Unbased unsized literal may only be '0, '1, 'x, or 'z",
588                        )
589                        .span(span))
590                    }
591                }
592            }
593
594            (CatTokenKind::Symbol('?'), sp) => {
595                self.bump()?;
596                span.expand(sp);
597                return Ok((Literal(UnbasedUnsized('z')), span));
598            }
599
600            // (_, sp) => return Err(DiagBuilder2::fatal("Invalid number base").span(sp))
601            _ => return Ok((Apostrophe, span)),
602        }
603    }
604
605    /// Eats all text, digits, and underscore tokens, accumulating them (except
606    /// for the underscores) in a String.
607    fn eat_number_body_into(
608        &mut self,
609        into: &mut String,
610        span: &mut Span,
611        allow_alphabetic: bool,
612    ) -> DiagResult2<()> {
613        loop {
614            match self.peek[0] {
615                (CatTokenKind::Digits, sp) | (CatTokenKind::Text, sp) => {
616                    if self.peek[0].0 == CatTokenKind::Text && !allow_alphabetic {
617                        break;
618                    }
619                    into.push_str(&sp.extract());
620                    span.expand(sp);
621                }
622                (CatTokenKind::Symbol('_'), _) => (),
623                (CatTokenKind::Symbol('?'), sp) => {
624                    into.push('?');
625                    span.expand(sp);
626                }
627                _ => break,
628            }
629            self.bump()?;
630        }
631        Ok(())
632    }
633
634    /// Try to parse the next text token as a time unit.
635    fn try_time_unit(&mut self) -> Option<TimeUnit> {
636        if self.peek[0].0 == CatTokenKind::Text {
637            match self.peek[0].1.extract().as_str() {
638                "s" => Some(TimeUnit::Second),
639                "ms" => Some(TimeUnit::MilliSecond),
640                "us" => Some(TimeUnit::MicroSecond),
641                "ns" => Some(TimeUnit::NanoSecond),
642                "ps" => Some(TimeUnit::PicoSecond),
643                "fs" => Some(TimeUnit::FemtoSecond),
644                _ => None,
645            }
646        } else {
647            None
648        }
649    }
650}
651
652impl<'a> Iterator for Lexer<'a> {
653    type Item = DiagResult2<TokenAndSpan>;
654
655    fn next(&mut self) -> Option<Self::Item> {
656        match self.next_token() {
657            Ok((Eof, _)) => None,
658            x => Some(x),
659        }
660    }
661}
662
#[cfg(test)]
mod tests {
    use super::*;

    /// Lexes `input` and asserts that the resulting tokens, without their
    /// spans, equal `expected`. Panics if the lexer reports an error.
    fn check(input: &str, expected: &[Token]) {
        use std::cell::Cell;
        // Every call registers its input under a fresh file name.
        thread_local!(static INDEX: Cell<usize> = Cell::new(0));
        let sm = get_source_manager();
        let idx = INDEX.with(|i| {
            let v = i.get();
            i.set(v + 1);
            v
        });
        let source = sm.add(&format!("test_{}.sv", idx), input);
        let pp = Preprocessor::new(source, &[], &[]);
        let lexer = Lexer::new(pp);
        let actual: Vec<_> = lexer.map(|x| x.unwrap().0).collect();
        assert_eq!(actual, expected);
    }

    /// Asserts that `input` lexes to exactly the one token `expected`.
    fn check_single(input: &str, expected: Token) {
        check(input, &[expected]);
    }

    /// Interns `n` in the global name table.
    fn name(n: &str) -> Name {
        get_name_table().intern(n, true)
    }

    /// According to IEEE 1800-2009 5.6
    #[test]
    fn idents() {
        check(
            "shiftreg_a busa_index error_condition merge_ab _bus3 n$657",
            &[
                Ident(name("shiftreg_a")),
                Ident(name("busa_index")),
                Ident(name("error_condition")),
                Ident(name("merge_ab")),
                Ident(name("_bus3")),
                Ident(name("n$657")),
            ],
        );
    }

    /// According to IEEE 1800-2009 5.6.1
    #[test]
    fn esc_idents() {
        check(
            "\\busa+index \\-clock \\***error-condition*** \\net1/\\net2 \\{a,b} \\a*(b+c)",
            &[
                EscIdent(name("busa+index")),
                EscIdent(name("-clock")),
                EscIdent(name("***error-condition***")),
                EscIdent(name("net1/\\net2")),
                EscIdent(name("{a,b}")),
                EscIdent(name("a*(b+c)")),
            ],
        );
    }

    /// According to IEEE 1800-2009 5.6.3
    #[test]
    fn sys_idents() {
        check(
            "$display $finish $01_ad$as3_",
            &[
                SysIdent(name("display")),
                SysIdent(name("finish")),
                SysIdent(name("01_ad$as3_")),
            ],
        );
    }

    /// According to IEEE 1800-2009 5.7.1
    #[test]
    fn unbased_unsized_literal() {
        check_single("'0", Literal(UnbasedUnsized('0')));
        check_single("'1", Literal(UnbasedUnsized('1')));
        check_single("'X", Literal(UnbasedUnsized('x')));
        check_single("'x", Literal(UnbasedUnsized('x')));
        check_single("'Z", Literal(UnbasedUnsized('z')));
        check_single("'z", Literal(UnbasedUnsized('z')));
        check_single("'?", Literal(UnbasedUnsized('z')));
    }

    #[test]
    fn unsized_literal_constant_numbers() {
        check(
            "659; 'h 837FF; 'o7460",
            &[
                Literal(Number(name("659"), None)),
                Semicolon,
                Literal(BasedInteger(None, false, 'h', name("837FF"))),
                Semicolon,
                Literal(BasedInteger(None, false, 'o', name("7460"))),
            ],
        );
    }

    #[test]
    #[should_panic(expected = "number literal `4` may not directly be followed by letters `af`")]
    fn unsized_literal_constant_numbers_illegal() {
        check("4af", &[]);
    }

    #[test]
    fn sized_literal_constant_numbers() {
        check(
            "4'b1001; 5 'D 3; 3'b01x; 12'hx; 16'hz",
            &[
                Literal(BasedInteger(Some(name("4")), false, 'b', name("1001"))),
                Semicolon,
                Literal(BasedInteger(Some(name("5")), false, 'd', name("3"))),
                Semicolon,
                Literal(BasedInteger(Some(name("3")), false, 'b', name("01x"))),
                Semicolon,
                Literal(BasedInteger(Some(name("12")), false, 'h', name("x"))),
                Semicolon,
                Literal(BasedInteger(Some(name("16")), false, 'h', name("z"))),
            ],
        );
    }

    #[test]
    fn signed_literal_constant_numbers() {
        check(
            "4 'shf; 16'sd?",
            &[
                Literal(BasedInteger(Some(name("4")), true, 'h', name("f"))),
                Semicolon,
                Literal(BasedInteger(Some(name("16")), true, 'd', name("?"))),
            ],
        );
    }

    #[test]
    #[ignore]
    fn underscores_in_literal_constant_numbers() {
        check(
            "27_195_000; 16'b0011_0101_0001_1111; 32 'h 12ab_f001",
            &[
                Literal(Number(name("27195000"), None)),
                Semicolon,
                Literal(BasedInteger(
                    Some(name("16")),
                    false,
                    'b',
                    name("0011010100011111"),
                )),
                Semicolon,
                Literal(BasedInteger(Some(name("32")), false, 'h', name("12abf001"))),
            ],
        );
    }

    /// According to IEEE 1800-2009 5.9
    #[test]
    fn multiline_string_literal() {
        check(
            "$display(\"Humpty Dumpty sat on a wall. \\\nHumpty Dumpty had a great fall.\")",
            &[
                SysIdent(name("display")),
                OpenDelim(Paren),
                Literal(Str(name(
                    "Humpty Dumpty sat on a wall. Humpty Dumpty had a great fall.",
                ))),
                CloseDelim(Paren),
            ],
        );
    }

    #[test]
    fn time_literal() {
        check(
            "42s 14.3ms 16.32us 9ns 0.1ps 8123fs",
            &[
                Literal(Time(name("42"), None, TimeUnit::Second)),
                Literal(Time(name("14"), Some(name("3")), TimeUnit::MilliSecond)),
                Literal(Time(name("16"), Some(name("32")), TimeUnit::MicroSecond)),
                Literal(Time(name("9"), None, TimeUnit::NanoSecond)),
                Literal(Time(name("0"), Some(name("1")), TimeUnit::PicoSecond)),
                Literal(Time(name("8123"), None, TimeUnit::FemtoSecond)),
            ],
        );
    }

    #[test]
    fn number_literal() {
        check(
            "42 4.2",
            &[
                Literal(Number(name("42"), None)),
                Literal(Number(name("4"), Some(name("2")))),
            ],
        );
    }
}