moore_vhdl_syntax/lexer/
mod.rs

1// Copyright (c) 2016-2021 Fabian Schuiki
2
//! A VHDL lexer. This module implements lexical analysis of VHDL source files.
//! It converts a stream of input bytes into a stream of language tokens such as
//! identifiers, literals, and symbols.
6
7pub mod bundler;
8pub mod categorizer;
9pub mod token;
10pub mod tokenizer;
11
12use self::bundler::Bundler;
13use self::categorizer::Categorizer;
14use self::token::*;
15use self::tokenizer::Tokenizer;
16use moore_common::errors::*;
17use moore_common::grind::utf8::Utf8;
18use moore_common::grind::Grinder;
19use moore_common::source::*;
20
21/// A VHDL lexer. Converts a stream of bytes to VHDL tokens. Emits errors
22/// backwards up the pipeline.
23pub struct Lexer<T: Grinder<Item = Option<u8>, Error = DiagBuilder2>> {
24    inner: Tokenizer<Bundler<Categorizer<Utf8<T>>>>,
25}
26
27impl<T> Lexer<T>
28where
29    T: Grinder<Item = Option<u8>, Error = DiagBuilder2>,
30{
31    /// Create a new lexer.
32    pub fn new(bytes: T, src: Source) -> Lexer<T> {
33        let chars = Utf8::new(bytes);
34        let cat = Categorizer::new(chars);
35        let bundles = Bundler::new(cat, src);
36        let tokens = Tokenizer::new(bundles);
37        Lexer { inner: tokens }
38    }
39}
40
41impl<T> Grinder for Lexer<T>
42where
43    T: Grinder<Item = Option<u8>, Error = DiagBuilder2>,
44{
45    type Item = Option<Spanned<Token>>;
46    type Error = DiagBuilder2;
47
48    fn next(&mut self) -> Self::Item {
49        self.inner.next()
50    }
51
52    fn emit(&mut self, err: Self::Error) {
53        self.inner.emit(err)
54    }
55}
56
57#[cfg(test)]
58mod test {
59    use super::Lexer;
60    use crate::lexer::token::*;
61    use moore_common::errors::*;
62    use moore_common::grind::{self, Grinder};
63    use moore_common::name::*;
64    use moore_common::source::*;
65
66    fn lex(src: Source) -> Vec<Token> {
67        let content = src.get_content();
68        let bytes = grind::from_iter(content.bytes().iter().map(|x| *x))
69            .vent(|err: DiagBuilder2| eprintln!("{}", err));
70        let mut tokens = Lexer::new(bytes, src);
71        let mut v = Vec::new();
72        while let Some(Spanned { value, .. }) = tokens.next() {
73            v.push(value);
74        }
75        v
76    }
77
78    fn check(input: &str, expected: &[Token]) {
79        use std::cell::Cell;
80        thread_local!(static INDEX: Cell<usize> = Cell::new(0));
81        let sm = get_source_manager();
82        let idx = INDEX.with(|i| {
83            let v = i.get();
84            i.set(v + 1);
85            v
86        });
87        let source = sm.add(&format!("test_{}.vhd", idx), input);
88        let actual = lex(source);
89        assert_eq!(actual.len(), expected.len());
90        for (a, &e) in actual.into_iter().zip(expected.into_iter()) {
91            assert_eq!(a, e);
92        }
93    }
94
95    fn name(n: &str) -> Name {
96        get_name_table().intern(n, false)
97    }
98
99    fn name_case(n: &str) -> Name {
100        get_name_table().intern(n, true)
101    }
102
103    #[test]
104    fn basic_ident() {
105        check(
106            r"
107            COUNT    X     c_out        FFT                Decoder
108            VHSIC    X1    PageCount    STORE_NEXT_ITEM    As49__8
109        ",
110            &[
111                Ident(name("COUNT")),
112                Ident(name("X")),
113                Ident(name("c_out")),
114                Ident(name("FFT")),
115                Ident(name("Decoder")),
116                Ident(name("VHSIC")),
117                Ident(name("X1")),
118                Ident(name("PageCount")),
119                Ident(name("STORE_NEXT_ITEM")),
120                Ident(name("As49__8")),
121            ],
122        );
123    }
124
125    #[test]
126    fn extended_ident() {
127        check(
128            r"
129            -- Two different identifiers, neither of which is the reserved word bus.
130            \BUS\  \bus\
131
132            -- An identifier containing three characters.
133            \a\\b\
134
135            -- Three distinct identifiers.
136            VHDL  \VHDL\  \vhdl\
137
138            -- Use of spaces and special characters.
139            \A B\  \!@#\  \_+`'''1#{}\
140        ",
141            &[
142                Ident(name_case("\\BUS\\")),
143                Ident(name_case("\\bus\\")),
144                Ident(name_case("\\a\\b\\")),
145                Ident(name("VHDL")),
146                Ident(name_case("\\VHDL\\")),
147                Ident(name_case("\\vhdl\\")),
148                Ident(name_case("\\A B\\")),
149                Ident(name_case("\\!@#\\")),
150                Ident(name_case("\\_+`'''1#{}\\")),
151            ],
152        );
153    }
154
155    #[test]
156    fn decimal_literal() {
157        check(
158            r"
159            12         0        1E6         123_456      -- Integer literals.
160            12.0       0.0      0.456       3.14159_26   -- Real literals.
161            1.34E-12   1.0E+6   6.023E+24                -- Real literals with exponents.
162        ",
163            &[
164                Lit(Literal::Abstract(None, name_case("12"), None, None)),
165                Lit(Literal::Abstract(None, name_case("0"), None, None)),
166                Lit(Literal::Abstract(
167                    None,
168                    name_case("1"),
169                    None,
170                    Some(Exponent(ExponentSign::Positive, name_case("6"))),
171                )),
172                Lit(Literal::Abstract(None, name_case("123456"), None, None)),
173                Lit(Literal::Abstract(
174                    None,
175                    name_case("12"),
176                    Some(name_case("0")),
177                    None,
178                )),
179                Lit(Literal::Abstract(
180                    None,
181                    name_case("0"),
182                    Some(name_case("0")),
183                    None,
184                )),
185                Lit(Literal::Abstract(
186                    None,
187                    name_case("0"),
188                    Some(name_case("456")),
189                    None,
190                )),
191                Lit(Literal::Abstract(
192                    None,
193                    name_case("3"),
194                    Some(name_case("1415926")),
195                    None,
196                )),
197                Lit(Literal::Abstract(
198                    None,
199                    name_case("1"),
200                    Some(name_case("34")),
201                    Some(Exponent(ExponentSign::Negative, name_case("12"))),
202                )),
203                Lit(Literal::Abstract(
204                    None,
205                    name_case("1"),
206                    Some(name_case("0")),
207                    Some(Exponent(ExponentSign::Positive, name_case("6"))),
208                )),
209                Lit(Literal::Abstract(
210                    None,
211                    name_case("6"),
212                    Some(name_case("023")),
213                    Some(Exponent(ExponentSign::Positive, name_case("24"))),
214                )),
215            ],
216        );
217    }
218
219    #[test]
220    fn based_literal() {
221        check(
222            r"
223            2#1111_1111#   16#FF#   016#0FF#       -- Integer literals of value 255
224            16#E#E1        2#1110_0000#            -- Integer literals of value 224
225            16#F.FF#E+2    2#1.1111_1111_111#E11   -- Real literals of value 4095.0
226        ",
227            &[
228                Lit(Literal::Abstract(
229                    Some(name_case("2")),
230                    name_case("11111111"),
231                    None,
232                    None,
233                )),
234                Lit(Literal::Abstract(
235                    Some(name_case("16")),
236                    name_case("FF"),
237                    None,
238                    None,
239                )),
240                Lit(Literal::Abstract(
241                    Some(name_case("016")),
242                    name_case("0FF"),
243                    None,
244                    None,
245                )),
246                Lit(Literal::Abstract(
247                    Some(name_case("16")),
248                    name_case("E"),
249                    None,
250                    Some(Exponent(ExponentSign::Positive, name_case("1"))),
251                )),
252                Lit(Literal::Abstract(
253                    Some(name_case("2")),
254                    name_case("11100000"),
255                    None,
256                    None,
257                )),
258                Lit(Literal::Abstract(
259                    Some(name_case("16")),
260                    name_case("F"),
261                    Some(name_case("FF")),
262                    Some(Exponent(ExponentSign::Positive, name_case("2"))),
263                )),
264                Lit(Literal::Abstract(
265                    Some(name_case("2")),
266                    name_case("1"),
267                    Some(name_case("11111111111")),
268                    Some(Exponent(ExponentSign::Positive, name_case("11"))),
269                )),
270            ],
271        );
272    }
273
274    #[test]
275    fn bit_string_literal() {
276        check(
277            "
278            B\"1111_1111_1111\"  -- Equivalent to the string literal \"111111111111\".
279            X\"FFF\"             -- Equivalent to B\"1111_1111_1111\".
280            O\"777\"             -- Equivalent to B\"111_111_111\".
281            X\"777\"             -- Equivalent to B\"0111_0111_0111\".
282
283            B\"XXXX_01LH\"       -- Equivalent to the string literal \"XXXX01LH\"
284            UO\"27\"             -- Equivalent to B\"010_111\"
285            UO\"2C\"             -- Equivalent to B\"011_CCC\"
286            SX\"3W\"             -- Equivalent to B\"0011_WWWW\"
287            D\"35\"              -- Equivalent to B\"100011\"
288
289            12UB\"X1\"           -- Equivalent to B\"0000_0000_00X1\"
290            12SB\"X1\"           -- Equivalent to B\"XXXX_XXXX_XXX1\"
291            12UX\"F-\"           -- Equivalent to B\"0000_1111_----\"
292            12SX\"F-\"           -- Equivalent to B\"1111_1111_----\"
293            12D\"13\"            -- Equivalent to B\"0000_0000_1101\"
294
295            12UX\"000WWW\"       -- Equivalent to B\"WWWW_WWWW_WWWW\"
296            12SX\"FFFC00\"       -- Equivalent to B\"1100_0000_0000\"
297            12SX\"XXXX00\"       -- Equivalent to B\"XXXX_0000_0000\"
298        ",
299            &[
300                Lit(Literal::BitString(
301                    None,
302                    BitStringBase::B,
303                    name_case("111111111111"),
304                )),
305                Lit(Literal::BitString(None, BitStringBase::X, name_case("FFF"))),
306                Lit(Literal::BitString(None, BitStringBase::O, name_case("777"))),
307                Lit(Literal::BitString(None, BitStringBase::X, name_case("777"))),
308                Lit(Literal::BitString(
309                    None,
310                    BitStringBase::B,
311                    name_case("XXXX01LH"),
312                )),
313                Lit(Literal::BitString(None, BitStringBase::UO, name_case("27"))),
314                Lit(Literal::BitString(None, BitStringBase::UO, name_case("2C"))),
315                Lit(Literal::BitString(None, BitStringBase::SX, name_case("3W"))),
316                Lit(Literal::BitString(None, BitStringBase::D, name_case("35"))),
317                Lit(Literal::BitString(
318                    Some(name_case("12")),
319                    BitStringBase::UB,
320                    name_case("X1"),
321                )),
322                Lit(Literal::BitString(
323                    Some(name_case("12")),
324                    BitStringBase::SB,
325                    name_case("X1"),
326                )),
327                Lit(Literal::BitString(
328                    Some(name_case("12")),
329                    BitStringBase::UX,
330                    name_case("F-"),
331                )),
332                Lit(Literal::BitString(
333                    Some(name_case("12")),
334                    BitStringBase::SX,
335                    name_case("F-"),
336                )),
337                Lit(Literal::BitString(
338                    Some(name_case("12")),
339                    BitStringBase::D,
340                    name_case("13"),
341                )),
342                Lit(Literal::BitString(
343                    Some(name_case("12")),
344                    BitStringBase::UX,
345                    name_case("000WWW"),
346                )),
347                Lit(Literal::BitString(
348                    Some(name_case("12")),
349                    BitStringBase::SX,
350                    name_case("FFFC00"),
351                )),
352                Lit(Literal::BitString(
353                    Some(name_case("12")),
354                    BitStringBase::SX,
355                    name_case("XXXX00"),
356                )),
357            ],
358        );
359    }
360
361    #[test]
362    fn character_literal() {
363        check(
364            "
365            'A'  '*'  '''  ' '
366        ",
367            &[
368                Lit(Literal::Char('A')),
369                Lit(Literal::Char('*')),
370                Lit(Literal::Char('\'')),
371                Lit(Literal::Char(' ')),
372            ],
373        );
374    }
375
376    #[test]
377    fn string_literal() {
378        check(
379            "
380            \"Setup time is too short\"  --  An error message.
381            \"\"                         --  An empty string literal.
382            \" \"   \"A\"   \"\"\"\"     --  Three string literals of length 1.
383            \"Characters such as $, %, and } are allowed in string literals.\"
384        ",
385            &[
386                Lit(Literal::String(name_case("Setup time is too short"))),
387                Lit(Literal::String(name_case(""))),
388                Lit(Literal::String(name_case(" "))),
389                Lit(Literal::String(name_case("A"))),
390                Lit(Literal::String(name_case("\""))),
391                Lit(Literal::String(name_case(
392                    "Characters such as $, %, and } are allowed in string literals.",
393                ))),
394            ],
395        );
396    }
397
398    #[test]
399    fn symbols() {
400        check(
401            "
402            (    )
403            .    ,    :    ;    '    &
404            =>   ??   <>   :=   <<   >>
405            =    /=   <    <=   >    >=
406            ?=   ?/=  ?<   ?<=  ?>   ?>=
407            +    -    *    /    **
408        ",
409            &[
410                OpenDelim(Paren),
411                CloseDelim(Paren),
412                Period,
413                Comma,
414                Colon,
415                Semicolon,
416                Apostrophe,
417                Ampersand,
418                Arrow,
419                Condition,
420                LtGt,
421                VarAssign,
422                Lshift,
423                Rshift,
424                Eq,
425                Neq,
426                Lt,
427                Leq,
428                Gt,
429                Geq,
430                MatchEq,
431                MatchNeq,
432                MatchLt,
433                MatchLeq,
434                MatchGt,
435                MatchGeq,
436                Add,
437                Sub,
438                Mul,
439                Div,
440                Pow,
441            ],
442        );
443    }
444
445    #[test]
446    fn keywords() {
447        check(
448            "
449            abs access after alias all and architecture array assert assume
450            assume_guarantee attribute begin block body buffer bus case
451            component configuration constant context cover default disconnect
452            downto else elsif end entity exit fairness file for force function
453            generate generic group guarded if impure in inertial inout is label
454            library linkage literal loop map mod nand new next nor not null of
455            on open or others out package parameter port postponed procedure
456            process property protected pure range record register reject release
457            rem report restrict restrict_guarantee return rol ror select
458            sequence severity shared signal sla sll sra srl strong subtype then
459            to transport type unaffected units until use variable vmode vprop
460            vunit wait when while with xnor xor
461        ",
462            &[
463                Keyword(Kw::Abs),
464                Keyword(Kw::Access),
465                Keyword(Kw::After),
466                Keyword(Kw::Alias),
467                Keyword(Kw::All),
468                Keyword(Kw::And),
469                Keyword(Kw::Architecture),
470                Keyword(Kw::Array),
471                Keyword(Kw::Assert),
472                Keyword(Kw::Assume),
473                Keyword(Kw::AssumeGuarantee),
474                Keyword(Kw::Attribute),
475                Keyword(Kw::Begin),
476                Keyword(Kw::Block),
477                Keyword(Kw::Body),
478                Keyword(Kw::Buffer),
479                Keyword(Kw::Bus),
480                Keyword(Kw::Case),
481                Keyword(Kw::Component),
482                Keyword(Kw::Configuration),
483                Keyword(Kw::Constant),
484                Keyword(Kw::Context),
485                Keyword(Kw::Cover),
486                Keyword(Kw::Default),
487                Keyword(Kw::Disconnect),
488                Keyword(Kw::Downto),
489                Keyword(Kw::Else),
490                Keyword(Kw::Elsif),
491                Keyword(Kw::End),
492                Keyword(Kw::Entity),
493                Keyword(Kw::Exit),
494                Keyword(Kw::Fairness),
495                Keyword(Kw::File),
496                Keyword(Kw::For),
497                Keyword(Kw::Force),
498                Keyword(Kw::Function),
499                Keyword(Kw::Generate),
500                Keyword(Kw::Generic),
501                Keyword(Kw::Group),
502                Keyword(Kw::Guarded),
503                Keyword(Kw::If),
504                Keyword(Kw::Impure),
505                Keyword(Kw::In),
506                Keyword(Kw::Inertial),
507                Keyword(Kw::Inout),
508                Keyword(Kw::Is),
509                Keyword(Kw::Label),
510                Keyword(Kw::Library),
511                Keyword(Kw::Linkage),
512                Keyword(Kw::Literal),
513                Keyword(Kw::Loop),
514                Keyword(Kw::Map),
515                Keyword(Kw::Mod),
516                Keyword(Kw::Nand),
517                Keyword(Kw::New),
518                Keyword(Kw::Next),
519                Keyword(Kw::Nor),
520                Keyword(Kw::Not),
521                Keyword(Kw::Null),
522                Keyword(Kw::Of),
523                Keyword(Kw::On),
524                Keyword(Kw::Open),
525                Keyword(Kw::Or),
526                Keyword(Kw::Others),
527                Keyword(Kw::Out),
528                Keyword(Kw::Package),
529                Keyword(Kw::Parameter),
530                Keyword(Kw::Port),
531                Keyword(Kw::Postponed),
532                Keyword(Kw::Procedure),
533                Keyword(Kw::Process),
534                Keyword(Kw::Property),
535                Keyword(Kw::Protected),
536                Keyword(Kw::Pure),
537                Keyword(Kw::Range),
538                Keyword(Kw::Record),
539                Keyword(Kw::Register),
540                Keyword(Kw::Reject),
541                Keyword(Kw::Release),
542                Keyword(Kw::Rem),
543                Keyword(Kw::Report),
544                Keyword(Kw::Restrict),
545                Keyword(Kw::RestrictGuarantee),
546                Keyword(Kw::Return),
547                Keyword(Kw::Rol),
548                Keyword(Kw::Ror),
549                Keyword(Kw::Select),
550                Keyword(Kw::Sequence),
551                Keyword(Kw::Severity),
552                Keyword(Kw::Shared),
553                Keyword(Kw::Signal),
554                Keyword(Kw::Sla),
555                Keyword(Kw::Sll),
556                Keyword(Kw::Sra),
557                Keyword(Kw::Srl),
558                Keyword(Kw::Strong),
559                Keyword(Kw::Subtype),
560                Keyword(Kw::Then),
561                Keyword(Kw::To),
562                Keyword(Kw::Transport),
563                Keyword(Kw::Type),
564                Keyword(Kw::Unaffected),
565                Keyword(Kw::Units),
566                Keyword(Kw::Until),
567                Keyword(Kw::Use),
568                Keyword(Kw::Variable),
569                Keyword(Kw::Vmode),
570                Keyword(Kw::Vprop),
571                Keyword(Kw::Vunit),
572                Keyword(Kw::Wait),
573                Keyword(Kw::When),
574                Keyword(Kw::While),
575                Keyword(Kw::With),
576                Keyword(Kw::Xnor),
577                Keyword(Kw::Xor),
578            ],
579        );
580    }
581}