moore_vhdl_syntax/lexer/
tokenizer.rs

1// Copyright (c) 2016-2021 Fabian Schuiki
2
3use crate::lexer::bundler::Bundle;
4use crate::lexer::token::*;
5use moore_common::errors::*;
6use moore_common::grind::{Grinder, Lookahead};
7use moore_common::name::*;
8use moore_common::source::*;
9
/// A grinder that combines character bundles into lexical tokens. This is the
/// last stage of lexical analysis.
///
/// The upstream bundle grinder is wrapped in a `Lookahead` so that bundles
/// which were read speculatively can be handed back via `undo`.
pub struct Tokenizer<T: Grinder> {
    /// Upstream source of bundles, with push-back support.
    inner: Lookahead<T>,
}
15
16impl<T: Grinder> Tokenizer<T>
17where
18    T: Grinder<Item = Option<Spanned<Bundle>>, Error = DiagBuilder2>,
19{
20    /// Create a new bundler.
21    pub fn new<I>(inner: I) -> Tokenizer<T>
22    where
23        I: Into<Lookahead<T>>,
24    {
25        Tokenizer {
26            inner: inner.into(),
27        }
28    }
29
30    /// Returns the next bundle in the input for which `is_significant` is true.
31    fn next_significant(&mut self) -> Option<Spanned<Bundle>> {
32        while let Some(v) = self.inner.next() {
33            if v.value.is_significant() {
34                return Some(v);
35            }
36        }
37        None
38    }
39
40    /// Parses a bit string literal with an optional size already parsed. These
41    /// are of the form `<size>[B|O|X|D|UB|UO|UX|SB|SO|SX]"<bits>"`.
42    fn parse_bit_string_literal(
43        &mut self,
44        int: Option<Spanned<Name>>,
45        base: Spanned<String>,
46        mut value: Spanned<String>,
47    ) -> Spanned<Token> {
48        let (int, mut span) = match int {
49            Some(Spanned { value, span }) => (Some(value), span),
50            None => (None, base.span),
51        };
52        span.end = value.span.end;
53
54        // Parse the base.
55        let base = match base.value.to_uppercase().as_str() {
56            "B" => BitStringBase::B,
57            "O" => BitStringBase::O,
58            "X" => BitStringBase::X,
59            "D" => BitStringBase::D,
60            "UB" => BitStringBase::UB,
61            "UO" => BitStringBase::UO,
62            "UX" => BitStringBase::UX,
63            "SB" => BitStringBase::SB,
64            "SO" => BitStringBase::SO,
65            "SX" => BitStringBase::SX,
66            _ => {
67                self.emit(
68                    DiagBuilder2::error(format!(
69                        "`{}` is not a valid base for a bit string literal",
70                        base.value
71                    ))
72                    .span(base.span)
73                    .add_note("Valid bases are B, O, X, UB, UO, UX, SB, SO, SX, D"),
74                );
75                BitStringBase::B
76            }
77        };
78
79        // Parse the value.
80        let mut parsed_value = String::new();
81        for c in value.value.drain(..) {
82            if !c.is_whitespace() {
83                if c != '_' {
84                    parsed_value.push(c);
85                }
86            } else {
87                self.emit(
88                    DiagBuilder2::error(format!(
89                        "Character `{}` may not appear in a bit string literal",
90                        c
91                    ))
92                    .span(value.span),
93                );
94            }
95        }
96        let value = get_name_table().intern(&parsed_value, true);
97
98        Spanned::new(Lit(Literal::BitString(int, base, value)), span)
99    }
100
101    /// Parse an integer, i.e. a sequence of digits with optional intermittent
102    /// underscores '_'.
103    fn parse_integer(&mut self, mut s: String, mut sp: Span) -> Spanned<Name> {
104        loop {
105            match self.inner.next() {
106                Some(Spanned {
107                    value: Bundle::Digits(n),
108                    span,
109                }) => {
110                    s.push_str(&n);
111                    sp.end = span.end;
112                }
113                Some(Spanned {
114                    value: Bundle::Special('_'),
115                    ..
116                }) => (),
117                n => {
118                    self.inner.undo(n);
119                    break;
120                }
121            }
122        }
123        Spanned::new(get_name_table().intern(&s, true), sp)
124    }
125
126    /// Parse an based integer, i.e. a sequence of letters and digits with
127    /// optional intermittent underscores '_'.
128    fn parse_based_integer(&mut self) -> Spanned<Name> {
129        let (mut s, mut sp) = match self.inner.next() {
130            Some(Spanned {
131                value: Bundle::Letters(n),
132                span,
133            })
134            | Some(Spanned {
135                value: Bundle::Digits(n),
136                span,
137            }) => (n, span),
138            Some(n) => {
139                let sp = n.span.begin().into();
140                self.emit(DiagBuilder2::error("Expected digits or letters").span(sp));
141                self.inner.undo(Some(n));
142                return Spanned::new(get_name_table().intern("", true), sp);
143            }
144            None => {
145                self.emit(DiagBuilder2::error("Expected digits or letters"));
146                self.inner.undo(None);
147                return Spanned::new(get_name_table().intern("", true), INVALID_SPAN);
148            }
149        };
150        loop {
151            match self.inner.next() {
152                Some(Spanned {
153                    value: Bundle::Letters(n),
154                    span,
155                })
156                | Some(Spanned {
157                    value: Bundle::Digits(n),
158                    span,
159                }) => {
160                    s.push_str(&n);
161                    sp.end = span.end;
162                }
163                Some(Spanned {
164                    value: Bundle::Special('_'),
165                    ..
166                }) => (),
167                n => {
168                    self.inner.undo(n);
169                    break;
170                }
171            }
172        }
173        Spanned::new(get_name_table().intern(&s, true), sp)
174    }
175
176    /// Try to parse an exponent, introduced by a `E` character.
177    fn try_exponent(&mut self) -> Option<Spanned<Exponent>> {
178        match self.inner.next() {
179            Some(Spanned {
180                value: Bundle::Letters(ref l),
181                span: mut sp,
182            }) if l == "e" || l == "E" => {
183                let mut n = self.inner.next();
184                let sign = match n {
185                    Some(Spanned {
186                        value: Bundle::Special('+'),
187                        ..
188                    }) => {
189                        n = self.inner.next();
190                        ExponentSign::Positive
191                    }
192                    Some(Spanned {
193                        value: Bundle::Special('-'),
194                        ..
195                    }) => {
196                        n = self.inner.next();
197                        ExponentSign::Negative
198                    }
199                    _ => ExponentSign::Positive,
200                };
201                match n {
202                    Some(Spanned {
203                        value: Bundle::Digits(s),
204                        span,
205                    }) => {
206                        let int = self.parse_integer(s, span);
207                        sp.end = int.span.end;
208                        Some(Spanned::new(Exponent(sign, int.value), sp))
209                    }
210                    n => {
211                        self.emit(
212                            DiagBuilder2::error(format!("Expected exponent after `{}`", l))
213                                .span(sp),
214                        );
215                        self.inner.undo(n);
216                        None
217                    }
218                }
219            }
220            n => {
221                self.inner.undo(n);
222                None
223            }
224        }
225    }
226
227    /// Parse any of the symbols in the language. `c0` comes from a
228    /// `Bundle::Special` that has already been parsed.
229    fn parse_symbol(&mut self, c0: char, mut span: Span) -> Option<Spanned<Token>> {
230        let n1 = self.inner.next();
231        let n2 = self.inner.next();
232
233        // Try to parse a three-character symbol.
234        if let (
235            &Some(Spanned {
236                value: Bundle::Special(c1),
237                ..
238            }),
239            &Some(Spanned {
240                value: Bundle::Special(c2),
241                span: sp,
242            }),
243        ) = (&n1, &n2)
244        {
245            if let Some(tkn) = match (c0, c1, c2) {
246                ('?', '/', '=') => Some(MatchNeq),
247                ('?', '<', '=') => Some(MatchLeq),
248                ('?', '>', '=') => Some(MatchGeq),
249                _ => None,
250            } {
251                span.expand(sp);
252                return Some(Spanned::new(tkn, span));
253            }
254        }
255        self.inner.undo(n2);
256
257        // Try to parse a two-character symbol.
258        if let &Some(Spanned {
259            value: Bundle::Special(c1),
260            span: sp,
261        }) = &n1
262        {
263            if let Some(tkn) = match (c0, c1) {
264                ('=', '>') => Some(Arrow),
265                ('?', '?') => Some(Condition),
266                ('<', '>') => Some(LtGt),
267                (':', '=') => Some(VarAssign),
268                ('<', '<') => Some(Lshift),
269                ('>', '>') => Some(Rshift),
270                ('/', '=') => Some(Neq),
271                ('<', '=') => Some(Leq),
272                ('>', '=') => Some(Geq),
273                ('?', '=') => Some(MatchEq),
274                ('?', '<') => Some(MatchLt),
275                ('?', '>') => Some(MatchGt),
276                ('*', '*') => Some(Pow),
277                _ => None,
278            } {
279                span.expand(sp);
280                return Some(Spanned::new(tkn, span));
281            }
282        }
283        self.inner.undo(n1);
284
285        // Try to parse a one-character symbol.
286        if let Some(tkn) = match c0 {
287            '(' => Some(OpenDelim(Paren)),
288            ')' => Some(CloseDelim(Paren)),
289            '[' => Some(OpenDelim(Brack)),
290            ']' => Some(CloseDelim(Brack)),
291            '.' => Some(Period),
292            ',' => Some(Comma),
293            ':' => Some(Colon),
294            ';' => Some(Semicolon),
295            '\'' => Some(Apostrophe),
296            '&' => Some(Ampersand),
297            '=' => Some(Eq),
298            '<' => Some(Lt),
299            '>' => Some(Gt),
300            '+' => Some(Add),
301            '-' => Some(Sub),
302            '*' => Some(Mul),
303            '/' => Some(Div),
304            '|' => Some(Pipe),
305            '?' => Some(Qmark),
306            _ => None,
307        } {
308            return Some(Spanned::new(tkn, span));
309        }
310
311        // If we get here, we parsed something which is allowed in VHDL source
312        // text, but is not a valid symbol on its own.
313        self.emit(DiagBuilder2::error(format!("`{}` is not a valid symbol", c0)).span(span));
314        None
315    }
316}
317
318impl<T> Grinder for Tokenizer<T>
319where
320    T: Grinder<Item = Option<Spanned<Bundle>>, Error = DiagBuilder2>,
321{
    /// Each step yields one spanned token, or `None` when `next` has nothing
    /// further to return.
    type Item = Option<Spanned<Token>>;
    /// Diagnostics use the same type as the upstream bundle grinder.
    type Error = DiagBuilder2;

    /// Forwards the diagnostic `err` to the wrapped grinder.
    fn emit(&mut self, err: Self::Error) {
        self.inner.emit(err);
    }
328
    /// Produces the next token, or `None` once no significant bundle is left.
    ///
    /// The first significant bundle decides what is lexed:
    ///
    /// * letters start either a bit string literal (when a string literal
    ///   follows immediately) or a basic identifier / keyword;
    /// * an extended identifier becomes an identifier token as is;
    /// * digits start a bit string literal with a size prefix, a decimal
    ///   literal with optional fraction and exponent, or a based literal of
    ///   the form `base#digits[.digits]#[exponent]`;
    /// * a string literal or bit literal becomes the corresponding literal;
    /// * any other special character is passed on to `parse_symbol`.
    fn next(&mut self) -> Self::Item {
        let b = match self.next_significant() {
            Some(v) => v,
            None => return None,
        };

        match b.value {
            Bundle::Letters(mut s) => {
                let mut m = self.inner.next();
                if let Some(Spanned {
                    value: Bundle::StringLiteral(v),
                    span,
                }) = m
                {
                    // If the letters are immediately followed by a string literal,
                    // parse this as a bit string literal.
                    Some(self.parse_bit_string_literal(
                        None,
                        Spanned::new(s, b.span),
                        Spanned::new(v, span),
                    ))
                } else {
                    // Parse a basic identifier.
                    let mut sp = b.span;
                    // Absorb letters, digits, and underscores; `m` always holds
                    // the bundle that is examined next.
                    loop {
                        match m {
                            Some(Spanned {
                                value: Bundle::Letters(n),
                                span,
                            })
                            | Some(Spanned {
                                value: Bundle::Digits(n),
                                span,
                            }) => {
                                s.push_str(&n);
                                sp.end = span.end;
                                m = self.inner.next();
                            }
                            Some(Spanned {
                                value: Bundle::Special('_'),
                                span,
                            }) => {
                                s.push('_');
                                sp.end = span.end;
                                m = self.inner.next();
                            }
                            // Anything else ends the identifier and is handed
                            // back to the input.
                            n => {
                                self.inner.undo(n);
                                break;
                            }
                        }
                    }

                    // See if this identifier is a keyword.
                    Some(Spanned::new(
                        if let Some(kw) = find_keyword(&s) {
                            Keyword(kw)
                        } else {
                            // NOTE(review): the `false` flag differs from the
                            // `true` used for extended identifiers; presumably
                            // it selects case-insensitive interning. Confirm
                            // against the name table.
                            Ident(get_name_table().intern(&s, false))
                        },
                        sp,
                    ))
                }
            }

            Bundle::ExtendedIdent(s) => Some(Spanned::new(
                Ident(get_name_table().intern(&s, true)),
                b.span,
            )),

            Bundle::Digits(s) => {
                // Parse the integer and decide how to continue.
                let int = self.parse_integer(s, b.span);
                // Two bundles of lookahead are needed to tell the cases apart.
                match (self.inner.next(), self.inner.next()) {
                    // If the integer is followed by some letters and a string
                    // literal, interpret this as a bit string literal.
                    (
                        Some(Spanned {
                            value: Bundle::Letters(b),
                            span: sp1,
                        }),
                        Some(Spanned {
                            value: Bundle::StringLiteral(s),
                            span: sp2,
                        }),
                    ) => Some(self.parse_bit_string_literal(
                        Some(int),
                        Spanned::new(b, sp1),
                        Spanned::new(s, sp2),
                    )),

                    // If the integer is followed by a period '.', parse the
                    // following fractional part.
                    (
                        Some(Spanned {
                            value: Bundle::Special('.'),
                            ..
                        }),
                        Some(Spanned {
                            value: Bundle::Digits(s),
                            span,
                        }),
                    ) => {
                        let frac = self.parse_integer(s, span);
                        let exp = self.try_exponent();
                        let mut sp = int.span;
                        // The literal ends at the exponent if there is one,
                        // otherwise at the fractional part.
                        let exp = match exp {
                            Some(Spanned { value, span }) => {
                                sp.end = span.end;
                                Some(value)
                            }
                            _ => {
                                sp.end = frac.span.end;
                                None
                            }
                        };
                        Some(Spanned::new(
                            Lit(Literal::Abstract(None, int.value, Some(frac.value), exp)),
                            sp,
                        ))
                    }

                    // If the integer is followed by a hashtag '#', parse this
                    // as a based literal.
                    (
                        Some(Spanned {
                            value: Bundle::Special('#'),
                            ..
                        }),
                        n,
                    ) => {
                        // Only the `#` is consumed here; the second lookahead
                        // bundle belongs to the digits of the literal.
                        self.inner.undo(n);
                        let base = int;
                        let int = self.parse_based_integer();
                        let mut sp = Span::union(base.span, int.span);

                        // Parse the optional fractional part.
                        let n = self.inner.next();
                        let frac = if let Some(Spanned {
                            value: Bundle::Special('.'),
                            ..
                        }) = n
                        {
                            let f = self.parse_based_integer();
                            sp.expand(f.span);
                            Some(f.value)
                        } else {
                            self.inner.undo(n);
                            None
                        };

                        // Match the second `#`.
                        match self.inner.next() {
                            Some(Spanned {
                                value: Bundle::Special('#'),
                                span,
                            }) => {
                                sp.expand(span);
                            }
                            // A missing `#` is reported, but lexing of the
                            // literal continues.
                            n => {
                                self.emit(
                                    DiagBuilder2::error(
                                        "Expected `#` after digits of based literal",
                                    )
                                    .span(sp.end()),
                                );
                                self.inner.undo(n);
                            }
                        }

                        // Parse the optional exponent.
                        let exp = match self.try_exponent() {
                            Some(Spanned { value, span }) => {
                                sp.end = span.end;
                                Some(value)
                            }
                            _ => None,
                        };

                        Some(Spanned::new(
                            Lit(Literal::Abstract(Some(base.value), int.value, frac, exp)),
                            sp,
                        ))
                    }

                    // Otherwise, simply check for an exponent and we're done.
                    (n, m) => {
                        // Hand back both lookahead bundles, the later one first.
                        self.inner.undo(m);
                        self.inner.undo(n);
                        let exp = self.try_exponent();
                        let mut sp = int.span;
                        let exp = match exp {
                            Some(Spanned { value, span }) => {
                                sp.end = span.end;
                                Some(value)
                            }
                            _ => {
                                // `sp` was just copied from `int.span`, so this
                                // assignment leaves it unchanged.
                                sp.end = int.span.end;
                                None
                            }
                        };
                        Some(Spanned::new(
                            Lit(Literal::Abstract(None, int.value, None, exp)),
                            sp,
                        ))
                    }
                }
            }

            Bundle::StringLiteral(s) => Some(Spanned::new(
                Lit(Literal::String(get_name_table().intern(&s, true))),
                b.span,
            )),

            Bundle::BitLiteral(c) => Some(Spanned::new(Lit(Literal::Char(c)), b.span)),

            Bundle::Special(c0) => self.parse_symbol(c0, b.span),
            // `next_significant` only returns bundles that pass
            // `is_significant`; presumably spaces and comments never do.
            Bundle::Space | Bundle::Comment => unreachable!(),
        }
    }
549}