moore_vhdl_syntax/lexer/
bundler.rs

1// Copyright (c) 2016-2021 Fabian Schuiki
2
3use crate::lexer::categorizer::Category;
4use moore_common::errors::DiagBuilder2;
5use moore_common::grind::{Grinder, Lookahead};
6use moore_common::source::*;
7
8/// A grinder that bundles up categorized characters into groups and converts
9/// spaces and comments into single tokens, dropping any information about their
10/// content. String and bit string literals are formed here as well.
11pub struct Bundler<T: Grinder> {
12    inner: Lookahead<T>,
13    src: Source,
14}
15
16impl<T: Grinder> Bundler<T> {
17    /// Create a new bundler.
18    pub fn new<I>(inner: I, src: Source) -> Bundler<T>
19    where
20        I: Into<Lookahead<T>>,
21    {
22        Bundler {
23            inner: inner.into(),
24            src: src,
25        }
26    }
27}
28
29impl<T> Grinder for Bundler<T>
30where
31    T: Grinder<Item = Option<(usize, char, u8, Category)>, Error = DiagBuilder2>,
32{
33    type Item = Option<Spanned<Bundle>>;
34    type Error = DiagBuilder2;
35
36    fn emit(&mut self, err: Self::Error) {
37        self.inner.emit(err);
38    }
39
40    fn next(&mut self) -> Self::Item {
41        let (begin, c, sz, cat) = match self.inner.next() {
42            Some(v) => v,
43            None => return None,
44        };
45        let mut sp = Span::new(self.src, begin, begin + sz as usize);
46
47        // Handle single-line comments.
48        if c == '-' {
49            if let Some((_, '-', _, _)) = *self.inner.lookahead(0) {
50                self.inner.next();
51                while let &Some((offset, d, sz, _)) = self.inner.lookahead(0) {
52                    if d == '\n' {
53                        break;
54                    } else {
55                        sp.end = offset + sz as usize;
56                        self.inner.next();
57                    }
58                }
59                return Some(Spanned::new(Bundle::Comment, sp));
60            }
61        }
62
63        // Handle multi-line comments.
64        if c == '/' {
65            if let Some((_, '*', _, _)) = *self.inner.lookahead(0) {
66                self.inner.next();
67                let mut p0 = None;
68                let mut p1 = None;
69                while let Some((offset, d, sz, _)) = *self.inner.lookahead(0) {
70                    if p0 == Some('*') && p1 == Some('/') {
71                        break;
72                    } else {
73                        p0 = p1;
74                        p1 = Some(d);
75                        sp.end = offset + sz as usize;
76                        self.inner.next();
77                    }
78                }
79                return Some(Spanned::new(Bundle::Comment, sp));
80            }
81        }
82
83        // Handle bit string literals.
84        if c == '\'' {
85            if let Some((_, c, _, _)) = *self.inner.lookahead(0) {
86                if c != '(' && c != ')' {
87                    if let Some((offset, '\'', sz, _)) = *self.inner.lookahead(1) {
88                        self.inner.next();
89                        self.inner.next();
90                        sp.end = offset + sz as usize;
91                        return Some(Spanned::new(Bundle::BitLiteral(c), sp));
92                    }
93                }
94            }
95        }
96
97        // Handle string literals.
98        if c == '"' {
99            let mut s = String::new();
100            while let Some((offset, d, sz, _)) = self.inner.next() {
101                sp.end = offset + sz as usize;
102                if d == '"' {
103                    if let Some((_, '"', _, _)) = *self.inner.lookahead(0) {
104                        s.push('"');
105                        self.inner.next();
106                    } else {
107                        break;
108                    }
109                } else if d == '\n' {
110                    self.emit(
111                        DiagBuilder2::error("String literal must not contain line breaks.")
112                            .span(sp.end())
113                            .add_note(
114                                "Use string concatenation (e.g. \"abc\" & \"def\") to break \
115                                 strings across lines",
116                            ),
117                    );
118                } else {
119                    s.push(d);
120                }
121            }
122            return Some(Spanned::new(Bundle::StringLiteral(s), sp));
123        }
124
125        // Handle extended identifiers.
126        if c == '\\' {
127            let mut s = String::new();
128            s.push(c);
129            while let Some((offset, d, sz, _)) = self.inner.next() {
130                sp.end = offset + sz as usize;
131                if d == '\\' {
132                    s.push('\\');
133                    if let Some((_, '\\', _, _)) = *self.inner.lookahead(0) {
134                        self.inner.next();
135                    } else {
136                        break;
137                    }
138                } else if d == '\n' {
139                    self.emit(
140                        DiagBuilder2::error("Extended identifier must not contain line breaks.")
141                            .span(sp.end()),
142                    );
143                } else {
144                    s.push(d);
145                }
146            }
147            return Some(Spanned::new(Bundle::ExtendedIdent(s), sp));
148        }
149
150        // Bundle up the remaining characters.
151        match cat {
152            // If the character is a letter or digit, aggregate all following
153            // characters of the same kind into a string.
154            Category::Letter | Category::Digit => {
155                let mut s = String::new();
156                s.push(c);
157                while let &Some((offset, d, sz, c)) = self.inner.lookahead(0) {
158                    if c == cat {
159                        s.push(d);
160                        sp.end = offset + sz as usize;
161                        self.inner.next();
162                    } else {
163                        break;
164                    }
165                }
166                Some(Spanned::new(
167                    match cat {
168                        Category::Letter => Bundle::Letters(s),
169                        Category::Digit => Bundle::Digits(s),
170                        _ => unreachable!(),
171                    },
172                    sp,
173                ))
174            }
175
176            // If the character is a space, consume adjacent spaces and emit a
177            // token that covers the correct span, but does not contain the
178            // spaces themselves.
179            Category::Space => {
180                while let Some((offset, _, sz, Category::Space)) = *self.inner.lookahead(0) {
181                    sp.end = offset + sz as usize;
182                    self.inner.next();
183                }
184                Some(Spanned::new(Bundle::Space, sp))
185            }
186
187            // Emit special characters as 1-char bundles.
188            Category::Special => Some(Spanned::new(Bundle::Special(c), sp)),
189
190            // Throw errors for invalid characters.
191            Category::Other => {
192                self.emit(
193                    DiagBuilder2::error(format!(
194                        "Character `{}` not allowed in VHDL source text",
195                        c
196                    ))
197                    .span(sp),
198                );
199                None
200            }
201        }
202    }
203}
204
/// The most basic character groups recognised in VHDL source text. Later
/// lexical analysis combines one or more bundles into more meaningful tokens.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Bundle {
    /// A run of consecutive letter characters.
    Letters(String),
    /// A run of consecutive digit characters.
    Digits(String),
    /// A single special character.
    Special(char),
    /// The text of a string literal.
    StringLiteral(String),
    /// The character of a single-character literal.
    BitLiteral(char),
    /// The text of an extended identifier.
    ExtendedIdent(String),
    /// Whitespace; the characters themselves are not retained.
    Space,
    /// A comment; its text is not retained.
    Comment,
}

impl Bundle {
    /// Tell whether this bundle carries syntactic meaning, which is the case
    /// for everything except whitespace and comments.
    pub fn is_significant(&self) -> bool {
        match self {
            Bundle::Space | Bundle::Comment => false,
            Bundle::Letters(_)
            | Bundle::Digits(_)
            | Bundle::Special(_)
            | Bundle::StringLiteral(_)
            | Bundle::BitLiteral(_)
            | Bundle::ExtendedIdent(_) => true,
        }
    }
}