1pub mod bundler;
8pub mod categorizer;
9pub mod token;
10pub mod tokenizer;
11
12use self::bundler::Bundler;
13use self::categorizer::Categorizer;
14use self::token::*;
15use self::tokenizer::Tokenizer;
16use moore_common::errors::*;
17use moore_common::grind::utf8::Utf8;
18use moore_common::grind::Grinder;
19use moore_common::source::*;
20
21pub struct Lexer<T: Grinder<Item = Option<u8>, Error = DiagBuilder2>> {
24 inner: Tokenizer<Bundler<Categorizer<Utf8<T>>>>,
25}
26
27impl<T> Lexer<T>
28where
29 T: Grinder<Item = Option<u8>, Error = DiagBuilder2>,
30{
31 pub fn new(bytes: T, src: Source) -> Lexer<T> {
33 let chars = Utf8::new(bytes);
34 let cat = Categorizer::new(chars);
35 let bundles = Bundler::new(cat, src);
36 let tokens = Tokenizer::new(bundles);
37 Lexer { inner: tokens }
38 }
39}
40
41impl<T> Grinder for Lexer<T>
42where
43 T: Grinder<Item = Option<u8>, Error = DiagBuilder2>,
44{
45 type Item = Option<Spanned<Token>>;
46 type Error = DiagBuilder2;
47
48 fn next(&mut self) -> Self::Item {
49 self.inner.next()
50 }
51
52 fn emit(&mut self, err: Self::Error) {
53 self.inner.emit(err)
54 }
55}
56
57#[cfg(test)]
58mod test {
59 use super::Lexer;
60 use crate::lexer::token::*;
61 use moore_common::errors::*;
62 use moore_common::grind::{self, Grinder};
63 use moore_common::name::*;
64 use moore_common::source::*;
65
66 fn lex(src: Source) -> Vec<Token> {
67 let content = src.get_content();
68 let bytes = grind::from_iter(content.bytes().iter().map(|x| *x))
69 .vent(|err: DiagBuilder2| eprintln!("{}", err));
70 let mut tokens = Lexer::new(bytes, src);
71 let mut v = Vec::new();
72 while let Some(Spanned { value, .. }) = tokens.next() {
73 v.push(value);
74 }
75 v
76 }
77
78 fn check(input: &str, expected: &[Token]) {
79 use std::cell::Cell;
80 thread_local!(static INDEX: Cell<usize> = Cell::new(0));
81 let sm = get_source_manager();
82 let idx = INDEX.with(|i| {
83 let v = i.get();
84 i.set(v + 1);
85 v
86 });
87 let source = sm.add(&format!("test_{}.vhd", idx), input);
88 let actual = lex(source);
89 assert_eq!(actual.len(), expected.len());
90 for (a, &e) in actual.into_iter().zip(expected.into_iter()) {
91 assert_eq!(a, e);
92 }
93 }
94
95 fn name(n: &str) -> Name {
96 get_name_table().intern(n, false)
97 }
98
99 fn name_case(n: &str) -> Name {
100 get_name_table().intern(n, true)
101 }
102
/// Lexes two lines of whitespace-separated basic identifiers and expects one
/// `Ident` token per word, in source order. Every expected name is interned
/// through `name`, i.e. with `false` as the second `intern` argument.
103 #[test]
104 fn basic_ident() {
105 check(
106 r"
107 COUNT X c_out FFT Decoder
108 VHSIC X1 PageCount STORE_NEXT_ITEM As49__8
109 ",
110 &[
111 Ident(name("COUNT")),
112 Ident(name("X")),
113 Ident(name("c_out")),
114 Ident(name("FFT")),
115 Ident(name("Decoder")),
116 Ident(name("VHSIC")),
117 Ident(name("X1")),
118 Ident(name("PageCount")),
119 Ident(name("STORE_NEXT_ITEM")),
120 Ident(name("As49__8")),
121 ],
122 );
123 }
124
/// Lexes backslash-delimited extended identifiers mixed with one basic
/// identifier and several `--` comment lines. The expected list holds only
/// `Ident` tokens, so nothing is expected for the comments. Extended
/// identifiers are interned through `name_case`; their expected names keep the
/// delimiting backslashes and contain a single backslash where the source
/// doubles it. The basic identifier `VHDL` is interned through `name`.
125 #[test]
126 fn extended_ident() {
127 check(
128 r"
129 -- Two different identifiers, neither of which is the reserved word bus.
130 \BUS\ \bus\
131
132 -- An identifier containing three characters.
133 \a\\b\
134
135 -- Three distinct identifiers.
136 VHDL \VHDL\ \vhdl\
137
138 -- Use of spaces and special characters.
139 \A B\ \!@#\ \_+`'''1#{}\
140 ",
141 &[
142 Ident(name_case("\\BUS\\")),
143 Ident(name_case("\\bus\\")),
144 Ident(name_case("\\a\\b\\")),
145 Ident(name("VHDL")),
146 Ident(name_case("\\VHDL\\")),
147 Ident(name_case("\\vhdl\\")),
148 Ident(name_case("\\A B\\")),
149 Ident(name_case("\\!@#\\")),
150 Ident(name_case("\\_+`'''1#{}\\")),
151 ],
152 );
153 }
154
/// Lexes decimal integer and real literals, with and without exponents.
/// Every expected token is a `Literal::Abstract` whose first field (the base)
/// is `None`; the remaining fields carry the integer digits, the optional
/// fractional digits and the optional exponent. As the expected values show,
/// underscores are dropped (`123_456` becomes `123456`) and an exponent
/// written without a sign (`1E6`) is expected as `ExponentSign::Positive`.
155 #[test]
156 fn decimal_literal() {
157 check(
158 r"
159 12 0 1E6 123_456 -- Integer literals.
160 12.0 0.0 0.456 3.14159_26 -- Real literals.
161 1.34E-12 1.0E+6 6.023E+24 -- Real literals with exponents.
162 ",
163 &[
164 Lit(Literal::Abstract(None, name_case("12"), None, None)),
165 Lit(Literal::Abstract(None, name_case("0"), None, None)),
166 Lit(Literal::Abstract(
167 None,
168 name_case("1"),
169 None,
170 Some(Exponent(ExponentSign::Positive, name_case("6"))),
171 )),
172 Lit(Literal::Abstract(None, name_case("123456"), None, None)),
173 Lit(Literal::Abstract(
174 None,
175 name_case("12"),
176 Some(name_case("0")),
177 None,
178 )),
179 Lit(Literal::Abstract(
180 None,
181 name_case("0"),
182 Some(name_case("0")),
183 None,
184 )),
185 Lit(Literal::Abstract(
186 None,
187 name_case("0"),
188 Some(name_case("456")),
189 None,
190 )),
191 Lit(Literal::Abstract(
192 None,
193 name_case("3"),
194 Some(name_case("1415926")),
195 None,
196 )),
197 Lit(Literal::Abstract(
198 None,
199 name_case("1"),
200 Some(name_case("34")),
201 Some(Exponent(ExponentSign::Negative, name_case("12"))),
202 )),
203 Lit(Literal::Abstract(
204 None,
205 name_case("1"),
206 Some(name_case("0")),
207 Some(Exponent(ExponentSign::Positive, name_case("6"))),
208 )),
209 Lit(Literal::Abstract(
210 None,
211 name_case("6"),
212 Some(name_case("023")),
213 Some(Exponent(ExponentSign::Positive, name_case("24"))),
214 )),
215 ],
216 );
217 }
218
/// Lexes based literals of the form `base#digits#`, optionally with a
/// fractional part and an exponent after the closing `#`. The base is
/// expected verbatim in the first field of `Literal::Abstract` (the leading
/// zero of `016` is kept), underscores are dropped from the digits, and the
/// exponent is expected in the last field with `ExponentSign::Positive` when
/// no sign is written.
219 #[test]
220 fn based_literal() {
221 check(
222 r"
223 2#1111_1111# 16#FF# 016#0FF# -- Integer literals of value 255
224 16#E#E1 2#1110_0000# -- Integer literals of value 224
225 16#F.FF#E+2 2#1.1111_1111_111#E11 -- Real literals of value 4095.0
226 ",
227 &[
228 Lit(Literal::Abstract(
229 Some(name_case("2")),
230 name_case("11111111"),
231 None,
232 None,
233 )),
234 Lit(Literal::Abstract(
235 Some(name_case("16")),
236 name_case("FF"),
237 None,
238 None,
239 )),
240 Lit(Literal::Abstract(
241 Some(name_case("016")),
242 name_case("0FF"),
243 None,
244 None,
245 )),
246 Lit(Literal::Abstract(
247 Some(name_case("16")),
248 name_case("E"),
249 None,
250 Some(Exponent(ExponentSign::Positive, name_case("1"))),
251 )),
252 Lit(Literal::Abstract(
253 Some(name_case("2")),
254 name_case("11100000"),
255 None,
256 None,
257 )),
258 Lit(Literal::Abstract(
259 Some(name_case("16")),
260 name_case("F"),
261 Some(name_case("FF")),
262 Some(Exponent(ExponentSign::Positive, name_case("2"))),
263 )),
264 Lit(Literal::Abstract(
265 Some(name_case("2")),
266 name_case("1"),
267 Some(name_case("11111111111")),
268 Some(Exponent(ExponentSign::Positive, name_case("11"))),
269 )),
270 ],
271 );
272 }
273
/// Lexes bit string literals with and without a leading decimal length.
/// Each expected token is a `Literal::BitString` holding, in order, the
/// optional length digits, the base specifier as a `BitStringBase` variant,
/// and the text between the quotes with underscores removed. The value is
/// expected as written; the expansions described in the `--` comments of the
/// input are not part of the expected tokens.
274 #[test]
275 fn bit_string_literal() {
276 check(
277 "
278 B\"1111_1111_1111\" -- Equivalent to the string literal \"111111111111\".
279 X\"FFF\" -- Equivalent to B\"1111_1111_1111\".
280 O\"777\" -- Equivalent to B\"111_111_111\".
281 X\"777\" -- Equivalent to B\"0111_0111_0111\".
282
283 B\"XXXX_01LH\" -- Equivalent to the string literal \"XXXX01LH\"
284 UO\"27\" -- Equivalent to B\"010_111\"
285 UO\"2C\" -- Equivalent to B\"011_CCC\"
286 SX\"3W\" -- Equivalent to B\"0011_WWWW\"
287 D\"35\" -- Equivalent to B\"100011\"
288
289 12UB\"X1\" -- Equivalent to B\"0000_0000_00X1\"
290 12SB\"X1\" -- Equivalent to B\"XXXX_XXXX_XXX1\"
291 12UX\"F-\" -- Equivalent to B\"0000_1111_----\"
292 12SX\"F-\" -- Equivalent to B\"1111_1111_----\"
293 12D\"13\" -- Equivalent to B\"0000_0000_1101\"
294
295 12UX\"000WWW\" -- Equivalent to B\"WWWW_WWWW_WWWW\"
296 12SX\"FFFC00\" -- Equivalent to B\"1100_0000_0000\"
297 12SX\"XXXX00\" -- Equivalent to B\"XXXX_0000_0000\"
298 ",
299 &[
300 Lit(Literal::BitString(
301 None,
302 BitStringBase::B,
303 name_case("111111111111"),
304 )),
305 Lit(Literal::BitString(None, BitStringBase::X, name_case("FFF"))),
306 Lit(Literal::BitString(None, BitStringBase::O, name_case("777"))),
307 Lit(Literal::BitString(None, BitStringBase::X, name_case("777"))),
308 Lit(Literal::BitString(
309 None,
310 BitStringBase::B,
311 name_case("XXXX01LH"),
312 )),
313 Lit(Literal::BitString(None, BitStringBase::UO, name_case("27"))),
314 Lit(Literal::BitString(None, BitStringBase::UO, name_case("2C"))),
315 Lit(Literal::BitString(None, BitStringBase::SX, name_case("3W"))),
316 Lit(Literal::BitString(None, BitStringBase::D, name_case("35"))),
317 Lit(Literal::BitString(
318 Some(name_case("12")),
319 BitStringBase::UB,
320 name_case("X1"),
321 )),
322 Lit(Literal::BitString(
323 Some(name_case("12")),
324 BitStringBase::SB,
325 name_case("X1"),
326 )),
327 Lit(Literal::BitString(
328 Some(name_case("12")),
329 BitStringBase::UX,
330 name_case("F-"),
331 )),
332 Lit(Literal::BitString(
333 Some(name_case("12")),
334 BitStringBase::SX,
335 name_case("F-"),
336 )),
337 Lit(Literal::BitString(
338 Some(name_case("12")),
339 BitStringBase::D,
340 name_case("13"),
341 )),
342 Lit(Literal::BitString(
343 Some(name_case("12")),
344 BitStringBase::UX,
345 name_case("000WWW"),
346 )),
347 Lit(Literal::BitString(
348 Some(name_case("12")),
349 BitStringBase::SX,
350 name_case("FFFC00"),
351 )),
352 Lit(Literal::BitString(
353 Some(name_case("12")),
354 BitStringBase::SX,
355 name_case("XXXX00"),
356 )),
357 ],
358 );
359 }
360
/// Lexes four character literals — a letter, an asterisk, the apostrophe
/// itself (`'''`) and a space — and expects one `Literal::Char` token for
/// each, in source order.
361 #[test]
362 fn character_literal() {
363 check(
364 "
365 'A' '*' ''' ' '
366 ",
367 &[
368 Lit(Literal::Char('A')),
369 Lit(Literal::Char('*')),
370 Lit(Literal::Char('\'')),
371 Lit(Literal::Char(' ')),
372 ],
373 );
374 }
375
/// Lexes string literals, most of them followed by `--` comments. Expected
/// are `Literal::String` tokens holding the text between the quotes: the
/// empty literal yields an empty name and the doubled quote inside `""""` is
/// expected as a single `"`. Nothing is expected for the comments.
376 #[test]
377 fn string_literal() {
378 check(
379 "
380 \"Setup time is too short\" -- An error message.
381 \"\" -- An empty string literal.
382 \" \" \"A\" \"\"\"\" -- Three string literals of length 1.
383 \"Characters such as $, %, and } are allowed in string literals.\"
384 ",
385 &[
386 Lit(Literal::String(name_case("Setup time is too short"))),
387 Lit(Literal::String(name_case(""))),
388 Lit(Literal::String(name_case(" "))),
389 Lit(Literal::String(name_case("A"))),
390 Lit(Literal::String(name_case("\""))),
391 Lit(Literal::String(name_case(
392 "Characters such as $, %, and } are allowed in string literals.",
393 ))),
394 ],
395 );
396 }
397
/// Lexes the delimiter and operator symbols listed in the input and expects
/// the matching token for each, in source order. Multi-character symbols
/// such as `?/=`, `?<=` and `**` are each expected as one token.
398 #[test]
399 fn symbols() {
400 check(
401 "
402 ( )
403 . , : ; ' &
404 => ?? <> := << >>
405 = /= < <= > >=
406 ?= ?/= ?< ?<= ?> ?>=
407 + - * / **
408 ",
409 &[
410 OpenDelim(Paren),
411 CloseDelim(Paren),
412 Period,
413 Comma,
414 Colon,
415 Semicolon,
416 Apostrophe,
417 Ampersand,
418 Arrow,
419 Condition,
420 LtGt,
421 VarAssign,
422 Lshift,
423 Rshift,
424 Eq,
425 Neq,
426 Lt,
427 Leq,
428 Gt,
429 Geq,
430 MatchEq,
431 MatchNeq,
432 MatchLt,
433 MatchLeq,
434 MatchGt,
435 MatchGeq,
436 Add,
437 Sub,
438 Mul,
439 Div,
440 Pow,
441 ],
442 );
443 }
444
/// Lexes a list of reserved words, all written in lower case, and expects one
/// `Keyword` token with the corresponding `Kw` variant per word, in source
/// order.
445 #[test]
446 fn keywords() {
447 check(
448 "
449 abs access after alias all and architecture array assert assume
450 assume_guarantee attribute begin block body buffer bus case
451 component configuration constant context cover default disconnect
452 downto else elsif end entity exit fairness file for force function
453 generate generic group guarded if impure in inertial inout is label
454 library linkage literal loop map mod nand new next nor not null of
455 on open or others out package parameter port postponed procedure
456 process property protected pure range record register reject release
457 rem report restrict restrict_guarantee return rol ror select
458 sequence severity shared signal sla sll sra srl strong subtype then
459 to transport type unaffected units until use variable vmode vprop
460 vunit wait when while with xnor xor
461 ",
462 &[
463 Keyword(Kw::Abs),
464 Keyword(Kw::Access),
465 Keyword(Kw::After),
466 Keyword(Kw::Alias),
467 Keyword(Kw::All),
468 Keyword(Kw::And),
469 Keyword(Kw::Architecture),
470 Keyword(Kw::Array),
471 Keyword(Kw::Assert),
472 Keyword(Kw::Assume),
473 Keyword(Kw::AssumeGuarantee),
474 Keyword(Kw::Attribute),
475 Keyword(Kw::Begin),
476 Keyword(Kw::Block),
477 Keyword(Kw::Body),
478 Keyword(Kw::Buffer),
479 Keyword(Kw::Bus),
480 Keyword(Kw::Case),
481 Keyword(Kw::Component),
482 Keyword(Kw::Configuration),
483 Keyword(Kw::Constant),
484 Keyword(Kw::Context),
485 Keyword(Kw::Cover),
486 Keyword(Kw::Default),
487 Keyword(Kw::Disconnect),
488 Keyword(Kw::Downto),
489 Keyword(Kw::Else),
490 Keyword(Kw::Elsif),
491 Keyword(Kw::End),
492 Keyword(Kw::Entity),
493 Keyword(Kw::Exit),
494 Keyword(Kw::Fairness),
495 Keyword(Kw::File),
496 Keyword(Kw::For),
497 Keyword(Kw::Force),
498 Keyword(Kw::Function),
499 Keyword(Kw::Generate),
500 Keyword(Kw::Generic),
501 Keyword(Kw::Group),
502 Keyword(Kw::Guarded),
503 Keyword(Kw::If),
504 Keyword(Kw::Impure),
505 Keyword(Kw::In),
506 Keyword(Kw::Inertial),
507 Keyword(Kw::Inout),
508 Keyword(Kw::Is),
509 Keyword(Kw::Label),
510 Keyword(Kw::Library),
511 Keyword(Kw::Linkage),
512 Keyword(Kw::Literal),
513 Keyword(Kw::Loop),
514 Keyword(Kw::Map),
515 Keyword(Kw::Mod),
516 Keyword(Kw::Nand),
517 Keyword(Kw::New),
518 Keyword(Kw::Next),
519 Keyword(Kw::Nor),
520 Keyword(Kw::Not),
521 Keyword(Kw::Null),
522 Keyword(Kw::Of),
523 Keyword(Kw::On),
524 Keyword(Kw::Open),
525 Keyword(Kw::Or),
526 Keyword(Kw::Others),
527 Keyword(Kw::Out),
528 Keyword(Kw::Package),
529 Keyword(Kw::Parameter),
530 Keyword(Kw::Port),
531 Keyword(Kw::Postponed),
532 Keyword(Kw::Procedure),
533 Keyword(Kw::Process),
534 Keyword(Kw::Property),
535 Keyword(Kw::Protected),
536 Keyword(Kw::Pure),
537 Keyword(Kw::Range),
538 Keyword(Kw::Record),
539 Keyword(Kw::Register),
540 Keyword(Kw::Reject),
541 Keyword(Kw::Release),
542 Keyword(Kw::Rem),
543 Keyword(Kw::Report),
544 Keyword(Kw::Restrict),
545 Keyword(Kw::RestrictGuarantee),
546 Keyword(Kw::Return),
547 Keyword(Kw::Rol),
548 Keyword(Kw::Ror),
549 Keyword(Kw::Select),
550 Keyword(Kw::Sequence),
551 Keyword(Kw::Severity),
552 Keyword(Kw::Shared),
553 Keyword(Kw::Signal),
554 Keyword(Kw::Sla),
555 Keyword(Kw::Sll),
556 Keyword(Kw::Sra),
557 Keyword(Kw::Srl),
558 Keyword(Kw::Strong),
559 Keyword(Kw::Subtype),
560 Keyword(Kw::Then),
561 Keyword(Kw::To),
562 Keyword(Kw::Transport),
563 Keyword(Kw::Type),
564 Keyword(Kw::Unaffected),
565 Keyword(Kw::Units),
566 Keyword(Kw::Until),
567 Keyword(Kw::Use),
568 Keyword(Kw::Variable),
569 Keyword(Kw::Vmode),
570 Keyword(Kw::Vprop),
571 Keyword(Kw::Vunit),
572 Keyword(Kw::Wait),
573 Keyword(Kw::When),
574 Keyword(Kw::While),
575 Keyword(Kw::With),
576 Keyword(Kw::Xnor),
577 Keyword(Kw::Xor),
578 ],
579 );
580 }
581}