1use crate::lexer::bundler::Bundle;
4use crate::lexer::token::*;
5use moore_common::errors::*;
6use moore_common::grind::{Grinder, Lookahead};
7use moore_common::name::*;
8use moore_common::source::*;
9
10pub struct Tokenizer<T: Grinder> {
13 inner: Lookahead<T>,
14}
15
16impl<T: Grinder> Tokenizer<T>
17where
18 T: Grinder<Item = Option<Spanned<Bundle>>, Error = DiagBuilder2>,
19{
20 pub fn new<I>(inner: I) -> Tokenizer<T>
22 where
23 I: Into<Lookahead<T>>,
24 {
25 Tokenizer {
26 inner: inner.into(),
27 }
28 }
29
30 fn next_significant(&mut self) -> Option<Spanned<Bundle>> {
32 while let Some(v) = self.inner.next() {
33 if v.value.is_significant() {
34 return Some(v);
35 }
36 }
37 None
38 }
39
40 fn parse_bit_string_literal(
43 &mut self,
44 int: Option<Spanned<Name>>,
45 base: Spanned<String>,
46 mut value: Spanned<String>,
47 ) -> Spanned<Token> {
48 let (int, mut span) = match int {
49 Some(Spanned { value, span }) => (Some(value), span),
50 None => (None, base.span),
51 };
52 span.end = value.span.end;
53
54 let base = match base.value.to_uppercase().as_str() {
56 "B" => BitStringBase::B,
57 "O" => BitStringBase::O,
58 "X" => BitStringBase::X,
59 "D" => BitStringBase::D,
60 "UB" => BitStringBase::UB,
61 "UO" => BitStringBase::UO,
62 "UX" => BitStringBase::UX,
63 "SB" => BitStringBase::SB,
64 "SO" => BitStringBase::SO,
65 "SX" => BitStringBase::SX,
66 _ => {
67 self.emit(
68 DiagBuilder2::error(format!(
69 "`{}` is not a valid base for a bit string literal",
70 base.value
71 ))
72 .span(base.span)
73 .add_note("Valid bases are B, O, X, UB, UO, UX, SB, SO, SX, D"),
74 );
75 BitStringBase::B
76 }
77 };
78
79 let mut parsed_value = String::new();
81 for c in value.value.drain(..) {
82 if !c.is_whitespace() {
83 if c != '_' {
84 parsed_value.push(c);
85 }
86 } else {
87 self.emit(
88 DiagBuilder2::error(format!(
89 "Character `{}` may not appear in a bit string literal",
90 c
91 ))
92 .span(value.span),
93 );
94 }
95 }
96 let value = get_name_table().intern(&parsed_value, true);
97
98 Spanned::new(Lit(Literal::BitString(int, base, value)), span)
99 }
100
101 fn parse_integer(&mut self, mut s: String, mut sp: Span) -> Spanned<Name> {
104 loop {
105 match self.inner.next() {
106 Some(Spanned {
107 value: Bundle::Digits(n),
108 span,
109 }) => {
110 s.push_str(&n);
111 sp.end = span.end;
112 }
113 Some(Spanned {
114 value: Bundle::Special('_'),
115 ..
116 }) => (),
117 n => {
118 self.inner.undo(n);
119 break;
120 }
121 }
122 }
123 Spanned::new(get_name_table().intern(&s, true), sp)
124 }
125
126 fn parse_based_integer(&mut self) -> Spanned<Name> {
129 let (mut s, mut sp) = match self.inner.next() {
130 Some(Spanned {
131 value: Bundle::Letters(n),
132 span,
133 })
134 | Some(Spanned {
135 value: Bundle::Digits(n),
136 span,
137 }) => (n, span),
138 Some(n) => {
139 let sp = n.span.begin().into();
140 self.emit(DiagBuilder2::error("Expected digits or letters").span(sp));
141 self.inner.undo(Some(n));
142 return Spanned::new(get_name_table().intern("", true), sp);
143 }
144 None => {
145 self.emit(DiagBuilder2::error("Expected digits or letters"));
146 self.inner.undo(None);
147 return Spanned::new(get_name_table().intern("", true), INVALID_SPAN);
148 }
149 };
150 loop {
151 match self.inner.next() {
152 Some(Spanned {
153 value: Bundle::Letters(n),
154 span,
155 })
156 | Some(Spanned {
157 value: Bundle::Digits(n),
158 span,
159 }) => {
160 s.push_str(&n);
161 sp.end = span.end;
162 }
163 Some(Spanned {
164 value: Bundle::Special('_'),
165 ..
166 }) => (),
167 n => {
168 self.inner.undo(n);
169 break;
170 }
171 }
172 }
173 Spanned::new(get_name_table().intern(&s, true), sp)
174 }
175
176 fn try_exponent(&mut self) -> Option<Spanned<Exponent>> {
178 match self.inner.next() {
179 Some(Spanned {
180 value: Bundle::Letters(ref l),
181 span: mut sp,
182 }) if l == "e" || l == "E" => {
183 let mut n = self.inner.next();
184 let sign = match n {
185 Some(Spanned {
186 value: Bundle::Special('+'),
187 ..
188 }) => {
189 n = self.inner.next();
190 ExponentSign::Positive
191 }
192 Some(Spanned {
193 value: Bundle::Special('-'),
194 ..
195 }) => {
196 n = self.inner.next();
197 ExponentSign::Negative
198 }
199 _ => ExponentSign::Positive,
200 };
201 match n {
202 Some(Spanned {
203 value: Bundle::Digits(s),
204 span,
205 }) => {
206 let int = self.parse_integer(s, span);
207 sp.end = int.span.end;
208 Some(Spanned::new(Exponent(sign, int.value), sp))
209 }
210 n => {
211 self.emit(
212 DiagBuilder2::error(format!("Expected exponent after `{}`", l))
213 .span(sp),
214 );
215 self.inner.undo(n);
216 None
217 }
218 }
219 }
220 n => {
221 self.inner.undo(n);
222 None
223 }
224 }
225 }
226
227 fn parse_symbol(&mut self, c0: char, mut span: Span) -> Option<Spanned<Token>> {
230 let n1 = self.inner.next();
231 let n2 = self.inner.next();
232
233 if let (
235 &Some(Spanned {
236 value: Bundle::Special(c1),
237 ..
238 }),
239 &Some(Spanned {
240 value: Bundle::Special(c2),
241 span: sp,
242 }),
243 ) = (&n1, &n2)
244 {
245 if let Some(tkn) = match (c0, c1, c2) {
246 ('?', '/', '=') => Some(MatchNeq),
247 ('?', '<', '=') => Some(MatchLeq),
248 ('?', '>', '=') => Some(MatchGeq),
249 _ => None,
250 } {
251 span.expand(sp);
252 return Some(Spanned::new(tkn, span));
253 }
254 }
255 self.inner.undo(n2);
256
257 if let &Some(Spanned {
259 value: Bundle::Special(c1),
260 span: sp,
261 }) = &n1
262 {
263 if let Some(tkn) = match (c0, c1) {
264 ('=', '>') => Some(Arrow),
265 ('?', '?') => Some(Condition),
266 ('<', '>') => Some(LtGt),
267 (':', '=') => Some(VarAssign),
268 ('<', '<') => Some(Lshift),
269 ('>', '>') => Some(Rshift),
270 ('/', '=') => Some(Neq),
271 ('<', '=') => Some(Leq),
272 ('>', '=') => Some(Geq),
273 ('?', '=') => Some(MatchEq),
274 ('?', '<') => Some(MatchLt),
275 ('?', '>') => Some(MatchGt),
276 ('*', '*') => Some(Pow),
277 _ => None,
278 } {
279 span.expand(sp);
280 return Some(Spanned::new(tkn, span));
281 }
282 }
283 self.inner.undo(n1);
284
285 if let Some(tkn) = match c0 {
287 '(' => Some(OpenDelim(Paren)),
288 ')' => Some(CloseDelim(Paren)),
289 '[' => Some(OpenDelim(Brack)),
290 ']' => Some(CloseDelim(Brack)),
291 '.' => Some(Period),
292 ',' => Some(Comma),
293 ':' => Some(Colon),
294 ';' => Some(Semicolon),
295 '\'' => Some(Apostrophe),
296 '&' => Some(Ampersand),
297 '=' => Some(Eq),
298 '<' => Some(Lt),
299 '>' => Some(Gt),
300 '+' => Some(Add),
301 '-' => Some(Sub),
302 '*' => Some(Mul),
303 '/' => Some(Div),
304 '|' => Some(Pipe),
305 '?' => Some(Qmark),
306 _ => None,
307 } {
308 return Some(Spanned::new(tkn, span));
309 }
310
311 self.emit(DiagBuilder2::error(format!("`{}` is not a valid symbol", c0)).span(span));
314 None
315 }
316}
317
318impl<T> Grinder for Tokenizer<T>
319where
320 T: Grinder<Item = Option<Spanned<Bundle>>, Error = DiagBuilder2>,
321{
322 type Item = Option<Spanned<Token>>;
323 type Error = DiagBuilder2;
324
325 fn emit(&mut self, err: Self::Error) {
326 self.inner.emit(err);
327 }
328
329 fn next(&mut self) -> Self::Item {
330 let b = match self.next_significant() {
331 Some(v) => v,
332 None => return None,
333 };
334
335 match b.value {
336 Bundle::Letters(mut s) => {
337 let mut m = self.inner.next();
338 if let Some(Spanned {
339 value: Bundle::StringLiteral(v),
340 span,
341 }) = m
342 {
343 Some(self.parse_bit_string_literal(
346 None,
347 Spanned::new(s, b.span),
348 Spanned::new(v, span),
349 ))
350 } else {
351 let mut sp = b.span;
353 loop {
354 match m {
355 Some(Spanned {
356 value: Bundle::Letters(n),
357 span,
358 })
359 | Some(Spanned {
360 value: Bundle::Digits(n),
361 span,
362 }) => {
363 s.push_str(&n);
364 sp.end = span.end;
365 m = self.inner.next();
366 }
367 Some(Spanned {
368 value: Bundle::Special('_'),
369 span,
370 }) => {
371 s.push('_');
372 sp.end = span.end;
373 m = self.inner.next();
374 }
375 n => {
376 self.inner.undo(n);
377 break;
378 }
379 }
380 }
381
382 Some(Spanned::new(
384 if let Some(kw) = find_keyword(&s) {
385 Keyword(kw)
386 } else {
387 Ident(get_name_table().intern(&s, false))
388 },
389 sp,
390 ))
391 }
392 }
393
394 Bundle::ExtendedIdent(s) => Some(Spanned::new(
395 Ident(get_name_table().intern(&s, true)),
396 b.span,
397 )),
398
399 Bundle::Digits(s) => {
400 let int = self.parse_integer(s, b.span);
402 match (self.inner.next(), self.inner.next()) {
403 (
406 Some(Spanned {
407 value: Bundle::Letters(b),
408 span: sp1,
409 }),
410 Some(Spanned {
411 value: Bundle::StringLiteral(s),
412 span: sp2,
413 }),
414 ) => Some(self.parse_bit_string_literal(
415 Some(int),
416 Spanned::new(b, sp1),
417 Spanned::new(s, sp2),
418 )),
419
420 (
423 Some(Spanned {
424 value: Bundle::Special('.'),
425 ..
426 }),
427 Some(Spanned {
428 value: Bundle::Digits(s),
429 span,
430 }),
431 ) => {
432 let frac = self.parse_integer(s, span);
433 let exp = self.try_exponent();
434 let mut sp = int.span;
435 let exp = match exp {
436 Some(Spanned { value, span }) => {
437 sp.end = span.end;
438 Some(value)
439 }
440 _ => {
441 sp.end = frac.span.end;
442 None
443 }
444 };
445 Some(Spanned::new(
446 Lit(Literal::Abstract(None, int.value, Some(frac.value), exp)),
447 sp,
448 ))
449 }
450
451 (
454 Some(Spanned {
455 value: Bundle::Special('#'),
456 ..
457 }),
458 n,
459 ) => {
460 self.inner.undo(n);
461 let base = int;
462 let int = self.parse_based_integer();
463 let mut sp = Span::union(base.span, int.span);
464
465 let n = self.inner.next();
467 let frac = if let Some(Spanned {
468 value: Bundle::Special('.'),
469 ..
470 }) = n
471 {
472 let f = self.parse_based_integer();
473 sp.expand(f.span);
474 Some(f.value)
475 } else {
476 self.inner.undo(n);
477 None
478 };
479
480 match self.inner.next() {
482 Some(Spanned {
483 value: Bundle::Special('#'),
484 span,
485 }) => {
486 sp.expand(span);
487 }
488 n => {
489 self.emit(
490 DiagBuilder2::error(
491 "Expected `#` after digits of based literal",
492 )
493 .span(sp.end()),
494 );
495 self.inner.undo(n);
496 }
497 }
498
499 let exp = match self.try_exponent() {
501 Some(Spanned { value, span }) => {
502 sp.end = span.end;
503 Some(value)
504 }
505 _ => None,
506 };
507
508 Some(Spanned::new(
509 Lit(Literal::Abstract(Some(base.value), int.value, frac, exp)),
510 sp,
511 ))
512 }
513
514 (n, m) => {
516 self.inner.undo(m);
517 self.inner.undo(n);
518 let exp = self.try_exponent();
519 let mut sp = int.span;
520 let exp = match exp {
521 Some(Spanned { value, span }) => {
522 sp.end = span.end;
523 Some(value)
524 }
525 _ => {
526 sp.end = int.span.end;
527 None
528 }
529 };
530 Some(Spanned::new(
531 Lit(Literal::Abstract(None, int.value, None, exp)),
532 sp,
533 ))
534 }
535 }
536 }
537
538 Bundle::StringLiteral(s) => Some(Spanned::new(
539 Lit(Literal::String(get_name_table().intern(&s, true))),
540 b.span,
541 )),
542
543 Bundle::BitLiteral(c) => Some(Spanned::new(Lit(Literal::Char(c)), b.span)),
544
545 Bundle::Special(c0) => self.parse_symbol(c0, b.span),
546 Bundle::Space | Bundle::Comment => unreachable!(),
547 }
548 }
549}