1use crate::tokens::{Span, StringPart, Token, TokenKind};
6
7#[allow(dead_code)]
9#[allow(missing_docs)]
10pub struct LexerModeStack {
11 pub stack: Vec<LexerMode>,
13}
14impl LexerModeStack {
15 #[allow(dead_code)]
17 pub fn new() -> Self {
18 LexerModeStack {
19 stack: vec![LexerMode::Normal],
20 }
21 }
22 #[allow(dead_code)]
24 pub fn push(&mut self, mode: LexerMode) {
25 self.stack.push(mode);
26 }
27 #[allow(dead_code)]
29 pub fn pop(&mut self) -> Option<LexerMode> {
30 if self.stack.len() > 1 {
31 self.stack.pop()
32 } else {
33 None
34 }
35 }
36 #[allow(dead_code)]
38 pub fn current(&self) -> &LexerMode {
39 self.stack.last().unwrap_or(&LexerMode::Normal)
40 }
41 #[allow(dead_code)]
43 pub fn depth(&self) -> usize {
44 self.stack.len()
45 }
46}
/// Hand-written lexer over a pre-decoded character buffer.
pub struct Lexer {
    // Source text as chars so multi-byte codepoints index uniformly.
    input: Vec<char>,
    // Index of the next unread char in `input`.
    pos: usize,
    // 1-based line of the next unread char.
    line: usize,
    // 1-based column of the next unread char.
    column: usize,
}
68impl Lexer {
69 pub fn new(input: &str) -> Self {
71 Self {
72 input: input.chars().collect(),
73 pos: 0,
74 line: 1,
75 column: 1,
76 }
77 }
78 fn peek(&self) -> Option<char> {
80 self.input.get(self.pos).copied()
81 }
82 fn peek_ahead(&self, offset: usize) -> Option<char> {
84 self.input.get(self.pos + offset).copied()
85 }
86 fn advance(&mut self) -> Option<char> {
88 let ch = self.peek()?;
89 self.pos += 1;
90 if ch == '\n' {
91 self.line += 1;
92 self.column = 1;
93 } else {
94 self.column += 1;
95 }
96 Some(ch)
97 }
    /// Skips whitespace, `--` line comments, and (nesting) `/- … -/` block
    /// comments.
    ///
    /// Doc comments are NOT skipped: `---` and `/--` must surface as
    /// `DocComment` tokens, so this stops right in front of them.
    fn skip_whitespace(&mut self) {
        while let Some(ch) = self.peek() {
            if ch.is_whitespace() {
                self.advance();
            } else if ch == '-' && self.peek_ahead(1) == Some('-') {
                // Three dashes open a line doc comment; leave it for the caller.
                if self.peek_ahead(2) == Some('-') {
                    break;
                }
                self.advance();
                self.advance();
                // Discard the rest of the line; the newline itself is consumed
                // as ordinary whitespace on the next outer pass.
                while let Some(ch) = self.peek() {
                    if ch == '\n' {
                        break;
                    }
                    self.advance();
                }
            } else if ch == '/' && self.peek_ahead(1) == Some('-') {
                // `/--` opens a block doc comment; leave it for the caller.
                if self.peek_ahead(2) == Some('-') {
                    break;
                }
                self.advance();
                self.advance();
                // Block comments nest: track depth until the opener is closed.
                let mut depth = 1;
                while depth > 0 {
                    match self.peek() {
                        Some('/') if self.peek_ahead(1) == Some('-') => {
                            self.advance();
                            self.advance();
                            depth += 1;
                        }
                        Some('-') if self.peek_ahead(1) == Some('/') => {
                            self.advance();
                            self.advance();
                            depth -= 1;
                        }
                        Some(_) => {
                            self.advance();
                        }
                        // Unterminated comment: stop silently at end of input.
                        None => break,
                    }
                }
            } else {
                break;
            }
        }
    }
145 fn lex_line_doc_comment(&mut self, start: usize, start_line: usize, start_col: usize) -> Token {
147 self.advance();
148 self.advance();
149 self.advance();
150 if self.peek() == Some(' ') {
151 self.advance();
152 }
153 let mut s = String::new();
154 while let Some(ch) = self.peek() {
155 if ch == '\n' {
156 break;
157 }
158 s.push(ch);
159 self.advance();
160 }
161 Token::new(
162 TokenKind::DocComment(s),
163 Span::new(start, self.pos, start_line, start_col),
164 )
165 }
    /// Lexes a `/-- … -/` block doc comment into a `DocComment` token.
    ///
    /// Inner `/- … -/` comments nest; their delimiters are kept verbatim in
    /// the collected text, while the outermost pair is stripped. Trailing
    /// whitespace of the body is trimmed.
    fn lex_block_doc_comment(
        &mut self,
        start: usize,
        start_line: usize,
        start_col: usize,
    ) -> Token {
        // Consume the `/--` opener.
        self.advance();
        self.advance();
        self.advance();
        // Drop a single separating space, if present.
        if self.peek() == Some(' ') {
            self.advance();
        }
        let mut s = String::new();
        let mut depth = 1;
        while depth > 0 {
            match self.peek() {
                Some('/') if self.peek_ahead(1) == Some('-') => {
                    self.advance();
                    self.advance();
                    depth += 1;
                    // Nested openers belong to the body text; the outermost
                    // delimiter (depth transition 0 -> 1) does not.
                    if depth > 1 {
                        s.push_str("/-");
                    }
                }
                Some('-') if self.peek_ahead(1) == Some('/') => {
                    depth -= 1;
                    // Mirror image: nested closers are kept, the final one
                    // (depth reaches 0) is stripped.
                    if depth > 0 {
                        s.push_str("-/");
                    }
                    self.advance();
                    self.advance();
                }
                Some(ch) => {
                    s.push(ch);
                    self.advance();
                }
                // Unterminated doc comment: take what we have.
                None => break,
            }
        }
        let s = s.trim_end().to_string();
        Token::new(
            TokenKind::DocComment(s),
            Span::new(start, self.pos, start_line, start_col),
        )
    }
    /// Lexes an identifier or keyword starting at the cursor.
    ///
    /// A lone `_` (not followed by an identifier character) lexes as
    /// `Underscore`. Identifiers may contain alphanumerics, `_`, and `'`
    /// (primes). Recognized keywords map to their dedicated token kinds;
    /// anything else becomes `Ident`.
    fn lex_ident(&mut self, start: usize, start_line: usize, start_col: usize) -> Token {
        let mut s = String::new();
        // `_` used as a placeholder, not as the start of an identifier.
        if self.peek() == Some('_')
            && !self
                .peek_ahead(1)
                .is_some_and(|c| c.is_alphanumeric() || c == '_')
        {
            self.advance();
            return Token::new(
                TokenKind::Underscore,
                Span::new(start, self.pos, start_line, start_col),
            );
        }
        while let Some(ch) = self.peek() {
            if ch.is_alphanumeric() || ch == '_' || ch == '\'' {
                s.push(ch);
                self.advance();
            } else {
                break;
            }
        }
        // Keyword table; falls through to `Ident` for ordinary names.
        let kind = match s.as_str() {
            "axiom" => TokenKind::Axiom,
            "definition" | "def" => TokenKind::Definition,
            "theorem" => TokenKind::Theorem,
            "lemma" => TokenKind::Lemma,
            "opaque" => TokenKind::Opaque,
            "inductive" => TokenKind::Inductive,
            "structure" => TokenKind::Structure,
            "class" => TokenKind::Class,
            "instance" => TokenKind::Instance,
            "namespace" => TokenKind::Namespace,
            "section" => TokenKind::Section,
            "variable" => TokenKind::Variable,
            "variables" => TokenKind::Variables,
            "parameter" => TokenKind::Parameter,
            "parameters" => TokenKind::Parameters,
            "constant" => TokenKind::Constant,
            "constants" => TokenKind::Constants,
            "end" => TokenKind::End,
            "import" => TokenKind::Import,
            "export" => TokenKind::Export,
            "open" => TokenKind::Open,
            "attribute" => TokenKind::Attribute,
            "return" => TokenKind::Return,
            "Type" => TokenKind::Type,
            "Prop" => TokenKind::Prop,
            "Sort" => TokenKind::Sort,
            "fun" => TokenKind::Fun,
            "forall" => TokenKind::Forall,
            "let" => TokenKind::Let,
            "in" => TokenKind::In,
            "if" => TokenKind::If,
            "then" => TokenKind::Then,
            "else" => TokenKind::Else,
            "match" => TokenKind::Match,
            "with" => TokenKind::With,
            "do" => TokenKind::Do,
            "have" => TokenKind::Have,
            "suffices" => TokenKind::Suffices,
            "show" => TokenKind::Show,
            "where" => TokenKind::Where,
            "from" => TokenKind::From,
            "by" => TokenKind::By,
            "exists" => TokenKind::Exists,
            _ => TokenKind::Ident(s),
        };
        Token::new(kind, Span::new(start, self.pos, start_line, start_col))
    }
284 fn lex_hex_number(&mut self, start: usize, start_line: usize, start_col: usize) -> Token {
286 let mut num = 0u64;
287 let mut has_digits = false;
288 while let Some(ch) = self.peek() {
289 if let Some(d) = ch.to_digit(16) {
290 num = num.wrapping_mul(16).wrapping_add(d as u64);
291 has_digits = true;
292 self.advance();
293 } else if ch == '_' {
294 self.advance();
295 } else {
296 break;
297 }
298 }
299 if !has_digits {
300 return Token::new(
301 TokenKind::Error("expected hex digits after 0x".to_string()),
302 Span::new(start, self.pos, start_line, start_col),
303 );
304 }
305 Token::new(
306 TokenKind::Nat(num),
307 Span::new(start, self.pos, start_line, start_col),
308 )
309 }
310 fn lex_bin_number(&mut self, start: usize, start_line: usize, start_col: usize) -> Token {
312 let mut num = 0u64;
313 let mut has_digits = false;
314 while let Some(ch) = self.peek() {
315 match ch {
316 '0' => {
317 num = num.wrapping_mul(2);
318 has_digits = true;
319 self.advance();
320 }
321 '1' => {
322 num = num.wrapping_mul(2).wrapping_add(1);
323 has_digits = true;
324 self.advance();
325 }
326 '_' => {
327 self.advance();
328 }
329 _ => break,
330 }
331 }
332 if !has_digits {
333 return Token::new(
334 TokenKind::Error("expected binary digits after 0b".to_string()),
335 Span::new(start, self.pos, start_line, start_col),
336 );
337 }
338 Token::new(
339 TokenKind::Nat(num),
340 Span::new(start, self.pos, start_line, start_col),
341 )
342 }
343 fn lex_oct_number(&mut self, start: usize, start_line: usize, start_col: usize) -> Token {
345 let mut num = 0u64;
346 let mut has_digits = false;
347 while let Some(ch) = self.peek() {
348 if let Some(d) = ch.to_digit(8) {
349 num = num.wrapping_mul(8).wrapping_add(d as u64);
350 has_digits = true;
351 self.advance();
352 } else if ch == '_' {
353 self.advance();
354 } else {
355 break;
356 }
357 }
358 if !has_digits {
359 return Token::new(
360 TokenKind::Error("expected octal digits after 0o".to_string()),
361 Span::new(start, self.pos, start_line, start_col),
362 );
363 }
364 Token::new(
365 TokenKind::Nat(num),
366 Span::new(start, self.pos, start_line, start_col),
367 )
368 }
369 fn lex_number(&mut self, start: usize, start_line: usize, start_col: usize) -> Token {
371 if self.peek() == Some('0') {
372 match self.peek_ahead(1) {
373 Some('x') | Some('X') => {
374 self.advance();
375 self.advance();
376 return self.lex_hex_number(start, start_line, start_col);
377 }
378 Some('b') | Some('B') => {
379 self.advance();
380 self.advance();
381 return self.lex_bin_number(start, start_line, start_col);
382 }
383 Some('o') | Some('O') => {
384 self.advance();
385 self.advance();
386 return self.lex_oct_number(start, start_line, start_col);
387 }
388 _ => {}
389 }
390 }
391 let mut int_str = String::new();
392 while let Some(ch) = self.peek() {
393 if ch.is_ascii_digit() {
394 int_str.push(ch);
395 self.advance();
396 } else if ch == '_' && self.peek_ahead(1).is_some_and(|c| c.is_ascii_digit()) {
397 self.advance();
398 } else {
399 break;
400 }
401 }
402 let is_float =
403 self.peek() == Some('.') && self.peek_ahead(1).is_some_and(|c| c.is_ascii_digit());
404 if is_float {
405 self.advance();
406 let mut frac_str = String::new();
407 frac_str.push_str(&int_str);
408 frac_str.push('.');
409 while let Some(ch) = self.peek() {
410 if ch.is_ascii_digit() {
411 frac_str.push(ch);
412 self.advance();
413 } else {
414 break;
415 }
416 }
417 if self.peek() == Some('e') || self.peek() == Some('E') {
418 frac_str.push('e');
419 self.advance();
420 if self.peek() == Some('+') || self.peek() == Some('-') {
421 frac_str.push(self.advance().expect("peek confirmed '+' or '-' exists"));
422 }
423 while let Some(ch) = self.peek() {
424 if ch.is_ascii_digit() {
425 frac_str.push(ch);
426 self.advance();
427 } else {
428 break;
429 }
430 }
431 }
432 let val: f64 = frac_str.parse().unwrap_or(0.0);
433 return Token::new(
434 TokenKind::Float(val),
435 Span::new(start, self.pos, start_line, start_col),
436 );
437 }
438 if self.peek() == Some('e') || self.peek() == Some('E') {
439 let mut exp_str = String::new();
440 exp_str.push_str(&int_str);
441 exp_str.push('e');
442 self.advance();
443 if self.peek() == Some('+') || self.peek() == Some('-') {
444 exp_str.push(self.advance().expect("peek confirmed '+' or '-' exists"));
445 }
446 while let Some(ch) = self.peek() {
447 if ch.is_ascii_digit() {
448 exp_str.push(ch);
449 self.advance();
450 } else {
451 break;
452 }
453 }
454 let val: f64 = exp_str.parse().unwrap_or(0.0);
455 return Token::new(
456 TokenKind::Float(val),
457 Span::new(start, self.pos, start_line, start_col),
458 );
459 }
460 let num: u64 = int_str.parse().unwrap_or(0);
461 Token::new(
462 TokenKind::Nat(num),
463 Span::new(start, self.pos, start_line, start_col),
464 )
465 }
466 fn parse_escape_char(&mut self) -> Option<char> {
469 match self.peek() {
470 Some('n') => {
471 self.advance();
472 Some('\n')
473 }
474 Some('t') => {
475 self.advance();
476 Some('\t')
477 }
478 Some('r') => {
479 self.advance();
480 Some('\r')
481 }
482 Some('\\') => {
483 self.advance();
484 Some('\\')
485 }
486 Some('"') => {
487 self.advance();
488 Some('"')
489 }
490 Some('\'') => {
491 self.advance();
492 Some('\'')
493 }
494 Some('0') => {
495 self.advance();
496 Some('\0')
497 }
498 Some('x') => {
499 self.advance();
500 let hi = self.advance().and_then(|c| c.to_digit(16))?;
501 let lo = self.advance().and_then(|c| c.to_digit(16))?;
502 let val = (hi * 16 + lo) as u8;
503 Some(val as char)
504 }
505 Some('u') => {
506 self.advance();
507 if self.peek() != Some('{') {
508 return None;
509 }
510 self.advance();
511 let mut code = 0u32;
512 let mut has_digit = false;
513 while let Some(ch) = self.peek() {
514 if ch == '}' {
515 self.advance();
516 break;
517 }
518 if let Some(d) = ch.to_digit(16) {
519 code = code * 16 + d;
520 has_digit = true;
521 self.advance();
522 } else {
523 return None;
524 }
525 }
526 if !has_digit {
527 return None;
528 }
529 char::from_u32(code)
530 }
531 Some(ch) => {
532 self.advance();
533 Some(ch)
534 }
535 None => None,
536 }
537 }
538 fn lex_string(&mut self, start: usize, start_line: usize, start_col: usize) -> Token {
540 self.advance();
541 let mut s = String::new();
542 while let Some(ch) = self.peek() {
543 if ch == '"' {
544 self.advance();
545 return Token::new(
546 TokenKind::String(s),
547 Span::new(start, self.pos, start_line, start_col),
548 );
549 } else if ch == '\\' {
550 self.advance();
551 if let Some(escaped) = self.parse_escape_char() {
552 s.push(escaped);
553 } else {
554 s.push('?');
555 }
556 } else {
557 s.push(ch);
558 self.advance();
559 }
560 }
561 Token::new(
562 TokenKind::Error("unterminated string".to_string()),
563 Span::new(start, self.pos, start_line, start_col),
564 )
565 }
566 fn lex_char(&mut self, start: usize, start_line: usize, start_col: usize) -> Token {
568 self.advance();
569 let ch = if self.peek() == Some('\\') {
570 self.advance();
571 match self.parse_escape_char() {
572 Some(c) => c,
573 None => {
574 return Token::new(
575 TokenKind::Error("invalid escape in char literal".to_string()),
576 Span::new(start, self.pos, start_line, start_col),
577 );
578 }
579 }
580 } else {
581 match self.advance() {
582 Some(c) => c,
583 None => {
584 return Token::new(
585 TokenKind::Error("unterminated char literal".to_string()),
586 Span::new(start, self.pos, start_line, start_col),
587 );
588 }
589 }
590 };
591 if self.peek() == Some('\'') {
592 self.advance();
593 Token::new(
594 TokenKind::Char(ch),
595 Span::new(start, self.pos, start_line, start_col),
596 )
597 } else {
598 Token::new(
599 TokenKind::Error("unterminated char literal".to_string()),
600 Span::new(start, self.pos, start_line, start_col),
601 )
602 }
603 }
    /// Lexes an `s!"…"` interpolated string into `InterpolatedString` parts.
    ///
    /// Literal runs become `StringPart::Literal`; each `{…}` segment is
    /// re-lexed with `next_token_raw` into a `StringPart::Interpolation`
    /// token list, with nested braces balanced by depth counting. Escapes in
    /// the literal parts use `parse_escape_char` (invalid ones become `'?'`).
    fn lex_interpolated_string(
        &mut self,
        start: usize,
        start_line: usize,
        start_col: usize,
    ) -> Token {
        // Consume the `s!"` opener.
        self.advance();
        self.advance();
        self.advance();
        let mut parts: Vec<StringPart> = Vec::new();
        let mut current_lit = String::new();
        while let Some(ch) = self.peek() {
            if ch == '"' {
                self.advance();
                // Flush any pending literal text before finishing.
                if !current_lit.is_empty() {
                    parts.push(StringPart::Literal(current_lit));
                }
                return Token::new(
                    TokenKind::InterpolatedString(parts),
                    Span::new(start, self.pos, start_line, start_col),
                );
            } else if ch == '{' {
                self.advance();
                // Flush literal text collected so far, then lex the
                // interpolation body token-by-token.
                if !current_lit.is_empty() {
                    parts.push(StringPart::Literal(current_lit.clone()));
                    current_lit.clear();
                }
                let mut interp_tokens = Vec::new();
                let mut depth = 1;
                loop {
                    let tok = self.next_token_raw();
                    match &tok.kind {
                        TokenKind::LBrace => depth += 1,
                        TokenKind::RBrace => {
                            depth -= 1;
                            // The brace closing the interpolation is not part
                            // of the token list.
                            if depth == 0 {
                                break;
                            }
                        }
                        // End of input inside `{…}`; the outer loop will then
                        // report the string as unterminated.
                        TokenKind::Eof => break,
                        _ => {}
                    }
                    interp_tokens.push(tok);
                }
                parts.push(StringPart::Interpolation(interp_tokens));
            } else if ch == '\\' {
                self.advance();
                if let Some(escaped) = self.parse_escape_char() {
                    current_lit.push(escaped);
                } else {
                    current_lit.push('?');
                }
            } else {
                current_lit.push(ch);
                self.advance();
            }
        }
        Token::new(
            TokenKind::Error("unterminated interpolated string".to_string()),
            Span::new(start, self.pos, start_line, start_col),
        )
    }
    /// Lexes one token WITHOUT the special top-level forms. Used inside
    /// interpolated-string `{…}` segments.
    ///
    /// Compared to `next_token`, this handles identifiers, numbers, plain
    /// strings, and operators, but not doc comments, `s!"…"` interpolation,
    /// or char literals (a `'` here falls through to the error arm).
    fn next_token_raw(&mut self) -> Token {
        self.skip_whitespace();
        let start = self.pos;
        let start_line = self.line;
        let start_col = self.column;
        let Some(ch) = self.peek() else {
            return Token::new(
                TokenKind::Eof,
                Span::new(start, start, start_line, start_col),
            );
        };
        if ch.is_alphabetic() || ch == '_' {
            return self.lex_ident(start, start_line, start_col);
        }
        if ch.is_ascii_digit() {
            return self.lex_number(start, start_line, start_col);
        }
        if ch == '"' {
            return self.lex_string(start, start_line, start_col);
        }
        // Single- and two-character operators; two-character forms are
        // detected by peeking at the (already advanced past) next char.
        self.advance();
        let kind = match ch {
            '(' => TokenKind::LParen,
            ')' => TokenKind::RParen,
            '{' => TokenKind::LBrace,
            '}' => TokenKind::RBrace,
            '[' => TokenKind::LBracket,
            ']' => TokenKind::RBracket,
            ',' => TokenKind::Comma,
            ';' => TokenKind::Semicolon,
            '.' if self.peek() == Some('.') => {
                self.advance();
                TokenKind::DotDot
            }
            '.' => TokenKind::Dot,
            '|' if self.peek() == Some('|') => {
                self.advance();
                TokenKind::OrOr
            }
            '|' => TokenKind::Bar,
            '@' => TokenKind::At,
            '#' => TokenKind::Hash,
            '+' => TokenKind::Plus,
            '-' if self.peek() == Some('>') => {
                self.advance();
                TokenKind::Arrow
            }
            '-' => TokenKind::Minus,
            '>' if self.peek() == Some('=') => {
                self.advance();
                TokenKind::Ge
            }
            '>' => TokenKind::Gt,
            '*' => TokenKind::Star,
            '/' => TokenKind::Slash,
            '%' => TokenKind::Percent,
            '^' => TokenKind::Caret,
            '_' => TokenKind::Underscore,
            '?' => TokenKind::Question,
            '!' if self.peek() == Some('=') => {
                self.advance();
                TokenKind::BangEq
            }
            '!' => TokenKind::Bang,
            '&' if self.peek() == Some('&') => {
                self.advance();
                TokenKind::AndAnd
            }
            ':' if self.peek() == Some('=') => {
                self.advance();
                TokenKind::Assign
            }
            ':' => TokenKind::Colon,
            '=' if self.peek() == Some('>') => {
                self.advance();
                TokenKind::FatArrow
            }
            '=' => TokenKind::Eq,
            '<' if self.peek() == Some('-') => {
                self.advance();
                TokenKind::LeftArrow
            }
            '<' if self.peek() == Some('=') => {
                self.advance();
                TokenKind::Le
            }
            '<' => TokenKind::Lt,
            // Unicode aliases for ASCII operators and logical symbols.
            '\u{2190}' => TokenKind::LeftArrow,
            '\u{2264}' | '\u{2A7D}' => TokenKind::Le,
            '\u{2265}' | '\u{2A7E}' => TokenKind::Ge,
            '\u{2260}' => TokenKind::Ne,
            '\u{2192}' => TokenKind::Arrow,
            '\u{21D2}' => TokenKind::FatArrow,
            '\u{2227}' => TokenKind::And,
            '\u{2228}' => TokenKind::Or,
            '\u{00AC}' => TokenKind::Not,
            '\u{2194}' => TokenKind::Iff,
            '\u{2200}' => TokenKind::Forall,
            '\u{2203}' => TokenKind::Exists,
            '\u{03BB}' => TokenKind::Fun,
            '\u{27E8}' => TokenKind::LAngle,
            '\u{27E9}' => TokenKind::RAngle,
            _ => TokenKind::Error(format!("unexpected character: {}", ch)),
        };
        Token::new(kind, Span::new(start, self.pos, start_line, start_col))
    }
    /// Lexes the next token from the input, skipping whitespace and plain
    /// comments first.
    ///
    /// Handles, in priority order: `/--` block and `---` line doc comments,
    /// `s!"…"` interpolated strings, identifiers/keywords, numbers, plain
    /// strings, char literals, then single/double-character operators and
    /// their Unicode aliases. Returns `Eof` at end of input.
    pub fn next_token(&mut self) -> Token {
        self.skip_whitespace();
        let start = self.pos;
        let start_line = self.line;
        let start_col = self.column;
        let Some(ch) = self.peek() else {
            return Token::new(
                TokenKind::Eof,
                Span::new(start, start, start_line, start_col),
            );
        };
        if ch == '/' && self.peek_ahead(1) == Some('-') && self.peek_ahead(2) == Some('-') {
            return self.lex_block_doc_comment(start, start_line, start_col);
        }
        if ch == '-' && self.peek_ahead(1) == Some('-') && self.peek_ahead(2) == Some('-') {
            return self.lex_line_doc_comment(start, start_line, start_col);
        }
        if ch == 's' && self.peek_ahead(1) == Some('!') && self.peek_ahead(2) == Some('"') {
            return self.lex_interpolated_string(start, start_line, start_col);
        }
        if ch.is_alphabetic() || ch == '_' {
            return self.lex_ident(start, start_line, start_col);
        }
        if ch.is_ascii_digit() {
            return self.lex_number(start, start_line, start_col);
        }
        if ch == '"' {
            return self.lex_string(start, start_line, start_col);
        }
        if ch == '\'' {
            // Disambiguate a char literal from a stray tick: it is a char
            // literal when an escape follows, or when the closing tick is two
            // characters away (e.g. 'a').
            let is_char_lit = if self.peek_ahead(1) == Some('\\') {
                true
            } else {
                self.peek_ahead(2) == Some('\'')
            };
            if is_char_lit {
                return self.lex_char(start, start_line, start_col);
            }
        }
        // Single- and two-character operators; two-character forms are
        // detected by peeking at the (already advanced past) next char.
        self.advance();
        let kind = match ch {
            '(' => TokenKind::LParen,
            ')' => TokenKind::RParen,
            '{' => TokenKind::LBrace,
            '}' => TokenKind::RBrace,
            '[' => TokenKind::LBracket,
            ']' => TokenKind::RBracket,
            ',' => TokenKind::Comma,
            ';' => TokenKind::Semicolon,
            '.' if self.peek() == Some('.') => {
                self.advance();
                TokenKind::DotDot
            }
            '.' => TokenKind::Dot,
            '|' if self.peek() == Some('|') => {
                self.advance();
                TokenKind::OrOr
            }
            '|' => TokenKind::Bar,
            '@' => TokenKind::At,
            '#' => TokenKind::Hash,
            '+' => TokenKind::Plus,
            '-' if self.peek() == Some('>') => {
                self.advance();
                TokenKind::Arrow
            }
            '-' => TokenKind::Minus,
            '>' if self.peek() == Some('=') => {
                self.advance();
                TokenKind::Ge
            }
            '>' => TokenKind::Gt,
            '*' => TokenKind::Star,
            '/' => TokenKind::Slash,
            '%' => TokenKind::Percent,
            '^' => TokenKind::Caret,
            '_' => TokenKind::Underscore,
            '?' => TokenKind::Question,
            // A `'` that did not qualify as a char literal above.
            '\'' => TokenKind::Error("unexpected tick '".to_string()),
            '!' if self.peek() == Some('=') => {
                self.advance();
                TokenKind::BangEq
            }
            '!' => TokenKind::Bang,
            '&' if self.peek() == Some('&') => {
                self.advance();
                TokenKind::AndAnd
            }
            ':' if self.peek() == Some('=') => {
                self.advance();
                TokenKind::Assign
            }
            ':' => TokenKind::Colon,
            '=' if self.peek() == Some('>') => {
                self.advance();
                TokenKind::FatArrow
            }
            '=' => TokenKind::Eq,
            '<' if self.peek() == Some('-') => {
                self.advance();
                TokenKind::LeftArrow
            }
            '<' if self.peek() == Some('=') => {
                self.advance();
                TokenKind::Le
            }
            '<' => TokenKind::Lt,
            // Unicode aliases for ASCII operators and logical symbols.
            '\u{2190}' => TokenKind::LeftArrow,
            '\u{2264}' | '\u{2A7D}' => TokenKind::Le,
            '\u{2265}' | '\u{2A7E}' => TokenKind::Ge,
            '\u{2260}' => TokenKind::Ne,
            '\u{2192}' => TokenKind::Arrow,
            '\u{21D2}' => TokenKind::FatArrow,
            '\u{2227}' => TokenKind::And,
            '\u{2228}' => TokenKind::Or,
            '\u{00AC}' => TokenKind::Not,
            '\u{2194}' => TokenKind::Iff,
            '\u{2200}' => TokenKind::Forall,
            '\u{2203}' => TokenKind::Exists,
            '\u{03BB}' => TokenKind::Fun,
            '\u{27E8}' => TokenKind::LAngle,
            '\u{27E9}' => TokenKind::RAngle,
            _ => TokenKind::Error(format!("unexpected character: {}", ch)),
        };
        Token::new(kind, Span::new(start, self.pos, start_line, start_col))
    }
903 pub fn tokenize(&mut self) -> Vec<Token> {
905 let mut tokens = Vec::new();
906 loop {
907 let tok = self.next_token();
908 let is_eof = matches!(tok.kind, TokenKind::Eof);
909 tokens.push(tok);
910 if is_eof {
911 break;
912 }
913 }
914 tokens
915 }
916}
#[allow(dead_code)]
#[allow(missing_docs)]
pub struct Scanner<'a> {
    src: &'a str,
    pos: usize,
    line: usize,
    col: usize,
}

impl<'a> Scanner<'a> {
    /// Creates a scanner at byte offset 0, line 1, column 1.
    #[allow(dead_code)]
    pub fn new(src: &'a str) -> Self {
        Scanner {
            src,
            pos: 0,
            line: 1,
            col: 1,
        }
    }

    /// Next character without consuming it.
    #[allow(dead_code)]
    pub fn peek(&self) -> Option<char> {
        self.src[self.pos..].chars().next()
    }

    /// Consumes and returns the next character, tracking line/column.
    #[allow(dead_code)]
    #[allow(clippy::should_implement_trait)]
    pub fn next(&mut self) -> Option<char> {
        let ch = self.peek()?;
        self.pos += ch.len_utf8();
        if ch == '\n' {
            self.line += 1;
            self.col = 1;
        } else {
            self.col += 1;
        }
        Some(ch)
    }

    /// True once the whole source has been consumed.
    #[allow(dead_code)]
    pub fn is_eof(&self) -> bool {
        self.pos >= self.src.len()
    }

    /// Current byte offset into the source.
    #[allow(dead_code)]
    pub fn position(&self) -> usize {
        self.pos
    }

    /// Current 1-based line number.
    #[allow(dead_code)]
    pub fn line(&self) -> usize {
        self.line
    }

    /// Current 1-based column number.
    #[allow(dead_code)]
    pub fn col(&self) -> usize {
        self.col
    }

    /// Consumes consecutive whitespace characters, if any.
    #[allow(dead_code)]
    pub fn skip_whitespace(&mut self) {
        while self.peek().is_some_and(|c| c.is_whitespace()) {
            self.next();
        }
    }

    /// Consumes characters while `pred` holds and returns the consumed slice.
    #[allow(dead_code)]
    pub fn consume_while<F: Fn(char) -> bool>(&mut self, pred: F) -> &'a str {
        let begin = self.pos;
        while let Some(ch) = self.peek() {
            if !pred(ch) {
                break;
            }
            self.next();
        }
        &self.src[begin..self.pos]
    }

    /// Unconsumed tail of the source.
    #[allow(dead_code)]
    pub fn remaining(&self) -> &'a str {
        &self.src[self.pos..]
    }
}
/// A position in the lexed source: absolute offset plus 1-based line/column.
#[allow(dead_code)]
#[allow(missing_docs)]
#[derive(Debug, Clone, Copy)]
pub struct LexerPos {
    /// Absolute character offset from the start of the input.
    pub offset: usize,
    /// 1-based line number.
    pub line: usize,
    /// 1-based column number.
    pub col: usize,
}
1014#[allow(dead_code)]
1016#[allow(missing_docs)]
1017pub struct LexerRuleTable {
1018 pub rules: Vec<LexerRule>,
1020}
1021impl LexerRuleTable {
1022 #[allow(dead_code)]
1024 pub fn new() -> Self {
1025 LexerRuleTable { rules: Vec::new() }
1026 }
1027 #[allow(dead_code)]
1029 pub fn add(&mut self, rule: LexerRule) {
1030 self.rules.push(rule);
1031 }
1032 #[allow(dead_code)]
1034 pub fn find_rule(&self, c: char) -> Option<&LexerRule> {
1035 self.rules.iter().find(|r| r.start.matches(c))
1036 }
1037 #[allow(dead_code)]
1039 pub fn len(&self) -> usize {
1040 self.rules.len()
1041 }
1042 #[allow(dead_code)]
1044 pub fn is_empty(&self) -> bool {
1045 self.rules.is_empty()
1046 }
1047}
/// Coarse classification of tokens.
#[allow(dead_code)]
#[allow(missing_docs)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TokenCategory {
    /// Identifier tokens.
    Ident,
    /// Keyword tokens.
    Keyword,
    /// Literal tokens (numbers, strings, chars).
    Literal,
    /// Operator tokens.
    Operator,
    /// Delimiter tokens.
    Delimiter,
    /// Trivia (non-semantic text such as whitespace/comments).
    Trivia,
    /// End of input.
    Eof,
}
/// Tally from comparing two token streams.
#[allow(dead_code)]
#[allow(missing_docs)]
#[derive(Debug, Clone)]
pub struct TokenDiff {
    /// Count of tokens present only in the left stream.
    pub only_left: usize,
    /// Count of tokens present only in the right stream.
    pub only_right: usize,
    /// Count of tokens present in both streams.
    pub matching: usize,
}
#[allow(dead_code)]
#[allow(missing_docs)]
#[derive(Debug, Default)]
pub struct LexSummary {
    pub token_count: usize,
    pub ident_count: usize,
    pub keyword_count: usize,
    pub operator_count: usize,
    pub literal_count: usize,
    pub comment_count: usize,
    pub whitespace_count: usize,
}

impl LexSummary {
    /// Creates a summary with all counters at zero.
    #[allow(dead_code)]
    pub fn new() -> Self {
        Self::default()
    }

    /// Renders all counters on one line, e.g. `tokens=3 idents=1 …`.
    #[allow(dead_code)]
    pub fn format(&self) -> String {
        format!(
            "tokens={tokens} idents={idents} keywords={keywords} ops={ops} literals={literals} comments={comments} ws={ws}",
            tokens = self.token_count,
            idents = self.ident_count,
            keywords = self.keyword_count,
            ops = self.operator_count,
            literals = self.literal_count,
            comments = self.comment_count,
            ws = self.whitespace_count,
        )
    }
}
/// Character trie for keyword lookup.
#[allow(dead_code)]
#[allow(missing_docs)]
pub struct KeywordTrie {
    // Child node per next character.
    children: std::collections::HashMap<char, KeywordTrie>,
    // True when the path from the root to this node spells a keyword.
    pub is_keyword: bool,
}

impl KeywordTrie {
    /// Creates an empty trie node.
    #[allow(dead_code)]
    pub fn new() -> Self {
        KeywordTrie {
            children: std::collections::HashMap::new(),
            is_keyword: false,
        }
    }

    /// Inserts `keyword`, creating intermediate nodes as needed.
    #[allow(dead_code)]
    pub fn insert(&mut self, keyword: &str) {
        let mut node = self;
        for c in keyword.chars() {
            // `or_insert_with(KeywordTrie::new)` instead of `or_default()`:
            // `or_default()` requires `KeywordTrie: Default`, which is not
            // derived or implemented here.
            node = node.children.entry(c).or_insert_with(KeywordTrie::new);
        }
        node.is_keyword = true;
    }

    /// True when `s` was inserted as a complete keyword (prefixes of
    /// keywords do not match).
    #[allow(dead_code)]
    pub fn contains(&self, s: &str) -> bool {
        let mut node = self;
        for c in s.chars() {
            match node.children.get(&c) {
                Some(child) => node = child,
                None => return false,
            }
        }
        node.is_keyword
    }
}
1161#[allow(dead_code)]
1163#[allow(missing_docs)]
1164#[derive(Debug, Clone)]
1165pub struct LexerRule {
1166 pub kind: String,
1168 pub start: CharClass,
1170 pub cont: CharClass,
1172}
1173impl LexerRule {
1174 #[allow(dead_code)]
1176 pub fn new(kind: &str, start: CharClass, cont: CharClass) -> Self {
1177 LexerRule {
1178 kind: kind.to_string(),
1179 start,
1180 cont,
1181 }
1182 }
1183}
#[allow(dead_code)]
#[allow(missing_docs)]
#[derive(Debug, Default)]
pub struct TriviaAccumulator {
    pub text: String,
    pub has_newlines: bool,
}

impl TriviaAccumulator {
    /// Creates an empty accumulator.
    #[allow(dead_code)]
    pub fn new() -> Self {
        Self {
            text: String::new(),
            has_newlines: false,
        }
    }

    /// Appends `s`, remembering whether any pushed text contained a newline.
    #[allow(dead_code)]
    pub fn push(&mut self, s: &str) {
        self.has_newlines |= s.contains('\n');
        self.text.push_str(s);
    }

    /// Resets the accumulator to its empty state.
    #[allow(dead_code)]
    pub fn clear(&mut self) {
        self.text.clear();
        self.has_newlines = false;
    }
}
#[allow(dead_code)]
#[allow(missing_docs)]
pub struct CharFreqTable {
    pub counts: [u32; 128],
}

impl CharFreqTable {
    /// Tallies occurrences of each ASCII character in `src`; non-ASCII
    /// characters are ignored.
    #[allow(dead_code)]
    #[allow(clippy::should_implement_trait)]
    pub fn from_str(src: &str) -> Self {
        let mut counts = [0u32; 128];
        for c in src.chars().filter(|c| c.is_ascii()) {
            counts[c as usize] += 1;
        }
        CharFreqTable { counts }
    }

    /// Occurrence count for `c`; non-ASCII characters report 0.
    #[allow(dead_code)]
    pub fn count(&self, c: char) -> u32 {
        self.counts.get(c as usize).copied().unwrap_or(0)
    }
}
#[allow(dead_code)]
#[allow(missing_docs)]
#[derive(Debug, Clone)]
pub struct RawToken {
    pub kind: String,
    pub text: String,
    pub start: usize,
    pub end: usize,
    pub line: usize,
    pub col: usize,
}

#[allow(dead_code)]
#[allow(missing_docs)]
pub struct FilteredTokenStream {
    pub tokens: Vec<RawToken>,
    pos: usize,
}

impl FilteredTokenStream {
    /// Builds a cursor over `tokens` with every "WS" token removed.
    #[allow(dead_code)]
    pub fn new(tokens: Vec<RawToken>) -> Self {
        let mut kept = tokens;
        kept.retain(|t| t.kind != "WS");
        FilteredTokenStream {
            tokens: kept,
            pos: 0,
        }
    }

    /// Token at the cursor without consuming it.
    #[allow(dead_code)]
    pub fn peek(&self) -> Option<&RawToken> {
        self.tokens.get(self.pos)
    }

    /// Consumes and returns the token at the cursor.
    #[allow(dead_code)]
    #[allow(clippy::should_implement_trait)]
    pub fn next(&mut self) -> Option<&RawToken> {
        let tok = self.tokens.get(self.pos)?;
        self.pos += 1;
        Some(tok)
    }

    /// True once every token has been consumed.
    #[allow(dead_code)]
    pub fn is_eof(&self) -> bool {
        self.pos >= self.tokens.len()
    }

    /// Number of tokens not yet consumed.
    #[allow(dead_code)]
    pub fn remaining(&self) -> usize {
        self.tokens.len().saturating_sub(self.pos)
    }

    /// Index of the cursor within the filtered token list.
    #[allow(dead_code)]
    pub fn position(&self) -> usize {
        self.pos
    }
}
#[allow(dead_code)]
#[allow(missing_docs)]
#[derive(Debug, Clone)]
pub enum CharClass {
    Alpha,
    Digit,
    AlphaNum,
    Whitespace,
    Exact(char),
    OneOf(Vec<char>),
    NoneOf(Vec<char>),
}

impl CharClass {
    /// True when `c` belongs to this character class.
    #[allow(dead_code)]
    pub fn matches(&self, c: char) -> bool {
        use CharClass::*;
        match self {
            Alpha => c.is_ascii_alphabetic(),
            Digit => c.is_ascii_digit(),
            AlphaNum => c.is_ascii_alphanumeric(),
            Whitespace => c.is_whitespace(),
            Exact(x) => *x == c,
            OneOf(set) => set.contains(&c),
            NoneOf(set) => !set.contains(&c),
        }
    }
}
/// Lexing context, managed as a stack by `LexerModeStack`.
#[allow(dead_code)]
#[allow(missing_docs)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LexerMode {
    /// Default top-level lexing.
    Normal,
    /// Inside a string literal.
    StringLit,
    /// Inside a comment.
    Comment,
    // NOTE(review): presumably "inside a tactic block" — inferred from the
    // name only; confirm against the consumers of this enum.
    Tactic,
}
/// States of a token-scanning state machine.
#[allow(dead_code)]
#[allow(missing_docs)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LexerState {
    /// Before any character of the current token has been consumed.
    Start,
    /// Scanning an identifier.
    Ident,
    /// Scanning a number.
    Number,
    /// Scanning an operator.
    Operator,
    /// At the opening quote of a string.
    StringStart,
    /// Inside a line comment.
    LineComment,
    /// Token complete.
    Done,
}
#[allow(dead_code)]
#[allow(missing_docs)]
pub struct LineMap {
    pub line_starts: Vec<usize>,
}

impl LineMap {
    /// Records the byte offset at which each line of `src` begins (line 1
    /// starts at offset 0; each subsequent line starts just past a `\n`).
    #[allow(dead_code)]
    #[allow(clippy::should_implement_trait)]
    pub fn from_str(src: &str) -> Self {
        let line_starts: Vec<usize> = std::iter::once(0)
            .chain(
                src.char_indices()
                    .filter(|&(_, c)| c == '\n')
                    .map(|(i, _)| i + 1),
            )
            .collect();
        LineMap { line_starts }
    }

    /// 1-based line containing byte `offset`.
    #[allow(dead_code)]
    pub fn line_for_offset(&self, offset: usize) -> usize {
        match self.line_starts.binary_search(&offset) {
            // `offset` is exactly a line start: it is on that line.
            Ok(exact) => exact + 1,
            // Otherwise it falls after the previous line start.
            Err(insertion) => insertion,
        }
    }

    /// 1-based column of byte `offset` within its line.
    #[allow(dead_code)]
    pub fn col_for_offset(&self, offset: usize) -> usize {
        let line = self.line_for_offset(offset);
        let line_start = self.line_starts.get(line - 1).copied().unwrap_or(0);
        offset - line_start + 1
    }

    /// Number of lines recorded (at least 1, even for empty input).
    #[allow(dead_code)]
    pub fn line_count(&self) -> usize {
        self.line_starts.len()
    }
}