1use crate::error::Error;
2use crate::parsing::ast::Span;
3use crate::parsing::source::Source;
4use std::sync::Arc;
5
/// Every lexical category the [`Lexer`] can produce.
///
/// Keywords are recognized case-insensitively from identifiers (see
/// `keyword_from_identifier`); the `…Kw` suffix distinguishes type keywords
/// from token categories of the same name (e.g. `NumberKw` vs `NumberLit`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TokenKind {
    // Structural / declaration keywords.
    Spec,
    Fact,
    Rule,
    Unless,
    Then,
    Not,
    And,
    In,
    Type,
    From,
    With,
    Meta,
    Veto,
    Now,
    Calendar,
    Past,
    Future,

    // Boolean-literal keywords (several spellings of true/false).
    True,
    False,
    Yes,
    No,
    Accept,
    Reject,

    // Type keywords.
    ScaleKw,
    NumberKw,
    TextKw,
    DateKw,
    TimeKw,
    DurationKw,
    BooleanKw,
    PercentKw,
    RatioKw,

    // Math-function keywords.
    Sqrt,
    Sin,
    Cos,
    Tan,
    Asin,
    Acos,
    Atan,
    Log,
    Exp,
    Abs,
    Floor,
    Ceil,
    Round,

    // Duration-unit keywords (plural and singular forms).
    Years,
    Year,
    Months,
    Month,
    Weeks,
    Week,
    Days,
    Day,
    Hours,
    Hour,
    Minutes,
    Minute,
    Seconds,
    Second,
    Milliseconds,
    Millisecond,
    Microseconds,
    Microsecond,
    Permille,

    // Comparison keyword.
    Is,

    // Arithmetic / comparison operators.
    Plus,
    Minus,
    Star,
    Slash,
    Percent,
    PercentPercent,
    Caret,
    Gt,
    Lt,
    Gte,
    Lte,
    EqEq,
    BangEq,

    // Punctuation.
    Colon,
    Arrow,
    Tilde,
    Dot,
    At,
    LParen,
    RParen,
    LBracket,
    RBracket,

    // Literals (the token's `text` field carries the actual lexeme).
    NumberLit,
    StringLit,

    // Triple-quoted `"""…"""` commentary block; `text` holds the inner content.
    Commentary,

    // Anything identifier-shaped that is not a keyword.
    Identifier,

    // End of input (zero-width).
    Eof,
}
124
125impl std::fmt::Display for TokenKind {
126 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
127 match self {
128 TokenKind::Spec => write!(f, "'spec'"),
129 TokenKind::Fact => write!(f, "'fact'"),
130 TokenKind::Rule => write!(f, "'rule'"),
131 TokenKind::Unless => write!(f, "'unless'"),
132 TokenKind::Then => write!(f, "'then'"),
133 TokenKind::Not => write!(f, "'not'"),
134 TokenKind::And => write!(f, "'and'"),
135 TokenKind::In => write!(f, "'in'"),
136 TokenKind::Type => write!(f, "'type'"),
137 TokenKind::From => write!(f, "'from'"),
138 TokenKind::With => write!(f, "'with'"),
139 TokenKind::Meta => write!(f, "'meta'"),
140 TokenKind::Veto => write!(f, "'veto'"),
141 TokenKind::Now => write!(f, "'now'"),
142 TokenKind::Calendar => write!(f, "'calendar'"),
143 TokenKind::Past => write!(f, "'past'"),
144 TokenKind::Future => write!(f, "'future'"),
145 TokenKind::True => write!(f, "'true'"),
146 TokenKind::False => write!(f, "'false'"),
147 TokenKind::Yes => write!(f, "'yes'"),
148 TokenKind::No => write!(f, "'no'"),
149 TokenKind::Accept => write!(f, "'accept'"),
150 TokenKind::Reject => write!(f, "'reject'"),
151 TokenKind::ScaleKw => write!(f, "'scale'"),
152 TokenKind::NumberKw => write!(f, "'number'"),
153 TokenKind::TextKw => write!(f, "'text'"),
154 TokenKind::DateKw => write!(f, "'date'"),
155 TokenKind::TimeKw => write!(f, "'time'"),
156 TokenKind::DurationKw => write!(f, "'duration'"),
157 TokenKind::BooleanKw => write!(f, "'boolean'"),
158 TokenKind::PercentKw => write!(f, "'percent'"),
159 TokenKind::RatioKw => write!(f, "'ratio'"),
160 TokenKind::Sqrt => write!(f, "'sqrt'"),
161 TokenKind::Sin => write!(f, "'sin'"),
162 TokenKind::Cos => write!(f, "'cos'"),
163 TokenKind::Tan => write!(f, "'tan'"),
164 TokenKind::Asin => write!(f, "'asin'"),
165 TokenKind::Acos => write!(f, "'acos'"),
166 TokenKind::Atan => write!(f, "'atan'"),
167 TokenKind::Log => write!(f, "'log'"),
168 TokenKind::Exp => write!(f, "'exp'"),
169 TokenKind::Abs => write!(f, "'abs'"),
170 TokenKind::Floor => write!(f, "'floor'"),
171 TokenKind::Ceil => write!(f, "'ceil'"),
172 TokenKind::Round => write!(f, "'round'"),
173 TokenKind::Years => write!(f, "'years'"),
174 TokenKind::Year => write!(f, "'year'"),
175 TokenKind::Months => write!(f, "'months'"),
176 TokenKind::Month => write!(f, "'month'"),
177 TokenKind::Weeks => write!(f, "'weeks'"),
178 TokenKind::Week => write!(f, "'week'"),
179 TokenKind::Days => write!(f, "'days'"),
180 TokenKind::Day => write!(f, "'day'"),
181 TokenKind::Hours => write!(f, "'hours'"),
182 TokenKind::Hour => write!(f, "'hour'"),
183 TokenKind::Minutes => write!(f, "'minutes'"),
184 TokenKind::Minute => write!(f, "'minute'"),
185 TokenKind::Seconds => write!(f, "'seconds'"),
186 TokenKind::Second => write!(f, "'second'"),
187 TokenKind::Milliseconds => write!(f, "'milliseconds'"),
188 TokenKind::Millisecond => write!(f, "'millisecond'"),
189 TokenKind::Microseconds => write!(f, "'microseconds'"),
190 TokenKind::Microsecond => write!(f, "'microsecond'"),
191 TokenKind::Permille => write!(f, "'permille'"),
192 TokenKind::Is => write!(f, "'is'"),
193 TokenKind::Plus => write!(f, "'+'"),
194 TokenKind::Minus => write!(f, "'-'"),
195 TokenKind::Star => write!(f, "'*'"),
196 TokenKind::Slash => write!(f, "'/'"),
197 TokenKind::Percent => write!(f, "'%'"),
198 TokenKind::PercentPercent => write!(f, "'%%'"),
199 TokenKind::Caret => write!(f, "'^'"),
200 TokenKind::Gt => write!(f, "'>'"),
201 TokenKind::Lt => write!(f, "'<'"),
202 TokenKind::Gte => write!(f, "'>='"),
203 TokenKind::Lte => write!(f, "'<='"),
204 TokenKind::EqEq => write!(f, "'=='"),
205 TokenKind::BangEq => write!(f, "'!='"),
206 TokenKind::Colon => write!(f, "':'"),
207 TokenKind::Arrow => write!(f, "'->'"),
208 TokenKind::Tilde => write!(f, "'~'"),
209 TokenKind::Dot => write!(f, "'.'"),
210 TokenKind::At => write!(f, "'@'"),
211 TokenKind::LParen => write!(f, "'('"),
212 TokenKind::RParen => write!(f, "')'"),
213 TokenKind::LBracket => write!(f, "'['"),
214 TokenKind::RBracket => write!(f, "']'"),
215 TokenKind::NumberLit => write!(f, "a number"),
216 TokenKind::StringLit => write!(f, "a string"),
217 TokenKind::Commentary => write!(f, "commentary block"),
218 TokenKind::Identifier => write!(f, "an identifier"),
219 TokenKind::Eof => write!(f, "end of file"),
220 }
221 }
222}
223
/// A single lexed token: its category, source location, and raw lexeme.
#[derive(Debug, Clone)]
pub struct Token {
    pub kind: TokenKind,
    // Byte-offset span plus the line/col of the token's first character.
    pub span: Span,
    // The lexeme as lexed (string literals keep their quotes; commentary
    // blocks hold only the inner content; Eof is empty).
    pub text: String,
}
230
231impl Token {
232 pub fn eof(offset: usize, line: usize, col: usize) -> Self {
233 Token {
234 kind: TokenKind::Eof,
235 span: Span {
236 start: offset,
237 end: offset,
238 line,
239 col,
240 },
241 text: String::new(),
242 }
243 }
244}
245
/// Hand-written lexer with two tokens of lookahead.
pub struct Lexer {
    // Input decoded to chars for O(1) positional access.
    source: Vec<char>,
    // Index into `source` (chars, not bytes).
    pos: usize,
    // 1-based line of the current position.
    line: usize,
    // 1-based column of the current position.
    col: usize,
    // Byte offset matching `pos`; kept in sync so spans can slice
    // `source_text` directly.
    byte_offset: usize,
    // Source name (e.g. file path) embedded into errors.
    attribute: String,
    // Full input, shared cheaply with spans/errors.
    source_text: Arc<str>,
    // One- and two-token lookahead buffers.
    peeked: Option<Token>,
    peeked2: Option<Token>,
}
257
impl Lexer {
    /// Creates a lexer over `input`. `attribute` names the source (e.g. a
    /// file path) and is embedded in every error this lexer reports.
    pub fn new(input: &str, attribute: &str) -> Self {
        let source_text: Arc<str> = Arc::from(input);
        Lexer {
            source: input.chars().collect(),
            pos: 0,
            line: 1,
            col: 1,
            byte_offset: 0,
            attribute: attribute.to_string(),
            source_text,
            peeked: None,
            peeked2: None,
        }
    }

    /// Cheap (refcount-bump) handle to the full source text.
    pub fn source_text(&self) -> Arc<str> {
        self.source_text.clone()
    }

    /// The source name supplied at construction.
    pub fn attribute(&self) -> &str {
        &self.attribute
    }

    /// Returns the next token without consuming it, lexing it on demand.
    pub fn peek(&mut self) -> Result<&Token, Error> {
        if self.peeked.is_none() {
            let token = self.lex_token()?;
            self.peeked = Some(token);
        }
        Ok(self.peeked.as_ref().expect("just assigned"))
    }

    /// Two-token lookahead: the token after the one `peek` yields.
    pub fn peek_second(&mut self) -> Result<&Token, Error> {
        // Fill the first slot before the second so order is preserved.
        self.peek()?;
        if self.peeked2.is_none() {
            let token = self.lex_token()?;
            self.peeked2 = Some(token);
        }
        Ok(self.peeked2.as_ref().expect("just assigned"))
    }

    /// Zero-width span at the lexer's raw cursor.
    ///
    /// NOTE(review): this reflects the underlying cursor, which has already
    /// moved past any buffered lookahead tokens — confirm callers account
    /// for that when lookahead is active.
    pub fn current_span(&self) -> Span {
        Span {
            start: self.byte_offset,
            end: self.byte_offset,
            line: self.line,
            col: self.col,
        }
    }

    /// Skips whitespace, then consumes and returns a maximal run of ASCII
    /// alphanumerics verbatim (no keyword classification).
    ///
    /// NOTE(review): pending lookahead tokens are dropped here even though
    /// the cursor already advanced past them, so their text is skipped —
    /// presumably intentional for raw re-scanning; confirm callers never
    /// peek before calling this.
    pub fn scan_raw_alphanumeric(&mut self) -> Result<String, Error> {
        self.peeked = None;
        self.peeked2 = None;
        self.skip_whitespace();
        let mut result = String::new();
        while let Some(ch) = self.current_char() {
            if ch.is_ascii_alphanumeric() {
                result.push(ch);
                self.advance();
            } else {
                break;
            }
        }
        Ok(result)
    }

    /// Consumes and returns the next token, draining the lookahead buffers
    /// first so `peek`/`peek_second` stay consistent with consumption order.
    pub fn next_token(&mut self) -> Result<Token, Error> {
        if let Some(token) = self.peeked.take() {
            // Shift the second lookahead slot into the first.
            self.peeked = self.peeked2.take();
            return Ok(token);
        }
        self.lex_token()
    }

    // Character under the cursor, if any.
    fn current_char(&self) -> Option<char> {
        self.source.get(self.pos).copied()
    }

    // One character past the cursor.
    fn peek_char(&self) -> Option<char> {
        self.source.get(self.pos + 1).copied()
    }

    // `offset` characters past the cursor.
    fn peek_char_at(&self, offset: usize) -> Option<char> {
        self.source.get(self.pos + offset).copied()
    }

    // Moves the cursor one char forward, keeping byte offset and the
    // line/col counters in sync ('\n' starts a new line at col 1).
    fn advance(&mut self) {
        if let Some(ch) = self.current_char() {
            self.byte_offset += ch.len_utf8();
            if ch == '\n' {
                self.line += 1;
                self.col = 1;
            } else {
                self.col += 1;
            }
            self.pos += 1;
        }
    }

    // Skips any run of Unicode whitespace (including newlines).
    fn skip_whitespace(&mut self) {
        while let Some(ch) = self.current_char() {
            if ch.is_whitespace() {
                self.advance();
            } else {
                break;
            }
        }
    }

    // Span from a saved start position up to the current byte offset;
    // line/col describe the start, not the end.
    fn make_span(&self, start_byte: usize, start_line: usize, start_col: usize) -> Span {
        Span {
            start: start_byte,
            end: self.byte_offset,
            line: start_line,
            col: start_col,
        }
    }

    // Wraps a message and span into a parsing error carrying the source
    // name and full text for rendering.
    fn make_error(&self, message: impl Into<String>, span: Span) -> Error {
        Error::parsing(
            message,
            Source::new(&self.attribute, span, self.source_text.clone()),
            None::<String>,
        )
    }

    // Core dispatcher: skips whitespace, then tries each token class in
    // priority order. Longest-match concerns are handled by ordering:
    // triple-quote before string, two-char operators before single-char.
    fn lex_token(&mut self) -> Result<Token, Error> {
        self.skip_whitespace();

        let start_byte = self.byte_offset;
        let start_line = self.line;
        let start_col = self.col;

        let Some(ch) = self.current_char() else {
            return Ok(Token::eof(start_byte, start_line, start_col));
        };

        // `"""` must be checked before a plain `"` string.
        if ch == '"' && self.peek_char() == Some('"') && self.peek_char_at(2) == Some('"') {
            return self.scan_triple_quote(start_byte, start_line, start_col);
        }

        if ch == '"' {
            return self.scan_string(start_byte, start_line, start_col);
        }

        if ch.is_ascii_digit() {
            return self.scan_number(start_byte, start_line, start_col);
        }

        // Two-char operators (`->`, `>=`, …) before their one-char prefixes.
        if let Some(token) = self.try_two_char_operator(start_byte, start_line, start_col) {
            return Ok(token);
        }

        if let Some(kind) = self.single_char_token(ch) {
            self.advance();
            let span = self.make_span(start_byte, start_line, start_col);
            let text = ch.to_string();
            return Ok(Token { kind, span, text });
        }

        // Identifiers/keywords start with a letter or underscore.
        if ch.is_ascii_alphabetic() || ch == '_' {
            return Ok(self.scan_identifier(start_byte, start_line, start_col));
        }

        // '@' is not in `single_char_token`, so it is handled here.
        if ch == '@' {
            self.advance();
            let span = self.make_span(start_byte, start_line, start_col);
            return Ok(Token {
                kind: TokenKind::At,
                span,
                text: "@".to_string(),
            });
        }

        // Unknown character: consume it so the span covers it, then error.
        self.advance();
        let span = self.make_span(start_byte, start_line, start_col);
        Err(self.make_error(format!("Unexpected character '{}'", ch), span))
    }

    // Lexes a `"""…"""` commentary block. The token text is the raw inner
    // content (delimiters excluded), sliced by byte offsets from the
    // original source so it is byte-exact.
    fn scan_triple_quote(
        &mut self,
        start_byte: usize,
        start_line: usize,
        start_col: usize,
    ) -> Result<Token, Error> {
        // Consume the opening `"""`.
        self.advance();
        self.advance();
        self.advance();
        let content_start = self.byte_offset;
        loop {
            match self.current_char() {
                None => {
                    let span = self.make_span(start_byte, start_line, start_col);
                    return Err(self.make_error(
                        "Unterminated commentary block: expected closing \"\"\"",
                        span,
                    ));
                }
                Some('"')
                    if self.source.get(self.pos + 1) == Some(&'"')
                        && self.source.get(self.pos + 2) == Some(&'"') =>
                {
                    let content_end = self.byte_offset;
                    // Consume the closing `"""`.
                    self.advance();
                    self.advance();
                    self.advance();
                    let raw: String = self.source_text[content_start..content_end].to_string();
                    let span = self.make_span(start_byte, start_line, start_col);
                    return Ok(Token {
                        kind: TokenKind::Commentary,
                        span,
                        text: raw,
                    });
                }
                Some(_) => {
                    self.advance();
                }
            }
        }
    }

    // Lexes a double-quoted string. No escape sequences are recognized, so
    // a string cannot contain `"`. The token text keeps the quotes.
    fn scan_string(
        &mut self,
        start_byte: usize,
        start_line: usize,
        start_col: usize,
    ) -> Result<Token, Error> {
        // Consume the opening quote.
        self.advance();
        let mut content = String::new();
        loop {
            match self.current_char() {
                None => {
                    let span = self.make_span(start_byte, start_line, start_col);
                    return Err(self.make_error("String starting here was never closed", span));
                }
                Some('"') => {
                    // Closing quote.
                    self.advance();
                    break;
                }
                Some(ch) => {
                    content.push(ch);
                    self.advance();
                }
            }
        }
        let span = self.make_span(start_byte, start_line, start_col);
        let full_text = format!("\"{}\"", content);
        Ok(Token {
            kind: TokenKind::StringLit,
            span,
            text: full_text,
        })
    }

    // Lexes a numeric literal. Accepts `_` and `,` as digit separators
    // (kept verbatim in the text), an optional fractional part (only when a
    // digit follows the '.', so `1.` leaves the dot as its own token), and
    // an optional exponent. The exponent is speculative: if `e`/`E` is not
    // followed by digits (optionally signed), the cursor is fully restored
    // so e.g. `42 eur` lexes as a number then an identifier.
    fn scan_number(
        &mut self,
        start_byte: usize,
        start_line: usize,
        start_col: usize,
    ) -> Result<Token, Error> {
        let mut text = String::new();

        // Integer part, with `_`/`,` separators.
        while let Some(ch) = self.current_char() {
            if ch.is_ascii_digit() || ch == '_' || ch == ',' {
                text.push(ch);
                self.advance();
            } else {
                break;
            }
        }

        // Fractional part: only if '.' is immediately followed by a digit.
        if self.current_char() == Some('.') {
            if let Some(next) = self.peek_char() {
                if next.is_ascii_digit() {
                    text.push('.');
                    self.advance();
                    while let Some(ch) = self.current_char() {
                        if ch.is_ascii_digit() {
                            text.push(ch);
                            self.advance();
                        } else {
                            break;
                        }
                    }
                }
            }
        }

        // Optional scientific-notation exponent, with full backtracking.
        if let Some(ch) = self.current_char() {
            if ch == 'e' || ch == 'E' {
                let mut sci_text = String::new();
                sci_text.push(ch);
                // Save the cursor so we can undo if this is not an exponent.
                let save_pos = self.pos;
                let save_byte = self.byte_offset;
                let save_line = self.line;
                let save_col = self.col;
                self.advance();
                if let Some(sign) = self.current_char() {
                    if sign == '+' || sign == '-' {
                        sci_text.push(sign);
                        self.advance();
                    }
                }

                if let Some(d) = self.current_char() {
                    if d.is_ascii_digit() {
                        while let Some(ch) = self.current_char() {
                            if ch.is_ascii_digit() {
                                sci_text.push(ch);
                                self.advance();
                            } else {
                                break;
                            }
                        }
                        text.push_str(&sci_text);
                    } else {
                        // No exponent digits: restore the saved position.
                        self.pos = save_pos;
                        self.byte_offset = save_byte;
                        self.line = save_line;
                        self.col = save_col;
                    }
                } else {
                    // Input ended after `e`/sign: restore the saved position.
                    self.pos = save_pos;
                    self.byte_offset = save_byte;
                    self.line = save_line;
                    self.col = save_col;
                }
            }
        }

        let span = self.make_span(start_byte, start_line, start_col);
        Ok(Token {
            kind: TokenKind::NumberLit,
            span,
            text,
        })
    }

    // Recognizes the two-character operators; consumes both chars on a
    // match, nothing otherwise. Must run before `single_char_token` so
    // `>=` is not lexed as `>` `=`.
    fn try_two_char_operator(
        &mut self,
        start_byte: usize,
        start_line: usize,
        start_col: usize,
    ) -> Option<Token> {
        let ch = self.current_char()?;
        let next = self.peek_char();

        let kind = match (ch, next) {
            ('-', Some('>')) => TokenKind::Arrow,
            ('>', Some('=')) => TokenKind::Gte,
            ('<', Some('=')) => TokenKind::Lte,
            ('=', Some('=')) => TokenKind::EqEq,
            ('!', Some('=')) => TokenKind::BangEq,
            ('%', Some('%')) => {
                TokenKind::PercentPercent
            }
            _ => return None,
        };

        self.advance();
        self.advance();
        let span = self.make_span(start_byte, start_line, start_col);
        // Slice the original text rather than rebuilding it.
        let text: String = self.source_text[span.start..span.end].to_string();
        Some(Token { kind, span, text })
    }

    // Single-character operator/punctuation table. '@' is deliberately
    // absent (handled in `lex_token` after the identifier check).
    fn single_char_token(&self, ch: char) -> Option<TokenKind> {
        match ch {
            '+' => Some(TokenKind::Plus),
            '*' => Some(TokenKind::Star),
            '/' => Some(TokenKind::Slash),
            '^' => Some(TokenKind::Caret),
            ':' => Some(TokenKind::Colon),
            '~' => Some(TokenKind::Tilde),
            '.' => Some(TokenKind::Dot),
            '(' => Some(TokenKind::LParen),
            ')' => Some(TokenKind::RParen),
            '[' => Some(TokenKind::LBracket),
            ']' => Some(TokenKind::RBracket),
            '>' => Some(TokenKind::Gt),
            '<' => Some(TokenKind::Lt),
            '%' => Some(TokenKind::Percent),
            '-' => Some(TokenKind::Minus),
            _ => None,
        }
    }

    // Lexes an identifier ([A-Za-z0-9_]+ after an alphabetic/underscore
    // start), then classifies it as a keyword or plain identifier. The
    // token text keeps the original casing.
    fn scan_identifier(&mut self, start_byte: usize, start_line: usize, start_col: usize) -> Token {
        let mut text = String::new();
        while let Some(ch) = self.current_char() {
            if ch.is_ascii_alphanumeric() || ch == '_' {
                text.push(ch);
                self.advance();
            } else {
                break;
            }
        }

        let kind = keyword_from_identifier(&text);
        let span = self.make_span(start_byte, start_line, start_col);
        Token { kind, span, text }
    }
}
684
685fn keyword_from_identifier(text: &str) -> TokenKind {
686 match text.to_lowercase().as_str() {
687 "spec" => TokenKind::Spec,
688 "fact" => TokenKind::Fact,
689 "rule" => TokenKind::Rule,
690 "unless" => TokenKind::Unless,
691 "then" => TokenKind::Then,
692 "not" => TokenKind::Not,
693 "and" => TokenKind::And,
694 "in" => TokenKind::In,
695 "type" => TokenKind::Type,
696 "from" => TokenKind::From,
697 "with" => TokenKind::With,
698 "meta" => TokenKind::Meta,
699 "veto" => TokenKind::Veto,
700 "now" => TokenKind::Now,
701 "calendar" => TokenKind::Calendar,
702 "past" => TokenKind::Past,
703 "future" => TokenKind::Future,
704 "true" => TokenKind::True,
705 "false" => TokenKind::False,
706 "yes" => TokenKind::Yes,
707 "no" => TokenKind::No,
708 "accept" => TokenKind::Accept,
709 "reject" => TokenKind::Reject,
710 "scale" => TokenKind::ScaleKw,
711 "number" => TokenKind::NumberKw,
712 "text" => TokenKind::TextKw,
713 "date" => TokenKind::DateKw,
714 "time" => TokenKind::TimeKw,
715 "duration" => TokenKind::DurationKw,
716 "boolean" => TokenKind::BooleanKw,
717 "percent" => TokenKind::PercentKw,
718 "ratio" => TokenKind::RatioKw,
719 "sqrt" => TokenKind::Sqrt,
720 "sin" => TokenKind::Sin,
721 "cos" => TokenKind::Cos,
722 "tan" => TokenKind::Tan,
723 "asin" => TokenKind::Asin,
724 "acos" => TokenKind::Acos,
725 "atan" => TokenKind::Atan,
726 "log" => TokenKind::Log,
727 "exp" => TokenKind::Exp,
728 "abs" => TokenKind::Abs,
729 "floor" => TokenKind::Floor,
730 "ceil" => TokenKind::Ceil,
731 "round" => TokenKind::Round,
732 "is" => TokenKind::Is,
733 "years" => TokenKind::Years,
734 "year" => TokenKind::Year,
735 "months" => TokenKind::Months,
736 "month" => TokenKind::Month,
737 "weeks" => TokenKind::Weeks,
738 "week" => TokenKind::Week,
739 "days" => TokenKind::Days,
740 "day" => TokenKind::Day,
741 "hours" => TokenKind::Hours,
742 "hour" => TokenKind::Hour,
743 "minutes" => TokenKind::Minutes,
744 "minute" => TokenKind::Minute,
745 "seconds" => TokenKind::Seconds,
746 "second" => TokenKind::Second,
747 "milliseconds" => TokenKind::Milliseconds,
748 "millisecond" => TokenKind::Millisecond,
749 "microseconds" => TokenKind::Microseconds,
750 "microsecond" => TokenKind::Microsecond,
751 "permille" => TokenKind::Permille,
752 _ => TokenKind::Identifier,
753 }
754}
755
756pub fn is_structural_keyword(kind: &TokenKind) -> bool {
761 matches!(
762 kind,
763 TokenKind::Spec
764 | TokenKind::Fact
765 | TokenKind::Rule
766 | TokenKind::Unless
767 | TokenKind::Then
768 | TokenKind::Not
769 | TokenKind::And
770 | TokenKind::In
771 | TokenKind::Type
772 | TokenKind::From
773 | TokenKind::With
774 | TokenKind::Meta
775 | TokenKind::Veto
776 | TokenKind::Now
777 | TokenKind::Sqrt
778 | TokenKind::Sin
779 | TokenKind::Cos
780 | TokenKind::Tan
781 | TokenKind::Asin
782 | TokenKind::Acos
783 | TokenKind::Atan
784 | TokenKind::Log
785 | TokenKind::Exp
786 | TokenKind::Abs
787 | TokenKind::Floor
788 | TokenKind::Ceil
789 | TokenKind::Round
790 | TokenKind::True
791 | TokenKind::False
792 | TokenKind::Yes
793 | TokenKind::No
794 | TokenKind::Accept
795 | TokenKind::Reject
796 )
797}
798
799pub fn is_type_keyword(kind: &TokenKind) -> bool {
802 matches!(
803 kind,
804 TokenKind::BooleanKw
805 | TokenKind::ScaleKw
806 | TokenKind::NumberKw
807 | TokenKind::PercentKw
808 | TokenKind::RatioKw
809 | TokenKind::TextKw
810 | TokenKind::DateKw
811 | TokenKind::TimeKw
812 | TokenKind::DurationKw
813 )
814}
815
816pub fn is_boolean_keyword(kind: &TokenKind) -> bool {
818 matches!(
819 kind,
820 TokenKind::True
821 | TokenKind::False
822 | TokenKind::Yes
823 | TokenKind::No
824 | TokenKind::Accept
825 | TokenKind::Reject
826 )
827}
828
829pub fn is_duration_unit(kind: &TokenKind) -> bool {
831 matches!(
832 kind,
833 TokenKind::Years
834 | TokenKind::Year
835 | TokenKind::Months
836 | TokenKind::Month
837 | TokenKind::Weeks
838 | TokenKind::Week
839 | TokenKind::Days
840 | TokenKind::Day
841 | TokenKind::Hours
842 | TokenKind::Hour
843 | TokenKind::Minutes
844 | TokenKind::Minute
845 | TokenKind::Seconds
846 | TokenKind::Second
847 | TokenKind::Milliseconds
848 | TokenKind::Millisecond
849 | TokenKind::Microseconds
850 | TokenKind::Microsecond
851 | TokenKind::PercentKw
852 )
853}
854
855pub fn is_math_function(kind: &TokenKind) -> bool {
857 matches!(
858 kind,
859 TokenKind::Sqrt
860 | TokenKind::Sin
861 | TokenKind::Cos
862 | TokenKind::Tan
863 | TokenKind::Asin
864 | TokenKind::Acos
865 | TokenKind::Atan
866 | TokenKind::Log
867 | TokenKind::Exp
868 | TokenKind::Abs
869 | TokenKind::Floor
870 | TokenKind::Ceil
871 | TokenKind::Round
872 )
873}
874
875pub fn is_spec_body_keyword(kind: &TokenKind) -> bool {
878 matches!(
879 kind,
880 TokenKind::Fact | TokenKind::Rule | TokenKind::Type | TokenKind::Meta
881 )
882}
883
884pub fn can_be_label(kind: &TokenKind) -> bool {
889 matches!(
890 kind,
891 TokenKind::Identifier
892 | TokenKind::Calendar
893 | TokenKind::Past
894 | TokenKind::Future
895 | TokenKind::Years
896 | TokenKind::Year
897 | TokenKind::Months
898 | TokenKind::Month
899 | TokenKind::Weeks
900 | TokenKind::Week
901 | TokenKind::Days
902 | TokenKind::Day
903 | TokenKind::Hours
904 | TokenKind::Hour
905 | TokenKind::Minutes
906 | TokenKind::Minute
907 | TokenKind::Seconds
908 | TokenKind::Second
909 | TokenKind::Milliseconds
910 | TokenKind::Millisecond
911 | TokenKind::Microseconds
912 | TokenKind::Microsecond
913 | TokenKind::Permille
914 | TokenKind::Is
915 )
916}
917
918pub fn can_be_reference_segment(kind: &TokenKind) -> bool {
921 can_be_label(kind) || is_type_keyword(kind)
922}
923
#[cfg(test)]
mod tests {
    use super::*;

    // Drains the lexer into a Vec, including the trailing Eof token.
    fn lex_all(input: &str) -> Result<Vec<Token>, Error> {
        let mut lexer = Lexer::new(input, "test.lemma");
        let mut tokens = Vec::new();
        loop {
            let token = lexer.next_token()?;
            if token.kind == TokenKind::Eof {
                tokens.push(token);
                break;
            }
            tokens.push(token);
        }
        Ok(tokens)
    }

    // Like `lex_all` but keeps only the kinds, for compact assertions.
    fn lex_kinds(input: &str) -> Result<Vec<TokenKind>, Error> {
        Ok(lex_all(input)?.into_iter().map(|t| t.kind).collect())
    }

    #[test]
    fn lex_empty_input() {
        let tokens = lex_all("").unwrap();
        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0].kind, TokenKind::Eof);
    }

    #[test]
    fn lex_spec_declaration() {
        let kinds = lex_kinds("spec person").unwrap();
        assert_eq!(
            kinds,
            vec![TokenKind::Spec, TokenKind::Identifier, TokenKind::Eof]
        );
    }

    #[test]
    fn lex_fact_definition() {
        let kinds = lex_kinds("fact age: 25").unwrap();
        assert_eq!(
            kinds,
            vec![
                TokenKind::Fact,
                TokenKind::Identifier,
                TokenKind::Colon,
                TokenKind::NumberLit,
                TokenKind::Eof,
            ]
        );
    }

    #[test]
    fn lex_rule_with_comparison() {
        let kinds = lex_kinds("rule is_adult: age >= 18").unwrap();
        assert_eq!(
            kinds,
            vec![
                TokenKind::Rule,
                TokenKind::Identifier,
                TokenKind::Colon,
                TokenKind::Identifier,
                TokenKind::Gte,
                TokenKind::NumberLit,
                TokenKind::Eof,
            ]
        );
    }

    // String tokens keep their surrounding quotes in `text`.
    #[test]
    fn lex_string_literal() {
        let tokens = lex_all(r#""hello world""#).unwrap();
        assert_eq!(tokens[0].kind, TokenKind::StringLit);
        assert_eq!(tokens[0].text, "\"hello world\"");
    }

    #[test]
    fn lex_unterminated_string() {
        let result = lex_all(r#""hello"#);
        assert!(result.is_err());
    }

    #[test]
    fn lex_number_with_decimal() {
        let tokens = lex_all("3.14").unwrap();
        assert_eq!(tokens[0].kind, TokenKind::NumberLit);
        assert_eq!(tokens[0].text, "3.14");
    }

    // Underscore separators are accepted and preserved verbatim.
    #[test]
    fn lex_number_with_underscores() {
        let tokens = lex_all("1_000_000").unwrap();
        assert_eq!(tokens[0].kind, TokenKind::NumberLit);
        assert_eq!(tokens[0].text, "1_000_000");
    }

    #[test]
    fn lex_scientific_notation() {
        let tokens = lex_all("1.5e+10").unwrap();
        assert_eq!(tokens[0].kind, TokenKind::NumberLit);
        assert_eq!(tokens[0].text, "1.5e+10");
    }

    #[test]
    fn lex_all_operators() {
        let kinds = lex_kinds("+ - * / % ^ > < >= <= == != -> %%").unwrap();
        assert_eq!(
            &kinds[..14],
            &[
                TokenKind::Plus,
                TokenKind::Minus,
                TokenKind::Star,
                TokenKind::Slash,
                TokenKind::Percent,
                TokenKind::Caret,
                TokenKind::Gt,
                TokenKind::Lt,
                TokenKind::Gte,
                TokenKind::Lte,
                TokenKind::EqEq,
                TokenKind::BangEq,
                TokenKind::Arrow,
                TokenKind::PercentPercent,
            ]
        );
    }

    #[test]
    fn lex_keywords() {
        let kinds = lex_kinds("spec fact rule unless then not and in type from with meta veto now")
            .unwrap();
        assert_eq!(
            &kinds[..14],
            &[
                TokenKind::Spec,
                TokenKind::Fact,
                TokenKind::Rule,
                TokenKind::Unless,
                TokenKind::Then,
                TokenKind::Not,
                TokenKind::And,
                TokenKind::In,
                TokenKind::Type,
                TokenKind::From,
                TokenKind::With,
                TokenKind::Meta,
                TokenKind::Veto,
                TokenKind::Now,
            ]
        );
    }

    #[test]
    fn lex_boolean_keywords() {
        let kinds = lex_kinds("true false yes no accept reject").unwrap();
        assert_eq!(
            &kinds[..6],
            &[
                TokenKind::True,
                TokenKind::False,
                TokenKind::Yes,
                TokenKind::No,
                TokenKind::Accept,
                TokenKind::Reject,
            ]
        );
    }

    #[test]
    fn lex_duration_keywords() {
        let kinds = lex_kinds("years months weeks days hours minutes seconds").unwrap();
        assert_eq!(
            &kinds[..7],
            &[
                TokenKind::Years,
                TokenKind::Months,
                TokenKind::Weeks,
                TokenKind::Days,
                TokenKind::Hours,
                TokenKind::Minutes,
                TokenKind::Seconds,
            ]
        );
    }

    // Commentary token text is the inner content, delimiters stripped.
    #[test]
    fn lex_commentary() {
        let tokens = lex_all(r#""""hello world""""#).unwrap();
        assert_eq!(tokens[0].kind, TokenKind::Commentary);
        assert_eq!(tokens[0].text, "hello world");
    }

    #[test]
    fn lex_at_sign() {
        let kinds = lex_kinds("@user").unwrap();
        assert_eq!(kinds[0], TokenKind::At);
        assert_eq!(kinds[1], TokenKind::Identifier);
    }

    #[test]
    fn lex_tilde() {
        let kinds = lex_kinds("~").unwrap();
        assert_eq!(kinds[0], TokenKind::Tilde);
    }

    #[test]
    fn lex_brackets() {
        let kinds = lex_kinds("[number]").unwrap();
        assert_eq!(
            &kinds[..3],
            &[
                TokenKind::LBracket,
                TokenKind::NumberKw,
                TokenKind::RBracket
            ]
        );
    }

    #[test]
    fn lex_parentheses() {
        let kinds = lex_kinds("(x + 1)").unwrap();
        assert_eq!(
            &kinds[..5],
            &[
                TokenKind::LParen,
                TokenKind::Identifier,
                TokenKind::Plus,
                TokenKind::NumberLit,
                TokenKind::RParen,
            ]
        );
    }

    #[test]
    fn lex_dot_for_references() {
        let kinds = lex_kinds("employee.salary").unwrap();
        assert_eq!(
            &kinds[..3],
            &[TokenKind::Identifier, TokenKind::Dot, TokenKind::Identifier]
        );
    }

    #[test]
    fn lex_spec_name_with_slashes() {
        let tokens = lex_all("spec contracts/employment/jack").unwrap();
        assert_eq!(tokens[0].kind, TokenKind::Spec);
        assert_eq!(tokens[1].kind, TokenKind::Identifier);
    }

    // Exponent backtracking: `e` not followed by digits is left for the
    // next token rather than swallowed into the number.
    #[test]
    fn lex_number_not_followed_by_e_identifier() {
        let tokens = lex_all("42 eur").unwrap();
        assert_eq!(tokens[0].kind, TokenKind::NumberLit);
        assert_eq!(tokens[0].text, "42");
        assert_eq!(tokens[1].kind, TokenKind::Identifier);
        assert_eq!(tokens[1].text, "eur");
    }

    #[test]
    fn lex_unknown_character() {
        let result = lex_all("ยง");
        assert!(result.is_err());
    }

    #[test]
    fn lex_peek_does_not_consume() {
        let mut lexer = Lexer::new("spec test", "test.lemma");
        let peeked_kind = lexer.peek().unwrap().kind.clone();
        assert_eq!(peeked_kind, TokenKind::Spec);
        let next = lexer.next_token().unwrap();
        assert_eq!(next.kind, TokenKind::Spec);
    }

    // Spans are byte offsets; line/col describe the token start.
    #[test]
    fn lex_span_byte_offsets() {
        let tokens = lex_all("spec test").unwrap();
        assert_eq!(tokens[0].span.start, 0);
        assert_eq!(tokens[0].span.end, 4);
        assert_eq!(tokens[0].span.line, 1);
        assert_eq!(tokens[0].span.col, 1);

        assert_eq!(tokens[1].span.start, 5);
        assert_eq!(tokens[1].span.end, 9);
        assert_eq!(tokens[1].span.line, 1);
        assert_eq!(tokens[1].span.col, 6);
    }

    #[test]
    fn lex_multiline_span_tracking() {
        let tokens = lex_all("spec test\nfact x: 1").unwrap();
        let fact_token = &tokens[2];
        assert_eq!(fact_token.kind, TokenKind::Fact);
        assert_eq!(fact_token.span.line, 2);
        assert_eq!(fact_token.span.col, 1);
    }

    #[test]
    fn lex_case_insensitive_keywords() {
        let kinds = lex_kinds("SPEC Fact RULE").unwrap();
        assert_eq!(kinds[0], TokenKind::Spec);
        assert_eq!(kinds[1], TokenKind::Fact);
        assert_eq!(kinds[2], TokenKind::Rule);
    }

    #[test]
    fn lex_math_function_keywords() {
        let kinds =
            lex_kinds("sqrt sin cos tan asin acos atan log exp abs floor ceil round").unwrap();
        assert_eq!(
            &kinds[..13],
            &[
                TokenKind::Sqrt,
                TokenKind::Sin,
                TokenKind::Cos,
                TokenKind::Tan,
                TokenKind::Asin,
                TokenKind::Acos,
                TokenKind::Atan,
                TokenKind::Log,
                TokenKind::Exp,
                TokenKind::Abs,
                TokenKind::Floor,
                TokenKind::Ceil,
                TokenKind::Round,
            ]
        );
    }

    #[test]
    fn lex_is_keyword() {
        let kinds = lex_kinds("status is \"active\"").unwrap();
        assert_eq!(kinds[0], TokenKind::Identifier);
        assert_eq!(kinds[1], TokenKind::Is);
        assert_eq!(kinds[2], TokenKind::StringLit);
    }

    // `%` after a number is the Percent operator, not part of the literal.
    #[test]
    fn lex_percent_not_followed_by_digit() {
        let kinds = lex_kinds("50%").unwrap();
        assert_eq!(kinds[0], TokenKind::NumberLit);
        assert_eq!(kinds[1], TokenKind::Percent);
    }

    // Comma separators are accepted inside numbers and preserved verbatim.
    #[test]
    fn lex_number_with_commas() {
        let tokens = lex_all("1,000,000").unwrap();
        assert_eq!(tokens[0].kind, TokenKind::NumberLit);
        assert_eq!(tokens[0].text, "1,000,000");
    }

    #[test]
    fn lex_arrow_chain() {
        let kinds = lex_kinds("-> unit eur 1.00 -> decimals 2").unwrap();
        assert_eq!(kinds[0], TokenKind::Arrow);
        assert_eq!(kinds[1], TokenKind::Identifier);
        assert_eq!(kinds[2], TokenKind::Identifier);
        assert_eq!(kinds[3], TokenKind::NumberLit);
        assert_eq!(kinds[4], TokenKind::Arrow);
    }
}