1use crate::error::Error;
2use crate::parsing::ast::{
3 BooleanValue, CalendarUnit, ConversionTarget, DurationUnit, PrimitiveKind, Span,
4};
5use crate::parsing::source::Source;
6use std::sync::Arc;
7
/// Every lexical token category produced by [`Lexer`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TokenKind {
    // Declaration / structural keywords.
    Spec,
    Fact,
    Rule,
    Unless,
    Then,
    Not,
    And,
    In,
    Type,
    From,
    With,
    Meta,
    Veto,
    Now,
    Calendar,
    Past,
    Future,

    // Boolean literal keywords.
    True,
    False,
    Yes,
    No,
    Accept,
    Reject,

    // Primitive type keywords (see `token_kind_to_primitive`).
    ScaleKw,
    NumberKw,
    TextKw,
    DateKw,
    TimeKw,
    DurationKw,
    BooleanKw,
    PercentKw,
    RatioKw,

    // Math function keywords (see `is_math_function`).
    Sqrt,
    Sin,
    Cos,
    Tan,
    Asin,
    Acos,
    Atan,
    Log,
    Exp,
    Abs,
    Floor,
    Ceil,
    Round,

    // Duration / calendar unit keywords, plural and singular forms.
    Years,
    Year,
    Months,
    Month,
    Weeks,
    Week,
    Days,
    Day,
    Hours,
    Hour,
    Minutes,
    Minute,
    Seconds,
    Second,
    Milliseconds,
    Millisecond,
    Microseconds,
    Microsecond,
    Permille,

    Is,

    // Arithmetic / comparison operators.
    Plus,
    Minus,
    Star,
    Slash,
    Percent,
    PercentPercent,
    Caret,
    Gt,
    Lt,
    Gte,
    Lte,
    EqEq,
    BangEq,

    // Punctuation.
    Colon,
    Arrow,
    Tilde,
    Dot,
    At,
    LParen,
    RParen,
    LBracket,
    RBracket,

    // Literals.
    NumberLit,
    StringLit,

    // A `"""..."""` commentary block.
    Commentary,

    Identifier,

    // Synthetic end-of-input token.
    Eof,
}
126
127impl std::fmt::Display for TokenKind {
128 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
129 match self {
130 TokenKind::Spec => write!(f, "'spec'"),
131 TokenKind::Fact => write!(f, "'fact'"),
132 TokenKind::Rule => write!(f, "'rule'"),
133 TokenKind::Unless => write!(f, "'unless'"),
134 TokenKind::Then => write!(f, "'then'"),
135 TokenKind::Not => write!(f, "'not'"),
136 TokenKind::And => write!(f, "'and'"),
137 TokenKind::In => write!(f, "'in'"),
138 TokenKind::Type => write!(f, "'type'"),
139 TokenKind::From => write!(f, "'from'"),
140 TokenKind::With => write!(f, "'with'"),
141 TokenKind::Meta => write!(f, "'meta'"),
142 TokenKind::Veto => write!(f, "'veto'"),
143 TokenKind::Now => write!(f, "'now'"),
144 TokenKind::Calendar => write!(f, "'calendar'"),
145 TokenKind::Past => write!(f, "'past'"),
146 TokenKind::Future => write!(f, "'future'"),
147 TokenKind::True => write!(f, "'true'"),
148 TokenKind::False => write!(f, "'false'"),
149 TokenKind::Yes => write!(f, "'yes'"),
150 TokenKind::No => write!(f, "'no'"),
151 TokenKind::Accept => write!(f, "'accept'"),
152 TokenKind::Reject => write!(f, "'reject'"),
153 TokenKind::ScaleKw => write!(f, "'scale'"),
154 TokenKind::NumberKw => write!(f, "'number'"),
155 TokenKind::TextKw => write!(f, "'text'"),
156 TokenKind::DateKw => write!(f, "'date'"),
157 TokenKind::TimeKw => write!(f, "'time'"),
158 TokenKind::DurationKw => write!(f, "'duration'"),
159 TokenKind::BooleanKw => write!(f, "'boolean'"),
160 TokenKind::PercentKw => write!(f, "'percent'"),
161 TokenKind::RatioKw => write!(f, "'ratio'"),
162 TokenKind::Sqrt => write!(f, "'sqrt'"),
163 TokenKind::Sin => write!(f, "'sin'"),
164 TokenKind::Cos => write!(f, "'cos'"),
165 TokenKind::Tan => write!(f, "'tan'"),
166 TokenKind::Asin => write!(f, "'asin'"),
167 TokenKind::Acos => write!(f, "'acos'"),
168 TokenKind::Atan => write!(f, "'atan'"),
169 TokenKind::Log => write!(f, "'log'"),
170 TokenKind::Exp => write!(f, "'exp'"),
171 TokenKind::Abs => write!(f, "'abs'"),
172 TokenKind::Floor => write!(f, "'floor'"),
173 TokenKind::Ceil => write!(f, "'ceil'"),
174 TokenKind::Round => write!(f, "'round'"),
175 TokenKind::Years => write!(f, "'years'"),
176 TokenKind::Year => write!(f, "'year'"),
177 TokenKind::Months => write!(f, "'months'"),
178 TokenKind::Month => write!(f, "'month'"),
179 TokenKind::Weeks => write!(f, "'weeks'"),
180 TokenKind::Week => write!(f, "'week'"),
181 TokenKind::Days => write!(f, "'days'"),
182 TokenKind::Day => write!(f, "'day'"),
183 TokenKind::Hours => write!(f, "'hours'"),
184 TokenKind::Hour => write!(f, "'hour'"),
185 TokenKind::Minutes => write!(f, "'minutes'"),
186 TokenKind::Minute => write!(f, "'minute'"),
187 TokenKind::Seconds => write!(f, "'seconds'"),
188 TokenKind::Second => write!(f, "'second'"),
189 TokenKind::Milliseconds => write!(f, "'milliseconds'"),
190 TokenKind::Millisecond => write!(f, "'millisecond'"),
191 TokenKind::Microseconds => write!(f, "'microseconds'"),
192 TokenKind::Microsecond => write!(f, "'microsecond'"),
193 TokenKind::Permille => write!(f, "'permille'"),
194 TokenKind::Is => write!(f, "'is'"),
195 TokenKind::Plus => write!(f, "'+'"),
196 TokenKind::Minus => write!(f, "'-'"),
197 TokenKind::Star => write!(f, "'*'"),
198 TokenKind::Slash => write!(f, "'/'"),
199 TokenKind::Percent => write!(f, "'%'"),
200 TokenKind::PercentPercent => write!(f, "'%%'"),
201 TokenKind::Caret => write!(f, "'^'"),
202 TokenKind::Gt => write!(f, "'>'"),
203 TokenKind::Lt => write!(f, "'<'"),
204 TokenKind::Gte => write!(f, "'>='"),
205 TokenKind::Lte => write!(f, "'<='"),
206 TokenKind::EqEq => write!(f, "'=='"),
207 TokenKind::BangEq => write!(f, "'!='"),
208 TokenKind::Colon => write!(f, "':'"),
209 TokenKind::Arrow => write!(f, "'->'"),
210 TokenKind::Tilde => write!(f, "'~'"),
211 TokenKind::Dot => write!(f, "'.'"),
212 TokenKind::At => write!(f, "'@'"),
213 TokenKind::LParen => write!(f, "'('"),
214 TokenKind::RParen => write!(f, "')'"),
215 TokenKind::LBracket => write!(f, "'['"),
216 TokenKind::RBracket => write!(f, "']'"),
217 TokenKind::NumberLit => write!(f, "a number"),
218 TokenKind::StringLit => write!(f, "a string"),
219 TokenKind::Commentary => write!(f, "commentary block"),
220 TokenKind::Identifier => write!(f, "an identifier"),
221 TokenKind::Eof => write!(f, "end of file"),
222 }
223 }
224}
225
/// A single lexed token: its category, source location, and raw text.
#[derive(Debug, Clone)]
pub struct Token {
    /// Which category of token this is.
    pub kind: TokenKind,
    /// Byte span plus starting line/column in the source.
    pub span: Span,
    /// Raw scanned text. String literals keep their surrounding quotes;
    /// commentary holds only the inner content; EOF is empty.
    pub text: String,
}
232
233impl Token {
234 pub fn eof(offset: usize, line: usize, col: usize) -> Self {
235 Token {
236 kind: TokenKind::Eof,
237 span: Span {
238 start: offset,
239 end: offset,
240 line,
241 col,
242 },
243 text: String::new(),
244 }
245 }
246}
247
/// Hand-written lexer with two tokens of lookahead.
pub struct Lexer {
    /// Input decoded into chars for index-based scanning.
    source: Vec<char>,
    /// Current index into `source` (counts chars, not bytes).
    pos: usize,
    /// 1-based current line.
    line: usize,
    /// 1-based current column.
    col: usize,
    /// Byte offset into the original text; spans are byte-based.
    byte_offset: usize,
    /// Source label (e.g. a file name) attached to every error.
    attribute: String,
    /// Shared copy of the raw input, used for slicing token text by span.
    source_text: Arc<str>,
    /// First lookahead slot (filled by `peek`).
    peeked: Option<Token>,
    /// Second lookahead slot (filled by `peek_second`).
    peeked2: Option<Token>,
}
259
impl Lexer {
    /// Creates a lexer over `input`; `attribute` names the source (e.g. a
    /// file name) and is attached to every error produced.
    pub fn new(input: &str, attribute: &str) -> Self {
        let source_text: Arc<str> = Arc::from(input);
        Lexer {
            source: input.chars().collect(),
            pos: 0,
            line: 1,
            col: 1,
            byte_offset: 0,
            attribute: attribute.to_string(),
            source_text,
            peeked: None,
            peeked2: None,
        }
    }

    /// Cheap (refcounted) handle to the full source text.
    pub fn source_text(&self) -> Arc<str> {
        self.source_text.clone()
    }

    /// The source label used when constructing errors.
    pub fn attribute(&self) -> &str {
        &self.attribute
    }

    /// Returns the next token without consuming it.
    pub fn peek(&mut self) -> Result<&Token, Error> {
        if self.peeked.is_none() {
            let token = self.lex_token()?;
            self.peeked = Some(token);
        }
        Ok(self.peeked.as_ref().expect("just assigned"))
    }

    /// Returns the token after the next one without consuming either.
    pub fn peek_second(&mut self) -> Result<&Token, Error> {
        // Fill the first lookahead slot before the second.
        self.peek()?;
        if self.peeked2.is_none() {
            let token = self.lex_token()?;
            self.peeked2 = Some(token);
        }
        Ok(self.peeked2.as_ref().expect("just assigned"))
    }

    /// Zero-width span at the lexer's current position.
    pub fn current_span(&self) -> Span {
        Span {
            start: self.byte_offset,
            end: self.byte_offset,
            line: self.line,
            col: self.col,
        }
    }

    /// Skips whitespace, then scans a raw run of ASCII alphanumerics,
    /// bypassing normal tokenization.
    ///
    /// NOTE(review): this clears both lookahead slots, but the underlying
    /// position has already advanced past any previously peeked tokens, so
    /// their text would be silently lost — presumably callers only invoke
    /// this when nothing is peeked; confirm at call sites.
    pub fn scan_raw_alphanumeric(&mut self) -> Result<String, Error> {
        self.peeked = None;
        self.peeked2 = None;
        self.skip_whitespace();
        let mut result = String::new();
        while let Some(ch) = self.current_char() {
            if ch.is_ascii_alphanumeric() {
                result.push(ch);
                self.advance();
            } else {
                break;
            }
        }
        Ok(result)
    }

    /// Consumes and returns the next token, draining lookahead first.
    pub fn next_token(&mut self) -> Result<Token, Error> {
        if let Some(token) = self.peeked.take() {
            // Shift the second lookahead slot into the first.
            self.peeked = self.peeked2.take();
            return Ok(token);
        }
        self.lex_token()
    }

    fn current_char(&self) -> Option<char> {
        self.source.get(self.pos).copied()
    }

    fn peek_char(&self) -> Option<char> {
        self.source.get(self.pos + 1).copied()
    }

    fn peek_char_at(&self, offset: usize) -> Option<char> {
        self.source.get(self.pos + offset).copied()
    }

    /// Advances one character, keeping byte offset and line/col in sync.
    fn advance(&mut self) {
        if let Some(ch) = self.current_char() {
            // Spans are byte-based while `pos` indexes chars: track both.
            self.byte_offset += ch.len_utf8();
            if ch == '\n' {
                self.line += 1;
                self.col = 1;
            } else {
                self.col += 1;
            }
            self.pos += 1;
        }
    }

    fn skip_whitespace(&mut self) {
        while let Some(ch) = self.current_char() {
            if ch.is_whitespace() {
                self.advance();
            } else {
                break;
            }
        }
    }

    /// Span from a recorded start position up to the current byte offset.
    fn make_span(&self, start_byte: usize, start_line: usize, start_col: usize) -> Span {
        Span {
            start: start_byte,
            end: self.byte_offset,
            line: start_line,
            col: start_col,
        }
    }

    fn make_error(&self, message: impl Into<String>, span: Span) -> Error {
        Error::parsing(message, Source::new(&self.attribute, span), None::<String>)
    }

    /// Scans a single token starting at the current position.
    fn lex_token(&mut self) -> Result<Token, Error> {
        self.skip_whitespace();

        let start_byte = self.byte_offset;
        let start_line = self.line;
        let start_col = self.col;

        let Some(ch) = self.current_char() else {
            return Ok(Token::eof(start_byte, start_line, start_col));
        };

        // `"""` commentary must be checked before a plain string literal.
        if ch == '"' && self.peek_char() == Some('"') && self.peek_char_at(2) == Some('"') {
            return self.scan_triple_quote(start_byte, start_line, start_col);
        }

        if ch == '"' {
            return self.scan_string(start_byte, start_line, start_col);
        }

        if ch.is_ascii_digit() {
            return self.scan_number(start_byte, start_line, start_col);
        }

        // Two-char operators take priority over their one-char prefixes
        // (e.g. '->' before '-', '>=' before '>').
        if let Some(token) = self.try_two_char_operator(start_byte, start_line, start_col) {
            return Ok(token);
        }

        if let Some(kind) = self.single_char_token(ch) {
            self.advance();
            let span = self.make_span(start_byte, start_line, start_col);
            let text = ch.to_string();
            return Ok(Token { kind, span, text });
        }

        if ch.is_ascii_alphabetic() || ch == '_' {
            return Ok(self.scan_identifier(start_byte, start_line, start_col));
        }

        // '@' is handled here rather than in `single_char_token`.
        if ch == '@' {
            self.advance();
            let span = self.make_span(start_byte, start_line, start_col);
            return Ok(Token {
                kind: TokenKind::At,
                span,
                text: "@".to_string(),
            });
        }

        // Consume the offending character so the error span covers it.
        self.advance();
        let span = self.make_span(start_byte, start_line, start_col);
        Err(self.make_error(format!("Unexpected character '{}'", ch), span))
    }

    /// Scans a `"""..."""` commentary block; the token text holds only the
    /// inner content, without the delimiters.
    fn scan_triple_quote(
        &mut self,
        start_byte: usize,
        start_line: usize,
        start_col: usize,
    ) -> Result<Token, Error> {
        // Skip the opening `"""`.
        self.advance();
        self.advance();
        self.advance();
        let content_start = self.byte_offset;
        loop {
            match self.current_char() {
                None => {
                    let span = self.make_span(start_byte, start_line, start_col);
                    return Err(self.make_error(
                        "Unterminated commentary block: expected closing \"\"\"",
                        span,
                    ));
                }
                Some('"')
                    if self.source.get(self.pos + 1) == Some(&'"')
                        && self.source.get(self.pos + 2) == Some(&'"') =>
                {
                    let content_end = self.byte_offset;
                    // Skip the closing `"""`.
                    self.advance();
                    self.advance();
                    self.advance();
                    let raw: String = self.source_text[content_start..content_end].to_string();
                    let span = self.make_span(start_byte, start_line, start_col);
                    return Ok(Token {
                        kind: TokenKind::Commentary,
                        span,
                        text: raw,
                    });
                }
                Some(_) => {
                    self.advance();
                }
            }
        }
    }

    /// Scans a double-quoted string literal; the token text keeps the
    /// surrounding quotes. No escape sequences are recognized.
    fn scan_string(
        &mut self,
        start_byte: usize,
        start_line: usize,
        start_col: usize,
    ) -> Result<Token, Error> {
        // Skip the opening quote.
        self.advance();
        let mut content = String::new();
        loop {
            match self.current_char() {
                None => {
                    let span = self.make_span(start_byte, start_line, start_col);
                    return Err(self.make_error("String starting here was never closed", span));
                }
                Some('"') => {
                    self.advance();
                    break;
                }
                Some(ch) => {
                    content.push(ch);
                    self.advance();
                }
            }
        }
        let span = self.make_span(start_byte, start_line, start_col);
        let full_text = format!("\"{}\"", content);
        Ok(Token {
            kind: TokenKind::StringLit,
            span,
            text: full_text,
        })
    }

    /// Scans a numeric literal: digits with `_`/`,` separators, an optional
    /// fractional part, and an optional scientific-notation suffix.
    fn scan_number(
        &mut self,
        start_byte: usize,
        start_line: usize,
        start_col: usize,
    ) -> Result<Token, Error> {
        let mut text = String::new();

        // Integer part; '_' and ',' are kept as digit separators.
        while let Some(ch) = self.current_char() {
            if ch.is_ascii_digit() || ch == '_' || ch == ',' {
                text.push(ch);
                self.advance();
            } else {
                break;
            }
        }

        // Fractional part only when a digit follows the dot, so `1.foo`
        // still lexes as number, dot, identifier.
        if self.current_char() == Some('.') {
            if let Some(next) = self.peek_char() {
                if next.is_ascii_digit() {
                    text.push('.');
                    self.advance();
                    while let Some(ch) = self.current_char() {
                        if ch.is_ascii_digit() {
                            text.push(ch);
                            self.advance();
                        } else {
                            break;
                        }
                    }
                }
            }
        }

        // Scientific notation: accept `e`/`E` (with optional sign) only when
        // digits follow; otherwise backtrack so `42 eur` stays two tokens.
        if let Some(ch) = self.current_char() {
            if ch == 'e' || ch == 'E' {
                let mut sci_text = String::new();
                sci_text.push(ch);
                // Save full position state for backtracking.
                let save_pos = self.pos;
                let save_byte = self.byte_offset;
                let save_line = self.line;
                let save_col = self.col;
                self.advance();
                if let Some(sign) = self.current_char() {
                    if sign == '+' || sign == '-' {
                        sci_text.push(sign);
                        self.advance();
                    }
                }

                if let Some(d) = self.current_char() {
                    if d.is_ascii_digit() {
                        while let Some(ch) = self.current_char() {
                            if ch.is_ascii_digit() {
                                sci_text.push(ch);
                                self.advance();
                            } else {
                                break;
                            }
                        }
                        text.push_str(&sci_text);
                    } else {
                        // Not an exponent: rewind to just before the 'e'.
                        self.pos = save_pos;
                        self.byte_offset = save_byte;
                        self.line = save_line;
                        self.col = save_col;
                    }
                } else {
                    // Input ended after 'e' (and optional sign): rewind.
                    self.pos = save_pos;
                    self.byte_offset = save_byte;
                    self.line = save_line;
                    self.col = save_col;
                }
            }
        }

        let span = self.make_span(start_byte, start_line, start_col);
        Ok(Token {
            kind: TokenKind::NumberLit,
            span,
            text,
        })
    }

    /// Tries to scan a two-character operator; consumes nothing on failure.
    fn try_two_char_operator(
        &mut self,
        start_byte: usize,
        start_line: usize,
        start_col: usize,
    ) -> Option<Token> {
        let ch = self.current_char()?;
        let next = self.peek_char();

        let kind = match (ch, next) {
            ('-', Some('>')) => TokenKind::Arrow,
            ('>', Some('=')) => TokenKind::Gte,
            ('<', Some('=')) => TokenKind::Lte,
            ('=', Some('=')) => TokenKind::EqEq,
            ('!', Some('=')) => TokenKind::BangEq,
            ('%', Some('%')) => {
                TokenKind::PercentPercent
            }
            _ => return None,
        };

        self.advance();
        self.advance();
        let span = self.make_span(start_byte, start_line, start_col);
        // Slice the original text by byte span to preserve it exactly.
        let text: String = self.source_text[span.start..span.end].to_string();
        Some(Token { kind, span, text })
    }

    /// Single-character operator/punctuation lookup ('@' is handled
    /// separately in `lex_token`).
    fn single_char_token(&self, ch: char) -> Option<TokenKind> {
        match ch {
            '+' => Some(TokenKind::Plus),
            '*' => Some(TokenKind::Star),
            '/' => Some(TokenKind::Slash),
            '^' => Some(TokenKind::Caret),
            ':' => Some(TokenKind::Colon),
            '~' => Some(TokenKind::Tilde),
            '.' => Some(TokenKind::Dot),
            '(' => Some(TokenKind::LParen),
            ')' => Some(TokenKind::RParen),
            '[' => Some(TokenKind::LBracket),
            ']' => Some(TokenKind::RBracket),
            '>' => Some(TokenKind::Gt),
            '<' => Some(TokenKind::Lt),
            '%' => Some(TokenKind::Percent),
            '-' => Some(TokenKind::Minus),
            _ => None,
        }
    }

    /// Scans an identifier or keyword: ASCII alphanumerics and '_' starting
    /// from a letter or '_'.
    fn scan_identifier(&mut self, start_byte: usize, start_line: usize, start_col: usize) -> Token {
        let mut text = String::new();
        while let Some(ch) = self.current_char() {
            if ch.is_ascii_alphanumeric() || ch == '_' {
                text.push(ch);
                self.advance();
            } else {
                break;
            }
        }

        // Keyword recognition is case-insensitive; `text` keeps original casing.
        let kind = keyword_from_identifier(&text);
        let span = self.make_span(start_byte, start_line, start_col);
        Token { kind, span, text }
    }
}
682
683fn keyword_from_identifier(text: &str) -> TokenKind {
684 match text.to_lowercase().as_str() {
685 "spec" => TokenKind::Spec,
686 "fact" => TokenKind::Fact,
687 "rule" => TokenKind::Rule,
688 "unless" => TokenKind::Unless,
689 "then" => TokenKind::Then,
690 "not" => TokenKind::Not,
691 "and" => TokenKind::And,
692 "in" => TokenKind::In,
693 "type" => TokenKind::Type,
694 "from" => TokenKind::From,
695 "with" => TokenKind::With,
696 "meta" => TokenKind::Meta,
697 "veto" => TokenKind::Veto,
698 "now" => TokenKind::Now,
699 "calendar" => TokenKind::Calendar,
700 "past" => TokenKind::Past,
701 "future" => TokenKind::Future,
702 "true" => TokenKind::True,
703 "false" => TokenKind::False,
704 "yes" => TokenKind::Yes,
705 "no" => TokenKind::No,
706 "accept" => TokenKind::Accept,
707 "reject" => TokenKind::Reject,
708 "scale" => TokenKind::ScaleKw,
709 "number" => TokenKind::NumberKw,
710 "text" => TokenKind::TextKw,
711 "date" => TokenKind::DateKw,
712 "time" => TokenKind::TimeKw,
713 "duration" => TokenKind::DurationKw,
714 "boolean" => TokenKind::BooleanKw,
715 "percent" => TokenKind::PercentKw,
716 "ratio" => TokenKind::RatioKw,
717 "sqrt" => TokenKind::Sqrt,
718 "sin" => TokenKind::Sin,
719 "cos" => TokenKind::Cos,
720 "tan" => TokenKind::Tan,
721 "asin" => TokenKind::Asin,
722 "acos" => TokenKind::Acos,
723 "atan" => TokenKind::Atan,
724 "log" => TokenKind::Log,
725 "exp" => TokenKind::Exp,
726 "abs" => TokenKind::Abs,
727 "floor" => TokenKind::Floor,
728 "ceil" => TokenKind::Ceil,
729 "round" => TokenKind::Round,
730 "is" => TokenKind::Is,
731 "years" => TokenKind::Years,
732 "year" => TokenKind::Year,
733 "months" => TokenKind::Months,
734 "month" => TokenKind::Month,
735 "weeks" => TokenKind::Weeks,
736 "week" => TokenKind::Week,
737 "days" => TokenKind::Days,
738 "day" => TokenKind::Day,
739 "hours" => TokenKind::Hours,
740 "hour" => TokenKind::Hour,
741 "minutes" => TokenKind::Minutes,
742 "minute" => TokenKind::Minute,
743 "seconds" => TokenKind::Seconds,
744 "second" => TokenKind::Second,
745 "milliseconds" => TokenKind::Milliseconds,
746 "millisecond" => TokenKind::Millisecond,
747 "microseconds" => TokenKind::Microseconds,
748 "microsecond" => TokenKind::Microsecond,
749 "permille" => TokenKind::Permille,
750 _ => TokenKind::Identifier,
751 }
752}
753
754pub fn is_structural_keyword(kind: &TokenKind) -> bool {
759 matches!(
760 kind,
761 TokenKind::Spec
762 | TokenKind::Fact
763 | TokenKind::Rule
764 | TokenKind::Unless
765 | TokenKind::Then
766 | TokenKind::Not
767 | TokenKind::And
768 | TokenKind::In
769 | TokenKind::Type
770 | TokenKind::From
771 | TokenKind::With
772 | TokenKind::Meta
773 | TokenKind::Veto
774 | TokenKind::Now
775 | TokenKind::Sqrt
776 | TokenKind::Sin
777 | TokenKind::Cos
778 | TokenKind::Tan
779 | TokenKind::Asin
780 | TokenKind::Acos
781 | TokenKind::Atan
782 | TokenKind::Log
783 | TokenKind::Exp
784 | TokenKind::Abs
785 | TokenKind::Floor
786 | TokenKind::Ceil
787 | TokenKind::Round
788 | TokenKind::True
789 | TokenKind::False
790 | TokenKind::Yes
791 | TokenKind::No
792 | TokenKind::Accept
793 | TokenKind::Reject
794 )
795}
796
/// True when `kind` names a primitive type (see `token_kind_to_primitive`).
pub fn is_type_keyword(kind: &TokenKind) -> bool {
    token_kind_to_primitive(kind).is_some()
}
802
803#[must_use]
805pub fn token_kind_to_primitive(kind: &TokenKind) -> Option<PrimitiveKind> {
806 match kind {
807 TokenKind::BooleanKw => Some(PrimitiveKind::Boolean),
808 TokenKind::ScaleKw => Some(PrimitiveKind::Scale),
809 TokenKind::NumberKw => Some(PrimitiveKind::Number),
810 TokenKind::PercentKw => Some(PrimitiveKind::Percent),
811 TokenKind::RatioKw => Some(PrimitiveKind::Ratio),
812 TokenKind::TextKw => Some(PrimitiveKind::Text),
813 TokenKind::DateKw => Some(PrimitiveKind::Date),
814 TokenKind::TimeKw => Some(PrimitiveKind::Time),
815 TokenKind::DurationKw => Some(PrimitiveKind::Duration),
816 _ => None,
817 }
818}
819
820pub fn is_boolean_keyword(kind: &TokenKind) -> bool {
822 matches!(
823 kind,
824 TokenKind::True
825 | TokenKind::False
826 | TokenKind::Yes
827 | TokenKind::No
828 | TokenKind::Accept
829 | TokenKind::Reject
830 )
831}
832
/// True for the duration-unit keywords (singular and plural forms).
///
/// NOTE: `PercentKw` is included here even though it has no [`DurationUnit`]
/// mapping — `token_kind_to_duration_unit` panics on it. Callers such as
/// `conversion_target_from_token` explicitly filter `PercentKw` out before
/// converting; any new caller must do the same.
pub fn is_duration_unit(kind: &TokenKind) -> bool {
    matches!(
        kind,
        TokenKind::Years
            | TokenKind::Year
            | TokenKind::Months
            | TokenKind::Month
            | TokenKind::Weeks
            | TokenKind::Week
            | TokenKind::Days
            | TokenKind::Day
            | TokenKind::Hours
            | TokenKind::Hour
            | TokenKind::Minutes
            | TokenKind::Minute
            | TokenKind::Seconds
            | TokenKind::Second
            | TokenKind::Milliseconds
            | TokenKind::Millisecond
            | TokenKind::Microseconds
            | TokenKind::Microsecond
            | TokenKind::PercentKw
    )
}
858
/// Maps a duration-unit token (singular or plural) to its [`DurationUnit`].
///
/// # Panics
/// Panics on any other token kind — including `PercentKw`, which
/// `is_duration_unit` accepts; guard call sites accordingly.
#[must_use]
pub fn token_kind_to_duration_unit(kind: &TokenKind) -> DurationUnit {
    match kind {
        TokenKind::Years | TokenKind::Year => DurationUnit::Year,
        TokenKind::Months | TokenKind::Month => DurationUnit::Month,
        TokenKind::Weeks | TokenKind::Week => DurationUnit::Week,
        TokenKind::Days | TokenKind::Day => DurationUnit::Day,
        TokenKind::Hours | TokenKind::Hour => DurationUnit::Hour,
        TokenKind::Minutes | TokenKind::Minute => DurationUnit::Minute,
        TokenKind::Seconds | TokenKind::Second => DurationUnit::Second,
        TokenKind::Milliseconds | TokenKind::Millisecond => DurationUnit::Millisecond,
        TokenKind::Microseconds | TokenKind::Microsecond => DurationUnit::Microsecond,
        _ => unreachable!(
            "BUG: token_kind_to_duration_unit called with non-duration token {:?}",
            kind
        ),
    }
}
878
879#[must_use]
882pub fn conversion_target_from_token(kind: &TokenKind, fallback_text: &str) -> ConversionTarget {
883 if is_duration_unit(kind) && *kind != TokenKind::PercentKw {
884 ConversionTarget::Duration(token_kind_to_duration_unit(kind))
885 } else {
886 ConversionTarget::Unit(fallback_text.to_lowercase())
887 }
888}
889
890#[must_use]
892pub fn is_calendar_unit_token(kind: &TokenKind) -> bool {
893 matches!(
894 kind,
895 TokenKind::Years
896 | TokenKind::Year
897 | TokenKind::Months
898 | TokenKind::Month
899 | TokenKind::Weeks
900 | TokenKind::Week
901 )
902}
903
/// Maps a calendar-unit token (singular or plural) to its [`CalendarUnit`].
///
/// # Panics
/// Panics on any token that does not satisfy `is_calendar_unit_token`.
#[must_use]
pub fn token_kind_to_calendar_unit(kind: &TokenKind) -> CalendarUnit {
    match kind {
        TokenKind::Years | TokenKind::Year => CalendarUnit::Year,
        TokenKind::Months | TokenKind::Month => CalendarUnit::Month,
        TokenKind::Weeks | TokenKind::Week => CalendarUnit::Week,
        _ => unreachable!(
            "BUG: token_kind_to_calendar_unit called with non-calendar token {:?}",
            kind
        ),
    }
}
917
/// Maps a boolean literal keyword token to its [`BooleanValue`].
///
/// # Panics
/// Panics on any token that does not satisfy `is_boolean_keyword`.
#[must_use]
pub fn token_kind_to_boolean_value(kind: &TokenKind) -> BooleanValue {
    match kind {
        TokenKind::True => BooleanValue::True,
        TokenKind::False => BooleanValue::False,
        TokenKind::Yes => BooleanValue::Yes,
        TokenKind::No => BooleanValue::No,
        TokenKind::Accept => BooleanValue::Accept,
        TokenKind::Reject => BooleanValue::Reject,
        _ => unreachable!(
            "BUG: token_kind_to_boolean_value called with non-boolean token {:?}",
            kind
        ),
    }
}
934
935pub fn is_math_function(kind: &TokenKind) -> bool {
937 matches!(
938 kind,
939 TokenKind::Sqrt
940 | TokenKind::Sin
941 | TokenKind::Cos
942 | TokenKind::Tan
943 | TokenKind::Asin
944 | TokenKind::Acos
945 | TokenKind::Atan
946 | TokenKind::Log
947 | TokenKind::Exp
948 | TokenKind::Abs
949 | TokenKind::Floor
950 | TokenKind::Ceil
951 | TokenKind::Round
952 )
953}
954
955pub fn is_spec_body_keyword(kind: &TokenKind) -> bool {
958 matches!(
959 kind,
960 TokenKind::Fact | TokenKind::Rule | TokenKind::Type | TokenKind::Meta
961 )
962}
963
964pub fn can_be_label(kind: &TokenKind) -> bool {
969 matches!(
970 kind,
971 TokenKind::Identifier
972 | TokenKind::Calendar
973 | TokenKind::Past
974 | TokenKind::Future
975 | TokenKind::Years
976 | TokenKind::Year
977 | TokenKind::Months
978 | TokenKind::Month
979 | TokenKind::Weeks
980 | TokenKind::Week
981 | TokenKind::Days
982 | TokenKind::Day
983 | TokenKind::Hours
984 | TokenKind::Hour
985 | TokenKind::Minutes
986 | TokenKind::Minute
987 | TokenKind::Seconds
988 | TokenKind::Second
989 | TokenKind::Milliseconds
990 | TokenKind::Millisecond
991 | TokenKind::Microseconds
992 | TokenKind::Microsecond
993 | TokenKind::Permille
994 | TokenKind::Is
995 )
996}
997
/// True for tokens allowed as a segment of a dotted reference: anything
/// usable as a label, plus the primitive type keywords.
pub fn can_be_reference_segment(kind: &TokenKind) -> bool {
    can_be_label(kind) || is_type_keyword(kind)
}
1003
#[cfg(test)]
mod tests {
    use super::*;

    /// Lexes `input` to completion, returning all tokens including the
    /// trailing EOF token.
    fn lex_all(input: &str) -> Result<Vec<Token>, Error> {
        let mut lexer = Lexer::new(input, "test.lemma");
        let mut tokens = Vec::new();
        loop {
            let token = lexer.next_token()?;
            if token.kind == TokenKind::Eof {
                tokens.push(token);
                break;
            }
            tokens.push(token);
        }
        Ok(tokens)
    }

    /// Like `lex_all`, but keeps only the token kinds.
    fn lex_kinds(input: &str) -> Result<Vec<TokenKind>, Error> {
        Ok(lex_all(input)?.into_iter().map(|t| t.kind).collect())
    }

    #[test]
    fn lex_empty_input() {
        let tokens = lex_all("").unwrap();
        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0].kind, TokenKind::Eof);
    }

    #[test]
    fn lex_spec_declaration() {
        let kinds = lex_kinds("spec person").unwrap();
        assert_eq!(
            kinds,
            vec![TokenKind::Spec, TokenKind::Identifier, TokenKind::Eof]
        );
    }

    #[test]
    fn lex_fact_definition() {
        let kinds = lex_kinds("fact age: 25").unwrap();
        assert_eq!(
            kinds,
            vec![
                TokenKind::Fact,
                TokenKind::Identifier,
                TokenKind::Colon,
                TokenKind::NumberLit,
                TokenKind::Eof,
            ]
        );
    }

    #[test]
    fn lex_rule_with_comparison() {
        let kinds = lex_kinds("rule is_adult: age >= 18").unwrap();
        assert_eq!(
            kinds,
            vec![
                TokenKind::Rule,
                TokenKind::Identifier,
                TokenKind::Colon,
                TokenKind::Identifier,
                TokenKind::Gte,
                TokenKind::NumberLit,
                TokenKind::Eof,
            ]
        );
    }

    #[test]
    fn lex_string_literal() {
        // String token text keeps its surrounding quotes.
        let tokens = lex_all(r#""hello world""#).unwrap();
        assert_eq!(tokens[0].kind, TokenKind::StringLit);
        assert_eq!(tokens[0].text, "\"hello world\"");
    }

    #[test]
    fn lex_unterminated_string() {
        let result = lex_all(r#""hello"#);
        assert!(result.is_err());
    }

    #[test]
    fn lex_number_with_decimal() {
        let tokens = lex_all("3.14").unwrap();
        assert_eq!(tokens[0].kind, TokenKind::NumberLit);
        assert_eq!(tokens[0].text, "3.14");
    }

    #[test]
    fn lex_number_with_underscores() {
        let tokens = lex_all("1_000_000").unwrap();
        assert_eq!(tokens[0].kind, TokenKind::NumberLit);
        assert_eq!(tokens[0].text, "1_000_000");
    }

    #[test]
    fn lex_scientific_notation() {
        let tokens = lex_all("1.5e+10").unwrap();
        assert_eq!(tokens[0].kind, TokenKind::NumberLit);
        assert_eq!(tokens[0].text, "1.5e+10");
    }

    #[test]
    fn lex_all_operators() {
        let kinds = lex_kinds("+ - * / % ^ > < >= <= == != -> %%").unwrap();
        assert_eq!(
            &kinds[..14],
            &[
                TokenKind::Plus,
                TokenKind::Minus,
                TokenKind::Star,
                TokenKind::Slash,
                TokenKind::Percent,
                TokenKind::Caret,
                TokenKind::Gt,
                TokenKind::Lt,
                TokenKind::Gte,
                TokenKind::Lte,
                TokenKind::EqEq,
                TokenKind::BangEq,
                TokenKind::Arrow,
                TokenKind::PercentPercent,
            ]
        );
    }

    #[test]
    fn lex_keywords() {
        let kinds = lex_kinds("spec fact rule unless then not and in type from with meta veto now")
            .unwrap();
        assert_eq!(
            &kinds[..14],
            &[
                TokenKind::Spec,
                TokenKind::Fact,
                TokenKind::Rule,
                TokenKind::Unless,
                TokenKind::Then,
                TokenKind::Not,
                TokenKind::And,
                TokenKind::In,
                TokenKind::Type,
                TokenKind::From,
                TokenKind::With,
                TokenKind::Meta,
                TokenKind::Veto,
                TokenKind::Now,
            ]
        );
    }

    #[test]
    fn lex_boolean_keywords() {
        let kinds = lex_kinds("true false yes no accept reject").unwrap();
        assert_eq!(
            &kinds[..6],
            &[
                TokenKind::True,
                TokenKind::False,
                TokenKind::Yes,
                TokenKind::No,
                TokenKind::Accept,
                TokenKind::Reject,
            ]
        );
    }

    #[test]
    fn lex_duration_keywords() {
        let kinds = lex_kinds("years months weeks days hours minutes seconds").unwrap();
        assert_eq!(
            &kinds[..7],
            &[
                TokenKind::Years,
                TokenKind::Months,
                TokenKind::Weeks,
                TokenKind::Days,
                TokenKind::Hours,
                TokenKind::Minutes,
                TokenKind::Seconds,
            ]
        );
    }

    #[test]
    fn lex_commentary() {
        // Commentary token text excludes the `"""` delimiters.
        let tokens = lex_all(r#""""hello world""""#).unwrap();
        assert_eq!(tokens[0].kind, TokenKind::Commentary);
        assert_eq!(tokens[0].text, "hello world");
    }

    #[test]
    fn lex_at_sign() {
        let kinds = lex_kinds("@user").unwrap();
        assert_eq!(kinds[0], TokenKind::At);
        assert_eq!(kinds[1], TokenKind::Identifier);
    }

    #[test]
    fn lex_tilde() {
        let kinds = lex_kinds("~").unwrap();
        assert_eq!(kinds[0], TokenKind::Tilde);
    }

    #[test]
    fn lex_brackets() {
        let kinds = lex_kinds("[number]").unwrap();
        assert_eq!(
            &kinds[..3],
            &[
                TokenKind::LBracket,
                TokenKind::NumberKw,
                TokenKind::RBracket
            ]
        );
    }

    #[test]
    fn lex_parentheses() {
        let kinds = lex_kinds("(x + 1)").unwrap();
        assert_eq!(
            &kinds[..5],
            &[
                TokenKind::LParen,
                TokenKind::Identifier,
                TokenKind::Plus,
                TokenKind::NumberLit,
                TokenKind::RParen,
            ]
        );
    }

    #[test]
    fn lex_dot_for_references() {
        let kinds = lex_kinds("employee.salary").unwrap();
        assert_eq!(
            &kinds[..3],
            &[TokenKind::Identifier, TokenKind::Dot, TokenKind::Identifier]
        );
    }

    #[test]
    fn lex_spec_name_with_slashes() {
        let tokens = lex_all("spec contracts/employment/jack").unwrap();
        assert_eq!(tokens[0].kind, TokenKind::Spec);
        assert_eq!(tokens[1].kind, TokenKind::Identifier);
    }

    #[test]
    fn lex_number_not_followed_by_e_identifier() {
        // The exponent scan must backtrack when 'e' starts an identifier.
        let tokens = lex_all("42 eur").unwrap();
        assert_eq!(tokens[0].kind, TokenKind::NumberLit);
        assert_eq!(tokens[0].text, "42");
        assert_eq!(tokens[1].kind, TokenKind::Identifier);
        assert_eq!(tokens[1].text, "eur");
    }

    #[test]
    fn lex_unknown_character() {
        let result = lex_all("ยง");
        assert!(result.is_err());
    }

    #[test]
    fn lex_peek_does_not_consume() {
        let mut lexer = Lexer::new("spec test", "test.lemma");
        let peeked_kind = lexer.peek().unwrap().kind.clone();
        assert_eq!(peeked_kind, TokenKind::Spec);
        let next = lexer.next_token().unwrap();
        assert_eq!(next.kind, TokenKind::Spec);
    }

    #[test]
    fn lex_span_byte_offsets() {
        let tokens = lex_all("spec test").unwrap();
        assert_eq!(tokens[0].span.start, 0);
        assert_eq!(tokens[0].span.end, 4);
        assert_eq!(tokens[0].span.line, 1);
        assert_eq!(tokens[0].span.col, 1);

        assert_eq!(tokens[1].span.start, 5);
        assert_eq!(tokens[1].span.end, 9);
        assert_eq!(tokens[1].span.line, 1);
        assert_eq!(tokens[1].span.col, 6);
    }

    #[test]
    fn lex_multiline_span_tracking() {
        let tokens = lex_all("spec test\nfact x: 1").unwrap();
        let fact_token = &tokens[2];
        assert_eq!(fact_token.kind, TokenKind::Fact);
        assert_eq!(fact_token.span.line, 2);
        assert_eq!(fact_token.span.col, 1);
    }

    #[test]
    fn lex_case_insensitive_keywords() {
        let kinds = lex_kinds("SPEC Fact RULE").unwrap();
        assert_eq!(kinds[0], TokenKind::Spec);
        assert_eq!(kinds[1], TokenKind::Fact);
        assert_eq!(kinds[2], TokenKind::Rule);
    }

    #[test]
    fn lex_math_function_keywords() {
        let kinds =
            lex_kinds("sqrt sin cos tan asin acos atan log exp abs floor ceil round").unwrap();
        assert_eq!(
            &kinds[..13],
            &[
                TokenKind::Sqrt,
                TokenKind::Sin,
                TokenKind::Cos,
                TokenKind::Tan,
                TokenKind::Asin,
                TokenKind::Acos,
                TokenKind::Atan,
                TokenKind::Log,
                TokenKind::Exp,
                TokenKind::Abs,
                TokenKind::Floor,
                TokenKind::Ceil,
                TokenKind::Round,
            ]
        );
    }

    #[test]
    fn lex_is_keyword() {
        let kinds = lex_kinds("status is \"active\"").unwrap();
        assert_eq!(kinds[0], TokenKind::Identifier);
        assert_eq!(kinds[1], TokenKind::Is);
        assert_eq!(kinds[2], TokenKind::StringLit);
    }

    #[test]
    fn lex_percent_not_followed_by_digit() {
        let kinds = lex_kinds("50%").unwrap();
        assert_eq!(kinds[0], TokenKind::NumberLit);
        assert_eq!(kinds[1], TokenKind::Percent);
    }

    #[test]
    fn lex_number_with_commas() {
        let tokens = lex_all("1,000,000").unwrap();
        assert_eq!(tokens[0].kind, TokenKind::NumberLit);
        assert_eq!(tokens[0].text, "1,000,000");
    }

    #[test]
    fn lex_arrow_chain() {
        let kinds = lex_kinds("-> unit eur 1.00 -> decimals 2").unwrap();
        assert_eq!(kinds[0], TokenKind::Arrow);
        assert_eq!(kinds[1], TokenKind::Identifier);
        assert_eq!(kinds[2], TokenKind::Identifier);
        assert_eq!(kinds[3], TokenKind::NumberLit);
        assert_eq!(kinds[4], TokenKind::Arrow);
    }
}