1use crate::error::Error;
2use crate::parsing::ast::{
3 BooleanValue, CalendarUnit, ConversionTarget, DurationUnit, PrimitiveKind, Span,
4};
5use crate::parsing::source::Source;
6use std::sync::Arc;
7
/// Every category of token the lexer can produce.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TokenKind {
    // Structural / declaration keywords.
    Spec,
    Fact,
    Rule,
    Unless,
    Then,
    Not,
    And,
    In,
    Type,
    From,
    With,
    Meta,
    Veto,
    Now,
    Calendar,
    Past,
    Future,

    // Boolean literal keywords.
    True,
    False,
    Yes,
    No,
    Accept,
    Reject,

    // Primitive type keywords (the `Kw` suffix distinguishes them from AST types).
    ScaleKw,
    NumberKw,
    TextKw,
    DateKw,
    TimeKw,
    DurationKw,
    BooleanKw,
    PercentKw,
    RatioKw,

    // Math function keywords.
    Sqrt,
    Sin,
    Cos,
    Tan,
    Asin,
    Acos,
    Atan,
    Log,
    Exp,
    Abs,
    Floor,
    Ceil,
    Round,

    // Duration / calendar unit keywords (plural and singular forms).
    Years,
    Year,
    Months,
    Month,
    Weeks,
    Week,
    Days,
    Day,
    Hours,
    Hour,
    Minutes,
    Minute,
    Seconds,
    Second,
    Milliseconds,
    Millisecond,
    Microseconds,
    Microsecond,
    // Per-mille keyword.
    Permille,

    // Comparison keyword (`status is "active"`).
    Is,

    // Arithmetic and comparison operators.
    Plus,
    Minus,
    Star,
    Slash,
    Percent,
    PercentPercent,
    Caret,
    Gt,
    Lt,
    Gte,
    Lte,

    // Punctuation.
    Colon,
    Arrow,
    Tilde,
    Dot,
    At,
    LParen,
    RParen,
    LBracket,
    RBracket,

    // Literals (the token's `text` carries the actual value).
    NumberLit,
    StringLit,

    // `"""…"""` commentary block; `text` carries only the inner content.
    Commentary,

    // Any non-keyword name.
    Identifier,

    // End of input.
    Eof,
}
124
125impl std::fmt::Display for TokenKind {
126 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
127 match self {
128 TokenKind::Spec => write!(f, "'spec'"),
129 TokenKind::Fact => write!(f, "'fact'"),
130 TokenKind::Rule => write!(f, "'rule'"),
131 TokenKind::Unless => write!(f, "'unless'"),
132 TokenKind::Then => write!(f, "'then'"),
133 TokenKind::Not => write!(f, "'not'"),
134 TokenKind::And => write!(f, "'and'"),
135 TokenKind::In => write!(f, "'in'"),
136 TokenKind::Type => write!(f, "'type'"),
137 TokenKind::From => write!(f, "'from'"),
138 TokenKind::With => write!(f, "'with'"),
139 TokenKind::Meta => write!(f, "'meta'"),
140 TokenKind::Veto => write!(f, "'veto'"),
141 TokenKind::Now => write!(f, "'now'"),
142 TokenKind::Calendar => write!(f, "'calendar'"),
143 TokenKind::Past => write!(f, "'past'"),
144 TokenKind::Future => write!(f, "'future'"),
145 TokenKind::True => write!(f, "'true'"),
146 TokenKind::False => write!(f, "'false'"),
147 TokenKind::Yes => write!(f, "'yes'"),
148 TokenKind::No => write!(f, "'no'"),
149 TokenKind::Accept => write!(f, "'accept'"),
150 TokenKind::Reject => write!(f, "'reject'"),
151 TokenKind::ScaleKw => write!(f, "'scale'"),
152 TokenKind::NumberKw => write!(f, "'number'"),
153 TokenKind::TextKw => write!(f, "'text'"),
154 TokenKind::DateKw => write!(f, "'date'"),
155 TokenKind::TimeKw => write!(f, "'time'"),
156 TokenKind::DurationKw => write!(f, "'duration'"),
157 TokenKind::BooleanKw => write!(f, "'boolean'"),
158 TokenKind::PercentKw => write!(f, "'percent'"),
159 TokenKind::RatioKw => write!(f, "'ratio'"),
160 TokenKind::Sqrt => write!(f, "'sqrt'"),
161 TokenKind::Sin => write!(f, "'sin'"),
162 TokenKind::Cos => write!(f, "'cos'"),
163 TokenKind::Tan => write!(f, "'tan'"),
164 TokenKind::Asin => write!(f, "'asin'"),
165 TokenKind::Acos => write!(f, "'acos'"),
166 TokenKind::Atan => write!(f, "'atan'"),
167 TokenKind::Log => write!(f, "'log'"),
168 TokenKind::Exp => write!(f, "'exp'"),
169 TokenKind::Abs => write!(f, "'abs'"),
170 TokenKind::Floor => write!(f, "'floor'"),
171 TokenKind::Ceil => write!(f, "'ceil'"),
172 TokenKind::Round => write!(f, "'round'"),
173 TokenKind::Years => write!(f, "'years'"),
174 TokenKind::Year => write!(f, "'year'"),
175 TokenKind::Months => write!(f, "'months'"),
176 TokenKind::Month => write!(f, "'month'"),
177 TokenKind::Weeks => write!(f, "'weeks'"),
178 TokenKind::Week => write!(f, "'week'"),
179 TokenKind::Days => write!(f, "'days'"),
180 TokenKind::Day => write!(f, "'day'"),
181 TokenKind::Hours => write!(f, "'hours'"),
182 TokenKind::Hour => write!(f, "'hour'"),
183 TokenKind::Minutes => write!(f, "'minutes'"),
184 TokenKind::Minute => write!(f, "'minute'"),
185 TokenKind::Seconds => write!(f, "'seconds'"),
186 TokenKind::Second => write!(f, "'second'"),
187 TokenKind::Milliseconds => write!(f, "'milliseconds'"),
188 TokenKind::Millisecond => write!(f, "'millisecond'"),
189 TokenKind::Microseconds => write!(f, "'microseconds'"),
190 TokenKind::Microsecond => write!(f, "'microsecond'"),
191 TokenKind::Permille => write!(f, "'permille'"),
192 TokenKind::Is => write!(f, "'is'"),
193 TokenKind::Plus => write!(f, "'+'"),
194 TokenKind::Minus => write!(f, "'-'"),
195 TokenKind::Star => write!(f, "'*'"),
196 TokenKind::Slash => write!(f, "'/'"),
197 TokenKind::Percent => write!(f, "'%'"),
198 TokenKind::PercentPercent => write!(f, "'%%'"),
199 TokenKind::Caret => write!(f, "'^'"),
200 TokenKind::Gt => write!(f, "'>'"),
201 TokenKind::Lt => write!(f, "'<'"),
202 TokenKind::Gte => write!(f, "'>='"),
203 TokenKind::Lte => write!(f, "'<='"),
204 TokenKind::Colon => write!(f, "':'"),
205 TokenKind::Arrow => write!(f, "'->'"),
206 TokenKind::Tilde => write!(f, "'~'"),
207 TokenKind::Dot => write!(f, "'.'"),
208 TokenKind::At => write!(f, "'@'"),
209 TokenKind::LParen => write!(f, "'('"),
210 TokenKind::RParen => write!(f, "')'"),
211 TokenKind::LBracket => write!(f, "'['"),
212 TokenKind::RBracket => write!(f, "']'"),
213 TokenKind::NumberLit => write!(f, "a number"),
214 TokenKind::StringLit => write!(f, "a string"),
215 TokenKind::Commentary => write!(f, "commentary block"),
216 TokenKind::Identifier => write!(f, "an identifier"),
217 TokenKind::Eof => write!(f, "end of file"),
218 }
219 }
220}
221
/// A single lexed token with its source location and raw text.
#[derive(Debug, Clone)]
pub struct Token {
    pub kind: TokenKind,
    /// Byte span plus line/column of the token in the original source.
    pub span: Span,
    /// Raw text: string literals keep their surrounding quotes, commentary
    /// blocks carry only the content between the triple quotes.
    pub text: String,
}
228
229impl Token {
230 pub fn eof(offset: usize, line: usize, col: usize) -> Self {
231 Token {
232 kind: TokenKind::Eof,
233 span: Span {
234 start: offset,
235 end: offset,
236 line,
237 col,
238 },
239 text: String::new(),
240 }
241 }
242}
243
/// Hand-written lexer with two tokens of lookahead.
pub struct Lexer {
    // Source as a char vector for position-based access.
    source: Vec<char>,
    // Index into `source` (chars, not bytes).
    pos: usize,
    // 1-based line/column of the current position.
    line: usize,
    col: usize,
    // Byte offset into the original UTF-8 text; spans are byte-based.
    byte_offset: usize,
    // Name of the source (e.g. file name), attached to errors.
    attribute: String,
    // Shared copy of the full input, used to slice out token text.
    source_text: Arc<str>,
    // One- and two-token lookahead buffers.
    peeked: Option<Token>,
    peeked2: Option<Token>,
}
255
256impl Lexer {
257 pub fn new(input: &str, attribute: &str) -> Self {
258 let source_text: Arc<str> = Arc::from(input);
259 Lexer {
260 source: input.chars().collect(),
261 pos: 0,
262 line: 1,
263 col: 1,
264 byte_offset: 0,
265 attribute: attribute.to_string(),
266 source_text,
267 peeked: None,
268 peeked2: None,
269 }
270 }
271
272 pub fn source_text(&self) -> Arc<str> {
273 self.source_text.clone()
274 }
275
276 pub fn attribute(&self) -> &str {
277 &self.attribute
278 }
279
280 pub fn peek(&mut self) -> Result<&Token, Error> {
281 if self.peeked.is_none() {
282 let token = self.lex_token()?;
283 self.peeked = Some(token);
284 }
285 Ok(self.peeked.as_ref().expect("just assigned"))
286 }
287
288 pub fn peek_second(&mut self) -> Result<&Token, Error> {
289 self.peek()?;
290 if self.peeked2.is_none() {
291 let token = self.lex_token()?;
292 self.peeked2 = Some(token);
293 }
294 Ok(self.peeked2.as_ref().expect("just assigned"))
295 }
296
297 pub fn current_span(&self) -> Span {
299 Span {
300 start: self.byte_offset,
301 end: self.byte_offset,
302 line: self.line,
303 col: self.col,
304 }
305 }
306
307 pub fn scan_raw_alphanumeric(&mut self) -> Result<String, Error> {
312 self.peeked = None;
313 self.peeked2 = None;
314 self.skip_whitespace();
315 let mut result = String::new();
316 while let Some(ch) = self.current_char() {
317 if ch.is_ascii_alphanumeric() {
318 result.push(ch);
319 self.advance();
320 } else {
321 break;
322 }
323 }
324 Ok(result)
325 }
326
327 pub fn next_token(&mut self) -> Result<Token, Error> {
328 if let Some(token) = self.peeked.take() {
329 self.peeked = self.peeked2.take();
330 return Ok(token);
331 }
332 self.lex_token()
333 }
334
335 fn current_char(&self) -> Option<char> {
336 self.source.get(self.pos).copied()
337 }
338
339 fn peek_char(&self) -> Option<char> {
340 self.source.get(self.pos + 1).copied()
341 }
342
343 fn peek_char_at(&self, offset: usize) -> Option<char> {
344 self.source.get(self.pos + offset).copied()
345 }
346
347 fn advance(&mut self) {
348 if let Some(ch) = self.current_char() {
349 self.byte_offset += ch.len_utf8();
350 if ch == '\n' {
351 self.line += 1;
352 self.col = 1;
353 } else {
354 self.col += 1;
355 }
356 self.pos += 1;
357 }
358 }
359
360 fn skip_whitespace(&mut self) {
361 while let Some(ch) = self.current_char() {
362 if ch.is_whitespace() {
363 self.advance();
364 } else {
365 break;
366 }
367 }
368 }
369
370 fn make_span(&self, start_byte: usize, start_line: usize, start_col: usize) -> Span {
371 Span {
372 start: start_byte,
373 end: self.byte_offset,
374 line: start_line,
375 col: start_col,
376 }
377 }
378
379 fn make_error(&self, message: impl Into<String>, span: Span) -> Error {
380 Error::parsing(message, Source::new(&self.attribute, span), None::<String>)
381 }
382
383 fn lex_token(&mut self) -> Result<Token, Error> {
384 self.skip_whitespace();
385
386 let start_byte = self.byte_offset;
387 let start_line = self.line;
388 let start_col = self.col;
389
390 let Some(ch) = self.current_char() else {
391 return Ok(Token::eof(start_byte, start_line, start_col));
392 };
393
394 if ch == '"' && self.peek_char() == Some('"') && self.peek_char_at(2) == Some('"') {
396 return self.scan_triple_quote(start_byte, start_line, start_col);
397 }
398
399 if ch == '"' {
401 return self.scan_string(start_byte, start_line, start_col);
402 }
403
404 if ch.is_ascii_digit() {
406 return self.scan_number(start_byte, start_line, start_col);
407 }
408
409 if let Some(token) = self.try_two_char_operator(start_byte, start_line, start_col) {
411 return Ok(token);
412 }
413
414 if let Some(kind) = self.single_char_token(ch) {
416 self.advance();
417 let span = self.make_span(start_byte, start_line, start_col);
418 let text = ch.to_string();
419 return Ok(Token { kind, span, text });
420 }
421
422 if ch.is_ascii_alphabetic() || ch == '_' {
424 return Ok(self.scan_identifier(start_byte, start_line, start_col));
425 }
426
427 if ch == '@' {
429 self.advance();
430 let span = self.make_span(start_byte, start_line, start_col);
431 return Ok(Token {
432 kind: TokenKind::At,
433 span,
434 text: "@".to_string(),
435 });
436 }
437
438 self.advance();
440 let span = self.make_span(start_byte, start_line, start_col);
441 Err(self.make_error(format!("Unexpected character '{}'", ch), span))
442 }
443
444 fn scan_triple_quote(
445 &mut self,
446 start_byte: usize,
447 start_line: usize,
448 start_col: usize,
449 ) -> Result<Token, Error> {
450 self.advance(); self.advance(); self.advance(); let content_start = self.byte_offset;
455 loop {
456 match self.current_char() {
457 None => {
458 let span = self.make_span(start_byte, start_line, start_col);
459 return Err(self.make_error(
460 "Unterminated commentary block: expected closing \"\"\"",
461 span,
462 ));
463 }
464 Some('"')
465 if self.source.get(self.pos + 1) == Some(&'"')
466 && self.source.get(self.pos + 2) == Some(&'"') =>
467 {
468 let content_end = self.byte_offset;
469 self.advance(); self.advance(); self.advance(); let raw: String = self.source_text[content_start..content_end].to_string();
473 let span = self.make_span(start_byte, start_line, start_col);
474 return Ok(Token {
475 kind: TokenKind::Commentary,
476 span,
477 text: raw,
478 });
479 }
480 Some(_) => {
481 self.advance();
482 }
483 }
484 }
485 }
486
487 fn scan_string(
488 &mut self,
489 start_byte: usize,
490 start_line: usize,
491 start_col: usize,
492 ) -> Result<Token, Error> {
493 self.advance(); let mut content = String::new();
495 loop {
496 match self.current_char() {
497 None => {
498 let span = self.make_span(start_byte, start_line, start_col);
499 return Err(self.make_error("String starting here was never closed", span));
500 }
501 Some('"') => {
502 self.advance(); break;
504 }
505 Some(ch) => {
506 content.push(ch);
507 self.advance();
508 }
509 }
510 }
511 let span = self.make_span(start_byte, start_line, start_col);
512 let full_text = format!("\"{}\"", content);
515 Ok(Token {
516 kind: TokenKind::StringLit,
517 span,
518 text: full_text,
519 })
520 }
521
522 fn scan_number(
523 &mut self,
524 start_byte: usize,
525 start_line: usize,
526 start_col: usize,
527 ) -> Result<Token, Error> {
528 let mut text = String::new();
529
530 while let Some(ch) = self.current_char() {
532 if ch.is_ascii_digit() || ch == '_' || ch == ',' {
533 text.push(ch);
534 self.advance();
535 } else {
536 break;
537 }
538 }
539
540 if self.current_char() == Some('.') {
542 if let Some(next) = self.peek_char() {
544 if next.is_ascii_digit() {
545 text.push('.');
546 self.advance(); while let Some(ch) = self.current_char() {
548 if ch.is_ascii_digit() {
549 text.push(ch);
550 self.advance();
551 } else {
552 break;
553 }
554 }
555 }
556 }
557 }
558
559 if let Some(ch) = self.current_char() {
561 if ch == 'e' || ch == 'E' {
562 let mut sci_text = String::new();
563 sci_text.push(ch);
564 let save_pos = self.pos;
565 let save_byte = self.byte_offset;
566 let save_line = self.line;
567 let save_col = self.col;
568 self.advance(); if let Some(sign) = self.current_char() {
571 if sign == '+' || sign == '-' {
572 sci_text.push(sign);
573 self.advance();
574 }
575 }
576
577 if let Some(d) = self.current_char() {
578 if d.is_ascii_digit() {
579 while let Some(ch) = self.current_char() {
580 if ch.is_ascii_digit() {
581 sci_text.push(ch);
582 self.advance();
583 } else {
584 break;
585 }
586 }
587 text.push_str(&sci_text);
588 } else {
589 self.pos = save_pos;
591 self.byte_offset = save_byte;
592 self.line = save_line;
593 self.col = save_col;
594 }
595 } else {
596 self.pos = save_pos;
597 self.byte_offset = save_byte;
598 self.line = save_line;
599 self.col = save_col;
600 }
601 }
602 }
603
604 let span = self.make_span(start_byte, start_line, start_col);
605 Ok(Token {
606 kind: TokenKind::NumberLit,
607 span,
608 text,
609 })
610 }
611
612 fn try_two_char_operator(
613 &mut self,
614 start_byte: usize,
615 start_line: usize,
616 start_col: usize,
617 ) -> Option<Token> {
618 let ch = self.current_char()?;
619 let next = self.peek_char();
620
621 let kind = match (ch, next) {
622 ('-', Some('>')) => TokenKind::Arrow,
623 ('>', Some('=')) => TokenKind::Gte,
624 ('<', Some('=')) => TokenKind::Lte,
625 ('%', Some('%')) => {
626 TokenKind::PercentPercent
628 }
629 _ => return None,
630 };
631
632 self.advance();
633 self.advance();
634 let span = self.make_span(start_byte, start_line, start_col);
635 let text: String = self.source_text[span.start..span.end].to_string();
636 Some(Token { kind, span, text })
637 }
638
639 fn single_char_token(&self, ch: char) -> Option<TokenKind> {
640 match ch {
641 '+' => Some(TokenKind::Plus),
642 '*' => Some(TokenKind::Star),
643 '/' => Some(TokenKind::Slash),
644 '^' => Some(TokenKind::Caret),
645 ':' => Some(TokenKind::Colon),
646 '~' => Some(TokenKind::Tilde),
647 '.' => Some(TokenKind::Dot),
648 '(' => Some(TokenKind::LParen),
649 ')' => Some(TokenKind::RParen),
650 '[' => Some(TokenKind::LBracket),
651 ']' => Some(TokenKind::RBracket),
652 '>' => Some(TokenKind::Gt),
653 '<' => Some(TokenKind::Lt),
654 '%' => Some(TokenKind::Percent),
655 '-' => Some(TokenKind::Minus),
656 _ => None,
657 }
658 }
659
660 fn scan_identifier(&mut self, start_byte: usize, start_line: usize, start_col: usize) -> Token {
661 let mut text = String::new();
662 while let Some(ch) = self.current_char() {
663 if ch.is_ascii_alphanumeric() || ch == '_' {
664 text.push(ch);
665 self.advance();
666 } else {
667 break;
668 }
669 }
670
671 let kind = keyword_from_identifier(&text);
672 let span = self.make_span(start_byte, start_line, start_col);
673 Token { kind, span, text }
674 }
675}
676
677fn keyword_from_identifier(text: &str) -> TokenKind {
678 match text.to_lowercase().as_str() {
679 "spec" => TokenKind::Spec,
680 "fact" => TokenKind::Fact,
681 "rule" => TokenKind::Rule,
682 "unless" => TokenKind::Unless,
683 "then" => TokenKind::Then,
684 "not" => TokenKind::Not,
685 "and" => TokenKind::And,
686 "in" => TokenKind::In,
687 "type" => TokenKind::Type,
688 "from" => TokenKind::From,
689 "with" => TokenKind::With,
690 "meta" => TokenKind::Meta,
691 "veto" => TokenKind::Veto,
692 "now" => TokenKind::Now,
693 "calendar" => TokenKind::Calendar,
694 "past" => TokenKind::Past,
695 "future" => TokenKind::Future,
696 "true" => TokenKind::True,
697 "false" => TokenKind::False,
698 "yes" => TokenKind::Yes,
699 "no" => TokenKind::No,
700 "accept" => TokenKind::Accept,
701 "reject" => TokenKind::Reject,
702 "scale" => TokenKind::ScaleKw,
703 "number" => TokenKind::NumberKw,
704 "text" => TokenKind::TextKw,
705 "date" => TokenKind::DateKw,
706 "time" => TokenKind::TimeKw,
707 "duration" => TokenKind::DurationKw,
708 "boolean" => TokenKind::BooleanKw,
709 "percent" => TokenKind::PercentKw,
710 "ratio" => TokenKind::RatioKw,
711 "sqrt" => TokenKind::Sqrt,
712 "sin" => TokenKind::Sin,
713 "cos" => TokenKind::Cos,
714 "tan" => TokenKind::Tan,
715 "asin" => TokenKind::Asin,
716 "acos" => TokenKind::Acos,
717 "atan" => TokenKind::Atan,
718 "log" => TokenKind::Log,
719 "exp" => TokenKind::Exp,
720 "abs" => TokenKind::Abs,
721 "floor" => TokenKind::Floor,
722 "ceil" => TokenKind::Ceil,
723 "round" => TokenKind::Round,
724 "is" => TokenKind::Is,
725 "years" => TokenKind::Years,
726 "year" => TokenKind::Year,
727 "months" => TokenKind::Months,
728 "month" => TokenKind::Month,
729 "weeks" => TokenKind::Weeks,
730 "week" => TokenKind::Week,
731 "days" => TokenKind::Days,
732 "day" => TokenKind::Day,
733 "hours" => TokenKind::Hours,
734 "hour" => TokenKind::Hour,
735 "minutes" => TokenKind::Minutes,
736 "minute" => TokenKind::Minute,
737 "seconds" => TokenKind::Seconds,
738 "second" => TokenKind::Second,
739 "milliseconds" => TokenKind::Milliseconds,
740 "millisecond" => TokenKind::Millisecond,
741 "microseconds" => TokenKind::Microseconds,
742 "microsecond" => TokenKind::Microsecond,
743 "permille" => TokenKind::Permille,
744 _ => TokenKind::Identifier,
745 }
746}
747
748pub fn is_structural_keyword(kind: &TokenKind) -> bool {
753 matches!(
754 kind,
755 TokenKind::Spec
756 | TokenKind::Fact
757 | TokenKind::Rule
758 | TokenKind::Unless
759 | TokenKind::Then
760 | TokenKind::Not
761 | TokenKind::And
762 | TokenKind::In
763 | TokenKind::Type
764 | TokenKind::From
765 | TokenKind::With
766 | TokenKind::Meta
767 | TokenKind::Veto
768 | TokenKind::Now
769 | TokenKind::Sqrt
770 | TokenKind::Sin
771 | TokenKind::Cos
772 | TokenKind::Tan
773 | TokenKind::Asin
774 | TokenKind::Acos
775 | TokenKind::Atan
776 | TokenKind::Log
777 | TokenKind::Exp
778 | TokenKind::Abs
779 | TokenKind::Floor
780 | TokenKind::Ceil
781 | TokenKind::Round
782 | TokenKind::True
783 | TokenKind::False
784 | TokenKind::Yes
785 | TokenKind::No
786 | TokenKind::Accept
787 | TokenKind::Reject
788 )
789}
790
/// True when `kind` is a primitive type keyword; delegates to
/// [`token_kind_to_primitive`] so the two can never disagree.
pub fn is_type_keyword(kind: &TokenKind) -> bool {
    token_kind_to_primitive(kind).is_some()
}
796
797#[must_use]
799pub fn token_kind_to_primitive(kind: &TokenKind) -> Option<PrimitiveKind> {
800 match kind {
801 TokenKind::BooleanKw => Some(PrimitiveKind::Boolean),
802 TokenKind::ScaleKw => Some(PrimitiveKind::Scale),
803 TokenKind::NumberKw => Some(PrimitiveKind::Number),
804 TokenKind::PercentKw => Some(PrimitiveKind::Percent),
805 TokenKind::RatioKw => Some(PrimitiveKind::Ratio),
806 TokenKind::TextKw => Some(PrimitiveKind::Text),
807 TokenKind::DateKw => Some(PrimitiveKind::Date),
808 TokenKind::TimeKw => Some(PrimitiveKind::Time),
809 TokenKind::DurationKw => Some(PrimitiveKind::Duration),
810 _ => None,
811 }
812}
813
814pub fn is_boolean_keyword(kind: &TokenKind) -> bool {
816 matches!(
817 kind,
818 TokenKind::True
819 | TokenKind::False
820 | TokenKind::Yes
821 | TokenKind::No
822 | TokenKind::Accept
823 | TokenKind::Reject
824 )
825}
826
827pub fn is_duration_unit(kind: &TokenKind) -> bool {
829 matches!(
830 kind,
831 TokenKind::Years
832 | TokenKind::Year
833 | TokenKind::Months
834 | TokenKind::Month
835 | TokenKind::Weeks
836 | TokenKind::Week
837 | TokenKind::Days
838 | TokenKind::Day
839 | TokenKind::Hours
840 | TokenKind::Hour
841 | TokenKind::Minutes
842 | TokenKind::Minute
843 | TokenKind::Seconds
844 | TokenKind::Second
845 | TokenKind::Milliseconds
846 | TokenKind::Millisecond
847 | TokenKind::Microseconds
848 | TokenKind::Microsecond
849 | TokenKind::PercentKw
850 )
851}
852
853#[must_use]
855pub fn token_kind_to_duration_unit(kind: &TokenKind) -> DurationUnit {
856 match kind {
857 TokenKind::Years | TokenKind::Year => DurationUnit::Year,
858 TokenKind::Months | TokenKind::Month => DurationUnit::Month,
859 TokenKind::Weeks | TokenKind::Week => DurationUnit::Week,
860 TokenKind::Days | TokenKind::Day => DurationUnit::Day,
861 TokenKind::Hours | TokenKind::Hour => DurationUnit::Hour,
862 TokenKind::Minutes | TokenKind::Minute => DurationUnit::Minute,
863 TokenKind::Seconds | TokenKind::Second => DurationUnit::Second,
864 TokenKind::Milliseconds | TokenKind::Millisecond => DurationUnit::Millisecond,
865 TokenKind::Microseconds | TokenKind::Microsecond => DurationUnit::Microsecond,
866 _ => unreachable!(
867 "BUG: token_kind_to_duration_unit called with non-duration token {:?}",
868 kind
869 ),
870 }
871}
872
873#[must_use]
876pub fn conversion_target_from_token(kind: &TokenKind, fallback_text: &str) -> ConversionTarget {
877 if is_duration_unit(kind) && *kind != TokenKind::PercentKw {
878 ConversionTarget::Duration(token_kind_to_duration_unit(kind))
879 } else {
880 ConversionTarget::Unit(fallback_text.to_lowercase())
881 }
882}
883
884#[must_use]
886pub fn is_calendar_unit_token(kind: &TokenKind) -> bool {
887 matches!(
888 kind,
889 TokenKind::Years
890 | TokenKind::Year
891 | TokenKind::Months
892 | TokenKind::Month
893 | TokenKind::Weeks
894 | TokenKind::Week
895 )
896}
897
898#[must_use]
900pub fn token_kind_to_calendar_unit(kind: &TokenKind) -> CalendarUnit {
901 match kind {
902 TokenKind::Years | TokenKind::Year => CalendarUnit::Year,
903 TokenKind::Months | TokenKind::Month => CalendarUnit::Month,
904 TokenKind::Weeks | TokenKind::Week => CalendarUnit::Week,
905 _ => unreachable!(
906 "BUG: token_kind_to_calendar_unit called with non-calendar token {:?}",
907 kind
908 ),
909 }
910}
911
912#[must_use]
914pub fn token_kind_to_boolean_value(kind: &TokenKind) -> BooleanValue {
915 match kind {
916 TokenKind::True => BooleanValue::True,
917 TokenKind::False => BooleanValue::False,
918 TokenKind::Yes => BooleanValue::Yes,
919 TokenKind::No => BooleanValue::No,
920 TokenKind::Accept => BooleanValue::Accept,
921 TokenKind::Reject => BooleanValue::Reject,
922 _ => unreachable!(
923 "BUG: token_kind_to_boolean_value called with non-boolean token {:?}",
924 kind
925 ),
926 }
927}
928
929pub fn is_math_function(kind: &TokenKind) -> bool {
931 matches!(
932 kind,
933 TokenKind::Sqrt
934 | TokenKind::Sin
935 | TokenKind::Cos
936 | TokenKind::Tan
937 | TokenKind::Asin
938 | TokenKind::Acos
939 | TokenKind::Atan
940 | TokenKind::Log
941 | TokenKind::Exp
942 | TokenKind::Abs
943 | TokenKind::Floor
944 | TokenKind::Ceil
945 | TokenKind::Round
946 )
947}
948
949pub fn is_spec_body_keyword(kind: &TokenKind) -> bool {
952 matches!(
953 kind,
954 TokenKind::Fact | TokenKind::Rule | TokenKind::Type | TokenKind::Meta
955 )
956}
957
958pub fn can_be_label(kind: &TokenKind) -> bool {
963 matches!(
964 kind,
965 TokenKind::Identifier
966 | TokenKind::Calendar
967 | TokenKind::Past
968 | TokenKind::Future
969 | TokenKind::Years
970 | TokenKind::Year
971 | TokenKind::Months
972 | TokenKind::Month
973 | TokenKind::Weeks
974 | TokenKind::Week
975 | TokenKind::Days
976 | TokenKind::Day
977 | TokenKind::Hours
978 | TokenKind::Hour
979 | TokenKind::Minutes
980 | TokenKind::Minute
981 | TokenKind::Seconds
982 | TokenKind::Second
983 | TokenKind::Milliseconds
984 | TokenKind::Millisecond
985 | TokenKind::Microseconds
986 | TokenKind::Microsecond
987 | TokenKind::Permille
988 | TokenKind::Is
989 )
990}
991
992pub fn can_be_reference_segment(kind: &TokenKind) -> bool {
995 can_be_label(kind) || is_type_keyword(kind)
996}
997
#[cfg(test)]
mod tests {
    use super::*;

    // Lexes `input` to completion, returning all tokens including the
    // trailing Eof, or the first lexing error.
    fn lex_all(input: &str) -> Result<Vec<Token>, Error> {
        let mut lexer = Lexer::new(input, "test.lemma");
        let mut tokens = Vec::new();
        loop {
            let token = lexer.next_token()?;
            if token.kind == TokenKind::Eof {
                tokens.push(token);
                break;
            }
            tokens.push(token);
        }
        Ok(tokens)
    }

    // Like `lex_all`, but keeps only the token kinds.
    fn lex_kinds(input: &str) -> Result<Vec<TokenKind>, Error> {
        Ok(lex_all(input)?.into_iter().map(|t| t.kind).collect())
    }

    #[test]
    fn lex_empty_input() {
        let tokens = lex_all("").unwrap();
        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0].kind, TokenKind::Eof);
    }

    #[test]
    fn lex_spec_declaration() {
        let kinds = lex_kinds("spec person").unwrap();
        assert_eq!(
            kinds,
            vec![TokenKind::Spec, TokenKind::Identifier, TokenKind::Eof]
        );
    }

    #[test]
    fn lex_fact_definition() {
        let kinds = lex_kinds("fact age: 25").unwrap();
        assert_eq!(
            kinds,
            vec![
                TokenKind::Fact,
                TokenKind::Identifier,
                TokenKind::Colon,
                TokenKind::NumberLit,
                TokenKind::Eof,
            ]
        );
    }

    #[test]
    fn lex_rule_with_comparison() {
        let kinds = lex_kinds("rule is_adult: age >= 18").unwrap();
        assert_eq!(
            kinds,
            vec![
                TokenKind::Rule,
                TokenKind::Identifier,
                TokenKind::Colon,
                TokenKind::Identifier,
                TokenKind::Gte,
                TokenKind::NumberLit,
                TokenKind::Eof,
            ]
        );
    }

    // String tokens keep their surrounding quotes in `text`.
    #[test]
    fn lex_string_literal() {
        let tokens = lex_all(r#""hello world""#).unwrap();
        assert_eq!(tokens[0].kind, TokenKind::StringLit);
        assert_eq!(tokens[0].text, "\"hello world\"");
    }

    #[test]
    fn lex_unterminated_string() {
        let result = lex_all(r#""hello"#);
        assert!(result.is_err());
    }

    #[test]
    fn lex_number_with_decimal() {
        let tokens = lex_all("3.14").unwrap();
        assert_eq!(tokens[0].kind, TokenKind::NumberLit);
        assert_eq!(tokens[0].text, "3.14");
    }

    // `_` and `,` are digit separators and are kept in the token text.
    #[test]
    fn lex_number_with_underscores() {
        let tokens = lex_all("1_000_000").unwrap();
        assert_eq!(tokens[0].kind, TokenKind::NumberLit);
        assert_eq!(tokens[0].text, "1_000_000");
    }

    #[test]
    fn lex_scientific_notation() {
        let tokens = lex_all("1.5e+10").unwrap();
        assert_eq!(tokens[0].kind, TokenKind::NumberLit);
        assert_eq!(tokens[0].text, "1.5e+10");
    }

    #[test]
    fn lex_all_operators() {
        let kinds = lex_kinds("+ - * / % ^ > < >= <= -> %%").unwrap();
        assert_eq!(
            &kinds[..12],
            &[
                TokenKind::Plus,
                TokenKind::Minus,
                TokenKind::Star,
                TokenKind::Slash,
                TokenKind::Percent,
                TokenKind::Caret,
                TokenKind::Gt,
                TokenKind::Lt,
                TokenKind::Gte,
                TokenKind::Lte,
                TokenKind::Arrow,
                TokenKind::PercentPercent,
            ]
        );
    }

    #[test]
    fn lex_keywords() {
        let kinds = lex_kinds("spec fact rule unless then not and in type from with meta veto now")
            .unwrap();
        assert_eq!(
            &kinds[..14],
            &[
                TokenKind::Spec,
                TokenKind::Fact,
                TokenKind::Rule,
                TokenKind::Unless,
                TokenKind::Then,
                TokenKind::Not,
                TokenKind::And,
                TokenKind::In,
                TokenKind::Type,
                TokenKind::From,
                TokenKind::With,
                TokenKind::Meta,
                TokenKind::Veto,
                TokenKind::Now,
            ]
        );
    }

    #[test]
    fn lex_boolean_keywords() {
        let kinds = lex_kinds("true false yes no accept reject").unwrap();
        assert_eq!(
            &kinds[..6],
            &[
                TokenKind::True,
                TokenKind::False,
                TokenKind::Yes,
                TokenKind::No,
                TokenKind::Accept,
                TokenKind::Reject,
            ]
        );
    }

    #[test]
    fn lex_duration_keywords() {
        let kinds = lex_kinds("years months weeks days hours minutes seconds").unwrap();
        assert_eq!(
            &kinds[..7],
            &[
                TokenKind::Years,
                TokenKind::Months,
                TokenKind::Weeks,
                TokenKind::Days,
                TokenKind::Hours,
                TokenKind::Minutes,
                TokenKind::Seconds,
            ]
        );
    }

    // Commentary tokens carry only the content between the triple quotes.
    #[test]
    fn lex_commentary() {
        let tokens = lex_all(r#""""hello world""""#).unwrap();
        assert_eq!(tokens[0].kind, TokenKind::Commentary);
        assert_eq!(tokens[0].text, "hello world");
    }

    #[test]
    fn lex_at_sign() {
        let kinds = lex_kinds("@user").unwrap();
        assert_eq!(kinds[0], TokenKind::At);
        assert_eq!(kinds[1], TokenKind::Identifier);
    }

    #[test]
    fn lex_tilde() {
        let kinds = lex_kinds("~").unwrap();
        assert_eq!(kinds[0], TokenKind::Tilde);
    }

    #[test]
    fn lex_brackets() {
        let kinds = lex_kinds("[number]").unwrap();
        assert_eq!(
            &kinds[..3],
            &[
                TokenKind::LBracket,
                TokenKind::NumberKw,
                TokenKind::RBracket
            ]
        );
    }

    #[test]
    fn lex_parentheses() {
        let kinds = lex_kinds("(x + 1)").unwrap();
        assert_eq!(
            &kinds[..5],
            &[
                TokenKind::LParen,
                TokenKind::Identifier,
                TokenKind::Plus,
                TokenKind::NumberLit,
                TokenKind::RParen,
            ]
        );
    }

    #[test]
    fn lex_dot_for_references() {
        let kinds = lex_kinds("employee.salary").unwrap();
        assert_eq!(
            &kinds[..3],
            &[TokenKind::Identifier, TokenKind::Dot, TokenKind::Identifier]
        );
    }

    // Only the leading segment is checked; the slashes lex as Slash tokens.
    #[test]
    fn lex_spec_name_with_slashes() {
        let tokens = lex_all("spec contracts/employment/jack").unwrap();
        assert_eq!(tokens[0].kind, TokenKind::Spec);
        assert_eq!(tokens[1].kind, TokenKind::Identifier);
    }

    // A non-exponent after `e` must be rewound so `eur` stays an identifier.
    #[test]
    fn lex_number_not_followed_by_e_identifier() {
        let tokens = lex_all("42 eur").unwrap();
        assert_eq!(tokens[0].kind, TokenKind::NumberLit);
        assert_eq!(tokens[0].text, "42");
        assert_eq!(tokens[1].kind, TokenKind::Identifier);
        assert_eq!(tokens[1].text, "eur");
    }

    #[test]
    fn lex_unknown_character() {
        // NOTE(review): this literal looks like a mis-encoded `§`; it still
        // exercises the unexpected-character path either way.
        let result = lex_all("ยง");
        assert!(result.is_err());
    }

    #[test]
    fn lex_peek_does_not_consume() {
        let mut lexer = Lexer::new("spec test", "test.lemma");
        let peeked_kind = lexer.peek().unwrap().kind.clone();
        assert_eq!(peeked_kind, TokenKind::Spec);
        let next = lexer.next_token().unwrap();
        assert_eq!(next.kind, TokenKind::Spec);
    }

    // Spans are byte offsets with 1-based line/column of the token start.
    #[test]
    fn lex_span_byte_offsets() {
        let tokens = lex_all("spec test").unwrap();
        assert_eq!(tokens[0].span.start, 0);
        assert_eq!(tokens[0].span.end, 4);
        assert_eq!(tokens[0].span.line, 1);
        assert_eq!(tokens[0].span.col, 1);

        assert_eq!(tokens[1].span.start, 5);
        assert_eq!(tokens[1].span.end, 9);
        assert_eq!(tokens[1].span.line, 1);
        assert_eq!(tokens[1].span.col, 6);
    }

    #[test]
    fn lex_multiline_span_tracking() {
        let tokens = lex_all("spec test\nfact x: 1").unwrap();
        // tokens: spec, identifier, fact, ...
        let fact_token = &tokens[2];
        assert_eq!(fact_token.kind, TokenKind::Fact);
        assert_eq!(fact_token.span.line, 2);
        assert_eq!(fact_token.span.col, 1);
    }

    #[test]
    fn lex_case_insensitive_keywords() {
        let kinds = lex_kinds("SPEC Fact RULE").unwrap();
        assert_eq!(kinds[0], TokenKind::Spec);
        assert_eq!(kinds[1], TokenKind::Fact);
        assert_eq!(kinds[2], TokenKind::Rule);
    }

    #[test]
    fn lex_math_function_keywords() {
        let kinds =
            lex_kinds("sqrt sin cos tan asin acos atan log exp abs floor ceil round").unwrap();
        assert_eq!(
            &kinds[..13],
            &[
                TokenKind::Sqrt,
                TokenKind::Sin,
                TokenKind::Cos,
                TokenKind::Tan,
                TokenKind::Asin,
                TokenKind::Acos,
                TokenKind::Atan,
                TokenKind::Log,
                TokenKind::Exp,
                TokenKind::Abs,
                TokenKind::Floor,
                TokenKind::Ceil,
                TokenKind::Round,
            ]
        );
    }

    #[test]
    fn lex_is_keyword() {
        let kinds = lex_kinds("status is \"active\"").unwrap();
        assert_eq!(kinds[0], TokenKind::Identifier);
        assert_eq!(kinds[1], TokenKind::Is);
        assert_eq!(kinds[2], TokenKind::StringLit);
    }

    #[test]
    fn lex_percent_not_followed_by_digit() {
        let kinds = lex_kinds("50%").unwrap();
        assert_eq!(kinds[0], TokenKind::NumberLit);
        assert_eq!(kinds[1], TokenKind::Percent);
    }

    #[test]
    fn lex_number_with_commas() {
        let tokens = lex_all("1,000,000").unwrap();
        assert_eq!(tokens[0].kind, TokenKind::NumberLit);
        assert_eq!(tokens[0].text, "1,000,000");
    }

    #[test]
    fn lex_arrow_chain() {
        let kinds = lex_kinds("-> unit eur 1.00 -> decimals 2").unwrap();
        assert_eq!(kinds[0], TokenKind::Arrow);
        assert_eq!(kinds[1], TokenKind::Identifier);
        assert_eq!(kinds[2], TokenKind::Identifier);
        assert_eq!(kinds[3], TokenKind::NumberLit);
        assert_eq!(kinds[4], TokenKind::Arrow);
    }
}