1use crate::error::Error;
2use crate::parsing::ast::{
3 BooleanValue, CalendarUnit, ConversionTarget, DurationUnit, PrimitiveKind, Span,
4};
5use crate::parsing::source::Source;
6use std::sync::Arc;
7
/// Every kind of token the lexer can produce. Variants carry no payload;
/// a token's raw text and location live on [`Token`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TokenKind {
    // Structural / declaration keywords.
    Spec,
    Data,
    Rule,
    Unless,
    Then,
    Not,
    And,
    In,
    Type,
    From,
    With,
    Meta,
    Veto,
    Now,
    Calendar,
    Past,
    Future,

    // Boolean literal keywords.
    True,
    False,
    Yes,
    No,
    Accept,
    Reject,

    // Primitive type keywords; the `Kw` suffix distinguishes them from
    // the AST types they name.
    ScaleKw,
    NumberKw,
    TextKw,
    DateKw,
    TimeKw,
    DurationKw,
    BooleanKw,
    PercentKw,
    RatioKw,

    // Math function keywords.
    Sqrt,
    Sin,
    Cos,
    Tan,
    Asin,
    Acos,
    Atan,
    Log,
    Exp,
    Abs,
    Floor,
    Ceil,
    Round,

    // Duration / unit keywords, in both plural and singular forms.
    Years,
    Year,
    Months,
    Month,
    Weeks,
    Week,
    Days,
    Day,
    Hours,
    Hour,
    Minutes,
    Minute,
    Seconds,
    Second,
    Milliseconds,
    Millisecond,
    Microseconds,
    Microsecond,
    Permille,

    // Comparison keyword.
    Is,

    // Arithmetic and comparison operators.
    Plus,
    Minus,
    Star,
    Slash,
    Comma,
    Percent,
    PercentPercent,
    Caret,
    Gt,
    Lt,
    Gte,
    Lte,

    // Punctuation.
    Colon,
    Arrow,
    Dot,
    At,
    LParen,
    RParen,

    // Literals: token text holds the spelling (strings keep quotes).
    NumberLit,
    StringLit,

    // `"""..."""` commentary block; token text is the inner content.
    Commentary,

    // Anything identifier-shaped that is not a keyword.
    Identifier,

    // End of input (zero-width).
    Eof,
}
122
123impl std::fmt::Display for TokenKind {
124 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
125 match self {
126 TokenKind::Spec => write!(f, "'spec'"),
127 TokenKind::Data => write!(f, "'data'"),
128 TokenKind::Rule => write!(f, "'rule'"),
129 TokenKind::Unless => write!(f, "'unless'"),
130 TokenKind::Then => write!(f, "'then'"),
131 TokenKind::Not => write!(f, "'not'"),
132 TokenKind::And => write!(f, "'and'"),
133 TokenKind::In => write!(f, "'in'"),
134 TokenKind::Type => write!(f, "'type'"),
135 TokenKind::From => write!(f, "'from'"),
136 TokenKind::With => write!(f, "'with'"),
137 TokenKind::Meta => write!(f, "'meta'"),
138 TokenKind::Veto => write!(f, "'veto'"),
139 TokenKind::Now => write!(f, "'now'"),
140 TokenKind::Calendar => write!(f, "'calendar'"),
141 TokenKind::Past => write!(f, "'past'"),
142 TokenKind::Future => write!(f, "'future'"),
143 TokenKind::True => write!(f, "'true'"),
144 TokenKind::False => write!(f, "'false'"),
145 TokenKind::Yes => write!(f, "'yes'"),
146 TokenKind::No => write!(f, "'no'"),
147 TokenKind::Accept => write!(f, "'accept'"),
148 TokenKind::Reject => write!(f, "'reject'"),
149 TokenKind::ScaleKw => write!(f, "'scale'"),
150 TokenKind::NumberKw => write!(f, "'number'"),
151 TokenKind::TextKw => write!(f, "'text'"),
152 TokenKind::DateKw => write!(f, "'date'"),
153 TokenKind::TimeKw => write!(f, "'time'"),
154 TokenKind::DurationKw => write!(f, "'duration'"),
155 TokenKind::BooleanKw => write!(f, "'boolean'"),
156 TokenKind::PercentKw => write!(f, "'percent'"),
157 TokenKind::RatioKw => write!(f, "'ratio'"),
158 TokenKind::Sqrt => write!(f, "'sqrt'"),
159 TokenKind::Sin => write!(f, "'sin'"),
160 TokenKind::Cos => write!(f, "'cos'"),
161 TokenKind::Tan => write!(f, "'tan'"),
162 TokenKind::Asin => write!(f, "'asin'"),
163 TokenKind::Acos => write!(f, "'acos'"),
164 TokenKind::Atan => write!(f, "'atan'"),
165 TokenKind::Log => write!(f, "'log'"),
166 TokenKind::Exp => write!(f, "'exp'"),
167 TokenKind::Abs => write!(f, "'abs'"),
168 TokenKind::Floor => write!(f, "'floor'"),
169 TokenKind::Ceil => write!(f, "'ceil'"),
170 TokenKind::Round => write!(f, "'round'"),
171 TokenKind::Years => write!(f, "'years'"),
172 TokenKind::Year => write!(f, "'year'"),
173 TokenKind::Months => write!(f, "'months'"),
174 TokenKind::Month => write!(f, "'month'"),
175 TokenKind::Weeks => write!(f, "'weeks'"),
176 TokenKind::Week => write!(f, "'week'"),
177 TokenKind::Days => write!(f, "'days'"),
178 TokenKind::Day => write!(f, "'day'"),
179 TokenKind::Hours => write!(f, "'hours'"),
180 TokenKind::Hour => write!(f, "'hour'"),
181 TokenKind::Minutes => write!(f, "'minutes'"),
182 TokenKind::Minute => write!(f, "'minute'"),
183 TokenKind::Seconds => write!(f, "'seconds'"),
184 TokenKind::Second => write!(f, "'second'"),
185 TokenKind::Milliseconds => write!(f, "'milliseconds'"),
186 TokenKind::Millisecond => write!(f, "'millisecond'"),
187 TokenKind::Microseconds => write!(f, "'microseconds'"),
188 TokenKind::Microsecond => write!(f, "'microsecond'"),
189 TokenKind::Permille => write!(f, "'permille'"),
190 TokenKind::Is => write!(f, "'is'"),
191 TokenKind::Plus => write!(f, "'+'"),
192 TokenKind::Minus => write!(f, "'-'"),
193 TokenKind::Star => write!(f, "'*'"),
194 TokenKind::Slash => write!(f, "'/'"),
195 TokenKind::Comma => write!(f, "','"),
196 TokenKind::Percent => write!(f, "'%'"),
197 TokenKind::PercentPercent => write!(f, "'%%'"),
198 TokenKind::Caret => write!(f, "'^'"),
199 TokenKind::Gt => write!(f, "'>'"),
200 TokenKind::Lt => write!(f, "'<'"),
201 TokenKind::Gte => write!(f, "'>='"),
202 TokenKind::Lte => write!(f, "'<='"),
203 TokenKind::Colon => write!(f, "':'"),
204 TokenKind::Arrow => write!(f, "'->'"),
205 TokenKind::Dot => write!(f, "'.'"),
206 TokenKind::At => write!(f, "'@'"),
207 TokenKind::LParen => write!(f, "'('"),
208 TokenKind::RParen => write!(f, "')'"),
209 TokenKind::NumberLit => write!(f, "a number"),
210 TokenKind::StringLit => write!(f, "a string"),
211 TokenKind::Commentary => write!(f, "commentary block"),
212 TokenKind::Identifier => write!(f, "an identifier"),
213 TokenKind::Eof => write!(f, "end of file"),
214 }
215 }
216}
217
/// A single lexeme produced by the lexer.
#[derive(Debug, Clone)]
pub struct Token {
    // What the lexeme is.
    pub kind: TokenKind,
    // Byte range plus starting line/column in the original source.
    pub span: Span,
    // Raw text of the lexeme. String literals keep their surrounding
    // quotes; commentary blocks carry only the inner content; Eof is "".
    pub text: String,
}
224
225impl Token {
226 pub fn eof(offset: usize, line: usize, col: usize) -> Self {
227 Token {
228 kind: TokenKind::Eof,
229 span: Span {
230 start: offset,
231 end: offset,
232 line,
233 col,
234 },
235 text: String::new(),
236 }
237 }
238}
239
/// Hand-written lexer with two tokens of lookahead (`peek`/`peek_second`).
pub struct Lexer {
    // Input decoded into chars for O(1) indexed lookahead.
    source: Vec<char>,
    // Index of the cursor into `source` (char units).
    pos: usize,
    // 1-based line of the cursor.
    line: usize,
    // 1-based column of the cursor.
    col: usize,
    // Byte offset of the cursor in the original UTF-8 text; kept in sync
    // with `pos` via `char::len_utf8` and used to build spans/slices.
    byte_offset: usize,
    // Source name (e.g. a file name) attached to every error.
    attribute: String,
    // Original text, shared cheaply (Arc) for slicing and for callers.
    source_text: Arc<str>,
    // First and second lookahead buffers; `next_token` drains them first.
    peeked: Option<Token>,
    peeked2: Option<Token>,
}
251
impl Lexer {
    /// Creates a lexer over `input`. `attribute` names the source (for
    /// example a file name) and is embedded in every error reported.
    pub fn new(input: &str, attribute: &str) -> Self {
        let source_text: Arc<str> = Arc::from(input);
        Lexer {
            source: input.chars().collect(),
            pos: 0,
            line: 1,
            col: 1,
            byte_offset: 0,
            attribute: attribute.to_string(),
            source_text,
            peeked: None,
            peeked2: None,
        }
    }

    /// Cheap (refcount-only) handle to the full original source text.
    pub fn source_text(&self) -> Arc<str> {
        self.source_text.clone()
    }

    /// The source name this lexer was constructed with.
    pub fn attribute(&self) -> &str {
        &self.attribute
    }

    /// Returns the next token without consuming it, lexing on demand.
    pub fn peek(&mut self) -> Result<&Token, Error> {
        if self.peeked.is_none() {
            let token = self.lex_token()?;
            self.peeked = Some(token);
        }
        Ok(self.peeked.as_ref().expect("just assigned"))
    }

    /// Returns the token after the next one without consuming either.
    pub fn peek_second(&mut self) -> Result<&Token, Error> {
        // Fill the first lookahead slot before the second.
        self.peek()?;
        if self.peeked2.is_none() {
            let token = self.lex_token()?;
            self.peeked2 = Some(token);
        }
        Ok(self.peeked2.as_ref().expect("just assigned"))
    }

    /// Zero-width span at the lexer's current physical position.
    ///
    /// NOTE(review): when tokens are buffered in the lookahead slots this
    /// position is already *past* them — confirm callers only use this
    /// when no peeked token is pending.
    pub fn current_span(&self) -> Span {
        Span {
            start: self.byte_offset,
            end: self.byte_offset,
            line: self.line,
            col: self.col,
        }
    }

    /// Consumes and returns the next token, draining the lookahead
    /// buffers first so `peek`/`peek_second` stay consistent.
    pub fn next_token(&mut self) -> Result<Token, Error> {
        if let Some(token) = self.peeked.take() {
            // Shift the second lookahead slot into the first.
            self.peeked = self.peeked2.take();
            return Ok(token);
        }
        self.lex_token()
    }

    // Char under the cursor, or None at end of input.
    fn current_char(&self) -> Option<char> {
        self.source.get(self.pos).copied()
    }

    // One char of lookahead past the cursor.
    fn peek_char(&self) -> Option<char> {
        self.source.get(self.pos + 1).copied()
    }

    // Arbitrary lookahead; `offset` 0 is the current char.
    fn peek_char_at(&self, offset: usize) -> Option<char> {
        self.source.get(self.pos + offset).copied()
    }

    // Moves the cursor one char forward, keeping byte offset and
    // line/column bookkeeping in sync. No-op at end of input.
    fn advance(&mut self) {
        if let Some(ch) = self.current_char() {
            self.byte_offset += ch.len_utf8();
            if ch == '\n' {
                self.line += 1;
                self.col = 1;
            } else {
                self.col += 1;
            }
            self.pos += 1;
        }
    }

    // Skips any Unicode whitespace, including newlines.
    fn skip_whitespace(&mut self) {
        while let Some(ch) = self.current_char() {
            if ch.is_whitespace() {
                self.advance();
            } else {
                break;
            }
        }
    }

    // Span from a remembered start position up to the current cursor.
    fn make_span(&self, start_byte: usize, start_line: usize, start_col: usize) -> Span {
        Span {
            start: start_byte,
            end: self.byte_offset,
            line: start_line,
            col: start_col,
        }
    }

    // Wraps a message into a parsing error pointing at `span`.
    fn make_error(&self, message: impl Into<String>, span: Span) -> Error {
        Error::parsing(message, Source::new(&self.attribute, span), None::<String>)
    }

    // Core dispatch: skip whitespace, then pick a scanner based on the
    // next character(s). Longest match wins: `"""` is tried before `"`,
    // and two-char operators before single-char ones.
    fn lex_token(&mut self) -> Result<Token, Error> {
        self.skip_whitespace();

        let start_byte = self.byte_offset;
        let start_line = self.line;
        let start_col = self.col;

        let Some(ch) = self.current_char() else {
            return Ok(Token::eof(start_byte, start_line, start_col));
        };

        if ch == '"' && self.peek_char() == Some('"') && self.peek_char_at(2) == Some('"') {
            return self.scan_triple_quote(start_byte, start_line, start_col);
        }

        if ch == '"' {
            return self.scan_string(start_byte, start_line, start_col);
        }

        if ch.is_ascii_digit() {
            return self.scan_number(start_byte, start_line, start_col);
        }

        if let Some(token) = self.try_two_char_operator(start_byte, start_line, start_col) {
            return Ok(token);
        }

        if let Some(kind) = self.single_char_token(ch) {
            self.advance();
            let span = self.make_span(start_byte, start_line, start_col);
            let text = ch.to_string();
            return Ok(Token { kind, span, text });
        }

        // Identifiers/keywords are ASCII-only: [A-Za-z_][A-Za-z0-9_]*.
        if ch.is_ascii_alphabetic() || ch == '_' {
            return Ok(self.scan_identifier(start_byte, start_line, start_col));
        }

        if ch == '@' {
            self.advance();
            let span = self.make_span(start_byte, start_line, start_col);
            return Ok(Token {
                kind: TokenKind::At,
                span,
                text: "@".to_string(),
            });
        }

        // Consume the offending char so the error span covers it.
        self.advance();
        let span = self.make_span(start_byte, start_line, start_col);
        Err(self.make_error(format!("Unexpected character '{}'", ch), span))
    }

    // Scans a `"""..."""` commentary block, which may span multiple
    // lines. The token text is the inner content only (delimiters are
    // stripped), sliced by byte offset from the original text.
    fn scan_triple_quote(
        &mut self,
        start_byte: usize,
        start_line: usize,
        start_col: usize,
    ) -> Result<Token, Error> {
        // Consume the opening `"""`, then remember where content begins.
        self.advance(); self.advance(); self.advance(); let content_start = self.byte_offset;
        loop {
            match self.current_char() {
                None => {
                    let span = self.make_span(start_byte, start_line, start_col);
                    return Err(self.make_error(
                        "Unterminated commentary block: expected closing \"\"\"",
                        span,
                    ));
                }
                Some('"')
                    if self.source.get(self.pos + 1) == Some(&'"')
                        && self.source.get(self.pos + 2) == Some(&'"') =>
                {
                    // Record where content ends, consume the closing
                    // `"""`, and slice the content out of the source.
                    let content_end = self.byte_offset;
                    self.advance(); self.advance(); self.advance(); let raw: String = self.source_text[content_start..content_end].to_string();
                    let span = self.make_span(start_byte, start_line, start_col);
                    return Ok(Token {
                        kind: TokenKind::Commentary,
                        span,
                        text: raw,
                    });
                }
                Some(_) => {
                    self.advance();
                }
            }
        }
    }

    // Scans a double-quoted string literal. There is no escape syntax:
    // the first `"` always terminates, and newlines are allowed inside.
    // The token text keeps the surrounding quotes.
    fn scan_string(
        &mut self,
        start_byte: usize,
        start_line: usize,
        start_col: usize,
    ) -> Result<Token, Error> {
        // Consume the opening quote.
        self.advance(); let mut content = String::new();
        loop {
            match self.current_char() {
                None => {
                    let span = self.make_span(start_byte, start_line, start_col);
                    return Err(self.make_error("String starting here was never closed", span));
                }
                Some('"') => {
                    self.advance(); break;
                }
                Some(ch) => {
                    content.push(ch);
                    self.advance();
                }
            }
        }
        let span = self.make_span(start_byte, start_line, start_col);
        // Re-wrap in quotes so `text` mirrors the source spelling.
        let full_text = format!("\"{}\"", content);
        Ok(Token {
            kind: TokenKind::StringLit,
            span,
            text: full_text,
        })
    }

    // Scans a numeric literal: digits with `_`/`,` separators, an
    // optional `.fraction`, and an optional scientific suffix (`e`/`E`
    // with optional sign). Separators are kept verbatim in `text`.
    //
    // NOTE(review): a trailing separator is included in the token (e.g.
    // `1,` lexes as one NumberLit with text "1,") — confirm downstream
    // number parsing tolerates this.
    fn scan_number(
        &mut self,
        start_byte: usize,
        start_line: usize,
        start_col: usize,
    ) -> Result<Token, Error> {
        let mut text = String::new();

        // Integer part, with separators.
        while let Some(ch) = self.current_char() {
            if ch.is_ascii_digit() || ch == '_' || ch == ',' {
                text.push(ch);
                self.advance();
            } else {
                break;
            }
        }

        // Fractional part, only when the dot is followed by a digit, so
        // `x.1` member access vs. `1.5` literal disambiguates correctly.
        if self.current_char() == Some('.') {
            if let Some(next) = self.peek_char() {
                if next.is_ascii_digit() {
                    text.push('.');
                    self.advance(); while let Some(ch) = self.current_char() {
                        if ch.is_ascii_digit() {
                            text.push(ch);
                            self.advance();
                        } else {
                            break;
                        }
                    }
                }
            }
        }

        // Scientific suffix: speculatively consume `e`/`E` (and sign),
        // then backtrack to the saved cursor state if no exponent digits
        // follow — this keeps `42 eur` as NumberLit + Identifier.
        if let Some(ch) = self.current_char() {
            if ch == 'e' || ch == 'E' {
                let mut sci_text = String::new();
                sci_text.push(ch);
                let save_pos = self.pos;
                let save_byte = self.byte_offset;
                let save_line = self.line;
                let save_col = self.col;
                self.advance(); if let Some(sign) = self.current_char() {
                    if sign == '+' || sign == '-' {
                        sci_text.push(sign);
                        self.advance();
                    }
                }

                if let Some(d) = self.current_char() {
                    if d.is_ascii_digit() {
                        while let Some(ch) = self.current_char() {
                            if ch.is_ascii_digit() {
                                sci_text.push(ch);
                                self.advance();
                            } else {
                                break;
                            }
                        }
                        text.push_str(&sci_text);
                    } else {
                        // Not an exponent: rewind to before the `e`.
                        self.pos = save_pos;
                        self.byte_offset = save_byte;
                        self.line = save_line;
                        self.col = save_col;
                    }
                } else {
                    // Input ended right after `e`/sign: rewind likewise.
                    self.pos = save_pos;
                    self.byte_offset = save_byte;
                    self.line = save_line;
                    self.col = save_col;
                }
            }
        }

        let span = self.make_span(start_byte, start_line, start_col);
        Ok(Token {
            kind: TokenKind::NumberLit,
            span,
            text,
        })
    }

    // Recognizes the two-character operators (`->`, `>=`, `<=`, `%%`).
    // Returns None without consuming anything when the next two chars do
    // not form one, letting single-char handling take over.
    fn try_two_char_operator(
        &mut self,
        start_byte: usize,
        start_line: usize,
        start_col: usize,
    ) -> Option<Token> {
        let ch = self.current_char()?;
        let next = self.peek_char();

        let kind = match (ch, next) {
            ('-', Some('>')) => TokenKind::Arrow,
            ('>', Some('=')) => TokenKind::Gte,
            ('<', Some('=')) => TokenKind::Lte,
            ('%', Some('%')) => {
                TokenKind::PercentPercent
            }
            _ => return None,
        };

        self.advance();
        self.advance();
        let span = self.make_span(start_byte, start_line, start_col);
        // Byte-slicing is safe here: all these operators are ASCII.
        let text: String = self.source_text[span.start..span.end].to_string();
        Some(Token { kind, span, text })
    }

    // Single-character operator/punctuation lookup. `@` is handled
    // separately in `lex_token`; returns None for anything else.
    fn single_char_token(&self, ch: char) -> Option<TokenKind> {
        match ch {
            '+' => Some(TokenKind::Plus),
            '*' => Some(TokenKind::Star),
            '/' => Some(TokenKind::Slash),
            ',' => Some(TokenKind::Comma),
            '^' => Some(TokenKind::Caret),
            ':' => Some(TokenKind::Colon),
            '.' => Some(TokenKind::Dot),
            '(' => Some(TokenKind::LParen),
            ')' => Some(TokenKind::RParen),
            '>' => Some(TokenKind::Gt),
            '<' => Some(TokenKind::Lt),
            '%' => Some(TokenKind::Percent),
            '-' => Some(TokenKind::Minus),
            _ => None,
        }
    }

    // Scans [A-Za-z_][A-Za-z0-9_]* and classifies it (case-insensitively)
    // as a keyword or a plain identifier via `keyword_from_identifier`.
    fn scan_identifier(&mut self, start_byte: usize, start_line: usize, start_col: usize) -> Token {
        let mut text = String::new();
        while let Some(ch) = self.current_char() {
            if ch.is_ascii_alphanumeric() || ch == '_' {
                text.push(ch);
                self.advance();
            } else {
                break;
            }
        }

        let kind = keyword_from_identifier(&text);
        let span = self.make_span(start_byte, start_line, start_col);
        Token { kind, span, text }
    }
}
650
651fn keyword_from_identifier(text: &str) -> TokenKind {
652 match text.to_lowercase().as_str() {
653 "spec" => TokenKind::Spec,
654 "data" => TokenKind::Data,
655 "rule" => TokenKind::Rule,
656 "unless" => TokenKind::Unless,
657 "then" => TokenKind::Then,
658 "not" => TokenKind::Not,
659 "and" => TokenKind::And,
660 "in" => TokenKind::In,
661 "type" => TokenKind::Type,
662 "from" => TokenKind::From,
663 "with" => TokenKind::With,
664 "meta" => TokenKind::Meta,
665 "veto" => TokenKind::Veto,
666 "now" => TokenKind::Now,
667 "calendar" => TokenKind::Calendar,
668 "past" => TokenKind::Past,
669 "future" => TokenKind::Future,
670 "true" => TokenKind::True,
671 "false" => TokenKind::False,
672 "yes" => TokenKind::Yes,
673 "no" => TokenKind::No,
674 "accept" => TokenKind::Accept,
675 "reject" => TokenKind::Reject,
676 "scale" => TokenKind::ScaleKw,
677 "number" => TokenKind::NumberKw,
678 "text" => TokenKind::TextKw,
679 "date" => TokenKind::DateKw,
680 "time" => TokenKind::TimeKw,
681 "duration" => TokenKind::DurationKw,
682 "boolean" => TokenKind::BooleanKw,
683 "percent" => TokenKind::PercentKw,
684 "ratio" => TokenKind::RatioKw,
685 "sqrt" => TokenKind::Sqrt,
686 "sin" => TokenKind::Sin,
687 "cos" => TokenKind::Cos,
688 "tan" => TokenKind::Tan,
689 "asin" => TokenKind::Asin,
690 "acos" => TokenKind::Acos,
691 "atan" => TokenKind::Atan,
692 "log" => TokenKind::Log,
693 "exp" => TokenKind::Exp,
694 "abs" => TokenKind::Abs,
695 "floor" => TokenKind::Floor,
696 "ceil" => TokenKind::Ceil,
697 "round" => TokenKind::Round,
698 "is" => TokenKind::Is,
699 "years" => TokenKind::Years,
700 "year" => TokenKind::Year,
701 "months" => TokenKind::Months,
702 "month" => TokenKind::Month,
703 "weeks" => TokenKind::Weeks,
704 "week" => TokenKind::Week,
705 "days" => TokenKind::Days,
706 "day" => TokenKind::Day,
707 "hours" => TokenKind::Hours,
708 "hour" => TokenKind::Hour,
709 "minutes" => TokenKind::Minutes,
710 "minute" => TokenKind::Minute,
711 "seconds" => TokenKind::Seconds,
712 "second" => TokenKind::Second,
713 "milliseconds" => TokenKind::Milliseconds,
714 "millisecond" => TokenKind::Millisecond,
715 "microseconds" => TokenKind::Microseconds,
716 "microsecond" => TokenKind::Microsecond,
717 "permille" => TokenKind::Permille,
718 _ => TokenKind::Identifier,
719 }
720}
721
722pub fn is_structural_keyword(kind: &TokenKind) -> bool {
727 matches!(
728 kind,
729 TokenKind::Spec
730 | TokenKind::Data
731 | TokenKind::Rule
732 | TokenKind::Unless
733 | TokenKind::Then
734 | TokenKind::Not
735 | TokenKind::And
736 | TokenKind::In
737 | TokenKind::Type
738 | TokenKind::From
739 | TokenKind::With
740 | TokenKind::Meta
741 | TokenKind::Veto
742 | TokenKind::Now
743 | TokenKind::Sqrt
744 | TokenKind::Sin
745 | TokenKind::Cos
746 | TokenKind::Tan
747 | TokenKind::Asin
748 | TokenKind::Acos
749 | TokenKind::Atan
750 | TokenKind::Log
751 | TokenKind::Exp
752 | TokenKind::Abs
753 | TokenKind::Floor
754 | TokenKind::Ceil
755 | TokenKind::Round
756 | TokenKind::True
757 | TokenKind::False
758 | TokenKind::Yes
759 | TokenKind::No
760 | TokenKind::Accept
761 | TokenKind::Reject
762 )
763}
764
765pub fn is_type_keyword(kind: &TokenKind) -> bool {
768 token_kind_to_primitive(kind).is_some()
769}
770
771#[must_use]
773pub fn token_kind_to_primitive(kind: &TokenKind) -> Option<PrimitiveKind> {
774 match kind {
775 TokenKind::BooleanKw => Some(PrimitiveKind::Boolean),
776 TokenKind::ScaleKw => Some(PrimitiveKind::Scale),
777 TokenKind::NumberKw => Some(PrimitiveKind::Number),
778 TokenKind::PercentKw => Some(PrimitiveKind::Percent),
779 TokenKind::RatioKw => Some(PrimitiveKind::Ratio),
780 TokenKind::TextKw => Some(PrimitiveKind::Text),
781 TokenKind::DateKw => Some(PrimitiveKind::Date),
782 TokenKind::TimeKw => Some(PrimitiveKind::Time),
783 TokenKind::DurationKw => Some(PrimitiveKind::Duration),
784 _ => None,
785 }
786}
787
788pub fn is_boolean_keyword(kind: &TokenKind) -> bool {
790 matches!(
791 kind,
792 TokenKind::True
793 | TokenKind::False
794 | TokenKind::Yes
795 | TokenKind::No
796 | TokenKind::Accept
797 | TokenKind::Reject
798 )
799}
800
801pub fn is_duration_unit(kind: &TokenKind) -> bool {
803 matches!(
804 kind,
805 TokenKind::Years
806 | TokenKind::Year
807 | TokenKind::Months
808 | TokenKind::Month
809 | TokenKind::Weeks
810 | TokenKind::Week
811 | TokenKind::Days
812 | TokenKind::Day
813 | TokenKind::Hours
814 | TokenKind::Hour
815 | TokenKind::Minutes
816 | TokenKind::Minute
817 | TokenKind::Seconds
818 | TokenKind::Second
819 | TokenKind::Milliseconds
820 | TokenKind::Millisecond
821 | TokenKind::Microseconds
822 | TokenKind::Microsecond
823 | TokenKind::PercentKw
824 )
825}
826
827#[must_use]
829pub fn token_kind_to_duration_unit(kind: &TokenKind) -> DurationUnit {
830 match kind {
831 TokenKind::Years | TokenKind::Year => DurationUnit::Year,
832 TokenKind::Months | TokenKind::Month => DurationUnit::Month,
833 TokenKind::Weeks | TokenKind::Week => DurationUnit::Week,
834 TokenKind::Days | TokenKind::Day => DurationUnit::Day,
835 TokenKind::Hours | TokenKind::Hour => DurationUnit::Hour,
836 TokenKind::Minutes | TokenKind::Minute => DurationUnit::Minute,
837 TokenKind::Seconds | TokenKind::Second => DurationUnit::Second,
838 TokenKind::Milliseconds | TokenKind::Millisecond => DurationUnit::Millisecond,
839 TokenKind::Microseconds | TokenKind::Microsecond => DurationUnit::Microsecond,
840 _ => unreachable!(
841 "BUG: token_kind_to_duration_unit called with non-duration token {:?}",
842 kind
843 ),
844 }
845}
846
847#[must_use]
850pub fn conversion_target_from_token(kind: &TokenKind, fallback_text: &str) -> ConversionTarget {
851 if is_duration_unit(kind) && *kind != TokenKind::PercentKw {
852 ConversionTarget::Duration(token_kind_to_duration_unit(kind))
853 } else {
854 ConversionTarget::Unit(fallback_text.to_lowercase())
855 }
856}
857
858#[must_use]
860pub fn is_calendar_unit_token(kind: &TokenKind) -> bool {
861 matches!(
862 kind,
863 TokenKind::Years
864 | TokenKind::Year
865 | TokenKind::Months
866 | TokenKind::Month
867 | TokenKind::Weeks
868 | TokenKind::Week
869 )
870}
871
872#[must_use]
874pub fn token_kind_to_calendar_unit(kind: &TokenKind) -> CalendarUnit {
875 match kind {
876 TokenKind::Years | TokenKind::Year => CalendarUnit::Year,
877 TokenKind::Months | TokenKind::Month => CalendarUnit::Month,
878 TokenKind::Weeks | TokenKind::Week => CalendarUnit::Week,
879 _ => unreachable!(
880 "BUG: token_kind_to_calendar_unit called with non-calendar token {:?}",
881 kind
882 ),
883 }
884}
885
886#[must_use]
888pub fn token_kind_to_boolean_value(kind: &TokenKind) -> BooleanValue {
889 match kind {
890 TokenKind::True => BooleanValue::True,
891 TokenKind::False => BooleanValue::False,
892 TokenKind::Yes => BooleanValue::Yes,
893 TokenKind::No => BooleanValue::No,
894 TokenKind::Accept => BooleanValue::Accept,
895 TokenKind::Reject => BooleanValue::Reject,
896 _ => unreachable!(
897 "BUG: token_kind_to_boolean_value called with non-boolean token {:?}",
898 kind
899 ),
900 }
901}
902
903pub fn is_math_function(kind: &TokenKind) -> bool {
905 matches!(
906 kind,
907 TokenKind::Sqrt
908 | TokenKind::Sin
909 | TokenKind::Cos
910 | TokenKind::Tan
911 | TokenKind::Asin
912 | TokenKind::Acos
913 | TokenKind::Atan
914 | TokenKind::Log
915 | TokenKind::Exp
916 | TokenKind::Abs
917 | TokenKind::Floor
918 | TokenKind::Ceil
919 | TokenKind::Round
920 )
921}
922
923pub fn is_spec_body_keyword(kind: &TokenKind) -> bool {
926 matches!(
927 kind,
928 TokenKind::Data | TokenKind::Rule | TokenKind::Type | TokenKind::Meta
929 )
930}
931
932pub fn can_be_label(kind: &TokenKind) -> bool {
937 matches!(
938 kind,
939 TokenKind::Identifier
940 | TokenKind::Calendar
941 | TokenKind::Past
942 | TokenKind::Future
943 | TokenKind::Years
944 | TokenKind::Year
945 | TokenKind::Months
946 | TokenKind::Month
947 | TokenKind::Weeks
948 | TokenKind::Week
949 | TokenKind::Days
950 | TokenKind::Day
951 | TokenKind::Hours
952 | TokenKind::Hour
953 | TokenKind::Minutes
954 | TokenKind::Minute
955 | TokenKind::Seconds
956 | TokenKind::Second
957 | TokenKind::Milliseconds
958 | TokenKind::Millisecond
959 | TokenKind::Microseconds
960 | TokenKind::Microsecond
961 | TokenKind::Permille
962 | TokenKind::Is
963 )
964}
965
966pub fn can_be_reference_segment(kind: &TokenKind) -> bool {
969 can_be_label(kind) || is_type_keyword(kind)
970}
971
// Unit tests: exact token streams, span bookkeeping, and error cases.
#[cfg(test)]
mod tests {
    use super::*;

    // Lexes `input` to completion, returning every token including Eof.
    fn lex_all(input: &str) -> Result<Vec<Token>, Error> {
        let mut lexer = Lexer::new(input, "test.lemma");
        let mut tokens = Vec::new();
        loop {
            let token = lexer.next_token()?;
            if token.kind == TokenKind::Eof {
                tokens.push(token);
                break;
            }
            tokens.push(token);
        }
        Ok(tokens)
    }

    // Like `lex_all`, but keeps only the token kinds.
    fn lex_kinds(input: &str) -> Result<Vec<TokenKind>, Error> {
        Ok(lex_all(input)?.into_iter().map(|t| t.kind).collect())
    }

    #[test]
    fn lex_empty_input() {
        let tokens = lex_all("").unwrap();
        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0].kind, TokenKind::Eof);
    }

    #[test]
    fn lex_spec_declaration() {
        let kinds = lex_kinds("spec person").unwrap();
        assert_eq!(
            kinds,
            vec![TokenKind::Spec, TokenKind::Identifier, TokenKind::Eof]
        );
    }

    #[test]
    fn lex_data_definition() {
        let kinds = lex_kinds("data age: 25").unwrap();
        assert_eq!(
            kinds,
            vec![
                TokenKind::Data,
                TokenKind::Identifier,
                TokenKind::Colon,
                TokenKind::NumberLit,
                TokenKind::Eof,
            ]
        );
    }

    #[test]
    fn lex_rule_with_comparison() {
        let kinds = lex_kinds("rule is_adult: age >= 18").unwrap();
        assert_eq!(
            kinds,
            vec![
                TokenKind::Rule,
                TokenKind::Identifier,
                TokenKind::Colon,
                TokenKind::Identifier,
                TokenKind::Gte,
                TokenKind::NumberLit,
                TokenKind::Eof,
            ]
        );
    }

    // String tokens keep their surrounding quotes in `text`.
    #[test]
    fn lex_string_literal() {
        let tokens = lex_all(r#""hello world""#).unwrap();
        assert_eq!(tokens[0].kind, TokenKind::StringLit);
        assert_eq!(tokens[0].text, "\"hello world\"");
    }

    #[test]
    fn lex_unterminated_string() {
        let result = lex_all(r#""hello"#);
        assert!(result.is_err());
    }

    #[test]
    fn lex_number_with_decimal() {
        let tokens = lex_all("3.14").unwrap();
        assert_eq!(tokens[0].kind, TokenKind::NumberLit);
        assert_eq!(tokens[0].text, "3.14");
    }

    // Separators are preserved verbatim in the number's text.
    #[test]
    fn lex_number_with_underscores() {
        let tokens = lex_all("1_000_000").unwrap();
        assert_eq!(tokens[0].kind, TokenKind::NumberLit);
        assert_eq!(tokens[0].text, "1_000_000");
    }

    #[test]
    fn lex_scientific_notation() {
        let tokens = lex_all("1.5e+10").unwrap();
        assert_eq!(tokens[0].kind, TokenKind::NumberLit);
        assert_eq!(tokens[0].text, "1.5e+10");
    }

    #[test]
    fn lex_all_operators() {
        let kinds = lex_kinds("+ - * / % ^ > < >= <= -> %%").unwrap();
        assert_eq!(
            &kinds[..12],
            &[
                TokenKind::Plus,
                TokenKind::Minus,
                TokenKind::Star,
                TokenKind::Slash,
                TokenKind::Percent,
                TokenKind::Caret,
                TokenKind::Gt,
                TokenKind::Lt,
                TokenKind::Gte,
                TokenKind::Lte,
                TokenKind::Arrow,
                TokenKind::PercentPercent,
            ]
        );
    }

    #[test]
    fn lex_keywords() {
        let kinds = lex_kinds("spec data rule unless then not and in type from with meta veto now")
            .unwrap();
        assert_eq!(
            &kinds[..14],
            &[
                TokenKind::Spec,
                TokenKind::Data,
                TokenKind::Rule,
                TokenKind::Unless,
                TokenKind::Then,
                TokenKind::Not,
                TokenKind::And,
                TokenKind::In,
                TokenKind::Type,
                TokenKind::From,
                TokenKind::With,
                TokenKind::Meta,
                TokenKind::Veto,
                TokenKind::Now,
            ]
        );
    }

    #[test]
    fn lex_boolean_keywords() {
        let kinds = lex_kinds("true false yes no accept reject").unwrap();
        assert_eq!(
            &kinds[..6],
            &[
                TokenKind::True,
                TokenKind::False,
                TokenKind::Yes,
                TokenKind::No,
                TokenKind::Accept,
                TokenKind::Reject,
            ]
        );
    }

    #[test]
    fn lex_duration_keywords() {
        let kinds = lex_kinds("years months weeks days hours minutes seconds").unwrap();
        assert_eq!(
            &kinds[..7],
            &[
                TokenKind::Years,
                TokenKind::Months,
                TokenKind::Weeks,
                TokenKind::Days,
                TokenKind::Hours,
                TokenKind::Minutes,
                TokenKind::Seconds,
            ]
        );
    }

    // Commentary tokens strip the `"""` delimiters from `text`.
    #[test]
    fn lex_commentary() {
        let tokens = lex_all(r#""""hello world""""#).unwrap();
        assert_eq!(tokens[0].kind, TokenKind::Commentary);
        assert_eq!(tokens[0].text, "hello world");
    }

    #[test]
    fn lex_at_sign() {
        let kinds = lex_kinds("@user").unwrap();
        assert_eq!(kinds[0], TokenKind::At);
        assert_eq!(kinds[1], TokenKind::Identifier);
    }

    #[test]
    fn lex_parentheses() {
        let kinds = lex_kinds("(x + 1)").unwrap();
        assert_eq!(
            &kinds[..5],
            &[
                TokenKind::LParen,
                TokenKind::Identifier,
                TokenKind::Plus,
                TokenKind::NumberLit,
                TokenKind::RParen,
            ]
        );
    }

    #[test]
    fn lex_dot_for_references() {
        let kinds = lex_kinds("employee.salary").unwrap();
        assert_eq!(
            &kinds[..3],
            &[TokenKind::Identifier, TokenKind::Dot, TokenKind::Identifier]
        );
    }

    #[test]
    fn lex_spec_name_with_slashes() {
        let tokens = lex_all("spec contracts/employment/jack").unwrap();
        assert_eq!(tokens[0].kind, TokenKind::Spec);
        assert_eq!(tokens[1].kind, TokenKind::Identifier);
    }

    // The scientific-notation backtracking must not swallow `e`-initial
    // identifiers following a number.
    #[test]
    fn lex_number_not_followed_by_e_identifier() {
        let tokens = lex_all("42 eur").unwrap();
        assert_eq!(tokens[0].kind, TokenKind::NumberLit);
        assert_eq!(tokens[0].text, "42");
        assert_eq!(tokens[1].kind, TokenKind::Identifier);
        assert_eq!(tokens[1].text, "eur");
    }

    #[test]
    fn lex_unknown_character() {
        let result = lex_all("ยง");
        assert!(result.is_err());
    }

    #[test]
    fn lex_peek_does_not_consume() {
        let mut lexer = Lexer::new("spec test", "test.lemma");
        let peeked_kind = lexer.peek().unwrap().kind.clone();
        assert_eq!(peeked_kind, TokenKind::Spec);
        let next = lexer.next_token().unwrap();
        assert_eq!(next.kind, TokenKind::Spec);
    }

    // Span start/end are byte offsets; line/col are 1-based.
    #[test]
    fn lex_span_byte_offsets() {
        let tokens = lex_all("spec test").unwrap();
        assert_eq!(tokens[0].span.start, 0);
        assert_eq!(tokens[0].span.end, 4);
        assert_eq!(tokens[0].span.line, 1);
        assert_eq!(tokens[0].span.col, 1);

        assert_eq!(tokens[1].span.start, 5);
        assert_eq!(tokens[1].span.end, 9);
        assert_eq!(tokens[1].span.line, 1);
        assert_eq!(tokens[1].span.col, 6);
    }

    #[test]
    fn lex_multiline_span_tracking() {
        let tokens = lex_all("spec test\ndata x: 1").unwrap();
        let data_token = &tokens[2]; assert_eq!(data_token.kind, TokenKind::Data);
        assert_eq!(data_token.span.line, 2);
        assert_eq!(data_token.span.col, 1);
    }

    #[test]
    fn lex_case_insensitive_keywords() {
        let kinds = lex_kinds("SPEC Data RULE").unwrap();
        assert_eq!(kinds[0], TokenKind::Spec);
        assert_eq!(kinds[1], TokenKind::Data);
        assert_eq!(kinds[2], TokenKind::Rule);
    }

    #[test]
    fn lex_math_function_keywords() {
        let kinds =
            lex_kinds("sqrt sin cos tan asin acos atan log exp abs floor ceil round").unwrap();
        assert_eq!(
            &kinds[..13],
            &[
                TokenKind::Sqrt,
                TokenKind::Sin,
                TokenKind::Cos,
                TokenKind::Tan,
                TokenKind::Asin,
                TokenKind::Acos,
                TokenKind::Atan,
                TokenKind::Log,
                TokenKind::Exp,
                TokenKind::Abs,
                TokenKind::Floor,
                TokenKind::Ceil,
                TokenKind::Round,
            ]
        );
    }

    #[test]
    fn lex_is_keyword() {
        let kinds = lex_kinds("status is \"active\"").unwrap();
        assert_eq!(kinds[0], TokenKind::Identifier);
        assert_eq!(kinds[1], TokenKind::Is);
        assert_eq!(kinds[2], TokenKind::StringLit);
    }

    #[test]
    fn lex_percent_not_followed_by_digit() {
        let kinds = lex_kinds("50%").unwrap();
        assert_eq!(kinds[0], TokenKind::NumberLit);
        assert_eq!(kinds[1], TokenKind::Percent);
    }

    #[test]
    fn lex_number_with_commas() {
        let tokens = lex_all("1,000,000").unwrap();
        assert_eq!(tokens[0].kind, TokenKind::NumberLit);
        assert_eq!(tokens[0].text, "1,000,000");
    }

    #[test]
    fn lex_arrow_chain() {
        let kinds = lex_kinds("-> unit eur 1.00 -> decimals 2").unwrap();
        assert_eq!(kinds[0], TokenKind::Arrow);
        assert_eq!(kinds[1], TokenKind::Identifier);
        assert_eq!(kinds[2], TokenKind::Identifier);
        assert_eq!(kinds[3], TokenKind::NumberLit);
        assert_eq!(kinds[4], TokenKind::Arrow);
    }
}