1use crate::error::Error;
2use crate::parsing::ast::{
3 BooleanValue, CalendarUnit, ConversionTarget, DurationUnit, PrimitiveKind, Span,
4};
5use crate::parsing::source::Source;
6use std::sync::Arc;
7
/// Every kind of token the lexer can produce.
///
/// All variants are fieldless; the text a token was lexed from is carried separately by the
/// token itself. `Hash` is derived so kinds can be used as `HashSet` / `HashMap` keys.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum TokenKind {
    // Keywords.
    Spec,
    Repo,
    Data,
    Rule,
    Unless,
    Then,
    Not,
    And,
    In,
    Type,
    From,
    Uses,
    Meta,
    Veto,
    Now,
    Calendar,
    Past,
    Future,

    // Boolean literal keywords.
    True,
    False,
    Yes,
    No,
    Accept,
    Reject,

    // Primitive type keywords (the `Kw` suffix marks the keyword, not a value).
    ScaleKw,
    NumberKw,
    TextKw,
    DateKw,
    TimeKw,
    DurationKw,
    BooleanKw,
    PercentKw,
    RatioKw,

    // Math function keywords.
    Sqrt,
    Sin,
    Cos,
    Tan,
    Asin,
    Acos,
    Atan,
    Log,
    Exp,
    Abs,
    Floor,
    Ceil,
    Round,

    // Unit keywords: plural and singular duration units, plus `permille`.
    Years,
    Year,
    Months,
    Month,
    Weeks,
    Week,
    Days,
    Day,
    Hours,
    Hour,
    Minutes,
    Minute,
    Seconds,
    Second,
    Milliseconds,
    Millisecond,
    Microseconds,
    Microsecond,
    Permille,

    // The `is` keyword.
    Is,

    // Operators.
    Plus,
    Minus,
    Star,
    Slash,
    Comma,
    Percent,
    PercentPercent,
    Caret,
    Gt,
    Lt,
    Gte,
    Lte,

    // Punctuation.
    Colon,
    Arrow,
    Dot,
    At,
    LParen,
    RParen,

    // Literals.
    NumberLit,
    StringLit,

    // A `"""`-delimited commentary block.
    Commentary,

    // Any word that is not a keyword.
    Identifier,

    // End of input.
    Eof,
}

/// Renders the kind the way it should appear in messages: keywords and symbols are shown
/// quoted (e.g. `'spec'`, `'->'`), the remaining kinds as a short description (e.g. `a number`).
impl std::fmt::Display for TokenKind {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Pick the static text first, then write it once.
        let rendered = match self {
            TokenKind::Spec => "'spec'",
            TokenKind::Repo => "'repo'",
            TokenKind::Data => "'data'",
            TokenKind::Rule => "'rule'",
            TokenKind::Unless => "'unless'",
            TokenKind::Then => "'then'",
            TokenKind::Not => "'not'",
            TokenKind::And => "'and'",
            TokenKind::In => "'in'",
            TokenKind::Type => "'type'",
            TokenKind::From => "'from'",
            TokenKind::Uses => "'uses'",
            TokenKind::Meta => "'meta'",
            TokenKind::Veto => "'veto'",
            TokenKind::Now => "'now'",
            TokenKind::Calendar => "'calendar'",
            TokenKind::Past => "'past'",
            TokenKind::Future => "'future'",
            TokenKind::True => "'true'",
            TokenKind::False => "'false'",
            TokenKind::Yes => "'yes'",
            TokenKind::No => "'no'",
            TokenKind::Accept => "'accept'",
            TokenKind::Reject => "'reject'",
            TokenKind::ScaleKw => "'scale'",
            TokenKind::NumberKw => "'number'",
            TokenKind::TextKw => "'text'",
            TokenKind::DateKw => "'date'",
            TokenKind::TimeKw => "'time'",
            TokenKind::DurationKw => "'duration'",
            TokenKind::BooleanKw => "'boolean'",
            TokenKind::PercentKw => "'percent'",
            TokenKind::RatioKw => "'ratio'",
            TokenKind::Sqrt => "'sqrt'",
            TokenKind::Sin => "'sin'",
            TokenKind::Cos => "'cos'",
            TokenKind::Tan => "'tan'",
            TokenKind::Asin => "'asin'",
            TokenKind::Acos => "'acos'",
            TokenKind::Atan => "'atan'",
            TokenKind::Log => "'log'",
            TokenKind::Exp => "'exp'",
            TokenKind::Abs => "'abs'",
            TokenKind::Floor => "'floor'",
            TokenKind::Ceil => "'ceil'",
            TokenKind::Round => "'round'",
            TokenKind::Years => "'years'",
            TokenKind::Year => "'year'",
            TokenKind::Months => "'months'",
            TokenKind::Month => "'month'",
            TokenKind::Weeks => "'weeks'",
            TokenKind::Week => "'week'",
            TokenKind::Days => "'days'",
            TokenKind::Day => "'day'",
            TokenKind::Hours => "'hours'",
            TokenKind::Hour => "'hour'",
            TokenKind::Minutes => "'minutes'",
            TokenKind::Minute => "'minute'",
            TokenKind::Seconds => "'seconds'",
            TokenKind::Second => "'second'",
            TokenKind::Milliseconds => "'milliseconds'",
            TokenKind::Millisecond => "'millisecond'",
            TokenKind::Microseconds => "'microseconds'",
            TokenKind::Microsecond => "'microsecond'",
            TokenKind::Permille => "'permille'",
            TokenKind::Is => "'is'",
            TokenKind::Plus => "'+'",
            TokenKind::Minus => "'-'",
            TokenKind::Star => "'*'",
            TokenKind::Slash => "'/'",
            TokenKind::Comma => "','",
            TokenKind::Percent => "'%'",
            TokenKind::PercentPercent => "'%%'",
            TokenKind::Caret => "'^'",
            TokenKind::Gt => "'>'",
            TokenKind::Lt => "'<'",
            TokenKind::Gte => "'>='",
            TokenKind::Lte => "'<='",
            TokenKind::Colon => "':'",
            TokenKind::Arrow => "'->'",
            TokenKind::Dot => "'.'",
            TokenKind::At => "'@'",
            TokenKind::LParen => "'('",
            TokenKind::RParen => "')'",
            TokenKind::NumberLit => "a number",
            TokenKind::StringLit => "a string",
            TokenKind::Commentary => "commentary block",
            TokenKind::Identifier => "an identifier",
            TokenKind::Eof => "end of file",
        };
        f.write_str(rendered)
    }
}
219
220#[derive(Debug, Clone)]
221pub struct Token {
222 pub kind: TokenKind,
223 pub span: Span,
224 pub text: String,
225}
226
227impl Token {
228 pub fn eof(offset: usize, line: usize, col: usize) -> Self {
229 Token {
230 kind: TokenKind::Eof,
231 span: Span {
232 start: offset,
233 end: offset,
234 line,
235 col,
236 },
237 text: String::new(),
238 }
239 }
240}
241
242#[derive(Clone)]
244pub struct Lexer {
245 source: Vec<char>,
246 pos: usize,
247 line: usize,
248 col: usize,
249 byte_offset: usize,
250 source_type: crate::parsing::source::SourceType,
251 source_text: Arc<str>,
252 peeked: Option<Token>,
253 peeked2: Option<Token>,
254}
255
256impl Lexer {
257 pub fn new(input: &str, source_type: &crate::parsing::source::SourceType) -> Self {
258 let source_text: Arc<str> = Arc::from(input);
259 Lexer {
260 source: input.chars().collect(),
261 pos: 0,
262 line: 1,
263 col: 1,
264 byte_offset: 0,
265 source_type: source_type.clone(),
266 source_text,
267 peeked: None,
268 peeked2: None,
269 }
270 }
271
272 pub fn source_text(&self) -> Arc<str> {
273 self.source_text.clone()
274 }
275
276 pub fn source_type(&self) -> crate::parsing::source::SourceType {
277 self.source_type.clone()
278 }
279
280 pub fn peek(&mut self) -> Result<&Token, Error> {
281 if self.peeked.is_none() {
282 let token = self.lex_token()?;
283 self.peeked = Some(token);
284 }
285 Ok(self.peeked.as_ref().expect("just assigned"))
286 }
287
288 pub fn peek_second(&mut self) -> Result<&Token, Error> {
289 self.peek()?;
290 if self.peeked2.is_none() {
291 let token = self.lex_token()?;
292 self.peeked2 = Some(token);
293 }
294 Ok(self.peeked2.as_ref().expect("just assigned"))
295 }
296
297 pub fn current_span(&self) -> Span {
299 Span {
300 start: self.byte_offset,
301 end: self.byte_offset,
302 line: self.line,
303 col: self.col,
304 }
305 }
306
307 pub fn next_token(&mut self) -> Result<Token, Error> {
308 if let Some(token) = self.peeked.take() {
309 self.peeked = self.peeked2.take();
310 return Ok(token);
311 }
312 self.lex_token()
313 }
314
315 fn current_char(&self) -> Option<char> {
316 self.source.get(self.pos).copied()
317 }
318
319 fn peek_char(&self) -> Option<char> {
320 self.source.get(self.pos + 1).copied()
321 }
322
323 fn peek_char_at(&self, offset: usize) -> Option<char> {
324 self.source.get(self.pos + offset).copied()
325 }
326
327 fn advance(&mut self) {
328 if let Some(ch) = self.current_char() {
329 self.byte_offset += ch.len_utf8();
330 if ch == '\n' {
331 self.line += 1;
332 self.col = 1;
333 } else {
334 self.col += 1;
335 }
336 self.pos += 1;
337 }
338 }
339
340 fn skip_whitespace(&mut self) {
341 while let Some(ch) = self.current_char() {
342 if ch.is_whitespace() {
343 self.advance();
344 } else {
345 break;
346 }
347 }
348 }
349
350 fn make_span(&self, start_byte: usize, start_line: usize, start_col: usize) -> Span {
351 Span {
352 start: start_byte,
353 end: self.byte_offset,
354 line: start_line,
355 col: start_col,
356 }
357 }
358
359 fn make_error(&self, message: impl Into<String>, span: Span) -> Error {
360 Error::parsing(
361 message,
362 Source::new(self.source_type.clone(), span),
363 None::<String>,
364 )
365 }
366
367 fn lex_token(&mut self) -> Result<Token, Error> {
368 self.skip_whitespace();
369
370 let start_byte = self.byte_offset;
371 let start_line = self.line;
372 let start_col = self.col;
373
374 let Some(ch) = self.current_char() else {
375 return Ok(Token::eof(start_byte, start_line, start_col));
376 };
377
378 if ch == '"' && self.peek_char() == Some('"') && self.peek_char_at(2) == Some('"') {
380 return self.scan_triple_quote(start_byte, start_line, start_col);
381 }
382
383 if ch == '"' {
385 return self.scan_string(start_byte, start_line, start_col);
386 }
387
388 if ch.is_ascii_digit() {
390 return self.scan_number(start_byte, start_line, start_col);
391 }
392
393 if let Some(token) = self.try_two_char_operator(start_byte, start_line, start_col) {
395 return Ok(token);
396 }
397
398 if let Some(kind) = self.single_char_token(ch) {
400 self.advance();
401 let span = self.make_span(start_byte, start_line, start_col);
402 let text = ch.to_string();
403 return Ok(Token { kind, span, text });
404 }
405
406 if ch.is_ascii_alphabetic() || ch == '_' {
408 return Ok(self.scan_identifier(start_byte, start_line, start_col));
409 }
410
411 if ch == '@' {
413 self.advance();
414 let span = self.make_span(start_byte, start_line, start_col);
415 return Ok(Token {
416 kind: TokenKind::At,
417 span,
418 text: "@".to_string(),
419 });
420 }
421
422 self.advance();
424 let span = self.make_span(start_byte, start_line, start_col);
425 Err(self.make_error(format!("Unexpected character '{}'", ch), span))
426 }
427
428 fn scan_triple_quote(
429 &mut self,
430 start_byte: usize,
431 start_line: usize,
432 start_col: usize,
433 ) -> Result<Token, Error> {
434 self.advance(); self.advance(); self.advance(); let content_start = self.byte_offset;
439 loop {
440 match self.current_char() {
441 None => {
442 let span = self.make_span(start_byte, start_line, start_col);
443 return Err(self.make_error(
444 "Unterminated commentary block: expected closing \"\"\"",
445 span,
446 ));
447 }
448 Some('"')
449 if self.source.get(self.pos + 1) == Some(&'"')
450 && self.source.get(self.pos + 2) == Some(&'"') =>
451 {
452 let content_end = self.byte_offset;
453 self.advance(); self.advance(); self.advance(); let raw: String = self.source_text[content_start..content_end].to_string();
457 let span = self.make_span(start_byte, start_line, start_col);
458 return Ok(Token {
459 kind: TokenKind::Commentary,
460 span,
461 text: raw,
462 });
463 }
464 Some(_) => {
465 self.advance();
466 }
467 }
468 }
469 }
470
471 fn scan_string(
472 &mut self,
473 start_byte: usize,
474 start_line: usize,
475 start_col: usize,
476 ) -> Result<Token, Error> {
477 self.advance(); let mut content = String::new();
479 loop {
480 match self.current_char() {
481 None => {
482 let span = self.make_span(start_byte, start_line, start_col);
483 return Err(self.make_error("String starting here was never closed", span));
484 }
485 Some('"') => {
486 self.advance(); break;
488 }
489 Some(ch) => {
490 content.push(ch);
491 self.advance();
492 }
493 }
494 }
495 let span = self.make_span(start_byte, start_line, start_col);
496 let full_text = format!("\"{}\"", content);
499 Ok(Token {
500 kind: TokenKind::StringLit,
501 span,
502 text: full_text,
503 })
504 }
505
506 fn scan_number(
507 &mut self,
508 start_byte: usize,
509 start_line: usize,
510 start_col: usize,
511 ) -> Result<Token, Error> {
512 let mut text = String::new();
513
514 while let Some(ch) = self.current_char() {
516 if ch.is_ascii_digit() || ch == '_' || ch == ',' {
517 text.push(ch);
518 self.advance();
519 } else {
520 break;
521 }
522 }
523
524 if self.current_char() == Some('.') {
526 if let Some(next) = self.peek_char() {
528 if next.is_ascii_digit() {
529 text.push('.');
530 self.advance(); while let Some(ch) = self.current_char() {
532 if ch.is_ascii_digit() {
533 text.push(ch);
534 self.advance();
535 } else {
536 break;
537 }
538 }
539 }
540 }
541 }
542
543 if let Some(ch) = self.current_char() {
545 if ch == 'e' || ch == 'E' {
546 let mut sci_text = String::new();
547 sci_text.push(ch);
548 let save_pos = self.pos;
549 let save_byte = self.byte_offset;
550 let save_line = self.line;
551 let save_col = self.col;
552 self.advance(); if let Some(sign) = self.current_char() {
555 if sign == '+' || sign == '-' {
556 sci_text.push(sign);
557 self.advance();
558 }
559 }
560
561 if let Some(d) = self.current_char() {
562 if d.is_ascii_digit() {
563 while let Some(ch) = self.current_char() {
564 if ch.is_ascii_digit() {
565 sci_text.push(ch);
566 self.advance();
567 } else {
568 break;
569 }
570 }
571 text.push_str(&sci_text);
572 } else {
573 self.pos = save_pos;
575 self.byte_offset = save_byte;
576 self.line = save_line;
577 self.col = save_col;
578 }
579 } else {
580 self.pos = save_pos;
581 self.byte_offset = save_byte;
582 self.line = save_line;
583 self.col = save_col;
584 }
585 }
586 }
587
588 let span = self.make_span(start_byte, start_line, start_col);
589 Ok(Token {
590 kind: TokenKind::NumberLit,
591 span,
592 text,
593 })
594 }
595
596 fn try_two_char_operator(
597 &mut self,
598 start_byte: usize,
599 start_line: usize,
600 start_col: usize,
601 ) -> Option<Token> {
602 let ch = self.current_char()?;
603 let next = self.peek_char();
604
605 let kind = match (ch, next) {
606 ('-', Some('>')) => TokenKind::Arrow,
607 ('>', Some('=')) => TokenKind::Gte,
608 ('<', Some('=')) => TokenKind::Lte,
609 ('%', Some('%')) => {
610 TokenKind::PercentPercent
612 }
613 _ => return None,
614 };
615
616 self.advance();
617 self.advance();
618 let span = self.make_span(start_byte, start_line, start_col);
619 let text: String = self.source_text[span.start..span.end].to_string();
620 Some(Token { kind, span, text })
621 }
622
623 fn single_char_token(&self, ch: char) -> Option<TokenKind> {
624 match ch {
625 '+' => Some(TokenKind::Plus),
626 '*' => Some(TokenKind::Star),
627 '/' => Some(TokenKind::Slash),
628 ',' => Some(TokenKind::Comma),
629 '^' => Some(TokenKind::Caret),
630 ':' => Some(TokenKind::Colon),
631 '.' => Some(TokenKind::Dot),
632 '(' => Some(TokenKind::LParen),
633 ')' => Some(TokenKind::RParen),
634 '>' => Some(TokenKind::Gt),
635 '<' => Some(TokenKind::Lt),
636 '%' => Some(TokenKind::Percent),
637 '-' => Some(TokenKind::Minus),
638 _ => None,
639 }
640 }
641
642 fn scan_identifier(&mut self, start_byte: usize, start_line: usize, start_col: usize) -> Token {
643 let mut text = String::new();
644 while let Some(ch) = self.current_char() {
645 if ch.is_ascii_alphanumeric() || ch == '_' {
646 text.push(ch);
647 self.advance();
648 } else {
649 break;
650 }
651 }
652
653 let kind = keyword_from_identifier(&text);
654 let span = self.make_span(start_byte, start_line, start_col);
655 Token { kind, span, text }
656 }
657}
658
659fn keyword_from_identifier(text: &str) -> TokenKind {
660 match text.to_lowercase().as_str() {
661 "spec" => TokenKind::Spec,
662 "repo" => TokenKind::Repo,
663 "data" => TokenKind::Data,
664 "rule" => TokenKind::Rule,
665 "unless" => TokenKind::Unless,
666 "then" => TokenKind::Then,
667 "not" => TokenKind::Not,
668 "and" => TokenKind::And,
669 "in" => TokenKind::In,
670 "type" => TokenKind::Type,
671 "from" => TokenKind::From,
672 "uses" => TokenKind::Uses,
673 "meta" => TokenKind::Meta,
674 "veto" => TokenKind::Veto,
675 "now" => TokenKind::Now,
676 "calendar" => TokenKind::Calendar,
677 "past" => TokenKind::Past,
678 "future" => TokenKind::Future,
679 "true" => TokenKind::True,
680 "false" => TokenKind::False,
681 "yes" => TokenKind::Yes,
682 "no" => TokenKind::No,
683 "accept" => TokenKind::Accept,
684 "reject" => TokenKind::Reject,
685 "scale" => TokenKind::ScaleKw,
686 "number" => TokenKind::NumberKw,
687 "text" => TokenKind::TextKw,
688 "date" => TokenKind::DateKw,
689 "time" => TokenKind::TimeKw,
690 "duration" => TokenKind::DurationKw,
691 "boolean" => TokenKind::BooleanKw,
692 "percent" => TokenKind::PercentKw,
693 "ratio" => TokenKind::RatioKw,
694 "sqrt" => TokenKind::Sqrt,
695 "sin" => TokenKind::Sin,
696 "cos" => TokenKind::Cos,
697 "tan" => TokenKind::Tan,
698 "asin" => TokenKind::Asin,
699 "acos" => TokenKind::Acos,
700 "atan" => TokenKind::Atan,
701 "log" => TokenKind::Log,
702 "exp" => TokenKind::Exp,
703 "abs" => TokenKind::Abs,
704 "floor" => TokenKind::Floor,
705 "ceil" => TokenKind::Ceil,
706 "round" => TokenKind::Round,
707 "is" => TokenKind::Is,
708 "years" => TokenKind::Years,
709 "year" => TokenKind::Year,
710 "months" => TokenKind::Months,
711 "month" => TokenKind::Month,
712 "weeks" => TokenKind::Weeks,
713 "week" => TokenKind::Week,
714 "days" => TokenKind::Days,
715 "day" => TokenKind::Day,
716 "hours" => TokenKind::Hours,
717 "hour" => TokenKind::Hour,
718 "minutes" => TokenKind::Minutes,
719 "minute" => TokenKind::Minute,
720 "seconds" => TokenKind::Seconds,
721 "second" => TokenKind::Second,
722 "milliseconds" => TokenKind::Milliseconds,
723 "millisecond" => TokenKind::Millisecond,
724 "microseconds" => TokenKind::Microseconds,
725 "microsecond" => TokenKind::Microsecond,
726 "permille" => TokenKind::Permille,
727 _ => TokenKind::Identifier,
728 }
729}
730
731pub fn is_structural_keyword(kind: &TokenKind) -> bool {
736 matches!(
737 kind,
738 TokenKind::Spec
739 | TokenKind::Repo
740 | TokenKind::Data
741 | TokenKind::Rule
742 | TokenKind::Unless
743 | TokenKind::Then
744 | TokenKind::Not
745 | TokenKind::And
746 | TokenKind::In
747 | TokenKind::Type
748 | TokenKind::From
749 | TokenKind::Uses
750 | TokenKind::Meta
751 | TokenKind::Veto
752 | TokenKind::Now
753 | TokenKind::Sqrt
754 | TokenKind::Sin
755 | TokenKind::Cos
756 | TokenKind::Tan
757 | TokenKind::Asin
758 | TokenKind::Acos
759 | TokenKind::Atan
760 | TokenKind::Log
761 | TokenKind::Exp
762 | TokenKind::Abs
763 | TokenKind::Floor
764 | TokenKind::Ceil
765 | TokenKind::Round
766 | TokenKind::True
767 | TokenKind::False
768 | TokenKind::Yes
769 | TokenKind::No
770 | TokenKind::Accept
771 | TokenKind::Reject
772 )
773}
774
775pub fn is_type_keyword(kind: &TokenKind) -> bool {
778 token_kind_to_primitive(kind).is_some()
779}
780
781#[must_use]
783pub fn token_kind_to_primitive(kind: &TokenKind) -> Option<PrimitiveKind> {
784 match kind {
785 TokenKind::BooleanKw => Some(PrimitiveKind::Boolean),
786 TokenKind::ScaleKw => Some(PrimitiveKind::Scale),
787 TokenKind::NumberKw => Some(PrimitiveKind::Number),
788 TokenKind::PercentKw => Some(PrimitiveKind::Percent),
789 TokenKind::RatioKw => Some(PrimitiveKind::Ratio),
790 TokenKind::TextKw => Some(PrimitiveKind::Text),
791 TokenKind::DateKw => Some(PrimitiveKind::Date),
792 TokenKind::TimeKw => Some(PrimitiveKind::Time),
793 TokenKind::DurationKw => Some(PrimitiveKind::Duration),
794 _ => None,
795 }
796}
797
798pub fn is_boolean_keyword(kind: &TokenKind) -> bool {
800 matches!(
801 kind,
802 TokenKind::True
803 | TokenKind::False
804 | TokenKind::Yes
805 | TokenKind::No
806 | TokenKind::Accept
807 | TokenKind::Reject
808 )
809}
810
811pub fn is_duration_unit(kind: &TokenKind) -> bool {
813 matches!(
814 kind,
815 TokenKind::Years
816 | TokenKind::Year
817 | TokenKind::Months
818 | TokenKind::Month
819 | TokenKind::Weeks
820 | TokenKind::Week
821 | TokenKind::Days
822 | TokenKind::Day
823 | TokenKind::Hours
824 | TokenKind::Hour
825 | TokenKind::Minutes
826 | TokenKind::Minute
827 | TokenKind::Seconds
828 | TokenKind::Second
829 | TokenKind::Milliseconds
830 | TokenKind::Millisecond
831 | TokenKind::Microseconds
832 | TokenKind::Microsecond
833 | TokenKind::PercentKw
834 )
835}
836
837#[must_use]
839pub fn token_kind_to_duration_unit(kind: &TokenKind) -> DurationUnit {
840 match kind {
841 TokenKind::Years | TokenKind::Year => DurationUnit::Year,
842 TokenKind::Months | TokenKind::Month => DurationUnit::Month,
843 TokenKind::Weeks | TokenKind::Week => DurationUnit::Week,
844 TokenKind::Days | TokenKind::Day => DurationUnit::Day,
845 TokenKind::Hours | TokenKind::Hour => DurationUnit::Hour,
846 TokenKind::Minutes | TokenKind::Minute => DurationUnit::Minute,
847 TokenKind::Seconds | TokenKind::Second => DurationUnit::Second,
848 TokenKind::Milliseconds | TokenKind::Millisecond => DurationUnit::Millisecond,
849 TokenKind::Microseconds | TokenKind::Microsecond => DurationUnit::Microsecond,
850 _ => unreachable!(
851 "BUG: token_kind_to_duration_unit called with non-duration token {:?}",
852 kind
853 ),
854 }
855}
856
857#[must_use]
860pub fn conversion_target_from_token(kind: &TokenKind, fallback_text: &str) -> ConversionTarget {
861 if is_duration_unit(kind) && *kind != TokenKind::PercentKw {
862 ConversionTarget::Duration(token_kind_to_duration_unit(kind))
863 } else {
864 ConversionTarget::Unit(fallback_text.to_lowercase())
865 }
866}
867
868#[must_use]
870pub fn is_calendar_unit_token(kind: &TokenKind) -> bool {
871 matches!(
872 kind,
873 TokenKind::Years
874 | TokenKind::Year
875 | TokenKind::Months
876 | TokenKind::Month
877 | TokenKind::Weeks
878 | TokenKind::Week
879 )
880}
881
882#[must_use]
884pub fn token_kind_to_calendar_unit(kind: &TokenKind) -> CalendarUnit {
885 match kind {
886 TokenKind::Years | TokenKind::Year => CalendarUnit::Year,
887 TokenKind::Months | TokenKind::Month => CalendarUnit::Month,
888 TokenKind::Weeks | TokenKind::Week => CalendarUnit::Week,
889 _ => unreachable!(
890 "BUG: token_kind_to_calendar_unit called with non-calendar token {:?}",
891 kind
892 ),
893 }
894}
895
896#[must_use]
898pub fn token_kind_to_boolean_value(kind: &TokenKind) -> BooleanValue {
899 match kind {
900 TokenKind::True => BooleanValue::True,
901 TokenKind::False => BooleanValue::False,
902 TokenKind::Yes => BooleanValue::Yes,
903 TokenKind::No => BooleanValue::No,
904 TokenKind::Accept => BooleanValue::Accept,
905 TokenKind::Reject => BooleanValue::Reject,
906 _ => unreachable!(
907 "BUG: token_kind_to_boolean_value called with non-boolean token {:?}",
908 kind
909 ),
910 }
911}
912
913pub fn is_math_function(kind: &TokenKind) -> bool {
915 matches!(
916 kind,
917 TokenKind::Sqrt
918 | TokenKind::Sin
919 | TokenKind::Cos
920 | TokenKind::Tan
921 | TokenKind::Asin
922 | TokenKind::Acos
923 | TokenKind::Atan
924 | TokenKind::Log
925 | TokenKind::Exp
926 | TokenKind::Abs
927 | TokenKind::Floor
928 | TokenKind::Ceil
929 | TokenKind::Round
930 )
931}
932
933pub fn is_spec_body_keyword(kind: &TokenKind) -> bool {
936 matches!(
937 kind,
938 TokenKind::Data | TokenKind::Rule | TokenKind::Type | TokenKind::Meta
939 )
940}
941
942pub fn can_be_label(kind: &TokenKind) -> bool {
947 matches!(
948 kind,
949 TokenKind::Identifier
950 | TokenKind::Calendar
951 | TokenKind::Past
952 | TokenKind::Future
953 | TokenKind::Years
954 | TokenKind::Year
955 | TokenKind::Months
956 | TokenKind::Month
957 | TokenKind::Weeks
958 | TokenKind::Week
959 | TokenKind::Days
960 | TokenKind::Day
961 | TokenKind::Hours
962 | TokenKind::Hour
963 | TokenKind::Minutes
964 | TokenKind::Minute
965 | TokenKind::Seconds
966 | TokenKind::Second
967 | TokenKind::Milliseconds
968 | TokenKind::Millisecond
969 | TokenKind::Microseconds
970 | TokenKind::Microsecond
971 | TokenKind::Permille
972 | TokenKind::Is
973 )
974}
975
976pub fn can_be_reference_segment(kind: &TokenKind) -> bool {
979 can_be_label(kind) || is_type_keyword(kind)
980}
981
982#[must_use]
987pub fn can_be_repository_qualifier_segment(kind: &TokenKind) -> bool {
988 matches!(kind, TokenKind::Identifier)
989 || is_structural_keyword(kind)
990 || can_be_label(kind)
991 || is_type_keyword(kind)
992 || is_boolean_keyword(kind)
993 || is_math_function(kind)
994 || is_duration_unit(kind)
995}
996
#[cfg(test)]
mod tests {
    use super::*;

    /// Lexes `input` to completion, returning every token including the final `Eof`.
    fn lex_all(input: &str) -> Result<Vec<Token>, Error> {
        let mut lexer = Lexer::new(input, &crate::parsing::source::SourceType::Volatile);
        let mut tokens = Vec::new();
        loop {
            let token = lexer.next_token()?;
            let reached_end = token.kind == TokenKind::Eof;
            tokens.push(token);
            if reached_end {
                break;
            }
        }
        Ok(tokens)
    }

    /// Like `lex_all`, but keeps only the token kinds.
    fn lex_kinds(input: &str) -> Result<Vec<TokenKind>, Error> {
        Ok(lex_all(input)?.into_iter().map(|t| t.kind).collect())
    }

    #[test]
    fn lex_empty_input() {
        let tokens = lex_all("").unwrap();
        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0].kind, TokenKind::Eof);
    }

    #[test]
    fn lex_spec_declaration() {
        let kinds = lex_kinds("spec person").unwrap();
        assert_eq!(
            kinds,
            vec![TokenKind::Spec, TokenKind::Identifier, TokenKind::Eof]
        );
    }

    #[test]
    fn lex_data_definition() {
        let kinds = lex_kinds("data age: 25").unwrap();
        assert_eq!(
            kinds,
            vec![
                TokenKind::Data,
                TokenKind::Identifier,
                TokenKind::Colon,
                TokenKind::NumberLit,
                TokenKind::Eof,
            ]
        );
    }

    #[test]
    fn lex_rule_with_comparison() {
        let kinds = lex_kinds("rule is_adult: age >= 18").unwrap();
        assert_eq!(
            kinds,
            vec![
                TokenKind::Rule,
                TokenKind::Identifier,
                TokenKind::Colon,
                TokenKind::Identifier,
                TokenKind::Gte,
                TokenKind::NumberLit,
                TokenKind::Eof,
            ]
        );
    }

    #[test]
    fn lex_string_literal() {
        let tokens = lex_all(r#""hello world""#).unwrap();
        assert_eq!(tokens[0].kind, TokenKind::StringLit);
        assert_eq!(tokens[0].text, "\"hello world\"");
    }

    #[test]
    fn lex_unterminated_string() {
        let result = lex_all(r#""hello"#);
        assert!(result.is_err());
    }

    #[test]
    fn lex_number_with_decimal() {
        let tokens = lex_all("3.14").unwrap();
        assert_eq!(tokens[0].kind, TokenKind::NumberLit);
        assert_eq!(tokens[0].text, "3.14");
    }

    #[test]
    fn lex_number_with_underscores() {
        let tokens = lex_all("1_000_000").unwrap();
        assert_eq!(tokens[0].kind, TokenKind::NumberLit);
        assert_eq!(tokens[0].text, "1_000_000");
    }

    #[test]
    fn lex_scientific_notation() {
        let tokens = lex_all("1.5e+10").unwrap();
        assert_eq!(tokens[0].kind, TokenKind::NumberLit);
        assert_eq!(tokens[0].text, "1.5e+10");
    }

    #[test]
    fn lex_all_operators() {
        let kinds = lex_kinds("+ - * / % ^ > < >= <= -> %%").unwrap();
        assert_eq!(
            &kinds[..12],
            &[
                TokenKind::Plus,
                TokenKind::Minus,
                TokenKind::Star,
                TokenKind::Slash,
                TokenKind::Percent,
                TokenKind::Caret,
                TokenKind::Gt,
                TokenKind::Lt,
                TokenKind::Gte,
                TokenKind::Lte,
                TokenKind::Arrow,
                TokenKind::PercentPercent,
            ]
        );
    }

    #[test]
    fn lex_keywords() {
        let kinds = lex_kinds("spec data rule unless then not and in type from uses meta veto now")
            .unwrap();
        assert_eq!(
            &kinds[..14],
            &[
                TokenKind::Spec,
                TokenKind::Data,
                TokenKind::Rule,
                TokenKind::Unless,
                TokenKind::Then,
                TokenKind::Not,
                TokenKind::And,
                TokenKind::In,
                TokenKind::Type,
                TokenKind::From,
                TokenKind::Uses,
                TokenKind::Meta,
                TokenKind::Veto,
                TokenKind::Now,
            ]
        );
    }

    #[test]
    fn lex_boolean_keywords() {
        let kinds = lex_kinds("true false yes no accept reject").unwrap();
        assert_eq!(
            &kinds[..6],
            &[
                TokenKind::True,
                TokenKind::False,
                TokenKind::Yes,
                TokenKind::No,
                TokenKind::Accept,
                TokenKind::Reject,
            ]
        );
    }

    #[test]
    fn lex_duration_keywords() {
        let kinds = lex_kinds("years months weeks days hours minutes seconds").unwrap();
        assert_eq!(
            &kinds[..7],
            &[
                TokenKind::Years,
                TokenKind::Months,
                TokenKind::Weeks,
                TokenKind::Days,
                TokenKind::Hours,
                TokenKind::Minutes,
                TokenKind::Seconds,
            ]
        );
    }

    #[test]
    fn lex_commentary() {
        let tokens = lex_all(r#""""hello world""""#).unwrap();
        assert_eq!(tokens[0].kind, TokenKind::Commentary);
        // The commentary text excludes the triple-quote delimiters.
        assert_eq!(tokens[0].text, "hello world");
    }

    #[test]
    fn lex_at_sign() {
        let kinds = lex_kinds("@user").unwrap();
        assert_eq!(kinds[0], TokenKind::At);
        assert_eq!(kinds[1], TokenKind::Identifier);
    }

    #[test]
    fn lex_parentheses() {
        let kinds = lex_kinds("(x + 1)").unwrap();
        assert_eq!(
            &kinds[..5],
            &[
                TokenKind::LParen,
                TokenKind::Identifier,
                TokenKind::Plus,
                TokenKind::NumberLit,
                TokenKind::RParen,
            ]
        );
    }

    #[test]
    fn lex_dot_for_references() {
        let kinds = lex_kinds("employee.salary").unwrap();
        assert_eq!(
            &kinds[..3],
            &[TokenKind::Identifier, TokenKind::Dot, TokenKind::Identifier]
        );
    }

    #[test]
    fn lex_spec_name_with_slashes() {
        let tokens = lex_all("spec contracts/employment/jack").unwrap();
        // The lexer does not join the path: each segment is its own identifier and each
        // `/` is a `Slash` token.
        let kinds: Vec<TokenKind> = tokens.iter().map(|t| t.kind.clone()).collect();
        assert_eq!(
            kinds,
            vec![
                TokenKind::Spec,
                TokenKind::Identifier,
                TokenKind::Slash,
                TokenKind::Identifier,
                TokenKind::Slash,
                TokenKind::Identifier,
                TokenKind::Eof,
            ]
        );
        assert_eq!(tokens[1].text, "contracts");
        assert_eq!(tokens[3].text, "employment");
        assert_eq!(tokens[5].text, "jack");
    }

    #[test]
    fn lex_number_not_followed_by_e_identifier() {
        let tokens = lex_all("42 eur").unwrap();
        // The `e` of `eur` must not be taken for an exponent marker.
        assert_eq!(tokens[0].kind, TokenKind::NumberLit);
        assert_eq!(tokens[0].text, "42");
        assert_eq!(tokens[1].kind, TokenKind::Identifier);
        assert_eq!(tokens[1].text, "eur");
    }

    #[test]
    fn lex_unknown_character() {
        let result = lex_all("§");
        assert!(result.is_err());
    }

    #[test]
    fn lex_peek_does_not_consume() {
        let mut lexer = Lexer::new("spec test", &crate::parsing::source::SourceType::Volatile);
        let peeked_kind = lexer.peek().unwrap().kind.clone();
        assert_eq!(peeked_kind, TokenKind::Spec);
        let next = lexer.next_token().unwrap();
        assert_eq!(next.kind, TokenKind::Spec);
    }

    #[test]
    fn lex_span_byte_offsets() {
        let tokens = lex_all("spec test").unwrap();
        assert_eq!(tokens[0].span.start, 0);
        assert_eq!(tokens[0].span.end, 4);
        assert_eq!(tokens[0].span.line, 1);
        assert_eq!(tokens[0].span.col, 1);

        assert_eq!(tokens[1].span.start, 5);
        assert_eq!(tokens[1].span.end, 9);
        assert_eq!(tokens[1].span.line, 1);
        assert_eq!(tokens[1].span.col, 6);
    }

    #[test]
    fn lex_multiline_span_tracking() {
        let tokens = lex_all("spec test\ndata x: 1").unwrap();
        // tokens: `spec`, `test`, then `data` on the second line.
        let data_token = &tokens[2];
        assert_eq!(data_token.kind, TokenKind::Data);
        assert_eq!(data_token.span.line, 2);
        assert_eq!(data_token.span.col, 1);
    }

    #[test]
    fn lex_case_insensitive_keywords() {
        let kinds = lex_kinds("SPEC Data RULE").unwrap();
        assert_eq!(kinds[0], TokenKind::Spec);
        assert_eq!(kinds[1], TokenKind::Data);
        assert_eq!(kinds[2], TokenKind::Rule);
    }

    #[test]
    fn lex_math_function_keywords() {
        let kinds =
            lex_kinds("sqrt sin cos tan asin acos atan log exp abs floor ceil round").unwrap();
        assert_eq!(
            &kinds[..13],
            &[
                TokenKind::Sqrt,
                TokenKind::Sin,
                TokenKind::Cos,
                TokenKind::Tan,
                TokenKind::Asin,
                TokenKind::Acos,
                TokenKind::Atan,
                TokenKind::Log,
                TokenKind::Exp,
                TokenKind::Abs,
                TokenKind::Floor,
                TokenKind::Ceil,
                TokenKind::Round,
            ]
        );
    }

    #[test]
    fn lex_is_keyword() {
        let kinds = lex_kinds("status is \"active\"").unwrap();
        assert_eq!(kinds[0], TokenKind::Identifier);
        assert_eq!(kinds[1], TokenKind::Is);
        assert_eq!(kinds[2], TokenKind::StringLit);
    }

    #[test]
    fn lex_percent_not_followed_by_digit() {
        let kinds = lex_kinds("50%").unwrap();
        assert_eq!(kinds[0], TokenKind::NumberLit);
        assert_eq!(kinds[1], TokenKind::Percent);
    }

    #[test]
    fn lex_number_with_commas() {
        let tokens = lex_all("1,000,000").unwrap();
        assert_eq!(tokens[0].kind, TokenKind::NumberLit);
        assert_eq!(tokens[0].text, "1,000,000");
    }

    #[test]
    fn lex_arrow_chain() {
        let kinds = lex_kinds("-> unit eur 1.00 -> decimals 2").unwrap();
        assert_eq!(kinds[0], TokenKind::Arrow);
        assert_eq!(kinds[1], TokenKind::Identifier);
        assert_eq!(kinds[2], TokenKind::Identifier);
        assert_eq!(kinds[3], TokenKind::NumberLit);
        assert_eq!(kinds[4], TokenKind::Arrow);
    }
}