1use crate::error::Error;
2use crate::parsing::ast::{BooleanValue, CalendarUnit, ConversionTarget, PrimitiveKind, Span};
3use crate::parsing::source::Source;
4use std::sync::Arc;
5
/// Every kind of token the lexer can produce.
///
/// Keyword variants are matched case-insensitively by the lexer; the
/// [`std::fmt::Display`] impl renders each kind the way it should appear in
/// "expected X" style diagnostics.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TokenKind {
    // --- Keywords ---
    Spec,
    Repo,
    Data,
    Rule,
    Unless,
    Then,
    Not,
    And,
    In,
    As,
    Type,
    Uses,
    Fill,
    Meta,
    Veto,
    Now,
    Calendar,
    Past,
    Future,

    // --- Boolean literal keywords ---
    True,
    False,
    Yes,
    No,
    Accept,
    Reject,

    // --- Primitive type keywords (the `Kw` suffix avoids clashing with literals) ---
    QuantityKw,
    NumberKw,
    TextKw,
    DateKw,
    TimeKw,
    BooleanKw,
    PercentKw,
    RatioKw,

    // --- Math function keywords ---
    Sqrt,
    Sin,
    Cos,
    Tan,
    Asin,
    Acos,
    Atan,
    Log,
    Exp,
    Abs,
    Floor,
    Ceil,
    Round,

    /// The `permille` keyword.
    Permille,

    /// The `is` keyword.
    Is,

    // --- Operators ---
    Plus,
    Minus,
    Star,
    Slash,
    Comma,
    Percent,
    /// `%%`
    PercentPercent,
    Caret,
    Gt,
    Lt,
    /// `>=`
    Gte,
    /// `<=`
    Lte,

    // --- Punctuation ---
    Colon,
    /// `->`
    Arrow,
    /// `...`
    Ellipsis,
    Dot,
    At,
    LParen,
    RParen,

    // --- Literals ---
    NumberLit,
    StringLit,

    /// A `"""`-delimited commentary block.
    Commentary,

    /// Any word that is not a keyword.
    Identifier,

    /// End of input.
    Eof,
}

impl std::fmt::Display for TokenKind {
    /// Writes the human-readable description of the token kind: the quoted
    /// spelling for keywords and symbols, a short phrase for everything else.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let description: &'static str = match self {
            // Keywords
            TokenKind::Spec => "'spec'",
            TokenKind::Repo => "'repo'",
            TokenKind::Data => "'data'",
            TokenKind::Rule => "'rule'",
            TokenKind::Unless => "'unless'",
            TokenKind::Then => "'then'",
            TokenKind::Not => "'not'",
            TokenKind::And => "'and'",
            TokenKind::In => "'in'",
            TokenKind::As => "'as'",
            TokenKind::Type => "'type'",
            TokenKind::Uses => "'uses'",
            TokenKind::Fill => "'fill'",
            TokenKind::Meta => "'meta'",
            TokenKind::Veto => "'veto'",
            TokenKind::Now => "'now'",
            TokenKind::Calendar => "'calendar'",
            TokenKind::Past => "'past'",
            TokenKind::Future => "'future'",
            // Boolean literal keywords
            TokenKind::True => "'true'",
            TokenKind::False => "'false'",
            TokenKind::Yes => "'yes'",
            TokenKind::No => "'no'",
            TokenKind::Accept => "'accept'",
            TokenKind::Reject => "'reject'",
            // Primitive type keywords
            TokenKind::QuantityKw => "'quantity'",
            TokenKind::NumberKw => "'number'",
            TokenKind::TextKw => "'text'",
            TokenKind::DateKw => "'date'",
            TokenKind::TimeKw => "'time'",
            TokenKind::BooleanKw => "'boolean'",
            TokenKind::PercentKw => "'percent'",
            TokenKind::RatioKw => "'ratio'",
            // Math function keywords
            TokenKind::Sqrt => "'sqrt'",
            TokenKind::Sin => "'sin'",
            TokenKind::Cos => "'cos'",
            TokenKind::Tan => "'tan'",
            TokenKind::Asin => "'asin'",
            TokenKind::Acos => "'acos'",
            TokenKind::Atan => "'atan'",
            TokenKind::Log => "'log'",
            TokenKind::Exp => "'exp'",
            TokenKind::Abs => "'abs'",
            TokenKind::Floor => "'floor'",
            TokenKind::Ceil => "'ceil'",
            TokenKind::Round => "'round'",
            TokenKind::Permille => "'permille'",
            TokenKind::Is => "'is'",
            // Operators
            TokenKind::Plus => "'+'",
            TokenKind::Minus => "'-'",
            TokenKind::Star => "'*'",
            TokenKind::Slash => "'/'",
            TokenKind::Comma => "','",
            TokenKind::Percent => "'%'",
            TokenKind::PercentPercent => "'%%'",
            TokenKind::Caret => "'^'",
            TokenKind::Gt => "'>'",
            TokenKind::Lt => "'<'",
            TokenKind::Gte => "'>='",
            TokenKind::Lte => "'<='",
            // Punctuation
            TokenKind::Colon => "':'",
            TokenKind::Arrow => "'->'",
            TokenKind::Ellipsis => "'...'",
            TokenKind::Dot => "'.'",
            TokenKind::At => "'@'",
            TokenKind::LParen => "'('",
            TokenKind::RParen => "')'",
            // Literals and everything else
            TokenKind::NumberLit => "a number",
            TokenKind::StringLit => "a string",
            TokenKind::Commentary => "commentary block",
            TokenKind::Identifier => "an identifier",
            TokenKind::Eof => "end of file",
        };
        f.write_str(description)
    }
}
182
/// A single lexed token: its kind, where it sits in the source, and its text.
#[derive(Debug, Clone)]
pub struct Token {
    /// What kind of token this is.
    pub kind: TokenKind,
    /// Byte range plus the line/column at which the token starts.
    pub span: Span,
    /// Token text as produced by the lexer. String literals keep their
    /// surrounding quotes, commentary blocks hold only the content between the
    /// `"""` delimiters, and the end-of-file token has empty text.
    pub text: String,
}
189
190impl Token {
191 pub fn eof(offset: usize, line: usize, col: usize) -> Self {
192 Token {
193 kind: TokenKind::Eof,
194 span: Span {
195 start: offset,
196 end: offset,
197 line,
198 col,
199 },
200 text: String::new(),
201 }
202 }
203}
204
/// Snapshot of a [`Lexer`]'s scan position and lookahead buffers.
///
/// Created by `Lexer::checkpoint` and consumed by `Lexer::restore`; the fields
/// mirror the identically named fields of [`Lexer`].
#[derive(Clone)]
pub struct LexerCheckpoint {
    // Index into the lexer's character buffer.
    pos: usize,
    // Line number at the scan position.
    line: usize,
    // Column number at the scan position.
    col: usize,
    // UTF-8 byte offset at the scan position.
    byte_offset: usize,
    // First buffered lookahead token, if any.
    peeked: Option<Token>,
    // Second buffered lookahead token, if any.
    peeked2: Option<Token>,
}
214
/// On-demand tokenizer with up to two tokens of lookahead.
///
/// The input is held twice: as a `Vec<char>` for cheap character indexing and
/// as the original text (`Arc<str>`) for slicing by byte offset.
#[derive(Clone)]
pub struct Lexer {
    // Input decoded into characters; `pos` indexes into this.
    source: Vec<char>,
    // Index of the next unread character in `source`.
    pos: usize,
    // Line of the next unread character (starts at 1).
    line: usize,
    // Column of the next unread character (starts at 1, counted in characters).
    col: usize,
    // UTF-8 byte offset of the next unread character within `source_text`.
    byte_offset: usize,
    // Origin of the input; attached to every error the lexer reports.
    source_type: crate::parsing::source::SourceType,
    // The original input text, shared cheaply with callers.
    source_text: Arc<str>,
    // First lookahead token, filled by `peek`.
    peeked: Option<Token>,
    // Second lookahead token, filled by `peek_second`.
    peeked2: Option<Token>,
}
228
229impl Lexer {
230 pub fn new(input: &str, source_type: &crate::parsing::source::SourceType) -> Self {
231 let source_text: Arc<str> = Arc::from(input);
232 Lexer {
233 source: input.chars().collect(),
234 pos: 0,
235 line: 1,
236 col: 1,
237 byte_offset: 0,
238 source_type: source_type.clone(),
239 source_text,
240 peeked: None,
241 peeked2: None,
242 }
243 }
244
245 pub fn source_text(&self) -> Arc<str> {
246 self.source_text.clone()
247 }
248
249 pub fn source_type(&self) -> crate::parsing::source::SourceType {
250 self.source_type.clone()
251 }
252
253 pub fn peek(&mut self) -> Result<&Token, Error> {
254 if self.peeked.is_none() {
255 let token = self.lex_token()?;
256 self.peeked = Some(token);
257 }
258 Ok(self.peeked.as_ref().expect("just assigned"))
259 }
260
261 pub fn peek_second(&mut self) -> Result<&Token, Error> {
262 self.peek()?;
263 if self.peeked2.is_none() {
264 let token = self.lex_token()?;
265 self.peeked2 = Some(token);
266 }
267 Ok(self.peeked2.as_ref().expect("just assigned"))
268 }
269
270 pub fn current_span(&self) -> Span {
272 Span {
273 start: self.byte_offset,
274 end: self.byte_offset,
275 line: self.line,
276 col: self.col,
277 }
278 }
279
280 pub fn next_token(&mut self) -> Result<Token, Error> {
281 if let Some(token) = self.peeked.take() {
282 self.peeked = self.peeked2.take();
283 return Ok(token);
284 }
285 self.lex_token()
286 }
287
288 pub fn checkpoint(&self) -> LexerCheckpoint {
290 LexerCheckpoint {
291 pos: self.pos,
292 line: self.line,
293 col: self.col,
294 byte_offset: self.byte_offset,
295 peeked: self.peeked.clone(),
296 peeked2: self.peeked2.clone(),
297 }
298 }
299
300 pub fn restore(&mut self, checkpoint: LexerCheckpoint) {
301 self.pos = checkpoint.pos;
302 self.line = checkpoint.line;
303 self.col = checkpoint.col;
304 self.byte_offset = checkpoint.byte_offset;
305 self.peeked = checkpoint.peeked;
306 self.peeked2 = checkpoint.peeked2;
307 }
308
309 fn current_char(&self) -> Option<char> {
310 self.source.get(self.pos).copied()
311 }
312
313 fn peek_char(&self) -> Option<char> {
314 self.source.get(self.pos + 1).copied()
315 }
316
317 fn peek_char_at(&self, offset: usize) -> Option<char> {
318 self.source.get(self.pos + offset).copied()
319 }
320
321 fn advance(&mut self) {
322 if let Some(ch) = self.current_char() {
323 self.byte_offset += ch.len_utf8();
324 if ch == '\n' {
325 self.line += 1;
326 self.col = 1;
327 } else {
328 self.col += 1;
329 }
330 self.pos += 1;
331 }
332 }
333
334 fn skip_whitespace(&mut self) {
335 while let Some(ch) = self.current_char() {
336 if ch.is_whitespace() {
337 self.advance();
338 } else {
339 break;
340 }
341 }
342 }
343
344 fn make_span(&self, start_byte: usize, start_line: usize, start_col: usize) -> Span {
345 Span {
346 start: start_byte,
347 end: self.byte_offset,
348 line: start_line,
349 col: start_col,
350 }
351 }
352
353 fn make_error(&self, message: impl Into<String>, span: Span) -> Error {
354 Error::parsing(
355 message,
356 Source::new(self.source_type.clone(), span),
357 None::<String>,
358 )
359 }
360
361 fn lex_token(&mut self) -> Result<Token, Error> {
362 self.skip_whitespace();
363
364 let start_byte = self.byte_offset;
365 let start_line = self.line;
366 let start_col = self.col;
367
368 let Some(ch) = self.current_char() else {
369 return Ok(Token::eof(start_byte, start_line, start_col));
370 };
371
372 if ch == '"' && self.peek_char() == Some('"') && self.peek_char_at(2) == Some('"') {
374 return self.scan_triple_quote(start_byte, start_line, start_col);
375 }
376
377 if ch == '"' {
379 return self.scan_string(start_byte, start_line, start_col);
380 }
381
382 if ch.is_ascii_digit() {
384 return self.scan_number(start_byte, start_line, start_col);
385 }
386
387 if let Some(token) = self.try_two_char_operator(start_byte, start_line, start_col) {
389 return Ok(token);
390 }
391
392 if ch == '.' && self.peek_char() == Some('.') && self.peek_char_at(2) == Some('.') {
394 self.advance();
395 self.advance();
396 self.advance();
397 let span = self.make_span(start_byte, start_line, start_col);
398 return Ok(Token {
399 kind: TokenKind::Ellipsis,
400 span,
401 text: "...".to_string(),
402 });
403 }
404
405 if let Some(kind) = self.single_char_token(ch) {
407 self.advance();
408 let span = self.make_span(start_byte, start_line, start_col);
409 let text = ch.to_string();
410 return Ok(Token { kind, span, text });
411 }
412
413 if ch.is_ascii_alphabetic() || ch == '_' {
415 return Ok(self.scan_identifier(start_byte, start_line, start_col));
416 }
417
418 if ch == '@' {
420 self.advance();
421 let span = self.make_span(start_byte, start_line, start_col);
422 return Ok(Token {
423 kind: TokenKind::At,
424 span,
425 text: "@".to_string(),
426 });
427 }
428
429 self.advance();
431 let span = self.make_span(start_byte, start_line, start_col);
432 Err(self.make_error(format!("Unexpected character '{}'", ch), span))
433 }
434
435 fn scan_triple_quote(
436 &mut self,
437 start_byte: usize,
438 start_line: usize,
439 start_col: usize,
440 ) -> Result<Token, Error> {
441 self.advance(); self.advance(); self.advance(); let content_start = self.byte_offset;
446 loop {
447 match self.current_char() {
448 None => {
449 let span = self.make_span(start_byte, start_line, start_col);
450 return Err(self.make_error(
451 "Unterminated commentary block: expected closing \"\"\"",
452 span,
453 ));
454 }
455 Some('"')
456 if self.source.get(self.pos + 1) == Some(&'"')
457 && self.source.get(self.pos + 2) == Some(&'"') =>
458 {
459 let content_end = self.byte_offset;
460 self.advance(); self.advance(); self.advance(); let raw: String = self.source_text[content_start..content_end].to_string();
464 let span = self.make_span(start_byte, start_line, start_col);
465 return Ok(Token {
466 kind: TokenKind::Commentary,
467 span,
468 text: raw,
469 });
470 }
471 Some(_) => {
472 self.advance();
473 }
474 }
475 }
476 }
477
478 fn scan_string(
479 &mut self,
480 start_byte: usize,
481 start_line: usize,
482 start_col: usize,
483 ) -> Result<Token, Error> {
484 self.advance(); let mut content = String::new();
486 loop {
487 match self.current_char() {
488 None => {
489 let span = self.make_span(start_byte, start_line, start_col);
490 return Err(self.make_error("String starting here was never closed", span));
491 }
492 Some('"') => {
493 self.advance(); break;
495 }
496 Some(ch) => {
497 content.push(ch);
498 self.advance();
499 }
500 }
501 }
502 let span = self.make_span(start_byte, start_line, start_col);
503 let full_text = format!("\"{}\"", content);
506 Ok(Token {
507 kind: TokenKind::StringLit,
508 span,
509 text: full_text,
510 })
511 }
512
513 fn scan_number(
514 &mut self,
515 start_byte: usize,
516 start_line: usize,
517 start_col: usize,
518 ) -> Result<Token, Error> {
519 let mut text = String::new();
520
521 while let Some(ch) = self.current_char() {
523 if ch.is_ascii_digit() || ch == '_' || ch == ',' {
524 text.push(ch);
525 self.advance();
526 } else {
527 break;
528 }
529 }
530
531 if self.current_char() == Some('.') {
533 if let Some(next) = self.peek_char() {
535 if next.is_ascii_digit() {
536 text.push('.');
537 self.advance(); while let Some(ch) = self.current_char() {
539 if ch.is_ascii_digit() {
540 text.push(ch);
541 self.advance();
542 } else {
543 break;
544 }
545 }
546 }
547 }
548 }
549
550 if let Some(ch) = self.current_char() {
552 if ch == 'e' || ch == 'E' {
553 let mut sci_text = String::new();
554 sci_text.push(ch);
555 let save_pos = self.pos;
556 let save_byte = self.byte_offset;
557 let save_line = self.line;
558 let save_col = self.col;
559 self.advance(); if let Some(sign) = self.current_char() {
562 if sign == '+' || sign == '-' {
563 sci_text.push(sign);
564 self.advance();
565 }
566 }
567
568 if let Some(d) = self.current_char() {
569 if d.is_ascii_digit() {
570 while let Some(ch) = self.current_char() {
571 if ch.is_ascii_digit() {
572 sci_text.push(ch);
573 self.advance();
574 } else {
575 break;
576 }
577 }
578 text.push_str(&sci_text);
579 } else {
580 self.pos = save_pos;
582 self.byte_offset = save_byte;
583 self.line = save_line;
584 self.col = save_col;
585 }
586 } else {
587 self.pos = save_pos;
588 self.byte_offset = save_byte;
589 self.line = save_line;
590 self.col = save_col;
591 }
592 }
593 }
594
595 let span = self.make_span(start_byte, start_line, start_col);
596 Ok(Token {
597 kind: TokenKind::NumberLit,
598 span,
599 text,
600 })
601 }
602
603 fn try_two_char_operator(
604 &mut self,
605 start_byte: usize,
606 start_line: usize,
607 start_col: usize,
608 ) -> Option<Token> {
609 let ch = self.current_char()?;
610 let next = self.peek_char();
611
612 let kind = match (ch, next) {
613 ('-', Some('>')) => TokenKind::Arrow,
614 ('>', Some('=')) => TokenKind::Gte,
615 ('<', Some('=')) => TokenKind::Lte,
616 ('%', Some('%')) => {
617 TokenKind::PercentPercent
619 }
620 _ => return None,
621 };
622
623 self.advance();
624 self.advance();
625 let span = self.make_span(start_byte, start_line, start_col);
626 let text: String = self.source_text[span.start..span.end].to_string();
627 Some(Token { kind, span, text })
628 }
629
630 fn single_char_token(&self, ch: char) -> Option<TokenKind> {
631 match ch {
632 '+' => Some(TokenKind::Plus),
633 '*' => Some(TokenKind::Star),
634 '/' => Some(TokenKind::Slash),
635 ',' => Some(TokenKind::Comma),
636 '^' => Some(TokenKind::Caret),
637 ':' => Some(TokenKind::Colon),
638 '.' => Some(TokenKind::Dot),
639 '(' => Some(TokenKind::LParen),
640 ')' => Some(TokenKind::RParen),
641 '>' => Some(TokenKind::Gt),
642 '<' => Some(TokenKind::Lt),
643 '%' => Some(TokenKind::Percent),
644 '-' => Some(TokenKind::Minus),
645 _ => None,
646 }
647 }
648
649 fn scan_identifier(&mut self, start_byte: usize, start_line: usize, start_col: usize) -> Token {
650 let mut text = String::new();
651 while let Some(ch) = self.current_char() {
652 if ch.is_ascii_alphanumeric() || ch == '_' {
653 text.push(ch);
654 self.advance();
655 } else {
656 break;
657 }
658 }
659
660 let kind = keyword_from_identifier(&text);
661 let span = self.make_span(start_byte, start_line, start_col);
662 Token { kind, span, text }
663 }
664}
665
666fn keyword_from_identifier(text: &str) -> TokenKind {
667 match text.to_lowercase().as_str() {
668 "spec" => TokenKind::Spec,
669 "repo" => TokenKind::Repo,
670 "data" => TokenKind::Data,
671 "rule" => TokenKind::Rule,
672 "unless" => TokenKind::Unless,
673 "then" => TokenKind::Then,
674 "not" => TokenKind::Not,
675 "and" => TokenKind::And,
676 "in" => TokenKind::In,
677 "as" => TokenKind::As,
678 "type" => TokenKind::Type,
679 "uses" => TokenKind::Uses,
680 "fill" => TokenKind::Fill,
681 "meta" => TokenKind::Meta,
682 "veto" => TokenKind::Veto,
683 "now" => TokenKind::Now,
684 "calendar" => TokenKind::Calendar,
685 "past" => TokenKind::Past,
686 "future" => TokenKind::Future,
687 "true" => TokenKind::True,
688 "false" => TokenKind::False,
689 "yes" => TokenKind::Yes,
690 "no" => TokenKind::No,
691 "accept" => TokenKind::Accept,
692 "reject" => TokenKind::Reject,
693 "quantity" => TokenKind::QuantityKw,
694 "number" => TokenKind::NumberKw,
695 "text" => TokenKind::TextKw,
696 "date" => TokenKind::DateKw,
697 "time" => TokenKind::TimeKw,
698 "boolean" => TokenKind::BooleanKw,
699 "percent" => TokenKind::PercentKw,
700 "ratio" => TokenKind::RatioKw,
701 "sqrt" => TokenKind::Sqrt,
702 "sin" => TokenKind::Sin,
703 "cos" => TokenKind::Cos,
704 "tan" => TokenKind::Tan,
705 "asin" => TokenKind::Asin,
706 "acos" => TokenKind::Acos,
707 "atan" => TokenKind::Atan,
708 "log" => TokenKind::Log,
709 "exp" => TokenKind::Exp,
710 "abs" => TokenKind::Abs,
711 "floor" => TokenKind::Floor,
712 "ceil" => TokenKind::Ceil,
713 "round" => TokenKind::Round,
714 "is" => TokenKind::Is,
715 "permille" => TokenKind::Permille,
716 _ => TokenKind::Identifier,
717 }
718}
719
720pub fn is_structural_keyword(kind: &TokenKind) -> bool {
725 matches!(
726 kind,
727 TokenKind::Spec
728 | TokenKind::Repo
729 | TokenKind::Data
730 | TokenKind::Rule
731 | TokenKind::Unless
732 | TokenKind::Then
733 | TokenKind::Not
734 | TokenKind::And
735 | TokenKind::In
736 | TokenKind::As
737 | TokenKind::Type
738 | TokenKind::Uses
739 | TokenKind::Fill
740 | TokenKind::Meta
741 | TokenKind::Veto
742 | TokenKind::Now
743 | TokenKind::Sqrt
744 | TokenKind::Sin
745 | TokenKind::Cos
746 | TokenKind::Tan
747 | TokenKind::Asin
748 | TokenKind::Acos
749 | TokenKind::Atan
750 | TokenKind::Log
751 | TokenKind::Exp
752 | TokenKind::Abs
753 | TokenKind::Floor
754 | TokenKind::Ceil
755 | TokenKind::Round
756 | TokenKind::True
757 | TokenKind::False
758 | TokenKind::Yes
759 | TokenKind::No
760 | TokenKind::Accept
761 | TokenKind::Reject
762 )
763}
764
765pub fn is_type_keyword(kind: &TokenKind) -> bool {
768 token_kind_to_primitive(kind).is_some()
769}
770
771#[must_use]
773pub fn token_kind_to_primitive(kind: &TokenKind) -> Option<PrimitiveKind> {
774 match kind {
775 TokenKind::BooleanKw => Some(PrimitiveKind::Boolean),
776 TokenKind::QuantityKw => Some(PrimitiveKind::Quantity),
777 TokenKind::NumberKw => Some(PrimitiveKind::Number),
778 TokenKind::PercentKw => Some(PrimitiveKind::Percent),
779 TokenKind::RatioKw => Some(PrimitiveKind::Ratio),
780 TokenKind::TextKw => Some(PrimitiveKind::Text),
781 TokenKind::DateKw => Some(PrimitiveKind::Date),
782 TokenKind::TimeKw => Some(PrimitiveKind::Time),
783 TokenKind::Calendar => Some(PrimitiveKind::Calendar),
784 _ => None,
785 }
786}
787
788pub fn is_boolean_keyword(kind: &TokenKind) -> bool {
790 matches!(
791 kind,
792 TokenKind::True
793 | TokenKind::False
794 | TokenKind::Yes
795 | TokenKind::No
796 | TokenKind::Accept
797 | TokenKind::Reject
798 )
799}
800
801#[must_use]
805pub fn conversion_target_from_token(kind: &TokenKind, fallback_text: &str) -> ConversionTarget {
806 if let Some(unit) = CalendarUnit::from_keyword(fallback_text) {
807 ConversionTarget::Calendar(unit)
808 } else if *kind == TokenKind::NumberKw {
809 ConversionTarget::Type(PrimitiveKind::Number)
810 } else {
811 ConversionTarget::Unit(fallback_text.to_lowercase())
812 }
813}
814
815#[must_use]
817pub fn token_kind_to_boolean_value(kind: &TokenKind) -> BooleanValue {
818 match kind {
819 TokenKind::True => BooleanValue::True,
820 TokenKind::False => BooleanValue::False,
821 TokenKind::Yes => BooleanValue::Yes,
822 TokenKind::No => BooleanValue::No,
823 TokenKind::Accept => BooleanValue::Accept,
824 TokenKind::Reject => BooleanValue::Reject,
825 _ => unreachable!(
826 "BUG: token_kind_to_boolean_value called with non-boolean token {:?}",
827 kind
828 ),
829 }
830}
831
832pub fn is_math_function(kind: &TokenKind) -> bool {
834 matches!(
835 kind,
836 TokenKind::Sqrt
837 | TokenKind::Sin
838 | TokenKind::Cos
839 | TokenKind::Tan
840 | TokenKind::Asin
841 | TokenKind::Acos
842 | TokenKind::Atan
843 | TokenKind::Log
844 | TokenKind::Exp
845 | TokenKind::Abs
846 | TokenKind::Floor
847 | TokenKind::Ceil
848 | TokenKind::Round
849 )
850}
851
852pub fn is_spec_body_keyword(kind: &TokenKind) -> bool {
855 matches!(
856 kind,
857 TokenKind::Data | TokenKind::Fill | TokenKind::Rule | TokenKind::Type | TokenKind::Meta
858 )
859}
860
861pub fn can_be_label(kind: &TokenKind) -> bool {
864 matches!(
865 kind,
866 TokenKind::Identifier
867 | TokenKind::Calendar
868 | TokenKind::Past
869 | TokenKind::Future
870 | TokenKind::Permille
871 | TokenKind::Is
872 )
873}
874
875pub fn can_be_reference_segment(kind: &TokenKind) -> bool {
878 can_be_label(kind) || is_type_keyword(kind)
879}
880
881#[must_use]
886pub fn can_be_repository_qualifier_segment(kind: &TokenKind) -> bool {
887 matches!(kind, TokenKind::Identifier)
888 || is_structural_keyword(kind)
889 || can_be_label(kind)
890 || is_type_keyword(kind)
891 || is_boolean_keyword(kind)
892 || is_math_function(kind)
893}
894
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parsing::source::SourceType;

    /// Lexes `input` to completion and returns every token, including the
    /// trailing `Eof`.
    fn lex_all(input: &str) -> Result<Vec<Token>, Error> {
        let mut lexer = Lexer::new(input, &SourceType::Volatile);
        let mut tokens = Vec::new();
        loop {
            let token = lexer.next_token()?;
            let reached_end = token.kind == TokenKind::Eof;
            tokens.push(token);
            if reached_end {
                break;
            }
        }
        Ok(tokens)
    }

    /// Like `lex_all`, but keeps only the token kinds.
    fn lex_kinds(input: &str) -> Result<Vec<TokenKind>, Error> {
        Ok(lex_all(input)?.into_iter().map(|t| t.kind).collect())
    }

    #[test]
    fn lex_empty_input() {
        let tokens = lex_all("").unwrap();
        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0].kind, TokenKind::Eof);
    }

    #[test]
    fn lex_spec_declaration() {
        let kinds = lex_kinds("spec person").unwrap();
        assert_eq!(
            kinds,
            vec![TokenKind::Spec, TokenKind::Identifier, TokenKind::Eof]
        );
    }

    #[test]
    fn lex_data_definition() {
        let kinds = lex_kinds("data age: 25").unwrap();
        assert_eq!(
            kinds,
            vec![
                TokenKind::Data,
                TokenKind::Identifier,
                TokenKind::Colon,
                TokenKind::NumberLit,
                TokenKind::Eof,
            ]
        );
    }

    #[test]
    fn lex_rule_with_comparison() {
        let kinds = lex_kinds("rule is_adult: age >= 18").unwrap();
        assert_eq!(
            kinds,
            vec![
                TokenKind::Rule,
                TokenKind::Identifier,
                TokenKind::Colon,
                TokenKind::Identifier,
                TokenKind::Gte,
                TokenKind::NumberLit,
                TokenKind::Eof,
            ]
        );
    }

    #[test]
    fn lex_string_literal() {
        let tokens = lex_all(r#""hello world""#).unwrap();
        assert_eq!(tokens[0].kind, TokenKind::StringLit);
        assert_eq!(tokens[0].text, "\"hello world\"");
    }

    #[test]
    fn lex_unterminated_string() {
        let result = lex_all(r#""hello"#);
        assert!(result.is_err());
    }

    #[test]
    fn lex_number_with_decimal() {
        let tokens = lex_all("3.14").unwrap();
        assert_eq!(tokens[0].kind, TokenKind::NumberLit);
        assert_eq!(tokens[0].text, "3.14");
    }

    #[test]
    fn lex_number_with_underscores() {
        let tokens = lex_all("1_000_000").unwrap();
        assert_eq!(tokens[0].kind, TokenKind::NumberLit);
        assert_eq!(tokens[0].text, "1_000_000");
    }

    #[test]
    fn lex_scientific_notation() {
        let tokens = lex_all("1.5e+10").unwrap();
        assert_eq!(tokens[0].kind, TokenKind::NumberLit);
        assert_eq!(tokens[0].text, "1.5e+10");
    }

    #[test]
    fn lex_all_operators() {
        let kinds = lex_kinds("+ - * / % ^ > < >= <= -> %%").unwrap();
        assert_eq!(
            &kinds[..12],
            &[
                TokenKind::Plus,
                TokenKind::Minus,
                TokenKind::Star,
                TokenKind::Slash,
                TokenKind::Percent,
                TokenKind::Caret,
                TokenKind::Gt,
                TokenKind::Lt,
                TokenKind::Gte,
                TokenKind::Lte,
                TokenKind::Arrow,
                TokenKind::PercentPercent,
            ]
        );
    }

    #[test]
    fn lex_keywords() {
        let kinds =
            lex_kinds("spec data rule unless then not and in as type uses meta veto now").unwrap();
        assert_eq!(
            &kinds[..14],
            &[
                TokenKind::Spec,
                TokenKind::Data,
                TokenKind::Rule,
                TokenKind::Unless,
                TokenKind::Then,
                TokenKind::Not,
                TokenKind::And,
                TokenKind::In,
                TokenKind::As,
                TokenKind::Type,
                TokenKind::Uses,
                TokenKind::Meta,
                TokenKind::Veto,
                TokenKind::Now,
            ]
        );
    }

    #[test]
    fn lex_boolean_keywords() {
        let kinds = lex_kinds("true false yes no accept reject").unwrap();
        assert_eq!(
            &kinds[..6],
            &[
                TokenKind::True,
                TokenKind::False,
                TokenKind::Yes,
                TokenKind::No,
                TokenKind::Accept,
                TokenKind::Reject,
            ]
        );
    }

    /// Duration words are not lexer keywords; they come out as identifiers.
    #[test]
    fn lex_duration_keywords() {
        let kinds = lex_kinds("years months weeks days hours minutes seconds").unwrap();
        assert_eq!(
            &kinds[..7],
            &[
                TokenKind::Identifier,
                TokenKind::Identifier,
                TokenKind::Identifier,
                TokenKind::Identifier,
                TokenKind::Identifier,
                TokenKind::Identifier,
                TokenKind::Identifier,
            ]
        );
    }

    #[test]
    fn lex_commentary() {
        let tokens = lex_all(r#""""hello world""""#).unwrap();
        assert_eq!(tokens[0].kind, TokenKind::Commentary);
        assert_eq!(tokens[0].text, "hello world");
    }

    #[test]
    fn lex_at_sign() {
        let kinds = lex_kinds("@user").unwrap();
        assert_eq!(kinds[0], TokenKind::At);
        assert_eq!(kinds[1], TokenKind::Identifier);
    }

    #[test]
    fn lex_parentheses() {
        let kinds = lex_kinds("(x + 1)").unwrap();
        assert_eq!(
            &kinds[..5],
            &[
                TokenKind::LParen,
                TokenKind::Identifier,
                TokenKind::Plus,
                TokenKind::NumberLit,
                TokenKind::RParen,
            ]
        );
    }

    #[test]
    fn lex_dot_for_references() {
        let kinds = lex_kinds("employee.salary").unwrap();
        assert_eq!(
            &kinds[..3],
            &[TokenKind::Identifier, TokenKind::Dot, TokenKind::Identifier]
        );
    }

    #[test]
    fn lex_spec_name_with_slashes() {
        let tokens = lex_all("spec contracts/employment/jack").unwrap();
        assert_eq!(tokens[0].kind, TokenKind::Spec);
        assert_eq!(tokens[1].kind, TokenKind::Identifier);
    }

    /// An `e` after a number only starts an exponent when digits follow.
    #[test]
    fn lex_number_not_followed_by_e_identifier() {
        let tokens = lex_all("42 eur").unwrap();
        assert_eq!(tokens[0].kind, TokenKind::NumberLit);
        assert_eq!(tokens[0].text, "42");
        assert_eq!(tokens[1].kind, TokenKind::Identifier);
        assert_eq!(tokens[1].text, "eur");
    }

    #[test]
    fn lex_unknown_character() {
        // U+00A7 SECTION SIGN, written as an escape so the test survives
        // re-encoding of this file.
        let result = lex_all("\u{a7}");
        assert!(result.is_err());
    }

    #[test]
    fn lex_peek_does_not_consume() {
        let mut lexer = Lexer::new("spec test", &SourceType::Volatile);
        let peeked_kind = lexer.peek().unwrap().kind.clone();
        assert_eq!(peeked_kind, TokenKind::Spec);
        let next = lexer.next_token().unwrap();
        assert_eq!(next.kind, TokenKind::Spec);
    }

    #[test]
    fn lex_peek_second_does_not_consume() {
        let mut lexer = Lexer::new("spec test", &SourceType::Volatile);
        let second_kind = lexer.peek_second().unwrap().kind.clone();
        assert_eq!(second_kind, TokenKind::Identifier);
        let first_kind = lexer.peek().unwrap().kind.clone();
        assert_eq!(first_kind, TokenKind::Spec);

        assert_eq!(lexer.next_token().unwrap().kind, TokenKind::Spec);
        let identifier = lexer.next_token().unwrap();
        assert_eq!(identifier.kind, TokenKind::Identifier);
        assert_eq!(identifier.text, "test");
        assert_eq!(lexer.next_token().unwrap().kind, TokenKind::Eof);
    }

    #[test]
    fn lex_checkpoint_restore_rewinds() {
        let mut lexer = Lexer::new("data x: 1", &SourceType::Volatile);
        assert_eq!(lexer.next_token().unwrap().kind, TokenKind::Data);

        let checkpoint = lexer.checkpoint();
        assert_eq!(lexer.next_token().unwrap().kind, TokenKind::Identifier);
        assert_eq!(lexer.next_token().unwrap().kind, TokenKind::Colon);

        lexer.restore(checkpoint);
        let replayed = lexer.next_token().unwrap();
        assert_eq!(replayed.kind, TokenKind::Identifier);
        assert_eq!(replayed.text, "x");
    }

    #[test]
    fn lex_span_byte_offsets() {
        let tokens = lex_all("spec test").unwrap();
        assert_eq!(tokens[0].span.start, 0);
        assert_eq!(tokens[0].span.end, 4);
        assert_eq!(tokens[0].span.line, 1);
        assert_eq!(tokens[0].span.col, 1);

        assert_eq!(tokens[1].span.start, 5);
        assert_eq!(tokens[1].span.end, 9);
        assert_eq!(tokens[1].span.line, 1);
        assert_eq!(tokens[1].span.col, 6);
    }

    #[test]
    fn lex_multiline_span_tracking() {
        let tokens = lex_all("spec test\ndata x: 1").unwrap();
        // tokens: spec, test, data, ...
        let data_token = &tokens[2];
        assert_eq!(data_token.kind, TokenKind::Data);
        assert_eq!(data_token.span.line, 2);
        assert_eq!(data_token.span.col, 1);
    }

    #[test]
    fn lex_case_insensitive_keywords() {
        let kinds = lex_kinds("SPEC Data RULE").unwrap();
        assert_eq!(kinds[0], TokenKind::Spec);
        assert_eq!(kinds[1], TokenKind::Data);
        assert_eq!(kinds[2], TokenKind::Rule);
    }

    #[test]
    fn lex_math_function_keywords() {
        let kinds =
            lex_kinds("sqrt sin cos tan asin acos atan log exp abs floor ceil round").unwrap();
        assert_eq!(
            &kinds[..13],
            &[
                TokenKind::Sqrt,
                TokenKind::Sin,
                TokenKind::Cos,
                TokenKind::Tan,
                TokenKind::Asin,
                TokenKind::Acos,
                TokenKind::Atan,
                TokenKind::Log,
                TokenKind::Exp,
                TokenKind::Abs,
                TokenKind::Floor,
                TokenKind::Ceil,
                TokenKind::Round,
            ]
        );
    }

    #[test]
    fn lex_is_keyword() {
        let kinds = lex_kinds("status is \"active\"").unwrap();
        assert_eq!(kinds[0], TokenKind::Identifier);
        assert_eq!(kinds[1], TokenKind::Is);
        assert_eq!(kinds[2], TokenKind::StringLit);
    }

    #[test]
    fn lex_percent_not_followed_by_digit() {
        let kinds = lex_kinds("50%").unwrap();
        assert_eq!(kinds[0], TokenKind::NumberLit);
        assert_eq!(kinds[1], TokenKind::Percent);
    }

    #[test]
    fn lex_number_with_commas() {
        let tokens = lex_all("1,000,000").unwrap();
        assert_eq!(tokens[0].kind, TokenKind::NumberLit);
        assert_eq!(tokens[0].text, "1,000,000");
    }

    #[test]
    fn lex_arrow_chain() {
        let kinds = lex_kinds("-> unit eur 1.00 -> decimals 2").unwrap();
        assert_eq!(kinds[0], TokenKind::Arrow);
        assert_eq!(kinds[1], TokenKind::Identifier);
        assert_eq!(kinds[2], TokenKind::Identifier);
        assert_eq!(kinds[3], TokenKind::NumberLit);
        assert_eq!(kinds[4], TokenKind::Arrow);
    }
}