1use crate::error::Error;
2use crate::parsing::ast::{BooleanValue, PrimitiveKind, Span};
3use crate::parsing::source::Source;
4use std::sync::Arc;
5
/// Classification of every token the lexer can emit.
///
/// Variants carry no payload; a token's matched text is stored on [`Token`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TokenKind {
    // ---- Keywords ----
    Spec,
    Repo,
    Data,
    Rule,
    Unless,
    Then,
    Not,
    And,
    In,
    As,
    Type,
    Uses,
    With,
    Meta,
    Veto,
    Now,
    Past,
    Future,

    // ---- Boolean literal keywords ----
    True,
    False,
    Yes,
    No,
    Accept,
    Reject,

    // ---- Primitive type keywords ----
    QuantityKw,
    NumberKw,
    TextKw,
    DateKw,
    TimeKw,
    BooleanKw,
    PercentKw,
    RatioKw,

    // ---- Math function keywords ----
    Sqrt,
    Sin,
    Cos,
    Tan,
    Asin,
    Acos,
    Atan,
    Log,
    Exp,
    Abs,
    Floor,
    Ceil,
    Round,

    /// The word `permille`.
    Permille,

    /// The word `is`.
    Is,

    // ---- Operators ----
    Plus,
    Minus,
    Star,
    Slash,
    Comma,
    Percent,
    /// The two-character operator `%%`.
    PercentPercent,
    Caret,
    Gt,
    Lt,
    Gte,
    Lte,

    // ---- Punctuation ----
    Colon,
    /// The two-character token `->`.
    Arrow,
    /// The three-character token `...`.
    Ellipsis,
    Dot,
    At,
    LParen,
    RParen,

    // ---- Literals ----
    NumberLit,
    StringLit,

    /// A block delimited by triple double quotes.
    Commentary,

    /// Any word that is not a recognised keyword.
    Identifier,

    /// End of input.
    Eof,
}
102
103impl std::fmt::Display for TokenKind {
104 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
105 match self {
106 TokenKind::Spec => write!(f, "'spec'"),
107 TokenKind::Repo => write!(f, "'repo'"),
108 TokenKind::Data => write!(f, "'data'"),
109 TokenKind::Rule => write!(f, "'rule'"),
110 TokenKind::Unless => write!(f, "'unless'"),
111 TokenKind::Then => write!(f, "'then'"),
112 TokenKind::Not => write!(f, "'not'"),
113 TokenKind::And => write!(f, "'and'"),
114 TokenKind::In => write!(f, "'in'"),
115 TokenKind::As => write!(f, "'as'"),
116 TokenKind::Type => write!(f, "'type'"),
117 TokenKind::Uses => write!(f, "'uses'"),
118 TokenKind::With => write!(f, "'with'"),
119 TokenKind::Meta => write!(f, "'meta'"),
120 TokenKind::Veto => write!(f, "'veto'"),
121 TokenKind::Now => write!(f, "'now'"),
122 TokenKind::Past => write!(f, "'past'"),
123 TokenKind::Future => write!(f, "'future'"),
124 TokenKind::True => write!(f, "'true'"),
125 TokenKind::False => write!(f, "'false'"),
126 TokenKind::Yes => write!(f, "'yes'"),
127 TokenKind::No => write!(f, "'no'"),
128 TokenKind::Accept => write!(f, "'accept'"),
129 TokenKind::Reject => write!(f, "'reject'"),
130 TokenKind::QuantityKw => write!(f, "'quantity'"),
131 TokenKind::NumberKw => write!(f, "'number'"),
132 TokenKind::TextKw => write!(f, "'text'"),
133 TokenKind::DateKw => write!(f, "'date'"),
134 TokenKind::TimeKw => write!(f, "'time'"),
135 TokenKind::BooleanKw => write!(f, "'boolean'"),
136 TokenKind::PercentKw => write!(f, "'percent'"),
137 TokenKind::RatioKw => write!(f, "'ratio'"),
138 TokenKind::Sqrt => write!(f, "'sqrt'"),
139 TokenKind::Sin => write!(f, "'sin'"),
140 TokenKind::Cos => write!(f, "'cos'"),
141 TokenKind::Tan => write!(f, "'tan'"),
142 TokenKind::Asin => write!(f, "'asin'"),
143 TokenKind::Acos => write!(f, "'acos'"),
144 TokenKind::Atan => write!(f, "'atan'"),
145 TokenKind::Log => write!(f, "'log'"),
146 TokenKind::Exp => write!(f, "'exp'"),
147 TokenKind::Abs => write!(f, "'abs'"),
148 TokenKind::Floor => write!(f, "'floor'"),
149 TokenKind::Ceil => write!(f, "'ceil'"),
150 TokenKind::Round => write!(f, "'round'"),
151 TokenKind::Permille => write!(f, "'permille'"),
152 TokenKind::Is => write!(f, "'is'"),
153 TokenKind::Plus => write!(f, "'+'"),
154 TokenKind::Minus => write!(f, "'-'"),
155 TokenKind::Star => write!(f, "'*'"),
156 TokenKind::Slash => write!(f, "'/'"),
157 TokenKind::Comma => write!(f, "','"),
158 TokenKind::Percent => write!(f, "'%'"),
159 TokenKind::PercentPercent => write!(f, "'%%'"),
160 TokenKind::Caret => write!(f, "'^'"),
161 TokenKind::Gt => write!(f, "'>'"),
162 TokenKind::Lt => write!(f, "'<'"),
163 TokenKind::Gte => write!(f, "'>='"),
164 TokenKind::Lte => write!(f, "'<='"),
165 TokenKind::Colon => write!(f, "':'"),
166 TokenKind::Arrow => write!(f, "'->'"),
167 TokenKind::Ellipsis => write!(f, "'...'"),
168 TokenKind::Dot => write!(f, "'.'"),
169 TokenKind::At => write!(f, "'@'"),
170 TokenKind::LParen => write!(f, "'('"),
171 TokenKind::RParen => write!(f, "')'"),
172 TokenKind::NumberLit => write!(f, "a number"),
173 TokenKind::StringLit => write!(f, "a string"),
174 TokenKind::Commentary => write!(f, "commentary block"),
175 TokenKind::Identifier => write!(f, "an identifier"),
176 TokenKind::Eof => write!(f, "end of file"),
177 }
178 }
179}
180
181#[derive(Debug, Clone)]
182pub struct Token {
183 pub kind: TokenKind,
184 pub span: Span,
185 pub text: String,
186}
187
188impl Token {
189 pub fn eof(offset: usize, line: usize, col: usize) -> Self {
190 Token {
191 kind: TokenKind::Eof,
192 span: Span {
193 start: offset,
194 end: offset,
195 line,
196 col,
197 },
198 text: String::new(),
199 }
200 }
201}
202
203#[derive(Clone)]
204pub struct LexerCheckpoint {
205 pos: usize,
206 line: usize,
207 col: usize,
208 byte_offset: usize,
209 peeked: Option<Token>,
210 peeked2: Option<Token>,
211}
212
213#[derive(Clone)]
215pub struct Lexer {
216 source: Vec<char>,
217 pos: usize,
218 line: usize,
219 col: usize,
220 byte_offset: usize,
221 source_type: crate::parsing::source::SourceType,
222 source_text: Arc<str>,
223 peeked: Option<Token>,
224 peeked2: Option<Token>,
225}
226
227impl Lexer {
228 pub fn new(input: &str, source_type: &crate::parsing::source::SourceType) -> Self {
229 let source_text: Arc<str> = Arc::from(input);
230 Lexer {
231 source: input.chars().collect(),
232 pos: 0,
233 line: 1,
234 col: 1,
235 byte_offset: 0,
236 source_type: source_type.clone(),
237 source_text,
238 peeked: None,
239 peeked2: None,
240 }
241 }
242
243 pub fn source_text(&self) -> Arc<str> {
244 self.source_text.clone()
245 }
246
247 pub fn source_type(&self) -> crate::parsing::source::SourceType {
248 self.source_type.clone()
249 }
250
251 pub fn peek(&mut self) -> Result<&Token, Error> {
252 if self.peeked.is_none() {
253 let token = self.lex_token()?;
254 self.peeked = Some(token);
255 }
256 Ok(self.peeked.as_ref().expect("just assigned"))
257 }
258
259 pub fn peek_second(&mut self) -> Result<&Token, Error> {
260 self.peek()?;
261 if self.peeked2.is_none() {
262 let token = self.lex_token()?;
263 self.peeked2 = Some(token);
264 }
265 Ok(self.peeked2.as_ref().expect("just assigned"))
266 }
267
268 pub fn current_span(&self) -> Span {
270 Span {
271 start: self.byte_offset,
272 end: self.byte_offset,
273 line: self.line,
274 col: self.col,
275 }
276 }
277
278 pub fn next_token(&mut self) -> Result<Token, Error> {
279 if let Some(token) = self.peeked.take() {
280 self.peeked = self.peeked2.take();
281 return Ok(token);
282 }
283 self.lex_token()
284 }
285
286 pub fn checkpoint(&self) -> LexerCheckpoint {
288 LexerCheckpoint {
289 pos: self.pos,
290 line: self.line,
291 col: self.col,
292 byte_offset: self.byte_offset,
293 peeked: self.peeked.clone(),
294 peeked2: self.peeked2.clone(),
295 }
296 }
297
298 pub fn restore(&mut self, checkpoint: LexerCheckpoint) {
299 self.pos = checkpoint.pos;
300 self.line = checkpoint.line;
301 self.col = checkpoint.col;
302 self.byte_offset = checkpoint.byte_offset;
303 self.peeked = checkpoint.peeked;
304 self.peeked2 = checkpoint.peeked2;
305 }
306
307 fn current_char(&self) -> Option<char> {
308 self.source.get(self.pos).copied()
309 }
310
311 fn peek_char(&self) -> Option<char> {
312 self.source.get(self.pos + 1).copied()
313 }
314
315 fn peek_char_at(&self, offset: usize) -> Option<char> {
316 self.source.get(self.pos + offset).copied()
317 }
318
319 fn advance(&mut self) {
320 if let Some(ch) = self.current_char() {
321 self.byte_offset += ch.len_utf8();
322 if ch == '\n' {
323 self.line += 1;
324 self.col = 1;
325 } else {
326 self.col += 1;
327 }
328 self.pos += 1;
329 }
330 }
331
332 fn skip_whitespace(&mut self) {
333 while let Some(ch) = self.current_char() {
334 if ch.is_whitespace() {
335 self.advance();
336 } else {
337 break;
338 }
339 }
340 }
341
342 fn make_span(&self, start_byte: usize, start_line: usize, start_col: usize) -> Span {
343 Span {
344 start: start_byte,
345 end: self.byte_offset,
346 line: start_line,
347 col: start_col,
348 }
349 }
350
351 fn make_error(&self, message: impl Into<String>, span: Span) -> Error {
352 Error::parsing(
353 message,
354 Source::new(self.source_type.clone(), span),
355 None::<String>,
356 )
357 }
358
359 fn lex_token(&mut self) -> Result<Token, Error> {
360 self.skip_whitespace();
361
362 let start_byte = self.byte_offset;
363 let start_line = self.line;
364 let start_col = self.col;
365
366 let Some(ch) = self.current_char() else {
367 return Ok(Token::eof(start_byte, start_line, start_col));
368 };
369
370 if ch == '"' && self.peek_char() == Some('"') && self.peek_char_at(2) == Some('"') {
372 return self.scan_triple_quote(start_byte, start_line, start_col);
373 }
374
375 if ch == '"' {
377 return self.scan_string(start_byte, start_line, start_col);
378 }
379
380 if ch.is_ascii_digit() {
382 return self.scan_number(start_byte, start_line, start_col);
383 }
384
385 if let Some(token) = self.try_two_char_operator(start_byte, start_line, start_col) {
387 return Ok(token);
388 }
389
390 if ch == '.' && self.peek_char() == Some('.') && self.peek_char_at(2) == Some('.') {
392 self.advance();
393 self.advance();
394 self.advance();
395 let span = self.make_span(start_byte, start_line, start_col);
396 return Ok(Token {
397 kind: TokenKind::Ellipsis,
398 span,
399 text: "...".to_string(),
400 });
401 }
402
403 if let Some(kind) = self.single_char_token(ch) {
405 self.advance();
406 let span = self.make_span(start_byte, start_line, start_col);
407 let text = ch.to_string();
408 return Ok(Token { kind, span, text });
409 }
410
411 if ch.is_ascii_alphabetic() || ch == '_' {
413 return Ok(self.scan_identifier(start_byte, start_line, start_col));
414 }
415
416 if ch == '@' {
418 self.advance();
419 let span = self.make_span(start_byte, start_line, start_col);
420 return Ok(Token {
421 kind: TokenKind::At,
422 span,
423 text: "@".to_string(),
424 });
425 }
426
427 self.advance();
429 let span = self.make_span(start_byte, start_line, start_col);
430 Err(self.make_error(format!("Unexpected character '{}'", ch), span))
431 }
432
433 fn scan_triple_quote(
434 &mut self,
435 start_byte: usize,
436 start_line: usize,
437 start_col: usize,
438 ) -> Result<Token, Error> {
439 self.advance(); self.advance(); self.advance(); let content_start = self.byte_offset;
444 loop {
445 match self.current_char() {
446 None => {
447 let span = self.make_span(start_byte, start_line, start_col);
448 return Err(self.make_error(
449 "Unterminated commentary block: expected closing \"\"\"",
450 span,
451 ));
452 }
453 Some('"')
454 if self.source.get(self.pos + 1) == Some(&'"')
455 && self.source.get(self.pos + 2) == Some(&'"') =>
456 {
457 let content_end = self.byte_offset;
458 self.advance(); self.advance(); self.advance(); let raw: String = self.source_text[content_start..content_end].to_string();
462 let span = self.make_span(start_byte, start_line, start_col);
463 return Ok(Token {
464 kind: TokenKind::Commentary,
465 span,
466 text: raw,
467 });
468 }
469 Some(_) => {
470 self.advance();
471 }
472 }
473 }
474 }
475
476 fn scan_string(
477 &mut self,
478 start_byte: usize,
479 start_line: usize,
480 start_col: usize,
481 ) -> Result<Token, Error> {
482 self.advance(); let mut content = String::new();
484 loop {
485 match self.current_char() {
486 None => {
487 let span = self.make_span(start_byte, start_line, start_col);
488 return Err(self.make_error("String starting here was never closed", span));
489 }
490 Some('"') => {
491 self.advance(); break;
493 }
494 Some(ch) => {
495 content.push(ch);
496 self.advance();
497 }
498 }
499 }
500 let span = self.make_span(start_byte, start_line, start_col);
501 let full_text = format!("\"{}\"", content);
504 Ok(Token {
505 kind: TokenKind::StringLit,
506 span,
507 text: full_text,
508 })
509 }
510
511 fn scan_number(
512 &mut self,
513 start_byte: usize,
514 start_line: usize,
515 start_col: usize,
516 ) -> Result<Token, Error> {
517 let mut text = String::new();
518
519 while let Some(ch) = self.current_char() {
521 if ch.is_ascii_digit() || ch == '_' || ch == ',' {
522 text.push(ch);
523 self.advance();
524 } else {
525 break;
526 }
527 }
528
529 if self.current_char() == Some('.') {
531 if let Some(next) = self.peek_char() {
533 if next.is_ascii_digit() {
534 text.push('.');
535 self.advance(); while let Some(ch) = self.current_char() {
537 if ch.is_ascii_digit() {
538 text.push(ch);
539 self.advance();
540 } else {
541 break;
542 }
543 }
544 }
545 }
546 }
547
548 if let Some(ch) = self.current_char() {
550 if ch == 'e' || ch == 'E' {
551 let mut sci_text = String::new();
552 sci_text.push(ch);
553 let save_pos = self.pos;
554 let save_byte = self.byte_offset;
555 let save_line = self.line;
556 let save_col = self.col;
557 self.advance(); if let Some(sign) = self.current_char() {
560 if sign == '+' || sign == '-' {
561 sci_text.push(sign);
562 self.advance();
563 }
564 }
565
566 if let Some(d) = self.current_char() {
567 if d.is_ascii_digit() {
568 while let Some(ch) = self.current_char() {
569 if ch.is_ascii_digit() {
570 sci_text.push(ch);
571 self.advance();
572 } else {
573 break;
574 }
575 }
576 text.push_str(&sci_text);
577 } else {
578 self.pos = save_pos;
580 self.byte_offset = save_byte;
581 self.line = save_line;
582 self.col = save_col;
583 }
584 } else {
585 self.pos = save_pos;
586 self.byte_offset = save_byte;
587 self.line = save_line;
588 self.col = save_col;
589 }
590 }
591 }
592
593 let span = self.make_span(start_byte, start_line, start_col);
594 Ok(Token {
595 kind: TokenKind::NumberLit,
596 span,
597 text,
598 })
599 }
600
601 fn try_two_char_operator(
602 &mut self,
603 start_byte: usize,
604 start_line: usize,
605 start_col: usize,
606 ) -> Option<Token> {
607 let ch = self.current_char()?;
608 let next = self.peek_char();
609
610 let kind = match (ch, next) {
611 ('-', Some('>')) => TokenKind::Arrow,
612 ('>', Some('=')) => TokenKind::Gte,
613 ('<', Some('=')) => TokenKind::Lte,
614 ('%', Some('%')) => {
615 TokenKind::PercentPercent
617 }
618 _ => return None,
619 };
620
621 self.advance();
622 self.advance();
623 let span = self.make_span(start_byte, start_line, start_col);
624 let text: String = self.source_text[span.start..span.end].to_string();
625 Some(Token { kind, span, text })
626 }
627
628 fn single_char_token(&self, ch: char) -> Option<TokenKind> {
629 match ch {
630 '+' => Some(TokenKind::Plus),
631 '*' => Some(TokenKind::Star),
632 '/' => Some(TokenKind::Slash),
633 ',' => Some(TokenKind::Comma),
634 '^' => Some(TokenKind::Caret),
635 ':' => Some(TokenKind::Colon),
636 '.' => Some(TokenKind::Dot),
637 '(' => Some(TokenKind::LParen),
638 ')' => Some(TokenKind::RParen),
639 '>' => Some(TokenKind::Gt),
640 '<' => Some(TokenKind::Lt),
641 '%' => Some(TokenKind::Percent),
642 '-' => Some(TokenKind::Minus),
643 _ => None,
644 }
645 }
646
647 fn scan_identifier(&mut self, start_byte: usize, start_line: usize, start_col: usize) -> Token {
648 let mut text = String::new();
649 while let Some(ch) = self.current_char() {
650 if ch.is_ascii_alphanumeric() || ch == '_' {
651 text.push(ch);
652 self.advance();
653 } else {
654 break;
655 }
656 }
657
658 let kind = keyword_from_identifier(&text);
659 let span = self.make_span(start_byte, start_line, start_col);
660 Token { kind, span, text }
661 }
662}
663
664fn keyword_from_identifier(text: &str) -> TokenKind {
665 match text.to_lowercase().as_str() {
666 "spec" => TokenKind::Spec,
667 "repo" => TokenKind::Repo,
668 "data" => TokenKind::Data,
669 "rule" => TokenKind::Rule,
670 "unless" => TokenKind::Unless,
671 "then" => TokenKind::Then,
672 "not" => TokenKind::Not,
673 "and" => TokenKind::And,
674 "in" => TokenKind::In,
675 "as" => TokenKind::As,
676 "type" => TokenKind::Type,
677 "uses" => TokenKind::Uses,
678 "with" => TokenKind::With,
679 "meta" => TokenKind::Meta,
680 "veto" => TokenKind::Veto,
681 "now" => TokenKind::Now,
682 "past" => TokenKind::Past,
683 "future" => TokenKind::Future,
684 "true" => TokenKind::True,
685 "false" => TokenKind::False,
686 "yes" => TokenKind::Yes,
687 "no" => TokenKind::No,
688 "accept" => TokenKind::Accept,
689 "reject" => TokenKind::Reject,
690 "quantity" => TokenKind::QuantityKw,
691 "number" => TokenKind::NumberKw,
692 "text" => TokenKind::TextKw,
693 "date" => TokenKind::DateKw,
694 "time" => TokenKind::TimeKw,
695 "boolean" => TokenKind::BooleanKw,
696 "percent" => TokenKind::PercentKw,
697 "ratio" => TokenKind::RatioKw,
698 "sqrt" => TokenKind::Sqrt,
699 "sin" => TokenKind::Sin,
700 "cos" => TokenKind::Cos,
701 "tan" => TokenKind::Tan,
702 "asin" => TokenKind::Asin,
703 "acos" => TokenKind::Acos,
704 "atan" => TokenKind::Atan,
705 "log" => TokenKind::Log,
706 "exp" => TokenKind::Exp,
707 "abs" => TokenKind::Abs,
708 "floor" => TokenKind::Floor,
709 "ceil" => TokenKind::Ceil,
710 "round" => TokenKind::Round,
711 "is" => TokenKind::Is,
712 "permille" => TokenKind::Permille,
713 _ => TokenKind::Identifier,
714 }
715}
716
717pub fn is_structural_keyword(kind: &TokenKind) -> bool {
722 matches!(
723 kind,
724 TokenKind::Spec
725 | TokenKind::Repo
726 | TokenKind::Data
727 | TokenKind::Rule
728 | TokenKind::Unless
729 | TokenKind::Then
730 | TokenKind::Not
731 | TokenKind::And
732 | TokenKind::In
733 | TokenKind::As
734 | TokenKind::Type
735 | TokenKind::Uses
736 | TokenKind::With
737 | TokenKind::Meta
738 | TokenKind::Veto
739 | TokenKind::Now
740 | TokenKind::Sqrt
741 | TokenKind::Sin
742 | TokenKind::Cos
743 | TokenKind::Tan
744 | TokenKind::Asin
745 | TokenKind::Acos
746 | TokenKind::Atan
747 | TokenKind::Log
748 | TokenKind::Exp
749 | TokenKind::Abs
750 | TokenKind::Floor
751 | TokenKind::Ceil
752 | TokenKind::Round
753 | TokenKind::True
754 | TokenKind::False
755 | TokenKind::Yes
756 | TokenKind::No
757 | TokenKind::Accept
758 | TokenKind::Reject
759 )
760}
761
762pub fn is_type_keyword(kind: &TokenKind) -> bool {
765 token_kind_to_primitive(kind).is_some()
766}
767
768#[must_use]
770pub fn token_kind_to_primitive(kind: &TokenKind) -> Option<PrimitiveKind> {
771 match kind {
772 TokenKind::BooleanKw => Some(PrimitiveKind::Boolean),
773 TokenKind::QuantityKw => Some(PrimitiveKind::Quantity),
774 TokenKind::NumberKw => Some(PrimitiveKind::Number),
775 TokenKind::PercentKw => Some(PrimitiveKind::Percent),
776 TokenKind::RatioKw => Some(PrimitiveKind::Ratio),
777 TokenKind::TextKw => Some(PrimitiveKind::Text),
778 TokenKind::DateKw => Some(PrimitiveKind::Date),
779 TokenKind::TimeKw => Some(PrimitiveKind::Time),
780 _ => None,
781 }
782}
783
784pub fn is_boolean_keyword(kind: &TokenKind) -> bool {
786 matches!(
787 kind,
788 TokenKind::True
789 | TokenKind::False
790 | TokenKind::Yes
791 | TokenKind::No
792 | TokenKind::Accept
793 | TokenKind::Reject
794 )
795}
796
797#[must_use]
799pub fn token_kind_to_boolean_value(kind: &TokenKind) -> BooleanValue {
800 match kind {
801 TokenKind::True => BooleanValue::True,
802 TokenKind::False => BooleanValue::False,
803 TokenKind::Yes => BooleanValue::Yes,
804 TokenKind::No => BooleanValue::No,
805 TokenKind::Accept => BooleanValue::Accept,
806 TokenKind::Reject => BooleanValue::Reject,
807 _ => unreachable!(
808 "BUG: token_kind_to_boolean_value called with non-boolean token {:?}",
809 kind
810 ),
811 }
812}
813
814pub fn is_math_function(kind: &TokenKind) -> bool {
816 matches!(
817 kind,
818 TokenKind::Sqrt
819 | TokenKind::Sin
820 | TokenKind::Cos
821 | TokenKind::Tan
822 | TokenKind::Asin
823 | TokenKind::Acos
824 | TokenKind::Atan
825 | TokenKind::Log
826 | TokenKind::Exp
827 | TokenKind::Abs
828 | TokenKind::Floor
829 | TokenKind::Ceil
830 | TokenKind::Round
831 )
832}
833
834pub fn is_spec_body_keyword(kind: &TokenKind) -> bool {
837 matches!(
838 kind,
839 TokenKind::Data | TokenKind::With | TokenKind::Rule | TokenKind::Type | TokenKind::Meta
840 )
841}
842
843pub fn can_be_label(kind: &TokenKind) -> bool {
846 matches!(
847 kind,
848 TokenKind::Identifier
849 | TokenKind::Past
850 | TokenKind::Future
851 | TokenKind::Permille
852 | TokenKind::Is
853 )
854}
855
856#[must_use]
858pub fn token_is_calendar_period_marker(tok: &Token) -> bool {
859 tok.kind == TokenKind::Identifier && tok.text == "calendar"
860}
861
862pub fn can_be_reference_segment(kind: &TokenKind) -> bool {
865 can_be_label(kind) || is_type_keyword(kind)
866}
867
868#[must_use]
873pub fn can_be_repository_qualifier_segment(kind: &TokenKind) -> bool {
874 matches!(kind, TokenKind::Identifier)
875 || is_structural_keyword(kind)
876 || can_be_label(kind)
877 || is_type_keyword(kind)
878 || is_boolean_keyword(kind)
879 || is_math_function(kind)
880}
881
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parsing::source::SourceType;

    /// Lexes `input` to completion; the EOF token is the last element.
    fn lex_all(input: &str) -> Result<Vec<Token>, Error> {
        let mut lexer = Lexer::new(input, &SourceType::Volatile);
        let mut tokens = Vec::new();
        loop {
            let token = lexer.next_token()?;
            let reached_end = token.kind == TokenKind::Eof;
            tokens.push(token);
            if reached_end {
                return Ok(tokens);
            }
        }
    }

    /// Lexes `input` and keeps only the token kinds.
    fn lex_kinds(input: &str) -> Result<Vec<TokenKind>, Error> {
        let tokens = lex_all(input)?;
        Ok(tokens.into_iter().map(|token| token.kind).collect())
    }

    /// First token of `input`; panics if lexing fails.
    fn first_token(input: &str) -> Token {
        lex_all(input).unwrap().remove(0)
    }

    /// Asserts that the kinds lexed from `input` begin with `expected`.
    fn assert_leading_kinds(input: &str, expected: &[TokenKind]) {
        let kinds = lex_kinds(input).unwrap();
        assert_eq!(&kinds[..expected.len()], expected);
    }

    /// Asserts that `input` lexes, as its first token, to a number literal
    /// whose text is `input` itself.
    fn assert_whole_number_literal(input: &str) {
        let token = first_token(input);
        assert_eq!(token.kind, TokenKind::NumberLit);
        assert_eq!(token.text, input);
    }

    #[test]
    fn lex_empty_input() {
        let kinds = lex_kinds("").unwrap();
        assert_eq!(kinds, vec![TokenKind::Eof]);
    }

    #[test]
    fn lex_spec_declaration() {
        let expected = vec![TokenKind::Spec, TokenKind::Identifier, TokenKind::Eof];
        assert_eq!(lex_kinds("spec person").unwrap(), expected);
    }

    #[test]
    fn lex_data_definition() {
        let expected = vec![
            TokenKind::Data,
            TokenKind::Identifier,
            TokenKind::Colon,
            TokenKind::NumberLit,
            TokenKind::Eof,
        ];
        assert_eq!(lex_kinds("data age: 25").unwrap(), expected);
    }

    #[test]
    fn lex_rule_with_comparison() {
        let expected = vec![
            TokenKind::Rule,
            TokenKind::Identifier,
            TokenKind::Colon,
            TokenKind::Identifier,
            TokenKind::Gte,
            TokenKind::NumberLit,
            TokenKind::Eof,
        ];
        assert_eq!(lex_kinds("rule is_adult: age >= 18").unwrap(), expected);
    }

    #[test]
    fn lex_string_literal() {
        let token = first_token(r#""hello world""#);
        assert_eq!(token.kind, TokenKind::StringLit);
        assert_eq!(token.text, "\"hello world\"");
    }

    #[test]
    fn lex_unterminated_string() {
        assert!(lex_all(r#""hello"#).is_err());
    }

    #[test]
    fn lex_number_with_decimal() {
        assert_whole_number_literal("3.14");
    }

    #[test]
    fn lex_number_with_underscores() {
        assert_whole_number_literal("1_000_000");
    }

    #[test]
    fn lex_scientific_notation() {
        assert_whole_number_literal("1.5e+10");
    }

    #[test]
    fn lex_all_operators() {
        assert_leading_kinds(
            "+ - * / % ^ > < >= <= -> %%",
            &[
                TokenKind::Plus,
                TokenKind::Minus,
                TokenKind::Star,
                TokenKind::Slash,
                TokenKind::Percent,
                TokenKind::Caret,
                TokenKind::Gt,
                TokenKind::Lt,
                TokenKind::Gte,
                TokenKind::Lte,
                TokenKind::Arrow,
                TokenKind::PercentPercent,
            ],
        );
    }

    #[test]
    fn lex_keywords() {
        assert_leading_kinds(
            "spec data rule unless then not and in as type uses meta veto now",
            &[
                TokenKind::Spec,
                TokenKind::Data,
                TokenKind::Rule,
                TokenKind::Unless,
                TokenKind::Then,
                TokenKind::Not,
                TokenKind::And,
                TokenKind::In,
                TokenKind::As,
                TokenKind::Type,
                TokenKind::Uses,
                TokenKind::Meta,
                TokenKind::Veto,
                TokenKind::Now,
            ],
        );
    }

    #[test]
    fn lex_boolean_keywords() {
        assert_leading_kinds(
            "true false yes no accept reject",
            &[
                TokenKind::True,
                TokenKind::False,
                TokenKind::Yes,
                TokenKind::No,
                TokenKind::Accept,
                TokenKind::Reject,
            ],
        );
    }

    #[test]
    fn lex_duration_keywords() {
        // Duration words are not keywords: each lexes as a plain identifier.
        let expected = vec![TokenKind::Identifier; 7];
        assert_leading_kinds("years months weeks days hours minutes seconds", &expected);
    }

    #[test]
    fn lex_commentary() {
        let token = first_token(r#""""hello world""""#);
        assert_eq!(token.kind, TokenKind::Commentary);
        assert_eq!(token.text, "hello world");
    }

    #[test]
    fn lex_at_sign() {
        assert_leading_kinds("@user", &[TokenKind::At, TokenKind::Identifier]);
    }

    #[test]
    fn lex_parentheses() {
        assert_leading_kinds(
            "(x + 1)",
            &[
                TokenKind::LParen,
                TokenKind::Identifier,
                TokenKind::Plus,
                TokenKind::NumberLit,
                TokenKind::RParen,
            ],
        );
    }

    #[test]
    fn lex_dot_for_references() {
        assert_leading_kinds(
            "employee.salary",
            &[TokenKind::Identifier, TokenKind::Dot, TokenKind::Identifier],
        );
    }

    #[test]
    fn lex_spec_name_with_slashes() {
        assert_leading_kinds(
            "spec contracts/employment/jack",
            &[TokenKind::Spec, TokenKind::Identifier],
        );
    }

    #[test]
    fn lex_number_not_followed_by_e_identifier() {
        let tokens = lex_all("42 eur").unwrap();
        let (number, unit) = (&tokens[0], &tokens[1]);
        assert_eq!(number.kind, TokenKind::NumberLit);
        assert_eq!(number.text, "42");
        assert_eq!(unit.kind, TokenKind::Identifier);
        assert_eq!(unit.text, "eur");
    }

    #[test]
    fn lex_unknown_character() {
        assert!(lex_all("§").is_err());
    }

    #[test]
    fn lex_peek_does_not_consume() {
        let mut lexer = Lexer::new("spec test", &SourceType::Volatile);
        let peeked_kind = lexer.peek().unwrap().kind.clone();
        assert_eq!(peeked_kind, TokenKind::Spec);
        let consumed = lexer.next_token().unwrap();
        assert_eq!(consumed.kind, TokenKind::Spec);
    }

    #[test]
    fn lex_span_byte_offsets() {
        let tokens = lex_all("spec test").unwrap();
        // (start, end, line, col) of a token's span.
        let position = |token: &Token| {
            (
                token.span.start,
                token.span.end,
                token.span.line,
                token.span.col,
            )
        };
        assert_eq!(position(&tokens[0]), (0, 4, 1, 1));
        assert_eq!(position(&tokens[1]), (5, 9, 1, 6));
    }

    #[test]
    fn lex_multiline_span_tracking() {
        let tokens = lex_all("spec test\ndata x: 1").unwrap();
        // Index 2 is the first token on the second line.
        let data_token = &tokens[2];
        assert_eq!(data_token.kind, TokenKind::Data);
        assert_eq!(data_token.span.line, 2);
        assert_eq!(data_token.span.col, 1);
    }

    #[test]
    fn lex_case_insensitive_keywords() {
        assert_leading_kinds(
            "SPEC Data RULE",
            &[TokenKind::Spec, TokenKind::Data, TokenKind::Rule],
        );
    }

    #[test]
    fn lex_math_function_keywords() {
        assert_leading_kinds(
            "sqrt sin cos tan asin acos atan log exp abs floor ceil round",
            &[
                TokenKind::Sqrt,
                TokenKind::Sin,
                TokenKind::Cos,
                TokenKind::Tan,
                TokenKind::Asin,
                TokenKind::Acos,
                TokenKind::Atan,
                TokenKind::Log,
                TokenKind::Exp,
                TokenKind::Abs,
                TokenKind::Floor,
                TokenKind::Ceil,
                TokenKind::Round,
            ],
        );
    }

    #[test]
    fn lex_is_keyword() {
        assert_leading_kinds(
            "status is \"active\"",
            &[TokenKind::Identifier, TokenKind::Is, TokenKind::StringLit],
        );
    }

    #[test]
    fn lex_percent_not_followed_by_digit() {
        assert_leading_kinds("50%", &[TokenKind::NumberLit, TokenKind::Percent]);
    }

    #[test]
    fn lex_number_with_commas() {
        assert_whole_number_literal("1,000,000");
    }

    #[test]
    fn lex_arrow_chain() {
        assert_leading_kinds(
            "-> unit eur 1.00 -> decimals 2",
            &[
                TokenKind::Arrow,
                TokenKind::Identifier,
                TokenKind::Identifier,
                TokenKind::NumberLit,
                TokenKind::Arrow,
            ],
        );
    }
}