1#![forbid(unsafe_code)]
2use std::iter::Peekable;
27use std::str::CharIndices;
28
29mod error;
30pub use error::{LexError, LexErrorKind};
32mod span;
33pub use span::LineMap;
35mod iter;
36pub use iter::{tokenize_iter, Tokens};
38
/// Token kind whose textual payloads borrow from the source string.
///
/// Zero-copy counterpart of [`TokenKind`]: `Ident`, `Number` and `String`
/// carry `&str` slices into the original input instead of owned `String`s.
/// `String` holds the raw slice between the quotes — escape sequences are
/// validated but left undecoded by the borrowed lexing path.
// NOTE(review): unlike `TokenKind`, this enum carries no
// `serde(tag = "type", content = "value")` attribute, so the serialized
// shapes of the owned and borrowed kinds differ — confirm intended.
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BorrowedTokenKind<'a> {
    /// Identifier lexeme, borrowed from the source.
    Ident(&'a str),
    /// Numeric literal text exactly as written, borrowed from the source.
    Number(&'a str),
    /// Raw string contents between the quotes (escapes not decoded).
    String(&'a str),
    // Keywords.
    True,
    False,
    If,
    Then,
    Else,
    Let,
    Rule,
    And,
    Or,
    // Punctuation and operators.
    LParen,
    RParen,
    LBrace,
    RBrace,
    LBracket,
    RBracket,
    Comma,
    Colon,
    Semicolon,
    Arrow,
    Eq,
    Plus,
    Minus,
    Star,
    Slash,
}
72
/// A lexed token whose payload borrows from the source string.
///
/// Produced by [`tokenize_borrowed`]. Serialize-only under the `serde`
/// feature (a borrowed payload cannot be deserialized into `&'a str`
/// without an owning buffer).
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct BorrowedToken<'a> {
    /// What was lexed, with any text borrowed from the input.
    pub kind: BorrowedTokenKind<'a>,
    /// Byte range of the token in the source string.
    pub span: Span,
}
80
/// Owned token kind produced by [`tokenize`] / [`Lexer::tokenize`].
///
/// `Ident`, `Number` and `String` carry owned `String` payloads; `String`
/// contains the literal's contents with escape sequences already decoded.
/// With the `serde` feature, serializes adjacently tagged as
/// `{"type": ..., "value": ...}`.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "serde", serde(tag = "type", content = "value"))]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TokenKind {
    /// Identifier lexeme.
    Ident(String),
    /// Numeric literal text exactly as written in the source.
    Number(String),
    /// String contents with escapes decoded.
    String(String),
    // Keywords.
    True,
    False,
    If,
    Then,
    Else,
    Let,
    Rule,
    And,
    Or,
    // Punctuation and operators.
    LParen,
    RParen,
    LBrace,
    RBrace,
    LBracket,
    RBracket,
    Comma,
    Colon,
    Semicolon,
    Arrow,
    Eq,
    Plus,
    Minus,
    Star,
    Slash,
}
114
/// Half-open byte range `[start, end)` into the source string.
///
/// Offsets are byte indices (from `char_indices`), not character counts.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Span {
    /// Byte offset of the first byte of the token.
    pub start: usize,
    /// Byte offset one past the last byte of the token.
    pub end: usize,
}
122
/// An owned token: its kind plus its byte span in the source.
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Token {
    /// What was lexed.
    pub kind: TokenKind,
    /// Byte range of the token in the source string.
    pub span: Span,
}
130
/// Hand-written single-pass lexer over a source string.
///
/// Keeps a one-character lookahead in `cur`; `it` yields the characters
/// that come after it.
#[derive(Debug)]
pub struct Lexer<'a> {
    /// Full source text, used for slicing token lexemes by span.
    src: &'a str,
    /// Iterator over the characters *after* `cur`.
    it: Peekable<CharIndices<'a>>,
    /// Current lookahead: byte offset and character; `None` at end of input.
    cur: Option<(usize, char)>,
}
138
139impl<'a> Lexer<'a> {
140 pub fn new(src: &'a str) -> Self {
141 let mut it = src.char_indices().peekable();
142 let cur = it.next();
143 Self { src, it, cur }
144 }
145
146 fn bump(&mut self) -> Option<(usize, char)> {
147 let out = self.cur;
148 self.cur = self.it.next();
149 out
150 }
151
    /// Returns the current lookahead character (and its byte offset)
    /// without consuming it; `None` at end of input.
    fn peek(&self) -> Option<(usize, char)> {
        self.cur
    }
155
156 fn skip_ws_and_comments(&mut self) {
157 loop {
158 let mut progressed = false;
159 while let Some((_, c)) = self.peek() {
160 if c.is_whitespace() {
161 self.bump();
162 progressed = true;
163 } else {
164 break;
165 }
166 }
167 if let Some((_, '/')) = self.peek() {
168 let mut clone = self.it.clone();
169 if let Some((_, '/')) = clone.next() {
170 self.bump();
171 self.bump();
172 while let Some((_, c)) = self.peek() {
173 if c == '\n' {
174 break;
175 }
176 self.bump();
177 }
178 continue;
179 }
180 }
181 if !progressed {
182 break;
183 }
184 }
185 }
186
    /// Maps an identifier lexeme to its keyword token kind, or wraps it
    /// as `Ident` when it is not a reserved word.
    fn kw_or_ident(s: &str) -> TokenKind {
        match s {
            "true" => TokenKind::True,
            "false" => TokenKind::False,
            "if" => TokenKind::If,
            "then" => TokenKind::Then,
            "else" => TokenKind::Else,
            "let" => TokenKind::Let,
            "rule" => TokenKind::Rule,
            "and" => TokenKind::And,
            "or" => TokenKind::Or,
            _ => TokenKind::Ident(s.to_string()),
        }
    }
201
202 fn lex_number(&mut self, start: usize) -> Result<Token, LexError> {
203 let mut seen_dot = false;
204 let mut seen_exp = false;
205 let mut last_was_dot = false;
206 self.bump(); while let Some((idx, ch)) = self.peek() {
209 if ch.is_ascii_digit() {
210 self.bump();
211 last_was_dot = false;
212 } else if ch == '.' {
213 if seen_dot {
214 if last_was_dot {
216 break;
217 }
218 return Err(LexError::new(
220 LexErrorKind::InvalidNumber,
221 Span {
222 start,
223 end: idx + ch.len_utf8(),
224 },
225 ));
226 }
227 let mut clone = self.it.clone();
229 if let Some((_, next)) = clone.next() {
230 if next == '.' {
231 break;
232 }
233 if !next.is_ascii_digit() {
234 break;
235 }
236 } else {
237 break;
238 }
239 seen_dot = true;
240 last_was_dot = true;
241 self.bump();
242 } else if (ch == 'e' || ch == 'E') && !seen_exp {
243 seen_exp = true;
244 last_was_dot = false;
245 self.bump();
246 if let Some((_, sign)) = self.peek() {
247 if sign == '+' || sign == '-' {
248 self.bump();
249 }
250 }
251 match self.peek() {
252 Some((_, d)) if d.is_ascii_digit() => {}
253 _ => {
254 return Err(LexError::new(
255 LexErrorKind::InvalidNumber,
256 Span {
257 start,
258 end: idx + ch.len_utf8(),
259 },
260 ));
261 }
262 }
263 } else {
264 break;
265 }
266 }
267
268 let end = self.peek().map(|(j, _)| j).unwrap_or(self.src.len());
269 Ok(Token {
270 kind: TokenKind::Number(self.src[start..end].to_string()),
271 span: Span { start, end },
272 })
273 }
274
275 fn lex_number_borrowed(&mut self, start: usize) -> Result<BorrowedToken<'a>, LexError> {
276 let mut seen_dot = false;
277 let mut seen_exp = false;
278 let mut last_was_dot = false;
279 self.bump(); while let Some((idx, ch)) = self.peek() {
282 if ch.is_ascii_digit() {
283 self.bump();
284 last_was_dot = false;
285 } else if ch == '.' {
286 if seen_dot {
287 if last_was_dot {
288 break;
289 }
290 return Err(LexError::new(
291 LexErrorKind::InvalidNumber,
292 Span {
293 start,
294 end: idx + ch.len_utf8(),
295 },
296 ));
297 }
298 let mut clone = self.it.clone();
299 if let Some((_, next)) = clone.next() {
300 if next == '.' {
301 break;
302 }
303 if !next.is_ascii_digit() {
304 break;
305 }
306 } else {
307 break;
308 }
309 seen_dot = true;
310 last_was_dot = true;
311 self.bump();
312 } else if (ch == 'e' || ch == 'E') && !seen_exp {
313 seen_exp = true;
314 last_was_dot = false;
315 self.bump();
316 if let Some((_, sign)) = self.peek() {
317 if sign == '+' || sign == '-' {
318 self.bump();
319 }
320 }
321 match self.peek() {
322 Some((_, d)) if d.is_ascii_digit() => {}
323 _ => {
324 return Err(LexError::new(
325 LexErrorKind::InvalidNumber,
326 Span {
327 start,
328 end: idx + ch.len_utf8(),
329 },
330 ))
331 }
332 }
333 } else {
334 break;
335 }
336 }
337
338 let end = self.peek().map(|(j, _)| j).unwrap_or(self.src.len());
339 Ok(BorrowedToken {
340 kind: BorrowedTokenKind::Number(&self.src[start..end]),
341 span: Span { start, end },
342 })
343 }
344
    /// Lexes the next borrowed (zero-copy) token, skipping leading
    /// whitespace and comments; `None` at end of input.
    ///
    /// String payloads are the raw slice between the quotes: escape
    /// sequences are validated but not decoded.
    fn next_token_borrowed(&mut self) -> Option<Result<BorrowedToken<'a>, LexError>> {
        self.skip_ws_and_comments();
        let (i, c) = self.peek()?;

        // String literal: validate escapes but keep the raw slice.
        if c == '"' {
            let start = i;
            self.bump();
            let content_start = start + 1;
            loop {
                let Some((j, ch)) = self.bump() else {
                    // EOF before the closing quote.
                    return Some(Err(LexError::new(
                        LexErrorKind::UnterminatedString,
                        Span {
                            start,
                            end: self.src.len(),
                        },
                    )));
                };
                match ch {
                    '\\' => {
                        let Some((k, esc)) = self.bump() else {
                            // Backslash was the last character of the input.
                            return Some(Err(LexError::new(
                                LexErrorKind::UnterminatedEscape,
                                Span {
                                    start: j,
                                    end: j + 1,
                                },
                            )));
                        };
                        match esc {
                            'n' | 't' | 'r' | '"' | '\\' => {
                                let _ = k;
                            }
                            _ => {
                                // Span covers backslash + offending char.
                                let escape_end = k + esc.len_utf8();
                                return Some(Err(LexError::new(
                                    LexErrorKind::InvalidEscape,
                                    Span {
                                        start: j,
                                        end: escape_end,
                                    },
                                )));
                            }
                        }
                    }
                    '"' => {
                        let end = j + 1;
                        return Some(Ok(BorrowedToken {
                            kind: BorrowedTokenKind::String(&self.src[content_start..j]),
                            span: Span { start, end },
                        }));
                    }
                    // Any other character (including raw newlines) is
                    // simply part of the string.
                    _ => {}
                }
            }
        }

        if c.is_ascii_digit() {
            match self.lex_number_borrowed(i) {
                Ok(tok) => return Some(Ok(tok)),
                Err(e) => return Some(Err(e)),
            }
        }

        // Identifier or keyword: [A-Za-z_][A-Za-z0-9_]*.
        if c.is_ascii_alphabetic() || c == '_' {
            let start = i;
            self.bump();
            while let Some((_, p)) = self.peek() {
                if p.is_ascii_alphanumeric() || p == '_' {
                    self.bump();
                } else {
                    break;
                }
            }
            let end = self.peek().map(|(j, _)| j).unwrap_or(self.src.len());
            let kind = match &self.src[start..end] {
                "true" => BorrowedTokenKind::True,
                "false" => BorrowedTokenKind::False,
                "if" => BorrowedTokenKind::If,
                "then" => BorrowedTokenKind::Then,
                "else" => BorrowedTokenKind::Else,
                "let" => BorrowedTokenKind::Let,
                "rule" => BorrowedTokenKind::Rule,
                "and" => BorrowedTokenKind::And,
                "or" => BorrowedTokenKind::Or,
                s => BorrowedTokenKind::Ident(s),
            };
            return Some(Ok(BorrowedToken {
                kind,
                span: Span { start, end },
            }));
        }

        // '-' may begin the two-character arrow "->".
        if c == '-' {
            let start = i;
            self.bump();
            if let Some((j, '>')) = self.peek() {
                self.bump();
                return Some(Ok(BorrowedToken {
                    kind: BorrowedTokenKind::Arrow,
                    span: Span { start, end: j + 1 },
                }));
            } else {
                return Some(Ok(BorrowedToken {
                    kind: BorrowedTokenKind::Minus,
                    span: Span {
                        start,
                        end: start + 1,
                    },
                }));
            }
        }

        // Single-character tokens (all ASCII, hence `start + 1`).
        let start = i;
        self.bump();
        let tk = match c {
            '(' => BorrowedTokenKind::LParen,
            ')' => BorrowedTokenKind::RParen,
            '{' => BorrowedTokenKind::LBrace,
            '}' => BorrowedTokenKind::RBrace,
            '[' => BorrowedTokenKind::LBracket,
            ']' => BorrowedTokenKind::RBracket,
            ',' => BorrowedTokenKind::Comma,
            ':' => BorrowedTokenKind::Colon,
            ';' => BorrowedTokenKind::Semicolon,
            '=' => BorrowedTokenKind::Eq,
            '+' => BorrowedTokenKind::Plus,
            '*' => BorrowedTokenKind::Star,
            '/' => BorrowedTokenKind::Slash,
            other => {
                return Some(Err(LexError::new(
                    LexErrorKind::UnexpectedChar,
                    Span {
                        start,
                        end: start + other.len_utf8(),
                    },
                )));
            }
        };
        Some(Ok(BorrowedToken {
            kind: tk,
            span: Span {
                start,
                end: start + 1,
            },
        }))
    }
499
    /// Lexes the next owned token, skipping leading whitespace and
    /// comments; `None` at end of input.
    ///
    /// Unlike the borrowed path, string escapes (`\n`, `\t`, `\r`, `\"`,
    /// `\\`) are decoded into the owned payload.
    #[inline]
    pub(crate) fn next_token(&mut self) -> Option<Result<Token, LexError>> {
        self.skip_ws_and_comments();
        let (i, c) = self.peek()?;

        // String literal: decode escapes into an owned buffer.
        if c == '"' {
            let start = i;
            self.bump();
            let mut s = String::new();
            loop {
                let Some((j, ch)) = self.bump() else {
                    // EOF before the closing quote.
                    return Some(Err(LexError::new(
                        LexErrorKind::UnterminatedString,
                        Span {
                            start,
                            end: self.src.len(),
                        },
                    )));
                };
                match ch {
                    '\\' => {
                        let Some((k, esc)) = self.bump() else {
                            // Backslash was the last character of the input.
                            return Some(Err(LexError::new(
                                LexErrorKind::UnterminatedEscape,
                                Span {
                                    start: j,
                                    end: j + 1,
                                },
                            )));
                        };
                        let ch = match esc {
                            'n' => '\n',
                            't' => '\t',
                            'r' => '\r',
                            '"' => '"',
                            '\\' => '\\',
                            _ => {
                                // Span covers backslash + offending char.
                                let escape_end = k + esc.len_utf8();
                                return Some(Err(LexError::new(
                                    LexErrorKind::InvalidEscape,
                                    Span {
                                        start: j,
                                        end: escape_end,
                                    },
                                )));
                            }
                        };
                        s.push(ch);
                    }
                    '"' => {
                        return Some(Ok(Token {
                            kind: TokenKind::String(s),
                            span: Span { start, end: j + 1 },
                        }));
                    }
                    // Raw characters (including newlines) are kept verbatim.
                    _ => s.push(ch),
                }
            }
        }

        if c.is_ascii_digit() {
            match self.lex_number(i) {
                Ok(tok) => return Some(Ok(tok)),
                Err(e) => return Some(Err(e)),
            }
        }

        // Identifier or keyword: [A-Za-z_][A-Za-z0-9_]*.
        if c.is_ascii_alphabetic() || c == '_' {
            let start = i;
            self.bump();
            while let Some((_, p)) = self.peek() {
                if p.is_ascii_alphanumeric() || p == '_' {
                    self.bump();
                } else {
                    break;
                }
            }
            let end = self.peek().map(|(j, _)| j).unwrap_or(self.src.len());
            let kind = Self::kw_or_ident(&self.src[start..end]);
            return Some(Ok(Token {
                kind,
                span: Span { start, end },
            }));
        }

        // '-' may begin the two-character arrow "->".
        if c == '-' {
            let start = i;
            self.bump();
            if let Some((j, '>')) = self.peek() {
                self.bump();
                return Some(Ok(Token {
                    kind: TokenKind::Arrow,
                    span: Span { start, end: j + 1 },
                }));
            } else {
                return Some(Ok(Token {
                    kind: TokenKind::Minus,
                    span: Span {
                        start,
                        end: start + 1,
                    },
                }));
            }
        }

        // Single-character tokens (all ASCII, hence `start + 1`).
        let start = i;
        self.bump();
        let tk = match c {
            '(' => TokenKind::LParen,
            ')' => TokenKind::RParen,
            '{' => TokenKind::LBrace,
            '}' => TokenKind::RBrace,
            '[' => TokenKind::LBracket,
            ']' => TokenKind::RBracket,
            ',' => TokenKind::Comma,
            ':' => TokenKind::Colon,
            ';' => TokenKind::Semicolon,
            '=' => TokenKind::Eq,
            '+' => TokenKind::Plus,
            '*' => TokenKind::Star,
            '/' => TokenKind::Slash,
            other => {
                return Some(Err(LexError::new(
                    LexErrorKind::UnexpectedChar,
                    Span {
                        start,
                        end: start + other.len_utf8(),
                    },
                )));
            }
        };
        Some(Ok(Token {
            kind: tk,
            span: Span {
                start,
                end: start + 1,
            },
        }))
    }
647
648 pub fn tokenize(mut self) -> Result<Vec<Token>, LexError> {
649 let mut out = Vec::new();
650 while let Some(res) = self.next_token() {
651 match res {
652 Ok(tok) => out.push(tok),
653 Err(e) => return Err(e),
654 }
655 }
656 Ok(out)
657 }
658}
659
/// Lexes `src` into a vector of owned tokens, stopping at the first
/// lexical error.
pub fn tokenize(src: &str) -> Result<Vec<Token>, LexError> {
    Lexer::new(src).tokenize()
}
665
666pub fn tokenize_borrowed(src: &str) -> Result<Vec<BorrowedToken<'_>>, LexError> {
669 let mut lx = Lexer::new(src);
670 let mut out = Vec::new();
671 while let Some(res) = lx.next_token_borrowed() {
672 match res {
673 Ok(t) => out.push(t),
674 Err(e) => return Err(e),
675 }
676 }
677 Ok(out)
678}
679
#[cfg(test)]
mod tests {
    // Unit tests for both the owned (`tokenize`) and borrowed
    // (`tokenize_borrowed`) lexer paths, plus the streaming iterator.
    use super::*;
    #[test]
    fn error_kind_as_str_and_display_messages() {
        use super::{LexError, LexErrorKind, Span};
        let span = Span { start: 1, end: 3 };
        // (kind, expected as_str(), expected Display fragment).
        let cases: &[(LexErrorKind, &str, &str)] = &[
            (
                LexErrorKind::UnexpectedChar,
                "unexpected character",
                "unexpected char",
            ),
            (
                LexErrorKind::UnterminatedString,
                "unterminated string",
                "unterminated string",
            ),
            (
                LexErrorKind::UnterminatedEscape,
                "unterminated escape",
                "unterminated escape",
            ),
            (
                LexErrorKind::InvalidNumber,
                "invalid number",
                "invalid number",
            ),
            (
                LexErrorKind::InvalidEscape,
                "invalid escape sequence",
                "invalid escape",
            ),
        ];

        for (kind, as_str_msg, display_msg) in cases.iter().cloned() {
            assert_eq!(kind.as_str(), as_str_msg);
            let err = LexError::new(kind, span);
            let rendered = format!("{}", err);
            assert_eq!(
                rendered,
                format!("{} at {}..{}", display_msg, span.start, span.end)
            );
            // LexError must also be a std Error and be Debug + Clone.
            let _e: &dyn std::error::Error = &err;
            let _dbg = format!("{:?}", err.clone());
            assert!(!_dbg.is_empty());
        }
    }
    #[test]
    fn numbers_second_dot_invalid_unless_range() {
        let err = tokenize("123.45.6").expect_err("second dot should be invalid unless range");
        assert!(matches!(err.kind, LexErrorKind::InvalidNumber));

        let err = tokenize("1..2").expect_err("range dot should not be consumed by number");
        assert!(matches!(err.kind, LexErrorKind::UnexpectedChar));
    }

    #[test]
    fn numbers_exponent_rules() {
        let toks = tokenize("1e10 1E+10 1.23e-4").unwrap();
        assert!(toks
            .iter()
            .any(|t| matches!(t.kind, TokenKind::Number(ref s) if s == "1e10")));
        assert!(toks
            .iter()
            .any(|t| matches!(t.kind, TokenKind::Number(ref s) if s == "1E+10")));
        assert!(toks
            .iter()
            .any(|t| matches!(t.kind, TokenKind::Number(ref s) if s == "1.23e-4")));

        let err = tokenize("1e+").expect_err("missing exponent digits");
        assert!(matches!(err.kind, LexErrorKind::InvalidNumber));

        let err = tokenize("2E-").expect_err("missing exponent digits");
        assert!(matches!(err.kind, LexErrorKind::InvalidNumber));
    }
    #[test]
    fn basic() {
        let code = r#"
        // sample
        let rule greet(name) = "hi, " + name
        if true and false then x = 1 else x = 2;
        "#;
        let toks = tokenize(code).unwrap();
        assert!(toks.iter().any(|t| matches!(t.kind, TokenKind::Let)));
        assert!(toks.iter().any(|t| matches!(t.kind, TokenKind::Rule)));
        assert!(toks.iter().any(|t| matches!(t.kind, TokenKind::String(_))));
    }

    #[test]
    fn numbers_and_ranges() {
        let toks = tokenize("1 1.0 1.2e-3").unwrap();
        assert!(toks
            .iter()
            .any(|t| matches!(t.kind, TokenKind::Number(ref s) if s == "1")));
        assert!(toks
            .iter()
            .any(|t| matches!(t.kind, TokenKind::Number(ref s) if s == "1.0")));
        assert!(toks
            .iter()
            .any(|t| matches!(t.kind, TokenKind::Number(ref s) if s == "1.2e-3")));

        let err = tokenize("1..2").expect_err("should error on unexpected '.'");
        assert!(matches!(err.kind, LexErrorKind::UnexpectedChar));
    }

    #[test]
    fn string_escapes() {
        let toks = tokenize("\"a\\n\\t\\r\\\\\\\"\"").unwrap();
        assert!(matches!(toks[0].kind, TokenKind::String(_)));

        let err = tokenize("\"\\x\"").unwrap_err();
        assert!(matches!(err.kind, LexErrorKind::InvalidEscape));
    }

    #[test]
    fn numbers_trailing_dot_is_error() {
        let err = tokenize("0.").expect_err("trailing dot should error");
        assert!(matches!(err.kind, LexErrorKind::UnexpectedChar));
    }

    #[test]
    fn strings_empty_and_raw_newline_and_escapes() {
        let toks = tokenize("\"\"").unwrap();
        assert!(matches!(toks[0].kind, TokenKind::String(ref s) if s.is_empty()));

        // Raw newlines are allowed inside string literals.
        let toks = tokenize("\"a\nb\"").unwrap();
        assert!(matches!(toks[0].kind, TokenKind::String(ref s) if s == "a\nb"));

        let toks = tokenize("\"\\\"\\\\\t\"").unwrap();
        assert!(matches!(toks[0].kind, TokenKind::String(ref s) if s == "\"\\\t"));
    }

    #[test]
    fn streaming_iterator_matches_tokenize_and_propagates_error() {
        let src = "let x = 1 + 2\nrule r() = \"ok\"";
        let vec_tokens = tokenize(src).unwrap();
        let iter_tokens: Result<Vec<_>, _> = tokenize_iter(src).collect();
        let iter_tokens = iter_tokens.unwrap();
        assert_eq!(vec_tokens, iter_tokens);

        let src_err = "\"abc\\x\" rest";
        let mut it = tokenize_iter(src_err);
        match it.next() {
            Some(Err(e)) => assert!(matches!(e.kind, LexErrorKind::InvalidEscape)),
            other => panic!("expected first item to be Err, got {:?}", other),
        }
        assert!(it.next().is_none(), "iterator should end after error");
    }

    #[test]
    fn invalid_escape_span_is_precise() {
        let src = "\"abc\\x\"";
        let err = tokenize(src).unwrap_err();
        assert!(matches!(err.kind, LexErrorKind::InvalidEscape));
        // Backslash at byte 4, bad escape char at byte 5.
        assert_eq!(err.span, Span { start: 4, end: 6 });
    }

    #[test]
    fn strings_unterminated_and_unterminated_escape() {
        let err = tokenize("\"abc").expect_err("unterminated string");
        assert!(matches!(err.kind, LexErrorKind::UnterminatedString));

        let err = tokenize("\"abc\\").expect_err("unterminated escape");
        assert!(matches!(err.kind, LexErrorKind::UnterminatedEscape));
    }

    #[test]
    fn idents_and_keywords() {
        let toks = tokenize("let letx _x1").unwrap();
        assert!(matches!(toks[0].kind, TokenKind::Let));
        assert!(matches!(toks[1].kind, TokenKind::Ident(ref s) if s == "letx"));
        assert!(matches!(toks[2].kind, TokenKind::Ident(ref s) if s == "_x1"));
    }

    #[test]
    fn comments_do_not_leak() {
        let toks = tokenize("foo // comment\nbar").unwrap();
        assert!(matches!(toks[0].kind, TokenKind::Ident(ref s) if s == "foo"));
        assert!(matches!(toks[1].kind, TokenKind::Ident(ref s) if s == "bar"));
        assert_eq!(toks.len(), 2);
    }

    #[test]
    fn unknown_char_errors_with_span() {
        let err = tokenize("a @ b").expect_err("unknown char '@'");
        assert!(matches!(err.kind, LexErrorKind::UnexpectedChar));
        assert!(err.span.start < err.span.end);
    }

    #[test]
    fn golden_small_input() {
        let src = "let rule f(x) = \"hi\" + x";
        let toks = tokenize(src).unwrap();
        use TokenKind::*;
        let kinds: Vec<&'static str> = toks
            .iter()
            .map(|t| match &t.kind {
                Let => "Let",
                Rule => "Rule",
                Ident(s) if s == "f" => "Ident(f)",
                LParen => "LParen",
                Ident(s) if s == "x" => "Ident(x)",
                RParen => "RParen",
                Eq => "Eq",
                String(s) if s == "hi" => "String(hi)",
                Plus => "Plus",
                Ident(s) if s == "x" => "Ident(x)",
                other => panic!("unexpected token in golden: {:?}", other),
            })
            .collect();
        assert_eq!(
            kinds,
            vec![
                "Let",
                "Rule",
                "Ident(f)",
                "LParen",
                "Ident(x)",
                "RParen",
                "Eq",
                "String(hi)",
                "Plus",
                "Ident(x)"
            ]
        );
    }

    #[cfg(feature = "serde")]
    #[test]
    fn serde_round_trip_token() {
        let toks = tokenize("let x = 1").unwrap();
        let json = serde_json::to_string(&toks).unwrap();
        let back: Vec<Token> = serde_json::from_str(&json).unwrap();
        assert_eq!(toks, back);
    }

    #[test]
    fn borrowed_basic_no_escapes() {
        let toks = tokenize_borrowed("let x = \"hi\" 123").unwrap();
        use BorrowedTokenKind as K;
        assert!(matches!(toks[0].kind, K::Let));
        assert!(matches!(toks[1].kind, K::Ident("x")));
        assert!(matches!(toks[3].kind, K::String("hi")));
        assert!(matches!(toks[4].kind, K::Number("123")));
    }

    #[test]
    fn borrowed_string_keeps_escapes() {
        // Borrowed path leaves the escape sequence raw in the slice.
        let toks = tokenize_borrowed("\"a\\n\"").unwrap();
        use BorrowedTokenKind as K;
        assert!(matches!(toks[0].kind, K::String("a\\n")));
    }

    #[test]
    fn borrowed_operators_and_singles() {
        use BorrowedTokenKind as K;
        let src = "()->{}[],:;=+ - * / ->";
        let toks = tokenize_borrowed(src).unwrap();
        let kinds: Vec<&'static str> = toks
            .iter()
            .map(|t| match t.kind {
                K::LParen => "LParen",
                K::RParen => "RParen",
                K::Arrow => "Arrow",
                K::LBrace => "LBrace",
                K::RBrace => "RBrace",
                K::LBracket => "LBracket",
                K::RBracket => "RBracket",
                K::Comma => "Comma",
                K::Colon => "Colon",
                K::Semicolon => "Semicolon",
                K::Eq => "Eq",
                K::Plus => "Plus",
                K::Minus => "Minus",
                K::Star => "Star",
                K::Slash => "Slash",
                _ => "Other",
            })
            .collect();
        assert_eq!(
            kinds,
            vec![
                "LParen",
                "RParen",
                "Arrow",
                "LBrace",
                "RBrace",
                "LBracket",
                "RBracket",
                "Comma",
                "Colon",
                "Semicolon",
                "Eq",
                "Plus",
                "Minus",
                "Star",
                "Slash",
                "Arrow"
            ]
        );
    }

    #[test]
    fn borrowed_keywords_and_idents() {
        use BorrowedTokenKind as K;
        let toks =
            tokenize_borrowed("true false if then else let rule and or foo _bar a1").unwrap();
        assert!(matches!(toks[0].kind, K::True));
        assert!(matches!(toks[1].kind, K::False));
        assert!(matches!(toks[2].kind, K::If));
        assert!(matches!(toks[3].kind, K::Then));
        assert!(matches!(toks[4].kind, K::Else));
        assert!(matches!(toks[5].kind, K::Let));
        assert!(matches!(toks[6].kind, K::Rule));
        assert!(matches!(toks[7].kind, K::And));
        assert!(matches!(toks[8].kind, K::Or));
        assert!(matches!(toks[9].kind, K::Ident("foo")));
        assert!(matches!(toks[10].kind, K::Ident("_bar")));
        assert!(matches!(toks[11].kind, K::Ident("a1")));
    }

    #[test]
    fn borrowed_comments_skipped() {
        use BorrowedTokenKind as K;
        let toks = tokenize_borrowed("foo // comment\nbar").unwrap();
        assert!(matches!(toks[0].kind, K::Ident("foo")));
        assert!(matches!(toks[1].kind, K::Ident("bar")));
        assert_eq!(toks.len(), 2);
    }

    #[test]
    fn borrowed_numbers_errors_and_valid() {
        use BorrowedTokenKind as K;
        let toks = tokenize_borrowed("1 1.0 1.2e-3").unwrap();
        assert!(matches!(toks[0].kind, K::Number("1")));
        assert!(matches!(toks[1].kind, K::Number("1.0")));
        assert!(matches!(toks[2].kind, K::Number("1.2e-3")));
        let err = tokenize_borrowed("123.45.6").expect_err("second dot invalid");
        assert!(matches!(err.kind, LexErrorKind::InvalidNumber));
        let err = tokenize_borrowed("1e+").expect_err("missing exponent digits");
        assert!(matches!(err.kind, LexErrorKind::InvalidNumber));
    }

    #[test]
    fn borrowed_string_errors() {
        let err = tokenize_borrowed("\"\\x\"").unwrap_err();
        assert!(matches!(err.kind, LexErrorKind::InvalidEscape));
        let err = tokenize_borrowed("\"abc").unwrap_err();
        assert!(matches!(err.kind, LexErrorKind::UnterminatedString));
        let err = tokenize_borrowed("\"abc\\").unwrap_err();
        assert!(matches!(err.kind, LexErrorKind::UnterminatedEscape));
    }

    #[test]
    fn borrowed_unexpected_char_error() {
        let err = tokenize_borrowed("a @ b").expect_err("unexpected '@'");
        assert!(matches!(err.kind, LexErrorKind::UnexpectedChar));
        assert!(err.span.start < err.span.end);
    }
}