1use sema_core::{SemaError, Span};
2
/// One segment of an f-string literal (`f"..."`), stored in source order.
#[derive(Debug, Clone, PartialEq)]
pub enum FStringPart {
    /// Literal text between interpolations, with escape sequences already decoded.
    Literal(String),
    /// The source text found inside a `${...}` interpolation, with surrounding whitespace
    /// trimmed. It is kept as raw text; this lexer does not tokenize it further.
    Expr(String),
}
8
/// A lexical token produced by [`tokenize`].
///
/// Trivia (comments and line breaks) is represented explicitly rather than dropped.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `[`
    LBracket,
    /// `]`
    RBracket,
    /// `{`
    LBrace,
    /// `}`
    RBrace,
    /// `'`
    Quote,
    /// `` ` ``
    Quasiquote,
    /// `,` when not followed by `@`
    Unquote,
    /// `,@`
    UnquoteSplice,
    /// Integer literal, optionally prefixed with `-`.
    Int(i64),
    /// Float literal (`digits.digits`), or one of the recognised infinity / NaN spellings.
    Float(f64),
    /// `"..."` string literal with escape sequences decoded.
    String(String),
    /// `f"..."` literal split into literal and `${...}` parts.
    FString(Vec<FStringPart>),
    /// `#(` — opening of a short lambda.
    ShortLambdaStart,
    /// Any other identifier-like run of symbol characters (this includes `nil`).
    Symbol(String),
    /// `:name`; the stored text excludes the leading colon.
    Keyword(String),
    /// `#t` / `#f`, or the bare words `true` / `false`.
    Bool(bool),
    /// `#\x` or a named character such as `#\space`.
    Char(char),
    /// `#u8(` — opening of a bytevector literal.
    BytevectorStart,
    /// A lone `.` symbol.
    Dot,
    /// A `;` comment up to (not including) the line break; the text keeps the leading `;`.
    Comment(String),
    /// A single `\n` in the input.
    Newline,
    /// `#"..."` regex literal; only `\"` is unescaped, all other text is kept raw.
    Regex(String),
}
36
/// A [`Token`] together with where it was found in the input.
#[derive(Debug, Clone)]
pub struct SpannedToken {
    /// The token itself.
    pub token: Token,
    /// Line/column range of the token; lines and columns start at 1 and columns count `char`s.
    pub span: Span,
    /// Byte offset into the input of the token's first character.
    pub byte_start: usize,
    /// Byte offset into the input just past the token's last character (exclusive).
    pub byte_end: usize,
}
46
47pub fn tokenize(input: &str) -> Result<Vec<SpannedToken>, SemaError> {
48 let mut tokens = Vec::new();
49 let chars: Vec<char> = input.chars().collect();
50 let byte_offsets: Vec<usize> = {
52 let mut offsets = Vec::with_capacity(chars.len() + 1);
53 let mut pos = 0;
54 for c in &chars {
55 offsets.push(pos);
56 pos += c.len_utf8();
57 }
58 offsets.push(pos);
59 offsets
60 };
61 let mut i = 0;
62 let mut line = 1;
63 let mut col = 1;
64
65 while i < chars.len() {
66 let ch = chars[i];
67 let span = Span::point(line, col);
68
69 match ch {
70 ' ' | '\t' | '\r' => {
72 col += 1;
73 i += 1;
74 }
75 '\n' => {
76 tokens.push(SpannedToken {
77 token: Token::Newline,
78 span: span.with_end(line, col + 1),
79 byte_start: byte_offsets[i],
80 byte_end: byte_offsets[i + 1],
81 });
82 line += 1;
83 col = 1;
84 i += 1;
85 }
86
87 ';' => {
89 let start = i;
90 while i < chars.len() && chars[i] != '\n' {
91 i += 1;
92 }
93 let text: String = chars[start..i].iter().collect();
94 let end_col = col + (i - start);
95 tokens.push(SpannedToken {
96 token: Token::Comment(text),
97 span: span.with_end(line, end_col),
98 byte_start: byte_offsets[start],
99 byte_end: byte_offsets[i],
100 });
101 col = end_col;
102 }
103
104 '(' => {
106 col += 1;
107 i += 1;
108 tokens.push(SpannedToken {
109 token: Token::LParen,
110 span: span.with_end(line, col),
111 byte_start: byte_offsets[i - 1],
112 byte_end: byte_offsets[i],
113 });
114 }
115 ')' => {
116 col += 1;
117 i += 1;
118 tokens.push(SpannedToken {
119 token: Token::RParen,
120 span: span.with_end(line, col),
121 byte_start: byte_offsets[i - 1],
122 byte_end: byte_offsets[i],
123 });
124 }
125 '[' => {
126 col += 1;
127 i += 1;
128 tokens.push(SpannedToken {
129 token: Token::LBracket,
130 span: span.with_end(line, col),
131 byte_start: byte_offsets[i - 1],
132 byte_end: byte_offsets[i],
133 });
134 }
135 ']' => {
136 col += 1;
137 i += 1;
138 tokens.push(SpannedToken {
139 token: Token::RBracket,
140 span: span.with_end(line, col),
141 byte_start: byte_offsets[i - 1],
142 byte_end: byte_offsets[i],
143 });
144 }
145 '{' => {
146 col += 1;
147 i += 1;
148 tokens.push(SpannedToken {
149 token: Token::LBrace,
150 span: span.with_end(line, col),
151 byte_start: byte_offsets[i - 1],
152 byte_end: byte_offsets[i],
153 });
154 }
155 '}' => {
156 col += 1;
157 i += 1;
158 tokens.push(SpannedToken {
159 token: Token::RBrace,
160 span: span.with_end(line, col),
161 byte_start: byte_offsets[i - 1],
162 byte_end: byte_offsets[i],
163 });
164 }
165
166 '\'' => {
168 col += 1;
169 i += 1;
170 tokens.push(SpannedToken {
171 token: Token::Quote,
172 span: span.with_end(line, col),
173 byte_start: byte_offsets[i - 1],
174 byte_end: byte_offsets[i],
175 });
176 }
177 '`' => {
178 col += 1;
179 i += 1;
180 tokens.push(SpannedToken {
181 token: Token::Quasiquote,
182 span: span.with_end(line, col),
183 byte_start: byte_offsets[i - 1],
184 byte_end: byte_offsets[i],
185 });
186 }
187 ',' => {
188 if i + 1 < chars.len() && chars[i + 1] == '@' {
189 col += 2;
190 i += 2;
191 tokens.push(SpannedToken {
192 token: Token::UnquoteSplice,
193 span: span.with_end(line, col),
194 byte_start: byte_offsets[i - 2],
195 byte_end: byte_offsets[i],
196 });
197 } else {
198 col += 1;
199 i += 1;
200 tokens.push(SpannedToken {
201 token: Token::Unquote,
202 span: span.with_end(line, col),
203 byte_start: byte_offsets[i - 1],
204 byte_end: byte_offsets[i],
205 });
206 }
207 }
208
209 '"' => {
211 let token_start = i;
212 let mut s = String::new();
213 i += 1;
214 col += 1;
215 while i < chars.len() && chars[i] != '"' {
216 if chars[i] == '\\' && i + 1 < chars.len() {
217 i += 1;
218 col += 1;
219 read_string_escape(&chars, &mut i, &mut col, &mut s, span)?;
220 } else {
221 if chars[i] == '\n' {
222 line += 1;
223 col = 0;
224 }
225 s.push(chars[i]);
226 }
227 i += 1;
228 col += 1;
229 }
230 if i >= chars.len() {
231 return Err(SemaError::Reader {
232 message: "unterminated string".to_string(),
233 span,
234 });
235 }
236 i += 1; col += 1;
238 tokens.push(SpannedToken {
239 token: Token::String(s),
240 span: span.with_end(line, col),
241 byte_start: byte_offsets[token_start],
242 byte_end: byte_offsets[i],
243 });
244 }
245
246 '#' => {
248 let token_start = i;
249 if i + 1 < chars.len() {
250 match chars[i + 1] {
251 't' => {
252 i += 2;
253 col += 2;
254 tokens.push(SpannedToken {
255 token: Token::Bool(true),
256 span: span.with_end(line, col),
257 byte_start: byte_offsets[token_start],
258 byte_end: byte_offsets[i],
259 });
260 }
261 'f' => {
262 i += 2;
263 col += 2;
264 tokens.push(SpannedToken {
265 token: Token::Bool(false),
266 span: span.with_end(line, col),
267 byte_start: byte_offsets[token_start],
268 byte_end: byte_offsets[i],
269 });
270 }
271 '\\' => {
272 i += 2; col += 2;
275 if i >= chars.len() {
276 return Err(SemaError::Reader {
277 message: "unexpected end of input after #\\".to_string(),
278 span,
279 });
280 }
281 let start = i;
282 if chars[i].is_alphabetic() {
283 while i < chars.len() && is_symbol_char(chars[i]) {
284 i += 1;
285 col += 1;
286 }
287 } else {
288 i += 1;
289 col += 1;
290 }
291 let name: String = chars[start..i].iter().collect();
292 let c = match name.as_str() {
293 "space" => ' ',
294 "newline" => '\n',
295 "tab" => '\t',
296 "return" => '\r',
297 "nul" => '\0',
298 s if s.chars().count() == 1 => s.chars().next().unwrap(),
299 _ => {
300 return Err(SemaError::Reader {
301 message: format!("unknown character name: {name}"),
302 span,
303 });
304 }
305 };
306 tokens.push(SpannedToken {
307 token: Token::Char(c),
308 span: span.with_end(line, col),
309 byte_start: byte_offsets[token_start],
310 byte_end: byte_offsets[i],
311 });
312 }
313 'u' if i + 3 < chars.len()
314 && chars[i + 2] == '8'
315 && chars[i + 3] == '(' =>
316 {
317 i += 4;
318 col += 4;
319 tokens.push(SpannedToken {
320 token: Token::BytevectorStart,
321 span: span.with_end(line, col),
322 byte_start: byte_offsets[token_start],
323 byte_end: byte_offsets[i],
324 });
325 }
326 '(' => {
327 i += 2; col += 2;
330 tokens.push(SpannedToken {
331 token: Token::ShortLambdaStart,
332 span: span.with_end(line, col),
333 byte_start: byte_offsets[token_start],
334 byte_end: byte_offsets[i],
335 });
336 }
337 '"' => {
338 i += 2; col += 2;
341 let mut s = String::new();
342 while i < chars.len() && chars[i] != '"' {
343 if chars[i] == '\\' && i + 1 < chars.len() && chars[i + 1] == '"' {
344 s.push('"');
345 i += 2;
346 col += 2;
347 } else {
348 if chars[i] == '\n' {
349 line += 1;
350 col = 0;
351 }
352 s.push(chars[i]);
353 i += 1;
354 col += 1;
355 }
356 }
357 if i >= chars.len() {
358 return Err(SemaError::Reader {
359 message: "unterminated regex literal".to_string(),
360 span,
361 }
362 .with_hint(
363 "add a closing `\"` to end the #\"...\" regex literal",
364 ));
365 }
366 i += 1; col += 1;
368 tokens.push(SpannedToken {
369 token: Token::Regex(s),
370 span: span.with_end(line, col),
371 byte_start: byte_offsets[token_start],
372 byte_end: byte_offsets[i],
373 });
374 }
375 '!' if line == 1 && col == 1 => {
376 while i < chars.len() && chars[i] != '\n' {
378 i += 1;
379 }
380 }
381 _ => {
382 return Err(SemaError::Reader {
383 message: format!(
384 "unexpected character after #: '{}'",
385 chars[i + 1]
386 ),
387 span,
388 });
389 }
390 }
391 } else {
392 return Err(SemaError::Reader {
393 message: "unexpected end of input after `#`".to_string(),
394 span,
395 }
396 .with_hint("# starts a special form: #t, #f, #\\char, #u8(...)"));
397 }
398 }
399
400 ':' => {
402 let token_start = i;
403 i += 1;
404 col += 1;
405 let start = i;
406 while i < chars.len() && is_symbol_char(chars[i]) {
407 i += 1;
408 col += 1;
409 }
410 if i == start {
411 return Err(SemaError::Reader {
412 message: "expected keyword name after ':'".to_string(),
413 span,
414 });
415 }
416 let name: String = chars[start..i].iter().collect();
417 tokens.push(SpannedToken {
418 token: Token::Keyword(name),
419 span: span.with_end(line, col),
420 byte_start: byte_offsets[token_start],
421 byte_end: byte_offsets[i],
422 });
423 }
424
425 _ => {
427 if ch == 'f' && i + 1 < chars.len() && chars[i + 1] == '"' {
428 let token_start = i;
430 i += 1; col += 1;
432 i += 1; col += 1;
434 let mut parts: Vec<FStringPart> = Vec::new();
435 let mut current = String::new();
436
437 while i < chars.len() && chars[i] != '"' {
438 if chars[i] == '\\' && i + 1 < chars.len() {
439 i += 1;
440 col += 1;
441 read_string_escape(&chars, &mut i, &mut col, &mut current, span)?;
442 } else if chars[i] == '$' && i + 1 < chars.len() && chars[i + 1] == '{' {
443 if !current.is_empty() {
445 parts.push(FStringPart::Literal(std::mem::take(&mut current)));
446 }
447 i += 2; col += 2;
449 let mut expr = String::new();
450 let mut depth = 1;
451 while i < chars.len() && depth > 0 {
452 if chars[i] == '{' {
453 depth += 1;
454 } else if chars[i] == '}' {
455 depth -= 1;
456 if depth == 0 {
457 break;
458 }
459 }
460 if chars[i] == '\n' {
461 line += 1;
462 col = 0;
463 }
464 expr.push(chars[i]);
465 i += 1;
466 col += 1;
467 }
468 if depth != 0 {
469 return Err(SemaError::Reader {
470 message: "unterminated interpolation in f-string".to_string(),
471 span,
472 }
473 .with_hint("add a closing `}` to end the ${...} interpolation"));
474 }
475 let trimmed = expr.trim().to_string();
476 if trimmed.is_empty() {
477 return Err(SemaError::Reader {
478 message: "empty interpolation in f-string".to_string(),
479 span,
480 }
481 .with_hint("${} must contain an expression, e.g. ${name}"));
482 }
483 parts.push(FStringPart::Expr(trimmed));
484 } else {
486 if chars[i] == '\n' {
487 line += 1;
488 col = 0;
489 }
490 current.push(chars[i]);
491 }
492 i += 1;
493 col += 1;
494 }
495
496 if i >= chars.len() {
497 return Err(SemaError::Reader {
498 message: "unterminated f-string".to_string(),
499 span,
500 }
501 .with_hint("add a closing `\"` to end the f-string"));
502 }
503 i += 1; col += 1;
505
506 if !current.is_empty() {
507 parts.push(FStringPart::Literal(current));
508 }
509
510 tokens.push(SpannedToken {
511 token: Token::FString(parts),
512 span: span.with_end(line, col),
513 byte_start: byte_offsets[token_start],
514 byte_end: byte_offsets[i],
515 });
516 } else if ch == '-' && i + 1 < chars.len() && chars[i + 1].is_ascii_digit() {
517 let token_start = i;
519 let (tok, len) = read_number(&chars[i..], &span)?;
520 i += len;
521 col += len;
522 tokens.push(SpannedToken {
523 token: tok,
524 span: span.with_end(line, col),
525 byte_start: byte_offsets[token_start],
526 byte_end: byte_offsets[i],
527 });
528 } else if ch.is_ascii_digit() {
529 let token_start = i;
530 let (tok, len) = read_number(&chars[i..], &span)?;
531 i += len;
532 col += len;
533 tokens.push(SpannedToken {
534 token: tok,
535 span: span.with_end(line, col),
536 byte_start: byte_offsets[token_start],
537 byte_end: byte_offsets[i],
538 });
539 } else if is_symbol_start(ch) {
540 let start = i;
541 while i < chars.len() && is_symbol_char(chars[i]) {
542 i += 1;
543 col += 1;
544 }
545 let name: String = chars[start..i].iter().collect();
546 let token_span = span.with_end(line, col);
547 let token_byte_start = byte_offsets[start];
549 let token_byte_end = byte_offsets[i];
550 match name.as_str() {
551 "true" => tokens.push(SpannedToken {
552 token: Token::Bool(true),
553 span: token_span,
554 byte_start: token_byte_start,
555 byte_end: token_byte_end,
556 }),
557 "false" => tokens.push(SpannedToken {
558 token: Token::Bool(false),
559 span: token_span,
560 byte_start: token_byte_start,
561 byte_end: token_byte_end,
562 }),
563 "nil" => tokens.push(SpannedToken {
564 token: Token::Symbol("nil".to_string()),
565 span: token_span,
566 byte_start: token_byte_start,
567 byte_end: token_byte_end,
568 }),
569 "." => tokens.push(SpannedToken {
570 token: Token::Dot,
571 span: token_span,
572 byte_start: token_byte_start,
573 byte_end: token_byte_end,
574 }),
575 "inf" | "+inf" | "Inf" | "Infinity" | "+Infinity" => {
579 tokens.push(SpannedToken {
580 token: Token::Float(f64::INFINITY),
581 span: token_span,
582 byte_start: token_byte_start,
583 byte_end: token_byte_end,
584 })
585 }
586 "-inf" | "-Infinity" => tokens.push(SpannedToken {
587 token: Token::Float(f64::NEG_INFINITY),
588 span: token_span,
589 byte_start: token_byte_start,
590 byte_end: token_byte_end,
591 }),
592 "nan" | "NaN" | "NAN" | "+nan" | "-nan" => tokens.push(SpannedToken {
593 token: Token::Float(f64::NAN),
594 span: token_span,
595 byte_start: token_byte_start,
596 byte_end: token_byte_end,
597 }),
598 _ => tokens.push(SpannedToken {
599 token: Token::Symbol(name),
600 span: token_span,
601 byte_start: token_byte_start,
602 byte_end: token_byte_end,
603 }),
604 }
605 } else {
606 return Err(SemaError::Reader {
607 message: format!("unexpected character: '{ch}'"),
608 span,
609 });
610 }
611 }
612 }
613 }
614
615 Ok(tokens)
616}
617
618fn read_string_escape(
622 chars: &[char],
623 i: &mut usize,
624 col: &mut usize,
625 buf: &mut String,
626 span: Span,
627) -> Result<(), SemaError> {
628 match chars[*i] {
629 'n' => buf.push('\n'),
630 't' => buf.push('\t'),
631 'r' => buf.push('\r'),
632 '\\' => buf.push('\\'),
633 '"' => buf.push('"'),
634 '0' => buf.push('\0'),
635 '$' => buf.push('$'),
636 'x' => {
637 let mut hex = String::new();
639 while *i + 1 < chars.len() && chars[*i + 1] != ';' && chars[*i + 1].is_ascii_hexdigit()
640 {
641 *i += 1;
642 *col += 1;
643 hex.push(chars[*i]);
644 }
645 if hex.is_empty() {
646 return Err(SemaError::Reader {
647 message: "empty hex escape \\x;".to_string(),
648 span,
649 });
650 }
651 if *i + 1 >= chars.len() || chars[*i + 1] != ';' {
652 return Err(SemaError::Reader {
653 message: "hex escape \\x missing terminating semicolon".to_string(),
654 span,
655 });
656 }
657 *i += 1;
658 *col += 1;
659 let code = u32::from_str_radix(&hex, 16).map_err(|_| SemaError::Reader {
660 message: format!("invalid hex escape \\x{};", hex),
661 span,
662 })?;
663 let ch = char::from_u32(code).ok_or_else(|| SemaError::Reader {
664 message: format!("invalid unicode scalar value \\x{};", hex),
665 span,
666 })?;
667 buf.push(ch);
668 }
669 'u' => {
670 let mut hex = String::new();
672 for _ in 0..4 {
673 if *i + 1 >= chars.len() || !chars[*i + 1].is_ascii_hexdigit() {
674 return Err(SemaError::Reader {
675 message: "\\u escape requires exactly 4 hex digits".to_string(),
676 span,
677 });
678 }
679 *i += 1;
680 *col += 1;
681 hex.push(chars[*i]);
682 }
683 let code = u32::from_str_radix(&hex, 16).map_err(|_| SemaError::Reader {
684 message: format!("invalid hex escape \\u{}", hex),
685 span,
686 })?;
687 let ch = char::from_u32(code).ok_or_else(|| SemaError::Reader {
688 message: format!("invalid unicode scalar value \\u{}", hex),
689 span,
690 })?;
691 buf.push(ch);
692 }
693 'U' => {
694 let mut hex = String::new();
696 for _ in 0..8 {
697 if *i + 1 >= chars.len() || !chars[*i + 1].is_ascii_hexdigit() {
698 return Err(SemaError::Reader {
699 message: "\\U escape requires exactly 8 hex digits".to_string(),
700 span,
701 });
702 }
703 *i += 1;
704 *col += 1;
705 hex.push(chars[*i]);
706 }
707 let code = u32::from_str_radix(&hex, 16).map_err(|_| SemaError::Reader {
708 message: format!("invalid hex escape \\U{}", hex),
709 span,
710 })?;
711 let ch = char::from_u32(code).ok_or_else(|| SemaError::Reader {
712 message: format!("invalid unicode scalar value \\U{}", hex),
713 span,
714 })?;
715 buf.push(ch);
716 }
717 other => {
718 buf.push('\\');
719 buf.push(other);
720 }
721 }
722 Ok(())
723}
724
725fn read_number(chars: &[char], span: &Span) -> Result<(Token, usize), SemaError> {
726 let mut i = 0;
727 if chars[i] == '-' {
728 i += 1;
729 }
730 while i < chars.len() && chars[i].is_ascii_digit() {
731 i += 1;
732 }
733 if i < chars.len() && chars[i] == '.' && i + 1 < chars.len() && chars[i + 1].is_ascii_digit() {
734 i += 1; while i < chars.len() && chars[i].is_ascii_digit() {
736 i += 1;
737 }
738 let s: String = chars[..i].iter().collect();
739 let f: f64 = s.parse().map_err(|_| SemaError::Reader {
740 message: format!("invalid float: {s}"),
741 span: *span,
742 })?;
743 Ok((Token::Float(f), i))
744 } else {
745 let s: String = chars[..i].iter().collect();
746 let n: i64 = s.parse().map_err(|_| SemaError::Reader {
747 message: format!("invalid integer: {s}"),
748 span: *span,
749 })?;
750 Ok((Token::Int(n), i))
751 }
752}
753
/// Returns `true` when `ch` may begin a symbol: any alphabetic character or one of the
/// operator-like punctuation characters listed below.
fn is_symbol_start(ch: char) -> bool {
    match ch {
        '+' | '-' | '*' | '/' | '!' | '?' | '<' | '>' | '=' | '_' | '&' | '%' | '^' | '~'
        | '.' => true,
        other => other.is_alphabetic(),
    }
}
761
762fn is_symbol_char(ch: char) -> bool {
763 is_symbol_start(ch) || ch.is_ascii_digit() || matches!(ch, '-' | '/' | '.' | '#')
764}
765
#[cfg(test)]
mod tests {
    use super::*;

    /// Tokenizes `src` (panicking on a lexer error) and keeps only the tokens.
    fn lex(src: &str) -> Vec<Token> {
        tokenize(src)
            .unwrap()
            .into_iter()
            .map(|spanned| spanned.token)
            .collect()
    }

    /// Shorthand for building a `Token::Symbol`.
    fn sym(name: &str) -> Token {
        Token::Symbol(name.to_string())
    }

    // Comments are emitted as tokens, not discarded.
    #[test]
    fn test_comment_token_emitted() {
        let comments: Vec<Token> = lex("(+ 1 2) ; comment")
            .into_iter()
            .filter(|t| matches!(t, Token::Comment(_)))
            .collect();
        assert_eq!(comments.len(), 1);
        if let Token::Comment(text) = &comments[0] {
            assert_eq!(text, "; comment");
        } else {
            panic!("expected Comment token");
        }
    }

    // A line break between two symbols shows up as its own token.
    #[test]
    fn test_newline_token_emitted() {
        let tokens = lex("a\nb");
        assert!(tokens[0] == sym("a"), "first token should be symbol 'a'");
        assert!(tokens[1] == Token::Newline, "second token should be Newline");
        assert!(tokens[2] == sym("b"), "third token should be symbol 'b'");
    }

    // Backslashes inside a regex literal are passed through raw.
    #[test]
    fn test_regex_token_emitted() {
        let tokens = lex(r#"#"\d+""#);
        assert_eq!(tokens.len(), 1);
        match &tokens[0] {
            Token::Regex(pattern) => assert_eq!(pattern, r"\d+"),
            other => panic!("expected Regex token, got {:?}", other),
        }
    }

    // `#"..."` must be distinguishable from an ordinary string.
    #[test]
    fn test_regex_not_string() {
        let tokens = lex(r#"#"[a-z]+""#);
        assert_eq!(tokens.len(), 1);
        let only = &tokens[0];
        assert!(
            !matches!(only, Token::String(_)),
            "regex should not produce Token::String"
        );
        assert!(
            matches!(only, Token::Regex(_)),
            "regex should produce Token::Regex"
        );
    }

    #[test]
    fn test_multiple_comments_and_newlines_preserved() {
        let tokens = lex("; first\n; second\n42");
        let expected = [
            Token::Comment("; first".to_string()),
            Token::Newline,
            Token::Comment("; second".to_string()),
            Token::Newline,
            Token::Int(42),
        ];
        for (idx, want) in expected.iter().enumerate() {
            assert!(tokens[idx] == *want);
        }
    }

    #[test]
    fn test_comment_does_not_include_trailing_newline() {
        let tokens = lex("; hello world\n");
        if let Token::Comment(text) = &tokens[0] {
            assert!(
                !text.ends_with('\n'),
                "comment should not include trailing newline"
            );
            assert_eq!(text, "; hello world");
        } else {
            panic!("expected Comment token");
        }
        // The line break is reported separately, right after the comment.
        assert!(matches!(tokens[1], Token::Newline));
    }

    #[test]
    fn test_inline_comment_after_code() {
        let wanted = Token::Comment("; set x".to_string());
        let has_comment = lex("(define x 42) ; set x").contains(&wanted);
        assert!(has_comment, "should have inline comment token");
    }

    // Blank lines, comments and code keep their relative order.
    #[test]
    fn test_trivia_order_preserved() {
        let mut types: Vec<String> = Vec::new();
        for token in lex("a\n\n; comment\nb") {
            types.push(match token {
                Token::Symbol(s) => format!("sym:{}", s),
                Token::Newline => "newline".to_string(),
                Token::Comment(s) => format!("comment:{}", s),
                other => format!("{:?}", other),
            });
        }
        assert_eq!(
            types,
            vec![
                "sym:a",
                "newline",
                "newline",
                "comment:; comment",
                "newline",
                "sym:b"
            ]
        );
    }
}