1#![allow(clippy::result_large_err)]
4
5use std::sync::Arc;
6
7use miette::NamedSource;
8
9use cljrs_types::error::{CljxError, CljxResult};
10use cljrs_types::span::Span;
11
12use crate::token::Token;
13
/// Reports whether `ch` may appear inside a symbol or keyword name.
///
/// Every character is accepted except ASCII whitespace (space, tab, LF, CR),
/// the comma, the collection delimiters `( ) [ ] { }`, and the characters
/// `"`, `;`, `` ` ``, `~`, `^`, `@`, `#`, `\` and `:`.
fn is_symbol_char(ch: char) -> bool {
    const TERMINATORS: [char; 20] = [
        ' ', '\t', '\n', '\r', ',', '(', ')', '[', ']', '{', '}', '"', ';', '`', '~', '^', '@',
        '#', '\\', ':',
    ];
    !TERMINATORS.contains(&ch)
}
43
44fn is_symbol_start(ch: char) -> bool {
47 is_symbol_char(ch) && !ch.is_ascii_digit()
48}
49
/// Streaming tokenizer over a single source text.
///
/// Tokens are produced on demand by `next_token` (or through the `Iterator`
/// implementation). Cloning a lexer is cheap: the source and file name are
/// shared through `Arc`, and only the cursor fields are copied.
#[derive(Debug, Clone)]
pub struct Lexer {
    /// Complete source text being tokenized.
    source: Arc<String>,
    /// Name of the file the source came from; attached to spans and errors.
    file: Arc<String>,
    /// Byte offset of the next unread character in `source`.
    pos: usize,
    /// Line of the next unread character; starts at 1.
    line: u32,
    /// Column of the next unread character; starts at 1.
    col: u32,
}
59
60impl Lexer {
61 pub fn new(source: String, file: String) -> Self {
62 Self {
63 source: Arc::new(source),
64 file: Arc::new(file),
65 pos: 0,
66 line: 1,
67 col: 1,
68 }
69 }
70
71 pub fn source(&self) -> &Arc<String> {
74 &self.source
75 }
76
77 pub fn file(&self) -> &Arc<String> {
78 &self.file
79 }
80
81 fn peek(&self) -> Option<char> {
84 self.source[self.pos..].chars().next()
85 }
86
87 fn peek_next(&self) -> Option<char> {
88 let mut chars = self.source[self.pos..].chars();
89 chars.next(); chars.next()
91 }
92
93 fn advance(&mut self) -> Option<char> {
94 let ch = self.peek()?;
95 self.pos += ch.len_utf8();
96 if ch == '\n' {
97 self.line += 1;
98 self.col = 1;
99 } else {
100 self.col += ch.len_utf8() as u32;
101 }
102 Some(ch)
103 }
104
105 fn span_from(&self, start_pos: usize, start_line: u32, start_col: u32) -> Span {
106 Span::new(
107 Arc::clone(&self.file),
108 start_pos,
109 self.pos,
110 start_line,
111 start_col,
112 )
113 }
114
115 fn make_error(&self, msg: impl Into<String>, span: Span) -> CljxError {
116 CljxError::ReadError {
117 message: msg.into(),
118 span: Some(miette::SourceSpan::from(span)),
119 src: NamedSource::new((*self.file).clone(), (*self.source).clone()),
120 }
121 }
122
123 fn read_symbol_chars(&mut self) -> String {
126 let mut buf = String::new();
127 while let Some(ch) = self.peek() {
128 if is_symbol_char(ch) {
129 buf.push(ch);
130 self.advance();
131 } else {
132 break;
133 }
134 }
135 buf
136 }
137
138 fn skip_whitespace_and_comments(&mut self) {
141 loop {
142 match self.peek() {
143 Some('#') if self.pos == 0 => {
145 if self.peek_next() == Some('!') {
146 while let Some(ch) = self.advance() {
148 if ch == '\n' {
149 break;
150 }
151 }
152 } else {
153 break; }
155 }
156 Some(' ') | Some('\t') | Some('\r') | Some('\n') | Some(',') => {
157 self.advance();
158 }
159 Some(';') => {
160 while let Some(ch) = self.advance() {
161 if ch == '\n' {
162 break;
163 }
164 }
165 }
166 _ => break,
167 }
168 }
169 }
170
171 fn lex_unquote(
174 &mut self,
175 start_pos: usize,
176 start_line: u32,
177 start_col: u32,
178 ) -> CljxResult<(Token, Span)> {
179 self.advance(); if self.peek() == Some('@') {
181 self.advance();
182 Ok((
183 Token::UnquoteSplice,
184 self.span_from(start_pos, start_line, start_col),
185 ))
186 } else {
187 Ok((
188 Token::Unquote,
189 self.span_from(start_pos, start_line, start_col),
190 ))
191 }
192 }
193
194 fn lex_hash(
197 &mut self,
198 start_pos: usize,
199 start_line: u32,
200 start_col: u32,
201 ) -> CljxResult<(Token, Span)> {
202 self.advance(); match self.peek() {
204 Some('(') => {
205 self.advance();
206 Ok((
207 Token::HashFn,
208 self.span_from(start_pos, start_line, start_col),
209 ))
210 }
211 Some('{') => {
212 self.advance();
213 Ok((
214 Token::HashSet,
215 self.span_from(start_pos, start_line, start_col),
216 ))
217 }
218 Some('\'') => {
219 self.advance();
220 Ok((
221 Token::HashVar,
222 self.span_from(start_pos, start_line, start_col),
223 ))
224 }
225 Some('_') => {
226 self.advance();
227 Ok((
228 Token::HashDiscard,
229 self.span_from(start_pos, start_line, start_col),
230 ))
231 }
232 Some('"') => self.lex_regex(start_pos, start_line, start_col),
233 Some('?') => {
234 self.advance(); if self.peek() == Some('@') {
236 self.advance();
237 Ok((
238 Token::ReaderCondSplice,
239 self.span_from(start_pos, start_line, start_col),
240 ))
241 } else {
242 Ok((
243 Token::ReaderCond,
244 self.span_from(start_pos, start_line, start_col),
245 ))
246 }
247 }
248 Some('#') => self.lex_symbolic(start_pos, start_line, start_col),
249 Some(c) if is_symbol_start(c) => {
250 let name = self.read_symbol_chars();
251 Ok((
252 Token::TaggedLiteral(name),
253 self.span_from(start_pos, start_line, start_col),
254 ))
255 }
256 other => {
257 let span = self.span_from(start_pos, start_line, start_col);
258 Err(self.make_error(format!("unknown # dispatch character: {:?}", other), span))
259 }
260 }
261 }
262
263 fn lex_regex(
264 &mut self,
265 start_pos: usize,
266 start_line: u32,
267 start_col: u32,
268 ) -> CljxResult<(Token, Span)> {
269 self.advance(); let mut buf = String::new();
271 loop {
272 match self.advance() {
273 None => {
274 let span = self.span_from(start_pos, start_line, start_col);
275 return Err(self.make_error("unterminated regex literal", span));
276 }
277 Some('"') => break,
278 Some('\\') => {
279 buf.push('\\');
281 match self.advance() {
282 Some(c) => buf.push(c),
283 None => {
284 let span = self.span_from(start_pos, start_line, start_col);
285 return Err(self.make_error("unterminated regex literal", span));
286 }
287 }
288 }
289 Some(c) => buf.push(c),
290 }
291 }
292 Ok((
293 Token::Regex(buf),
294 self.span_from(start_pos, start_line, start_col),
295 ))
296 }
297
298 fn lex_symbolic(
299 &mut self,
300 start_pos: usize,
301 start_line: u32,
302 start_col: u32,
303 ) -> CljxResult<(Token, Span)> {
304 self.advance(); let name = self.read_symbol_chars();
306 match name.as_str() {
307 "Inf" | "-Inf" | "NaN" => Ok((
308 Token::Symbolic(name),
309 self.span_from(start_pos, start_line, start_col),
310 )),
311 _ => {
312 let span = self.span_from(start_pos, start_line, start_col);
313 Err(self.make_error(format!("unknown symbolic value: ##{name}"), span))
314 }
315 }
316 }
317
318 fn lex_string(
321 &mut self,
322 start_pos: usize,
323 start_line: u32,
324 start_col: u32,
325 ) -> CljxResult<(Token, Span)> {
326 self.advance(); let mut buf = String::new();
328 loop {
329 match self.advance() {
330 None => {
331 let span = self.span_from(start_pos, start_line, start_col);
332 return Err(self.make_error("unterminated string literal", span));
333 }
334 Some('"') => break,
335 Some('\\') => match self.advance() {
336 Some('n') => buf.push('\n'),
337 Some('t') => buf.push('\t'),
338 Some('r') => buf.push('\r'),
339 Some('b') => buf.push('\x08'),
340 Some('f') => buf.push('\x0C'),
341 Some('\\') => buf.push('\\'),
342 Some('"') => buf.push('"'),
343 Some('u') => {
344 let ch = self.read_unicode_escape(start_pos, start_line, start_col)?;
345 buf.push(ch);
346 }
347 Some(c) => {
348 let span = self.span_from(start_pos, start_line, start_col);
349 return Err(self.make_error(format!("unknown string escape: \\{c}"), span));
350 }
351 None => {
352 let span = self.span_from(start_pos, start_line, start_col);
353 return Err(self.make_error("unterminated string literal", span));
354 }
355 },
356 Some(c) => buf.push(c),
357 }
358 }
359 Ok((
360 Token::Str(buf),
361 self.span_from(start_pos, start_line, start_col),
362 ))
363 }
364
365 fn read_unicode_escape(
367 &mut self,
368 start_pos: usize,
369 start_line: u32,
370 start_col: u32,
371 ) -> CljxResult<char> {
372 let mut hex = String::with_capacity(4);
373 for _ in 0..4 {
374 match self.advance() {
375 Some(c) if c.is_ascii_hexdigit() => hex.push(c),
376 Some(c) => {
377 let span = self.span_from(start_pos, start_line, start_col);
378 return Err(self.make_error(
379 format!("invalid \\u escape: expected hex digit, got {c:?}"),
380 span,
381 ));
382 }
383 None => {
384 let span = self.span_from(start_pos, start_line, start_col);
385 return Err(self.make_error("unterminated \\u escape", span));
386 }
387 }
388 }
389 let code = u32::from_str_radix(&hex, 16).unwrap();
390 char::from_u32(code).ok_or_else(|| {
391 let span = self.span_from(start_pos, start_line, start_col);
392 self.make_error(format!("invalid unicode code point: \\u{hex}"), span)
393 })
394 }
395
396 fn lex_char_literal(
399 &mut self,
400 start_pos: usize,
401 start_line: u32,
402 start_col: u32,
403 ) -> CljxResult<(Token, Span)> {
404 self.advance(); let rest_start = self.pos;
408 let rest: String = self.source[rest_start..]
409 .chars()
410 .take_while(|&c| c.is_alphanumeric() || c == '-')
411 .collect();
412
413 let ch = match rest.as_str() {
414 "newline" => {
415 self.pos += "newline".len();
416 self.col += "newline".len() as u32;
417 '\n'
418 }
419 "space" => {
420 self.pos += "space".len();
421 self.col += "space".len() as u32;
422 ' '
423 }
424 "tab" => {
425 self.pos += "tab".len();
426 self.col += "tab".len() as u32;
427 '\t'
428 }
429 "backspace" => {
430 self.pos += "backspace".len();
431 self.col += "backspace".len() as u32;
432 '\x08'
433 }
434 "formfeed" => {
435 self.pos += "formfeed".len();
436 self.col += "formfeed".len() as u32;
437 '\x0C'
438 }
439 "return" => {
440 self.pos += "return".len();
441 self.col += "return".len() as u32;
442 '\r'
443 }
444 _ if rest.starts_with('u') && rest.len() >= 5 => {
445 let hex_part = &rest[1..5];
447 if hex_part.chars().all(|c| c.is_ascii_hexdigit()) {
448 let code = u32::from_str_radix(hex_part, 16).unwrap();
449 let c = char::from_u32(code).ok_or_else(|| {
450 let span = self.span_from(start_pos, start_line, start_col);
451 self.make_error(
452 format!("invalid unicode code point in char literal: \\u{hex_part}"),
453 span,
454 )
455 })?;
456 self.pos += 5;
458 self.col += 5;
459 c
460 } else {
461 let span = self.span_from(start_pos, start_line, start_col);
462 return Err(self.make_error(format!("unknown character name: {rest}"), span));
463 }
464 }
465 _ if rest.len() == 1 => {
466 let c = self.source[rest_start..].chars().next().unwrap();
468 self.pos += c.len_utf8();
469 self.col += c.len_utf8() as u32;
470 c
471 }
472 _ if rest.is_empty() => {
473 match self.source[rest_start..].chars().next() {
475 Some(c) => {
476 self.pos += c.len_utf8();
477 self.col += c.len_utf8() as u32;
478 c
479 }
480 None => {
481 let span = self.span_from(start_pos, start_line, start_col);
482 return Err(self.make_error("unexpected end of file after \\", span));
483 }
484 }
485 }
486 _ => {
487 let span = self.span_from(start_pos, start_line, start_col);
488 return Err(self.make_error(format!("unknown character name: {rest}"), span));
489 }
490 };
491
492 Ok((
493 Token::Char(ch),
494 self.span_from(start_pos, start_line, start_col),
495 ))
496 }
497
498 fn lex_keyword(
501 &mut self,
502 start_pos: usize,
503 start_line: u32,
504 start_col: u32,
505 ) -> CljxResult<(Token, Span)> {
506 self.advance(); if self.peek() == Some(':') {
508 self.advance(); let name = self.read_symbol_chars();
510 if name.is_empty() {
511 let span = self.span_from(start_pos, start_line, start_col);
512 return Err(self.make_error("empty auto-resolved keyword", span));
513 }
514 Ok((
515 Token::AutoKeyword(name),
516 self.span_from(start_pos, start_line, start_col),
517 ))
518 } else {
519 let name = self.read_symbol_chars();
520 if name.is_empty() {
521 let span = self.span_from(start_pos, start_line, start_col);
522 return Err(self.make_error("empty keyword", span));
523 }
524 Ok((
525 Token::Keyword(name),
526 self.span_from(start_pos, start_line, start_col),
527 ))
528 }
529 }
530
531 fn lex_symbol(
534 &mut self,
535 start_pos: usize,
536 start_line: u32,
537 start_col: u32,
538 ) -> CljxResult<(Token, Span)> {
539 let mut name = self.read_symbol_chars();
540
541 if self.peek() == Some('@') {
547 let version_candidate = self.peek_version_hash();
548 if let Some(hash) = version_candidate {
549 self.advance(); for _ in 0..hash.len() {
551 self.advance();
552 }
553 name.push('@');
554 name.push_str(&hash);
555 }
556 }
557
558 let tok = match name.as_str() {
559 "nil" => Token::Nil,
560 "true" => Token::Bool(true),
561 "false" => Token::Bool(false),
562 _ => Token::Symbol(name),
563 };
564 Ok((tok, self.span_from(start_pos, start_line, start_col)))
565 }
566
567 fn peek_version_hash(&self) -> Option<String> {
573 let at_byte = self.pos + 1; let rest = &self.source[at_byte..];
576 let hash: String = rest
577 .chars()
578 .take(40)
579 .take_while(|c| c.is_ascii_hexdigit())
580 .collect();
581 if hash.len() >= 7 {
582 let after = rest[hash.len()..].chars().next();
584 let is_delimited = after.is_none_or(|c| !c.is_ascii_hexdigit());
585 if is_delimited {
586 return Some(hash);
587 }
588 }
589 None
590 }
591
592 fn lex_number(
595 &mut self,
596 start_pos: usize,
597 start_line: u32,
598 start_col: u32,
599 ) -> CljxResult<(Token, Span)> {
600 let negative = match self.peek() {
602 Some('-') => {
603 self.advance();
604 true
605 }
606 Some('+') => {
607 self.advance();
608 false
609 }
610 _ => false,
611 };
612 let sign_str = if negative { "-" } else { "" };
613
614 let mut int_part = String::new();
616 while let Some(c) = self.peek() {
617 if c.is_ascii_digit() {
618 int_part.push(c);
619 self.advance();
620 } else {
621 break;
622 }
623 }
624
625 if int_part == "0" && matches!(self.peek(), Some('x') | Some('X')) {
627 self.advance(); let mut hex = String::new();
629 while let Some(c) = self.peek() {
630 if c.is_ascii_hexdigit() {
631 hex.push(c);
632 self.advance();
633 } else {
634 break;
635 }
636 }
637 if hex.is_empty() {
638 let span = self.span_from(start_pos, start_line, start_col);
639 return Err(self.make_error("expected hex digits after 0x", span));
640 }
641 let value = u128::from_str_radix(&hex, 16).unwrap_or(u128::MAX);
642 let span = self.span_from(start_pos, start_line, start_col);
643 return if negative {
644 if value <= (i64::MAX as u128) + 1 {
646 Ok((Token::Int(0i64.wrapping_sub(value as i64)), span))
647 } else {
648 Ok((Token::BigInt(format!("-{value}")), span))
650 }
651 } else if value <= i64::MAX as u128 {
652 Ok((Token::Int(value as i64), span))
653 } else {
654 Ok((Token::BigInt(value.to_string()), span))
655 };
656 }
657
658 if matches!(self.peek(), Some('r') | Some('R')) {
660 let radix: u32 = int_part.parse().unwrap_or(0);
661 self.advance(); let mut digits = String::new();
663 while let Some(c) = self.peek() {
664 if c.is_ascii_alphanumeric() {
665 digits.push(c);
666 self.advance();
667 } else {
668 break;
669 }
670 }
671 let mut value: u128 = 0;
672 for c in digits.chars() {
673 let d = c.to_digit(radix).ok_or_else(|| {
674 let span = self.span_from(start_pos, start_line, start_col);
675 self.make_error(format!("invalid digit {c:?} for radix {radix}"), span)
676 })?;
677 value = value.wrapping_mul(radix as u128).wrapping_add(d as u128);
678 }
679 if negative {
680 if value <= (i64::MAX as u128) + 1 {
682 let signed = -(value as i64);
683 return Ok((
684 Token::Int(signed),
685 self.span_from(start_pos, start_line, start_col),
686 ));
687 } else {
688 return Ok((
690 Token::BigInt(format!("-{value}")),
691 self.span_from(start_pos, start_line, start_col),
692 ));
693 }
694 } else if value <= i64::MAX as u128 {
695 return Ok((
696 Token::Int(value as i64),
697 self.span_from(start_pos, start_line, start_col),
698 ));
699 } else {
700 return Ok((
701 Token::BigInt(value.to_string()),
702 self.span_from(start_pos, start_line, start_col),
703 ));
704 }
705 }
706
707 if self.peek() == Some('N') {
709 self.advance();
710 return Ok((
711 Token::BigInt(format!("{sign_str}{int_part}")),
712 self.span_from(start_pos, start_line, start_col),
713 ));
714 }
715
716 if self.peek() == Some('M') {
718 self.advance();
719 return Ok((
720 Token::BigDecimal(format!("{sign_str}{int_part}")),
721 self.span_from(start_pos, start_line, start_col),
722 ));
723 }
724
725 if matches!(self.peek(), Some('.') | Some('e') | Some('E')) {
727 let mut raw = format!("{sign_str}{int_part}");
728 if self.peek() == Some('.') {
729 raw.push('.');
730 self.advance();
731 while let Some(c) = self.peek() {
732 if c.is_ascii_digit() {
733 raw.push(c);
734 self.advance();
735 } else {
736 break;
737 }
738 }
739 }
740 if matches!(self.peek(), Some('e') | Some('E')) {
741 raw.push('e');
742 self.advance();
743 if matches!(self.peek(), Some('+') | Some('-')) {
744 raw.push(self.peek().unwrap());
745 self.advance();
746 }
747 while let Some(c) = self.peek() {
748 if c.is_ascii_digit() {
749 raw.push(c);
750 self.advance();
751 } else {
752 break;
753 }
754 }
755 }
756 if self.peek() == Some('M') {
758 self.advance();
759 return Ok((
760 Token::BigDecimal(raw),
761 self.span_from(start_pos, start_line, start_col),
762 ));
763 }
764 let val: f64 = raw.parse().map_err(|_| {
765 let span = self.span_from(start_pos, start_line, start_col);
766 self.make_error(format!("invalid float: {raw}"), span)
767 })?;
768 return Ok((
769 Token::Float(val),
770 self.span_from(start_pos, start_line, start_col),
771 ));
772 }
773
774 if self.peek() == Some('/') && matches!(self.peek_next(), Some(c) if c.is_ascii_digit()) {
776 self.advance(); let mut denom = String::new();
778 while let Some(c) = self.peek() {
779 if c.is_ascii_digit() {
780 denom.push(c);
781 self.advance();
782 } else {
783 break;
784 }
785 }
786 return Ok((
787 Token::Ratio(format!("{sign_str}{int_part}/{denom}")),
788 self.span_from(start_pos, start_line, start_col),
789 ));
790 }
791
792 let full = format!("{sign_str}{int_part}");
794 match full.parse::<i64>() {
795 Ok(n) => Ok((
796 Token::Int(n),
797 self.span_from(start_pos, start_line, start_col),
798 )),
799 Err(_) => {
800 Ok((
802 Token::BigInt(full),
803 self.span_from(start_pos, start_line, start_col),
804 ))
805 }
806 }
807 }
808
809 pub fn next_token(&mut self) -> CljxResult<(Token, Span)> {
812 self.skip_whitespace_and_comments();
813
814 let start_pos = self.pos;
815 let start_line = self.line;
816 let start_col = self.col;
817
818 let ch = match self.peek() {
819 None => {
820 return Ok((Token::Eof, self.span_from(start_pos, start_line, start_col)));
821 }
822 Some(c) => c,
823 };
824
825 match ch {
826 '(' => {
827 self.advance();
828 Ok((
829 Token::LParen,
830 self.span_from(start_pos, start_line, start_col),
831 ))
832 }
833 ')' => {
834 self.advance();
835 Ok((
836 Token::RParen,
837 self.span_from(start_pos, start_line, start_col),
838 ))
839 }
840 '[' => {
841 self.advance();
842 Ok((
843 Token::LBracket,
844 self.span_from(start_pos, start_line, start_col),
845 ))
846 }
847 ']' => {
848 self.advance();
849 Ok((
850 Token::RBracket,
851 self.span_from(start_pos, start_line, start_col),
852 ))
853 }
854 '{' => {
855 self.advance();
856 Ok((
857 Token::LBrace,
858 self.span_from(start_pos, start_line, start_col),
859 ))
860 }
861 '}' => {
862 self.advance();
863 Ok((
864 Token::RBrace,
865 self.span_from(start_pos, start_line, start_col),
866 ))
867 }
868 '\'' => {
869 self.advance();
870 Ok((
871 Token::Quote,
872 self.span_from(start_pos, start_line, start_col),
873 ))
874 }
875 '`' => {
876 self.advance();
877 Ok((
878 Token::SyntaxQuote,
879 self.span_from(start_pos, start_line, start_col),
880 ))
881 }
882 '@' => {
883 self.advance();
884 Ok((
885 Token::Deref,
886 self.span_from(start_pos, start_line, start_col),
887 ))
888 }
889 '^' => {
890 self.advance();
891 Ok((
892 Token::Meta,
893 self.span_from(start_pos, start_line, start_col),
894 ))
895 }
896 '~' => self.lex_unquote(start_pos, start_line, start_col),
897 '#' => self.lex_hash(start_pos, start_line, start_col),
898 '"' => self.lex_string(start_pos, start_line, start_col),
899 '\\' => self.lex_char_literal(start_pos, start_line, start_col),
900 ':' => self.lex_keyword(start_pos, start_line, start_col),
901 c if c.is_ascii_digit() => self.lex_number(start_pos, start_line, start_col),
902 '+' | '-' if matches!(self.peek_next(), Some(d) if d.is_ascii_digit()) => {
903 self.lex_number(start_pos, start_line, start_col)
904 }
905 c if is_symbol_start(c) => self.lex_symbol(start_pos, start_line, start_col),
906 '+' | '-' => self.lex_symbol(start_pos, start_line, start_col),
908 c => {
909 self.advance();
910 let span = self.span_from(start_pos, start_line, start_col);
911 Err(self.make_error(format!("unexpected character: {c:?}"), span))
912 }
913 }
914 }
915}
916
917impl Iterator for Lexer {
918 type Item = CljxResult<(Token, Span)>;
919
920 fn next(&mut self) -> Option<Self::Item> {
921 match self.next_token() {
922 Ok((Token::Eof, _)) => None,
923 result => Some(result),
924 }
925 }
926}
927
928#[cfg(test)]
931mod tests {
932 use super::*;
933
934 fn lex_all(src: &str) -> Vec<Token> {
935 Lexer::new(src.to_string(), "<test>".to_string())
936 .map(|r: CljxResult<(Token, Span)>| r.expect("lex error").0)
937 .collect()
938 }
939
940 fn lex_one(src: &str) -> Token {
941 let mut l = Lexer::new(src.to_string(), "<test>".to_string());
942 l.next_token().expect("lex error").0
943 }
944
945 fn lex_err(src: &str) -> String {
946 let mut l = Lexer::new(src.to_string(), "<test>".to_string());
947 loop {
948 match l.next_token() {
949 Err(CljxError::ReadError { message, .. }) => return message,
950 Err(e) => panic!("unexpected error type: {e}"),
951 Ok((Token::Eof, _)) => panic!("expected an error but got Eof"),
952 Ok(_) => {}
953 }
954 }
955 }
956
957 #[test]
960 fn test_nil() {
961 assert_eq!(lex_one("nil"), Token::Nil);
962 }
963
964 #[test]
965 fn test_bool() {
966 assert_eq!(lex_one("true"), Token::Bool(true));
967 assert_eq!(lex_one("false"), Token::Bool(false));
968 }
969
970 #[test]
973 fn test_int_plain() {
974 assert_eq!(lex_one("42"), Token::Int(42));
975 assert_eq!(lex_one("-42"), Token::Int(-42));
976 assert_eq!(lex_one("+42"), Token::Int(42));
977 assert_eq!(lex_one("0"), Token::Int(0));
978 }
979
980 #[test]
981 fn test_bigint_suffix() {
982 assert_eq!(lex_one("42N"), Token::BigInt("42".to_string()));
983 assert_eq!(lex_one("-42N"), Token::BigInt("-42".to_string()));
984 }
985
986 #[test]
987 fn test_hex_literal() {
988 assert_eq!(lex_one("0xff"), Token::Int(255));
989 assert_eq!(lex_one("0xFF"), Token::Int(255));
990 assert_eq!(lex_one("0x0"), Token::Int(0));
991 assert_eq!(lex_one("0x7FFFFFFFFFFFFFFF"), Token::Int(i64::MAX));
992 assert_eq!(lex_one("-0x8000000000000000"), Token::Int(i64::MIN));
993 assert_eq!(lex_one("-0xff"), Token::Int(-255));
994 match lex_one("0xFFFFFFFFFFFFFFFF") {
996 Token::BigInt(_) => {}
997 other => panic!("expected BigInt for 0xFFFF…, got {other:?}"),
998 }
999 }
1000
1001 #[test]
1002 fn test_radix() {
1003 assert_eq!(lex_one("2r1010"), Token::Int(10));
1004 assert_eq!(lex_one("8r77"), Token::Int(63));
1005 assert_eq!(lex_one("16rFF"), Token::Int(255));
1006 assert_eq!(lex_one("16rff"), Token::Int(255));
1007 assert_eq!(lex_one("36rZ"), Token::Int(35));
1008 }
1009
1010 #[test]
1011 fn test_radix_overflow() {
1012 let tok = lex_one("10r18446744073709551616");
1014 match tok {
1015 Token::BigInt(_) => {}
1016 other => panic!("expected BigInt, got {other:?}"),
1017 }
1018 }
1019
1020 #[test]
1023 #[allow(clippy::approx_constant)]
1024 fn test_floats() {
1025 assert_eq!(lex_one("3.14"), Token::Float(3.14));
1026 assert_eq!(lex_one("1e10"), Token::Float(1e10));
1027 assert_eq!(lex_one("1.5e-3"), Token::Float(1.5e-3));
1028 assert_eq!(lex_one("-0.5"), Token::Float(-0.5));
1029 }
1030
1031 #[test]
1032 fn test_bigdecimal() {
1033 assert_eq!(lex_one("3.14M"), Token::BigDecimal("3.14".to_string()));
1034 assert_eq!(lex_one("1e5M"), Token::BigDecimal("1e5".to_string()));
1035 }
1036
1037 #[test]
1040 fn test_ratio() {
1041 assert_eq!(lex_one("3/4"), Token::Ratio("3/4".to_string()));
1042 assert_eq!(lex_one("-1/2"), Token::Ratio("-1/2".to_string()));
1043 }
1044
1045 #[test]
1046 fn test_ratio_vs_symbol() {
1047 let toks = lex_all("3/foo");
1049 assert_eq!(toks[0], Token::Int(3));
1050 assert_eq!(toks[1], Token::Symbol("/foo".to_string()));
1051 }
1052
1053 #[test]
1056 fn test_char_simple() {
1057 assert_eq!(lex_one("\\a"), Token::Char('a'));
1058 }
1059
1060 #[test]
1061 fn test_char_named() {
1062 assert_eq!(lex_one("\\newline"), Token::Char('\n'));
1063 assert_eq!(lex_one("\\space"), Token::Char(' '));
1064 assert_eq!(lex_one("\\tab"), Token::Char('\t'));
1065 assert_eq!(lex_one("\\backspace"), Token::Char('\x08'));
1066 assert_eq!(lex_one("\\formfeed"), Token::Char('\x0C'));
1067 assert_eq!(lex_one("\\return"), Token::Char('\r'));
1068 }
1069
1070 #[test]
1071 fn test_char_unicode() {
1072 assert_eq!(lex_one("\\u0041"), Token::Char('A'));
1073 assert_eq!(lex_one("\\u00e9"), Token::Char('é'));
1074 }
1075
1076 #[test]
1079 fn test_string_basic() {
1080 assert_eq!(lex_one("\"hello\""), Token::Str("hello".to_string()));
1081 }
1082
1083 #[test]
1084 fn test_string_escapes() {
1085 assert_eq!(
1086 lex_one(r#""\n\t\r\b\f\\\"" "#),
1087 Token::Str("\n\t\r\x08\x0C\\\"".to_string())
1088 );
1089 }
1090
1091 #[test]
1092 fn test_string_unicode_escape() {
1093 assert_eq!(lex_one("\"\\u0041\""), Token::Str("A".to_string()));
1094 }
1095
1096 #[test]
1099 fn test_symbols() {
1100 assert_eq!(lex_one("foo"), Token::Symbol("foo".to_string()));
1101 assert_eq!(lex_one("ns/name"), Token::Symbol("ns/name".to_string()));
1102 assert_eq!(lex_one("/"), Token::Symbol("/".to_string()));
1103 assert_eq!(lex_one(".."), Token::Symbol("..".to_string()));
1104 assert_eq!(lex_one(".method"), Token::Symbol(".method".to_string()));
1105 assert_eq!(lex_one("+"), Token::Symbol("+".to_string()));
1106 assert_eq!(lex_one("-"), Token::Symbol("-".to_string()));
1107 assert_eq!(lex_one("+foo"), Token::Symbol("+foo".to_string()));
1108 }
1109
1110 #[test]
1113 fn test_keyword() {
1114 assert_eq!(lex_one(":foo"), Token::Keyword("foo".to_string()));
1115 assert_eq!(lex_one(":ns/name"), Token::Keyword("ns/name".to_string()));
1116 }
1117
1118 #[test]
1119 fn test_auto_keyword() {
1120 assert_eq!(lex_one("::foo"), Token::AutoKeyword("foo".to_string()));
1121 assert_eq!(
1122 lex_one("::ns/alias"),
1123 Token::AutoKeyword("ns/alias".to_string())
1124 );
1125 }
1126
1127 #[test]
1130 fn test_delimiters() {
1131 assert_eq!(
1132 lex_all("([{}])"),
1133 vec![
1134 Token::LParen,
1135 Token::LBracket,
1136 Token::LBrace,
1137 Token::RBrace,
1138 Token::RBracket,
1139 Token::RParen,
1140 ]
1141 );
1142 }
1143
1144 #[test]
1147 fn test_reader_macros() {
1148 assert_eq!(lex_one("'x"), Token::Quote);
1149 assert_eq!(lex_one("`x"), Token::SyntaxQuote);
1150 assert_eq!(lex_one("~x"), Token::Unquote);
1151 assert_eq!(lex_one("~@x"), Token::UnquoteSplice);
1152 assert_eq!(lex_one("@x"), Token::Deref);
1153 assert_eq!(lex_one("^x"), Token::Meta);
1154 }
1155
1156 #[test]
1159 fn test_hash_dispatch() {
1160 assert_eq!(lex_one("#("), Token::HashFn);
1161 assert_eq!(lex_one("#{"), Token::HashSet);
1162 assert_eq!(lex_one("#'"), Token::HashVar);
1163 assert_eq!(lex_one("#_"), Token::HashDiscard);
1164 assert_eq!(lex_one("#?"), Token::ReaderCond);
1165 assert_eq!(lex_one("#?@"), Token::ReaderCondSplice);
1166 }
1167
1168 #[test]
1169 fn test_regex() {
1170 assert_eq!(lex_one("#\"[a-z]+\""), Token::Regex("[a-z]+".to_string()));
1171 }
1172
1173 #[test]
1174 fn test_symbolic() {
1175 assert_eq!(lex_one("##Inf"), Token::Symbolic("Inf".to_string()));
1176 assert_eq!(lex_one("##-Inf"), Token::Symbolic("-Inf".to_string()));
1177 assert_eq!(lex_one("##NaN"), Token::Symbolic("NaN".to_string()));
1178 }
1179
1180 #[test]
1181 fn test_tagged_literal() {
1182 assert_eq!(lex_one("#mytag"), Token::TaggedLiteral("mytag".to_string()));
1183 }
1184
1185 #[test]
1188 fn test_multi_token() {
1189 let toks = lex_all("(+ 1 2)");
1190 assert_eq!(
1191 toks,
1192 vec![
1193 Token::LParen,
1194 Token::Symbol("+".to_string()),
1195 Token::Int(1),
1196 Token::Int(2),
1197 Token::RParen,
1198 ]
1199 );
1200 }
1201
1202 #[test]
1205 fn test_comma_skipped() {
1206 assert_eq!(lex_all("{,,,}"), vec![Token::LBrace, Token::RBrace]);
1207 }
1208
1209 #[test]
1210 fn test_comment_skipped() {
1211 assert_eq!(lex_all("; this is a comment\n42"), vec![Token::Int(42)]);
1212 }
1213
1214 #[test]
1215 fn test_shebang_skipped() {
1216 assert_eq!(lex_all("#!/usr/bin/env cljx\n42"), vec![Token::Int(42)]);
1217 }
1218
1219 #[test]
1222 fn test_span_col() {
1223 let mut l = Lexer::new(" foo".to_string(), "<test>".to_string());
1224 let (_tok, span) = l.next_token().unwrap();
1225 assert_eq!(span.start, 2);
1226 assert_eq!(span.col, 3);
1227 }
1228
1229 #[test]
1230 fn test_span_newline() {
1231 let mut l = Lexer::new("a\nb".to_string(), "<test>".to_string());
1232 l.next_token().unwrap(); let (_tok, span) = l.next_token().unwrap(); assert_eq!(span.line, 2);
1235 assert_eq!(span.col, 1);
1236 }
1237
1238 #[test]
1241 fn test_error_unterminated_string() {
1242 let msg = lex_err("\"unterminated");
1243 assert!(msg.contains("unterminated string"));
1244 }
1245
1246 #[test]
1247 fn test_error_bad_hash_dispatch() {
1248 let msg = lex_err("#1");
1250 assert!(msg.contains("unknown # dispatch"));
1251 }
1252
1253 #[test]
1254 fn test_error_bad_unicode_escape_in_string() {
1255 let msg = lex_err("\"\\uGHIJ\"");
1256 assert!(msg.contains("invalid") || msg.contains("hex"));
1257 }
1258
1259 #[test]
1260 fn test_error_unknown_char_name() {
1261 let msg = lex_err("\\bogus");
1262 assert!(msg.contains("unknown character name"));
1263 }
1264
1265 #[test]
1266 fn test_error_unknown_symbolic() {
1267 let msg = lex_err("##Bogus");
1268 assert!(msg.contains("unknown symbolic value"));
1269 }
1270
1271 #[test]
1272 fn test_error_bad_string_escape() {
1273 let msg = lex_err("\"\\q\"");
1274 assert!(msg.contains("unknown string escape"));
1275 }
1276}