1#![allow(clippy::result_large_err)]
4
5use std::sync::Arc;
6
7use miette::NamedSource;
8
9use cljrs_types::error::{CljxError, CljxResult};
10use cljrs_types::span::Span;
11
12use crate::token::Token;
13
/// Returns `true` if `ch` may appear inside a symbol/keyword-style token.
///
/// The rejected characters are the structural delimiters, whitespace
/// (comma counts as whitespace), and reader-macro prefix characters;
/// everything else — including `/`, `.`, `+`, `-`, and digits — is a
/// valid symbol constituent.
fn is_symbol_char(ch: char) -> bool {
    const TERMINATORS: &[char] = &[
        ' ', '\t', '\n', '\r', ',', '(', ')', '[', ']', '{', '}', '"', ';', '`', '~', '^', '@',
        '#', '\\', ':',
    ];
    !TERMINATORS.contains(&ch)
}
43
44fn is_symbol_start(ch: char) -> bool {
47 is_symbol_char(ch) && !ch.is_ascii_digit()
48}
49
/// Streaming tokenizer over a single in-memory source string.
///
/// Tracks a byte offset plus 1-based line/column counters so every token
/// can be paired with a `Span` for diagnostics. Source text and file name
/// are `Arc`-shared so error values can carry cheap copies.
pub struct Lexer {
    /// Full source text being lexed.
    source: Arc<String>,
    /// File name used when building spans and error reports.
    file: Arc<String>,
    /// Current byte offset into `source`.
    pos: usize,
    /// Current line number, 1-based.
    line: u32,
    /// Current column, 1-based.
    col: u32,
}
59
60impl Lexer {
61 pub fn new(source: String, file: String) -> Self {
62 Self {
63 source: Arc::new(source),
64 file: Arc::new(file),
65 pos: 0,
66 line: 1,
67 col: 1,
68 }
69 }
70
71 pub fn source(&self) -> &Arc<String> {
74 &self.source
75 }
76
77 pub fn file(&self) -> &Arc<String> {
78 &self.file
79 }
80
81 fn peek(&self) -> Option<char> {
84 self.source[self.pos..].chars().next()
85 }
86
87 fn peek_next(&self) -> Option<char> {
88 let mut chars = self.source[self.pos..].chars();
89 chars.next(); chars.next()
91 }
92
93 fn advance(&mut self) -> Option<char> {
94 let ch = self.peek()?;
95 self.pos += ch.len_utf8();
96 if ch == '\n' {
97 self.line += 1;
98 self.col = 1;
99 } else {
100 self.col += ch.len_utf8() as u32;
101 }
102 Some(ch)
103 }
104
105 fn span_from(&self, start_pos: usize, start_line: u32, start_col: u32) -> Span {
106 Span::new(
107 Arc::clone(&self.file),
108 start_pos,
109 self.pos,
110 start_line,
111 start_col,
112 )
113 }
114
115 fn make_error(&self, msg: impl Into<String>, span: Span) -> CljxError {
116 CljxError::ReadError {
117 message: msg.into(),
118 span: Some(miette::SourceSpan::from(span)),
119 src: NamedSource::new((*self.file).clone(), (*self.source).clone()),
120 }
121 }
122
123 fn read_symbol_chars(&mut self) -> String {
126 let mut buf = String::new();
127 while let Some(ch) = self.peek() {
128 if is_symbol_char(ch) {
129 buf.push(ch);
130 self.advance();
131 } else {
132 break;
133 }
134 }
135 buf
136 }
137
138 fn skip_whitespace_and_comments(&mut self) {
141 loop {
142 match self.peek() {
143 Some('#') if self.pos == 0 => {
145 if self.peek_next() == Some('!') {
146 while let Some(ch) = self.advance() {
148 if ch == '\n' {
149 break;
150 }
151 }
152 } else {
153 break; }
155 }
156 Some(' ') | Some('\t') | Some('\r') | Some('\n') | Some(',') => {
157 self.advance();
158 }
159 Some(';') => {
160 while let Some(ch) = self.advance() {
161 if ch == '\n' {
162 break;
163 }
164 }
165 }
166 _ => break,
167 }
168 }
169 }
170
171 fn lex_unquote(
174 &mut self,
175 start_pos: usize,
176 start_line: u32,
177 start_col: u32,
178 ) -> CljxResult<(Token, Span)> {
179 self.advance(); if self.peek() == Some('@') {
181 self.advance();
182 Ok((
183 Token::UnquoteSplice,
184 self.span_from(start_pos, start_line, start_col),
185 ))
186 } else {
187 Ok((
188 Token::Unquote,
189 self.span_from(start_pos, start_line, start_col),
190 ))
191 }
192 }
193
194 fn lex_hash(
197 &mut self,
198 start_pos: usize,
199 start_line: u32,
200 start_col: u32,
201 ) -> CljxResult<(Token, Span)> {
202 self.advance(); match self.peek() {
204 Some('(') => {
205 self.advance();
206 Ok((
207 Token::HashFn,
208 self.span_from(start_pos, start_line, start_col),
209 ))
210 }
211 Some('{') => {
212 self.advance();
213 Ok((
214 Token::HashSet,
215 self.span_from(start_pos, start_line, start_col),
216 ))
217 }
218 Some('\'') => {
219 self.advance();
220 Ok((
221 Token::HashVar,
222 self.span_from(start_pos, start_line, start_col),
223 ))
224 }
225 Some('_') => {
226 self.advance();
227 Ok((
228 Token::HashDiscard,
229 self.span_from(start_pos, start_line, start_col),
230 ))
231 }
232 Some('"') => self.lex_regex(start_pos, start_line, start_col),
233 Some('?') => {
234 self.advance(); if self.peek() == Some('@') {
236 self.advance();
237 Ok((
238 Token::ReaderCondSplice,
239 self.span_from(start_pos, start_line, start_col),
240 ))
241 } else {
242 Ok((
243 Token::ReaderCond,
244 self.span_from(start_pos, start_line, start_col),
245 ))
246 }
247 }
248 Some('#') => self.lex_symbolic(start_pos, start_line, start_col),
249 Some(c) if is_symbol_start(c) => {
250 let name = self.read_symbol_chars();
251 Ok((
252 Token::TaggedLiteral(name),
253 self.span_from(start_pos, start_line, start_col),
254 ))
255 }
256 other => {
257 let span = self.span_from(start_pos, start_line, start_col);
258 Err(self.make_error(format!("unknown # dispatch character: {:?}", other), span))
259 }
260 }
261 }
262
263 fn lex_regex(
264 &mut self,
265 start_pos: usize,
266 start_line: u32,
267 start_col: u32,
268 ) -> CljxResult<(Token, Span)> {
269 self.advance(); let mut buf = String::new();
271 loop {
272 match self.advance() {
273 None => {
274 let span = self.span_from(start_pos, start_line, start_col);
275 return Err(self.make_error("unterminated regex literal", span));
276 }
277 Some('"') => break,
278 Some('\\') => {
279 buf.push('\\');
281 match self.advance() {
282 Some(c) => buf.push(c),
283 None => {
284 let span = self.span_from(start_pos, start_line, start_col);
285 return Err(self.make_error("unterminated regex literal", span));
286 }
287 }
288 }
289 Some(c) => buf.push(c),
290 }
291 }
292 Ok((
293 Token::Regex(buf),
294 self.span_from(start_pos, start_line, start_col),
295 ))
296 }
297
298 fn lex_symbolic(
299 &mut self,
300 start_pos: usize,
301 start_line: u32,
302 start_col: u32,
303 ) -> CljxResult<(Token, Span)> {
304 self.advance(); let name = self.read_symbol_chars();
306 match name.as_str() {
307 "Inf" | "-Inf" | "NaN" => Ok((
308 Token::Symbolic(name),
309 self.span_from(start_pos, start_line, start_col),
310 )),
311 _ => {
312 let span = self.span_from(start_pos, start_line, start_col);
313 Err(self.make_error(format!("unknown symbolic value: ##{name}"), span))
314 }
315 }
316 }
317
318 fn lex_string(
321 &mut self,
322 start_pos: usize,
323 start_line: u32,
324 start_col: u32,
325 ) -> CljxResult<(Token, Span)> {
326 self.advance(); let mut buf = String::new();
328 loop {
329 match self.advance() {
330 None => {
331 let span = self.span_from(start_pos, start_line, start_col);
332 return Err(self.make_error("unterminated string literal", span));
333 }
334 Some('"') => break,
335 Some('\\') => match self.advance() {
336 Some('n') => buf.push('\n'),
337 Some('t') => buf.push('\t'),
338 Some('r') => buf.push('\r'),
339 Some('b') => buf.push('\x08'),
340 Some('f') => buf.push('\x0C'),
341 Some('\\') => buf.push('\\'),
342 Some('"') => buf.push('"'),
343 Some('u') => {
344 let ch = self.read_unicode_escape(start_pos, start_line, start_col)?;
345 buf.push(ch);
346 }
347 Some(c) => {
348 let span = self.span_from(start_pos, start_line, start_col);
349 return Err(self.make_error(format!("unknown string escape: \\{c}"), span));
350 }
351 None => {
352 let span = self.span_from(start_pos, start_line, start_col);
353 return Err(self.make_error("unterminated string literal", span));
354 }
355 },
356 Some(c) => buf.push(c),
357 }
358 }
359 Ok((
360 Token::Str(buf),
361 self.span_from(start_pos, start_line, start_col),
362 ))
363 }
364
365 fn read_unicode_escape(
367 &mut self,
368 start_pos: usize,
369 start_line: u32,
370 start_col: u32,
371 ) -> CljxResult<char> {
372 let mut hex = String::with_capacity(4);
373 for _ in 0..4 {
374 match self.advance() {
375 Some(c) if c.is_ascii_hexdigit() => hex.push(c),
376 Some(c) => {
377 let span = self.span_from(start_pos, start_line, start_col);
378 return Err(self.make_error(
379 format!("invalid \\u escape: expected hex digit, got {c:?}"),
380 span,
381 ));
382 }
383 None => {
384 let span = self.span_from(start_pos, start_line, start_col);
385 return Err(self.make_error("unterminated \\u escape", span));
386 }
387 }
388 }
389 let code = u32::from_str_radix(&hex, 16).unwrap();
390 char::from_u32(code).ok_or_else(|| {
391 let span = self.span_from(start_pos, start_line, start_col);
392 self.make_error(format!("invalid unicode code point: \\u{hex}"), span)
393 })
394 }
395
396 fn lex_char_literal(
399 &mut self,
400 start_pos: usize,
401 start_line: u32,
402 start_col: u32,
403 ) -> CljxResult<(Token, Span)> {
404 self.advance(); let rest_start = self.pos;
408 let rest: String = self.source[rest_start..]
409 .chars()
410 .take_while(|&c| c.is_alphanumeric() || c == '-')
411 .collect();
412
413 let ch = match rest.as_str() {
414 "newline" => {
415 self.pos += "newline".len();
416 self.col += "newline".len() as u32;
417 '\n'
418 }
419 "space" => {
420 self.pos += "space".len();
421 self.col += "space".len() as u32;
422 ' '
423 }
424 "tab" => {
425 self.pos += "tab".len();
426 self.col += "tab".len() as u32;
427 '\t'
428 }
429 "backspace" => {
430 self.pos += "backspace".len();
431 self.col += "backspace".len() as u32;
432 '\x08'
433 }
434 "formfeed" => {
435 self.pos += "formfeed".len();
436 self.col += "formfeed".len() as u32;
437 '\x0C'
438 }
439 "return" => {
440 self.pos += "return".len();
441 self.col += "return".len() as u32;
442 '\r'
443 }
444 _ if rest.starts_with('u') && rest.len() >= 5 => {
445 let hex_part = &rest[1..5];
447 if hex_part.chars().all(|c| c.is_ascii_hexdigit()) {
448 let code = u32::from_str_radix(hex_part, 16).unwrap();
449 let c = char::from_u32(code).ok_or_else(|| {
450 let span = self.span_from(start_pos, start_line, start_col);
451 self.make_error(
452 format!("invalid unicode code point in char literal: \\u{hex_part}"),
453 span,
454 )
455 })?;
456 self.pos += 5;
458 self.col += 5;
459 c
460 } else {
461 let span = self.span_from(start_pos, start_line, start_col);
462 return Err(self.make_error(format!("unknown character name: {rest}"), span));
463 }
464 }
465 _ if rest.len() == 1 => {
466 let c = self.source[rest_start..].chars().next().unwrap();
468 self.pos += c.len_utf8();
469 self.col += c.len_utf8() as u32;
470 c
471 }
472 _ if rest.is_empty() => {
473 match self.source[rest_start..].chars().next() {
475 Some(c) => {
476 self.pos += c.len_utf8();
477 self.col += c.len_utf8() as u32;
478 c
479 }
480 None => {
481 let span = self.span_from(start_pos, start_line, start_col);
482 return Err(self.make_error("unexpected end of file after \\", span));
483 }
484 }
485 }
486 _ => {
487 let span = self.span_from(start_pos, start_line, start_col);
488 return Err(self.make_error(format!("unknown character name: {rest}"), span));
489 }
490 };
491
492 Ok((
493 Token::Char(ch),
494 self.span_from(start_pos, start_line, start_col),
495 ))
496 }
497
498 fn lex_keyword(
501 &mut self,
502 start_pos: usize,
503 start_line: u32,
504 start_col: u32,
505 ) -> CljxResult<(Token, Span)> {
506 self.advance(); if self.peek() == Some(':') {
508 self.advance(); let name = self.read_symbol_chars();
510 if name.is_empty() {
511 let span = self.span_from(start_pos, start_line, start_col);
512 return Err(self.make_error("empty auto-resolved keyword", span));
513 }
514 Ok((
515 Token::AutoKeyword(name),
516 self.span_from(start_pos, start_line, start_col),
517 ))
518 } else {
519 let name = self.read_symbol_chars();
520 if name.is_empty() {
521 let span = self.span_from(start_pos, start_line, start_col);
522 return Err(self.make_error("empty keyword", span));
523 }
524 Ok((
525 Token::Keyword(name),
526 self.span_from(start_pos, start_line, start_col),
527 ))
528 }
529 }
530
531 fn lex_symbol(
534 &mut self,
535 start_pos: usize,
536 start_line: u32,
537 start_col: u32,
538 ) -> CljxResult<(Token, Span)> {
539 let name = self.read_symbol_chars();
540 let tok = match name.as_str() {
541 "nil" => Token::Nil,
542 "true" => Token::Bool(true),
543 "false" => Token::Bool(false),
544 _ => Token::Symbol(name),
545 };
546 Ok((tok, self.span_from(start_pos, start_line, start_col)))
547 }
548
549 fn lex_number(
552 &mut self,
553 start_pos: usize,
554 start_line: u32,
555 start_col: u32,
556 ) -> CljxResult<(Token, Span)> {
557 let negative = match self.peek() {
559 Some('-') => {
560 self.advance();
561 true
562 }
563 Some('+') => {
564 self.advance();
565 false
566 }
567 _ => false,
568 };
569 let sign_str = if negative { "-" } else { "" };
570
571 let mut int_part = String::new();
573 while let Some(c) = self.peek() {
574 if c.is_ascii_digit() {
575 int_part.push(c);
576 self.advance();
577 } else {
578 break;
579 }
580 }
581
582 if int_part == "0" && matches!(self.peek(), Some('x') | Some('X')) {
584 self.advance(); let mut hex = String::new();
586 while let Some(c) = self.peek() {
587 if c.is_ascii_hexdigit() {
588 hex.push(c);
589 self.advance();
590 } else {
591 break;
592 }
593 }
594 if hex.is_empty() {
595 let span = self.span_from(start_pos, start_line, start_col);
596 return Err(self.make_error("expected hex digits after 0x", span));
597 }
598 let value = u128::from_str_radix(&hex, 16).unwrap_or(u128::MAX);
599 let span = self.span_from(start_pos, start_line, start_col);
600 return if negative {
601 if value <= (i64::MAX as u128) + 1 {
603 Ok((Token::Int(0i64.wrapping_sub(value as i64)), span))
604 } else {
605 Ok((Token::BigInt(format!("-{value}")), span))
607 }
608 } else if value <= i64::MAX as u128 {
609 Ok((Token::Int(value as i64), span))
610 } else {
611 Ok((Token::BigInt(value.to_string()), span))
612 };
613 }
614
615 if matches!(self.peek(), Some('r') | Some('R')) {
617 let radix: u32 = int_part.parse().unwrap_or(0);
618 self.advance(); let mut digits = String::new();
620 while let Some(c) = self.peek() {
621 if c.is_ascii_alphanumeric() {
622 digits.push(c);
623 self.advance();
624 } else {
625 break;
626 }
627 }
628 let mut value: u128 = 0;
629 for c in digits.chars() {
630 let d = c.to_digit(radix).ok_or_else(|| {
631 let span = self.span_from(start_pos, start_line, start_col);
632 self.make_error(format!("invalid digit {c:?} for radix {radix}"), span)
633 })?;
634 value = value.wrapping_mul(radix as u128).wrapping_add(d as u128);
635 }
636 if negative {
637 if value <= (i64::MAX as u128) + 1 {
639 let signed = -(value as i64);
640 return Ok((
641 Token::Int(signed),
642 self.span_from(start_pos, start_line, start_col),
643 ));
644 } else {
645 return Ok((
647 Token::BigInt(format!("-{value}")),
648 self.span_from(start_pos, start_line, start_col),
649 ));
650 }
651 } else if value <= i64::MAX as u128 {
652 return Ok((
653 Token::Int(value as i64),
654 self.span_from(start_pos, start_line, start_col),
655 ));
656 } else {
657 return Ok((
658 Token::BigInt(value.to_string()),
659 self.span_from(start_pos, start_line, start_col),
660 ));
661 }
662 }
663
664 if self.peek() == Some('N') {
666 self.advance();
667 return Ok((
668 Token::BigInt(format!("{sign_str}{int_part}")),
669 self.span_from(start_pos, start_line, start_col),
670 ));
671 }
672
673 if self.peek() == Some('M') {
675 self.advance();
676 return Ok((
677 Token::BigDecimal(format!("{sign_str}{int_part}")),
678 self.span_from(start_pos, start_line, start_col),
679 ));
680 }
681
682 if matches!(self.peek(), Some('.') | Some('e') | Some('E')) {
684 let mut raw = format!("{sign_str}{int_part}");
685 if self.peek() == Some('.') {
686 raw.push('.');
687 self.advance();
688 while let Some(c) = self.peek() {
689 if c.is_ascii_digit() {
690 raw.push(c);
691 self.advance();
692 } else {
693 break;
694 }
695 }
696 }
697 if matches!(self.peek(), Some('e') | Some('E')) {
698 raw.push('e');
699 self.advance();
700 if matches!(self.peek(), Some('+') | Some('-')) {
701 raw.push(self.peek().unwrap());
702 self.advance();
703 }
704 while let Some(c) = self.peek() {
705 if c.is_ascii_digit() {
706 raw.push(c);
707 self.advance();
708 } else {
709 break;
710 }
711 }
712 }
713 if self.peek() == Some('M') {
715 self.advance();
716 return Ok((
717 Token::BigDecimal(raw),
718 self.span_from(start_pos, start_line, start_col),
719 ));
720 }
721 let val: f64 = raw.parse().map_err(|_| {
722 let span = self.span_from(start_pos, start_line, start_col);
723 self.make_error(format!("invalid float: {raw}"), span)
724 })?;
725 return Ok((
726 Token::Float(val),
727 self.span_from(start_pos, start_line, start_col),
728 ));
729 }
730
731 if self.peek() == Some('/') && matches!(self.peek_next(), Some(c) if c.is_ascii_digit()) {
733 self.advance(); let mut denom = String::new();
735 while let Some(c) = self.peek() {
736 if c.is_ascii_digit() {
737 denom.push(c);
738 self.advance();
739 } else {
740 break;
741 }
742 }
743 return Ok((
744 Token::Ratio(format!("{sign_str}{int_part}/{denom}")),
745 self.span_from(start_pos, start_line, start_col),
746 ));
747 }
748
749 let full = format!("{sign_str}{int_part}");
751 match full.parse::<i64>() {
752 Ok(n) => Ok((
753 Token::Int(n),
754 self.span_from(start_pos, start_line, start_col),
755 )),
756 Err(_) => {
757 Ok((
759 Token::BigInt(full),
760 self.span_from(start_pos, start_line, start_col),
761 ))
762 }
763 }
764 }
765
766 pub fn next_token(&mut self) -> CljxResult<(Token, Span)> {
769 self.skip_whitespace_and_comments();
770
771 let start_pos = self.pos;
772 let start_line = self.line;
773 let start_col = self.col;
774
775 let ch = match self.peek() {
776 None => {
777 return Ok((Token::Eof, self.span_from(start_pos, start_line, start_col)));
778 }
779 Some(c) => c,
780 };
781
782 match ch {
783 '(' => {
784 self.advance();
785 Ok((
786 Token::LParen,
787 self.span_from(start_pos, start_line, start_col),
788 ))
789 }
790 ')' => {
791 self.advance();
792 Ok((
793 Token::RParen,
794 self.span_from(start_pos, start_line, start_col),
795 ))
796 }
797 '[' => {
798 self.advance();
799 Ok((
800 Token::LBracket,
801 self.span_from(start_pos, start_line, start_col),
802 ))
803 }
804 ']' => {
805 self.advance();
806 Ok((
807 Token::RBracket,
808 self.span_from(start_pos, start_line, start_col),
809 ))
810 }
811 '{' => {
812 self.advance();
813 Ok((
814 Token::LBrace,
815 self.span_from(start_pos, start_line, start_col),
816 ))
817 }
818 '}' => {
819 self.advance();
820 Ok((
821 Token::RBrace,
822 self.span_from(start_pos, start_line, start_col),
823 ))
824 }
825 '\'' => {
826 self.advance();
827 Ok((
828 Token::Quote,
829 self.span_from(start_pos, start_line, start_col),
830 ))
831 }
832 '`' => {
833 self.advance();
834 Ok((
835 Token::SyntaxQuote,
836 self.span_from(start_pos, start_line, start_col),
837 ))
838 }
839 '@' => {
840 self.advance();
841 Ok((
842 Token::Deref,
843 self.span_from(start_pos, start_line, start_col),
844 ))
845 }
846 '^' => {
847 self.advance();
848 Ok((
849 Token::Meta,
850 self.span_from(start_pos, start_line, start_col),
851 ))
852 }
853 '~' => self.lex_unquote(start_pos, start_line, start_col),
854 '#' => self.lex_hash(start_pos, start_line, start_col),
855 '"' => self.lex_string(start_pos, start_line, start_col),
856 '\\' => self.lex_char_literal(start_pos, start_line, start_col),
857 ':' => self.lex_keyword(start_pos, start_line, start_col),
858 c if c.is_ascii_digit() => self.lex_number(start_pos, start_line, start_col),
859 '+' | '-' if matches!(self.peek_next(), Some(d) if d.is_ascii_digit()) => {
860 self.lex_number(start_pos, start_line, start_col)
861 }
862 c if is_symbol_start(c) => self.lex_symbol(start_pos, start_line, start_col),
863 '+' | '-' => self.lex_symbol(start_pos, start_line, start_col),
865 c => {
866 self.advance();
867 let span = self.span_from(start_pos, start_line, start_col);
868 Err(self.make_error(format!("unexpected character: {c:?}"), span))
869 }
870 }
871 }
872}
873
874impl Iterator for Lexer {
875 type Item = CljxResult<(Token, Span)>;
876
877 fn next(&mut self) -> Option<Self::Item> {
878 match self.next_token() {
879 Ok((Token::Eof, _)) => None,
880 result => Some(result),
881 }
882 }
883}
884
#[cfg(test)]
mod tests {
    use super::*;

    // Lexes `src` to completion, panicking on any lex error; spans are
    // discarded.
    fn lex_all(src: &str) -> Vec<Token> {
        Lexer::new(src.to_string(), "<test>".to_string())
            .map(|r: CljxResult<(Token, Span)>| r.expect("lex error").0)
            .collect()
    }

    // Returns only the first token of `src`.
    fn lex_one(src: &str) -> Token {
        let mut l = Lexer::new(src.to_string(), "<test>".to_string());
        l.next_token().expect("lex error").0
    }

    // Lexes until the first error and returns its message; panics if the
    // input lexes cleanly to Eof.
    fn lex_err(src: &str) -> String {
        let mut l = Lexer::new(src.to_string(), "<test>".to_string());
        loop {
            match l.next_token() {
                Err(CljxError::ReadError { message, .. }) => return message,
                Err(e) => panic!("unexpected error type: {e}"),
                Ok((Token::Eof, _)) => panic!("expected an error but got Eof"),
                Ok(_) => {}
            }
        }
    }

    // --- literals: nil / bool ---

    #[test]
    fn test_nil() {
        assert_eq!(lex_one("nil"), Token::Nil);
    }

    #[test]
    fn test_bool() {
        assert_eq!(lex_one("true"), Token::Bool(true));
        assert_eq!(lex_one("false"), Token::Bool(false));
    }

    // --- numbers ---

    #[test]
    fn test_int_plain() {
        assert_eq!(lex_one("42"), Token::Int(42));
        assert_eq!(lex_one("-42"), Token::Int(-42));
        assert_eq!(lex_one("+42"), Token::Int(42));
        assert_eq!(lex_one("0"), Token::Int(0));
    }

    #[test]
    fn test_bigint_suffix() {
        assert_eq!(lex_one("42N"), Token::BigInt("42".to_string()));
        assert_eq!(lex_one("-42N"), Token::BigInt("-42".to_string()));
    }

    #[test]
    fn test_hex_literal() {
        assert_eq!(lex_one("0xff"), Token::Int(255));
        assert_eq!(lex_one("0xFF"), Token::Int(255));
        assert_eq!(lex_one("0x0"), Token::Int(0));
        assert_eq!(lex_one("0x7FFFFFFFFFFFFFFF"), Token::Int(i64::MAX));
        assert_eq!(lex_one("-0x8000000000000000"), Token::Int(i64::MIN));
        assert_eq!(lex_one("-0xff"), Token::Int(-255));
        // Values above i64::MAX fall back to BigInt.
        match lex_one("0xFFFFFFFFFFFFFFFF") {
            Token::BigInt(_) => {}
            other => panic!("expected BigInt for 0xFFFF…, got {other:?}"),
        }
    }

    #[test]
    fn test_radix() {
        assert_eq!(lex_one("2r1010"), Token::Int(10));
        assert_eq!(lex_one("8r77"), Token::Int(63));
        assert_eq!(lex_one("16rFF"), Token::Int(255));
        assert_eq!(lex_one("16rff"), Token::Int(255));
        assert_eq!(lex_one("36rZ"), Token::Int(35));
    }

    #[test]
    fn test_radix_overflow() {
        // 2^64 does not fit i64, so it must become a BigInt token.
        let tok = lex_one("10r18446744073709551616");
        match tok {
            Token::BigInt(_) => {}
            other => panic!("expected BigInt, got {other:?}"),
        }
    }

    #[test]
    #[allow(clippy::approx_constant)]
    fn test_floats() {
        assert_eq!(lex_one("3.14"), Token::Float(3.14));
        assert_eq!(lex_one("1e10"), Token::Float(1e10));
        assert_eq!(lex_one("1.5e-3"), Token::Float(1.5e-3));
        assert_eq!(lex_one("-0.5"), Token::Float(-0.5));
    }

    #[test]
    fn test_bigdecimal() {
        assert_eq!(lex_one("3.14M"), Token::BigDecimal("3.14".to_string()));
        assert_eq!(lex_one("1e5M"), Token::BigDecimal("1e5".to_string()));
    }

    #[test]
    fn test_ratio() {
        assert_eq!(lex_one("3/4"), Token::Ratio("3/4".to_string()));
        assert_eq!(lex_one("-1/2"), Token::Ratio("-1/2".to_string()));
    }

    #[test]
    fn test_ratio_vs_symbol() {
        // A non-digit after '/' means the slash starts a new symbol token.
        let toks = lex_all("3/foo");
        assert_eq!(toks[0], Token::Int(3));
        assert_eq!(toks[1], Token::Symbol("/foo".to_string()));
    }

    // --- character literals ---

    #[test]
    fn test_char_simple() {
        assert_eq!(lex_one("\\a"), Token::Char('a'));
    }

    #[test]
    fn test_char_named() {
        assert_eq!(lex_one("\\newline"), Token::Char('\n'));
        assert_eq!(lex_one("\\space"), Token::Char(' '));
        assert_eq!(lex_one("\\tab"), Token::Char('\t'));
        assert_eq!(lex_one("\\backspace"), Token::Char('\x08'));
        assert_eq!(lex_one("\\formfeed"), Token::Char('\x0C'));
        assert_eq!(lex_one("\\return"), Token::Char('\r'));
    }

    #[test]
    fn test_char_unicode() {
        assert_eq!(lex_one("\\u0041"), Token::Char('A'));
        assert_eq!(lex_one("\\u00e9"), Token::Char('é'));
    }

    // --- strings ---

    #[test]
    fn test_string_basic() {
        assert_eq!(lex_one("\"hello\""), Token::Str("hello".to_string()));
    }

    #[test]
    fn test_string_escapes() {
        assert_eq!(
            lex_one(r#""\n\t\r\b\f\\\"" "#),
            Token::Str("\n\t\r\x08\x0C\\\"".to_string())
        );
    }

    #[test]
    fn test_string_unicode_escape() {
        assert_eq!(lex_one("\"\\u0041\""), Token::Str("A".to_string()));
    }

    // --- symbols and keywords ---

    #[test]
    fn test_symbols() {
        assert_eq!(lex_one("foo"), Token::Symbol("foo".to_string()));
        assert_eq!(lex_one("ns/name"), Token::Symbol("ns/name".to_string()));
        assert_eq!(lex_one("/"), Token::Symbol("/".to_string()));
        assert_eq!(lex_one(".."), Token::Symbol("..".to_string()));
        assert_eq!(lex_one(".method"), Token::Symbol(".method".to_string()));
        assert_eq!(lex_one("+"), Token::Symbol("+".to_string()));
        assert_eq!(lex_one("-"), Token::Symbol("-".to_string()));
        assert_eq!(lex_one("+foo"), Token::Symbol("+foo".to_string()));
    }

    #[test]
    fn test_keyword() {
        assert_eq!(lex_one(":foo"), Token::Keyword("foo".to_string()));
        assert_eq!(lex_one(":ns/name"), Token::Keyword("ns/name".to_string()));
    }

    #[test]
    fn test_auto_keyword() {
        assert_eq!(lex_one("::foo"), Token::AutoKeyword("foo".to_string()));
        assert_eq!(
            lex_one("::ns/alias"),
            Token::AutoKeyword("ns/alias".to_string())
        );
    }

    // --- delimiters and reader macros ---

    #[test]
    fn test_delimiters() {
        assert_eq!(
            lex_all("([{}])"),
            vec![
                Token::LParen,
                Token::LBracket,
                Token::LBrace,
                Token::RBrace,
                Token::RBracket,
                Token::RParen,
            ]
        );
    }

    #[test]
    fn test_reader_macros() {
        assert_eq!(lex_one("'x"), Token::Quote);
        assert_eq!(lex_one("`x"), Token::SyntaxQuote);
        assert_eq!(lex_one("~x"), Token::Unquote);
        assert_eq!(lex_one("~@x"), Token::UnquoteSplice);
        assert_eq!(lex_one("@x"), Token::Deref);
        assert_eq!(lex_one("^x"), Token::Meta);
    }

    #[test]
    fn test_hash_dispatch() {
        assert_eq!(lex_one("#("), Token::HashFn);
        assert_eq!(lex_one("#{"), Token::HashSet);
        assert_eq!(lex_one("#'"), Token::HashVar);
        assert_eq!(lex_one("#_"), Token::HashDiscard);
        assert_eq!(lex_one("#?"), Token::ReaderCond);
        assert_eq!(lex_one("#?@"), Token::ReaderCondSplice);
    }

    #[test]
    fn test_regex() {
        assert_eq!(lex_one("#\"[a-z]+\""), Token::Regex("[a-z]+".to_string()));
    }

    #[test]
    fn test_symbolic() {
        assert_eq!(lex_one("##Inf"), Token::Symbolic("Inf".to_string()));
        assert_eq!(lex_one("##-Inf"), Token::Symbolic("-Inf".to_string()));
        assert_eq!(lex_one("##NaN"), Token::Symbolic("NaN".to_string()));
    }

    #[test]
    fn test_tagged_literal() {
        assert_eq!(lex_one("#mytag"), Token::TaggedLiteral("mytag".to_string()));
    }

    // --- whitespace / comments / multi-token streams ---

    #[test]
    fn test_multi_token() {
        let toks = lex_all("(+ 1 2)");
        assert_eq!(
            toks,
            vec![
                Token::LParen,
                Token::Symbol("+".to_string()),
                Token::Int(1),
                Token::Int(2),
                Token::RParen,
            ]
        );
    }

    #[test]
    fn test_comma_skipped() {
        assert_eq!(lex_all("{,,,}"), vec![Token::LBrace, Token::RBrace]);
    }

    #[test]
    fn test_comment_skipped() {
        assert_eq!(lex_all("; this is a comment\n42"), vec![Token::Int(42)]);
    }

    #[test]
    fn test_shebang_skipped() {
        assert_eq!(lex_all("#!/usr/bin/env cljx\n42"), vec![Token::Int(42)]);
    }

    // --- span bookkeeping ---

    #[test]
    fn test_span_col() {
        let mut l = Lexer::new("  foo".to_string(), "<test>".to_string());
        let (_tok, span) = l.next_token().unwrap();
        assert_eq!(span.start, 2);
        assert_eq!(span.col, 3);
    }

    #[test]
    fn test_span_newline() {
        let mut l = Lexer::new("a\nb".to_string(), "<test>".to_string());
        l.next_token().unwrap(); let (_tok, span) = l.next_token().unwrap(); assert_eq!(span.line, 2);
        assert_eq!(span.col, 1);
    }

    // --- error paths ---

    #[test]
    fn test_error_unterminated_string() {
        let msg = lex_err("\"unterminated");
        assert!(msg.contains("unterminated string"));
    }

    #[test]
    fn test_error_bad_hash_dispatch() {
        // Digits are not valid symbol starts, so `#1` is not a tag.
        let msg = lex_err("#1");
        assert!(msg.contains("unknown # dispatch"));
    }

    #[test]
    fn test_error_bad_unicode_escape_in_string() {
        let msg = lex_err("\"\\uGHIJ\"");
        assert!(msg.contains("invalid") || msg.contains("hex"));
    }

    #[test]
    fn test_error_unknown_char_name() {
        let msg = lex_err("\\bogus");
        assert!(msg.contains("unknown character name"));
    }

    #[test]
    fn test_error_unknown_symbolic() {
        let msg = lex_err("##Bogus");
        assert!(msg.contains("unknown symbolic value"));
    }

    #[test]
    fn test_error_bad_string_escape() {
        let msg = lex_err("\"\\q\"");
        assert!(msg.contains("unknown string escape"));
    }
}