1use crate::scheme::value::Value;
40use std::fmt;
41
/// A lexical token produced by the tokenizer.
///
/// The `Display` impl renders a token in the surface syntax the tokenizer
/// itself accepts, which is what parser error messages show.
#[derive(Debug, Clone, PartialEq)]
pub enum Token {
    /// Integer literal.
    Integer(i64),
    /// Floating-point literal.
    Real(f64),
    /// String literal, holding the decoded contents (no quotes, escapes resolved).
    String(String),
    /// Character literal, written `#\x` in source.
    Char(char),
    /// Identifier.
    Symbol(String),
    /// Keyword, rendered as `#:name`.
    Keyword(String),
    /// Boolean literal, `#t` or `#f`.
    Bool(bool),

    /// `(`
    LeftParen,
    /// `)`
    RightParen,
    /// `[`
    LeftBracket,
    /// `]`
    RightBracket,
    /// `.`
    Dot,
    /// `'`
    Quote,
    /// `` ` ``
    Quasiquote,
    /// `,`
    Unquote,
    /// `,@`
    UnquoteSplicing,
    /// `#(`
    VectorStart,
    /// End of input.
    Eof,
}

impl fmt::Display for Token {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Token::Integer(n) => write!(f, "{}", n),
            // `{:?}` keeps the fractional part (`1.0`, not `1`), so a real is
            // never rendered like an integer.
            Token::Real(x) => write!(f, "{:?}", x),
            Token::String(s) => {
                // Re-escape what string parsing un-escapes so the output is a
                // well-formed literal even when it contains quotes or newlines.
                f.write_str("\"")?;
                for ch in s.chars() {
                    match ch {
                        '"' => f.write_str("\\\"")?,
                        '\\' => f.write_str("\\\\")?,
                        '\n' => f.write_str("\\n")?,
                        '\t' => f.write_str("\\t")?,
                        '\r' => f.write_str("\\r")?,
                        other => write!(f, "{}", other)?,
                    }
                }
                f.write_str("\"")
            }
            // Use the named spellings for characters that would otherwise be
            // invisible in the output.
            Token::Char(ch) => match *ch {
                ' ' => f.write_str("#\\space"),
                '\n' => f.write_str("#\\newline"),
                '\t' => f.write_str("#\\tab"),
                '\r' => f.write_str("#\\return"),
                other => write!(f, "#\\{}", other),
            },
            Token::Symbol(s) => f.write_str(s),
            Token::Keyword(s) => write!(f, "#:{}", s),
            Token::Bool(true) => f.write_str("#t"),
            Token::Bool(false) => f.write_str("#f"),
            Token::LeftParen => f.write_str("("),
            Token::RightParen => f.write_str(")"),
            Token::LeftBracket => f.write_str("["),
            Token::RightBracket => f.write_str("]"),
            Token::Dot => f.write_str("."),
            Token::Quote => f.write_str("'"),
            Token::Quasiquote => f.write_str("`"),
            Token::Unquote => f.write_str(","),
            Token::UnquoteSplicing => f.write_str(",@"),
            Token::VectorStart => f.write_str("#("),
            Token::Eof => f.write_str("<EOF>"),
        }
    }
}
102
103#[derive(Debug, Clone, PartialEq, Eq, gc::Trace, gc::Finalize)]
114pub struct Position {
115 pub line: usize,
116 pub column: usize,
117}
118
119impl Position {
120 pub fn new() -> Self {
121 Position { line: 1, column: 1 }
122 }
123}
124
125impl Default for Position {
126 fn default() -> Self {
127 Self::new()
128 }
129}
130
131impl fmt::Display for Position {
132 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
133 write!(f, "{}:{}", self.line, self.column)
134 }
135}
136
137#[derive(Debug, Clone)]
143pub struct ParseError {
144 pub message: String,
145 pub position: Position,
146}
147
148impl ParseError {
149 pub fn new(message: String, position: Position) -> Self {
150 ParseError { message, position }
151 }
152}
153
154impl fmt::Display for ParseError {
155 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
156 write!(f, "Parse error at {}: {}", self.position, self.message)
157 }
158}
159
160impl std::error::Error for ParseError {}
161
162pub type ParseResult<T> = Result<T, ParseError>;
163
164pub struct Tokenizer {
184 input: Vec<char>,
186
187 pos: usize,
189
190 line: usize,
192
193 column: usize,
195
196 peeked: Option<Token>,
198}
199
200impl Tokenizer {
201 pub fn new(input: &str) -> Self {
203 Tokenizer {
204 input: input.chars().collect(),
205 pos: 0,
206 line: 1,
207 column: 1,
208 peeked: None,
209 }
210 }
211
212 pub fn position(&self) -> Position {
214 Position {
215 line: self.line,
216 column: self.column,
217 }
218 }
219
220 fn peek_char(&self) -> Option<char> {
222 if self.pos < self.input.len() {
223 Some(self.input[self.pos])
224 } else {
225 None
226 }
227 }
228
229 fn peek_char_at(&self, offset: usize) -> Option<char> {
231 let index = self.pos + offset;
232 if index < self.input.len() {
233 Some(self.input[index])
234 } else {
235 None
236 }
237 }
238
239 fn next_char(&mut self) -> Option<char> {
241 if self.pos < self.input.len() {
242 let ch = self.input[self.pos];
243 self.pos += 1;
244
245 if ch == '\n' {
247 self.line += 1;
248 self.column = 1;
249 } else {
250 self.column += 1;
251 }
252
253 Some(ch)
254 } else {
255 None
256 }
257 }
258
259 fn skip_whitespace(&mut self) {
263 loop {
264 match self.peek_char() {
265 Some(ch) if ch.is_whitespace() => {
267 self.next_char();
268 }
269
270 Some(';') => {
272 self.next_char();
273 while let Some(ch) = self.peek_char() {
274 self.next_char();
275 if ch == '\n' {
276 break;
277 }
278 }
279 }
280
281 Some('#') if self.peek_char_at(1) == Some('|') => {
283 self.next_char(); self.next_char(); let mut depth = 1;
288 while depth > 0 {
289 match self.next_char() {
290 Some('|') if self.peek_char() == Some('#') => {
291 self.next_char(); depth -= 1;
293 }
294 Some('#') if self.peek_char() == Some('|') => {
295 self.next_char(); depth += 1; }
298 Some(_) => {} None => break, }
301 }
302 }
303
304 _ => break,
306 }
307 }
308 }
309
310 fn is_delimiter(ch: char) -> bool {
312 ch.is_whitespace()
313 || matches!(
314 ch,
315 '(' | ')' | '[' | ']' | '"' | ';' | ',' | '`' | '\''
316 )
317 }
318
319 fn parse_number(&mut self, start_pos: Position) -> ParseResult<Token> {
321 let mut num_str = String::new();
322
323 while let Some(ch) = self.peek_char() {
325 if ch.is_ascii_digit() || matches!(ch, '.' | 'e' | 'E' | '+' | '-') {
326 num_str.push(ch);
327 self.next_char();
328 } else if Self::is_delimiter(ch) {
329 break;
330 } else {
331 return Err(ParseError::new(
333 format!("Invalid character in number: {}", ch),
334 start_pos,
335 ));
336 }
337 }
338
339 if let Ok(n) = num_str.parse::<i64>() {
341 return Ok(Token::Integer(n));
342 }
343
344 if let Ok(n) = num_str.parse::<f64>() {
346 return Ok(Token::Real(n));
347 }
348
349 Err(ParseError::new(
350 format!("Invalid number: {}", num_str),
351 start_pos,
352 ))
353 }
354
355 fn parse_hex_number(&mut self, start_pos: Position) -> ParseResult<Token> {
357 let mut num_str = String::new();
358
359 while let Some(ch) = self.peek_char() {
360 if ch.is_ascii_hexdigit() {
361 num_str.push(ch);
362 self.next_char();
363 } else if Self::is_delimiter(ch) {
364 break;
365 } else {
366 return Err(ParseError::new(
367 format!("Invalid character in hex number: {}", ch),
368 start_pos,
369 ));
370 }
371 }
372
373 if num_str.is_empty() {
374 return Err(ParseError::new("Empty hex number".to_string(), start_pos));
375 }
376
377 i64::from_str_radix(&num_str, 16)
378 .map(Token::Integer)
379 .map_err(|_| ParseError::new(format!("Invalid hex number: {}", num_str), start_pos))
380 }
381
382 fn parse_octal_number(&mut self, start_pos: Position) -> ParseResult<Token> {
384 let mut num_str = String::new();
385
386 while let Some(ch) = self.peek_char() {
387 if ch.is_digit(8) {
388 num_str.push(ch);
389 self.next_char();
390 } else if Self::is_delimiter(ch) {
391 break;
392 } else {
393 return Err(ParseError::new(
394 format!("Invalid character in octal number: {}", ch),
395 start_pos,
396 ));
397 }
398 }
399
400 if num_str.is_empty() {
401 return Err(ParseError::new("Empty octal number".to_string(), start_pos));
402 }
403
404 i64::from_str_radix(&num_str, 8)
405 .map(Token::Integer)
406 .map_err(|_| ParseError::new(format!("Invalid octal number: {}", num_str), start_pos))
407 }
408
409 fn parse_binary_number(&mut self, start_pos: Position) -> ParseResult<Token> {
411 let mut num_str = String::new();
412
413 while let Some(ch) = self.peek_char() {
414 if matches!(ch, '0' | '1') {
415 num_str.push(ch);
416 self.next_char();
417 } else if Self::is_delimiter(ch) {
418 break;
419 } else {
420 return Err(ParseError::new(
421 format!("Invalid character in binary number: {}", ch),
422 start_pos,
423 ));
424 }
425 }
426
427 if num_str.is_empty() {
428 return Err(ParseError::new("Empty binary number".to_string(), start_pos));
429 }
430
431 i64::from_str_radix(&num_str, 2)
432 .map(Token::Integer)
433 .map_err(|_| ParseError::new(format!("Invalid binary number: {}", num_str), start_pos))
434 }
435
436 fn parse_symbol(&mut self) -> String {
438 let mut sym = String::new();
439
440 while let Some(ch) = self.peek_char() {
441 if Self::is_delimiter(ch) {
442 break;
443 }
444 sym.push(ch);
445 self.next_char();
446 }
447
448 sym
449 }
450
451 fn parse_string(&mut self, start_pos: Position) -> ParseResult<String> {
453 self.next_char(); let mut result = String::new();
456
457 loop {
458 match self.next_char() {
459 Some('"') => {
460 let normalized = result.replace("\r\n", "\n");
464 return Ok(normalized);
465 }
466 Some('\\') => {
467 match self.next_char() {
469 Some('n') => result.push('\n'),
470 Some('t') => result.push('\t'),
471 Some('r') => result.push('\r'),
472 Some('\\') => result.push('\\'),
473 Some('"') => result.push('"'),
474 Some(ch) => result.push(ch), None => {
476 return Err(ParseError::new(
477 "Unexpected EOF in string escape".to_string(),
478 start_pos,
479 ))
480 }
481 }
482 }
483 Some(ch) => {
484 result.push(ch);
485 }
486 None => {
487 return Err(ParseError::new(
488 "Unexpected EOF in string".to_string(),
489 start_pos,
490 ))
491 }
492 }
493 }
494 }
495
496 fn parse_char(&mut self, start_pos: Position) -> ParseResult<char> {
499 if self.next_char() != Some('\\') {
501 return Err(ParseError::new(
502 "Expected \\ after # in character literal".to_string(),
503 start_pos,
504 ));
505 }
506
507 let mut name = String::new();
509 while let Some(ch) = self.peek_char() {
510 if Self::is_delimiter(ch) {
511 break;
512 }
513 name.push(ch);
514 self.next_char();
515 }
516
517 if name.is_empty() {
518 return Err(ParseError::new(
519 "Empty character literal".to_string(),
520 start_pos,
521 ));
522 }
523
524 match name.as_str() {
526 "space" => Ok(' '),
527 "newline" => Ok('\n'),
528 "tab" => Ok('\t'),
529 "return" => Ok('\r'),
530 s if s.starts_with("U-") => {
533 let hex_str = &s[2..]; u32::from_str_radix(hex_str, 16)
535 .ok()
536 .and_then(std::char::from_u32)
537 .ok_or_else(|| ParseError::new(
538 format!("Invalid Unicode character literal: #\\{}", name),
539 start_pos,
540 ))
541 }
542 s if s.chars().count() == 1 => Ok(s.chars().next().unwrap()),
545 _ => Err(ParseError::new(
546 format!("Invalid character literal: #\\{}", name),
547 start_pos,
548 )),
549 }
550 }
551
552 fn parse_cdata_string(&mut self, start_pos: Position) -> ParseResult<Token> {
556 for _ in 0..9 {
558 self.next_char();
559 }
560
561 let mut content = String::new();
562
563 loop {
565 match self.peek_char() {
566 None => {
567 return Err(ParseError::new(
568 "Unclosed CDATA section: missing ]]>".to_string(),
569 start_pos,
570 ));
571 }
572 Some(']') => {
573 if self.pos + 2 < self.input.len()
575 && self.input[self.pos] == ']'
576 && self.input[self.pos + 1] == ']'
577 && self.input[self.pos + 2] == '>'
578 {
579 self.next_char(); self.next_char(); self.next_char(); break;
584 } else {
585 content.push(']');
586 self.next_char();
587 }
588 }
589 Some(ch) => {
590 content.push(ch);
591 self.next_char();
592 }
593 }
594 }
595
596 Ok(Token::String(content))
597 }
598
599 pub fn next_token(&mut self) -> ParseResult<Token> {
601 if let Some(tok) = self.peeked.take() {
603 return Ok(tok);
604 }
605
606 self.skip_whitespace();
608
609 let start_pos = self.position();
610
611 match self.peek_char() {
612 None => Ok(Token::Eof),
613
614 Some('(') => {
615 self.next_char();
616 Ok(Token::LeftParen)
617 }
618
619 Some(')') => {
620 self.next_char();
621 Ok(Token::RightParen)
622 }
623
624 Some('[') => {
625 self.next_char();
626 Ok(Token::LeftBracket)
627 }
628
629 Some(']') => {
630 self.next_char();
631 Ok(Token::RightBracket)
632 }
633
634 Some('\'') => {
635 self.next_char();
636 Ok(Token::Quote)
637 }
638
639 Some('`') => {
640 self.next_char();
641 Ok(Token::Quasiquote)
642 }
643
644 Some(',') => {
645 self.next_char();
646 if self.peek_char() == Some('@') {
648 self.next_char();
649 Ok(Token::UnquoteSplicing)
650 } else {
651 Ok(Token::Unquote)
652 }
653 }
654
655 Some('"') => {
656 let s = self.parse_string(start_pos)?;
657 Ok(Token::String(s))
658 }
659
660 Some('#') => {
661 self.next_char(); match self.peek_char() {
663 Some('t') => {
664 self.next_char();
665 Ok(Token::Bool(true))
666 }
667 Some('f') => {
668 self.next_char();
669 Ok(Token::Bool(false))
670 }
671 Some('(') => {
672 self.next_char();
673 Ok(Token::VectorStart)
674 }
675 Some('\\') => {
676 let ch = self.parse_char(start_pos)?;
677 Ok(Token::Char(ch))
678 }
679 Some(':') => {
680 self.next_char(); let name = self.parse_symbol();
682 Ok(Token::Keyword(name))
683 }
684 Some('x') | Some('X') => {
685 self.next_char(); self.parse_hex_number(start_pos)
687 }
688 Some('o') | Some('O') => {
689 self.next_char(); self.parse_octal_number(start_pos)
691 }
692 Some('b') | Some('B') => {
693 self.next_char(); self.parse_binary_number(start_pos)
695 }
696 _ => Err(ParseError::new(
697 format!("Invalid # syntax: #{:?}", self.peek_char()),
698 start_pos,
699 )),
700 }
701 }
702
703 Some(ch) if ch.is_ascii_digit() => self.parse_number(start_pos),
704
705 Some('+') | Some('-') => {
706 if let Some(next) = self.peek_char_at(1) {
708 if next.is_ascii_digit() {
709 self.parse_number(start_pos)
710 } else {
711 let sym = self.parse_symbol();
712 Ok(Token::Symbol(sym))
713 }
714 } else {
715 let sym = self.parse_symbol();
716 Ok(Token::Symbol(sym))
717 }
718 }
719
720 Some('.') => {
721 if let Some(next) = self.peek_char_at(1) {
723 if next.is_ascii_digit() {
724 self.parse_number(start_pos)
725 } else {
726 self.next_char();
727 Ok(Token::Dot)
728 }
729 } else {
730 self.next_char();
731 Ok(Token::Dot)
732 }
733 }
734
735 Some('<') => {
736 let cdata_prefix = ['<', '!', '[', 'C', 'D', 'A', 'T', 'A', '['];
739 let is_cdata = self.pos + cdata_prefix.len() <= self.input.len()
740 && self.input[self.pos..self.pos + cdata_prefix.len()] == cdata_prefix;
741
742 if is_cdata {
743 self.parse_cdata_string(start_pos)
744 } else {
745 let sym = self.parse_symbol();
747 Ok(Token::Symbol(sym))
748 }
749 }
750
751 Some(_) => {
752 let sym = self.parse_symbol();
754
755 if sym.ends_with(':') {
757 let keyword_name = sym[..sym.len()-1].to_string();
758 Ok(Token::Keyword(keyword_name))
759 } else {
760 Ok(Token::Symbol(sym))
761 }
762 }
763 }
764 }
765
766 pub fn peek_token(&mut self) -> ParseResult<&Token> {
768 if self.peeked.is_none() {
769 let tok = self.next_token()?;
770 self.peeked = Some(tok);
771 }
772 Ok(self.peeked.as_ref().unwrap())
773 }
774}
775
776pub struct Parser {
791 tokenizer: Tokenizer,
792}
793
794impl Parser {
795 pub fn new(input: &str) -> Self {
797 Parser {
798 tokenizer: Tokenizer::new(input),
799 }
800 }
801
802 pub fn parse(&mut self) -> ParseResult<Value> {
806 self.parse_expr()
807 }
808
809 pub fn peek_token(&mut self) -> ParseResult<&Token> {
814 self.tokenizer.peek_token()
815 }
816
817 pub fn current_position(&self) -> Position {
822 self.tokenizer.position()
823 }
824
825 pub fn parse_all(&mut self) -> ParseResult<Vec<Value>> {
829 let mut exprs = Vec::new();
830
831 loop {
832 let tok = self.tokenizer.peek_token()?;
833 if *tok == Token::Eof {
834 break;
835 }
836 exprs.push(self.parse_expr()?);
837 }
838
839 Ok(exprs)
840 }
841
842 fn parse_expr(&mut self) -> ParseResult<Value> {
844 let start_pos = self.tokenizer.position();
845 let tok = self.tokenizer.next_token()?;
846
847 match tok {
848 Token::Integer(n) => Ok(Value::integer(n)),
850 Token::Real(n) => Ok(Value::real(n)),
851 Token::String(s) => Ok(Value::string(s)),
852 Token::Char(ch) => Ok(Value::char(ch)),
853 Token::Bool(b) => Ok(Value::bool(b)),
854 Token::Symbol(s) => Ok(Value::symbol(&s)),
855 Token::Keyword(s) => Ok(Value::keyword(&s)),
856
857 Token::LeftParen | Token::LeftBracket => self.parse_list(start_pos),
859
860 Token::VectorStart => self.parse_vector(start_pos),
862
863 Token::Quote => {
865 let quoted = self.parse_expr()?;
866 Ok(Value::cons_with_pos(Value::symbol("quote"), Value::cons(quoted, Value::Nil), start_pos))
867 }
868
869 Token::Quasiquote => {
871 let quoted = self.parse_expr()?;
872 Ok(Value::cons_with_pos(
873 Value::symbol("quasiquote"),
874 Value::cons(quoted, Value::Nil),
875 start_pos
876 ))
877 }
878
879 Token::Unquote => {
881 let quoted = self.parse_expr()?;
882 Ok(Value::cons_with_pos(
883 Value::symbol("unquote"),
884 Value::cons(quoted, Value::Nil),
885 start_pos
886 ))
887 }
888
889 Token::UnquoteSplicing => {
891 let quoted = self.parse_expr()?;
892 Ok(Value::cons_with_pos(
893 Value::symbol("unquote-splicing"),
894 Value::cons(quoted, Value::Nil),
895 start_pos
896 ))
897 }
898
899 Token::RightParen | Token::RightBracket => Err(ParseError::new(
901 format!("Unexpected closing delimiter: {}", tok),
902 start_pos,
903 )),
904
905 Token::Dot => Err(ParseError::new(
906 "Unexpected dot outside of list".to_string(),
907 start_pos,
908 )),
909
910 Token::Eof => Err(ParseError::new(
911 "Unexpected end of input".to_string(),
912 start_pos,
913 )),
914 }
915 }
916
917 fn parse_list(&mut self, start_pos: Position) -> ParseResult<Value> {
919 let mut elements = Vec::new();
920 let mut dotted_tail = None;
921
922 loop {
923 let tok = self.tokenizer.peek_token()?;
924
925 match tok {
926 Token::RightParen | Token::RightBracket => {
927 self.tokenizer.next_token()?; break;
929 }
930
931 Token::Dot => {
932 self.tokenizer.next_token()?; dotted_tail = Some(self.parse_expr()?);
936
937 let tok = self.tokenizer.next_token()?;
939 if !matches!(tok, Token::RightParen | Token::RightBracket) {
940 return Err(ParseError::new(
941 format!("Expected ) after dotted tail, got {}", tok),
942 start_pos,
943 ));
944 }
945 break;
946 }
947
948 Token::Eof => {
949 return Err(ParseError::new(
950 "Unexpected EOF in list".to_string(),
951 start_pos,
952 ))
953 }
954
955 _ => {
956 elements.push(self.parse_expr()?);
957 }
958 }
959 }
960
961 let mut result = dotted_tail.unwrap_or(Value::Nil);
963 for elem in elements.into_iter().rev() {
964 result = Value::cons_with_pos(elem, result, start_pos.clone());
965 }
966
967 Ok(result)
968 }
969
970 fn parse_vector(&mut self, start_pos: Position) -> ParseResult<Value> {
972 let mut elements = Vec::new();
973
974 loop {
975 let tok = self.tokenizer.peek_token()?;
976
977 match tok {
978 Token::RightParen => {
979 self.tokenizer.next_token()?; break;
981 }
982
983 Token::Eof => {
984 return Err(ParseError::new(
985 "Unexpected EOF in vector".to_string(),
986 start_pos,
987 ))
988 }
989
990 _ => {
991 elements.push(self.parse_expr()?);
992 }
993 }
994 }
995
996 Ok(Value::vector(elements))
997 }
998}
999
#[cfg(test)]
mod tests {
    use super::*;

    /// Lexes `input` and checks that its leading tokens equal `expected`,
    /// in order. Tokens after the expected ones are not examined.
    fn assert_tokens(input: &str, expected: &[Token]) {
        let mut tok = Tokenizer::new(input);
        for want in expected {
            assert_eq!(tok.next_token().unwrap(), *want);
        }
    }

    /// Checks that each source text lexes to the paired integer token.
    fn assert_integers(cases: &[(&str, i64)]) {
        for &(src, want) in cases {
            assert_tokens(src, &[Token::Integer(want)]);
        }
    }

    /// Shorthand for a symbol token.
    fn sym(name: &str) -> Token {
        Token::Symbol(name.to_string())
    }

    /// Parses the first expression of `input`, panicking on error.
    fn read(input: &str) -> Value {
        Parser::new(input).parse().unwrap()
    }

    // ---- tokenizer ----

    #[test]
    fn test_tokenize_simple() {
        assert_tokens(
            "(+ 1 2)",
            &[
                Token::LeftParen,
                sym("+"),
                Token::Integer(1),
                Token::Integer(2),
                Token::RightParen,
                Token::Eof,
            ],
        );
    }

    #[test]
    fn test_tokenize_whitespace_agnostic() {
        let input = r#"(let ((x 1)
 (y 2))
 (+ x y))"#;

        assert_tokens(
            input,
            &[
                Token::LeftParen,
                sym("let"),
                Token::LeftParen,
                Token::LeftParen,
                sym("x"),
                Token::Integer(1),
                Token::RightParen,
                Token::LeftParen,
                sym("y"),
                Token::Integer(2),
            ],
        );
    }

    #[test]
    fn test_tokenize_strings() {
        assert_tokens(
            r#""hello world""#,
            &[Token::String("hello world".to_string())],
        );
        assert_tokens(
            r#""with\nnewline""#,
            &[Token::String("with\nnewline".to_string())],
        );
    }

    #[test]
    fn test_tokenize_cdata() {
        assert_tokens(
            r#"<![CDATA[<!DOCTYPE HTML>]]>"#,
            &[Token::String("<!DOCTYPE HTML>".to_string())],
        );

        assert_tokens(
            "<![CDATA[\nLine 1\nLine 2\n]]>",
            &[Token::String("\nLine 1\nLine 2\n".to_string())],
        );

        assert_tokens(
            "(define x <![CDATA[test]]>)",
            &[
                Token::LeftParen,
                sym("define"),
                sym("x"),
                Token::String("test".to_string()),
                Token::RightParen,
            ],
        );
    }

    #[test]
    fn test_tokenize_comments() {
        assert_tokens(
            "(+ 1 ; comment\n 2)",
            &[
                Token::LeftParen,
                sym("+"),
                Token::Integer(1),
                Token::Integer(2),
                Token::RightParen,
            ],
        );
    }

    #[test]
    fn test_tokenize_block_comments() {
        assert_tokens(
            "(+ 1 #| block comment |# 2)",
            &[
                Token::LeftParen,
                sym("+"),
                Token::Integer(1),
                Token::Integer(2),
                Token::RightParen,
            ],
        );
    }

    #[test]
    fn test_tokenize_booleans() {
        assert_tokens("#t #f", &[Token::Bool(true), Token::Bool(false)]);
    }

    #[test]
    fn test_tokenize_characters() {
        assert_tokens(
            r#"#\a #\space #\newline"#,
            &[Token::Char('a'), Token::Char(' '), Token::Char('\n')],
        );
    }

    #[test]
    fn test_tokenize_hex_numbers() {
        assert_integers(&[
            ("#xff", 255),
            ("#X10", 16),
            ("#xDEADBEEF", 0xDEADBEEF),
            ("#x0", 0),
        ]);
    }

    #[test]
    fn test_tokenize_octal_numbers() {
        assert_integers(&[("#o77", 63), ("#O10", 8), ("#o0", 0), ("#o777", 511)]);
    }

    #[test]
    fn test_tokenize_binary_numbers() {
        assert_integers(&[
            ("#b1010", 10),
            ("#B1111", 15),
            ("#b0", 0),
            ("#b11111111", 255),
        ]);
    }

    #[test]
    fn test_tokenize_quote() {
        assert_tokens(
            "'(1 2)",
            &[
                Token::Quote,
                Token::LeftParen,
                Token::Integer(1),
                Token::Integer(2),
                Token::RightParen,
            ],
        );
    }

    #[test]
    fn test_error_positions() {
        let mut tok = Tokenizer::new("(+ 1\n \"unclosed string");

        // `(`, `+` and `1` lex fine; the string that follows never closes.
        for _ in 0..3 {
            tok.next_token().unwrap();
        }

        let err = tok.next_token().unwrap_err();
        assert_eq!(err.position.line, 2);
        assert!(err.message.contains("EOF in string"));
    }

    // ---- parser ----

    #[test]
    fn test_parse_integer() {
        let val = read("42");
        assert!(val.is_integer());
        if let Value::Integer(n) = val {
            assert_eq!(n, 42);
        }
    }

    #[test]
    fn test_parse_simple_list() {
        let val = read("(+ 1 2)");
        assert!(val.is_list());

        if let Value::Pair(ref p) = val {
            let pair = p.borrow();
            assert!(pair.car.is_symbol());
        }
    }

    #[test]
    fn test_parse_nested_list() {
        assert!(read("(+ (* 2 3) 4)").is_list());
    }

    #[test]
    fn test_parse_quoted() {
        let val = read("'(1 2 3)");

        match val {
            Value::Pair(ref p) => {
                let pair = p.borrow();
                match &pair.car {
                    Value::Symbol(s) => assert_eq!(&**s, "quote"),
                    _ => panic!("Expected symbol 'quote'"),
                };
            }
            _ => panic!("Expected pair"),
        }
    }

    #[test]
    fn test_parse_vector() {
        let val = read("#(1 2 3)");
        assert!(val.is_vector());

        if let Value::Vector(ref v) = val {
            let vec = v.borrow();
            assert_eq!(vec.len(), 3);
        }
    }

    #[test]
    fn test_parse_dotted_list() {
        let val = read("(1 . 2)");

        match val {
            Value::Pair(ref p) => {
                let pair = p.borrow();
                assert!(matches!(pair.car, Value::Integer(1)));
                assert!(matches!(pair.cdr, Value::Integer(2)));
            }
            _ => panic!("Expected pair"),
        }
    }

    #[test]
    fn test_parse_string() {
        assert!(read(r#""hello world""#).is_string());
    }

    #[test]
    fn test_parse_bool() {
        let yes = read("#t");
        assert!(yes.is_bool());
        assert!(yes.is_true());

        let no = read("#f");
        assert!(no.is_bool());
        assert!(!no.is_true());
    }

    #[test]
    fn test_parse_multiline_let() {
        let input = r#"
 (let ((x 1)
 (y 2))
 (+ x y))
 "#;

        assert!(read(input).is_list());
    }

    #[test]
    fn test_parse_all() {
        let exprs = Parser::new("(define x 1) (define y 2) (+ x y)")
            .parse_all()
            .unwrap();
        assert_eq!(exprs.len(), 3);
    }

    #[test]
    fn test_parse_empty_list() {
        assert!(read("()").is_nil());
    }

    #[test]
    fn test_parse_keyword() {
        let val = read("#:foo");
        match val {
            Value::Keyword(ref k) => assert_eq!(&**k, "foo"),
            _ => panic!("Expected keyword"),
        }
    }
}
1337