1#![allow(clippy::result_large_err)]
4
5use std::sync::Arc;
6
7use miette::NamedSource;
8
9use cljrs_types::error::{CljxError, CljxResult};
10use cljrs_types::span::Span;
11
12use crate::form::{Form, FormKind};
13use crate::lexer::Lexer;
14use crate::token::Token;
15
/// Reader that turns the token stream produced by a [`Lexer`] into [`Form`]s.
///
/// Forms can be pulled one at a time with `parse_one`, all at once with
/// `parse_all`, or through the `Iterator` implementation.
pub struct Parser {
    /// Source of `(Token, Span)` pairs.
    lexer: Lexer,
    /// Single-token lookahead: the next pair once it has been lexed but not
    /// yet consumed, `None` otherwise.
    peeked: Option<(Token, Span)>,
}
22
23impl Parser {
24 pub fn new(source: String, file: String) -> Self {
25 Self {
26 lexer: Lexer::new(source, file),
27 peeked: None,
28 }
29 }
30
31 pub fn parse_one(&mut self) -> CljxResult<Option<Form>> {
35 loop {
36 if matches!(self.peek_tok()?, Token::Eof) {
37 return Ok(None);
38 }
39 if let Some(form) = self.parse_raw()? {
40 return Ok(Some(form)); }
42 }
43 }
44
45 pub fn parse_all(&mut self) -> CljxResult<Vec<Form>> {
47 let mut forms = Vec::new();
48 while let Some(form) = self.parse_one()? {
49 forms.push(form);
50 }
51 Ok(forms)
52 }
53
54 fn fill(&mut self) -> CljxResult<()> {
58 if self.peeked.is_none() {
59 let pair = self.lexer.next_token()?;
60 self.peeked = Some(pair);
61 }
62 Ok(())
63 }
64
65 fn bump(&mut self) -> CljxResult<(Token, Span)> {
67 self.fill()?;
68 Ok(self.peeked.take().unwrap())
69 }
70
71 fn peek_tok(&mut self) -> CljxResult<Token> {
73 self.fill()?;
74 Ok(self.peeked.as_ref().unwrap().0.clone())
75 }
76
77 fn peek_span(&mut self) -> CljxResult<Span> {
79 self.fill()?;
80 Ok(self.peeked.as_ref().unwrap().1.clone())
81 }
82
83 fn make_error(&self, msg: impl Into<String>, span: Span) -> CljxError {
86 CljxError::ReadError {
87 message: msg.into(),
88 span: Some(miette::SourceSpan::from(span)),
89 src: NamedSource::new(
90 (**self.lexer.file()).clone(),
91 (**self.lexer.source()).clone(),
92 ),
93 }
94 }
95
96 fn merged_span(&self, start: &Span, end: &Span) -> Span {
99 Span::new(
100 Arc::clone(&start.file),
101 start.start,
102 end.end,
103 start.line,
104 start.col,
105 )
106 }
107
108 fn parse_raw(&mut self) -> CljxResult<Option<Form>> {
113 let tok = self.peek_tok()?;
114 let span = self.peek_span()?;
115
116 match tok {
117 Token::Eof => Ok(None),
118
119 Token::RParen | Token::RBracket | Token::RBrace => {
121 self.bump()?;
122 Err(self.make_error("unexpected closing delimiter", span))
123 }
124
125 Token::Nil => {
127 self.bump()?;
128 Ok(Some(Form::new(FormKind::Nil, span)))
129 }
130 Token::Bool(b) => {
131 self.bump()?;
132 Ok(Some(Form::new(FormKind::Bool(b), span)))
133 }
134 Token::Int(n) => {
135 self.bump()?;
136 Ok(Some(Form::new(FormKind::Int(n), span)))
137 }
138 Token::BigInt(s) => {
139 self.bump()?;
140 Ok(Some(Form::new(FormKind::BigInt(s), span)))
141 }
142 Token::Float(f) => {
143 self.bump()?;
144 Ok(Some(Form::new(FormKind::Float(f), span)))
145 }
146 Token::BigDecimal(s) => {
147 self.bump()?;
148 Ok(Some(Form::new(FormKind::BigDecimal(s), span)))
149 }
150 Token::Ratio(s) => {
151 self.bump()?;
152 Ok(Some(Form::new(FormKind::Ratio(s), span)))
153 }
154 Token::Char(c) => {
155 self.bump()?;
156 Ok(Some(Form::new(FormKind::Char(c), span)))
157 }
158 Token::Str(s) => {
159 self.bump()?;
160 Ok(Some(Form::new(FormKind::Str(s), span)))
161 }
162 Token::Regex(s) => {
163 self.bump()?;
164 Ok(Some(Form::new(FormKind::Regex(s), span)))
165 }
166 Token::Symbolic(s) => {
167 self.bump()?;
168 let val = match s.as_str() {
169 "Inf" => f64::INFINITY,
170 "-Inf" => f64::NEG_INFINITY,
171 "NaN" => f64::NAN,
172 _ => unreachable!("lexer guarantees only Inf/-Inf/NaN"),
173 };
174 Ok(Some(Form::new(FormKind::Symbolic(val), span)))
175 }
176
177 Token::Symbol(s) => {
179 self.bump()?;
180 Ok(Some(Form::new(FormKind::Symbol(s), span)))
181 }
182 Token::Keyword(s) => {
183 self.bump()?;
184 Ok(Some(Form::new(FormKind::Keyword(s), span)))
185 }
186 Token::AutoKeyword(s) => {
187 self.bump()?;
188 Ok(Some(Form::new(FormKind::AutoKeyword(s), span)))
189 }
190
191 Token::LParen => {
193 self.bump()?;
194 let (forms, close) = self.parse_seq_forms(Token::RParen, span.clone(), "list")?;
195 Ok(Some(Form::new(
196 FormKind::List(forms),
197 self.merged_span(&span, &close),
198 )))
199 }
200 Token::LBracket => {
201 self.bump()?;
202 let (forms, close) =
203 self.parse_seq_forms(Token::RBracket, span.clone(), "vector")?;
204 Ok(Some(Form::new(
205 FormKind::Vector(forms),
206 self.merged_span(&span, &close),
207 )))
208 }
209 Token::LBrace => {
210 self.bump()?;
211 let (forms, close) = self.parse_seq_forms(Token::RBrace, span.clone(), "map")?;
212 if forms.len() % 2 != 0 {
213 return Err(
214 self.make_error("map literal must have an even number of forms", span)
215 );
216 }
217 Ok(Some(Form::new(
218 FormKind::Map(forms),
219 self.merged_span(&span, &close),
220 )))
221 }
222 Token::HashSet => {
223 self.bump()?;
224 let (forms, close) = self.parse_seq_forms(Token::RBrace, span.clone(), "set")?;
225 Ok(Some(Form::new(
226 FormKind::Set(forms),
227 self.merged_span(&span, &close),
228 )))
229 }
230 Token::HashFn => {
231 self.bump()?;
232 let (forms, close) =
233 self.parse_seq_forms(Token::RParen, span.clone(), "anonymous function")?;
234 Ok(Some(Form::new(
235 FormKind::AnonFn(forms),
236 self.merged_span(&span, &close),
237 )))
238 }
239
240 Token::Quote => {
242 self.bump()?;
243 let inner = self.require_form(span.clone(), "quoted form")?;
244 let end = inner.span.clone();
245 Ok(Some(Form::new(
246 FormKind::Quote(Box::new(inner)),
247 self.merged_span(&span, &end),
248 )))
249 }
250 Token::SyntaxQuote => {
251 self.bump()?;
252 let inner = self.require_form(span.clone(), "syntax-quoted form")?;
253 let end = inner.span.clone();
254 Ok(Some(Form::new(
255 FormKind::SyntaxQuote(Box::new(inner)),
256 self.merged_span(&span, &end),
257 )))
258 }
259 Token::Unquote => {
260 self.bump()?;
261 let inner = self.require_form(span.clone(), "unquoted form")?;
262 let end = inner.span.clone();
263 Ok(Some(Form::new(
264 FormKind::Unquote(Box::new(inner)),
265 self.merged_span(&span, &end),
266 )))
267 }
268 Token::UnquoteSplice => {
269 self.bump()?;
270 let inner = self.require_form(span.clone(), "unquote-spliced form")?;
271 let end = inner.span.clone();
272 Ok(Some(Form::new(
273 FormKind::UnquoteSplice(Box::new(inner)),
274 self.merged_span(&span, &end),
275 )))
276 }
277 Token::Deref => {
278 self.bump()?;
279 let inner = self.require_form(span.clone(), "deref form")?;
280 let end = inner.span.clone();
281 Ok(Some(Form::new(
282 FormKind::Deref(Box::new(inner)),
283 self.merged_span(&span, &end),
284 )))
285 }
286 Token::HashVar => {
287 self.bump()?;
288 let inner = self.require_form(span.clone(), "var form")?;
289 let end = inner.span.clone();
290 Ok(Some(Form::new(
291 FormKind::Var(Box::new(inner)),
292 self.merged_span(&span, &end),
293 )))
294 }
295 Token::Meta => {
296 self.bump()?;
297 let meta = self.require_form(span.clone(), "meta form")?;
298 let target = self.require_form(span.clone(), "annotated form")?;
299 let end = target.span.clone();
300 Ok(Some(Form::new(
301 FormKind::Meta(Box::new(meta), Box::new(target)),
302 self.merged_span(&span, &end),
303 )))
304 }
305
306 Token::HashDiscard => {
308 self.bump()?;
309 if matches!(self.peek_tok()?, Token::Eof) {
310 return Err(self.make_error("unexpected end of file after #_", span));
311 }
312 self.parse_raw()?; Ok(None)
314 }
315
316 Token::ReaderCond => {
318 self.bump()?;
319 let form = self.parse_reader_cond(false, span)?;
320 Ok(Some(form))
321 }
322 Token::ReaderCondSplice => {
323 self.bump()?;
324 let form = self.parse_reader_cond(true, span)?;
325 Ok(Some(form))
326 }
327
328 Token::TaggedLiteral(tag) => {
330 self.bump()?;
331 let inner = self.require_form(span.clone(), "tagged literal value")?;
332 let end = inner.span.clone();
333 Ok(Some(Form::new(
334 FormKind::TaggedLiteral(tag, Box::new(inner)),
335 self.merged_span(&span, &end),
336 )))
337 }
338 }
339 }
340
341 fn parse_seq_forms(
345 &mut self,
346 closing: Token,
347 open_span: Span,
348 name: &str,
349 ) -> CljxResult<(Vec<Form>, Span)> {
350 let mut forms = Vec::new();
351 loop {
352 let tok = self.peek_tok()?;
353 if tok == Token::Eof {
354 return Err(self.make_error(format!("unclosed {name}"), open_span));
355 }
356 if tok == closing {
357 let (_, close_span) = self.bump()?;
358 return Ok((forms, close_span));
359 }
360 if let Some(form) = self.parse_raw()? {
361 forms.push(form); }
363 }
364 }
365
366 fn require_form(&mut self, macro_span: Span, what: &str) -> CljxResult<Form> {
369 loop {
370 if matches!(self.peek_tok()?, Token::Eof) {
371 return Err(self.make_error(
372 format!("unexpected end of file; expected {what}"),
373 macro_span,
374 ));
375 }
376 if let Some(form) = self.parse_raw()? {
377 return Ok(form); }
379 }
380 }
381
382 fn parse_reader_cond(&mut self, splicing: bool, start: Span) -> CljxResult<Form> {
385 let next = self.peek_tok()?;
386 if next != Token::LParen {
387 let span = self.peek_span()?;
388 return Err(self.make_error(
389 "reader conditional requires `(` immediately after `#?`",
390 span,
391 ));
392 }
393 let (_, open_span) = self.bump()?; let (clauses, close_span) =
395 self.parse_seq_forms(Token::RParen, open_span.clone(), "reader conditional")?;
396 if clauses.len() % 2 != 0 {
397 return Err(self.make_error(
398 "reader conditional must have an even number of clauses",
399 open_span,
400 ));
401 }
402 Ok(Form::new(
403 FormKind::ReaderCond { splicing, clauses },
404 self.merged_span(&start, &close_span),
405 ))
406 }
407}
408
409impl Iterator for Parser {
412 type Item = CljxResult<Form>;
413
414 fn next(&mut self) -> Option<Self::Item> {
415 match self.parse_one() {
416 Ok(Some(form)) => Some(Ok(form)),
417 Ok(None) => None,
418 Err(e) => Some(Err(e)),
419 }
420 }
421}
422
423#[cfg(test)]
426mod tests {
427 use std::sync::Arc;
428
429 use cljrs_types::{error::CljxError, span::Span};
430
431 use super::*;
432
433 fn dummy_span() -> Span {
436 Span::new(Arc::new("<test>".to_string()), 0, 0, 1, 1)
437 }
438
439 fn f(kind: FormKind) -> Form {
441 Form::new(kind, dummy_span())
442 }
443
444 fn parse_all(src: &str) -> Vec<Form> {
445 Parser::new(src.to_string(), "<test>".to_string())
446 .parse_all()
447 .unwrap_or_else(|e| panic!("parse error: {e}"))
448 }
449
450 fn parse1(src: &str) -> Form {
451 Parser::new(src.to_string(), "<test>".to_string())
452 .parse_one()
453 .unwrap_or_else(|e| panic!("parse error: {e}"))
454 .expect("expected a form but got EOF")
455 }
456
457 fn parse_err(src: &str) -> String {
458 let mut p = Parser::new(src.to_string(), "<test>".to_string());
459 match p.parse_all() {
460 Err(CljxError::ReadError { message, .. }) => message,
461 Err(e) => panic!("unexpected error type: {e:?}"),
462 Ok(forms) => panic!("expected a parse error but got: {forms:?}"),
463 }
464 }
465
466 #[test]
469 fn test_nil() {
470 assert_eq!(parse1("nil").kind, FormKind::Nil);
471 }
472
473 #[test]
474 fn test_bool() {
475 assert_eq!(parse1("true").kind, FormKind::Bool(true));
476 assert_eq!(parse1("false").kind, FormKind::Bool(false));
477 }
478
479 #[test]
480 fn test_int() {
481 assert_eq!(parse1("42").kind, FormKind::Int(42));
482 assert_eq!(parse1("-7").kind, FormKind::Int(-7));
483 }
484
485 #[test]
486 fn test_bigint() {
487 assert_eq!(parse1("42N").kind, FormKind::BigInt("42".to_string()));
488 }
489
490 #[test]
491 #[allow(clippy::approx_constant)]
492 fn test_float() {
493 assert_eq!(parse1("3.14").kind, FormKind::Float(3.14));
494 assert_eq!(parse1("1e10").kind, FormKind::Float(1e10));
495 }
496
497 #[test]
498 fn test_bigdecimal() {
499 assert_eq!(
500 parse1("3.14M").kind,
501 FormKind::BigDecimal("3.14".to_string())
502 );
503 }
504
505 #[test]
506 fn test_ratio() {
507 assert_eq!(parse1("3/4").kind, FormKind::Ratio("3/4".to_string()));
508 assert_eq!(parse1("-1/2").kind, FormKind::Ratio("-1/2".to_string()));
509 }
510
511 #[test]
512 fn test_char() {
513 assert_eq!(parse1("\\a").kind, FormKind::Char('a'));
514 assert_eq!(parse1("\\newline").kind, FormKind::Char('\n'));
515 }
516
517 #[test]
518 fn test_str() {
519 assert_eq!(parse1("\"hello\"").kind, FormKind::Str("hello".to_string()));
520 }
521
522 #[test]
523 fn test_regex() {
524 assert_eq!(
525 parse1("#\"[a-z]+\"").kind,
526 FormKind::Regex("[a-z]+".to_string())
527 );
528 }
529
530 #[test]
531 fn test_symbolic() {
532 assert!(matches!(
533 parse1("##Inf").kind,
534 FormKind::Symbolic(f) if f == f64::INFINITY
535 ));
536 assert!(matches!(
537 parse1("##-Inf").kind,
538 FormKind::Symbolic(f) if f == f64::NEG_INFINITY
539 ));
540 assert!(matches!(
542 parse1("##NaN").kind,
543 FormKind::Symbolic(f) if f.is_nan()
544 ));
545 }
546
547 #[test]
548 fn test_symbol() {
549 assert_eq!(parse1("foo").kind, FormKind::Symbol("foo".to_string()));
550 }
551
552 #[test]
553 fn test_keyword() {
554 assert_eq!(parse1(":foo").kind, FormKind::Keyword("foo".to_string()));
555 }
556
557 #[test]
558 fn test_auto_keyword() {
559 assert_eq!(
560 parse1("::foo").kind,
561 FormKind::AutoKeyword("foo".to_string())
562 );
563 }
564
565 #[test]
568 fn test_empty_list() {
569 assert_eq!(parse1("()").kind, FormKind::List(vec![]));
570 }
571
572 #[test]
573 fn test_list() {
574 assert_eq!(
575 parse1("(1 2 3)").kind,
576 FormKind::List(vec![
577 f(FormKind::Int(1)),
578 f(FormKind::Int(2)),
579 f(FormKind::Int(3)),
580 ])
581 );
582 }
583
584 #[test]
585 fn test_vector() {
586 assert_eq!(
587 parse1("[1 2]").kind,
588 FormKind::Vector(vec![f(FormKind::Int(1)), f(FormKind::Int(2))])
589 );
590 }
591
592 #[test]
593 fn test_map() {
594 assert_eq!(
595 parse1("{:a 1}").kind,
596 FormKind::Map(vec![
597 f(FormKind::Keyword("a".to_string())),
598 f(FormKind::Int(1)),
599 ])
600 );
601 }
602
603 #[test]
604 fn test_set() {
605 assert_eq!(
606 parse1("#{1 2}").kind,
607 FormKind::Set(vec![f(FormKind::Int(1)), f(FormKind::Int(2))])
608 );
609 }
610
611 #[test]
614 fn test_nested() {
615 assert_eq!(
616 parse1("(+ [1 2] {:a 3})").kind,
617 FormKind::List(vec![
618 f(FormKind::Symbol("+".to_string())),
619 f(FormKind::Vector(vec![
620 f(FormKind::Int(1)),
621 f(FormKind::Int(2)),
622 ])),
623 f(FormKind::Map(vec![
624 f(FormKind::Keyword("a".to_string())),
625 f(FormKind::Int(3)),
626 ])),
627 ])
628 );
629 }
630
631 #[test]
634 fn test_quote() {
635 assert_eq!(
636 parse1("'foo").kind,
637 FormKind::Quote(Box::new(f(FormKind::Symbol("foo".to_string()))))
638 );
639 }
640
641 #[test]
642 fn test_syntax_quote() {
643 assert_eq!(
644 parse1("`foo").kind,
645 FormKind::SyntaxQuote(Box::new(f(FormKind::Symbol("foo".to_string()))))
646 );
647 }
648
649 #[test]
650 fn test_unquote() {
651 assert_eq!(
652 parse1("~foo").kind,
653 FormKind::Unquote(Box::new(f(FormKind::Symbol("foo".to_string()))))
654 );
655 }
656
657 #[test]
658 fn test_unquote_splice() {
659 assert_eq!(
660 parse1("~@foo").kind,
661 FormKind::UnquoteSplice(Box::new(f(FormKind::Symbol("foo".to_string()))))
662 );
663 }
664
665 #[test]
666 fn test_deref() {
667 assert_eq!(
668 parse1("@foo").kind,
669 FormKind::Deref(Box::new(f(FormKind::Symbol("foo".to_string()))))
670 );
671 }
672
673 #[test]
674 fn test_var() {
675 assert_eq!(
676 parse1("#'foo").kind,
677 FormKind::Var(Box::new(f(FormKind::Symbol("foo".to_string()))))
678 );
679 }
680
681 #[test]
684 fn test_meta_map() {
685 assert_eq!(
686 parse1("^{:a 1} foo").kind,
687 FormKind::Meta(
688 Box::new(f(FormKind::Map(vec![
689 f(FormKind::Keyword("a".to_string())),
690 f(FormKind::Int(1)),
691 ]))),
692 Box::new(f(FormKind::Symbol("foo".to_string()))),
693 )
694 );
695 }
696
697 #[test]
698 fn test_meta_keyword() {
699 assert_eq!(
700 parse1("^:kw foo").kind,
701 FormKind::Meta(
702 Box::new(f(FormKind::Keyword("kw".to_string()))),
703 Box::new(f(FormKind::Symbol("foo".to_string()))),
704 )
705 );
706 }
707
708 #[test]
709 fn test_meta_symbol() {
710 assert_eq!(
711 parse1("^Sym foo").kind,
712 FormKind::Meta(
713 Box::new(f(FormKind::Symbol("Sym".to_string()))),
714 Box::new(f(FormKind::Symbol("foo".to_string()))),
715 )
716 );
717 }
718
719 #[test]
722 fn test_anon_fn() {
723 assert_eq!(
724 parse1("#(+ % 1)").kind,
725 FormKind::AnonFn(vec![
726 f(FormKind::Symbol("+".to_string())),
727 f(FormKind::Symbol("%".to_string())),
728 f(FormKind::Int(1)),
729 ])
730 );
731 }
732
733 #[test]
736 fn test_discard_simple() {
737 let forms = parse_all("#_foo bar");
738 assert_eq!(forms.len(), 1);
739 assert_eq!(forms[0].kind, FormKind::Symbol("bar".to_string()));
740 }
741
742 #[test]
743 fn test_discard_in_vector() {
744 assert_eq!(
745 parse1("[1 #_2 3]").kind,
746 FormKind::Vector(vec![f(FormKind::Int(1)), f(FormKind::Int(3))])
747 );
748 }
749
750 #[test]
751 fn test_discard_chained() {
752 let forms = parse_all("#_ #_ 1 2 3");
754 assert_eq!(forms.len(), 2);
755 assert_eq!(forms[0].kind, FormKind::Int(2));
756 assert_eq!(forms[1].kind, FormKind::Int(3));
757 }
758
759 #[test]
762 fn test_reader_cond() {
763 assert_eq!(
764 parse1("#?(:rust 1 :clj 2)").kind,
765 FormKind::ReaderCond {
766 splicing: false,
767 clauses: vec![
768 f(FormKind::Keyword("rust".to_string())),
769 f(FormKind::Int(1)),
770 f(FormKind::Keyword("clj".to_string())),
771 f(FormKind::Int(2)),
772 ],
773 }
774 );
775 }
776
777 #[test]
778 fn test_reader_cond_splice() {
779 assert_eq!(
780 parse1("#?@(:rust [1 2])").kind,
781 FormKind::ReaderCond {
782 splicing: true,
783 clauses: vec![
784 f(FormKind::Keyword("rust".to_string())),
785 f(FormKind::Vector(vec![
786 f(FormKind::Int(1)),
787 f(FormKind::Int(2)),
788 ])),
789 ],
790 }
791 );
792 }
793
794 #[test]
797 fn test_tagged_literal() {
798 assert_eq!(
799 parse1("#inst \"2024-01-01\"").kind,
800 FormKind::TaggedLiteral(
801 "inst".to_string(),
802 Box::new(f(FormKind::Str("2024-01-01".to_string()))),
803 )
804 );
805 }
806
807 #[test]
810 fn test_span_col_offset() {
811 let form = parse1(" 42");
812 assert_eq!(form.span.start, 2);
813 assert_eq!(form.span.col, 3);
814 }
815
816 #[test]
817 fn test_span_multiline() {
818 let forms = parse_all("a\nb");
819 assert_eq!(forms[0].span.line, 1);
820 assert_eq!(forms[1].span.line, 2);
821 }
822
823 #[test]
826 fn test_parse_all_multiple() {
827 let forms = parse_all("1 2 3");
828 assert_eq!(forms.len(), 3);
829 assert_eq!(forms[0].kind, FormKind::Int(1));
830 assert_eq!(forms[1].kind, FormKind::Int(2));
831 assert_eq!(forms[2].kind, FormKind::Int(3));
832 }
833
834 #[test]
837 fn test_err_unclosed_list() {
838 let msg = parse_err("(1 2");
839 assert!(msg.contains("unclosed") || msg.contains("list"), "{msg}");
840 }
841
842 #[test]
843 fn test_err_unexpected_close() {
844 let msg = parse_err(")");
845 assert!(msg.contains("unexpected"), "{msg}");
846 }
847
848 #[test]
849 fn test_err_odd_map() {
850 let msg = parse_err("{:a}");
851 assert!(msg.contains("even") || msg.contains("map"), "{msg}");
852 }
853
854 #[test]
855 fn test_err_reader_cond_non_list() {
856 let msg = parse_err("#?[1 2]");
858 assert!(
859 msg.contains('(') || msg.contains("reader conditional"),
860 "{msg}"
861 );
862 }
863
864 #[test]
865 fn test_err_odd_reader_cond_clauses() {
866 let msg = parse_err("#?(:cljx)");
867 assert!(
868 msg.contains("even") || msg.contains("reader conditional"),
869 "{msg}"
870 );
871 }
872}