1#![allow(clippy::result_large_err)]
4
5use std::sync::Arc;
6
7use miette::NamedSource;
8
9use cljrs_types::error::{CljxError, CljxResult};
10use cljrs_types::span::Span;
11
12use crate::form::{Form, FormKind};
13use crate::lexer::Lexer;
14use crate::token::Token;
15
/// Parser that turns the token stream of a [`Lexer`] into [`Form`]s.
pub struct Parser {
    /// Source of `(Token, Span)` pairs.
    lexer: Lexer,
    /// One-token lookahead: a pair already pulled from the lexer but not yet
    /// consumed, or `None` when nothing is buffered.
    peeked: Option<(Token, Span)>,
}
22
23impl Parser {
24 pub fn new(source: String, file: String) -> Self {
25 Self {
26 lexer: Lexer::new(source, file),
27 peeked: None,
28 }
29 }
30
31 pub fn parse_one(&mut self) -> CljxResult<Option<Form>> {
35 loop {
36 if matches!(self.peek_tok()?, Token::Eof) {
37 return Ok(None);
38 }
39 if let Some(form) = self.parse_raw()? {
40 return Ok(Some(form)); }
42 }
43 }
44
45 pub fn parse_all(&mut self) -> CljxResult<Vec<Form>> {
47 let mut forms = Vec::new();
48 while let Some(form) = self.parse_one()? {
49 forms.push(form);
50 }
51 Ok(forms)
52 }
53
54 fn fill(&mut self) -> CljxResult<()> {
58 if self.peeked.is_none() {
59 let pair = self.lexer.next_token()?;
60 self.peeked = Some(pair);
61 }
62 Ok(())
63 }
64
65 fn bump(&mut self) -> CljxResult<(Token, Span)> {
67 self.fill()?;
68 Ok(self.peeked.take().unwrap())
69 }
70
71 fn peek_tok(&mut self) -> CljxResult<Token> {
73 self.fill()?;
74 Ok(self.peeked.as_ref().unwrap().0.clone())
75 }
76
77 fn peek_span(&mut self) -> CljxResult<Span> {
79 self.fill()?;
80 Ok(self.peeked.as_ref().unwrap().1.clone())
81 }
82
83 fn make_error(&self, msg: impl Into<String>, span: Span) -> CljxError {
86 CljxError::ReadError {
87 message: msg.into(),
88 span: Some(miette::SourceSpan::from(span)),
89 src: NamedSource::new(
90 (**self.lexer.file()).clone(),
91 (**self.lexer.source()).clone(),
92 ),
93 }
94 }
95
96 fn merged_span(&self, start: &Span, end: &Span) -> Span {
99 Span::new(
100 Arc::clone(&start.file),
101 start.start,
102 end.end,
103 start.line,
104 start.col,
105 )
106 }
107
108 fn parse_raw(&mut self) -> CljxResult<Option<Form>> {
113 let tok = self.peek_tok()?;
114 let span = self.peek_span()?;
115
116 match tok {
117 Token::Eof => Ok(None),
118
119 Token::RParen | Token::RBracket | Token::RBrace => {
121 self.bump()?;
122 Err(self.make_error("unexpected closing delimiter", span))
123 }
124
125 Token::Nil => {
127 self.bump()?;
128 Ok(Some(Form::new(FormKind::Nil, span)))
129 }
130 Token::Bool(b) => {
131 self.bump()?;
132 Ok(Some(Form::new(FormKind::Bool(b), span)))
133 }
134 Token::Int(n) => {
135 self.bump()?;
136 Ok(Some(Form::new(FormKind::Int(n), span)))
137 }
138 Token::BigInt(s) => {
139 self.bump()?;
140 Ok(Some(Form::new(FormKind::BigInt(s), span)))
141 }
142 Token::Float(f) => {
143 self.bump()?;
144 Ok(Some(Form::new(FormKind::Float(f), span)))
145 }
146 Token::BigDecimal(s) => {
147 self.bump()?;
148 Ok(Some(Form::new(FormKind::BigDecimal(s), span)))
149 }
150 Token::Ratio(s) => {
151 self.bump()?;
152 Ok(Some(Form::new(FormKind::Ratio(s), span)))
153 }
154 Token::Char(c) => {
155 self.bump()?;
156 Ok(Some(Form::new(FormKind::Char(c), span)))
157 }
158 Token::Str(s) => {
159 self.bump()?;
160 Ok(Some(Form::new(FormKind::Str(s), span)))
161 }
162 Token::Regex(s) => {
163 self.bump()?;
164 Ok(Some(Form::new(FormKind::Regex(s), span)))
165 }
166 Token::Symbolic(s) => {
167 self.bump()?;
168 let val = match s.as_str() {
169 "Inf" => f64::INFINITY,
170 "-Inf" => f64::NEG_INFINITY,
171 "NaN" => f64::NAN,
172 _ => unreachable!("lexer guarantees only Inf/-Inf/NaN"),
173 };
174 Ok(Some(Form::new(FormKind::Symbolic(val), span)))
175 }
176
177 Token::Symbol(s) => {
179 self.bump()?;
180 Ok(Some(Form::new(FormKind::Symbol(s), span)))
181 }
182 Token::Keyword(s) => {
183 self.bump()?;
184 Ok(Some(Form::new(FormKind::Keyword(s), span)))
185 }
186 Token::AutoKeyword(s) => {
187 self.bump()?;
188 Ok(Some(Form::new(FormKind::AutoKeyword(s), span)))
189 }
190
191 Token::LParen => {
193 self.bump()?;
194 let (forms, close) = self.parse_seq_forms(Token::RParen, span.clone(), "list")?;
195 Ok(Some(Form::new(
196 FormKind::List(forms),
197 self.merged_span(&span, &close),
198 )))
199 }
200 Token::LBracket => {
201 self.bump()?;
202 let (forms, close) =
203 self.parse_seq_forms(Token::RBracket, span.clone(), "vector")?;
204 Ok(Some(Form::new(
205 FormKind::Vector(forms),
206 self.merged_span(&span, &close),
207 )))
208 }
209 Token::LBrace => {
210 self.bump()?;
211 let (forms, close) = self.parse_seq_forms(Token::RBrace, span.clone(), "map")?;
212 if forms.len() % 2 != 0 {
213 return Err(
214 self.make_error("map literal must have an even number of forms", span)
215 );
216 }
217 Ok(Some(Form::new(
218 FormKind::Map(forms),
219 self.merged_span(&span, &close),
220 )))
221 }
222 Token::HashSet => {
223 self.bump()?;
224 let (forms, close) = self.parse_seq_forms(Token::RBrace, span.clone(), "set")?;
225 Ok(Some(Form::new(
226 FormKind::Set(forms),
227 self.merged_span(&span, &close),
228 )))
229 }
230 Token::HashFn => {
231 self.bump()?;
232 let (forms, close) =
233 self.parse_seq_forms(Token::RParen, span.clone(), "anonymous function")?;
234 Ok(Some(Form::new(
235 FormKind::AnonFn(forms),
236 self.merged_span(&span, &close),
237 )))
238 }
239
240 Token::Quote => {
242 self.bump()?;
243 let inner = self.require_form(span.clone(), "quoted form")?;
244 let end = inner.span.clone();
245 Ok(Some(Form::new(
246 FormKind::Quote(Box::new(inner)),
247 self.merged_span(&span, &end),
248 )))
249 }
250 Token::SyntaxQuote => {
251 self.bump()?;
252 let inner = self.require_form(span.clone(), "syntax-quoted form")?;
253 let end = inner.span.clone();
254 Ok(Some(Form::new(
255 FormKind::SyntaxQuote(Box::new(inner)),
256 self.merged_span(&span, &end),
257 )))
258 }
259 Token::Unquote => {
260 self.bump()?;
261 let inner = self.require_form(span.clone(), "unquoted form")?;
262 let end = inner.span.clone();
263 Ok(Some(Form::new(
264 FormKind::Unquote(Box::new(inner)),
265 self.merged_span(&span, &end),
266 )))
267 }
268 Token::UnquoteSplice => {
269 self.bump()?;
270 let inner = self.require_form(span.clone(), "unquote-spliced form")?;
271 let end = inner.span.clone();
272 Ok(Some(Form::new(
273 FormKind::UnquoteSplice(Box::new(inner)),
274 self.merged_span(&span, &end),
275 )))
276 }
277 Token::Deref => {
278 self.bump()?;
279 let inner = self.require_form(span.clone(), "deref form")?;
280 let end = inner.span.clone();
281 Ok(Some(Form::new(
282 FormKind::Deref(Box::new(inner)),
283 self.merged_span(&span, &end),
284 )))
285 }
286 Token::HashVar => {
287 self.bump()?;
288 let inner = self.require_form(span.clone(), "var form")?;
289 let end = inner.span.clone();
290 Ok(Some(Form::new(
291 FormKind::Var(Box::new(inner)),
292 self.merged_span(&span, &end),
293 )))
294 }
295 Token::Meta => {
296 self.bump()?;
297 let meta = self.require_form(span.clone(), "meta form")?;
298 let target = self.require_form(span.clone(), "annotated form")?;
299 let end = target.span.clone();
300 Ok(Some(Form::new(
301 FormKind::Meta(Box::new(meta), Box::new(target)),
302 self.merged_span(&span, &end),
303 )))
304 }
305
306 Token::HashDiscard => {
308 self.bump()?;
309 if matches!(self.peek_tok()?, Token::Eof) {
310 return Err(self.make_error("unexpected end of file after #_", span));
311 }
312 self.parse_raw()?; Ok(None)
314 }
315
316 Token::ReaderCond => {
318 self.bump()?;
319 let form = self.parse_reader_cond(false, span)?;
320 Ok(Some(form))
321 }
322 Token::ReaderCondSplice => {
323 self.bump()?;
324 let form = self.parse_reader_cond(true, span)?;
325 Ok(Some(form))
326 }
327
328 Token::TaggedLiteral(tag) => {
330 self.bump()?;
331 let inner = self.require_form(span.clone(), "tagged literal value")?;
332 let end = inner.span.clone();
333 Ok(Some(Form::new(
334 FormKind::TaggedLiteral(tag, Box::new(inner)),
335 self.merged_span(&span, &end),
336 )))
337 }
338 }
339 }
340
341 fn parse_seq_forms(
345 &mut self,
346 closing: Token,
347 open_span: Span,
348 name: &str,
349 ) -> CljxResult<(Vec<Form>, Span)> {
350 let mut forms = Vec::new();
351 loop {
352 let tok = self.peek_tok()?;
353 if tok == Token::Eof {
354 return Err(self.make_error(format!("unclosed {name}"), open_span));
355 }
356 if tok == closing {
357 let (_, close_span) = self.bump()?;
358 return Ok((forms, close_span));
359 }
360 if let Some(form) = self.parse_raw()? {
361 forms.push(form); }
363 }
364 }
365
366 fn require_form(&mut self, macro_span: Span, what: &str) -> CljxResult<Form> {
369 loop {
370 if matches!(self.peek_tok()?, Token::Eof) {
371 return Err(self.make_error(
372 format!("unexpected end of file; expected {what}"),
373 macro_span,
374 ));
375 }
376 if let Some(form) = self.parse_raw()? {
377 return Ok(form); }
379 }
380 }
381
382 fn parse_reader_cond(&mut self, splicing: bool, start: Span) -> CljxResult<Form> {
385 let next = self.peek_tok()?;
386 if next != Token::LParen {
387 let span = self.peek_span()?;
388 return Err(self.make_error(
389 "reader conditional requires `(` immediately after `#?`",
390 span,
391 ));
392 }
393 let (_, open_span) = self.bump()?; let (clauses, close_span) =
395 self.parse_seq_forms(Token::RParen, open_span.clone(), "reader conditional")?;
396 if clauses.len() % 2 != 0 {
397 return Err(self.make_error(
398 "reader conditional must have an even number of clauses",
399 open_span,
400 ));
401 }
402 Ok(Form::new(
403 FormKind::ReaderCond { splicing, clauses },
404 self.merged_span(&start, &close_span),
405 ))
406 }
407}
408
409impl Iterator for Parser {
412 type Item = CljxResult<Form>;
413
414 fn next(&mut self) -> Option<Self::Item> {
415 match self.parse_one() {
416 Ok(Some(form)) => Some(Ok(form)),
417 Ok(None) => None,
418 Err(e) => Some(Err(e)),
419 }
420 }
421}
422
423#[cfg(test)]
426mod tests {
427 use std::sync::Arc;
428
429 use cljrs_types::{error::CljxError, span::Span};
430
431 use super::*;
432
433 fn dummy_span() -> Span {
436 Span::new(Arc::new("<test>".to_string()), 0, 0, 1, 1)
437 }
438
439 fn f(kind: FormKind) -> Form {
441 Form::new(kind, dummy_span())
442 }
443
444 fn parse_all(src: &str) -> Vec<Form> {
445 Parser::new(src.to_string(), "<test>".to_string())
446 .parse_all()
447 .unwrap_or_else(|e| panic!("parse error: {e}"))
448 }
449
450 fn parse1(src: &str) -> Form {
451 Parser::new(src.to_string(), "<test>".to_string())
452 .parse_one()
453 .unwrap_or_else(|e| panic!("parse error: {e}"))
454 .expect("expected a form but got EOF")
455 }
456
457 fn parse_err(src: &str) -> String {
458 let mut p = Parser::new(src.to_string(), "<test>".to_string());
459 match p.parse_all() {
460 Err(CljxError::ReadError { message, .. }) => message,
461 Err(e) => panic!("unexpected error type: {e:?}"),
462 Ok(forms) => panic!("expected a parse error but got: {forms:?}"),
463 }
464 }
465
466 #[test]
469 fn test_nil() {
470 assert_eq!(parse1("nil").kind, FormKind::Nil);
471 }
472
473 #[test]
474 fn test_bool() {
475 assert_eq!(parse1("true").kind, FormKind::Bool(true));
476 assert_eq!(parse1("false").kind, FormKind::Bool(false));
477 }
478
479 #[test]
480 fn test_int() {
481 assert_eq!(parse1("42").kind, FormKind::Int(42));
482 assert_eq!(parse1("-7").kind, FormKind::Int(-7));
483 }
484
485 #[test]
486 fn test_bigint() {
487 assert_eq!(parse1("42N").kind, FormKind::BigInt("42".to_string()));
488 }
489
490 #[test]
491 fn test_float() {
492 assert_eq!(parse1("3.14").kind, FormKind::Float(3.14));
493 assert_eq!(parse1("1e10").kind, FormKind::Float(1e10));
494 }
495
496 #[test]
497 fn test_bigdecimal() {
498 assert_eq!(
499 parse1("3.14M").kind,
500 FormKind::BigDecimal("3.14".to_string())
501 );
502 }
503
504 #[test]
505 fn test_ratio() {
506 assert_eq!(parse1("3/4").kind, FormKind::Ratio("3/4".to_string()));
507 assert_eq!(parse1("-1/2").kind, FormKind::Ratio("-1/2".to_string()));
508 }
509
510 #[test]
511 fn test_char() {
512 assert_eq!(parse1("\\a").kind, FormKind::Char('a'));
513 assert_eq!(parse1("\\newline").kind, FormKind::Char('\n'));
514 }
515
516 #[test]
517 fn test_str() {
518 assert_eq!(parse1("\"hello\"").kind, FormKind::Str("hello".to_string()));
519 }
520
521 #[test]
522 fn test_regex() {
523 assert_eq!(
524 parse1("#\"[a-z]+\"").kind,
525 FormKind::Regex("[a-z]+".to_string())
526 );
527 }
528
529 #[test]
530 fn test_symbolic() {
531 assert!(matches!(
532 parse1("##Inf").kind,
533 FormKind::Symbolic(f) if f == f64::INFINITY
534 ));
535 assert!(matches!(
536 parse1("##-Inf").kind,
537 FormKind::Symbolic(f) if f == f64::NEG_INFINITY
538 ));
539 assert!(matches!(
541 parse1("##NaN").kind,
542 FormKind::Symbolic(f) if f.is_nan()
543 ));
544 }
545
546 #[test]
547 fn test_symbol() {
548 assert_eq!(parse1("foo").kind, FormKind::Symbol("foo".to_string()));
549 }
550
551 #[test]
552 fn test_keyword() {
553 assert_eq!(parse1(":foo").kind, FormKind::Keyword("foo".to_string()));
554 }
555
556 #[test]
557 fn test_auto_keyword() {
558 assert_eq!(
559 parse1("::foo").kind,
560 FormKind::AutoKeyword("foo".to_string())
561 );
562 }
563
564 #[test]
567 fn test_empty_list() {
568 assert_eq!(parse1("()").kind, FormKind::List(vec![]));
569 }
570
571 #[test]
572 fn test_list() {
573 assert_eq!(
574 parse1("(1 2 3)").kind,
575 FormKind::List(vec![
576 f(FormKind::Int(1)),
577 f(FormKind::Int(2)),
578 f(FormKind::Int(3)),
579 ])
580 );
581 }
582
583 #[test]
584 fn test_vector() {
585 assert_eq!(
586 parse1("[1 2]").kind,
587 FormKind::Vector(vec![f(FormKind::Int(1)), f(FormKind::Int(2))])
588 );
589 }
590
591 #[test]
592 fn test_map() {
593 assert_eq!(
594 parse1("{:a 1}").kind,
595 FormKind::Map(vec![
596 f(FormKind::Keyword("a".to_string())),
597 f(FormKind::Int(1)),
598 ])
599 );
600 }
601
602 #[test]
603 fn test_set() {
604 assert_eq!(
605 parse1("#{1 2}").kind,
606 FormKind::Set(vec![f(FormKind::Int(1)), f(FormKind::Int(2))])
607 );
608 }
609
610 #[test]
613 fn test_nested() {
614 assert_eq!(
615 parse1("(+ [1 2] {:a 3})").kind,
616 FormKind::List(vec![
617 f(FormKind::Symbol("+".to_string())),
618 f(FormKind::Vector(vec![
619 f(FormKind::Int(1)),
620 f(FormKind::Int(2)),
621 ])),
622 f(FormKind::Map(vec![
623 f(FormKind::Keyword("a".to_string())),
624 f(FormKind::Int(3)),
625 ])),
626 ])
627 );
628 }
629
630 #[test]
633 fn test_quote() {
634 assert_eq!(
635 parse1("'foo").kind,
636 FormKind::Quote(Box::new(f(FormKind::Symbol("foo".to_string()))))
637 );
638 }
639
640 #[test]
641 fn test_syntax_quote() {
642 assert_eq!(
643 parse1("`foo").kind,
644 FormKind::SyntaxQuote(Box::new(f(FormKind::Symbol("foo".to_string()))))
645 );
646 }
647
648 #[test]
649 fn test_unquote() {
650 assert_eq!(
651 parse1("~foo").kind,
652 FormKind::Unquote(Box::new(f(FormKind::Symbol("foo".to_string()))))
653 );
654 }
655
656 #[test]
657 fn test_unquote_splice() {
658 assert_eq!(
659 parse1("~@foo").kind,
660 FormKind::UnquoteSplice(Box::new(f(FormKind::Symbol("foo".to_string()))))
661 );
662 }
663
664 #[test]
665 fn test_deref() {
666 assert_eq!(
667 parse1("@foo").kind,
668 FormKind::Deref(Box::new(f(FormKind::Symbol("foo".to_string()))))
669 );
670 }
671
672 #[test]
673 fn test_var() {
674 assert_eq!(
675 parse1("#'foo").kind,
676 FormKind::Var(Box::new(f(FormKind::Symbol("foo".to_string()))))
677 );
678 }
679
680 #[test]
683 fn test_meta_map() {
684 assert_eq!(
685 parse1("^{:a 1} foo").kind,
686 FormKind::Meta(
687 Box::new(f(FormKind::Map(vec![
688 f(FormKind::Keyword("a".to_string())),
689 f(FormKind::Int(1)),
690 ]))),
691 Box::new(f(FormKind::Symbol("foo".to_string()))),
692 )
693 );
694 }
695
696 #[test]
697 fn test_meta_keyword() {
698 assert_eq!(
699 parse1("^:kw foo").kind,
700 FormKind::Meta(
701 Box::new(f(FormKind::Keyword("kw".to_string()))),
702 Box::new(f(FormKind::Symbol("foo".to_string()))),
703 )
704 );
705 }
706
707 #[test]
708 fn test_meta_symbol() {
709 assert_eq!(
710 parse1("^Sym foo").kind,
711 FormKind::Meta(
712 Box::new(f(FormKind::Symbol("Sym".to_string()))),
713 Box::new(f(FormKind::Symbol("foo".to_string()))),
714 )
715 );
716 }
717
718 #[test]
721 fn test_anon_fn() {
722 assert_eq!(
723 parse1("#(+ % 1)").kind,
724 FormKind::AnonFn(vec![
725 f(FormKind::Symbol("+".to_string())),
726 f(FormKind::Symbol("%".to_string())),
727 f(FormKind::Int(1)),
728 ])
729 );
730 }
731
732 #[test]
735 fn test_discard_simple() {
736 let forms = parse_all("#_foo bar");
737 assert_eq!(forms.len(), 1);
738 assert_eq!(forms[0].kind, FormKind::Symbol("bar".to_string()));
739 }
740
741 #[test]
742 fn test_discard_in_vector() {
743 assert_eq!(
744 parse1("[1 #_2 3]").kind,
745 FormKind::Vector(vec![f(FormKind::Int(1)), f(FormKind::Int(3))])
746 );
747 }
748
749 #[test]
750 fn test_discard_chained() {
751 let forms = parse_all("#_ #_ 1 2 3");
753 assert_eq!(forms.len(), 2);
754 assert_eq!(forms[0].kind, FormKind::Int(2));
755 assert_eq!(forms[1].kind, FormKind::Int(3));
756 }
757
758 #[test]
761 fn test_reader_cond() {
762 assert_eq!(
763 parse1("#?(:rust 1 :clj 2)").kind,
764 FormKind::ReaderCond {
765 splicing: false,
766 clauses: vec![
767 f(FormKind::Keyword("rust".to_string())),
768 f(FormKind::Int(1)),
769 f(FormKind::Keyword("clj".to_string())),
770 f(FormKind::Int(2)),
771 ],
772 }
773 );
774 }
775
776 #[test]
777 fn test_reader_cond_splice() {
778 assert_eq!(
779 parse1("#?@(:rust [1 2])").kind,
780 FormKind::ReaderCond {
781 splicing: true,
782 clauses: vec![
783 f(FormKind::Keyword("rust".to_string())),
784 f(FormKind::Vector(vec![
785 f(FormKind::Int(1)),
786 f(FormKind::Int(2)),
787 ])),
788 ],
789 }
790 );
791 }
792
793 #[test]
796 fn test_tagged_literal() {
797 assert_eq!(
798 parse1("#inst \"2024-01-01\"").kind,
799 FormKind::TaggedLiteral(
800 "inst".to_string(),
801 Box::new(f(FormKind::Str("2024-01-01".to_string()))),
802 )
803 );
804 }
805
806 #[test]
809 fn test_span_col_offset() {
810 let form = parse1(" 42");
811 assert_eq!(form.span.start, 2);
812 assert_eq!(form.span.col, 3);
813 }
814
815 #[test]
816 fn test_span_multiline() {
817 let forms = parse_all("a\nb");
818 assert_eq!(forms[0].span.line, 1);
819 assert_eq!(forms[1].span.line, 2);
820 }
821
822 #[test]
825 fn test_parse_all_multiple() {
826 let forms = parse_all("1 2 3");
827 assert_eq!(forms.len(), 3);
828 assert_eq!(forms[0].kind, FormKind::Int(1));
829 assert_eq!(forms[1].kind, FormKind::Int(2));
830 assert_eq!(forms[2].kind, FormKind::Int(3));
831 }
832
833 #[test]
836 fn test_err_unclosed_list() {
837 let msg = parse_err("(1 2");
838 assert!(msg.contains("unclosed") || msg.contains("list"), "{msg}");
839 }
840
841 #[test]
842 fn test_err_unexpected_close() {
843 let msg = parse_err(")");
844 assert!(msg.contains("unexpected"), "{msg}");
845 }
846
847 #[test]
848 fn test_err_odd_map() {
849 let msg = parse_err("{:a}");
850 assert!(msg.contains("even") || msg.contains("map"), "{msg}");
851 }
852
853 #[test]
854 fn test_err_reader_cond_non_list() {
855 let msg = parse_err("#?[1 2]");
857 assert!(
858 msg.contains('(') || msg.contains("reader conditional"),
859 "{msg}"
860 );
861 }
862
863 #[test]
864 fn test_err_odd_reader_cond_clauses() {
865 let msg = parse_err("#?(:cljx)");
866 assert!(
867 msg.contains("even") || msg.contains("reader conditional"),
868 "{msg}"
869 );
870 }
871}