1mod generated;
2mod language;
3mod syntax_tree;
4mod token_text;
5
6pub(crate) mod grammar;
7
8use crate::cst::Document;
9use crate::cst::SelectionSet;
10use crate::cst::Type;
11use crate::lexer::Lexer;
12use crate::Error;
13use crate::LimitTracker;
14use crate::Token;
15use crate::TokenKind;
16pub use generated::syntax_kind::SyntaxKind;
17pub use language::SyntaxElement;
18pub use language::SyntaxNode;
19pub use language::SyntaxNodeChildren;
20pub use language::SyntaxNodePtr;
21pub use language::SyntaxToken;
22use std::cell::RefCell;
23use std::ops::ControlFlow;
24use std::rc::Rc;
25pub use syntax_tree::SyntaxTree;
26pub(crate) use syntax_tree::SyntaxTreeBuilder;
28pub(crate) use token_text::TokenText;
29
/// A hand-written recursive-descent parser producing a lossless CST.
#[derive(Debug)]
pub struct Parser<'input> {
    // Token source over the raw input text.
    lexer: Lexer<'input>,
    // One-token lookahead buffer, filled lazily by `peek_token`.
    current_token: Option<Token<'input>>,
    // Shared tree builder; `Rc` so `NodeGuard`/`Checkpoint` can hold it too.
    builder: Rc<RefCell<SyntaxTreeBuilder>>,
    // Trivia (comments, whitespace, commas) buffered by `skip_ignored` until
    // `push_ignored` flushes them into the tree at the next node boundary.
    ignored: Vec<Token<'input>>,
    // Syntax errors accumulated during the parse.
    errors: Vec<crate::Error>,
    // Tracks parser recursion depth against the configured limit.
    recursion_limit: LimitTracker,
    // Cleared once a limit is hit so cascading follow-on errors are dropped.
    accept_errors: bool,
}
98
/// Default maximum recursion depth used by `Parser::new`; override with
/// `Parser::recursion_limit`.
const DEFAULT_RECURSION_LIMIT: usize = 500;
impl<'input> Parser<'input> {
    /// Create a parser over `input` with the default recursion limit and no
    /// token limit.
    pub fn new(input: &'input str) -> Self {
        let lexer = Lexer::new(input);

        Self {
            lexer,
            current_token: None,
            builder: Rc::new(RefCell::new(SyntaxTreeBuilder::new())),
            ignored: vec![],
            errors: Vec::new(),
            recursion_limit: LimitTracker::new(DEFAULT_RECURSION_LIMIT),
            accept_errors: true,
        }
    }

    /// Builder-style setter: replace the recursion limit.
    pub fn recursion_limit(mut self, recursion_limit: usize) -> Self {
        self.recursion_limit = LimitTracker::new(recursion_limit);
        self
    }

    /// Builder-style setter: cap how many tokens the lexer may produce before
    /// aborting with a limit error.
    pub fn token_limit(mut self, token_limit: usize) -> Self {
        self.lexer = self.lexer.with_limit(token_limit);
        self
    }

    /// Parse the input as a complete GraphQL document, consuming the parser.
    pub fn parse(mut self) -> SyntaxTree<Document> {
        grammar::document::document(&mut self);

        // Every `NodeGuard`/`Checkpoint` must have been dropped by now, so
        // this is the only remaining reference to the builder.
        let builder = Rc::try_unwrap(self.builder)
            .expect("More than one reference to builder left")
            .into_inner();
        let builder =
            builder.finish_document(self.errors, self.recursion_limit, self.lexer.limit_tracker);

        match builder {
            syntax_tree::SyntaxTreeWrapper::Document(tree) => tree,
            syntax_tree::SyntaxTreeWrapper::Type(_)
            | syntax_tree::SyntaxTreeWrapper::FieldSet(_) => {
                unreachable!("parse constructor can only construct a document")
            }
        }
    }

    /// Parse the input as a standalone selection set (field set), consuming
    /// the parser. Used e.g. for `FieldSet` values in schema directives.
    pub fn parse_selection_set(mut self) -> SyntaxTree<SelectionSet> {
        grammar::selection::field_set(&mut self);

        let builder = Rc::try_unwrap(self.builder)
            .expect("More than one reference to builder left")
            .into_inner();
        let builder = builder.finish_selection_set(
            self.errors,
            self.recursion_limit,
            self.lexer.limit_tracker,
        );

        match builder {
            syntax_tree::SyntaxTreeWrapper::FieldSet(tree) => tree,
            syntax_tree::SyntaxTreeWrapper::Document(_)
            | syntax_tree::SyntaxTreeWrapper::Type(_) => {
                unreachable!("parse_selection_set constructor can only construct a selection set")
            }
        }
    }

    /// Parse the input as a standalone type reference, consuming the parser.
    pub fn parse_type(mut self) -> SyntaxTree<Type> {
        grammar::ty::ty(&mut self);

        let builder = Rc::try_unwrap(self.builder)
            .expect("More than one reference to builder left")
            .into_inner();
        let builder =
            builder.finish_type(self.errors, self.recursion_limit, self.lexer.limit_tracker);

        match builder {
            syntax_tree::SyntaxTreeWrapper::Type(tree) => tree,
            syntax_tree::SyntaxTreeWrapper::FieldSet(_)
            | syntax_tree::SyntaxTreeWrapper::Document(_) => {
                unreachable!("parse_type constructor can only construct a type")
            }
        }
    }

    /// Whether the current token has kind `token`. Does not consume.
    pub(crate) fn at(&mut self, token: TokenKind) -> bool {
        if let Some(t) = self.peek() {
            if t == token {
                return true;
            }
            return false;
        }

        false
    }

    /// Consume the current token into the tree as `kind`, then buffer any
    /// trailing trivia (whitespace/comments/commas).
    pub(crate) fn bump(&mut self, kind: SyntaxKind) {
        self.eat(kind);
        self.skip_ignored();
    }

    /// Move consecutive trivia tokens from the lexer into the `ignored`
    /// buffer; they are only added to the tree later by `push_ignored`, so
    /// they attach at the correct node boundary.
    pub(crate) fn skip_ignored(&mut self) {
        while let Some(TokenKind::Comment | TokenKind::Whitespace | TokenKind::Comma) = self.peek()
        {
            let token = self.pop();
            self.ignored.push(token);
        }
    }

    /// Flush the buffered trivia tokens into the tree at the current position.
    pub(crate) fn push_ignored(&mut self) {
        let tokens = std::mem::take(&mut self.ignored);
        for token in tokens {
            let syntax_kind = match token.kind {
                TokenKind::Comment => SyntaxKind::COMMENT,
                TokenKind::Whitespace => SyntaxKind::WHITESPACE,
                TokenKind::Comma => SyntaxKind::COMMA,
                // `skip_ignored` only ever buffers the three kinds above.
                _ => unreachable!(),
            };
            self.push_token(syntax_kind, token);
        }
    }

    /// The current (next unconsumed) token, if any. Does not consume.
    pub(crate) fn current(&mut self) -> Option<&Token<'input>> {
        self.peek_token()
    }

    /// Flush pending trivia, then consume the current token into the tree as
    /// `kind`. No-op when the lexer is exhausted.
    fn eat(&mut self, kind: SyntaxKind) {
        self.push_ignored();
        if self.current().is_none() {
            return;
        }

        let token = self.pop();
        self.push_token(kind, token);
    }

    /// Record a limit error at the current position and stop accepting any
    /// further errors — after an aborted parse they would only cascade.
    pub(crate) fn limit_err<S: Into<String>>(&mut self, message: S) {
        let current = if let Some(current) = self.current() {
            current
        } else {
            return;
        };
        let err = Error::limit(message, current.index());
        self.push_err(err);
        self.accept_errors = false;
    }

    /// Record a syntax error located at `current`, using the EOF variant when
    /// the token is end-of-file.
    pub(crate) fn err_at_token(&mut self, current: &Token<'_>, message: &str) {
        let err = if current.kind == TokenKind::Eof {
            Error::eof(message, current.index())
        } else {
            Error::with_loc(message, current.data().to_string(), current.index())
        };
        self.push_err(err);
    }

    /// Record a syntax error at the current token without consuming it.
    pub(crate) fn err(&mut self, message: &str) {
        let current = if let Some(current) = self.current() {
            current
        } else {
            return;
        };
        let err = if current.kind == TokenKind::Eof {
            Error::eof(message, current.index())
        } else {
            Error::with_loc(message, current.data().to_string(), current.index())
        };
        self.push_err(err);
    }

    /// Record a syntax error at the current token and consume it into the
    /// tree as an `ERROR` leaf, so the CST stays lossless.
    pub(crate) fn err_and_pop(&mut self, message: &str) {
        self.push_ignored();
        if self.current().is_none() {
            return;
        }

        let current = self.pop();
        let err = if current.kind == TokenKind::Eof {
            Error::eof(message, current.index())
        } else {
            Error::with_loc(message, current.data().to_string(), current.index())
        };

        self.push_token(SyntaxKind::ERROR, current);
        self.push_err(err);

        self.skip_ignored();
    }

    /// Consume the current token as `kind` if it has kind `token`; otherwise
    /// record an "expected …" error without consuming anything.
    pub(crate) fn expect(&mut self, token: TokenKind, kind: SyntaxKind) {
        let Some(current) = self.current() else {
            return;
        };
        // Copy out what the error needs before `at`/`bump` re-borrow `self`.
        let is_eof = current.kind == TokenKind::Eof;
        let data = current.data();
        let index = current.index();

        if self.at(token) {
            self.bump(kind);
            return;
        }

        let err = if is_eof {
            let message = format!("expected {kind:?}, got EOF");
            Error::eof(message, index)
        } else {
            let message = format!("expected {kind:?}, got {data}");
            Error::with_loc(message, data.to_string(), index)
        };

        self.push_err(err);
    }

    /// Store `err`, unless error collection was disabled by a limit error.
    pub(crate) fn push_err(&mut self, err: crate::error::Error) {
        if self.accept_errors {
            self.errors.push(err);
        }
    }

    /// Pull the next token from the lexer, collecting any lexing errors along
    /// the way. A lexer limit error also disables further error collection.
    fn next_token(&mut self) -> Option<Token<'input>> {
        for res in &mut self.lexer {
            match res {
                Err(err) => {
                    if err.is_limit() {
                        self.accept_errors = false;
                    }
                    // Lexer errors bypass `push_err` so the limit error itself
                    // is always recorded.
                    self.errors.push(err);
                }
                Ok(token) => {
                    return Some(token);
                }
            }
        }

        None
    }

    /// Consume and return the current token.
    ///
    /// Panics if the lexer is exhausted; callers check with `peek`/`current`
    /// first.
    pub(crate) fn pop(&mut self) -> Token<'input> {
        if let Some(token) = self.current_token.take() {
            return token;
        }

        self.next_token()
            .expect("Could not pop a token from the lexer")
    }

    /// Append `token`'s text to the tree as a leaf of the given `kind`.
    pub(crate) fn push_token(&mut self, kind: SyntaxKind, token: Token) {
        self.builder.borrow_mut().token(kind, token.data())
    }

    /// Open a new node of `kind`; it stays open until the returned guard is
    /// finished or dropped. Pending trivia is flushed before the node opens
    /// (so it lands outside it) and re-buffered right after.
    pub(crate) fn start_node(&mut self, kind: SyntaxKind) -> NodeGuard {
        self.push_ignored();

        self.builder.borrow_mut().start_node(kind);
        let guard = NodeGuard::new(self.builder.clone());
        self.skip_ignored();

        guard
    }

    /// Record the current position so a node can later be wrapped
    /// retroactively around everything parsed since (see
    /// `Checkpoint::wrap_node`).
    pub(crate) fn checkpoint_node(&mut self) -> Checkpoint {
        self.push_ignored();

        let checkpoint = self.builder.borrow().checkpoint();
        Checkpoint::new(self.builder.clone(), checkpoint)
    }

    /// Kind of the current token, if any. Does not consume.
    pub(crate) fn peek(&mut self) -> Option<TokenKind> {
        self.peek_token().map(|token| token.kind())
    }

    /// Run `run` for each remaining token until it breaks or input ends.
    /// Each `Continue` iteration must consume at least one token, otherwise
    /// the grammar would loop forever (checked in debug builds).
    pub(crate) fn peek_while(
        &mut self,
        mut run: impl FnMut(&mut Parser, TokenKind) -> ControlFlow<()>,
    ) {
        while let Some(kind) = self.peek() {
            let before = self.current_token.clone();
            match run(self, kind) {
                ControlFlow::Break(()) => break,
                ControlFlow::Continue(()) => {
                    debug_assert!(
                        before != self.current_token,
                        "peek_while() iteration must advance parsing"
                    );
                }
            }
        }
    }

    /// Run `run` for as long as the current token has kind `expect`.
    /// Each iteration must consume input (checked in debug builds).
    pub(crate) fn peek_while_kind(&mut self, expect: TokenKind, mut run: impl FnMut(&mut Parser)) {
        while let Some(kind) = self.peek() {
            if kind != expect {
                break;
            }

            let before = self.current_token.clone();
            run(self);
            debug_assert!(
                before != self.current_token,
                "peek_while_kind() iteration must advance parsing"
            );
        }
    }

    /// Parse a `separator`-separated list of items, tolerating one optional
    /// leading separator (GraphQL allows e.g. `union U = | A | B`).
    pub(crate) fn parse_separated_list(
        &mut self,
        separator: TokenKind,
        separator_syntax: SyntaxKind,
        mut run: impl FnMut(&mut Parser),
    ) {
        if matches!(self.peek(), Some(kind) if kind == separator) {
            self.bump(separator_syntax);
        }

        run(self);

        self.peek_while_kind(separator, |p| {
            p.bump(separator_syntax);
            run(p);
        });
    }

    /// Borrow the current token, filling the one-token lookahead buffer from
    /// the lexer if it is empty.
    pub(crate) fn peek_token(&mut self) -> Option<&Token<'input>> {
        if self.current_token.is_none() {
            self.current_token = self.next_token();
        }
        self.current_token.as_ref()
    }

    /// `n`-th significant token ahead (1-based); see `peek_n_inner`.
    pub(crate) fn peek_token_n(&self, n: usize) -> Option<Token<'input>> {
        self.peek_n_inner(n)
    }

    /// Kind of the `n`-th significant token ahead (1-based).
    pub(crate) fn peek_n(&self, n: usize) -> Option<TokenKind> {
        self.peek_n_inner(n).map(|token| token.kind())
    }

    /// Arbitrary lookahead without consuming: scan a clone of the lexer,
    /// skipping whitespace and comments. `n` is 1-based (`n == 1` is the
    /// current token); lexer errors are silently skipped here.
    // NOTE(review): unlike `skip_ignored`, commas are NOT filtered out —
    // confirm this asymmetry is intentional for lookahead decisions.
    fn peek_n_inner(&self, n: usize) -> Option<Token<'input>> {
        self.current_token
            .iter()
            .cloned()
            .map(Result::Ok)
            .chain(self.lexer.clone())
            .filter_map(Result::ok)
            .filter(|token| !matches!(token.kind(), TokenKind::Whitespace | TokenKind::Comment))
            .nth(n - 1)
    }

    /// Source text of the current token, if any.
    pub(crate) fn peek_data(&mut self) -> Option<&'input str> {
        self.peek_token().map(|token| token.data())
    }

    /// Source text of the `n`-th significant token ahead (1-based).
    pub(crate) fn peek_data_n(&self, n: usize) -> Option<&'input str> {
        self.peek_token_n(n).map(|token| token.data())
    }
}
533
/// RAII guard for an open CST node: dropping it closes the node in the
/// shared builder. `#[must_use]` because discarding it immediately would
/// close the node before any children are added.
#[must_use]
pub(crate) struct NodeGuard {
    builder: Rc<RefCell<SyntaxTreeBuilder>>,
}
543
544impl NodeGuard {
545 fn new(builder: Rc<RefCell<SyntaxTreeBuilder>>) -> Self {
546 Self { builder }
547 }
548
549 pub(crate) fn finish_node(self) {
550 drop(self);
551 }
552}
553
554impl Drop for NodeGuard {
555 fn drop(&mut self) {
556 self.builder.borrow_mut().finish_node();
557 }
558}
559
/// A recorded position in the builder that lets a node be wrapped
/// retroactively around everything emitted since the checkpoint was taken.
pub(crate) struct Checkpoint {
    builder: Rc<RefCell<SyntaxTreeBuilder>>,
    checkpoint: rowan::Checkpoint,
}
565
566impl Checkpoint {
567 fn new(builder: Rc<RefCell<SyntaxTreeBuilder>>, checkpoint: rowan::Checkpoint) -> Self {
568 Self {
569 builder,
570 checkpoint,
571 }
572 }
573
574 pub(crate) fn wrap_node(self, kind: SyntaxKind) -> NodeGuard {
578 self.builder.borrow_mut().wrap_node(self.checkpoint, kind);
579 NodeGuard::new(self.builder)
580 }
581}
582
#[cfg(test)]
mod tests {
    use super::DEFAULT_RECURSION_LIMIT;
    use crate::cst;
    use crate::Error;
    use crate::Parser;
    use crate::SyntaxTree;
    use expect_test::expect;

    // A token limit hit in the middle of a node must produce exactly one
    // limit error, not a cascade of follow-on syntax errors.
    #[test]
    fn limited_mid_node() {
        let source = r#"
            type Query {
                field(arg1: Int, arg2: Int, arg3: Int, arg4: Int, arg5: Int, arg6: Int): Int
            }
            "#;
        let parser = Parser::new(source)
            .token_limit(18);
        let tree = parser.parse();
        let mut errors = tree.errors();
        assert_eq!(
            errors.next(),
            Some(&Error::limit("token limit reached, aborting lexing", 65))
        );
        assert_eq!(errors.next(), None);
    }

    // With both limits configured, only the limit that is hit first reports.
    #[test]
    fn multiple_limits() {
        let source = r#"
            query {
                a {
                    a {
                        a {
                            a
                        }
                    }
                }
            }
            "#;

        let parser = Parser::new(source).recursion_limit(10).token_limit(22);
        let cst = parser.parse();
        let errors = cst.errors().collect::<Vec<_>>();
        assert_eq!(
            errors,
            &[&Error::limit("token limit reached, aborting lexing", 170),]
        );

        let parser = Parser::new(source).recursion_limit(3).token_limit(200);
        let cst = parser.parse();
        let errors = cst.errors().collect::<Vec<_>>();
        assert_eq!(
            errors,
            &[&Error::limit("parser recursion limit reached", 121),]
        );
    }

    // Syntax errors found before the token limit is hit are kept; everything
    // after the limit is dropped, and the partial tree stays lossless.
    #[test]
    fn syntax_errors_and_limits() {
        let source = r#"
            type Query {
                field(arg1: Int, missing_arg): Int
                # limit reached here
                field2: !String
            } and then some garbage
            "#;
        let parser = Parser::new(source).token_limit(22);
        let cst = parser.parse();
        let mut errors = cst.errors();
        assert_eq!(
            errors.next(),
            Some(&Error::with_loc("expected a Name", ")".to_string(), 70))
        );
        assert_eq!(
            errors.next(),
            Some(&Error::limit("token limit reached, aborting lexing", 113))
        );
        assert_eq!(errors.next(), None);

        let tree = expect![[r##"
            DOCUMENT@0..113
              WHITESPACE@0..13 "\n            "
              OBJECT_TYPE_DEFINITION@13..76
                type_KW@13..17 "type"
                WHITESPACE@17..18 " "
                NAME@18..23
                  IDENT@18..23 "Query"
                WHITESPACE@23..24 " "
                FIELDS_DEFINITION@24..76
                  L_CURLY@24..25 "{"
                  WHITESPACE@25..42 "\n                "
                  FIELD_DEFINITION@42..76
                    NAME@42..47
                      IDENT@42..47 "field"
                    ARGUMENTS_DEFINITION@47..71
                      L_PAREN@47..48 "("
                      INPUT_VALUE_DEFINITION@48..57
                        NAME@48..52
                          IDENT@48..52 "arg1"
                        COLON@52..53 ":"
                        WHITESPACE@53..54 " "
                        NAMED_TYPE@54..57
                          NAME@54..57
                            IDENT@54..57 "Int"
                      COMMA@57..58 ","
                      WHITESPACE@58..59 " "
                      INPUT_VALUE_DEFINITION@59..70
                        NAME@59..70
                          IDENT@59..70 "missing_arg"
                      R_PAREN@70..71 ")"
                    COLON@71..72 ":"
                    WHITESPACE@72..73 " "
                    NAMED_TYPE@73..76
                      NAME@73..76
                        IDENT@73..76 "Int"
              WHITESPACE@76..93 "\n                "
              COMMENT@93..113 "# limit reached here"
        "##]];
        tree.assert_eq(&format!("{:#?}", cst.document().syntax));
    }

    // Garbage between definitions is recovered from: both real definitions
    // still come out of `definitions()`.
    #[test]
    fn tree_with_syntax_errors() {
        use crate::cst::Definition;

        let source = r#"
        garbage type Query implements X {
            field(arg: Int): Int
        } garbage :,, (|) interface X {}
        "#;
        let cst = Parser::new(source).parse();

        let mut definitions = cst.document().definitions();
        let query_def = definitions.next().unwrap();
        let interface_def = definitions.next().unwrap();
        assert_eq!(definitions.next(), None);
        assert!(matches!(query_def, Definition::ObjectTypeDefinition(_)));
        assert!(matches!(
            interface_def,
            Definition::InterfaceTypeDefinition(_)
        ));
    }

    // The high-water mark of the token limit tracker reflects actual usage.
    #[test]
    fn token_limit() {
        let cst = Parser::new("type Query { a a a a a a a a a }")
            .token_limit(100)
            .parse();
        assert_eq!(cst.token_limit().high, 26);
    }

    // Exercises every recursive grammar production, both "wide" (repetition,
    // which must NOT count against the recursion limit) and "deep" (nesting,
    // which must).
    #[test]
    #[allow(clippy::single_char_add_str)]
    fn recursion_limit() {
        // Deep inputs far beyond the limit prove the limit fires before the
        // process stack overflows.
        const SMASH_THE_STACK_FACTOR: usize = 50;

        wide(2, |ast| assert_eq!(ast.errors, []));
        wide(DEFAULT_RECURSION_LIMIT - 2, |ast| {
            assert_eq!(ast.errors.len(), 0, "{:?}", ast.errors[0])
        });
        wide(DEFAULT_RECURSION_LIMIT * SMASH_THE_STACK_FACTOR, |_ast| {
            // Wide repetition is not recursion: only checks termination.
        });

        deep(2, |ast| assert_eq!(ast.errors, []));
        deep(DEFAULT_RECURSION_LIMIT - 2, |ast| {
            assert_eq!(ast.errors.len(), 0, "{:?}", ast.errors[0])
        });
        deep(DEFAULT_RECURSION_LIMIT * SMASH_THE_STACK_FACTOR, |ast| {
            assert_eq!(ast.errors.len(), 1);
            assert!(ast.errors[0].message.contains("recursion limit reached"));
        });

        // Build documents whose nesting depth is `count`: list types, list
        // values, object values, and selection sets.
        fn deep(count: usize, each: impl Fn(SyntaxTree)) {
            let check = |input: String| each(Parser::new(&input).parse());

            let mut doc = String::new();
            doc.push_str("type O { field: ");
            doc.push_str(&"[".repeat(count));
            doc.push_str("Int");
            doc.push_str(&"]".repeat(count));
            doc.push_str(" }");
            check(doc);

            let mut doc = String::new();
            doc.push_str("type O { field(arg: T = ");
            doc.push_str(&"[".repeat(count));
            doc.push_str("0");
            doc.push_str(&"]".repeat(count));
            doc.push_str("): Int }");
            check(doc);

            let mut doc = String::new();
            doc.push_str("type O { field(arg: T = ");
            doc.push_str(&"{f: ".repeat(count));
            doc.push_str("0");
            doc.push_str(&"}".repeat(count));
            doc.push_str("): Int }");
            check(doc);

            let mut doc = String::new();
            doc.push_str("query { ");
            doc.push_str(&"f { ".repeat(count));
            doc.push_str("f ");
            doc.push_str(&"}".repeat(count));
            doc.push_str("}");
            check(doc);
        }

        // Build documents that repeat a production `count` times at the same
        // depth: each must parse without tripping the recursion limit.
        fn wide(count: usize, each: impl Fn(SyntaxTree)) {
            let check = |input: String| each(Parser::new(&input).parse());

            let mut doc = String::new();
            doc.push_str(&"directive @d on FIELD ".repeat(count));
            check(doc);

            let mut doc = String::new();
            doc.push_str("scalar Url");
            doc.push_str(&" @d".repeat(count));
            check(doc);

            let mut doc = String::new();
            doc.push_str("schema {");
            doc.push_str(&" query: Q".repeat(count));
            doc.push_str(" }");
            check(doc);

            let mut doc = String::new();
            doc.push_str("type O implements");
            doc.push_str(&" & I".repeat(count));
            check(doc);

            let mut doc = String::new();
            doc.push_str("type O {");
            doc.push_str(&" f: T".repeat(count));
            doc.push_str("}");
            check(doc);

            let mut doc = String::new();
            doc.push_str("enum E {");
            doc.push_str(&" V".repeat(count));
            doc.push_str("}");
            check(doc);

            let mut doc = String::new();
            doc.push_str("union U = ");
            doc.push_str(&" | T".repeat(count));
            check(doc);

            let mut doc = String::new();
            doc.push_str("input In {");
            doc.push_str(&" f: T".repeat(count));
            doc.push_str("}");
            check(doc);

            let mut doc = String::new();
            doc.push_str("type O { field(arg: T = {");
            doc.push_str(&" f: 0".repeat(count));
            doc.push_str(" }): Int }");
            check(doc);

            let mut doc = String::new();
            doc.push_str("type O { field(arg: T = [");
            doc.push_str(&" 0,".repeat(count));
            doc.push_str(" ]): Int }");
            check(doc);

            let mut doc = String::new();
            doc.push_str("type O { field(");
            doc.push_str(&"a: T ".repeat(count));
            doc.push_str("): Int }");
            check(doc);

            let mut doc = String::new();
            doc.push_str("query {");
            doc.push_str(&" f".repeat(count));
            doc.push_str(" }");
            check(doc);

            let mut doc = String::new();
            doc.push_str("query { f(");
            doc.push_str(&" a: 0".repeat(count));
            doc.push_str(") }");
            check(doc);

            let mut doc = String::new();
            doc.push_str("query Q(");
            doc.push_str(&" $v: Int".repeat(count));
            doc.push_str(" ) { f }");
            check(doc);
        }
    }

    // `parse_selection_set` accepts both braced and bare field sets.
    #[test]
    fn parse_field_set() {
        let source = r#"{ a }"#;

        let parser = Parser::new(source);
        let cst: SyntaxTree<cst::SelectionSet> = parser.parse_selection_set();
        let errors = cst.errors().collect::<Vec<_>>();
        assert_eq!(errors.len(), 0);

        let sel_set: cst::SelectionSet = cst.field_set();
        // NOTE(review): `map` is lazy and the iterator is never consumed, so
        // the closure's assertions never actually run — consider `for_each`.
        let _ = sel_set.selections().map(|sel| {
            if let cst::Selection::Field(f) = sel {
                assert_eq!(f.name().unwrap().text().as_ref(), "a")
            } else {
                panic!("no field a in field set selection")
            }
        });

        let source = r#"a { a }"#;

        let parser = Parser::new(source);
        let cst: SyntaxTree<cst::SelectionSet> = parser.parse_selection_set();
        let errors = cst.errors().collect::<Vec<_>>();
        assert_eq!(errors.len(), 0);

        let sel_set: cst::SelectionSet = cst.field_set();
        // NOTE(review): same lazy-`map` issue as above.
        let _ = sel_set.selections().map(|sel| {
            if let cst::Selection::Field(f) = sel {
                assert_eq!(f.name().unwrap().text().as_ref(), "a")
            } else {
                panic!("no field a in field set selection")
            }
        });
    }

    // Regression test: a truncated spread with a tiny token limit must not
    // hang the parser.
    #[test]
    fn no_infinite_loop() {
        let source = r#"{ ..."#;
        let parser = Parser::new(source).token_limit(3);
        let _cst = parser.parse();
    }
}