Skip to main content

oxc_graphql_parser/parser/
mod.rs

1mod generated;
2mod language;
3mod syntax_tree;
4mod token_text;
5
6pub(crate) mod grammar;
7
8use crate::cst::Document;
9use crate::cst::SelectionSet;
10use crate::cst::Type;
11use crate::lexer::Lexer;
12use crate::Error;
13use crate::LimitTracker;
14use crate::Token;
15use crate::TokenKind;
16pub use generated::syntax_kind::SyntaxKind;
17pub use language::SyntaxElement;
18pub use language::SyntaxNode;
19pub use language::SyntaxNodeChildren;
20pub use language::SyntaxNodePtr;
21pub use language::SyntaxToken;
22use std::cell::RefCell;
23use std::ops::ControlFlow;
24use std::rc::Rc;
25pub use syntax_tree::SyntaxTree;
26// pub(crate) use language::GraphQLLanguage;
27pub(crate) use syntax_tree::SyntaxTreeBuilder;
28pub(crate) use token_text::TokenText;
29
30/// Parse GraphQL schemas or queries into a typed CST.
31///
32/// ## Example
33///
34/// The API to parse a query or a schema is the same, as the parser currently
35/// accepts a `&str`. Here is an example of parsing a query:
36/// ```rust
37/// use oxc_graphql_parser::Parser;
38///
39/// let query = "
40/// {
41///     animal
42///     ...snackSelection
43///     ... on Pet {
44///       playmates {
45///         count
46///       }
47///     }
48/// }
49/// ";
50/// // Create a new instance of a parser given a query above.
51/// let parser = Parser::new(query);
52/// // Parse the query, and return a SyntaxTree.
53/// let cst = parser.parse();
/// // Check that there are no errors. These are not part of the CST.
55/// assert_eq!(0, cst.errors().len());
56///
57/// // Get the document root node
58/// let doc = cst.document();
59/// // ... continue
60/// ```
61///
62/// Here is how you'd parse a schema:
63/// ```rust
64/// use oxc_graphql_parser::Parser;
65/// let core_schema = r#"
66/// schema @core(feature: "https://specs.apollo.dev/join/v0.1") {
67///   query: Query
68///   mutation: Mutation
69/// }
70///
71/// enum join__Graph {
72///   ACCOUNTS @join__graph(name: "accounts")
73/// }
74/// "#;
75/// let parser = Parser::new(core_schema);
76/// let cst = parser.parse();
77///
78/// assert_eq!(0, cst.errors().len());
79///
80/// let document = cst.document();
81/// ```
#[derive(Debug)]
pub struct Parser<'input> {
    /// Token source for the input text. `token_limit()` swaps it for a lexer
    /// with a token limit applied.
    lexer: Lexer<'input>,
    /// Store one lookahead token so we don't need to reparse things as much.
    current_token: Option<Token<'input>>,
    /// The in-progress tree. Shared through `Rc<RefCell<..>>` so that
    /// `NodeGuard` and `Checkpoint` can hold their own handle to it.
    builder: Rc<RefCell<SyntaxTreeBuilder>>,
    /// Tokens that should be added to the tree, in source order.
    /// This includes both ignored tokens (whitespace/comments/commas) and error tokens.
    pending: Vec<PendingToken<'input>>,
    /// The list of syntax errors we've accumulated so far.
    errors: Vec<crate::Error>,
    /// The recursion limit to apply to parsing.
    recursion_limit: LimitTracker,
    /// Whether `push_err` still records errors. Starts out `true` and is set
    /// to `false` once a limit error has been seen, so that the follow-on
    /// errors caused by the early termination are dropped.
    accept_errors: bool,
    /// Allow descriptions on executable definitions (operation / fragment /
    /// variable definitions). Off by default. Enables the 2025 draft-spec
    /// syntax that graphql-js 16 accepts.
    allow_executable_descriptions: bool,
    /// Allow variable definitions on fragment definitions
    /// (`fragment F($x: Int) on T`). Off by default. This is the "legacy
    /// fragment variables" syntax accepted by graphql-js 16's
    /// `allowLegacyFragmentVariables`.
    allow_legacy_fragment_variables: bool,
}
108
/// A pending token to be added to the CST - either ignored (whitespace/comment/comma) or an error.
///
/// Entries are queued in `Parser::pending` and flushed into the tree, in
/// order, by `Parser::push_ignored`.
#[derive(Debug)]
enum PendingToken<'input> {
    /// A whitespace, comment or comma token queued by `Parser::skip_ignored`.
    Ignored(Token<'input>),
    /// Error token data (owned because Error is consumed after extracting data)
    Error(String),
}
116
/// Recursion limit that `Parser::new` installs by default.
///
/// Chosen experimentally with:
///
/// * oxc-graphql-parser 0.6.2+ (e05abbf4f)
/// * Rust 1.72.1
/// * aarch64-apple-darwin
/// * Unoptimized (default `cargo test` profile)
///
/// This couldn’t be set to much more than 2000 before the `recursion_limit` test below
/// hit "fatal runtime error: stack overflow"
///
/// Defaulting to around a quarter of that, to keep a comfortable safety margin.
const DEFAULT_RECURSION_LIMIT: usize = 500;
129
130impl<'input> Parser<'input> {
131    /// Create a new instance of a parser given an input string.
132    pub fn new(input: &'input str) -> Self {
133        let lexer = Lexer::new(input);
134
135        Self {
136            lexer,
137            current_token: None,
138            builder: Rc::new(RefCell::new(SyntaxTreeBuilder::new())),
139            pending: vec![],
140            errors: Vec::new(),
141            recursion_limit: LimitTracker::new(DEFAULT_RECURSION_LIMIT),
142            accept_errors: true,
143            allow_executable_descriptions: false,
144            allow_legacy_fragment_variables: false,
145        }
146    }
147
148    /// Configure the recursion limit to use while parsing.
149    pub fn recursion_limit(mut self, recursion_limit: usize) -> Self {
150        self.recursion_limit = LimitTracker::new(recursion_limit);
151        self
152    }
153
154    /// Allow descriptions on executable definitions (operation / fragment /
155    /// variable definitions). Off by default. When enabled, parses the 2025
156    /// draft-spec syntax accepted by graphql-js 16; otherwise a leading
157    /// description is an error as in October 2021.
158    pub fn allow_executable_descriptions(mut self, allow: bool) -> Self {
159        self.allow_executable_descriptions = allow;
160        self
161    }
162
    /// Returns the flag set through `allow_executable_descriptions()`
    /// (`false` unless it was enabled).
    pub(crate) fn executable_descriptions_allowed(&self) -> bool {
        self.allow_executable_descriptions
    }
166
167    /// Allow variable definitions on fragment definitions
168    /// (`fragment F($x: Int) on T`). Off by default. This is the "legacy
169    /// fragment variables" syntax accepted by graphql-js 16's
170    /// `allowLegacyFragmentVariables`.
171    pub fn allow_legacy_fragment_variables(mut self, allow: bool) -> Self {
172        self.allow_legacy_fragment_variables = allow;
173        self
174    }
175
    /// Returns the flag set through `allow_legacy_fragment_variables()`
    /// (`false` unless it was enabled).
    pub(crate) fn legacy_fragment_variables_allowed(&self) -> bool {
        self.allow_legacy_fragment_variables
    }
179
180    /// Configure the limit on the number of tokens to parse. If an input document
181    /// is too big, parsing will be aborted.
182    ///
183    /// By default, there is no limit.
184    pub fn token_limit(mut self, token_limit: usize) -> Self {
185        self.lexer = self.lexer.with_limit(token_limit);
186        self
187    }
188
189    /// Parse the current tokens.
190    pub fn parse(mut self) -> SyntaxTree<Document> {
191        grammar::document::document(&mut self);
192
193        let builder = Rc::try_unwrap(self.builder)
194            .expect("More than one reference to builder left")
195            .into_inner();
196        let builder =
197            builder.finish_document(self.errors, self.recursion_limit, self.lexer.limit_tracker);
198
199        match builder {
200            syntax_tree::SyntaxTreeWrapper::Document(tree) => tree,
201            syntax_tree::SyntaxTreeWrapper::Type(_)
202            | syntax_tree::SyntaxTreeWrapper::FieldSet(_) => {
203                unreachable!("parse constructor can only construct a document")
204            }
205        }
206    }
207
208    /// Parse a selection set with optional outer braces.
209    /// This is the expected format of the string value of the `fields` argument of some directives
210    /// like [`@requires`](https://www.apollographql.com/docs/federation/federated-types/federated-directives/#requires).
211    pub fn parse_selection_set(mut self) -> SyntaxTree<SelectionSet> {
212        grammar::selection::field_set(&mut self);
213
214        let builder = Rc::try_unwrap(self.builder)
215            .expect("More than one reference to builder left")
216            .into_inner();
217        let builder = builder.finish_selection_set(
218            self.errors,
219            self.recursion_limit,
220            self.lexer.limit_tracker,
221        );
222
223        match builder {
224            syntax_tree::SyntaxTreeWrapper::FieldSet(tree) => tree,
225            syntax_tree::SyntaxTreeWrapper::Document(_)
226            | syntax_tree::SyntaxTreeWrapper::Type(_) => {
227                unreachable!("parse_selection_set constructor can only construct a selection set")
228            }
229        }
230    }
231
232    /// Parse a GraphQL type.
233    /// This is the expected format of the string value of the `type` argument
234    /// of some directives like [`@field`](https://specs.apollo.dev/join/v0.3/#@field).
235    pub fn parse_type(mut self) -> SyntaxTree<Type> {
236        grammar::ty::ty(&mut self);
237
238        let builder = Rc::try_unwrap(self.builder)
239            .expect("More than one reference to builder left")
240            .into_inner();
241        let builder =
242            builder.finish_type(self.errors, self.recursion_limit, self.lexer.limit_tracker);
243
244        match builder {
245            syntax_tree::SyntaxTreeWrapper::Type(tree) => tree,
246            syntax_tree::SyntaxTreeWrapper::FieldSet(_)
247            | syntax_tree::SyntaxTreeWrapper::Document(_) => {
248                unreachable!("parse_type constructor can only construct a type")
249            }
250        }
251    }
252
253    /// Check if the current token is `kind`.
254    pub(crate) fn at(&mut self, token: TokenKind) -> bool {
255        if let Some(t) = self.peek() {
256            if t == token {
257                return true;
258            }
259            return false;
260        }
261
262        false
263    }
264
    /// Consume a token and add it to the syntax tree. Queue any ignored tokens that follow.
    ///
    /// `kind` is the syntax kind the consumed token is recorded under.
    pub(crate) fn bump(&mut self, kind: SyntaxKind) {
        // `eat` flushes previously queued tokens and pushes the current one ...
        self.eat(kind);
        // ... then the trailing whitespace/comments/commas are queued in `pending`.
        self.skip_ignored();
    }
270
271    /// Consume and skip ignored tokens from the lexer.
272    pub(crate) fn skip_ignored(&mut self) {
273        while let Some(TokenKind::Comment | TokenKind::Whitespace | TokenKind::Comma) = self.peek()
274        {
275            let token = self.pop();
276            self.pending.push(PendingToken::Ignored(token));
277        }
278    }
279
280    /// Push pending tokens (ignored + errors) to the current node.
281    pub(crate) fn push_ignored(&mut self) {
282        let pending = std::mem::take(&mut self.pending);
283        for item in pending {
284            match item {
285                PendingToken::Ignored(token) => {
286                    let syntax_kind = match token.kind {
287                        TokenKind::Comment => SyntaxKind::COMMENT,
288                        TokenKind::Whitespace => SyntaxKind::WHITESPACE,
289                        TokenKind::Comma => SyntaxKind::COMMA,
290                        _ => unreachable!(),
291                    };
292                    self.push_token(syntax_kind, token);
293                }
294                PendingToken::Error(data) => {
295                    self.builder.borrow_mut().token(SyntaxKind::ERROR, &data);
296                }
297            }
298        }
299    }
300
    /// Get current token's data.
    ///
    /// Thin alias for `peek_token()`: returns the lookahead token without
    /// consuming it, or `None` when the lexer yields no further token.
    pub(crate) fn current(&mut self) -> Option<&Token<'input>> {
        self.peek_token()
    }
305
306    /// Consume a token from the lexer and add it to the syntax tree.
307    fn eat(&mut self, kind: SyntaxKind) {
308        self.push_ignored();
309        if self.current().is_none() {
310            return;
311        }
312
313        let token = self.pop();
314        self.push_token(kind, token);
315    }
316
317    /// Create a parser limit error and push it into the error vector.
318    ///
319    /// Note: After a limit error is pushed, any further errors pushed
320    /// are silently discarded.
321    pub(crate) fn limit_err<S: Into<String>>(&mut self, message: S) {
322        let current = if let Some(current) = self.current() {
323            current
324        } else {
325            return;
326        };
327        // this needs to be the computed location
328        let err = Error::limit(message, current.index());
329        self.push_err(err);
330        self.accept_errors = false;
331    }
332
333    /// Create a parser error at a given location and push it into the error vector.
334    pub(crate) fn err_at_token(&mut self, current: &Token<'_>, message: &str) {
335        let err = if current.kind == TokenKind::Eof {
336            Error::eof(message, current.index())
337        } else {
338            // this needs to be the computed location
339            Error::with_loc(message, current.data().to_string(), current.index())
340        };
341        self.push_err(err);
342    }
343
344    /// Create a parser error at the current location and push it into the error vector.
345    pub(crate) fn err(&mut self, message: &str) {
346        let current = if let Some(current) = self.current() {
347            current
348        } else {
349            return;
350        };
351        let err = if current.kind == TokenKind::Eof {
352            Error::eof(message, current.index())
353        } else {
354            // this needs to be the computed location
355            Error::with_loc(message, current.data().to_string(), current.index())
356        };
357        self.push_err(err);
358    }
359
360    /// Create a parser error at the current location and eat the responsible token.
361    pub(crate) fn err_and_pop(&mut self, message: &str) {
362        self.push_ignored();
363        if self.current().is_none() {
364            return;
365        }
366
367        let current = self.pop();
368        let err = if current.kind == TokenKind::Eof {
369            Error::eof(message, current.index())
370        } else {
371            // this needs to be the computed location
372            Error::with_loc(message, current.data().to_string(), current.index())
373        };
374
375        // Keep the error in the parse tree for position information
376        self.push_token(SyntaxKind::ERROR, current);
377        self.push_err(err);
378
379        // we usually skip ignored tokens after we pop each token, so make sure we also do
380        // this when we create an error and pop.
381        self.skip_ignored();
382    }
383
384    /// Consume the next token if it is `kind` or emit an error
385    /// otherwise.
386    pub(crate) fn expect(&mut self, token: TokenKind, kind: SyntaxKind) {
387        let Some(current) = self.current() else {
388            return;
389        };
390        let is_eof = current.kind == TokenKind::Eof;
391        let data = current.data();
392        let index = current.index();
393
394        if self.at(token) {
395            self.bump(kind);
396            return;
397        }
398
399        let err = if is_eof {
400            let message = format!("expected {kind:?}, got EOF");
401            Error::eof(message, index)
402        } else {
403            let message = format!("expected {kind:?}, got {data}");
404            Error::with_loc(message, data.to_string(), index)
405        };
406
407        self.push_err(err);
408    }
409
410    /// Push an error to parser's error Vec.
411    pub(crate) fn push_err(&mut self, err: crate::error::Error) {
412        // If the parser has reached a limit, self.accept_errors will
413        // be set to false so that we do not push any more errors.
414        //
415        // This is because the limit activation will result
416        // in an early termination which will cause the parser to
417        // report "errors" which aren't really errors and thus
418        // must be ignored.
419        if self.accept_errors {
420            self.errors.push(err);
421        }
422    }
423
424    /// Gets the next token from the lexer.
425    ///
426    /// When lexer errors occur (e.g., unexpected multibyte characters), this method
427    /// queues their data to be added as ERROR tokens to the CST later. This ensures
428    /// rowan tracks the correct byte positions without "gaps" in the tree.
429    fn next_token(&mut self) -> Option<Token<'input>> {
430        for res in &mut self.lexer {
431            match res {
432                Err(err) => {
433                    if err.is_limit() {
434                        self.accept_errors = false;
435                    }
436                    // Queue the error data to be added to the CST later.
437                    let data = err.data();
438                    if !data.is_empty() {
439                        self.pending.push(PendingToken::Error(data.to_owned()));
440                    }
441                    self.errors.push(err);
442                }
443                Ok(token) => {
444                    return Some(token);
445                }
446            }
447        }
448
449        None
450    }
451
452    /// Consume a token from the lexer.
453    pub(crate) fn pop(&mut self) -> Token<'input> {
454        if let Some(token) = self.current_token.take() {
455            return token;
456        }
457
458        self.next_token()
459            .expect("Could not pop a token from the lexer")
460    }
461
    /// Insert a token into the syntax tree.
    ///
    /// The token's text (`token.data()`) is recorded under syntax kind `kind`
    /// in the node currently being built.
    pub(crate) fn push_token(&mut self, kind: SyntaxKind, token: Token) {
        self.builder.borrow_mut().token(kind, token.data())
    }
466
467    /// Start a node and make it current.
468    ///
469    /// This also creates a NodeGuard under the hood that will automatically
470    /// close the node(via Drop) when the guard goes out of scope.
471    /// This allows for us to not have to always close nodes when we are parsing
472    /// tokens.
473    pub(crate) fn start_node(&mut self, kind: SyntaxKind) -> NodeGuard {
474        self.push_ignored();
475
476        self.builder.borrow_mut().start_node(kind);
477        let guard = NodeGuard::new(self.builder.clone());
478        self.skip_ignored();
479
480        guard
481    }
482
483    /// Set a checkpoint for *maybe* wrapping the following parse tree in some
484    /// other node.
485    pub(crate) fn checkpoint_node(&mut self) -> Checkpoint {
486        // We may start a new node here in the future, so let's process
487        // our preceding whitespace first
488        self.push_ignored();
489
490        let checkpoint = self.builder.borrow().checkpoint();
491        Checkpoint::new(self.builder.clone(), checkpoint)
492    }
493
494    /// Peek the next Token and return its TokenKind.
495    pub(crate) fn peek(&mut self) -> Option<TokenKind> {
496        self.peek_token().map(|token| token.kind())
497    }
498
499    /// Repeatedly peek at the next token and call the parse function. The parse function must
500    /// advance parsing or break out of the loop.
501    pub(crate) fn peek_while(
502        &mut self,
503        mut run: impl FnMut(&mut Parser, TokenKind) -> ControlFlow<()>,
504    ) {
505        while let Some(kind) = self.peek() {
506            let before = self.current_token.clone();
507            match run(self, kind) {
508                ControlFlow::Break(()) => break,
509                ControlFlow::Continue(()) => {
510                    debug_assert!(
511                        before != self.current_token,
512                        "peek_while() iteration must advance parsing"
513                    );
514                }
515            }
516        }
517    }
518
519    /// Call the parse function while the next token is of the expected kind. The parse function
520    /// must consume the peeked token.
521    pub(crate) fn peek_while_kind(&mut self, expect: TokenKind, mut run: impl FnMut(&mut Parser)) {
522        while let Some(kind) = self.peek() {
523            if kind != expect {
524                break;
525            }
526
527            let before = self.current_token.clone();
528            run(self);
529            debug_assert!(
530                before != self.current_token,
531                "peek_while_kind() iteration must advance parsing"
532            );
533        }
534    }
535
536    /// Call the parse function, separated by a token given in `separator`. This parses at least
537    /// one item. The first item may optionally be prefixed by an initial separator.
538    pub(crate) fn parse_separated_list(
539        &mut self,
540        separator: TokenKind,
541        separator_syntax: SyntaxKind,
542        mut run: impl FnMut(&mut Parser),
543    ) {
544        if matches!(self.peek(), Some(kind) if kind == separator) {
545            self.bump(separator_syntax);
546        }
547
548        run(self);
549
550        self.peek_while_kind(separator, |p| {
551            p.bump(separator_syntax);
552            run(p);
553        });
554    }
555
556    /// Peek the next Token and return it.
557    pub(crate) fn peek_token(&mut self) -> Option<&Token<'input>> {
558        if self.current_token.is_none() {
559            self.current_token = self.next_token();
560        }
561        self.current_token.as_ref()
562    }
563
    /// Peek Token `n` and return it.
    ///
    /// `n` is 1-based and counts only tokens that are neither whitespace nor
    /// comments (see `peek_n_inner`). Nothing is consumed.
    pub(crate) fn peek_token_n(&self, n: usize) -> Option<Token<'input>> {
        self.peek_n_inner(n)
    }
568
569    /// Peek Token `n` and return its TokenKind.
570    pub(crate) fn peek_n(&self, n: usize) -> Option<TokenKind> {
571        self.peek_n_inner(n).map(|token| token.kind())
572    }
573
574    fn peek_n_inner(&self, n: usize) -> Option<Token<'input>> {
575        self.current_token
576            .iter()
577            .cloned()
578            .map(Result::Ok)
579            .chain(self.lexer.clone())
580            .filter_map(Result::ok)
581            .filter(|token| !matches!(token.kind(), TokenKind::Whitespace | TokenKind::Comment))
582            .nth(n - 1)
583    }
584
585    /// Peek next Token's `data` property.
586    pub(crate) fn peek_data(&mut self) -> Option<&'input str> {
587        self.peek_token().map(|token| token.data())
588    }
589
590    /// Peek `n` Token's `data` property.
591    pub(crate) fn peek_data_n(&self, n: usize) -> Option<&'input str> {
592        self.peek_token_n(n).map(|token| token.data())
593    }
594}
595
/// A wrapper around the SyntaxTreeBuilder used to self-close nodes.
///
/// When the NodeGuard goes out of scope, it automatically runs `finish_node()`
/// on the SyntaxTreeBuilder. This ensures that nodes are not forgotten to be
/// closed.
#[must_use]
pub(crate) struct NodeGuard {
    /// Shared handle to the builder whose current node this guard closes.
    builder: Rc<RefCell<SyntaxTreeBuilder>>,
}

impl NodeGuard {
    /// Wrap a builder handle. The node itself must already have been started
    /// by the caller.
    fn new(builder: Rc<RefCell<SyntaxTreeBuilder>>) -> Self {
        Self { builder }
    }

    /// Close the node now instead of waiting for the guard to go out of scope.
    pub(crate) fn finish_node(self) {
        // Dropping the guard is what runs `finish_node()` on the builder.
        drop(self);
    }
}

impl Drop for NodeGuard {
    /// Finish the builder's current node.
    fn drop(&mut self) {
        self.builder.borrow_mut().finish_node();
    }
}
621
622/// A rowan Checkpoint that can self-close the new wrapper node if required.
623pub(crate) struct Checkpoint {
624    builder: Rc<RefCell<SyntaxTreeBuilder>>,
625    checkpoint: rowan::Checkpoint,
626}
627
628impl Checkpoint {
629    fn new(builder: Rc<RefCell<SyntaxTreeBuilder>>, checkpoint: rowan::Checkpoint) -> Self {
630        Self {
631            builder,
632            checkpoint,
633        }
634    }
635
636    /// Wrap the nodes that were parsed since setting this checkpoint in a new parent node of kind
637    /// `kind`. Returns a NodeGuard that when dropped, finishes this new parent node. More children
638    /// can be added to this new node in the mean time.
639    pub(crate) fn wrap_node(self, kind: SyntaxKind) -> NodeGuard {
640        self.builder.borrow_mut().wrap_node(self.checkpoint, kind);
641        NodeGuard::new(self.builder)
642    }
643}
644
645#[cfg(test)]
646mod tests {
647    use super::DEFAULT_RECURSION_LIMIT;
648    use crate::cst;
649    use crate::Error;
650    use crate::Parser;
651    use crate::SyntaxTree;
652    use expect_test::expect;
653
    /// A token limit hit in the middle of a node reports exactly one limit
    /// error and nothing else.
    #[test]
    fn limited_mid_node() {
        let source = r#"
            type Query {
                field(arg1: Int, arg2: Int, arg3: Int, arg4: Int, arg5: Int, arg6: Int): Int
            }
        "#;
        let parser = Parser::new(source)
            // Make it stop inside the arguments list
            .token_limit(18);
        let tree = parser.parse();
        let mut errors = tree.errors();
        // The first (and only) error is the limit error itself ...
        assert_eq!(
            errors.next(),
            Some(&Error::limit("token limit reached, aborting lexing", 65))
        );
        // ... with no follow-on syntax errors from the truncated input.
        assert_eq!(errors.next(), None);
    }
672
    /// With both a recursion limit and a token limit configured, only the
    /// limit that is reached is reported, as a single error.
    #[test]
    fn multiple_limits() {
        let source = r#"
            query {
                a {
                    a {
                        a {
                            a
                        }
                    }
                }
            }
        "#;

        // Token limit is the one that triggers here.
        let parser = Parser::new(source).recursion_limit(10).token_limit(22);
        let cst = parser.parse();
        let errors = cst.errors().collect::<Vec<_>>();
        assert_eq!(
            errors,
            &[&Error::limit("token limit reached, aborting lexing", 170),]
        );

        // Recursion limit is the one that triggers here.
        let parser = Parser::new(source).recursion_limit(3).token_limit(200);
        let cst = parser.parse();
        let errors = cst.errors().collect::<Vec<_>>();
        assert_eq!(
            errors,
            &[&Error::limit("parser recursion limit reached", 121),]
        );
    }
703
    /// A syntax error found before the token limit is reported; anything
    /// after the limit error is dropped, and the tree stops at the limit.
    #[test]
    fn syntax_errors_and_limits() {
        // Syntax errors before and after the limit
        let source = r#"
            type Query {
                field(arg1: Int, missing_arg): Int
                # limit reached here
                field2: !String
            } and then some garbage
        "#;
        let parser = Parser::new(source).token_limit(22);
        let cst = parser.parse();
        let mut errors = cst.errors();
        // The error before the limit is kept.
        assert_eq!(
            errors.next(),
            Some(&Error::with_loc("expected a Name", ")".to_string(), 70))
        );
        // index 113 is immediately after the comment, before the newline
        assert_eq!(
            errors.next(),
            Some(&Error::limit("token limit reached, aborting lexing", 113))
        );
        // Nothing from the source after the limit is reported as an error.
        assert_eq!(errors.next(), None);

        // Snapshot of the tree: it ends at byte 113, the end of the comment.
        let tree = expect![[r##"
            DOCUMENT@0..113
              WHITESPACE@0..13 "\n            "
              OBJECT_TYPE_DEFINITION@13..76
                type_KW@13..17 "type"
                WHITESPACE@17..18 " "
                NAME@18..23
                  IDENT@18..23 "Query"
                WHITESPACE@23..24 " "
                FIELDS_DEFINITION@24..76
                  L_CURLY@24..25 "{"
                  WHITESPACE@25..42 "\n                "
                  FIELD_DEFINITION@42..76
                    NAME@42..47
                      IDENT@42..47 "field"
                    ARGUMENTS_DEFINITION@47..71
                      L_PAREN@47..48 "("
                      INPUT_VALUE_DEFINITION@48..57
                        NAME@48..52
                          IDENT@48..52 "arg1"
                        COLON@52..53 ":"
                        WHITESPACE@53..54 " "
                        NAMED_TYPE@54..57
                          NAME@54..57
                            IDENT@54..57 "Int"
                      COMMA@57..58 ","
                      WHITESPACE@58..59 " "
                      INPUT_VALUE_DEFINITION@59..70
                        NAME@59..70
                          IDENT@59..70 "missing_arg"
                      R_PAREN@70..71 ")"
                    COLON@71..72 ":"
                    WHITESPACE@72..73 " "
                    NAMED_TYPE@73..76
                      NAME@73..76
                        IDENT@73..76 "Int"
              WHITESPACE@76..93 "\n                "
              COMMENT@93..113 "# limit reached here"
        "##]];
        tree.assert_eq(&format!("{:#?}", cst.document().syntax));
    }
769
    /// Stray tokens between definitions do not stop the surrounding
    /// definitions from appearing in the resulting document.
    #[test]
    fn tree_with_syntax_errors() {
        use crate::cst::Definition;

        // Some arbitrary token spam in incorrect places--this test uses
        // valid tokens only
        let source = r#"
            garbage type Query implements X {
                field(arg: Int): Int
            } garbage :,, (|) interface X {}
        "#;
        let cst = Parser::new(source).parse();

        // Exactly two definitions are expected: the object type, then the interface.
        let mut definitions = cst.document().definitions();
        let query_def = definitions.next().unwrap();
        let interface_def = definitions.next().unwrap();
        assert_eq!(definitions.next(), None);
        assert!(matches!(query_def, Definition::ObjectTypeDefinition(_)));
        assert!(matches!(
            interface_def,
            Definition::InterfaceTypeDefinition(_)
        ));
    }
793
    /// The tree reports the highest token count reached, even when the
    /// configured token limit is not hit.
    #[test]
    fn token_limit() {
        let cst = Parser::new("type Query { a a a a a a a a a }")
            .token_limit(100)
            .parse();
        // token count includes EOF token.
        assert_eq!(cst.token_limit().high, 26);
    }
802
803    #[test]
804    // single char v.s. multiple is less important than consistency between consecutive calls:
805    #[allow(clippy::single_char_add_str)]
806    fn recursion_limit() {
807        // A factor 50 makes this test to run in ~1 second on a laptop from 2021,
808        // in unoptimized mode
809        const SMASH_THE_STACK_FACTOR: usize = 50;
810
811        wide(2, |ast| assert_eq!(ast.errors, []));
812        wide(DEFAULT_RECURSION_LIMIT - 2, |ast| {
813            assert_eq!(ast.errors.len(), 0, "{:?}", ast.errors[0])
814        });
815        wide(DEFAULT_RECURSION_LIMIT * SMASH_THE_STACK_FACTOR, |_ast| {
816            // TODO: remove use of recursion to parse repetition and uncomment:
817            // assert_eq!(ast.errors.len(), 0)
818        });
819
820        deep(2, |ast| assert_eq!(ast.errors, []));
821        deep(DEFAULT_RECURSION_LIMIT - 2, |ast| {
822            assert_eq!(ast.errors.len(), 0, "{:?}", ast.errors[0])
823        });
824        deep(DEFAULT_RECURSION_LIMIT * SMASH_THE_STACK_FACTOR, |ast| {
825            // Parsing nested structures without recursion on the call stack
826            // is possible but not as easy as it would require an explicit stack.
827
828            // The recursion limit triggered and protected against stack overflow.
829            assert_eq!(ast.errors.len(), 1);
830            assert!(ast.errors[0].message.contains("recursion limit reached"));
831        });
832
833        fn deep(count: usize, each: impl Fn(SyntaxTree)) {
834            let check = |input: String| each(Parser::new(&input).parse());
835
836            // Nested list type
837            let mut doc = String::new();
838            doc.push_str("type O { field: ");
839            doc.push_str(&"[".repeat(count));
840            doc.push_str("Int");
841            doc.push_str(&"]".repeat(count));
842            doc.push_str(" }");
843            check(doc);
844
845            // Nested list value
846            let mut doc = String::new();
847            doc.push_str("type O { field(arg: T = ");
848            doc.push_str(&"[".repeat(count));
849            doc.push_str("0");
850            doc.push_str(&"]".repeat(count));
851            doc.push_str("): Int }");
852            check(doc);
853
854            // Nested object value
855            let mut doc = String::new();
856            doc.push_str("type O { field(arg: T = ");
857            doc.push_str(&"{f: ".repeat(count));
858            doc.push_str("0");
859            doc.push_str(&"}".repeat(count));
860            doc.push_str("): Int }");
861            check(doc);
862
863            // Nested selection set
864            let mut doc = String::new();
865            doc.push_str("query { ");
866            doc.push_str(&"f { ".repeat(count));
867            doc.push_str("f ");
868            doc.push_str(&"}".repeat(count));
869            doc.push_str("}");
870            check(doc);
871        }
872
873        fn wide(count: usize, each: impl Fn(SyntaxTree)) {
874            let check = |input: String| each(Parser::new(&input).parse());
875
876            // Repeated top-level definitions
877            let mut doc = String::new();
878            doc.push_str(&"directive @d on FIELD ".repeat(count));
879            check(doc);
880
881            // Repeated directive applications
882            let mut doc = String::new();
883            doc.push_str("scalar Url");
884            doc.push_str(&" @d".repeat(count));
885            check(doc);
886
887            // Repeated root operation
888            let mut doc = String::new();
889            doc.push_str("schema {");
890            doc.push_str(&" query: Q".repeat(count));
891            doc.push_str(" }");
892            check(doc);
893
894            // Repeated implements interface
895            let mut doc = String::new();
896            doc.push_str("type O implements");
897            doc.push_str(&" & I".repeat(count));
898            check(doc);
899
900            // Repeated object type field
901            let mut doc = String::new();
902            doc.push_str("type O {");
903            doc.push_str(&" f: T".repeat(count));
904            doc.push_str("}");
905            check(doc);
906
907            // Repeated enum value field
908            let mut doc = String::new();
909            doc.push_str("enum E {");
910            doc.push_str(&" V".repeat(count));
911            doc.push_str("}");
912            check(doc);
913
914            // Repeated union member
915            let mut doc = String::new();
916            doc.push_str("union U = ");
917            doc.push_str(&" | T".repeat(count));
918            check(doc);
919
920            // Repeated input object type field
921            let mut doc = String::new();
922            doc.push_str("input In {");
923            doc.push_str(&" f: T".repeat(count));
924            doc.push_str("}");
925            check(doc);
926
927            // Repeated input object value field
928            let mut doc = String::new();
929            doc.push_str("type O { field(arg: T = {");
930            doc.push_str(&" f: 0".repeat(count));
931            doc.push_str(" }): Int }");
932            check(doc);
933
934            // Repeated list value item
935            let mut doc = String::new();
936            doc.push_str("type O { field(arg: T = [");
937            doc.push_str(&" 0,".repeat(count));
938            doc.push_str(" ]): Int }");
939            check(doc);
940
941            // Repeated field argument definitions
942            let mut doc = String::new();
943            doc.push_str("type O { field(");
944            doc.push_str(&"a: T ".repeat(count));
945            doc.push_str("): Int }");
946            check(doc);
947
948            // Repeated field selection
949            let mut doc = String::new();
950            doc.push_str("query {");
951            doc.push_str(&" f".repeat(count));
952            doc.push_str(" }");
953            check(doc);
954
955            // Repeated field argument
956            let mut doc = String::new();
957            doc.push_str("query { f(");
958            doc.push_str(&" a: 0".repeat(count));
959            doc.push_str(") }");
960            check(doc);
961
962            // Repeated variable definition
963            let mut doc = String::new();
964            doc.push_str("query Q(");
965            doc.push_str(&" $v: Int".repeat(count));
966            doc.push_str(" ) { f }");
967            check(doc);
968        }
969    }
970
971    #[test]
972    fn parse_field_set() {
973        let source = r#"{ a }"#;
974
975        let parser = Parser::new(source);
976        let cst: SyntaxTree<cst::SelectionSet> = parser.parse_selection_set();
977        let errors = cst.errors().collect::<Vec<_>>();
978        assert_eq!(errors.len(), 0);
979
980        let sel_set: cst::SelectionSet = cst.field_set();
981        let _ = sel_set.selections().map(|sel| {
982            if let cst::Selection::Field(f) = sel {
983                assert_eq!(f.name().unwrap().text().as_ref(), "a")
984            } else {
985                panic!("no field a in field set selection")
986            }
987        });
988
989        let source = r#"a { a }"#;
990
991        let parser = Parser::new(source);
992        let cst: SyntaxTree<cst::SelectionSet> = parser.parse_selection_set();
993        let errors = cst.errors().collect::<Vec<_>>();
994        assert_eq!(errors.len(), 0);
995
996        let sel_set: cst::SelectionSet = cst.field_set();
997        let _ = sel_set.selections().map(|sel| {
998            if let cst::Selection::Field(f) = sel {
999                assert_eq!(f.name().unwrap().text().as_ref(), "a")
1000            } else {
1001                panic!("no field a in field set selection")
1002            }
1003        });
1004    }
1005
1006    #[test]
1007    fn no_infinite_loop() {
1008        let source = r#"{ ..."#;
1009        let parser = Parser::new(source).token_limit(3);
1010        let _cst = parser.parse();
1011    }
1012
1013    /// Helper to check all CST nodes/tokens have valid UTF-8 character boundaries.
1014    ///
1015    /// Prior to #1023, lexing errors were not recorded in the CST, so any tokens
1016    /// lexed after the error would have incorrect position information. This could
1017    /// even lead to panics if the incorrect positions were not on a char boundary.
1018    fn check_char_boundaries(node: &crate::SyntaxNode, source: &str) {
1019        let range = node.text_range();
1020        let start: usize = range.start().into();
1021        let end: usize = range.end().into();
1022        assert!(
1023            source.is_char_boundary(start),
1024            "Node {:?} start {} is not a char boundary",
1025            node.kind(),
1026            start
1027        );
1028        assert!(
1029            source.is_char_boundary(end),
1030            "Node {:?} end {} is not a char boundary",
1031            node.kind(),
1032            end
1033        );
1034
1035        for child in node.children_with_tokens() {
1036            match child {
1037                rowan::NodeOrToken::Node(n) => check_char_boundaries(&n, source),
1038                rowan::NodeOrToken::Token(t) => {
1039                    let range = t.text_range();
1040                    let start: usize = range.start().into();
1041                    let end: usize = range.end().into();
1042                    assert!(
1043                        source.is_char_boundary(start),
1044                        "Token {:?} start {} is not a char boundary",
1045                        t.kind(),
1046                        start
1047                    );
1048                    assert!(
1049                        source.is_char_boundary(end),
1050                        "Token {:?} end {} is not a char boundary",
1051                        t.kind(),
1052                        end
1053                    );
1054                }
1055            }
1056        }
1057    }
1058
1059    /// Unexpected CJK characters (3-byte UTF-8) should not throw off byte
1060    /// positions of later tokens.
1061    #[test]
1062    fn lexer_error_cjk_preserves_byte_positions() {
1063        use crate::cst::CstNode;
1064
1065        let source = "type Query { field: 中文类型 }";
1066        let cst = Parser::new(source).parse();
1067        assert!(!cst.errors().collect::<Vec<_>>().is_empty());
1068        check_char_boundaries(cst.document().syntax(), source);
1069    }
1070
1071    /// Mixed ASCII and multi-byte errors should not throw off byte
1072    /// positions of later tokens.
1073    #[test]
1074    fn lexer_error_mixed_preserves_byte_positions() {
1075        use crate::cst::CstNode;
1076
1077        let source = "type Query { f1: @#$ f2: 日本語 f3: !!! }";
1078        let cst = Parser::new(source).parse();
1079        assert!(!cst.errors().collect::<Vec<_>>().is_empty());
1080        check_char_boundaries(cst.document().syntax(), source);
1081    }
1082
1083    /// Unexpected emoji characters should not throw off byte
1084    /// positions of later tokens.
1085    #[test]
1086    fn lexer_error_emoji_preserves_byte_positions() {
1087        use crate::cst::CstNode;
1088
1089        let source = "type Query { field: 🚀🌍 }";
1090        let cst = Parser::new(source).parse();
1091        assert!(!cst.errors().collect::<Vec<_>>().is_empty());
1092        check_char_boundaries(cst.document().syntax(), source);
1093    }
1094}