// apollo_parser/parser/mod.rs
1mod generated;
2mod language;
3mod syntax_tree;
4mod token_text;
5
6pub(crate) mod grammar;
7
8use crate::cst::Document;
9use crate::cst::SelectionSet;
10use crate::cst::Type;
11use crate::lexer::Lexer;
12use crate::Error;
13use crate::LimitTracker;
14use crate::Token;
15use crate::TokenKind;
16pub use generated::syntax_kind::SyntaxKind;
17pub use language::SyntaxElement;
18pub use language::SyntaxNode;
19pub use language::SyntaxNodeChildren;
20pub use language::SyntaxNodePtr;
21pub use language::SyntaxToken;
22use std::cell::RefCell;
23use std::ops::ControlFlow;
24use std::rc::Rc;
25pub use syntax_tree::SyntaxTree;
26// pub(crate) use language::GraphQLLanguage;
27pub(crate) use syntax_tree::SyntaxTreeBuilder;
28pub(crate) use token_text::TokenText;
29
/// Parse GraphQL schemas or queries into a typed CST.
///
/// ## Example
///
/// The API to parse a query or a schema is the same, as the parser currently
/// accepts a `&str`. Here is an example of parsing a query:
/// ```rust
/// use apollo_parser::Parser;
///
/// let query = "
/// {
///     animal
///     ...snackSelection
///     ... on Pet {
///       playmates {
///         count
///       }
///     }
/// }
/// ";
/// // Create a new instance of a parser given a query above.
/// let parser = Parser::new(query);
/// // Parse the query, and return a SyntaxTree.
/// let cst = parser.parse();
/// // Check that there are no errors. These are not part of the CST.
/// assert_eq!(0, cst.errors().len());
///
/// // Get the document root node
/// let doc = cst.document();
/// // ... continue
/// ```
///
/// Here is how you'd parse a schema:
/// ```rust
/// use apollo_parser::Parser;
/// let core_schema = r#"
/// schema @core(feature: "https://specs.apollo.dev/join/v0.1") {
///   query: Query
///   mutation: Mutation
/// }
///
/// enum join__Graph {
///   ACCOUNTS @join__graph(name: "accounts")
/// }
/// "#;
/// let parser = Parser::new(core_schema);
/// let cst = parser.parse();
///
/// assert_eq!(0, cst.errors().len());
///
/// let document = cst.document();
/// ```
#[derive(Debug)]
pub struct Parser<'input> {
    /// The lexer that produces tokens (and lexing errors) from the input string.
    lexer: Lexer<'input>,
    /// Store one lookahead token so we don't need to reparse things as much.
    current_token: Option<Token<'input>>,
    /// The in-progress tree.
    builder: Rc<RefCell<SyntaxTreeBuilder>>,
    /// Tokens that should be added to the tree, in source order.
    /// This includes both ignored tokens (whitespace/comments/commas) and error tokens.
    pending: Vec<PendingToken<'input>>,
    /// The list of syntax errors we've accumulated so far.
    errors: Vec<crate::Error>,
    /// The limit to apply to parsing.
    recursion_limit: LimitTracker,
    /// Accept parsing errors?
    accept_errors: bool,
}
99
/// A pending token to be added to the CST - either ignored (whitespace/comment/comma) or an error.
#[derive(Debug)]
enum PendingToken<'input> {
    /// An ignored token (whitespace, comment, or comma) that still needs to be
    /// recorded in the tree so byte offsets stay accurate.
    Ignored(Token<'input>),
    /// Error token data (owned because Error is consumed after extracting data)
    Error(String),
}
107
/// Chosen experimentally with:
///
/// * apollo-parser 0.6.2+ (e05abbf4f)
/// * Rust 1.72.1
/// * aarch64-apple-darwin
/// * Unoptimized (default `cargo test` profile)
///
/// This couldn’t be set to much more than 2000 before the `recursion_limit` test below
/// hit "fatal runtime error: stack overflow"
///
/// Defaulting to around a quarter of that, to keep a comfortable safety margin.
const DEFAULT_RECURSION_LIMIT: usize = 500;
120
impl<'input> Parser<'input> {
    /// Create a new instance of a parser given an input string.
    pub fn new(input: &'input str) -> Self {
        let lexer = Lexer::new(input);

        Self {
            lexer,
            current_token: None,
            builder: Rc::new(RefCell::new(SyntaxTreeBuilder::new())),
            pending: vec![],
            errors: Vec::new(),
            recursion_limit: LimitTracker::new(DEFAULT_RECURSION_LIMIT),
            accept_errors: true,
        }
    }

    /// Configure the recursion limit to use while parsing.
    pub fn recursion_limit(mut self, recursion_limit: usize) -> Self {
        self.recursion_limit = LimitTracker::new(recursion_limit);
        self
    }

    /// Configure the limit on the number of tokens to parse. If an input document
    /// is too big, parsing will be aborted.
    ///
    /// By default, there is no limit.
    pub fn token_limit(mut self, token_limit: usize) -> Self {
        self.lexer = self.lexer.with_limit(token_limit);
        self
    }

    /// Parse the current tokens.
    pub fn parse(mut self) -> SyntaxTree<Document> {
        grammar::document::document(&mut self);

        // By now every NodeGuard created during parsing has been dropped,
        // so this is the only remaining strong reference to the builder.
        let builder = Rc::try_unwrap(self.builder)
            .expect("More than one reference to builder left")
            .into_inner();
        let builder =
            builder.finish_document(self.errors, self.recursion_limit, self.lexer.limit_tracker);

        match builder {
            syntax_tree::SyntaxTreeWrapper::Document(tree) => tree,
            syntax_tree::SyntaxTreeWrapper::Type(_)
            | syntax_tree::SyntaxTreeWrapper::FieldSet(_) => {
                unreachable!("parse constructor can only construct a document")
            }
        }
    }

    /// Parse a selection set with optional outer braces.
    /// This is the expected format of the string value of the `fields` argument of some directives
    /// like [`@requires`](https://www.apollographql.com/docs/federation/federated-types/federated-directives/#requires).
    pub fn parse_selection_set(mut self) -> SyntaxTree<SelectionSet> {
        grammar::selection::field_set(&mut self);

        let builder = Rc::try_unwrap(self.builder)
            .expect("More than one reference to builder left")
            .into_inner();
        let builder = builder.finish_selection_set(
            self.errors,
            self.recursion_limit,
            self.lexer.limit_tracker,
        );

        match builder {
            syntax_tree::SyntaxTreeWrapper::FieldSet(tree) => tree,
            syntax_tree::SyntaxTreeWrapper::Document(_)
            | syntax_tree::SyntaxTreeWrapper::Type(_) => {
                unreachable!("parse_selection_set constructor can only construct a selection set")
            }
        }
    }

    /// Parse a GraphQL type.
    /// This is the expected format of the string value of the `type` argument
    /// of some directives like [`@field`](https://specs.apollo.dev/join/v0.3/#@field).
    pub fn parse_type(mut self) -> SyntaxTree<Type> {
        grammar::ty::ty(&mut self);

        let builder = Rc::try_unwrap(self.builder)
            .expect("More than one reference to builder left")
            .into_inner();
        let builder =
            builder.finish_type(self.errors, self.recursion_limit, self.lexer.limit_tracker);

        match builder {
            syntax_tree::SyntaxTreeWrapper::Type(tree) => tree,
            syntax_tree::SyntaxTreeWrapper::FieldSet(_)
            | syntax_tree::SyntaxTreeWrapper::Document(_) => {
                unreachable!("parse_type constructor can only construct a type")
            }
        }
    }

    /// Check if the current token is `kind`.
    pub(crate) fn at(&mut self, token: TokenKind) -> bool {
        if let Some(t) = self.peek() {
            if t == token {
                return true;
            }
            return false;
        }

        false
    }

    /// Consume a token and add it to the syntax tree. Queue any ignored tokens that follow.
    pub(crate) fn bump(&mut self, kind: SyntaxKind) {
        self.eat(kind);
        self.skip_ignored();
    }

    /// Consume and skip ignored tokens from the lexer.
    ///
    /// Skipped tokens are queued in `self.pending` (not dropped) so they can
    /// later be attached to the tree with correct byte offsets.
    pub(crate) fn skip_ignored(&mut self) {
        while let Some(TokenKind::Comment | TokenKind::Whitespace | TokenKind::Comma) = self.peek()
        {
            let token = self.pop();
            self.pending.push(PendingToken::Ignored(token));
        }
    }

    /// Push pending tokens (ignored + errors) to the current node.
    pub(crate) fn push_ignored(&mut self) {
        // Take the queue so we can mutate `self` while draining it.
        let pending = std::mem::take(&mut self.pending);
        for item in pending {
            match item {
                PendingToken::Ignored(token) => {
                    let syntax_kind = match token.kind {
                        TokenKind::Comment => SyntaxKind::COMMENT,
                        TokenKind::Whitespace => SyntaxKind::WHITESPACE,
                        TokenKind::Comma => SyntaxKind::COMMA,
                        // skip_ignored() only queues the three kinds above.
                        _ => unreachable!(),
                    };
                    self.push_token(syntax_kind, token);
                }
                PendingToken::Error(data) => {
                    self.builder.borrow_mut().token(SyntaxKind::ERROR, &data);
                }
            }
        }
    }

    /// Get current token's data.
    pub(crate) fn current(&mut self) -> Option<&Token<'input>> {
        self.peek_token()
    }

    /// Consume a token from the lexer and add it to the syntax tree.
    fn eat(&mut self, kind: SyntaxKind) {
        // Flush queued ignored/error tokens first so tree order matches source order.
        self.push_ignored();
        if self.current().is_none() {
            return;
        }

        let token = self.pop();
        self.push_token(kind, token);
    }

    /// Create a parser limit error and push it into the error vector.
    ///
    /// Note: After a limit error is pushed, any further errors pushed
    /// are silently discarded.
    pub(crate) fn limit_err<S: Into<String>>(&mut self, message: S) {
        let current = if let Some(current) = self.current() {
            current
        } else {
            return;
        };
        // this needs to be the computed location
        let err = Error::limit(message, current.index());
        self.push_err(err);
        self.accept_errors = false;
    }

    /// Create a parser error at a given location and push it into the error vector.
    pub(crate) fn err_at_token(&mut self, current: &Token<'_>, message: &str) {
        let err = if current.kind == TokenKind::Eof {
            Error::eof(message, current.index())
        } else {
            // this needs to be the computed location
            Error::with_loc(message, current.data().to_string(), current.index())
        };
        self.push_err(err);
    }

    /// Create a parser error at the current location and push it into the error vector.
    pub(crate) fn err(&mut self, message: &str) {
        let current = if let Some(current) = self.current() {
            current
        } else {
            return;
        };
        let err = if current.kind == TokenKind::Eof {
            Error::eof(message, current.index())
        } else {
            // this needs to be the computed location
            Error::with_loc(message, current.data().to_string(), current.index())
        };
        self.push_err(err);
    }

    /// Create a parser error at the current location and eat the responsible token.
    pub(crate) fn err_and_pop(&mut self, message: &str) {
        self.push_ignored();
        if self.current().is_none() {
            return;
        }

        let current = self.pop();
        let err = if current.kind == TokenKind::Eof {
            Error::eof(message, current.index())
        } else {
            // this needs to be the computed location
            Error::with_loc(message, current.data().to_string(), current.index())
        };

        // Keep the error in the parse tree for position information
        self.push_token(SyntaxKind::ERROR, current);
        self.push_err(err);

        // we usually skip ignored tokens after we pop each token, so make sure we also do
        // this when we create an error and pop.
        self.skip_ignored();
    }

    /// Consume the next token if it is `kind` or emit an error
    /// otherwise.
    pub(crate) fn expect(&mut self, token: TokenKind, kind: SyntaxKind) {
        let Some(current) = self.current() else {
            return;
        };
        // Capture location data up front: `bump` below invalidates `current`.
        let is_eof = current.kind == TokenKind::Eof;
        let data = current.data();
        let index = current.index();

        if self.at(token) {
            self.bump(kind);
            return;
        }

        let err = if is_eof {
            let message = format!("expected {kind:?}, got EOF");
            Error::eof(message, index)
        } else {
            let message = format!("expected {kind:?}, got {data}");
            Error::with_loc(message, data.to_string(), index)
        };

        self.push_err(err);
    }

    /// Push an error to parser's error Vec.
    pub(crate) fn push_err(&mut self, err: crate::error::Error) {
        // If the parser has reached a limit, self.accept_errors will
        // be set to false so that we do not push any more errors.
        //
        // This is because the limit activation will result
        // in an early termination which will cause the parser to
        // report "errors" which aren't really errors and thus
        // must be ignored.
        if self.accept_errors {
            self.errors.push(err);
        }
    }

    /// Gets the next token from the lexer.
    ///
    /// When lexer errors occur (e.g., unexpected multibyte characters), this method
    /// queues their data to be added as ERROR tokens to the CST later. This ensures
    /// rowan tracks the correct byte positions without "gaps" in the tree.
    fn next_token(&mut self) -> Option<Token<'input>> {
        for res in &mut self.lexer {
            match res {
                Err(err) => {
                    if err.is_limit() {
                        self.accept_errors = false;
                    }
                    // Queue the error data to be added to the CST later.
                    let data = err.data();
                    if !data.is_empty() {
                        self.pending.push(PendingToken::Error(data.to_owned()));
                    }
                    // Pushed directly (not via push_err) so a limit error itself
                    // is recorded even though accept_errors may now be false.
                    self.errors.push(err);
                }
                Ok(token) => {
                    return Some(token);
                }
            }
        }

        None
    }

    /// Consume a token from the lexer.
    ///
    /// Panics if the lexer is exhausted; callers are expected to have checked
    /// via `peek`/`current` that a token is available.
    pub(crate) fn pop(&mut self) -> Token<'input> {
        if let Some(token) = self.current_token.take() {
            return token;
        }

        self.next_token()
            .expect("Could not pop a token from the lexer")
    }

    /// Insert a token into the syntax tree.
    pub(crate) fn push_token(&mut self, kind: SyntaxKind, token: Token) {
        self.builder.borrow_mut().token(kind, token.data())
    }

    /// Start a node and make it current.
    ///
    /// This also creates a NodeGuard under the hood that will automatically
    /// close the node(via Drop) when the guard goes out of scope.
    /// This allows for us to not have to always close nodes when we are parsing
    /// tokens.
    pub(crate) fn start_node(&mut self, kind: SyntaxKind) -> NodeGuard {
        // Attach preceding ignored tokens to the *parent* node, before opening this one.
        self.push_ignored();

        self.builder.borrow_mut().start_node(kind);
        let guard = NodeGuard::new(self.builder.clone());
        self.skip_ignored();

        guard
    }

    /// Set a checkpoint for *maybe* wrapping the following parse tree in some
    /// other node.
    pub(crate) fn checkpoint_node(&mut self) -> Checkpoint {
        // We may start a new node here in the future, so let's process
        // our preceding whitespace first
        self.push_ignored();

        let checkpoint = self.builder.borrow().checkpoint();
        Checkpoint::new(self.builder.clone(), checkpoint)
    }

    /// Peek the next Token and return its TokenKind.
    pub(crate) fn peek(&mut self) -> Option<TokenKind> {
        self.peek_token().map(|token| token.kind())
    }

    /// Repeatedly peek at the next token and call the parse function. The parse function must
    /// advance parsing or break out of the loop.
    pub(crate) fn peek_while(
        &mut self,
        mut run: impl FnMut(&mut Parser, TokenKind) -> ControlFlow<()>,
    ) {
        while let Some(kind) = self.peek() {
            let before = self.current_token.clone();
            match run(self, kind) {
                ControlFlow::Break(()) => break,
                ControlFlow::Continue(()) => {
                    // Guard against infinite loops: a non-advancing callback is a bug.
                    debug_assert!(
                        before != self.current_token,
                        "peek_while() iteration must advance parsing"
                    );
                }
            }
        }
    }

    /// Call the parse function while the next token is of the expected kind. The parse function
    /// must consume the peeked token.
    pub(crate) fn peek_while_kind(&mut self, expect: TokenKind, mut run: impl FnMut(&mut Parser)) {
        while let Some(kind) = self.peek() {
            if kind != expect {
                break;
            }

            let before = self.current_token.clone();
            run(self);
            debug_assert!(
                before != self.current_token,
                "peek_while_kind() iteration must advance parsing"
            );
        }
    }

    /// Call the parse function, separated by a token given in `separator`. This parses at least
    /// one item. The first item may optionally be prefixed by an initial separator.
    pub(crate) fn parse_separated_list(
        &mut self,
        separator: TokenKind,
        separator_syntax: SyntaxKind,
        mut run: impl FnMut(&mut Parser),
    ) {
        // Optional leading separator, e.g. `union U = | A | B`.
        if matches!(self.peek(), Some(kind) if kind == separator) {
            self.bump(separator_syntax);
        }

        run(self);

        self.peek_while_kind(separator, |p| {
            p.bump(separator_syntax);
            run(p);
        });
    }

    /// Peek the next Token and return it.
    pub(crate) fn peek_token(&mut self) -> Option<&Token<'input>> {
        // Lazily fill the single-token lookahead slot.
        if self.current_token.is_none() {
            self.current_token = self.next_token();
        }
        self.current_token.as_ref()
    }

    /// Peek Token `n` and return it.
    pub(crate) fn peek_token_n(&self, n: usize) -> Option<Token<'input>> {
        self.peek_n_inner(n)
    }

    /// Peek Token `n` and return its TokenKind.
    pub(crate) fn peek_n(&self, n: usize) -> Option<TokenKind> {
        self.peek_n_inner(n).map(|token| token.kind())
    }

    /// Look ahead `n` significant tokens (1-based) by cloning the lexer,
    /// without consuming anything from `self`.
    fn peek_n_inner(&self, n: usize) -> Option<Token<'input>> {
        // NOTE(review): lexer errors are discarded here (filter_map) and, unlike
        // skip_ignored(), commas are NOT filtered out — presumably lookahead
        // callers want to see commas; confirm against grammar call sites.
        self.current_token
            .iter()
            .cloned()
            .map(Result::Ok)
            .chain(self.lexer.clone())
            .filter_map(Result::ok)
            .filter(|token| !matches!(token.kind(), TokenKind::Whitespace | TokenKind::Comment))
            .nth(n - 1)
    }

    /// Peek next Token's `data` property.
    pub(crate) fn peek_data(&mut self) -> Option<&'input str> {
        self.peek_token().map(|token| token.data())
    }

    /// Peek `n` Token's `data` property.
    pub(crate) fn peek_data_n(&self, n: usize) -> Option<&'input str> {
        self.peek_token_n(n).map(|token| token.data())
    }
}
558
/// A wrapper around the SyntaxTreeBuilder used to self-close nodes.
///
/// When the NodeGuard goes out of scope, it automatically runs `finish_node()`
/// on the SyntaxTreeBuilder. This ensures that nodes are not forgotten to be
/// closed.
#[must_use]
pub(crate) struct NodeGuard {
    /// Shared handle to the builder whose current node this guard closes on drop.
    builder: Rc<RefCell<SyntaxTreeBuilder>>,
}
568
569impl NodeGuard {
570    fn new(builder: Rc<RefCell<SyntaxTreeBuilder>>) -> Self {
571        Self { builder }
572    }
573
574    pub(crate) fn finish_node(self) {
575        drop(self);
576    }
577}
578
579impl Drop for NodeGuard {
580    fn drop(&mut self) {
581        self.builder.borrow_mut().finish_node();
582    }
583}
584
/// A rowan Checkpoint that can self-close the new wrapper node if required.
pub(crate) struct Checkpoint {
    /// Shared handle to the builder the checkpoint was taken from.
    builder: Rc<RefCell<SyntaxTreeBuilder>>,
    /// The underlying rowan checkpoint marking a position in the builder.
    checkpoint: rowan::Checkpoint,
}
590
591impl Checkpoint {
592    fn new(builder: Rc<RefCell<SyntaxTreeBuilder>>, checkpoint: rowan::Checkpoint) -> Self {
593        Self {
594            builder,
595            checkpoint,
596        }
597    }
598
599    /// Wrap the nodes that were parsed since setting this checkpoint in a new parent node of kind
600    /// `kind`. Returns a NodeGuard that when dropped, finishes this new parent node. More children
601    /// can be added to this new node in the mean time.
602    pub(crate) fn wrap_node(self, kind: SyntaxKind) -> NodeGuard {
603        self.builder.borrow_mut().wrap_node(self.checkpoint, kind);
604        NodeGuard::new(self.builder)
605    }
606}
607
608#[cfg(test)]
609mod tests {
610    use super::DEFAULT_RECURSION_LIMIT;
611    use crate::cst;
612    use crate::Error;
613    use crate::Parser;
614    use crate::SyntaxTree;
615    use expect_test::expect;
616
617    #[test]
618    fn limited_mid_node() {
619        let source = r#"
620            type Query {
621                field(arg1: Int, arg2: Int, arg3: Int, arg4: Int, arg5: Int, arg6: Int): Int
622            }
623        "#;
624        let parser = Parser::new(source)
625            // Make it stop inside the arguments list
626            .token_limit(18);
627        let tree = parser.parse();
628        let mut errors = tree.errors();
629        assert_eq!(
630            errors.next(),
631            Some(&Error::limit("token limit reached, aborting lexing", 65))
632        );
633        assert_eq!(errors.next(), None);
634    }
635
636    #[test]
637    fn multiple_limits() {
638        let source = r#"
639            query {
640                a {
641                    a {
642                        a {
643                            a
644                        }
645                    }
646                }
647            }
648        "#;
649
650        let parser = Parser::new(source).recursion_limit(10).token_limit(22);
651        let cst = parser.parse();
652        let errors = cst.errors().collect::<Vec<_>>();
653        assert_eq!(
654            errors,
655            &[&Error::limit("token limit reached, aborting lexing", 170),]
656        );
657
658        let parser = Parser::new(source).recursion_limit(3).token_limit(200);
659        let cst = parser.parse();
660        let errors = cst.errors().collect::<Vec<_>>();
661        assert_eq!(
662            errors,
663            &[&Error::limit("parser recursion limit reached", 121),]
664        );
665    }
666
667    #[test]
668    fn syntax_errors_and_limits() {
669        // Syntax errors before and after the limit
670        let source = r#"
671            type Query {
672                field(arg1: Int, missing_arg): Int
673                # limit reached here
674                field2: !String
675            } and then some garbage
676        "#;
677        let parser = Parser::new(source).token_limit(22);
678        let cst = parser.parse();
679        let mut errors = cst.errors();
680        assert_eq!(
681            errors.next(),
682            Some(&Error::with_loc("expected a Name", ")".to_string(), 70))
683        );
684        // index 113 is immediately after the comment, before the newline
685        assert_eq!(
686            errors.next(),
687            Some(&Error::limit("token limit reached, aborting lexing", 113))
688        );
689        assert_eq!(errors.next(), None);
690
691        let tree = expect![[r##"
692            DOCUMENT@0..113
693              WHITESPACE@0..13 "\n            "
694              OBJECT_TYPE_DEFINITION@13..76
695                type_KW@13..17 "type"
696                WHITESPACE@17..18 " "
697                NAME@18..23
698                  IDENT@18..23 "Query"
699                WHITESPACE@23..24 " "
700                FIELDS_DEFINITION@24..76
701                  L_CURLY@24..25 "{"
702                  WHITESPACE@25..42 "\n                "
703                  FIELD_DEFINITION@42..76
704                    NAME@42..47
705                      IDENT@42..47 "field"
706                    ARGUMENTS_DEFINITION@47..71
707                      L_PAREN@47..48 "("
708                      INPUT_VALUE_DEFINITION@48..57
709                        NAME@48..52
710                          IDENT@48..52 "arg1"
711                        COLON@52..53 ":"
712                        WHITESPACE@53..54 " "
713                        NAMED_TYPE@54..57
714                          NAME@54..57
715                            IDENT@54..57 "Int"
716                      COMMA@57..58 ","
717                      WHITESPACE@58..59 " "
718                      INPUT_VALUE_DEFINITION@59..70
719                        NAME@59..70
720                          IDENT@59..70 "missing_arg"
721                      R_PAREN@70..71 ")"
722                    COLON@71..72 ":"
723                    WHITESPACE@72..73 " "
724                    NAMED_TYPE@73..76
725                      NAME@73..76
726                        IDENT@73..76 "Int"
727              WHITESPACE@76..93 "\n                "
728              COMMENT@93..113 "# limit reached here"
729        "##]];
730        tree.assert_eq(&format!("{:#?}", cst.document().syntax));
731    }
732
733    #[test]
734    fn tree_with_syntax_errors() {
735        use crate::cst::Definition;
736
737        // Some arbitrary token spam in incorrect places--this test uses
738        // valid tokens only
739        let source = r#"
740            garbage type Query implements X {
741                field(arg: Int): Int
742            } garbage :,, (|) interface X {}
743        "#;
744        let cst = Parser::new(source).parse();
745
746        let mut definitions = cst.document().definitions();
747        let query_def = definitions.next().unwrap();
748        let interface_def = definitions.next().unwrap();
749        assert_eq!(definitions.next(), None);
750        assert!(matches!(query_def, Definition::ObjectTypeDefinition(_)));
751        assert!(matches!(
752            interface_def,
753            Definition::InterfaceTypeDefinition(_)
754        ));
755    }
756
757    #[test]
758    fn token_limit() {
759        let cst = Parser::new("type Query { a a a a a a a a a }")
760            .token_limit(100)
761            .parse();
762        // token count includes EOF token.
763        assert_eq!(cst.token_limit().high, 26);
764    }
765
766    #[test]
767    // single char v.s. multiple is less important than consistency between consecutive calls:
768    #[allow(clippy::single_char_add_str)]
769    fn recursion_limit() {
770        // A factor 50 makes this test to run in ~1 second on a laptop from 2021,
771        // in unoptimized mode
772        const SMASH_THE_STACK_FACTOR: usize = 50;
773
774        wide(2, |ast| assert_eq!(ast.errors, []));
775        wide(DEFAULT_RECURSION_LIMIT - 2, |ast| {
776            assert_eq!(ast.errors.len(), 0, "{:?}", ast.errors[0])
777        });
778        wide(DEFAULT_RECURSION_LIMIT * SMASH_THE_STACK_FACTOR, |_ast| {
779            // TODO: remove use of recursion to parse repetition and uncomment:
780            // assert_eq!(ast.errors.len(), 0)
781        });
782
783        deep(2, |ast| assert_eq!(ast.errors, []));
784        deep(DEFAULT_RECURSION_LIMIT - 2, |ast| {
785            assert_eq!(ast.errors.len(), 0, "{:?}", ast.errors[0])
786        });
787        deep(DEFAULT_RECURSION_LIMIT * SMASH_THE_STACK_FACTOR, |ast| {
788            // Parsing nested structures without recursion on the call stack
789            // is possible but not as easy as it would require an explicit stack.
790
791            // The recursion limit triggered and protected against stack overflow.
792            assert_eq!(ast.errors.len(), 1);
793            assert!(ast.errors[0].message.contains("recursion limit reached"));
794        });
795
796        fn deep(count: usize, each: impl Fn(SyntaxTree)) {
797            let check = |input: String| each(Parser::new(&input).parse());
798
799            // Nested list type
800            let mut doc = String::new();
801            doc.push_str("type O { field: ");
802            doc.push_str(&"[".repeat(count));
803            doc.push_str("Int");
804            doc.push_str(&"]".repeat(count));
805            doc.push_str(" }");
806            check(doc);
807
808            // Nested list value
809            let mut doc = String::new();
810            doc.push_str("type O { field(arg: T = ");
811            doc.push_str(&"[".repeat(count));
812            doc.push_str("0");
813            doc.push_str(&"]".repeat(count));
814            doc.push_str("): Int }");
815            check(doc);
816
817            // Nested object value
818            let mut doc = String::new();
819            doc.push_str("type O { field(arg: T = ");
820            doc.push_str(&"{f: ".repeat(count));
821            doc.push_str("0");
822            doc.push_str(&"}".repeat(count));
823            doc.push_str("): Int }");
824            check(doc);
825
826            // Nested selection set
827            let mut doc = String::new();
828            doc.push_str("query { ");
829            doc.push_str(&"f { ".repeat(count));
830            doc.push_str("f ");
831            doc.push_str(&"}".repeat(count));
832            doc.push_str("}");
833            check(doc);
834        }
835
836        fn wide(count: usize, each: impl Fn(SyntaxTree)) {
837            let check = |input: String| each(Parser::new(&input).parse());
838
839            // Repeated top-level definitions
840            let mut doc = String::new();
841            doc.push_str(&"directive @d on FIELD ".repeat(count));
842            check(doc);
843
844            // Repeated directive applications
845            let mut doc = String::new();
846            doc.push_str("scalar Url");
847            doc.push_str(&" @d".repeat(count));
848            check(doc);
849
850            // Repeated root operation
851            let mut doc = String::new();
852            doc.push_str("schema {");
853            doc.push_str(&" query: Q".repeat(count));
854            doc.push_str(" }");
855            check(doc);
856
857            // Repeated implements interface
858            let mut doc = String::new();
859            doc.push_str("type O implements");
860            doc.push_str(&" & I".repeat(count));
861            check(doc);
862
863            // Repeated object type field
864            let mut doc = String::new();
865            doc.push_str("type O {");
866            doc.push_str(&" f: T".repeat(count));
867            doc.push_str("}");
868            check(doc);
869
870            // Repeated enum value field
871            let mut doc = String::new();
872            doc.push_str("enum E {");
873            doc.push_str(&" V".repeat(count));
874            doc.push_str("}");
875            check(doc);
876
877            // Repeated union member
878            let mut doc = String::new();
879            doc.push_str("union U = ");
880            doc.push_str(&" | T".repeat(count));
881            check(doc);
882
883            // Repeated input object type field
884            let mut doc = String::new();
885            doc.push_str("input In {");
886            doc.push_str(&" f: T".repeat(count));
887            doc.push_str("}");
888            check(doc);
889
890            // Repeated input object value field
891            let mut doc = String::new();
892            doc.push_str("type O { field(arg: T = {");
893            doc.push_str(&" f: 0".repeat(count));
894            doc.push_str(" }): Int }");
895            check(doc);
896
897            // Repeated list value item
898            let mut doc = String::new();
899            doc.push_str("type O { field(arg: T = [");
900            doc.push_str(&" 0,".repeat(count));
901            doc.push_str(" ]): Int }");
902            check(doc);
903
904            // Repeated field argument definitions
905            let mut doc = String::new();
906            doc.push_str("type O { field(");
907            doc.push_str(&"a: T ".repeat(count));
908            doc.push_str("): Int }");
909            check(doc);
910
911            // Repeated field selection
912            let mut doc = String::new();
913            doc.push_str("query {");
914            doc.push_str(&" f".repeat(count));
915            doc.push_str(" }");
916            check(doc);
917
918            // Repeated field argument
919            let mut doc = String::new();
920            doc.push_str("query { f(");
921            doc.push_str(&" a: 0".repeat(count));
922            doc.push_str(") }");
923            check(doc);
924
925            // Repeated variable definition
926            let mut doc = String::new();
927            doc.push_str("query Q(");
928            doc.push_str(&" $v: Int".repeat(count));
929            doc.push_str(" ) { f }");
930            check(doc);
931        }
932    }
933
934    #[test]
935    fn parse_field_set() {
936        let source = r#"{ a }"#;
937
938        let parser = Parser::new(source);
939        let cst: SyntaxTree<cst::SelectionSet> = parser.parse_selection_set();
940        let errors = cst.errors().collect::<Vec<_>>();
941        assert_eq!(errors.len(), 0);
942
943        let sel_set: cst::SelectionSet = cst.field_set();
944        let _ = sel_set.selections().map(|sel| {
945            if let cst::Selection::Field(f) = sel {
946                assert_eq!(f.name().unwrap().text().as_ref(), "a")
947            } else {
948                panic!("no field a in field set selection")
949            }
950        });
951
952        let source = r#"a { a }"#;
953
954        let parser = Parser::new(source);
955        let cst: SyntaxTree<cst::SelectionSet> = parser.parse_selection_set();
956        let errors = cst.errors().collect::<Vec<_>>();
957        assert_eq!(errors.len(), 0);
958
959        let sel_set: cst::SelectionSet = cst.field_set();
960        let _ = sel_set.selections().map(|sel| {
961            if let cst::Selection::Field(f) = sel {
962                assert_eq!(f.name().unwrap().text().as_ref(), "a")
963            } else {
964                panic!("no field a in field set selection")
965            }
966        });
967    }
968
969    #[test]
970    fn no_infinite_loop() {
971        let source = r#"{ ..."#;
972        let parser = Parser::new(source).token_limit(3);
973        let _cst = parser.parse();
974    }
975
976    /// Helper to check all CST nodes/tokens have valid UTF-8 character boundaries.
977    ///
978    /// Prior to #1023, lexing errors were not recorded in the CST, so any tokens
979    /// lexed after the error would have incorrect position information. This could
980    /// even lead to panics if the incorrect positions were not on a char boundary.
981    fn check_char_boundaries(node: &crate::SyntaxNode, source: &str) {
982        let range = node.text_range();
983        let start: usize = range.start().into();
984        let end: usize = range.end().into();
985        assert!(
986            source.is_char_boundary(start),
987            "Node {:?} start {} is not a char boundary",
988            node.kind(),
989            start
990        );
991        assert!(
992            source.is_char_boundary(end),
993            "Node {:?} end {} is not a char boundary",
994            node.kind(),
995            end
996        );
997
998        for child in node.children_with_tokens() {
999            match child {
1000                rowan::NodeOrToken::Node(n) => check_char_boundaries(&n, source),
1001                rowan::NodeOrToken::Token(t) => {
1002                    let range = t.text_range();
1003                    let start: usize = range.start().into();
1004                    let end: usize = range.end().into();
1005                    assert!(
1006                        source.is_char_boundary(start),
1007                        "Token {:?} start {} is not a char boundary",
1008                        t.kind(),
1009                        start
1010                    );
1011                    assert!(
1012                        source.is_char_boundary(end),
1013                        "Token {:?} end {} is not a char boundary",
1014                        t.kind(),
1015                        end
1016                    );
1017                }
1018            }
1019        }
1020    }
1021
1022    /// Unexpected CJK characters (3-byte UTF-8) should not throw off byte
1023    /// positions of later tokens.
1024    #[test]
1025    fn lexer_error_cjk_preserves_byte_positions() {
1026        use crate::cst::CstNode;
1027
1028        let source = "type Query { field: 中文类型 }";
1029        let cst = Parser::new(source).parse();
1030        assert!(!cst.errors().collect::<Vec<_>>().is_empty());
1031        check_char_boundaries(cst.document().syntax(), source);
1032    }
1033
1034    /// Mixed ASCII and multi-byte errors should not throw off byte
1035    /// positions of later tokens.
1036    #[test]
1037    fn lexer_error_mixed_preserves_byte_positions() {
1038        use crate::cst::CstNode;
1039
1040        let source = "type Query { f1: @#$ f2: 日本語 f3: !!! }";
1041        let cst = Parser::new(source).parse();
1042        assert!(!cst.errors().collect::<Vec<_>>().is_empty());
1043        check_char_boundaries(cst.document().syntax(), source);
1044    }
1045
1046    /// Unexpected emoji characters should not throw off byte
1047    /// positions of later tokens.
1048    #[test]
1049    fn lexer_error_emoji_preserves_byte_positions() {
1050        use crate::cst::CstNode;
1051
1052        let source = "type Query { field: 🚀🌍 }";
1053        let cst = Parser::new(source).parse();
1054        assert!(!cst.errors().collect::<Vec<_>>().is_empty());
1055        check_char_boundaries(cst.document().syntax(), source);
1056    }
1057}