Skip to main content

apollo_compiler/
parser.rs

//! APIs related to parsing `&str` inputs as GraphQL syntax.
//!
//! This module typically does not need to be imported directly.
//! If the default parser configuration is adequate, use constructors such as:
//!
//! * [`ast::Document::parse`]
//! * [`Schema::parse`]
//! * [`Schema::parse_and_validate`]
//! * [`ExecutableDocument::parse`]
//! * [`ExecutableDocument::parse_and_validate`]
//!
//! If not, create a [`Parser`] and use its builder methods to change configuration.
13
14use crate::ast;
15use crate::ast::from_cst::Convert;
16use crate::ast::Document;
17use crate::collections::IndexMap;
18use crate::executable;
19use crate::schema::SchemaBuilder;
20use crate::validation::Details;
21use crate::validation::DiagnosticList;
22use crate::validation::Valid;
23use crate::validation::WithErrors;
24use crate::ExecutableDocument;
25use crate::Schema;
26use apollo_parser::SyntaxNode;
27use rowan::TextRange;
28use serde::Deserialize;
29use serde::Serialize;
30use std::num::NonZeroU64;
31use std::ops::Range;
32use std::path::Path;
33use std::path::PathBuf;
34use std::sync::atomic;
35use std::sync::atomic::AtomicU64;
36use std::sync::Arc;
37use std::sync::OnceLock;
38
/// Configuration for parsing an input string as GraphQL syntax.
///
/// Besides the two optional limits, a parser remembers how far the most
/// recent parse went, so that callers can tune those limits.
#[derive(Clone, Debug, Default)]
pub struct Parser {
    /// Recursion limit handed to the underlying parser, if one was configured.
    recursion_limit: Option<usize>,
    /// Token limit handed to the underlying parser, if one was configured.
    token_limit: Option<usize>,
    /// Recursion high-water mark recorded by the last parse.
    recursion_reached: usize,
    /// Token high-water mark recorded by the last parse.
    tokens_reached: usize,
}
47
48/// Records for validation information about a file that was parsed
49#[derive(Clone)]
50pub struct SourceFile {
51    pub(crate) path: PathBuf,
52    pub(crate) source_text: String,
53    pub(crate) source: OnceLock<ariadne::Source>,
54}
55
56/// A map of source files relevant to a given document
57pub type SourceMap = Arc<IndexMap<FileId, Arc<SourceFile>>>;
58
/// Integer identifier for a parsed source file.
///
/// Used internally so that, for example, a schema built from multiple source files
/// can be validated with diagnostics pointing at the relevant sources.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct FileId {
    /// The raw identifier; never zero.
    id: NonZeroU64,
}
67
/// A [`FileId`] packed together with a one-bit boolean tag in a single non-zero integer.
#[derive(Clone, Copy)]
pub(crate) struct TaggedFileId {
    /// The file ID, with the tag stored in the most significant bit.
    tag_and_id: NonZeroU64,
}
72
73/// The source location of a parsed node:
74/// file ID and text range (start and end byte offsets) within that file.
75#[derive(Clone, Copy, Hash, PartialEq, Eq)]
76pub struct SourceSpan {
77    pub(crate) file_id: FileId,
78    pub(crate) text_range: TextRange,
79}
80
81/// A line number and column number within a GraphQL document.
82#[derive(Clone, Copy, Hash, PartialEq, Eq, Serialize, Deserialize)]
83#[serde(deny_unknown_fields)]
84pub struct LineColumn {
85    /// The line number for this location, starting at 1 for the first line.
86    pub line: usize,
87    /// The column number for this location, starting at 1 and counting characters (Unicode Scalar
88    /// Values) like [`str::chars`].
89    pub column: usize,
90}
91
92impl std::fmt::Debug for LineColumn {
93    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
94        write!(f, "{}:{}", self.line, self.column)
95    }
96}
97
98impl Parser {
99    /// Create a `Parser` with default configuration.
100    /// Use other methods to change the configuration.
101    pub fn new() -> Self {
102        Self::default()
103    }
104
105    /// Configure the recursion limit to use while parsing.
106    ///
107    /// This protects against stack overflow.
108    /// If unset, use [`apollo-parser`][apollo_parser]’s default limit.
109    /// The exact meaning is unspecified,
110    /// but for GraphQL constructs like selection sets whose syntax can be nested,
111    /// the nesting level encountered during parsing counts towards this limit.
112    pub fn recursion_limit(mut self, value: usize) -> Self {
113        self.recursion_limit = Some(value);
114        self
115    }
116
117    /// Configure the limit on the number of tokens to parse.
118    /// If an input document is too big, parsing will be aborted.
119    /// By default, there is no limit.
120    pub fn token_limit(mut self, value: usize) -> Self {
121        self.token_limit = Some(value);
122        self
123    }
124
125    /// Parse the given source text into an AST document.
126    ///
127    /// `path` is the filesystem path (or arbitrary string) used in diagnostics
128    /// to identify this source file to users.
129    pub fn parse_ast(
130        &mut self,
131        source_text: impl Into<String>,
132        path: impl AsRef<Path>,
133    ) -> Result<Document, WithErrors<Document>> {
134        let mut errors = DiagnosticList::new(Default::default());
135        let ast = self.parse_ast_inner(source_text, path, FileId::new(), &mut errors);
136        errors.into_result_with(ast)
137    }
138
139    pub(crate) fn parse_ast_inner(
140        &mut self,
141        source_text: impl Into<String>,
142        path: impl AsRef<Path>,
143        file_id: FileId,
144        errors: &mut DiagnosticList,
145    ) -> Document {
146        let tree = self.parse_common(
147            source_text.into(),
148            path.as_ref().to_owned(),
149            file_id,
150            errors,
151            |parser| parser.parse(),
152        );
153        let sources = errors.sources.clone();
154        Document::from_cst(tree.document(), file_id, sources)
155    }
156
157    pub(crate) fn parse_common<T: apollo_parser::cst::CstNode>(
158        &mut self,
159        source_text: String,
160        path: PathBuf,
161        file_id: FileId,
162        errors: &mut DiagnosticList,
163        parse: impl FnOnce(apollo_parser::Parser) -> apollo_parser::SyntaxTree<T>,
164    ) -> apollo_parser::SyntaxTree<T> {
165        let mut parser = apollo_parser::Parser::new(&source_text);
166        if let Some(value) = self.recursion_limit {
167            parser = parser.recursion_limit(value)
168        }
169        if let Some(value) = self.token_limit {
170            parser = parser.token_limit(value)
171        }
172        let tree = parse(parser);
173        self.recursion_reached = tree.recursion_limit().high;
174        self.tokens_reached = tree.token_limit().high;
175        let source_file = Arc::new(SourceFile {
176            path,
177            source_text,
178            source: OnceLock::new(),
179        });
180        Arc::make_mut(&mut errors.sources).insert(file_id, source_file);
181        for parser_error in tree.errors() {
182            // Silently skip parse errors at index beyond 4 GiB.
183            // Rowan in apollo-parser might complain about files that large
184            // before we get here anyway.
185            let Ok(index) = parser_error.index().try_into() else {
186                continue;
187            };
188            let Ok(len) = parser_error.data().len().try_into() else {
189                continue;
190            };
191            let location = Some(SourceSpan {
192                file_id,
193                text_range: rowan::TextRange::at(index, len),
194            });
195            let details = if parser_error.is_limit() {
196                Details::ParserLimit {
197                    message: parser_error.message().to_owned(),
198                }
199            } else {
200                Details::SyntaxError {
201                    message: parser_error.message().to_owned(),
202                }
203            };
204            errors.push(location, details)
205        }
206        tree
207    }
208
209    /// Parse the given source text as the sole input file of a schema.
210    ///
211    /// `path` is the filesystem path (or arbitrary string) used in diagnostics
212    /// to identify this source file to users.
213    ///
214    /// To have multiple files contribute to a schema,
215    /// use [`Schema::builder`] and [`Parser::parse_into_schema_builder`].
216    #[allow(clippy::result_large_err)] // Typically not called very often
217    pub fn parse_schema(
218        &mut self,
219        source_text: impl Into<String>,
220        path: impl AsRef<Path>,
221    ) -> Result<Schema, WithErrors<Schema>> {
222        let mut builder = Schema::builder();
223        self.parse_into_schema_builder(source_text, path, &mut builder);
224        builder.build()
225    }
226
227    /// Parse the given source text as an additional input to a schema builder.
228    ///
229    /// `path` is the filesystem path (or arbitrary string) used in diagnostics
230    /// to identify this source file to users.
231    ///
232    /// This can be used to build a schema from multiple source files.
233    ///
234    /// Errors (if any) are recorded in the builder and returned by [`SchemaBuilder::build`].
235    pub fn parse_into_schema_builder(
236        &mut self,
237        source_text: impl Into<String>,
238        path: impl AsRef<Path>,
239        builder: &mut SchemaBuilder,
240    ) {
241        let ast = self.parse_ast_inner(source_text, path, FileId::new(), &mut builder.errors);
242        let executable_definitions_are_errors = true;
243        builder.add_ast_document_not_adding_sources(&ast, executable_definitions_are_errors);
244    }
245
246    /// Parse the given source text into an executable document, with the given schema.
247    ///
248    /// `path` is the filesystem path (or arbitrary string) used in diagnostics
249    /// to identify this source file to users.
250    #[allow(clippy::result_large_err)] // Typically not called very often
251    pub fn parse_executable(
252        &mut self,
253        schema: &Valid<Schema>,
254        source_text: impl Into<String>,
255        path: impl AsRef<Path>,
256    ) -> Result<ExecutableDocument, WithErrors<ExecutableDocument>> {
257        let (document, errors) = self.parse_executable_inner(schema, source_text, path);
258        errors.into_result_with(document)
259    }
260
261    /// Parse the given source text as an additional input to an executable document builder.
262    ///
263    /// `path` is the filesystem path (or arbitrary string) used in diagnostics
264    /// to identify this source file to users.
265    ///
266    /// This can be used to build an executable document from multiple source files.
267    /// Errors (if any) are recorded in the builder and returned by
268    /// [`ExecutableDocumentBuilder::build`].
269    pub fn parse_into_executable_builder(
270        &mut self,
271        source_text: impl Into<String>,
272        path: impl AsRef<Path>,
273        builder: &mut executable::ExecutableDocumentBuilder,
274    ) {
275        let ast = self.parse_ast_inner(source_text, path, FileId::new(), builder.errors);
276        let type_system_definitions_are_errors = true;
277        builder.add_ast_document(&ast, type_system_definitions_are_errors);
278    }
279
280    pub(crate) fn parse_executable_inner(
281        &mut self,
282        schema: &Valid<Schema>,
283        source_text: impl Into<String>,
284        path: impl AsRef<Path>,
285    ) -> (ExecutableDocument, DiagnosticList) {
286        let mut errors = DiagnosticList::new(Default::default());
287        let ast = self.parse_ast_inner(source_text, path, FileId::new(), &mut errors);
288        let document = ast.to_executable_inner(schema, &mut errors);
289        (document, errors)
290    }
291
292    /// Parse a schema and executable document from the given source text
293    /// containing a mixture of type system definitions and executable definitions,
294    /// and validate them.
295    /// This is mostly useful for unit tests.
296    ///
297    /// `path` is the filesystem path (or arbitrary string) used in diagnostics
298    /// to identify this source file to users.
299    pub fn parse_mixed_validate(
300        &mut self,
301        source_text: impl Into<String>,
302        path: impl AsRef<Path>,
303    ) -> Result<(Valid<Schema>, Valid<ExecutableDocument>), DiagnosticList> {
304        let mut builder = SchemaBuilder::new();
305        let ast = self.parse_ast_inner(source_text, path, FileId::new(), &mut builder.errors);
306        let executable_definitions_are_errors = false;
307        let type_system_definitions_are_errors = false;
308        builder.add_ast_document_not_adding_sources(&ast, executable_definitions_are_errors);
309        let (mut schema, mut errors) = builder.build_inner();
310        let executable = crate::executable::from_ast::document_from_ast(
311            Some(&schema),
312            &ast,
313            &mut errors,
314            type_system_definitions_are_errors,
315        );
316        crate::schema::validation::validate_schema(&mut errors, &mut schema);
317        crate::executable::validation::validate_executable_document(
318            &mut errors,
319            &schema,
320            &executable,
321        );
322        errors
323            .into_result()
324            .map(|()| (Valid(schema), Valid(executable)))
325    }
326
327    /// Parse the given source text (e.g. `field_1 field_2 { field_2_1 }`
328    /// as a selection set with optional outer brackets.
329    ///
330    /// This is the syntax of the string argument to some Apollo Federation directives.
331    ///
332    /// `path` is the filesystem path (or arbitrary string) used in diagnostics
333    /// to identify this source file to users.
334    pub fn parse_field_set(
335        &mut self,
336        schema: &Valid<Schema>,
337        type_name: ast::NamedType,
338        source_text: impl Into<String>,
339        path: impl AsRef<Path>,
340    ) -> Result<executable::FieldSet, WithErrors<executable::FieldSet>> {
341        let (field_set, errors) = self.parse_field_set_inner(schema, type_name, source_text, path);
342        errors.into_result_with(field_set)
343    }
344
345    pub(crate) fn parse_field_set_inner(
346        &mut self,
347        schema: &Valid<Schema>,
348        type_name: ast::NamedType,
349        source_text: impl Into<String>,
350        path: impl AsRef<Path>,
351    ) -> (executable::FieldSet, DiagnosticList) {
352        let file_id = FileId::new();
353        let mut errors = DiagnosticList::new(Default::default());
354        let tree = self.parse_common(
355            source_text.into(),
356            path.as_ref().to_owned(),
357            file_id,
358            &mut errors,
359            |parser| parser.parse_selection_set(),
360        );
361        let ast = ast::from_cst::convert_selection_set(&tree.field_set(), file_id);
362        let mut selection_set = executable::SelectionSet::new(type_name);
363        let mut build_errors = executable::from_ast::BuildErrors {
364            errors: &mut errors,
365            path: executable::SelectionPath {
366                nested_fields: Vec::new(),
367                // 🤷
368                root: executable::ExecutableDefinitionName::AnonymousOperation(
369                    ast::OperationType::Query,
370                ),
371            },
372        };
373        selection_set.extend_from_ast(Some(schema), &mut build_errors, &ast);
374        let field_set = executable::FieldSet {
375            sources: errors.sources.clone(),
376            selection_set,
377        };
378        (field_set, errors)
379    }
380
381    /// Parse the given source text (e.g. `[Foo!]!`) as a reference to a GraphQL type.
382    ///
383    /// `path` is the filesystem path (or arbitrary string) used in diagnostics
384    /// to identify this source file to users.
385    pub fn parse_type(
386        &mut self,
387        source_text: impl Into<String>,
388        path: impl AsRef<Path>,
389    ) -> Result<ast::Type, DiagnosticList> {
390        let mut errors = DiagnosticList::new(Default::default());
391        let file_id = FileId::new();
392        let tree = self.parse_common(
393            source_text.into(),
394            path.as_ref().to_owned(),
395            file_id,
396            &mut errors,
397            |parser| parser.parse_type(),
398        );
399        errors.into_result().map(|()| {
400            tree.ty()
401                .convert(file_id)
402                .expect("conversion should be infallible if there were no syntax errors")
403        })
404    }
405
406    /// What level of recursion was reached during the last call to a `parse_*` method.
407    ///
408    /// Collecting this on a corpus of documents can help decide
409    /// how to set [`recursion_limit`][Self::recursion_limit].
410    pub fn recursion_reached(&self) -> usize {
411        self.recursion_reached
412    }
413
414    /// How many tokens were created during the last call to a `parse_*` method.
415    ///
416    /// Collecting this on a corpus of documents can help decide
417    /// how to set [`token_limit`][Self::token_limit].
418    pub fn tokens_reached(&self) -> usize {
419        self.tokens_reached
420    }
421}
422
423impl SourceFile {
424    /// The filesystem path (or arbitrary string) used in diagnostics
425    /// to identify this source file to users.
426    pub fn path(&self) -> &Path {
427        &self.path
428    }
429
430    pub fn source_text(&self) -> &str {
431        &self.source_text
432    }
433
434    pub(crate) fn ariadne(&self) -> &ariadne::Source {
435        self.source.get_or_init(|| {
436            // FIXME This string copy is not ideal, but changing to a reference counted string affects
437            // public API
438            ariadne::Source::from(self.source_text.clone())
439        })
440    }
441
442    /// Get [`LineColumn`] for the given 0-indexed UTF-8 byte `offset` from the start of the file.
443    ///
444    /// Returns None if the offset is out of bounds.
445    pub fn get_line_column(&self, offset: usize) -> Option<LineColumn> {
446        let (_, zero_indexed_line, zero_indexed_column) = self.ariadne().get_byte_line(offset)?;
447        Some(LineColumn {
448            line: zero_indexed_line + 1,
449            column: zero_indexed_column + 1,
450        })
451    }
452
453    /// Get starting and ending [`LineColumn`]s for the given `range` 0-indexed UTF-8 byte offsets.
454    ///
455    /// Returns `None` if either offset is out of bounds.
456    pub fn get_line_column_range(&self, range: Range<usize>) -> Option<Range<LineColumn>> {
457        let start = self.get_line_column(range.start)?;
458        let end = self.get_line_column(range.end)?;
459        Some(start..end)
460    }
461}
462
463impl std::fmt::Debug for SourceFile {
464    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
465        let Self {
466            path,
467            source_text,
468            source: _, // Skipped: it’s a cache and would make debugging other things noisy
469        } = self;
470        let mut debug_struct = f.debug_struct("SourceFile");
471        debug_struct.field("path", path);
472        if path != std::path::Path::new("built_in.graphql") {
473            debug_struct.field("source_text", source_text);
474        } else {
475            debug_struct.field(
476                "source_text",
477                &format_args!("include_str!(\"built_in.graphql\")"),
478            );
479        }
480        debug_struct.finish()
481    }
482}
483
484impl std::fmt::Debug for FileId {
485    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
486        self.id.fmt(f)
487    }
488}
489
/// First value handed out by the file ID counter.
/// IDs 1 and 2 are taken by `FileId::BUILT_IN` and `FileId::NONE`.
static INITIAL: u64 = 3;

/// The next file ID to use. This is global so file IDs do not conflict between different compiler
/// instances.
static NEXT: AtomicU64 = AtomicU64::new(INITIAL);

/// Most significant bit of a `u64`, used by `TaggedFileId` to store its tag.
const TAG: u64 = 1 << (u64::BITS - 1);
/// Every bit except [`TAG`]: the bits available to a file ID.
const ID_MASK: u64 = u64::MAX ^ TAG;

// Compile-time sanity check of the bit layout.
#[allow(clippy::assertions_on_constants)]
const _: () = {
    assert!(TAG == 0x8000_0000_0000_0000);
    assert!(ID_MASK == 0x7FFF_FFFF_FFFF_FFFF);
};
503
504impl FileId {
505    /// The ID of the file implicitly added to type systems, for built-in scalars and introspection types
506    pub const BUILT_IN: Self = Self::const_new(1);
507
508    /// Passed to Ariadne to create a report without a location
509    pub(crate) const NONE: Self = Self::const_new(2);
510
511    // Returning a different value every time does not sound like good `impl Default`
512    #[allow(clippy::new_without_default)]
513    pub fn new() -> Self {
514        loop {
515            let id = NEXT.fetch_add(1, atomic::Ordering::AcqRel);
516            if id & TAG == 0 {
517                return Self {
518                    id: NonZeroU64::new(id).unwrap(),
519                };
520            } else {
521                // Overflowing 63 bits is unlikely, but if it somehow happens
522                // reset the counter and try again.
523                //
524                // `TaggedFileId` behaving incorrectly would be a memory safety issue,
525                // whereas a file ID collision “merely” causes
526                // diagnostics to print the wrong file name and source context.
527                Self::reset()
528            }
529        }
530    }
531
532    /// Reset file ID counter back to its initial value, used to get consistent results in tests.
533    ///
534    /// All tests in the process must use `#[serial_test::serial]`
535    #[doc(hidden)]
536    pub fn reset() {
537        NEXT.store(INITIAL, atomic::Ordering::Release)
538    }
539
540    const fn const_new(id: u64) -> Self {
541        assert!(id & ID_MASK == id);
542        // TODO: use unwrap() when const-stable https://github.com/rust-lang/rust/issues/67441
543        if let Some(id) = NonZeroU64::new(id) {
544            Self { id }
545        } else {
546            panic!()
547        }
548    }
549}
550
551impl TaggedFileId {
552    pub(crate) const fn pack(tag: bool, id: FileId) -> Self {
553        debug_assert!((id.id.get() & TAG) == 0);
554        let tag_and_id = if tag {
555            let packed = id.id.get() | TAG;
556            // SAFETY: `id.id` was non-zero, so setting an additional bit is still non-zero
557            unsafe { NonZeroU64::new_unchecked(packed) }
558        } else {
559            id.id
560        };
561        Self { tag_and_id }
562    }
563
564    pub(crate) fn tag(self) -> bool {
565        (self.tag_and_id.get() & TAG) != 0
566    }
567
568    pub(crate) fn file_id(self) -> FileId {
569        let unpacked = self.tag_and_id.get() & ID_MASK;
570        // SAFETY: `unpacked` has the same value as `id: FileId` did in `pack()`, which is non-zero
571        let id = unsafe { NonZeroU64::new_unchecked(unpacked) };
572        FileId { id }
573    }
574}
575
576impl SourceSpan {
577    pub(crate) fn new(file_id: FileId, node: &'_ SyntaxNode) -> Self {
578        Self {
579            file_id,
580            text_range: node.text_range(),
581        }
582    }
583
584    /// Returns the file ID for this location
585    pub fn file_id(&self) -> FileId {
586        self.file_id
587    }
588
589    /// Returns the offset from the start of the file to the start of the range, in UTF-8 bytes
590    pub fn offset(&self) -> usize {
591        self.text_range.start().into()
592    }
593
594    /// Returns the offset from the start of the file to the end of the range, in UTF-8 bytes
595    pub fn end_offset(&self) -> usize {
596        self.text_range.end().into()
597    }
598
599    /// Returns the length of the range, in UTF-8 bytes
600    pub fn node_len(&self) -> usize {
601        self.text_range.len().into()
602    }
603
604    /// Best effort at making a location with the given start and end
605    pub fn recompose(start_of: Option<Self>, end_of: Option<Self>) -> Option<Self> {
606        match (start_of, end_of) {
607            (None, None) => None,
608            (None, single @ Some(_)) | (single @ Some(_), None) => single,
609            (Some(start), Some(end)) => {
610                if start.file_id != end.file_id {
611                    // Pick one aribtrarily
612                    return Some(end);
613                }
614                Some(SourceSpan {
615                    file_id: start.file_id,
616                    text_range: TextRange::new(start.text_range.start(), end.text_range.end()),
617                })
618            }
619        }
620    }
621
622    /// The line and column numbers of [`Self::offset`]
623    pub fn line_column(&self, sources: &SourceMap) -> Option<LineColumn> {
624        let source = sources.get(&self.file_id)?;
625        source.get_line_column(self.offset())
626    }
627
628    /// The line and column numbers of the range from [`Self::offset`] to [`Self::end_offset`]
629    /// inclusive.
630    pub fn line_column_range(&self, sources: &SourceMap) -> Option<Range<LineColumn>> {
631        let source = sources.get(&self.file_id)?;
632        source.get_line_column_range(self.offset()..self.end_offset())
633    }
634}
635
636impl std::fmt::Debug for SourceSpan {
637    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
638        write!(
639            f,
640            "{}..{} @{:?}",
641            self.offset(),
642            self.end_offset(),
643            self.file_id,
644        )
645    }
646}