apollo_compiler/
parser.rs

1//! APIs related to parsing `&str` inputs as GraphQL syntax.
2//!
3//! This module typically does not need to be imported directly.
4//! If the default parser configuration is adequate, use constructors such as:
5//!
6//! * [`ast::Document::parse`]
7//! * [`Schema::parse`]
8//! * [`Schema::parse_and_validate`]
9//! * [`ExecutableDocument::parse`]
10//! * [`ExecutableDocument::parse_and_validate`]
11//!
12//! If not, create a [`Parser`] and use its builder methods to change configuration.
13
14use crate::ast;
15use crate::ast::from_cst::Convert;
16use crate::ast::Document;
17use crate::collections::IndexMap;
18use crate::executable;
19use crate::schema::SchemaBuilder;
20use crate::validation::Details;
21use crate::validation::DiagnosticList;
22use crate::validation::Valid;
23use crate::validation::WithErrors;
24use crate::ExecutableDocument;
25use crate::Schema;
26use apollo_parser::SyntaxNode;
27use rowan::TextRange;
28use serde::Deserialize;
29use serde::Serialize;
30use std::num::NonZeroU64;
31use std::ops::Range;
32use std::path::Path;
33use std::path::PathBuf;
34use std::sync::atomic;
35use std::sync::atomic::AtomicU64;
36use std::sync::Arc;
37use std::sync::OnceLock;
38
/// Configuration for parsing an input string as GraphQL syntax.
///
/// Besides the configurable limits, a `Parser` remembers how far the most recent
/// `parse_*` call went towards each of them.
#[derive(Clone, Debug, Default)]
pub struct Parser {
    /// Recursion limit handed to `apollo-parser`, or `None` to keep its default.
    recursion_limit: Option<usize>,
    /// Token limit handed to `apollo-parser`, or `None` to keep its default.
    token_limit: Option<usize>,
    /// Highest recursion level reported by the last parse.
    recursion_reached: usize,
    /// Highest token count reported by the last parse.
    tokens_reached: usize,
}
47
48/// Records for validation information about a file that was parsed
49#[derive(Clone)]
50pub struct SourceFile {
51    pub(crate) path: PathBuf,
52    pub(crate) source_text: String,
53    pub(crate) source: OnceLock<ariadne::Source>,
54}
55
/// A map of source files relevant to a given document, keyed by [`FileId`].
///
/// Both the map itself and every [`SourceFile`] in it sit behind an [`Arc`].
pub type SourceMap = Arc<IndexMap<FileId, Arc<SourceFile>>>;
58
/// Integer identifier for a parsed source file.
///
/// This is what lets diagnostics point at the right source when, for example,
/// a schema is built and validated from several source files.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct FileId {
    /// The raw identifier; never zero.
    id: NonZeroU64,
}
67
/// A file ID and a boolean tag stored together in a single non-zero 64-bit integer.
#[derive(Clone, Copy)]
pub(crate) struct TaggedFileId {
    /// The packed representation holding both the tag and the ID.
    tag_and_id: NonZeroU64,
}
72
73/// The source location of a parsed node:
74/// file ID and text range (start and end byte offsets) within that file.
75#[derive(Clone, Copy, Hash, PartialEq, Eq)]
76pub struct SourceSpan {
77    pub(crate) file_id: FileId,
78    pub(crate) text_range: TextRange,
79}
80
81/// A line number and column number within a GraphQL document.
82#[derive(Clone, Copy, Hash, PartialEq, Eq, Serialize, Deserialize)]
83#[serde(deny_unknown_fields)]
84pub struct LineColumn {
85    /// The line number for this location, starting at 1 for the first line.
86    pub line: usize,
87    /// The column number for this location, starting at 1 and counting characters (Unicode Scalar
88    /// Values) like [`str::chars`].
89    pub column: usize,
90}
91
92impl std::fmt::Debug for LineColumn {
93    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
94        write!(f, "{}:{}", self.line, self.column)
95    }
96}
97
98impl Parser {
99    /// Create a `Parser` with default configuration.
100    /// Use other methods to change the configuration.
101    pub fn new() -> Self {
102        Self::default()
103    }
104
105    /// Configure the recursion limit to use while parsing.
106    ///
107    /// This protects against stack overflow.
108    /// If unset, use [`apollo-parser`][apollo_parser]’s default limit.
109    /// The exact meaning is unspecified,
110    /// but for GraphQL constructs like selection sets whose syntax can be nested,
111    /// the nesting level encountered during parsing counts towards this limit.
112    pub fn recursion_limit(mut self, value: usize) -> Self {
113        self.recursion_limit = Some(value);
114        self
115    }
116
117    /// Configure the limit on the number of tokens to parse.
118    /// If an input document is too big, parsing will be aborted.
119    /// By default, there is no limit.
120    pub fn token_limit(mut self, value: usize) -> Self {
121        self.token_limit = Some(value);
122        self
123    }
124
125    /// Parse the given source text into an AST document.
126    ///
127    /// `path` is the filesystem path (or arbitrary string) used in diagnostics
128    /// to identify this source file to users.
129    pub fn parse_ast(
130        &mut self,
131        source_text: impl Into<String>,
132        path: impl AsRef<Path>,
133    ) -> Result<Document, WithErrors<Document>> {
134        let mut errors = DiagnosticList::new(Default::default());
135        let ast = self.parse_ast_inner(source_text, path, FileId::new(), &mut errors);
136        errors.into_result_with(ast)
137    }
138
139    pub(crate) fn parse_ast_inner(
140        &mut self,
141        source_text: impl Into<String>,
142        path: impl AsRef<Path>,
143        file_id: FileId,
144        errors: &mut DiagnosticList,
145    ) -> Document {
146        let tree = self.parse_common(
147            source_text.into(),
148            path.as_ref().to_owned(),
149            file_id,
150            errors,
151            |parser| parser.parse(),
152        );
153        let sources = errors.sources.clone();
154        Document::from_cst(tree.document(), file_id, sources)
155    }
156
157    pub(crate) fn parse_common<T: apollo_parser::cst::CstNode>(
158        &mut self,
159        source_text: String,
160        path: PathBuf,
161        file_id: FileId,
162        errors: &mut DiagnosticList,
163        parse: impl FnOnce(apollo_parser::Parser) -> apollo_parser::SyntaxTree<T>,
164    ) -> apollo_parser::SyntaxTree<T> {
165        let mut parser = apollo_parser::Parser::new(&source_text);
166        if let Some(value) = self.recursion_limit {
167            parser = parser.recursion_limit(value)
168        }
169        if let Some(value) = self.token_limit {
170            parser = parser.token_limit(value)
171        }
172        let tree = parse(parser);
173        self.recursion_reached = tree.recursion_limit().high;
174        self.tokens_reached = tree.token_limit().high;
175        let source_file = Arc::new(SourceFile {
176            path,
177            source_text,
178            source: OnceLock::new(),
179        });
180        Arc::make_mut(&mut errors.sources).insert(file_id, source_file);
181        for parser_error in tree.errors() {
182            // Silently skip parse errors at index beyond 4 GiB.
183            // Rowan in apollo-parser might complain about files that large
184            // before we get here anyway.
185            let Ok(index) = parser_error.index().try_into() else {
186                continue;
187            };
188            let Ok(len) = parser_error.data().len().try_into() else {
189                continue;
190            };
191            let location = Some(SourceSpan {
192                file_id,
193                text_range: rowan::TextRange::at(index, len),
194            });
195            let details = if parser_error.is_limit() {
196                Details::ParserLimit {
197                    message: parser_error.message().to_owned(),
198                }
199            } else {
200                Details::SyntaxError {
201                    message: parser_error.message().to_owned(),
202                }
203            };
204            errors.push(location, details)
205        }
206        tree
207    }
208
209    /// Parse the given source text as the sole input file of a schema.
210    ///
211    /// `path` is the filesystem path (or arbitrary string) used in diagnostics
212    /// to identify this source file to users.
213    ///
214    /// To have multiple files contribute to a schema,
215    /// use [`Schema::builder`] and [`Parser::parse_into_schema_builder`].
216    #[allow(clippy::result_large_err)] // Typically not called very often
217    pub fn parse_schema(
218        &mut self,
219        source_text: impl Into<String>,
220        path: impl AsRef<Path>,
221    ) -> Result<Schema, WithErrors<Schema>> {
222        let mut builder = Schema::builder();
223        self.parse_into_schema_builder(source_text, path, &mut builder);
224        builder.build()
225    }
226
227    /// Parse the given source text as an additional input to a schema builder.
228    ///
229    /// `path` is the filesystem path (or arbitrary string) used in diagnostics
230    /// to identify this source file to users.
231    ///
232    /// This can be used to build a schema from multiple source files.
233    ///
234    /// Errors (if any) are recorded in the builder and returned by [`SchemaBuilder::build`].
235    pub fn parse_into_schema_builder(
236        &mut self,
237        source_text: impl Into<String>,
238        path: impl AsRef<Path>,
239        builder: &mut SchemaBuilder,
240    ) {
241        let ast = self.parse_ast_inner(source_text, path, FileId::new(), &mut builder.errors);
242        let executable_definitions_are_errors = true;
243        builder.add_ast_document_not_adding_sources(&ast, executable_definitions_are_errors);
244    }
245
246    /// Parse the given source text into an executable document, with the given schema.
247    ///
248    /// `path` is the filesystem path (or arbitrary string) used in diagnostics
249    /// to identify this source file to users.
250    #[allow(clippy::result_large_err)] // Typically not called very often
251    pub fn parse_executable(
252        &mut self,
253        schema: &Valid<Schema>,
254        source_text: impl Into<String>,
255        path: impl AsRef<Path>,
256    ) -> Result<ExecutableDocument, WithErrors<ExecutableDocument>> {
257        let (document, errors) = self.parse_executable_inner(schema, source_text, path);
258        errors.into_result_with(document)
259    }
260
261    pub(crate) fn parse_executable_inner(
262        &mut self,
263        schema: &Valid<Schema>,
264        source_text: impl Into<String>,
265        path: impl AsRef<Path>,
266    ) -> (ExecutableDocument, DiagnosticList) {
267        let mut errors = DiagnosticList::new(Default::default());
268        let ast = self.parse_ast_inner(source_text, path, FileId::new(), &mut errors);
269        let document = ast.to_executable_inner(schema, &mut errors);
270        (document, errors)
271    }
272
273    /// Parse a schema and executable document from the given source text
274    /// containing a mixture of type system definitions and executable definitions,
275    /// and validate them.
276    /// This is mostly useful for unit tests.
277    ///
278    /// `path` is the filesystem path (or arbitrary string) used in diagnostics
279    /// to identify this source file to users.
280    pub fn parse_mixed_validate(
281        &mut self,
282        source_text: impl Into<String>,
283        path: impl AsRef<Path>,
284    ) -> Result<(Valid<Schema>, Valid<ExecutableDocument>), DiagnosticList> {
285        let mut builder = SchemaBuilder::new();
286        let ast = self.parse_ast_inner(source_text, path, FileId::new(), &mut builder.errors);
287        let executable_definitions_are_errors = false;
288        let type_system_definitions_are_errors = false;
289        builder.add_ast_document_not_adding_sources(&ast, executable_definitions_are_errors);
290        let (mut schema, mut errors) = builder.build_inner();
291        let executable = crate::executable::from_ast::document_from_ast(
292            Some(&schema),
293            &ast,
294            &mut errors,
295            type_system_definitions_are_errors,
296        );
297        crate::schema::validation::validate_schema(&mut errors, &mut schema);
298        crate::executable::validation::validate_executable_document(
299            &mut errors,
300            &schema,
301            &executable,
302        );
303        errors
304            .into_result()
305            .map(|()| (Valid(schema), Valid(executable)))
306    }
307
308    /// Parse the given source text (e.g. `field_1 field_2 { field_2_1 }`
309    /// as a selection set with optional outer brackets.
310    ///
311    /// This is the syntax of the string argument to some Apollo Federation directives.
312    ///
313    /// `path` is the filesystem path (or arbitrary string) used in diagnostics
314    /// to identify this source file to users.
315    pub fn parse_field_set(
316        &mut self,
317        schema: &Valid<Schema>,
318        type_name: ast::NamedType,
319        source_text: impl Into<String>,
320        path: impl AsRef<Path>,
321    ) -> Result<executable::FieldSet, WithErrors<executable::FieldSet>> {
322        let (field_set, errors) = self.parse_field_set_inner(schema, type_name, source_text, path);
323        errors.into_result_with(field_set)
324    }
325
326    pub(crate) fn parse_field_set_inner(
327        &mut self,
328        schema: &Valid<Schema>,
329        type_name: ast::NamedType,
330        source_text: impl Into<String>,
331        path: impl AsRef<Path>,
332    ) -> (executable::FieldSet, DiagnosticList) {
333        let file_id = FileId::new();
334        let mut errors = DiagnosticList::new(Default::default());
335        let tree = self.parse_common(
336            source_text.into(),
337            path.as_ref().to_owned(),
338            file_id,
339            &mut errors,
340            |parser| parser.parse_selection_set(),
341        );
342        let ast = ast::from_cst::convert_selection_set(&tree.field_set(), file_id);
343        let mut selection_set = executable::SelectionSet::new(type_name);
344        let mut build_errors = executable::from_ast::BuildErrors {
345            errors: &mut errors,
346            path: executable::SelectionPath {
347                nested_fields: Vec::new(),
348                // 🤷
349                root: executable::ExecutableDefinitionName::AnonymousOperation(
350                    ast::OperationType::Query,
351                ),
352            },
353        };
354        selection_set.extend_from_ast(Some(schema), &mut build_errors, &ast);
355        let field_set = executable::FieldSet {
356            sources: errors.sources.clone(),
357            selection_set,
358        };
359        (field_set, errors)
360    }
361
362    /// Parse the given source text (e.g. `[Foo!]!`) as a reference to a GraphQL type.
363    ///
364    /// `path` is the filesystem path (or arbitrary string) used in diagnostics
365    /// to identify this source file to users.
366    pub fn parse_type(
367        &mut self,
368        source_text: impl Into<String>,
369        path: impl AsRef<Path>,
370    ) -> Result<ast::Type, DiagnosticList> {
371        let mut errors = DiagnosticList::new(Default::default());
372        let file_id = FileId::new();
373        let tree = self.parse_common(
374            source_text.into(),
375            path.as_ref().to_owned(),
376            file_id,
377            &mut errors,
378            |parser| parser.parse_type(),
379        );
380        errors.into_result().map(|()| {
381            tree.ty()
382                .convert(file_id)
383                .expect("conversion should be infallible if there were no syntax errors")
384        })
385    }
386
387    /// What level of recursion was reached during the last call to a `parse_*` method.
388    ///
389    /// Collecting this on a corpus of documents can help decide
390    /// how to set [`recursion_limit`][Self::recursion_limit].
391    pub fn recursion_reached(&self) -> usize {
392        self.recursion_reached
393    }
394
395    /// How many tokens were created during the last call to a `parse_*` method.
396    ///
397    /// Collecting this on a corpus of documents can help decide
398    /// how to set [`token_limit`][Self::token_limit].
399    pub fn tokens_reached(&self) -> usize {
400        self.tokens_reached
401    }
402}
403
404impl SourceFile {
405    /// The filesystem path (or arbitrary string) used in diagnostics
406    /// to identify this source file to users.
407    pub fn path(&self) -> &Path {
408        &self.path
409    }
410
411    pub fn source_text(&self) -> &str {
412        &self.source_text
413    }
414
415    pub(crate) fn ariadne(&self) -> &ariadne::Source {
416        self.source.get_or_init(|| {
417            // FIXME This string copy is not ideal, but changing to a reference counted string affects
418            // public API
419            ariadne::Source::from(self.source_text.clone())
420        })
421    }
422
423    /// Get [`LineColumn`] for the given 0-indexed UTF-8 byte `offset` from the start of the file.
424    ///
425    /// Returns None if the offset is out of bounds.
426    pub fn get_line_column(&self, offset: usize) -> Option<LineColumn> {
427        let (_, zero_indexed_line, zero_indexed_column) = self.ariadne().get_byte_line(offset)?;
428        Some(LineColumn {
429            line: zero_indexed_line + 1,
430            column: zero_indexed_column + 1,
431        })
432    }
433
434    /// Get starting and ending [`LineColumn`]s for the given `range` 0-indexed UTF-8 byte offsets.
435    ///
436    /// Returns `None` if either offset is out of bounds.
437    pub fn get_line_column_range(&self, range: Range<usize>) -> Option<Range<LineColumn>> {
438        let start = self.get_line_column(range.start)?;
439        let end = self.get_line_column(range.end)?;
440        Some(start..end)
441    }
442}
443
444impl std::fmt::Debug for SourceFile {
445    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
446        let Self {
447            path,
448            source_text,
449            source: _, // Skipped: it’s a cache and would make debugging other things noisy
450        } = self;
451        let mut debug_struct = f.debug_struct("SourceFile");
452        debug_struct.field("path", path);
453        if path != std::path::Path::new("built_in.graphql") {
454            debug_struct.field("source_text", source_text);
455        } else {
456            debug_struct.field(
457                "source_text",
458                &format_args!("include_str!(\"built_in.graphql\")"),
459            );
460        }
461        debug_struct.finish()
462    }
463}
464
465impl std::fmt::Debug for FileId {
466    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
467        self.id.fmt(f)
468    }
469}
470
/// First ID handed out by `FileId::new`; the counter is also reset to this value.
static INITIAL: u64 = 3;

/// The next file ID to use. This counter is global so that file IDs do not conflict
/// between different compiler instances.
static NEXT: AtomicU64 = AtomicU64::new(INITIAL);

/// The most significant bit of a `u64`, used as the tag bit of `TaggedFileId`.
const TAG: u64 = 1 << (u64::BITS - 1);
/// Every bit except [`TAG`]: the bits available to the file ID itself.
const ID_MASK: u64 = TAG - 1;

// Compile-time check that the two masks have the expected values.
#[allow(clippy::assertions_on_constants)]
const _: () = {
    assert!(TAG == 0x8000_0000_0000_0000);
    assert!(ID_MASK == 0x7FFF_FFFF_FFFF_FFFF);
};
484
485impl FileId {
486    /// The ID of the file implicitly added to type systems, for built-in scalars and introspection types
487    pub const BUILT_IN: Self = Self::const_new(1);
488
489    /// Passed to Ariadne to create a report without a location
490    pub(crate) const NONE: Self = Self::const_new(2);
491
492    // Returning a different value every time does not sound like good `impl Default`
493    #[allow(clippy::new_without_default)]
494    pub fn new() -> Self {
495        loop {
496            let id = NEXT.fetch_add(1, atomic::Ordering::AcqRel);
497            if id & TAG == 0 {
498                return Self {
499                    id: NonZeroU64::new(id).unwrap(),
500                };
501            } else {
502                // Overflowing 63 bits is unlikely, but if it somehow happens
503                // reset the counter and try again.
504                //
505                // `TaggedFileId` behaving incorrectly would be a memory safety issue,
506                // whereas a file ID collision “merely” causes
507                // diagnostics to print the wrong file name and source context.
508                Self::reset()
509            }
510        }
511    }
512
513    /// Reset file ID counter back to its initial value, used to get consistent results in tests.
514    ///
515    /// All tests in the process must use `#[serial_test::serial]`
516    #[doc(hidden)]
517    pub fn reset() {
518        NEXT.store(INITIAL, atomic::Ordering::Release)
519    }
520
521    const fn const_new(id: u64) -> Self {
522        assert!(id & ID_MASK == id);
523        // TODO: use unwrap() when const-stable https://github.com/rust-lang/rust/issues/67441
524        if let Some(id) = NonZeroU64::new(id) {
525            Self { id }
526        } else {
527            panic!()
528        }
529    }
530}
531
532impl TaggedFileId {
533    pub(crate) const fn pack(tag: bool, id: FileId) -> Self {
534        debug_assert!((id.id.get() & TAG) == 0);
535        let tag_and_id = if tag {
536            let packed = id.id.get() | TAG;
537            // SAFETY: `id.id` was non-zero, so setting an additional bit is still non-zero
538            unsafe { NonZeroU64::new_unchecked(packed) }
539        } else {
540            id.id
541        };
542        Self { tag_and_id }
543    }
544
545    pub(crate) fn tag(self) -> bool {
546        (self.tag_and_id.get() & TAG) != 0
547    }
548
549    pub(crate) fn file_id(self) -> FileId {
550        let unpacked = self.tag_and_id.get() & ID_MASK;
551        // SAFETY: `unpacked` has the same value as `id: FileId` did in `pack()`, which is non-zero
552        let id = unsafe { NonZeroU64::new_unchecked(unpacked) };
553        FileId { id }
554    }
555}
556
557impl SourceSpan {
558    pub(crate) fn new(file_id: FileId, node: &'_ SyntaxNode) -> Self {
559        Self {
560            file_id,
561            text_range: node.text_range(),
562        }
563    }
564
565    /// Returns the file ID for this location
566    pub fn file_id(&self) -> FileId {
567        self.file_id
568    }
569
570    /// Returns the offset from the start of the file to the start of the range, in UTF-8 bytes
571    pub fn offset(&self) -> usize {
572        self.text_range.start().into()
573    }
574
575    /// Returns the offset from the start of the file to the end of the range, in UTF-8 bytes
576    pub fn end_offset(&self) -> usize {
577        self.text_range.end().into()
578    }
579
580    /// Returns the length of the range, in UTF-8 bytes
581    pub fn node_len(&self) -> usize {
582        self.text_range.len().into()
583    }
584
585    /// Best effort at making a location with the given start and end
586    pub fn recompose(start_of: Option<Self>, end_of: Option<Self>) -> Option<Self> {
587        match (start_of, end_of) {
588            (None, None) => None,
589            (None, single @ Some(_)) | (single @ Some(_), None) => single,
590            (Some(start), Some(end)) => {
591                if start.file_id != end.file_id {
592                    // Pick one aribtrarily
593                    return Some(end);
594                }
595                Some(SourceSpan {
596                    file_id: start.file_id,
597                    text_range: TextRange::new(start.text_range.start(), end.text_range.end()),
598                })
599            }
600        }
601    }
602
603    /// The line and column numbers of [`Self::offset`]
604    pub fn line_column(&self, sources: &SourceMap) -> Option<LineColumn> {
605        let source = sources.get(&self.file_id)?;
606        source.get_line_column(self.offset())
607    }
608
609    /// The line and column numbers of the range from [`Self::offset`] to [`Self::end_offset`]
610    /// inclusive.
611    pub fn line_column_range(&self, sources: &SourceMap) -> Option<Range<LineColumn>> {
612        let source = sources.get(&self.file_id)?;
613        source.get_line_column_range(self.offset()..self.end_offset())
614    }
615}
616
617impl std::fmt::Debug for SourceSpan {
618    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
619        write!(
620            f,
621            "{}..{} @{:?}",
622            self.offset(),
623            self.end_offset(),
624            self.file_id,
625        )
626    }
627}