apollo_parser/cst/
node_ext.rs

1use crate::cst;
2use crate::cst::CstNode;
3use crate::SyntaxNode;
4use crate::TokenText;
5use rowan::GreenToken;
6use rowan::SyntaxKind;
7use std::num::ParseFloatError;
8use std::num::ParseIntError;
9
10impl cst::Name {
11    pub fn text(&self) -> TokenText {
12        text_of_first_token(self.syntax())
13    }
14}
15
16impl cst::Variable {
17    pub fn text(&self) -> TokenText {
18        self.name()
19            .expect("Cannot get variable's NAME token")
20            .text()
21    }
22}
23
24impl cst::EnumValue {
25    pub fn text(&self) -> TokenText {
26        self.name()
27            .expect("Cannot get enum value's NAME token")
28            .text()
29    }
30}
31
32impl cst::DirectiveLocation {
33    pub fn text(self) -> Option<TokenText> {
34        let txt = if self.query_token().is_some() {
35            Some("QUERY")
36        } else if self.mutation_token().is_some() {
37            Some("MUTATION")
38        } else if self.subscription_token().is_some() {
39            Some("SUBSCRIPTION")
40        } else if self.field_token().is_some() {
41            Some("FIELD")
42        } else if self.fragment_definition_token().is_some() {
43            Some("FRAGMENT_DEFINITION")
44        } else if self.fragment_spread_token().is_some() {
45            Some("FRAGMENT_SPREAD")
46        } else if self.inline_fragment_token().is_some() {
47            Some("INLINE_FRAGMENT")
48        } else if self.variable_definition_token().is_some() {
49            Some("VARIABLE_DEFINITION")
50        } else if self.schema_token().is_some() {
51            Some("SCHEMA")
52        } else if self.scalar_token().is_some() {
53            Some("SCALAR")
54        } else if self.object_token().is_some() {
55            Some("OBJECT")
56        } else if self.field_definition_token().is_some() {
57            Some("FIELD_DEFINITION")
58        } else if self.argument_definition_token().is_some() {
59            Some("ARGUMENT_DEFINITION")
60        } else if self.interface_token().is_some() {
61            Some("INTERFACE")
62        } else if self.union_token().is_some() {
63            Some("UNION")
64        } else if self.enum_token().is_some() {
65            Some("ENUM")
66        } else if self.enum_value_token().is_some() {
67            Some("ENUM_VALUE")
68        } else if self.input_object_token().is_some() {
69            Some("INPUT_OBJECT")
70        } else if self.input_field_definition_token().is_some() {
71            Some("INPUT_FIELD_DEFINITION")
72        } else {
73            None
74        };
75
76        txt.map(|txt| {
77            TokenText(GreenToken::new(
78                SyntaxKind(crate::SyntaxKind::DIRECTIVE_LOCATION as u16),
79                txt,
80            ))
81        })
82    }
83}
84
85impl cst::Definition {
86    /// Return the name of this definition, if any. Schema definitions are unnamed and always
87    /// return `None`.
88    pub fn name(&self) -> Option<cst::Name> {
89        match self {
90            Self::OperationDefinition(it) => it.name(),
91            Self::FragmentDefinition(it) => it.fragment_name()?.name(),
92            Self::DirectiveDefinition(it) => it.name(),
93            Self::SchemaDefinition(_) => None,
94            Self::ScalarTypeDefinition(it) => it.name(),
95            Self::ObjectTypeDefinition(it) => it.name(),
96            Self::InterfaceTypeDefinition(it) => it.name(),
97            Self::UnionTypeDefinition(it) => it.name(),
98            Self::EnumTypeDefinition(it) => it.name(),
99            Self::InputObjectTypeDefinition(it) => it.name(),
100            Self::SchemaExtension(_) => None,
101            Self::ScalarTypeExtension(it) => it.name(),
102            Self::ObjectTypeExtension(it) => it.name(),
103            Self::InterfaceTypeExtension(it) => it.name(),
104            Self::UnionTypeExtension(it) => it.name(),
105            Self::EnumTypeExtension(it) => it.name(),
106            Self::InputObjectTypeExtension(it) => it.name(),
107        }
108    }
109
110    pub fn kind(&self) -> &'static str {
111        match self {
112            cst::Definition::OperationDefinition(_) => "OperationDefinition",
113            cst::Definition::FragmentDefinition(_) => "FragmentDefinition",
114            cst::Definition::DirectiveDefinition(_) => "DirectiveDefinition",
115            cst::Definition::ScalarTypeDefinition(_) => "ScalarTypeDefinition",
116            cst::Definition::ObjectTypeDefinition(_) => "ObjectTypeDefinition",
117            cst::Definition::InterfaceTypeDefinition(_) => "InterfaceTypeDefinition",
118            cst::Definition::UnionTypeDefinition(_) => "UnionTypeDefinition",
119            cst::Definition::EnumTypeDefinition(_) => "EnumTypeDefinition",
120            cst::Definition::InputObjectTypeDefinition(_) => "InputObjectTypeDefinition",
121            cst::Definition::SchemaDefinition(_) => "SchemaDefinition",
122            cst::Definition::SchemaExtension(_) => "SchemaExtension",
123            cst::Definition::ScalarTypeExtension(_) => "ScalarTypeExtension",
124            cst::Definition::ObjectTypeExtension(_) => "ObjectTypeExtension",
125            cst::Definition::InterfaceTypeExtension(_) => "InterfaceTypeExtension",
126            cst::Definition::UnionTypeExtension(_) => "UnionTypeExtension",
127            cst::Definition::EnumTypeExtension(_) => "EnumTypeExtension",
128            cst::Definition::InputObjectTypeExtension(_) => "InputObjectTypeExtension",
129        }
130    }
131
132    pub fn is_executable_definition(&self) -> bool {
133        matches!(
134            self,
135            Self::OperationDefinition(_) | Self::FragmentDefinition(_)
136        )
137    }
138
139    pub fn is_extension_definition(&self) -> bool {
140        matches!(
141            self,
142            Self::SchemaExtension(_)
143                | Self::ScalarTypeExtension(_)
144                | Self::ObjectTypeExtension(_)
145                | Self::InterfaceTypeExtension(_)
146                | Self::UnionTypeExtension(_)
147                | Self::EnumTypeExtension(_)
148                | Self::InputObjectTypeExtension(_)
149        )
150    }
151}
152
153impl From<cst::StringValue> for String {
154    fn from(val: cst::StringValue) -> Self {
155        Self::from(&val)
156    }
157}
158
/// Handle escaped characters in a StringValue.
///
/// `\uXXXX` escapes are decoded as UTF-16 code units: a high surrogate that is immediately
/// followed by an escaped low surrogate (`\uD83D\uDE00`) is combined into a single code point,
/// and a surrogate that is not part of such a pair is replaced by U+FFFD REPLACEMENT CHARACTER
/// rather than causing a panic.
///
/// Unknown escape sequences are dropped from the output, and a lone backslash at the very end
/// of the input is kept as-is.
///
/// # Panics
///
/// Panics if a `\u` escape contains a character that is not a hexadecimal digit. Those should
/// be rejected in the lexer already.
fn unescape_string(input: &str) -> String {
    /// Fold the next (up to) four characters of `chars` into a 16-bit value, reading each one
    /// as a hexadecimal digit.
    fn read_code_unit(chars: &mut std::str::Chars<'_>) -> u32 {
        chars.by_ref().take(4).fold(0, |acc, c| {
            let digit = c
                .to_digit(16)
                .expect("unicode escape must consist of hexadecimal digits");
            (acc << 4) + digit
        })
    }

    /// If the remaining input starts with `\uXXXX` and `XXXX` is a low surrogate, return that
    /// code unit. Nothing is consumed.
    fn peek_low_surrogate(chars: &std::str::Chars<'_>) -> Option<u32> {
        let digits = chars.as_str().strip_prefix("\\u")?.get(..4)?;
        // `from_str_radix` also accepts a leading sign, so insist on four plain hex digits.
        if !digits.bytes().all(|b| b.is_ascii_hexdigit()) {
            return None;
        }
        let unit = u32::from_str_radix(digits, 16).ok()?;
        (0xDC00..=0xDFFF).contains(&unit).then_some(unit)
    }

    let mut output = String::with_capacity(input.len());

    let mut iter = input.chars();
    while let Some(c) = iter.next() {
        if c != '\\' {
            output.push(c);
            continue;
        }

        let Some(c2) = iter.next() else {
            // Trailing backslash with nothing to escape: keep it.
            output.push(c);
            break;
        };

        // TODO: https://github.com/apollographql/apollo-rs/issues/657 needs
        // changes both here and in `lexer/mod.rs`
        match c2 {
            '"' | '\\' | '/' => output.push(c2),
            'b' => output.push('\u{0008}'),
            'f' => output.push('\u{000c}'),
            'n' => output.push('\n'),
            'r' => output.push('\r'),
            't' => output.push('\t'),
            'u' => {
                let unit = read_code_unit(&mut iter);
                let code_point = match unit {
                    0xD800..=0xDBFF => match peek_low_surrogate(&iter) {
                        Some(low) => {
                            // Consume the six characters of the trailing `\uXXXX`.
                            let _ = iter.nth(5);
                            0x10000 + ((unit - 0xD800) << 10) + (low - 0xDC00)
                        }
                        None => unit,
                    },
                    _ => unit,
                };
                // Lone surrogates are not valid `char`s; substitute instead of panicking.
                output.push(char::from_u32(code_point).unwrap_or(char::REPLACEMENT_CHARACTER));
            }
            _ => (),
        }
    }

    output
}
204
/// A backslash followed by three double quotes (`\"""`).
const ESCAPED_TRIPLE_QUOTE: &str = "\\\"\"\"";
/// Three double quotes (`"""`).
const TRIPLE_QUOTE: &str = "\"\"\"";
207
208fn is_block_string(input: &str) -> bool {
209    input.starts_with(TRIPLE_QUOTE)
210}
211
212/// Iterator over the lines in a GraphQL string, using GraphQL's definition of newlines
213/// (\r\n, \n, or just \r).
214struct GraphQLLines<'a> {
215    input: &'a str,
216    finished: bool,
217}
218
219impl<'a> GraphQLLines<'a> {
220    fn new(input: &'a str) -> Self {
221        Self {
222            input,
223            finished: false,
224        }
225    }
226}
227
228impl<'a> Iterator for GraphQLLines<'a> {
229    type Item = &'a str;
230    fn next(&mut self) -> Option<Self::Item> {
231        // Can't just check for the input string being empty, as an empty string should still
232        // produce one line.
233        if self.finished {
234            return None;
235        }
236
237        let Some(index) = memchr::memchr2(b'\r', b'\n', self.input.as_bytes()) else {
238            self.finished = true;
239            return Some(self.input);
240        };
241        let line = &self.input[..index];
242        let rest = match self.input.get(index..=index + 1) {
243            Some("\r\n") => &self.input[index + 2..],
244            _ => &self.input[index + 1..],
245        };
246        self.input = rest;
247        Some(line)
248    }
249}
250
251/// Split lines on \n, \r\n, and just \r
252fn split_lines(input: &str) -> impl Iterator<Item = &str> {
253    GraphQLLines::new(input)
254}
255
256/// Replace a literal pattern in a string but push the output to an existing string.
257///
258/// Like `str::replace`, but doesn't allocate if there's enough space in the provided output.
259fn replace_into(input: &str, pattern: &str, replace: &str, output: &mut String) {
260    let mut last_index = 0;
261    for index in memchr::memmem::find_iter(input.as_bytes(), pattern.as_bytes()) {
262        output.push_str(&input[last_index..index]);
263        output.push_str(replace);
264        last_index = index + pattern.len();
265    }
266    if last_index < input.len() {
267        output.push_str(&input[last_index..]);
268    }
269}
270
271/// Implementation of the spec function `BlockStringValue(rawValue)`. In addition to handling
272/// indents and newline normalization, this also handles escape sequences (strictly not part of
273/// BlockStringValue in the spec, but more efficient to do it at the same time).
274///
275/// Spec: https://spec.graphql.org/October2021/#BlockStringValue()
276fn unescape_block_string(raw_value: &str) -> String {
277    /// WhiteSpace :: Horizontal Tab (U+0009) Space (U+0020)
278    fn is_whitespace(c: char) -> bool {
279        matches!(c, ' ' | '\t')
280    }
281    /// Check if a string is all WhiteSpace. This expects a single line of input.
282    fn is_whitespace_line(line: &str) -> bool {
283        line.chars().all(is_whitespace)
284    }
285    /// Count the indentation of a single line (how many WhiteSpace characters are at the start).
286    fn count_indent(line: &str) -> usize {
287        line.chars().take_while(|&c| is_whitespace(c)).count()
288    }
289
290    // 1. Let lines be the result of splitting rawValue by LineTerminator.
291    // 2. Let commonIndent be null.
292    // 3. For each line in lines:
293    let common_indent = split_lines(raw_value)
294        // 3.a. If line is the first item in lines, continue to the next line.
295        .skip(1)
296        .filter_map(|line| {
297            // 3.b. Let length be the number of characters in line.
298            // We will compare this byte length to a character length below, but
299            // `count_indent` only ever counts one-byte characters, so it's equivalent.
300            let length = line.len();
301            // 3.c. Let indent be the number of leading consecutive WhiteSpace characters in line.
302            let indent = count_indent(line);
303            // 3.d. If indent is less than length:
304            (indent < length).then_some(indent)
305        })
306        .min()
307        .unwrap_or(0);
308
309    let mut lines = split_lines(raw_value)
310        .enumerate()
311        // 4.a. For each line in lines:
312        .map(|(index, line)| {
313            // 4.a.i. If line is the first item in lines, continue to the next line.
314            if index == 0 {
315                line
316            } else {
317                // 4.a.ii. Remove commonIndent characters from the beginning of line.
318                &line[common_indent.min(line.len())..]
319            }
320        })
321        // 5. While the first item line in lines contains only WhiteSpace:
322        // 5.a. Remove the first item from lines.
323        .skip_while(|line| is_whitespace_line(line));
324
325    // (Step 6 is done at the end so we don't need an intermediate allocation.)
326
327    // 7. Let formatted be the empty character sequence.
328    let mut formatted = String::with_capacity(raw_value.len());
329
330    // 8. For each line in lines:
331    // 8.a. If line is the first item in lines:
332    if let Some(line) = lines.next() {
333        // 8.a.i. Append formatted with line.
334        replace_into(line, ESCAPED_TRIPLE_QUOTE, TRIPLE_QUOTE, &mut formatted);
335    };
336
337    let mut final_char_index = formatted.len();
338
339    // 8.b. Otherwise:
340    for line in lines {
341        // 8.b.i. Append formatted with a line feed character (U+000A).
342        formatted.push('\n');
343        // 8.b.ii. Append formatted with line.
344        replace_into(line, ESCAPED_TRIPLE_QUOTE, TRIPLE_QUOTE, &mut formatted);
345
346        // Track the last non-whitespace line for implementing step 6 in the spec.
347        if !is_whitespace_line(line) {
348            final_char_index = formatted.len();
349        }
350    }
351
352    // 6. Implemented differently: remove WhiteSpace-only lines from the end.
353    formatted.truncate(final_char_index);
354
355    // 9. Return formatted.
356    formatted
357}
358
359// TODO(@goto-bus-stop) As this handles escaping, which can fail in theory, it should be TryFrom
360impl From<&'_ cst::StringValue> for String {
361    fn from(val: &'_ cst::StringValue) -> Self {
362        let text = text_of_first_token(val.syntax());
363        // These slices would panic if the contents are invalid, but the lexer already guarantees that the
364        // string is valid.
365        if is_block_string(&text) {
366            unescape_block_string(&text[3..text.len() - 3])
367        } else {
368            unescape_string(&text[1..text.len() - 1])
369        }
370    }
371}
372
373impl TryFrom<cst::IntValue> for i32 {
374    type Error = ParseIntError;
375
376    fn try_from(val: cst::IntValue) -> Result<Self, Self::Error> {
377        Self::try_from(&val)
378    }
379}
380
381impl TryFrom<&'_ cst::IntValue> for i32 {
382    type Error = ParseIntError;
383
384    fn try_from(val: &'_ cst::IntValue) -> Result<Self, Self::Error> {
385        let text = text_of_first_token(val.syntax());
386        text.parse()
387    }
388}
389
390impl TryFrom<cst::IntValue> for f64 {
391    type Error = ParseFloatError;
392
393    fn try_from(val: cst::IntValue) -> Result<Self, Self::Error> {
394        Self::try_from(&val)
395    }
396}
397
398impl TryFrom<&'_ cst::IntValue> for f64 {
399    type Error = ParseFloatError;
400
401    fn try_from(val: &'_ cst::IntValue) -> Result<Self, Self::Error> {
402        let text = text_of_first_token(val.syntax());
403        text.parse()
404    }
405}
406
407impl TryFrom<cst::FloatValue> for f64 {
408    type Error = ParseFloatError;
409
410    fn try_from(val: cst::FloatValue) -> Result<Self, Self::Error> {
411        Self::try_from(&val)
412    }
413}
414
415impl TryFrom<&'_ cst::FloatValue> for f64 {
416    type Error = ParseFloatError;
417
418    fn try_from(val: &'_ cst::FloatValue) -> Result<Self, Self::Error> {
419        let text = text_of_first_token(val.syntax());
420        text.parse()
421    }
422}
423
424impl TryFrom<cst::BooleanValue> for bool {
425    type Error = std::str::ParseBoolError;
426
427    fn try_from(val: cst::BooleanValue) -> Result<Self, Self::Error> {
428        Self::try_from(&val)
429    }
430}
431
432impl TryFrom<&'_ cst::BooleanValue> for bool {
433    type Error = std::str::ParseBoolError;
434
435    fn try_from(val: &'_ cst::BooleanValue) -> Result<Self, Self::Error> {
436        let text = text_of_first_token(val.syntax());
437        text.parse()
438    }
439}
440
441fn text_of_first_token(node: &SyntaxNode) -> TokenText {
442    let first_token = node
443        .green()
444        .children()
445        .next()
446        .and_then(|it| it.into_token())
447        .unwrap()
448        .to_owned();
449
450    TokenText(first_token)
451}
452
#[cfg(test)]
mod string_tests {
    use super::unescape_string;

    /// Strings without escape sequences come back unchanged.
    #[test]
    fn it_parses_strings() {
        for plain in [r"simple", r" white space "] {
            assert_eq!(unescape_string(plain), plain);
        }
    }

    /// Each supported escape sequence is replaced by the character it stands for.
    #[test]
    fn it_unescapes_strings() {
        let cases = [
            (r#"quote \""#, "quote \""),
            (r"escaped \n\r\b\t\f", "escaped \n\r\u{0008}\t\u{000c}"),
            (r"slashes \\ \/", r"slashes \ /"),
            (
                "unescaped unicode outside BMP 😀",
                "unescaped unicode outside BMP 😀",
            ),
            (
                r"unicode \u1234\u5678\u90AB\uCDEF",
                "unicode \u{1234}\u{5678}\u{90AB}\u{CDEF}",
            ),
        ];

        for (input, expected) in cases {
            assert_eq!(unescape_string(input), expected, "input: {input:?}");
        }
    }
}
481
#[cfg(test)]
mod block_string_tests {
    use super::split_lines;
    use super::unescape_block_string;

    /// Collects the lines of `input` so they can be compared against an array.
    fn lines_of(input: &str) -> Vec<&str> {
        split_lines(input).collect()
    }

    #[test]
    fn it_splits_lines_by_graphql_newline_definition() {
        let source = concat!(
            "source text\n",
            "    with some\n",
            "    new\n",
            "\n",
            "\n",
            "    lines\n",
            "        ",
        );
        assert_eq!(
            lines_of(source),
            [
                "source text",
                "    with some",
                "    new",
                "",
                "",
                "    lines",
                "        ",
            ]
        );

        // All three terminator styles in one input.
        assert_eq!(
            lines_of("with\nand\r\nand\rall in the same\r\nstring"),
            ["with", "and", "and", "all in the same", "string"]
        );

        // An empty input is still one (empty) line.
        assert_eq!(lines_of(""), [""]);

        // \n, \r and \r\n are three terminators, so four empty lines.
        assert_eq!(lines_of("\n\r\r\n"), ["", "", "", ""]);
    }

    #[test]
    fn it_normalizes_block_string_newlines() {
        for input in ["multi\nline", "multi\r\nline", "multi\rline"] {
            assert_eq!(unescape_block_string(input), "multi\nline");
        }
    }

    #[test]
    fn it_does_not_unescape_block_strings() {
        let untouched = [
            r"escaped \n\r\b\t\f",
            r"slashes \\ \/",
            "unescaped unicode outside BMP \u{1f600}",
        ];
        for input in untouched {
            assert_eq!(unescape_block_string(input), input);
        }
    }

    #[test]
    fn it_dedents_block_strings() {
        let single_line = "  intact whitespace with one line  ";
        assert_eq!(unescape_block_string(single_line), single_line);

        let evenly_indented = concat!(
            "\n",
            "            This is\n",
            "            indented\n",
            "            quite a lot\n",
            "    ",
        );
        assert_eq!(
            unescape_block_string(evenly_indented),
            "This is\nindented\nquite a lot"
        );

        let staggered = concat!(
            "\n",
            "\n",
            "        spans\n",
            "          multiple\n",
            "            lines\n",
            "\n",
            "    ",
        );
        assert_eq!(
            unescape_block_string(staggered),
            "spans\n  multiple\n    lines"
        );
    }
}
apollo_parser/cst/node_ext.rs

apollo_parser/cst/
node_ext.rs