// libgraphql_parser/token/graphql_token_kind.rs

1use crate::GraphQLErrorNote;
2use crate::GraphQLStringParsingError;
3use crate::smallvec::SmallVec;
4use std::borrow::Cow;
5use std::num::ParseFloatError;
6use std::num::ParseIntError;
7
8/// The kind of a GraphQL token.
9///
10/// Literal values (`IntValue`, `FloatValue`, `StringValue`) store only the raw
11/// source text.
12///
13/// # Lifetime Parameter
14///
15/// The `'src` lifetime enables zero-copy lexing: `StrGraphQLTokenSource` can
16/// borrow string slices directly from the source text using `Cow::Borrowed`,
17/// while `RustMacroGraphQLTokenSource` uses `Cow::Owned` since `proc_macro2`
18/// doesn't expose contiguous source text.
19///
20/// # Negative Numeric Literals
21///
22/// Negative numbers like `-123` are lexed as single tokens (e.g.
23/// `IntValue("-123")`), not as separate minus and number tokens. This matches
24/// the GraphQL spec's grammar for `IntValue`/`FloatValue`.
25#[derive(Clone, Debug, PartialEq)]
26pub enum GraphQLTokenKind<'src> {
27    // =========================================================================
28    // Punctuators (no allocation needed)
29    // =========================================================================
30    /// `&`
31    Ampersand,
32    /// `@`
33    At,
34    /// `!`
35    Bang,
36    /// `:`
37    Colon,
38    /// `}`
39    CurlyBraceClose,
40    /// `{`
41    CurlyBraceOpen,
42    /// `$`
43    Dollar,
44    /// `...`
45    Ellipsis,
46    /// `=`
47    Equals,
48    /// `)`
49    ParenClose,
50    /// `(`
51    ParenOpen,
52    /// `|`
53    Pipe,
54    /// `]`
55    SquareBracketClose,
56    /// `[`
57    SquareBracketOpen,
58
59    // =========================================================================
60    // Literals (raw source text only)
61    // =========================================================================
62    /// A GraphQL name/identifier.
63    ///
64    /// Uses `Cow<'src, str>` to enable zero-copy lexing from string sources.
65    Name(Cow<'src, str>),
66
67    /// Raw source text of an integer literal, including optional negative sign
68    /// (e.g. `"-123"`, `"0"`).
69    ///
70    /// Use `parse_int_value()` to parse the raw text into an `i64`.
71    /// Uses `Cow<'src, str>` to enable zero-copy lexing from string sources.
72    IntValue(Cow<'src, str>),
73
74    /// Raw source text of a float literal, including optional negative sign
75    /// (e.g. `"-1.23e-4"`, `"0.5"`).
76    ///
77    /// Use `parse_float_value()` to parse the raw text into an `f64`.
78    /// Uses `Cow<'src, str>` to enable zero-copy lexing from string sources.
79    FloatValue(Cow<'src, str>),
80
81    /// Raw source text of a string literal, including quotes
82    /// (e.g. `"\"hello\\nworld\""`, `"\"\"\"block\"\"\""`)
83    ///
84    /// Use `parse_string_value()` to process escape sequences and get the
85    /// unescaped content.
86    /// Uses `Cow<'src, str>` to enable zero-copy lexing from string sources.
87    StringValue(Cow<'src, str>),
88
89    // =========================================================================
90    // Boolean and null (distinct from Name for type safety)
91    // =========================================================================
92    /// The `true` literal.
93    True,
94    /// The `false` literal.
95    False,
96    /// The `null` literal.
97    Null,
98
99    // =========================================================================
100    // End of input
101    // =========================================================================
102    /// End of input. The associated `GraphQLToken` may carry trailing trivia.
103    Eof,
104
105    // =========================================================================
106    // Lexer error (allows error recovery)
107    // =========================================================================
108    /// A lexer error. This allows the parser to continue and collect multiple
109    /// errors in a single pass.
110    ///
111    /// # Performance Note (B19)
112    ///
113    /// The error payload is boxed to avoid bloating the enum's size. Without
114    /// the Box, the `SmallVec<[GraphQLErrorNote; 2]>` error-notes field
115    /// (~208 bytes) would force *every* variant of `GraphQLTokenKind` to be
116    /// ~232 bytes — even zero-data punctuators. Boxing shrinks the Error
117    /// variant to a single pointer, which dramatically reduces
118    /// the size of every `GraphQLToken` on the happy path where errors
119    /// never occur (zero additional heap allocations in practice).
120    ///
121    /// TODO: Explore replacing error_notes with a richer diagnostics structure
122    /// that includes things like severity level and "fix action" for IDE
123    /// integration.
124    Error(Box<GraphQLTokenError>),
125}
126
127/// The payload of a [`GraphQLTokenKind::Error`] variant.
128///
129/// Separated into its own struct so it can be heap-allocated behind a `Box`,
130/// keeping the `GraphQLTokenKind` enum small. See the performance note on
131/// [`GraphQLTokenKind::Error`] for details.
132#[derive(Clone, Debug, PartialEq)]
133pub struct GraphQLTokenError {
134    /// A human-readable error message.
135    pub message: String,
136    /// Optional notes providing additional context or suggestions.
137    pub error_notes: SmallVec<[GraphQLErrorNote; 2]>,
138}
139
140impl<'src> GraphQLTokenKind<'src> {
141    // =========================================================================
142    // Helper constructors for creating token kinds
143    // =========================================================================
144
145    /// Create a `Name` token from a borrowed string slice (zero-copy).
146    ///
147    /// Use this in `StrGraphQLTokenSource` where the source text can be
148    /// borrowed directly.
149    #[inline]
150    pub fn name_borrowed(s: &'src str) -> Self {
151        GraphQLTokenKind::Name(Cow::Borrowed(s))
152    }
153
154    /// Create a `Name` token from an owned `String`.
155    ///
156    /// Use this in `RustMacroGraphQLTokenSource` where the string must be
157    /// allocated (e.g., from `ident.to_string()`).
158    #[inline]
159    pub fn name_owned(s: String) -> Self {
160        GraphQLTokenKind::Name(Cow::Owned(s))
161    }
162
163    /// Create an `IntValue` token from a borrowed string slice (zero-copy).
164    #[inline]
165    pub fn int_value_borrowed(s: &'src str) -> Self {
166        GraphQLTokenKind::IntValue(Cow::Borrowed(s))
167    }
168
169    /// Create an `IntValue` token from an owned `String`.
170    #[inline]
171    pub fn int_value_owned(s: String) -> Self {
172        GraphQLTokenKind::IntValue(Cow::Owned(s))
173    }
174
175    /// Create a `FloatValue` token from a borrowed string slice (zero-copy).
176    #[inline]
177    pub fn float_value_borrowed(s: &'src str) -> Self {
178        GraphQLTokenKind::FloatValue(Cow::Borrowed(s))
179    }
180
181    /// Create a `FloatValue` token from an owned `String`.
182    #[inline]
183    pub fn float_value_owned(s: String) -> Self {
184        GraphQLTokenKind::FloatValue(Cow::Owned(s))
185    }
186
187    /// Create a `StringValue` token from a borrowed string slice (zero-copy).
188    #[inline]
189    pub fn string_value_borrowed(s: &'src str) -> Self {
190        GraphQLTokenKind::StringValue(Cow::Borrowed(s))
191    }
192
193    /// Create a `StringValue` token from an owned `String`.
194    #[inline]
195    pub fn string_value_owned(s: String) -> Self {
196        GraphQLTokenKind::StringValue(Cow::Owned(s))
197    }
198
199    /// Create an `Error` token.
200    ///
201    /// Error messages are always dynamically constructed, so they use plain
202    /// `String` rather than `Cow`.
203    #[inline]
204    pub fn error(message: impl Into<String>, error_notes: SmallVec<[GraphQLErrorNote; 2]>) -> Self {
205        GraphQLTokenKind::Error(Box::new(GraphQLTokenError {
206            message: message.into(),
207            error_notes,
208        }))
209    }
210
211    // =========================================================================
212    // Query methods
213    // =========================================================================
214
215    /// Returns `true` if this token is a punctuator.
216    pub fn is_punctuator(&self) -> bool {
217        match self {
218            GraphQLTokenKind::Ampersand
219            | GraphQLTokenKind::At
220            | GraphQLTokenKind::Bang
221            | GraphQLTokenKind::Colon
222            | GraphQLTokenKind::CurlyBraceClose
223            | GraphQLTokenKind::CurlyBraceOpen
224            | GraphQLTokenKind::Dollar
225            | GraphQLTokenKind::Ellipsis
226            | GraphQLTokenKind::Equals
227            | GraphQLTokenKind::ParenClose
228            | GraphQLTokenKind::ParenOpen
229            | GraphQLTokenKind::Pipe
230            | GraphQLTokenKind::SquareBracketClose
231            | GraphQLTokenKind::SquareBracketOpen => true,
232
233            GraphQLTokenKind::Name(_)
234            | GraphQLTokenKind::IntValue(_)
235            | GraphQLTokenKind::FloatValue(_)
236            | GraphQLTokenKind::StringValue(_)
237            | GraphQLTokenKind::True
238            | GraphQLTokenKind::False
239            | GraphQLTokenKind::Null
240            | GraphQLTokenKind::Eof
241            | GraphQLTokenKind::Error(_) => false,
242        }
243    }
244
245    /// Returns the string representation of this token if it is a punctuator.
246    pub fn as_punctuator_str(&self) -> Option<&'static str> {
247        match self {
248            GraphQLTokenKind::Ampersand => Some("&"),
249            GraphQLTokenKind::At => Some("@"),
250            GraphQLTokenKind::Bang => Some("!"),
251            GraphQLTokenKind::Colon => Some(":"),
252            GraphQLTokenKind::CurlyBraceClose => Some("}"),
253            GraphQLTokenKind::CurlyBraceOpen => Some("{"),
254            GraphQLTokenKind::Dollar => Some("$"),
255            GraphQLTokenKind::Ellipsis => Some("..."),
256            GraphQLTokenKind::Equals => Some("="),
257            GraphQLTokenKind::ParenClose => Some(")"),
258            GraphQLTokenKind::ParenOpen => Some("("),
259            GraphQLTokenKind::Pipe => Some("|"),
260            GraphQLTokenKind::SquareBracketClose => Some("]"),
261            GraphQLTokenKind::SquareBracketOpen => Some("["),
262
263            GraphQLTokenKind::Name(_)
264            | GraphQLTokenKind::IntValue(_)
265            | GraphQLTokenKind::FloatValue(_)
266            | GraphQLTokenKind::StringValue(_)
267            | GraphQLTokenKind::True
268            | GraphQLTokenKind::False
269            | GraphQLTokenKind::Null
270            | GraphQLTokenKind::Eof
271            | GraphQLTokenKind::Error(_) => None,
272        }
273    }
274
275    /// Returns `true` if this token is a value literal (`IntValue`,
276    /// `FloatValue`, `StringValue`, `True`, `False`, or `Null`).
277    pub fn is_value(&self) -> bool {
278        match self {
279            GraphQLTokenKind::IntValue(_)
280            | GraphQLTokenKind::FloatValue(_)
281            | GraphQLTokenKind::StringValue(_)
282            | GraphQLTokenKind::True
283            | GraphQLTokenKind::False
284            | GraphQLTokenKind::Null => true,
285
286            GraphQLTokenKind::Ampersand
287            | GraphQLTokenKind::At
288            | GraphQLTokenKind::Bang
289            | GraphQLTokenKind::Colon
290            | GraphQLTokenKind::CurlyBraceClose
291            | GraphQLTokenKind::CurlyBraceOpen
292            | GraphQLTokenKind::Dollar
293            | GraphQLTokenKind::Ellipsis
294            | GraphQLTokenKind::Equals
295            | GraphQLTokenKind::ParenClose
296            | GraphQLTokenKind::ParenOpen
297            | GraphQLTokenKind::Pipe
298            | GraphQLTokenKind::SquareBracketClose
299            | GraphQLTokenKind::SquareBracketOpen
300            | GraphQLTokenKind::Name(_)
301            | GraphQLTokenKind::Eof
302            | GraphQLTokenKind::Error(_) => false,
303        }
304    }
305
306    /// Returns `true` if this token represents a lexer error.
307    pub fn is_error(&self) -> bool {
308        matches!(self, GraphQLTokenKind::Error(_))
309    }
310
311    /// Parse an `IntValue`'s raw text to `i64`.
312    ///
313    /// Returns `None` if this is not an `IntValue`, or `Some(Err(...))` if
314    /// parsing fails.
315    pub fn parse_int_value(&self) -> Option<Result<i64, ParseIntError>> {
316        match self {
317            GraphQLTokenKind::IntValue(raw) => Some(raw.parse()),
318            _ => None,
319        }
320    }
321
322    /// Parse a `FloatValue`'s raw text to `f64`.
323    ///
324    /// Returns `None` if this is not a `FloatValue`, or `Some(Err(...))` if
325    /// parsing fails.
326    pub fn parse_float_value(&self) -> Option<Result<f64, ParseFloatError>> {
327        match self {
328            GraphQLTokenKind::FloatValue(raw) => Some(raw.parse()),
329            _ => None,
330        }
331    }
332
333    /// Parse a `StringValue`'s raw text to unescaped content.
334    ///
335    /// Handles escape sequences per the GraphQL spec:
336    /// - For single-line strings (`"..."`): processes `\n`, `\r`, `\t`, `\\`,
337    ///   `\"`, `\/`, `\b`, `\f`, `\uXXXX` (fixed 4-digit), and `\u{X...}`
338    ///   (variable length).
339    /// - For block strings (`"""..."""`): applies the indentation stripping
340    ///   algorithm per spec, then processes `\"""` escape only.
341    ///
342    /// Returns `None` if this is not a `StringValue`, or `Some(Err(...))` if
343    /// parsing fails.
344    pub fn parse_string_value(&self) -> Option<Result<String, GraphQLStringParsingError>> {
345        match self {
346            GraphQLTokenKind::StringValue(raw) => Some(parse_graphql_string(raw)),
347            _ => None,
348        }
349    }
350}
351
352/// Parse a raw GraphQL string literal into its unescaped content.
353fn parse_graphql_string(raw: &str) -> Result<String, GraphQLStringParsingError> {
354    // Check if this is a block string
355    if raw.starts_with("\"\"\"") {
356        parse_block_string(raw)
357    } else {
358        parse_single_line_string(raw)
359    }
360}
361
362/// Parse a single-line string literal.
363fn parse_single_line_string(raw: &str) -> Result<String, GraphQLStringParsingError> {
364    // Strip surrounding quotes
365    if !raw.starts_with('"') || !raw.ends_with('"') || raw.len() < 2 {
366        return Err(GraphQLStringParsingError::UnterminatedString);
367    }
368    let content = &raw[1..raw.len() - 1];
369
370    let mut result = String::with_capacity(content.len());
371    let mut chars = content.chars().peekable();
372
373    while let Some(c) = chars.next() {
374        if c == '\\' {
375            match chars.next() {
376                Some('n') => result.push('\n'),
377                Some('r') => result.push('\r'),
378                Some('t') => result.push('\t'),
379                Some('\\') => result.push('\\'),
380                Some('"') => result.push('"'),
381                Some('/') => result.push('/'),
382                Some('b') => result.push('\u{0008}'),
383                Some('f') => result.push('\u{000C}'),
384                Some('u') => {
385                    let unicode_char = parse_unicode_escape(&mut chars)?;
386                    result.push(unicode_char);
387                },
388                Some(other) => {
389                    return Err(GraphQLStringParsingError::InvalidEscapeSequence(
390                        format!("\\{other}"),
391                    ));
392                },
393                None => {
394                    return Err(GraphQLStringParsingError::InvalidEscapeSequence(
395                        "\\".to_string(),
396                    ));
397                },
398            }
399        } else {
400            result.push(c);
401        }
402    }
403
404    Ok(result)
405}
406
407/// Parse a Unicode escape sequence after seeing `\u`.
408fn parse_unicode_escape(
409    chars: &mut std::iter::Peekable<std::str::Chars>,
410) -> Result<char, GraphQLStringParsingError> {
411    // Check for variable-length syntax: \u{...}
412    if chars.peek() == Some(&'{') {
413        chars.next(); // consume '{'
414        let mut hex = String::new();
415        loop {
416            match chars.next() {
417                Some('}') => break,
418                Some(c) if c.is_ascii_hexdigit() => hex.push(c),
419                Some(c) => {
420                    return Err(GraphQLStringParsingError::InvalidUnicodeEscape(format!(
421                        "\\u{{{hex}{c}"
422                    )));
423                }
424                None => {
425                    return Err(GraphQLStringParsingError::InvalidUnicodeEscape(format!(
426                        "\\u{{{hex}"
427                    )));
428                }
429            }
430        }
431        if hex.is_empty() {
432            return Err(GraphQLStringParsingError::InvalidUnicodeEscape(
433                "\\u{}".to_string(),
434            ));
435        }
436        let code_point = u32::from_str_radix(&hex, 16).map_err(|_| {
437            GraphQLStringParsingError::InvalidUnicodeEscape(format!("\\u{{{hex}}}"))
438        })?;
439        char::from_u32(code_point).ok_or_else(|| {
440            GraphQLStringParsingError::InvalidUnicodeEscape(format!("\\u{{{hex}}}"))
441        })
442    } else {
443        // Fixed 4-digit syntax: \uXXXX
444        let mut hex = String::with_capacity(4);
445        for _ in 0..4 {
446            match chars.next() {
447                Some(c) if c.is_ascii_hexdigit() => hex.push(c),
448                Some(c) => {
449                    return Err(GraphQLStringParsingError::InvalidUnicodeEscape(format!(
450                        "\\u{hex}{c}"
451                    )));
452                }
453                None => {
454                    return Err(GraphQLStringParsingError::InvalidUnicodeEscape(format!(
455                        "\\u{hex}"
456                    )));
457                }
458            }
459        }
460        let code_point = u32::from_str_radix(&hex, 16).map_err(|_| {
461            GraphQLStringParsingError::InvalidUnicodeEscape(format!("\\u{hex}"))
462        })?;
463        char::from_u32(code_point).ok_or_else(|| {
464            GraphQLStringParsingError::InvalidUnicodeEscape(format!("\\u{hex}"))
465        })
466    }
467}
468
469/// Splits a string into lines using GraphQL line terminators.
470///
471/// The GraphQL spec (Section 2.2 "Source Text") recognizes three line
472/// terminator sequences: `\n`, `\r\n`, and bare `\r`. Rust's
473/// [`str::lines()`] does NOT treat bare `\r` as a line terminator,
474/// so this function must be used instead when processing GraphQL
475/// source text.
476///
477/// Uses `memchr2` for SIMD-accelerated scanning of `\n` and `\r`,
478/// giving throughput comparable to `str::lines()`.
479///
480/// Returns an iterator of line slices without trailing terminators.
481fn graphql_lines(s: &str) -> impl Iterator<Item = &str> {
482    let mut rest = s;
483    std::iter::from_fn(move || {
484        if rest.is_empty() {
485            return None;
486        }
487        match memchr::memchr2(b'\n', b'\r', rest.as_bytes()) {
488            Some(i) => {
489                let line = &rest[..i];
490                // \r\n is a single terminator
491                if rest.as_bytes()[i] == b'\r'
492                    && rest.as_bytes().get(i + 1) == Some(&b'\n')
493                {
494                    rest = &rest[i + 2..];
495                } else {
496                    rest = &rest[i + 1..];
497                }
498                Some(line)
499            },
500            None => {
501                // No terminator found — last line
502                let line = rest;
503                rest = "";
504                Some(line)
505            },
506        }
507    })
508}
509
/// Reports whether `line` contains nothing but GraphQL WhiteSpace, i.e. Tab
/// (U+0009) and Space (U+0020). An empty line counts as blank.
///
/// The GraphQL spec defines WhiteSpace as exactly those two characters:
/// <https://spec.graphql.org/September2025/#WhiteSpace>
///
/// Rust's `str::trim()` is deliberately avoided: it strips every Unicode
/// whitespace character (30+ of them, including NEL, EN QUAD, etc.), so it
/// would wrongly treat a line made of non-ASCII Unicode whitespace as
/// "blank."
fn is_graphql_blank(line: &str) -> bool {
    line.as_bytes()
        .iter()
        .all(|byte| matches!(byte, b' ' | b'\t'))
}
522
523/// Parse a block string literal per the GraphQL spec.
524///
525/// # Performance (B3 in benchmark-optimizations.md)
526///
527/// This uses a two-pass, low-allocation approach instead of the
528/// naive collect-into-Vec-of-Strings strategy. Key optimizations:
529///
530/// 1. **Skip `replace()` when no escaped triple quotes exist** —
531///    nearly all block strings have no `\"""`, so we avoid a heap
532///    allocation by using `Cow::Borrowed`. Only the rare case that
533///    contains `\"""` falls back to `Cow::Owned`.
534///
535/// 2. **Iterate lines without collecting into a `Vec`** — both the
536///    indent-computation pass and the output-building pass iterate
537///    `str::lines()` lazily.
538///
539/// 3. **Build result `String` directly** — instead of creating a
540///    `Vec<String>` (one heap alloc per line) and then `join()`ing,
541///    we write each stripped line directly into a single
542///    pre-allocated `String`.
543///
544/// 4. **Use index tracking instead of `remove(0)`** — the old code
545///    used `Vec::remove(0)` to strip leading blank lines, which is
546///    O(n) per removal. We instead find the first/last non-blank
547///    line indices in the first pass and skip blank lines during
548///    output.
549fn parse_block_string(
550    raw: &str,
551) -> Result<String, GraphQLStringParsingError> {
552    // Strip surrounding triple quotes
553    if !raw.starts_with("\"\"\"")
554        || !raw.ends_with("\"\"\"")
555        || raw.len() < 6
556    {
557        return Err(
558            GraphQLStringParsingError::UnterminatedString,
559        );
560    }
561    let content = &raw[3..raw.len() - 3];
562
563    // Handle escaped triple quotes. Nearly all block strings
564    // have none, so we avoid allocating in the common case by
565    // using Cow::Borrowed. Only if `\"""` is present do we
566    // fall back to an owned String via replace().
567    let content: Cow<str> =
568        if content.contains("\\\"\"\"") {
569            Cow::Owned(
570                content.replace("\\\"\"\"", "\"\"\""),
571            )
572        } else {
573            Cow::Borrowed(content)
574        };
575
576    // --- Pass 1: Compute common indent and first/last
577    //     non-blank line indices ----------------------------
578    //
579    // Per the GraphQL spec, WhiteSpace is only Tab (U+0009)
580    // and Space (U+0020):
581    // <https://spec.graphql.org/September2025/#WhiteSpace>
582    //
583    // We must use this definition consistently for blank-line
584    // filtering, indent counting, and indent stripping. Using
585    // Rust's `trim()`/`trim_start()` (which strips all Unicode
586    // whitespace) would misclassify lines containing multi-byte
587    // Unicode whitespace characters and cause byte-index slicing
588    // panics.
589    let mut common_indent: Option<usize> = None;
590    let mut first_non_blank: Option<usize> = None;
591    let mut last_non_blank: Option<usize> = None;
592    for (i, line) in graphql_lines(&content).enumerate() {
593        let blank = is_graphql_blank(line);
594
595        if !blank {
596            if first_non_blank.is_none() {
597                first_non_blank = Some(i);
598            }
599            last_non_blank = Some(i);
600        }
601
602        // Common indent excludes the first line and blank
603        // lines (per spec).
604        if i > 0 && !blank {
605            let indent = line
606                .bytes()
607                .take_while(|&b| b == b' ' || b == b'\t')
608                .count();
609            common_indent = Some(match common_indent {
610                Some(cur) if cur <= indent => cur,
611                _ => indent,
612            });
613        }
614    }
615
616    let common_indent = common_indent.unwrap_or(0);
617    let first_non_blank = match first_non_blank {
618        Some(i) => i,
619        // All lines are blank — return empty string.
620        None => return Ok(String::new()),
621    };
622    let last_non_blank = last_non_blank.unwrap_or(0);
623
624    // --- Pass 2: Build result string directly ---------------
625    let mut result =
626        String::with_capacity(content.len());
627
628    // Track whether we need a newline separator before the
629    // next line we write.
630    let mut need_newline = false;
631
632    for (i, line) in graphql_lines(&content).enumerate() {
633        // Skip leading and trailing blank lines.
634        if i < first_non_blank || i > last_non_blank {
635            continue;
636        }
637
638        if need_newline {
639            result.push('\n');
640        }
641        need_newline = true;
642
643        if i == 0 {
644            result.push_str(line);
645        } else if line.len() >= common_indent {
646            // Safe: common_indent counts only single-byte
647            // ASCII whitespace, so this is always a valid
648            // char boundary.
649            result.push_str(&line[common_indent..]);
650        } else {
651            result.push_str(line);
652        }
653    }
654
655
656    Ok(result)
657}