Skip to main content

libgraphql_parser/token/
graphql_token_kind.rs

1use crate::GraphQLErrorNotes;
2use crate::GraphQLStringParsingError;
3use std::borrow::Cow;
4use std::num::ParseFloatError;
5use std::num::ParseIntError;
6
7/// The kind of a GraphQL token.
8///
9/// Literal values (`IntValue`, `FloatValue`, `StringValue`) store only the raw
10/// source text.
11///
12/// # Lifetime Parameter
13///
14/// The `'src` lifetime enables zero-copy lexing: `StrGraphQLTokenSource` can
15/// borrow string slices directly from the source text using `Cow::Borrowed`,
16/// while `RustMacroGraphQLTokenSource` uses `Cow::Owned` since `proc_macro2`
17/// doesn't expose contiguous source text.
18///
19/// # Negative Numeric Literals
20///
21/// Negative numbers like `-123` are lexed as single tokens (e.g.
22/// `IntValue("-123")`), not as separate minus and number tokens. This matches
23/// the GraphQL spec's grammar for `IntValue`/`FloatValue`.
24#[allow(clippy::large_enum_variant)]
25#[derive(Clone, Debug, PartialEq)]
26pub enum GraphQLTokenKind<'src> {
27    // =========================================================================
28    // Punctuators (no allocation needed)
29    // =========================================================================
30    /// `&`
31    Ampersand,
32    /// `@`
33    At,
34    /// `!`
35    Bang,
36    /// `:`
37    Colon,
38    /// `}`
39    CurlyBraceClose,
40    /// `{`
41    CurlyBraceOpen,
42    /// `$`
43    Dollar,
44    /// `...`
45    Ellipsis,
46    /// `=`
47    Equals,
48    /// `)`
49    ParenClose,
50    /// `(`
51    ParenOpen,
52    /// `|`
53    Pipe,
54    /// `]`
55    SquareBracketClose,
56    /// `[`
57    SquareBracketOpen,
58
59    // =========================================================================
60    // Literals (raw source text only)
61    // =========================================================================
62    /// A GraphQL name/identifier.
63    ///
64    /// Uses `Cow<'src, str>` to enable zero-copy lexing from string sources.
65    Name(Cow<'src, str>),
66
67    /// Raw source text of an integer literal, including optional negative sign
68    /// (e.g. `"-123"`, `"0"`).
69    ///
70    /// Use `parse_int_value()` to parse the raw text into an `i64`.
71    /// Uses `Cow<'src, str>` to enable zero-copy lexing from string sources.
72    IntValue(Cow<'src, str>),
73
74    /// Raw source text of a float literal, including optional negative sign
75    /// (e.g. `"-1.23e-4"`, `"0.5"`).
76    ///
77    /// Use `parse_float_value()` to parse the raw text into an `f64`.
78    /// Uses `Cow<'src, str>` to enable zero-copy lexing from string sources.
79    FloatValue(Cow<'src, str>),
80
81    /// Raw source text of a string literal, including quotes
82    /// (e.g. `"\"hello\\nworld\""`, `"\"\"\"block\"\"\""`)
83    ///
84    /// Use `parse_string_value()` to process escape sequences and get the
85    /// unescaped content.
86    /// Uses `Cow<'src, str>` to enable zero-copy lexing from string sources.
87    StringValue(Cow<'src, str>),
88
89    // =========================================================================
90    // Boolean and null (distinct from Name for type safety)
91    // =========================================================================
92    /// The `true` literal.
93    True,
94    /// The `false` literal.
95    False,
96    /// The `null` literal.
97    Null,
98
99    // =========================================================================
100    // End of input
101    // =========================================================================
102    /// End of input. The associated `GraphQLToken` may carry trailing trivia.
103    Eof,
104
105    // =========================================================================
106    // Lexer error (allows error recovery)
107    // =========================================================================
108    /// A lexer error. This allows the parser to continue and collect multiple
109    /// errors in a single pass.
110    ///
111    /// TODO: Explore replacing error_notes with a richer diagnostics structure
112    /// that includes things like severity level and "fix action" for IDE
113    /// integration.
114    Error {
115        /// A human-readable error message.
116        message: String,
117        /// Optional notes providing additional context or suggestions.
118        error_notes: GraphQLErrorNotes,
119    },
120}
121
122impl<'src> GraphQLTokenKind<'src> {
123    // =========================================================================
124    // Helper constructors for creating token kinds
125    // =========================================================================
126
127    /// Create a `Name` token from a borrowed string slice (zero-copy).
128    ///
129    /// Use this in `StrGraphQLTokenSource` where the source text can be
130    /// borrowed directly.
131    #[inline]
132    pub fn name_borrowed(s: &'src str) -> Self {
133        GraphQLTokenKind::Name(Cow::Borrowed(s))
134    }
135
136    /// Create a `Name` token from an owned `String`.
137    ///
138    /// Use this in `RustMacroGraphQLTokenSource` where the string must be
139    /// allocated (e.g., from `ident.to_string()`).
140    #[inline]
141    pub fn name_owned(s: String) -> Self {
142        GraphQLTokenKind::Name(Cow::Owned(s))
143    }
144
145    /// Create an `IntValue` token from a borrowed string slice (zero-copy).
146    #[inline]
147    pub fn int_value_borrowed(s: &'src str) -> Self {
148        GraphQLTokenKind::IntValue(Cow::Borrowed(s))
149    }
150
151    /// Create an `IntValue` token from an owned `String`.
152    #[inline]
153    pub fn int_value_owned(s: String) -> Self {
154        GraphQLTokenKind::IntValue(Cow::Owned(s))
155    }
156
157    /// Create a `FloatValue` token from a borrowed string slice (zero-copy).
158    #[inline]
159    pub fn float_value_borrowed(s: &'src str) -> Self {
160        GraphQLTokenKind::FloatValue(Cow::Borrowed(s))
161    }
162
163    /// Create a `FloatValue` token from an owned `String`.
164    #[inline]
165    pub fn float_value_owned(s: String) -> Self {
166        GraphQLTokenKind::FloatValue(Cow::Owned(s))
167    }
168
169    /// Create a `StringValue` token from a borrowed string slice (zero-copy).
170    #[inline]
171    pub fn string_value_borrowed(s: &'src str) -> Self {
172        GraphQLTokenKind::StringValue(Cow::Borrowed(s))
173    }
174
175    /// Create a `StringValue` token from an owned `String`.
176    #[inline]
177    pub fn string_value_owned(s: String) -> Self {
178        GraphQLTokenKind::StringValue(Cow::Owned(s))
179    }
180
181    /// Create an `Error` token.
182    ///
183    /// Error messages are always dynamically constructed, so they use plain
184    /// `String` rather than `Cow`.
185    #[inline]
186    pub fn error(message: impl Into<String>, error_notes: GraphQLErrorNotes) -> Self {
187        GraphQLTokenKind::Error {
188            message: message.into(),
189            error_notes,
190        }
191    }
192
193    // =========================================================================
194    // Query methods
195    // =========================================================================
196
197    /// Returns `true` if this token is a punctuator.
198    pub fn is_punctuator(&self) -> bool {
199        match self {
200            GraphQLTokenKind::Ampersand
201            | GraphQLTokenKind::At
202            | GraphQLTokenKind::Bang
203            | GraphQLTokenKind::Colon
204            | GraphQLTokenKind::CurlyBraceClose
205            | GraphQLTokenKind::CurlyBraceOpen
206            | GraphQLTokenKind::Dollar
207            | GraphQLTokenKind::Ellipsis
208            | GraphQLTokenKind::Equals
209            | GraphQLTokenKind::ParenClose
210            | GraphQLTokenKind::ParenOpen
211            | GraphQLTokenKind::Pipe
212            | GraphQLTokenKind::SquareBracketClose
213            | GraphQLTokenKind::SquareBracketOpen => true,
214
215            GraphQLTokenKind::Name(_)
216            | GraphQLTokenKind::IntValue(_)
217            | GraphQLTokenKind::FloatValue(_)
218            | GraphQLTokenKind::StringValue(_)
219            | GraphQLTokenKind::True
220            | GraphQLTokenKind::False
221            | GraphQLTokenKind::Null
222            | GraphQLTokenKind::Eof
223            | GraphQLTokenKind::Error { .. } => false,
224        }
225    }
226
227    /// Returns the string representation of this token if it is a punctuator.
228    pub fn as_punctuator_str(&self) -> Option<&'static str> {
229        match self {
230            GraphQLTokenKind::Ampersand => Some("&"),
231            GraphQLTokenKind::At => Some("@"),
232            GraphQLTokenKind::Bang => Some("!"),
233            GraphQLTokenKind::Colon => Some(":"),
234            GraphQLTokenKind::CurlyBraceClose => Some("}"),
235            GraphQLTokenKind::CurlyBraceOpen => Some("{"),
236            GraphQLTokenKind::Dollar => Some("$"),
237            GraphQLTokenKind::Ellipsis => Some("..."),
238            GraphQLTokenKind::Equals => Some("="),
239            GraphQLTokenKind::ParenClose => Some(")"),
240            GraphQLTokenKind::ParenOpen => Some("("),
241            GraphQLTokenKind::Pipe => Some("|"),
242            GraphQLTokenKind::SquareBracketClose => Some("]"),
243            GraphQLTokenKind::SquareBracketOpen => Some("["),
244
245            GraphQLTokenKind::Name(_)
246            | GraphQLTokenKind::IntValue(_)
247            | GraphQLTokenKind::FloatValue(_)
248            | GraphQLTokenKind::StringValue(_)
249            | GraphQLTokenKind::True
250            | GraphQLTokenKind::False
251            | GraphQLTokenKind::Null
252            | GraphQLTokenKind::Eof
253            | GraphQLTokenKind::Error { .. } => None,
254        }
255    }
256
257    /// Returns `true` if this token is a value literal (`IntValue`,
258    /// `FloatValue`, `StringValue`, `True`, `False`, or `Null`).
259    pub fn is_value(&self) -> bool {
260        match self {
261            GraphQLTokenKind::IntValue(_)
262            | GraphQLTokenKind::FloatValue(_)
263            | GraphQLTokenKind::StringValue(_)
264            | GraphQLTokenKind::True
265            | GraphQLTokenKind::False
266            | GraphQLTokenKind::Null => true,
267
268            GraphQLTokenKind::Ampersand
269            | GraphQLTokenKind::At
270            | GraphQLTokenKind::Bang
271            | GraphQLTokenKind::Colon
272            | GraphQLTokenKind::CurlyBraceClose
273            | GraphQLTokenKind::CurlyBraceOpen
274            | GraphQLTokenKind::Dollar
275            | GraphQLTokenKind::Ellipsis
276            | GraphQLTokenKind::Equals
277            | GraphQLTokenKind::ParenClose
278            | GraphQLTokenKind::ParenOpen
279            | GraphQLTokenKind::Pipe
280            | GraphQLTokenKind::SquareBracketClose
281            | GraphQLTokenKind::SquareBracketOpen
282            | GraphQLTokenKind::Name(_)
283            | GraphQLTokenKind::Eof
284            | GraphQLTokenKind::Error { .. } => false,
285        }
286    }
287
288    /// Returns `true` if this token represents a lexer error.
289    pub fn is_error(&self) -> bool {
290        matches!(self, GraphQLTokenKind::Error { .. })
291    }
292
293    /// Parse an `IntValue`'s raw text to `i64`.
294    ///
295    /// Returns `None` if this is not an `IntValue`, or `Some(Err(...))` if
296    /// parsing fails.
297    pub fn parse_int_value(&self) -> Option<Result<i64, ParseIntError>> {
298        match self {
299            GraphQLTokenKind::IntValue(raw) => Some(raw.parse()),
300            _ => None,
301        }
302    }
303
304    /// Parse a `FloatValue`'s raw text to `f64`.
305    ///
306    /// Returns `None` if this is not a `FloatValue`, or `Some(Err(...))` if
307    /// parsing fails.
308    pub fn parse_float_value(&self) -> Option<Result<f64, ParseFloatError>> {
309        match self {
310            GraphQLTokenKind::FloatValue(raw) => Some(raw.parse()),
311            _ => None,
312        }
313    }
314
315    /// Parse a `StringValue`'s raw text to unescaped content.
316    ///
317    /// Handles escape sequences per the GraphQL spec:
318    /// - For single-line strings (`"..."`): processes `\n`, `\r`, `\t`, `\\`,
319    ///   `\"`, `\/`, `\b`, `\f`, `\uXXXX` (fixed 4-digit), and `\u{X...}`
320    ///   (variable length).
321    /// - For block strings (`"""..."""`): applies the indentation stripping
322    ///   algorithm per spec, then processes `\"""` escape only.
323    ///
324    /// Returns `None` if this is not a `StringValue`, or `Some(Err(...))` if
325    /// parsing fails.
326    pub fn parse_string_value(&self) -> Option<Result<String, GraphQLStringParsingError>> {
327        match self {
328            GraphQLTokenKind::StringValue(raw) => Some(parse_graphql_string(raw)),
329            _ => None,
330        }
331    }
332}
333
334/// Parse a raw GraphQL string literal into its unescaped content.
335fn parse_graphql_string(raw: &str) -> Result<String, GraphQLStringParsingError> {
336    // Check if this is a block string
337    if raw.starts_with("\"\"\"") {
338        parse_block_string(raw)
339    } else {
340        parse_single_line_string(raw)
341    }
342}
343
344/// Parse a single-line string literal.
345fn parse_single_line_string(raw: &str) -> Result<String, GraphQLStringParsingError> {
346    // Strip surrounding quotes
347    if !raw.starts_with('"') || !raw.ends_with('"') || raw.len() < 2 {
348        return Err(GraphQLStringParsingError::UnterminatedString);
349    }
350    let content = &raw[1..raw.len() - 1];
351
352    let mut result = String::with_capacity(content.len());
353    let mut chars = content.chars().peekable();
354
355    while let Some(c) = chars.next() {
356        if c == '\\' {
357            match chars.next() {
358                Some('n') => result.push('\n'),
359                Some('r') => result.push('\r'),
360                Some('t') => result.push('\t'),
361                Some('\\') => result.push('\\'),
362                Some('"') => result.push('"'),
363                Some('/') => result.push('/'),
364                Some('b') => result.push('\u{0008}'),
365                Some('f') => result.push('\u{000C}'),
366                Some('u') => {
367                    let unicode_char = parse_unicode_escape(&mut chars)?;
368                    result.push(unicode_char);
369                }
370                Some(other) => {
371                    return Err(GraphQLStringParsingError::InvalidEscapeSequence(format!(
372                        "\\{other}"
373                    )));
374                }
375                None => {
376                    return Err(GraphQLStringParsingError::InvalidEscapeSequence(
377                        "\\".to_string(),
378                    ));
379                }
380            }
381        } else {
382            result.push(c);
383        }
384    }
385
386    Ok(result)
387}
388
389/// Parse a Unicode escape sequence after seeing `\u`.
390fn parse_unicode_escape(
391    chars: &mut std::iter::Peekable<std::str::Chars>,
392) -> Result<char, GraphQLStringParsingError> {
393    // Check for variable-length syntax: \u{...}
394    if chars.peek() == Some(&'{') {
395        chars.next(); // consume '{'
396        let mut hex = String::new();
397        loop {
398            match chars.next() {
399                Some('}') => break,
400                Some(c) if c.is_ascii_hexdigit() => hex.push(c),
401                Some(c) => {
402                    return Err(GraphQLStringParsingError::InvalidUnicodeEscape(format!(
403                        "\\u{{{hex}{c}"
404                    )));
405                }
406                None => {
407                    return Err(GraphQLStringParsingError::InvalidUnicodeEscape(format!(
408                        "\\u{{{hex}"
409                    )));
410                }
411            }
412        }
413        if hex.is_empty() {
414            return Err(GraphQLStringParsingError::InvalidUnicodeEscape(
415                "\\u{}".to_string(),
416            ));
417        }
418        let code_point = u32::from_str_radix(&hex, 16).map_err(|_| {
419            GraphQLStringParsingError::InvalidUnicodeEscape(format!("\\u{{{hex}}}"))
420        })?;
421        char::from_u32(code_point).ok_or_else(|| {
422            GraphQLStringParsingError::InvalidUnicodeEscape(format!("\\u{{{hex}}}"))
423        })
424    } else {
425        // Fixed 4-digit syntax: \uXXXX
426        let mut hex = String::with_capacity(4);
427        for _ in 0..4 {
428            match chars.next() {
429                Some(c) if c.is_ascii_hexdigit() => hex.push(c),
430                Some(c) => {
431                    return Err(GraphQLStringParsingError::InvalidUnicodeEscape(format!(
432                        "\\u{hex}{c}"
433                    )));
434                }
435                None => {
436                    return Err(GraphQLStringParsingError::InvalidUnicodeEscape(format!(
437                        "\\u{hex}"
438                    )));
439                }
440            }
441        }
442        let code_point = u32::from_str_radix(&hex, 16).map_err(|_| {
443            GraphQLStringParsingError::InvalidUnicodeEscape(format!("\\u{hex}"))
444        })?;
445        char::from_u32(code_point).ok_or_else(|| {
446            GraphQLStringParsingError::InvalidUnicodeEscape(format!("\\u{hex}"))
447        })
448    }
449}
450
/// Reports whether `line` is empty or made up solely of GraphQL WhiteSpace,
/// i.e. Tab (U+0009) and Space (U+0020).
///
/// The GraphQL spec recognises only those two characters as WhiteSpace:
/// <https://spec.graphql.org/September2025/#WhiteSpace>
///
/// `str::trim()` is deliberately not used here: it strips every Unicode
/// whitespace character (30+ of them, NEL and EN QUAD among others), so a
/// line holding non-ASCII Unicode whitespace would wrongly count as "blank".
fn is_graphql_blank(line: &str) -> bool {
    // Bytes of multi-byte UTF-8 sequences are all >= 0x80, so they can
    // never be mistaken for an ASCII space or tab.
    line.bytes().all(|byte| matches!(byte, b' ' | b'\t'))
}
463
464/// Parse a block string literal per the GraphQL spec.
465///
466/// # Performance (B3 in benchmark-optimizations.md)
467///
468/// This uses a two-pass, low-allocation approach instead of the
469/// naive collect-into-Vec-of-Strings strategy. Key optimizations:
470///
471/// 1. **Skip `replace()` when no escaped triple quotes exist** —
472///    nearly all block strings have no `\"""`, so we avoid a heap
473///    allocation by using `Cow::Borrowed`. Only the rare case that
474///    contains `\"""` falls back to `Cow::Owned`.
475///
476/// 2. **Iterate lines without collecting into a `Vec`** — both the
477///    indent-computation pass and the output-building pass iterate
478///    `str::lines()` lazily.
479///
480/// 3. **Build result `String` directly** — instead of creating a
481///    `Vec<String>` (one heap alloc per line) and then `join()`ing,
482///    we write each stripped line directly into a single
483///    pre-allocated `String`.
484///
485/// 4. **Use index tracking instead of `remove(0)`** — the old code
486///    used `Vec::remove(0)` to strip leading blank lines, which is
487///    O(n) per removal. We instead find the first/last non-blank
488///    line indices in the first pass and skip blank lines during
489///    output.
490fn parse_block_string(
491    raw: &str,
492) -> Result<String, GraphQLStringParsingError> {
493    // Strip surrounding triple quotes
494    if !raw.starts_with("\"\"\"")
495        || !raw.ends_with("\"\"\"")
496        || raw.len() < 6
497    {
498        return Err(
499            GraphQLStringParsingError::UnterminatedString,
500        );
501    }
502    let content = &raw[3..raw.len() - 3];
503
504    // Handle escaped triple quotes. Nearly all block strings
505    // have none, so we avoid allocating in the common case by
506    // using Cow::Borrowed. Only if `\"""` is present do we
507    // fall back to an owned String via replace().
508    let content: Cow<str> =
509        if content.contains("\\\"\"\"") {
510            Cow::Owned(
511                content.replace("\\\"\"\"", "\"\"\""),
512            )
513        } else {
514            Cow::Borrowed(content)
515        };
516
517    // --- Pass 1: Compute common indent and first/last
518    //     non-blank line indices ----------------------------
519    //
520    // Per the GraphQL spec, WhiteSpace is only Tab (U+0009)
521    // and Space (U+0020):
522    // <https://spec.graphql.org/September2025/#WhiteSpace>
523    //
524    // We must use this definition consistently for blank-line
525    // filtering, indent counting, and indent stripping. Using
526    // Rust's `trim()`/`trim_start()` (which strips all Unicode
527    // whitespace) would misclassify lines containing multi-byte
528    // Unicode whitespace characters and cause byte-index slicing
529    // panics.
530    let mut common_indent: Option<usize> = None;
531    let mut first_non_blank: Option<usize> = None;
532    let mut last_non_blank: Option<usize> = None;
533    for (i, line) in content.lines().enumerate() {
534        let blank = is_graphql_blank(line);
535
536        if !blank {
537            if first_non_blank.is_none() {
538                first_non_blank = Some(i);
539            }
540            last_non_blank = Some(i);
541        }
542
543        // Common indent excludes the first line and blank
544        // lines (per spec).
545        if i > 0 && !blank {
546            let indent = line
547                .bytes()
548                .take_while(|&b| b == b' ' || b == b'\t')
549                .count();
550            common_indent = Some(match common_indent {
551                Some(cur) if cur <= indent => cur,
552                _ => indent,
553            });
554        }
555    }
556
557    let common_indent = common_indent.unwrap_or(0);
558    let first_non_blank = match first_non_blank {
559        Some(i) => i,
560        // All lines are blank — return empty string.
561        None => return Ok(String::new()),
562    };
563    let last_non_blank = last_non_blank.unwrap_or(0);
564
565    // --- Pass 2: Build result string directly ---------------
566    let mut result =
567        String::with_capacity(content.len());
568
569    // Track whether we need a newline separator before the
570    // next line we write.
571    let mut need_newline = false;
572
573    for (i, line) in content.lines().enumerate() {
574        // Skip leading and trailing blank lines.
575        if i < first_non_blank || i > last_non_blank {
576            continue;
577        }
578
579        if need_newline {
580            result.push('\n');
581        }
582        need_newline = true;
583
584        if i == 0 {
585            result.push_str(line);
586        } else if line.len() >= common_indent {
587            // Safe: common_indent counts only single-byte
588            // ASCII whitespace, so this is always a valid
589            // char boundary.
590            result.push_str(&line[common_indent..]);
591        } else {
592            result.push_str(line);
593        }
594    }
595
596
597    Ok(result)
598}