// libgraphql_parser/token/graphql_token_kind.rs
1use crate::GraphQLErrorNote;
2use crate::GraphQLStringParsingError;
3use crate::smallvec::SmallVec;
4use std::borrow::Cow;
5use std::num::ParseFloatError;
6use std::num::ParseIntError;
7
8/// The kind of a GraphQL token.
9///
10/// Literal values (`IntValue`, `FloatValue`, `StringValue`) store only the raw
11/// source text.
12///
13/// # Lifetime Parameter
14///
15/// The `'src` lifetime enables zero-copy lexing: `StrGraphQLTokenSource` can
16/// borrow string slices directly from the source text using `Cow::Borrowed`,
17/// while `RustMacroGraphQLTokenSource` uses `Cow::Owned` since `proc_macro2`
18/// doesn't expose contiguous source text.
19///
20/// # Negative Numeric Literals
21///
22/// Negative numbers like `-123` are lexed as single tokens (e.g.
23/// `IntValue("-123")`), not as separate minus and number tokens. This matches
24/// the GraphQL spec's grammar for `IntValue`/`FloatValue`.
25#[derive(Clone, Debug, PartialEq)]
26pub enum GraphQLTokenKind<'src> {
27 // =========================================================================
28 // Punctuators (no allocation needed)
29 // =========================================================================
30 /// `&`
31 Ampersand,
32 /// `@`
33 At,
34 /// `!`
35 Bang,
36 /// `:`
37 Colon,
38 /// `}`
39 CurlyBraceClose,
40 /// `{`
41 CurlyBraceOpen,
42 /// `$`
43 Dollar,
44 /// `...`
45 Ellipsis,
46 /// `=`
47 Equals,
48 /// `)`
49 ParenClose,
50 /// `(`
51 ParenOpen,
52 /// `|`
53 Pipe,
54 /// `]`
55 SquareBracketClose,
56 /// `[`
57 SquareBracketOpen,
58
59 // =========================================================================
60 // Literals (raw source text only)
61 // =========================================================================
62 /// A GraphQL name/identifier.
63 ///
64 /// Uses `Cow<'src, str>` to enable zero-copy lexing from string sources.
65 Name(Cow<'src, str>),
66
67 /// Raw source text of an integer literal, including optional negative sign
68 /// (e.g. `"-123"`, `"0"`).
69 ///
70 /// Use `parse_int_value()` to parse the raw text into an `i64`.
71 /// Uses `Cow<'src, str>` to enable zero-copy lexing from string sources.
72 IntValue(Cow<'src, str>),
73
74 /// Raw source text of a float literal, including optional negative sign
75 /// (e.g. `"-1.23e-4"`, `"0.5"`).
76 ///
77 /// Use `parse_float_value()` to parse the raw text into an `f64`.
78 /// Uses `Cow<'src, str>` to enable zero-copy lexing from string sources.
79 FloatValue(Cow<'src, str>),
80
81 /// Raw source text of a string literal, including quotes
82 /// (e.g. `"\"hello\\nworld\""`, `"\"\"\"block\"\"\""`)
83 ///
84 /// Use `parse_string_value()` to process escape sequences and get the
85 /// unescaped content.
86 /// Uses `Cow<'src, str>` to enable zero-copy lexing from string sources.
87 StringValue(Cow<'src, str>),
88
89 // =========================================================================
90 // Boolean and null (distinct from Name for type safety)
91 // =========================================================================
92 /// The `true` literal.
93 True,
94 /// The `false` literal.
95 False,
96 /// The `null` literal.
97 Null,
98
99 // =========================================================================
100 // End of input
101 // =========================================================================
102 /// End of input. The associated `GraphQLToken` may carry trailing trivia.
103 Eof,
104
105 // =========================================================================
106 // Lexer error (allows error recovery)
107 // =========================================================================
108 /// A lexer error. This allows the parser to continue and collect multiple
109 /// errors in a single pass.
110 ///
111 /// # Performance Note (B19)
112 ///
113 /// The error payload is boxed to avoid bloating the enum's size. Without
114 /// the Box, the `SmallVec<[GraphQLErrorNote; 2]>` error-notes field
115 /// (~208 bytes) would force *every* variant of `GraphQLTokenKind` to be
116 /// ~232 bytes — even zero-data punctuators. Boxing shrinks the Error
117 /// variant to a single pointer, which dramatically reduces
118 /// the size of every `GraphQLToken` on the happy path where errors
119 /// never occur (zero additional heap allocations in practice).
120 ///
121 /// TODO: Explore replacing error_notes with a richer diagnostics structure
122 /// that includes things like severity level and "fix action" for IDE
123 /// integration.
124 Error(Box<GraphQLTokenError>),
125}
126
127/// The payload of a [`GraphQLTokenKind::Error`] variant.
128///
129/// Separated into its own struct so it can be heap-allocated behind a `Box`,
130/// keeping the `GraphQLTokenKind` enum small. See the performance note on
131/// [`GraphQLTokenKind::Error`] for details.
132#[derive(Clone, Debug, PartialEq)]
133pub struct GraphQLTokenError {
134 /// A human-readable error message.
135 pub message: String,
136 /// Optional notes providing additional context or suggestions.
137 pub error_notes: SmallVec<[GraphQLErrorNote; 2]>,
138}
139
140impl<'src> GraphQLTokenKind<'src> {
141 // =========================================================================
142 // Helper constructors for creating token kinds
143 // =========================================================================
144
145 /// Create a `Name` token from a borrowed string slice (zero-copy).
146 ///
147 /// Use this in `StrGraphQLTokenSource` where the source text can be
148 /// borrowed directly.
149 #[inline]
150 pub fn name_borrowed(s: &'src str) -> Self {
151 GraphQLTokenKind::Name(Cow::Borrowed(s))
152 }
153
154 /// Create a `Name` token from an owned `String`.
155 ///
156 /// Use this in `RustMacroGraphQLTokenSource` where the string must be
157 /// allocated (e.g., from `ident.to_string()`).
158 #[inline]
159 pub fn name_owned(s: String) -> Self {
160 GraphQLTokenKind::Name(Cow::Owned(s))
161 }
162
163 /// Create an `IntValue` token from a borrowed string slice (zero-copy).
164 #[inline]
165 pub fn int_value_borrowed(s: &'src str) -> Self {
166 GraphQLTokenKind::IntValue(Cow::Borrowed(s))
167 }
168
169 /// Create an `IntValue` token from an owned `String`.
170 #[inline]
171 pub fn int_value_owned(s: String) -> Self {
172 GraphQLTokenKind::IntValue(Cow::Owned(s))
173 }
174
175 /// Create a `FloatValue` token from a borrowed string slice (zero-copy).
176 #[inline]
177 pub fn float_value_borrowed(s: &'src str) -> Self {
178 GraphQLTokenKind::FloatValue(Cow::Borrowed(s))
179 }
180
181 /// Create a `FloatValue` token from an owned `String`.
182 #[inline]
183 pub fn float_value_owned(s: String) -> Self {
184 GraphQLTokenKind::FloatValue(Cow::Owned(s))
185 }
186
187 /// Create a `StringValue` token from a borrowed string slice (zero-copy).
188 #[inline]
189 pub fn string_value_borrowed(s: &'src str) -> Self {
190 GraphQLTokenKind::StringValue(Cow::Borrowed(s))
191 }
192
193 /// Create a `StringValue` token from an owned `String`.
194 #[inline]
195 pub fn string_value_owned(s: String) -> Self {
196 GraphQLTokenKind::StringValue(Cow::Owned(s))
197 }
198
199 /// Create an `Error` token.
200 ///
201 /// Error messages are always dynamically constructed, so they use plain
202 /// `String` rather than `Cow`.
203 #[inline]
204 pub fn error(message: impl Into<String>, error_notes: SmallVec<[GraphQLErrorNote; 2]>) -> Self {
205 GraphQLTokenKind::Error(Box::new(GraphQLTokenError {
206 message: message.into(),
207 error_notes,
208 }))
209 }
210
211 // =========================================================================
212 // Query methods
213 // =========================================================================
214
215 /// Returns `true` if this token is a punctuator.
216 pub fn is_punctuator(&self) -> bool {
217 match self {
218 GraphQLTokenKind::Ampersand
219 | GraphQLTokenKind::At
220 | GraphQLTokenKind::Bang
221 | GraphQLTokenKind::Colon
222 | GraphQLTokenKind::CurlyBraceClose
223 | GraphQLTokenKind::CurlyBraceOpen
224 | GraphQLTokenKind::Dollar
225 | GraphQLTokenKind::Ellipsis
226 | GraphQLTokenKind::Equals
227 | GraphQLTokenKind::ParenClose
228 | GraphQLTokenKind::ParenOpen
229 | GraphQLTokenKind::Pipe
230 | GraphQLTokenKind::SquareBracketClose
231 | GraphQLTokenKind::SquareBracketOpen => true,
232
233 GraphQLTokenKind::Name(_)
234 | GraphQLTokenKind::IntValue(_)
235 | GraphQLTokenKind::FloatValue(_)
236 | GraphQLTokenKind::StringValue(_)
237 | GraphQLTokenKind::True
238 | GraphQLTokenKind::False
239 | GraphQLTokenKind::Null
240 | GraphQLTokenKind::Eof
241 | GraphQLTokenKind::Error(_) => false,
242 }
243 }
244
245 /// Returns the string representation of this token if it is a punctuator.
246 pub fn as_punctuator_str(&self) -> Option<&'static str> {
247 match self {
248 GraphQLTokenKind::Ampersand => Some("&"),
249 GraphQLTokenKind::At => Some("@"),
250 GraphQLTokenKind::Bang => Some("!"),
251 GraphQLTokenKind::Colon => Some(":"),
252 GraphQLTokenKind::CurlyBraceClose => Some("}"),
253 GraphQLTokenKind::CurlyBraceOpen => Some("{"),
254 GraphQLTokenKind::Dollar => Some("$"),
255 GraphQLTokenKind::Ellipsis => Some("..."),
256 GraphQLTokenKind::Equals => Some("="),
257 GraphQLTokenKind::ParenClose => Some(")"),
258 GraphQLTokenKind::ParenOpen => Some("("),
259 GraphQLTokenKind::Pipe => Some("|"),
260 GraphQLTokenKind::SquareBracketClose => Some("]"),
261 GraphQLTokenKind::SquareBracketOpen => Some("["),
262
263 GraphQLTokenKind::Name(_)
264 | GraphQLTokenKind::IntValue(_)
265 | GraphQLTokenKind::FloatValue(_)
266 | GraphQLTokenKind::StringValue(_)
267 | GraphQLTokenKind::True
268 | GraphQLTokenKind::False
269 | GraphQLTokenKind::Null
270 | GraphQLTokenKind::Eof
271 | GraphQLTokenKind::Error(_) => None,
272 }
273 }
274
275 /// Returns `true` if this token is a value literal (`IntValue`,
276 /// `FloatValue`, `StringValue`, `True`, `False`, or `Null`).
277 pub fn is_value(&self) -> bool {
278 match self {
279 GraphQLTokenKind::IntValue(_)
280 | GraphQLTokenKind::FloatValue(_)
281 | GraphQLTokenKind::StringValue(_)
282 | GraphQLTokenKind::True
283 | GraphQLTokenKind::False
284 | GraphQLTokenKind::Null => true,
285
286 GraphQLTokenKind::Ampersand
287 | GraphQLTokenKind::At
288 | GraphQLTokenKind::Bang
289 | GraphQLTokenKind::Colon
290 | GraphQLTokenKind::CurlyBraceClose
291 | GraphQLTokenKind::CurlyBraceOpen
292 | GraphQLTokenKind::Dollar
293 | GraphQLTokenKind::Ellipsis
294 | GraphQLTokenKind::Equals
295 | GraphQLTokenKind::ParenClose
296 | GraphQLTokenKind::ParenOpen
297 | GraphQLTokenKind::Pipe
298 | GraphQLTokenKind::SquareBracketClose
299 | GraphQLTokenKind::SquareBracketOpen
300 | GraphQLTokenKind::Name(_)
301 | GraphQLTokenKind::Eof
302 | GraphQLTokenKind::Error(_) => false,
303 }
304 }
305
306 /// Returns `true` if this token represents a lexer error.
307 pub fn is_error(&self) -> bool {
308 matches!(self, GraphQLTokenKind::Error(_))
309 }
310
311 /// Parse an `IntValue`'s raw text to `i64`.
312 ///
313 /// Returns `None` if this is not an `IntValue`, or `Some(Err(...))` if
314 /// parsing fails.
315 pub fn parse_int_value(&self) -> Option<Result<i64, ParseIntError>> {
316 match self {
317 GraphQLTokenKind::IntValue(raw) => Some(raw.parse()),
318 _ => None,
319 }
320 }
321
322 /// Parse a `FloatValue`'s raw text to `f64`.
323 ///
324 /// Returns `None` if this is not a `FloatValue`, or `Some(Err(...))` if
325 /// parsing fails.
326 pub fn parse_float_value(&self) -> Option<Result<f64, ParseFloatError>> {
327 match self {
328 GraphQLTokenKind::FloatValue(raw) => Some(raw.parse()),
329 _ => None,
330 }
331 }
332
333 /// Parse a `StringValue`'s raw text to unescaped content.
334 ///
335 /// Handles escape sequences per the GraphQL spec:
336 /// - For single-line strings (`"..."`): processes `\n`, `\r`, `\t`, `\\`,
337 /// `\"`, `\/`, `\b`, `\f`, `\uXXXX` (fixed 4-digit), and `\u{X...}`
338 /// (variable length).
339 /// - For block strings (`"""..."""`): applies the indentation stripping
340 /// algorithm per spec, then processes `\"""` escape only.
341 ///
342 /// Returns `None` if this is not a `StringValue`, or `Some(Err(...))` if
343 /// parsing fails.
344 pub fn parse_string_value(&self) -> Option<Result<String, GraphQLStringParsingError>> {
345 match self {
346 GraphQLTokenKind::StringValue(raw) => Some(parse_graphql_string(raw)),
347 _ => None,
348 }
349 }
350}
351
352/// Parse a raw GraphQL string literal into its unescaped content.
353fn parse_graphql_string(raw: &str) -> Result<String, GraphQLStringParsingError> {
354 // Check if this is a block string
355 if raw.starts_with("\"\"\"") {
356 parse_block_string(raw)
357 } else {
358 parse_single_line_string(raw)
359 }
360}
361
362/// Parse a single-line string literal.
363fn parse_single_line_string(raw: &str) -> Result<String, GraphQLStringParsingError> {
364 // Strip surrounding quotes
365 if !raw.starts_with('"') || !raw.ends_with('"') || raw.len() < 2 {
366 return Err(GraphQLStringParsingError::UnterminatedString);
367 }
368 let content = &raw[1..raw.len() - 1];
369
370 let mut result = String::with_capacity(content.len());
371 let mut chars = content.chars().peekable();
372
373 while let Some(c) = chars.next() {
374 if c == '\\' {
375 match chars.next() {
376 Some('n') => result.push('\n'),
377 Some('r') => result.push('\r'),
378 Some('t') => result.push('\t'),
379 Some('\\') => result.push('\\'),
380 Some('"') => result.push('"'),
381 Some('/') => result.push('/'),
382 Some('b') => result.push('\u{0008}'),
383 Some('f') => result.push('\u{000C}'),
384 Some('u') => {
385 let unicode_char = parse_unicode_escape(&mut chars)?;
386 result.push(unicode_char);
387 },
388 Some(other) => {
389 return Err(GraphQLStringParsingError::InvalidEscapeSequence(
390 format!("\\{other}"),
391 ));
392 },
393 None => {
394 return Err(GraphQLStringParsingError::InvalidEscapeSequence(
395 "\\".to_string(),
396 ));
397 },
398 }
399 } else {
400 result.push(c);
401 }
402 }
403
404 Ok(result)
405}
406
407/// Parse a Unicode escape sequence after seeing `\u`.
408fn parse_unicode_escape(
409 chars: &mut std::iter::Peekable<std::str::Chars>,
410) -> Result<char, GraphQLStringParsingError> {
411 // Check for variable-length syntax: \u{...}
412 if chars.peek() == Some(&'{') {
413 chars.next(); // consume '{'
414 let mut hex = String::new();
415 loop {
416 match chars.next() {
417 Some('}') => break,
418 Some(c) if c.is_ascii_hexdigit() => hex.push(c),
419 Some(c) => {
420 return Err(GraphQLStringParsingError::InvalidUnicodeEscape(format!(
421 "\\u{{{hex}{c}"
422 )));
423 }
424 None => {
425 return Err(GraphQLStringParsingError::InvalidUnicodeEscape(format!(
426 "\\u{{{hex}"
427 )));
428 }
429 }
430 }
431 if hex.is_empty() {
432 return Err(GraphQLStringParsingError::InvalidUnicodeEscape(
433 "\\u{}".to_string(),
434 ));
435 }
436 let code_point = u32::from_str_radix(&hex, 16).map_err(|_| {
437 GraphQLStringParsingError::InvalidUnicodeEscape(format!("\\u{{{hex}}}"))
438 })?;
439 char::from_u32(code_point).ok_or_else(|| {
440 GraphQLStringParsingError::InvalidUnicodeEscape(format!("\\u{{{hex}}}"))
441 })
442 } else {
443 // Fixed 4-digit syntax: \uXXXX
444 let mut hex = String::with_capacity(4);
445 for _ in 0..4 {
446 match chars.next() {
447 Some(c) if c.is_ascii_hexdigit() => hex.push(c),
448 Some(c) => {
449 return Err(GraphQLStringParsingError::InvalidUnicodeEscape(format!(
450 "\\u{hex}{c}"
451 )));
452 }
453 None => {
454 return Err(GraphQLStringParsingError::InvalidUnicodeEscape(format!(
455 "\\u{hex}"
456 )));
457 }
458 }
459 }
460 let code_point = u32::from_str_radix(&hex, 16).map_err(|_| {
461 GraphQLStringParsingError::InvalidUnicodeEscape(format!("\\u{hex}"))
462 })?;
463 char::from_u32(code_point).ok_or_else(|| {
464 GraphQLStringParsingError::InvalidUnicodeEscape(format!("\\u{hex}"))
465 })
466 }
467}
468
469/// Splits a string into lines using GraphQL line terminators.
470///
471/// The GraphQL spec (Section 2.2 "Source Text") recognizes three line
472/// terminator sequences: `\n`, `\r\n`, and bare `\r`. Rust's
473/// [`str::lines()`] does NOT treat bare `\r` as a line terminator,
474/// so this function must be used instead when processing GraphQL
475/// source text.
476///
477/// Uses `memchr2` for SIMD-accelerated scanning of `\n` and `\r`,
478/// giving throughput comparable to `str::lines()`.
479///
480/// Returns an iterator of line slices without trailing terminators.
481fn graphql_lines(s: &str) -> impl Iterator<Item = &str> {
482 let mut rest = s;
483 std::iter::from_fn(move || {
484 if rest.is_empty() {
485 return None;
486 }
487 match memchr::memchr2(b'\n', b'\r', rest.as_bytes()) {
488 Some(i) => {
489 let line = &rest[..i];
490 // \r\n is a single terminator
491 if rest.as_bytes()[i] == b'\r'
492 && rest.as_bytes().get(i + 1) == Some(&b'\n')
493 {
494 rest = &rest[i + 2..];
495 } else {
496 rest = &rest[i + 1..];
497 }
498 Some(line)
499 },
500 None => {
501 // No terminator found — last line
502 let line = rest;
503 rest = "";
504 Some(line)
505 },
506 }
507 })
508}
509
/// Reports whether `line` contains nothing but GraphQL WhiteSpace, i.e.
/// Tab (U+0009) and Space (U+0020). An empty line counts as blank.
///
/// The GraphQL spec defines WhiteSpace as exactly those two characters:
/// <https://spec.graphql.org/September2025/#WhiteSpace>
///
/// Rust's `str::trim()` is deliberately avoided: it strips all Unicode
/// whitespace (30+ chars including NEL, EN QUAD, etc.), so it would treat
/// lines made of non-ASCII Unicode whitespace as "blank."
fn is_graphql_blank(line: &str) -> bool {
    line.as_bytes()
        .iter()
        .all(|&byte| matches!(byte, b' ' | b'\t'))
}
522
523/// Parse a block string literal per the GraphQL spec.
524///
525/// # Performance (B3 in benchmark-optimizations.md)
526///
527/// This uses a two-pass, low-allocation approach instead of the
528/// naive collect-into-Vec-of-Strings strategy. Key optimizations:
529///
530/// 1. **Skip `replace()` when no escaped triple quotes exist** —
531/// nearly all block strings have no `\"""`, so we avoid a heap
532/// allocation by using `Cow::Borrowed`. Only the rare case that
533/// contains `\"""` falls back to `Cow::Owned`.
534///
535/// 2. **Iterate lines without collecting into a `Vec`** — both the
536/// indent-computation pass and the output-building pass iterate
537/// `str::lines()` lazily.
538///
539/// 3. **Build result `String` directly** — instead of creating a
540/// `Vec<String>` (one heap alloc per line) and then `join()`ing,
541/// we write each stripped line directly into a single
542/// pre-allocated `String`.
543///
544/// 4. **Use index tracking instead of `remove(0)`** — the old code
545/// used `Vec::remove(0)` to strip leading blank lines, which is
546/// O(n) per removal. We instead find the first/last non-blank
547/// line indices in the first pass and skip blank lines during
548/// output.
549fn parse_block_string(
550 raw: &str,
551) -> Result<String, GraphQLStringParsingError> {
552 // Strip surrounding triple quotes
553 if !raw.starts_with("\"\"\"")
554 || !raw.ends_with("\"\"\"")
555 || raw.len() < 6
556 {
557 return Err(
558 GraphQLStringParsingError::UnterminatedString,
559 );
560 }
561 let content = &raw[3..raw.len() - 3];
562
563 // Handle escaped triple quotes. Nearly all block strings
564 // have none, so we avoid allocating in the common case by
565 // using Cow::Borrowed. Only if `\"""` is present do we
566 // fall back to an owned String via replace().
567 let content: Cow<str> =
568 if content.contains("\\\"\"\"") {
569 Cow::Owned(
570 content.replace("\\\"\"\"", "\"\"\""),
571 )
572 } else {
573 Cow::Borrowed(content)
574 };
575
576 // --- Pass 1: Compute common indent and first/last
577 // non-blank line indices ----------------------------
578 //
579 // Per the GraphQL spec, WhiteSpace is only Tab (U+0009)
580 // and Space (U+0020):
581 // <https://spec.graphql.org/September2025/#WhiteSpace>
582 //
583 // We must use this definition consistently for blank-line
584 // filtering, indent counting, and indent stripping. Using
585 // Rust's `trim()`/`trim_start()` (which strips all Unicode
586 // whitespace) would misclassify lines containing multi-byte
587 // Unicode whitespace characters and cause byte-index slicing
588 // panics.
589 let mut common_indent: Option<usize> = None;
590 let mut first_non_blank: Option<usize> = None;
591 let mut last_non_blank: Option<usize> = None;
592 for (i, line) in graphql_lines(&content).enumerate() {
593 let blank = is_graphql_blank(line);
594
595 if !blank {
596 if first_non_blank.is_none() {
597 first_non_blank = Some(i);
598 }
599 last_non_blank = Some(i);
600 }
601
602 // Common indent excludes the first line and blank
603 // lines (per spec).
604 if i > 0 && !blank {
605 let indent = line
606 .bytes()
607 .take_while(|&b| b == b' ' || b == b'\t')
608 .count();
609 common_indent = Some(match common_indent {
610 Some(cur) if cur <= indent => cur,
611 _ => indent,
612 });
613 }
614 }
615
616 let common_indent = common_indent.unwrap_or(0);
617 let first_non_blank = match first_non_blank {
618 Some(i) => i,
619 // All lines are blank — return empty string.
620 None => return Ok(String::new()),
621 };
622 let last_non_blank = last_non_blank.unwrap_or(0);
623
624 // --- Pass 2: Build result string directly ---------------
625 let mut result =
626 String::with_capacity(content.len());
627
628 // Track whether we need a newline separator before the
629 // next line we write.
630 let mut need_newline = false;
631
632 for (i, line) in graphql_lines(&content).enumerate() {
633 // Skip leading and trailing blank lines.
634 if i < first_non_blank || i > last_non_blank {
635 continue;
636 }
637
638 if need_newline {
639 result.push('\n');
640 }
641 need_newline = true;
642
643 if i == 0 {
644 result.push_str(line);
645 } else if line.len() >= common_indent {
646 // Safe: common_indent counts only single-byte
647 // ASCII whitespace, so this is always a valid
648 // char boundary.
649 result.push_str(&line[common_indent..]);
650 } else {
651 result.push_str(line);
652 }
653 }
654
655
656 Ok(result)
657}