libgraphql_parser/token/graphql_token_kind.rs
1use crate::GraphQLErrorNotes;
2use crate::GraphQLStringParsingError;
3use std::borrow::Cow;
4use std::num::ParseFloatError;
5use std::num::ParseIntError;
6
7/// The kind of a GraphQL token.
8///
9/// Literal values (`IntValue`, `FloatValue`, `StringValue`) store only the raw
10/// source text.
11///
12/// # Lifetime Parameter
13///
14/// The `'src` lifetime enables zero-copy lexing: `StrGraphQLTokenSource` can
15/// borrow string slices directly from the source text using `Cow::Borrowed`,
16/// while `RustMacroGraphQLTokenSource` uses `Cow::Owned` since `proc_macro2`
17/// doesn't expose contiguous source text.
18///
19/// # Negative Numeric Literals
20///
21/// Negative numbers like `-123` are lexed as single tokens (e.g.
22/// `IntValue("-123")`), not as separate minus and number tokens. This matches
23/// the GraphQL spec's grammar for `IntValue`/`FloatValue`.
24#[allow(clippy::large_enum_variant)]
25#[derive(Clone, Debug, PartialEq)]
26pub enum GraphQLTokenKind<'src> {
27 // =========================================================================
28 // Punctuators (no allocation needed)
29 // =========================================================================
30 /// `&`
31 Ampersand,
32 /// `@`
33 At,
34 /// `!`
35 Bang,
36 /// `:`
37 Colon,
38 /// `}`
39 CurlyBraceClose,
40 /// `{`
41 CurlyBraceOpen,
42 /// `$`
43 Dollar,
44 /// `...`
45 Ellipsis,
46 /// `=`
47 Equals,
48 /// `)`
49 ParenClose,
50 /// `(`
51 ParenOpen,
52 /// `|`
53 Pipe,
54 /// `]`
55 SquareBracketClose,
56 /// `[`
57 SquareBracketOpen,
58
59 // =========================================================================
60 // Literals (raw source text only)
61 // =========================================================================
62 /// A GraphQL name/identifier.
63 ///
64 /// Uses `Cow<'src, str>` to enable zero-copy lexing from string sources.
65 Name(Cow<'src, str>),
66
67 /// Raw source text of an integer literal, including optional negative sign
68 /// (e.g. `"-123"`, `"0"`).
69 ///
70 /// Use `parse_int_value()` to parse the raw text into an `i64`.
71 /// Uses `Cow<'src, str>` to enable zero-copy lexing from string sources.
72 IntValue(Cow<'src, str>),
73
74 /// Raw source text of a float literal, including optional negative sign
75 /// (e.g. `"-1.23e-4"`, `"0.5"`).
76 ///
77 /// Use `parse_float_value()` to parse the raw text into an `f64`.
78 /// Uses `Cow<'src, str>` to enable zero-copy lexing from string sources.
79 FloatValue(Cow<'src, str>),
80
81 /// Raw source text of a string literal, including quotes
82 /// (e.g. `"\"hello\\nworld\""`, `"\"\"\"block\"\"\""`)
83 ///
84 /// Use `parse_string_value()` to process escape sequences and get the
85 /// unescaped content.
86 /// Uses `Cow<'src, str>` to enable zero-copy lexing from string sources.
87 StringValue(Cow<'src, str>),
88
89 // =========================================================================
90 // Boolean and null (distinct from Name for type safety)
91 // =========================================================================
92 /// The `true` literal.
93 True,
94 /// The `false` literal.
95 False,
96 /// The `null` literal.
97 Null,
98
99 // =========================================================================
100 // End of input
101 // =========================================================================
102 /// End of input. The associated `GraphQLToken` may carry trailing trivia.
103 Eof,
104
105 // =========================================================================
106 // Lexer error (allows error recovery)
107 // =========================================================================
108 /// A lexer error. This allows the parser to continue and collect multiple
109 /// errors in a single pass.
110 ///
111 /// TODO: Explore replacing error_notes with a richer diagnostics structure
112 /// that includes things like severity level and "fix action" for IDE
113 /// integration.
114 Error {
115 /// A human-readable error message.
116 message: String,
117 /// Optional notes providing additional context or suggestions.
118 error_notes: GraphQLErrorNotes,
119 },
120}
121
122impl<'src> GraphQLTokenKind<'src> {
123 // =========================================================================
124 // Helper constructors for creating token kinds
125 // =========================================================================
126
127 /// Create a `Name` token from a borrowed string slice (zero-copy).
128 ///
129 /// Use this in `StrGraphQLTokenSource` where the source text can be
130 /// borrowed directly.
131 #[inline]
132 pub fn name_borrowed(s: &'src str) -> Self {
133 GraphQLTokenKind::Name(Cow::Borrowed(s))
134 }
135
136 /// Create a `Name` token from an owned `String`.
137 ///
138 /// Use this in `RustMacroGraphQLTokenSource` where the string must be
139 /// allocated (e.g., from `ident.to_string()`).
140 #[inline]
141 pub fn name_owned(s: String) -> Self {
142 GraphQLTokenKind::Name(Cow::Owned(s))
143 }
144
145 /// Create an `IntValue` token from a borrowed string slice (zero-copy).
146 #[inline]
147 pub fn int_value_borrowed(s: &'src str) -> Self {
148 GraphQLTokenKind::IntValue(Cow::Borrowed(s))
149 }
150
151 /// Create an `IntValue` token from an owned `String`.
152 #[inline]
153 pub fn int_value_owned(s: String) -> Self {
154 GraphQLTokenKind::IntValue(Cow::Owned(s))
155 }
156
157 /// Create a `FloatValue` token from a borrowed string slice (zero-copy).
158 #[inline]
159 pub fn float_value_borrowed(s: &'src str) -> Self {
160 GraphQLTokenKind::FloatValue(Cow::Borrowed(s))
161 }
162
163 /// Create a `FloatValue` token from an owned `String`.
164 #[inline]
165 pub fn float_value_owned(s: String) -> Self {
166 GraphQLTokenKind::FloatValue(Cow::Owned(s))
167 }
168
169 /// Create a `StringValue` token from a borrowed string slice (zero-copy).
170 #[inline]
171 pub fn string_value_borrowed(s: &'src str) -> Self {
172 GraphQLTokenKind::StringValue(Cow::Borrowed(s))
173 }
174
175 /// Create a `StringValue` token from an owned `String`.
176 #[inline]
177 pub fn string_value_owned(s: String) -> Self {
178 GraphQLTokenKind::StringValue(Cow::Owned(s))
179 }
180
181 /// Create an `Error` token.
182 ///
183 /// Error messages are always dynamically constructed, so they use plain
184 /// `String` rather than `Cow`.
185 #[inline]
186 pub fn error(message: impl Into<String>, error_notes: GraphQLErrorNotes) -> Self {
187 GraphQLTokenKind::Error {
188 message: message.into(),
189 error_notes,
190 }
191 }
192
193 // =========================================================================
194 // Query methods
195 // =========================================================================
196
197 /// Returns `true` if this token is a punctuator.
198 pub fn is_punctuator(&self) -> bool {
199 match self {
200 GraphQLTokenKind::Ampersand
201 | GraphQLTokenKind::At
202 | GraphQLTokenKind::Bang
203 | GraphQLTokenKind::Colon
204 | GraphQLTokenKind::CurlyBraceClose
205 | GraphQLTokenKind::CurlyBraceOpen
206 | GraphQLTokenKind::Dollar
207 | GraphQLTokenKind::Ellipsis
208 | GraphQLTokenKind::Equals
209 | GraphQLTokenKind::ParenClose
210 | GraphQLTokenKind::ParenOpen
211 | GraphQLTokenKind::Pipe
212 | GraphQLTokenKind::SquareBracketClose
213 | GraphQLTokenKind::SquareBracketOpen => true,
214
215 GraphQLTokenKind::Name(_)
216 | GraphQLTokenKind::IntValue(_)
217 | GraphQLTokenKind::FloatValue(_)
218 | GraphQLTokenKind::StringValue(_)
219 | GraphQLTokenKind::True
220 | GraphQLTokenKind::False
221 | GraphQLTokenKind::Null
222 | GraphQLTokenKind::Eof
223 | GraphQLTokenKind::Error { .. } => false,
224 }
225 }
226
227 /// Returns the string representation of this token if it is a punctuator.
228 pub fn as_punctuator_str(&self) -> Option<&'static str> {
229 match self {
230 GraphQLTokenKind::Ampersand => Some("&"),
231 GraphQLTokenKind::At => Some("@"),
232 GraphQLTokenKind::Bang => Some("!"),
233 GraphQLTokenKind::Colon => Some(":"),
234 GraphQLTokenKind::CurlyBraceClose => Some("}"),
235 GraphQLTokenKind::CurlyBraceOpen => Some("{"),
236 GraphQLTokenKind::Dollar => Some("$"),
237 GraphQLTokenKind::Ellipsis => Some("..."),
238 GraphQLTokenKind::Equals => Some("="),
239 GraphQLTokenKind::ParenClose => Some(")"),
240 GraphQLTokenKind::ParenOpen => Some("("),
241 GraphQLTokenKind::Pipe => Some("|"),
242 GraphQLTokenKind::SquareBracketClose => Some("]"),
243 GraphQLTokenKind::SquareBracketOpen => Some("["),
244
245 GraphQLTokenKind::Name(_)
246 | GraphQLTokenKind::IntValue(_)
247 | GraphQLTokenKind::FloatValue(_)
248 | GraphQLTokenKind::StringValue(_)
249 | GraphQLTokenKind::True
250 | GraphQLTokenKind::False
251 | GraphQLTokenKind::Null
252 | GraphQLTokenKind::Eof
253 | GraphQLTokenKind::Error { .. } => None,
254 }
255 }
256
257 /// Returns `true` if this token is a value literal (`IntValue`,
258 /// `FloatValue`, `StringValue`, `True`, `False`, or `Null`).
259 pub fn is_value(&self) -> bool {
260 match self {
261 GraphQLTokenKind::IntValue(_)
262 | GraphQLTokenKind::FloatValue(_)
263 | GraphQLTokenKind::StringValue(_)
264 | GraphQLTokenKind::True
265 | GraphQLTokenKind::False
266 | GraphQLTokenKind::Null => true,
267
268 GraphQLTokenKind::Ampersand
269 | GraphQLTokenKind::At
270 | GraphQLTokenKind::Bang
271 | GraphQLTokenKind::Colon
272 | GraphQLTokenKind::CurlyBraceClose
273 | GraphQLTokenKind::CurlyBraceOpen
274 | GraphQLTokenKind::Dollar
275 | GraphQLTokenKind::Ellipsis
276 | GraphQLTokenKind::Equals
277 | GraphQLTokenKind::ParenClose
278 | GraphQLTokenKind::ParenOpen
279 | GraphQLTokenKind::Pipe
280 | GraphQLTokenKind::SquareBracketClose
281 | GraphQLTokenKind::SquareBracketOpen
282 | GraphQLTokenKind::Name(_)
283 | GraphQLTokenKind::Eof
284 | GraphQLTokenKind::Error { .. } => false,
285 }
286 }
287
288 /// Returns `true` if this token represents a lexer error.
289 pub fn is_error(&self) -> bool {
290 matches!(self, GraphQLTokenKind::Error { .. })
291 }
292
293 /// Parse an `IntValue`'s raw text to `i64`.
294 ///
295 /// Returns `None` if this is not an `IntValue`, or `Some(Err(...))` if
296 /// parsing fails.
297 pub fn parse_int_value(&self) -> Option<Result<i64, ParseIntError>> {
298 match self {
299 GraphQLTokenKind::IntValue(raw) => Some(raw.parse()),
300 _ => None,
301 }
302 }
303
304 /// Parse a `FloatValue`'s raw text to `f64`.
305 ///
306 /// Returns `None` if this is not a `FloatValue`, or `Some(Err(...))` if
307 /// parsing fails.
308 pub fn parse_float_value(&self) -> Option<Result<f64, ParseFloatError>> {
309 match self {
310 GraphQLTokenKind::FloatValue(raw) => Some(raw.parse()),
311 _ => None,
312 }
313 }
314
315 /// Parse a `StringValue`'s raw text to unescaped content.
316 ///
317 /// Handles escape sequences per the GraphQL spec:
318 /// - For single-line strings (`"..."`): processes `\n`, `\r`, `\t`, `\\`,
319 /// `\"`, `\/`, `\b`, `\f`, `\uXXXX` (fixed 4-digit), and `\u{X...}`
320 /// (variable length).
321 /// - For block strings (`"""..."""`): applies the indentation stripping
322 /// algorithm per spec, then processes `\"""` escape only.
323 ///
324 /// Returns `None` if this is not a `StringValue`, or `Some(Err(...))` if
325 /// parsing fails.
326 pub fn parse_string_value(&self) -> Option<Result<String, GraphQLStringParsingError>> {
327 match self {
328 GraphQLTokenKind::StringValue(raw) => Some(parse_graphql_string(raw)),
329 _ => None,
330 }
331 }
332}
333
334/// Parse a raw GraphQL string literal into its unescaped content.
335fn parse_graphql_string(raw: &str) -> Result<String, GraphQLStringParsingError> {
336 // Check if this is a block string
337 if raw.starts_with("\"\"\"") {
338 parse_block_string(raw)
339 } else {
340 parse_single_line_string(raw)
341 }
342}
343
344/// Parse a single-line string literal.
345fn parse_single_line_string(raw: &str) -> Result<String, GraphQLStringParsingError> {
346 // Strip surrounding quotes
347 if !raw.starts_with('"') || !raw.ends_with('"') || raw.len() < 2 {
348 return Err(GraphQLStringParsingError::UnterminatedString);
349 }
350 let content = &raw[1..raw.len() - 1];
351
352 let mut result = String::with_capacity(content.len());
353 let mut chars = content.chars().peekable();
354
355 while let Some(c) = chars.next() {
356 if c == '\\' {
357 match chars.next() {
358 Some('n') => result.push('\n'),
359 Some('r') => result.push('\r'),
360 Some('t') => result.push('\t'),
361 Some('\\') => result.push('\\'),
362 Some('"') => result.push('"'),
363 Some('/') => result.push('/'),
364 Some('b') => result.push('\u{0008}'),
365 Some('f') => result.push('\u{000C}'),
366 Some('u') => {
367 let unicode_char = parse_unicode_escape(&mut chars)?;
368 result.push(unicode_char);
369 }
370 Some(other) => {
371 return Err(GraphQLStringParsingError::InvalidEscapeSequence(format!(
372 "\\{other}"
373 )));
374 }
375 None => {
376 return Err(GraphQLStringParsingError::InvalidEscapeSequence(
377 "\\".to_string(),
378 ));
379 }
380 }
381 } else {
382 result.push(c);
383 }
384 }
385
386 Ok(result)
387}
388
389/// Parse a Unicode escape sequence after seeing `\u`.
390fn parse_unicode_escape(
391 chars: &mut std::iter::Peekable<std::str::Chars>,
392) -> Result<char, GraphQLStringParsingError> {
393 // Check for variable-length syntax: \u{...}
394 if chars.peek() == Some(&'{') {
395 chars.next(); // consume '{'
396 let mut hex = String::new();
397 loop {
398 match chars.next() {
399 Some('}') => break,
400 Some(c) if c.is_ascii_hexdigit() => hex.push(c),
401 Some(c) => {
402 return Err(GraphQLStringParsingError::InvalidUnicodeEscape(format!(
403 "\\u{{{hex}{c}"
404 )));
405 }
406 None => {
407 return Err(GraphQLStringParsingError::InvalidUnicodeEscape(format!(
408 "\\u{{{hex}"
409 )));
410 }
411 }
412 }
413 if hex.is_empty() {
414 return Err(GraphQLStringParsingError::InvalidUnicodeEscape(
415 "\\u{}".to_string(),
416 ));
417 }
418 let code_point = u32::from_str_radix(&hex, 16).map_err(|_| {
419 GraphQLStringParsingError::InvalidUnicodeEscape(format!("\\u{{{hex}}}"))
420 })?;
421 char::from_u32(code_point).ok_or_else(|| {
422 GraphQLStringParsingError::InvalidUnicodeEscape(format!("\\u{{{hex}}}"))
423 })
424 } else {
425 // Fixed 4-digit syntax: \uXXXX
426 let mut hex = String::with_capacity(4);
427 for _ in 0..4 {
428 match chars.next() {
429 Some(c) if c.is_ascii_hexdigit() => hex.push(c),
430 Some(c) => {
431 return Err(GraphQLStringParsingError::InvalidUnicodeEscape(format!(
432 "\\u{hex}{c}"
433 )));
434 }
435 None => {
436 return Err(GraphQLStringParsingError::InvalidUnicodeEscape(format!(
437 "\\u{hex}"
438 )));
439 }
440 }
441 }
442 let code_point = u32::from_str_radix(&hex, 16).map_err(|_| {
443 GraphQLStringParsingError::InvalidUnicodeEscape(format!("\\u{hex}"))
444 })?;
445 char::from_u32(code_point).ok_or_else(|| {
446 GraphQLStringParsingError::InvalidUnicodeEscape(format!("\\u{hex}"))
447 })
448 }
449}
450
/// Reports whether `line` is made up solely of GraphQL WhiteSpace, i.e.
/// Tab (U+0009) and Space (U+0020). An empty line counts as blank.
///
/// The GraphQL spec defines WhiteSpace as exactly those two characters:
/// <https://spec.graphql.org/September2025/#WhiteSpace>
///
/// `str::trim()` is deliberately not used: it strips every Unicode
/// whitespace character (30+ of them, including NEL, EN QUAD, etc.), so a
/// line holding non-ASCII Unicode whitespace would wrongly be treated as
/// "blank."
fn is_graphql_blank(line: &str) -> bool {
    line.as_bytes()
        .iter()
        .all(|byte| matches!(byte, b' ' | b'\t'))
}
463
464/// Parse a block string literal per the GraphQL spec.
465///
466/// # Performance (B3 in benchmark-optimizations.md)
467///
468/// This uses a two-pass, low-allocation approach instead of the
469/// naive collect-into-Vec-of-Strings strategy. Key optimizations:
470///
471/// 1. **Skip `replace()` when no escaped triple quotes exist** —
472/// nearly all block strings have no `\"""`, so we avoid a heap
473/// allocation by using `Cow::Borrowed`. Only the rare case that
474/// contains `\"""` falls back to `Cow::Owned`.
475///
476/// 2. **Iterate lines without collecting into a `Vec`** — both the
477/// indent-computation pass and the output-building pass iterate
478/// `str::lines()` lazily.
479///
480/// 3. **Build result `String` directly** — instead of creating a
481/// `Vec<String>` (one heap alloc per line) and then `join()`ing,
482/// we write each stripped line directly into a single
483/// pre-allocated `String`.
484///
485/// 4. **Use index tracking instead of `remove(0)`** — the old code
486/// used `Vec::remove(0)` to strip leading blank lines, which is
487/// O(n) per removal. We instead find the first/last non-blank
488/// line indices in the first pass and skip blank lines during
489/// output.
490fn parse_block_string(
491 raw: &str,
492) -> Result<String, GraphQLStringParsingError> {
493 // Strip surrounding triple quotes
494 if !raw.starts_with("\"\"\"")
495 || !raw.ends_with("\"\"\"")
496 || raw.len() < 6
497 {
498 return Err(
499 GraphQLStringParsingError::UnterminatedString,
500 );
501 }
502 let content = &raw[3..raw.len() - 3];
503
504 // Handle escaped triple quotes. Nearly all block strings
505 // have none, so we avoid allocating in the common case by
506 // using Cow::Borrowed. Only if `\"""` is present do we
507 // fall back to an owned String via replace().
508 let content: Cow<str> =
509 if content.contains("\\\"\"\"") {
510 Cow::Owned(
511 content.replace("\\\"\"\"", "\"\"\""),
512 )
513 } else {
514 Cow::Borrowed(content)
515 };
516
517 // --- Pass 1: Compute common indent and first/last
518 // non-blank line indices ----------------------------
519 //
520 // Per the GraphQL spec, WhiteSpace is only Tab (U+0009)
521 // and Space (U+0020):
522 // <https://spec.graphql.org/September2025/#WhiteSpace>
523 //
524 // We must use this definition consistently for blank-line
525 // filtering, indent counting, and indent stripping. Using
526 // Rust's `trim()`/`trim_start()` (which strips all Unicode
527 // whitespace) would misclassify lines containing multi-byte
528 // Unicode whitespace characters and cause byte-index slicing
529 // panics.
530 let mut common_indent: Option<usize> = None;
531 let mut first_non_blank: Option<usize> = None;
532 let mut last_non_blank: Option<usize> = None;
533 for (i, line) in content.lines().enumerate() {
534 let blank = is_graphql_blank(line);
535
536 if !blank {
537 if first_non_blank.is_none() {
538 first_non_blank = Some(i);
539 }
540 last_non_blank = Some(i);
541 }
542
543 // Common indent excludes the first line and blank
544 // lines (per spec).
545 if i > 0 && !blank {
546 let indent = line
547 .bytes()
548 .take_while(|&b| b == b' ' || b == b'\t')
549 .count();
550 common_indent = Some(match common_indent {
551 Some(cur) if cur <= indent => cur,
552 _ => indent,
553 });
554 }
555 }
556
557 let common_indent = common_indent.unwrap_or(0);
558 let first_non_blank = match first_non_blank {
559 Some(i) => i,
560 // All lines are blank — return empty string.
561 None => return Ok(String::new()),
562 };
563 let last_non_blank = last_non_blank.unwrap_or(0);
564
565 // --- Pass 2: Build result string directly ---------------
566 let mut result =
567 String::with_capacity(content.len());
568
569 // Track whether we need a newline separator before the
570 // next line we write.
571 let mut need_newline = false;
572
573 for (i, line) in content.lines().enumerate() {
574 // Skip leading and trailing blank lines.
575 if i < first_non_blank || i > last_non_blank {
576 continue;
577 }
578
579 if need_newline {
580 result.push('\n');
581 }
582 need_newline = true;
583
584 if i == 0 {
585 result.push_str(line);
586 } else if line.len() >= common_indent {
587 // Safe: common_indent counts only single-byte
588 // ASCII whitespace, so this is always a valid
589 // char boundary.
590 result.push_str(&line[common_indent..]);
591 } else {
592 result.push_str(line);
593 }
594 }
595
596
597 Ok(result)
598}