Skip to main content

libgraphql_parser/token/
str_to_graphql_token_source.rs

1//! A [`GraphQLTokenSource`] that lexes from a `&str` input.
2//!
3//! This lexer implements zero-copy lexing: token values borrow directly from
4//! the source string using `Cow::Borrowed`, avoiding allocations for names,
5//! numbers, and strings.
6//!
7//! # Features
8//!
9//! - **Zero-copy lexing**: Token values borrow from source text when possible
10//! - **Dual column tracking**: Reports both UTF-8 character positions (for
11//!   display) and UTF-16 code unit positions (for LSP compatibility)
12//! - **Comment preservation**: GraphQL `#` comments are captured as trivia
13//! - **Error recovery**: Invalid characters emit `Error` tokens, allowing the
14//!   lexer to continue and report multiple errors
15//!
16//! # Usage
17//!
18//! ```rust
19//! use libgraphql_parser::token::StrGraphQLTokenSource;
20//!
21//! let source = "{ name }";
22//! let lexer = StrGraphQLTokenSource::new(source);
23//! for token in lexer {
24//!     println!("{:?}", token.kind);
25//! }
26//! // Output:
27//! // CurlyBraceOpen
28//! // Name(Borrowed("name"))
29//! // CurlyBraceClose
30//! // Eof
31//! ```
32
33use crate::ByteSpan;
34use crate::GraphQLErrorNote;
35use crate::SourceMap;
36use crate::SourceSpan;
37use crate::smallvec::smallvec;
38use crate::token::GraphQLToken;
39use crate::token::GraphQLTokenKind;
40use crate::token::GraphQLTriviaToken;
41use crate::token::GraphQLTriviaTokenVec;
42use crate::token::GraphQLTokenSource;
43use crate::token::StrGraphQLTokenSourceConfig;
44use std::borrow::Cow;
45use std::path::Path;
46use std::path::PathBuf;
47
/// A [`GraphQLTokenSource`](crate::token::GraphQLTokenSource) that lexes from
/// a `&str` input.
///
/// This lexer produces [`GraphQLToken`]s with zero-copy string values where
/// possible. The `'src` lifetime ties token values to the source string.
///
/// See module documentation for details.
pub struct StrGraphQLTokenSource<'src> {
    /// The full source text being lexed.
    source: &'src str,

    /// Current byte offset from the start of `source`.
    ///
    /// The remaining text to lex is `&source[curr_byte_offset..]`, so this
    /// offset must always sit on a UTF-8 character boundary.
    curr_byte_offset: usize,

    /// Trivia accumulated before the next token: comments, commas and
    /// whitespace runs, each only when enabled in `config`. The buffer is
    /// moved onto the next emitted token and left empty.
    pending_trivia: GraphQLTriviaTokenVec<'src>,

    /// Whether the EOF token has been emitted.
    finished: bool,

    /// Maps byte offsets to line/column positions. Built via an O(n) pre-pass
    /// in the constructor.
    source_map: SourceMap<'src>,

    /// Controls which trivia types (comments, commas, whitespace) are
    /// captured on emitted tokens.
    config: StrGraphQLTokenSourceConfig,
}
78
79impl<'src> StrGraphQLTokenSource<'src> {
80    /// Creates a new token source from a string slice.
81    ///
82    /// # Example
83    ///
84    /// ```rust
85    /// # use libgraphql_parser::token::StrGraphQLTokenSource;
86    /// let lexer = StrGraphQLTokenSource::new("{ name }");
87    /// ```
88    pub fn new(source: &'src str) -> Self {
89        Self {
90            source,
91            curr_byte_offset: 0,
92            pending_trivia: smallvec![],
93            finished: false,
94            source_map: SourceMap::new_with_source(source, None),
95            config: StrGraphQLTokenSourceConfig::default(),
96        }
97    }
98
99    /// Creates a new token source with a custom trivia configuration.
100    ///
101    /// See [`StrGraphQLTokenSourceConfig`] for available options.
102    pub fn with_config(
103        source: &'src str,
104        config: StrGraphQLTokenSourceConfig,
105    ) -> Self {
106        Self {
107            source,
108            curr_byte_offset: 0,
109            pending_trivia: smallvec![],
110            finished: false,
111            source_map: SourceMap::new_with_source(source, None),
112            config,
113        }
114    }
115
116    /// Creates a new token source with an associated file path.
117    ///
118    /// The file path is included in token spans for error reporting.
119    pub fn with_file_path(source: &'src str, path: &'src Path) -> Self {
120        Self {
121            source,
122            curr_byte_offset: 0,
123            pending_trivia: smallvec![],
124            finished: false,
125            source_map: SourceMap::new_with_source(
126                source,
127                Some(PathBuf::from(path)),
128            ),
129            config: StrGraphQLTokenSourceConfig::default(),
130        }
131    }
132
133    // =========================================================================
134    // Position and scanning helpers
135    // =========================================================================
136
137    /// Returns the remaining source text to be lexed.
138    fn remaining(&self) -> &'src str {
139        &self.source[self.curr_byte_offset..]
140    }
141
    /// Returns the current byte offset as a `u32`.
    ///
    /// The conversion is a plain `as` cast: an offset above `u32::MAX`
    /// would be silently truncated to its low 32 bits rather than panic.
    fn curr_offset(&self) -> u32 {
        self.curr_byte_offset as u32
    }
146
147    /// Peeks at the next character without consuming it.
148    ///
149    /// Returns `None` if at end of input.
150    ///
151    /// # Performance (B1 in benchmark-optimizations.md)
152    ///
153    /// This uses direct byte access with an ASCII fast path instead
154    /// of the naive `remaining().chars().next()`. GraphQL source text
155    /// is overwhelmingly ASCII (names, keywords, punctuators,
156    /// whitespace), so the fast path covers >99% of calls. The
157    /// non-ASCII fallback (Unicode in string literals/comments) is
158    /// rare and can remain slow.
159    ///
160    /// Without this optimization, every peek would construct a
161    /// `Chars` iterator and decode the first UTF-8 sequence — a
162    /// measurable cost given that peek is called millions of times
163    /// for large inputs.
164    #[inline]
165    fn peek_char(&self) -> Option<char> {
166        let bytes = self.source.as_bytes();
167        if self.curr_byte_offset >= bytes.len() {
168            return None;
169        }
170        let b = bytes[self.curr_byte_offset];
171        if b.is_ascii() {
172            // Fast path: single-byte ASCII character (covers >99%
173            // of GraphQL source text).
174            Some(b as char)
175        } else {
176            // Slow path: multi-byte UTF-8 character. Fall back to
177            // full UTF-8 decoding. This only triggers inside
178            // string literals or comments containing non-ASCII
179            // characters.
180            self.source[self.curr_byte_offset..].chars().next()
181        }
182    }
183
184    /// Peeks at the nth character ahead without consuming.
185    ///
186    /// `peek_char_nth(0)` is equivalent to `peek_char()`.
187    /// Returns `None` if there aren't enough characters remaining.
188    ///
189    /// Note: Unlike `peek_char()`, this still uses the iterator
190    /// approach since it needs to skip over variable-width UTF-8
191    /// characters to reach position n. This method is only called
192    /// in a few places for multi-character lookahead (e.g., number
193    /// parsing to check digit after `.`), so it is not a hot path.
194    fn peek_char_nth(&self, n: usize) -> Option<char> {
195        self.remaining().chars().nth(n)
196    }
197
198    /// Consumes the next character and advances the byte offset.
199    ///
200    /// Returns `None` if at end of input.
201    ///
202    /// # Performance (B1 in benchmark-optimizations.md)
203    ///
204    /// Uses an ASCII fast path: if the current byte is <0x80, we
205    /// know it is exactly 1 byte, so we avoid calling `ch.len_utf8()`.
206    /// Line/column tracking is deferred to the [`SourceMap`] (resolved
207    /// on demand), so `consume()` only updates `curr_byte_offset`.
208    fn consume(&mut self) -> Option<char> {
209        let bytes = self.source.as_bytes();
210        if self.curr_byte_offset >= bytes.len() {
211            return None;
212        }
213
214        let b = bytes[self.curr_byte_offset];
215
216        if b.is_ascii() {
217            self.curr_byte_offset += 1;
218            Some(b as char)
219        } else {
220            let ch = self.source[self.curr_byte_offset..]
221                .chars()
222                .next()
223                .unwrap();
224            self.curr_byte_offset += ch.len_utf8();
225            Some(ch)
226        }
227    }
228
229    /// Creates a [`ByteSpan`] from a start byte offset to the current
230    /// byte offset.
231    #[inline]
232    fn make_span(&self, start: u32) -> ByteSpan {
233        ByteSpan::new(start, self.curr_byte_offset as u32)
234    }
235
236    /// Resolves a `ByteSpan` to a `SourceSpan` using this token
237    /// source's `SourceMap`. Falls back to `SourceSpan::zero()` if
238    /// resolution fails.
239    fn resolve_span(&self, span: ByteSpan) -> SourceSpan {
240        self.source_map.resolve_span(span)
241            .unwrap_or_else(SourceSpan::zero)
242    }
243
244    // =========================================================================
245    // Token creation helpers
246    // =========================================================================
247
248    /// Creates a token with the accumulated trivia.
249    fn make_token(
250        &mut self,
251        kind: GraphQLTokenKind<'src>,
252        span: ByteSpan,
253    ) -> GraphQLToken<'src> {
254        GraphQLToken {
255            kind,
256            preceding_trivia: std::mem::take(&mut self.pending_trivia),
257            span,
258        }
259    }
260
261    // =========================================================================
262    // Lexer main loop
263    // =========================================================================
264
265    /// Advances to the next token, skipping whitespace and collecting trivia.
266    fn next_token(&mut self) -> GraphQLToken<'src> {
267        loop {
268            // Skip whitespace
269            self.skip_whitespace();
270
271            let start = self.curr_offset();
272
273            match self.peek_char() {
274                None => {
275                    // End of input
276                    let span = self.make_span(start);
277                    return self.make_token(GraphQLTokenKind::Eof, span);
278                }
279
280                Some('#') => {
281                    // Comment - collect as trivia and continue
282                    self.lex_comment(start);
283                    continue;
284                }
285
286                Some(',') => {
287                    // Comma - collect as trivia and continue
288                    self.consume();
289                    if self.config.retain_commas {
290                        let span = self.make_span(start);
291                        self.pending_trivia
292                            .push(GraphQLTriviaToken::Comma { span });
293                    }
294                    continue;
295                }
296
297                // Single-character punctuators
298                Some('!') => {
299                    self.consume();
300                    let span = self.make_span(start);
301                    return self.make_token(GraphQLTokenKind::Bang, span);
302                }
303                Some('$') => {
304                    self.consume();
305                    let span = self.make_span(start);
306                    return self.make_token(GraphQLTokenKind::Dollar, span);
307                }
308                Some('&') => {
309                    self.consume();
310                    let span = self.make_span(start);
311                    return self.make_token(GraphQLTokenKind::Ampersand, span);
312                }
313                Some('(') => {
314                    self.consume();
315                    let span = self.make_span(start);
316                    return self.make_token(GraphQLTokenKind::ParenOpen, span);
317                }
318                Some(')') => {
319                    self.consume();
320                    let span = self.make_span(start);
321                    return self.make_token(GraphQLTokenKind::ParenClose, span);
322                }
323                Some(':') => {
324                    self.consume();
325                    let span = self.make_span(start);
326                    return self.make_token(GraphQLTokenKind::Colon, span);
327                }
328                Some('=') => {
329                    self.consume();
330                    let span = self.make_span(start);
331                    return self.make_token(GraphQLTokenKind::Equals, span);
332                }
333                Some('@') => {
334                    self.consume();
335                    let span = self.make_span(start);
336                    return self.make_token(GraphQLTokenKind::At, span);
337                }
338                Some('[') => {
339                    self.consume();
340                    let span = self.make_span(start);
341                    return self.make_token(GraphQLTokenKind::SquareBracketOpen, span);
342                }
343                Some(']') => {
344                    self.consume();
345                    let span = self.make_span(start);
346                    return self.make_token(GraphQLTokenKind::SquareBracketClose, span);
347                }
348                Some('{') => {
349                    self.consume();
350                    let span = self.make_span(start);
351                    return self.make_token(GraphQLTokenKind::CurlyBraceOpen, span);
352                }
353                Some('}') => {
354                    self.consume();
355                    let span = self.make_span(start);
356                    return self.make_token(GraphQLTokenKind::CurlyBraceClose, span);
357                }
358                Some('|') => {
359                    self.consume();
360                    let span = self.make_span(start);
361                    return self.make_token(GraphQLTokenKind::Pipe, span);
362                }
363
364                // Ellipsis or dot error
365                Some('.') => {
366                    return self.lex_dot_or_ellipsis(start);
367                }
368
369                // String literals
370                Some('"') => {
371                    return self.lex_string(start);
372                }
373
374                // Names and keywords
375                Some(c) if is_name_start(c) => {
376                    return self.lex_name(start);
377                }
378
379                // Numbers (including negative)
380                Some(c) if c == '-' || c.is_ascii_digit() => {
381                    return self.lex_number(start);
382                }
383
384                // Invalid character
385                Some(_) => {
386                    return self.lex_invalid_character(start);
387                }
388            }
389        }
390    }
391
392    // =========================================================================
393    // Whitespace handling
394    // =========================================================================
395
396    /// Skips whitespace characters.
397    ///
398    /// Per the GraphQL spec, these are "ignored tokens":
399    /// - Space (U+0020)
400    /// - Tab (U+0009)
401    /// - Line terminators: LF (U+000A), CR (U+000D), CRLF
402    /// - BOM (U+FEFF) - Unicode BOM is ignored anywhere in the document
403    ///
404    /// See: <https://spec.graphql.org/September2025/#sec-Language.Source-Text.Unicode>
405    ///
406    /// Note: Comma is also whitespace in GraphQL but we handle it separately
407    /// to preserve it as trivia.
408    ///
409    /// # Performance (B2 in benchmark-optimizations.md)
410    ///
411    /// Uses byte-scanning instead of per-character `consume()`
412    /// calls, doing one branch per byte and a single
413    /// `curr_byte_offset` update at the end.
414    fn skip_whitespace(&mut self) {
415        let bytes = self.source.as_bytes();
416        let start_byte_offset = self.curr_byte_offset;
417        let retain = self.config.retain_whitespace;
418        let start = if retain {
419            Some(self.curr_offset())
420        } else {
421            None
422        };
423
424        let mut i = self.curr_byte_offset;
425
426        loop {
427            if i >= bytes.len() {
428                break;
429            }
430            match bytes[i] {
431                b' ' | b'\t' | b'\n' | b'\r' => {
432                    i += 1;
433                },
434                // BOM: U+FEFF = 0xEF 0xBB 0xBF in UTF-8.
435                0xEF if i + 2 < bytes.len()
436                    && bytes[i + 1] == 0xBB
437                    && bytes[i + 2] == 0xBF => {
438                    i += 3;
439                },
440                _ => break,
441            }
442        }
443
444        if i == self.curr_byte_offset {
445            return;
446        }
447
448        self.curr_byte_offset = i;
449
450        // Capture the whitespace run as trivia if configured.
451        if let Some(ws_start) = start {
452            let value = &self.source[start_byte_offset..i];
453            let span = self.make_span(ws_start);
454            self.pending_trivia.push(
455                GraphQLTriviaToken::Whitespace {
456                    value: Cow::Borrowed(value),
457                    span,
458                },
459            );
460        }
461    }
462
463    // =========================================================================
464    // Comment lexing
465    // =========================================================================
466
467    /// Lexes a comment and adds it to pending trivia.
468    ///
469    /// A comment starts with `#` and extends to the end of the line.
470    ///
471    /// # Performance (B2 in benchmark-optimizations.md)
472    ///
473    /// Uses byte-scanning to find end-of-line instead of
474    /// per-character `peek_char()` + `consume()`. Comments never
475    /// span multiple lines, so line number doesn't change — only
476    /// the column advances. Column is computed once at the end
477    /// via `compute_columns_for_span()` (with an ASCII fast path
478    /// for the common case).
479    fn lex_comment(&mut self, start: u32) {
480        // Consume the '#' (single ASCII byte).
481        self.curr_byte_offset += 1;
482
483        let content_start = self.curr_byte_offset;
484        let bytes = self.source.as_bytes();
485
486        // SIMD-accelerated scan to end of line or EOF.
487        let i = memchr::memchr2(b'\n', b'\r', &bytes[content_start..])
488            .map_or(bytes.len(), |offset| content_start + offset);
489
490        self.curr_byte_offset = i;
491
492        if self.config.retain_comments {
493            let content = &self.source[content_start..i];
494            let span = self.make_span(start);
495            self.pending_trivia.push(
496                GraphQLTriviaToken::Comment {
497                    value: Cow::Borrowed(content),
498                    span,
499                },
500            );
501        }
502    }
503
504    // =========================================================================
505    // Dot / Ellipsis lexing
506    // =========================================================================
507
508    /// Lexes dots, producing either an Ellipsis token or an error.
509    ///
510    /// This implements a state machine for dot handling similar to
511    /// `RustMacroGraphQLTokenSource`:
512    /// - `...` (adjacent) → `Ellipsis`
513    /// - `.` alone → Error (no hint - could be many things like `Foo.Bar`)
514    /// - `..` (adjacent) → Error with help to add third dot
515    /// - `. .` (spaced, same line) → Error with help about spacing
516    /// - `.. .` (first two adjacent, third spaced) → Error with help about
517    ///   spacing
518    /// - `. ..` (first spaced, last two adjacent) → Error with help about
519    ///   spacing
520    /// - `. . .` (all spaced, same line) → Error with help about spacing
521    /// - Dots on different lines → Separate errors
522    ///
523    /// TODO: Look for patterns like `{Name}.{Name}` and give a useful error
524    /// hint (e.g., user may have been trying to use enum syntax incorrectly).
525    fn lex_dot_or_ellipsis(&mut self, start: u32) -> GraphQLToken<'src> {
526        // Consume first dot
527        self.consume();
528
529        // Check for second dot (may be adjacent or spaced on the same line).
530        // `skip_whitespace_same_line()` never crosses newlines, so if the
531        // next char after skipping is not a dot, we fall through to the
532        // single-dot error case.
533        self.skip_whitespace_same_line();
534
535        match self.peek_char() {
536            Some('.') => {
537                let second_dot_start = self.curr_offset();
538                let first_two_adjacent = second_dot_start == start + 1;
539                self.consume();
540
541                // Check for third dot
542                self.skip_whitespace_same_line();
543
544                match self.peek_char() {
545                    Some('.') => {
546                        let third_dot_start = self.curr_offset();
547                        self.consume();
548                        let span = self.make_span(start);
549
550                        // Check if all three dots were adjacent (no whitespace)
551                        let second_third_adjacent =
552                            third_dot_start == second_dot_start + 1;
553
554                        if first_two_adjacent && second_third_adjacent {
555                            // All adjacent - valid ellipsis
556                            self.make_token(GraphQLTokenKind::Ellipsis, span)
557                        } else if first_two_adjacent {
558                            // `.. .` - first two adjacent, third spaced
559                            let kind = GraphQLTokenKind::error(
560                                "Unexpected `.. .`",
561                                smallvec![GraphQLErrorNote::help(
562                                    "This `.` may have been intended to complete a `...` \
563                                     spread operator. Try removing the extra spacing \
564                                     between the dots."
565                                )],
566                            );
567                            self.make_token(kind, span)
568                        } else if second_third_adjacent {
569                            // `. ..` - first spaced, last two adjacent
570                            let kind = GraphQLTokenKind::error(
571                                "Unexpected `. ..`",
572                                smallvec![GraphQLErrorNote::help(
573                                    "These dots may have been intended to form a `...` \
574                                     spread operator. Try removing the extra spacing \
575                                     between the dots."
576                                )],
577                            );
578                            self.make_token(kind, span)
579                        } else {
580                            // `. . .` - all spaced
581                            let kind = GraphQLTokenKind::error(
582                                "Unexpected `. . .`",
583                                smallvec![GraphQLErrorNote::help(
584                                    "These dots may have been intended to form a `...` \
585                                     spread operator. Try removing the extra spacing \
586                                     between the dots."
587                                )],
588                            );
589                            self.make_token(kind, span)
590                        }
591                    }
592                    _ => {
593                        // Only two dots found on this line
594                        let span = self.make_span(start);
595
596                        if first_two_adjacent {
597                            // Adjacent `..` - suggest adding third dot
598                            let kind = GraphQLTokenKind::error(
599                                "Unexpected `..` (use `...` for spread operator)",
600                                smallvec![GraphQLErrorNote::help(
601                                    "Add one more `.` to form the spread operator `...`"
602                                )],
603                            );
604                            self.make_token(kind, span)
605                        } else {
606                            // Spaced `. .` - suggest removing spacing
607                            let kind = GraphQLTokenKind::error(
608                                "Unexpected `. .` (use `...` for spread operator)",
609                                smallvec![GraphQLErrorNote::help(
610                                    "These dots may have been intended to form a `...` \
611                                     spread operator. Try removing the extra spacing \
612                                     between the dots."
613                                )],
614                            );
615                            self.make_token(kind, span)
616                        }
617                    }
618                }
619            }
620            _ => {
621                // Single dot (or dots on different lines)
622                // Don't assume it was meant to be ellipsis - could be `Foo.Bar` style
623                let span = self.make_span(start);
624                let kind = GraphQLTokenKind::error("Unexpected `.`", smallvec![]);
625                self.make_token(kind, span)
626            }
627        }
628    }
629
630    /// Skips whitespace but only on the same line.
631    ///
632    /// Used for dot consolidation - we only merge dots that are on the same
633    /// line.
634    fn skip_whitespace_same_line(&mut self) {
635        while let Some(ch) = self.peek_char() {
636            match ch {
637                ' ' | '\t' | '\u{FEFF}' => {
638                    self.consume();
639                }
640                _ => break,
641            }
642        }
643    }
644
645    // =========================================================================
646    // Name lexing
647    // =========================================================================
648
649    /// Lexes a name or keyword.
650    ///
651    /// Names match the pattern: `/[_A-Za-z][_0-9A-Za-z]*/`
652    ///
653    /// Keywords `true`, `false`, and `null` are emitted as distinct token
654    /// kinds.
655    ///
656    /// # Performance (B2 in benchmark-optimizations.md)
657    ///
658    /// Uses byte-scanning to find the end of the name in a tight
659    /// loop (one byte comparison per iteration), then updates
660    /// `curr_byte_offset` once for the entire name.
661    fn lex_name(&mut self, start: u32) -> GraphQLToken<'src> {
662        let name_start = self.curr_byte_offset;
663        let bytes = self.source.as_bytes();
664
665        // Byte-scan: skip first char (already validated as name
666        // start) and continue while bytes match [_0-9A-Za-z].
667        let mut i = name_start + 1;
668        while i < bytes.len() && is_name_continue_byte(bytes[i]) {
669            i += 1;
670        }
671
672        self.curr_byte_offset = i;
673
674        let name = &self.source[name_start..i];
675        let span = self.make_span(start);
676
677        // Check for keywords
678        let kind = match name {
679            "true" => GraphQLTokenKind::True,
680            "false" => GraphQLTokenKind::False,
681            "null" => GraphQLTokenKind::Null,
682            _ => GraphQLTokenKind::name_borrowed(name),
683        };
684
685        self.make_token(kind, span)
686    }
687
688    // =========================================================================
689    // Number lexing
690    // =========================================================================
691
692    /// Lexes an integer or float literal.
693    ///
694    /// Handles:
695    /// - Optional negative sign: `-`
696    /// - Integer part: `0` or `[1-9][0-9]*`
697    /// - Optional decimal part: `.[0-9]+`
698    /// - Optional exponent: `[eE][+-]?[0-9]+`
699    fn lex_number(&mut self, start: u32) -> GraphQLToken<'src> {
700        let num_start = self.curr_byte_offset;
701        let mut is_float = false;
702
703        // Optional negative sign
704        if self.peek_char() == Some('-') {
705            self.consume();
706        }
707
708        // Integer part
709        match self.peek_char() {
710            Some('0') => {
711                self.consume();
712                // Check for invalid leading zeros (e.g., 00, 01)
713                if let Some(ch) = self.peek_char()
714                    && ch.is_ascii_digit() {
715                    // Invalid: leading zeros
716                    return self.lex_number_error(
717                        start,
718                        num_start,
719                        "Invalid number: leading zeros are not allowed",
720                        Some("https://spec.graphql.org/September2025/#sec-Int-Value"),
721                    );
722                }
723            }
724            Some(ch) if ch.is_ascii_digit() => {
725                // Non-zero start
726                self.consume();
727                while let Some(ch) = self.peek_char() {
728                    if ch.is_ascii_digit() {
729                        self.consume();
730                    } else {
731                        break;
732                    }
733                }
734            }
735            Some(_) | None => {
736                // Just a `-` with no digits
737                let span = self.make_span(start);
738                let kind = GraphQLTokenKind::error("Unexpected `-`", smallvec![]);
739                return self.make_token(kind, span);
740            }
741        }
742
743        // Optional decimal part
744        if self.peek_char() == Some('.') {
745            // Check that the next character is a digit (not another dot for `...`)
746            if let Some(ch) = self.peek_char_nth(1)
747                && ch.is_ascii_digit() {
748                is_float = true;
749                self.consume(); // consume the '.'
750
751                // Consume decimal digits
752                while let Some(ch) = self.peek_char() {
753                    if ch.is_ascii_digit() {
754                        self.consume();
755                    } else {
756                        break;
757                    }
758                }
759            }
760        }
761
762        // Optional exponent part
763        if let Some(ch) = self.peek_char()
764            && (ch == 'e' || ch == 'E') {
765            is_float = true;
766            self.consume();
767
768            // Optional sign
769            if let Some(ch) = self.peek_char()
770                && (ch == '+' || ch == '-') {
771                self.consume();
772            }
773
774            // Exponent digits (required)
775            let has_exponent_digits = matches!(self.peek_char(), Some(ch) if ch.is_ascii_digit());
776            if !has_exponent_digits {
777                return self.lex_number_error(
778                    start,
779                    num_start,
780                    "Invalid number: exponent must have at least one digit",
781                    Some("https://spec.graphql.org/September2025/#sec-Float-Value"),
782                );
783            }
784
785            while let Some(ch) = self.peek_char() {
786                if ch.is_ascii_digit() {
787                    self.consume();
788                } else {
789                    break;
790                }
791            }
792        }
793
794        let num_end = self.curr_byte_offset;
795        let num_text = &self.source[num_start..num_end];
796        let span = self.make_span(start);
797
798        let kind = if is_float {
799            GraphQLTokenKind::float_value_borrowed(num_text)
800        } else {
801            GraphQLTokenKind::int_value_borrowed(num_text)
802        };
803
804        self.make_token(kind, span)
805    }
806
807    /// Creates an error token for an invalid number.
808    fn lex_number_error(
809        &mut self,
810        start: u32,
811        num_start: usize,
812        message: &str,
813        spec_url: Option<&str>,
814    ) -> GraphQLToken<'src> {
815        // Consume remaining number-like characters to provide better error recovery
816        while let Some(ch) = self.peek_char() {
817            if ch.is_ascii_digit() || ch == '.' || ch == 'e' || ch == 'E' || ch == '+' || ch == '-' {
818                self.consume();
819            } else {
820                break;
821            }
822        }
823
824        let num_end = self.curr_byte_offset;
825        let invalid_text = &self.source[num_start..num_end];
826        let span = self.make_span(start);
827
828        let mut error_notes = smallvec![];
829        if let Some(url) = spec_url {
830            error_notes.push(GraphQLErrorNote::spec(url));
831        }
832
833        let kind = GraphQLTokenKind::error(
834            format!("{message}: `{invalid_text}`"),
835            error_notes,
836        );
837
838        self.make_token(kind, span)
839    }
840
841    // =========================================================================
842    // String lexing
843    // =========================================================================
844
845    /// Creates an error token for an unescaped newline in a single-line
846    /// string. Shared by the \n and \r error paths in `lex_string()`.
847    fn lex_string_newline_error(&mut self, start: u32) -> GraphQLToken<'src> {
848        let span = self.make_span(start);
849        let kind = GraphQLTokenKind::error(
850            "Unterminated string literal",
851            smallvec![
852                GraphQLErrorNote::general(
853                    "Single-line strings cannot contain unescaped newlines"
854                ),
855                GraphQLErrorNote::help(
856                    "Use a block string (triple quotes) for multi-line \
857                     strings, or escape the newline with `\\n`"
858                ),
859            ],
860        );
861        self.make_token(kind, span)
862    }
863
864    /// Lexes a string literal (single-line or block string).
865    fn lex_string(&mut self, start: u32) -> GraphQLToken<'src> {
866        let str_start = self.curr_byte_offset;
867
868        // Check for block string (""")
869        if self.remaining().starts_with("\"\"\"") {
870            return self.lex_block_string(start, str_start);
871        }
872
873        // Single-line string — byte-scan with SIMD-accelerated
874        // sentinel search via memchr3. The three sentinel bytes:
875        //   b'"'  — end of string
876        //   b'\\' — escape sequence
877        //   b'\n' — error (unescaped newline)
878        //
879        // For \r we check the byte immediately before each \n
880        // match (to handle \r\n), and we also check the gap
881        // between the current position and the match for any
882        // bare \r. Bare \r is extremely rare in practice so
883        // the memchr call in the gap almost never fires.
884        //
885        // This is safe for multi-byte UTF-8 because all
886        // sentinels are ASCII (<0x80) and can never appear as
887        // continuation bytes in multi-byte sequences (>=0x80).
888        let bytes = self.source.as_bytes();
889        let mut i = self.curr_byte_offset + 1; // skip opening "
890
891        loop {
892            match memchr::memchr3(b'"', b'\\', b'\n', &bytes[i..]) {
893                None => {
894                    // Before declaring EOF, check if there's a
895                    // \r in the remaining bytes.
896                    if let Some(cr_off) =
897                        memchr::memchr(b'\r', &bytes[i..])
898                    {
899                        i += cr_off + 1;
900                        if i < bytes.len() && bytes[i] == b'\n' {
901                            i += 1;
902                        }
903                        self.curr_byte_offset = i;
904                        return self.lex_string_newline_error(start);
905                    }
906                    // Hit EOF without closing quote
907                    self.curr_byte_offset = bytes.len();
908                    let span = self.make_span(start);
909                    let kind = GraphQLTokenKind::error(
910                        "Unterminated string literal",
911                        smallvec![
912                            GraphQLErrorNote::general_with_span(
913                                "String started here",
914                                self.resolve_span(span),
915                            ),
916                            GraphQLErrorNote::help("Add closing `\"`"),
917                        ],
918                    );
919                    return self.make_token(kind, span);
920                },
921                Some(offset) => {
922                    // Check for bare \r in the gap [i..i+offset)
923                    if let Some(cr_off) =
924                        memchr::memchr(b'\r', &bytes[i..i + offset])
925                    {
926                        i += cr_off + 1;
927                        if i < bytes.len() && bytes[i] == b'\n' {
928                            i += 1;
929                        }
930                        self.curr_byte_offset = i;
931                        return self.lex_string_newline_error(start);
932                    }
933
934                    i += offset;
935                    match bytes[i] {
936                        b'"' => {
937                            // End of string
938                            i += 1;
939                            break;
940                        },
941                        b'\\' => {
942                            // Escape sequence — skip backslash +
943                            // next byte (which could be `"` or `\`)
944                            i += 1;
945                            if i < bytes.len() {
946                                i += 1;
947                            }
948                        },
949                        b'\n' => {
950                            // Bare \n — any preceding \r would have
951                            // been caught by the gap-check above
952                            i += 1;
953                            self.curr_byte_offset = i;
954                            return self.lex_string_newline_error(
955                                start,
956                            );
957                        },
958                        _ => unreachable!(),
959                    }
960                },
961            }
962        }
963
964        self.curr_byte_offset = i;
965        let str_end = self.curr_byte_offset;
966        let string_text = &self.source[str_start..str_end];
967        let span = self.make_span(start);
968
969        self.make_token(GraphQLTokenKind::string_value_borrowed(string_text), span)
970    }
971
972    /// Lexes a block string literal.
973    ///
974    /// # Performance (B2 in benchmark-optimizations.md)
975    ///
976    /// Uses byte-scanning instead of per-character
977    /// `peek_char()`/`consume()` calls. The scan loop checks
978    /// each byte against the special characters (`"`, `\`, `\n`,
979    /// `\r`) and skips everything else with a single `i += 1`.
980    ///
981    /// This is safe for multi-byte UTF-8 content because the
982    /// sentinel bytes (`"` = 0x22, `\` = 0x5C, `\n` = 0x0A,
983    /// `\r` = 0x0D) are all ASCII (<0x80) and can never appear
984    /// as continuation bytes in multi-byte UTF-8 sequences
985    /// (which are always >=0x80).
986    fn lex_block_string(
987        &mut self,
988        start: u32,
989        str_start: usize,
990    ) -> GraphQLToken<'src> {
991        let bytes = self.source.as_bytes();
992
993        // Skip opening """ (3 ASCII bytes, caller verified).
994        let mut i = self.curr_byte_offset + 3;
995
996        // SIMD-accelerated scan: jump to the next `"` or `\`
997        // instead of advancing byte-by-byte through
998        // documentation text. Block string bodies are typically
999        // long runs of text where neither sentinel appears.
1000        let found_close = loop {
1001            match memchr::memchr2(b'"', b'\\', &bytes[i..]) {
1002                None => {
1003                    i = bytes.len();
1004                    break false;
1005                },
1006                Some(offset) => {
1007                    i += offset;
1008                    match bytes[i] {
1009                        b'"' if i + 2 < bytes.len()
1010                            && bytes[i + 1] == b'"'
1011                            && bytes[i + 2] == b'"' =>
1012                        {
1013                            // Closing """.
1014                            i += 3;
1015                            break true;
1016                        },
1017                        b'\\' if i + 3 < bytes.len()
1018                            && bytes[i + 1] == b'"'
1019                            && bytes[i + 2] == b'"'
1020                            && bytes[i + 3] == b'"' =>
1021                        {
1022                            // Escaped triple quote \""".
1023                            i += 4;
1024                        },
1025                        _ => {
1026                            // Lone `"` or `\` — not a
1027                            // terminator, skip past it.
1028                            i += 1;
1029                        },
1030                    }
1031                },
1032            }
1033        };
1034
1035        self.curr_byte_offset = i;
1036
1037        if !found_close {
1038            // Unterminated block string.
1039            let span = self.make_span(start);
1040            let kind = GraphQLTokenKind::error(
1041                "Unterminated block string",
1042                smallvec![
1043                    GraphQLErrorNote::general_with_span(
1044                        "Block string started here",
1045                        self.resolve_span(span),
1046                    ),
1047                    GraphQLErrorNote::help("Add closing `\"\"\"`"),
1048                ],
1049            );
1050            return self.make_token(kind, span);
1051        }
1052
1053        let str_end = self.curr_byte_offset;
1054        let string_text = &self.source[str_start..str_end];
1055        let span = self.make_span(start);
1056
1057        self.make_token(
1058            GraphQLTokenKind::string_value_borrowed(string_text),
1059            span,
1060        )
1061    }
1062
1063    // =========================================================================
1064    // Invalid character handling
1065    // =========================================================================
1066
1067    /// Lexes an invalid character, producing an error token.
1068    fn lex_invalid_character(&mut self, start: u32) -> GraphQLToken<'src> {
1069        let ch = self.consume().unwrap();
1070        let span = self.make_span(start);
1071
1072        let kind = GraphQLTokenKind::error(
1073            format!("Unexpected character {}", describe_char(ch)),
1074            smallvec![],
1075        );
1076
1077        self.make_token(kind, span)
1078    }
1079}
1080
1081// =============================================================================
1082// Iterator implementation
1083// =============================================================================
1084
1085impl<'src> Iterator for StrGraphQLTokenSource<'src> {
1086    type Item = GraphQLToken<'src>;
1087
1088    fn next(&mut self) -> Option<Self::Item> {
1089        if self.finished {
1090            return None;
1091        }
1092
1093        let token = self.next_token();
1094
1095        if matches!(token.kind, GraphQLTokenKind::Eof) {
1096            self.finished = true;
1097        }
1098
1099        Some(token)
1100    }
1101}
1102
1103impl<'src> GraphQLTokenSource<'src> for StrGraphQLTokenSource<'src> {
1104    fn source_map(&self) -> &SourceMap<'src> {
1105        &self.source_map
1106    }
1107
1108    fn into_source_map(self) -> SourceMap<'src> {
1109        self.source_map
1110    }
1111}
1112
1113// =============================================================================
1114// Helper functions
1115// =============================================================================
1116
/// Reports whether `ch` is allowed as the first character of a GraphQL
/// name: an underscore or an ASCII letter.
///
/// See `NameStart` in the GraphQL spec:
/// <https://spec.graphql.org/September2025/#NameStart>
fn is_name_start(ch: char) -> bool {
    matches!(ch, '_' | 'A'..='Z' | 'a'..='z')
}
1124
/// Precomputed GraphQL NameContinue membership for every byte value.
///
/// Entry `b` is `true` exactly when byte `b` is `_` (0x5F), an ASCII digit
/// (0x30–0x39), or an ASCII letter (0x41–0x5A, 0x61–0x7A). Every other
/// entry is `false`, including all non-ASCII bytes (>=0x80), which is
/// correct because GraphQL names are ASCII-only per spec.
const NAME_CONTINUE_TABLE: [bool; 256] = {
    let mut lookup = [false; 256];
    let mut byte = u8::MIN;
    // `for` is not available in const evaluation, so walk the byte range
    // by hand and stop after the last value to avoid overflowing the `u8`.
    loop {
        lookup[byte as usize] = byte == b'_' || byte.is_ascii_alphanumeric();
        if byte == u8::MAX {
            break;
        }
        byte += 1;
    }
    lookup
};
1143
1144/// Returns `true` if `b` can continue a GraphQL name.
1145///
1146/// Per the GraphQL spec, names continue with `NameContinue`:
1147/// <https://spec.graphql.org/September2025/#NameContinue>
1148///
1149/// Uses a lookup table for branchless O(1) classification in the
1150/// tight `lex_name()` scanning loop (see B21 in
1151/// benchmark-optimizations.md). Non-ASCII bytes (>=0x80) always
1152/// return false, which is correct since GraphQL names are
1153/// ASCII-only by spec.
1154#[inline]
1155fn is_name_continue_byte(b: u8) -> bool {
1156    NAME_CONTINUE_TABLE[b as usize]
1157}
1158
/// Returns a human-readable description of a character for error messages.
///
/// The result always starts with the character itself in backticks. What
/// follows depends on how visible the character is:
///
/// - Characters listed in [`unicode_char_name`] get their code point and
///   name: `` `<ch>` (U+200B: ZERO WIDTH SPACE) ``.
/// - Other control or whitespace characters (except a plain space) get the
///   code point only: `` `<ch>` (U+1680) ``.
/// - Everything else is returned as just `` `<ch>` ``.
///
/// The name table is consulted *before* the control/whitespace check
/// because most of its entries (zero-width characters, bidirectional
/// formatting marks, the byte order mark, tag characters, ...) are neither
/// control characters nor whitespace according to `char::is_control` /
/// `char::is_whitespace`, yet they are exactly the invisible characters
/// whose presence needs spelling out in an error message.
fn describe_char(ch: char) -> String {
    let code_point = u32::from(ch);
    match unicode_char_name(ch) {
        Some(name) => format!("`{ch}` (U+{code_point:04X}: {name})"),
        // Invisible, but not in the table: at least show the code point.
        None if ch.is_control() || (ch.is_whitespace() && ch != ' ') => {
            format!("`{ch}` (U+{code_point:04X})")
        },
        None => format!("`{ch}`"),
    }
}

/// Returns the Unicode name for well-known invisible/control characters.
///
/// This provides meaningful names for commonly encountered invisible
/// characters. Returns `None` for characters without a known name, which
/// includes every ordinary printable character and the plain space.
fn unicode_char_name(ch: char) -> Option<&'static str> {
    match ch {
        // C0 control characters (U+0000 - U+001F)
        '\u{0000}' => Some("NULL"),
        '\u{0001}' => Some("START OF HEADING"),
        '\u{0002}' => Some("START OF TEXT"),
        '\u{0003}' => Some("END OF TEXT"),
        '\u{0004}' => Some("END OF TRANSMISSION"),
        '\u{0005}' => Some("ENQUIRY"),
        '\u{0006}' => Some("ACKNOWLEDGE"),
        '\u{0007}' => Some("BELL"),
        '\u{0008}' => Some("BACKSPACE"),
        '\u{0009}' => Some("HORIZONTAL TAB"),
        '\u{000A}' => Some("LINE FEED"),
        '\u{000B}' => Some("VERTICAL TAB"),
        '\u{000C}' => Some("FORM FEED"),
        '\u{000D}' => Some("CARRIAGE RETURN"),
        '\u{000E}' => Some("SHIFT OUT"),
        '\u{000F}' => Some("SHIFT IN"),
        '\u{0010}' => Some("DATA LINK ESCAPE"),
        '\u{0011}' => Some("DEVICE CONTROL ONE"),
        '\u{0012}' => Some("DEVICE CONTROL TWO"),
        '\u{0013}' => Some("DEVICE CONTROL THREE"),
        '\u{0014}' => Some("DEVICE CONTROL FOUR"),
        '\u{0015}' => Some("NEGATIVE ACKNOWLEDGE"),
        '\u{0016}' => Some("SYNCHRONOUS IDLE"),
        '\u{0017}' => Some("END OF TRANSMISSION BLOCK"),
        '\u{0018}' => Some("CANCEL"),
        '\u{0019}' => Some("END OF MEDIUM"),
        '\u{001A}' => Some("SUBSTITUTE"),
        '\u{001B}' => Some("ESCAPE"),
        '\u{001C}' => Some("FILE SEPARATOR"),
        '\u{001D}' => Some("GROUP SEPARATOR"),
        '\u{001E}' => Some("RECORD SEPARATOR"),
        '\u{001F}' => Some("UNIT SEPARATOR"),

        // DELETE, C1 control characters and Latin-1 specials
        // (U+007F - U+00AD)
        '\u{007F}' => Some("DELETE"),
        '\u{0080}' => Some("PADDING CHARACTER"),
        '\u{0081}' => Some("HIGH OCTET PRESET"),
        '\u{0082}' => Some("BREAK PERMITTED HERE"),
        '\u{0083}' => Some("NO BREAK HERE"),
        '\u{0084}' => Some("INDEX"),
        '\u{0085}' => Some("NEXT LINE"),
        '\u{0086}' => Some("START OF SELECTED AREA"),
        '\u{0087}' => Some("END OF SELECTED AREA"),
        '\u{0088}' => Some("CHARACTER TABULATION SET"),
        '\u{0089}' => Some("CHARACTER TABULATION WITH JUSTIFICATION"),
        '\u{008A}' => Some("LINE TABULATION SET"),
        '\u{008B}' => Some("PARTIAL LINE FORWARD"),
        '\u{008C}' => Some("PARTIAL LINE BACKWARD"),
        '\u{008D}' => Some("REVERSE LINE FEED"),
        '\u{008E}' => Some("SINGLE SHIFT TWO"),
        '\u{008F}' => Some("SINGLE SHIFT THREE"),
        '\u{0090}' => Some("DEVICE CONTROL STRING"),
        '\u{0091}' => Some("PRIVATE USE ONE"),
        '\u{0092}' => Some("PRIVATE USE TWO"),
        '\u{0093}' => Some("SET TRANSMIT STATE"),
        '\u{0094}' => Some("CANCEL CHARACTER"),
        '\u{0095}' => Some("MESSAGE WAITING"),
        '\u{0096}' => Some("START OF GUARDED AREA"),
        '\u{0097}' => Some("END OF GUARDED AREA"),
        '\u{0098}' => Some("START OF STRING"),
        '\u{0099}' => Some("SINGLE GRAPHIC CHARACTER INTRODUCER"),
        '\u{009A}' => Some("SINGLE CHARACTER INTRODUCER"),
        '\u{009B}' => Some("CONTROL SEQUENCE INTRODUCER"),
        '\u{009C}' => Some("STRING TERMINATOR"),
        '\u{009D}' => Some("OPERATING SYSTEM COMMAND"),
        '\u{009E}' => Some("PRIVACY MESSAGE"),
        '\u{009F}' => Some("APPLICATION PROGRAM COMMAND"),
        '\u{00A0}' => Some("NO-BREAK SPACE"),
        '\u{00AD}' => Some("SOFT HYPHEN"),

        // General punctuation - spaces (U+2000 - U+200A)
        '\u{2000}' => Some("EN QUAD"),
        '\u{2001}' => Some("EM QUAD"),
        '\u{2002}' => Some("EN SPACE"),
        '\u{2003}' => Some("EM SPACE"),
        '\u{2004}' => Some("THREE-PER-EM SPACE"),
        '\u{2005}' => Some("FOUR-PER-EM SPACE"),
        '\u{2006}' => Some("SIX-PER-EM SPACE"),
        '\u{2007}' => Some("FIGURE SPACE"),
        '\u{2008}' => Some("PUNCTUATION SPACE"),
        '\u{2009}' => Some("THIN SPACE"),
        '\u{200A}' => Some("HAIR SPACE"),

        // Zero-width and formatting characters (U+200B - U+200F)
        '\u{200B}' => Some("ZERO WIDTH SPACE"),
        '\u{200C}' => Some("ZERO WIDTH NON-JOINER"),
        '\u{200D}' => Some("ZERO WIDTH JOINER"),
        '\u{200E}' => Some("LEFT-TO-RIGHT MARK"),
        '\u{200F}' => Some("RIGHT-TO-LEFT MARK"),

        // Bidirectional text formatting (U+202A - U+202F)
        '\u{202A}' => Some("LEFT-TO-RIGHT EMBEDDING"),
        '\u{202B}' => Some("RIGHT-TO-LEFT EMBEDDING"),
        '\u{202C}' => Some("POP DIRECTIONAL FORMATTING"),
        '\u{202D}' => Some("LEFT-TO-RIGHT OVERRIDE"),
        '\u{202E}' => Some("RIGHT-TO-LEFT OVERRIDE"),
        '\u{202F}' => Some("NARROW NO-BREAK SPACE"),

        // More formatting (U+2060 - U+206F)
        '\u{2060}' => Some("WORD JOINER"),
        '\u{2061}' => Some("FUNCTION APPLICATION"),
        '\u{2062}' => Some("INVISIBLE TIMES"),
        '\u{2063}' => Some("INVISIBLE SEPARATOR"),
        '\u{2064}' => Some("INVISIBLE PLUS"),
        '\u{2066}' => Some("LEFT-TO-RIGHT ISOLATE"),
        '\u{2067}' => Some("RIGHT-TO-LEFT ISOLATE"),
        '\u{2068}' => Some("FIRST STRONG ISOLATE"),
        '\u{2069}' => Some("POP DIRECTIONAL ISOLATE"),
        '\u{206A}' => Some("INHIBIT SYMMETRIC SWAPPING"),
        '\u{206B}' => Some("ACTIVATE SYMMETRIC SWAPPING"),
        '\u{206C}' => Some("INHIBIT ARABIC FORM SHAPING"),
        '\u{206D}' => Some("ACTIVATE ARABIC FORM SHAPING"),
        '\u{206E}' => Some("NATIONAL DIGIT SHAPES"),
        '\u{206F}' => Some("NOMINAL DIGIT SHAPES"),

        // Other special spaces
        '\u{2028}' => Some("LINE SEPARATOR"),
        '\u{2029}' => Some("PARAGRAPH SEPARATOR"),
        '\u{205F}' => Some("MEDIUM MATHEMATICAL SPACE"),
        '\u{3000}' => Some("IDEOGRAPHIC SPACE"),

        // Special characters
        '\u{034F}' => Some("COMBINING GRAPHEME JOINER"),
        '\u{061C}' => Some("ARABIC LETTER MARK"),
        '\u{115F}' => Some("HANGUL CHOSEONG FILLER"),
        '\u{1160}' => Some("HANGUL JUNGSEONG FILLER"),
        '\u{17B4}' => Some("KHMER VOWEL INHERENT AQ"),
        '\u{17B5}' => Some("KHMER VOWEL INHERENT AA"),
        '\u{180E}' => Some("MONGOLIAN VOWEL SEPARATOR"),

        // BOM and special
        '\u{FEFF}' => Some("BYTE ORDER MARK"),
        '\u{FFFE}' => Some("NONCHARACTER"),
        '\u{FFFF}' => Some("NONCHARACTER"),

        // Interlinear annotation
        '\u{FFF9}' => Some("INTERLINEAR ANNOTATION ANCHOR"),
        '\u{FFFA}' => Some("INTERLINEAR ANNOTATION SEPARATOR"),
        '\u{FFFB}' => Some("INTERLINEAR ANNOTATION TERMINATOR"),

        // Tag characters (U+E0000 - U+E007F)
        '\u{E0001}' => Some("LANGUAGE TAG"),
        '\u{E0020}' => Some("TAG SPACE"),

        _ => None,
    }
}
1331
#[cfg(test)]
mod name_continue_table_tests {
    use super::is_name_continue_byte;

    /// Exhaustively compares `is_name_continue_byte` against the
    /// straightforward predicate it replaced,
    /// `b == b'_' || b.is_ascii_alphanumeric()`, for all 256 byte values.
    ///
    /// A failure here means the lookup table no longer matches that
    /// reference logic.
    ///
    /// Written by Claude Code, reviewed by a human.
    #[test]
    fn name_continue_table_matches_spec() {
        for i in u8::MIN..=u8::MAX {
            let actual = is_name_continue_byte(i);
            let expected = i == b'_' || i.is_ascii_alphanumeric();
            assert_eq!(
                actual,
                expected,
                "Mismatch at byte {i} (0x{i:02X}): table says {}, \
                 original logic says {expected}",
                actual,
            );
        }
    }
}