use crate::ByteSpan;
use crate::GraphQLErrorNote;
use crate::SourceMap;
use crate::SourceSpan;
use crate::smallvec::smallvec;
use crate::token::GraphQLToken;
use crate::token::GraphQLTokenKind;
use crate::token::GraphQLTriviaToken;
use crate::token::GraphQLTriviaTokenVec;
use crate::token::GraphQLTokenSource;
use crate::token::StrGraphQLTokenSourceConfig;
use std::borrow::Cow;
use std::path::Path;
use std::path::PathBuf;
/// A GraphQL token source that lexes directly from a borrowed `&str`.
///
/// Tokens are produced through the [`Iterator`] implementation, which stops
/// yielding after the `Eof` token has been returned once.
pub struct StrGraphQLTokenSource<'src> {
/// The complete text being lexed.
source: &'src str,
/// Byte offset into `source` of the next unread byte.
curr_byte_offset: usize,
/// Trivia collected since the last emitted token; it is moved into the
/// next token's `preceding_trivia` when that token is built.
pending_trivia: GraphQLTriviaTokenVec<'src>,
/// Set once the `Eof` token has been yielded by the iterator.
finished: bool,
/// Source map built from `source` (and the file path, when one was given).
source_map: SourceMap<'src>,
/// Flags controlling which trivia (commas, whitespace, comments) is retained.
config: StrGraphQLTokenSourceConfig,
}
impl<'src> StrGraphQLTokenSource<'src> {
pub fn new(source: &'src str) -> Self {
Self {
source,
curr_byte_offset: 0,
pending_trivia: smallvec![],
finished: false,
source_map: SourceMap::new_with_source(source, None),
config: StrGraphQLTokenSourceConfig::default(),
}
}
/// Creates a token source over `source` using the supplied `config`.
///
/// The source map is built without a file path and lexing starts at byte
/// offset zero.
pub fn with_config(
    source: &'src str,
    config: StrGraphQLTokenSourceConfig,
) -> Self {
    let source_map = SourceMap::new_with_source(source, None);
    Self {
        config,
        source_map,
        source,
        finished: false,
        pending_trivia: smallvec![],
        curr_byte_offset: 0,
    }
}
/// Creates a token source over `source` whose source map records `path`
/// as the originating file, using the default configuration.
///
/// `path` is copied into an owned `PathBuf`, so it only needs to live for
/// the duration of this call; it is not required to outlive `'src`.
pub fn with_file_path(source: &'src str, path: &Path) -> Self {
    let file_path: PathBuf = path.to_path_buf();
    Self {
        source,
        curr_byte_offset: 0,
        pending_trivia: smallvec![],
        finished: false,
        source_map: SourceMap::new_with_source(source, Some(file_path)),
        config: StrGraphQLTokenSourceConfig::default(),
    }
}
/// Returns the not-yet-consumed tail of the source text.
fn remaining(&self) -> &'src str {
    let consumed = self.curr_byte_offset;
    &self.source[consumed..]
}
/// Returns the current cursor position as a `u32` byte offset.
///
/// The conversion is a plain narrowing cast of the `usize` cursor.
fn curr_offset(&self) -> u32 {
    let offset: usize = self.curr_byte_offset;
    offset as u32
}
/// Returns the character at the cursor without consuming it, or `None`
/// when the cursor is at the end of the source.
///
/// ASCII bytes are returned directly; anything else is decoded from the
/// string slice starting at the cursor.
#[inline]
fn peek_char(&self) -> Option<char> {
    let cursor = self.curr_byte_offset;
    let lead = *self.source.as_bytes().get(cursor)?;
    if lead.is_ascii() {
        return Some(char::from(lead));
    }
    self.source[cursor..].chars().next()
}
/// Returns the character `n` characters ahead of the cursor (`n == 0` is
/// the character at the cursor), or `None` if the input ends first.
fn peek_char_nth(&self, n: usize) -> Option<char> {
    let mut upcoming = self.remaining().chars();
    upcoming.nth(n)
}
/// Consumes and returns the character at the cursor, advancing the cursor
/// by that character's UTF-8 length. Returns `None` at end of input.
fn consume(&mut self) -> Option<char> {
    let ch = self.peek_char()?;
    // An ASCII character has a UTF-8 length of 1, so this single update
    // covers both the single-byte and the multi-byte case.
    self.curr_byte_offset += ch.len_utf8();
    Some(ch)
}
/// Builds the byte span running from `start` up to the current cursor.
#[inline]
fn make_span(&self, start: u32) -> ByteSpan {
    let end = self.curr_byte_offset as u32;
    ByteSpan::new(start, end)
}
/// Resolves a byte span through the source map, substituting
/// `SourceSpan::zero()` when the source map cannot resolve it.
fn resolve_span(&self, span: ByteSpan) -> SourceSpan {
    let resolved = self.source_map.resolve_span(span);
    resolved.unwrap_or_else(SourceSpan::zero)
}
/// Builds a token of the given kind and span, attaching (and clearing)
/// all trivia accumulated since the previous token.
fn make_token(
    &mut self,
    kind: GraphQLTokenKind<'src>,
    span: ByteSpan,
) -> GraphQLToken<'src> {
    let preceding_trivia = std::mem::take(&mut self.pending_trivia);
    GraphQLToken {
        kind,
        preceding_trivia,
        span,
    }
}
/// Lexes and returns the next token.
///
/// Whitespace, comments and commas are skipped (and recorded as pending
/// trivia when the configuration asks for it). At end of input an `Eof`
/// token is returned.
fn next_token(&mut self) -> GraphQLToken<'src> {
    loop {
        self.skip_whitespace();
        let start = self.curr_offset();

        let Some(ch) = self.peek_char() else {
            let span = self.make_span(start);
            return self.make_token(GraphQLTokenKind::Eof, span);
        };

        // Trivia arms `continue`, multi-character lexemes `return` from
        // their dedicated lexer, and single-character punctuators fall
        // through with their token kind.
        let punctuator = match ch {
            '#' => {
                self.lex_comment(start);
                continue;
            }
            ',' => {
                self.consume();
                if self.config.retain_commas {
                    let span = self.make_span(start);
                    self.pending_trivia
                        .push(GraphQLTriviaToken::Comma { span });
                }
                continue;
            }
            '.' => return self.lex_dot_or_ellipsis(start),
            '"' => return self.lex_string(start),
            '!' => GraphQLTokenKind::Bang,
            '$' => GraphQLTokenKind::Dollar,
            '&' => GraphQLTokenKind::Ampersand,
            '(' => GraphQLTokenKind::ParenOpen,
            ')' => GraphQLTokenKind::ParenClose,
            ':' => GraphQLTokenKind::Colon,
            '=' => GraphQLTokenKind::Equals,
            '@' => GraphQLTokenKind::At,
            '[' => GraphQLTokenKind::SquareBracketOpen,
            ']' => GraphQLTokenKind::SquareBracketClose,
            '{' => GraphQLTokenKind::CurlyBraceOpen,
            '}' => GraphQLTokenKind::CurlyBraceClose,
            '|' => GraphQLTokenKind::Pipe,
            _ if is_name_start(ch) => return self.lex_name(start),
            _ if ch == '-' || ch.is_ascii_digit() => {
                return self.lex_number(start);
            }
            _ => return self.lex_invalid_character(start),
        };

        self.consume();
        let span = self.make_span(start);
        return self.make_token(punctuator, span);
    }
}
/// Advances the cursor past a run of spaces, tabs, line feeds, carriage
/// returns and UTF-8 byte-order marks.
///
/// When `retain_whitespace` is enabled and at least one byte was skipped,
/// the skipped run is recorded as a single whitespace trivia entry.
fn skip_whitespace(&mut self) {
    let bytes = self.source.as_bytes();
    let run_start = self.curr_byte_offset;
    let span_start = self.curr_offset();

    let mut cursor = run_start;
    while let Some(&byte) = bytes.get(cursor) {
        let step = match byte {
            b' ' | b'\t' | b'\n' | b'\r' => 1,
            // U+FEFF encoded as UTF-8 is the three bytes EF BB BF.
            0xEF if bytes[cursor..].starts_with(b"\xEF\xBB\xBF") => 3,
            _ => break,
        };
        cursor += step;
    }

    if cursor == run_start {
        return;
    }
    self.curr_byte_offset = cursor;

    if !self.config.retain_whitespace {
        return;
    }
    let value = &self.source[run_start..cursor];
    let span = self.make_span(span_start);
    self.pending_trivia.push(GraphQLTriviaToken::Whitespace {
        value: Cow::Borrowed(value),
        span,
    });
}
/// Consumes a `#` comment up to (but not including) the next `\n` or `\r`,
/// or to the end of input.
///
/// `start` is the offset of the `#`. When `retain_comments` is enabled the
/// text after the `#` is recorded as comment trivia, with a span that
/// begins at `start`.
fn lex_comment(&mut self, start: u32) {
    // Step over the leading `#`.
    self.curr_byte_offset += 1;
    let content_start = self.curr_byte_offset;

    let bytes = self.source.as_bytes();
    let content_end =
        match memchr::memchr2(b'\n', b'\r', &bytes[content_start..]) {
            Some(rel) => content_start + rel,
            None => bytes.len(),
        };
    self.curr_byte_offset = content_end;

    if !self.config.retain_comments {
        return;
    }
    let span = self.make_span(start);
    let content = &self.source[content_start..content_end];
    self.pending_trivia.push(GraphQLTriviaToken::Comment {
        value: Cow::Borrowed(content),
        span,
    });
}
fn lex_dot_or_ellipsis(&mut self, start: u32) -> GraphQLToken<'src> {
self.consume();
self.skip_whitespace_same_line();
match self.peek_char() {
Some('.') => {
let second_dot_start = self.curr_offset();
let first_two_adjacent = second_dot_start == start + 1;
self.consume();
self.skip_whitespace_same_line();
match self.peek_char() {
Some('.') => {
let third_dot_start = self.curr_offset();
self.consume();
let span = self.make_span(start);
let second_third_adjacent =
third_dot_start == second_dot_start + 1;
if first_two_adjacent && second_third_adjacent {
self.make_token(GraphQLTokenKind::Ellipsis, span)
} else if first_two_adjacent {
let kind = GraphQLTokenKind::error(
"Unexpected `.. .`",
smallvec![GraphQLErrorNote::help(
"This `.` may have been intended to complete a `...` \
spread operator. Try removing the extra spacing \
between the dots."
)],
);
self.make_token(kind, span)
} else if second_third_adjacent {
let kind = GraphQLTokenKind::error(
"Unexpected `. ..`",
smallvec![GraphQLErrorNote::help(
"These dots may have been intended to form a `...` \
spread operator. Try removing the extra spacing \
between the dots."
)],
);
self.make_token(kind, span)
} else {
let kind = GraphQLTokenKind::error(
"Unexpected `. . .`",
smallvec![GraphQLErrorNote::help(
"These dots may have been intended to form a `...` \
spread operator. Try removing the extra spacing \
between the dots."
)],
);
self.make_token(kind, span)
}
}
_ => {
let span = self.make_span(start);
if first_two_adjacent {
let kind = GraphQLTokenKind::error(
"Unexpected `..` (use `...` for spread operator)",
smallvec![GraphQLErrorNote::help(
"Add one more `.` to form the spread operator `...`"
)],
);
self.make_token(kind, span)
} else {
let kind = GraphQLTokenKind::error(
"Unexpected `. .` (use `...` for spread operator)",
smallvec![GraphQLErrorNote::help(
"These dots may have been intended to form a `...` \
spread operator. Try removing the extra spacing \
between the dots."
)],
);
self.make_token(kind, span)
}
}
}
}
_ => {
let span = self.make_span(start);
let kind = GraphQLTokenKind::error("Unexpected `.`", smallvec![]);
self.make_token(kind, span)
}
}
}
/// Consumes spaces, tabs and U+FEFF characters at the cursor, stopping at
/// the first other character (line terminators included) or at end of
/// input. Nothing is recorded as trivia.
fn skip_whitespace_same_line(&mut self) {
    while matches!(self.peek_char(), Some(' ' | '\t' | '\u{FEFF}')) {
        self.consume();
    }
}
/// Lexes a name starting at the cursor and returns it as a token.
///
/// The first byte is taken as-is (the caller has already checked it is a
/// valid name start); the name then extends over every following
/// name-continue byte. The exact names `true`, `false` and `null` become
/// their dedicated token kinds.
fn lex_name(&mut self, start: u32) -> GraphQLToken<'src> {
    let name_start = self.curr_byte_offset;
    let tail = &self.source.as_bytes()[name_start + 1..];
    let continue_len = tail
        .iter()
        .take_while(|&&byte| is_name_continue_byte(byte))
        .count();
    let name_end = name_start + 1 + continue_len;
    self.curr_byte_offset = name_end;

    let span = self.make_span(start);
    let kind = match &self.source[name_start..name_end] {
        "true" => GraphQLTokenKind::True,
        "false" => GraphQLTokenKind::False,
        "null" => GraphQLTokenKind::Null,
        other => GraphQLTokenKind::name_borrowed(other),
    };
    self.make_token(kind, span)
}
/// Lexes an integer or float literal starting at the cursor.
///
/// Accepts an optional leading `-`, an integer part (a lone `0` or a run
/// of digits), an optional fraction (`.` followed by at least one digit)
/// and an optional exponent (`e`/`E`, optional sign, at least one digit).
/// Error tokens are produced for a `-` with no digit after it, for a
/// leading zero followed by another digit, and for an exponent with no
/// digits.
fn lex_number(&mut self, start: u32) -> GraphQLToken<'src> {
    let num_start = self.curr_byte_offset;

    if self.peek_char() == Some('-') {
        self.consume();
    }

    // Integer part.
    match self.peek_char() {
        Some('0') => {
            self.consume();
            if matches!(self.peek_char(), Some(d) if d.is_ascii_digit()) {
                return self.lex_number_error(
                    start,
                    num_start,
                    "Invalid number: leading zeros are not allowed",
                    Some("https://spec.graphql.org/September2025/#sec-Int-Value"),
                );
            }
        }
        Some(first) if first.is_ascii_digit() => {
            while matches!(self.peek_char(), Some(d) if d.is_ascii_digit()) {
                self.consume();
            }
        }
        _ => {
            let span = self.make_span(start);
            let kind = GraphQLTokenKind::error("Unexpected `-`", smallvec![]);
            return self.make_token(kind, span);
        }
    }

    // Fraction: the `.` is only consumed when a digit follows it.
    let has_fraction = self.peek_char() == Some('.')
        && matches!(self.peek_char_nth(1), Some(d) if d.is_ascii_digit());
    if has_fraction {
        self.consume();
        while matches!(self.peek_char(), Some(d) if d.is_ascii_digit()) {
            self.consume();
        }
    }

    // Exponent.
    let has_exponent = matches!(self.peek_char(), Some('e' | 'E'));
    if has_exponent {
        self.consume();
        if matches!(self.peek_char(), Some('+' | '-')) {
            self.consume();
        }
        if !matches!(self.peek_char(), Some(d) if d.is_ascii_digit()) {
            return self.lex_number_error(
                start,
                num_start,
                "Invalid number: exponent must have at least one digit",
                Some("https://spec.graphql.org/September2025/#sec-Float-Value"),
            );
        }
        while matches!(self.peek_char(), Some(d) if d.is_ascii_digit()) {
            self.consume();
        }
    }

    let num_text = &self.source[num_start..self.curr_byte_offset];
    let span = self.make_span(start);
    let kind = if has_fraction || has_exponent {
        GraphQLTokenKind::float_value_borrowed(num_text)
    } else {
        GraphQLTokenKind::int_value_borrowed(num_text)
    };
    self.make_token(kind, span)
}
fn lex_number_error(
&mut self,
start: u32,
num_start: usize,
message: &str,
spec_url: Option<&str>,
) -> GraphQLToken<'src> {
while let Some(ch) = self.peek_char() {
if ch.is_ascii_digit() || ch == '.' || ch == 'e' || ch == 'E' || ch == '+' || ch == '-' {
self.consume();
} else {
break;
}
}
let num_end = self.curr_byte_offset;
let invalid_text = &self.source[num_start..num_end];
let span = self.make_span(start);
let mut error_notes = smallvec![];
if let Some(url) = spec_url {
error_notes.push(GraphQLErrorNote::spec(url));
}
let kind = GraphQLTokenKind::error(
format!("{message}: `{invalid_text}`"),
error_notes,
);
self.make_token(kind, span)
}
fn lex_string_newline_error(&mut self, start: u32) -> GraphQLToken<'src> {
let span = self.make_span(start);
let kind = GraphQLTokenKind::error(
"Unterminated string literal",
smallvec![
GraphQLErrorNote::general(
"Single-line strings cannot contain unescaped newlines"
),
GraphQLErrorNote::help(
"Use a block string (triple quotes) for multi-line \
strings, or escape the newline with `\\n`"
),
],
);
self.make_token(kind, span)
}
/// Lexes a string literal starting at the opening `"` under the cursor.
///
/// A `"""` opener is delegated to `lex_block_string`. Otherwise the input
/// is scanned for the closing quote: a backslash skips the byte after it,
/// and an unescaped `\n` or `\r` (with a directly following `\n`) ends the
/// token as an "unterminated string" error. Reaching end of input without
/// a closing quote is also an error. On success the token holds the raw
/// source text including both quotes; escapes are not decoded here.
fn lex_string(&mut self, start: u32) -> GraphQLToken<'src> {
    let str_start = self.curr_byte_offset;
    if self.source[str_start..].starts_with("\"\"\"") {
        return self.lex_block_string(start, str_start);
    }

    let bytes = self.source.as_bytes();
    // Begin scanning just past the opening quote.
    let mut cursor = str_start + 1;
    loop {
        let hit = memchr::memchr3(b'"', b'\\', b'\n', &bytes[cursor..]);
        let scan_end = hit.map_or(bytes.len(), |rel| cursor + rel);

        // A carriage return before the next interesting byte (or before
        // end of input) terminates the line, and with it the string.
        if let Some(cr_rel) = memchr::memchr(b'\r', &bytes[cursor..scan_end]) {
            cursor += cr_rel + 1;
            if bytes.get(cursor) == Some(&b'\n') {
                cursor += 1;
            }
            self.curr_byte_offset = cursor;
            return self.lex_string_newline_error(start);
        }

        if hit.is_none() {
            self.curr_byte_offset = bytes.len();
            let span = self.make_span(start);
            let kind = GraphQLTokenKind::error(
                "Unterminated string literal",
                smallvec![
                    GraphQLErrorNote::general_with_span(
                        "String started here",
                        self.resolve_span(span),
                    ),
                    GraphQLErrorNote::help("Add closing `\"`"),
                ],
            );
            return self.make_token(kind, span);
        }

        cursor = scan_end;
        match bytes[cursor] {
            b'"' => {
                cursor += 1;
                break;
            }
            b'\\' => {
                // Skip the backslash and, when present, the byte after it.
                cursor = (cursor + 2).min(bytes.len());
            }
            b'\n' => {
                self.curr_byte_offset = cursor + 1;
                return self.lex_string_newline_error(start);
            }
            _ => unreachable!(),
        }
    }

    self.curr_byte_offset = cursor;
    let string_text = &self.source[str_start..cursor];
    let span = self.make_span(start);
    self.make_token(GraphQLTokenKind::string_value_borrowed(string_text), span)
}
/// Lexes a block string whose opening `"""` starts at `str_start` (the
/// current cursor).
///
/// The scan looks for the closing `"""`; the sequence `\"""` is stepped
/// over so it does not close the string. With no closing delimiter the
/// rest of the input is consumed and an error token is returned. On
/// success the token holds the raw source text including both delimiters.
fn lex_block_string(
    &mut self,
    start: u32,
    str_start: usize,
) -> GraphQLToken<'src> {
    let bytes = self.source.as_bytes();
    // Begin scanning just past the opening `"""`.
    let mut cursor = self.curr_byte_offset + 3;
    let mut found_close = false;

    while let Some(rel) = memchr::memchr2(b'"', b'\\', &bytes[cursor..]) {
        cursor += rel;
        let tail = &bytes[cursor..];
        if tail.starts_with(b"\"\"\"") {
            cursor += 3;
            found_close = true;
            break;
        }
        // `\"""` is skipped as a unit; any other quote or backslash is
        // an ordinary byte.
        cursor += if tail.starts_with(b"\\\"\"\"") { 4 } else { 1 };
    }

    if !found_close {
        self.curr_byte_offset = bytes.len();
        let span = self.make_span(start);
        let kind = GraphQLTokenKind::error(
            "Unterminated block string",
            smallvec![
                GraphQLErrorNote::general_with_span(
                    "Block string started here",
                    self.resolve_span(span),
                ),
                GraphQLErrorNote::help("Add closing `\"\"\"`"),
            ],
        );
        return self.make_token(kind, span);
    }

    self.curr_byte_offset = cursor;
    let string_text = &self.source[str_start..cursor];
    let span = self.make_span(start);
    self.make_token(
        GraphQLTokenKind::string_value_borrowed(string_text),
        span,
    )
}
fn lex_invalid_character(&mut self, start: u32) -> GraphQLToken<'src> {
let ch = self.consume().unwrap();
let span = self.make_span(start);
let kind = GraphQLTokenKind::error(
format!("Unexpected character {}", describe_char(ch)),
smallvec![],
);
self.make_token(kind, span)
}
}
/// Yields tokens in source order, ending with exactly one `Eof` token;
/// every call after that returns `None`.
impl<'src> Iterator for StrGraphQLTokenSource<'src> {
    type Item = GraphQLToken<'src>;

    fn next(&mut self) -> Option<Self::Item> {
        if self.finished {
            return None;
        }
        let token = self.next_token();
        // `finished` is known to be false here, so a plain assignment is
        // the same as setting the flag only when `Eof` is seen.
        self.finished = matches!(token.kind, GraphQLTokenKind::Eof);
        Some(token)
    }
}
impl<'src> GraphQLTokenSource<'src> for StrGraphQLTokenSource<'src> {
    /// Borrows the source map built for this token source.
    fn source_map(&self) -> &SourceMap<'src> {
        let Self { source_map, .. } = self;
        source_map
    }

    /// Consumes the token source and hands back its source map.
    fn into_source_map(self) -> SourceMap<'src> {
        let Self { source_map, .. } = self;
        source_map
    }
}
/// Returns `true` when `ch` may begin a name: an underscore or an ASCII
/// letter.
fn is_name_start(ch: char) -> bool {
    matches!(ch, '_' | 'A'..='Z' | 'a'..='z')
}
/// Lookup table indexed by byte value: `true` for bytes that may appear
/// after the first character of a name (`_`, ASCII digits, ASCII letters).
const NAME_CONTINUE_TABLE: [bool; 256] = {
    let mut lookup = [false; 256];
    let mut byte: u8 = 0;
    loop {
        lookup[byte as usize] = byte == b'_' || byte.is_ascii_alphanumeric();
        if byte == u8::MAX {
            break;
        }
        byte += 1;
    }
    lookup
};
/// Returns `true` when `b` may appear after the first character of a name,
/// using a table lookup.
#[inline]
fn is_name_continue_byte(b: u8) -> bool {
    let slot = usize::from(b);
    NAME_CONTINUE_TABLE[slot]
}
/// Returns a human-readable rendering of `ch` for error messages.
///
/// * Characters listed in [`unicode_char_name`] are rendered as
///   `` `c` (U+XXXX: NAME) ``. The table is consulted first because many of
///   its entries (zero-width space, byte order mark, bidirectional
///   controls, soft hyphen, ...) are neither `char::is_control` nor
///   `char::is_whitespace`, yet are invisible and need a description.
/// * Other control or whitespace characters (except the plain space) are
///   rendered as `` `c` (U+XXXX) ``.
/// * Everything else is rendered as `` `c` ``.
fn describe_char(ch: char) -> String {
    if let Some(name) = unicode_char_name(ch) {
        return format!("`{}` (U+{:04X}: {})", ch, ch as u32, name);
    }
    if ch.is_control() || (ch.is_whitespace() && ch != ' ') {
        format!("`{}` (U+{:04X})", ch, ch as u32)
    } else {
        format!("`{ch}`")
    }
}

/// Returns a descriptive name for selected control, whitespace and
/// invisible formatting characters, or `None` for any other character.
fn unicode_char_name(ch: char) -> Option<&'static str> {
    match ch {
        // C0 controls and DELETE.
        '\u{0000}' => Some("NULL"),
        '\u{0001}' => Some("START OF HEADING"),
        '\u{0002}' => Some("START OF TEXT"),
        '\u{0003}' => Some("END OF TEXT"),
        '\u{0004}' => Some("END OF TRANSMISSION"),
        '\u{0005}' => Some("ENQUIRY"),
        '\u{0006}' => Some("ACKNOWLEDGE"),
        '\u{0007}' => Some("BELL"),
        '\u{0008}' => Some("BACKSPACE"),
        '\u{0009}' => Some("HORIZONTAL TAB"),
        '\u{000A}' => Some("LINE FEED"),
        '\u{000B}' => Some("VERTICAL TAB"),
        '\u{000C}' => Some("FORM FEED"),
        '\u{000D}' => Some("CARRIAGE RETURN"),
        '\u{000E}' => Some("SHIFT OUT"),
        '\u{000F}' => Some("SHIFT IN"),
        '\u{0010}' => Some("DATA LINK ESCAPE"),
        '\u{0011}' => Some("DEVICE CONTROL ONE"),
        '\u{0012}' => Some("DEVICE CONTROL TWO"),
        '\u{0013}' => Some("DEVICE CONTROL THREE"),
        '\u{0014}' => Some("DEVICE CONTROL FOUR"),
        '\u{0015}' => Some("NEGATIVE ACKNOWLEDGE"),
        '\u{0016}' => Some("SYNCHRONOUS IDLE"),
        '\u{0017}' => Some("END OF TRANSMISSION BLOCK"),
        '\u{0018}' => Some("CANCEL"),
        '\u{0019}' => Some("END OF MEDIUM"),
        '\u{001A}' => Some("SUBSTITUTE"),
        '\u{001B}' => Some("ESCAPE"),
        '\u{001C}' => Some("FILE SEPARATOR"),
        '\u{001D}' => Some("GROUP SEPARATOR"),
        '\u{001E}' => Some("RECORD SEPARATOR"),
        '\u{001F}' => Some("UNIT SEPARATOR"),
        '\u{007F}' => Some("DELETE"),
        // C1 controls.
        '\u{0080}' => Some("PADDING CHARACTER"),
        '\u{0081}' => Some("HIGH OCTET PRESET"),
        '\u{0082}' => Some("BREAK PERMITTED HERE"),
        '\u{0083}' => Some("NO BREAK HERE"),
        '\u{0084}' => Some("INDEX"),
        '\u{0085}' => Some("NEXT LINE"),
        '\u{0086}' => Some("START OF SELECTED AREA"),
        '\u{0087}' => Some("END OF SELECTED AREA"),
        '\u{0088}' => Some("CHARACTER TABULATION SET"),
        '\u{0089}' => Some("CHARACTER TABULATION WITH JUSTIFICATION"),
        '\u{008A}' => Some("LINE TABULATION SET"),
        '\u{008B}' => Some("PARTIAL LINE FORWARD"),
        '\u{008C}' => Some("PARTIAL LINE BACKWARD"),
        '\u{008D}' => Some("REVERSE LINE FEED"),
        '\u{008E}' => Some("SINGLE SHIFT TWO"),
        '\u{008F}' => Some("SINGLE SHIFT THREE"),
        '\u{0090}' => Some("DEVICE CONTROL STRING"),
        '\u{0091}' => Some("PRIVATE USE ONE"),
        '\u{0092}' => Some("PRIVATE USE TWO"),
        '\u{0093}' => Some("SET TRANSMIT STATE"),
        '\u{0094}' => Some("CANCEL CHARACTER"),
        '\u{0095}' => Some("MESSAGE WAITING"),
        '\u{0096}' => Some("START OF GUARDED AREA"),
        '\u{0097}' => Some("END OF GUARDED AREA"),
        '\u{0098}' => Some("START OF STRING"),
        '\u{0099}' => Some("SINGLE GRAPHIC CHARACTER INTRODUCER"),
        '\u{009A}' => Some("SINGLE CHARACTER INTRODUCER"),
        '\u{009B}' => Some("CONTROL SEQUENCE INTRODUCER"),
        '\u{009C}' => Some("STRING TERMINATOR"),
        '\u{009D}' => Some("OPERATING SYSTEM COMMAND"),
        '\u{009E}' => Some("PRIVACY MESSAGE"),
        '\u{009F}' => Some("APPLICATION PROGRAM COMMAND"),
        // Latin-1 invisibles.
        '\u{00A0}' => Some("NO-BREAK SPACE"),
        '\u{00AD}' => Some("SOFT HYPHEN"),
        // General Punctuation: spaces, joiners, directional controls.
        '\u{2000}' => Some("EN QUAD"),
        '\u{2001}' => Some("EM QUAD"),
        '\u{2002}' => Some("EN SPACE"),
        '\u{2003}' => Some("EM SPACE"),
        '\u{2004}' => Some("THREE-PER-EM SPACE"),
        '\u{2005}' => Some("FOUR-PER-EM SPACE"),
        '\u{2006}' => Some("SIX-PER-EM SPACE"),
        '\u{2007}' => Some("FIGURE SPACE"),
        '\u{2008}' => Some("PUNCTUATION SPACE"),
        '\u{2009}' => Some("THIN SPACE"),
        '\u{200A}' => Some("HAIR SPACE"),
        '\u{200B}' => Some("ZERO WIDTH SPACE"),
        '\u{200C}' => Some("ZERO WIDTH NON-JOINER"),
        '\u{200D}' => Some("ZERO WIDTH JOINER"),
        '\u{200E}' => Some("LEFT-TO-RIGHT MARK"),
        '\u{200F}' => Some("RIGHT-TO-LEFT MARK"),
        '\u{202A}' => Some("LEFT-TO-RIGHT EMBEDDING"),
        '\u{202B}' => Some("RIGHT-TO-LEFT EMBEDDING"),
        '\u{202C}' => Some("POP DIRECTIONAL FORMATTING"),
        '\u{202D}' => Some("LEFT-TO-RIGHT OVERRIDE"),
        '\u{202E}' => Some("RIGHT-TO-LEFT OVERRIDE"),
        '\u{202F}' => Some("NARROW NO-BREAK SPACE"),
        '\u{2060}' => Some("WORD JOINER"),
        '\u{2061}' => Some("FUNCTION APPLICATION"),
        '\u{2062}' => Some("INVISIBLE TIMES"),
        '\u{2063}' => Some("INVISIBLE SEPARATOR"),
        '\u{2064}' => Some("INVISIBLE PLUS"),
        '\u{2066}' => Some("LEFT-TO-RIGHT ISOLATE"),
        '\u{2067}' => Some("RIGHT-TO-LEFT ISOLATE"),
        '\u{2068}' => Some("FIRST STRONG ISOLATE"),
        '\u{2069}' => Some("POP DIRECTIONAL ISOLATE"),
        '\u{206A}' => Some("INHIBIT SYMMETRIC SWAPPING"),
        '\u{206B}' => Some("ACTIVATE SYMMETRIC SWAPPING"),
        '\u{206C}' => Some("INHIBIT ARABIC FORM SHAPING"),
        '\u{206D}' => Some("ACTIVATE ARABIC FORM SHAPING"),
        '\u{206E}' => Some("NATIONAL DIGIT SHAPES"),
        '\u{206F}' => Some("NOMINAL DIGIT SHAPES"),
        '\u{2028}' => Some("LINE SEPARATOR"),
        '\u{2029}' => Some("PARAGRAPH SEPARATOR"),
        '\u{205F}' => Some("MEDIUM MATHEMATICAL SPACE"),
        '\u{3000}' => Some("IDEOGRAPHIC SPACE"),
        // Assorted invisible characters from other blocks.
        '\u{034F}' => Some("COMBINING GRAPHEME JOINER"),
        '\u{061C}' => Some("ARABIC LETTER MARK"),
        '\u{115F}' => Some("HANGUL CHOSEONG FILLER"),
        '\u{1160}' => Some("HANGUL JUNGSEONG FILLER"),
        '\u{17B4}' => Some("KHMER VOWEL INHERENT AQ"),
        '\u{17B5}' => Some("KHMER VOWEL INHERENT AA"),
        '\u{180E}' => Some("MONGOLIAN VOWEL SEPARATOR"),
        '\u{FEFF}' => Some("BYTE ORDER MARK"),
        '\u{FFFE}' => Some("NONCHARACTER"),
        '\u{FFFF}' => Some("NONCHARACTER"),
        '\u{FFF9}' => Some("INTERLINEAR ANNOTATION ANCHOR"),
        '\u{FFFA}' => Some("INTERLINEAR ANNOTATION SEPARATOR"),
        '\u{FFFB}' => Some("INTERLINEAR ANNOTATION TERMINATOR"),
        '\u{E0001}' => Some("LANGUAGE TAG"),
        '\u{E0020}' => Some("TAG SPACE"),
        _ => None,
    }
}
#[cfg(test)]
mod name_continue_table_tests {
    use super::is_name_continue_byte;

    /// Checks every possible byte value against the straightforward
    /// predicate the lookup table is meant to encode.
    #[test]
    fn name_continue_table_matches_spec() {
        for i in u8::MIN..=u8::MAX {
            let actual = is_name_continue_byte(i);
            let expected = i == b'_' || i.is_ascii_alphanumeric();
            assert_eq!(
                actual,
                expected,
                "Mismatch at byte {i} (0x{i:02X}): table says {}, \
                 original logic says {expected}",
                actual,
            );
        }
    }
}