use std::fmt;
use logos::Lexer;
use logos::Logos;
/// Extra state carried by the `Token` lexer (wired in through `#[logos(extras = ...)]`).
#[derive(Default, Eq, PartialEq)]
pub struct TokenExtras {
/// More specific error kind recorded by the string callbacks (`lex_string`,
/// `lex_block_string`) right before they fail by returning `None`.
/// NOTE(review): nothing in this file resets the field, so a stale value can linger
/// after a later successful token — presumably the consumer clears it; verify.
pub error_token: Option<Token<'static>>,
}
/// Lexical tokens of GraphQL source text, recognised by the `logos`-derived lexer.
///
/// Between tokens the lexer skips runs of space, tab, CR, LF, form feed, comma and
/// U+FEFF, as well as `#` comments running up to the end of the line (the `skip`
/// pattern below). Variants carrying a `&'a str` hold a slice of the lexed source.
#[derive(Logos, Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[logos(extras = TokenExtras, skip r"[ \t\r\n\f,\ufeff]+|#[^\n\r]*")]
pub enum Token<'a> {
// The next three variants have no pattern attached. Within this file they are only
// ever stored in `TokenExtras::error_token` by the string callbacks.
ErrorUnterminatedString,
ErrorUnsupportedStringCharacter,
ErrorUnterminatedBlockString,
// No pattern attached and not produced anywhere in this file.
// NOTE(review): presumably a placeholder synthesized by the parser — confirm.
Empty,
#[token("&")]
Ampersand,
#[token("@")]
At,
#[token("}")]
CloseBrace,
#[token("]")]
CloseBracket,
#[token(")")]
CloseParen,
#[token(":")]
Colon,
#[token("$")]
Dollar,
// No pattern attached: the lexer iterator signals exhaustion with `None` instead.
// NOTE(review): presumably synthesized by the consumer at end of input — confirm.
EndOfFile,
#[token("=")]
Equals,
#[token("!")]
Exclamation,
// Keywords. Each exact keyword gets its own token kind rather than `Identifier`
// (the tests below lex `query` as `Token::Query` and `false` as `Token::False`).
#[token("schema")]
Schema,
#[token("query")]
Query,
#[token("mutation")]
Mutation,
#[token("subscription")]
Subscription,
#[token("type")]
Type,
#[token("input")]
Input,
#[token("true")]
True,
#[token("false")]
False,
#[token("null")]
Null,
#[token("implements")]
Implements,
#[token("interface")]
Interface,
#[token("enum")]
Enum,
#[token("union")]
Union,
#[token("scalar")]
Scalar,
#[token("extend")]
Extend,
#[token("directive")]
Directive,
#[token("repeatable")]
Repeatable,
#[token("on")]
On,
#[token("fragment")]
Fragment,
// Integer part (`0` or a non-zero digit followed by digits, optional leading `-`)
// followed by a fraction, an exponent, or a fraction and an exponent.
#[regex("-?(0|[1-9][0-9]*)(\\.[0-9]+[eE][+-]?[0-9]+|\\.[0-9]+|[eE][+-]?[0-9]+)", |lex| lex.slice())]
FloatLiteral(&'a str),
// A letter or underscore followed by any number of letters, digits or underscores.
#[regex("[a-zA-Z_][a-zA-Z0-9_]*", |lex| lex.slice())]
Identifier(&'a str),
// Integer part only: `0`, or a non-zero digit followed by digits, optional leading `-`.
#[regex("-?(0|[1-9][0-9]*)", |lex| lex.slice())]
IntegerLiteral(&'a str),
// Malformed numbers are lexed as dedicated tokens so they come back as `Ok(..)`
// with a precise kind (see `test_number_failures`).
// A `0` followed by further digits, e.g. `00`, `01`, `-01`, `01.23`.
#[regex("-?0[0-9]+(\\.[0-9]+[eE][+-]?[0-9]+|\\.[0-9]+|[eE][+-]?[0-9]+)?")]
ErrorNumberLiteralLeadingZero,
// A valid integer or float immediately followed by `.`, a letter or `_`,
// e.g. `1.`, `1e`, `1.23.4`; the offending character is part of the token.
#[regex("-?(0|[1-9][0-9]*)(\\.[0-9]+[eE][+-]?[0-9]+|\\.[0-9]+|[eE][+-]?[0-9]+)?[.a-zA-Z_]")]
ErrorNumberLiteralTrailingInvalid,
// A fraction with no integer part in front of the dot, e.g. `.123`.
#[regex("-?(\\.[0-9]+[eE][+-]?[0-9]+|\\.[0-9]+)")]
ErrorFloatLiteralMissingZero,
#[token("{")]
OpenBrace,
#[token("[")]
OpenBracket,
#[token("(")]
OpenParen,
#[token(".")]
Period,
#[token("..")]
PeriodPeriod,
#[token("|")]
Pipe,
#[token("...")]
Spread,
// The pattern only matches the opening quote; `lex_string` then consumes the rest.
// On success the payload is the whole literal including both quotes.
#[token("\"", lex_string)]
StringLiteral(&'a str),
// Same scheme for block strings: the pattern matches the opening `"""` and
// `lex_block_string` consumes up to and including the closing `"""`.
#[token("\"\"\"", lex_block_string)]
BlockStringLiteral(&'a str),
}
/// Pieces of a single-line string body; `lex_string` runs this lexer over the input
/// that follows the opening quote.
#[derive(Logos, Debug)]
pub enum StringToken {
// A backslash followed by one of `"`, `\`, `/`, `b`, `f`, `n`, `r`, `t`.
#[regex(r#"\\["\\/bfnrt]"#)]
EscapedCharacter,
// `\u` followed by exactly four hexadecimal digits.
#[regex(r#"\\u[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f]"#)]
EscapedUnicode,
// An unescaped quote: terminates the string.
#[token("\"")]
Quote,
// LF, CR or CRLF; `lex_string` treats it as an unterminated string.
#[regex(r#"\n|\r|\r\n"#)]
LineTerminator,
// A run of ordinary characters: tab, or U+0020..=U+FFFF except `"` (U+0022) and
// `\` (U+005C). Anything else (other control characters, code points above U+FFFF,
// a backslash that does not start a valid escape) matches no pattern here.
#[regex(r#"[\u0009\u0020\u0021\u0023-\u005B\u005D-\uFFFF]+"#)]
StringCharacters,
}
/// Callback for `Token::StringLiteral`, invoked once the opening `"` has been matched.
///
/// Scans the remaining input with the `StringToken` sub-lexer. On the closing quote
/// the outer lexer is advanced past it and the whole literal (quotes included) is
/// returned. On failure `None` is returned and the reason is stored in
/// `lexer.extras.error_token`:
/// * line terminator before the closing quote: the outer lexer is advanced up to (not
///   including) the terminator and `ErrorUnterminatedString` is recorded;
/// * input the sub-lexer cannot match: `ErrorUnsupportedStringCharacter` is recorded
///   and the outer lexer is not advanced;
/// * end of input: `ErrorUnterminatedString` is recorded and the outer lexer is not
///   advanced.
fn lex_string<'a>(lexer: &mut Lexer<'a, Token<'a>>) -> Option<&'a str> {
    let mut body = StringToken::lexer(lexer.remainder());

    // Either return the finished literal from inside the loop, or leave the loop with
    // the error kind to report.
    let failure = loop {
        match body.next() {
            Some(Ok(StringToken::Quote)) => {
                // Spans of `body` are relative to the remainder, i.e. to the current
                // end of the outer token, so they can be passed to `bump` as-is.
                lexer.bump(body.span().end);
                return Some(lexer.slice());
            }
            Some(Ok(
                StringToken::EscapedCharacter
                | StringToken::EscapedUnicode
                | StringToken::StringCharacters,
            )) => {}
            Some(Ok(StringToken::LineTerminator)) => {
                lexer.bump(body.span().start);
                break Token::ErrorUnterminatedString;
            }
            Some(Err(_)) => break Token::ErrorUnsupportedStringCharacter,
            None => break Token::ErrorUnterminatedString,
        }
    };

    lexer.extras.error_token = Some(failure);
    None
}
/// Callback for `Token::BlockStringLiteral`, invoked once the opening `"""` has been
/// matched.
///
/// Scans the remaining input with the `BlockStringToken` sub-lexer. On the closing
/// `"""` the outer lexer is advanced past it and the whole literal (delimiters
/// included) is returned. On failure `None` is returned, the outer lexer is left
/// where it was, and the reason is stored in `lexer.extras.error_token`:
/// * a character the sub-lexer does not accept: `ErrorUnsupportedStringCharacter`;
/// * end of input before the closing `"""`: `ErrorUnterminatedBlockString`.
fn lex_block_string<'a>(lexer: &mut Lexer<'a, Token<'a>>) -> Option<&'a str> {
    let remainder = lexer.remainder();
    let mut string_lexer = BlockStringToken::lexer(remainder);
    while let Some(string_token) = string_lexer.next() {
        match string_token {
            Ok(BlockStringToken::TripleQuote) => {
                // Sub-lexer spans are relative to `remainder`, so the end offset is
                // exactly how far the outer lexer has to advance.
                lexer.bump(string_lexer.span().end);
                return Some(lexer.slice());
            }
            Ok(BlockStringToken::EscapedTripleQuote | BlockStringToken::Other) => {}
            Err(_) => {
                // `BlockStringToken::Other` only accepts tab, LF, CR and
                // U+0020..=U+FFFF, so other control characters and code points above
                // U+FFFF do end up here. Report them as a lexing error (as
                // `lex_string` does) instead of panicking on user input.
                lexer.extras.error_token = Some(Token::ErrorUnsupportedStringCharacter);
                return None;
            }
        }
    }
    lexer.extras.error_token = Some(Token::ErrorUnterminatedBlockString);
    None
}
/// Pieces of a block string body; `lex_block_string` runs this lexer over the input
/// that follows the opening `"""`.
#[derive(Logos, Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub enum BlockStringToken {
// `\"""`: an escaped triple quote, which does not terminate the block string.
#[token("\\\"\"\"")]
EscapedTripleQuote,
// `"""`: terminates the block string.
#[token("\"\"\"")]
TripleQuote,
// Any single other character out of tab, LF, CR and U+0020..=U+FFFF. Remaining
// control characters and code points above U+FFFF match no pattern of this lexer.
#[regex(r#"[\u0009\u000A\u000D\u0020-\uFFFF]"#)]
Other,
}
impl fmt::Display for Token<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let message = match self {
Token::Ampersand => "ampersand ('&')",
Token::At => "at ('@')",
Token::CloseBrace => "closing brace ('}')",
Token::CloseBracket => "closing bracket (']')",
Token::CloseParen => "closing paren (')')",
Token::Colon => "colon (':')",
Token::Dollar => "dollar ('$')",
Token::EndOfFile => "end of file",
Token::Equals => "equals ('=')",
Token::Exclamation => "exclamation mark ('!')",
Token::FloatLiteral(_) => "floating point value (e.g. '3.14')",
Token::Identifier(_) => "non-variable identifier (e.g. 'x' or 'Foo')",
Token::IntegerLiteral(_) => "integer value (e.g. '0' or '42')",
Token::OpenBrace => "open brace ('{')",
Token::OpenBracket => "open bracket ('[')",
Token::OpenParen => "open parenthesis ('(')",
Token::Period => "period ('.')",
Token::PeriodPeriod => "double period ('..')",
Token::Pipe => "pipe ('|')",
Token::Spread => "spread ('...')",
Token::BlockStringLiteral(_) => "block string (e.g. '\"\"\"hi\"\"\"')",
Token::ErrorFloatLiteralMissingZero => "unsupported number (int or float) literal",
Token::ErrorNumberLiteralLeadingZero => "unsupported number (int or float) literal",
Token::ErrorNumberLiteralTrailingInvalid => "unsupported number (int or float) literal",
Token::StringLiteral(_) => "string literal (e.g. '\"...\"')",
Token::ErrorUnterminatedString => "unterminated string",
Token::ErrorUnsupportedStringCharacter => "unsupported character in string",
Token::ErrorUnterminatedBlockString => "unterminated block string",
Token::Empty => "missing expected kind",
Token::Schema => "schema",
Token::Query => "query",
Token::Mutation => "mutation",
Token::Subscription => "subscription",
Token::Type => "type",
Token::Input => "input",
Token::True => "true",
Token::False => "false",
Token::Null => "null,",
Token::Implements => "implements",
Token::Interface => "interface",
Token::Enum => "enum",
Token::Union => "union",
Token::Scalar => "scalar",
Token::Directive => "directive",
Token::Repeatable => "repeatable",
Token::On => "on",
Token::Extend => "extend",
Token::Fragment => "fragment",
};
f.write_str(message)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Lexes `source` and checks that the first item is `Ok(kind)` spanning `0..length`.
    fn assert_token(source: &str, kind: Token, length: usize) {
        let expected_span = 0..length;
        let mut lexer = Token::lexer(source);
        assert_eq!(
            lexer.next(),
            Some(Ok(kind)),
            "Testing the lexing of string '{}'",
            source
        );
        assert_eq!(
            lexer.span(),
            expected_span,
            "Testing the lexing of string '{}'",
            source
        );
    }

    /// Lexes `source` and checks that the first item is an error spanning `0..length`.
    fn assert_error(source: &str, length: usize) {
        let expected_span = 0..length;
        let mut lexer = Token::lexer(source);
        assert_eq!(
            lexer.next(),
            Some(Err(())),
            "Testing lexing fails for string '{}'",
            source
        );
        assert_eq!(
            lexer.span(),
            expected_span,
            "Testing the lexing of string '{}'",
            source
        );
    }

    /// Valid integer and float literals lex as a single token covering the whole input.
    #[test]
    fn test_number_successes() {
        let cases = [
            ("4", Token::IntegerLiteral("4"), 1),
            ("4.123", Token::FloatLiteral("4.123"), 5),
            ("-4", Token::IntegerLiteral("-4"), 2),
            ("9", Token::IntegerLiteral("9"), 1),
            ("0", Token::IntegerLiteral("0"), 1),
            ("-4.123", Token::FloatLiteral("-4.123"), 6),
            ("0.123", Token::FloatLiteral("0.123"), 5),
            ("123e4", Token::FloatLiteral("123e4"), 5),
            ("123E4", Token::FloatLiteral("123E4"), 5),
            ("123e-4", Token::FloatLiteral("123e-4"), 6),
            ("123e+4", Token::FloatLiteral("123e+4"), 6),
            ("-1.123e4", Token::FloatLiteral("-1.123e4"), 8),
            ("-1.123E4", Token::FloatLiteral("-1.123E4"), 8),
            ("-1.123e-4", Token::FloatLiteral("-1.123e-4"), 9),
            ("-1.123e+4", Token::FloatLiteral("-1.123e+4"), 9),
            ("-1.123e4567", Token::FloatLiteral("-1.123e4567"), 11),
            ("-0", Token::IntegerLiteral("-0"), 2),
        ];
        for (source, kind, length) in cases {
            assert_token(source, kind, length);
        }
    }

    /// Malformed numbers lex either as a dedicated error token (`Some(kind)`) or as a
    /// plain lexer error (`None`); the last column is the expected span length.
    #[test]
    fn test_number_failures() {
        let leading_zero = Some(Token::ErrorNumberLiteralLeadingZero);
        let trailing_invalid = Some(Token::ErrorNumberLiteralTrailingInvalid);
        let missing_zero = Some(Token::ErrorFloatLiteralMissingZero);

        let cases = [
            ("00", leading_zero, 2),
            ("01", leading_zero, 2),
            ("-01", leading_zero, 3),
            ("+1", None, 1),
            ("01.23", leading_zero, 5),
            ("1.", trailing_invalid, 2),
            ("1e", trailing_invalid, 2),
            ("1.e1", trailing_invalid, 2),
            ("1.A", trailing_invalid, 2),
            ("-A", None, 1),
            ("1.0e", trailing_invalid, 4),
            ("1.0eA", trailing_invalid, 4),
            ("1.2e3e", trailing_invalid, 6),
            ("1.2e3.4", trailing_invalid, 6),
            ("1.23.4", trailing_invalid, 5),
            (".123", missing_zero, 4),
            ("1.23.{}", trailing_invalid, 5),
            ("1.23. {}", trailing_invalid, 5),
            ("1.23. []", trailing_invalid, 5),
            ("1.23. foo", trailing_invalid, 5),
            ("1.23. $foo", trailing_invalid, 5),
        ];
        for (source, expected, length) in cases {
            match expected {
                Some(kind) => assert_token(source, kind, length),
                None => assert_error(source, length),
            }
        }
    }

    /// A small query lexes into the expected sequence of tokens and slices.
    #[test]
    fn test_lexing() {
        let input = "
query EmptyQuery($id: ID!) {
node(id: $id) {
id @skip(if: false)
...E1
}
}
";
        let expected = [
            (Token::Query, "query"),
            (Token::Identifier("EmptyQuery"), "EmptyQuery"),
            (Token::OpenParen, "("),
            (Token::Dollar, "$"),
            (Token::Identifier("id"), "id"),
            (Token::Colon, ":"),
            (Token::Identifier("ID"), "ID"),
            (Token::Exclamation, "!"),
            (Token::CloseParen, ")"),
            (Token::OpenBrace, "{"),
            (Token::Identifier("node"), "node"),
            (Token::OpenParen, "("),
            (Token::Identifier("id"), "id"),
            (Token::Colon, ":"),
            (Token::Dollar, "$"),
            (Token::Identifier("id"), "id"),
            (Token::CloseParen, ")"),
            (Token::OpenBrace, "{"),
            (Token::Identifier("id"), "id"),
            (Token::At, "@"),
            (Token::Identifier("skip"), "skip"),
            (Token::OpenParen, "("),
            (Token::Identifier("if"), "if"),
            (Token::Colon, ":"),
            (Token::False, "false"),
            (Token::CloseParen, ")"),
            (Token::Spread, "..."),
            (Token::Identifier("E1"), "E1"),
            (Token::CloseBrace, "}"),
            (Token::CloseBrace, "}"),
        ];

        let mut lexer = Token::lexer(input);
        for (token, text) in expected {
            assert_eq!(lexer.next(), Some(Ok(token)));
            assert_eq!(lexer.slice(), text);
        }
        assert_eq!(lexer.next(), None);
    }

    /// Single-line strings: terminated ones keep their quotes, a line break before the
    /// closing quote is reported as an unterminated string.
    #[test]
    fn test_string_lexing() {
        let input = r#"
"test"
"escaped \" quote"
"unterminated
"
"#;
        let mut lexer = Token::lexer(input);

        for literal in ["\"test\"", r#""escaped \" quote""#] {
            assert_eq!(lexer.next(), Some(Ok(Token::StringLiteral(literal))));
            assert_eq!(lexer.slice(), literal);
        }

        assert_eq!(lexer.next(), Some(Err(())));
        assert_eq!(
            lexer.extras.error_token,
            Some(Token::ErrorUnterminatedString)
        );
        assert_eq!(lexer.slice(), "\"unterminated");
    }

    /// Characters outside the grammar produce one error per character and lexing
    /// continues afterwards.
    #[test]
    fn test_invalid_character_lexing() {
        let input = r#"
{
%%%
__typename
*
}
"#;
        let expected = [
            (Ok(Token::OpenBrace), "{"),
            (Err(()), "%"),
            (Err(()), "%"),
            (Err(()), "%"),
            (Ok(Token::Identifier("__typename")), "__typename"),
            (Err(()), "*"),
            (Ok(Token::CloseBrace), "}"),
        ];

        let mut lexer = Token::lexer(input);
        for (outcome, text) in expected {
            assert_eq!(lexer.next(), Some(outcome));
            assert_eq!(lexer.slice(), text);
        }
        assert_eq!(lexer.next(), None);
    }

    /// Block strings: escaped triple quotes, empty bodies, embedded quotes and line
    /// breaks are accepted; a missing closing delimiter is reported as an error.
    #[test]
    fn test_block_string_lexing() {
        let input = r#"
# escaped
"""tes\"""t"""
# empty
""""""
# 2 quotes in a string
""""" """
"""
multi-
line
"""
"""unterminated
"#;
        let well_formed = [
            r#""""tes\"""t""""#,
            r#""""""""#,
            r#"""""" """"#,
            r#""""
multi-
line
""""#,
        ];

        let mut lexer = Token::lexer(input);
        for literal in well_formed {
            assert_eq!(lexer.next(), Some(Ok(Token::BlockStringLiteral(literal))));
            assert_eq!(lexer.slice(), literal);
        }

        assert_eq!(lexer.next(), Some(Err(())));
        assert_eq!(
            lexer.extras.error_token,
            Some(Token::ErrorUnterminatedBlockString)
        );
        assert_eq!(lexer.slice(), r#"""""#);
    }

    /// A lone byte-order mark is skipped like whitespace, leaving no tokens.
    #[test]
    fn test_bom_lexing() {
        let mut lexer = Token::lexer("\u{feff}");
        assert_eq!(lexer.next(), None);
    }
}