use crate::{
ast::LuaVersion,
visitors::{Visit, VisitMut, Visitor, VisitorMut},
ShortString,
};
use super::{Lexer, LexerResult};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use std::{
cmp::Ordering,
fmt::{self, Display},
};
// Generates the `Symbol` enum plus its `from_str` constructor and `Display`
// impl from a single table of `Name => "text"` entries. An optional
// `[version | ...]` prefix gates an entry behind `#[cfg(feature = "...")]`
// flags; `paste::paste!` glues `"" version` into the feature-name literal.
macro_rules! symbol {
    {
        $(#[$symbol_meta:meta])*
        pub enum Symbol {
            $(
                $(#[$meta:meta])*
                $([$($version:ident)|+])? $name:ident => $string:literal,
            )+
        }
    } => {
        paste::paste! {
            $(#[$symbol_meta])*
            pub enum Symbol {
                $(
                    $(#[$meta])*
                    // Version-gated variants only exist when at least one of
                    // their feature flags is enabled.
                    $(
                        #[cfg(any(
                            $(feature = "" $version),+
                        ))]
                    )*
                    #[cfg_attr(feature = "serde", serde(rename = $string))]
                    $name,
                )+
            }

            impl Symbol {
                // Parses `symbol` text into a `Symbol`. Returns `None` when the
                // text is not a symbol, or when the symbol is not valid for the
                // requested `lua_version` (a runtime `has_version!` check on top
                // of the compile-time feature gate).
                #[allow(unused)]
                pub fn from_str(symbol: &str, lua_version: LuaVersion) -> Option<Self> {
                    match symbol {
                        $(
                            $(
                                #[cfg(any(
                                    $(feature = "" $version),+
                                ))]
                            )?
                            $string => {
                                if !crate::has_version!(lua_version, $($($version,)+)?) {
                                    return None;
                                }

                                Some(Self::$name)
                            },
                        )+

                        _ => None,
                    }
                }
            }

            impl Display for Symbol {
                // Writes the symbol back out exactly as it appears in Lua source.
                fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
                    match self {
                        $(
                            $(
                                #[cfg(any(
                                    $(feature = "" $version),+
                                ))]
                            )*
                            Self::$name => f.write_str($string),
                        )+
                    }
                }
            }
        }
    };
}
symbol! {
    // The complete set of symbols (keywords and operators) the tokenizer can
    // produce. Entries with a leading `[version | ...]` bracket only compile
    // when one of those feature flags is enabled, and `Symbol::from_str`
    // additionally validates them against the active `LuaVersion` at runtime.
    #[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
    #[non_exhaustive]
    #[allow(missing_docs)]
    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
    pub enum Symbol {
        // Keywords shared by every supported Lua version:
        And => "and",
        Break => "break",
        Do => "do",
        Else => "else",
        ElseIf => "elseif",
        End => "end",
        False => "false",
        For => "for",
        Function => "function",
        If => "if",
        In => "in",
        Local => "local",
        Nil => "nil",
        Not => "not",
        Or => "or",
        Repeat => "repeat",
        Return => "return",
        Then => "then",
        True => "true",
        Until => "until",
        While => "while",

        // Version-gated keywords and compound-assignment / extension operators:
        [lua52 | luajit] Goto => "goto",
        [luau | cfxlua] PlusEqual => "+=",
        [luau | cfxlua] MinusEqual => "-=",
        [luau | cfxlua] StarEqual => "*=",
        [luau | cfxlua] SlashEqual => "/=",
        [luau] DoubleSlashEqual => "//=",
        [luau] PercentEqual => "%=",
        [luau | cfxlua] CaretEqual => "^=",
        [luau] TwoDotsEqual => "..=",
        [luau | lua53] Ampersand => "&",
        [luau] ThinArrow => "->",
        [luau | lua52 | luajit] TwoColons => "::",
        [luau] AtSign => "@",
        [cfxlua] DoubleLessThanEqual => "<<=",
        [cfxlua] DoubleGreaterThanEqual => ">>=",
        [cfxlua] AmpersandEqual => "&=",
        [cfxlua] PipeEqual => "|=",
        [cfxlua] QuestionMarkDot => "?.",

        // Operators and punctuation (mostly universal; a few version-gated
        // entries are interleaved in their lexical grouping):
        Caret => "^",
        Colon => ":",
        Comma => ",",
        Dot => ".",
        TwoDots => "..",
        Ellipsis => "...",
        Equal => "=",
        TwoEqual => "==",
        GreaterThan => ">",
        GreaterThanEqual => ">=",
        [lua53] DoubleGreaterThan => ">>",
        Hash => "#",
        LeftBrace => "{",
        LeftBracket => "[",
        LeftParen => "(",
        LessThan => "<",
        LessThanEqual => "<=",
        [lua53] DoubleLessThan => "<<",
        Minus => "-",
        Percent => "%",
        [luau | lua53] Pipe => "|",
        Plus => "+",
        [luau] QuestionMark => "?",
        RightBrace => "}",
        RightBracket => "]",
        RightParen => ")",
        Semicolon => ";",
        Slash => "/",
        [luau | lua53] DoubleSlash => "//",
        Star => "*",
        [lua53] Tilde => "~",
        TildeEqual => "~=",
    }
}
/// Where a Luau interpolated-string segment sits relative to its `{expr}`
/// interpolations (see how [`Token`]'s `Display` renders each variant).
#[cfg(feature = "luau")]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum InterpolatedStringKind {
    /// Rendered as `` `literal{ `` — the opening text before the first interpolation.
    Begin,

    /// Rendered as `}literal{` — text between two interpolations.
    Middle,

    /// Rendered as `` }literal` `` — text after the last interpolation.
    End,

    /// Rendered as `` `literal` `` — a string containing no interpolations.
    Simple,
}
/// The kinds of errors the tokenizer can report (carried inside [`TokenizerError`]).
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
pub enum TokenizerErrorType {
    /// A comment was opened but never closed.
    UnclosedComment,

    /// A string literal was opened but never closed.
    UnclosedString,

    /// A number literal could not be parsed.
    InvalidNumber,

    /// A character that does not begin any valid token was encountered.
    UnexpectedToken(char),

    /// Text that was expected to be a symbol was not one
    /// (e.g. from [`TokenReference::symbol`]).
    InvalidSymbol(String),
}
impl fmt::Display for TokenizerErrorType {
fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
TokenizerErrorType::UnclosedComment => "unclosed comment".fmt(formatter),
TokenizerErrorType::UnclosedString => "unclosed string".fmt(formatter),
TokenizerErrorType::UnexpectedToken(character) => {
write!(formatter, "unexpected character {character}")
}
TokenizerErrorType::InvalidNumber => "invalid number".fmt(formatter),
TokenizerErrorType::InvalidSymbol(symbol) => {
write!(formatter, "invalid symbol {symbol}")
}
}
}
}
/// Serde helper: `true` when `input` is zero.
///
/// Takes `&usize` (rather than `usize`) because `serde(skip_serializing_if)`
/// requires a `fn(&T) -> bool` signature.
fn is_usize_zero(input: &usize) -> bool {
    matches!(input, 0)
}
/// The type of a [`Token`], carrying the token's text payload where applicable.
#[derive(Clone, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
#[cfg_attr(feature = "serde", serde(tag = "type"))]
#[non_exhaustive]
pub enum TokenType {
    /// End of file; carries no text.
    Eof,

    /// An identifier.
    Identifier {
        /// The identifier text itself.
        identifier: ShortString,
    },

    /// A multi-line comment, `--[[ ... ]]`.
    MultiLineComment {
        /// Number of `=` characters in the brackets: `--[==[ ... ]==]` has 2.
        blocks: usize,
        /// The comment body, without the `--[[`/`]]` delimiters.
        comment: ShortString,
    },

    /// A number literal, kept as its source text.
    Number {
        /// The literal exactly as written.
        text: ShortString,
    },

    /// A shebang line (e.g. `#!/usr/bin/env lua`).
    Shebang {
        /// The full shebang line.
        line: ShortString,
    },

    /// A single-line comment, `-- ...`.
    SingleLineComment {
        /// The comment text after the `--`.
        comment: ShortString,
    },

    /// A string literal.
    StringLiteral {
        /// The literal's contents, without the quotes/brackets.
        literal: ShortString,
        // `=` depth for bracket strings; 0 for quoted strings, so serde can
        // omit it when zero.
        #[cfg_attr(feature = "serde", serde(skip_serializing_if = "is_usize_zero"))]
        multi_line_depth: usize,
        /// Which quoting style the literal used.
        quote_type: StringLiteralQuoteType,
    },

    /// A [`Symbol`] (keyword or operator).
    Symbol {
        /// The symbol itself.
        symbol: Symbol,
    },

    /// A run of whitespace.
    Whitespace {
        /// The exact whitespace characters.
        characters: ShortString,
    },

    /// One segment of a Luau interpolated string.
    #[cfg(feature = "luau")]
    InterpolatedString {
        /// The segment's literal text.
        literal: ShortString,
        /// Where this segment sits relative to the interpolations.
        kind: InterpolatedStringKind,
    },

    /// A CfxLua C-style comment, `/* ... */`.
    #[cfg(feature = "cfxlua")]
    CStyleComment {
        /// The comment body, without the `/*`/`*/` delimiters.
        comment: ShortString,
    },
}
impl TokenType {
    /// Returns whether the token is "trivia" — whitespace, comments, or a
    /// shebang line: content that can be skipped without changing the meaning
    /// of the program.
    pub fn is_trivia(&self) -> bool {
        // A single match with a feature-gated arm replaces the previous pair of
        // whole-function `#[cfg]`/`#[cfg(not)]` duplicates, so the base list is
        // written only once.
        match self {
            TokenType::Shebang { .. }
            | TokenType::SingleLineComment { .. }
            | TokenType::MultiLineComment { .. }
            | TokenType::Whitespace { .. } => true,

            #[cfg(feature = "cfxlua")]
            TokenType::CStyleComment { .. } => true,

            _ => false,
        }
    }

    /// Returns the payload-free [`TokenKind`] for this token type.
    pub fn kind(&self) -> TokenKind {
        match self {
            TokenType::Eof => TokenKind::Eof,
            TokenType::Identifier { .. } => TokenKind::Identifier,
            TokenType::MultiLineComment { .. } => TokenKind::MultiLineComment,
            TokenType::Number { .. } => TokenKind::Number,
            TokenType::Shebang { .. } => TokenKind::Shebang,
            TokenType::SingleLineComment { .. } => TokenKind::SingleLineComment,
            TokenType::StringLiteral { .. } => TokenKind::StringLiteral,
            TokenType::Symbol { .. } => TokenKind::Symbol,
            TokenType::Whitespace { .. } => TokenKind::Whitespace,

            #[cfg(feature = "luau")]
            TokenType::InterpolatedString { .. } => TokenKind::InterpolatedString,

            #[cfg(feature = "cfxlua")]
            TokenType::CStyleComment { .. } => TokenKind::CStyleComment,
        }
    }

    /// Returns a whitespace token of `spaces` space characters.
    pub fn spaces(spaces: usize) -> Self {
        TokenType::Whitespace {
            characters: " ".repeat(spaces).into(),
        }
    }

    /// Returns a whitespace token of `tabs` tab characters.
    pub fn tabs(tabs: usize) -> Self {
        TokenType::Whitespace {
            characters: "\t".repeat(tabs).into(),
        }
    }
}
/// The kind of a token: [`TokenType`] with the payloads stripped, useful for
/// cheap dispatch (see [`TokenType::kind`]).
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[non_exhaustive]
pub enum TokenKind {
    /// End of file.
    Eof,
    /// An identifier.
    Identifier,
    /// A multi-line comment.
    MultiLineComment,
    /// A number literal.
    Number,
    /// A shebang line.
    Shebang,
    /// A single-line comment.
    SingleLineComment,
    /// A string literal.
    StringLiteral,
    /// A symbol (keyword or operator).
    Symbol,
    /// Whitespace.
    Whitespace,
    /// A Luau interpolated-string segment.
    #[cfg(feature = "luau")]
    InterpolatedString,
    /// A CfxLua C-style comment.
    #[cfg(feature = "cfxlua")]
    CStyleComment,
}
/// A single token: its type/payload plus its start and end positions in the
/// source text.
#[derive(Clone, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
pub struct Token {
    // Where the token begins in the source.
    pub(crate) start_position: Position,
    // Where the token ends in the source.
    pub(crate) end_position: Position,
    // The token's type and text payload.
    pub(crate) token_type: TokenType,
}
impl Token {
    /// Creates a token of the given type with default (zeroed) positions.
    pub fn new(token_type: TokenType) -> Token {
        let unset = Position::default();

        Token {
            start_position: unset,
            end_position: unset,
            token_type,
        }
    }

    /// The type of this token, including its text payload.
    pub fn token_type(&self) -> &TokenType {
        &self.token_type
    }

    /// The payload-free kind of this token.
    pub fn token_kind(&self) -> TokenKind {
        self.token_type().kind()
    }

    /// The position where this token starts.
    pub fn start_position(&self) -> Position {
        self.start_position
    }

    /// The position where this token ends.
    pub fn end_position(&self) -> Position {
        self.end_position
    }
}
impl fmt::Display for Token {
    /// Writes the token's exact source representation (EOF writes nothing).
    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        use self::TokenType::*;

        match self.token_type() {
            Eof => Ok(()),
            Number { text } => text.fmt(formatter),
            Identifier { identifier } => identifier.fmt(formatter),
            MultiLineComment { blocks, comment } => {
                // `blocks` is the `=` depth of the bracket pair: --[==[ ... ]==]
                write!(formatter, "--[{0}[{1}]{0}]", "=".repeat(*blocks), comment)
            }
            Shebang { line } => line.fmt(formatter),
            SingleLineComment { comment } => write!(formatter, "--{comment}"),
            StringLiteral {
                literal,
                multi_line_depth,
                quote_type,
            } => {
                if *quote_type == StringLiteralQuoteType::Brackets {
                    write!(
                        formatter,
                        "[{0}[{1}]{0}]",
                        "=".repeat(*multi_line_depth),
                        literal
                    )
                } else {
                    // Format `quote_type` directly instead of calling
                    // `.to_string()` inside `write!` — same output, but avoids
                    // an intermediate allocation (clippy:
                    // `to_string_in_format_args`). `quote_type` is never
                    // `Brackets` here, so its `Display` cannot fail.
                    write!(formatter, "{0}{1}{0}", quote_type, literal)
                }
            }
            Symbol { symbol } => symbol.fmt(formatter),
            Whitespace { characters } => characters.fmt(formatter),

            #[cfg(feature = "luau")]
            InterpolatedString { literal, kind } => match kind {
                InterpolatedStringKind::Begin => {
                    write!(formatter, "`{literal}{{")
                }
                InterpolatedStringKind::Middle => {
                    write!(formatter, "}}{literal}{{")
                }
                InterpolatedStringKind::End => {
                    write!(formatter, "}}{literal}`")
                }
                InterpolatedStringKind::Simple => {
                    write!(formatter, "`{literal}`")
                }
            },

            #[cfg(feature = "cfxlua")]
            CStyleComment { comment } => write!(formatter, "/*{comment}*/"),
        }
    }
}
impl Ord for Token {
    // Tokens are ordered by where they start in the source.
    // NOTE(review): this compares only `start_position`, while `PartialEq`
    // (derived) compares the whole token — two distinct tokens at the same
    // position compare `Equal` here. Confirm this looseness is intended.
    fn cmp(&self, other: &Self) -> Ordering {
        self.start_position().cmp(&other.start_position())
    }
}

impl PartialOrd for Token {
    // Delegates to `Ord`, as is conventional.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
impl Visit for Token {
    // Double-dispatch: every token goes through `visit_token`, then through the
    // kind-specific visitor hook (Eof and Shebang have no dedicated hook here).
    fn visit<V: Visitor>(&self, visitor: &mut V) {
        visitor.visit_token(self);

        match self.token_kind() {
            TokenKind::Eof => {}
            TokenKind::Identifier => visitor.visit_identifier(self),
            TokenKind::MultiLineComment => visitor.visit_multi_line_comment(self),
            TokenKind::Number => visitor.visit_number(self),
            TokenKind::Shebang => {}
            TokenKind::SingleLineComment => visitor.visit_single_line_comment(self),
            TokenKind::StringLiteral => visitor.visit_string_literal(self),
            TokenKind::Symbol => visitor.visit_symbol(self),
            TokenKind::Whitespace => visitor.visit_whitespace(self),

            #[cfg(feature = "luau")]
            TokenKind::InterpolatedString => visitor.visit_interpolated_string_segment(self),

            #[cfg(feature = "cfxlua")]
            TokenKind::CStyleComment => visitor.visit_c_style_comment(self),
        }
    }
}
impl VisitMut for Token {
    // Mutable mirror of `Visit for Token`: the token is threaded through
    // `visit_token` first, then through the kind-specific hook, each of which
    // may replace it.
    fn visit_mut<V: VisitorMut>(self, visitor: &mut V) -> Self {
        let token = visitor.visit_token(self);

        // Kind is re-read from the (possibly replaced) token.
        match token.token_kind() {
            TokenKind::Eof => token,
            TokenKind::Identifier => visitor.visit_identifier(token),
            TokenKind::MultiLineComment => visitor.visit_multi_line_comment(token),
            TokenKind::Number => visitor.visit_number(token),
            TokenKind::Shebang => token,
            TokenKind::SingleLineComment => visitor.visit_single_line_comment(token),
            TokenKind::StringLiteral => visitor.visit_string_literal(token),
            TokenKind::Symbol => visitor.visit_symbol(token),
            TokenKind::Whitespace => visitor.visit_whitespace(token),

            #[cfg(feature = "luau")]
            TokenKind::InterpolatedString => visitor.visit_interpolated_string_segment(token),

            #[cfg(feature = "cfxlua")]
            TokenKind::CStyleComment => visitor.visit_c_style_comment(token),
        }
    }
}
/// A token together with the trivia (whitespace/comments) immediately before
/// and after it, preserving the full source text around the token.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
pub struct TokenReference {
    // Trivia tokens appearing before `token`.
    pub(crate) leading_trivia: Vec<Token>,
    // The meaningful token itself.
    pub(crate) token: Token,
    // Trivia tokens appearing after `token`.
    pub(crate) trailing_trivia: Vec<Token>,
}
impl TokenReference {
    /// Creates a new TokenReference from the given leading trivia, token, and
    /// trailing trivia.
    pub fn new(leading_trivia: Vec<Token>, token: Token, trailing_trivia: Vec<Token>) -> Self {
        Self {
            leading_trivia,
            token,
            trailing_trivia,
        }
    }

    /// Parses `text` as a symbol using the default [`LuaVersion`].
    ///
    /// # Errors
    /// See [`TokenReference::symbol_specific_lua_version`].
    pub fn symbol(text: &str) -> Result<Self, TokenizerErrorType> {
        TokenReference::symbol_specific_lua_version(text, LuaVersion::new())
    }

    // Crate-internal shortcut for constructing baseline symbols.
    // NOTE(review): unwraps — callers must only pass text that is a valid
    // Lua 5.1 symbol, otherwise this panics.
    pub(crate) fn basic_symbol(text: &str) -> Self {
        TokenReference::symbol_specific_lua_version(text, LuaVersion::lua51()).unwrap()
    }

    /// Parses `text` — a symbol optionally surrounded by whitespace — into a
    /// `TokenReference`, validating the symbol against `lua_version`.
    ///
    /// # Errors
    /// Returns `InvalidSymbol` if no symbol is found, `UnexpectedToken` if a
    /// non-symbol/non-whitespace token appears, or the lexer's own first error
    /// if tokenization fails.
    #[cfg_attr(
        feature = "lua52",
        doc = r##"
```rust
# use full_moon::tokenizer::{Symbol, TokenReference, TokenType, TokenizerErrorType};
# use full_moon::LuaVersion;
# fn main() -> Result<(), Box<TokenizerErrorType>> {
assert!(TokenReference::symbol_specific_lua_version("goto", LuaVersion::lua51()).is_err());
assert!(TokenReference::symbol_specific_lua_version("goto", LuaVersion::lua52()).is_ok());
# Ok(())
# }
"##
    )]
    pub fn symbol_specific_lua_version(
        text: &str,
        lua_version: LuaVersion,
    ) -> Result<Self, TokenizerErrorType> {
        let mut lexer = Lexer::new_lazy(text, lua_version);

        let mut leading_trivia = Vec::new();
        let symbol;

        // Phase 1: consume whitespace until the symbol itself appears.
        loop {
            match lexer.process_next() {
                Some(LexerResult::Ok(
                    token @ Token {
                        token_type: TokenType::Whitespace { .. },
                        ..
                    },
                )) => {
                    leading_trivia.push(token);
                }

                Some(LexerResult::Ok(
                    token @ Token {
                        token_type: TokenType::Symbol { .. },
                        ..
                    },
                )) => {
                    symbol = token;
                    break;
                }

                // EOF before any symbol: `text` contained no symbol at all.
                Some(LexerResult::Ok(Token {
                    token_type: TokenType::Eof,
                    ..
                })) => {
                    return Err(TokenizerErrorType::InvalidSymbol(text.to_owned()));
                }

                // Any other token type is not allowed in symbol text; report
                // its first character.
                Some(LexerResult::Ok(token)) => {
                    return Err(TokenizerErrorType::UnexpectedToken(
                        token.to_string().chars().next().unwrap(),
                    ));
                }

                // Surface the lexer's first error as-is.
                Some(LexerResult::Fatal(mut errors) | LexerResult::Recovered(_, mut errors)) => {
                    return Err(errors.remove(0).error);
                }

                // The lazy lexer always yields an Eof token before returning
                // None, and Eof is handled above.
                None => unreachable!("we shouldn't have hit eof"),
            }
        }

        // Phase 2: everything after the symbol up to EOF must be whitespace,
        // collected as trailing trivia.
        let mut trailing_trivia = Vec::new();

        loop {
            match lexer.process_next() {
                Some(LexerResult::Ok(
                    token @ Token {
                        token_type: TokenType::Whitespace { .. },
                        ..
                    },
                )) => {
                    trailing_trivia.push(token);
                }

                Some(LexerResult::Ok(Token {
                    token_type: TokenType::Eof,
                    ..
                })) => {
                    break;
                }

                Some(LexerResult::Ok(token)) => {
                    return Err(TokenizerErrorType::UnexpectedToken(
                        token.to_string().chars().next().unwrap(),
                    ));
                }

                Some(LexerResult::Fatal(mut errors) | LexerResult::Recovered(_, mut errors)) => {
                    return Err(errors.remove(0).error);
                }

                None => {
                    unreachable!("we shouldn't have hit eof");
                }
            }
        }

        Ok(TokenReference {
            leading_trivia,
            token: symbol,
            trailing_trivia,
        })
    }

    /// The token this TokenReference wraps.
    pub fn token(&self) -> &Token {
        &self.token
    }

    /// Iterator over the trivia (whitespace/comments) before the token.
    pub fn leading_trivia(&self) -> impl Iterator<Item = &Token> {
        self.leading_trivia.iter()
    }

    /// Iterator over the trivia (whitespace/comments) after the token.
    pub fn trailing_trivia(&self) -> impl Iterator<Item = &Token> {
        self.trailing_trivia.iter()
    }

    /// Returns a copy of this reference with `token` swapped in; both trivia
    /// lists are cloned from `self`.
    pub fn with_token(&self, token: Token) -> Self {
        Self {
            token,
            leading_trivia: self.leading_trivia.clone(),
            trailing_trivia: self.trailing_trivia.clone(),
        }
    }

    /// Returns whether the wrapped token is exactly the given [`Symbol`].
    pub fn is_symbol(&self, symbol: Symbol) -> bool {
        self.token.token_type() == &TokenType::Symbol { symbol }
    }
}
impl std::borrow::Borrow<Token> for &TokenReference {
    // `self` is `&&TokenReference`; the `Deref` impl below lets it coerce
    // straight to `&Token`.
    fn borrow(&self) -> &Token {
        self
    }
}

impl std::ops::Deref for TokenReference {
    type Target = Token;

    // A TokenReference can be used anywhere a `&Token` is expected, exposing
    // the wrapped token's methods directly.
    fn deref(&self) -> &Self::Target {
        &self.token
    }
}
impl fmt::Display for TokenReference {
    // Writes leading trivia, then the token, then trailing trivia — i.e. the
    // exact slice of source text this reference covers.
    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        self.leading_trivia
            .iter()
            .try_for_each(|trivia| trivia.fmt(formatter))?;

        self.token.fmt(formatter)?;

        self.trailing_trivia
            .iter()
            .try_for_each(|trivia| trivia.fmt(formatter))
    }
}
impl PartialEq<Self> for TokenReference {
    // Equality requires the wrapped token AND all surrounding trivia to match.
    // `(**self)` derefs to the inner `Token`; `other` coerces via `Deref`.
    fn eq(&self, other: &Self) -> bool {
        (**self).eq(other)
            && self.leading_trivia == other.leading_trivia
            && self.trailing_trivia == other.trailing_trivia
    }
}

impl Eq for TokenReference {}

impl Ord for TokenReference {
    // Ordering delegates to the wrapped `Token` (start position only).
    // NOTE(review): references that compare `Equal` here may still differ by
    // trivia, so `Ord` is coarser than `Eq` — confirm this is intended.
    fn cmp(&self, other: &Self) -> Ordering {
        (**self).cmp(&**other)
    }
}

impl PartialOrd for TokenReference {
    // Delegates to `Ord`, as is conventional.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
impl Visit for TokenReference {
    fn visit<V: Visitor>(&self, visitor: &mut V) {
        // `self` deref-coerces to `&Token` for visit_token/visit_eof.
        visitor.visit_token(self);

        if matches!(self.token().token_kind(), TokenKind::Eof) {
            visitor.visit_eof(self);
        }

        // Then visit each part in source order: leading trivia, token, trailing.
        self.leading_trivia.visit(visitor);
        self.token.visit(visitor);
        self.trailing_trivia.visit(visitor);
    }
}

impl VisitMut for TokenReference {
    fn visit_mut<V: VisitorMut>(self, visitor: &mut V) -> Self {
        let mut token_reference = visitor.visit_token_reference(self);

        if matches!(token_reference.token().token_kind(), TokenKind::Eof) {
            token_reference = visitor.visit_eof(token_reference);
        }

        // Each part is replaced by its (possibly rewritten) visited form.
        token_reference.leading_trivia = token_reference.leading_trivia.visit_mut(visitor);
        token_reference.token = token_reference.token.visit_mut(visitor);
        token_reference.trailing_trivia = token_reference.trailing_trivia.visit_mut(visitor);
        token_reference
    }
}
/// A location in source code, tracked as a byte offset plus line/character
/// coordinates.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
pub struct Position {
    // Byte offset from the start of the source.
    pub(crate) bytes: usize,
    // Line number the position is on.
    pub(crate) line: usize,
    // Character (column) offset within the line.
    pub(crate) character: usize,
}
impl Position {
    // Accessors take `self` by value since `Position` is `Copy`.
    // NOTE(review): whether line/character are 0- or 1-indexed is set by the
    // lexer, not visible here — confirm before documenting an index base.

    /// The byte offset from the start of the source.
    pub fn bytes(self) -> usize {
        self.bytes
    }

    /// The character (column) offset within the line.
    pub fn character(self) -> usize {
        self.character
    }

    /// The line number.
    pub fn line(self) -> usize {
        self.line
    }
}
impl Ord for Position {
    // Positions are ordered purely by byte offset; `line`/`character` are
    // presumed to increase in lockstep with `bytes` — TODO confirm the lexer
    // maintains that invariant (required for consistency with derived `Eq`).
    fn cmp(&self, other: &Self) -> Ordering {
        self.bytes.cmp(&other.bytes)
    }
}

impl PartialOrd for Position {
    // Delegates to `Ord`, as is conventional.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
/// The quoting style used by a string literal.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
#[non_exhaustive]
pub enum StringLiteralQuoteType {
    /// Bracket string: `[[ ... ]]` (the `=` depth is stored on the token).
    Brackets,
    /// Double quotes: `" ... "`.
    Double,
    /// Single quotes: `' ... '`.
    Single,
    /// CfxLua backtick string: `` ` ... ` ``.
    #[cfg(feature = "cfxlua")]
    Backtick,
}
impl fmt::Display for StringLiteralQuoteType {
    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            // Bracket strings can't be rendered from the quote type alone —
            // the `=` depth lives on the token — so this is deliberately an
            // error; `Token`'s Display handles Brackets before reaching here.
            StringLiteralQuoteType::Brackets => Err(fmt::Error),
            StringLiteralQuoteType::Double => "\"".fmt(formatter),
            StringLiteralQuoteType::Single => "'".fmt(formatter),
            #[cfg(feature = "cfxlua")]
            StringLiteralQuoteType::Backtick => "`".fmt(formatter),
        }
    }
}
/// A tokenizer error: what went wrong plus where in the source it happened.
#[derive(Clone, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(Deserialize, Serialize))]
pub struct TokenizerError {
    // The kind of error.
    pub(crate) error: TokenizerErrorType,
    // The (start, end) positions the error spans.
    pub(crate) range: (Position, Position),
}
impl TokenizerError {
    /// The kind of error that occurred.
    pub fn error(&self) -> &TokenizerErrorType {
        &self.error
    }

    /// The position where the error starts (same as `range().0`).
    pub fn position(&self) -> Position {
        self.range.0
    }

    /// The (start, end) range the error spans.
    pub fn range(&self) -> (Position, Position) {
        self.range
    }
}
impl fmt::Display for TokenizerError {
    // Formats as `<error> (<start line>:<start char> to <end line>:<end char>)`.
    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        let (start, end) = self.range;

        write!(
            formatter,
            "{} ({}:{} to {}:{})",
            self.error, start.line, start.character, end.line, end.character
        )
    }
}

impl std::error::Error for TokenizerError {}
// Unit tests; `#[cfg(test)]` keeps the module out of non-test builds
// (it was previously compiled unconditionally, against convention).
#[cfg(test)]
mod tests {
    // Symbols gated behind multiple features should still round-trip through
    // `TokenReference::symbol` when those features are enabled.
    #[cfg(all(feature = "luau", feature = "lua53"))]
    #[test]
    fn test_token_symbol_create_double_slash_equal() {
        use crate::tokenizer::{Symbol, TokenType};

        use super::TokenReference;

        assert!(matches!(
            TokenReference::symbol("//=").unwrap().token().token_type(),
            TokenType::Symbol {
                symbol: Symbol::DoubleSlashEqual
            }
        ))
    }
}