use crate::common::SmartString;
use rustc_hash::FxHashSet;
use std::fmt;
use std::sync::LazyLock;
/// A location attached to a [`Token`]: an absolute offset plus a line/column pair.
///
/// The three numbers are stored exactly as supplied; this type does not validate
/// or interpret them. `Default` yields all zeros.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub struct Position {
    /// Offset value supplied by the caller.
    // NOTE(review): presumably a byte offset into the input — confirm against the lexer.
    pub offset: usize,
    /// Line number, printed by the `Display` impl.
    pub line: usize,
    /// Column number, printed by the `Display` impl.
    pub column: usize,
}

impl Position {
    /// Creates a position from its three components, stored unchanged.
    pub fn new(offset: usize, line: usize, column: usize) -> Self {
        Position {
            offset,
            line,
            column,
        }
    }
}

impl fmt::Display for Position {
    /// Renders as `line <line>, column <column>`; the offset is not printed.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Position { line, column, .. } = *self;
        write!(f, "line {line}, column {column}")
    }
}
/// The category assigned to a [`Token`].
///
/// The `Display` impl prints each variant as a fixed upper-case label
/// (for example `Eof` prints as `EOF`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TokenType {
    Error,
    Eof,
    Identifier,
    Keyword,
    String,
    Integer,
    Float,
    Operator,
    Punctuator,
    Comment,
    Date,
    Time,
    Timestamp,
    Parameter,
}

impl fmt::Display for TokenType {
    /// Writes the variant's upper-case label with no padding or quoting.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Exhaustive on purpose: adding a variant must force a label to be chosen.
        let label = match self {
            TokenType::Error => "ERROR",
            TokenType::Eof => "EOF",
            TokenType::Identifier => "IDENTIFIER",
            TokenType::Keyword => "KEYWORD",
            TokenType::String => "STRING",
            TokenType::Integer => "INTEGER",
            TokenType::Float => "FLOAT",
            TokenType::Operator => "OPERATOR",
            TokenType::Punctuator => "PUNCTUATOR",
            TokenType::Comment => "COMMENT",
            TokenType::Date => "DATE",
            TokenType::Time => "TIME",
            TokenType::Timestamp => "TIMESTAMP",
            TokenType::Parameter => "PARAMETER",
        };
        f.write_str(label)
    }
}
/// A single token: its category, its text, where it was found, and whether it
/// was built through the quoted constructor.
#[derive(Debug, Clone, PartialEq)]
pub struct Token {
/// Category of this token.
pub token_type: TokenType,
/// Text carried by the token. `Token::error` stores the error message here,
/// and `Token::eof` stores the empty string.
pub literal: SmartString,
/// Position recorded for this token, as supplied to the constructor.
pub position: Position,
/// `true` only for tokens built with `Token::new_quoted`; every other
/// constructor in this file sets it to `false`.
// NOTE(review): presumably marks quoted identifiers/strings — confirm against the lexer.
pub quoted: bool,
}
impl Token {
#[inline]
pub fn new(token_type: TokenType, literal: impl AsRef<str>, position: Position) -> Self {
Self {
token_type,
literal: SmartString::from(literal.as_ref()),
position,
quoted: false,
}
}
#[inline]
pub fn new_quoted(token_type: TokenType, literal: impl AsRef<str>, position: Position) -> Self {
Self {
token_type,
literal: SmartString::from(literal.as_ref()),
position,
quoted: true,
}
}
pub fn error(message: impl AsRef<str>, _literal: impl AsRef<str>, position: Position) -> Self {
Self {
token_type: TokenType::Error,
literal: SmartString::from(message.as_ref()),
position,
quoted: false,
}
}
#[inline]
pub fn eof(position: Position) -> Self {
Self {
token_type: TokenType::Eof,
literal: SmartString::const_new(""),
position,
quoted: false,
}
}
pub fn is_eof(&self) -> bool {
self.token_type == TokenType::Eof
}
pub fn is_error(&self) -> bool {
self.token_type == TokenType::Error
}
pub fn is_keyword(&self, keyword: &str) -> bool {
self.token_type == TokenType::Keyword && self.literal.eq_ignore_ascii_case(keyword)
}
pub fn is_operator(&self, op: &str) -> bool {
self.token_type == TokenType::Operator && self.literal == op
}
pub fn is_punctuator(&self, punct: &str) -> bool {
self.token_type == TokenType::Punctuator && self.literal == punct
}
}
impl fmt::Display for Token {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
if self.token_type == TokenType::Error {
write!(
f,
"{}: {} at {}",
self.token_type, self.literal, self.position
)
} else if self.token_type == TokenType::Keyword {
write!(
f,
"{}: {} at {}",
self.token_type, self.literal, self.position
)
} else {
write!(
f,
"{}: '{}' at {}",
self.token_type, self.literal, self.position
)
}
}
}
/// Reserved words recognized by [`is_keyword`], spelled in upper-case ASCII.
///
/// Each word is listed exactly once. `KEYWORD_SET` is built from this slice,
/// so adding a keyword here is all that is needed for it to be recognized.
pub static KEYWORDS: &[&str] = &[
    "SELECT",
    "FROM",
    "WHERE",
    "INSERT",
    "INTO",
    "VALUES",
    "UPDATE",
    "SET",
    "DELETE",
    "CREATE",
    "TABLE",
    "DROP",
    "ALTER",
    "ADD",
    "COLUMN",
    "AND",
    "OR",
    "XOR",
    "NOT",
    "NULL",
    "PRIMARY",
    "PRAGMA",
    "KEY",
    "AUTO_INCREMENT",
    "AUTOINCREMENT",
    "DEFAULT",
    "AS",
    "OF",
    "DISTINCT",
    "ORDER",
    "BY",
    "ASC",
    "DESC",
    "LIMIT",
    "OFFSET",
    "GROUP",
    "HAVING",
    "JOIN",
    "INNER",
    "OUTER",
    "LEFT",
    "RIGHT",
    "FULL",
    "ON",
    "DUPLICATE",
    "CONFLICT",
    "DO",
    "NOTHING",
    "USING",
    "CROSS",
    "NATURAL",
    "TRUE",
    "FALSE",
    "INTEGER",
    "FLOAT",
    "TEXT",
    "BOOLEAN",
    "BOOL",
    "TIMESTAMP",
    "TIMESTAMPTZ",
    "DATETIME",
    "DATE",
    "TIME",
    "JSON",
    "VECTOR",
    "CASE",
    "CAST",
    "EXTRACT",
    "WHEN",
    "THEN",
    "ELSE",
    "END",
    "BETWEEN",
    "IN",
    "IS",
    "LIKE",
    "ILIKE",
    "ESCAPE",
    "GLOB",
    "REGEXP",
    "RLIKE",
    "EXISTS",
    "ALL",
    "ANY",
    "SOME",
    "IF",
    "UNION",
    "INTERSECT",
    "EXCEPT",
    "WITH",
    "UNIQUE",
    "CHECK",
    "FOREIGN",
    "REFERENCES",
    "SHOW",
    "DESCRIBE",
    "TABLES",
    "VIEWS",
    "INDEXES",
    "CASCADE",
    "RESTRICT",
    "INDEX",
    "VIEW",
    "TRIGGER",
    "PROCEDURE",
    "FUNCTION",
    "RETURNING",
    "OVER",
    "PARTITION",
    "RANGE",
    "ROWS",
    "WINDOW",
    "UNBOUNDED",
    "BEGIN",
    "TRANSACTION",
    "COMMIT",
    "ROLLBACK",
    "SAVEPOINT",
    "PRECEDING",
    "FOLLOWING",
    "CURRENT",
    "ROW",
    "MODIFY",
    "RENAME",
    "TO",
    "VARCHAR",
    "CHAR",
    "STRING",
    "BIGINT",
    "TINYINT",
    "SMALLINT",
    "REAL",
    "DOUBLE",
    "DECIMAL",
    "NUMERIC",
    "INT",
    "ISOLATION",
    "LEVEL",
    "READ",
    "COMMITTED",
    "UNCOMMITTED",
    "INTERVAL",
    "RECURSIVE",
    "NULLS",
    "FIRST",
    "LAST",
    "TRUNCATE",
    "FILTER",
    "EXPLAIN",
    "ANALYZE",
    "FETCH",
    "NEXT",
    "ONLY",
    "VACUUM",
];
/// Hash set over the entries of [`KEYWORDS`], built on first use, so that
/// keyword lookup is a hash probe rather than a scan of the slice.
static KEYWORD_SET: LazyLock<FxHashSet<&'static str>> =
    LazyLock::new(|| KEYWORDS.iter().copied().collect());
/// Returns `true` when `s` matches an entry of [`KEYWORDS`], ignoring ASCII case.
///
/// Lookup order:
/// 1. `s` is probed as-is, which succeeds without any copying when the caller
///    already passes the upper-case spelling stored in `KEYWORDS`.
/// 2. Otherwise `s` is folded to ASCII upper-case and probed again. Inputs of
///    up to 32 bytes are folded in a stack buffer; longer inputs are folded
///    into a heap-allocated copy.
///
/// Only ASCII letters are folded, on both paths. Non-ASCII characters are
/// compared unchanged and therefore never match the ASCII-only keyword list.
#[inline]
pub fn is_keyword(s: &str) -> bool {
    /// Size of the on-stack scratch buffer used for case folding.
    const STACK_BUF_LEN: usize = 32;

    if KEYWORD_SET.contains(s) {
        return true;
    }

    let bytes = s.as_bytes();
    if bytes.len() <= STACK_BUF_LEN {
        let mut buf = [0u8; STACK_BUF_LEN];
        let folded = &mut buf[..bytes.len()];
        folded.copy_from_slice(bytes);
        folded.make_ascii_uppercase();
        // SAFETY: `folded` is a byte-for-byte copy of `s`, which is valid UTF-8.
        // `make_ascii_uppercase` only rewrites the ASCII bytes `a`-`z` to `A`-`Z`
        // and leaves every byte >= 0x80 untouched, so all multi-byte sequences
        // are preserved and the slice is still valid UTF-8.
        let upper = unsafe { std::str::from_utf8_unchecked(folded) };
        KEYWORD_SET.contains(upper)
    } else {
        // Use the same ASCII-only folding as the stack path so the answer does
        // not depend on the input's length.
        KEYWORD_SET.contains(s.to_ascii_uppercase().as_str())
    }
}
/// Every operator spelling accepted by [`is_operator`], matched exactly
/// (operators are between one and three characters long).
pub static OPERATORS: &[&str] = &[
    "=", ">", "<", ">=", "<=", "<>", "!=", "+", "-", "*", "/", "%", "||", "->", "->>", "#>",
    "#>>", "@>", "<@", "?", "?|", "?&", "&", "|", "^", "~", "<<", ">>", "<=>",
];
/// Hash set over the entries of [`OPERATORS`], built on first use and probed
/// by [`is_operator`].
static OPERATOR_SET: LazyLock<FxHashSet<&'static str>> =
    LazyLock::new(|| OPERATORS.iter().copied().collect());
/// Returns `true` when `s` is exactly one of the spellings in `OPERATORS`.
///
/// The comparison is an exact string match against `OPERATOR_SET`; prefixes
/// or longer runs of operator characters (for example `===`) do not match.
#[inline]
pub fn is_operator(s: &str) -> bool {
OPERATOR_SET.contains(s)
}
/// The single characters treated as punctuators.
pub static PUNCTUATORS: &[char] = &[',', ';', '(', ')', '.', ':', '[', ']'];

/// Returns `true` when `c` is one of the characters in [`PUNCTUATORS`].
pub fn is_punctuator(c: char) -> bool {
    PUNCTUATORS.iter().any(|&candidate| candidate == c)
}
/// Returns the one-character `&'static str` spelling of punctuator `c`, or
/// `None` when `c` is not a punctuator.
///
/// This lets callers obtain a static string for the character without
/// building one at runtime.
#[inline]
pub fn punctuator_str(c: char) -> Option<&'static str> {
    let text = match c {
        ',' => ",",
        ';' => ";",
        '(' => "(",
        ')' => ")",
        '.' => ".",
        ':' => ":",
        '[' => "[",
        ']' => "]",
        _ => return None,
    };
    Some(text)
}
/// Returns `true` when `c` is one of the characters this module classifies as
/// an operator character.
///
/// The accepted set is `= < > ! + - * / % | & ^ ~ ? @ :`. `'#'` is not in the
/// set even though `OPERATORS` contains spellings that begin with it, and
/// `':'` is accepted here while also being listed in `PUNCTUATORS`.
// NOTE(review): presumably the lexer handles '#' and ':' specially — confirm there.
pub fn is_operator_char(c: char) -> bool {
    const OPERATOR_CHARS: [char; 16] = [
        '=', '<', '>', '!', '+', '-', '*', '/', '%', '|', '&', '^', '~', '?', '@', ':',
    ];
    OPERATOR_CHARS.contains(&c)
}
// Unit tests for the token types and the keyword/operator/punctuator helpers.
#[cfg(test)]
mod tests {
use super::*;
// Display for Position prints only the line and column, not the offset.
#[test]
fn test_position_display() {
let pos = Position::new(10, 2, 5);
assert_eq!(pos.to_string(), "line 2, column 5");
}
// Display for TokenType prints the upper-case label of the variant.
#[test]
fn test_token_type_display() {
assert_eq!(TokenType::Keyword.to_string(), "KEYWORD");
assert_eq!(TokenType::Identifier.to_string(), "IDENTIFIER");
assert_eq!(TokenType::String.to_string(), "STRING");
assert_eq!(TokenType::Eof.to_string(), "EOF");
}
// Token::new stores type and literal; Token::is_keyword ignores ASCII case.
#[test]
fn test_token_creation() {
let token = Token::new(TokenType::Keyword, "SELECT", Position::new(0, 1, 1));
assert_eq!(token.token_type, TokenType::Keyword);
assert_eq!(token.literal, "SELECT");
assert!(token.is_keyword("SELECT"));
assert!(token.is_keyword("select"));
assert!(!token.is_keyword("FROM"));
}
// Token::error keeps the message as the literal; the second argument is not stored.
#[test]
fn test_error_token() {
let token = Token::error("unexpected character", "x", Position::new(5, 1, 6));
assert!(token.is_error());
assert_eq!(token.literal.as_str(), "unexpected character");
}
// Token::eof yields an Eof token with an empty literal.
#[test]
fn test_eof_token() {
let token = Token::eof(Position::new(100, 5, 10));
assert!(token.is_eof());
assert_eq!(token.literal, "");
}
// The free function is_keyword matches any ASCII casing but not prefixes.
#[test]
fn test_is_keyword() {
assert!(is_keyword("SELECT"));
assert!(is_keyword("select"));
assert!(is_keyword("Select"));
assert!(!is_keyword("SELEC"));
assert!(!is_keyword("mycolumn"));
}
// is_operator requires an exact match against the operator table.
#[test]
fn test_is_operator() {
assert!(is_operator("="));
assert!(is_operator(">="));
assert!(is_operator("->"));
assert!(is_operator("->>"));
assert!(!is_operator("==="));
}
// is_punctuator accepts the listed punctuation characters only.
#[test]
fn test_is_punctuator() {
assert!(is_punctuator(','));
assert!(is_punctuator(';'));
assert!(is_punctuator('('));
assert!(is_punctuator(')'));
assert!(!is_punctuator('x'));
}
// is_operator_char classifies single characters.
#[test]
fn test_is_operator_char() {
assert!(is_operator_char('='));
assert!(is_operator_char('+'));
assert!(is_operator_char('-'));
assert!(is_operator_char('|'));
assert!(!is_operator_char('a'));
// '#' begins operators such as "#>" in OPERATORS, yet is_operator_char rejects it.
assert!(!is_operator_char('#')); }
// Display for Token: keywords and errors print bare, other literals are single-quoted.
#[test]
fn test_token_display() {
let keyword = Token::new(TokenType::Keyword, "SELECT", Position::new(0, 1, 1));
assert!(keyword.to_string().contains("KEYWORD: SELECT"));
let string = Token::new(TokenType::String, "hello", Position::new(7, 1, 8));
assert!(string.to_string().contains("STRING: 'hello'"));
let error = Token::error("bad token", "x", Position::new(0, 1, 1));
assert!(error.to_string().contains("ERROR: bad token"));
}
}