use std::sync::LazyLock;
use rustc_hash::FxHashSet;
use super::ast::*;
use super::error::{ParseError, ParseErrors};
use super::lexer::Lexer;
use super::precedence::Precedence;
use super::token::{Token, TokenType};
/// Upper-case spellings of the SQL keywords the parser treats as reserved.
///
/// A keyword token whose literal is in this set is not accepted as an
/// identifier; lookups upper-case the candidate before probing the set, so the
/// entries here must stay upper-case. The set is built lazily on first access.
static RESERVED_KEYWORDS: LazyLock<FxHashSet<&'static str>> = LazyLock::new(|| {
    const WORDS: &[&str] = &[
        "SELECT", "FROM", "WHERE", "AND", "OR", "NOT",
        "INSERT", "INTO", "VALUES", "UPDATE", "SET", "DELETE",
        "CREATE", "DROP", "TABLE", "INDEX", "VIEW", "ALTER", "ADD",
        "PRIMARY", "KEY", "FOREIGN", "REFERENCES",
        "NULL", "TRUE", "FALSE",
        "AS", "ON", "JOIN", "INNER", "OUTER", "FULL", "CROSS",
        "GROUP", "BY", "ORDER", "HAVING", "LIMIT", "OFFSET",
        "UNION", "INTERSECT", "EXCEPT",
        "CASE", "WHEN", "THEN", "ELSE", "END",
        "DISTINCT", "ALL", "EXISTS", "IN", "BETWEEN",
        "LIKE", "GLOB", "REGEXP", "RLIKE", "IS",
        "ASC", "DESC", "NULLS",
        "BEGIN", "COMMIT", "ROLLBACK", "SAVEPOINT",
        "IF", "WITH", "RECURSIVE",
    ];
    WORDS.iter().copied().collect()
});
/// SQL parser state: a lexer plus a two-token window (current and lookahead),
/// the errors collected so far, and per-statement bookkeeping.
pub struct Parser {
/// Token source; `next_token` pulls one token from it on every advance.
lexer: Lexer,
/// Token currently under examination.
pub(crate) cur_token: Token,
/// One-token lookahead, i.e. the token that becomes `cur_token` on the next advance.
pub(crate) peek_token: Token,
/// Errors recorded through `add_error`; returned by `parse_program` when non-empty.
errors: Vec<ParseError>,
/// Clause name interpolated into "expected ... after <clause>" messages when
/// input ends early. It is never assigned in this file after construction.
/// NOTE(review): presumably set by the statement parsers in sibling modules; confirm.
pub(crate) current_clause: String,
/// Starts at 0 and is incremented by `parse_program` after each statement attempt.
current_statement_id: usize,
/// Next index handed out by `next_parameter_index`; starts at 1 and is reset
/// to 1 by `parse_program` after each statement attempt.
parameter_counter: usize,
}
impl Parser {
pub fn new(input: &str) -> Self {
let mut lexer = Lexer::new(input);
let cur_token = lexer.next_token();
let peek_token = lexer.next_token();
Parser {
lexer,
cur_token,
peek_token,
errors: Vec::new(),
current_clause: String::new(),
current_statement_id: 0,
parameter_counter: 1,
}
}
/// Parses every statement in the input until the end-of-input token.
///
/// Comment tokens found at statement position are skipped. After each
/// statement attempt, trailing `;` punctuators are consumed, the cursor moves
/// to the next token, the statement id is incremented and the parameter
/// counter is reset to 1.
///
/// # Errors
///
/// Returns a `ParseErrors` built from a copy of every error recorded during
/// parsing if at least one was recorded; the parser keeps its own list, so
/// `errors()` still reports them afterwards.
pub fn parse_program(&mut self) -> Result<Program, ParseErrors> {
    let mut statements = Vec::with_capacity(1);

    loop {
        if self.cur_token_is(TokenType::Eof) {
            break;
        }

        // Comments between statements carry no syntax; step over them
        // without touching the per-statement counters.
        if self.cur_token_is(TokenType::Comment) {
            self.next_token();
            continue;
        }

        if let Some(stmt) = self.parse_statement() {
            statements.push(stmt);
        }

        // Swallow any run of statement terminators sitting in the lookahead.
        while self.peek_token_is_punctuator(";") {
            self.next_token();
        }

        // Step onto the first token of the next statement (or end of input).
        self.next_token();
        self.current_statement_id += 1;
        self.parameter_counter = 1;
    }

    if self.errors.is_empty() {
        Ok(Program { statements })
    } else {
        Err(ParseErrors::from_errors(self.errors.clone()))
    }
}
/// Advances the token window by one: the lookahead becomes the current token
/// and a freshly lexed token becomes the new lookahead.
pub(crate) fn next_token(&mut self) {
    let upcoming = self.lexer.next_token();
    self.cur_token = std::mem::replace(&mut self.peek_token, upcoming);
}
/// Returns `true` when the current token has type `t`.
pub(crate) fn cur_token_is(&self, t: TokenType) -> bool {
    let current = &self.cur_token;
    current.token_type == t
}
/// Returns `true` when the lookahead token has type `t`.
pub(crate) fn peek_token_is(&self, t: TokenType) -> bool {
    let upcoming = &self.peek_token;
    upcoming.token_type == t
}
/// Returns `true` when the current token can stand in for an identifier:
/// either a plain identifier token, or a keyword token whose literal is not
/// in the reserved-keyword set.
pub(crate) fn cur_token_is_identifier_like(&self) -> bool {
    if self.cur_token_is(TokenType::Identifier) {
        return true;
    }
    self.cur_token_is(TokenType::Keyword)
        && !Self::is_reserved_keyword(&self.cur_token.literal)
}
/// Builds an `Identifier` from the current token, using a copy of the token
/// itself and a copy of its literal as the identifier's value.
pub(crate) fn cur_token_as_column_identifier(&self) -> Identifier {
    let token = self.cur_token.clone();
    let value = token.literal.clone();
    Identifier::new(token, value)
}
/// Returns `true` when `keyword` is one of the reserved SQL keywords,
/// compared without regard to ASCII case.
///
/// Only ASCII letters are case-folded, matching the `eq_ignore_ascii_case`
/// comparison used by the `*_is_keyword` helpers. Full Unicode upper-casing
/// would map some non-ASCII characters onto ASCII letters (for example
/// U+017F LATIN SMALL LETTER LONG S becomes `S`, and U+0131 LATIN SMALL
/// LETTER DOTLESS I becomes `I`), so a literal such as "ſet" would wrongly be
/// reported as the reserved word `SET`.
pub(crate) fn is_reserved_keyword(keyword: &str) -> bool {
    // The set stores upper-case ASCII spellings, so normalise the probe the same way.
    RESERVED_KEYWORDS.contains(keyword.to_ascii_uppercase().as_str())
}
/// Returns `true` when the current token is a keyword token whose literal
/// equals `keyword`, ignoring ASCII case.
pub(crate) fn cur_token_is_keyword(&self, keyword: &str) -> bool {
    let token = &self.cur_token;
    token.token_type == TokenType::Keyword && token.literal.eq_ignore_ascii_case(keyword)
}
/// Returns `true` when the lookahead token is a keyword token whose literal
/// equals `keyword`, ignoring ASCII case.
pub(crate) fn peek_token_is_keyword(&self, keyword: &str) -> bool {
    let token = &self.peek_token;
    token.token_type == TokenType::Keyword && token.literal.eq_ignore_ascii_case(keyword)
}
/// Returns `true` when the current token is a punctuator whose literal is
/// exactly `punc`.
pub(crate) fn cur_token_is_punctuator(&self, punc: &str) -> bool {
    self.cur_token_is(TokenType::Punctuator) && self.cur_token.literal == punc
}
/// Returns `true` when the lookahead token is a punctuator whose literal is
/// exactly `punc`.
pub(crate) fn peek_token_is_punctuator(&self, punc: &str) -> bool {
    self.peek_token_is(TokenType::Punctuator) && self.peek_token.literal == punc
}
/// Returns `true` when the lookahead token is an operator whose literal is
/// exactly `op`.
pub(crate) fn peek_token_is_operator(&self, op: &str) -> bool {
    self.peek_token_is(TokenType::Operator) && self.peek_token.literal == op
}
/// Advances onto the lookahead token if it has type `t` and returns `true`.
///
/// Otherwise records an error through `peek_error`, leaves the token window
/// untouched and returns `false`.
pub(crate) fn expect_peek(&mut self, t: TokenType) -> bool {
    if !self.peek_token_is(t) {
        self.peek_error(t);
        return false;
    }
    self.next_token();
    true
}
/// Advances onto the lookahead token if it is the keyword `keyword`
/// (ASCII case-insensitive) and returns `true`.
///
/// Otherwise records an "expected <keyword> after <current literal>, got
/// <lookahead>" error, leaves the token window untouched and returns `false`.
pub(crate) fn expect_keyword(&mut self, keyword: &str) -> bool {
    if !self.peek_token_is_keyword(keyword) {
        let found = Self::format_token_for_error(&self.peek_token);
        let message = format!(
            "expected {} after {}, got {}",
            keyword, self.cur_token.literal, found
        );
        self.add_error(message);
        return false;
    }
    self.next_token();
    true
}
/// Binding precedence of the lookahead token.
///
/// Operator and keyword tokens are resolved through
/// `Precedence::for_operator`. The punctuators `.`, `(` and `[` map to
/// `Dot`, `Call` and `Index` respectively. Everything else is `Lowest`.
pub(crate) fn peek_precedence(&self) -> Precedence {
    let token = &self.peek_token;
    match token.token_type {
        TokenType::Operator | TokenType::Keyword => Precedence::for_operator(&token.literal),
        TokenType::Punctuator => {
            let symbol: &str = &token.literal;
            match symbol {
                "." => Precedence::Dot,
                "(" => Precedence::Call,
                "[" => Precedence::Index,
                _ => Precedence::Lowest,
            }
        }
        _ => Precedence::Lowest,
    }
}
/// Binding precedence of the current token.
///
/// Operator and keyword tokens are resolved through
/// `Precedence::for_operator`. The punctuators `.`, `(` and `[` map to
/// `Dot`, `Call` and `Index` respectively. Everything else is `Lowest`.
pub(crate) fn cur_precedence(&self) -> Precedence {
    let token = &self.cur_token;
    match token.token_type {
        TokenType::Operator | TokenType::Keyword => Precedence::for_operator(&token.literal),
        TokenType::Punctuator => {
            let symbol: &str = &token.literal;
            match symbol {
                "." => Precedence::Dot,
                "(" => Precedence::Call,
                "[" => Precedence::Index,
                _ => Precedence::Lowest,
            }
        }
        _ => Precedence::Lowest,
    }
}
/// Records an error saying the lookahead token is not of the `expected` type.
///
/// The message depends on what the lookahead actually is:
/// - end of input: mentions the current clause when one is set, otherwise
///   reports an unexpected end of input;
/// - a reserved keyword where an identifier was expected: explains that the
///   word is reserved and suggests double-quoting it;
/// - anything else: a plain "expected X, got Y".
pub(crate) fn peek_error(&mut self, expected: TokenType) {
    // NOTE(review): the punctuator description names only parentheses even
    // though any punctuator type triggers it; kept verbatim.
    let expected_desc = match expected {
        TokenType::Identifier => "identifier (name)",
        TokenType::Keyword => "keyword",
        TokenType::Punctuator => "'(' or ')'",
        TokenType::String => "string literal",
        TokenType::Integer => "integer",
        TokenType::Float => "number",
        _ => "token",
    };

    let hit_end_of_input = self.peek_token.token_type == TokenType::Eof;
    let message = if hit_end_of_input {
        if self.current_clause.is_empty() {
            format!("unexpected end of input, expected {}", expected_desc)
        } else {
            format!("expected {} after {}", expected_desc, self.current_clause)
        }
    } else if expected == TokenType::Identifier
        && self.peek_token.token_type == TokenType::Keyword
        && Self::is_reserved_keyword(&self.peek_token.literal)
    {
        format!(
            "'{}' is a reserved keyword and cannot be used as an identifier. \
             Use double quotes to escape it: \"{}\"",
            self.peek_token.literal.to_uppercase(),
            self.peek_token.literal
        )
    } else {
        format!(
            "expected {}, got {}",
            expected_desc,
            Self::format_token_for_error(&self.peek_token)
        )
    };

    self.add_error(message);
}
/// Renders `token` for inclusion in an error message: the phrase
/// `end of input` for the end-of-input token, otherwise the token's literal
/// wrapped in single quotes.
pub(crate) fn format_token_for_error(token: &Token) -> String {
    if token.token_type != TokenType::Eof {
        return format!("'{}'", token.literal);
    }
    String::from("end of input")
}
pub(crate) fn add_error(&mut self, msg: String) {
self.errors
.push(ParseError::new(msg, self.cur_token.position));
}
/// Returns the parse errors recorded so far, in the order they were added.
pub fn errors(&self) -> &[ParseError] {
    self.errors.as_slice()
}
/// Hands out the next parameter index and advances the counter by one.
///
/// The counter starts at 1 and `parse_program` resets it to 1 after each
/// statement attempt, so indices restart for every statement.
pub(crate) fn next_parameter_index(&mut self) -> usize {
    let assigned = self.parameter_counter;
    self.parameter_counter = assigned + 1;
    assigned
}
/// Returns the statement counter: it starts at 0 and `parse_program`
/// increments it after each statement attempt.
// Nothing in this file calls this accessor, hence the lint suppression.
// NOTE(review): confirm whether other modules still need it.
#[allow(dead_code)]
pub(crate) fn current_statement_id(&self) -> usize {
self.current_statement_id
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Input shared by every test in this module.
    const QUERY: &str = "SELECT * FROM users";

    /// A new parser is positioned on the first token of the input.
    #[test]
    fn test_parser_creation() {
        assert!(Parser::new(QUERY).cur_token_is_keyword("SELECT"));
    }

    /// Advancing once moves from `SELECT` onto the `*` operator.
    #[test]
    fn test_next_token() {
        let mut parser = Parser::new(QUERY);
        assert!(parser.cur_token_is_keyword("SELECT"));

        parser.next_token();

        assert!(parser.cur_token_is(TokenType::Operator));
        assert_eq!(parser.cur_token.literal, "*");
    }

    /// The lookahead already holds the second token right after construction.
    #[test]
    fn test_peek_token() {
        let parser = Parser::new(QUERY);
        assert!(parser.cur_token_is_keyword("SELECT"));
        assert!(parser.peek_token_is_operator("*"));
    }
}