//! SQL Parser -- recursive-descent parser that converts a token stream into an AST.
//!
//! The central type is [`Parser`], which consumes tokens produced by the
//! [`Tokenizer`](crate::tokens::Tokenizer) and builds a tree of [`Expression`]
//! nodes covering the full SQL grammar: queries, DML, DDL, set operations,
//! window functions, CTEs, and dialect-specific extensions for 30+ databases.
//!
//! The simplest entry point is [`Parser::parse_sql`], which tokenizes and
//! parses a SQL string in one call.
//!
//! # Static configuration maps
//!
//! This module also exports several `LazyLock<HashSet<TokenType>>` constants
//! (ported from Python sqlglot's `parser.py`) that classify token types:
//!
//! - [`TYPE_TOKENS`] -- all tokens that represent SQL data types
//! - [`NESTED_TYPE_TOKENS`] -- parametric types like `ARRAY`, `MAP`, `STRUCT`
//! - [`RESERVED_TOKENS`] -- tokens that cannot be used as unquoted identifiers
//! - [`NO_PAREN_FUNCTIONS`] / [`NO_PAREN_FUNCTION_NAMES`] -- zero-argument
//! functions that may be written without parentheses (e.g. `CURRENT_DATE`)
//! - [`DB_CREATABLES`] -- object kinds valid after `CREATE` (TABLE, VIEW, etc.)
//! - [`SUBQUERY_PREDICATES`] -- tokens introducing subquery predicates (ANY, ALL, EXISTS)
use crate::error::{Error, Result};
use crate::expressions::*;
use crate::tokens::{Span, Token, TokenType, Tokenizer, TokenizerConfig};
use std::collections::HashSet;
use std::sync::LazyLock;
// =============================================================================
// Parser Configuration Maps (ported from Python SQLGlot parser.py)
// =============================================================================
/// NO_PAREN_FUNCTIONS: Functions that can be called without parentheses.
/// Maps TokenType to the function name for generation.
/// Python: NO_PAREN_FUNCTIONS = {TokenType.CURRENT_DATE: exp.CurrentDate, ...}
pub static NO_PAREN_FUNCTIONS: LazyLock<HashSet<TokenType>> = LazyLock::new(|| {
    HashSet::from([
        TokenType::CurrentDate,
        TokenType::CurrentDateTime,
        TokenType::CurrentTime,
        TokenType::CurrentTimestamp,
        TokenType::CurrentUser,
        TokenType::CurrentRole,
        TokenType::CurrentSchema,
        TokenType::CurrentCatalog,
        // Additional no-paren functions (from tokens.rs)
        TokenType::LocalTime,
        TokenType::LocalTimestamp,
        TokenType::SysTimestamp,
        TokenType::UtcDate,
        TokenType::UtcTime,
        TokenType::UtcTimestamp,
        TokenType::SessionUser,
    ])
});
/// NO_PAREN_FUNCTION_NAMES: String names that can be no-paren functions.
/// These are often tokenized as Var/Identifier instead of specific TokenTypes,
/// so they are matched by name against the shared function-registry list.
pub static NO_PAREN_FUNCTION_NAMES: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
    let mut names = HashSet::new();
    for &name in crate::function_registry::NO_PAREN_FUNCTION_NAME_LIST.iter() {
        names.insert(name);
    }
    names
});
/// STRUCT_TYPE_TOKENS: Tokens that represent struct-like types.
/// Python: STRUCT_TYPE_TOKENS = {TokenType.FILE, TokenType.NESTED, TokenType.OBJECT, ...}
pub static STRUCT_TYPE_TOKENS: LazyLock<HashSet<TokenType>> = LazyLock::new(|| {
    // Note: UNION is part of STRUCT_TYPE_TOKENS in Python but we handle it
    // as a set operation instead.
    HashSet::from([
        TokenType::File,
        TokenType::Nested,
        TokenType::Object,
        TokenType::Struct,
    ])
});
/// NESTED_TYPE_TOKENS: Tokens that can have nested type parameters.
/// Python: NESTED_TYPE_TOKENS = {TokenType.ARRAY, TokenType.LIST, ...}
pub static NESTED_TYPE_TOKENS: LazyLock<HashSet<TokenType>> = LazyLock::new(|| {
    HashSet::from([
        TokenType::Array,
        TokenType::List,
        TokenType::LowCardinality,
        TokenType::Map,
        TokenType::Nullable,
        TokenType::Range,
        // Members of STRUCT_TYPE_TOKENS (kept in sync manually)
        TokenType::File,
        TokenType::Nested,
        TokenType::Object,
        TokenType::Struct,
    ])
});
/// Check if an uppercased type name is a known SQL custom type that should stay uppercased.
/// Used to distinguish between known types like DATETIME2, SYSNAME etc. and user-defined types
/// like UserDefinedTableType that should preserve their original case.
///
/// The comparison is case-sensitive: callers are expected to pass an
/// already-uppercased name.
fn convert_name_is_known_custom(name: &str) -> bool {
    // Known SQL types that appear in the `_` (default) branch of parse_data_type.
    // These should remain uppercased. A linear scan over a small constant
    // slice keeps the list easy to maintain.
    const KNOWN_CUSTOM_TYPE_NAMES: &[&str] = &[
        "DATETIME2",
        "DATETIMEOFFSET",
        "SMALLDATETIME",
        "DATETIME",
        "NVARCHAR2",
        "VARCHAR2",
        "NCHAR",
        "MONEY",
        "SMALLMONEY",
        "TINYINT",
        "MEDIUMINT",
        "BYTEINT",
        "SUPER",
        "HLLSKETCH",
        "TIMETZ",
        "TIMESTAMPTZ",
        "SYSNAME",
        "XML",
        "SQL_VARIANT",
        "HIERARCHYID",
        "ROWVERSION",
        "IMAGE",
        "CURSOR",
        "TABLE",
        "UNIQUEIDENTIFIER",
        "VARIANT",
        "OBJECT",
        "NUMBER",
        "BINARY_FLOAT",
        "BINARY_DOUBLE",
        "CLOB",
        "NCLOB",
        "RAW",
        "LONG",
        "MEDIUMTEXT",
        "LONGTEXT",
        "MEDIUMBLOB",
        "LONGBLOB",
        "TINYTEXT",
        "TINYBLOB",
        "INT2",
        "INT4",
        "INT8",
        "FLOAT4",
        "FLOAT8",
        "SERIAL",
        "BIGSERIAL",
        "SMALLSERIAL",
        "YEAR",
        "FIXED",
        "SIGNED",
        "UNSIGNED",
        "ROW",
        "BIT",
        "BOOLEAN",
        "BOOL",
        "TEXT",
        "STRING",
        "NTEXT",
        "INT128",
        "INT256",
        "UINT8",
        "UINT16",
        "UINT32",
        "UINT64",
        "UINT128",
        "UINT256",
        "FLOAT32",
        "FLOAT64",
        "LOWCARDINALITY",
        "NULLABLE",
        "IPADDRESS",
        "IPV4",
        "IPV6",
        "AGGREGATEFUNCTION",
        "SIMPLEAGGREGATEFUNCTION",
        "FIXEDSTRING",
        "RING",
        "NESTED",
    ];
    KNOWN_CUSTOM_TYPE_NAMES.contains(&name)
}
/// ENUM_TYPE_TOKENS: Tokens that represent enum types.
/// Python: ENUM_TYPE_TOKENS = {TokenType.DYNAMIC, TokenType.ENUM, ...}
pub static ENUM_TYPE_TOKENS: LazyLock<HashSet<TokenType>> = LazyLock::new(|| {
    HashSet::from([
        TokenType::Dynamic,
        TokenType::Enum,
        TokenType::Enum8,
        TokenType::Enum16,
    ])
});
/// AGGREGATE_TYPE_TOKENS: Tokens for aggregate function types (ClickHouse).
/// Python: AGGREGATE_TYPE_TOKENS = {TokenType.AGGREGATEFUNCTION, ...}
pub static AGGREGATE_TYPE_TOKENS: LazyLock<HashSet<TokenType>> = LazyLock::new(|| {
    HashSet::from([
        TokenType::AggregateFunction,
        TokenType::SimpleAggregateFunction,
    ])
});
/// TYPE_TOKENS: All tokens that represent data types.
/// Python: TYPE_TOKENS = {TokenType.BIT, TokenType.BOOLEAN, ...}
pub static TYPE_TOKENS: LazyLock<HashSet<TokenType>> = LazyLock::new(|| {
    HashSet::from([
        // Basic types
        TokenType::Bit,
        TokenType::Boolean,
        // Integer types
        TokenType::TinyInt,
        TokenType::UTinyInt,
        TokenType::SmallInt,
        TokenType::USmallInt,
        TokenType::MediumInt,
        TokenType::UMediumInt,
        TokenType::Int,
        TokenType::UInt,
        TokenType::BigInt,
        TokenType::UBigInt,
        TokenType::BigNum,
        TokenType::Int128,
        TokenType::UInt128,
        TokenType::Int256,
        TokenType::UInt256,
        // Floating point types
        TokenType::Float,
        TokenType::Double,
        TokenType::UDouble,
        // Decimal types
        TokenType::Decimal,
        TokenType::Decimal32,
        TokenType::Decimal64,
        TokenType::Decimal128,
        TokenType::Decimal256,
        TokenType::DecFloat,
        TokenType::UDecimal,
        TokenType::BigDecimal,
        // String types
        TokenType::Char,
        TokenType::NChar,
        TokenType::VarChar,
        TokenType::NVarChar,
        TokenType::BpChar,
        TokenType::Text,
        TokenType::MediumText,
        TokenType::LongText,
        TokenType::TinyText,
        TokenType::Name,
        TokenType::FixedString,
        // Binary types
        TokenType::Binary,
        TokenType::VarBinary,
        TokenType::Blob,
        TokenType::MediumBlob,
        TokenType::LongBlob,
        TokenType::TinyBlob,
        // Date/time types
        TokenType::Date,
        TokenType::Date32,
        TokenType::Time,
        TokenType::TimeTz,
        TokenType::TimeNs,
        TokenType::Timestamp,
        TokenType::TimestampTz,
        TokenType::TimestampLtz,
        TokenType::TimestampNtz,
        TokenType::TimestampS,
        TokenType::TimestampMs,
        TokenType::TimestampNs,
        TokenType::DateTime,
        TokenType::DateTime2,
        TokenType::DateTime64,
        TokenType::SmallDateTime,
        TokenType::Year,
        TokenType::Interval,
        // JSON types
        TokenType::Json,
        TokenType::JsonB,
        // UUID
        TokenType::Uuid,
        // Spatial types
        TokenType::Geography,
        TokenType::GeographyPoint,
        TokenType::Geometry,
        TokenType::Point,
        TokenType::Ring,
        TokenType::LineString,
        TokenType::MultiLineString,
        TokenType::Polygon,
        TokenType::MultiPolygon,
        // Range types (PostgreSQL)
        TokenType::Int4Range,
        TokenType::Int4MultiRange,
        TokenType::Int8Range,
        TokenType::Int8MultiRange,
        TokenType::NumRange,
        TokenType::NumMultiRange,
        TokenType::TsRange,
        TokenType::TsMultiRange,
        TokenType::TsTzRange,
        TokenType::TsTzMultiRange,
        TokenType::DateRange,
        TokenType::DateMultiRange,
        // PostgreSQL special types
        TokenType::HllSketch,
        TokenType::HStore,
        TokenType::Serial,
        TokenType::SmallSerial,
        TokenType::BigSerial,
        // XML
        TokenType::Xml,
        // Other special types
        TokenType::Super,
        TokenType::PseudoType,
        TokenType::UserDefined,
        TokenType::Money,
        TokenType::SmallMoney,
        TokenType::RowVersion,
        TokenType::Image,
        TokenType::Variant,
        TokenType::Object,
        TokenType::ObjectIdentifier,
        TokenType::Inet,
        TokenType::IpAddress,
        TokenType::IpPrefix,
        TokenType::Ipv4,
        TokenType::Ipv6,
        TokenType::Unknown,
        TokenType::Null,
        TokenType::TDigest,
        TokenType::Vector,
        TokenType::Void,
        // Members of ENUM_TYPE_TOKENS (kept in sync manually)
        TokenType::Dynamic,
        TokenType::Enum,
        TokenType::Enum8,
        TokenType::Enum16,
        // Members of NESTED_TYPE_TOKENS (kept in sync manually;
        // Object is already listed under "Other special types")
        TokenType::Array,
        TokenType::List,
        TokenType::LowCardinality,
        TokenType::Map,
        TokenType::Nullable,
        TokenType::Range,
        TokenType::File,
        TokenType::Nested,
        TokenType::Struct,
        // Members of AGGREGATE_TYPE_TOKENS (kept in sync manually)
        TokenType::AggregateFunction,
        TokenType::SimpleAggregateFunction,
    ])
});
/// SIGNED_TO_UNSIGNED_TYPE_TOKEN: Maps signed types to unsigned types.
/// Python: SIGNED_TO_UNSIGNED_TYPE_TOKEN = {TokenType.BIGINT: TokenType.UBIGINT, ...}
pub static SIGNED_TO_UNSIGNED_TYPE_TOKEN: LazyLock<
    std::collections::HashMap<TokenType, TokenType>,
> = LazyLock::new(|| {
    std::collections::HashMap::from([
        (TokenType::BigInt, TokenType::UBigInt),
        (TokenType::Int, TokenType::UInt),
        (TokenType::MediumInt, TokenType::UMediumInt),
        (TokenType::SmallInt, TokenType::USmallInt),
        (TokenType::TinyInt, TokenType::UTinyInt),
        (TokenType::Decimal, TokenType::UDecimal),
        (TokenType::Double, TokenType::UDouble),
    ])
});
/// SUBQUERY_PREDICATES: Tokens that introduce subquery predicates.
/// Python: SUBQUERY_PREDICATES = {TokenType.ANY: exp.Any, ...}
pub static SUBQUERY_PREDICATES: LazyLock<HashSet<TokenType>> = LazyLock::new(|| {
    HashSet::from([
        TokenType::Any,
        TokenType::All,
        TokenType::Exists,
        TokenType::Some,
    ])
});
/// DB_CREATABLES: Object types that can be created with CREATE.
/// Python: DB_CREATABLES = {TokenType.DATABASE, TokenType.SCHEMA, ...}
pub static DB_CREATABLES: LazyLock<HashSet<TokenType>> = LazyLock::new(|| {
    HashSet::from([
        TokenType::Database,
        TokenType::Dictionary,
        TokenType::FileFormat,
        TokenType::Model,
        TokenType::Namespace,
        TokenType::Schema,
        TokenType::SemanticView,
        TokenType::Sequence,
        TokenType::Sink,
        TokenType::Source,
        TokenType::Stage,
        TokenType::StorageIntegration,
        TokenType::Streamlit,
        TokenType::Table,
        TokenType::Tag,
        TokenType::View,
        TokenType::Warehouse,
    ])
});
/// RESERVED_TOKENS: Tokens that cannot be used as identifiers without quoting
/// These are typically structural keywords that affect query parsing
pub static RESERVED_TOKENS: LazyLock<HashSet<TokenType>> = LazyLock::new(|| {
let mut set = HashSet::new();
// Query structure keywords
set.insert(TokenType::Select);
set.insert(TokenType::From);
set.insert(TokenType::Where);
set.insert(TokenType::GroupBy);
set.insert(TokenType::OrderBy);
set.insert(TokenType::Having);
set.insert(TokenType::Limit);
set.insert(TokenType::Offset);
set.insert(TokenType::Union);
set.insert(TokenType::Intersect);
set.insert(TokenType::Except);
set.insert(TokenType::Join);
set.insert(TokenType::On);
set.insert(TokenType::With);
set.insert(TokenType::Into);
set.insert(TokenType::Values);
set.insert(TokenType::Set);
// DDL keywords
set.insert(TokenType::Create);
set.insert(TokenType::Drop);
set.insert(TokenType::Alter);
set.insert(TokenType::Truncate);
// DML keywords
set.insert(TokenType::Insert);
set.insert(TokenType::Update);
set.insert(TokenType::Delete);
set.insert(TokenType::Merge);
// Control flow
set.insert(TokenType::Case);
set.insert(TokenType::When);
set.insert(TokenType::Then);
set.insert(TokenType::Else);
set.insert(TokenType::End);
// Boolean operators
set.insert(TokenType::And);
set.insert(TokenType::Or);
set.insert(TokenType::Not);
// Comparison
set.insert(TokenType::In);
set.insert(TokenType::Is);
set.insert(TokenType::Between);
set.insert(TokenType::Like);
set.insert(TokenType::ILike);
set.insert(TokenType::Exists);
// Literals
set.insert(TokenType::Null);
set.insert(TokenType::True);
set.insert(TokenType::False);
// Punctuation tokens (these are always reserved)
set.insert(TokenType::LParen);
set.insert(TokenType::RParen);
set.insert(TokenType::LBracket);
set.insert(TokenType::RBracket);
set.insert(TokenType::LBrace);
set.insert(TokenType::RBrace);
set.insert(TokenType::Comma);
set.insert(TokenType::Semicolon);
set.insert(TokenType::Star);
set.insert(TokenType::Eq);
set.insert(TokenType::Neq);
set.insert(TokenType::Lt);
set.insert(TokenType::Lte);
set.insert(TokenType::Gt);
set.insert(TokenType::Gte);
set
});
// Note: Function name normalization is handled directly in parse_typed_function
// by matching all aliases to the same typed expression, following Python SQLGlot's pattern.
// The generator then outputs dialect-specific names via TRANSFORMS.
/// Recursive-descent SQL parser that converts a token stream into an AST.
///
/// The parser consumes a `Vec<Token>` produced by the [`Tokenizer`](crate::tokens::Tokenizer)
/// and builds a tree of [`Expression`] nodes. It supports the full SQL grammar
/// including SELECT, DML (INSERT/UPDATE/DELETE/MERGE), DDL (CREATE/ALTER/DROP),
/// window functions, CTEs, set operations, and 30+ dialect-specific extensions.
///
/// # Quick start
///
/// For most use cases the static helper [`Parser::parse_sql`] is the simplest entry point:
///
/// ```rust,ignore
/// use polyglot_sql::parser::Parser;
///
/// let statements = Parser::parse_sql("SELECT 1; SELECT 2")?;
/// assert_eq!(statements.len(), 2);
/// ```
///
/// For dialect-aware parsing, use [`Parser::with_config`] or
/// [`Parser::parse_sql_with_config`].
pub struct Parser {
    /// The token stream being parsed.
    tokens: Vec<Token>,
    /// Index into `tokens` of the next token to be consumed.
    current: usize,
    /// Dialect and behavior configuration for this parse.
    config: ParserConfig,
    /// Original source SQL (used for preserving exact text in Command expressions)
    source: Option<String>,
    /// Comments captured by parse_comparison when no comparison operator follows.
    /// These are leading comments from the first token of an expression that need
    /// to be placed by the caller (e.g., after an alias, or after an AND operand).
    pending_leading_comments: Vec<String>,
}
/// Configuration for the SQL [`Parser`].
///
/// Controls dialect-specific parsing behavior. Most users can rely on the
/// `Default` implementation (no dialect, strict comma handling); set `dialect`
/// when you need to handle syntax that is unique to a particular database
/// engine (e.g. BigQuery backtick quoting, TSQL square-bracket identifiers,
/// Snowflake QUALIFY clause).
#[derive(Debug, Clone, Default)]
pub struct ParserConfig {
    /// Allow trailing commas in SELECT lists (e.g. BigQuery permits `SELECT a, b, FROM t`).
    pub allow_trailing_commas: bool,
    /// Dialect type for dialect-specific parsing behavior.
    /// `None` means generic, dialect-agnostic parsing.
    pub dialect: Option<crate::dialects::DialectType>,
}
/// Intermediate parse state for the head of a SELECT body — the pieces the
/// parser collects around the SELECT keyword before assembling the final
/// select expression. (Internal helper; not part of the public API.)
struct SelectBodyHead {
    // Comments appearing before the SELECT keyword.
    leading_comments: Vec<String>,
    // Comments appearing immediately after the SELECT keyword.
    post_select_comments: Vec<String>,
    // Optimizer hint (`/*+ ... */`), if present.
    hint: Option<Hint>,
    // `TOP n` clause, if present.
    top: Option<Top>,
    // Whether DISTINCT was specified.
    distinct: bool,
    // Expressions of a `DISTINCT ON (...)` clause, if present.
    distinct_on: Option<Vec<Expression>>,
    // Optional SELECT kind keyword — presumably dialect-specific
    // (e.g. STRUCT/VALUE variants); confirm against callers.
    kind: Option<String>,
    // Dialect-specific modifier keywords following SELECT — presumed
    // (e.g. MySQL SQL_* modifiers); confirm against callers.
    operation_modifiers: Vec<String>,
    // The projection list.
    expressions: Vec<Expression>,
    // Columns of an `EXCLUDE (...)` clause, if present.
    exclude: Option<Vec<Expression>>,
    // `SELECT ... INTO` target, if present.
    into: Option<SelectInto>,
}
impl Parser {
/// Create a new parser from a pre-tokenized token stream with default configuration.
///
/// Prefer [`Parser::parse_sql`] if you are starting from a raw SQL string.
pub fn new(tokens: Vec<Token>) -> Self {
Self {
tokens,
current: 0,
config: ParserConfig::default(),
source: None,
pending_leading_comments: Vec::new(),
}
}
/// Create a parser from a pre-tokenized token stream with a custom [`ParserConfig`].
///
/// Parsing starts at the first token; no source text is attached, so
/// `Command` expressions cannot preserve original SQL verbatim (use
/// [`Parser::with_source`] for that).
pub fn with_config(tokens: Vec<Token>, config: ParserConfig) -> Self {
    Self {
        tokens,
        current: 0,
        config,
        source: None,
        pending_leading_comments: Vec::new(),
    }
}
/// Create a parser with source SQL attached.
///
/// The original SQL text is stored so that `Command` expressions (unparsed
/// dialect-specific statements) can preserve the exact source verbatim.
pub fn with_source(tokens: Vec<Token>, config: ParserConfig, source: String) -> Self {
Self {
tokens,
current: 0,
config,
source: Some(source),
pending_leading_comments: Vec::new(),
}
}
/// Parse one or more SQL statements from a raw string.
///
/// This is the main entry point for most callers. It tokenizes the input with
/// the default [`TokenizerConfig`], then parses all semicolon-separated
/// statements and returns them as a `Vec<Expression>`.
///
/// # Errors
///
/// Returns an error if the input contains invalid tokens or syntax that the
/// parser cannot recognize.
///
/// # Example
///
/// ```rust,ignore
/// let stmts = Parser::parse_sql("SELECT a FROM t WHERE x = 1")?;
/// ```
pub fn parse_sql(sql: &str) -> Result<Vec<Expression>> {
    let tokens = Tokenizer::default().tokenize(sql)?;
    // Attach the raw SQL so Command expressions can echo it verbatim.
    Parser::with_source(tokens, ParserConfig::default(), sql.to_string()).parse()
}
/// Parse SQL from a string using a custom [`TokenizerConfig`].
///
/// Use this variant when the source dialect requires non-default tokenizer
/// settings (e.g. different string quoting or comment syntax).
pub fn parse_sql_with_config(
    sql: &str,
    tokenizer_config: TokenizerConfig,
) -> Result<Vec<Expression>> {
    let tokens = Tokenizer::new(tokenizer_config).tokenize(sql)?;
    // Note: only the tokenizer is customized here; the parser itself still
    // runs with the default ParserConfig.
    Parser::with_source(tokens, ParserConfig::default(), sql.to_string()).parse()
}
/// Parse all remaining statements from the token stream.
///
/// Consumes tokens until the end of input, splitting on semicolons.
/// Returns one `Expression` per statement.
///
/// # Errors
///
/// Returns an error if a statement leaves unconsumed tokens before the next
/// semicolon/EOF (except under the ClickHouse dialect, which falls back to
/// skipping them — mirroring Python sqlglot's `_parse_as_command` behavior).
pub fn parse(&mut self) -> Result<Vec<Expression>> {
    let mut statements = Vec::new();
    while !self.is_at_end() {
        let mut stmt = self.parse_statement()?;
        // Before consuming the semicolon, capture its leading comments
        // and attach them to the statement (e.g., SELECT foo\n/* comment */\n;).
        // They become trailing comments on an Annotated wrapper so the
        // generator can re-emit them after the statement.
        if self.check(TokenType::Semicolon) {
            let semi_comments = self.current_leading_comments().to_vec();
            if !semi_comments.is_empty() {
                stmt = Expression::Annotated(Box::new(Annotated {
                    this: stmt,
                    trailing_comments: semi_comments,
                }));
            }
        }
        // ClickHouse: consume trailing SETTINGS key=val, ... after any statement.
        // The parsed settings are discarded here; parsing only advances the cursor.
        if matches!(
            self.config.dialect,
            Some(crate::dialects::DialectType::ClickHouse)
        ) && self.check(TokenType::Settings)
        {
            self.skip(); // consume SETTINGS
            let _ = self.parse_settings_property()?;
        }
        // ClickHouse: consume trailing FORMAT <name> after any statement
        if matches!(
            self.config.dialect,
            Some(crate::dialects::DialectType::ClickHouse)
        ) && self.check(TokenType::Format)
        {
            self.skip(); // consume FORMAT
            // Accept any identifier/keyword/Null as format name
            if self.check(TokenType::Null) {
                self.skip();
            } else if self.is_identifier_token() || self.check_keyword() {
                self.skip();
            }
        }
        // ClickHouse: PARALLEL WITH between statements (multi-statement execution).
        // Push the current statement and restart the loop immediately, skipping
        // the unconsumed-token check and semicolon consumption below.
        if matches!(
            self.config.dialect,
            Some(crate::dialects::DialectType::ClickHouse)
        ) && self.check_identifier("PARALLEL")
            && self.check_next(TokenType::With)
        {
            self.skip(); // consume PARALLEL
            self.skip(); // consume WITH
            statements.push(stmt);
            continue;
        }
        // After parsing a statement, the next token must be a semicolon or EOF.
        // If not, there are unconsumed tokens which indicates a parse error.
        // This matches Python sqlglot's behavior (parser.py line 1826-1827).
        if !self.is_at_end() && !self.check(TokenType::Semicolon) {
            if matches!(
                self.config.dialect,
                Some(crate::dialects::DialectType::ClickHouse)
            ) {
                // ClickHouse fallback: consume unconsumed tokens until semicolon/EOF.
                // This matches Python sqlglot's _parse_as_command behavior for
                // ClickHouse-specific syntax that we don't fully parse yet.
                while !self.is_at_end() && !self.check(TokenType::Semicolon) {
                    self.skip();
                }
            } else {
                return Err(self.parse_error("Invalid expression / Unexpected token"));
            }
        }
        // Consume optional semicolons (ClickHouse allows multiple like `;;`)
        while self.match_token(TokenType::Semicolon) {}
        statements.push(stmt);
    }
    Ok(statements)
}
/// Parse a single SQL statement from the current position in the token stream.
///
/// Dispatches to the appropriate sub-parser based on the leading keyword
/// (SELECT, INSERT, CREATE, etc.). Unknown or dialect-specific statements
/// fall through to a `Command` expression that preserves the raw SQL text.
pub fn parse_statement(&mut self) -> Result<Expression> {
    #[cfg(feature = "stacker")]
    {
        // Deeply nested SQL recurses heavily; with the "stacker" feature the
        // native stack is grown on demand (8 MiB segments). Debug builds use
        // a larger red zone (4 MiB vs 1 MiB) since unoptimized frames are
        // substantially bigger.
        let red_zone = if cfg!(debug_assertions) {
            4 * 1024 * 1024
        } else {
            1024 * 1024
        };
        stacker::maybe_grow(red_zone, 8 * 1024 * 1024, || self.parse_statement_inner())
    }
    #[cfg(not(feature = "stacker"))]
    {
        self.parse_statement_inner()
    }
}
/// Statement dispatch: fast-path the most common statement heads and fall
/// back to the exhaustive slow path for everything else.
fn parse_statement_inner(&mut self) -> Result<Expression> {
    // Leading semicolons are empty statements; discard them.
    while self.match_token(TokenType::Semicolon) {}
    if self.is_at_end() {
        return Err(self.parse_error("Unexpected end of input"));
    }
    if self.check(TokenType::Select) {
        self.parse_select()
    } else if self.check(TokenType::With) {
        self.parse_with()
    } else if self.check(TokenType::From) {
        self.parse_from_first_query()
    } else if self.check(TokenType::Hint) {
        self.parse_statement_with_leading_hint()
    } else if self.check(TokenType::LParen) {
        self.parse_parenthesized_statement_or_expression()
    } else {
        self.parse_statement_slow()
    }
}
fn parse_statement_slow(&mut self) -> Result<Expression> {
// Skip any leading semicolons
while self.match_token(TokenType::Semicolon) {}
if self.is_at_end() {
return Err(self.parse_error("Unexpected end of input"));
}
match self.peek().token_type {
// Handle hint comment /*+ ... */ before a statement - convert to regular comment
TokenType::Hint => {
let hint_token = self.advance();
let hint_text = hint_token.text.clone();
// Convert hint to regular comment (preserve the + as part of the content)
let comment = format!("/* + {} */", hint_text.trim());
// Parse the following statement
let mut stmt = self.parse_statement()?;
// Attach the comment to the statement's leading_comments
match &mut stmt {
Expression::Select(select) => {
select.leading_comments.insert(0, comment);
}
Expression::Insert(insert) => {
insert.leading_comments.insert(0, comment);
}
Expression::Update(update) => {
update.leading_comments.insert(0, comment);
}
Expression::Delete(delete) => {
delete.leading_comments.insert(0, comment);
}
Expression::CreateTable(ct) => {
ct.leading_comments.insert(0, comment);
}
_ => {
// For other statement types, we can't attach comments
// but at least the statement parses successfully
}
}
Ok(stmt)
}
TokenType::Select => self.parse_select(),
TokenType::With => self.parse_with(),
TokenType::Insert => self.parse_insert(),
TokenType::Replace => self.parse_replace(),
TokenType::Update => self.parse_update(),
TokenType::Delete => self.parse_delete(),
TokenType::Create => self.parse_create(),
TokenType::Drop => self.parse_drop(),
TokenType::Alter => self.parse_alter(),
TokenType::Truncate => {
// TRUNCATE could be TRUNCATE TABLE (statement) or TRUNCATE(a, b) (function)
// Check if followed by ( to determine which
if self.check_next(TokenType::LParen) {
// TRUNCATE(a, b) - function call
self.parse_expression()
} else {
self.parse_truncate()
}
}
TokenType::Values => {
// VALUES could be VALUES(...) statement or VALUES 1, 2, 3 (bare values)
if self.check_next(TokenType::LParen)
|| self.check_next(TokenType::Number)
|| self.check_next(TokenType::String)
{
self.parse_values()
} else {
// "values" by itself is an identifier/expression
self.parse_expression()
}
}
TokenType::Use => self.parse_use(),
TokenType::Cache => self.parse_cache(),
TokenType::Uncache => self.parse_uncache(),
TokenType::Refresh => {
self.skip(); // consume REFRESH
self.parse_refresh()?
.ok_or_else(|| self.parse_error("Failed to parse REFRESH statement"))
}
TokenType::Load => self.parse_load_data(),
TokenType::Grant => self.parse_grant(),
TokenType::Revoke => self.parse_revoke(),
TokenType::Comment => self.parse_comment(),
TokenType::Merge => {
self.skip(); // consume MERGE
self.parse_merge()?
.ok_or_else(|| self.parse_error("Failed to parse MERGE statement"))
}
TokenType::Set => self.parse_set(),
TokenType::Database
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::Teradata)
) =>
{
// Teradata: DATABASE tduser -> USE tduser
self.skip(); // consume DATABASE
let name = self.expect_identifier_or_keyword()?;
Ok(Expression::Use(Box::new(Use {
kind: None,
this: Identifier::new(name),
})))
}
TokenType::Lock
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::Teradata)
) =>
{
self.parse_locking_statement()
}
TokenType::Command => {
self.skip(); // consume command keyword
self.parse_command()?
.ok_or_else(|| self.parse_error("Failed to parse COMMAND statement"))
}
TokenType::Rename
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::Teradata)
| Some(crate::dialects::DialectType::ClickHouse)
) =>
{
self.skip(); // consume RENAME
self.parse_command()?
.ok_or_else(|| self.parse_error("Failed to parse RENAME statement"))
}
TokenType::Pragma => self.parse_pragma(),
TokenType::Rollback => self.parse_rollback(),
TokenType::Commit => self.parse_commit(),
TokenType::Begin => self.parse_transaction(),
TokenType::End => {
// In PostgreSQL, END is an alias for COMMIT (END [WORK|TRANSACTION])
// In TSQL and other dialects, END is a block delimiter (BEGIN...END)
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::PostgreSQL)
) {
self.parse_end_transaction()
} else {
self.skip(); // consume END
Ok(Expression::Command(Box::new(Command {
this: "END".to_string(),
})))
}
}
TokenType::Start => self.parse_start_transaction(),
TokenType::Describe | TokenType::Desc => self.parse_describe(),
TokenType::Show => self.parse_show(),
TokenType::Copy => self.parse_copy(),
TokenType::Put => self.parse_put(),
TokenType::Kill
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) =>
{
self.skip(); // consume KILL
self.parse_command()?
.ok_or_else(|| self.parse_error("Failed to parse KILL statement"))
}
TokenType::Kill => self.parse_kill(),
TokenType::Execute => {
// ClickHouse: EXECUTE AS username statement → parse as command
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) {
self.skip(); // consume EXECUTE
self.parse_command()?
.ok_or_else(|| self.parse_error("Failed to parse EXECUTE statement"))
} else if self
.peek_nth(1)
.map(|t| t.text.eq_ignore_ascii_case("IMMEDIATE"))
== Some(true)
{
// EXECUTE IMMEDIATE — Snowflake/BigQuery dynamic SQL, treat as raw command
self.skip(); // consume EXECUTE
self.parse_command()?.ok_or_else(|| {
self.parse_error("Failed to parse EXECUTE IMMEDIATE statement")
})
} else {
self.parse_execute()
}
}
TokenType::Declare => {
self.skip(); // consume DECLARE
self.parse_declare()?
.ok_or_else(|| self.parse_error("Failed to parse DECLARE statement"))
}
// GET is a command only when followed by @ (stage reference), otherwise it's a function
// If followed by ( it should be parsed as GET() function, so fall through to expression parsing
TokenType::Get
if self.check_next(TokenType::DAt) || !self.check_next(TokenType::LParen) =>
{
self.parse_get_command()
}
TokenType::Var
if self.peek().text.eq_ignore_ascii_case("RM")
|| self.peek().text.eq_ignore_ascii_case("REMOVE") =>
{
self.parse_rm_command()
}
TokenType::Var if self.peek().text.eq_ignore_ascii_case("CALL") => self.parse_call(),
TokenType::Var
if self.peek().text.eq_ignore_ascii_case("EXCHANGE")
&& matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) =>
{
self.skip(); // consume EXCHANGE
self.parse_command()?
.ok_or_else(|| self.parse_error("Failed to parse EXCHANGE statement"))
}
// EXPLAIN is treated as DESCRIBE (MySQL maps EXPLAIN -> DESCRIBE)
TokenType::Var if self.peek().text.eq_ignore_ascii_case("EXPLAIN") => {
self.parse_describe()
}
// LOCK TABLES / UNLOCK TABLES (MySQL)
TokenType::Var
if self.peek().text.eq_ignore_ascii_case("LOCK")
|| self.peek().text.eq_ignore_ascii_case("UNLOCK") =>
{
self.skip(); // consume LOCK/UNLOCK
self.parse_command()?
.ok_or_else(|| self.parse_error("Failed to parse LOCK/UNLOCK statement"))
}
TokenType::Var if self.peek().text.eq_ignore_ascii_case("ANALYZE") => {
self.skip(); // consume ANALYZE
self.parse_analyze()?
.ok_or_else(|| self.parse_error("Failed to parse ANALYZE statement"))
}
// TSQL: PRINT expression
TokenType::Var if self.peek().text.eq_ignore_ascii_case("PRINT") => {
self.skip(); // consume PRINT
self.parse_command()?
.ok_or_else(|| self.parse_error("Failed to parse PRINT statement"))
}
// TSQL: WAITFOR DELAY '00:00:05' / WAITFOR TIME '23:00:00'
TokenType::Var if self.peek().text.eq_ignore_ascii_case("WAITFOR") => {
self.skip(); // consume WAITFOR
self.parse_command()?
.ok_or_else(|| self.parse_error("Failed to parse WAITFOR statement"))
}
// TSQL: BULK INSERT table FROM 'file' WITH (options)
TokenType::Var if self.peek().text.eq_ignore_ascii_case("BULK") => {
self.skip(); // consume BULK
self.parse_command()?
.ok_or_else(|| self.parse_error("Failed to parse BULK INSERT statement"))
}
// ClickHouse: CHECK TABLE t [PARTITION p] [SETTINGS ...]
TokenType::Check
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) =>
{
self.skip(); // consume CHECK
self.parse_command()?
.ok_or_else(|| self.parse_error("Failed to parse CHECK statement"))
}
// ClickHouse: SETTINGS key=value, ... (standalone statement or after another statement)
TokenType::Settings
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) =>
{
self.skip(); // consume SETTINGS
self.parse_command()?
.ok_or_else(|| self.parse_error("Failed to parse SETTINGS statement"))
}
// ClickHouse: SYSTEM STOP/START MERGES, etc.
TokenType::System
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) =>
{
self.skip(); // consume SYSTEM
self.parse_command()?
.ok_or_else(|| self.parse_error("Failed to parse SYSTEM statement"))
}
// ClickHouse: RENAME TABLE db.t1 TO db.t2 [, db.t3 TO db.t4 ...]
TokenType::Var
if self.peek().text.eq_ignore_ascii_case("RENAME")
&& matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) =>
{
self.skip(); // consume RENAME
self.parse_command()?
.ok_or_else(|| self.parse_error("Failed to parse RENAME statement"))
}
// ClickHouse: OPTIMIZE TABLE t [FINAL] [DEDUPLICATE [BY ...]]
// MySQL: OPTIMIZE [LOCAL|NO_WRITE_TO_BINLOG] TABLE t1 [, t2, ...]
// Databricks/Spark: OPTIMIZE t [WHERE ...] [ZORDER BY (...)]
TokenType::Var
if self.peek().text.eq_ignore_ascii_case("OPTIMIZE")
&& matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
| Some(crate::dialects::DialectType::MySQL)
| Some(crate::dialects::DialectType::SingleStore)
| Some(crate::dialects::DialectType::Doris)
| Some(crate::dialects::DialectType::StarRocks)
| Some(crate::dialects::DialectType::Databricks)
| Some(crate::dialects::DialectType::Spark)
| None
) =>
{
self.skip(); // consume OPTIMIZE
self.parse_command()?
.ok_or_else(|| self.parse_error("Failed to parse OPTIMIZE statement"))
}
// ClickHouse: EXISTS [TEMPORARY] TABLE/DATABASE/DICTIONARY ...
TokenType::Exists
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && !self.check_next(TokenType::LParen) =>
{
self.skip(); // consume EXISTS
self.parse_command()?
.ok_or_else(|| self.parse_error("Failed to parse EXISTS statement"))
}
// ClickHouse: SHOW ... (various SHOW commands beyond what's already handled)
TokenType::Var
if self.peek().text.eq_ignore_ascii_case("EXISTS")
&& matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) =>
{
self.skip(); // consume EXISTS
self.parse_command()?
.ok_or_else(|| self.parse_error("Failed to parse EXISTS statement"))
}
// DuckDB: ATTACH [DATABASE] [IF NOT EXISTS] 'path' [AS alias] [(options)]
TokenType::Var if self.peek().text.eq_ignore_ascii_case("ATTACH") => {
self.skip(); // consume ATTACH
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) {
self.parse_command()?
.ok_or_else(|| self.parse_error("Failed to parse ATTACH statement"))
} else {
self.parse_attach_detach(true)
}
}
// UNDROP TABLE/SCHEMA/DATABASE (ClickHouse, Snowflake)
TokenType::Var
if self.peek().text.eq_ignore_ascii_case("UNDROP")
&& matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
| Some(crate::dialects::DialectType::Snowflake)
) =>
{
self.skip(); // consume UNDROP
let kind = if self.match_token(TokenType::Table) {
"TABLE"
} else if self.match_token(TokenType::Schema) {
"SCHEMA"
} else if self.match_token(TokenType::Database) {
"DATABASE"
} else {
return Err(
self.parse_error("Expected TABLE, SCHEMA, or DATABASE after UNDROP")
);
};
let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
let name = self.parse_table_ref()?;
Ok(Expression::Undrop(Box::new(crate::expressions::Undrop {
kind: kind.to_string(),
name,
if_exists,
})))
}
// ClickHouse: DETACH TABLE [IF EXISTS] ... [ON CLUSTER ...]
TokenType::Var
if self.peek().text.eq_ignore_ascii_case("DETACH")
&& matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) =>
{
self.skip(); // consume DETACH
self.parse_command()?
.ok_or_else(|| self.parse_error("Failed to parse DETACH statement"))
}
// DuckDB: DETACH [DATABASE] [IF EXISTS] name
TokenType::Var if self.peek().text.eq_ignore_ascii_case("DETACH") => {
self.skip(); // consume DETACH
self.parse_attach_detach(false)
}
// Databricks/Spark: RESTORE TABLE t TO VERSION/TIMESTAMP AS OF x
TokenType::Var if self.peek().text.eq_ignore_ascii_case("RESTORE") => {
self.skip(); // consume RESTORE
self.parse_as_command()?
.ok_or_else(|| self.parse_error("Failed to parse RESTORE statement"))
}
// Databricks/Spark: VACUUM t [RETAIN n HOURS] [DRY RUN]
TokenType::Var if self.peek().text.eq_ignore_ascii_case("VACUUM") => {
self.skip(); // consume VACUUM
self.parse_as_command()?
.ok_or_else(|| self.parse_error("Failed to parse VACUUM statement"))
}
// Snowflake: LIST @stage / LS @stage
TokenType::Var
if self.peek().text.eq_ignore_ascii_case("LIST")
|| self.peek().text.eq_ignore_ascii_case("LS") =>
{
self.skip(); // consume LIST/LS
self.parse_as_command()?
.ok_or_else(|| self.parse_error("Failed to parse LIST/LS statement"))
}
// DuckDB: INSTALL extension [FROM source]
TokenType::Var if self.peek().text.eq_ignore_ascii_case("INSTALL") => {
self.skip(); // consume INSTALL
self.parse_install(false)
}
// DuckDB: FORCE INSTALL extension | FORCE CHECKPOINT db
TokenType::Var if self.peek().text.eq_ignore_ascii_case("FORCE") => {
self.skip(); // consume FORCE
self.parse_force_statement()
}
// DuckDB: SUMMARIZE [TABLE] expression
TokenType::Var if self.peek().text.eq_ignore_ascii_case("SUMMARIZE") => {
self.skip(); // consume SUMMARIZE
self.parse_summarize_statement()
}
// DuckDB: RESET [SESSION|GLOBAL|LOCAL] variable
TokenType::Var if self.peek().text.eq_ignore_ascii_case("RESET") => {
self.skip(); // consume RESET
self.parse_as_command()?
.ok_or_else(|| self.parse_error("Failed to parse RESET statement"))
}
// DuckDB statement-level PIVOT/UNPIVOT/PIVOT_WIDER syntax
TokenType::Pivot => {
self.skip(); // consume PIVOT
self.parse_simplified_pivot(false)?
.ok_or_else(|| self.parse_error("Failed to parse PIVOT statement"))
}
TokenType::Unpivot => {
self.skip(); // consume UNPIVOT
self.parse_simplified_pivot(true)?
.ok_or_else(|| self.parse_error("Failed to parse UNPIVOT statement"))
}
// DuckDB: PIVOT_WIDER is an alias for PIVOT
TokenType::Var if self.peek().text.eq_ignore_ascii_case("PIVOT_WIDER") => {
self.skip(); // consume PIVOT_WIDER
self.parse_simplified_pivot(false)?
.ok_or_else(|| self.parse_error("Failed to parse PIVOT_WIDER statement"))
}
// BigQuery procedural FOR...IN...DO loop
TokenType::For => {
self.skip(); // consume FOR
self.parse_for_in()
}
// BigQuery/procedural LOOP, REPEAT, WHILE control flow statements
TokenType::Var if self.peek().text.eq_ignore_ascii_case("LOOP") => {
self.skip(); // consume LOOP
self.parse_command()?
.ok_or_else(|| self.parse_error("Failed to parse LOOP statement"))
}
TokenType::Var if self.peek().text.eq_ignore_ascii_case("REPEAT") => {
self.skip(); // consume REPEAT
self.parse_command()?
.ok_or_else(|| self.parse_error("Failed to parse REPEAT statement"))
}
TokenType::Var if self.peek().text.eq_ignore_ascii_case("WHILE") => {
self.skip(); // consume WHILE
self.parse_command()?
.ok_or_else(|| self.parse_error("Failed to parse WHILE statement"))
}
// Athena/Presto: UNLOAD (SELECT ...) TO 'location' WITH (options)
TokenType::Var if self.peek().text.eq_ignore_ascii_case("UNLOAD") => {
self.parse_unload()
}
// Athena: USING EXTERNAL FUNCTION ... SELECT ...
TokenType::Using => self.parse_using_external_function(),
// BigQuery: EXPORT DATA [WITH CONNECTION conn] OPTIONS (...) AS SELECT ...
TokenType::Var if self.peek().text.eq_ignore_ascii_case("EXPORT") => {
self.parse_export_data()
}
// Presto/Trino: DEALLOCATE PREPARE <name>
TokenType::Var if self.peek().text.eq_ignore_ascii_case("DEALLOCATE") => {
self.parse_deallocate_prepare()
}
// DuckDB FROM-first syntax: FROM tbl = SELECT * FROM tbl
TokenType::From => self.parse_from_first_query(),
TokenType::LParen => {
// Check if this is a parenthesized query (SELECT, WITH, PIVOT, UNPIVOT, FROM, or EXPLAIN inside)
// by looking ahead after the opening paren
let next_is_explain = self.current + 1 < self.tokens.len()
&& self.tokens[self.current + 1].token_type == TokenType::Var
&& self.tokens[self.current + 1]
.text
.eq_ignore_ascii_case("EXPLAIN");
if self.check_next(TokenType::Select)
|| self.check_next(TokenType::With)
|| self.check_next(TokenType::Pivot)
|| self.check_next(TokenType::Unpivot)
|| self.check_next(TokenType::From)
|| next_is_explain
{
// Parse parenthesized query: (SELECT ...) ORDER BY x LIMIT y OFFSET z
self.skip(); // consume (
let inner = self.parse_statement()?;
self.expect(TokenType::RParen)?;
// Wrap in Subquery to preserve parentheses when used in set operations
let subquery = Expression::Subquery(Box::new(Subquery {
this: inner,
alias: None,
column_aliases: Vec::new(),
order_by: None,
limit: None,
offset: None,
distribute_by: None,
sort_by: None,
cluster_by: None,
lateral: false,
modifiers_inside: false,
trailing_comments: Vec::new(),
inferred_type: None,
}));
// Check for set operations after the parenthesized query
let result = self.parse_set_operation(subquery)?;
// Check for ORDER BY, LIMIT, OFFSET after parenthesized subquery
self.parse_query_modifiers(result)
} else if self.check_next(TokenType::LParen) {
// Nested parentheses - could be ((SELECT...)) or ((a, b))
// For deeply nested queries like (((SELECT 1) UNION SELECT 1) UNION SELECT 1),
// recurse into parse_statement to handle the inner parenthesized query with set ops
self.skip(); // consume (
let inner = self.parse_statement()?;
// Check for set operations inside the outer parens
let result = self.parse_set_operation(inner)?;
self.expect(TokenType::RParen)?;
let subquery = Expression::Subquery(Box::new(Subquery {
this: result,
alias: None,
column_aliases: Vec::new(),
order_by: None,
limit: None,
offset: None,
distribute_by: None,
sort_by: None,
cluster_by: None,
lateral: false,
modifiers_inside: false,
trailing_comments: Vec::new(),
inferred_type: None,
}));
// Check for set operations after the outer parenthesized query
let result = self.parse_set_operation(subquery)?;
let pre_alias_comments = self.previous_trailing_comments().to_vec();
if self.match_token(TokenType::As) {
let alias = self.expect_identifier_or_keyword_with_quoted()?;
let trailing_comments = self.previous_trailing_comments().to_vec();
Ok(Expression::Alias(Box::new(Alias {
this: result,
alias,
column_aliases: Vec::new(),
pre_alias_comments,
trailing_comments,
inferred_type: None,
})))
} else {
// Check for LIMIT/OFFSET after parenthesized expression
// e.g., ((SELECT 1)) LIMIT 1
self.parse_query_modifiers(result)
}
} else {
// Regular parenthesized expression like (a, b) or (x)
// Let parse_expression handle it
let expr = self.parse_expression()?;
let pre_alias_comments = self.previous_trailing_comments().to_vec();
if self.match_token(TokenType::As) {
// Check for tuple alias: AS ("a", "b", ...)
if self.match_token(TokenType::LParen) {
let mut column_aliases = Vec::new();
loop {
let col_alias = self.expect_identifier_or_keyword_with_quoted()?;
column_aliases.push(col_alias);
if !self.match_token(TokenType::Comma) {
break;
}
}
self.expect(TokenType::RParen)?;
let trailing_comments = self.previous_trailing_comments().to_vec();
Ok(Expression::Alias(Box::new(Alias {
this: expr,
alias: Identifier::empty(),
column_aliases,
pre_alias_comments,
trailing_comments,
inferred_type: None,
})))
} else {
let alias = self.expect_identifier_or_keyword_with_quoted()?;
let trailing_comments = self.previous_trailing_comments().to_vec();
Ok(Expression::Alias(Box::new(Alias {
this: expr,
alias,
column_aliases: Vec::new(),
pre_alias_comments,
trailing_comments,
inferred_type: None,
})))
}
} else {
Ok(expr)
}
}
}
_ => {
// Capture leading comments from the first token before parsing
let leading_comments = self.current_leading_comments().to_vec();
// Parse expression and check for optional alias
let expr = self.parse_expression()?;
// Capture any comments between expression and AS keyword
let pre_alias_comments = self.previous_trailing_comments().to_vec();
if self.match_token(TokenType::As) {
// Capture comments from AS token (e.g., AS /* foo */ (a, b, c))
// These go into trailing_comments (after the alias), not pre_alias_comments
let as_comments = self.previous_trailing_comments().to_vec();
// Check for tuple alias: AS ("a", "b", ...)
if self.match_token(TokenType::LParen) {
let mut column_aliases = Vec::new();
loop {
let col_alias = self.expect_identifier_or_keyword_with_quoted()?;
column_aliases.push(col_alias);
if !self.match_token(TokenType::Comma) {
break;
}
}
self.expect(TokenType::RParen)?;
let mut trailing_comments = as_comments;
trailing_comments.extend_from_slice(self.previous_trailing_comments());
Ok(Expression::Alias(Box::new(Alias {
this: expr,
alias: Identifier::empty(),
column_aliases,
pre_alias_comments,
trailing_comments,
inferred_type: None,
})))
} else {
let alias = self.expect_identifier_or_keyword_with_quoted()?;
let mut trailing_comments = self.previous_trailing_comments().to_vec();
// If there were leading comments on the expression (from a separate line),
// add them as trailing comments after the alias
trailing_comments.extend(leading_comments.iter().cloned());
Ok(Expression::Alias(Box::new(Alias {
this: expr,
alias,
column_aliases: Vec::new(),
pre_alias_comments,
trailing_comments,
inferred_type: None,
})))
}
} else if (self.check(TokenType::Var) && !self.check_keyword())
|| self.is_command_keyword_as_alias()
{
// Implicit alias (without AS) - e.g., "1. x" or "1.x" -> "1. AS x"
// This handles cases like PostgreSQL's "1.x" which parses as float 1. with alias x
let alias_text = self.advance().text.clone();
let trailing_comments = self.previous_trailing_comments().to_vec();
Ok(Expression::Alias(Box::new(Alias {
this: expr,
alias: Identifier::new(alias_text),
column_aliases: Vec::new(),
pre_alias_comments,
trailing_comments,
inferred_type: None,
})))
} else if !pre_alias_comments.is_empty() {
// Wrap in Annotated to preserve trailing comments for expressions without aliases
match &expr {
Expression::Literal(_) | Expression::Boolean(_) | Expression::Null(_) => {
Ok(Expression::Annotated(Box::new(
crate::expressions::Annotated {
this: expr,
trailing_comments: pre_alias_comments,
},
)))
}
// For expressions that already have trailing_comments fields, don't double-wrap
_ => Ok(expr),
}
} else if !leading_comments.is_empty() {
// Wrap in Annotated to preserve leading comments as trailing comments
// This matches Python sqlglot which converts leading line comments to trailing block comments
Ok(Expression::Annotated(Box::new(
crate::expressions::Annotated {
this: expr,
trailing_comments: leading_comments,
},
)))
} else {
Ok(expr)
}
}
}
}
/// Parse a statement that is preceded by a bare optimizer-hint token.
///
/// Consumes the hint token, renders it as a `/* + ... */` block comment,
/// parses the statement that follows, and prepends the rendered hint to the
/// statement's leading comments. Statement kinds without a
/// `leading_comments` field silently drop the hint (matching the original
/// catch-all behavior).
fn parse_statement_with_leading_hint(&mut self) -> Result<Expression> {
    // Consume the hint token and render it as a block comment.
    let hint_text = self.advance().text.clone();
    let comment = format!("/* + {} */", hint_text.trim());
    let mut stmt = self.parse_statement()?;
    // Locate the leading-comments slot for the statement kinds that have one.
    let slot = match &mut stmt {
        Expression::Select(select) => Some(&mut select.leading_comments),
        Expression::Insert(insert) => Some(&mut insert.leading_comments),
        Expression::Update(update) => Some(&mut update.leading_comments),
        Expression::Delete(delete) => Some(&mut delete.leading_comments),
        Expression::CreateTable(ct) => Some(&mut ct.leading_comments),
        _ => None,
    };
    if let Some(comments) = slot {
        comments.insert(0, comment);
    }
    Ok(stmt)
}
/// Parse the content after an opening parenthesis that may be either a
/// parenthesized query (`(SELECT ...)`, `(WITH ...)`, `(PIVOT ...)`, a
/// nested parenthesized query, or a FROM-first / EXPLAIN query) or a plain
/// parenthesized expression like `(a, b)`, optionally followed by an alias.
///
/// The caller is positioned ON the `(` token; this function consumes it for
/// the query forms, while the plain-expression path delegates the whole
/// parenthesized form to `parse_expression`.
fn parse_parenthesized_statement_or_expression(&mut self) -> Result<Expression> {
    // Wrap a parsed statement in a bare Subquery node so the surrounding
    // parentheses survive when the result participates in set operations
    // or receives query modifiers.
    fn wrap_in_subquery(inner: Expression) -> Expression {
        Expression::Subquery(Box::new(Subquery {
            this: inner,
            alias: None,
            column_aliases: Vec::new(),
            order_by: None,
            limit: None,
            offset: None,
            distribute_by: None,
            sort_by: None,
            cluster_by: None,
            lateral: false,
            modifiers_inside: false,
            trailing_comments: Vec::new(),
            inferred_type: None,
        }))
    }
    // Look ahead past the opening paren for EXPLAIN spelled as a Var token.
    let explain_follows = self.tokens.get(self.current + 1).map_or(false, |tok| {
        tok.token_type == TokenType::Var && tok.text.eq_ignore_ascii_case("EXPLAIN")
    });
    let query_follows = self.check_next(TokenType::Select)
        || self.check_next(TokenType::With)
        || self.check_next(TokenType::Pivot)
        || self.check_next(TokenType::Unpivot)
        || self.check_next(TokenType::From)
        || explain_follows;
    if query_follows {
        // Parenthesized query: (SELECT ...) [set ops] [ORDER BY / LIMIT / ...]
        self.skip(); // consume (
        let inner = self.parse_statement()?;
        self.expect(TokenType::RParen)?;
        let with_set_ops = self.parse_set_operation(wrap_in_subquery(inner))?;
        self.parse_query_modifiers(with_set_ops)
    } else if self.check_next(TokenType::LParen) {
        // Nested parentheses, e.g. ((SELECT ...)): recurse into the inner
        // parenthesized query, allowing set operations both inside and
        // after the outer parens.
        self.skip(); // consume (
        let inner = self.parse_statement()?;
        let inner = self.parse_set_operation(inner)?;
        self.expect(TokenType::RParen)?;
        let outer = self.parse_set_operation(wrap_in_subquery(inner))?;
        let pre_alias_comments = self.previous_trailing_comments().to_vec();
        if self.match_token(TokenType::As) {
            let alias = self.expect_identifier_or_keyword_with_quoted()?;
            let trailing_comments = self.previous_trailing_comments().to_vec();
            Ok(Expression::Alias(Box::new(Alias {
                this: outer,
                alias,
                column_aliases: Vec::new(),
                pre_alias_comments,
                trailing_comments,
                inferred_type: None,
            })))
        } else {
            // Allow trailing modifiers such as ((SELECT 1)) LIMIT 1.
            self.parse_query_modifiers(outer)
        }
    } else {
        // Plain parenthesized expression like (a, b) or (x); let
        // parse_expression handle the parens themselves.
        let expr = self.parse_expression()?;
        let pre_alias_comments = self.previous_trailing_comments().to_vec();
        if !self.match_token(TokenType::As) {
            return Ok(expr);
        }
        if self.match_token(TokenType::LParen) {
            // Tuple alias: AS ("a", "b", ...)
            let mut column_aliases = Vec::new();
            loop {
                column_aliases.push(self.expect_identifier_or_keyword_with_quoted()?);
                if !self.match_token(TokenType::Comma) {
                    break;
                }
            }
            self.expect(TokenType::RParen)?;
            let trailing_comments = self.previous_trailing_comments().to_vec();
            Ok(Expression::Alias(Box::new(Alias {
                this: expr,
                alias: Identifier::empty(),
                column_aliases,
                pre_alias_comments,
                trailing_comments,
                inferred_type: None,
            })))
        } else {
            // Simple alias: AS name
            let alias = self.expect_identifier_or_keyword_with_quoted()?;
            let trailing_comments = self.previous_trailing_comments().to_vec();
            Ok(Expression::Alias(Box::new(Alias {
                this: expr,
                alias,
                column_aliases: Vec::new(),
                pre_alias_comments,
                trailing_comments,
                inferred_type: None,
            })))
        }
    }
}
/// Parse a complete SELECT statement, including any set operations
/// (UNION / INTERSECT / EXCEPT) chained after the select body.
fn parse_select(&mut self) -> Result<Expression> {
    let body = self.parse_select_body()?;
    self.parse_set_operation(body)
}
#[inline(never)]
fn parse_select_body_head(&mut self) -> Result<Box<SelectBodyHead>> {
// Capture the SELECT token to get its comments
let select_token = self.expect(TokenType::Select)?;
let leading_comments = select_token.comments;
let post_select_comments = select_token.trailing_comments;
// Parse query hint /*+ ... */ if present (comes immediately after SELECT)
let hint = if self.check(TokenType::Hint) {
Some(self.parse_hint()?)
} else {
None
};
// Parse TOP clause (SQL Server style - comes before DISTINCT)
// But not if TOP is followed by DOT (e.g., SELECT top.x - top is a table alias)
let top = if self.check(TokenType::Top)
&& !self.check_next(TokenType::Dot)
&& self.match_token(TokenType::Top)
{
// TOP can have parentheses: TOP (10) or without: TOP 10
let (amount, parenthesized) = if self.match_token(TokenType::LParen) {
let expr = if self.check(TokenType::Select) || self.check(TokenType::With) {
let stmt = self.parse_statement()?;
Expression::Subquery(Box::new(Subquery {
this: stmt,
alias: None,
column_aliases: Vec::new(),
order_by: None,
limit: None,
offset: None,
distribute_by: None,
sort_by: None,
cluster_by: None,
lateral: false,
modifiers_inside: false,
trailing_comments: Vec::new(),
inferred_type: None,
}))
} else {
self.parse_expression()?
};
self.expect(TokenType::RParen)?;
(expr, true)
} else {
(self.parse_primary()?, false)
};
let percent = self.match_token(TokenType::Percent);
let with_ties = self.match_keywords(&[TokenType::With, TokenType::Ties]);
Some(Top {
this: amount,
percent,
with_ties,
parenthesized,
})
} else {
None
};
// Parse DISTINCT / DISTINCT ON / DISTINCTROW / ALL
// Oracle: UNIQUE is equivalent to DISTINCT (SELECT UNIQUE ... is old-style Oracle syntax)
let is_distinct_token = self.match_token(TokenType::Distinct)
|| (matches!(
self.config.dialect,
Some(crate::dialects::DialectType::Oracle)
) && self.match_token(TokenType::Unique));
let (distinct, distinct_on) = if is_distinct_token {
if self.match_token(TokenType::On) {
self.expect(TokenType::LParen)?;
let exprs = self.parse_expression_list()?;
self.expect(TokenType::RParen)?;
(true, Some(exprs))
} else {
(true, None)
}
} else if self.check_identifier("DISTINCTROW") {
self.skip();
(true, None)
} else {
if self.check(TokenType::All) && !self.check_next(TokenType::Dot) {
self.skip();
}
(false, None)
};
// TSQL: SELECT DISTINCT TOP n - TOP can come after DISTINCT
let top = if top.is_none()
&& self.check(TokenType::Top)
&& !self.check_next(TokenType::Dot)
&& self.match_token(TokenType::Top)
{
let (amount, parenthesized) = if self.match_token(TokenType::LParen) {
let expr = if self.check(TokenType::Select) || self.check(TokenType::With) {
let stmt = self.parse_statement()?;
Expression::Subquery(Box::new(Subquery {
this: stmt,
alias: None,
column_aliases: Vec::new(),
order_by: None,
limit: None,
offset: None,
distribute_by: None,
sort_by: None,
cluster_by: None,
lateral: false,
modifiers_inside: false,
trailing_comments: Vec::new(),
inferred_type: None,
}))
} else {
self.parse_expression()?
};
self.expect(TokenType::RParen)?;
(expr, true)
} else {
(self.parse_primary()?, false)
};
let percent = self.match_token(TokenType::Percent);
let with_ties = self.match_keywords(&[TokenType::With, TokenType::Ties]);
Some(Top {
this: amount,
percent,
with_ties,
parenthesized,
})
} else {
top
};
let mut operation_modifiers = Vec::new();
let is_mysql_dialect = matches!(
self.config.dialect,
Some(crate::dialects::DialectType::MySQL)
| Some(crate::dialects::DialectType::SingleStore)
| Some(crate::dialects::DialectType::StarRocks)
| Some(crate::dialects::DialectType::TiDB)
| Some(crate::dialects::DialectType::Doris)
);
if is_mysql_dialect {
const MYSQL_MODIFIERS: &[&str] = &[
"HIGH_PRIORITY",
"STRAIGHT_JOIN",
"SQL_SMALL_RESULT",
"SQL_BIG_RESULT",
"SQL_BUFFER_RESULT",
"SQL_NO_CACHE",
"SQL_CALC_FOUND_ROWS",
];
loop {
if self.check(TokenType::StraightJoin) {
self.skip();
operation_modifiers.push("STRAIGHT_JOIN".to_string());
} else if self.check(TokenType::Var) {
let upper = self.peek().text.to_ascii_uppercase();
if MYSQL_MODIFIERS.contains(&upper.as_str()) {
self.skip();
operation_modifiers.push(upper);
} else {
break;
}
} else {
break;
}
}
}
let kind = if self.match_token(TokenType::As) {
if self.match_identifier("STRUCT") {
Some("STRUCT".to_string())
} else if self.match_identifier("VALUE") {
Some("VALUE".to_string())
} else {
self.current -= 1;
None
}
} else {
None
};
let mut expressions = self.parse_select_expressions()?;
let exclude = if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::Redshift)
) {
let mut retreat_for_exclude = false;
if let Some(last_expr) = expressions.last() {
match last_expr {
Expression::Alias(alias)
if alias.alias.name.eq_ignore_ascii_case("EXCLUDE") =>
{
if self.check(TokenType::LParen)
|| self.is_identifier_token()
|| self.is_safe_keyword_as_identifier()
{
let stripped = alias.this.clone();
if let Some(last) = expressions.last_mut() {
*last = stripped;
}
retreat_for_exclude = true;
}
}
_ => {}
}
}
if retreat_for_exclude || self.check(TokenType::Exclude) {
if !retreat_for_exclude {
self.skip();
}
let mut exclude_cols = Vec::new();
if self.match_token(TokenType::LParen) {
loop {
let col_expr = self.parse_expression()?;
exclude_cols.push(col_expr);
if !self.match_token(TokenType::Comma) {
break;
}
}
self.match_token(TokenType::RParen);
} else {
loop {
if self.is_at_end()
|| self.check(TokenType::From)
|| self.check(TokenType::Where)
|| self.check(TokenType::Semicolon)
|| self.check(TokenType::RParen)
{
break;
}
let col_expr = self.parse_expression()?;
exclude_cols.push(col_expr);
if !self.match_token(TokenType::Comma) {
break;
}
}
}
if exclude_cols.is_empty() {
None
} else {
Some(exclude_cols)
}
} else {
None
}
} else {
None
};
let into = if self.match_text_seq(&["BULK", "COLLECT", "INTO"]) {
let mut target_expressions = vec![self.parse_expression()?];
while self.match_token(TokenType::Comma) {
target_expressions.push(self.parse_expression()?);
}
if target_expressions.len() == 1 {
Some(SelectInto {
this: target_expressions.remove(0),
temporary: false,
unlogged: false,
bulk_collect: true,
expressions: Vec::new(),
})
} else {
Some(SelectInto {
this: Expression::Null(Null),
temporary: false,
unlogged: false,
bulk_collect: true,
expressions: target_expressions,
})
}
} else if self.match_token(TokenType::Into) {
let temporary = self.match_token(TokenType::Temporary) || self.match_identifier("TEMP");
let unlogged = !temporary && self.match_identifier("UNLOGGED");
let table_name = self.parse_table_ref()?;
if self.match_token(TokenType::Comma) {
let mut target_expressions = vec![Expression::Table(Box::new(table_name))];
target_expressions.push(self.parse_expression()?);
while self.match_token(TokenType::Comma) {
target_expressions.push(self.parse_expression()?);
}
Some(SelectInto {
this: Expression::Null(Null),
temporary,
unlogged,
bulk_collect: false,
expressions: target_expressions,
})
} else {
Some(SelectInto {
this: Expression::Table(Box::new(table_name)),
temporary,
unlogged,
bulk_collect: false,
expressions: Vec::new(),
})
}
} else {
None
};
Ok(Box::new(SelectBodyHead {
leading_comments,
post_select_comments,
hint,
top,
distinct,
distinct_on,
kind,
operation_modifiers,
expressions,
exclude,
into,
}))
}
/// Parse a SELECT statement body without consuming trailing set operations.
/// Used by `parse_select_or_paren_select` to avoid mutual recursion with
/// `parse_set_operation`, which handles set-op chaining iteratively.
fn parse_select_body(&mut self) -> Result<Expression> {
let head = self.parse_select_body_head()?;
// Parse FROM clause
let from = if self.match_token(TokenType::From) {
Some(self.parse_from()?)
} else {
None
};
// Parse JOINs
let mut joins = self.parse_joins()?;
// Handle PIVOT/UNPIVOT that comes after JOINs (e.g., SELECT * FROM a JOIN b ON ... PIVOT(...))
// Store PIVOT/UNPIVOT in the last join's pivots field (this matches SQLGlot's semantics)
while self.check(TokenType::Pivot) || self.check(TokenType::Unpivot) {
if !joins.is_empty() {
let last_idx = joins.len() - 1;
// Parse the pivot/unpivot and store in the join's pivots vector
// We pass a Null expression as the `this` since the pivot applies to the entire join result
if self.match_token(TokenType::Pivot) {
let pivot = self.parse_pivot(Expression::Null(crate::expressions::Null))?;
joins[last_idx].pivots.push(pivot);
} else if self.match_token(TokenType::Unpivot) {
let unpivot = self.parse_unpivot(Expression::Null(crate::expressions::Null))?;
joins[last_idx].pivots.push(unpivot);
}
} else {
// No joins - break to avoid infinite loop
break;
}
}
// Parse LATERAL VIEW clauses (Hive/Spark)
let lateral_views = self.parse_lateral_views()?;
// Parse PREWHERE clause (ClickHouse specific)
let prewhere = if self.match_token(TokenType::Prewhere) {
Some(self.parse_expression()?)
} else {
None
};
// Parse WHERE clause
let mut where_clause = if self.match_token(TokenType::Where) {
Some(Where {
this: self.parse_expression()?,
})
} else {
None
};
// Parse CONNECT BY clause (Oracle hierarchical queries)
let connect = self.parse_connect()?;
// Parse GROUP BY
let group_by = if self.check(TokenType::Group) {
let group_comments = self.current_leading_comments().to_vec();
if self.match_keywords(&[TokenType::Group, TokenType::By]) {
let mut gb = self.parse_group_by()?;
gb.comments = group_comments;
Some(gb)
} else {
None
}
} else if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.check(TokenType::With)
&& (self.check_next_identifier("TOTALS")
|| self.check_next(TokenType::Rollup)
|| self.check_next(TokenType::Cube))
{
// ClickHouse: WITH TOTALS/ROLLUP/CUBE without GROUP BY
self.skip(); // consume WITH
let totals = self.match_identifier("TOTALS");
let mut expressions = Vec::new();
if self.match_token(TokenType::Rollup) {
expressions.push(Expression::Rollup(Box::new(Rollup {
expressions: Vec::new(),
})));
} else if self.match_token(TokenType::Cube) {
expressions.push(Expression::Cube(Box::new(Cube {
expressions: Vec::new(),
})));
}
// Check for chained WITH TOTALS after WITH ROLLUP/CUBE
if !totals && self.check(TokenType::With) && self.check_next_identifier("TOTALS") {
self.skip();
self.skip();
}
Some(GroupBy {
expressions,
all: None,
totals,
comments: Vec::new(),
})
} else {
None
};
// Parse HAVING
let having = if self.check(TokenType::Having) {
let having_comments = self.current_leading_comments().to_vec();
self.skip(); // consume HAVING
Some(Having {
this: self.parse_expression()?,
comments: having_comments,
})
} else {
None
};
// Parse QUALIFY clause (Snowflake, BigQuery, DuckDB)
// QUALIFY can appear before or after WINDOW clause
let mut qualify = if self.match_token(TokenType::Qualify) {
Some(Qualify {
this: self.parse_expression()?,
})
} else {
None
};
// Parse WINDOW clause (named windows)
// Only match WINDOW if followed by identifier AS ( (a real window definition)
// Otherwise "window" may be a table alias (e.g., SELECT * FROM foo window)
let windows = if self.check(TokenType::Window) && {
let next_pos = self.current + 1;
next_pos < self.tokens.len()
&& (self.tokens[next_pos].token_type == TokenType::Var
|| self.tokens[next_pos].token_type == TokenType::Identifier)
} {
self.skip(); // consume WINDOW
Some(self.parse_named_windows()?)
} else {
None
};
// QUALIFY can also appear after WINDOW clause (DuckDB)
let qualify_after_window = if qualify.is_none() && self.match_token(TokenType::Qualify) {
qualify = Some(Qualify {
this: self.parse_expression()?,
});
true
} else {
false
};
// Parse DISTRIBUTE BY (Hive/Spark) - comes before SORT BY
let distribute_by = if self.match_keywords(&[TokenType::Distribute, TokenType::By]) {
Some(self.parse_distribute_by()?)
} else {
None
};
// Parse CLUSTER BY (Hive/Spark)
let cluster_by = if self.match_keywords(&[TokenType::Cluster, TokenType::By]) {
Some(self.parse_cluster_by()?)
} else {
None
};
// Parse SORT BY (Hive/Spark) - can come before ORDER BY
let sort_by = if self.match_keywords(&[TokenType::Sort, TokenType::By]) {
Some(self.parse_sort_by()?)
} else {
None
};
// Parse ORDER BY or ORDER SIBLINGS BY (Oracle) - comes after SORT BY
let order_by = if self.check(TokenType::Order) {
let order_comments = self.current_leading_comments().to_vec();
if self.match_keywords(&[TokenType::Order, TokenType::Siblings, TokenType::By]) {
// ORDER SIBLINGS BY (Oracle hierarchical queries)
let mut ob = self.parse_order_by_with_siblings(true)?;
ob.comments = order_comments;
Some(ob)
} else if self.match_keywords(&[TokenType::Order, TokenType::By]) {
let mut ob = self.parse_order_by()?;
ob.comments = order_comments;
Some(ob)
} else {
None
}
} else {
None
};
// Parse LIMIT (supports MySQL syntax: LIMIT offset, count)
// DuckDB supports: LIMIT 10 PERCENT or LIMIT 10%
// Capture trailing comments from the token before LIMIT (e.g., WHERE condition's last token)
// These comments should be emitted after the LIMIT value, not before LIMIT.
let pre_limit_comments = if self.check(TokenType::Limit) {
let mut comments = self.previous_trailing_comments().to_vec();
// Also capture leading comments on the LIMIT token (comments on a separate line before LIMIT)
comments.extend_from_slice(self.current_leading_comments());
comments
} else {
Vec::new()
};
let (limit, offset) = if self.match_token(TokenType::Limit) {
// Clear the pre-LIMIT comments from the WHERE condition expression to avoid duplication
if !pre_limit_comments.is_empty() {
if let Some(ref mut w) = where_clause {
Self::clear_rightmost_trailing_comments(&mut w.this);
}
}
// First try parse_unary to check for PERCENT/% modifier.
// This avoids parse_expression consuming % as the modulo operator.
// Both "PERCENT" and "%" tokens have TokenType::Percent, but we need to
// distinguish PERCENT-as-modifier from %-as-modulo. "%" is PERCENT when
// followed by a clause boundary (OFFSET, end, semicolon, etc.).
let saved_pos = self.current;
let (first_expr, has_percent) = {
let unary_result = self.parse_unary();
match unary_result {
Ok(expr) => {
if self.check(TokenType::Percent) && self.is_percent_modifier() {
// Found PERCENT keyword or % symbol used as PERCENT modifier
self.skip();
(expr, true)
} else {
// No PERCENT - backtrack and use full parse_expression
self.current = saved_pos;
let full_expr = self.parse_expression()?;
// Check again for PERCENT keyword (e.g., after complex expression)
let has_pct =
if self.check(TokenType::Percent) && self.is_percent_modifier() {
self.skip();
true
} else {
false
};
(full_expr, has_pct)
}
}
Err(_) => {
// Unary parsing failed - backtrack and use parse_expression
self.current = saved_pos;
let full_expr = self.parse_expression()?;
let has_pct =
if self.check(TokenType::Percent) && self.is_percent_modifier() {
self.skip();
true
} else {
false
};
(full_expr, has_pct)
}
}
};
// MySQL syntax: LIMIT offset, count
if self.match_token(TokenType::Comma) {
let second_expr = self.parse_expression()?;
// First expression is offset, second is count
(
Some(Limit {
this: second_expr,
percent: false,
comments: pre_limit_comments.clone(),
}),
Some(Offset {
this: first_expr,
rows: None,
}),
)
} else {
// Standard: LIMIT count [PERCENT]
(
Some(Limit {
this: first_expr,
percent: has_percent,
comments: pre_limit_comments,
}),
None,
)
}
} else {
(None, None)
};
// WITH TIES after LIMIT (ClickHouse, DuckDB)
if limit.is_some() {
let _ = self.match_keywords(&[TokenType::With, TokenType::Ties]);
}
// Parse OFFSET (if not already parsed from MySQL LIMIT syntax)
// Standard SQL syntax: OFFSET n [ROW|ROWS]
// Some dialects (Presto/Trino) support: OFFSET n LIMIT m
let (limit, offset) = if offset.is_none() && self.match_token(TokenType::Offset) {
let expr = self.parse_expression()?;
// Consume optional ROW or ROWS keyword and track it
let rows = if self.match_token(TokenType::Row) || self.match_token(TokenType::Rows) {
Some(true)
} else {
None
};
let offset = Some(Offset { this: expr, rows });
// Check for LIMIT after OFFSET (Presto/Trino syntax: OFFSET n LIMIT m)
let limit = if limit.is_none() && self.match_token(TokenType::Limit) {
let limit_expr = self.parse_expression()?;
Some(Limit {
this: limit_expr,
percent: false,
comments: Vec::new(),
})
} else {
limit
};
(limit, offset)
} else {
(limit, offset)
};
// ClickHouse: LIMIT ... BY expressions
let limit_by = if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && limit.is_some()
&& self.match_token(TokenType::By)
{
let expressions = self.parse_expression_list()?;
if expressions.is_empty() {
return Err(self.parse_error("Expected expression after LIMIT BY"));
}
Some(expressions)
} else {
None
};
// ClickHouse: second LIMIT after LIMIT BY (LIMIT n BY expr LIMIT m)
// Also supports LIMIT offset, count syntax
let (limit, offset) = if limit_by.is_some() && self.match_token(TokenType::Limit) {
let first_expr = self.parse_expression()?;
if self.match_token(TokenType::Comma) {
// LIMIT offset, count
let count_expr = self.parse_expression()?;
(
Some(Limit {
this: count_expr,
percent: false,
comments: Vec::new(),
}),
Some(Offset {
this: first_expr,
rows: None,
}),
)
} else {
(
Some(Limit {
this: first_expr,
percent: false,
comments: Vec::new(),
}),
offset,
)
}
} else {
(limit, offset)
};
// Parse FETCH FIRST/NEXT clause
let fetch = if self.match_token(TokenType::Fetch) {
Some(self.parse_fetch()?)
} else {
None
};
// Parse SAMPLE / TABLESAMPLE clause
let sample = self.parse_sample_clause()?;
// Parse FOR UPDATE/SHARE locks or FOR XML/JSON (T-SQL)
let (locks, for_xml, for_json) = self.parse_locks_and_for_xml()?;
let option = self.parse_select_option_clause();
let (settings, format) = self.parse_clickhouse_select_settings_and_format()?;
let SelectBodyHead {
leading_comments,
post_select_comments,
hint,
top,
distinct,
distinct_on,
kind,
operation_modifiers,
expressions,
exclude,
into,
} = *head;
let select = Select {
expressions,
from,
joins,
lateral_views,
prewhere,
where_clause,
group_by,
having,
qualify,
order_by,
distribute_by,
cluster_by,
sort_by,
limit,
offset,
limit_by,
fetch,
distinct,
distinct_on,
top,
with: None,
sample,
settings,
format,
windows,
hint,
connect,
into,
locks,
for_xml,
for_json,
leading_comments,
post_select_comments,
kind,
operation_modifiers,
qualify_after_window,
option,
exclude,
};
Ok(Expression::Select(Box::new(select)))
}
/// Parse a T-SQL `OPTION(...)` query-hint clause into its raw textual form.
///
/// The clause is not turned into structured AST nodes; the tokens between the
/// balanced parentheses are re-serialized into a single string (e.g.
/// `OPTION(MAXDOP 1, FAST 10)`) that the generator can emit verbatim.
/// Returns `None` when the upcoming tokens do not start an OPTION clause
/// (identifier `OPTION` immediately followed by `(`).
fn parse_select_option_clause(&mut self) -> Option<String> {
    // The serialized clause always starts with this prefix; its length is also
    // the threshold for "have we emitted any content yet".
    const PREFIX: &str = "OPTION(";
    // A separating space is needed before the next word unless the buffer is
    // still empty (just the prefix) or already ends with '(' or a space.
    fn needs_space(buf: &str) -> bool {
        buf.len() > PREFIX.len() && !buf.ends_with('(') && !buf.ends_with(' ')
    }
    if !(self.check_identifier("OPTION") && self.check_next(TokenType::LParen)) {
        return None;
    }
    self.skip(); // consume OPTION
    self.skip(); // consume (
    let mut content = String::from(PREFIX);
    let mut depth = 1;
    // Copy tokens until the matching close paren, tracking nesting depth.
    while !self.is_at_end() && depth > 0 {
        let tok = self.advance();
        if tok.token_type == TokenType::LParen {
            depth += 1;
        } else if tok.token_type == TokenType::RParen {
            depth -= 1;
        }
        // depth == 0 means `tok` was the final closing paren: don't emit it
        // here — the canonical ')' is appended once after the loop.
        if depth > 0 {
            match tok.token_type {
                TokenType::String => {
                    if needs_space(&content) {
                        content.push(' ');
                    }
                    // Re-quote string literals, doubling embedded quotes.
                    content.push('\'');
                    content.push_str(&tok.text.replace('\'', "''"));
                    content.push('\'');
                }
                TokenType::Eq => content.push_str(" = "),
                TokenType::Comma => content.push_str(", "),
                _ => {
                    if needs_space(&content) {
                        content.push(' ');
                    }
                    content.push_str(&tok.text);
                }
            }
        }
    }
    content.push(')');
    Some(content)
}
/// Parse trailing ClickHouse `SETTINGS name = value, ...` and `FORMAT <name>`
/// clauses at the end of a SELECT.
///
/// The two clauses may appear in either order, each at most once. For any
/// other dialect this is a no-op returning `(None, None)`.
fn parse_clickhouse_select_settings_and_format(
    &mut self,
) -> Result<(Option<Vec<Expression>>, Option<Expression>)> {
    // Only ClickHouse has these trailing clauses; bail out for other dialects.
    if !matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    ) {
        return Ok((None, None));
    }
    let mut settings = None;
    let mut format = None;
    loop {
        if settings.is_none() && self.match_token(TokenType::Settings) {
            // Comma-separated list of `name = value` assignment expressions.
            let mut items = vec![self.parse_expression()?];
            while self.match_token(TokenType::Comma) {
                items.push(self.parse_expression()?);
            }
            settings = Some(items);
        } else if format.is_none() && self.match_token(TokenType::Format) {
            // `FORMAT Null` is valid: NULL tokenizes as a keyword, so treat it
            // as a plain identifier here.
            let ident = if self.check(TokenType::Null) {
                Identifier::new(self.advance().text)
            } else {
                self.expect_identifier_or_keyword_with_quoted()?
            };
            format = Some(Expression::Identifier(ident));
            // Discard any remaining FORMAT arguments up to the statement end,
            // unless a SETTINGS clause follows immediately.
            if !self.is_at_end()
                && !self.check(TokenType::Semicolon)
                && !self.check(TokenType::Settings)
            {
                while !self.is_at_end() && !self.check(TokenType::Semicolon) {
                    self.skip();
                }
            }
        } else {
            break;
        }
    }
    Ok((settings, format))
}
/// Parse a WITH clause (CTE list) and the statement it prefixes.
///
/// Handles standard CTEs (`WITH name [(cols)] AS (query)`), Snowflake's
/// AS-less form (`WITH t (SELECT ...)`), DuckDB recursive CTEs with
/// `USING KEY (cols)`, and two ClickHouse extensions: expression-first items
/// (`WITH <expr> AS <alias> SELECT ...`) and keyword-named lambdas
/// (`WITH x -> body AS alias`). After the CTE list, the main statement is
/// parsed and the resulting `With` node is attached to it.
fn parse_with(&mut self) -> Result<Expression> {
    use crate::dialects::DialectType;
    let with_token = self.expect(TokenType::With)?;
    // Comments attached to the WITH token itself surface on the With node.
    let leading_comments = with_token.comments;
    let recursive = self.match_token(TokenType::Recursive);
    let mut ctes = Vec::new();
    loop {
        // ClickHouse supports expression-first WITH items:
        // WITH <expr> AS <alias> SELECT ...
        if matches!(self.config.dialect, Some(DialectType::ClickHouse)) {
            // Remember the position so we can backtrack to standard CTE parsing.
            let saved_pos = self.current;
            if let Ok(expr) = self.parse_expression() {
                // Check if parse_expression already consumed the AS alias
                // (e.g., `(1, 2) AS a` gets parsed as Alias(Tuple, "a") by the tuple alias handler)
                let (inner_expr, alias_opt) = if let Expression::Alias(ref alias_box) = expr {
                    (alias_box.this.clone(), Some(alias_box.alias.clone()))
                } else {
                    (expr, None)
                };
                if let Some(alias) = alias_opt {
                    // Expression already had AS alias consumed
                    ctes.push(Cte {
                        alias,
                        this: inner_expr,
                        columns: Vec::new(),
                        materialized: None,
                        key_expressions: Vec::new(),
                        alias_first: false,
                        comments: Vec::new(),
                    });
                    if self.match_token(TokenType::Comma) {
                        continue;
                    }
                    break;
                } else if self.match_token(TokenType::As)
                    && self.is_identifier_or_keyword_token()
                {
                    // Require AS <alias> to disambiguate from standard CTE syntax
                    let alias = self.expect_identifier_or_keyword_with_quoted()?;
                    ctes.push(Cte {
                        alias,
                        this: inner_expr,
                        columns: Vec::new(),
                        materialized: None,
                        key_expressions: Vec::new(),
                        alias_first: false,
                        comments: Vec::new(),
                    });
                    if self.match_token(TokenType::Comma) {
                        continue;
                    }
                    break;
                } else if self.check(TokenType::Select) || self.check(TokenType::Comma) {
                    // ClickHouse: WITH expr SELECT ... (unaliased expression in CTE)
                    // The expression's own rendering doubles as the CTE name.
                    ctes.push(Cte {
                        alias: Identifier::new(format!("{}", inner_expr)),
                        this: inner_expr,
                        columns: Vec::new(),
                        materialized: None,
                        key_expressions: Vec::new(),
                        alias_first: false,
                        comments: Vec::new(),
                    });
                    if self.match_token(TokenType::Comma) {
                        continue;
                    }
                    break;
                }
            }
            // Fall back to standard CTE parsing
            self.current = saved_pos;
        }
        // CTE names can be keywords like 'view', 'use', 'all', etc.
        let name = self.expect_identifier_or_alias_keyword_with_quoted()?;
        // Optional column list
        // But first check for Snowflake-style CTE: WITH t (SELECT ...) - no AS keyword
        // In that case, LParen is followed by SELECT, not column names
        let columns = if self.check(TokenType::LParen) && !self.check_next(TokenType::Select) {
            self.skip(); // consume LParen
            let cols = self.parse_identifier_list()?;
            self.expect(TokenType::RParen)?;
            cols
        } else {
            Vec::new()
        };
        // Optional USING KEY (columns) for DuckDB recursive CTEs
        let key_expressions = if self.match_keywords(&[TokenType::Using, TokenType::Key]) {
            self.expect(TokenType::LParen)?;
            let keys = self.parse_identifier_list()?;
            self.expect(TokenType::RParen)?;
            keys
        } else {
            Vec::new()
        };
        // ClickHouse: keyword -> body AS alias (single-param lambda where param is a keyword)
        // e.g., WITH time -> sin(time * 2 * pi()) AS sine_wave
        if matches!(self.config.dialect, Some(DialectType::ClickHouse))
            && self.check(TokenType::Arrow)
        {
            self.skip(); // consume ->
            let body = self.parse_expression()?;
            let lambda = Expression::Lambda(Box::new(LambdaExpr {
                parameters: vec![name.clone()],
                body,
                colon: false,
                parameter_types: Vec::new(),
            }));
            // Expect AS alias
            if self.match_token(TokenType::As) && self.is_identifier_or_keyword_token() {
                let alias = self.expect_identifier_or_keyword_with_quoted()?;
                ctes.push(Cte {
                    alias,
                    this: lambda,
                    columns: Vec::new(),
                    materialized: None,
                    key_expressions: Vec::new(),
                    alias_first: false,
                    comments: Vec::new(),
                });
            } else {
                // Unaliased lambda CTE: reuse the parameter name as the alias
                ctes.push(Cte {
                    alias: name,
                    this: lambda,
                    columns: Vec::new(),
                    materialized: None,
                    key_expressions: Vec::new(),
                    alias_first: false,
                    comments: Vec::new(),
                });
            }
            if self.match_token(TokenType::Comma) {
                continue;
            }
            break;
        }
        // AS is optional (Snowflake allows WITH t (SELECT ...) without AS)
        let cte_comments = if self.match_token(TokenType::As) {
            // Capture trailing comments from the AS token
            // e.g., "WITH a AS /* comment */ (...)" -> comment goes after alias
            self.previous_trailing_comments().to_vec()
        } else {
            Vec::new()
        };
        // Check for MATERIALIZED or NOT MATERIALIZED
        let materialized = if self.match_token(TokenType::Materialized) {
            Some(true)
        } else if self.match_token(TokenType::Not) {
            self.expect(TokenType::Materialized)?;
            Some(false)
        } else {
            None
        };
        self.expect(TokenType::LParen)?;
        let query = self.parse_statement()?;
        self.expect(TokenType::RParen)?;
        ctes.push(Cte {
            alias: name,
            this: query,
            columns,
            materialized,
            key_expressions,
            alias_first: true,
            comments: cte_comments,
        });
        if !self.match_token(TokenType::Comma) {
            // Check for WITH merging: WITH a AS (...) WITH b AS (...) -> merged
            // If the next token is WITH (not followed by nothing), continue parsing CTEs
            if self.check(TokenType::With) {
                self.skip(); // consume the redundant WITH keyword
                // Check if this WITH is also RECURSIVE
                if self.match_token(TokenType::Recursive) && !recursive {
                    // If second WITH is RECURSIVE but first wasn't, ignore (keep non-recursive).
                    // The match_token call still consumes the RECURSIVE token.
                }
                continue; // continue the loop to parse more CTEs
            }
            break;
        }
        // WI-14f: Skip redundant WITH keyword after comma in CTE list
        // e.g., WITH a AS (SELECT 1), WITH b AS (SELECT 2) SELECT *
        self.match_token(TokenType::With);
    }
    // Parse optional SEARCH/CYCLE clause for recursive CTEs (PostgreSQL)
    // Syntax: SEARCH BREADTH|DEPTH FIRST BY column SET column [USING column]
    // or: CYCLE column SET column USING column
    let search = self.parse_recursive_with_search()?;
    // Parse the main query
    let mut main_query = self.parse_statement()?;
    // Unwrap parenthesized wrappers to find the inner SELECT
    // (matching Python sqlglot: while isinstance(this, Subquery) and this.is_wrapper)
    loop {
        match main_query {
            Expression::Paren(paren) => {
                main_query = paren.this;
            }
            Expression::Subquery(ref sub)
                if sub.alias.is_none()
                    && sub.order_by.is_none()
                    && sub.limit.is_none()
                    && sub.offset.is_none() =>
            {
                // Unwrap Subquery wrapper (parenthesized query without modifiers)
                // NOTE(review): the arm's pattern borrows `sub`, so the value is
                // re-matched here by move to take ownership; the outer arm
                // guarantees this `if let` succeeds, making the `else` branch
                // unreachable — it exists only to satisfy the borrow checker.
                if let Expression::Subquery(sub) = main_query {
                    main_query = sub.this;
                } else {
                    break;
                }
            }
            _ => break,
        }
    }
    // Attach WITH to the main query
    let with_clause = With {
        ctes,
        recursive,
        leading_comments,
        search,
    };
    // Every statement kind that can carry a WITH clause gets it attached;
    // anything else silently drops the clause (falls through to `_ => {}`).
    match &mut main_query {
        Expression::Select(ref mut select) => {
            select.with = Some(with_clause);
        }
        Expression::Union(ref mut union) => {
            union.with = Some(with_clause);
        }
        Expression::Intersect(ref mut intersect) => {
            intersect.with = Some(with_clause);
        }
        Expression::Except(ref mut except) => {
            except.with = Some(with_clause);
        }
        Expression::Update(ref mut update) => {
            update.with = Some(with_clause);
        }
        Expression::Insert(ref mut insert) => {
            insert.with = Some(with_clause);
        }
        Expression::Delete(ref mut delete) => {
            delete.with = Some(with_clause);
        }
        Expression::CreateTable(ref mut ct) => {
            ct.with_cte = Some(with_clause);
        }
        Expression::Pivot(ref mut pivot) => {
            pivot.with = Some(with_clause);
        }
        Expression::Merge(ref mut merge) => {
            merge.with_ = Some(Box::new(Expression::With(Box::new(with_clause))));
        }
        _ => {}
    }
    Ok(main_query)
}
/// Parse the projection list of a SELECT statement.
///
/// Handles plain expressions, explicit `AS` aliases (including column-list
/// aliases `AS (a, b)`), implicit aliases, DuckDB's `alias: expr` prefix
/// form, `*` with its modifier chain (EXCEPT/EXCLUDE/REPLACE and ClickHouse
/// APPLY transformers), several ClickHouse keyword-vs-identifier
/// ambiguities, comment capture around aliases, and trailing commas before a
/// clause boundary. The list may be empty (e.g. TSQL `SELECT TOP 10 PERCENT`
/// with no columns).
fn parse_select_expressions(&mut self) -> Result<Vec<Expression>> {
    let mut expressions = Vec::new();
    loop {
        // Check if we're at end of select list (empty list case for TSQL TOP)
        // This allows queries like "SELECT TOP 10 PERCENT" with no columns
        // Also check for Oracle BULK COLLECT INTO sequence
        // ClickHouse: minus() is tokenized as Except but should be treated as function
        let is_ch_keyword_func = matches!(
            self.config.dialect,
            Some(crate::dialects::DialectType::ClickHouse)
        ) && (self.check(TokenType::Except)
            || self.check(TokenType::Intersect))
            && self.check_next(TokenType::LParen);
        // ClickHouse: `from`/`except` can be column names when followed by an operator
        // (e.g., `from + from`, `from in [0]`, `from, ...`)
        // Also: `from FROM t` — two consecutive FROM tokens means first is column name
        let is_ch_keyword_as_column = matches!(
            self.config.dialect,
            Some(crate::dialects::DialectType::ClickHouse)
        ) && (self.check(TokenType::From)
            || self.check(TokenType::Except))
            && {
                let next_tt = self
                    .peek_nth(1)
                    .map(|t| t.token_type)
                    .unwrap_or(TokenType::Semicolon);
                matches!(
                    next_tt,
                    TokenType::Plus | TokenType::Dash | TokenType::Star | TokenType::Slash
                        | TokenType::Percent | TokenType::Eq | TokenType::Neq | TokenType::Lt
                        | TokenType::Gt | TokenType::Lte | TokenType::Gte
                        | TokenType::And | TokenType::Or | TokenType::Comma | TokenType::Dot
                        | TokenType::In | TokenType::Is | TokenType::Not | TokenType::Like
                        | TokenType::Between | TokenType::Semicolon | TokenType::RParen
                        | TokenType::As | TokenType::DPipe | TokenType::Amp | TokenType::Pipe
                        | TokenType::LBracket
                        // Two consecutive FROM tokens: first is column name (e.g., SELECT from FROM t)
                        | TokenType::From
                )
            };
        // Stop at a clause boundary unless the boundary token is actually a
        // ClickHouse function name or column name (checks above).
        if !is_ch_keyword_func
            && !is_ch_keyword_as_column
            && (self.is_at_end()
                || self.check(TokenType::From)
                || self.check(TokenType::Where)
                || self.check(TokenType::Into)
                || self.check(TokenType::Union)
                || self.check(TokenType::Intersect)
                || self.check(TokenType::Except)
                || self.check(TokenType::Order)
                || self.check(TokenType::Limit)
                || self.check(TokenType::Semicolon)
                || self.check_text_seq(&["BULK", "COLLECT", "INTO"]))
        {
            break;
        }
        // Handle star
        if self.check(TokenType::Star) {
            self.skip();
            let star_trailing_comments = self.previous_trailing_comments().to_vec();
            let star = self.parse_star_modifiers_with_comments(None, star_trailing_comments)?;
            let mut star_expr = Expression::Star(star);
            // ClickHouse: * APPLY(func) or * APPLY func or * APPLY(x -> expr) column transformer
            if matches!(
                self.config.dialect,
                Some(crate::dialects::DialectType::ClickHouse)
            ) {
                while self.check(TokenType::Apply) {
                    self.skip(); // consume APPLY
                    let apply_expr = if self.match_token(TokenType::LParen) {
                        // Could be APPLY(func_name) or APPLY(x -> expr)
                        let expr = self.parse_expression()?;
                        self.expect(TokenType::RParen)?;
                        expr
                    } else {
                        // APPLY func or APPLY x -> expr (no parens)
                        // Parse as expression to handle lambdas
                        self.parse_expression()?
                    };
                    // Each APPLY wraps the accumulated expression, so chains nest
                    // left-to-right: Apply(Apply(Star, f1), f2).
                    star_expr = Expression::Apply(Box::new(crate::expressions::Apply {
                        this: Box::new(star_expr),
                        expression: Box::new(apply_expr),
                    }));
                }
            }
            // ClickHouse: Also handle EXCEPT/REPLACE between APPLYs:
            // * APPLY(toDate) EXCEPT(i, j) APPLY(any)
            if matches!(
                self.config.dialect,
                Some(crate::dialects::DialectType::ClickHouse)
            ) && (self.check(TokenType::Except)
                || self.check(TokenType::Exclude)
                || self.check(TokenType::Replace))
            {
                // Consume EXCEPT/REPLACE modifiers after APPLY
                self.parse_star_modifiers(None)?;
                // Continue with more APPLYs
                while self.check(TokenType::Apply) {
                    self.skip();
                    let apply_expr = if self.match_token(TokenType::LParen) {
                        let expr = self.parse_expression()?;
                        self.expect(TokenType::RParen)?;
                        expr
                    } else {
                        self.parse_expression()?
                    };
                    star_expr = Expression::Apply(Box::new(crate::expressions::Apply {
                        this: Box::new(star_expr),
                        expression: Box::new(apply_expr),
                    }));
                }
            }
            // ClickHouse: * followed by operators (e.g., * IS NOT NULL, * AND expr)
            // Treat * as a regular expression and continue parsing operators
            if matches!(
                self.config.dialect,
                Some(crate::dialects::DialectType::ClickHouse)
            ) && matches!(
                self.peek().token_type,
                TokenType::Is
                    | TokenType::And
                    | TokenType::Or
                    | TokenType::Eq
                    | TokenType::Neq
                    | TokenType::Lt
                    | TokenType::Gt
                    | TokenType::Lte
                    | TokenType::Gte
                    | TokenType::Not
                    | TokenType::Plus
                    | TokenType::Dash
                    | TokenType::Slash
                    | TokenType::Percent
                    | TokenType::Like
                    | TokenType::Between
                    | TokenType::In
            ) {
                // Re-parse from the operator with star_expr as the left side
                let left = star_expr;
                // Use parse_comparison / parse_is chain
                if self.check(TokenType::Is) {
                    self.skip(); // consume IS
                    let not = self.match_token(TokenType::Not);
                    if self.match_token(TokenType::Null) {
                        star_expr = if not {
                            Expression::Not(Box::new(UnaryOp {
                                this: Expression::Is(Box::new(BinaryOp::new(
                                    left,
                                    Expression::Null(Null),
                                ))),
                                inferred_type: None,
                            }))
                        } else {
                            Expression::Is(Box::new(BinaryOp::new(
                                left,
                                Expression::Null(Null),
                            )))
                        };
                    } else {
                        let right = self.parse_or()?;
                        star_expr = if not {
                            Expression::Not(Box::new(UnaryOp {
                                this: Expression::Is(Box::new(BinaryOp::new(left, right))),
                                inferred_type: None,
                            }))
                        } else {
                            Expression::Is(Box::new(BinaryOp::new(left, right)))
                        };
                    }
                } else if self.match_token(TokenType::And) {
                    let right = self.parse_or()?;
                    star_expr = Expression::And(Box::new(BinaryOp::new(left, right)));
                } else if self.match_token(TokenType::Or) {
                    let right = self.parse_or()?;
                    star_expr = Expression::Or(Box::new(BinaryOp::new(left, right)));
                } else {
                    let op_token = self.advance();
                    let right = self.parse_or()?;
                    star_expr = match op_token.token_type {
                        TokenType::Eq => Expression::Eq(Box::new(BinaryOp::new(left, right))),
                        TokenType::Neq => Expression::Neq(Box::new(BinaryOp::new(left, right))),
                        TokenType::Lt => Expression::Lt(Box::new(BinaryOp::new(left, right))),
                        TokenType::Gt => Expression::Gt(Box::new(BinaryOp::new(left, right))),
                        TokenType::Lte => Expression::Lte(Box::new(BinaryOp::new(left, right))),
                        TokenType::Gte => Expression::Gte(Box::new(BinaryOp::new(left, right))),
                        TokenType::Plus => {
                            Expression::Add(Box::new(BinaryOp::new(left, right)))
                        }
                        TokenType::Dash => {
                            Expression::Sub(Box::new(BinaryOp::new(left, right)))
                        }
                        _ => left, // fallback
                    };
                }
            }
            expressions.push(star_expr);
        } else {
            // Capture leading comments from the first token before parsing
            // These are comments on a separate line before the expression
            let leading_comments = self.current_leading_comments().to_vec();
            let expr = self.parse_expression()?;
            // ClickHouse: COLUMNS(id, value) EXCEPT (id) REPLACE (5 AS id) APPLY func
            // Also: a.* APPLY(toDate) EXCEPT(i, j) APPLY(any) - qualified star with APPLY
            let expr = if matches!(
                self.config.dialect,
                Some(crate::dialects::DialectType::ClickHouse)
            ) {
                let is_columns_func = match &expr {
                    Expression::Function(f) => f.name.eq_ignore_ascii_case("COLUMNS"),
                    Expression::MethodCall(m) => m.method.name.eq_ignore_ascii_case("COLUMNS"),
                    Expression::Columns(_) => true,
                    _ => false,
                };
                let is_qualified_star = matches!(&expr, Expression::Star(_));
                if (is_columns_func || is_qualified_star)
                    && (self.check(TokenType::Except)
                        || self.check(TokenType::Exclude)
                        || self.check(TokenType::Replace)
                        || self.check(TokenType::Apply))
                {
                    let mut result = expr;
                    // Parse any mix of EXCEPT/REPLACE/APPLY in any order
                    // e.g., * APPLY(toDate) EXCEPT(i, j) APPLY(any)
                    // NOTE: EXCEPT/REPLACE contents are consumed but discarded
                    // here; only APPLY wraps the result expression.
                    loop {
                        if self.check(TokenType::Except) || self.check(TokenType::Exclude) {
                            // Parse EXCEPT/EXCLUDE modifier
                            self.skip();
                            self.match_identifier("STRICT");
                            if self.match_token(TokenType::LParen) {
                                loop {
                                    if self.check(TokenType::RParen) {
                                        break;
                                    }
                                    let _ = self.parse_expression()?;
                                    if !self.match_token(TokenType::Comma) {
                                        break;
                                    }
                                }
                                self.expect(TokenType::RParen)?;
                            } else if self.is_identifier_token()
                                || self.is_safe_keyword_as_identifier()
                            {
                                let _ = self.parse_expression()?;
                            }
                        } else if self.check(TokenType::Replace) {
                            // Parse REPLACE modifier: REPLACE (expr AS alias, ...)
                            self.skip();
                            self.match_identifier("STRICT");
                            if self.match_token(TokenType::LParen) {
                                loop {
                                    if self.check(TokenType::RParen) {
                                        break;
                                    }
                                    let _ = self.parse_expression()?;
                                    if self.match_token(TokenType::As) {
                                        if self.is_identifier_token()
                                            || self.is_safe_keyword_as_identifier()
                                        {
                                            self.skip();
                                        }
                                    }
                                    if !self.match_token(TokenType::Comma) {
                                        break;
                                    }
                                }
                                self.expect(TokenType::RParen)?;
                            } else {
                                let _ = self.parse_expression()?;
                                if self.match_token(TokenType::As) {
                                    if self.is_identifier_token()
                                        || self.is_safe_keyword_as_identifier()
                                    {
                                        self.skip();
                                    }
                                }
                            }
                        } else if self.check(TokenType::Apply) {
                            // Parse APPLY transformer
                            self.skip();
                            let apply_expr = if self.match_token(TokenType::LParen) {
                                let e = self.parse_expression()?;
                                self.expect(TokenType::RParen)?;
                                e
                            } else {
                                self.parse_expression()?
                            };
                            result = Expression::Apply(Box::new(crate::expressions::Apply {
                                this: Box::new(result),
                                expression: Box::new(apply_expr),
                            }));
                        } else {
                            break;
                        }
                    }
                    result
                } else {
                    expr
                }
            } else {
                expr
            };
            // Capture comments between expression and potential AS
            let pre_alias_comments = self.previous_trailing_comments().to_vec();
            // DuckDB prefix alias syntax: identifier: expression (e.g., "foo: 1" means "1 AS foo")
            // Check if the expression is a simple identifier followed by a colon
            // (a double colon would be a `::` cast, so it is excluded)
            let expr = if self.check(TokenType::Colon) && !self.check_next(TokenType::Colon) {
                // Extract the alias name from the identifier expression
                let alias_ident = match &expr {
                    Expression::Identifier(id) => Some(id.clone()),
                    Expression::Column(col) if col.table.is_none() => Some(col.name.clone()),
                    _ => None,
                };
                if let Some(alias) = alias_ident {
                    // Consume the colon
                    self.skip();
                    let colon_comments = self.previous_trailing_comments().to_vec();
                    // Parse the actual value expression
                    let value = self.parse_expression()?;
                    let value_trailing = self.previous_trailing_comments().to_vec();
                    // For colon-alias (foo: expr), comments between alias and colon should
                    // become trailing comments (placed after the alias in output).
                    // Comments after the value expression are also trailing.
                    let mut all_trailing = pre_alias_comments.clone();
                    all_trailing.extend(colon_comments);
                    all_trailing.extend(value_trailing);
                    Expression::Alias(Box::new(Alias {
                        this: value,
                        alias,
                        column_aliases: Vec::new(),
                        pre_alias_comments: Vec::new(),
                        trailing_comments: all_trailing,
                        inferred_type: None,
                    }))
                } else {
                    // Not a simple identifier, fall through to normal alias handling
                    // (this handles cases where the expression is complex before the colon)
                    expr
                }
            } else if self.match_token(TokenType::As) {
                // Capture comments from AS token (e.g., AS /* foo */ (a, b, c))
                // These go into trailing_comments (after the alias), not pre_alias_comments
                let as_comments = self.previous_trailing_comments().to_vec();
                // Check for column aliases: AS (col1, col2) - used by POSEXPLODE etc.
                if self.match_token(TokenType::LParen) {
                    let mut column_aliases = Vec::new();
                    loop {
                        if let Some(col_expr) = self.parse_id_var()? {
                            if let Expression::Identifier(id) = col_expr {
                                column_aliases.push(id);
                            }
                        } else {
                            break;
                        }
                        if !self.match_token(TokenType::Comma) {
                            break;
                        }
                    }
                    self.match_token(TokenType::RParen);
                    let mut trailing_comments = as_comments;
                    trailing_comments.extend_from_slice(self.previous_trailing_comments());
                    // Column-alias form carries an empty alias name; only the
                    // column_aliases list is meaningful.
                    Expression::Alias(Box::new(Alias {
                        this: expr,
                        alias: Identifier::new(String::new()),
                        column_aliases,
                        pre_alias_comments,
                        trailing_comments,
                        inferred_type: None,
                    }))
                } else {
                    // Allow keywords as aliases (e.g., SELECT 1 AS filter)
                    // Use _with_quoted to preserve quoted alias
                    let alias = self.expect_identifier_or_keyword_with_quoted()?;
                    let mut trailing_comments = self.previous_trailing_comments().to_vec();
                    // If parse_comparison stored pending leading comments (no comparison
                    // followed), use those. Otherwise use the leading_comments we captured
                    // before parse_expression(). Both come from the same token, so we
                    // only add one set to avoid duplication.
                    if !self.pending_leading_comments.is_empty() {
                        trailing_comments.extend(self.pending_leading_comments.drain(..));
                    } else {
                        trailing_comments.extend(leading_comments.iter().cloned());
                    }
                    Expression::Alias(Box::new(Alias {
                        this: expr,
                        alias,
                        column_aliases: Vec::new(),
                        pre_alias_comments,
                        trailing_comments,
                        inferred_type: None,
                    }))
                }
            } else if ((self.check(TokenType::Var) && !self.check_keyword()) || self.check(TokenType::QuotedIdentifier) || self.can_be_alias_keyword() || self.is_command_keyword_as_alias() || self.check(TokenType::Overlaps)
                // ClickHouse: APPLY without ( is an implicit alias (e.g., SELECT col apply)
                || (self.check(TokenType::Apply) && !self.check_next(TokenType::LParen)
                    && matches!(self.config.dialect, Some(crate::dialects::DialectType::ClickHouse))))
                && !self.check_text_seq(&["BULK", "COLLECT", "INTO"])
                // ClickHouse clauses must not be consumed as implicit aliases.
                && !(matches!(self.config.dialect, Some(crate::dialects::DialectType::ClickHouse))
                    && (self.check(TokenType::Format) || self.check(TokenType::Settings)))
                // LIMIT/OFFSET/FETCH are clause starters in most dialects and must not
                // be consumed as implicit aliases in SELECT lists.
                && !(
                    self.check(TokenType::Fetch)
                    || ((self.check(TokenType::Limit) || self.check(TokenType::Offset))
                        && !matches!(
                            self.config.dialect,
                            Some(crate::dialects::DialectType::Spark)
                                | Some(crate::dialects::DialectType::Hive)
                        ))
                )
                // GROUP BY / ORDER BY are clause boundaries, not aliases.
                && !self.check_text_seq(&["GROUP", "BY"])
                && !self.check_text_seq(&["ORDER", "BY"])
                // WINDOW is a clause boundary (named window definitions), not an alias.
                && !self.check(TokenType::Window)
                // ClickHouse: PARALLEL WITH is a statement separator, not an alias.
                && !(self.check_identifier("PARALLEL") && self.check_next(TokenType::With)
                    && matches!(self.config.dialect, Some(crate::dialects::DialectType::ClickHouse)))
            {
                // Implicit alias (without AS) - allow Var tokens, QuotedIdentifiers, command keywords (like GET, PUT, etc.), and OVERLAPS
                // But NOT when it's the Oracle BULK COLLECT INTO sequence
                let alias_token = self.advance();
                let alias_text = alias_token.text.clone();
                let is_quoted = alias_token.token_type == TokenType::QuotedIdentifier;
                let trailing_comments = self.previous_trailing_comments().to_vec();
                Expression::Alias(Box::new(Alias {
                    this: expr,
                    alias: Identifier {
                        name: alias_text,
                        quoted: is_quoted,
                        trailing_comments: Vec::new(),
                        span: None,
                    },
                    column_aliases: Vec::new(),
                    pre_alias_comments,
                    trailing_comments,
                    inferred_type: None,
                }))
            } else if !pre_alias_comments.is_empty() {
                // Only wrap in Annotated if the expression doesn't already handle trailing comments.
                // BinaryOp, Column, Cast, Function, etc. have their own trailing_comments field that the generator uses.
                let already_has_trailing = matches!(
                    &expr,
                    Expression::Add(_)
                        | Expression::Sub(_)
                        | Expression::Mul(_)
                        | Expression::Div(_)
                        | Expression::Mod(_)
                        | Expression::Concat(_)
                        | Expression::BitwiseAnd(_)
                        | Expression::BitwiseOr(_)
                        | Expression::BitwiseXor(_)
                        | Expression::Column(_)
                        | Expression::Paren(_)
                        | Expression::Annotated(_)
                        | Expression::Cast(_)
                        | Expression::Function(_)
                        | Expression::Subquery(_)
                );
                if already_has_trailing {
                    expr
                } else {
                    // Wrap in Annotated to preserve trailing comments
                    Expression::Annotated(Box::new(Annotated {
                        this: expr,
                        trailing_comments: pre_alias_comments,
                    }))
                }
            } else if !leading_comments.is_empty() {
                // Wrap in Annotated to preserve leading comments as trailing comments
                Expression::Annotated(Box::new(Annotated {
                    this: expr,
                    trailing_comments: leading_comments,
                }))
            } else {
                expr
            };
            expressions.push(expr);
        }
        if !self.match_token(TokenType::Comma) {
            break;
        }
        // Handle trailing comma (ClickHouse supports trailing commas in SELECT)
        // ClickHouse: `from` after comma is a column name if followed by an operator
        // (e.g., `from + from` or `from in [0]`), comma, or line-end
        let from_is_column = matches!(
            self.config.dialect,
            Some(crate::dialects::DialectType::ClickHouse)
        ) && self.check(TokenType::From)
            && {
                let next_tt = self
                    .peek_nth(1)
                    .map(|t| t.token_type)
                    .unwrap_or(TokenType::Semicolon);
                matches!(
                    next_tt,
                    TokenType::Plus
                        | TokenType::Dash
                        | TokenType::Star
                        | TokenType::Slash
                        | TokenType::Percent
                        | TokenType::Eq
                        | TokenType::Neq
                        | TokenType::Lt
                        | TokenType::Gt
                        | TokenType::Lte
                        | TokenType::Gte
                        | TokenType::And
                        | TokenType::Or
                        | TokenType::Comma
                        | TokenType::Dot
                        | TokenType::In
                        | TokenType::Is
                        | TokenType::Not
                        | TokenType::Like
                        | TokenType::Between
                        | TokenType::Semicolon
                        | TokenType::RParen
                        | TokenType::As
                        | TokenType::DPipe
                        | TokenType::Amp
                        | TokenType::Pipe
                        | TokenType::LBracket
                )
            };
        // After a comma: if a clause boundary follows, the comma was trailing
        // and the projection list ends here.
        if (self.config.allow_trailing_commas
            || matches!(
                self.config.dialect,
                Some(crate::dialects::DialectType::ClickHouse)
            ))
            && (!from_is_column && self.check_from_keyword()
                || self.check(TokenType::Where)
                || self.check(TokenType::GroupBy)
                || self.check(TokenType::Having)
                || self.check(TokenType::Order)
                || self.check(TokenType::Limit)
                || self.check(TokenType::Union)
                || self.check(TokenType::Intersect)
                || (self.check(TokenType::Except) && !self.check_next(TokenType::LParen) && !self.check_next(TokenType::Comma))
                || self.check(TokenType::Semicolon)
                || self.check(TokenType::RParen)
                // SETTINGS/FORMAT only as boundaries when NOT followed by ( or [ (function/column ref)
                || (self.check(TokenType::Settings) && !self.check_next(TokenType::LParen) && !self.check_next(TokenType::LBracket))
                || (self.check(TokenType::Format) && !self.check_next(TokenType::LParen))
                || self.is_at_end())
        {
            break;
        }
    }
    Ok(expressions)
}
/// Parse DuckDB's FROM-first query syntax.
///
/// `FROM tbl` is equivalent to `SELECT * FROM tbl`, and
/// `FROM tbl SELECT col1, col2` is equivalent to `SELECT col1, col2 FROM tbl`.
fn parse_from_first_query(&mut self) -> Result<Expression> {
    self.expect(TokenType::From)?;
    // The table references come first in this syntax.
    let from = self.parse_from()?;
    // An explicit SELECT list may follow; absent one, the projection is `*`.
    let expressions = if self.match_token(TokenType::Select) {
        self.parse_select_expressions()?
    } else {
        vec![Expression::Star(crate::expressions::Star {
            table: None,
            except: None,
            replace: None,
            rename: None,
            trailing_comments: Vec::new(),
            span: None,
        })]
    };
    // ClickHouse-specific PREWHERE clause.
    let prewhere = if self.match_token(TokenType::Prewhere) {
        Some(self.parse_expression()?)
    } else {
        None
    };
    // WHERE clause.
    let where_clause = if self.match_token(TokenType::Where) {
        Some(Where {
            this: self.parse_expression()?,
        })
    } else {
        None
    };
    // GROUP BY: comma-separated grouping keys.
    let group_by = if self.match_token(TokenType::Group) {
        self.expect(TokenType::By)?;
        let mut keys = vec![self.parse_expression()?];
        while self.match_token(TokenType::Comma) {
            keys.push(self.parse_expression()?);
        }
        Some(GroupBy {
            expressions: keys,
            all: None,
            totals: false,
            comments: Vec::new(),
        })
    } else {
        None
    };
    // HAVING clause.
    let having = if self.match_token(TokenType::Having) {
        Some(Having {
            this: self.parse_expression()?,
            comments: Vec::new(),
        })
    } else {
        None
    };
    // ORDER BY clause.
    let order_by = if self.match_token(TokenType::Order) {
        self.expect(TokenType::By)?;
        Some(self.parse_order_by()?)
    } else {
        None
    };
    // LIMIT clause (plain form only in FROM-first queries).
    let limit = if self.match_token(TokenType::Limit) {
        Some(Limit {
            this: self.parse_expression()?,
            percent: false,
            comments: Vec::new(),
        })
    } else {
        None
    };
    // OFFSET clause, with an optional ROW/ROWS keyword noted when present.
    let offset = if self.match_token(TokenType::Offset) {
        let value = self.parse_expression()?;
        let rows = (self.match_token(TokenType::Row) || self.match_token(TokenType::Rows))
            .then_some(true);
        Some(Offset { this: value, rows })
    } else {
        None
    };
    // Assemble the equivalent SELECT node; clauses not supported by this
    // syntax are left empty.
    let select = Select {
        expressions,
        from: Some(from),
        prewhere,
        where_clause,
        group_by,
        having,
        order_by,
        limit,
        offset,
        joins: Vec::new(),
        lateral_views: Vec::new(),
        qualify: None,
        distribute_by: None,
        cluster_by: None,
        sort_by: None,
        limit_by: None,
        fetch: None,
        distinct: false,
        distinct_on: None,
        top: None,
        with: None,
        sample: None,
        settings: None,
        format: None,
        windows: None,
        hint: None,
        connect: None,
        into: None,
        locks: Vec::new(),
        for_xml: Vec::new(),
        for_json: Vec::new(),
        leading_comments: Vec::new(),
        post_select_comments: Vec::new(),
        kind: None,
        operation_modifiers: Vec::new(),
        qualify_after_window: false,
        option: None,
        exclude: None,
    };
    // The assembled SELECT may be the left operand of UNION/INTERSECT/EXCEPT.
    self.parse_set_operation(Expression::Select(Box::new(select)))
}
/// Parse a FROM clause: one or more comma-separated table expressions.
///
/// Comments sitting between FROM (or a comma) and the next table name are
/// captured off the token stream and re-attached to the parsed expression so
/// they survive round-tripping. A trailing comma before a clause-boundary
/// keyword is tolerated (Snowflake allows it).
fn parse_from(&mut self) -> Result<From> {
    let mut tables = Vec::new();
    loop {
        // Capture comments that precede the upcoming table expression
        // (e.g. `FROM \n/* comment */\n table_name`), clearing them from
        // the token so they are not emitted twice.
        let leading = if self.is_at_end() {
            Vec::new()
        } else {
            let captured = self.tokens[self.current].comments.clone();
            if !captured.is_empty() {
                self.tokens[self.current].comments.clear();
            }
            captured
        };
        let mut parsed = self.parse_table_expression()?;
        // Re-attach the captured comments to the outermost expression.
        if !leading.is_empty() {
            match &mut parsed {
                Expression::Pivot(p) => {
                    // For PIVOT, push them onto the inner table's
                    // leading_comments; the generator emits these after the
                    // PIVOT clause.
                    if let Expression::Table(ref mut tbl) = p.this {
                        tbl.leading_comments = leading;
                    }
                }
                Expression::Table(ref mut tbl) => {
                    tbl.trailing_comments.extend(leading);
                }
                _ => {}
            }
        }
        tables.push(parsed);
        if !self.match_token(TokenType::Comma) {
            break;
        }
        // After a comma: if the next token is a clause boundary (or input
        // ended), treat the comma as trailing and stop. For Redshift,
        // UNPIVOT after a comma is SUPER object traversal — a table
        // expression, not a boundary.
        // NOTE(review): the boundary list tests TokenType::GroupBy while
        // GROUP BY elsewhere is matched as Group + By — confirm the
        // tokenizer actually emits a combined GroupBy token here.
        let redshift = matches!(
            self.config.dialect,
            Some(crate::dialects::DialectType::Redshift)
        );
        let stop = self.is_at_end()
            || (!redshift && self.check(TokenType::Unpivot))
            || matches!(
                self.peek().token_type,
                TokenType::Where
                    | TokenType::GroupBy
                    | TokenType::Having
                    | TokenType::Order
                    | TokenType::Limit
                    | TokenType::Offset
                    | TokenType::Union
                    | TokenType::Intersect
                    | TokenType::Except
                    | TokenType::Semicolon
                    | TokenType::RParen
                    | TokenType::Window
                    | TokenType::Qualify
                    | TokenType::Distribute
                    | TokenType::Cluster
                    | TokenType::Pivot
            );
        if stop {
            break;
        }
    }
    Ok(From { expressions: tables })
}
/// Parse a single table expression (table name, subquery, table function,
/// LATERAL, VALUES, …) plus its trailing alias/modifier clauses.
///
/// Dispatches a few prefix forms first (PostgreSQL `ONLY`, `ROWS FROM`,
/// Redshift `UNPIVOT`), then either takes a narrow ClickHouse fast path for
/// `( SELECT … )` / `( WITH … )` / `( FROM … )` or defers to
/// `parse_table_expression_primary`; the shared tail handles everything
/// after the primary expression.
fn parse_table_expression(&mut self) -> Result<Expression> {
    // PostgreSQL ONLY modifier (FROM ONLY t1): skip child tables in an
    // inheritance hierarchy. Consumed here, applied by the tail parser.
    let only_modifier = self.match_token(TokenType::Only);
    // PostgreSQL ROWS FROM syntax:
    // ROWS FROM (f1(args) AS a1(c1 t1), ...) [WITH ORDINALITY] [AS alias(cols)]
    if self.match_text_seq(&["ROWS", "FROM"]) {
        return self.parse_rows_from();
    }
    // Redshift UNPIVOT in FROM for SUPER object traversal:
    //   UNPIVOT expr [AS val_alias AT attr_alias]
    if self.match_token(TokenType::Unpivot) {
        return self.parse_redshift_unpivot_table();
    }
    // ClickHouse's corpus contains very deep FROM (SELECT ...) nesting;
    // keep this recursive path narrow and let the shared alias/modifier
    // logic after the closing `)` handle the rest.
    let clickhouse = matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    );
    let clickhouse_paren_query = clickhouse
        && self.check(TokenType::LParen)
        && (self.check_next(TokenType::Select)
            || self.check_next(TokenType::With)
            || self.check_next(TokenType::From));
    let primary = if clickhouse_paren_query {
        self.expect(TokenType::LParen)?;
        let inner = self.parse_statement()?;
        self.expect(TokenType::RParen)?;
        Expression::Subquery(Box::new(Subquery {
            this: inner,
            alias: None,
            column_aliases: Vec::new(),
            order_by: None,
            limit: None,
            offset: None,
            distribute_by: None,
            sort_by: None,
            cluster_by: None,
            lateral: false,
            modifiers_inside: false,
            trailing_comments: self.previous_trailing_comments().to_vec(),
            inferred_type: None,
        }))
    } else {
        self.parse_table_expression_primary()?
    };
    self.parse_table_expression_tail(primary, only_modifier)
}
#[inline(never)]
fn parse_table_expression_primary(&mut self) -> Result<Expression> {
let expr = if self.check(TokenType::Values) && self.check_next(TokenType::LParen) {
// VALUES as table expression: FROM (VALUES ...)
// In ClickHouse, bare `values` without ( is a table name
self.parse_values()?
} else if self.check(TokenType::Values)
&& matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
)
{
// ClickHouse: `values` as a table name (not followed by LParen)
let token = self.advance();
let ident = Identifier::new(token.text);
let trailing_comments = self.previous_trailing_comments().to_vec();
Expression::boxed_table(TableRef {
name: ident,
schema: None,
catalog: None,
alias: None,
alias_explicit_as: false,
column_aliases: Vec::new(),
leading_comments: Vec::new(),
trailing_comments,
when: None,
only: false,
final_: false,
table_sample: None,
hints: Vec::new(),
system_time: None,
partitions: Vec::new(),
identifier_func: None,
changes: None,
version: None,
span: None,
})
} else if self.check(TokenType::DAt) {
// Snowflake stage reference: @stage_name or @"stage_name" or @namespace.stage/path
self.parse_stage_reference()?
} else if self.check(TokenType::Var) && self.peek().text.starts_with('@') {
// Snowflake stage reference tokenized as Var: @mystage/path
// When @ is followed by alphanumeric, tokenizer creates a Var token instead of DAt
self.parse_stage_reference_from_var()?
} else if self.check(TokenType::String) && self.peek().text.starts_with('@') {
// Snowflake stage reference in string: '@mystage' or '@external/location'
self.parse_stage_reference_from_string()?
} else if self.match_token(TokenType::Lateral) {
if self.check(TokenType::LParen) {
// LATERAL (SELECT ...) or LATERAL (table_expression) or LATERAL (FROM ...) for DuckDB
self.expect(TokenType::LParen)?;
if self.check(TokenType::Select)
|| self.check(TokenType::With)
|| self.check(TokenType::From)
{
let query = self.parse_statement()?;
self.expect(TokenType::RParen)?;
Expression::Subquery(Box::new(Subquery {
this: query,
alias: None,
column_aliases: Vec::new(),
order_by: None,
limit: None,
offset: None,
lateral: true,
modifiers_inside: false,
trailing_comments: Vec::new(),
distribute_by: None,
sort_by: None,
cluster_by: None,
inferred_type: None,
}))
} else {
// LATERAL (table_function()) - parenthesized non-subquery
let table_expr = self.parse_table_expression()?;
self.expect(TokenType::RParen)?;
Expression::Subquery(Box::new(Subquery {
this: table_expr,
alias: None,
column_aliases: Vec::new(),
order_by: None,
limit: None,
offset: None,
lateral: true,
modifiers_inside: false,
trailing_comments: Vec::new(),
distribute_by: None,
sort_by: None,
cluster_by: None,
inferred_type: None,
}))
}
} else {
// LATERAL function_name(args) [WITH ORDINALITY] [AS alias(columns)]
// Parse function name
let first_ident = self.expect_identifier_or_keyword_with_quoted()?;
let first_name = first_ident.name.clone();
// Parse function arguments
self.expect(TokenType::LParen)?;
let args = if self.check(TokenType::RParen) {
Vec::new()
} else {
self.parse_function_arguments()?
};
self.expect(TokenType::RParen)?;
// Handle UNNEST specially to create UnnestFunc expression
let mut func_expr = if first_name.eq_ignore_ascii_case("UNNEST") {
let mut args_iter = args.into_iter();
let this = args_iter
.next()
.ok_or_else(|| self.parse_error("Expected expression in UNNEST"))?;
let expressions: Vec<Expression> = args_iter.collect();
Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
this,
expressions,
with_ordinality: false,
alias: None,
offset_alias: None,
}))
} else {
Expression::Function(Box::new(Function {
name: first_name,
args,
distinct: false,
trailing_comments: Vec::new(),
use_bracket_syntax: false,
no_parens: false,
quoted: false,
span: None,
inferred_type: None,
}))
};
// Check for WITH ORDINALITY (Presto) or WITH OFFSET (BigQuery)
let mut with_offset_alias: Option<crate::expressions::Identifier> = None;
let ordinality = if self.match_token(TokenType::With) {
if self.match_token(TokenType::Ordinality) {
Some(Box::new(Expression::Boolean(BooleanLiteral {
value: true,
})))
} else if self.check(TokenType::Offset) || self.check_identifier("OFFSET") {
// BigQuery: WITH OFFSET [AS alias]
self.skip(); // consume OFFSET
// Check for optional offset alias: WITH OFFSET AS y or WITH OFFSET y
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::BigQuery)
) {
let has_as = self.match_token(TokenType::As);
if has_as
|| self.check(TokenType::Identifier)
|| self.check(TokenType::Var)
{
let alias_name = self.advance().text;
with_offset_alias = Some(crate::expressions::Identifier {
name: alias_name,
quoted: false,
trailing_comments: Vec::new(),
span: None,
});
}
}
Some(Box::new(Expression::Boolean(BooleanLiteral {
value: true,
})))
} else {
// Not ORDINALITY or OFFSET, put back WITH
self.current -= 1;
None
}
} else {
None
};
// Update the inner UnnestFunc with WITH ORDINALITY/OFFSET info
if ordinality.is_some() {
if let Expression::Unnest(ref mut u) = func_expr {
u.with_ordinality = true;
u.offset_alias = with_offset_alias;
}
}
// Parse optional alias: AS alias or just alias
let alias_ident = if self.match_token(TokenType::As) {
Some(self.expect_identifier_or_keyword_with_quoted()?)
} else if !self.is_at_end()
&& !self.check(TokenType::Comma)
&& !self.check(TokenType::RParen)
&& !self.check(TokenType::On)
&& !self.check(TokenType::Cross)
&& !self.check(TokenType::Inner)
&& !self.check(TokenType::Left)
&& !self.check(TokenType::Right)
&& !self.check(TokenType::Full)
&& !self.check(TokenType::Join)
&& !self.check(TokenType::Where)
&& !self.check(TokenType::Order)
&& !self.check(TokenType::Limit)
&& !self.check(TokenType::Semicolon)
&& (self.check(TokenType::Identifier) || self.check(TokenType::Var))
{
Some(self.expect_identifier_or_keyword_with_quoted()?)
} else {
None
};
let alias_quoted = alias_ident.as_ref().map_or(false, |id| id.quoted);
let alias = alias_ident.map(|id| id.name);
// Parse column aliases: (col1, col2, ...)
let column_aliases = if alias.is_some() && self.match_token(TokenType::LParen) {
let mut cols = Vec::new();
loop {
cols.push(self.expect_identifier_or_keyword()?);
if !self.match_token(TokenType::Comma) {
break;
}
}
self.expect(TokenType::RParen)?;
cols
} else {
Vec::new()
};
Expression::Lateral(Box::new(Lateral {
this: Box::new(func_expr),
view: None,
outer: None,
alias,
alias_quoted,
cross_apply: None,
ordinality,
column_aliases,
}))
}
} else if self.match_token(TokenType::LParen) {
// Subquery or parenthesized set operation or (VALUES ...)
if self.check(TokenType::Values) {
// (VALUES (...), (...)) AS t(c1, c2) or (VALUES (0) foo(bar))
let mut values = self.parse_values()?;
self.expect(TokenType::RParen)?;
// Extract alias from Values if present and move to Subquery
let (alias, column_aliases) = if let Expression::Values(ref mut v) = values {
(v.alias.take(), std::mem::take(&mut v.column_aliases))
} else {
(None, Vec::new())
};
Expression::Subquery(Box::new(Subquery {
this: values,
alias,
column_aliases,
order_by: None,
limit: None,
offset: None,
distribute_by: None,
sort_by: None,
cluster_by: None,
lateral: false,
modifiers_inside: false,
trailing_comments: self.previous_trailing_comments().to_vec(),
inferred_type: None,
}))
} else if self.check(TokenType::Select)
|| self.check(TokenType::With)
|| self.check(TokenType::Pivot)
|| self.check(TokenType::Unpivot)
|| self.check(TokenType::From)
|| self.check(TokenType::Merge)
|| self.check(TokenType::Describe)
|| (self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("EXPLAIN"))
|| (self.check(TokenType::Var)
&& self.peek().text.eq_ignore_ascii_case("SUMMARIZE"))
{
let query = self.parse_statement()?;
self.expect(TokenType::RParen)?;
let trailing = self.previous_trailing_comments().to_vec();
// Check for set operations after parenthesized query
// If there's a set operation, wrap query in Subquery first to preserve parens
// e.g., (SELECT 1) UNION (SELECT 2) - the left operand needs Subquery wrapper
let result = if self.check(TokenType::Union)
|| self.check(TokenType::Intersect)
|| self.check(TokenType::Except)
{
let left = Expression::Subquery(Box::new(Subquery {
this: query,
alias: None,
column_aliases: Vec::new(),
order_by: None,
limit: None,
offset: None,
lateral: false,
modifiers_inside: false,
trailing_comments: Vec::new(),
distribute_by: None,
sort_by: None,
cluster_by: None,
inferred_type: None,
}));
self.parse_set_operation(left)?
} else {
query
};
Expression::Subquery(Box::new(Subquery {
this: result,
alias: None,
column_aliases: Vec::new(),
order_by: None,
limit: None,
offset: None,
distribute_by: None,
sort_by: None,
cluster_by: None,
lateral: false,
modifiers_inside: false,
trailing_comments: trailing,
inferred_type: None,
}))
} else if self.check(TokenType::LParen) {
// Nested parens like ((SELECT ...)) or ((x))
// Also handles ((SELECT 1) UNION (SELECT 2)) - set operations inside parens
let inner = self.parse_table_expression()?;
// Handle alias on subquery before set operation: ((SELECT 1) AS a UNION ALL (SELECT 2) AS b)
let inner = if self.match_token(TokenType::As) {
let alias = self.expect_identifier()?;
if let Expression::Subquery(mut subq) = inner {
subq.alias = Some(Identifier::new(alias));
Expression::Subquery(subq)
} else {
Expression::Alias(Box::new(Alias::new(inner, Identifier::new(alias))))
}
} else if self.is_identifier_token()
&& !self.check(TokenType::Union)
&& !self.check(TokenType::Intersect)
&& !self.check(TokenType::Except)
&& !self.check(TokenType::Cross)
&& !self.check(TokenType::Inner)
&& !self.check(TokenType::Left)
&& !self.check(TokenType::Right)
&& !self.check(TokenType::Full)
&& !self.check(TokenType::Join)
&& !self.check(TokenType::Order)
&& !self.check(TokenType::Limit)
&& !self.check(TokenType::Offset)
&& !self.check(TokenType::Xor)
{
// Implicit alias (no AS keyword)
let alias = self.expect_identifier()?;
if let Expression::Subquery(mut subq) = inner {
subq.alias = Some(Identifier::new(alias));
Expression::Subquery(subq)
} else {
Expression::Alias(Box::new(Alias::new(inner, Identifier::new(alias))))
}
} else {
inner
};
// ClickHouse: ((SELECT 1) AS x, (SELECT 2) AS y) — tuple of aliased subqueries
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.check(TokenType::Comma)
{
let mut exprs = vec![inner];
while self.match_token(TokenType::Comma) {
if self.check(TokenType::RParen) {
break;
}
let e = self.parse_expression()?;
exprs.push(e);
}
self.expect(TokenType::RParen)?;
return Ok(Expression::Tuple(Box::new(Tuple { expressions: exprs })));
}
// Check for set operations after the first table expression
let had_set_operation = self.check(TokenType::Union)
|| self.check(TokenType::Intersect)
|| self.check(TokenType::Except);
let result = if had_set_operation {
// This is a set operation like ((SELECT 1) UNION (SELECT 2))
// Wrap inner in a subquery-like expression and parse set operation
let set_result = self.parse_set_operation(inner)?;
set_result
} else if self.check(TokenType::Cross)
|| self.check(TokenType::Inner)
|| self.check(TokenType::Left)
|| self.check(TokenType::Right)
|| self.check(TokenType::Full)
|| self.check(TokenType::Join)
{
// This is a join: ((SELECT 1) CROSS JOIN (SELECT 2))
let joins = self.parse_joins()?;
let lateral_views = self.parse_lateral_views()?;
Expression::JoinedTable(Box::new(JoinedTable {
left: inner,
joins,
lateral_views,
alias: None,
}))
} else {
inner
};
// Handle ORDER BY, LIMIT, OFFSET after set operations inside parens
let result = if self.check(TokenType::Order) {
// Wrap in a subquery with order/limit
self.expect(TokenType::Order)?;
self.expect(TokenType::By)?;
let order_by = self.parse_order_by()?;
let limit = if self.match_token(TokenType::Limit) {
Some(Limit {
this: self.parse_expression()?,
percent: false,
comments: Vec::new(),
})
} else {
None
};
let offset = if self.match_token(TokenType::Offset) {
Some(Offset {
this: self.parse_expression()?,
rows: None,
})
} else {
None
};
Expression::Subquery(Box::new(Subquery {
this: result,
alias: None,
column_aliases: Vec::new(),
order_by: Some(order_by),
limit,
offset,
distribute_by: None,
sort_by: None,
cluster_by: None,
lateral: false,
modifiers_inside: true, // ORDER BY was inside the parens
trailing_comments: Vec::new(),
inferred_type: None,
}))
} else if self.check(TokenType::Limit) || self.check(TokenType::Offset) {
// LIMIT/OFFSET without ORDER BY
let limit = if self.match_token(TokenType::Limit) {
Some(Limit {
this: self.parse_expression()?,
percent: false,
comments: Vec::new(),
})
} else {
None
};
let offset = if self.match_token(TokenType::Offset) {
Some(Offset {
this: self.parse_expression()?,
rows: None,
})
} else {
None
};
Expression::Subquery(Box::new(Subquery {
this: result,
alias: None,
column_aliases: Vec::new(),
order_by: None,
limit,
offset,
distribute_by: None,
sort_by: None,
cluster_by: None,
lateral: false,
modifiers_inside: true, // LIMIT/OFFSET was inside the parens
trailing_comments: Vec::new(),
inferred_type: None,
}))
} else {
result
};
self.expect(TokenType::RParen)?;
// Wrap result in Paren to preserve the outer parentheses when needed
// Cases:
// - ((SELECT 1)) -> Paren(Subquery(Select)) - inner was subquery of SELECT, wrap in Paren
// - ((SELECT 1) UNION (SELECT 2)) -> Subquery(Union) - recursive call handled set op, don't add Paren
// - ((SELECT 1) AS a UNION ALL ...) -> Union - we handled set op, need to add Paren
// - (((SELECT 1) UNION SELECT 2) ORDER BY x) -> Subquery with modifiers_inside=true
let had_modifiers = matches!(&result, Expression::Subquery(s) if s.order_by.is_some() || s.limit.is_some() || s.offset.is_some());
let result_is_subquery_of_set_op = matches!(&result, Expression::Subquery(s) if matches!(&s.this, Expression::Union(_) | Expression::Intersect(_) | Expression::Except(_)));
if had_modifiers || result_is_subquery_of_set_op {
// Subquery with modifiers or Subquery(Union) - already has proper structure
result
} else {
// All other cases need Paren wrapper to preserve outer parentheses
Expression::Paren(Box::new(Paren {
this: result,
trailing_comments: Vec::new(),
}))
}
} else if self.is_identifier_token()
|| self.is_safe_keyword_as_identifier()
|| self.can_be_alias_keyword()
{
// Parenthesized join expression: (tbl1 CROSS JOIN tbl2) or just (x)
// Also allow safe keywords and alias keywords (all, left, etc.) as table names
let (left, joins) = self.parse_table_expression_with_joins()?;
// Parse LATERAL VIEW after joins: (x CROSS JOIN foo LATERAL VIEW EXPLODE(y))
let lateral_views = self.parse_lateral_views()?;
self.expect(TokenType::RParen)?;
if joins.is_empty() && lateral_views.is_empty() {
// Just a parenthesized table expression, wrap in Paren to preserve parens
Expression::Paren(Box::new(Paren {
this: left,
trailing_comments: Vec::new(),
}))
} else {
// Create a JoinedTable
Expression::JoinedTable(Box::new(JoinedTable {
left,
joins,
lateral_views,
alias: None, // Alias is parsed separately after this
}))
}
} else {
let query = self.parse_statement()?;
self.expect(TokenType::RParen)?;
Expression::Subquery(Box::new(Subquery {
this: query,
alias: None,
column_aliases: Vec::new(),
order_by: None,
limit: None,
offset: None,
distribute_by: None,
sort_by: None,
cluster_by: None,
lateral: false,
modifiers_inside: false,
trailing_comments: self.previous_trailing_comments().to_vec(),
inferred_type: None,
}))
}
} else if self.is_identifier_token() || self.is_safe_keyword_as_identifier() || self.can_be_alias_keyword()
|| (matches!(self.config.dialect, Some(crate::dialects::DialectType::BigQuery)) && self.check(TokenType::Number))
|| self.is_mysql_numeric_identifier()
// PIVOT/UNPIVOT can be table names when not followed by (
|| (self.check(TokenType::Pivot) && !self.check_next(TokenType::LParen))
|| (self.check(TokenType::Unpivot) && !self.check_next(TokenType::LParen))
// ClickHouse: braced query parameters as table names {db:Identifier}.table
|| (matches!(self.config.dialect, Some(crate::dialects::DialectType::ClickHouse)) && self.check(TokenType::LBrace))
// ClickHouse: allow union/except/intersect as table names when not followed by ALL/DISTINCT/SELECT/(
|| (matches!(self.config.dialect, Some(crate::dialects::DialectType::ClickHouse))
&& (self.check(TokenType::Union) || self.check(TokenType::Except) || self.check(TokenType::Intersect))
&& !self.check_next(TokenType::All) && !self.check_next(TokenType::Distinct)
&& !self.check_next(TokenType::Select) && !self.check_next(TokenType::LParen))
{
// Table name - could be simple, qualified, or table function
// Also allow safe keywords (like 'table', 'view', 'case', 'all', etc.) as table names
// BigQuery: also allows numeric table parts and hyphenated identifiers
// MySQL: allows numeric-starting identifiers (e.g., 00f, 1d)
// DuckDB prefix alias syntax: alias: table (e.g., "foo: bar" means "bar AS foo")
// Check if next token is COLON (but not :: which is DCOLON for casts)
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::DuckDB)
) && self.check_next(TokenType::Colon)
&& !(self.current + 2 < self.tokens.len()
&& self.tokens[self.current + 2].token_type == TokenType::Colon)
{
// Parse the alias identifier
let alias_ident = self.parse_bigquery_table_part()?;
let pre_alias_comments = self.previous_trailing_comments().to_vec();
// Consume the colon
self.expect(TokenType::Colon)?;
let colon_comments = self.previous_trailing_comments().to_vec();
// Parse the actual table expression recursively
let mut table_expr = self.parse_table_expression()?;
// Merge comments
let mut all_comments = pre_alias_comments;
all_comments.extend(colon_comments);
// Apply the alias to the table expression
match &mut table_expr {
Expression::Table(ref mut t) => {
t.alias = Some(alias_ident);
t.alias_explicit_as = true; // Output AS keyword (required by expected format)
// Store prefix alias comments - they should come BEFORE the table's trailing comments
// For "foo /* bla */: bar /* baz */", output is "bar AS foo /* bla */ /* baz */"
// So alias comments (/* bla */) come first, then table comments (/* baz */)
if !all_comments.is_empty() {
let existing_comments = std::mem::take(&mut t.trailing_comments);
t.trailing_comments = all_comments;
t.trailing_comments.extend(existing_comments);
}
}
Expression::Subquery(ref mut s) => {
s.alias = Some(alias_ident);
}
Expression::Function(ref mut _f) => {
// Wrap function in alias
return Ok(Expression::Alias(Box::new(Alias {
this: table_expr,
alias: alias_ident,
column_aliases: Vec::new(),
pre_alias_comments: all_comments,
trailing_comments: Vec::new(),
inferred_type: None,
})));
}
_ => {
// For other expressions, wrap in Alias
return Ok(Expression::Alias(Box::new(Alias {
this: table_expr,
alias: alias_ident,
column_aliases: Vec::new(),
pre_alias_comments: all_comments,
trailing_comments: Vec::new(),
inferred_type: None,
})));
}
}
return Ok(table_expr);
}
let first_ident = self.parse_bigquery_table_part()?;
let first_name = first_ident.name.clone();
// Check for qualified name (schema.table) or table function
if self.match_token(TokenType::Dot) {
// Handle TSQL a..b syntax (database..table with empty schema)
if self.check(TokenType::Dot) {
// Two consecutive dots: a..b means catalog..table (empty schema)
self.skip(); // consume second dot
let table_ident = self.parse_bigquery_table_part()?;
let trailing_comments = self.previous_trailing_comments().to_vec();
return Ok(Expression::boxed_table(TableRef {
catalog: Some(first_ident),
schema: Some(Identifier::new("")), // Empty schema represents ..
name: table_ident,
alias: None,
alias_explicit_as: false,
column_aliases: Vec::new(),
leading_comments: Vec::new(),
trailing_comments,
when: None,
only: false,
final_: false,
table_sample: None,
hints: Vec::new(),
system_time: None,
partitions: Vec::new(),
identifier_func: None,
changes: None,
version: None,
span: None,
}));
}
// BigQuery: handle x.* wildcard table reference (e.g., SELECT * FROM x.*)
// After the first dot, if we see a Star token, it's a wildcard table name
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::BigQuery)
) && self.check(TokenType::Star)
{
self.skip(); // consume *
let trailing_comments = self.previous_trailing_comments().to_vec();
return Ok(Expression::boxed_table(TableRef {
catalog: None,
schema: Some(first_ident),
name: Identifier::new("*"),
alias: None,
alias_explicit_as: false,
column_aliases: Vec::new(),
leading_comments: Vec::new(),
trailing_comments,
when: None,
only: false,
final_: false,
table_sample: None,
hints: Vec::new(),
system_time: None,
partitions: Vec::new(),
identifier_func: None,
changes: None,
version: None,
span: None,
}));
}
// schema.table or schema.function()
// Allow keywords as table/schema names (e.g., schema.table, catalog.view)
let second_ident = self.parse_bigquery_table_part()?;
let second_name = second_ident.name.clone();
if self.match_token(TokenType::Dot) {
// BigQuery: handle a.b.* wildcard table reference
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::BigQuery)
) && self.check(TokenType::Star)
{
self.skip(); // consume *
let trailing_comments = self.previous_trailing_comments().to_vec();
return Ok(Expression::boxed_table(TableRef {
catalog: Some(first_ident),
schema: Some(second_ident),
name: Identifier::new("*"),
alias: None,
alias_explicit_as: false,
column_aliases: Vec::new(),
leading_comments: Vec::new(),
trailing_comments,
when: None,
only: false,
final_: false,
table_sample: None,
hints: Vec::new(),
system_time: None,
partitions: Vec::new(),
identifier_func: None,
changes: None,
version: None,
span: None,
}));
}
// catalog.schema.table or catalog.schema.function()
let third_ident = self.parse_bigquery_table_part()?;
let third_name = third_ident.name.clone();
// Check for 4-part name (e.g., project.dataset.INFORMATION_SCHEMA.TABLES)
if self.match_token(TokenType::Dot) {
let fourth_ident = self.parse_bigquery_table_part()?;
// BigQuery wildcard table suffix: a.b.c.d* matches all tables starting with d
let mut table_name = fourth_ident;
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::BigQuery)
) && self.check(TokenType::Star)
&& self.is_connected()
{
self.skip(); // consume *
table_name.name.push('*');
}
let trailing_comments = self.previous_trailing_comments().to_vec();
// For 4-part names, combine first two parts as catalog, third as schema
Expression::boxed_table(TableRef {
catalog: Some(Identifier::new(format!(
"{}.{}",
first_name, second_name
))),
schema: Some(third_ident),
name: table_name,
alias: None,
alias_explicit_as: false,
column_aliases: Vec::new(),
leading_comments: Vec::new(),
trailing_comments,
when: None,
only: false,
final_: false,
table_sample: None,
hints: Vec::new(),
system_time: None,
partitions: Vec::new(),
identifier_func: None,
changes: None,
version: None,
span: None,
})
} else if self.match_token(TokenType::LParen) {
// catalog.schema.function() - table-valued function
let args = if self.check(TokenType::RParen) {
Vec::new()
} else {
self.parse_function_arguments()?
};
self.expect(TokenType::RParen)?;
let trailing_comments = self.previous_trailing_comments().to_vec();
Expression::Function(Box::new(Function {
name: format!("{}.{}.{}", first_name, second_name, third_name),
args,
distinct: false,
trailing_comments,
use_bracket_syntax: false,
no_parens: false,
quoted: false,
span: None,
inferred_type: None,
}))
} else {
// catalog.schema.table
// BigQuery wildcard table suffix: x.y.z* matches all tables starting with z
let mut table_name = third_ident;
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::BigQuery)
) && self.check(TokenType::Star)
&& self.is_connected()
{
self.skip(); // consume *
table_name.name.push('*');
}
let trailing_comments = self.previous_trailing_comments().to_vec();
Expression::boxed_table(TableRef {
catalog: Some(first_ident),
schema: Some(second_ident),
name: table_name,
alias: None,
alias_explicit_as: false,
column_aliases: Vec::new(),
leading_comments: Vec::new(),
trailing_comments,
when: None,
only: false,
final_: false,
table_sample: None,
hints: Vec::new(),
system_time: None,
partitions: Vec::new(),
identifier_func: None,
changes: None,
version: None,
span: None,
})
}
} else if self.match_token(TokenType::LParen) {
// schema.function() - table-valued function
let args = if self.check(TokenType::RParen) {
Vec::new()
} else {
self.parse_function_arguments()?
};
self.expect(TokenType::RParen)?;
let trailing_comments = self.previous_trailing_comments().to_vec();
Expression::Function(Box::new(Function {
name: format!("{}.{}", first_name, second_name),
args,
distinct: false,
trailing_comments,
use_bracket_syntax: false,
no_parens: false,
quoted: false,
span: None,
inferred_type: None,
}))
} else {
// schema.table
// BigQuery wildcard table suffix: x.y* matches all tables starting with y
let mut table_name = second_ident;
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::BigQuery)
) && self.check(TokenType::Star)
&& self.is_connected()
{
self.skip(); // consume *
table_name.name.push('*');
}
let trailing_comments = self.previous_trailing_comments().to_vec();
Expression::boxed_table(TableRef {
catalog: None,
schema: Some(first_ident),
name: table_name,
alias: None,
alias_explicit_as: false,
column_aliases: Vec::new(),
leading_comments: Vec::new(),
trailing_comments,
when: None,
only: false,
final_: false,
table_sample: None,
hints: Vec::new(),
system_time: None,
partitions: Vec::new(),
identifier_func: None,
changes: None,
version: None,
span: None,
})
}
} else if self.match_token(TokenType::LParen) {
// Handle JSON_TABLE specially - it has COLUMNS clause syntax
if first_name.eq_ignore_ascii_case("JSON_TABLE") {
// Parse the JSON expression (use parse_bitwise to avoid consuming FORMAT)
let this = self
.parse_bitwise()?
.unwrap_or(Expression::Null(crate::expressions::Null));
// Check for FORMAT JSON after the expression
let this_with_format = if self.match_text_seq(&["FORMAT", "JSON"]) {
Expression::JSONFormat(Box::new(crate::expressions::JSONFormat {
this: Some(Box::new(this)),
options: Vec::new(),
is_json: None,
to_json: None,
}))
} else {
this
};
// Parse path (after comma)
let path = if self.match_token(TokenType::Comma) {
if let Some(s) = self.parse_string()? {
Some(Box::new(s))
} else {
None
}
} else {
None
};
// Oracle uses "ERROR ON ERROR" (value then behavior) instead of "ON ERROR ERROR"
// Parse error handling: ERROR ON ERROR or NULL ON ERROR
let error_handling = if self.match_identifier("ERROR")
&& self.match_text_seq(&["ON", "ERROR"])
{
Some(Box::new(Expression::Var(Box::new(Var {
this: "ERROR ON ERROR".to_string(),
}))))
} else if self.match_text_seq(&["NULL", "ON", "ERROR"]) {
Some(Box::new(Expression::Var(Box::new(Var {
this: "NULL ON ERROR".to_string(),
}))))
} else {
None
};
// Parse empty handling: ERROR ON EMPTY or NULL ON EMPTY
let empty_handling = if self.match_identifier("ERROR")
&& self.match_text_seq(&["ON", "EMPTY"])
{
Some(Box::new(Expression::Var(Box::new(Var {
this: "ERROR ON EMPTY".to_string(),
}))))
} else if self.match_text_seq(&["NULL", "ON", "EMPTY"]) {
Some(Box::new(Expression::Var(Box::new(Var {
this: "NULL ON EMPTY".to_string(),
}))))
} else {
None
};
// Parse COLUMNS clause
let schema = self.parse_json_table_columns()?;
self.expect(TokenType::RParen)?;
Expression::JSONTable(Box::new(JSONTable {
this: Box::new(this_with_format),
schema: schema.map(Box::new),
path,
error_handling,
empty_handling,
}))
} else if first_name.eq_ignore_ascii_case("XMLTABLE") {
// Handle XMLTABLE specially - it has COLUMNS clause syntax
// XMLTABLE([XMLNAMESPACES(...),] '/xpath' PASSING xml_doc COLUMNS ...)
if let Some(xml_table) = self.parse_xml_table()? {
self.expect(TokenType::RParen)?;
xml_table
} else {
return Err(self.parse_error("Failed to parse XMLTABLE"));
}
} else if first_name.eq_ignore_ascii_case("OPENJSON") {
// Handle OPENJSON specially - it has WITH clause for column definitions
// OPENJSON(json[, path]) [WITH (col1 type1 'path' [AS JSON], ...)]
if let Some(openjson_expr) = self.parse_open_json()? {
openjson_expr
} else {
return Err(self.parse_error("Failed to parse OPENJSON"));
}
} else if first_name.eq_ignore_ascii_case("SEMANTIC_VIEW") {
// Handle SEMANTIC_VIEW specially - it has METRICS/DIMENSIONS/FACTS/WHERE syntax
// SEMANTIC_VIEW(table METRICS a.b, a.c DIMENSIONS a.b, a.c WHERE expr)
let semantic_view = self.parse_semantic_view()?;
self.expect(TokenType::RParen)?;
semantic_view
} else if (first_name.eq_ignore_ascii_case("view")
|| first_name.eq_ignore_ascii_case("merge"))
&& (self.check(TokenType::Select) || self.check(TokenType::With))
{
// ClickHouse: view(SELECT ...) and merge(SELECT ...) table functions
// contain a subquery as the argument
let query = self.parse_statement()?;
self.expect(TokenType::RParen)?;
let trailing_comments = self.previous_trailing_comments().to_vec();
Expression::Function(Box::new(Function {
name: first_name.to_string(),
args: vec![query],
distinct: false,
trailing_comments,
use_bracket_syntax: false,
no_parens: false,
quoted: false,
span: None,
inferred_type: None,
}))
} else {
// Simple table function like UNNEST(), GAP_FILL(), etc.
let args = if self.check(TokenType::RParen) {
Vec::new()
} else {
self.parse_function_arguments()?
};
self.expect(TokenType::RParen)?;
let trailing_comments = self.previous_trailing_comments().to_vec();
// Handle UNNEST specially to create UnnestFunc expression
if first_name.eq_ignore_ascii_case("UNNEST") {
// Check for WITH ORDINALITY (Presto) or WITH OFFSET (BigQuery)
// Both are semantically the same - provide an ordinal/offset column
let with_ordinality = self
.match_keywords(&[TokenType::With, TokenType::Ordinality])
|| self.match_text_seq(&["WITH", "OFFSET"]);
// If WITH OFFSET matched, check for optional offset alias: WITH OFFSET AS y or WITH OFFSET y
let offset_alias = if with_ordinality
&& matches!(
self.config.dialect,
Some(crate::dialects::DialectType::BigQuery)
) {
let has_as = self.match_token(TokenType::As);
if has_as
|| (self.check(TokenType::Identifier) || self.check(TokenType::Var))
{
let alias_name = self.advance().text;
Some(crate::expressions::Identifier {
name: alias_name,
quoted: false,
trailing_comments: Vec::new(),
span: None,
})
} else {
None
}
} else {
None
};
let mut args_iter = args.into_iter();
let this = args_iter
.next()
.ok_or_else(|| self.parse_error("Expected expression in UNNEST"))?;
let expressions: Vec<Expression> = args_iter.collect();
Expression::Unnest(Box::new(crate::expressions::UnnestFunc {
this,
expressions,
with_ordinality,
alias: None,
offset_alias,
}))
} else {
// Check for WITH ORDINALITY after any table-valued function
let with_ordinality =
self.match_keywords(&[TokenType::With, TokenType::Ordinality]);
let func_name = if with_ordinality {
format!("{} WITH ORDINALITY", first_name)
} else {
first_name.clone()
};
let func = Function {
name: func_name,
args,
distinct: false,
trailing_comments,
use_bracket_syntax: false,
no_parens: false,
quoted: false,
span: None,
inferred_type: None,
};
let func_expr = Expression::Function(Box::new(func));
// TSQL: OPENDATASOURCE(...).Catalog.schema.table
// After a table-valued function, dot-chained access produces
// a TableRef whose identifier_func holds the function call.
if self.check(TokenType::Dot) {
self.skip(); // consume first dot
let part1 = self.parse_bigquery_table_part()?;
if self.match_token(TokenType::Dot) {
let part2 = self.parse_bigquery_table_part()?;
if self.match_token(TokenType::Dot) {
// func().a.b.c → catalog=a, schema=b, name=c
let part3 = self.parse_bigquery_table_part()?;
let tc = self.previous_trailing_comments().to_vec();
Expression::boxed_table(TableRef {
catalog: Some(part1),
schema: Some(part2),
name: part3,
alias: None,
alias_explicit_as: false,
column_aliases: Vec::new(),
leading_comments: Vec::new(),
trailing_comments: tc,
when: None,
only: false,
final_: false,
table_sample: None,
hints: Vec::new(),
system_time: None,
partitions: Vec::new(),
identifier_func: Some(Box::new(func_expr)),
changes: None,
version: None,
span: None,
})
} else {
// func().a.b → schema=a, name=b
let tc = self.previous_trailing_comments().to_vec();
Expression::boxed_table(TableRef {
catalog: None,
schema: Some(part1),
name: part2,
alias: None,
alias_explicit_as: false,
column_aliases: Vec::new(),
leading_comments: Vec::new(),
trailing_comments: tc,
when: None,
only: false,
final_: false,
table_sample: None,
hints: Vec::new(),
system_time: None,
partitions: Vec::new(),
identifier_func: Some(Box::new(func_expr)),
changes: None,
version: None,
span: None,
})
}
} else {
// func().a → name=a
let tc = self.previous_trailing_comments().to_vec();
Expression::boxed_table(TableRef {
catalog: None,
schema: None,
name: part1,
alias: None,
alias_explicit_as: false,
column_aliases: Vec::new(),
leading_comments: Vec::new(),
trailing_comments: tc,
when: None,
only: false,
final_: false,
table_sample: None,
hints: Vec::new(),
system_time: None,
partitions: Vec::new(),
identifier_func: Some(Box::new(func_expr)),
changes: None,
version: None,
span: None,
})
}
} else {
func_expr
}
}
}
} else {
// Simple table name
// BigQuery wildcard table suffix: x* matches all tables starting with x
let mut table_name = first_ident;
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::BigQuery)
) && self.check(TokenType::Star)
&& self.is_connected()
{
self.skip(); // consume *
table_name.name.push('*');
}
let trailing_comments = self.previous_trailing_comments().to_vec();
Expression::boxed_table(TableRef {
catalog: None,
schema: None,
name: table_name,
alias: None,
alias_explicit_as: false,
column_aliases: Vec::new(),
leading_comments: Vec::new(),
trailing_comments,
when: None,
only: false,
final_: false,
table_sample: None,
hints: Vec::new(),
system_time: None,
partitions: Vec::new(),
identifier_func: None,
changes: None,
version: None,
span: None,
})
}
} else if self.check(TokenType::LBrace) {
// ClickHouse query parameter: {name: Type}
if let Some(param) = self.parse_clickhouse_braced_parameter()? {
param
} else {
// Spark/Databricks widget template variable: {name}
self.skip(); // consume {
if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
let name_token = self.advance();
self.expect(TokenType::RBrace)?;
Expression::Parameter(Box::new(Parameter {
name: Some(name_token.text.clone()),
index: None,
style: ParameterStyle::Brace,
quoted: false,
string_quoted: false,
expression: None,
}))
} else {
return Err(self.parse_error("Expected identifier after {"));
}
}
} else if self.check(TokenType::Dollar) && self.check_next(TokenType::LBrace) {
// Template variable as table reference: ${variable_name} or ${kind:name}
// This is used in Databricks/Hive for parameterized queries
self.skip(); // consume $
self.skip(); // consume {
if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
let name_token = self.advance();
// Check for ${kind:name} syntax (e.g., ${hiveconf:some_var})
let expression = if self.match_token(TokenType::Colon) {
if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
let expr_token = self.advance();
Some(expr_token.text.clone())
} else {
return Err(self.parse_error("Expected identifier after : in ${...}"));
}
} else {
None
};
self.expect(TokenType::RBrace)?;
Expression::Parameter(Box::new(Parameter {
name: Some(name_token.text.clone()),
index: None,
style: ParameterStyle::DollarBrace,
quoted: false,
string_quoted: false,
expression,
}))
} else {
return Err(self.parse_error("Expected identifier after ${"));
}
} else if self.check(TokenType::String) {
// DuckDB allows string literals as table names: SELECT * FROM 'x.y'
// Convert to a quoted identifier
let string_token = self.advance();
let table_name = Identifier {
name: string_token.text.clone(),
quoted: true,
trailing_comments: Vec::new(),
span: None,
};
let trailing_comments = self.previous_trailing_comments().to_vec();
Expression::boxed_table(TableRef {
catalog: None,
schema: None,
name: table_name,
alias: None,
alias_explicit_as: false,
column_aliases: Vec::new(),
leading_comments: Vec::new(),
trailing_comments,
when: None,
only: false,
final_: false,
table_sample: None,
hints: Vec::new(),
system_time: None,
partitions: Vec::new(),
identifier_func: None,
changes: None,
version: None,
span: None,
})
} else {
return Err(self.parse_error(format!(
"Expected table name or subquery, got {:?}",
self.peek().token_type
)));
};
Ok(expr)
}
#[inline(never)]
fn parse_table_expression_tail(
&mut self,
mut expr: Expression,
has_only: bool,
) -> Result<Expression> {
// Postgres supports a wildcard (table) suffix operator, which is a no-op in this context.
// e.g., FROM t1* means "include inherited tables". Matches Python sqlglot behavior.
self.match_token(TokenType::Star);
// Check for Snowflake CHANGES clause: CHANGES (INFORMATION => ...) AT|BEFORE (...) END (...)
// Must be checked before time travel since CHANGES includes its own AT/BEFORE clauses
if self.check_keyword_text("CHANGES") {
if let Some(changes_expr) = self.parse_changes()? {
if let Expression::Table(ref mut table) = expr {
if let Expression::Changes(changes_box) = changes_expr {
table.changes = Some(changes_box);
}
}
}
}
// Check for Snowflake time travel: BEFORE (STATEMENT => ...) or AT (TIMESTAMP => ...)
if self.check(TokenType::Before) || self.check_keyword_text("AT") {
if let Some(historical_expr) = self.parse_historical_data()? {
// Attach historical data to the table expression
if let Expression::Table(ref mut table) = expr {
if let Expression::HistoricalData(hd) = historical_expr {
table.when = Some(hd);
}
}
}
}
// Check for TSQL FOR SYSTEM_TIME temporal clause (not BigQuery - handled post-alias)
// Syntax: FOR SYSTEM_TIME AS OF expr
// FOR SYSTEM_TIME FROM expr TO expr
// FOR SYSTEM_TIME BETWEEN expr AND expr
// FOR SYSTEM_TIME CONTAINED IN (expr, expr)
// FOR SYSTEM_TIME ALL
if !matches!(
self.config.dialect,
Some(crate::dialects::DialectType::BigQuery)
) && self.check(TokenType::For)
&& ((self.current + 1 < self.tokens.len()
&& self.tokens[self.current + 1]
.text
.eq_ignore_ascii_case("SYSTEM_TIME"))
|| (self.current + 2 < self.tokens.len()
&& self.tokens[self.current + 1]
.text
.eq_ignore_ascii_case("SYSTEM")
&& self.tokens[self.current + 2]
.text
.eq_ignore_ascii_case("TIME")))
{
self.skip(); // consume FOR
if self.check_keyword_text("SYSTEM_TIME") {
self.skip(); // consume SYSTEM_TIME
} else {
self.skip(); // consume SYSTEM
self.skip(); // consume TIME
}
let system_time_str = if self.match_token(TokenType::As) {
// AS OF expr
if self.check_keyword_text("OF") {
self.skip(); // consume OF
let start = self.current;
// Collect expression tokens until we hit a clause boundary
while !self.is_at_end()
&& !self.check(TokenType::Semicolon)
&& !self.check(TokenType::Where)
&& !self.check(TokenType::Join)
&& !self.check(TokenType::Left)
&& !self.check(TokenType::Right)
&& !self.check(TokenType::Inner)
&& !self.check(TokenType::Outer)
&& !self.check(TokenType::Full)
&& !self.check(TokenType::Cross)
&& !self.check(TokenType::Order)
&& !self.check(TokenType::Group)
&& !self.check(TokenType::Having)
&& !self.check(TokenType::Limit)
&& !self.check(TokenType::Union)
&& !self.check(TokenType::Except)
&& !self.check(TokenType::Intersect)
&& !self.check(TokenType::As)
&& !self.check(TokenType::Comma)
&& !self.check(TokenType::RParen)
&& !self.check(TokenType::With)
&& !self.check(TokenType::Pivot)
&& !self.check(TokenType::Unpivot)
{
self.skip();
}
let expr_text = self.tokens_to_sql_uppercased(start, self.current);
format!("FOR SYSTEM_TIME AS OF {}", expr_text)
} else {
"FOR SYSTEM_TIME AS".to_string()
}
} else if self.match_token(TokenType::Between) {
// BETWEEN expr AND expr
let start = self.current;
while !self.is_at_end() && !self.check(TokenType::And) {
self.skip();
}
let expr1_text = self.tokens_to_sql_uppercased(start, self.current);
self.skip(); // consume AND
let start2 = self.current;
while !self.is_at_end()
&& !self.check(TokenType::Semicolon)
&& !self.check(TokenType::Where)
&& !self.check(TokenType::Join)
&& !self.check(TokenType::Left)
&& !self.check(TokenType::Right)
&& !self.check(TokenType::Inner)
&& !self.check(TokenType::Outer)
&& !self.check(TokenType::Full)
&& !self.check(TokenType::Cross)
&& !self.check(TokenType::Order)
&& !self.check(TokenType::Group)
&& !self.check(TokenType::Having)
&& !self.check(TokenType::Limit)
&& !self.check(TokenType::Union)
&& !self.check(TokenType::Except)
&& !self.check(TokenType::Intersect)
&& !self.check(TokenType::As)
&& !self.check(TokenType::Comma)
&& !self.check(TokenType::RParen)
&& !self.check(TokenType::With)
&& !self.check(TokenType::Pivot)
&& !self.check(TokenType::Unpivot)
{
self.skip();
}
let expr2_text = self.tokens_to_sql_uppercased(start2, self.current);
format!("FOR SYSTEM_TIME BETWEEN {} AND {}", expr1_text, expr2_text)
} else if self.match_token(TokenType::From) {
// FROM expr TO expr
let start = self.current;
while !self.is_at_end() && !self.check(TokenType::To) {
self.skip();
}
let expr1_text = self.tokens_to_sql_uppercased(start, self.current);
self.skip(); // consume TO
let start2 = self.current;
while !self.is_at_end()
&& !self.check(TokenType::Semicolon)
&& !self.check(TokenType::Where)
&& !self.check(TokenType::As)
&& !self.check(TokenType::Comma)
&& !self.check(TokenType::RParen)
{
self.skip();
}
let expr2_text = self.tokens_to_sql_uppercased(start2, self.current);
format!("FOR SYSTEM_TIME FROM {} TO {}", expr1_text, expr2_text)
} else if self.check_identifier("CONTAINED") {
self.skip(); // consume CONTAINED
self.expect(TokenType::In)?;
self.expect(TokenType::LParen)?;
let start = self.current;
let mut depth = 1;
while !self.is_at_end() && depth > 0 {
if self.check(TokenType::LParen) {
depth += 1;
}
if self.check(TokenType::RParen) {
depth -= 1;
if depth == 0 {
break;
}
}
self.skip();
}
let inner_text = self.tokens_to_sql_uppercased(start, self.current);
self.expect(TokenType::RParen)?;
format!("FOR SYSTEM_TIME CONTAINED IN ({})", inner_text)
} else if self.match_token(TokenType::All) {
"FOR SYSTEM_TIME ALL".to_string()
} else {
"FOR SYSTEM_TIME".to_string()
};
if let Expression::Table(ref mut table) = expr {
table.system_time = Some(system_time_str);
}
}
// Check for Presto/Trino time travel: FOR VERSION AS OF / FOR TIMESTAMP AS OF
// Syntax: FOR VERSION AS OF <snapshot_id>
// FOR TIMESTAMP AS OF <timestamp_expr>
if self.check(TokenType::For) && self.current + 1 < self.tokens.len() {
let next_text = &self.tokens[self.current + 1].text;
if next_text.eq_ignore_ascii_case("VERSION")
|| next_text.eq_ignore_ascii_case("TIMESTAMP")
{
self.skip(); // consume FOR
let version_kind = self.advance().text.to_ascii_uppercase(); // consume VERSION or TIMESTAMP
// Expect AS OF
if self.match_token(TokenType::As) && self.check_keyword_text("OF") {
self.skip(); // consume OF
// Parse the expression value
if let Some(value_expr) = self.parse_bitwise()? {
let version = crate::expressions::Version {
this: Box::new(Expression::Identifier(Identifier::new(&version_kind))),
kind: "AS OF".to_string(),
expression: Some(Box::new(value_expr)),
};
if let Expression::Table(ref mut table) = expr {
table.version = Some(Box::new(version));
}
}
}
}
}
// Check for Hive-style time travel: TIMESTAMP AS OF / VERSION AS OF (without FOR)
// Syntax: TIMESTAMP AS OF <timestamp_expr>
// VERSION AS OF <snapshot_id>
if self.current < self.tokens.len() {
let current_text = &self.tokens[self.current].text;
if (current_text.eq_ignore_ascii_case("TIMESTAMP")
|| current_text.eq_ignore_ascii_case("VERSION"))
&& self.current + 2 < self.tokens.len()
&& self.tokens[self.current + 1].token_type == TokenType::As
&& self.tokens[self.current + 2]
.text
.eq_ignore_ascii_case("OF")
{
let version_kind = self.advance().text.to_ascii_uppercase(); // consume TIMESTAMP or VERSION
self.skip(); // consume AS
self.skip(); // consume OF
// Parse the expression value
if let Some(value_expr) = self.parse_bitwise()? {
let version = crate::expressions::Version {
this: Box::new(Expression::Identifier(Identifier::new(&version_kind))),
kind: "AS OF".to_string(),
expression: Some(Box::new(value_expr)),
};
if let Expression::Table(ref mut table) = expr {
table.version = Some(Box::new(version));
}
}
}
}
// Check for MySQL PARTITION(p0, p1, ...) clause
// Only supported by MySQL-compatible dialects (not generic dialect)
let supports_partition_selection = matches!(
self.config.dialect,
Some(crate::dialects::DialectType::MySQL)
| Some(crate::dialects::DialectType::SingleStore)
| Some(crate::dialects::DialectType::Doris)
| Some(crate::dialects::DialectType::StarRocks)
);
if supports_partition_selection && self.match_token(TokenType::Partition) {
if self.match_token(TokenType::LParen) {
let mut partitions = Vec::new();
loop {
let partition_name = self.expect_identifier_or_keyword_with_quoted()?;
partitions.push(partition_name);
if !self.match_token(TokenType::Comma) {
break;
}
}
self.expect(TokenType::RParen)?;
if let Expression::Table(ref mut table) = expr {
table.partitions = partitions;
}
}
}
// Check for table-level TABLESAMPLE/SAMPLE: tbl TABLESAMPLE METHOD(size) or tbl SAMPLE ROW(0)
// Snowflake supports both TABLESAMPLE and SAMPLE
if self.check(TokenType::TableSample) || self.check(TokenType::Sample) {
if let Some(sample) = self.parse_table_level_sample()? {
if let Expression::Table(ref mut table) = expr {
table.table_sample = Some(Box::new(sample));
} else {
// For non-Table expressions (subqueries, functions, etc.),
// wrap in TableSample expression node
expr = Expression::TableSample(Box::new(crate::expressions::TableSample {
this: Some(Box::new(expr)),
sample: Some(Box::new(sample)),
expressions: Vec::new(),
method: None,
bucket_numerator: None,
bucket_denominator: None,
bucket_field: None,
percent: None,
rows: None,
size: None,
seed: None,
}));
}
}
}
// Check for TSQL table hints: WITH (TABLOCK, INDEX(myindex), ...)
if self.check(TokenType::With) && self.check_next(TokenType::LParen) {
if let Expression::Table(ref mut table) = expr {
if let Some(hint_expr) = self.parse_table_hints()? {
// parse_table_hints returns a Tuple wrapping individual hint expressions.
// Extract the inner hints so we store them directly.
match hint_expr {
Expression::Tuple(tuple) => {
table.hints = tuple.expressions;
}
other => {
table.hints = vec![other];
}
}
}
}
}
// Check for MySQL index hints: USE INDEX, IGNORE INDEX, FORCE INDEX
if self.check_keyword_text("USE")
|| self.check(TokenType::Ignore)
|| self.check_keyword_text("FORCE")
{
// Peek ahead to see if next token after USE/IGNORE/FORCE is INDEX or KEY
let next_idx = self.current + 1;
let is_index_hint = next_idx < self.tokens.len() && {
let next_text = &self.tokens[next_idx].text;
next_text.eq_ignore_ascii_case("INDEX") || next_text.eq_ignore_ascii_case("KEY")
};
if is_index_hint {
if let Expression::Table(ref mut table) = expr {
if let Some(hint_expr) = self.parse_table_hints()? {
match hint_expr {
Expression::Tuple(tuple) => {
table.hints = tuple.expressions;
}
other => {
table.hints = vec![other];
}
}
}
}
}
}
// Check for SQLite INDEXED BY or NOT INDEXED table hints
if self.check_identifier("INDEXED") {
self.skip(); // consume INDEXED
self.expect(TokenType::By)?;
// Parse index name (can be qualified: schema.index)
let first_part = self.expect_identifier_or_keyword()?;
let index_name = if self.match_token(TokenType::Dot) {
let second_part = self.expect_identifier_or_keyword()?;
format!("{}.{}", first_part, second_part)
} else {
first_part
};
if let Expression::Table(ref mut table) = expr {
table.hints.push(Expression::Identifier(Identifier {
name: format!("INDEXED BY {}", index_name),
quoted: false,
trailing_comments: Vec::new(),
span: None,
}));
}
} else if self.check(TokenType::Not) && self.check_next_identifier("INDEXED") {
self.skip(); // consume NOT
self.skip(); // consume INDEXED
if let Expression::Table(ref mut table) = expr {
table.hints.push(Expression::Identifier(Identifier {
name: "NOT INDEXED".to_string(),
quoted: false,
trailing_comments: Vec::new(),
span: None,
}));
}
}
// Check for PIVOT (can be followed by UNPIVOT)
// Only treat as PIVOT clause when followed by ( — otherwise it's a table alias
if self.check(TokenType::Pivot) && self.check_next(TokenType::LParen) {
self.skip(); // consume PIVOT
expr = self.parse_pivot(expr)?;
}
// Check for UNPIVOT (can follow PIVOT or be standalone)
// Only treat as UNPIVOT clause when followed by (, INCLUDE, or EXCLUDE — otherwise it's a table alias
if self.check(TokenType::Unpivot) && self.is_unpivot_clause_start() {
self.skip(); // consume UNPIVOT
expr = self.parse_unpivot(expr)?;
}
// Check for MATCH_RECOGNIZE
else if self.check(TokenType::MatchRecognize)
&& !matches!(&expr, Expression::Pivot(_) | Expression::Unpivot(_))
{
self.skip();
expr = self.parse_match_recognize(Some(expr))?;
}
// Check for alias
if self.match_token(TokenType::As) {
// Handle AS (col1, col2) without alias name - used by POSEXPLODE etc.
if self.check(TokenType::LParen) {
self.skip(); // consume LParen
let mut column_aliases = Vec::new();
loop {
if self.check(TokenType::RParen) {
break;
}
column_aliases.push(Identifier::new(self.expect_identifier_or_keyword()?));
if !self.match_token(TokenType::Comma) {
break;
}
}
self.expect(TokenType::RParen)?;
expr = Expression::Alias(Box::new(Alias {
this: expr,
alias: Identifier::new(String::new()),
column_aliases,
pre_alias_comments: Vec::new(),
trailing_comments: Vec::new(),
inferred_type: None,
}));
} else {
let alias_ident_parsed = self.expect_identifier_or_alias_keyword_with_quoted()?;
let alias = alias_ident_parsed.name;
let alias_is_quoted = alias_ident_parsed.quoted;
let make_alias_ident = |name: String| -> Identifier {
if alias_is_quoted {
Identifier::quoted(name)
} else {
Identifier::new(name)
}
};
// Check for column aliases: AS t(c1, c2) or AS t(c1 type1, c2 type2) for table functions
if self.match_token(TokenType::LParen) {
// Check if this is typed column definitions (for table functions like JSON_TO_RECORDSET)
// by looking ahead: if we see identifier followed by another identifier/type (not comma/rparen),
// it's typed columns
let has_typed_columns = self.check_typed_column_list();
if has_typed_columns {
// Parse typed column definitions like: (col1 type1, col2 type2)
let mut typed_cols = Vec::new();
loop {
if self.check(TokenType::RParen) {
break;
}
// Parse column name (can be quoted)
let col_name = self.expect_identifier_or_keyword_with_quoted()?;
// Parse column type
let col_type = self.parse_data_type()?;
// Create ColumnDef expression, preserving the quoted status
let mut col_def = ColumnDef::new(col_name.name.clone(), col_type);
col_def.name = col_name;
typed_cols.push(Expression::ColumnDef(Box::new(col_def)));
if !self.match_token(TokenType::Comma) {
break;
}
}
self.expect(TokenType::RParen)?;
// Create TableAlias with typed columns
let table_alias = Expression::TableAlias(Box::new(TableAlias {
this: Some(Box::new(Expression::Identifier(make_alias_ident(alias)))),
columns: typed_cols,
}));
// Wrap function with TableAlias using Tuple pattern (like ROWS FROM)
expr = Expression::Tuple(Box::new(Tuple {
expressions: vec![expr, table_alias],
}));
} else {
// Parse simple column aliases: (c1, c2, ...)
// Use expect_identifier_or_keyword to allow keywords like KEY, INDEX, VALUE as column aliases
let mut aliases = Vec::new();
loop {
if self.check(TokenType::RParen) {
break;
}
aliases.push(Identifier::new(self.expect_identifier_or_keyword()?));
if !self.match_token(TokenType::Comma) {
break;
}
}
self.expect(TokenType::RParen)?;
expr = match expr {
Expression::Table(mut t) => {
t.alias = Some(make_alias_ident(alias));
t.alias_explicit_as = true;
t.column_aliases = aliases;
Expression::Table(t)
}
Expression::Subquery(mut s) => {
s.alias = Some(make_alias_ident(alias));
s.column_aliases = aliases;
Expression::Subquery(s)
}
Expression::Pivot(mut p) => {
p.alias = Some(make_alias_ident(alias));
Expression::Pivot(p)
}
Expression::Unpivot(mut u) => {
u.alias = Some(make_alias_ident(alias));
Expression::Unpivot(u)
}
Expression::MatchRecognize(mut mr) => {
mr.alias = Some(make_alias_ident(alias));
mr.alias_explicit_as = true;
Expression::MatchRecognize(mr)
}
Expression::JoinedTable(mut jt) => {
jt.alias = Some(make_alias_ident(alias));
Expression::JoinedTable(jt)
}
_ => Expression::Alias(Box::new(Alias {
this: expr,
alias: make_alias_ident(alias),
column_aliases: aliases,
pre_alias_comments: Vec::new(),
trailing_comments: Vec::new(),
inferred_type: None,
})),
};
}
} else {
// No column aliases, just simple alias
let default_column_aliases = if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && matches!(&expr, Expression::Function(func) if func.name.eq_ignore_ascii_case("generate_series"))
{
vec![Identifier::new("generate_series")]
} else {
Vec::new()
};
expr = match expr {
Expression::Table(mut t) => {
t.alias = Some(make_alias_ident(alias));
t.alias_explicit_as = true;
t.column_aliases = Vec::new();
Expression::Table(t)
}
Expression::Subquery(mut s) => {
s.alias = Some(make_alias_ident(alias));
s.column_aliases = Vec::new();
Expression::Subquery(s)
}
Expression::Pivot(mut p) => {
p.alias = Some(make_alias_ident(alias));
Expression::Pivot(p)
}
Expression::Unpivot(mut u) => {
u.alias = Some(make_alias_ident(alias));
Expression::Unpivot(u)
}
Expression::MatchRecognize(mut mr) => {
mr.alias = Some(make_alias_ident(alias));
mr.alias_explicit_as = true;
Expression::MatchRecognize(mr)
}
Expression::JoinedTable(mut jt) => {
jt.alias = Some(make_alias_ident(alias));
Expression::JoinedTable(jt)
}
_ => Expression::Alias(Box::new(Alias {
this: expr,
alias: make_alias_ident(alias),
column_aliases: default_column_aliases,
pre_alias_comments: Vec::new(),
trailing_comments: Vec::new(),
inferred_type: None,
})),
};
}
} // close the else for AS (col1, col2) handling
} else if (self.check(TokenType::QuotedIdentifier)
|| (self.check(TokenType::Var) && !self.check_keyword() && !self.check_identifier("MATCH_CONDITION")
&& !(self.check_identifier("ARRAY") && self.check_next(TokenType::Join)
&& matches!(self.config.dialect, Some(crate::dialects::DialectType::ClickHouse)))
// TSQL: OPTION(LABEL = 'foo') is a query hint, not an alias
&& !(self.check_identifier("OPTION") && self.check_next(TokenType::LParen))
// MySQL: LOCK IN SHARE MODE is a locking clause, not an alias
&& !(self.check_identifier("LOCK") && self.check_next(TokenType::In))
// ClickHouse: PARALLEL WITH is a statement separator, not a table alias
&& !(self.check_identifier("PARALLEL") && self.check_next(TokenType::With)
&& matches!(self.config.dialect, Some(crate::dialects::DialectType::ClickHouse)))
// DuckDB: POSITIONAL JOIN is a join method, not a table alias
&& !(self.check_identifier("POSITIONAL") && self.check_next(TokenType::Join))))
|| self.is_command_keyword_as_alias()
// ClickHouse: allow FIRST/LAST as implicit table aliases
// (they're keywords used in NULLS FIRST/LAST but also valid as identifiers)
|| (matches!(self.config.dialect, Some(crate::dialects::DialectType::ClickHouse))
&& (self.check(TokenType::First) || self.check(TokenType::Last)))
// PIVOT/UNPIVOT can be table aliases when not followed by clause-starting tokens
|| (self.check(TokenType::Pivot) && !self.check_next(TokenType::LParen))
|| (self.check(TokenType::Unpivot) && !self.is_unpivot_clause_start())
// PARTITION can be a table alias when the dialect doesn't support partition selection
|| (self.check(TokenType::Partition) && !matches!(
self.config.dialect,
Some(crate::dialects::DialectType::MySQL)
| Some(crate::dialects::DialectType::SingleStore)
| Some(crate::dialects::DialectType::Doris)
| Some(crate::dialects::DialectType::StarRocks)
))
|| (self.check(TokenType::Window) && {
// WINDOW can be a table alias if NOT followed by an identifier (window definition)
let next_pos = self.current + 1;
next_pos >= self.tokens.len()
|| (self.tokens[next_pos].token_type != TokenType::Var
&& self.tokens[next_pos].token_type != TokenType::Identifier)
})
{
// Implicit alias (but not MATCH_CONDITION which is a join condition keyword)
// Also allow command keywords (GET, PUT, etc.) and WINDOW (when not a clause) as implicit table aliases
let is_keyword_alias = self.peek().token_type.is_keyword();
let is_quoted_alias = self.peek().token_type == TokenType::QuotedIdentifier;
let alias = self.advance().text.clone();
// Check for column aliases: t(c1, c2)
// Use expect_identifier_or_keyword to allow keywords like KEY, INDEX, VALUE as column aliases
let mut column_aliases = if self.match_token(TokenType::LParen) {
let mut aliases = Vec::new();
loop {
aliases.push(Identifier::new(self.expect_identifier_or_keyword()?));
if !self.match_token(TokenType::Comma) {
break;
}
}
self.expect(TokenType::RParen)?;
aliases
} else {
Vec::new()
};
if column_aliases.is_empty()
&& matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
)
&& matches!(&expr, Expression::Function(func) if func.name.eq_ignore_ascii_case("generate_series"))
{
column_aliases = vec![Identifier::new("generate_series")];
}
let make_alias_ident = |name: String| -> Identifier {
if is_quoted_alias {
Identifier::quoted(name)
} else {
Identifier::new(name)
}
};
expr = match expr {
Expression::Table(mut t) => {
t.alias = Some(make_alias_ident(alias));
t.alias_explicit_as = is_keyword_alias;
t.column_aliases = column_aliases;
Expression::Table(t)
}
Expression::Subquery(mut s) => {
s.alias = Some(make_alias_ident(alias));
s.column_aliases = column_aliases;
Expression::Subquery(s)
}
Expression::Pivot(mut p) => {
p.alias = Some(make_alias_ident(alias));
Expression::Pivot(p)
}
Expression::Unpivot(mut u) => {
u.alias = Some(make_alias_ident(alias));
Expression::Unpivot(u)
}
Expression::MatchRecognize(mut mr) => {
mr.alias = Some(make_alias_ident(alias));
Expression::MatchRecognize(mr)
}
Expression::JoinedTable(mut jt) => {
jt.alias = Some(make_alias_ident(alias));
Expression::JoinedTable(jt)
}
_ => Expression::Alias(Box::new(Alias {
this: expr,
alias: make_alias_ident(alias),
column_aliases,
pre_alias_comments: Vec::new(),
trailing_comments: Vec::new(),
inferred_type: None,
})),
};
}
// ClickHouse: subquery column alias list without alias name: FROM (...) (c0, c1)
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.check(TokenType::LParen)
&& matches!(&expr, Expression::Subquery(s) if s.alias.is_none())
{
// Lookahead: check if this is (identifier, identifier, ...) — column alias list
let mut look = self.current + 1;
let mut is_col_list = true;
let mut col_count = 0;
loop {
if look >= self.tokens.len() {
is_col_list = false;
break;
}
let tt = self.tokens[look].token_type;
if tt == TokenType::Identifier
|| tt == TokenType::Var
|| tt == TokenType::QuotedIdentifier
|| tt.is_keyword()
{
col_count += 1;
look += 1;
} else {
is_col_list = false;
break;
}
if look >= self.tokens.len() {
is_col_list = false;
break;
}
if self.tokens[look].token_type == TokenType::Comma {
look += 1;
} else if self.tokens[look].token_type == TokenType::RParen {
break;
} else {
is_col_list = false;
break;
}
}
if is_col_list && col_count >= 1 {
self.skip(); // consume LParen
let mut aliases = Vec::new();
loop {
aliases.push(Identifier::new(self.advance().text.clone()));
if !self.match_token(TokenType::Comma) {
break;
}
}
self.expect(TokenType::RParen)?;
if let Expression::Subquery(ref mut s) = expr {
s.column_aliases = aliases;
}
}
}
// ClickHouse FINAL modifier: table [AS alias] FINAL
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.match_token(TokenType::Final)
{
if let Expression::Table(ref mut table) = expr {
table.final_ = true;
}
}
// Check for SQLite INDEXED BY after alias: t AS t INDEXED BY idx
if self.check_identifier("INDEXED") {
self.skip(); // consume INDEXED
self.expect(TokenType::By)?;
let first_part = self.expect_identifier_or_keyword()?;
let index_name = if self.match_token(TokenType::Dot) {
let second_part = self.expect_identifier_or_keyword()?;
format!("{}.{}", first_part, second_part)
} else {
first_part
};
if let Expression::Table(ref mut table) = expr {
table.hints.push(Expression::Identifier(Identifier {
name: format!("INDEXED BY {}", index_name),
quoted: false,
trailing_comments: Vec::new(),
span: None,
}));
}
}
// Check for TSQL table hints after alias: t o WITH (NOLOCK), t AS a WITH (TABLOCK)
if self.check(TokenType::With) && self.check_next(TokenType::LParen) {
if let Expression::Table(ref mut table) = expr {
if let Some(hint_expr) = self.parse_table_hints()? {
match hint_expr {
Expression::Tuple(tuple) => {
table.hints = tuple.expressions;
}
other => {
table.hints = vec![other];
}
}
}
}
}
// Check for MySQL index hints after alias: t e USE INDEX (idx), t AS a IGNORE INDEX (idx)
if self.check_keyword_text("USE")
|| self.check(TokenType::Ignore)
|| self.check_keyword_text("FORCE")
{
let next_idx = self.current + 1;
let is_index_hint = next_idx < self.tokens.len() && {
let next_text = &self.tokens[next_idx].text;
next_text.eq_ignore_ascii_case("INDEX") || next_text.eq_ignore_ascii_case("KEY")
};
if is_index_hint {
if let Expression::Table(ref mut table) = expr {
if let Some(hint_expr) = self.parse_table_hints()? {
match hint_expr {
Expression::Tuple(tuple) => {
table.hints = tuple.expressions;
}
other => {
table.hints = vec![other];
}
}
}
}
}
}
// Check for PIVOT/UNPIVOT after alias (some dialects allow this order)
// Only treat as PIVOT/UNPIVOT clause when followed by ( — otherwise it's a table alias
if self.check(TokenType::Pivot) && self.check_next(TokenType::LParen) {
self.skip(); // consume PIVOT
expr = self.parse_pivot(expr)?;
} else if self.check(TokenType::Unpivot) && self.is_unpivot_clause_start() {
self.skip(); // consume UNPIVOT
expr = self.parse_unpivot(expr)?;
}
// Handle PIVOT/UNPIVOT alias: PIVOT(...) AS pvt
if matches!(&expr, Expression::Pivot(_) | Expression::Unpivot(_)) {
if self.match_token(TokenType::As) {
let alias = self.expect_identifier_or_alias_keyword_with_quoted()?;
match &mut expr {
Expression::Pivot(p) => p.alias = Some(alias),
Expression::Unpivot(u) => u.alias = Some(alias),
_ => {}
}
} else if !self.check_keyword()
&& (self.check(TokenType::Var) || self.check(TokenType::QuotedIdentifier))
{
let tok = self.advance();
let alias = if tok.token_type == TokenType::QuotedIdentifier {
Identifier::quoted(tok.text.clone())
} else {
Identifier::new(tok.text.clone())
};
match &mut expr {
Expression::Pivot(p) => p.alias = Some(alias),
Expression::Unpivot(u) => u.alias = Some(alias),
_ => {}
}
}
}
// Check for Redshift AT index clause for array unnesting
// Syntax: table_alias.array_column AS element_alias AT index_alias
// e.g., c.c_orders AS orders AT index
// https://docs.aws.amazon.com/redshift/latest/dg/query-super.html
if self.match_identifier("AT") {
let index_alias = self.expect_identifier_or_keyword()?;
// Convert the table expression to a column for AtIndex
let column_expr = match expr {
Expression::Table(t) => {
// Convert Table to Column reference
// For c.c_orders, table=c, name=c_orders -> column name should be c.c_orders
let mut parts = Vec::new();
if let Some(cat) = t.catalog {
parts.push(cat.name);
}
if let Some(schema) = t.schema {
parts.push(schema.name);
}
parts.push(t.name.name);
let col_name = parts.join(".");
let alias_expr = if let Some(alias) = t.alias {
Expression::Alias(Box::new(Alias {
this: Expression::boxed_column(Column {
name: Identifier::new(&col_name),
table: None,
join_mark: false,
trailing_comments: Vec::new(),
span: None,
inferred_type: None,
}),
alias,
column_aliases: t.column_aliases,
pre_alias_comments: Vec::new(),
trailing_comments: t.trailing_comments,
inferred_type: None,
}))
} else {
Expression::boxed_column(Column {
name: Identifier::new(&col_name),
table: None,
join_mark: false,
trailing_comments: t.trailing_comments,
span: None,
inferred_type: None,
})
};
alias_expr
}
other => other, // Keep as is for non-table expressions
};
expr = Expression::AtIndex(Box::new(AtIndex {
this: Box::new(column_expr),
expression: Box::new(Expression::Identifier(Identifier::new(index_alias))),
}));
}
// Check for TABLESAMPLE/SAMPLE after alias (Snowflake ALIAS_POST_TABLESAMPLE)
// e.g., table2 AS t2 TABLESAMPLE BERNOULLI (50), table2 AS t2 SAMPLE ROW (0)
if self.check(TokenType::TableSample) || self.check(TokenType::Sample) {
if let Some(sample) = self.parse_table_level_sample()? {
// Capture trailing comments after the SAMPLE clause (e.g., -- 25% of rows in table1)
let post_sample_comments = self.previous_trailing_comments().to_vec();
if let Expression::Table(ref mut table) = expr {
table.table_sample = Some(Box::new(sample));
if !post_sample_comments.is_empty() {
table.trailing_comments.extend(post_sample_comments);
}
} else {
// For non-Table expressions, wrap in TableSample expression node
expr = Expression::TableSample(Box::new(crate::expressions::TableSample {
this: Some(Box::new(expr)),
sample: Some(Box::new(sample)),
expressions: Vec::new(),
method: None,
bucket_numerator: None,
bucket_denominator: None,
bucket_field: None,
percent: None,
rows: None,
size: None,
seed: None,
}));
}
}
}
// Apply PostgreSQL ONLY modifier if present
if has_only {
if let Expression::Table(ref mut table) = expr {
table.only = true;
}
}
// BigQuery: FOR SYSTEM_TIME AS OF after alias
// e.g., FROM foo AS t0 FOR SYSTEM_TIME AS OF '2026-01-01'
if self.check(TokenType::For)
&& ((self.current + 1 < self.tokens.len()
&& self.tokens[self.current + 1]
.text
.eq_ignore_ascii_case("SYSTEM_TIME"))
|| (self.current + 2 < self.tokens.len()
&& self.tokens[self.current + 1]
.text
.eq_ignore_ascii_case("SYSTEM")
&& self.tokens[self.current + 2]
.text
.eq_ignore_ascii_case("TIME")))
{
self.skip(); // consume FOR
if self.check_keyword_text("SYSTEM_TIME") {
self.skip(); // consume SYSTEM_TIME
} else {
self.skip(); // consume SYSTEM
self.skip(); // consume TIME
}
if self.match_token(TokenType::As) && self.check_keyword_text("OF") {
self.skip(); // consume OF
let start = self.current;
// Collect expression tokens until clause boundary
while !self.is_at_end()
&& !self.check(TokenType::Semicolon)
&& !self.check(TokenType::Where)
&& !self.check(TokenType::Join)
&& !self.check(TokenType::Left)
&& !self.check(TokenType::Right)
&& !self.check(TokenType::Inner)
&& !self.check(TokenType::Outer)
&& !self.check(TokenType::Full)
&& !self.check(TokenType::Cross)
&& !self.check(TokenType::Order)
&& !self.check(TokenType::Group)
&& !self.check(TokenType::Having)
&& !self.check(TokenType::Limit)
&& !self.check(TokenType::Union)
&& !self.check(TokenType::Except)
&& !self.check(TokenType::Intersect)
&& !self.check(TokenType::Comma)
&& !self.check(TokenType::RParen)
{
self.skip();
}
let expr_text = self.tokens_to_sql(start, self.current);
let system_time_str = format!("FOR SYSTEM_TIME AS OF {}", expr_text);
if let Expression::Table(ref mut table) = expr {
table.system_time = Some(system_time_str);
}
}
}
// BigQuery INFORMATION_SCHEMA handling
// When INFORMATION_SCHEMA is part of a table reference, merge it with the table name
// into a single quoted identifier and auto-add an alias if not present
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::BigQuery)
) {
if let Expression::Table(ref mut table) = expr {
// Case 1: Single quoted identifier containing INFORMATION_SCHEMA (e.g., `proj.dataset.INFORMATION_SCHEMA.SOME_VIEW`)
// Add an alias that is the same as the table name (only if no alias)
if table.schema.is_none() && table.catalog.is_none() && table.alias.is_none() {
let name_upper = table.name.name.to_ascii_uppercase();
if name_upper.contains("INFORMATION_SCHEMA.") {
// Set alias to be the full quoted table name
table.alias = Some(table.name.clone());
table.alias_explicit_as = true;
}
}
// Case 2: Multi-part name where schema part is INFORMATION_SCHEMA
// e.g., region_or_dataset.INFORMATION_SCHEMA.TABLES -> region_or_dataset.`INFORMATION_SCHEMA.TABLES` AS TABLES
// e.g., proj.region_or_dataset.INFORMATION_SCHEMA.TABLES -> proj.region_or_dataset.`INFORMATION_SCHEMA.TABLES` AS TABLES
// This applies even if an alias is already set (we still need to merge the parts)
else if let Some(ref schema) = table.schema {
if schema.name.eq_ignore_ascii_case("INFORMATION_SCHEMA") {
// Merge schema (INFORMATION_SCHEMA) with table name into a single quoted identifier
let merged_name = format!("{}.{}", schema.name, table.name.name);
let original_table_name = table.name.name.clone();
// Set alias to original table name (TABLES, VIEWS, etc.) only if no alias exists
if table.alias.is_none() {
table.alias = Some(Identifier::new(original_table_name));
table.alias_explicit_as = true;
}
// Create new quoted identifier
table.name = Identifier {
name: merged_name,
quoted: true,
trailing_comments: Vec::new(),
span: None,
};
// Shift: schema becomes catalog, catalog becomes None or stays
table.schema = table.catalog.take();
// catalog is now None
}
}
}
}
Ok(expr)
}
/// Parse standard PIVOT clause (in FROM clause)
/// PIVOT(agg_func [AS alias], ... FOR column IN (value [AS alias], ...) [GROUP BY ...])
///
/// Precondition: the PIVOT keyword has already been consumed by the caller;
/// `source` is the table expression being pivoted. Also accepts Snowflake's
/// `DEFAULT ON NULL (expr)` clause and an optional GROUP BY inside the parens.
fn parse_pivot(&mut self, source: Expression) -> Result<Expression> {
self.expect(TokenType::LParen)?;
// Parse aggregation functions (comma-separated, may have aliases)
// Stop when we see FOR keyword
// Use parse_primary() to handle keyword function names like FIRST, LAST
let mut expressions = Vec::new();
loop {
if self.check(TokenType::For) || self.check(TokenType::RParen) {
break;
}
// Parse the aggregation expression using parse_primary (handles keyword functions)
let func = self.parse_primary()?;
// Check for alias (AS alias or just identifier after function)
let expr = if self.match_token(TokenType::As) {
// AS alias
let alias_name = self.expect_identifier_or_keyword()?;
Expression::Alias(Box::new(Alias::new(func, Identifier::new(alias_name))))
} else if !self.check(TokenType::Comma)
&& !self.check(TokenType::For)
&& !self.check(TokenType::RParen)
{
// Implicit alias (no AS keyword): SUM(b) d
if let Some(id) = self.parse_id_var()? {
// parse_id_var may yield an Identifier or a Column; extract the
// simple name from either, otherwise leave the function un-aliased.
let alias_name = match &id {
Expression::Identifier(ident) => ident.name.clone(),
Expression::Column(col) => col.name.name.clone(),
_ => String::new(),
};
if !alias_name.is_empty() {
Expression::Alias(Box::new(Alias::new(func, Identifier::new(alias_name))))
} else {
func
}
} else {
func
}
} else {
func
};
expressions.push(expr);
if !self.match_token(TokenType::Comma) {
break;
}
// After consuming comma, if next is FOR, break (comma before FOR is optional/dropped)
if self.check(TokenType::For) {
break;
}
}
// FOR column IN (values)
self.expect(TokenType::For)?;
let mut fields = Vec::new();
loop {
let field = self.parse_standard_pivot_in()?;
fields.push(field);
// Check for additional FOR clauses (rare but possible)
if !self.match_token(TokenType::For) {
break;
}
}
// Handle Snowflake's DEFAULT ON NULL (default_value) clause
let default_on_null = if self.match_text_seq(&["DEFAULT", "ON", "NULL"]) {
if self.match_token(TokenType::LParen) {
let val = self.parse_expression()?;
self.expect(TokenType::RParen)?;
Some(Box::new(val))
} else {
// DEFAULT ON NULL without a parenthesized value is tolerated and
// treated as absent.
None
}
} else {
None
};
// Parse optional GROUP BY inside PIVOT parens
let group = self.parse_group()?;
self.expect(TokenType::RParen)?;
Ok(Expression::Pivot(Box::new(Pivot {
this: source,
expressions,
fields,
using: Vec::new(),
group: group.map(Box::new),
unpivot: false,
into: None,
alias: None,
include_nulls: None,
default_on_null,
with: None,
})))
}
/// Parse FOR column IN (...) part of standard PIVOT
///
/// Handles three IN-list forms:
/// - `IN (ANY [ORDER BY ...])` -> a single `PivotAny` entry
/// - `IN (v1 [AS a1], v2, ...)` -> explicit values, each optionally aliased
/// - `IN bare_name` (no parens) -> stored in `query` with `is_field: true`
fn parse_standard_pivot_in(&mut self) -> Result<Expression> {
// Parse the column being pivoted
let column = self.parse_primary()?;
// IN keyword
self.expect(TokenType::In)?;
// IN values - can be parenthesized or bare identifier
if self.match_token(TokenType::LParen) {
// Check for ANY keyword
let in_exprs = if self.match_text_seq(&["ANY"]) {
// ANY may carry an ORDER BY controlling pivot-column ordering.
let order = self.parse_order()?;
vec![Expression::PivotAny(Box::new(PivotAny {
this: order.map(Box::new),
}))]
} else {
// Parse comma-separated values with optional aliases
let mut vals = Vec::new();
loop {
if self.check(TokenType::RParen) {
break;
}
if let Some(val) = self.parse_select_or_expression()? {
// Check for alias - alias can be an identifier or an expression
// (e.g., 'PREFIX ' || CHR(38) || ' SUFFIX' in Oracle)
let val = if self.match_token(TokenType::As) {
// Parse the alias as an expression (not just an identifier)
// This allows for string concatenation aliases
let alias_expr = self.parse_bitwise()?.ok_or_else(|| {
self.parse_error(
"Expected expression after AS in PIVOT/UNPIVOT IN clause",
)
})?;
Expression::PivotAlias(Box::new(PivotAlias {
this: val,
alias: alias_expr,
}))
} else {
val
};
vals.push(val);
}
if !self.match_token(TokenType::Comma) {
break;
}
}
vals
};
self.expect(TokenType::RParen)?;
Ok(Expression::In(Box::new(In {
this: column,
expressions: in_exprs,
query: None,
not: false,
global: false,
unnest: None,
is_field: false,
})))
} else {
// Bare identifier: FOR foo IN y_enum (no parentheses)
// Store in query field to distinguish from parenthesized IN
// A missing identifier degrades to NULL rather than erroring.
let field_id = self.parse_id_var()?.unwrap_or(Expression::Null(Null));
Ok(Expression::In(Box::new(In {
this: column,
expressions: Vec::new(),
query: Some(field_id),
not: false,
global: false,
unnest: None,
is_field: true,
})))
}
}
/// Parse UNPIVOT clause
/// UNPIVOT (value_column FOR name_column IN (col1, col2, ...))
/// UNPIVOT ((col1, col2) FOR name_column IN (col1, col2, ...))
/// UNPIVOT INCLUDE NULLS (value_column FOR name_column IN (...))
/// UNPIVOT EXCLUDE NULLS (value_column FOR name_column IN (...))
///
/// Precondition: the UNPIVOT keyword itself is already consumed; `source` is
/// the table expression being unpivoted. `include_nulls` records
/// INCLUDE NULLS as Some(true), EXCLUDE NULLS as Some(false), absent as None.
fn parse_unpivot(&mut self, source: Expression) -> Result<Expression> {
// Check for optional INCLUDE NULLS or EXCLUDE NULLS
let include_nulls = if self.match_text_seq(&["INCLUDE", "NULLS"]) {
Some(true)
} else if self.match_text_seq(&["EXCLUDE", "NULLS"]) {
Some(false)
} else {
None
};
self.expect(TokenType::LParen)?;
// Value column(s) - can be identifier or (col1, col2, ...)
// Allow keywords as identifiers (e.g., "values" is a common column name in UNPIVOT)
let (value_column, value_column_parenthesized, extra_value_columns) =
if self.match_token(TokenType::LParen) {
// Parenthesized value column(s)
let col = self.expect_identifier_or_keyword()?;
let mut extra_cols = Vec::new();
while self.match_token(TokenType::Comma) {
extra_cols.push(Identifier::new(self.expect_identifier_or_keyword()?));
}
self.expect(TokenType::RParen)?;
(Identifier::new(col), true, extra_cols)
} else {
(
Identifier::new(self.expect_identifier_or_keyword()?),
false,
Vec::new(),
)
};
// FOR name_column
self.expect(TokenType::For)?;
let name_column = Identifier::new(self.expect_identifier_or_keyword()?);
// IN (columns with optional aliases)
// Format: col1 [AS alias1], col2 [AS alias2], ...
// Or tuple format: (col1, col2) [AS alias1], (col3, col4) [AS alias2], ...
// Aliases can be expressions like 'PREFIX ' || CHR(38) || ' SUFFIX'
self.expect(TokenType::In)?;
self.expect(TokenType::LParen)?;
let columns = {
let mut cols = Vec::new();
loop {
if self.check(TokenType::RParen) {
break;
}
// Check if this is a tuple of columns: (col1, col2)
let col_expr = if self.check(TokenType::LParen) {
// Could be a tuple of columns for multi-value unpivot
// Save the cursor so we can backtrack if the tuple parse fails.
let saved = self.current;
self.skip(); // consume (
// Try parsing as identifier list (tuple of columns)
let mut tuple_cols = Vec::new();
let first = self.expect_identifier_or_keyword();
if let Ok(first_id) = first {
tuple_cols.push(Expression::column(first_id));
while self.match_token(TokenType::Comma) {
if let Ok(id) = self.expect_identifier_or_keyword() {
tuple_cols.push(Expression::column(id));
} else {
break;
}
}
// Require more than one column: a single parenthesized name is
// re-parsed as an ordinary parenthesized expression instead.
if self.match_token(TokenType::RParen) && tuple_cols.len() > 1 {
// Successful tuple parse
Some(Expression::Tuple(Box::new(Tuple {
expressions: tuple_cols,
})))
} else {
// Not a tuple, backtrack
self.current = saved;
self.parse_select_or_expression()?
}
} else {
// Not an identifier, backtrack
self.current = saved;
self.parse_select_or_expression()?
}
} else {
self.parse_select_or_expression()?
};
if let Some(col) = col_expr {
// Check for alias
let col = if self.match_token(TokenType::As) {
// Parse the alias as an expression (allows string concatenation)
let alias_expr = self.parse_bitwise()?.ok_or_else(|| {
self.parse_error("Expected expression after AS in UNPIVOT IN clause")
})?;
Expression::PivotAlias(Box::new(PivotAlias {
this: col,
alias: alias_expr,
}))
} else {
col
};
cols.push(col);
}
if !self.match_token(TokenType::Comma) {
break;
}
}
cols
};
self.expect(TokenType::RParen)?;
self.expect(TokenType::RParen)?;
Ok(Expression::Unpivot(Box::new(Unpivot {
this: source,
value_column,
name_column,
columns,
alias: None,
value_column_parenthesized,
include_nulls,
extra_value_columns,
})))
}
/// Parse Redshift UNPIVOT in FROM clause for SUPER object traversal
/// Syntax: UNPIVOT expr [AS val_alias AT attr_alias]
/// Examples:
/// FROM t, UNPIVOT t.arr[0]
/// FROM t, UNPIVOT t.arr AS val AT attr
///
/// Returns a `Pivot` node with `unpivot: true`; when both aliases are
/// present they are encoded in a single identifier as "val AT attr" so the
/// generator can reconstruct the original clause.
fn parse_redshift_unpivot_table(&mut self) -> Result<Expression> {
    // The target is a column reference that may carry array subscripts
    // (c.c_orders, c.c_orders[0], c.c_orders[0].items[1]); parse_primary
    // handles all of those forms.
    let this = self.parse_primary()?;
    // Optional "AS val_alias [AT attr_alias]" suffix.
    let mut alias = None;
    if self.match_token(TokenType::As) {
        let val_alias = self.expect_identifier_or_keyword()?;
        alias = Some(if self.match_text_seq(&["AT"]) {
            let attr_alias = self.expect_identifier_or_keyword()?;
            // Pack both names into one identifier: "val AT attr".
            Identifier::new(format!("{} AT {}", val_alias, attr_alias))
        } else {
            Identifier::new(val_alias)
        });
    }
    Ok(Expression::Pivot(Box::new(Pivot {
        this,
        expressions: Vec::new(),
        fields: Vec::new(),
        using: Vec::new(),
        group: None,
        unpivot: true,
        into: None,
        alias,
        include_nulls: None,
        default_on_null: None,
        with: None,
    })))
}
/// BigQuery: Parse a table part that may contain hyphens (e.g., project-id)
/// Also handles numeric table parts (e.g., foo.bar.25 -> foo.bar.`25`)
/// Returns the identifier, possibly with merged hyphenated parts and quoted flag set.
///
/// Also merges MySQL identifiers that start with digits (e.g. 00f, 1d) from
/// a leading Number token plus adjacent identifier tokens. For any other
/// dialect this reduces to `expect_identifier_or_keyword_with_quoted`.
fn parse_bigquery_table_part(&mut self) -> Result<Identifier> {
use crate::dialects::DialectType;
// Try to parse a number for BigQuery numeric table parts (e.g., foo.bar.25)
if matches!(self.config.dialect, Some(DialectType::BigQuery))
&& self.check(TokenType::Number)
{
let num_token = self.advance().clone();
let mut name = num_token.text.clone();
// Check if followed by more connected tokens (e.g., 25x, 25_, 25ab)
// Numbers followed immediately by identifiers without whitespace are merged
while !self.is_at_end() && self.is_connected() {
let tok = self.advance().clone();
name.push_str(&tok.text);
}
// quoted: true so the generator renders the numeric-leading part quoted.
return Ok(Identifier {
name,
quoted: true,
trailing_comments: Vec::new(),
span: None,
});
}
// MySQL numeric-starting identifiers (e.g., 00f, 1d)
if matches!(self.config.dialect, Some(DialectType::MySQL)) && self.check(TokenType::Number)
{
let num_token = self.advance().clone();
let mut name = num_token.text.clone();
// Merge with connected identifier/var tokens only (not punctuation)
while !self.is_at_end()
&& self.is_connected()
&& (self.check(TokenType::Var) || self.check(TokenType::Identifier))
{
let tok = self.advance().clone();
name.push_str(&tok.text);
}
return Ok(Identifier {
name,
quoted: true,
trailing_comments: Vec::new(),
span: None,
});
}
let mut ident = self.expect_identifier_or_keyword_with_quoted()?;
// BigQuery: merge hyphenated parts (e.g., pro-ject_id -> `pro-ject_id`)
if matches!(self.config.dialect, Some(DialectType::BigQuery)) && !ident.quoted {
// Check if next token is a dash and it looks connected (no space)
if self.check(TokenType::Dash) && self.is_connected_dash() {
let mut name = ident.name.clone();
while self.check(TokenType::Dash) && self.is_connected_dash() {
self.skip(); // consume dash
name.push('-');
// Consume the next part
let part = self.advance().clone();
name.push_str(&part.text);
// Continue consuming connected tokens (for things like a-b-c)
while !self.is_at_end()
&& self.is_connected()
&& !self.check(TokenType::Dot)
&& !self.check(TokenType::Dash)
&& !self.check(TokenType::LParen)
&& !self.check(TokenType::RParen)
{
let tok = self.advance().clone();
name.push_str(&tok.text);
}
}
// Merged hyphenated names stay unquoted here; quoting is left to
// the generator.
ident = Identifier {
name,
quoted: false,
trailing_comments: Vec::new(),
span: None,
};
}
}
Ok(ident)
}
/// Check if the current dash token is "connected" to the next token
/// (i.e., the dash and next token are part of a hyphenated identifier)
///
/// True only when the current token is a Dash, a token follows it, that
/// token is identifier-like (identifier, var, number, or any keyword), and
/// the two spans are adjacent. Both span-end conventions are accepted
/// (dash.end == next.start or dash.end + 1 == next.start).
fn is_connected_dash(&self) -> bool {
    if !self.check(TokenType::Dash) {
        return false;
    }
    let dash = &self.tokens[self.current];
    let follower = match self.tokens.get(self.current + 1) {
        Some(tok) => tok,
        None => return false,
    };
    // Identifier-like tokens (plus a handful of common keywords) may act as
    // the continuation of a hyphenated name.
    let follower_ok = follower.token_type.is_keyword()
        || matches!(
            follower.token_type,
            TokenType::Identifier
                | TokenType::Var
                | TokenType::Number
                | TokenType::All
                | TokenType::Select
                | TokenType::From
                | TokenType::Where
        );
    // Adjacency: no whitespace may separate the dash from its follower.
    let touching = follower.span.start == dash.span.end
        || follower.span.start == dash.span.end + 1;
    follower_ok && touching
}
/// Check if the current token is "connected" to the previous token (no whitespace)
///
/// Two tokens count as connected when the previous token's span ends exactly
/// where the current one begins (span.end is exclusive). Returns false at
/// the start of the stream or past its end.
fn is_connected(&self) -> bool {
    if self.current == 0 {
        return false;
    }
    let prev = self.tokens.get(self.current - 1);
    let curr = self.tokens.get(self.current);
    matches!((prev, curr), (Some(p), Some(c)) if p.span.end == c.span.start)
}
/// Parse a table reference (schema.table format)
///
/// Thin wrapper around `parse_table_ref_inner` that captures leading
/// comments on the first token (e.g. FROM \n/* comment */\n db.schema.tbl)
/// and attaches them only when the inner parse collected none of its own.
fn parse_table_ref(&mut self) -> Result<TableRef> {
    let pending_comments = self.current_leading_comments().to_vec();
    let mut table_ref = self.parse_table_ref_inner()?;
    if table_ref.leading_comments.is_empty() && !pending_comments.is_empty() {
        table_ref.leading_comments = pending_comments;
    }
    Ok(table_ref)
}
fn parse_table_ref_inner(&mut self) -> Result<TableRef> {
// Check for Snowflake IDENTIFIER() function: IDENTIFIER('string') or IDENTIFIER($var)
if self.check_identifier("IDENTIFIER") && self.check_next(TokenType::LParen) {
self.skip(); // consume IDENTIFIER
self.skip(); // consume (
// Parse the argument: either a string literal, a variable ($foo), or identifier
let arg = if self.check(TokenType::String) {
let s = self.advance().text.clone();
Expression::Literal(Box::new(Literal::String(s)))
} else if self.check(TokenType::Placeholder) || self.check(TokenType::Parameter) {
// ? bind parameter — the Snowflake Python connector uses
// IDENTIFIER(?) for dynamic table references. The tokenizer
// emits TokenType::Parameter for '?'; we map it to a
// Placeholder AST node so it roundtrips as a bind parameter.
self.skip();
Expression::Placeholder(Placeholder { index: None })
} else if self.check(TokenType::Dollar) {
// $foo style variable - Dollar followed by identifier
self.skip(); // consume $
let var_name = self.expect_identifier()?;
Expression::Var(Box::new(crate::expressions::Var {
this: format!("${}", var_name),
}))
} else {
// Could be an identifier too
let ident = self.expect_identifier()?;
Expression::Identifier(Identifier::new(ident))
};
self.expect(TokenType::RParen)?;
let trailing_comments = self.previous_trailing_comments().to_vec();
// Create a Function expression to represent IDENTIFIER(arg)
let identifier_func = Expression::Function(Box::new(crate::expressions::Function {
name: "IDENTIFIER".to_string(),
args: vec![arg],
distinct: false,
trailing_comments: Vec::new(),
use_bracket_syntax: false,
no_parens: false,
quoted: false,
span: None,
inferred_type: None,
}));
return Ok(TableRef {
catalog: None,
schema: None,
name: Identifier::empty(),
alias: None,
alias_explicit_as: false,
column_aliases: Vec::new(),
leading_comments: Vec::new(),
trailing_comments,
when: None,
only: false,
final_: false,
table_sample: None,
hints: Vec::new(),
system_time: None,
partitions: Vec::new(),
identifier_func: Some(Box::new(identifier_func)),
changes: None,
version: None,
span: None,
});
}
let first = self.parse_bigquery_table_part()?;
// Check for schema.table format
if self.match_token(TokenType::Dot) {
// Handle TSQL a..b syntax (database..table with empty schema)
if self.check(TokenType::Dot) {
// Two consecutive dots: a..b means catalog..table (empty schema)
self.skip(); // consume second dot
let table = self.parse_bigquery_table_part()?;
let trailing_comments = self.previous_trailing_comments().to_vec();
Ok(TableRef {
catalog: Some(first),
schema: Some(Identifier::new("")), // Empty schema represents ..
name: table,
alias: None,
alias_explicit_as: false,
column_aliases: Vec::new(),
leading_comments: Vec::new(),
trailing_comments,
when: None,
only: false,
final_: false,
table_sample: None,
hints: Vec::new(),
system_time: None,
partitions: Vec::new(),
identifier_func: None,
changes: None,
version: None,
span: None,
})
} else {
// BigQuery: handle x.* wildcard table reference (e.g., SELECT * FROM x.*)
// After the first dot, if we see a Star token, it's a wildcard table name
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::BigQuery)
) && self.check(TokenType::Star)
{
self.skip(); // consume *
let trailing_comments = self.previous_trailing_comments().to_vec();
return Ok(TableRef {
catalog: None,
schema: Some(first),
name: Identifier::new("*"),
alias: None,
alias_explicit_as: false,
column_aliases: Vec::new(),
leading_comments: Vec::new(),
trailing_comments,
when: None,
only: false,
final_: false,
table_sample: None,
hints: Vec::new(),
system_time: None,
partitions: Vec::new(),
identifier_func: None,
changes: None,
version: None,
span: None,
});
}
let table = self.parse_bigquery_table_part()?;
// Check for catalog.schema.table format
if self.match_token(TokenType::Dot) {
// BigQuery: handle a.b.* wildcard table reference
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::BigQuery)
) && self.check(TokenType::Star)
{
self.skip(); // consume *
let trailing_comments = self.previous_trailing_comments().to_vec();
return Ok(TableRef {
catalog: Some(first),
schema: Some(table),
name: Identifier::new("*"),
alias: None,
alias_explicit_as: false,
column_aliases: Vec::new(),
leading_comments: Vec::new(),
trailing_comments,
when: None,
only: false,
final_: false,
table_sample: None,
hints: Vec::new(),
system_time: None,
partitions: Vec::new(),
identifier_func: None,
changes: None,
version: None,
span: None,
});
}
let actual_table = self.parse_bigquery_table_part()?;
let trailing_comments = self.previous_trailing_comments().to_vec();
Ok(TableRef {
catalog: Some(first),
schema: Some(table),
name: actual_table,
alias: None,
alias_explicit_as: false,
column_aliases: Vec::new(),
leading_comments: Vec::new(),
trailing_comments,
when: None,
only: false,
final_: false,
table_sample: None,
hints: Vec::new(),
system_time: None,
partitions: Vec::new(),
identifier_func: None,
changes: None,
version: None,
span: None,
})
} else {
let trailing_comments = self.previous_trailing_comments().to_vec();
Ok(TableRef {
catalog: None,
schema: Some(first),
name: table,
alias: None,
alias_explicit_as: false,
column_aliases: Vec::new(),
leading_comments: Vec::new(),
trailing_comments,
when: None,
only: false,
final_: false,
table_sample: None,
hints: Vec::new(),
system_time: None,
partitions: Vec::new(),
identifier_func: None,
changes: None,
version: None,
span: None,
})
}
}
} else {
let trailing_comments = self.previous_trailing_comments().to_vec();
Ok(TableRef {
catalog: None,
schema: None,
name: first,
alias: None,
alias_explicit_as: false,
column_aliases: Vec::new(),
leading_comments: Vec::new(),
trailing_comments,
when: None,
only: false,
final_: false,
table_sample: None,
hints: Vec::new(),
system_time: None,
partitions: Vec::new(),
identifier_func: None,
changes: None,
version: None,
span: None,
})
}
}
/// Parse a datetime field for EXTRACT function (YEAR, MONTH, DAY, etc.)
///
/// Consumes one token and maps its text (case-insensitively) to a
/// `DateTimeField`. `WEEK` may carry a parenthesized modifier, e.g.
/// `WEEK(monday)`. Unrecognized names are preserved with their original
/// spelling as `DateTimeField::Custom` so dialect-specific fields round-trip.
fn parse_datetime_field(&mut self) -> Result<DateTimeField> {
    let raw = self.advance().text.clone();
    let upper = raw.to_ascii_uppercase();
    // WEEK is the only field that accepts a parenthesized modifier.
    if upper == "WEEK" {
        if self.match_token(TokenType::LParen) {
            let modifier = self.expect_identifier_or_keyword()?;
            self.expect(TokenType::RParen)?;
            return Ok(DateTimeField::WeekWithModifier(modifier));
        }
        return Ok(DateTimeField::Week);
    }
    let field = match upper.as_str() {
        "YEAR" => DateTimeField::Year,
        "MONTH" => DateTimeField::Month,
        "DAY" => DateTimeField::Day,
        "HOUR" => DateTimeField::Hour,
        "MINUTE" => DateTimeField::Minute,
        "SECOND" => DateTimeField::Second,
        "MILLISECOND" => DateTimeField::Millisecond,
        "MICROSECOND" => DateTimeField::Microsecond,
        "DOW" | "DAYOFWEEK" => DateTimeField::DayOfWeek,
        "DOY" | "DAYOFYEAR" => DateTimeField::DayOfYear,
        "QUARTER" => DateTimeField::Quarter,
        "EPOCH" => DateTimeField::Epoch,
        "TIMEZONE" => DateTimeField::Timezone,
        "TIMEZONE_HOUR" => DateTimeField::TimezoneHour,
        "TIMEZONE_MINUTE" => DateTimeField::TimezoneMinute,
        "DATE" => DateTimeField::Date,
        "TIME" => DateTimeField::Time,
        // Allow arbitrary field names for dialect-specific functionality.
        _ => DateTimeField::Custom(raw),
    };
    Ok(field)
}
/// Parse a table expression followed by any joins
/// Used for parenthesized join expressions like (tbl1 CROSS JOIN tbl2)
///
/// Returns the leading table expression together with zero or more joins
/// that immediately follow it.
fn parse_table_expression_with_joins(&mut self) -> Result<(Expression, Vec<Join>)> {
    let table = self.parse_table_expression()?;
    Ok((table, self.parse_joins()?))
}
/// Parse a chain of JOIN clauses.
///
/// Supports right-associative chained JOINs where ON/USING clauses are assigned right-to-left:
/// - `a JOIN b JOIN c ON cond1 ON cond2` means `a JOIN (b JOIN c ON cond1) ON cond2`
/// - The rightmost ON applies to the rightmost unconditioned JOIN
///
/// Algorithm: repeats a two-phase round until no more JOIN keywords follow.
/// Phase 1 parses every JOIN (with any *inline* ON/USING), Phase 2 assigns
/// trailing ("deferred") ON/USING clauses to the unconditioned joins of the
/// current round, right-to-left. Each extra round bumps `nesting_group` so
/// the generator can reconstruct the original grouping.
///
/// Also handles dialect extensions visible below: ClickHouse ARRAY JOIN
/// (expressions, not tables), Snowflake DIRECTED and ASOF MATCH_CONDITION,
/// and ClickHouse USING without parentheses.
fn parse_joins(&mut self) -> Result<Vec<Join>> {
let mut joins = Vec::with_capacity(2);
// Incremented per round; joins parsed in later rounds belong to a deeper grouping.
let mut nesting_group: usize = 0;
// Loop: Phase 1 (parse JOINs) + Phase 2 (assign deferred conditions)
// After phase 2, if there are more JOIN keywords, continue with another round
loop {
let joins_before = joins.len();
// Phase 1: Parse all JOINs with optional inline ON/USING conditions
loop {
// Remember where the join keywords started so we can harvest their comments.
let pos_before_join_kind = self.current;
let join_kind_result = self.try_parse_join_kind();
let (kind, needs_join_keyword, use_inner_keyword, use_outer_keyword, join_hint) =
match join_kind_result {
Some(r) => r,
None => break,
};
// Collect comments from all tokens consumed by try_parse_join_kind:
// - Leading comments on the first token (comments on a separate line before the join)
// - Trailing comments between join keywords (e.g., INNER /* comment */ JOIN)
let mut join_comments = Vec::new();
// Capture leading comments from the first token of the join kind
if pos_before_join_kind < self.tokens.len() {
join_comments
.extend(self.tokens[pos_before_join_kind].comments.iter().cloned());
}
for i in pos_before_join_kind..self.current {
if i < self.tokens.len() {
join_comments.extend(self.tokens[i].trailing_comments.iter().cloned());
}
}
// Snowflake: DIRECTED keyword before JOIN (e.g., CROSS DIRECTED JOIN)
let directed = if needs_join_keyword && self.check_identifier("DIRECTED") {
self.skip();
true
} else {
false
};
if needs_join_keyword {
self.expect(TokenType::Join)?;
}
// ClickHouse: ARRAY JOIN uses expressions, not table references
let table = if matches!(kind, JoinKind::Array | JoinKind::LeftArray) {
let mut items = Vec::new();
// Handle ARRAY JOIN with no arguments (intentional error test)
if !self.is_at_end()
&& !self.check(TokenType::Semicolon)
&& !self.check(TokenType::RParen)
{
// Comma-separated list of expressions, each optionally aliased with AS.
loop {
let expr = self.parse_expression()?;
let item = if self.match_token(TokenType::As) {
let alias_name = self.expect_identifier_or_safe_keyword()?;
Expression::Alias(Box::new(Alias {
this: expr,
alias: Identifier::new(alias_name),
column_aliases: Vec::new(),
pre_alias_comments: Vec::new(),
trailing_comments: Vec::new(),
inferred_type: None,
}))
} else {
expr
};
items.push(item);
if !self.match_token(TokenType::Comma) {
break;
}
}
} // end if !is_at_end check
// Normalize: single item stays bare, none becomes NULL, many become a tuple.
if items.len() == 1 {
items.pop().unwrap()
} else if items.is_empty() {
Expression::Null(Null)
} else {
Expression::Tuple(Box::new(Tuple { expressions: items }))
}
} else {
self.parse_table_expression()?
};
// Snowflake ASOF JOIN: OFFSET/LIMIT before MATCH_CONDITION are table aliases
// (only when lookahead confirms MATCH_CONDITION follows the keyword).
let table = if matches!(
kind,
JoinKind::AsOf | JoinKind::AsOfLeft | JoinKind::AsOfRight
) && (self.check(TokenType::Offset) || self.check(TokenType::Limit))
&& self
.peek_nth(1)
.map(|t| t.text.eq_ignore_ascii_case("MATCH_CONDITION"))
== Some(true)
{
let alias_name = self.advance().text.clone();
Expression::Alias(Box::new(Alias {
this: table,
alias: Identifier::new(alias_name),
column_aliases: Vec::new(),
pre_alias_comments: Vec::new(),
trailing_comments: Vec::new(),
inferred_type: None,
}))
} else {
table
};
// Try to parse inline MATCH_CONDITION/ON/USING (only if not followed by another JOIN)
// We need to peek ahead to see if there's another JOIN keyword coming
let has_match_condition = self.check_identifier("MATCH_CONDITION");
let has_inline_condition = self.check(TokenType::On)
|| self.check(TokenType::Using)
|| has_match_condition;
let next_is_join = self.check_join_keyword();
// Parse MATCH_CONDITION first (Snowflake ASOF JOIN can have MATCH_CONDITION before ON)
let match_condition = if has_match_condition && !next_is_join {
if self.match_identifier("MATCH_CONDITION") {
self.expect(TokenType::LParen)?;
let condition = self.parse_expression()?;
self.expect(TokenType::RParen)?;
Some(condition)
} else {
None
}
} else {
None
};
// Inline ON/USING attaches to THIS join; if another JOIN keyword follows,
// leave it unconditioned so Phase 2 can assign a deferred clause instead.
let (on, using) = if (has_inline_condition || match_condition.is_some())
&& !self.check_join_keyword()
{
// Parse inline condition only if there's no more JOINs following
if self.match_token(TokenType::On) {
(Some(self.parse_expression()?), Vec::new())
} else if self.match_token(TokenType::Using) {
// ClickHouse allows USING without parentheses
let has_parens = self.match_token(TokenType::LParen);
// Use parse_using_column_list to handle qualified names like t1.col
let cols = self.parse_using_column_list()?;
if has_parens {
self.expect(TokenType::RParen)?;
}
(None, cols)
} else {
(None, Vec::new())
}
} else {
(None, Vec::new())
};
joins.push(Join {
this: table,
on,
using,
kind,
use_inner_keyword,
use_outer_keyword,
deferred_condition: false,
join_hint,
match_condition,
pivots: Vec::new(),
comments: join_comments,
nesting_group,
directed,
});
}
// Phase 2: Assign deferred ON/USING conditions to unconditioned joins (right-to-left)
// Only consider joins from the current batch (joins_before..)
let unconditioned: Vec<usize> = joins[joins_before..]
.iter()
.enumerate()
.filter(|(_, j)| j.on.is_none() && j.using.is_empty())
.map(|(i, _)| joins_before + i)
.collect();
// Walk the unconditioned joins from the last backwards: each trailing
// ON/USING binds to the rightmost join that still lacks a condition.
let mut idx = unconditioned.len();
while idx > 0 {
if self.match_token(TokenType::On) {
idx -= 1;
let join_idx = unconditioned[idx];
joins[join_idx].on = Some(self.parse_expression()?);
joins[join_idx].deferred_condition = true;
} else if self.match_token(TokenType::Using) {
idx -= 1;
let join_idx = unconditioned[idx];
let has_parens = self.match_token(TokenType::LParen);
// Handle empty USING ()
let cols = if has_parens && self.check(TokenType::RParen) {
Vec::new()
} else {
// Use parse_using_column_list to handle qualified names like t1.col
self.parse_using_column_list()?
};
joins[join_idx].using = cols;
if has_parens {
self.expect(TokenType::RParen)?;
}
joins[join_idx].deferred_condition = true;
} else {
break;
}
}
// If no new joins were parsed in this round, we're done
if joins.len() == joins_before {
break;
}
// If there are more JOIN keywords after deferred conditions, continue with another round
if !self.check_join_keyword() {
break;
}
nesting_group += 1;
}
Ok(joins)
}
/// Check whether the current token can start a JOIN clause (no tokens consumed).
///
/// Standard join introducers are checked first; ClickHouse additionally
/// allows ARRAY/GLOBAL/PASTE and the ALL/ANY strictness prefixes.
fn check_join_keyword(&self) -> bool {
    if self.check(TokenType::Join)
        || self.check(TokenType::Inner)
        || self.check(TokenType::Left)
        || self.check(TokenType::Right)
        || self.check(TokenType::Full)
        || self.check(TokenType::Cross)
        || self.check(TokenType::Natural)
        || self.check(TokenType::Outer)
    {
        return true;
    }
    // ClickHouse: ARRAY JOIN, GLOBAL JOIN, ALL JOIN, ANY JOIN, PASTE JOIN
    if !matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    ) {
        return false;
    }
    self.check_identifier("ARRAY")
        || self.check_identifier("GLOBAL")
        || self.check(TokenType::All)
        || self.check(TokenType::Any)
        || self.check_identifier("PASTE")
}
/// Try to parse the keywords that introduce a JOIN and classify its kind.
///
/// Returns `Some((kind, needs_join_keyword, use_inner_keyword, use_outer_keyword, join_hint))`:
/// - `needs_join_keyword`: the caller must still consume a `JOIN` token
///   (false for CROSS/OUTER APPLY, STRAIGHT_JOIN and implicit comma joins)
/// - `use_inner_keyword` / `use_outer_keyword`: whether INNER/OUTER were
///   written explicitly, so the generator can round-trip them
/// - `join_hint`: T-SQL strategy hint (LOOP/HASH/MERGE/REMOTE) or the
///   ClickHouse GLOBAL/strictness prefix joined with spaces
///
/// Returns `None` — with the token position restored — when the upcoming
/// tokens do not actually begin a join, e.g. BigQuery `LEFT UNION` set
/// operations or Hive/Spark `LATERAL VIEW`.
fn try_parse_join_kind(&mut self) -> Option<(JoinKind, bool, bool, bool, Option<String>)> {
// ClickHouse has its own prefix grammar: [GLOBAL] [ALL|ANY|ASOF|SEMI|ANTI]
// [LEFT|RIGHT|FULL|INNER] ... JOIN, plus ARRAY/PASTE JOIN. Try it first and
// backtrack to `start` if no JOIN keyword ultimately follows.
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) {
let start = self.current;
let mut global = false;
let mut strictness: Option<String> = None;
let mut kind: Option<JoinKind> = None;
let mut use_outer = false;
let mut use_inner = false;
if self.match_identifier("GLOBAL") {
global = true;
}
// Accept at most one strictness keyword and one direction keyword, in any order.
loop {
if strictness.is_none() && self.match_token(TokenType::All) {
strictness = Some("ALL".to_string());
continue;
}
if strictness.is_none() && self.match_token(TokenType::Any) {
strictness = Some("ANY".to_string());
continue;
}
if strictness.is_none() && self.match_token(TokenType::AsOf) {
strictness = Some("ASOF".to_string());
continue;
}
if strictness.is_none() && self.match_token(TokenType::Semi) {
strictness = Some("SEMI".to_string());
continue;
}
if strictness.is_none() && self.match_token(TokenType::Anti) {
strictness = Some("ANTI".to_string());
continue;
}
if kind.is_none() && self.match_token(TokenType::Left) {
use_outer = self.match_token(TokenType::Outer);
use_inner = self.match_token(TokenType::Inner);
kind = Some(JoinKind::Left);
continue;
}
if kind.is_none() && self.match_token(TokenType::Right) {
use_outer = self.match_token(TokenType::Outer);
use_inner = self.match_token(TokenType::Inner);
kind = Some(JoinKind::Right);
continue;
}
if kind.is_none() && self.match_token(TokenType::Full) {
use_outer = self.match_token(TokenType::Outer);
kind = Some(JoinKind::Full);
continue;
}
if kind.is_none() && self.match_token(TokenType::Inner) {
use_inner = true;
kind = Some(JoinKind::Inner);
continue;
}
break;
}
// ClickHouse: ARRAY JOIN or LEFT ARRAY JOIN
if self.check_identifier("ARRAY") && self.check_next(TokenType::Join) {
let array_kind = if matches!(kind, Some(JoinKind::Left)) {
JoinKind::LeftArray
} else {
JoinKind::Array
};
self.skip(); // consume ARRAY
// JOIN will be consumed by caller
return Some((array_kind, true, false, false, None));
}
// ClickHouse: PASTE JOIN (positional join, no ON/USING)
if self.check_identifier("PASTE") && self.check_next(TokenType::Join) {
self.skip(); // consume PASTE
// JOIN will be consumed by caller
return Some((JoinKind::Paste, true, false, false, None));
}
// Only commit the ClickHouse interpretation if a JOIN token follows;
// otherwise rewind and fall through to the generic grammar below.
if global || strictness.is_some() || kind.is_some() {
if self.check(TokenType::Join) {
let join_kind = kind.unwrap_or(JoinKind::Inner);
let mut hints = Vec::new();
if global {
hints.push("GLOBAL".to_string());
}
if let Some(strict) = strictness {
hints.push(strict);
}
let join_hint = if hints.is_empty() {
None
} else {
Some(hints.join(" "))
};
return Some((join_kind, true, use_inner, use_outer, join_hint));
} else {
self.current = start;
}
}
}
// Check for ASOF first (DuckDB/Snowflake) - can be followed by LEFT/RIGHT/etc.
if self.match_token(TokenType::AsOf) {
// ASOF can be followed by LEFT, RIGHT, INNER, or standalone
if self.match_token(TokenType::Left) {
let use_outer = self.match_token(TokenType::Outer);
Some((JoinKind::AsOfLeft, true, false, use_outer, None))
} else if self.match_token(TokenType::Right) {
let use_outer = self.match_token(TokenType::Outer);
Some((JoinKind::AsOfRight, true, false, use_outer, None))
} else if self.match_token(TokenType::Inner) {
Some((JoinKind::AsOf, true, true, false, None))
} else {
// Standalone ASOF JOIN
Some((JoinKind::AsOf, true, false, false, None))
}
} else if self.check(TokenType::Inner) {
// Check if INNER is followed by a set operation (BigQuery INNER UNION/INTERSECT/EXCEPT)
// In that case, don't treat it as a JOIN keyword
let saved = self.current;
self.skip(); // consume INNER
if self.check(TokenType::Union)
|| self.check(TokenType::Intersect)
|| self.check(TokenType::Except)
{
self.current = saved; // backtrack
return None;
}
// Check for TSQL join hints: INNER LOOP JOIN, INNER HASH JOIN, INNER MERGE JOIN
let join_hint = self.parse_tsql_join_hint();
Some((JoinKind::Inner, true, true, false, join_hint)) // INNER keyword was explicit
} else if self.check(TokenType::Left) {
// Check if LEFT is followed by a set operation (BigQuery LEFT UNION/INTERSECT/EXCEPT)
let saved = self.current;
self.skip(); // consume LEFT
// LEFT can be followed by OUTER/INNER then set op, or directly by set op
let at_set_op = self.check(TokenType::Union)
|| self.check(TokenType::Intersect)
|| self.check(TokenType::Except);
// Two-token lookahead: LEFT INNER UNION/INTERSECT/EXCEPT is also a set op.
let at_inner_set_op = self.check(TokenType::Inner) && {
let saved2 = self.current;
self.skip();
let is_setop = self.check(TokenType::Union)
|| self.check(TokenType::Intersect)
|| self.check(TokenType::Except);
self.current = saved2;
is_setop
};
if at_set_op || at_inner_set_op {
self.current = saved; // backtrack
return None;
}
// Continue with normal LEFT JOIN parsing
self.current = saved;
self.match_token(TokenType::Left); // re-consume LEFT
let use_outer = self.match_token(TokenType::Outer);
let use_inner = self.match_token(TokenType::Inner);
let join_hint = self.parse_tsql_join_hint();
// Check for SEMI, ANTI, or LATERAL
if self.match_token(TokenType::Semi) {
Some((JoinKind::LeftSemi, true, use_inner, use_outer, join_hint))
} else if self.match_token(TokenType::Anti) {
Some((JoinKind::LeftAnti, true, use_inner, use_outer, join_hint))
} else if self.match_token(TokenType::Lateral) {
Some((JoinKind::LeftLateral, true, use_inner, use_outer, join_hint))
} else {
Some((JoinKind::Left, true, use_inner, use_outer, join_hint))
}
} else if self.check(TokenType::Right) {
// Check if RIGHT is followed by a set operation (BigQuery RIGHT UNION/INTERSECT/EXCEPT)
let saved = self.current;
self.skip(); // consume RIGHT
let at_set_op = self.check(TokenType::Union)
|| self.check(TokenType::Intersect)
|| self.check(TokenType::Except);
let at_inner_set_op = self.check(TokenType::Inner) && {
let saved2 = self.current;
self.skip();
let is_setop = self.check(TokenType::Union)
|| self.check(TokenType::Intersect)
|| self.check(TokenType::Except);
self.current = saved2;
is_setop
};
if at_set_op || at_inner_set_op {
self.current = saved; // backtrack
return None;
}
// Continue with normal RIGHT JOIN parsing
self.current = saved;
self.match_token(TokenType::Right); // re-consume RIGHT
let use_outer = self.match_token(TokenType::Outer);
let use_inner = self.match_token(TokenType::Inner);
let join_hint = self.parse_tsql_join_hint();
// Check for SEMI or ANTI
if self.match_token(TokenType::Semi) {
Some((JoinKind::RightSemi, true, use_inner, use_outer, join_hint))
} else if self.match_token(TokenType::Anti) {
Some((JoinKind::RightAnti, true, use_inner, use_outer, join_hint))
} else {
Some((JoinKind::Right, true, use_inner, use_outer, join_hint))
}
} else if self.check(TokenType::Full) {
// Check if FULL is followed by a set operation (BigQuery FULL UNION/INTERSECT/EXCEPT)
let saved = self.current;
self.skip(); // consume FULL
let at_set_op = self.check(TokenType::Union)
|| self.check(TokenType::Intersect)
|| self.check(TokenType::Except);
let at_inner_set_op = self.check(TokenType::Inner) && {
let saved2 = self.current;
self.skip();
let is_setop = self.check(TokenType::Union)
|| self.check(TokenType::Intersect)
|| self.check(TokenType::Except);
self.current = saved2;
is_setop
};
if at_set_op || at_inner_set_op {
self.current = saved; // backtrack
return None;
}
// Continue with normal FULL JOIN parsing
self.current = saved;
self.match_token(TokenType::Full); // re-consume FULL
let use_outer = self.match_token(TokenType::Outer);
let join_hint = self.parse_tsql_join_hint();
Some((JoinKind::Full, true, false, use_outer, join_hint))
} else if self.match_token(TokenType::Cross) {
// CROSS JOIN or CROSS APPLY
if self.match_token(TokenType::Apply) {
Some((JoinKind::CrossApply, false, false, false, None))
} else {
Some((JoinKind::Cross, true, false, false, None))
}
} else if self.match_token(TokenType::Natural) {
// NATURAL can be followed by LEFT, RIGHT, INNER, FULL, or just JOIN
if self.match_token(TokenType::Left) {
let use_outer = self.match_token(TokenType::Outer);
Some((JoinKind::NaturalLeft, true, false, use_outer, None))
} else if self.match_token(TokenType::Right) {
let use_outer = self.match_token(TokenType::Outer);
Some((JoinKind::NaturalRight, true, false, use_outer, None))
} else if self.match_token(TokenType::Full) {
let use_outer = self.match_token(TokenType::Outer);
Some((JoinKind::NaturalFull, true, false, use_outer, None))
} else if self.match_token(TokenType::Inner) {
Some((JoinKind::Natural, true, true, false, None))
} else {
Some((JoinKind::Natural, true, false, false, None))
}
} else if self.match_token(TokenType::Outer) {
// OUTER APPLY or standalone OUTER JOIN
if self.match_token(TokenType::Apply) {
Some((JoinKind::OuterApply, false, false, true, None))
} else {
// Standalone OUTER JOIN (without LEFT/RIGHT/FULL)
Some((JoinKind::Outer, true, false, true, None))
}
} else if self.check(TokenType::Lateral) {
// Check if this is LATERAL VIEW (Hive/Spark syntax) vs LATERAL JOIN
if self.current + 1 < self.tokens.len()
&& self.tokens[self.current + 1].token_type == TokenType::View
{
// LATERAL VIEW is not a JOIN type, return None
None
} else {
self.skip(); // Consume LATERAL
Some((JoinKind::Lateral, true, false, false, None))
}
} else if self.match_token(TokenType::Semi) {
Some((JoinKind::Semi, true, false, false, None))
} else if self.match_token(TokenType::Anti) {
Some((JoinKind::Anti, true, false, false, None))
} else if self.check_identifier("POSITIONAL") && self.check_next(TokenType::Join) {
// DuckDB POSITIONAL JOIN
self.skip(); // consume POSITIONAL
Some((JoinKind::Positional, true, false, false, None))
} else if self.match_token(TokenType::StraightJoin) {
// STRAIGHT_JOIN in MySQL - doesn't need JOIN keyword after it
Some((JoinKind::Straight, false, false, false, None))
} else if self.check(TokenType::Join) {
Some((JoinKind::Inner, true, false, false, None)) // Default JOIN is INNER (without explicit INNER keyword)
} else if self.match_token(TokenType::Comma) {
// Comma-separated tables: FROM a, b (old-style ANSI join syntax)
Some((JoinKind::Implicit, false, false, false, None)) // No JOIN keyword needed
} else {
None
}
}
/// Parse an optional T-SQL join strategy hint: LOOP, HASH, MERGE, or REMOTE.
///
/// MERGE is ambiguous with the MERGE statement keyword, so it is only
/// treated as a hint when the very next token is JOIN.
fn parse_tsql_join_hint(&mut self) -> Option<String> {
    // The unambiguous hints are plain identifiers; consume and return directly.
    for hint in ["LOOP", "HASH", "REMOTE"] {
        if self.check_identifier(hint) {
            self.skip();
            return Some(hint.to_string());
        }
    }
    // MERGE needs a one-token lookahead to rule out a MERGE statement.
    let merge_is_hint = self.check(TokenType::Merge) && {
        let after = self.current + 1;
        after < self.tokens.len() && self.tokens[after].token_type == TokenType::Join
    };
    if merge_is_hint {
        self.skip();
        return Some("MERGE".to_string());
    }
    None
}
/// Parse a GROUP BY clause (the GROUP BY keywords are already consumed).
///
/// Handles, in order:
/// - optional ALL/DISTINCT modifier (`all`: Some(true)=ALL, Some(false)=DISTINCT)
/// - bare `GROUP BY ALL` (Snowflake) and `GROUP BY ALL WITH ROLLUP/CUBE/TOTALS`
/// - GROUPING SETS / CUBE(...) / ROLLUP(...) and plain expressions,
///   comma-separated (adjacent CUBE/ROLLUP without commas also accepted)
/// - ClickHouse `expr AS alias` items
/// - trailing `WITH CUBE` / `WITH ROLLUP` (Hive/MySQL) and `WITH TOTALS` (ClickHouse)
fn parse_group_by(&mut self) -> Result<GroupBy> {
// Check for optional ALL/DISTINCT modifier
// Some(true) = ALL, Some(false) = DISTINCT, None = no modifier
let all = if self.match_token(TokenType::All) {
Some(true)
} else if self.match_token(TokenType::Distinct) {
Some(false)
} else {
None
};
let mut expressions = Vec::new();
// GROUP BY ALL / GROUP BY DISTINCT without following CUBE/ROLLUP/expressions
// should return early (e.g., Snowflake's "GROUP BY ALL" without column list).
// But in Presto/Trino, ALL/DISTINCT can be followed by CUBE/ROLLUP expressions.
if all.is_some() && self.is_at_query_modifier_or_end() {
return Ok(GroupBy {
expressions,
all,
totals: false,
comments: Vec::new(),
});
}
// GROUP BY ALL WITH ROLLUP/CUBE/TOTALS — skip expression parsing, go straight to modifiers
if all.is_some()
&& self.check(TokenType::With)
&& (self.check_next(TokenType::Cube)
|| self.check_next(TokenType::Rollup)
|| self.check_next_identifier("TOTALS"))
{
let mut totals = false;
// Process WITH ROLLUP/CUBE
if self.check_next(TokenType::Cube) || self.check_next(TokenType::Rollup) {
self.skip(); // consume WITH
// Empty expressions mark the WITH CUBE/ROLLUP form (vs. CUBE(...)/ROLLUP(...)).
if self.match_token(TokenType::Cube) {
expressions.push(Expression::Cube(Box::new(Cube {
expressions: Vec::new(),
})));
} else if self.match_token(TokenType::Rollup) {
expressions.push(Expression::Rollup(Box::new(Rollup {
expressions: Vec::new(),
})));
}
}
// Check for WITH TOTALS (possibly chained after ROLLUP/CUBE)
if self.check(TokenType::With) && self.check_next_identifier("TOTALS") {
self.skip(); // WITH
self.skip(); // TOTALS
totals = true;
}
return Ok(GroupBy {
expressions,
all,
totals,
comments: Vec::new(),
});
}
// Main item loop: one grouping element per iteration.
loop {
// Check for GROUPING SETS, CUBE, ROLLUP
// NOTE: the GROUPING SETS test consumes both tokens inside the condition
// (the trailing `&& { skip; skip; true }` block) once the lookahead matches.
let expr = if self.check_identifier("GROUPING")
&& self
.peek_nth(1)
.map_or(false, |t| t.text.eq_ignore_ascii_case("SETS"))
&& {
self.skip();
self.skip();
true
} {
// GROUPING SETS (...)
self.expect(TokenType::LParen)?;
let args = self.parse_grouping_sets_args()?;
self.expect(TokenType::RParen)?;
Expression::Function(Box::new(Function {
name: "GROUPING SETS".to_string(),
args,
distinct: false,
trailing_comments: Vec::new(),
use_bracket_syntax: false,
no_parens: false,
quoted: false,
span: None,
inferred_type: None,
}))
} else if self.match_token(TokenType::Cube) {
// CUBE (...)
self.expect(TokenType::LParen)?;
let args = self.parse_expression_list()?;
self.expect(TokenType::RParen)?;
Expression::Function(Box::new(Function {
name: "CUBE".to_string(),
args,
distinct: false,
trailing_comments: Vec::new(),
use_bracket_syntax: false,
no_parens: false,
quoted: false,
span: None,
inferred_type: None,
}))
} else if self.match_token(TokenType::Rollup) {
// ROLLUP (...)
self.expect(TokenType::LParen)?;
let args = self.parse_expression_list()?;
self.expect(TokenType::RParen)?;
Expression::Function(Box::new(Function {
name: "ROLLUP".to_string(),
args,
distinct: false,
trailing_comments: Vec::new(),
use_bracket_syntax: false,
no_parens: false,
quoted: false,
span: None,
inferred_type: None,
}))
} else {
self.parse_expression()?
};
// ClickHouse: GROUP BY expr AS alias
// (AS followed by `(` is excluded: that would be a different construct)
let expr = if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.check(TokenType::As)
&& !self.check_next(TokenType::LParen)
{
self.skip(); // consume AS
let alias = self.expect_identifier_or_keyword_with_quoted()?;
Expression::Alias(Box::new(Alias::new(expr, alias)))
} else {
expr
};
expressions.push(expr);
if !self.match_token(TokenType::Comma) {
// Allow adjacent CUBE/ROLLUP/GROUPING SETS without comma separator
// e.g., GROUP BY CUBE(a) ROLLUP(b), GROUPING SETS((c, d))
if self.check(TokenType::Cube)
|| self.check(TokenType::Rollup)
|| (self.check_identifier("GROUPING")
&& self
.peek_nth(1)
.map_or(false, |t| t.text.eq_ignore_ascii_case("SETS")))
{
continue;
}
break;
}
}
// Check for trailing WITH CUBE or WITH ROLLUP (Hive/MySQL syntax)
// This is different from CUBE(...) or ROLLUP(...) which are parsed inline above
// Use lookahead to avoid consuming WITH if it's not followed by CUBE or ROLLUP
// (e.g., Redshift's WITH NO SCHEMA BINDING should not be consumed here)
if self.check(TokenType::With)
&& (self.check_next(TokenType::Cube) || self.check_next(TokenType::Rollup))
{
self.skip(); // consume WITH
if self.match_token(TokenType::Cube) {
// WITH CUBE - add Cube with empty expressions
expressions.push(Expression::Cube(Box::new(Cube {
expressions: Vec::new(),
})));
} else if self.match_token(TokenType::Rollup) {
// WITH ROLLUP - add Rollup with empty expressions
expressions.push(Expression::Rollup(Box::new(Rollup {
expressions: Vec::new(),
})));
}
}
// ClickHouse: WITH TOTALS
let totals = if self.check(TokenType::With) && self.check_next_identifier("TOTALS") {
self.skip(); // consume WITH
self.skip(); // consume TOTALS
true
} else {
false
};
Ok(GroupBy {
expressions,
all,
totals,
comments: Vec::new(),
})
}
/// Parse the argument list of GROUPING SETS.
///
/// Arguments may be tuples like `(x, y)`, the empty grouping `()`, plain
/// expressions, or nested GROUPING SETS / CUBE / ROLLUP constructs
/// (handled recursively).
fn parse_grouping_sets_args(&mut self) -> Result<Vec<Expression>> {
    // Local constructor for the GROUPING SETS / CUBE / ROLLUP function nodes;
    // all three share the same flags, only name and args differ.
    fn grouping_fn(name: &str, args: Vec<Expression>) -> Expression {
        Expression::Function(Box::new(Function {
            name: name.to_string(),
            args,
            distinct: false,
            trailing_comments: Vec::new(),
            use_bracket_syntax: false,
            no_parens: false,
            quoted: false,
            span: None,
            inferred_type: None,
        }))
    }
    let mut args = Vec::new();
    loop {
        // Two-token lookahead for the GROUPING SETS keyword pair.
        let at_grouping_sets = self.check_identifier("GROUPING")
            && self
                .peek_nth(1)
                .map_or(false, |t| t.text.eq_ignore_ascii_case("SETS"));
        let expr = if at_grouping_sets {
            // Nested GROUPING SETS (...)
            self.skip(); // GROUPING
            self.skip(); // SETS
            self.expect(TokenType::LParen)?;
            let inner = self.parse_grouping_sets_args()?;
            self.expect(TokenType::RParen)?;
            grouping_fn("GROUPING SETS", inner)
        } else if self.match_token(TokenType::Cube) {
            // CUBE (...)
            self.expect(TokenType::LParen)?;
            let inner = self.parse_expression_list()?;
            self.expect(TokenType::RParen)?;
            grouping_fn("CUBE", inner)
        } else if self.match_token(TokenType::Rollup) {
            // ROLLUP (...)
            self.expect(TokenType::LParen)?;
            let inner = self.parse_expression_list()?;
            self.expect(TokenType::RParen)?;
            grouping_fn("ROLLUP", inner)
        } else if self.check(TokenType::LParen) {
            // A tuple like (x, y), or the empty grouping ().
            self.skip(); // consume (
            if self.check(TokenType::RParen) {
                self.skip();
                Expression::Tuple(Box::new(Tuple {
                    expressions: Vec::new(),
                }))
            } else {
                let inner = self.parse_expression_list()?;
                self.expect(TokenType::RParen)?;
                Expression::Tuple(Box::new(Tuple { expressions: inner }))
            }
        } else {
            // Plain expression argument.
            self.parse_expression()?
        };
        args.push(expr);
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    Ok(args)
}
/// Parse an ORDER BY clause (the ORDER BY keywords are already consumed).
///
/// Convenience wrapper around [`Self::parse_order_by_with_siblings`] with
/// `siblings = false`; Oracle's ORDER SIBLINGS BY path passes `true` instead.
fn parse_order_by(&mut self) -> Result<OrderBy> {
self.parse_order_by_with_siblings(false)
}
/// Parse an ORDER BY clause with an optional SIBLINGS flag (Oracle ORDER SIBLINGS BY).
///
/// Each item is an expression with optional:
/// - ClickHouse `AS alias`
/// - ASC/DESC direction (explicit ASC is recorded separately for round-tripping)
/// - NULLS FIRST / NULLS LAST
/// - ClickHouse `WITH FILL [FROM e] [TO e] [STEP e] [STALENESS e]
///   [INTERPOLATE (name [AS expr], ...)]`
///
/// A trailing comma before end-of-input or `;` is tolerated.
fn parse_order_by_with_siblings(&mut self, siblings: bool) -> Result<OrderBy> {
let mut expressions = Vec::new();
loop {
let expr = self.parse_expression()?;
// ClickHouse: ORDER BY expr AS alias — allow AS alias before DESC/ASC
// But NOT AS SELECT/WITH which would be CREATE TABLE ... AS SELECT
let expr = if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.check(TokenType::As)
&& !self.check_next(TokenType::LParen)
&& !self.check_next(TokenType::Select)
&& !self.check_next(TokenType::With)
{
self.skip(); // consume AS
let alias = self.expect_identifier_or_keyword_with_quoted()?;
Expression::Alias(Box::new(Alias::new(expr, alias)))
} else {
expr
};
// Direction: (desc, explicit_asc); default is implicit ascending.
let (desc, explicit_asc) = if self.match_token(TokenType::Desc) {
(true, false)
} else if self.match_token(TokenType::Asc) {
(false, true)
} else {
(false, false)
};
// Optional NULLS FIRST / NULLS LAST; anything else after NULLS is an error.
let nulls_first = if self.match_token(TokenType::Nulls) {
if self.match_token(TokenType::First) {
Some(true)
} else if self.match_token(TokenType::Last) {
Some(false)
} else {
return Err(self.parse_error("Expected FIRST or LAST after NULLS"));
}
} else {
None
};
// Parse optional WITH FILL clause (ClickHouse)
let with_fill = if self.match_text_seq(&["WITH", "FILL"]) {
// Sub-clauses are each optional and parsed in fixed order:
// FROM, TO, STEP, STALENESS, INTERPOLATE.
let from_ = if self.match_token(TokenType::From) {
Some(Box::new(self.parse_or()?))
} else {
None
};
let to = if self.match_text_seq(&["TO"]) {
Some(Box::new(self.parse_or()?))
} else {
None
};
let step = if self.match_text_seq(&["STEP"]) {
Some(Box::new(self.parse_or()?))
} else {
None
};
// ClickHouse: STALENESS [INTERVAL] expr
let staleness = if self.match_text_seq(&["STALENESS"]) {
Some(Box::new(self.parse_or()?))
} else {
None
};
// INTERPOLATE without parentheses yields None (bare keyword is ignored).
let interpolate = if self.match_text_seq(&["INTERPOLATE"]) {
if self.match_token(TokenType::LParen) {
// Parse INTERPOLATE items: identifier [AS expression], ...
let mut items = Vec::new();
loop {
if self.check(TokenType::RParen) {
break;
}
let quoted = self.check(TokenType::QuotedIdentifier);
let name_text = self.expect_identifier_or_safe_keyword()?;
let name_id = Identifier {
name: name_text,
quoted,
trailing_comments: Vec::new(),
span: None,
};
let item = if self.match_token(TokenType::As) {
let expr = self.parse_expression()?;
// Store as Alias: this=expression, alias=name
Expression::Alias(Box::new(Alias {
this: expr,
alias: name_id,
column_aliases: Vec::new(),
pre_alias_comments: Vec::new(),
trailing_comments: Vec::new(),
inferred_type: None,
}))
} else {
Expression::Identifier(name_id)
};
items.push(item);
if !self.match_token(TokenType::Comma) {
break;
}
}
self.expect(TokenType::RParen)?;
// Single item stays bare; multiple items are wrapped in a Tuple.
if items.len() == 1 {
Some(Box::new(items.into_iter().next().unwrap()))
} else {
Some(Box::new(Expression::Tuple(Box::new(
crate::expressions::Tuple { expressions: items },
))))
}
} else {
None
}
} else {
None
};
Some(Box::new(WithFill {
from_,
to,
step,
staleness,
interpolate,
}))
} else {
None
};
expressions.push(Ordered {
this: expr,
desc,
nulls_first,
explicit_asc,
with_fill,
});
if !self.match_token(TokenType::Comma) {
break;
}
// Handle trailing comma: if at end of input or semicolon, break
if self.is_at_end() || self.check(TokenType::Semicolon) {
break;
}
}
Ok(OrderBy {
expressions,
siblings,
comments: Vec::new(),
})
}
/// Parse query modifiers (ORDER BY, LIMIT, OFFSET, DISTRIBUTE BY, SORT BY, CLUSTER BY) for parenthesized queries
/// e.g., (SELECT 1) ORDER BY x LIMIT 1 OFFSET 1
/// e.g., (SELECT 1 UNION SELECT 2) DISTRIBUTE BY z SORT BY x
fn parse_query_modifiers(&mut self, inner: Expression) -> Result<Expression> {
// Parse DISTRIBUTE BY (Hive/Spark)
let distribute_by = if self.match_keywords(&[TokenType::Distribute, TokenType::By]) {
let exprs = self.parse_expression_list()?;
Some(DistributeBy { expressions: exprs })
} else {
None
};
// Parse SORT BY (Hive/Spark) or CLUSTER BY (Hive/Spark)
let (sort_by, cluster_by) = if self.match_keywords(&[TokenType::Sort, TokenType::By]) {
// SORT BY
let mut orders = Vec::new();
loop {
if let Some(ordered) = self.parse_ordered_item()? {
orders.push(ordered);
} else {
break;
}
if !self.match_token(TokenType::Comma) {
break;
}
}
(
Some(SortBy {
expressions: orders,
}),
None,
)
} else if self.match_keywords(&[TokenType::Cluster, TokenType::By]) {
// CLUSTER BY
let mut orders = Vec::new();
loop {
if let Some(ordered) = self.parse_ordered_item()? {
orders.push(ordered);
} else {
break;
}
if !self.match_token(TokenType::Comma) {
break;
}
}
(
None,
Some(ClusterBy {
expressions: orders,
}),
)
} else {
(None, None)
};
// Parse ORDER BY
let order_by = if self.match_keywords(&[TokenType::Order, TokenType::By]) {
Some(self.parse_order_by()?)
} else {
None
};
// Parse LIMIT
let limit = if self.match_token(TokenType::Limit) {
Some(Limit {
this: self.parse_expression()?,
percent: false,
comments: Vec::new(),
})
} else {
None
};
// Parse OFFSET
let offset = if self.match_token(TokenType::Offset) {
Some(Offset {
this: self.parse_expression()?,
rows: None,
})
} else {
None
};
// If we have any modifiers, wrap in a Subquery with the modifiers
if order_by.is_some()
|| limit.is_some()
|| offset.is_some()
|| distribute_by.is_some()
|| sort_by.is_some()
|| cluster_by.is_some()
{
// If inner is already a Subquery, add modifiers to it instead of double-wrapping
if let Expression::Subquery(mut subq) = inner {
subq.order_by = order_by;
subq.limit = limit;
subq.offset = offset;
subq.distribute_by = distribute_by;
subq.sort_by = sort_by;
subq.cluster_by = cluster_by;
Ok(Expression::Subquery(subq))
} else if let Expression::Paren(paren) = inner {
// If inner is a Paren containing a Subquery or other query, unwrap it
// and add modifiers to a new Subquery wrapping the Paren
// This handles cases like ((SELECT 1)) LIMIT 1
Ok(Expression::Subquery(Box::new(Subquery {
this: Expression::Paren(paren),
alias: None,
column_aliases: Vec::new(),
order_by,
limit,
offset,
distribute_by,
sort_by,
cluster_by,
lateral: false,
modifiers_inside: false,
trailing_comments: Vec::new(),
inferred_type: None,
})))
} else {
Ok(Expression::Subquery(Box::new(Subquery {
this: inner,
alias: None,
column_aliases: Vec::new(),
order_by,
limit,
offset,
distribute_by,
sort_by,
cluster_by,
lateral: false,
modifiers_inside: false,
trailing_comments: Vec::new(),
inferred_type: None,
})))
}
} else {
// No modifiers - return inner as-is (don't double-wrap if already a Subquery)
Ok(inner)
}
}
/// Parse a comma-separated list of ORDER BY items for use inside aggregate
/// functions (e.g. `ARRAY_AGG(x ORDER BY y DESC)`).
///
/// Returns the raw `Vec<Ordered>` rather than an `OrderBy` struct; items may
/// carry ASC/DESC and NULLS FIRST/LAST, but never WITH FILL.
fn parse_order_by_list(&mut self) -> Result<Vec<Ordered>> {
    let mut items = Vec::new();
    loop {
        let this = self.parse_expression()?;
        // Direction flags: DESC, explicit ASC, or neither (implicit ascending).
        let mut desc = false;
        let mut explicit_asc = false;
        if self.match_token(TokenType::Desc) {
            desc = true;
        } else if self.match_token(TokenType::Asc) {
            explicit_asc = true;
        }
        // Optional NULLS FIRST / NULLS LAST; any other token after NULLS is an error.
        let nulls_first = if self.match_token(TokenType::Nulls) {
            if self.match_token(TokenType::First) {
                Some(true)
            } else if self.match_token(TokenType::Last) {
                Some(false)
            } else {
                return Err(self.parse_error("Expected FIRST or LAST after NULLS"));
            }
        } else {
            None
        };
        items.push(Ordered {
            this,
            desc,
            nulls_first,
            explicit_asc,
            with_fill: None,
        });
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    Ok(items)
}
/// Parse a DISTRIBUTE BY clause (Hive/Spark): a comma-separated list of
/// plain expressions with no ordering modifiers.
fn parse_distribute_by(&mut self) -> Result<DistributeBy> {
    // At least one expression is required; more follow after each comma.
    let mut expressions = vec![self.parse_expression()?];
    while self.match_token(TokenType::Comma) {
        expressions.push(self.parse_expression()?);
    }
    Ok(DistributeBy { expressions })
}
/// Parse a CLUSTER BY clause (Hive/Spark).
///
/// Items may carry ASC/DESC, but CLUSTER BY never takes NULLS FIRST/LAST
/// or WITH FILL, so those fields are always unset.
fn parse_cluster_by(&mut self) -> Result<ClusterBy> {
    let mut expressions = Vec::new();
    loop {
        let this = self.parse_expression()?;
        // Direction flags: DESC, explicit ASC, or neither (implicit ascending).
        let mut desc = false;
        let mut explicit_asc = false;
        if self.match_token(TokenType::Desc) {
            desc = true;
        } else if self.match_token(TokenType::Asc) {
            explicit_asc = true;
        }
        expressions.push(Ordered {
            this,
            desc,
            nulls_first: None,
            explicit_asc,
            with_fill: None,
        });
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    Ok(ClusterBy { expressions })
}
/// Parse SORT BY clause (Hive/Spark)
///
/// Items accept optional ASC/DESC and NULLS FIRST/LAST modifiers, the same
/// grammar as ORDER BY items.
fn parse_sort_by(&mut self) -> Result<SortBy> {
    let mut expressions = Vec::new();
    loop {
        let this = self.parse_expression()?;
        // Direction: DESC, explicit ASC, or implicit ASC (the default).
        let mut desc = false;
        let mut explicit_asc = false;
        if self.match_token(TokenType::Desc) {
            desc = true;
        } else if self.match_token(TokenType::Asc) {
            explicit_asc = true;
        }
        // Optional NULLS FIRST / NULLS LAST placement.
        let mut nulls_first = None;
        if self.match_token(TokenType::Nulls) {
            nulls_first = if self.match_token(TokenType::First) {
                Some(true)
            } else if self.match_token(TokenType::Last) {
                Some(false)
            } else {
                return Err(self.parse_error("Expected FIRST or LAST after NULLS"));
            };
        }
        expressions.push(Ordered {
            this,
            desc,
            nulls_first,
            explicit_asc,
            with_fill: None,
        });
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    Ok(SortBy { expressions })
}
/// Parse FOR UPDATE/SHARE locking clauses or FOR XML/JSON (T-SQL)
/// Syntax: FOR UPDATE|SHARE|NO KEY UPDATE|KEY SHARE [OF tables] [NOWAIT|WAIT n|SKIP LOCKED]
/// Also handles: LOCK IN SHARE MODE (MySQL)
/// Also handles: FOR XML PATH|RAW|AUTO|EXPLICIT [, options...] (T-SQL)
/// Also handles: FOR JSON PATH|AUTO [, ROOT('name')] [, INCLUDE_NULL_VALUES] [, WITHOUT_ARRAY_WRAPPER] (T-SQL)
///
/// Returns `(locks, for_xml_options, for_json_options)`. A FOR XML or
/// FOR JSON clause always terminates the loop, so at most one of the two
/// option vectors can be non-empty.
///
/// Lock strength is encoded in each `Lock`'s `(update, key)` pair:
/// FOR UPDATE -> (Some(true), None); FOR SHARE / LOCK IN SHARE MODE -> (None, None);
/// FOR KEY SHARE -> (None, Some(true)); FOR NO KEY UPDATE -> (Some(true), Some(true)).
fn parse_locks_and_for_xml(&mut self) -> Result<(Vec<Lock>, Vec<Expression>, Vec<Expression>)> {
    let mut locks = Vec::new();
    let mut for_xml = Vec::new();
    let mut for_json = Vec::new();
    loop {
        let (update, key) = if self.match_keywords(&[TokenType::For, TokenType::Update]) {
            // FOR UPDATE
            (
                Some(Box::new(Expression::Boolean(BooleanLiteral {
                    value: true,
                }))),
                None,
            )
        } else if self.check(TokenType::For) && self.check_next_identifier("XML") {
            // FOR XML (T-SQL) - parse XML options
            self.skip(); // consume FOR
            self.skip(); // consume XML
            for_xml = self.parse_for_xml_options()?;
            break; // FOR XML is always the last clause
        } else if self.check(TokenType::For) && self.check_next_identifier("JSON") {
            // FOR JSON (T-SQL) - parse JSON options
            self.skip(); // consume FOR
            self.skip(); // consume JSON
            for_json = self.parse_for_json_options()?;
            break; // FOR JSON is always the last clause
        } else if self.check(TokenType::For) && self.check_next_identifier("SHARE") {
            // FOR SHARE
            self.skip(); // consume FOR
            self.skip(); // consume SHARE
            (None, None)
        } else if self.check_identifier("LOCK") && self.check_next(TokenType::In) {
            // LOCK IN SHARE MODE (MySQL) -> converted to FOR SHARE
            self.skip(); // consume LOCK
            self.skip(); // consume IN
            if self.match_identifier("SHARE") {
                // MODE is optional filler; consume it if present.
                let _ = self.match_identifier("MODE");
            }
            (None, None)
        } else if self.check(TokenType::For) && self.check_next(TokenType::Key) {
            // FOR KEY SHARE (PostgreSQL)
            self.skip(); // consume FOR
            self.skip(); // consume KEY
            if !self.match_identifier("SHARE") {
                // NOTE(review): FOR and KEY are already consumed here -
                // confirm no valid syntax needs those tokens replayed.
                break; // Not a valid lock clause
            }
            (
                None,
                Some(Box::new(Expression::Boolean(BooleanLiteral {
                    value: true,
                }))),
            )
        } else if self.check(TokenType::For) && self.check_next(TokenType::No) {
            // FOR NO KEY UPDATE (PostgreSQL)
            self.skip(); // consume FOR
            self.skip(); // consume NO
            if !self.match_identifier("KEY") || !self.match_token(TokenType::Update) {
                break; // Not a valid lock clause
            }
            (
                Some(Box::new(Expression::Boolean(BooleanLiteral {
                    value: true,
                }))),
                Some(Box::new(Expression::Boolean(BooleanLiteral {
                    value: true,
                }))),
            )
        } else {
            // No more lock clauses
            break;
        };
        // Parse optional OF clause: OF table1, table2
        let expressions = if self.match_token(TokenType::Of) {
            let mut tables = Vec::new();
            loop {
                // Parse table reference (can be schema.table or just table)
                let table = self.parse_table_ref()?;
                tables.push(Expression::Table(Box::new(table)));
                if !self.match_token(TokenType::Comma) {
                    break;
                }
            }
            tables
        } else {
            Vec::new()
        };
        // Parse wait option: NOWAIT, WAIT n, or SKIP LOCKED
        // Following Python sqlglot convention:
        // - NOWAIT -> Boolean(true)
        // - SKIP LOCKED -> Boolean(false)
        // - WAIT n -> Literal (the number)
        let wait = if self.match_identifier("NOWAIT") {
            // NOWAIT -> represented as Boolean(true)
            Some(Box::new(Expression::Boolean(BooleanLiteral {
                value: true,
            })))
        } else if self.match_identifier("WAIT") {
            // WAIT n -> wait = expression (the number/literal)
            Some(Box::new(self.parse_primary()?))
        } else if self.match_identifier("SKIP") && self.match_identifier("LOCKED") {
            // SKIP LOCKED -> represented as Boolean(false)
            // NOTE(review): if SKIP matches but LOCKED does not, SKIP is
            // already consumed when this arm short-circuits to None -
            // confirm a bare SKIP can never validly follow a lock clause.
            Some(Box::new(Expression::Boolean(BooleanLiteral {
                value: false,
            })))
        } else {
            None
        };
        locks.push(Lock {
            update,
            expressions,
            wait,
            key,
        });
    }
    Ok((locks, for_xml, for_json))
}
/// Parse FOR XML options (T-SQL)
/// Syntax: FOR XML PATH|RAW|AUTO|EXPLICIT [('element')] [, BINARY BASE64] [, ELEMENTS [XSINIL|ABSENT]] [, TYPE] [, ROOT('name')]
fn parse_for_xml_options(&mut self) -> Result<Vec<Expression>> {
    let mut options = Vec::new();
    // Keep collecting options; a comma continues the list and an
    // unrecognized token ends it.
    while let Some(option) = self.parse_for_xml_single_option()? {
        options.push(option);
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    Ok(options)
}
/// Parse a single FOR XML option
fn parse_for_xml_single_option(&mut self) -> Result<Option<Expression>> {
// Known XML modes: PATH, RAW, AUTO, EXPLICIT
// Known options: BINARY BASE64, ELEMENTS [XSINIL|ABSENT], TYPE, ROOT('name')
// Try to match known patterns
if self.match_identifier("PATH") {
let expression = if self.match_token(TokenType::LParen) {
let expr = self.parse_string()?;
self.expect(TokenType::RParen)?;
expr
} else {
None
};
return Ok(Some(Expression::QueryOption(Box::new(QueryOption {
this: Box::new(Expression::Var(Box::new(Var {
this: "PATH".to_string(),
}))),
expression: expression.map(|e| Box::new(e)),
}))));
}
if self.match_identifier("RAW") {
let expression = if self.match_token(TokenType::LParen) {
let expr = self.parse_string()?;
self.expect(TokenType::RParen)?;
expr
} else {
None
};
return Ok(Some(Expression::QueryOption(Box::new(QueryOption {
this: Box::new(Expression::Var(Box::new(Var {
this: "RAW".to_string(),
}))),
expression: expression.map(|e| Box::new(e)),
}))));
}
if self.match_identifier("AUTO") {
return Ok(Some(Expression::QueryOption(Box::new(QueryOption {
this: Box::new(Expression::Var(Box::new(Var {
this: "AUTO".to_string(),
}))),
expression: None,
}))));
}
if self.match_identifier("EXPLICIT") {
return Ok(Some(Expression::QueryOption(Box::new(QueryOption {
this: Box::new(Expression::Var(Box::new(Var {
this: "EXPLICIT".to_string(),
}))),
expression: None,
}))));
}
if self.match_identifier("TYPE") || self.match_token(TokenType::Type) {
return Ok(Some(Expression::QueryOption(Box::new(QueryOption {
this: Box::new(Expression::Var(Box::new(Var {
this: "TYPE".to_string(),
}))),
expression: None,
}))));
}
if self.match_identifier("BINARY") {
// BINARY BASE64
if self.match_identifier("BASE64") {
return Ok(Some(Expression::QueryOption(Box::new(QueryOption {
this: Box::new(Expression::Var(Box::new(Var {
this: "BINARY BASE64".to_string(),
}))),
expression: None,
}))));
} else {
return Ok(Some(Expression::QueryOption(Box::new(QueryOption {
this: Box::new(Expression::Var(Box::new(Var {
this: "BINARY".to_string(),
}))),
expression: None,
}))));
}
}
if self.match_identifier("ELEMENTS") {
// ELEMENTS [XSINIL|ABSENT]
let suboption = if self.match_identifier("XSINIL") {
Some("XSINIL".to_string())
} else if self.match_identifier("ABSENT") {
Some("ABSENT".to_string())
} else {
None
};
let option_name = match &suboption {
Some(sub) => format!("ELEMENTS {}", sub),
None => "ELEMENTS".to_string(),
};
return Ok(Some(Expression::QueryOption(Box::new(QueryOption {
this: Box::new(Expression::Var(Box::new(Var { this: option_name }))),
expression: None,
}))));
}
if self.match_identifier("ROOT") {
let expression = if self.match_token(TokenType::LParen) {
let expr = self.parse_string()?;
self.expect(TokenType::RParen)?;
expr
} else {
None
};
return Ok(Some(Expression::QueryOption(Box::new(QueryOption {
this: Box::new(Expression::Var(Box::new(Var {
this: "ROOT".to_string(),
}))),
expression: expression.map(|e| Box::new(e)),
}))));
}
// No more options recognized
Ok(None)
}
/// Parse FOR JSON options (T-SQL)
/// Syntax: FOR JSON PATH|AUTO [, ROOT('name')] [, INCLUDE_NULL_VALUES] [, WITHOUT_ARRAY_WRAPPER]
fn parse_for_json_options(&mut self) -> Result<Vec<Expression>> {
    let mut options = Vec::new();
    // A comma continues the list; an unrecognized token ends it.
    while let Some(option) = self.parse_for_json_single_option()? {
        options.push(option);
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    Ok(options)
}
/// Parse a single FOR JSON option
///
/// Recognized options: PATH, AUTO, ROOT[('name')], INCLUDE_NULL_VALUES,
/// WITHOUT_ARRAY_WRAPPER. Returns `Ok(None)` when the current token starts
/// none of these.
fn parse_for_json_single_option(&mut self) -> Result<Option<Expression>> {
    // Helper: wrap an option name (plus optional argument) as a QueryOption
    // whose name is a plain Var node. Replaces five copies of the same
    // construction in the original implementation.
    fn option(name: &str, argument: Option<Expression>) -> Option<Expression> {
        Some(Expression::QueryOption(Box::new(QueryOption {
            this: Box::new(Expression::Var(Box::new(Var {
                this: name.to_string(),
            }))),
            expression: argument.map(Box::new),
        })))
    }
    // Bare keyword options (mutually exclusive, so order is immaterial).
    for name in ["PATH", "AUTO", "INCLUDE_NULL_VALUES", "WITHOUT_ARRAY_WRAPPER"] {
        if self.match_identifier(name) {
            return Ok(option(name, None));
        }
    }
    // ROOT accepts an optional string argument: ROOT('name').
    if self.match_identifier("ROOT") {
        let argument = if self.match_token(TokenType::LParen) {
            let expr = self.parse_string()?;
            self.expect(TokenType::RParen)?;
            expr
        } else {
            None
        };
        return Ok(option("ROOT", argument));
    }
    // No option recognized at the current position.
    Ok(None)
}
/// Parse CONNECT BY clause (Oracle hierarchical queries)
/// Syntax: [START WITH condition] CONNECT BY [NOCYCLE] condition [START WITH condition]
/// START WITH can appear before or after CONNECT BY
///
/// Returns `Ok(None)` when neither START WITH nor CONNECT BY is present;
/// a leading START WITH without a following CONNECT BY is an error.
fn parse_connect(&mut self) -> Result<Option<Connect>> {
    // Check for START WITH first (can appear before CONNECT BY)
    let start_before = if self.match_keywords(&[TokenType::Start, TokenType::With]) {
        Some(self.parse_expression()?)
    } else {
        None
    };
    // Check for CONNECT BY
    if !self.match_keywords(&[TokenType::Connect, TokenType::By]) {
        if start_before.is_some() {
            return Err(self.parse_error("START WITH without CONNECT BY"));
        }
        return Ok(None);
    }
    // Check for NOCYCLE
    let nocycle = self.match_token(TokenType::NoCycle);
    // Parse the CONNECT BY condition with PRIOR support
    let connect = self.parse_connect_expression()?;
    // START WITH can also appear after CONNECT BY
    // NOTE(review): if START WITH already appeared before CONNECT BY, a
    // second trailing START WITH is not consumed here - confirm such input
    // is meant to fail in downstream parsing.
    let start = if start_before.is_some() {
        start_before
    } else if self.match_keywords(&[TokenType::Start, TokenType::With]) {
        Some(self.parse_expression()?)
    } else {
        None
    };
    Ok(Some(Connect {
        start,
        connect,
        nocycle,
    }))
}
/// Parse expression in CONNECT BY context, treating PRIOR as prefix operator
///
/// Entry point for the dedicated CONNECT BY expression grammar:
/// OR -> AND -> comparison -> primary (with PRIOR / CONNECT_BY_ROOT).
fn parse_connect_expression(&mut self) -> Result<Expression> {
    self.parse_connect_or()
}
/// Parse OR expression in CONNECT BY context
fn parse_connect_or(&mut self) -> Result<Expression> {
    let mut expr = self.parse_connect_and()?;
    while self.match_token(TokenType::Or) {
        let rhs = self.parse_connect_and()?;
        expr = Expression::Or(Box::new(BinaryOp::new(expr, rhs)));
    }
    // Left-leaning OR chains may be rebalanced into a shallower tree.
    Ok(Self::maybe_rebalance_boolean_chain(expr, false))
}
/// Parse AND expression in CONNECT BY context
fn parse_connect_and(&mut self) -> Result<Expression> {
    let mut expr = self.parse_connect_comparison()?;
    while self.match_token(TokenType::And) {
        let rhs = self.parse_connect_comparison()?;
        expr = Expression::And(Box::new(BinaryOp::new(expr, rhs)));
    }
    // Left-leaning AND chains may be rebalanced into a shallower tree.
    Ok(Self::maybe_rebalance_boolean_chain(expr, true))
}
/// Parse comparison in CONNECT BY context
fn parse_connect_comparison(&mut self) -> Result<Expression> {
let left = self.parse_connect_primary()?;
if self.match_token(TokenType::Eq) {
let right = self.parse_connect_primary()?;
return Ok(Expression::Eq(Box::new(BinaryOp::new(left, right))));
}
if self.match_token(TokenType::Neq) {
let right = self.parse_connect_primary()?;
return Ok(Expression::Neq(Box::new(BinaryOp::new(left, right))));
}
if self.match_token(TokenType::Lt) {
let right = self.parse_connect_primary()?;
return Ok(Expression::Lt(Box::new(BinaryOp::new(left, right))));
}
if self.match_token(TokenType::Lte) {
let right = self.parse_connect_primary()?;
return Ok(Expression::Lte(Box::new(BinaryOp::new(left, right))));
}
if self.match_token(TokenType::Gt) {
let right = self.parse_connect_primary()?;
return Ok(Expression::Gt(Box::new(BinaryOp::new(left, right))));
}
if self.match_token(TokenType::Gte) {
let right = self.parse_connect_primary()?;
return Ok(Expression::Gte(Box::new(BinaryOp::new(left, right))));
}
Ok(left)
}
/// Parse primary in CONNECT BY context with PRIOR support
fn parse_connect_primary(&mut self) -> Result<Expression> {
    // PRIOR acts as a prefix operator on the following primary expression.
    if self.match_token(TokenType::Prior) {
        let this = self.parse_primary()?;
        return Ok(Expression::Prior(Box::new(Prior { this })));
    }
    // Oracle CONNECT_BY_ROOT, in either of its two spellings.
    match self.try_parse_connect_by_root_expression()? {
        Some(expr) => Ok(expr),
        None => self.parse_primary(),
    }
}
/// Parse Oracle CONNECT_BY_ROOT in either supported form:
/// CONNECT_BY_ROOT col
/// CONNECT_BY_ROOT(col)
///
/// Returns `Ok(None)` without consuming anything when the current token is
/// not the CONNECT_BY_ROOT identifier.
fn try_parse_connect_by_root_expression(&mut self) -> Result<Option<Expression>> {
    let at_connect_by_root =
        self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("CONNECT_BY_ROOT");
    if !at_connect_by_root {
        return Ok(None);
    }
    self.skip(); // consume CONNECT_BY_ROOT
    // The parenthesized form takes a full expression; the bare form takes
    // a column reference only.
    let this = if self.match_token(TokenType::LParen) {
        let inner = self.parse_expression()?;
        self.expect(TokenType::RParen)?;
        inner
    } else {
        self.parse_column()?.ok_or_else(|| {
            self.parse_error("Expected expression or column after CONNECT_BY_ROOT")
        })?
    };
    Ok(Some(Expression::ConnectByRoot(Box::new(ConnectByRoot {
        this,
    }))))
}
/// Parse MATCH_RECOGNIZE clause (Oracle/Snowflake/Presto/Trino pattern matching)
/// MATCH_RECOGNIZE ( [PARTITION BY ...] [ORDER BY ...] [MEASURES ...] [rows] [after] PATTERN (...) DEFINE ... )
///
/// `source` is the table expression MATCH_RECOGNIZE applies to. Every
/// sub-clause is optional at this level; they must appear in the order
/// parsed below. The trailing alias, if any, is attached by the caller.
fn parse_match_recognize(&mut self, source: Option<Expression>) -> Result<Expression> {
    self.expect(TokenType::LParen)?;
    // PARTITION BY (optional)
    let partition_by = if self.match_keywords(&[TokenType::Partition, TokenType::By]) {
        Some(self.parse_expression_list()?)
    } else {
        None
    };
    // ORDER BY (optional) - only the ordered expressions are retained
    let order_by = if self.match_keywords(&[TokenType::Order, TokenType::By]) {
        Some(self.parse_order_by()?.expressions)
    } else {
        None
    };
    // MEASURES (optional)
    let measures = if self.match_token(TokenType::Measures) {
        Some(self.parse_match_recognize_measures()?)
    } else {
        None
    };
    // Row semantics: ONE ROW PER MATCH / ALL ROWS PER MATCH
    let rows = self.parse_match_recognize_rows()?;
    // AFTER MATCH SKIP
    let after = self.parse_match_recognize_after()?;
    // PATTERN - captured as raw text by parse_match_recognize_pattern
    let pattern = if self.match_token(TokenType::Pattern) {
        Some(self.parse_match_recognize_pattern()?)
    } else {
        None
    };
    // DEFINE
    let define = if self.match_token(TokenType::Define) {
        Some(self.parse_match_recognize_define()?)
    } else {
        None
    };
    self.expect(TokenType::RParen)?;
    // Alias is handled by the caller
    Ok(Expression::MatchRecognize(Box::new(MatchRecognize {
        this: source.map(Box::new),
        partition_by,
        order_by,
        measures,
        rows,
        after,
        pattern,
        define,
        alias: None,
        alias_explicit_as: false,
    })))
}
/// Parse MEASURES clause in MATCH_RECOGNIZE
///
/// Each measure is `[RUNNING|FINAL] expr [AS alias]`; measures are
/// comma-separated.
fn parse_match_recognize_measures(&mut self) -> Result<Vec<MatchRecognizeMeasure>> {
    let mut measures = Vec::new();
    let mut more = true;
    while more {
        // Optional RUNNING / FINAL semantics prefix.
        let window_frame = if self.match_token(TokenType::Running) {
            Some(MatchRecognizeSemantics::Running)
        } else if self.match_token(TokenType::Final) {
            Some(MatchRecognizeSemantics::Final)
        } else {
            None
        };
        let mut this = self.parse_expression()?;
        // An AS alias wraps the measure expression in an Alias node.
        if self.match_token(TokenType::As) {
            let alias = Identifier::new(self.expect_identifier()?);
            this = Expression::Alias(Box::new(Alias::new(this, alias)));
        }
        measures.push(MatchRecognizeMeasure { this, window_frame });
        more = self.match_token(TokenType::Comma);
    }
    Ok(measures)
}
/// Parse row semantics in MATCH_RECOGNIZE
///
/// Grammar: ONE ROW PER MATCH
///        | ALL ROWS PER MATCH [SHOW EMPTY MATCHES | OMIT EMPTY MATCHES | WITH UNMATCHED ROWS]
///
/// ONE, PER, SHOW, OMIT, EMPTY, MATCHES and UNMATCHED are not dedicated
/// keyword tokens, so they are matched as Var tokens by case-insensitive
/// text. Returns `Ok(None)` when no row-semantics clause is present.
fn parse_match_recognize_rows(&mut self) -> Result<Option<MatchRecognizeRows>> {
    // ONE ROW PER MATCH
    if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("ONE") {
        self.skip(); // consume ONE
        if !self.match_token(TokenType::Row) {
            return Err(self.parse_error("Expected ROW after ONE"));
        }
        if !(self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("PER")) {
            return Err(self.parse_error("Expected PER after ONE ROW"));
        }
        self.skip(); // consume PER
        if !self.match_token(TokenType::Match) {
            return Err(self.parse_error("Expected MATCH after ONE ROW PER"));
        }
        return Ok(Some(MatchRecognizeRows::OneRowPerMatch));
    }
    // ALL ROWS PER MATCH [variants]
    if self.match_token(TokenType::All) {
        if !self.match_token(TokenType::Rows) {
            return Err(self.parse_error("Expected ROWS after ALL"));
        }
        if !(self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("PER")) {
            return Err(self.parse_error("Expected PER after ALL ROWS"));
        }
        self.skip(); // consume PER
        if !self.match_token(TokenType::Match) {
            return Err(self.parse_error("Expected MATCH after ALL ROWS PER"));
        }
        // Check for optional modifiers
        // SHOW EMPTY MATCHES
        if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("SHOW") {
            self.skip();
            if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("EMPTY") {
                self.skip();
                if self.check(TokenType::Var)
                    && self.peek().text.eq_ignore_ascii_case("MATCHES")
                {
                    self.skip();
                    return Ok(Some(MatchRecognizeRows::AllRowsPerMatchShowEmptyMatches));
                }
            }
            return Err(self.parse_error("Expected EMPTY MATCHES after SHOW"));
        }
        // OMIT EMPTY MATCHES
        if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("OMIT") {
            self.skip();
            if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("EMPTY") {
                self.skip();
                if self.check(TokenType::Var)
                    && self.peek().text.eq_ignore_ascii_case("MATCHES")
                {
                    self.skip();
                    return Ok(Some(MatchRecognizeRows::AllRowsPerMatchOmitEmptyMatches));
                }
            }
            return Err(self.parse_error("Expected EMPTY MATCHES after OMIT"));
        }
        // WITH UNMATCHED ROWS (WITH is a real keyword token)
        if self.match_token(TokenType::With) {
            if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("UNMATCHED")
            {
                self.skip();
                if self.match_token(TokenType::Rows) {
                    return Ok(Some(MatchRecognizeRows::AllRowsPerMatchWithUnmatchedRows));
                }
            }
            return Err(self.parse_error("Expected UNMATCHED ROWS after WITH"));
        }
        // Plain ALL ROWS PER MATCH with no modifier.
        return Ok(Some(MatchRecognizeRows::AllRowsPerMatch));
    }
    Ok(None)
}
/// Parse AFTER MATCH SKIP clause in MATCH_RECOGNIZE
///
/// Grammar: AFTER MATCH SKIP ( PAST LAST ROW
///                           | TO NEXT ROW
///                           | TO FIRST symbol
///                           | TO LAST symbol )
///
/// Returns `Ok(None)` only when AFTER is absent; once AFTER is consumed the
/// remainder of the clause is mandatory and any deviation is an error.
fn parse_match_recognize_after(&mut self) -> Result<Option<MatchRecognizeAfter>> {
    if !self.match_token(TokenType::After) {
        return Ok(None);
    }
    if !self.match_token(TokenType::Match) {
        return Err(self.parse_error("Expected MATCH after AFTER"));
    }
    // Check for SKIP (it might be an identifier)
    if !(self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("SKIP")) {
        return Err(self.parse_error("Expected SKIP after AFTER MATCH"));
    }
    self.skip(); // consume SKIP
    // PAST LAST ROW (PAST is matched as a Var token by text)
    if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("PAST") {
        self.skip();
        if self.match_token(TokenType::Last) {
            if self.match_token(TokenType::Row) {
                return Ok(Some(MatchRecognizeAfter::PastLastRow));
            }
        }
        return Err(self.parse_error("Expected LAST ROW after PAST"));
    }
    // TO NEXT ROW / TO FIRST x / TO LAST x
    if self.match_token(TokenType::To) {
        if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("NEXT") {
            self.skip();
            if self.match_token(TokenType::Row) {
                return Ok(Some(MatchRecognizeAfter::ToNextRow));
            }
            return Err(self.parse_error("Expected ROW after NEXT"));
        }
        if self.match_token(TokenType::First) {
            let name = self.expect_identifier()?;
            return Ok(Some(MatchRecognizeAfter::ToFirst(Identifier::new(name))));
        }
        if self.match_token(TokenType::Last) {
            let name = self.expect_identifier()?;
            return Ok(Some(MatchRecognizeAfter::ToLast(Identifier::new(name))));
        }
        return Err(self.parse_error("Expected NEXT ROW, FIRST x, or LAST x after TO"));
    }
    Err(self.parse_error("Expected PAST LAST ROW or TO ... after AFTER MATCH SKIP"))
}
/// Parse PATTERN clause in MATCH_RECOGNIZE using bracket counting
///
/// The pattern body is captured as raw text: tokens are joined with single
/// spaces, except quantifiers (+, *, ?, {n,m}) which attach directly to
/// the preceding token. The closing ')' of the clause itself is excluded.
fn parse_match_recognize_pattern(&mut self) -> Result<String> {
    self.expect(TokenType::LParen)?;
    let mut open_parens = 1;
    let mut text = String::new();
    while open_parens > 0 && !self.is_at_end() {
        let token = self.advance();
        if token.token_type == TokenType::LParen {
            open_parens += 1;
            text.push('(');
        } else if token.token_type == TokenType::RParen {
            open_parens -= 1;
            // The outermost ')' terminates the clause and is dropped.
            if open_parens > 0 {
                text.push(')');
            }
        } else {
            // Quantifiers bind tightly to what precedes them, so no space.
            let is_quantifier =
                matches!(token.text.as_str(), "+" | "*" | "?") || token.text.starts_with('{');
            let needs_space = !text.is_empty()
                && !text.ends_with('(')
                && !text.ends_with(' ')
                && !is_quantifier;
            if needs_space {
                text.push(' ');
            }
            text.push_str(&token.text);
        }
    }
    if open_parens > 0 {
        return Err(self.parse_error("Unclosed parenthesis in PATTERN clause"));
    }
    Ok(text.trim().to_string())
}
/// Parse DEFINE clause in MATCH_RECOGNIZE
///
/// Each entry is `symbol AS condition`; entries are comma-separated.
fn parse_match_recognize_define(&mut self) -> Result<Vec<(Identifier, Expression)>> {
    let mut definitions = Vec::new();
    let mut more = true;
    while more {
        let name = Identifier::new(self.expect_identifier()?);
        self.expect(TokenType::As)?;
        let condition = self.parse_expression()?;
        definitions.push((name, condition));
        more = self.match_token(TokenType::Comma);
    }
    Ok(definitions)
}
/// Parse LATERAL VIEW clauses (Hive/Spark)
/// Syntax: LATERAL VIEW [OUTER] generator_function(args) table_alias AS col1 [, col2, ...]
///
/// Consumes as many consecutive LATERAL VIEW clauses as are present and
/// returns them in order; returns an empty vector when none follow.
fn parse_lateral_views(&mut self) -> Result<Vec<LateralView>> {
    let mut views = Vec::new();
    while self.match_keywords(&[TokenType::Lateral, TokenType::View]) {
        // Check for OUTER keyword
        let outer = self.match_token(TokenType::Outer);
        // Parse the generator function (EXPLODE, POSEXPLODE, INLINE, etc.)
        // This is a function call expression
        let this = self.parse_primary()?;
        // Parse table alias (comes before AS). Only a non-keyword Var token
        // is treated as the alias; anything else means no table alias.
        let table_alias = if self.check(TokenType::Var) && !self.check_keyword() {
            Some(Identifier::new(self.expect_identifier()?))
        } else {
            None
        };
        // Parse column aliases after AS keyword
        // Supports both: AS a, b and AS (a, b)
        let column_aliases = if self.match_token(TokenType::As) {
            let mut aliases = Vec::new();
            // Check for parenthesized alias list: AS ("a", "b")
            if self.match_token(TokenType::LParen) {
                loop {
                    aliases.push(Identifier::new(self.expect_identifier_or_keyword()?));
                    if !self.match_token(TokenType::Comma) {
                        break;
                    }
                }
                self.expect(TokenType::RParen)?;
            } else {
                // Non-parenthesized aliases: AS a, b, c
                // Use expect_identifier_or_keyword because aliases like "key", "value", "pos" may be keywords
                loop {
                    aliases.push(Identifier::new(self.expect_identifier_or_keyword()?));
                    if !self.match_token(TokenType::Comma) {
                        break;
                    }
                    // Check if next token is still an identifier or keyword (column alias)
                    // vs starting a new LATERAL VIEW or other clause
                    if !self.is_identifier_or_keyword_token() {
                        break;
                    }
                    // Check for keywords that would end the column list
                    // (a trailing comma before one of these is tolerated)
                    if self.peek().token_type == TokenType::Lateral
                        || self.peek().token_type == TokenType::Where
                        || self.peek().token_type == TokenType::Group
                        || self.peek().token_type == TokenType::Having
                        || self.peek().token_type == TokenType::Order
                        || self.peek().token_type == TokenType::Limit
                    {
                        break;
                    }
                }
            }
            aliases
        } else {
            Vec::new()
        };
        views.push(LateralView {
            this,
            table_alias,
            column_aliases,
            outer,
        });
    }
    Ok(views)
}
/// Parse named windows (WINDOW w AS (...), ...)
///
/// Parses one or more comma-separated named window definitions. Each spec
/// may reference a base window (e.g. `w1 AS (w0 ORDER BY ...)`) and carry
/// its own PARTITION BY / ORDER BY / frame.
fn parse_named_windows(&mut self) -> Result<Vec<NamedWindow>> {
    let mut windows = Vec::new();
    loop {
        let name = self.expect_identifier()?;
        self.expect(TokenType::As)?;
        self.expect(TokenType::LParen)?;
        // Parse optional base window name reference (e.g., w1 AS (w0 ORDER BY ...))
        // Heuristic: the token must look like an identifier, must not itself
        // be PARTITION/ORDER, and the token after it must be one that can
        // legally follow a base window name (a spec keyword, ')' or ',').
        let window_name = if (self.check(TokenType::Identifier)
            || self.check(TokenType::Var)
            || self.check(TokenType::QuotedIdentifier))
            && !self.check(TokenType::Partition)
            && !self.check(TokenType::Order)
            && self.peek_nth(1).map_or(true, |t| {
                matches!(
                    t.token_type,
                    TokenType::Partition
                        | TokenType::Order
                        | TokenType::Rows
                        | TokenType::Range
                        | TokenType::Groups
                        | TokenType::RParen
                        | TokenType::Comma
                )
            }) {
            Some(self.expect_identifier()?)
        } else {
            None
        };
        // Parse window specification
        let partition_by = if self.match_keywords(&[TokenType::Partition, TokenType::By]) {
            Some(self.parse_expression_list()?)
        } else {
            None
        };
        let order_by = if self.match_keywords(&[TokenType::Order, TokenType::By]) {
            Some(self.parse_order_by()?)
        } else {
            None
        };
        let frame = self.parse_window_frame()?;
        self.expect(TokenType::RParen)?;
        // Reuse the Over node for the spec; alias is unused for named windows.
        windows.push(NamedWindow {
            name: Identifier::new(name),
            spec: Over {
                window_name: window_name.map(|n| Identifier::new(n)),
                partition_by: partition_by.unwrap_or_default(),
                order_by: order_by.map(|o| o.expressions).unwrap_or_default(),
                frame,
                alias: None,
            },
        });
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    Ok(windows)
}
/// Parse query hint /*+ ... */
///
/// The current token's text is taken verbatim as the hint body; an empty
/// body yields a hint with no expressions.
fn parse_hint(&mut self) -> Result<Hint> {
    // For now, parse as raw hint text.
    // More sophisticated parsing can be added later.
    let hint_text = self.advance().text.clone();
    let expressions = if hint_text.is_empty() {
        Vec::new()
    } else {
        vec![HintExpression::Raw(hint_text)]
    };
    Ok(Hint { expressions })
}
/// Parse SAMPLE / TABLESAMPLE / USING SAMPLE clause
///
/// Handles three spellings: `TABLESAMPLE` (SQL standard), `SAMPLE`
/// (Snowflake et al.), and `USING SAMPLE` (DuckDB), plus Hive's
/// `(BUCKET n OUT OF m [ON col])` form, PERCENT/ROWS size units,
/// SEED/REPEATABLE seeds, and DuckDB's trailing `(method [, seed])`
/// parenthetical. Returns `Ok(None)` when no sampling keyword is present.
fn parse_sample_clause(&mut self) -> Result<Option<Sample>> {
    // Check for USING SAMPLE (DuckDB), SAMPLE, or TABLESAMPLE
    let is_using_sample = if self.check(TokenType::Using)
        && self.current + 1 < self.tokens.len()
        && self.tokens[self.current + 1].token_type == TokenType::Sample
    {
        self.skip(); // consume USING
        self.skip(); // consume SAMPLE
        true
    } else {
        false
    };
    // use_sample_keyword records which keyword was written (for generation).
    let use_sample_keyword = if is_using_sample {
        // USING SAMPLE acts like SAMPLE
        true
    } else if self.match_token(TokenType::Sample) {
        true
    } else if self.match_token(TokenType::TableSample) {
        false
    } else {
        return Ok(None);
    };
    // Parse sampling method if specified (BERNOULLI, SYSTEM, BLOCK, ROW, RESERVOIR)
    let (method, method_before_size, explicit_method) =
        if self.match_token(TokenType::Bernoulli) {
            (SampleMethod::Bernoulli, true, true)
        } else if self.match_token(TokenType::System) {
            (SampleMethod::System, true, true)
        } else if self.match_token(TokenType::Block) {
            (SampleMethod::Block, true, true)
        } else if self.match_token(TokenType::Row) {
            (SampleMethod::Row, true, true)
        } else if self.check_identifier("RESERVOIR") {
            self.skip();
            (SampleMethod::Reservoir, true, true)
        } else {
            // Default to BERNOULLI for both SAMPLE and TABLESAMPLE
            // This matches Python SQLGlot's normalization behavior
            (SampleMethod::Bernoulli, false, false)
        };
    // Parse size (can be in parentheses)
    let has_paren = self.match_token(TokenType::LParen);
    // Check for BUCKET syntax: TABLESAMPLE (BUCKET 1 OUT OF 5 ON x)
    if self.match_identifier("BUCKET") {
        let bucket_numerator = self.parse_primary()?;
        self.match_identifier("OUT");
        self.match_token(TokenType::Of); // OF is a keyword token
        let bucket_denominator = self.parse_primary()?;
        let bucket_field = if self.match_token(TokenType::On) {
            Some(Box::new(self.parse_primary()?))
        } else {
            None
        };
        if has_paren {
            self.expect(TokenType::RParen)?;
        }
        // size mirrors the numerator so generic size-based code still works.
        return Ok(Some(Sample {
            method: SampleMethod::Bucket,
            size: bucket_numerator.clone(),
            seed: None,
            offset: None,
            unit_after_size: false,
            use_sample_keyword,
            explicit_method: true, // BUCKET is always explicit
            method_before_size: false, // BUCKET appears inside parens
            use_seed_keyword: false,
            bucket_numerator: Some(Box::new(bucket_numerator)),
            bucket_denominator: Some(Box::new(bucket_denominator)),
            bucket_field,
            is_using_sample,
            is_percent: false,
            suppress_method_output: false,
        }));
    }
    // Use parse_unary to avoid consuming PERCENT as modulo operator
    let size = self.parse_unary()?;
    // Check for PERCENT/ROWS suffix after size (if not already part of the number)
    // Both "%" and "PERCENT" tokens map to TokenType::Percent - accept both as PERCENT modifier
    let (method, unit_after_size, is_percent) = if self.check(TokenType::Percent) {
        self.skip(); // consume PERCENT or %
        // If method was already explicitly specified (e.g., SYSTEM), keep it
        // PERCENT here is just the unit, not the sampling method
        if method_before_size {
            (method, true, true)
        } else {
            (SampleMethod::Percent, true, true)
        }
    } else if self.match_token(TokenType::Rows) {
        // If method was already explicitly specified, keep it
        if method_before_size {
            (method, true, false)
        } else {
            (SampleMethod::Row, true, false)
        }
    } else {
        // No explicit unit after size - preserve the original method
        (method, false, false)
    };
    if has_paren {
        self.expect(TokenType::RParen)?;
    }
    // DuckDB USING SAMPLE: method and optional seed can come in parens after size
    // e.g., "10 PERCENT (bernoulli)" or "10% (system, 377)"
    let (method, seed, use_seed_keyword, explicit_method) =
        if is_using_sample && self.check(TokenType::LParen) {
            self.skip(); // consume LParen
            // Parse method name as identifier or keyword token
            // BERNOULLI, SYSTEM, RESERVOIR can be tokenized as keywords, not identifiers
            let method_from_parens =
                if self.check_identifier("BERNOULLI") || self.check(TokenType::Bernoulli) {
                    self.skip();
                    Some(SampleMethod::Bernoulli)
                } else if self.check_identifier("SYSTEM") || self.check(TokenType::System) {
                    self.skip();
                    Some(SampleMethod::System)
                } else if self.check_identifier("RESERVOIR") {
                    self.skip();
                    Some(SampleMethod::Reservoir)
                } else {
                    None
                };
            // Optional seed after comma
            let seed = if self.match_token(TokenType::Comma) {
                Some(self.parse_expression()?)
            } else {
                None
            };
            self.expect(TokenType::RParen)?;
            let final_method = method_from_parens.unwrap_or(method);
            (final_method, seed, false, true)
        } else {
            // Parse optional SEED / REPEATABLE
            let (seed, use_seed_keyword) = if self.match_token(TokenType::Seed) {
                self.expect(TokenType::LParen)?;
                let seed_value = self.parse_expression()?;
                self.expect(TokenType::RParen)?;
                (Some(seed_value), true)
            } else if self.match_token(TokenType::Repeatable) {
                self.expect(TokenType::LParen)?;
                let seed_value = self.parse_expression()?;
                self.expect(TokenType::RParen)?;
                (Some(seed_value), false)
            } else {
                (None, false)
            };
            // A trailing unit (PERCENT/ROWS) also counts as an explicit method.
            let explicit_method = explicit_method || unit_after_size;
            (method, seed, use_seed_keyword, explicit_method)
        };
    // For DuckDB USING SAMPLE: apply default methods
    // - bare number -> RESERVOIR, ROWS
    // - percent -> SYSTEM, PERCENT
    let (method, unit_after_size) = if is_using_sample && !explicit_method {
        // No explicit method - apply defaults
        (SampleMethod::Reservoir, false) // default: RESERVOIR with ROWS
    } else if is_using_sample && unit_after_size && !method_before_size {
        // Unit was specified after size (e.g., "10 PERCENT") but no method before
        // Check if method was set in post-parens
        if matches!(method, SampleMethod::Percent) {
            // "10%" or "10 PERCENT" without method -> SYSTEM
            (SampleMethod::System, true)
        } else if matches!(method, SampleMethod::Row) {
            // "50 ROWS" without method -> RESERVOIR
            (SampleMethod::Reservoir, true)
        } else {
            (method, unit_after_size)
        }
    } else {
        (method, unit_after_size)
    };
    // method_before_size: true for USING SAMPLE - we normalize to method-before-size format
    // e.g., "10 PERCENT (bernoulli)" becomes "BERNOULLI (10 PERCENT)"
    Ok(Some(Sample {
        method,
        size,
        seed,
        offset: None,
        unit_after_size,
        use_sample_keyword,
        explicit_method: true, // For USING SAMPLE, always explicit
        method_before_size: true, // Normalize to method-before-size format
        use_seed_keyword,
        bucket_numerator: None,
        bucket_denominator: None,
        bucket_field: None,
        is_using_sample,
        is_percent,
        suppress_method_output: false,
    }))
}
/// Parse table-level TABLESAMPLE/SAMPLE: TABLESAMPLE/SAMPLE METHOD(size [PERCENT|ROWS])
/// e.g., TABLESAMPLE RESERVOIR(20%), SAMPLE BERNOULLI(10 PERCENT), SAMPLE ROW(0)
///
/// Returns `Ok(None)` without consuming any tokens when neither SAMPLE nor
/// TABLESAMPLE is next. Otherwise recognizes, in order:
/// - Teradata:   `SAMPLE 5` / `SAMPLE 0.33, .25, .1`      (no parentheses)
/// - ClickHouse: `SAMPLE 0.1 [OFFSET 0.2]`                (no parentheses)
/// - Hive-style: `TABLESAMPLE (BUCKET n OUT OF m [ON col])`
/// - Generic:    `[METHOD] (size [PERCENT|ROWS|%]) [SEED(n)|REPEATABLE(n)]`
fn parse_table_level_sample(&mut self) -> Result<Option<Sample>> {
    // Accept both TABLESAMPLE and SAMPLE (Snowflake supports both)
    let use_sample_keyword = if self.match_token(TokenType::Sample) {
        true
    } else if self.match_token(TokenType::TableSample) {
        false
    } else {
        return Ok(None);
    };
    // Track which keyword was used for identity output
    let _ = use_sample_keyword; // Used below for is_using_sample field
    // Teradata: SAMPLE 5 or SAMPLE 0.33, .25, .1 (no parentheses)
    if matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::Teradata)
    ) && use_sample_keyword
        && !self.check(TokenType::LParen)
    {
        // Multiple comma-separated sample fractions collapse into one Tuple size.
        let mut expressions = vec![self.parse_unary()?];
        while self.match_token(TokenType::Comma) {
            expressions.push(self.parse_unary()?);
        }
        let size = if expressions.len() == 1 {
            expressions.into_iter().next().unwrap()
        } else {
            Expression::Tuple(Box::new(Tuple { expressions }))
        };
        return Ok(Some(Sample {
            method: SampleMethod::Percent,
            size,
            seed: None,
            offset: None,
            unit_after_size: false,
            use_sample_keyword,
            explicit_method: false,
            method_before_size: false,
            use_seed_keyword: false,
            bucket_numerator: None,
            bucket_denominator: None,
            bucket_field: None,
            is_using_sample: false,
            is_percent: false,
            suppress_method_output: false,
        }));
    }
    // ClickHouse: SAMPLE 0.1 [OFFSET 0.2] (no parentheses)
    if matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    ) && use_sample_keyword
        && !self.check(TokenType::LParen)
    {
        let size = self.parse_expression()?;
        // ClickHouse-only: optional OFFSET fraction after the sample size.
        let offset = if self.match_token(TokenType::Offset) {
            Some(self.parse_expression()?)
        } else {
            None
        };
        return Ok(Some(Sample {
            method: SampleMethod::Bernoulli,
            size,
            seed: None,
            offset,
            unit_after_size: false,
            use_sample_keyword,
            explicit_method: false,
            method_before_size: false,
            use_seed_keyword: false,
            bucket_numerator: None,
            bucket_denominator: None,
            bucket_field: None,
            is_using_sample: false,
            is_percent: false,
            suppress_method_output: false,
        }));
    }
    // Parse method name (optional for table-level TABLESAMPLE)
    // RESERVOIR has no dedicated token type, so it is matched by identifier text.
    let (method, explicit_method, method_before_size) = if self.check_identifier("RESERVOIR") {
        self.skip();
        (SampleMethod::Reservoir, true, true)
    } else if self.match_token(TokenType::Bernoulli) {
        (SampleMethod::Bernoulli, true, true)
    } else if self.match_token(TokenType::System) {
        (SampleMethod::System, true, true)
    } else if self.match_token(TokenType::Block) {
        (SampleMethod::Block, true, true)
    } else if self.match_token(TokenType::Row) {
        (SampleMethod::Row, true, true)
    } else {
        // No explicit method - default to Bernoulli internally but track as not explicit
        (SampleMethod::Bernoulli, false, false)
    };
    // Parse (size [PERCENT|ROWS])
    self.expect(TokenType::LParen)?;
    // Check for BUCKET syntax: TABLESAMPLE (BUCKET 1 OUT OF 5 [ON col])
    if self.match_identifier("BUCKET") {
        let bucket_numerator = self.parse_primary()?;
        // OUT / OF are matched permissively; missing keywords are tolerated.
        self.match_identifier("OUT");
        self.match_token(TokenType::Of);
        let bucket_denominator = self.parse_primary()?;
        let bucket_field = if self.match_token(TokenType::On) {
            Some(Box::new(self.parse_primary()?))
        } else {
            None
        };
        self.expect(TokenType::RParen)?;
        return Ok(Some(Sample {
            method: SampleMethod::Bucket,
            // `size` mirrors the numerator so generic size-based consumers still work.
            size: bucket_numerator.clone(),
            seed: None,
            offset: None,
            unit_after_size: false,
            use_sample_keyword,
            explicit_method: true,
            method_before_size: false,
            use_seed_keyword: false,
            bucket_numerator: Some(Box::new(bucket_numerator)),
            bucket_denominator: Some(Box::new(bucket_denominator)),
            bucket_field,
            is_using_sample: false,
            is_percent: false,
            suppress_method_output: false,
        }));
    }
    let size = self.parse_unary()?;
    // Check for PERCENT/ROWS suffix or % symbol
    // Note: the PERCENT token type covers both the keyword and the `%` symbol,
    // so the token text disambiguates the two spellings.
    let (method, unit_after_size, is_percent) =
        if self.check(TokenType::Percent) && self.peek().text.eq_ignore_ascii_case("PERCENT") {
            self.skip();
            // If no explicit method, use Percent to represent "PERCENT" unit
            if explicit_method {
                (method, true, true)
            } else {
                (SampleMethod::Percent, true, true)
            }
        } else if self.match_token(TokenType::Rows) {
            // If no explicit method, use Row to represent "ROWS" unit
            if explicit_method {
                (method, true, false)
            } else {
                (SampleMethod::Row, true, false)
            }
        } else if self.check(TokenType::Percent) && self.peek().text == "%" {
            // 20% -> consume the %, treat as PERCENT unit
            self.skip();
            if explicit_method {
                (method, true, true)
            } else {
                (SampleMethod::Percent, true, true)
            }
        } else {
            (method, false, false)
        };
    self.expect(TokenType::RParen)?;
    // Optional SEED/REPEATABLE
    // use_seed_keyword records which spelling appeared so output can round-trip.
    let (seed, use_seed_keyword) = if self.match_token(TokenType::Seed) {
        self.expect(TokenType::LParen)?;
        let seed_value = self.parse_expression()?;
        self.expect(TokenType::RParen)?;
        (Some(seed_value), true)
    } else if self.match_token(TokenType::Repeatable) {
        self.expect(TokenType::LParen)?;
        let seed_value = self.parse_expression()?;
        self.expect(TokenType::RParen)?;
        (Some(seed_value), false)
    } else {
        (None, false)
    };
    Ok(Some(Sample {
        method,
        size,
        seed,
        offset: None,
        unit_after_size,
        use_sample_keyword,
        explicit_method,
        method_before_size,
        use_seed_keyword,
        bucket_numerator: None,
        bucket_denominator: None,
        bucket_field: None,
        is_using_sample: false, // table-level uses TABLESAMPLE/SAMPLE keyword, not USING SAMPLE
        is_percent,
        suppress_method_output: false,
    }))
}
/// Parse set operations (UNION, INTERSECT, EXCEPT)
///
/// Loops left-associatively: each iteration folds the accumulated result into
/// a new Union/Intersect/Except node whose right side is parsed fresh. After
/// the chain ends, trailing ORDER BY/LIMIT/OFFSET are attached to the
/// outermost node.
fn parse_set_operation(&mut self, left: Expression) -> Result<Expression> {
    // Which set operator keyword starts the current iteration.
    enum SetOpKind {
        Union,
        Intersect,
        Except,
    }

    let mut result = left;
    let mut found_set_op = false;
    loop {
        // BigQuery modifiers [INNER|LEFT|RIGHT|FULL] may precede the operator.
        let (side, kind) = self.parse_set_operation_side_kind();

        // Comments sitting on the operator token (e.g. /*x*/ before UNION)
        // appeared between the left query and the operator; capture them now.
        let operator_comments = if self.check(TokenType::Union)
            || self.check(TokenType::Intersect)
            || self.check(TokenType::Except)
        {
            self.current_leading_comments().to_vec()
        } else {
            Vec::new()
        };

        // Attach those comments as trailing comments of the left operand.
        let left = if operator_comments.is_empty() {
            result
        } else {
            Expression::Annotated(Box::new(Annotated {
                this: result,
                trailing_comments: operator_comments,
            }))
        };

        let op = if self.match_token(TokenType::Union) {
            Some(SetOpKind::Union)
        } else if self.match_token(TokenType::Intersect) {
            Some(SetOpKind::Intersect)
        } else if self.match_token(TokenType::Except) {
            Some(SetOpKind::Except)
        } else {
            None
        };

        let Some(op) = op else {
            // A dangling modifier with no operator keyword is an error.
            if side.is_some() || kind.is_some() {
                return Err(self.parse_error(
                    "Expected UNION, INTERSECT, or EXCEPT after set operation modifier",
                ));
            }
            result = left;
            break;
        };

        // ALL / DISTINCT are mutually exclusive; DISTINCT is only tried when
        // ALL was absent.
        let all = self.match_token(TokenType::All);
        let distinct = if all {
            false
        } else {
            self.match_token(TokenType::Distinct)
        };
        let (by_name, strict, corresponding, on_columns) =
            self.parse_set_operation_corresponding()?;
        // Bare CORRESPONDING (no STRICT, no explicit side/kind) implies INNER.
        let kind = if corresponding && !strict && side.is_none() && kind.is_none() {
            Some("INNER".to_string())
        } else {
            kind
        };
        let right = self.parse_select_or_paren_select()?;

        result = match op {
            SetOpKind::Union => Expression::Union(Box::new(Union {
                left,
                right,
                all,
                distinct,
                with: None,
                order_by: None,
                limit: None,
                offset: None,
                distribute_by: None,
                sort_by: None,
                cluster_by: None,
                by_name,
                side,
                kind,
                corresponding,
                strict,
                on_columns,
            })),
            SetOpKind::Intersect => Expression::Intersect(Box::new(Intersect {
                left,
                right,
                all,
                distinct,
                with: None,
                order_by: None,
                limit: None,
                offset: None,
                distribute_by: None,
                sort_by: None,
                cluster_by: None,
                by_name,
                side,
                kind,
                corresponding,
                strict,
                on_columns,
            })),
            SetOpKind::Except => Expression::Except(Box::new(Except {
                left,
                right,
                all,
                distinct,
                with: None,
                order_by: None,
                limit: None,
                offset: None,
                distribute_by: None,
                sort_by: None,
                cluster_by: None,
                by_name,
                side,
                kind,
                corresponding,
                strict,
                on_columns,
            })),
        };
        found_set_op = true;
    }
    // Parse ORDER BY, LIMIT, OFFSET for the outermost set operation
    if found_set_op {
        self.parse_set_operation_modifiers(&mut result)?;
    }
    Ok(result)
}
/// Parse BigQuery set operation side (LEFT, RIGHT, FULL) and kind (INNER)
/// These modifiers appear BEFORE the UNION/INTERSECT/EXCEPT keyword
///
/// Lookahead is speculative: tokens stay consumed only when a set-operation
/// keyword (or INNER followed by one) comes next; otherwise the token cursor
/// is rolled back and `(None, None)` is returned.
fn parse_set_operation_side_kind(&mut self) -> (Option<String>, Option<String>) {
    let mut side = None;
    let mut kind = None;

    // Side modifier: LEFT / RIGHT / FULL (reusing the join side tokens).
    if self.check(TokenType::Left) || self.check(TokenType::Right) || self.check(TokenType::Full)
    {
        let checkpoint = self.current;
        let side_text = self.advance().text.to_ascii_uppercase();
        // Keep the side token only if a set operation (or INNER) follows.
        let followed_by_set_op = self.check(TokenType::Union)
            || self.check(TokenType::Intersect)
            || self.check(TokenType::Except)
            || self.check(TokenType::Inner);
        if !followed_by_set_op {
            // Not a set operation modifier - restore the cursor.
            self.current = checkpoint;
            return (None, None);
        }
        side = Some(side_text);
    }

    // Kind modifier: INNER.
    if self.check(TokenType::Inner) {
        let checkpoint = self.current;
        self.skip(); // consume INNER
        let followed_by_set_op = self.check(TokenType::Union)
            || self.check(TokenType::Intersect)
            || self.check(TokenType::Except);
        if !followed_by_set_op {
            // Roll back INNER - and the side token too, if one was consumed
            // (it sits exactly one position before the INNER checkpoint).
            self.current = if side.is_some() {
                checkpoint - 1
            } else {
                checkpoint
            };
            return (None, None);
        }
        kind = Some("INNER".to_string());
    }

    (side, kind)
}
/// Parse CORRESPONDING/STRICT CORRESPONDING/BY NAME modifiers after ALL/DISTINCT
/// Returns (by_name, strict, corresponding, on_columns)
///
/// Recognized forms:
/// - `BY NAME`                     (DuckDB)         -> by_name = true
/// - `STRICT CORRESPONDING`        (BigQuery)       -> strict = corresponding = true
/// - `CORRESPONDING [BY (cols)]`   (BigQuery style) -> corresponding = true, on_columns
///
/// On a partial match (`BY` without `NAME`, `STRICT` without `CORRESPONDING`)
/// the speculatively consumed token is backtracked so the right-hand query
/// parse starts from the correct position.
fn parse_set_operation_corresponding(&mut self) -> Result<(bool, bool, bool, Vec<Expression>)> {
    let mut by_name = false;
    let mut strict = false;
    let mut corresponding = false;
    let mut on_columns = Vec::new();
    // Check for BY NAME (DuckDB style)
    if self.match_token(TokenType::By) {
        if self.match_identifier("NAME") {
            by_name = true;
        } else {
            // BY without NAME is not a set-operation modifier - backtrack the
            // consumed BY token (mirrors the STRICT backtrack below). The old
            // short-circuit `match_token(By) && match_identifier("NAME")`
            // silently swallowed BY here.
            self.current -= 1;
        }
    }
    // Check for STRICT CORRESPONDING (BigQuery style)
    else if self.match_identifier("STRICT") {
        if self.match_identifier("CORRESPONDING") {
            strict = true;
            corresponding = true;
        } else {
            // STRICT without CORRESPONDING - backtrack
            self.current -= 1;
        }
    }
    // Check for CORRESPONDING (BigQuery style)
    else if self.match_identifier("CORRESPONDING") {
        corresponding = true;
    }
    // If CORRESPONDING is set, check for BY (columns)
    if corresponding && self.match_token(TokenType::By) {
        self.expect(TokenType::LParen)?;
        on_columns = self
            .parse_identifier_list()?
            .into_iter()
            .map(|id| {
                Expression::boxed_column(Column {
                    name: id,
                    table: None,
                    join_mark: false,
                    trailing_comments: Vec::new(),
                    span: None,
                    inferred_type: None,
                })
            })
            .collect();
        self.expect(TokenType::RParen)?;
    }
    Ok((by_name, strict, corresponding, on_columns))
}
/// Parse ORDER BY, LIMIT, OFFSET modifiers for set operations
///
/// Each clause that is present overwrites the matching field on the
/// outermost Union/Intersect/Except node; absent clauses leave the node's
/// existing value untouched. Any other expression kind is left as-is.
fn parse_set_operation_modifiers(&mut self, expr: &mut Expression) -> Result<()> {
    // Overwrite `slot` only when a clause was actually parsed.
    fn assign_if_present<T>(slot: &mut Option<T>, parsed: Option<T>) {
        if parsed.is_some() {
            *slot = parsed;
        }
    }

    // ORDER BY <exprs>
    let order_by = if self.match_token(TokenType::Order) {
        self.expect(TokenType::By)?;
        Some(self.parse_order_by()?)
    } else {
        None
    };
    // LIMIT <expr>
    let limit = if self.match_token(TokenType::Limit) {
        Some(Box::new(self.parse_expression()?))
    } else {
        None
    };
    // OFFSET <expr>
    let offset = if self.match_token(TokenType::Offset) {
        Some(Box::new(self.parse_expression()?))
    } else {
        None
    };

    // Attach to the outermost set operation node.
    match expr {
        Expression::Union(node) => {
            assign_if_present(&mut node.order_by, order_by);
            assign_if_present(&mut node.limit, limit);
            assign_if_present(&mut node.offset, offset);
        }
        Expression::Intersect(node) => {
            assign_if_present(&mut node.order_by, order_by);
            assign_if_present(&mut node.limit, limit);
            assign_if_present(&mut node.offset, offset);
        }
        Expression::Except(node) => {
            assign_if_present(&mut node.order_by, order_by);
            assign_if_present(&mut node.limit, limit);
            assign_if_present(&mut node.offset, offset);
        }
        _ => {}
    }
    Ok(())
}
/// Parse either a SELECT statement or a parenthesized SELECT/set operation
fn parse_select_or_paren_select(&mut self) -> Result<Expression> {
if self.match_token(TokenType::LParen) {
// Could be (SELECT ...) or ((SELECT ...) UNION ...) or (FROM ...) for DuckDB
if self.check(TokenType::Select)
|| self.check(TokenType::With)
|| self.check(TokenType::From)
{
let query = self.parse_statement()?;
self.expect(TokenType::RParen)?;
// Handle optional alias after subquery: (SELECT 1) AS a
let alias = if self.match_token(TokenType::As) {
Some(Identifier::new(self.expect_identifier()?))
} else {
None
};
// Wrap in Subquery to preserve parentheses
Ok(Expression::Subquery(Box::new(Subquery {
this: query,
alias,
column_aliases: Vec::new(),
order_by: None,
limit: None,
offset: None,
lateral: false,
modifiers_inside: false,
trailing_comments: Vec::new(),
distribute_by: None,
sort_by: None,
cluster_by: None,
inferred_type: None,
})))
} else if self.check(TokenType::LParen) {
// Nested parentheses like ((SELECT ...))
let inner = self.parse_select_or_paren_select()?;
// Check for set operations inside the parens
let result = self.parse_set_operation(inner)?;
self.expect(TokenType::RParen)?;
// Handle optional alias after subquery
let alias = if self.match_token(TokenType::As) {
Some(Identifier::new(self.expect_identifier()?))
} else {
None
};
// Wrap in Subquery to preserve parentheses
Ok(Expression::Subquery(Box::new(Subquery {
this: result,
alias,
column_aliases: Vec::new(),
order_by: None,
limit: None,
offset: None,
lateral: false,
modifiers_inside: false,
trailing_comments: Vec::new(),
distribute_by: None,
sort_by: None,
cluster_by: None,
inferred_type: None,
})))
} else {
Err(self.parse_error("Expected SELECT or ( after ("))
}
} else if self.check(TokenType::From) {
// DuckDB FROM-first syntax without parentheses: ... UNION FROM t
self.parse_from_first_query()
} else if self.check(TokenType::With) {
// WITH CTE as right-hand side of UNION/INTERSECT/EXCEPT
self.parse_statement()
} else {
// Use parse_select_body (not parse_select) to avoid mutual recursion:
// parse_select calls parse_set_operation, which calls back here.
// The caller (parse_set_operation's loop) handles set-op chaining.
self.parse_select_body()
}
}
/// Parse INSERT statement
///
/// Handles the standard `INSERT INTO t [(cols)] VALUES ... / SELECT ...` shape
/// plus the dialect extensions visible in the body:
/// - Oracle:      `INSERT /*+ hint */ INTO ...`
/// - SQLite:      `INSERT OR ABORT|FAIL|IGNORE|REPLACE|ROLLBACK INTO ...`
/// - MySQL:       `INSERT IGNORE`, `ON DUPLICATE KEY UPDATE`, `VALUES (...) AS alias`
/// - Hive/Spark:  `INSERT OVERWRITE TABLE ...`, `INSERT OVERWRITE [LOCAL] DIRECTORY ...`,
///                `IF EXISTS`, trailing `TABLE source`
/// - Snowflake:   `INSERT OVERWRITE INTO ...`, multi-table `INSERT ALL/FIRST`
/// - ClickHouse:  `INSERT INTO [TABLE] FUNCTION f(...)`, `PARTITION BY expr`,
///                `SETTINGS k = v`, `FORMAT <name> <raw data>`, bare/empty VALUES
/// - PostgreSQL:  `DEFAULT VALUES`, `ON CONFLICT`, `RETURNING`, target alias
/// - DuckDB:      `BY NAME`
/// - Databricks:  `REPLACE WHERE`
/// - TSQL:        `OUTPUT` clause
fn parse_insert(&mut self) -> Result<Expression> {
    let insert_token = self.expect(TokenType::Insert)?;
    // Comments before INSERT travel with the resulting node.
    let leading_comments = insert_token.comments;
    // Parse query hint /*+ ... */ if present (Oracle: INSERT /*+ APPEND */ INTO ...)
    let hint = if self.check(TokenType::Hint) {
        Some(self.parse_hint()?)
    } else {
        None
    };
    // Handle SQLite conflict action: INSERT OR ABORT|FAIL|IGNORE|REPLACE|ROLLBACK INTO
    let conflict_action = if self.match_token(TokenType::Or) {
        if self.match_identifier("ABORT") {
            Some("ABORT".to_string())
        } else if self.match_identifier("FAIL") {
            Some("FAIL".to_string())
        } else if self.match_token(TokenType::Ignore) {
            Some("IGNORE".to_string())
        } else if self.match_token(TokenType::Replace) {
            Some("REPLACE".to_string())
        } else if self.match_token(TokenType::Rollback) {
            Some("ROLLBACK".to_string())
        } else {
            return Err(self.parse_error(
                "Expected ABORT, FAIL, IGNORE, REPLACE, or ROLLBACK after INSERT OR",
            ));
        }
    } else {
        None
    };
    // Handle INSERT IGNORE (MySQL)
    // Only tried when no SQLite OR-action was parsed (they are mutually exclusive).
    let ignore = conflict_action.is_none() && self.match_token(TokenType::Ignore);
    // Handle OVERWRITE for Hive/Spark: INSERT OVERWRITE TABLE ...
    let overwrite = self.match_token(TokenType::Overwrite);
    // Handle multi-table INSERT: INSERT [OVERWRITE] ALL/FIRST ...
    if self.match_token(TokenType::All) || self.match_token(TokenType::First) {
        if let Some(multi_insert) =
            self.parse_multitable_inserts(leading_comments.clone(), overwrite)?
        {
            return Ok(multi_insert);
        }
    }
    // Handle INTO or TABLE (OVERWRITE requires TABLE, INTO is standard)
    // Also handle INSERT OVERWRITE [LOCAL] DIRECTORY 'path'
    let local_directory = overwrite && self.match_token(TokenType::Local);
    let is_directory = (overwrite || local_directory) && self.match_identifier("DIRECTORY");
    if is_directory {
        // INSERT OVERWRITE [LOCAL] DIRECTORY 'path' [ROW FORMAT ...] SELECT ...
        let path = self.expect_string()?;
        // Parse optional ROW FORMAT clause
        let row_format = if self.match_keywords(&[TokenType::Row, TokenType::Format]) {
            // ROW FORMAT DELIMITED ...
            let delimited = self.match_identifier("DELIMITED");
            let mut fields_terminated_by = None;
            let mut collection_items_terminated_by = None;
            let mut map_keys_terminated_by = None;
            let mut lines_terminated_by = None;
            let mut null_defined_as = None;
            // Parse the various TERMINATED BY clauses
            // Sub-keywords (TERMINATED, ITEMS, KEYS, ...) are matched
            // permissively: a missing one does not abort the clause.
            loop {
                if self.match_identifier("FIELDS") || self.match_identifier("FIELD") {
                    self.match_identifier("TERMINATED");
                    self.match_token(TokenType::By);
                    fields_terminated_by = Some(self.expect_string()?);
                } else if self.match_identifier("COLLECTION") {
                    self.match_identifier("ITEMS");
                    self.match_identifier("TERMINATED");
                    self.match_token(TokenType::By);
                    collection_items_terminated_by = Some(self.expect_string()?);
                } else if self.match_identifier("MAP") {
                    self.match_identifier("KEYS");
                    self.match_identifier("TERMINATED");
                    self.match_token(TokenType::By);
                    map_keys_terminated_by = Some(self.expect_string()?);
                } else if self.match_identifier("LINES") {
                    self.match_identifier("TERMINATED");
                    self.match_token(TokenType::By);
                    lines_terminated_by = Some(self.expect_string()?);
                } else if self.match_token(TokenType::Null) {
                    self.match_identifier("DEFINED");
                    self.match_token(TokenType::As);
                    null_defined_as = Some(self.expect_string()?);
                } else {
                    break;
                }
            }
            Some(RowFormat {
                delimited,
                fields_terminated_by,
                collection_items_terminated_by,
                map_keys_terminated_by,
                lines_terminated_by,
                null_defined_as,
            })
        } else {
            None
        };
        // Parse optional STORED AS clause
        let stored_as = if self.match_identifier("STORED") {
            self.expect(TokenType::As)?;
            Some(self.expect_identifier()?)
        } else {
            None
        };
        // Parse the SELECT query
        let query = self.parse_statement()?;
        // Directory inserts have no target table; the path lives in `directory`.
        return Ok(Expression::Insert(Box::new(Insert {
            table: TableRef::new(""),
            columns: Vec::new(),
            values: Vec::new(),
            query: Some(query),
            overwrite,
            partition: Vec::new(),
            directory: Some(DirectoryInsert {
                local: local_directory,
                path,
                row_format,
                stored_as,
            }),
            returning: Vec::new(),
            output: None,
            on_conflict: None,
            leading_comments,
            if_exists: false,
            with: None,
            ignore,
            source_alias: None,
            alias: None,
            alias_explicit_as: false,
            default_values: false,
            by_name: false,
            conflict_action: conflict_action.clone(),
            is_replace: false,
            replace_where: None,
            source: None,
            hint: hint.clone(),
            function_target: None,
            partition_by: None,
            settings: Vec::new(),
        })));
    }
    if overwrite {
        // OVERWRITE can be followed by INTO (Snowflake) or TABLE (Hive/Spark)
        // Both are optional here, so either spelling (or neither) is accepted.
        self.match_token(TokenType::Into);
        self.match_token(TokenType::Table);
    } else {
        self.expect(TokenType::Into)?;
        // Optional TABLE keyword after INTO
        self.match_token(TokenType::Table);
    }
    // ClickHouse: INSERT INTO [TABLE] FUNCTION func_name(args...)
    let mut function_target: Option<Box<Expression>> = None;
    if self.match_token(TokenType::Function) {
        // Parse function call: func_name(args...)
        let func_name = self.expect_identifier_or_keyword()?;
        self.expect(TokenType::LParen)?;
        let args = if self.check(TokenType::RParen) {
            Vec::new()
        } else {
            self.parse_expression_list()?
        };
        self.expect(TokenType::RParen)?;
        function_target = Some(Box::new(Expression::Function(Box::new(Function {
            name: func_name,
            args,
            distinct: false,
            trailing_comments: Vec::new(),
            use_bracket_syntax: false,
            no_parens: false,
            quoted: false,
            span: None,
            inferred_type: None,
        }))));
    }
    let table_name = if function_target.is_some() {
        // For FUNCTION targets, use empty table name
        Identifier::new(String::new())
    } else {
        // Allow keywords (like TABLE) as table names in INSERT statements
        self.expect_identifier_or_keyword_with_quoted()?
    };
    // Handle qualified table names like a.b or a.b.c
    let table = if self.match_token(TokenType::Dot) {
        let mut schema = table_name;
        let mut name = self.expect_identifier_or_keyword_with_quoted()?;
        // Check for three-part name: catalog.schema.table
        // Parts shift left: what was parsed as schema becomes catalog, etc.
        let catalog = if self.match_token(TokenType::Dot) {
            let catalog = schema;
            schema = name;
            name = self.expect_identifier_or_keyword_with_quoted()?;
            Some(catalog)
        } else {
            None
        };
        let trailing_comments = self.previous_trailing_comments().to_vec();
        TableRef {
            name,
            schema: Some(schema),
            catalog,
            alias: None,
            alias_explicit_as: false,
            column_aliases: Vec::new(),
            leading_comments: Vec::new(),
            trailing_comments,
            when: None,
            only: false,
            final_: false,
            table_sample: None,
            hints: Vec::new(),
            system_time: None,
            partitions: Vec::new(),
            identifier_func: None,
            changes: None,
            version: None,
            span: None,
        }
    } else {
        let trailing_comments = self.previous_trailing_comments().to_vec();
        TableRef {
            name: table_name,
            schema: None,
            catalog: None,
            alias: None,
            alias_explicit_as: false,
            column_aliases: Vec::new(),
            leading_comments: Vec::new(),
            when: None,
            only: false,
            final_: false,
            table_sample: None,
            hints: Vec::new(),
            system_time: None,
            trailing_comments,
            partitions: Vec::new(),
            identifier_func: None,
            changes: None,
            version: None,
            span: None,
        }
    };
    // Optional alias (PostgreSQL: INSERT INTO table AS t(...), Oracle: INSERT INTO table t ...)
    // The long exclusion list keeps clause keywords from being eaten as an
    // implicit alias (e.g. `INSERT INTO t VALUES ...` must not alias t as VALUES).
    let (alias, alias_explicit_as) = if self.match_token(TokenType::As) {
        (Some(Identifier::new(self.expect_identifier()?)), true)
    } else if self.is_identifier_token()
        && !self.check(TokenType::Values)
        && !self.check(TokenType::Select)
        && !self.check(TokenType::Default)
        && !self.check(TokenType::By)
        && !self.check(TokenType::Partition)
        && !self.check(TokenType::Output)
        && !self.check(TokenType::If)
        && !self.check(TokenType::Replace)
        && !self.check(TokenType::Table)
        && !self.check(TokenType::LParen)
    {
        // Implicit alias without AS (e.g., INSERT INTO dest d VALUES ...)
        (Some(Identifier::new(self.expect_identifier()?)), false)
    } else {
        (None, false)
    };
    // Optional IF EXISTS (Hive)
    let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
    // Optional REPLACE WHERE clause (Databricks): INSERT INTO a REPLACE WHERE cond VALUES ...
    // NOTE(review): if REPLACE matches but WHERE does not, the REPLACE token
    // stays consumed with no backtrack - confirm no valid INSERT syntax has a
    // bare REPLACE at this position.
    let replace_where =
        if self.match_token(TokenType::Replace) && self.match_token(TokenType::Where) {
            Some(Box::new(self.parse_or()?))
        } else {
            None
        };
    // Optional PARTITION clause
    // ClickHouse: PARTITION BY expr (no parens)
    // Hive/Spark: PARTITION (col1 = val1, col2)
    let mut partition_by_expr: Option<Box<Expression>> = None;
    let partition = if self.check(TokenType::Partition) && self.check_next(TokenType::By) {
        // ClickHouse PARTITION BY expr
        self.skip(); // consume PARTITION
        self.skip(); // consume BY
        partition_by_expr = Some(Box::new(self.parse_expression()?));
        Vec::new()
    } else if self.match_token(TokenType::Partition) {
        self.expect(TokenType::LParen)?;
        let mut parts = Vec::new();
        loop {
            let col = Identifier::new(self.expect_identifier()?);
            // Value is optional: `PARTITION (col)` is dynamic partitioning.
            let value = if self.match_token(TokenType::Eq) {
                Some(self.parse_expression()?)
            } else {
                None
            };
            parts.push((col, value));
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
        self.expect(TokenType::RParen)?;
        parts
    } else {
        Vec::new()
    };
    // ClickHouse: SETTINGS key = val, ...
    let insert_settings = if self.match_token(TokenType::Settings) {
        let mut settings = Vec::new();
        loop {
            settings.push(self.parse_expression()?);
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
        settings
    } else {
        Vec::new()
    };
    // Optional column list OR parenthesized subquery
    // We need to check if ( is followed by SELECT/WITH (subquery) or identifiers (column list)
    let columns = if self.check(TokenType::LParen) {
        // Look ahead to see if this is a subquery or column list
        if self
            .peek_nth(1)
            .map(|t| t.token_type == TokenType::Select || t.token_type == TokenType::With)
            .unwrap_or(false)
        {
            // This is a parenthesized subquery, not a column list
            Vec::new()
        } else if matches!(
            self.config.dialect,
            Some(crate::dialects::DialectType::ClickHouse)
        ) && {
            // ClickHouse: INSERT INTO t (*), t(* EXCEPT ...), t(table.* EXCEPT ...), t(COLUMNS('pattern') EXCEPT ...)
            let peek1 = self.peek_nth(1).map(|t| t.token_type);
            peek1 == Some(TokenType::Star)
                || (peek1 == Some(TokenType::Var)
                    && self.peek_nth(2).map(|t| t.token_type) == Some(TokenType::Dot)
                    && self.peek_nth(3).map(|t| t.token_type) == Some(TokenType::Star))
                || (peek1 == Some(TokenType::Var)
                    && self
                        .peek_nth(1)
                        .map(|t| t.text.eq_ignore_ascii_case("COLUMNS"))
                        .unwrap_or(false))
        } {
            // Consume balanced parens and skip entire column specification
            self.skip(); // consume (
            let mut depth = 1i32;
            while !self.is_at_end() && depth > 0 {
                if self.check(TokenType::LParen) {
                    depth += 1;
                }
                if self.check(TokenType::RParen) {
                    depth -= 1;
                    if depth == 0 {
                        // Leave the final ) for the expect() below.
                        break;
                    }
                }
                self.skip();
            }
            self.expect(TokenType::RParen)?;
            Vec::new() // Treat as "all columns"
        } else {
            self.skip(); // consume (
            let cols = self.parse_identifier_list()?;
            self.expect(TokenType::RParen)?;
            cols
        }
    } else {
        Vec::new()
    };
    // Parse OUTPUT clause (TSQL)
    let output = if self.match_token(TokenType::Output) {
        Some(self.parse_output_clause()?)
    } else {
        None
    };
    // Check for BY NAME (DuckDB): INSERT INTO x BY NAME SELECT ...
    // NOTE(review): short-circuit means BY can be consumed without NAME
    // following and is not backtracked - confirm no valid INSERT syntax has a
    // bare BY at this position.
    let by_name = self.match_token(TokenType::By) && self.match_identifier("NAME");
    // Check for DEFAULT VALUES (PostgreSQL)
    // NOTE(review): same short-circuit pattern - DEFAULT without VALUES would
    // stay consumed; verify against callers.
    let default_values =
        self.match_token(TokenType::Default) && self.match_token(TokenType::Values);
    // VALUES or SELECT or TABLE source (Hive/Spark) or DEFAULT VALUES (already consumed above)
    let (values, query) = if default_values {
        // DEFAULT VALUES: no values or query
        (Vec::new(), None)
    } else if matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    ) && self.check(TokenType::Format)
        && self.peek_nth(1).is_some_and(|t| {
            !t.text.eq_ignore_ascii_case("VALUES")
                && (t.token_type == TokenType::Var || t.token_type == TokenType::Identifier)
        })
    {
        // ClickHouse: FORMAT <format_name> followed by raw data (CSV, JSON, TSV, etc.)
        // Skip everything to next semicolon or end — the data is not SQL
        self.skip(); // consume FORMAT
        let format_name = self.advance().text.clone(); // consume format name
        // Consume all remaining tokens until semicolon (raw data)
        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
            self.skip();
        }
        // Store as empty values with the format name in the query as a command
        (
            Vec::new(),
            Some(Expression::Command(Box::new(crate::expressions::Command {
                this: format!("FORMAT {}", format_name),
            }))),
        )
    } else if matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    ) && self.match_text_seq(&["FORMAT", "VALUES"])
    {
        // ClickHouse FORMAT VALUES: rows are parenthesized tuples like VALUES.
        let mut all_values = Vec::new();
        loop {
            self.expect(TokenType::LParen)?;
            let row = self.parse_expression_list()?;
            self.expect(TokenType::RParen)?;
            all_values.push(row);
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
        (all_values, None)
    } else if self.match_token(TokenType::Values) {
        let mut all_values = Vec::new();
        // ClickHouse: INSERT INTO t VALUES; — empty VALUES (clientError expected)
        if matches!(
            self.config.dialect,
            Some(crate::dialects::DialectType::ClickHouse)
        ) && (self.check(TokenType::Semicolon) || self.is_at_end())
        {
            // Return empty INSERT as Command to avoid needing all Insert fields
            return Ok(Expression::Command(Box::new(crate::expressions::Command {
                this: "INSERT INTO VALUES".to_string(),
            })));
        }
        // ClickHouse: allow bare VALUES without parens: VALUES 1, 2, 3
        if matches!(
            self.config.dialect,
            Some(crate::dialects::DialectType::ClickHouse)
        ) && !self.check(TokenType::LParen)
        {
            // Each bare expression becomes its own single-column row.
            loop {
                let val = self.parse_expression()?;
                all_values.push(vec![val]);
                if !self.match_token(TokenType::Comma) {
                    break;
                }
            }
        } else {
            loop {
                self.expect(TokenType::LParen)?;
                // ClickHouse: allow empty VALUES () — empty tuple
                let row = if self.check(TokenType::RParen) {
                    Vec::new()
                } else {
                    self.parse_values_expression_list()?
                };
                self.expect(TokenType::RParen)?;
                all_values.push(row);
                if !self.match_token(TokenType::Comma) {
                    // ClickHouse: allow tuples without commas: VALUES (1) (2) (3)
                    if matches!(
                        self.config.dialect,
                        Some(crate::dialects::DialectType::ClickHouse)
                    ) && self.check(TokenType::LParen)
                    {
                        continue;
                    }
                    break;
                }
                // ClickHouse: allow trailing comma after last tuple
                if matches!(
                    self.config.dialect,
                    Some(crate::dialects::DialectType::ClickHouse)
                ) && !self.check(TokenType::LParen)
                {
                    break;
                }
            }
        } // close else (parenthesized values)
        (all_values, None)
    } else if self.check(TokenType::Table) {
        // Hive/Spark: INSERT OVERWRITE TABLE target TABLE source
        // The TABLE keyword here indicates source table, not a subquery
        (Vec::new(), None)
    } else {
        (Vec::new(), Some(self.parse_statement()?))
    };
    // Parse source table (Hive/Spark): INSERT OVERWRITE TABLE target TABLE source
    let source = if self.match_token(TokenType::Table) {
        // Parse source table reference (similar to target table parsing)
        let source_name = self.expect_identifier_with_quoted()?;
        let source_table = if self.match_token(TokenType::Dot) {
            let schema = source_name;
            let name = self.expect_identifier_with_quoted()?;
            let trailing_comments = self.previous_trailing_comments().to_vec();
            TableRef {
                name,
                schema: Some(schema),
                catalog: None,
                alias: None,
                alias_explicit_as: false,
                column_aliases: Vec::new(),
                leading_comments: Vec::new(),
                trailing_comments,
                when: None,
                only: false,
                final_: false,
                table_sample: None,
                hints: Vec::new(),
                system_time: None,
                partitions: Vec::new(),
                identifier_func: None,
                changes: None,
                version: None,
                span: None,
            }
        } else {
            let trailing_comments = self.previous_trailing_comments().to_vec();
            TableRef {
                name: source_name,
                schema: None,
                catalog: None,
                alias: None,
                alias_explicit_as: false,
                column_aliases: Vec::new(),
                leading_comments: Vec::new(),
                trailing_comments,
                when: None,
                only: false,
                final_: false,
                table_sample: None,
                hints: Vec::new(),
                system_time: None,
                partitions: Vec::new(),
                identifier_func: None,
                changes: None,
                version: None,
                span: None,
            }
        };
        Some(Expression::Table(Box::new(source_table)))
    } else {
        None
    };
    // Parse optional AS alias after VALUES (MySQL: INSERT ... VALUES (...) AS new_data)
    let source_alias = if self.match_token(TokenType::As) {
        Some(Identifier::new(self.expect_identifier()?))
    } else {
        None
    };
    // Parse ON CONFLICT clause (PostgreSQL, SQLite) or ON DUPLICATE KEY UPDATE (MySQL)
    let on_conflict = if self.match_token(TokenType::On) {
        if self.match_identifier("CONFLICT") {
            Some(Box::new(self.parse_on_conflict()?))
        } else if self.match_identifier("DUPLICATE") {
            // MySQL: ON DUPLICATE KEY UPDATE
            self.expect(TokenType::Key)?;
            self.expect(TokenType::Update)?;
            // Parse the UPDATE SET expressions
            let mut sets = Vec::new();
            loop {
                // Parse column = expression
                let col_name = self.expect_identifier_with_quoted()?;
                // Handle qualified column: table.column
                let column = if self.match_token(TokenType::Dot) {
                    let col = self.expect_identifier_with_quoted()?;
                    Expression::boxed_column(Column {
                        name: col,
                        table: Some(col_name),
                        join_mark: false,
                        trailing_comments: Vec::new(),
                        span: None,
                        inferred_type: None,
                    })
                } else {
                    Expression::Identifier(col_name)
                };
                self.expect(TokenType::Eq)?;
                let value = self.parse_expression()?;
                // Each assignment is stored as an Eq binary op.
                sets.push(Expression::Eq(Box::new(BinaryOp {
                    left: column,
                    right: value,
                    left_comments: Vec::new(),
                    operator_comments: Vec::new(),
                    trailing_comments: Vec::new(),
                    inferred_type: None,
                })));
                if !self.match_token(TokenType::Comma) {
                    break;
                }
            }
            // MySQL's ON DUPLICATE is modeled as an OnConflict node with the
            // `duplicate` marker set so the generator emits the MySQL form.
            Some(Box::new(Expression::OnConflict(Box::new(OnConflict {
                duplicate: Some(Box::new(Expression::Boolean(BooleanLiteral {
                    value: true,
                }))),
                expressions: sets,
                action: None,
                conflict_keys: None,
                index_predicate: None,
                constraint: None,
                where_: None,
            }))))
        } else {
            // Unexpected token after ON
            return Err(self.parse_error("Expected CONFLICT or DUPLICATE after ON"));
        }
    } else {
        None
    };
    // Parse RETURNING clause (PostgreSQL, SQLite)
    let returning = if self.match_token(TokenType::Returning) {
        self.parse_select_expressions()?
    } else {
        Vec::new()
    };
    Ok(Expression::Insert(Box::new(Insert {
        table,
        columns,
        values,
        query,
        overwrite,
        partition,
        directory: None,
        returning,
        output,
        on_conflict,
        leading_comments,
        if_exists,
        with: None,
        ignore,
        source_alias,
        alias,
        alias_explicit_as,
        default_values,
        by_name,
        conflict_action,
        is_replace: false,
        replace_where,
        source: source.map(Box::new),
        hint,
        function_target,
        partition_by: partition_by_expr,
        settings: insert_settings,
    })))
}
/// Parse ON CONFLICT clause for INSERT statements (PostgreSQL, SQLite)
/// Syntax: ON CONFLICT [(conflict_target)] [WHERE predicate] DO NOTHING | DO UPDATE SET ...
/// ON CONFLICT ON CONSTRAINT constraint_name DO ...
///
/// The caller has already consumed the `ON CONFLICT` keywords. Returns an
/// [`Expression::OnConflict`] node; errors if `DO` is missing or the action
/// is neither `NOTHING` nor `UPDATE`.
fn parse_on_conflict(&mut self) -> Result<Expression> {
    // ON CONSTRAINT variant. Only consume tokens when BOTH `ON` and
    // `CONSTRAINT` are present: the previous `match_token(On) &&
    // match_token(Constraint)` short-circuit consumed a lone `ON` even when
    // `CONSTRAINT` did not follow, corrupting the stream for the
    // conflict-target parse below.
    let constraint = if self.check(TokenType::On) && self.check_next(TokenType::Constraint) {
        self.skip(); // consume ON
        self.skip(); // consume CONSTRAINT
        let name = self.expect_identifier()?;
        Some(Box::new(Expression::Identifier(Identifier::new(name))))
    } else {
        None
    };
    // Optional conflict target: parenthesized column/expression list.
    // Mutually exclusive with the ON CONSTRAINT form.
    let conflict_keys = if constraint.is_none() && self.match_token(TokenType::LParen) {
        let keys = self.parse_expression_list()?;
        self.expect(TokenType::RParen)?;
        Some(Box::new(Expression::Tuple(Box::new(Tuple {
            expressions: keys,
        }))))
    } else {
        None
    };
    // Optional partial-index predicate on the conflict target:
    // ON CONFLICT (col) WHERE <pred> DO ...
    let index_predicate = if self.match_token(TokenType::Where) {
        Some(Box::new(self.parse_expression()?))
    } else {
        None
    };
    // Parse DO NOTHING or DO UPDATE
    if !self.match_identifier("DO") {
        return Err(self.parse_error("Expected DO after ON CONFLICT"));
    }
    let action = if self.match_identifier("NOTHING") {
        // DO NOTHING is represented as a bare NOTHING identifier.
        Some(Box::new(Expression::Identifier(Identifier::new(
            "NOTHING".to_string(),
        ))))
    } else if self.match_token(TokenType::Update) {
        // DO UPDATE SET col = expr [, ...]
        self.expect(TokenType::Set)?;
        let mut sets = Vec::new();
        loop {
            // Parse column = expression; column may be qualified (table.column).
            let col_name = self.expect_identifier_with_quoted()?;
            let column = if self.match_token(TokenType::Dot) {
                let col = self.expect_identifier_with_quoted()?;
                Expression::boxed_column(Column {
                    name: col,
                    table: Some(col_name),
                    join_mark: false,
                    trailing_comments: Vec::new(),
                    span: None,
                    inferred_type: None,
                })
            } else {
                Expression::Identifier(col_name)
            };
            self.expect(TokenType::Eq)?;
            let value = self.parse_expression()?;
            sets.push(Expression::Eq(Box::new(BinaryOp {
                left: column,
                right: value,
                left_comments: Vec::new(),
                operator_comments: Vec::new(),
                trailing_comments: Vec::new(),
                inferred_type: None,
            })));
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
        // The SET assignments are wrapped in a Tuple so the generator can
        // distinguish DO UPDATE (Tuple action) from DO NOTHING (Identifier).
        Some(Box::new(Expression::Tuple(Box::new(Tuple {
            expressions: sets,
        }))))
    } else {
        return Err(self.parse_error("Expected NOTHING or UPDATE after DO"));
    };
    // Optional WHERE clause guarding the DO UPDATE action.
    let where_ = if self.match_token(TokenType::Where) {
        Some(Box::new(self.parse_expression()?))
    } else {
        None
    };
    Ok(Expression::OnConflict(Box::new(OnConflict {
        duplicate: None,
        expressions: Vec::new(),
        action,
        conflict_keys,
        index_predicate,
        constraint,
        where_,
    })))
}
/// Parse MySQL REPLACE [INTO] statement or REPLACE() function call
///
/// Dispatches on what follows the REPLACE keyword:
/// - `REPLACE(` ...: the string function, returned as a [`Function`] node
/// - Teradata `REPLACE VIEW`: rewritten as CREATE OR REPLACE VIEW
/// - ClickHouse `REPLACE [TEMPORARY] TABLE`: rewritten as CREATE OR REPLACE TABLE
/// - ClickHouse `REPLACE DICTIONARY`: consumed verbatim as a Command
/// - otherwise MySQL/SQLite `REPLACE [INTO] table ...`, parsed like INSERT
///   and returned as an [`Insert`] with `is_replace: true`
fn parse_replace(&mut self) -> Result<Expression> {
// Check if this is REPLACE() function call (REPLACE followed by '(')
// or MySQL REPLACE INTO statement
let replace_token = self.expect(TokenType::Replace)?;
let leading_comments = replace_token.comments;
if self.check(TokenType::LParen) {
// This is a REPLACE() function call, parse as expression
self.expect(TokenType::LParen)?;
let args = self.parse_expression_list()?;
self.expect(TokenType::RParen)?;
return Ok(Expression::Function(Box::new(Function {
name: "REPLACE".to_string(),
args,
distinct: false,
trailing_comments: Vec::new(),
use_bracket_syntax: false,
no_parens: false,
quoted: false,
span: None,
inferred_type: None,
})));
}
// Teradata: REPLACE VIEW -> CREATE OR REPLACE VIEW
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::Teradata)
) && self.check(TokenType::View)
{
return self.parse_create_view(true, false, false, false, None, None, None, false);
}
// ClickHouse: REPLACE TABLE -> treat like CREATE OR REPLACE TABLE
// Also handle REPLACE TEMPORARY TABLE
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && (self.check(TokenType::Table) || self.check(TokenType::Temporary))
{
let temporary = self.match_token(TokenType::Temporary);
return self.parse_create_table(true, temporary, leading_comments.clone(), None);
}
// ClickHouse: REPLACE DICTIONARY -> consume as Command
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && (self.check(TokenType::Dictionary) || self.check_identifier("DICTIONARY"))
{
// Re-serialize the remaining tokens verbatim into a raw Command,
// re-adding the quotes the tokenizer stripped from strings and
// quoted identifiers.
let mut parts = vec!["REPLACE".to_string()];
let mut _paren_depth = 0i32;
while !self.is_at_end() && !self.check(TokenType::Semicolon) {
let token = self.advance();
if token.token_type == TokenType::LParen {
_paren_depth += 1;
}
if token.token_type == TokenType::RParen {
_paren_depth -= 1;
}
let text = if token.token_type == TokenType::String {
format!("'{}'", token.text)
} else if token.token_type == TokenType::QuotedIdentifier {
format!("\"{}\"", token.text)
} else {
token.text.clone()
};
parts.push(text);
}
return Ok(Expression::Command(Box::new(crate::expressions::Command {
this: parts.join(" "),
})));
}
// Otherwise, this is MySQL/SQLite REPLACE INTO statement - parse similarly to INSERT
// INTO is optional in MySQL's REPLACE syntax.
self.match_token(TokenType::Into);
let table_name = self.expect_identifier_or_safe_keyword_with_quoted()?;
// Handle an optionally schema-qualified target: schema.table
let table = if self.match_token(TokenType::Dot) {
let second_name = self.expect_identifier_or_safe_keyword_with_quoted()?;
TableRef {
name: second_name,
schema: Some(table_name),
catalog: None,
alias: None,
alias_explicit_as: false,
column_aliases: Vec::new(),
leading_comments: Vec::new(),
trailing_comments: Vec::new(),
when: None,
only: false,
final_: false,
table_sample: None,
hints: Vec::new(),
system_time: None,
partitions: Vec::new(),
identifier_func: None,
changes: None,
version: None,
span: None,
}
} else {
TableRef::new(table_name.name)
};
// Parse optional column list
let columns = if self.match_token(TokenType::LParen) {
let mut cols = Vec::new();
loop {
if self.check(TokenType::RParen) {
break;
}
let col = self.expect_identifier_with_quoted()?;
cols.push(col);
if !self.match_token(TokenType::Comma) {
break;
}
}
self.expect(TokenType::RParen)?;
cols
} else {
Vec::new()
};
// Parse VALUES or SELECT query
let mut values = Vec::new();
let query = if self.match_token(TokenType::Values) {
loop {
self.expect(TokenType::LParen)?;
let row = self.parse_expression_list()?;
self.expect(TokenType::RParen)?;
values.push(row);
if !self.match_token(TokenType::Comma) {
break;
}
}
None
} else if !self.is_at_end() && !self.check(TokenType::Semicolon) {
// SELECT or other statement as value source
Some(self.parse_statement()?)
} else {
None
};
// REPLACE reuses the Insert node; is_replace distinguishes it at
// generation time. All INSERT-only clauses are left at their defaults.
Ok(Expression::Insert(Box::new(Insert {
table,
columns,
values,
query,
overwrite: false,
partition: Vec::new(),
directory: None,
returning: Vec::new(),
output: None,
on_conflict: None,
leading_comments,
if_exists: false,
with: None,
ignore: false,
source_alias: None,
alias: None,
alias_explicit_as: false,
default_values: false,
by_name: false,
conflict_action: None,
is_replace: true,
replace_where: None,
source: None,
hint: None,
function_target: None,
partition_by: None,
settings: Vec::new(),
})))
}
/// Parse UPDATE statement
///
/// Supported shapes include:
/// - Standard: UPDATE t SET a = 1 WHERE ...
/// - TSQL: UPDATE STATISTICS t (consumed verbatim as a Command)
/// - PostgreSQL: UPDATE ONLY t SET ...
/// - MySQL multi-table: UPDATE t1, t2 LEFT JOIN t3 ON ... SET ...
/// - Snowflake: UPDATE t FROM (source) SET ... (FROM before SET)
/// - PostgreSQL/TSQL: FROM after SET, OUTPUT, RETURNING
/// - MySQL: trailing ORDER BY and LIMIT
fn parse_update(&mut self) -> Result<Expression> {
let update_token = self.expect(TokenType::Update)?;
let leading_comments = update_token.comments;
// Optional optimizer hint immediately after UPDATE (e.g. Oracle /*+ ... */)
let hint = if self.check(TokenType::Hint) {
Some(self.parse_hint()?)
} else {
None
};
// TSQL: UPDATE STATISTICS table_name - parse as Command
if self.check_identifier("STATISTICS") {
let mut parts = vec!["UPDATE".to_string()];
while !self.is_at_end() && !self.check(TokenType::Semicolon) {
parts.push(self.advance().text);
}
return Ok(Expression::Command(Box::new(Command {
this: parts.join(" "),
})));
}
// PostgreSQL ONLY modifier: UPDATE ONLY t SET ...
let has_only = self.match_token(TokenType::Only);
// Parse table name (can be qualified: db.table_name)
let first_name = self.expect_identifier_with_quoted()?;
let mut table = if self.match_token(TokenType::Dot) {
let second_name = self.expect_identifier_with_quoted()?;
// Check for three-part name (catalog.schema.table)
if self.match_token(TokenType::Dot) {
let table_name = self.expect_identifier_with_quoted()?;
TableRef {
name: table_name,
schema: Some(second_name),
catalog: Some(first_name),
alias: None,
alias_explicit_as: false,
column_aliases: Vec::new(),
leading_comments: Vec::new(),
trailing_comments: Vec::new(),
when: None,
only: false,
final_: false,
table_sample: None,
hints: Vec::new(),
system_time: None,
partitions: Vec::new(),
identifier_func: None,
changes: None,
version: None,
span: None,
}
} else {
// Two-part name: schema.table
TableRef {
name: second_name,
schema: Some(first_name),
catalog: None,
alias: None,
alias_explicit_as: false,
column_aliases: Vec::new(),
leading_comments: Vec::new(),
trailing_comments: Vec::new(),
when: None,
only: false,
final_: false,
table_sample: None,
hints: Vec::new(),
system_time: None,
partitions: Vec::new(),
identifier_func: None,
changes: None,
version: None,
span: None,
}
}
} else {
TableRef::from_identifier(first_name)
};
// Attach any comments that trailed the table name token.
table.trailing_comments = self.previous_trailing_comments().to_vec();
if has_only {
table.only = true;
}
// Optional alias (with or without AS)
if self.match_token(TokenType::As) {
table.alias = Some(self.expect_identifier_with_quoted()?);
table.alias_explicit_as = true;
} else if self.is_identifier_token() && !self.check(TokenType::Set) {
// Implicit alias (table t SET ...)
table.alias = Some(self.expect_identifier_with_quoted()?);
table.alias_explicit_as = false;
}
// Handle multi-table UPDATE syntax: UPDATE t1, t2, t3 LEFT JOIN t4 ON ... SET ...
// Capture additional tables
let mut extra_tables = Vec::new();
while self.match_token(TokenType::Comma) {
// Parse additional table name (same qualified-name logic as the main table)
let first_name = self.expect_identifier_with_quoted()?;
let mut extra_table = if self.match_token(TokenType::Dot) {
let second_name = self.expect_identifier_with_quoted()?;
if self.match_token(TokenType::Dot) {
let table_name = self.expect_identifier_with_quoted()?;
TableRef {
name: table_name,
schema: Some(second_name),
catalog: Some(first_name),
alias: None,
alias_explicit_as: false,
column_aliases: Vec::new(),
leading_comments: Vec::new(),
trailing_comments: Vec::new(),
when: None,
only: false,
final_: false,
table_sample: None,
hints: Vec::new(),
system_time: None,
partitions: Vec::new(),
identifier_func: None,
changes: None,
version: None,
span: None,
}
} else {
TableRef {
name: second_name,
schema: Some(first_name),
catalog: None,
alias: None,
alias_explicit_as: false,
column_aliases: Vec::new(),
leading_comments: Vec::new(),
trailing_comments: Vec::new(),
when: None,
only: false,
final_: false,
table_sample: None,
hints: Vec::new(),
system_time: None,
partitions: Vec::new(),
identifier_func: None,
changes: None,
version: None,
span: None,
}
}
} else {
TableRef::from_identifier(first_name)
};
// Optional alias
if self.match_token(TokenType::As) {
extra_table.alias = Some(self.expect_identifier_with_quoted()?);
extra_table.alias_explicit_as = true;
} else if self.is_identifier_token()
&& !self.check(TokenType::Set)
&& !self.check_keyword()
{
extra_table.alias = Some(self.expect_identifier_with_quoted()?);
extra_table.alias_explicit_as = false;
}
extra_tables.push(extra_table);
}
// Handle JOINs before SET
let mut table_joins = Vec::new();
while let Some((kind, _, use_inner_keyword, use_outer_keyword, _join_hint)) =
self.try_parse_join_kind()
{
if self.check(TokenType::Join) {
self.skip(); // consume JOIN
}
// Parse joined table (supports subqueries, LATERAL, functions, etc.)
let join_expr = self.parse_table_expression()?;
// ON clause
let on_condition = if self.match_token(TokenType::On) {
Some(self.parse_expression()?)
} else {
None
};
table_joins.push(Join {
this: join_expr,
on: on_condition,
using: Vec::new(),
kind,
use_inner_keyword,
use_outer_keyword,
deferred_condition: false,
join_hint: None,
match_condition: None,
pivots: Vec::new(),
comments: Vec::new(),
nesting_group: 0,
directed: false,
});
}
// Snowflake syntax: UPDATE table FROM (source) SET ... WHERE ...
// Check if FROM comes before SET
let (from_before_set, early_from_clause, early_from_joins) =
if self.match_token(TokenType::From) {
let from_clause = self.parse_from()?;
let from_joins = self.parse_joins()?;
(true, Some(from_clause), from_joins)
} else {
(false, None, Vec::new())
};
self.expect(TokenType::Set)?;
let mut set = Vec::new();
loop {
// Column can be qualified for multi-table UPDATE (e.g., a.id = 1)
// Use safe keyword variant to allow keywords like 'exists' as column names (ClickHouse)
let mut col_ident = self.expect_identifier_or_safe_keyword_with_quoted()?;
while self.match_token(TokenType::Dot) {
let part = self.expect_identifier_or_safe_keyword_with_quoted()?;
// For qualified columns, preserve both parts
// NOTE: the dotted name is flattened into a single Identifier;
// quoted is true if any component was quoted.
col_ident = Identifier {
name: format!("{}.{}", col_ident.name, part.name),
quoted: col_ident.quoted || part.quoted,
trailing_comments: Vec::new(),
span: None,
};
}
self.expect(TokenType::Eq)?;
let value = self.parse_expression()?;
set.push((col_ident, value));
if !self.match_token(TokenType::Comma) {
break;
}
}
// Parse OUTPUT clause (TSQL)
let output = if self.match_token(TokenType::Output) {
Some(self.parse_output_clause()?)
} else {
None
};
// Parse FROM clause (PostgreSQL, SQL Server, Snowflake) - only if not already parsed before SET
let (from_clause, from_joins) = if from_before_set {
(early_from_clause, early_from_joins)
} else if self.match_token(TokenType::From) {
let from_clause = Some(self.parse_from()?);
let from_joins = self.parse_joins()?;
(from_clause, from_joins)
} else {
(None, Vec::new())
};
let where_clause = if self.match_token(TokenType::Where) {
Some(Where {
this: self.parse_expression()?,
})
} else {
None
};
// Parse RETURNING clause (PostgreSQL, SQLite)
let returning = if self.match_token(TokenType::Returning) {
self.parse_select_expressions()?
} else {
Vec::new()
};
// Parse ORDER BY clause (MySQL)
let order_by = if self.match_keywords(&[TokenType::Order, TokenType::By]) {
Some(self.parse_order_by()?)
} else {
None
};
// Parse LIMIT clause (MySQL)
let limit = if self.match_token(TokenType::Limit) {
Some(self.parse_expression()?)
} else {
None
};
Ok(Expression::Update(Box::new(Update {
table,
hint,
extra_tables,
table_joins,
set,
from_clause,
from_joins,
where_clause,
returning,
output,
with: None,
leading_comments,
limit,
order_by,
from_before_set,
})))
}
/// Parse DELETE statement
/// Handles:
/// - Standard: DELETE FROM t WHERE ...
/// - PostgreSQL USING: DELETE FROM t USING s WHERE ... RETURNING a
/// - DuckDB USING: DELETE FROM t USING (VALUES ...) AS t1 WHERE ...
/// - MySQL multi-table: DELETE t1 FROM t1 JOIN t2 ON ... WHERE ...
/// - MySQL multi-table: DELETE t1, t2 FROM t1 JOIN t2 JOIN t3 WHERE ...
/// - MySQL USING: DELETE FROM t1, t2 USING t1 JOIN t2 JOIN t3 WHERE ...
/// - MySQL FORCE INDEX: DELETE FROM t FORCE INDEX (idx) WHERE ...
///
/// Also supports TSQL OUTPUT (before or after FROM), ClickHouse ON CLUSTER
/// and IN PARTITION, BigQuery-style DELETE without FROM, and MySQL
/// ORDER BY / LIMIT suffixes. Returns an [`Expression::Delete`].
fn parse_delete(&mut self) -> Result<Expression> {
let delete_token = self.expect(TokenType::Delete)?;
let leading_comments = delete_token.comments;
// Optional optimizer hint immediately after DELETE
let hint = if self.check(TokenType::Hint) {
Some(self.parse_hint()?)
} else {
None
};
// Check if FROM is present. If not, this is MySQL multi-table: DELETE t1, t2 FROM ...
// or TSQL: DELETE x OUTPUT x.a FROM z
let mut tables = Vec::new();
let mut early_output = None;
let _has_from = if self.check(TokenType::From) {
self.skip(); // consume FROM
true
} else {
// MySQL multi-table: DELETE t1[, t2, ...] FROM ...
// or TSQL: DELETE x OUTPUT x.a FROM z
// or BigQuery/generic: DELETE table WHERE ... (no FROM required)
// Parse target table list (supporting dotted names)
loop {
let tref = self.parse_table_ref()?;
tables.push(tref);
if !self.match_token(TokenType::Comma) {
break;
}
}
// TSQL: OUTPUT clause can appear before FROM
if self.match_token(TokenType::Output) {
early_output = Some(self.parse_output_clause()?);
}
if self.check(TokenType::From) {
self.skip(); // consume FROM
true
} else {
// BigQuery-style: DELETE table WHERE ... (no FROM)
false
}
};
// Now parse the main table after FROM (or use from no-FROM path)
// PostgreSQL ONLY modifier: DELETE FROM ONLY t ...
let has_only = self.match_token(TokenType::Only);
let mut table = if _has_from {
// Parse the main table(s) after FROM
// Use parse_table_ref() to handle dotted names like db.table
self.parse_table_ref()?
} else {
// BigQuery-style: table was already parsed into `tables`
// Move it out to be the main table
if !tables.is_empty() {
tables.remove(0)
} else {
return Err(self.parse_error("Expected table name in DELETE statement"));
}
};
if has_only {
table.only = true;
}
// ClickHouse: ON CLUSTER clause
let on_cluster = self.parse_on_cluster_clause()?;
// Check for additional tables after the first: DELETE FROM t1, t2 USING ...
let mut extra_from_tables = Vec::new();
if _has_from
&& tables.is_empty()
&& self.check(TokenType::Comma)
&& !self.check(TokenType::Where)
{
// Could be multi-table: DELETE FROM t1, t2 USING ...
// Check ahead if this is followed by USING or more tables
while self.match_token(TokenType::Comma) {
let extra_name = self.expect_identifier_with_quoted()?;
let extra_ref = TableRef::from_identifier(extra_name);
extra_from_tables.push(extra_ref);
}
}
// If we had DELETE FROM t1, t2 USING ..., the tables field stores t1, t2
let mut tables_from_using = false;
if !extra_from_tables.is_empty() {
// The main table + extra tables form the multi-table target
tables.push(table.clone());
tables.append(&mut extra_from_tables);
tables_from_using = true;
}
// Check for FORCE INDEX hint (MySQL): DELETE FROM t FORCE INDEX (idx)
let force_index = if self.match_text_seq(&["FORCE", "INDEX"]) {
self.expect(TokenType::LParen)?;
let idx_name = self.expect_identifier_with_quoted()?;
self.expect(TokenType::RParen)?;
Some(idx_name.name)
} else {
None
};
// Check for optional alias (with or without AS)
// The negative checks prevent clause keywords (USING/WHERE/JOIN kinds/FORCE)
// from being mistaken for an implicit alias.
let (alias, alias_explicit_as) = if force_index.is_none() && self.match_token(TokenType::As)
{
(Some(self.expect_identifier_with_quoted()?), true)
} else if force_index.is_none()
&& self.is_identifier_token()
&& !self.check(TokenType::Using)
&& !self.check(TokenType::Where)
&& !self.check(TokenType::Inner)
&& !self.check(TokenType::Left)
&& !self.check(TokenType::Right)
&& !self.check(TokenType::Cross)
&& !self.check(TokenType::Full)
&& !self.check(TokenType::Join)
&& !self.check_identifier("FORCE")
{
(Some(self.expect_identifier_with_quoted()?), false)
} else {
(None, false)
};
// Parse JOINs for MySQL multi-table: DELETE t1 FROM t1 LEFT JOIN t2 ON ...
let mut joins = self.parse_joins()?;
// Parse USING clause (PostgreSQL/DuckDB/MySQL)
let mut using = Vec::new();
if self.match_token(TokenType::Using) {
loop {
// Check for subquery: USING (SELECT ...) AS ... or (VALUES ...) AS ...
if self.check(TokenType::LParen) {
// Check if next token after ( is VALUES
let is_values = self.current + 1 < self.tokens.len()
&& self.tokens[self.current + 1].token_type == TokenType::Values;
let subquery = if is_values {
// Parse (VALUES ...) as parenthesized VALUES
self.skip(); // consume (
let values = self.parse_values()?;
self.expect(TokenType::RParen)?;
Expression::Paren(Box::new(Paren {
this: values,
trailing_comments: Vec::new(),
}))
} else {
// Parse as subquery (SELECT ...) or other expression
self.parse_primary()?
};
// Parse alias
let using_alias = if self.match_token(TokenType::As) {
let alias_name = self.expect_identifier_with_quoted()?;
// Check for column aliases: AS name(col1, col2)
let col_aliases = if self.match_token(TokenType::LParen) {
let aliases = self.parse_identifier_list()?;
self.expect(TokenType::RParen)?;
aliases
} else {
Vec::new()
};
Some((alias_name, col_aliases))
} else {
None
};
// Create a TableRef from the subquery with alias
let mut tref = TableRef::new("");
if let Some((alias_name, col_aliases)) = using_alias {
tref.alias = Some(alias_name);
tref.alias_explicit_as = true;
tref.column_aliases = col_aliases;
}
// Store the subquery in the table reference using hints (as a hack)
// Actually, we need a better approach - use the table ref hints to store the subquery
// NOTE(review): the generator must special-case a TableRef with an
// empty name and a single-element `hints` vec — confirm downstream.
tref.hints = vec![subquery];
using.push(tref);
} else {
let using_table = self.expect_identifier_with_quoted()?;
let mut using_ref = TableRef::from_identifier(using_table);
// Check for JOINs: USING t1 INNER JOIN t2 INNER JOIN t3
if self.check_join_keyword() {
// Parse JOINs as part of USING
using.push(using_ref);
let mut using_joins = self.parse_joins()?;
joins.append(&mut using_joins);
break;
}
// Optional alias for using table
if self.match_token(TokenType::As) {
using_ref.alias = Some(self.expect_identifier_with_quoted()?);
using_ref.alias_explicit_as = true;
} else if self.is_identifier_token()
&& !self.check(TokenType::Comma)
&& !self.check(TokenType::Where)
{
using_ref.alias = Some(self.expect_identifier_with_quoted()?);
}
using.push(using_ref);
}
if !self.match_token(TokenType::Comma) {
break;
}
}
}
// ClickHouse: IN PARTITION 'partition_id' clause before WHERE
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.check(TokenType::In)
&& self
.peek_nth(1)
.is_some_and(|t| t.text.eq_ignore_ascii_case("PARTITION"))
{
self.skip(); // consume IN
self.skip(); // consume PARTITION
// Consume partition expression (string or identifier)
// NOTE: the partition expression is discarded, not kept in the AST.
let _partition = self.parse_primary()?;
}
// Parse OUTPUT clause (TSQL) - may have been parsed early (before FROM)
let output = if early_output.is_some() {
early_output
} else if self.match_token(TokenType::Output) {
Some(self.parse_output_clause()?)
} else {
None
};
let where_clause = if self.match_token(TokenType::Where) {
Some(Where {
this: self.parse_expression()?,
})
} else {
None
};
// Parse ORDER BY clause (MySQL)
let order_by = if self.match_keywords(&[TokenType::Order, TokenType::By]) {
Some(self.parse_order_by()?)
} else {
None
};
// Parse LIMIT clause (MySQL)
let limit = if self.match_token(TokenType::Limit) {
Some(self.parse_expression()?)
} else {
None
};
// Parse RETURNING clause (PostgreSQL)
let returning = if self.match_token(TokenType::Returning) {
self.parse_select_expressions()?
} else {
Vec::new()
};
Ok(Expression::Delete(Box::new(Delete {
table,
hint,
on_cluster,
alias,
alias_explicit_as,
using,
where_clause,
output,
leading_comments,
with: None,
limit,
order_by,
returning,
tables,
tables_from_using,
joins,
force_index,
no_from: !_has_from,
})))
}
// ==================== DDL Parsing ====================
/// Parse a CREATE statement
///
/// Consumes the CREATE keyword plus any leading modifiers (OR REPLACE /
/// OR ALTER, TEMPORARY, MATERIALIZED, MySQL view options, Snowflake
/// SECURE/DYNAMIC/ICEBERG/..., Teradata SET/MULTISET/VOLATILE), then
/// dispatches to the object-specific parser (table, view, index, schema,
/// database, function, procedure, sequence, trigger, type, domain, and
/// several dialect extensions). Unrecognized CREATE targets are consumed
/// to the next semicolon and returned as a Raw expression.
fn parse_create(&mut self) -> Result<Expression> {
let create_pos = self.current; // position of CREATE token
let create_token = self.expect(TokenType::Create)?;
let leading_comments = create_token.comments;
// Handle OR REPLACE / OR ALTER (TSQL)
let or_replace = self.match_keywords(&[TokenType::Or, TokenType::Replace]);
let or_alter = !or_replace && self.match_text_seq(&["OR", "ALTER"]);
// Handle TEMPORARY
let temporary = self.match_token(TokenType::Temporary);
// Handle MATERIALIZED
let materialized = self.match_token(TokenType::Materialized);
// Parse MySQL-specific CREATE VIEW options: ALGORITHM, DEFINER, SQL SECURITY
// CREATE ALGORITHM=... DEFINER=... SQL SECURITY DEFINER VIEW ...
let mut algorithm: Option<String> = None;
let mut definer: Option<String> = None;
let mut security: Option<FunctionSecurity> = None;
while self.match_identifier("ALGORITHM")
|| self.match_identifier("DEFINER")
|| self.match_identifier("SQL")
{
let option_name = self.previous().text.to_ascii_uppercase();
if option_name == "ALGORITHM" && self.match_token(TokenType::Eq) {
// ALGORITHM=UNDEFINED|MERGE|TEMPTABLE
let value = self.expect_identifier_or_keyword()?;
algorithm = Some(value.to_ascii_uppercase());
} else if option_name == "DEFINER" && self.match_token(TokenType::Eq) {
// DEFINER=user@host (can include @ and %)
// Concatenate raw token texts until the next view option or VIEW.
let mut definer_value = String::new();
while !self.is_at_end()
&& !self.check(TokenType::View)
&& !self.check_identifier("ALGORITHM")
&& !self.check_identifier("DEFINER")
&& !self.check_identifier("SQL")
&& !self.check_identifier("SECURITY")
{
definer_value.push_str(&self.advance().text);
}
definer = Some(definer_value);
} else if option_name == "SQL" && self.match_identifier("SECURITY") {
// SQL SECURITY DEFINER/INVOKER
if self.match_identifier("DEFINER") {
security = Some(FunctionSecurity::Definer);
} else if self.match_identifier("INVOKER") {
security = Some(FunctionSecurity::Invoker);
}
}
}
// Handle SECURE modifier for VIEW (Snowflake)
let secure = self.match_identifier("SECURE");
// Handle table modifiers: DYNAMIC, ICEBERG, EXTERNAL, HYBRID, TRANSIENT (Snowflake), UNLOGGED (PostgreSQL)
let mut table_modifier: Option<String> = if self.check_identifier("DYNAMIC") {
self.skip();
Some("DYNAMIC".to_string())
} else if self.check_identifier("ICEBERG") {
self.skip();
Some("ICEBERG".to_string())
} else if self.check_identifier("EXTERNAL") {
self.skip();
Some("EXTERNAL".to_string())
} else if self.check_identifier("HYBRID") {
self.skip();
Some("HYBRID".to_string())
} else if self.check_identifier("TRANSIENT") {
self.skip();
Some("TRANSIENT".to_string())
} else if self.check_identifier("UNLOGGED") {
self.skip();
Some("UNLOGGED".to_string())
} else if self.check_identifier("DICTIONARY") {
self.skip();
Some("DICTIONARY".to_string())
} else if self.check(TokenType::Dictionary) {
self.skip();
Some("DICTIONARY".to_string())
} else {
None
};
// Teradata: SET/MULTISET/VOLATILE/GLOBAL TEMPORARY modifiers before TABLE
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::Teradata)
) {
// Collect the full run of modifiers in order, e.g. "GLOBAL TEMPORARY".
let mut parts = Vec::new();
loop {
if self.match_token(TokenType::Set) {
parts.push(self.previous().text.to_ascii_uppercase());
} else if self.match_identifier("MULTISET") {
parts.push(self.previous().text.to_ascii_uppercase());
} else if self.match_identifier("VOLATILE") {
parts.push(self.previous().text.to_ascii_uppercase());
} else if self.match_identifier("GLOBAL") {
parts.push(self.previous().text.to_ascii_uppercase());
} else if self.match_token(TokenType::Temporary) {
parts.push(self.previous().text.to_ascii_uppercase());
} else {
break;
}
}
if !parts.is_empty() {
table_modifier = Some(parts.join(" "));
}
}
// DICTIONARY was already consumed above; go straight to the table parser.
if table_modifier.as_deref() == Some("DICTIONARY") {
return self.parse_create_table(
or_replace,
temporary,
leading_comments,
table_modifier.as_deref(),
);
}
// Dispatch on the object kind following the modifiers.
match self.peek().token_type {
TokenType::Table => {
// Check if this is CREATE TABLE FUNCTION (BigQuery)
if self.current + 1 < self.tokens.len()
&& self.tokens[self.current + 1].token_type == TokenType::Function
{
self.skip(); // consume TABLE
return self.parse_create_function(or_replace, or_alter, temporary, true);
}
let modifier = if materialized {
Some("MATERIALIZED")
} else {
table_modifier.as_deref()
};
self.parse_create_table(or_replace, temporary, leading_comments, modifier)
}
TokenType::Dictionary => {
self.parse_create_table(or_replace, temporary, leading_comments, Some("DICTIONARY"))
}
TokenType::View => self.parse_create_view(
or_replace,
or_alter,
materialized,
temporary,
algorithm,
definer,
security,
secure,
),
TokenType::Unique => {
self.skip(); // consume UNIQUE
// Check for CLUSTERED/NONCLUSTERED after UNIQUE (TSQL)
let clustered = if self.check_identifier("CLUSTERED") {
self.skip();
Some("CLUSTERED".to_string())
} else if self.check_identifier("NONCLUSTERED") {
self.skip();
Some("NONCLUSTERED".to_string())
} else {
None
};
// Check for COLUMNSTORE (TSQL: CREATE UNIQUE NONCLUSTERED COLUMNSTORE INDEX)
if self.check_identifier("COLUMNSTORE") {
self.skip();
// Prepend COLUMNSTORE to clustered
let clustered = clustered
.map(|c| format!("{} COLUMNSTORE", c))
.or_else(|| Some("COLUMNSTORE".to_string()));
self.parse_create_index_with_clustered(true, clustered)
} else {
self.parse_create_index_with_clustered(true, clustered)
}
}
TokenType::Index => self.parse_create_index_with_clustered(false, None),
TokenType::Schema => self.parse_create_schema(leading_comments),
TokenType::Database => self.parse_create_database(),
TokenType::Function => {
self.parse_create_function(or_replace, or_alter, temporary, false)
}
TokenType::Procedure => self.parse_create_procedure(or_replace, or_alter),
TokenType::Sequence => self.parse_create_sequence(temporary, or_replace),
TokenType::Trigger => {
self.parse_create_trigger(or_replace, or_alter, false, create_pos)
}
TokenType::Constraint => {
self.skip(); // consume CONSTRAINT
self.parse_create_trigger(or_replace, or_alter, true, create_pos)
}
TokenType::Type => self.parse_create_type(),
TokenType::Domain => self.parse_create_domain(),
_ => {
// Handle TSQL CLUSTERED/NONCLUSTERED [COLUMNSTORE] INDEX
if self.check_identifier("CLUSTERED") || self.check_identifier("NONCLUSTERED") {
let clustered_text = self.advance().text.to_ascii_uppercase();
// Check for COLUMNSTORE after CLUSTERED/NONCLUSTERED
let clustered = if self.check_identifier("COLUMNSTORE") {
self.skip();
Some(format!("{} COLUMNSTORE", clustered_text))
} else {
Some(clustered_text)
};
return self.parse_create_index_with_clustered(false, clustered);
}
// Handle TSQL COLUMNSTORE INDEX (without CLUSTERED/NONCLUSTERED prefix)
if self.check_identifier("COLUMNSTORE") && {
let pos = self.current;
let result = pos + 1 < self.tokens.len()
&& self.tokens[pos + 1].token_type == TokenType::Index;
result
} {
self.skip(); // consume COLUMNSTORE
// COLUMNSTORE without prefix implies NONCLUSTERED
return self.parse_create_index_with_clustered(
false,
Some("NONCLUSTERED COLUMNSTORE".to_string()),
);
}
// Handle identifiers that aren't keywords: TAG, STAGE, STREAM, etc.
if self.check_identifier("TAG") {
return self.parse_create_tag(or_replace);
}
if self.check_identifier("STAGE") {
return self.parse_create_stage(or_replace, temporary);
}
if self.check_identifier("STREAM") {
return self.parse_create_stream(or_replace);
}
if self.check_identifier("TASK") {
return self.parse_create_task(or_replace);
}
// Snowflake: CREATE FILE FORMAT (two-word target needs lookahead)
if (self.check_identifier("FILE") || self.check(TokenType::File)) && {
let next = self.current + 1;
next < self.tokens.len()
&& (self.tokens[next].text.eq_ignore_ascii_case("FORMAT"))
} {
return self.parse_create_file_format(or_replace, temporary);
}
// TSQL: CREATE SYNONYM name FOR target
if self.check_identifier("SYNONYM") {
self.skip(); // consume SYNONYM
let name = self.parse_table_ref()?;
self.expect(TokenType::For)?;
let target = self.parse_table_ref()?;
return Ok(Expression::CreateSynonym(Box::new(
crate::expressions::CreateSynonym { name, target },
)));
}
// Databricks/Spark: CREATE [OR REFRESH] STREAMING/LIVE TABLE ...
// Consumed to the next semicolon and re-emitted as Raw.
if self.check_identifier("STREAMING") || self.check_identifier("LIVE") {
let start = self.current;
while !self.is_at_end() && !self.check(TokenType::Semicolon) {
self.skip();
}
let sql = self.tokens_to_sql(start, self.current);
let mut prefix = String::from("CREATE");
if or_replace {
prefix.push_str(" OR REPLACE");
}
prefix.push(' ');
prefix.push_str(&sql);
return Ok(Expression::Raw(Raw { sql: prefix }));
}
// Fall back to Raw for unrecognized CREATE targets
// (e.g., CREATE WAREHOUSE, CREATE STREAMLIT, CREATE STORAGE INTEGRATION, etc.)
{
let start = self.current;
while !self.is_at_end() && !self.check(TokenType::Semicolon) {
self.skip();
}
let sql = self.tokens_to_sql(start, self.current);
// Re-attach the modifiers already consumed so the Raw text
// round-trips the original statement.
let mut prefix = String::from("CREATE");
if or_replace {
prefix.push_str(" OR REPLACE");
}
if temporary {
prefix.push_str(" TEMPORARY");
}
if materialized {
prefix.push_str(" MATERIALIZED");
}
prefix.push(' ');
prefix.push_str(&sql);
Ok(Expression::Raw(Raw { sql: prefix }))
}
}
}
}
/// Parse CREATE TABLE
fn parse_create_table(
&mut self,
or_replace: bool,
temporary: bool,
leading_comments: Vec<String>,
table_modifier: Option<&str>,
) -> Result<Expression> {
if table_modifier == Some("DICTIONARY") {
let _ = self.match_token(TokenType::Dictionary);
} else {
self.expect(TokenType::Table)?;
}
// Handle IF NOT EXISTS
let if_not_exists =
self.match_keywords(&[TokenType::If, TokenType::Not, TokenType::Exists]);
let is_special_modifier = matches!(
table_modifier,
Some(
"DYNAMIC"
| "ICEBERG"
| "EXTERNAL"
| "HYBRID"
| "UNLOGGED"
| "DICTIONARY"
| "MATERIALIZED"
)
) || (table_modifier.is_some()
&& matches!(
self.config.dialect,
Some(crate::dialects::DialectType::Teradata)
));
let is_clickhouse = matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
);
// Parse table name
let name = self.parse_table_ref()?;
// ClickHouse: UUID 'xxx' clause after table name
let uuid = if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.check_identifier("UUID")
{
self.skip(); // consume UUID
let uuid_token = self.advance().clone();
// Strip surrounding quotes from the UUID string
let uuid_text = uuid_token.text.trim_matches('\'').to_string();
Some(uuid_text)
} else {
None
};
// ClickHouse: ON CLUSTER clause
let on_cluster = self.parse_on_cluster_clause()?;
// Teradata: options after name before column list
let teradata_post_name_options = if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::Teradata)
) {
self.parse_teradata_post_name_options()
} else {
Vec::new()
};
// Handle PARTITION OF parent_table [(column_defs)] [FOR VALUES spec | DEFAULT] [PARTITION BY ...]
if self.match_keywords(&[TokenType::Partition, TokenType::Of]) {
return self.parse_create_table_partition_of(
name,
if_not_exists,
temporary,
or_replace,
table_modifier,
leading_comments,
);
}
// ClickHouse: EMPTY AS source_table — create empty table from source
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.check_identifier("EMPTY")
{
if self.check_next(TokenType::As) {
self.skip(); // consume EMPTY
self.skip(); // consume AS
// Consume rest as Command
let start = self.current;
while !self.is_at_end() && !self.check(TokenType::Semicolon) {
self.skip();
}
let rest_sql = self.tokens_to_sql(start, self.current);
let mut prefix = String::from("CREATE TABLE");
if if_not_exists {
prefix.push_str(" IF NOT EXISTS");
}
prefix.push(' ');
prefix.push_str(&name.name.name);
prefix.push_str(" EMPTY AS ");
prefix.push_str(&rest_sql);
return Ok(Expression::Raw(Raw { sql: prefix }));
}
}
// Handle [SHALLOW | DEEP] CLONE source_table [AT(...) | BEFORE(...)]
// Databricks/Delta Lake uses SHALLOW CLONE / DEEP CLONE
// Snowflake uses just CLONE (which is equivalent to DEEP CLONE)
let shallow_clone = self.check_identifier("SHALLOW");
let deep_clone = self.check_identifier("DEEP");
if shallow_clone || deep_clone {
self.skip(); // consume SHALLOW or DEEP
}
// Also handle COPY (BigQuery: CREATE TABLE ... COPY source_table)
// But NOT "COPY GRANTS" which is a Snowflake property
let is_copy = self.check(TokenType::Copy) && !self.check_next_identifier("GRANTS");
if self.check_identifier("CLONE") || is_copy {
self.skip(); // consume CLONE or COPY
// ClickHouse: CLONE AS source_table (AS is part of the syntax, not an alias)
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) {
let _ = self.match_token(TokenType::As);
}
let source = self.parse_table_ref()?;
// Parse optional AT or BEFORE time travel clause
// Note: BEFORE is a keyword token, AT is an identifier
let at_clause = if self.match_identifier("AT") || self.match_token(TokenType::Before) {
let keyword = self.previous().text.to_ascii_uppercase();
self.expect(TokenType::LParen)?;
// Parse the content: OFFSET => value or TIMESTAMP => value
let mut result = format!("{} (", keyword);
let mut prev_token_type: Option<TokenType> = None;
let mut paren_depth = 1;
while !self.is_at_end() && paren_depth > 0 {
let token = self.advance();
if token.token_type == TokenType::LParen {
paren_depth += 1;
} else if token.token_type == TokenType::RParen {
paren_depth -= 1;
if paren_depth == 0 {
break;
}
}
let needs_space = !result.ends_with('(')
&& prev_token_type != Some(TokenType::Arrow)
&& prev_token_type != Some(TokenType::Dash)
&& prev_token_type != Some(TokenType::LParen)
&& prev_token_type != Some(TokenType::Comma) // comma already adds trailing space
&& token.token_type != TokenType::LParen; // no space before (
if needs_space
&& token.token_type != TokenType::RParen
&& token.token_type != TokenType::Comma
{
result.push(' ');
}
// Properly quote string literals
if token.token_type == TokenType::String {
result.push('\'');
result.push_str(&token.text.replace('\'', "''"));
result.push('\'');
} else {
result.push_str(&token.text);
}
if token.token_type == TokenType::Arrow || token.token_type == TokenType::Comma
{
result.push(' ');
}
prev_token_type = Some(token.token_type);
}
result.push(')');
Some(Expression::Raw(Raw { sql: result }))
} else if self.match_identifier("VERSION") || self.match_identifier("TIMESTAMP") {
// Databricks: VERSION AS OF n / TIMESTAMP AS OF t
let keyword = self.previous().text.to_ascii_uppercase();
self.match_token(TokenType::As); // consume AS
self.match_identifier("OF"); // consume OF
let token = self.advance();
let value = if token.token_type == TokenType::String {
format!("'{}'", token.text.replace('\'', "''"))
} else {
token.text.clone()
};
Some(Expression::Raw(Raw {
sql: format!("{} AS OF {}", keyword, value),
}))
} else {
None
};
// Return the CLONE table immediately
return Ok(Expression::CreateTable(Box::new(CreateTable {
name,
on_cluster: on_cluster.clone(),
columns: Vec::new(),
constraints: Vec::new(),
if_not_exists,
temporary,
or_replace,
table_modifier: table_modifier.map(|s| s.to_string()),
as_select: None,
as_select_parenthesized: false,
on_commit: None,
clone_source: Some(source),
clone_at_clause: at_clause,
shallow_clone,
is_copy,
leading_comments,
with_properties: Vec::new(),
teradata_post_name_options: teradata_post_name_options.clone(),
with_data: None,
with_statistics: None,
teradata_indexes: Vec::new(),
with_cte: None,
properties: Vec::new(),
partition_of: None,
post_table_properties: Vec::new(),
mysql_table_options: Vec::new(),
inherits: Vec::new(),
on_property: None,
copy_grants: false,
using_template: None,
rollup: None,
uuid: uuid.clone(),
with_partition_columns: Vec::new(),
with_connection: None,
})));
}
// Handle WITH properties before columns/AS.
// BigQuery EXTERNAL tables use WITH differently (WITH PARTITION COLUMNS,
// WITH CONNECTION), so handle that complete flow here and early-return.
let is_bigquery_external = is_special_modifier
&& table_modifier == Some("EXTERNAL")
&& matches!(
self.config.dialect,
Some(crate::dialects::DialectType::BigQuery)
);
if is_bigquery_external {
// BigQuery: CREATE EXTERNAL TABLE [IF NOT EXISTS] name
// [(col_name col_type, ...)]
// [WITH PARTITION COLUMNS (col_name col_type, ...)]
// [WITH CONNECTION `project.region.connection`]
// OPTIONS (key = value, ...)
// Parse optional column definitions
let (columns, constraints) = if self.check(TokenType::LParen) {
self.advance(); // consume (
let result = self.parse_column_definitions()?;
self.expect(TokenType::RParen)?;
result
} else {
(Vec::new(), Vec::new())
};
let mut with_partition_columns = Vec::new();
let mut with_connection = None;
let mut properties = Vec::new();
// Parse WITH PARTITION COLUMNS / WITH CONNECTION in any order.
// Note: duplicate WITH PARTITION COLUMNS clauses silently overwrite;
// this isn't a real-world concern so we don't error on it.
while self.check(TokenType::With) {
let save = self.current;
self.advance(); // consume WITH
if self.check(TokenType::Partition) {
// WITH PARTITION COLUMNS (col_name col_type, ...)
self.advance(); // consume PARTITION
if !self.match_identifier("COLUMNS") {
return Err(self.parse_error("Expected COLUMNS after WITH PARTITION"));
}
if self.check(TokenType::LParen) {
self.advance(); // consume (
let (part_cols, _) = self.parse_column_definitions()?;
self.expect(TokenType::RParen)?;
with_partition_columns = part_cols;
}
} else if self.match_identifier("CONNECTION") {
// WITH CONNECTION `project.region.connection`
with_connection = Some(self.parse_table_ref()?);
} else {
// Not a BQ clause - revert and break
self.current = save;
break;
}
}
// Parse OPTIONS (...)
if let Some(opts) = self.parse_bigquery_options_property()? {
properties.push(opts);
}
return Ok(Expression::CreateTable(Box::new(CreateTable {
name,
on_cluster: on_cluster.clone(),
columns,
constraints,
if_not_exists,
temporary,
or_replace,
table_modifier: Some("EXTERNAL".to_string()),
// BigQuery EXTERNAL tables don't support AS SELECT
as_select: None,
as_select_parenthesized: false,
// Not applicable to BigQuery — Snowflake/Teradata/other-dialect features
on_commit: None,
clone_source: None,
clone_at_clause: None,
shallow_clone: false,
is_copy: false,
leading_comments,
// BigQuery EXTERNAL uses OPTIONS(), not generic WITH (key=value) properties
with_properties: Vec::new(),
teradata_post_name_options: teradata_post_name_options.clone(),
with_data: None,
with_statistics: None,
teradata_indexes: Vec::new(),
with_cte: None,
// BigQuery OPTIONS are stored here via parse_bigquery_options_property()
properties,
partition_of: None,
post_table_properties: Vec::new(),
mysql_table_options: Vec::new(),
inherits: Vec::new(),
on_property: None,
// COPY GRANTS is a Snowflake feature, not applicable to BigQuery
copy_grants: false,
// USING TEMPLATE is a Snowflake feature, not applicable to BigQuery
using_template: None,
rollup: None,
uuid: None,
// BigQuery-specific fields parsed above
with_partition_columns,
with_connection,
})));
}
// Generic WITH properties (e.g., CREATE TABLE z WITH (FORMAT='parquet') AS SELECT 1)
let with_properties = if self.match_token(TokenType::With) {
self.parse_with_properties()?
} else {
Vec::new()
};
// Snowflake: COPY GRANTS clause (before column list or AS)
let copy_grants = self.match_text_seq(&["COPY", "GRANTS"]);
// Snowflake: USING TEMPLATE (expr) - allows schema inference from a query
let using_template = if self.match_text_seq(&["USING", "TEMPLATE"]) {
Some(Box::new(self.parse_primary()?))
} else {
None
};
// If we have USING TEMPLATE, return early since it replaces AS SELECT
if using_template.is_some() {
return Ok(Expression::CreateTable(Box::new(CreateTable {
name,
on_cluster: on_cluster.clone(),
columns: Vec::new(),
constraints: Vec::new(),
if_not_exists,
temporary,
or_replace,
table_modifier: table_modifier.map(|s| s.to_string()),
as_select: None,
as_select_parenthesized: false,
on_commit: None,
clone_source: None,
clone_at_clause: None,
shallow_clone: false,
is_copy: false,
leading_comments,
with_properties,
teradata_post_name_options: teradata_post_name_options.clone(),
with_data: None,
with_statistics: None,
teradata_indexes: Vec::new(),
with_cte: None,
properties: Vec::new(),
partition_of: None,
post_table_properties: Vec::new(),
mysql_table_options: Vec::new(),
inherits: Vec::new(),
on_property: None,
copy_grants,
using_template,
rollup: None,
uuid: uuid.clone(),
with_partition_columns: Vec::new(),
with_connection: None,
})));
}
// Redshift: Parse DISTKEY, SORTKEY, DISTSTYLE, BACKUP before AS SELECT (CTAS without columns)
// This handles: CREATE TABLE t BACKUP YES|NO AS SELECT ...
let mut redshift_ctas_properties: Vec<Expression> = Vec::new();
loop {
if self.match_identifier("DISTKEY") {
// DISTKEY(column)
if self.match_token(TokenType::LParen) {
let col = self.expect_identifier()?;
self.expect(TokenType::RParen)?;
redshift_ctas_properties.push(Expression::DistKeyProperty(Box::new(
DistKeyProperty {
this: Box::new(Expression::boxed_column(Column {
name: Identifier::new(col),
table: None,
join_mark: false,
trailing_comments: Vec::new(),
span: None,
inferred_type: None,
})),
},
)));
}
} else if self.check_identifier("COMPOUND") || self.check_identifier("INTERLEAVED") {
// COMPOUND SORTKEY(col, ...) or INTERLEAVED SORTKEY(col, ...)
let modifier = self.advance().text.to_ascii_uppercase();
if self.match_identifier("SORTKEY") && self.match_token(TokenType::LParen) {
let mut cols = Vec::new();
loop {
let col = self.expect_identifier()?;
cols.push(Expression::boxed_column(Column {
name: Identifier::new(col),
table: None,
join_mark: false,
trailing_comments: Vec::new(),
span: None,
inferred_type: None,
}));
if !self.match_token(TokenType::Comma) {
break;
}
}
self.expect(TokenType::RParen)?;
let compound_value = if modifier == "COMPOUND" {
Some(Box::new(Expression::Boolean(BooleanLiteral {
value: true,
})))
} else {
None
};
redshift_ctas_properties.push(Expression::SortKeyProperty(Box::new(
SortKeyProperty {
this: Box::new(Expression::Tuple(Box::new(Tuple {
expressions: cols,
}))),
compound: compound_value,
},
)));
}
} else if self.match_identifier("SORTKEY") {
// SORTKEY(column, ...)
if self.match_token(TokenType::LParen) {
let mut cols = Vec::new();
loop {
let col = self.expect_identifier()?;
cols.push(Expression::boxed_column(Column {
name: Identifier::new(col),
table: None,
join_mark: false,
trailing_comments: Vec::new(),
span: None,
inferred_type: None,
}));
if !self.match_token(TokenType::Comma) {
break;
}
}
self.expect(TokenType::RParen)?;
redshift_ctas_properties.push(Expression::SortKeyProperty(Box::new(
SortKeyProperty {
this: Box::new(Expression::Tuple(Box::new(Tuple {
expressions: cols,
}))),
compound: None,
},
)));
}
} else if self.match_identifier("DISTSTYLE") {
// DISTSTYLE ALL|EVEN|AUTO|KEY
if self.match_texts(&["ALL", "EVEN", "AUTO", "KEY"]) {
let style = self.previous().text.to_ascii_uppercase();
redshift_ctas_properties.push(Expression::DistStyleProperty(Box::new(
DistStyleProperty {
this: Box::new(Expression::Var(Box::new(Var { this: style }))),
},
)));
}
} else if self.match_identifier("BACKUP") {
// BACKUP YES|NO
if self.match_texts(&["YES", "NO"]) {
let value = self.previous().text.to_ascii_uppercase();
redshift_ctas_properties.push(Expression::BackupProperty(Box::new(
BackupProperty {
this: Box::new(Expression::Var(Box::new(Var { this: value }))),
},
)));
}
} else {
break;
}
}
// Check for AS SELECT (CTAS)
if self.match_token(TokenType::As) {
// ClickHouse: CREATE TABLE t AS other_table [ENGINE = ...] — copy structure from another table
// Also: CREATE TABLE t AS func_name(args...) — table from function (e.g., remote, merge)
// Detect when AS is followed by an identifier (not SELECT/WITH/LParen)
if is_clickhouse
&& !self.check(TokenType::Select)
&& !self.check(TokenType::With)
&& !self.check(TokenType::LParen)
&& (self.is_identifier_token() || self.is_safe_keyword_as_identifier())
{
// Check if this is AS func_name(...) — table function
let is_table_func = self.current + 1 < self.tokens.len()
&& self.tokens[self.current + 1].token_type == TokenType::LParen;
let source = if is_table_func {
// Parse as expression to consume function call with arguments
self.parse_primary()?;
let mut table_properties: Vec<Expression> = Vec::new();
self.parse_clickhouse_table_properties(&mut table_properties)?;
return Ok(Expression::CreateTable(Box::new(CreateTable {
name,
on_cluster: on_cluster.clone(),
columns: Vec::new(),
constraints: Vec::new(),
if_not_exists,
temporary,
or_replace,
table_modifier: table_modifier.map(|s| s.to_string()),
as_select: None,
as_select_parenthesized: false,
on_commit: None,
clone_source: None,
clone_at_clause: None,
shallow_clone: false,
is_copy: false,
leading_comments,
with_properties,
teradata_post_name_options: teradata_post_name_options.clone(),
with_data: None,
with_statistics: None,
teradata_indexes: Vec::new(),
with_cte: None,
properties: table_properties,
partition_of: None,
post_table_properties: redshift_ctas_properties,
mysql_table_options: Vec::new(),
inherits: Vec::new(),
on_property: None,
copy_grants,
using_template: None,
rollup: None,
uuid: uuid.clone(),
with_partition_columns: Vec::new(),
with_connection: None,
})));
} else {
self.parse_table_ref()?
};
// Parse ClickHouse table properties after the source table
let mut table_properties: Vec<Expression> = Vec::new();
self.parse_clickhouse_table_properties(&mut table_properties)?;
return Ok(Expression::CreateTable(Box::new(CreateTable {
name,
on_cluster: on_cluster.clone(),
columns: Vec::new(),
constraints: Vec::new(),
if_not_exists,
temporary,
or_replace,
table_modifier: table_modifier.map(|s| s.to_string()),
as_select: None,
as_select_parenthesized: false,
on_commit: None,
clone_source: Some(source),
clone_at_clause: None,
shallow_clone: false,
is_copy: false,
leading_comments,
with_properties,
teradata_post_name_options: teradata_post_name_options.clone(),
with_data: None,
with_statistics: None,
teradata_indexes: Vec::new(),
with_cte: None,
properties: table_properties,
partition_of: None,
post_table_properties: redshift_ctas_properties,
mysql_table_options: Vec::new(),
inherits: Vec::new(),
on_property: None,
copy_grants,
using_template: None,
rollup: None,
uuid: uuid.clone(),
with_partition_columns: Vec::new(),
with_connection: None,
})));
}
// The query can be:
// - SELECT ... (simple case)
// - (SELECT 1) UNION ALL (SELECT 2) (set operations)
// - (WITH cte AS (SELECT 1) SELECT * FROM cte) (CTE in parens)
let mut as_select_parenthesized = self.check(TokenType::LParen);
let query = if as_select_parenthesized {
// Parenthesized query - parse as expression which handles subqueries
// Note: parse_primary will consume set operations like UNION internally
let subquery = self.parse_primary()?;
// If parse_primary returned a set operation, the outer parens weren't wrapping
// the entire expression - they were part of the operands
if matches!(
&subquery,
Expression::Union(_) | Expression::Intersect(_) | Expression::Except(_)
) {
as_select_parenthesized = false;
subquery
} else {
// Just a parenthesized query without set ops
// Keep the Subquery wrapper if it has limit/offset/order_by
if let Expression::Subquery(ref sq) = subquery {
if sq.limit.is_some() || sq.offset.is_some() || sq.order_by.is_some() {
// Keep the Subquery to preserve the modifiers
subquery
} else {
// Extract the inner query
if let Expression::Subquery(sq) = subquery {
sq.this
} else {
subquery
}
}
} else if let Expression::Paren(p) = subquery {
p.this
} else {
subquery
}
}
} else if self.check(TokenType::With) {
// Handle WITH ... SELECT ...
self.parse_statement()?
} else {
self.parse_select()?
};
// Parse any trailing Teradata options like "WITH DATA", "NO PRIMARY INDEX", etc.
let (with_data, with_statistics, teradata_indexes) =
self.parse_teradata_table_options();
let on_commit = if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::Teradata)
) && self.check(TokenType::On)
&& self.check_next(TokenType::Commit)
{
self.skip(); // ON
self.skip(); // COMMIT
if self.match_keywords(&[TokenType::Preserve, TokenType::Rows]) {
Some(OnCommit::PreserveRows)
} else if self.match_keywords(&[TokenType::Delete, TokenType::Rows]) {
Some(OnCommit::DeleteRows)
} else {
return Err(
self.parse_error("Expected PRESERVE ROWS or DELETE ROWS after ON COMMIT")
);
}
} else {
None
};
return Ok(Expression::CreateTable(Box::new(CreateTable {
name,
on_cluster: on_cluster.clone(),
columns: Vec::new(),
constraints: Vec::new(),
if_not_exists,
temporary,
or_replace,
table_modifier: table_modifier.map(|s| s.to_string()),
as_select: Some(query),
as_select_parenthesized,
on_commit,
clone_source: None,
clone_at_clause: None,
shallow_clone: false,
is_copy: false,
leading_comments,
with_properties,
teradata_post_name_options: teradata_post_name_options.clone(),
with_data,
with_statistics,
teradata_indexes,
with_cte: None,
properties: Vec::new(),
partition_of: None,
post_table_properties: redshift_ctas_properties,
mysql_table_options: Vec::new(),
inherits: Vec::new(),
on_property: None,
copy_grants,
using_template: None,
rollup: None,
uuid: uuid.clone(),
with_partition_columns: Vec::new(),
with_connection: None,
})));
}
// ClickHouse: allow table properties/AS SELECT without a column list
if is_clickhouse && !self.check(TokenType::LParen) {
let starts_props = self.check_identifier("ENGINE")
|| self.check(TokenType::Order)
|| self.check(TokenType::Sample)
|| self.check(TokenType::Settings)
|| self.check(TokenType::Comment)
|| self.check(TokenType::As);
if starts_props {
let mut table_properties: Vec<Expression> = Vec::new();
self.parse_clickhouse_table_properties(&mut table_properties)?;
let as_select = if self.match_token(TokenType::As) {
Some(self.parse_statement()?)
} else {
None
};
let as_select_parenthesized = as_select.is_some();
if as_select.is_some() {
self.parse_clickhouse_table_properties(&mut table_properties)?;
}
return Ok(Expression::CreateTable(Box::new(CreateTable {
name,
on_cluster: on_cluster.clone(),
columns: Vec::new(),
constraints: Vec::new(),
if_not_exists,
temporary,
or_replace,
table_modifier: table_modifier.map(|s| s.to_string()),
as_select,
as_select_parenthesized,
on_commit: None,
clone_source: None,
clone_at_clause: None,
shallow_clone: false,
is_copy: false,
leading_comments,
with_properties,
teradata_post_name_options: teradata_post_name_options.clone(),
with_data: None,
with_statistics: None,
teradata_indexes: Vec::new(),
with_cte: None,
properties: table_properties,
partition_of: None,
post_table_properties: Vec::new(),
mysql_table_options: Vec::new(),
inherits: Vec::new(),
on_property: None,
copy_grants,
using_template: None,
rollup: None,
uuid: uuid.clone(),
with_partition_columns: Vec::new(),
with_connection: None,
})));
}
}
// For DYNAMIC/ICEBERG/EXTERNAL tables, columns might be optional (use AS SELECT or other syntax)
// Check if we have a left paren for columns or if we're going straight to options
if !self.check(TokenType::LParen) && is_special_modifier {
// No columns - parse options and AS SELECT
let mut extra_options = Vec::new();
// Parse key=value options until AS or end
// Note: WAREHOUSE is a keyword token type, so check for it explicitly
while !self.is_at_end()
&& !self.check(TokenType::As)
&& !self.check(TokenType::Semicolon)
{
if self.is_identifier_token()
|| self.is_safe_keyword_as_identifier()
|| self.check(TokenType::Warehouse)
{
let key = self.advance().text;
if self.match_token(TokenType::Eq) {
// Capture value
let value = if self.check(TokenType::String) {
let v = format!("'{}'", self.peek().text);
self.skip();
v
} else if self.is_identifier_token() || self.is_safe_keyword_as_identifier()
{
self.advance().text
} else {
break;
};
extra_options.push((key, value));
} else {
// Just a keyword without value (like WAREHOUSE mywh)
if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
let value = self.advance().text;
extra_options.push((key, value));
}
}
} else {
break;
}
}
// Check for AS SELECT
let as_select = if self.match_token(TokenType::As) {
Some(self.parse_statement()?)
} else {
None
};
return Ok(Expression::CreateTable(Box::new(CreateTable {
name,
on_cluster: on_cluster.clone(),
columns: Vec::new(),
constraints: Vec::new(),
if_not_exists,
temporary,
or_replace,
table_modifier: table_modifier.map(|s| s.to_string()),
as_select,
as_select_parenthesized: false,
on_commit: None,
clone_source: None,
clone_at_clause: None,
shallow_clone: false,
is_copy: false,
leading_comments,
with_properties: extra_options,
teradata_post_name_options: teradata_post_name_options.clone(),
with_data: None,
with_statistics: None,
teradata_indexes: Vec::new(),
with_cte: None,
properties: Vec::new(),
partition_of: None,
post_table_properties: Vec::new(),
mysql_table_options: Vec::new(),
inherits: Vec::new(),
on_property: None,
copy_grants,
using_template: None,
rollup: None,
uuid: uuid.clone(),
with_partition_columns: Vec::new(),
with_connection: None,
})));
}
// MySQL: CREATE TABLE A LIKE B (without parentheses)
if self.check(TokenType::Like) {
self.skip(); // consume LIKE
let source_ref = self.parse_table_ref()?;
return Ok(Expression::CreateTable(Box::new(CreateTable {
name,
on_cluster: on_cluster.clone(),
columns: Vec::new(),
constraints: vec![TableConstraint::Like {
source: source_ref,
options: Vec::new(),
}],
if_not_exists,
temporary,
or_replace,
table_modifier: table_modifier.map(|s| s.to_string()),
as_select: None,
as_select_parenthesized: false,
on_commit: None,
clone_source: None,
clone_at_clause: None,
shallow_clone: false,
is_copy: false,
leading_comments,
with_properties,
teradata_post_name_options: teradata_post_name_options.clone(),
with_data: None,
with_statistics: None,
teradata_indexes: Vec::new(),
with_cte: None,
properties: Vec::new(),
partition_of: None,
post_table_properties: Vec::new(),
mysql_table_options: Vec::new(),
inherits: Vec::new(),
on_property: None,
copy_grants,
using_template: None,
rollup: None,
uuid: uuid.clone(),
with_partition_columns: Vec::new(),
with_connection: None,
})));
}
// Snowflake: CREATE TABLE a TAG (key='value', ...) without column definitions
if self.match_keyword("TAG")
|| (self.match_token(TokenType::With) && self.match_keyword("TAG"))
{
let tags = self.parse_tags()?;
return Ok(Expression::CreateTable(Box::new(CreateTable {
name,
on_cluster: on_cluster.clone(),
columns: Vec::new(),
constraints: vec![TableConstraint::Tags(tags)],
if_not_exists,
temporary,
or_replace,
table_modifier: table_modifier.map(|s| s.to_string()),
as_select: None,
as_select_parenthesized: false,
on_commit: None,
clone_source: None,
clone_at_clause: None,
shallow_clone: false,
is_copy: false,
leading_comments,
with_properties,
teradata_post_name_options: teradata_post_name_options.clone(),
with_data: None,
with_statistics: None,
teradata_indexes: Vec::new(),
with_cte: None,
properties: Vec::new(),
partition_of: None,
post_table_properties: Vec::new(),
mysql_table_options: Vec::new(),
inherits: Vec::new(),
on_property: None,
copy_grants,
using_template: None,
rollup: None,
uuid: uuid.clone(),
with_partition_columns: Vec::new(),
with_connection: None,
})));
}
// Hive/Spark/Databricks: CREATE TABLE t TBLPROPERTIES (...) without column definitions
// Check for Hive-style table properties before expecting column definitions
if self.check_identifier("TBLPROPERTIES")
|| self.check_identifier("LOCATION")
|| self.check_identifier("STORED")
|| self.check(TokenType::Row)
|| self.check(TokenType::Using)
|| self.check_identifier("CLUSTERED")
|| self.check_identifier("PARTITIONED")
|| self.check_identifier("COMMENT")
{
// Parse Hive table properties without column definitions
let hive_properties = self.parse_hive_table_properties()?;
// Check for AS SELECT (CTAS) after properties
let as_select = if self.match_token(TokenType::As) {
Some(self.parse_statement()?)
} else {
None
};
return Ok(Expression::CreateTable(Box::new(CreateTable {
name,
on_cluster: on_cluster.clone(),
columns: Vec::new(),
constraints: Vec::new(),
if_not_exists,
temporary,
or_replace,
table_modifier: table_modifier.map(|s| s.to_string()),
as_select,
as_select_parenthesized: false,
on_commit: None,
clone_source: None,
clone_at_clause: None,
shallow_clone: false,
is_copy: false,
leading_comments,
with_properties,
teradata_post_name_options: teradata_post_name_options.clone(),
with_data: None,
with_statistics: None,
teradata_indexes: Vec::new(),
with_cte: None,
properties: hive_properties,
partition_of: None,
post_table_properties: Vec::new(),
mysql_table_options: Vec::new(),
inherits: Vec::new(),
on_property: None,
copy_grants,
using_template: None,
rollup: None,
uuid: uuid.clone(),
with_partition_columns: Vec::new(),
with_connection: None,
})));
}
// Check if (SELECT ...) or (WITH ...) follows - this is CTAS without explicit AS keyword
if self.check(TokenType::LParen) {
let saved = self.current;
self.skip(); // consume (
let is_ctas = self.check(TokenType::Select) || self.check(TokenType::With);
self.current = saved;
if is_ctas {
// Parse as subquery
let subquery = self.parse_primary()?;
let query = if let Expression::Subquery(sq) = subquery {
sq.this
} else if let Expression::Paren(p) = subquery {
p.this
} else {
subquery
};
return Ok(Expression::CreateTable(Box::new(CreateTable {
name,
on_cluster: on_cluster.clone(),
columns: Vec::new(),
constraints: Vec::new(),
if_not_exists,
temporary,
or_replace,
table_modifier: table_modifier.map(|s| s.to_string()),
as_select: Some(query),
as_select_parenthesized: true,
on_commit: None,
clone_source: None,
clone_at_clause: None,
shallow_clone: false,
is_copy: false,
leading_comments,
with_properties,
teradata_post_name_options: teradata_post_name_options.clone(),
with_data: None,
with_statistics: None,
teradata_indexes: Vec::new(),
with_cte: None,
properties: Vec::new(),
partition_of: None,
post_table_properties: Vec::new(),
mysql_table_options: Vec::new(),
inherits: Vec::new(),
on_property: None,
copy_grants,
using_template: None,
rollup: None,
uuid: uuid.clone(),
with_partition_columns: Vec::new(),
with_connection: None,
})));
}
}
// BigQuery (and others): CREATE TABLE t PARTITION BY ... CLUSTER BY ... OPTIONS(...) AS (SELECT ...)
// When there are no column definitions, skip straight to property/AS parsing
let no_column_defs = !self.check(TokenType::LParen)
&& (self.check(TokenType::Partition)
|| self.check(TokenType::PartitionBy)
|| self.check(TokenType::Cluster)
|| self.check_identifier("OPTIONS")
|| self.check(TokenType::As));
// Parse column definitions
if !no_column_defs {
self.expect(TokenType::LParen)?;
}
// For DYNAMIC TABLE, column list contains only names without types
// e.g., CREATE DYNAMIC TABLE t (col1, col2, col3) TARGET_LAG=... AS SELECT ...
let (columns, constraints) = if no_column_defs {
(Vec::new(), Vec::new())
} else if table_modifier == Some("DYNAMIC") {
// Check if this looks like a simple column name list (just identifiers separated by commas)
// by peeking ahead - if next token after identifier is comma or rparen, it's a name-only list
let saved = self.current;
let is_name_only_list =
if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
self.skip();
let result = self.check(TokenType::Comma) || self.check(TokenType::RParen);
self.current = saved;
result
} else {
false
};
if is_name_only_list {
// Parse column names without types
let mut cols = Vec::new();
loop {
let name = self.expect_identifier_or_safe_keyword_with_quoted()?;
// Create a column def with an empty/placeholder type
let mut col_def = ColumnDef::new(
name.name.clone(),
DataType::Custom {
name: String::new(),
},
);
col_def.name = name;
cols.push(col_def);
if !self.match_token(TokenType::Comma) {
break;
}
}
(cols, Vec::new())
} else {
// Regular column definitions with types
self.parse_column_definitions()?
}
} else {
self.parse_column_definitions()?
};
if !no_column_defs {
self.expect(TokenType::RParen)?;
}
// Parse COMMENT before WITH properties (Presto: CREATE TABLE x (...) COMMENT 'text' WITH (...))
let pre_with_comment = if self.check(TokenType::Comment) {
let saved = self.current;
self.skip(); // consume COMMENT
if self.check(TokenType::String) {
let comment_text = self.advance().text.clone();
Some(comment_text)
} else {
self.current = saved;
None
}
} else {
None
};
// Handle WITH properties after columns (e.g., CREATE TABLE z (z INT) WITH (...))
// But skip if this is WITH(SYSTEM_VERSIONING=...) which is handled by parse_post_table_properties
let with_properties_after = if self.check(TokenType::With) {
// Lookahead: check if this is WITH(SYSTEM_VERSIONING=...)
let saved = self.current;
self.skip(); // consume WITH
let is_system_versioning = if self.check(TokenType::LParen) {
let saved2 = self.current;
self.skip(); // consume (
let result = self.check_identifier("SYSTEM_VERSIONING");
self.current = saved2; // retreat to before (
result
} else {
false
};
let is_row_access_policy = self.check_text_seq(&["ROW", "ACCESS", "POLICY"]);
if is_system_versioning || is_row_access_policy {
// Retreat back before WITH, let parse_post_table_properties handle it
self.current = saved;
Vec::new()
} else {
// Normal WITH properties parsing
self.parse_with_properties()?
}
} else {
Vec::new()
};
// Combine properties from before and after columns
let mut all_with_properties = with_properties;
all_with_properties.extend(with_properties_after);
// For DYNAMIC/ICEBERG/EXTERNAL tables with columns, parse Snowflake-specific options
// like TARGET_LAG, WAREHOUSE, CATALOG, EXTERNAL_VOLUME, LOCATION etc.
if is_special_modifier {
while !self.is_at_end()
&& !self.check(TokenType::As)
&& !self.check(TokenType::Semicolon)
{
// Check for known Snowflake table options (WAREHOUSE is a keyword, others are identifiers)
// These are Snowflake-style options that use KEY=VALUE or KEY VALUE (without =)
// Hive-style LOCATION/TBLPROPERTIES (without =) should NOT be matched here
let is_snowflake_option = self.check(TokenType::Warehouse)
|| self.check_identifier("TARGET_LAG")
|| self.check_identifier("CATALOG")
|| self.check_identifier("EXTERNAL_VOLUME")
|| self.check_identifier("BASE_LOCATION")
|| self.check_identifier("REFRESH_MODE")
|| self.check_identifier("INITIALIZE")
|| self.check_identifier("DATA_RETENTION_TIME_IN_DAYS")
|| self.check_identifier("LOCATION")
|| self.check_identifier("PARTITION")
|| self.check_identifier("FILE_FORMAT")
|| self.check_identifier("AUTO_REFRESH");
if is_snowflake_option {
// Save position before consuming key - we might need to retreat for Hive-style syntax
let saved = self.current;
let key = self.advance().text;
if self.match_token(TokenType::Eq) {
// Capture value - could be string, identifier, stage path @..., keyword, or parenthesized options
let value = if self.check(TokenType::LParen) {
// Parenthesized option list like file_format = (type = parquet compression = gzip)
self.skip(); // consume (
let mut options = String::from("(");
let mut depth = 1;
while !self.is_at_end() && depth > 0 {
let tok = self.advance();
if tok.token_type == TokenType::LParen {
depth += 1;
} else if tok.token_type == TokenType::RParen {
depth -= 1;
}
// Add space before tokens that need it (not after open paren, not before close paren)
if !options.ends_with('(')
&& !options.ends_with(' ')
&& tok.token_type != TokenType::RParen
{
options.push(' ');
}
options.push_str(&tok.text);
}
options
} else if self.check(TokenType::String) {
let v = format!("'{}'", self.peek().text);
self.skip();
v
} else if self.check(TokenType::DAt) {
// Stage path like @s1/logs/
self.skip(); // consume @
let mut path = String::from("@");
if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
path.push_str(&self.advance().text);
}
// Parse path segments, but stop before Snowflake option keywords
while self.check(TokenType::Slash) {
// Peek ahead to see if next identifier is a Snowflake option keyword
if self.current + 1 < self.tokens.len() {
let next = &self.tokens[self.current + 1];
if next.text.eq_ignore_ascii_case("FILE_FORMAT")
|| next.text.eq_ignore_ascii_case("PARTITION_TYPE")
|| next.text.eq_ignore_ascii_case("AUTO_REFRESH")
|| next.text.eq_ignore_ascii_case("LOCATION")
|| next.text.eq_ignore_ascii_case("PARTITION")
|| next.text.eq_ignore_ascii_case("WAREHOUSE")
{
// Consume the trailing slash before the keyword
self.skip();
path.push('/');
break;
}
}
self.skip();
path.push('/');
if self.is_identifier_token()
|| self.is_safe_keyword_as_identifier()
{
path.push_str(&self.advance().text);
}
}
path
} else if self.check(TokenType::Var) && self.peek().text.starts_with('@') {
// Stage path tokenized as Var (e.g., @s2/logs/)
// When @ is followed by alphanumeric, tokenizer creates a Var token
let mut path = self.advance().text;
// Parse path segments, but stop before Snowflake option keywords
while self.check(TokenType::Slash) {
// Peek ahead to see if next identifier is a Snowflake option keyword
if self.current + 1 < self.tokens.len() {
let next = &self.tokens[self.current + 1];
if next.text.eq_ignore_ascii_case("FILE_FORMAT")
|| next.text.eq_ignore_ascii_case("PARTITION_TYPE")
|| next.text.eq_ignore_ascii_case("AUTO_REFRESH")
|| next.text.eq_ignore_ascii_case("LOCATION")
|| next.text.eq_ignore_ascii_case("PARTITION")
|| next.text.eq_ignore_ascii_case("WAREHOUSE")
{
// Consume the trailing slash before the keyword
self.skip();
path.push('/');
break;
}
}
self.skip();
path.push('/');
if self.is_identifier_token()
|| self.is_safe_keyword_as_identifier()
{
path.push_str(&self.advance().text);
}
}
path
} else if self.check(TokenType::Warehouse) {
self.advance().text
} else if self.is_identifier_token() || self.is_safe_keyword_as_identifier()
{
self.advance().text
} else {
// No valid value after =, retreat and let Hive parsing try
self.current = saved;
break;
};
all_with_properties.push((key, value));
} else if self.is_identifier_token()
|| self.is_safe_keyword_as_identifier()
|| self.check(TokenType::Warehouse)
{
// WAREHOUSE mywh (without =)
let value = self.advance().text;
all_with_properties.push((key, value));
} else {
// Not a Snowflake-style option (e.g., Hive LOCATION 'path' without =)
// Retreat and let Hive parsing try
self.current = saved;
break;
}
} else {
break;
}
}
}
// Parse MySQL table options: ENGINE=val, AUTO_INCREMENT=val, DEFAULT CHARSET=val, etc.
let mysql_table_options = if is_clickhouse {
Vec::new()
} else {
self.parse_mysql_table_options()
};
// Parse StarRocks ROLLUP property: ROLLUP (r1(col1, col2), r2(col1))
let rollup = if self.match_token(TokenType::Rollup) {
self.expect(TokenType::LParen)?;
let mut indices = Vec::new();
loop {
let name = self.expect_identifier_or_keyword_with_quoted()?;
let cols = if self.match_token(TokenType::LParen) {
let mut col_list = Vec::new();
loop {
col_list.push(self.expect_identifier_or_keyword_with_quoted()?);
if !self.match_token(TokenType::Comma) {
break;
}
}
self.expect(TokenType::RParen)?;
col_list
} else {
Vec::new()
};
indices.push(crate::expressions::RollupIndex {
name,
expressions: cols,
});
if !self.match_token(TokenType::Comma) {
break;
}
}
self.expect(TokenType::RParen)?;
Some(crate::expressions::RollupProperty {
expressions: indices,
})
} else {
None
};
// Parse Hive table properties: ROW FORMAT, STORED AS/BY, LOCATION, TBLPROPERTIES
let hive_properties = self.parse_hive_table_properties()?;
let is_teradata = matches!(
self.config.dialect,
Some(crate::dialects::DialectType::Teradata)
);
// Handle ON COMMIT PRESERVE ROWS or ON COMMIT DELETE ROWS
// Also handle TSQL ON filegroup or ON filegroup (partition_column)
let (mut on_commit, on_property) = if is_teradata {
(None, None)
} else if self.match_token(TokenType::On) {
if self.match_token(TokenType::Commit) {
if self.match_keywords(&[TokenType::Preserve, TokenType::Rows]) {
(Some(OnCommit::PreserveRows), None)
} else if self.match_keywords(&[TokenType::Delete, TokenType::Rows]) {
(Some(OnCommit::DeleteRows), None)
} else {
return Err(
self.parse_error("Expected PRESERVE ROWS or DELETE ROWS after ON COMMIT")
);
}
} else {
// TSQL: ON filegroup or ON filegroup (partition_column)
// Parse filegroup name as schema which allows filegroup(column) syntax
let filegroup = self.parse_schema_identifier()?;
(
None,
Some(OnProperty {
this: Box::new(filegroup),
}),
)
}
} else {
(None, None)
};
// Parse table properties like DEFAULT COLLATE (BigQuery)
let mut table_properties = hive_properties;
// If COMMENT was found before WITH, add it to table_properties as SchemaCommentProperty
if let Some(comment_text) = pre_with_comment {
table_properties.push(Expression::SchemaCommentProperty(Box::new(
SchemaCommentProperty {
this: Box::new(Expression::Literal(Box::new(Literal::String(comment_text)))),
},
)));
}
if self.match_token(TokenType::Default) && self.match_token(TokenType::Collate) {
let collation = self.parse_primary()?;
table_properties.push(Expression::CollateProperty(Box::new(CollateProperty {
this: Box::new(collation),
default: Some(Box::new(Expression::Boolean(BooleanLiteral {
value: true,
}))),
})));
}
// BigQuery: OPTIONS (key=value, ...) on table - comes after column definitions
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::BigQuery)
) {
if let Some(options_property) = self.parse_bigquery_options_property()? {
table_properties.push(options_property);
}
} else if self.match_identifier("OPTIONS") {
let options = self.parse_options_list()?;
table_properties.push(Expression::Properties(Box::new(Properties {
expressions: options,
})));
}
// Doris/StarRocks: PROPERTIES ('key'='value', ...) - comes after column definitions
let is_doris_starrocks = matches!(
self.config.dialect,
Some(crate::dialects::DialectType::Doris)
| Some(crate::dialects::DialectType::StarRocks)
);
if is_doris_starrocks && self.match_identifier("PROPERTIES") {
// Use parse_options_list which handles 'key'='value' format
let props = self.parse_options_list()?;
if !props.is_empty() {
table_properties.push(Expression::Properties(Box::new(Properties {
expressions: props,
})));
}
}
// Redshift: Parse DISTKEY, SORTKEY, DISTSTYLE, BACKUP after column definitions
// These can appear in any order and multiple times
loop {
if self.match_identifier("DISTKEY") {
// DISTKEY(column)
if let Some(distkey) = self.parse_distkey()? {
table_properties.push(distkey);
}
} else if self.match_text_seq(&["COMPOUND", "SORTKEY"]) {
// COMPOUND SORTKEY(col1, col2, ...)
if let Some(sortkey) = self.parse_sortkey()? {
// Set compound flag
if let Expression::SortKeyProperty(mut skp) = sortkey {
skp.compound = Some(Box::new(Expression::Boolean(BooleanLiteral {
value: true,
})));
table_properties.push(Expression::SortKeyProperty(skp));
}
}
} else if self.match_identifier("SORTKEY") {
// SORTKEY(col1, col2, ...)
if let Some(sortkey) = self.parse_sortkey()? {
table_properties.push(sortkey);
}
} else if self.match_identifier("DISTSTYLE") {
// DISTSTYLE ALL|EVEN|AUTO|KEY
if self.match_texts(&["ALL", "EVEN", "AUTO", "KEY"]) {
let style = self.previous().text.to_ascii_uppercase();
table_properties.push(Expression::DistStyleProperty(Box::new(
DistStyleProperty {
this: Box::new(Expression::Var(Box::new(Var { this: style }))),
},
)));
}
} else if self.match_identifier("BACKUP") {
// BACKUP YES|NO
if self.match_texts(&["YES", "NO"]) {
let value = self.previous().text.to_ascii_uppercase();
table_properties.push(Expression::BackupProperty(Box::new(BackupProperty {
this: Box::new(Expression::Var(Box::new(Var { this: value }))),
})));
}
} else {
break;
}
}
// Teradata: PRIMARY/UNIQUE/INDEX and PARTITION BY clauses after columns
if is_teradata {
loop {
// Consume optional comma separator between index specs (only if followed by an index keyword)
if self.check(TokenType::Comma) {
let saved_comma = self.current;
self.skip(); // consume comma
let is_index_keyword = self.check(TokenType::Unique)
|| self.check(TokenType::PrimaryKey)
|| self.check(TokenType::Index)
|| self.check(TokenType::No);
if !is_index_keyword {
self.current = saved_comma; // retreat
}
}
if self.match_token(TokenType::Unique) {
let primary = self.match_token(TokenType::PrimaryKey);
let amp = self.match_identifier("AMP");
self.match_token(TokenType::Index);
let params = if self.match_token(TokenType::LParen) {
let cols = self.parse_identifier_list()?;
self.expect(TokenType::RParen)?;
cols.into_iter()
.map(|id| {
Expression::boxed_column(Column {
name: id,
table: None,
join_mark: false,
trailing_comments: Vec::new(),
span: None,
inferred_type: None,
})
})
.collect()
} else {
Vec::new()
};
table_properties.push(Expression::Index(Box::new(Index {
this: None,
table: None,
unique: true,
primary: if primary {
Some(Box::new(Expression::Boolean(BooleanLiteral {
value: true,
})))
} else {
None
},
amp: if amp {
Some(Box::new(Expression::Boolean(BooleanLiteral {
value: true,
})))
} else {
None
},
params,
})));
continue;
}
if self.match_token(TokenType::PrimaryKey) {
let amp = self.match_identifier("AMP");
self.match_token(TokenType::Index);
let params = if self.match_token(TokenType::LParen) {
let cols = self.parse_identifier_list()?;
self.expect(TokenType::RParen)?;
cols.into_iter()
.map(|id| {
Expression::boxed_column(Column {
name: id,
table: None,
join_mark: false,
trailing_comments: Vec::new(),
span: None,
inferred_type: None,
})
})
.collect()
} else {
Vec::new()
};
table_properties.push(Expression::Index(Box::new(Index {
this: None,
table: None,
unique: false,
primary: Some(Box::new(Expression::Boolean(BooleanLiteral {
value: true,
}))),
amp: if amp {
Some(Box::new(Expression::Boolean(BooleanLiteral {
value: true,
})))
} else {
None
},
params,
})));
continue;
}
if self.match_token(TokenType::Index) {
let params = if self.match_token(TokenType::LParen) {
let cols = self.parse_identifier_list()?;
self.expect(TokenType::RParen)?;
cols.into_iter()
.map(|id| {
Expression::boxed_column(Column {
name: id,
table: None,
join_mark: false,
trailing_comments: Vec::new(),
span: None,
inferred_type: None,
})
})
.collect()
} else {
Vec::new()
};
table_properties.push(Expression::Index(Box::new(Index {
this: None,
table: None,
unique: false,
primary: None,
amp: None,
params,
})));
continue;
}
if self.match_keywords(&[TokenType::Partition, TokenType::By]) {
let expr = self.parse_primary()?;
table_properties.push(Expression::PartitionedByProperty(Box::new(
PartitionedByProperty {
this: Box::new(expr),
},
)));
continue;
}
break;
}
if on_commit.is_none()
&& self.check(TokenType::On)
&& self.check_next(TokenType::Commit)
{
self.skip(); // ON
self.skip(); // COMMIT
if self.match_keywords(&[TokenType::Preserve, TokenType::Rows]) {
on_commit = Some(OnCommit::PreserveRows);
} else if self.match_keywords(&[TokenType::Delete, TokenType::Rows]) {
on_commit = Some(OnCommit::DeleteRows);
} else {
return Err(
self.parse_error("Expected PRESERVE ROWS or DELETE ROWS after ON COMMIT")
);
}
}
}
// ClickHouse: table properties after column definitions
if is_clickhouse {
self.parse_clickhouse_table_properties(&mut table_properties)?;
}
// ClickHouse: EMPTY AS SELECT
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.match_identifier("EMPTY")
{
table_properties.push(Expression::Var(Box::new(Var {
this: "EMPTY".to_string(),
})));
}
// Handle AS SELECT after columns/WITH (CTAS with column definitions)
// When there are no column definitions, AS comes after PARTITION BY/CLUSTER BY/OPTIONS
let as_select = if !no_column_defs && self.match_token(TokenType::As) {
Some(self.parse_statement()?)
} else {
None
};
if is_clickhouse && as_select.is_some() {
self.parse_clickhouse_table_properties(&mut table_properties)?;
}
// Parse PARTITION BY RANGE/LIST/HASH(columns) for regular CREATE TABLE
let is_bigquery = matches!(
self.config.dialect,
Some(crate::dialects::DialectType::BigQuery)
);
if !is_teradata && (self.check(TokenType::Partition) || self.check(TokenType::PartitionBy))
{
let parsed_bigquery_partition = if is_bigquery {
if let Some(partition_property) = self.parse_bigquery_partition_by_property()? {
table_properties.push(partition_property);
true
} else {
false
}
} else {
false
};
if !parsed_bigquery_partition {
let saved = self.current;
let is_partition_by = if self.match_token(TokenType::PartitionBy) {
true
} else if self.match_token(TokenType::Partition) {
self.match_token(TokenType::By)
} else {
false
};
if is_partition_by {
let partition_kind = if self.check(TokenType::Range) {
self.skip();
Some("RANGE".to_string())
} else if self.check(TokenType::List) {
self.skip();
Some("LIST".to_string())
} else if (self.check(TokenType::Identifier) || self.check(TokenType::Var))
&& self.check_next(TokenType::LParen)
{
// Only treat identifier as partition method (like HASH) if followed by (
Some(self.advance().text.to_ascii_uppercase())
} else {
// No explicit partition method (RANGE/LIST/HASH), just PARTITION BY (cols)
None
};
// StarRocks/Doris: PARTITION BY func(), col (bare expressions without RANGE/LIST)
// When the partition_kind was consumed as an identifier that's actually a function call
// and the content after the parenthesized args includes a comma, it's a bare expression list
if is_doris_starrocks
&& partition_kind.is_some()
&& !matches!(
partition_kind.as_deref(),
Some("RANGE") | Some("LIST") | Some("HASH") | Some("KEY")
)
{
// Backtrack: re-parse as bare PARTITION BY with comma-separated expressions
let func_name = partition_kind.unwrap();
let mut raw_sql = format!("PARTITION BY {}", func_name);
// Helper closure for consuming parenthesized content with proper spacing
fn consume_parens(parser: &mut Parser, raw_sql: &mut String) {
if !parser.check(TokenType::LParen) {
return;
}
parser.advance();
raw_sql.push('(');
let mut depth = 1;
let mut last_type: Option<TokenType> = None;
while !parser.is_at_end() && depth > 0 {
let tok = parser.advance();
if tok.token_type == TokenType::LParen {
depth += 1;
} else if tok.token_type == TokenType::RParen {
depth -= 1;
if depth == 0 {
break;
}
}
// Add space after commas
if matches!(last_type, Some(TokenType::Comma)) {
raw_sql.push(' ');
}
if tok.token_type == TokenType::String {
raw_sql.push('\'');
raw_sql.push_str(&tok.text);
raw_sql.push('\'');
} else {
raw_sql.push_str(&tok.text);
}
last_type = Some(tok.token_type.clone());
}
raw_sql.push(')');
}
consume_parens(self, &mut raw_sql);
// Consume more comma-separated expressions
while self.match_token(TokenType::Comma) {
raw_sql.push_str(", ");
let tok = self.advance();
raw_sql.push_str(&tok.text);
consume_parens(self, &mut raw_sql);
}
table_properties.push(Expression::Raw(Raw { sql: raw_sql }));
} else
// For Doris/StarRocks/MySQL RANGE/LIST, use structured parsing
if (is_doris_starrocks
|| matches!(
self.config.dialect,
Some(crate::dialects::DialectType::MySQL)
| Some(crate::dialects::DialectType::SingleStore)
| Some(crate::dialects::DialectType::TiDB)
))
&& matches!(partition_kind.as_deref(), Some("RANGE") | Some("LIST"))
{
let partition_expr = self.parse_doris_partition_by_range_or_list(
partition_kind
.as_ref()
.map(|s| s.as_str())
.unwrap_or("RANGE"),
)?;
table_properties.push(partition_expr);
} else {
// Generic raw SQL parsing for other dialects
let no_partition_kind = partition_kind.is_none();
let mut raw_sql = match partition_kind {
Some(kind) => format!("PARTITION BY {}", kind),
None => "PARTITION BY ".to_string(),
};
if self.check(TokenType::LParen) {
self.skip();
raw_sql.push('(');
let mut depth = 1;
let mut last_tok_type: Option<TokenType> = None;
while !self.is_at_end() && depth > 0 {
let tok = self.advance();
if tok.token_type == TokenType::LParen {
depth += 1;
} else if tok.token_type == TokenType::RParen {
depth -= 1;
if depth == 0 {
break;
}
}
// Add space before token if needed for proper formatting
let needs_space = match (&last_tok_type, &tok.token_type) {
// Add space after comma
(Some(TokenType::Comma), _) => true,
// Add space after identifiers/keywords before other identifiers/keywords
(Some(TokenType::Identifier), TokenType::Identifier) => true,
_ => false,
};
if needs_space {
raw_sql.push(' ');
}
// Handle string literals - preserve quotes
if tok.token_type == TokenType::String {
raw_sql.push('\'');
raw_sql.push_str(&tok.text);
raw_sql.push('\'');
} else {
raw_sql.push_str(&tok.text);
}
last_tok_type = Some(tok.token_type.clone());
}
raw_sql.push(')');
} else if no_partition_kind {
// Bare PARTITION BY expression list without a partition method
let mut first = true;
while !self.is_at_end()
&& !self.check(TokenType::Cluster)
&& !self.check(TokenType::As)
&& !self.check(TokenType::Semicolon)
&& !self.check(TokenType::RParen)
&& !self.check_identifier("OPTIONS")
{
if !first {
raw_sql.push_str(", ");
}
first = false;
let tok = self.advance();
raw_sql.push_str(&tok.text);
// Handle function calls: PARTITION BY DATE(col)
if self.check(TokenType::LParen) {
self.skip();
raw_sql.push('(');
let mut depth = 1;
while !self.is_at_end() && depth > 0 {
let t = self.advance();
if t.token_type == TokenType::LParen {
depth += 1;
} else if t.token_type == TokenType::RParen {
depth -= 1;
if depth == 0 {
break;
}
}
raw_sql.push_str(&t.text);
}
raw_sql.push(')');
}
if !self.match_token(TokenType::Comma) {
break;
}
}
}
table_properties.push(Expression::Raw(Raw { sql: raw_sql }));
}
} else {
self.current = saved;
}
}
}
// Parse CLUSTER BY (BigQuery) after PARTITION BY
if is_bigquery {
if let Some(cluster_property) = self.parse_bigquery_cluster_by_property()? {
table_properties.push(cluster_property);
}
} else if self.match_keywords(&[TokenType::Cluster, TokenType::By]) {
// Handle both: CLUSTER BY c1, c2 and CLUSTER BY (c1, c2)
let parens = self.match_token(TokenType::LParen);
let mut cluster_names = Vec::new();
loop {
let name = self.expect_identifier_or_keyword()?;
cluster_names.push(name);
if !self.match_token(TokenType::Comma) {
break;
}
}
if parens {
self.expect(TokenType::RParen)?;
}
let inner = cluster_names.join(", ");
let sql = if parens {
format!("CLUSTER BY ({})", inner)
} else {
format!("CLUSTER BY {}", inner)
};
table_properties.push(Expression::Raw(Raw { sql }));
}
// No-column-defs path: OPTIONS and AS SELECT come after PARTITION BY / CLUSTER BY
if no_column_defs {
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::BigQuery)
) {
if let Some(options_property) = self.parse_bigquery_options_property()? {
table_properties.push(options_property);
}
} else if self.match_identifier("OPTIONS") {
let options = self.parse_options_list()?;
table_properties.push(Expression::Properties(Box::new(Properties {
expressions: options,
})));
}
}
let as_select = if no_column_defs && self.match_token(TokenType::As) {
Some(self.parse_statement()?)
} else {
as_select
};
// For EXTERNAL tables, parse additional Snowflake options that may come after PARTITION BY
// (location=@s2/logs/, partition_type = user_specified, file_format = (...), etc.)
if is_special_modifier {
while !self.is_at_end()
&& !self.check(TokenType::As)
&& !self.check(TokenType::Semicolon)
{
let is_snowflake_option = self.check(TokenType::Warehouse)
|| self.check_identifier("TARGET_LAG")
|| self.check_identifier("CATALOG")
|| self.check_identifier("EXTERNAL_VOLUME")
|| self.check_identifier("BASE_LOCATION")
|| self.check_identifier("REFRESH_MODE")
|| self.check_identifier("INITIALIZE")
|| self.check_identifier("DATA_RETENTION_TIME_IN_DAYS")
|| self.check_identifier("LOCATION")
|| self.check_identifier("PARTITION_TYPE")
|| self.check_identifier("FILE_FORMAT")
|| self.check_identifier("AUTO_REFRESH");
if is_snowflake_option {
let key = self.advance().text;
if self.match_token(TokenType::Eq) {
let value = if self.check(TokenType::LParen) {
// Parenthesized option list
self.skip();
let mut options = String::from("(");
let mut depth = 1;
while !self.is_at_end() && depth > 0 {
let tok = self.advance();
if tok.token_type == TokenType::LParen {
depth += 1;
} else if tok.token_type == TokenType::RParen {
depth -= 1;
}
if !options.ends_with('(')
&& !options.ends_with(' ')
&& tok.token_type != TokenType::RParen
{
options.push(' ');
}
options.push_str(&tok.text);
}
options
} else if self.check(TokenType::String) {
let v = format!("'{}'", self.peek().text);
self.skip();
v
} else if self.check(TokenType::DAt) {
// Stage path like @s1/logs/
self.skip();
let mut path = String::from("@");
if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
path.push_str(&self.advance().text);
}
while self.check(TokenType::Slash) {
if self.current + 1 < self.tokens.len() {
let next = &self.tokens[self.current + 1];
if next.text.eq_ignore_ascii_case("FILE_FORMAT")
|| next.text.eq_ignore_ascii_case("PARTITION_TYPE")
|| next.text.eq_ignore_ascii_case("AUTO_REFRESH")
|| next.text.eq_ignore_ascii_case("LOCATION")
|| next.text.eq_ignore_ascii_case("PARTITION")
|| next.text.eq_ignore_ascii_case("WAREHOUSE")
{
self.skip();
path.push('/');
break;
}
}
self.skip();
path.push('/');
if self.is_identifier_token()
|| self.is_safe_keyword_as_identifier()
{
path.push_str(&self.advance().text);
}
}
path
} else if self.check(TokenType::Var) && self.peek().text.starts_with('@') {
let mut path = self.advance().text;
while self.check(TokenType::Slash) {
if self.current + 1 < self.tokens.len() {
let next = &self.tokens[self.current + 1];
if next.text.eq_ignore_ascii_case("FILE_FORMAT")
|| next.text.eq_ignore_ascii_case("PARTITION_TYPE")
|| next.text.eq_ignore_ascii_case("AUTO_REFRESH")
|| next.text.eq_ignore_ascii_case("LOCATION")
|| next.text.eq_ignore_ascii_case("PARTITION")
|| next.text.eq_ignore_ascii_case("WAREHOUSE")
{
self.skip();
path.push('/');
break;
}
}
self.skip();
path.push('/');
if self.is_identifier_token()
|| self.is_safe_keyword_as_identifier()
{
path.push_str(&self.advance().text);
}
}
path
} else if self.check(TokenType::Warehouse) {
self.advance().text
} else if self.is_identifier_token() || self.is_safe_keyword_as_identifier()
{
self.advance().text
} else {
break;
};
all_with_properties.push((key, value));
} else if self.is_identifier_token()
|| self.is_safe_keyword_as_identifier()
|| self.check(TokenType::Warehouse)
{
let value = self.advance().text;
all_with_properties.push((key, value));
}
} else {
break;
}
}
}
// Snowflake: STAGE_FILE_FORMAT = (...), STAGE_COPY_OPTIONS = (...)
while self.check_identifier("STAGE_FILE_FORMAT")
|| self.check_identifier("STAGE_COPY_OPTIONS")
{
let key = self.advance().text.to_ascii_uppercase();
self.match_token(TokenType::Eq);
// Consume the parenthesized options as raw text
if self.match_token(TokenType::LParen) {
let mut raw = format!("{} = (", key);
let mut paren_depth = 1i32;
while !self.is_at_end() && paren_depth > 0 {
let token = self.advance();
if token.token_type == TokenType::LParen {
paren_depth += 1;
} else if token.token_type == TokenType::RParen {
paren_depth -= 1;
if paren_depth == 0 {
break;
}
}
if token.token_type == TokenType::String {
raw.push_str(&format!("'{}'", token.text));
} else {
raw.push_str(&token.text);
}
// Add space between tokens (but not before closing paren)
if paren_depth > 0 {
raw.push(' ');
}
}
raw.push(')');
table_properties.push(Expression::Raw(Raw { sql: raw }));
}
}
// Parse TSQL table-level WITH(SYSTEM_VERSIONING=ON(...)) after columns
// This is different from the earlier WITH properties parsing.
// TSQL uses WITH(...) after columns for system versioning.
let post_table_properties = self.parse_post_table_properties()?;
// PostgreSQL: INHERITS (parent1, parent2, ...)
let inherits = if self.match_identifier("INHERITS") {
self.expect(TokenType::LParen)?;
let mut parents = Vec::new();
loop {
parents.push(self.parse_table_ref()?);
if !self.match_token(TokenType::Comma) {
break;
}
}
self.expect(TokenType::RParen)?;
parents
} else {
Vec::new()
};
Ok(Expression::CreateTable(Box::new(CreateTable {
name,
on_cluster,
columns,
constraints,
if_not_exists,
temporary,
or_replace,
table_modifier: table_modifier.map(|s| s.to_string()),
as_select,
as_select_parenthesized: false,
on_commit,
clone_source: None,
clone_at_clause: None,
shallow_clone: false,
is_copy: false,
leading_comments,
with_properties: all_with_properties,
teradata_post_name_options: teradata_post_name_options.clone(),
with_data: None,
with_statistics: None,
teradata_indexes: Vec::new(),
with_cte: None,
properties: table_properties,
partition_of: None,
post_table_properties,
mysql_table_options,
inherits,
on_property,
copy_grants,
using_template: None,
rollup,
uuid,
with_partition_columns: Vec::new(),
with_connection: None,
})))
}
/// Parse CREATE TABLE ... PARTITION OF parent_table [(cols)] [FOR VALUES spec | DEFAULT] [PARTITION BY ...]
fn parse_create_table_partition_of(
&mut self,
name: TableRef,
if_not_exists: bool,
temporary: bool,
or_replace: bool,
table_modifier: Option<&str>,
leading_comments: Vec<String>,
) -> Result<Expression> {
// Parse parent table name
let parent_table = self.parse_table_ref()?;
// Optionally parse column constraints in parens: (unitsales DEFAULT 0) or (CONSTRAINT ...)
// This must come before FOR VALUES or DEFAULT. We distinguish from other uses
// by checking if the first token after LParen is CONSTRAINT or an identifier
// that is not a string literal.
let (columns, constraints) = if self.check(TokenType::LParen) {
// Peek ahead: current is LParen, current+1 is first token inside parens
let first_inside = self.current + 1;
// Check if this is a partition column specification: (colname DEFAULT value)
// Column names tokenize as Var (unquoted) or QuotedIdentifier (quoted)
let is_column_defs = first_inside < self.tokens.len()
&& (self.tokens[first_inside].token_type == TokenType::Constraint
|| ((self.tokens[first_inside].token_type == TokenType::Var
|| self.tokens[first_inside].token_type == TokenType::QuotedIdentifier
|| self.tokens[first_inside].token_type == TokenType::Identifier)
&& first_inside + 1 < self.tokens.len()
&& self.tokens[first_inside + 1].token_type == TokenType::Default));
if is_column_defs {
self.skip(); // consume LParen
// Use special parsing for partition column specs - they don't have data types,
// just column names with constraint overrides like DEFAULT
let (cols, constrs) = self.parse_partition_column_specs()?;
self.expect(TokenType::RParen)?;
(cols, constrs)
} else {
(Vec::new(), Vec::new())
}
} else {
(Vec::new(), Vec::new())
};
// Parse DEFAULT or FOR VALUES spec
let partition_bound: Expression = if self.match_token(TokenType::Default) {
// DEFAULT partition
Expression::Var(Box::new(Var {
this: "DEFAULT".to_string(),
}))
} else if self.match_token(TokenType::For) {
// FOR VALUES ...
self.expect(TokenType::Values)?;
self.parse_partition_bound_spec()?
} else {
// Neither DEFAULT nor FOR VALUES - could be an error
// but we'll be lenient and just create a DEFAULT
Expression::Var(Box::new(Var {
this: "DEFAULT".to_string(),
}))
};
let partition_of_expr =
Expression::PartitionedOfProperty(Box::new(PartitionedOfProperty {
this: Box::new(Expression::Table(Box::new(parent_table))),
expression: Box::new(partition_bound),
}));
// Optionally parse trailing PARTITION BY RANGE/LIST/HASH(columns)
let mut table_properties: Vec<Expression> = Vec::new();
if self.match_token(TokenType::Partition) || self.match_token(TokenType::PartitionBy) {
// Could be PARTITION BY or just PartitionBy token
if self.previous().token_type == TokenType::Partition {
self.expect(TokenType::By)?;
}
// Parse RANGE/LIST/HASH(columns)
let partition_kind = if self.check(TokenType::Identifier) || self.check(TokenType::Var)
{
let kind_text = self.advance().text.to_ascii_uppercase();
kind_text
} else if self.check(TokenType::Range) {
self.skip();
"RANGE".to_string()
} else if self.check(TokenType::List) {
self.skip();
"LIST".to_string()
} else {
"RANGE".to_string()
};
// Parse (columns)
let mut raw_sql = format!("PARTITION BY {}", partition_kind);
if self.check(TokenType::LParen) {
self.skip(); // consume LParen
raw_sql.push('(');
let mut depth = 1;
while !self.is_at_end() && depth > 0 {
let tok = self.advance();
if tok.token_type == TokenType::LParen {
depth += 1;
} else if tok.token_type == TokenType::RParen {
depth -= 1;
if depth == 0 {
break;
}
}
raw_sql.push_str(&tok.text);
}
raw_sql.push(')');
}
table_properties.push(Expression::Raw(Raw { sql: raw_sql }));
}
Ok(Expression::CreateTable(Box::new(CreateTable {
name,
on_cluster: None,
columns,
constraints,
if_not_exists,
temporary,
or_replace,
table_modifier: table_modifier.map(|s| s.to_string()),
as_select: None,
as_select_parenthesized: false,
on_commit: None,
clone_source: None,
clone_at_clause: None,
shallow_clone: false,
is_copy: false,
leading_comments,
with_properties: Vec::new(),
teradata_post_name_options: Vec::new(),
with_data: None,
with_statistics: None,
teradata_indexes: Vec::new(),
with_cte: None,
properties: table_properties,
partition_of: Some(partition_of_expr),
post_table_properties: Vec::new(),
mysql_table_options: Vec::new(),
inherits: Vec::new(),
on_property: None,
copy_grants: false,
using_template: None,
rollup: None,
uuid: None,
with_partition_columns: Vec::new(),
with_connection: None,
})))
}
/// Parse the partition bound spec that follows `FOR VALUES` in a
/// PostgreSQL `CREATE TABLE ... PARTITION OF`:
/// `IN (...)`, `FROM (...) TO (...)`, or `WITH (MODULUS n, REMAINDER n)`.
fn parse_partition_bound_spec(&mut self) -> Result<Expression> {
    // Collapse a value list into a single expression: a lone value stays
    // bare, multiple values are wrapped in a Tuple (the generator strips
    // the parentheses for partition bounds).
    fn collapse(mut values: Vec<Expression>) -> Expression {
        if values.len() == 1 {
            values.pop().unwrap()
        } else {
            Expression::Tuple(Box::new(Tuple {
                expressions: values,
            }))
        }
    }
    if self.match_token(TokenType::In) {
        // IN (val, val, ...)
        self.expect(TokenType::LParen)?;
        let mut items = vec![self.parse_expression()?];
        while self.match_token(TokenType::Comma) {
            items.push(self.parse_expression()?);
        }
        self.expect(TokenType::RParen)?;
        Ok(Expression::PartitionBoundSpec(Box::new(
            PartitionBoundSpec {
                this: Some(Box::new(collapse(items))),
                expression: None,
                from_expressions: None,
                to_expressions: None,
            },
        )))
    } else if self.match_token(TokenType::From) {
        // FROM (val, ...) TO (val, ...)
        self.expect(TokenType::LParen)?;
        let mut lower = vec![self.parse_partition_bound_value()?];
        while self.match_token(TokenType::Comma) {
            lower.push(self.parse_partition_bound_value()?);
        }
        self.expect(TokenType::RParen)?;
        self.expect(TokenType::To)?;
        self.expect(TokenType::LParen)?;
        let mut upper = vec![self.parse_partition_bound_value()?];
        while self.match_token(TokenType::Comma) {
            upper.push(self.parse_partition_bound_value()?);
        }
        self.expect(TokenType::RParen)?;
        Ok(Expression::PartitionBoundSpec(Box::new(
            PartitionBoundSpec {
                this: None,
                expression: None,
                from_expressions: Some(Box::new(collapse(lower))),
                to_expressions: Some(Box::new(collapse(upper))),
            },
        )))
    } else if self.match_token(TokenType::With) {
        // WITH (MODULUS n, REMAINDER n) — the MODULUS/REMAINDER keywords
        // are matched leniently (ignored if absent).
        self.expect(TokenType::LParen)?;
        self.match_text_seq(&["MODULUS"]);
        let modulus = self.parse_expression()?;
        self.expect(TokenType::Comma)?;
        self.match_text_seq(&["REMAINDER"]);
        let remainder = self.parse_expression()?;
        self.expect(TokenType::RParen)?;
        Ok(Expression::PartitionBoundSpec(Box::new(
            PartitionBoundSpec {
                this: Some(Box::new(modulus)),
                expression: Some(Box::new(remainder)),
                from_expressions: None,
                to_expressions: None,
            },
        )))
    } else {
        Err(self.parse_error("Expected IN, FROM, or WITH after FOR VALUES in PARTITION OF"))
    }
}
/// Parse a single partition bound value (number, string, MINVALUE, MAXVALUE).
///
/// MINVALUE/MAXVALUE become bare `Var` nodes; anything else is parsed as a
/// normal expression.
fn parse_partition_bound_value(&mut self) -> Result<Expression> {
    let sentinels = [
        (TokenType::Minvalue, "MINVALUE"),
        (TokenType::Maxvalue, "MAXVALUE"),
    ];
    for (token, text) in sentinels {
        if self.match_token(token) {
            return Ok(Expression::Var(Box::new(Var {
                this: text.to_string(),
            })));
        }
    }
    self.parse_expression()
}
/// Parse column specifications for PostgreSQL PARTITION OF syntax.
/// Unlike regular column definitions, these don't have data types - just column names
/// with constraint overrides like DEFAULT, NOT NULL, or table-level CONSTRAINT clauses.
/// Example: (unitsales DEFAULT 0) or (CONSTRAINT check_date CHECK (logdate >= '2016-07-01'))
fn parse_partition_column_specs(&mut self) -> Result<(Vec<ColumnDef>, Vec<TableConstraint>)> {
    let mut columns = Vec::new();
    let mut constraints = Vec::new();
    loop {
        // Table-level constraints: named (CONSTRAINT ...) or anonymous
        // (PRIMARY KEY / FOREIGN KEY / UNIQUE / CHECK / EXCLUDE).
        let starts_constraint = self.check(TokenType::Constraint)
            || self.check(TokenType::PrimaryKey)
            || self.check(TokenType::ForeignKey)
            || self.check(TokenType::Unique)
            || self.check(TokenType::Check)
            || self.check(TokenType::Exclude);
        if starts_constraint {
            constraints.push(self.parse_table_constraint()?);
        } else {
            // Otherwise it's a column name with optional constraint
            // overrides (no data type).
            columns.push(self.parse_partition_column_spec()?);
        }
        if !self.match_token(TokenType::Comma) {
            break;
        }
        // ClickHouse allows a trailing comma before the closing ')'
        let clickhouse = matches!(
            self.config.dialect,
            Some(crate::dialects::DialectType::ClickHouse)
        );
        if clickhouse && self.check(TokenType::RParen) {
            break;
        }
    }
    Ok((columns, constraints))
}
/// Parse a single partition column specification: column_name [DEFAULT value] [NOT NULL] [NULL] [WITH OPTIONS ...]
///
/// The column carries no data type (it is inherited from the parent table),
/// so the returned `ColumnDef` uses `DataType::Unknown`. Constraint keywords
/// are consumed in a loop until none match; each one is recorded both in the
/// relevant `ColumnDef` field and in `constraint_order` so generation can
/// reproduce the original ordering.
fn parse_partition_column_spec(&mut self) -> Result<ColumnDef> {
    // Parse column name (quoted identifiers and safe keywords allowed)
    let name = self.expect_identifier_or_safe_keyword_with_quoted()?;
    // Create column def with Unknown data type (data type comes from parent table)
    let mut col_def = ColumnDef::new(name.name.clone(), DataType::Unknown);
    col_def.name = name;
    // Parse column constraints (no data type expected)
    loop {
        if self.match_token(TokenType::Default) {
            // DEFAULT value — stored both in `default` and in the
            // constraints list (clone needed for the double bookkeeping)
            let default_val = self.parse_expression()?;
            col_def.default = Some(default_val.clone());
            col_def
                .constraints
                .push(ColumnConstraint::Default(default_val));
            col_def.constraint_order.push(ConstraintType::Default);
        } else if self.match_keywords(&[TokenType::Not, TokenType::Null]) {
            col_def.nullable = Some(false);
            col_def.constraint_order.push(ConstraintType::NotNull);
        } else if self.match_token(TokenType::Null) {
            col_def.nullable = Some(true);
            col_def.constraint_order.push(ConstraintType::Null);
        } else if self.match_token(TokenType::Constraint) {
            // Inline CONSTRAINT name ... for this column
            let constraint_name = self.expect_identifier_or_safe_keyword()?;
            if self.match_keywords(&[TokenType::Not, TokenType::Null]) {
                col_def.nullable = Some(false);
                col_def.not_null_constraint_name = Some(constraint_name);
                col_def.constraint_order.push(ConstraintType::NotNull);
            } else if self.match_token(TokenType::Check) {
                col_def.check_constraint_name = Some(constraint_name);
                if self.match_token(TokenType::LParen) {
                    let check_expr = self.parse_expression()?;
                    self.expect(TokenType::RParen)?;
                    col_def
                        .constraints
                        .push(ColumnConstraint::Check(check_expr));
                }
                col_def.constraint_order.push(ConstraintType::Check);
            } else if self.match_token(TokenType::Default) {
                let default_val = self.parse_expression()?;
                col_def.default = Some(default_val.clone());
                col_def
                    .constraints
                    .push(ColumnConstraint::Default(default_val));
                col_def.constraint_order.push(ConstraintType::Default);
            }
            // NOTE(review): if none of NOT NULL / CHECK / DEFAULT follows
            // the constraint name, the name is silently dropped — confirm
            // this leniency is intended.
        } else if self.match_text_seq(&["WITH", "OPTIONS"]) {
            // PostgreSQL: WITH OPTIONS allows specifying more options
            // For now, just skip this - it's rarely used
            break;
        } else {
            break;
        }
    }
    Ok(col_def)
}
/// Parse WITH properties for CREATE TABLE (e.g., WITH (FORMAT='parquet', x='2'))
/// Returns a list of (key, value) pairs
///
/// Values are captured as raw SQL text (string quotes preserved) rather
/// than parsed expressions, since they are re-emitted verbatim during
/// generation.
fn parse_with_properties(&mut self) -> Result<Vec<(String, String)>> {
    self.expect(TokenType::LParen)?;
    let mut properties = Vec::new();
    loop {
        if self.check(TokenType::RParen) {
            break;
        }
        // Parse property name (can be keywords like FORMAT, TABLE_FORMAT)
        let mut key = self.expect_identifier_or_keyword()?;
        // Handle multi-word keys like "PARTITIONED BY" -> "PARTITIONED_BY"
        if key.eq_ignore_ascii_case("PARTITIONED") && self.check(TokenType::By) {
            self.skip(); // consume BY
            key = "PARTITIONED_BY".to_string();
        }
        // Expect = or special case for PARTITIONED_BY=(...)
        self.expect(TokenType::Eq)?;
        // Parse property value - can be string, identifier, or parenthesized expression
        let value = if self.check(TokenType::String) {
            // Store string with quotes to preserve format
            let val = format!("'{}'", self.peek().text);
            self.skip();
            val
        } else if self.match_token(TokenType::LParen) {
            // Handle PARTITIONED_BY=(x INT, y INT) or similar.
            // Reassemble raw token text: spaces between word-like tokens,
            // none around parens, and a space after commas.
            let mut depth = 1;
            let mut result = String::from("(");
            let mut need_space = false;
            while !self.is_at_end() && depth > 0 {
                if self.check(TokenType::LParen) {
                    depth += 1;
                } else if self.check(TokenType::RParen) {
                    depth -= 1;
                    if depth == 0 {
                        // Leave the final ')' for the expect() below.
                        break;
                    }
                }
                let token = self.peek();
                let text = &token.text;
                let token_type = token.token_type;
                // Determine if we need a space before this token
                let is_punctuation = matches!(
                    token_type,
                    TokenType::Comma | TokenType::LParen | TokenType::RParen
                );
                if need_space && !is_punctuation {
                    result.push(' ');
                }
                result.push_str(text);
                // Determine if we need a space after this token
                need_space = token_type == TokenType::Comma
                    || (!is_punctuation
                        && !matches!(
                            token_type,
                            TokenType::LParen | TokenType::RParen | TokenType::Comma
                        ));
                self.skip();
            }
            self.expect(TokenType::RParen)?;
            result.push(')');
            result
        } else if self.check_identifier("ARRAY")
            && self
                .peek_nth(1)
                .is_some_and(|t| t.token_type == TokenType::LBracket)
        {
            // Handle ARRAY['value', 'value', ...] syntax (Athena/Presto)
            let mut result = self.advance().text.clone(); // consume ARRAY
            self.expect(TokenType::LBracket)?;
            result.push('[');
            let mut first = true;
            while !self.is_at_end() && !self.check(TokenType::RBracket) {
                if !first {
                    if self.match_token(TokenType::Comma) {
                        result.push_str(", ");
                    } else {
                        break;
                    }
                }
                first = false;
                // Parse array element (usually a string)
                if self.check(TokenType::String) {
                    // Re-add the quotes the tokenizer stripped
                    result.push('\'');
                    result.push_str(&self.advance().text);
                    result.push('\'');
                } else if self.is_identifier_token() {
                    result.push_str(&self.advance().text);
                } else {
                    break;
                }
            }
            self.expect(TokenType::RBracket)?;
            result.push(']');
            result
        } else if self.check(TokenType::Number) {
            // Numeric value (e.g., bucket_count=64)
            self.advance().text.clone()
        } else {
            // Just an identifier or keyword (e.g., allow_page_locks=on)
            self.expect_identifier_or_keyword()?
        };
        properties.push((key, value));
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    self.expect(TokenType::RParen)?;
    Ok(properties)
}
/// Parse column definitions and table constraints
///
/// Consumes the comma-separated element list of a CREATE TABLE body (the
/// caller handles the surrounding parentheses) and splits it into column
/// definitions and table-level constraints. Dialect-specific elements
/// handled here: PostgreSQL LIKE and INITIALLY DEFERRED/IMMEDIATE,
/// ClickHouse INDEX and PROJECTION, MySQL INDEX/KEY/FULLTEXT/SPATIAL, and
/// TSQL PERIOD FOR SYSTEM_TIME. Everything else falls through to
/// `parse_column_def`.
fn parse_column_definitions(&mut self) -> Result<(Vec<ColumnDef>, Vec<TableConstraint>)> {
    let mut columns = Vec::new();
    let mut constraints = Vec::new();
    loop {
        if self.check(TokenType::RParen) {
            break;
        }
        // Check for LIKE clause (PostgreSQL)
        if self.check(TokenType::Like) {
            constraints.push(self.parse_like_clause()?);
        }
        // Check for table-level constraint
        // For CHECK, only treat as constraint if followed by '(' (NOT in ClickHouse — there
        // CHECK/ASSUME without CONSTRAINT keyword is not supported, and 'check' can be a column name).
        // Otherwise, 'check' is a column name (e.g., CREATE TABLE t (check INT)).
        else if self.check(TokenType::Constraint)
            || self.check(TokenType::PrimaryKey)
            || self.check(TokenType::ForeignKey)
            || self.check(TokenType::Unique)
            || (self.check(TokenType::Check)
                && !matches!(
                    self.config.dialect,
                    Some(crate::dialects::DialectType::ClickHouse)
                )
                && self
                    .peek_nth(1)
                    .map_or(false, |t| t.token_type == TokenType::LParen))
            || self.check(TokenType::Exclude)
        {
            constraints.push(self.parse_table_constraint()?);
        } else if matches!(
            self.config.dialect,
            Some(crate::dialects::DialectType::ClickHouse)
        ) && self.check(TokenType::Index)
        {
            // ClickHouse: INDEX name expr TYPE type_func(args) GRANULARITY n
            self.skip(); // consume INDEX
            let name = self.expect_identifier_or_keyword_with_quoted()?;
            // Use parse_conjunction to handle comparisons like c0 < (SELECT _table)
            let expression = self.parse_conjunction()?.ok_or_else(|| {
                self.parse_error("Expected expression in ClickHouse INDEX definition")
            })?;
            let index_type = if self.match_token(TokenType::Type) {
                // Parse function or identifier for type (e.g., bloom_filter(0.001), set(100), minmax)
                // Handle keywords like 'set' that are tokenized as TokenType::Set
                if let Some(func) = self.parse_function()? {
                    Some(Box::new(func))
                } else if !self.check(TokenType::Identifier)
                    && !self.check(TokenType::Var)
                    && !self.is_at_end()
                {
                    // Handle keywords as index type names (e.g., set, minmax)
                    let type_name = self.advance().text.clone();
                    if self.check(TokenType::LParen) {
                        // It's a function call like set(100)
                        self.skip(); // consume (
                        let mut args = Vec::new();
                        if !self.check(TokenType::RParen) {
                            args.push(self.parse_expression()?);
                            while self.match_token(TokenType::Comma) {
                                args.push(self.parse_expression()?);
                            }
                        }
                        self.expect(TokenType::RParen)?;
                        Some(Box::new(Expression::Function(Box::new(Function::new(
                            type_name, args,
                        )))))
                    } else {
                        // Just an identifier
                        Some(Box::new(Expression::Identifier(Identifier::new(type_name))))
                    }
                } else if let Some(id) = self.parse_id_var()? {
                    Some(Box::new(id))
                } else {
                    None
                }
            } else {
                None
            };
            let granularity = if self.match_identifier("GRANULARITY") {
                let gran_val = self.parse_expression()?;
                Some(Box::new(gran_val))
            } else {
                None
            };
            constraints.push(TableConstraint::Index {
                name: Some(name),
                columns: Vec::new(),
                kind: None,
                modifiers: ConstraintModifiers::default(),
                use_key_keyword: false,
                expression: Some(Box::new(expression)),
                index_type,
                granularity,
            });
        } else if !matches!(
            self.config.dialect,
            Some(crate::dialects::DialectType::ClickHouse)
        ) && (self.check(TokenType::Index)
            || self.check(TokenType::Key)
            || self.check_identifier("FULLTEXT")
            || self.check_identifier("SPATIAL"))
        {
            // INDEX/KEY constraint (MySQL). Guard KEY <type> as a normal column definition
            // (e.g. ClickHouse: `key UInt64`).
            // KEY is only a constraint when followed by '(' directly, or by
            // an index name and then '(' — two-token lookahead below.
            let looks_like_key_constraint = if self.check(TokenType::Key) {
                self.check_next(TokenType::LParen)
                    || ((self.check_next(TokenType::Identifier)
                        || self.check_next(TokenType::Var)
                        || self.check_next(TokenType::QuotedIdentifier))
                        && self.current + 2 < self.tokens.len()
                        && self.tokens[self.current + 2].token_type == TokenType::LParen)
            } else {
                true
            };
            if looks_like_key_constraint {
                constraints.push(self.parse_index_table_constraint()?);
            } else {
                columns.push(self.parse_column_def()?);
            }
        } else if self.check_identifier("PERIOD") {
            // TSQL: PERIOD FOR SYSTEM_TIME (start_col, end_col)
            if let Some(period_constraint) =
                self.parse_period_for_system_time_table_constraint()?
            {
                constraints.push(period_constraint);
            } else {
                // Not actually PERIOD FOR SYSTEM_TIME, treat as column definition
                columns.push(self.parse_column_def()?);
            }
        } else if self.check_identifier("INITIALLY") {
            // PostgreSQL: INITIALLY DEFERRED / INITIALLY IMMEDIATE as table-level setting
            self.skip(); // consume INITIALLY
            if self.match_identifier("DEFERRED") {
                constraints.push(TableConstraint::InitiallyDeferred { deferred: true });
            } else if self.match_identifier("IMMEDIATE") {
                constraints.push(TableConstraint::InitiallyDeferred { deferred: false });
            } else {
                return Err(self.parse_error("Expected DEFERRED or IMMEDIATE after INITIALLY"));
            }
        } else if matches!(
            self.config.dialect,
            Some(crate::dialects::DialectType::ClickHouse)
        ) && self.check_identifier("PROJECTION")
        {
            // ClickHouse: PROJECTION name (SELECT ...) or PROJECTION name INDEX expr TYPE type_name
            self.skip(); // consume PROJECTION
            let name = self.expect_identifier_or_keyword_with_quoted()?;
            if self.match_token(TokenType::LParen) {
                let expression = self.parse_statement()?;
                self.expect(TokenType::RParen)?;
                // ClickHouse: PROJECTION name (SELECT ...) WITH SETTINGS (key=value, ...)
                if self.check(TokenType::With)
                    && self.current + 1 < self.tokens.len()
                    && self.tokens[self.current + 1].token_type == TokenType::Settings
                {
                    self.skip(); // consume WITH
                    self.skip(); // consume SETTINGS
                    if self.match_token(TokenType::LParen) {
                        // Consume key=value pairs (settings are discarded,
                        // not preserved in the AST)
                        loop {
                            if self.check(TokenType::RParen) {
                                break;
                            }
                            if self.is_identifier_token()
                                || self.is_safe_keyword_as_identifier()
                            {
                                self.skip(); // key
                            }
                            if self.match_token(TokenType::Eq) {
                                let _ = self.parse_primary()?; // value
                            }
                            if !self.match_token(TokenType::Comma) {
                                break;
                            }
                        }
                        self.expect(TokenType::RParen)?;
                    }
                }
                constraints.push(TableConstraint::Projection { name, expression });
            } else if self.match_token(TokenType::Index) {
                // PROJECTION name INDEX expr TYPE type_name
                let expr = self.parse_bitwise()?.ok_or_else(|| {
                    self.parse_error(
                        "Expected expression in ClickHouse PROJECTION INDEX definition",
                    )
                })?;
                let type_str = if self.match_token(TokenType::Type) {
                    if !self.is_at_end()
                        && !self.check(TokenType::Comma)
                        && !self.check(TokenType::RParen)
                    {
                        self.advance().text.clone()
                    } else {
                        String::new()
                    }
                } else {
                    String::new()
                };
                // The projection body is preserved as raw SQL text
                let raw_sql = if type_str.is_empty() {
                    format!("INDEX {} ", expr)
                } else {
                    format!("INDEX {} TYPE {}", expr, type_str)
                };
                constraints.push(TableConstraint::Projection {
                    name,
                    expression: Expression::Raw(Raw { sql: raw_sql }),
                });
            } else {
                // Bare PROJECTION name with no body
                constraints.push(TableConstraint::Projection {
                    name,
                    expression: Expression::Null(Null),
                });
            }
        } else {
            // Parse column definition
            columns.push(self.parse_column_def()?);
        }
        if !self.match_token(TokenType::Comma) {
            break;
        }
        // ClickHouse: allow trailing comma before closing paren
        if matches!(
            self.config.dialect,
            Some(crate::dialects::DialectType::ClickHouse)
        ) && self.check(TokenType::RParen)
        {
            break;
        }
    }
    Ok((columns, constraints))
}
/// Parse LIKE clause in CREATE TABLE: LIKE source_table [INCLUDING|EXCLUDING options]
///
/// Repeated INCLUDING/EXCLUDING modifiers are collected as
/// (action, PROPERTY) pairs, with the property name upper-cased.
fn parse_like_clause(&mut self) -> Result<TableConstraint> {
    self.expect(TokenType::Like)?;
    let source = self.parse_table_ref()?;
    let mut options = Vec::new();
    // Consume INCLUDING/EXCLUDING pairs until neither keyword matches.
    loop {
        let action = if self.match_identifier("INCLUDING") {
            LikeOptionAction::Including
        } else if self.match_identifier("EXCLUDING") {
            LikeOptionAction::Excluding
        } else {
            break;
        };
        let property = self.expect_identifier_or_keyword()?.to_ascii_uppercase();
        options.push((action, property));
    }
    Ok(TableConstraint::Like { source, options })
}
/// Parse a single column definition
fn parse_column_def(&mut self) -> Result<ColumnDef> {
// Column names can be keywords like 'end', 'truncate', 'view', etc.
// ClickHouse allows any keyword as column name (from, select, etc.)
let mut name = if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) {
self.expect_identifier_or_keyword_with_quoted()?
} else {
self.expect_identifier_or_safe_keyword_with_quoted()?
};
// ClickHouse: Nested column names like n.b for Nested() columns
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) {
while self.match_token(TokenType::Dot) {
let sub = self.expect_identifier_or_safe_keyword_with_quoted()?;
name = Identifier {
name: format!("{}.{}", name.name, sub.name),
quoted: name.quoted,
trailing_comments: sub.trailing_comments,
span: None,
};
}
}
// TSQL computed columns have no data type: column_name AS (expression) [PERSISTED]
// Check if AS follows immediately (no data type)
if self.check(TokenType::As) {
let mut col_def = ColumnDef::new(
name.name.clone(),
DataType::Custom {
name: String::new(),
},
);
col_def.name = name;
// Consume AS and parse computed column expression
self.skip(); // consume AS
if self.check(TokenType::LParen) {
self.parse_as_computed_column(&mut col_def)?;
}
return Ok(col_def);
}
// SQLite allows column definitions without types: CREATE TABLE t (x, y)
// ClickHouse allows typeless columns with DEFAULT/MATERIALIZED/ALIAS/EPHEMERAL
// Check if the next token indicates no type (comma, rparen, or constraint keyword)
let no_type = self.check(TokenType::Comma)
|| self.check(TokenType::RParen)
|| (matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && (self.check(TokenType::Default)
|| self.check(TokenType::Materialized)
|| self.check_identifier("ALIAS")
|| self.check_identifier("EPHEMERAL")));
let data_type = if no_type {
// No type specified - use empty custom type
DataType::Custom {
name: String::new(),
}
} else {
self.parse_data_type()?
};
let mut col_def = ColumnDef::new(name.name.clone(), data_type);
col_def.name = name;
col_def.no_type = no_type;
// Parse MySQL type modifiers (UNSIGNED, ZEROFILL)
// These come after the data type but before other constraints
while self.match_identifier("UNSIGNED")
|| self.match_identifier("ZEROFILL")
|| self.match_identifier("SIGNED")
{
let modifier = self.previous().text.to_ascii_uppercase();
if modifier == "UNSIGNED" {
col_def.unsigned = true;
} else if modifier == "ZEROFILL" {
col_def.zerofill = true;
}
// SIGNED is the default, no action needed
}
// BigQuery: OPTIONS (key=value, ...) on column - comes right after type
if self.match_identifier("OPTIONS") {
col_def.options = self.parse_options_list()?;
}
// Parse column constraints
loop {
if self.match_keywords(&[TokenType::Not, TokenType::Null]) {
col_def.nullable = Some(false);
col_def.constraint_order.push(ConstraintType::NotNull);
} else if self.match_token(TokenType::Null) {
col_def.nullable = Some(true);
col_def.constraint_order.push(ConstraintType::Null);
} else if self.match_keywords(&[TokenType::PrimaryKey, TokenType::Key]) {
// Handle PRIMARY KEY [ASC|DESC]
col_def.primary_key = true;
// Capture ASC/DESC after PRIMARY KEY
if self.match_token(TokenType::Asc) {
col_def.primary_key_order = Some(SortOrder::Asc);
} else if self.match_token(TokenType::Desc) {
col_def.primary_key_order = Some(SortOrder::Desc);
}
col_def.constraint_order.push(ConstraintType::PrimaryKey);
} else if self.match_token(TokenType::Constraint) {
// Inline CONSTRAINT name ... (e.g., CONSTRAINT fk_name REFERENCES ...)
let constraint_name = self.expect_identifier()?;
// After constraint name, expect REFERENCES, PRIMARY KEY, UNIQUE, CHECK, NOT NULL, NULL, etc.
if self.match_token(TokenType::References) {
let mut fk_ref = self.parse_foreign_key_ref()?;
fk_ref.constraint_name = Some(constraint_name);
col_def
.constraints
.push(ColumnConstraint::References(fk_ref));
col_def.constraint_order.push(ConstraintType::References);
} else if self.match_keywords(&[TokenType::PrimaryKey, TokenType::Key]) {
col_def.primary_key = true;
col_def.primary_key_constraint_name = Some(constraint_name);
col_def.constraint_order.push(ConstraintType::PrimaryKey);
} else if self.match_token(TokenType::Unique) {
col_def.unique = true;
col_def.unique_constraint_name = Some(constraint_name);
// Check for NULLS NOT DISTINCT (PostgreSQL 15+ feature)
if self.match_text_seq(&["NULLS", "NOT", "DISTINCT"]) {
col_def.unique_nulls_not_distinct = true;
}
col_def.constraint_order.push(ConstraintType::Unique);
} else if self.match_keywords(&[TokenType::Not, TokenType::Null]) {
col_def.nullable = Some(false);
col_def.not_null_constraint_name = Some(constraint_name);
col_def.constraint_order.push(ConstraintType::NotNull);
} else if self.match_token(TokenType::Check) {
col_def.check_constraint_name = Some(constraint_name);
// Parse CHECK constraint expression
if self.match_token(TokenType::LParen) {
let check_expr = self.parse_expression()?;
self.expect(TokenType::RParen)?;
col_def
.constraints
.push(ColumnConstraint::Check(check_expr));
} else if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) {
// ClickHouse: CHECK expr without parens
let check_expr = self.parse_or()?;
col_def
.constraints
.push(ColumnConstraint::Check(check_expr));
}
col_def.constraint_order.push(ConstraintType::Check);
}
} else if self.match_token(TokenType::Unique) {
col_def.unique = true;
// Check for NULLS NOT DISTINCT (PostgreSQL 15+ feature)
if self.match_text_seq(&["NULLS", "NOT", "DISTINCT"]) {
col_def.unique_nulls_not_distinct = true;
}
col_def.constraint_order.push(ConstraintType::Unique);
} else if self.match_token(TokenType::Check) {
// Standalone CHECK (expr) constraint (without CONSTRAINT name)
if self.match_token(TokenType::LParen) {
let check_expr = self.parse_expression()?;
self.expect(TokenType::RParen)?;
col_def
.constraints
.push(ColumnConstraint::Check(check_expr));
col_def.constraint_order.push(ConstraintType::Check);
} else if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) {
// ClickHouse: CHECK expr without parens
let check_expr = self.parse_or()?;
col_def
.constraints
.push(ColumnConstraint::Check(check_expr));
col_def.constraint_order.push(ConstraintType::Check);
}
} else if self.match_token(TokenType::AutoIncrement) || self.match_keyword("IDENTITY") {
col_def.auto_increment = true;
col_def.constraint_order.push(ConstraintType::AutoIncrement);
// Handle IDENTITY/AUTOINCREMENT options: START n INCREMENT m [ORDER|NOORDER] or (start, increment)
if self.match_keyword("START") {
col_def.auto_increment_start = Some(Box::new(self.parse_primary()?));
if self.match_keyword("INCREMENT") {
col_def.auto_increment_increment = Some(Box::new(self.parse_primary()?));
}
// Snowflake: ORDER or NOORDER option
if self.match_token(TokenType::Order) {
col_def.auto_increment_order = Some(true);
} else if self.match_identifier("NOORDER") {
col_def.auto_increment_order = Some(false);
}
} else if self.match_token(TokenType::LParen) {
// IDENTITY(start, increment) or AUTOINCREMENT(start, increment)
col_def.auto_increment_start = Some(Box::new(self.parse_primary()?));
if self.match_token(TokenType::Comma) {
col_def.auto_increment_increment = Some(Box::new(self.parse_primary()?));
}
self.expect(TokenType::RParen)?;
}
} else if self.match_token(TokenType::Default) {
// ClickHouse: DEFAULT expressions can be complex (today(), a + 1, cond ? x : y, etc.)
col_def.default = if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) {
Some(self.parse_expression()?)
} else {
Some(self.parse_unary()?)
};
col_def.constraint_order.push(ConstraintType::Default);
} else if self.match_keywords(&[TokenType::ForeignKey, TokenType::Key]) {
// Snowflake/SQL Server: FOREIGN KEY REFERENCES table(columns)
// The FOREIGN KEY keywords are followed by REFERENCES
self.expect(TokenType::References)?;
let mut fk_ref = self.parse_foreign_key_ref()?;
fk_ref.has_foreign_key_keywords = true;
col_def
.constraints
.push(ColumnConstraint::References(fk_ref));
col_def.constraint_order.push(ConstraintType::References);
} else if self.match_token(TokenType::References) {
let fk_ref = self.parse_foreign_key_ref()?;
col_def
.constraints
.push(ColumnConstraint::References(fk_ref));
col_def.constraint_order.push(ConstraintType::References);
} else if self.match_token(TokenType::Generated) {
// GENERATED [BY DEFAULT [ON NULL] | ALWAYS] AS ...
// Could be: AS IDENTITY, AS (expr) STORED|VIRTUAL, AS ROW START|END
self.parse_generated_column_constraint(&mut col_def)?;
} else if self.match_token(TokenType::Collate) {
// COLLATE collation_name (may be quoted like "de_DE")
// Also handle dotted names like pg_catalog."default"
let mut collation = self.expect_identifier_or_keyword_with_quoted()?;
// Check for dotted collation names: pg_catalog."default"
while self.match_token(TokenType::Dot) {
let next = self.expect_identifier_or_keyword_with_quoted()?;
let sep = if next.quoted {
format!("{}.\"{}\"", collation.name, next.name)
} else {
format!("{}.{}", collation.name, next.name)
};
collation = Identifier {
name: sep,
quoted: false,
trailing_comments: Vec::new(),
span: None,
};
}
col_def
.constraints
.push(ColumnConstraint::Collate(collation));
col_def.constraint_order.push(ConstraintType::Collate);
} else if self.match_token(TokenType::Comment) {
// COMMENT 'comment text'
let comment_text = self.expect_string()?;
col_def
.constraints
.push(ColumnConstraint::Comment(comment_text));
col_def.constraint_order.push(ConstraintType::Comment);
} else if self.match_keywords(&[TokenType::On, TokenType::Update]) {
// MySQL: ON UPDATE expression (e.g., ON UPDATE CURRENT_TIMESTAMP)
let expr = self.parse_unary()?;
col_def.on_update = Some(expr);
col_def.constraint_order.push(ConstraintType::OnUpdate);
} else if self.match_identifier("VISIBLE") {
col_def.visible = Some(true);
} else if self.match_identifier("INVISIBLE") {
col_def.visible = Some(false);
} else if self.match_identifier("ENCODE") {
// Redshift: ENCODE encoding_type (e.g., ZSTD, DELTA, LZO, etc.)
let encoding = self.expect_identifier_or_keyword()?;
col_def.encoding = Some(encoding);
col_def.constraint_order.push(ConstraintType::Encode);
} else if !matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.match_token(TokenType::Format)
{
// Teradata: FORMAT 'pattern' (not ClickHouse — FORMAT there is statement-level)
let format_str = self.expect_string()?;
col_def.format = Some(format_str);
} else if self.match_identifier("TITLE") {
// Teradata: TITLE 'title'
let title_str = self.expect_string()?;
col_def.title = Some(title_str);
} else if self.match_identifier("INLINE") {
// Teradata: INLINE LENGTH n
self.match_identifier("LENGTH");
let length = self.expect_number()?;
col_def.inline_length = Some(length as u64);
} else if self.match_identifier("COMPRESS") {
// Teradata: COMPRESS or COMPRESS (values) or COMPRESS 'value'
if self.match_token(TokenType::LParen) {
let values = self.parse_expression_list()?;
self.expect(TokenType::RParen)?;
col_def.compress = Some(values);
} else if self.check(TokenType::String) {
// COMPRESS 'value'
let value = self.parse_primary()?;
col_def.compress = Some(vec![value]);
} else {
// COMPRESS without values
col_def.compress = Some(Vec::new());
}
} else if self.match_identifier("CHARACTER") {
// Teradata: CHARACTER SET name
self.match_token(TokenType::Set);
let charset = self.expect_identifier_or_keyword()?;
col_def.character_set = Some(charset);
} else if self.match_identifier("UPPERCASE") {
// Teradata: UPPERCASE
col_def.uppercase = true;
} else if self.match_identifier("CASESPECIFIC") {
// Teradata: CASESPECIFIC
col_def.casespecific = Some(true);
} else if self.match_text_seq(&["NOT", "FOR", "REPLICATION"]) {
// TSQL: NOT FOR REPLICATION - skip this modifier (not preserved in output for non-TSQL)
col_def.not_for_replication = true;
} else if self.match_token(TokenType::Not) && self.match_identifier("CASESPECIFIC") {
// Teradata: NOT CASESPECIFIC
col_def.casespecific = Some(false);
} else if self.match_keyword("TAG")
|| (self.match_token(TokenType::With) && self.match_keyword("TAG"))
{
// Snowflake: TAG (key='value', ...) or WITH TAG (key='value', ...)
let tags = self.parse_tags()?;
col_def.constraints.push(ColumnConstraint::Tags(tags));
col_def.constraint_order.push(ConstraintType::Tags);
} else if self.match_token(TokenType::As) {
// Computed column: AS (expression) [STORED|VIRTUAL|PERSISTED] [NOT NULL]
// TSQL: AS (expression) [PERSISTED] [NOT NULL]
// MySQL shorthand: AS (expression) [STORED|VIRTUAL]
// Also: Snowflake External Table virtual column expression
if self.check(TokenType::LParen) {
self.parse_as_computed_column(&mut col_def)?;
}
} else if self.match_identifier("CODEC") {
// ClickHouse: CODEC(LZ4HC(9), ZSTD, DELTA)
self.expect(TokenType::LParen)?;
let start = self.current;
let mut depth = 1;
while !self.is_at_end() && depth > 0 {
if self.check(TokenType::LParen) {
depth += 1;
}
if self.check(TokenType::RParen) {
depth -= 1;
if depth == 0 {
break;
}
}
self.skip();
}
let codec_text = self.tokens_to_sql(start, self.current);
self.expect(TokenType::RParen)?;
col_def.codec = Some(codec_text);
} else if self.match_identifier("STATISTICS") {
// ClickHouse: STATISTICS(tdigest, minmax, uniq, ...)
self.expect(TokenType::LParen)?;
let mut depth = 1;
while !self.is_at_end() && depth > 0 {
if self.check(TokenType::LParen) {
depth += 1;
}
if self.check(TokenType::RParen) {
depth -= 1;
if depth == 0 {
break;
}
}
self.skip();
}
self.expect(TokenType::RParen)?;
// Statistics info is stored but we don't need it for transpilation
} else if self.match_identifier("EPHEMERAL") {
// ClickHouse: EPHEMERAL [expr] [type]
// EPHEMERAL can optionally be followed by an expression, then optionally a data type
if !self.check(TokenType::Comma)
&& !self.check(TokenType::RParen)
&& !self.is_at_end()
&& !self.check_identifier("CODEC")
&& !self.check_identifier("TTL")
&& !self.check(TokenType::Comment)
{
let expr = self.parse_bitwise()?.unwrap_or(Expression::Null(Null));
col_def.ephemeral = Some(Some(Box::new(expr)));
// ClickHouse: type can follow EPHEMERAL expression (e.g., b EPHEMERAL 'a' String)
if col_def.no_type
&& !self.check(TokenType::Comma)
&& !self.check(TokenType::RParen)
&& !self.is_at_end()
&& !self.check_identifier("CODEC")
&& !self.check_identifier("TTL")
&& !self.check(TokenType::Comment)
{
col_def.data_type = self.parse_data_type()?;
col_def.no_type = false;
}
} else {
col_def.ephemeral = Some(None);
}
} else if self.check(TokenType::Materialized) && !self.check_next(TokenType::View) {
// ClickHouse: MATERIALIZED expr (but not MATERIALIZED VIEW)
self.skip(); // consume MATERIALIZED
let expr = self.parse_or()?;
col_def.materialized_expr = Some(Box::new(expr));
} else if self.match_identifier("ALIAS") {
// ClickHouse: ALIAS expr
let expr = self.parse_or()?;
col_def.alias_expr = Some(Box::new(expr));
} else if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.check_identifier("EXPRESSION")
{
// ClickHouse dictionary column: EXPRESSION expr
self.skip(); // consume EXPRESSION
let expr = self.parse_or()?;
col_def.materialized_expr = Some(Box::new(expr));
} else if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && (self.match_identifier("HIERARCHICAL")
|| self.match_identifier("IS_OBJECT_ID")
|| self.match_identifier("INJECTIVE")
|| self.match_identifier("BIDIRECTIONAL"))
{
// ClickHouse dictionary column attributes: HIERARCHICAL, IS_OBJECT_ID, INJECTIVE, BIDIRECTIONAL
// These are flag-like attributes with no value, just skip them
} else if self.match_identifier("TTL") {
// ClickHouse: TTL expr
let expr = self.parse_expression()?;
col_def.ttl_expr = Some(Box::new(expr));
} else if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.check(TokenType::Settings)
&& self.check_next(TokenType::LParen)
{
// ClickHouse: SETTINGS (key = value, ...) on column definition
// Only match parenthesized form; non-parenthesized SETTINGS is statement-level
self.skip(); // consume SETTINGS
self.expect(TokenType::LParen)?;
let mut depth = 1i32;
while !self.is_at_end() && depth > 0 {
if self.check(TokenType::LParen) {
depth += 1;
}
if self.check(TokenType::RParen) {
depth -= 1;
if depth == 0 {
break;
}
}
self.skip();
}
self.expect(TokenType::RParen)?;
} else {
// Skip unknown column modifiers (DEFERRABLE, CHARACTER SET, etc.)
// to allow parsing to continue
if self.skip_column_modifier() {
continue;
}
break;
}
}
Ok(col_def)
}
/// Skip optional column modifiers that we don't need to preserve.
/// Returns `true` when a modifier was consumed so the caller can retry
/// its constraint loop from the new position.
fn skip_column_modifier(&mut self) -> bool {
    // NOT DEFERRABLE / NOT CASESPECIFIC -- only consume NOT when one of
    // those keywords follows (NOT NULL is handled earlier by the caller).
    if self.check(TokenType::Not)
        && (self.check_next_identifier("DEFERRABLE")
            || self.check_next_identifier("CASESPECIFIC"))
    {
        self.skip(); // NOT
        self.skip(); // DEFERRABLE / CASESPECIFIC
        return true;
    }
    // Constraint-timing keywords: DEFERRABLE / DEFERRED / IMMEDIATE
    // (as in DEFERRABLE INITIALLY DEFERRED and friends).
    for keyword in ["DEFERRABLE", "DEFERRED", "IMMEDIATE"] {
        if self.match_identifier(keyword) {
            return true;
        }
    }
    // CHARACTER [SET] <name>
    if self.match_identifier("CHARACTER") {
        self.match_token(TokenType::Set);
        // Charset name may be tokenized as Var or as a plain Identifier
        // (e.g. LATIN, utf8_bin); consume whichever is present.
        let _ = self.match_token(TokenType::Var) || self.match_token(TokenType::Identifier);
        return true;
    }
    // Teradata value-less column attributes.
    if self.match_identifier("UPPERCASE") || self.match_identifier("CASESPECIFIC") {
        return true;
    }
    // Note: COMPRESS, FORMAT, TITLE, and INLINE LENGTH are now properly parsed and stored in ColumnDef
    false
}
/// Parse Teradata-specific table options after CREATE TABLE AS.
/// Returns (with_data, with_statistics, teradata_indexes).
///
/// Recognized, in any order and any number of times:
/// - `WITH [NO] DATA [AND [NO] STATISTICS]`
/// - `NO PRIMARY INDEX`
/// - `PRIMARY [AMP] INDEX [name] [(cols)]`
/// - `UNIQUE [PRIMARY] INDEX [name] [(cols)]`
/// - `INDEX [name] [(cols)]` (secondary)
fn parse_teradata_table_options(&mut self) -> (Option<bool>, Option<bool>, Vec<TeradataIndex>) {
    let mut with_data = None;
    let mut with_statistics = None;
    let mut teradata_indexes = Vec::new();
    loop {
        // WITH DATA [AND STATISTICS] / WITH NO DATA [AND NO STATISTICS]
        if self.match_token(TokenType::With) {
            let no = self.match_token(TokenType::No); // optional NO
            self.match_identifier("DATA");
            with_data = Some(!no); // WITH DATA = true, WITH NO DATA = false
            // Optional AND [NO] STATISTICS
            if self.match_token(TokenType::And) {
                let no_stats = self.match_token(TokenType::No); // optional NO
                self.match_identifier("STATISTICS");
                with_statistics = Some(!no_stats); // AND STATISTICS = true, AND NO STATISTICS = false
            }
            continue;
        }
        // NO PRIMARY INDEX
        if self.match_token(TokenType::No) {
            self.match_token(TokenType::PrimaryKey);
            self.match_token(TokenType::Index);
            teradata_indexes.push(TeradataIndex {
                kind: TeradataIndexKind::NoPrimary,
                name: None,
                columns: Vec::new(),
            });
            // Consume optional comma separator between index specs
            self.match_token(TokenType::Comma);
            continue;
        }
        // PRIMARY AMP INDEX / PRIMARY INDEX
        if self.match_token(TokenType::PrimaryKey) {
            let is_amp = self.match_identifier("AMP");
            self.match_token(TokenType::Index);
            let (name, columns) = self.parse_teradata_index_name_and_columns();
            teradata_indexes.push(TeradataIndex {
                kind: if is_amp {
                    TeradataIndexKind::PrimaryAmp
                } else {
                    TeradataIndexKind::Primary
                },
                name,
                columns,
            });
            // Consume optional comma separator between index specs
            self.match_token(TokenType::Comma);
            continue;
        }
        // UNIQUE [PRIMARY] INDEX
        if self.match_token(TokenType::Unique) {
            let is_primary = self.match_token(TokenType::PrimaryKey);
            self.match_token(TokenType::Index);
            let (name, columns) = self.parse_teradata_index_name_and_columns();
            teradata_indexes.push(TeradataIndex {
                kind: if is_primary {
                    TeradataIndexKind::UniquePrimary
                } else {
                    TeradataIndexKind::Unique
                },
                name,
                columns,
            });
            // Consume optional comma separator between index specs
            self.match_token(TokenType::Comma);
            continue;
        }
        // Plain INDEX (non-primary, non-unique)
        if self.match_token(TokenType::Index) {
            let (name, columns) = self.parse_teradata_index_name_and_columns();
            teradata_indexes.push(TeradataIndex {
                kind: TeradataIndexKind::Secondary,
                name,
                columns,
            });
            // Consume optional comma separator between index specs
            self.match_token(TokenType::Comma);
            continue;
        }
        break;
    }
    (with_data, with_statistics, teradata_indexes)
}

/// Parse the optional `[name] [(col, ...)]` tail shared by every Teradata
/// index specification above. The name is only taken when the next token is
/// an identifier and not the opening paren of the column list (the LParen
/// guard is harmless for branches that previously omitted it, since an
/// LParen is never an identifier token).
fn parse_teradata_index_name_and_columns(&mut self) -> (Option<String>, Vec<String>) {
    // Optional index name
    let name = if self.is_identifier_token() && !self.check(TokenType::LParen) {
        Some(self.advance().text)
    } else {
        None
    };
    // Optional column list
    let columns = if self.match_token(TokenType::LParen) {
        let cols = self.parse_identifier_list_raw();
        self.match_token(TokenType::RParen);
        cols
    } else {
        Vec::new()
    };
    (name, columns)
}
/// Parse Teradata table options that appear between the table name and the
/// column list, e.g. `CREATE TABLE t, NO FALLBACK, FREESPACE = 10 PERCENT (...)`.
///
/// Options are comma-separated. Each option is accumulated token-by-token and
/// joined back into a display string via `join_teradata_option_tokens`.
/// Returns the collected option strings; returns an empty vec when no comma
/// follows the table name.
fn parse_teradata_post_name_options(&mut self) -> Vec<String> {
    // Options begin with a comma after the table name.
    if !self.match_token(TokenType::Comma) {
        return Vec::new();
    }
    let mut options = Vec::new();
    // (text, token type) pairs of the option currently being accumulated.
    let mut current_tokens: Vec<(String, TokenType)> = Vec::new();
    let mut paren_depth = 0;
    // True after a top-level `=`, i.e. we are inside an option's value part.
    let mut in_value = false;
    while !self.is_at_end() {
        // A top-level LParen is ambiguous: it may open the column list or be
        // part of an option value. Disambiguate below.
        if self.check(TokenType::LParen) && paren_depth == 0 {
            if !in_value {
                // Column list begins
                break;
            }
            // In value position: treat the paren as the start of the column
            // list only when the previous token can legally terminate an
            // option value (a literal or a known terminal keyword).
            let mut is_terminal = false;
            if let Some((last_text, last_type)) = current_tokens.last() {
                let last_upper = last_text.to_ascii_uppercase();
                is_terminal = matches!(last_type, TokenType::Number | TokenType::String)
                    || matches!(
                        last_upper.as_str(),
                        "ON" | "OFF"
                            | "DEFAULT"
                            | "NEVER"
                            | "ALWAYS"
                            | "MINIMUM"
                            | "MAXIMUM"
                            | "BYTES"
                            | "KBYTES"
                            | "KILOBYTES"
                            | "PERCENT"
                    );
            }
            if is_terminal {
                break;
            }
        }
        let token = self.advance();
        match token.token_type {
            TokenType::LParen => {
                paren_depth += 1;
            }
            TokenType::RParen => {
                if paren_depth > 0 {
                    paren_depth -= 1;
                    // Closing the outermost paren ends the value part.
                    if paren_depth == 0 && in_value {
                        in_value = false;
                    }
                }
            }
            TokenType::Eq => {
                // A top-level `=` switches from the key to the value part.
                if paren_depth == 0 {
                    in_value = true;
                }
            }
            TokenType::Comma => {
                // A top-level comma terminates the current option.
                if paren_depth == 0 {
                    let option = self.join_teradata_option_tokens(current_tokens);
                    if !option.is_empty() {
                        options.push(option);
                    }
                    current_tokens = Vec::new();
                    in_value = false;
                    continue;
                }
            }
            _ => {}
        }
        // Re-add the quotes the tokenizer stripped so the joined option
        // string renders correctly.
        let text = if token.token_type == TokenType::QuotedIdentifier {
            let quote_char = if self.config.dialect == Some(crate::dialects::DialectType::MySQL)
                || self.config.dialect == Some(crate::dialects::DialectType::SingleStore)
                || self.config.dialect == Some(crate::dialects::DialectType::Doris)
                || self.config.dialect == Some(crate::dialects::DialectType::StarRocks)
            {
                '`'
            } else {
                '"'
            };
            format!("{}{}{}", quote_char, token.text, quote_char)
        } else if token.token_type == TokenType::String {
            format!("'{}'", token.text)
        } else {
            token.text.clone()
        };
        let mut join_type = token.token_type;
        if join_type == TokenType::Percent && token.text.eq_ignore_ascii_case("PERCENT") {
            // Treat PERCENT as an identifier to preserve spacing (e.g., "1 PERCENT")
            join_type = TokenType::Identifier;
        }
        current_tokens.push((text, join_type));
    }
    // Flush the final option (no trailing comma required).
    if !current_tokens.is_empty() {
        let option = self.join_teradata_option_tokens(current_tokens);
        if !option.is_empty() {
            options.push(option);
        }
    }
    options
}
/// Parse identifier list for Teradata indexes, returning raw strings.
/// Lenient by design: a missing identifier between commas is tolerated
/// (nothing is pushed for it) and the list ends at the first token that is
/// neither an identifier nor a comma.
fn parse_identifier_list_raw(&mut self) -> Vec<String> {
    let mut names = Vec::new();
    let mut more = true;
    while more {
        if self.is_identifier_token() || self.is_identifier_or_keyword_token() {
            names.push(self.advance().text);
        }
        // A trailing comma keeps the loop going; anything else ends it.
        more = self.match_token(TokenType::Comma);
    }
    names
}
/// Parse GENERATED column constraint after GENERATED token has been consumed.
/// Handles three forms:
/// 1. GENERATED [BY DEFAULT | ALWAYS] AS IDENTITY [...] -> GeneratedAsIdentity
/// 2. GENERATED ALWAYS AS (expr) [STORED|VIRTUAL] -> ComputedColumn
/// 3. GENERATED ALWAYS AS ROW START|END [HIDDEN] -> GeneratedAsRow
///
/// The resulting constraint is appended to `col_def.constraints`, and its
/// kind is recorded in `col_def.constraint_order` so generation can emit
/// constraints in their original order.
fn parse_generated_column_constraint(&mut self, col_def: &mut ColumnDef) -> Result<()> {
    let always;
    let mut on_null = false;
    // BY DEFAULT [ON NULL] | ALWAYS -- exactly one of the two must follow.
    if self.match_token(TokenType::By) {
        self.expect(TokenType::Default)?;
        // Optional ON NULL after BY DEFAULT -- presumably Oracle's
        // `GENERATED BY DEFAULT ON NULL AS IDENTITY`; confirm dialect origin.
        on_null = self.match_keywords(&[TokenType::On, TokenType::Null]);
        always = false;
    } else {
        self.expect(TokenType::Always)?;
        always = true;
    }
    // Expect AS
    self.expect(TokenType::As)?;
    // Check what follows AS to decide which of the three forms this is.
    if self.check(TokenType::Row) {
        // GENERATED ALWAYS AS ROW START|END [HIDDEN]
        self.skip(); // consume ROW
        let start = if self.match_token(TokenType::Start) {
            true
        } else {
            // Not START, so END is mandatory.
            self.expect(TokenType::End)?;
            false
        };
        let hidden = self.match_identifier("HIDDEN");
        col_def
            .constraints
            .push(ColumnConstraint::GeneratedAsRow(GeneratedAsRow {
                start,
                hidden,
            }));
        col_def
            .constraint_order
            .push(ConstraintType::GeneratedAsRow);
    } else if self.check(TokenType::Identity) {
        // GENERATED [BY DEFAULT | ALWAYS] AS IDENTITY [(...)]
        self.skip(); // consume IDENTITY
        let mut start = None;
        let mut increment = None;
        let mut minvalue = None;
        let mut maxvalue = None;
        let mut cycle = None;
        // Optional sequence options in parentheses
        if self.match_token(TokenType::LParen) {
            // Options may appear in any order; unknown tokens are skipped
            // so unsupported options don't abort the parse.
            loop {
                if self.match_token(TokenType::Start) {
                    self.match_token(TokenType::With); // optional WITH in START WITH n
                    start = Some(Box::new(self.parse_unary()?));
                } else if self.match_token(TokenType::Increment) {
                    self.match_token(TokenType::By); // optional BY in INCREMENT BY n
                    increment = Some(Box::new(self.parse_unary()?));
                } else if self.match_token(TokenType::Minvalue) {
                    minvalue = Some(Box::new(self.parse_unary()?));
                } else if self.match_token(TokenType::Maxvalue) {
                    maxvalue = Some(Box::new(self.parse_unary()?));
                } else if self.match_token(TokenType::Cycle) {
                    cycle = Some(true);
                } else if self.match_keywords(&[TokenType::No, TokenType::Cycle]) {
                    cycle = Some(false);
                } else if self.check(TokenType::RParen) {
                    break;
                } else {
                    // Unknown option token: skip to keep making progress.
                    self.skip();
                }
            }
            self.expect(TokenType::RParen)?;
        }
        col_def
            .constraints
            .push(ColumnConstraint::GeneratedAsIdentity(GeneratedAsIdentity {
                always,
                on_null,
                start,
                increment,
                minvalue,
                maxvalue,
                cycle,
            }));
        col_def
            .constraint_order
            .push(ConstraintType::GeneratedAsIdentity);
    } else if self.check(TokenType::LParen) {
        // GENERATED ALWAYS AS (expr) [STORED|VIRTUAL]
        self.skip(); // consume LParen
        let expr = self.parse_expression()?;
        self.expect(TokenType::RParen)?;
        // Check for STORED or VIRTUAL
        let (persisted, persistence_kind) = if self.match_identifier("STORED") {
            (true, Some("STORED".to_string()))
        } else if self.match_identifier("VIRTUAL") {
            (false, Some("VIRTUAL".to_string()))
        } else {
            // No persistence keyword: not persisted, and no kind recorded.
            (false, None)
        };
        col_def
            .constraints
            .push(ColumnConstraint::ComputedColumn(ComputedColumn {
                expression: Box::new(expr),
                persisted,
                not_null: false,
                persistence_kind,
                data_type: None,
            }));
        col_def
            .constraint_order
            .push(ConstraintType::ComputedColumn);
    } else {
        // Fallback: treat as GENERATED AS IDENTITY without explicit IDENTITY keyword
        col_def
            .constraints
            .push(ColumnConstraint::GeneratedAsIdentity(GeneratedAsIdentity {
                always,
                on_null,
                start: None,
                increment: None,
                minvalue: None,
                maxvalue: None,
                cycle: None,
            }));
        col_def
            .constraint_order
            .push(ConstraintType::GeneratedAsIdentity);
    }
    Ok(())
}
/// Parse AS (expr) [STORED|VIRTUAL|PERSISTED] [TYPE] [NOT NULL] for computed columns.
/// Called after AS token has been consumed and we've confirmed LParen follows.
/// SingleStore: AS (expr) PERSISTED TYPE NOT NULL
///
/// The resulting ComputedColumn constraint is appended to `col_def.constraints`
/// and recorded in `col_def.constraint_order`.
fn parse_as_computed_column(&mut self, col_def: &mut ColumnDef) -> Result<()> {
    self.expect(TokenType::LParen)?;
    let expr = self.parse_expression()?;
    self.expect(TokenType::RParen)?;
    // Check for STORED, VIRTUAL, or PERSISTED; STORED and PERSISTED both
    // mark the column as persisted, but the original keyword is kept so
    // generation can reproduce it.
    let (persisted, persistence_kind) = if self.match_identifier("STORED") {
        (true, Some("STORED".to_string()))
    } else if self.match_identifier("VIRTUAL") {
        (false, Some("VIRTUAL".to_string()))
    } else if self.match_identifier("PERSISTED") {
        (true, Some("PERSISTED".to_string()))
    } else {
        (false, None)
    };
    // For PERSISTED columns, check for optional data type (SingleStore: PERSISTED TYPE NOT NULL)
    // Also check for AUTO keyword for SingleStore: PERSISTED AUTO NOT NULL
    let data_type = if persistence_kind.as_deref() == Some("PERSISTED") {
        // Check if next token looks like a data type (not NOT, not end of input, not comma/rparen)
        if !self.is_at_end()
            && !self.check(TokenType::Not)
            && !self.check(TokenType::Comma)
            && !self.check(TokenType::RParen)
            && !self.check(TokenType::Semicolon)
        {
            let tok = self.peek();
            // Check for AUTO keyword (SingleStore: PERSISTED AUTO)
            if tok.text.eq_ignore_ascii_case("AUTO") {
                self.skip(); // consume AUTO
                None // AUTO is not a data type, just a modifier
            } else if tok.token_type.is_keyword()
                || tok.token_type == TokenType::Identifier
                || tok.token_type == TokenType::Var
            {
                Some(self.parse_data_type()?)
            } else {
                None
            }
        } else {
            None
        }
    } else {
        None
    };
    // For PERSISTED columns, check for NOT NULL
    let not_null = if persistence_kind.as_deref() == Some("PERSISTED") {
        self.match_keywords(&[TokenType::Not, TokenType::Null])
    } else {
        false
    };
    col_def
        .constraints
        .push(ColumnConstraint::ComputedColumn(ComputedColumn {
            expression: Box::new(expr),
            persisted,
            not_null,
            persistence_kind,
            data_type,
        }));
    col_def
        .constraint_order
        .push(ConstraintType::ComputedColumn);
    Ok(())
}
/// Parse PERIOD FOR SYSTEM_TIME (start_col, end_col) as a table constraint.
/// Returns Ok(None) with the cursor restored when the upcoming tokens are
/// not actually this construct (e.g. just a column named PERIOD).
fn parse_period_for_system_time_table_constraint(&mut self) -> Result<Option<TableConstraint>> {
    // Save position for possible retreat.
    let saved = self.current;
    // All three keywords must be present; `&&` short-circuits so we stop
    // consuming as soon as one is missing, then rewind below.
    if self.match_identifier("PERIOD")
        && self.match_token(TokenType::For)
        && self.match_identifier("SYSTEM_TIME")
    {
        // Committed: the (start_col, end_col) pair is now mandatory.
        self.expect(TokenType::LParen)?;
        let start_col = self.expect_identifier_or_safe_keyword_with_quoted()?;
        self.expect(TokenType::Comma)?;
        let end_col = self.expect_identifier_or_safe_keyword_with_quoted()?;
        self.expect(TokenType::RParen)?;
        return Ok(Some(TableConstraint::PeriodForSystemTime {
            start_col,
            end_col,
        }));
    }
    // Not PERIOD FOR SYSTEM_TIME; rewind so the caller can re-parse.
    self.current = saved;
    Ok(None)
}
/// Parse MySQL table options that appear after the closing paren of column definitions.
/// Handles ENGINE=val, AUTO_INCREMENT=val, DEFAULT CHARSET=val, ROW_FORMAT=val,
/// COMMENT='val', COLLATE=val, etc.
///
/// Returns (KEY, value) pairs with keys upper-cased and string values re-quoted.
/// On the first unrecognized or malformed option, the cursor is restored to the
/// start of that option and parsing stops, so the caller resumes from an
/// unmangled position.
fn parse_mysql_table_options(&mut self) -> Vec<(String, String)> {
    let mut options = Vec::new();
    loop {
        // Skip optional commas between options
        self.match_token(TokenType::Comma);
        // DEFAULT CHARSET=val or DEFAULT CHARACTER SET=val
        if self.check(TokenType::Default) {
            let saved = self.current;
            self.skip(); // consume DEFAULT
            if self.check_identifier("CHARSET") || self.check_identifier("CHARACTER") {
                let is_character = self.check_identifier("CHARACTER");
                let key_part = self.advance().text.to_ascii_uppercase();
                if is_character {
                    // CHARACTER SET
                    self.match_token(TokenType::Set);
                }
                if self.match_token(TokenType::Eq) {
                    let value = if self.check(TokenType::String) {
                        let v = format!("'{}'", self.peek().text);
                        self.skip();
                        v
                    } else if self.is_identifier_token()
                        || self.is_safe_keyword_as_identifier()
                        || self.check(TokenType::Number)
                    {
                        self.advance().text
                    } else {
                        self.current = saved;
                        break;
                    };
                    // Normalize CHARSET -> CHARACTER SET
                    let key = if is_character || key_part == "CHARSET" {
                        "DEFAULT CHARACTER SET".to_string()
                    } else {
                        format!("DEFAULT {}", key_part)
                    };
                    options.push((key, value));
                    continue;
                }
            }
            self.current = saved;
            break;
        }
        // ENGINE=val, AUTO_INCREMENT=val, ROW_FORMAT=val, COLLATE=val, KEY_BLOCK_SIZE=val
        let is_known_option = self.check_identifier("ENGINE")
            || self.check(TokenType::AutoIncrement)
            || self.check_identifier("ROW_FORMAT")
            || self.check(TokenType::Collate)
            || self.check_identifier("KEY_BLOCK_SIZE")
            || self.check_identifier("PACK_KEYS")
            || self.check_identifier("STATS_AUTO_RECALC")
            || self.check_identifier("STATS_PERSISTENT")
            || self.check_identifier("STATS_SAMPLE_PAGES")
            || self.check_identifier("MAX_ROWS")
            || self.check_identifier("MIN_ROWS")
            || self.check_identifier("CHECKSUM")
            || self.check_identifier("DELAY_KEY_WRITE")
            || self.check_identifier("COMPRESSION")
            || self.check_identifier("CONNECTION")
            || self.check_identifier("TABLESPACE")
            || self.check_identifier("ENCRYPTION");
        if is_known_option {
            // Fix: remember the position so a malformed option (missing `=`
            // or missing value) restores the cursor instead of silently
            // swallowing the key token -- consistent with the DEFAULT,
            // COMMENT, and CHARACTER SET branches.
            let saved = self.current;
            let key = self.advance().text.to_ascii_uppercase();
            if self.match_token(TokenType::Eq) {
                let value = if self.check(TokenType::String) {
                    let v = format!("'{}'", self.peek().text);
                    self.skip();
                    v
                } else if self.check(TokenType::Number) {
                    self.advance().text
                } else if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
                    self.advance().text
                } else {
                    self.current = saved;
                    break;
                };
                options.push((key, value));
                continue;
            }
            self.current = saved;
            break;
        }
        // COMMENT='val' (Comment is a keyword token type); the `=` is
        // optional, so COMMENT 'val' is accepted too.
        if self.check(TokenType::Comment) {
            let saved = self.current;
            self.skip(); // consume COMMENT
            if self.match_token(TokenType::Eq) {
                if self.check(TokenType::String) {
                    let v = format!("'{}'", self.peek().text);
                    self.skip();
                    options.push(("COMMENT".to_string(), v));
                    continue;
                }
            } else if self.check(TokenType::String) {
                let v = format!("'{}'", self.peek().text);
                self.skip();
                options.push(("COMMENT".to_string(), v));
                continue;
            }
            self.current = saved;
            break;
        }
        // CHARACTER SET=val or CHARSET=val (without DEFAULT prefix)
        if self.check_identifier("CHARACTER") || self.check_identifier("CHARSET") {
            let saved = self.current;
            let is_character = self.check_identifier("CHARACTER");
            self.skip(); // consume CHARACTER or CHARSET
            if is_character {
                // CHARACTER SET -- the SET keyword is mandatory in this form.
                if !self.match_token(TokenType::Set) {
                    self.current = saved;
                    break;
                }
            }
            if self.match_token(TokenType::Eq) {
                let value = if self.check(TokenType::String) {
                    let v = format!("'{}'", self.peek().text);
                    self.skip();
                    v
                } else if self.is_identifier_token()
                    || self.is_safe_keyword_as_identifier()
                    || self.check(TokenType::Number)
                {
                    self.advance().text
                } else {
                    self.current = saved;
                    break;
                };
                options.push(("CHARACTER SET".to_string(), value));
                continue;
            }
            self.current = saved;
            break;
        }
        break;
    }
    options
}
/// Parse Hive-specific table properties that appear after column definitions.
/// Handles: ROW FORMAT (SERDE/DELIMITED), STORED AS/BY, USING (Spark/Databricks),
/// LOCATION, TBLPROPERTIES, DISTRIBUTED BY, CLUSTERED BY, and PARTITIONED BY.
///
/// Properties are collected in source order; the loop ends at the first
/// token sequence that matches none of the recognized prefixes.
fn parse_hive_table_properties(&mut self) -> Result<Vec<Expression>> {
    let mut properties = Vec::new();
    loop {
        // ROW FORMAT SERDE 'class' [WITH SERDEPROPERTIES (...)]
        // ROW FORMAT DELIMITED [FIELDS TERMINATED BY ...] [...]
        if self.check(TokenType::Row) && self.check_next(TokenType::Format) {
            self.skip();
            // NOTE(review): only ROW is consumed here before calling
            // parse_row(); if parse_row() returns None we fall through with
            // the stream already advanced -- confirm parse_row always
            // succeeds when FORMAT follows.
            if let Some(row_format) = self.parse_row()? {
                properties.push(row_format);
                continue;
            }
        }
        // STORED AS INPUTFORMAT 'input' OUTPUTFORMAT 'output'
        // STORED AS format_name
        // STORED BY 'storage_handler_class'
        if self.match_identifier("STORED") {
            if self.match_token(TokenType::By) {
                // STORED BY 'storage_handler_class'
                let handler = self.parse_string()?.unwrap_or(Expression::Null(Null));
                properties.push(Expression::StorageHandlerProperty(Box::new(
                    StorageHandlerProperty {
                        this: Box::new(handler),
                    },
                )));
                continue;
            } else if self.match_token(TokenType::As) {
                // STORED AS INPUTFORMAT 'x' OUTPUTFORMAT 'y' or STORED AS format
                if self.match_token(TokenType::InputFormat) {
                    let input_format = self.parse_string()?;
                    let output_format = if self.match_identifier("OUTPUTFORMAT") {
                        self.parse_string()?
                    } else {
                        None
                    };
                    // Use InputOutputFormat inside FileFormatProperty.this
                    let io_format =
                        Expression::InputOutputFormat(Box::new(InputOutputFormat {
                            input_format: input_format.map(Box::new),
                            output_format: output_format.map(Box::new),
                        }));
                    // hive_format = Some(true) marks this as STORED AS syntax
                    // (as opposed to USING, which leaves it None below).
                    properties.push(Expression::FileFormatProperty(Box::new(
                        FileFormatProperty {
                            this: Some(Box::new(io_format)),
                            expressions: vec![],
                            hive_format: Some(Box::new(Expression::Boolean(BooleanLiteral {
                                value: true,
                            }))),
                        },
                    )));
                    continue;
                } else {
                    // STORED AS format_name (e.g., STORED AS TEXTFILE, STORED AS ORC)
                    let format = if self.check(TokenType::String) {
                        Expression::Literal(Box::new(Literal::String(
                            self.advance().text.clone(),
                        )))
                    } else if self.is_identifier_token() || self.is_safe_keyword_as_identifier()
                    {
                        Expression::Identifier(Identifier::new(self.advance().text.clone()))
                    } else {
                        break;
                    };
                    properties.push(Expression::FileFormatProperty(Box::new(
                        FileFormatProperty {
                            this: Some(Box::new(format)),
                            expressions: vec![],
                            hive_format: Some(Box::new(Expression::Boolean(BooleanLiteral {
                                value: true,
                            }))),
                        },
                    )));
                    continue;
                }
            }
        }
        // USING format_name (Databricks/Spark) e.g., USING DELTA, USING PARQUET
        // This is similar to STORED AS but uses different syntax
        if self.match_token(TokenType::Using) {
            // Parse the format name (e.g., DELTA, PARQUET, ICEBERG, etc.)
            let format = if self.check(TokenType::String) {
                Expression::Literal(Box::new(Literal::String(self.advance().text.clone())))
            } else if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
                Expression::Identifier(Identifier::new(self.advance().text.clone()))
            } else {
                break;
            };
            // Create FileFormatProperty WITHOUT hive_format to signal USING syntax
            properties.push(Expression::FileFormatProperty(Box::new(
                FileFormatProperty {
                    this: Some(Box::new(format)),
                    expressions: vec![],
                    hive_format: None, // None indicates USING syntax (not STORED AS)
                },
            )));
            continue;
        }
        // LOCATION 'path'
        if self.match_identifier("LOCATION") {
            let path = self.parse_string()?.unwrap_or(Expression::Null(Null));
            properties.push(Expression::LocationProperty(Box::new(LocationProperty {
                this: Box::new(path),
            })));
            continue;
        }
        // TBLPROPERTIES ('key'='value', ...)
        if self.match_identifier("TBLPROPERTIES") {
            // Parse the property list manually since parse_property doesn't handle key=value
            self.expect(TokenType::LParen)?;
            let mut prop_exprs = Vec::new();
            loop {
                if self.check(TokenType::RParen) {
                    break;
                }
                // Parse 'key'='value' or key=value
                let key = self.parse_primary()?;
                if self.match_token(TokenType::Eq) {
                    let value = self.parse_primary()?;
                    prop_exprs.push(Expression::Eq(Box::new(BinaryOp::new(key, value))));
                } else {
                    // Bare key with no value; keep it as-is.
                    prop_exprs.push(key);
                }
                if !self.match_token(TokenType::Comma) {
                    break;
                }
            }
            self.expect(TokenType::RParen)?;
            properties.push(Expression::Properties(Box::new(Properties {
                expressions: prop_exprs,
            })));
            continue;
        }
        // DISTRIBUTED BY HASH (col1, col2) [BUCKETS n] (StarRocks/Doris)
        if self.match_identifier("DISTRIBUTED") {
            // NOTE(review): DISTRIBUTED is consumed even when
            // parse_distributed_property() returns None -- confirm that
            // fall-through is intended.
            if let Some(dist_prop) = self.parse_distributed_property()? {
                properties.push(dist_prop);
                continue;
            }
        }
        // CLUSTERED BY (col, col, ...) [SORTED BY (col, col, ...)] INTO n BUCKETS (Hive/Athena)
        if self.match_identifier("CLUSTERED") {
            self.expect(TokenType::By)?;
            self.expect(TokenType::LParen)?;
            let expressions = self.parse_expression_list()?;
            self.expect(TokenType::RParen)?;
            // Optional SORTED BY (col, col, ...)
            let sorted_by = if self.match_identifier("SORTED") {
                self.expect(TokenType::By)?;
                self.expect(TokenType::LParen)?;
                let sorted_exprs = self.parse_expression_list()?;
                self.expect(TokenType::RParen)?;
                Some(Box::new(Expression::Tuple(Box::new(Tuple {
                    expressions: sorted_exprs,
                }))))
            } else {
                None
            };
            // INTO n BUCKETS
            let buckets = if self.match_token(TokenType::Into) {
                let num = self.parse_expression()?;
                if !self.match_identifier("BUCKETS") {
                    return Err(self.parse_error("Expected BUCKETS after INTO <n>"));
                }
                Some(Box::new(num))
            } else {
                None
            };
            properties.push(Expression::ClusteredByProperty(Box::new(
                ClusteredByProperty {
                    expressions,
                    sorted_by,
                    buckets,
                },
            )));
            continue;
        }
        // PARTITIONED BY (col, col, ...) or PARTITIONED BY (col, BUCKET(n, col), ...) (Hive/Athena/Iceberg)
        if self.match_identifier("PARTITIONED") {
            self.expect(TokenType::By)?;
            self.expect(TokenType::LParen)?;
            let mut partition_exprs = Vec::new();
            loop {
                if self.check(TokenType::RParen) {
                    break;
                }
                // Check for transform functions like BUCKET(n, col), TRUNCATE(n, col), etc.
                if self.check_identifier("BUCKET") || self.check_identifier("TRUNCATE") {
                    let func_name = self.advance().text.clone();
                    self.expect(TokenType::LParen)?;
                    let args = self.parse_expression_list()?;
                    self.expect(TokenType::RParen)?;
                    // Create a Function expression for BUCKET/TRUNCATE
                    partition_exprs.push(Expression::Function(Box::new(Function {
                        name: func_name,
                        args,
                        distinct: false,
                        trailing_comments: Vec::new(),
                        use_bracket_syntax: false,
                        no_parens: false,
                        quoted: false,
                        span: None,
                        inferred_type: None,
                    })));
                } else {
                    // Try to parse as column definition (name data_type) for Hive-style partitioned by
                    // e.g., PARTITIONED BY (y INT, z STRING)
                    let saved_pos = self.current;
                    let mut parsed_as_column = false;
                    // Allow type keywords (like DATE, TIMESTAMP) as column names in PARTITIONED BY
                    if self.check(TokenType::Var)
                        || self.check(TokenType::Identifier)
                        || self.check(TokenType::Date)
                        || self.check(TokenType::Timestamp)
                        || self.check(TokenType::Int)
                        || self.check(TokenType::BigInt)
                        || self.check(TokenType::SmallInt)
                        || self.check(TokenType::TinyInt)
                        || self.check(TokenType::Float)
                        || self.check(TokenType::Double)
                        || self.check(TokenType::Boolean)
                    {
                        let col_name = self.advance().text.clone();
                        // Check if next token looks like a data type
                        if self.check(TokenType::Var)
                            || self.check(TokenType::Identifier)
                            || self.check(TokenType::Int)
                            || self.check(TokenType::BigInt)
                            || self.check(TokenType::SmallInt)
                            || self.check(TokenType::TinyInt)
                            || self.check(TokenType::Float)
                            || self.check(TokenType::Double)
                            || self.check(TokenType::Boolean)
                            || self.check(TokenType::Date)
                            || self.check(TokenType::Timestamp)
                        {
                            // Only commit to the column-def interpretation
                            // when the second token spells a known type name.
                            let type_text = self.peek().text.to_ascii_uppercase();
                            let is_type = matches!(
                                type_text.as_str(),
                                "INT"
                                    | "INTEGER"
                                    | "BIGINT"
                                    | "SMALLINT"
                                    | "TINYINT"
                                    | "FLOAT"
                                    | "DOUBLE"
                                    | "DECIMAL"
                                    | "NUMERIC"
                                    | "STRING"
                                    | "VARCHAR"
                                    | "CHAR"
                                    | "BINARY"
                                    | "BOOLEAN"
                                    | "DATE"
                                    | "TIMESTAMP"
                                    | "DATETIME"
                                    | "ARRAY"
                                    | "MAP"
                                    | "STRUCT"
                            );
                            if is_type {
                                // Parse as column definition
                                let data_type = self.parse_data_type()?;
                                // Store as ColumnDef expression
                                partition_exprs.push(Expression::ColumnDef(Box::new(
                                    crate::expressions::ColumnDef::new(col_name, data_type),
                                )));
                                parsed_as_column = true;
                            }
                        }
                    }
                    if !parsed_as_column {
                        // Backtrack and parse as regular expression
                        self.current = saved_pos;
                        partition_exprs.push(self.parse_expression()?);
                    }
                }
                if !self.match_token(TokenType::Comma) {
                    break;
                }
            }
            self.expect(TokenType::RParen)?;
            properties.push(Expression::PartitionedByProperty(Box::new(
                PartitionedByProperty {
                    this: Box::new(Expression::Tuple(Box::new(Tuple {
                        expressions: partition_exprs,
                    }))),
                },
            )));
            continue;
        }
        // No more Hive properties
        break;
    }
    Ok(properties)
}
/// Scan a Snowflake `[WITH] ROW ACCESS POLICY ...` clause and return it as a
/// raw SQL string of the form `"ROW ACCESS POLICY <body>"`, or None (with the
/// cursor restored) when the upcoming tokens are not that clause.
fn parse_snowflake_row_access_policy_clause(&mut self) -> Option<String> {
    let saved = self.current;
    // The WITH prefix is optional.
    let _ = self.match_token(TokenType::With);
    if !self.match_text_seq(&["ROW", "ACCESS", "POLICY"]) {
        self.current = saved;
        return None;
    }
    let body_start = self.current;
    let mut depth = 0usize;
    // Consume the clause body until a terminator token appears at paren
    // depth 0 (AS, COMMENT, ';', or a following table-option keyword).
    while !self.is_at_end() {
        if depth == 0
            && (self.check(TokenType::As)
                || self.check(TokenType::Comment)
                || self.check(TokenType::Semicolon)
                || self.check_identifier("TAG")
                || self.check_identifier("OPTIONS")
                || self.check_identifier("BUILD")
                || self.check_text_seq(&["AUTO", "REFRESH"])
                || self.check_text_seq(&["COPY", "GRANTS"])
                || self.check_identifier("SECURITY")
                || self.check(TokenType::Refresh))
        {
            break;
        }
        if self.check(TokenType::LParen) {
            depth += 1;
        } else if self.check(TokenType::RParen) && depth > 0 {
            depth -= 1;
        }
        self.skip();
    }
    // Re-render the consumed token span back into SQL text.
    let body = self.tokens_to_sql(body_start, self.current);
    Some(if body.is_empty() {
        "ROW ACCESS POLICY".to_string()
    } else {
        format!("ROW ACCESS POLICY {}", body)
    })
}
/// Parse table-level properties that appear after the closing paren of column
/// definitions. Currently handles:
/// - Snowflake: `[WITH] ROW ACCESS POLICY ...` (captured as raw SQL)
/// - Doris/StarRocks: UNIQUE KEY / DUPLICATE KEY, DISTRIBUTED BY, PROPERTIES
/// - TSQL: `WITH(SYSTEM_VERSIONING=ON(...))`
fn parse_post_table_properties(&mut self) -> Result<Vec<Expression>> {
    let mut properties = Vec::new();
    if matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::Snowflake)
    ) {
        if let Some(clause) = self.parse_snowflake_row_access_policy_clause() {
            // The policy clause is preserved verbatim as raw SQL rather
            // than modeled as a structured AST node.
            properties.push(Expression::Raw(Raw {
                sql: format!("WITH {}", clause),
            }));
        }
    }
    // Doris/StarRocks: UNIQUE KEY (cols) or DUPLICATE KEY (cols) after column definitions
    // These are table key properties that define the distribution/sort key
    let is_doris_starrocks = matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::Doris)
            | Some(crate::dialects::DialectType::StarRocks)
    );
    if is_doris_starrocks {
        // UNIQUE KEY (c1, c2, ...) - defines unique key columns
        if self.match_text_seq(&["UNIQUE", "KEY"]) {
            let exprs = self.parse_composite_key_expressions()?;
            properties.push(Expression::UniqueKeyProperty(Box::new(
                crate::expressions::UniqueKeyProperty { expressions: exprs },
            )));
        }
        // DUPLICATE KEY (c1, c2, ...) - defines duplicate key columns
        else if self.match_text_seq(&["DUPLICATE", "KEY"]) {
            let exprs = self.parse_composite_key_expressions()?;
            properties.push(Expression::DuplicateKeyProperty(Box::new(
                crate::expressions::DuplicateKeyProperty { expressions: exprs },
            )));
        }
        // DISTRIBUTED BY HASH (col1, col2) [BUCKETS n] - comes after UNIQUE KEY / DUPLICATE KEY
        if self.match_identifier("DISTRIBUTED") {
            if let Some(dist_prop) = self.parse_distributed_property()? {
                properties.push(dist_prop);
            }
        }
        // PROPERTIES ('key'='value', ...) - comes after DISTRIBUTED BY
        if self.match_identifier("PROPERTIES") {
            let props = self.parse_options_list()?;
            if !props.is_empty() {
                properties.push(Expression::Properties(Box::new(Properties {
                    expressions: props,
                })));
            }
        }
    }
    // Check for WITH( that might contain SYSTEM_VERSIONING
    // We need to be careful not to consume a WITH that is meant for WITH properties
    // or other purposes. We only handle WITH(SYSTEM_VERSIONING=...) here.
    if self.check(TokenType::With) {
        // Look ahead: WITH followed by ( followed by SYSTEM_VERSIONING
        let saved = self.current;
        if self.match_token(TokenType::With) {
            if self.match_token(TokenType::LParen) {
                if self.check_identifier("SYSTEM_VERSIONING") {
                    self.skip(); // consume SYSTEM_VERSIONING
                    // Committed from here on: failures are hard errors,
                    // not retreats.
                    self.expect(TokenType::Eq)?;
                    let on = if self.match_token(TokenType::On) {
                        true
                    } else if self.match_identifier("OFF") {
                        false
                    } else {
                        return Err(
                            self.parse_error("Expected ON or OFF after SYSTEM_VERSIONING=")
                        );
                    };
                    let mut history_table = None;
                    let mut data_consistency = None;
                    // Optional parameters: ON(HISTORY_TABLE=..., DATA_CONSISTENCY_CHECK=...)
                    if on && self.match_token(TokenType::LParen) {
                        loop {
                            if self.check(TokenType::RParen) {
                                break;
                            }
                            if self.match_identifier("HISTORY_TABLE") {
                                self.expect(TokenType::Eq)?;
                                // Parse table reference (could be [dbo].[table])
                                let table_ref = self.parse_table_ref()?;
                                history_table = Some(Expression::Table(Box::new(table_ref)));
                            } else if self.match_identifier("DATA_CONSISTENCY_CHECK") {
                                self.expect(TokenType::Eq)?;
                                let val = self.expect_identifier_or_keyword()?;
                                data_consistency = Some(Expression::Identifier(
                                    crate::expressions::Identifier::new(val),
                                ));
                            } else if self.check(TokenType::RParen) {
                                break;
                            } else {
                                // Unknown parameter: skip a token to keep
                                // making progress.
                                self.skip();
                            }
                            self.match_token(TokenType::Comma);
                        }
                        self.expect(TokenType::RParen)?;
                    }
                    self.expect(TokenType::RParen)?; // close WITH(...)
                    properties.push(Expression::WithSystemVersioningProperty(Box::new(
                        WithSystemVersioningProperty {
                            on: if on {
                                Some(Box::new(Expression::Boolean(
                                    crate::expressions::BooleanLiteral { value: true },
                                )))
                            } else {
                                None
                            },
                            this: history_table.map(Box::new),
                            data_consistency: data_consistency.map(Box::new),
                            retention_period: None,
                            with_: Some(Box::new(Expression::Boolean(
                                crate::expressions::BooleanLiteral { value: true },
                            ))),
                        },
                    )));
                } else {
                    // Not SYSTEM_VERSIONING, retreat
                    self.current = saved;
                }
            } else {
                // Not WITH(...), retreat
                self.current = saved;
            }
        }
    }
    Ok(properties)
}
/// Parse a parenthesized composite-key column list, e.g. the `(a, b)`
/// part of `UNIQUE KEY (a, b)` or `DUPLICATE KEY (a, b)`.
///
/// Returns the column identifiers as expressions. Collection stops at
/// the first position where no identifier can be parsed or where no
/// comma follows; the closing parenthesis is then required.
fn parse_composite_key_expressions(&mut self) -> Result<Vec<Expression>> {
    self.expect(TokenType::LParen)?;
    let mut key_columns = Vec::new();
    // Consume comma-separated identifiers until the list ends.
    while let Some(column) = self.parse_id_var()? {
        key_columns.push(column);
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    self.expect(TokenType::RParen)?;
    Ok(key_columns)
}
/// Parse one table-level constraint, optionally prefixed with
/// `CONSTRAINT <name>`, then delegate to the kind-specific parser.
///
/// The name position accepts keywords as identifiers so inputs like
/// `CONSTRAINT identity CHECK ...` round-trip correctly.
fn parse_table_constraint(&mut self) -> Result<TableConstraint> {
    let mut constraint_name = None;
    if self.match_token(TokenType::Constraint) {
        // Safe-keyword variant: keywords are valid constraint names here.
        constraint_name = Some(self.expect_identifier_or_safe_keyword_with_quoted()?);
    }
    self.parse_constraint_definition(constraint_name)
}
/// Parse a constraint definition — the part after an optional
/// `CONSTRAINT <name>` prefix. Dispatches on the leading keyword(s):
/// PRIMARY KEY, UNIQUE, FOREIGN KEY, CHECK, EXCLUDE (PostgreSQL),
/// ASSUME (ClickHouse), or DEFAULT ... FOR (TSQL).
///
/// `name` is the constraint name parsed by the caller, if any. Several
/// branches also accept a MySQL-style name written *after* the
/// constraint keyword; `actual_name.or(name)` prefers that position
/// when `name` is `None`.
fn parse_constraint_definition(&mut self, name: Option<Identifier>) -> Result<TableConstraint> {
    if self.match_keywords(&[TokenType::PrimaryKey, TokenType::Key]) {
        // PRIMARY KEY [CLUSTERED|NONCLUSTERED] [name] (col1, col2) [INCLUDE (col3, col4)]
        // MySQL allows: PRIMARY KEY pk_name (col1, col2)
        // TSQL allows: PRIMARY KEY CLUSTERED (col1, col2)
        // Check for TSQL CLUSTERED/NONCLUSTERED modifier
        let clustered = if self.check_identifier("CLUSTERED") {
            self.skip();
            Some("CLUSTERED".to_string())
        } else if self.check_identifier("NONCLUSTERED") {
            self.skip();
            Some("NONCLUSTERED".to_string())
        } else {
            None
        };
        // Optional name in the post-keyword position (only when the
        // caller supplied none and a column list doesn't start here).
        let actual_name = if name.is_none() && !self.check(TokenType::LParen) {
            if matches!(
                self.config.dialect,
                Some(crate::dialects::DialectType::ClickHouse)
            ) {
                // ClickHouse: PRIMARY KEY col (without parentheses) —
                // the identifier here is a column, not a name.
                None
            } else if self.is_identifier_token() || self.check(TokenType::QuotedIdentifier) {
                Some(self.expect_identifier_with_quoted()?)
            } else if self.check(TokenType::String)
                && matches!(
                    self.config.dialect,
                    Some(crate::dialects::DialectType::MySQL)
                )
            {
                // MySQL: double-quoted strings can be used as constraint names
                // e.g., PRIMARY KEY "pk_name" (id) -> PRIMARY KEY `pk_name` (id)
                let s = self.advance().text.clone();
                Some(Identifier {
                    name: s,
                    quoted: true,
                    trailing_comments: Vec::new(),
                    span: None,
                })
            } else {
                None
            }
        } else {
            name.clone()
        };
        // ClickHouse: PRIMARY KEY col without parens — parse single column
        let columns = if matches!(
            self.config.dialect,
            Some(crate::dialects::DialectType::ClickHouse)
        ) && !self.check(TokenType::LParen)
            && (self.is_identifier_token() || self.is_safe_keyword_as_identifier())
        {
            let col_name = self.expect_identifier_or_keyword_with_quoted()?;
            vec![col_name]
        } else {
            self.expect(TokenType::LParen)?;
            // ClickHouse: allow empty PRIMARY KEY ()
            let cols = if self.check(TokenType::RParen) {
                Vec::new()
            } else if matches!(
                self.config.dialect,
                Some(crate::dialects::DialectType::ClickHouse)
            ) {
                // ClickHouse: PRIMARY KEY(v1, gcd(v1, v2)) - expressions allowed.
                // Each expression is rendered back to SQL text and stored
                // as an identifier so it survives in the column list.
                let mut exprs = Vec::new();
                loop {
                    let expr = self.parse_expression()?;
                    let name = self.expression_to_sql(&expr);
                    exprs.push(Identifier::new(name));
                    if !self.match_token(TokenType::Comma) {
                        break;
                    }
                }
                exprs
            } else {
                self.parse_index_identifier_list()?
            };
            self.expect(TokenType::RParen)?;
            cols
        };
        // Parse optional INCLUDE (columns)
        let include_columns = if self.match_identifier("INCLUDE") {
            self.expect(TokenType::LParen)?;
            let cols = self.parse_identifier_list()?;
            self.expect(TokenType::RParen)?;
            cols
        } else {
            Vec::new()
        };
        // Parse optional constraint modifiers (ENFORCED, DEFERRABLE, etc.)
        let mut modifiers = self.parse_constraint_modifiers();
        modifiers.clustered = clustered;
        // True only when the caller saw an explicit CONSTRAINT keyword
        // (i.e. a caller-supplied name exists).
        let has_constraint_keyword = name.is_some();
        Ok(TableConstraint::PrimaryKey {
            name: actual_name.or(name),
            columns,
            include_columns,
            modifiers,
            has_constraint_keyword,
        })
    } else if self.match_token(TokenType::Unique) {
        // UNIQUE [CLUSTERED|NONCLUSTERED] [KEY|INDEX] [NULLS NOT DISTINCT] [name] (col1, col2) or UNIQUE column_name
        // MySQL allows: UNIQUE KEY name (cols), UNIQUE INDEX name (cols), UNIQUE (cols)
        // TSQL allows: UNIQUE CLUSTERED (cols)
        // PostgreSQL 15+: UNIQUE NULLS NOT DISTINCT (cols)
        // Check for TSQL CLUSTERED/NONCLUSTERED modifier
        let clustered = if self.check_identifier("CLUSTERED") {
            self.skip();
            Some("CLUSTERED".to_string())
        } else if self.check_identifier("NONCLUSTERED") {
            self.skip();
            Some("NONCLUSTERED".to_string())
        } else {
            None
        };
        let use_key_keyword =
            self.match_token(TokenType::Key) || self.match_token(TokenType::Index);
        // Check for NULLS NOT DISTINCT (PostgreSQL 15+ feature)
        let nulls_not_distinct = self.match_text_seq(&["NULLS", "NOT", "DISTINCT"]);
        // Check for optional constraint name (before columns)
        let actual_name = if name.is_none()
            && self.is_identifier_token()
            && !self.check_next(TokenType::Comma)
        {
            // Name might be here: UNIQUE KEY idx_name (cols).
            // Only treat it as a name if a column list follows; otherwise
            // the identifier is the single constrained column (below).
            if self.check_next(TokenType::LParen) {
                Some(self.expect_identifier_with_quoted()?)
            } else {
                None
            }
        } else {
            name.clone()
        };
        if self.match_token(TokenType::LParen) {
            let columns = self.parse_index_identifier_list()?;
            self.expect(TokenType::RParen)?;
            let mut modifiers = self.parse_constraint_modifiers();
            modifiers.clustered = clustered;
            if use_key_keyword {
                // UNIQUE KEY/INDEX - use Index constraint type with UNIQUE kind
                Ok(TableConstraint::Index {
                    name: actual_name.or(name),
                    columns,
                    kind: Some("UNIQUE".to_string()),
                    modifiers,
                    use_key_keyword,
                    expression: None,
                    index_type: None,
                    granularity: None,
                })
            } else {
                let has_constraint_keyword = name.is_some();
                Ok(TableConstraint::Unique {
                    name: actual_name.or(name),
                    columns,
                    columns_parenthesized: true,
                    modifiers,
                    has_constraint_keyword,
                    nulls_not_distinct,
                })
            }
        } else {
            // Single column unique (for ALTER TABLE ADD CONSTRAINT name UNIQUE colname)
            let col_name = self.expect_identifier()?;
            let mut modifiers = self.parse_constraint_modifiers();
            modifiers.clustered = clustered;
            let has_constraint_keyword = name.is_some();
            Ok(TableConstraint::Unique {
                name: actual_name.or(name),
                columns: vec![Identifier::new(col_name)],
                columns_parenthesized: false,
                modifiers,
                has_constraint_keyword,
                nulls_not_distinct,
            })
        }
    } else if self.match_keywords(&[TokenType::ForeignKey, TokenType::Key]) {
        // FOREIGN KEY (col1) [REFERENCES other_table(col2)] [ON DELETE ...] [ON UPDATE ...]
        self.expect(TokenType::LParen)?;
        let columns = self.parse_identifier_list()?;
        self.expect(TokenType::RParen)?;
        if self.match_token(TokenType::References) {
            let references = self.parse_foreign_key_ref()?;
            let modifiers = self.parse_constraint_modifiers();
            Ok(TableConstraint::ForeignKey {
                name,
                columns,
                references: Some(references),
                // Actions live inside `references` when REFERENCES is present.
                on_delete: None,
                on_update: None,
                modifiers,
            })
        } else {
            // No REFERENCES - parse optional ON DELETE/ON UPDATE directly
            let mut on_delete = None;
            let mut on_update = None;
            loop {
                if self.check(TokenType::On) {
                    let saved = self.current;
                    self.skip(); // consume ON
                    if self.match_token(TokenType::Delete) {
                        on_delete = Some(self.parse_referential_action()?);
                    } else if self.match_token(TokenType::Update) {
                        on_update = Some(self.parse_referential_action()?);
                    } else {
                        // ON belongs to some other clause — backtrack and stop.
                        self.current = saved;
                        break;
                    }
                } else {
                    break;
                }
            }
            let modifiers = self.parse_constraint_modifiers();
            Ok(TableConstraint::ForeignKey {
                name,
                columns,
                references: None,
                on_delete,
                on_update,
                modifiers,
            })
        }
    } else if self.match_token(TokenType::Check) {
        // CHECK (expression) or CHECK (SELECT ...) or ClickHouse: CHECK expression (without parens)
        let expression = if self.match_token(TokenType::LParen) {
            let expr = if self.check(TokenType::Select) || self.check(TokenType::With) {
                // SELECT/WITH in CHECK constraint — parse directly, no Subquery wrapper
                // The generator already wraps CHECK content in parens
                self.parse_statement()?
            } else {
                self.parse_expression()?
            };
            self.expect(TokenType::RParen)?;
            expr
        } else if matches!(
            self.config.dialect,
            Some(crate::dialects::DialectType::ClickHouse)
        ) {
            self.parse_or()?
        } else {
            // Neither parenthesized nor a ClickHouse bare expression:
            // this expect() always fails here and surfaces the
            // missing-LParen error via `?`; unreachable!() only
            // satisfies the type checker.
            self.expect(TokenType::LParen)?;
            unreachable!()
        };
        let modifiers = self.parse_constraint_modifiers();
        Ok(TableConstraint::Check {
            name,
            expression,
            modifiers,
        })
    } else if self.match_token(TokenType::Exclude) {
        // PostgreSQL EXCLUDE constraint
        // EXCLUDE [USING method] (element WITH operator, ...) [INCLUDE (cols)] [WHERE (expr)] [WITH (params)]
        let using = if self.match_token(TokenType::Using) {
            Some(self.expect_identifier()?)
        } else {
            None
        };
        self.expect(TokenType::LParen)?;
        let mut elements = Vec::new();
        loop {
            // Parse element expression: may be a function call like INT4RANGE(vid, nid)
            // or column name possibly with operator class, ASC/DESC, NULLS FIRST/LAST.
            // Collected as raw token text (not a parsed AST) because operator
            // classes and ordering options fall outside the expression grammar.
            let mut expr_parts = Vec::new();
            let mut paren_depth = 0;
            while !self.is_at_end() {
                if self.check(TokenType::LParen) {
                    paren_depth += 1;
                    expr_parts.push(self.advance().text);
                } else if self.check(TokenType::RParen) {
                    if paren_depth == 0 {
                        // Unmatched ) closes the element list.
                        break;
                    }
                    paren_depth -= 1;
                    expr_parts.push(self.advance().text);
                } else if paren_depth == 0 && self.check(TokenType::With) {
                    // Top-level WITH separates the element from its operator.
                    break;
                } else if self.check(TokenType::String) {
                    // Preserve string literal quotes
                    let token = self.advance();
                    expr_parts.push(format!("'{}'", token.text));
                } else {
                    expr_parts.push(self.advance().text);
                }
            }
            // Re-join tokens, then strip the spaces join() put around
            // punctuation so the stored text reads like normal SQL.
            let expression = expr_parts
                .join(" ")
                .replace(" (", "(")
                .replace(" )", ")")
                .replace("( ", "(")
                .replace(" ,", ",");
            // Parse WITH operator
            self.expect(TokenType::With)?;
            let operator = self.advance().text.clone();
            elements.push(ExcludeElement {
                expression,
                operator,
            });
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
        self.expect(TokenType::RParen)?;
        // Parse optional INCLUDE (columns)
        let include_columns = if self.match_identifier("INCLUDE") {
            self.expect(TokenType::LParen)?;
            let cols = self.parse_identifier_list()?;
            self.expect(TokenType::RParen)?;
            cols
        } else {
            Vec::new()
        };
        // Parse optional WITH (storage_parameters)
        let with_params = if self.match_token(TokenType::With) {
            self.expect(TokenType::LParen)?;
            let mut params = Vec::new();
            loop {
                let key = self.expect_identifier()?;
                self.expect(TokenType::Eq)?;
                // Value is stored as raw token text.
                let val = self.advance().text.clone();
                params.push((key, val));
                if !self.match_token(TokenType::Comma) {
                    break;
                }
            }
            self.expect(TokenType::RParen)?;
            params
        } else {
            Vec::new()
        };
        // Parse optional USING INDEX TABLESPACE tablespace_name
        let using_index_tablespace =
            if self.check(TokenType::Using) && self.check_next(TokenType::Index) {
                self.skip(); // consume USING
                self.skip(); // consume INDEX
                if self.match_identifier("TABLESPACE") {
                    Some(self.expect_identifier()?)
                } else {
                    None
                }
            } else {
                None
            };
        // Parse optional WHERE clause
        let where_clause = if self.match_token(TokenType::Where) {
            self.expect(TokenType::LParen)?;
            let expr = self.parse_expression()?;
            self.expect(TokenType::RParen)?;
            Some(Box::new(expr))
        } else {
            None
        };
        let modifiers = self.parse_constraint_modifiers();
        Ok(TableConstraint::Exclude {
            name,
            using,
            elements,
            include_columns,
            where_clause,
            with_params,
            using_index_tablespace,
            modifiers,
        })
    } else if matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    ) && self.check_identifier("ASSUME")
    {
        // ClickHouse: CONSTRAINT name ASSUME expression
        // Used for query optimization assumptions
        self.skip(); // consume ASSUME
        let expression = if self.match_token(TokenType::LParen) {
            // ASSUME (expr) or ASSUME (SELECT ...)
            let expr = if self.check(TokenType::Select) || self.check(TokenType::With) {
                self.parse_statement()?
            } else {
                self.parse_expression()?
            };
            self.expect(TokenType::RParen)?;
            expr
        } else {
            self.parse_expression()?
        };
        Ok(TableConstraint::Assume { name, expression })
    } else if self.match_token(TokenType::Default) {
        // TSQL: CONSTRAINT name DEFAULT value FOR column
        let expression = self.parse_expression()?;
        self.expect(TokenType::For)?;
        let column = self.expect_identifier_with_quoted()?;
        Ok(TableConstraint::Default {
            name,
            expression,
            column,
        })
    } else {
        Err(self.parse_error("Expected PRIMARY KEY, UNIQUE, FOREIGN KEY, CHECK, or EXCLUDE"))
    }
}
/// Parse a MySQL INDEX/KEY table constraint:
/// `[FULLTEXT|SPATIAL] {INDEX|KEY} [name] [USING {BTREE|HASH}] (columns)`
///
/// The `USING {BTREE|HASH}` clause may appear before the name, after
/// the name, or (handled by `parse_constraint_modifiers`) after the
/// column list; the first two positions set `using_before_columns`.
fn parse_index_table_constraint(&mut self) -> Result<TableConstraint> {
    // Optional FULLTEXT / SPATIAL prefix.
    let mut kind = None;
    for candidate in ["FULLTEXT", "SPATIAL"] {
        if self.match_identifier(candidate) {
            kind = Some(candidate.to_string());
            break;
        }
    }
    // KEY vs INDEX: remember which keyword appeared so generation can
    // round-trip it. INDEX (or neither) yields `false`; the INDEX match
    // result is deliberately discarded.
    let use_key_keyword = self.match_token(TokenType::Key);
    if !use_key_keyword {
        let _ = self.match_token(TokenType::Index);
    }
    // MySQL allows USING immediately after INDEX/KEY: INDEX USING BTREE (col)
    let mut early_using = None;
    if self.match_token(TokenType::Using) {
        for algo in ["BTREE", "HASH"] {
            if self.match_identifier(algo) {
                early_using = Some(algo.to_string());
                break;
            }
        }
    }
    // Optional index name — only when the next token starts neither the
    // column list nor a USING clause.
    let name = if self.check(TokenType::LParen) || self.check(TokenType::Using) {
        None
    } else if self.is_identifier_token() {
        Some(Identifier::new(self.advance().text))
    } else {
        None
    };
    // USING may also follow the name (still before the column list).
    let mut late_using = None;
    if early_using.is_none() && self.match_token(TokenType::Using) {
        for algo in ["BTREE", "HASH"] {
            if self.match_identifier(algo) {
                late_using = Some(algo.to_string());
                break;
            }
        }
    }
    // Column list (entries may carry prefix lengths and ASC/DESC).
    self.expect(TokenType::LParen)?;
    let columns = self.parse_index_identifier_list()?;
    self.expect(TokenType::RParen)?;
    // Trailing modifiers (USING after columns, COMMENT, VISIBLE, ...).
    let mut modifiers = self.parse_constraint_modifiers();
    // Either pre-column USING position overrides a post-column one and
    // marks it for emission before the columns; a USING found only by
    // parse_constraint_modifiers keeps `using_before_columns = false`.
    // (late_using can only be Some when early_using is None.)
    if let Some(algo) = early_using.or(late_using) {
        modifiers.using = Some(algo);
        modifiers.using_before_columns = true;
    }
    Ok(TableConstraint::Index {
        name,
        columns,
        kind,
        modifiers,
        use_key_keyword,
        expression: None,
        index_type: None,
        granularity: None,
    })
}
/// Parse constraint modifiers like ENFORCED, DEFERRABLE, NORELY, USING, etc.
///
/// Loops until no recognized modifier follows. This function never
/// fails: unknown tokens simply terminate the loop, and ambiguous
/// WITH/ON clauses are backtracked so the caller can parse them.
fn parse_constraint_modifiers(&mut self) -> ConstraintModifiers {
    let mut modifiers = ConstraintModifiers::default();
    loop {
        if self.match_token(TokenType::Not) {
            // NOT ENFORCED, NOT DEFERRABLE, NOT VALID
            // NOTE(review): a NOT followed by none of these is consumed
            // and dropped without backtracking — confirm callers never
            // reach this with a NOT belonging to a later clause.
            if self.match_identifier("ENFORCED") {
                modifiers.enforced = Some(false);
            } else if self.match_identifier("DEFERRABLE") {
                modifiers.deferrable = Some(false);
            } else if self.match_identifier("VALID") {
                modifiers.not_valid = true;
            }
        } else if self.match_identifier("ENFORCED") {
            modifiers.enforced = Some(true);
        } else if self.match_identifier("DEFERRABLE") {
            modifiers.deferrable = Some(true);
        } else if self.match_identifier("INITIALLY") {
            // INITIALLY DEFERRED or INITIALLY IMMEDIATE
            if self.match_identifier("DEFERRED") {
                modifiers.initially_deferred = Some(true);
            } else if self.match_identifier("IMMEDIATE") {
                modifiers.initially_deferred = Some(false);
            }
        } else if self.match_identifier("NORELY") {
            modifiers.norely = true;
        } else if self.match_identifier("RELY") {
            modifiers.rely = true;
        } else if self.match_token(TokenType::Using) {
            // USING BTREE or USING HASH (MySQL)
            if self.match_identifier("BTREE") {
                modifiers.using = Some("BTREE".to_string());
            } else if self.match_identifier("HASH") {
                modifiers.using = Some("HASH".to_string());
            }
        } else if self.match_token(TokenType::Comment) {
            // MySQL index COMMENT 'text'
            if self.check(TokenType::String) {
                modifiers.comment = Some(self.advance().text);
            }
        } else if self.match_identifier("VISIBLE") {
            modifiers.visible = Some(true);
        } else if self.match_identifier("INVISIBLE") {
            modifiers.visible = Some(false);
        } else if self.match_identifier("ENGINE_ATTRIBUTE") {
            // MySQL ENGINE_ATTRIBUTE = 'value' (the = is optional here)
            self.match_token(TokenType::Eq);
            if self.check(TokenType::String) {
                modifiers.engine_attribute = Some(self.advance().text);
            }
        } else if self.check(TokenType::With) {
            // WITH is ambiguous: it may start WITH PARSER (MySQL) or a
            // TSQL WITH (...) options list, or belong to an outer
            // clause entirely — so save the position before committing.
            let saved_with = self.current;
            self.skip(); // consume WITH
            if self.match_identifier("PARSER") {
                // MySQL WITH PARSER name
                if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
                    modifiers.with_parser = Some(self.advance().text);
                }
            } else if self.check(TokenType::LParen) {
                // TSQL: WITH (PAD_INDEX=ON, STATISTICS_NORECOMPUTE=OFF, ...)
                // Parse and store the options
                self.skip(); // consume (
                loop {
                    if self.check(TokenType::RParen) || self.is_at_end() {
                        break;
                    }
                    // Parse KEY=VALUE pair; bare keys without = are
                    // consumed but not stored.
                    let key = self.advance().text.clone();
                    if self.match_token(TokenType::Eq) {
                        let value = self.advance().text.clone();
                        modifiers.with_options.push((key, value));
                    }
                    if !self.match_token(TokenType::Comma) {
                        break;
                    }
                }
                // Best-effort close: tolerate a missing ).
                let _ = self.match_token(TokenType::RParen);
            } else {
                // Not WITH PARSER or WITH (...) — backtrack and stop so
                // the caller can parse the WITH clause itself.
                self.current = saved_with;
                break;
            }
        } else if self.check(TokenType::On) {
            // ON is ambiguous too: ON CONFLICT (SQLite) vs ON filegroup
            // (TSQL) vs an outer clause — save the position first.
            let saved_on = self.current;
            self.skip(); // consume ON
            if self.match_identifier("CONFLICT") {
                // SQLite ON CONFLICT action: ROLLBACK, ABORT, FAIL, IGNORE, REPLACE
                if self.match_token(TokenType::Rollback) {
                    modifiers.on_conflict = Some("ROLLBACK".to_string());
                } else if self.match_identifier("ABORT") {
                    modifiers.on_conflict = Some("ABORT".to_string());
                } else if self.match_identifier("FAIL") {
                    modifiers.on_conflict = Some("FAIL".to_string());
                } else if self.match_token(TokenType::Ignore) {
                    modifiers.on_conflict = Some("IGNORE".to_string());
                } else if self.match_token(TokenType::Replace) {
                    modifiers.on_conflict = Some("REPLACE".to_string());
                }
            } else if self.is_identifier_token() || self.check(TokenType::QuotedIdentifier) {
                // TSQL: ON [filegroup] - parse and store
                let quoted = self.check(TokenType::QuotedIdentifier);
                let name = self.advance().text.clone();
                modifiers.on_filegroup = Some(Identifier {
                    name,
                    quoted,
                    trailing_comments: Vec::new(),
                    span: None,
                });
            } else {
                // Unknown ON clause, backtrack and stop.
                self.current = saved_on;
                break;
            }
        } else {
            break;
        }
    }
    modifiers
}
/// Parse the target of a foreign key reference (after REFERENCES):
/// `table [(columns)] [MATCH ...] [ON DELETE ...] [ON UPDATE ...]
/// [MATCH ...] [[NOT] DEFERRABLE]`.
///
/// ON DELETE / ON UPDATE may appear in either order; `on_update_first`
/// records which came first so generation can round-trip the input.
/// A MATCH clause is accepted both before and after the ON actions
/// (`match_after_actions` records which position was used).
///
/// `constraint_name` and `has_foreign_key_keywords` are left at their
/// defaults here and set by the caller when applicable.
fn parse_foreign_key_ref(&mut self) -> Result<ForeignKeyRef> {
    let table = self.parse_table_ref()?;
    // Optional referenced column list: REFERENCES t(a, b)
    let columns = if self.match_token(TokenType::LParen) {
        let cols = self.parse_identifier_list()?;
        self.expect(TokenType::RParen)?;
        cols
    } else {
        Vec::new()
    };
    // Handle optional MATCH clause (MATCH FULL, MATCH PARTIAL, MATCH SIMPLE).
    // MATCH comes BEFORE ON DELETE/ON UPDATE in PostgreSQL; FULL is its
    // own token, PARTIAL/SIMPLE arrive as identifiers.
    let match_type = if self.match_token(TokenType::Match) {
        if self.check(TokenType::Full) {
            self.skip();
            Some(MatchType::Full)
        } else if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
            let text = self.advance().text.to_ascii_uppercase();
            match text.as_str() {
                "PARTIAL" => Some(MatchType::Partial),
                "SIMPLE" => Some(MatchType::Simple),
                _ => None,
            }
        } else {
            None
        }
    } else {
        None
    };
    // ON DELETE and ON UPDATE can appear in either order
    let mut on_delete = None;
    let mut on_update = None;
    let mut on_update_first = false;
    let mut first_clause = true;
    // Try parsing up to 2 ON clauses (one of each kind at most)
    for _ in 0..2 {
        if on_delete.is_none() && self.match_keywords(&[TokenType::On, TokenType::Delete]) {
            on_delete = Some(self.parse_referential_action()?);
        } else if on_update.is_none()
            && self.match_keywords(&[TokenType::On, TokenType::Update])
        {
            if first_clause {
                on_update_first = true;
            }
            on_update = Some(self.parse_referential_action()?);
        } else {
            break;
        }
        first_clause = false;
    }
    // MATCH clause can also appear after ON DELETE/ON UPDATE
    let mut match_after_actions = false;
    let match_type = if match_type.is_none() && self.match_token(TokenType::Match) {
        match_after_actions = on_delete.is_some() || on_update.is_some();
        if self.check(TokenType::Full) {
            self.skip();
            Some(MatchType::Full)
        } else if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
            let text = self.advance().text.to_ascii_uppercase();
            match text.as_str() {
                "PARTIAL" => Some(MatchType::Partial),
                "SIMPLE" => Some(MatchType::Simple),
                _ => None,
            }
        } else {
            None
        }
    } else {
        match_type
    };
    // Handle optional DEFERRABLE / NOT DEFERRABLE.
    // BUGFIX: the previous short-circuit `match_token(Not) &&
    // match_identifier("DEFERRABLE")` consumed the NOT token even when
    // DEFERRABLE did not follow, corrupting the position for whatever
    // clause the NOT actually belonged to. Backtrack instead (same
    // save/restore pattern used for the ON clauses above).
    let deferrable = if self.match_identifier("DEFERRABLE") {
        Some(true)
    } else if self.check(TokenType::Not) {
        let saved = self.current;
        self.skip(); // consume NOT
        if self.match_identifier("DEFERRABLE") {
            Some(false)
        } else {
            // Not NOT DEFERRABLE — restore position and leave NOT alone.
            self.current = saved;
            None
        }
    } else {
        None
    };
    Ok(ForeignKeyRef {
        table,
        columns,
        on_delete,
        on_update,
        on_update_first,
        match_type,
        match_after_actions,
        constraint_name: None, // Will be set by caller if CONSTRAINT was used
        deferrable,
        has_foreign_key_keywords: false, // Will be set by caller if FOREIGN KEY preceded REFERENCES
    })
}
/// Parse the referential action following ON DELETE / ON UPDATE:
/// CASCADE, SET NULL, SET DEFAULT, RESTRICT, or NO ACTION.
fn parse_referential_action(&mut self) -> Result<ReferentialAction> {
    if self.match_token(TokenType::Cascade) {
        return Ok(ReferentialAction::Cascade);
    }
    if self.match_keywords(&[TokenType::Set, TokenType::Null]) {
        return Ok(ReferentialAction::SetNull);
    }
    if self.match_keywords(&[TokenType::Set, TokenType::Default]) {
        return Ok(ReferentialAction::SetDefault);
    }
    if self.match_token(TokenType::Restrict) {
        return Ok(ReferentialAction::Restrict);
    }
    if self.match_token(TokenType::No) {
        // NO is a real token, but the trailing ACTION arrives as a Var
        // identifier — consume it case-insensitively if present.
        if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("ACTION") {
            self.skip();
        }
        return Ok(ReferentialAction::NoAction);
    }
    Err(self.parse_error("Expected CASCADE, SET NULL, SET DEFAULT, RESTRICT, or NO ACTION"))
}
/// Parse the body of a Snowflake TAG clause:
/// `TAG (key='value', key2='value2')`. The TAG keyword itself has
/// already been consumed by the caller; each entry becomes a
/// `Property` expression (`key = value`).
fn parse_tags(&mut self) -> Result<Tags> {
    self.expect(TokenType::LParen)?;
    let mut props = Vec::new();
    loop {
        // Tag keys may be keywords; values are ordinary literals.
        let tag_key = self.expect_identifier_or_keyword()?;
        self.expect(TokenType::Eq)?;
        let tag_value = self.parse_primary()?;
        let prop = Property {
            this: Box::new(Expression::Identifier(Identifier::new(tag_key))),
            value: Some(Box::new(tag_value)),
        };
        props.push(Expression::Property(Box::new(prop)));
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    self.expect(TokenType::RParen)?;
    Ok(Tags { expressions: props })
}
/// Parse CREATE VIEW
///
/// All boolean/option arguments come from tokens the caller consumed
/// before the VIEW keyword (OR REPLACE, OR ALTER, MATERIALIZED,
/// TEMPORARY, ALGORITHM=, DEFINER=, SQL SECURITY, SECURE). Handles
/// dialect extensions for ClickHouse, Snowflake, BigQuery, Doris,
/// Redshift, Teradata, Presto/Trino, and MySQL. Returns an incomplete
/// `CreateView` (query = Null placeholder) when no AS/SELECT/WITH
/// follows the header, so partial statements still parse.
fn parse_create_view(
    &mut self,
    or_replace: bool,
    or_alter: bool,
    materialized: bool,
    temporary: bool,
    algorithm: Option<String>,
    definer: Option<String>,
    security: Option<FunctionSecurity>,
    secure: bool,
) -> Result<Expression> {
    self.expect(TokenType::View)?;
    // Handle IF NOT EXISTS
    let if_not_exists =
        self.match_keywords(&[TokenType::If, TokenType::Not, TokenType::Exists]);
    let name = self.parse_table_ref()?;
    // ClickHouse: UUID 'xxx' clause after view name (value is discarded)
    if matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    ) && self.check_identifier("UUID")
    {
        self.skip(); // consume UUID
        let _ = self.advance(); // consume UUID string value
    }
    // ClickHouse: ON CLUSTER clause (after view name)
    let on_cluster = self.parse_on_cluster_clause()?;
    // ClickHouse: TO destination_table clause
    let to_table = if self.match_token(TokenType::To) {
        Some(self.parse_table_ref()?)
    } else {
        None
    };
    // Snowflake: COPY GRANTS (before column list)
    let copy_grants = self.match_text_seq(&["COPY", "GRANTS"]);
    let mut row_access_policy = self.parse_snowflake_row_access_policy_clause();
    // For materialized views, column definitions can include data types: (c1 INT, c2 INT)
    // This applies to Doris, ClickHouse, and potentially other dialects
    // We need to parse this as a schema instead of simple column names
    // Track if we parsed a schema (with types) vs simple columns
    let mut schema: Option<Schema> = None;
    let mut unique_key: Option<UniqueKeyProperty> = None;
    // Optional column list with optional COMMENT and OPTIONS per column
    let columns = if self.check(TokenType::LParen) {
        // For materialized views or ClickHouse views, try to parse as schema with typed columns
        if materialized
            || matches!(
                self.config.dialect,
                Some(crate::dialects::DialectType::ClickHouse)
            )
        {
            // Save position to backtrack if needed
            let saved_pos = self.current;
            // Try to parse as schema (with typed columns)
            if let Some(Expression::Schema(parsed_schema)) = self.parse_schema()? {
                schema = Some(*parsed_schema);
                // Doris: KEY (columns) after schema
                if self.match_text_seq(&["KEY"]) {
                    let exprs = self.parse_composite_key_expressions()?;
                    unique_key = Some(UniqueKeyProperty { expressions: exprs });
                }
                Vec::new() // Use schema instead of columns
            } else {
                // Backtrack and parse as simple columns
                self.current = saved_pos;
                self.parse_view_columns()?
            }
        } else {
            self.parse_view_columns()?
        }
    } else {
        Vec::new()
    };
    // Snowflake: COPY GRANTS can also appear after column list
    // (shadows the pre-column-list binding, preserving a `true` result)
    let copy_grants = copy_grants || self.match_text_seq(&["COPY", "GRANTS"]);
    if row_access_policy.is_none() {
        row_access_policy = self.parse_snowflake_row_access_policy_clause();
    }
    // Presto/Trino/StarRocks: SECURITY DEFINER/INVOKER/NONE (after view name, before AS)
    // MySQL also allows SQL SECURITY DEFINER/INVOKER after the view name
    // This differs from MySQL's SQL SECURITY which can also come before VIEW keyword
    // security_sql_style: whether the SQL keyword prefix was used;
    // security_after_name: whether the clause appeared after the name.
    let (security, security_sql_style, security_after_name) = if security.is_some() {
        // MySQL-style SQL SECURITY was parsed before VIEW keyword
        (security, true, false)
    } else if self.check_identifier("SQL")
        && self.current + 1 < self.tokens.len()
        && self.tokens[self.current + 1]
            .text
            .eq_ignore_ascii_case("SECURITY")
    {
        // SQL SECURITY after view name
        self.skip(); // consume SQL
        self.skip(); // consume SECURITY
        let sec = if self.match_identifier("DEFINER") {
            Some(FunctionSecurity::Definer)
        } else if self.match_identifier("INVOKER") {
            Some(FunctionSecurity::Invoker)
        } else if self.match_identifier("NONE") {
            Some(FunctionSecurity::None)
        } else {
            None
        };
        (sec, true, true)
    } else if self.match_identifier("SECURITY") {
        // Presto-style SECURITY after view name
        let sec = if self.match_identifier("DEFINER") {
            Some(FunctionSecurity::Definer)
        } else if self.match_identifier("INVOKER") {
            Some(FunctionSecurity::Invoker)
        } else if self.match_identifier("NONE") {
            Some(FunctionSecurity::None)
        } else {
            None
        };
        (sec, false, false)
    } else {
        // NOTE(review): no security clause defaults security_sql_style
        // to true — presumably the generator treats it as "SQL style if
        // ever emitted"; confirm against the generator.
        (None, true, false)
    };
    // Snowflake: COMMENT = 'text'
    let view_comment = if self.match_token(TokenType::Comment) {
        // Match = or skip if not present (some dialects use COMMENT='text')
        let _ = self.match_token(TokenType::Eq);
        Some(self.expect_string()?)
    } else {
        None
    };
    // Snowflake: TAG (name='value', ...)
    let tags = if self.match_identifier("TAG") {
        let mut tag_list = Vec::new();
        if self.match_token(TokenType::LParen) {
            loop {
                let tag_name = self.expect_identifier()?;
                // Value is optional; a bare name yields an empty string.
                let tag_value = if self.match_token(TokenType::Eq) {
                    self.expect_string()?
                } else {
                    String::new()
                };
                tag_list.push((tag_name, tag_value));
                if !self.match_token(TokenType::Comma) {
                    break;
                }
            }
            self.expect(TokenType::RParen)?;
        }
        tag_list
    } else {
        Vec::new()
    };
    // BigQuery: OPTIONS (key=value, ...)
    let options = if self.match_identifier("OPTIONS") {
        self.parse_options_list()?
    } else {
        Vec::new()
    };
    // Doris: BUILD IMMEDIATE/DEFERRED for materialized views
    let build = if self.match_identifier("BUILD") {
        if self.match_identifier("IMMEDIATE") {
            Some("IMMEDIATE".to_string())
        } else if self.match_identifier("DEFERRED") {
            Some("DEFERRED".to_string())
        } else {
            // Unexpected token after BUILD - try to consume it
            let value = self.expect_identifier_or_keyword()?;
            Some(value.to_ascii_uppercase())
        }
    } else {
        None
    };
    // Doris: REFRESH COMPLETE/AUTO ON MANUAL/COMMIT/SCHEDULE [EVERY n UNIT] [STARTS 'datetime']
    // ClickHouse: REFRESH AFTER interval / REFRESH EVERY interval [OFFSET interval] [RANDOMIZE FOR interval] [APPEND]
    let refresh = if self.match_token(TokenType::Refresh) {
        if matches!(
            self.config.dialect,
            Some(crate::dialects::DialectType::ClickHouse)
        ) {
            // ClickHouse REFRESH syntax: consume tokens until AS/POPULATE/TO/ENGINE or end.
            // The interval details are discarded (refresh stays None).
            while !self.is_at_end()
                && !self.check(TokenType::As)
                && !self.check_identifier("POPULATE")
                && !self.check_identifier("TO")
                && !self.check_identifier("APPEND")
                && !self.check_identifier("ENGINE")
                && !self.check(TokenType::Semicolon)
            {
                self.skip();
            }
            // Consume APPEND if present (REFRESH ... APPEND TO target)
            let _ = self.match_identifier("APPEND");
            None
        } else {
            Some(Box::new(self.parse_refresh_trigger_property()?))
        }
    } else {
        None
    };
    // ClickHouse: TO destination_table after REFRESH ... APPEND
    // e.g., CREATE MATERIALIZED VIEW v REFRESH AFTER 1 SECOND APPEND TO tab (cols) EMPTY AS ...
    // (shadows the earlier to_table binding; only fills it if still None)
    let to_table = if to_table.is_none() && self.match_token(TokenType::To) {
        Some(self.parse_table_ref()?)
    } else {
        to_table
    };
    // ClickHouse: column definitions after REFRESH ... APPEND TO tab (cols)
    if schema.is_none()
        && self.check(TokenType::LParen)
        && matches!(
            self.config.dialect,
            Some(crate::dialects::DialectType::ClickHouse)
        )
    {
        let saved_pos = self.current;
        if let Some(Expression::Schema(parsed_schema)) = self.parse_schema()? {
            schema = Some(*parsed_schema);
        } else {
            self.current = saved_pos;
        }
    }
    // Redshift: AUTO REFRESH YES|NO for materialized views
    let auto_refresh = if self.match_text_seq(&["AUTO", "REFRESH"]) {
        if self.match_identifier("YES") {
            Some(true)
        } else if self.match_identifier("NO") {
            Some(false)
        } else {
            None
        }
    } else {
        None
    };
    // ClickHouse: Parse table properties (ENGINE, ORDER BY, SAMPLE, SETTINGS, TTL, etc.)
    // These appear after column definitions but before AS clause for materialized views
    let mut table_properties = Vec::new();
    if materialized
        && matches!(
            self.config.dialect,
            Some(crate::dialects::DialectType::ClickHouse)
        )
    {
        self.parse_clickhouse_table_properties(&mut table_properties)?;
    }
    // ClickHouse: POPULATE / EMPTY keywords before AS in materialized views
    // (both are consumed and discarded)
    if materialized
        && matches!(
            self.config.dialect,
            Some(crate::dialects::DialectType::ClickHouse)
        )
    {
        let _ = self.match_identifier("POPULATE");
        let _ = self.match_identifier("EMPTY");
    }
    // AS is optional - some dialects (e.g., Presto) allow SELECT without AS
    let has_as = self.match_token(TokenType::As);
    if !has_as && !self.check(TokenType::Select) && !self.check(TokenType::With) {
        // No AS and no SELECT/WITH means no query - return empty view (for partial statements)
        return Ok(Expression::CreateView(Box::new(CreateView {
            name,
            columns,
            query: Expression::Null(Null), // Placeholder for incomplete VIEW
            or_replace,
            or_alter,
            if_not_exists,
            materialized,
            temporary,
            secure,
            algorithm,
            definer,
            security,
            security_sql_style,
            security_after_name,
            query_parenthesized: false,
            locking_mode: None,
            locking_access: None,
            copy_grants,
            comment: view_comment,
            row_access_policy,
            tags,
            options,
            build,
            refresh,
            schema: schema.map(Box::new),
            unique_key: unique_key.map(Box::new),
            no_schema_binding: false,
            auto_refresh,
            on_cluster,
            to_table,
            table_properties,
        })));
    }
    // Parse Teradata LOCKING clause: LOCKING ROW|TABLE|DATABASE FOR ACCESS|READ|WRITE
    let mut locking_mode: Option<String> = None;
    let mut locking_access: Option<String> = None;
    if self.match_token(TokenType::Lock) || self.match_identifier("LOCKING") {
        // Capture: ROW, TABLE, DATABASE, etc.
        if self.match_token(TokenType::Row) {
            locking_mode = Some("ROW".to_string());
        } else if self.match_token(TokenType::Table) {
            locking_mode = Some("TABLE".to_string());
        } else if self.match_token(TokenType::Database) || self.match_identifier("DATABASE") {
            locking_mode = Some("DATABASE".to_string());
        }
        // Capture FOR ACCESS|READ|WRITE
        if self.match_token(TokenType::For) {
            if self.match_identifier("ACCESS") {
                locking_access = Some("ACCESS".to_string());
            } else if self.match_identifier("READ") {
                locking_access = Some("READ".to_string());
            } else if self.match_identifier("WRITE") {
                locking_access = Some("WRITE".to_string());
            }
        }
    }
    // Use parse_statement to handle SELECT, WITH...SELECT, or (SELECT...)
    let query_parenthesized = self.check(TokenType::LParen);
    let query = if self.check(TokenType::With) {
        self.parse_statement()?
    } else if query_parenthesized {
        // Handle (SELECT ...) or (WITH ... SELECT ...) - parenthesized query
        self.skip(); // consume (
        let inner = if self.check(TokenType::With) {
            self.parse_statement()?
        } else {
            self.parse_select()?
        };
        self.expect(TokenType::RParen)?;
        inner
    } else {
        self.parse_select()?
    };
    // Redshift: WITH NO SCHEMA BINDING (after the query)
    let no_schema_binding = self.match_text_seq(&["WITH", "NO", "SCHEMA", "BINDING"]);
    Ok(Expression::CreateView(Box::new(CreateView {
        name,
        columns,
        query,
        or_replace,
        or_alter,
        if_not_exists,
        materialized,
        temporary,
        secure,
        algorithm,
        definer,
        security,
        security_sql_style,
        security_after_name,
        query_parenthesized,
        locking_mode,
        locking_access,
        copy_grants,
        comment: view_comment,
        row_access_policy,
        tags,
        options,
        build,
        refresh,
        schema: schema.map(Box::new),
        unique_key: unique_key.map(Box::new),
        no_schema_binding,
        auto_refresh,
        on_cluster,
        to_table,
        table_properties,
    })))
}
/// Parse view column list: (col1, col2 OPTIONS(...) COMMENT 'text', ...)
/// For simple view definitions without data types
/// Parse a view column list: `(col1, col2 OPTIONS(...) COMMENT 'text', ...)`.
///
/// Used for view definitions whose columns carry no data types. Each column
/// may be followed by a BigQuery-style `OPTIONS (...)` list and/or a
/// `COMMENT 'text'` clause. Consumes the surrounding parentheses.
fn parse_view_columns(&mut self) -> Result<Vec<ViewColumn>> {
    self.expect(TokenType::LParen)?;
    let mut columns = Vec::new();
    let mut more = true;
    while more {
        let name = Identifier::new(self.expect_identifier()?);
        // BigQuery: OPTIONS (key=value, ...) attached to the column
        let mut options = Vec::new();
        if self.match_identifier("OPTIONS") {
            options = self.parse_options_list()?;
        }
        // Optional COMMENT 'text'
        let mut comment = None;
        if self.match_token(TokenType::Comment) {
            comment = Some(self.expect_string()?);
        }
        columns.push(ViewColumn {
            name,
            comment,
            options,
        });
        // A comma means another column follows; anything else ends the list.
        more = self.match_token(TokenType::Comma);
    }
    self.expect(TokenType::RParen)?;
    Ok(columns)
}
/// Parse CREATE [CLUSTERED|NONCLUSTERED] INDEX
/// Parse `CREATE [UNIQUE] [CLUSTERED|NONCLUSTERED] INDEX`.
///
/// The caller has already consumed `CREATE`, the optional `UNIQUE`, and the
/// optional clustering keyword (passed in via `unique` / `clustered`); this
/// method starts at the `INDEX` token.
///
/// Dialect-specific handling visible below:
/// - PostgreSQL: `CONCURRENTLY`, `IF NOT EXISTS` (name may then be omitted),
///   `INCLUDE (...)`, and a partial-index `WHERE` clause.
/// - TSQL: `WITH (option=value, ...)` index options and an `ON filegroup`
///   or partition-scheme clause.
/// - ClickHouse: the un-parenthesized `expr TYPE ... GRANULARITY ...` form is
///   re-serialized token-by-token into a raw `Command` expression.
///
/// Returns a `CreateIndex` expression (or `Command` for the ClickHouse form).
fn parse_create_index_with_clustered(
    &mut self,
    unique: bool,
    clustered: Option<String>,
) -> Result<Expression> {
    self.expect(TokenType::Index)?;
    // PostgreSQL: CREATE INDEX CONCURRENTLY idx ON t(c)
    let concurrently = self.match_identifier("CONCURRENTLY");
    // Handle IF NOT EXISTS
    let if_not_exists =
        self.match_keywords(&[TokenType::If, TokenType::Not, TokenType::Exists]);
    // Index name is optional when IF NOT EXISTS is specified (PostgreSQL)
    let name = if if_not_exists && self.check(TokenType::On) {
        Identifier::new("") // Empty name when omitted
    } else {
        self.expect_identifier_with_quoted()?
    };
    self.expect(TokenType::On)?;
    let table = self.parse_table_ref()?;
    // Optional USING clause
    let using = if self.match_token(TokenType::Using) {
        Some(self.expect_identifier()?)
    } else {
        None
    };
    // Parse index columns (optional for COLUMNSTORE indexes)
    let columns = if self.match_token(TokenType::LParen) {
        let cols = self.parse_index_columns()?;
        self.expect(TokenType::RParen)?;
        cols
    } else if clustered
        .as_ref()
        .is_some_and(|c| c.contains("COLUMNSTORE"))
    {
        // COLUMNSTORE indexes don't require a column list
        Vec::new()
    } else if matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    ) {
        // ClickHouse: CREATE INDEX idx ON table expr TYPE minmax GRANULARITY 1
        // No parentheses around the expression — consume to semicolon as Command
        let mut parts = vec![
            "CREATE".to_string(),
            if unique {
                "UNIQUE INDEX".to_string()
            } else {
                "INDEX".to_string()
            },
            name.name.clone(),
            "ON".to_string(),
        ];
        // Rebuild table name (schema-qualified if a schema was parsed)
        if let Some(ref s) = table.schema {
            parts.push(format!("{}.{}", s.name, table.name.name));
        } else {
            parts.push(table.name.name.clone());
        }
        // Re-serialize the remaining tokens verbatim, restoring quote
        // characters that the tokenizer stripped.
        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
            let token = self.advance();
            if token.token_type == TokenType::String {
                parts.push(format!("'{}'", token.text));
            } else if token.token_type == TokenType::QuotedIdentifier {
                parts.push(format!("\"{}\"", token.text));
            } else {
                parts.push(token.text.clone());
            }
        }
        return Ok(Expression::Command(Box::new(crate::expressions::Command {
            this: parts.join(" "),
        })));
    } else {
        self.expect(TokenType::LParen)?;
        let cols = self.parse_index_columns()?;
        self.expect(TokenType::RParen)?;
        cols
    };
    // PostgreSQL: INCLUDE (col1, col2) clause
    let include_columns = if self.match_identifier("INCLUDE") {
        self.expect(TokenType::LParen)?;
        let mut cols = Vec::new();
        loop {
            cols.push(self.expect_identifier_with_quoted()?);
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
        self.expect(TokenType::RParen)?;
        cols
    } else {
        Vec::new()
    };
    // TSQL: WITH (option=value, ...) clause for index options
    let with_options = if self.check(TokenType::With) {
        // parse_with_properties expects the WITH keyword to NOT be consumed
        // but we need to check if we have WITH followed by LParen
        if self
            .peek_nth(1)
            .is_some_and(|t| t.token_type == TokenType::LParen)
        {
            self.skip(); // consume WITH
            self.parse_with_properties()?
        } else {
            // WITH without '(' belongs to some other construct; leave it.
            Vec::new()
        }
    } else {
        Vec::new()
    };
    // PostgreSQL: WHERE clause for partial indexes
    let where_clause = if self.match_token(TokenType::Where) {
        Some(Box::new(self.parse_expression()?))
    } else {
        None
    };
    // TSQL: ON filegroup or partition scheme clause
    // e.g., ON PRIMARY, ON X([y])
    let on_filegroup = if self.match_token(TokenType::On) {
        // Get the filegroup/partition scheme name
        let token = self.advance();
        let mut filegroup = token.text.clone();
        // Check for partition scheme with column: ON partition_scheme(column)
        if self.match_token(TokenType::LParen) {
            filegroup.push('(');
            // Parse the partition column(s); the text is rebuilt as a single
            // string (not an AST) since it is only re-emitted verbatim.
            loop {
                let col_token = self.advance();
                // For TSQL, use bracket quoting for quoted identifiers
                if col_token.token_type == TokenType::QuotedIdentifier {
                    filegroup.push('[');
                    filegroup.push_str(&col_token.text);
                    filegroup.push(']');
                } else {
                    filegroup.push_str(&col_token.text);
                }
                if !self.match_token(TokenType::Comma) {
                    break;
                }
                filegroup.push_str(", ");
            }
            self.expect(TokenType::RParen)?;
            filegroup.push(')');
        }
        Some(filegroup)
    } else {
        None
    };
    Ok(Expression::CreateIndex(Box::new(CreateIndex {
        name,
        table,
        columns,
        unique,
        if_not_exists,
        using,
        clustered,
        concurrently,
        where_clause,
        include_columns,
        with_options,
        on_filegroup,
    })))
}
/// Parse index columns - can be identifiers or expressions (like function calls)
/// Parse index columns - can be identifiers or expressions (like function calls).
///
/// Each entry is parsed as a full expression (so things like
/// `BOX(location, location)` work) and then reduced to an `Identifier` for
/// the `IndexColumn.column` slot. Per column this also consumes an optional
/// PostgreSQL operator class, `ASC`/`DESC`, and `NULLS FIRST|LAST`.
/// The caller is responsible for the surrounding parentheses.
fn parse_index_columns(&mut self) -> Result<Vec<IndexColumn>> {
    let mut columns = Vec::new();
    loop {
        // Parse as expression to handle function calls like BOX(location, location)
        let expr = self.parse_expression()?;
        // Extract column name from expression
        let column = match &expr {
            Expression::Identifier(ident) => ident.clone(),
            Expression::Column(col) => {
                // For column expressions (e.g., simple identifier like [Col]),
                // extract the identifier directly to preserve quoting
                col.name.clone()
            }
            Expression::Function(_func) => {
                // For function expressions, create an identifier from the function call
                Identifier::new(self.expression_to_sql(&expr))
            }
            // Anything else falls back to a textual rendering of the expression.
            _ => Identifier::new(self.expression_to_sql(&expr)),
        };
        // Parse optional PostgreSQL operator class (e.g., varchar_pattern_ops, public.gin_trgm_ops)
        // An opclass is an identifier that appears before ASC/DESC/NULLS and is not a keyword
        let opclass = if self.is_identifier_token()
            && !self.check(TokenType::Asc)
            && !self.check(TokenType::Desc)
            && !self.check(TokenType::Nulls)
        {
            let mut opclass_name = self.advance().text;
            // Handle qualified opclass names like public.gin_trgm_ops
            // NOTE(review): if the token after '.' is neither an identifier nor
            // a safe keyword, the '.' is appended but no name follows, leaving
            // a trailing dot — confirm whether such input can reach this point.
            while self.match_token(TokenType::Dot) {
                opclass_name.push('.');
                if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
                    opclass_name.push_str(&self.advance().text);
                }
            }
            Some(opclass_name)
        } else {
            None
        };
        // DESC takes priority; ASC is only probed when DESC was absent, so the
        // two flags are never both true.
        let desc = self.match_token(TokenType::Desc);
        let asc = if !desc {
            self.match_token(TokenType::Asc)
        } else {
            false
        };
        // NULLS FIRST => Some(true), NULLS LAST => Some(false),
        // bare NULLS (or no clause) => None
        let nulls_first = if self.match_token(TokenType::Nulls) {
            if self.match_token(TokenType::First) {
                Some(true)
            } else if self.match_token(TokenType::Last) {
                Some(false)
            } else {
                None
            }
        } else {
            None
        };
        columns.push(IndexColumn {
            column,
            desc,
            asc,
            nulls_first,
            opclass,
        });
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    Ok(columns)
}
/// Convert an expression to its SQL string representation (simple version for index expressions)
/// Render an expression back to a minimal SQL snippet.
///
/// This is a deliberately small renderer used for index expressions only:
/// identifiers, (optionally table-qualified) columns, function calls,
/// string/number literals, NULL, and booleans. Any other node kind — and any
/// literal kind other than string/number — is rendered as `"?"`.
fn expression_to_sql(&self, expr: &Expression) -> String {
    match expr {
        Expression::Identifier(ident) => ident.name.clone(),
        Expression::Column(col) => match col.table {
            // Qualified column: table.name
            Some(ref table) => format!("{}.{}", table, col.name),
            None => col.name.to_string(),
        },
        Expression::Function(func) => {
            // Render each argument recursively, then join with ", ".
            let mut rendered = Vec::with_capacity(func.args.len());
            for arg in &func.args {
                rendered.push(self.expression_to_sql(arg));
            }
            format!("{}({})", func.name, rendered.join(", "))
        }
        Expression::Literal(lit) => match lit.as_ref() {
            Literal::String(s) => format!("'{}'", s),
            Literal::Number(n) => n.clone(),
            _ => "?".to_string(),
        },
        Expression::Boolean(b) => {
            let text = if b.value { "TRUE" } else { "FALSE" };
            text.to_string()
        }
        Expression::Null(_) => "NULL".to_string(),
        _ => "?".to_string(),
    }
}
/// Parse DROP statement
/// Parse a DROP statement and dispatch to the object-specific parser.
///
/// Handles DROP TABLE/VIEW/INDEX/SCHEMA/DATABASE/FUNCTION/PROCEDURE/
/// SEQUENCE/TRIGGER/TYPE/DOMAIN/NAMESPACE, plus dialect fallbacks:
/// - ClickHouse: `DROP TEMPORARY TABLE|VIEW` and a raw-command fallback for
///   DICTIONARY/USER/QUOTA/ROLE/ROW POLICY/SETTINGS PROFILE/etc.
/// - Snowflake: `DROP ICEBERG TABLE` and a raw-command fallback for
///   STREAM/TASK/STAGE/WAREHOUSE/PIPE/.../FILE FORMAT.
///
/// Errors when the token after DROP names no recognized object kind.
fn parse_drop(&mut self) -> Result<Expression> {
    // Capture leading comments from the DROP token (e.g., "-- comment\nDROP TABLE ...")
    let leading_comments = self.current_leading_comments().to_vec();
    self.expect(TokenType::Drop)?;
    // ClickHouse: DROP TEMPORARY TABLE / DROP TEMPORARY VIEW
    // (the TEMPORARY keyword is consumed and otherwise discarded)
    if self.check(TokenType::Temporary)
        && matches!(
            self.config.dialect,
            Some(crate::dialects::DialectType::ClickHouse)
        )
    {
        self.skip(); // consume TEMPORARY
        if self.check(TokenType::View) {
            return self.parse_drop_view(false);
        }
        return self.parse_drop_table_with_iceberg(leading_comments.clone(), false);
    }
    // Snowflake: DROP ICEBERG TABLE (one-token lookahead confirms TABLE follows)
    if self.check_identifier("ICEBERG")
        && self.current + 1 < self.tokens.len()
        && self.tokens[self.current + 1].token_type == TokenType::Table
    {
        self.skip(); // consume ICEBERG
        return self.parse_drop_table_with_iceberg(leading_comments, true);
    }
    match self.peek().token_type {
        TokenType::Table => self.parse_drop_table_with_iceberg(leading_comments, false),
        TokenType::View => self.parse_drop_view(false),
        TokenType::Materialized => {
            self.skip(); // consume MATERIALIZED
            self.parse_drop_view(true)
        }
        TokenType::Index => self.parse_drop_index(),
        TokenType::Schema => self.parse_drop_schema(),
        TokenType::Database => self.parse_drop_database(),
        TokenType::Function => self.parse_drop_function(),
        TokenType::Procedure => self.parse_drop_procedure(),
        TokenType::Sequence => self.parse_drop_sequence(),
        TokenType::Trigger => self.parse_drop_trigger(),
        TokenType::Type => self.parse_drop_type(),
        TokenType::Domain => {
            // DROP DOMAIN is similar to DROP TYPE; reuses the DropType node.
            self.skip();
            let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
            let name = self.parse_table_ref()?;
            // CASCADE and RESTRICT are mutually exclusive; RESTRICT is
            // consumed but not recorded (it is the default behavior).
            let cascade = self.match_token(TokenType::Cascade);
            if !cascade {
                self.match_token(TokenType::Restrict);
            }
            Ok(Expression::DropType(Box::new(DropType {
                name,
                if_exists,
                cascade,
            })))
        }
        TokenType::Namespace => {
            // DROP NAMESPACE is similar to DROP SCHEMA (Spark/Databricks)
            self.skip();
            let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
            // Parse potentially qualified namespace name (a.b.c); the parts are
            // collapsed into one dotted identifier.
            let mut name_parts = vec![self.expect_identifier()?];
            while self.match_token(TokenType::Dot) {
                name_parts.push(self.expect_identifier()?);
            }
            let name = Identifier::new(name_parts.join("."));
            let cascade = self.match_token(TokenType::Cascade);
            if !cascade {
                self.match_token(TokenType::Restrict);
            }
            Ok(Expression::DropNamespace(Box::new(DropNamespace {
                name,
                if_exists,
                cascade,
            })))
        }
        _ => {
            // ClickHouse: DROP DICTIONARY, DROP USER, DROP QUOTA, DROP ROLE,
            // DROP ROW POLICY, DROP SETTINGS PROFILE, DROP NAMED COLLECTION
            // — no AST modeling; the whole statement is re-serialized as a
            // raw Command.
            if matches!(
                self.config.dialect,
                Some(crate::dialects::DialectType::ClickHouse)
            ) {
                let text_upper = self.peek().text.to_ascii_uppercase();
                if matches!(
                    text_upper.as_str(),
                    "DICTIONARY"
                        | "USER"
                        | "QUOTA"
                        | "ROLE"
                        | "ROW"
                        | "POLICY"
                        | "NAMED"
                        | "WORKLOAD"
                        | "RESOURCE"
                        | "PROFILE"
                ) || self.check(TokenType::Settings)
                    || self.check(TokenType::Partition)
                {
                    self.skip(); // consume keyword, previous() is now set
                    let mut tokens: Vec<(String, TokenType)> = vec![
                        ("DROP".to_string(), TokenType::Var),
                        (
                            self.previous().text.to_ascii_uppercase(),
                            self.previous().token_type,
                        ),
                    ];
                    // Consume the rest of the statement verbatim, restoring
                    // quote characters that the tokenizer stripped.
                    while !self.is_at_end() && !self.check(TokenType::Semicolon) {
                        let token = self.advance();
                        let text = if token.token_type == TokenType::QuotedIdentifier {
                            format!("\"{}\"", token.text)
                        } else if token.token_type == TokenType::String {
                            format!("'{}'", token.text)
                        } else {
                            token.text.clone()
                        };
                        tokens.push((text, token.token_type));
                    }
                    return Ok(Expression::Command(Box::new(Command {
                        this: self.join_command_tokens(tokens),
                    })));
                }
            }
            // Snowflake: DROP STREAM, DROP TASK, DROP STAGE, DROP WAREHOUSE,
            // etc. — also re-serialized as a raw Command.
            if matches!(
                self.config.dialect,
                Some(crate::dialects::DialectType::Snowflake)
            ) {
                let text_upper = self.peek().text.to_ascii_uppercase();
                // "FILE" only counts when followed by "FORMAT" (DROP FILE FORMAT).
                let is_snowflake_drop = matches!(
                    text_upper.as_str(),
                    "STREAM"
                        | "TASK"
                        | "STAGE"
                        | "WAREHOUSE"
                        | "PIPE"
                        | "INTEGRATION"
                        | "TAG"
                        | "NETWORK"
                        | "SHARE"
                ) || (text_upper == "FILE"
                    && self.current + 1 < self.tokens.len()
                    && self.tokens[self.current + 1]
                        .text
                        .eq_ignore_ascii_case("FORMAT"));
                if is_snowflake_drop {
                    self.skip(); // consume the object type keyword
                    let mut tokens: Vec<(String, TokenType)> = vec![
                        ("DROP".to_string(), TokenType::Var),
                        (
                            self.previous().text.to_ascii_uppercase(),
                            self.previous().token_type,
                        ),
                    ];
                    // For FILE FORMAT, also consume FORMAT
                    if text_upper == "FILE" {
                        let fmt = self.advance();
                        tokens.push((fmt.text.to_ascii_uppercase(), fmt.token_type));
                    }
                    while !self.is_at_end() && !self.check(TokenType::Semicolon) {
                        let token = self.advance();
                        let text = if token.token_type == TokenType::QuotedIdentifier {
                            format!("\"{}\"", token.text)
                        } else if token.token_type == TokenType::String {
                            format!("'{}'", token.text)
                        } else {
                            token.text.clone()
                        };
                        tokens.push((text, token.token_type));
                    }
                    return Ok(Expression::Command(Box::new(Command {
                        this: self.join_command_tokens(tokens),
                    })));
                }
            }
            Err(self.parse_error(format!(
                "Expected TABLE, VIEW, INDEX, SCHEMA, DATABASE, FUNCTION, PROCEDURE, SEQUENCE, TRIGGER, TYPE, or NAMESPACE after DROP, got {:?}",
                self.peek().token_type
            )))
        }
    }
}
/// Parse DROP TABLE
/// Parse `DROP TABLE`, starting at the `TABLE` token.
///
/// `leading_comments` are comments captured before DROP; `iceberg` is true
/// when the caller already consumed Snowflake's ICEBERG keyword. Accepts a
/// comma-separated name list, `CASCADE [CONSTRAINTS]` / `RESTRICT`, Oracle
/// `PURGE`, and the ClickHouse extras `IF EMPTY`, `ON CLUSTER`, and
/// `SYNC` / `NO DELAY`.
fn parse_drop_table_with_iceberg(
    &mut self,
    leading_comments: Vec<String>,
    iceberg: bool,
) -> Result<Expression> {
    self.expect(TokenType::Table)?;
    let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
    let clickhouse = matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    );
    // ClickHouse: IF EMPTY (only probed when IF EXISTS was absent; needs a
    // one-token lookahead because EMPTY is a plain identifier)
    if !if_exists
        && clickhouse
        && self.check(TokenType::If)
        && self.current + 1 < self.tokens.len()
        && self.tokens[self.current + 1]
            .text
            .eq_ignore_ascii_case("EMPTY")
    {
        self.skip(); // consume IF
        self.skip(); // consume EMPTY
    }
    // One or more comma-separated table names
    let mut names = vec![self.parse_table_ref()?];
    while self.match_token(TokenType::Comma) {
        names.push(self.parse_table_ref()?);
    }
    // CASCADE [CONSTRAINTS] or RESTRICT — mutually exclusive
    let mut cascade = false;
    let mut cascade_constraints = false;
    let mut restrict = false;
    if self.match_token(TokenType::Cascade) {
        if self.match_identifier("CONSTRAINTS") {
            cascade_constraints = true;
        } else {
            cascade = true;
        }
    } else if self.match_token(TokenType::Restrict) {
        restrict = true;
    }
    // Oracle: PURGE
    let purge = self.match_identifier("PURGE");
    // ClickHouse: ON CLUSTER (consumed, not kept) and SYNC / NO DELAY
    if clickhouse {
        let _ = self.parse_on_cluster_clause()?;
    }
    let mut sync = false;
    if clickhouse {
        sync = self.match_identifier("SYNC");
        self.match_identifier("NO");
        self.match_identifier("DELAY");
    }
    Ok(Expression::DropTable(Box::new(DropTable {
        names,
        if_exists,
        cascade,
        cascade_constraints,
        purge,
        leading_comments,
        object_id_args: None,
        sync,
        iceberg,
        restrict,
    })))
}
/// Parse DROP VIEW
/// Parse `DROP VIEW`, starting at the `VIEW` token.
///
/// `materialized` is true when the caller already consumed MATERIALIZED.
/// For ClickHouse, an ON CLUSTER clause and a trailing SYNC keyword are
/// accepted but not recorded on the AST.
fn parse_drop_view(&mut self, materialized: bool) -> Result<Expression> {
    self.expect(TokenType::View)?;
    let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
    let name = self.parse_table_ref()?;
    let clickhouse = matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    );
    if clickhouse {
        // Consumed for compatibility only.
        let _ = self.parse_on_cluster_clause()?;
        self.match_identifier("SYNC");
    }
    let drop = DropView {
        name,
        if_exists,
        materialized,
    };
    Ok(Expression::DropView(Box::new(drop)))
}
/// Parse DROP INDEX
/// Parse `DROP INDEX`, starting at the `INDEX` token.
///
/// Accepts PostgreSQL's CONCURRENTLY, IF EXISTS, a possibly qualified index
/// name (`a.b.c`, collapsed into one dotted identifier), and an optional
/// `ON table` clause (MySQL/TSQL).
fn parse_drop_index(&mut self) -> Result<Expression> {
    self.expect(TokenType::Index)?;
    // PostgreSQL CONCURRENTLY modifier
    let concurrently = self.match_identifier("CONCURRENTLY");
    let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
    // Qualified name: append ".part" for each dot-separated segment.
    let mut qualified = self.expect_identifier()?;
    while self.match_token(TokenType::Dot) {
        qualified.push('.');
        qualified.push_str(&self.expect_identifier()?);
    }
    let name = Identifier::new(qualified);
    // Optional ON table
    let table = if self.match_token(TokenType::On) {
        Some(self.parse_table_ref()?)
    } else {
        None
    };
    Ok(Expression::DropIndex(Box::new(DropIndex {
        name,
        table,
        if_exists,
        concurrently,
    })))
}
/// Parse ALTER statement
/// Parse an ALTER statement.
///
/// Dispatches on the object kind after ALTER:
/// - TABLE — the main path: IF EXISTS, PostgreSQL ONLY, ClickHouse
///   ON CLUSTER, Hive PARTITION(...), a comma-separated action list (with
///   TSQL WITH CHECK/NOCHECK and a special continuation rule for bare column
///   definitions after ADD COLUMN), MySQL trailing ALGORITHM=/LOCK= options,
///   and a ClickHouse trailing SETTINGS clause.
/// - VIEW / INDEX / SEQUENCE — delegated to their own parsers.
/// - SESSION — Snowflake ALTER SESSION SET/UNSET (falls back to a Command).
/// - anything else — MySQL ALTER ... VIEW modifiers (ALGORITHM/DEFINER/
///   SQL SECURITY) or, failing that, a Raw expression of the remaining tokens.
fn parse_alter(&mut self) -> Result<Expression> {
    self.expect(TokenType::Alter)?;
    // Check for ICEBERG modifier before TABLE (Snowflake)
    let alter_table_modifier = if self.check_identifier("ICEBERG") {
        self.skip();
        Some("ICEBERG".to_string())
    } else {
        None
    };
    match self.peek().token_type {
        TokenType::Table => {
            self.skip();
            // Handle IF EXISTS after ALTER TABLE
            let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
            // Handle PostgreSQL ONLY modifier: ALTER TABLE ONLY "Album" ...
            let has_only = self.match_token(TokenType::Only);
            let mut name = self.parse_table_ref()?;
            if has_only {
                name.only = true;
            }
            // ClickHouse: ON CLUSTER clause
            let on_cluster = self.parse_on_cluster_clause()?;
            // Hive: PARTITION(key=value, ...) clause before actions
            let partition = if self.match_token(TokenType::Partition) {
                self.expect(TokenType::LParen)?;
                let mut parts = Vec::new();
                loop {
                    let key = self.expect_identifier()?;
                    self.expect(TokenType::Eq)?;
                    let value = self.parse_expression()?;
                    parts.push((Identifier::new(key), value));
                    if !self.match_token(TokenType::Comma) {
                        break;
                    }
                }
                self.expect(TokenType::RParen)?;
                Some(parts)
            } else {
                None
            };
            let mut actions = Vec::new();
            // Tracks whether the previous action was ADD COLUMN, so a bare
            // column definition after a comma continues that ADD.
            let mut last_was_add_column = false;
            let mut with_check_modifier: Option<String> = None;
            loop {
                // Check for MySQL trailing options (ALGORITHM=val, LOCK=val)
                // before trying to parse as a column def or action.
                // The comma before ALGORITHM was consumed at the bottom of the previous iteration.
                if self.check_identifier("ALGORITHM") || self.check_identifier("LOCK") {
                    break;
                }
                // TSQL: WITH CHECK / WITH NOCHECK before ADD CONSTRAINT.
                // Position is saved and restored when WITH belongs to
                // something else.
                if self.check(TokenType::With) {
                    let saved = self.current;
                    self.skip(); // consume WITH
                    if self.check(TokenType::Check) {
                        self.skip(); // consume CHECK
                        with_check_modifier = Some("WITH CHECK".to_string());
                        // Continue to parse the actual action (ADD CONSTRAINT, etc.)
                    } else if self.check_identifier("NOCHECK") {
                        self.skip(); // consume NOCHECK
                        with_check_modifier = Some("WITH NOCHECK".to_string());
                        // Continue to parse the actual action (ADD CONSTRAINT, etc.)
                    } else {
                        // Not WITH CHECK/NOCHECK, restore position
                        self.current = saved;
                    }
                }
                // If last action was ADD COLUMN and we just saw a comma,
                // check if this is another column definition (not a new action
                // keyword). The long negative list excludes every token that
                // starts a distinct ALTER action.
                if last_was_add_column
                    && !self.check(TokenType::Add)
                    && !self.check(TokenType::Drop)
                    && !self.check(TokenType::Alter)
                    && !self.check(TokenType::Rename)
                    && !self.check(TokenType::Set)
                    && !self.check_identifier("MODIFY")
                    && !self.check(TokenType::Delete)
                    && !self.check(TokenType::Update)
                    && !self.check_identifier("DETACH")
                    && !self.check_identifier("ATTACH")
                    && !self.check_identifier("FREEZE")
                    && !self.check_identifier("CLEAR")
                    && !self.check_identifier("MATERIALIZE")
                    && !self.check(TokenType::Comment)
                    && !self.check(TokenType::Replace)
                    && !self.check_identifier("MOVE")
                    && !self.check_identifier("REMOVE")
                    && !self.check_identifier("APPLY")
                {
                    // Parse additional column definition
                    self.match_token(TokenType::Column); // optional COLUMN keyword
                    let if_not_exists = self.match_keywords(&[
                        TokenType::If,
                        TokenType::Not,
                        TokenType::Exists,
                    ]);
                    let col_def = self.parse_column_def()?;
                    // MySQL/MariaDB column position: FIRST or AFTER col
                    let position = if self.match_token(TokenType::First) {
                        Some(ColumnPosition::First)
                    } else if self.match_token(TokenType::After) {
                        let after_col = self.expect_identifier()?;
                        // ClickHouse: AFTER n.a (dotted nested column name)
                        let after_name = if self.match_token(TokenType::Dot) {
                            let field = self.expect_identifier()?;
                            format!("{}.{}", after_col, field)
                        } else {
                            after_col
                        };
                        Some(ColumnPosition::After(Identifier::new(after_name)))
                    } else {
                        None
                    };
                    actions.push(AlterTableAction::AddColumn {
                        column: col_def,
                        if_not_exists,
                        position,
                    });
                    // last_was_add_column remains true
                } else {
                    // Check for MySQL trailing options (ALGORITHM=val, LOCK=val)
                    // before trying to parse as an action.
                    // NOTE(review): likely shadowed by the identical check at
                    // the top of the loop, which breaks before the comma is
                    // consumed — confirm whether this branch is reachable.
                    if self.check_identifier("ALGORITHM") || self.check_identifier("LOCK") {
                        // Retreat one to re-process the comma in the trailing options loop
                        self.current -= 1; // back up past the comma consumed in loop
                        break;
                    }
                    let action = self.parse_alter_action()?;
                    last_was_add_column = matches!(action, AlterTableAction::AddColumn { .. });
                    actions.push(action);
                }
                if !self.match_token(TokenType::Comma) {
                    break;
                }
            }
            // Parse trailing MySQL ALTER TABLE options: ALGORITHM=val, LOCK=val
            // These can appear after actions separated by commas (comma already consumed)
            // or directly if no actions were parsed
            let mut algorithm = None;
            let mut lock = None;
            loop {
                // First check without consuming comma (comma may have been consumed by action loop)
                if self.check_identifier("ALGORITHM") {
                    self.skip();
                    self.expect(TokenType::Eq)?;
                    algorithm = Some(self.expect_identifier_or_keyword()?.to_ascii_uppercase());
                    self.match_token(TokenType::Comma); // optional trailing comma
                } else if self.check_identifier("LOCK") {
                    self.skip();
                    self.expect(TokenType::Eq)?;
                    lock = Some(self.expect_identifier_or_keyword()?.to_ascii_uppercase());
                    self.match_token(TokenType::Comma); // optional trailing comma
                } else if self.match_token(TokenType::Comma) {
                    // Try after comma
                    if self.check_identifier("ALGORITHM") {
                        self.skip();
                        self.expect(TokenType::Eq)?;
                        algorithm =
                            Some(self.expect_identifier_or_keyword()?.to_ascii_uppercase());
                    } else if self.check_identifier("LOCK") {
                        self.skip();
                        self.expect(TokenType::Eq)?;
                        lock = Some(self.expect_identifier_or_keyword()?.to_ascii_uppercase());
                    } else {
                        // The comma belonged to something else; un-consume it.
                        self.current -= 1;
                        break;
                    }
                } else {
                    break;
                }
            }
            // ClickHouse: consume optional trailing SETTINGS clause
            // e.g., ALTER TABLE t ADD COLUMN c Int64 SETTINGS mutations_sync=2, alter_sync=2
            if matches!(
                self.config.dialect,
                Some(crate::dialects::DialectType::ClickHouse)
            ) && self.check(TokenType::Settings)
            {
                self.skip(); // consume SETTINGS
                let _ = self.parse_settings_property()?;
            }
            Ok(Expression::AlterTable(Box::new(AlterTable {
                name,
                actions,
                if_exists,
                algorithm,
                lock,
                with_check: with_check_modifier,
                partition,
                on_cluster,
                table_modifier: alter_table_modifier,
            })))
        }
        TokenType::View => self.parse_alter_view_with_modifiers(None, None, None),
        TokenType::Index => self.parse_alter_index(),
        TokenType::Sequence => self.parse_alter_sequence(),
        _ if self.check_identifier("SESSION") => {
            // ALTER SESSION SET/UNSET (Snowflake)
            self.skip(); // consume SESSION
            match self.parse_alter_session()? {
                Some(expr) => Ok(expr),
                None => {
                    // Fall back to command
                    Ok(Expression::Command(Box::new(Command {
                        this: "ALTER SESSION".to_string(),
                    })))
                }
            }
        }
        _ => {
            // MySQL: ALTER ALGORITHM = val VIEW, ALTER DEFINER = val VIEW,
            // ALTER SQL SECURITY = val VIEW
            let mut view_algorithm = None;
            let mut view_definer = None;
            let mut view_sql_security = None;
            loop {
                if self.check_identifier("ALGORITHM") {
                    self.skip();
                    self.expect(TokenType::Eq)?;
                    view_algorithm =
                        Some(self.expect_identifier_or_keyword()?.to_ascii_uppercase());
                } else if self.check_identifier("DEFINER") {
                    self.skip();
                    self.expect(TokenType::Eq)?;
                    // Parse user@host format: 'admin'@'localhost'
                    let mut definer_str = String::new();
                    if self.check(TokenType::String) {
                        definer_str.push_str(&format!("'{}'", self.advance().text));
                    } else {
                        definer_str.push_str(&self.expect_identifier_or_keyword()?);
                    }
                    // Check for @ separator
                    if !self.is_at_end() && self.peek().text == "@" {
                        definer_str.push_str(&self.advance().text);
                        if self.check(TokenType::String) {
                            definer_str.push_str(&format!("'{}'", self.advance().text));
                        } else if !self.is_at_end() {
                            definer_str.push_str(&self.advance().text);
                        }
                    }
                    view_definer = Some(definer_str);
                } else if self.check_identifier("SQL") {
                    self.skip();
                    if self.match_identifier("SECURITY") {
                        // '=' is optional here
                        self.match_token(TokenType::Eq);
                        view_sql_security =
                            Some(self.expect_identifier_or_keyword()?.to_ascii_uppercase());
                    }
                } else {
                    break;
                }
            }
            if self.check(TokenType::View) {
                self.parse_alter_view_with_modifiers(
                    view_algorithm,
                    view_definer,
                    view_sql_security,
                )
            } else {
                // Fall back to Raw for unrecognized ALTER targets:
                // everything up to the next semicolon is re-serialized.
                let start = self.current;
                while !self.is_at_end() && !self.check(TokenType::Semicolon) {
                    self.skip();
                }
                let sql = self.tokens_to_sql(start, self.current);
                Ok(Expression::Raw(Raw {
                    sql: format!("ALTER {}", sql),
                }))
            }
        }
    }
}
/// Parse ALTER TABLE action
fn parse_alter_action(&mut self) -> Result<AlterTableAction> {
if self.match_token(TokenType::Add) {
// ClickHouse: ADD INDEX idx expr TYPE minmax GRANULARITY 1
// ClickHouse: ADD PROJECTION name (SELECT ...)
// ClickHouse: ADD STATISTICS col1, col2 TYPE tdigest, uniq
// These have different syntax from MySQL ADD INDEX, so consume as Raw
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && (self.check(TokenType::Index)
|| self.check_identifier("PROJECTION")
|| self.check_identifier("STATISTICS"))
{
let is_statistics = self.check_identifier("STATISTICS");
let mut tokens: Vec<(String, TokenType)> =
vec![("ADD".to_string(), TokenType::Add)];
let mut paren_depth = 0i32;
while !self.is_at_end() && !self.check(TokenType::Semicolon) {
// STATISTICS uses commas internally (col1, col2 TYPE t1, t2), don't break at comma
if self.check(TokenType::Comma) && paren_depth == 0 && !is_statistics {
break;
}
let token = self.advance();
if token.token_type == TokenType::LParen {
paren_depth += 1;
}
if token.token_type == TokenType::RParen {
paren_depth -= 1;
}
let text = if token.token_type == TokenType::QuotedIdentifier {
format!("\"{}\"", token.text)
} else if token.token_type == TokenType::String {
format!("'{}'", token.text)
} else {
token.text.clone()
};
tokens.push((text, token.token_type));
}
return Ok(AlterTableAction::Raw {
sql: self.join_command_tokens(tokens),
});
}
// ADD SEARCH OPTIMIZATION [ON method(columns), ...] — Snowflake
if self.check_identifier("SEARCH")
&& self
.peek_nth(1)
.map(|t| t.text.eq_ignore_ascii_case("OPTIMIZATION"))
== Some(true)
{
let mut tokens: Vec<(String, TokenType)> =
vec![("ADD".to_string(), TokenType::Add)];
let mut paren_depth = 0i32;
while !self.is_at_end() && !self.check(TokenType::Semicolon) {
if self.check(TokenType::Comma) && paren_depth == 0 {
break;
}
let token = self.advance();
if token.token_type == TokenType::LParen {
paren_depth += 1;
}
if token.token_type == TokenType::RParen {
paren_depth -= 1;
}
tokens.push((token.text.clone(), token.token_type));
}
return Ok(AlterTableAction::Raw {
sql: self.join_command_tokens(tokens),
});
}
// ADD ROW ACCESS POLICY name ON (columns) — Snowflake
if self.check(TokenType::Row)
&& self
.peek_nth(1)
.map(|t| t.text.eq_ignore_ascii_case("ACCESS"))
== Some(true)
{
let mut tokens: Vec<(String, TokenType)> =
vec![("ADD".to_string(), TokenType::Add)];
let mut paren_depth = 0i32;
while !self.is_at_end() && !self.check(TokenType::Semicolon) {
if self.check(TokenType::Comma) && paren_depth == 0 {
break;
}
let token = self.advance();
if token.token_type == TokenType::LParen {
paren_depth += 1;
}
if token.token_type == TokenType::RParen {
paren_depth -= 1;
}
tokens.push((token.text.clone(), token.token_type));
}
return Ok(AlterTableAction::Raw {
sql: self.join_command_tokens(tokens),
});
}
// ADD CONSTRAINT or ADD COLUMN or ADD INDEX
if self.match_token(TokenType::Constraint) {
// ADD CONSTRAINT name ...
let name = Some(self.expect_identifier_with_quoted()?);
let constraint = self.parse_constraint_definition(name)?;
Ok(AlterTableAction::AddConstraint(constraint))
} else if self.check(TokenType::PrimaryKey)
|| self.check(TokenType::ForeignKey)
|| self.check(TokenType::Check)
{
// ADD PRIMARY KEY / FOREIGN KEY / CHECK (without CONSTRAINT keyword)
let constraint = self.parse_table_constraint()?;
Ok(AlterTableAction::AddConstraint(constraint))
} else if self.check(TokenType::Index)
|| self.check(TokenType::Key)
|| self.check(TokenType::Unique)
|| self.check_identifier("FULLTEXT")
|| self.check_identifier("SPATIAL")
{
// ADD [UNIQUE|FULLTEXT|SPATIAL] [{INDEX|KEY}] [name] (columns) [USING {BTREE|HASH}]
let kind = if self.match_token(TokenType::Unique) {
Some("UNIQUE".to_string())
} else if self.match_identifier("FULLTEXT") {
Some("FULLTEXT".to_string())
} else if self.match_identifier("SPATIAL") {
Some("SPATIAL".to_string())
} else {
None
};
// Consume optional INDEX or KEY keyword, track which was used
let use_key_keyword = if self.match_token(TokenType::Key) {
true
} else {
self.match_token(TokenType::Index);
false
};
// Optional index name (before the columns)
let name = if !self.check(TokenType::LParen) && !self.check(TokenType::Using) {
Some(self.expect_identifier_with_quoted()?)
} else {
None
};
// Parse columns (with optional prefix length and DESC)
self.expect(TokenType::LParen)?;
let columns = self.parse_index_identifier_list()?;
self.expect(TokenType::RParen)?;
// Parse optional USING BTREE|HASH
let modifiers = self.parse_constraint_modifiers();
Ok(AlterTableAction::AddConstraint(TableConstraint::Index {
name,
columns,
kind,
modifiers,
use_key_keyword,
expression: None,
index_type: None,
granularity: None,
}))
} else if self.match_identifier("COLUMNS") {
// ADD COLUMNS (col1 TYPE, col2 TYPE, ...) [CASCADE] - Hive/Spark syntax
self.expect(TokenType::LParen)?;
let mut columns = Vec::new();
loop {
let col_def = self.parse_column_def()?;
columns.push(col_def);
if !self.match_token(TokenType::Comma) {
break;
}
}
self.expect(TokenType::RParen)?;
let cascade = self.match_token(TokenType::Cascade);
Ok(AlterTableAction::AddColumns { columns, cascade })
} else if self.match_keywords(&[TokenType::If, TokenType::Not, TokenType::Exists]) {
// ADD IF NOT EXISTS PARTITION(key = value) - Hive/Spark syntax
// ADD IF NOT EXISTS col1 INT, col2 INT - Snowflake syntax
if self.match_token(TokenType::Partition) {
self.expect(TokenType::LParen)?;
let mut partition_exprs = Vec::new();
loop {
if let Some(expr) = self.parse_conjunction()? {
partition_exprs.push(expr);
}
if !self.match_token(TokenType::Comma) {
break;
}
}
self.expect(TokenType::RParen)?;
let partition =
Expression::Partition(Box::new(crate::expressions::Partition {
expressions: partition_exprs,
subpartition: false,
}));
let location = if self.match_text_seq(&["LOCATION"]) {
self.parse_property()?
} else {
None
};
return Ok(AlterTableAction::AddPartition {
partition,
if_not_exists: true,
location,
});
} else {
// Snowflake: ADD IF NOT EXISTS col1 INT, [IF NOT EXISTS] col2 INT
// Parse just the first column; the caller's comma loop handles the rest
let col_def = self.parse_column_def()?;
return Ok(AlterTableAction::AddColumn {
column: col_def,
if_not_exists: true,
position: None,
});
}
} else if self.check(TokenType::Partition) {
// ADD PARTITION(key = value) - Hive/Spark syntax
self.skip(); // consume PARTITION
self.expect(TokenType::LParen)?;
let mut partition_exprs = Vec::new();
loop {
if let Some(expr) = self.parse_conjunction()? {
partition_exprs.push(expr);
}
if !self.match_token(TokenType::Comma) {
break;
}
}
self.expect(TokenType::RParen)?;
let partition = Expression::Partition(Box::new(crate::expressions::Partition {
expressions: partition_exprs,
subpartition: false,
}));
let location = if self.match_text_seq(&["LOCATION"]) {
// Parse the LOCATION value (typically a string literal like 'path')
Some(self.parse_primary()?)
} else {
None
};
Ok(AlterTableAction::AddPartition {
partition,
if_not_exists: false,
location,
})
} else {
// ADD COLUMN or ADD (col1 TYPE, col2 TYPE) for Oracle
let has_column_keyword = self.match_token(TokenType::Column); // optional COLUMN keyword
// Check for Oracle-style ADD (col1 TYPE, col2 TYPE, ...) without COLUMN keyword
if !has_column_keyword && self.check(TokenType::LParen) {
// Oracle multi-column ADD syntax: ADD (col1 TYPE, col2 TYPE, ...)
self.skip(); // consume '('
let mut columns = Vec::new();
loop {
let col_def = self.parse_column_def()?;
columns.push(col_def);
if !self.match_token(TokenType::Comma) {
break;
}
}
self.expect(TokenType::RParen)?;
// Use AddColumns with cascade=false for Oracle syntax
Ok(AlterTableAction::AddColumns {
columns,
cascade: false,
})
} else {
// Handle IF NOT EXISTS for ADD COLUMN
let if_not_exists =
self.match_keywords(&[TokenType::If, TokenType::Not, TokenType::Exists]);
let col_def = self.parse_column_def()?;
// Check for FIRST or AFTER position modifiers (MySQL/MariaDB)
let position = if self.match_token(TokenType::First) {
Some(ColumnPosition::First)
} else if self.match_token(TokenType::After) {
let after_col = self.expect_identifier()?;
// ClickHouse: AFTER n.a (dotted nested column name)
let after_name = if self.match_token(TokenType::Dot) {
let field = self.expect_identifier()?;
format!("{}.{}", after_col, field)
} else {
after_col
};
Some(ColumnPosition::After(Identifier::new(after_name)))
} else {
None
};
Ok(AlterTableAction::AddColumn {
column: col_def,
if_not_exists,
position,
})
}
}
} else if self.match_token(TokenType::Drop) {
// ClickHouse: DROP INDEX idx, DROP PROJECTION name, DROP STATISTICS, etc.
// These have different syntax from MySQL, so consume as Raw
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && (self.check(TokenType::Index)
|| self.check_identifier("PROJECTION")
|| self.check_identifier("STATISTICS")
|| self.check_identifier("DETACHED")
|| self.check_identifier("PART"))
{
let is_statistics = self.check_identifier("STATISTICS");
let mut tokens: Vec<(String, TokenType)> =
vec![("DROP".to_string(), TokenType::Drop)];
let mut paren_depth = 0i32;
while !self.is_at_end() && !self.check(TokenType::Semicolon) {
if self.check(TokenType::Comma) && paren_depth == 0 && !is_statistics {
break;
}
let token = self.advance();
if token.token_type == TokenType::LParen {
paren_depth += 1;
}
if token.token_type == TokenType::RParen {
paren_depth -= 1;
}
let text = if token.token_type == TokenType::QuotedIdentifier {
format!("\"{}\"", token.text)
} else if token.token_type == TokenType::String {
format!("'{}'", token.text)
} else {
token.text.clone()
};
tokens.push((text, token.token_type));
}
return Ok(AlterTableAction::Raw {
sql: self.join_command_tokens(tokens),
});
}
// DROP CLUSTERING KEY — Snowflake
if self.check_identifier("CLUSTERING")
&& self.peek_nth(1).map(|t| t.text.eq_ignore_ascii_case("KEY")) == Some(true)
{
self.skip(); // consume CLUSTERING
self.skip(); // consume KEY
return Ok(AlterTableAction::Raw {
sql: "DROP CLUSTERING KEY".to_string(),
});
}
// DROP SEARCH OPTIMIZATION [ON method(columns), ...] — Snowflake
if self.check_identifier("SEARCH")
&& self
.peek_nth(1)
.map(|t| t.text.eq_ignore_ascii_case("OPTIMIZATION"))
== Some(true)
{
let mut tokens: Vec<(String, TokenType)> =
vec![("DROP".to_string(), TokenType::Drop)];
let mut paren_depth = 0i32;
while !self.is_at_end() && !self.check(TokenType::Semicolon) {
if self.check(TokenType::Comma) && paren_depth == 0 {
break;
}
let token = self.advance();
if token.token_type == TokenType::LParen {
paren_depth += 1;
}
if token.token_type == TokenType::RParen {
paren_depth -= 1;
}
tokens.push((token.text.clone(), token.token_type));
}
return Ok(AlterTableAction::Raw {
sql: self.join_command_tokens(tokens),
});
}
// DROP [ALL] ROW ACCESS POLICY/POLICIES — Snowflake
if (self.check(TokenType::Row)
&& self
.peek_nth(1)
.map(|t| t.text.eq_ignore_ascii_case("ACCESS"))
== Some(true))
|| (self.check_identifier("ALL")
&& self.peek_nth(1).map(|t| t.text.eq_ignore_ascii_case("ROW")) == Some(true))
{
let mut tokens: Vec<(String, TokenType)> =
vec![("DROP".to_string(), TokenType::Drop)];
let mut paren_depth = 0i32;
while !self.is_at_end() && !self.check(TokenType::Semicolon) {
if self.check(TokenType::Comma) && paren_depth == 0 {
break;
}
let token = self.advance();
if token.token_type == TokenType::LParen {
paren_depth += 1;
}
if token.token_type == TokenType::RParen {
paren_depth -= 1;
}
tokens.push((token.text.clone(), token.token_type));
}
return Ok(AlterTableAction::Raw {
sql: self.join_command_tokens(tokens),
});
}
// Handle IF EXISTS before determining what to drop
let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
if self.match_token(TokenType::Partition) {
// DROP [IF EXISTS] PARTITION expr [, PARTITION expr ...]
// ClickHouse supports: PARTITION 201901, PARTITION ALL,
// PARTITION tuple(...), PARTITION ID '...'
let mut partitions = Vec::new();
loop {
if self.check(TokenType::LParen) {
// ClickHouse: PARTITION (expr) or PARTITION (expr, expr, ...)
// Standard SQL: PARTITION (key=value, ...)
// Peek ahead: if LParen is followed by String/Number (not identifier=),
// parse as expression
let is_ch_expr = matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.current + 1 < self.tokens.len()
&& (self.tokens[self.current + 1].token_type == TokenType::String
|| self.tokens[self.current + 1].token_type == TokenType::Number
|| self.tokens[self.current + 1].token_type == TokenType::LParen
|| (self.current + 2 < self.tokens.len()
&& self.tokens[self.current + 2].token_type != TokenType::Eq));
if is_ch_expr {
// Parse as tuple expression
let expr = self.parse_expression()?;
partitions.push(vec![(Identifier::new("__expr__".to_string()), expr)]);
} else {
self.skip(); // consume (
let mut parts = Vec::new();
loop {
let key = self.expect_identifier()?;
self.expect(TokenType::Eq)?;
let value = self.parse_expression()?;
parts.push((Identifier::new(key), value));
if !self.match_token(TokenType::Comma) {
break;
}
}
self.expect(TokenType::RParen)?;
partitions.push(parts);
}
} else if self.match_text_seq(&["ALL"]) {
// ClickHouse: PARTITION ALL
partitions.push(vec![(
Identifier::new("ALL".to_string()),
Expression::Boolean(BooleanLiteral { value: true }),
)]);
} else if self.match_text_seq(&["ID"]) {
// ClickHouse: PARTITION ID 'string'
let id_val = self.parse_expression()?;
partitions.push(vec![(Identifier::new("ID".to_string()), id_val)]);
} else {
// ClickHouse: PARTITION <expression> (number, tuple(...), etc.)
let expr = self.parse_expression()?;
partitions.push(vec![(Identifier::new("__expr__".to_string()), expr)]);
}
// Check for ", PARTITION" for multiple partitions
if self.match_token(TokenType::Comma) {
if !self.match_token(TokenType::Partition) {
break;
}
} else {
break;
}
}
Ok(AlterTableAction::DropPartition {
partitions,
if_exists,
})
} else if self.match_token(TokenType::Column) {
// DROP [IF EXISTS] COLUMN [IF EXISTS] name [CASCADE]
// Check for IF EXISTS after COLUMN as well
let if_exists =
if_exists || self.match_keywords(&[TokenType::If, TokenType::Exists]);
let mut name = self.expect_identifier_with_quoted()?;
// ClickHouse: nested column names like n.ui8
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.match_token(TokenType::Dot)
{
let sub = self.expect_identifier_with_quoted()?;
name.name = format!("{}.{}", name.name, sub.name);
}
let cascade = self.match_token(TokenType::Cascade);
Ok(AlterTableAction::DropColumn {
name,
if_exists,
cascade,
})
} else if self.match_token(TokenType::Constraint) {
// DROP [IF EXISTS] CONSTRAINT name
let name = self.expect_identifier_with_quoted()?;
Ok(AlterTableAction::DropConstraint { name, if_exists })
} else if self.match_keywords(&[TokenType::ForeignKey, TokenType::Key]) {
// DROP FOREIGN KEY name (Oracle/MySQL)
let name = self.expect_identifier_with_quoted()?;
Ok(AlterTableAction::DropForeignKey { name })
} else if self.check_identifier("COLUMNS") && self.check_next(TokenType::LParen) {
// DROP COLUMNS (col1, col2, ...) - Spark/Databricks syntax
self.skip(); // consume COLUMNS
self.expect(TokenType::LParen)?;
let mut names = Vec::new();
loop {
let name = self.expect_identifier_with_quoted()?;
names.push(name);
if !self.match_token(TokenType::Comma) {
break;
}
}
self.expect(TokenType::RParen)?;
Ok(AlterTableAction::DropColumns { names })
} else {
// DROP [IF EXISTS] name (implicit column) [CASCADE]
let mut name = self.expect_identifier_with_quoted()?;
// ClickHouse: nested column names like n.ui8
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.match_token(TokenType::Dot)
{
let sub = self.expect_identifier_with_quoted()?;
name.name = format!("{}.{}", name.name, sub.name);
}
let cascade = self.match_token(TokenType::Cascade);
Ok(AlterTableAction::DropColumn {
name,
if_exists,
cascade,
})
}
} else if self.match_token(TokenType::Rename) {
if self.match_token(TokenType::Index) || self.match_token(TokenType::Key) {
let old_name = self.expect_identifier_or_keyword_with_quoted()?;
self.expect(TokenType::To)?;
let new_name = self.expect_identifier_or_keyword_with_quoted()?;
Ok(AlterTableAction::Raw {
sql: format!("RENAME INDEX {} TO {}", old_name.name, new_name.name),
})
} else if self.match_token(TokenType::Column) {
// RENAME COLUMN [IF EXISTS] old TO new
let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
let mut old_name = self.expect_identifier_or_safe_keyword_with_quoted()?;
// ClickHouse: nested column names like n.x
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.match_token(TokenType::Dot)
{
let field = self.expect_identifier_with_quoted()?;
old_name = Identifier {
name: format!("{}.{}", old_name.name, field.name),
quoted: false,
trailing_comments: Vec::new(),
span: None,
};
}
self.expect(TokenType::To)?;
let mut new_name = self.expect_identifier_or_safe_keyword_with_quoted()?;
// ClickHouse: nested column names like n.y
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.match_token(TokenType::Dot)
{
let field = self.expect_identifier_or_safe_keyword_with_quoted()?;
new_name = Identifier {
name: format!("{}.{}", new_name.name, field.name),
quoted: false,
trailing_comments: Vec::new(),
span: None,
};
}
Ok(AlterTableAction::RenameColumn {
old_name,
new_name,
if_exists,
})
} else if self.match_token(TokenType::To) {
// RENAME TO new_table
let new_name = self.parse_table_ref()?;
Ok(AlterTableAction::RenameTable(new_name))
} else if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
// StarRocks/Doris: RENAME new_name (without TO)
// SQLite: RENAME old_name TO new_name (without COLUMN keyword)
let first_name = self.expect_identifier_with_quoted()?;
if self.match_token(TokenType::To) {
let new_name = self.expect_identifier_with_quoted()?;
Ok(AlterTableAction::RenameColumn {
old_name: first_name,
new_name,
if_exists: false,
})
} else {
// No TO keyword: treat as RENAME TABLE (StarRocks/Doris)
Ok(AlterTableAction::RenameTable(TableRef::new(
first_name.name,
)))
}
} else {
Err(self.parse_error("Expected COLUMN or TO after RENAME"))
}
} else if self.match_token(TokenType::Alter) {
// Check for ALTER INDEX (MySQL: ALTER TABLE t ALTER INDEX i VISIBLE/INVISIBLE)
if self.match_token(TokenType::Index) {
let name = self.expect_identifier_with_quoted()?;
let visible = if self.match_identifier("VISIBLE") {
true
} else if self.match_identifier("INVISIBLE") {
false
} else {
return Err(
self.parse_error("Expected VISIBLE or INVISIBLE after ALTER INDEX name")
);
};
Ok(AlterTableAction::AlterIndex { name, visible })
} else if self.check_identifier("SORTKEY") {
// Redshift: ALTER TABLE t ALTER SORTKEY AUTO|NONE|(col1, col2)
self.skip(); // consume SORTKEY
if self.match_texts(&["AUTO", "NONE"]) {
let style = self.previous().text.to_ascii_uppercase();
Ok(AlterTableAction::AlterSortKey {
this: Some(style),
expressions: Vec::new(),
compound: false,
})
} else if self.check(TokenType::LParen) {
// (col1, col2) syntax
let wrapped = self.parse_wrapped_id_vars()?;
let expressions = if let Some(Expression::Tuple(t)) = wrapped {
t.expressions
} else {
Vec::new()
};
Ok(AlterTableAction::AlterSortKey {
this: None,
expressions,
compound: false,
})
} else {
Err(self.parse_error("Expected AUTO, NONE, or (columns) after SORTKEY"))
}
} else if self.check_identifier("COMPOUND") {
// Redshift: ALTER TABLE t ALTER COMPOUND SORTKEY (col1, col2)
self.skip(); // consume COMPOUND
if !self.match_identifier("SORTKEY") {
return Err(self.parse_error("Expected SORTKEY after COMPOUND"));
}
if self.check(TokenType::LParen) {
let wrapped = self.parse_wrapped_id_vars()?;
let expressions = if let Some(Expression::Tuple(t)) = wrapped {
t.expressions
} else {
Vec::new()
};
Ok(AlterTableAction::AlterSortKey {
this: None,
expressions,
compound: true,
})
} else {
Err(self.parse_error("Expected (columns) after COMPOUND SORTKEY"))
}
} else if self.check_identifier("DISTSTYLE") {
// Redshift: ALTER TABLE t ALTER DISTSTYLE ALL|EVEN|AUTO|KEY [DISTKEY col]
self.skip(); // consume DISTSTYLE
if self.match_texts(&["ALL", "EVEN", "AUTO"]) {
let style = self.previous().text.to_ascii_uppercase();
Ok(AlterTableAction::AlterDistStyle {
style,
distkey: None,
})
} else if self.match_token(TokenType::Key) || self.match_identifier("KEY") {
// DISTSTYLE KEY DISTKEY col
if !self.match_identifier("DISTKEY") {
return Err(self.parse_error("Expected DISTKEY after DISTSTYLE KEY"));
}
let col = self.expect_identifier_with_quoted()?;
Ok(AlterTableAction::AlterDistStyle {
style: "KEY".to_string(),
distkey: Some(col),
})
} else {
Err(self.parse_error("Expected ALL, EVEN, AUTO, or KEY after DISTSTYLE"))
}
} else if self.check_identifier("DISTKEY") {
// Redshift: ALTER TABLE t ALTER DISTKEY col (shorthand for DISTSTYLE KEY DISTKEY col)
self.skip(); // consume DISTKEY
let col = self.expect_identifier_with_quoted()?;
Ok(AlterTableAction::AlterDistStyle {
style: "KEY".to_string(),
distkey: Some(col),
})
} else {
// ALTER COLUMN
self.match_token(TokenType::Column); // optional COLUMN keyword
let name = self.expect_identifier_with_quoted()?;
let action = self.parse_alter_column_action()?;
Ok(AlterTableAction::AlterColumn {
name,
action,
use_modify_keyword: false,
})
}
} else if self.match_identifier("MODIFY") {
// ClickHouse: MODIFY ORDER BY, MODIFY SETTING, MODIFY TTL, MODIFY QUERY,
// MODIFY COLUMN name type [DEFAULT|MATERIALIZED|ALIAS] [CODEC] [TTL] [COMMENT], etc.
// These are ClickHouse-specific and have richer syntax than MySQL MODIFY COLUMN.
// Consume all ClickHouse MODIFY actions as Raw.
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) {
// MODIFY SETTING uses commas between settings (not action separators)
let is_setting =
self.check(TokenType::Settings) || self.check_identifier("SETTING");
let mut tokens: Vec<(String, TokenType)> =
vec![("MODIFY".to_string(), TokenType::Var)];
let mut paren_depth = 0i32;
while !self.is_at_end() && !self.check(TokenType::Semicolon) {
if self.check(TokenType::Comma) && paren_depth == 0 && !is_setting {
break;
}
let token = self.advance();
if token.token_type == TokenType::LParen {
paren_depth += 1;
}
if token.token_type == TokenType::RParen {
paren_depth -= 1;
}
let text = if token.token_type == TokenType::QuotedIdentifier {
format!("\"{}\"", token.text)
} else if token.token_type == TokenType::String {
format!("'{}'", token.text)
} else {
token.text.clone()
};
tokens.push((text, token.token_type));
}
return Ok(AlterTableAction::Raw {
sql: self.join_command_tokens(tokens),
});
}
// MODIFY COLUMN (MySQL/Snowflake syntax — routes through same action parser as ALTER COLUMN)
self.match_token(TokenType::Column); // optional COLUMN keyword
let name = Identifier::new(self.expect_identifier()?);
let action = self.parse_alter_column_action()?;
Ok(AlterTableAction::AlterColumn {
name,
action,
use_modify_keyword: true,
})
} else if self.match_identifier("CHANGE") {
// CHANGE [COLUMN] old_name new_name [data_type] [COMMENT 'comment'] - Hive/MySQL/SingleStore syntax
// In SingleStore, data_type can be omitted for simple renames
self.match_token(TokenType::Column); // optional COLUMN keyword
let old_name = Identifier::new(self.expect_identifier()?);
let new_name = Identifier::new(self.expect_identifier()?);
// Try to parse data type - it's optional in SingleStore
let data_type = if !self.is_at_end()
&& !self.check(TokenType::Comment)
&& !self.check(TokenType::Comma)
&& !self.check(TokenType::Semicolon)
{
// Check if next token could start a data type
let tok = self.peek();
if tok.token_type.is_keyword()
|| tok.token_type == TokenType::Identifier
|| tok.token_type == TokenType::Var
{
Some(self.parse_data_type()?)
} else {
None
}
} else {
None
};
let comment = if self.match_token(TokenType::Comment) {
Some(self.expect_string()?)
} else {
None
};
let cascade = self.match_text_seq(&["CASCADE"]);
// Also check for RESTRICT (the opposite, just consume it)
if !cascade {
self.match_text_seq(&["RESTRICT"]);
}
Ok(AlterTableAction::ChangeColumn {
old_name,
new_name,
data_type,
comment,
cascade,
})
} else if self.match_token(TokenType::Constraint) {
// CONSTRAINT name ... (implicit ADD, CONSTRAINT already consumed)
// Parse the constraint name and then the constraint definition
let name = Some(self.expect_identifier_with_quoted()?);
let constraint = self.parse_constraint_definition(name)?;
Ok(AlterTableAction::AddConstraint(constraint))
} else if self.check(TokenType::PrimaryKey)
|| self.check(TokenType::ForeignKey)
|| self.check(TokenType::Unique)
{
// ADD CONSTRAINT (implicit ADD, no CONSTRAINT keyword)
let constraint = self.parse_table_constraint()?;
Ok(AlterTableAction::AddConstraint(constraint))
} else if self.match_token(TokenType::Delete) {
// ALTER TABLE t DELETE WHERE x = 1 (BigQuery syntax)
self.expect(TokenType::Where)?;
let where_clause = self.parse_expression()?;
Ok(AlterTableAction::Delete { where_clause })
} else if self.match_keyword("SWAP") {
// Snowflake: ALTER TABLE a SWAP WITH b
self.expect(TokenType::With)?;
let target = self.parse_table_ref()?;
Ok(AlterTableAction::SwapWith(target))
} else if self.match_token(TokenType::Set) {
// TSQL: ALTER TABLE t SET (SYSTEM_VERSIONING=ON, DATA_DELETION=ON, ...)
if self.check(TokenType::LParen) {
self.skip(); // consume (
let mut expressions = Vec::new();
loop {
if self.check(TokenType::RParen) {
break;
}
if self.check_identifier("SYSTEM_VERSIONING") {
let expr = self.parse_system_versioning_option()?;
expressions.push(expr);
} else if self.check_identifier("DATA_DELETION") {
let expr = self.parse_data_deletion_option()?;
expressions.push(expr);
} else {
// Generic key=value (e.g., FILESTREAM_ON = 'test')
let expr = self.parse_expression()?;
expressions.push(expr);
}
if !self.match_token(TokenType::Comma) {
break;
}
}
self.expect(TokenType::RParen)?;
Ok(AlterTableAction::SetOptions { expressions })
} else if self.match_keyword("TAG") {
// Snowflake: SET TAG key='value', ... (key can be qualified like schema.tagname)
let mut tags = Vec::new();
loop {
// Parse qualified tag name (e.g., foo.bar or just bar)
let mut key = self.expect_identifier_or_keyword()?;
while self.match_token(TokenType::Dot) {
let next = self.expect_identifier_or_keyword()?;
key = format!("{}.{}", key, next);
}
self.expect(TokenType::Eq)?;
let value = self.parse_primary()?;
tags.push((key, value));
if !self.match_token(TokenType::Comma) {
break;
}
}
Ok(AlterTableAction::SetTag { expressions: tags })
} else if self.check_identifier("LOGGED") {
// PostgreSQL: ALTER TABLE t SET LOGGED
self.skip();
Ok(AlterTableAction::SetAttribute {
attribute: "LOGGED".to_string(),
})
} else if self.check_identifier("UNLOGGED") {
// PostgreSQL: ALTER TABLE t SET UNLOGGED
self.skip();
Ok(AlterTableAction::SetAttribute {
attribute: "UNLOGGED".to_string(),
})
} else if self.match_identifier("WITHOUT") {
// PostgreSQL: ALTER TABLE t SET WITHOUT CLUSTER/OIDS
let what = self.expect_identifier_or_keyword()?;
Ok(AlterTableAction::SetAttribute {
attribute: format!("WITHOUT {}", what),
})
} else if self.check_identifier("ACCESS") {
// PostgreSQL: ALTER TABLE t SET ACCESS METHOD method
self.skip();
// Consume "METHOD"
if !self.match_identifier("METHOD") {
return Err(self.parse_error("Expected METHOD after ACCESS"));
}
let method = self.expect_identifier_or_keyword()?;
Ok(AlterTableAction::SetAttribute {
attribute: format!("ACCESS METHOD {}", method),
})
} else if self.check_identifier("TABLESPACE") {
// PostgreSQL: ALTER TABLE t SET TABLESPACE tablespace
self.skip();
let name = self.expect_identifier_or_keyword()?;
Ok(AlterTableAction::SetAttribute {
attribute: format!("TABLESPACE {}", name),
})
} else if self.check_identifier("STAGE_FILE_FORMAT") {
// Snowflake: ALTER TABLE t SET STAGE_FILE_FORMAT = (options)
self.skip();
let options = self.parse_wrapped_options()?;
Ok(AlterTableAction::SetStageFileFormat { options })
} else if self.check_identifier("STAGE_COPY_OPTIONS") {
// Snowflake: ALTER TABLE t SET STAGE_COPY_OPTIONS = (options)
self.skip();
let options = self.parse_wrapped_options()?;
Ok(AlterTableAction::SetStageCopyOptions { options })
} else if self.match_token(TokenType::Authorization) {
// Trino: ALTER TABLE t SET AUTHORIZATION [ROLE] user
let mut auth_text = String::new();
if self.match_texts(&["ROLE"]) {
auth_text.push_str("ROLE ");
}
let user = self.expect_identifier_or_keyword()?;
auth_text.push_str(&user);
Ok(AlterTableAction::SetAttribute {
attribute: format!("AUTHORIZATION {}", auth_text),
})
} else if self.match_identifier("PROPERTIES") {
// Trino: ALTER TABLE t SET PROPERTIES x = 'y', ...
let mut properties = Vec::new();
loop {
// Parse property name (could be identifier or string literal)
let key = if self.check(TokenType::String) {
self.expect_string()?
} else {
self.expect_identifier_or_keyword()?
};
self.expect(TokenType::Eq)?;
// Parse value (could be DEFAULT or an expression)
let value = if self.match_token(TokenType::Default) {
// Use Var instead of Identifier so it won't be quoted
Expression::Var(Box::new(crate::expressions::Var {
this: "DEFAULT".to_string(),
}))
} else {
self.parse_expression()?
};
properties.push((key, value));
if !self.match_token(TokenType::Comma) {
break;
}
}
Ok(AlterTableAction::SetProperty { properties })
} else if self.match_text_seq(&["TABLE", "PROPERTIES"]) {
// Redshift: ALTER TABLE t SET TABLE PROPERTIES ('a' = '5', 'b' = 'c')
self.expect(TokenType::LParen)?;
let mut properties = Vec::new();
loop {
if self.check(TokenType::RParen) {
break;
}
// Parse key (string literal)
let key = self.parse_primary()?;
self.expect(TokenType::Eq)?;
// Parse value (string literal)
let value = self.parse_primary()?;
properties.push((key, value));
if !self.match_token(TokenType::Comma) {
break;
}
}
self.expect(TokenType::RParen)?;
Ok(AlterTableAction::SetTableProperties { properties })
} else if self.match_text_seq(&["LOCATION"]) {
// Redshift: ALTER TABLE t SET LOCATION 's3://bucket/folder/'
let location = self.expect_string()?;
Ok(AlterTableAction::SetLocation { location })
} else if self.match_text_seq(&["FILE", "FORMAT"]) {
// Redshift: ALTER TABLE t SET FILE FORMAT AVRO
let format = self.expect_identifier_or_keyword()?;
Ok(AlterTableAction::SetFileFormat { format })
} else if self.peek_nth(1).map(|t| t.token_type) != Some(TokenType::Eq) {
// SET <multi-word clause> (e.g., SET PROJECTION POLICY name) — consume as Raw
let mut tokens: Vec<(String, TokenType)> =
vec![("SET".to_string(), TokenType::Set)];
while !self.is_at_end() && !self.check(TokenType::Semicolon) {
if self.check(TokenType::Comma) {
break;
}
let token = self.advance();
let text = if token.token_type == TokenType::QuotedIdentifier {
format!("\"{}\"", token.text)
} else if token.token_type == TokenType::String {
format!("'{}'", token.text)
} else {
token.text.clone()
};
tokens.push((text, token.token_type));
}
Ok(AlterTableAction::Raw {
sql: self.join_command_tokens(tokens),
})
} else {
// Snowflake: SET property=value, ...
let mut properties = Vec::new();
loop {
let key = self.expect_identifier_or_keyword()?;
self.expect(TokenType::Eq)?;
let value = self.parse_expression()?;
properties.push((key, value));
if !self.match_token(TokenType::Comma) {
break;
}
}
Ok(AlterTableAction::SetProperty { properties })
}
} else if self.match_keyword("UNSET") {
// Snowflake: ALTER TABLE t UNSET property or UNSET TAG key
if self.match_keyword("TAG") {
// UNSET TAG key1, key2 (keys can be qualified like schema.tagname)
let mut names = Vec::new();
loop {
let mut name = self.expect_identifier_or_keyword()?;
while self.match_token(TokenType::Dot) {
let next = self.expect_identifier_or_keyword()?;
name = format!("{}.{}", name, next);
}
names.push(name);
if !self.match_token(TokenType::Comma) {
break;
}
}
Ok(AlterTableAction::UnsetTag { names })
} else if self.peek_nth(1).map(|t| {
t.token_type != TokenType::Comma
&& t.token_type != TokenType::Semicolon
&& t.token_type != TokenType::Eof
}) == Some(true)
&& !self.is_at_end()
&& self.peek_nth(1).map(|t| t.token_type != TokenType::Eq) == Some(true)
{
// UNSET <multi-word clause> (e.g., UNSET PROJECTION POLICY) — consume as Raw
let mut tokens: Vec<(String, TokenType)> =
vec![("UNSET".to_string(), TokenType::Var)];
while !self.is_at_end() && !self.check(TokenType::Semicolon) {
if self.check(TokenType::Comma) {
break;
}
tokens.push((self.advance().text.clone(), TokenType::Var));
}
Ok(AlterTableAction::Raw {
sql: self.join_command_tokens(tokens),
})
} else {
// UNSET property1, property2
let mut properties = Vec::new();
loop {
let name = self.expect_identifier_or_keyword()?;
properties.push(name);
if !self.match_token(TokenType::Comma) {
break;
}
}
Ok(AlterTableAction::UnsetProperty { properties })
}
} else if self.match_keyword("CLUSTER") {
// Snowflake: ALTER TABLE t CLUSTER BY (col1, col2 DESC)
self.expect(TokenType::By)?;
self.expect(TokenType::LParen)?;
// Parse ordered expressions (can have ASC/DESC modifiers)
let ordered = self.parse_order_by_list()?;
// Convert Ordered to Expression (wrapping in Ordered if it has ordering)
let expressions: Vec<Expression> = ordered
.into_iter()
.map(|o| Expression::Ordered(Box::new(o)))
.collect();
self.expect(TokenType::RParen)?;
Ok(AlterTableAction::ClusterBy { expressions })
} else if self.match_token(TokenType::AutoIncrement) {
self.expect(TokenType::Eq)?;
let start = self.current;
self.parse_expression()?;
Ok(AlterTableAction::Raw {
sql: format!("AUTO_INCREMENT={}", self.tokens_to_sql(start, self.current)),
})
} else if self.match_token(TokenType::Replace) {
// ClickHouse: REPLACE PARTITION expr FROM table
if self.match_token(TokenType::Partition) {
let partition_expr = if self.match_text_seq(&["ALL"]) {
Expression::Identifier(Identifier::new("ALL".to_string()))
} else if self.match_text_seq(&["ID"]) {
let id_val = self.parse_expression()?;
// Store as Raw to preserve "ID <value>" format
let id_str = match &id_val {
Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
let Literal::String(s) = lit.as_ref() else {
unreachable!()
};
format!("ID '{}'", s)
}
_ => format!("ID {}", "?"),
};
Expression::Raw(Raw { sql: id_str })
} else {
self.parse_expression()?
};
let source = if self.match_token(TokenType::From) {
let tref = self.parse_table_ref()?;
Some(Box::new(Expression::Table(Box::new(tref))))
} else {
None
};
Ok(AlterTableAction::ReplacePartition {
partition: partition_expr,
source,
})
} else {
Err(self.parse_error("Expected PARTITION after REPLACE in ALTER TABLE"))
}
} else if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) {
// ClickHouse-specific ALTER TABLE mutations: UPDATE, DELETE, DETACH, ATTACH,
// FREEZE, UNFREEZE, MATERIALIZE, CLEAR, COMMENT COLUMN, MODIFY ORDER BY,
// MOVE PARTITION, FETCH PARTITION, ADD INDEX, DROP INDEX, CLEAR INDEX
// For ClickHouse, consume any unrecognized ALTER TABLE action as Raw
// (covers UPDATE, DELETE, DETACH, ATTACH, FREEZE, MOVE, FETCH, etc.)
{
let keyword = self.advance().text.clone();
let mut tokens: Vec<(String, TokenType)> = vec![(keyword, TokenType::Var)];
let mut paren_depth = 0i32;
while !self.is_at_end() && !self.check(TokenType::Semicolon) {
// Stop at comma only when at top-level (not inside parens) — it separates ALTER actions
if self.check(TokenType::Comma) && paren_depth == 0 {
break;
}
let token = self.advance();
if token.token_type == TokenType::LParen {
paren_depth += 1;
}
if token.token_type == TokenType::RParen {
paren_depth -= 1;
}
let text = if token.token_type == TokenType::QuotedIdentifier {
format!("\"{}\"", token.text)
} else if token.token_type == TokenType::String {
format!("'{}'", token.text)
} else {
token.text.clone()
};
tokens.push((text, token.token_type));
}
Ok(AlterTableAction::Raw {
sql: self.join_command_tokens(tokens),
})
}
} else if self.check_identifier("REORGANIZE")
|| self.check_identifier("COALESCE")
|| self.check_identifier("EXCHANGE")
|| self.check_identifier("ANALYZE")
|| self.check_identifier("OPTIMIZE")
|| self.check_identifier("REBUILD")
|| self.check_identifier("REPAIR")
|| self.check_identifier("DISCARD")
|| self.check_identifier("IMPORT")
{
// MySQL partition operations: REORGANIZE PARTITION, COALESCE PARTITION, etc.
// Consume as Raw, respecting parenthesis depth
let keyword = self.advance().text.clone();
let mut tokens: Vec<(String, TokenType)> = vec![(keyword, TokenType::Var)];
let mut paren_depth = 0i32;
while !self.is_at_end() && !self.check(TokenType::Semicolon) {
if self.check(TokenType::Comma) && paren_depth == 0 {
break;
}
let token = self.advance();
if token.token_type == TokenType::LParen {
paren_depth += 1;
}
if token.token_type == TokenType::RParen {
paren_depth -= 1;
if paren_depth < 0 {
break;
}
}
let text = if token.token_type == TokenType::QuotedIdentifier {
format!("\"{}\"", token.text)
} else if token.token_type == TokenType::String {
format!("'{}'", token.text)
} else {
token.text.clone()
};
tokens.push((text, token.token_type));
}
Ok(AlterTableAction::Raw {
sql: self.join_command_tokens(tokens),
})
} else {
Err(self.parse_error(format!(
"Expected ADD, DROP, RENAME, ALTER, SET, UNSET, SWAP, CLUSTER, or REPLACE in ALTER TABLE, got {:?}",
self.peek().token_type
)))
}
}
/// Parse the TSQL `SYSTEM_VERSIONING` option inside `ALTER TABLE t SET (...)`.
///
/// Accepted forms:
/// - `SYSTEM_VERSIONING = OFF`
/// - `SYSTEM_VERSIONING = ON`
/// - `SYSTEM_VERSIONING = ON (HISTORY_TABLE = ..., DATA_CONSISTENCY_CHECK = ...,
///   HISTORY_RETENTION_PERIOD = ...)`
///
/// Returns a [`WithSystemVersioningProperty`] expression. `on = None` encodes
/// the OFF form; unknown options inside the parenthesized list are skipped
/// one token at a time rather than raising an error.
fn parse_system_versioning_option(&mut self) -> Result<Expression> {
    // Caller has already checked the identifier; consume SYSTEM_VERSIONING and '='.
    self.skip();
    self.expect(TokenType::Eq)?;

    let mut versioning = WithSystemVersioningProperty {
        on: None,
        this: None,
        data_consistency: None,
        retention_period: None,
        with_: None,
    };

    // OFF: leave every field as None — the generator renders that as OFF.
    if self.match_identifier("OFF") {
        return Ok(Expression::WithSystemVersioningProperty(Box::new(versioning)));
    }

    // ON (the keyword may tokenize either as TokenType::On or a bare identifier).
    if self.match_token(TokenType::On) || self.match_identifier("ON") {
        versioning.on = Some(Box::new(Expression::Boolean(BooleanLiteral { value: true })));
    }

    // Optional ON(...) option list.
    if self.match_token(TokenType::LParen) {
        while !self.check(TokenType::RParen) {
            if self.match_identifier("HISTORY_TABLE") {
                self.expect(TokenType::Eq)?;
                let history = self.parse_table_ref()?;
                versioning.this = Some(Box::new(Expression::Table(Box::new(history))));
            } else if self.match_identifier("DATA_CONSISTENCY_CHECK") {
                self.expect(TokenType::Eq)?;
                let flag = self.expect_identifier_or_keyword()?;
                versioning.data_consistency = Some(Box::new(Expression::Identifier(
                    Identifier::new(flag.to_ascii_uppercase()),
                )));
            } else if self.match_identifier("HISTORY_RETENTION_PERIOD") {
                self.expect(TokenType::Eq)?;
                if let Some(period) = self.parse_retention_period()? {
                    versioning.retention_period = Some(Box::new(period));
                }
            } else {
                // Unrecognized option token: skip it so parsing can continue.
                self.skip();
            }
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
        self.expect(TokenType::RParen)?;
    }

    Ok(Expression::WithSystemVersioningProperty(Box::new(versioning)))
}
/// Parse the TSQL `DATA_DELETION` option inside `ALTER TABLE t SET (...)`.
///
/// Accepted forms:
/// - `DATA_DELETION = OFF`
/// - `DATA_DELETION = ON`
/// - `DATA_DELETION = ON (FILTER_COLUMN = ..., RETENTION_PERIOD = ...)`
///
/// Returns a [`DataDeletionProperty`] expression. If neither ON nor OFF
/// follows the '=', the option is treated as OFF and no token is consumed
/// for the value (lenient parsing, matching the rest of this SET handler).
fn parse_data_deletion_option(&mut self) -> Result<Expression> {
    // Caller has already checked the identifier; consume DATA_DELETION and '='.
    self.skip();
    self.expect(TokenType::Eq)?;

    // ON may tokenize as TokenType::On or a bare identifier; OFF (or anything
    // else) yields false — matching OFF here only consumes the token.
    let enabled = if self.match_identifier("ON") || self.match_token(TokenType::On) {
        true
    } else {
        self.match_identifier("OFF");
        false
    };
    let on_expr = Box::new(Expression::Boolean(BooleanLiteral { value: enabled }));

    let mut filter_column = None;
    let mut retention_period = None;

    // Optional ON(...) option list.
    if self.match_token(TokenType::LParen) {
        while !self.check(TokenType::RParen) {
            if self.match_identifier("FILTER_COLUMN") {
                self.expect(TokenType::Eq)?;
                let column_name = self.expect_identifier_or_keyword()?;
                filter_column = Some(Box::new(Expression::boxed_column(Column {
                    name: Identifier::new(column_name),
                    table: None,
                    join_mark: false,
                    trailing_comments: Vec::new(),
                    span: None,
                    inferred_type: None,
                })));
            } else if self.match_identifier("RETENTION_PERIOD") {
                self.expect(TokenType::Eq)?;
                if let Some(period) = self.parse_retention_period()? {
                    retention_period = Some(Box::new(period));
                }
            } else {
                // Unrecognized option token: skip it so parsing can continue.
                self.skip();
            }
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
        self.expect(TokenType::RParen)?;
    }

    Ok(Expression::DataDeletionProperty(Box::new(
        DataDeletionProperty {
            on: on_expr,
            filter_column,
            retention_period,
        },
    )))
}
/// Parse ALTER COLUMN action
///
/// Dispatches on the token following the column name:
/// - SET NOT NULL / SET DEFAULT expr / SET DATA TYPE ... / SET VISIBLE|INVISIBLE
/// - DROP NOT NULL / DROP DEFAULT
/// - COMMENT 'text'
/// - TYPE type, or a bare type (PostgreSQL/Redshift shorthand)
fn parse_alter_column_action(&mut self) -> Result<AlterColumnAction> {
    if self.match_token(TokenType::Set) {
        if self.match_keywords(&[TokenType::Not, TokenType::Null]) {
            Ok(AlterColumnAction::SetNotNull)
        } else if self.match_token(TokenType::Default) {
            let expr = self.parse_primary()?;
            Ok(AlterColumnAction::SetDefault(expr))
        } else if self.match_identifier("DATA") {
            // SET DATA TYPE
            // TYPE can be a keyword token or identifier
            let _ = self.match_token(TokenType::Type) || self.match_identifier("TYPE");
            let data_type = self.parse_data_type()?;
            // Optional COLLATE (can be identifier or string literal like 'binary')
            let collate = if self.match_token(TokenType::Collate) {
                if self.check(TokenType::String) {
                    // NOTE(review): this path re-wraps the literal in quotes,
                    // but the bare-TYPE path below stores the same value
                    // WITHOUT quotes — confirm which form the generator
                    // expects; the asymmetry looks unintentional.
                    let text = self.advance().text.clone();
                    Some(format!("'{}'", text))
                } else {
                    Some(self.expect_identifier_or_keyword()?)
                }
            } else {
                None
            };
            // Optional USING expression
            let using = if self.match_token(TokenType::Using) {
                Some(self.parse_expression()?)
            } else {
                None
            };
            Ok(AlterColumnAction::SetDataType {
                data_type,
                using,
                collate,
            })
        } else if self.match_identifier("VISIBLE") {
            Ok(AlterColumnAction::SetVisible)
        } else if self.match_identifier("INVISIBLE") {
            Ok(AlterColumnAction::SetInvisible)
        } else {
            Err(self.parse_error("Expected NOT NULL, DEFAULT, VISIBLE, or INVISIBLE after SET"))
        }
    } else if self.match_token(TokenType::Drop) {
        if self.match_keywords(&[TokenType::Not, TokenType::Null]) {
            Ok(AlterColumnAction::DropNotNull)
        } else if self.match_token(TokenType::Default) {
            Ok(AlterColumnAction::DropDefault)
        } else {
            Err(self.parse_error("Expected NOT NULL or DEFAULT after DROP"))
        }
    } else if self.match_token(TokenType::Comment) {
        // ALTER COLUMN col COMMENT 'comment'
        let comment = self.expect_string()?;
        Ok(AlterColumnAction::Comment(comment))
    } else if self.match_token(TokenType::Type)
        || self.match_identifier("TYPE")
        || self.is_identifier_token()
    {
        // TYPE data_type or just data_type (PostgreSQL/Redshift: ALTER COLUMN col TYPE datatype)
        let data_type = self.parse_data_type()?;
        // Optional COLLATE (can be identifier or string literal like 'binary')
        let collate = if self.match_token(TokenType::Collate) {
            if self.check(TokenType::String) {
                // See NOTE(review) above: no quote re-wrapping on this path.
                Some(self.advance().text.clone())
            } else {
                Some(self.expect_identifier_or_keyword()?)
            }
        } else {
            None
        };
        // Optional USING expression
        let using = if self.match_token(TokenType::Using) {
            Some(self.parse_expression()?)
        } else {
            None
        };
        Ok(AlterColumnAction::SetDataType {
            data_type,
            using,
            collate,
        })
    } else {
        Err(self.parse_error("Expected SET, DROP, or TYPE in ALTER COLUMN"))
    }
}
/// Parse TRUNCATE statement
///
/// Covers the common grammar plus dialect extensions:
/// - PostgreSQL: ONLY, trailing `*`, multi-table lists, RESTART/CONTINUE
///   IDENTITY, CASCADE/RESTRICT
/// - ClickHouse: TRUNCATE ALL TABLES FROM ... (kept as a raw Command),
///   TEMPORARY tables, ON CLUSTER, trailing SETTINGS
/// - TSQL: WITH (PARTITIONS(...)) hints
/// - Hive: PARTITION (k = v, ...)
fn parse_truncate(&mut self) -> Result<Expression> {
    self.expect(TokenType::Truncate)?;
    // ClickHouse: TRUNCATE ALL TABLES FROM [IF EXISTS] db
    // Detected by ALL followed by TABLES; this form has no structured AST,
    // so the rest of the statement is preserved verbatim as a Command.
    if matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    ) && self.check_identifier("ALL")
        && self.current + 1 < self.tokens.len()
        && self.tokens[self.current + 1]
            .text
            .eq_ignore_ascii_case("TABLES")
    {
        // Consume remaining tokens as Command
        let mut parts = vec!["TRUNCATE".to_string()];
        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
            let token = self.advance();
            // Re-quote string literals so the reconstructed SQL stays valid.
            if token.token_type == TokenType::String {
                parts.push(format!("'{}'", token.text));
            } else {
                parts.push(token.text.clone());
            }
        }
        return Ok(Expression::Command(Box::new(crate::expressions::Command {
            this: parts.join(" "),
        })));
    }
    let target = if self.match_token(TokenType::Database) {
        TruncateTarget::Database
    } else {
        // ClickHouse: TRUNCATE TEMPORARY TABLE t
        self.match_token(TokenType::Temporary);
        self.match_token(TokenType::Table); // optional TABLE keyword
        TruncateTarget::Table
    };
    // Parse optional IF EXISTS
    let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
    // Parse first table with optional ONLY modifier
    let has_only = self.match_token(TokenType::Only);
    let mut table = self.parse_table_ref()?;
    if has_only {
        table.only = true;
    }
    // Check for * suffix on table name (PostgreSQL: inherit children)
    let first_star = self.match_token(TokenType::Star);
    // TSQL: WITH (PARTITIONS(1, 2 TO 5, 10 TO 20, 84))
    // Only attempted when WITH is immediately followed by a paren, so a
    // bare WITH belonging to some other clause is left untouched.
    if self.check(TokenType::With) && self.check_next(TokenType::LParen) {
        if let Some(hint_expr) = self.parse_truncate_table_hints()? {
            match hint_expr {
                Expression::Tuple(tuple) => {
                    // Flatten a tuple of hints into the table's hint list.
                    table.hints = tuple.expressions;
                }
                other => {
                    table.hints = vec![other];
                }
            }
        }
    }
    // ClickHouse: ON CLUSTER clause
    let on_cluster = self.parse_on_cluster_clause()?;
    // Parse additional tables for multi-table TRUNCATE
    let mut extra_tables = Vec::new();
    if first_star {
        // The first table has a * suffix, so create an entry for it
        extra_tables.push(TruncateTableEntry {
            table: table.clone(),
            star: true,
        });
    }
    while self.match_token(TokenType::Comma) {
        // Each extra table accepts the same ONLY and `*` modifiers.
        let extra_only = self.match_token(TokenType::Only);
        let mut extra_table = self.parse_table_ref()?;
        if extra_only {
            extra_table.only = true;
        }
        let extra_star = self.match_token(TokenType::Star);
        extra_tables.push(TruncateTableEntry {
            table: extra_table,
            star: extra_star,
        });
    }
    // Parse RESTART IDENTITY / CONTINUE IDENTITY
    // RESTART is TokenType::Restart keyword, IDENTITY is TokenType::Identity keyword
    let identity = if self.match_token(TokenType::Restart) {
        self.match_token(TokenType::Identity);
        Some(TruncateIdentity::Restart)
    } else if self.match_identifier("CONTINUE") {
        self.match_token(TokenType::Identity);
        Some(TruncateIdentity::Continue)
    } else {
        None
    };
    // Parse CASCADE or RESTRICT (mutually exclusive; CASCADE wins)
    // CASCADE is TokenType::Cascade keyword, RESTRICT is TokenType::Restrict keyword
    let cascade = self.match_token(TokenType::Cascade);
    let restrict = if !cascade {
        self.match_token(TokenType::Restrict)
    } else {
        false
    };
    // Parse Hive PARTITION clause: PARTITION(key = value, ...)
    // parse_partition consumes the PARTITION keyword itself
    let partition = self.parse_partition()?;
    // ClickHouse: TRUNCATE TABLE t SETTINGS key=value, ...
    if matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    ) && self.match_token(TokenType::Settings)
    {
        // Consume settings expressions (they're not stored in the AST for TRUNCATE)
        loop {
            let _ = self.parse_expression()?;
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
    }
    Ok(Expression::Truncate(Box::new(Truncate {
        target,
        if_exists,
        table,
        on_cluster,
        cascade,
        extra_tables,
        identity,
        restrict,
        partition: partition.map(Box::new),
    })))
}
/// Parse VALUES table constructor: VALUES (1, 'a'), (2, 'b')
///
/// Also accepts bare rows without parentheses (VALUES 1, 2 -> VALUES (1), (2)),
/// an optional table alias with column aliases (AS t(a, b), or implicit t(a, b)),
/// and a trailing set operation (VALUES ... UNION ...).
fn parse_values(&mut self) -> Result<Expression> {
    self.expect(TokenType::Values)?;
    let mut expressions = Vec::new();
    // Handle bare VALUES without parentheses: VALUES 1, 2, 3 -> VALUES (1), (2), (3)
    if !self.check(TokenType::LParen) {
        loop {
            let val = self.parse_expression()?;
            // Each bare value becomes a single-element row.
            expressions.push(Tuple {
                expressions: vec![val],
            });
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
    } else {
        loop {
            self.expect(TokenType::LParen)?;
            // Parse VALUES tuple elements with optional AS aliases (Hive syntax)
            let row_values = self.parse_values_expression_list()?;
            self.expect(TokenType::RParen)?;
            expressions.push(Tuple {
                expressions: row_values,
            });
            if !self.match_token(TokenType::Comma) {
                break;
            }
            // ClickHouse: allow trailing comma after last tuple
            if matches!(
                self.config.dialect,
                Some(crate::dialects::DialectType::ClickHouse)
            ) && !self.check(TokenType::LParen)
            {
                break;
            }
        }
    }
    // Optional table alias: VALUES (1, 2) AS new_data or VALUES (1, 2) new_data.
    // Previously the explicit-AS and implicit branches duplicated the
    // column-alias parsing; the alias name is resolved first, then column
    // aliases are parsed once for both forms.
    let alias_name = if self.match_token(TokenType::As) {
        Some(self.expect_identifier()?)
    } else if self.check(TokenType::Var) && !self.check_keyword() {
        // Implicit alias: VALUES (0) foo(bar)
        Some(self.advance().text.clone())
    } else {
        None
    };
    // Column aliases, if any, follow the table alias: t(a, b).
    let (alias, column_aliases) = match alias_name {
        Some(name) => {
            let cols = if self.match_token(TokenType::LParen) {
                let aliases = self.parse_identifier_list()?;
                self.expect(TokenType::RParen)?;
                aliases
            } else {
                Vec::new()
            };
            (Some(Identifier::new(name)), cols)
        }
        None => (None, Vec::new()),
    };
    // VALUES can be followed by set operations (UNION, etc.)
    let values_expr = Expression::Values(Box::new(Values {
        expressions,
        alias,
        column_aliases,
    }));
    // Check for set operations after VALUES
    self.parse_set_operation(values_expr)
}
/// Parse USE statement: USE db, USE DATABASE x, USE SCHEMA x.y, USE ROLE x, etc.
///
/// Also handles Snowflake's USE SECONDARY ROLES ALL|NONE|r1, r2, ... form,
/// which returns early with its own UseKind.
fn parse_use(&mut self) -> Result<Expression> {
    self.expect(TokenType::Use)?;
    // Check for Snowflake: USE SECONDARY ROLES ALL|NONE|role1, role2, ...
    // SECONDARY/ROLES are not keywords, so they arrive as Var tokens.
    if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("SECONDARY") {
        self.skip(); // consume SECONDARY
        // Check for ROLES
        if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("ROLES") {
            self.skip(); // consume ROLES
            // Parse ALL, NONE, or comma-separated role list
            let mut roles = Vec::new();
            loop {
                if self.check(TokenType::Var)
                    || self.check(TokenType::All)
                    || self.check(TokenType::Identifier)
                {
                    let role = self.advance().text.clone();
                    roles.push(role);
                    if !self.match_token(TokenType::Comma) {
                        break;
                    }
                } else {
                    break;
                }
            }
            // An empty role list falls back to ALL.
            let name = if roles.is_empty() {
                "ALL".to_string()
            } else {
                roles.join(", ")
            };
            return Ok(Expression::Use(Box::new(Use {
                kind: Some(UseKind::SecondaryRoles),
                this: Identifier::new(name),
            })));
        }
    }
    // Check for kind: DATABASE, SCHEMA, ROLE, WAREHOUSE, CATALOG
    // Note: ROLE and CATALOG are not keywords, so we check the text
    let kind = if self.match_token(TokenType::Database) {
        Some(UseKind::Database)
    } else if self.match_token(TokenType::Schema) {
        Some(UseKind::Schema)
    } else if self.match_token(TokenType::Warehouse) {
        Some(UseKind::Warehouse)
    } else if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("ROLE") {
        self.skip();
        Some(UseKind::Role)
    } else if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("CATALOG") {
        self.skip();
        Some(UseKind::Catalog)
    } else {
        None
    };
    // Parse the name (can be qualified like x.y)
    // Use expect_identifier_or_keyword_with_quoted because names like "default", "system" are valid
    let mut ident = self.expect_identifier_or_keyword_with_quoted()?;
    // Handle qualified names like schema.table for USE SCHEMA x.y
    // Both parts are folded into one dotted identifier.
    if self.match_token(TokenType::Dot) {
        let second_part = self.expect_identifier_or_keyword_with_quoted()?;
        ident.name = format!("{}.{}", ident.name, second_part.name);
    }
    Ok(Expression::Use(Box::new(Use { kind, this: ident })))
}
/// Parse EXPORT DATA statement (BigQuery)
/// EXPORT DATA [WITH CONNECTION connection] OPTIONS (...) AS SELECT ...
fn parse_export_data(&mut self) -> Result<Expression> {
    self.skip(); // consume EXPORT
    // DATA must follow EXPORT.
    if !self.match_identifier("DATA") {
        return Err(self.parse_error("Expected DATA after EXPORT"));
    }
    // Optional WITH CONNECTION; the connection id may be qualified with up
    // to three parts (project.location.connection).
    let connection = if self.match_text_seq(&["WITH", "CONNECTION"]) {
        let mut parts = vec![self.expect_identifier()?];
        while parts.len() < 3 && self.match_token(TokenType::Dot) {
            parts.push(self.expect_identifier()?);
        }
        Some(Box::new(Expression::Identifier(Identifier::new(
            parts.join("."),
        ))))
    } else {
        None
    };
    // OPTIONS (...) — absent options yield an empty list.
    let options = if self.match_identifier("OPTIONS") {
        self.parse_options_list()?
    } else {
        Vec::new()
    };
    // The exported query follows AS.
    self.expect(TokenType::As)?;
    let query = self.parse_statement()?;
    Ok(Expression::Export(Box::new(Export {
        this: Box::new(query),
        connection,
        options,
    })))
}
/// Parse CACHE TABLE statement (Spark)
/// CACHE [LAZY] TABLE name [OPTIONS(...)] [AS query]
fn parse_cache(&mut self) -> Result<Expression> {
    self.expect(TokenType::Cache)?;
    // Check for LAZY keyword (not a keyword token; arrives as Var).
    let lazy = self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("LAZY");
    if lazy {
        self.skip();
    }
    self.expect(TokenType::Table)?;
    let table = Identifier::new(self.expect_identifier()?);
    // Check for OPTIONS clause
    let options =
        if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("OPTIONS") {
            self.skip();
            self.expect(TokenType::LParen)?;
            let mut opts = Vec::new();
            loop {
                // Parse key = value pairs (key can be string literal or identifier)
                let key = if self.check(TokenType::NationalString) {
                    let token = self.advance();
                    Expression::Literal(Box::new(Literal::NationalString(token.text)))
                } else if self.check(TokenType::String) {
                    let token = self.advance();
                    Expression::Literal(Box::new(Literal::String(token.text)))
                } else {
                    Expression::Identifier(Identifier::new(self.expect_identifier()?))
                };
                // Eq is optional - Spark allows space-separated key value pairs
                // e.g., OPTIONS ('storageLevel' 'DISK_ONLY') or OPTIONS ('key' = 'value')
                let _ = self.match_token(TokenType::Eq);
                let value = self.parse_expression()?;
                opts.push((key, value));
                if !self.match_token(TokenType::Comma) {
                    break;
                }
            }
            self.expect(TokenType::RParen)?;
            opts
        } else {
            Vec::new()
        };
    // Check for AS clause or implicit query (SELECT without AS in Spark)
    let query = if self.match_token(TokenType::As) {
        Some(self.parse_statement()?)
    } else if self.check(TokenType::Select) || self.check(TokenType::With) {
        // Spark allows SELECT without AS keyword after CACHE TABLE
        Some(self.parse_statement()?)
    } else {
        None
    };
    Ok(Expression::Cache(Box::new(Cache {
        table,
        lazy,
        options,
        query,
    })))
}
/// Parse UNCACHE TABLE statement (Spark)
/// UNCACHE TABLE [IF EXISTS] name
fn parse_uncache(&mut self) -> Result<Expression> {
    self.expect(TokenType::Uncache)?;
    self.expect(TokenType::Table)?;
    // Optional IF EXISTS guard, then the table name.
    let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
    let name = self.expect_identifier()?;
    Ok(Expression::Uncache(Box::new(Uncache {
        table: Identifier::new(name),
        if_exists,
    })))
}
/// Parse LOAD DATA statement (Hive)
/// LOAD DATA [LOCAL] INPATH 'path' [OVERWRITE] INTO TABLE table_name
/// [PARTITION (col=val, ...)] [INPUTFORMAT 'format'] [SERDE 'serde']
fn parse_load_data(&mut self) -> Result<Expression> {
    self.expect(TokenType::Load)?;
    // Expect DATA keyword (not a keyword token; compared by text).
    let data_token = self.advance();
    if !data_token.text.eq_ignore_ascii_case("DATA") {
        return Err(self.parse_error("Expected DATA after LOAD"));
    }
    // Check for LOCAL keyword
    let local = self.match_token(TokenType::Local);
    // Expect INPATH
    self.expect(TokenType::Inpath)?;
    // Parse the path (string literal)
    let inpath = if self.check(TokenType::String) {
        self.advance().text
    } else {
        return Err(self.parse_error("Expected string literal after INPATH"));
    };
    // Check for OVERWRITE keyword
    let overwrite = self.match_token(TokenType::Overwrite);
    // Expect INTO TABLE
    self.expect(TokenType::Into)?;
    self.expect(TokenType::Table)?;
    // Parse table name (can be qualified)
    let table = Expression::Table(Box::new(self.parse_table_ref()?));
    // Check for PARTITION clause; stored as (column, value) pairs.
    let partition = if self.match_token(TokenType::Partition) {
        self.expect(TokenType::LParen)?;
        let mut partitions = Vec::new();
        loop {
            let col = Identifier::new(self.expect_identifier_or_keyword()?);
            self.expect(TokenType::Eq)?;
            let val = self.parse_expression()?;
            partitions.push((col, val));
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
        self.expect(TokenType::RParen)?;
        partitions
    } else {
        Vec::new()
    };
    // Check for INPUTFORMAT clause (value must be a string literal)
    let input_format = if self.match_token(TokenType::InputFormat) {
        if self.check(TokenType::String) {
            Some(self.advance().text)
        } else {
            return Err(self.parse_error("Expected string literal after INPUTFORMAT"));
        }
    } else {
        None
    };
    // Check for SERDE clause (value must be a string literal)
    let serde = if self.match_token(TokenType::Serde) {
        if self.check(TokenType::String) {
            Some(self.advance().text)
        } else {
            return Err(self.parse_error("Expected string literal after SERDE"));
        }
    } else {
        None
    };
    Ok(Expression::LoadData(Box::new(LoadData {
        local,
        inpath,
        overwrite,
        table,
        partition,
        input_format,
        serde,
    })))
}
/// Parse a single PRAGMA value, mapping the special ON/OFF keywords to
/// lowercase Vars and otherwise falling back to a general expression.
/// ON may arrive as the keyword token or a bare identifier; OFF is always
/// an identifier. Extracted so the `=` and `(...)` forms share one path.
fn parse_pragma_value(&mut self) -> Result<Expression> {
    if self.match_token(TokenType::On) || self.match_identifier("ON") {
        Ok(Expression::Var(Box::new(Var {
            this: "on".to_string(),
        })))
    } else if self.match_identifier("OFF") {
        Ok(Expression::Var(Box::new(Var {
            this: "off".to_string(),
        })))
    } else {
        self.parse_expression()
    }
}
/// Parse PRAGMA statement (SQLite)
/// PRAGMA [schema.]name [= value | (args...)]
fn parse_pragma(&mut self) -> Result<Expression> {
    self.expect(TokenType::Pragma)?;
    // Parse schema.name or just name
    let first_name = self.expect_identifier_or_keyword()?;
    let (schema, name) = if self.match_token(TokenType::Dot) {
        // First name was the schema qualifier.
        let pragma_name = self.expect_identifier_or_keyword()?;
        (
            Some(Identifier::new(first_name)),
            Identifier::new(pragma_name),
        )
    } else {
        (None, Identifier::new(first_name))
    };
    // Three forms: assignment (PRAGMA name = value), call (PRAGMA name(args)),
    // or a bare query (PRAGMA name).
    let (value, args, use_assignment_syntax) = if self.match_token(TokenType::Eq) {
        // PRAGMA name = value
        let val = self.parse_pragma_value()?;
        (Some(val), Vec::new(), true)
    } else if self.match_token(TokenType::LParen) {
        // PRAGMA name(args...)
        let mut arguments = Vec::new();
        if !self.check(TokenType::RParen) {
            loop {
                arguments.push(self.parse_pragma_value()?);
                if !self.match_token(TokenType::Comma) {
                    break;
                }
            }
        }
        self.expect(TokenType::RParen)?;
        (None, arguments, false)
    } else {
        (None, Vec::new(), false)
    };
    Ok(Expression::Pragma(Box::new(Pragma {
        schema,
        name,
        value,
        args,
        use_assignment_syntax,
    })))
}
/// Parse ROLLBACK statement
/// ROLLBACK [TO [SAVEPOINT] <name>]
///
/// Returns a Rollback node where `savepoint` holds a TO SAVEPOINT target
/// and `this` holds a TSQL transaction name (or a "TRANSACTION" marker when
/// only the keyword was present, to preserve round-trip output).
fn parse_rollback(&mut self) -> Result<Expression> {
    self.expect(TokenType::Rollback)?;
    // Check for optional TRANSACTION, TRAN, or WORK keyword
    let has_transaction = self.match_token(TokenType::Transaction)
        || self.match_identifier("TRAN")
        || self.match_identifier("WORK");
    // Check for TO SAVEPOINT (standard SQL) or transaction name (TSQL)
    let (savepoint, this) = if self.match_token(TokenType::To) {
        // Optional SAVEPOINT keyword
        self.match_token(TokenType::Savepoint);
        // Savepoint name; a missing name is tolerated (both fields None).
        if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
            let name = self.advance().text;
            (
                Some(Box::new(Expression::Identifier(Identifier::new(name)))),
                None,
            )
        } else {
            (None, None)
        }
    } else if has_transaction
        && (self.is_identifier_token() || self.is_safe_keyword_as_identifier())
    {
        // TSQL: ROLLBACK TRANSACTION transaction_name
        let name = self.advance().text;
        (
            None,
            Some(Box::new(Expression::Identifier(Identifier::new(name)))),
        )
    } else if has_transaction {
        // Just ROLLBACK TRANSACTION - store marker
        (
            None,
            Some(Box::new(Expression::Identifier(Identifier::new(
                "TRANSACTION".to_string(),
            )))),
        )
    } else {
        (None, None)
    };
    Ok(Expression::Rollback(Box::new(Rollback { savepoint, this })))
}
/// Parse COMMIT statement
/// COMMIT [TRANSACTION|TRAN|WORK] [transaction_name] [WITH (DELAYED_DURABILITY = ON|OFF)] [AND [NO] CHAIN]
fn parse_commit(&mut self) -> Result<Expression> {
    self.expect(TokenType::Commit)?;
    // Check for optional TRANSACTION, TRAN, or WORK keyword
    let has_transaction = self.match_token(TokenType::Transaction)
        || self.match_identifier("TRAN")
        || self.match_identifier("WORK");
    // Parse optional transaction name (TSQL); WITH/AND signal the start of
    // a later clause, not a name.
    let this = if has_transaction
        && (self.is_identifier_token() || self.is_safe_keyword_as_identifier())
        && !self.check(TokenType::With)
        && !self.check(TokenType::And)
    {
        let name = self.advance().text;
        Some(Box::new(Expression::Identifier(Identifier::new(name))))
    } else if has_transaction {
        // Store marker that TRANSACTION keyword was present
        Some(Box::new(Expression::Identifier(Identifier::new(
            "TRANSACTION".to_string(),
        ))))
    } else {
        None
    };
    // Parse WITH (DELAYED_DURABILITY = ON|OFF) for TSQL
    // NOTE(review): if WITH matches but LParen does not, the WITH token has
    // already been consumed by the short-circuit && — confirm no dialect
    // allows a bare WITH after COMMIT, otherwise that token is lost.
    let durability = if self.match_token(TokenType::With) && self.match_token(TokenType::LParen)
    {
        // Check for DELAYED_DURABILITY
        if self.match_identifier("DELAYED_DURABILITY") && self.match_token(TokenType::Eq) {
            // ON is a keyword (TokenType::On), OFF is an identifier
            let on = self.match_token(TokenType::On) || self.match_identifier("ON");
            if !on {
                self.match_identifier("OFF");
            }
            self.expect(TokenType::RParen)?;
            Some(Box::new(Expression::Boolean(BooleanLiteral { value: on })))
        } else {
            // Unknown WITH (...) content: skip to the closing paren.
            while !self.check(TokenType::RParen) && !self.is_at_end() {
                self.skip();
            }
            self.match_token(TokenType::RParen);
            None
        }
    } else {
        None
    };
    // Parse AND [NO] CHAIN
    let chain = if self.match_token(TokenType::And) {
        let no_chain = self.match_token(TokenType::No);
        self.match_identifier("CHAIN");
        if no_chain {
            // AND NO CHAIN - explicit false
            Some(Box::new(Expression::Boolean(BooleanLiteral {
                value: false,
            })))
        } else {
            // AND CHAIN - explicit true
            Some(Box::new(Expression::Boolean(BooleanLiteral {
                value: true,
            })))
        }
    } else {
        None
    };
    Ok(Expression::Commit(Box::new(Commit {
        chain,
        this,
        durability,
    })))
}
/// Parse END statement (PostgreSQL alias for COMMIT)
/// END [WORK|TRANSACTION] [AND [NO] CHAIN]
fn parse_end_transaction(&mut self) -> Result<Expression> {
    self.expect(TokenType::End)?;
    // The optional WORK/TRANSACTION keyword carries no semantic weight here.
    let _ = self.match_identifier("WORK") || self.match_token(TokenType::Transaction);
    // AND [NO] CHAIN: explicit true for AND CHAIN, explicit false otherwise.
    let chain = if self.match_token(TokenType::And) {
        let no_chain = self.match_token(TokenType::No);
        self.match_identifier("CHAIN");
        Some(Box::new(Expression::Boolean(BooleanLiteral {
            value: !no_chain,
        })))
    } else {
        None
    };
    // END is a pure alias, so it is represented as COMMIT.
    Ok(Expression::Commit(Box::new(Commit {
        chain,
        this: None,
        durability: None,
    })))
}
/// Parse BEGIN/START TRANSACTION statement
/// BEGIN [DEFERRED|IMMEDIATE|EXCLUSIVE] [TRANSACTION|TRAN|WORK] [transaction_name] [WITH MARK 'description']
/// Also handles procedural BEGIN blocks (BigQuery, etc.): BEGIN statement_list END
///
/// Disambiguation: only a small set of follow tokens makes BEGIN a
/// transaction; anything else is treated as a procedural block and kept as
/// a raw Command (TSQL BEGIN TRY/CATCH gets dedicated handling because it
/// may contain semicolons).
fn parse_transaction(&mut self) -> Result<Expression> {
    self.expect(TokenType::Begin)?;
    // Check if this is a procedural BEGIN block rather than a transaction
    // If next token is not a transaction keyword and we have more tokens, it's a procedural block
    let is_transaction = self.is_at_end()
        || self.check(TokenType::Semicolon)
        || self.check(TokenType::Transaction)
        || self.check_identifier("TRAN")
        || self.check_identifier("WORK")
        || self.check_identifier("DEFERRED")
        || self.check_identifier("IMMEDIATE")
        || self.check_identifier("EXCLUSIVE");
    if !is_transaction {
        // TSQL: BEGIN TRY ... END TRY [BEGIN CATCH ... END CATCH]
        // These are block-structured constructs that may contain semicolons,
        // so we can't use parse_command() which stops at the first semicolon.
        let is_try = self.check_identifier("TRY");
        let is_catch = self.check_identifier("CATCH");
        if is_try || is_catch {
            let block_kind = if is_try { "TRY" } else { "CATCH" };
            self.skip(); // consume TRY or CATCH
            // Accumulate (text, type) pairs to rebuild the raw statement.
            let mut tokens: Vec<(String, TokenType)> = vec![
                ("BEGIN".to_string(), TokenType::Begin),
                (block_kind.to_string(), TokenType::Var),
            ];
            // Collect tokens until matching END TRY / END CATCH
            while !self.is_at_end() {
                // Two-token lookahead for END followed by the matching kind.
                if self.check(TokenType::End)
                    && self.current + 1 < self.tokens.len()
                    && self.tokens[self.current + 1]
                        .text
                        .eq_ignore_ascii_case(block_kind)
                {
                    tokens.push(("END".to_string(), TokenType::End));
                    self.skip(); // consume END
                    tokens.push((block_kind.to_string(), TokenType::Var));
                    self.skip(); // consume TRY/CATCH
                    break;
                }
                let token = self.advance();
                // Re-quote literals so the reconstructed SQL stays valid.
                let text = if token.token_type == TokenType::String {
                    format!("'{}'", token.text)
                } else if token.token_type == TokenType::QuotedIdentifier {
                    format!("\"{}\"", token.text)
                } else {
                    token.text.clone()
                };
                tokens.push((text, token.token_type));
            }
            let mut result = Expression::Command(Box::new(Command {
                this: self.join_command_tokens(tokens),
            }));
            // If this was a TRY block, check for a following BEGIN CATCH block
            if is_try
                && self.check(TokenType::Begin)
                && self.current + 1 < self.tokens.len()
                && self.tokens[self.current + 1]
                    .text
                    .eq_ignore_ascii_case("CATCH")
            {
                // Recursively parse the BEGIN CATCH block
                let catch_block = self.parse_transaction()?;
                // Combine TRY and CATCH into a single command
                if let (Expression::Command(try_cmd), Expression::Command(catch_cmd)) =
                    (&result, &catch_block)
                {
                    result = Expression::Command(Box::new(Command {
                        this: format!("{} {}", try_cmd.this, catch_cmd.this),
                    }));
                }
            }
            return Ok(result);
        }
        // This is a procedural BEGIN block - parse as Command
        // Collect remaining tokens until end of statement
        return self
            .parse_command()?
            .ok_or_else(|| self.parse_error("Failed to parse BEGIN block"));
    }
    // Check for transaction kind: DEFERRED, IMMEDIATE, EXCLUSIVE (SQLite)
    // previous() retrieves whichever keyword was just matched.
    let kind = if self.match_identifier("DEFERRED")
        || self.match_identifier("IMMEDIATE")
        || self.match_identifier("EXCLUSIVE")
    {
        Some(self.previous().text.clone())
    } else {
        None
    };
    // Check for TRANSACTION, TRAN, or WORK keyword
    let has_transaction_keyword = self.match_token(TokenType::Transaction)
        || self.match_identifier("TRAN")
        || self.match_identifier("WORK");
    // Parse optional transaction name (TSQL style: BEGIN TRANSACTION trans_name)
    // A following WITH belongs to WITH MARK, not the name.
    let trans_name = if has_transaction_keyword
        && (self.is_identifier_token() || self.is_safe_keyword_as_identifier())
        && !self.check(TokenType::With)
    {
        // Could be a transaction name or @variable
        let name = self.advance().text;
        Some(name)
    } else {
        None
    };
    // Combine kind and trans_name into `this` (name takes precedence).
    let this = if let Some(name) = trans_name {
        Some(Box::new(Expression::Identifier(Identifier::new(name))))
    } else if let Some(k) = kind {
        Some(Box::new(Expression::Identifier(Identifier::new(k))))
    } else {
        None
    };
    // Parse WITH MARK 'description' (TSQL)
    let mark = if self.match_token(TokenType::With) && self.match_identifier("MARK") {
        if self.check(TokenType::String) {
            let desc = self.advance().text;
            Some(Box::new(Expression::Literal(Box::new(Literal::String(
                desc,
            )))))
        } else {
            // WITH MARK without a description: keep an empty-string marker.
            Some(Box::new(Expression::Literal(Box::new(Literal::String(
                "".to_string(),
            )))))
        }
    } else if has_transaction_keyword {
        // Store "TRANSACTION" marker to preserve round-trip
        Some(Box::new(Expression::Identifier(Identifier::new(
            "TRANSACTION".to_string(),
        ))))
    } else {
        None
    };
    // Parse any additional transaction modes (isolation levels, etc.)
    // Each comma-separated mode is a run of identifier-like words.
    let mut mode_parts: Vec<String> = Vec::new();
    while self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
        let mut mode_tokens: Vec<String> = Vec::new();
        while (self.is_identifier_token() || self.is_safe_keyword_as_identifier())
            && !self.check(TokenType::Comma)
        {
            mode_tokens.push(self.advance().text);
        }
        if !mode_tokens.is_empty() {
            mode_parts.push(mode_tokens.join(" "));
        }
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    // Modes are stored as one comma-joined identifier.
    let modes = if !mode_parts.is_empty() {
        Some(Box::new(Expression::Identifier(Identifier::new(
            mode_parts.join(", "),
        ))))
    } else {
        None
    };
    Ok(Expression::Transaction(Box::new(Transaction {
        this,
        modes,
        mark,
    })))
}
/// Parse START TRANSACTION statement
/// START TRANSACTION [READ ONLY | READ WRITE] [, ISOLATION LEVEL ...]
///
/// Modes are collected as comma-separated runs of words (e.g. "READ ONLY",
/// "ISOLATION LEVEL SERIALIZABLE") and stored as one joined identifier.
fn parse_start_transaction(&mut self) -> Result<Expression> {
    self.expect(TokenType::Start)?;
    // Expect TRANSACTION keyword
    self.expect(TokenType::Transaction)?;
    // Parse any transaction modes (READ ONLY, READ WRITE, ISOLATION LEVEL, etc.)
    let mut mode_parts: Vec<String> = Vec::new();
    while self.is_identifier_token()
        || self.is_safe_keyword_as_identifier()
        || self.match_identifier("READ")
    {
        // If the loop condition consumed READ via match_identifier, seed the
        // mode with it; the remaining words are gathered below.
        // (Was `if cond { true } else { false }` — collapsed to the bare
        // boolean expression; behavior unchanged.)
        let read_matched = self.previous().text.eq_ignore_ascii_case("READ");
        let mut mode_tokens: Vec<String> = Vec::new();
        if read_matched {
            mode_tokens.push("READ".to_string());
        }
        // Gather the rest of this mode up to the next comma.
        while (self.is_identifier_token() || self.is_safe_keyword_as_identifier())
            && !self.check(TokenType::Comma)
        {
            mode_tokens.push(self.advance().text);
        }
        if !mode_tokens.is_empty() {
            mode_parts.push(mode_tokens.join(" "));
        }
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    let modes = if !mode_parts.is_empty() {
        Some(Box::new(Expression::Identifier(Identifier::new(
            mode_parts.join(", "),
        ))))
    } else {
        None
    };
    Ok(Expression::Transaction(Box::new(Transaction {
        this: None, // START TRANSACTION doesn't have a kind like DEFERRED/IMMEDIATE
        modes,
        // Mark as START to differentiate from BEGIN
        mark: Some(Box::new(Expression::Identifier(Identifier::new(
            "START".to_string(),
        )))),
    })))
}
/// Parse DESCRIBE statement
/// DESCRIBE [EXTENDED|FORMATTED|ANALYZE] <table_or_query>
/// Also handles EXPLAIN (parsed as Describe)
fn parse_describe(&mut self) -> Result<Expression> {
// Accept DESCRIBE, DESC, and EXPLAIN (Var token)
// Capture leading comments from the first token
let leading_comments = if self.check(TokenType::Describe) {
let token = self.advance();
token.comments
} else if self.check(TokenType::Desc) {
let token = self.advance();
token.comments
} else if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("EXPLAIN") {
let token = self.advance(); // consume EXPLAIN
token.comments
} else {
return Err(self.parse_error("Expected DESCRIBE, DESC, or EXPLAIN"));
};
// Check for EXTENDED or FORMATTED keywords
let extended = self.match_identifier("EXTENDED");
let formatted = if !extended {
self.match_identifier("FORMATTED")
} else {
false
};
// Check for style keywords like ANALYZE, HISTORY
// ClickHouse: EXPLAIN SYNTAX/AST/PLAN/PIPELINE/ESTIMATE/TABLE OVERRIDE/CURRENT TRANSACTION
// For HISTORY, we need to look ahead to ensure it's not part of a schema-qualified
// table name like "history.tbl". If the next token is a Dot, "history" is a schema name.
let style = if !extended && !formatted && self.match_identifier("ANALYZE") {
Some("ANALYZE".to_string())
} else if !extended
&& !formatted
&& matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
)
{
// ClickHouse EXPLAIN styles
let text_upper = if !self.is_at_end() {
self.peek().text.to_ascii_uppercase()
} else {
String::new()
};
match text_upper.as_str() {
"SYNTAX" | "AST" | "PLAN" | "PIPELINE" | "ESTIMATE" | "QUERY" | "CURRENT" => {
self.skip();
let mut style_str = text_upper;
// Handle multi-word: TABLE OVERRIDE, CURRENT TRANSACTION, QUERY TREE
if style_str == "CURRENT" && self.check_identifier("TRANSACTION") {
style_str.push_str(" TRANSACTION");
self.skip();
}
if style_str == "QUERY" && self.check_identifier("TREE") {
style_str.push_str(" TREE");
self.skip();
}
Some(style_str)
}
_ if self.check(TokenType::Table) => {
// EXPLAIN TABLE OVERRIDE
self.skip(); // consume TABLE
if self.check_identifier("OVERRIDE") {
self.skip();
Some("TABLE OVERRIDE".to_string())
} else {
// Not TABLE OVERRIDE, backtrack
self.current -= 1;
None
}
}
_ => None,
}
} else if !extended
&& !formatted
&& (self.check(TokenType::Identifier)
|| self.check(TokenType::Var)
|| self.check(TokenType::QuotedIdentifier))
&& self.peek().text.eq_ignore_ascii_case("HISTORY")
&& self.peek_nth(1).map(|t| t.token_type) != Some(TokenType::Dot)
{
self.skip(); // consume HISTORY
Some("HISTORY".to_string())
} else if !extended
&& !formatted
&& (self.check(TokenType::Identifier)
|| self.check(TokenType::Var)
|| self.check(TokenType::QuotedIdentifier))
&& self.peek().text.eq_ignore_ascii_case("DETAIL")
&& self.peek_nth(1).map(|t| t.token_type) != Some(TokenType::Dot)
{
self.skip(); // consume DETAIL
Some("DETAIL".to_string())
} else {
None
};
// Check for object kind like SEMANTIC VIEW, TABLE, INPUT, OUTPUT, etc.
let kind = if self.match_identifier("SEMANTIC") {
if self.match_token(TokenType::View) {
Some("SEMANTIC VIEW".to_string())
} else {
Some("SEMANTIC".to_string())
}
} else if self.match_token(TokenType::Table) {
Some("TABLE".to_string())
} else if self.match_token(TokenType::View) {
Some("VIEW".to_string())
} else if self.match_identifier("DATABASE") {
Some("DATABASE".to_string())
} else if self.match_identifier("SCHEMA") {
Some("SCHEMA".to_string())
} else if self.match_token(TokenType::Procedure) {
Some("PROCEDURE".to_string())
} else if self.match_token(TokenType::Function) {
Some("FUNCTION".to_string())
} else if self.match_token(TokenType::Input) {
Some("INPUT".to_string())
} else if self.match_token(TokenType::Output) {
Some("OUTPUT".to_string())
} else {
None
};
// ClickHouse: parse EXPLAIN settings before the target statement
// e.g., EXPLAIN actions=1, description=0 SELECT ...
// e.g., EXPLAIN PLAN actions=1 SELECT ...
let mut properties = Vec::new();
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) {
while !self.is_at_end() && !self.check(TokenType::Semicolon) {
// Look for key=value pairs before a statement keyword
if (self.is_identifier_token()
|| self.is_safe_keyword_as_identifier()
|| self.check(TokenType::Type))
&& self.current + 1 < self.tokens.len()
&& self.tokens[self.current + 1].token_type == TokenType::Eq
{
let name = self.advance().text.to_lowercase();
self.skip(); // consume =
let value = self.advance().text.clone();
properties.push((name, value));
self.match_token(TokenType::Comma); // optional comma between settings
} else {
break;
}
}
}
// Parse target - could be a table name or a SELECT/INSERT/other statement
// ClickHouse: EXPLAIN/DESC can precede any statement or subquery
let target = if self.check(TokenType::Select) || self.check(TokenType::With) {
self.parse_statement()?
} else if self.check(TokenType::LParen) && {
// Look through nested parens for SELECT/WITH
let mut depth = 0usize;
let mut found_select = false;
for i in 0..100 {
match self.peek_nth(i).map(|t| t.token_type) {
Some(TokenType::LParen) => depth += 1,
Some(TokenType::Select) | Some(TokenType::With) if depth > 0 => {
found_select = true;
break;
}
_ => break,
}
}
found_select
} {
// DESC (((SELECT ...))) — deeply nested parenthesized subquery
self.parse_statement()?
} else if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && (self.check(TokenType::Insert)
|| self.check(TokenType::Create)
|| self.check(TokenType::Alter)
|| self.check(TokenType::Drop)
|| self.check(TokenType::Set)
|| self.check(TokenType::System))
{
self.parse_statement()?
} else if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && (self.is_identifier_token() || self.is_safe_keyword_as_identifier())
&& self.peek_nth(1).map(|t| t.token_type) == Some(TokenType::LParen)
{
// ClickHouse: DESC format(Values, '(123)') — function call as target
self.parse_expression()?
} else {
// Parse as table reference
let table = self.parse_table_ref()?;
Expression::Table(Box::new(table))
};
// Parse optional parenthesized type signature for PROCEDURE/FUNCTION
// e.g., DESCRIBE PROCEDURE get_employees(INT, VARCHAR)
let params = if matches!(kind.as_deref(), Some("PROCEDURE") | Some("FUNCTION"))
&& self.match_token(TokenType::LParen)
{
let mut type_args = Vec::new();
if !self.check(TokenType::RParen) {
loop {
// Collect tokens for this type until comma or closing paren
let mut parts = Vec::new();
let mut paren_depth = 0usize;
while !self.is_at_end() {
if self.check(TokenType::LParen) {
paren_depth += 1;
parts.push(self.advance().text.clone());
} else if self.check(TokenType::RParen) {
if paren_depth == 0 {
break;
}
paren_depth -= 1;
parts.push(self.advance().text.clone());
} else if self.check(TokenType::Comma) && paren_depth == 0 {
break;
} else {
parts.push(self.advance().text.clone());
}
}
type_args.push(parts.join(" ").trim().to_uppercase());
if !self.match_token(TokenType::Comma) {
break;
}
}
}
self.expect(TokenType::RParen)?;
type_args
} else {
Vec::new()
};
// Parse optional PARTITION clause (Spark/Hive)
let partition = if self.match_token(TokenType::Partition) {
// PARTITION(key = value, ...)
self.expect(TokenType::LParen)?;
// Parse partition expressions (e.g., ds = '2024-01-01')
let mut partition_exprs = Vec::new();
loop {
if let Some(expr) = self.parse_conjunction()? {
partition_exprs.push(expr);
}
if !self.match_token(TokenType::Comma) {
break;
}
}
self.expect(TokenType::RParen)?;
let partition = Expression::Partition(Box::new(crate::expressions::Partition {
expressions: partition_exprs,
subpartition: false,
}));
Some(Box::new(partition))
} else {
None
};
// ClickHouse: consume optional SETTINGS clause after target
// e.g., DESC format(CSV, '...') SETTINGS key='val', key2='val2'
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.check(TokenType::Settings)
{
self.skip(); // consume SETTINGS
let _ = self.parse_settings_property()?;
}
// Databricks: DESCRIBE ... AS JSON
let as_json = if self.check(TokenType::As)
&& self
.peek_nth(1)
.map(|t| t.text.eq_ignore_ascii_case("JSON"))
== Some(true)
{
self.skip(); // consume AS
self.skip(); // consume JSON
true
} else {
false
};
// Parse optional post-target properties like type=stage (non-ClickHouse)
if properties.is_empty() {
while !self.is_at_end() && !self.check(TokenType::Semicolon) {
// Check for identifier or keyword that could be a property name
if self.check(TokenType::Var) || self.check(TokenType::Type) || self.check_keyword()
{
let name = self.advance().text.to_lowercase();
if self.match_token(TokenType::Eq) {
let value = self.advance().text.clone();
properties.push((name, value));
} else {
// Not a property, put it back (can't easily undo, so break)
break;
}
} else {
break;
}
}
}
Ok(Expression::Describe(Box::new(Describe {
target,
extended,
formatted,
kind,
properties,
style,
partition,
leading_comments,
as_json,
params,
})))
}
/// Parse SHOW statement
/// SHOW [TERSE] <object_type> [HISTORY] [LIKE pattern] [IN <scope>] [STARTS WITH pattern] [LIMIT n] [FROM object]
///
/// The object type may be multi-word ("PRIMARY KEYS", "CREATE MATERIALIZED VIEW")
/// and is collected uppercase into `this`. Dialect-specific forms handled inline:
/// - SingleStore: SHOW PLAN [JSON] <id>, SHOW <type> ON <name>, SHOW REPRODUCTION
///   INTO OUTFILE '<file>', SHOW CREATE AGGREGATE/PIPELINE/PROJECTION <name>
/// - ClickHouse: SHOW CREATE <object> <qualified_name>, SHOW CREATE
///   ROLE/QUOTA/PROFILE/POLICY/USER (raw-text target), NOT LIKE / ILIKE patterns,
///   trailing SETTINGS clause
/// - MySQL: SHOW ENGINE <name> STATUS|MUTEX, SHOW COLUMNS/INDEX FROM tbl FROM db
///   (including normalizing FROM db.tbl into the two-FROM form)
/// - Snowflake: IN ACCOUNT/DATABASE/SCHEMA/..., STARTS WITH, WITH PRIVILEGES
///
/// Returns an [`Expression::Show`] node.
fn parse_show(&mut self) -> Result<Expression> {
self.expect(TokenType::Show)?;
// Check for TERSE
let terse = self.match_identifier("TERSE");
// Parse the thing to show (DATABASES, TABLES, SCHEMAS, etc.)
// This can be multiple words like "PRIMARY KEYS" or "IMPORTED KEYS"
let mut this_parts = Vec::new();
let mut target: Option<Expression> = None;
// mutex: Some(false) = SHOW ENGINE ... STATUS, Some(true) = SHOW ENGINE ... MUTEX
let mut mutex: Option<bool> = None;
// Consume identifier tokens until we hit a keyword like LIKE, IN, FROM, LIMIT, HISTORY
// Special handling for SingleStore SHOW variations
while !self.is_at_end() {
let current = self.peek();
// Stop at keywords that start clauses
if matches!(
current.token_type,
TokenType::Like
| TokenType::In
| TokenType::From
| TokenType::Limit
| TokenType::Semicolon
| TokenType::Eof
| TokenType::Where
| TokenType::For
| TokenType::Offset
| TokenType::Settings
) {
// ClickHouse: SHOW CREATE SETTINGS PROFILE - don't stop at SETTINGS
if current.token_type == TokenType::Settings
&& matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
)
&& this_parts.join(" ") == "CREATE"
{
// Fall through to process SETTINGS as part of the type name
} else {
break;
}
}
// Handle comma-separated profile types (e.g., SHOW PROFILE BLOCK IO, PAGE FAULTS)
// Append comma to the last part to preserve spacing
if current.token_type == TokenType::Comma {
if !this_parts.is_empty() {
let last = this_parts.pop().unwrap();
this_parts.push(format!("{},", last));
}
self.skip();
continue;
}
// Stop at HISTORY keyword (but not as the first word)
if !this_parts.is_empty() && current.text.eq_ignore_ascii_case("HISTORY") {
break;
}
// Stop at STARTS keyword
if current.text.eq_ignore_ascii_case("STARTS") {
break;
}
// SingleStore: SHOW PLAN <id> - handle number directly (before Var/keyword check)
// This is needed because numbers don't pass the Var/keyword check
let joined_check = this_parts.join(" ");
if joined_check == "PLAN" && current.token_type == TokenType::Number {
let id = self.advance().text;
target = Some(Expression::Literal(Box::new(Literal::Number(id))));
break;
}
// Accept identifiers and keywords as part of the object type
if current.token_type == TokenType::Var || current.token_type.is_keyword() {
let joined = this_parts.join(" ");
// SingleStore: SHOW CREATE <type> <name> - preserve case for name
// Types: AGGREGATE, PIPELINE, PROJECTION
if matches!(
joined.as_str(),
"CREATE AGGREGATE" | "CREATE PIPELINE" | "CREATE PROJECTION"
) {
let name = self.advance().text;
target = Some(Expression::Identifier(Identifier::new(name)));
break;
}
// SingleStore: SHOW <type> ON <name> - preserve case for name after ON
// Check if current token is "ON" (but not at start)
if current.text.eq_ignore_ascii_case("ON") && !this_parts.is_empty() {
this_parts.push("ON".to_string());
self.skip();
// Parse the name after ON, preserving case
if !self.is_at_end() {
let next = self.peek();
// Handle "ON TABLE name" pattern
if next.text.eq_ignore_ascii_case("TABLE") {
this_parts.push("TABLE".to_string());
self.skip();
}
// Parse the actual name
if !self.is_at_end() {
let name_tok = self.peek();
if name_tok.token_type == TokenType::Var
|| name_tok.token_type.is_keyword()
{
let name = self.advance().text;
target = Some(Expression::Identifier(Identifier::new(name)));
}
}
}
break;
}
// SingleStore: SHOW REPRODUCTION INTO OUTFILE 'filename'
if current.text.eq_ignore_ascii_case("INTO") && joined == "REPRODUCTION" {
this_parts.push("INTO".to_string());
self.skip();
if !self.is_at_end() && self.peek().text.eq_ignore_ascii_case("OUTFILE") {
this_parts.push("OUTFILE".to_string());
self.skip();
// Parse the filename
if !self.is_at_end() && self.check(TokenType::String) {
let filename = self.advance().text;
target = Some(Expression::Literal(Box::new(Literal::String(filename))));
}
}
break;
}
// SingleStore: SHOW PLAN [JSON] <id> - capture the numeric ID
if joined == "PLAN" {
// Check if current is "JSON" - if so, push it and check for number
if current.text.eq_ignore_ascii_case("JSON") {
this_parts.push("JSON".to_string());
self.skip();
// Now check for number
if !self.is_at_end() && self.check(TokenType::Number) {
let id = self.advance().text;
target = Some(Expression::Literal(Box::new(Literal::Number(id))));
}
break;
}
// Check if current is a number (plan ID)
if current.token_type == TokenType::Number {
let id = self.advance().text;
target = Some(Expression::Literal(Box::new(Literal::Number(id))));
break;
}
}
// Object-type words are normalized to uppercase as they are collected
this_parts.push(current.text.to_ascii_uppercase());
self.skip();
// ClickHouse: SHOW CREATE TABLE/VIEW/DICTIONARY <qualified_name>
// After detecting CREATE TABLE/VIEW/DICTIONARY, parse the next as a table ref
let joined = this_parts.join(" ");
if matches!(
joined.as_str(),
"CREATE TABLE"
| "CREATE VIEW"
| "CREATE DICTIONARY"
| "CREATE DATABASE"
| "CREATE MATERIALIZED VIEW"
| "CREATE LIVE VIEW"
) {
if !self.is_at_end()
&& (self.check(TokenType::Var)
|| self.check(TokenType::QuotedIdentifier)
|| self.is_safe_keyword_as_identifier())
{
let table = self.parse_table_ref()?;
target = Some(Expression::Table(Box::new(table)));
}
break;
}
// ClickHouse: SHOW CREATE ROLE/PROFILE/QUOTA/ROW POLICY/POLICY with multi-name or ON clause
// These have complex syntax (comma-separated names, ON db.table) - consume as raw text
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && (matches!(
joined.as_str(),
"CREATE ROLE"
| "CREATE QUOTA"
| "CREATE SETTINGS PROFILE"
| "CREATE PROFILE"
| "CREATE ROW POLICY"
| "CREATE POLICY"
| "CREATE USER"
) || matches!(
joined.as_str(),
"SHOW CREATE ROLE"
| "SHOW CREATE QUOTA"
| "SHOW CREATE SETTINGS PROFILE"
| "SHOW CREATE PROFILE"
| "SHOW CREATE ROW POLICY"
| "SHOW CREATE POLICY"
| "SHOW CREATE USER"
)) {
// Everything up to the statement terminator becomes one opaque identifier
let mut parts = Vec::new();
while !self.is_at_end() && self.peek().token_type != TokenType::Semicolon {
parts.push(self.advance().text.clone());
}
target = Some(Expression::Identifier(Identifier::new(parts.join(" "))));
break;
}
// ClickHouse: SHOW CREATE <qualified_name> (without TABLE/VIEW keyword)
// e.g., SHOW CREATE INFORMATION_SCHEMA.COLUMNS
// The negative keyword list below prevents misreading the start of a
// longer SHOW CREATE <kind> form as a bare qualified name.
if joined == "CREATE"
&& matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
)
&& !self.is_at_end()
&& (self.check(TokenType::Var) || self.check(TokenType::QuotedIdentifier))
&& !matches!(
self.peek().text.to_ascii_uppercase().as_str(),
"TABLE"
| "VIEW"
| "DICTIONARY"
| "DATABASE"
| "MATERIALIZED"
| "LIVE"
| "TEMPORARY"
| "ROLE"
| "QUOTA"
| "POLICY"
| "PROFILE"
| "USER"
| "ROW"
| "SETTINGS"
)
{
let table = self.parse_table_ref()?;
target = Some(Expression::Table(Box::new(table)));
break;
}
// Special handling for ENGINE: the next token is the engine name (case-preserved)
// followed by STATUS or MUTEX
if joined == "ENGINE" {
// Parse engine name (case-preserved)
if !self.is_at_end() {
let engine_tok = self.peek();
if engine_tok.token_type == TokenType::Var
|| engine_tok.token_type.is_keyword()
{
let engine_name = self.advance().text;
target = Some(Expression::Identifier(Identifier::new(engine_name)));
// Parse STATUS or MUTEX
if !self.is_at_end() {
let next = self.peek();
let next_upper = next.text.to_ascii_uppercase();
if next_upper == "STATUS" {
self.skip();
mutex = Some(false);
} else if next_upper == "MUTEX" {
self.skip();
mutex = Some(true);
}
}
}
}
break;
}
} else {
break;
}
}
// Final uppercase object-type string, e.g. "TABLES" or "PRIMARY KEYS"
let this = this_parts.join(" ");
// Check for HISTORY
let history = self.match_identifier("HISTORY");
// Check for FOR target (MySQL: SHOW GRANTS FOR foo, SHOW PROFILE ... FOR QUERY 5)
// SingleStore: SHOW GROUPS FOR ROLE 'role_name', SHOW GROUPS FOR USER 'username'
let for_target = if self.match_token(TokenType::For) {
// Parse the target (can be multi-word like QUERY 5, or ROLE 'name')
let mut parts = Vec::new();
while !self.is_at_end() {
let tok = self.peek();
if matches!(
tok.token_type,
TokenType::Like
| TokenType::In
| TokenType::From
| TokenType::Limit
| TokenType::Semicolon
| TokenType::Eof
| TokenType::Where
) {
break;
}
if tok.token_type == TokenType::Var
|| tok.token_type.is_keyword()
|| tok.token_type == TokenType::Number
{
parts.push(self.advance().text);
} else if tok.token_type == TokenType::String {
// Handle string literals (e.g., SHOW GROUPS FOR ROLE 'role_name')
// Re-quote so generation reproduces the literal form
let text = self.advance().text;
parts.push(format!("'{}'", text));
} else {
break;
}
}
if parts.is_empty() {
None
} else {
Some(Expression::Identifier(Identifier::new(parts.join(" "))))
}
} else {
None
};
// Check for LIKE pattern
let like = if self.match_token(TokenType::Like) {
Some(self.parse_primary()?)
} else {
None
};
// Check for IN scope
let (scope_kind, scope) = if self.match_token(TokenType::In) {
// Parse scope kind and optionally scope object
// Check for keywords: ACCOUNT, DATABASE, SCHEMA, TABLE, CLASS, APPLICATION
let (kind, scope_obj) = if self.match_keyword("ACCOUNT") {
(Some("ACCOUNT".to_string()), None)
} else if self.match_token(TokenType::Database) {
// IN DATABASE [name] — the name is optional, so only parse a table
// ref when the next token cannot start a later clause
let scope_obj = if !self.is_at_end()
&& !self.check(TokenType::Like)
&& !self.check(TokenType::Limit)
&& !self.check(TokenType::Semicolon)
&& !self.check_keyword_text("STARTS")
{
let table = self.parse_table_ref()?;
Some(Expression::Table(Box::new(table)))
} else {
None
};
(Some("DATABASE".to_string()), scope_obj)
} else if self.match_token(TokenType::Schema) {
// IN SCHEMA [name]
let scope_obj = if !self.is_at_end()
&& !self.check(TokenType::Like)
&& !self.check(TokenType::Limit)
&& !self.check(TokenType::Semicolon)
&& !self.check_keyword_text("STARTS")
{
let table = self.parse_table_ref()?;
Some(Expression::Table(Box::new(table)))
} else {
None
};
(Some("SCHEMA".to_string()), scope_obj)
} else if self.match_token(TokenType::Table) {
// IN TABLE [name]
let scope_obj = if !self.is_at_end()
&& !self.check(TokenType::Like)
&& !self.check(TokenType::Limit)
&& !self.check(TokenType::Semicolon)
&& !self.check_keyword_text("STARTS")
{
let table = self.parse_table_ref()?;
Some(Expression::Table(Box::new(table)))
} else {
None
};
(Some("TABLE".to_string()), scope_obj)
} else if self.match_token(TokenType::View) {
// IN VIEW [name]
let scope_obj = if !self.is_at_end()
&& !self.check(TokenType::Like)
&& !self.check(TokenType::Limit)
&& !self.check(TokenType::Semicolon)
&& !self.check_keyword_text("STARTS")
{
let table = self.parse_table_ref()?;
Some(Expression::Table(Box::new(table)))
} else {
None
};
(Some("VIEW".to_string()), scope_obj)
} else if self.match_keyword("CLASS") {
// IN CLASS name
let scope_obj = if !self.is_at_end() {
let table = self.parse_table_ref()?;
Some(Expression::Table(Box::new(table)))
} else {
None
};
(Some("CLASS".to_string()), scope_obj)
} else if self.match_keyword("APPLICATION") {
// IN APPLICATION [PACKAGE] name
let kind = if self.match_keyword("PACKAGE") {
"APPLICATION PACKAGE".to_string()
} else {
"APPLICATION".to_string()
};
let scope_obj = if !self.is_at_end() {
let table = self.parse_table_ref()?;
Some(Expression::Table(Box::new(table)))
} else {
None
};
(Some(kind), scope_obj)
} else {
// Default - infer scope_kind based on what we're showing
// Python SQLGlot: SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"}
let table = self.parse_table_ref()?;
let inferred_kind = match this.as_str() {
// SHOW SCHEMAS/DATABASES IN x — no scope_kind needed, x is a catalog/database
"SCHEMAS" | "DATABASES" => None,
"OBJECTS" | "TABLES" | "VIEWS" | "SEQUENCES" | "UNIQUE KEYS"
| "IMPORTED KEYS" => Some("SCHEMA"),
"PRIMARY KEYS" => Some("TABLE"),
_ => Some("SCHEMA"), // Default to SCHEMA for unknown types
};
(
inferred_kind.map(|s| s.to_string()),
Some(Expression::Table(Box::new(table))),
)
};
(kind, scope_obj)
} else {
(None, None)
};
// Check for STARTS WITH
let starts_with = if self.match_keyword("STARTS") {
self.match_token(TokenType::With); // WITH is a keyword token
Some(self.parse_primary()?)
} else {
None
};
// Check for LIMIT
let limit = if self.match_token(TokenType::Limit) {
Some(Box::new(Limit {
this: self.parse_expression()?,
percent: false,
comments: Vec::new(),
}))
} else {
None
};
// Check for FROM (can be a string literal or identifier)
// For MySQL SHOW COLUMNS/INDEX, the first FROM is the target table,
// and the second FROM is the database
let mut from = if self.match_token(TokenType::From) {
Some(self.parse_primary()?)
} else {
None
};
// Check for second FROM clause (MySQL: SHOW COLUMNS FROM tbl FROM db, SHOW INDEX FROM foo FROM bar)
let mut db = if from.is_some() && self.match_token(TokenType::From) {
Some(self.parse_primary()?)
} else {
None
};
// Normalize MySQL SHOW INDEX/COLUMNS FROM db.tbl -> FROM tbl FROM db.
// The qualified name may have been parsed as a Table, Column, or dotted
// Identifier depending on how parse_primary tokenized it; handle all three.
if matches!(this.as_str(), "INDEX" | "COLUMNS") && db.is_none() {
if let Some(from_expr) = from.take() {
match from_expr {
Expression::Table(mut t) => {
if let Some(db_ident) = t.schema.take().or(t.catalog.take()) {
db = Some(Expression::Identifier(db_ident));
from = Some(Expression::Identifier(t.name));
} else {
from = Some(Expression::Table(t));
}
}
Expression::Column(c) => {
if let Some(table_ident) = c.table {
db = Some(Expression::Identifier(table_ident));
from = Some(Expression::Identifier(c.name));
} else {
from = Some(Expression::Column(c));
}
}
Expression::Identifier(id) => {
if let Some((db_name, table_name)) = id.name.split_once('.') {
db = Some(Expression::Identifier(Identifier::new(db_name)));
from = Some(Expression::Identifier(Identifier {
name: table_name.to_string(),
quoted: id.quoted,
trailing_comments: id.trailing_comments,
span: None,
}));
} else {
from = Some(Expression::Identifier(id));
}
}
other => {
from = Some(other);
}
}
}
}
// MySQL: SHOW TABLES FROM db LIKE 'pattern' (LIKE can come after FROM)
let like = if like.is_none() && self.match_token(TokenType::Like) {
Some(self.parse_primary()?)
} else {
like
};
// ClickHouse: SHOW ... NOT LIKE 'pattern' / NOT ILIKE 'pattern'
// The pattern is consumed but intentionally discarded (not represented in the AST)
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.check(TokenType::Not)
{
if self.current + 1 < self.tokens.len()
&& matches!(
self.tokens[self.current + 1].token_type,
TokenType::Like | TokenType::ILike
)
{
self.skip(); // consume NOT
self.skip(); // consume LIKE/ILIKE
let _ = self.parse_primary()?; // consume pattern
}
}
// ClickHouse: SHOW ... ILIKE 'pattern' (pattern also discarded)
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.match_token(TokenType::ILike)
{
let _ = self.parse_primary()?; // consume pattern
}
// Check for WHERE clause (MySQL: SHOW STATUS WHERE condition)
let where_clause = if self.match_token(TokenType::Where) {
Some(self.parse_expression()?)
} else {
None
};
// Check for WITH PRIVILEGES clause (Snowflake: SHOW ... WITH PRIVILEGES USAGE, MODIFY)
// NOTE(review): if WITH matches but PRIVILEGES does not, the WITH token has
// already been consumed and is not restored — confirm no SHOW variant ends
// with a bare WITH that another clause depends on.
let privileges = if self.match_token(TokenType::With) && self.match_keyword("PRIVILEGES") {
// Parse comma-separated list of privilege names (no parentheses)
let mut privs = Vec::new();
loop {
if self.is_at_end() || self.check(TokenType::Semicolon) {
break;
}
let tok = self.peek();
if tok.token_type == TokenType::Var || tok.token_type.is_keyword() {
privs.push(self.advance().text.to_ascii_uppercase());
// Check for comma to continue
if !self.match_token(TokenType::Comma) {
break;
}
} else {
break;
}
}
privs
} else {
Vec::new()
};
// ClickHouse: SHOW ... SETTINGS key=val, key=val
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) {
self.parse_clickhouse_settings_clause()?;
}
Ok(Expression::Show(Box::new(Show {
this,
terse,
history,
like,
scope_kind,
scope,
starts_with,
limit,
from,
where_clause,
for_target,
db,
target,
mutex,
privileges,
})))
}
/// Parse COPY statement (Snowflake, PostgreSQL)
/// COPY INTO <table> FROM <source> [(<parameters>)]
/// COPY INTO <location> FROM <table> [(<parameters>)]
///
/// Also accepts PostgreSQL/DuckDB `COPY table (cols) TO/FROM 'file' [WITH] (opts)`,
/// Snowflake stage references (`@stage`), `?` bind placeholders, subquery sources
/// (`FROM (SELECT ...)`), and Redshift/Snowflake CREDENTIALS in both the
/// single-string and `(KEY='v' ...)` forms. Returns an [`Expression::Copy`] node;
/// `kind` records whether the direction keyword was FROM (load) vs TO (export).
fn parse_copy(&mut self) -> Result<Expression> {
self.expect(TokenType::Copy)?;
// Check for INTO (Snowflake/TSQL style: COPY INTO)
let is_into = self.match_token(TokenType::Into);
// Parse target table or location (possibly with column list)
let this = if self.check(TokenType::LParen) {
// Subquery: COPY (SELECT ...) TO ...
self.parse_primary()?
} else if self.check(TokenType::DAt)
|| self.check(TokenType::String)
|| self.is_stage_reference()
{
// Stage or file destination (for exports): COPY INTO @stage or COPY INTO 's3://...'
self.parse_file_location()?
} else {
// Table reference, possibly with column list: COPY table (col1, col2)
let table = self.parse_table_ref()?;
// Check for column list
if self.check(TokenType::LParen) {
// Peek ahead to see if this is a column list or a subquery
// Column list won't start with SELECT
let has_column_list = {
let start = self.current;
self.skip(); // consume (
let is_select = self.check(TokenType::Select);
self.current = start; // backtrack
!is_select
};
if has_column_list {
self.skip(); // consume (
let mut columns = Vec::new();
loop {
let col_name = self.expect_identifier_or_keyword()?;
columns.push(col_name);
if !self.match_token(TokenType::Comma) {
break;
}
}
self.expect(TokenType::RParen)?;
// Create a schema expression with the table and columns
Expression::Schema(Box::new(Schema {
this: Some(Box::new(Expression::Table(Box::new(table)))),
expressions: columns
.into_iter()
.map(|c| {
Expression::boxed_column(Column {
name: Identifier::new(c),
table: None,
join_mark: false,
trailing_comments: Vec::new(),
span: None,
inferred_type: None,
})
})
.collect(),
}))
} else {
Expression::Table(Box::new(table))
}
} else {
Expression::Table(Box::new(table))
}
};
// Determine direction: FROM means loading into table, TO means exporting
// kind == true when FROM was present
let kind = self.match_token(TokenType::From);
let has_to = if !kind {
// Try TO keyword for export (TO is a keyword token, not an identifier)
self.match_token(TokenType::To)
} else {
false
};
// Parse source/destination files or stage only if FROM/TO was found
// and we're not at a parameter (which would start with identifier = ...)
let mut files = Vec::new();
if kind
|| has_to
|| self.check(TokenType::String)
|| self.is_stage_reference()
|| self.check(TokenType::LParen)
|| self.check(TokenType::Parameter)
{
// Handle ? placeholder as source (Snowflake Python connector bind params)
if self.check(TokenType::Parameter) {
self.skip(); // consume ?
files.push(Expression::Placeholder(Placeholder { index: None }));
}
// Check for subquery: FROM (SELECT ...)
else if self.check(TokenType::LParen) {
// Peek ahead to see if this is a subquery
let start = self.current;
self.skip(); // consume (
let is_select = self.check(TokenType::Select);
self.current = start; // backtrack
if is_select {
// Parse the subquery
let subquery = self.parse_primary()?;
files.push(subquery);
}
}
// Parse file location(s) until we hit a parameter or end
// NOTE: && binds tighter than ||, so this loops while either
// (not at statement end AND no file parsed yet) OR
// (a comma follows an already-parsed file, i.e. a multi-file list).
while !self.is_at_end() && !self.check(TokenType::Semicolon) && files.is_empty()
|| (self.check(TokenType::Comma) && !files.is_empty())
{
// Consume comma if present (for multiple files)
if !files.is_empty() && !self.match_token(TokenType::Comma) {
break;
}
// Check if this looks like a parameter (identifier followed by =)
// But stage references (@stage) are not parameters
if (self.check(TokenType::Var) || self.check_keyword())
&& !self.is_stage_reference()
{
let lookahead = self.current + 1;
if lookahead < self.tokens.len()
&& self.tokens[lookahead].token_type == TokenType::Eq
{
break; // This is a parameter, stop parsing files
}
}
// Check for WITH keyword - stop parsing files
if self.check(TokenType::With) {
break;
}
// Stop if we don't see a file location start
// Include QuotedIdentifier for Databricks backtick-quoted paths like `s3://link`
if !self.check(TokenType::String)
&& !self.is_stage_reference()
&& !self.check(TokenType::Var)
&& !self.check_keyword()
&& !self.check(TokenType::QuotedIdentifier)
&& !self.check(TokenType::Parameter)
{
break;
}
// Handle ? placeholder in file location list
if self.check(TokenType::Parameter) {
self.skip();
files.push(Expression::Placeholder(Placeholder { index: None }));
continue;
}
// For COPY INTO ... FROM table_name, handle dotted table references
// If the next token is a Var/Identifier and the one after is a Dot, parse as table reference
if (self.check(TokenType::Var) || self.is_identifier_token())
&& !self.is_stage_reference()
{
let lookahead = self.current + 1;
let has_dot = lookahead < self.tokens.len()
&& self.tokens[lookahead].token_type == TokenType::Dot;
if has_dot {
let table = self.parse_table_ref()?;
files.push(Expression::Table(Box::new(table)));
continue;
}
}
let location = self.parse_file_location()?;
files.push(location);
}
}
// Parse credentials and parameters
let mut params = Vec::new();
let mut credentials = None;
// with_wrapped records the DuckDB/PostgreSQL `WITH ( ... )` spelling so the
// generator can reproduce it
let mut with_wrapped = false;
// Parse Snowflake-style parameters: KEY = VALUE or KEY = (nested values)
// or DuckDB/PostgreSQL WITH (KEY VALUE, ...) format
while !self.is_at_end() && !self.check(TokenType::Semicolon) {
// Match WITH keyword if present (some dialects use WITH before params)
let had_with = self.match_token(TokenType::With);
// Check for wrapped parameters in parentheses
if self.match_token(TokenType::LParen) {
if had_with {
with_wrapped = true;
}
while !self.check(TokenType::RParen) && !self.is_at_end() {
let param = self.parse_copy_parameter()?;
params.push(param);
// Consume optional comma between params
self.match_token(TokenType::Comma);
}
self.expect(TokenType::RParen)?;
break;
}
// Parse individual parameter: NAME = value
if self.check(TokenType::Var) || self.check_keyword() {
let param = self.parse_copy_parameter()?;
// Handle special CREDENTIALS parameter (case-insensitive)
if param.name.eq_ignore_ascii_case("CREDENTIALS") {
// For Redshift-style CREDENTIALS 'string' (single string value)
// vs Snowflake-style CREDENTIALS = (KEY='value', KEY2='value')
if let Some(Expression::Literal(lit)) = &param.value {
if let Literal::String(s) = lit.as_ref() {
// Redshift style: store as a simple credentials string
// (empty key marks the single-string form)
let creds = Credentials {
credentials: vec![("".to_string(), s.clone())],
storage: None,
encryption: None,
};
credentials = Some(Box::new(creds));
}
} else {
// Snowflake style: key=value pairs — extract col=string Eq pairs,
// silently skipping entries that don't match that shape
let creds = Credentials {
credentials: param
.values
.iter()
.filter_map(|v| {
if let Expression::Eq(eq) = v {
let key = if let Expression::Column(c) = &eq.left {
c.name.name.clone()
} else {
return None;
};
let val = if let Expression::Literal(lit) = &eq.right {
if let Literal::String(s) = lit.as_ref() {
s.clone()
} else {
String::new()
}
} else {
return None;
};
Some((key, val))
} else {
None
}
})
.collect(),
storage: None,
encryption: None,
};
credentials = Some(Box::new(creds));
}
} else if param.name.eq_ignore_ascii_case("STORAGE_INTEGRATION") {
// Store STORAGE_INTEGRATION as a regular parameter only
// Don't use the credentials.storage field for this
params.push(param);
} else {
params.push(param);
}
} else {
break;
}
}
Ok(Expression::Copy(Box::new(CopyStmt {
this,
kind,
files,
params,
credentials,
is_into,
with_wrapped,
})))
}
/// Parse a single COPY parameter: NAME = value, NAME = (nested values), or NAME value (no =)
fn parse_copy_parameter(&mut self) -> Result<CopyParameter> {
// Preserve original case for parameter name (important for Redshift COPY options)
let name = self.expect_identifier_or_keyword()?;
let mut value = None;
let mut values = Vec::new();
let has_eq = self.match_token(TokenType::Eq);
if has_eq {
if self.match_token(TokenType::LParen) {
// Nested parameter list: KEY = (nested_key=value, ...) or KEY = (value1, value2)
// Check if this is a list of simple values (like strings) or key=value pairs
// If the first token is a string/number, it's a list of values
if self.check(TokenType::String) || self.check(TokenType::Number) {
// Simple value list: FILES = ('test1.csv', 'test2.csv')
while !self.check(TokenType::RParen) && !self.is_at_end() {
values.push(self.parse_primary()?);
if !self.match_token(TokenType::Comma) {
break;
}
}
} else {
// Key=value pairs: CREDENTIALS = (AWS_KEY_ID='id' AWS_SECRET_KEY='key')
while !self.check(TokenType::RParen) && !self.is_at_end() {
// Parse nested key=value pairs
let nested_key = self.expect_identifier_or_keyword()?.to_ascii_uppercase();
if self.match_token(TokenType::Eq) {
let nested_value = self.parse_copy_param_value()?;
// Create an Eq expression for the nested key=value
values.push(Expression::Eq(Box::new(BinaryOp {
left: Expression::boxed_column(Column {
name: Identifier::new(nested_key),
table: None,
join_mark: false,
trailing_comments: Vec::new(),
span: None,
inferred_type: None,
}),
right: nested_value,
left_comments: Vec::new(),
operator_comments: Vec::new(),
trailing_comments: Vec::new(),
inferred_type: None,
})));
} else {
// Just a keyword/value without =
values.push(Expression::boxed_column(Column {
name: Identifier::new(nested_key),
table: None,
join_mark: false,
trailing_comments: Vec::new(),
span: None,
inferred_type: None,
}));
}
// Consume optional comma between nested values
self.match_token(TokenType::Comma);
}
}
self.expect(TokenType::RParen)?;
} else {
// Simple value: KEY = value
value = Some(self.parse_copy_param_value()?);
}
} else {
// No = sign: DuckDB/PostgreSQL format (KEY value or KEY (col1, col2))
// Check if followed by a value: string, number, boolean, identifier, or tuple
if self.check(TokenType::LParen) {
// Check if this is a COPY_INTO_VARLEN_OPTIONS parameter
// These are Databricks/Snowflake options that contain key='value' pairs without = before (
let is_varlen_option = matches!(
name.as_str(),
"FORMAT_OPTIONS" | "COPY_OPTIONS" | "FILE_FORMAT" | "CREDENTIAL"
);
self.skip(); // consume (
if is_varlen_option {
// Parse as key='value' pairs: FORMAT_OPTIONS ('opt1'='true', 'opt2'='test')
while !self.check(TokenType::RParen) && !self.is_at_end() {
if self.check(TokenType::String) {
// Parse 'key'='value' pair
let key_token = self.advance();
let key = key_token.text.clone();
if self.match_token(TokenType::Eq) {
let val = self.parse_copy_param_value()?;
values.push(Expression::Eq(Box::new(BinaryOp {
left: Expression::Literal(Box::new(Literal::String(key))),
right: val,
left_comments: Vec::new(),
operator_comments: Vec::new(),
trailing_comments: Vec::new(),
inferred_type: None,
})));
} else {
// Just a string without =
values.push(Expression::Literal(Box::new(Literal::String(key))));
}
} else if self.check(TokenType::Var)
|| self.check_keyword()
|| self.is_identifier_token()
{
// Parse identifier='value' pair (unquoted key)
let key = self.advance().text.clone();
if self.match_token(TokenType::Eq) {
let val = self.parse_copy_param_value()?;
values.push(Expression::Eq(Box::new(BinaryOp {
left: Expression::boxed_column(Column {
name: Identifier::new(key),
table: None,
join_mark: false,
trailing_comments: Vec::new(),
span: None,
inferred_type: None,
}),
right: val,
left_comments: Vec::new(),
operator_comments: Vec::new(),
trailing_comments: Vec::new(),
inferred_type: None,
})));
} else {
// Just an identifier without =
values.push(Expression::boxed_column(Column {
name: Identifier::new(key),
table: None,
join_mark: false,
trailing_comments: Vec::new(),
span: None,
inferred_type: None,
}));
}
} else {
break;
}
self.match_token(TokenType::Comma);
}
} else {
// Tuple value: FORCE_NOT_NULL (col1, col2)
let mut items = Vec::new();
while !self.check(TokenType::RParen) && !self.is_at_end() {
items.push(self.parse_primary()?);
if !self.match_token(TokenType::Comma) {
break;
}
}
value = Some(Expression::Tuple(Box::new(Tuple { expressions: items })));
}
self.expect(TokenType::RParen)?;
} else if self.check(TokenType::LBrace) {
// Map literal: KV_METADATA {'key': 'value', ...}
value = Some(self.parse_primary()?);
} else if self.check(TokenType::String) || self.check(TokenType::Number) {
// String or number value
value = Some(self.parse_copy_param_value()?);
} else if self.check(TokenType::True) || self.check(TokenType::False) {
// Boolean value (TRUE/FALSE are keyword tokens)
value = Some(self.parse_copy_param_value()?);
} else if !self.check(TokenType::Comma)
&& !self.check(TokenType::RParen)
&& !self.is_at_end()
&& !self.check(TokenType::Semicolon)
{
// Identifier value: FORMAT JSON, HEADER MATCH, etc.
// But skip if this is a known flag-only parameter (Redshift COPY options that take no value)
let name_upper = name.to_ascii_uppercase();
let is_flag_param = matches!(
name_upper.as_str(),
"EMPTYASNULL"
| "BLANKSASNULL"
| "ACCEPTINVCHARS"
| "COMPUPDATE"
| "STATUPDATE"
| "NOLOAD"
| "ESCAPE"
| "REMOVEQUOTES"
| "EXPLICIT_IDS"
| "FILLRECORD"
| "TRIMBLANKS"
| "TRUNCATECOLUMNS"
| "ROUNDEC"
| "IGNOREHEADER"
| "IGNOREBLANKLINES"
| "ACCEPTANYDATE"
);
if !is_flag_param && (self.check(TokenType::Var) || self.check_keyword()) {
value = Some(self.parse_copy_param_value()?);
}
}
// If nothing matched, it's a bare flag parameter with no value (allowed)
}
Ok(CopyParameter {
name,
value,
values,
eq: has_eq,
})
}
/// Parse a value for COPY parameters (handles strings, identifiers, numbers, lists)
fn parse_copy_param_value(&mut self) -> Result<Expression> {
// Handle lists like ('file1', 'file2')
if self.match_token(TokenType::LParen) {
let mut items = Vec::new();
while !self.check(TokenType::RParen) && !self.is_at_end() {
items.push(self.parse_primary()?);
if !self.match_token(TokenType::Comma) {
break;
}
}
self.expect(TokenType::RParen)?;
return Ok(Expression::Tuple(Box::new(Tuple { expressions: items })));
}
// Handle string, number, boolean, identifier
if self.check(TokenType::String) {
let token = self.advance();
return Ok(Expression::Literal(Box::new(Literal::String(
token.text.clone(),
))));
}
// Handle quoted identifier (e.g., STORAGE_INTEGRATION = "storage")
if self.check(TokenType::QuotedIdentifier) {
let token = self.advance();
return Ok(Expression::boxed_column(Column {
name: Identifier::quoted(token.text.clone()),
table: None,
join_mark: false,
trailing_comments: Vec::new(),
span: None,
inferred_type: None,
}));
}
if self.check(TokenType::Number) {
let token = self.advance();
return Ok(Expression::Literal(Box::new(Literal::Number(
token.text.clone(),
))));
}
if self.match_token(TokenType::True) {
return Ok(Expression::Boolean(BooleanLiteral { value: true }));
}
if self.match_token(TokenType::False) {
return Ok(Expression::Boolean(BooleanLiteral { value: false }));
}
// Identifier (e.g., FORMAT_NAME=my_format)
if self.check(TokenType::Var) || self.check_keyword() {
// Could be a qualified name like MY_DATABASE.MY_SCHEMA.MY_FORMAT
let first = self.advance().text.clone();
if self.match_token(TokenType::Dot) {
let second = self.expect_identifier_or_keyword()?;
if self.match_token(TokenType::Dot) {
let third = self.expect_identifier_or_keyword()?;
return Ok(Expression::boxed_column(Column {
name: Identifier::new(format!("{}.{}.{}", first, second, third)),
table: None,
join_mark: false,
trailing_comments: Vec::new(),
span: None,
inferred_type: None,
}));
}
return Ok(Expression::boxed_column(Column {
name: Identifier::new(format!("{}.{}", first, second)),
table: None,
join_mark: false,
trailing_comments: Vec::new(),
span: None,
inferred_type: None,
}));
}
return Ok(Expression::boxed_column(Column {
name: Identifier::new(first),
table: None,
join_mark: false,
trailing_comments: Vec::new(),
span: None,
inferred_type: None,
}));
}
// Placeholder ? (used by Snowflake Python connector for bind parameters)
if self.match_token(TokenType::Parameter) {
return Ok(Expression::Placeholder(Placeholder { index: None }));
}
Err(self.parse_error("Expected value for COPY parameter"))
}
/// Parse a Snowflake stage reference delivered as a single String token
/// (e.g. '@mystage', '@external/location'), plus any trailing
/// `(FILE_FORMAT => ..., PATTERN => ...)` options.
fn parse_stage_reference_from_string(&mut self) -> Result<Expression> {
    use crate::expressions::StageReference;
    // The whole '@stage/path' text lives in one String token.
    let full_path = self.advance().text.clone();
    // Split at the first '/': everything before it is the stage name and
    // everything from the '/' onwards (inclusive) is the path component.
    let (name, path) = match full_path.find('/') {
        Some(slash) => (
            full_path[..slash].to_string(),
            Some(full_path[slash..].to_string()),
        ),
        None => (full_path, None),
    };
    // Optional trailing options: (FILE_FORMAT => 'fmt', PATTERN => '*.csv')
    let mut file_format = None;
    let mut pattern = None;
    if self.match_token(TokenType::LParen) {
        loop {
            if self.match_identifier("FILE_FORMAT") {
                self.expect(TokenType::FArrow)?; // =>
                file_format = Some(self.parse_primary()?);
            } else if self.match_identifier("PATTERN") || self.match_token(TokenType::Pattern) {
                // PATTERN may arrive as either a keyword or an identifier token.
                self.expect(TokenType::FArrow)?; // =>
                // Only a string-literal pattern is captured; any other
                // expression is parsed and discarded (original behavior).
                if let Expression::Literal(lit) = self.parse_primary()? {
                    if let Literal::String(s) = lit.as_ref() {
                        pattern = Some(s.clone());
                    }
                }
            } else {
                break;
            }
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
        self.expect(TokenType::RParen)?;
    }
    Ok(Expression::StageReference(Box::new(StageReference {
        name,
        path,
        file_format,
        pattern,
        quoted: true, // Stage reference came from a quoted string
    })))
}
/// Parse Snowflake stage reference when tokenized as Var (e.g., @mystage becomes Var token)
/// Handles: @mystage, @mystage/path/to/file.csv
///
/// Also consumes dotted qualifiers (`@namespace.stage`), a `%table` segment
/// inside the qualified path, an optional `/path/...` suffix, and trailing
/// `(FILE_FORMAT => ..., PATTERN => ...)` options.
fn parse_stage_reference_from_var(&mut self) -> Result<Expression> {
    use crate::expressions::StageReference;
    // The Var token already contains @ and the stage name
    let var_token = self.advance();
    let mut name = var_token.text.clone();
    // Handle qualified names: @namespace.stage
    while self.match_token(TokenType::Dot) {
        name.push('.');
        if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
            name.push_str(&self.advance().text);
        } else if self.check(TokenType::Percent) {
            // Handle table stage in qualified path: @namespace.%table_name
            self.skip();
            name.push('%');
            if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
                name.push_str(&self.advance().text);
            }
        } else {
            // A dot was consumed but no identifier followed; the trailing
            // '.' stays in the name and qualification stops here.
            break;
        }
    }
    // Handle path after stage: @stage/path/to/file.csv
    let path = if self.match_token(TokenType::Slash) {
        let mut path_str = String::from("/");
        // Consume path components until we hit whitespace/paren/etc.
        // Keywords like TO may legitimately appear inside a path segment.
        while !self.is_at_end() {
            if self.check(TokenType::Identifier)
                || self.check(TokenType::Var)
                || self.check(TokenType::Number)
                || self.check(TokenType::Dot)
                || self.check(TokenType::Dash)
                || self.check(TokenType::Star)
                || self.check(TokenType::To)
                || self.is_safe_keyword_as_identifier()
            {
                path_str.push_str(&self.advance().text);
            } else if self.match_token(TokenType::Slash) {
                path_str.push('/');
            } else {
                break;
            }
        }
        Some(path_str)
    } else {
        None
    };
    // Handle optional parameters: (FILE_FORMAT => 'fmt', PATTERN => '*.csv')
    let (file_format, pattern) = if self.match_token(TokenType::LParen) {
        let mut ff = None;
        let mut pat = None;
        loop {
            if self.match_identifier("FILE_FORMAT") {
                self.expect(TokenType::FArrow)?; // =>
                ff = Some(self.parse_primary()?);
            } else if self.match_identifier("PATTERN") || self.match_token(TokenType::Pattern) {
                // PATTERN can be tokenized as keyword or identifier
                self.expect(TokenType::FArrow)?; // =>
                // Only a string-literal pattern is captured; other
                // expressions are parsed and discarded.
                if let Expression::Literal(lit) = self.parse_primary()? {
                    if let Literal::String(s) = lit.as_ref() {
                        pat = Some(s.clone());
                    }
                }
            } else {
                break;
            }
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
        self.expect(TokenType::RParen)?;
        (ff, pat)
    } else {
        (None, None)
    };
    Ok(Expression::StageReference(Box::new(StageReference {
        name,
        path,
        file_format,
        pattern,
        quoted: false,
    })))
}
/// Parse Snowflake stage reference in FROM clause
/// Handles: @stage, @"stage", @namespace.stage, @stage/path/file.csv, @~, @%table
///
/// The leading `@` must already be a distinct DAt token. The name is built
/// textually (quotes re-added around quoted identifiers), then an optional
/// `/path` suffix and trailing `(FILE_FORMAT => ..., PATTERN => ...)`
/// options are consumed.
fn parse_stage_reference(&mut self) -> Result<Expression> {
    use crate::expressions::StageReference;
    self.expect(TokenType::DAt)?; // consume @
    // Build the stage name - can include dots, slashes, etc.
    let mut name = String::from("@");
    // Handle special stage types:
    // @~ = user stage
    // @% = table stage (followed by table name)
    if self.check(TokenType::Tilde) {
        self.skip();
        name.push('~');
    } else if self.check(TokenType::Percent) {
        self.skip();
        name.push('%');
        // Table name follows (can be qualified: schema.table)
        loop {
            if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
                name.push_str(&self.advance().text);
            } else {
                break;
            }
            // Handle qualified table names: %db.schema.table
            if self.match_token(TokenType::Dot) {
                name.push('.');
            } else {
                break;
            }
        }
    } else {
        // Handle quoted or unquoted stage names
        loop {
            if self.check(TokenType::QuotedIdentifier) {
                // Preserve quotes for quoted identifiers
                let text = self.advance().text;
                name.push('"');
                name.push_str(&text);
                name.push('"');
            } else if self.check(TokenType::Percent) {
                // Handle table stage in qualified path: @namespace.%table_name
                self.skip();
                name.push('%');
                if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
                    name.push_str(&self.advance().text);
                }
            } else if self.check(TokenType::Identifier)
                || self.check(TokenType::Var)
                || self.is_safe_keyword_as_identifier()
            {
                name.push_str(&self.advance().text);
            } else {
                break;
            }
            // Handle dots for qualified names: @namespace.stage or @"schema"."stage"
            if self.match_token(TokenType::Dot) {
                name.push('.');
            } else {
                break;
            }
        }
    }
    // Handle path after stage: @stage/path/to/file.csv
    let path = if self.match_token(TokenType::Slash) {
        let mut path_str = String::from("/");
        // Consume path components until we hit whitespace/paren/etc.
        // Note: path can include keywords like 'to', 'data', etc.
        while !self.is_at_end() {
            if self.check(TokenType::Identifier)
                || self.check(TokenType::Var)
                || self.check(TokenType::Number)
                || self.check(TokenType::Dot)
                || self.check(TokenType::Dash)
                || self.check(TokenType::Star)
                || self.check(TokenType::To)
                || self.is_safe_keyword_as_identifier()
            {
                path_str.push_str(&self.advance().text);
            } else if self.match_token(TokenType::Slash) {
                path_str.push('/');
            } else {
                break;
            }
        }
        Some(path_str)
    } else {
        None
    };
    // Handle optional parameters: (FILE_FORMAT => 'fmt', PATTERN => '*.csv')
    let (file_format, pattern) = if self.match_token(TokenType::LParen) {
        let mut ff = None;
        let mut pat = None;
        loop {
            if self.match_identifier("FILE_FORMAT") {
                self.expect(TokenType::FArrow)?; // =>
                ff = Some(self.parse_primary()?);
            } else if self.match_identifier("PATTERN") || self.match_token(TokenType::Pattern) {
                // PATTERN can be tokenized as keyword or identifier
                self.expect(TokenType::FArrow)?; // =>
                // Only a string-literal pattern is captured; other
                // expressions are parsed and discarded.
                if let Expression::Literal(lit) = self.parse_primary()? {
                    if let Literal::String(s) = lit.as_ref() {
                        pat = Some(s.clone());
                    }
                }
            } else {
                break;
            }
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
        self.expect(TokenType::RParen)?;
        (ff, pat)
    } else {
        (None, None)
    };
    Ok(Expression::StageReference(Box::new(StageReference {
        name,
        path,
        file_format,
        pattern,
        quoted: false,
    })))
}
/// Parse file location for COPY/PUT statements
/// Handles: @stage, @db.schema.stage, @stage/path, 's3://bucket/path', file:///path
///
/// Unlike [`parse_stage_reference`], stage paths here are flattened into a
/// plain string literal rather than a StageReference node.
fn parse_file_location(&mut self) -> Result<Expression> {
    // Stage reference starting with @ (tokenized as DAt or as a Var starting with @)
    if self.check(TokenType::DAt) {
        self.skip(); // consume @
        let mut stage_path = String::from("@");
        // Handle table stage prefix: @%table
        if self.check(TokenType::Percent) || self.check(TokenType::Mod) {
            stage_path.push('%');
            self.skip(); // consume %
        }
        // Handle user stage: @~
        else if self.check(TokenType::Tilde) {
            stage_path.push('~');
            self.skip(); // consume ~
        }
        // Get stage name
        if self.check(TokenType::Var) || self.check_keyword() || self.is_identifier_token() {
            stage_path.push_str(&self.advance().text);
        }
        // Parse qualified name parts: .schema.stage
        while self.check(TokenType::Dot) {
            self.skip(); // consume .
            stage_path.push('.');
            if self.check(TokenType::Var) || self.check_keyword() || self.is_identifier_token()
            {
                stage_path.push_str(&self.advance().text);
            }
        }
        // Parse path after stage: /path/to/file.csv
        // Consume all connected path components (dots, dashes, numbers, etc.)
        // matching the logic in parse_stage_reference.
        if self.match_token(TokenType::Slash) {
            stage_path.push('/');
            while !self.is_at_end() {
                // The `check_next(Eq)` guard stops before a token that is
                // actually a trailing parameter name (KEY = value), which
                // must not be swallowed into the path.
                if (self.check(TokenType::Var)
                    || self.check(TokenType::Identifier)
                    || self.check(TokenType::Number)
                    || self.check(TokenType::Dot)
                    || self.check(TokenType::Dash)
                    || self.check(TokenType::Star)
                    || self.check(TokenType::To)
                    || self.is_safe_keyword_as_identifier())
                    && !self.check_next(TokenType::Eq)
                {
                    stage_path.push_str(&self.advance().text);
                } else if self.match_token(TokenType::Slash) {
                    stage_path.push('/');
                } else {
                    break;
                }
            }
        }
        return Ok(Expression::Literal(Box::new(Literal::String(stage_path))));
    }
    // Stage reference tokenized as a Var starting with @ (e.g., @random_stage)
    // This happens when the tokenizer combines @ with the following identifier
    if self.check(TokenType::Var) && self.peek().text.starts_with('@') {
        let mut stage_path = self.advance().text.clone();
        // Parse qualified name parts: .schema.stage
        while self.check(TokenType::Dot) {
            self.skip(); // consume .
            stage_path.push('.');
            if self.check(TokenType::Var) || self.check_keyword() || self.is_identifier_token()
            {
                stage_path.push_str(&self.advance().text);
            }
        }
        // Parse path after stage: /path/to/file.csv
        if self.match_token(TokenType::Slash) {
            stage_path.push('/');
            while !self.is_at_end() {
                // Same Eq lookahead guard as above: don't consume a
                // parameter name that follows the location.
                if (self.check(TokenType::Var)
                    || self.check(TokenType::Identifier)
                    || self.check(TokenType::Number)
                    || self.check(TokenType::Dot)
                    || self.check(TokenType::Dash)
                    || self.check(TokenType::Star)
                    || self.check(TokenType::To)
                    || self.is_safe_keyword_as_identifier())
                    && !self.check_next(TokenType::Eq)
                {
                    stage_path.push_str(&self.advance().text);
                } else if self.match_token(TokenType::Slash) {
                    stage_path.push('/');
                } else {
                    break;
                }
            }
        }
        return Ok(Expression::Literal(Box::new(Literal::String(stage_path))));
    }
    // String literal (file path or URL)
    if self.check(TokenType::String) {
        let token = self.advance();
        return Ok(Expression::Literal(Box::new(Literal::String(
            token.text.clone(),
        ))));
    }
    // Backtick-quoted identifier (Databricks style: `s3://link`)
    if self.check(TokenType::QuotedIdentifier) {
        let token = self.advance();
        return Ok(Expression::Identifier(Identifier::quoted(
            token.text.clone(),
        )));
    }
    // Identifier (could be a stage name without @)
    if self.check(TokenType::Var) || self.check_keyword() {
        let ident = self.advance().text.clone();
        return Ok(Expression::boxed_column(Column {
            name: Identifier::new(ident),
            table: None,
            join_mark: false,
            trailing_comments: Vec::new(),
            span: None,
            inferred_type: None,
        }));
    }
    Err(self.parse_error("Expected file location"))
}
/// Parse Snowflake stage reference as a string for PUT/GET/COPY statements
/// Handles: @stage, @%table, @~, @db.schema.stage, @"quoted"."stage", @stage/path
/// Returns a Literal::String containing the stage path
///
/// Two tokenizations are supported: a standalone DAt token followed by the
/// name pieces, or a single Var token whose text already begins with '@'.
fn parse_stage_reference_as_string(&mut self) -> Result<Expression> {
    // Stage reference starting with @ (tokenized as DAt)
    if self.check(TokenType::DAt) {
        self.skip(); // consume @
        let mut stage_path = String::from("@");
        // Handle table stage prefix: @%table
        if self.check(TokenType::Percent) || self.check(TokenType::Mod) {
            stage_path.push('%');
            self.skip(); // consume %
        }
        // Handle user stage: @~
        else if self.check(TokenType::Tilde) {
            stage_path.push('~');
            self.skip(); // consume ~
            // After @~, parse any path segments
            while self.check(TokenType::Slash) {
                self.skip(); // consume /
                stage_path.push('/');
                // The Eq lookahead stops before a KEY=value parameter that
                // would otherwise be swallowed into the path.
                if (self.check(TokenType::Var)
                    || self.check_keyword()
                    || self.is_identifier_token())
                    && !self.check_next(TokenType::Eq)
                {
                    stage_path.push_str(&self.advance().text);
                }
            }
            // User stages have no name/qualifier parts, so we are done.
            return Ok(Expression::Literal(Box::new(Literal::String(stage_path))));
        }
        // Get stage name (could be quoted identifier)
        if self.check(TokenType::QuotedIdentifier) {
            // Preserve quoted identifier with quotes
            let text = &self.peek().text;
            stage_path.push('"');
            stage_path.push_str(text);
            stage_path.push('"');
            self.skip();
        } else if self.check(TokenType::Var)
            || self.check_keyword()
            || self.check(TokenType::Identifier)
        {
            stage_path.push_str(&self.advance().text);
        }
        // Parse qualified name parts: .schema.stage (may include quoted identifiers)
        while self.check(TokenType::Dot) {
            self.skip(); // consume .
            stage_path.push('.');
            if self.check(TokenType::QuotedIdentifier) {
                // Preserve quoted identifier with quotes
                let text = &self.peek().text;
                stage_path.push('"');
                stage_path.push_str(text);
                stage_path.push('"');
                self.skip();
            } else if self.check(TokenType::Var)
                || self.check_keyword()
                || self.check(TokenType::Identifier)
            {
                stage_path.push_str(&self.advance().text);
            }
        }
        // Parse path segments: /path/to/file
        while self.check(TokenType::Slash) {
            self.skip(); // consume /
            stage_path.push('/');
            // Get path segment but don't consume if followed by = (that's a parameter)
            if (self.check(TokenType::Var)
                || self.check_keyword()
                || self.is_identifier_token())
                && !self.check_next(TokenType::Eq)
            {
                stage_path.push_str(&self.advance().text);
            }
        }
        return Ok(Expression::Literal(Box::new(Literal::String(stage_path))));
    }
    // Stage reference tokenized as a Var starting with @ (e.g., @s1)
    if self.check(TokenType::Var) && self.peek().text.starts_with('@') {
        let mut stage_path = self.advance().text.clone();
        // Parse qualified name parts: .schema.stage (may include quoted identifiers)
        while self.check(TokenType::Dot) {
            self.skip(); // consume .
            stage_path.push('.');
            if self.check(TokenType::QuotedIdentifier) {
                let text = &self.peek().text;
                stage_path.push('"');
                stage_path.push_str(text);
                stage_path.push('"');
                self.skip();
            } else if self.check(TokenType::Var)
                || self.check_keyword()
                || self.check(TokenType::Identifier)
            {
                stage_path.push_str(&self.advance().text);
            }
        }
        // Parse path segments: /path/to/file
        while self.check(TokenType::Slash) {
            self.skip(); // consume /
            stage_path.push('/');
            // Same Eq lookahead guard: stop before a parameter name.
            if (self.check(TokenType::Var)
                || self.check_keyword()
                || self.is_identifier_token())
                && !self.check_next(TokenType::Eq)
            {
                stage_path.push_str(&self.advance().text);
            }
        }
        return Ok(Expression::Literal(Box::new(Literal::String(stage_path))));
    }
    Err(self.parse_error("Expected stage reference starting with @"))
}
/// Parse PUT statement (Snowflake)
/// PUT file://<path> @<stage> [AUTO_COMPRESS = TRUE|FALSE] ...
///
/// The source may be a quoted string or a bare `file://...` path spelled
/// across several tokens; the target may be a stage reference, a `?` bind
/// placeholder, or a quoted '@stage' string. Trailing options are collected
/// as [`CopyParameter`]s with their original casing preserved.
fn parse_put(&mut self) -> Result<Expression> {
    self.expect(TokenType::Put)?;
    // Parse source file path (usually file:///path/to/file)
    let (source, source_quoted) = if self.check(TokenType::String) {
        (self.advance().text.clone(), true)
    } else {
        // Bare file://path syntax arrives as several tokens (identifier,
        // colon, slashes, ...). Concatenate until the stage target begins:
        // @ (a DAt token or a Var whose text starts with '@'), a `?`
        // placeholder, a quoted string (e.g. '@SYSTEM$BIND/...'), or a
        // statement-ending semicolon.
        let mut source_parts = Vec::new();
        while !self.is_at_end() {
            if self.check(TokenType::DAt) {
                break;
            }
            if self.check(TokenType::Var) && self.peek().text.starts_with('@') {
                break;
            }
            if self.check(TokenType::Parameter)
                || self.check(TokenType::String)
                || self.check(TokenType::Semicolon)
            {
                break;
            }
            let token = self.advance();
            source_parts.push(token.text.clone());
        }
        (source_parts.concat(), false)
    };
    // Parse target stage (@stage_name, ? placeholder, or quoted '@stage')
    let target = if self.match_token(TokenType::Parameter) {
        Expression::Placeholder(Placeholder { index: None })
    } else if self.check(TokenType::String) {
        // Quoted stage: '@SYSTEM$BIND/path'
        let tok = self.advance();
        Expression::Literal(Box::new(Literal::String(tok.text.clone())))
    } else {
        self.parse_stage_reference_as_string()?
    };
    // Parse optional parameters
    // Note: Some parameter names like OVERWRITE are keywords, so we check for those explicitly
    // Preserve original casing for identity tests
    let mut params = Vec::new();
    while !self.is_at_end() && !self.check(TokenType::Semicolon) {
        let is_param_name = self.check(TokenType::Var)
            || self.check_keyword()
            || self.check(TokenType::Overwrite);
        if !is_param_name {
            break;
        }
        let name = self.advance().text.clone();
        // Fix: `eq` was previously hard-coded to `true` even when no `=`
        // followed the parameter name (leaving `value` as None). Record
        // whether `=` was actually seen, matching how COPY parameters are
        // built elsewhere in this parser (`eq: has_eq`).
        let has_eq = self.match_token(TokenType::Eq);
        let value = if has_eq {
            Some(self.parse_primary()?)
        } else {
            None
        };
        params.push(CopyParameter {
            name,
            value,
            values: Vec::new(),
            eq: has_eq,
        });
    }
    Ok(Expression::Put(Box::new(PutStmt {
        source,
        source_quoted,
        target,
        params,
    })))
}
/// Helper to join command tokens with smart spacing
/// Preserves the structure of file paths, stage references, etc.
///
/// Walks the `(text, token_type)` pairs once, deciding before each token
/// whether a separating space is needed from the previous one or two token
/// types. Glue punctuation (`@`, `.`, `(`, `)`, `[`, `]`, `,`, `/`, `:`,
/// `%`, `::`) suppresses the space; `=` is handled contextually so that
/// terminal `KEY=VALUE` options print without spaces while expressions
/// like `SET x = x + 1` keep them.
fn join_command_tokens(&self, tokens: Vec<(String, TokenType)>) -> String {
    let mut result = String::new();
    // One- and two-token lookbehind, consulted by the `=` heuristics below.
    let mut prev_token_type: Option<TokenType> = None;
    let mut prev_prev_token_type: Option<TokenType> = None;
    for (i, (text, token_type)) in tokens.iter().enumerate() {
        let needs_space = if result.is_empty() {
            false
        } else {
            match (prev_token_type, *token_type) {
                // No space after @ (stage references: @stage, @%, @~)
                (Some(TokenType::DAt), _) => false,
                // No space around dots (identifiers: a.b.c)
                (Some(TokenType::Dot), _) => false,
                (_, TokenType::Dot) => false,
                // No space around parentheses
                (Some(TokenType::LParen), _) => false,
                (_, TokenType::LParen) => false,
                (_, TokenType::RParen) => false,
                // No space around square brackets (array access: arr[i])
                (Some(TokenType::LBracket), _) => false,
                (_, TokenType::LBracket) => false,
                (_, TokenType::RBracket) => false,
                // No space before ,
                (_, TokenType::Comma) => false,
                // No space around / (paths: @s1/test)
                (Some(TokenType::Slash), _) => false,
                (_, TokenType::Slash) => false,
                // No space around : (file://path)
                (Some(TokenType::Colon), _) => false,
                (_, TokenType::Colon) => false,
                // No space around % (table stage: @%table)
                (Some(TokenType::Mod), _) => false,
                (_, TokenType::Mod) => false,
                (Some(TokenType::Percent), _) => false,
                (_, TokenType::Percent) => false,
                // Handle = contextually:
                // - No space around = in simple KEY=VALUE patterns where value is terminal
                // (PARALLEL=1, ENABLED=TRUE, FILE_FORMAT='csv')
                // - Keep space for expressions like SET x = x + 1
                (Some(TokenType::Var), TokenType::Eq) => {
                    // If the var starts with @ (parameter like @id = 123), always use spaces
                    if i >= 1 && tokens[i - 1].0.starts_with('@') {
                        true
                    } else if i + 1 < tokens.len() {
                        // Check what follows: Var=Number where number is terminal (end or followed by Var)
                        let next_type = tokens[i + 1].1;
                        // Is the value terminal (end of tokens, or followed by another Var=... pattern)?
                        let is_terminal_value =
                            i + 2 >= tokens.len() || tokens[i + 2].1 == TokenType::Var;
                        match next_type {
                            // No space for terminal numbers/bools: PARALLEL=1, ENABLED=TRUE
                            // Return false (no space) when terminal
                            TokenType::Number | TokenType::True | TokenType::False => {
                                !is_terminal_value
                            }
                            // No space for terminal strings: FILE_FORMAT='csv'
                            TokenType::String => !is_terminal_value,
                            // Always space if followed by Var (SET x = y ...)
                            _ => true,
                        }
                    } else {
                        // `=` is the last token; default to spaced form.
                        true
                    }
                }
                // No space after = in terminal KEY=VALUE patterns
                (Some(TokenType::Eq), TokenType::Number)
                | (Some(TokenType::Eq), TokenType::True)
                | (Some(TokenType::Eq), TokenType::False)
                | (Some(TokenType::Eq), TokenType::String) => {
                    // Is this a terminal value (end or followed by another Var=...)?
                    let is_terminal =
                        i + 1 >= tokens.len() || tokens[i + 1].1 == TokenType::Var;
                    match prev_prev_token_type {
                        // No space (return false) when terminal, space otherwise
                        // But always space if the var before = was preceded by @ (parameter)
                        Some(TokenType::Var) => {
                            // Always space if the var before = starts with @ (parameter)
                            if i >= 2 && tokens[i - 2].0.starts_with('@') {
                                true
                            } else {
                                !is_terminal
                            }
                        }
                        _ => true, // Space for other cases
                    }
                }
                // Always space after = when followed by Var (SET x = y, could be expression)
                (Some(TokenType::Eq), TokenType::Var) => true,
                // No space around :: (cast)
                (Some(TokenType::DColon), _) => false,
                (_, TokenType::DColon) => false,
                // Default: add space
                _ => true,
            }
        };
        if needs_space {
            result.push(' ');
        }
        result.push_str(text);
        // Shift the lookbehind window forward one token.
        prev_prev_token_type = prev_token_type;
        prev_token_type = Some(*token_type);
    }
    result
}
/// Join Teradata table option tokens with Teradata-specific spacing:
/// no spaces around `=`, dots, or parentheses; everything else is
/// separated by a single space.
fn join_teradata_option_tokens(&self, tokens: Vec<(String, TokenType)>) -> String {
    let mut result = String::new();
    let mut prev: Option<TokenType> = None;
    for (text, token_type) in tokens {
        // Glue to the previous token when it was '.', '(' or '='...
        let glue_after_prev = matches!(
            prev,
            Some(TokenType::Dot | TokenType::LParen | TokenType::Eq)
        );
        // ...or when the current token is '.', '(', ')', ',' or '='.
        let glue_before_curr = matches!(
            token_type,
            TokenType::Dot
                | TokenType::LParen
                | TokenType::RParen
                | TokenType::Comma
                | TokenType::Eq
        );
        if !result.is_empty() && !glue_after_prev && !glue_before_curr {
            result.push(' ');
        }
        result.push_str(&text);
        prev = Some(token_type);
    }
    result
}
/// Parse RM or REMOVE command (Snowflake) as an opaque command string.
/// `RM @stage_name` / `REMOVE @stage_name`
fn parse_rm_command(&mut self) -> Result<Expression> {
    // First token is RM or REMOVE; uppercase it for canonical output.
    let first = self.advance();
    let mut collected = vec![(first.text.to_ascii_uppercase(), first.token_type)];
    // Sweep the remainder of the statement verbatim.
    while !(self.is_at_end() || self.check(TokenType::Semicolon)) {
        let tok = self.advance();
        collected.push((tok.text.clone(), tok.token_type));
    }
    Ok(Expression::Command(Box::new(Command {
        this: self.join_command_tokens(collected),
    })))
}
/// Parse GET command (Snowflake)
/// GET @stage_name 'file:///path'
fn parse_get_command(&mut self) -> Result<Expression> {
let get_token = self.advance(); // consume GET (it's already matched)
// Collect remaining tokens with their types, preserving quotes
let mut tokens = vec![("GET".to_string(), get_token.token_type)];
while !self.is_at_end() && !self.check(TokenType::Semicolon) {
let token = self.advance();
// Re-add quotes around string and quoted identifier tokens
let text = match token.token_type {
TokenType::String => format!("'{}'", token.text),
TokenType::QuotedIdentifier => format!("\"{}\"", token.text),
_ => token.text.clone(),
};
tokens.push((text, token.token_type));
}
Ok(Expression::Command(Box::new(Command {
this: self.join_command_tokens(tokens),
})))
}
/// Parse CALL statement (stored procedure call)
/// CALL procedure_name(args, ...)
fn parse_call(&mut self) -> Result<Expression> {
let call_token = self.advance(); // consume CALL
// Collect remaining tokens with their types
let mut tokens = vec![("CALL".to_string(), call_token.token_type)];
while !self.is_at_end() && !self.check(TokenType::Semicolon) {
let token = self.advance();
tokens.push((token.text.clone(), token.token_type));
}
Ok(Expression::Command(Box::new(Command {
this: self.join_command_tokens(tokens),
})))
}
/// Parse KILL statement (MySQL/MariaDB).
/// `KILL [CONNECTION | QUERY] <id>`
fn parse_kill(&mut self) -> Result<Expression> {
    self.expect(TokenType::Kill)?;
    // Optional kind keyword; CONNECTION is tried before QUERY.
    let mut kind = None;
    for candidate in ["CONNECTION", "QUERY"] {
        if self.match_identifier(candidate) {
            kind = Some(candidate.to_string());
            break;
        }
    }
    // Target process id — usually a number or string literal.
    let this = self.parse_primary()?;
    Ok(Expression::Kill(Box::new(Kill { this, kind })))
}
/// Parse EXEC/EXECUTE statement (TSQL stored procedure call)
/// EXEC [schema.]procedure_name [@param=value, ...]
///
/// Also supports dynamic SQL (`EXEC(@sql)`) and a trailing TSQL
/// `WITH ...` clause (RESULT SETS, RECOMPILE, ...), which is captured
/// as raw SQL text in `suffix`.
fn parse_execute(&mut self) -> Result<Expression> {
    self.expect(TokenType::Execute)?;
    // Dynamic SQL: EXEC(@sql) or EXEC (@sql)
    let this = if self.check(TokenType::LParen) {
        self.skip(); // consume (
        // An empty parenthesized expression degrades to NULL.
        let expr = self
            .parse_disjunction()?
            .unwrap_or(Expression::Null(crate::expressions::Null));
        self.expect(TokenType::RParen)?;
        Expression::Paren(Box::new(crate::expressions::Paren {
            this: expr,
            trailing_comments: Vec::new(),
        }))
    } else {
        // Parse procedure name (can be qualified: schema.proc_name)
        let proc_name = self.parse_table_ref()?;
        Expression::Table(Box::new(proc_name))
    };
    // Parse optional parameters: @param=value [OUTPUT], ...
    let mut parameters = Vec::new();
    // Check if there are parameters (starts with @ or identifier)
    while self.check(TokenType::Var) || self.check(TokenType::Parameter) {
        // Get the parameter name (starts with @)
        let token = self.advance();
        // Normalize the name so it always carries a leading '@'.
        let param_name = if token.text.starts_with('@') {
            token.text.clone()
        } else {
            format!("@{}", token.text)
        };
        // Check for = (named parameter) or positional parameter
        if self.match_token(TokenType::Eq) {
            // Named parameter: @param = value
            let value = self.parse_primary()?;
            let output = self.match_token(TokenType::Output);
            parameters.push(ExecuteParameter {
                name: param_name,
                value,
                positional: false,
                output,
            });
        } else {
            // Positional parameter: @var (no = sign)
            // The variable itself doubles as the value expression.
            let output = self.match_token(TokenType::Output);
            parameters.push(ExecuteParameter {
                name: param_name.clone(),
                value: Expression::boxed_column(Column {
                    name: Identifier::new(&param_name),
                    table: None,
                    join_mark: false,
                    trailing_comments: Vec::new(),
                    span: None,
                    inferred_type: None,
                }),
                positional: true,
                output,
            });
        }
        // Check for comma to continue
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    // TSQL: WITH RESULT SETS ((...), ...) or WITH RECOMPILE etc.
    // Captured as raw token text rather than parsed structurally.
    let suffix = if self.check(TokenType::With) {
        let start = self.current;
        // Collect remaining tokens until semicolon or end
        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
            self.skip();
        }
        Some(self.tokens_to_sql(start, self.current))
    } else {
        None
    };
    Ok(Expression::Execute(Box::new(ExecuteStatement {
        this,
        parameters,
        suffix,
    })))
}
/// Parse GRANT statement
/// GRANT <privileges> ON [<kind>] <object> TO <principals> [WITH GRANT OPTION] [AS <principal>]
///
/// ClickHouse role grants, multi-privilege grants, wildcard grants, and
/// `WITH REPLACE OPTION` are detected by a non-consuming token scan and
/// handed off to `parse_command` verbatim.
fn parse_grant(&mut self) -> Result<Expression> {
    self.expect(TokenType::Grant)?;
    // ClickHouse: GRANT can grant roles (no ON clause), grant privileges (has ON clause),
    // or use complex syntax. If we see TO before ON, treat as command.
    // Also: multi-privilege grants (multiple ON), wildcard grants (test*.*),
    // WITH REPLACE OPTION all parse as commands.
    if matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    ) {
        // Save position after GRANT keyword so the scan below is non-destructive.
        let saved_pos = self.current;
        // Scan ahead to check grant structure without consuming tokens.
        let mut depth = 0i32; // paren nesting; structural checks apply only at depth 0
        let mut on_count = 0;
        let mut found_to = false;
        let mut has_star_in_name = false;
        let mut has_replace_option = false;
        let mut i = self.current;
        while i < self.tokens.len() && self.tokens[i].token_type != TokenType::Semicolon {
            match self.tokens[i].token_type {
                TokenType::LParen => depth += 1,
                TokenType::RParen => depth -= 1,
                TokenType::On if depth == 0 => on_count += 1,
                TokenType::To if depth == 0 => {
                    found_to = true;
                }
                TokenType::Star if depth == 0 && on_count > 0 && !found_to => {
                    // Check if star is part of a wildcard name (e.g., test*.*):
                    // a star that is neither `.*` nor directly after ON.
                    if i > 0
                        && self.tokens[i - 1].token_type != TokenType::Dot
                        && self.tokens[i - 1].token_type != TokenType::On
                    {
                        has_star_in_name = true;
                    }
                }
                TokenType::Replace if depth == 0 && found_to => {
                    has_replace_option = true;
                }
                _ => {}
            }
            i += 1;
        }
        if (found_to && on_count == 0) || on_count > 1 || has_star_in_name || has_replace_option
        {
            // Role grant, multi-privilege grant, wildcard grant, or REPLACE OPTION — parse as command
            self.current = saved_pos;
            return self
                .parse_command()?
                .ok_or_else(|| self.parse_error("Failed to parse GRANT statement"));
        }
        self.current = saved_pos;
    }
    // Parse privileges (e.g., SELECT, INSERT, UPDATE)
    let privileges = self.parse_privileges()?;
    // Expect ON
    self.expect(TokenType::On)?;
    // Parse optional kind (TABLE, SCHEMA, FUNCTION, etc.)
    let kind = self.parse_object_kind()?;
    // Parse securable (the object) - may be dot-separated qualified name
    let securable = self.parse_securable_name()?;
    // Parse optional function parameter types: func(type1, type2, ...)
    let function_params = if self.check(TokenType::LParen) {
        self.parse_function_param_types()?
    } else {
        Vec::new()
    };
    // Expect TO
    self.expect(TokenType::To)?;
    // Parse principals
    let principals = self.parse_principals()?;
    // Check for WITH GRANT OPTION.
    // BUG FIX: the previous short-circuit chain consumed WITH (and possibly
    // GRANT) even when the full three-word phrase did not follow, leaving the
    // parser mid-clause. Match atomically and backtrack on a partial match.
    let grant_option = {
        let saved = self.current;
        let matched = self.match_token(TokenType::With)
            && self.match_token(TokenType::Grant)
            && self.check(TokenType::Var)
            && self.peek().text.eq_ignore_ascii_case("OPTION");
        if matched {
            self.skip(); // consume OPTION
            true
        } else {
            self.current = saved;
            false
        }
    };
    // Check for TSQL AS principal clause
    let as_principal = if self.match_token(TokenType::As) {
        let name = self.expect_identifier_or_keyword()?;
        Some(Identifier::new(name))
    } else {
        None
    };
    Ok(Expression::Grant(Box::new(Grant {
        privileges,
        kind,
        securable,
        function_params,
        principals,
        grant_option,
        as_principal,
    })))
}
/// Parse REVOKE statement
/// REVOKE [GRANT OPTION FOR] <privileges> ON [<kind>] <object> FROM <principals> [CASCADE]
fn parse_revoke(&mut self) -> Result<Expression> {
    self.expect(TokenType::Revoke)?;
    // ClickHouse: REVOKE role FROM user (no ON clause), multi-privilege, or wildcard — parse as command
    if matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    ) {
        // Remember the position right after REVOKE; the scan below must not consume tokens.
        let saved_pos = self.current;
        let mut depth = 0i32; // paren nesting depth; structural checks apply only at depth 0
        let mut on_count = 0; // number of top-level ON keywords seen
        let mut found_from = false; // top-level FROM seen (principal list follows)
        let mut has_star_in_name = false; // wildcard object name such as test*.*
        let mut i = self.current;
        while i < self.tokens.len() && self.tokens[i].token_type != TokenType::Semicolon {
            match self.tokens[i].token_type {
                TokenType::LParen => depth += 1,
                TokenType::RParen => depth -= 1,
                TokenType::On if depth == 0 => on_count += 1,
                TokenType::From if depth == 0 => {
                    found_from = true;
                }
                TokenType::Star if depth == 0 && on_count > 0 && !found_from => {
                    // A star that is neither `.*` nor directly after ON is part of a
                    // wildcard name (e.g. test*.*), which the structured path can't represent.
                    if i > 0
                        && self.tokens[i - 1].token_type != TokenType::Dot
                        && self.tokens[i - 1].token_type != TokenType::On
                    {
                        has_star_in_name = true;
                    }
                }
                _ => {}
            }
            i += 1;
        }
        // Role revoke (FROM with no ON), multi-ON, or wildcard: fall back to a raw command.
        if (found_from && on_count == 0) || on_count > 1 || has_star_in_name {
            self.current = saved_pos;
            return self
                .parse_command()?
                .ok_or_else(|| self.parse_error("Failed to parse REVOKE statement"));
        }
        self.current = saved_pos;
    }
    // Check for GRANT OPTION FOR
    let grant_option = if self.check(TokenType::Grant) {
        self.skip();
        // OPTION is tokenized as a plain identifier (Var), so compare by text.
        if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("OPTION") {
            self.skip();
            self.expect(TokenType::For)?;
            true
        } else {
            return Err(self.parse_error("Expected OPTION after GRANT in REVOKE"));
        }
    } else {
        false
    };
    // Parse privileges
    let privileges = self.parse_privileges()?;
    // Expect ON
    self.expect(TokenType::On)?;
    // Parse optional kind
    let kind = self.parse_object_kind()?;
    // Parse securable - may be dot-separated qualified name
    let securable = self.parse_securable_name()?;
    // Parse optional function parameter types: func(type1, type2, ...)
    let function_params = if self.check(TokenType::LParen) {
        self.parse_function_param_types()?
    } else {
        Vec::new()
    };
    // Expect FROM
    self.expect(TokenType::From)?;
    // Parse principals
    let principals = self.parse_principals()?;
    // Check for CASCADE or RESTRICT.
    // The two are mutually exclusive, so RESTRICT is only probed when CASCADE is absent.
    let cascade = self.match_token(TokenType::Cascade);
    let restrict = if !cascade {
        self.match_token(TokenType::Restrict)
    } else {
        false
    };
    Ok(Expression::Revoke(Box::new(Revoke {
        privileges,
        kind,
        securable,
        function_params,
        principals,
        grant_option,
        cascade,
        restrict,
    })))
}
/// Parse privilege list for GRANT/REVOKE
/// Handles multi-word privileges like "ALL PRIVILEGES" and column-level privileges like "SELECT(col1, col2)"
fn parse_privileges(&mut self) -> Result<Vec<Privilege>> {
    let mut privileges = Vec::new();
    loop {
        // Gather the (possibly multi-word) privilege name, e.g. "ALL PRIVILEGES",
        // uppercasing each word. Stops at ON, comma, or an opening paren.
        let mut words: Vec<String> = Vec::new();
        while !self.is_at_end()
            && !self.check(TokenType::On)
            && !self.check(TokenType::Comma)
            && !self.check(TokenType::LParen)
            && self.is_identifier_or_keyword_token()
        {
            words.push(self.advance().text.to_ascii_uppercase());
        }
        if words.is_empty() {
            break;
        }
        // Optional column-level restriction: SELECT(col1, col2).
        let mut columns = Vec::new();
        if self.match_token(TokenType::LParen) {
            while self.is_identifier_or_keyword_token() {
                columns.push(self.advance().text.to_string());
                if !self.match_token(TokenType::Comma) {
                    break;
                }
            }
            self.expect(TokenType::RParen)?;
        }
        privileges.push(Privilege {
            name: words.join(" "),
            columns,
        });
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    Ok(privileges)
}
/// Parse object kind (TABLE, SCHEMA, FUNCTION, PROCEDURE, SEQUENCE, etc.)
fn parse_object_kind(&mut self) -> Result<Option<String>> {
if self.check(TokenType::Table) {
self.skip();
Ok(Some("TABLE".to_string()))
} else if self.check(TokenType::Schema) {
self.skip();
Ok(Some("SCHEMA".to_string()))
} else if self.check(TokenType::Database) {
self.skip();
Ok(Some("DATABASE".to_string()))
} else if self.check(TokenType::Function) {
self.skip();
Ok(Some("FUNCTION".to_string()))
} else if self.check(TokenType::View) {
self.skip();
Ok(Some("VIEW".to_string()))
} else if self.check(TokenType::Procedure) {
self.skip();
Ok(Some("PROCEDURE".to_string()))
} else if self.check(TokenType::Sequence) {
self.skip();
Ok(Some("SEQUENCE".to_string()))
} else if self.check(TokenType::Warehouse) {
self.skip();
Ok(Some("WAREHOUSE".to_string()))
} else if self.check_identifier("STAGE")
|| self.check_identifier("INTEGRATION")
|| self.check_identifier("TASK")
|| self.check_identifier("STREAM")
|| self.check_identifier("PIPE")
|| self.check_identifier("TAG")
|| self.check_identifier("SHARE")
{
let kind = self.advance().text.to_ascii_uppercase();
Ok(Some(kind))
} else if self.check_identifier("FILE")
&& self.current + 1 < self.tokens.len()
&& self.tokens[self.current + 1]
.text
.eq_ignore_ascii_case("FORMAT")
{
self.skip(); // consume FILE
self.skip(); // consume FORMAT
Ok(Some("FILE FORMAT".to_string()))
} else if self.check_identifier("NETWORK")
&& self.current + 1 < self.tokens.len()
&& self.tokens[self.current + 1]
.text
.eq_ignore_ascii_case("POLICY")
{
self.skip(); // consume NETWORK
self.skip(); // consume POLICY
Ok(Some("NETWORK POLICY".to_string()))
} else {
Ok(None)
}
}
/// Parse principal list for GRANT/REVOKE
/// Each principal may be prefixed by at most one classifier keyword:
/// ROLE, GROUP (Redshift), or SHARE (Snowflake).
fn parse_principals(&mut self) -> Result<Vec<GrantPrincipal>> {
    let mut principals = Vec::new();
    loop {
        let mut is_role = false;
        let mut is_group = false;
        let mut is_share = false;
        // ROLE is tokenized as a plain identifier (Var), so compare by text;
        // GROUP has its own token type; SHARE is an identifier check.
        if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("ROLE") {
            self.skip();
            is_role = true;
        } else if self.check(TokenType::Group) {
            self.skip();
            is_group = true;
        } else if self.check_identifier("SHARE") {
            self.skip();
            is_share = true;
        }
        // Principal name (quoted flag preserved for backtick-quoted identifiers).
        let name = self.expect_identifier_or_keyword_with_quoted()?;
        principals.push(GrantPrincipal {
            name,
            is_role,
            is_group,
            is_share,
        });
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    Ok(principals)
}
/// Parse a securable name (potentially dot-separated qualified name)
/// e.g., "mydb.myschema.ADD5" -> Identifier("mydb.myschema.ADD5")
fn parse_securable_name(&mut self) -> Result<Identifier> {
    // Each dot-separated segment is either `*` (wildcard, as in db.* or *.*)
    // or a regular identifier/keyword; collect them and rejoin with dots.
    let mut segments: Vec<String> = Vec::new();
    loop {
        let segment = if self.match_token(TokenType::Star) {
            "*".to_string()
        } else {
            self.expect_identifier_or_keyword()?
        };
        segments.push(segment);
        if !self.match_token(TokenType::Dot) {
            break;
        }
    }
    Ok(Identifier::new(segments.join(".")))
}
/// Parse function parameter types for GRANT/REVOKE ON FUNCTION
/// e.g., "(number, varchar)" -> vec!["number", "varchar"]
fn parse_function_param_types(&mut self) -> Result<Vec<String>> {
    self.expect(TokenType::LParen)?;
    let mut params = Vec::new();
    // Empty parentheses are allowed; otherwise a comma-separated list of
    // type names (keywords such as INT/VARCHAR are accepted as names).
    let mut expect_more = !self.check(TokenType::RParen);
    while expect_more {
        params.push(self.expect_identifier_or_keyword()?);
        // A comma means another type follows; anything else ends the list.
        expect_more = self.match_token(TokenType::Comma);
    }
    self.expect(TokenType::RParen)?;
    Ok(params)
}
/// Parse COMMENT ON statement
/// COMMENT [IF EXISTS] ON [MATERIALIZED] <kind> <object> IS <expression>
fn parse_comment(&mut self) -> Result<Expression> {
    self.expect(TokenType::Comment)?;
    // Check for IF EXISTS
    let exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
    // Expect ON
    self.expect(TokenType::On)?;
    // Check for MATERIALIZED (can be TokenType::Materialized or TokenType::Var)
    let materialized = if self.match_token(TokenType::Materialized) {
        true
    } else if self.check(TokenType::Var)
        && self.peek().text.eq_ignore_ascii_case("MATERIALIZED")
    {
        self.skip();
        true
    } else {
        false
    };
    // Parse the object kind (COLUMN, TABLE, DATABASE, PROCEDURE, etc.)
    let kind = self.expect_identifier_or_keyword()?.to_ascii_uppercase();
    // Parse the object name (can be qualified like schema.table.column)
    // For PROCEDURE/FUNCTION, we need to handle the parameter list like my_proc(integer, integer)
    let this = if kind == "PROCEDURE" || kind == "FUNCTION" {
        // Parse name possibly with parameter types, preserving original case
        let name_token = self.advance();
        let mut name_str = name_token.text.clone();
        // Parse additional qualified parts
        while self.match_token(TokenType::Dot) {
            let next = self.advance();
            name_str.push('.');
            name_str.push_str(&next.text);
        }
        // Check for parameter types in parentheses; the list is rebuilt as a
        // single flat string with normalized ", " separators.
        if self.match_token(TokenType::LParen) {
            name_str.push('(');
            let mut first = true;
            while !self.check(TokenType::RParen) && !self.is_at_end() {
                if !first {
                    name_str.push_str(", ");
                }
                first = false;
                let param_token = self.advance();
                name_str.push_str(&param_token.text);
                // NOTE(review): every token becomes its own "parameter" here, so a
                // multi-word type like DOUBLE PRECISION would render as
                // "DOUBLE, PRECISION" — confirm against expected inputs.
                self.match_token(TokenType::Comma);
            }
            self.expect(TokenType::RParen)?;
            name_str.push(')');
        }
        Expression::Identifier(Identifier::new(name_str))
    } else {
        self.parse_qualified_name()?
    };
    // Expect IS
    if self.check(TokenType::Is) {
        self.skip();
    } else {
        return Err(self.parse_error("Expected IS in COMMENT ON statement"));
    }
    // Parse the comment expression (usually a string literal)
    let expression = self.parse_primary()?;
    Ok(Expression::Comment(Box::new(Comment {
        this,
        kind,
        expression,
        exists,
        materialized,
    })))
}
/// Parse SET statement
///
/// Handles the generic `SET name = value, ...` list plus dialect forms:
/// ClickHouse `SET DEFAULT ROLE ...` (kept as a raw command), Teradata
/// `SET QUERY_BAND`, MySQL `SET CHARACTER SET` / `SET NAMES [COLLATE]` /
/// `SET [GLOBAL|SESSION] TRANSACTION ...`, scope modifiers
/// (GLOBAL/LOCAL/SESSION/PERSIST/PERSIST_ONLY/VARIABLE), `@@system` and
/// `@user` variable names, tuple assignment targets, and TSQL-style
/// `SET flag ON|OFF` / `SET key value` with no `=`.
fn parse_set(&mut self) -> Result<Expression> {
    self.expect(TokenType::Set)?;
    let mut items = Vec::new();
    // ClickHouse: SET DEFAULT ROLE ... TO user - parse as command
    if matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    ) && self.check(TokenType::Default)
    {
        // Collect the raw token text up to the statement end and keep it verbatim.
        let mut parts = vec!["SET".to_string()];
        while !self.is_at_end() && self.peek().token_type != TokenType::Semicolon {
            parts.push(self.advance().text.clone());
        }
        return Ok(Expression::Command(Box::new(crate::expressions::Command {
            this: parts.join(" "),
        })));
    }
    // Teradata: SET QUERY_BAND = ... [UPDATE] [FOR scope]
    if matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::Teradata)
    ) && self.match_identifier("QUERY_BAND")
    {
        return self.parse_query_band();
    }
    // Handle MySQL SET CHARACTER SET / SET NAMES
    if self.match_identifier("CHARACTER") {
        // SET CHARACTER SET <charset> | SET CHARACTER SET DEFAULT
        self.expect(TokenType::Set)?;
        let value = if self.match_token(TokenType::Default) {
            Expression::Identifier(Identifier::new("DEFAULT".to_string()))
        } else {
            self.parse_primary()?
        };
        items.push(SetItem {
            name: Expression::Identifier(Identifier::new("CHARACTER SET".to_string())),
            value,
            kind: None,
            no_equals: false,
        });
        return Ok(Expression::SetStatement(Box::new(SetStatement { items })));
    }
    if self.match_identifier("NAMES") {
        // SET NAMES <charset> [COLLATE <collation>] | SET NAMES DEFAULT
        let value = if self.match_token(TokenType::Default) {
            Expression::Identifier(Identifier::new("DEFAULT".to_string()))
        } else {
            self.parse_primary()?
        };
        // Check for optional COLLATE clause
        let collation = if self.match_identifier("COLLATE") {
            Some(self.parse_primary()?)
        } else {
            None
        };
        items.push(SetItem {
            name: Expression::Identifier(Identifier::new("NAMES".to_string())),
            value,
            kind: None,
            no_equals: false,
        });
        // The collation is recorded as a second, separate item named COLLATE.
        if let Some(coll) = collation {
            items.push(SetItem {
                name: Expression::Identifier(Identifier::new("COLLATE".to_string())),
                value: coll,
                kind: None,
                no_equals: false,
            });
        }
        return Ok(Expression::SetStatement(Box::new(SetStatement { items })));
    }
    // Track whether SET VAR/VARIABLE was used (only first item gets the VARIABLE kind)
    let mut set_is_variable = if self.check(TokenType::Var) {
        let text = self.peek().text.to_uppercase();
        if text == "VARIABLE" || text == "VAR" {
            // Look ahead: VAR/VARIABLE should be followed by another name, not by = or TO
            // (otherwise VAR/VARIABLE is itself the variable being assigned).
            if let Some(next) = self.tokens.get(self.current + 1) {
                if next.token_type != TokenType::Eq
                    && next.token_type != TokenType::To
                    && next.token_type != TokenType::ColonEq
                {
                    self.skip(); // consume VAR/VARIABLE
                    true
                } else {
                    false
                }
            } else {
                false
            }
        } else {
            false
        }
    } else {
        false
    };
    // Main loop: one iteration per comma-separated SET item.
    loop {
        // Check for GLOBAL, LOCAL, SESSION, PERSIST, PERSIST_ONLY modifiers
        // LOCAL is a token type, others are identifiers
        let kind = if self.match_identifier("GLOBAL") {
            Some("GLOBAL".to_string())
        } else if self.match_token(TokenType::Local) {
            Some("LOCAL".to_string())
        } else if self.match_identifier("SESSION") {
            Some("SESSION".to_string())
        } else if self.match_identifier("PERSIST") {
            Some("PERSIST".to_string())
        } else if self.match_identifier("PERSIST_ONLY") {
            Some("PERSIST_ONLY".to_string())
        } else if set_is_variable {
            set_is_variable = false; // Only first item gets VARIABLE kind
            Some("VARIABLE".to_string())
        } else {
            None
        };
        // Check for SET [GLOBAL|SESSION] TRANSACTION (MySQL)
        if self.match_token(TokenType::Transaction) {
            // Parse transaction characteristics (ISOLATION LEVEL, READ ONLY, READ WRITE)
            let mut characteristics = Vec::new();
            loop {
                let mut char_tokens = Vec::new();
                // Parse ISOLATION LEVEL ... or READ ONLY/WRITE
                // Must handle keywords like ONLY, REPEATABLE, SERIALIZABLE, etc.
                while !self.is_at_end()
                    && !self.check(TokenType::Comma)
                    && !self.check(TokenType::Semicolon)
                {
                    // Allow identifiers and common transaction-related keywords
                    if self.is_identifier_token()
                        || self.is_safe_keyword_as_identifier()
                        || self.check(TokenType::Only)
                        || self.check(TokenType::Repeatable)
                    {
                        char_tokens.push(self.advance().text);
                    } else {
                        break;
                    }
                }
                if !char_tokens.is_empty() {
                    characteristics.push(char_tokens.join(" "));
                }
                if !self.match_token(TokenType::Comma) {
                    break;
                }
            }
            // The whole characteristics list is stored as one identifier-valued item.
            let name = Expression::Identifier(Identifier::new("TRANSACTION".to_string()));
            let value = if characteristics.is_empty() {
                Expression::Identifier(Identifier::new("".to_string()))
            } else {
                Expression::Identifier(Identifier::new(characteristics.join(", ")))
            };
            items.push(SetItem {
                name,
                value,
                kind,
                no_equals: false,
            });
            break;
        }
        // Parse variable name - use a simple approach to avoid expression parsing issues
        // Variable names can be dotted identifiers or keywords used as names
        let name = {
            if self.check(TokenType::AtAt) {
                // @@SCOPE.variable or @@variable syntax (MySQL system variables)
                self.skip(); // consume @@
                let mut name_str = "@@".to_string();
                let first = self.advance().text.clone();
                name_str.push_str(&first);
                // Handle @@scope.variable (e.g., @@GLOBAL.max_connections)
                while self.match_token(TokenType::Dot) {
                    let next = self.advance().text.clone();
                    name_str.push('.');
                    name_str.push_str(&next);
                }
                Expression::Identifier(Identifier::new(name_str))
            } else if self.check(TokenType::DAt) {
                // @variable syntax (MySQL user variables)
                self.skip(); // consume @
                let mut name_str = "@".to_string();
                let first = self.advance().text.clone();
                name_str.push_str(&first);
                Expression::Identifier(Identifier::new(name_str))
            } else if self.check(TokenType::LParen) {
                // Tuple of variable names: SET VARIABLE (v1, v2) = (SELECT ...)
                self.skip(); // consume (
                let mut vars = Vec::new();
                loop {
                    let var_name = self.advance().text.clone();
                    vars.push(Expression::Column(Box::new(Column {
                        name: Identifier::new(var_name),
                        table: None,
                        join_mark: false,
                        trailing_comments: Vec::new(),
                        span: None,
                        inferred_type: None,
                    })));
                    if !self.match_token(TokenType::Comma) {
                        break;
                    }
                }
                self.expect(TokenType::RParen)?;
                Expression::Tuple(Box::new(crate::expressions::Tuple { expressions: vars }))
            } else {
                let first = self.advance().text.clone();
                let mut name_str = first;
                // Handle dotted identifiers (e.g., schema.variable)
                while self.match_token(TokenType::Dot) {
                    let next = self.advance().text.clone();
                    name_str.push('.');
                    name_str.push_str(&next);
                }
                // Handle Hive-style colon-separated names (e.g., hiveconf:some_var)
                // But not := which is assignment
                while self.check(TokenType::Colon) && !self.check_next(TokenType::Eq) {
                    self.skip(); // consume :
                    let next = self.advance().text.clone();
                    name_str.push(':');
                    name_str.push_str(&next);
                }
                Expression::Identifier(Identifier::new(name_str))
            }
        };
        // Expect = or := or TO
        if self.match_token(TokenType::Eq) || self.match_token(TokenType::ColonEq) {
            // ok - standard assignment
        } else if self.match_token(TokenType::To) {
            // PostgreSQL uses SET var TO value
        } else if self.is_at_end()
            || self.check(TokenType::Semicolon)
            || self.check(TokenType::Comma)
        {
            // SET x ON/OFF without = (TSQL: SET XACT_ABORT ON)
            // The ON/OFF was already parsed as part of the name expression
            // Handle as a name-only set (value is empty)
            items.push(SetItem {
                name,
                value: Expression::Identifier(Identifier::new("".to_string())),
                kind,
                no_equals: false,
            });
            if !self.match_token(TokenType::Comma) {
                break;
            }
            continue;
        } else {
            // Check if the next token looks like a value (ON/OFF without =)
            // TSQL: SET XACT_ABORT ON, SET NOCOUNT ON
            if self.check(TokenType::On) || self.check_keyword_text("OFF") {
                let val = self.advance().text;
                // Include ON/OFF in the name so generator doesn't add "="
                let name_with_val = match &name {
                    Expression::Column(col) => format!("{} {}", col.name.name, val),
                    Expression::Identifier(id) => format!("{} {}", id.name, val),
                    _ => val.clone(),
                };
                items.push(SetItem {
                    name: Expression::Identifier(Identifier::new(name_with_val)),
                    value: Expression::Identifier(Identifier::new("".to_string())),
                    kind,
                    no_equals: false,
                });
                if !self.match_token(TokenType::Comma) {
                    break;
                }
                continue;
            }
            // TSQL/Generic: SET key value (without = or TO)
            // Parse the next token as the value
            if !self.is_at_end() && !self.check(TokenType::Semicolon) {
                let value = self.parse_expression()?;
                items.push(SetItem {
                    name,
                    value,
                    kind,
                    no_equals: true,
                });
                if !self.match_token(TokenType::Comma) {
                    break;
                }
                continue;
            }
            return Err(self.parse_error("Expected '=' or 'TO' in SET statement"));
        }
        // Parse value - handle ON/OFF keywords as identifiers (MySQL: SET autocommit = ON)
        let value = if self.check(TokenType::On) || self.check_keyword_text("OFF") {
            Expression::Identifier(Identifier::new(self.advance().text.clone()))
        } else if self.match_token(TokenType::Default) {
            Expression::Identifier(Identifier::new("DEFAULT".to_string()))
        } else {
            self.parse_expression()?
        };
        items.push(SetItem {
            name,
            value,
            kind,
            no_equals: false,
        });
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    Ok(Expression::SetStatement(Box::new(SetStatement { items })))
}
/// Parse Teradata SET QUERY_BAND statement
/// Grammar (after `SET QUERY_BAND` has been consumed):
/// `= <value> [UPDATE] [FOR] [SESSION [VOLATILE] | TRANSACTION | VOLATILE]`
fn parse_query_band(&mut self) -> Result<Expression> {
    self.expect(TokenType::Eq)?;
    // Value is NONE, a string literal, or a general primary expression.
    let value = if self.match_identifier("NONE") {
        Expression::Var(Box::new(Var {
            this: "NONE".to_string(),
        }))
    } else if self.check(TokenType::String) {
        Expression::Literal(Box::new(Literal::String(self.expect_string()?)))
    } else {
        self.parse_primary()?
    };
    // Optional UPDATE flag, recorded as a boolean-true expression when present.
    let saw_update = self.match_token(TokenType::Update) || self.match_identifier("UPDATE");
    let update =
        saw_update.then(|| Box::new(Expression::Boolean(BooleanLiteral { value: true })));
    // FOR is optional filler before the scope keyword.
    let _ = self.match_token(TokenType::For);
    // Scope: SESSION [VOLATILE] | TRANSACTION | VOLATILE.
    let scope: Option<&str> =
        if self.match_token(TokenType::Session) || self.match_identifier("SESSION") {
            if self.match_identifier("VOLATILE") {
                Some("SESSION VOLATILE")
            } else {
                Some("SESSION")
            }
        } else if self.match_token(TokenType::Transaction)
            || self.match_identifier("TRANSACTION")
        {
            Some("TRANSACTION")
        } else if self.match_identifier("VOLATILE") {
            Some("VOLATILE")
        } else {
            None
        };
    Ok(Expression::QueryBand(Box::new(QueryBand {
        this: Box::new(value),
        scope: scope.map(|s| {
            Box::new(Expression::Var(Box::new(Var {
                this: s.to_string(),
            })))
        }),
        update,
    })))
}
/// Parse FETCH FIRST/NEXT clause
/// FETCH [FIRST|NEXT] [count] [PERCENT] [ROW|ROWS] [ONLY|WITH TIES]
fn parse_fetch(&mut self) -> Result<Fetch> {
    // Direction keyword is optional; FIRST is the default either way.
    let direction = if self.match_token(TokenType::Next) {
        "NEXT".to_string()
    } else {
        self.match_token(TokenType::First);
        "FIRST".to_string()
    };
    // A count is absent when the next token already starts the tail of the
    // clause (ROW/ROWS/PERCENT/ONLY); otherwise accept a number, a
    // parenthesized expression, or a TSQL @variable (Var token).
    let tail_starts_here = self.check(TokenType::Row)
        || self.check(TokenType::Rows)
        || self.check(TokenType::Percent)
        || self.check(TokenType::Only);
    let looks_like_count = self.check(TokenType::Number)
        || self.check(TokenType::LParen)
        || self.check(TokenType::DAt)
        || self.check(TokenType::Var);
    let count = if !tail_starts_here && looks_like_count {
        Some(self.parse_primary()?)
    } else {
        None
    };
    // PERCENT modifier
    let percent = self.match_token(TokenType::Percent);
    // ROW or ROWS (singular/plural are both accepted)
    let rows = self.match_token(TokenType::Row) || self.match_token(TokenType::Rows);
    // ONLY is consumed but not recorded; WITH TIES is recorded.
    self.match_token(TokenType::Only);
    let with_ties = self.match_keywords(&[TokenType::With, TokenType::Ties]);
    Ok(Fetch {
        direction,
        count,
        percent,
        rows,
        with_ties,
    })
}
/// Parse a qualified name (schema.table.column or just table)
/// One part becomes a bare Identifier; two or more parts become a Column
/// whose `table` holds all leading parts joined with dots.
fn parse_qualified_name(&mut self) -> Result<Expression> {
    let mut parts = vec![self.expect_identifier_or_keyword()?];
    while self.match_token(TokenType::Dot) {
        parts.push(self.expect_identifier_or_keyword()?);
    }
    // The last part is the name proper; anything before it qualifies it.
    let last = parts.pop().expect("at least one part was parsed");
    if parts.is_empty() {
        Ok(Expression::Identifier(Identifier::new(last)))
    } else {
        Ok(Expression::boxed_column(Column {
            table: Some(Identifier::new(parts.join("."))),
            name: Identifier::new(last),
            join_mark: false,
            trailing_comments: Vec::new(),
            span: None,
            inferred_type: None,
        }))
    }
}
// ==================== Phase 4: Additional DDL Parsing ====================
/// Parse CREATE SCHEMA statement
///
/// CREATE SCHEMA [IF NOT EXISTS] <name>
///     [CLONE <source> [AT|BEFORE (...)]]   -- Snowflake
///     [AUTHORIZATION <owner>]
///     [WITH (prop = val, ...)]             -- Trino/Presto
///     [DEFAULT COLLATE <collation>]        -- BigQuery
fn parse_create_schema(&mut self, leading_comments: Vec<String>) -> Result<Expression> {
    self.expect(TokenType::Schema)?;
    let if_not_exists =
        self.match_keywords(&[TokenType::If, TokenType::Not, TokenType::Exists]);
    let name = self.parse_identifier_parts()?;
    // Parse CLONE clause (Snowflake)
    let clone_from = if self.match_identifier("CLONE") {
        Some(self.parse_identifier_parts()?)
    } else {
        None
    };
    // Parse AT/BEFORE clause for time travel (Snowflake)
    // Note: BEFORE is a keyword token, AT is an identifier
    // The clause body (e.g. OFFSET => -3600) is reconstructed as raw SQL text,
    // re-quoting string literals and applying minimal spacing rules.
    // NOTE(review): this reconstruction is duplicated in parse_create_database;
    // consider extracting a shared helper.
    let at_clause = if self.match_identifier("AT") || self.match_token(TokenType::Before) {
        let keyword = self.previous().text.to_ascii_uppercase();
        self.expect(TokenType::LParen)?;
        // Parse the content: OFFSET => value or TIMESTAMP => value
        let mut result = format!("{} (", keyword);
        let mut prev_token_type: Option<TokenType> = None;
        let mut paren_depth = 1; // Track nested parens
        while !self.is_at_end() && paren_depth > 0 {
            let token = self.advance();
            if token.token_type == TokenType::LParen {
                paren_depth += 1;
            } else if token.token_type == TokenType::RParen {
                paren_depth -= 1;
                if paren_depth == 0 {
                    break; // Don't include the closing paren in result yet
                }
            }
            // Smart spacing: no space after ( or => or - and no space before (
            let needs_space = !result.ends_with('(')
                && prev_token_type != Some(TokenType::Arrow)
                && prev_token_type != Some(TokenType::Dash)
                && prev_token_type != Some(TokenType::LParen)
                && token.token_type != TokenType::LParen; // no space before (
            if needs_space
                && token.token_type != TokenType::RParen
                && token.token_type != TokenType::Comma
            {
                result.push(' ');
            }
            // Properly quote string literals
            if token.token_type == TokenType::String {
                result.push('\'');
                result.push_str(&token.text.replace('\'', "''"));
                result.push('\'');
            } else {
                result.push_str(&token.text);
            }
            if token.token_type == TokenType::Arrow || token.token_type == TokenType::Comma {
                result.push(' ');
            }
            prev_token_type = Some(token.token_type);
        }
        result.push(')');
        Some(Expression::Raw(Raw { sql: result }))
    } else {
        None
    };
    let authorization = if self.match_token(TokenType::Authorization) {
        Some(Identifier::new(self.expect_identifier()?))
    } else {
        None
    };
    // Parse schema properties like DEFAULT COLLATE or WITH (properties)
    let mut properties = Vec::new();
    // Parse WITH (prop1=val1, prop2=val2, ...) (Trino/Presto)
    if self.match_token(TokenType::With) {
        self.expect(TokenType::LParen)?;
        loop {
            // Parse property name (identifier or string)
            let prop_name = if self.check(TokenType::String) {
                Expression::Literal(Box::new(Literal::String(self.expect_string()?)))
            } else {
                Expression::Identifier(Identifier::new(self.expect_identifier_or_keyword()?))
            };
            self.expect(TokenType::Eq)?;
            // Parse property value
            let prop_value = self.parse_expression()?;
            // Create Property expression: key=value
            properties.push(Expression::Property(Box::new(Property {
                this: Box::new(prop_name),
                value: Some(Box::new(prop_value)),
            })));
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
        self.expect(TokenType::RParen)?;
    }
    // Parse DEFAULT COLLATE 'value' (BigQuery)
    // NOTE(review): if DEFAULT is not followed by COLLATE, the DEFAULT token is
    // still consumed by the first match — confirm no other DEFAULT-led schema
    // option is expected here.
    if self.match_token(TokenType::Default) && self.match_token(TokenType::Collate) {
        // Parse the collation value (could be string literal or identifier)
        let collation = self.parse_primary()?;
        properties.push(Expression::CollateProperty(Box::new(CollateProperty {
            this: Box::new(collation),
            default: Some(Box::new(Expression::Boolean(BooleanLiteral {
                value: true,
            }))),
        })));
    }
    Ok(Expression::CreateSchema(Box::new(CreateSchema {
        name,
        if_not_exists,
        authorization,
        clone_from,
        at_clause,
        properties,
        leading_comments,
    })))
}
/// Parse DROP SCHEMA statement
/// DROP SCHEMA [IF EXISTS] <name> [CASCADE | RESTRICT]
fn parse_drop_schema(&mut self) -> Result<Expression> {
    self.expect(TokenType::Schema)?;
    let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
    let name = Identifier::new(self.expect_identifier()?);
    // CASCADE and RESTRICT are mutually exclusive; RESTRICT is the default
    // behavior and is consumed without being recorded.
    let cascade = if self.match_token(TokenType::Cascade) {
        true
    } else {
        self.match_token(TokenType::Restrict);
        false
    };
    Ok(Expression::DropSchema(Box::new(DropSchema {
        name,
        if_exists,
        cascade,
    })))
}
/// Parse CREATE DATABASE statement
///
/// CREATE DATABASE [IF NOT EXISTS] <name>
///     [CLONE <source> [AT|BEFORE (...)]]   -- Snowflake time travel
///     [ON CLUSTER <cluster>]               -- ClickHouse
///     followed by any mix of options: OWNER, TEMPLATE, ENCODING,
///     CHARACTER SET, COLLATE, LOCATION (each with an optional `=`).
fn parse_create_database(&mut self) -> Result<Expression> {
    self.expect(TokenType::Database)?;
    let if_not_exists =
        self.match_keywords(&[TokenType::If, TokenType::Not, TokenType::Exists]);
    let name = Identifier::new(self.expect_identifier()?);
    // Check for Snowflake CLONE clause
    let clone_from = if self.match_identifier("CLONE") {
        Some(Identifier::new(self.expect_identifier()?))
    } else {
        None
    };
    // Parse AT/BEFORE clause for time travel (Snowflake)
    // Note: BEFORE is a keyword token, AT is an identifier
    // The clause body (e.g. OFFSET => -3600) is reconstructed as raw SQL text,
    // re-quoting string literals and applying minimal spacing rules.
    let at_clause = if self.match_identifier("AT") || self.match_token(TokenType::Before) {
        let keyword = self.previous().text.to_ascii_uppercase();
        self.expect(TokenType::LParen)?;
        // Parse the content: OFFSET => value or TIMESTAMP => value
        let mut result = format!("{} (", keyword);
        let mut prev_token_type: Option<TokenType> = None;
        let mut paren_depth = 1; // Track nested parens
        while !self.is_at_end() && paren_depth > 0 {
            let token = self.advance();
            if token.token_type == TokenType::LParen {
                paren_depth += 1;
            } else if token.token_type == TokenType::RParen {
                paren_depth -= 1;
                if paren_depth == 0 {
                    break; // Don't include the closing paren in result yet
                }
            }
            // Smart spacing: no space after ( or => or - and no space before (
            let needs_space = !result.ends_with('(')
                && prev_token_type != Some(TokenType::Arrow)
                && prev_token_type != Some(TokenType::Dash)
                && prev_token_type != Some(TokenType::LParen)
                && token.token_type != TokenType::LParen; // no space before (
            if needs_space
                && token.token_type != TokenType::RParen
                && token.token_type != TokenType::Comma
            {
                result.push(' ');
            }
            // Properly quote string literals
            if token.token_type == TokenType::String {
                result.push('\'');
                result.push_str(&token.text.replace('\'', "''"));
                result.push('\'');
            } else {
                result.push_str(&token.text);
            }
            if token.token_type == TokenType::Arrow || token.token_type == TokenType::Comma {
                result.push(' ');
            }
            prev_token_type = Some(token.token_type);
        }
        result.push(')');
        Some(Expression::Raw(Raw { sql: result }))
    } else {
        None
    };
    // ClickHouse: ON CLUSTER clause
    let _on_cluster = self.parse_on_cluster_clause()?;
    let mut options = Vec::new();
    // Parse database options; an unknown token ends the option list.
    while !self.is_at_end() && !self.check(TokenType::Semicolon) {
        // BUG FIX: this previously read `self.match_identifier("OWNER")
        // || self.match_token(TokenType::Eq)`, which let a stray `=` token
        // (consumed inside the condition) start an OWNER option. Only the
        // OWNER keyword should enter this branch; the `=` itself is optional
        // and handled below, as in every other option branch.
        if self.match_identifier("OWNER") {
            self.match_token(TokenType::Eq);
            options.push(DatabaseOption::Owner(Identifier::new(
                self.expect_identifier()?,
            )));
        } else if self.match_identifier("TEMPLATE") {
            self.match_token(TokenType::Eq);
            options.push(DatabaseOption::Template(Identifier::new(
                self.expect_identifier()?,
            )));
        } else if self.match_identifier("ENCODING") {
            self.match_token(TokenType::Eq);
            // Value may be a quoted string ('UTF8') or a bare identifier.
            let encoding = if self.check(TokenType::String) {
                let tok = self.advance();
                tok.text.trim_matches('\'').to_string()
            } else {
                self.expect_identifier()?
            };
            options.push(DatabaseOption::Encoding(encoding));
        } else if self.match_identifier("CHARACTER") {
            // MySQL: CHARACTER SET [=] <charset>
            self.match_token(TokenType::Set);
            self.match_token(TokenType::Eq);
            let charset = if self.check(TokenType::String) {
                let tok = self.advance();
                tok.text.trim_matches('\'').to_string()
            } else {
                self.expect_identifier()?
            };
            options.push(DatabaseOption::CharacterSet(charset));
        } else if self.match_identifier("COLLATE") {
            self.match_token(TokenType::Eq);
            let collate = if self.check(TokenType::String) {
                let tok = self.advance();
                tok.text.trim_matches('\'').to_string()
            } else {
                self.expect_identifier()?
            };
            options.push(DatabaseOption::Collate(collate));
        } else if self.match_identifier("LOCATION") {
            self.match_token(TokenType::Eq);
            let loc = if self.check(TokenType::String) {
                let tok = self.advance();
                tok.text.trim_matches('\'').to_string()
            } else {
                self.expect_identifier()?
            };
            options.push(DatabaseOption::Location(loc));
        } else {
            break;
        }
    }
    Ok(Expression::CreateDatabase(Box::new(CreateDatabase {
        name,
        if_not_exists,
        options,
        clone_from,
        at_clause,
    })))
}
/// Parse a `DROP DATABASE` statement.
///
/// Grammar handled: `DATABASE [IF EXISTS] <name>`, plus ClickHouse
/// extensions `IF EMPTY` (consumed and discarded), `ON CLUSTER <c>`
/// (consumed and discarded), and a trailing `SYNC` flag.
fn parse_drop_database(&mut self) -> Result<Expression> {
    self.expect(TokenType::Database)?;
    let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
    let is_clickhouse = matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    );
    // ClickHouse allows `DROP DATABASE IF EMPTY`; only probed when
    // `IF EXISTS` was not already matched (otherwise IF is gone).
    if !if_exists && is_clickhouse && self.check(TokenType::If) {
        let next_is_empty = self
            .tokens
            .get(self.current + 1)
            .map(|tok| tok.text.eq_ignore_ascii_case("EMPTY"))
            .unwrap_or(false);
        if next_is_empty {
            self.skip(); // IF
            self.skip(); // EMPTY
        }
    }
    let name = Identifier::new(self.expect_identifier()?);
    // ClickHouse: swallow an optional ON CLUSTER clause, then look for SYNC.
    let sync = if is_clickhouse {
        let _ = self.parse_on_cluster_clause()?;
        self.match_identifier("SYNC")
    } else {
        false
    };
    Ok(Expression::DropDatabase(Box::new(DropDatabase {
        name,
        if_exists,
        sync,
    })))
}
/// Parse CREATE FUNCTION statement.
///
/// Entered after the caller has consumed `CREATE [OR REPLACE] [OR ALTER]
/// [TEMPORARY] [TABLE]`; the booleans carry what the caller saw. Covers
/// dialect-specific forms visible below: TSQL (`RETURNS @var TABLE (...)`,
/// `AS RETURN SELECT ...`), BigQuery (`RETURNS TABLE <...>`, `OPTIONS (...)`),
/// Snowflake (`RETURNS TABLE(...)`, `RUNTIME_VERSION`, `PACKAGES`),
/// PostgreSQL (`$$` bodies, `SET key = value`, volatility keywords),
/// Hive (`USING JAR/FILE/ARCHIVE`), and Databricks (`ENVIRONMENT`,
/// `HANDLER`, `PARAMETER STYLE`).
///
/// `property_order` records the first occurrence of each option kind so the
/// generator can re-emit options in the order they were written.
fn parse_create_function(
    &mut self,
    or_replace: bool,
    or_alter: bool,
    temporary: bool,
    is_table_function: bool,
) -> Result<Expression> {
    self.expect(TokenType::Function)?;
    let if_not_exists =
        self.match_keywords(&[TokenType::If, TokenType::Not, TokenType::Exists]);
    let name = self.parse_table_ref()?;
    // Parse parameters (optional - some dialects allow CREATE FUNCTION f AS 'body').
    // has_parens is kept so the generator can reproduce the original shape.
    let (parameters, has_parens) = if self.match_token(TokenType::LParen) {
        let params = self.parse_function_parameters()?;
        self.expect(TokenType::RParen)?;
        (params, true)
    } else {
        (Vec::new(), false)
    };
    // Track if LANGUAGE appears before RETURNS (affects output ordering)
    let mut language_first = false;
    let mut return_type = None;
    let mut language = None;
    let mut sql_data_access = None;
    // Check for LANGUAGE before RETURNS
    if self.match_token(TokenType::Language) {
        language = Some(self.expect_identifier_or_keyword()?);
        language_first = true;
    }
    // Parse RETURNS clause (may come before or after LANGUAGE)
    let mut returns_table_body: Option<String> = None;
    if self.match_token(TokenType::Returns) {
        if self.check(TokenType::Var) && self.peek().text.starts_with('@') {
            // TSQL: RETURNS @var TABLE (col_defs)
            let var_name = self.advance().text.clone();
            if self.check(TokenType::Table) {
                self.skip(); // consume TABLE
                return_type = Some(DataType::Custom {
                    name: "TABLE".to_string(),
                });
                // Parse column definitions: scan raw tokens to the matching
                // RParen (tracking nesting depth) rather than fully parsing them.
                if self.match_token(TokenType::LParen) {
                    let start = self.current;
                    let mut depth = 1;
                    while depth > 0 && !self.is_at_end() {
                        if self.check(TokenType::LParen) {
                            depth += 1;
                        }
                        if self.check(TokenType::RParen) {
                            depth -= 1;
                            if depth == 0 {
                                break;
                            }
                        }
                        self.skip();
                    }
                    // Reconstruct the column definitions with proper spacing
                    let mut col_defs_str = String::new();
                    for (i, tok) in self.tokens[start..self.current].iter().enumerate() {
                        // Don't add space before comma, LParen, RParen
                        // Don't add space after LParen
                        let prev_tok = if i > 0 {
                            Some(&self.tokens[start + i - 1])
                        } else {
                            None
                        };
                        let needs_space = i > 0
                            && tok.token_type != TokenType::Comma
                            && tok.token_type != TokenType::RParen
                            && tok.token_type != TokenType::LParen
                            && prev_tok
                                .map(|p| p.token_type != TokenType::LParen)
                                .unwrap_or(true);
                        if needs_space {
                            col_defs_str.push(' ');
                        }
                        col_defs_str.push_str(&tok.text);
                    }
                    returns_table_body = Some(format!("{} TABLE ({})", var_name, col_defs_str));
                    self.expect(TokenType::RParen)?;
                } else {
                    returns_table_body = Some(format!("{} TABLE", var_name));
                }
            } else {
                // Parse data type after var name
                return_type = Some(self.parse_data_type()?);
            }
        } else if self.check(TokenType::Table) {
            // Could be:
            // - TSQL: RETURNS TABLE AS RETURN ...
            // - BigQuery: RETURNS TABLE <col1 TYPE, col2 TYPE>
            // - Snowflake: RETURNS TABLE(col1 TYPE, col2 TYPE)
            self.skip(); // consume TABLE
            if self.check(TokenType::Lt) {
                // BigQuery: RETURNS TABLE <col1 TYPE, col2 TYPE>
                self.skip(); // consume <
                let mut cols = Vec::new();
                loop {
                    let col_name = self.expect_identifier()?;
                    let col_type = self.parse_data_type()?;
                    cols.push(format!(
                        "{} {}",
                        col_name,
                        self.data_type_to_string(&col_type)
                    ));
                    if !self.match_token(TokenType::Comma) {
                        break;
                    }
                }
                if !self.match_token(TokenType::Gt) {
                    return Err(self.parse_error("Expected > after TABLE column definitions"));
                }
                returns_table_body = Some(format!("TABLE <{}>", cols.join(", ")));
            } else if self.check(TokenType::LParen) {
                // Snowflake: RETURNS TABLE(col1 TYPE, col2 TYPE)
                self.skip(); // consume (
                let mut cols = Vec::new();
                loop {
                    let col_name = self.expect_identifier()?;
                    let col_type = self.parse_data_type()?;
                    cols.push(format!(
                        "{} {}",
                        col_name,
                        self.data_type_to_string(&col_type)
                    ));
                    if !self.match_token(TokenType::Comma) {
                        break;
                    }
                }
                self.expect(TokenType::RParen)?;
                returns_table_body = Some(format!("TABLE ({})", cols.join(", ")));
            } else {
                // TSQL: RETURNS TABLE AS RETURN ...
                return_type = Some(DataType::Custom {
                    name: "TABLE".to_string(),
                });
            }
        } else {
            // Use parse_function_return_type to preserve original type names like 'integer'
            return_type = Some(self.parse_function_return_type()?);
        }
    }
    // Accumulators for the option loop below; see CreateFunction for field meaning.
    let mut deterministic = None;
    let mut returns_null_on_null_input = None;
    let mut strict = false;
    let mut security = None;
    let mut body = None;
    let mut set_options: Vec<FunctionSetOption> = Vec::new();
    let mut property_order: Vec<FunctionPropertyKind> = Vec::new();
    let mut using_resources: Vec<FunctionUsingResource> = Vec::new();
    let mut options: Vec<Expression> = Vec::new();
    let mut environment: Vec<Expression> = Vec::new();
    let mut handler: Option<String> = None;
    let mut handler_uses_eq = false;
    let mut runtime_version: Option<String> = None;
    let mut packages: Option<Vec<String>> = None;
    let mut parameter_style: Option<String> = None;
    // Parse function options; branch order matters since several prefixes
    // overlap (e.g. RETURNS NULL ... vs RETURNS <type>).
    while !self.is_at_end() && !self.check(TokenType::Semicolon) {
        if self.check(TokenType::Returns)
            && self.current + 1 < self.tokens.len()
            && self.tokens[self.current + 1].token_type == TokenType::Null
        {
            // RETURNS NULL ON NULL INPUT
            self.skip(); // consume RETURNS
            self.skip(); // consume NULL
            self.match_token(TokenType::On);
            self.match_token(TokenType::Null);
            self.match_token(TokenType::Input);
            returns_null_on_null_input = Some(true);
            if !property_order.contains(&FunctionPropertyKind::NullInput) {
                property_order.push(FunctionPropertyKind::NullInput);
            }
        } else if self.match_token(TokenType::Returns) {
            // RETURNS can come after LANGUAGE
            return_type = Some(self.parse_data_type()?);
        } else if self.match_token(TokenType::Language) {
            // Language can be SQL, PLPGSQL, PYTHON, etc.
            language = Some(self.expect_identifier_or_keyword()?);
            if !property_order.contains(&FunctionPropertyKind::Language) {
                property_order.push(FunctionPropertyKind::Language);
            }
        // NOTE(review): if NOT matches but the next token is not DETERMINISTIC,
        // match_token has already consumed NOT and the `&&` short-circuit drops
        // into later branches with that token lost — confirm no valid option
        // sequence starts with a bare NOT here.
        } else if self.match_token(TokenType::Not) && self.match_identifier("DETERMINISTIC") {
            deterministic = Some(false);
            if !property_order.contains(&FunctionPropertyKind::Determinism) {
                property_order.push(FunctionPropertyKind::Determinism);
            }
        } else if self.match_identifier("DETERMINISTIC") {
            deterministic = Some(true);
            if !property_order.contains(&FunctionPropertyKind::Determinism) {
                property_order.push(FunctionPropertyKind::Determinism);
            }
        } else if self.match_identifier("IMMUTABLE") {
            // PostgreSQL volatility: IMMUTABLE maps to deterministic = true
            deterministic = Some(true);
            if !property_order.contains(&FunctionPropertyKind::Determinism) {
                property_order.push(FunctionPropertyKind::Determinism);
            }
        } else if self.match_identifier("STABLE") || self.match_identifier("VOLATILE") {
            // STABLE and VOLATILE both map to deterministic = false
            deterministic = Some(false);
            if !property_order.contains(&FunctionPropertyKind::Determinism) {
                property_order.push(FunctionPropertyKind::Determinism);
            }
        } else if self.match_identifier("STRICT") {
            // PostgreSQL STRICT == RETURNS NULL ON NULL INPUT; the `strict`
            // flag preserves which spelling was used.
            returns_null_on_null_input = Some(true);
            strict = true;
            if !property_order.contains(&FunctionPropertyKind::NullInput) {
                property_order.push(FunctionPropertyKind::NullInput);
            }
        } else if self.match_identifier("CALLED") {
            // CALLED ON NULL INPUT
            self.match_token(TokenType::On);
            self.match_token(TokenType::Null);
            self.match_token(TokenType::Input);
            returns_null_on_null_input = Some(false);
            if !property_order.contains(&FunctionPropertyKind::NullInput) {
                property_order.push(FunctionPropertyKind::NullInput);
            }
        } else if self.match_identifier("SECURITY") {
            if self.match_identifier("DEFINER") {
                security = Some(FunctionSecurity::Definer);
            } else if self.match_identifier("INVOKER") {
                security = Some(FunctionSecurity::Invoker);
            }
            if !property_order.contains(&FunctionPropertyKind::Security) {
                property_order.push(FunctionPropertyKind::Security);
            }
        } else if self.match_identifier("CONTAINS") {
            // CONTAINS SQL
            self.match_identifier("SQL");
            sql_data_access = Some(SqlDataAccess::ContainsSql);
            if !property_order.contains(&FunctionPropertyKind::SqlDataAccess) {
                property_order.push(FunctionPropertyKind::SqlDataAccess);
            }
        } else if self.match_identifier("READS") {
            // READS SQL DATA
            self.match_identifier("SQL");
            self.match_identifier("DATA");
            sql_data_access = Some(SqlDataAccess::ReadsSqlData);
            if !property_order.contains(&FunctionPropertyKind::SqlDataAccess) {
                property_order.push(FunctionPropertyKind::SqlDataAccess);
            }
        } else if self.match_identifier("MODIFIES") {
            // MODIFIES SQL DATA
            self.match_identifier("SQL");
            self.match_identifier("DATA");
            sql_data_access = Some(SqlDataAccess::ModifiesSqlData);
            if !property_order.contains(&FunctionPropertyKind::SqlDataAccess) {
                property_order.push(FunctionPropertyKind::SqlDataAccess);
            }
        // NOTE(review): same short-circuit hazard as the NOT DETERMINISTIC
        // branch above — a matched NO is consumed even when "SQL" does not follow.
        } else if self.match_token(TokenType::No) && self.match_identifier("SQL") {
            // NO SQL
            sql_data_access = Some(SqlDataAccess::NoSql);
            if !property_order.contains(&FunctionPropertyKind::SqlDataAccess) {
                property_order.push(FunctionPropertyKind::SqlDataAccess);
            }
        } else if self.match_token(TokenType::Set) {
            // PostgreSQL: SET key = value / SET key TO value / SET key FROM CURRENT
            let opt_name = self.expect_identifier_or_keyword()?;
            let value = if self.match_token(TokenType::From) {
                // SET key FROM CURRENT
                if !self.match_token(TokenType::Current) {
                    return Err(self.parse_error("Expected CURRENT after FROM in SET option"));
                }
                FunctionSetValue::FromCurrent
            } else {
                // SET key = value or SET key TO value
                let use_to = self.match_token(TokenType::To);
                if !use_to && !self.match_token(TokenType::Eq) {
                    return Err(self.parse_error("Expected = or TO after SET key"));
                }
                // Value can be a string literal or identifier
                let val = if self.check(TokenType::String) {
                    let tok = self.advance();
                    format!("'{}'", tok.text)
                } else {
                    self.expect_identifier_or_keyword()?
                };
                FunctionSetValue::Value { value: val, use_to }
            };
            set_options.push(FunctionSetOption {
                name: opt_name,
                value,
            });
            if !property_order.contains(&FunctionPropertyKind::Set) {
                property_order.push(FunctionPropertyKind::Set);
            }
        } else if self.match_token(TokenType::As) {
            // Parse function body: AS RETURN x, AS $$ ... $$, AS BEGIN ... END, AS 'body'
            if !property_order.contains(&FunctionPropertyKind::As) {
                property_order.push(FunctionPropertyKind::As);
            }
            if self.match_identifier("RETURN") {
                // AS RETURN expression (or SELECT statement for TSQL TVFs)
                let expr = if self.check(TokenType::Select) || self.check(TokenType::With) {
                    // TSQL: AS RETURN SELECT ... for table-valued functions
                    self.parse_statement()?
                } else {
                    self.parse_expression()?
                };
                body = Some(FunctionBody::Return(expr));
            } else if self.check(TokenType::Select) || self.check(TokenType::With) {
                // TSQL: AS SELECT ... for table-valued functions (without RETURN keyword)
                let stmt = self.parse_statement()?;
                body = Some(FunctionBody::Expression(stmt));
            } else if self.check(TokenType::DollarString) {
                let tok = self.advance();
                // Parse the dollar string token to extract tag and content
                let (tag, content) = crate::tokens::parse_dollar_string_token(&tok.text);
                body = Some(FunctionBody::DollarQuoted { content, tag });
            } else if self.check(TokenType::String) {
                let tok = self.advance();
                body = Some(FunctionBody::StringLiteral(tok.text.clone()));
            } else if self.match_token(TokenType::Begin) {
                // Parse BEGIN...END block: collect raw token text (space-joined)
                // instead of parsing, tracking nesting for inner BEGIN/END.
                let mut block_content = String::new();
                let mut depth = 1;
                while depth > 0 && !self.is_at_end() {
                    let tok = self.advance();
                    if tok.token_type == TokenType::Begin {
                        depth += 1;
                    } else if tok.token_type == TokenType::End {
                        depth -= 1;
                        if depth == 0 {
                            break;
                        }
                    }
                    block_content.push_str(&tok.text);
                    block_content.push(' ');
                }
                body = Some(FunctionBody::Block(block_content.trim().to_string()));
            } else if self.check(TokenType::Table) {
                // DuckDB: AS TABLE SELECT ... (table macro)
                self.advance(); // consume TABLE
                if return_type.is_none() {
                    return_type = Some(DataType::Custom {
                        name: "TABLE".to_string(),
                    });
                }
                let stmt = self.parse_statement()?;
                body = Some(FunctionBody::Return(stmt));
            } else {
                // Expression-based body
                let expr = self.parse_expression()?;
                body = Some(FunctionBody::Expression(expr));
            }
        } else if self.match_identifier("RETURN") {
            // RETURN expression (or SELECT statement for TSQL TVFs)
            let expr = if self.check(TokenType::Select) || self.check(TokenType::With) {
                self.parse_statement()?
            } else {
                self.parse_expression()?
            };
            body = Some(FunctionBody::Return(expr));
        } else if self.match_token(TokenType::Using) {
            // Hive: USING JAR 'uri' [, FILE 'uri', ARCHIVE 'uri', ...]
            while self.match_identifier("JAR")
                || self.match_identifier("FILE")
                || self.match_identifier("ARCHIVE")
            {
                let kind = self.previous().text.to_ascii_uppercase();
                let uri = self.expect_string()?;
                using_resources.push(FunctionUsingResource { kind, uri });
                let _ = self.match_token(TokenType::Comma);
            }
            if !property_order.contains(&FunctionPropertyKind::Using) {
                property_order.push(FunctionPropertyKind::Using);
            }
        } else if self.match_identifier("EXTERNAL") {
            // TSQL CLR: EXTERNAL NAME assembly.class.method
            self.match_identifier("NAME");
            let ext_name = if self.check(TokenType::String) {
                let tok = self.advance();
                tok.text.trim_matches('\'').to_string()
            } else {
                self.expect_identifier()?
            };
            body = Some(FunctionBody::External(ext_name));
        } else if self.match_identifier("OPTIONS") {
            // BigQuery: OPTIONS (key=value, ...) - track in property_order
            let parsed_options = self.parse_options_list()?;
            options.extend(parsed_options);
            if !property_order.contains(&FunctionPropertyKind::Options) {
                property_order.push(FunctionPropertyKind::Options);
            }
        } else if self.match_identifier("ENVIRONMENT") {
            // Databricks: ENVIRONMENT (dependencies = '...', environment_version = '...')
            let parsed_env = self.parse_environment_list()?;
            environment.extend(parsed_env);
            if !property_order.contains(&FunctionPropertyKind::Environment) {
                property_order.push(FunctionPropertyKind::Environment);
            }
        } else if self.match_identifier("HANDLER") {
            // Databricks: HANDLER 'handler_function'
            // handler_uses_eq preserves whether `=` was written (Snowflake style).
            handler_uses_eq = self.match_token(TokenType::Eq);
            if self.check(TokenType::String) {
                let tok = self.advance();
                handler = Some(tok.text.clone());
            }
            if !property_order.contains(&FunctionPropertyKind::Handler) {
                property_order.push(FunctionPropertyKind::Handler);
            }
        } else if self.match_identifier("RUNTIME_VERSION") {
            // Snowflake: RUNTIME_VERSION = '3.8' (string, number, or bare word)
            let _ = self.match_token(TokenType::Eq);
            if self.check(TokenType::String)
                || self.check(TokenType::Number)
                || self.is_identifier_or_keyword_token()
            {
                runtime_version = Some(self.advance().text.clone());
            }
            if !property_order.contains(&FunctionPropertyKind::RuntimeVersion) {
                property_order.push(FunctionPropertyKind::RuntimeVersion);
            }
        } else if self.match_identifier("PACKAGES") {
            // Snowflake: PACKAGES = ('pkg1', 'pkg2', ...)
            let _ = self.match_token(TokenType::Eq);
            let mut parsed_packages = Vec::new();
            if self.match_token(TokenType::LParen) {
                while !self.is_at_end() && !self.check(TokenType::RParen) {
                    if self.check(TokenType::String)
                        || self.check(TokenType::Identifier)
                        || self.check(TokenType::Var)
                        || self.check(TokenType::Number)
                        || self.check_keyword()
                    {
                        parsed_packages.push(self.advance().text.clone());
                    } else {
                        break;
                    }
                    if !self.match_token(TokenType::Comma) {
                        break;
                    }
                }
                self.expect(TokenType::RParen)?;
            }
            packages = Some(parsed_packages);
            if !property_order.contains(&FunctionPropertyKind::Packages) {
                property_order.push(FunctionPropertyKind::Packages);
            }
        } else if self.match_text_seq(&["PARAMETER", "STYLE"]) {
            // Databricks: PARAMETER STYLE PANDAS
            let style = self.expect_identifier_or_keyword()?;
            parameter_style = Some(style.to_ascii_uppercase());
            if !property_order.contains(&FunctionPropertyKind::ParameterStyle) {
                property_order.push(FunctionPropertyKind::ParameterStyle);
            }
        } else if self.check_identifier("SQL")
            && self.current + 1 < self.tokens.len()
            && self.tokens[self.current + 1]
                .text
                .eq_ignore_ascii_case("SECURITY")
        {
            // SQL SECURITY DEFINER/INVOKER
            self.skip(); // consume SQL
            self.skip(); // consume SECURITY
            if self.match_identifier("DEFINER") {
                security = Some(FunctionSecurity::Definer);
            } else if self.match_identifier("INVOKER") {
                security = Some(FunctionSecurity::Invoker);
            }
            if !property_order.contains(&FunctionPropertyKind::Security) {
                property_order.push(FunctionPropertyKind::Security);
            }
        } else if self.check(TokenType::Select) || self.check(TokenType::With) {
            // Bare SELECT/WITH body (without AS keyword) - e.g., MySQL
            let stmt = self.parse_statement()?;
            body = Some(FunctionBody::Expression(stmt));
            if !property_order.contains(&FunctionPropertyKind::As) {
                property_order.push(FunctionPropertyKind::As);
            }
        } else {
            // Unknown token: stop option parsing and let the caller handle it.
            break;
        }
    }
    // BigQuery: OPTIONS (key=value, ...) can also appear after AS body (legacy position)
    if options.is_empty() && self.match_identifier("OPTIONS") {
        let parsed_options = self.parse_options_list()?;
        options.extend(parsed_options);
        if !property_order.contains(&FunctionPropertyKind::Options) {
            property_order.push(FunctionPropertyKind::Options);
        }
    }
    Ok(Expression::CreateFunction(Box::new(CreateFunction {
        name,
        parameters,
        return_type,
        body,
        or_replace,
        or_alter,
        if_not_exists,
        temporary,
        language,
        deterministic,
        returns_null_on_null_input,
        security,
        has_parens,
        sql_data_access,
        returns_table_body,
        language_first,
        set_options,
        strict,
        options,
        is_table_function,
        property_order,
        using_resources,
        environment,
        handler,
        handler_uses_eq,
        runtime_version,
        packages,
        parameter_style,
    })))
}
/// Parse function parameters.
///
/// Parses the comma-separated parameter list of CREATE FUNCTION /
/// CREATE PROCEDURE. The opening `(` has already been consumed by the
/// caller; the closing `)` is left for the caller to expect.
///
/// Each parameter may carry a mode (IN / OUT / INOUT / IN OUT / VARIADIC),
/// an optional name, a data type, and an optional DEFAULT/`=` expression.
/// Because `OUT x INT` and `OUT INT` are both legal, mode parameters are
/// disambiguated by backtracking: try to parse a type directly, and fall
/// back to name-then-type if what follows is not a parameter terminator.
fn parse_function_parameters(&mut self) -> Result<Vec<FunctionParameter>> {
    let mut params = Vec::new();
    // Empty parameter list: `()`.
    if self.check(TokenType::RParen) {
        return Ok(params);
    }
    loop {
        let mut mode = None;
        // mode_text preserves the exact spelling (e.g. "IN OUT" vs "INOUT")
        // so the generator can round-trip it.
        let mut mode_text: Option<String> = None;
        // Check for parameter mode (IN, OUT, INOUT, VARIADIC)
        // Note: OUT, INOUT, VARIADIC are tokenized as Var, not as dedicated keywords
        if self.match_token(TokenType::In) {
            // IN or IN OUT
            if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("OUT") {
                let out_text = self.advance().text.clone(); // consume OUT
                mode_text = Some(format!("IN {}", out_text));
                mode = Some(ParameterMode::InOut);
            } else {
                mode_text = Some("IN".to_string());
                mode = Some(ParameterMode::In);
            }
        } else if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("OUT") {
            let text = self.advance().text.clone();
            mode_text = Some(text);
            mode = Some(ParameterMode::Out);
        } else if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("INOUT") {
            let text = self.advance().text.clone();
            mode_text = Some(text);
            mode = Some(ParameterMode::InOut);
        } else if self.check(TokenType::Var)
            && self.peek().text.eq_ignore_ascii_case("VARIADIC")
        {
            let text = self.advance().text.clone();
            mode_text = Some(text);
            mode = Some(ParameterMode::Variadic);
        }
        // Try to parse name and type
        // After a mode keyword (VARIADIC, OUT, etc.), the next thing could be:
        // - a type directly (e.g., VARIADIC INT[], OUT INT)
        // - a name then a type (e.g., VARIADIC a INT[], OUT result INT)
        //
        // Strategy: use backtracking. Save position, try parsing as data type.
        // If the result is followed by , or ) or DEFAULT, it was a type-only param.
        // Otherwise, restore position and parse as name + type.
        let (name, data_type) = if mode.is_some() {
            let saved = self.current;
            // Try parsing as a data type directly
            let type_result = self.parse_data_type();
            if let Ok(dt) = type_result {
                if self.check(TokenType::Comma)
                    || self.check(TokenType::RParen)
                    || self.check(TokenType::Default)
                    || self.check(TokenType::Eq)
                {
                    // Successfully parsed as a type-only parameter
                    (None, dt)
                } else {
                    // Not followed by comma/rparen — restore and parse as name + type
                    self.current = saved;
                    // INPUT/OUTPUT are keyword tokens but valid as parameter names.
                    let first_ident =
                        if self.check(TokenType::Input) || self.check(TokenType::Output) {
                            let token = self.advance();
                            Identifier {
                                name: token.text,
                                quoted: false,
                                trailing_comments: Vec::new(),
                                span: None,
                            }
                        } else {
                            self.expect_identifier_with_quoted()?
                        };
                    // TSQL allows an optional AS between name and type.
                    self.match_token(TokenType::As);
                    let dt = self.parse_data_type()?;
                    (Some(first_ident), dt)
                }
            } else {
                // Type parse failed — restore and try as name + type
                self.current = saved;
                let first_ident =
                    if self.check(TokenType::Input) || self.check(TokenType::Output) {
                        let token = self.advance();
                        Identifier {
                            name: token.text,
                            quoted: false,
                            trailing_comments: Vec::new(),
                            span: None,
                        }
                    } else {
                        self.expect_identifier_with_quoted()?
                    };
                if self.check(TokenType::Comma)
                    || self.check(TokenType::RParen)
                    || self.check(TokenType::Default)
                {
                    // The identifier itself was the type (kept verbatim as Custom).
                    (None, self.identifier_to_datatype(&first_ident.name)?)
                } else {
                    self.match_token(TokenType::As);
                    let dt = self.parse_data_type()?;
                    (Some(first_ident), dt)
                }
            }
        } else {
            // No mode keyword — original logic
            // Handle keywords like INPUT that may be used as parameter names
            let first_ident = if self.check(TokenType::Input) || self.check(TokenType::Output) {
                let token = self.advance();
                Identifier {
                    name: token.text,
                    quoted: false,
                    trailing_comments: Vec::new(),
                    span: None,
                }
            } else {
                self.expect_identifier_with_quoted()?
            };
            // Check if next token is a type or if this was the type
            if self.check(TokenType::Comma)
                || self.check(TokenType::RParen)
                || self.check(TokenType::Default)
            {
                // This was the type, no name
                (None, self.identifier_to_datatype(&first_ident.name)?)
            } else {
                // This was the name, next is type
                // TSQL allows: @param AS type (optional AS keyword)
                self.match_token(TokenType::As);
                let dt = self.parse_data_type()?;
                (Some(first_ident), dt)
            }
        };
        // Optional default value: `DEFAULT expr` or `= expr`.
        let default = if self.match_token(TokenType::Default) || self.match_token(TokenType::Eq)
        {
            Some(self.parse_expression()?)
        } else {
            None
        };
        params.push(FunctionParameter {
            name,
            data_type,
            mode,
            default,
            mode_text: mode_text.clone(),
        });
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    Ok(params)
}
/// Parse TSQL-style unparenthesized procedure parameters.
///
/// Format: `@param1 TYPE [= default], @param2 TYPE, ... AS`. Stops as soon
/// as the current token is not a `@variable`, leaving the rest (e.g. the
/// `AS` keyword) for the caller.
fn parse_tsql_procedure_params(&mut self) -> Result<Vec<FunctionParameter>> {
    let mut collected = Vec::new();
    while self.check(TokenType::Var) {
        let var_name = self.advance().text.clone();
        // TSQL permits an optional AS between the name and the type.
        self.match_token(TokenType::As);
        let ty = self.parse_data_type()?;
        // Default value may be introduced by DEFAULT or `=`.
        let has_default =
            self.match_token(TokenType::Default) || self.match_token(TokenType::Eq);
        let default = if has_default {
            Some(self.parse_expression()?)
        } else {
            None
        };
        collected.push(FunctionParameter {
            name: Some(Identifier::new(var_name)),
            data_type: ty,
            mode: None,
            default,
            mode_text: None,
        });
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    Ok(collected)
}
/// Convert a bare identifier into a [`DataType`] for function parameters.
///
/// The name is kept verbatim as `DataType::Custom` rather than being
/// normalized (e.g. 'integer' is NOT rewritten to 'INT'), mirroring Python
/// sqlglot which stores such types as identifiers — this keeps identity
/// round-trips byte-exact.
fn identifier_to_datatype(&self, ident: &str) -> Result<DataType> {
    let name = ident.to_owned();
    Ok(DataType::Custom { name })
}
/// Parse a data type for a function RETURNS clause, preserving the type
/// name exactly as written.
///
/// A simple identifier like `integer` is wrapped as `DataType::Custom`
/// instead of being normalized to `INT` (matching Python sqlglot).
/// Parameterized types (`VARCHAR(10)`), array types (`INT[]`), and the
/// MySQL dialect (which needs its type-mapping pass, e.g. VARCHAR -> TEXT)
/// all go through the standard [`parse_data_type`] path.
fn parse_function_return_type(&mut self) -> Result<DataType> {
    // MySQL always uses standard parsing so dialect type mapping applies.
    if matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::MySQL)
    ) {
        return self.parse_data_type();
    }
    // A lone identifier not followed by `(` or `[` is treated as a
    // verbatim type name.
    let is_bare_type_name = (self.check(TokenType::Identifier) || self.check(TokenType::Var))
        && !self.check_next(TokenType::LParen)
        && !self.check_next(TokenType::LBracket);
    if is_bare_type_name {
        let name = self.advance().text.clone();
        Ok(DataType::Custom { name })
    } else {
        // Complex types fall back to the full data-type grammar.
        self.parse_data_type()
    }
}
/// Parse a `DROP FUNCTION` statement.
///
/// Grammar: `FUNCTION [IF EXISTS] <name> [(type, ...)] [CASCADE | RESTRICT]`.
/// The optional parenthesized type list disambiguates overloaded functions;
/// `RESTRICT` is consumed but not recorded (it is the default behavior).
fn parse_drop_function(&mut self) -> Result<Expression> {
    self.expect(TokenType::Function)?;
    let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
    let name = self.parse_table_ref()?;
    // Optional parameter types for overloaded functions
    let parameters = if self.match_token(TokenType::LParen) {
        let mut arg_types = Vec::new();
        if !self.check(TokenType::RParen) {
            arg_types.push(self.parse_data_type()?);
            while self.match_token(TokenType::Comma) {
                arg_types.push(self.parse_data_type()?);
            }
        }
        self.expect(TokenType::RParen)?;
        Some(arg_types)
    } else {
        None
    };
    let cascade = self.match_token(TokenType::Cascade);
    if !cascade {
        // RESTRICT is the default; swallow it without recording.
        self.match_token(TokenType::Restrict);
    }
    Ok(Expression::DropFunction(Box::new(DropFunction {
        name,
        parameters,
        if_exists,
        cascade,
    })))
}
/// Parse CREATE PROCEDURE statement.
///
/// Entered after `CREATE [OR REPLACE] [OR ALTER]` with the current token on
/// PROCEDURE (or the TSQL `PROC` shorthand, which is recorded so generation
/// can round-trip it). Handles TSQL parameters with or without parentheses,
/// Snowflake `RETURNS` / `EXECUTE AS`, TSQL `WITH <options>`, and several
/// body forms: string literal, `$$...$$` heredoc, BEGIN...END statement
/// list, bare statement after AS, and MySQL's raw BEGIN...END block.
fn parse_create_procedure(&mut self, or_replace: bool, or_alter: bool) -> Result<Expression> {
    // Check if PROC shorthand was used before consuming the token
    let use_proc_keyword = self.peek().text.eq_ignore_ascii_case("PROC");
    self.expect(TokenType::Procedure)?;
    let if_not_exists =
        self.match_keywords(&[TokenType::If, TokenType::Not, TokenType::Exists]);
    let name = self.parse_table_ref()?;
    // Parse parameters (optional parentheses for TSQL)
    let (parameters, has_parens) = if self.match_token(TokenType::LParen) {
        let params = self.parse_function_parameters()?;
        self.expect(TokenType::RParen)?;
        (params, true)
    } else if self.check(TokenType::Var) && !self.check(TokenType::As) {
        // TSQL: CREATE PROCEDURE foo @a INTEGER, @b INTEGER AS ...
        // Parameters without parentheses
        let params = self.parse_tsql_procedure_params()?;
        (params, false)
    } else {
        (Vec::new(), false)
    };
    let mut language = None;
    let mut security = None;
    let mut body = None;
    let mut return_type = None;
    let mut execute_as = None;
    let mut with_options: Vec<String> = Vec::new();
    // Parse procedure options until an unknown token, the body, or end of input.
    while !self.is_at_end() && !self.check(TokenType::Semicolon) {
        if self.match_token(TokenType::Returns) {
            // RETURNS type [NOT NULL] (Snowflake)
            return_type = Some(self.parse_data_type()?);
            // Consume optional NOT NULL / NULL constraint on return type
            if self.match_keywords(&[TokenType::Not, TokenType::Null]) {
                // NOT NULL — ignore for now, just consume
            } else {
                self.match_token(TokenType::Null); // optional NULL
            }
        } else if self.match_identifier("EXECUTE") || self.match_token(TokenType::Execute) {
            // EXECUTE AS CALLER/OWNER (Snowflake)
            if self.match_token(TokenType::As) {
                if self.match_identifier("CALLER") {
                    execute_as = Some("CALLER".to_string());
                } else if self.match_identifier("OWNER") {
                    execute_as = Some("OWNER".to_string());
                } else if self.match_identifier("SELF") {
                    execute_as = Some("SELF".to_string());
                }
            }
        } else if self.match_token(TokenType::Language) {
            // Language can be SQL, PLPGSQL, PYTHON, etc.
            language = Some(self.expect_identifier_or_keyword()?);
        } else if self.match_identifier("SECURITY") {
            if self.match_identifier("DEFINER") {
                security = Some(FunctionSecurity::Definer);
            } else if self.match_identifier("INVOKER") {
                security = Some(FunctionSecurity::Invoker);
            }
        } else if self.match_token(TokenType::With) {
            // TSQL: WITH option1, option2, ... AS body
            // Options: ENCRYPTION, RECOMPILE, SCHEMABINDING, NATIVE_COMPILATION,
            // EXECUTE AS {OWNER|SELF|CALLER|'username'}
            loop {
                if self.match_identifier("EXECUTE") || self.match_token(TokenType::Execute) {
                    // EXECUTE AS {OWNER|SELF|CALLER|'username'}
                    self.expect(TokenType::As)?;
                    if self.check(TokenType::String) {
                        let tok = self.advance();
                        with_options.push(format!("EXECUTE AS '{}'", tok.text));
                    } else {
                        let ident = self.expect_identifier_or_keyword()?;
                        with_options.push(format!("EXECUTE AS {}", ident.to_ascii_uppercase()));
                    }
                } else {
                    // Bare option keyword, stored uppercased.
                    let opt = self.expect_identifier_or_keyword()?;
                    with_options.push(opt.to_ascii_uppercase());
                }
                if !self.match_token(TokenType::Comma) {
                    break;
                }
            }
        } else if self.match_token(TokenType::As) {
            // Parse procedure body
            if self.check(TokenType::String) {
                // TokenType::String means single-quoted - tokenizer strips quotes
                let tok = self.advance();
                body = Some(FunctionBody::StringLiteral(tok.text.clone()));
            } else if self.check(TokenType::HeredocString) {
                // $$...$$ dollar-quoted body (Snowflake/PostgreSQL)
                let tok = self.advance();
                body = Some(FunctionBody::Block(tok.text.clone()));
            } else if self.match_token(TokenType::Begin) {
                // Parse BEGIN ... END block as a list of statements
                let mut statements = Vec::new();
                while !self.check(TokenType::End) && !self.is_at_end() {
                    // Skip optional semicolons between statements
                    while self.match_token(TokenType::Semicolon) {}
                    if self.check(TokenType::End) {
                        break;
                    }
                    statements.push(self.parse_statement()?);
                    // Skip optional semicolon after statement
                    self.match_token(TokenType::Semicolon);
                }
                self.expect(TokenType::End)?;
                body = Some(FunctionBody::Statements(statements));
            } else {
                // TSQL: AS <statement> (e.g., AS SELECT 1)
                let stmt = self.parse_statement()?;
                body = Some(FunctionBody::Expression(stmt));
            }
        } else if self.check(TokenType::Begin)
            && matches!(
                self.config.dialect,
                Some(crate::dialects::DialectType::MySQL)
            )
        {
            // MySQL: BEGIN...END without AS keyword
            // Collect entire block as raw text since MySQL procedural
            // constructs (IF/SIGNAL/WHILE/etc.) aren't parseable as statements
            let start = self.current;
            self.skip(); // consume BEGIN
            let mut depth = 1;
            while !self.is_at_end() && depth > 0 {
                if self.check(TokenType::Begin) {
                    depth += 1;
                } else if self.check(TokenType::End) {
                    // Check if this END is a structured control-flow END marker
                    // (END IF, END WHILE, END LOOP, END CASE, END REPEAT, END FOR),
                    // which should NOT decrement the BEGIN/END counter.
                    let is_structured_end = self
                        .peek_nth(1)
                        .map(|t| {
                            matches!(
                                t.token_type,
                                TokenType::If | TokenType::Case | TokenType::For
                            ) || matches!(
                                t.text.to_ascii_uppercase().as_str(),
                                "IF" | "WHILE" | "LOOP" | "REPEAT" | "CASE" | "FOR"
                            )
                        })
                        .unwrap_or(false);
                    if !is_structured_end {
                        depth -= 1;
                        if depth == 0 {
                            break;
                        }
                    }
                }
                self.skip();
            }
            // Reconstruct the skipped tokens as raw SQL text; the final END
            // is expected separately and re-appended below.
            let raw = self.tokens_to_sql(start, self.current);
            self.expect(TokenType::End)?;
            // Consume optional label after END (e.g., END myproc)
            if self.is_identifier_token() || self.check(TokenType::Var) {
                self.skip();
            }
            body = Some(FunctionBody::RawBlock(format!("{} END", raw)));
            break;
        } else {
            // Unknown token: stop option parsing.
            break;
        }
    }
    Ok(Expression::CreateProcedure(Box::new(CreateProcedure {
        name,
        parameters,
        body,
        or_replace,
        or_alter,
        if_not_exists,
        language,
        security,
        return_type,
        execute_as,
        with_options,
        has_parens,
        use_proc_keyword,
    })))
}
/// Parse a `DROP PROCEDURE` statement.
///
/// Grammar: `PROCEDURE [IF EXISTS] <name> [(type, ...)] [CASCADE | RESTRICT]`.
/// The optional type list disambiguates overloaded procedures; `RESTRICT`
/// is consumed but not recorded (it is the default behavior).
fn parse_drop_procedure(&mut self) -> Result<Expression> {
    self.expect(TokenType::Procedure)?;
    let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
    let name = self.parse_table_ref()?;
    // Optional parenthesized signature types.
    let parameters = if self.match_token(TokenType::LParen) {
        let mut arg_types = Vec::new();
        if !self.check(TokenType::RParen) {
            arg_types.push(self.parse_data_type()?);
            while self.match_token(TokenType::Comma) {
                arg_types.push(self.parse_data_type()?);
            }
        }
        self.expect(TokenType::RParen)?;
        Some(arg_types)
    } else {
        None
    };
    let cascade = self.match_token(TokenType::Cascade);
    if !cascade {
        // RESTRICT is the default; swallow it without recording.
        self.match_token(TokenType::Restrict);
    }
    Ok(Expression::DropProcedure(Box::new(DropProcedure {
        name,
        parameters,
        if_exists,
        cascade,
    })))
}
/// Parse CREATE SEQUENCE statement.
///
/// Accepts the union of PostgreSQL / Oracle / Snowflake option grammars:
/// `CREATE [TEMPORARY] [OR REPLACE] SEQUENCE [IF NOT EXISTS] name [AS type] option*`
///
/// Options may be comma-separated (Snowflake) or whitespace-separated, and
/// values may use `=` (Snowflake) or keyword forms (`INCREMENT BY`,
/// `START WITH`). `seq.property_order` records the order and spelling of
/// each option as it appeared (e.g. `NOMINVALUE` vs `NO MINVALUE`) so the
/// generator can round-trip the original text.
fn parse_create_sequence(&mut self, temporary: bool, or_replace: bool) -> Result<Expression> {
self.expect(TokenType::Sequence)?;
let if_not_exists =
self.match_keywords(&[TokenType::If, TokenType::Not, TokenType::Exists]);
let name = self.parse_table_ref()?;
// Start from an all-empty sequence description; fields are filled in as
// options are recognized below.
let mut seq = CreateSequence {
name,
if_not_exists,
temporary,
or_replace,
as_type: None,
increment: None,
minvalue: None,
maxvalue: None,
start: None,
cache: None,
cycle: false,
owned_by: None,
owned_by_none: false,
order: None,
comment: None,
sharing: None,
scale_modifier: None,
shard_modifier: None,
property_order: Vec::new(),
};
// Parse optional AS <type> clause (e.g., AS SMALLINT, AS BIGINT)
if self.match_token(TokenType::As) {
seq.as_type = Some(self.parse_data_type()?);
}
// Parse sequence options
// Handle optional WITH keyword before options (Snowflake: WITH START = n INCREMENT = n)
self.match_token(TokenType::With);
loop {
// Skip optional commas between options (Snowflake uses comma-separated options)
self.match_token(TokenType::Comma);
if self.is_at_end() || self.check(TokenType::Semicolon) {
break;
}
// Each branch consumes one option; the chain order matters because some
// options share prefixes (e.g. the single-token `NO MINVALUE` pair vs
// the bare `NO` fallback further down).
if self.match_token(TokenType::Increment) || self.match_identifier("INCREMENT") {
self.match_token(TokenType::By);
self.match_token(TokenType::Eq); // Snowflake uses = instead of BY
seq.increment = Some(self.parse_signed_integer()?);
seq.property_order.push(SeqPropKind::Increment);
} else if self.match_token(TokenType::Minvalue) {
seq.minvalue = Some(SequenceBound::Value(self.parse_signed_integer()?));
seq.property_order.push(SeqPropKind::Minvalue);
} else if self.match_keywords(&[TokenType::No, TokenType::Minvalue]) {
seq.minvalue = Some(SequenceBound::None);
seq.property_order.push(SeqPropKind::Minvalue);
} else if self.match_identifier("NOMINVALUE") {
// Oracle/Snowflake single-word spelling; recorded distinctly so the
// generator can reproduce it verbatim.
seq.minvalue = Some(SequenceBound::None);
seq.property_order.push(SeqPropKind::NoMinvalueWord);
} else if self.match_token(TokenType::Maxvalue) {
seq.maxvalue = Some(SequenceBound::Value(self.parse_signed_integer()?));
seq.property_order.push(SeqPropKind::Maxvalue);
} else if self.match_keywords(&[TokenType::No, TokenType::Maxvalue]) {
seq.maxvalue = Some(SequenceBound::None);
seq.property_order.push(SeqPropKind::Maxvalue);
} else if self.match_identifier("NOMAXVALUE") {
seq.maxvalue = Some(SequenceBound::None);
seq.property_order.push(SeqPropKind::NoMaxvalueWord);
} else if self.match_token(TokenType::Start) {
self.match_token(TokenType::With);
self.match_token(TokenType::Eq); // Snowflake uses = instead of WITH
seq.start = Some(self.parse_signed_integer()?);
seq.property_order.push(SeqPropKind::Start);
} else if self.match_token(TokenType::Cache) {
seq.cache = Some(self.parse_signed_integer()?);
seq.property_order.push(SeqPropKind::Cache);
} else if self.match_identifier("NOCACHE") {
// Oracle: NOCACHE (single word)
seq.property_order.push(SeqPropKind::NoCacheWord);
} else if self.match_token(TokenType::Cycle) {
seq.cycle = true;
seq.property_order.push(SeqPropKind::Cycle);
} else if self.match_token(TokenType::NoCycle) {
// NOCYCLE keyword token - preserve as single word
seq.cycle = false;
seq.property_order.push(SeqPropKind::NoCycleWord);
} else if self.match_token(TokenType::No) {
// Two-word NO forms (NO CYCLE / NO CACHE / NO MINVALUE / NO MAXVALUE)
if self.match_token(TokenType::Cycle) {
seq.cycle = false;
seq.property_order.push(SeqPropKind::NoCycle);
} else if self.match_token(TokenType::Cache) || self.match_identifier("CACHE") {
seq.property_order.push(SeqPropKind::NoCache);
} else if self.match_token(TokenType::Minvalue) {
seq.minvalue = Some(SequenceBound::None);
seq.property_order.push(SeqPropKind::Minvalue);
} else if self.match_token(TokenType::Maxvalue) {
seq.maxvalue = Some(SequenceBound::None);
seq.property_order.push(SeqPropKind::Maxvalue);
} else {
// Unexpected token after NO: stop option parsing (NO is already consumed)
break;
}
} else if self.match_token(TokenType::Owned) {
// PostgreSQL: OWNED BY table.column | OWNED BY NONE
self.expect(TokenType::By)?;
if self.match_identifier("NONE") {
seq.owned_by = None;
seq.owned_by_none = true;
} else {
seq.owned_by = Some(self.parse_table_ref()?);
}
seq.property_order.push(SeqPropKind::OwnedBy);
} else if self.match_token(TokenType::Order) {
// Snowflake/Oracle: ORDER option
seq.order = Some(true);
seq.property_order.push(SeqPropKind::Order);
} else if self.match_identifier("NOORDER") {
// Snowflake/Oracle: NOORDER option
seq.order = Some(false);
seq.property_order.push(SeqPropKind::NoOrder);
} else if self.match_token(TokenType::Comment) || self.match_identifier("COMMENT") {
// Snowflake: COMMENT = 'value'
self.expect(TokenType::Eq)?;
let comment_val = self.expect(TokenType::String)?;
seq.comment = Some(comment_val.text.clone());
seq.property_order.push(SeqPropKind::Comment);
} else if self.match_identifier("SHARING") {
// Oracle: SHARING=value
self.expect(TokenType::Eq)?;
let val = self.expect_identifier_or_keyword()?;
seq.sharing = Some(val);
seq.property_order.push(SeqPropKind::Sharing);
} else if self.match_identifier("NOKEEP") {
seq.property_order.push(SeqPropKind::NoKeep);
} else if self.match_token(TokenType::Keep) || self.match_identifier("KEEP") {
seq.property_order.push(SeqPropKind::Keep);
} else if self.match_identifier("SCALE") {
// Oracle: SCALE [EXTEND|NOEXTEND]; empty modifier means bare SCALE
let modifier = if self.match_identifier("EXTEND") {
"EXTEND".to_string()
} else if self.match_identifier("NOEXTEND") {
"NOEXTEND".to_string()
} else {
String::new()
};
seq.scale_modifier = Some(modifier);
seq.property_order.push(SeqPropKind::Scale);
} else if self.match_identifier("NOSCALE") {
seq.property_order.push(SeqPropKind::NoScale);
} else if self.match_identifier("SHARD") {
// Oracle: SHARD [EXTEND|NOEXTEND]; empty modifier means bare SHARD
let modifier = if self.match_identifier("EXTEND") {
"EXTEND".to_string()
} else if self.match_identifier("NOEXTEND") {
"NOEXTEND".to_string()
} else {
String::new()
};
seq.shard_modifier = Some(modifier);
seq.property_order.push(SeqPropKind::Shard);
} else if self.match_identifier("NOSHARD") {
seq.property_order.push(SeqPropKind::NoShard);
} else if self.match_identifier("SESSION") {
seq.property_order.push(SeqPropKind::Session);
} else if self.match_identifier("GLOBAL") {
seq.property_order.push(SeqPropKind::Global);
} else {
// Unknown token: leave it for the caller (e.g. statement terminator)
break;
}
}
Ok(Expression::CreateSequence(Box::new(seq)))
}
/// Parse a signed integer literal: an optional leading `-` (Dash token)
/// followed by a Number token.
///
/// The sign is prepended to the digit string *before* parsing so that the
/// most negative value round-trips: `i64::MIN` has a magnitude that does not
/// fit in `i64`, so the previous parse-then-negate approach rejected
/// `-9223372036854775808`.
///
/// # Errors
/// Returns a parse error when the next token is not a Number or when the
/// (signed) digits do not fit in an `i64`.
fn parse_signed_integer(&mut self) -> Result<i64> {
    let negative = self.match_token(TokenType::Dash);
    let tok = self.expect(TokenType::Number)?;
    // Build the full signed literal so `str::parse` handles the boundary case.
    let literal = if negative {
        format!("-{}", tok.text)
    } else {
        tok.text.clone()
    };
    literal
        .parse::<i64>()
        .map_err(|_| self.parse_error(format!("Invalid integer: {}", literal)))
}
/// Parse DROP SEQUENCE [IF EXISTS] name [CASCADE | RESTRICT]
fn parse_drop_sequence(&mut self) -> Result<Expression> {
    self.expect(TokenType::Sequence)?;
    let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
    let name = self.parse_table_ref()?;
    // CASCADE is recorded; a trailing RESTRICT (the default) is consumed
    // without being stored.
    let cascade = if self.match_token(TokenType::Cascade) {
        true
    } else {
        self.match_token(TokenType::Restrict);
        false
    };
    Ok(Expression::DropSequence(Box::new(DropSequence {
        name,
        if_exists,
        cascade,
    })))
}
/// Parse ALTER SEQUENCE statement.
///
/// `ALTER SEQUENCE [IF EXISTS] name option*` where each option updates one
/// field of [`AlterSequence`]. Nested `Option`s distinguish "clause absent"
/// from "clause present with no value":
/// - `restart`: `Some(Some(n))` = `RESTART [WITH] n`, `Some(None)` = bare `RESTART`
/// - `owned_by`: `Some(None)` = `OWNED BY NONE`, `Some(Some(t))` = `OWNED BY t`
fn parse_alter_sequence(&mut self) -> Result<Expression> {
self.expect(TokenType::Sequence)?;
let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
let name = self.parse_table_ref()?;
let mut seq = AlterSequence {
name,
if_exists,
increment: None,
minvalue: None,
maxvalue: None,
start: None,
restart: None,
cache: None,
cycle: None,
owned_by: None,
};
// Parse sequence options
while !self.is_at_end() && !self.check(TokenType::Semicolon) {
if self.match_token(TokenType::Increment) || self.match_identifier("INCREMENT") {
// INCREMENT [BY] n
self.match_token(TokenType::By);
seq.increment = Some(self.parse_signed_integer()?);
} else if self.match_token(TokenType::Minvalue) {
seq.minvalue = Some(SequenceBound::Value(self.parse_signed_integer()?));
} else if self.match_keywords(&[TokenType::No, TokenType::Minvalue]) {
seq.minvalue = Some(SequenceBound::None);
} else if self.match_token(TokenType::Maxvalue) {
seq.maxvalue = Some(SequenceBound::Value(self.parse_signed_integer()?));
} else if self.match_keywords(&[TokenType::No, TokenType::Maxvalue]) {
seq.maxvalue = Some(SequenceBound::None);
} else if self.match_token(TokenType::Start) {
// START [WITH] n
self.match_token(TokenType::With);
seq.start = Some(self.parse_signed_integer()?);
} else if self.match_token(TokenType::Restart) {
// RESTART may appear bare or take a value; WITH is consumed if
// present, otherwise a lookahead for a (possibly negative) number
// decides whether a value follows.
if self.match_token(TokenType::With)
|| self.check(TokenType::Number)
|| self.check(TokenType::Dash)
{
seq.restart = Some(Some(self.parse_signed_integer()?));
} else {
seq.restart = Some(None);
}
} else if self.match_token(TokenType::Cache) {
seq.cache = Some(self.parse_signed_integer()?);
} else if self.match_token(TokenType::Cycle) {
seq.cycle = Some(true);
} else if self.match_token(TokenType::NoCycle) {
seq.cycle = Some(false);
} else if self.match_token(TokenType::Owned) {
self.expect(TokenType::By)?;
if self.match_identifier("NONE") {
seq.owned_by = Some(None);
} else {
seq.owned_by = Some(Some(self.parse_table_ref()?));
}
} else {
// Unknown token: stop and leave it for the caller
break;
}
}
Ok(Expression::AlterSequence(Box::new(seq)))
}
/// Parse CREATE TRIGGER statement.
///
/// Handles the standard form
/// `CREATE TRIGGER name {BEFORE|AFTER|INSTEAD OF} event [OR event]* ON table
///  [REFERENCING ...] [deferrable opts] [FOR EACH {ROW|STATEMENT}] [WHEN (...)]
///  {EXECUTE {FUNCTION|PROCEDURE} f(args) | BEGIN ... END}`.
///
/// Unrecognized shapes (TSQL `ON` before the timing, or an unknown timing
/// keyword) rewind to `create_pos` and fall back to a raw Command, matching
/// Python sqlglot behavior.
fn parse_create_trigger(
&mut self,
or_replace: bool,
or_alter: bool,
constraint: bool,
create_pos: usize,
) -> Result<Expression> {
self.expect(TokenType::Trigger)?;
let name = self.expect_identifier_with_quoted()?;
// TSQL triggers: CREATE TRIGGER name ON table AFTER INSERT AS BEGIN...END
// These have ON before timing, unlike standard triggers.
// Fall back to Command for these (matches Python sqlglot behavior).
if self.check(TokenType::On) && !constraint {
self.current = create_pos;
return self.fallback_to_command(create_pos);
}
// Parse timing (BEFORE, AFTER, INSTEAD OF)
let timing = if self.match_token(TokenType::Before) {
TriggerTiming::Before
} else if self.match_token(TokenType::After) {
TriggerTiming::After
} else if self.match_token(TokenType::Instead) {
self.expect(TokenType::Of)?;
TriggerTiming::InsteadOf
} else {
// Fall back to Command for unknown trigger syntax
self.current = create_pos;
return self.fallback_to_command(create_pos);
};
// Parse events: INSERT / UPDATE [OF col, ...] / DELETE / TRUNCATE,
// joined by OR (e.g. BEFORE INSERT OR UPDATE OF c1, c2)
let mut events = Vec::new();
loop {
if self.match_token(TokenType::Insert) {
events.push(TriggerEvent::Insert);
} else if self.match_token(TokenType::Update) {
if self.match_token(TokenType::Of) {
// UPDATE OF col [, col]* restricts the trigger to listed columns
let mut cols = Vec::new();
loop {
cols.push(Identifier::new(self.expect_identifier()?));
if !self.match_token(TokenType::Comma) {
break;
}
}
events.push(TriggerEvent::Update(Some(cols)));
} else {
events.push(TriggerEvent::Update(None));
}
} else if self.match_token(TokenType::Delete) {
events.push(TriggerEvent::Delete);
} else if self.match_token(TokenType::Truncate) {
events.push(TriggerEvent::Truncate);
} else {
break;
}
if !self.match_token(TokenType::Or) {
break;
}
}
self.expect(TokenType::On)?;
let table = self.parse_table_ref()?;
// Parse optional REFERENCING clause (for non-constraint triggers)
// REFERENCING {OLD|NEW} {TABLE|ROW} [AS] alias, repeated
let referencing = if !constraint && self.match_token(TokenType::Referencing) {
let mut ref_clause = TriggerReferencing {
old_table: None,
new_table: None,
old_row: None,
new_row: None,
};
while self.match_token(TokenType::Old) || self.match_token(TokenType::New) {
// previous() is whichever of OLD/NEW was just consumed
let is_old = self.previous().token_type == TokenType::Old;
let is_table = self.match_token(TokenType::Table);
// Consume a ROW keyword if TABLE was absent; the binding kept below
// only depends on is_table (anything not TABLE is treated as ROW).
let _is_row = !is_table && self.match_token(TokenType::Row);
self.match_token(TokenType::As);
let alias = Identifier::new(self.expect_identifier()?);
if is_old {
if is_table {
ref_clause.old_table = Some(alias);
} else {
ref_clause.old_row = Some(alias);
}
} else {
if is_table {
ref_clause.new_table = Some(alias);
} else {
ref_clause.new_row = Some(alias);
}
}
}
Some(ref_clause)
} else {
None
};
// Parse deferrable options for constraint triggers (comes before FOR EACH ROW in PostgreSQL)
let mut deferrable = None;
let mut initially_deferred = None;
if constraint {
if self.match_identifier("DEFERRABLE") {
deferrable = Some(true);
} else if self.match_keywords(&[TokenType::Not, TokenType::Identifier]) {
// NOT DEFERRABLE
// NOTE(review): this matches NOT followed by *any* Identifier token,
// not specifically "DEFERRABLE" — verify no other NOT <ident>
// sequence can legally appear at this position.
deferrable = Some(false);
}
if self.match_identifier("INITIALLY") {
if self.match_identifier("DEFERRED") {
initially_deferred = Some(true);
} else if self.match_identifier("IMMEDIATE") {
initially_deferred = Some(false);
}
}
}
// Parse FOR EACH ROW/STATEMENT (optional)
let for_each = if self.match_token(TokenType::For) {
self.match_token(TokenType::Each);
if self.match_token(TokenType::Row) {
Some(TriggerForEach::Row)
} else if self.match_token(TokenType::Statement) {
Some(TriggerForEach::Statement)
} else {
// FOR [EACH] with neither ROW nor STATEMENT defaults to ROW
Some(TriggerForEach::Row)
}
} else {
None
};
// Parse optional WHEN clause (parentheses are optional, e.g. SQLite)
let (when, when_paren) = if self.match_token(TokenType::When) {
let has_paren = self.match_token(TokenType::LParen);
let expr = self.parse_expression()?;
if has_paren {
self.expect(TokenType::RParen)?;
}
(Some(expr), has_paren)
} else {
(None, false)
};
// Parse trigger body: EXECUTE {FUNCTION|PROCEDURE} f(args) or BEGIN...END
let body = if self.match_token(TokenType::Execute) {
// Either keyword spelling may follow EXECUTE; both are optional matches
self.match_token(TokenType::Function);
self.match_token(TokenType::Procedure);
let func_name = self.parse_table_ref()?;
self.expect(TokenType::LParen)?;
let mut args = Vec::new();
if !self.check(TokenType::RParen) {
loop {
args.push(self.parse_expression()?);
if !self.match_token(TokenType::Comma) {
break;
}
}
}
self.expect(TokenType::RParen)?;
TriggerBody::Execute {
function: func_name,
args,
}
} else if self.match_token(TokenType::Begin) {
// Record start position (first token after BEGIN)
let body_start = if !self.is_at_end() {
self.tokens[self.current].span.start
} else {
0
};
// Skip tokens, tracking nested BEGIN/END pairs, until the matching END
let mut depth = 1;
while depth > 0 && !self.is_at_end() {
let tok = self.advance();
if tok.token_type == TokenType::Begin {
depth += 1;
} else if tok.token_type == TokenType::End {
depth -= 1;
if depth == 0 {
break;
}
}
}
// Extract verbatim text from source if available
let block_content = if let Some(ref source) = self.source {
// End position is the start of the END token (already consumed above)
let body_end = if self.current > 0 {
self.tokens[self.current - 1].span.start
} else {
body_start
};
source[body_start..body_end].trim().to_string()
} else {
// Fallback: no source available, keep an empty body
String::new()
};
TriggerBody::Block(block_content)
} else {
return Err(self.parse_error("Expected EXECUTE or BEGIN in trigger body"));
};
Ok(Expression::CreateTrigger(Box::new(CreateTrigger {
name,
table,
timing,
events,
for_each,
when,
when_paren,
body,
or_replace,
or_alter,
constraint,
deferrable,
initially_deferred,
referencing,
})))
}
/// Parse DROP TRIGGER [IF EXISTS] name [ON table] [CASCADE | RESTRICT]
fn parse_drop_trigger(&mut self) -> Result<Expression> {
    self.expect(TokenType::Trigger)?;
    let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
    let name = Identifier::new(self.expect_identifier()?);
    // Optional ON table clause naming the table the trigger is attached to.
    let mut table = None;
    if self.match_token(TokenType::On) {
        table = Some(self.parse_table_ref()?);
    }
    // CASCADE is recorded; a trailing RESTRICT is consumed but not stored.
    let cascade = if self.match_token(TokenType::Cascade) {
        true
    } else {
        self.match_token(TokenType::Restrict);
        false
    };
    Ok(Expression::DropTrigger(Box::new(DropTrigger {
        name,
        table,
        if_exists,
        cascade,
    })))
}
/// Parse CREATE TYPE statement.
///
/// Supports three definition forms after `AS`:
/// - `AS ENUM ('a', 'b', ...)` — enum of string labels
/// - `AS (attr type [COLLATE c], ...)` — composite type
/// - `AS RANGE (SUBTYPE = t [, SUBTYPE_DIFF = f] [, CANONICAL = f])`
fn parse_create_type(&mut self) -> Result<Expression> {
self.expect(TokenType::Type)?;
let if_not_exists =
self.match_keywords(&[TokenType::If, TokenType::Not, TokenType::Exists]);
let name = self.parse_table_ref()?;
self.expect(TokenType::As)?;
let definition = if self.match_token(TokenType::Enum) {
// ENUM type: one or more comma-separated string labels
self.expect(TokenType::LParen)?;
let mut values = Vec::new();
loop {
let tok = self.expect(TokenType::String)?;
// NOTE(review): trim_matches strips *all* leading/trailing quote
// characters — verify the tokenizer keeps surrounding quotes in
// `text` and that doubled-quote escapes are normalized upstream.
values.push(tok.text.trim_matches('\'').to_string());
if !self.match_token(TokenType::Comma) {
break;
}
}
self.expect(TokenType::RParen)?;
TypeDefinition::Enum(values)
} else if self.match_token(TokenType::LParen) {
// Composite type: (name type [COLLATE collation], ...)
let mut attrs = Vec::new();
loop {
let attr_name = Identifier::new(self.expect_identifier()?);
let data_type = self.parse_data_type()?;
let collate = if self.match_identifier("COLLATE") {
Some(Identifier::new(self.expect_identifier()?))
} else {
None
};
attrs.push(TypeAttribute {
name: attr_name,
data_type,
collate,
});
if !self.match_token(TokenType::Comma) {
break;
}
}
self.expect(TokenType::RParen)?;
TypeDefinition::Composite(attrs)
} else if self.match_token(TokenType::Range) {
// Range type: SUBTYPE is required first; SUBTYPE_DIFF/CANONICAL may
// follow in any order, each introduced by a comma
self.expect(TokenType::LParen)?;
self.match_identifier("SUBTYPE");
self.match_token(TokenType::Eq);
let subtype = self.parse_data_type()?;
let mut subtype_diff = None;
let mut canonical = None;
while self.match_token(TokenType::Comma) {
if self.match_identifier("SUBTYPE_DIFF") {
self.match_token(TokenType::Eq);
subtype_diff = Some(self.expect_identifier()?);
} else if self.match_identifier("CANONICAL") {
self.match_token(TokenType::Eq);
canonical = Some(self.expect_identifier()?);
}
}
self.expect(TokenType::RParen)?;
TypeDefinition::Range {
subtype,
subtype_diff,
canonical,
}
} else {
return Err(
self.parse_error("Expected ENUM, composite type definition, or RANGE after AS")
);
};
Ok(Expression::CreateType(Box::new(CreateType {
name,
definition,
if_not_exists,
})))
}
/// Parse CREATE DOMAIN statement.
///
/// The result is modelled as a [`CreateType`] whose definition is
/// `TypeDefinition::Domain { base_type, default, constraints }`.
fn parse_create_domain(&mut self) -> Result<Expression> {
    self.expect(TokenType::Domain)?;
    let if_not_exists =
        self.match_keywords(&[TokenType::If, TokenType::Not, TokenType::Exists]);
    let name = self.parse_table_ref()?;
    self.expect(TokenType::As)?;
    let base_type = self.parse_data_type()?;
    let mut default = None;
    let mut constraints = Vec::new();
    // Consume domain options until something unrecognized or end of statement.
    while !self.is_at_end() && !self.check(TokenType::Semicolon) {
        if self.match_token(TokenType::Default) {
            default = Some(self.parse_expression()?);
        } else if self.match_token(TokenType::Constraint) {
            // Named constraint: CONSTRAINT name CHECK (expr)
            let label = Some(Identifier::new(self.expect_identifier()?));
            self.expect(TokenType::Check)?;
            self.expect(TokenType::LParen)?;
            let predicate = self.parse_expression()?;
            self.expect(TokenType::RParen)?;
            constraints.push(DomainConstraint {
                name: label,
                check: predicate,
            });
        } else if self.match_token(TokenType::Check) {
            // Anonymous CHECK (expr)
            self.expect(TokenType::LParen)?;
            let predicate = self.parse_expression()?;
            self.expect(TokenType::RParen)?;
            constraints.push(DomainConstraint {
                name: None,
                check: predicate,
            });
        } else if self.match_keywords(&[TokenType::Not, TokenType::Null]) {
            // NOT NULL is modelled as an anonymous check: VALUE IS NOT NULL
            let value_ref = Expression::Identifier(Identifier::new("VALUE"));
            constraints.push(DomainConstraint {
                name: None,
                check: Expression::IsNull(Box::new(IsNull {
                    this: value_ref,
                    not: true,
                    postfix_form: false,
                })),
            });
        } else {
            break;
        }
    }
    Ok(Expression::CreateType(Box::new(CreateType {
        name,
        definition: TypeDefinition::Domain {
            base_type,
            default,
            constraints,
        },
        if_not_exists,
    })))
}
/// Parse CREATE STAGE statement (Snowflake), emitted as a Raw expression.
fn parse_create_stage(&mut self, or_replace: bool, temporary: bool) -> Result<Expression> {
    self.skip(); // consume the STAGE identifier token
    // Swallow everything up to the statement terminator; the FILE_FORMAT
    // clause is normalized by tokens_to_sql_stage_format.
    let body_start = self.current;
    while !(self.is_at_end() || self.check(TokenType::Semicolon)) {
        self.skip();
    }
    let body = self.tokens_to_sql_stage_format(body_start, self.current);
    // Reassemble the CREATE prefix with its modifiers in canonical order.
    let mut head = String::from("CREATE");
    if or_replace {
        head.push_str(" OR REPLACE");
    }
    if temporary {
        head.push_str(" TEMPORARY");
    }
    head.push_str(" STAGE");
    Ok(Expression::Raw(Raw {
        sql: format!("{} {}", head, body),
    }))
}
/// Parse CREATE TAG statement (Snowflake), captured verbatim as raw SQL.
fn parse_create_tag(&mut self, or_replace: bool) -> Result<Expression> {
    self.skip(); // consume TAG
    // Collect every remaining token of the statement as-is.
    let body_start = self.current;
    while !(self.is_at_end() || self.check(TokenType::Semicolon)) {
        self.skip();
    }
    let body = self.tokens_to_sql(body_start, self.current);
    let head = match or_replace {
        true => "CREATE OR REPLACE TAG",
        false => "CREATE TAG",
    };
    Ok(Expression::Raw(Raw {
        sql: format!("{} {}", head, body),
    }))
}
/// Parse CREATE STREAM statement (Snowflake), captured verbatim as raw SQL.
fn parse_create_stream(&mut self, _or_replace: bool) -> Result<Expression> {
    self.skip(); // consume STREAM
    // Everything up to the statement terminator is kept as-is.
    let body_start = self.current;
    while !(self.is_at_end() || self.check(TokenType::Semicolon)) {
        self.skip();
    }
    let remainder = self.tokens_to_sql(body_start, self.current);
    Ok(Expression::Raw(Raw {
        sql: format!("CREATE STREAM {}", remainder),
    }))
}
/// Parse CREATE TASK statement (Snowflake)
/// CREATE [OR REPLACE] TASK [IF NOT EXISTS] name
/// [WAREHOUSE = wh] [SCHEDULE = '...'] [AFTER task1, ...] [WHEN expr]
/// AS sql_statement
fn parse_create_task(&mut self, or_replace: bool) -> Result<Expression> {
    self.skip(); // consume TASK
    let if_not_exists =
        self.match_keywords(&[TokenType::If, TokenType::Not, TokenType::Exists]);
    // Assemble a possibly dot-qualified task name (db.schema.task).
    let mut task_name = String::new();
    if self.check(TokenType::Var) || self.check_keyword() || self.is_identifier_token() {
        task_name.push_str(&self.advance().text);
    }
    while self.check(TokenType::Dot) {
        self.skip();
        task_name.push('.');
        if self.check(TokenType::Var) || self.check_keyword() || self.is_identifier_token() {
            task_name.push_str(&self.advance().text);
        }
    }
    // Everything up to AS is kept as raw property text (WAREHOUSE, SCHEDULE, ...).
    let props_from = self.current;
    while !self.is_at_end() && !self.check(TokenType::Semicolon) && !self.check(TokenType::As) {
        self.skip();
    }
    let props_text = self.tokens_to_sql(props_from, self.current);
    // AS introduces the task body, parsed as a full statement.
    if !self.match_token(TokenType::As) {
        return Err(self.parse_error("Expected AS keyword in CREATE TASK"));
    }
    let task_body = self.parse_statement()?;
    Ok(Expression::CreateTask(Box::new(
        crate::expressions::CreateTask {
            or_replace,
            if_not_exists,
            name: task_name,
            properties: props_text,
            body: task_body,
        },
    )))
}
/// Parse CREATE FILE FORMAT statement (Snowflake), captured as raw SQL.
fn parse_create_file_format(
    &mut self,
    or_replace: bool,
    temporary: bool,
) -> Result<Expression> {
    // FILE and FORMAT arrive as two separate tokens; consume both.
    self.skip();
    self.skip();
    // Remaining statement tokens are kept verbatim.
    let body_start = self.current;
    while !(self.is_at_end() || self.check(TokenType::Semicolon)) {
        self.skip();
    }
    let body = self.tokens_to_sql(body_start, self.current);
    // Rebuild the CREATE prefix with its modifiers, then append the body.
    let mut out = String::from("CREATE");
    if or_replace {
        out.push_str(" OR REPLACE");
    }
    if temporary {
        out.push_str(" TEMPORARY");
    }
    out.push_str(" FILE FORMAT ");
    out.push_str(&body);
    Ok(Expression::Raw(Raw { sql: out }))
}
/// Parse DROP TYPE [IF EXISTS] name [CASCADE | RESTRICT]
fn parse_drop_type(&mut self) -> Result<Expression> {
    self.expect(TokenType::Type)?;
    let if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
    let name = self.parse_table_ref()?;
    // CASCADE is recorded; a trailing RESTRICT is consumed but not stored.
    let cascade = if self.match_token(TokenType::Cascade) {
        true
    } else {
        self.match_token(TokenType::Restrict);
        false
    };
    Ok(Expression::DropType(Box::new(DropType {
        name,
        if_exists,
        cascade,
    })))
}
/// Parse ALTER VIEW with MySQL-style modifiers already consumed by the caller
/// (ALGORITHM, DEFINER, SQL SECURITY are passed in as strings).
///
/// Recognizes one action per statement: RENAME TO, OWNER TO, SET
/// (TBLPROPERTIES / AUTHORIZATION / SCHEMA), UNSET TBLPROPERTIES,
/// ALTER [COLUMN], or AS <query>. Hive column aliases and a TSQL WITH
/// option may precede the action.
fn parse_alter_view_with_modifiers(
&mut self,
algorithm: Option<String>,
definer: Option<String>,
sql_security: Option<String>,
) -> Result<Expression> {
self.expect(TokenType::View)?;
let name = self.parse_table_ref()?;
let mut actions = Vec::new();
// Hive: Optional column aliases with optional COMMENT: (c1, c2) or (c1 COMMENT 'text', c2)
// Only parse if we see LParen followed by identifier (not SELECT for subquery)
let columns = if self.check(TokenType::LParen) {
// Peek ahead to see if this looks like column aliases
let saved = self.current;
self.skip(); // consume LParen
// Check if this is an identifier (column name) vs SELECT keyword
let is_column_aliases = self.check(TokenType::Identifier)
|| self.check(TokenType::Var)
|| self.check(TokenType::QuotedIdentifier);
if is_column_aliases {
// Parse column aliases
let mut cols = Vec::new();
loop {
let col_name = self.expect_identifier()?;
// Optional COMMENT 'text'
let comment = if self.match_token(TokenType::Comment) {
Some(self.expect_string()?)
} else {
None
};
cols.push(ViewColumn {
name: Identifier::new(col_name),
comment,
options: Vec::new(),
});
if !self.match_token(TokenType::Comma) {
break;
}
}
self.expect(TokenType::RParen)?;
cols
} else {
// Not column aliases: rewind to before the LParen and let the
// action parsing below see the parenthesized expression
self.current = saved; // retreat
Vec::new()
}
} else {
Vec::new()
};
// TSQL: WITH option (SCHEMABINDING, ENCRYPTION, VIEW_METADATA) before AS
let with_option = if self.match_token(TokenType::With) {
let opt = self.expect_identifier_or_keyword()?;
Some(opt.to_ascii_uppercase())
} else {
None
};
// Parse actions (at most one branch is taken per statement)
if self.match_token(TokenType::Rename) {
self.expect(TokenType::To)?;
actions.push(AlterViewAction::Rename(self.parse_table_ref()?));
} else if self.match_identifier("OWNER") {
self.expect(TokenType::To)?;
actions.push(AlterViewAction::OwnerTo(Identifier::new(
self.expect_identifier()?,
)));
} else if self.match_token(TokenType::Set) {
// Hive: SET TBLPROPERTIES ('key'='value', ...) or SET SCHEMA name
// Trino: SET AUTHORIZATION [ROLE] user
if self.match_identifier("TBLPROPERTIES") {
let props = self.parse_tblproperties_key_value_list()?;
actions.push(AlterViewAction::SetTblproperties(props));
} else if self.match_token(TokenType::Authorization) {
let mut auth_text = String::new();
if self.match_texts(&["ROLE"]) {
auth_text.push_str("ROLE ");
}
let user = self.expect_identifier()?;
auth_text.push_str(&user);
actions.push(AlterViewAction::SetAuthorization(auth_text));
} else {
self.expect(TokenType::Schema)?;
actions.push(AlterViewAction::SetSchema(Identifier::new(
self.expect_identifier()?,
)));
}
} else if self.match_identifier("UNSET") {
// Hive: UNSET TBLPROPERTIES ('key1', 'key2', ...)
if !self.match_identifier("TBLPROPERTIES") {
return Err(self.parse_error("Expected TBLPROPERTIES after UNSET"));
}
let keys = self.parse_tblproperties_key_list()?;
actions.push(AlterViewAction::UnsetTblproperties(keys));
} else if self.match_token(TokenType::Alter) {
// ALTER [COLUMN] name <column action>
self.match_token(TokenType::Column);
let col_name = Identifier::new(self.expect_identifier()?);
let action = self.parse_alter_column_action()?;
actions.push(AlterViewAction::AlterColumn {
name: col_name,
action,
});
} else if self.match_token(TokenType::As) {
// AS SELECT ... or AS SELECT ... UNION ... (redefine view query)
let query = self.parse_statement()?;
actions.push(AlterViewAction::AsSelect(Box::new(query)));
}
Ok(Expression::AlterView(Box::new(AlterView {
name,
actions,
algorithm,
definer,
sql_security,
with_option,
columns,
})))
}
/// Parse TBLPROPERTIES key-value list: ('key1'='value1', 'key2'='value2', ...)
///
/// At least one entry is required; entries are comma-separated and both key
/// and value must be string literals.
fn parse_tblproperties_key_value_list(&mut self) -> Result<Vec<(String, String)>> {
    self.expect(TokenType::LParen)?;
    let mut pairs = Vec::new();
    let mut more = true;
    while more {
        let key = self.expect_string()?;
        self.expect(TokenType::Eq)?;
        let value = self.expect_string()?;
        pairs.push((key, value));
        // A trailing comma means another entry follows.
        more = self.match_token(TokenType::Comma);
    }
    self.expect(TokenType::RParen)?;
    Ok(pairs)
}
/// Parse TBLPROPERTIES key list (for UNSET): ('key1', 'key2', ...)
///
/// At least one string key is required; keys are comma-separated.
fn parse_tblproperties_key_list(&mut self) -> Result<Vec<String>> {
    self.expect(TokenType::LParen)?;
    let mut keys = Vec::new();
    let mut more = true;
    while more {
        keys.push(self.expect_string()?);
        // A trailing comma means another key follows.
        more = self.match_token(TokenType::Comma);
    }
    self.expect(TokenType::RParen)?;
    Ok(keys)
}
/// Parse ALTER INDEX statement
///
/// `ALTER INDEX name [ON table] {RENAME TO n | SET [TABLESPACE] n | VISIBLE | INVISIBLE}`
fn parse_alter_index(&mut self) -> Result<Expression> {
    self.expect(TokenType::Index)?;
    // The *_with_quoted helper preserves the quoted flag on the name.
    let name = self.expect_identifier_or_keyword_with_quoted()?;
    let table = match self.match_token(TokenType::On) {
        true => Some(self.parse_table_ref()?),
        false => None,
    };
    let mut actions = Vec::new();
    if self.match_token(TokenType::Rename) {
        self.expect(TokenType::To)?;
        // The new name also keeps its quoted flag.
        let new_name = self.expect_identifier_or_keyword_with_quoted()?;
        actions.push(AlterIndexAction::Rename(new_name));
    } else if self.match_token(TokenType::Set) {
        // SET [TABLESPACE] name — the TABLESPACE word is optional here.
        self.match_identifier("TABLESPACE");
        let tablespace = self.expect_identifier_or_keyword_with_quoted()?;
        actions.push(AlterIndexAction::SetTablespace(tablespace));
    } else if self.match_identifier("VISIBLE") {
        actions.push(AlterIndexAction::Visible(true));
    } else if self.match_identifier("INVISIBLE") {
        actions.push(AlterIndexAction::Visible(false));
    }
    Ok(Expression::AlterIndex(Box::new(AlterIndex {
        name,
        table,
        actions,
    })))
}
// ==================== End DDL Parsing ====================
/// Parse an expression (with precedence)
/// Assignment (:=) has lower precedence than OR, matching Python sqlglot's
/// _parse_expression -> _parse_assignment -> _parse_disjunction chain
///
/// Also applies two ClickHouse-only postfix forms after the disjunction:
/// the ternary `cond ? a : b` (lowered to an IF node) and chained
/// `APPLY(func)` column transformers.
fn parse_expression(&mut self) -> Result<Expression> {
let mut left = self.parse_or()?;
// Handle := assignment operator (MySQL @var := val, DuckDB named args/settings)
// This has lower precedence than OR
while self.match_token(TokenType::ColonEq) {
let right = self.parse_or()?;
left = Expression::PropertyEQ(Box::new(BinaryOp::new(left, right)));
}
// ClickHouse ternary operator: condition ? true_value : false_value
// Parsed as: CASE WHEN condition THEN true_value ELSE false_value END
// Note: the `?` arrives from the tokenizer as TokenType::Parameter.
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.match_token(TokenType::Parameter)
{
// A colon immediately after `?` means the true branch is empty
if self.check(TokenType::Colon) {
return Err(
self.parse_error("Expected true expression after ? in ClickHouse ternary")
);
}
let true_value = self.parse_or()?;
// A missing `:` branch defaults the false value to NULL
let false_value = if self.match_token(TokenType::Colon) {
self.parse_or()?
} else {
Expression::Null(Null)
};
left = Expression::IfFunc(Box::new(IfFunc {
original_name: None,
condition: left,
true_value,
false_value: Some(false_value),
inferred_type: None,
}));
}
// ClickHouse: APPLY(func) column transformer
// e.g., COLUMNS('pattern') APPLY(toString) APPLY(length)
// Also: APPLY func (no parens), APPLY(x -> expr) (lambda)
// Only match APPLY when followed by ( — bare APPLY without ( is treated as an alias
// by the select expression parser (e.g., SELECT col apply -> SELECT col AS apply)
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) {
// Each APPLY wraps the accumulated expression, so chains nest left-to-right
while self.check(TokenType::Apply) && self.check_next(TokenType::LParen) {
self.skip(); // consume APPLY
self.skip(); // consume (
let expr = self.parse_expression()?;
self.expect(TokenType::RParen)?;
left = Expression::Apply(Box::new(crate::expressions::Apply {
this: Box::new(left),
expression: Box::new(expr),
}));
}
}
Ok(left)
}
/// Parse OR expressions
///
/// Builds a left-associative chain of `Expression::Or` nodes. In dialects
/// where `||` is logical OR (see [`Self::dpipe_is_logical_or`]) the DPipe
/// token is accepted as an OR operator too. Comments around each OR token
/// are captured and redistributed: block comments (`/*...*/`) attach before
/// the operator, line comments after it.
fn parse_or(&mut self) -> Result<Expression> {
let mut left = self.parse_xor()?;
while self.check(TokenType::Or)
|| (self.dpipe_is_logical_or() && self.check(TokenType::DPipe))
{
// Gather comments attached to the left operand's last token...
let mut all_comments = self.previous_trailing_comments().to_vec();
// Also capture leading comments on the OR token (comments on a separate line before OR)
all_comments.extend_from_slice(self.current_leading_comments());
self.skip(); // consume OR
// ...and any trailing comments on the OR token itself
all_comments.extend_from_slice(self.previous_trailing_comments());
// Clear trailing_comments from left expression to avoid duplication
if !all_comments.is_empty() {
Self::clear_rightmost_trailing_comments(&mut left);
}
// Filter out empty/whitespace-only comments
all_comments.retain(|c| !c.trim().is_empty());
// Split: block comments go before operator, line comments go after
let mut left_comments = Vec::new();
let mut operator_comments = Vec::new();
for comment in all_comments {
if comment.starts_with("/*") {
left_comments.push(comment);
} else {
operator_comments.push(comment);
}
}
let mut right = self.parse_xor()?;
// If parse_comparison stored pending leading comments, attach them
// to the right operand via an Annotated wrapper
if !self.pending_leading_comments.is_empty() {
let pending = std::mem::take(&mut self.pending_leading_comments);
right = Expression::Annotated(Box::new(Annotated {
this: right,
trailing_comments: pending,
}));
}
left = Expression::Or(Box::new(BinaryOp {
left,
right,
left_comments,
operator_comments,
trailing_comments: Vec::new(),
inferred_type: None,
}));
}
// Rebalancing of long boolean chains — presumably to bound recursion
// depth downstream; see maybe_rebalance_boolean_chain for the criteria.
Ok(Self::maybe_rebalance_boolean_chain(left, false))
}
/// Whether `||` should be parsed as logical OR for the active dialect.
///
/// Only MySQL and Solr treat `||` as disjunction here; every other dialect
/// leaves it to the concatenation operator handling.
fn dpipe_is_logical_or(&self) -> bool {
    match self.config.dialect {
        Some(crate::dialects::DialectType::MySQL)
        | Some(crate::dialects::DialectType::Solr) => true,
        _ => false,
    }
}
/// Parse XOR expressions (MySQL logical XOR)
///
/// Left-associative: `a XOR b XOR c` becomes `(a XOR b) XOR c`.
fn parse_xor(&mut self) -> Result<Expression> {
    let mut acc = self.parse_and()?;
    while self.match_token(TokenType::Xor) {
        let rhs = self.parse_and()?;
        acc = Expression::Xor(Box::new(Xor {
            this: Some(Box::new(acc)),
            expression: Some(Box::new(rhs)),
            expressions: Vec::new(),
        }));
    }
    Ok(acc)
}
/// Parse AND expressions
fn parse_and(&mut self) -> Result<Expression> {
    let mut left = self.parse_not()?;
    while self.check(TokenType::And) {
        // Gather every comment surrounding the AND connector, in order:
        // trailing comments of the token before AND (the left operand's last
        // token), leading comments on the AND token (comments on a separate
        // line before AND), then — after consuming AND — any trailing
        // comments attached to the AND token itself.
        let mut connector_comments = self.previous_trailing_comments().to_vec();
        connector_comments.extend_from_slice(self.current_leading_comments());
        self.skip(); // consume AND
        connector_comments.extend_from_slice(self.previous_trailing_comments());
        // Anything captured here must be removed from the left expression's
        // rightmost node to avoid emitting it twice.
        if !connector_comments.is_empty() {
            Self::clear_rightmost_trailing_comments(&mut left);
        }
        // Drop empty/whitespace-only comments (e.g., a bare "--"), then
        // split: block comments (/*...*/) stay BEFORE the operator
        // (left_comments) while line comments shift AFTER the operator
        // (operator_comments), matching Python sqlglot's behavior.
        connector_comments.retain(|c| !c.trim().is_empty());
        let (left_comments, operator_comments): (Vec<_>, Vec<_>) = connector_comments
            .into_iter()
            .partition(|c| c.starts_with("/*"));
        let mut right = self.parse_not()?;
        // If parse_comparison stashed pending leading comments (comments
        // before the right operand's first token with no comparison
        // following), attach them as trailing comments on the right side.
        if !self.pending_leading_comments.is_empty() {
            right = Expression::Annotated(Box::new(Annotated {
                this: right,
                trailing_comments: std::mem::take(&mut self.pending_leading_comments),
            }));
        }
        left = Expression::And(Box::new(BinaryOp {
            left,
            right,
            left_comments,
            operator_comments,
            trailing_comments: Vec::new(),
            inferred_type: None,
        }));
    }
    Ok(Self::maybe_rebalance_boolean_chain(left, true))
}
/// Rebalance AND/OR chains into a balanced tree when no connector comments are present.
/// This keeps connector chain depth logarithmic for very large predicates.
///
/// Returns `expr` unchanged when rebalancing is unsafe (connector comments
/// would move) or pointless (two or fewer leaves).
fn maybe_rebalance_boolean_chain(expr: Expression, is_and: bool) -> Expression {
    if !Self::should_rebalance_boolean_chain(&expr, is_and) {
        return expr;
    }
    // The previous version special-cased `terms.len() <= 2` only to call the
    // exact same builder — a single unconditional call covers every length.
    let terms = Self::flatten_boolean_terms_owned(expr, is_and);
    Self::build_balanced_boolean_tree(terms, is_and)
}
/// Returns true when `expr` is a pure AND (or OR, per `is_and`) chain with
/// more than two leaves and no connector comments anywhere in the chain —
/// i.e. rebalancing is both worthwhile and safe.
fn should_rebalance_boolean_chain(expr: &Expression, is_and: bool) -> bool {
    let mut leaves = 0usize;
    let mut pending = vec![expr];
    while let Some(node) = pending.pop() {
        match (is_and, node) {
            (true, Expression::And(op)) | (false, Expression::Or(op)) => {
                // A comment attached to any connector pins the tree shape:
                // rebalancing would relocate the comment, so refuse.
                let has_comments = !op.left_comments.is_empty()
                    || !op.operator_comments.is_empty()
                    || !op.trailing_comments.is_empty();
                if has_comments {
                    return false;
                }
                pending.push(&op.right);
                pending.push(&op.left);
            }
            _ => leaves += 1,
        }
    }
    leaves > 2
}
/// Flatten an owned AND (or OR, per `is_and`) chain into its leaf terms.
fn flatten_boolean_terms_owned(expr: Expression, is_and: bool) -> Vec<Expression> {
    let mut leaves = Vec::new();
    let mut pending = vec![expr];
    // The right child is pushed first so the left child is popped first,
    // preserving the original left-to-right term order.
    while let Some(node) = pending.pop() {
        match (is_and, node) {
            (true, Expression::And(op)) | (false, Expression::Or(op)) => {
                pending.push(op.right);
                pending.push(op.left);
            }
            (_, leaf) => leaves.push(leaf),
        }
    }
    leaves
}
/// Combine `terms` into a balanced binary AND/OR tree by repeatedly pairing
/// adjacent terms until one root remains. An empty input yields `NULL`.
fn build_balanced_boolean_tree(mut terms: Vec<Expression>, is_and: bool) -> Expression {
    if terms.is_empty() {
        return Expression::Null(Null);
    }
    while terms.len() > 1 {
        let mut paired = Vec::with_capacity((terms.len() + 1) / 2);
        let mut remaining = terms.into_iter();
        while let Some(first) = remaining.next() {
            match remaining.next() {
                Some(second) => {
                    // Join each adjacent pair with the requested connector.
                    let node = Box::new(BinaryOp::new(first, second));
                    paired.push(if is_and {
                        Expression::And(node)
                    } else {
                        Expression::Or(node)
                    });
                }
                // Odd element out carries over to the next round unchanged.
                None => paired.push(first),
            }
        }
        terms = paired;
    }
    terms.pop().unwrap_or(Expression::Null(Null))
}
/// Parse NOT expressions
fn parse_not(&mut self) -> Result<Expression> {
    if !self.match_token(TokenType::Not) {
        return self.parse_comparison();
    }
    // NOT is right-associative: recurse so `NOT NOT x` nests naturally.
    let operand = self.parse_not()?;
    Ok(Expression::Not(Box::new(UnaryOp::new(operand))))
}
/// Parse comparison expressions
fn parse_comparison(&mut self) -> Result<Expression> {
// Capture leading comments from the first token before parsing the left side.
// If a comparison operator follows, these are placed after the left operand.
let pre_left_comments = self.current_leading_comments().to_vec();
let mut left = self.parse_bitwise_or()?;
// Only attach pre-left comments when a comparison operator follows.
// When no comparison follows (e.g., in SELECT list expressions or AND operands),
// the comments are returned to the caller by being accessible via the
// `comparison_pre_left_comments` field, so they can be placed appropriately
// (e.g., after an alias name, or after the expression in an AND chain).
let has_comparison_op = !self.is_at_end()
&& matches!(
self.peek().token_type,
TokenType::Eq
| TokenType::Neq
| TokenType::Lt
| TokenType::Gt
| TokenType::Lte
| TokenType::Gte
| TokenType::Is
| TokenType::In
| TokenType::Not
| TokenType::Between
| TokenType::Like
| TokenType::ILike
| TokenType::RLike
| TokenType::SimilarTo
);
if !pre_left_comments.is_empty() {
if has_comparison_op {
// Comparison follows: attach comments between left operand and operator
match &mut left {
Expression::Column(col) => {
col.trailing_comments.extend(pre_left_comments);
}
Expression::Identifier(id) => {
id.trailing_comments.extend(pre_left_comments);
}
_ => {
left = Expression::Annotated(Box::new(Annotated {
this: left,
trailing_comments: pre_left_comments,
}));
}
}
} else {
// No comparison operator: store comments for the caller to use.
// Save them as "pending" comments that the caller can retrieve.
self.pending_leading_comments = pre_left_comments;
}
}
loop {
let mut global_in = false;
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.check_identifier("GLOBAL")
&& (self.check_next(TokenType::Not) || self.check_next(TokenType::In))
{
self.skip();
global_in = true;
}
let expr = if self.match_token(TokenType::Eq) {
// Check for ANY/ALL subquery
if self.match_token(TokenType::Any) || self.match_token(TokenType::Some) {
let was_any = self.previous_token_type() == Some(TokenType::Any);
self.expect(TokenType::LParen)?;
let inner = self.parse_statement()?;
self.expect(TokenType::RParen)?;
let subquery = if was_any {
self.maybe_wrap_in_subquery(inner)
} else {
inner
};
Expression::Any(Box::new(QuantifiedExpr {
this: left,
subquery,
op: Some(QuantifiedOp::Eq),
}))
} else if self.match_token(TokenType::All) {
self.expect(TokenType::LParen)?;
let inner = self.parse_statement()?;
self.expect(TokenType::RParen)?;
let subquery = self.maybe_wrap_in_subquery(inner);
Expression::All(Box::new(QuantifiedExpr {
this: left,
subquery,
op: Some(QuantifiedOp::Eq),
}))
} else {
let right = self.parse_bitwise_or()?;
let trailing_comments = self.previous_trailing_comments().to_vec();
Expression::Eq(Box::new(BinaryOp {
left,
right,
left_comments: Vec::new(),
operator_comments: Vec::new(),
trailing_comments,
inferred_type: None,
}))
}
} else if self.match_token(TokenType::Neq) {
// Check for ANY/ALL subquery
if self.match_token(TokenType::Any) || self.match_token(TokenType::Some) {
let was_any = self.previous_token_type() == Some(TokenType::Any);
self.expect(TokenType::LParen)?;
let inner = self.parse_statement()?;
self.expect(TokenType::RParen)?;
let subquery = if was_any {
self.maybe_wrap_in_subquery(inner)
} else {
inner
};
Expression::Any(Box::new(QuantifiedExpr {
this: left,
subquery,
op: Some(QuantifiedOp::Neq),
}))
} else if self.match_token(TokenType::All) {
self.expect(TokenType::LParen)?;
let inner = self.parse_statement()?;
self.expect(TokenType::RParen)?;
let subquery = self.maybe_wrap_in_subquery(inner);
Expression::All(Box::new(QuantifiedExpr {
this: left,
subquery,
op: Some(QuantifiedOp::Neq),
}))
} else {
let right = self.parse_bitwise_or()?;
let trailing_comments = self.previous_trailing_comments().to_vec();
Expression::Neq(Box::new(BinaryOp {
left,
right,
left_comments: Vec::new(),
operator_comments: Vec::new(),
trailing_comments,
inferred_type: None,
}))
}
} else if self.match_token(TokenType::Lt) {
// Check for ANY/ALL subquery
if self.match_token(TokenType::Any) || self.match_token(TokenType::Some) {
let was_any = self.previous_token_type() == Some(TokenType::Any);
self.expect(TokenType::LParen)?;
let inner = self.parse_statement()?;
self.expect(TokenType::RParen)?;
let subquery = if was_any {
self.maybe_wrap_in_subquery(inner)
} else {
inner
};
Expression::Any(Box::new(QuantifiedExpr {
this: left,
subquery,
op: Some(QuantifiedOp::Lt),
}))
} else if self.match_token(TokenType::All) {
self.expect(TokenType::LParen)?;
let inner = self.parse_statement()?;
self.expect(TokenType::RParen)?;
let subquery = self.maybe_wrap_in_subquery(inner);
Expression::All(Box::new(QuantifiedExpr {
this: left,
subquery,
op: Some(QuantifiedOp::Lt),
}))
} else {
let right = self.parse_bitwise_or()?;
let trailing_comments = self.previous_trailing_comments().to_vec();
Expression::Lt(Box::new(BinaryOp {
left,
right,
left_comments: Vec::new(),
operator_comments: Vec::new(),
trailing_comments,
inferred_type: None,
}))
}
} else if self.match_token(TokenType::Lte) {
// Check for ANY/ALL subquery
if self.match_token(TokenType::Any) || self.match_token(TokenType::Some) {
let was_any = self.previous_token_type() == Some(TokenType::Any);
self.expect(TokenType::LParen)?;
let inner = self.parse_statement()?;
self.expect(TokenType::RParen)?;
let subquery = if was_any {
self.maybe_wrap_in_subquery(inner)
} else {
inner
};
Expression::Any(Box::new(QuantifiedExpr {
this: left,
subquery,
op: Some(QuantifiedOp::Lte),
}))
} else if self.match_token(TokenType::All) {
self.expect(TokenType::LParen)?;
let inner = self.parse_statement()?;
self.expect(TokenType::RParen)?;
let subquery = self.maybe_wrap_in_subquery(inner);
Expression::All(Box::new(QuantifiedExpr {
this: left,
subquery,
op: Some(QuantifiedOp::Lte),
}))
} else {
let right = self.parse_bitwise_or()?;
let trailing_comments = self.previous_trailing_comments().to_vec();
Expression::Lte(Box::new(BinaryOp {
left,
right,
left_comments: Vec::new(),
operator_comments: Vec::new(),
trailing_comments,
inferred_type: None,
}))
}
} else if self.match_token(TokenType::Gt) {
// Check for ANY/ALL subquery
if self.match_token(TokenType::Any) || self.match_token(TokenType::Some) {
let was_any = self.previous_token_type() == Some(TokenType::Any);
self.expect(TokenType::LParen)?;
let inner = self.parse_statement()?;
self.expect(TokenType::RParen)?;
let subquery = if was_any {
self.maybe_wrap_in_subquery(inner)
} else {
inner
};
Expression::Any(Box::new(QuantifiedExpr {
this: left,
subquery,
op: Some(QuantifiedOp::Gt),
}))
} else if self.match_token(TokenType::All) {
self.expect(TokenType::LParen)?;
let inner = self.parse_statement()?;
self.expect(TokenType::RParen)?;
let subquery = self.maybe_wrap_in_subquery(inner);
Expression::All(Box::new(QuantifiedExpr {
this: left,
subquery,
op: Some(QuantifiedOp::Gt),
}))
} else {
let right = self.parse_bitwise_or()?;
let trailing_comments = self.previous_trailing_comments().to_vec();
Expression::Gt(Box::new(BinaryOp {
left,
right,
left_comments: Vec::new(),
operator_comments: Vec::new(),
trailing_comments,
inferred_type: None,
}))
}
} else if self.match_token(TokenType::Gte) {
// Check for ANY/ALL subquery
if self.match_token(TokenType::Any) || self.match_token(TokenType::Some) {
let was_any = self.previous_token_type() == Some(TokenType::Any);
self.expect(TokenType::LParen)?;
let inner = self.parse_statement()?;
self.expect(TokenType::RParen)?;
let subquery = if was_any {
self.maybe_wrap_in_subquery(inner)
} else {
inner
};
Expression::Any(Box::new(QuantifiedExpr {
this: left,
subquery,
op: Some(QuantifiedOp::Gte),
}))
} else if self.match_token(TokenType::All) {
self.expect(TokenType::LParen)?;
let inner = self.parse_statement()?;
self.expect(TokenType::RParen)?;
let subquery = self.maybe_wrap_in_subquery(inner);
Expression::All(Box::new(QuantifiedExpr {
this: left,
subquery,
op: Some(QuantifiedOp::Gte),
}))
} else {
let right = self.parse_bitwise_or()?;
let trailing_comments = self.previous_trailing_comments().to_vec();
Expression::Gte(Box::new(BinaryOp {
left,
right,
left_comments: Vec::new(),
operator_comments: Vec::new(),
trailing_comments,
inferred_type: None,
}))
}
} else if self.match_token(TokenType::NullsafeEq) {
// <=> (MySQL NULL-safe equality)
let right = self.parse_bitwise_or()?;
let trailing_comments = self.previous_trailing_comments().to_vec();
Expression::NullSafeEq(Box::new(BinaryOp {
left,
right,
left_comments: Vec::new(),
operator_comments: Vec::new(),
trailing_comments,
inferred_type: None,
}))
} else if self.check_identifier("SOUNDS") && self.check_next(TokenType::Like) {
// MySQL SOUNDS LIKE: expr SOUNDS LIKE expr -> SOUNDEX(expr) = SOUNDEX(expr)
self.skip(); // consume SOUNDS
self.skip(); // consume LIKE
let right = self.parse_bitwise_or()?;
// Transform: SOUNDEX(left) = SOUNDEX(right)
let soundex_left = Expression::Function(Box::new(Function::new(
"SOUNDEX".to_string(),
vec![left],
)));
let soundex_right = Expression::Function(Box::new(Function::new(
"SOUNDEX".to_string(),
vec![right],
)));
Expression::Eq(Box::new(BinaryOp::new(soundex_left, soundex_right)))
} else if self.match_token(TokenType::Like) {
// Check for ANY/ALL/SOME quantifier
let quantifier = if self.match_token(TokenType::Any) {
Some("ANY".to_string())
} else if self.match_token(TokenType::All) {
Some("ALL".to_string())
} else if self.match_token(TokenType::Some) {
Some("SOME".to_string())
} else {
None
};
let right = self.parse_bitwise_or()?;
let escape = if self.match_token(TokenType::Escape) {
Some(self.parse_primary()?)
} else {
None
};
Expression::Like(Box::new(LikeOp {
left,
right,
escape,
quantifier,
inferred_type: None,
}))
} else if self.match_token(TokenType::ILike) {
// Check for ANY/ALL/SOME quantifier
let quantifier = if self.match_token(TokenType::Any) {
Some("ANY".to_string())
} else if self.match_token(TokenType::All) {
Some("ALL".to_string())
} else if self.match_token(TokenType::Some) {
Some("SOME".to_string())
} else {
None
};
let right = self.parse_bitwise_or()?;
let escape = if self.match_token(TokenType::Escape) {
Some(self.parse_primary()?)
} else {
None
};
Expression::ILike(Box::new(LikeOp {
left,
right,
escape,
quantifier,
inferred_type: None,
}))
} else if self.check_identifier("SIMILAR") && self.check_next(TokenType::To) {
// SIMILAR TO operator (PostgreSQL/Redshift regex-like pattern matching)
self.skip(); // consume SIMILAR
self.skip(); // consume TO
let pattern = self.parse_bitwise_or()?;
let escape = if self.match_token(TokenType::Escape) {
Some(self.parse_primary()?)
} else {
None
};
Expression::SimilarTo(Box::new(SimilarToExpr {
this: left,
pattern,
escape,
not: false,
}))
} else if self.match_token(TokenType::Glob) {
let right = self.parse_bitwise_or()?;
Expression::Glob(Box::new(BinaryOp::new(left, right)))
} else if self.match_token(TokenType::Match) {
// SQLite MATCH operator (FTS full-text search)
let right = self.parse_bitwise_or()?;
Expression::Match(Box::new(BinaryOp::new(left, right)))
} else if self.match_token(TokenType::RLike) || self.match_token(TokenType::Tilde) {
// PostgreSQL ~ (regexp match) operator / RLIKE / REGEXP
let right = self.parse_bitwise_or()?;
Expression::RegexpLike(Box::new(RegexpFunc {
this: left,
pattern: right,
flags: None,
}))
} else if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::Exasol)
) && self.check_identifier("REGEXP_LIKE")
{
// Exasol: REGEXP_LIKE as infix binary operator
self.skip(); // consume REGEXP_LIKE
let right = self.parse_bitwise_or()?;
Expression::RegexpLike(Box::new(RegexpFunc {
this: left,
pattern: right,
flags: None,
}))
} else if self.match_token(TokenType::IRLike) {
// PostgreSQL ~* (case-insensitive regexp match) operator
let right = self.parse_bitwise_or()?;
Expression::RegexpILike(Box::new(RegexpILike {
this: Box::new(left),
expression: Box::new(right),
flag: None,
}))
} else if self.match_token(TokenType::NotLike) {
// PostgreSQL !~~ (NOT LIKE) operator
let right = self.parse_bitwise_or()?;
let escape = if self.match_token(TokenType::Escape) {
Some(self.parse_primary()?)
} else {
None
};
let like_expr = Expression::Like(Box::new(LikeOp {
left,
right,
escape,
quantifier: None,
inferred_type: None,
}));
Expression::Not(Box::new(UnaryOp::new(like_expr)))
} else if self.match_token(TokenType::NotILike) {
// PostgreSQL !~~* (NOT ILIKE) operator
let right = self.parse_bitwise_or()?;
let escape = if self.match_token(TokenType::Escape) {
Some(self.parse_primary()?)
} else {
None
};
let ilike_expr = Expression::ILike(Box::new(LikeOp {
left,
right,
escape,
quantifier: None,
inferred_type: None,
}));
Expression::Not(Box::new(UnaryOp::new(ilike_expr)))
} else if self.match_token(TokenType::NotRLike) {
// PostgreSQL !~ (NOT regexp match) operator
let right = self.parse_bitwise_or()?;
let regexp_expr = Expression::RegexpLike(Box::new(RegexpFunc {
this: left,
pattern: right,
flags: None,
}));
Expression::Not(Box::new(UnaryOp::new(regexp_expr)))
} else if self.match_token(TokenType::NotIRLike) {
// PostgreSQL !~* (NOT case-insensitive regexp match) operator
let right = self.parse_bitwise_or()?;
let regexp_expr = Expression::RegexpILike(Box::new(RegexpILike {
this: Box::new(left),
expression: Box::new(right),
flag: None,
}));
Expression::Not(Box::new(UnaryOp::new(regexp_expr)))
} else if self.check(TokenType::Is)
&& !self.is_last_expression_token(TokenType::Is)
&& self.match_token(TokenType::Is)
{
let not = self.match_token(TokenType::Not);
if self.match_token(TokenType::Null) {
let expr = Expression::IsNull(Box::new(IsNull {
this: left,
not,
postfix_form: false,
}));
// ClickHouse: IS NULL :: Type — handle :: cast after IS NULL
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.check(TokenType::DColon)
{
self.skip(); // consume ::
let data_type = self.parse_data_type_for_cast()?;
Expression::Cast(Box::new(Cast {
this: expr,
to: data_type,
trailing_comments: Vec::new(),
double_colon_syntax: true,
format: None,
default: None,
inferred_type: None,
}))
} else {
expr
}
} else if self.match_token(TokenType::True) {
// IS TRUE / IS NOT TRUE
Expression::IsTrue(Box::new(IsTrueFalse { this: left, not }))
} else if self.match_token(TokenType::False) {
// IS FALSE / IS NOT FALSE
Expression::IsFalse(Box::new(IsTrueFalse { this: left, not }))
} else if self.match_token(TokenType::Distinct) {
// IS DISTINCT FROM / IS NOT DISTINCT FROM
self.expect(TokenType::From)?;
let right = self.parse_bitwise_or()?;
if not {
// IS NOT DISTINCT FROM → null-safe equality
Expression::NullSafeEq(Box::new(BinaryOp::new(left, right)))
} else {
// IS DISTINCT FROM → null-safe inequality
Expression::NullSafeNeq(Box::new(BinaryOp::new(left, right)))
}
} else if self.match_identifier("UNKNOWN") {
// IS UNKNOWN
Expression::IsNull(Box::new(IsNull {
this: left,
not,
postfix_form: false,
}))
} else if self.match_texts(&["JSON"]) {
// IS JSON [VALUE|SCALAR|OBJECT|ARRAY] [WITH UNIQUE KEYS|WITHOUT UNIQUE KEYS|UNIQUE KEYS]
let json_type = if self.match_texts(&["VALUE"]) {
Some("VALUE".to_string())
} else if self.match_texts(&["SCALAR"]) {
Some("SCALAR".to_string())
} else if self.match_texts(&["OBJECT"]) {
Some("OBJECT".to_string())
} else if self.match_texts(&["ARRAY"]) {
Some("ARRAY".to_string())
} else {
None
};
// Parse optional key uniqueness constraint
let unique_keys = if self.match_text_seq(&["WITH", "UNIQUE", "KEYS"]) {
Some(JsonUniqueKeys::With)
} else if self.match_text_seq(&["WITHOUT", "UNIQUE", "KEYS"]) {
Some(JsonUniqueKeys::Without)
} else if self.match_text_seq(&["UNIQUE", "KEYS"]) {
// Shorthand for WITH UNIQUE KEYS
Some(JsonUniqueKeys::Shorthand)
} else {
None
};
Expression::IsJson(Box::new(IsJson {
this: left,
json_type,
unique_keys,
negated: not,
}))
} else {
// IS followed by an expression (e.g., IS ?)
// If we matched NOT, wrap the IS expression in NOT
let right = self.parse_primary()?;
let is_expr = Expression::Is(Box::new(BinaryOp::new(left, right)));
if not {
Expression::Not(Box::new(UnaryOp::new(is_expr)))
} else {
is_expr
}
}
} else if self.match_token(TokenType::Not) {
// Handle NOT IN, NOT BETWEEN, NOT LIKE, NOT ILIKE, etc.
if self.match_token(TokenType::In) {
// BigQuery: NOT IN UNNEST(expr)
if self.check_identifier("UNNEST") {
self.skip(); // consume UNNEST
self.expect(TokenType::LParen)?;
let unnest_expr = self.parse_expression()?;
self.expect(TokenType::RParen)?;
Expression::In(Box::new(In {
this: left,
expressions: Vec::new(),
query: None,
not: true,
global: global_in,
unnest: Some(Box::new(unnest_expr)),
is_field: false,
}))
} else if self.match_token(TokenType::LParen) {
if self.check(TokenType::Select) || self.check(TokenType::With) {
let subquery = self.parse_statement()?;
self.expect(TokenType::RParen)?;
Expression::In(Box::new(In {
this: left,
expressions: Vec::new(),
query: Some(subquery),
not: true,
global: global_in,
unnest: None,
is_field: false,
}))
} else if self.check(TokenType::RParen) {
// Empty NOT IN set: NOT IN ()
self.skip();
Expression::In(Box::new(In {
this: left,
expressions: Vec::new(),
query: None,
not: true,
global: global_in,
unnest: None,
is_field: false,
}))
} else {
let expressions = self.parse_expression_list()?;
self.expect(TokenType::RParen)?;
Expression::In(Box::new(In {
this: left,
expressions,
query: None,
not: true,
global: global_in,
unnest: None,
is_field: false,
}))
}
} else {
// ClickHouse/DuckDB: IN without parentheses: expr NOT IN table_name
let table_expr = self.parse_primary()?;
Expression::In(Box::new(In {
this: left,
expressions: vec![table_expr],
query: None,
not: true,
global: global_in,
unnest: None,
is_field: true,
}))
}
} else if self.match_token(TokenType::Between) {
// Check for SYMMETRIC/ASYMMETRIC qualifier
let symmetric = if self.match_texts(&["SYMMETRIC"]) {
Some(true)
} else if self.match_texts(&["ASYMMETRIC"]) {
Some(false)
} else {
None
};
let low = self.parse_bitwise_or()?;
self.expect(TokenType::And)?;
let high = self.parse_bitwise_or()?;
Expression::Between(Box::new(Between {
this: left,
low,
high,
not: true,
symmetric,
}))
} else if self.check_identifier("SOUNDS") && self.check_next(TokenType::Like) {
// MySQL NOT SOUNDS LIKE: expr NOT SOUNDS LIKE expr -> NOT SOUNDEX(expr) = SOUNDEX(expr)
self.skip(); // consume SOUNDS
self.skip(); // consume LIKE
let right = self.parse_bitwise_or()?;
let soundex_left = Expression::Function(Box::new(Function::new(
"SOUNDEX".to_string(),
vec![left],
)));
let soundex_right = Expression::Function(Box::new(Function::new(
"SOUNDEX".to_string(),
vec![right],
)));
let eq_expr =
Expression::Eq(Box::new(BinaryOp::new(soundex_left, soundex_right)));
Expression::Not(Box::new(UnaryOp::new(eq_expr)))
} else if self.match_token(TokenType::Like) {
let right = self.parse_bitwise_or()?;
let escape = if self.match_token(TokenType::Escape) {
Some(self.parse_primary()?)
} else {
None
};
let like_expr = Expression::Like(Box::new(LikeOp {
left,
right,
escape,
quantifier: None,
inferred_type: None,
}));
Expression::Not(Box::new(UnaryOp::new(like_expr)))
} else if self.match_token(TokenType::ILike) {
let right = self.parse_bitwise_or()?;
let escape = if self.match_token(TokenType::Escape) {
Some(self.parse_primary()?)
} else {
None
};
let ilike_expr = Expression::ILike(Box::new(LikeOp {
left,
right,
escape,
quantifier: None,
inferred_type: None,
}));
Expression::Not(Box::new(UnaryOp::new(ilike_expr)))
} else if self.check_identifier("SIMILAR") && self.check_next(TokenType::To) {
// NOT SIMILAR TO
self.skip(); // consume SIMILAR
self.skip(); // consume TO
let pattern = self.parse_bitwise_or()?;
let escape = if self.match_token(TokenType::Escape) {
Some(self.parse_primary()?)
} else {
None
};
Expression::SimilarTo(Box::new(SimilarToExpr {
this: left,
pattern,
escape,
not: true,
}))
} else if self.match_token(TokenType::RLike) {
let right = self.parse_bitwise_or()?;
let regexp_expr = Expression::RegexpLike(Box::new(RegexpFunc {
this: left,
pattern: right,
flags: None,
}));
Expression::Not(Box::new(UnaryOp::new(regexp_expr)))
} else if self.match_token(TokenType::Null) {
// SQLite: a NOT NULL (postfix form, two separate tokens)
// Creates NOT(a IS NULL) which is semantically equivalent
let is_null =
Expression::Is(Box::new(BinaryOp::new(left, Expression::Null(Null))));
Expression::Not(Box::new(UnaryOp::new(is_null)))
} else {
// NOT followed by something else - revert
return Ok(left);
}
} else if self.match_token(TokenType::In) {
// BigQuery: IN UNNEST(expr)
if self.check_identifier("UNNEST") {
self.skip(); // consume UNNEST
self.expect(TokenType::LParen)?;
let unnest_expr = self.parse_expression()?;
self.expect(TokenType::RParen)?;
Expression::In(Box::new(In {
this: left,
expressions: Vec::new(),
query: None,
not: false,
global: global_in,
unnest: Some(Box::new(unnest_expr)),
is_field: false,
}))
} else if self.match_token(TokenType::LParen) {
// Standard IN (list) or IN (subquery)
// Check if this is a subquery (IN (SELECT ...) or IN (WITH ... SELECT ...))
if self.check(TokenType::Select) || self.check(TokenType::With) {
// Use parse_statement to handle both SELECT and WITH...SELECT
let subquery = self.parse_statement()?;
self.expect(TokenType::RParen)?;
Expression::In(Box::new(In {
this: left,
expressions: Vec::new(),
query: Some(subquery),
not: false,
global: global_in,
unnest: None,
is_field: false,
}))
} else if self.check(TokenType::RParen) {
// Empty IN set: IN ()
self.skip();
Expression::In(Box::new(In {
this: left,
expressions: Vec::new(),
query: None,
not: false,
global: global_in,
unnest: None,
is_field: false,
}))
} else {
let expressions = self.parse_expression_list()?;
self.expect(TokenType::RParen)?;
Expression::In(Box::new(In {
this: left,
expressions,
query: None,
not: false,
global: global_in,
unnest: None,
is_field: false,
}))
}
} else {
// DuckDB: IN without parentheses for array/list membership: 'red' IN tbl.flags
let expr = self.parse_bitwise_or()?;
Expression::In(Box::new(In {
this: left,
expressions: vec![expr],
query: None,
not: false,
global: global_in,
unnest: None,
is_field: true,
}))
}
} else if self.match_token(TokenType::Between) {
// Check for SYMMETRIC/ASYMMETRIC qualifier
let symmetric = if self.match_texts(&["SYMMETRIC"]) {
Some(true)
} else if self.match_texts(&["ASYMMETRIC"]) {
Some(false)
} else {
None
};
let low = self.parse_bitwise_or()?;
self.expect(TokenType::And)?;
let high = self.parse_bitwise_or()?;
Expression::Between(Box::new(Between {
this: left,
low,
high,
not: false,
symmetric,
}))
} else if self.match_token(TokenType::Adjacent) {
let right = self.parse_bitwise_or()?;
Expression::Adjacent(Box::new(BinaryOp::new(left, right)))
} else if self.check(TokenType::Overlaps)
&& self.current + 1 < self.tokens.len()
&& !matches!(
self.tokens[self.current + 1].token_type,
TokenType::Semicolon
| TokenType::Comma
| TokenType::From
| TokenType::Where
| TokenType::RParen
| TokenType::As
| TokenType::Join
| TokenType::On
| TokenType::OrderBy
| TokenType::GroupBy
| TokenType::Having
| TokenType::Limit
| TokenType::Union
| TokenType::Except
| TokenType::Intersect
| TokenType::Eof
)
{
self.skip(); // consume OVERLAPS
let right = self.parse_bitwise_or()?;
Expression::Overlaps(Box::new(OverlapsExpr {
this: Some(left),
expression: Some(right),
left_start: None,
left_end: None,
right_start: None,
right_end: None,
}))
} else if self.match_token(TokenType::IsNull) {
// ISNULL postfix operator (PostgreSQL/SQLite)
Expression::IsNull(Box::new(IsNull {
this: left,
not: false,
postfix_form: true,
}))
} else if self.match_token(TokenType::NotNull) {
// NOTNULL postfix operator (PostgreSQL/SQLite)
Expression::IsNull(Box::new(IsNull {
this: left,
not: true,
postfix_form: true,
}))
} else if self.match_token(TokenType::AtAt) {
// PostgreSQL text search match operator (@@)
let right = self.parse_bitwise_or()?;
Expression::TsMatch(Box::new(BinaryOp::new(left, right)))
} else if self.match_token(TokenType::AtGt) {
// PostgreSQL array contains all operator (@>)
let right = self.parse_bitwise_or()?;
Expression::ArrayContainsAll(Box::new(BinaryOp::new(left, right)))
} else if self.match_token(TokenType::LtAt) {
// PostgreSQL array contained by operator (<@)
let right = self.parse_bitwise_or()?;
Expression::ArrayContainedBy(Box::new(BinaryOp::new(left, right)))
} else if self.match_token(TokenType::DAmp) {
// PostgreSQL array overlaps operator (&&)
let right = self.parse_bitwise_or()?;
Expression::ArrayOverlaps(Box::new(BinaryOp::new(left, right)))
} else if self.match_token(TokenType::QMarkAmp) {
// PostgreSQL JSONB contains all top keys operator (?&)
let right = self.parse_bitwise_or()?;
Expression::JSONBContainsAllTopKeys(Box::new(BinaryOp::new(left, right)))
} else if self.match_token(TokenType::QMarkPipe) {
// PostgreSQL JSONB contains any top key operator (?|)
let right = self.parse_bitwise_or()?;
Expression::JSONBContainsAnyTopKeys(Box::new(BinaryOp::new(left, right)))
} else if !matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.match_token(TokenType::Parameter)
{
// PostgreSQL JSONB contains key operator (?)
// Note: ? is tokenized as Parameter, but when used between expressions
// it's the JSONB key existence operator
// ClickHouse uses ? as ternary operator instead, handled in parse_assignment()
let right = self.parse_bitwise_or()?;
Expression::JSONBContains(Box::new(BinaryFunc {
original_name: Some("?".to_string()),
this: left,
expression: right,
inferred_type: None,
}))
} else if self.match_token(TokenType::HashDash) {
// PostgreSQL JSONB delete at path operator (#-)
let right = self.parse_bitwise_or()?;
Expression::JSONBDeleteAtPath(Box::new(BinaryOp::new(left, right)))
} else if self.match_token(TokenType::AmpLt) {
// PostgreSQL range extends left operator (&<)
let right = self.parse_bitwise_or()?;
Expression::ExtendsLeft(Box::new(BinaryOp::new(left, right)))
} else if self.match_token(TokenType::AmpGt) {
// PostgreSQL range extends right operator (&>)
let right = self.parse_bitwise_or()?;
Expression::ExtendsRight(Box::new(BinaryOp::new(left, right)))
} else if self.match_identifier("MEMBER") {
// MySQL MEMBER OF(expr) operator - JSON membership test
self.expect(TokenType::Of)?;
self.expect(TokenType::LParen)?;
let right = self.parse_expression()?;
self.expect(TokenType::RParen)?;
Expression::MemberOf(Box::new(BinaryOp::new(left, right)))
} else if self.match_token(TokenType::CaretAt) {
// DuckDB/PostgreSQL starts-with operator (^@)
let right = self.parse_bitwise_or()?;
Expression::StartsWith(Box::new(BinaryFunc {
original_name: Some("^@".to_string()),
this: left,
expression: right,
inferred_type: None,
}))
} else if self.match_token(TokenType::LrArrow) {
// PostgreSQL distance operator (<->)
let right = self.parse_bitwise_or()?;
Expression::EuclideanDistance(Box::new(EuclideanDistance {
this: Box::new(left),
expression: Box::new(right),
}))
} else if self.match_token(TokenType::Operator) {
// PostgreSQL OPERATOR(schema.op) syntax for schema-qualified operators
// Example: col1 OPERATOR(pg_catalog.~) col2
self.expect(TokenType::LParen)?;
// Collect all tokens between parentheses as the operator text
// This can include schema names, dots, and operator symbols like ~
let mut op_text = String::new();
while !self.check(TokenType::RParen) && !self.is_at_end() {
op_text.push_str(&self.peek().text);
self.skip();
}
self.expect(TokenType::RParen)?;
// Collect any inline comments (e.g., /* foo */) between OPERATOR() and the RHS
// Try trailing comments of the RParen (previous token) first,
// then leading comments of the next token
let mut comments = if self.current > 0 {
std::mem::take(&mut self.tokens[self.current - 1].trailing_comments)
} else {
Vec::new()
};
if comments.is_empty() && !self.is_at_end() {
comments = std::mem::take(&mut self.tokens[self.current].comments);
}
// Parse the right-hand side expression
let right = self.parse_bitwise_or()?;
Expression::Operator(Box::new(Operator {
this: Box::new(left),
operator: Some(Box::new(Expression::Identifier(Identifier::new(op_text)))),
expression: Box::new(right),
comments,
}))
} else {
return Ok(left);
};
left = expr;
}
}
/// Parse bitwise OR expressions (`|`), left-associative.
fn parse_bitwise_or(&mut self) -> Result<Expression> {
    let mut node = self.parse_bitwise_xor()?;
    // Fold every trailing `|` onto the accumulated left operand.
    while self.match_token(TokenType::Pipe) {
        let rhs = self.parse_bitwise_xor()?;
        node = Expression::BitwiseOr(Box::new(BinaryOp::new(node, rhs)));
    }
    Ok(node)
}
/// Resume binary-operator parsing for an expression whose primary was already
/// consumed — used for DuckDB's `@` operator when `@col` arrives from the
/// tokenizer as a single Var token and the column is therefore pre-parsed.
/// Climbs the same precedence ladder as a fresh parse:
/// multiplication -> addition -> shift/bitwise AND/XOR/OR.
fn parse_bitwise_continuation(&mut self, left: Expression) -> Result<Expression> {
    let with_mult = self.parse_multiplication_continuation(left)?;
    let with_add = self.parse_addition_continuation(with_mult)?;
    self.parse_bitwise_or_continuation(with_add)
}
/// Fold any trailing `|` operators onto an already-parsed left operand.
fn parse_bitwise_or_continuation(&mut self, mut left: Expression) -> Result<Expression> {
    while self.match_token(TokenType::Pipe) {
        let rhs = self.parse_bitwise_xor()?;
        left = Expression::BitwiseOr(Box::new(BinaryOp::new(left, rhs)));
    }
    Ok(left)
}
/// Fold multiplicative operators (`*`, `/`, `%`, `DIV`) onto an
/// already-parsed left operand (continuation form of `parse_multiplication`).
fn parse_multiplication_continuation(&mut self, mut left: Expression) -> Result<Expression> {
    loop {
        if self.match_token(TokenType::Star) {
            let rhs = self.parse_power()?;
            left = Expression::Mul(Box::new(BinaryOp::new(left, rhs)));
        } else if self.match_token(TokenType::Slash) {
            let rhs = self.parse_power()?;
            left = Expression::Div(Box::new(BinaryOp::new(left, rhs)));
        } else if self.match_token(TokenType::Percent) {
            let rhs = self.parse_power()?;
            left = Expression::Mod(Box::new(BinaryOp::new(left, rhs)));
        } else if !self.check(TokenType::QuotedIdentifier)
            && (self.match_identifier("DIV") || self.match_token(TokenType::Div))
        {
            // `DIV` integer division keyword (Hive/Spark/MySQL/ClickHouse).
            // A quoted identifier is never an operator — hence the guard above.
            // When DIV was matched as a plain Var token it may actually be a
            // column alias; if nothing that could serve as a right operand
            // follows, un-consume the token and stop.
            if self.previous().token_type == TokenType::Var
                && (self.is_at_end()
                    || self.check(TokenType::Semicolon)
                    || self.check(TokenType::From)
                    || self.check(TokenType::Where)
                    || self.check(TokenType::Comma)
                    || self.check(TokenType::RParen))
            {
                // Backtrack: DIV is being used as an alias, not an operator.
                self.current -= 1;
                return Ok(left);
            }
            let rhs = self.parse_power()?;
            left = Expression::IntDiv(Box::new(crate::expressions::BinaryFunc {
                this: left,
                expression: rhs,
                original_name: None,
                inferred_type: None,
            }));
        } else {
            return Ok(left);
        }
    }
}
/// Fold additive operators (`+`, `-`, string-concat `||`, `??`) onto an
/// already-parsed left operand. Comments around the operands are captured so
/// the generator can re-emit them in place.
fn parse_addition_continuation(&mut self, mut left: Expression) -> Result<Expression> {
    // Which comment-carrying additive operator was consumed this round.
    enum AddOp {
        Plus,
        Minus,
        Concat,
    }
    loop {
        // Comments trailing the left operand attach to the operator node.
        let left_comments = self.previous_trailing_comments().to_vec();
        let op = if self.match_token(TokenType::Plus) {
            AddOp::Plus
        } else if self.match_token(TokenType::Dash) {
            AddOp::Minus
        } else if !self.dpipe_is_logical_or() && self.match_token(TokenType::DPipe) {
            // `||` is concatenation unless the dialect treats it as logical OR.
            AddOp::Concat
        } else if self.match_token(TokenType::DQMark) {
            // `??` null-coalescing operator -> COALESCE(left, right).
            let rhs = self.parse_at_time_zone()?;
            left = Expression::Coalesce(Box::new(crate::expressions::VarArgFunc {
                expressions: vec![left, rhs],
                original_name: None,
                inferred_type: None,
            }));
            continue;
        } else {
            return Ok(left);
        };
        let operator_comments = self.previous_trailing_comments().to_vec();
        let right = self.parse_at_time_zone()?;
        let trailing_comments = self.previous_trailing_comments().to_vec();
        let node = Box::new(BinaryOp {
            left,
            right,
            left_comments,
            operator_comments,
            trailing_comments,
            inferred_type: None,
        });
        left = match op {
            AddOp::Plus => Expression::Add(node),
            AddOp::Minus => Expression::Sub(node),
            AddOp::Concat => Expression::Concat(node),
        };
    }
}
/// Parse bitwise XOR expressions, left-associative.
///
/// PostgreSQL/Redshift spell XOR as `#` (because `^` means POWER there and is
/// handled at the `parse_power` level); every other dialect uses `^`.
fn parse_bitwise_xor(&mut self) -> Result<Expression> {
    let mut node = self.parse_bitwise_and()?;
    let xor_token = if matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::PostgreSQL)
            | Some(crate::dialects::DialectType::Redshift)
    ) {
        TokenType::Hash
    } else {
        TokenType::Caret
    };
    while self.match_token(xor_token) {
        let rhs = self.parse_bitwise_and()?;
        node = Expression::BitwiseXor(Box::new(BinaryOp::new(node, rhs)));
    }
    Ok(node)
}
/// Parse bitwise AND expressions (`&`), left-associative.
fn parse_bitwise_and(&mut self) -> Result<Expression> {
    let mut node = self.parse_shift()?;
    while self.match_token(TokenType::Amp) {
        let rhs = self.parse_shift()?;
        node = Expression::BitwiseAnd(Box::new(BinaryOp::new(node, rhs)));
    }
    Ok(node)
}
/// Parse bit-shift expressions (`<<` and `>>`), left-associative.
fn parse_shift(&mut self) -> Result<Expression> {
    let mut node = self.parse_addition()?;
    loop {
        let shifted = if self.match_token(TokenType::LtLt) {
            let rhs = self.parse_addition()?;
            Expression::BitwiseLeftShift(Box::new(BinaryOp::new(node, rhs)))
        } else if self.match_token(TokenType::GtGt) {
            let rhs = self.parse_addition()?;
            Expression::BitwiseRightShift(Box::new(BinaryOp::new(node, rhs)))
        } else {
            return Ok(node);
        };
        node = shifted;
    }
}
/// Parse additive expressions: `+`, `-`, string-concat `||`, and the `??`
/// null-coalescing operator. Comments around operands and operators are
/// captured so the generator can re-emit them in place.
fn parse_addition(&mut self) -> Result<Expression> {
    // Which comment-carrying additive operator was consumed this round.
    enum AddOp {
        Plus,
        Minus,
        Concat,
    }
    let mut left = self.parse_at_time_zone()?;
    loop {
        // Comments trailing the left operand attach to the operator node.
        let left_comments = self.previous_trailing_comments().to_vec();
        let op = if self.match_token(TokenType::Plus) {
            AddOp::Plus
        } else if self.match_token(TokenType::Dash) {
            AddOp::Minus
        } else if !self.dpipe_is_logical_or() && self.match_token(TokenType::DPipe) {
            // `||` is concatenation unless the dialect treats it as logical OR.
            AddOp::Concat
        } else if self.match_token(TokenType::DQMark) {
            // `??` null-coalescing operator -> COALESCE(left, right).
            let rhs = self.parse_at_time_zone()?;
            left = Expression::Coalesce(Box::new(crate::expressions::VarArgFunc {
                expressions: vec![left, rhs],
                original_name: None,
                inferred_type: None,
            }));
            continue;
        } else {
            return Ok(left);
        };
        let operator_comments = self.previous_trailing_comments().to_vec();
        let right = self.parse_at_time_zone()?;
        let trailing_comments = self.previous_trailing_comments().to_vec();
        let node = Box::new(BinaryOp {
            left,
            right,
            left_comments,
            operator_comments,
            trailing_comments,
            inferred_type: None,
        });
        left = match op {
            AddOp::Plus => Expression::Add(node),
            AddOp::Minus => Expression::Sub(node),
            AddOp::Concat => Expression::Concat(node),
        };
    }
}
/// Parse `AT TIME ZONE` postfix expressions; the construct may be chained
/// (`expr AT TIME ZONE 'a' AT TIME ZONE 'b'`).
fn parse_at_time_zone(&mut self) -> Result<Expression> {
    let mut inner = self.parse_multiplication()?;
    // `AT` has no dedicated token type; it arrives as a Var.
    while self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("AT") {
        self.skip(); // AT
        if !self.check(TokenType::Time) {
            return Err(self.parse_error("Expected TIME after AT"));
        }
        self.skip(); // TIME
        if !(self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("ZONE")) {
            return Err(self.parse_error("Expected ZONE after AT TIME"));
        }
        self.skip(); // ZONE
        let zone = self.parse_unary()?;
        inner = Expression::AtTimeZone(Box::new(AtTimeZone { this: inner, zone }));
    }
    Ok(inner)
}
/// Parse multiplicative expressions: `*`, `/`, `%`, the `MOD` keyword
/// (MySQL/Teradata infix modulo) and the `DIV` keyword (integer division).
fn parse_multiplication(&mut self) -> Result<Expression> {
    let mut node = self.parse_power()?;
    loop {
        if self.match_token(TokenType::Star) {
            let rhs = self.parse_power()?;
            node = Expression::Mul(Box::new(BinaryOp::new(node, rhs)));
        } else if self.match_token(TokenType::Slash) {
            let rhs = self.parse_power()?;
            node = Expression::Div(Box::new(BinaryOp::new(node, rhs)));
        } else if self.match_token(TokenType::Percent) {
            let rhs = self.parse_power()?;
            node = Expression::Mod(Box::new(BinaryOp::new(node, rhs)));
        } else if !self.check(TokenType::QuotedIdentifier)
            && (self.match_identifier("MOD") || self.match_token(TokenType::Mod))
        {
            // MySQL/Teradata infix modulo: `x MOD y`. A quoted identifier is
            // an alias, never an operator — hence the guard above.
            let rhs = self.parse_power()?;
            node = Expression::Mod(Box::new(BinaryOp::new(node, rhs)));
        } else if !self.check(TokenType::QuotedIdentifier)
            && (self.match_identifier("DIV") || self.match_token(TokenType::Div))
        {
            // `DIV` integer division keyword (Hive/Spark/MySQL/ClickHouse).
            // When DIV was matched as a plain Var token it may actually be a
            // column alias; if nothing that could serve as a right operand
            // follows, un-consume the token and stop.
            if self.previous().token_type == TokenType::Var
                && (self.is_at_end()
                    || self.check(TokenType::Semicolon)
                    || self.check(TokenType::From)
                    || self.check(TokenType::Where)
                    || self.check(TokenType::Comma)
                    || self.check(TokenType::RParen))
            {
                // Backtrack: DIV is being used as an alias, not an operator.
                self.current -= 1;
                return Ok(node);
            }
            let rhs = self.parse_power()?;
            node = Expression::IntDiv(Box::new(crate::expressions::BinaryFunc {
                this: node,
                expression: rhs,
                original_name: None,
                inferred_type: None,
            }));
        } else {
            return Ok(node);
        }
    }
}
/// Parse exponentiation: the `**` operator everywhere, plus `^` in the
/// dialects where caret means POWER rather than bitwise XOR
/// (PostgreSQL, Redshift, DuckDB).
fn parse_power(&mut self) -> Result<Expression> {
    let mut base = self.parse_unary()?;
    let caret_is_power = matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::PostgreSQL)
            | Some(crate::dialects::DialectType::Redshift)
            | Some(crate::dialects::DialectType::DuckDB)
    );
    loop {
        if self.match_token(TokenType::DStar) {
            let exponent = self.parse_unary()?;
            // Record the original spelling so the generator can round-trip `**`.
            base = Expression::Power(Box::new(BinaryFunc {
                original_name: Some("**".to_string()),
                this: base,
                expression: exponent,
                inferred_type: None,
            }));
        } else if caret_is_power && self.match_token(TokenType::Caret) {
            let exponent = self.parse_unary()?;
            base = Expression::Power(Box::new(BinaryFunc {
                original_name: None,
                this: base,
                expression: exponent,
                inferred_type: None,
            }));
        } else {
            return Ok(base);
        }
    }
}
/// Try to parse a PostgreSQL-style type literal such as `point '(4,4)'` or
/// `inet '10.0.0.1'` — a type name immediately followed by a string literal,
/// which is shorthand for a cast.
///
/// Returns `Ok(None)` (with the cursor restored) when the tokens do not form
/// this pattern, so the caller can fall through to `parse_primary`.
fn try_parse_type_literal(&mut self) -> Result<Option<Expression>> {
    let checkpoint = self.current;
    // Must start at a bare identifier/var that could name a type.
    if !(self.check(TokenType::Identifier) || self.check(TokenType::Var)) {
        return Ok(None);
    }
    let type_name = self.peek().text.to_ascii_uppercase();
    // Types for which PostgreSQL accepts `TYPE 'literal'` syntax.
    // DATE/TIME/TIMESTAMP/INTERVAL are deliberately absent: they have their
    // own token types and are handled in parse_primary.
    let supports_literal = matches!(
        type_name.as_str(),
        // Geometric types (PostgreSQL)
        "POINT" | "LINE" | "LSEG" | "BOX" | "PATH" | "POLYGON" | "CIRCLE" |
        // Network types (PostgreSQL)
        "INET" | "CIDR" | "MACADDR" | "MACADDR8" |
        // Other types that support literal syntax
        "UUID" | "JSON" | "JSONB" | "XML" | "BIT" | "VARBIT" |
        // Range types (PostgreSQL)
        "INT4RANGE" | "INT8RANGE" | "NUMRANGE" | "TSRANGE" | "TSTZRANGE" | "DATERANGE"
    );
    if !supports_literal {
        return Ok(None);
    }
    // The token right after the type name must be a string literal.
    let followed_by_string = self
        .tokens
        .get(self.current + 1)
        .map_or(false, |t| t.token_type == TokenType::String);
    if !followed_by_string {
        return Ok(None);
    }
    // Commit: consume the type name and resolve it to a DataType.
    self.skip();
    let data_type = match self.parse_data_type_from_name(&type_name) {
        Ok(dt) => dt,
        Err(_) => {
            // Unknown type after all — restore and let the caller retry.
            self.current = checkpoint;
            return Ok(None);
        }
    };
    if !self.check(TokenType::String) {
        // Something went wrong after consuming the name — restore.
        self.current = checkpoint;
        return Ok(None);
    }
    let literal_token = self.advance();
    let value = Expression::Literal(Box::new(Literal::String(literal_token.text.clone())));
    // JSON literal: JSON '"foo"' -> ParseJson expression (matches Python sqlglot).
    if matches!(data_type, DataType::Json | DataType::JsonB)
        || matches!(type_name.as_str(), "JSON" | "JSONB")
    {
        return Ok(Some(Expression::ParseJson(Box::new(UnaryFunc {
            this: value,
            original_name: None,
            inferred_type: None,
        }))));
    }
    // Everything else becomes CAST('literal' AS type).
    Ok(Some(Expression::Cast(Box::new(Cast {
        this: value,
        to: data_type,
        trailing_comments: Vec::new(),
        double_colon_syntax: false,
        format: None,
        default: None,
        inferred_type: None,
    }))))
}
/// Try to parse a generic-mode type-shorthand cast: `INT 1`, `VARCHAR 'x'`,
/// `TEXT 'y'`, etc. — a type keyword directly followed by a literal, which
/// becomes `CAST(literal AS type)`. Mirrors Python sqlglot's `_parse_types()`.
///
/// Returns `Ok(None)` (cursor restored) when the pattern does not apply.
fn try_parse_type_shorthand_cast(&mut self) -> Result<Option<Expression>> {
    // Only applies when no dialect is set, or the dialect is explicitly Generic.
    let generic_mode = self.config.dialect.is_none()
        || matches!(
            self.config.dialect,
            Some(crate::dialects::DialectType::Generic)
        );
    if !generic_mode {
        return Ok(None);
    }
    let checkpoint = self.current;
    if !self.is_type_keyword() {
        return Ok(None);
    }
    // The type keyword must be directly followed by a number/string literal.
    // Anything else (e.g. `(` for a function call) means no shorthand cast.
    let follower = match self.tokens.get(self.current + 1) {
        Some(tok) => tok.token_type,
        None => return Ok(None),
    };
    if !matches!(follower, TokenType::Number | TokenType::String) {
        return Ok(None);
    }
    // Consume the keyword and map it onto a DataType.
    let keyword = self.advance().text.to_ascii_uppercase();
    let data_type = match keyword.as_str() {
        "INT" | "INTEGER" => DataType::Int {
            length: None,
            // Preserve the spelling so INTEGER round-trips as INTEGER.
            integer_spelling: keyword == "INTEGER",
        },
        "BIGINT" => DataType::BigInt { length: None },
        "SMALLINT" => DataType::SmallInt { length: None },
        "TINYINT" => DataType::TinyInt { length: None },
        "FLOAT" => DataType::Float {
            precision: None,
            scale: None,
            real_spelling: false,
        },
        "DOUBLE" => DataType::Double {
            precision: None,
            scale: None,
        },
        "DECIMAL" | "NUMERIC" => DataType::Decimal {
            precision: None,
            scale: None,
        },
        "REAL" => DataType::Float {
            precision: None,
            scale: None,
            real_spelling: true,
        },
        "VARCHAR" => DataType::VarChar {
            length: None,
            parenthesized_length: false,
        },
        "CHAR" => DataType::Char { length: None },
        "TEXT" | "STRING" => DataType::Text,
        "BOOLEAN" | "BOOL" => DataType::Boolean,
        "BINARY" => DataType::Binary { length: None },
        "VARBINARY" => DataType::VarBinary { length: None },
        _ => {
            // Not a shorthand-capable type — restore and bail out.
            self.current = checkpoint;
            return Ok(None);
        }
    };
    // Consume the literal value.
    let value = if self.check(TokenType::String) {
        Expression::Literal(Box::new(Literal::String(self.advance().text)))
    } else if self.check(TokenType::Number) {
        Expression::Literal(Box::new(Literal::Number(self.advance().text)))
    } else {
        self.current = checkpoint;
        return Ok(None);
    };
    Ok(Some(Expression::Cast(Box::new(Cast {
        this: value,
        to: data_type,
        trailing_comments: Vec::new(),
        double_colon_syntax: false,
        format: None,
        default: None,
        inferred_type: None,
    }))))
}
/// Parse unary (prefix) expressions and prefix-level special forms:
/// `+x` (no-op), `-x`, `~x`, PostgreSQL `||/` (cube root) and `|/` (square
/// root), the DuckDB `@` (ABS) operator, Oracle `PRIOR`, then the
/// type-literal / type-shorthand-cast fallbacks before `parse_primary`.
///
/// Fix: removed a second `match_token(TokenType::Plus)` branch that followed
/// the Dash case — it was unreachable dead code, because the first branch
/// already consumes every leading `+`.
fn parse_unary(&mut self) -> Result<Expression> {
    if self.match_token(TokenType::Plus) {
        // Unary plus is a no-op - just parse the inner expression.
        // Recursing handles chains like +++1 -> 1 and +-1 -> -1.
        self.parse_unary()
    } else if self.match_token(TokenType::Dash) {
        // Unary minus: -expr
        let expr = self.parse_unary()?;
        Ok(Expression::Neg(Box::new(UnaryOp::new(expr))))
    } else if self.match_token(TokenType::Tilde) {
        // Bitwise NOT: ~expr
        let expr = self.parse_unary()?;
        Ok(Expression::BitwiseNot(Box::new(UnaryOp::new(expr))))
    } else if self.match_token(TokenType::DPipeSlash) {
        // ||/ (Cube root - PostgreSQL)
        let expr = self.parse_unary()?;
        Ok(Expression::Cbrt(Box::new(UnaryFunc::with_name(
            expr,
            "||/".to_string(),
        ))))
    } else if self.match_token(TokenType::PipeSlash) {
        // |/ (Square root - PostgreSQL)
        let expr = self.parse_unary()?;
        Ok(Expression::Sqrt(Box::new(UnaryFunc::with_name(
            expr,
            "|/".to_string(),
        ))))
    } else if self.check(TokenType::DAt)
        && matches!(
            self.config.dialect,
            Some(crate::dialects::DialectType::DuckDB)
        )
    {
        // DuckDB @ operator: @(-1), @(expr), @-1
        // @ is the ABS operator in DuckDB with low precedence.
        // Python sqlglot: "@": lambda self: exp.Abs(this=self._parse_bitwise())
        // This means @col + 1 parses as ABS(col + 1), not ABS(col) + 1.
        self.skip(); // consume @
        // Parse at bitwise level for correct precedence (matches Python sqlglot)
        let expr = self.parse_bitwise_or()?;
        Ok(Expression::Abs(Box::new(UnaryFunc::new(expr))))
    } else if self.check(TokenType::Var)
        && self.peek().text.starts_with('@')
        && matches!(
            self.config.dialect,
            Some(crate::dialects::DialectType::DuckDB)
        )
    {
        // DuckDB @ operator fused with an identifier: @col, @col + 1.
        // The tokenizer produces "@col" as a single Var token, so split it here.
        let token = self.advance(); // consume the @col token
        let col_name = &token.text[1..]; // strip the leading @
        // Build a plain column reference for the identifier part.
        let col_expr = Expression::boxed_column(Column {
            name: Identifier::new(col_name),
            table: None,
            join_mark: false,
            trailing_comments: Vec::new(),
            span: None,
            inferred_type: None,
        });
        // If a binary operator follows, the whole right-hand chain belongs
        // inside the ABS (low precedence), so keep parsing at bitwise level.
        if self.check(TokenType::Plus)
            || self.check(TokenType::Dash)
            || self.check(TokenType::Star)
            || self.check(TokenType::Slash)
            || self.check(TokenType::Percent)
            || self.check(TokenType::Amp)
            || self.check(TokenType::Pipe)
            || self.check(TokenType::Caret)
            || self.check(TokenType::LtLt)
            || self.check(TokenType::GtGt)
        {
            // parse_bitwise_or expects to start fresh, so use the
            // continuation helper that accepts an existing left operand.
            let full_expr = self.parse_bitwise_continuation(col_expr)?;
            Ok(Expression::Abs(Box::new(UnaryFunc::new(full_expr))))
        } else {
            // Just the column, no trailing operators.
            Ok(Expression::Abs(Box::new(UnaryFunc::new(col_expr))))
        }
    } else if self.check(TokenType::DAt)
        && (self.check_next(TokenType::LParen) || self.check_next(TokenType::Dash))
    {
        // Non-DuckDB dialects: only handle @(expr) and @-expr as ABS.
        self.skip(); // consume @
        let expr = self.parse_bitwise_or()?;
        Ok(Expression::Abs(Box::new(UnaryFunc::new(expr))))
    } else if self.check(TokenType::Prior)
        && !self.check_next(TokenType::As)
        && !self.check_next(TokenType::Comma)
        && !self.check_next(TokenType::RParen)
        && !self.check_next(TokenType::Semicolon)
        && self.current + 1 < self.tokens.len()
    {
        // Oracle PRIOR expression - references the parent row's value in
        // hierarchical queries (SELECT list, CONNECT BY, etc.).
        // Python sqlglot: "PRIOR": lambda self: self.expression(exp.Prior, this=self._parse_bitwise())
        // When followed by AS/comma/rparen/end, PRIOR is treated as an
        // identifier (column name) instead, so those cases fall through.
        self.skip(); // consume PRIOR
        let expr = self.parse_bitwise_or()?;
        Ok(Expression::Prior(Box::new(Prior { this: expr })))
    } else {
        // PostgreSQL type literals: point '(4,4)', timestamp '2024-01-01', ...
        // (type name followed by a string literal as a cast shorthand).
        if let Some(type_literal) = self.try_parse_type_literal()? {
            return self.parse_postfix_operators(type_literal);
        }
        // Generic-mode shorthand casts: INT 1, VARCHAR 'x', TEXT 'y', ...
        // -> CAST(literal AS type).
        if let Some(type_cast) = self.try_parse_type_shorthand_cast()? {
            return self.parse_postfix_operators(type_cast);
        }
        let expr = self.parse_primary()?;
        // Postfix handling (Snowflake model!PREDICT(...), :: casts, etc.).
        self.parse_postfix_operators(expr)
    }
}
/// Parse postfix operators attached to an already-parsed expression:
/// - Oracle/Redshift outer-join marker `(+)` (sets `join_mark` on columns)
/// - Snowflake `!` model-attribute syntax (`model!PREDICT(...)`)
/// - Snowflake `:` JSON path extraction (delegated to `parse_colon_json_path`)
/// - `::` — SingleStore JSON extraction, or a cast in every other dialect
/// - Teradata `(FORMAT '...')` phrase
///
/// The checks run in the order listed; each may consume tokens before the
/// next one looks at the stream.
fn parse_postfix_operators(&mut self, mut expr: Expression) -> Result<Expression> {
    // Handle Oracle/Redshift outer join marker (+) after column reference
    // Syntax: column_ref (+) indicates optional side of join
    if self.check(TokenType::LParen) && self.check_next(TokenType::Plus) {
        // Look ahead to verify it's ( + ); if the third token is not `)`,
        // restore the cursor so `( + ...` can parse as a normal expression.
        let saved_pos = self.current;
        if self.match_token(TokenType::LParen)
            && self.match_token(TokenType::Plus)
            && self.match_token(TokenType::RParen)
        {
            // Set join_mark on the column expression.
            // NOTE(review): when expr is not a Column the `(+)` tokens are
            // still consumed but the marker is silently dropped.
            if let Expression::Column(ref mut col) = expr {
                col.join_mark = true;
            }
        } else {
            self.current = saved_pos;
        }
    }
    // Handle EXCLAMATION for Snowflake model attribute syntax: model!PREDICT(...)
    while self.match_token(TokenType::Exclamation) {
        // Parse the attribute/function after the exclamation mark
        // This can be either a simple identifier (model!admin) or a function call (model!PREDICT(1))
        let attr = self.parse_primary()?;
        expr = Expression::ModelAttribute(Box::new(ModelAttribute {
            this: Box::new(expr),
            expression: Box::new(attr),
        }));
    }
    // Handle COLON for Snowflake JSON path extraction: a:field or a:field.subfield
    // This creates JSONExtract expressions that transform to GET_PATH(a, 'field') in Snowflake
    expr = self.parse_colon_json_path(expr)?;
    // Handle DCOLON (::) - in SingleStore it's JSON extraction, in other dialects it's cast
    // SingleStore JSON path syntax:
    //   a::b  -> JSON_EXTRACT_JSON(a, 'b')
    //   a::$b -> JSON_EXTRACT_STRING(a, 'b')
    //   a::%b -> JSON_EXTRACT_DOUBLE(a, 'b')
    //   a::?names -> JSON match syntax
    if matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::SingleStore)
    ) {
        expr = self.parse_singlestore_json_path(expr)?;
    } else {
        // For other dialects, :: is cast syntax (chainable: x::a::b).
        // IMPORTANT: Use parse_data_type_for_cast to avoid consuming subscripts as array dimensions
        // e.g., ::VARIANT[0] should be cast to VARIANT followed by subscript [0]
        while self.match_token(TokenType::DColon) {
            let data_type = self.parse_data_type_for_cast()?;
            expr = Expression::Cast(Box::new(Cast {
                this: expr,
                to: data_type,
                trailing_comments: Vec::new(),
                // Remember the `::` spelling so the generator round-trips it.
                double_colon_syntax: true,
                format: None,
                default: None,
                inferred_type: None,
            }));
        }
    }
    // Teradata: (FORMAT '...') phrase after an expression
    if matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::Teradata)
    ) && self.check(TokenType::LParen)
        && self.check_next(TokenType::Format)
    {
        self.skip(); // consume (
        self.skip(); // consume FORMAT
        let format = self.expect_string()?;
        self.expect(TokenType::RParen)?;
        expr = Expression::FormatPhrase(Box::new(FormatPhrase {
            this: Box::new(expr),
            format,
        }));
    }
    Ok(expr)
}
/// Parse SingleStore's `::`-based JSON extraction chain.
///
/// `a::b`  -> JSON_EXTRACT_JSON(a, 'b')
/// `a::$b` -> JSON_EXTRACT_STRING(a, 'b')
/// `a::%b` -> JSON_EXTRACT_DOUBLE(a, 'b')
/// `a::?b` -> JSON match syntax (kept as a placeholder JSON_EXTRACT_JSON
///            call with a '?'-prefixed key)
///
/// Segments may be chained, e.g. a::`b`::`2`.
fn parse_singlestore_json_path(&mut self, mut expr: Expression) -> Result<Expression> {
    loop {
        if self.match_token(TokenType::DColon) {
            // Plain `::` also accepts a numeric key: a::2 -> JSON_EXTRACT_JSON(a, '2')
            let key = if self.check(TokenType::Identifier)
                || self.check(TokenType::Var)
                || self.check(TokenType::Number)
            {
                self.advance().text
            } else {
                return Err(self.parse_error("Expected identifier after ::"));
            };
            expr = Expression::Function(Box::new(Function::new(
                "JSON_EXTRACT_JSON".to_string(),
                vec![expr, Expression::string(&key)],
            )));
            continue;
        }
        // The remaining variants share one shape: a sigil token, then an
        // identifier key. Pick the function name / error text per sigil.
        let (func_name, err_msg, key_prefix) = if self.match_token(TokenType::DColonDollar) {
            ("JSON_EXTRACT_STRING", "Expected identifier after ::$", "")
        } else if self.match_token(TokenType::DColonPercent) {
            ("JSON_EXTRACT_DOUBLE", "Expected identifier after ::%", "")
        } else if self.match_token(TokenType::DColonQMark) {
            // JSON match syntax is not modeled yet; handled specially later.
            ("JSON_EXTRACT_JSON", "Expected identifier after ::?", "?")
        } else {
            return Ok(expr);
        };
        let key = if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
            self.advance().text
        } else {
            return Err(self.parse_error(err_msg));
        };
        expr = Expression::Function(Box::new(Function::new(
            func_name.to_string(),
            vec![expr, Expression::string(&format!("{key_prefix}{key}"))],
        )));
    }
}
/// Parse colon-separated JSON path syntax (Snowflake variant extraction)
/// Examples:
/// a:from -> GET_PATH(a, 'from')
/// a:b.c.d -> GET_PATH(a, 'b.c.d')
/// a:from::STRING -> CAST(GET_PATH(a, 'from') AS VARCHAR)
/// a:b:c.d -> GET_PATH(a, 'b.c.d') (multiple colons joined into single path)
fn parse_colon_json_path(&mut self, mut this: Expression) -> Result<Expression> {
// DuckDB uses colon for prefix alias syntax (e.g., "alias: expr" means "expr AS alias")
// Skip JSON path extraction for DuckDB - it's handled separately in parse_select_expressions
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::DuckDB)
) {
return Ok(this);
}
// ClickHouse uses : as part of the ternary operator (condition ? true : false)
// Skip JSON path extraction for ClickHouse to avoid consuming the ternary separator
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) {
return Ok(this);
}
// Only apply colon JSON path parsing to identifiers, columns, and function results
// This prevents {'key': 'value'} object literals from being misinterpreted
let is_valid_json_path_base = matches!(
&this,
Expression::Column(_) |
Expression::Identifier(_) |
Expression::Dot(_) |
Expression::JSONExtract(_) | // Allow chained paths like a:b:c
Expression::Function(_) | // Allow function results like PARSE_JSON(...):x
Expression::ParseJson(_) | // Allow PARSE_JSON specifically
Expression::Parameter(_) // Allow positional params like $1:name
);
if !is_valid_json_path_base {
return Ok(this);
}
// Check if we have a colon (but NOT double-colon which is cast syntax)
if !self.check(TokenType::Colon) {
return Ok(this);
}
// Make sure this is not a double-colon (::) which is cast syntax
if self.check_next(TokenType::Colon) {
// This is :: (DColon should have been tokenized, but just in case)
return Ok(this);
}
// Collect ALL the JSON path parts across multiple colons
// a:b.c:d.e -> GET_PATH(a, 'b.c.d.e')
// a:b[0].c -> GET_PATH(a, 'b[0].c')
let mut path_string = String::new();
// Parse all colon-separated path segments
while self.check(TokenType::Colon) && !self.check_next(TokenType::Colon) {
// Save position before consuming colon so we can backtrack
// if what follows isn't a valid JSON path component (e.g., DuckDB's "foo: 1" label syntax)
let saved_pos = self.current;
let saved_path_len = path_string.len();
// Consume the colon
self.skip();
// Parse first path component (required) - can be any identifier including keywords
// Also handle backtick-quoted identifiers like `zip code` or `fb:testid`
// Also handle bracket notation directly after colon: c1:['price'] or c1:["foo bar"]
// IMPORTANT: Check QuotedIdentifier FIRST since is_identifier_token() includes QuotedIdentifier
let mut had_initial_component = false;
if self.check(TokenType::QuotedIdentifier) {
// Quoted field name in variant access
// Snowflake: v:"fruit" → double-quoted key → stored as plain text 'fruit'
// Databricks: raw:`zip code` → backtick-quoted key → stored as bracket notation '["zip code"]'
let quoted_name = self.advance().text.clone();
let is_snowflake = matches!(
self.config.dialect,
Some(crate::dialects::DialectType::Snowflake)
);
let needs_bracket = quoted_name.contains(' ') || quoted_name.contains('\'');
if is_snowflake && !needs_bracket {
// Snowflake double-quoted keys without special chars are stored as plain text
// Add dot separator for plain segments
if !path_string.is_empty() {
path_string.push('.');
}
path_string.push_str("ed_name);
} else if is_snowflake && needs_bracket {
// Snowflake keys with spaces/apostrophes use bracket notation: ["key with spaces"]
// No dot before bracket notation
path_string.push_str("[\"");
// Don't escape single quotes here - the generator will handle escaping
// when outputting the string literal
path_string.push_str("ed_name);
path_string.push_str("\"]");
} else {
// Other dialects (Databricks): wrap in bracket notation
// No dot before bracket notation
path_string.push_str("[\"");
for c in quoted_name.chars() {
if c == '"' {
path_string.push_str("\\\"");
} else {
path_string.push(c);
}
}
path_string.push_str("\"]");
}
had_initial_component = true;
} else if self.is_identifier_token()
|| self.is_safe_keyword_as_identifier()
|| self.is_reserved_keyword_as_identifier()
{
// Add a dot separator for plain identifier segments
if !path_string.is_empty() {
path_string.push('.');
}
let first_part = self.advance().text;
path_string.push_str(&first_part);
had_initial_component = true;
} else if self.check(TokenType::LBracket) {
// Bracket notation directly after colon: c1:['price'] or c1:["foo bar"]
// Mark that we have a valid path start - the bracket will be parsed in the loop below
had_initial_component = true;
}
if !had_initial_component {
// Not a valid JSON path component - backtrack and stop
// This handles cases like DuckDB's "foo: 1" label/alias syntax
// where the colon is followed by a non-identifier (e.g., a number)
self.current = saved_pos;
path_string.truncate(saved_path_len);
break;
}
// Parse optional array indices and additional path components
loop {
// Handle array index: [0], [1], [*], ['key'], ["key"], etc.
if self.match_token(TokenType::LBracket) {
// Parse the index expression (typically a number, identifier, * for wildcard, or string key)
if self.check(TokenType::Number) {
path_string.push('[');
let idx = self.advance().text;
path_string.push_str(&idx);
self.expect(TokenType::RBracket)?;
path_string.push(']');
} else if self.check(TokenType::Star) {
// Wildcard array access: [*] matches all array elements
path_string.push('[');
self.skip();
path_string.push('*');
self.expect(TokenType::RBracket)?;
path_string.push(']');
} else if self.check(TokenType::String) {
// Single-quoted string key access: ['bicycle']
// Convert to dot notation for simple keys, keep bracket notation for keys with spaces
let key = self.advance().text;
self.expect(TokenType::RBracket)?;
// Check if the key contains spaces or special characters that require bracket notation
let needs_brackets =
key.contains(' ') || key.contains('"') || key.contains('\'');
if needs_brackets {
// Keep bracket notation with double quotes: ["zip code"]
path_string.push_str("[\"");
for c in key.chars() {
if c == '"' {
path_string.push_str("\\\"");
} else {
path_string.push(c);
}
}
path_string.push_str("\"]");
} else {
// Convert to dot notation: store['bicycle'] -> store.bicycle
// But only add dot if path_string is not empty (handles c1:['price'] -> c1:price)
if !path_string.is_empty() {
path_string.push('.');
}
path_string.push_str(&key);
}
} else if self.check(TokenType::QuotedIdentifier) {
// Double-quoted string key access: ["zip code"]
// These are tokenized as QuotedIdentifier, not String
// Must be checked BEFORE is_identifier_token() since it includes QuotedIdentifier
let key = self.advance().text;
self.expect(TokenType::RBracket)?;
// Always use bracket notation with double quotes for quoted identifiers
path_string.push_str("[\"");
for c in key.chars() {
if c == '"' {
path_string.push_str("\\\"");
} else {
path_string.push(c);
}
}
path_string.push_str("\"]");
} else if self.is_identifier_token() {
// Check if this is a "dynamic bracket" — a column reference like s.x
// inside brackets. We detect this by checking if the identifier is
// followed by a dot (making it a qualified column reference).
let saved_bracket_pos = self.current;
let ident_text = self.advance().text.clone();
if self.check(TokenType::Dot) {
// Dynamic bracket: [s.x] where s.x is a column reference
// Backtrack to before the identifier so we can parse the full expression
self.current = saved_bracket_pos;
// Parse the full expression inside the brackets
let index_expr = self.parse_expression()?;
self.expect(TokenType::RBracket)?;
// Build JSONExtract for the path accumulated so far
let path_expr =
Expression::Literal(Box::new(Literal::String(path_string)));
let json_extract = Expression::JSONExtract(Box::new(JSONExtract {
this: Box::new(this),
expression: Box::new(path_expr),
only_json_types: None,
expressions: Vec::new(),
variant_extract: Some(Box::new(Expression::Boolean(
BooleanLiteral { value: true },
))),
json_query: None,
option: None,
quote: None,
on_condition: None,
requires_json: None,
}));
// Wrap in Subscript
let subscript = Expression::Subscript(Box::new(Subscript {
this: json_extract,
index: index_expr,
}));
// Now continue parsing any remaining path after the dynamic bracket.
// This handles patterns like [s.x].r.d or [s.x]:r or [s.x].r.d[s.y]
// We parse dots into a new path string, and if we encounter another
// dynamic bracket, we recurse.
let mut suffix_path = String::new();
loop {
if self.match_token(TokenType::Dot) {
// Dot access after dynamic bracket: [s.x].r.d
if !suffix_path.is_empty() {
suffix_path.push('.');
}
if self.is_identifier_token()
|| self.is_safe_keyword_as_identifier()
|| self.is_reserved_keyword_as_identifier()
{
let part = self.advance().text;
suffix_path.push_str(&part);
} else {
return Err(self.parse_error(
"Expected identifier after . in JSON path",
));
}
} else if self.check(TokenType::LBracket) {
// Another bracket after dot path: [s.x].r.d[s.y]
// We need to check if this bracket contains a dynamic expression
break;
} else {
break;
}
}
// Build the result depending on whether there are suffix dot paths
let result_base = if suffix_path.is_empty() {
subscript
} else {
// Create another JSONExtract for the suffix path
Expression::JSONExtract(Box::new(JSONExtract {
this: Box::new(subscript),
expression: Box::new(Expression::Literal(Box::new(
Literal::String(suffix_path),
))),
only_json_types: None,
expressions: Vec::new(),
variant_extract: Some(Box::new(Expression::Boolean(
BooleanLiteral { value: true },
))),
json_query: None,
option: None,
quote: None,
on_condition: None,
requires_json: None,
}))
};
// Check for another bracket (e.g., [s.y] after .r.d)
if self.match_token(TokenType::LBracket) {
// Parse the index expression
let index_expr2 = self.parse_expression()?;
self.expect(TokenType::RBracket)?;
let subscript2 = Expression::Subscript(Box::new(Subscript {
this: result_base,
index: index_expr2,
}));
// Update `this` and `path_string` so we properly continue the outer loop
this = subscript2;
path_string = String::new();
} else {
this = result_base;
path_string = String::new();
}
// Continue parsing more colon segments or break
// Need to break out of the inner loop to let the outer while loop
// check for more colon segments
break;
} else {
// Simple identifier index: [idx]
path_string.push('[');
path_string.push_str(&ident_text);
self.expect(TokenType::RBracket)?;
path_string.push(']');
}
} else {
// Empty brackets or unexpected token - just close the bracket
path_string.push('[');
self.expect(TokenType::RBracket)?;
path_string.push(']');
}
} else if self.match_token(TokenType::Dot) {
// Handle dot access
path_string.push('.');
if self.is_identifier_token()
|| self.is_safe_keyword_as_identifier()
|| self.is_reserved_keyword_as_identifier()
{
let part = self.advance().text;
path_string.push_str(&part);
} else {
return Err(self.parse_error("Expected identifier after . in JSON path"));
}
} else {
break;
}
}
}
// If no path was parsed (e.g., backtracked on first colon), return the original expression
if path_string.is_empty() {
return Ok(this);
}
// Create the JSONExtract expression with variant_extract marker
let path_expr = Expression::Literal(Box::new(Literal::String(path_string)));
let json_extract = Expression::JSONExtract(Box::new(JSONExtract {
this: Box::new(this),
expression: Box::new(path_expr),
only_json_types: None,
expressions: Vec::new(),
variant_extract: Some(Box::new(Expression::Boolean(BooleanLiteral {
value: true,
}))),
json_query: None,
option: None,
quote: None,
on_condition: None,
requires_json: None,
}));
Ok(json_extract)
}
/// Check if the current token is a reserved keyword that can be used as identifier in JSON path
fn is_reserved_keyword_as_identifier(&self) -> bool {
if self.is_at_end() {
return false;
}
let token = self.peek();
// Allow reserved keywords like FROM, SELECT, etc. as JSON path components
matches!(
token.token_type,
TokenType::From
| TokenType::Select
| TokenType::Where
| TokenType::And
| TokenType::Or
| TokenType::Not
| TokenType::In
| TokenType::As
| TokenType::On
| TokenType::Join
| TokenType::Left
| TokenType::Right
| TokenType::Inner
| TokenType::Outer
| TokenType::Cross
| TokenType::Full
| TokenType::Group
| TokenType::Order
| TokenType::By
| TokenType::Having
| TokenType::Limit
| TokenType::Offset
| TokenType::Union
| TokenType::Except
| TokenType::Intersect
| TokenType::All
| TokenType::Distinct
| TokenType::Case
| TokenType::When
| TokenType::Then
| TokenType::Else
| TokenType::End
| TokenType::Null
| TokenType::True
| TokenType::False
| TokenType::Between
| TokenType::Like
| TokenType::Is
| TokenType::Exists
| TokenType::Insert
| TokenType::Update
| TokenType::Delete
| TokenType::Create
| TokenType::Alter
| TokenType::Drop
| TokenType::Table
| TokenType::View
| TokenType::Index
| TokenType::Set
| TokenType::Values
| TokenType::Into
| TokenType::Default
| TokenType::Key
| TokenType::Unique
| TokenType::Check
| TokenType::Constraint
| TokenType::References
)
}
/// On ClickHouse only, consume an optional `AS alias` after an array element
/// (e.g. `[1 AS a, 2 AS b]`) and wrap the expression in an `Alias` node.
/// An `AS` immediately followed by `]` is not treated as an alias, and all
/// other dialects get the expression back untouched.
fn maybe_parse_clickhouse_array_alias(&mut self, expr: Expression) -> Result<Expression> {
    let is_clickhouse = matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    );
    // Guard clause: bail out unless we see a real ClickHouse `AS alias`.
    if !is_clickhouse || !self.check(TokenType::As) || self.check_next(TokenType::RBracket) {
        return Ok(expr);
    }
    self.skip(); // consume AS
    let alias = self.expect_identifier()?;
    let aliased = Alias::new(expr, Identifier::new(alias));
    Ok(Expression::Alias(Box::new(aliased)))
}
/// Build an array-constructor expression for elements written with `[...]`
/// bracket notation (as opposed to the `ARRAY(...)` / `LIST(...)` keyword
/// forms, which set the corresponding flags differently).
fn build_bracket_array(expressions: Vec<Expression>) -> Expression {
    let constructor = ArrayConstructor {
        expressions,
        bracket_notation: true,
        use_list_keyword: false,
    };
    Expression::ArrayFunc(Box::new(constructor))
}
/// Parse the contents of a bracketed array literal `[...]` (the opening `[`
/// has already been consumed by the caller).
///
/// Handles:
/// - plain and nested array elements, via an explicit frame stack so deep
///   nesting cannot overflow the call stack,
/// - ClickHouse `expr AS alias` elements (see `maybe_parse_clickhouse_array_alias`),
/// - comprehensions `[expr FOR var[, pos] IN iter [IF cond]]`.
fn parse_bracket_array_literal(&mut self) -> Result<Expression> {
// One stack frame per currently-open `[`; holds the elements parsed so far.
struct ArrayFrame {
expressions: Vec<Expression>,
}
let mut frames = vec![ArrayFrame {
expressions: Vec::new(),
}];
// A finished element (or a finished nested array) waiting to be pushed into
// the innermost open frame on the next loop iteration.
let mut completed_expr: Option<Expression> = None;
loop {
if let Some(expr) = completed_expr.take() {
// Attach optional ClickHouse alias / trailing subscript, then store
// the element in the innermost frame.
let expr = self.maybe_parse_clickhouse_array_alias(expr)?;
let expr = self.maybe_parse_subscript(expr)?;
if let Some(frame) = frames.last_mut() {
frame.expressions.push(expr);
} else {
return Err(self.parse_error("Unbalanced array literal"));
}
if self.match_token(TokenType::Comma) {
continue;
}
// No comma after the element: the current array must close here.
self.expect(TokenType::RBracket)?;
let frame = frames
.pop()
.ok_or_else(|| self.parse_error("Unbalanced array literal"))?;
let array_expr = Self::build_bracket_array(frame.expressions);
if frames.is_empty() {
// Outermost array finished; allow `[...][idx]`-style subscripts.
return self.maybe_parse_subscript(array_expr);
}
// A nested array becomes an element of its parent frame.
completed_expr = Some(array_expr);
continue;
}
if self.match_token(TokenType::RBracket) {
// `]` with no pending element: empty array, or close right after `[`.
let frame = frames
.pop()
.ok_or_else(|| self.parse_error("Unbalanced array literal"))?;
let array_expr = Self::build_bracket_array(frame.expressions);
if frames.is_empty() {
return self.maybe_parse_subscript(array_expr);
}
completed_expr = Some(array_expr);
continue;
}
if self.match_token(TokenType::LBracket) {
// Nested array literal: open a new frame.
frames.push(ArrayFrame {
expressions: Vec::new(),
});
continue;
}
let expr = self.parse_expression()?;
// A `FOR` right after the first element marks a comprehension.
if frames
.last()
.is_some_and(|frame| frame.expressions.is_empty())
&& self.match_token(TokenType::For)
{
let loop_var = self.parse_primary()?;
// Optional second loop variable: `FOR value, index IN ...`.
let position = if self.match_token(TokenType::Comma) {
Some(self.parse_primary()?)
} else {
None
};
if !self.match_token(TokenType::In) {
return Err(self.parse_error("Expected IN in comprehension"));
}
let iterator = self.parse_expression()?;
// Optional filter clause: `... IF cond`.
let condition = if self.match_token(TokenType::If) {
Some(self.parse_expression()?)
} else {
None
};
self.expect(TokenType::RBracket)?;
let comprehension = Expression::Comprehension(Box::new(Comprehension {
this: Box::new(expr),
expression: Box::new(loop_var),
position: position.map(Box::new),
iterator: Some(Box::new(iterator)),
condition: condition.map(Box::new),
}));
if frames.len() == 1 {
return Ok(comprehension);
}
let frame = frames
.pop()
.ok_or_else(|| self.parse_error("Unbalanced array comprehension"))?;
debug_assert!(frame.expressions.is_empty());
// A nested comprehension is treated as an element of the parent array.
completed_expr = Some(comprehension);
continue;
}
completed_expr = Some(expr);
}
}
/// Parse what follows a `(` in primary-expression position (the `(` itself
/// has already been consumed by the caller).
///
/// `lparen_comments` are comments captured alongside the opening paren; they
/// are merged with trailing comments onto the resulting `Paren` node.
///
/// Disambiguates, in order: empty tuple vs zero-param lambda `() -> body`,
/// `(VALUES ...)`, ClickHouse tuple-lambdas `(a, b) -> body`, subqueries
/// (optionally continued by set operations and ORDER BY/LIMIT/OFFSET),
/// nested parens, tuples, single-parameter lambdas, and plain parenthesized
/// expressions.
fn parse_parenthesized_primary(&mut self, lparen_comments: Vec<String>) -> Result<Expression> {
// Empty parens () — could be empty tuple or zero-param lambda () -> body
if self.check(TokenType::RParen) {
self.skip(); // consume )
// Check for lambda: () -> body
if self.match_token(TokenType::Arrow) || self.match_token(TokenType::FArrow) {
let body = self.parse_expression()?;
return Ok(Expression::Lambda(Box::new(LambdaExpr {
parameters: Vec::new(),
body,
colon: false,
parameter_types: Vec::new(),
})));
}
// Otherwise empty tuple
return self.maybe_parse_subscript(Expression::Tuple(Box::new(Tuple {
expressions: Vec::new(),
})));
}
// Check if this is a VALUES expression inside parens: (VALUES ...)
if self.check(TokenType::Values) {
let values = self.parse_values()?;
self.expect(TokenType::RParen)?;
return Ok(Expression::Subquery(Box::new(Subquery {
this: values,
alias: None,
column_aliases: Vec::new(),
order_by: None,
limit: None,
offset: None,
distribute_by: None,
sort_by: None,
cluster_by: None,
lateral: false,
modifiers_inside: false,
trailing_comments: self.previous_trailing_comments().to_vec(),
inferred_type: None,
})));
}
// Check if this is a subquery (SELECT, WITH, DuckDB FROM-first, or ClickHouse EXPLAIN)
// (EXPLAIN ...) is recognized by peeking past EXPLAIN for a statement
// keyword, an EXPLAIN mode word, or a `setting = value` pair.
let is_explain_subquery = self.check(TokenType::Var)
&& self.peek().text.eq_ignore_ascii_case("EXPLAIN")
&& self.peek_nth(1).map_or(false, |t| {
matches!(
t.token_type,
TokenType::Select
| TokenType::Insert
| TokenType::Create
| TokenType::Alter
| TokenType::Drop
| TokenType::Set
| TokenType::System
| TokenType::Table
) || matches!(
t.text.to_ascii_uppercase().as_str(),
"SYNTAX" | "AST" | "PLAN" | "PIPELINE" | "ESTIMATE" | "CURRENT" | "QUERY"
) || (t.token_type == TokenType::Var
&& self
.peek_nth(2)
.map_or(false, |t2| t2.token_type == TokenType::Eq))
});
// ClickHouse: (from, to, ...) -> body is a tuple-lambda with keyword params
// Detect pattern: (keyword/ident, keyword/ident, ...) ->
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) {
// Pure lookahead scan (no tokens consumed) for `ident[, ident]* ) ->`.
let mut look = self.current;
let mut is_tuple_lambda = true;
let mut param_count = 0;
loop {
if look >= self.tokens.len() {
is_tuple_lambda = false;
break;
}
let tt = self.tokens[look].token_type;
if tt == TokenType::Identifier
|| tt == TokenType::Var
|| tt == TokenType::QuotedIdentifier
|| tt.is_keyword()
{
param_count += 1;
look += 1;
} else {
is_tuple_lambda = false;
break;
}
if look >= self.tokens.len() {
is_tuple_lambda = false;
break;
}
if self.tokens[look].token_type == TokenType::Comma {
look += 1;
} else if self.tokens[look].token_type == TokenType::RParen {
look += 1;
break;
} else {
is_tuple_lambda = false;
break;
}
}
if is_tuple_lambda
&& param_count >= 1
&& look < self.tokens.len()
&& self.tokens[look].token_type == TokenType::Arrow
{
// Confirmed lambda: now actually consume the parameter list.
let mut params = Vec::new();
loop {
let tok = self.advance();
params.push(Identifier::new(tok.text));
if self.match_token(TokenType::Comma) {
continue;
}
break;
}
self.expect(TokenType::RParen)?;
self.expect(TokenType::Arrow)?;
let body = self.parse_expression()?;
return Ok(Expression::Lambda(Box::new(LambdaExpr {
parameters: params,
body,
colon: false,
parameter_types: Vec::new(),
})));
}
}
if self.check(TokenType::Select)
|| self.check(TokenType::With)
|| self.check(TokenType::From)
|| is_explain_subquery
{
let query = self.parse_statement()?;
// LIMIT/OFFSET still inside the parens belong to the subquery itself
// (marked with modifiers_inside = true below).
let limit = if self.match_token(TokenType::Limit) {
Some(Limit {
this: self.parse_expression()?,
percent: false,
comments: Vec::new(),
})
} else {
None
};
let offset = if self.match_token(TokenType::Offset) {
Some(Offset {
this: self.parse_expression()?,
rows: None,
})
} else {
None
};
self.expect(TokenType::RParen)?;
let subquery = if limit.is_some() || offset.is_some() {
Expression::Subquery(Box::new(Subquery {
this: query,
alias: None,
column_aliases: Vec::new(),
order_by: None,
limit,
offset,
distribute_by: None,
sort_by: None,
cluster_by: None,
lateral: false,
modifiers_inside: true,
trailing_comments: self.previous_trailing_comments().to_vec(),
inferred_type: None,
}))
} else {
Expression::Subquery(Box::new(Subquery {
this: query,
alias: None,
column_aliases: Vec::new(),
order_by: None,
limit: None,
offset: None,
distribute_by: None,
sort_by: None,
cluster_by: None,
lateral: false,
modifiers_inside: false,
trailing_comments: self.previous_trailing_comments().to_vec(),
inferred_type: None,
}))
};
// `(SELECT ...) UNION ...` — the parenthesized query may be the left
// operand of a set operation that follows the closing paren.
let set_result = self.parse_set_operation(subquery)?;
let had_set_operation = matches!(
&set_result,
Expression::Union(_) | Expression::Intersect(_) | Expression::Except(_)
);
let result = if had_set_operation {
// ORDER BY / LIMIT / OFFSET after the set operation apply to the
// combined result, so wrap it in another Subquery node if present.
let order_by = if self.check(TokenType::Order) {
self.expect(TokenType::Order)?;
self.expect(TokenType::By)?;
Some(self.parse_order_by()?)
} else {
None
};
let limit_after = if self.match_token(TokenType::Limit) {
Some(Limit {
this: self.parse_expression()?,
percent: false,
comments: Vec::new(),
})
} else {
None
};
let offset_after = if self.match_token(TokenType::Offset) {
Some(Offset {
this: self.parse_expression()?,
rows: None,
})
} else {
None
};
if order_by.is_some() || limit_after.is_some() || offset_after.is_some() {
Expression::Subquery(Box::new(Subquery {
this: set_result,
alias: None,
column_aliases: Vec::new(),
order_by,
limit: limit_after,
offset: offset_after,
lateral: false,
modifiers_inside: false,
trailing_comments: Vec::new(),
distribute_by: None,
sort_by: None,
cluster_by: None,
inferred_type: None,
}))
} else {
set_result
}
} else {
set_result
};
return self.maybe_parse_subscript(result);
}
if self.check(TokenType::LParen) {
// Nested parens: ((expr) ...). parse_expression consumes the inner
// parens; afterwards decide between alias, tuple, set operation on a
// doubly-parenthesized subquery, or a plain Paren wrapper.
let expr = self.parse_expression()?;
let first_expr = if self.match_token(TokenType::As) {
let alias = self.expect_identifier_or_alias_keyword_with_quoted()?;
Expression::Alias(Box::new(Alias::new(expr, alias)))
} else {
expr
};
if self.match_token(TokenType::Comma) {
// Tuple of (possibly aliased) expressions.
let mut expressions = vec![first_expr];
loop {
if self.check(TokenType::RParen) {
break;
}
let elem = self.parse_expression()?;
let elem = if self.match_token(TokenType::As) {
let alias = self.expect_identifier_or_keyword()?;
Expression::Alias(Box::new(Alias::new(elem, Identifier::new(alias))))
} else {
elem
};
expressions.push(elem);
if !self.match_token(TokenType::Comma) {
break;
}
}
self.expect(TokenType::RParen)?;
let tuple_expr = Expression::Tuple(Box::new(Tuple { expressions }));
return self.maybe_parse_subscript(tuple_expr);
}
let result = first_expr;
self.expect(TokenType::RParen)?;
let mut nested_paren_comments = lparen_comments.clone();
nested_paren_comments.extend_from_slice(self.previous_trailing_comments());
if self.check(TokenType::Union)
|| self.check(TokenType::Intersect)
|| self.check(TokenType::Except)
{
// ((SELECT ...)) UNION ... — unwrap the inner subquery and use it
// as the left side of the set operation.
if let Expression::Subquery(subq) = &result {
let set_result = self.parse_set_operation(subq.this.clone())?;
let order_by = if self.check(TokenType::Order) {
self.expect(TokenType::Order)?;
self.expect(TokenType::By)?;
Some(self.parse_order_by()?)
} else {
None
};
let limit = if self.match_token(TokenType::Limit) {
Some(Limit {
this: self.parse_expression()?,
percent: false,
comments: Vec::new(),
})
} else {
None
};
let offset = if self.match_token(TokenType::Offset) {
Some(Offset {
this: self.parse_expression()?,
rows: None,
})
} else {
None
};
return Ok(Expression::Subquery(Box::new(Subquery {
this: set_result,
alias: None,
column_aliases: Vec::new(),
order_by,
limit,
offset,
lateral: false,
modifiers_inside: false,
trailing_comments: Vec::new(),
distribute_by: None,
sort_by: None,
cluster_by: None,
inferred_type: None,
})));
}
}
return self.maybe_parse_over(Expression::Paren(Box::new(Paren {
this: result,
trailing_comments: nested_paren_comments,
})));
}
// General case: first expression inside the parens.
let expr = self.parse_expression()?;
let first_expr = if self.match_token(TokenType::As) {
let alias = self.expect_identifier_or_keyword_with_quoted()?;
Expression::Alias(Box::new(Alias::new(expr, alias)))
} else {
expr
};
if self.match_token(TokenType::Comma) {
let mut expressions = vec![first_expr];
// Trailing comma: `(a,)` is still a one-element tuple.
if self.check(TokenType::RParen) {
self.skip();
let tuple_expr = Expression::Tuple(Box::new(Tuple { expressions }));
return self.maybe_parse_subscript(tuple_expr);
}
loop {
let elem = self.parse_expression()?;
let elem_with_alias = if self.match_token(TokenType::As) {
let alias = self.expect_identifier_or_keyword_with_quoted()?;
Expression::Alias(Box::new(Alias::new(elem, alias)))
} else {
elem
};
expressions.push(elem_with_alias);
if !self.match_token(TokenType::Comma) {
break;
}
if self.check(TokenType::RParen) {
break;
}
}
self.expect(TokenType::RParen)?;
if self.match_token(TokenType::Arrow) {
// (a, b) -> body: the "tuple" was really a lambda parameter list.
// Note: non-identifier elements are silently dropped from the list.
let parameters = expressions
.into_iter()
.filter_map(|e| {
if let Expression::Column(c) = e {
Some(c.name)
} else if let Expression::Identifier(id) = e {
Some(id)
} else {
None
}
})
.collect();
let body = self.parse_expression()?;
return Ok(Expression::Lambda(Box::new(LambdaExpr {
parameters,
body,
colon: false,
parameter_types: Vec::new(),
})));
}
let tuple_expr = Expression::Tuple(Box::new(Tuple { expressions }));
let result = if self.check(TokenType::As) {
// `(...) AS x` could also be a cast-style `AS type` continuation;
// peek two tokens ahead for a type constructor (`T(` / `T<`) or a
// type name followed by `)`, and in that case leave AS unconsumed
// (presumably for an enclosing CAST-like construct — see callers).
let after_as = self.current + 1;
let after_ident = self.current + 2;
let is_type_constructor = after_ident < self.tokens.len()
&& (self.tokens[after_as].token_type == TokenType::Identifier
|| self.tokens[after_as].token_type == TokenType::Var
|| self.tokens[after_as].token_type == TokenType::Nullable
|| self.tokens[after_as].token_type == TokenType::Struct
|| self.tokens[after_as].token_type == TokenType::Array)
&& (self.tokens[after_ident].token_type == TokenType::LParen
|| self.tokens[after_ident].token_type == TokenType::Lt);
let is_cast_type = after_ident < self.tokens.len()
&& (self.tokens[after_as].token_type == TokenType::Identifier
|| self.tokens[after_as].token_type == TokenType::Var
|| self.tokens[after_as].token_type.is_keyword())
&& self.tokens[after_ident].token_type == TokenType::RParen;
if is_type_constructor || is_cast_type {
tuple_expr
} else {
self.skip();
let alias = self.expect_identifier()?;
Expression::Alias(Box::new(Alias::new(tuple_expr, Identifier::new(alias))))
}
} else {
tuple_expr
};
return self.maybe_parse_subscript(result);
}
// ClickHouse: `(x -> body)` — lambda written inside the parens.
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.match_token(TokenType::Arrow)
{
let parameters = if let Expression::Column(c) = first_expr {
vec![c.name]
} else if let Expression::Identifier(id) = first_expr {
vec![id]
} else {
return Err(self.parse_error("Expected identifier as lambda parameter"));
};
let body = self.parse_expression()?;
self.expect(TokenType::RParen)?;
return Ok(Expression::Paren(Box::new(Paren {
this: Expression::Lambda(Box::new(LambdaExpr {
parameters,
body,
colon: false,
parameter_types: Vec::new(),
})),
trailing_comments: Vec::new(),
})));
}
self.expect(TokenType::RParen)?;
let mut paren_comments = lparen_comments.clone();
paren_comments.extend_from_slice(self.previous_trailing_comments());
// `(x) -> body`: single-parameter lambda.
if self.match_token(TokenType::Arrow) {
let parameters = if let Expression::Column(c) = first_expr {
vec![c.name]
} else if let Expression::Identifier(id) = first_expr {
vec![id]
} else {
return Err(self.parse_error("Expected identifier as lambda parameter"));
};
let body = self.parse_expression()?;
return Ok(Expression::Lambda(Box::new(LambdaExpr {
parameters,
body,
colon: false,
parameter_types: Vec::new(),
})));
}
// Plain parenthesized expression; may be followed by an OVER clause.
self.maybe_parse_over(Expression::Paren(Box::new(Paren {
this: first_expr,
trailing_comments: paren_comments,
})))
}
/// Parse a primary expression that starts with an identifier.
///
/// Covers: no-paren functions (CURRENT_DATE etc.), function calls
/// (including ClickHouse bit-function chains and Teradata FORMAT phrases),
/// qualified references `a.b` / `a.*` (with ClickHouse APPLY/EXCEPT/REPLACE
/// star modifiers and numeric/`-n`/`^field` accessors), Oracle join-mark
/// columns `t.c(+)`, method calls `obj.m(args)`, Oracle pseudocolumns,
/// single-parameter lambdas `x -> body`, and plain column references.
fn parse_identifier_primary(&mut self) -> Result<Expression> {
// Check for no-paren functions like CURRENT_TIMESTAMP, CURRENT_DATE, etc.
// These should be parsed as functions even without parentheses
// (ClickHouse CURRENT_TIMESTAMP is exempted and treated as an identifier).
let upper_name = self.peek().text.to_ascii_uppercase();
if !self.check_next(TokenType::LParen)
&& !self.check_next(TokenType::Dot)
&& crate::function_registry::is_no_paren_function_name_upper(upper_name.as_str())
&& !(matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && upper_name.as_str() == "CURRENT_TIMESTAMP")
{
let token = self.advance();
let func = Expression::Function(Box::new(Function {
name: token.text.clone(),
args: Vec::new(),
distinct: false,
trailing_comments: Vec::new(),
use_bracket_syntax: false,
no_parens: true,
quoted: false,
span: None,
inferred_type: None,
}));
return self.maybe_parse_subscript(func);
}
let ident = self.expect_identifier_with_quoted()?;
let name = ident.name.clone();
let quoted = ident.quoted;
// Teradata: `expr (FORMAT '...')` — the paren opens a FORMAT phrase,
// not a function call.
let is_teradata_format_phrase = matches!(
self.config.dialect,
Some(crate::dialects::DialectType::Teradata)
) && self.check(TokenType::LParen)
&& self.check_next(TokenType::Format);
if !is_teradata_format_phrase && self.match_token(TokenType::LParen) {
// `name(` — function call.
let upper_name = name.to_ascii_uppercase();
let canonical_upper_name =
crate::function_registry::canonical_typed_function_name_upper(&upper_name);
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && Self::is_clickhouse_simple_function_chain_name(canonical_upper_name)
{
let func_expr = self.parse_clickhouse_simple_function_chain(name, quoted)?;
return self.maybe_parse_over(func_expr);
}
let func_expr = self.parse_typed_function(&name, &upper_name, quoted)?;
let func_expr = self.maybe_parse_clickhouse_parameterized_agg(func_expr)?;
return self.maybe_parse_over(func_expr);
}
if self.match_token(TokenType::Dot) {
if self.match_token(TokenType::Star) {
// `table.*`, possibly followed by star modifiers.
let star = self.parse_star_modifiers(Some(ident))?;
let mut star_expr = Expression::Star(star);
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) {
// ClickHouse star modifiers: APPLY(f) wraps the star; EXCEPT/
// EXCLUDE and REPLACE clauses are parsed but their arguments
// are discarded here.
loop {
if self.check(TokenType::Apply) {
self.skip();
let apply_expr = if self.match_token(TokenType::LParen) {
let e = self.parse_expression()?;
self.expect(TokenType::RParen)?;
e
} else {
self.parse_expression()?
};
star_expr = Expression::Apply(Box::new(crate::expressions::Apply {
this: Box::new(star_expr),
expression: Box::new(apply_expr),
}));
} else if self.check(TokenType::Except) || self.check(TokenType::Exclude) {
self.skip();
self.match_identifier("STRICT");
if self.match_token(TokenType::LParen) {
loop {
if self.check(TokenType::RParen) {
break;
}
let _ = self.parse_expression()?;
if !self.match_token(TokenType::Comma) {
break;
}
}
self.expect(TokenType::RParen)?;
} else if self.is_identifier_token()
|| self.is_safe_keyword_as_identifier()
{
let _ = self.parse_expression()?;
}
} else if self.check(TokenType::Replace) {
self.skip();
self.match_identifier("STRICT");
if self.match_token(TokenType::LParen) {
loop {
if self.check(TokenType::RParen) {
break;
}
let _ = self.parse_expression()?;
if self.match_token(TokenType::As) {
if self.is_identifier_token()
|| self.is_safe_keyword_as_identifier()
{
self.skip();
}
}
if !self.match_token(TokenType::Comma) {
break;
}
}
self.expect(TokenType::RParen)?;
} else {
let _ = self.parse_expression()?;
if self.match_token(TokenType::As) {
if self.is_identifier_token()
|| self.is_safe_keyword_as_identifier()
{
self.skip();
}
}
}
} else {
break;
}
}
}
return Ok(star_expr);
}
if self.check(TokenType::Number) {
// Numeric field access after the dot, e.g. `x.1`.
let field_name = self.advance().text;
let col_expr = Expression::Dot(Box::new(DotAccess {
this: Expression::boxed_column(Column {
name: ident,
table: None,
join_mark: false,
trailing_comments: Vec::new(),
span: None,
inferred_type: None,
}),
field: Identifier::new(field_name),
}));
return self.maybe_parse_subscript(col_expr);
}
// ClickHouse: `x.-1` — dash+number field access after the dot.
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.check(TokenType::Dash)
&& self.current + 1 < self.tokens.len()
&& self.tokens[self.current + 1].token_type == TokenType::Number
{
self.skip();
let num = self.advance().text;
let field_name = format!("-{}", num);
let col_expr = Expression::Dot(Box::new(DotAccess {
this: Expression::boxed_column(Column {
name: ident,
table: None,
join_mark: false,
trailing_comments: Vec::new(),
span: None,
inferred_type: None,
}),
field: Identifier::new(field_name),
}));
return self.maybe_parse_subscript(col_expr);
}
// ClickHouse: `x.^field` — caret-prefixed field access.
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.check(TokenType::Caret)
{
self.skip();
let mut field_name = "^".to_string();
if self.check(TokenType::Identifier)
|| self.check(TokenType::Var)
|| self.check_keyword()
{
field_name.push_str(&self.advance().text);
}
let col_expr = Expression::Dot(Box::new(DotAccess {
this: Expression::boxed_column(Column {
name: ident,
table: None,
join_mark: false,
trailing_comments: Vec::new(),
span: None,
inferred_type: None,
}),
field: Identifier::new(field_name),
}));
return self.maybe_parse_subscript(col_expr);
}
let col_ident = self.expect_identifier_or_keyword_with_quoted()?;
// Oracle outer-join mark: `t.c(+)`; backtrack if `(+` isn't closed by `)`.
if self.check(TokenType::LParen) && self.check_next(TokenType::Plus) {
let saved_pos = self.current;
if self.match_token(TokenType::LParen)
&& self.match_token(TokenType::Plus)
&& self.match_token(TokenType::RParen)
{
let trailing_comments = self.previous_trailing_comments().to_vec();
let col = Expression::boxed_column(Column {
name: col_ident,
table: Some(ident),
join_mark: true,
trailing_comments,
span: None,
inferred_type: None,
});
return self.maybe_parse_subscript(col);
} else {
self.current = saved_pos;
}
}
if self.check(TokenType::LParen) {
// `obj.method(args)` — method call on a column.
self.skip();
let args = if self.check(TokenType::RParen) {
Vec::new()
} else {
self.parse_function_arguments()?
};
self.expect(TokenType::RParen)?;
let method_call = Expression::MethodCall(Box::new(MethodCall {
this: Expression::boxed_column(Column {
name: ident.clone(),
table: None,
join_mark: false,
trailing_comments: Vec::new(),
span: None,
inferred_type: None,
}),
method: col_ident,
args,
}));
return self.maybe_parse_subscript(method_call);
}
// Plain qualified column: `table.column`.
let trailing_comments = self.previous_trailing_comments().to_vec();
let col = Expression::boxed_column(Column {
name: col_ident,
table: Some(ident),
join_mark: false,
trailing_comments,
span: None,
inferred_type: None,
});
return self.maybe_parse_subscript(col);
}
// Unquoted names under Oracle (or no dialect) may be pseudocolumns
// such as ROWNUM/LEVEL (whatever PseudocolumnType::from_str accepts).
if !quoted
&& matches!(
self.config.dialect,
Some(crate::dialects::DialectType::Oracle) | None
)
{
if let Some(pseudocolumn_type) = PseudocolumnType::from_str(&name) {
return Ok(Expression::Pseudocolumn(Pseudocolumn {
kind: pseudocolumn_type,
}));
}
}
// `x -> body`: single-parameter lambda. `x -> 'string'` is excluded —
// presumably that form is a JSON arrow operator handled elsewhere.
if self.check(TokenType::Arrow)
&& !self
.peek_nth(1)
.map_or(false, |t| t.token_type == TokenType::String)
{
self.skip();
let body = self.parse_expression()?;
return Ok(Expression::Lambda(Box::new(LambdaExpr {
parameters: vec![ident],
body,
colon: false,
parameter_types: Vec::new(),
})));
}
// Fallback: a bare, unqualified column reference.
let trailing_comments = self.previous_trailing_comments().to_vec();
let col = Expression::boxed_column(Column {
name: ident,
table: None,
join_mark: false,
trailing_comments,
span: None,
inferred_type: None,
});
self.maybe_parse_subscript(col)
}
/// Returns true for (canonicalized, upper-cased) ClickHouse bit-function
/// names that `parse_clickhouse_simple_function_chain` handles as a
/// non-recursive call chain.
fn is_clickhouse_simple_function_chain_name(canonical_upper_name: &str) -> bool {
    const CHAIN_FUNCTION_NAMES: [&str; 8] = [
        "BITOR",
        "BITAND",
        "BITXOR",
        "BIT_OR",
        "BIT_AND",
        "BIT_XOR",
        "BITSHIFTLEFT",
        "BITSHIFTRIGHT",
    ];
    CHAIN_FUNCTION_NAMES.contains(&canonical_upper_name)
}
/// Parse a ClickHouse bit-function call chain such as
/// `bitOr(bitAnd(a, b), c)` without recursion: nested calls whose names pass
/// `is_clickhouse_simple_function_chain_name` open new frames on an explicit
/// stack.
///
/// `name`/`quoted` describe the outermost function; its `(` has already been
/// consumed by the caller.
fn parse_clickhouse_simple_function_chain(
&mut self,
name: String,
quoted: bool,
) -> Result<Expression> {
// One frame per currently-open call; collects that call's arguments.
struct FunctionFrame {
name: String,
quoted: bool,
args: Vec<Expression>,
}
let mut frames = vec![FunctionFrame {
name,
quoted,
args: Vec::new(),
}];
// A finished argument (or finished nested call) waiting to be pushed
// into the innermost frame on the next loop iteration.
let mut completed_arg: Option<Expression> = None;
loop {
if let Some(arg) = completed_arg.take() {
if let Some(frame) = frames.last_mut() {
frame.args.push(arg);
} else {
return Err(self.parse_error("Unbalanced function call chain"));
}
if self.match_token(TokenType::Comma) {
continue;
}
// No comma after the argument: the current call must close here.
self.expect(TokenType::RParen)?;
let frame = frames
.pop()
.ok_or_else(|| self.parse_error("Unbalanced function call chain"))?;
let mut func = Function::new(frame.name, frame.args);
func.quoted = frame.quoted;
let expr = Expression::Function(Box::new(func));
if frames.is_empty() {
return Ok(expr);
}
// The nested call becomes an argument of its parent frame.
completed_arg = Some(expr);
continue;
}
if self.check(TokenType::RParen) {
// `)` with no pending argument: zero-argument call.
self.skip();
let frame = frames
.pop()
.ok_or_else(|| self.parse_error("Unbalanced function call chain"))?;
let mut func = Function::new(frame.name, frame.args);
func.quoted = frame.quoted;
let expr = Expression::Function(Box::new(func));
if frames.is_empty() {
return Ok(expr);
}
completed_arg = Some(expr);
continue;
}
let saved_pos = self.current;
if self.is_identifier_token() {
// Lookahead: `ident(` where ident is another chain function opens a
// nested frame; anything else backtracks to saved_pos and is parsed
// as an ordinary argument below.
let ident = self.expect_identifier_with_quoted()?;
let upper_name = ident.name.to_ascii_uppercase();
let canonical_upper_name =
crate::function_registry::canonical_typed_function_name_upper(&upper_name);
if self.match_token(TokenType::LParen)
&& Self::is_clickhouse_simple_function_chain_name(canonical_upper_name)
{
frames.push(FunctionFrame {
name: ident.name,
quoted: ident.quoted,
args: Vec::new(),
});
continue;
}
self.current = saved_pos;
}
completed_arg = Some(self.parse_single_function_argument()?);
}
}
/// Parse primary expressions
fn parse_primary(&mut self) -> Result<Expression> {
// Handle APPROXIMATE COUNT(DISTINCT expr) - Redshift syntax
// Parses as ApproxDistinct expression
if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("APPROXIMATE") {
let saved_pos = self.current;
self.skip(); // consume APPROXIMATE
// Parse the COUNT(DISTINCT ...) that follows
let func = self.parse_primary()?;
// Check if it's COUNT with DISTINCT
if let Expression::Count(ref count_expr) = func {
if count_expr.distinct {
let this_expr = count_expr.this.clone().unwrap_or_else(|| {
Expression::Star(crate::expressions::Star {
table: None,
except: None,
replace: None,
rename: None,
trailing_comments: Vec::new(),
span: None,
})
});
return Ok(Expression::ApproxDistinct(Box::new(
crate::expressions::AggFunc {
this: this_expr,
distinct: false,
filter: None,
order_by: Vec::new(),
name: Some("APPROX_DISTINCT".to_string()),
ignore_nulls: None,
having_max: None,
limit: None,
inferred_type: None,
},
)));
}
}
// Not COUNT(DISTINCT ...) - backtrack
self.current = saved_pos;
}
if let Some(connect_by_root) = self.try_parse_connect_by_root_expression()? {
return Ok(connect_by_root);
}
// PostgreSQL VARIADIC prefix in function call arguments
// e.g., SELECT MLEAST(VARIADIC ARRAY[10, -1, 5, 4.4])
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::PostgreSQL)
| Some(crate::dialects::DialectType::Redshift)
) {
if self.check(TokenType::Var) && self.peek().text.eq_ignore_ascii_case("VARIADIC") {
self.skip(); // consume VARIADIC
let expr = self.parse_bitwise_or()?;
return Ok(Expression::Variadic(Box::new(
crate::expressions::Variadic {
this: Box::new(expr),
},
)));
}
}
// MySQL charset introducer: _utf8mb4 'string', _latin1 x'hex', etc.
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::MySQL)
| Some(crate::dialects::DialectType::SingleStore)
| Some(crate::dialects::DialectType::Doris)
| Some(crate::dialects::DialectType::StarRocks)
) {
if self.check(TokenType::Var) || self.check(TokenType::Identifier) {
if self.peek().text.starts_with('_')
&& Self::is_mysql_charset_introducer(&self.peek().text.to_ascii_uppercase())
{
// Check if next token is a string literal or hex string
if self.current + 1 < self.tokens.len() {
let next_tt = self.tokens[self.current + 1].token_type;
if matches!(
next_tt,
TokenType::String | TokenType::HexString | TokenType::BitString
) {
let charset_token = self.advance(); // consume charset name
let charset_name = charset_token.text.clone();
let literal = self.parse_primary()?; // parse the string/hex literal
return Ok(Expression::Introducer(Box::new(
crate::expressions::Introducer {
this: Box::new(Expression::Column(Box::new(
crate::expressions::Column {
name: crate::expressions::Identifier {
name: charset_name,
quoted: false,
trailing_comments: Vec::new(),
span: None,
},
table: None,
join_mark: false,
trailing_comments: Vec::new(),
span: None,
inferred_type: None,
},
))),
expression: Box::new(literal),
},
)));
}
}
}
}
}
// Array literal: [1, 2, 3] or comprehension: [expr FOR var IN iterator]
if self.match_token(TokenType::LBracket) {
return self.parse_bracket_array_literal();
}
// Map/Struct literal with curly braces: {'a': 1, 'b': 2}
// Or Snowflake wildcard syntax: {*}, {tbl.*}, {* EXCLUDE (...)}, {* ILIKE '...'}
if self.match_token(TokenType::LBrace) {
// ClickHouse query parameter: {name: Type}
// We consumed `{` above, so rewind and let the dedicated parser consume it.
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) {
self.current -= 1;
if let Some(param) = self.parse_clickhouse_braced_parameter()? {
return self.maybe_parse_subscript(param);
}
// Not a ClickHouse query parameter, restore position after `{` for map/wildcard parsing.
self.current += 1;
}
// Parse empty map: {}
if self.match_token(TokenType::RBrace) {
return self.maybe_parse_subscript(Expression::MapFunc(Box::new(MapConstructor {
keys: Vec::new(),
values: Vec::new(),
curly_brace_syntax: true,
with_map_keyword: false,
})));
}
// Check for ODBC escape syntax: {fn function_name(args)}
// This must be checked before wildcards and map literals
if self.check_identifier("fn") {
self.skip(); // consume 'fn'
// Parse function call
let func_name = self.expect_identifier_or_keyword_with_quoted()?;
self.expect(TokenType::LParen)?;
// Parse function arguments
let mut args = Vec::new();
if !self.check(TokenType::RParen) {
loop {
args.push(self.parse_expression()?);
if !self.match_token(TokenType::Comma) {
break;
}
}
}
self.expect(TokenType::RParen)?;
self.expect(TokenType::RBrace)?;
// Return as a regular function call (the ODBC escape is just syntax sugar)
return Ok(Expression::Function(Box::new(Function::new(
func_name.name,
args,
))));
}
// Check for ODBC datetime literals: {d'2024-01-01'}, {t'12:00:00'}, {ts'2024-01-01 12:00:00'}
if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
let type_text = self.peek().text.to_lowercase();
if (type_text == "d" || type_text == "t" || type_text == "ts")
&& self.check_next(TokenType::String)
{
self.skip(); // consume type indicator (d, t, or ts)
let value = self.expect_string()?;
self.expect(TokenType::RBrace)?;
// Return appropriate expression based on type
return match type_text.as_str() {
"d" => Ok(Expression::Date(Box::new(
crate::expressions::UnaryFunc::new(Expression::Literal(Box::new(
crate::expressions::Literal::String(value),
))),
))),
"t" => Ok(Expression::Time(Box::new(
crate::expressions::UnaryFunc::new(Expression::Literal(Box::new(
crate::expressions::Literal::String(value),
))),
))),
"ts" => Ok(Expression::Timestamp(Box::new(
crate::expressions::TimestampFunc {
this: Some(Box::new(Expression::Literal(Box::new(
crate::expressions::Literal::String(value),
)))),
zone: None,
with_tz: None,
safe: None,
},
))),
_ => {
Err(self
.parse_error(format!("Unknown ODBC datetime type: {}", type_text)))
}
};
}
}
// Check for Snowflake wildcard syntax: {*}, {tbl.*}, {* EXCLUDE (...)}, {* ILIKE '...'}
// Pattern: either {*...} or {identifier/var followed by .*}
// Note: Identifiers may be tokenized as Var or Identifier
let is_table_star = (self.check(TokenType::Identifier) || self.check(TokenType::Var))
&& self.check_next(TokenType::Dot)
&& self
.tokens
.get(self.current + 2)
.map(|t| t.token_type == TokenType::Star)
.unwrap_or(false);
let is_wildcard = self.check(TokenType::Star) || is_table_star;
if is_wildcard {
// Parse the wildcard expression
let wildcard_expr = if self.match_token(TokenType::Star) {
// {*} or {* EXCLUDE ...} or {* ILIKE ...}
// Check for ILIKE first since it's different from standard star modifiers
if self.check_keyword_text("ILIKE") {
self.skip();
let pattern = self.parse_expression()?;
// Create an ILike expression with Star as left side
Expression::ILike(Box::new(LikeOp {
left: Expression::Star(Star {
table: None,
except: None,
replace: None,
rename: None,
trailing_comments: Vec::new(),
span: None,
}),
right: pattern,
escape: None,
quantifier: None,
inferred_type: None,
}))
} else {
// {*} or {* EXCLUDE ...}
let star = self.parse_star_modifiers(None)?;
Expression::Star(star)
}
} else {
// {tbl.*} - table qualified wildcard
let table_name = self.expect_identifier_or_keyword_with_quoted()?;
self.expect(TokenType::Dot)?;
self.expect(TokenType::Star)?;
let star = self.parse_star_modifiers(Some(table_name))?;
Expression::Star(star)
};
self.expect(TokenType::RBrace)?;
// Wrap in BracedWildcard for generation
return Ok(Expression::BracedWildcard(Box::new(wildcard_expr)));
}
// Parse key-value pairs: key: value, ...
let mut keys = Vec::new();
let mut values = Vec::new();
loop {
let key = self.parse_expression()?;
self.expect(TokenType::Colon)?;
let value = self.parse_expression()?;
keys.push(key);
values.push(value);
if !self.match_token(TokenType::Comma) {
break;
}
// Handle trailing comma
if self.check(TokenType::RBrace) {
break;
}
}
self.expect(TokenType::RBrace)?;
return self.maybe_parse_subscript(Expression::MapFunc(Box::new(MapConstructor {
keys,
values,
curly_brace_syntax: true,
with_map_keyword: false,
})));
}
// Parenthesized expression or subquery
if self.match_token(TokenType::LParen) {
let lparen_comments = self.previous_trailing_comments().to_vec();
return self.parse_parenthesized_primary(lparen_comments);
}
// NULL
if self.match_token(TokenType::Null) {
return Ok(Expression::Null(Null));
}
// TRUE
if self.match_token(TokenType::True) {
return Ok(Expression::Boolean(BooleanLiteral { value: true }));
}
// FALSE
if self.match_token(TokenType::False) {
return Ok(Expression::Boolean(BooleanLiteral { value: false }));
}
// LAMBDA expression (DuckDB syntax: LAMBDA x : expr)
if self.check(TokenType::Lambda) {
if let Some(lambda) = self.parse_lambda()? {
return Ok(lambda);
}
}
// CASE expression - but not if followed by DOT (then it's an identifier like case.column)
if self.check(TokenType::Case) && !self.check_next(TokenType::Dot) {
let case_expr = self.parse_case()?;
return self.maybe_parse_over(case_expr);
}
// CAST expression
if self.check(TokenType::Cast) {
let cast_expr = self.parse_cast()?;
return self.maybe_parse_subscript(cast_expr);
}
// TRY_CAST expression
if self.check(TokenType::TryCast) {
let cast_expr = self.parse_try_cast()?;
return self.maybe_parse_subscript(cast_expr);
}
// SAFE_CAST expression (BigQuery)
if self.check(TokenType::SafeCast) {
let cast_expr = self.parse_safe_cast()?;
return self.maybe_parse_subscript(cast_expr);
}
// EXISTS - either subquery predicate EXISTS(SELECT ...) or Hive array function EXISTS(array, lambda)
// ClickHouse: EXISTS without ( is a column name/identifier
if self.check(TokenType::Exists)
&& matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
)
&& !self.check_next(TokenType::LParen)
{
let tok = self.advance();
return Ok(Expression::Identifier(Identifier::new(tok.text)));
}
if self.match_token(TokenType::Exists) {
self.expect(TokenType::LParen)?;
// Check if this is a subquery EXISTS (SELECT, WITH, or FROM for DuckDB)
// ClickHouse: also handle EXISTS((SELECT ...)) with double parens
if self.check(TokenType::Select)
|| self.check(TokenType::With)
|| self.check(TokenType::From)
|| (self.check(TokenType::LParen)
&& self
.peek_nth(1)
.map(|t| {
matches!(
t.token_type,
TokenType::Select | TokenType::With | TokenType::From
)
})
.unwrap_or(false))
{
let query = self.parse_statement()?;
self.expect(TokenType::RParen)?;
return Ok(Expression::Exists(Box::new(Exists {
this: query,
not: false,
})));
}
// Otherwise it's Hive's array EXISTS function: EXISTS(array, lambda_predicate)
// This function checks if any element in the array matches the predicate
let array_expr = self.parse_expression()?;
self.expect(TokenType::Comma)?;
let predicate = self.parse_expression()?;
self.expect(TokenType::RParen)?;
return Ok(Expression::Function(Box::new(Function {
name: "EXISTS".to_string(),
args: vec![array_expr, predicate],
distinct: false,
trailing_comments: Vec::new(),
use_bracket_syntax: false,
no_parens: false,
quoted: false,
span: None,
inferred_type: None,
})));
}
// INTERVAL expression or identifier
if self.check(TokenType::Interval) {
if let Some(interval_expr) = self.try_parse_interval()? {
return Ok(interval_expr);
}
// INTERVAL is used as an identifier
let token = self.advance();
return Ok(Expression::Identifier(Identifier::new(token.text)));
}
// DATE literal: DATE '2024-01-15' or DATE function: DATE(expr)
if self.check(TokenType::Date) {
let token = self.advance();
let original_text = token.text.clone();
if self.check(TokenType::String) {
let str_token = self.advance();
if self.config.dialect.is_none() {
// Generic (no dialect): DATE 'literal' -> CAST('literal' AS DATE)
return Ok(Expression::Cast(Box::new(Cast {
this: Expression::Literal(Box::new(Literal::String(str_token.text))),
to: DataType::Date,
trailing_comments: Vec::new(),
double_colon_syntax: false,
format: None,
default: None,
inferred_type: None,
})));
}
return Ok(Expression::Literal(Box::new(Literal::Date(str_token.text))));
}
// Check for DATE() function call
if self.match_token(TokenType::LParen) {
let func_expr = self.parse_typed_function(&original_text, "DATE", false)?;
return self.maybe_parse_over(func_expr);
}
// Fallback to DATE as column reference - preserve original case
return Ok(Expression::boxed_column(Column {
name: Identifier::new(original_text),
table: None,
join_mark: false,
trailing_comments: Vec::new(),
span: None,
inferred_type: None,
}));
}
// TIME literal: TIME '10:30:00' or TIME function: TIME(expr)
if self.check(TokenType::Time) {
let token = self.advance();
let original_text = token.text.clone();
if self.check(TokenType::String) {
let str_token = self.advance();
return Ok(Expression::Literal(Box::new(Literal::Time(str_token.text))));
}
// Check for TIME() function call
if self.match_token(TokenType::LParen) {
let func_expr = self.parse_typed_function(&original_text, "TIME", false)?;
return self.maybe_parse_over(func_expr);
}
// Fallback to TIME as column reference - preserve original case
return self.maybe_parse_subscript(Expression::boxed_column(Column {
name: Identifier::new(original_text),
table: None,
join_mark: false,
trailing_comments: Vec::new(),
span: None,
inferred_type: None,
}));
}
// TIMESTAMP literal: TIMESTAMP '2024-01-15 10:30:00' or TIMESTAMP function: TIMESTAMP(expr)
// Also handles TIMESTAMP(n) WITH TIME ZONE as a data type expression
if self.check(TokenType::Timestamp) {
let token = self.advance();
let original_text = token.text.clone();
if self.check(TokenType::String) {
let str_token = self.advance();
if self.config.dialect.is_none() {
// Generic (no dialect): TIMESTAMP 'literal' -> CAST('literal' AS TIMESTAMP)
return Ok(Expression::Cast(Box::new(Cast {
this: Expression::Literal(Box::new(Literal::String(str_token.text))),
to: DataType::Timestamp {
precision: None,
timezone: false,
},
trailing_comments: Vec::new(),
double_colon_syntax: false,
format: None,
default: None,
inferred_type: None,
})));
}
// Dialect-specific: keep as Literal::Timestamp for dialect transforms
return Ok(Expression::Literal(Box::new(Literal::Timestamp(
str_token.text,
))));
}
// Check for TIMESTAMP(n) WITH/WITHOUT TIME ZONE or TIMESTAMP(n) 'literal' as data type
// This is a data type, not a function call
if self.check(TokenType::LParen) {
// Look ahead to see if this is TIMESTAMP(number) WITH/WITHOUT/String (data type)
// vs TIMESTAMP(expr) (function call)
let is_data_type = self.check_next(TokenType::Number) && {
// Check if after (number) there's WITH, WITHOUT, or String literal
let mut lookahead = self.current + 2;
// Skip the number
while lookahead < self.tokens.len()
&& self.tokens[lookahead].token_type == TokenType::RParen
{
lookahead += 1;
break;
}
// Check for WITH, WITHOUT, or String after the closing paren
lookahead < self.tokens.len()
&& (self.tokens[lookahead].token_type == TokenType::With
|| self.tokens[lookahead].text.eq_ignore_ascii_case("WITHOUT")
|| self.tokens[lookahead].token_type == TokenType::String)
};
if is_data_type {
// Parse as data type: TIMESTAMP(precision) [WITH/WITHOUT TIME ZONE] ['literal']
self.skip(); // consume (
let precision = Some(self.expect_number()? as u32);
self.expect(TokenType::RParen)?;
let data_type = if self.match_token(TokenType::With) {
if self.match_token(TokenType::Local) {
// WITH LOCAL TIME ZONE -> TIMESTAMPLTZ
self.match_keyword("TIME");
self.match_keyword("ZONE");
DataType::Custom {
name: format!("TIMESTAMPLTZ({})", precision.unwrap()),
}
} else {
self.match_keyword("TIME");
self.match_keyword("ZONE");
DataType::Timestamp {
precision,
timezone: true,
}
}
} else if self.match_keyword("WITHOUT") {
self.match_keyword("TIME");
self.match_keyword("ZONE");
DataType::Timestamp {
precision,
timezone: false,
}
} else {
DataType::Timestamp {
precision,
timezone: false,
}
};
// Check for following string literal -> wrap in CAST
if self.check(TokenType::String) {
let str_token = self.advance();
return Ok(Expression::Cast(Box::new(Cast {
this: Expression::Literal(Box::new(Literal::String(str_token.text))),
to: data_type,
trailing_comments: Vec::new(),
double_colon_syntax: false,
format: None,
default: None,
inferred_type: None,
})));
}
return Ok(Expression::DataType(data_type));
}
// Otherwise parse as function call
self.skip(); // consume (
let func_expr = self.parse_typed_function(&original_text, "TIMESTAMP", false)?;
return self.maybe_parse_over(func_expr);
}
// Check for TIMESTAMP WITH/WITHOUT TIME ZONE (no precision) as data type
// Use lookahead to verify WITH is followed by TIME (not WITH FILL, WITH TOTALS, etc.)
if (self.check(TokenType::With)
&& self.peek_nth(1).map_or(false, |t| {
t.text.eq_ignore_ascii_case("TIME") || t.text.eq_ignore_ascii_case("LOCAL")
}))
|| self.check_keyword_text("WITHOUT")
{
let data_type = if self.match_token(TokenType::With) {
if self.match_token(TokenType::Local) {
// WITH LOCAL TIME ZONE -> TIMESTAMPLTZ
self.match_keyword("TIME");
self.match_keyword("ZONE");
DataType::Custom {
name: "TIMESTAMPLTZ".to_string(),
}
} else {
self.match_keyword("TIME");
self.match_keyword("ZONE");
DataType::Timestamp {
precision: None,
timezone: true,
}
}
} else if self.match_keyword("WITHOUT") {
self.match_keyword("TIME");
self.match_keyword("ZONE");
DataType::Timestamp {
precision: None,
timezone: false,
}
} else {
DataType::Timestamp {
precision: None,
timezone: false,
}
};
// Check for following string literal -> wrap in CAST
if self.check(TokenType::String) {
let str_token = self.advance();
return Ok(Expression::Cast(Box::new(Cast {
this: Expression::Literal(Box::new(Literal::String(str_token.text))),
to: data_type,
trailing_comments: Vec::new(),
double_colon_syntax: false,
format: None,
default: None,
inferred_type: None,
})));
}
return Ok(Expression::DataType(data_type));
}
// Fallback to TIMESTAMP as column reference - preserve original case
return Ok(Expression::boxed_column(Column {
name: Identifier::new(original_text),
table: None,
join_mark: false,
trailing_comments: Vec::new(),
span: None,
inferred_type: None,
}));
}
// DATETIME literal: DATETIME '2024-01-15 10:30:00' or DATETIME function: DATETIME(expr)
if self.check(TokenType::DateTime) {
let token = self.advance();
let original_text = token.text.clone();
if self.check(TokenType::String) {
let str_token = self.advance();
return Ok(Expression::Literal(Box::new(Literal::Datetime(
str_token.text,
))));
}
// Check for DATETIME() function call
if self.match_token(TokenType::LParen) {
let func_expr = self.parse_typed_function(&original_text, "DATETIME", false)?;
return self.maybe_parse_over(func_expr);
}
// Fallback to DATETIME as column reference - preserve original case
return Ok(Expression::boxed_column(Column {
name: Identifier::new(original_text),
table: None,
join_mark: false,
trailing_comments: Vec::new(),
span: None,
inferred_type: None,
}));
}
// ROW() function (window function for row number)
if self.check(TokenType::Row) && self.check_next(TokenType::LParen) {
self.skip(); // consume ROW
self.expect(TokenType::LParen)?;
// ROW() typically takes no arguments
let args = if !self.check(TokenType::RParen) {
self.parse_expression_list()?
} else {
Vec::new()
};
self.expect(TokenType::RParen)?;
let func_expr = Expression::Function(Box::new(Function {
name: "ROW".to_string(),
args,
distinct: false,
trailing_comments: Vec::new(),
use_bracket_syntax: false,
no_parens: false,
quoted: false,
span: None,
inferred_type: None,
}));
return self.maybe_parse_over(func_expr);
}
// Number - support postfix operators like ::type
if self.check(TokenType::Number) {
let token = self.advance();
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::MySQL)
) {
let text = token.text.as_str();
if text.len() > 2
&& (text.starts_with("0x") || text.starts_with("0X"))
&& !text[2..].chars().all(|c| c.is_ascii_hexdigit())
{
let ident = Expression::Identifier(Identifier {
name: token.text,
quoted: true,
trailing_comments: Vec::new(),
span: None,
});
return self.maybe_parse_subscript(ident);
}
}
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::Teradata)
) && token.text == "0"
{
if let Some(next) = self.tokens.get(self.current) {
let is_adjacent = token.span.end == next.span.start;
let next_text = next.text.as_str();
let is_hex_prefix = next_text.starts_with('x') || next_text.starts_with('X');
if is_adjacent
&& matches!(next.token_type, TokenType::Identifier | TokenType::Var)
&& is_hex_prefix
&& next_text.len() > 1
&& next_text[1..].chars().all(|c| c.is_ascii_hexdigit())
{
// Consume the hex suffix token and emit a HexString literal
let hex_token = self.advance();
let hex = hex_token.text[1..].to_string();
let literal = Expression::Literal(Box::new(Literal::HexString(hex)));
return self.maybe_parse_subscript(literal);
}
}
}
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) {
if let Some(next) = self.tokens.get(self.current) {
let is_adjacent = token.span.end == next.span.start;
if is_adjacent
&& matches!(next.token_type, TokenType::Identifier | TokenType::Var)
&& next.text.starts_with('_')
{
let suffix = next.text.clone();
self.skip(); // consume suffix token
let combined = format!("{}{}", token.text, suffix);
let literal = Expression::Literal(Box::new(Literal::Number(combined)));
return self.maybe_parse_subscript(literal);
}
}
}
// Check for numeric literal suffix encoded as "number::TYPE" by tokenizer
let literal = if let Some(sep_pos) = token.text.find("::") {
let num_part = &token.text[..sep_pos];
let type_name = &token.text[sep_pos + 2..];
let num_expr = Expression::Literal(Box::new(Literal::Number(num_part.to_string())));
let data_type = match type_name {
"BIGINT" => crate::expressions::DataType::BigInt { length: None },
"SMALLINT" => crate::expressions::DataType::SmallInt { length: None },
"TINYINT" => crate::expressions::DataType::TinyInt { length: None },
"DOUBLE" => crate::expressions::DataType::Double {
precision: None,
scale: None,
},
"FLOAT" => crate::expressions::DataType::Float {
precision: None,
scale: None,
real_spelling: false,
},
"DECIMAL" => crate::expressions::DataType::Decimal {
precision: None,
scale: None,
},
_ => crate::expressions::DataType::Custom {
name: type_name.to_string(),
},
};
Expression::Cast(Box::new(crate::expressions::Cast {
this: num_expr,
to: data_type,
trailing_comments: Vec::new(),
double_colon_syntax: false,
format: None,
default: None,
inferred_type: None,
}))
} else {
Expression::Literal(Box::new(Literal::Number(token.text)))
};
return self.maybe_parse_subscript(literal);
}
// String - support postfix operators like ::type, ->, ->>
// Also handle adjacent string literals (SQL standard) which concatenate: 'x' 'y' 'z' -> CONCAT('x', 'y', 'z')
if self.check(TokenType::String) {
let token = self.advance();
let first_literal = Expression::Literal(Box::new(Literal::String(token.text)));
// Check for adjacent string literals (PostgreSQL and SQL standard feature)
// 'x' 'y' 'z' should be treated as string concatenation
if self.check(TokenType::String) {
let mut expressions = vec![first_literal];
while self.check(TokenType::String) {
let next_token = self.advance();
expressions.push(Expression::Literal(Box::new(Literal::String(
next_token.text,
))));
}
// Create CONCAT function call with all adjacent strings
let concat_func =
Expression::Function(Box::new(Function::new("CONCAT", expressions)));
return self.maybe_parse_subscript(concat_func);
}
return self.maybe_parse_subscript(first_literal);
}
// Dollar-quoted string: $$...$$ or $tag$...$tag$ -- preserve as DollarString
// so the generator can handle dialect-specific conversion
if self.check(TokenType::DollarString) {
let token = self.advance();
let literal = Expression::Literal(Box::new(Literal::DollarString(token.text)));
return self.maybe_parse_subscript(literal);
}
// Triple-quoted string with double quotes: """..."""
if self.check(TokenType::TripleDoubleQuotedString) {
let token = self.advance();
let literal =
Expression::Literal(Box::new(Literal::TripleQuotedString(token.text, '"')));
return self.maybe_parse_subscript(literal);
}
// Triple-quoted string with single quotes: '''...'''
if self.check(TokenType::TripleSingleQuotedString) {
let token = self.advance();
let literal =
Expression::Literal(Box::new(Literal::TripleQuotedString(token.text, '\'')));
return self.maybe_parse_subscript(literal);
}
// National String (N'...')
if self.check(TokenType::NationalString) {
let token = self.advance();
let literal = Expression::Literal(Box::new(Literal::NationalString(token.text)));
return self.maybe_parse_subscript(literal);
}
// Hex String (X'...')
if self.check(TokenType::HexString) {
let token = self.advance();
let literal = Expression::Literal(Box::new(Literal::HexString(token.text)));
return self.maybe_parse_subscript(literal);
}
// Hex Number (0xA from BigQuery/SQLite) - integer in hex notation
if self.check(TokenType::HexNumber) {
let token = self.advance();
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::MySQL)
) {
let text = token.text.as_str();
if text.len() > 2
&& (text.starts_with("0x") || text.starts_with("0X"))
&& !text[2..].chars().all(|c| c.is_ascii_hexdigit())
{
let ident = Expression::Identifier(Identifier {
name: token.text,
quoted: true,
trailing_comments: Vec::new(),
span: None,
});
return self.maybe_parse_subscript(ident);
}
}
let literal = Expression::Literal(Box::new(Literal::HexNumber(token.text)));
return self.maybe_parse_subscript(literal);
}
// Bit String (B'...')
if self.check(TokenType::BitString) {
let token = self.advance();
let literal = Expression::Literal(Box::new(Literal::BitString(token.text)));
return self.maybe_parse_subscript(literal);
}
// Byte String (b"..." - BigQuery style)
if self.check(TokenType::ByteString) {
let token = self.advance();
let literal = Expression::Literal(Box::new(Literal::ByteString(token.text)));
return self.maybe_parse_subscript(literal);
}
// Raw String (r"..." - BigQuery style, backslashes are literal)
if self.check(TokenType::RawString) {
let token = self.advance();
// Raw strings preserve backslashes as literal characters.
// The generator will handle escaping when converting to a regular string.
let literal = Expression::Literal(Box::new(Literal::RawString(token.text)));
return self.maybe_parse_subscript(literal);
}
// Escape String (E'...' - PostgreSQL)
if self.check(TokenType::EscapeString) {
let token = self.advance();
// EscapeString is stored as "E'content'" - extract just the content
let literal = Expression::Literal(Box::new(Literal::EscapeString(token.text)));
return self.maybe_parse_subscript(literal);
}
// Star - check for DuckDB *COLUMNS(...) syntax first
if self.check(TokenType::Star) {
// DuckDB *COLUMNS(...) syntax: *COLUMNS(*), *COLUMNS('regex'), *COLUMNS(['col1', 'col2'])
// Check if * is followed by COLUMNS and (
if self.check_next_identifier("COLUMNS") {
// Check if there's a ( after COLUMNS
if self
.tokens
.get(self.current + 2)
.map(|t| t.token_type == TokenType::LParen)
.unwrap_or(false)
{
self.skip(); // consume *
self.skip(); // consume COLUMNS
self.skip(); // consume (
// Parse the argument: can be *, a regex string, or an array of column names
let arg = if self.check(TokenType::Star) {
self.skip(); // consume *
Expression::Star(Star {
table: None,
except: None,
replace: None,
rename: None,
trailing_comments: Vec::new(),
span: None,
})
} else {
self.parse_expression()?
};
self.expect(TokenType::RParen)?;
// Create Columns expression with unpack=true
return Ok(Expression::Columns(Box::new(Columns {
this: Box::new(arg),
unpack: Some(Box::new(Expression::Boolean(BooleanLiteral {
value: true,
}))),
})));
}
}
// Regular star
self.skip(); // consume *
let star = self.parse_star_modifiers(None)?;
return Ok(Expression::Star(star));
}
// Generic type expressions: ARRAY<T>, MAP<K,V>, STRUCT<...>
// These are standalone type expressions (not in CAST context)
// But also handle STRUCT<TYPE>(args) which becomes CAST(STRUCT(args) AS STRUCT<TYPE>)
if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
let name_upper = self.peek().text.to_ascii_uppercase();
if (name_upper == "ARRAY" || name_upper == "MAP" || name_upper == "STRUCT")
&& self.check_next(TokenType::Lt)
{
self.skip(); // consume ARRAY/MAP/STRUCT
let data_type = self.parse_data_type_from_name(&name_upper)?;
// Check for typed constructor: STRUCT<TYPE>(args) or ARRAY<TYPE>(args)
// These become CAST(STRUCT(args) AS TYPE) or CAST(ARRAY(args) AS TYPE)
if self.match_token(TokenType::LParen) {
if name_upper == "STRUCT" {
// Parse struct constructor arguments
let args = if self.check(TokenType::RParen) {
Vec::new()
} else {
self.parse_struct_args()?
};
self.expect(TokenType::RParen)?;
// Convert args to Struct fields (all unnamed)
let fields: Vec<(Option<String>, Expression)> =
args.into_iter().map(|e| (None, e)).collect();
// Create CAST(STRUCT(args) AS STRUCT<TYPE>)
let struct_expr = Expression::Struct(Box::new(Struct { fields }));
let cast_expr = Expression::Cast(Box::new(Cast {
this: struct_expr,
to: data_type,
trailing_comments: Vec::new(),
double_colon_syntax: false,
format: None,
default: None,
inferred_type: None,
}));
return self.maybe_parse_subscript(cast_expr);
} else if name_upper == "ARRAY" {
// Parse array constructor arguments
let mut expressions = Vec::new();
if !self.check(TokenType::RParen) {
loop {
expressions.push(self.parse_expression()?);
if !self.match_token(TokenType::Comma) {
break;
}
}
}
self.expect(TokenType::RParen)?;
// Create CAST(ARRAY[args] AS ARRAY<TYPE>)
let array_expr = Expression::Array(Box::new(Array { expressions }));
let cast_expr = Expression::Cast(Box::new(Cast {
this: array_expr,
to: data_type,
trailing_comments: Vec::new(),
double_colon_syntax: false,
format: None,
default: None,
inferred_type: None,
}));
return self.maybe_parse_subscript(cast_expr);
}
} else if self.match_token(TokenType::LBracket) {
// ARRAY<TYPE>[values] or ARRAY<TYPE>[] - bracket-style array constructor
let expressions = if self.check(TokenType::RBracket) {
Vec::new()
} else {
self.parse_expression_list()?
};
self.expect(TokenType::RBracket)?;
// Create CAST(Array(values) AS DataType)
let array_expr = Expression::Array(Box::new(Array { expressions }));
let cast_expr = Expression::Cast(Box::new(Cast {
this: array_expr,
to: data_type,
trailing_comments: Vec::new(),
double_colon_syntax: false,
format: None,
default: None,
inferred_type: None,
}));
return self.maybe_parse_subscript(cast_expr);
}
return Ok(Expression::DataType(data_type));
}
// DuckDB-style MAP with curly brace literals: MAP {'key': value}
if name_upper == "MAP" && self.check_next(TokenType::LBrace) {
self.skip(); // consume MAP
self.expect(TokenType::LBrace)?;
// Handle empty: MAP {}
if self.match_token(TokenType::RBrace) {
return self.maybe_parse_subscript(Expression::MapFunc(Box::new(
MapConstructor {
keys: Vec::new(),
values: Vec::new(),
curly_brace_syntax: true,
with_map_keyword: true,
},
)));
}
// Parse key-value pairs
let mut keys = Vec::new();
let mut values = Vec::new();
loop {
let key = self.parse_primary()?;
self.expect(TokenType::Colon)?;
let value = self.parse_expression()?;
keys.push(key);
values.push(value);
if !self.match_token(TokenType::Comma) {
break;
}
// Handle trailing comma
if self.check(TokenType::RBrace) {
break;
}
}
self.expect(TokenType::RBrace)?;
return self.maybe_parse_subscript(Expression::MapFunc(Box::new(MapConstructor {
keys,
values,
curly_brace_syntax: true,
with_map_keyword: true,
})));
}
}
// Keywords as identifiers when followed by DOT (e.g., case.x, top.y)
// These keywords can be table/column names when used with dot notation
if (self.check(TokenType::Case) || self.check(TokenType::Top))
&& self.check_next(TokenType::Dot)
{
let token = self.advance();
let ident = Identifier::new(token.text);
self.expect(TokenType::Dot)?;
if self.match_token(TokenType::Star) {
// case.* or top.*
let star = self.parse_star_modifiers(Some(ident))?;
return Ok(Expression::Star(star));
}
// case.column or top.column
let col_ident = self.expect_identifier_or_keyword_with_quoted()?;
// Capture trailing comments from the column name token
let trailing_comments = self.previous_trailing_comments().to_vec();
let mut col = Expression::boxed_column(Column {
name: col_ident,
table: Some(ident),
join_mark: false,
trailing_comments,
span: None,
inferred_type: None,
});
// Handle Oracle/Redshift outer join marker (+) after column reference
if self.check(TokenType::LParen) && self.check_next(TokenType::Plus) {
let saved_pos = self.current;
if self.match_token(TokenType::LParen)
&& self.match_token(TokenType::Plus)
&& self.match_token(TokenType::RParen)
{
if let Expression::Column(ref mut c) = col {
c.join_mark = true;
}
} else {
self.current = saved_pos;
}
}
return self.maybe_parse_subscript(col);
}
// MySQL BINARY prefix operator: BINARY expr -> CAST(expr AS BINARY)
// Only treat as prefix operator when followed by an expression (not ( which would be BINARY() function,
// and not when it would be a data type like BINARY in column definitions)
if self.check(TokenType::Var)
&& self.peek().text.eq_ignore_ascii_case("BINARY")
&& !self.check_next(TokenType::LParen)
&& !self.check_next(TokenType::Dot)
&& !self.check_next(TokenType::RParen)
&& !self.check_next(TokenType::Comma)
&& !self.is_at_end()
{
// Check if this is actually followed by an expression token (not end of statement)
let next_idx = self.current + 1;
let has_expr = next_idx < self.tokens.len()
&& !matches!(
self.tokens[next_idx].token_type,
TokenType::Semicolon | TokenType::Eof | TokenType::RParen | TokenType::Comma
);
if has_expr {
self.skip(); // consume BINARY
let expr = self.parse_unary()?;
return Ok(Expression::Cast(Box::new(Cast {
this: expr,
to: DataType::Binary { length: None },
trailing_comments: Vec::new(),
double_colon_syntax: false,
format: None,
default: None,
inferred_type: None,
})));
}
}
// RLIKE/REGEXP as function call: RLIKE(expr, pattern, flags)
// Normally RLIKE is an operator, but Snowflake allows function syntax
if self.check(TokenType::RLike) && self.check_next(TokenType::LParen) {
let token = self.advance(); // consume RLIKE
self.skip(); // consume LParen
let args = if self.check(TokenType::RParen) {
Vec::new()
} else {
self.parse_function_arguments()?
};
self.expect(TokenType::RParen)?;
let func = Expression::Function(Box::new(Function {
name: token.text.clone(), // Preserve original case; generator handles normalization
args,
distinct: false,
trailing_comments: Vec::new(),
use_bracket_syntax: false,
no_parens: false,
quoted: false,
span: None,
inferred_type: None,
}));
return self.maybe_parse_over(func);
}
// INSERT as function call: INSERT(str, pos, len, newstr)
// Snowflake/MySQL have INSERT as a string function, but INSERT is also a DML keyword.
// When followed by ( in expression context, treat as function call.
if self.check(TokenType::Insert) && self.check_next(TokenType::LParen) {
let token = self.advance(); // consume INSERT
self.skip(); // consume LParen
let args = if self.check(TokenType::RParen) {
Vec::new()
} else {
self.parse_function_arguments()?
};
self.expect(TokenType::RParen)?;
let func = Expression::Function(Box::new(Function {
name: token.text.clone(),
args,
distinct: false,
trailing_comments: Vec::new(),
use_bracket_syntax: false,
no_parens: false,
quoted: false,
span: None,
inferred_type: None,
}));
return self.maybe_parse_over(func);
}
// ClickHouse: MINUS/EXCEPT/INTERSECT/REGEXP as function names (e.g., minus(a, b), REGEXP('^db'))
// MINUS is tokenized as TokenType::Except (Oracle alias), REGEXP as TokenType::RLike
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && (self.check(TokenType::Except)
|| self.check(TokenType::Intersect)
|| self.check(TokenType::RLike))
&& self.check_next(TokenType::LParen)
{
let token = self.advance(); // consume keyword
self.skip(); // consume LParen
let args = if self.check(TokenType::RParen) {
Vec::new()
} else {
self.parse_function_arguments()?
};
self.expect(TokenType::RParen)?;
let func = Expression::Function(Box::new(Function {
name: token.text.clone(),
args,
distinct: false,
trailing_comments: Vec::new(),
use_bracket_syntax: false,
no_parens: false,
quoted: false,
span: None,
inferred_type: None,
}));
return self.maybe_parse_over(func);
}
// Handle CURRENT_DATE/CURRENT_TIMESTAMP/CURRENT_TIME/CURRENT_DATETIME with parentheses
// These have special token types but BigQuery and others use them as function calls with args
if matches!(
self.peek().token_type,
TokenType::CurrentDate
| TokenType::CurrentTimestamp
| TokenType::CurrentTime
| TokenType::CurrentDateTime
) {
// Snowflake: CURRENT_TIME / CURRENT_TIME(n) -> Localtime (so DuckDB can output LOCALTIME)
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::Snowflake)
) && self.peek().token_type == TokenType::CurrentTime
{
self.skip(); // consume CURRENT_TIME
if self.match_token(TokenType::LParen) {
// CURRENT_TIME(n) - consume args but ignore precision
if !self.check(TokenType::RParen) {
let _ = self.parse_function_arguments()?;
}
self.expect(TokenType::RParen)?;
}
return self.maybe_parse_subscript(Expression::Localtime(Box::new(
crate::expressions::Localtime { this: None },
)));
}
if self.check_next(TokenType::LParen) {
// Parse as function call: CURRENT_DATE('UTC'), CURRENT_TIMESTAMP(), etc.
let token = self.advance(); // consume CURRENT_DATE etc.
self.skip(); // consume LParen
let args = if self.check(TokenType::RParen) {
Vec::new()
} else {
self.parse_function_arguments()?
};
self.expect(TokenType::RParen)?;
let func = Expression::Function(Box::new(Function {
name: token.text.clone(),
args,
distinct: false,
trailing_comments: Vec::new(),
use_bracket_syntax: false,
no_parens: false,
quoted: false,
span: None,
inferred_type: None,
}));
return self.maybe_parse_subscript(func);
} else {
// No parens - parse as no-paren function
let token = self.advance();
let func = Expression::Function(Box::new(Function {
name: token.text.clone(),
args: Vec::new(),
distinct: false,
trailing_comments: Vec::new(),
use_bracket_syntax: false,
no_parens: true,
quoted: false,
span: None,
inferred_type: None,
}));
return self.maybe_parse_subscript(func);
}
}
// Type keyword followed by string literal -> CAST('value' AS TYPE)
// E.g., NUMERIC '2.25' -> CAST('2.25' AS NUMERIC)
if self.is_identifier_token() && self.check_next(TokenType::String) {
let upper_name = self.peek().text.to_ascii_uppercase();
if matches!(
upper_name.as_str(),
"NUMERIC" | "DECIMAL" | "BIGNUMERIC" | "BIGDECIMAL"
) {
self.skip(); // consume the type keyword
let str_token = self.advance(); // consume the string literal
let data_type = match upper_name.as_str() {
"NUMERIC" | "DECIMAL" | "BIGNUMERIC" | "BIGDECIMAL" => {
crate::expressions::DataType::Decimal {
precision: None,
scale: None,
}
}
_ => unreachable!("type keyword already matched in outer if-condition"),
};
return Ok(Expression::Cast(Box::new(crate::expressions::Cast {
this: Expression::Literal(Box::new(Literal::String(str_token.text))),
to: data_type,
trailing_comments: Vec::new(),
double_colon_syntax: false,
format: None,
default: None,
inferred_type: None,
})));
}
}
// Identifier, Column, or Function
if self.is_identifier_token() {
return self.parse_identifier_primary();
}
// Exasol-style IF expression: IF condition THEN true_value ELSE false_value ENDIF
// Check for IF not followed by ( (which would be IF function call handled elsewhere)
// This handles: IF age < 18 THEN 'minor' ELSE 'adult' ENDIF
// IMPORTANT: This must be checked BEFORE is_safe_keyword_as_identifier() which would
// treat IF as a column name when not followed by ( or .
// For TSQL/Fabric: IF (cond) BEGIN ... END is an IF statement, not function
if self.check(TokenType::If)
&& !self.check_next(TokenType::Dot)
&& (!self.check_next(TokenType::LParen)
|| matches!(
self.config.dialect,
Some(crate::dialects::DialectType::TSQL)
| Some(crate::dialects::DialectType::Fabric)
))
{
let saved_pos = self.current;
self.skip(); // consume IF
if let Some(if_expr) = self.parse_if()? {
return Ok(if_expr);
}
// parse_if() returned None — IF is not an IF expression here,
// restore position so it can be treated as an identifier
self.current = saved_pos;
}
// NEXT VALUE FOR sequence_name [OVER (ORDER BY ...)]
// Must check before treating NEXT as a standalone identifier via is_safe_keyword_as_identifier
if self.check(TokenType::Next)
&& self.current + 2 < self.tokens.len()
&& self.tokens[self.current + 1]
.text
.eq_ignore_ascii_case("VALUE")
&& self.tokens[self.current + 2]
.text
.eq_ignore_ascii_case("FOR")
{
self.skip(); // consume NEXT
if let Some(expr) = self.parse_next_value_for()? {
return Ok(expr);
}
}
// ClickHouse: `from` can be a column name when followed by comma or dot
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.check(TokenType::From)
&& (self.check_next(TokenType::Comma) || self.check_next(TokenType::Dot))
{
let token = self.advance();
let name = token.text.clone();
if self.match_token(TokenType::Dot) {
// from.col qualified reference
let col_name = self.expect_identifier_or_keyword()?;
return Ok(Expression::Column(Box::new(crate::expressions::Column {
name: Identifier::new(col_name),
table: Some(Identifier::new(name)),
join_mark: false,
trailing_comments: Vec::new(),
span: None,
inferred_type: None,
})));
}
return Ok(Expression::Column(Box::new(crate::expressions::Column {
name: Identifier::new(name),
table: None,
join_mark: false,
trailing_comments: Vec::new(),
span: None,
inferred_type: None,
})));
}
// ClickHouse: `except` as identifier in expression context (set operations are handled at statement level)
// except(args) is already handled above in the MINUS/EXCEPT/INTERSECT function block
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.check(TokenType::Except)
&& !self.check_next(TokenType::LParen)
{
let token = self.advance();
let name = token.text.clone();
if self.match_token(TokenType::Dot) {
let col_name = self.expect_identifier_or_keyword()?;
return Ok(Expression::Column(Box::new(crate::expressions::Column {
name: Identifier::new(col_name),
table: Some(Identifier::new(name)),
join_mark: false,
trailing_comments: Vec::new(),
span: None,
inferred_type: None,
})));
}
return Ok(Expression::Column(Box::new(crate::expressions::Column {
name: Identifier::new(name),
table: None,
join_mark: false,
trailing_comments: Vec::new(),
span: None,
inferred_type: None,
})));
}
// ClickHouse: structural keywords like FROM, ON, JOIN can be used as identifiers
// in expression context when followed by an operator (e.g., from + 1, on.col)
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.peek().token_type.is_keyword()
&& !self.is_safe_keyword_as_identifier()
{
let next_tt = self
.peek_nth(1)
.map(|t| t.token_type)
.unwrap_or(TokenType::Semicolon);
// A structural keyword can be used as an identifier when it appears
// in expression context. We detect this by checking what follows.
// Essentially: it's NOT an identifier only if the keyword itself starts
// a clause (e.g., FROM followed by a table name). But when it's followed
// by an operator, comma, close-paren, or even another clause keyword
// (meaning it's the last token in an expression), it's an identifier.
let is_expr_context = !matches!(
next_tt,
TokenType::Identifier
| TokenType::Var
| TokenType::QuotedIdentifier
| TokenType::LParen
| TokenType::Number
| TokenType::String
);
if is_expr_context {
let token = self.advance();
return Ok(Expression::boxed_column(Column {
name: Identifier::new(token.text),
table: None,
join_mark: false,
trailing_comments: Vec::new(),
span: None,
inferred_type: None,
}));
}
}
// %s or %(name)s percent parameter (PostgreSQL psycopg2 style)
// Must be checked BEFORE the keyword-as-identifier handler below, since
// Percent is in is_keyword() and is_safe_keyword_as_identifier() returns true for it.
if self.check(TokenType::Percent)
&& (
self.check_next(TokenType::Var) // %s
|| self.check_next(TokenType::LParen)
// %(name)s
)
{
self.skip(); // consume %
// Check for %(name)s - named parameter
if self.match_token(TokenType::LParen) {
// Get the parameter name
if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
let name = self.advance().text;
self.expect(TokenType::RParen)?;
// Expect 's' after the closing paren
if self.check(TokenType::Var) && self.peek().text == "s" {
self.skip(); // consume 's'
}
return Ok(Expression::Parameter(Box::new(Parameter {
name: Some(name),
index: None,
style: ParameterStyle::Percent,
quoted: false,
string_quoted: false,
expression: None,
})));
} else {
return Err(self.parse_error("Expected parameter name after %("));
}
}
// Check for %s - anonymous parameter
if self.check(TokenType::Var) && self.peek().text == "s" {
self.skip(); // consume 's'
return Ok(Expression::Parameter(Box::new(Parameter {
name: None,
index: None,
style: ParameterStyle::Percent,
quoted: false,
string_quoted: false,
expression: None,
})));
}
// Not a parameter - backtrack
self.current -= 1;
}
// Some keywords can be used as identifiers (column names, table names, etc.)
// when they are "safe" keywords that don't affect query structure.
// Structural keywords like FROM, WHERE, JOIN should NOT be usable as identifiers.
if self.is_safe_keyword_as_identifier() {
let token = self.advance();
let name = token.text.clone();
// Check for function call (keyword followed by paren) - skip Teradata FORMAT phrase
let is_teradata_format_phrase = matches!(
self.config.dialect,
Some(crate::dialects::DialectType::Teradata)
) && self.check(TokenType::LParen)
&& self.check_next(TokenType::Format);
if !is_teradata_format_phrase && self.match_token(TokenType::LParen) {
let upper_name = name.to_ascii_uppercase();
let func_expr = self.parse_typed_function(&name, &upper_name, false)?;
let func_expr = self.maybe_parse_clickhouse_parameterized_agg(func_expr)?;
return self.maybe_parse_over(func_expr);
}
// Check for qualified name (keyword.column or keyword.method())
if self.match_token(TokenType::Dot) {
if self.match_token(TokenType::Star) {
// keyword.* with potential modifiers
let ident = Identifier::new(name);
let star = self.parse_star_modifiers(Some(ident))?;
return Ok(Expression::Star(star));
}
// ClickHouse: json.^path — the ^ prefix means "get all nested subcolumns"
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.check(TokenType::Caret)
{
self.skip(); // consume ^
let mut field_name = "^".to_string();
if self.check(TokenType::Identifier)
|| self.check(TokenType::Var)
|| self.check_keyword()
{
field_name.push_str(&self.advance().text);
}
let col = Expression::Dot(Box::new(DotAccess {
this: Expression::boxed_column(Column {
name: Identifier::new(name),
table: None,
join_mark: false,
trailing_comments: Vec::new(),
span: None,
inferred_type: None,
}),
field: Identifier::new(field_name),
}));
return self.maybe_parse_subscript(col);
}
// Handle numeric field access: keyword.1, keyword.2 (ClickHouse tuple field access)
if self.check(TokenType::Number) {
let field_name = self.advance().text;
let col_expr = Expression::Dot(Box::new(DotAccess {
this: Expression::boxed_column(Column {
name: Identifier::new(name),
table: None,
join_mark: false,
trailing_comments: Vec::new(),
span: None,
inferred_type: None,
}),
field: Identifier::new(field_name),
}));
return self.maybe_parse_subscript(col_expr);
}
// Allow keywords as column names
let col_ident = self.expect_identifier_or_keyword_with_quoted()?;
// Check if this is a method call
if self.check(TokenType::LParen) {
self.skip(); // consume (
let args = if self.check(TokenType::RParen) {
Vec::new()
} else {
self.parse_expression_list()?
};
self.expect(TokenType::RParen)?;
let method_call = Expression::MethodCall(Box::new(MethodCall {
this: Expression::Identifier(Identifier::new(name)),
method: col_ident,
args,
}));
return self.maybe_parse_subscript(method_call);
}
// Capture trailing comments from the column name token
let trailing_comments = self.previous_trailing_comments().to_vec();
let mut col = Expression::boxed_column(Column {
name: col_ident,
table: Some(Identifier::new(name)),
join_mark: false,
trailing_comments,
span: None,
inferred_type: None,
});
// Handle Oracle/Redshift outer join marker (+) after column reference
if self.check(TokenType::LParen) && self.check_next(TokenType::Plus) {
let saved_pos = self.current;
if self.match_token(TokenType::LParen)
&& self.match_token(TokenType::Plus)
&& self.match_token(TokenType::RParen)
{
if let Expression::Column(ref mut c) = col {
c.join_mark = true;
}
} else {
self.current = saved_pos;
}
}
return self.maybe_parse_subscript(col);
}
// Simple identifier (keyword used as column name)
// Capture trailing comments from the keyword token
let trailing_comments = self.previous_trailing_comments().to_vec();
let ident = Identifier::new(name);
let col = Expression::boxed_column(Column {
name: ident,
table: None,
join_mark: false,
trailing_comments,
span: None,
inferred_type: None,
});
return self.maybe_parse_subscript(col);
}
// @@ system variable (MySQL/SQL Server): @@version, @@IDENTITY, @@GLOBAL.var
if self.match_token(TokenType::AtAt) {
// Get the variable name
let name = if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
let mut n = self.advance().text;
// Handle @@scope.variable (e.g., @@GLOBAL.max_connections, @@SESSION.sql_mode)
if self.match_token(TokenType::Dot) {
if self.check(TokenType::Identifier)
|| self.check(TokenType::Var)
|| self.is_safe_keyword_as_identifier()
{
n.push('.');
n.push_str(&self.advance().text);
}
}
n
} else if self.check_keyword() {
// Handle @@keyword (e.g., @@sql_mode when sql_mode is a keyword)
self.advance().text
} else {
return Err(self.parse_error("Expected variable name after @@"));
};
return Ok(Expression::Parameter(Box::new(Parameter {
name: Some(name),
index: None,
style: ParameterStyle::DoubleAt,
quoted: false,
string_quoted: false,
expression: None,
})));
}
// @ user variable/parameter: @x, @"x", @JOIN, @'foo'
if self.match_token(TokenType::DAt) {
// Get the variable name - can be identifier, quoted identifier, keyword, or string
let (name, quoted, string_quoted) =
if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
(self.advance().text, false, false)
} else if self.check(TokenType::QuotedIdentifier) {
// Quoted identifier like @"x"
let token = self.advance();
(token.text, true, false)
} else if self.check(TokenType::String) {
// String-quoted like @'foo'
let token = self.advance();
(token.text, false, true)
} else if self.check(TokenType::Number) {
// Numeric like @1
let token = self.advance();
(token.text, false, false)
} else if self.peek().token_type.is_keyword() {
// Keyword used as variable name like @JOIN
let token = self.advance();
(token.text, false, false)
} else {
return Err(self.parse_error("Expected variable name after @"));
};
return Ok(Expression::Parameter(Box::new(Parameter {
name: Some(name),
index: None,
style: ParameterStyle::At,
quoted,
string_quoted,
expression: None,
})));
}
// ?:: placeholder cast (QDColon token = ? + :: fused by tokenizer)
if self.match_token(TokenType::QDColon) {
let data_type = self.parse_data_type_for_cast()?;
return Ok(Expression::Cast(Box::new(Cast {
this: Expression::Placeholder(Placeholder { index: None }),
to: data_type,
trailing_comments: Vec::new(),
double_colon_syntax: true,
format: None,
default: None,
inferred_type: None,
})));
}
// Parameter: ? placeholder or $n positional parameter
if self.check(TokenType::Parameter) {
let token = self.advance();
// Check if this is a positional parameter ($1, $2, etc.) or a plain ? placeholder
if let Ok(index) = token.text.parse::<u32>() {
// Positional parameter like $1, $2 (token text is just the number)
let param = Expression::Parameter(Box::new(Parameter {
name: None,
index: Some(index),
style: ParameterStyle::Dollar,
quoted: false,
string_quoted: false,
expression: None,
}));
// Check for JSON path access: $1:name or dot access: $1.c1
let result = self.parse_colon_json_path(param)?;
return self.maybe_parse_subscript(result);
} else {
// Plain ? placeholder
return Ok(Expression::Placeholder(Placeholder { index: None }));
}
}
// :name or :1 colon parameter
if self.match_token(TokenType::Colon) {
// Check for numeric parameter :1, :2, etc.
if self.check(TokenType::Number) {
let num_token = self.advance();
if let Ok(index) = num_token.text.parse::<u32>() {
return Ok(Expression::Parameter(Box::new(Parameter {
name: None,
index: Some(index),
style: ParameterStyle::Colon,
quoted: false,
string_quoted: false,
expression: None,
})));
}
return Err(
self.parse_error(format!("Invalid colon parameter: :{}", num_token.text))
);
}
// Get the parameter name
if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
let name = self.advance().text;
return Ok(Expression::Parameter(Box::new(Parameter {
name: Some(name),
index: None,
style: ParameterStyle::Colon,
quoted: false,
string_quoted: false,
expression: None,
})));
} else {
return Err(self.parse_error("Expected parameter name after :"));
}
}
// $n dollar parameter: $1, $2, etc.
if self.match_token(TokenType::Dollar) {
// Check for ${identifier} or ${kind:name} template variable syntax (Databricks, Hive)
// Hive supports ${hiveconf:variable_name} syntax
if self.match_token(TokenType::LBrace) {
// Parse the variable name - can be identifier or keyword
if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
let name_token = self.advance();
// Check for ${kind:name} syntax (e.g., ${hiveconf:some_var})
let expression = if self.match_token(TokenType::Colon) {
if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
let expr_token = self.advance();
Some(expr_token.text.clone())
} else {
return Err(self.parse_error("Expected identifier after : in ${...}"));
}
} else {
None
};
self.expect(TokenType::RBrace)?;
return Ok(Expression::Parameter(Box::new(Parameter {
name: Some(name_token.text.clone()),
index: None,
style: ParameterStyle::DollarBrace,
quoted: false,
string_quoted: false,
expression,
})));
} else {
return Err(self.parse_error("Expected identifier after ${"));
}
}
// Check for number following the dollar sign → positional parameter ($1, $2, etc.)
if self.check(TokenType::Number) {
let num_token = self.advance();
// Parse the number as an index
if let Ok(index) = num_token.text.parse::<u32>() {
let param_expr = Expression::Parameter(Box::new(Parameter {
name: None,
index: Some(index),
style: ParameterStyle::Dollar,
quoted: false,
string_quoted: false,
expression: None,
}));
// Check for JSON path access: $1:name or $1:name:subname
let result = self.parse_colon_json_path(param_expr)?;
// Also check for dot access: $1.c1 or $1:name.field
return self.maybe_parse_subscript(result);
}
// If it's not a valid integer, treat as error
return Err(
self.parse_error(format!("Invalid dollar parameter: ${}", num_token.text))
);
}
// Check for identifier following the dollar sign → session variable ($x, $query_id, etc.)
if self.check(TokenType::Identifier)
|| self.check(TokenType::Var)
|| self.is_safe_keyword_as_identifier()
{
let name_token = self.advance();
return Ok(Expression::Parameter(Box::new(Parameter {
name: Some(name_token.text.clone()),
index: None,
style: ParameterStyle::Dollar,
quoted: false,
string_quoted: false,
expression: None,
})));
}
// Just a $ by itself - treat as error
return Err(self.parse_error("Expected number or identifier after $"));
}
// %s or %(name)s percent parameter (PostgreSQL psycopg2 style)
if self.match_token(TokenType::Percent) {
// Check for %(name)s - named parameter
if self.match_token(TokenType::LParen) {
// Get the parameter name
if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
let name = self.advance().text;
self.expect(TokenType::RParen)?;
// Expect 's' after the closing paren
if self.check(TokenType::Var) && self.peek().text == "s" {
self.skip(); // consume 's'
}
return Ok(Expression::Parameter(Box::new(Parameter {
name: Some(name),
index: None,
style: ParameterStyle::Percent,
quoted: false,
string_quoted: false,
expression: None,
})));
} else {
return Err(self.parse_error("Expected parameter name after %("));
}
}
// Check for %s - anonymous parameter
if self.check(TokenType::Var) && self.peek().text == "s" {
self.skip(); // consume 's'
return Ok(Expression::Parameter(Box::new(Parameter {
name: None,
index: None,
style: ParameterStyle::Percent,
quoted: false,
string_quoted: false,
expression: None,
})));
}
// If not followed by 's' or '(', it's not a parameter - error
return Err(self.parse_error("Expected 's' or '(' after % for parameter"));
}
// LEFT, RIGHT, OUTER, FULL, ALL etc. keywords as identifiers when followed by DOT
// e.g., SELECT LEFT.FOO FROM ... or SELECT all.count FROM ...
if (self.check(TokenType::Left)
|| self.check(TokenType::Right)
|| self.check(TokenType::Outer)
|| self.check(TokenType::Full)
|| self.check(TokenType::All)
|| self.check(TokenType::Only)
|| self.check(TokenType::Next)
|| self.check(TokenType::If))
&& self.check_next(TokenType::Dot)
{
let token = self.advance();
let ident = Identifier::new(token.text);
self.expect(TokenType::Dot)?;
if self.match_token(TokenType::Star) {
let star = self.parse_star_modifiers(Some(ident))?;
return Ok(Expression::Star(star));
}
let col_ident = self.expect_identifier_or_keyword_with_quoted()?;
let trailing_comments = self.previous_trailing_comments().to_vec();
let mut col = Expression::boxed_column(Column {
name: col_ident,
table: Some(ident),
join_mark: false,
trailing_comments,
span: None,
inferred_type: None,
});
// Handle Oracle/Redshift outer join marker (+) after column reference
if self.check(TokenType::LParen) && self.check_next(TokenType::Plus) {
let saved_pos = self.current;
if self.match_token(TokenType::LParen)
&& self.match_token(TokenType::Plus)
&& self.match_token(TokenType::RParen)
{
if let Expression::Column(ref mut c) = col {
c.join_mark = true;
}
} else {
self.current = saved_pos;
}
}
return self.maybe_parse_subscript(col);
}
// NEXT VALUE FOR sequence_name [OVER (ORDER BY ...)]
// Must check before treating NEXT as a standalone identifier
if self.check(TokenType::Next) {
// NEXT(arg) - pattern navigation function in MATCH_RECOGNIZE
if self.check_next(TokenType::LParen) {
let token = self.advance();
self.skip(); // consume LParen
let args = self.parse_function_args_list()?;
self.expect(TokenType::RParen)?;
return Ok(Expression::Function(Box::new(Function {
name: token.text,
args,
distinct: false,
trailing_comments: Vec::new(),
use_bracket_syntax: false,
no_parens: false,
quoted: false,
span: None,
inferred_type: None,
})));
}
}
// LEFT, RIGHT, OUTER, FULL, ONLY, NEXT as standalone identifiers (not followed by JOIN or LParen)
// e.g., SELECT LEFT FROM ... or SELECT only FROM ...
// If followed by LParen, it's a function call (e.g., NEXT(bar) in MATCH_RECOGNIZE)
if self.can_be_alias_keyword()
&& !self.check_next(TokenType::Join)
&& !self.check_next(TokenType::LParen)
{
let token = self.advance();
let trailing_comments = self.previous_trailing_comments().to_vec();
let col = Expression::boxed_column(Column {
name: Identifier::new(token.text),
table: None,
join_mark: false,
trailing_comments,
span: None,
inferred_type: None,
});
return self.maybe_parse_subscript(col);
}
Err(self.parse_error(format!("Unexpected token: {:?}", self.peek().token_type)))
}
/// Check if function name is a known aggregate function
fn is_aggregate_function(name: &str) -> bool {
crate::function_registry::is_aggregate_function_name(name)
}
/// Whether the source dialect uses LOG(base, value) order (base first).
/// Default is true. BigQuery, TSQL, Tableau, Fabric use LOG(value, base).
fn log_base_first(&self) -> bool {
!matches!(
self.config.dialect,
Some(crate::dialects::DialectType::BigQuery)
| Some(crate::dialects::DialectType::TSQL)
| Some(crate::dialects::DialectType::Tableau)
| Some(crate::dialects::DialectType::Fabric)
)
}
/// Whether the source dialect treats single-arg LOG(x) as LN(x).
/// These dialects have LOG_DEFAULTS_TO_LN = True in Python sqlglot.
fn log_defaults_to_ln(&self) -> bool {
matches!(
self.config.dialect,
Some(crate::dialects::DialectType::MySQL)
| Some(crate::dialects::DialectType::BigQuery)
| Some(crate::dialects::DialectType::TSQL)
| Some(crate::dialects::DialectType::ClickHouse)
| Some(crate::dialects::DialectType::Hive)
| Some(crate::dialects::DialectType::Spark)
| Some(crate::dialects::DialectType::Databricks)
| Some(crate::dialects::DialectType::Drill)
| Some(crate::dialects::DialectType::Dremio)
)
}
/// Parse the subset of typed functions that are handled via function-registry metadata.
fn try_parse_registry_typed_function(
&mut self,
name: &str,
upper_name: &str,
canonical_upper_name: &str,
quoted: bool,
) -> Result<Option<Expression>> {
let Some(spec) =
crate::function_registry::typed_function_spec_by_canonical_upper(canonical_upper_name)
else {
return Ok(None);
};
match (spec.parse_kind, spec.canonical_name) {
(crate::function_registry::TypedParseKind::AggregateLike, "COUNT_IF") => {
let distinct = self.match_token(TokenType::Distinct);
let this = self.parse_expression()?;
// ClickHouse: handle AS alias inside countIf args: countIf(expr AS d, pred)
let this = if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.check(TokenType::As)
{
let next_idx = self.current + 1;
let after_alias_idx = self.current + 2;
let is_alias = next_idx < self.tokens.len()
&& (matches!(
self.tokens[next_idx].token_type,
TokenType::Identifier | TokenType::Var | TokenType::QuotedIdentifier
) || self.tokens[next_idx].token_type.is_keyword())
&& after_alias_idx < self.tokens.len()
&& matches!(
self.tokens[after_alias_idx].token_type,
TokenType::RParen | TokenType::Comma
);
if is_alias {
self.skip(); // consume AS
let alias_token = self.advance();
Expression::Alias(Box::new(crate::expressions::Alias {
this,
alias: Identifier::new(alias_token.text.clone()),
column_aliases: Vec::new(),
pre_alias_comments: Vec::new(),
trailing_comments: Vec::new(),
inferred_type: None,
}))
} else {
this
}
} else {
this
};
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.match_token(TokenType::Comma)
{
let mut args = vec![this];
let arg = self.parse_expression()?;
// Handle AS alias on subsequent args too
let arg = if self.check(TokenType::As) {
let next_idx = self.current + 1;
let after_alias_idx = self.current + 2;
let is_alias = next_idx < self.tokens.len()
&& (matches!(
self.tokens[next_idx].token_type,
TokenType::Identifier
| TokenType::Var
| TokenType::QuotedIdentifier
) || self.tokens[next_idx].token_type.is_keyword())
&& after_alias_idx < self.tokens.len()
&& matches!(
self.tokens[after_alias_idx].token_type,
TokenType::RParen | TokenType::Comma
);
if is_alias {
self.skip(); // consume AS
let alias_token = self.advance();
Expression::Alias(Box::new(crate::expressions::Alias {
this: arg,
alias: Identifier::new(alias_token.text.clone()),
column_aliases: Vec::new(),
pre_alias_comments: Vec::new(),
trailing_comments: Vec::new(),
inferred_type: None,
}))
} else {
arg
}
} else {
arg
};
args.push(arg);
while self.match_token(TokenType::Comma) {
args.push(self.parse_expression()?);
}
self.expect(TokenType::RParen)?;
return Ok(Some(Expression::CombinedAggFunc(Box::new(
CombinedAggFunc {
this: Box::new(Expression::Identifier(Identifier::new("countIf"))),
expressions: args,
},
))));
}
self.expect(TokenType::RParen)?;
let filter = self.parse_filter_clause()?;
Ok(Some(Expression::CountIf(Box::new(AggFunc {
ignore_nulls: None,
this,
distinct,
filter,
order_by: Vec::new(),
having_max: None,
name: Some(name.to_string()),
limit: None,
inferred_type: None,
}))))
}
(crate::function_registry::TypedParseKind::Binary, "STARTS_WITH")
| (crate::function_registry::TypedParseKind::Binary, "ENDS_WITH") => {
    // STARTS_WITH(s, prefix) / ENDS_WITH(s, suffix): exactly two arguments,
    // comma-separated, closing paren consumed here.
    let this = self.parse_expression()?;
    self.expect(TokenType::Comma)?;
    let expression = self.parse_expression()?;
    self.expect(TokenType::RParen)?;
    let func = BinaryFunc {
        original_name: None,
        this,
        expression,
        inferred_type: None,
    };
    // spec.canonical_name mirrors the name component of the matched tuple.
    let expr = match spec.canonical_name {
        "STARTS_WITH" => Expression::StartsWith(Box::new(func)),
        "ENDS_WITH" => Expression::EndsWith(Box::new(func)),
        _ => unreachable!("binary typed parse kind already matched in caller"),
    };
    Ok(Some(expr))
}
(crate::function_registry::TypedParseKind::Binary, "ATAN2") => {
    // ATAN2(y, x): fixed two-argument form.
    let this = self.parse_expression()?;
    self.expect(TokenType::Comma)?;
    let expression = self.parse_expression()?;
    self.expect(TokenType::RParen)?;
    Ok(Some(Expression::Atan2(Box::new(BinaryFunc {
        original_name: None,
        this,
        expression,
        inferred_type: None,
    }))))
}
(crate::function_registry::TypedParseKind::Binary, "MAP_FROM_ARRAYS")
| (crate::function_registry::TypedParseKind::Binary, "MAP_CONTAINS_KEY")
| (crate::function_registry::TypedParseKind::Binary, "ELEMENT_AT") => {
    // Two-argument map/collection helpers; each maps to a dedicated AST node.
    let this = self.parse_expression()?;
    self.expect(TokenType::Comma)?;
    let expression = self.parse_expression()?;
    self.expect(TokenType::RParen)?;
    let func = BinaryFunc {
        original_name: None,
        this,
        expression,
        inferred_type: None,
    };
    let expr = match spec.canonical_name {
        "MAP_FROM_ARRAYS" => Expression::MapFromArrays(Box::new(func)),
        "MAP_CONTAINS_KEY" => Expression::MapContainsKey(Box::new(func)),
        "ELEMENT_AT" => Expression::ElementAt(Box::new(func)),
        _ => unreachable!("binary map parse kind already matched in caller"),
    };
    Ok(Some(expr))
}
(crate::function_registry::TypedParseKind::Binary, "CONTAINS")
| (crate::function_registry::TypedParseKind::Binary, "MOD")
| (crate::function_registry::TypedParseKind::Binary, "POW") => {
    // Two-argument scalar functions: FUNC(this, expression).
    let this = self.parse_expression()?;
    self.expect(TokenType::Comma)?;
    let expression = self.parse_expression()?;
    self.expect(TokenType::RParen)?;
    // Build the shared BinaryFunc payload once instead of repeating the
    // identical struct literal per variant -- consistent with the sibling
    // binary arms (STARTS_WITH, MAP_FROM_ARRAYS, ADD_MONTHS, ...). Moving
    // `func` in each match arm is fine: only one arm executes.
    let func = BinaryFunc {
        original_name: None,
        this,
        expression,
        inferred_type: None,
    };
    let expr = match spec.canonical_name {
        "CONTAINS" => Expression::Contains(Box::new(func)),
        "MOD" => Expression::ModFunc(Box::new(func)),
        "POW" => Expression::Power(Box::new(func)),
        _ => unreachable!("binary scalar parse kind already matched in caller"),
    };
    Ok(Some(expr))
}
(crate::function_registry::TypedParseKind::Binary, "ADD_MONTHS")
| (crate::function_registry::TypedParseKind::Binary, "MONTHS_BETWEEN")
| (crate::function_registry::TypedParseKind::Binary, "NEXT_DAY") => {
    // Two-argument date helpers. MONTHS_BETWEEN additionally accepts an
    // optional third "round off" argument; that form is preserved as a
    // generic Function node rather than the typed MonthsBetween node.
    let this = self.parse_expression()?;
    self.expect(TokenType::Comma)?;
    let expression = self.parse_expression()?;
    if spec.canonical_name == "MONTHS_BETWEEN" && self.match_token(TokenType::Comma) {
        let round_off = self.parse_expression()?;
        self.expect(TokenType::RParen)?;
        return Ok(Some(Expression::Function(Box::new(
            crate::expressions::Function::new(
                "MONTHS_BETWEEN".to_string(),
                vec![this, expression, round_off],
            ),
        ))));
    }
    self.expect(TokenType::RParen)?;
    let func = BinaryFunc {
        original_name: None,
        this,
        expression,
        inferred_type: None,
    };
    let expr = match spec.canonical_name {
        "ADD_MONTHS" => Expression::AddMonths(Box::new(func)),
        "MONTHS_BETWEEN" => Expression::MonthsBetween(Box::new(func)),
        "NEXT_DAY" => Expression::NextDay(Box::new(func)),
        _ => unreachable!("date binary parse kind already matched in caller"),
    };
    Ok(Some(expr))
}
(crate::function_registry::TypedParseKind::Binary, "ARRAY_CONTAINS")
| (crate::function_registry::TypedParseKind::Binary, "ARRAY_POSITION")
| (crate::function_registry::TypedParseKind::Binary, "ARRAY_APPEND")
| (crate::function_registry::TypedParseKind::Binary, "ARRAY_PREPEND")
| (crate::function_registry::TypedParseKind::Binary, "ARRAY_UNION")
| (crate::function_registry::TypedParseKind::Binary, "ARRAY_EXCEPT")
| (crate::function_registry::TypedParseKind::Binary, "ARRAY_REMOVE") => {
    // Two-argument array functions: FUNC(array, element_or_array).
    let this = self.parse_expression()?;
    self.expect(TokenType::Comma)?;
    let expression = self.parse_expression()?;
    self.expect(TokenType::RParen)?;
    let func = BinaryFunc {
        original_name: None,
        this,
        expression,
        inferred_type: None,
    };
    let expr = match spec.canonical_name {
        "ARRAY_CONTAINS" => Expression::ArrayContains(Box::new(func)),
        "ARRAY_POSITION" => Expression::ArrayPosition(Box::new(func)),
        "ARRAY_APPEND" => Expression::ArrayAppend(Box::new(func)),
        "ARRAY_PREPEND" => Expression::ArrayPrepend(Box::new(func)),
        "ARRAY_UNION" => Expression::ArrayUnion(Box::new(func)),
        "ARRAY_EXCEPT" => Expression::ArrayExcept(Box::new(func)),
        "ARRAY_REMOVE" => Expression::ArrayRemove(Box::new(func)),
        _ => unreachable!("array binary parse kind already matched in caller"),
    };
    Ok(Some(expr))
}
(crate::function_registry::TypedParseKind::Unary, "LENGTH") => {
    let this = self.parse_expression()?;
    // PostgreSQL: LENGTH(string, encoding) accepts optional second argument
    if self.match_token(TokenType::Comma) {
        let encoding = self.parse_expression()?;
        self.expect(TokenType::RParen)?;
        // Store as a regular function to preserve both arguments
        // NOTE(review): this arm uses `upper_name` while sibling arms use
        // `name.to_string()` -- presumably the uppercased spelling of the
        // same identifier; confirm against the caller that binds it.
        Ok(Some(Expression::Function(Box::new(Function::new(
            upper_name,
            vec![this, encoding],
        )))))
    } else {
        // Single-argument form maps to the typed Length node.
        self.expect(TokenType::RParen)?;
        Ok(Some(Expression::Length(Box::new(UnaryFunc::new(this)))))
    }
}
(crate::function_registry::TypedParseKind::Unary, "LOWER")
| (crate::function_registry::TypedParseKind::Unary, "UPPER") => {
    // Single-argument case-conversion functions. The argument may carry a
    // ClickHouse-style inline alias, handled by the dedicated parse helper.
    let operand = self.parse_expression_with_clickhouse_alias()?;
    self.expect(TokenType::RParen)?;
    let wrapped = UnaryFunc::new(operand);
    let expr = if spec.canonical_name == "LOWER" {
        Expression::Lower(Box::new(wrapped))
    } else {
        Expression::Upper(Box::new(wrapped))
    };
    Ok(Some(expr))
}
(crate::function_registry::TypedParseKind::Unary, "TYPEOF") => {
    let this = self.parse_expression()?;
    // ClickHouse: expr AS alias inside function args
    let this = self.maybe_clickhouse_alias(this);
    if self.match_token(TokenType::Comma) {
        // Preserve additional args via generic function form
        let mut all_args = vec![this];
        let remaining = self.parse_function_arguments()?;
        all_args.extend(remaining);
        self.expect(TokenType::RParen)?;
        Ok(Some(Expression::Function(Box::new(Function {
            name: name.to_string(),
            args: all_args,
            distinct: false,
            trailing_comments: Vec::new(),
            use_bracket_syntax: false,
            no_parens: false,
            quoted: false,
            span: None,
            inferred_type: None,
        }))))
    } else {
        // Canonical single-argument form maps to the typed Typeof node.
        self.expect(TokenType::RParen)?;
        Ok(Some(Expression::Typeof(Box::new(UnaryFunc::new(this)))))
    }
}
(crate::function_registry::TypedParseKind::Unary, "DAYOFWEEK")
| (crate::function_registry::TypedParseKind::Unary, "DAYOFYEAR")
| (crate::function_registry::TypedParseKind::Unary, "DAYOFMONTH")
| (crate::function_registry::TypedParseKind::Unary, "WEEKOFYEAR") => {
    // Single-argument date-part extractors, each with its own AST node.
    let this = self.parse_expression()?;
    self.expect(TokenType::RParen)?;
    let func = UnaryFunc::new(this);
    let expr = match spec.canonical_name {
        "DAYOFWEEK" => Expression::DayOfWeek(Box::new(func)),
        "DAYOFYEAR" => Expression::DayOfYear(Box::new(func)),
        "DAYOFMONTH" => Expression::DayOfMonth(Box::new(func)),
        "WEEKOFYEAR" => Expression::WeekOfYear(Box::new(func)),
        _ => unreachable!("date-part unary parse kind already matched in caller"),
    };
    Ok(Some(expr))
}
(crate::function_registry::TypedParseKind::Unary, "SIN")
| (crate::function_registry::TypedParseKind::Unary, "COS")
| (crate::function_registry::TypedParseKind::Unary, "TAN")
| (crate::function_registry::TypedParseKind::Unary, "ASIN")
| (crate::function_registry::TypedParseKind::Unary, "ACOS")
| (crate::function_registry::TypedParseKind::Unary, "ATAN")
| (crate::function_registry::TypedParseKind::Unary, "RADIANS")
| (crate::function_registry::TypedParseKind::Unary, "DEGREES") => {
    // Single-argument trigonometric / angle-conversion functions.
    let this = self.parse_expression()?;
    // MySQL: ATAN(y, x) with 2 args is equivalent to ATAN2(y, x)
    if spec.canonical_name == "ATAN" && self.match_token(TokenType::Comma) {
        let expression = self.parse_expression()?;
        self.expect(TokenType::RParen)?;
        // original_name keeps the source spelling so generation can
        // round-trip ATAN rather than emitting ATAN2.
        return Ok(Some(Expression::Atan2(Box::new(BinaryFunc {
            original_name: Some("ATAN".to_string()),
            this,
            expression,
            inferred_type: None,
        }))));
    }
    self.expect(TokenType::RParen)?;
    let func = UnaryFunc::new(this);
    let expr = match spec.canonical_name {
        "SIN" => Expression::Sin(Box::new(func)),
        "COS" => Expression::Cos(Box::new(func)),
        "TAN" => Expression::Tan(Box::new(func)),
        "ASIN" => Expression::Asin(Box::new(func)),
        "ACOS" => Expression::Acos(Box::new(func)),
        "ATAN" => Expression::Atan(Box::new(func)),
        "RADIANS" => Expression::Radians(Box::new(func)),
        "DEGREES" => Expression::Degrees(Box::new(func)),
        _ => unreachable!("trig unary parse kind already matched in caller"),
    };
    Ok(Some(expr))
}
(crate::function_registry::TypedParseKind::Unary, "YEAR")
| (crate::function_registry::TypedParseKind::Unary, "MONTH")
| (crate::function_registry::TypedParseKind::Unary, "DAY")
| (crate::function_registry::TypedParseKind::Unary, "HOUR")
| (crate::function_registry::TypedParseKind::Unary, "MINUTE")
| (crate::function_registry::TypedParseKind::Unary, "SECOND")
| (crate::function_registry::TypedParseKind::Unary, "DAYOFWEEK_ISO")
| (crate::function_registry::TypedParseKind::Unary, "QUARTER")
| (crate::function_registry::TypedParseKind::Unary, "EPOCH")
| (crate::function_registry::TypedParseKind::Unary, "EPOCH_MS") => {
    // Single-argument datetime-unit extractors.
    let this = self.parse_expression()?;
    self.expect(TokenType::RParen)?;
    let func = UnaryFunc::new(this);
    let expr = match spec.canonical_name {
        "YEAR" => Expression::Year(Box::new(func)),
        "MONTH" => Expression::Month(Box::new(func)),
        "DAY" => Expression::Day(Box::new(func)),
        "HOUR" => Expression::Hour(Box::new(func)),
        "MINUTE" => Expression::Minute(Box::new(func)),
        "SECOND" => Expression::Second(Box::new(func)),
        "DAYOFWEEK_ISO" => Expression::DayOfWeekIso(Box::new(func)),
        "QUARTER" => Expression::Quarter(Box::new(func)),
        "EPOCH" => Expression::Epoch(Box::new(func)),
        "EPOCH_MS" => Expression::EpochMs(Box::new(func)),
        _ => unreachable!("date unary parse kind already matched in caller"),
    };
    Ok(Some(expr))
}
(crate::function_registry::TypedParseKind::Unary, "ARRAY_LENGTH")
| (crate::function_registry::TypedParseKind::Unary, "ARRAY_SIZE")
| (crate::function_registry::TypedParseKind::Unary, "CARDINALITY")
| (crate::function_registry::TypedParseKind::Unary, "ARRAY_REVERSE")
| (crate::function_registry::TypedParseKind::Unary, "ARRAY_DISTINCT")
| (crate::function_registry::TypedParseKind::Unary, "ARRAY_COMPACT")
| (crate::function_registry::TypedParseKind::Unary, "EXPLODE")
| (crate::function_registry::TypedParseKind::Unary, "EXPLODE_OUTER") => {
    let this = self.parse_expression()?;
    // PostgreSQL ARRAY_LENGTH and ARRAY_SIZE can take a second dimension arg.
    // Preserve that by falling back to generic function form for 2-arg usage.
    if (spec.canonical_name == "ARRAY_LENGTH" || spec.canonical_name == "ARRAY_SIZE")
        && self.match_token(TokenType::Comma)
    {
        let dimension = self.parse_expression()?;
        self.expect(TokenType::RParen)?;
        return Ok(Some(Expression::Function(Box::new(Function {
            name: name.to_string(),
            args: vec![this, dimension],
            distinct: false,
            trailing_comments: Vec::new(),
            use_bracket_syntax: false,
            no_parens: false,
            quoted: false,
            span: None,
            inferred_type: None,
        }))));
    }
    self.expect(TokenType::RParen)?;
    // Single-argument forms map to dedicated typed AST nodes.
    let func = UnaryFunc::new(this);
    let expr = match spec.canonical_name {
        "ARRAY_LENGTH" => Expression::ArrayLength(Box::new(func)),
        "ARRAY_SIZE" => Expression::ArraySize(Box::new(func)),
        "CARDINALITY" => Expression::Cardinality(Box::new(func)),
        "ARRAY_REVERSE" => Expression::ArrayReverse(Box::new(func)),
        "ARRAY_DISTINCT" => Expression::ArrayDistinct(Box::new(func)),
        "ARRAY_COMPACT" => Expression::ArrayCompact(Box::new(func)),
        "EXPLODE" => Expression::Explode(Box::new(func)),
        "EXPLODE_OUTER" => Expression::ExplodeOuter(Box::new(func)),
        _ => unreachable!("array unary parse kind already matched in caller"),
    };
    Ok(Some(expr))
}
(crate::function_registry::TypedParseKind::Unary, "MAP_FROM_ENTRIES")
| (crate::function_registry::TypedParseKind::Unary, "MAP_KEYS")
| (crate::function_registry::TypedParseKind::Unary, "MAP_VALUES") => {
    // Single-argument map accessors.
    let this = self.parse_expression()?;
    self.expect(TokenType::RParen)?;
    let func = UnaryFunc::new(this);
    let expr = match spec.canonical_name {
        "MAP_FROM_ENTRIES" => Expression::MapFromEntries(Box::new(func)),
        "MAP_KEYS" => Expression::MapKeys(Box::new(func)),
        "MAP_VALUES" => Expression::MapValues(Box::new(func)),
        _ => unreachable!("map unary parse kind already matched in caller"),
    };
    Ok(Some(expr))
}
(crate::function_registry::TypedParseKind::Unary, "ABS") => {
    // ABS(x); the argument may carry a ClickHouse-style inline alias.
    let operand = self.parse_expression_with_clickhouse_alias()?;
    self.expect(TokenType::RParen)?;
    let wrapped = UnaryFunc::new(operand);
    Ok(Some(Expression::Abs(Box::new(wrapped))))
}
(crate::function_registry::TypedParseKind::Unary, "SQRT")
| (crate::function_registry::TypedParseKind::Unary, "EXP")
| (crate::function_registry::TypedParseKind::Unary, "LN") => {
    // Simple one-argument math functions, each with a dedicated AST node.
    let operand = self.parse_expression()?;
    self.expect(TokenType::RParen)?;
    let wrapped = UnaryFunc::new(operand);
    let expr = match spec.canonical_name {
        "SQRT" => Expression::Sqrt(Box::new(wrapped)),
        "EXP" => Expression::Exp(Box::new(wrapped)),
        "LN" => Expression::Ln(Box::new(wrapped)),
        _ => unreachable!("math unary parse kind already matched in caller"),
    };
    Ok(Some(expr))
}
(crate::function_registry::TypedParseKind::Variadic, "TO_NUMBER")
| (crate::function_registry::TypedParseKind::Variadic, "TRY_TO_NUMBER") => {
    // TO_NUMBER(expr [, format [, precision [, scale]]]); TRY_TO_NUMBER is
    // the non-raising variant, flagged via `safe`.
    let args = self.parse_expression_list()?;
    self.expect(TokenType::RParen)?;
    // A missing first argument degrades to NULL rather than erroring.
    // (idiom: `first()` instead of `get(0)`, per clippy::get_first)
    let this = args.first().cloned().unwrap_or(Expression::Null(Null {}));
    let format = args.get(1).cloned().map(Box::new);
    let precision = args.get(2).cloned().map(Box::new);
    let scale = args.get(3).cloned().map(Box::new);
    let safe = if spec.canonical_name == "TRY_TO_NUMBER" {
        Some(Box::new(Expression::Boolean(BooleanLiteral {
            value: true,
        })))
    } else {
        None
    };
    Ok(Some(Expression::ToNumber(Box::new(ToNumber {
        this: Box::new(this),
        format,
        nlsparam: None,
        precision,
        scale,
        safe,
        safe_name: None,
    }))))
}
(crate::function_registry::TypedParseKind::Variadic, "SUBSTRING") => {
    // SUBSTRING supports four surface syntaxes, distinguished by the token
    // after the first argument:
    //   1. str FROM pos [FOR len]   (SQL standard)
    //   2. str FOR len [FROM pos]   (PostgreSQL)
    //   3. str, pos [, len]         (comma form)
    //   4. str                      (bare; kept as generic Function)
    let this = self.parse_expression()?;
    // ClickHouse: implicit/explicit alias: substring('1234' lhs FROM 2) or substring('1234' AS lhs FROM 2)
    let this = self.try_clickhouse_func_arg_alias(this);
    // Check for SQL standard FROM syntax: SUBSTRING(str FROM pos [FOR len])
    if self.match_token(TokenType::From) {
        let start = self.parse_expression()?;
        let start = self.try_clickhouse_func_arg_alias(start);
        let length = if self.match_token(TokenType::For) {
            let len = self.parse_expression()?;
            Some(self.try_clickhouse_func_arg_alias(len))
        } else {
            None
        };
        self.expect(TokenType::RParen)?;
        Ok(Some(Expression::Substring(Box::new(SubstringFunc {
            this,
            start,
            length,
            from_for_syntax: true,
        }))))
    } else if self.match_token(TokenType::For) {
        // PostgreSQL: SUBSTRING(str FOR len) or SUBSTRING(str FOR len FROM pos)
        let length_expr = self.parse_expression()?;
        let length_expr = self.try_clickhouse_func_arg_alias(length_expr);
        let start = if self.match_token(TokenType::From) {
            let s = self.parse_expression()?;
            self.try_clickhouse_func_arg_alias(s)
        } else {
            // No FROM, use 1 as default start position
            Expression::Literal(Box::new(Literal::Number("1".to_string())))
        };
        self.expect(TokenType::RParen)?;
        Ok(Some(Expression::Substring(Box::new(SubstringFunc {
            this,
            start,
            length: Some(length_expr),
            from_for_syntax: true,
        }))))
    } else if self.match_token(TokenType::Comma) {
        // Comma-separated syntax: SUBSTRING(str, pos) or SUBSTRING(str, pos, len)
        let start = self.parse_expression()?;
        let start = self.try_clickhouse_func_arg_alias(start);
        let length = if self.match_token(TokenType::Comma) {
            let len = self.parse_expression()?;
            Some(self.try_clickhouse_func_arg_alias(len))
        } else {
            None
        };
        self.expect(TokenType::RParen)?;
        Ok(Some(Expression::Substring(Box::new(SubstringFunc {
            this,
            start,
            length,
            from_for_syntax: false,
        }))))
    } else {
        // Just SUBSTRING(str) with no other args - unusual but handle it
        self.expect(TokenType::RParen)?;
        // Treat as function call
        Ok(Some(Expression::Function(Box::new(Function {
            name: name.to_string(),
            args: vec![this],
            distinct: false,
            trailing_comments: Vec::new(),
            use_bracket_syntax: false,
            no_parens: false,
            quoted: false,
            span: None,
            inferred_type: None,
        }))))
    }
}
(crate::function_registry::TypedParseKind::Variadic, "DATE_PART") => {
    // DATE_PART(part FROM expr) / DATE_PART(part, expr).
    let part = self.parse_expression()?;
    // For TSQL/Fabric, normalize date part aliases (e.g., "dd" -> DAY)
    let mut part = if matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::TSQL)
            | Some(crate::dialects::DialectType::Fabric)
    ) {
        self.normalize_tsql_date_part(part)
    } else {
        part
    };
    // Accept both FROM and comma as separator (Snowflake supports both syntaxes)
    if !self.match_token(TokenType::From) && !self.match_token(TokenType::Comma) {
        return Err(self.parse_error("Expected FROM or comma in DATE_PART"));
    }
    let from_expr = self.parse_expression()?;
    self.expect(TokenType::RParen)?;
    // Snowflake: rewrite a bare identifier date part as a Var --
    // presumably so downstream generation treats it as a keyword-like
    // unit rather than a column reference; see the helper for details.
    if matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::Snowflake)
    ) {
        if self
            .try_parse_date_part_field_identifier_expr(&part)
            .is_some()
        {
            part = self.convert_date_part_identifier_expr_to_var(part);
        }
    }
    let mut args = vec![part, from_expr];
    self.normalize_date_part_arg("DATE_PART", &mut args);
    // Kept as a generic Function node (no dedicated DatePart AST type here).
    Ok(Some(Expression::Function(Box::new(Function {
        name: "DATE_PART".to_string(),
        args,
        distinct: false,
        trailing_comments: Vec::new(),
        use_bracket_syntax: false,
        no_parens: false,
        quoted: false,
        span: None,
        inferred_type: None,
    }))))
}
(crate::function_registry::TypedParseKind::Variadic, "DATEADD") => {
    // DATEADD(unit, amount, date) -- 3-arg traditional syntax -- or
    // DATE_ADD(date, interval) -- BigQuery-style 2-arg syntax. Each
    // argument may carry a ClickHouse inline alias.
    let mut first_arg = self.parse_expression()?;
    first_arg = self.try_clickhouse_func_arg_alias(first_arg);
    self.expect(TokenType::Comma)?;
    let second_arg = self.parse_expression()?;
    let second_arg = self.try_clickhouse_func_arg_alias(second_arg);
    // Check if there's a third argument (traditional 3-arg syntax)
    if self.match_token(TokenType::Comma) {
        let third_arg = self.parse_expression()?;
        let third_arg = self.try_clickhouse_func_arg_alias(third_arg);
        self.expect(TokenType::RParen)?;
        // Snowflake: a bare identifier unit (first arg) is rewritten to a Var.
        if matches!(
            self.config.dialect,
            Some(crate::dialects::DialectType::Snowflake)
        ) {
            if self
                .try_parse_date_part_unit_identifier_expr(&first_arg)
                .is_some()
            {
                first_arg = self.convert_date_part_identifier_expr_to_var(first_arg);
            }
        }
        let mut args = vec![first_arg, second_arg, third_arg];
        self.normalize_date_part_arg(name, &mut args);
        Ok(Some(Expression::Function(Box::new(Function {
            name: name.to_string(),
            args,
            distinct: false,
            trailing_comments: Vec::new(),
            use_bracket_syntax: false,
            no_parens: false,
            quoted: false,
            span: None,
            inferred_type: None,
        }))))
    } else {
        // BigQuery 2-arg syntax: DATE_ADD(date, interval)
        self.expect(TokenType::RParen)?;
        Ok(Some(Expression::Function(Box::new(Function {
            name: name.to_string(),
            args: vec![first_arg, second_arg],
            distinct: false,
            trailing_comments: Vec::new(),
            use_bracket_syntax: false,
            no_parens: false,
            quoted: false,
            span: None,
            inferred_type: None,
        }))))
    }
}
(crate::function_registry::TypedParseKind::Variadic, "DATEDIFF") => {
    // First argument (can be unit for DATEDIFF/TIMESTAMPDIFF or datetime for TIMEDIFF)
    let first_arg = self.parse_expression()?;
    let first_arg = self.try_clickhouse_func_arg_alias(first_arg);
    self.expect(TokenType::Comma)?;
    let second_arg = self.parse_expression()?;
    let second_arg = self.try_clickhouse_func_arg_alias(second_arg);
    // Third argument is optional (SQLite TIMEDIFF only takes 2 args)
    let mut args = if self.match_token(TokenType::Comma) {
        let third_arg = self.parse_expression()?;
        let third_arg = self.try_clickhouse_func_arg_alias(third_arg);
        vec![first_arg, second_arg, third_arg]
    } else {
        vec![first_arg, second_arg]
    };
    // ClickHouse: optional 4th timezone argument for dateDiff
    while self.match_token(TokenType::Comma) {
        let arg = self.parse_expression()?;
        args.push(self.try_clickhouse_func_arg_alias(arg));
    }
    self.expect(TokenType::RParen)?;
    // Snowflake 3-arg DATEDIFF(unit, start, end): if the first arg is a
    // recognizable unit, build the typed DateDiff node. Note the argument
    // swap: `this` is the end date and `expression` the start date.
    if matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::Snowflake)
    ) && args.len() == 3
    {
        if let Some(unit) = self.try_parse_date_part_unit_expr(&args[0]) {
            return Ok(Some(Expression::DateDiff(Box::new(DateDiffFunc {
                this: args[2].clone(),
                expression: args[1].clone(),
                unit: Some(unit),
            }))));
        }
    }
    self.normalize_date_part_arg(name, &mut args);
    Ok(Some(Expression::Function(Box::new(Function {
        name: name.to_string(),
        args,
        distinct: false,
        trailing_comments: Vec::new(),
        use_bracket_syntax: false,
        no_parens: false,
        quoted: false,
        span: None,
        inferred_type: None,
    }))))
}
(crate::function_registry::TypedParseKind::Variadic, "RANDOM") => {
    // RANDOM() - no args, RANDOM(seed) - Snowflake, RANDOM(lower, upper) - Teradata
    if self.check(TokenType::RParen) {
        self.expect(TokenType::RParen)?;
        Ok(Some(Expression::Random(Random)))
    } else {
        let first = self.parse_expression()?;
        if self.match_token(TokenType::Comma) {
            // Two args: a (lower, upper) range, modeled on the Rand node.
            let second = self.parse_expression()?;
            self.expect(TokenType::RParen)?;
            Ok(Some(Expression::Rand(Box::new(Rand {
                seed: None,
                lower: Some(Box::new(first)),
                upper: Some(Box::new(second)),
            }))))
        } else {
            // One arg: interpreted as a seed.
            self.expect(TokenType::RParen)?;
            Ok(Some(Expression::Rand(Box::new(Rand {
                seed: Some(Box::new(first)),
                lower: None,
                upper: None,
            }))))
        }
    }
}
(crate::function_registry::TypedParseKind::Variadic, "RAND") => {
    // RAND([seed]) -- always a Rand node, with or without a seed.
    let seed = if self.check(TokenType::RParen) {
        None
    } else {
        Some(Box::new(self.parse_expression()?))
    };
    self.expect(TokenType::RParen)?;
    Ok(Some(Expression::Rand(Box::new(Rand {
        seed,
        lower: None,
        upper: None,
    }))))
}
(crate::function_registry::TypedParseKind::Variadic, "PI") => {
    // PI() -- zero arguments; only the closing paren remains to consume.
    self.expect(TokenType::RParen)?;
    Ok(Some(Expression::Pi(Pi)))
}
(crate::function_registry::TypedParseKind::Variadic, "LAST_DAY") => {
    // LAST_DAY(date [, unit]) -- the optional second argument is a
    // datetime unit (e.g. MONTH), parsed by the dedicated field parser.
    let this = self.parse_expression()?;
    let unit = if self.match_token(TokenType::Comma) {
        Some(self.parse_datetime_field()?)
    } else {
        None
    };
    self.expect(TokenType::RParen)?;
    Ok(Some(Expression::LastDay(Box::new(LastDayFunc {
        this,
        unit,
    }))))
}
(crate::function_registry::TypedParseKind::Variadic, "POSITION") => {
    // POSITION(... IN ...) has its own grammar; delegate entirely.
    let expr = self
        .parse_position()?
        .ok_or_else(|| self.parse_error("Expected expression in POSITION"))?;
    self.expect(TokenType::RParen)?;
    Ok(Some(expr))
}
(crate::function_registry::TypedParseKind::Variadic, "STRPOS") => {
    // STRPOS(string, substring [, occurrence]). Note the argument order:
    // the haystack comes first, unlike LOCATE.
    let this = self.parse_expression()?;
    self.expect(TokenType::Comma)?;
    let substr = self.parse_expression()?;
    let occurrence = if self.match_token(TokenType::Comma) {
        Some(Box::new(self.parse_expression()?))
    } else {
        None
    };
    self.expect(TokenType::RParen)?;
    Ok(Some(Expression::StrPosition(Box::new(StrPosition {
        this: Box::new(this),
        substr: Some(Box::new(substr)),
        position: None,
        occurrence,
    }))))
}
(crate::function_registry::TypedParseKind::Variadic, "LOCATE") => {
    // LOCATE(substr, str [, position]). Zero- and one-argument calls are
    // preserved verbatim as generic Function nodes; the 2/3-argument form
    // maps to StrPosition with the arguments swapped (LOCATE's needle
    // comes first, StrPosition's haystack is `this`).
    if self.check(TokenType::RParen) {
        // LOCATE() -- degenerate but preserved as-is.
        self.skip();
        return Ok(Some(Expression::Function(Box::new(Function {
            name: name.to_string(),
            args: vec![],
            distinct: false,
            trailing_comments: Vec::new(),
            use_bracket_syntax: false,
            no_parens: false,
            quoted: false,
            span: None,
            inferred_type: None,
        }))));
    }
    let first = self.parse_expression()?;
    // Fix: the original condition `!check(Comma) && check(RParen)` was
    // redundant -- the current token has exactly one type, so it cannot be
    // both Comma and RParen; `check(RParen)` alone is equivalent.
    if self.check(TokenType::RParen) {
        // LOCATE(x) -- single argument, preserved verbatim.
        self.skip();
        return Ok(Some(Expression::Function(Box::new(Function {
            name: name.to_string(),
            args: vec![first],
            distinct: false,
            trailing_comments: Vec::new(),
            use_bracket_syntax: false,
            no_parens: false,
            quoted: false,
            span: None,
            inferred_type: None,
        }))));
    }
    self.expect(TokenType::Comma)?;
    let second = self.parse_expression()?;
    let position = if self.match_token(TokenType::Comma) {
        Some(Box::new(self.parse_expression()?))
    } else {
        None
    };
    self.expect(TokenType::RParen)?;
    Ok(Some(Expression::StrPosition(Box::new(StrPosition {
        this: Box::new(second),
        substr: Some(Box::new(first)),
        position,
        occurrence: None,
    }))))
}
(crate::function_registry::TypedParseKind::Variadic, "INSTR") => {
    // INSTR(str, substr [, position]) -- haystack first, unlike LOCATE.
    let first = self.parse_expression()?;
    self.expect(TokenType::Comma)?;
    let second = self.parse_expression()?;
    let position = if self.match_token(TokenType::Comma) {
        Some(Box::new(self.parse_expression()?))
    } else {
        None
    };
    self.expect(TokenType::RParen)?;
    Ok(Some(Expression::StrPosition(Box::new(StrPosition {
        this: Box::new(first),
        substr: Some(Box::new(second)),
        position,
        occurrence: None,
    }))))
}
(crate::function_registry::TypedParseKind::Variadic, "NORMALIZE") => {
    // NORMALIZE(str [, form]) -- Unicode normalization; the optional
    // second argument is the normalization form (e.g. NFC).
    let this = self.parse_expression()?;
    let form = if self.match_token(TokenType::Comma) {
        Some(Box::new(self.parse_expression()?))
    } else {
        None
    };
    self.expect(TokenType::RParen)?;
    Ok(Some(Expression::Normalize(Box::new(Normalize {
        this: Box::new(this),
        form,
        is_casefold: None,
    }))))
}
(crate::function_registry::TypedParseKind::Variadic, "INITCAP") => {
    // INITCAP(str [, delimiters]) -- the 2-arg form (e.g. Snowflake) is
    // kept as a generic Function to preserve the delimiter argument.
    let this = self.parse_expression()?;
    let delimiter = if self.match_token(TokenType::Comma) {
        Some(Box::new(self.parse_expression()?))
    } else {
        None
    };
    self.expect(TokenType::RParen)?;
    if let Some(delim) = delimiter {
        Ok(Some(Expression::Function(Box::new(Function::new(
            "INITCAP".to_string(),
            vec![this, *delim],
        )))))
    } else {
        Ok(Some(Expression::Initcap(Box::new(UnaryFunc::new(this)))))
    }
}
(crate::function_registry::TypedParseKind::Variadic, "FLOOR") => {
    // FLOOR(x), FLOOR(x TO unit), FLOOR(x, scale), or FLOOR(x, scale, ...)
    // -- the >2-argument form falls back to a generic Function node.
    let this = self.parse_expression()?;
    let to = if self.match_token(TokenType::To) {
        self.parse_var()?
    } else {
        None
    };
    // A numeric scale is only parsed when no TO unit was given.
    let scale = if to.is_none() && self.match_token(TokenType::Comma) {
        Some(self.parse_expression()?)
    } else {
        None
    };
    // Extra comma-separated arguments: preserve everything generically.
    // NOTE(review): if a TO unit was parsed and a comma follows, `to` is
    // dropped on this path -- confirm whether FLOOR(x TO unit, ...) can
    // occur in any supported dialect.
    if self.check(TokenType::Comma) {
        let mut args = vec![this];
        if let Some(s) = scale {
            args.push(s);
        }
        while self.match_token(TokenType::Comma) {
            args.push(self.parse_expression()?);
        }
        self.expect(TokenType::RParen)?;
        return Ok(Some(Expression::Function(Box::new(Function {
            name: name.to_string(),
            args,
            distinct: false,
            trailing_comments: Vec::new(),
            use_bracket_syntax: false,
            no_parens: false,
            quoted: false,
            span: None,
            inferred_type: None,
        }))));
    }
    self.expect(TokenType::RParen)?;
    Ok(Some(Expression::Floor(Box::new(FloorFunc {
        this,
        scale,
        to,
    }))))
}
(crate::function_registry::TypedParseKind::Variadic, "LOG") => {
    // LOG(x) or LOG(a, b). Which of the two arguments is the base is
    // dialect-dependent (log_base_first); the 1-arg meaning (LN vs LOG10)
    // is likewise dialect-dependent (log_defaults_to_ln).
    let first = self.parse_expression()?;
    if self.match_token(TokenType::Comma) {
        let second = self.parse_expression()?;
        self.expect(TokenType::RParen)?;
        let (value, base) = if self.log_base_first() {
            (second, first)
        } else {
            (first, second)
        };
        Ok(Some(Expression::Log(Box::new(LogFunc {
            this: value,
            base: Some(base),
        }))))
    } else {
        self.expect(TokenType::RParen)?;
        if self.log_defaults_to_ln() {
            Ok(Some(Expression::Ln(Box::new(UnaryFunc::new(first)))))
        } else {
            Ok(Some(Expression::Log(Box::new(LogFunc {
                this: first,
                base: None,
            }))))
        }
    }
}
(crate::function_registry::TypedParseKind::Variadic, "FLATTEN") => {
    // FLATTEN(...) -- arguments vary widely by dialect; keep generic.
    let args = self.parse_function_arguments()?;
    self.expect(TokenType::RParen)?;
    Ok(Some(Expression::Function(Box::new(Function {
        name: name.to_string(),
        args,
        distinct: false,
        trailing_comments: Vec::new(),
        use_bracket_syntax: false,
        no_parens: false,
        quoted: false,
        span: None,
        inferred_type: None,
    }))))
}
(crate::function_registry::TypedParseKind::Variadic, "ARRAY_INTERSECT") => {
    // ARRAY_INTERSECT(a, b, ...) -- one or more arrays, variadic node.
    let mut expressions = vec![self.parse_expression()?];
    while self.match_token(TokenType::Comma) {
        expressions.push(self.parse_expression()?);
    }
    self.expect(TokenType::RParen)?;
    // original_name keeps the source spelling for round-trip generation.
    Ok(Some(Expression::ArrayIntersect(Box::new(VarArgFunc {
        expressions,
        original_name: Some(name.to_string()),
        inferred_type: None,
    }))))
}
(crate::function_registry::TypedParseKind::Variadic, "CURRENT_SCHEMAS") => {
    // CURRENT_SCHEMAS([include_implicit]) -- zero or one argument.
    let args = if self.check(TokenType::RParen) {
        Vec::new()
    } else {
        vec![self.parse_expression()?]
    };
    self.expect(TokenType::RParen)?;
    Ok(Some(Expression::CurrentSchemas(Box::new(CurrentSchemas {
        this: args.into_iter().next().map(Box::new),
    }))))
}
(crate::function_registry::TypedParseKind::Variadic, "COALESCE") => {
    // COALESCE(a, b, ...) -- zero arguments are tolerated: an immediate
    // ')' yields an empty expression list.
    let expressions = if self.check(TokenType::RParen) {
        Vec::new()
    } else {
        self.parse_expression_list()?
    };
    self.expect(TokenType::RParen)?;
    let func = crate::expressions::VarArgFunc {
        original_name: None,
        expressions,
        inferred_type: None,
    };
    Ok(Some(Expression::Coalesce(Box::new(func))))
}
(crate::function_registry::TypedParseKind::Variadic, "IFNULL") => {
    // IFNULL(a, b) normalizes to COALESCE, tagging the original name so
    // generation can round-trip the IFNULL spelling. Fewer than two
    // arguments are preserved verbatim as a generic Function.
    let args = self.parse_expression_list()?;
    self.expect(TokenType::RParen)?;
    if args.len() >= 2 {
        Ok(Some(Expression::Coalesce(Box::new(
            crate::expressions::VarArgFunc {
                original_name: Some("IFNULL".to_string()),
                expressions: args,
                inferred_type: None,
            },
        ))))
    } else {
        Ok(Some(Expression::Function(Box::new(Function {
            name: name.to_string(),
            args,
            distinct: false,
            trailing_comments: Vec::new(),
            use_bracket_syntax: false,
            no_parens: false,
            quoted: false,
            span: None,
            inferred_type: None,
        }))))
    }
}
(crate::function_registry::TypedParseKind::Variadic, "NVL") => {
    // NVL(a, b) maps to the typed Nvl node; more than two arguments are
    // rewritten to COALESCE, fewer are preserved as a generic Function.
    let args = self.parse_expression_list()?;
    self.expect(TokenType::RParen)?;
    if args.len() > 2 {
        Ok(Some(Expression::Function(Box::new(Function {
            name: "COALESCE".to_string(),
            args,
            distinct: false,
            trailing_comments: Vec::new(),
            use_bracket_syntax: false,
            no_parens: false,
            quoted: false,
            span: None,
            inferred_type: None,
        }))))
    } else if args.len() == 2 {
        Ok(Some(Expression::Nvl(Box::new(
            crate::expressions::BinaryFunc {
                original_name: Some("NVL".to_string()),
                this: args[0].clone(),
                expression: args[1].clone(),
                inferred_type: None,
            },
        ))))
    } else {
        Ok(Some(Expression::Function(Box::new(Function {
            name: name.to_string(),
            args,
            distinct: false,
            trailing_comments: Vec::new(),
            use_bracket_syntax: false,
            no_parens: false,
            quoted: false,
            span: None,
            inferred_type: None,
        }))))
    }
}
(crate::function_registry::TypedParseKind::Variadic, "NVL2") => {
    // NVL2(x, value_if_not_null, value_if_null); extra arguments beyond
    // the third are silently ignored by the typed path. Fewer than three
    // are preserved as a generic Function.
    let args = self.parse_expression_list()?;
    self.expect(TokenType::RParen)?;
    if args.len() >= 3 {
        Ok(Some(Expression::Nvl2(Box::new(
            crate::expressions::Nvl2Func {
                this: args[0].clone(),
                true_value: args[1].clone(),
                false_value: args[2].clone(),
                inferred_type: None,
            },
        ))))
    } else {
        Ok(Some(Expression::Function(Box::new(Function {
            name: name.to_string(),
            args,
            distinct: false,
            trailing_comments: Vec::new(),
            use_bracket_syntax: false,
            no_parens: false,
            quoted: false,
            span: None,
            inferred_type: None,
        }))))
    }
}
(crate::function_registry::TypedParseKind::Variadic, "EXTRACT") => {
    // EXTRACT(field FROM expr) in standard SQL. Two fallbacks precede the
    // typed path:
    //  1. ClickHouse extract(haystack, pattern): a lookahead heuristic
    //     (current token looks like an expression head AND the next token
    //     continues an argument list) routes to a generic Function.
    //  2. A string literal first argument (regex-style extract) is also
    //     kept generic.
    if matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    ) && (self.check(TokenType::Identifier)
        || self.check(TokenType::Var)
        || self.peek().token_type.is_keyword()
        || self.check(TokenType::String)
        || self.check(TokenType::Number))
        && (self.check_next(TokenType::Comma)
            || self.check_next(TokenType::LParen)
            || self.check_next(TokenType::Var)
            || self.check_next(TokenType::Identifier))
    {
        let args = self.parse_function_arguments()?;
        self.expect(TokenType::RParen)?;
        return Ok(Some(Expression::Function(Box::new(Function {
            name: name.to_string(),
            args,
            distinct: false,
            trailing_comments: Vec::new(),
            use_bracket_syntax: false,
            no_parens: false,
            quoted: false,
            span: None,
            inferred_type: None,
        }))));
    }
    if self.check(TokenType::String) {
        let args = self.parse_expression_list()?;
        self.expect(TokenType::RParen)?;
        return Ok(Some(Expression::Function(Box::new(Function {
            name: name.to_string(),
            args,
            distinct: false,
            trailing_comments: Vec::new(),
            use_bracket_syntax: false,
            no_parens: false,
            quoted: false,
            span: None,
            inferred_type: None,
        }))));
    }
    // Typed path: EXTRACT(field FROM expr) or EXTRACT(field, expr).
    let field = self.parse_datetime_field()?;
    if !self.match_token(TokenType::From) && !self.match_token(TokenType::Comma) {
        return Err(self.parse_error("Expected FROM or comma after EXTRACT field"));
    }
    let this = self.parse_expression()?;
    let this = self.try_clickhouse_func_arg_alias(this);
    self.expect(TokenType::RParen)?;
    Ok(Some(Expression::Extract(Box::new(ExtractFunc {
        this,
        field,
    }))))
}
(crate::function_registry::TypedParseKind::Variadic, "STRUCT") => {
    // STRUCT(...) -- arguments (possibly named) via the struct-arg parser;
    // an empty argument list is allowed.
    let args = if self.check(TokenType::RParen) {
        Vec::new()
    } else {
        self.parse_struct_args()?
    };
    self.expect(TokenType::RParen)?;
    Ok(Some(Expression::Function(Box::new(Function {
        name: name.to_string(),
        args,
        distinct: false,
        trailing_comments: Vec::new(),
        use_bracket_syntax: false,
        no_parens: false,
        quoted: false,
        span: None,
        inferred_type: None,
    }))))
}
(crate::function_registry::TypedParseKind::Variadic, "CHAR") => {
    // MySQL CHAR(n, ... [USING charset]): with a charset the typed
    // CharFunc node is used; otherwise a generic Function.
    let args = self.parse_expression_list()?;
    let charset = if self.match_token(TokenType::Using) {
        if !self.is_at_end() {
            // The charset name is taken as raw token text, not parsed
            // as an expression.
            let charset_token = self.advance();
            Some(charset_token.text.clone())
        } else {
            None
        }
    } else {
        None
    };
    self.expect(TokenType::RParen)?;
    if charset.is_some() {
        Ok(Some(Expression::CharFunc(Box::new(
            crate::expressions::CharFunc {
                args,
                charset,
                name: None,
            },
        ))))
    } else {
        Ok(Some(Expression::Function(Box::new(Function {
            name: name.to_string(),
            args,
            distinct: false,
            trailing_comments: Vec::new(),
            use_bracket_syntax: false,
            no_parens: false,
            quoted: false,
            span: None,
            inferred_type: None,
        }))))
    }
}
(crate::function_registry::TypedParseKind::Variadic, "CHR") => {
    // CHR: same shape as the CHAR arm above, except the CharFunc records
    // name: Some("CHR") so generation keeps the CHR spelling.
    let args = self.parse_expression_list()?;
    let charset = if self.match_token(TokenType::Using) {
        if !self.is_at_end() {
            let charset_token = self.advance();
            Some(charset_token.text.clone())
        } else {
            None
        }
    } else {
        None
    };
    self.expect(TokenType::RParen)?;
    if charset.is_some() {
        Ok(Some(Expression::CharFunc(Box::new(
            crate::expressions::CharFunc {
                args,
                charset,
                name: Some("CHR".to_string()),
            },
        ))))
    } else {
        Ok(Some(Expression::Function(Box::new(Function {
            name: name.to_string(),
            args,
            distinct: false,
            trailing_comments: Vec::new(),
            use_bracket_syntax: false,
            no_parens: false,
            quoted: false,
            span: None,
            inferred_type: None,
        }))))
    }
}
(crate::function_registry::TypedParseKind::Variadic, "RANGE_N") => {
let this = self.parse_bitwise_or()?;
self.expect(TokenType::Between)?;
let mut expressions = Vec::new();
while !self.check(TokenType::Each) && !self.check(TokenType::RParen) {
expressions.push(self.parse_expression()?);
if !self.match_token(TokenType::Comma) {
break;
}
}
let each = if self.match_token(TokenType::Each) {
Some(Box::new(self.parse_expression()?))
} else {
None
};
self.expect(TokenType::RParen)?;
Ok(Some(Expression::RangeN(Box::new(RangeN {
this: Box::new(this),
expressions,
each,
}))))
}
(crate::function_registry::TypedParseKind::Variadic, "XMLTABLE") => {
if let Some(xml_table) = self.parse_xml_table()? {
self.expect(TokenType::RParen)?;
Ok(Some(xml_table))
} else {
Err(self.parse_error("Failed to parse XMLTABLE"))
}
}
(crate::function_registry::TypedParseKind::Variadic, "XMLELEMENT") => {
if let Some(elem) = self.parse_xml_element()? {
self.expect(TokenType::RParen)?;
Ok(Some(elem))
} else {
self.expect(TokenType::RParen)?;
Ok(Some(Expression::Function(Box::new(Function {
name: name.to_string(),
args: Vec::new(),
distinct: false,
trailing_comments: Vec::new(),
use_bracket_syntax: false,
no_parens: false,
quoted: false,
span: None,
inferred_type: None,
}))))
}
}
(crate::function_registry::TypedParseKind::Variadic, "XMLATTRIBUTES") => {
let mut attrs = Vec::new();
if !self.check(TokenType::RParen) {
loop {
let expr = self.parse_expression()?;
if self.match_token(TokenType::As) {
let alias_ident = self.expect_identifier_or_keyword_with_quoted()?;
attrs.push(Expression::Alias(Box::new(Alias {
this: expr,
alias: alias_ident,
column_aliases: Vec::new(),
pre_alias_comments: Vec::new(),
trailing_comments: Vec::new(),
inferred_type: None,
})));
} else {
attrs.push(expr);
}
if !self.match_token(TokenType::Comma) {
break;
}
}
}
self.expect(TokenType::RParen)?;
Ok(Some(Expression::Function(Box::new(Function {
name: "XMLATTRIBUTES".to_string(),
args: attrs,
distinct: false,
trailing_comments: Vec::new(),
use_bracket_syntax: false,
no_parens: false,
quoted: false,
span: None,
inferred_type: None,
}))))
}
(crate::function_registry::TypedParseKind::Variadic, "XMLCOMMENT") => {
let args = if self.check(TokenType::RParen) {
Vec::new()
} else {
self.parse_expression_list()?
};
self.expect(TokenType::RParen)?;
Ok(Some(Expression::Function(Box::new(Function {
name: "XMLCOMMENT".to_string(),
args,
distinct: false,
trailing_comments: Vec::new(),
use_bracket_syntax: false,
no_parens: false,
quoted: false,
span: None,
inferred_type: None,
}))))
}
(crate::function_registry::TypedParseKind::Variadic, "MATCH") => {
let expressions = if self.check(TokenType::Table)
&& !matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) {
self.skip();
let table_name = self.expect_identifier_or_keyword()?;
vec![Expression::Var(Box::new(Var {
this: format!("TABLE {}", table_name),
}))]
} else {
self.parse_expression_list()?
};
self.expect(TokenType::RParen)?;
if !self.check_keyword_text("AGAINST") {
return Ok(Some(Expression::Function(Box::new(Function {
name: "MATCH".to_string(),
args: expressions,
distinct: false,
trailing_comments: Vec::new(),
use_bracket_syntax: false,
no_parens: false,
quoted: false,
span: None,
inferred_type: None,
}))));
}
self.skip();
self.expect(TokenType::LParen)?;
let search_expr = self.parse_primary()?;
let modifier = if self.match_text_seq(&["IN", "NATURAL", "LANGUAGE", "MODE"]) {
if self.match_text_seq(&["WITH", "QUERY", "EXPANSION"]) {
Some(Box::new(Expression::Var(Box::new(Var {
this: "IN NATURAL LANGUAGE MODE WITH QUERY EXPANSION".to_string(),
}))))
} else {
Some(Box::new(Expression::Var(Box::new(Var {
this: "IN NATURAL LANGUAGE MODE".to_string(),
}))))
}
} else if self.match_text_seq(&["IN", "BOOLEAN", "MODE"]) {
Some(Box::new(Expression::Var(Box::new(Var {
this: "IN BOOLEAN MODE".to_string(),
}))))
} else if self.match_text_seq(&["WITH", "QUERY", "EXPANSION"]) {
Some(Box::new(Expression::Var(Box::new(Var {
this: "WITH QUERY EXPANSION".to_string(),
}))))
} else {
None
};
self.expect(TokenType::RParen)?;
Ok(Some(Expression::MatchAgainst(Box::new(MatchAgainst {
this: Box::new(search_expr),
expressions,
modifier,
}))))
}
(crate::function_registry::TypedParseKind::Variadic, "TRANSFORM") => {
let expressions = if self.check(TokenType::RParen) {
Vec::new()
} else {
self.parse_function_args_with_lambda()?
};
self.expect(TokenType::RParen)?;
let row_format_before = if self.match_token(TokenType::Row) {
self.parse_row()?
} else {
None
};
let record_writer = if self.match_text_seq(&["RECORDWRITER"]) {
Some(Box::new(self.parse_expression()?))
} else {
None
};
if self.match_token(TokenType::Using) {
let command_script = Some(Box::new(self.parse_expression()?));
let schema = if self.match_token(TokenType::As) {
self.parse_schema()?
} else {
None
};
let row_format_after = if self.match_token(TokenType::Row) {
self.parse_row()?
} else {
None
};
let record_reader = if self.match_text_seq(&["RECORDREADER"]) {
Some(Box::new(self.parse_expression()?))
} else {
None
};
Ok(Some(Expression::QueryTransform(Box::new(QueryTransform {
expressions,
command_script,
schema: schema.map(Box::new),
row_format_before: row_format_before.map(Box::new),
record_writer,
row_format_after: row_format_after.map(Box::new),
record_reader,
}))))
} else {
Ok(Some(Expression::Function(Box::new(Function {
name: name.to_string(),
args: expressions,
distinct: false,
trailing_comments: Vec::new(),
use_bracket_syntax: false,
no_parens: false,
quoted,
span: None,
inferred_type: None,
}))))
}
}
(crate::function_registry::TypedParseKind::Variadic, "CONVERT") => {
let is_try = upper_name == "TRY_CONVERT";
let is_tsql = matches!(
self.config.dialect,
Some(crate::dialects::DialectType::TSQL)
| Some(crate::dialects::DialectType::Fabric)
);
if is_tsql {
let saved = self.current;
let orig_type_text = if self.current < self.tokens.len() {
self.tokens[self.current].text.to_ascii_uppercase()
} else {
String::new()
};
let dt = self.parse_data_type();
if let Ok(mut dt) = dt {
if self.match_token(TokenType::Comma) {
if orig_type_text == "NVARCHAR" || orig_type_text == "NCHAR" {
dt = match dt {
crate::expressions::DataType::VarChar { length, .. } => {
if let Some(len) = length {
crate::expressions::DataType::Custom {
name: format!("{}({})", orig_type_text, len),
}
} else {
crate::expressions::DataType::Custom {
name: orig_type_text.clone(),
}
}
}
crate::expressions::DataType::Char { length } => {
if let Some(len) = length {
crate::expressions::DataType::Custom {
name: format!("{}({})", orig_type_text, len),
}
} else {
crate::expressions::DataType::Custom {
name: orig_type_text.clone(),
}
}
}
other => other,
};
}
let value = self.parse_expression()?;
let style = if self.match_token(TokenType::Comma) {
Some(self.parse_expression()?)
} else {
None
};
self.expect(TokenType::RParen)?;
let func_name = if is_try { "TRY_CONVERT" } else { "CONVERT" };
let mut args = vec![Expression::DataType(dt), value];
if let Some(s) = style {
args.push(s);
}
return Ok(Some(Expression::Function(Box::new(Function {
name: func_name.to_string(),
args,
distinct: false,
trailing_comments: Vec::new(),
use_bracket_syntax: false,
no_parens: false,
quoted: false,
span: None,
inferred_type: None,
}))));
}
self.current = saved;
} else {
self.current = saved;
}
}
let this = self.parse_expression()?;
if self.match_token(TokenType::Using) {
let charset = self.expect_identifier()?;
self.expect(TokenType::RParen)?;
Ok(Some(Expression::Cast(Box::new(Cast {
this,
to: DataType::CharacterSet { name: charset },
trailing_comments: Vec::new(),
double_colon_syntax: false,
format: None,
default: None,
inferred_type: None,
}))))
} else if self.match_token(TokenType::Comma) {
let mut args = vec![this];
args.push(self.parse_expression()?);
while self.match_token(TokenType::Comma) {
args.push(self.parse_expression()?);
}
self.expect(TokenType::RParen)?;
let func_name = if is_try { "TRY_CONVERT" } else { "CONVERT" };
Ok(Some(Expression::Function(Box::new(Function {
name: func_name.to_string(),
args,
distinct: false,
trailing_comments: Vec::new(),
use_bracket_syntax: false,
no_parens: false,
quoted: false,
span: None,
inferred_type: None,
}))))
} else {
self.expect(TokenType::RParen)?;
let func_name = if is_try { "TRY_CONVERT" } else { "CONVERT" };
Ok(Some(Expression::Function(Box::new(Function {
name: func_name.to_string(),
args: vec![this],
distinct: false,
trailing_comments: Vec::new(),
use_bracket_syntax: false,
no_parens: false,
quoted: false,
span: None,
inferred_type: None,
}))))
}
}
(crate::function_registry::TypedParseKind::Variadic, "TRIM") => {
let (position, position_explicit) = if self.match_token(TokenType::Leading) {
(TrimPosition::Leading, true)
} else if self.match_token(TokenType::Trailing) {
(TrimPosition::Trailing, true)
} else if self.match_token(TokenType::Both) {
(TrimPosition::Both, true)
} else {
(TrimPosition::Both, false)
};
if position_explicit || self.check(TokenType::From) {
if self.match_token(TokenType::From) {
let this = self.parse_expression()?;
self.expect(TokenType::RParen)?;
Ok(Some(Expression::Trim(Box::new(TrimFunc {
this,
characters: None,
position,
sql_standard_syntax: true,
position_explicit,
}))))
} else {
let first_expr = self.parse_bitwise_or()?;
let first_expr = self.try_clickhouse_func_arg_alias(first_expr);
if self.match_token(TokenType::From) {
let this = self.parse_bitwise_or()?;
let this = self.try_clickhouse_func_arg_alias(this);
self.expect(TokenType::RParen)?;
Ok(Some(Expression::Trim(Box::new(TrimFunc {
this,
characters: Some(first_expr),
position,
sql_standard_syntax: true,
position_explicit,
}))))
} else {
self.expect(TokenType::RParen)?;
Ok(Some(Expression::Trim(Box::new(TrimFunc {
this: first_expr,
characters: None,
position,
sql_standard_syntax: true,
position_explicit,
}))))
}
}
} else {
let first_expr = self.parse_expression()?;
let first_expr = self.try_clickhouse_func_arg_alias(first_expr);
if self.match_token(TokenType::From) {
let this = self.parse_expression()?;
let this = self.try_clickhouse_func_arg_alias(this);
self.expect(TokenType::RParen)?;
Ok(Some(Expression::Trim(Box::new(TrimFunc {
this,
characters: Some(first_expr),
position: TrimPosition::Both,
sql_standard_syntax: true,
position_explicit: false,
}))))
} else if self.match_token(TokenType::Comma) {
let second_expr = self.parse_expression()?;
self.expect(TokenType::RParen)?;
let trim_pattern_first = matches!(
self.config.dialect,
Some(crate::dialects::DialectType::Spark)
);
let (this, characters) = if trim_pattern_first {
(second_expr, first_expr)
} else {
(first_expr, second_expr)
};
Ok(Some(Expression::Trim(Box::new(TrimFunc {
this,
characters: Some(characters),
position: TrimPosition::Both,
sql_standard_syntax: false,
position_explicit: false,
}))))
} else {
self.expect(TokenType::RParen)?;
Ok(Some(Expression::Trim(Box::new(TrimFunc {
this: first_expr,
characters: None,
position: TrimPosition::Both,
sql_standard_syntax: false,
position_explicit: false,
}))))
}
}
}
(crate::function_registry::TypedParseKind::Variadic, "OVERLAY") => {
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) {
let args = self.parse_function_arguments()?;
self.expect(TokenType::RParen)?;
return Ok(Some(Expression::Function(Box::new(Function {
name: name.to_string(),
args,
distinct: false,
trailing_comments: Vec::new(),
use_bracket_syntax: false,
no_parens: false,
quoted: false,
span: None,
inferred_type: None,
}))));
}
let this = self.parse_expression()?;
if self.match_token(TokenType::Placing) {
let replacement = self.parse_expression()?;
self.expect(TokenType::From)?;
let from = self.parse_expression()?;
let length = if self.match_token(TokenType::For) {
Some(self.parse_expression()?)
} else {
None
};
self.expect(TokenType::RParen)?;
Ok(Some(Expression::Overlay(Box::new(OverlayFunc {
this,
replacement,
from,
length,
}))))
} else if self.match_token(TokenType::Comma) {
let replacement = self.parse_expression()?;
if self.match_token(TokenType::Comma) {
let from = self.parse_expression()?;
let length = if self.match_token(TokenType::Comma) {
Some(self.parse_expression()?)
} else {
None
};
self.expect(TokenType::RParen)?;
Ok(Some(Expression::Overlay(Box::new(OverlayFunc {
this,
replacement,
from,
length,
}))))
} else {
self.expect(TokenType::RParen)?;
Ok(Some(Expression::Function(Box::new(Function {
name: name.to_string(),
args: vec![this, replacement],
distinct: false,
trailing_comments: Vec::new(),
use_bracket_syntax: false,
no_parens: false,
quoted: false,
span: None,
inferred_type: None,
}))))
}
} else {
self.expect(TokenType::RParen)?;
Ok(Some(Expression::Function(Box::new(Function {
name: name.to_string(),
args: vec![this],
distinct: false,
trailing_comments: Vec::new(),
use_bracket_syntax: false,
no_parens: false,
quoted: false,
span: None,
inferred_type: None,
}))))
}
}
(crate::function_registry::TypedParseKind::Variadic, "CEIL") => {
let this = self.parse_expression()?;
// Check for TO unit syntax (Druid: CEIL(__time TO WEEK))
let to = if self.match_token(TokenType::To) {
// Parse the time unit as a variable/identifier
self.parse_var()?
} else {
None
};
let decimals = if to.is_none() && self.match_token(TokenType::Comma) {
Some(self.parse_expression()?)
} else {
None
};
self.expect(TokenType::RParen)?;
Ok(Some(Expression::Ceil(Box::new(CeilFunc {
this,
decimals,
to,
}))))
}
(crate::function_registry::TypedParseKind::Variadic, "TIMESTAMP_FROM_PARTS")
| (crate::function_registry::TypedParseKind::Variadic, "TIMESTAMP_NTZ_FROM_PARTS")
| (crate::function_registry::TypedParseKind::Variadic, "TIMESTAMP_LTZ_FROM_PARTS")
| (crate::function_registry::TypedParseKind::Variadic, "TIMESTAMP_TZ_FROM_PARTS")
| (crate::function_registry::TypedParseKind::Variadic, "DATE_FROM_PARTS")
| (crate::function_registry::TypedParseKind::Variadic, "TIME_FROM_PARTS") => {
let args = self.parse_expression_list()?;
self.expect(TokenType::RParen)?;
Ok(Some(Expression::Function(Box::new(Function {
name: name.to_string(),
args,
distinct: false,
trailing_comments: Vec::new(),
use_bracket_syntax: false,
no_parens: false,
quoted: false,
span: None,
inferred_type: None,
}))))
}
(crate::function_registry::TypedParseKind::CastLike, "TRY_CAST") => {
let this = self.parse_expression()?;
self.expect(TokenType::As)?;
let to = self.parse_data_type()?;
self.expect(TokenType::RParen)?;
Ok(Some(Expression::TryCast(Box::new(Cast {
this,
to,
trailing_comments: Vec::new(),
double_colon_syntax: false,
format: None,
default: None,
inferred_type: None,
}))))
}
(crate::function_registry::TypedParseKind::Conditional, "IF") => {
// ClickHouse: if() with zero args is valid in test queries
if self.check(TokenType::RParen) {
self.skip();
return Ok(Some(Expression::Function(Box::new(Function {
name: name.to_string(),
args: vec![],
distinct: false,
trailing_comments: Vec::new(),
use_bracket_syntax: false,
no_parens: false,
quoted: false,
span: None,
inferred_type: None,
}))));
}
let args = self.parse_expression_list()?;
self.expect(TokenType::RParen)?;
let expr = if args.len() == 3 {
Expression::IfFunc(Box::new(crate::expressions::IfFunc {
original_name: Some(upper_name.to_string()),
condition: args[0].clone(),
true_value: args[1].clone(),
false_value: Some(args[2].clone()),
inferred_type: None,
}))
} else if args.len() == 2 {
// IF with 2 args: condition, true_value (no false_value)
Expression::IfFunc(Box::new(crate::expressions::IfFunc {
original_name: Some(upper_name.to_string()),
condition: args[0].clone(),
true_value: args[1].clone(),
false_value: None,
inferred_type: None,
}))
} else {
return Err(self.parse_error("IF function requires 2 or 3 arguments"));
};
Ok(Some(expr))
}
_ => {
self.try_parse_registry_grouped_typed_family(name, upper_name, canonical_upper_name)
}
}
}
/// Route heavyweight typed-function families through the registry's
/// metadata groups.
///
/// Looks up the dispatch group for `canonical_upper_name` and forwards to
/// the matching family parser. Returns `Ok(None)` when the registry has no
/// group for the name, or when the Teradata-only TRANSLATE family is hit on
/// a non-Teradata dialect, so the caller can fall back to generic parsing.
fn try_parse_registry_grouped_typed_family(
    &mut self,
    name: &str,
    upper_name: &str,
    canonical_upper_name: &str,
) -> Result<Option<Expression>> {
    use crate::function_registry::TypedDispatchGroup;
    let Some(group) =
        crate::function_registry::typed_dispatch_group_by_name_upper(canonical_upper_name)
    else {
        return Ok(None);
    };
    match group {
        TypedDispatchGroup::AggregateFamily => Ok(Some(
            self.parse_typed_aggregate_family(name, upper_name, canonical_upper_name)?,
        )),
        TypedDispatchGroup::WindowFamily => Ok(Some(
            self.parse_typed_window_family(name, upper_name, canonical_upper_name)?,
        )),
        TypedDispatchGroup::JsonFamily => Ok(Some(
            self.parse_typed_json_family(name, upper_name, canonical_upper_name)?,
        )),
        TypedDispatchGroup::TranslateTeradataFamily => {
            // The TRANSLATE(... USING ...) family is only special-cased on
            // Teradata; other dialects fall through to generic parsing.
            if !matches!(
                self.config.dialect,
                Some(crate::dialects::DialectType::Teradata)
            ) {
                return Ok(None);
            }
            Ok(Some(self.parse_typed_translate_teradata_family(
                name,
                upper_name,
                canonical_upper_name,
            )?))
        }
    }
}
/// Build a plain, unquoted [`Expression::Function`] node carrying only a
/// name and an argument list; every other field takes its neutral default.
fn make_unquoted_function(name: &str, args: Vec<Expression>) -> Expression {
    let func = Function {
        name: name.to_owned(),
        args,
        distinct: false,
        trailing_comments: Vec::new(),
        use_bracket_syntax: false,
        no_parens: false,
        quoted: false,
        span: None,
        inferred_type: None,
    };
    Expression::Function(Box::new(func))
}
/// Build an [`Expression::AggregateFunction`] node from a name, argument
/// list, DISTINCT flag, and optional FILTER clause. The ordering, limit,
/// and null-handling fields are left unset.
fn make_simple_aggregate(
    name: &str,
    args: Vec<Expression>,
    distinct: bool,
    filter: Option<Expression>,
) -> Expression {
    let agg = AggregateFunction {
        name: name.to_owned(),
        args,
        distinct,
        filter,
        order_by: Vec::new(),
        limit: None,
        ignore_nulls: None,
        inferred_type: None,
    };
    Expression::AggregateFunction(Box::new(agg))
}
/// Parse phase-3 typed-function slices that are straightforward pass-throughs.
///
/// Looks up the registry's `ParserDispatchBehavior` for the canonical
/// uppercased function name and, when one exists, consumes the argument
/// list according to that behavior. Returns `Ok(None)` when the registry
/// has no entry, so the caller can fall back to more generic parsing.
///
/// Called with the opening `(` already consumed; every arm consumes through
/// the closing `)`, and aggregate-capable arms also consume an optional
/// trailing FILTER clause via `parse_filter_clause`.
fn try_parse_phase3_typed_function(
    &mut self,
    name: &str,
    _upper_name: &str,
    canonical_upper_name: &str,
) -> Result<Option<Expression>> {
    // No registry entry => not a phase-3 function; let the caller decide.
    let Some(behavior) =
        crate::function_registry::parser_dispatch_behavior_by_name_upper(canonical_upper_name)
    else {
        return Ok(None);
    };
    match behavior {
        // FUNC(a, b, ...): a plain, non-empty expression list.
        crate::function_registry::ParserDispatchBehavior::ExprListFunction => {
            let args = self.parse_expression_list()?;
            self.expect(TokenType::RParen)?;
            Ok(Some(Self::make_unquoted_function(name, args)))
        }
        // FUNC() or FUNC(a, b, ...): the argument list may be empty.
        crate::function_registry::ParserDispatchBehavior::OptionalExprListFunction => {
            let args = if self.check(TokenType::RParen) {
                Vec::new()
            } else {
                self.parse_expression_list()?
            };
            self.expect(TokenType::RParen)?;
            Ok(Some(Self::make_unquoted_function(name, args)))
        }
        // Arguments parsed with the richer function-argument grammar
        // (parse_function_arguments) rather than a plain expression list.
        crate::function_registry::ParserDispatchBehavior::FunctionArgumentsFunction => {
            let args = self.parse_function_arguments()?;
            self.expect(TokenType::RParen)?;
            Ok(Some(Self::make_unquoted_function(name, args)))
        }
        // FUNC(): no arguments at all.
        crate::function_registry::ParserDispatchBehavior::ZeroArgFunction => {
            self.expect(TokenType::RParen)?;
            Ok(Some(Self::make_unquoted_function(name, Vec::new())))
        }
        // Scalar by default; a trailing FILTER clause promotes the call to
        // an aggregate node.
        crate::function_registry::ParserDispatchBehavior::ExprListMaybeAggregateByFilter => {
            let args = if self.check(TokenType::RParen) {
                Vec::new()
            } else {
                self.parse_expression_list()?
            };
            self.expect(TokenType::RParen)?;
            let filter = self.parse_filter_clause()?;
            if filter.is_some() {
                Ok(Some(Self::make_simple_aggregate(name, args, false, filter)))
            } else {
                Ok(Some(Self::make_unquoted_function(name, args)))
            }
        }
        // Aggregate when the canonical name ends in "_AGG" or a FILTER
        // clause is present; scalar otherwise.
        crate::function_registry::ParserDispatchBehavior::ExprListMaybeAggregateByAggSuffix => {
            let args = self.parse_expression_list()?;
            self.expect(TokenType::RParen)?;
            let filter = self.parse_filter_clause()?;
            if canonical_upper_name.ends_with("_AGG") || filter.is_some() {
                Ok(Some(Self::make_simple_aggregate(name, args, false, filter)))
            } else {
                Ok(Some(Self::make_unquoted_function(name, args)))
            }
        }
        // HASH-family: only the exact name HASH_AGG (or a FILTER clause)
        // makes this an aggregate; plain HASH-like calls stay scalar.
        crate::function_registry::ParserDispatchBehavior::HashLike => {
            let args = self.parse_expression_list()?;
            self.expect(TokenType::RParen)?;
            let filter = self.parse_filter_clause()?;
            if canonical_upper_name == "HASH_AGG" || filter.is_some() {
                Ok(Some(Self::make_simple_aggregate(name, args, false, filter)))
            } else {
                Ok(Some(Self::make_unquoted_function(name, args)))
            }
        }
        // HLL-style aggregate: optional DISTINCT, then `*`, an empty list,
        // or a normal expression list; always produces an aggregate node.
        crate::function_registry::ParserDispatchBehavior::HllAggregate => {
            let distinct = self.match_token(TokenType::Distinct);
            let args = if self.match_token(TokenType::Star) {
                vec![Expression::Star(Star {
                    table: None,
                    except: None,
                    replace: None,
                    rename: None,
                    trailing_comments: Vec::new(),
                    span: None,
                })]
            } else if self.check(TokenType::RParen) {
                Vec::new()
            } else {
                self.parse_expression_list()?
            };
            self.expect(TokenType::RParen)?;
            let filter = self.parse_filter_clause()?;
            Ok(Some(Self::make_simple_aggregate(
                name, args, distinct, filter,
            )))
        }
        // Percentile-style aggregate: accepts DISTINCT, or consumes an
        // optional ALL (the default) without recording it.
        crate::function_registry::ParserDispatchBehavior::PercentileAggregate => {
            let distinct = self.match_token(TokenType::Distinct);
            if !distinct {
                self.match_token(TokenType::All);
            }
            let args = self.parse_expression_list()?;
            self.expect(TokenType::RParen)?;
            let filter = self.parse_filter_clause()?;
            Ok(Some(Self::make_simple_aggregate(
                name, args, distinct, filter,
            )))
        }
        // Always an aggregate over an expression list, never DISTINCT.
        crate::function_registry::ParserDispatchBehavior::ExprListAggregate => {
            let args = self.parse_expression_list()?;
            self.expect(TokenType::RParen)?;
            let filter = self.parse_filter_clause()?;
            Ok(Some(Self::make_simple_aggregate(name, args, false, filter)))
        }
        // Single-argument aggregate (one expression, then `)`).
        crate::function_registry::ParserDispatchBehavior::UnaryAggregate => {
            let this = self.parse_expression()?;
            self.expect(TokenType::RParen)?;
            let filter = self.parse_filter_clause()?;
            Ok(Some(Self::make_simple_aggregate(
                name,
                vec![this],
                false,
                filter,
            )))
        }
        // TRANSLATE on every dialect except Teradata, which has its own
        // dedicated family parser and must not be handled here.
        crate::function_registry::ParserDispatchBehavior::TranslateNonTeradata => {
            if matches!(
                self.config.dialect,
                Some(crate::dialects::DialectType::Teradata)
            ) {
                return Ok(None);
            }
            let args = self.parse_expression_list()?;
            self.expect(TokenType::RParen)?;
            Ok(Some(Self::make_unquoted_function(name, args)))
        }
    }
}
/// Parse a typed function call; the opening paren has already been consumed.
///
/// Mirrors the Python sqlglot pattern of mapping known function aliases to
/// dedicated typed AST nodes, falling back to a generic `Function` node
/// when no specialized parser claims the name.
fn parse_typed_function(
    &mut self,
    name: &str,
    upper_name: &str,
    quoted: bool,
) -> Result<Expression> {
    // On ClickHouse, bitOr/bitAnd/bitXor and the bit-shift helpers are
    // scalar functions, not aggregate-family calls. Routing them straight
    // to the generic parser avoids aggregate-specific recursion and matches
    // the syntax used throughout the ClickHouse parser corpus.
    let is_clickhouse = matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    );
    let is_clickhouse_scalar_bit_func = matches!(
        upper_name,
        "BITOR" | "BITAND" | "BITXOR" | "BITSHIFTLEFT" | "BITSHIFTRIGHT"
    );
    if is_clickhouse && is_clickhouse_scalar_bit_func {
        return self.parse_generic_function(name, quoted);
    }
    let canonical_upper_name =
        crate::function_registry::canonical_typed_function_name_upper(upper_name);
    // sqlglot-internal rewrite: TIME_TO_TIME_STR(x) lowers directly to
    // CAST(x AS TEXT) instead of producing a Function node.
    if canonical_upper_name == "TIME_TO_TIME_STR" {
        let inner = self.parse_expression()?;
        self.expect(TokenType::RParen)?;
        let cast = Cast {
            this: inner,
            to: DataType::Text,
            trailing_comments: Vec::new(),
            double_colon_syntax: false,
            format: None,
            default: None,
            inferred_type: None,
        };
        return Ok(Expression::Cast(Box::new(cast)));
    }
    // Try the registry-driven dispatcher first, then the phase-3
    // pass-through slices; the first to return Some wins.
    if let Some(parsed) =
        self.try_parse_registry_typed_function(name, upper_name, canonical_upper_name, quoted)?
    {
        return Ok(parsed);
    }
    if let Some(parsed) =
        self.try_parse_phase3_typed_function(name, upper_name, canonical_upper_name)?
    {
        return Ok(parsed);
    }
    // Nothing specialized matched: parse as a generic function call.
    self.parse_generic_function(name, quoted)
}
fn parse_typed_aggregate_family(
&mut self,
name: &str,
upper_name: &str,
canonical_upper_name: &str,
) -> Result<Expression> {
match canonical_upper_name {
// COUNT function
"COUNT" => {
let (this, star, distinct) = if self.check(TokenType::RParen) {
(None, false, false)
} else if self.match_token(TokenType::Star) {
(None, true, false)
} else if self.match_token(TokenType::All) {
// COUNT(ALL expr) - ALL is the default, just consume it
(Some(self.parse_expression()?), false, false)
} else if self.match_token(TokenType::Distinct) {
let first_expr = self.parse_expression()?;
// Check for multiple columns: COUNT(DISTINCT a, b, c)
if self.match_token(TokenType::Comma) {
let mut args = vec![first_expr];
loop {
args.push(self.parse_expression()?);
if !self.match_token(TokenType::Comma) {
break;
}
}
// Return as a tuple expression for COUNT DISTINCT over multiple columns
(
Some(Expression::Tuple(Box::new(Tuple { expressions: args }))),
false,
true,
)
} else {
(Some(first_expr), false, true)
}
} else {
let first_expr = self.parse_expression()?;
// ClickHouse: consume optional AS alias inside function args (e.g., count(NULL AS a))
let first_expr = if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.check(TokenType::As)
{
self.skip(); // consume AS
let alias = self.expect_identifier_or_keyword_with_quoted()?;
Expression::Alias(Box::new(Alias {
this: first_expr,
alias,
column_aliases: Vec::new(),
pre_alias_comments: Vec::new(),
trailing_comments: Vec::new(),
inferred_type: None,
}))
} else {
first_expr
};
// Check for multiple arguments (rare but possible)
if self.match_token(TokenType::Comma) {
let mut args = vec![first_expr];
loop {
args.push(self.parse_expression()?);
if !self.match_token(TokenType::Comma) {
break;
}
}
self.expect(TokenType::RParen)?;
// Multiple args without DISTINCT - treat as generic function
return Ok(Expression::Function(Box::new(Function {
name: name.to_string(),
args,
distinct: false,
trailing_comments: Vec::new(),
use_bracket_syntax: false,
no_parens: false,
quoted: false,
span: None,
inferred_type: None,
})));
}
(Some(first_expr), false, false)
};
// BigQuery: RESPECT NULLS / IGNORE NULLS inside COUNT
let ignore_nulls = if self.match_token(TokenType::Ignore)
&& self.match_token(TokenType::Nulls)
{
Some(true)
} else if self.match_token(TokenType::Respect) && self.match_token(TokenType::Nulls)
{
Some(false)
} else {
None
};
self.expect(TokenType::RParen)?;
let filter = self.parse_filter_clause()?;
// Also check for IGNORE NULLS / RESPECT NULLS after the closing paren
let ignore_nulls = if ignore_nulls.is_some() {
ignore_nulls
} else if self.match_keywords(&[TokenType::Ignore, TokenType::Nulls]) {
Some(true)
} else if self.match_keywords(&[TokenType::Respect, TokenType::Nulls]) {
Some(false)
} else {
None
};
Ok(Expression::Count(Box::new(CountFunc {
this,
star,
distinct,
filter,
ignore_nulls,
original_name: Some(name.to_string()),
inferred_type: None,
})))
}
// LIST function: LIST(SELECT ...) in Materialize - list constructor with subquery
"LIST" => {
let is_materialize = matches!(
self.config.dialect,
Some(crate::dialects::DialectType::Materialize)
);
if is_materialize && self.check(TokenType::Select) {
let query = self.parse_select()?;
self.expect(TokenType::RParen)?;
return Ok(Expression::List(Box::new(List {
expressions: vec![query],
})));
}
// For non-Materialize or non-subquery, parse as either generic function or aggregate.
let distinct = self.match_token(TokenType::Distinct);
let args = if self.check(TokenType::RParen) {
Vec::new()
} else {
self.parse_function_arguments()?
};
let order_by = if self.match_token(TokenType::Order) {
self.expect(TokenType::By)?;
self.parse_order_by_list()?
} else {
Vec::new()
};
let limit = if self.match_token(TokenType::Limit) {
Some(Box::new(self.parse_expression()?))
} else {
None
};
self.expect(TokenType::RParen)?;
let filter = self.parse_filter_clause()?;
if distinct || !order_by.is_empty() || limit.is_some() || filter.is_some() {
Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
name: name.to_string(),
args,
distinct,
filter,
order_by,
limit,
ignore_nulls: None,
inferred_type: None,
})))
} else {
Ok(Expression::Function(Box::new(Function {
name: name.to_string(),
args,
distinct: false,
trailing_comments: Vec::new(),
use_bracket_syntax: false,
no_parens: false,
quoted: false,
span: None,
inferred_type: None,
})))
}
}
// MAP function: MAP(SELECT ...) in Materialize - map constructor with subquery
"MAP" => {
let is_materialize = matches!(
self.config.dialect,
Some(crate::dialects::DialectType::Materialize)
);
if is_materialize && self.check(TokenType::Select) {
let query = self.parse_select()?;
self.expect(TokenType::RParen)?;
return Ok(Expression::ToMap(Box::new(ToMap {
this: Box::new(query),
})));
}
// For non-Materialize or non-subquery, fall through to generic handling
let args = if self.check(TokenType::RParen) {
Vec::new()
} else {
self.parse_function_arguments()?
};
self.expect(TokenType::RParen)?;
Ok(Expression::Function(Box::new(Function {
name: name.to_string(),
args,
distinct: false,
trailing_comments: Vec::new(),
use_bracket_syntax: false,
no_parens: false,
quoted: false,
span: None,
inferred_type: None,
})))
}
// ARRAY function: ARRAY(SELECT ...) or ARRAY((SELECT ...) LIMIT n) is an array constructor with subquery
// Different from ARRAY<type> which is a data type
"ARRAY" => {
// Check if this is ARRAY(SELECT ...) - array subquery constructor
if self.check(TokenType::Select) {
let query = self.parse_select()?;
self.expect(TokenType::RParen)?;
// Pass the query directly as an argument to ARRAY function
// The generator will handle it correctly
return Ok(Expression::Function(Box::new(Function {
name: name.to_string(),
args: vec![query],
distinct: false,
trailing_comments: Vec::new(),
use_bracket_syntax: false,
no_parens: false,
quoted: false,
span: None,
inferred_type: None,
})));
}
// Check if this is ARRAY((SELECT ...) LIMIT n) - BigQuery allows LIMIT outside the subquery parens
// This is common for constructs like ARRAY((SELECT AS STRUCT ...) LIMIT 10)
if self.check(TokenType::LParen) {
// This could be a parenthesized subquery with modifiers after it
// Save position in case we need to backtrack
let saved_pos = self.current;
self.skip(); // consume opening paren
// Check if there's a SELECT or WITH inside
if self.check(TokenType::Select) || self.check(TokenType::With) {
let inner_query = self.parse_statement()?;
self.expect(TokenType::RParen)?; // close inner parens
// Now check for LIMIT/OFFSET modifiers outside the inner parens
let limit = if self.match_token(TokenType::Limit) {
let expr = self.parse_expression()?;
Some(Limit {
this: expr,
percent: false,
comments: Vec::new(),
})
} else {
None
};
let offset = if self.match_token(TokenType::Offset) {
let expr = self.parse_expression()?;
let rows = if self.match_token(TokenType::Row)
|| self.match_token(TokenType::Rows)
{
Some(true)
} else {
None
};
Some(Offset { this: expr, rows })
} else {
None
};
self.expect(TokenType::RParen)?; // close ARRAY parens
// Wrap the inner query in a Subquery with the modifiers
let subquery = Expression::Subquery(Box::new(Subquery {
this: inner_query,
alias: None,
column_aliases: Vec::new(),
order_by: None,
limit,
offset,
lateral: false,
modifiers_inside: false,
trailing_comments: Vec::new(),
distribute_by: None,
sort_by: None,
cluster_by: None,
inferred_type: None,
}));
return Ok(Expression::Function(Box::new(Function {
name: name.to_string(),
args: vec![subquery],
distinct: false,
trailing_comments: Vec::new(),
use_bracket_syntax: false,
no_parens: false,
quoted: false,
span: None,
inferred_type: None,
})));
} else {
// Not a subquery, backtrack and parse as regular arguments
self.current = saved_pos;
}
}
// Otherwise fall through to parse as generic function or error
// This could be ARRAY(...values...) or invalid syntax
let args = if self.check(TokenType::RParen) {
Vec::new()
} else {
self.parse_function_arguments()?
};
self.expect(TokenType::RParen)?;
Ok(Expression::Function(Box::new(Function {
name: name.to_string(),
args,
distinct: false,
trailing_comments: Vec::new(),
use_bracket_syntax: false,
no_parens: false,
quoted: false,
span: None,
inferred_type: None,
})))
}
// Simple aggregate functions (SUM, AVG, MIN, MAX, etc.)
// These can have multiple arguments in some contexts (e.g., MAX(a, b) is a scalar function)
"SUM"
| "AVG"
| "MIN"
| "MAX"
| "ARRAY_AGG"
| "ARRAY_CONCAT_AGG"
| "STDDEV"
| "STDDEV_POP"
| "STDDEV_SAMP"
| "VARIANCE"
| "VAR_POP"
| "VAR_SAMP"
| "MEDIAN"
| "MODE"
| "FIRST"
| "LAST"
| "ANY_VALUE"
| "APPROX_DISTINCT"
| "APPROX_COUNT_DISTINCT"
| "BIT_AND"
| "BIT_OR"
| "BIT_XOR" => {
let distinct = if self.match_token(TokenType::Distinct) {
true
} else {
self.match_token(TokenType::All); // ALL is the default, just consume it
false
};
// MODE() can have zero arguments when used with WITHIN GROUP
// e.g., MODE() WITHIN GROUP (ORDER BY col)
if self.check(TokenType::RParen) {
// Empty args - will likely be followed by WITHIN GROUP
self.expect(TokenType::RParen)?;
let filter = self.parse_filter_clause()?;
let agg = AggFunc {
ignore_nulls: None,
this: Expression::Null(Null {}), // Placeholder for 0-arg aggregate
distinct: false,
filter,
order_by: Vec::new(),
having_max: None,
name: Some(name.to_string()),
limit: None,
inferred_type: None,
};
return Ok(match upper_name {
"MODE" => Expression::Mode(Box::new(agg)),
_ => {
// ClickHouse: allow zero-arg aggregates (server will validate)
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) {
Expression::Function(Box::new(Function {
name: name.to_string(),
args: Vec::new(),
distinct: false,
trailing_comments: Vec::new(),
use_bracket_syntax: false,
no_parens: false,
quoted: false,
span: None,
inferred_type: None,
}))
} else {
return Err(self.parse_error(format!(
"{} cannot have zero arguments",
upper_name
)));
}
}
});
}
let first_arg = self.parse_expression_with_clickhouse_alias()?;
// Check if there are more arguments (multi-arg scalar function like MAX(a, b))
if self.match_token(TokenType::Comma) {
// Special handling for FIRST, LAST, ANY_VALUE with boolean second arg
// In Spark/Hive: first(col, true) means FIRST(col) IGNORE NULLS
let is_ignore_nulls_func = matches!(upper_name, "FIRST" | "LAST" | "ANY_VALUE");
let second_arg = self.parse_expression()?;
// Check if this is the IGNORE NULLS pattern: func(col, true)
if is_ignore_nulls_func && self.check(TokenType::RParen) {
if let Expression::Boolean(BooleanLiteral { value: true }) = &second_arg {
// This is func(col, true) -> FUNC(col) IGNORE NULLS
self.expect(TokenType::RParen)?;
let filter = self.parse_filter_clause()?;
let agg = AggFunc {
ignore_nulls: Some(true),
this: first_arg,
distinct,
filter,
order_by: Vec::new(),
having_max: None,
name: Some(name.to_string()),
limit: None,
inferred_type: None,
};
return Ok(match upper_name {
"FIRST" => Expression::First(Box::new(agg)),
"LAST" => Expression::Last(Box::new(agg)),
"ANY_VALUE" => Expression::AnyValue(Box::new(agg)),
_ => unreachable!(
"function name already matched by is_ignore_nulls_func guard"
),
});
}
}
// Multiple arguments - treat as generic function call
let mut args = vec![first_arg, second_arg];
while self.match_token(TokenType::Comma) {
args.push(self.parse_expression()?);
}
self.expect(TokenType::RParen)?;
Ok(Expression::Function(Box::new(Function {
name: name.to_string(),
args,
distinct: false,
trailing_comments: Vec::new(),
use_bracket_syntax: false,
no_parens: false,
quoted: false,
span: None,
inferred_type: None,
})))
} else {
// Check for IGNORE NULLS / RESPECT NULLS (BigQuery style)
let ignore_nulls = if self.match_token(TokenType::Ignore)
&& self.match_token(TokenType::Nulls)
{
Some(true)
} else if self.match_token(TokenType::Respect)
&& self.match_token(TokenType::Nulls)
{
Some(false)
} else {
None
};
// Check for HAVING MAX/MIN inside aggregate (BigQuery syntax)
// e.g., ANY_VALUE(fruit HAVING MAX sold)
let having_max = if self.match_token(TokenType::Having) {
let is_max = if self.check_keyword_text("MAX") {
self.skip();
true
} else if self.check_keyword_text("MIN") {
self.skip();
false
} else {
return Err(
self.parse_error("Expected MAX or MIN after HAVING in aggregate")
);
};
let expr = self.parse_expression()?;
Some((Box::new(expr), is_max))
} else {
None
};
// Check for ORDER BY inside aggregate (e.g., ARRAY_AGG(x ORDER BY y))
let order_by = if self.match_keywords(&[TokenType::Order, TokenType::By]) {
self.parse_order_by_list()?
} else {
Vec::new()
};
// Check for LIMIT inside aggregate (e.g., ARRAY_AGG(x ORDER BY y LIMIT 2))
// Also supports LIMIT offset, count (e.g., ARRAY_AGG(x ORDER BY y LIMIT 1, 10))
let limit = if self.match_token(TokenType::Limit) {
let first = self.parse_expression()?;
if self.match_token(TokenType::Comma) {
let second = self.parse_expression()?;
// Store as Tuple(offset, count)
Some(Box::new(Expression::Tuple(Box::new(Tuple {
expressions: vec![first, second],
}))))
} else {
Some(Box::new(first))
}
} else {
None
};
// Single argument - treat as aggregate function
self.expect(TokenType::RParen)?;
let filter = self.parse_filter_clause()?;
// Also check for IGNORE NULLS / RESPECT NULLS after the closing paren
// e.g., FIRST(col) IGNORE NULLS (Hive/Spark/generic SQL syntax)
let ignore_nulls = if ignore_nulls.is_some() {
ignore_nulls
} else if self.match_keywords(&[TokenType::Ignore, TokenType::Nulls]) {
Some(true)
} else if self.match_keywords(&[TokenType::Respect, TokenType::Nulls]) {
Some(false)
} else {
None
};
let agg = AggFunc {
ignore_nulls,
this: first_arg,
distinct,
filter,
order_by,
having_max,
name: Some(name.to_string()),
limit,
inferred_type: None,
};
Ok(match upper_name {
"SUM" => Expression::Sum(Box::new(agg)),
"AVG" => Expression::Avg(Box::new(agg)),
"MIN" => Expression::Min(Box::new(agg)),
"MAX" => Expression::Max(Box::new(agg)),
"ARRAY_AGG" => Expression::ArrayAgg(Box::new(agg)),
"ARRAY_CONCAT_AGG" => Expression::ArrayConcatAgg(Box::new(agg)),
"STDDEV" => Expression::Stddev(Box::new(agg)),
"STDDEV_POP" => Expression::StddevPop(Box::new(agg)),
"STDDEV_SAMP" => Expression::StddevSamp(Box::new(agg)),
"VARIANCE" => Expression::Variance(Box::new(agg)),
"VAR_POP" => Expression::VarPop(Box::new(agg)),
"VAR_SAMP" => Expression::VarSamp(Box::new(agg)),
"MEDIAN" => Expression::Median(Box::new(agg)),
"MODE" => Expression::Mode(Box::new(agg)),
"FIRST" => Expression::First(Box::new(agg)),
"LAST" => Expression::Last(Box::new(agg)),
"ANY_VALUE" => Expression::AnyValue(Box::new(agg)),
"APPROX_DISTINCT" => Expression::ApproxDistinct(Box::new(agg)),
"APPROX_COUNT_DISTINCT" => Expression::ApproxCountDistinct(Box::new(agg)),
"BIT_AND" => Expression::BitwiseAndAgg(Box::new(agg)),
"BIT_OR" => Expression::BitwiseOrAgg(Box::new(agg)),
"BIT_XOR" => Expression::BitwiseXorAgg(Box::new(agg)),
_ => unreachable!("aggregate function name already matched in caller"),
})
}
}
// STRING_AGG - STRING_AGG([DISTINCT] expr [, separator] [ORDER BY order_list])
"STRING_AGG" => {
let distinct = self.match_token(TokenType::Distinct);
let this = self.parse_expression()?;
// Separator is optional
let separator = if self.match_token(TokenType::Comma) {
Some(self.parse_expression()?)
} else {
None
};
let order_by = if self.match_keywords(&[TokenType::Order, TokenType::By]) {
Some(self.parse_order_by_list()?)
} else {
None
};
// BigQuery: LIMIT inside STRING_AGG
let limit = if self.match_token(TokenType::Limit) {
Some(Box::new(self.parse_expression()?))
} else {
None
};
self.expect(TokenType::RParen)?;
let filter = self.parse_filter_clause()?;
Ok(Expression::StringAgg(Box::new(StringAggFunc {
this,
separator,
order_by,
distinct,
filter,
limit,
inferred_type: None,
})))
}
// GROUP_CONCAT - GROUP_CONCAT([DISTINCT] expr [, expr...] [ORDER BY order_list] [SEPARATOR 'sep'])
// MySQL allows multiple args which get wrapped in CONCAT:
// GROUP_CONCAT(a, b, c SEPARATOR ',') -> GroupConcat(CONCAT(a, b, c), SEPARATOR=',')
"GROUP_CONCAT" => {
let distinct = self.match_token(TokenType::Distinct);
let first = self.parse_expression()?;
// Check for additional comma-separated expressions (before ORDER BY or SEPARATOR)
let mut exprs = vec![first];
while self.match_token(TokenType::Comma) {
// Check if the next tokens are ORDER BY or SEPARATOR
// If so, the comma was part of the separator syntax (not more args)
if self.check(TokenType::Order) || self.check(TokenType::Separator) {
// This shouldn't happen normally in valid SQL, backtrack
break;
}
exprs.push(self.parse_expression()?);
}
// If multiple expressions, wrap in CONCAT (matches Python sqlglot behavior)
let this = if exprs.len() == 1 {
exprs.pop().unwrap()
} else {
Expression::Function(Box::new(Function::new("CONCAT".to_string(), exprs)))
};
// Parse optional ORDER BY
let order_by = if self.match_keywords(&[TokenType::Order, TokenType::By]) {
Some(self.parse_order_by_list()?)
} else {
None
};
// Parse optional SEPARATOR - can be a string literal or expression (e.g., variable)
let separator = if self.match_token(TokenType::Separator) {
Some(self.parse_expression()?)
} else {
None
};
// Parse optional LIMIT (MySQL 8.0.19+)
let limit = if self.match_token(TokenType::Limit) {
Some(Box::new(self.parse_expression()?))
} else {
None
};
self.expect(TokenType::RParen)?;
let filter = self.parse_filter_clause()?;
Ok(Expression::GroupConcat(Box::new(GroupConcatFunc {
this,
separator,
order_by,
distinct,
filter,
limit,
inferred_type: None,
})))
}
// LISTAGG - LISTAGG([DISTINCT] expr [, separator [ON OVERFLOW ...]]) WITHIN GROUP (ORDER BY ...)
"LISTAGG" => {
// Check for optional DISTINCT
let distinct = self.match_token(TokenType::Distinct);
let this = self.parse_expression()?;
let separator = if self.match_token(TokenType::Comma) {
Some(self.parse_expression()?)
} else {
None
};
// Parse optional ON OVERFLOW clause
let on_overflow = if self.match_token(TokenType::On) {
if self.match_identifier("OVERFLOW") {
if self.match_identifier("ERROR") {
Some(ListAggOverflow::Error)
} else if self.match_token(TokenType::Truncate) {
// Optional filler string
let filler = if self.check(TokenType::String) {
Some(self.parse_expression()?)
} else {
None
};
// WITH COUNT or WITHOUT COUNT
let with_count = if self.match_token(TokenType::With) {
self.match_identifier("COUNT");
true
} else if self.match_identifier("WITHOUT") {
self.match_identifier("COUNT");
false
} else {
true // default is WITH COUNT
};
Some(ListAggOverflow::Truncate { filler, with_count })
} else {
None
}
} else {
None
}
} else {
None
};
self.expect(TokenType::RParen)?;
// WITHIN GROUP (ORDER BY ...) is handled by maybe_parse_over
Ok(Expression::ListAgg(Box::new(ListAggFunc {
this,
separator,
on_overflow,
order_by: None,
distinct,
filter: None,
inferred_type: None,
})))
}
_ => unreachable!(
"phase-6 aggregate parser called with non-aggregate family name '{}'",
canonical_upper_name
),
}
}
/// Parses the argument list and trailing modifiers of a window-family
/// function: ROW_NUMBER, RANK, DENSE_RANK, PERCENT_RANK, CUME_DIST, NTILE,
/// LEAD, LAG, FIRST_VALUE, LAST_VALUE, NTH_VALUE.
///
/// Called after the function name and its opening paren have already been
/// consumed; every arm consumes up to and including the closing paren, plus
/// any dialect-specific trailers (`IGNORE NULLS`, `RESPECT NULLS`,
/// Snowflake `FROM FIRST` / `FROM LAST`).
///
/// * `name` - the function name exactly as written in the source (preserved
///   when falling back to a generic `Function` node).
/// * `upper_name` - upper-cased `name`; used to pick between paired variants
///   (LEAD vs LAG, FIRST_VALUE vs LAST_VALUE).
/// * `canonical_upper_name` - dialect-normalized upper-case name used for
///   dispatch. The caller guarantees it is one of the window-family names;
///   anything else hits `unreachable!`.
fn parse_typed_window_family(
    &mut self,
    name: &str,
    upper_name: &str,
    canonical_upper_name: &str,
) -> Result<Expression> {
    match canonical_upper_name {
        // ROW_NUMBER() normally takes no arguments, but ClickHouse accepts
        // row_number(expr, ...); in that case fall back to a generic Function.
        "ROW_NUMBER" => {
            if self.check(TokenType::RParen) {
                self.skip();
                Ok(Expression::RowNumber(RowNumber))
            } else {
                let args = self.parse_function_args_list()?;
                self.expect(TokenType::RParen)?;
                let trailing_comments = self.previous_trailing_comments().to_vec();
                Ok(Expression::Function(Box::new(Function {
                    name: name.to_string(),
                    args,
                    distinct: false,
                    trailing_comments,
                    use_bracket_syntax: false,
                    no_parens: false,
                    quoted: false,
                    span: None,
                    inferred_type: None,
                })))
            }
        }
        // RANK / PERCENT_RANK / CUME_DIST share one argument grammar:
        //   func()               -- standard, no arguments
        //   func(ORDER BY col)   -- DuckDB inline ordering
        //   func(v1, v2, ...)    -- Oracle hypothetical-set form, normally
        //                           followed by WITHIN GROUP (ORDER BY ...)
        // Only the constructed AST variant differs, so they are parsed once.
        "RANK" | "PERCENT_RANK" | "CUME_DIST" => {
            let (order_by, args) = if self.check(TokenType::RParen) {
                // No arguments.
                (None, Vec::new())
            } else if self.match_token(TokenType::Order) {
                // DuckDB: func(ORDER BY col)
                self.expect(TokenType::By)?;
                (Some(self.parse_order_by()?.expressions), Vec::new())
            } else {
                // Oracle hypothetical: func(v1, v2, ...)
                let mut args = vec![self.parse_expression()?];
                while self.match_token(TokenType::Comma) {
                    args.push(self.parse_expression()?);
                }
                (None, args)
            };
            self.expect(TokenType::RParen)?;
            Ok(match canonical_upper_name {
                "RANK" => Expression::Rank(Rank { order_by, args }),
                "PERCENT_RANK" => Expression::PercentRank(PercentRank { order_by, args }),
                _ => Expression::CumeDist(CumeDist { order_by, args }),
            })
        }
        // DENSE_RANK() or Oracle hypothetical DENSE_RANK(v1, v2, ...);
        // unlike RANK it has no inline ORDER BY form (no order_by field).
        "DENSE_RANK" => {
            let args = if self.check(TokenType::RParen) {
                Vec::new()
            } else {
                let mut args = vec![self.parse_expression()?];
                while self.match_token(TokenType::Comma) {
                    args.push(self.parse_expression()?);
                }
                args
            };
            self.expect(TokenType::RParen)?;
            Ok(Expression::DenseRank(DenseRank { args }))
        }
        // NTILE([n] [, extra ClickHouse args] [ORDER BY col])
        "NTILE" => {
            // num_buckets is optional (Databricks allows NTILE() with no args).
            let num_buckets = if self.check(TokenType::RParen) {
                None
            } else {
                Some(self.parse_expression()?)
            };
            // ClickHouse tolerates extra arguments, e.g. ntile(3, 2); parse
            // and discard them so the closing paren lines up.
            while matches!(
                self.config.dialect,
                Some(crate::dialects::DialectType::ClickHouse)
            ) && self.match_token(TokenType::Comma)
            {
                let _ = self.parse_expression()?;
            }
            // DuckDB: NTILE(n ORDER BY col) OVER (...)
            let order_by = if self.match_token(TokenType::Order) {
                self.expect(TokenType::By)?;
                Some(self.parse_order_by()?.expressions)
            } else {
                None
            };
            self.expect(TokenType::RParen)?;
            Ok(Expression::NTile(Box::new(NTileFunc {
                num_buckets,
                order_by,
            })))
        }
        // LEAD(expr [, offset [, default]]) / LAG(...). IGNORE NULLS /
        // RESPECT NULLS is accepted both inside the parens (Redshift) and
        // after them (BigQuery and others).
        "LEAD" | "LAG" => {
            let this = self.parse_expression()?;
            let (offset, default) = if self.match_token(TokenType::Comma) {
                let off = self.parse_expression()?;
                let def = if self.match_token(TokenType::Comma) {
                    Some(self.parse_expression()?)
                } else {
                    None
                };
                (Some(off), def)
            } else {
                (None, None)
            };
            // match_keywords matches the two-token sequence atomically, so a
            // lone IGNORE/RESPECT not followed by NULLS is left unconsumed
            // for the error path. (Chained match_token calls would eat the
            // first token and lose it.)
            let ignore_nulls_inside =
                if self.match_keywords(&[TokenType::Ignore, TokenType::Nulls]) {
                    Some(true)
                } else if self.match_keywords(&[TokenType::Respect, TokenType::Nulls]) {
                    Some(false)
                } else {
                    None
                };
            self.expect(TokenType::RParen)?;
            // The in-paren form wins; otherwise look after the closing paren.
            let ignore_nulls = if ignore_nulls_inside.is_some() {
                ignore_nulls_inside
            } else if self.match_keywords(&[TokenType::Ignore, TokenType::Nulls]) {
                Some(true)
            } else if self.match_keywords(&[TokenType::Respect, TokenType::Nulls]) {
                Some(false)
            } else {
                None
            };
            let func = LeadLagFunc {
                this,
                offset,
                default,
                ignore_nulls,
            };
            Ok(if upper_name == "LEAD" {
                Expression::Lead(Box::new(func))
            } else {
                Expression::Lag(Box::new(func))
            })
        }
        // FIRST_VALUE / LAST_VALUE with optional inline ORDER BY (DuckDB),
        // IGNORE/RESPECT NULLS inside or after the parens, and the
        // Spark/Hive first_value(col, true) boolean-flag spelling of
        // IGNORE NULLS.
        "FIRST_VALUE" | "LAST_VALUE" => {
            let this = self.parse_expression()?;
            // DuckDB: LAST_VALUE(x ORDER BY x)
            let order_by = if self.match_keywords(&[TokenType::Order, TokenType::By]) {
                self.parse_order_by_list()?
            } else {
                Vec::new()
            };
            // Atomic two-token match: see the note in the LEAD/LAG arm.
            let mut ignore_nulls_inside =
                if self.match_keywords(&[TokenType::Ignore, TokenType::Nulls]) {
                    Some(true)
                } else if self.match_keywords(&[TokenType::Respect, TokenType::Nulls]) {
                    // RESPECT NULLS explicitly sets the flag to false.
                    Some(false)
                } else {
                    None
                };
            // Spark/Hive: first_value(col, true) == FIRST_VALUE(col) IGNORE NULLS.
            if ignore_nulls_inside.is_none() && self.match_token(TokenType::Comma) {
                let second_arg = self.parse_expression()?;
                if let Expression::Boolean(BooleanLiteral { value: true }) = &second_arg {
                    ignore_nulls_inside = Some(true);
                }
                // Any other second argument is non-standard and is dropped.
            }
            self.expect(TokenType::RParen)?;
            // In-paren form wins; otherwise check after the closing paren.
            let ignore_nulls: Option<bool> = if ignore_nulls_inside.is_some() {
                ignore_nulls_inside
            } else if self.match_keywords(&[TokenType::Ignore, TokenType::Nulls]) {
                Some(true)
            } else if self.match_keywords(&[TokenType::Respect, TokenType::Nulls]) {
                Some(false)
            } else {
                None
            };
            let func = ValueFunc {
                this,
                ignore_nulls,
                order_by,
            };
            Ok(if upper_name == "FIRST_VALUE" {
                Expression::FirstValue(Box::new(func))
            } else {
                Expression::LastValue(Box::new(func))
            })
        }
        // NTH_VALUE(expr, offset) with optional IGNORE/RESPECT NULLS (inside
        // or after the parens) and Snowflake FROM FIRST / FROM LAST.
        "NTH_VALUE" => {
            let this = self.parse_expression()?;
            self.expect(TokenType::Comma)?;
            let offset = self.parse_expression()?;
            // Atomic two-token match: see the note in the LEAD/LAG arm.
            let ignore_nulls_inside =
                if self.match_keywords(&[TokenType::Ignore, TokenType::Nulls]) {
                    Some(true)
                } else if self.match_keywords(&[TokenType::Respect, TokenType::Nulls]) {
                    Some(false)
                } else {
                    None
                };
            self.expect(TokenType::RParen)?;
            // Snowflake: NTH_VALUE(x, n) FROM FIRST / FROM LAST.
            let from_first = if self.match_keywords(&[TokenType::From, TokenType::First]) {
                Some(true)
            } else if self.match_keywords(&[TokenType::From, TokenType::Last]) {
                Some(false)
            } else {
                None
            };
            // In-paren form wins; otherwise check after the closing paren
            // (and after FROM FIRST / FROM LAST).
            let ignore_nulls: Option<bool> = if ignore_nulls_inside.is_some() {
                ignore_nulls_inside
            } else if self.match_keywords(&[TokenType::Ignore, TokenType::Nulls]) {
                Some(true)
            } else if self.match_keywords(&[TokenType::Respect, TokenType::Nulls]) {
                Some(false)
            } else {
                None
            };
            Ok(Expression::NthValue(Box::new(NthValueFunc {
                this,
                offset,
                ignore_nulls,
                from_first,
            })))
        }
        _ => unreachable!(
            "phase-6 window parser called with non-window family name '{}'",
            canonical_upper_name
        ),
    }
}
fn parse_typed_json_family(
&mut self,
name: &str,
upper_name: &str,
canonical_upper_name: &str,
) -> Result<Expression> {
match canonical_upper_name {
// JSON functions
"JSON_EXTRACT" | "JSON_EXTRACT_SCALAR" | "JSON_QUERY" | "JSON_VALUE" => {
let this = self.parse_expression()?;
// Path is optional for some dialects (e.g., TSQL JSON_QUERY with 1 arg defaults to '$')
let path = if self.match_token(TokenType::Comma) {
self.parse_expression()?
} else {
// Default path is '$' when not provided
Expression::Literal(Box::new(Literal::String("$".to_string())))
};
// SQLite JSON_EXTRACT supports multiple paths - check for additional paths
// If multiple paths, use generic Function instead of typed expression
if self.check(TokenType::Comma)
&& !self.check_identifier("WITH")
&& !self.check_identifier("WITHOUT")
&& !self.check_identifier("KEEP")
&& !self.check_identifier("OMIT")
&& !self.check_identifier("NULL")
&& !self.check_identifier("ERROR")
&& !self.check_identifier("EMPTY")
&& !self.check(TokenType::Returning)
{
let mut args = vec![this, path];
while self.match_token(TokenType::Comma) {
args.push(self.parse_expression()?);
}
self.expect(TokenType::RParen)?;
let func_expr = Expression::Function(Box::new(Function {
name: name.to_string(),
args,
distinct: false,
trailing_comments: Vec::new(),
use_bracket_syntax: false,
no_parens: false,
quoted: false,
span: None,
inferred_type: None,
}));
// Exasol: JSON_EXTRACT(...) EMITS (col1 TYPE1, col2 TYPE2)
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::Exasol)
) && self.check_identifier("EMITS")
{
self.skip(); // consume EMITS
if let Some(schema) = self.parse_schema()? {
return Ok(Expression::FunctionEmits(Box::new(FunctionEmits {
this: func_expr,
emits: schema,
})));
}
}
return Ok(func_expr);
}
// Parse JSON_QUERY/JSON_VALUE options (Trino/Presto style)
// Options: WITH/WITHOUT [CONDITIONAL|UNCONDITIONAL] [ARRAY] WRAPPER
// KEEP QUOTES / OMIT QUOTES [ON SCALAR STRING]
// NULL ON ERROR / ERROR ON ERROR / EMPTY ON ERROR
// RETURNING type
let mut wrapper_option: Option<String> = None;
let mut quotes_option: Option<String> = None;
let mut on_scalar_string = false;
let mut on_error: Option<String> = None;
let mut returning: Option<DataType> = None;
// Keep parsing options until we see RParen
while !self.check(TokenType::RParen) {
// WITH [CONDITIONAL|UNCONDITIONAL] [ARRAY] WRAPPER - match in order of specificity
if self.match_text_seq(&["WITH", "UNCONDITIONAL", "ARRAY", "WRAPPER"]) {
wrapper_option = Some("WITH UNCONDITIONAL ARRAY WRAPPER".to_string());
} else if self.match_text_seq(&["WITH", "CONDITIONAL", "ARRAY", "WRAPPER"]) {
wrapper_option = Some("WITH CONDITIONAL ARRAY WRAPPER".to_string());
} else if self.match_text_seq(&["WITH", "UNCONDITIONAL", "WRAPPER"]) {
wrapper_option = Some("WITH UNCONDITIONAL WRAPPER".to_string());
} else if self.match_text_seq(&["WITH", "CONDITIONAL", "WRAPPER"]) {
wrapper_option = Some("WITH CONDITIONAL WRAPPER".to_string());
} else if self.match_text_seq(&["WITH", "ARRAY", "WRAPPER"]) {
wrapper_option = Some("WITH ARRAY WRAPPER".to_string());
} else if self.match_text_seq(&["WITH", "WRAPPER"]) {
wrapper_option = Some("WITH WRAPPER".to_string());
// WITHOUT [CONDITIONAL] [ARRAY] WRAPPER
} else if self.match_text_seq(&["WITHOUT", "CONDITIONAL", "ARRAY", "WRAPPER"]) {
wrapper_option = Some("WITHOUT CONDITIONAL ARRAY WRAPPER".to_string());
} else if self.match_text_seq(&["WITHOUT", "CONDITIONAL", "WRAPPER"]) {
wrapper_option = Some("WITHOUT CONDITIONAL WRAPPER".to_string());
} else if self.match_text_seq(&["WITHOUT", "ARRAY", "WRAPPER"]) {
wrapper_option = Some("WITHOUT ARRAY WRAPPER".to_string());
} else if self.match_text_seq(&["WITHOUT", "WRAPPER"]) {
wrapper_option = Some("WITHOUT WRAPPER".to_string());
} else if self.match_text_seq(&["KEEP", "QUOTES"]) {
// KEEP QUOTES
quotes_option = Some("KEEP QUOTES".to_string());
} else if self.match_text_seq(&["OMIT", "QUOTES", "ON", "SCALAR", "STRING"]) {
// OMIT QUOTES ON SCALAR STRING
quotes_option = Some("OMIT QUOTES".to_string());
on_scalar_string = true;
} else if self.match_text_seq(&["OMIT", "QUOTES"]) {
// OMIT QUOTES
quotes_option = Some("OMIT QUOTES".to_string());
} else if self.match_text_seq(&["NULL", "ON", "ERROR"]) {
on_error = Some("NULL ON ERROR".to_string());
} else if self.match_text_seq(&["ERROR", "ON", "ERROR"]) {
on_error = Some("ERROR ON ERROR".to_string());
} else if self.match_text_seq(&["EMPTY", "ON", "ERROR"]) {
on_error = Some("EMPTY ON ERROR".to_string());
} else if self.match_token(TokenType::Returning) {
// RETURNING type
returning = Some(self.parse_data_type()?);
} else {
// No more options recognized, break
break;
}
}
self.expect(TokenType::RParen)?;
let func = JsonExtractFunc {
this,
path,
returning,
arrow_syntax: false,
hash_arrow_syntax: false,
wrapper_option,
quotes_option,
on_scalar_string,
on_error,
};
Ok(match upper_name {
"JSON_EXTRACT" => Expression::JsonExtract(Box::new(func)),
"JSON_EXTRACT_SCALAR" => Expression::JsonExtractScalar(Box::new(func)),
"JSON_QUERY" => Expression::JsonQuery(Box::new(func)),
"JSON_VALUE" => Expression::JsonValue(Box::new(func)),
_ => unreachable!("JSON function name already matched in caller"),
})
}
// JSON_KEYS, TO_JSON, PARSE_JSON etc. support additional args including named args (BigQuery)
// e.g., JSON_KEYS(expr, depth, mode => 'lax'), TO_JSON(expr, stringify_wide_numbers => FALSE)
// e.g., PARSE_JSON('{}', wide_number_mode => 'exact')
"JSON_ARRAY_LENGTH" | "JSON_KEYS" | "JSON_TYPE" | "TO_JSON" | "PARSE_JSON" => {
let this = self.parse_expression()?;
// ClickHouse: expr AS alias inside function args
let this = self.maybe_clickhouse_alias(this);
// Check for additional arguments (comma-separated, possibly named)
if self.match_token(TokenType::Comma) {
// Has additional arguments - parse as generic Function to preserve all args
let mut all_args = vec![this];
let remaining = self.parse_function_arguments()?;
all_args.extend(remaining);
self.expect(TokenType::RParen)?;
Ok(Expression::Function(Box::new(Function {
name: name.to_string(),
args: all_args,
distinct: false,
trailing_comments: Vec::new(),
use_bracket_syntax: false,
no_parens: false,
quoted: false,
span: None,
inferred_type: None,
})))
} else {
// Single argument - use typed expression
self.expect(TokenType::RParen)?;
let func = UnaryFunc::new(this);
Ok(match canonical_upper_name {
"JSON_ARRAY_LENGTH" => Expression::JsonArrayLength(Box::new(func)),
"JSON_KEYS" => Expression::JsonKeys(Box::new(func)),
"JSON_TYPE" => Expression::JsonType(Box::new(func)),
"TO_JSON" => Expression::ToJson(Box::new(func)),
"PARSE_JSON" => Expression::ParseJson(Box::new(func)),
_ => unreachable!("JSON function name already matched in caller"),
})
}
}
// JSON_OBJECT with SQL standard syntax: JSON_OBJECT('key': value, ...) or JSON_OBJECT(*)
"JSON_OBJECT" => {
let mut pairs = Vec::new();
let mut star = false;
if !self.check(TokenType::RParen) {
// Check for JSON_OBJECT(*) syntax
if self.check(TokenType::Star) && self.check_next(TokenType::RParen) {
self.skip(); // consume *
star = true;
} else {
loop {
// Check for KEY keyword for KEY 'key' IS value syntax (KEY is a keyword token)
let has_key_keyword = self.match_token(TokenType::Key);
// Parse key: try string first (for 'key' syntax), then column
let key = if let Some(s) = self.parse_string()? {
s
} else {
// Use parse_primary to handle function calls (ARRAY_AGG, CAST,
// f(x)) as well as simple columns. parse_primary does NOT call
// parse_postfix_operators, so a trailing ':' remains as a
// key/value separator and is not consumed as JSON path.
self.parse_primary()?
};
// Support colon, VALUE keyword (identifier), and IS keyword (for KEY...IS syntax)
let has_separator = self.match_token(TokenType::Colon)
|| self.match_identifier("VALUE")
|| (has_key_keyword && self.match_token(TokenType::Is));
if has_separator {
let value = self.parse_bitwise()?.ok_or_else(|| {
self.parse_error("Expected value expression in JSON_OBJECT")
})?;
// Check for FORMAT JSON after value
let value_with_format = if self.match_text_seq(&["FORMAT", "JSON"])
{
Expression::JSONFormat(Box::new(JSONFormat {
this: Some(Box::new(value)),
options: Vec::new(),
is_json: None,
to_json: None,
}))
} else {
value
};
pairs.push((key, value_with_format));
} else {
// Just key/value pairs without separator
if self.match_token(TokenType::Comma) {
let value = self.parse_bitwise()?.ok_or_else(|| {
self.parse_error("Expected value expression in JSON_OBJECT")
})?;
pairs.push((key, value));
} else {
return Err(self
.parse_error("Expected value expression in JSON_OBJECT"));
}
}
if !self.match_token(TokenType::Comma) {
break;
}
}
}
}
// Parse optional modifiers: NULL ON NULL, ABSENT ON NULL, WITH UNIQUE KEYS
let null_handling = if self.match_token(TokenType::Null) {
self.match_token(TokenType::On);
self.match_token(TokenType::Null);
Some(JsonNullHandling::NullOnNull)
} else if self.match_identifier("ABSENT") {
self.match_token(TokenType::On);
self.match_token(TokenType::Null);
Some(JsonNullHandling::AbsentOnNull)
} else {
None
};
let with_unique_keys = if self.match_token(TokenType::With) {
self.match_token(TokenType::Unique);
self.match_identifier("KEYS");
true
} else {
false
};
// Parse optional RETURNING clause: RETURNING type [FORMAT JSON] [ENCODING encoding]
let (returning_type, format_json, encoding) = if self
.match_token(TokenType::Returning)
{
let return_type = self.parse_data_type()?;
// Optional FORMAT JSON
let has_format_json = if self.match_token(TokenType::Format) {
// JSON might be a keyword or identifier
let _ = self.match_token(TokenType::Json) || self.match_identifier("JSON");
true
} else {
false
};
// Optional ENCODING encoding
let enc = if self.match_identifier("ENCODING") {
Some(self.expect_identifier_or_keyword()?)
} else {
None
};
(Some(return_type), has_format_json, enc)
} else {
(None, false, None)
};
self.expect(TokenType::RParen)?;
Ok(Expression::JsonObject(Box::new(JsonObjectFunc {
pairs,
null_handling,
with_unique_keys,
returning_type,
format_json,
encoding,
star,
})))
}
// JSON_ARRAY function with Oracle-specific options
// JSON_ARRAY(expr [FORMAT JSON], ... [NULL ON NULL | ABSENT ON NULL] [RETURNING type] [STRICT])
"JSON_ARRAY" => {
let mut expressions = Vec::new();
if !self.check(TokenType::RParen) {
loop {
let expr = self.parse_bitwise()?.unwrap_or(Expression::Null(Null));
// Check for FORMAT JSON after each expression
let expr_with_format = if self.match_text_seq(&["FORMAT", "JSON"]) {
Expression::JSONFormat(Box::new(JSONFormat {
this: Some(Box::new(expr)),
options: Vec::new(),
is_json: None,
to_json: None,
}))
} else {
expr
};
expressions.push(expr_with_format);
if !self.match_token(TokenType::Comma) {
break;
}
}
}
// Parse NULL ON NULL or ABSENT ON NULL
let null_handling = if self.match_text_seq(&["NULL", "ON", "NULL"]) {
Some(Box::new(Expression::Var(Box::new(Var {
this: "NULL ON NULL".to_string(),
}))))
} else if self.match_text_seq(&["ABSENT", "ON", "NULL"]) {
Some(Box::new(Expression::Var(Box::new(Var {
this: "ABSENT ON NULL".to_string(),
}))))
} else {
None
};
// Parse RETURNING type
let return_type = if self.match_token(TokenType::Returning) {
let dt = self.parse_data_type()?;
Some(Box::new(Expression::DataType(dt)))
} else {
None
};
// Parse STRICT
let strict = if self.match_identifier("STRICT") {
Some(Box::new(Expression::Boolean(BooleanLiteral {
value: true,
})))
} else {
None
};
self.expect(TokenType::RParen)?;
Ok(Expression::JSONArray(Box::new(JSONArray {
expressions,
null_handling,
return_type,
strict,
})))
}
// JSON_ARRAYAGG function with Oracle-specific options
// JSON_ARRAYAGG(expr [FORMAT JSON] [ORDER BY ...] [NULL ON NULL | ABSENT ON NULL] [RETURNING type] [STRICT])
"JSON_ARRAYAGG" => {
let this = self.parse_bitwise()?.unwrap_or(Expression::Null(Null));
// Check for FORMAT JSON after the expression
let this_with_format = if self.match_text_seq(&["FORMAT", "JSON"]) {
Expression::JSONFormat(Box::new(JSONFormat {
this: Some(Box::new(this)),
options: Vec::new(),
is_json: None,
to_json: None,
}))
} else {
this
};
// Parse ORDER BY clause
let order = if self.match_token(TokenType::Order) {
self.match_token(TokenType::By);
// Parse comma-separated ordered expressions
let mut order_exprs = Vec::new();
loop {
if let Some(ordered) = self.parse_ordered_item()? {
order_exprs.push(ordered);
} else {
break;
}
if !self.match_token(TokenType::Comma) {
break;
}
}
if !order_exprs.is_empty() {
Some(Box::new(Expression::OrderBy(Box::new(OrderBy {
expressions: order_exprs,
siblings: false,
comments: Vec::new(),
}))))
} else {
None
}
} else {
None
};
// Parse NULL ON NULL or ABSENT ON NULL
let null_handling = if self.match_text_seq(&["NULL", "ON", "NULL"]) {
Some(Box::new(Expression::Var(Box::new(Var {
this: "NULL ON NULL".to_string(),
}))))
} else if self.match_text_seq(&["ABSENT", "ON", "NULL"]) {
Some(Box::new(Expression::Var(Box::new(Var {
this: "ABSENT ON NULL".to_string(),
}))))
} else {
None
};
// Parse RETURNING type
let return_type = if self.match_token(TokenType::Returning) {
let dt = self.parse_data_type()?;
Some(Box::new(Expression::DataType(dt)))
} else {
None
};
// Parse STRICT
let strict = if self.match_identifier("STRICT") {
Some(Box::new(Expression::Boolean(BooleanLiteral {
value: true,
})))
} else {
None
};
self.expect(TokenType::RParen)?;
Ok(Expression::JSONArrayAgg(Box::new(JSONArrayAgg {
this: Box::new(this_with_format),
order,
null_handling,
return_type,
strict,
})))
}
// JSON_OBJECTAGG with KEY...VALUE syntax
// JSON_OBJECTAGG(KEY key VALUE value) or JSON_OBJECTAGG(key: value)
"JSON_OBJECTAGG" => {
// Check for KEY keyword (KEY is a keyword token, not an identifier)
let _has_key_keyword = self.match_token(TokenType::Key);
// Parse key: use column parsing to avoid colon being interpreted as JSON path
let key = self.parse_column()?.unwrap_or(Expression::Null(Null));
// Support colon, comma (MySQL), or VALUE keyword
let _ = self.match_token(TokenType::Colon)
|| self.match_token(TokenType::Comma)
|| self.match_identifier("VALUE");
let value = self.parse_bitwise()?.unwrap_or(Expression::Null(Null));
// Check for FORMAT JSON after value
let value_with_format = if self.match_text_seq(&["FORMAT", "JSON"]) {
Expression::JSONFormat(Box::new(JSONFormat {
this: Some(Box::new(value)),
options: Vec::new(),
is_json: None,
to_json: None,
}))
} else {
value
};
// Parse NULL ON NULL or ABSENT ON NULL
let null_handling = if self.match_text_seq(&["NULL", "ON", "NULL"]) {
Some(Box::new(Expression::Var(Box::new(Var {
this: "NULL ON NULL".to_string(),
}))))
} else if self.match_text_seq(&["ABSENT", "ON", "NULL"]) {
Some(Box::new(Expression::Var(Box::new(Var {
this: "ABSENT ON NULL".to_string(),
}))))
} else {
None
};
// Parse WITH/WITHOUT UNIQUE KEYS
let unique_keys = if self.match_text_seq(&["WITH", "UNIQUE"]) {
self.match_identifier("KEYS");
Some(Box::new(Expression::Boolean(BooleanLiteral {
value: true,
})))
} else if self.match_text_seq(&["WITHOUT", "UNIQUE"]) {
self.match_identifier("KEYS");
Some(Box::new(Expression::Boolean(BooleanLiteral {
value: false,
})))
} else {
None
};
// Parse RETURNING type
let return_type = if self.match_token(TokenType::Returning) {
let dt = self.parse_data_type()?;
Some(Box::new(Expression::DataType(dt)))
} else {
None
};
self.expect(TokenType::RParen)?;
Ok(Expression::JSONObjectAgg(Box::new(JSONObjectAgg {
expressions: vec![Expression::JSONKeyValue(Box::new(JSONKeyValue {
this: Box::new(key),
expression: Box::new(value_with_format),
}))],
null_handling,
unique_keys,
return_type,
encoding: None,
})))
}
// JSON_TABLE function - MySQL/Oracle table function for JSON data
// JSON_TABLE(json_doc [FORMAT JSON], path COLUMNS (column_list)) [AS alias]
"JSON_TABLE" => {
// Parse the JSON expression
let this = self.parse_bitwise()?.unwrap_or(Expression::Null(Null));
// Check for FORMAT JSON after the expression
let this_with_format = if self.match_text_seq(&["FORMAT", "JSON"]) {
Expression::JSONFormat(Box::new(JSONFormat {
this: Some(Box::new(this)),
options: Vec::new(),
is_json: None,
to_json: None,
}))
} else {
this
};
// Parse path (after comma)
let path = if self.match_token(TokenType::Comma) {
if let Some(s) = self.parse_string()? {
Some(Box::new(s))
} else {
None
}
} else {
None
};
// Oracle uses "ERROR ON ERROR" (value then behavior) instead of "ON ERROR ERROR"
// Parse error handling: ERROR ON ERROR or NULL ON ERROR
let error_handling =
if self.match_identifier("ERROR") && self.match_text_seq(&["ON", "ERROR"]) {
Some(Box::new(Expression::Var(Box::new(Var {
this: "ERROR ON ERROR".to_string(),
}))))
} else if self.match_text_seq(&["NULL", "ON", "ERROR"]) {
Some(Box::new(Expression::Var(Box::new(Var {
this: "NULL ON ERROR".to_string(),
}))))
} else {
None
};
// Parse empty handling: ERROR ON EMPTY or NULL ON EMPTY
let empty_handling =
if self.match_identifier("ERROR") && self.match_text_seq(&["ON", "EMPTY"]) {
Some(Box::new(Expression::Var(Box::new(Var {
this: "ERROR ON EMPTY".to_string(),
}))))
} else if self.match_text_seq(&["NULL", "ON", "EMPTY"]) {
Some(Box::new(Expression::Var(Box::new(Var {
this: "NULL ON EMPTY".to_string(),
}))))
} else {
None
};
// Parse COLUMNS clause
let schema = self.parse_json_table_columns()?;
self.expect(TokenType::RParen)?;
Ok(Expression::JSONTable(Box::new(JSONTable {
this: Box::new(this_with_format),
schema: schema.map(Box::new),
path,
error_handling,
empty_handling,
})))
}
_ => unreachable!(
"phase-6 json parser called with non-json family name '{}'",
canonical_upper_name
),
}
}
/// Teradata `TRANSLATE(x USING charset [WITH ERROR])`.
///
/// When the USING clause is present, builds a `TranslateCharacters` node;
/// otherwise falls back to an ordinary `TRANSLATE(a, b, ...)` function call.
/// Panics (unreachable) if dispatched with any other family name or a
/// non-Teradata dialect, mirroring the dispatch contract.
fn parse_typed_translate_teradata_family(
    &mut self,
    name: &str,
    _upper_name: &str,
    canonical_upper_name: &str,
) -> Result<Expression> {
    let is_teradata_translate = canonical_upper_name == "TRANSLATE"
        && matches!(
            self.config.dialect,
            Some(crate::dialects::DialectType::Teradata)
        );
    if !is_teradata_translate {
        unreachable!(
            "phase-6 translate parser called with non-translate family name '{}'",
            canonical_upper_name
        );
    }
    let source = self.parse_expression()?;
    if self.match_token(TokenType::Using) {
        // Charset-translation form: TRANSLATE(x USING charset [WITH ERROR])
        let charset = self.parse_expression()?;
        let with_error = self
            .match_text_seq(&["WITH", "ERROR"])
            .then(|| Box::new(Expression::Boolean(BooleanLiteral { value: true })));
        self.expect(TokenType::RParen)?;
        Ok(Expression::TranslateCharacters(Box::new(
            TranslateCharacters {
                this: Box::new(source),
                expression: Box::new(charset),
                with_error,
            },
        )))
    } else {
        // No USING clause: treat as a plain function call with comma args.
        let mut args = vec![source];
        if self.match_token(TokenType::Comma) {
            args.append(&mut self.parse_expression_list()?);
        }
        self.expect(TokenType::RParen)?;
        Ok(Expression::Function(Box::new(Function {
            name: name.to_string(),
            args,
            distinct: false,
            trailing_comments: Vec::new(),
            use_bracket_syntax: false,
            no_parens: false,
            quoted: false,
            span: None,
            inferred_type: None,
        })))
    }
}
/// Parse a generic function call (fallback for unrecognized functions)
fn parse_generic_function(&mut self, name: &str, quoted: bool) -> Result<Expression> {
let is_known_agg = Self::is_aggregate_function(name);
let (mut args, distinct) = if self.check(TokenType::RParen) {
(Vec::new(), false)
} else if self.check(TokenType::Star) {
// Check for DuckDB *COLUMNS(...) syntax first
if self.check_next_identifier("COLUMNS")
&& self
.tokens
.get(self.current + 2)
.map(|t| t.token_type == TokenType::LParen)
.unwrap_or(false)
{
// Parse *COLUMNS(...) as a function argument
(self.parse_function_arguments()?, false)
} else {
// Regular star: parse star modifiers like EXCLUDE/EXCEPT/REPLACE/RENAME
// e.g., COLUMNS(* EXCLUDE (empid, dept))
self.skip(); // consume *
let star = self.parse_star_modifiers(None)?;
let mut args = vec![Expression::Star(star)];
// ClickHouse: func(*, col1, col2) — star followed by more args
if self.match_token(TokenType::Comma) {
let rest = self.parse_function_arguments()?;
args.extend(rest);
}
(args, false)
}
} else if self.check(TokenType::Distinct)
&& !self.check_next(TokenType::Comma)
&& !self.check_next(TokenType::RParen)
{
// DISTINCT as aggregate modifier: func(DISTINCT expr)
// Not when followed by comma or rparen — then DISTINCT is used as an identifier value
self.skip(); // consume DISTINCT
(self.parse_function_arguments()?, true)
} else if is_known_agg && self.match_token(TokenType::All) {
// ALL is the default quantifier, just consume it
(self.parse_function_arguments()?, false)
} else {
(self.parse_function_arguments()?, false)
};
// For known aggregate functions, check for IGNORE NULLS, ORDER BY, LIMIT inside parens
let (ignore_nulls, order_by, agg_limit) = if is_known_agg {
let ignore_nulls = if self.match_token(TokenType::Ignore)
&& self.match_token(TokenType::Nulls)
{
Some(true)
} else if self.match_token(TokenType::Respect) && self.match_token(TokenType::Nulls) {
Some(false)
} else {
None
};
let order_by = if self.match_keywords(&[TokenType::Order, TokenType::By]) {
self.parse_order_by_list()?
} else {
Vec::new()
};
let limit = if self.match_token(TokenType::Limit) {
Some(Box::new(self.parse_expression()?))
} else {
None
};
(ignore_nulls, order_by, limit)
} else {
(None, Vec::new(), None)
};
// ClickHouse: SETTINGS key=value, ... before closing paren in function calls
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.check(TokenType::Settings)
&& self.current + 2 < self.tokens.len()
&& (self.tokens[self.current + 1].token_type == TokenType::Var
|| self.tokens[self.current + 1].token_type == TokenType::Identifier)
&& self.tokens[self.current + 2].token_type == TokenType::Eq
{
self.skip(); // consume SETTINGS
loop {
let _key = if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
self.advance().text
} else {
break;
};
if self.match_token(TokenType::Eq) {
let _value = self.parse_primary()?;
}
if !self.match_token(TokenType::Comma) {
break;
}
}
}
self.expect(TokenType::RParen)?;
let trailing_comments = self.previous_trailing_comments().to_vec();
// Check for WITHIN GROUP (ORDER BY ...)
if self.match_identifier("WITHIN") {
if self.match_identifier("GROUP") {
self.expect(TokenType::LParen)?;
self.expect(TokenType::Order)?;
self.expect(TokenType::By)?;
let within_order = self.parse_order_by_list()?;
self.expect(TokenType::RParen)?;
let func_expr = Expression::AggregateFunction(Box::new(AggregateFunction {
name: name.to_string(),
args,
distinct,
filter: None,
order_by: Vec::new(),
limit: None,
ignore_nulls: None,
inferred_type: None,
}));
let within = Expression::WithinGroup(Box::new(WithinGroup {
this: func_expr,
order_by: within_order,
}));
// Check for FILTER after WITHIN GROUP
let filter = self.parse_filter_clause()?;
if let Some(filter_expr) = filter {
return Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
name: format!("__WITHIN_GROUP_{}", name),
args: vec![within, filter_expr],
distinct: false,
filter: None,
order_by: Vec::new(),
limit: None,
ignore_nulls: None,
inferred_type: None,
})));
}
return Ok(within);
}
}
let filter = self.parse_filter_clause()?;
// Check for postfix IGNORE NULLS / RESPECT NULLS after RParen
let ignore_nulls = if ignore_nulls.is_some() {
ignore_nulls
} else if self.match_keywords(&[TokenType::Ignore, TokenType::Nulls]) {
Some(true)
} else if self.match_keywords(&[TokenType::Respect, TokenType::Nulls]) {
Some(false)
} else {
None
};
if filter.is_some() || is_known_agg || ignore_nulls.is_some() {
Ok(Expression::AggregateFunction(Box::new(AggregateFunction {
name: name.to_string(),
args,
distinct,
filter,
order_by,
limit: agg_limit,
ignore_nulls,
inferred_type: None,
})))
} else {
self.normalize_date_part_arg(name, &mut args);
let mut func = Function::new(name.to_string(), args);
func.distinct = distinct;
func.trailing_comments = trailing_comments;
func.quoted = quoted;
Ok(Expression::Function(Box::new(func)))
}
}
/// Check for an AS alias after an expression in ClickHouse function arg context.
///
/// Only wraps `expr` in an `Alias` when the dialect is ClickHouse, the
/// current token is `AS`, and the following token is a plain identifier
/// (not `)`/`,`); otherwise returns `expr` untouched.
fn maybe_clickhouse_alias(&mut self, expr: Expression) -> Expression {
    let in_clickhouse = matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    );
    if !in_clickhouse
        || !self.check(TokenType::As)
        || self.check_next(TokenType::RParen)
        || self.check_next(TokenType::Comma)
    {
        return expr;
    }
    // The token after AS must look like an alias name.
    let alias_ok = self
        .tokens
        .get(self.current + 1)
        .map(|t| {
            matches!(
                t.token_type,
                TokenType::Identifier | TokenType::Var | TokenType::QuotedIdentifier
            )
        })
        .unwrap_or(false);
    if !alias_ok {
        return expr;
    }
    self.skip(); // consume AS
    let token = self.advance();
    let alias = Identifier {
        name: token.text.clone(),
        quoted: token.token_type == TokenType::QuotedIdentifier,
        trailing_comments: Vec::new(),
        span: None,
    };
    Expression::Alias(Box::new(crate::expressions::Alias {
        this: expr,
        alias,
        column_aliases: Vec::new(),
        pre_alias_comments: Vec::new(),
        trailing_comments: Vec::new(),
        inferred_type: None,
    }))
}
/// Parse an expression, then check for AS alias in ClickHouse function arg context.
/// ClickHouse allows: func(expr AS alias, ...) where AS creates a named alias inside function args.
fn parse_expression_with_clickhouse_alias(&mut self) -> Result<Expression> {
    let parsed = self.parse_expression()?;
    let maybe_aliased = self.maybe_clickhouse_alias(parsed);
    Ok(maybe_aliased)
}
/// Parse a single function-call argument.
///
/// Handles, in order:
/// - BigQuery-style `TABLE x` / `MODEL x` prefixed table arguments (with
///   fallback to a plain expression when no table name follows),
/// - named arguments `name => value` and `name := value`,
/// - single-parameter lambdas `ident -> body`, optionally with a type
///   annotation on the parameter (`ident TYPE -> body`),
/// - a trailing explicit ClickHouse `AS alias` on the argument,
/// - attaching trailing comments to bare literal arguments via `Annotated`.
///
/// Speculative branches save `self.current` up front and restore it to
/// backtrack when a branch does not pan out.
fn parse_single_function_argument(&mut self) -> Result<Expression> {
// TABLE has its own token type; MODEL is only recognized by text.
let is_table_or_model_arg = if !self.is_at_end() {
self.check(TokenType::Table) || self.peek().text.eq_ignore_ascii_case("MODEL")
} else {
false
};
let arg = if is_table_or_model_arg {
let prefix = self.peek().text.to_ascii_uppercase();
let saved_pos = self.current;
self.skip();
// If the prefix word is immediately followed by => or :=, it was
// actually a named-argument name, not a TABLE/MODEL prefix.
if !self.is_at_end()
&& !self.check(TokenType::FArrow)
&& !self.check(TokenType::ColonEq)
{
if let Some(table_expr) = self.parse_table_parts()? {
Expression::TableArgument(Box::new(TableArgument {
prefix,
this: table_expr,
}))
} else {
// No table name after the prefix: backtrack and parse normally.
self.current = saved_pos;
self.parse_expression()?
}
} else {
// TABLE/MODEL used as a named-argument name: re-parse from the start.
self.current = saved_pos;
if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
let ident_token = self.advance();
let ident_name = ident_token.text.clone();
if self.match_token(TokenType::FArrow) {
// name => value
let value = self.parse_expression()?;
Expression::NamedArgument(Box::new(NamedArgument {
name: Identifier::new(ident_name),
value,
separator: NamedArgSeparator::DArrow,
}))
} else if self.match_token(TokenType::ColonEq) {
// name := value
let value = self.parse_expression()?;
Expression::NamedArgument(Box::new(NamedArgument {
name: Identifier::new(ident_name),
value,
separator: NamedArgSeparator::ColonEq,
}))
} else {
self.current = saved_pos;
self.parse_expression()?
}
} else {
self.parse_expression()?
}
}
} else if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
let saved_pos = self.current;
let ident_token = self.advance();
let ident_name = ident_token.text.clone();
if ident_name.eq_ignore_ascii_case("VARIADIC")
&& matches!(
self.config.dialect,
Some(crate::dialects::DialectType::PostgreSQL)
| Some(crate::dialects::DialectType::Redshift)
)
{
// PostgreSQL/Redshift VARIADIC: rewind so the expression parser
// sees the keyword itself.
self.current = saved_pos;
self.parse_expression()?
} else if !self.is_at_end()
&& self.is_type_keyword()
&& !self.check(TokenType::FArrow)
&& !self.check(TokenType::ColonEq)
{
// Possible typed lambda parameter: `ident TYPE -> body`.
let type_annotation = self.parse_data_type()?;
if self.match_token(TokenType::Arrow) {
let body = self.parse_expression()?;
Expression::Lambda(Box::new(LambdaExpr {
parameters: vec![Identifier::new(ident_name)],
body,
colon: false,
parameter_types: vec![Some(type_annotation)],
}))
} else {
// Not a lambda after all: backtrack past the consumed type tokens.
self.current = saved_pos;
self.parse_expression()?
}
} else if self.match_token(TokenType::Arrow) {
// Untyped single-parameter lambda: `ident -> body`.
let body = self.parse_expression()?;
Expression::Lambda(Box::new(LambdaExpr {
parameters: vec![Identifier::new(ident_name)],
body,
colon: false,
parameter_types: Vec::new(),
}))
} else if self.match_token(TokenType::FArrow) {
// name => value
let value = self.parse_expression()?;
Expression::NamedArgument(Box::new(NamedArgument {
name: Identifier::new(ident_name),
value,
separator: NamedArgSeparator::DArrow,
}))
} else if self.match_token(TokenType::ColonEq) {
// name := value
let value = self.parse_expression()?;
Expression::NamedArgument(Box::new(NamedArgument {
name: Identifier::new(ident_name),
value,
separator: NamedArgSeparator::ColonEq,
}))
} else {
// Plain expression argument: rewind and parse from the identifier.
self.current = saved_pos;
self.parse_expression()?
}
} else {
self.parse_expression()?
};
// ClickHouse explicit `AS alias` after the argument. Unlike
// maybe_clickhouse_alias, keyword tokens are accepted as alias names here,
// but only when the token after the alias closes the argument (`)` or `,`).
let arg = if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.check(TokenType::As)
&& !self.check_next(TokenType::RParen)
&& !self.check_next(TokenType::Comma)
{
let next_idx = self.current + 1;
let after_alias_idx = self.current + 2;
let is_alias_token = next_idx < self.tokens.len()
&& (matches!(
self.tokens[next_idx].token_type,
TokenType::Identifier | TokenType::Var | TokenType::QuotedIdentifier
) || self.tokens[next_idx].token_type.is_keyword());
let is_alias = is_alias_token
&& after_alias_idx < self.tokens.len()
&& matches!(
self.tokens[after_alias_idx].token_type,
TokenType::RParen | TokenType::Comma
);
if is_alias {
// consume AS, then the alias token itself
self.skip();
let alias_token = self.advance();
let alias_name = if alias_token.token_type == TokenType::QuotedIdentifier {
let mut ident = Identifier::new(alias_token.text.clone());
ident.quoted = true;
ident
} else {
Identifier::new(alias_token.text.clone())
};
Expression::Alias(Box::new(crate::expressions::Alias {
this: arg,
alias: alias_name,
column_aliases: Vec::new(),
pre_alias_comments: Vec::new(),
trailing_comments: Vec::new(),
inferred_type: None,
}))
} else {
arg
}
} else {
arg
};
// May additionally wrap the argument in an implicit alias
// (ClickHouse-specific helper; no-op otherwise — TODO confirm).
let arg = self.try_clickhouse_implicit_alias(arg);
// Attach trailing comments, but only to bare literals — other expression
// kinds already carry their own trailing-comment fields.
let trailing_comments = self.previous_trailing_comments().to_vec();
if trailing_comments.is_empty() {
Ok(arg)
} else {
Ok(match &arg {
Expression::Literal(_) | Expression::Boolean(_) | Expression::Null(_) => {
Expression::Annotated(Box::new(Annotated {
this: arg,
trailing_comments,
}))
}
_ => arg,
})
}
}
/// Parse function arguments, handling named arguments (name => value, name := value)
/// and TABLE/MODEL prefixed arguments (BigQuery)
///
/// Also handles dialect quirks:
/// - ClickHouse `SETTINGS key=value, ...` (terminates the argument list;
///   parsed here at the end and discarded) and bare `SELECT`/`WITH`
///   subqueries as arguments (e.g. `view(SELECT 1)`),
/// - Snowflake skipped optional named arguments (consecutive commas).
fn parse_function_arguments(&mut self) -> Result<Vec<Expression>> {
let mut args = Vec::new();
loop {
// ClickHouse: SETTINGS key=value, ... terminates function args
// Only break if SETTINGS is followed by identifier = value pattern
// (this lookahead must match the post-loop handler below exactly).
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.check(TokenType::Settings)
&& self.current + 2 < self.tokens.len()
&& (self.tokens[self.current + 1].token_type == TokenType::Var
|| self.tokens[self.current + 1].token_type == TokenType::Identifier)
&& self.tokens[self.current + 2].token_type == TokenType::Eq
{
break; // will be consumed by SETTINGS handler after loop
}
// ClickHouse: bare SELECT/WITH as function argument (e.g., view(SELECT 1), remote(..., view(SELECT ...)))
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && (self.check(TokenType::Select) || self.check(TokenType::With))
{
let query = self.parse_statement()?;
args.push(query);
if !self.match_token(TokenType::Comma) {
break;
}
continue;
}
args.push(self.parse_single_function_argument()?);
if !self.match_token(TokenType::Comma) {
break;
}
// Skip consecutive commas (Snowflake allows skipping optional named args)
// e.g., ROUND(SCALE => 1, EXPR => 2.25, , ROUNDING_MODE => 'HALF_TO_EVEN')
while self.check(TokenType::Comma) {
self.skip();
}
}
// ClickHouse: SETTINGS key=value, ... at end of function args before RParen.
// The key/value pairs are parsed and discarded (not kept in the AST).
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.check(TokenType::Settings)
&& self.current + 2 < self.tokens.len()
&& (self.tokens[self.current + 1].token_type == TokenType::Var
|| self.tokens[self.current + 1].token_type == TokenType::Identifier)
&& self.tokens[self.current + 2].token_type == TokenType::Eq
{
self.skip(); // consume SETTINGS
loop {
let _key = if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
self.advance().text
} else {
break;
};
if self.match_token(TokenType::Eq) {
let _value = self.parse_primary()?;
}
if !self.match_token(TokenType::Comma) {
break;
}
}
}
Ok(args)
}
/// Parse optional FILTER clause.
///
/// Returns `Ok(None)` when the next token is not FILTER; otherwise consumes
/// `FILTER ( [WHERE] predicate )` and returns the predicate expression.
fn parse_filter_clause(&mut self) -> Result<Option<Expression>> {
    if !self.match_token(TokenType::Filter) {
        return Ok(None);
    }
    self.expect(TokenType::LParen)?;
    // WHERE is optional (DuckDB allows FILTER(condition) without WHERE)
    self.match_token(TokenType::Where);
    let predicate = self.parse_expression()?;
    self.expect(TokenType::RParen)?;
    Ok(Some(predicate))
}
/// Parse STRUCT arguments with optional AS aliases: STRUCT(x, y AS name, ...)
fn parse_struct_args(&mut self) -> Result<Vec<Expression>> {
    let mut fields = Vec::new();
    loop {
        let value = self.parse_expression()?;
        // An optional `AS name` wraps the field value in an Alias node.
        let entry = if self.match_token(TokenType::As) {
            let alias_name = self.expect_identifier_or_keyword()?;
            Expression::Alias(Box::new(Alias {
                this: value,
                alias: Identifier::new(alias_name),
                column_aliases: Vec::new(),
                pre_alias_comments: Vec::new(),
                trailing_comments: Vec::new(),
                inferred_type: None,
            }))
        } else {
            value
        };
        fields.push(entry);
        if !self.match_token(TokenType::Comma) {
            return Ok(fields);
        }
    }
}
/// Maybe parse OVER clause for window functions or WITHIN GROUP for ordered-set aggregates.
///
/// Applies, in source order: subscript access, an Oracle interval span,
/// WITHIN GROUP, the SQL:2003 FILTER clause, a discarded ClickHouse
/// IGNORE/RESPECT NULLS modifier, the Oracle KEEP clause, and finally OVER.
fn maybe_parse_over(&mut self, expr: Expression) -> Result<Expression> {
    let mut expr = self.maybe_parse_subscript(expr)?;
    // Oracle: interval span after expression, e.g. (expr) DAY(9) TO SECOND(3)
    // https://docs.oracle.com/en/database/oracle/oracle-database/26/sqlrf/Interval-Expressions.html
    if matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::Oracle)
    ) {
        expr = self.try_parse_oracle_interval_span(expr)?;
    }
    // WITHIN GROUP for ordered-set aggregates (LISTAGG, PERCENTILE_CONT, ...)
    if self.check(TokenType::Within) && self.check_next(TokenType::Group) {
        self.skip(); // consume WITHIN
        self.skip(); // consume GROUP
        self.expect(TokenType::LParen)?;
        self.expect(TokenType::Order)?;
        self.expect(TokenType::By)?;
        let order_by = self.parse_order_by_list()?;
        self.expect(TokenType::RParen)?;
        expr = Expression::WithinGroup(Box::new(WithinGroup {
            this: expr,
            order_by,
        }));
    }
    // SQL:2003 FILTER clause: aggregate(...) FILTER (WHERE condition);
    // WHERE is mandatory in this standard form.
    if self.match_token(TokenType::Filter) {
        self.expect(TokenType::LParen)?;
        self.expect(TokenType::Where)?;
        let predicate = self.parse_expression()?;
        self.expect(TokenType::RParen)?;
        expr = Expression::Filter(Box::new(Filter {
            this: Box::new(expr),
            expression: Box::new(predicate),
        }));
    }
    // ClickHouse: consume (and discard) IGNORE NULLS / RESPECT NULLS before
    // OVER, e.g. func(args) IGNORE NULLS OVER w, or after parametric
    // aggregates: func(params)(args) IGNORE NULLS.
    if matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    ) {
        let _ = self.match_keywords(&[TokenType::Ignore, TokenType::Nulls])
            || self.match_keywords(&[TokenType::Respect, TokenType::Nulls]);
    }
    // Oracle KEEP clause: aggregate KEEP (DENSE_RANK FIRST|LAST ORDER BY ...).
    // Only when KEEP is followed by `(` — otherwise KEEP is an alias.
    let keep = if self.check(TokenType::Keep) && self.check_next(TokenType::LParen) {
        self.skip(); // consume KEEP
        Some(self.parse_keep_clause()?)
    } else {
        None
    };
    // OVER may follow KEEP, FILTER, WITHIN GROUP, or a bare aggregate.
    if self.match_token(TokenType::Over) {
        let over = self.parse_over_clause()?;
        return Ok(Expression::WindowFunction(Box::new(WindowFunction {
            this: expr,
            over,
            keep,
            inferred_type: None,
        })));
    }
    if keep.is_some() {
        // KEEP without OVER is still a window-like construct: emit a
        // WindowFunction with an empty Over.
        return Ok(Expression::WindowFunction(Box::new(WindowFunction {
            this: expr,
            over: Over {
                window_name: None,
                partition_by: Vec::new(),
                order_by: Vec::new(),
                frame: None,
                alias: None,
            },
            keep,
            inferred_type: None,
        })));
    }
    Ok(expr)
}
/// ClickHouse: parse parameterized aggregate functions like func(params)(args).
///
/// Only fires for the ClickHouse dialect when the next token is `(`. Plain
/// `Function` calls, and `AggregateFunction`s without modifiers, are reused
/// as the parameter list; anything else is returned unchanged.
fn maybe_parse_clickhouse_parameterized_agg(&mut self, expr: Expression) -> Result<Expression> {
    let is_clickhouse = matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    );
    if !is_clickhouse || !self.check(TokenType::LParen) {
        return Ok(expr);
    }
    let (name, quoted, params) = match expr {
        Expression::Function(f) => (f.name, f.quoted, f.args),
        Expression::AggregateFunction(agg) => {
            // Aggregates already carrying modifiers cannot serve as the
            // parameter list of a parametric aggregate — leave them alone.
            let has_modifiers = agg.distinct
                || agg.filter.is_some()
                || !agg.order_by.is_empty()
                || agg.limit.is_some()
                || agg.ignore_nulls.is_some();
            if has_modifiers {
                return Ok(Expression::AggregateFunction(agg));
            }
            (agg.name, false, agg.args)
        }
        other => return Ok(other),
    };
    self.skip(); // consume (
    // func(params)(DISTINCT args): DISTINCT is consumed but not separately
    // tracked in the CombinedParameterizedAgg node.
    let _distinct = self.match_token(TokenType::Distinct);
    let expressions = if self.check(TokenType::RParen) {
        Vec::new()
    } else {
        self.parse_function_arguments()?
    };
    self.expect(TokenType::RParen)?;
    let ident = Identifier {
        name,
        quoted,
        trailing_comments: Vec::new(),
        span: None,
    };
    Ok(Expression::CombinedParameterizedAgg(Box::new(
        CombinedParameterizedAgg {
            this: Box::new(Expression::Identifier(ident)),
            params,
            expressions,
        },
    )))
}
/// Parse Oracle KEEP clause: KEEP (DENSE_RANK FIRST|LAST ORDER BY ...)
///
/// Expects the KEEP keyword to have been consumed already; parses everything
/// from the opening paren through the closing paren.
fn parse_keep_clause(&mut self) -> Result<Keep> {
    self.expect(TokenType::LParen)?;
    // DENSE_RANK is mandatory inside KEEP.
    if !self.match_identifier("DENSE_RANK") {
        return Err(self.parse_error("Expected DENSE_RANK in KEEP clause"));
    }
    // FIRST or LAST selects which extreme of the ordering is kept.
    let first;
    if self.match_token(TokenType::First) {
        first = true;
    } else if self.match_token(TokenType::Last) {
        first = false;
    } else {
        return Err(self.parse_error("Expected FIRST or LAST in KEEP clause"));
    }
    self.expect(TokenType::Order)?;
    self.expect(TokenType::By)?;
    let order_by = self.parse_order_by_list()?;
    self.expect(TokenType::RParen)?;
    Ok(Keep { first, order_by })
}
/// Parse a JSON path operand - just the immediate literal/identifier without any subscript processing
/// This is used for JSON arrow operators (->, ->>) to get proper left-to-right associativity
fn parse_json_path_operand(&mut self) -> Result<Expression> {
// Negative number literal (e.g., -1)
if self.check(TokenType::Dash) {
let dash_pos = self.current;
self.skip(); // consume the dash
if self.check(TokenType::Number) {
let token = self.advance();
return Ok(Expression::Neg(Box::new(UnaryOp {
this: Expression::Literal(Box::new(Literal::Number(token.text))),
inferred_type: None,
})));
}
// Not a negative number, backtrack
self.current = dash_pos;
}
// Number literal
if self.check(TokenType::Number) {
let token = self.advance();
// Check for numeric literal suffix encoded as "number::TYPE" by tokenizer
if let Some(sep_pos) = token.text.find("::") {
let num_part = &token.text[..sep_pos];
let type_name = &token.text[sep_pos + 2..];
let num_expr = Expression::Literal(Box::new(Literal::Number(num_part.to_string())));
let data_type = match type_name {
"BIGINT" => crate::expressions::DataType::BigInt { length: None },
"SMALLINT" => crate::expressions::DataType::SmallInt { length: None },
"TINYINT" => crate::expressions::DataType::TinyInt { length: None },
"DOUBLE" => crate::expressions::DataType::Double {
precision: None,
scale: None,
},
"FLOAT" => crate::expressions::DataType::Float {
precision: None,
scale: None,
real_spelling: false,
},
"DECIMAL" => crate::expressions::DataType::Decimal {
precision: None,
scale: None,
},
_ => crate::expressions::DataType::Custom {
name: type_name.to_string(),
},
};
return Ok(Expression::TryCast(Box::new(crate::expressions::Cast {
this: num_expr,
to: data_type,
trailing_comments: Vec::new(),
double_colon_syntax: false,
format: None,
default: None,
inferred_type: None,
})));
}
return Ok(Expression::Literal(Box::new(Literal::Number(token.text))));
}
// String literal
if self.check(TokenType::String) {
let token = self.advance();
return Ok(Expression::Literal(Box::new(Literal::String(token.text))));
}
// Parenthesized expression (for complex paths)
if self.match_token(TokenType::LParen) {
let expr = self.parse_expression()?;
self.expect(TokenType::RParen)?;
return Ok(Expression::Paren(Box::new(Paren {
this: expr,
trailing_comments: Vec::new(),
})));
}
// Array literal: ['$.family', '$.species']
// Used in DuckDB for multi-path JSON extraction
if self.match_token(TokenType::LBracket) {
// Empty array: []
if self.match_token(TokenType::RBracket) {
return Ok(Expression::ArrayFunc(Box::new(ArrayConstructor {
expressions: Vec::new(),
bracket_notation: true,
use_list_keyword: false,
})));
}
// Parse array elements
let mut expressions = vec![self.parse_expression()?];
while self.match_token(TokenType::Comma) {
if self.check(TokenType::RBracket) {
break;
}
expressions.push(self.parse_expression()?);
}
self.expect(TokenType::RBracket)?;
return Ok(Expression::ArrayFunc(Box::new(ArrayConstructor {
expressions,
bracket_notation: true,
use_list_keyword: false,
})));
}
// Identifier (possibly qualified like table.column)
if self.is_identifier_token() {
let first_ident = self.expect_identifier_with_quoted()?;
// Check for qualified name: identifier.identifier
if self.match_token(TokenType::Dot) {
if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
let second_ident = if self.is_identifier_token() {
self.expect_identifier_with_quoted()?
} else {
let token = self.advance();
Identifier::new(token.text)
};
return Ok(Expression::boxed_column(Column {
name: second_ident,
table: Some(first_ident),
join_mark: false,
trailing_comments: Vec::new(),
span: None,
inferred_type: None,
}));
}
}
return Ok(Expression::boxed_column(Column {
name: first_ident,
table: None,
join_mark: false,
trailing_comments: Vec::new(),
span: None,
inferred_type: None,
}));
}
// Keywords as identifiers (possibly qualified)
if self.is_safe_keyword_as_identifier() {
let token = self.advance();
let first_ident = Identifier::new(token.text);
// Check for qualified name: identifier.identifier
if self.match_token(TokenType::Dot) {
if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
let second_ident = if self.is_identifier_token() {
self.expect_identifier_with_quoted()?
} else {
let token = self.advance();
Identifier::new(token.text)
};
return Ok(Expression::boxed_column(Column {
name: second_ident,
table: Some(first_ident),
join_mark: false,
trailing_comments: Vec::new(),
span: None,
inferred_type: None,
}));
}
}
return Ok(Expression::boxed_column(Column {
name: first_ident,
table: None,
join_mark: false,
trailing_comments: Vec::new(),
span: None,
inferred_type: None,
}));
}
Err(self.parse_error(format!(
"Unexpected token in JSON path: {:?}",
self.peek().token_type
)))
}
/// Maybe parse subscript access (array[index], struct.field)
fn maybe_parse_subscript(&mut self, mut expr: Expression) -> Result<Expression> {
loop {
// ClickHouse: empty brackets [] in JSON paths represent Array(JSON) type access.
// json.a.b[] -> json.a.b.:"Array(JSON)"
// json.a.b[][] -> json.a.b.:"Array(Array(JSON))"
// Check for consecutive empty bracket pairs before normal bracket handling.
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.check(TokenType::LBracket)
{
let is_empty_bracket = self
.peek_nth(1)
.map_or(false, |t| t.token_type == TokenType::RBracket);
if is_empty_bracket {
let mut bracket_json_type: Option<DataType> = None;
while self.check(TokenType::LBracket) {
let is_empty = self
.peek_nth(1)
.map_or(false, |t| t.token_type == TokenType::RBracket);
if is_empty {
self.skip(); // consume [
self.skip(); // consume ]
bracket_json_type = Some(DataType::Array {
element_type: Box::new(bracket_json_type.unwrap_or(DataType::Json)),
dimension: None,
});
} else {
break;
}
}
if let Some(json_type) = bracket_json_type {
expr = Expression::JSONCast(Box::new(crate::expressions::JSONCast {
this: Box::new(expr),
to: json_type,
}));
continue;
}
}
}
if self.match_token(TokenType::LBracket) {
// Check if expr is an array/list constructor keyword (ARRAY[...] or LIST[...])
let array_constructor_type = match &expr {
Expression::Column(col) if col.table.is_none() => {
let upper = col.name.name.to_ascii_uppercase();
if upper == "ARRAY" || upper == "LIST" {
Some(upper)
} else {
None
}
}
Expression::Identifier(id) => {
let upper = id.name.to_ascii_uppercase();
if upper == "ARRAY" || upper == "LIST" {
Some(upper)
} else {
None
}
}
_ => None,
};
if let Some(constructor_type) = array_constructor_type {
// Parse ARRAY[expr, expr, ...] or LIST[expr, expr, ...]
// bracket_notation=false means we have the ARRAY/LIST keyword prefix
let use_list_keyword = constructor_type == "LIST";
if self.check(TokenType::RBracket) {
// Empty array: ARRAY[]
self.skip();
expr = Expression::ArrayFunc(Box::new(ArrayConstructor {
expressions: Vec::new(),
bracket_notation: false, // Has ARRAY/LIST keyword
use_list_keyword,
}));
} else {
let expressions = self.parse_expression_list()?;
self.expect(TokenType::RBracket)?;
expr = Expression::ArrayFunc(Box::new(ArrayConstructor {
expressions,
bracket_notation: false, // Has ARRAY/LIST keyword
use_list_keyword,
}));
}
continue;
}
// Special case: MAP[...] constructor syntax
// Check if expr is a MAP identifier
// ClickHouse: map[key] is always subscript access, not a MAP constructor
let is_map_constructor = !matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && match &expr {
Expression::Column(col) => {
col.name.name.eq_ignore_ascii_case("MAP") && col.table.is_none()
}
Expression::Identifier(id) => id.name.eq_ignore_ascii_case("MAP"),
_ => false,
};
if is_map_constructor {
let is_materialize = matches!(
self.config.dialect,
Some(crate::dialects::DialectType::Materialize)
);
// Materialize: MAP[] empty map or MAP['a' => 1, ...] with fat arrow
if is_materialize {
if self.check(TokenType::RBracket) {
// Empty map: MAP[]
self.skip();
expr = Expression::ToMap(Box::new(ToMap {
this: Box::new(Expression::Struct(Box::new(Struct {
fields: Vec::new(),
}))),
}));
continue;
}
// Parse MAP['a' => 1, 'b' => 2, ...] with fat arrow entries
// Store entries as PropertyEQ expressions (key => value)
let mut entries = Vec::new();
loop {
let key = self.parse_expression()?;
self.expect(TokenType::FArrow)?;
let value = self.parse_expression()?;
// Store as PropertyEQ which will be output as key => value
entries.push((
None,
Expression::PropertyEQ(Box::new(BinaryOp::new(key, value))),
));
if !self.match_token(TokenType::Comma) {
break;
}
}
self.expect(TokenType::RBracket)?;
expr = Expression::ToMap(Box::new(ToMap {
this: Box::new(Expression::Struct(Box::new(Struct {
fields: entries,
}))),
}));
continue;
}
// DuckDB/BigQuery: MAP[keys, values] syntax
let keys = self.parse_expression()?;
self.expect(TokenType::Comma)?;
let values = self.parse_expression()?;
self.expect(TokenType::RBracket)?;
expr = Expression::Function(Box::new(Function {
name: "MAP".to_string(),
args: vec![keys, values],
distinct: false,
trailing_comments: Vec::new(),
use_bracket_syntax: true,
no_parens: false,
quoted: false,
span: None,
inferred_type: None,
}));
continue;
}
// Check for slice syntax: [start:end:step]
// Handle [:...] case where start is omitted
if self.check(TokenType::Colon) {
self.skip(); // consume first :
// Parse end - use parse_slice_element to avoid : being interpreted as parameter
let end = self.parse_slice_element()?;
// Check for step (second colon)
let step = if self.match_token(TokenType::Colon) {
self.parse_slice_element()?
} else {
None
};
self.expect(TokenType::RBracket)?;
if step.is_some() {
// Three-part slice with step: Subscript with Slice index
let slice = Expression::Slice(Box::new(Slice {
this: None, // start is omitted
expression: end.map(Box::new),
step: step.map(Box::new),
}));
expr = Expression::Subscript(Box::new(Subscript {
this: expr,
index: slice,
}));
} else {
expr = Expression::ArraySlice(Box::new(ArraySlice {
this: expr,
start: None,
end,
}));
}
} else {
let start = self.parse_slice_element()?;
// Check if this is a slice
if self.match_token(TokenType::Colon) {
let end = self.parse_slice_element()?;
// Check for step (second colon)
let step = if self.match_token(TokenType::Colon) {
self.parse_slice_element()?
} else {
None
};
self.expect(TokenType::RBracket)?;
if step.is_some() {
// Three-part slice with step: Subscript with Slice index
let slice = Expression::Slice(Box::new(Slice {
this: start.map(Box::new),
expression: end.map(Box::new),
step: step.map(Box::new),
}));
expr = Expression::Subscript(Box::new(Subscript {
this: expr,
index: slice,
}));
} else {
expr = Expression::ArraySlice(Box::new(ArraySlice {
this: expr,
start,
end,
}));
}
} else {
self.expect(TokenType::RBracket)?;
// Simple subscript access - start must be Some
let index =
start.unwrap_or_else(|| Expression::Null(crate::expressions::Null));
expr = Expression::Subscript(Box::new(Subscript { this: expr, index }));
}
}
} else if self.match_token(TokenType::DotColon) {
// In ClickHouse, the type after .: may be a quoted identifier like "Array(JSON)"
// which needs to be re-parsed as a proper data type.
let data_type = if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.check(TokenType::QuotedIdentifier)
{
let type_text = self.advance().text.clone();
// Re-parse the quoted identifier text as a data type
self.parse_data_type_from_text(&type_text)?
} else {
self.parse_data_type()?
};
expr = Expression::JSONCast(Box::new(JSONCast {
this: Box::new(expr),
to: data_type,
}));
} else if self.match_token(TokenType::Dot) {
// Handle chained dot access (a.b.c.d)
if self.match_token(TokenType::Star) {
// expr.* - struct field expansion with potential modifiers (EXCEPT, REPLACE, etc.)
let table_name = match &expr {
Expression::Column(col) => {
if let Some(ref table) = col.table {
Some(Identifier::new(format!("{}.{}", table.name, col.name.name)))
} else {
Some(col.name.clone())
}
}
Expression::Dot(d) => {
fn dot_to_name_inner(expr: &Expression) -> String {
match expr {
Expression::Column(col) => {
if let Some(ref table) = col.table {
format!("{}.{}", table.name, col.name.name)
} else {
col.name.name.clone()
}
}
Expression::Dot(d) => {
format!("{}.{}", dot_to_name_inner(&d.this), d.field.name)
}
_ => String::new(),
}
}
Some(Identifier::new(dot_to_name_inner(&Expression::Dot(
d.clone(),
))))
}
_ => None,
};
if table_name.is_some() {
let star = self.parse_star_modifiers(table_name)?;
expr = Expression::Star(star);
// ClickHouse: a.* APPLY(func) EXCEPT(col) REPLACE(expr AS col) in any order
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) {
loop {
if self.check(TokenType::Apply) {
self.skip();
let apply_expr = if self.match_token(TokenType::LParen) {
let e = self.parse_expression()?;
self.expect(TokenType::RParen)?;
e
} else {
self.parse_expression()?
};
expr = Expression::Apply(Box::new(crate::expressions::Apply {
this: Box::new(expr),
expression: Box::new(apply_expr),
}));
} else if self.check(TokenType::Except)
|| self.check(TokenType::Exclude)
{
self.skip();
self.match_identifier("STRICT");
if self.match_token(TokenType::LParen) {
loop {
if self.check(TokenType::RParen) {
break;
}
let _ = self.parse_expression()?;
if !self.match_token(TokenType::Comma) {
break;
}
}
self.expect(TokenType::RParen)?;
} else if self.is_identifier_token()
|| self.is_safe_keyword_as_identifier()
{
let _ = self.parse_expression()?;
}
} else if self.check(TokenType::Replace) {
self.skip();
self.match_identifier("STRICT");
if self.match_token(TokenType::LParen) {
loop {
if self.check(TokenType::RParen) {
break;
}
let _ = self.parse_expression()?;
if self.match_token(TokenType::As) {
if self.is_identifier_token()
|| self.is_safe_keyword_as_identifier()
{
self.skip();
}
}
if !self.match_token(TokenType::Comma) {
break;
}
}
self.expect(TokenType::RParen)?;
} else {
let _ = self.parse_expression()?;
if self.match_token(TokenType::As) {
if self.is_identifier_token()
|| self.is_safe_keyword_as_identifier()
{
self.skip();
}
}
}
} else {
break;
}
}
}
} else {
// For complex expressions (like CAST, function calls), use Dot with * as field
expr = Expression::Dot(Box::new(DotAccess {
this: expr,
field: Identifier::new("*"),
}));
}
} else if self.check(TokenType::Identifier)
|| self.check(TokenType::Var)
|| self.check(TokenType::QuotedIdentifier)
|| self.check_keyword()
{
let is_quoted = self.check(TokenType::QuotedIdentifier);
let field_name = self.advance().text;
// Check if this is a method call (field followed by parentheses)
if self.check(TokenType::LParen) && !is_quoted {
// This is a method call like a.b.C() or x.EXTRACT()
self.skip(); // consume (
let args = if self.check(TokenType::RParen) {
Vec::new()
} else {
self.parse_function_arguments()?
};
self.expect(TokenType::RParen)?;
// Create a method call expression (DotAccess with function call)
expr = Expression::MethodCall(Box::new(MethodCall {
this: expr,
method: Identifier::new(field_name),
args,
}));
} else {
let mut ident = Identifier::new(field_name);
if is_quoted {
ident.quoted = true;
}
expr = Expression::Dot(Box::new(DotAccess {
this: expr,
field: ident,
}));
}
} else if self.check(TokenType::Number) {
// Handle numeric field access like a.0 or x.1
let field_name = self.advance().text;
expr = Expression::Dot(Box::new(DotAccess {
this: expr,
field: Identifier::new(field_name),
}));
} else if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.check(TokenType::Caret)
{
// ClickHouse: json.^path — the ^ prefix means "get all nested subcolumns"
self.skip(); // consume ^
// What follows should be an identifier path
let mut field_name = "^".to_string();
if self.check(TokenType::Identifier)
|| self.check(TokenType::Var)
|| self.check_keyword()
{
field_name.push_str(&self.advance().text);
}
expr = Expression::Dot(Box::new(DotAccess {
this: expr,
field: Identifier::new(field_name),
}));
} else if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.check(TokenType::Colon)
{
// ClickHouse: json.path.:Type — the : prefix means type cast on JSON path
self.skip(); // consume :
// Consume the type name
let mut type_name = ":".to_string();
if self.check(TokenType::Identifier)
|| self.check(TokenType::Var)
|| self.check_keyword()
{
type_name.push_str(&self.advance().text);
}
expr = Expression::Dot(Box::new(DotAccess {
this: expr,
field: Identifier::new(type_name),
}));
} else if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.check(TokenType::Dash)
&& self
.peek_nth(1)
.is_some_and(|t| t.token_type == TokenType::Number)
{
// ClickHouse: tuple.-1 — negative tuple index
self.skip(); // consume -
let num = self.advance().text;
expr = Expression::Dot(Box::new(DotAccess {
this: expr,
field: Identifier::new(format!("-{}", num)),
}));
} else {
return Err(self.parse_error("Expected field name after dot"));
}
} else if self.match_token(TokenType::Collate) {
// Parse COLLATE 'collation_name' or COLLATE "collation_name" or COLLATE collation_name
let (collation, quoted, double_quoted) = if self.check(TokenType::String) {
// Single-quoted string: COLLATE 'de_DE'
(self.advance().text, true, false)
} else if self.check(TokenType::QuotedIdentifier) {
// Double-quoted identifier: COLLATE "de_DE"
(self.advance().text, false, true)
} else {
// Unquoted identifier: COLLATE de_DE
(self.expect_identifier_or_keyword()?, false, false)
};
expr = Expression::Collation(Box::new(CollationExpr {
this: expr,
collation,
quoted,
double_quoted,
}));
} else if self.check(TokenType::DColon)
|| self.check(TokenType::DColonDollar)
|| self.check(TokenType::DColonPercent)
|| self.check(TokenType::DColonQMark)
{
// For SingleStore, :: variants are JSON path extraction
// For other dialects, :: is cast syntax (PostgreSQL-style)
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::SingleStore)
) {
// SingleStore JSON path extraction: expr::key, expr::$key, expr::%key, expr::?key
if self.match_token(TokenType::DColon) {
// ::key -> JSON_EXTRACT_JSON(expr, 'key')
let path_key =
if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
self.advance().text
} else if self.check(TokenType::Number) {
self.advance().text
} else if self.check(TokenType::QuotedIdentifier) {
self.advance().text
} else {
return Err(self.parse_error(
"Expected identifier or number after :: in JSON path",
));
};
expr = Expression::Function(Box::new(Function::new(
"JSON_EXTRACT_JSON".to_string(),
vec![expr, Expression::string(&path_key)],
)));
} else if self.match_token(TokenType::DColonDollar) {
// ::$key -> JSON_EXTRACT_STRING(expr, 'key')
let path_key =
if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
self.advance().text
} else if self.check(TokenType::Number) {
self.advance().text
} else {
return Err(self.parse_error(
"Expected identifier or number after ::$ in JSON path",
));
};
expr = Expression::Function(Box::new(Function::new(
"JSON_EXTRACT_STRING".to_string(),
vec![expr, Expression::string(&path_key)],
)));
} else if self.match_token(TokenType::DColonPercent) {
// ::%key -> JSON_EXTRACT_DOUBLE(expr, 'key')
let path_key =
if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
self.advance().text
} else if self.check(TokenType::Number) {
self.advance().text
} else {
return Err(self.parse_error(
"Expected identifier or number after ::% in JSON path",
));
};
expr = Expression::Function(Box::new(Function::new(
"JSON_EXTRACT_DOUBLE".to_string(),
vec![expr, Expression::string(&path_key)],
)));
} else if self.match_token(TokenType::DColonQMark) {
// ::?key -> SingleStoreJsonPathQMark function (for JSON_MATCH_ANY patterns)
let path_key =
if self.check(TokenType::Identifier) || self.check(TokenType::Var) {
self.advance().text
} else if self.check(TokenType::Number) {
self.advance().text
} else {
return Err(self.parse_error(
"Expected identifier or number after ::? in JSON path",
));
};
// Use a special function name that SingleStore generator will recognize
expr = Expression::Function(Box::new(Function::new(
"__SS_JSON_PATH_QMARK__".to_string(),
vec![expr, Expression::string(&path_key)],
)));
}
} else {
// PostgreSQL :: cast operator: expr::type
self.skip(); // consume DColon
// Use parse_data_type_for_cast to avoid consuming subscripts as array dimensions
let data_type = self.parse_data_type_for_cast()?;
expr = Expression::Cast(Box::new(Cast {
this: expr,
to: data_type,
trailing_comments: Vec::new(),
double_colon_syntax: true,
format: None,
default: None,
inferred_type: None,
}));
}
} else if self.match_token(TokenType::ColonGt) {
// SingleStore :> cast operator: expr :> type
let data_type = self.parse_data_type_for_cast()?;
expr = Expression::Cast(Box::new(Cast {
this: expr,
to: data_type,
trailing_comments: Vec::new(),
double_colon_syntax: false, // Use :> syntax in generator
format: None,
default: None,
inferred_type: None,
}));
} else if self.match_token(TokenType::NColonGt) {
// SingleStore !:> try cast operator: expr !:> type
let data_type = self.parse_data_type_for_cast()?;
expr = Expression::TryCast(Box::new(Cast {
this: expr,
to: data_type,
trailing_comments: Vec::new(),
double_colon_syntax: false,
format: None,
default: None,
inferred_type: None,
}));
} else if self.match_token(TokenType::QDColon) {
// Databricks ?:: try cast operator: expr?::type
let data_type = self.parse_data_type_for_cast()?;
expr = Expression::TryCast(Box::new(Cast {
this: expr,
to: data_type,
trailing_comments: Vec::new(),
double_colon_syntax: true, // Uses :: style syntax
format: None,
default: None,
inferred_type: None,
}));
} else if self.check(TokenType::Arrow)
&& !matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
)
{
self.skip(); // consume ->
// JSON extract operator: expr -> path (PostgreSQL, MySQL, DuckDB)
// Use parse_json_path_operand to get only the immediate operand for proper left-to-right associativity
let path = self.parse_json_path_operand()?;
expr = Expression::JsonExtract(Box::new(JsonExtractFunc {
this: expr,
path,
returning: None,
arrow_syntax: true,
hash_arrow_syntax: false,
wrapper_option: None,
quotes_option: None,
on_scalar_string: false,
on_error: None,
}));
} else if self.match_token(TokenType::DArrow) {
// JSON extract text operator: expr ->> path (PostgreSQL, MySQL, DuckDB)
// Use parse_json_path_operand to get only the immediate operand for proper left-to-right associativity
let path = self.parse_json_path_operand()?;
expr = Expression::JsonExtractScalar(Box::new(JsonExtractFunc {
this: expr,
path,
returning: None,
arrow_syntax: true,
hash_arrow_syntax: false,
wrapper_option: None,
quotes_option: None,
on_scalar_string: false,
on_error: None,
}));
} else if self.match_token(TokenType::HashArrow) {
// JSONB path extract: expr #> path (PostgreSQL)
// Use parse_json_path_operand to get only the immediate operand for proper left-to-right associativity
let path = self.parse_json_path_operand()?;
expr = Expression::JsonExtractPath(Box::new(JsonPathFunc {
this: expr,
paths: vec![path],
}));
} else if self.match_token(TokenType::DHashArrow) {
// JSONB path extract text: expr #>> path (PostgreSQL)
// For now, use JsonExtractScalar since the result is text
// Use parse_json_path_operand to get only the immediate operand for proper left-to-right associativity
let path = self.parse_json_path_operand()?;
expr = Expression::JsonExtractScalar(Box::new(JsonExtractFunc {
this: expr,
path,
returning: None,
arrow_syntax: false, // This is #>> not ->>
hash_arrow_syntax: true, // Mark as #>> operator
wrapper_option: None,
quotes_option: None,
on_scalar_string: false,
on_error: None,
}));
} else if self.check_join_marker() {
// Oracle/Redshift-style outer join marker: column (+)
// Only applies to Column expressions
if let Expression::Column(col) = &mut expr {
self.skip(); // consume (
self.skip(); // consume +
self.skip(); // consume )
col.join_mark = true;
// Don't continue - join marker is terminal (no more postfix ops after it)
break;
}
// If not a Column, just break - the marker is invalid in this context
else {
break;
}
} else {
break;
}
}
Ok(expr)
}
/// Check if the next tokens are the Oracle/Redshift-style outer join marker `(+)`.
///
/// Pure lookahead — nothing is consumed. The current token must be `(`,
/// followed by `+` and `)`; the caller consumes all three tokens itself
/// when this returns true.
fn check_join_marker(&self) -> bool {
    // `is_some_and` instead of `map_or(false, ..)` — matches the lookahead
    // style used elsewhere in this parser and avoids the Clippy
    // `unnecessary_map_or` lint.
    self.check(TokenType::LParen)
        && self
            .peek_nth(1)
            .is_some_and(|t| t.token_type == TokenType::Plus)
        && self
            .peek_nth(2)
            .is_some_and(|t| t.token_type == TokenType::RParen)
}
/// Parse the body of an OVER clause; the `OVER` keyword itself is assumed
/// to have been consumed by the caller.
///
/// Supports both forms:
/// - `OVER window_name` — a bare reference to a named window
/// - `OVER ([window_name] [PARTITION BY ...] [ORDER BY ...] [frame])`
///
/// Dialect notes: Hive's `DISTRIBUTE BY` / `SORT BY` are accepted as
/// synonyms for `PARTITION BY` / `ORDER BY`, and ClickHouse `WITH FILL`
/// modifiers are parsed on window ORDER BY items.
///
/// # Errors
/// Returns a parse error on malformed input (e.g. `NULLS` without
/// `FIRST`/`LAST`, or a missing closing parenthesis).
fn parse_over_clause(&mut self) -> Result<Over> {
    // Handle OVER window_name (without parentheses)
    if !self.check(TokenType::LParen) {
        // OVER window_name - just a named window reference
        let window_name = self.expect_identifier_or_keyword()?;
        return Ok(Over {
            window_name: Some(Identifier::new(window_name)),
            partition_by: Vec::new(),
            order_by: Vec::new(),
            frame: None,
            alias: None,
        });
    }
    self.expect(TokenType::LParen)?;
    // Check for named window reference at start of OVER clause
    // e.g., OVER (w ORDER BY y) - w is a window name that can be extended.
    // The leading keyword checks exclude tokens that start a window spec
    // clause, so e.g. `OVER (PARTITION BY x)` is not misread as a name.
    let window_name = if (self.check(TokenType::Identifier)
        || self.check(TokenType::Var)
        || self.check_keyword())
        && !self.check(TokenType::Partition)
        && !self.check(TokenType::Order)
        && !self.check(TokenType::Rows)
        && !self.check(TokenType::Range)
        && !self.check(TokenType::Groups)
        && !self.check(TokenType::Distribute)
        && !self.check(TokenType::Sort)
    {
        // Look ahead to see if next token indicates this is a window name
        let pos = self.current;
        let name = self.advance().text;
        // If next token is a keyword that can follow a window name, this is a named reference
        if self.check(TokenType::Order)
            || self.check(TokenType::Partition)
            || self.check(TokenType::Rows)
            || self.check(TokenType::Range)
            || self.check(TokenType::Groups)
            || self.check(TokenType::RParen)
            || self.check(TokenType::Distribute)
            || self.check(TokenType::Sort)
        {
            Some(Identifier::new(name))
        } else {
            // Not a named window, restore position
            self.current = pos;
            None
        }
    } else {
        None
    };
    // Parse PARTITION BY or DISTRIBUTE BY (Hive uses DISTRIBUTE BY in window specs)
    let partition_by = if self.match_keywords(&[TokenType::Partition, TokenType::By]) {
        self.parse_expression_list()?
    } else if self.match_keywords(&[TokenType::Distribute, TokenType::By]) {
        // Hive: DISTRIBUTE BY is equivalent to PARTITION BY in window specs
        self.parse_expression_list()?
    } else {
        Vec::new()
    };
    // Parse ORDER BY or SORT BY (Hive uses SORT BY in window specs)
    let order_by = if self.match_keywords(&[TokenType::Order, TokenType::By])
        || self.match_keywords(&[TokenType::Sort, TokenType::By])
    {
        let mut exprs = Vec::new();
        loop {
            let expr = self.parse_expression()?;
            // Sort direction: DESC, explicit ASC, or neither (implicit ASC).
            let (desc, explicit_asc) = if self.match_token(TokenType::Desc) {
                (true, false)
            } else if self.match_token(TokenType::Asc) {
                (false, true)
            } else {
                (false, false)
            };
            // ClickHouse/SQL: COLLATE 'collation' in window ORDER BY.
            // The collation name is consumed and discarded — it is not stored
            // on the Ordered node, so it does not round-trip.
            if self.match_token(TokenType::Collate) {
                // Consume collation name (string or identifier)
                if self.check(TokenType::String) {
                    self.skip();
                } else if self.check(TokenType::QuotedIdentifier) {
                    self.skip();
                } else {
                    let _ = self.expect_identifier_or_keyword();
                }
            }
            // NULLS FIRST / NULLS LAST -> Some(true) / Some(false); absent -> None.
            let nulls_first = if self.match_token(TokenType::Nulls) {
                if self.match_token(TokenType::First) {
                    Some(true)
                } else if self.match_token(TokenType::Last) {
                    Some(false)
                } else {
                    return Err(self.parse_error("Expected FIRST or LAST after NULLS"));
                }
            } else {
                None
            };
            // ClickHouse: WITH FILL in window ORDER BY.
            // Requires the token after WITH to literally spell "FILL"
            // (checked by raw text, case-insensitively).
            let with_fill = if matches!(
                self.config.dialect,
                Some(crate::dialects::DialectType::ClickHouse)
            ) && self.check(TokenType::With)
                && self.current + 1 < self.tokens.len()
                && self.tokens[self.current + 1]
                    .text
                    .eq_ignore_ascii_case("FILL")
            {
                self.skip(); // consume WITH
                self.skip(); // consume FILL
                // Optional FROM / TO / STEP / STALENESS bounds, each an expression.
                let from_ = if self.match_token(TokenType::From) {
                    Some(Box::new(self.parse_or()?))
                } else {
                    None
                };
                let to = if self.match_text_seq(&["TO"]) {
                    Some(Box::new(self.parse_or()?))
                } else {
                    None
                };
                let step = if self.match_text_seq(&["STEP"]) {
                    Some(Box::new(self.parse_or()?))
                } else {
                    None
                };
                let staleness = if self.match_text_seq(&["STALENESS"]) {
                    Some(Box::new(self.parse_or()?))
                } else {
                    None
                };
                // INTERPOLATE (a, b, ...): a single item is kept as-is,
                // multiple items are wrapped in a Tuple. A bare INTERPOLATE
                // without parentheses yields None.
                let interpolate = if self.match_text_seq(&["INTERPOLATE"]) {
                    if self.match_token(TokenType::LParen) {
                        let items = self.parse_expression_list()?;
                        self.expect(TokenType::RParen)?;
                        if items.len() == 1 {
                            Some(Box::new(items.into_iter().next().unwrap()))
                        } else {
                            Some(Box::new(Expression::Tuple(Box::new(
                                crate::expressions::Tuple { expressions: items },
                            ))))
                        }
                    } else {
                        None
                    }
                } else {
                    None
                };
                Some(Box::new(WithFill {
                    from_,
                    to,
                    step,
                    staleness,
                    interpolate,
                }))
            } else {
                None
            };
            exprs.push(Ordered {
                this: expr,
                desc,
                nulls_first,
                explicit_asc,
                with_fill,
            });
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
        exprs
    } else {
        Vec::new()
    };
    // Parse window frame (ROWS/RANGE/GROUPS ...), if present
    let frame = self.parse_window_frame()?;
    self.expect(TokenType::RParen)?;
    Ok(Over {
        window_name,
        partition_by,
        order_by,
        frame,
        alias: None,
    })
}
/// Parse a window frame specification: `ROWS`/`RANGE`/`GROUPS` followed by
/// either a single bound or `BETWEEN <bound> AND <bound>`, plus an optional
/// `EXCLUDE` clause.
///
/// Returns `Ok(None)` when the current token is not a frame keyword, leaving
/// the cursor untouched.
fn parse_window_frame(&mut self) -> Result<Option<WindowFrame>> {
    // Determine the frame kind; bail out early when no frame keyword follows.
    let kind = if self.match_token(TokenType::Rows) {
        WindowFrameKind::Rows
    } else if self.match_token(TokenType::Range) {
        WindowFrameKind::Range
    } else if self.match_token(TokenType::Groups) {
        WindowFrameKind::Groups
    } else {
        return Ok(None);
    };
    // Preserve the user's original spelling of the keyword for round-tripping.
    let kind_text = self.tokens[self.current - 1].text.clone();
    // Either BETWEEN <bound> AND <bound>, or a single start bound.
    let (start, start_side_text, end, end_side_text);
    if self.match_token(TokenType::Between) {
        let (s, s_side) = self.parse_window_frame_bound()?;
        self.expect(TokenType::And)?;
        let (e, e_side) = self.parse_window_frame_bound()?;
        start = s;
        start_side_text = s_side;
        end = Some(e);
        end_side_text = e_side;
    } else {
        let (s, s_side) = self.parse_window_frame_bound()?;
        start = s;
        start_side_text = s_side;
        end = None;
        end_side_text = None;
    }
    // Optional frame-exclusion clause (SQL:2016).
    let exclude = if !self.match_token(TokenType::Exclude) {
        None
    } else if self.match_token(TokenType::Current) {
        self.expect(TokenType::Row)?;
        Some(WindowFrameExclude::CurrentRow)
    } else if self.match_token(TokenType::Group) {
        Some(WindowFrameExclude::Group)
    } else if self.match_token(TokenType::Ties) {
        Some(WindowFrameExclude::Ties)
    } else if self.match_token(TokenType::No) {
        self.expect(TokenType::Others)?;
        Some(WindowFrameExclude::NoOthers)
    } else {
        return Err(
            self.parse_error("Expected CURRENT ROW, GROUP, TIES, or NO OTHERS after EXCLUDE"),
        );
    };
    Ok(Some(WindowFrame {
        kind,
        start,
        end,
        exclude,
        kind_text: Some(kind_text),
        start_side_text,
        end_side_text,
    }))
}
/// Parse a single window frame bound.
///
/// Returns the bound together with the original text of its side keyword
/// (`PRECEDING`/`FOLLOWING`) when one was present, so the generator can
/// round-trip the user's spelling.
fn parse_window_frame_bound(&mut self) -> Result<(WindowFrameBound, Option<String>)> {
    // CURRENT ROW
    if self.match_token(TokenType::Current) {
        self.expect(TokenType::Row)?;
        return Ok((WindowFrameBound::CurrentRow, None));
    }
    // UNBOUNDED PRECEDING / UNBOUNDED FOLLOWING
    if self.match_token(TokenType::Unbounded) {
        if self.match_token(TokenType::Preceding) {
            let side = self.tokens[self.current - 1].text.clone();
            return Ok((WindowFrameBound::UnboundedPreceding, Some(side)));
        }
        if self.match_token(TokenType::Following) {
            let side = self.tokens[self.current - 1].text.clone();
            return Ok((WindowFrameBound::UnboundedFollowing, Some(side)));
        }
        return Err(self.parse_error("Expected PRECEDING or FOLLOWING after UNBOUNDED"));
    }
    // Inverted syntax used by some dialects: PRECEDING [value].
    if self.match_token(TokenType::Preceding) {
        let side = self.tokens[self.current - 1].text.clone();
        // Bare PRECEDING (immediately followed by `)` or `,`) carries no value.
        return if self.check(TokenType::RParen) || self.check(TokenType::Comma) {
            Ok((WindowFrameBound::BarePreceding, Some(side)))
        } else {
            let value = self.parse_primary()?;
            Ok((WindowFrameBound::Preceding(Box::new(value)), Some(side)))
        };
    }
    // Inverted syntax: FOLLOWING [value].
    if self.match_token(TokenType::Following) {
        let side = self.tokens[self.current - 1].text.clone();
        return if self.check(TokenType::RParen) || self.check(TokenType::Comma) {
            Ok((WindowFrameBound::BareFollowing, Some(side)))
        } else {
            let value = self.parse_primary()?;
            Ok((WindowFrameBound::Following(Box::new(value)), Some(side)))
        };
    }
    // Standard syntax: <expr> PRECEDING | <expr> FOLLOWING.
    // parse_addition allows arithmetic bounds such as `1 + 1 PRECEDING`.
    let value = self.parse_addition()?;
    if self.match_token(TokenType::Preceding) {
        let side = self.tokens[self.current - 1].text.clone();
        Ok((WindowFrameBound::Preceding(Box::new(value)), Some(side)))
    } else if self.match_token(TokenType::Following) {
        let side = self.tokens[self.current - 1].text.clone();
        Ok((WindowFrameBound::Following(Box::new(value)), Some(side)))
    } else {
        // Bare numeric bound with no PRECEDING/FOLLOWING keyword,
        // e.g. RANGE BETWEEN 1 AND 3.
        Ok((WindowFrameBound::Value(Box::new(value)), None))
    }
}
/// Try to parse an INTERVAL expression starting at the `INTERVAL` keyword.
///
/// Returns `Ok(None)` — with the cursor restored — when `INTERVAL` turns out
/// to be used as an ordinary identifier rather than introducing an interval
/// literal. Thin wrapper around [`Self::try_parse_interval_internal`] with
/// keyword matching enabled.
fn try_parse_interval(&mut self) -> Result<Option<Expression>> {
    self.try_parse_interval_internal(true)
}
/// Internal interval parsing that optionally matches the INTERVAL keyword.
///
/// When `match_interval` is true, the cursor must be on the `INTERVAL`
/// keyword; when false, a chained value-unit pair is parsed without the
/// keyword (used for Spark/Hive multi-unit intervals like
/// `INTERVAL '5' HOURS '30' MINUTES`).
///
/// Returns `Ok(None)` — with the cursor restored to where it started —
/// whenever the tokens turn out not to form an interval literal (i.e.
/// `INTERVAL` is being used as an ordinary identifier).
fn try_parse_interval_internal(&mut self, match_interval: bool) -> Result<Option<Expression>> {
    // Remember the starting position so every bail-out path can backtrack.
    let start_pos = self.current;
    // Consume the INTERVAL keyword if required
    if match_interval {
        if !self.check(TokenType::Interval) {
            return Ok(None);
        }
        self.expect(TokenType::Interval)?;
        // Check if next token is an operator - if so, INTERVAL is used as identifier
        // (e.g. a column literally named `interval` being compared or listed).
        if self.check(TokenType::Eq)
            || self.check(TokenType::Neq)
            || self.check(TokenType::Lt)
            || self.check(TokenType::Gt)
            || self.check(TokenType::Lte)
            || self.check(TokenType::Gte)
            || self.check(TokenType::And)
            || self.check(TokenType::Or)
            || self.check(TokenType::Is)
            || self.check(TokenType::In)
            || self.check(TokenType::Like)
            || self.check(TokenType::ILike)
            || self.check(TokenType::Between)
            || self.check(TokenType::Then)
            || self.check(TokenType::Else)
            || self.check(TokenType::When)
            || self.check(TokenType::End)
            || self.check(TokenType::Comma)
            || self.check(TokenType::RParen)
            || self.check(TokenType::DColon)
        {
            // INTERVAL is used as identifier
            self.current = start_pos;
            return Ok(None);
        }
    }
    // Parse the value after INTERVAL
    // IMPORTANT: For string literals, don't use parse_primary() because it calls
    // maybe_parse_subscript() which would consume postfix operators like ::TYPE.
    // Those should be applied to the full INTERVAL expression, not just the value inside.
    // e.g., INTERVAL '1 hour'::VARCHAR should be CAST(INTERVAL '1 hour' AS VARCHAR)
    // not INTERVAL CAST('1 hour' AS VARCHAR)
    // For non-string values, use parse_addition() to handle expressions like
    // INTERVAL 2 * 2 MONTH or INTERVAL DAYOFMONTH(dt) - 1 DAY (MySQL syntax)
    // This matches Python sqlglot's _parse_term() behavior which handles +, -, *, /, %
    let value = if self.check(TokenType::String) {
        let token = self.advance();
        Some(Expression::Literal(Box::new(Literal::String(token.text))))
    } else if !self.is_at_end() && !self.is_statement_terminator() {
        Some(self.parse_addition()?)
    } else {
        None
    };
    // Check if we should treat INTERVAL as an identifier instead
    // This happens when:
    // - No value was parsed, OR
    // - Value is an unqualified, unquoted column reference AND
    //   what follows is NOT a valid interval unit
    if let Some(ref val) = value {
        if let Expression::Column(col) = val {
            // Column without table qualifier
            if col.table.is_none() {
                // Check if identifier is quoted
                let is_quoted = col.name.quoted;
                if !is_quoted {
                    // Check if next token is a valid interval unit
                    // (AS is also accepted: `INTERVAL x AS ...` aliases the identifier)
                    if !self.is_valid_interval_unit() && !self.check(TokenType::As) {
                        // Backtrack - INTERVAL is used as identifier
                        self.current = start_pos;
                        return Ok(None);
                    }
                }
            }
        } else if let Expression::Identifier(id) = val {
            // Bare identifier without table qualifier
            let is_quoted = id.quoted;
            if !is_quoted {
                // Check if next token is a valid interval unit
                if !self.is_valid_interval_unit() && !self.check(TokenType::As) {
                    // Backtrack - INTERVAL is used as identifier
                    self.current = start_pos;
                    return Ok(None);
                }
            }
        }
    } else if self.is_at_end() || self.is_statement_terminator() {
        // No value, and at end/terminator - INTERVAL is an identifier
        self.current = start_pos;
        return Ok(None);
    }
    // Now parse the optional unit
    let mut unit = self.try_parse_interval_unit()?;
    // Split compound interval strings like '1 day' into value '1' and unit DAY
    // This matches Python sqlglot's INTERVAL_STRING_RE behavior
    // Only apply in generic mode -- dialects like PostgreSQL preserve compound strings
    let is_generic = self.config.dialect.is_none()
        || matches!(
            self.config.dialect,
            Some(crate::dialects::DialectType::Generic)
        );
    let value = if unit.is_none() && is_generic {
        if let Some(Expression::Literal(ref lit)) = value {
            if let Literal::String(ref s) = lit.as_ref() {
                let trimmed = s.trim();
                // Hand-rolled scan matching: [-] digits [. digits] whitespace+ alpha-unit
                let mut split_pos = None;
                let mut found_space = false;
                let bytes = trimmed.as_bytes();
                let mut i = 0;
                // Skip optional negative sign
                if i < bytes.len() && bytes[i] == b'-' {
                    i += 1;
                }
                // Expect digits
                let digit_start = i;
                while i < bytes.len() && bytes[i].is_ascii_digit() {
                    i += 1;
                }
                if i > digit_start {
                    // Optional decimal part
                    if i < bytes.len() && bytes[i] == b'.' {
                        i += 1;
                        while i < bytes.len() && bytes[i].is_ascii_digit() {
                            i += 1;
                        }
                    }
                    // Expect whitespace
                    let space_start = i;
                    while i < bytes.len() && bytes[i].is_ascii_whitespace() {
                        i += 1;
                    }
                    if i > space_start {
                        found_space = true;
                        split_pos = Some(i);
                    }
                }
                // found_space and split_pos are set together; split_pos points
                // just past the whitespace, at the start of the unit word.
                if found_space {
                    if let Some(pos) = split_pos {
                        let unit_text = &trimmed[pos..];
                        // Verify it's all alpha
                        if !unit_text.is_empty()
                            && unit_text.chars().all(|c| c.is_ascii_alphabetic())
                        {
                            let num_part = trimmed[..pos].trim_end().to_string();
                            let unit_upper = unit_text.to_ascii_uppercase();
                            // Try to parse as interval unit
                            if let Some(parsed_unit) =
                                Self::parse_interval_unit_from_string(&unit_upper)
                            {
                                // Check if the original text had an 'S' suffix (plural)
                                let is_plural = unit_upper.ends_with('S');
                                unit = Some(IntervalUnitSpec::Simple {
                                    unit: parsed_unit,
                                    use_plural: is_plural,
                                });
                                Some(Expression::Literal(Box::new(Literal::String(num_part))))
                            } else {
                                value
                            }
                        } else {
                            value
                        }
                    } else {
                        value
                    }
                } else {
                    value
                }
            } else {
                // NOTE(review): a unit-less non-string literal (e.g. a bare
                // number) is replaced with None here rather than kept as the
                // value — looks possibly unintended; confirm against Python
                // sqlglot's behavior for `INTERVAL 5` in generic mode.
                None
            }
        } else {
            value
        }
    } else {
        value
    };
    // Convert number literals to string literals in intervals (canonical form).
    // Most dialects support INTERVAL '5' DAY, so we normalize to this form
    // for easier transpilation. This matches Python sqlglot's behavior in
    // _parse_interval_span: "if this and this.is_number: this = exp.Literal.string(this.to_py())"
    let value = match value {
        Some(Expression::Literal(lit))
            if unit.is_some() && matches!(lit.as_ref(), Literal::Number(_)) =>
        {
            let Literal::Number(n) = lit.as_ref() else {
                unreachable!()
            };
            Some(Expression::Literal(Box::new(Literal::String(n.clone()))))
        }
        other => other,
    };
    let interval = Expression::Interval(Box::new(Interval { this: value, unit }));
    // Support for chained multi-unit interval syntax (Spark/Hive):
    // INTERVAL '5' HOURS '30' MINUTES -> INTERVAL '5' HOURS + INTERVAL '30' MINUTES
    // This is done by optionally matching a PLUS sign, and if followed by
    // another string or number (without INTERVAL keyword), recursively parsing
    // and creating an Add expression.
    let before_plus = self.current;
    let has_plus = self.match_token(TokenType::Plus);
    // Check if followed by a STRING or NUMBER (potential chained interval)
    if self.check(TokenType::String) || self.check(TokenType::Number) {
        // Recursively parse the chained interval without the INTERVAL keyword
        // (it restores its own position and returns None on failure).
        if let Some(next_interval) = self.try_parse_interval_internal(false)? {
            return Ok(Some(Expression::Add(Box::new(BinaryOp::new(
                interval,
                next_interval,
            )))));
        }
    }
    // If we consumed a PLUS but didn't find a chained interval, backtrack
    if has_plus {
        self.current = before_plus;
    }
    Ok(Some(interval))
}
/// Check whether the current token spells a supported interval unit.
///
/// Both singular and plural spellings (e.g. `DAY` / `DAYS`) are accepted.
/// The parser position is not advanced.
fn is_valid_interval_unit(&self) -> bool {
    if self.is_at_end() {
        return false;
    }
    // Singular unit names; a single trailing 'S' marks the plural spelling.
    const UNITS: &[&str] = &[
        "YEAR",
        "MONTH",
        "DAY",
        "HOUR",
        "MINUTE",
        "SECOND",
        "MILLISECOND",
        "MICROSECOND",
        "NANOSECOND",
        "WEEK",
        "QUARTER",
    ];
    let upper = self.peek().text.to_ascii_uppercase();
    let base = upper.strip_suffix('S').unwrap_or(&upper);
    UNITS.contains(&base)
}
/// Check if current token terminates a statement/expression context
fn is_statement_terminator(&self) -> bool {
if self.is_at_end() {
return true;
}
matches!(
self.peek().token_type,
TokenType::Semicolon
| TokenType::RParen
| TokenType::RBracket
| TokenType::Comma
| TokenType::From
| TokenType::Where
| TokenType::GroupBy
| TokenType::Having
| TokenType::OrderBy
| TokenType::Limit
| TokenType::Union
| TokenType::Intersect
| TokenType::Except
| TokenType::End
| TokenType::Then
| TokenType::Else
| TokenType::When
)
}
/// Try to parse an interval unit specification.
///
/// Accepts a function expression (e.g. `CURRENT_DATE`, `CAST(...)`), a simple
/// unit (`DAY`), or a unit span (`YEAR TO MONTH`). Returns `Ok(None)` without
/// consuming anything when no unit is present.
fn try_parse_interval_unit(&mut self) -> Result<Option<IntervalUnitSpec>> {
    // A function call may serve as the unit expression.
    if self.is_function_start() {
        let func = self.parse_primary()?;
        return Ok(Some(IntervalUnitSpec::Expr(Box::new(func))));
    }
    // Otherwise a plain unit keyword is required.
    let Some((unit, use_plural)) = self.try_parse_simple_interval_unit()? else {
        return Ok(None);
    };
    // "<unit> TO <unit>" forms a span (e.g. YEAR TO MONTH). Peek first so a
    // TO that belongs to another construct (e.g. WITH FILL) is not swallowed.
    if self.check_keyword_text("TO") {
        let checkpoint = self.current;
        self.skip(); // consume TO
        match self.try_parse_simple_interval_unit()? {
            Some((end_unit, _)) => {
                return Ok(Some(IntervalUnitSpec::Span(IntervalSpan {
                    this: unit,
                    expression: end_unit,
                })));
            }
            // TO was not followed by a unit — restore and fall through.
            None => self.current = checkpoint,
        }
    }
    Ok(Some(IntervalUnitSpec::Simple { unit, use_plural }))
}
/// Parse an interval unit from a string (used for splitting compound interval
/// strings such as `'5 DAYS'`).
///
/// Accepts both singular and plural spellings (`DAY` / `DAYS`); the caller is
/// expected to pass an already upper-cased string. Returns `None` for
/// unrecognized unit names.
fn parse_interval_unit_from_string(s: &str) -> Option<IntervalUnit> {
    // Strip a single trailing 'S' so plural forms map to the same unit.
    let base = if s.ends_with('S') && s.len() > 1 {
        &s[..s.len() - 1]
    } else {
        s
    };
    match base {
        "YEAR" => Some(IntervalUnit::Year),
        "MONTH" => Some(IntervalUnit::Month),
        "DAY" => Some(IntervalUnit::Day),
        "HOUR" => Some(IntervalUnit::Hour),
        "MINUTE" => Some(IntervalUnit::Minute),
        "SECOND" => Some(IntervalUnit::Second),
        "MILLISECOND" => Some(IntervalUnit::Millisecond),
        "MICROSECOND" => Some(IntervalUnit::Microsecond),
        // NANOSECOND was previously missing here even though the token-level
        // unit parsers (`is_valid_interval_unit`, `try_parse_simple_interval_unit`)
        // accept it, so compound strings like '5 NANOSECONDS' failed to split.
        "NANOSECOND" => Some(IntervalUnit::Nanosecond),
        "QUARTER" => Some(IntervalUnit::Quarter),
        "WEEK" => Some(IntervalUnit::Week),
        _ => None,
    }
}
/// Try to parse a simple interval unit (YEAR, MONTH, etc.).
///
/// On success the unit token is consumed and `(unit, is_plural)` is returned;
/// otherwise the parser is left untouched and `Ok(None)` is returned.
fn try_parse_simple_interval_unit(&mut self) -> Result<Option<(IntervalUnit, bool)>> {
    if self.is_at_end() {
        return Ok(None);
    }
    let upper = self.peek().text.to_ascii_uppercase();
    // Plural spellings are exactly the singular ones plus a trailing 'S'.
    let is_plural = upper.len() > 1 && upper.ends_with('S');
    let base = if is_plural {
        &upper[..upper.len() - 1]
    } else {
        upper.as_str()
    };
    let unit = match base {
        "YEAR" => IntervalUnit::Year,
        "MONTH" => IntervalUnit::Month,
        "DAY" => IntervalUnit::Day,
        "HOUR" => IntervalUnit::Hour,
        "MINUTE" => IntervalUnit::Minute,
        "SECOND" => IntervalUnit::Second,
        "MILLISECOND" => IntervalUnit::Millisecond,
        "MICROSECOND" => IntervalUnit::Microsecond,
        "NANOSECOND" => IntervalUnit::Nanosecond,
        "QUARTER" => IntervalUnit::Quarter,
        "WEEK" => IntervalUnit::Week,
        _ => return Ok(None),
    };
    self.skip(); // consume the unit token
    Ok(Some((unit, is_plural)))
}
/// Check whether the current position starts a function call or a
/// no-parenthesis function (e.g. `CURRENT_DATE`). Does not advance.
fn is_function_start(&self) -> bool {
    if self.is_at_end() {
        return false;
    }
    let is_clickhouse = matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    );
    let token_type = self.peek().token_type;
    // Zero-argument functions from the NO_PAREN_FUNCTIONS map. ClickHouse
    // requires parentheses on CURRENT_TIMESTAMP, so it is excluded there.
    if NO_PAREN_FUNCTIONS.contains(&token_type)
        && !(is_clickhouse && token_type == TokenType::CurrentTimestamp)
    {
        return true;
    }
    // CAST/TRY_CAST/SAFE_CAST always introduce a function.
    if matches!(
        token_type,
        TokenType::Cast | TokenType::TryCast | TokenType::SafeCast
    ) {
        return true;
    }
    // String-based lookup handles functions tokenized as Var/Identifier;
    // same ClickHouse CURRENT_TIMESTAMP carve-out as above.
    let upper = self.peek().text.to_ascii_uppercase();
    if crate::function_registry::is_no_paren_function_name_upper(upper.as_str())
        && !(is_clickhouse && upper == "CURRENT_TIMESTAMP")
    {
        return true;
    }
    // Any identifier immediately followed by '(' is a call.
    self.is_identifier_token() && self.check_next(TokenType::LParen)
}
/// Try to parse an Oracle interval span after an expression.
///
/// Syntax: `(expr) DAY[(precision)] TO SECOND[(fractional_precision)]`, e.g.
/// `(SYSTIMESTAMP - order_date) DAY(9) TO SECOND(3)`.
///
/// If the upcoming tokens do not form a complete span, the parser position is
/// restored and `expr` is returned unchanged. Errors from parsing a precision
/// expression are propagated.
fn try_parse_oracle_interval_span(&mut self, expr: Expression) -> Result<Expression> {
    let start_pos = self.current;
    // Leading unit (DAY/HOUR/...), with optional precision.
    let start_unit = match self.parse_oracle_interval_span_unit()? {
        Some(unit) => unit,
        None => return Ok(expr),
    };
    // A span requires the TO keyword; otherwise this was not a span at all.
    if !self.match_keyword("TO") {
        self.current = start_pos;
        return Ok(expr);
    }
    // Trailing unit, with optional (fractional) precision.
    let end_unit = match self.parse_oracle_interval_span_unit()? {
        Some(unit) => unit,
        None => {
            // No valid end unit — backtrack fully.
            self.current = start_pos;
            return Ok(expr);
        }
    };
    // Represent the whole construct as an Interval with an ExprSpan unit.
    Ok(Expression::Interval(Box::new(Interval {
        this: Some(expr),
        unit: Some(IntervalUnitSpec::ExprSpan(IntervalSpanExpr {
            this: Box::new(start_unit),
            expression: Box::new(end_unit),
        })),
    })))
}

/// Parse one unit of an Oracle interval span: `DAY`, `SECOND(3)`, etc.
///
/// Returns `Ok(None)` without consuming anything when the current token is
/// not one of the recognized unit keywords. A unit with precision is encoded
/// as a call-like [`Anonymous`] node; a bare unit becomes a [`Var`].
fn parse_oracle_interval_span_unit(&mut self) -> Result<Option<Expression>> {
    if self.is_at_end() {
        return Ok(None);
    }
    let name = self.peek().text.to_ascii_uppercase();
    if !matches!(
        name.as_str(),
        "DAY" | "HOUR" | "MINUTE" | "SECOND" | "YEAR" | "MONTH"
    ) {
        return Ok(None);
    }
    self.skip(); // consume the unit keyword
    let unit = if self.match_token(TokenType::LParen) {
        // Unit with precision, e.g. DAY(9).
        let precision = self.parse_expression()?;
        self.expect(TokenType::RParen)?;
        Expression::Anonymous(Box::new(Anonymous {
            this: Box::new(Expression::Identifier(Identifier {
                name,
                quoted: false,
                trailing_comments: Vec::new(),
                span: None,
            })),
            expressions: vec![precision],
        }))
    } else {
        // Simple unit without precision.
        Expression::Var(Box::new(Var { this: name }))
    };
    Ok(Some(unit))
}
/// Check if the current position starts a typed column list (for table
/// function aliases) like `(col1 type1, col2 type2)`.
///
/// Called just after '(' was consumed. Peeks one token past the first column
/// name: a comma or ')' means plain column aliases; a type-like token means a
/// typed column definition. Used for PostgreSQL functions such as
/// JSON_TO_RECORDSET. Does not advance the parser.
fn check_typed_column_list(&self) -> bool {
    if self.is_at_end() {
        return false;
    }
    // The first token must be a plausible column name.
    let is_name = [
        TokenType::Identifier,
        TokenType::QuotedIdentifier,
        TokenType::Var,
    ]
    .iter()
    .any(|t| self.check(*t));
    if !is_name {
        return false;
    }
    match self.tokens.get(self.current + 1) {
        None => false,
        Some(next) => {
            // ',' or ')' right after the name means simple column aliases.
            if matches!(next.token_type, TokenType::Comma | TokenType::RParen) {
                return false;
            }
            // Anything type-like (type keyword, identifier, or var) means
            // the list carries typed column definitions.
            TYPE_TOKENS.contains(&next.token_type)
                || matches!(next.token_type, TokenType::Identifier | TokenType::Var)
        }
    }
}
/// Check whether the current token is a no-parenthesis function
/// (e.g. `CURRENT_DATE`). Does not advance the parser.
fn is_no_paren_function(&self) -> bool {
    if self.is_at_end() {
        return false;
    }
    let is_clickhouse = matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    );
    let token_type = self.peek().token_type;
    // Token-type lookup; ClickHouse requires parentheses on
    // CURRENT_TIMESTAMP, so it is excluded there.
    if NO_PAREN_FUNCTIONS.contains(&token_type)
        && !(is_clickhouse && token_type == TokenType::CurrentTimestamp)
    {
        return true;
    }
    // String-based lookup for functions tokenized as Var/Identifier, with
    // the same ClickHouse CURRENT_TIMESTAMP carve-out.
    let upper = self.peek().text.to_ascii_uppercase();
    crate::function_registry::is_no_paren_function_name_upper(upper.as_str())
        && !(is_clickhouse && upper == "CURRENT_TIMESTAMP")
}
/// Match a keyword by text (case-insensitive).
///
/// Consumes the token and returns `true` on a match; otherwise leaves the
/// parser untouched and returns `false`.
fn match_keyword(&mut self, keyword: &str) -> bool {
    let matched = !self.is_at_end() && self.peek().text.eq_ignore_ascii_case(keyword);
    if matched {
        self.skip();
    }
    matched
}
/// Match a sequence of keywords by text (case-insensitive).
///
/// Consumes the whole sequence on success; consumes nothing otherwise.
fn match_text_seq(&mut self, keywords: &[&str]) -> bool {
    let end = self.current + keywords.len();
    // Out of range means the sequence cannot possibly match.
    let Some(window) = self.tokens.get(self.current..end) else {
        return false;
    };
    let all_match = window
        .iter()
        .zip(keywords)
        .all(|(token, kw)| token.text.eq_ignore_ascii_case(kw));
    if all_match {
        self.current = end;
    }
    all_match
}
/// Check (without consuming) whether the upcoming tokens match a sequence of
/// keywords by text (case-insensitive).
fn check_text_seq(&self, keywords: &[&str]) -> bool {
    keywords.iter().enumerate().all(|(offset, kw)| {
        self.tokens
            .get(self.current + offset)
            .is_some_and(|token| token.text.eq_ignore_ascii_case(kw))
    })
}
/// Match any one of the given texts (case-insensitive).
///
/// Consumes the token and returns `true` on the first match; otherwise
/// leaves the parser untouched and returns `false`.
fn match_texts(&mut self, texts: &[&str]) -> bool {
    if self.is_at_end() {
        return false;
    }
    let matched = texts
        .iter()
        .any(|candidate| self.peek().text.eq_ignore_ascii_case(candidate));
    if matched {
        self.skip();
    }
    matched
}
/// Parse a CASE expression (both simple `CASE expr WHEN ...` and searched
/// `CASE WHEN cond ...` forms), including the ClickHouse extension that
/// allows aliasing a THEN result.
fn parse_case(&mut self) -> Result<Expression> {
    self.expect(TokenType::Case)?;
    // Comments attached to the CASE keyword itself (e.g. CASE /* note */ WHEN).
    let comments = self.previous_trailing_comments().to_vec();
    // A simple CASE carries an operand before the first WHEN.
    let operand = match self.check(TokenType::When) {
        true => None,
        false => Some(self.parse_expression()?),
    };
    let mut branches = Vec::new();
    while self.match_token(TokenType::When) {
        let condition = self.parse_expression()?;
        self.expect(TokenType::Then)?;
        let mut then_expr = self.parse_expression()?;
        // ClickHouse permits aliasing THEN results so later branches can
        // refer back to them:
        //   CASE WHEN x THEN 1 AS a WHEN y THEN a / 2 END
        let clickhouse = matches!(
            self.config.dialect,
            Some(crate::dialects::DialectType::ClickHouse)
        );
        if clickhouse && self.match_token(TokenType::As) {
            let alias_name = self.expect_identifier_or_keyword()?;
            then_expr = Expression::Alias(Box::new(Alias {
                this: then_expr,
                alias: Identifier::new(alias_name),
                column_aliases: Vec::new(),
                pre_alias_comments: Vec::new(),
                trailing_comments: Vec::new(),
                inferred_type: None,
            }));
        }
        branches.push((condition, then_expr));
    }
    let else_ = match self.match_token(TokenType::Else) {
        true => Some(self.parse_expression()?),
        false => None,
    };
    self.expect(TokenType::End)?;
    Ok(Expression::Case(Box::new(Case {
        operand,
        whens: branches,
        else_,
        comments,
        inferred_type: None,
    })))
}
/// Parse a CAST expression.
///
/// Handles the standard `CAST(expr AS type)` plus several dialect extensions:
/// - ClickHouse: ternary `cond ? a : b` inside the cast, implicit aliases,
///   and the comma form `CAST(expr, 'type_string')` (emitted as
///   `CastToStrType` since the type is a runtime expression, not a DataType).
/// - Teradata: `CAST(x AS FORMAT 'fmt')` with no explicit type.
/// - Oracle: `DEFAULT val ON CONVERSION ERROR` and `CAST(x AS DATE, 'fmt')`.
/// - BigQuery: trailing `FORMAT 'fmt'` (optionally parenthesized, optionally
///   followed by `AT TIME ZONE`).
///
/// # Errors
/// Returns a parse error on malformed input (missing `AS`, unbalanced
/// parentheses, missing `ON CONVERSION ERROR` after `DEFAULT`, etc.).
fn parse_cast(&mut self) -> Result<Expression> {
    self.expect(TokenType::Cast)?;
    self.expect(TokenType::LParen)?;
    // Use parse_or() instead of parse_expression() to avoid consuming AS
    // as an alias (e.g. CAST((1, 2) AS Tuple(a Int8, b Int16)))
    // Python sqlglot uses _parse_disjunction() here, which is equivalent.
    let expr = self.parse_or()?;
    // ClickHouse: ternary operator inside CAST: CAST(cond ? true_val : false_val AS Type)
    let expr = if matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    ) && self.match_token(TokenType::Parameter)
    {
        // '?' immediately followed by ':' has no true-branch expression.
        if self.check(TokenType::Colon) {
            return Err(
                self.parse_error("Expected true expression after ? in ClickHouse ternary")
            );
        }
        let true_value = self.parse_or()?;
        // A missing ':' branch defaults the false value to NULL.
        let false_value = if self.match_token(TokenType::Colon) {
            self.parse_or()?
        } else {
            Expression::Null(Null)
        };
        // The ternary is represented as an IF function node.
        Expression::IfFunc(Box::new(IfFunc {
            original_name: None,
            condition: expr,
            true_value,
            false_value: Some(false_value),
            inferred_type: None,
        }))
    } else {
        expr
    };
    // ClickHouse: implicit alias in CAST: cast('1234' lhs AS UInt32) or cast('1234' lhs, 'UInt32')
    let expr = self.try_clickhouse_implicit_alias(expr);
    // ClickHouse: CAST(expr, 'type_string') or CAST(expr, expression) syntax with comma instead of AS
    if matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    ) && self.match_token(TokenType::Comma)
    {
        // Parse as expression to handle concat and other operations: CAST(x, 'Str' || 'ing')
        let type_expr = self.parse_expression()?;
        // ClickHouse: alias on type expr: cast('1234' lhs, 'UInt32' rhs) or cast('1234', 'UInt32' AS rhs)
        let type_expr = self.try_clickhouse_func_arg_alias(type_expr);
        self.expect(TokenType::RParen)?;
        let _trailing_comments = self.previous_trailing_comments().to_vec();
        // Comma form: the type is an expression, so emit CastToStrType
        // instead of a regular Cast.
        return Ok(Expression::CastToStrType(Box::new(CastToStrType {
            this: Box::new(expr),
            to: Some(Box::new(type_expr)),
        })));
    }
    self.expect(TokenType::As)?;
    // ClickHouse: CAST(expr AS alias AS Type) — inner alias before type
    // If the next token is an identifier followed by AS, treat it as an alias
    let expr = if matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    ) && (self.is_identifier_token() || self.is_safe_keyword_as_identifier())
        && self
            .peek_nth(1)
            .map_or(false, |t| t.token_type == TokenType::As)
    {
        let alias = self.expect_identifier_or_keyword_with_quoted()?;
        self.expect(TokenType::As)?;
        Expression::Alias(Box::new(Alias::new(expr, alias)))
    } else if matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    ) && (self.is_identifier_token() || self.is_safe_keyword_as_identifier())
        && self
            .peek_nth(1)
            .map_or(false, |t| t.token_type == TokenType::Comma)
    {
        // ClickHouse: CAST(expr AS alias, type_string) — alias before comma syntax
        let alias = self.expect_identifier_or_keyword_with_quoted()?;
        let expr = Expression::Alias(Box::new(Alias::new(expr, alias)));
        self.expect(TokenType::Comma)?;
        let type_expr = self.parse_expression()?;
        let type_expr = self.try_clickhouse_func_arg_alias(type_expr);
        self.expect(TokenType::RParen)?;
        let _trailing_comments = self.previous_trailing_comments().to_vec();
        // Same comma form as above, but with the alias already attached.
        return Ok(Expression::CastToStrType(Box::new(CastToStrType {
            this: Box::new(expr),
            to: Some(Box::new(type_expr)),
        })));
    } else {
        expr
    };
    // Teradata: CAST(x AS FORMAT 'fmt') (no explicit type)
    if matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::Teradata)
    ) && self.match_token(TokenType::Format)
    {
        let format = Some(Box::new(self.parse_expression()?));
        self.expect(TokenType::RParen)?;
        let trailing_comments = self.previous_trailing_comments().to_vec();
        // No type was given, so the target type is Unknown.
        return Ok(Expression::Cast(Box::new(Cast {
            this: expr,
            to: DataType::Unknown,
            trailing_comments,
            double_colon_syntax: false,
            format,
            default: None,
            inferred_type: None,
        })));
    }
    let data_type = self.parse_data_type()?;
    // Parse optional DEFAULT ... ON CONVERSION ERROR (Oracle)
    // CAST(x AS type DEFAULT val ON CONVERSION ERROR)
    let default = if self.match_token(TokenType::Default) {
        let default_val = self.parse_primary()?;
        // Expect "ON CONVERSION ERROR"
        if !self.match_text_seq(&["ON", "CONVERSION", "ERROR"]) {
            return Err(self.parse_error("Expected ON CONVERSION ERROR"));
        }
        Some(Box::new(default_val))
    } else {
        None
    };
    // Parse optional FORMAT clause for BigQuery: CAST(x AS STRING FORMAT 'format_string')
    // Or for Oracle with comma: CAST(x AS DATE DEFAULT NULL ON CONVERSION ERROR, 'format')
    // FORMAT string may be optionally wrapped in parentheses: FORMAT ('YYYY') -> FORMAT 'YYYY'
    let format = if self.match_token(TokenType::Format) {
        let wrapped = self.match_token(TokenType::LParen);
        let fmt_expr = self.parse_primary()?;
        if wrapped {
            self.expect(TokenType::RParen)?;
        }
        // Check for AT TIME ZONE after format string
        let fmt_with_tz = if self.match_text_seq(&["AT", "TIME", "ZONE"]) {
            let zone = self.parse_primary()?;
            Expression::AtTimeZone(Box::new(crate::expressions::AtTimeZone {
                this: fmt_expr,
                zone,
            }))
        } else {
            fmt_expr
        };
        Some(Box::new(fmt_with_tz))
    } else if self.match_token(TokenType::Comma) {
        // Oracle date format: CAST(x AS DATE, 'format')
        Some(Box::new(self.parse_expression()?))
    } else {
        None
    };
    self.expect(TokenType::RParen)?;
    let trailing_comments = self.previous_trailing_comments().to_vec();
    Ok(Expression::Cast(Box::new(Cast {
        this: expr,
        to: data_type,
        trailing_comments,
        double_colon_syntax: false,
        format,
        default,
        inferred_type: None,
    })))
}
/// Parse a TRY_CAST expression: `TRY_CAST(expr AS type [FORMAT fmt])`.
fn parse_try_cast(&mut self) -> Result<Expression> {
    self.expect(TokenType::TryCast)?;
    self.expect(TokenType::LParen)?;
    // parse_or() keeps AS available for the type clause below.
    let this = self.parse_or()?;
    self.expect(TokenType::As)?;
    let to = self.parse_data_type()?;
    // Optional FORMAT clause.
    let format = match self.match_token(TokenType::Format) {
        true => Some(Box::new(self.parse_expression()?)),
        false => None,
    };
    self.expect(TokenType::RParen)?;
    let trailing_comments = self.previous_trailing_comments().to_vec();
    Ok(Expression::TryCast(Box::new(Cast {
        this,
        to,
        trailing_comments,
        double_colon_syntax: false,
        format,
        default: None,
        inferred_type: None,
    })))
}
/// Parse a SAFE_CAST expression (BigQuery): `SAFE_CAST(expr AS type [FORMAT fmt])`.
fn parse_safe_cast(&mut self) -> Result<Expression> {
    self.expect(TokenType::SafeCast)?;
    self.expect(TokenType::LParen)?;
    // parse_or() keeps AS available for the type clause below.
    let this = self.parse_or()?;
    self.expect(TokenType::As)?;
    let to = self.parse_data_type()?;
    // Optional FORMAT clause.
    let format = match self.match_token(TokenType::Format) {
        true => Some(Box::new(self.parse_expression()?)),
        false => None,
    };
    self.expect(TokenType::RParen)?;
    let trailing_comments = self.previous_trailing_comments().to_vec();
    Ok(Expression::SafeCast(Box::new(Cast {
        this,
        to,
        trailing_comments,
        double_colon_syntax: false,
        format,
        default: None,
        inferred_type: None,
    })))
}
/// Parse a data type
fn parse_data_type(&mut self) -> Result<DataType> {
// Handle special token types that represent data type keywords
// Teradata tokenizes ST_GEOMETRY as TokenType::Geometry
if self.check(TokenType::Geometry) {
let _token = self.advance();
let (subtype, srid) = self.parse_spatial_type_args()?;
return Ok(DataType::Geometry { subtype, srid });
}
// Data types can be keywords (DATE, TIMESTAMP, etc.) or identifiers
let mut raw_name = self.expect_identifier_or_keyword()?;
// Allow dotted custom types like SYSUDTLIB.INT
while self.match_token(TokenType::Dot) {
let part = self.expect_identifier_or_keyword()?;
raw_name.push('.');
raw_name.push_str(&part);
}
let mut name = raw_name.to_ascii_uppercase();
// SQL standard: NATIONAL CHAR/CHARACTER → NCHAR
if name == "NATIONAL" {
let next_upper = if !self.is_at_end() {
self.peek().text.to_ascii_uppercase()
} else {
String::new()
};
if next_upper == "CHAR" || next_upper == "CHARACTER" {
self.skip(); // consume CHAR/CHARACTER
name = "NCHAR".to_string();
// NATIONAL CHARACTER VARYING → NVARCHAR equivalent
if next_upper == "CHARACTER" && self.check_identifier("VARYING") {
self.skip(); // consume VARYING
let length = if self.match_token(TokenType::LParen) {
if self.check(TokenType::RParen) {
self.skip();
None
} else {
let n = self.expect_number()? as u32;
self.expect(TokenType::RParen)?;
Some(n)
}
} else {
None
};
return Ok(DataType::VarChar {
length,
parenthesized_length: false,
});
}
}
}
let base_type = match name.as_str() {
"INT" | "INTEGER" => {
// MySQL allows INT(N) for display width; ClickHouse allows INT()
let length = if self.match_token(TokenType::LParen) {
if self.check(TokenType::RParen) {
self.skip();
None
} else {
let n = self.expect_number()? as u32;
self.expect(TokenType::RParen)?;
Some(n)
}
} else {
None
};
let integer_spelling = name == "INTEGER";
Ok(DataType::Int {
length,
integer_spelling,
})
}
"BIGINT" => {
// MySQL allows BIGINT(N) for display width; ClickHouse allows BIGINT()
let length = if self.match_token(TokenType::LParen) {
if self.check(TokenType::RParen) {
self.skip();
None
} else {
let n = self.expect_number()? as u32;
self.expect(TokenType::RParen)?;
Some(n)
}
} else {
None
};
Ok(DataType::BigInt { length })
}
"SMALLINT" => {
let length = if self.match_token(TokenType::LParen) {
if self.check(TokenType::RParen) {
self.skip();
None
} else {
let n = self.expect_number()? as u32;
self.expect(TokenType::RParen)?;
Some(n)
}
} else {
None
};
Ok(DataType::SmallInt { length })
}
"TINYINT" => {
let length = if self.match_token(TokenType::LParen) {
if self.check(TokenType::RParen) {
self.skip();
None
} else {
let n = self.expect_number()? as u32;
self.expect(TokenType::RParen)?;
Some(n)
}
} else {
None
};
Ok(DataType::TinyInt { length })
}
"FLOAT" | "REAL" => {
let real_spelling = name == "REAL";
// MySQL allows FLOAT(precision) or FLOAT(precision, scale)
let (precision, scale) = if self.match_token(TokenType::LParen) {
let p = self.expect_number()? as u32;
let s = if self.match_token(TokenType::Comma) {
Some(self.expect_number()? as u32)
} else {
None
};
self.expect(TokenType::RParen)?;
(Some(p), s)
} else {
(None, None)
};
Ok(DataType::Float {
precision,
scale,
real_spelling,
})
}
"BINARY_FLOAT" => {
// Oracle's BINARY_FLOAT -> DataType::Float
Ok(DataType::Float {
precision: None,
scale: None,
real_spelling: false,
})
}
"BINARY_DOUBLE" => {
// Oracle's BINARY_DOUBLE -> DataType::Double
Ok(DataType::Double {
precision: None,
scale: None,
})
}
"DOUBLE" => {
// Handle DOUBLE PRECISION (PostgreSQL standard SQL)
let _ = self.match_identifier("PRECISION");
// MySQL allows DOUBLE(precision, scale)
let (precision, scale) = if self.match_token(TokenType::LParen) {
let p = self.expect_number()? as u32;
let s = if self.match_token(TokenType::Comma) {
Some(self.expect_number()? as u32)
} else {
None
};
self.expect(TokenType::RParen)?;
(Some(p), s)
} else {
(None, None)
};
Ok(DataType::Double { precision, scale })
}
"DECIMAL" | "NUMERIC" => {
let (precision, scale) = if self.match_token(TokenType::LParen) {
let p = self.expect_number()? as u32;
let s = if self.match_token(TokenType::Comma) {
Some(self.expect_number()? as u32)
} else {
None
};
self.expect(TokenType::RParen)?;
(Some(p), s)
} else {
(None, None)
};
Ok(DataType::Decimal { precision, scale })
}
"BOOLEAN" | "BOOL" => Ok(DataType::Boolean),
"CHAR" | "CHARACTER" | "NCHAR" => {
let is_nchar = name == "NCHAR";
// SQL standard: CHARACTER LARGE OBJECT → CLOB/TEXT
if self.match_identifier("LARGE") && self.match_identifier("OBJECT") {
return Ok(DataType::Text);
}
// Check for VARYING to convert to VARCHAR (SQL standard: CHAR VARYING, CHARACTER VARYING)
if self.match_identifier("VARYING") {
let length = if self.match_token(TokenType::LParen) {
if self.check(TokenType::RParen) {
self.skip();
None
} else {
let n = self.expect_number()? as u32;
self.expect(TokenType::RParen)?;
Some(n)
}
} else {
None
};
Ok(DataType::VarChar {
length,
parenthesized_length: false,
})
} else {
let length = if self.match_token(TokenType::LParen) {
// Allow empty parens like NCHAR() - treat as no length specified
if self.check(TokenType::RParen) {
self.skip(); // consume RParen
None
} else {
let n = self.expect_number()? as u32;
self.expect(TokenType::RParen)?;
Some(n)
}
} else {
None
};
// CHAR CHARACTER SET charset (MySQL CAST context, no length)
// When length is specified (e.g., CHAR(4) CHARACTER SET LATIN),
// CHARACTER SET is a column attribute handled at the column def level
if length.is_none()
&& self.match_identifier("CHARACTER")
&& self.match_token(TokenType::Set)
{
let charset = self.expect_identifier_or_keyword()?;
return Ok(DataType::CharacterSet { name: charset });
}
// Preserve NCHAR as Custom DataType so target dialects can map it properly
// (Oracle keeps NCHAR, TSQL keeps NCHAR, others map to CHAR)
if is_nchar {
let name = if let Some(len) = length {
format!("NCHAR({})", len)
} else {
"NCHAR".to_string()
};
return Ok(DataType::Custom { name });
}
Ok(DataType::Char { length })
}
}
"VARCHAR" | "NVARCHAR" => {
let is_nvarchar = name == "NVARCHAR";
if self.match_token(TokenType::LParen) {
// Allow empty parens like NVARCHAR() - treat as no length specified
if self.check(TokenType::RParen) {
self.skip(); // consume RParen
if is_nvarchar {
return Ok(DataType::Custom {
name: "NVARCHAR".to_string(),
});
}
Ok(DataType::VarChar {
length: None,
parenthesized_length: false,
})
} else if self.check_identifier("MAX") {
// TSQL: VARCHAR(MAX) / NVARCHAR(MAX)
self.skip(); // consume MAX
self.expect(TokenType::RParen)?;
let type_name = if is_nvarchar {
"NVARCHAR(MAX)"
} else {
"VARCHAR(MAX)"
};
Ok(DataType::Custom {
name: type_name.to_string(),
})
} else {
// Hive allows VARCHAR((50)) - extra parentheses around the length
let parenthesized_length = self.match_token(TokenType::LParen);
let n = self.expect_number()? as u32;
if parenthesized_length {
self.expect(TokenType::RParen)?;
}
self.expect(TokenType::RParen)?;
// Preserve NVARCHAR as Custom DataType so target dialects can map properly
if is_nvarchar {
return Ok(DataType::Custom {
name: format!("NVARCHAR({})", n),
});
}
Ok(DataType::VarChar {
length: Some(n),
parenthesized_length,
})
}
} else {
if is_nvarchar {
return Ok(DataType::Custom {
name: "NVARCHAR".to_string(),
});
}
Ok(DataType::VarChar {
length: None,
parenthesized_length: false,
})
}
}
"TEXT" | "NTEXT" => {
// TEXT(n) - optional length parameter
if self.match_token(TokenType::LParen) {
let n = self.expect_number()? as u32;
self.expect(TokenType::RParen)?;
Ok(DataType::TextWithLength { length: n })
} else {
Ok(DataType::Text)
}
}
"STRING" => {
// BigQuery STRING(n) - parameterized string with max length
let length = if self.match_token(TokenType::LParen) {
let n = self.expect_number()? as u32;
self.expect(TokenType::RParen)?;
Some(n)
} else {
None
};
Ok(DataType::String { length })
}
"DATE" => Ok(DataType::Date),
"TIME" => {
// ClickHouse: Time('timezone') is a custom type with string arg
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.check(TokenType::LParen)
&& self.current + 1 < self.tokens.len()
&& self.tokens[self.current + 1].token_type == TokenType::String
{
self.skip(); // consume LParen
let args = self.parse_custom_type_args_balanced()?;
self.expect(TokenType::RParen)?;
return Ok(DataType::Custom {
name: format!("Time({})", args),
});
}
let precision = if self.match_token(TokenType::LParen) {
if self.check(TokenType::RParen) {
self.skip();
None
} else {
let p = self.expect_number()? as u32;
self.expect(TokenType::RParen)?;
Some(p)
}
} else {
None
};
// Handle TIME WITH/WITHOUT TIME ZONE
let timezone = if self.match_token(TokenType::With) {
self.match_keyword("TIME");
self.match_keyword("ZONE");
true
} else if self.match_keyword("WITHOUT") {
self.match_keyword("TIME");
self.match_keyword("ZONE");
false
} else {
false
};
Ok(DataType::Time {
precision,
timezone,
})
}
"TIMETZ" => {
let precision = if self.match_token(TokenType::LParen) {
let p = self.expect_number()? as u32;
self.expect(TokenType::RParen)?;
Some(p)
} else {
None
};
Ok(DataType::Time {
precision,
timezone: true,
})
}
"TIMESTAMP" => {
// Parse optional precision: TIMESTAMP(p)
let precision = if self.match_token(TokenType::LParen) {
let p = self.expect_number()? as u32;
self.expect(TokenType::RParen)?;
Some(p)
} else {
None
};
// Parse optional WITH/WITHOUT TIME ZONE or WITH LOCAL TIME ZONE
// Note: TIME is a keyword (TokenType::Time) and LOCAL is a keyword (TokenType::Local)
if self.match_token(TokenType::With) {
// Check for LOCAL TIME ZONE (Exasol) vs TIME ZONE
// LOCAL is tokenized as TokenType::Local, not as Identifier
if self.match_token(TokenType::Local) {
self.match_keyword("TIME");
self.match_keyword("ZONE");
// TIMESTAMP WITH LOCAL TIME ZONE - return as custom type for Exasol handling
Ok(DataType::Custom {
name: "TIMESTAMPLTZ".to_string(),
})
} else {
self.match_keyword("TIME");
self.match_keyword("ZONE");
Ok(DataType::Timestamp {
precision,
timezone: true,
})
}
} else if self.match_keyword("WITHOUT") {
self.match_keyword("TIME");
self.match_keyword("ZONE");
Ok(DataType::Timestamp {
precision,
timezone: false,
})
} else {
Ok(DataType::Timestamp {
precision,
timezone: false,
})
}
}
"TIMESTAMPTZ" | "TIMESTAMP_TZ" => {
let precision = if self.match_token(TokenType::LParen) {
let p = self.expect_number()? as u32;
self.expect(TokenType::RParen)?;
Some(p)
} else {
None
};
Ok(DataType::Timestamp {
precision,
timezone: true,
})
}
"TIMESTAMPLTZ" | "TIMESTAMP_LTZ" => {
let precision = if self.match_token(TokenType::LParen) {
let p = self.expect_number()? as u32;
self.expect(TokenType::RParen)?;
Some(p)
} else {
None
};
let name = if let Some(p) = precision {
format!("TIMESTAMPLTZ({})", p)
} else {
"TIMESTAMPLTZ".to_string()
};
Ok(DataType::Custom { name })
}
"TIMESTAMPNTZ" | "TIMESTAMP_NTZ" => {
let precision = if self.match_token(TokenType::LParen) {
let p = self.expect_number()? as u32;
self.expect(TokenType::RParen)?;
Some(p)
} else {
None
};
let name = if let Some(p) = precision {
format!("TIMESTAMPNTZ({})", p)
} else {
"TIMESTAMPNTZ".to_string()
};
Ok(DataType::Custom { name })
}
"INTERVAL" => {
// Parse optional unit (DAYS, DAY, HOUR, etc.)
// Don't consume GENERATED, AS, NOT, NULL, etc. which are column constraints
let unit = if (self.check(TokenType::Identifier)
|| self.check(TokenType::Var)
|| self.check_keyword())
&& !self.check(TokenType::Generated)
&& !self.check(TokenType::As)
&& !self.check(TokenType::Not)
&& !self.check(TokenType::Null)
&& !self.check(TokenType::Default)
&& !self.check(TokenType::PrimaryKey)
&& !self.check(TokenType::Unique)
&& !self.check(TokenType::Check)
&& !self.check(TokenType::Constraint)
&& !self.check(TokenType::References)
&& !self.check(TokenType::Collate)
&& !self.check(TokenType::Comment)
&& !self.check(TokenType::RParen)
&& !self.check(TokenType::Comma)
{
Some(self.advance().text.to_ascii_uppercase())
} else {
None
};
// Parse optional TO unit for range intervals like DAY TO HOUR
let to = if self.match_token(TokenType::To) {
if self.check(TokenType::Identifier)
|| self.check(TokenType::Var)
|| self.check_keyword()
{
Some(self.advance().text.to_ascii_uppercase())
} else {
None
}
} else {
None
};
Ok(DataType::Interval { unit, to })
}
"JSON" => {
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.match_token(TokenType::LParen)
{
// ClickHouse: JSON(subcolumn_specs) e.g. JSON(a String, b UInt32) or JSON(max_dynamic_paths=8)
let args = self.parse_custom_type_args_balanced()?;
self.expect(TokenType::RParen)?;
// Uppercase the SKIP keyword in JSON type declarations
// e.g., "col1 String, skip col2" -> "col1 String, SKIP col2"
let args = Self::uppercase_json_type_skip_keyword(&args);
Ok(DataType::Custom {
name: format!("JSON({})", args),
})
} else {
Ok(DataType::Json)
}
}
"JSONB" => Ok(DataType::JsonB),
"UUID" => Ok(DataType::Uuid),
"BLOB" => Ok(DataType::Blob),
"BYTEA" => Ok(DataType::VarBinary { length: None }),
"BIT" => {
let length = if self.match_token(TokenType::LParen) {
let n = self.expect_number()? as u32;
self.expect(TokenType::RParen)?;
Some(n)
} else {
None
};
Ok(DataType::Bit { length })
}
"VARBIT" | "BIT VARYING" => {
let length = if self.match_token(TokenType::LParen) {
let n = self.expect_number()? as u32;
self.expect(TokenType::RParen)?;
Some(n)
} else {
None
};
Ok(DataType::VarBit { length })
}
"BINARY" => {
// SQL standard: BINARY LARGE OBJECT → BLOB
if self.match_identifier("LARGE") && self.match_identifier("OBJECT") {
return Ok(DataType::Blob);
}
// Handle BINARY VARYING (SQL standard for VARBINARY)
if self.match_identifier("VARYING") {
let length = if self.match_token(TokenType::LParen) {
let len = self.expect_number()? as u32;
self.expect(TokenType::RParen)?;
Some(len)
} else {
None
};
Ok(DataType::VarBinary { length })
} else {
let length = if self.match_token(TokenType::LParen) {
let len = self.expect_number()? as u32;
self.expect(TokenType::RParen)?;
Some(len)
} else {
None
};
Ok(DataType::Binary { length })
}
}
"VARBINARY" => {
let length = if self.match_token(TokenType::LParen) {
let len = self.expect_number()? as u32;
self.expect(TokenType::RParen)?;
Some(len)
} else {
None
};
Ok(DataType::VarBinary { length })
}
// Generic types with angle bracket or parentheses syntax: ARRAY<T>, ARRAY(T), MAP<K,V>, MAP(K,V)
"ARRAY" => {
if self.match_token(TokenType::Lt) {
// ARRAY<element_type> - angle bracket style
let element_type = self.parse_data_type()?;
self.expect_gt()?;
Ok(DataType::Array {
element_type: Box::new(element_type),
dimension: None,
})
} else if self.match_token(TokenType::LParen) {
// ARRAY(element_type) - Snowflake parentheses style
let element_type = self.parse_data_type()?;
self.expect(TokenType::RParen)?;
Ok(DataType::Array {
element_type: Box::new(element_type),
dimension: None,
})
} else {
// Just ARRAY without type parameter
Ok(DataType::Custom {
name: "ARRAY".to_string(),
})
}
}
"MAP" => {
if self.match_token(TokenType::Lt) {
// MAP<key_type, value_type> - angle bracket style
let key_type = self.parse_data_type()?;
self.expect(TokenType::Comma)?;
let value_type = self.parse_data_type()?;
self.expect_gt()?;
Ok(DataType::Map {
key_type: Box::new(key_type),
value_type: Box::new(value_type),
})
} else if self.match_token(TokenType::LBracket) {
// Materialize: MAP[TEXT => INT] type syntax
let key_type = self.parse_data_type()?;
self.expect(TokenType::FArrow)?;
let value_type = self.parse_data_type()?;
self.expect(TokenType::RBracket)?;
Ok(DataType::Map {
key_type: Box::new(key_type),
value_type: Box::new(value_type),
})
} else if self.match_token(TokenType::LParen) {
// MAP(key_type, value_type) - Snowflake parentheses style
let key_type = self.parse_data_type()?;
self.expect(TokenType::Comma)?;
let value_type = self.parse_data_type()?;
self.expect(TokenType::RParen)?;
Ok(DataType::Map {
key_type: Box::new(key_type),
value_type: Box::new(value_type),
})
} else {
// Just MAP without type parameters
Ok(DataType::Custom {
name: "MAP".to_string(),
})
}
}
// VECTOR(type, dimension) - Snowflake vector type
// VECTOR(dimension, element_type_alias) or VECTOR(dimension) - SingleStore vector type
"VECTOR" => {
if self.match_token(TokenType::LParen) {
if self.check(TokenType::Number) {
// SingleStore format: VECTOR(dimension) or VECTOR(dimension, type_alias)
let dimension = self.expect_number()? as u32;
let element_type = if self.match_token(TokenType::Comma) {
// Parse the type alias (I8, I16, I32, I64, F32, F64)
let type_alias = self.expect_identifier_or_keyword()?;
let mapped_type = match type_alias.to_ascii_uppercase().as_str() {
"I8" => DataType::TinyInt { length: None },
"I16" => DataType::SmallInt { length: None },
"I32" => DataType::Int {
length: None,
integer_spelling: false,
},
"I64" => DataType::BigInt { length: None },
"F32" => DataType::Float {
precision: None,
scale: None,
real_spelling: false,
},
"F64" => DataType::Double {
precision: None,
scale: None,
},
_ => DataType::Custom {
name: type_alias.to_string(),
},
};
Some(Box::new(mapped_type))
} else {
// Just dimension, no type
None
};
self.expect(TokenType::RParen)?;
Ok(DataType::Vector {
element_type,
dimension: Some(dimension),
})
} else {
// Snowflake format: VECTOR(type, dimension)
let element_type = self.parse_data_type()?;
self.expect(TokenType::Comma)?;
let dimension = self.expect_number()? as u32;
self.expect(TokenType::RParen)?;
Ok(DataType::Vector {
element_type: Some(Box::new(element_type)),
dimension: Some(dimension),
})
}
} else {
Ok(DataType::Custom {
name: "VECTOR".to_string(),
})
}
}
// OBJECT(field1 type1, field2 type2, ...) - Snowflake structured object type
"OBJECT" => {
if self.match_token(TokenType::LParen) {
// ClickHouse: Object('json') — string literal argument
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.check(TokenType::String)
{
let arg = self.advance().text;
self.expect(TokenType::RParen)?;
return Ok(DataType::Custom {
name: format!("Object('{}')", arg),
});
}
let mut fields = Vec::new();
if !self.check(TokenType::RParen) {
loop {
let field_name = self.expect_identifier_or_keyword()?;
let field_type = self.parse_data_type()?;
// Optional NOT NULL constraint
let not_null = if self.match_keyword("NOT") {
// Consume NULL if present
self.match_keyword("NULL");
true
} else {
false
};
fields.push((field_name, field_type, not_null));
if !self.match_token(TokenType::Comma) {
break;
}
}
}
self.expect(TokenType::RParen)?;
// Check for RENAME FIELDS or ADD FIELDS modifier
let modifier = if self.match_keyword("RENAME") {
if self.match_keyword("FIELDS") {
Some("RENAME FIELDS".to_string())
} else {
Some("RENAME".to_string())
}
} else if self.match_keyword("ADD") {
if self.match_keyword("FIELDS") {
Some("ADD FIELDS".to_string())
} else {
Some("ADD".to_string())
}
} else {
None
};
Ok(DataType::Object { fields, modifier })
} else {
Ok(DataType::Custom {
name: "OBJECT".to_string(),
})
}
}
"STRUCT" => {
if self.match_token(TokenType::Lt) {
// STRUCT<field1 type1, field2 type2, ...> - BigQuery angle-bracket syntax
let fields = self.parse_struct_type_fields(false)?;
self.expect_gt()?;
Ok(DataType::Struct {
fields,
nested: false,
})
} else if self.match_token(TokenType::LParen) {
// STRUCT(field1 type1, field2 type2, ...) - DuckDB parenthesized syntax
let fields = self.parse_struct_type_fields(true)?;
self.expect(TokenType::RParen)?;
Ok(DataType::Struct {
fields,
nested: true,
})
} else {
// Just STRUCT without type parameters
Ok(DataType::Custom {
name: "STRUCT".to_string(),
})
}
}
"ROW" => {
// ROW(field1 type1, field2 type2, ...) - same as STRUCT with parens
if self.match_token(TokenType::LParen) {
let fields = self.parse_struct_type_fields(true)?;
self.expect(TokenType::RParen)?;
Ok(DataType::Struct {
fields,
nested: true,
})
} else {
Ok(DataType::Custom {
name: "ROW".to_string(),
})
}
}
"RECORD" => {
// RECORD(field1 type1, field2 type2, ...) - SingleStore record type (like ROW/STRUCT)
if self.match_token(TokenType::LParen) {
let fields = self.parse_struct_type_fields(true)?;
self.expect(TokenType::RParen)?;
// Use Struct with nested=true, generator will output RECORD for SingleStore
Ok(DataType::Struct {
fields,
nested: true,
})
} else {
Ok(DataType::Custom {
name: "RECORD".to_string(),
})
}
}
"ENUM" => {
// ENUM('RED', 'GREEN', 'BLUE') - DuckDB enum type
// ClickHouse: Enum('hello' = 1, 'world' = 2)
// ClickHouse also allows NULL in enum: Enum('a', 'b', NULL)
if self.match_token(TokenType::LParen) {
let mut values = Vec::new();
let mut assignments = Vec::new();
if !self.check(TokenType::RParen) {
loop {
let val = if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.check(TokenType::Null)
{
self.skip();
"NULL".to_string()
} else {
self.expect_string()?
};
values.push(val);
// ClickHouse: optional = value assignment (including negative numbers)
if self.match_token(TokenType::Eq) {
let negative = self.match_token(TokenType::Dash);
let num_token = self.advance();
let val = if negative {
format!("-{}", num_token.text)
} else {
num_token.text.clone()
};
assignments.push(Some(val));
} else {
assignments.push(None);
}
if !self.match_token(TokenType::Comma) {
break;
}
}
}
self.expect(TokenType::RParen)?;
Ok(DataType::Enum {
values,
assignments,
})
} else {
Ok(DataType::Custom {
name: "ENUM".to_string(),
})
}
}
"SET" => {
// MySQL SET('a', 'b', 'c') type
if self.match_token(TokenType::LParen) {
let mut values = Vec::new();
if !self.check(TokenType::RParen) {
loop {
let val = self.expect_string()?;
values.push(val);
if !self.match_token(TokenType::Comma) {
break;
}
}
}
self.expect(TokenType::RParen)?;
Ok(DataType::Set { values })
} else {
Ok(DataType::Custom {
name: "SET".to_string(),
})
}
}
"UNION" if self.check(TokenType::LParen) => {
// UNION(num INT, str TEXT) - DuckDB union type (only when followed by paren)
self.skip(); // consume LParen
let struct_fields = self.parse_struct_type_fields(true)?;
self.expect(TokenType::RParen)?;
// Convert StructField to (String, DataType) for Union
let fields: Vec<(String, DataType)> = struct_fields
.into_iter()
.map(|f| (f.name, f.data_type))
.collect();
Ok(DataType::Union { fields })
}
// Spatial types
"GEOMETRY" => {
let (subtype, srid) = self.parse_spatial_type_args()?;
Ok(DataType::Geometry { subtype, srid })
}
"GEOGRAPHY" => {
let (subtype, srid) = self.parse_spatial_type_args()?;
Ok(DataType::Geography { subtype, srid })
}
// MySQL spatial subtypes without wrapper
"POINT" | "LINESTRING" | "POLYGON" | "MULTIPOINT" | "MULTILINESTRING"
| "MULTIPOLYGON" | "GEOMETRYCOLLECTION" => {
// Check for optional SRID clause (MySQL syntax)
let srid = if self.match_identifier("SRID") {
Some(self.expect_number()? as u32)
} else {
None
};
Ok(DataType::Geometry {
subtype: Some(name),
srid,
})
}
// BigQuery ANY TYPE - templated parameter type for UDFs
"ANY" => {
if self.match_token(TokenType::Type) {
Ok(DataType::Custom {
name: "ANY TYPE".to_string(),
})
} else {
Ok(DataType::Custom {
name: "ANY".to_string(),
})
}
}
// LONG VARCHAR (Exasol) - same as TEXT
"LONG" => {
if self.match_identifier("VARCHAR") {
Ok(DataType::Text)
} else {
Ok(DataType::Custom {
name: "LONG".to_string(),
})
}
}
// MySQL SIGNED [INTEGER] / UNSIGNED [INTEGER] in CAST context
// CAST(x AS SIGNED INTEGER) -> CAST(x AS SIGNED)
"SIGNED" | "UNSIGNED" => {
// Consume optional INTEGER keyword after SIGNED/UNSIGNED
if self.check_identifier("INTEGER")
|| self.check_keyword_text("INTEGER")
|| self.check_keyword_text("INT")
{
self.skip();
}
Ok(DataType::Custom { name })
}
// ClickHouse Nullable(T) wrapper type
"NULLABLE" => {
self.expect(TokenType::LParen)?;
let inner = self.parse_data_type()?;
self.expect(TokenType::RParen)?;
Ok(DataType::Nullable {
inner: Box::new(inner),
})
}
_ => {
// Handle custom types with optional parenthesized precision/args
// e.g., DATETIME2(2), DATETIMEOFFSET(7), NVARCHAR2(100)
// Use uppercase name for known SQL custom types, but preserve original case
// for user-defined type names (e.g., UserDefinedTableType)
let is_known = convert_name_is_known_custom(&name);
let custom_name = if is_known {
name.clone()
} else {
raw_name.clone()
};
if self.match_token(TokenType::LParen) {
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) {
let args = self.parse_custom_type_args_balanced()?;
self.expect(TokenType::RParen)?;
Ok(DataType::Custom {
name: format!("{}({})", custom_name, args),
})
} else {
let mut args = Vec::new();
let mut after_comma = true; // treat first token as start of new arg
loop {
if self.check(TokenType::RParen) {
break;
}
let token = self.advance();
// If the previous token was space-separated (not comma-separated),
// append to the last arg. E.g., VARCHAR2(2328 CHAR) -> "2328 CHAR"
if !after_comma && !args.is_empty() {
if let Some(last) = args.last_mut() {
*last = format!("{} {}", last, token.text);
}
} else {
args.push(token.text.clone());
}
after_comma = self.match_token(TokenType::Comma);
}
self.expect(TokenType::RParen)?;
// Include args in the name: DATETIME2(2), VARCHAR2(2328 CHAR)
Ok(DataType::Custom {
name: format!("{}({})", custom_name, args.join(", ")),
})
}
} else {
Ok(DataType::Custom { name: custom_name })
}
}
}?;
// UNSIGNED/SIGNED modifiers for integer types (MySQL) are handled
// by the column definition parser which sets col.unsigned = true.
// Do NOT consume them here; the column parser needs to see them.
let mut result_type = base_type;
// Materialize: handle postfix LIST syntax (INT LIST, INT LIST LIST LIST)
let is_materialize = matches!(
self.config.dialect,
Some(crate::dialects::DialectType::Materialize)
);
if is_materialize {
while self.check_identifier("LIST") || self.check(TokenType::List) {
self.skip(); // consume LIST
result_type = DataType::List {
element_type: Box::new(result_type),
};
}
}
// PostgreSQL array syntax: TYPE[], TYPE[N], TYPE[N][M], etc.
let result_type = self.maybe_parse_array_dimensions(result_type)?;
// ClickHouse: mark string-like standard types as non-nullable by converting to Custom
// This prevents the generator from wrapping them in Nullable() during identity transforms.
// Types parsed from other dialects remain standard and will get Nullable wrapping when
// transpiling to ClickHouse.
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) {
return Ok(Self::clickhouse_mark_non_nullable(result_type));
}
Ok(result_type)
}
/// Convert standard types to Custom equivalents for ClickHouse to prevent Nullable wrapping.
/// This mirrors Python sqlglot's behavior of marking ClickHouse-parsed types as non-nullable.
fn clickhouse_mark_non_nullable(dt: DataType) -> DataType {
    match dt {
        // Every string-like standard type collapses to ClickHouse's native
        // `String`, emitted as a Custom type so the generator never wraps it
        // in Nullable() during identity transforms.
        DataType::Text
        | DataType::VarChar { .. }
        | DataType::Char { .. }
        | DataType::String { .. } => DataType::Custom {
            name: "String".to_string(),
        },
        // All other types pass through untouched.
        other => other,
    }
}
/// Parse a data type for cast syntax (::TYPE)
/// For dialects that support fixed-size arrays (like DuckDB), brackets like [3] are
/// parsed as array dimensions (e.g., x::INT[3] means cast to INT[3] array type).
/// For other dialects (like Snowflake), brackets are subscript operations
/// (e.g., x::VARIANT[0] means cast to VARIANT, then subscript with [0]).
///
/// Differences from the plain data-type parser in cast position:
/// - quoted identifiers keep their quotes unless they name a known type;
/// - trailing `[..]` array dimensions are only consumed for DuckDB/PostgreSQL/
///   Redshift (elsewhere a bracket after a cast is a subscript);
/// - user-defined types in generic mode preserve their original case.
///
/// # Errors
/// Returns an error when the tokens do not form a valid type, e.g. a missing
/// closing parenthesis/bracket or a non-numeric precision argument.
fn parse_data_type_for_cast(&mut self) -> Result<DataType> {
// Check if dialect supports array type suffixes (e.g., INT[], VARCHAR[3])
// PostgreSQL: INT[], TEXT[] (no fixed size)
// DuckDB: INT[3] (fixed size arrays)
let supports_array_type_suffix = matches!(
self.config.dialect,
Some(crate::dialects::DialectType::DuckDB)
| Some(crate::dialects::DialectType::PostgreSQL)
| Some(crate::dialects::DialectType::Redshift)
);
// Check if it's a quoted identifier (e.g., "udt") — preserve case and quoting
let is_quoted = self.check(TokenType::QuotedIdentifier);
let raw_name = self.expect_identifier_or_keyword()?;
if is_quoted {
// Check if the quoted name matches a known type — if so, normalize it
let known_type = self.convert_name_to_type(&raw_name);
if let Ok(ref dt) = known_type {
if !matches!(dt, DataType::Custom { .. }) {
return known_type;
}
}
// Truly custom type — preserve original case with quotes
return Ok(DataType::Custom {
name: format!("\"{}\"", raw_name),
});
}
// Type-name matching below is on the uppercased spelling; raw_name keeps
// the original case for user-defined/schema-qualified types.
let name = raw_name.to_ascii_uppercase();
// Handle parametric types like ARRAY<T>, MAP<K,V>
let base_type = match name.as_str() {
"ARRAY" => {
if self.match_token(TokenType::Lt) {
// ARRAY<element_type> - angle bracket style
let element_type = self.parse_data_type()?;
self.expect_gt()?;
DataType::Array {
element_type: Box::new(element_type),
dimension: None,
}
} else if self.match_token(TokenType::LParen) {
// ClickHouse: Array(Type) syntax with parentheses
let element_type = self.parse_data_type_for_cast()?;
self.expect(TokenType::RParen)?;
DataType::Array {
element_type: Box::new(element_type),
dimension: None,
}
} else {
// Bare ARRAY without a type parameter
DataType::Custom { name }
}
}
"MAP" => {
if self.match_token(TokenType::Lt) {
// MAP<key_type, value_type> - angle bracket style
let key_type = self.parse_data_type()?;
self.expect(TokenType::Comma)?;
let value_type = self.parse_data_type()?;
self.expect_gt()?;
DataType::Map {
key_type: Box::new(key_type),
value_type: Box::new(value_type),
}
} else if self.match_token(TokenType::LParen) {
// Snowflake: MAP(key_type, value_type) syntax
let key_type = self.parse_data_type_for_cast()?;
self.expect(TokenType::Comma)?;
let value_type = self.parse_data_type_for_cast()?;
self.expect(TokenType::RParen)?;
DataType::Map {
key_type: Box::new(key_type),
value_type: Box::new(value_type),
}
} else if self.match_token(TokenType::LBracket) {
// Materialize: MAP[TEXT => INT] type syntax
let key_type = self.parse_data_type_for_cast()?;
self.expect(TokenType::FArrow)?;
let value_type = self.parse_data_type_for_cast()?;
self.expect(TokenType::RBracket)?;
DataType::Map {
key_type: Box::new(key_type),
value_type: Box::new(value_type),
}
} else {
DataType::Custom { name }
}
}
"STRUCT" => {
// STRUCT<...> (BigQuery angle brackets) vs STRUCT(...) (DuckDB parens);
// the `nested` flag records which spelling was used.
if self.match_token(TokenType::Lt) {
let fields = self.parse_struct_type_fields(false)?;
self.expect_gt()?;
DataType::Struct {
fields,
nested: false,
}
} else if self.match_token(TokenType::LParen) {
let fields = self.parse_struct_type_fields(true)?;
self.expect(TokenType::RParen)?;
DataType::Struct {
fields,
nested: true,
}
} else {
DataType::Custom { name }
}
}
"ROW" => {
// ROW(field type, ...) is represented as a parenthesized Struct
if self.match_token(TokenType::LParen) {
let fields = self.parse_struct_type_fields(true)?;
self.expect(TokenType::RParen)?;
DataType::Struct {
fields,
nested: true,
}
} else {
DataType::Custom { name }
}
}
"RECORD" => {
// SingleStore RECORD type (like ROW/STRUCT)
if self.match_token(TokenType::LParen) {
let fields = self.parse_struct_type_fields(true)?;
self.expect(TokenType::RParen)?;
DataType::Struct {
fields,
nested: true,
}
} else {
DataType::Custom { name }
}
}
// Multi-word types that need special handling in cast context
"DOUBLE" => {
// Handle DOUBLE PRECISION
let _ = self.match_identifier("PRECISION");
// ClickHouse/SQL: DOUBLE(precision) or DOUBLE(precision, scale)
let (precision, scale) = if self.match_token(TokenType::LParen) {
let p = Some(self.expect_number()? as u32);
let s = if self.match_token(TokenType::Comma) {
Some(self.expect_number()? as u32)
} else {
None
};
self.expect(TokenType::RParen)?;
(p, s)
} else {
(None, None)
};
DataType::Double { precision, scale }
}
// NOTE: NCHAR is folded into the same Char/VarChar representation as
// CHAR/CHARACTER here (the national-character distinction is dropped).
"CHARACTER" | "CHAR" | "NCHAR" => {
// Handle CHARACTER VARYING / CHAR VARYING
if self.match_identifier("VARYING") {
let length = if self.match_token(TokenType::LParen) {
let len = Some(self.expect_number()? as u32);
self.expect(TokenType::RParen)?;
len
} else {
None
};
DataType::VarChar {
length,
parenthesized_length: false,
}
} else {
let length = if self.match_token(TokenType::LParen) {
let len = Some(self.expect_number()? as u32);
self.expect(TokenType::RParen)?;
len
} else {
None
};
// CHAR CHARACTER SET charset (MySQL CAST context, no length)
if length.is_none()
&& self.match_identifier("CHARACTER")
&& self.match_token(TokenType::Set)
{
let charset = self.expect_identifier_or_keyword()?;
return Ok(DataType::CharacterSet { name: charset });
}
DataType::Char { length }
}
}
"TIME" => {
// Handle TIME(precision) WITH/WITHOUT TIME ZONE
let precision = if self.match_token(TokenType::LParen) {
let p = Some(self.expect_number()? as u32);
self.expect(TokenType::RParen)?;
p
} else {
None
};
let timezone = if self.match_token(TokenType::With) {
self.match_keyword("TIME");
self.match_keyword("ZONE");
true
} else if self.match_keyword("WITHOUT") {
self.match_keyword("TIME");
self.match_keyword("ZONE");
false
} else {
false
};
DataType::Time {
precision,
timezone,
}
}
"TIMETZ" => {
// TIMETZ is shorthand for TIME with the timezone flag set
let precision = if self.match_token(TokenType::LParen) {
let p = Some(self.expect_number()? as u32);
self.expect(TokenType::RParen)?;
p
} else {
None
};
DataType::Time {
precision,
timezone: true,
}
}
"TIMESTAMP" => {
// Handle TIMESTAMP(precision) WITH/WITHOUT TIME ZONE or WITH LOCAL TIME ZONE
let precision = if self.match_token(TokenType::LParen) {
let p = Some(self.expect_number()? as u32);
self.expect(TokenType::RParen)?;
p
} else {
None
};
// Note: TIME is a keyword (TokenType::Time), so use match_keyword instead of match_identifier
if self.match_token(TokenType::With) {
// Check for LOCAL TIME ZONE vs TIME ZONE
if self.match_token(TokenType::Local) {
self.match_keyword("TIME");
self.match_keyword("ZONE");
// TIMESTAMP WITH LOCAL TIME ZONE -> TIMESTAMPLTZ
DataType::Custom {
name: "TIMESTAMPLTZ".to_string(),
}
} else {
self.match_keyword("TIME");
self.match_keyword("ZONE");
DataType::Timestamp {
precision,
timezone: true,
}
}
} else if self.match_keyword("WITHOUT") {
self.match_keyword("TIME");
self.match_keyword("ZONE");
DataType::Timestamp {
precision,
timezone: false,
}
} else {
DataType::Timestamp {
precision,
timezone: false,
}
}
}
"TIMESTAMPTZ" | "TIMESTAMP_TZ" => {
let precision = if self.match_token(TokenType::LParen) {
let p = self.expect_number()? as u32;
self.expect(TokenType::RParen)?;
Some(p)
} else {
None
};
DataType::Timestamp {
precision,
timezone: true,
}
}
// LTZ/NTZ variants have no dedicated DataType variant; the spelling
// (with optional precision) is preserved via Custom.
"TIMESTAMPLTZ" | "TIMESTAMP_LTZ" => {
let precision = if self.match_token(TokenType::LParen) {
let p = self.expect_number()? as u32;
self.expect(TokenType::RParen)?;
Some(p)
} else {
None
};
let dt_name = if let Some(p) = precision {
format!("TIMESTAMPLTZ({})", p)
} else {
"TIMESTAMPLTZ".to_string()
};
DataType::Custom { name: dt_name }
}
"TIMESTAMPNTZ" | "TIMESTAMP_NTZ" => {
let precision = if self.match_token(TokenType::LParen) {
let p = self.expect_number()? as u32;
self.expect(TokenType::RParen)?;
Some(p)
} else {
None
};
let dt_name = if let Some(p) = precision {
format!("TIMESTAMPNTZ({})", p)
} else {
"TIMESTAMPNTZ".to_string()
};
DataType::Custom { name: dt_name }
}
"INTERVAL" => {
// Parse optional unit (DAY, HOUR, etc.) after INTERVAL in cast context
let unit = if (self.check(TokenType::Identifier)
|| self.check(TokenType::Var)
|| self.check_keyword())
&& !self.check(TokenType::RParen)
&& !self.check(TokenType::Comma)
&& !self.check(TokenType::As)
&& !self.check(TokenType::Not)
&& !self.check(TokenType::Null)
{
Some(self.advance().text.to_ascii_uppercase())
} else {
None
};
// Parse optional TO unit for range intervals like DAY TO HOUR
let to = if self.match_token(TokenType::To) {
if self.check(TokenType::Identifier)
|| self.check(TokenType::Var)
|| self.check_keyword()
{
Some(self.advance().text.to_ascii_uppercase())
} else {
None
}
} else {
None
};
DataType::Interval { unit, to }
}
// VARCHAR/NVARCHAR with optional (N) or (MAX) parameter
"VARCHAR" | "NVARCHAR" => {
let is_nvarchar = name == "NVARCHAR";
if self.match_token(TokenType::LParen) {
if self.check(TokenType::RParen) {
// Empty parens: VARCHAR() — treated as unbounded
self.skip();
DataType::VarChar {
length: None,
parenthesized_length: false,
}
} else if self.check_identifier("MAX") {
// T-SQL VARCHAR(MAX)/NVARCHAR(MAX) — kept verbatim via Custom
self.skip();
self.expect(TokenType::RParen)?;
let type_name = if is_nvarchar {
"NVARCHAR(MAX)"
} else {
"VARCHAR(MAX)"
};
DataType::Custom {
name: type_name.to_string(),
}
} else {
let n = self.expect_number()? as u32;
self.expect(TokenType::RParen)?;
DataType::VarChar {
length: Some(n),
parenthesized_length: false,
}
}
} else {
DataType::VarChar {
length: None,
parenthesized_length: false,
}
}
}
// VARBINARY with optional (N) or (MAX) parameter
"VARBINARY" => {
if self.match_token(TokenType::LParen) {
if self.check(TokenType::RParen) {
self.skip();
DataType::VarBinary { length: None }
} else if self.check_identifier("MAX") {
self.skip();
self.expect(TokenType::RParen)?;
DataType::Custom {
name: "VARBINARY(MAX)".to_string(),
}
} else {
let n = self.expect_number()? as u32;
self.expect(TokenType::RParen)?;
DataType::VarBinary { length: Some(n) }
}
} else {
DataType::VarBinary { length: None }
}
}
// DECIMAL/NUMERIC with optional (precision, scale)
"DECIMAL" | "NUMERIC" | "NUMBER" => {
if self.match_token(TokenType::LParen) {
let precision = Some(self.expect_number()? as u32);
let scale = if self.match_token(TokenType::Comma) {
Some(self.expect_number()? as u32)
} else {
None
};
self.expect(TokenType::RParen)?;
DataType::Decimal { precision, scale }
} else {
DataType::Decimal {
precision: None,
scale: None,
}
}
}
// INT/INTEGER/BIGINT/SMALLINT/TINYINT with optional (N) display width
"INT" | "INTEGER" => {
let length = if self.match_token(TokenType::LParen) {
let n = Some(self.expect_number()? as u32);
self.expect(TokenType::RParen)?;
n
} else {
None
};
DataType::Int {
length,
integer_spelling: name == "INTEGER",
}
}
"BIGINT" => {
let length = if self.match_token(TokenType::LParen) {
let n = Some(self.expect_number()? as u32);
self.expect(TokenType::RParen)?;
n
} else {
None
};
DataType::BigInt { length }
}
"SMALLINT" => {
let length = if self.match_token(TokenType::LParen) {
let n = Some(self.expect_number()? as u32);
self.expect(TokenType::RParen)?;
n
} else {
None
};
DataType::SmallInt { length }
}
"TINYINT" => {
let length = if self.match_token(TokenType::LParen) {
let n = Some(self.expect_number()? as u32);
self.expect(TokenType::RParen)?;
n
} else {
None
};
DataType::TinyInt { length }
}
// FLOAT with optional (precision)
"FLOAT" | "REAL" | "BINARY_FLOAT" => {
let (precision, scale) = if self.match_token(TokenType::LParen) {
let n = Some(self.expect_number()? as u32);
let s = if self.match_token(TokenType::Comma) {
Some(self.expect_number()? as u32)
} else {
None
};
self.expect(TokenType::RParen)?;
(n, s)
} else {
(None, None)
};
DataType::Float {
precision,
scale,
real_spelling: name == "REAL",
}
}
// Oracle BINARY_DOUBLE maps to a plain DOUBLE
"BINARY_DOUBLE" => DataType::Double {
precision: None,
scale: None,
},
// BINARY with optional (length)
"BINARY" => {
let length = if self.match_token(TokenType::LParen) {
let n = Some(self.expect_number()? as u32);
self.expect(TokenType::RParen)?;
n
} else {
None
};
DataType::Binary { length }
}
// MySQL SIGNED [INTEGER] / UNSIGNED [INTEGER] in CAST context
// CAST(x AS SIGNED INTEGER) -> CAST(x AS SIGNED)
// CAST(x AS UNSIGNED INTEGER) -> CAST(x AS UNSIGNED)
"SIGNED" | "UNSIGNED" => {
// Consume optional INTEGER keyword after SIGNED/UNSIGNED
if self.check_identifier("INTEGER")
|| self.check_keyword_text("INTEGER")
|| self.check_keyword_text("INT")
{
self.skip();
}
DataType::Custom { name }
}
// ClickHouse Nullable(T) wrapper type
"NULLABLE" => {
self.expect(TokenType::LParen)?;
let inner = self.parse_data_type_for_cast()?;
self.expect(TokenType::RParen)?;
DataType::Nullable {
inner: Box::new(inner),
}
}
// VECTOR(type, dimension) - Snowflake vector type
// VECTOR(dimension, element_type_alias) or VECTOR(dimension) - SingleStore vector type
"VECTOR" => {
if self.match_token(TokenType::LParen) {
if self.check(TokenType::Number) {
// SingleStore format: VECTOR(dimension) or VECTOR(dimension, type_alias)
let dimension = self.expect_number()? as u32;
let element_type = if self.match_token(TokenType::Comma) {
let type_alias = self.expect_identifier_or_keyword()?;
// Map SingleStore element-type aliases to concrete types
let mapped_type = match type_alias.to_ascii_uppercase().as_str() {
"I8" => DataType::TinyInt { length: None },
"I16" => DataType::SmallInt { length: None },
"I32" => DataType::Int {
length: None,
integer_spelling: false,
},
"I64" => DataType::BigInt { length: None },
"F32" => DataType::Float {
precision: None,
scale: None,
real_spelling: false,
},
"F64" => DataType::Double {
precision: None,
scale: None,
},
_ => DataType::Custom {
name: type_alias.to_string(),
},
};
Some(Box::new(mapped_type))
} else {
None
};
self.expect(TokenType::RParen)?;
DataType::Vector {
element_type,
dimension: Some(dimension),
}
} else {
// Snowflake format: VECTOR(type, dimension)
let element_type = self.parse_data_type()?;
self.expect(TokenType::Comma)?;
let dimension = self.expect_number()? as u32;
self.expect(TokenType::RParen)?;
DataType::Vector {
element_type: Some(Box::new(element_type)),
dimension: Some(dimension),
}
}
} else {
DataType::Custom {
name: "VECTOR".to_string(),
}
}
}
// For simple types, use convert_name_to_type to get proper DataType variants
// This ensures VARCHAR becomes DataType::VarChar, not DataType::Custom
// For user-defined types in generic mode, preserve original case from raw_name
_ => {
let base = self.convert_name_to_type(&name)?;
// ClickHouse: consume parenthesized args for custom types like DateTime('UTC'),
// LowCardinality(String), Variant(String, UInt64), JSON(max_dynamic_paths=8)
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.check(TokenType::LParen)
&& (matches!(
base,
DataType::Custom { .. } | DataType::Json | DataType::JsonB
))
{
self.skip(); // consume (
let args = self.parse_custom_type_args_balanced()?;
self.expect(TokenType::RParen)?;
let base_name = match &base {
DataType::Json => "JSON".to_string(),
DataType::JsonB => "JSONB".to_string(),
DataType::Custom { name } => name.clone(),
// Guarded by the matches! above, so only the three variants occur
_ => unreachable!(),
};
DataType::Custom {
name: format!("{}({})", base_name, args),
}
} else if matches!(base, DataType::Custom { .. }) && self.check(TokenType::Dot) {
// Handle schema-qualified user-defined types (e.g., app.status_enum)
// by consuming dot-separated identifiers like Python sqlglot's
// _parse_user_defined_type()
// Use raw_name to preserve original case for schema-qualified types
let mut type_name = raw_name.to_string();
while self.match_token(TokenType::Dot) {
let tok = self.advance();
type_name = format!("{}.{}", type_name, tok.text);
}
DataType::Custom { name: type_name }
} else if matches!(base, DataType::Custom { .. }) && self.config.dialect.is_none() {
// Preserve original case for user-defined types in generic mode
DataType::Custom {
name: raw_name.to_string(),
}
} else {
base
}
}
};
// Materialize: handle postfix LIST syntax (INT LIST, INT LIST LIST LIST)
let is_materialize = matches!(
self.config.dialect,
Some(crate::dialects::DialectType::Materialize)
);
let mut result_type = base_type;
if is_materialize {
while self.check_identifier("LIST") || self.check(TokenType::List) {
self.skip(); // consume LIST
result_type = DataType::List {
element_type: Box::new(result_type),
};
}
}
// For dialects that support array type suffixes (DuckDB, PostgreSQL, Redshift),
// parse array dimensions. For other dialects, brackets after a cast are subscript operations.
if supports_array_type_suffix {
self.maybe_parse_array_dimensions(result_type)
} else {
Ok(result_type)
}
}
/// Parse custom type arguments with balanced parentheses, preserving nested types.
/// Consumes tokens up to (but not including) the closing `)` of the enclosing
/// type and renders them back into canonical text form: commas become `", "`,
/// `=`/`+` get surrounding spaces, strings are re-quoted with `''` escaping,
/// and adjacent word-like tokens are separated by a single space.
fn parse_custom_type_args_balanced(&mut self) -> Result<String> {
    // Depth of nested parentheses opened *inside* the argument list.
    let mut nesting = 0usize;
    let mut rendered = String::new();
    // True when the previously emitted fragment was word-like, meaning the
    // next word-like fragment needs a separating space.
    let mut needs_space = false;
    while !self.is_at_end() {
        // The enclosing type's closing paren terminates the argument list.
        if nesting == 0 && self.check(TokenType::RParen) {
            break;
        }
        let tok = self.advance();
        match tok.token_type {
            TokenType::LParen => {
                nesting += 1;
                rendered.push('(');
                needs_space = false;
            }
            TokenType::RParen => {
                if nesting == 0 {
                    break;
                }
                nesting -= 1;
                rendered.push(')');
                needs_space = true;
            }
            TokenType::Comma => {
                rendered.push_str(", ");
                needs_space = false;
            }
            TokenType::Eq => {
                rendered.push_str(" = ");
                needs_space = false;
            }
            TokenType::Plus => {
                rendered.push_str(" + ");
                needs_space = false;
            }
            TokenType::Dash => {
                // Unary minus binds tightly to the following token: no spaces.
                rendered.push('-');
                needs_space = false;
            }
            TokenType::Dot => {
                rendered.push('.');
                needs_space = false;
            }
            TokenType::String | TokenType::DollarString => {
                if needs_space {
                    rendered.push(' ');
                }
                // Re-quote with SQL single quotes, doubling embedded quotes.
                rendered.push('\'');
                rendered.push_str(&tok.text.replace('\'', "''"));
                rendered.push('\'');
                needs_space = true;
            }
            TokenType::QuotedIdentifier => {
                if needs_space {
                    rendered.push(' ');
                }
                rendered.push('"');
                rendered.push_str(&tok.text);
                rendered.push('"');
                needs_space = true;
            }
            // Numbers, parameters, identifiers, keywords: emit verbatim.
            _ => {
                if needs_space {
                    rendered.push(' ');
                }
                rendered.push_str(&tok.text);
                needs_space = true;
            }
        }
    }
    Ok(rendered)
}
/// Uppercase the `skip` keyword in ClickHouse JSON type declarations.
/// In ClickHouse, `SKIP col` within JSON(...) type specs must use uppercase SKIP.
///
/// The keyword is only rewritten when it appears at the start of the string
/// or immediately after a `", "` separator, and only when followed by a
/// space (so words like `skipped` are left alone). All other text is copied
/// through unchanged.
///
/// Bug fix: the previous implementation copied the tail one *byte* at a time
/// (`rest.as_bytes()[0] as char`), which mangled multi-byte UTF-8 characters,
/// and sliced `rest[..4]` without a char-boundary check, which could panic on
/// non-ASCII input. We now advance by whole `char`s and guard the slice.
fn uppercase_json_type_skip_keyword(args: &str) -> String {
    let mut result = String::with_capacity(args.len());
    let mut rest = args;
    // True at the start of the string and right after a ", " separator --
    // the only positions where `skip` is a keyword.
    let mut at_start = true;
    while !rest.is_empty() {
        if at_start
            && rest.len() >= 5
            // Guard the byte slice: index 4 must be a char boundary
            // (always true when the prefix is ASCII "skip").
            && rest.is_char_boundary(4)
            && rest[..4].eq_ignore_ascii_case("skip")
            && rest.as_bytes()[4] == b' '
        {
            result.push_str("SKIP");
            rest = &rest[4..];
            at_start = false;
        } else if rest.starts_with(", ") {
            result.push_str(", ");
            rest = &rest[2..];
            at_start = true;
        } else {
            // Copy one full character (not one byte) to stay UTF-8 safe.
            let ch = rest.chars().next().expect("rest is non-empty");
            result.push(ch);
            rest = &rest[ch.len_utf8()..];
            at_start = false;
        }
    }
    result
}
/// Parse a data type from a text string by tokenizing and sub-parsing it.
/// Used for ClickHouse JSON path types where a quoted identifier like "Array(JSON)"
/// needs to be parsed as a proper structured DataType.
///
/// The outer token stream and cursor are swapped out for the duration of the
/// sub-parse and restored afterwards, regardless of success or failure.
fn parse_data_type_from_text(&mut self, text: &str) -> Result<DataType> {
    use crate::tokens::Tokenizer;
    let sub_tokens = Tokenizer::default().tokenize(text)?;
    if sub_tokens.is_empty() {
        // Nothing tokenizable: fall back to an opaque custom type.
        return Ok(DataType::Custom {
            name: text.to_string(),
        });
    }
    // Swap the sub-token stream in, remember the outer cursor position.
    let outer_tokens = std::mem::replace(&mut self.tokens, sub_tokens);
    let outer_pos = self.current;
    self.current = 0;
    let parsed = self.parse_data_type();
    // Restore the outer stream before propagating the result.
    self.tokens = outer_tokens;
    self.current = outer_pos;
    parsed
}
/// Try to parse a data type optionally - returns None if no valid type found
/// Used for JSON_TABLE column definitions where type may or may not be present
///
/// Never consumes tokens on failure: the cursor is rewound to where it was
/// before the speculative parse.
fn parse_data_type_optional(&mut self) -> Result<Option<DataType>> {
    // A type must begin with an identifier-like token or a keyword.
    let looks_like_type = self.check(TokenType::Identifier)
        || self.check(TokenType::Var)
        || self.check_keyword();
    if !looks_like_type {
        return Ok(None);
    }
    // PATH introduces a JSON_TABLE path clause, never a type.
    if self.check_identifier("PATH") {
        return Ok(None);
    }
    // ClickHouse: ALIAS, EPHEMERAL, MATERIALIZED are column modifiers, not types.
    let is_clickhouse = matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    );
    if is_clickhouse
        && (self.check_identifier("ALIAS")
            || self.check_identifier("EPHEMERAL")
            || self.check(TokenType::Materialized))
    {
        return Ok(None);
    }
    // Speculative parse: on error, rewind instead of propagating.
    let checkpoint = self.current;
    if let Ok(dt) = self.parse_data_type() {
        Ok(Some(dt))
    } else {
        self.current = checkpoint;
        Ok(None)
    }
}
/// Convert a DataType to a string representation for JSONColumnDef.kind
///
/// Renders the canonical uppercase SQL spelling for the common scalar types,
/// including length/precision/scale parameters, and recurses for ARRAY and
/// Nullable wrappers. `INTEGER` vs `INT` spelling is preserved via the
/// `integer_spelling` flag. Variants without an explicit arm fall back to
/// the Debug representation (a lossy but non-panicking default).
fn data_type_to_string(&self, dt: &DataType) -> String {
    match dt {
        DataType::Int {
            length: Some(n),
            integer_spelling: true,
        } => format!("INTEGER({})", n),
        DataType::Int {
            length: Some(n), ..
        } => format!("INT({})", n),
        DataType::Int {
            length: None,
            integer_spelling: true,
        } => "INTEGER".to_string(),
        DataType::Int { length: None, .. } => "INT".to_string(),
        DataType::BigInt { length: Some(n) } => format!("BIGINT({})", n),
        DataType::BigInt { length: None } => "BIGINT".to_string(),
        DataType::SmallInt { length: Some(n) } => format!("SMALLINT({})", n),
        DataType::SmallInt { length: None } => "SMALLINT".to_string(),
        DataType::TinyInt { length: Some(n) } => format!("TINYINT({})", n),
        DataType::TinyInt { length: None } => "TINYINT".to_string(),
        DataType::Float {
            precision: Some(p),
            scale: Some(s),
            ..
        } => format!("FLOAT({}, {})", p, s),
        DataType::Float {
            precision: Some(p),
            scale: None,
            ..
        } => format!("FLOAT({})", p),
        DataType::Float {
            precision: None, ..
        } => "FLOAT".to_string(),
        DataType::Double {
            precision: Some(p),
            scale: Some(s),
        } => format!("DOUBLE({}, {})", p, s),
        DataType::Double {
            precision: Some(p),
            scale: None,
        } => format!("DOUBLE({})", p),
        DataType::Double {
            precision: None, ..
        } => "DOUBLE".to_string(),
        DataType::Decimal {
            precision: Some(p),
            scale: Some(s),
        } => format!("DECIMAL({}, {})", p, s),
        DataType::Decimal {
            precision: Some(p),
            scale: None,
        } => format!("DECIMAL({})", p),
        DataType::Decimal {
            precision: None, ..
        } => "DECIMAL".to_string(),
        DataType::VarChar {
            length: Some(n), ..
        } => format!("VARCHAR({})", n),
        DataType::VarChar { length: None, .. } => "VARCHAR".to_string(),
        DataType::Char { length: Some(n) } => format!("CHAR({})", n),
        DataType::Char { length: None } => "CHAR".to_string(),
        DataType::Text => "TEXT".to_string(),
        DataType::Boolean => "BOOLEAN".to_string(),
        DataType::Date => "DATE".to_string(),
        DataType::Time {
            precision: Some(p), ..
        } => format!("TIME({})", p),
        DataType::Time {
            precision: None, ..
        } => "TIME".to_string(),
        // Timezone-aware timestamps render with the TZ-suffixed spelling.
        DataType::Timestamp {
            precision: Some(p),
            timezone: true,
        } => format!("TIMESTAMPTZ({})", p),
        DataType::Timestamp {
            precision: Some(p),
            timezone: false,
        } => format!("TIMESTAMP({})", p),
        DataType::Timestamp {
            precision: None,
            timezone: true,
        } => "TIMESTAMPTZ".to_string(),
        DataType::Timestamp {
            precision: None,
            timezone: false,
        } => "TIMESTAMP".to_string(),
        DataType::Json => "JSON".to_string(),
        DataType::JsonB => "JSONB".to_string(),
        DataType::Binary { length: Some(n) } => format!("BINARY({})", n),
        DataType::Binary { length: None } => "BINARY".to_string(),
        DataType::VarBinary { length: Some(n) } => format!("VARBINARY({})", n),
        DataType::VarBinary { length: None } => "VARBINARY".to_string(),
        DataType::String { length: Some(n) } => format!("STRING({})", n),
        DataType::String { length: None } => "STRING".to_string(),
        // Container types recurse on their element type.
        DataType::Array { element_type, .. } => {
            format!("ARRAY({})", self.data_type_to_string(element_type))
        }
        // ClickHouse-style Nullable wrapper keeps its mixed-case spelling.
        DataType::Nullable { inner } => {
            format!("Nullable({})", self.data_type_to_string(inner))
        }
        DataType::Custom { name } => name.clone(),
        _ => format!("{:?}", dt),
    }
}
/// Parse optional array dimensions after a type: [], [N], [N][M], ARRAY, ARRAY[N], etc.
///
/// Each suffix wraps the accumulated type in another `DataType::Array` layer,
/// so `INT[2][3]` becomes Array(Array(INT, 2), 3).
fn maybe_parse_array_dimensions(&mut self, base_type: DataType) -> Result<DataType> {
    let mut wrapped = base_type;
    // PostgreSQL keyword suffix: `type ARRAY` or `type ARRAY[N]`.
    if self.check_identifier("ARRAY") {
        self.skip(); // consume ARRAY
        let mut dimension = None;
        if self.match_token(TokenType::LBracket) {
            // Optional explicit dimension inside the brackets.
            if self.check(TokenType::Number) {
                dimension = Some(self.expect_number()? as u32);
            }
            self.expect(TokenType::RBracket)?;
        }
        wrapped = DataType::Array {
            element_type: Box::new(wrapped),
            dimension,
        };
    }
    // Bracket suffixes: TYPE[], TYPE[N], TYPE[N][M], ...
    while self.match_token(TokenType::LBracket) {
        let dimension = if self.check(TokenType::Number) {
            Some(self.expect_number()? as u32)
        } else {
            None
        };
        self.expect(TokenType::RBracket)?;
        wrapped = DataType::Array {
            element_type: Box::new(wrapped),
            dimension,
        };
    }
    Ok(wrapped)
}
/// Parse spatial type arguments like GEOMETRY(Point, 4326) or GEOGRAPHY
///
/// Returns `(subtype, srid)`. With no parenthesized argument list both are
/// `None`. A single numeric argument (e.g. Teradata's ST_GEOMETRY(1)) is
/// returned as the numeric slot with no subtype.
fn parse_spatial_type_args(&mut self) -> Result<(Option<String>, Option<u32>)> {
    if !self.match_token(TokenType::LParen) {
        // Bare type name, e.g. GEOGRAPHY.
        return Ok((None, None));
    }
    if self.check(TokenType::Number) {
        // Purely numeric argument, e.g. ST_GEOMETRY(1) in Teradata.
        let dimension = self.expect_number()? as u32;
        self.expect(TokenType::RParen)?;
        return Ok((None, Some(dimension)));
    }
    // Subtype name (POINT, LINESTRING, ...) with an optional SRID after a comma.
    let subtype = self.expect_identifier()?.to_ascii_uppercase();
    let srid = if self.match_token(TokenType::Comma) {
        Some(self.expect_number()? as u32)
    } else {
        None
    };
    self.expect(TokenType::RParen)?;
    Ok((Some(subtype), srid))
}
/// Parse struct/row/union type fields: name TYPE, name TYPE, ...
/// `paren_style` indicates whether we're parsing parenthesized syntax (terminates at RParen)
/// or angle-bracket syntax (terminates at Gt/GtGt).
///
/// Supports anonymous fields (bare type name), named fields (`name TYPE` or
/// Hive-style `name: TYPE`), parametric anonymous fields (`ARRAY<T>` etc.),
/// plus per-field COMMENT (Spark/Databricks) and OPTIONS (BigQuery) clauses.
/// Does NOT consume the terminating RParen / Gt / GtGt.
fn parse_struct_type_fields(&mut self, paren_style: bool) -> Result<Vec<StructField>> {
    let mut fields = Vec::new();
    // Check for empty field list
    if (paren_style && self.check(TokenType::RParen))
        || (!paren_style && (self.check(TokenType::Gt) || self.check(TokenType::GtGt)))
    {
        return Ok(fields);
    }
    loop {
        // Parse field name or just type (for anonymous struct fields)
        // Track whether it was a quoted identifier to preserve quoting
        let is_quoted = self.check(TokenType::QuotedIdentifier);
        let first = self.expect_identifier_or_keyword()?;
        let first_upper = first.to_ascii_uppercase();
        // Check if this is a parametric type (ARRAY<T>, MAP<K,V>, STRUCT<...>, STRUCT(...))
        let is_parametric_type = (first_upper == "ARRAY"
            || first_upper == "MAP"
            || first_upper == "STRUCT"
            || first_upper == "ROW")
            && (self.check(TokenType::Lt) || self.check(TokenType::LParen));
        let (field_name, field_type) = if is_parametric_type {
            // This is a parametric type as an anonymous field
            let field_type = self.parse_data_type_from_name(&first_upper)?;
            (String::new(), field_type)
        } else if self.check(TokenType::Comma)
            // NOTE: match_identifier CONSUMES the OPTIONS token when it
            // matches; the retreat below undoes that consumption.
            || self.match_identifier("OPTIONS") // Check for OPTIONS (but don't consume yet)
            || (paren_style && self.check(TokenType::RParen))
            || (!paren_style && (self.check(TokenType::Gt) || self.check(TokenType::GtGt)))
        {
            // Check if we just matched OPTIONS - if so, retreat
            if self.previous().text.eq_ignore_ascii_case("OPTIONS") {
                self.current -= 1;
            }
            // Anonymous field: just a type name
            let field_type = self.convert_name_to_type(&first)?;
            (String::new(), field_type)
        } else if self.is_identifier_token()
            || self.is_safe_keyword_as_identifier()
            || self.check(TokenType::Lt)
            || self.check(TokenType::LParen)
            || self.check(TokenType::Colon)
        {
            // Named field: fieldname TYPE (or fieldname: TYPE for Hive)
            // Consume optional colon separator (Hive-style: `STRUCT<field_name: TYPE>`)
            self.match_token(TokenType::Colon);
            let field_type = self.parse_data_type()?;
            // Preserve quoting for field names
            let field_name = if is_quoted {
                format!("\"{}\"", first)
            } else {
                first
            };
            (field_name, field_type)
        } else {
            // Just a type name
            let field_type = self.convert_name_to_type(&first)?;
            (String::new(), field_type)
        };
        // Spark/Databricks: Check for COMMENT clause on struct field
        let comment = if self.match_token(TokenType::Comment) {
            Some(self.expect_string()?)
        } else {
            None
        };
        // BigQuery: Check for OPTIONS clause on struct field
        let options = if self.match_identifier("OPTIONS") {
            self.parse_options_list()?
        } else {
            Vec::new()
        };
        fields.push(StructField::with_options_and_comment(
            field_name, field_type, options, comment,
        ));
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    Ok(fields)
}
/// Parse a data type given a name that was already consumed
/// This is used for standalone type expressions like ARRAY<T>
///
/// Supports ARRAY<T>, MAP<K, V>, STRUCT<...> / STRUCT(...), and ROW(...).
/// A recognized name without a parameter list, or any other name, becomes a
/// custom type carrying the name verbatim.
fn parse_data_type_from_name(&mut self, name: &str) -> Result<DataType> {
    // The match guards intentionally have a side effect: `match_token` only
    // consumes the opening `<` / `(` when it is actually present, so a
    // failed guard leaves the token stream untouched.
    match name {
        "ARRAY" if self.match_token(TokenType::Lt) => {
            let element_type = self.parse_data_type()?;
            self.expect_gt()?;
            Ok(DataType::Array {
                element_type: Box::new(element_type),
                dimension: None,
            })
        }
        "MAP" if self.match_token(TokenType::Lt) => {
            let key_type = self.parse_data_type()?;
            self.expect(TokenType::Comma)?;
            let value_type = self.parse_data_type()?;
            self.expect_gt()?;
            Ok(DataType::Map {
                key_type: Box::new(key_type),
                value_type: Box::new(value_type),
            })
        }
        // Angle-bracket structs are flagged as non-nested, paren structs as nested.
        "STRUCT" if self.match_token(TokenType::Lt) => {
            let fields = self.parse_struct_type_fields(false)?;
            self.expect_gt()?;
            Ok(DataType::Struct {
                fields,
                nested: false,
            })
        }
        "STRUCT" if self.match_token(TokenType::LParen) => {
            let fields = self.parse_struct_type_fields(true)?;
            self.expect(TokenType::RParen)?;
            Ok(DataType::Struct {
                fields,
                nested: true,
            })
        }
        "ROW" if self.match_token(TokenType::LParen) => {
            let fields = self.parse_struct_type_fields(true)?;
            self.expect(TokenType::RParen)?;
            Ok(DataType::Struct {
                fields,
                nested: true,
            })
        }
        // No parameter list (or unrecognized name): opaque custom type.
        other => Ok(DataType::Custom {
            name: other.to_string(),
        }),
    }
}
/// Convert a type name string to a DataType
/// Used for anonymous struct fields where we have just a type name
///
/// Matching is case-insensitive. Unrecognized names become `DataType::Custom`
/// preserving the original (non-uppercased) spelling. Note that "STRING" maps
/// to `Text` here (not `DataType::String`), and "BYTEA" maps to `VarBinary`.
fn convert_name_to_type(&self, name: &str) -> Result<DataType> {
    let upper = name.to_ascii_uppercase();
    Ok(match upper.as_str() {
        "INT" => DataType::Int {
            length: None,
            integer_spelling: false,
        },
        // `integer_spelling` preserves INTEGER vs INT on round-trip.
        "INTEGER" => DataType::Int {
            length: None,
            integer_spelling: true,
        },
        "BIGINT" => DataType::BigInt { length: None },
        "SMALLINT" => DataType::SmallInt { length: None },
        "TINYINT" => DataType::TinyInt { length: None },
        "FLOAT" | "BINARY_FLOAT" => DataType::Float {
            precision: None,
            scale: None,
            real_spelling: false,
        },
        // REAL is a float with its own spelling flag for round-tripping.
        "REAL" => DataType::Float {
            precision: None,
            scale: None,
            real_spelling: true,
        },
        "DOUBLE" | "BINARY_DOUBLE" => DataType::Double {
            precision: None,
            scale: None,
        },
        "DECIMAL" | "NUMERIC" => DataType::Decimal {
            precision: None,
            scale: None,
        },
        "BOOLEAN" | "BOOL" => DataType::Boolean,
        "CHAR" | "CHARACTER" | "NCHAR" => DataType::Char { length: None },
        "VARCHAR" | "NVARCHAR" => DataType::VarChar {
            length: None,
            parenthesized_length: false,
        },
        "TEXT" | "STRING" | "NTEXT" => DataType::Text,
        "DATE" => DataType::Date,
        "TIME" => DataType::Time {
            precision: None,
            timezone: false,
        },
        "TIMETZ" => DataType::Time {
            precision: None,
            timezone: true,
        },
        "TIMESTAMP" => DataType::Timestamp {
            precision: None,
            timezone: false,
        },
        "INTERVAL" => DataType::Interval {
            unit: None,
            to: None,
        },
        "JSON" => DataType::Json,
        "JSONB" => DataType::JsonB,
        "UUID" => DataType::Uuid,
        "BLOB" => DataType::Blob,
        "BYTEA" => DataType::VarBinary { length: None },
        "BINARY" => DataType::Binary { length: None },
        "VARBINARY" => DataType::VarBinary { length: None },
        "BIT" => DataType::Bit { length: None },
        "VARBIT" => DataType::VarBit { length: None },
        // Unknown names are preserved verbatim as custom types.
        _ => DataType::Custom {
            name: name.to_string(),
        },
    })
}
/// Parse star modifiers: EXCLUDE/EXCEPT, REPLACE, RENAME
/// Syntax varies by dialect:
/// - DuckDB: * EXCLUDE (col1, col2)
/// - BigQuery: * EXCEPT (col1, col2), * REPLACE (expr AS col)
/// - Snowflake: * EXCLUDE col, * RENAME (old AS new)
///
/// Thin wrapper over `parse_star_modifiers_with_comments` for the common case
/// where the star token carried no trailing comments.
fn parse_star_modifiers(&mut self, table: Option<Identifier>) -> Result<Star> {
    self.parse_star_modifiers_with_comments(table, Vec::new())
}
/// Parse star modifiers with explicit trailing comments from the star token
///
/// Parses, in order, an optional EXCLUDE/EXCEPT clause, an optional REPLACE
/// clause, and an optional RENAME clause, then assembles the final `Star`
/// node. Each clause supports both parenthesized and bare forms depending on
/// the dialect (see per-clause comments below).
fn parse_star_modifiers_with_comments(
    &mut self,
    table: Option<Identifier>,
    star_trailing_comments: Vec<String>,
) -> Result<Star> {
    let mut except = None;
    let mut replace = None;
    let mut rename = None;
    // Parse EXCLUDE / EXCEPT clause
    if self.match_token(TokenType::Exclude) || self.match_token(TokenType::Except) {
        // ClickHouse: EXCEPT STRICT col1, col2 (STRICT is optional modifier)
        let _ = self.match_text_seq(&["STRICT"]);
        let mut columns = Vec::new();
        if self.match_token(TokenType::LParen) {
            // EXCLUDE (col1, col2) or EXCEPT (A.COL_1, B.COL_2)
            loop {
                // ClickHouse: allow string literals in EXCEPT ('col_regex')
                // and keywords like 'key', 'index' as column names
                let col = if self.check(TokenType::String) {
                    self.advance().text
                } else if self.is_safe_keyword_as_identifier() {
                    self.advance().text
                } else {
                    self.expect_identifier()?
                };
                // Handle qualified column names like A.COL_1
                if self.match_token(TokenType::Dot) {
                    let subcol = if self.is_safe_keyword_as_identifier() {
                        self.advance().text
                    } else {
                        self.expect_identifier()?
                    };
                    columns.push(Identifier::new(format!("{}.{}", col, subcol)));
                } else {
                    columns.push(Identifier::new(col));
                }
                if !self.match_token(TokenType::Comma) {
                    break;
                }
            }
            self.expect(TokenType::RParen)?;
        } else {
            // EXCLUDE col (single column, Snowflake) or EXCEPT col1, col2 (ClickHouse)
            // or EXCEPT 'regex' (ClickHouse)
            loop {
                let col = if self.check(TokenType::String) {
                    self.advance().text
                } else if self.is_safe_keyword_as_identifier() {
                    self.advance().text
                } else {
                    self.expect_identifier()?
                };
                columns.push(Identifier::new(col));
                // ClickHouse allows comma-separated columns without parens: EXCEPT col1, col2
                // But only if the next token after comma looks like a column name
                if !matches!(
                    self.config.dialect,
                    Some(crate::dialects::DialectType::ClickHouse)
                ) || !self.check(TokenType::Comma)
                    || !matches!(
                        self.peek_nth(1).map(|t| t.token_type),
                        Some(TokenType::Identifier)
                            | Some(TokenType::QuotedIdentifier)
                            | Some(TokenType::Var)
                            | Some(TokenType::String)
                    )
                {
                    break;
                }
                self.skip(); // consume comma
            }
        }
        except = Some(columns);
    }
    // Parse REPLACE clause
    if self.match_token(TokenType::Replace) {
        // ClickHouse: REPLACE STRICT is optional modifier
        let _ = self.match_text_seq(&["STRICT"]);
        let mut replacements = Vec::new();
        if self.match_token(TokenType::LParen) {
            // Parenthesized form: REPLACE (expr AS name, ...)
            loop {
                let expr = self.parse_expression()?;
                self.expect(TokenType::As)?;
                let alias = self.expect_identifier_or_keyword()?;
                replacements.push(Alias::new(expr, Identifier::new(alias)));
                if !self.match_token(TokenType::Comma) {
                    break;
                }
            }
            self.expect(TokenType::RParen)?;
        } else if matches!(
            self.config.dialect,
            Some(crate::dialects::DialectType::ClickHouse)
        ) {
            // ClickHouse: REPLACE [STRICT] expr AS name (single entry without parens)
            // Multiple entries require parens: REPLACE(expr1 AS name1, expr2 AS name2)
            let expr = self.parse_expression()?;
            self.expect(TokenType::As)?;
            let alias = self.expect_identifier_or_keyword()?;
            replacements.push(Alias::new(expr, Identifier::new(alias)));
        } else {
            // All other dialects require the parenthesized form.
            return Err(self.parse_error("Expected LParen after REPLACE"));
        }
        replace = Some(replacements);
    }
    // Parse RENAME clause (Snowflake)
    if self.match_token(TokenType::Rename) {
        let mut renames = Vec::new();
        if self.match_token(TokenType::LParen) {
            // Parenthesized form: RENAME (old AS new, ...)
            loop {
                let old_name = self.expect_identifier()?;
                self.expect(TokenType::As)?;
                let new_name = self.expect_identifier()?;
                renames.push((Identifier::new(old_name), Identifier::new(new_name)));
                if !self.match_token(TokenType::Comma) {
                    break;
                }
            }
            self.expect(TokenType::RParen)?;
        } else {
            // Single rename without parens
            let old_name = self.expect_identifier()?;
            self.expect(TokenType::As)?;
            let new_name = self.expect_identifier()?;
            renames.push((Identifier::new(old_name), Identifier::new(new_name)));
        }
        rename = Some(renames);
    }
    Ok(Star {
        table,
        except,
        replace,
        rename,
        trailing_comments: star_trailing_comments,
        span: None,
    })
}
// === Helper methods ===
/// Whether the cursor has consumed every token in the stream.
#[inline]
fn is_at_end(&self) -> bool {
    self.tokens.len() <= self.current
}
/// Check if current token is a query modifier keyword or end of input.
/// Used after GROUP BY ALL/DISTINCT to decide whether to parse expression lists.
fn is_at_query_modifier_or_end(&self) -> bool {
    self.is_at_end()
        || matches!(
            self.peek().token_type,
            TokenType::Having
                | TokenType::Qualify
                | TokenType::Window
                | TokenType::Order
                | TokenType::Limit
                | TokenType::Fetch
                | TokenType::Offset
                | TokenType::For
                | TokenType::Lock
                | TokenType::Union
                | TokenType::Except
                | TokenType::Intersect
                | TokenType::RParen
                | TokenType::Semicolon
                | TokenType::Where
        )
}
/// Create a parse error with position from the current token
fn parse_error(&self, message: impl Into<String>) -> Error {
let span = self.peek().span;
Error::parse(message, span.line, span.column, span.start, span.end)
}
/// Peek at current token
/// Returns reference to current token, or last token if at end
#[inline]
fn peek(&self) -> &Token {
    // Past-the-end access falls back to the final token so callers never
    // panic; they should still prefer checking is_at_end() first.
    self.tokens
        .get(self.current)
        .unwrap_or_else(|| self.tokens.last().expect("Token list should not be empty"))
}
/// Look ahead by n positions (0 = current token); `None` past the end.
fn peek_nth(&self, n: usize) -> Option<&Token> {
    self.tokens.get(self.current + n)
}
/// Advance to next token, returning a clone of the consumed token.
/// Past the end of the stream, returns the last token without moving the
/// cursor, so callers never panic on over-read.
#[inline]
fn advance(&mut self) -> Token {
    match self.tokens.get(self.current) {
        Some(tok) => {
            let tok = tok.clone();
            self.current += 1;
            tok
        }
        // At/past end: hand back the final token as a safe fallback.
        None => self
            .tokens
            .last()
            .cloned()
            .expect("Token list should not be empty"),
    }
}
/// Advance to next token without returning it (when result is unused).
/// A no-op once the end of the stream is reached.
#[inline]
fn skip(&mut self) {
    if !self.is_at_end() {
        self.current += 1;
    }
}
/// Get the previous token (last consumed).
///
/// Panics if nothing has been consumed yet (`self.current == 0`); callers
/// must only invoke this after at least one advance/skip.
fn previous(&self) -> &Token {
    &self.tokens[self.current - 1]
}
/// Get trailing comments from the previous token; empty when nothing has
/// been consumed yet.
fn previous_trailing_comments(&self) -> &[String] {
    match self.current.checked_sub(1) {
        Some(idx) => &self.tokens[idx].trailing_comments,
        None => &[],
    }
}
/// Get the token type of the previous token (the one before current),
/// or `None` when nothing has been consumed yet.
fn previous_token_type(&self) -> Option<TokenType> {
    self.current
        .checked_sub(1)
        .map(|idx| self.tokens[idx].token_type.clone())
}
/// Wrap a query expression in a Subquery node.
/// Only wraps if the expression is a query statement (Select, Union, etc.),
/// not for simple expressions like column references.
fn maybe_wrap_in_subquery(&self, inner: Expression) -> Expression {
    let is_query_stmt = matches!(
        &inner,
        Expression::Select(_)
            | Expression::Union(_)
            | Expression::Intersect(_)
            | Expression::Except(_)
    );
    if !is_query_stmt {
        // Plain expressions pass through untouched.
        return inner;
    }
    // Wrap with an otherwise-empty Subquery shell.
    Expression::Subquery(Box::new(Subquery {
        this: inner,
        alias: None,
        column_aliases: Vec::new(),
        order_by: None,
        limit: None,
        offset: None,
        distribute_by: None,
        sort_by: None,
        cluster_by: None,
        lateral: false,
        modifiers_inside: false,
        trailing_comments: Vec::new(),
        inferred_type: None,
    }))
}
/// Clear trailing_comments from the rightmost leaf of an expression tree.
/// Used by parse_and/parse_or to avoid comment duplication: when the same comment
/// is captured both in an expression's trailing_comments (during parse_primary) and
/// in a BinaryOp's operator_comments (during parse_and/parse_or), we clear the
/// expression's copy since the operator_comments position (after AND/OR) is correct.
///
/// Recurses down the right spine of binary operators (and through NOT);
/// terminates at a Column leaf, whose comments are cleared. Variants not
/// listed here are deliberately left untouched.
fn clear_rightmost_trailing_comments(expr: &mut Expression) {
    match expr {
        // Leaf: the actual comment holder.
        Expression::Column(col) => col.trailing_comments.clear(),
        Expression::And(op) | Expression::Or(op) => {
            Self::clear_rightmost_trailing_comments(&mut op.right);
        }
        // NOT wraps a single operand; descend into it.
        Expression::Not(op) => {
            Self::clear_rightmost_trailing_comments(&mut op.this);
        }
        // For comparison ops, the rightmost is the right operand
        Expression::Eq(op)
        | Expression::Neq(op)
        | Expression::Lt(op)
        | Expression::Lte(op)
        | Expression::Gt(op)
        | Expression::Gte(op)
        | Expression::Add(op)
        | Expression::Sub(op)
        | Expression::Mul(op)
        | Expression::Div(op) => {
            Self::clear_rightmost_trailing_comments(&mut op.right);
        }
        // For other expressions, trailing_comments might be stored differently
        // We don't need to handle all variants, just the common ones that appear
        // as operands in AND/OR expressions
        _ => {}
    }
}
/// Get leading comments from the current token (comments that appeared
/// before it); empty at end of input.
fn current_leading_comments(&self) -> &[String] {
    match self.tokens.get(self.current) {
        Some(tok) => &tok.comments,
        None => &[],
    }
}
/// Convert a slice of tokens to SQL string with proper quoting for strings
///
/// Reconstructs source text for `tokens[start..end]`, preserving the original
/// line breaks, indentation, and inter-token spacing by consulting each
/// token's span (line, column, and byte offsets). String literals, whose
/// quotes were stripped by the tokenizer, are re-quoted with `''` escaping.
fn tokens_to_sql(&self, start: usize, end: usize) -> String {
    let mut result = String::new();
    let mut prev_line: Option<usize> = None;
    let mut prev_end_offset: Option<usize> = None;
    for t in &self.tokens[start..end] {
        // Check if we moved to a new line (preserve original line structure)
        let is_new_line = prev_line.is_some() && t.span.line > prev_line.unwrap();
        // Use byte offsets to determine original spacing between tokens.
        // This preserves the exact spacing from the source (e.g., TRANSFORM( vs OPTIONS ())
        if is_new_line {
            result.push('\n');
            // Preserve original indentation
            // span.column is the column AFTER the last character (1-based),
            // so start column = span.column - text.chars().count()
            let text_len = t.text.chars().count();
            let start_col = t.span.column.saturating_sub(text_len);
            // For string tokens, shift left 2 more to account for the two
            // quote characters the tokenizer stripped from the text
            let start_col = if t.token_type == TokenType::String {
                start_col.saturating_sub(2)
            } else {
                start_col
            };
            // Columns are 1-based; column 1 means zero indentation.
            let indent = if start_col > 1 { start_col - 1 } else { 0 };
            for _ in 0..indent {
                result.push(' ');
            }
        } else if !result.is_empty() {
            // Same line: use byte offsets to detect if there was whitespace
            let had_space = prev_end_offset.map_or(false, |prev_end| t.span.start > prev_end);
            if had_space {
                result.push(' ');
            }
        }
        if t.token_type == TokenType::String {
            // Re-add quotes around string literals
            result.push('\'');
            result.push_str(&t.text.replace('\'', "''"));
            result.push('\'');
        } else {
            result.push_str(&t.text);
        }
        prev_line = Some(t.span.line);
        prev_end_offset = Some(t.span.end);
    }
    result
}
/// Convert tokens to SQL for CREATE STAGE, normalizing FILE_FORMAT clause
/// Transforms FILE_FORMAT='value' to FILE_FORMAT=(FORMAT_NAME='value')
/// and FILE_FORMAT=schema.format to FILE_FORMAT=(FORMAT_NAME=schema.format)
///
/// All other tokens are rendered with simple spacing rules (no space around
/// `=`, `.`, or after `(`; no space before `,` / `)`), re-quoting string
/// literals with `''` escaping.
fn tokens_to_sql_stage_format(&self, start: usize, end: usize) -> String {
    let mut result = String::new();
    let mut prev_token_type: Option<TokenType> = None;
    let mut i = start;
    while i < end {
        let t = &self.tokens[i];
        // Check for FILE_FORMAT= pattern that needs normalization
        // FILE_FORMAT must be followed by = and then NOT by (
        // (an existing parenthesized form is already normalized).
        if (t.token_type == TokenType::Var || t.token_type == TokenType::Identifier)
            && t.text.eq_ignore_ascii_case("FILE_FORMAT")
            && i + 1 < end
            && self.tokens[i + 1].token_type == TokenType::Eq
            && (i + 2 >= end || self.tokens[i + 2].token_type != TokenType::LParen)
        {
            // Need to normalize: FILE_FORMAT=value -> FILE_FORMAT=(FORMAT_NAME=value)
            if !result.is_empty() && prev_token_type != Some(TokenType::LParen) {
                result.push(' ');
            }
            result.push_str("FILE_FORMAT=(FORMAT_NAME=");
            // Skip FILE_FORMAT and =
            i += 2;
            // Collect the value (string literal or qualified identifier like schema.format)
            while i < end {
                let val = &self.tokens[i];
                if val.token_type == TokenType::String {
                    // String literal: 'format1'
                    result.push('\'');
                    result.push_str(&val.text.replace('\'', "''"));
                    result.push('\'');
                    i += 1;
                    break;
                } else if val.token_type == TokenType::Var
                    || val.token_type == TokenType::Identifier
                {
                    // Identifier: schema1 or format1
                    result.push_str(&val.text);
                    i += 1;
                    // Check for dot (qualified name)
                    if i < end && self.tokens[i].token_type == TokenType::Dot {
                        result.push('.');
                        i += 1;
                        // Expect identifier after dot
                        if i < end {
                            result.push_str(&self.tokens[i].text);
                            i += 1;
                        }
                    }
                    break;
                } else {
                    // Unexpected token: emit the empty wrapper and resume
                    // normal rendering from here.
                    break;
                }
            }
            result.push(')');
            // Pretend we just emitted a closing paren for spacing purposes.
            prev_token_type = Some(TokenType::RParen);
            continue;
        }
        // Normal token handling (same as tokens_to_sql)
        let needs_space = !result.is_empty()
            && prev_token_type != Some(TokenType::LParen)
            && prev_token_type != Some(TokenType::Eq)
            && prev_token_type != Some(TokenType::Dot)
            && t.token_type != TokenType::Comma
            && t.token_type != TokenType::RParen
            && t.token_type != TokenType::LParen
            && t.token_type != TokenType::Eq
            && t.token_type != TokenType::Dot;
        if needs_space {
            result.push(' ');
        }
        if t.token_type == TokenType::String {
            result.push('\'');
            result.push_str(&t.text.replace('\'', "''"));
            result.push('\'');
        } else {
            result.push_str(&t.text);
        }
        prev_token_type = Some(t.token_type);
        i += 1;
    }
    result
}
/// Like tokens_to_sql but also uppercases keyword tokens and adds space after commas
///
/// Spacing differs from `tokens_to_sql`: a `(` normally hugs the preceding
/// token (function-call / data-type style, e.g. `VARCHAR(100)`), except after
/// structural keywords (`PRIMARY KEY (...)`) or structural identifier words
/// (`CLUSTERED (...)`), where a space is inserted.
fn tokens_to_sql_uppercased(&self, start: usize, end: usize) -> String {
    let mut result = String::new();
    let mut prev_token_type: Option<TokenType> = None;
    let mut prev_token_text: Option<String> = None;
    for t in &self.tokens[start..end] {
        // Smart spacing: no space before comma, ), . or after (, .
        // Add space before ( only when preceded by a structural keyword or identifier
        // (e.g., "PRIMARY KEY (Id)", "CLUSTERED (EmpID)")
        // but NOT after data type keywords (e.g., "VARCHAR(100)", "INT(11)")
        let is_lparen_after_keyword = t.token_type == TokenType::LParen
            && prev_token_type.map_or(false, |p: TokenType| {
                // Only add space for structural SQL keywords, not data type keywords
                match p {
                    TokenType::PrimaryKey | TokenType::ForeignKey | TokenType::Unique
                    | TokenType::Check | TokenType::Index | TokenType::Key
                    | TokenType::Constraint | TokenType::References
                    | TokenType::Not | TokenType::Null
                    | TokenType::Default | TokenType::Values | TokenType::In
                    | TokenType::Exists | TokenType::Select | TokenType::From
                    | TokenType::Where | TokenType::Having | TokenType::Using
                    | TokenType::On | TokenType::Set | TokenType::Into
                    | TokenType::Table | TokenType::View | TokenType::Create
                    | TokenType::Insert | TokenType::Update | TokenType::Delete
                    | TokenType::Join | TokenType::Left | TokenType::Right
                    | TokenType::Inner | TokenType::Outer | TokenType::Full
                    | TokenType::Cross | TokenType::Case | TokenType::When
                    | TokenType::Then | TokenType::Else | TokenType::End
                    | TokenType::If | TokenType::Partition | TokenType::Over
                    | TokenType::Between | TokenType::Like | TokenType::Replace
                    | TokenType::Grant | TokenType::Revoke
                    => true,
                    _ => false,
                }
            })
            // For Var/Identifier tokens, add space before ( only for structural tokens
            // (CLUSTERED, NONCLUSTERED, INDEX) but not data types (VARCHAR, INT, etc.)
            || (t.token_type == TokenType::LParen
                && prev_token_text.as_ref().map_or(false, |text| {
                    let upper = text.to_ascii_uppercase();
                    matches!(upper.as_str(),
                        "CLUSTERED" | "NONCLUSTERED" | "HASH" | "RANGE"
                        | "INCLUDE" | "FILLFACTOR" | "PAD_INDEX"
                    )
                }));
        let needs_space = !result.is_empty()
            && prev_token_type != Some(TokenType::LParen)
            && prev_token_type != Some(TokenType::Dot)
            && t.token_type != TokenType::Comma
            && t.token_type != TokenType::RParen
            && t.token_type != TokenType::Dot
            && (t.token_type != TokenType::LParen || is_lparen_after_keyword);
        // Add space after comma
        if prev_token_type == Some(TokenType::Comma) {
            result.push(' ');
        } else if needs_space {
            result.push(' ');
        }
        if t.token_type == TokenType::String {
            // Re-add quotes around string literals
            result.push('\'');
            result.push_str(&t.text.replace('\'', "''"));
            result.push('\'');
        } else if t.token_type.is_keyword() {
            // Uppercase keyword tokens
            result.push_str(&t.text.to_ascii_uppercase());
        } else {
            // For non-keyword tokens, preserve original text
            result.push_str(&t.text);
        }
        prev_token_type = Some(t.token_type);
        prev_token_text = Some(t.text.clone());
    }
    result
}
/// Check if the current token has the given type; false at end of input.
#[inline]
fn check(&self, token_type: TokenType) -> bool {
    !self.is_at_end() && self.peek().token_type == token_type
}
/// Check if the current token is a keyword; false at end of input.
fn check_keyword(&self) -> bool {
    !self.is_at_end() && self.peek().token_type.is_keyword()
}
/// Check if current UNPIVOT token starts an UNPIVOT clause (vs being an alias).
/// UNPIVOT clause starts with: UNPIVOT(, UNPIVOT INCLUDE, or UNPIVOT EXCLUDE
fn is_unpivot_clause_start(&self) -> bool {
    if !self.check(TokenType::Unpivot) {
        return false;
    }
    match self.peek_nth(1) {
        // UNPIVOT at end of input: must be an alias.
        None => false,
        // UNPIVOT( ... )
        Some(next) if next.token_type == TokenType::LParen => true,
        // UNPIVOT INCLUDE NULLS (...) or UNPIVOT EXCLUDE NULLS (...)
        Some(next) => {
            let word = next.text.to_ascii_uppercase();
            word == "INCLUDE" || word == "EXCLUDE"
        }
    }
}
/// Check if current token text matches (case-insensitive), does not advance.
fn check_keyword_text(&self, keyword: &str) -> bool {
    !self.is_at_end() && self.peek().text.eq_ignore_ascii_case(keyword)
}
/// Check if the current token is the FROM keyword; does not advance.
fn check_from_keyword(&self) -> bool {
    !self.is_at_end() && self.peek().token_type == TokenType::From
}
/// Check if the next token (one past current) has the given type.
fn check_next(&self, token_type: TokenType) -> bool {
    self.tokens
        .get(self.current + 1)
        .map_or(false, |tok| tok.token_type == token_type)
}
/// Check if next token is an identifier with specific name (case-insensitive)
fn check_next_identifier(&self, name: &str) -> bool {
    self.tokens.get(self.current + 1).map_or(false, |tok| {
        matches!(tok.token_type, TokenType::Var | TokenType::Identifier)
            && tok.text.eq_ignore_ascii_case(name)
    })
}
/// Consume the current token when it is identifier-like (Identifier, Var,
/// or QuotedIdentifier) and its text matches `text` case-insensitively.
/// Returns whether a token was consumed.
fn match_identifier(&mut self, text: &str) -> bool {
    if self.is_at_end() {
        return false;
    }
    let tok = self.peek();
    let identifier_like = matches!(
        tok.token_type,
        TokenType::Identifier | TokenType::Var | TokenType::QuotedIdentifier
    );
    if identifier_like && tok.text.eq_ignore_ascii_case(text) {
        self.skip();
        true
    } else {
        false
    }
}
/// Non-consuming variant of `match_identifier`: true when the current token
/// is Identifier/Var/QuotedIdentifier with text equal to `text`
/// (case-insensitive). Does NOT advance the parser.
fn check_identifier(&self, text: &str) -> bool {
    if self.is_at_end() {
        return false;
    }
    let tok = self.peek();
    matches!(
        tok.token_type,
        TokenType::Identifier | TokenType::Var | TokenType::QuotedIdentifier
    ) && tok.text.eq_ignore_ascii_case(text)
}
/// Check if the current Percent token is a PERCENT modifier (not a modulo
/// operator).
///
/// "PERCENT" spelled out is always a modifier. "%" is a modifier only when
/// followed by a clause boundary (OFFSET, end of input, semicolon, `)`,
/// comma, set operators, WITH TIES, etc.); otherwise it is modulo.
fn is_percent_modifier(&self) -> bool {
    if self.is_at_end() {
        return false;
    }
    if self.peek().text.eq_ignore_ascii_case("PERCENT") {
        return true;
    }
    // "%" symbol — only treat as PERCENT modifier if followed by a boundary
    if self.peek().text == "%" {
        let next_idx = self.current + 1;
        let next = match self.tokens.get(next_idx) {
            Some(t) => t,
            None => return true, // at end — it's PERCENT
        };
        // The old trailing `|| next_idx >= self.tokens.len()` was dead code:
        // that case already returned `true` just above.
        return matches!(
            next.token_type,
            TokenType::Offset
                | TokenType::Semicolon
                | TokenType::RParen
                | TokenType::From
                | TokenType::Where
                | TokenType::GroupBy
                | TokenType::OrderBy
                | TokenType::Having
                | TokenType::Union
                | TokenType::Intersect
                | TokenType::Except
                | TokenType::Comma
                | TokenType::With // WITH TIES
        );
    }
    false
}
/// Check if the current token is a "safe" keyword that can be used as an
/// identifier.
///
/// Structural keywords like FROM, WHERE, JOIN, SELECT are NOT safe.
/// Non-structural keywords like FILTER, UPDATE, END, VALUES can be used as
/// identifiers.
fn is_safe_keyword_as_identifier(&self) -> bool {
    if self.is_at_end() {
        return false;
    }
    let token_type = self.peek().token_type;
    // ClickHouse allows many SQL keywords as identifiers (table names,
    // column aliases, etc.), so it uses a smaller structural deny-list.
    if matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    ) {
        // Certain operator/non-keyword tokens are also usable as identifiers.
        if matches!(token_type, TokenType::RLike | TokenType::Values) {
            return true;
        }
        let ch_structural = matches!(
            token_type,
            TokenType::From
                | TokenType::Where
                | TokenType::Select
                | TokenType::Create
                | TokenType::Drop
                | TokenType::Alter
                | TokenType::On
                | TokenType::GroupBy
                | TokenType::OrderBy
                | TokenType::Having
                | TokenType::With
                | TokenType::Union
                | TokenType::Intersect
                | TokenType::Except
                | TokenType::Into
                | TokenType::Using
                | TokenType::Lateral
                | TokenType::Natural
        );
        return token_type.is_keyword() && !ch_structural;
    }
    // Default deny-list: structural keywords that must not become identifiers.
    let structural = matches!(
        token_type,
        TokenType::From
            | TokenType::Where
            | TokenType::Select
            | TokenType::Insert
            | TokenType::Delete
            | TokenType::Create
            | TokenType::Drop
            | TokenType::Alter
            | TokenType::Join
            | TokenType::Inner
            | TokenType::Cross
            | TokenType::On
            | TokenType::GroupBy
            | TokenType::OrderBy
            | TokenType::Having
            | TokenType::With
            | TokenType::Union
            | TokenType::Intersect
            | TokenType::Except
            | TokenType::Qualify
            | TokenType::Into
            | TokenType::Set
            | TokenType::Using
            | TokenType::Lateral
            | TokenType::Natural
    );
    // If it's a keyword but NOT structural, it's safe to use as an identifier.
    token_type.is_keyword() && !structural
}
/// Check if a token at the current position is the last meaningful token in
/// an expression context. Used to detect when a keyword like IS or KEEP
/// should be treated as an alias instead of an operator keyword.
///
/// The `_token_type` argument is currently unused; the decision depends only
/// on what follows the cursor.
fn is_last_expression_token(&self, _token_type: TokenType) -> bool {
    // End of input, or a clause-boundary token right after the cursor, means
    // the current token closes the expression.
    self.tokens.get(self.current + 1).map_or(true, |next| {
        matches!(
            next.token_type,
            TokenType::From
                | TokenType::Where
                | TokenType::GroupBy
                | TokenType::OrderBy
                | TokenType::Having
                | TokenType::Limit
                | TokenType::Union
                | TokenType::Intersect
                | TokenType::Except
                | TokenType::Semicolon
                | TokenType::RParen
                | TokenType::Comma
        )
    })
}
/// True when the cursor sits on a token spelling a common SQL type name
/// (used for lambda parameter type annotations, e.g. `a INT -> ...`).
///
/// Matching is by text so it does not depend on dialect-specific TokenType
/// variants.
fn is_type_keyword(&self) -> bool {
    const TYPE_NAMES: &[&str] = &[
        "INT", "INTEGER", "BIGINT", "SMALLINT", "TINYINT", "DOUBLE", "FLOAT",
        "DECIMAL", "NUMERIC", "REAL", "VARCHAR", "CHAR", "TEXT", "STRING",
        "NVARCHAR", "NCHAR", "BOOLEAN", "BOOL", "DATE", "TIME", "TIMESTAMP",
        "DATETIME", "INTERVAL", "BINARY", "VARBINARY", "BLOB", "ARRAY", "MAP",
        "STRUCT", "OBJECT", "VARIANT", "JSON", "NUMBER", "VARCHAR2",
    ];
    if self.is_at_end() {
        return false;
    }
    let text = &self.peek().text;
    TYPE_NAMES.iter().any(|name| text.eq_ignore_ascii_case(name))
}
/// Check if the current token is a command-like keyword (GET, PUT, COPY,
/// SHOW, ...) that can safely be used as an implicit alias. The set is kept
/// narrow so it is unlikely to collide with SQL clause keywords.
fn is_command_keyword_as_alias(&self) -> bool {
    if self.is_at_end() {
        return false;
    }
    let tt = self.peek().token_type;
    let dialect = self.config.dialect;
    // FORMAT is a query modifier in ClickHouse, so it cannot be an alias there.
    if tt == TokenType::Format {
        return !matches!(dialect, Some(crate::dialects::DialectType::ClickHouse));
    }
    // Command keywords usable as aliases in every dialect.
    if matches!(
        tt,
        TokenType::Get
            | TokenType::Put
            | TokenType::Copy
            | TokenType::Show
            | TokenType::Rename
            | TokenType::Enum
            | TokenType::Sample
            | TokenType::Collate
            | TokenType::Add
    ) {
        return true;
    }
    // Spark/Hive/Databricks additionally allow unquoted LIMIT/OFFSET aliases,
    // but only when the next token is NOT a value (a value means it really is
    // the LIMIT/OFFSET clause).
    let spark_like = matches!(
        dialect,
        Some(crate::dialects::DialectType::Spark)
            | Some(crate::dialects::DialectType::Hive)
            | Some(crate::dialects::DialectType::Databricks)
    );
    if spark_like && matches!(tt, TokenType::Limit | TokenType::Offset) {
        let followed_by_value = self.tokens.get(self.current + 1).map_or(false, |t| {
            matches!(
                t.token_type,
                TokenType::Number
                    | TokenType::LParen
                    | TokenType::Var
                    | TokenType::Parameter
                    | TokenType::All
            )
        });
        return !followed_by_value;
    }
    false
}
/// Check whether the current token is a keyword acceptable as a table alias.
/// More permissive than `is_safe_keyword_as_identifier`: it also admits
/// LEFT, RIGHT, OUTER, FULL (JOIN keywords that double as aliases).
fn can_be_alias_keyword(&self) -> bool {
    if self.is_at_end() {
        return false;
    }
    let extra = matches!(
        self.peek().token_type,
        TokenType::Left
            | TokenType::Right
            | TokenType::Outer
            | TokenType::Full
            | TokenType::Only
            | TokenType::Next
            | TokenType::All
            | TokenType::If
    );
    extra || self.is_safe_keyword_as_identifier()
}
/// Consume the current token when its type is `token_type`; report whether
/// anything was consumed.
fn match_token(&mut self, token_type: TokenType) -> bool {
    let matched = self.check(token_type);
    if matched {
        self.skip();
    }
    matched
}
/// Match (and consume) a whole run of token types starting at the cursor.
/// Either every entry in `keywords` matches in order and all are consumed,
/// or nothing is consumed at all.
fn match_keywords(&mut self, keywords: &[TokenType]) -> bool {
    let ahead = self.tokens.get(self.current..).unwrap_or(&[]);
    if ahead.len() < keywords.len() {
        return false;
    }
    if ahead
        .iter()
        .zip(keywords)
        .any(|(tok, &kw)| tok.token_type != kw)
    {
        return false;
    }
    // All matched — consume the whole run.
    self.current += keywords.len();
    true
}
/// Consume and return the current token, which must have type `token_type`.
///
/// On mismatch, build a parse error naming the expected type, the actual
/// token (type + text), and a small window of surrounding SQL for context.
fn expect(&mut self, token_type: TokenType) -> Result<Token> {
    if self.check(token_type) {
        return Ok(self.advance());
    }
    let (got, got_text) = if self.is_at_end() {
        ("end of input".to_string(), String::new())
    } else {
        let tok = self.peek();
        (format!("{:?}", tok.token_type), tok.text.clone())
    };
    // Reconstruct a few tokens on either side of the cursor for the message.
    let start = self.current.saturating_sub(3);
    let end = (self.current + 4).min(self.tokens.len());
    let context = self.tokens_to_sql(start, end).replace('\n', " ");
    Err(self.parse_error(format!(
        "Expected {:?}, got {} ('{}') near [{}]",
        token_type, got, got_text, context
    )))
}
/// Expect a `>` token, handling the case where `>>` was tokenized as GtGt
/// This is needed for parsing nested generic types like `ARRAY<ARRAY<INT>>`
///
/// When splitting `>>`: the token stored at the cursor is rewritten in place
/// to be the *second* `>` (the cursor is NOT advanced, so the next
/// `expect_gt` call consumes it), and a synthetic token for the *first* `>`
/// is returned to the caller. Spans are adjusted so each half covers one
/// character; the original token's leading comments stay with the first half.
fn expect_gt(&mut self) -> Result<Token> {
if self.check(TokenType::Gt) {
Ok(self.advance())
} else if self.check(TokenType::GtGt) {
// Split >> into two > tokens
// Replace the GtGt with Gt and return a synthetic Gt token
let token = self.peek().clone();
// Rewrite the stored token as the second '>': one byte/column past the
// original start, keeping the original end.
self.tokens[self.current] = Token {
token_type: TokenType::Gt,
text: ">".to_string(),
span: Span {
start: token.span.start + 1,
end: token.span.end,
line: token.span.line,
column: token.span.column + 1,
},
comments: Vec::new(),
trailing_comments: Vec::new(),
};
// Synthetic first '>': covers the original start byte and carries the
// original token's leading comments.
Ok(Token {
token_type: TokenType::Gt,
text: ">".to_string(),
span: Span {
start: token.span.start,
end: token.span.start + 1,
line: token.span.line,
column: token.span.column,
},
comments: token.comments,
trailing_comments: Vec::new(),
})
} else {
Err(self.parse_error(format!(
"Expected Gt, got {:?}",
if self.is_at_end() {
"end of input".to_string()
} else {
format!("{:?}", self.peek().token_type)
}
)))
}
}
/// Consume the current token as a string literal (plain or dollar-quoted)
/// and return its text; error otherwise.
fn expect_string(&mut self) -> Result<String> {
    if self.check(TokenType::String) || self.check(TokenType::DollarString) {
        return Ok(self.advance().text);
    }
    let got = if self.is_at_end() {
        "end of input".to_string()
    } else {
        format!("{:?}", self.peek().token_type)
    };
    Err(self.parse_error(format!("Expected string, got {:?}", got)))
}
/// True when the cursor sits on any identifier-like token: Var, plain
/// Identifier, or QuotedIdentifier.
fn is_identifier_token(&self) -> bool {
    !self.is_at_end()
        && matches!(
            self.peek().token_type,
            TokenType::Var | TokenType::Identifier | TokenType::QuotedIdentifier
        )
}
/// True when the cursor references a stage (`@...` syntax): either a
/// dedicated DAt token, or a Var whose text begins with '@'.
fn is_stage_reference(&self) -> bool {
    if self.check(TokenType::DAt) {
        return true;
    }
    self.check(TokenType::Var) && self.peek().text.starts_with('@')
}
/// Detect a MySQL identifier that begins with digits (e.g. `00f`, `1d`):
/// a Number token immediately followed — with no whitespace between — by a
/// Var/Identifier token. Only applies when the dialect is MySQL.
fn is_mysql_numeric_identifier(&self) -> bool {
    let mysql = matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::MySQL)
    );
    if !mysql || !self.check(TokenType::Number) {
        return false;
    }
    match self.tokens.get(self.current + 1) {
        Some(next) => {
            let curr = &self.tokens[self.current];
            // Adjacent when the number ends exactly where the next token
            // starts (span.end is exclusive).
            curr.span.end == next.span.start
                && matches!(next.token_type, TokenType::Var | TokenType::Identifier)
        }
        None => false,
    }
}
/// Parse a MySQL numeric-starting identifier (e.g., `00f`, `1d`) by merging
/// the leading Number token with any immediately adjacent Var/Identifier
/// tokens into a single name.
fn parse_mysql_numeric_identifier(&mut self) -> Identifier {
    // `advance()` returns the token by value, so its text can be moved
    // straight into `name` (the previous `.clone()` here was redundant).
    let mut name = self.advance().text;
    // Merge with connected identifier/var tokens
    while !self.is_at_end()
        && self.is_connected()
        && (self.check(TokenType::Var) || self.check(TokenType::Identifier))
    {
        let tok = self.advance();
        name.push_str(&tok.text);
    }
    Identifier {
        name,
        // sqlglot treats this as an identifier token and re-emits it quoted.
        quoted: true,
        trailing_comments: Vec::new(),
        span: None,
    }
}
/// Check whether `text` (already uppercased, with its leading underscore
/// kept) names one of MySQL's character-set introducers, e.g. `_UTF8MB4`.
fn is_mysql_charset_introducer(text: &str) -> bool {
    const INTRODUCERS: &[&str] = &[
        "_ARMSCII8", "_ASCII", "_BIG5", "_BINARY", "_CP1250", "_CP1251",
        "_CP1256", "_CP1257", "_CP850", "_CP852", "_CP866", "_CP932",
        "_DEC8", "_EUCJPMS", "_EUCKR", "_GB18030", "_GB2312", "_GBK",
        "_GEOSTD8", "_GREEK", "_HEBREW", "_HP8", "_KEYBCS2", "_KOI8R",
        "_KOI8U", "_LATIN1", "_LATIN2", "_LATIN5", "_LATIN7", "_MACCE",
        "_MACROMAN", "_SJIS", "_SWE7", "_TIS620", "_UCS2", "_UJIS",
        "_UTF8", "_UTF16", "_UTF16LE", "_UTF32", "_UTF8MB3", "_UTF8MB4",
    ];
    INTRODUCERS.contains(&text)
}
/// True when the cursor sits on something usable as a name: an identifier
/// token of any kind, or any keyword token.
fn is_identifier_or_keyword_token(&self) -> bool {
    self.check_keyword() || self.is_identifier_token()
}
/// Consume an identifier and return it together with its `quoted` flag.
///
/// Also accepts MySQL numeric-leading identifiers (`1d`) and, for
/// ClickHouse, `{name:Type}` query parameters (rendered back into the
/// identifier name).
fn expect_identifier_with_quoted(&mut self) -> Result<Identifier> {
    if self.is_mysql_numeric_identifier() {
        return Ok(self.parse_mysql_numeric_identifier());
    }
    if self.is_identifier_token() {
        let token = self.advance();
        return Ok(Identifier {
            quoted: token.token_type == TokenType::QuotedIdentifier,
            name: token.text,
            trailing_comments: Vec::new(),
            span: None,
        });
    }
    let clickhouse = matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    );
    if clickhouse && self.check(TokenType::LBrace) {
        // ClickHouse query parameter: {name:Type}
        if let Some(Expression::Parameter(param)) = self.parse_clickhouse_braced_parameter()? {
            return Ok(Identifier {
                name: format!(
                    "{{{}: {}}}",
                    param.name.as_deref().unwrap_or(""),
                    param.expression.as_deref().unwrap_or("")
                ),
                quoted: false,
                trailing_comments: Vec::new(),
                span: None,
            });
        }
        return Err(self.parse_error("Expected identifier, got LBrace"));
    }
    let got = if self.is_at_end() {
        "end of input".to_string()
    } else {
        format!("{:?}", self.peek().token_type)
    };
    Err(self.parse_error(format!("Expected identifier, got {:?}", got)))
}
/// Parse a dotted chain of identifiers (e.g. `mydb.hr` → `[mydb, hr]`).
fn parse_identifier_parts(&mut self) -> Result<Vec<Identifier>> {
    let mut parts = vec![self.expect_identifier_with_quoted()?];
    while self.match_token(TokenType::Dot) {
        parts.push(self.expect_identifier_with_quoted()?);
    }
    Ok(parts)
}
/// Consume an identifier *or keyword* (column names, field names, …) and
/// return it with its `quoted` flag.
///
/// Also handles: MySQL numeric-leading identifiers, positional/anonymous
/// parameters (`$1` / `?`), and ClickHouse `{name:Type}` parameters.
fn expect_identifier_or_keyword_with_quoted(&mut self) -> Result<Identifier> {
    // MySQL numeric-starting identifiers (e.g., 00f, 1d)
    if self.is_mysql_numeric_identifier() {
        return Ok(self.parse_mysql_numeric_identifier());
    }
    // Parameters double as identifier placeholders. For positional
    // parameters like $23 the token text is just "23" (without the $).
    if self.check(TokenType::Parameter) {
        let token = self.advance();
        let positional =
            !token.text.is_empty() && token.text.chars().all(|c| c.is_ascii_digit());
        let name = if positional {
            // Reconstruct $N for positional parameters like $1, $2, $23.
            format!("${}", token.text)
        } else {
            // Plain ? placeholder or other parameter
            "?".to_string()
        };
        return Ok(Identifier {
            name,
            quoted: false,
            trailing_comments: Vec::new(),
            span: None,
        });
    }
    if self.is_identifier_or_keyword_token() {
        let token = self.advance();
        return Ok(Identifier {
            quoted: token.token_type == TokenType::QuotedIdentifier,
            name: token.text,
            trailing_comments: Vec::new(),
            span: None,
        });
    }
    let clickhouse = matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    );
    if clickhouse && self.check(TokenType::LBrace) {
        // ClickHouse query parameter: {name:Type} — its rendered form is
        // used as the identifier name.
        if let Some(Expression::Parameter(param)) = self.parse_clickhouse_braced_parameter()? {
            return Ok(Identifier {
                name: format!(
                    "{{{}: {}}}",
                    param.name.as_deref().unwrap_or(""),
                    param.expression.as_deref().unwrap_or("")
                ),
                quoted: false,
                trailing_comments: Vec::new(),
                span: None,
            });
        }
        return Err(self.parse_error("Expected identifier, got LBrace"));
    }
    let got = if self.is_at_end() {
        "end of input".to_string()
    } else {
        format!("{:?}", self.peek().token_type)
    };
    Err(self.parse_error(format!("Expected identifier, got {:?}", got)))
}
/// Consume an identifier and return just its text (quoting ignored).
/// ClickHouse `{name:Type}` parameters are accepted and rendered inline.
fn expect_identifier(&mut self) -> Result<String> {
    if self.is_identifier_token() {
        return Ok(self.advance().text);
    }
    let clickhouse = matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    );
    if clickhouse && self.check(TokenType::LBrace) {
        if let Some(Expression::Parameter(param)) = self.parse_clickhouse_braced_parameter()? {
            return Ok(format!(
                "{{{}: {}}}",
                param.name.as_deref().unwrap_or(""),
                param.expression.as_deref().unwrap_or("")
            ));
        }
        return Err(self.parse_error("Expected identifier, got LBrace"));
    }
    let got = if self.is_at_end() {
        "end of input".to_string()
    } else {
        format!("{:?}", self.peek().token_type)
    };
    Err(self.parse_error(format!("Expected identifier, got {:?}", got)))
}
/// Consume an identifier or any keyword (aliases, column names, …) and
/// return just its text. ClickHouse `{name:Type}` parameters are accepted
/// and rendered inline.
fn expect_identifier_or_keyword(&mut self) -> Result<String> {
    if self.is_identifier_or_keyword_token() {
        return Ok(self.advance().text);
    }
    let clickhouse = matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    );
    if clickhouse && self.check(TokenType::LBrace) {
        if let Some(Expression::Parameter(param)) = self.parse_clickhouse_braced_parameter()? {
            return Ok(format!(
                "{{{}: {}}}",
                param.name.as_deref().unwrap_or(""),
                param.expression.as_deref().unwrap_or("")
            ));
        }
        return Err(self.parse_error("Expected identifier, got LBrace"));
    }
    let got = if self.is_at_end() {
        "end of input".to_string()
    } else {
        format!("{:?}", self.peek().token_type)
    };
    Err(self.parse_error(format!("Expected identifier, got {:?}", got)))
}
/// Consume an identifier or a "safe" (non-structural) keyword and return its
/// text — used for CTE names, CREATE TABLE column names, etc. More
/// permissive than `expect_identifier`, but still rejects structural
/// keywords. ClickHouse `{name:Type}` parameters are accepted.
fn expect_identifier_or_safe_keyword(&mut self) -> Result<String> {
    if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
        return Ok(self.advance().text);
    }
    let clickhouse = matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    );
    if clickhouse && self.check(TokenType::LBrace) {
        if let Some(Expression::Parameter(param)) = self.parse_clickhouse_braced_parameter()? {
            return Ok(format!(
                "{{{}: {}}}",
                param.name.as_deref().unwrap_or(""),
                param.expression.as_deref().unwrap_or("")
            ));
        }
        return Err(self.parse_error("Expected identifier, got LBrace"));
    }
    let got = if self.is_at_end() {
        "end of input".to_string()
    } else {
        format!("{:?}", self.peek().token_type)
    };
    Err(self.parse_error(format!("Expected identifier, got {:?}", got)))
}
/// Consume an identifier or safe keyword, preserving the `quoted` flag.
/// Also accepts MySQL numeric-leading identifiers.
fn expect_identifier_or_safe_keyword_with_quoted(&mut self) -> Result<Identifier> {
    if self.is_mysql_numeric_identifier() {
        return Ok(self.parse_mysql_numeric_identifier());
    }
    if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
        let token = self.advance();
        return Ok(Identifier {
            quoted: token.token_type == TokenType::QuotedIdentifier,
            name: token.text,
            trailing_comments: Vec::new(),
            span: None,
        });
    }
    let got = if self.is_at_end() {
        "end of input".to_string()
    } else {
        format!("{:?}", self.peek().token_type)
    };
    Err(self.parse_error(format!("Expected identifier, got {:?}", got)))
}
/// Consume a token usable as an alias name, preserving the `quoted` flag.
///
/// Accepts identifiers, alias-capable keywords, any keyword after an
/// explicit AS in ClickHouse, and (DuckDB only) string literals used as
/// identifiers, e.g. `WITH 'x' AS (...)`.
fn expect_identifier_or_alias_keyword_with_quoted(&mut self) -> Result<Identifier> {
    // ClickHouse: any keyword can be used as a table alias after explicit AS.
    // Guarded with is_at_end() so peek() is never consulted past the last
    // token (the original evaluated peek() unconditionally here, unlike
    // every sibling helper).
    let ch_keyword = !self.is_at_end()
        && matches!(
            self.config.dialect,
            Some(crate::dialects::DialectType::ClickHouse)
        )
        && self.peek().token_type.is_keyword();
    if self.is_identifier_token()
        || self.can_be_alias_keyword()
        || self.is_safe_keyword_as_identifier()
        || ch_keyword
    {
        let token = self.advance();
        return Ok(Identifier {
            quoted: token.token_type == TokenType::QuotedIdentifier,
            name: token.text,
            trailing_comments: Vec::new(),
            span: None,
        });
    }
    // DuckDB allows string literals as identifiers (e.g., WITH 'x' AS (...))
    if self.check(TokenType::String)
        && matches!(
            self.config.dialect,
            Some(crate::dialects::DialectType::DuckDB)
        )
    {
        let token = self.advance();
        return Ok(Identifier {
            name: token.text,
            quoted: true,
            trailing_comments: Vec::new(),
            span: None,
        });
    }
    let got = if self.is_at_end() {
        "end of input".to_string()
    } else {
        format!("{:?}", self.peek().token_type)
    };
    Err(self.parse_error(format!("Expected identifier, got {:?}", got)))
}
/// Consume an (optionally dash-negated) integer literal and return its value.
fn expect_number(&mut self) -> Result<i64> {
    let negative = self.match_token(TokenType::Dash);
    if !self.check(TokenType::Number) {
        return Err(self.parse_error("Expected number"));
    }
    let text = self.advance().text;
    match text.parse::<i64>() {
        Ok(val) => Ok(if negative { -val } else { val }),
        Err(_) => Err(self.parse_error(format!("Invalid number: {}", text))),
    }
}
/// Parse a comma-separated list of expressions.
/// Supports named arguments with => or := syntax.
///
/// `capacity_hint` pre-sizes the result vector (0 means "unknown size").
///
/// Per element, in order:
/// 1. try a named argument `name => value` / `name := value`, backtracking
///    to `start_pos` when the separator never appears;
/// 2. try an optional `AS alias` (Spark/Hive: `IF(cond, val AS name, ...)`),
///    where ClickHouse additionally lets the aliased expression continue
///    with a binary operator;
/// 3. attach trailing comments, wrapping in `Annotated` only for
///    literal-like nodes that have no trailing_comments field of their own.
fn parse_expression_list_with_capacity(
&mut self,
capacity_hint: usize,
) -> Result<Vec<Expression>> {
let mut expressions = Vec::with_capacity(capacity_hint);
loop {
// Check if this is a named argument: identifier => value or identifier := value
// Also check for safe keywords (like TYPE, FORMAT, etc.) that can be used as named arg names
let expr = if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
let start_pos = self.current;
let name = self.expect_identifier_or_keyword_with_quoted()?;
if self.match_token(TokenType::FArrow) {
// name => value
let value = self.parse_expression()?;
Expression::NamedArgument(Box::new(NamedArgument {
name,
value,
separator: NamedArgSeparator::DArrow,
}))
} else if self.match_token(TokenType::ColonEq) {
// name := value
let value = self.parse_expression()?;
Expression::NamedArgument(Box::new(NamedArgument {
name,
value,
separator: NamedArgSeparator::ColonEq,
}))
} else {
// Not a named argument, backtrack and parse as regular expression
self.current = start_pos;
self.parse_expression()?
}
} else {
self.parse_expression()?
};
// Check for AS alias on this expression (Spark/Hive: IF(cond, val AS name, ...))
let expr = if self.check(TokenType::As) {
let as_pos = self.current;
self.skip(); // consume AS
// Check if what follows looks like an alias name
if self.is_identifier_token()
|| self.is_safe_keyword_as_identifier()
|| (matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.peek().token_type.is_keyword())
{
let alias = self.expect_identifier_or_keyword_with_quoted()?;
let alias_expr = Expression::Alias(Box::new(Alias {
this: expr,
alias,
column_aliases: Vec::new(),
pre_alias_comments: Vec::new(),
trailing_comments: Vec::new(),
inferred_type: None,
}));
// ClickHouse: if followed by an operator, the alias is part of a bigger expression
// e.g., blockSize() AS bs < 1000 means (blockSize() AS bs) < 1000
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && matches!(
self.peek().token_type,
TokenType::Lt
| TokenType::Gt
| TokenType::Lte
| TokenType::Gte
| TokenType::Eq
| TokenType::Neq
| TokenType::Plus
| TokenType::Dash
| TokenType::Star
| TokenType::Slash
| TokenType::Percent
| TokenType::And
| TokenType::Or
| TokenType::Like
| TokenType::Not
| TokenType::In
| TokenType::Is
| TokenType::Between
) {
// Parse the operator and right-hand side
let op_token = self.advance();
let right = self.parse_expression()?;
// NOTE(review): the gate above also admits Like/Not/In/Is/Between,
// but the match below has no arms for them — for those tokens the
// operator and `right` are consumed yet discarded, falling back to
// the bare alias. Verify this is intentional.
match op_token.token_type {
TokenType::Lt => {
Expression::Lt(Box::new(BinaryOp::new(alias_expr, right)))
}
TokenType::Gt => {
Expression::Gt(Box::new(BinaryOp::new(alias_expr, right)))
}
TokenType::Lte => {
Expression::Lte(Box::new(BinaryOp::new(alias_expr, right)))
}
TokenType::Gte => {
Expression::Gte(Box::new(BinaryOp::new(alias_expr, right)))
}
TokenType::Eq => {
Expression::Eq(Box::new(BinaryOp::new(alias_expr, right)))
}
TokenType::Neq => {
Expression::Neq(Box::new(BinaryOp::new(alias_expr, right)))
}
TokenType::Plus => {
Expression::Add(Box::new(BinaryOp::new(alias_expr, right)))
}
TokenType::Dash => {
Expression::Sub(Box::new(BinaryOp::new(alias_expr, right)))
}
TokenType::Star => {
Expression::Mul(Box::new(BinaryOp::new(alias_expr, right)))
}
TokenType::Slash => {
Expression::Div(Box::new(BinaryOp::new(alias_expr, right)))
}
TokenType::Percent => {
Expression::Mod(Box::new(BinaryOp::new(alias_expr, right)))
}
TokenType::And => {
Expression::And(Box::new(BinaryOp::new(alias_expr, right)))
}
TokenType::Or => {
Expression::Or(Box::new(BinaryOp::new(alias_expr, right)))
}
_ => alias_expr, // fallback, shouldn't happen
}
} else {
alias_expr
}
} else {
// Not an alias name, backtrack
self.current = as_pos;
expr
}
} else {
expr
};
// Check for trailing comments on this expression
// Only wrap in Annotated for expression types that don't have their own trailing_comments field
let trailing_comments = self.previous_trailing_comments().to_vec();
let expr = if trailing_comments.is_empty() {
expr
} else {
// Only annotate Literals and other types that don't capture trailing comments
match &expr {
Expression::Literal(_) | Expression::Boolean(_) | Expression::Null(_) => {
Expression::Annotated(Box::new(Annotated {
this: expr,
trailing_comments,
}))
}
// For expressions that already capture trailing_comments, don't double-wrap
_ => expr,
}
};
expressions.push(expr);
if !self.match_token(TokenType::Comma) {
break;
}
// ClickHouse: allow trailing comma before RParen in expression lists
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.check(TokenType::RParen)
{
break;
}
}
Ok(expressions)
}
/// Parse a comma-separated list of expressions.
/// Supports named arguments with => or := syntax.
///
/// Convenience wrapper over `parse_expression_list_with_capacity` with no
/// pre-allocation hint (capacity 0).
fn parse_expression_list(&mut self) -> Result<Vec<Expression>> {
self.parse_expression_list_with_capacity(0)
}
/// Estimate how many top-level (comma-separated) expressions lie between
/// the cursor and the next unmatched `)`.
///
/// Used purely as a `Vec` pre-allocation hint for large lists such as
/// `IN (...)`; an inexact estimate is harmless.
fn estimate_expression_list_capacity_until_rparen(&self) -> usize {
    if self.current >= self.tokens.len() || self.check(TokenType::RParen) {
        return 0;
    }
    // Depths of the three bracket kinds; commas count only at depth zero.
    let (mut parens, mut brackets, mut braces) = (0usize, 0usize, 0usize);
    let mut commas = 0usize;
    let mut saw_token = false;
    for token in &self.tokens[self.current..] {
        let at_top = parens == 0 && brackets == 0 && braces == 0;
        match token.token_type {
            // The unmatched `)` terminates the scan.
            TokenType::RParen if at_top => break,
            TokenType::LParen => parens += 1,
            TokenType::RParen => parens = parens.saturating_sub(1),
            TokenType::LBracket => brackets += 1,
            TokenType::RBracket => brackets = brackets.saturating_sub(1),
            TokenType::LBrace => braces += 1,
            TokenType::RBrace => braces = braces.saturating_sub(1),
            TokenType::Comma if at_top => commas += 1,
            _ => {}
        }
        saw_token = true;
    }
    // N top-level commas separate N + 1 expressions.
    if saw_token {
        commas + 1
    } else {
        0
    }
}
/// Parse function arguments with lambda support (for TRANSFORM and similar functions).
/// Handles Snowflake typed lambda syntax: `a int -> a + 1`
///
/// Detection strategy: tentatively consume an identifier, then look for
/// `->` (untyped lambda) or `<type> ->` (typed lambda); on failure the
/// cursor is restored to `saved_pos` and the element is re-parsed as an
/// ordinary expression.
fn parse_function_args_with_lambda(&mut self) -> Result<Vec<Expression>> {
let mut expressions = Vec::new();
loop {
// Try to detect typed lambda: identifier type -> body
let expr = if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
let saved_pos = self.current;
let ident_token = self.advance();
let ident_name = ident_token.text.clone();
// Check for arrow (simple lambda: a -> body)
if self.match_token(TokenType::Arrow) {
let body = self.parse_expression()?;
Expression::Lambda(Box::new(LambdaExpr {
parameters: vec![Identifier::new(ident_name)],
body,
colon: false,
parameter_types: Vec::new(),
}))
}
// Check for type annotation followed by arrow: a int -> body
// (FArrow / ColonEq are excluded so named arguments are not mistaken
// for type annotations.)
else if !self.is_at_end()
&& self.is_type_keyword()
&& !self.check(TokenType::FArrow)
&& !self.check(TokenType::ColonEq)
{
let type_annotation = self.parse_data_type()?;
if self.match_token(TokenType::Arrow) {
let body = self.parse_expression()?;
Expression::Lambda(Box::new(LambdaExpr {
parameters: vec![Identifier::new(ident_name)],
body,
colon: false,
parameter_types: vec![Some(type_annotation)],
}))
} else {
// Type parsed but no arrow followed — rewind and re-parse plainly.
self.current = saved_pos;
self.parse_expression()?
}
} else {
// Not a lambda, backtrack and parse as regular expression
self.current = saved_pos;
self.parse_expression()?
}
} else {
self.parse_expression()?
};
expressions.push(expr);
if !self.match_token(TokenType::Comma) {
break;
}
}
Ok(expressions)
}
/// Parse a comma-separated list of expressions for VALUES tuples
/// This variant supports AS aliases on each element (Hive syntax): VALUES (1 AS a, 2 AS b, 3)
fn parse_values_expression_list(&mut self) -> Result<Vec<Expression>> {
let mut expressions = Vec::new();
loop {
// Handle DEFAULT keyword in VALUES - output as unquoted Var (like Python sqlglot's exp.var("DEFAULT"))
let expr = if self.match_token(TokenType::Default) {
Expression::Var(Box::new(crate::expressions::Var {
this: "DEFAULT".to_string(),
}))
} else {
self.parse_expression()?
};
// Capture trailing comments on the expression (e.g., `1 /* c4 */`)
let trailing_comments = self.previous_trailing_comments().to_vec();
// Wrap in Annotated only for literal-like nodes, which have no
// trailing_comments field of their own; other nodes already carry theirs.
let expr = if !trailing_comments.is_empty() {
match &expr {
Expression::Literal(_) | Expression::Boolean(_) | Expression::Null(_) => {
Expression::Annotated(Box::new(crate::expressions::Annotated {
this: expr,
trailing_comments,
}))
}
_ => expr,
}
} else {
expr
};
// Check for AS alias on this value element (Hive syntax)
let expr_with_alias = if self.match_token(TokenType::As) {
let alias = self.expect_identifier_or_keyword_with_quoted()?;
Expression::Alias(Box::new(Alias::new(expr, alias)))
} else {
expr
};
expressions.push(expr_with_alias);
if !self.match_token(TokenType::Comma) {
break;
}
// ClickHouse: trailing comma in VALUES, e.g., (1, 2, 3,)
// NOTE(review): unlike the other list parsers, this check is NOT gated
// on the ClickHouse dialect, so every dialect tolerates a trailing
// comma here — confirm intentional.
if self.check(TokenType::RParen) {
break;
}
}
Ok(expressions)
}
/// Parse a comma-separated list of identifiers (e.g. CTE column aliases).
/// Keywords that are safe as identifiers are accepted. ClickHouse also
/// permits dotted names (`INSERT INTO t (n.a, n.b)`) — with any keyword
/// allowed after the dot (e.g. `replace.from`) — and a trailing comma
/// before `)`.
fn parse_identifier_list(&mut self) -> Result<Vec<Identifier>> {
    let clickhouse = matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    );
    let mut identifiers = Vec::new();
    loop {
        // Record the quoting flag before the token is consumed.
        let quoted = self.check(TokenType::QuotedIdentifier);
        let mut name = self.expect_identifier_or_safe_keyword()?;
        if clickhouse {
            // Fold dotted segments into one name; the quoted flag reflects
            // only the first segment.
            while self.match_token(TokenType::Dot) {
                name.push('.');
                name.push_str(&self.expect_identifier_or_keyword_with_quoted()?.name);
            }
        }
        identifiers.push(Identifier {
            name,
            quoted,
            trailing_comments: self.previous_trailing_comments().to_vec(),
            span: None,
        });
        if !self.match_token(TokenType::Comma) {
            break;
        }
        // ClickHouse: tolerate a trailing comma right before `)`.
        if clickhouse && self.check(TokenType::RParen) {
            break;
        }
    }
    Ok(identifiers)
}
/// Parse a comma-separated list of column references for USING clause
/// Supports qualified names like table.col but extracts only the column part
///
/// ClickHouse extras: `USING *` wildcards and `USING (col AS alias)` — for
/// the latter the alias name replaces the column name. The returned
/// `quoted` flag always reflects the last consumed segment (final column
/// part, or the alias when one was given).
fn parse_using_column_list(&mut self) -> Result<Vec<Identifier>> {
let mut identifiers = Vec::new();
loop {
// ClickHouse: USING * — wildcard in USING clause
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.match_token(TokenType::Star)
{
identifiers.push(Identifier::new("*".to_string()));
if !self.match_token(TokenType::Comma) {
break;
}
continue;
}
// Check if it's a quoted identifier before consuming
let quoted = self.check(TokenType::QuotedIdentifier);
let mut name = self.expect_identifier_or_safe_keyword()?;
let mut final_quoted = quoted;
// Handle qualified names: table.column or schema.table.column
// Keep only the final column name
while self.match_token(TokenType::Dot) {
// Re-sample the quoting flag for each new segment before consuming it.
final_quoted = self.check(TokenType::QuotedIdentifier);
name = self.expect_identifier_or_safe_keyword()?;
}
// ClickHouse: USING (col AS alias) — consume optional AS alias
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.match_token(TokenType::As)
{
// Use the alias name instead
final_quoted = self.check(TokenType::QuotedIdentifier);
name = self.expect_identifier_or_safe_keyword()?;
}
let trailing_comments = self.previous_trailing_comments().to_vec();
identifiers.push(Identifier {
name,
quoted: final_quoted,
trailing_comments,
span: None,
});
if !self.match_token(TokenType::Comma) {
break;
}
}
Ok(identifiers)
}
/// Parse a comma-separated list of identifiers for index columns.
///
/// Supports MySQL prefix lengths (`col(16)`) and an explicit sort order
/// (`col DESC` / `col ASC`); both are folded into the rendered name.
fn parse_index_identifier_list(&mut self) -> Result<Vec<Identifier>> {
    let mut out = Vec::new();
    loop {
        let quoted = self.check(TokenType::QuotedIdentifier);
        let base = self.expect_identifier_or_safe_keyword()?;
        let trailing_comments = self.previous_trailing_comments().to_vec();
        let mut rendered = base.clone();
        // Optional MySQL prefix length, e.g. col(16).
        if self.match_token(TokenType::LParen) {
            if self.check(TokenType::Number) {
                let len = self.advance().text;
                rendered = format!("{}({})", base, len);
            }
            self.expect(TokenType::RParen)?;
        }
        // Optional explicit sort direction.
        if self.match_token(TokenType::Desc) {
            rendered.push_str(" DESC");
        } else if self.match_token(TokenType::Asc) {
            rendered.push_str(" ASC");
        }
        out.push(Identifier {
            name: rendered,
            quoted,
            trailing_comments,
            span: None,
        });
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    Ok(out)
}
// =============================================================================
// Auto-generated Missing Parser Methods
// Total: 296 methods
// =============================================================================
/// parse_add_column - Stub ported from Python _parse_add_column.
///
/// Currently only consumes a leading FIRST/AFTER positional keyword
/// (MySQL column-placement syntax) and always returns `Ok(None)`; the
/// caller (parse_alter_table_add) treats `None` as "no column parsed".
/// NOTE(review): the original header claimed this calls parse_column /
/// parse_column_def_with_exists — it does not; the full port appears
/// pending. Confirm whether discarding FIRST/AFTER placement is intended.
#[allow(unused_variables, unused_mut)]
pub fn parse_add_column(&mut self) -> Result<Option<Expression>> {
    if self.match_texts(&["FIRST", "AFTER"]) {
        // Matched one of: FIRST, AFTER — token consumed, placement discarded.
        return Ok(None);
    }
    Ok(None)
}
/// parse_alias - Parses the alias part of an expression: `AS name` or a
/// bare `name`. Returns the alias as an identifier expression, or `None`
/// when no plain identifier follows.
/// Python: _parse_alias
pub fn parse_alias(&mut self) -> Result<Option<Expression>> {
    // The explicit AS keyword is optional; consume it when present.
    let _explicit = self.match_token(TokenType::Alias);
    // Only a plain identifier is accepted as the alias name.
    match self.parse_id_var()? {
        Some(Expression::Identifier(id)) => Ok(Some(Expression::Identifier(id))),
        _ => Ok(None),
    }
}
/// parse_alias_with_expr - Wraps an already-parsed expression in an
/// [`Alias`] when an alias follows. Supports `AS (col1, col2, ...)`
/// column-alias lists and plain `[AS] name` aliases; returns the
/// expression unchanged when no alias is found.
pub fn parse_alias_with_expr(
    &mut self,
    this: Option<Expression>,
) -> Result<Option<Expression>> {
    let expr = match this {
        Some(e) => e,
        None => return Ok(None),
    };
    // Either token form of the AS keyword introduces an explicit alias.
    let has_as = self.match_token(TokenType::Alias) || self.match_token(TokenType::As);
    // Explicit column aliases: AS (col1, col2, ...)
    if has_as && self.match_token(TokenType::LParen) {
        let mut cols = Vec::new();
        loop {
            match self.parse_id_var()? {
                Some(Expression::Identifier(id)) => cols.push(id),
                // Non-identifier results are skipped but parsing continues.
                Some(_) => {}
                None => break,
            }
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
        self.match_token(TokenType::RParen);
        if !cols.is_empty() {
            return Ok(Some(Expression::Alias(Box::new(Alias {
                this: expr,
                // No table alias name when only column aliases were given.
                alias: Identifier::new(String::new()),
                column_aliases: cols,
                pre_alias_comments: Vec::new(),
                trailing_comments: Vec::new(),
                inferred_type: None,
            }))));
        }
    }
    // Simple alias: a bare identifier after the (optional) AS.
    match self.parse_id_var()? {
        Some(Expression::Identifier(id)) => Ok(Some(Expression::Alias(Box::new(Alias {
            this: expr,
            alias: id,
            column_aliases: Vec::new(),
            pre_alias_comments: Vec::new(),
            trailing_comments: Vec::new(),
            inferred_type: None,
        })))),
        _ => Ok(Some(expr)),
    }
}
/// parse_alter_diststyle - Parses ALTER TABLE DISTSTYLE clause (Redshift).
/// Accepts the simple styles ALL / EVEN / AUTO, or KEY DISTKEY <column>.
/// Python: parser.py:7797-7802
#[allow(unused_variables, unused_mut)]
pub fn parse_alter_diststyle(&mut self) -> Result<Option<Expression>> {
    // Simple styles: DISTSTYLE ALL | EVEN | AUTO.
    if self.match_texts(&["ALL", "EVEN", "AUTO"]) {
        let style = self.previous().text.to_ascii_uppercase();
        let inner = Expression::Identifier(Identifier::new(style));
        return Ok(Some(Expression::DistStyleProperty(Box::new(
            DistStyleProperty {
                this: Box::new(inner),
            },
        ))));
    }
    // DISTSTYLE KEY DISTKEY <column>.
    if self.match_text_seq(&["KEY", "DISTKEY"]) {
        if let Some(column) = self.parse_column()? {
            return Ok(Some(Expression::DistStyleProperty(Box::new(
                DistStyleProperty {
                    this: Box::new(column),
                },
            ))));
        }
    }
    Ok(None)
}
/// parse_alter_session - Parses ALTER SESSION SET / UNSET statements.
/// SET takes `var = value` assignments; UNSET takes bare variable names
/// (the resulting node carries `unset: true`).
/// Python: parser.py:7879-7889
pub fn parse_alter_session(&mut self) -> Result<Option<Expression>> {
    // ALTER SESSION SET var = value [, ...]
    if self.match_token(TokenType::Set) {
        let mut items = Vec::new();
        loop {
            if let Some(assignment) = self.parse_set_item_assignment()? {
                items.push(assignment);
            }
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
        return Ok(Some(Expression::AlterSession(Box::new(AlterSession {
            expressions: items,
            unset: None,
        }))));
    }
    // ALTER SESSION UNSET var [, ...]
    if self.match_text_seq(&["UNSET"]) {
        let mut names = Vec::new();
        loop {
            // For UNSET the bare identifier is stored directly.
            if let Some(name) = self.parse_id_var()? {
                names.push(name);
            }
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
        let unset_flag = Expression::Boolean(BooleanLiteral { value: true });
        return Ok(Some(Expression::AlterSession(Box::new(AlterSession {
            expressions: names,
            unset: Some(Box::new(unset_flag)),
        }))));
    }
    Ok(None)
}
/// parse_alter_sortkey - Parses ALTER TABLE SORTKEY clause (Redshift).
/// Python: parser.py:7804-7816
///
/// Thin wrapper: delegates to [`parse_alter_sortkey_impl`] with no
/// compound flag (plain SORTKEY, not COMPOUND SORTKEY).
pub fn parse_alter_sortkey(&mut self) -> Result<Option<Expression>> {
    self.parse_alter_sortkey_impl(None)
}
/// Implementation of parse_alter_sortkey with an optional compound flag.
/// Accepts either a wrapped column list `(c1, c2, ...)` or the AUTO / NONE
/// styles; returns `Ok(None)` when neither form is present.
pub fn parse_alter_sortkey_impl(
    &mut self,
    compound: Option<bool>,
) -> Result<Option<Expression>> {
    // Render the compound flag as a boxed boolean literal, if set.
    let compound_expr = |c: Option<bool>| {
        c.map(|value| Box::new(Expression::Boolean(BooleanLiteral { value })))
    };
    // COMPOUND SORTKEY: the SORTKEY keyword itself still has to be eaten.
    if compound == Some(true) {
        self.match_text_seq(&["SORTKEY"]);
    }
    // SORTKEY (col1, col2, ...)
    if self.check(TokenType::LParen) {
        let wrapped = self.parse_wrapped_id_vars()?;
        // The wrapped list arrives as a Tuple; unwrap its expressions.
        let expressions = match wrapped {
            Some(Expression::Tuple(t)) => t.expressions,
            _ => Vec::new(),
        };
        return Ok(Some(Expression::AlterSortKey(Box::new(AlterSortKey {
            this: None,
            expressions,
            compound: compound_expr(compound),
        }))));
    }
    // SORTKEY AUTO / SORTKEY NONE
    if self.match_texts(&["AUTO", "NONE"]) {
        let style = self.previous().text.to_ascii_uppercase();
        return Ok(Some(Expression::AlterSortKey(Box::new(AlterSortKey {
            this: Some(Box::new(Expression::Identifier(Identifier::new(style)))),
            expressions: Vec::new(),
            compound: compound_expr(compound),
        }))));
    }
    Ok(None)
}
/// parse_alter_table_add - Parses an ALTER TABLE ADD clause.
/// Python: parser.py:7715-7751
///
/// Tries, in order: MySQL ADD [FULLTEXT|SPATIAL] {INDEX|KEY}, table
/// constraints (PRIMARY KEY / FOREIGN KEY / UNIQUE / CHECK / CONSTRAINT),
/// ADD COLUMNS (...), ADD [IF NOT EXISTS] PARTITION (...), and finally a
/// single added column. Returns `Ok(None)` when nothing matched.
pub fn parse_alter_table_add(&mut self) -> Result<Option<Expression>> {
    // The ADD keyword itself is optional in some calling contexts.
    self.match_text_seq(&["ADD"]);
    // MySQL index form:
    //   ADD [FULLTEXT|SPATIAL] {INDEX|KEY} [name] (columns) [USING {BTREE|HASH}]
    let kind = if self.match_identifier("FULLTEXT") {
        Some("FULLTEXT".to_string())
    } else if self.match_identifier("SPATIAL") {
        Some("SPATIAL".to_string())
    } else {
        None
    };
    // NOTE(review): when FULLTEXT/SPATIAL matched, this branch is entered
    // even if no INDEX/KEY token follows — confirm this is intended for an
    // `ADD FULLTEXT (cols)` shorthand.
    if self.check(TokenType::Index) || self.check(TokenType::Key) || kind.is_some() {
        // Consume INDEX or KEY; remember which keyword was used so the
        // generator can reproduce it.
        let use_key_keyword = if self.match_token(TokenType::Key) {
            true
        } else {
            self.match_token(TokenType::Index);
            false
        };
        // Optional index name — absent when the column list or USING follows
        // immediately.
        let name = if !self.check(TokenType::LParen) && !self.check(TokenType::Using) {
            Some(self.expect_identifier_with_quoted()?)
        } else {
            None
        };
        // Column list; entries may carry prefix lengths and ASC/DESC.
        self.expect(TokenType::LParen)?;
        let columns = self.parse_index_identifier_list()?;
        self.expect(TokenType::RParen)?;
        // Optional trailing modifiers such as USING BTREE|HASH.
        let modifiers = self.parse_constraint_modifiers();
        // Wrapped in an AlterTable with an empty table name; the caller owns
        // the real table reference.
        return Ok(Some(Expression::AlterTable(Box::new(AlterTable {
            name: TableRef::new(""),
            actions: vec![AlterTableAction::AddConstraint(TableConstraint::Index {
                name,
                columns,
                kind,
                modifiers,
                use_key_keyword,
                expression: None,
                index_type: None,
                granularity: None,
            })],
            if_exists: false,
            algorithm: None,
            lock: None,
            with_check: None,
            partition: None,
            on_cluster: None,
            table_modifier: None,
        }))));
    }
    // ADD {PRIMARY KEY | FOREIGN KEY | UNIQUE | CHECK | CONSTRAINT ...}
    if self.check(TokenType::PrimaryKey)
        || self.check(TokenType::ForeignKey)
        || self.check(TokenType::Unique)
        || self.check(TokenType::Check)
        || self.check(TokenType::Constraint)
    {
        // Parse a single constraint and return it wrapped in Constraint.
        if let Some(constraint) = self.parse_constraint()? {
            return Ok(Some(Expression::Constraint(Box::new(Constraint {
                this: Box::new(constraint),
                expressions: Vec::new(),
            }))));
        }
    }
    // ADD COLUMNS (...) — batch column addition.
    if self.match_text_seq(&["COLUMNS"]) {
        // Parse schema or column definitions.
        if let Some(schema) = self.parse_schema()? {
            return Ok(Some(schema));
        }
    }
    // ADD [IF NOT EXISTS] PARTITION (...): IF NOT EXISTS must be consumed
    // before deciding between PARTITION and a plain column.
    let exists = self.match_keywords(&[TokenType::If, TokenType::Not, TokenType::Exists]);
    if self.match_token(TokenType::Partition) {
        // PARTITION(key = value, ...)
        self.expect(TokenType::LParen)?;
        let mut partition_exprs = Vec::new();
        loop {
            if let Some(expr) = self.parse_conjunction()? {
                partition_exprs.push(expr);
            }
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
        self.expect(TokenType::RParen)?;
        let partition = Expression::Partition(Box::new(crate::expressions::Partition {
            expressions: partition_exprs,
            subpartition: false,
        }));
        // Optional LOCATION property after the partition spec.
        let location = if self.match_text_seq(&["LOCATION"]) {
            self.parse_property()?
        } else {
            None
        };
        return Ok(Some(Expression::AddPartition(Box::new(AddPartition {
            this: Box::new(partition),
            exists,
            location: location.map(Box::new),
        }))));
    }
    // Fallback: a single added column definition (after the PARTITION check).
    if let Some(column) = self.parse_add_column()? {
        return Ok(Some(column));
    }
    Ok(None)
}
/// parse_alter_table_alter - Parses ALTER TABLE ALTER COLUMN clause
/// Python: parser.py:7753-7795
pub fn parse_alter_table_alter(&mut self) -> Result<Option<Expression>> {
// Match optional COLUMN keyword
self.match_token(TokenType::Column);
// Parse the column name - required for ALTER COLUMN
let column = match self.parse_field()? {
Some(c) => c,
None => return Ok(None),
};
// DROP DEFAULT
if self.match_keywords(&[TokenType::Drop, TokenType::Default]) {
return Ok(Some(Expression::AlterColumn(Box::new(AlterColumn {
this: Box::new(column),
dtype: None,
collate: None,
using: None,
default: None,
drop: Some(Box::new(Expression::Boolean(BooleanLiteral {
value: true,
}))),
allow_null: None,
comment: None,
visible: None,
rename_to: None,
}))));
}
// SET DEFAULT expr
if self.match_keywords(&[TokenType::Set, TokenType::Default]) {
let default_val = self.parse_disjunction()?;
return Ok(Some(Expression::AlterColumn(Box::new(AlterColumn {
this: Box::new(column),
dtype: None,
collate: None,
using: None,
default: default_val.map(Box::new),
drop: None,
allow_null: None,
comment: None,
visible: None,
rename_to: None,
}))));
}
// COMMENT 'string'
if self.match_token(TokenType::Comment) {
let comment_val = self.parse_string()?;
return Ok(Some(Expression::AlterColumn(Box::new(AlterColumn {
this: Box::new(column),
dtype: None,
collate: None,
using: None,
default: None,
drop: None,
allow_null: None,
comment: comment_val.map(Box::new),
visible: None,
rename_to: None,
}))));
}
// DROP NOT NULL
if self.match_text_seq(&["DROP", "NOT", "NULL"]) {
return Ok(Some(Expression::AlterColumn(Box::new(AlterColumn {
this: Box::new(column),
dtype: None,
collate: None,
using: None,
default: None,
drop: Some(Box::new(Expression::Boolean(BooleanLiteral {
value: true,
}))),
allow_null: Some(Box::new(Expression::Boolean(BooleanLiteral {
value: true,
}))),
comment: None,
visible: None,
rename_to: None,
}))));
}
// SET NOT NULL
if self.match_text_seq(&["SET", "NOT", "NULL"]) {
return Ok(Some(Expression::AlterColumn(Box::new(AlterColumn {
this: Box::new(column),
dtype: None,
collate: None,
using: None,
default: None,
drop: None,
allow_null: Some(Box::new(Expression::Boolean(BooleanLiteral {
value: false,
}))),
comment: None,
visible: None,
rename_to: None,
}))));
}
// SET VISIBLE
if self.match_text_seq(&["SET", "VISIBLE"]) {
return Ok(Some(Expression::AlterColumn(Box::new(AlterColumn {
this: Box::new(column),
dtype: None,
collate: None,
using: None,
default: None,
drop: None,
allow_null: None,
comment: None,
visible: Some(Box::new(Expression::Identifier(Identifier::new(
"VISIBLE".to_string(),
)))),
rename_to: None,
}))));
}
// SET INVISIBLE
if self.match_text_seq(&["SET", "INVISIBLE"]) {
return Ok(Some(Expression::AlterColumn(Box::new(AlterColumn {
this: Box::new(column),
dtype: None,
collate: None,
using: None,
default: None,
drop: None,
allow_null: None,
comment: None,
visible: Some(Box::new(Expression::Identifier(Identifier::new(
"INVISIBLE".to_string(),
)))),
rename_to: None,
}))));
}
// [SET DATA] TYPE type [COLLATE collation] [USING expr]
self.match_text_seq(&["SET", "DATA"]);
self.match_text_seq(&["TYPE"]);
let dtype = self.parse_types()?;
let collate = if self.match_token(TokenType::Collate) {
self.parse_term()?
} else {
None
};
let using = if self.match_token(TokenType::Using) {
self.parse_disjunction()?
} else {
None
};
Ok(Some(Expression::AlterColumn(Box::new(AlterColumn {
this: Box::new(column),
dtype: dtype.map(Box::new),
collate: collate.map(Box::new),
using: using.map(Box::new),
default: None,
drop: None,
allow_null: None,
comment: None,
visible: None,
rename_to: None,
}))))
}
/// Parse ALTER TABLE DROP action.
/// Note: Main ALTER TABLE DROP logic is implemented inline in
/// parse_alter_table; this method provides a separate entry point for the
/// same functionality. Handles DROP PARTITION, DROP FOREIGN KEY, the
/// Spark/Databricks `DROP COLUMNS (...)` batch form, and plain
/// comma-separated column drops.
pub fn parse_alter_table_drop(&mut self) -> Result<Option<Expression>> {
    // Collapse 0/1/many parsed columns into the conventional return shape.
    fn collapse(mut cols: Vec<Expression>) -> Option<Expression> {
        match cols.len() {
            0 => None,
            1 => Some(cols.remove(0)),
            _ => Some(Expression::Tuple(Box::new(Tuple { expressions: cols }))),
        }
    }
    // IF EXISTS may precede PARTITION.
    let exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
    // DROP PARTITION ...
    if self.check(TokenType::Partition) {
        return self.parse_drop_partition_with_exists(exists);
    }
    // DROP FOREIGN KEY name (Oracle/MySQL)
    if self.match_keywords(&[TokenType::ForeignKey, TokenType::Key]) {
        let name = self.expect_identifier_with_quoted()?;
        let alter = AlterTable {
            name: TableRef::new(""),
            actions: vec![AlterTableAction::DropForeignKey { name }],
            if_exists: false,
            algorithm: None,
            lock: None,
            with_check: None,
            partition: None,
            on_cluster: None,
            table_modifier: None,
        };
        return Ok(Some(Expression::AlterTable(Box::new(alter))));
    }
    // DROP COLUMNS (col1, col2, ...) — Spark/Databricks batch form.
    if self.check_identifier("COLUMNS") && self.check_next(TokenType::LParen) {
        self.skip(); // consume COLUMNS
        self.expect(TokenType::LParen)?;
        let mut cols = Vec::new();
        loop {
            if let Some(col) = self.parse_identifier()? {
                cols.push(col);
            }
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
        self.expect(TokenType::RParen)?;
        return Ok(collapse(cols));
    }
    // Plain DROP [COLUMN] col [, [DROP] col ...]
    let mut cols = Vec::new();
    if let Some(col) = self.parse_drop_column()? {
        cols.push(col);
    }
    while self.match_token(TokenType::Comma) {
        // Each subsequent column may repeat the DROP keyword.
        self.match_token(TokenType::Drop);
        if let Some(col) = self.parse_drop_column()? {
            cols.push(col);
        }
    }
    Ok(collapse(cols))
}
/// parse_alter_table_rename - Parses an ALTER TABLE RENAME clause.
/// Supports RENAME COLUMN old TO new, RENAME TO new_table, and the SQLite
/// shorthand RENAME old TO new (without the COLUMN keyword).
/// Python: parser.py:7828-7841
pub fn parse_alter_table_rename(&mut self) -> Result<Option<Expression>> {
    // RENAME COLUMN [IF EXISTS] old TO new
    if self.match_token(TokenType::Column) {
        let exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
        let old = match self.parse_column()? {
            Some(c) => c,
            None => return Ok(None),
        };
        if !self.match_text_seq(&["TO"]) {
            return Ok(None);
        }
        let target = self.parse_column()?;
        return Ok(Some(Expression::RenameColumn(Box::new(RenameColumn {
            this: Box::new(old),
            to: target.map(Box::new),
            exists,
        }))));
    }
    // RENAME TO new_table — the bare table expression is returned and the
    // caller interprets it as the rename target.
    if self.match_text_seq(&["TO"]) {
        return self.parse_table();
    }
    // SQLite shorthand: RENAME old TO new (no COLUMN keyword).
    if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
        let old = match self.parse_column()? {
            Some(c) => c,
            None => return Ok(None),
        };
        if !self.match_text_seq(&["TO"]) {
            // An identifier not followed by TO is malformed here.
            return Err(self.parse_error("Expected COLUMN or TO after RENAME"));
        }
        let target = self.parse_column()?;
        return Ok(Some(Expression::RenameColumn(Box::new(RenameColumn {
            this: Box::new(old),
            to: target.map(Box::new),
            exists: false,
        }))));
    }
    Ok(None)
}
/// parse_alter_table_set - Parses an ALTER TABLE SET clause.
/// Python: parser.py:7843-7877
///
/// Dispatches on the keyword(s) following SET; the first matching branch
/// consumes its payload and returns an `Expression::AlterSet`. Returns
/// `Ok(None)` only when nothing after SET is recognized.
pub fn parse_alter_table_set(&mut self) -> Result<Option<Expression>> {
    // Shared accumulator; each branch fills only the fields it needs.
    let mut alter_set = AlterSet {
        expressions: Vec::new(),
        option: None,
        tablespace: None,
        access_method: None,
        file_format: None,
        copy_options: None,
        tag: None,
        location: None,
        serde: None,
    };
    // SET AUTHORIZATION [ROLE] user — stored as a single rendered string.
    if self.match_token(TokenType::Authorization) {
        let mut auth_text = "AUTHORIZATION ".to_string();
        if self.match_texts(&["ROLE"]) {
            auth_text.push_str("ROLE ");
        }
        let user = self.expect_identifier()?;
        auth_text.push_str(&user);
        alter_set.option = Some(Box::new(Expression::Identifier(Identifier::new(auth_text))));
        return Ok(Some(Expression::AlterSet(Box::new(alter_set))));
    }
    // SET PROPERTIES prop = value, ... (keys may be quoted strings).
    if self.match_text_seq(&["PROPERTIES"]) {
        let mut assignments = Vec::new();
        loop {
            // Property name: string literal or identifier.
            let key = if self.check(TokenType::String) {
                self.parse_string()?.unwrap_or(Expression::Null(Null))
            } else {
                let name = self.expect_identifier()?;
                Expression::Identifier(Identifier::new(name))
            };
            self.expect(TokenType::Eq)?;
            // Value: the DEFAULT keyword or an arbitrary expression.
            let value = if self.match_token(TokenType::Default) {
                Expression::Identifier(Identifier::new("DEFAULT".to_string()))
            } else {
                self.parse_expression()?
            };
            // Each property becomes a key = value equality node.
            assignments.push(Expression::Eq(Box::new(BinaryOp {
                left: key,
                right: value,
                left_comments: Vec::new(),
                operator_comments: Vec::new(),
                trailing_comments: Vec::new(),
                inferred_type: None,
            })));
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
        alter_set.expressions = assignments;
        return Ok(Some(Expression::AlterSet(Box::new(alter_set))));
    }
    // SET (properties) or SET TABLE PROPERTIES (properties).
    if self.check(TokenType::LParen) || self.match_text_seq(&["TABLE", "PROPERTIES"]) {
        let assignments = self.parse_wrapped_csv_assignments()?;
        alter_set.expressions = assignments;
        return Ok(Some(Expression::AlterSet(Box::new(alter_set))));
    }
    // SET FILESTREAM_ON = value.
    if self.match_text_seq(&["FILESTREAM_ON"]) {
        if let Some(assignment) = self.parse_assignment()? {
            alter_set.expressions = vec![assignment];
        }
        return Ok(Some(Expression::AlterSet(Box::new(alter_set))));
    }
    // SET LOGGED or SET UNLOGGED.
    if self.match_texts(&["LOGGED", "UNLOGGED"]) {
        let option = self.previous().text.to_ascii_uppercase();
        alter_set.option = Some(Box::new(Expression::Identifier(Identifier::new(option))));
        return Ok(Some(Expression::AlterSet(Box::new(alter_set))));
    }
    // SET WITHOUT CLUSTER or SET WITHOUT OIDS.
    // NOTE(review): if WITHOUT is consumed but neither CLUSTER nor OIDS
    // follows, control falls through to the branches below with WITHOUT
    // already eaten — confirm whether match_text_seq backtracks here.
    if self.match_text_seq(&["WITHOUT"]) {
        if self.match_texts(&["CLUSTER", "OIDS"]) {
            let option = format!("WITHOUT {}", self.previous().text.to_ascii_uppercase());
            alter_set.option = Some(Box::new(Expression::Identifier(Identifier::new(option))));
            return Ok(Some(Expression::AlterSet(Box::new(alter_set))));
        }
    }
    // SET LOCATION path.
    if self.match_text_seq(&["LOCATION"]) {
        let loc = self.parse_field()?;
        alter_set.location = loc.map(Box::new);
        return Ok(Some(Expression::AlterSet(Box::new(alter_set))));
    }
    // SET ACCESS METHOD method.
    if self.match_text_seq(&["ACCESS", "METHOD"]) {
        let method = self.parse_field()?;
        alter_set.access_method = method.map(Box::new);
        return Ok(Some(Expression::AlterSet(Box::new(alter_set))));
    }
    // SET TABLESPACE name.
    if self.match_text_seq(&["TABLESPACE"]) {
        let tablespace = self.parse_field()?;
        alter_set.tablespace = tablespace.map(Box::new);
        return Ok(Some(Expression::AlterSet(Box::new(alter_set))));
    }
    // SET FILE FORMAT format or SET FILEFORMAT format.
    if self.match_text_seq(&["FILE", "FORMAT"]) || self.match_text_seq(&["FILEFORMAT"]) {
        let format = self.parse_field()?;
        alter_set.file_format = format.map(Box::new);
        return Ok(Some(Expression::AlterSet(Box::new(alter_set))));
    }
    // SET STAGE_FILE_FORMAT = (options).
    if self.match_text_seq(&["STAGE_FILE_FORMAT"]) {
        let options = self.parse_wrapped_options()?;
        alter_set.file_format = options.map(Box::new);
        return Ok(Some(Expression::AlterSet(Box::new(alter_set))));
    }
    // SET STAGE_COPY_OPTIONS = (options).
    if self.match_text_seq(&["STAGE_COPY_OPTIONS"]) {
        let options = self.parse_wrapped_options()?;
        alter_set.copy_options = options.map(Box::new);
        return Ok(Some(Expression::AlterSet(Box::new(alter_set))));
    }
    // SET TAG / SET TAGS name = value, ...
    if self.match_text_seq(&["TAG"]) || self.match_text_seq(&["TAGS"]) {
        let mut tags = Vec::new();
        loop {
            if let Some(assignment) = self.parse_assignment()? {
                tags.push(assignment);
            }
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
        // The collected assignments are kept as a Tuple (empty list → no tag).
        if !tags.is_empty() {
            alter_set.tag = Some(Box::new(Expression::Tuple(Box::new(Tuple {
                expressions: tags,
            }))));
        }
        return Ok(Some(Expression::AlterSet(Box::new(alter_set))));
    }
    // SET SERDE 'class' [WITH SERDEPROPERTIES (...)].
    if self.match_text_seq(&["SERDE"]) {
        let serde = self.parse_field()?;
        alter_set.serde = serde.map(Box::new);
        // Optional wrapped SERDEPROPERTIES payload.
        let properties = self.parse_wrapped()?;
        if let Some(props) = properties {
            alter_set.expressions = vec![props];
        }
        return Ok(Some(Expression::AlterSet(Box::new(alter_set))));
    }
    Ok(None)
}
/// Helper to parse a parenthesized, comma-separated list of assignments.
/// Returns an empty list when no opening paren is present.
fn parse_wrapped_csv_assignments(&mut self) -> Result<Vec<Expression>> {
    // Nothing wrapped — nothing to do.
    if !self.match_token(TokenType::LParen) {
        return Ok(Vec::new());
    }
    let mut items = Vec::new();
    loop {
        if let Some(item) = self.parse_assignment()? {
            items.push(item);
        }
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    self.expect(TokenType::RParen)?;
    Ok(items)
}
/// parse_analyze - Parses an ANALYZE statement (ported from Python
/// _parse_analyze, parser.py:7937-7999).
///
/// Covers the union of dialect variants: leading options, an optional
/// object kind (TABLE / INDEX / TABLES FROM|IN / DATABASE / CLUSTER),
/// dialect-specific sub-statements (COMPUTE / DELETE / VALIDATE / LIST /
/// DROP / UPDATE / ALL|PREDICATE COLUMNS), a PostgreSQL column list,
/// PARTITION, WITH modes/properties, and StarRocks PROPERTIES.
/// Calls: parse_table_parts, parse_number, parse_partition, and the
/// parse_analyze_* sub-parsers.
#[allow(unused_variables, unused_mut)]
pub fn parse_analyze(&mut self) -> Result<Option<Expression>> {
    // Bare `ANALYZE` with nothing after it: an empty Analyze node.
    if self.is_at_end() {
        return Ok(Some(Expression::Analyze(Box::new(Analyze {
            kind: None,
            this: None,
            options: Vec::new(),
            mode: None,
            partition: None,
            expression: None,
            properties: Vec::new(),
            columns: Vec::new(),
        }))));
    }
    // Leading options (VERBOSE, SKIP_LOCKED, ...); StarRocks uses
    // FULL and SAMPLE as options too.
    let mut options = Vec::new();
    let analyze_styles = [
        "VERBOSE",
        "SKIP_LOCKED",
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "SAMPLE",
    ];
    while self.match_texts(&analyze_styles) {
        let style = self.previous().text.to_ascii_uppercase();
        if style == "BUFFER_USAGE_LIMIT" {
            // BUFFER_USAGE_LIMIT takes a numeric argument; it is folded
            // back into a single identifier string for generation.
            if let Some(num) = self.parse_number()? {
                options.push(Expression::Identifier(Identifier::new(format!(
                    "BUFFER_USAGE_LIMIT {}",
                    if let Expression::Literal(lit) = &num {
                        if let Literal::Number(n) = lit.as_ref() {
                            n.clone()
                        } else {
                            String::new()
                        }
                    } else {
                        String::new()
                    }
                ))));
            }
        } else {
            options.push(Expression::Identifier(Identifier::new(style)));
        }
    }
    // kind = what object class is analyzed; this = the target object;
    // inner_expression = a dialect-specific sub-statement, when present.
    let mut this: Option<Expression> = None;
    let mut kind: Option<String> = None;
    let mut inner_expression: Option<Expression> = None;
    // Dispatch on the object kind keyword.
    if self.match_token(TokenType::Table) {
        kind = Some("TABLE".to_string());
        this = self.parse_table_parts()?;
    } else if self.match_token(TokenType::Index) {
        kind = Some("INDEX".to_string());
        this = self.parse_table_parts()?;
    } else if self.match_text_seq(&["TABLES"]) {
        kind = Some("TABLES".to_string());
        // TABLES FROM db / TABLES IN db — the direction keyword is kept
        // as part of the kind string.
        if self.match_token(TokenType::From) || self.match_token(TokenType::In) {
            let dir = self.previous().text.to_ascii_uppercase();
            kind = Some(format!("TABLES {}", dir));
            // Parse database name as identifier.
            let db_name = self.expect_identifier()?;
            this = Some(Expression::Identifier(Identifier::new(db_name)));
        }
    } else if self.match_text_seq(&["DATABASE"]) {
        kind = Some("DATABASE".to_string());
        this = self.parse_table_parts()?;
    } else if self.match_text_seq(&["CLUSTER"]) {
        kind = Some("CLUSTER".to_string());
        this = self.parse_table_parts()?;
    } else if self.match_texts(&["LOCAL", "NO_WRITE_TO_BINLOG"]) {
        // MySQL: ANALYZE LOCAL TABLE tbl / ANALYZE NO_WRITE_TO_BINLOG TABLE tbl.
        let opt_text = self.previous().text.to_ascii_uppercase();
        options.push(Expression::Identifier(Identifier::new(opt_text)));
        if self.match_token(TokenType::Table) {
            kind = Some("TABLE".to_string());
        }
        this = self.parse_table_parts()?;
    } else if self.match_text_seq(&["COMPUTE"]) {
        // Sub-statement keywords are tried before falling back to a table
        // reference (Python: self.ANALYZE_EXPRESSION_PARSERS).
        inner_expression = self.parse_analyze_statistics()?;
    } else if self.match_text_seq(&["DELETE"]) {
        inner_expression = self.parse_analyze_delete()?;
    } else if self.match_text_seq(&["VALIDATE"]) {
        inner_expression = self.parse_analyze_validate()?;
    } else if self.match_text_seq(&["LIST"]) {
        inner_expression = self.parse_analyze_list()?;
    } else if self.match_text_seq(&["DROP"]) {
        inner_expression = self.parse_analyze_histogram()?;
    } else if self.match_text_seq(&["UPDATE"]) {
        inner_expression = self.parse_analyze_histogram()?;
    } else if self.match_texts(&["ALL", "PREDICATE"]) {
        inner_expression = self.parse_analyze_columns()?;
    } else {
        // No kind keyword: parse the table directly (Presto allows an
        // empty kind — https://prestodb.io/docs/current/sql/analyze.html).
        this = self.parse_table_parts()?;
    }
    // Optional column list: ANALYZE tbl(col1, col2) (PostgreSQL).
    let columns = if this.is_some() && self.match_token(TokenType::LParen) {
        let mut cols = Vec::new();
        loop {
            cols.push(self.expect_identifier_or_keyword()?);
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
        self.expect(TokenType::RParen)?;
        cols
    } else {
        Vec::new()
    };
    // Optional PARTITION clause.
    let partition = self.parse_partition()?;
    // Optional WITH clause: SYNC/ASYNC MODE (StarRocks) or a wrapped
    // property list (Presto).
    let mut mode = None;
    let mut properties = Vec::new();
    if self.match_text_seq(&["WITH", "SYNC", "MODE"]) {
        mode = Some(Box::new(Expression::Identifier(Identifier::new(
            "WITH SYNC MODE".to_string(),
        ))));
    } else if self.match_text_seq(&["WITH", "ASYNC", "MODE"]) {
        mode = Some(Box::new(Expression::Identifier(Identifier::new(
            "WITH ASYNC MODE".to_string(),
        ))));
    } else if self.match_text_seq(&["WITH"]) {
        // Presto syntax: ANALYZE tbl WITH (prop1 = val1, prop2 = val2).
        if self.match_token(TokenType::LParen) {
            loop {
                // Parse key [= value] pairs.
                let key = self.parse_id_var()?;
                if key.is_none() {
                    break;
                }
                if self.match_token(TokenType::Eq) {
                    let value = self.parse_primary()?;
                    if let Some(k) = key {
                        properties.push(Expression::Property(Box::new(Property {
                            this: Box::new(k),
                            value: Some(Box::new(value)),
                        })));
                    }
                } else if let Some(k) = key {
                    // Bare key without a value.
                    properties.push(Expression::Property(Box::new(Property {
                        this: Box::new(k),
                        value: None,
                    })));
                }
                if !self.match_token(TokenType::Comma) {
                    break;
                }
            }
            self.expect(TokenType::RParen)?;
        }
    }
    // Sub-statements may also follow the target (e.g. ANALYZE TABLE tbl
    // VALIDATE ...); only tried if one wasn't already parsed above.
    if inner_expression.is_none() {
        if self.match_text_seq(&["COMPUTE"]) {
            inner_expression = self.parse_analyze_statistics()?;
        } else if self.match_text_seq(&["DELETE"]) {
            inner_expression = self.parse_analyze_delete()?;
        } else if self.match_text_seq(&["VALIDATE"]) {
            inner_expression = self.parse_analyze_validate()?;
        } else if self.match_text_seq(&["LIST"]) {
            inner_expression = self.parse_analyze_list()?;
        } else if self.match_text_seq(&["DROP"]) {
            inner_expression = self.parse_analyze_histogram()?;
        } else if self.match_text_seq(&["UPDATE"]) {
            inner_expression = self.parse_analyze_histogram()?;
        } else if self.match_texts(&["ALL", "PREDICATE"]) {
            // Redshift: ANALYZE tbl ALL COLUMNS / ANALYZE tbl PREDICATE COLUMNS.
            inner_expression = self.parse_analyze_columns()?;
        }
    }
    // StarRocks: ANALYZE TABLE tbl PROPERTIES ('k' = v, ...) — only when
    // no properties were collected from a WITH clause above.
    if properties.is_empty() && self.match_text_seq(&["PROPERTIES"]) {
        if self.match_token(TokenType::LParen) {
            loop {
                // Key may be a string literal or an identifier.
                let key = if self.check(TokenType::String) {
                    self.skip();
                    let key_str = self.previous().text.clone();
                    Expression::Literal(Box::new(Literal::String(key_str)))
                } else {
                    self.parse_id_var()?
                        .unwrap_or(Expression::Identifier(Identifier::new(String::new())))
                };
                if self.match_token(TokenType::Eq) {
                    let value = self.parse_primary()?;
                    properties.push(Expression::Property(Box::new(Property {
                        this: Box::new(key),
                        value: Some(Box::new(value)),
                    })));
                } else {
                    // Bare key without a value.
                    properties.push(Expression::Property(Box::new(Property {
                        this: Box::new(key),
                        value: None,
                    })));
                }
                if !self.match_token(TokenType::Comma) {
                    break;
                }
            }
            self.expect(TokenType::RParen)?;
        }
    }
    Ok(Some(Expression::Analyze(Box::new(Analyze {
        kind,
        this: this.map(Box::new),
        options,
        mode,
        partition: partition.map(Box::new),
        expression: inner_expression.map(Box::new),
        properties,
        columns,
    }))))
}
/// parse_analyze_columns - Parses ANALYZE ... {ALL|PREDICATE} COLUMNS.
/// Python: parser.py:8055-8059
/// Note: AnalyzeColumns not in expressions.rs, using Identifier instead.
pub fn parse_analyze_columns(&mut self) -> Result<Option<Expression>> {
    // The qualifier (e.g. ALL / PREDICATE) was consumed by the caller and
    // is still available as the previous token.
    let qualifier = self.previous().text.to_ascii_uppercase();
    if !self.match_text_seq(&["COLUMNS"]) {
        return Ok(None);
    }
    let rendered = format!("{} COLUMNS", qualifier);
    Ok(Some(Expression::Identifier(Identifier::new(rendered))))
}
/// parse_analyze_delete - Parses ANALYZE ... DELETE [SYSTEM] STATISTICS.
/// Python: parser.py:8061-8065
pub fn parse_analyze_delete(&mut self) -> Result<Option<Expression>> {
    // Optional SYSTEM qualifier: DELETE SYSTEM STATISTICS.
    let kind = self
        .match_text_seq(&["SYSTEM"])
        .then(|| "SYSTEM".to_string());
    if !self.match_text_seq(&["STATISTICS"]) {
        return Ok(None);
    }
    Ok(Some(Expression::AnalyzeDelete(Box::new(AnalyzeDelete {
        kind,
    }))))
}
/// parse_analyze_histogram - Parses `[DROP|UPDATE] HISTOGRAM ON col[, ...]`
/// plus the optional dialect tails:
///   `USING DATA 'json'`      (MySQL)
///   `WITH SYNC|ASYNC MODE`   (StarRocks)
///   `WITH n BUCKETS`         (StarRocks; may follow the MODE clause)
///   `MANUAL|AUTO UPDATE`     (MySQL 8.0.27+)
/// The DROP/UPDATE verb has already been consumed by the caller and is
/// recovered from the previous token. Returns `Ok(None)` when the input does
/// not continue with `HISTOGRAM ON`.
/// Python: parser.py:8073-8108
pub fn parse_analyze_histogram(&mut self) -> Result<Option<Expression>> {
    // Extracts the literal text of a parsed numeric literal ("" otherwise).
    // Shared by both `WITH n BUCKETS` branches below (previously duplicated).
    fn number_text(expr: &Expression) -> String {
        if let Expression::Literal(lit) = expr {
            if let Literal::Number(n) = lit.as_ref() {
                return n.clone();
            }
        }
        String::new()
    }
    let action = self.previous().text.to_ascii_uppercase(); // DROP or UPDATE
    let mut expressions = Vec::new();
    let mut update_options: Option<Box<Expression>> = None;
    let mut expression: Option<Box<Expression>> = None;
    if !self.match_text_seq(&["HISTOGRAM", "ON"]) {
        return Ok(None);
    }
    // Comma-separated list of target columns.
    loop {
        if let Some(col) = self.parse_column_reference()? {
            expressions.push(col);
        } else {
            break;
        }
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    // Parse USING DATA 'json_data' (MySQL) - must check before WITH
    if self.match_text_seq(&["USING", "DATA"]) {
        if self.check(TokenType::String) {
            let tok = self.advance();
            expression = Some(Box::new(Expression::Identifier(Identifier::new(format!(
                "USING DATA '{}'",
                tok.text
            )))));
        } else {
            expression = Some(Box::new(Expression::Identifier(Identifier::new(
                "USING DATA".to_string(),
            ))));
        }
    }
    // Parse WITH options - can have two WITH clauses:
    // 1. WITH SYNC/ASYNC MODE (optional)
    // 2. WITH n BUCKETS (optional)
    // StarRocks syntax: WITH SYNC MODE WITH 5 BUCKETS
    let mut mode_str: Option<String> = None;
    let mut buckets_str: Option<String> = None;
    if self.match_token(TokenType::With) {
        if self.match_texts(&["SYNC", "ASYNC"]) {
            let mode = self.previous().text.to_ascii_uppercase();
            if self.match_text_seq(&["MODE"]) {
                mode_str = Some(format!("WITH {} MODE", mode));
            }
            // Check for second WITH clause for buckets
            if self.match_token(TokenType::With) {
                if let Some(num) = self.parse_number()? {
                    if self.match_text_seq(&["BUCKETS"]) {
                        buckets_str = Some(format!("WITH {} BUCKETS", number_text(&num)));
                    }
                }
            }
        } else if let Some(num) = self.parse_number()? {
            // First WITH clause is directly the bucket count.
            if self.match_text_seq(&["BUCKETS"]) {
                buckets_str = Some(format!("WITH {} BUCKETS", number_text(&num)));
            }
        }
    }
    // Combine mode and buckets into a single expression; a WITH clause
    // overrides any USING DATA text captured above (matching the original).
    match (mode_str, buckets_str) {
        (Some(m), Some(b)) => {
            expression = Some(Box::new(Expression::Identifier(Identifier::new(format!(
                "{} {}",
                m, b
            )))));
        }
        (Some(m), None) => {
            expression = Some(Box::new(Expression::Identifier(Identifier::new(m))));
        }
        (None, Some(b)) => {
            expression = Some(Box::new(Expression::Identifier(Identifier::new(b))));
        }
        (None, None) => {}
    }
    // Parse AUTO UPDATE or MANUAL UPDATE (MySQL 8.0.27+)
    if self.match_texts(&["MANUAL", "AUTO"]) {
        let mode = self.previous().text.to_ascii_uppercase();
        if self.check(TokenType::Update) {
            update_options = Some(Box::new(Expression::Identifier(Identifier::new(mode))));
            self.skip(); // consume UPDATE
        }
    }
    Ok(Some(Expression::AnalyzeHistogram(Box::new(
        AnalyzeHistogram {
            this: Box::new(Expression::Identifier(Identifier::new(action))),
            expressions,
            expression,
            update_options,
        },
    ))))
}
/// parse_analyze_list - Parses Oracle `ANALYZE ... LIST CHAINED ROWS [INTO t]`.
/// The LIST keyword was consumed by the caller; returns `Ok(None)` when
/// CHAINED ROWS does not follow.
/// Python: parser.py:8067-8070
pub fn parse_analyze_list(&mut self) -> Result<Option<Expression>> {
    if !self.match_text_seq(&["CHAINED", "ROWS"]) {
        return Ok(None);
    }
    // Optional INTO <table> destination for the chained-row report.
    let expression = self.parse_into()?.map(Box::new);
    Ok(Some(Expression::AnalyzeListChainedRows(Box::new(
        AnalyzeListChainedRows { expression },
    ))))
}
/// parse_analyze_statistics - Parses the `[DELTA] STATISTICS ...` tail of an
/// ANALYZE statement, e.g.
///   `ANALYZE TABLE t COMPUTE STATISTICS NOSCAN`
///   `ANALYZE TABLE t COMPUTE STATISTICS FOR COLUMNS a, b`
///   `ANALYZE TABLE t ESTIMATE STATISTICS SAMPLE 10 PERCENT`
/// The verb before STATISTICS (COMPUTE/ESTIMATE/...) was consumed by the
/// caller and is recovered from the previous token as `kind`.
/// Returns `Ok(None)` when the required STATISTICS keyword is missing
/// (DELTA, if present, has already been consumed by then).
/// Python: parser.py:8002-8031
pub fn parse_analyze_statistics(&mut self) -> Result<Option<Expression>> {
// The statistics verb (e.g. COMPUTE, ESTIMATE) consumed by the caller.
let kind = self.previous().text.to_ascii_uppercase();
// Optional DELTA modifier between the verb and STATISTICS.
let option = if self.match_text_seq(&["DELTA"]) {
Some(Box::new(Expression::Identifier(Identifier::new(
"DELTA".to_string(),
))))
} else {
None
};
// Expect STATISTICS keyword
if !self.match_text_seq(&["STATISTICS"]) {
return Ok(None);
}
let mut this: Option<Box<Expression>> = None;
let mut expressions = Vec::new();
// NOSCAN: gather statistics without scanning the table data.
if self.match_text_seq(&["NOSCAN"]) {
this = Some(Box::new(Expression::Identifier(Identifier::new(
"NOSCAN".to_string(),
))));
} else if self.match_token(TokenType::For) {
// FOR ALL COLUMNS / FOR COLUMNS col[, ...] — column-level statistics.
// ALL COLUMNS must be tried first since both start with the same token.
if self.match_text_seq(&["ALL", "COLUMNS"]) {
this = Some(Box::new(Expression::Identifier(Identifier::new(
"FOR ALL COLUMNS".to_string(),
))));
} else if self.match_text_seq(&["COLUMNS"]) {
this = Some(Box::new(Expression::Identifier(Identifier::new(
"FOR COLUMNS".to_string(),
))));
// Parse column list
loop {
if let Some(col) = self.parse_column_reference()? {
expressions.push(col);
} else {
break;
}
if !self.match_token(TokenType::Comma) {
break;
}
}
}
} else if self.match_text_seq(&["SAMPLE"]) {
// Parse SAMPLE number [PERCENT]
if let Some(sample) = self.parse_number()? {
let sample_kind = if self.match_token(TokenType::Percent) {
Some("PERCENT".to_string())
} else {
None
};
// kind degrades to "" when PERCENT is absent (row-count sampling).
expressions.push(Expression::AnalyzeSample(Box::new(AnalyzeSample {
kind: sample_kind.unwrap_or_default(),
sample: Some(Box::new(sample)),
})));
}
}
Ok(Some(Expression::AnalyzeStatistics(Box::new(
AnalyzeStatistics {
kind,
option,
this,
expressions,
},
))))
}
/// parse_analyze_validate - Parses the Oracle `ANALYZE ... VALIDATE` forms:
///   `VALIDATE REF UPDATE [SET DANGLING TO NULL]`
///   `VALIDATE STRUCTURE [CASCADE FAST | CASCADE COMPLETE ONLINE|OFFLINE [INTO t]]`
/// The VALIDATE keyword itself was consumed by the caller. Returns `Ok(None)`
/// when neither REF UPDATE nor STRUCTURE follows.
/// Python: parser.py:8034-8053
pub fn parse_analyze_validate(&mut self) -> Result<Option<Expression>> {
let mut kind = String::new();
let mut this: Option<Box<Expression>> = None;
let mut expression: Option<Box<Expression>> = None;
if self.match_text_seq(&["REF", "UPDATE"]) {
kind = "REF".to_string();
this = Some(Box::new(Expression::Identifier(Identifier::new(
"UPDATE".to_string(),
))));
// Optional SET DANGLING TO NULL extends the UPDATE text.
if self.match_text_seq(&["SET", "DANGLING", "TO", "NULL"]) {
this = Some(Box::new(Expression::Identifier(Identifier::new(
"UPDATE SET DANGLING TO NULL".to_string(),
))));
}
} else if self.match_text_seq(&["STRUCTURE"]) {
kind = "STRUCTURE".to_string();
if self.match_text_seq(&["CASCADE", "FAST"]) {
this = Some(Box::new(Expression::Identifier(Identifier::new(
"CASCADE FAST".to_string(),
))));
} else if self.match_text_seq(&["CASCADE", "COMPLETE"]) {
// NOTE(review): if CASCADE COMPLETE is not followed by ONLINE or
// OFFLINE, `this` stays None and the CASCADE COMPLETE text is
// dropped — verify against the Python reference whether that is
// the intended behavior.
if self.match_texts(&["ONLINE", "OFFLINE"]) {
let mode = self.previous().text.to_ascii_uppercase();
this = Some(Box::new(Expression::Identifier(Identifier::new(format!(
"CASCADE COMPLETE {}",
mode
)))));
// Optional INTO <table> destination for the error rows.
expression = self.parse_into()?.map(Box::new);
}
}
}
// Neither branch matched: not a VALIDATE form we recognize.
if kind.is_empty() {
return Ok(None);
}
Ok(Some(Expression::AnalyzeValidate(Box::new(
AnalyzeValidate {
kind,
this,
expression,
},
))))
}
/// parse_attach_detach - Parses DuckDB ATTACH/DETACH statements:
///   `ATTACH [DATABASE] [IF NOT EXISTS] 'path' [AS alias] [(opt [value], ...)]`
///   `DETACH [DATABASE] [IF EXISTS] name`
/// `is_attach` selects which of the two statements is being parsed; the
/// ATTACH/DETACH keyword itself was consumed by the caller.
/// Python: DuckDB._parse_attach_detach
pub fn parse_attach_detach(&mut self, is_attach: bool) -> Result<Expression> {
// ATTACH [DATABASE] [IF NOT EXISTS] 'path' [AS alias] [(options)]
// DETACH [DATABASE] [IF EXISTS] name
// DATABASE can be tokenized as TokenType::Database (keyword), not just Var
let _ = self.match_identifier("DATABASE") || self.match_token(TokenType::Database);
// IF NOT EXISTS only applies to ATTACH; IF EXISTS only to DETACH.
let exists = if is_attach {
self.match_text_seq(&["IF", "NOT", "EXISTS"])
} else {
self.match_text_seq(&["IF", "EXISTS"])
};
// Parse the expression (can be a path string, identifier, or expression like 'foo' || '.foo2'
// or NOT EXISTS(subquery) for conditional attach)
let this_expr = self.parse_expression()?;
// Check for AS alias; if present, wrap the target in an Alias node.
let this = if self.match_token(TokenType::As) {
let alias = self.expect_identifier_or_keyword_with_quoted()?;
Expression::Alias(Box::new(Alias {
this: this_expr,
alias,
column_aliases: Vec::new(),
pre_alias_comments: Vec::new(),
trailing_comments: Vec::new(),
inferred_type: None,
}))
} else {
this_expr
};
if is_attach {
// Parse optional (options), e.g. (READ_ONLY, TYPE sqlite)
let expressions = if self.match_token(TokenType::LParen) {
let mut opts = Vec::new();
loop {
// Parse option: KEY [VALUE]; the key is uppercased unconditionally.
let key_name = self.advance().text.to_ascii_uppercase();
let key = Expression::Identifier(Identifier::new(key_name));
// A value is present unless the next token already closes the option.
let value = if !self.check(TokenType::Comma) && !self.check(TokenType::RParen) {
// The value can be an identifier, string, boolean, etc.
let val_token = self.advance();
let val_expr = if val_token.token_type == TokenType::String {
Expression::Literal(Box::new(Literal::String(val_token.text.clone())))
} else if val_token.token_type == TokenType::True {
Expression::Boolean(BooleanLiteral { value: true })
} else if val_token.token_type == TokenType::False {
Expression::Boolean(BooleanLiteral { value: false })
} else {
Expression::Identifier(Identifier::new(val_token.text.clone()))
};
Some(Box::new(val_expr))
} else {
None
};
opts.push(Expression::AttachOption(Box::new(AttachOption {
this: Box::new(key),
expression: value,
})));
if !self.match_token(TokenType::Comma) {
break;
}
}
self.expect(TokenType::RParen)?;
opts
} else {
Vec::new()
};
Ok(Expression::Attach(Box::new(Attach {
this: Box::new(this),
exists,
expressions,
})))
} else {
Ok(Expression::Detach(Box::new(Detach {
this: Box::new(this),
exists,
})))
}
}
/// parse_install - Parses a DuckDB `INSTALL extension [FROM source]` statement.
/// `force` marks a preceding FORCE keyword (see [`Self::parse_force_statement`]).
/// Python: DuckDB._parse_install
pub fn parse_install(&mut self, force: bool) -> Result<Expression> {
    // Extension name (identifier or keyword).
    let this = Expression::Identifier(Identifier::new(
        self.expect_identifier_or_keyword()?,
    ));
    // Optional FROM source (string literal or identifier).
    let mut from_ = None;
    if self.match_token(TokenType::From) {
        from_ = Some(Box::new(self.parse_primary()?));
    }
    // The force flag is only materialized when set.
    let force = force.then(|| {
        Box::new(Expression::Boolean(BooleanLiteral { value: true }))
    });
    Ok(Expression::Install(Box::new(Install {
        this: Box::new(this),
        from_,
        force,
    })))
}
/// parse_force_statement - Parses DuckDB `FORCE INSTALL` / `FORCE CHECKPOINT`.
/// The FORCE keyword was consumed by the caller.
/// Python: DuckDB._parse_force
pub fn parse_force_statement(&mut self) -> Result<Expression> {
    // FORCE INSTALL re-dispatches to the INSTALL parser with force = true.
    if self.match_identifier("INSTALL") {
        self.parse_install(true)
    } else {
        // FORCE CHECKPOINT or anything else: swallow as a raw command.
        match self.parse_as_command()? {
            Some(cmd) => Ok(cmd),
            None => Err(self.parse_error("Failed to parse FORCE statement")),
        }
    }
}
/// parse_summarize_statement - Parses a DuckDB `SUMMARIZE [TABLE] target`
/// statement, where the target may be a query, a string literal, or a table
/// name. The SUMMARIZE keyword was consumed by the caller.
/// Python: DuckDB parser for SUMMARIZE
pub fn parse_summarize_statement(&mut self) -> Result<Expression> {
    // Optional TABLE keyword: SUMMARIZE TABLE t
    let has_table_kw = self.match_token(TokenType::Table);
    // Dispatch on the next token: query, string, or bare table name.
    let target = if self.check(TokenType::Select) || self.check(TokenType::With) {
        self.parse_select()?
    } else if self.check(TokenType::String) {
        self.parse_primary()?
    } else {
        match self.parse_table_parts()? {
            Some(table) => table,
            None => Expression::Identifier(Identifier::new(String::new())),
        }
    };
    let table = if has_table_kw {
        Some(Box::new(Expression::Boolean(BooleanLiteral { value: true })))
    } else {
        None
    };
    Ok(Expression::Summarize(Box::new(Summarize {
        this: Box::new(target),
        table,
    })))
}
/// parse_deallocate_prepare - Parses `DEALLOCATE PREPARE <name>`
/// (Presto/Trino syntax for deallocating prepared statements).
/// Both forms are preserved as raw [`Command`] text rather than a structured node.
pub fn parse_deallocate_prepare(&mut self) -> Result<Expression> {
    self.skip(); // consume DEALLOCATE
    if !self.match_identifier("PREPARE") {
        // Bare DEALLOCATE: collect everything up to the statement end verbatim.
        let mut parts = vec!["DEALLOCATE".to_string()];
        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
            parts.push(self.advance().text.clone());
        }
        return Ok(Expression::Command(Box::new(Command {
            this: parts.join(" "),
        })));
    }
    // DEALLOCATE PREPARE [name] — the name is optional.
    let name = if self.is_at_end() || self.check(TokenType::Semicolon) {
        String::new()
    } else {
        self.advance().text.clone()
    };
    let this = if name.is_empty() {
        "DEALLOCATE PREPARE".to_string()
    } else {
        format!("DEALLOCATE PREPARE {}", name)
    };
    Ok(Expression::Command(Box::new(Command { this })))
}
/// parse_as_command - Parses all remaining tokens as a raw [`Command`] expression.
/// Used as a fallback when no specific grammar rule applies: the statement is
/// preserved as text rather than as a structured AST node.
/// The previously consumed token (the statement keyword) becomes the command
/// prefix; everything up to EOF is appended, with single spaces inserted
/// between tokens except adjacent to punctuation (`.`, `:`, `::`).
/// Python: _parse_as_command
pub fn parse_as_command(&mut self) -> Result<Option<Expression>> {
    // The token just consumed (e.g. the statement keyword) starts the command text.
    let start_text = if self.current > 0 {
        self.tokens
            .get(self.current - 1)
            .map(|t| t.text.clone())
            .unwrap_or_default()
    } else {
        String::new()
    };
    // Consume all remaining tokens, remembering each token's type so we can
    // decide where spaces belong when re-joining.
    let mut tokens_info: Vec<(String, TokenType)> = Vec::new();
    while !self.is_at_end() {
        let token = self.advance();
        tokens_info.push((token.text.clone(), token.token_type.clone()));
    }
    // Re-join token texts, omitting spaces adjacent to punctuation tokens
    // (so `a . b` reconstructs as `a.b`).
    let mut expression = String::new();
    for (i, (text, token_type)) in tokens_info.iter().enumerate() {
        if i > 0 {
            let prev_type = &tokens_info[i - 1].1;
            let needs_space = !Self::is_punctuation_token(prev_type)
                && !Self::is_punctuation_token(token_type);
            if needs_space {
                expression.push(' ');
            }
        }
        expression.push_str(text);
    }
    Ok(Some(Expression::Command(Box::new(Command {
        this: if expression.is_empty() {
            start_text
        } else {
            format!("{} {}", start_text, expression)
        },
    }))))
}
/// Returns true for token types that glue to their neighbors without spaces
/// when reconstructing raw command text (`.`, `:`, `::`).
fn is_punctuation_token(token_type: &TokenType) -> bool {
    match token_type {
        TokenType::Dot | TokenType::Colon | TokenType::DColon => true,
        _ => false,
    }
}
/// Fallback to a raw [`Command`] expression from a saved token position.
/// Consumes tokens until a semicolon or EOF, then reconstructs the statement
/// text — verbatim from the original source when available, otherwise by
/// re-joining token texts (re-quoting string tokens, which store the
/// unquoted value).
/// NOTE(review): indexes `self.tokens[start_pos]` and slices
/// `source[start_span..end_span]` directly — assumes `start_pos` is in range
/// and spans lie on char boundaries; confirm at call sites.
fn fallback_to_command(&mut self, start_pos: usize) -> Result<Expression> {
let start_span = self.tokens[start_pos].span.start;
// Consume until semicolon or end
while !self.is_at_end() && !self.check(TokenType::Semicolon) {
self.skip();
}
let command_text = if let Some(ref source) = self.source {
// Slice the original SQL text between the saved start and the end of
// the last consumed token for a byte-accurate command body.
let end_span = if self.current > 0 {
self.tokens[self.current - 1].span.end
} else {
start_span
};
source[start_span..end_span].trim().to_string()
} else {
// Fallback: join token texts
let mut parts = Vec::new();
for i in start_pos..self.current {
if self.tokens[i].token_type == TokenType::String {
// Re-quote and escape embedded single quotes (SQL doubling).
parts.push(format!("'{}'", self.tokens[i].text.replace('\'', "''")));
} else {
parts.push(self.tokens[i].text.clone());
}
}
parts.join(" ")
};
Ok(Expression::Command(Box::new(Command {
this: command_text,
})))
}
/// parse_assignment - Parses assignment expressions (`variable := value`) and,
/// when the dialect is ClickHouse, the `cond ? then : else` ternary operator.
/// Assignment recurses on its right-hand side, so `a := b := c` nests to the
/// right as `a := (b := c)`.
/// Python: _parse_assignment
pub fn parse_assignment(&mut self) -> Result<Option<Expression>> {
// First parse a disjunction (left side of potential assignment)
let mut this = self.parse_disjunction()?;
// Handle := assignment operator
while self.match_token(TokenType::ColonEq) {
if let Some(left) = this {
// Recurse for the right-hand side (right-associative chains).
let right = self.parse_assignment()?;
if let Some(right_expr) = right {
this = Some(Expression::PropertyEQ(Box::new(BinaryOp {
left,
right: right_expr,
left_comments: Vec::new(),
operator_comments: Vec::new(),
trailing_comments: Vec::new(),
inferred_type: None,
})));
} else {
// Dangling `:=` with no right-hand side: keep the left as-is.
this = Some(left);
break;
}
} else {
break;
}
}
// ClickHouse ternary operator: condition ? true_value : false_value
// Parsed as: If(this=condition, true=true_value, false=false_value)
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) {
if let Some(condition) = this {
// `?` is tokenized as Parameter; a `?` immediately followed by `:`
// would mean an empty then-branch, which is an error.
if self.match_token(TokenType::Parameter) {
if self.check(TokenType::Colon) {
return Err(self.parse_error(
"Expected true expression after ? in ClickHouse ternary",
));
}
let true_value = self.parse_assignment()?.ok_or_else(|| {
self.parse_error("Expected true expression after ? in ClickHouse ternary")
})?;
// A missing else-branch defaults to NULL.
let false_value = if self.match_token(TokenType::Colon) {
self.parse_assignment()?.unwrap_or(Expression::Null(Null))
} else {
Expression::Null(Null)
};
return Ok(Some(Expression::IfFunc(Box::new(IfFunc {
original_name: None,
condition,
true_value,
false_value: Some(false_value),
inferred_type: None,
}))));
}
// No `?` followed: hand the condition back unchanged.
this = Some(condition);
}
}
Ok(this)
}
/// parse_auto_increment - Parses the tail of an auto-increment / identity
/// column option. Currently only START yields an AST node (an empty
/// GeneratedAsIdentity constraint); INCREMENT and ORDER are consumed but not
/// yet represented.
/// Implemented from Python _parse_auto_increment.
pub fn parse_auto_increment(&mut self) -> Result<Option<Expression>> {
    if self.match_text_seq(&["START"]) {
        let constraint = GeneratedAsIdentityColumnConstraint {
            this: None,
            expression: None,
            on_null: None,
            start: None,
            increment: None,
            minvalue: None,
            maxvalue: None,
            cycle: None,
            order: None,
        };
        return Ok(Some(Expression::GeneratedAsIdentityColumnConstraint(
            Box::new(constraint),
        )));
    }
    // INCREMENT / ORDER: recognized and consumed, but produce no node yet.
    if !self.match_text_seq(&["INCREMENT"]) {
        let _ = self.match_text_seq(&["ORDER"]);
    }
    Ok(None)
}
/// parse_auto_property - Parses the tail of an AUTO property.
/// REFRESH is consumed when present but not yet represented in the AST.
/// Implemented from Python _parse_auto_property.
pub fn parse_auto_property(&mut self) -> Result<Option<Expression>> {
    let _ = self.match_text_seq(&["REFRESH"]);
    Ok(None)
}
/// parse_between - Parses the optional SYMMETRIC/ASYMMETRIC modifier of a
/// BETWEEN predicate. The modifier is consumed when present but not yet
/// represented in the AST.
/// Implemented from Python _parse_between.
pub fn parse_between(&mut self) -> Result<Option<Expression>> {
    // Only one of the two modifiers can appear; try SYMMETRIC first.
    if !self.match_text_seq(&["SYMMETRIC"]) {
        let _ = self.match_text_seq(&["ASYMMETRIC"]);
    }
    Ok(None)
}
/// parse_bitwise - Parses bitwise OR/XOR/AND expressions by delegating to
/// `parse_bitwise_or` in the operator precedence chain.
/// Returns `Ok(None)` when parsing fails without consuming any token (i.e.
/// there is simply no expression here); propagates errors that occur after
/// tokens have been consumed.
/// Python: _parse_bitwise
pub fn parse_bitwise(&mut self) -> Result<Option<Expression>> {
    let checkpoint = self.current;
    match self.parse_bitwise_or() {
        Ok(expr) => Ok(Some(expr)),
        Err(err) => {
            if self.current == checkpoint {
                // Nothing consumed: not a bitwise expression at all.
                Ok(None)
            } else {
                Err(err)
            }
        }
    }
}
/// parse_blockcompression - Parses a Teradata BLOCKCOMPRESSION option value.
/// ALWAYS yields an (empty) BlockCompressionProperty node; MANUAL is consumed
/// but not yet represented.
/// Implemented from Python _parse_blockcompression.
pub fn parse_blockcompression(&mut self) -> Result<Option<Expression>> {
    if self.match_text_seq(&["ALWAYS"]) {
        let prop = BlockCompressionProperty {
            autotemp: None,
            always: None,
            default: None,
            manual: None,
            never: None,
        };
        return Ok(Some(Expression::BlockCompressionProperty(Box::new(prop))));
    }
    // MANUAL: consumed but produces no node yet.
    let _ = self.match_text_seq(&["MANUAL"]);
    Ok(None)
}
/// parse_boolean - Parses a boolean literal (TRUE/FALSE) into
/// [`Expression::Boolean`]; returns `Ok(None)` when neither token is next.
/// Python: if self._match(TokenType.TRUE): return exp.Boolean(this=True)
pub fn parse_boolean(&mut self) -> Result<Option<Expression>> {
    let value = if self.match_token(TokenType::True) {
        true
    } else if self.match_token(TokenType::False) {
        false
    } else {
        return Ok(None);
    };
    Ok(Some(Expression::Boolean(BooleanLiteral { value })))
}
/// parse_bracket - Parses a bracket expression with no left-hand side:
/// subscript-less array literal `[1, 2, 3]` or struct literal `{key: value}`.
/// Ported from Python _parse_bracket; delegates to
/// [`Self::parse_bracket_with_expr`] with `this = None`.
pub fn parse_bracket(&mut self) -> Result<Option<Expression>> {
    self.parse_bracket_with_expr(None)
}
/// parse_bracket_with_expr - Parses a bracket/brace construct with an optional
/// left-side expression `this`:
///   - `this[index]` / `this[i][j]`   → Subscript (nested for multiple indices)
///   - `[a, b, c]` (no `this`)        → Array literal
///   - `[expr FOR v IN it [IF cond]]` → Comprehension
///   - `[start:end]` / `[:end]`       → slice via parse_slice helpers
///   - `{key: value, ...}`            → Struct literal
/// Returns `Ok(this)` unchanged when no `[` or `{` follows.
fn parse_bracket_with_expr(&mut self, this: Option<Expression>) -> Result<Option<Expression>> {
// Check for [ or {
let is_bracket = self.match_token(TokenType::LBracket);
let is_brace = if !is_bracket {
self.match_token(TokenType::LBrace)
} else {
false
};
if !is_bracket && !is_brace {
return Ok(this);
}
// Parse comma-separated expressions inside brackets
let mut expressions: Vec<Expression> = Vec::new();
if is_bracket && !self.check(TokenType::RBracket) {
// Check for slice syntax at the start: [:...] or [:-...]
// This needs to be detected before parse_bracket_key_value which calls parse_primary
// and parse_primary would consume : as a parameter prefix
let first_expr = if self.check(TokenType::Colon) {
// This is slice syntax like [:] or [:-1] or [::step]
// Parse it using slice parser with no 'this'
if let Some(slice) = self.parse_slice()? {
slice
} else {
self.parse_expression()?
}
} else if let Ok(Some(expr)) = self.parse_bracket_key_value() {
// key: value / key => value pair (e.g. map literals)
expr
} else {
// Parse regular expression and check for slice
let expr = self.parse_expression()?;
// Check if followed by colon (slice syntax like [start:end])
if self.check(TokenType::Colon) {
if let Some(slice) = self.parse_slice_with_this(Some(expr))? {
slice
} else {
return Err(self.parse_error("Failed to parse slice"));
}
} else {
expr
}
};
// Check for comprehension syntax: [expr FOR var IN iterator [IF condition]]
if self.match_token(TokenType::For) {
// Parse loop variable - typically a simple identifier like 'x'
let loop_var = self.parse_primary()?;
// Parse optional position (second variable after comma)
let position = if self.match_token(TokenType::Comma) {
Some(self.parse_primary()?)
} else {
None
};
// Expect IN keyword
if !self.match_token(TokenType::In) {
return Err(self.parse_error("Expected IN in comprehension"));
}
// Parse iterator expression
let iterator = self.parse_expression()?;
// Parse optional condition after IF
let condition = if self.match_token(TokenType::If) {
Some(self.parse_expression()?)
} else {
None
};
// Expect closing bracket
self.expect(TokenType::RBracket)?;
// Return Comprehension wrapped in an expression
return Ok(Some(Expression::Comprehension(Box::new(Comprehension {
this: Box::new(first_expr),
expression: Box::new(loop_var),
position: position.map(Box::new),
iterator: Some(Box::new(iterator)),
condition: condition.map(Box::new),
}))));
}
expressions.push(first_expr);
// Continue parsing remaining expressions
while self.match_token(TokenType::Comma) {
if let Ok(Some(expr)) = self.parse_bracket_key_value() {
expressions.push(expr);
} else {
// NOTE(review): a failed element parse silently ends the list here;
// the closing-bracket expect below will then surface the error.
match self.parse_expression() {
Ok(expr) => expressions.push(expr),
Err(_) => break,
}
}
}
} else if is_brace && !self.check(TokenType::RBrace) {
// Brace form: collect struct fields (key-value pairs or bare values).
loop {
if let Ok(Some(expr)) = self.parse_bracket_key_value() {
expressions.push(expr);
} else {
match self.parse_expression() {
Ok(expr) => expressions.push(expr),
Err(_) => break,
}
}
if !self.match_token(TokenType::Comma) {
break;
}
}
}
// Expect closing bracket
if is_bracket {
self.expect(TokenType::RBracket)?;
} else if is_brace {
self.expect(TokenType::RBrace)?;
}
// Build the result
if is_brace {
// Struct literal: {key: value, ...}
// Convert expressions to (Option<name>, expr) pairs
let fields: Vec<(Option<String>, Expression)> =
expressions.into_iter().map(|e| (None, e)).collect();
Ok(Some(Expression::Struct(Box::new(Struct { fields }))))
} else if let Some(base_expr) = this {
// Subscript access: base[index]
if expressions.len() == 1 {
Ok(Some(Expression::Subscript(Box::new(Subscript {
this: base_expr,
index: expressions.remove(0),
}))))
} else {
// Multiple indices - create nested subscripts or array
let mut result = base_expr;
for expr in expressions {
result = Expression::Subscript(Box::new(Subscript {
this: result,
index: expr,
}));
}
Ok(Some(result))
}
} else {
// Array literal: [1, 2, 3]
Ok(Some(Expression::Array(Box::new(Array { expressions }))))
}
}
/// parse_bracket_key_value - Parses a key-value pair inside brackets/braces:
/// `key: value` or `key => value`, returning a [`Expression::NamedArgument`].
/// Returns `Ok(None)` — with the cursor rewound to where it started — when the
/// upcoming tokens are not a key-value pair, so the caller can fall back to
/// parsing a plain expression.
/// Ported from Python _parse_bracket_key_value.
pub fn parse_bracket_key_value(&mut self) -> Result<Option<Expression>> {
    let saved_pos = self.current;
    let key = match self.parse_primary() {
        Ok(key) => key,
        Err(_) => {
            // Fix: parse_primary may consume tokens before failing; rewind so
            // the caller retries from the original position instead of
            // mid-stream.
            self.current = saved_pos;
            return Ok(None);
        }
    };
    // A key only counts as such when followed by `:` or `=>`.
    if !(self.match_token(TokenType::Colon) || self.match_text_seq(&["=>"])) {
        self.current = saved_pos;
        return Ok(None);
    }
    let value = match self.parse_expression() {
        Ok(value) => value,
        Err(_) => {
            self.current = saved_pos;
            return Ok(None);
        }
    };
    // Use the key's identifier or string-literal text as the argument name;
    // any other key shape degrades to an empty name.
    let name = match &key {
        Expression::Identifier(id) => id.clone(),
        Expression::Literal(lit) => match lit.as_ref() {
            crate::expressions::Literal::String(s) => Identifier::new(s.clone()),
            _ => Identifier::new("".to_string()),
        },
        _ => Identifier::new("".to_string()),
    };
    Ok(Some(Expression::NamedArgument(Box::new(NamedArgument {
        name,
        value,
        separator: NamedArgSeparator::DArrow, // Using DArrow for colon-style key: value
    }))))
}
/// parse_ceil_floor - Parses the tail of a CEIL/FLOOR call.
/// A `TO` clause is consumed when present but not yet represented in the AST.
/// Implemented from Python _parse_ceil_floor.
pub fn parse_ceil_floor(&mut self) -> Result<Option<Expression>> {
    let _ = self.match_text_seq(&["TO"]);
    Ok(None)
}
/// parse_changes - Parses the Snowflake CHANGES clause:
/// `CHANGES(INFORMATION => var) [AT|BEFORE(...)] [END(...)]`.
/// Returns `Ok(None)` when the input does not start with `CHANGES(INFORMATION =>`.
/// Implemented from Python _parse_changes.
pub fn parse_changes(&mut self) -> Result<Option<Expression>> {
    if !self.match_text_seq(&["CHANGES", "(", "INFORMATION", "=>"]) {
        return Ok(None);
    }
    // Any single token is accepted as the information value (Python's
    // any_token=True); it is stored as a Var.
    let mut information = None;
    if !self.is_at_end() && !self.check(TokenType::RParen) {
        let text = self.advance().text.clone();
        information = Some(Box::new(Expression::Var(Box::new(
            crate::expressions::Var { this: text },
        ))));
    }
    // Closing paren is tolerated but not required.
    self.match_token(TokenType::RParen);
    // Optional AT/BEFORE clause, then an optional END clause.
    let at_before = self.parse_historical_data()?.map(Box::new);
    let end = self.parse_historical_data()?.map(Box::new);
    Ok(Some(Expression::Changes(Box::new(Changes {
        information,
        at_before,
        end,
    }))))
}
/// parse_char - Parses the argument list of a CHAR/CHR call, including the
/// MySQL form `CHAR(n1, n2, ... USING charset)`. A single argument with no
/// charset becomes a plain [`Expression::Chr`]; anything else becomes a
/// MySQL-style [`Expression::CharFunc`].
/// Returns `Ok(None)` when no arguments were parsed.
pub fn parse_char(&mut self) -> Result<Option<Expression>> {
    // Comma-separated argument expressions.
    let mut args = Vec::new();
    loop {
        args.push(self.parse_expression()?);
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    // Optional USING charset (charset name extracted from an identifier).
    let mut charset = None;
    if self.match_token(TokenType::Using) {
        charset = self.parse_var()?.map(|v| match v {
            Expression::Identifier(id) => id.name,
            _ => String::new(),
        });
    }
    if args.is_empty() {
        return Ok(None);
    }
    if args.len() == 1 && charset.is_none() {
        // Simple single-argument CHR.
        let only = args.pop().expect("length checked above");
        return Ok(Some(Expression::Chr(Box::new(UnaryFunc::new(only)))));
    }
    Ok(Some(Expression::CharFunc(Box::new(
        crate::expressions::CharFunc {
            args,
            charset,
            name: None, // defaults to CHAR
        },
    ))))
}
/// parse_character_set - Parses a CHARACTER SET property value.
/// Examples: `CHARACTER SET = utf8`, `CHARACTER SET utf8mb4`.
/// The CHARACTER SET keywords themselves were consumed by the caller.
/// Ported from Python _parse_character_set.
pub fn parse_character_set(&mut self) -> Result<Option<Expression>> {
    // The equals sign before the charset name is optional.
    self.match_token(TokenType::Eq);
    // The charset itself may be a variable/identifier or a string literal.
    match self.parse_var_or_string()? {
        Some(name) => Ok(Some(Expression::CharacterSetProperty(Box::new(
            CharacterSetProperty {
                this: Box::new(name),
                default: None,
            },
        )))),
        None => Ok(None),
    }
}
/// parse_checksum - Parses a CHECKSUM property value; only `OFF` is
/// recognized and yields an (empty) ChecksumProperty node.
/// Implemented from Python _parse_checksum.
pub fn parse_checksum(&mut self) -> Result<Option<Expression>> {
    if !self.match_text_seq(&["OFF"]) {
        return Ok(None);
    }
    Ok(Some(Expression::ChecksumProperty(Box::new(
        ChecksumProperty {
            on: None,
            default: None,
        },
    ))))
}
/// parse_cluster - Parses a Hive/Spark CLUSTER BY column list: one or more
/// ordered expressions (columns with optional ASC/DESC) separated by commas.
/// Returns `Ok(None)` when no ordered expression could be parsed.
pub fn parse_cluster(&mut self) -> Result<Option<Expression>> {
    let mut items: Vec<Ordered> = Vec::new();
    // Collect the comma-separated ordered expressions.
    while let Some(ordered) = self.parse_ordered_item()? {
        items.push(ordered);
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    if items.is_empty() {
        return Ok(None);
    }
    Ok(Some(Expression::ClusterBy(Box::new(ClusterBy {
        expressions: items,
    }))))
}
/// parse_clustered_by - Parses the tail of a CLUSTERED option. `BY` yields an
/// (empty) ClusteredByProperty node; `SORTED BY` is consumed but not yet
/// represented.
/// Implemented from Python _parse_clustered_by.
pub fn parse_clustered_by(&mut self) -> Result<Option<Expression>> {
    if self.match_text_seq(&["BY"]) {
        let prop = ClusteredByProperty {
            expressions: Vec::new(),
            sorted_by: None,
            buckets: None,
        };
        return Ok(Some(Expression::ClusteredByProperty(Box::new(prop))));
    }
    // SORTED BY: consumed but produces no node yet.
    let _ = self.match_text_seq(&["SORTED", "BY"]);
    Ok(None)
}
/// Parse Snowflake colon JSON path extraction: `data:field` or
/// `data:field.subfield`, producing a [`Expression::JSONExtract`] whose path
/// string uses dot notation for simple segments and `["..."]` bracket
/// notation for quoted segments or segments containing spaces/quotes.
/// Returns the input `this` unchanged when no colon path follows.
/// Python: def _parse_colon_as_variant_extract(self, this)
pub fn parse_colon_as_variant_extract(
&mut self,
this: Expression,
) -> Result<Option<Expression>> {
// Build a JSON path from colon-separated identifiers
// Track whether each segment was quoted (needs bracket notation for spaces/special chars)
let mut json_path: Vec<(String, bool)> = Vec::new();
while self.match_token(TokenType::Colon) {
// Parse the path segment (field name)
if let Some(field) = self.parse_identifier()? {
if let Expression::Identifier(ident) = field {
json_path.push((
ident.name.clone(),
ident.quoted || ident.name.contains(' ') || ident.name.contains('\''),
));
}
}
// Check for dot-separated sub-paths
while self.match_token(TokenType::Dot) {
if let Some(subfield) = self.parse_identifier()? {
if let Expression::Identifier(ident) = subfield {
json_path.push((
ident.name.clone(),
ident.quoted || ident.name.contains(' ') || ident.name.contains('\''),
));
}
}
}
}
// No colon path at all: hand the input back untouched.
if json_path.is_empty() {
return Ok(Some(this));
}
// Build the JSON path expression string
// Use bracket notation for segments with spaces/special chars: a["b c"]
// Use dot notation for simple segments: a.b.c
let mut path_str = String::new();
for (i, (segment, needs_bracket)) in json_path.iter().enumerate() {
if *needs_bracket {
// Bracket notation: ["key with spaces"]
path_str.push('[');
path_str.push('"');
path_str.push_str(segment);
path_str.push('"');
path_str.push(']');
} else {
// Dot separator only between segments, never before the first.
if i > 0 {
path_str.push('.');
}
path_str.push_str(segment);
}
}
// variant_extract=true marks this as Snowflake `:` extraction so the
// generator can round-trip the original syntax.
Ok(Some(Expression::JSONExtract(Box::new(JSONExtract {
this: Box::new(this),
expression: Box::new(Expression::Literal(Box::new(Literal::String(path_str)))),
only_json_types: None,
expressions: Vec::new(),
variant_extract: Some(Box::new(Expression::Boolean(BooleanLiteral {
value: true,
}))),
json_query: None,
option: None,
quote: None,
on_condition: None,
requires_json: None,
}))))
}
/// parse_column - Parses a column expression: a column reference followed by
/// optional column ops (bracket subscripts, dotted property access, casts),
/// or a bare bracket expression when no column reference is present.
/// Python: this = self._parse_column_reference(); return self._parse_column_ops(this)
pub fn parse_column(&mut self) -> Result<Option<Expression>> {
    match self.parse_column_reference()? {
        reference @ Some(_) => self.parse_column_ops_with_expr(reference),
        None => self.parse_bracket(),
    }
}
/// parse_column_constraint - Ported from Python _parse_column_constraint
/// Parses column-level constraints like NOT NULL, PRIMARY KEY, UNIQUE, DEFAULT, CHECK, etc.
///
/// Each recognized constraint is probed in a fixed order and the first match
/// is returned immediately. If a `CONSTRAINT <name>` prefix was consumed but
/// no recognized constraint body follows, the bare name Identifier is
/// returned so the caller can decide what to do with it. Returns `Ok(None)`
/// when nothing matches at all.
#[allow(unused_variables, unused_mut)]
pub fn parse_column_constraint(&mut self) -> Result<Option<Expression>> {
    // Optional CONSTRAINT keyword with a name. Only Identifier results are
    // kept; any other expression kind from parse_id_var is discarded.
    let constraint_name = if self.match_token(TokenType::Constraint) {
        self.parse_id_var()?.and_then(|e| {
            if let Expression::Identifier(id) = e {
                Some(id)
            } else {
                None
            }
        })
    } else {
        None
    };
    // NOT NULL: allow_null stays None, which encodes the "NOT NULL" form.
    if self.match_text_seq(&["NOT", "NULL"]) {
        return Ok(Some(Expression::NotNullColumnConstraint(Box::new(
            NotNullColumnConstraint { allow_null: None },
        ))));
    }
    // NOT FOR REPLICATION (SQL Server), modeled as a valueless Property.
    // Probing NOT NULL first is safe because match_text_seq backtracks on a
    // partial match, so this sequence's NOT is not consumed by it.
    if self.match_text_seq(&["NOT", "FOR", "REPLICATION"]) {
        return Ok(Some(Expression::Property(Box::new(
            crate::expressions::Property {
                this: Box::new(Expression::Identifier(Identifier::new(
                    "NOT FOR REPLICATION".to_string(),
                ))),
                value: None,
            },
        ))));
    }
    // Bare NULL: allow_null = true distinguishes it from NOT NULL above.
    if self.match_text_seq(&["NULL"]) {
        return Ok(Some(Expression::NotNullColumnConstraint(Box::new(
            NotNullColumnConstraint {
                allow_null: Some(Box::new(Expression::Boolean(BooleanLiteral {
                    value: true,
                }))),
            },
        ))));
    }
    // PRIMARY KEY (no sort direction or options at column level here).
    if self.match_text_seq(&["PRIMARY", "KEY"]) {
        return Ok(Some(Expression::PrimaryKeyColumnConstraint(Box::new(
            PrimaryKeyColumnConstraint {
                desc: None,
                options: Vec::new(),
            },
        ))));
    }
    // UNIQUE [KEY|INDEX] [NULLS NOT DISTINCT]
    if self.match_text_seq(&["UNIQUE"]) {
        // Optional KEY/INDEX noise word (MySQL style).
        let _ = self.match_texts(&["KEY", "INDEX"]);
        // NULLS NOT DISTINCT (PostgreSQL 15+ feature), recorded as a flag.
        let nulls = if self.match_text_seq(&["NULLS", "NOT", "DISTINCT"]) {
            Some(Box::new(Expression::Boolean(BooleanLiteral {
                value: true,
            })))
        } else {
            None
        };
        return Ok(Some(Expression::UniqueColumnConstraint(Box::new(
            UniqueColumnConstraint {
                this: None,
                index_type: None,
                on_conflict: None,
                nulls,
                options: Vec::new(),
            },
        ))));
    }
    // DEFAULT <expr>. A missing expression yields Ok(None); note the DEFAULT
    // keyword itself has already been consumed at that point.
    if self.match_text_seq(&["DEFAULT"]) {
        let default_value = self.parse_select_or_expression()?;
        if let Some(val) = default_value {
            return Ok(Some(Expression::DefaultColumnConstraint(Box::new(
                DefaultColumnConstraint {
                    this: Box::new(val),
                    for_column: None,
                },
            ))));
        }
        return Ok(None);
    }
    // CHECK (<expr>): only the parenthesized form is accepted; otherwise
    // this returns Ok(None) with the CHECK keyword consumed.
    if self.match_text_seq(&["CHECK"]) {
        if self.match_token(TokenType::LParen) {
            let expr = self.parse_select_or_expression()?;
            self.match_token(TokenType::RParen);
            if let Some(check_expr) = expr {
                return Ok(Some(Expression::CheckColumnConstraint(Box::new(
                    CheckColumnConstraint {
                        this: Box::new(check_expr),
                        enforced: None,
                    },
                ))));
            }
        }
        return Ok(None);
    }
    // REFERENCES table [(col, ...)]: inline foreign key.
    if self.match_text_seq(&["REFERENCES"]) {
        let table = self.parse_table_parts()?;
        // Optional referenced column list.
        let columns = if self.match_token(TokenType::LParen) {
            let mut cols = Vec::new();
            loop {
                if let Some(col) = self.parse_id_var()? {
                    cols.push(col);
                }
                if !self.match_token(TokenType::Comma) {
                    break;
                }
            }
            self.match_token(TokenType::RParen);
            cols
        } else {
            Vec::new()
        };
        return Ok(Some(Expression::ForeignKey(Box::new(ForeignKey {
            expressions: columns,
            reference: table.map(Box::new),
            delete: None,
            update: None,
            options: Vec::new(),
        }))));
    }
    // AUTO_INCREMENT / AUTOINCREMENT / IDENTITY, optionally with
    // IDENTITY(start, increment) or IDENTITY START x INCREMENT y syntax.
    if self.match_texts(&["AUTO_INCREMENT", "AUTOINCREMENT", "IDENTITY"]) {
        let mut start = None;
        let mut increment = None;
        if self.match_token(TokenType::LParen) {
            // (start [, increment]) form.
            start = self.parse_bitwise()?;
            if self.match_token(TokenType::Comma) {
                increment = self.parse_bitwise()?;
            }
            self.expect(TokenType::RParen)?;
        } else if self.match_text_seq(&["START"]) {
            // START x [INCREMENT y] form.
            start = self.parse_bitwise()?;
            if self.match_text_seq(&["INCREMENT"]) {
                increment = self.parse_bitwise()?;
            }
        }
        // With explicit start/increment, emit a GENERATED AS IDENTITY node;
        // this = Boolean(false) here encodes the BY DEFAULT flavor.
        if start.is_some() || increment.is_some() {
            return Ok(Some(Expression::GeneratedAsIdentityColumnConstraint(
                Box::new(GeneratedAsIdentityColumnConstraint {
                    this: Some(Box::new(Expression::Boolean(BooleanLiteral {
                        value: false,
                    }))),
                    expression: None,
                    on_null: None,
                    start: start.map(Box::new),
                    increment: increment.map(Box::new),
                    minvalue: None,
                    maxvalue: None,
                    cycle: None,
                    order: None,
                }),
            )));
        }
        // Plain auto-increment with no parameters.
        return Ok(Some(Expression::AutoIncrementColumnConstraint(
            AutoIncrementColumnConstraint,
        )));
    }
    // COMMENT 'text'. CommentColumnConstraint is a unit struct, so the parsed
    // comment string is discarded — NOTE(review): the comment text is lost
    // here; confirm that is acceptable for round-tripping.
    if self.match_text_seq(&["COMMENT"]) {
        if let Some(comment) = self.parse_string()? {
            return Ok(Some(Expression::CommentColumnConstraint(
                CommentColumnConstraint,
            )));
        }
        return Ok(None);
    }
    // COLLATE collation_name, represented via CollateProperty.
    if self.match_text_seq(&["COLLATE"]) {
        if let Some(collation) = self.parse_id_var()? {
            return Ok(Some(Expression::CollateProperty(Box::new(
                CollateProperty {
                    this: Box::new(collation),
                    default: None,
                },
            ))));
        }
        return Ok(None);
    }
    // ClickHouse dictionary column attributes.
    if matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    ) {
        // HIERARCHICAL / IS_OBJECT_ID / INJECTIVE become valueless Properties.
        if self.match_texts(&["HIERARCHICAL", "IS_OBJECT_ID", "INJECTIVE"]) {
            let attr_name = self.previous().text.to_ascii_uppercase();
            return Ok(Some(Expression::Property(Box::new(
                crate::expressions::Property {
                    this: Box::new(Expression::Identifier(Identifier::new(attr_name))),
                    value: None,
                },
            ))));
        }
        // EXPRESSION expr is folded into a DefaultColumnConstraint.
        // NOTE(review): the original comment also mentioned ALIAS expr, but
        // only EXPRESSION is matched here — confirm ALIAS is handled
        // elsewhere.
        if self.match_texts(&["EXPRESSION"]) {
            let expr = self.parse_expression()?;
            return Ok(Some(Expression::DefaultColumnConstraint(Box::new(
                DefaultColumnConstraint {
                    this: Box::new(expr),
                    for_column: None,
                },
            ))));
        }
    }
    // GENERATED [ALWAYS | BY DEFAULT] [ON NULL] AS IDENTITY
    if self.match_text_seq(&["GENERATED"]) {
        let always = self.match_text_seq(&["ALWAYS"]);
        if !always {
            // BY DEFAULT is optional noise when ALWAYS is absent.
            self.match_text_seq(&["BY", "DEFAULT"]);
        }
        let on_null = self.match_text_seq(&["ON", "NULL"]);
        if self.match_text_seq(&["AS", "IDENTITY"]) {
            // NOTE(review): `always` is parsed but not recorded on the node
            // (this stays None) — confirm ALWAYS vs BY DEFAULT round-trips.
            return Ok(Some(Expression::GeneratedAsIdentityColumnConstraint(
                Box::new(GeneratedAsIdentityColumnConstraint {
                    this: None,
                    expression: None,
                    on_null: if on_null {
                        Some(Box::new(Expression::Boolean(BooleanLiteral {
                            value: true,
                        })))
                    } else {
                        None
                    },
                    start: None,
                    increment: None,
                    minvalue: None,
                    maxvalue: None,
                    cycle: None,
                    order: None,
                }),
            )));
        }
        return Ok(None);
    }
    // PATH 'xpath' - for XMLTABLE/JSON_TABLE column definitions.
    if self.match_text_seq(&["PATH"]) {
        if let Some(path_expr) = self.parse_string()? {
            return Ok(Some(Expression::PathColumnConstraint(Box::new(
                PathColumnConstraint {
                    this: Box::new(path_expr),
                },
            ))));
        }
        return Ok(None);
    }
    // CONSTRAINT <name> with no recognized body: surface the bare name.
    if let Some(name) = constraint_name {
        return Ok(Some(Expression::Identifier(name)));
    }
    Ok(None)
}
/// parse_column_def_with_exists - Ported from Python _parse_column_def_with_exists
/// Parses a column definition with optional IF [NOT] EXISTS clause
#[allow(unused_variables, unused_mut)]
pub fn parse_column_def_with_exists(&mut self) -> Result<Option<Expression>> {
    let checkpoint = self.current;
    // The COLUMN keyword is optional noise here.
    let _ = self.match_text_seq(&["COLUMN"]);
    // IF NOT EXISTS takes priority; only probe IF EXISTS when it was absent.
    let not_exists = self.match_text_seq(&["IF", "NOT", "EXISTS"]);
    let exists = !not_exists && self.match_text_seq(&["IF", "EXISTS"]);
    // Only a genuine ColumnDef is accepted; anything else rewinds fully.
    match self.parse_field_def()? {
        Some(col_def @ Expression::ColumnDef(_)) => {
            // Our ColumnDef struct has no exists flag, so the IF [NOT] EXISTS
            // information is dropped and the definition returned unchanged.
            Ok(Some(col_def))
        }
        _ => {
            self.current = checkpoint;
            Ok(None)
        }
    }
}
/// parse_column_ops - Parses column operations with no starting expression.
/// Compatibility wrapper mirroring the Python API surface.
pub fn parse_column_ops(&mut self) -> Result<Option<Expression>> {
    // Delegate with an empty "this" so only standalone ops are considered.
    self.parse_column_ops_with_expr(None)
}
/// parse_column_ops_with_expr - Parses column operations (dot access, brackets, casts)
/// Python: _parse_column_ops(this)
///
/// Applied in order to `this`:
/// 1. a single optional bracket subscript (`expr[index]`),
/// 2. a chain of dot accesses (`a.b.c`, `a.*`, ClickHouse `tuple.1`),
/// 3. Snowflake `model!ATTR` model-attribute syntax,
/// 4. a PostgreSQL-style `::type` cast,
/// 5. a Teradata `(FORMAT '...')` phrase.
pub fn parse_column_ops_with_expr(
    &mut self,
    this: Option<Expression>,
) -> Result<Option<Expression>> {
    // Step 1: at most one leading bracket subscript. If no index expression
    // parses, the bracket tokens are still consumed and the base kept as-is.
    let mut result = if let Some(expr) = this {
        if self.match_token(TokenType::LBracket) {
            let index = self.parse_disjunction()?;
            self.match_token(TokenType::RBracket);
            if let Some(idx) = index {
                Some(Expression::Subscript(Box::new(Subscript {
                    this: expr,
                    index: idx,
                })))
            } else {
                Some(expr)
            }
        } else {
            Some(expr)
        }
    } else {
        None
    };
    // Step 2: dot chains for qualified names: table.column, schema.table.column.
    while self.match_token(TokenType::Dot) {
        if result.is_none() {
            break;
        }
        // `.*` becomes a qualified Star; this ends the dot chain.
        if self.match_token(TokenType::Star) {
            // Reconstruct the qualifier name for the Star node.
            let table_name = match &result {
                // Simple case: `t.*` where t parsed as an unqualified column.
                Some(Expression::Column(col)) if col.table.is_none() => Some(col.name.clone()),
                Some(Expression::Dot(dot)) => {
                    // Deep case: flatten `schema.table.*` into one dotted name.
                    fn dot_to_name(expr: &Expression) -> String {
                        match expr {
                            Expression::Column(col) => {
                                if let Some(ref table) = col.table {
                                    format!("{}.{}", table.name, col.name.name)
                                } else {
                                    col.name.name.clone()
                                }
                            }
                            Expression::Dot(d) => {
                                format!("{}.{}", dot_to_name(&d.this), d.field.name)
                            }
                            _ => String::new(),
                        }
                    }
                    Some(Identifier::new(dot_to_name(&Expression::Dot(dot.clone()))))
                }
                _ => None,
            };
            // Star may carry EXCEPT/REPLACE-style modifiers.
            let star = self.parse_star_modifiers(table_name)?;
            result = Some(Expression::Star(star));
            break;
        }
        // Field after the dot: identifiers, keywords used as field names
        // (e.g. "schema"), quoted identifiers, and — ClickHouse only —
        // numeric tuple indexes like expr.1, expr.2.
        if self.is_identifier_or_keyword_token()
            || self.check(TokenType::QuotedIdentifier)
            || (matches!(
                self.config.dialect,
                Some(crate::dialects::DialectType::ClickHouse)
            ) && self.check(TokenType::Number))
        {
            let token = self.advance();
            let field_ident = Identifier {
                name: token.text,
                quoted: token.token_type == TokenType::QuotedIdentifier,
                trailing_comments: Vec::new(),
                span: None,
            };
            result = Some(Expression::Dot(Box::new(DotAccess {
                this: result.take().unwrap(),
                field: field_ident,
            })));
        } else {
            break;
        }
    }
    // Step 3: Snowflake model attribute syntax: model!PREDICT(...) / model!admin.
    if self.match_token(TokenType::Exclamation) {
        if let Some(expr) = result.take() {
            // parse_unary covers both a bare identifier and a function call.
            let attr = self.parse_unary()?;
            result = Some(Expression::ModelAttribute(Box::new(ModelAttribute {
                this: Box::new(expr),
                expression: Box::new(attr),
            })));
        }
    }
    // Step 4: PostgreSQL double-colon cast (column::type).
    if self.match_token(TokenType::DColon) {
        if let Some(type_expr) = self.parse_types()? {
            if let Some(expr) = result {
                // Only a DataType result can form a Cast; any other parse
                // result returns the base expression unchanged.
                let data_type = match type_expr {
                    Expression::DataType(dt) => dt,
                    _ => {
                        result = Some(expr);
                        return Ok(result);
                    }
                };
                result = Some(Expression::Cast(Box::new(Cast {
                    this: expr,
                    to: data_type,
                    trailing_comments: Vec::new(),
                    double_colon_syntax: true,
                    format: None,
                    default: None,
                    inferred_type: None,
                })));
            }
        }
    }
    // Step 5: Teradata (FORMAT '...') phrase after a column/expression.
    if matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::Teradata)
    ) && self.check(TokenType::LParen)
        && self.check_next(TokenType::Format)
    {
        self.skip(); // consume (
        self.skip(); // consume FORMAT
        let format = self.expect_string()?;
        self.expect(TokenType::RParen)?;
        if let Some(expr) = result.take() {
            result = Some(Expression::FormatPhrase(Box::new(FormatPhrase {
                this: Box::new(expr),
                format,
            })));
        }
    }
    Ok(result)
}
/// parse_column_reference - Parse column reference (field -> Column)
/// Python: this = self._parse_field(); if isinstance(this, exp.Identifier): return exp.Column(this=this)
pub fn parse_column_reference(&mut self) -> Result<Option<Expression>> {
    let Some(field) = self.parse_field()? else {
        return Ok(None);
    };
    // Bare identifiers are promoted to Column nodes; every other expression
    // kind (literals etc.) passes through untouched.
    if let Expression::Identifier(ref id) = field {
        return Ok(Some(Expression::boxed_column(Column {
            name: id.clone(),
            table: None,
            join_mark: false,
            trailing_comments: Vec::new(),
            span: None,
            inferred_type: None,
        })));
    }
    Ok(Some(field))
}
/// parse_command - Parses a generic SQL command
/// Python: _parse_command
/// Used for commands that we don't have specific parsing for
pub fn parse_command(&mut self) -> Result<Option<Expression>> {
    // The command keyword itself was already consumed by the caller.
    let keyword = self.previous().text.to_ascii_uppercase();
    // Collect (text, token_type) pairs so join_command_tokens can apply
    // smart spacing between them.
    let mut parts: Vec<(String, TokenType)> = vec![(keyword, TokenType::Var)];
    while !self.is_at_end() && !self.check(TokenType::Semicolon) {
        let token = self.advance();
        let rendered = match token.token_type {
            TokenType::QuotedIdentifier => {
                // Restore the quote characters stripped by the tokenizer:
                // backticks for the MySQL family, double quotes otherwise.
                let quote_char = if matches!(
                    self.config.dialect,
                    Some(crate::dialects::DialectType::MySQL)
                        | Some(crate::dialects::DialectType::SingleStore)
                        | Some(crate::dialects::DialectType::Doris)
                        | Some(crate::dialects::DialectType::StarRocks)
                ) {
                    '`'
                } else {
                    '"'
                };
                format!("{}{}{}", quote_char, token.text, quote_char)
            }
            // Re-wrap string literals in single quotes.
            TokenType::String => format!("'{}'", token.text),
            _ => token.text.clone(),
        };
        parts.push((rendered, token.token_type));
    }
    Ok(Some(Expression::Command(Box::new(Command {
        this: self.join_command_tokens(parts),
    }))))
}
/// parse_commit_or_rollback - Implemented from Python _parse_commit_or_rollback
#[allow(unused_variables, unused_mut)]
pub fn parse_commit_or_rollback(&mut self) -> Result<Option<Expression>> {
    // ROLLBACK TO ... produces a bare Rollback node; the savepoint name is
    // not captured here (stub port of the Python implementation).
    if self.match_text_seq(&["TO"]) {
        let rollback = Rollback {
            savepoint: None,
            this: None,
        };
        return Ok(Some(Expression::Rollback(Box::new(rollback))));
    }
    // A SAVEPOINT keyword is consumed but currently yields no node.
    let _ = self.match_text_seq(&["SAVEPOINT"]);
    Ok(None)
}
/// parse_composite_key_property - Implemented from Python _parse_composite_key_property
#[allow(unused_variables, unused_mut)]
pub fn parse_composite_key_property(&mut self) -> Result<Option<Expression>> {
    // Consume a trailing KEY keyword if present; no AST node is produced yet.
    let _ = self.match_text_seq(&["KEY"]);
    Ok(None)
}
/// parse_comprehension - Implemented from Python _parse_comprehension
/// Parses list comprehension: expr FOR var [, position] IN iterator [IF condition]
///
/// `this` is the already-parsed result expression that precedes FOR. Returns
/// `Ok(None)` (after backtracking) when the IN keyword is missing or either
/// side of the comprehension failed to parse.
pub fn parse_comprehension(&mut self, this: Option<Expression>) -> Result<Option<Expression>> {
    let start_index = self.current;
    // Loop variable (parsed as a column expression).
    let expression = self.parse_column()?;
    // Optional position variable: `FOR var, pos IN ...`.
    let position = if self.match_token(TokenType::Comma) {
        self.parse_column()?.map(Box::new)
    } else {
        None
    };
    // IN is mandatory; without it this is not a comprehension.
    if !self.match_token(TokenType::In) {
        // Backtrack. NOTE(review): this rewinds to one token BEFORE
        // start_index (presumably to un-consume the FOR matched by the
        // caller) — confirm the intended rewind target.
        self.current = start_index.saturating_sub(1);
        return Ok(None);
    }
    // The iterated collection.
    let iterator = self.parse_column()?.map(Box::new);
    // Optional filter: IF <condition>.
    let condition = if self.match_text_seq(&["IF"]) {
        self.parse_disjunction()?.map(Box::new)
    } else {
        None
    };
    // Both the result expression and the loop variable must be present to
    // build a Comprehension node.
    match (this, expression) {
        (Some(t), Some(e)) => Ok(Some(Expression::Comprehension(Box::new(Comprehension {
            this: Box::new(t),
            expression: Box::new(e),
            position,
            iterator,
            condition,
        })))),
        _ => Ok(None),
    }
}
/// parse_compress - Parses COMPRESS column constraint (Teradata)
/// Python: _parse_compress
/// Format: COMPRESS or COMPRESS (value1, value2, ...)
///
/// Returns a `CompressColumnConstraint` whose `this` is:
/// - `None` for a bare COMPRESS (or an empty parenthesized list),
/// - the single expression for COMPRESS (v) / COMPRESS v,
/// - a `Tuple` for COMPRESS (v1, v2, ...).
pub fn parse_compress(&mut self) -> Result<Option<Expression>> {
    // Compute the constraint payload; the node itself is built once below
    // (the original duplicated the construction in both branches).
    let this = if self.check(TokenType::LParen) {
        // Parenthesized CSV of bitwise expressions.
        self.skip(); // consume LParen
        let mut expressions = Vec::new();
        loop {
            match self.parse_bitwise()? {
                Some(expr) => expressions.push(expr),
                None => break,
            }
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
        self.expect(TokenType::RParen)?;
        // Collapse a single value, drop an empty list, wrap multiple values
        // in a Tuple.
        match expressions.len() {
            0 => None,
            1 => Some(Box::new(expressions.into_iter().next().unwrap())),
            _ => Some(Box::new(Expression::Tuple(Box::new(Tuple { expressions })))),
        }
    } else {
        // Single (optional) unparenthesized value.
        self.parse_bitwise()?.map(Box::new)
    };
    Ok(Some(Expression::CompressColumnConstraint(Box::new(
        CompressColumnConstraint { this },
    ))))
}
/// parse_conjunction - Parses AND expressions
/// Python: _parse_conjunction
/// Delegates to the existing parse_and in the operator precedence chain
pub fn parse_conjunction(&mut self) -> Result<Option<Expression>> {
    // Errors from parse_and are deliberately converted into "no match" so
    // callers can treat a failed conjunction as absent rather than fatal.
    Ok(self.parse_and().ok())
}
/// parse_connect_with_prior - Parses expression in CONNECT BY context with PRIOR support
/// Python: _parse_connect_with_prior
/// This method temporarily treats PRIOR as a prefix operator while parsing the expression
pub fn parse_connect_with_prior(&mut self) -> Result<Option<Expression>> {
    // PRIOR handling already lives inside parse_connect_expression.
    self.parse_connect_expression().map(Some)
}
/// parse_constraint - Parses named or unnamed constraint
/// Python: _parse_constraint
///
/// A leading CONSTRAINT keyword introduces a named constraint; otherwise the
/// token stream is handed straight to `parse_unnamed_constraint`.
pub fn parse_constraint(&mut self) -> Result<Option<Expression>> {
    if !self.match_token(TokenType::Constraint) {
        // No CONSTRAINT keyword: try an unnamed constraint instead.
        return self.parse_unnamed_constraint();
    }
    // Named form: CONSTRAINT <name> <body>...
    // let-else replaces the original is_none() check + unwrap() pair.
    let Some(name) = self.parse_id_var()? else {
        return Ok(None);
    };
    // Collect the constraint bodies (PRIMARY KEY, UNIQUE, FOREIGN KEY, CHECK, ...).
    let expressions = self.parse_unnamed_constraints()?;
    Ok(Some(Expression::Constraint(Box::new(Constraint {
        this: Box::new(name),
        expressions,
    }))))
}
/// parse_unnamed_constraints - Parses multiple unnamed constraints
/// Python: _parse_unnamed_constraints
pub fn parse_unnamed_constraints(&mut self) -> Result<Vec<Expression>> {
    let mut constraints = Vec::new();
    // Keep consuming constraints until one fails to match.
    while let Some(constraint) = self.parse_unnamed_constraint()? {
        constraints.push(constraint);
    }
    Ok(constraints)
}
/// parse_unnamed_constraint - Parses a single unnamed constraint
/// Python: _parse_unnamed_constraint
///
/// Handles table-level constraint bodies: PRIMARY KEY, UNIQUE, FOREIGN KEY,
/// CHECK, NOT NULL / NULL, DEFAULT, REFERENCES, plus ClickHouse-specific
/// INDEX and PROJECTION definitions (emitted as Raw SQL). Returns `Ok(None)`
/// when no constraint keyword matches.
pub fn parse_unnamed_constraint(&mut self) -> Result<Option<Expression>> {
    // PRIMARY KEY
    if self.match_text_seq(&["PRIMARY", "KEY"]) {
        // ClickHouse: PRIMARY KEY expr (without parens) in a schema is a
        // table-level PK expression; keep it verbatim as Raw SQL.
        if matches!(
            self.config.dialect,
            Some(crate::dialects::DialectType::ClickHouse)
        ) && !self.check(TokenType::LParen)
        {
            let expr = self.parse_expression()?;
            return Ok(Some(Expression::Raw(Raw {
                sql: format!("PRIMARY KEY {}", expr),
            })));
        }
        return self.parse_primary_key();
    }
    // UNIQUE
    if self.match_texts(&["UNIQUE"]) {
        return self.parse_unique();
    }
    // FOREIGN KEY
    if self.match_text_seq(&["FOREIGN", "KEY"]) {
        return self.parse_foreign_key();
    }
    // CHECK (<expr>). When the wrapped expression is absent we fall through
    // to the remaining alternatives instead of returning.
    if self.match_texts(&["CHECK"]) {
        let expr = self.parse_wrapped()?;
        if let Some(check_expr) = expr {
            return Ok(Some(Expression::CheckColumnConstraint(Box::new(
                CheckColumnConstraint {
                    this: Box::new(check_expr),
                    enforced: None,
                },
            ))));
        }
    }
    // NOT NULL
    if self.match_text_seq(&["NOT", "NULL"]) {
        return Ok(Some(Expression::NotNullColumnConstraint(Box::new(
            NotNullColumnConstraint {
                allow_null: None, // NOT NULL means allow_null is not set
            },
        ))));
    }
    // Bare NULL (explicitly nullable).
    if self.match_texts(&["NULL"]) {
        return Ok(Some(Expression::NotNullColumnConstraint(Box::new(
            NotNullColumnConstraint {
                allow_null: Some(Box::new(Expression::Boolean(BooleanLiteral {
                    value: true,
                }))),
            },
        ))));
    }
    // DEFAULT <expr> [FOR column]; the FOR form is TSQL's table-level
    // default constraint.
    if self.match_token(TokenType::Default) {
        let default_value = self.parse_bitwise()?;
        if let Some(val) = default_value {
            let for_column = if self.match_token(TokenType::For) {
                Some(self.expect_identifier_with_quoted()?)
            } else {
                None
            };
            return Ok(Some(Expression::DefaultColumnConstraint(Box::new(
                DefaultColumnConstraint {
                    this: Box::new(val),
                    for_column,
                },
            ))));
        }
    }
    // REFERENCES (inline foreign key).
    if self.match_texts(&["REFERENCES"]) {
        return self.parse_references();
    }
    // ClickHouse: INDEX name expr TYPE type_name [GRANULARITY n]
    if matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    ) && self.match_token(TokenType::Index)
    {
        let name = self.expect_identifier_or_keyword_with_quoted()?;
        // Use parse_conjunction to handle comparisons like c0 < (SELECT _table)
        let expression = self.parse_conjunction()?.ok_or_else(|| {
            self.parse_error("Expected expression in ClickHouse INDEX definition")
        })?;
        // TYPE may be a full function call (e.g. minmax(...)), a bare name
        // with an argument list, or a plain identifier.
        let index_type = if self.match_token(TokenType::Type) {
            if let Some(func) = self.parse_function()? {
                Some(Box::new(func))
            } else if !self.is_at_end() {
                let type_name = self.advance().text.clone();
                if self.check(TokenType::LParen) {
                    // name(arg, ...) — build a synthetic Function node.
                    self.skip();
                    let mut args = Vec::new();
                    if !self.check(TokenType::RParen) {
                        args.push(self.parse_expression()?);
                        while self.match_token(TokenType::Comma) {
                            args.push(self.parse_expression()?);
                        }
                    }
                    self.expect(TokenType::RParen)?;
                    Some(Box::new(Expression::Function(Box::new(Function::new(
                        type_name, args,
                    )))))
                } else {
                    Some(Box::new(Expression::Identifier(Identifier::new(type_name))))
                }
            } else {
                None
            }
        } else {
            None
        };
        // GRANULARITY n is consumed and discarded.
        let _granularity = if self.match_identifier("GRANULARITY") {
            let _ = self.parse_expression()?;
            true
        } else {
            false
        };
        // Emit as Raw SQL preserving the INDEX definition.
        // NOTE(review): when TYPE is absent, the parsed `expression` is not
        // written into the output at all — confirm that is intended.
        let mut sql = format!("INDEX {} ", name.name);
        if let Some(ref idx_type) = index_type {
            sql.push_str(&format!("{} TYPE {} ", expression, idx_type));
        }
        return Ok(Some(Expression::Raw(Raw {
            sql: sql.trim().to_string(),
        })));
    }
    // ClickHouse: PROJECTION name (SELECT ...) or PROJECTION name INDEX expr TYPE type_name
    if matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    ) && self.check_identifier("PROJECTION")
    {
        self.skip(); // consume PROJECTION
        let name = self.expect_identifier_or_keyword_with_quoted()?;
        // Parenthesized body: capture the raw token text between the
        // balanced parens without actually parsing the inner query.
        if self.match_token(TokenType::LParen) {
            let mut depth = 1i32;
            let start = self.current;
            while !self.is_at_end() && depth > 0 {
                if self.check(TokenType::LParen) {
                    depth += 1;
                }
                if self.check(TokenType::RParen) {
                    depth -= 1;
                    if depth == 0 {
                        break;
                    }
                }
                self.skip();
            }
            let body_sql = self.tokens_to_sql(start, self.current);
            self.expect(TokenType::RParen)?;
            return Ok(Some(Expression::Raw(Raw {
                sql: format!("PROJECTION {} ({})", name.name, body_sql),
            })));
        }
        // PROJECTION name INDEX expr TYPE type_name
        if self.match_token(TokenType::Index) {
            let expr = self.parse_bitwise()?.ok_or_else(|| {
                self.parse_error(
                    "Expected expression in ClickHouse PROJECTION INDEX definition",
                )
            })?;
            let type_str = if self.match_token(TokenType::Type) {
                if !self.is_at_end() {
                    let t = self.advance().text.clone();
                    format!(" TYPE {}", t)
                } else {
                    String::new()
                }
            } else {
                String::new()
            };
            return Ok(Some(Expression::Raw(Raw {
                sql: format!("PROJECTION {} INDEX {}{}", name.name, expr, type_str),
            })));
        }
        // Bare PROJECTION name.
        return Ok(Some(Expression::Raw(Raw {
            sql: format!("PROJECTION {}", name.name),
        })));
    }
    Ok(None)
}
/// parse_contains_property - Implemented from Python _parse_contains_property
#[allow(unused_variables, unused_mut)]
pub fn parse_contains_property(&mut self) -> Result<Option<Expression>> {
    // Consume the SQL keyword (of CONTAINS SQL) if present; no node is built yet.
    let _ = self.match_text_seq(&["SQL"]);
    Ok(None)
}
/// parse_convert - Ported from Python _parse_convert
/// Parses CONVERT function: CONVERT(expr USING charset) or CONVERT(expr, type)
#[allow(unused_variables, unused_mut)]
pub fn parse_convert(&mut self) -> Result<Option<Expression>> {
// Parse the expression to convert
let this = match self.parse_bitwise() {
Ok(Some(expr)) => expr,
Ok(None) => return Ok(None),
Err(e) => return Err(e),
};
// Check for USING charset (CONVERT(x USING utf8))
if self.match_token(TokenType::Using) {
let _ = self.parse_var(); // charset
// Return as Cast with charset
return Ok(Some(Expression::Cast(Box::new(Cast {
this,
to: DataType::Char { length: None },
trailing_comments: Vec::new(),
double_colon_syntax: false,
format: None,
default: None,
inferred_type: None,
}))));
}
// Check for comma then type (CONVERT(x, INT))
if self.match_token(TokenType::Comma) {
let data_type = self.parse_data_type()?;
return Ok(Some(Expression::Cast(Box::new(Cast {
this,
to: data_type,
trailing_comments: Vec::new(),
double_colon_syntax: false,
format: None,
default: None,
inferred_type: None,
}))));
}
// No type specified, return as-is wrapped in Cast
Ok(Some(Expression::Cast(Box::new(Cast {
this,
to: DataType::Char { length: None },
trailing_comments: Vec::new(),
double_colon_syntax: false,
format: None,
default: None,
inferred_type: None,
}))))
}
/// parse_copy_parameters - Parses COPY statement parameters
/// Python: _parse_copy_parameters
///
/// Reads `name [= | AS] value` pairs until EOF or a closing paren and wraps
/// them in a Tuple of CopyParameter nodes. Returns `Ok(None)` when no
/// parameter could be parsed at all.
pub fn parse_copy_parameters(&mut self) -> Result<Option<Expression>> {
    let mut options = Vec::new();
    while !self.is_at_end() && !self.check(TokenType::RParen) {
        // Option name (a Var or Identifier).
        let option = self.parse_var()?;
        if option.is_none() {
            break;
        }
        let option_name = match &option {
            Some(Expression::Var(v)) => v.this.to_ascii_uppercase(),
            Some(Expression::Identifier(id)) => id.name.to_ascii_uppercase(),
            _ => String::new(),
        };
        // Options and values may be separated by whitespace, "=" or "AS";
        // both separators are consumed when present.
        self.match_token(TokenType::Eq);
        self.match_token(TokenType::Alias);
        // Value parsing depends on the option name.
        let (expression, expressions) = if (option_name == "FILE_FORMAT"
            || option_name == "FORMAT_OPTIONS")
            && self.check(TokenType::LParen)
        {
            // Wrapped sub-options, e.g. FILE_FORMAT (TYPE = CSV ...).
            let wrapped = self.parse_wrapped_options()?;
            let exprs = match wrapped {
                Some(Expression::Tuple(t)) => t.expressions,
                Some(e) => vec![e],
                None => Vec::new(),
            };
            (None, exprs)
        } else if option_name == "FILE_FORMAT" {
            // T-SQL external file format: a single field value.
            let field = self.parse_field()?;
            (field, Vec::new())
        } else if option_name == "FORMAT"
            && self.previous().token_type == TokenType::Alias
            && self.match_texts(&["AVRO", "JSON"])
        {
            // FORMAT AS AVRO/JSON: fold the format keyword into the value
            // and keep the following field (if any) as the argument list.
            let format_type = self.previous().text.to_ascii_uppercase();
            let field = self.parse_field()?;
            (
                Some(Expression::Var(Box::new(Var {
                    this: format!("FORMAT AS {}", format_type),
                }))),
                field.map_or(Vec::new(), |f| vec![f]),
            )
        } else {
            // Generic case: unquoted field, falling back to a bracket
            // expression (bracket parse errors are deliberately swallowed).
            let expr = self
                .parse_unquoted_field()?
                .or_else(|| self.parse_bracket().ok().flatten());
            (expr, Vec::new())
        };
        options.push(Expression::CopyParameter(Box::new(CopyParameter {
            name: option_name,
            value: expression,
            values: expressions,
            eq: true,
        })));
        // Optional comma separator (dialect-specific).
        self.match_token(TokenType::Comma);
    }
    if options.is_empty() {
        Ok(None)
    } else {
        Ok(Some(Expression::Tuple(Box::new(Tuple {
            expressions: options,
        }))))
    }
}
/// parse_copy_property - Implemented from Python _parse_copy_property
#[allow(unused_variables, unused_mut)]
pub fn parse_copy_property(&mut self) -> Result<Option<Expression>> {
    // Consume GRANTS (as in COPY GRANTS) if present; no node is built yet.
    let _ = self.match_text_seq(&["GRANTS"]);
    Ok(None)
}
/// parse_create_like - Implemented from Python _parse_create_like
/// Calls: parse_id_var
#[allow(unused_variables, unused_mut)]
pub fn parse_create_like(&mut self) -> Result<Option<Expression>> {
    // Consume an INCLUDING/EXCLUDING keyword if present; no node is built yet.
    let _ = self.match_texts(&["INCLUDING", "EXCLUDING"]);
    Ok(None)
}
/// parse_credentials - Implemented from Python _parse_credentials
#[allow(unused_variables, unused_mut)]
pub fn parse_credentials(&mut self) -> Result<Option<Expression>> {
    // STORAGE_INTEGRATION = ... yields an (empty) Credentials node.
    if self.match_text_seq(&["STORAGE_INTEGRATION", "="]) {
        let credentials = Credentials {
            credentials: Vec::new(),
            encryption: None,
            storage: None,
        };
        return Ok(Some(Expression::Credentials(Box::new(credentials))));
    }
    // A bare CREDENTIALS keyword is consumed without producing a node.
    let _ = self.match_text_seq(&["CREDENTIALS"]);
    Ok(None)
}
/// parse_csv - Parses comma-separated expressions
/// Python: _parse_csv
/// In Python this takes a parse_method callback, but in Rust we use parse_expression_list
pub fn parse_csv(&mut self) -> Result<Option<Expression>> {
    // An empty list means "no match"; otherwise wrap the items in a Tuple.
    let expressions = self.parse_expression_list()?;
    if expressions.is_empty() {
        Ok(None)
    } else {
        Ok(Some(Expression::Tuple(Box::new(Tuple { expressions }))))
    }
}
/// parse_cte - Implemented from Python _parse_cte
/// Calls: parse_wrapped_id_vars
#[allow(unused_variables, unused_mut)]
pub fn parse_cte(&mut self) -> Result<Option<Expression>> {
    // USING KEY produces an empty Values placeholder node.
    if self.match_text_seq(&["USING", "KEY"]) {
        return Ok(Some(Expression::Values(Box::new(Values {
            expressions: Vec::new(),
            alias: None,
            column_aliases: Vec::new(),
        }))));
    }
    // [NOT] MATERIALIZED hints are consumed without producing a node.
    // NOT MATERIALIZED must be probed first so a lone MATERIALIZED is not
    // matched out of it.
    if !self.match_text_seq(&["NOT", "MATERIALIZED"]) {
        let _ = self.match_text_seq(&["MATERIALIZED"]);
    }
    Ok(None)
}
/// parse_cube_or_rollup - Ported from Python _parse_cube_or_rollup
/// Parses CUBE(...) or ROLLUP(...) expressions in GROUP BY
///
/// Returns `None` when neither keyword is present; otherwise consumes the
/// mandatory parenthesized (possibly empty) expression list and builds the
/// matching node.
#[allow(unused_variables, unused_mut)]
pub fn parse_cube_or_rollup(&mut self) -> Result<Option<Expression>> {
    // At most one of the two keywords matches; short-circuit keeps the
    // ROLLUP probe from running once CUBE matched.
    let is_cube = self.match_texts(&["CUBE"]);
    if !is_cube && !self.match_texts(&["ROLLUP"]) {
        return Ok(None);
    }
    // Both forms require a parenthesized expression list.
    self.expect(TokenType::LParen)?;
    let mut expressions = Vec::new();
    if !self.check(TokenType::RParen) {
        // `?` replaces the original manual match on the Result; while-let
        // replaces the Ok(None) => break arm.
        while let Some(expr) = self.parse_bitwise()? {
            expressions.push(expr);
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
    }
    self.expect(TokenType::RParen)?;
    Ok(Some(if is_cube {
        Expression::Cube(Box::new(Cube { expressions }))
    } else {
        Expression::Rollup(Box::new(Rollup { expressions }))
    }))
}
/// parse_data_deletion_property - Implemented from Python _parse_data_deletion_property
/// Calls: parse_column, parse_retention_period
#[allow(unused_variables, unused_mut)]
pub fn parse_data_deletion_property(&mut self) -> Result<Option<Expression>> {
    // Consume whichever clause introducer is present (short-circuit stops at
    // the first match); no node is built yet.
    let _ = self.match_text_seq(&["ON"])
        || self.match_text_seq(&["OFF"])
        || self.match_text_seq(&["FILTER_COLUMN", "="]);
    Ok(None)
}
/// parse_datablocksize - Implemented from Python _parse_datablocksize
/// Calls: parse_number
#[allow(unused_variables, unused_mut)]
pub fn parse_datablocksize(&mut self) -> Result<Option<Expression>> {
    // Consume a units keyword if present; no node is built yet.
    let _ = self.match_texts(&["BYTES", "KBYTES", "KILOBYTES"]);
    Ok(None)
}
/// parse_dcolon - Delegates to parse_types
///
/// Kept as a thin alias so call sites mirror the Python parser's _parse_dcolon.
#[allow(unused_variables, unused_mut)]
pub fn parse_dcolon(&mut self) -> Result<Option<Expression>> {
    // The ::type suffix is just a type expression at this point.
    self.parse_types()
}
/// parse_ddl_select - Ported from Python _parse_ddl_select
/// Parses a SELECT statement in DDL context (CREATE TABLE AS SELECT, INSERT INTO ... SELECT)
#[allow(unused_variables, unused_mut)]
pub fn parse_ddl_select(&mut self) -> Result<Option<Expression>> {
    // The nested SELECT itself (query modifiers are handled inside it).
    match self.parse_select_query()? {
        None => Ok(None),
        // Fold in any trailing UNION / INTERSECT / EXCEPT.
        select => self.parse_set_operations_with_expr(select),
    }
}
/// parse_for_in - BigQuery procedural FOR...IN...DO loop
/// Python: BigQuery._parse_for_in
/// Format: FOR variable IN (query) DO statement(s) END FOR
/// Example: FOR record IN (SELECT * FROM t) DO SELECT record.col
pub fn parse_for_in(&mut self) -> Result<Expression> {
    // parse_range yields the `variable IN (query)` part as an In expression.
    let Some(this) = self.parse_range()? else {
        return Err(self.parse_error("Expected expression after FOR"));
    };
    // DO separates the loop header from its body.
    self.match_text_seq(&["DO"]);
    // The loop body is an arbitrary statement.
    let body = self.parse_statement()?;
    Ok(Expression::ForIn(Box::new(ForIn {
        this: Box::new(this),
        expression: Box::new(body),
    })))
}
/// parse_declare - Parses DECLARE statement
/// Python: _parse_declare
/// Format: DECLARE var1 type [DEFAULT expr], var2 type [DEFAULT expr], ...
///
/// Also handles BigQuery's multi-variable form `DECLARE X, Y, Z INT64
/// [DEFAULT expr]`, detected by speculative lookahead and encoded as a single
/// DeclareItem carrying the extra names in `additional_names`.
pub fn parse_declare(&mut self) -> Result<Option<Expression>> {
    // Spark/Databricks: DECLARE OR REPLACE ...
    let replace = self.match_text_seq(&["OR", "REPLACE"]);
    let mut expressions = Vec::new();
    // --- BigQuery multi-variable speculation -----------------------------
    // Look ahead for "ident, ident, ... type"; if it pans out, all names
    // share one type/default. Otherwise rewind and parse normally below.
    let saved = self.current;
    let mut multi_names: Vec<Expression> = Vec::new();
    if let Some(first_var) = self.parse_id_var()? {
        // A following comma suggests the multi-var form, except for cursors.
        if self.check(TokenType::Comma) && !self.check_identifier("CURSOR") {
            // Speculatively collect the comma-separated identifiers.
            multi_names.push(first_var);
            while self.match_token(TokenType::Comma) {
                if let Some(next_var) = self.parse_id_var()? {
                    multi_names.push(next_var);
                } else {
                    break;
                }
            }
            // Commit only if we are now sitting on a data type (i.e. not at
            // the end of the statement and not on a separator).
            if multi_names.len() > 1 && !self.is_at_end() && !self.check(TokenType::Semicolon) {
                let data_type = self.parse_data_type()?;
                let kind_str = self.data_type_to_sql(&data_type);
                // DEFAULT <expr> or = <expr> applies to every declared name.
                let default = if self.match_token(TokenType::Default)
                    || self.match_token(TokenType::Eq)
                {
                    Some(Box::new(self.parse_expression()?))
                } else {
                    None
                };
                // First name becomes the item head; the rest ride along in
                // additional_names.
                let first_name = multi_names.remove(0);
                expressions.push(Expression::DeclareItem(Box::new(DeclareItem {
                    this: Box::new(first_name),
                    kind: Some(kind_str),
                    default,
                    has_as: false,
                    additional_names: multi_names,
                })));
                return Ok(Some(Expression::Declare(Box::new(Declare {
                    expressions,
                    replace,
                }))));
            }
        }
    }
    // --- Normal path -----------------------------------------------------
    // The speculation above did not commit; rewind and parse item-by-item.
    self.current = saved;
    loop {
        if let Some(item) = self.parse_declareitem()? {
            expressions.push(item);
        } else {
            break;
        }
        // Accept comma (TSQL/BigQuery) or semicolon (Snowflake scripting)
        // as item separators.
        if self.match_token(TokenType::Comma) || self.match_token(TokenType::Semicolon) {
            // BEGIN terminates a DECLARE block (Snowflake scripting).
            if self.check(TokenType::Begin) {
                break;
            }
            continue;
        }
        break;
    }
    // At least one parsed item makes this a Declare statement.
    if !expressions.is_empty() {
        return Ok(Some(Expression::Declare(Box::new(Declare {
            expressions,
            replace,
        }))));
    }
    Ok(None)
}
/// parse_declareitem - Parse a DECLARE item (variable declaration)
/// TSQL format: @var AS type [= expr] or @var type [= expr]
/// Also handles: DECLARE name CURSOR FOR SELECT ...
/// Also handles: DECLARE @var TABLE (col_defs)
///
/// CURSOR queries and TABLE column definitions are NOT parsed into an AST;
/// they are captured as raw uppercased SQL text inside `kind`
/// (e.g. "CURSOR FOR SELECT ..." / "TABLE (...)").
/// Returns Ok(None) when no variable name can be parsed.
#[allow(unused_variables, unused_mut)]
pub fn parse_declareitem(&mut self) -> Result<Option<Expression>> {
// Consume optional VAR or VARIABLE keyword (Spark/Databricks)
if self.check_identifier("VAR") || self.check_identifier("VARIABLE") {
self.skip();
}
// Parse the variable name (starts with @ or is a cursor name)
let var = if let Some(v) = self.parse_id_var()? {
v
} else {
return Ok(None);
};
// Check for CURSOR FOR syntax: DECLARE name CURSOR FOR SELECT ...
if self.check_identifier("CURSOR") {
self.skip(); // consume CURSOR
// Parse optional cursor options before FOR (e.g., SCROLL, INSENSITIVE, etc.)
// For now just look for FOR
if self.match_token(TokenType::For) {
// Capture the remaining tokens as the cursor query using tokens_to_sql for proper spacing
// Everything up to the next semicolon (or end of input) is the query.
let start = self.current;
while !self.is_at_end() && !self.check(TokenType::Semicolon) {
self.skip();
}
let query_str = self.tokens_to_sql_uppercased(start, self.current);
let kind_str = format!("CURSOR FOR {}", query_str);
return Ok(Some(Expression::DeclareItem(Box::new(DeclareItem {
this: Box::new(var),
kind: Some(kind_str),
default: None,
has_as: false,
additional_names: Vec::new(),
}))));
} else {
// Bare `DECLARE name CURSOR` without a FOR clause.
return Ok(Some(Expression::DeclareItem(Box::new(DeclareItem {
this: Box::new(var),
kind: Some("CURSOR".to_string()),
default: None,
has_as: false,
additional_names: Vec::new(),
}))));
}
}
// Parse optional AS keyword
let has_as = self.match_token(TokenType::As);
// Check for TABLE type with column definitions
if self.check(TokenType::Table) {
self.skip(); // consume TABLE
if self.match_token(TokenType::LParen) {
// Parse the TABLE column definitions using tokens_to_sql for proper spacing
// depth tracks paren nesting; the scan stops ON the matching close
// paren without consuming it — expect(RParen) below consumes it.
let start = self.current;
let mut depth = 1;
while depth > 0 && !self.is_at_end() {
if self.check(TokenType::LParen) {
depth += 1;
}
if self.check(TokenType::RParen) {
depth -= 1;
if depth == 0 {
break;
}
}
self.skip();
}
let col_defs_str = self.tokens_to_sql_uppercased(start, self.current);
self.expect(TokenType::RParen)?;
let kind_str = format!("TABLE ({})", col_defs_str);
return Ok(Some(Expression::DeclareItem(Box::new(DeclareItem {
this: Box::new(var),
kind: Some(kind_str),
default: None,
has_as,
additional_names: Vec::new(),
}))));
} else {
// TABLE with no column list.
return Ok(Some(Expression::DeclareItem(Box::new(DeclareItem {
this: Box::new(var),
kind: Some("TABLE".to_string()),
default: None,
has_as,
additional_names: Vec::new(),
}))));
}
}
// Check if next token is = or DEFAULT (no type, just default value)
// or if at end of statement (no type, no default)
let kind_str = if self.check(TokenType::Eq)
|| self.check(TokenType::Default)
|| self.is_at_end()
|| self.check(TokenType::Semicolon)
|| self.check(TokenType::Comma)
{
// No type specified
None
} else {
// Parse the data type
// The type is stored as rendered SQL text rather than a DataType node.
let data_type = self.parse_data_type()?;
Some(self.data_type_to_sql(&data_type))
};
// Parse optional DEFAULT value or = value (TSQL uses =)
let default = if self.match_token(TokenType::Default) || self.match_token(TokenType::Eq) {
Some(Box::new(self.parse_expression()?))
} else {
None
};
Ok(Some(Expression::DeclareItem(Box::new(DeclareItem {
this: Box::new(var),
kind: kind_str,
default,
has_as,
additional_names: Vec::new(),
}))))
}
/// Convert a DataType to its SQL string representation.
/// Used by DECLARE handling, where the declared type is kept as plain text.
/// Unhandled variants fall back to Debug formatting (lossy).
fn data_type_to_sql(&self, dt: &DataType) -> String {
    match dt {
        DataType::Boolean => "BOOLEAN".to_string(),
        DataType::TinyInt { length } => match length {
            Some(n) => format!("TINYINT({})", n),
            None => "TINYINT".to_string(),
        },
        DataType::SmallInt { length } => match length {
            Some(n) => format!("SMALLINT({})", n),
            None => "SMALLINT".to_string(),
        },
        DataType::Int {
            length,
            integer_spelling,
        } => {
            // The INTEGER vs INT spelling from the source text is preserved.
            let name = if *integer_spelling { "INTEGER" } else { "INT" };
            match length {
                Some(n) => format!("{}({})", name, n),
                None => name.to_string(),
            }
        }
        DataType::BigInt { length } => match length {
            Some(n) => format!("BIGINT({})", n),
            None => "BIGINT".to_string(),
        },
        DataType::Float {
            precision, scale, ..
        } => match (precision, scale) {
            (Some(p), Some(s)) => format!("FLOAT({}, {})", p, s),
            (Some(p), None) => format!("FLOAT({})", p),
            _ => "FLOAT".to_string(),
        },
        DataType::Double { precision, scale } => match (precision, scale) {
            (Some(p), Some(s)) => format!("DOUBLE({}, {})", p, s),
            (Some(p), None) => format!("DOUBLE({})", p),
            _ => "DOUBLE".to_string(),
        },
        DataType::Decimal { precision, scale } => match (precision, scale) {
            (Some(p), Some(s)) => format!("DECIMAL({}, {})", p, s),
            (Some(p), None) => format!("DECIMAL({})", p),
            _ => "DECIMAL".to_string(),
        },
        DataType::Char { length } => match length {
            Some(n) => format!("CHAR({})", n),
            None => "CHAR".to_string(),
        },
        DataType::VarChar { length, .. } => match length {
            Some(n) => format!("VARCHAR({})", n),
            None => "VARCHAR".to_string(),
        },
        DataType::Text => "TEXT".to_string(),
        DataType::Date => "DATE".to_string(),
        DataType::Time { precision, .. } => match precision {
            Some(p) => format!("TIME({})", p),
            None => "TIME".to_string(),
        },
        DataType::Timestamp { precision, .. } => match precision {
            Some(p) => format!("TIMESTAMP({})", p),
            None => "TIMESTAMP".to_string(),
        },
        DataType::Binary { length } => match length {
            Some(n) => format!("BINARY({})", n),
            None => "BINARY".to_string(),
        },
        DataType::VarBinary { length } => match length {
            Some(n) => format!("VARBINARY({})", n),
            None => "VARBINARY".to_string(),
        },
        DataType::Blob => "BLOB".to_string(),
        DataType::String { length } => match length {
            Some(n) => format!("STRING({})", n),
            None => "STRING".to_string(),
        },
        DataType::Json => "JSON".to_string(),
        DataType::Uuid => "UUID".to_string(),
        // Custom covers dialect-specific names (INT64, FLOAT64, etc.)
        DataType::Custom { name } => name.clone(),
        // Fallback for variants without a dedicated rendering.
        _ => format!("{:?}", dt),
    }
}
/// parse_decode - Ported from Python _parse_decode
/// Parses Oracle-style DECODE or the two-argument charset form:
/// - 3+ args: Oracle DECODE(expr, search1, result1, ..., default)
/// - 2 args: character set decode (expr, charset)
/// Both forms are represented uniformly as a DecodeCase holding the raw
/// argument list; downstream generation decides how to render it.
#[allow(unused_variables, unused_mut)]
pub fn parse_decode(&mut self) -> Result<Option<Expression>> {
    // Collect comma-separated arguments. A failed expression parse simply
    // ends the list (e.g. when the closing paren follows immediately).
    let mut args: Vec<Expression> = Vec::new();
    loop {
        match self.parse_expression() {
            Ok(expr) => args.push(expr),
            Err(_) => break,
        }
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    // A previous `args.len() < 3` branch returned the exact same node as
    // the fall-through path, so the arity split was dead code; a single
    // DecodeCase covers both the Oracle and the charset form.
    Ok(Some(Expression::DecodeCase(Box::new(DecodeCase {
        expressions: args,
    }))))
}
/// parse_definer - MySQL DEFINER property
/// Parses: DEFINER = user@host (the `=` is optional; host may be the
/// `%` wildcard). The result is stored as a single "user@host" string.
#[allow(unused_variables, unused_mut)]
pub fn parse_definer(&mut self) -> Result<Option<Expression>> {
    // `=` between DEFINER and the user spec is optional.
    self.match_token(TokenType::Eq);
    let Some(user) = self.parse_id_var()? else {
        return Ok(None);
    };
    // user and host must be separated by `@`.
    if !self.match_token(TokenType::DAt) {
        return Ok(None);
    }
    // Host is an identifier or the `%` wildcard (any host).
    let host = if let Some(id) = self.parse_id_var()? {
        id
    } else if self.match_token(TokenType::Mod) {
        Expression::Identifier(Identifier::new(self.previous().text.clone()))
    } else {
        return Ok(None);
    };
    // Flatten both parts into "user@host"; non-identifier parts degrade
    // to the empty string, mirroring the original behavior.
    let name_of = |e: &Expression| match e {
        Expression::Identifier(id) => id.name.clone(),
        _ => String::new(),
    };
    let definer_str = format!("{}@{}", name_of(&user), name_of(&host));
    Ok(Some(Expression::DefinerProperty(Box::new(
        DefinerProperty {
            this: Box::new(Expression::Literal(Box::new(Literal::String(definer_str)))),
        },
    ))))
}
/// parse_derived_table_values - Implemented from Python _parse_derived_table_values
/// Recognizes a bare VALUES keyword (yielding an empty Values node) and
/// consumes ClickHouse-style `FORMAT VALUES` without producing a node.
#[allow(unused_variables, unused_mut)]
pub fn parse_derived_table_values(&mut self) -> Result<Option<Expression>> {
    if self.match_text_seq(&["VALUES"]) {
        let values = Values {
            expressions: Vec::new(),
            alias: None,
            column_aliases: Vec::new(),
        };
        return Ok(Some(Expression::Values(Box::new(values))));
    }
    // `FORMAT VALUES` is consumed when present but intentionally yields
    // no expression (it only advances the cursor).
    self.match_text_seq(&["FORMAT", "VALUES"]);
    Ok(None)
}
/// parse_dict_property - ClickHouse dictionary property
/// Parses: property_name(kind(key1 value1, key2 value2, ...))
/// property_name should be the already matched property keyword (LAYOUT, SOURCE, etc.)
///
/// Settings pairs are space-separated (commas are tolerated), and each
/// key/value pair is stored as a two-element Tuple; the full settings list
/// is itself wrapped in a Tuple. A STRUCTURE key is special-cased: its
/// parenthesized column definitions are captured as raw text in a Var.
/// Returns Ok(None) when the opening paren is absent.
#[allow(unused_variables, unused_mut)]
pub fn parse_dict_property(&mut self, property_name: &str) -> Result<Option<Expression>> {
// Expect opening paren
if !self.match_token(TokenType::LParen) {
return Ok(None);
}
// Parse the kind (e.g. HASHED, FLAT, CLICKHOUSE, CACHE, etc.)
// Accept Var, Identifier, or keyword tokens as the kind name
let kind_str = if self.is_identifier_token() || self.check_keyword() {
self.advance().text.clone()
} else {
String::new()
};
if kind_str.is_empty() {
return Err(self.parse_error("Expected dictionary property kind"));
}
// Parse optional settings in nested parens
let settings = if self.match_token(TokenType::LParen) {
let mut setting_pairs = Vec::new();
loop {
// The key may be an id/var, a keyword, or (last resort) any token
// that is not a closing paren or comma.
let key = if let Some(k) = self.parse_id_var()? {
Some(k)
} else if self.is_safe_keyword_as_identifier() || self.check_keyword() {
let name = self.advance().text.clone();
Some(Expression::Identifier(Identifier::new(name)))
} else if !self.check(TokenType::RParen) && !self.check(TokenType::Comma) {
let name = self.advance().text.clone();
Some(Expression::Identifier(Identifier::new(name)))
} else {
None
};
// ClickHouse: STRUCTURE (...) contains column defs without commas — consume balanced parens
let is_structure = key.as_ref().map_or(false, |k| {
matches!(k, Expression::Identifier(id) if id.name.eq_ignore_ascii_case("STRUCTURE"))
});
let value = if is_structure && self.check(TokenType::LParen) {
// Re-serialize the balanced-paren span verbatim: tokens are
// space-separated except immediately after an opening paren.
let mut raw = String::new();
let mut depth = 0i32;
while !self.is_at_end() {
let tok = self.advance();
match tok.token_type {
TokenType::LParen => {
depth += 1;
raw.push('(');
}
TokenType::RParen => {
depth -= 1;
if depth == 0 {
raw.push(')');
break;
}
raw.push(')');
}
_ => {
if !raw.is_empty() && !raw.ends_with('(') {
raw.push(' ');
}
raw.push_str(&tok.text);
}
}
}
Some(Expression::Var(Box::new(Var { this: raw })))
} else {
self.parse_primary_or_var()?
};
if key.is_none() && value.is_none() {
break;
}
// A pair is recorded only when both key and value parsed; a key
// without a value is silently dropped.
if let (Some(k), Some(v)) = (key, value) {
// Store as a tuple-like expression
setting_pairs.push(Expression::Tuple(Box::new(Tuple {
expressions: vec![k, v],
})));
}
// ClickHouse dict properties are space-separated, not comma-separated
// e.g. SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() DB 'test'))
// Accept optional comma but don't require it
self.match_token(TokenType::Comma);
// Break if we see RParen (end of settings)
if self.check(TokenType::RParen) {
break;
}
}
// Closes the settings paren.
self.expect(TokenType::RParen)?;
if !setting_pairs.is_empty() {
Some(Box::new(Expression::Tuple(Box::new(Tuple {
expressions: setting_pairs,
}))))
} else {
None
}
} else {
None
};
// Closes the outer property paren.
self.expect(TokenType::RParen)?;
Ok(Some(Expression::DictProperty(Box::new(DictProperty {
this: Box::new(Expression::Identifier(Identifier::new(
property_name.to_string(),
))),
kind: kind_str,
settings,
}))))
}
/// parse_dict_range - Implemented from Python _parse_dict_range
/// Parses a dictionary range spec: (MIN min_val MAX max_val) or (max_val).
/// When only one value is given, it is the max and min defaults to 0.
pub fn parse_dict_range(&mut self, property_name: &str) -> Result<Option<Expression>> {
    self.expect(TokenType::LParen)?;
    // Bounds are parsed id/var-first so keyword-like column names
    // (e.g. `MIN discount_start_date MAX discount_end_date`) are not
    // mistaken for function calls.
    fn bound(p: &mut Parser) -> Result<Option<Expression>> {
        // Negative numeric literal: `-` immediately followed by a number.
        if p.check(TokenType::Dash)
            && p.peek_nth(1)
                .is_some_and(|t| t.token_type == TokenType::Number)
        {
            p.advance(); // consume `-`
            let digits = p.advance().text.clone();
            return Ok(Some(Expression::Literal(Box::new(Literal::Number(
                format!("-{}", digits),
            )))));
        }
        match p.parse_id_var()? {
            Some(id) => Ok(Some(id)),
            None => p.parse_primary_or_var(),
        }
    }
    let (min_val, max_val) = if self.peek().text.eq_ignore_ascii_case("MIN") {
        self.skip(); // consume MIN
        let lo = bound(self)?;
        if self.peek().text.eq_ignore_ascii_case("MAX") {
            self.skip(); // consume MAX
        }
        let hi = bound(self)?;
        (lo, hi)
    } else {
        // Single-value form: the value is the max; min is implicitly 0.
        let hi = bound(self)?;
        let lo = Some(Expression::Literal(Box::new(Literal::Number(
            "0".to_string(),
        ))));
        (lo, hi)
    };
    self.expect(TokenType::RParen)?;
    Ok(Some(Expression::DictRange(Box::new(DictRange {
        this: Box::new(Expression::Var(Box::new(Var {
            this: property_name.to_string(),
        }))),
        min: min_val.map(Box::new),
        max: max_val.map(Box::new),
    }))))
}
/// parse_disjunction - Parses OR expressions
/// Python: _parse_disjunction
/// Thin adapter over parse_or in the precedence chain: a parse failure
/// is reported as None rather than propagated as an error.
pub fn parse_disjunction(&mut self) -> Result<Option<Expression>> {
    Ok(self.parse_or().ok())
}
/// parse_distkey - Redshift DISTKEY property for the distribution key
/// Parses: DISTKEY(column_name)
#[allow(unused_variables, unused_mut)]
pub fn parse_distkey(&mut self) -> Result<Option<Expression>> {
    // The key column must be wrapped in parentheses.
    if !self.match_token(TokenType::LParen) {
        return Ok(None);
    }
    let Some(column) = self.parse_id_var()? else {
        return Ok(None);
    };
    // A missing closing paren is tolerated.
    self.match_token(TokenType::RParen);
    Ok(Some(Expression::DistKeyProperty(Box::new(
        DistKeyProperty {
            this: Box::new(column),
        },
    ))))
}
/// parse_distributed_property - Parses a DISTRIBUTED BY property
/// (BY HASH (cols) | BY RANDOM, optional BUCKETS n|AUTO, optional ORDER BY).
/// Python: parser.py:2462-2481
#[allow(unused_variables, unused_mut)]
pub fn parse_distributed_property(&mut self) -> Result<Option<Expression>> {
    let (kind, expressions) = if self.match_text_seq(&["BY", "HASH"]) {
        // HASH distribution takes a wrapped column list: (col1, col2, ...)
        let mut cols = Vec::new();
        if let Some(Expression::Tuple(t)) = self.parse_wrapped_id_vars()? {
            cols = t.expressions;
        }
        ("HASH".to_string(), cols)
    } else if self.match_text_seq(&["BY", "RANDOM"]) {
        ("RANDOM".to_string(), Vec::new())
    } else {
        return Ok(None);
    };
    // BUCKETS AUTO carries no bucket count; BUCKETS <n> does.
    // Short-circuit keeps the AUTO check inside the BUCKETS match.
    let buckets = if self.match_text_seq(&["BUCKETS"]) && !self.match_text_seq(&["AUTO"]) {
        self.parse_number()?
    } else {
        None
    };
    // Optional trailing ORDER BY.
    let order = self.parse_order()?;
    Ok(Some(Expression::DistributedByProperty(Box::new(
        DistributedByProperty {
            expressions,
            kind,
            buckets: buckets.map(Box::new),
            order: order.map(Box::new),
        },
    ))))
}
/// Parse DROP COLUMN in ALTER TABLE
/// Note: Main ALTER TABLE DROP COLUMN logic is in parse_alter_table -> AlterTableAction::DropColumn
/// IF EXISTS and CASCADE are consumed but not represented; the caller owns
/// the drop semantics and only needs the column identifier.
pub fn parse_drop_column(&mut self) -> Result<Option<Expression>> {
    // COLUMN keyword is optional: DROP [COLUMN] name
    self.match_token(TokenType::Column);
    let _if_exists = self.match_keywords(&[TokenType::If, TokenType::Exists]);
    let column = self.parse_identifier()?;
    if column.is_some() {
        // CASCADE only follows a successfully parsed column name.
        let _cascade = self.match_text_seq(&["CASCADE"]);
    }
    Ok(column)
}
/// Parse DROP PARTITION in ALTER TABLE
/// Note: Main ALTER TABLE DROP PARTITION logic is in parse_alter_table -> AlterTableAction::DropPartition
/// Convenience wrapper: delegates with exists=false (no IF EXISTS clause).
pub fn parse_drop_partition(&mut self) -> Result<Option<Expression>> {
self.parse_drop_partition_with_exists(false)
}
/// Parse DROP PARTITION with an explicit IF EXISTS flag.
/// Accepts one or more comma-separated `PARTITION (key = value, ...)`
/// specs; each spec becomes a Tuple of its expressions. Returns Ok(None)
/// when no partition spec was parsed.
pub fn parse_drop_partition_with_exists(&mut self, exists: bool) -> Result<Option<Expression>> {
    let mut partitions = Vec::new();
    while self.match_token(TokenType::Partition) {
        if self.match_token(TokenType::LParen) {
            // Comma-separated key = value assignments inside the parens.
            let mut exprs = vec![self.parse_expression()?];
            while self.match_token(TokenType::Comma) {
                exprs.push(self.parse_expression()?);
            }
            // A missing closing paren is tolerated.
            self.match_token(TokenType::RParen);
            partitions.push(Expression::Tuple(Box::new(Tuple { expressions: exprs })));
        }
        // Additional PARTITION specs are comma-separated.
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    if partitions.is_empty() {
        Ok(None)
    } else {
        Ok(Some(Expression::DropPartition(Box::new(DropPartition {
            expressions: partitions,
            exists,
        }))))
    }
}
/// parse_equality - Parses comparison/equality expressions (= <> < > <= >=)
/// Python: _parse_equality
/// Thin adapter over parse_comparison in the precedence chain: a parse
/// failure is reported as None rather than propagated as an error.
pub fn parse_equality(&mut self) -> Result<Option<Expression>> {
    Ok(self.parse_comparison().ok())
}
/// parse_escape - Parses the ESCAPE clause of a LIKE pattern.
/// Python: _parse_escape
/// Returns the escape operand (a string literal such as '\', or NULL)
/// when the ESCAPE keyword is present; None otherwise.
pub fn parse_escape(&mut self) -> Result<Option<Expression>> {
    if !self.match_token(TokenType::Escape) {
        return Ok(None);
    }
    // The operand is normally a string; NULL is also accepted.
    match self.parse_string()? {
        Some(escape_char) => Ok(Some(escape_char)),
        None => self.parse_null(),
    }
}
/// parse_exists - Implemented from Python _parse_exists
/// NOTE(review): this looks like a partial port — its only effect is to
/// consume a leading IF token (advancing the cursor); it always returns
/// Ok(None). The Python _parse_exists matches IF [NOT] EXISTS and returns
/// a boolean. Confirm callers rely only on the IF consumption here.
#[allow(unused_variables, unused_mut)]
pub fn parse_exists(&mut self) -> Result<Option<Expression>> {
if self.match_text_seq(&["IF"]) {
// Matched: IF
return Ok(None);
}
Ok(None)
}
/// parse_exponent - Parses exponent/power expressions
/// Python: _parse_exponent
/// In most dialects EXPONENT is empty, so this simply adapts parse_unary:
/// a parse failure is reported as None rather than propagated.
pub fn parse_exponent(&mut self) -> Result<Option<Expression>> {
    Ok(self.parse_unary().ok())
}
/// parse_expressions - Parse a comma-separated expression list.
/// Returns None for an empty list, the bare expression for a single
/// element, and a Tuple wrapping the elements otherwise.
#[allow(unused_variables, unused_mut)]
pub fn parse_expressions(&mut self) -> Result<Option<Expression>> {
    let mut expressions = self.parse_expression_list()?;
    match expressions.len() {
        0 => Ok(None),
        // Single element: unwrap rather than wrapping in a Tuple.
        1 => Ok(expressions.pop()),
        _ => Ok(Some(Expression::Tuple(Box::new(Tuple { expressions })))),
    }
}
/// parse_extract - Ported from Python _parse_extract
/// Parses EXTRACT(field FROM expression); a comma is tolerated in place
/// of FROM for dialects that allow it. Unrecognized field names become
/// DateTimeField::Custom.
#[allow(unused_variables, unused_mut)]
pub fn parse_extract(&mut self) -> Result<Option<Expression>> {
    // The field must be an identifier-like token (YEAR, MONTH, DAY, ...).
    if !self.check(TokenType::Identifier) && !self.check(TokenType::Var) {
        return Ok(None);
    }
    let field_name = self.advance().text.to_ascii_uppercase();
    // Map the (uppercased) field name onto a DateTimeField variant.
    let field = match field_name.as_str() {
        "YEAR" => DateTimeField::Year,
        "MONTH" => DateTimeField::Month,
        "DAY" => DateTimeField::Day,
        "HOUR" => DateTimeField::Hour,
        "MINUTE" => DateTimeField::Minute,
        "SECOND" => DateTimeField::Second,
        "MILLISECOND" | "MILLISECONDS" | "MS" => DateTimeField::Millisecond,
        "MICROSECOND" | "MICROSECONDS" | "US" => DateTimeField::Microsecond,
        "DOW" | "DAYOFWEEK" => DateTimeField::DayOfWeek,
        "DOY" | "DAYOFYEAR" => DateTimeField::DayOfYear,
        "WEEK" => DateTimeField::Week,
        "QUARTER" => DateTimeField::Quarter,
        "EPOCH" => DateTimeField::Epoch,
        "TIMEZONE" => DateTimeField::Timezone,
        "TIMEZONE_HOUR" => DateTimeField::TimezoneHour,
        "TIMEZONE_MINUTE" => DateTimeField::TimezoneMinute,
        "DATE" => DateTimeField::Date,
        "TIME" => DateTimeField::Time,
        other => DateTimeField::Custom(other.to_string()),
    };
    // FROM (standard) or a comma (some dialects) separates field and operand.
    if !self.match_token(TokenType::From) && !self.match_token(TokenType::Comma) {
        return Err(self.parse_error("Expected FROM or comma after EXTRACT field"));
    }
    // Parse the source expression and apply ClickHouse arg-alias handling.
    let source = self
        .parse_bitwise()?
        .ok_or_else(|| self.parse_error("Expected expression after FROM in EXTRACT"))?;
    let this = self.try_clickhouse_func_arg_alias(source);
    Ok(Some(Expression::Extract(Box::new(ExtractFunc {
        this,
        field,
    }))))
}
/// parse_factor - Parses multiplication/division expressions (* / % operators)
/// Python: _parse_factor
/// Thin adapter over parse_multiplication in the precedence chain:
/// a parse failure is reported as None rather than propagated.
pub fn parse_factor(&mut self) -> Result<Option<Expression>> {
    Ok(self.parse_multiplication().ok())
}
/// parse_fallback - Implemented from Python _parse_fallback
/// Teradata: recognizes the PROTECTION suffix of FALLBACK [PROTECTION].
/// NOTE(review): the no/protection flags are left unset here — presumably
/// the caller records any leading NO keyword; confirm against callers.
#[allow(unused_variables, unused_mut)]
pub fn parse_fallback(&mut self) -> Result<Option<Expression>> {
    if !self.match_text_seq(&["PROTECTION"]) {
        return Ok(None);
    }
    Ok(Some(Expression::FallbackProperty(Box::new(
        FallbackProperty {
            no: None,
            protection: None,
        },
    ))))
}
/// parse_field - Parse a field (column name, literal, or expression)
/// Python: field = self._parse_primary() or self._parse_function() or self._parse_id_var()
pub fn parse_field(&mut self) -> Result<Option<Expression>> {
// Try parsing literals first
if let Some(expr) = self.parse_string()? {
return Ok(Some(expr));
}
if let Some(expr) = self.parse_number()? {
return Ok(Some(expr));
}
if let Some(expr) = self.parse_boolean()? {
return Ok(Some(expr));
}
if let Some(expr) = self.parse_null()? {
return Ok(Some(expr));
}
if let Some(expr) = self.parse_star()? {
return Ok(Some(expr));
}
// Try parsing identifier
if let Some(expr) = self.parse_identifier()? {
return Ok(Some(expr));
}
// Try parsing a variable/identifier
if let Some(expr) = self.parse_var()? {
return Ok(Some(expr));
}
// Allow keywords as identifiers in field context (e.g., "schema" as a field name)
if self.check_keyword() {
let token = self.advance();
return Ok(Some(Expression::Identifier(Identifier {
name: token.text,
quoted: false,
trailing_comments: Vec::new(),
span: None,
})));
}
Ok(None)
}
/// parse_field_def - Ported from Python _parse_field_def
/// Parses a field definition: name, optional data type, optional constraints.
#[allow(unused_variables, unused_mut)]
pub fn parse_field_def(&mut self) -> Result<Option<Expression>> {
    // The name is any valid field expression; bail out when absent.
    match self.parse_field()? {
        None => Ok(None),
        // Delegate the type/constraint tail to the column-def helper.
        field => self.parse_column_def_with_field(field),
    }
}
/// Helper to parse a column definition with a pre-parsed field name.
///
/// The name expression must be a Column, Identifier, or Var; anything else
/// yields Ok(None). The data type is optional (missing types become
/// DataType::Unknown). After the type, a loop collects standard column
/// constraints plus ClickHouse-only extensions (ALIAS, MATERIALIZED,
/// EPHEMERAL, CODEC, TTL), which are gated on the configured dialect.
fn parse_column_def_with_field(
&mut self,
field: Option<Expression>,
) -> Result<Option<Expression>> {
if field.is_none() {
return Ok(None);
}
let this = field.unwrap();
// Get the identifier from the expression and preserve quoted-identifier state.
let name_ident = match &this {
Expression::Column(col) => col.name.clone(),
Expression::Identifier(id) => id.clone(),
Expression::Var(v) => Identifier::new(v.this.clone()),
_ => return Ok(None),
};
// Parse the data type using parse_data_type_optional (which handles unknown types gracefully)
let data_type = match self.parse_data_type_optional()? {
Some(dt) => dt,
None => DataType::Unknown,
};
// Create ColumnDef with default values
// then restore the original identifier (with its quoting) as the name.
let mut col_def = ColumnDef::new(name_ident.name.clone(), data_type);
col_def.name = name_ident;
// Check for FOR ORDINALITY (JSON table columns)
// FOR ORDINALITY columns take no further constraints.
if self.match_text_seq(&["FOR", "ORDINALITY"]) {
return Ok(Some(Expression::ColumnDef(Box::new(col_def))));
}
// Parse constraints and extract specific constraint values
// Each iteration consumes one constraint or one ClickHouse extension;
// the loop ends when nothing matches.
loop {
if let Some(constraint) = self.parse_column_constraint()? {
// Check specific constraint types
// Mirror each recognized constraint into ColumnDef's dedicated
// fields as well as its constraints list.
match &constraint {
Expression::NotNullColumnConstraint(_) => {
col_def.nullable = Some(false);
col_def.constraints.push(ColumnConstraint::NotNull);
}
Expression::PrimaryKeyColumnConstraint(_) => {
col_def.primary_key = true;
col_def.constraints.push(ColumnConstraint::PrimaryKey);
}
Expression::UniqueColumnConstraint(_) => {
col_def.unique = true;
col_def.constraints.push(ColumnConstraint::Unique);
}
Expression::DefaultColumnConstraint(dc) => {
col_def.default = Some((*dc.this).clone());
col_def
.constraints
.push(ColumnConstraint::Default((*dc.this).clone()));
}
Expression::AutoIncrementColumnConstraint(_) => {
col_def.auto_increment = true;
}
Expression::CommentColumnConstraint(_) => {
// Comment is a unit struct, we'd need the actual comment text
}
Expression::CheckColumnConstraint(cc) => {
col_def
.constraints
.push(ColumnConstraint::Check((*cc.this).clone()));
}
Expression::PathColumnConstraint(pc) => {
col_def
.constraints
.push(ColumnConstraint::Path((*pc.this).clone()));
col_def.constraint_order.push(ConstraintType::Path);
}
// Other constraint expressions are consumed but not recorded.
_ => {}
}
} else if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.match_identifier("ALIAS")
{
// ClickHouse: ALIAS expr
let expr = self.parse_or()?;
col_def.alias_expr = Some(Box::new(expr));
} else if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.check(TokenType::Materialized)
&& !self.check_next(TokenType::View)
{
// ClickHouse: MATERIALIZED expr
// (MATERIALIZED VIEW is excluded — that is a different construct.)
self.skip(); // consume MATERIALIZED
let expr = self.parse_or()?;
col_def.materialized_expr = Some(Box::new(expr));
} else if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.match_identifier("EPHEMERAL")
{
// ClickHouse: EPHEMERAL [expr]
// The optional expression is present only when the next token
// cannot be a list terminator or a following column option.
if !self.check(TokenType::Comma)
&& !self.check(TokenType::RParen)
&& !self.is_at_end()
&& !self.check_identifier("CODEC")
&& !self.check_identifier("TTL")
&& !self.check(TokenType::Comment)
{
let expr = self.parse_bitwise()?.unwrap_or(Expression::Null(Null));
col_def.ephemeral = Some(Some(Box::new(expr)));
} else {
// EPHEMERAL without an expression.
col_def.ephemeral = Some(None);
}
} else if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.check_identifier("CODEC")
{
// ClickHouse: CODEC(LZ4HC(9), ZSTD, DELTA)
// The codec list is captured as raw SQL text, not parsed.
// The scan stops ON the matching close paren; expect() consumes it.
self.skip(); // consume CODEC
self.expect(TokenType::LParen)?;
let start = self.current;
let mut depth = 1;
while !self.is_at_end() && depth > 0 {
if self.check(TokenType::LParen) {
depth += 1;
}
if self.check(TokenType::RParen) {
depth -= 1;
if depth == 0 {
break;
}
}
self.skip();
}
let codec_text = self.tokens_to_sql(start, self.current);
self.expect(TokenType::RParen)?;
col_def.codec = Some(codec_text);
} else if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::ClickHouse)
) && self.match_identifier("TTL")
{
// ClickHouse: TTL expr
let expr = self.parse_expression()?;
col_def.ttl_expr = Some(Box::new(expr));
} else {
break;
}
}
Ok(Some(Expression::ColumnDef(Box::new(col_def))))
}
/// parse_foreign_key - Implemented from Python _parse_foreign_key
/// Calls: parse_key_constraint_options, parse_wrapped_id_vars, parse_references
/// NOTE(review): only the NO ACTION form is handled here, yielding an empty
/// ForeignKey node; columns and references are presumably filled elsewhere.
#[allow(unused_variables, unused_mut)]
pub fn parse_foreign_key(&mut self) -> Result<Option<Expression>> {
    if !self.match_text_seq(&["NO", "ACTION"]) {
        return Ok(None);
    }
    let fk = ForeignKey {
        expressions: Vec::new(),
        reference: None,
        delete: None,
        update: None,
        options: Vec::new(),
    };
    Ok(Some(Expression::ForeignKey(Box::new(fk))))
}
/// parse_format_json - Implemented from Python _parse_format_json
/// Consumes a FORMAT JSON clause when present. No AST node is produced
/// either way; matching only advances past the clause.
#[allow(unused_variables, unused_mut)]
pub fn parse_format_json(&mut self) -> Result<Option<Expression>> {
    self.match_text_seq(&["FORMAT", "JSON"]);
    Ok(None)
}
/// parse_format_name - Snowflake FILE_FORMAT = format_name property
/// Parses the format name as a string literal or a (possibly dotted) name,
/// wrapping it in a Property keyed "FORMAT_NAME".
#[allow(unused_variables, unused_mut)]
pub fn parse_format_name(&mut self) -> Result<Option<Expression>> {
    // A quoted string takes priority; otherwise fall back to table parts.
    let value = match self.parse_string()? {
        Some(s) => s,
        None => match self.parse_table_parts()? {
            Some(tp) => tp,
            None => return Ok(None),
        },
    };
    Ok(Some(Expression::Property(Box::new(Property {
        this: Box::new(Expression::Identifier(Identifier::new(
            "FORMAT_NAME".to_string(),
        ))),
        value: Some(Box::new(value)),
    }))))
}
/// parse_freespace - Teradata FREESPACE property
/// Parses: FREESPACE = number [PERCENT]; the `=` is optional.
#[allow(unused_variables, unused_mut)]
pub fn parse_freespace(&mut self) -> Result<Option<Expression>> {
    // `=` between FREESPACE and the value is optional.
    self.match_token(TokenType::Eq);
    let Some(amount) = self.parse_number()? else {
        return Ok(None);
    };
    // A trailing PERCENT keyword is modeled as a true boolean literal.
    let percent = self
        .match_token(TokenType::Percent)
        .then(|| Box::new(Expression::Boolean(BooleanLiteral { value: true })));
    Ok(Some(Expression::FreespaceProperty(Box::new(
        FreespaceProperty {
            this: Box::new(amount),
            percent,
        },
    ))))
}
/// parse_function - Ported from Python _parse_function
/// Parses a function call, optionally wrapped in ODBC escape syntax:
/// {fn func_name(args)}
pub fn parse_function(&mut self) -> Result<Option<Expression>> {
    // Detect `{fn ...}`: a left brace whose following token spells FN.
    let odbc_escape = self.check(TokenType::LBrace)
        && self
            .tokens
            .get(self.current + 1)
            .is_some_and(|t| t.text.eq_ignore_ascii_case("FN"));
    if odbc_escape {
        self.skip(); // consume `{`
        self.skip(); // consume FN
    }
    let func = self.parse_function_call()?;
    if odbc_escape {
        // A missing closing brace is tolerated.
        self.match_token(TokenType::RBrace);
    }
    Ok(func)
}
/// parse_function_args - Ported from Python _parse_function_args
/// Parses the arguments inside a function call, handling aliases and key-value pairs.
///
/// Two special cases are handled per argument:
/// - TABLE/MODEL-prefixed table arguments (e.g. BigQuery ML functions),
///   parsed speculatively with rollback when the table form doesn't fit;
/// - explicit AS aliases inside the argument list
///   (e.g. `tuple(1 AS "a", 2 AS "b")`).
/// The caller is expected to have consumed the opening paren; the closing
/// paren is NOT consumed here.
pub fn parse_function_args_list(&mut self) -> Result<Vec<Expression>> {
let mut args = Vec::new();
// Empty argument list: `()`.
if self.check(TokenType::RParen) {
return Ok(args);
}
loop {
let is_table_or_model_arg = !self.is_at_end()
&& (self.check(TokenType::Table) || self.peek().text.eq_ignore_ascii_case("MODEL"));
// Try to parse expression with optional alias
let expr = if is_table_or_model_arg {
let prefix = self.peek().text.to_ascii_uppercase();
// Remember the cursor so a failed table parse can be rolled back
// and re-parsed as a plain assignment expression.
let saved_pos = self.current;
self.skip(); // consume TABLE or MODEL
// `=>` / `:=` right after the keyword means it was a named-arg
// name, not a TABLE/MODEL prefix — roll back in that case.
if !self.is_at_end()
&& !self.check(TokenType::FArrow)
&& !self.check(TokenType::ColonEq)
{
if let Some(table_expr) = self.parse_table_parts()? {
Some(Expression::TableArgument(Box::new(TableArgument {
prefix,
this: table_expr,
})))
} else {
self.current = saved_pos;
self.parse_assignment()?
}
} else {
self.current = saved_pos;
self.parse_assignment()?
}
} else {
self.parse_assignment()?
};
if let Some(expr) = expr {
// Handle explicit AS alias inside function args (e.g. `tuple(1 AS "a", 2 AS "b")`)
if self.match_token(TokenType::As) {
let alias_token = self.advance();
let alias_name = if alias_token.token_type == TokenType::QuotedIdentifier {
// Preserve quoted identifiers
let raw = alias_token.text.clone();
let mut ident = Identifier::new(raw);
ident.quoted = true;
ident
} else {
Identifier::new(alias_token.text.clone())
};
args.push(Expression::Alias(Box::new(crate::expressions::Alias {
this: expr,
alias: alias_name,
column_aliases: Vec::new(),
pre_alias_comments: Vec::new(),
trailing_comments: Vec::new(),
inferred_type: None,
})));
} else {
args.push(expr);
}
}
// A None expression with no following comma ends the list; a comma
// continues it (empty slots are silently skipped).
if !self.match_token(TokenType::Comma) {
break;
}
}
Ok(args)
}
/// parse_function_call - Ported from Python _parse_function_call
/// Parses a function call expression like func_name(arg1, arg2, ...)
///
/// Also handles no-paren functions (e.g. CURRENT_DATE) and an optional
/// trailing OVER (...) window clause. Returns Ok(None) without consuming
/// anything when the current token cannot start a function call.
pub fn parse_function_call(&mut self) -> Result<Option<Expression>> {
    if self.is_at_end() {
        return Ok(None);
    }
    let token = self.peek().clone();
    let token_type = token.token_type.clone();
    let name = token.text.clone();
    let _upper_name = name.to_ascii_uppercase();
    // Check for no-paren functions like CURRENT_DATE, CURRENT_TIMESTAMP
    if self.is_no_paren_function() {
        // Check if next token is NOT a paren (so it's used without parens)
        if !self.check_next(TokenType::LParen) {
            self.skip();
            return Ok(Some(Expression::Function(Box::new(Function {
                name, // Preserve original case; generator handles normalization
                args: Vec::new(),
                distinct: false,
                trailing_comments: Vec::new(),
                use_bracket_syntax: false,
                no_parens: true,
                quoted: false,
                span: None,
                inferred_type: None,
            }))));
        }
    }
    // Must be followed by left paren
    if !self.check_next(TokenType::LParen) {
        return Ok(None);
    }
    // Token must be a valid function name token: plain identifiers plus a
    // few keywords that double as function names (IF, LEFT, RIGHT, ...).
    let is_valid_func_token = matches!(
        token_type,
        TokenType::Identifier
            | TokenType::Var
            | TokenType::If
            | TokenType::Left
            | TokenType::Right
            | TokenType::Insert
            | TokenType::Replace
            | TokenType::Row
            | TokenType::Index
    );
    if !is_valid_func_token {
        return Ok(None);
    }
    self.skip(); // consume function name
    self.skip(); // consume (
    // Check for DISTINCT keyword
    let distinct = self.match_token(TokenType::Distinct);
    // Parse arguments
    let args = self.parse_function_args_list()?;
    self.match_token(TokenType::RParen);
    // Handle window specifications
    let func_expr = Expression::Function(Box::new(Function {
        name, // Preserve original case; generator handles normalization
        args,
        distinct,
        trailing_comments: Vec::new(),
        use_bracket_syntax: false,
        no_parens: false,
        quoted: false,
        span: None,
        inferred_type: None,
    }));
    // Check for OVER clause (window function)
    if self.match_token(TokenType::Over) {
        // Parse window spec - create a simple WindowSpec
        if self.match_token(TokenType::LParen) {
            // Use parse_window_spec_inner to handle DISTRIBUTE BY/SORT BY (Hive)
            let spec = self.parse_window_spec_inner()?;
            self.expect(TokenType::RParen)?;
            if let Some(spec_expr) = spec {
                // NOTE(review): this returns the window spec alone and drops
                // `func_expr` — from here it looks like the function call is
                // lost for `f(x) OVER (...)`; confirm the caller (or
                // parse_window_spec_inner) re-attaches the function.
                return Ok(Some(spec_expr));
            }
        }
    }
    Ok(Some(func_expr))
}
/// parse_function_parameter - Ported from Python _parse_function_parameter
/// Parses a function parameter in CREATE FUNCTION (name type [DEFAULT expr])
///
/// Mode (IN/OUT/INOUT), the data type, and any DEFAULT value are parsed but
/// currently discarded; only the parameter name survives, returned as a
/// Column expression (with an empty name if none was given).
pub fn parse_function_parameter(&mut self) -> Result<Option<Expression>> {
    // Parse optional parameter mode (IN, OUT, INOUT)
    // "IN OUT" written as two words is also accepted and treated as INOUT.
    let _mode = if self.match_texts(&["IN"]) {
        if self.match_texts(&["OUT"]) {
            Some(ParameterMode::InOut)
        } else {
            Some(ParameterMode::In)
        }
    } else if self.match_texts(&["OUT"]) {
        Some(ParameterMode::Out)
    } else if self.match_texts(&["INOUT"]) {
        Some(ParameterMode::InOut)
    } else {
        None
    };
    // Parse parameter name (optional in some dialects)
    let name_expr = self.parse_id_var()?;
    let name = name_expr.and_then(|n| match n {
        Expression::Identifier(id) => Some(id),
        _ => None,
    });
    // Parse data type - returns Result<DataType>, not Result<Option<DataType>>
    // We need to handle the case where we can't parse a data type
    // NOTE(review): on failure this returns Ok(None) without rewinding the
    // tokens already consumed for mode/name — confirm callers backtrack.
    let data_type_result = self.parse_data_type();
    let _data_type = match data_type_result {
        Ok(dt) => dt,
        Err(_) => return Ok(None),
    };
    // Parse optional DEFAULT value
    let _default = if self.match_token(TokenType::Default) || self.match_texts(&["="]) {
        self.parse_disjunction()?
    } else {
        None
    };
    // Return the name as a Column expression
    Ok(Some(Expression::boxed_column(Column {
        name: Identifier {
            name: name.map(|n| n.name).unwrap_or_default(),
            quoted: false,
            trailing_comments: Vec::new(),
            span: None,
        },
        table: None,
        join_mark: false,
        trailing_comments: Vec::new(),
        span: None,
        inferred_type: None,
    })))
}
/// parse_gap_fill - Ported from Python _parse_gap_fill
///
/// Parses the GAP_FILL table function for time series, e.g.
/// `GAP_FILL(TABLE t, ts_column, bucket_width, partitioning_columns, value_columns)`.
///
/// Returns `Ok(None)` when no table reference follows; otherwise builds a
/// GapFill node whose optional fields are filled positionally from the
/// argument list (missing trailing arguments become `None`).
pub fn parse_gap_fill(&mut self) -> Result<Option<Expression>> {
    // Optional TABLE keyword before the table reference.
    self.match_token(TokenType::Table);
    // Parse the table reference; bail out if there is none.
    let Some(table) = self.parse_table()? else {
        return Ok(None);
    };
    // Parse comma-separated arguments (the comma itself is optional here).
    self.match_token(TokenType::Comma);
    let args = self.parse_expression_list()?;
    // Extract arguments by position.
    let ts_column = args.first().cloned().map(Box::new);
    let bucket_width = args.get(1).cloned().map(Box::new);
    let partitioning_columns = args.get(2).cloned().map(Box::new);
    let value_columns = args.get(3).cloned().map(Box::new);
    Ok(Some(Expression::GapFill(Box::new(GapFill {
        this: Box::new(table),
        ts_column,
        bucket_width,
        partitioning_columns,
        value_columns,
        origin: None,
        ignore_nulls: None,
    }))))
}
/// parse_semantic_view - Parse Snowflake SEMANTIC_VIEW function
/// Example: SEMANTIC_VIEW(foo METRICS a.b, a.c DIMENSIONS a.b, a.c WHERE a.b > '1995-01-01')
pub fn parse_semantic_view(&mut self) -> Result<Expression> {
    // The view/table reference itself (identifier or qualified name).
    let source = self.parse_primary()?;
    let mut metrics = None;
    let mut dimensions = None;
    let mut facts = None;
    let mut where_clause = None;
    // Wraps a clause's expression list in a Tuple node for the AST.
    fn to_tuple(exprs: Vec<Expression>) -> Option<Box<Expression>> {
        Some(Box::new(Expression::Tuple(Box::new(Tuple {
            expressions: exprs,
        }))))
    }
    // Clauses may appear in any order until the closing paren; WHERE, when
    // present, terminates the clause list.
    while !self.check(TokenType::RParen) && !self.is_at_end() {
        if self.match_identifier("METRICS") {
            metrics = to_tuple(self.parse_semantic_view_list()?);
        } else if self.match_identifier("DIMENSIONS") {
            dimensions = to_tuple(self.parse_semantic_view_list()?);
        } else if self.match_identifier("FACTS") {
            facts = to_tuple(self.parse_semantic_view_list()?);
        } else if self.match_token(TokenType::Where) {
            where_clause = Some(Box::new(self.parse_expression()?));
            break;
        } else {
            // Unrecognized token: stop and let the caller deal with it.
            break;
        }
    }
    Ok(Expression::SemanticView(Box::new(SemanticView {
        this: Box::new(source),
        metrics,
        dimensions,
        facts,
        where_: where_clause,
    })))
}
/// Parses the comma-separated expression list inside a SEMANTIC_VIEW clause.
/// Stops before the next clause keyword (METRICS, DIMENSIONS, FACTS, WHERE)
/// or the closing `)`. Each element may carry an explicit AS alias.
fn parse_semantic_view_list(&mut self) -> Result<Vec<Expression>> {
    let mut items = vec![self.parse_semantic_view_element()?];
    while self.match_token(TokenType::Comma) {
        // A clause keyword (or the closing paren) right after the comma
        // means the list ended and a new clause begins.
        let at_clause_boundary = self.check_identifier("METRICS")
            || self.check_identifier("DIMENSIONS")
            || self.check_identifier("FACTS")
            || self.check(TokenType::Where)
            || self.check(TokenType::RParen);
        if at_clause_boundary {
            break;
        }
        items.push(self.parse_semantic_view_element()?);
    }
    Ok(items)
}
/// Parses one SEMANTIC_VIEW list element: `expression [AS alias]`.
fn parse_semantic_view_element(&mut self) -> Result<Expression> {
    let inner = match self.parse_disjunction()? {
        Some(e) => e,
        None => return Err(self.parse_error("Expected expression in SEMANTIC_VIEW clause")),
    };
    // Only an explicit AS introduces an alias here.
    if !self.match_token(TokenType::As) {
        return Ok(inner);
    }
    let alias = self.expect_identifier_or_keyword_with_quoted()?;
    Ok(Expression::Alias(Box::new(crate::expressions::Alias {
        this: inner,
        alias,
        column_aliases: Vec::new(),
        pre_alias_comments: Vec::new(),
        trailing_comments: Vec::new(),
        inferred_type: None,
    })))
}
/// parse_grant_principal - Implemented from Python _parse_grant_principal
/// Calls: parse_id_var
///
/// Currently a stub: an optional leading ROLE/GROUP keyword is consumed,
/// but no principal node is built yet — the result is always `Ok(None)`.
pub fn parse_grant_principal(&mut self) -> Result<Option<Expression>> {
    let _ = self.match_texts(&["ROLE", "GROUP"]);
    Ok(None)
}
/// parse_grant_privilege - Parse a single privilege in GRANT/REVOKE
/// Parses: SELECT, INSERT, UPDATE(col1, col2), DELETE, etc.
pub fn parse_grant_privilege(&mut self) -> Result<Option<Expression>> {
    // Gather uppercased keyword tokens until a delimiter: `,`, ON, or `(`.
    let mut words: Vec<String> = Vec::new();
    while !self.is_at_end()
        && !self.check(TokenType::Comma)
        && !self.check(TokenType::On)
        && !self.check(TokenType::LParen)
    {
        words.push(self.peek().text.to_ascii_uppercase());
        self.skip();
    }
    if words.is_empty() {
        return Ok(None);
    }
    // Optional column list in parentheses, e.g. UPDATE(col1, col2).
    let mut columns = Vec::new();
    if self.match_token(TokenType::LParen) {
        while let Some(col) = self.parse_column()? {
            columns.push(col);
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
        self.match_token(TokenType::RParen);
    }
    // Multi-word privileges (e.g. ALL PRIVILEGES) join with single spaces.
    Ok(Some(Expression::GrantPrivilege(Box::new(GrantPrivilege {
        this: Box::new(Expression::Identifier(Identifier::new(words.join(" ")))),
        expressions: columns,
    }))))
}
/// parse_grant_revoke_common - Parses common parts of GRANT/REVOKE statements
/// Python: _parse_grant_revoke_common
/// Returns a Tuple containing (privileges, kind, securable)
pub fn parse_grant_revoke_common(&mut self) -> Result<Option<Expression>> {
    // Privileges: comma-separated list; entries that fail to parse are
    // simply dropped (parse_grant_privilege returns None for them).
    let mut privileges = Vec::new();
    loop {
        if let Some(p) = self.parse_grant_privilege()? {
            privileges.push(p);
        }
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    // Optional ON keyword introducing the securable.
    self.match_token(TokenType::On);
    // Object kind following ON (TABLE, VIEW, SCHEMA, ...), if any.
    const KINDS: &[&str] = &[
        "TABLE",
        "VIEW",
        "SCHEMA",
        "DATABASE",
        "SEQUENCE",
        "FUNCTION",
        "PROCEDURE",
        "INDEX",
        "TYPE",
        "TABLESPACE",
        "ROLE",
        "USER",
    ];
    let kind = self.match_texts(KINDS).then(|| {
        Expression::Var(Box::new(Var {
            this: self.previous().text.to_ascii_uppercase(),
        }))
    });
    // The securable itself (possibly qualified table parts).
    let securable = self.parse_table_parts()?;
    // Pack (privileges, kind, securable) into a 3-element Tuple; absent
    // members are represented by Null.
    let mut items = vec![Expression::Tuple(Box::new(Tuple {
        expressions: privileges,
    }))];
    items.push(kind.unwrap_or(Expression::Null(Null)));
    items.push(securable.unwrap_or(Expression::Null(Null)));
    Ok(Some(Expression::Tuple(Box::new(Tuple {
        expressions: items,
    }))))
}
/// parse_group - Parse GROUP BY clause
/// Python: if not self._match(TokenType.GROUP_BY): return None; expressions = self._parse_csv(self._parse_disjunction)
pub fn parse_group(&mut self) -> Result<Option<Expression>> {
    // GROUP is mandatory; BY is consumed when present.
    if !self.match_token(TokenType::Group) {
        return Ok(None);
    }
    self.match_token(TokenType::By);
    // Optional modifier: ALL => Some(true), DISTINCT => Some(false).
    let all = if self.match_token(TokenType::All) {
        Some(true)
    } else if self.match_token(TokenType::Distinct) {
        Some(false)
    } else {
        None
    };
    // CSV of grouping expressions; a parse failure simply ends the list.
    let mut expressions = Vec::new();
    while let Ok(expr) = self.parse_expression() {
        expressions.push(expr);
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    // ClickHouse: [WITH] TOTALS.
    let has_totals =
        self.match_text_seq(&["WITH", "TOTALS"]) || self.match_text_seq(&["TOTALS"]);
    let totals =
        has_totals.then(|| Box::new(Expression::Boolean(BooleanLiteral { value: true })));
    Ok(Some(Expression::Group(Box::new(Group {
        expressions,
        grouping_sets: None,
        cube: None,
        rollup: None,
        totals,
        all,
    }))))
}
/// parse_group_concat - Ported from Python _parse_group_concat
///
/// Parses the MySQL GROUP_CONCAT argument list (cursor is already inside
/// the parens): `[DISTINCT] expr [ORDER BY expr [ASC|DESC]
/// [NULLS FIRST|LAST], ...] [SEPARATOR 'sep']`.
#[allow(unused_variables, unused_mut)]
pub fn parse_group_concat(&mut self) -> Result<Option<Expression>> {
    // Check for DISTINCT
    let distinct = self.match_token(TokenType::Distinct);
    // Parse expression(s)
    let expr = self.parse_expression()?;
    // Parse optional ORDER BY
    let order_by = if self.match_keywords(&[TokenType::Order, TokenType::By]) {
        let mut orderings = Vec::new();
        loop {
            let order_expr = self.parse_expression()?;
            // DESC wins; a bare ASC is consumed but just leaves desc=false.
            let desc = if self.match_token(TokenType::Desc) {
                true
            } else {
                self.match_token(TokenType::Asc);
                false
            };
            let nulls_first = if self.match_keywords(&[TokenType::Nulls, TokenType::First]) {
                Some(true)
            } else if self.match_keywords(&[TokenType::Nulls, TokenType::Last]) {
                Some(false)
            } else {
                None
            };
            orderings.push(Ordered {
                this: order_expr,
                desc,
                nulls_first,
                // NOTE(review): explicit_asc mirrors !desc even when no ASC
                // keyword was written — confirm that is intended.
                explicit_asc: !desc,
                with_fill: None,
            });
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
        Some(orderings)
    } else {
        None
    };
    // Parse optional SEPARATOR
    let separator = if self.match_token(TokenType::Separator) {
        self.parse_string()?
    } else {
        None
    };
    Ok(Some(Expression::GroupConcat(Box::new(GroupConcatFunc {
        this: expr,
        separator,
        order_by,
        distinct,
        filter: None,
        limit: None,
        inferred_type: None,
    }))))
}
/// parse_grouping_set - Delegates to parse_grouping_sets
/// (singular/plural spellings share one implementation).
pub fn parse_grouping_set(&mut self) -> Result<Option<Expression>> {
    self.parse_grouping_sets()
}
/// parse_grouping_sets - Ported from Python _parse_grouping_sets
/// Parses GROUPING SETS ((...), (...)) in GROUP BY
///
/// Returns Ok(None) when the GROUPING SETS keywords are absent; otherwise
/// requires a parenthesized, comma-separated list where each member may be
/// a nested GROUPING SETS, CUBE/ROLLUP, a parenthesized tuple, or a single
/// expression.
#[allow(unused_variables, unused_mut)]
pub fn parse_grouping_sets(&mut self) -> Result<Option<Expression>> {
    // Check for GROUPING SETS keyword
    if !self.match_text_seq(&["GROUPING", "SETS"]) {
        return Ok(None);
    }
    // Parse wrapped grouping sets
    self.expect(TokenType::LParen)?;
    let mut expressions = Vec::new();
    if !self.check(TokenType::RParen) {
        loop {
            // Each grouping set can be:
            // - A nested GROUPING SETS
            // - CUBE or ROLLUP
            // - A parenthesized list
            // - A single expression
            if let Some(nested) = self.parse_grouping_sets()? {
                expressions.push(nested);
            } else if let Some(cube_rollup) = self.parse_cube_or_rollup()? {
                expressions.push(cube_rollup);
            } else if self.match_token(TokenType::LParen) {
                // Parenthesized group, e.g. `(a, b)`; an empty `()` produces
                // an empty Tuple (the empty grouping set).
                let mut group = Vec::new();
                if !self.check(TokenType::RParen) {
                    loop {
                        match self.parse_bitwise() {
                            Ok(Some(expr)) => group.push(expr),
                            Ok(None) => break,
                            Err(e) => return Err(e),
                        }
                        if !self.match_token(TokenType::Comma) {
                            break;
                        }
                    }
                }
                self.expect(TokenType::RParen)?;
                expressions.push(Expression::Tuple(Box::new(Tuple { expressions: group })));
            } else {
                // Single expression
                match self.parse_bitwise() {
                    Ok(Some(expr)) => expressions.push(expr),
                    Ok(None) => break,
                    Err(e) => return Err(e),
                }
            }
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
    }
    self.expect(TokenType::RParen)?;
    Ok(Some(Expression::GroupingSets(Box::new(GroupingSets {
        expressions,
    }))))
}
/// parse_having - Parse HAVING clause
/// Python: if not self._match(TokenType.HAVING): return None; return exp.Having(this=self._parse_disjunction())
pub fn parse_having(&mut self) -> Result<Option<Expression>> {
    if !self.match_token(TokenType::Having) {
        return Ok(None);
    }
    // Everything after HAVING is the filter predicate.
    let predicate = self.parse_expression()?;
    Ok(Some(Expression::Having(Box::new(Having {
        this: predicate,
        comments: Vec::new(),
    }))))
}
/// parse_having_max - Implemented from Python _parse_having_max
/// Calls: parse_column
///
/// Currently a stub: an optional MAX/MIN keyword is consumed, but no node
/// is built yet — the result is always `Ok(None)`.
pub fn parse_having_max(&mut self) -> Result<Option<Expression>> {
    let _ = self.match_texts(&["MAX", "MIN"]);
    Ok(None)
}
/// parse_heredoc - Implemented from Python _parse_heredoc
/// Parses dollar-quoted strings: $$content$$, $tag$content$tag$
///
/// Two paths: if the tokenizer already produced a single HeredocString
/// token, it is wrapped directly; otherwise the heredoc is reassembled
/// from raw `$`, tag and content tokens.
pub fn parse_heredoc(&mut self) -> Result<Option<Expression>> {
    // Check if current token is a HEREDOC_STRING type
    if self.match_token(TokenType::HeredocString) {
        let text = self.previous().text.clone();
        return Ok(Some(Expression::Heredoc(Box::new(Heredoc {
            this: Box::new(Expression::Literal(Box::new(Literal::String(text)))),
            tag: None,
        }))));
    }
    // Try to parse $...$ or $tag$...$tag$
    if !self.match_text_seq(&["$"]) {
        return Ok(None);
    }
    // Collect the tag text (if any) and the closing marker
    let mut tags = vec!["$".to_string()];
    let mut tag_text: Option<String> = None;
    // Check if next token is connected (no whitespace) and collect tag
    if !self.is_at_end() {
        let next_text = self.peek().text.to_ascii_uppercase();
        if next_text == "$" {
            // Simple $$ ... $$ case
            self.skip();
            tags.push("$".to_string());
        } else {
            // $tag$ ... $tag$ case — the tag is uppercased here, so the
            // closing-tag comparison below is effectively case-insensitive.
            self.skip();
            tag_text = Some(next_text.clone());
            tags.push(next_text);
            // Expect closing $
            if self.match_text_seq(&["$"]) {
                tags.push("$".to_string());
            } else {
                return Err(self.parse_error("No closing $ found"));
            }
        }
    }
    // Now collect content until we find the closing tags
    let mut content_parts = Vec::new();
    let closing_tag = tags.join("");
    while !self.is_at_end() {
        // Build current sequence to check for closing tag
        let current_text = self.peek().text.clone();
        // Check if we've reached the closing tag
        if current_text == "$" || current_text.eq_ignore_ascii_case(&closing_tag) {
            // Try to match the full closing sequence token by token.
            let start_pos = self.current;
            let mut matched = true;
            for expected in &tags {
                if self.is_at_end() || !self.peek().text.eq_ignore_ascii_case(expected) {
                    matched = false;
                    break;
                }
                self.skip();
            }
            if matched {
                // Found the closing tag. Content tokens are re-joined with
                // single spaces, so the original whitespace is not preserved.
                let content = content_parts.join(" ");
                return Ok(Some(Expression::Heredoc(Box::new(Heredoc {
                    this: Box::new(Expression::Literal(Box::new(Literal::String(content)))),
                    tag: tag_text
                        .map(|t| Box::new(Expression::Literal(Box::new(Literal::String(t))))),
                }))));
            }
            // Not the closing tag, backtrack and add to content
            self.current = start_pos;
        }
        content_parts.push(self.advance().text.clone());
    }
    Err(self.parse_error(&format!("No closing {} found", closing_tag)))
}
/// parse_hint_body - Delegates to parse_hint_fallback_to_string
/// (no structured hint-body grammar exists yet).
pub fn parse_hint_body(&mut self) -> Result<Option<Expression>> {
    self.parse_hint_fallback_to_string()
}
/// parse_hint_fallback_to_string - Parses remaining hint tokens as a raw string
/// Python: _parse_hint_fallback_to_string
/// Used when structured hint parsing fails - collects all remaining tokens
pub fn parse_hint_fallback_to_string(&mut self) -> Result<Option<Expression>> {
    // Drain every remaining token into a whitespace-joined string.
    let mut pieces: Vec<String> = Vec::new();
    while !self.is_at_end() {
        pieces.push(self.advance().text.clone());
    }
    if pieces.is_empty() {
        Ok(None)
    } else {
        Ok(Some(Expression::Hint(Box::new(Hint {
            expressions: vec![HintExpression::Raw(pieces.join(" "))],
        }))))
    }
}
/// parse_hint_function_call - Delegates to parse_function_call
/// (hints reuse the general function-call grammar).
pub fn parse_hint_function_call(&mut self) -> Result<Option<Expression>> {
    self.parse_function_call()
}
/// parse_historical_data - Snowflake AT/BEFORE time travel clauses
/// Parses: AT(TIMESTAMP => expr) or BEFORE(STATEMENT => 'id') etc.
/// Reference: https://docs.snowflake.com/en/sql-reference/constructs/at-before
///
/// Fully restores `self.current` whenever the stream does not match the
/// `KEYWORD ( KIND => expr )` shape, so it is safe as a speculative parse.
#[allow(unused_variables, unused_mut)]
pub fn parse_historical_data(&mut self) -> Result<Option<Expression>> {
    // Save position for backtracking
    let start_index = self.current;
    // Check for AT, BEFORE, or END keywords
    let this = if self.match_texts(&["AT", "BEFORE", "END"]) {
        self.previous().text.to_ascii_uppercase()
    } else {
        return Ok(None);
    };
    // Expect opening paren and kind (OFFSET, STATEMENT, STREAM, TIMESTAMP, VERSION)
    if !self.match_token(TokenType::LParen) {
        // Backtrack if not the right pattern
        self.current = start_index;
        return Ok(None);
    }
    let kind = if self.match_texts(&["OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"]) {
        self.previous().text.to_ascii_uppercase()
    } else {
        // Backtrack if not the right pattern
        self.current = start_index;
        return Ok(None);
    };
    // Expect => and expression
    if !self.match_token(TokenType::FArrow) {
        self.current = start_index;
        return Ok(None);
    }
    let expression = self.parse_bitwise()?;
    if expression.is_none() {
        self.current = start_index;
        return Ok(None);
    }
    // The closing paren is optional here (best-effort consume).
    self.match_token(TokenType::RParen); // Consume closing paren
    Ok(Some(Expression::HistoricalData(Box::new(HistoricalData {
        this: Box::new(Expression::Identifier(Identifier::new(this))),
        kind,
        expression: Box::new(expression.unwrap()),
    }))))
}
/// parse_id_var - Ported from Python _parse_id_var
///
/// Parses an identifier or variable; more permissive than
/// `parse_identifier`: it also accepts `Var` tokens (as unquoted
/// identifiers) and, for dialects that allow it, string literals (as
/// quoted identifiers).
///
/// Returns `Ok(None)` without consuming anything when the current token
/// cannot serve as an identifier — reserved keywords such as SELECT/FROM
/// are deliberately never consumed here.
pub fn parse_id_var(&mut self) -> Result<Option<Expression>> {
    // Regular (possibly quoted) identifier.
    if let Some(ident) = self.parse_identifier()? {
        return Ok(Some(ident));
    }
    // Variable token, treated as an unquoted identifier.
    if self.match_token(TokenType::Var) {
        let text = self.previous().text.clone();
        return Ok(Some(Expression::Identifier(Identifier {
            name: text,
            quoted: false,
            trailing_comments: Vec::new(),
            span: None,
        })));
    }
    // Some dialects allow a string literal where an identifier is expected;
    // preserve it as a quoted identifier.
    if self.match_token(TokenType::String) {
        let text = self.previous().text.clone();
        return Ok(Some(Expression::Identifier(Identifier {
            name: text,
            quoted: true,
            trailing_comments: Vec::new(),
            span: None,
        })));
    }
    // Anything else is rejected without consuming the token. (An earlier
    // revision explicitly checked for SELECT/FROM/WHERE/... keywords here,
    // but both branches of that check returned Ok(None), so it was a no-op
    // and has been removed.)
    Ok(None)
}
/// parse_identifier - Parse quoted identifier
/// Python: if self._match(TokenType.IDENTIFIER): return self._identifier_expression(quoted=True)
///
/// Accepts both quoted (e.g. "col", `col`) and plain identifier tokens;
/// the `quoted` flag records which kind was matched.
pub fn parse_identifier(&mut self) -> Result<Option<Expression>> {
    let matched = self.match_token(TokenType::QuotedIdentifier)
        || self.match_token(TokenType::Identifier);
    if !matched {
        return Ok(None);
    }
    let prev = self.previous();
    let quoted = prev.token_type == TokenType::QuotedIdentifier;
    let name = prev.text.clone();
    Ok(Some(Expression::Identifier(Identifier {
        name,
        quoted,
        trailing_comments: Vec::new(),
        span: None,
    })))
}
/// Parse IF expression
/// IF(condition, true_value, false_value) - function style
/// IF condition THEN true_value ELSE false_value END - statement style
///
/// Called with the cursor just after the IF keyword. Four shapes are
/// handled, tried in this order:
/// 1. TSQL/Fabric `IF (cond) BEGIN ... END` — captured as a raw Command
/// 2. Function style `IF(cond, t[, f])` — becomes IfFunc (0/1 args fall
///    back to a generic Function node)
/// 3. TSQL `IF OBJECT_ID(...) IS NOT NULL DROP TABLE x` — rewritten to
///    DROP TABLE IF EXISTS
/// 4. Statement style `IF cond THEN t [ELSE f] END`
pub fn parse_if(&mut self) -> Result<Option<Expression>> {
    // TSQL/Fabric: IF (cond) BEGIN ... END is a statement, not a function.
    // Parse condition, strip outer parens, then capture rest as command.
    if matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::TSQL) | Some(crate::dialects::DialectType::Fabric)
    ) && self.check(TokenType::LParen)
    {
        // Parse the parenthesized condition using balanced paren matching
        let cond_start = self.current;
        self.skip(); // consume opening (
        let mut depth = 1;
        while depth > 0 && !self.is_at_end() {
            if self.check(TokenType::LParen) {
                depth += 1;
            } else if self.check(TokenType::RParen) {
                depth -= 1;
                if depth == 0 {
                    break;
                }
            }
            self.skip();
        }
        // Extract condition text from source (inside outer parens)
        let cond_text = if let Some(ref source) = self.source {
            let inner_start = self.tokens[cond_start + 1].span.start;
            let inner_end = self.tokens[self.current].span.start;
            source[inner_start..inner_end].trim().to_string()
        } else {
            // No original source available: regenerate text from tokens.
            self.tokens_to_sql(cond_start + 1, self.current)
        };
        self.skip(); // consume closing )
        // Now collect the rest (BEGIN...END) as raw text, up to the next ;
        let body_start = self.current;
        while !self.is_at_end() && !self.check(TokenType::Semicolon) {
            self.skip();
        }
        let body_text = if let Some(ref source) = self.source {
            let start_span = self.tokens[body_start].span.start;
            let end_span = if self.current > 0 {
                self.tokens[self.current - 1].span.end
            } else {
                start_span
            };
            source[start_span..end_span].trim().to_string()
        } else {
            self.tokens_to_sql(body_start, self.current)
        };
        // The whole statement round-trips as an opaque Command node.
        let command_text = format!("IF {} {}", cond_text, body_text);
        return Ok(Some(Expression::Command(Box::new(
            crate::expressions::Command { this: command_text },
        ))));
    }
    // Function style: IF(cond, true, false)
    if self.match_token(TokenType::LParen) {
        // ClickHouse: if() with zero args is valid (used in test queries)
        if self.check(TokenType::RParen) {
            self.skip(); // consume RParen
            return Ok(Some(Expression::Function(Box::new(Function {
                name: "IF".to_string(),
                args: vec![],
                distinct: false,
                trailing_comments: Vec::new(),
                use_bracket_syntax: false,
                no_parens: false,
                quoted: false,
                span: None,
                inferred_type: None,
            }))));
        }
        let args = self.parse_expression_list()?;
        self.expect(TokenType::RParen)?;
        // 3 args -> IF(cond, t, f); 2 args -> IF(cond, t) with no else;
        // 1 arg stays a generic Function node.
        if args.len() == 3 {
            return Ok(Some(Expression::IfFunc(Box::new(IfFunc {
                original_name: None,
                condition: args[0].clone(),
                true_value: args[1].clone(),
                false_value: Some(args[2].clone()),
                inferred_type: None,
            }))));
        } else if args.len() == 2 {
            return Ok(Some(Expression::IfFunc(Box::new(IfFunc {
                original_name: None,
                condition: args[0].clone(),
                true_value: args[1].clone(),
                false_value: None,
                inferred_type: None,
            }))));
        } else if args.len() == 1 {
            return Ok(Some(Expression::Function(Box::new(Function {
                name: "IF".to_string(),
                args,
                distinct: false,
                trailing_comments: Vec::new(),
                use_bracket_syntax: false,
                no_parens: false,
                quoted: false,
                span: None,
                inferred_type: None,
            }))));
        } else {
            return Err(self.parse_error("IF function requires 2 or 3 arguments"));
        }
    }
    // TSQL: IF OBJECT_ID(...) IS NOT NULL [BEGIN] DROP TABLE x [; END] -> DROP TABLE IF EXISTS x
    if matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::TSQL) | Some(crate::dialects::DialectType::Fabric)
    ) {
        let saved = self.current;
        if self.match_text_seq(&["OBJECT_ID"]) {
            // Capture the OBJECT_ID arguments text for TSQL round-trip
            let object_id_args_text = if self.match_token(TokenType::LParen) {
                let args_start = self.current;
                let args = self.parse_expression_list()?;
                // Reconstruct args text from source
                let args_text = if let Some(ref source) = self.source {
                    let start_span = self.tokens[args_start].span.start;
                    let end_span = self.tokens[self.current].span.start;
                    source[start_span..end_span].trim().to_string()
                } else {
                    // Fallback: generate from parsed expressions
                    args.iter()
                        .map(|a| format!("{:?}", a))
                        .collect::<Vec<_>>()
                        .join(", ")
                };
                let _ = self.match_token(TokenType::RParen);
                Some(args_text)
            } else {
                None
            };
            if self.match_text_seq(&["IS", "NOT", "NULL"]) {
                // Check for DROP directly or BEGIN ... DROP ... END
                let has_begin = self.match_token(TokenType::Begin);
                if self.check(TokenType::Drop) {
                    // Parse DROP TABLE, forcing if_exists = true
                    self.skip(); // consume DROP
                    if self.match_token(TokenType::Table) {
                        // Parse table names
                        let mut names = Vec::new();
                        loop {
                            names.push(self.parse_table_ref()?);
                            if !self.match_token(TokenType::Comma) {
                                break;
                            }
                        }
                        // If we had BEGIN, consume optional ; and END
                        if has_begin {
                            let _ = self.match_token(TokenType::Semicolon);
                            let _ = self.match_token(TokenType::End);
                        }
                        return Ok(Some(Expression::DropTable(Box::new(
                            crate::expressions::DropTable {
                                names,
                                if_exists: true,
                                cascade: false,
                                cascade_constraints: false,
                                purge: false,
                                leading_comments: Vec::new(),
                                object_id_args: object_id_args_text,
                                sync: false,
                                iceberg: false,
                                restrict: false,
                            },
                        ))));
                    }
                }
            }
            // Retreat if pattern didn't match
            self.current = saved;
        }
    }
    // Statement style: IF cond THEN true [ELSE false] END/ENDIF
    // Use parse_disjunction (parse_or) for condition - same as Python sqlglot
    // This ensures we stop at THEN rather than consuming too much
    let condition = match self.parse_disjunction()? {
        Some(c) => c,
        None => return Ok(None),
    };
    if !self.match_token(TokenType::Then) {
        // Not statement style, return as just the expression parsed
        return Ok(Some(condition));
    }
    // Parse true value - use parse_disjunction to stop at ELSE/END
    let true_value = match self.parse_disjunction()? {
        Some(v) => v,
        None => return Err(self.parse_error("Expected expression after THEN")),
    };
    let false_value = if self.match_token(TokenType::Else) {
        match self.parse_disjunction()? {
            Some(v) => Some(v),
            None => return Err(self.parse_error("Expected expression after ELSE")),
        }
    } else {
        None
    };
    // Consume END or ENDIF (Exasol tokenizes ENDIF as END)
    self.match_token(TokenType::End);
    Ok(Some(Expression::IfFunc(Box::new(IfFunc {
        original_name: None,
        condition,
        true_value,
        false_value,
        inferred_type: None,
    }))))
}
/// parse_in - Ported from Python _parse_in
/// Parses IN expression: expr IN (values...) or expr IN (subquery)
/// Can also parse standalone IN list after IN keyword has been matched
///
/// Speculative: whenever `left [NOT] IN ...` is not found, the cursor is
/// restored to where it started and Ok(None) is returned.
#[allow(unused_variables, unused_mut)]
pub fn parse_in(&mut self) -> Result<Option<Expression>> {
    // If we're at IN keyword, parse what follows
    // (a leading IN with no left-hand side is an error).
    if self.match_token(TokenType::In) {
        return Err(self.parse_error("Expected expression before IN"));
    }
    // Try to parse as a complete expression: left IN (...)
    let saved_pos = self.current;
    // Parse the left side expression
    match self.parse_bitwise() {
        Ok(Some(left_expr)) => {
            // Check for optional NOT
            let negate = self.match_token(TokenType::Not);
            // Expect IN keyword
            if self.match_token(TokenType::In) {
                let in_result = self.parse_in_with_expr(Some(left_expr))?;
                // NOT IN is represented as Not wrapping the In expression.
                return Ok(Some(if negate {
                    Expression::Not(Box::new(UnaryOp {
                        this: in_result,
                        inferred_type: None,
                    }))
                } else {
                    in_result
                }));
            }
            // Not an IN expression, restore position
            self.current = saved_pos;
            Ok(None)
        }
        Ok(None) => {
            self.current = saved_pos;
            Ok(None)
        }
        Err(_) => {
            // Left-side parse errors are swallowed: rewind and report "no
            // IN expression here" instead of failing the whole parse.
            self.current = saved_pos;
            Ok(None)
        }
    }
}
/// parse_index - Implemented from Python _parse_index (partial)
/// Calls: parse_index_params, parse_id_var
pub fn parse_index(&mut self) -> Result<Option<Expression>> {
    // PRIMARY: return an empty Index node as a placeholder.
    if self.match_text_seq(&["PRIMARY"]) {
        let index = Index {
            this: None,
            table: None,
            unique: false,
            primary: None,
            amp: None,
            params: Vec::new(),
        };
        return Ok(Some(Expression::Index(Box::new(index))));
    }
    // AMP is consumed but not yet modeled.
    let _ = self.match_text_seq(&["AMP"]);
    Ok(None)
}
/// parse_index_params - Implemented from Python _parse_index_params (partial)
/// Calls: parse_where, parse_wrapped_properties, parse_wrapped_id_vars
pub fn parse_index_params(&mut self) -> Result<Option<Expression>> {
    // INCLUDE: return an empty IndexParameters node as a placeholder.
    if self.match_text_seq(&["INCLUDE"]) {
        let params = IndexParameters {
            using: None,
            include: None,
            columns: Vec::new(),
            with_storage: None,
            partition_by: None,
            tablespace: None,
            where_: None,
            on: None,
        };
        return Ok(Some(Expression::IndexParameters(Box::new(params))));
    }
    // USING INDEX TABLESPACE is consumed but not yet modeled.
    let _ = self.match_text_seq(&["USING", "INDEX", "TABLESPACE"]);
    Ok(None)
}
/// parse_initcap - Ported from Python _parse_initcap
/// Parses INITCAP function arguments: INITCAP(str) or INITCAP(str, delimiter)
pub fn parse_initcap(&mut self) -> Result<Option<Expression>> {
    let mut args = self.parse_expression_list()?.into_iter();
    match args.next() {
        // Initcap is modeled as a unary function on its first argument;
        // any extra (delimiter) argument is parsed but dropped.
        Some(first) => Ok(Some(Expression::Initcap(Box::new(UnaryFunc::new(first))))),
        None => Ok(None),
    }
}
/// parse_inline - Implemented from Python _parse_inline
///
/// Stub: the LENGTH keyword is consumed when present, but no AST node
/// is produced yet.
#[allow(unused_variables, unused_mut)]
pub fn parse_inline(&mut self) -> Result<Option<Expression>> {
    let _ = self.match_text_seq(&["LENGTH"]);
    Ok(None)
}
/// parse_insert_table - Parse table reference for INSERT statement
/// Parses: table_name [schema] [partition] [alias]
/// This method is a simple wrapper around parse_table for INSERT context;
/// it gives INSERT parsing a dedicated hook that can later diverge from
/// generic table parsing without touching call sites.
#[allow(unused_variables, unused_mut)]
pub fn parse_insert_table(&mut self) -> Result<Option<Expression>> {
    // Parse the table reference - parse_table handles aliases and returns
    // None when no table reference is present at the current position.
    self.parse_table()
}
/// parse_interpolate - Implemented from Python _parse_interpolate
/// Parses the INTERPOLATE clause of ClickHouse's ORDER BY ... WITH FILL:
/// `INTERPOLATE (name [AS expr], ...)`.
/// Returns a Tuple of the parsed entries, or None when the keyword (or a
/// non-empty parenthesized list) is absent.
pub fn parse_interpolate(&mut self) -> Result<Option<Expression>> {
    if !self.match_text_seq(&["INTERPOLATE"]) {
        return Ok(None);
    }
    // Without an opening paren there is nothing to interpolate.
    if !self.match_token(TokenType::LParen) {
        return Ok(None);
    }
    // Comma-separated list of `name AS expression` entries.
    let mut entries = Vec::new();
    loop {
        if let Some(entry) = self.parse_name_as_expression()? {
            entries.push(entry);
        }
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    // Closing paren is consumed when present; its absence is tolerated.
    self.match_token(TokenType::RParen);
    if entries.is_empty() {
        Ok(None)
    } else {
        Ok(Some(Expression::Tuple(Box::new(Tuple {
            expressions: entries,
        }))))
    }
}
/// parse_interval - Creates Interval expression
/// Parses INTERVAL expressions: INTERVAL '1 day', INTERVAL 1 MONTH, etc.
/// Thin wrapper kept for parity with Python's _parse_interval; all of the
/// actual work (including backtracking) happens in try_parse_interval.
#[allow(unused_variables, unused_mut)]
pub fn parse_interval(&mut self) -> Result<Option<Expression>> {
    // Delegate to the existing try_parse_interval method
    self.try_parse_interval()
}
/// parse_interval_span - Implemented from Python _parse_interval_span
/// Calls: parse_function
///
/// Handles the `TO` part of an interval span (e.g. `DAY TO SECOND`).
/// Currently a stub: when TO is matched it returns an empty Var node.
///
/// Fix: the original code contained a second, identical `TO` branch that
/// could never be reached (the first branch always consumes `TO` and
/// returns); the dead branch has been removed. Behavior is unchanged.
#[allow(unused_variables, unused_mut)]
pub fn parse_interval_span(&mut self) -> Result<Option<Expression>> {
    if self.match_text_seq(&["TO"]) {
        return Ok(Some(Expression::Var(Box::new(Var {
            this: String::new(),
        }))));
    }
    Ok(None)
}
/// parse_into - Implemented from Python _parse_into
/// Parses: INTO [TEMPORARY] [UNLOGGED] [TABLE] table_name
/// Returns the table expression for the INTO clause, or None when the
/// INTO keyword is not present.
#[allow(unused_variables, unused_mut)]
pub fn parse_into(&mut self) -> Result<Option<Expression>> {
    if !self.match_token(TokenType::Into) {
        return Ok(None);
    }
    // Optional modifiers are consumed but not recorded in the AST.
    let _ = self.match_token(TokenType::Temporary); // TEMPORARY
    let _ = self.match_text_seq(&["UNLOGGED"]); // UNLOGGED
    let _ = self.match_token(TokenType::Table); // TABLE keyword
    // The remaining tokens form the (possibly qualified) table name.
    self.parse_table_parts()
}
/// parse_introducer - Parses a MySQL introducer expression (_charset'string')
/// Python: _parse_introducer
/// Format: _charset 'literal'
///
/// Assumes the introducer token (e.g. `_utf8`) has already been consumed
/// by the caller; it is read back via `previous()`.
pub fn parse_introducer(&mut self) -> Result<Option<Expression>> {
    // The introducer token's text (already consumed by the caller).
    let text = self.previous().text.clone();
    // Builds the identifier node for the introducer name.
    let make_ident = |name: String| {
        Expression::Identifier(Identifier {
            name,
            quoted: false,
            trailing_comments: Vec::new(),
            span: None,
        })
    };
    // The literal that follows; parse_primary yields a Null expression
    // when nothing parseable is found at the current position.
    let literal = self.parse_primary()?;
    if matches!(literal, Expression::Null(_)) {
        // No literal followed — degrade to a bare identifier.
        return Ok(Some(make_ident(text)));
    }
    Ok(Some(Expression::Introducer(Box::new(Introducer {
        this: Box::new(make_ident(text)),
        expression: Box::new(literal),
    }))))
}
/// parse_is - Implemented from Python _parse_is
/// Calls: parse_null, parse_bitwise
///
/// Stub covering IS-related suffixes.
#[allow(unused_variables, unused_mut)]
pub fn parse_is(&mut self) -> Result<Option<Expression>> {
    // DISTINCT FROM currently yields an empty JSON node.
    // NOTE(review): returning a JSON node here looks like placeholder
    // behavior carried over from code generation — confirm intent.
    if self.match_text_seq(&["DISTINCT", "FROM"]) {
        let json = JSON {
            this: None,
            with_: None,
            unique: false,
        };
        return Ok(Some(Expression::JSON(Box::new(json))));
    }
    // WITH / WITHOUT are consumed but produce no node yet.
    let _ = self.match_text_seq(&["WITH"]) || self.match_text_seq(&["WITHOUT"]);
    Ok(None)
}
/// parse_join - Ported from Python _parse_join
/// Parses a single JOIN clause:
/// `[method] [side] [kind] JOIN table [ON condition | USING (columns)]`,
/// a comma-style implicit join, or SQL Server's CROSS/OUTER APPLY.
/// Returns the Join wrapped in an Expression, or None if no join is found.
#[allow(unused_variables, unused_mut)]
pub fn parse_join(&mut self) -> Result<Option<Expression>> {
    // Comma-style implicit join: `FROM a, b`.
    if self.match_token(TokenType::Comma) {
        if let Ok(Some(table)) = self.parse_table() {
            return Ok(Some(Expression::Join(Box::new(Join {
                this: table,
                on: None,
                using: Vec::new(),
                kind: JoinKind::Implicit,
                use_inner_keyword: false,
                use_outer_keyword: false,
                deferred_condition: false,
                join_hint: None,
                match_condition: None,
                pivots: Vec::new(),
                comments: Vec::new(),
                nesting_group: 0,
                directed: false,
            }))));
        }
        return Ok(None);
    }
    // Explicit join: try to parse the join kind (INNER, LEFT, RIGHT, ...).
    let saved_pos = self.current;
    if let Some((kind, needs_join_keyword, use_inner_keyword, use_outer_keyword, join_hint)) =
        self.try_parse_join_kind()
    {
        // Preserve comments attached to the tokens consumed by
        // try_parse_join_kind so the generator can re-emit them.
        let mut join_comments = Vec::new();
        for i in saved_pos..self.current {
            if i < self.tokens.len() {
                join_comments.extend(self.tokens[i].trailing_comments.iter().cloned());
            }
        }
        // If this kind requires the JOIN keyword, it must follow; otherwise
        // this wasn't a join after all and we backtrack.
        if needs_join_keyword && !self.match_token(TokenType::Join) {
            self.current = saved_pos;
            return Ok(None);
        }
        // The table (or subquery/function) being joined.
        let table = self.parse_table_expression()?;
        // Optional ON <condition> or USING (<columns>).
        let (on, using) = if self.match_token(TokenType::On) {
            (Some(self.parse_expression()?), Vec::new())
        } else if self.match_token(TokenType::Using) {
            let has_parens = self.match_token(TokenType::LParen);
            // parse_using_column_list also handles qualified names (t1.col).
            let cols = self.parse_using_column_list()?;
            if has_parens {
                self.expect(TokenType::RParen)?;
            }
            (None, cols)
        } else {
            (None, Vec::new())
        };
        return Ok(Some(Expression::Join(Box::new(Join {
            this: table,
            on,
            using,
            kind,
            use_inner_keyword,
            use_outer_keyword,
            deferred_condition: false,
            join_hint,
            match_condition: None,
            pivots: Vec::new(),
            comments: join_comments,
            nesting_group: 0,
            directed: false,
        }))));
    }
    // SQL Server CROSS APPLY / OUTER APPLY.
    // BUG FIX: the previous code decided OUTER vs CROSS by inspecting
    // `self.previous()`, but after match_text_seq the previous token is
    // always "APPLY", so OUTER APPLY was always misclassified as CROSS.
    // Record which sequence matched instead.
    let matched_cross = self.match_text_seq(&["CROSS", "APPLY"]);
    let is_outer = !matched_cross && self.match_text_seq(&["OUTER", "APPLY"]);
    if matched_cross || is_outer {
        let table = self.parse_table_expression()?;
        return Ok(Some(Expression::Join(Box::new(Join {
            this: table,
            on: None,
            using: Vec::new(),
            kind: if is_outer {
                JoinKind::Outer
            } else {
                JoinKind::Cross
            },
            use_inner_keyword: false,
            use_outer_keyword: is_outer,
            deferred_condition: false,
            join_hint: None,
            match_condition: None,
            pivots: Vec::new(),
            comments: Vec::new(),
            nesting_group: 0,
            directed: false,
        }))));
    }
    Ok(None)
}
/// parse_join_hint - Spark/Hive join hints (BROADCAST, MERGE, SHUFFLE_HASH, ...)
/// Parses the table list of `HINT_NAME(table1, table2, ...)`.
/// `hint_name` is the hint keyword that the caller already matched; it is
/// upper-cased and stored as the hint's identifier.
#[allow(unused_variables, unused_mut)]
pub fn parse_join_hint(&mut self, hint_name: &str) -> Result<Option<Expression>> {
    // Comma-separated table references; stop at the first non-table token.
    let mut tables = Vec::new();
    while let Some(table) = self.parse_table()? {
        tables.push(table);
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    let name = Identifier::new(hint_name.to_ascii_uppercase());
    Ok(Some(Expression::JoinHint(Box::new(JoinHint {
        this: Box::new(Expression::Identifier(name)),
        expressions: tables,
    }))))
}
/// parse_join_parts - Ported from Python _parse_join_parts
/// Returns (method, side, kind), each optional and upper-cased:
/// - method: ASOF, NATURAL, POSITIONAL
/// - side:   LEFT, RIGHT, FULL
/// - kind:   ANTI, CROSS, INNER, OUTER, SEMI (or STRAIGHT_JOIN)
pub fn parse_join_parts(&mut self) -> (Option<String>, Option<String>, Option<String>) {
    // Join method, when present.
    let method = self
        .match_texts(&["ASOF", "NATURAL", "POSITIONAL"])
        .then(|| self.previous().text.to_ascii_uppercase());
    // Join side, when present.
    let side = self
        .match_texts(&["LEFT", "RIGHT", "FULL"])
        .then(|| self.previous().text.to_ascii_uppercase());
    // Join kind; MySQL's STRAIGHT_JOIN token is folded into this slot.
    let kind = if self.match_texts(&["ANTI", "CROSS", "INNER", "OUTER", "SEMI"]) {
        Some(self.previous().text.to_ascii_uppercase())
    } else if self.match_token(TokenType::StraightJoin) {
        Some("STRAIGHT_JOIN".to_string())
    } else {
        None
    };
    (method, side, kind)
}
/// parse_journal - Parses JOURNAL property (Teradata)
/// Python: _parse_journal
/// Creates a JournalProperty expression with every modifier flag unset;
/// callers that have already consumed NO/DUAL/BEFORE/LOCAL/AFTER should
/// call parse_journal_impl directly with the matching flags.
pub fn parse_journal(&mut self) -> Result<Option<Expression>> {
    self.parse_journal_impl(false, false, false, false, false)
}
/// Implementation of parse_journal with modifier options.
///
/// Each boolean argument corresponds to a Teradata JOURNAL modifier
/// (NO / DUAL / BEFORE / LOCAL / AFTER); a `true` flag is represented in
/// the AST as a boxed `true` boolean literal, `false` as None.
pub fn parse_journal_impl(
    &mut self,
    no: bool,
    dual: bool,
    before: bool,
    local: bool,
    after: bool,
) -> Result<Option<Expression>> {
    // Maps a flag to the Option<Box<Expression>> shape used by the AST,
    // replacing five copies of the same if/else block.
    fn flag(set: bool) -> Option<Box<Expression>> {
        set.then(|| Box::new(Expression::Boolean(BooleanLiteral { value: true })))
    }
    Ok(Some(Expression::JournalProperty(Box::new(
        JournalProperty {
            no: flag(no),
            dual: flag(dual),
            before: flag(before),
            local: flag(local),
            after: flag(after),
        },
    ))))
}
/// parse_json_column_def - Implemented from Python _parse_json_column_def
/// Calls: parse_string, parse_json_schema, parse_id_var
///
/// Stub: NESTED yields an empty JSONColumnDef; PATH is consumed only.
#[allow(unused_variables, unused_mut)]
pub fn parse_json_column_def(&mut self) -> Result<Option<Expression>> {
    if self.match_text_seq(&["NESTED"]) {
        let def = JSONColumnDef {
            this: None,
            kind: None,
            path: None,
            nested_schema: None,
            ordinality: None,
        };
        return Ok(Some(Expression::JSONColumnDef(Box::new(def))));
    }
    // PATH is recognized but not yet represented in the AST.
    let _ = self.match_text_seq(&["PATH"]);
    Ok(None)
}
/// parse_json_key_value - Parses a JSON key-value pair
/// Python: _parse_json_key_value
/// Format: [KEY] key [: | , | VALUE] value
/// A missing key or value is replaced by a Null placeholder; when both
/// are missing, None is returned.
pub fn parse_json_key_value(&mut self) -> Result<Option<Expression>> {
    // Optional leading KEY keyword.
    self.match_text_seq(&["KEY"]);
    let key = self.parse_column()?;
    // Separator between key and value: `:`, `,`, or the VALUE keyword.
    let _ = self.match_token(TokenType::Colon)
        || self.match_token(TokenType::Comma)
        || self.match_text_seq(&["VALUE"]);
    // A VALUE keyword may also follow a `:`/`,` separator.
    self.match_text_seq(&["VALUE"]);
    let value = self.parse_bitwise()?;
    // Nothing parsed on either side: not a key-value pair.
    if key.is_none() && value.is_none() {
        return Ok(None);
    }
    let pair = JSONKeyValue {
        this: Box::new(key.unwrap_or(Expression::Null(Null))),
        expression: Box::new(value.unwrap_or(Expression::Null(Null))),
    };
    Ok(Some(Expression::JSONKeyValue(Box::new(pair))))
}
/// parse_json_object - Parses JSON_OBJECT function
/// Python: _parse_json_object
/// Handles both JSON_OBJECT and JSON_OBJECTAGG; this entry point is the
/// non-aggregate form (agg = false). See parse_json_object_impl for the
/// full grammar.
pub fn parse_json_object(&mut self) -> Result<Option<Expression>> {
    self.parse_json_object_impl(false)
}
/// Implementation of JSON object parsing with aggregate flag.
///
/// Grammar handled (after the function name and opening paren have been
/// consumed by the caller):
///   [* | key_value [FORMAT JSON] (, key_value [FORMAT JSON])*]
///   [NULL ON NULL | ABSENT ON NULL]
///   [WITH UNIQUE [KEYS] | WITHOUT UNIQUE [KEYS]]
///   [RETURNING type [FORMAT JSON]]
///   [ENCODING var]
/// When `agg` is true a JSONObjectAgg node is produced, otherwise a
/// JSONObject. The token-consumption order below is significant.
pub fn parse_json_object_impl(&mut self, agg: bool) -> Result<Option<Expression>> {
    // JSON_OBJECT(*) — a lone star replaces the key-value list.
    let star = self.parse_star()?;
    let expressions = if let Some(star_expr) = star {
        vec![star_expr]
    } else {
        // Comma-separated key-value pairs, each optionally tagged FORMAT JSON.
        let mut exprs = Vec::new();
        loop {
            if let Some(kv) = self.parse_json_key_value()? {
                // FORMAT JSON wraps the pair in a JSONFormat node.
                if self.match_text_seq(&["FORMAT", "JSON"]) {
                    exprs.push(Expression::JSONFormat(Box::new(JSONFormat {
                        this: Some(Box::new(kv)),
                        options: Vec::new(),
                        is_json: None,
                        to_json: None,
                    })));
                } else {
                    exprs.push(kv);
                }
            } else {
                break;
            }
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
        exprs
    };
    // NULL handling: NULL ON NULL or ABSENT ON NULL.
    let null_handling = self.parse_json_on_null_handling()?;
    // WITH/WITHOUT UNIQUE [KEYS] — stored as a true/false boolean literal.
    let unique_keys = if self.match_text_seq(&["WITH", "UNIQUE"]) {
        self.match_text_seq(&["KEYS"]);
        Some(Box::new(Expression::Boolean(BooleanLiteral {
            value: true,
        })))
    } else if self.match_text_seq(&["WITHOUT", "UNIQUE"]) {
        self.match_text_seq(&["KEYS"]);
        Some(Box::new(Expression::Boolean(BooleanLiteral {
            value: false,
        })))
    } else {
        None
    };
    // A stray KEYS keyword is tolerated and consumed.
    self.match_text_seq(&["KEYS"]);
    // RETURNING <type> [FORMAT JSON].
    let return_type = if self.match_text_seq(&["RETURNING"]) {
        let type_expr = self.parse_type()?;
        // FORMAT JSON wraps the returned type in a JSONFormat node.
        if self.match_text_seq(&["FORMAT", "JSON"]) {
            type_expr.map(|t| {
                Box::new(Expression::JSONFormat(Box::new(JSONFormat {
                    this: Some(Box::new(t)),
                    options: Vec::new(),
                    is_json: None,
                    to_json: None,
                })))
            })
        } else {
            type_expr.map(Box::new)
        }
    } else {
        None
    };
    // Optional ENCODING <var>.
    let encoding = if self.match_text_seq(&["ENCODING"]) {
        self.parse_var()?.map(Box::new)
    } else {
        None
    };
    // Both variants carry the same field set; only the node type differs.
    if agg {
        Ok(Some(Expression::JSONObjectAgg(Box::new(JSONObjectAgg {
            expressions,
            null_handling,
            unique_keys,
            return_type,
            encoding,
        }))))
    } else {
        Ok(Some(Expression::JSONObject(Box::new(JSONObject {
            expressions,
            null_handling,
            unique_keys,
            return_type,
            encoding,
        }))))
    }
}
/// Parse a JSON NULL-handling clause: NULL ON NULL or ABSENT ON NULL.
/// The matched clause is preserved verbatim as a Var expression.
fn parse_json_on_null_handling(&mut self) -> Result<Option<Box<Expression>>> {
    let clause = if self.match_text_seq(&["NULL", "ON", "NULL"]) {
        "NULL ON NULL"
    } else if self.match_text_seq(&["ABSENT", "ON", "NULL"]) {
        "ABSENT ON NULL"
    } else {
        return Ok(None);
    };
    Ok(Some(Box::new(Expression::Var(Box::new(Var {
        this: clause.to_string(),
    })))))
}
/// parse_json_schema - Implemented from Python _parse_json_schema
///
/// Stub: COLUMNS yields an empty JSONSchema node; anything else is None.
#[allow(unused_variables, unused_mut)]
pub fn parse_json_schema(&mut self) -> Result<Option<Expression>> {
    if !self.match_text_seq(&["COLUMNS"]) {
        return Ok(None);
    }
    Ok(Some(Expression::JSONSchema(Box::new(JSONSchema {
        expressions: Vec::new(),
    }))))
}
/// Parse a JSON_TABLE COLUMNS clause.
/// Accepts both `COLUMNS (def, def, ...)` and the Oracle single-column
/// form `COLUMNS def` without parentheses. Column definitions are:
/// - name type PATH 'json_path'
/// - name FOR ORDINALITY
/// - NESTED [PATH] 'json_path' COLUMNS (...)
pub fn parse_json_table_columns(&mut self) -> Result<Option<Expression>> {
    if !self.match_text_seq(&["COLUMNS"]) {
        return Ok(None);
    }
    let mut columns = Vec::new();
    if self.match_token(TokenType::LParen) {
        // Parenthesized list: COLUMNS(col1, col2, ...).
        if !self.check(TokenType::RParen) {
            loop {
                if let Some(col_def) = self.parse_json_table_column_def()? {
                    columns.push(col_def);
                }
                if !self.match_token(TokenType::Comma) {
                    break;
                }
            }
        }
        // The closing paren is mandatory in this form.
        self.expect(TokenType::RParen)?;
    } else if let Some(col_def) = self.parse_json_table_column_def()? {
        // Single column without parens: COLUMNS col PATH '...'.
        columns.push(col_def);
    }
    Ok(Some(Expression::JSONSchema(Box::new(JSONSchema {
        expressions: columns,
    }))))
}
/// Parse a single JSON_TABLE column definition.
/// Formats:
/// - name [FOR ORDINALITY] [type] [PATH 'path']
/// - NESTED [PATH] 'path' COLUMNS (...)
pub fn parse_json_table_column_def(&mut self) -> Result<Option<Expression>> {
    // NESTED [PATH] 'json_path' COLUMNS (...) — recursive column group.
    if self.match_text_seq(&["NESTED"]) {
        self.match_text_seq(&["PATH"]); // the PATH keyword is optional here
        let path = self.parse_string()?;
        let nested_schema = self.parse_json_table_columns()?;
        return Ok(Some(Expression::JSONColumnDef(Box::new(JSONColumnDef {
            this: None,
            kind: None,
            path: path.map(Box::new),
            nested_schema: nested_schema.map(Box::new),
            ordinality: None,
        }))));
    }
    // A regular column starts with its name; bail out if none is present.
    let name = match self.parse_id_var()? {
        Some(n) => n,
        None => return Ok(None),
    };
    // FOR ORDINALITY marks a row-number column (it carries no type).
    let ordinality = self
        .match_text_seq(&["FOR", "ORDINALITY"])
        .then(|| Box::new(Expression::Boolean(BooleanLiteral { value: true })));
    // A data type is only expected when FOR ORDINALITY is absent.
    let kind = if ordinality.is_none() {
        let data_type = self.parse_data_type_optional()?;
        data_type.map(|dt| self.data_type_to_string(&dt))
    } else {
        None
    };
    // Optional PATH 'json_path'.
    let path = if self.match_text_seq(&["PATH"]) {
        self.parse_string()?
    } else {
        None
    };
    Ok(Some(Expression::JSONColumnDef(Box::new(JSONColumnDef {
        this: Some(Box::new(name)),
        kind,
        path: path.map(Box::new),
        nested_schema: None,
        ordinality,
    }))))
}
/// Parse JSON_TABLE function
/// JSON_TABLE(expr, path COLUMNS (...)) [ON ERROR ...] [ON EMPTY ...]
pub fn parse_json_table(&mut self) -> Result<Option<Expression>> {
// Parse the JSON expression
let this = self.parse_expression()?;
// Optional path after comma
let path = if self.match_token(TokenType::Comma) {
if let Some(s) = self.parse_string()? {
Some(Box::new(s))
} else {
None
}
} else {
None
};
// Parse error handling: ON ERROR NULL or ON ERROR ERROR
let error_handling = if self.match_text_seq(&["ON", "ERROR"]) {
if self.match_text_seq(&["NULL"]) {
Some(Box::new(Expression::Var(Box::new(Var {
this: "NULL".to_string(),
}))))
} else if self.match_text_seq(&["ERROR"]) {
Some(Box::new(Expression::Var(Box::new(Var {
this: "ERROR".to_string(),
}))))
} else {
None
}
} else {
None
};
// Parse empty handling: ON EMPTY NULL or ON EMPTY ERROR
let empty_handling = if self.match_text_seq(&["ON", "EMPTY"]) {
if self.match_text_seq(&["NULL"]) {
Some(Box::new(Expression::Var(Box::new(Var {
this: "NULL".to_string(),
}))))
} else if self.match_text_seq(&["ERROR"]) {
Some(Box::new(Expression::Var(Box::new(Var {
this: "ERROR".to_string(),
}))))
} else {
None
}
} else {
None
};
// Parse COLUMNS clause
let schema = self.parse_json_schema()?;
Ok(Some(Expression::JSONTable(Box::new(JSONTable {
this: Box::new(this),
schema: schema.map(Box::new),
path,
error_handling,
empty_handling,
}))))
}
/// parse_json_value - Parses the body of a JSON_VALUE call.
/// Example: JSON_VALUE(json, '$.path' RETURNING type [ON ERROR ...])
pub fn parse_json_value(&mut self) -> Result<Option<Expression>> {
    // JSON source expression, then the path (a comma may separate them).
    let source = self.parse_expression()?;
    self.match_token(TokenType::Comma);
    let path = self.parse_expression()?;
    // Optional RETURNING <type>.
    let returning = if self.match_token(TokenType::Returning) {
        Some(Box::new(self.parse_expression()?))
    } else {
        None
    };
    // Optional ON ERROR / ON EMPTY condition.
    let on_condition = match self.check(TokenType::On) {
        true => self.parse_on_condition()?,
        false => None,
    };
    Ok(Some(Expression::JSONValue(Box::new(JSONValue {
        this: Box::new(source),
        path: Some(Box::new(path)),
        returning,
        on_condition: on_condition.map(Box::new),
    }))))
}
/// parse_key_constraint_options - Implemented from Python
/// _parse_key_constraint_options.
///
/// Stub: consumes one of NO ACTION / CASCADE / RESTRICT when present;
/// no AST node is produced yet.
#[allow(unused_variables, unused_mut)]
pub fn parse_key_constraint_options(&mut self) -> Result<Option<Expression>> {
    // Short-circuits at the first sequence that matches, mirroring the
    // original if/else chain.
    let _ = self.match_text_seq(&["NO", "ACTION"])
        || self.match_text_seq(&["CASCADE"])
        || self.match_text_seq(&["RESTRICT"]);
    Ok(None)
}
/// parse_lambda - Ported from Python _parse_lambda
/// Parses lambda expressions: `x -> x + 1` or `(x, y) -> x + y`.
/// Also supports DuckDB syntax: `LAMBDA x : x + 1`.
/// Backtracks (restoring `self.current`) and returns None when the input
/// turns out not to be a lambda, so callers may probe speculatively.
#[allow(unused_variables, unused_mut)]
pub fn parse_lambda(&mut self) -> Result<Option<Expression>> {
    let start_index = self.current;
    // DuckDB's keyword form: LAMBDA x : expr.
    // ClickHouse doesn't use LAMBDA keyword — lambda is just a function name there
    if !matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    ) && self.match_token(TokenType::Lambda)
    {
        // Parse lambda parameters (comma-separated identifiers)
        let mut params = Vec::new();
        loop {
            // is_identifier_token accepts Identifier, QuotedIdentifier, and Var
            if self.is_identifier_token() {
                let token = self.advance();
                let quoted = token.token_type == TokenType::QuotedIdentifier;
                params.push(Identifier {
                    name: token.text,
                    quoted,
                    trailing_comments: Vec::new(),
                    span: None,
                });
            } else {
                break;
            }
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
        // Having consumed the LAMBDA keyword we are committed: a missing
        // parameter list or ':' is a hard parse error, not a backtrack.
        if params.is_empty() {
            return Err(self.parse_error("LAMBDA requires at least one parameter"));
        }
        if !self.match_token(TokenType::Colon) {
            return Err(self.parse_error("Expected ':' after LAMBDA parameters"));
        }
        let body = self.parse_expression()?;
        return Ok(Some(Expression::Lambda(Box::new(LambdaExpr {
            parameters: params,
            body,
            colon: true,
            parameter_types: Vec::new(),
        }))));
    }
    // Arrow form: either (x, y) -> ... or x -> ...
    let parameters = if self.match_token(TokenType::LParen) {
        // Parenthesized parameters: (x, y) -> ...
        let mut params = Vec::new();
        if !self.check(TokenType::RParen) {
            loop {
                if let Some(ident) = self.parse_identifier()? {
                    if let Expression::Identifier(id) = ident {
                        params.push(id);
                    }
                }
                if !self.match_token(TokenType::Comma) {
                    break;
                }
            }
        }
        if !self.match_token(TokenType::RParen) {
            // Missing closing paren — not a lambda; restore and bail out.
            self.current = start_index;
            return Ok(None);
        }
        params
    } else {
        // Single parameter: x -> ...
        if let Some(ident) = self.parse_identifier()? {
            if let Expression::Identifier(id) = ident {
                vec![id]
            } else {
                // Parsed something, but not a plain identifier — backtrack.
                self.current = start_index;
                return Ok(None);
            }
        } else {
            return Ok(None);
        }
    };
    // The arrow ('->' or '=>') decides whether this was really a lambda.
    if self.match_token(TokenType::Arrow) || self.match_token(TokenType::FArrow) {
        // Parse lambda body
        let body = self.parse_expression()?;
        Ok(Some(Expression::Lambda(Box::new(LambdaExpr {
            parameters,
            body,
            colon: false,
            parameter_types: Vec::new(),
        }))))
    } else {
        // No arrow — backtrack so the tokens can be re-parsed as something else.
        self.current = start_index;
        Ok(None)
    }
}
/// parse_lambda_arg - Delegates to parse_id_var
/// A lambda argument is any identifier-like token; parse_id_var already
/// accepts the relevant token kinds, so no extra logic is needed here.
#[allow(unused_variables, unused_mut)]
pub fn parse_lambda_arg(&mut self) -> Result<Option<Expression>> {
    self.parse_id_var()
}
/// parse_lateral - Parse LATERAL subquery or table function.
/// Python: if self._match(TokenType.LATERAL): return exp.Lateral(...)
/// Grammar handled:
///   LATERAL [VIEW [OUTER]] ( subquery | function_or_table )
///           [[AS] alias [(col_alias, ...)]]
/// Returns None when the LATERAL keyword is absent.
pub fn parse_lateral(&mut self) -> Result<Option<Expression>> {
    // CROSS APPLY / OUTER APPLY are handled by join parsing
    // (try_parse_join_kind); this method only handles LATERAL itself.
    if !self.match_token(TokenType::Lateral) {
        return Ok(None);
    }
    // LATERAL VIEW [OUTER] — Hive/Spark syntax; OUTER is only tried after VIEW.
    let view = self.match_token(TokenType::View);
    let outer = if view {
        self.match_token(TokenType::Outer)
    } else {
        false
    };
    // The lateral body: parenthesized statement, or a primary expression.
    let this = if self.check(TokenType::LParen) {
        // Could be a subquery: LATERAL (SELECT ...)
        self.expect(TokenType::LParen)?;
        let inner = self.parse_statement()?;
        self.expect(TokenType::RParen)?;
        inner
    } else {
        // Could be a function or table reference: LATERAL unnest(...)
        self.parse_primary()?
    };
    // Optional alias, with or without the AS keyword; bare keywords are
    // rejected so clause starters aren't swallowed as aliases.
    let alias = if self.match_token(TokenType::As) {
        Some(self.expect_identifier()?)
    } else if self.check(TokenType::Identifier) && !self.check_keyword() {
        Some(self.expect_identifier()?)
    } else {
        None
    };
    // Optional column aliases: AS alias(col1, col2, ...) — only attempted
    // when a table alias was found.
    let column_aliases = if alias.is_some() && self.match_token(TokenType::LParen) {
        let mut cols = Vec::new();
        loop {
            if self.check(TokenType::RParen) {
                break;
            }
            let col = self.expect_identifier()?;
            cols.push(col);
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
        self.expect(TokenType::RParen)?;
        cols
    } else {
        Vec::new()
    };
    // view/outer flags are stored as boxed boolean literals when set.
    Ok(Some(Expression::Lateral(Box::new(Lateral {
        this: Box::new(this),
        view: if view {
            Some(Box::new(Expression::Boolean(BooleanLiteral {
                value: true,
            })))
        } else {
            None
        },
        outer: if outer {
            Some(Box::new(Expression::Boolean(BooleanLiteral {
                value: true,
            })))
        } else {
            None
        },
        alias,
        alias_quoted: false,
        cross_apply: None,
        ordinality: None,
        column_aliases,
    }))))
}
/// parse_limit - Parse a LIMIT clause.
/// Python: if self._match(TokenType.LIMIT): return exp.Limit(this=...)
/// Returns `Limit(expr)` when the LIMIT keyword is present, else None.
pub fn parse_limit(&mut self) -> Result<Option<Expression>> {
    if !self.match_token(TokenType::Limit) {
        return Ok(None);
    }
    // The limit count — typically a number, but any expression is accepted.
    let count = self.parse_expression()?;
    let limit = Limit {
        this: count,
        percent: false,
        comments: Vec::new(),
    };
    Ok(Some(Expression::Limit(Box::new(limit))))
}
/// parse_limit_by - Implemented from Python _parse_limit_by
///
/// Stub: the BY keyword is consumed when present, but no AST node is
/// produced yet.
#[allow(unused_variables, unused_mut)]
pub fn parse_limit_by(&mut self) -> Result<Option<Expression>> {
    let _ = self.match_text_seq(&["BY"]);
    Ok(None)
}
/// parse_limit_options - Implemented from Python _parse_limit_options
///
/// Stub: ONLY yields an empty LimitOptions node; WITH TIES is consumed
/// without producing a node.
#[allow(unused_variables, unused_mut)]
pub fn parse_limit_options(&mut self) -> Result<Option<Expression>> {
    if self.match_text_seq(&["ONLY"]) {
        let options = LimitOptions {
            percent: None,
            rows: None,
            with_ties: None,
        };
        return Ok(Some(Expression::LimitOptions(Box::new(options))));
    }
    let _ = self.match_text_seq(&["WITH", "TIES"]);
    Ok(None)
}
/// parse_load - Implemented from Python _parse_load
///
/// Stub: DATA yields an empty Command node; LOCAL is consumed only.
#[allow(unused_variables, unused_mut)]
pub fn parse_load(&mut self) -> Result<Option<Expression>> {
    if self.match_text_seq(&["DATA"]) {
        let command = Command {
            this: String::new(),
        };
        return Ok(Some(Expression::Command(Box::new(command))));
    }
    let _ = self.match_text_seq(&["LOCAL"]);
    Ok(None)
}
/// parse_locking - Implemented from Python _parse_locking
/// Parses a locking property:
///   [TABLE|VIEW|ROW|DATABASE] [object] [FOR|IN] [lock type] [OVERRIDE]
/// Calls: parse_table_parts. Returns None when no object kind is found.
#[allow(unused_variables, unused_mut)]
pub fn parse_locking(&mut self) -> Result<Option<Expression>> {
    // What kind of object is being locked; bail out when none matches.
    let kind = if self.match_token(TokenType::Table) {
        "TABLE"
    } else if self.match_token(TokenType::View) {
        "VIEW"
    } else if self.match_token(TokenType::Row) {
        "ROW"
    } else if self.match_token(TokenType::Database) || self.match_identifier("DATABASE") {
        "DATABASE"
    } else {
        return Ok(None);
    };
    let kind = kind.to_string();
    // ROW locks carry no object name; the other kinds may name one.
    let target = if matches!(kind.as_str(), "DATABASE" | "TABLE" | "VIEW") {
        self.parse_table_parts()?
    } else {
        None
    };
    // FOR / IN introduces the lock type.
    let for_or_in = if self.match_token(TokenType::For) {
        Some("FOR")
    } else if self.match_token(TokenType::In) {
        Some("IN")
    } else {
        None
    };
    // The lock type keyword (EXCL is normalized to EXCLUSIVE).
    let lock_type = if self.match_identifier("ACCESS") {
        Some("ACCESS")
    } else if self.match_texts(&["EXCL", "EXCLUSIVE"]) {
        Some("EXCLUSIVE")
    } else if self.match_identifier("SHARE") {
        Some("SHARE")
    } else if self.match_identifier("READ") {
        Some("READ")
    } else if self.match_identifier("WRITE") {
        Some("WRITE")
    } else if self.match_identifier("CHECKSUM") {
        Some("CHECKSUM")
    } else {
        None
    };
    // Optional OVERRIDE flag, stored as a boxed true literal.
    let override_ = self
        .match_identifier("OVERRIDE")
        .then(|| Box::new(Expression::Boolean(BooleanLiteral { value: true })));
    // Wraps a keyword as a boxed Var expression.
    let as_var = |v: &str| {
        Box::new(Expression::Var(Box::new(Var {
            this: v.to_string(),
        })))
    };
    Ok(Some(Expression::LockingProperty(Box::new(
        LockingProperty {
            this: target.map(Box::new),
            kind,
            for_or_in: for_or_in.map(as_var),
            lock_type: lock_type.map(as_var),
            override_,
        },
    ))))
}
/// Parse a Teradata LOCKING statement: `LOCKING <property> SELECT ...`.
/// The LOCK token is mandatory, as is the locking property; the wrapped
/// query is a WITH-prefixed statement or a plain SELECT.
fn parse_locking_statement(&mut self) -> Result<Expression> {
    self.expect(TokenType::Lock)?;
    let Some(locking) = self.parse_locking()? else {
        return Err(self.parse_error("Expected LOCKING clause"));
    };
    // CTEs need the full statement parser; otherwise SELECT suffices.
    let query = if self.check(TokenType::With) {
        self.parse_statement()?
    } else {
        self.parse_select()?
    };
    Ok(Expression::LockingStatement(Box::new(LockingStatement {
        this: Box::new(locking),
        expression: Box::new(query),
    })))
}
/// parse_log - Parses LOG property (Teradata)
/// Python: _parse_log
/// Creates a LogProperty expression; this entry point is the plain LOG
/// form (no = false). Callers that already consumed a NO modifier should
/// call parse_log_impl(true) directly.
pub fn parse_log(&mut self) -> Result<Option<Expression>> {
    self.parse_log_impl(false)
}
/// Implementation of parse_log with the NO modifier flag.
/// A `true` flag is stored as a boxed true boolean literal, `false` as None.
pub fn parse_log_impl(&mut self, no: bool) -> Result<Option<Expression>> {
    let no_flag = no.then(|| Box::new(Expression::Boolean(BooleanLiteral { value: true })));
    Ok(Some(Expression::LogProperty(Box::new(LogProperty {
        no: no_flag,
    }))))
}
/// parse_match_against - Parses MATCH(columns) AGAINST(pattern [modifier]).
/// Python: parser.py:7125-7153
/// Parses the column list (or SingleStore's TABLE form), the
/// `) AGAINST (` bridge, the search-pattern string, and the optional
/// search-mode modifier. Returns None when no pattern string is found.
#[allow(unused_variables, unused_mut)]
pub fn parse_match_against(&mut self) -> Result<Option<Expression>> {
    // Column expressions, or the SingleStore TABLE form.
    let expressions = if self.match_text_seq(&["TABLE"]) {
        self.parse_table()?.into_iter().collect()
    } else {
        let mut cols = Vec::new();
        loop {
            if let Some(col) = self.parse_column()? {
                cols.push(col);
            }
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
        cols
    };
    // Bridge between the two argument lists: ) AGAINST (
    self.match_text_seq(&[")", "AGAINST", "("]);
    // The search pattern must be a string literal.
    let pattern = self.parse_string()?;
    // Optional search-mode modifier, preserved verbatim.
    let modifier_text = if self.match_text_seq(&["IN", "NATURAL", "LANGUAGE", "MODE"]) {
        if self.match_text_seq(&["WITH", "QUERY", "EXPANSION"]) {
            Some("IN NATURAL LANGUAGE MODE WITH QUERY EXPANSION")
        } else {
            Some("IN NATURAL LANGUAGE MODE")
        }
    } else if self.match_text_seq(&["IN", "BOOLEAN", "MODE"]) {
        Some("IN BOOLEAN MODE")
    } else if self.match_text_seq(&["WITH", "QUERY", "EXPANSION"]) {
        Some("WITH QUERY EXPANSION")
    } else {
        None
    };
    let modifier = modifier_text.map(|m| {
        Box::new(Expression::Var(Box::new(Var {
            this: m.to_string(),
        })))
    });
    match pattern {
        Some(p) => Ok(Some(Expression::MatchAgainst(Box::new(MatchAgainst {
            this: Box::new(p),
            expressions,
            modifier,
        })))),
        None => Ok(None),
    }
}
/// parse_match_recognize_measure - Implemented from Python
/// _parse_match_recognize_measure.
/// Parses one MEASURES entry in MATCH_RECOGNIZE: [FINAL|RUNNING] expression.
pub fn parse_match_recognize_measure(&mut self) -> Result<Option<Expression>> {
    // Optional FINAL / RUNNING semantics prefix.
    let window_frame = if self.match_texts(&["FINAL", "RUNNING"]) {
        match self.previous().text.to_ascii_uppercase().as_str() {
            "FINAL" => Some(MatchRecognizeSemantics::Final),
            _ => Some(MatchRecognizeSemantics::Running),
        }
    } else {
        None
    };
    // The measured expression itself.
    let this = self.parse_expression()?;
    Ok(Some(Expression::MatchRecognizeMeasure(Box::new(
        MatchRecognizeMeasure { this, window_frame },
    ))))
}
/// parse_max_min_by - MAX_BY / MIN_BY / ARG_MAX / ARG_MIN aggregates.
/// Parses: MAX_BY(value, key [, n]) or MIN_BY(value, key [, n]).
/// `is_max`: true for MAX_BY/ARG_MAX, false for MIN_BY/ARG_MIN.
/// Missing value/key arguments are filled with Null placeholders so the
/// node shape stays uniform.
#[allow(unused_variables, unused_mut)]
pub fn parse_max_min_by(&mut self, is_max: bool) -> Result<Option<Expression>> {
    let mut args = Vec::new();
    // Optional DISTINCT prefix; its first argument is parsed eagerly.
    // Cleanup: the old unused `distinct` binding was dropped — the flag
    // was never stored on ArgMax/ArgMin, it only shaped argument parsing.
    if self.match_token(TokenType::Distinct) {
        if let Some(expr) = self.parse_lambda()? {
            args.push(expr);
        }
        self.match_token(TokenType::Comma);
    }
    // Remaining comma-separated arguments (lambdas or plain expressions).
    loop {
        if let Some(arg) = self.parse_lambda()? {
            args.push(arg);
        } else {
            break;
        }
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    // Positional mapping: [value, key, optional n].
    let null_box = || Box::new(Expression::Null(Null));
    let this = args.first().cloned().map(Box::new).unwrap_or_else(null_box);
    let expression = args.get(1).cloned().map(Box::new).unwrap_or_else(null_box);
    let count = args.get(2).cloned().map(Box::new);
    if is_max {
        Ok(Some(Expression::ArgMax(Box::new(ArgMax {
            this,
            expression,
            count,
        }))))
    } else {
        Ok(Some(Expression::ArgMin(Box::new(ArgMin {
            this,
            expression,
            count,
        }))))
    }
}
/// Parse MERGE statement
/// Python: def _parse_merge(self) -> exp.Merge
///
/// Shape handled here:
///   MERGE [INTO] target [WITH (hints)] [[AS] alias]
///   USING (subquery | table) [[AS] alias [(col, ...)]]
///   [ON condition] [USING (col, ...)]          -- second USING is a DuckDB extension
///   WHEN [NOT] MATCHED ... THEN ...            -- delegated to parse_when_matched_clauses
///   [RETURNING ... | OUTPUT ...]
///
/// Returns Ok(Some(Merge)); errors if the mandatory USING keyword is missing.
pub fn parse_merge(&mut self) -> Result<Option<Expression>> {
    // Optional INTO keyword
    self.match_token(TokenType::Into);
    // Parse target table using parse_table_ref
    let mut target = Expression::Table(Box::new(self.parse_table_ref()?));
    // Parse optional TSQL table hints: WITH (HOLDLOCK), WITH (TABLOCK), etc.
    // Only attempted when WITH is immediately followed by '(' so a WITH that
    // belongs to some other construct is left untouched.
    if self.check(TokenType::With) && self.check_next(TokenType::LParen) {
        if let Expression::Table(ref mut table) = target {
            if let Some(hint_expr) = self.parse_table_hints()? {
                match hint_expr {
                    // A tuple of hints is flattened into the table's hint list.
                    Expression::Tuple(tuple) => {
                        table.hints = tuple.expressions;
                    }
                    other => {
                        table.hints = vec![other];
                    }
                }
            }
        }
    }
    // Parse optional alias for target table
    // Try to get an identifier as alias if AS is present or there's an identifier
    // Use parse_id_var instead of parse_identifier to handle Var tokens (e.g. T)
    if self.match_token(TokenType::As) {
        if let Some(alias_expr) = self.parse_id_var()? {
            // Extract identifier from the expression; non-identifier results
            // are silently ignored (no alias is attached).
            if let Expression::Identifier(ident) = alias_expr {
                target = Expression::Alias(Box::new(Alias {
                    this: target,
                    alias: ident,
                    column_aliases: Vec::new(),
                    pre_alias_comments: Vec::new(),
                    trailing_comments: Vec::new(),
                    inferred_type: None,
                }));
            }
        }
    } else if !self.check(TokenType::Using) {
        // Try to parse alias without AS keyword (e.g., MERGE t1 T USING ...)
        // Use parse_id_var to handle both Identifier and Var tokens
        if let Some(alias_expr) = self.parse_id_var()? {
            if let Expression::Identifier(ident) = alias_expr {
                target = Expression::Alias(Box::new(Alias {
                    this: target,
                    alias: ident,
                    column_aliases: Vec::new(),
                    pre_alias_comments: Vec::new(),
                    trailing_comments: Vec::new(),
                    inferred_type: None,
                }));
            }
        }
    }
    // USING clause is mandatory in MERGE.
    if !self.match_token(TokenType::Using) {
        return Err(self.parse_error("Expected USING in MERGE statement"));
    }
    // Parse source: either a parenthesized subquery or a plain table ref.
    let mut using = if self.match_token(TokenType::LParen) {
        // Subquery: USING (SELECT ...) AS alias
        let query = self.parse_statement()?;
        self.expect(TokenType::RParen)?;
        // Comments attached to the closing paren travel with the subquery.
        let trailing = self.previous_trailing_comments().to_vec();
        let mut subq = Subquery {
            this: query,
            alias: None,
            column_aliases: Vec::new(),
            order_by: None,
            limit: None,
            offset: None,
            distribute_by: None,
            sort_by: None,
            cluster_by: None,
            lateral: false,
            modifiers_inside: false,
            trailing_comments: trailing,
            inferred_type: None,
        };
        // Parse optional alias: (SELECT ...) AS y(col1, col2)
        if self.match_token(TokenType::As) {
            let alias_name = self.expect_identifier_or_keyword()?;
            subq.alias = Some(Identifier::new(alias_name));
            // Parse optional column aliases: AS alias(col1, col2)
            if self.match_token(TokenType::LParen) {
                let mut cols = Vec::new();
                loop {
                    let col_name = self.expect_identifier_or_keyword()?;
                    cols.push(Identifier::new(col_name));
                    if !self.match_token(TokenType::Comma) {
                        break;
                    }
                }
                self.expect(TokenType::RParen)?;
                subq.column_aliases = cols;
            }
        } else if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
            // Implicit alias without AS
            let alias_name = self.expect_identifier_or_keyword()?;
            subq.alias = Some(Identifier::new(alias_name));
            // Parse optional column aliases: alias(col1, col2)
            if self.match_token(TokenType::LParen) {
                let mut cols = Vec::new();
                loop {
                    let col_name = self.expect_identifier_or_keyword()?;
                    cols.push(Identifier::new(col_name));
                    if !self.match_token(TokenType::Comma) {
                        break;
                    }
                }
                self.expect(TokenType::RParen)?;
                subq.column_aliases = cols;
            }
        }
        Expression::Subquery(Box::new(subq))
    } else {
        Expression::Table(Box::new(self.parse_table_ref()?))
    };
    // Parse optional alias for source (subquery aliases were handled above,
    // so this only applies to the plain-table branch).
    if matches!(&using, Expression::Table(_)) {
        if self.match_token(TokenType::As) {
            if let Some(alias_expr) = self.parse_id_var()? {
                if let Expression::Identifier(ident) = alias_expr {
                    using = Expression::Alias(Box::new(Alias {
                        this: using,
                        alias: ident,
                        column_aliases: Vec::new(),
                        pre_alias_comments: Vec::new(),
                        trailing_comments: Vec::new(),
                        inferred_type: None,
                    }));
                }
            }
        } else if !self.check(TokenType::On) {
            // Try to parse alias without AS keyword
            // Use parse_id_var to handle both Identifier and Var tokens (e.g., S, T)
            if let Some(alias_expr) = self.parse_id_var()? {
                if let Expression::Identifier(ident) = alias_expr {
                    using = Expression::Alias(Box::new(Alias {
                        this: using,
                        alias: ident,
                        column_aliases: Vec::new(),
                        pre_alias_comments: Vec::new(),
                        trailing_comments: Vec::new(),
                        inferred_type: None,
                    }));
                }
            }
        }
    }
    // ON clause with the join condition (optional).
    let on = if self.match_token(TokenType::On) {
        Some(Box::new(self.parse_expression()?))
    } else {
        None
    };
    // Optional additional USING clause for key columns (DuckDB: USING (col1, col2))
    let using_cond = if self.match_token(TokenType::Using) {
        // Parse comma-separated identifiers wrapped in parentheses
        if self.match_token(TokenType::LParen) {
            let mut idents = Vec::new();
            loop {
                // Use parse_id_var to handle Var tokens (unquoted identifiers)
                if let Some(ident) = self.parse_id_var()? {
                    idents.push(ident);
                } else {
                    break;
                }
                if !self.match_token(TokenType::Comma) {
                    break;
                }
            }
            // Closing paren is tolerated as optional here.
            self.match_token(TokenType::RParen);
            if !idents.is_empty() {
                Some(Box::new(Expression::Tuple(Box::new(Tuple {
                    expressions: idents,
                }))))
            } else {
                None
            }
        } else {
            // Also support without parentheses for backwards compatibility
            let mut idents = Vec::new();
            loop {
                if let Some(ident) = self.parse_id_var()? {
                    idents.push(ident);
                } else {
                    break;
                }
                if !self.match_token(TokenType::Comma) {
                    break;
                }
            }
            if !idents.is_empty() {
                Some(Box::new(Expression::Tuple(Box::new(Tuple {
                    expressions: idents,
                }))))
            } else {
                None
            }
        }
    } else {
        None
    };
    // Parse WHEN MATCHED clauses
    let whens = self.parse_when_matched_clauses()?;
    // Parse optional RETURNING clause (PostgreSQL) or OUTPUT clause (TSQL);
    // OUTPUT is normalized into a Returning node.
    let returning = if let Some(ret) = self.parse_returning()? {
        Some(ret)
    } else if self.match_token(TokenType::Output) {
        // TSQL OUTPUT clause: OUTPUT $action, Inserted.col, Deleted.col [INTO target]
        let output = self.parse_output_clause()?;
        Some(Expression::Returning(Box::new(Returning {
            expressions: output.columns,
            into: output.into_table.map(Box::new),
        })))
    } else {
        None
    };
    Ok(Some(Expression::Merge(Box::new(Merge {
        this: Box::new(target),
        using: Box::new(using),
        on,
        using_cond,
        whens: whens.map(Box::new),
        with_: None,
        returning: returning.map(Box::new),
    }))))
}
/// Parse multiple WHEN [NOT] MATCHED clauses for MERGE
///
/// Each clause has the shape:
///   WHEN [NOT] MATCHED [BY TARGET | BY SOURCE] [AND condition] THEN action
/// where action is INSERT ..., UPDATE ..., DELETE, DO NOTHING, or a bare
/// keyword. Actions are encoded as Tuples whose first element is a Var
/// naming the action ("INSERT"/"UPDATE"/"DELETE").
///
/// Returns Ok(None) when no WHEN clause is present, otherwise a Whens node.
fn parse_when_matched_clauses(&mut self) -> Result<Option<Expression>> {
    let mut whens = Vec::new();
    while self.match_token(TokenType::When) {
        // Check for NOT MATCHED (matched == false when NOT is present).
        let matched = !self.match_token(TokenType::Not);
        self.match_text_seq(&["MATCHED"]);
        // Check for BY TARGET or BY SOURCE.
        // Encoding: source = Some(false) for BY TARGET, Some(true) for BY SOURCE.
        let source = if self.match_text_seq(&["BY", "TARGET"]) {
            Some(Box::new(Expression::Boolean(BooleanLiteral {
                value: false,
            })))
        } else if self.match_text_seq(&["BY", "SOURCE"]) {
            Some(Box::new(Expression::Boolean(BooleanLiteral {
                value: true,
            })))
        } else {
            None
        };
        // Optional AND condition
        let condition = if self.match_token(TokenType::And) {
            Some(Box::new(self.parse_expression()?))
        } else {
            None
        };
        // THEN action is mandatory.
        if !self.match_token(TokenType::Then) {
            return Err(self.parse_error("Expected THEN in WHEN clause"));
        }
        // Parse the action: INSERT, UPDATE, DELETE, or other keywords (DO NOTHING, etc.)
        let then: Expression = if self.match_token(TokenType::Insert) {
            // INSERT action - use Tuple to represent it
            let mut elements = vec![Expression::Var(Box::new(Var {
                this: "INSERT".to_string(),
            }))];
            // Spark/Databricks: INSERT * (insert all columns)
            if self.match_token(TokenType::Star) {
                elements.push(Expression::Star(crate::expressions::Star {
                    table: None,
                    except: None,
                    replace: None,
                    rename: None,
                    trailing_comments: Vec::new(),
                    span: None,
                }));
            } else
            // Parse column list (optional)
            if self.match_token(TokenType::LParen) {
                let mut columns: Vec<Expression> = Vec::new();
                loop {
                    if let Some(col) = self.parse_id_var()? {
                        // Handle qualified column references (e.g., target.a)
                        let col = if self.match_token(TokenType::Dot) {
                            if let Expression::Identifier(table_ident) = col {
                                if let Some(col_expr) = self.parse_id_var()? {
                                    if let Expression::Identifier(col_ident) = col_expr {
                                        Expression::boxed_column(Column {
                                            name: col_ident,
                                            table: Some(table_ident),
                                            join_mark: false,
                                            trailing_comments: Vec::new(),
                                            span: None,
                                            inferred_type: None,
                                        })
                                    } else {
                                        // Non-identifier after the dot: keep it as-is.
                                        col_expr
                                    }
                                } else {
                                    return Err(self.parse_error(
                                        "Expected column name after dot in MERGE INSERT",
                                    ));
                                }
                            } else {
                                col
                            }
                        } else {
                            col
                        };
                        columns.push(col);
                    } else {
                        break;
                    }
                    if !self.match_token(TokenType::Comma) {
                        break;
                    }
                }
                // Closing paren is tolerated as optional.
                self.match_token(TokenType::RParen);
                if !columns.is_empty() {
                    elements.push(Expression::Tuple(Box::new(Tuple {
                        expressions: columns,
                    })));
                }
            }
            // Parse VALUES clause
            if self.match_text_seq(&["VALUES"]) {
                if let Some(values) = self.parse_value()? {
                    elements.push(values);
                }
            } else if self.match_text_seq(&["ROW"]) {
                // INSERT ROW variant: recorded as a bare Var marker.
                elements.push(Expression::Var(Box::new(Var {
                    this: "ROW".to_string(),
                })));
            }
            // Optional trailing WHERE filter on the action.
            if self.match_token(TokenType::Where) {
                elements.push(Expression::Where(Box::new(crate::expressions::Where {
                    this: self.parse_expression()?,
                })));
            }
            // A lone action keyword collapses to the keyword itself.
            if elements.len() == 1 {
                elements[0].clone()
            } else {
                Expression::Tuple(Box::new(Tuple {
                    expressions: elements,
                }))
            }
        } else if self.match_token(TokenType::Update) {
            // UPDATE action - use Tuple to represent SET assignments
            let mut elements = vec![Expression::Var(Box::new(Var {
                this: "UPDATE".to_string(),
            }))];
            // Spark/Databricks: UPDATE * (update all columns)
            if self.match_token(TokenType::Star) {
                elements.push(Expression::Star(crate::expressions::Star {
                    table: None,
                    except: None,
                    replace: None,
                    rename: None,
                    trailing_comments: Vec::new(),
                    span: None,
                }));
            } else if self.match_token(TokenType::Set) {
                // Spark/Databricks: UPDATE SET * (update all columns)
                if self.match_token(TokenType::Star) {
                    elements.push(Expression::Star(crate::expressions::Star {
                        table: None,
                        except: None,
                        replace: None,
                        rename: None,
                        trailing_comments: Vec::new(),
                        span: None,
                    }));
                } else {
                    // Parse col = value assignments manually
                    let mut assignments: Vec<Expression> = Vec::new();
                    loop {
                        // Parse: column = expression (column can be qualified like x.a)
                        if let Some(col) = self.parse_id_var()? {
                            // Handle qualified column references (e.g., x.a = y.b)
                            let col = if self.match_token(TokenType::Dot) {
                                // We have a qualified column reference
                                if let Expression::Identifier(table_ident) = col {
                                    // Parse the column part after the dot
                                    if let Some(col_expr) = self.parse_id_var()? {
                                        if let Expression::Identifier(col_ident) = col_expr {
                                            Expression::boxed_column(Column {
                                                name: col_ident,
                                                table: Some(table_ident),
                                                join_mark: false,
                                                trailing_comments: Vec::new(),
                                                span: None,
                                                inferred_type: None,
                                            })
                                        } else {
                                            col_expr
                                        }
                                    } else {
                                        return Err(
                                            self.parse_error("Expected column name after dot")
                                        );
                                    }
                                } else {
                                    col
                                }
                            } else {
                                col
                            };
                            // Assignments are encoded as Eq(col, value); a column
                            // without '=' is silently dropped.
                            if self.match_token(TokenType::Eq) {
                                let value = self.parse_expression()?;
                                // Create assignment as EQ expression
                                let assignment = Expression::Eq(Box::new(BinaryOp {
                                    left: col,
                                    right: value,
                                    left_comments: Vec::new(),
                                    operator_comments: Vec::new(),
                                    trailing_comments: Vec::new(),
                                    inferred_type: None,
                                }));
                                assignments.push(assignment);
                            }
                        }
                        if !self.match_token(TokenType::Comma) {
                            break;
                        }
                    }
                    if !assignments.is_empty() {
                        elements.push(Expression::Tuple(Box::new(Tuple {
                            expressions: assignments,
                        })));
                    }
                }
            }
            // Optional trailing WHERE filter on the action.
            if self.match_token(TokenType::Where) {
                elements.push(Expression::Where(Box::new(crate::expressions::Where {
                    this: self.parse_expression()?,
                })));
            }
            if elements.len() == 1 {
                elements[0].clone()
            } else {
                Expression::Tuple(Box::new(Tuple {
                    expressions: elements,
                }))
            }
        } else if self.match_token(TokenType::Delete) {
            // DELETE action
            let mut elements = vec![Expression::Var(Box::new(Var {
                this: "DELETE".to_string(),
            }))];
            if self.match_token(TokenType::Where) {
                elements.push(Expression::Where(Box::new(crate::expressions::Where {
                    this: self.parse_expression()?,
                })));
            }
            if elements.len() == 1 {
                elements[0].clone()
            } else {
                Expression::Tuple(Box::new(Tuple {
                    expressions: elements,
                }))
            }
        } else if self.match_identifier("DO") {
            // DO NOTHING action (PostgreSQL)
            if self.match_identifier("NOTHING") {
                Expression::Var(Box::new(Var {
                    this: "DO NOTHING".to_string(),
                }))
            } else {
                return Err(self.parse_error("Expected NOTHING after DO"));
            }
        } else {
            // Other action keyword, kept verbatim as a Var.
            if let Some(var) = self.parse_var()? {
                var
            } else {
                return Err(
                    self.parse_error("Expected INSERT, UPDATE, DELETE, or action keyword")
                );
            }
        };
        whens.push(Expression::When(Box::new(When {
            matched: Some(Box::new(Expression::Boolean(BooleanLiteral {
                value: matched,
            }))),
            source,
            condition,
            then: Box::new(then),
        })));
    }
    if whens.is_empty() {
        Ok(None)
    } else {
        Ok(Some(Expression::Whens(Box::new(Whens {
            expressions: whens,
        }))))
    }
}
/// parse_mergeblockratio - Parses MERGEBLOCKRATIO property (Teradata)
/// Python: _parse_mergeblockratio
/// Format: MERGEBLOCKRATIO = number [PERCENT] or NO MERGEBLOCKRATIO or DEFAULT MERGEBLOCKRATIO
///
/// Convenience wrapper: delegates to the impl with both the NO and DEFAULT
/// flags unset (i.e. the plain `MERGEBLOCKRATIO = n [PERCENT]` form).
pub fn parse_mergeblockratio(&mut self) -> Result<Option<Expression>> {
    self.parse_mergeblockratio_impl(false, false)
}
/// Implementation of parse_mergeblockratio with options.
///
/// `no` / `default` indicate that the caller already consumed a NO or DEFAULT
/// prefix; when an `=` follows instead, the numeric form is parsed.
pub fn parse_mergeblockratio_impl(
    &mut self,
    no: bool,
    default: bool,
) -> Result<Option<Expression>> {
    // Helper: boxed boolean `true` when the flag is set, None otherwise.
    fn true_flag(set: bool) -> Option<Box<Expression>> {
        if set {
            Some(Box::new(Expression::Boolean(BooleanLiteral { value: true })))
        } else {
            None
        }
    }
    if self.match_token(TokenType::Eq) {
        // MERGEBLOCKRATIO = <number> [PERCENT]
        let ratio = self.parse_number()?;
        let has_percent = self.match_token(TokenType::Percent);
        Ok(Some(Expression::MergeBlockRatioProperty(Box::new(
            MergeBlockRatioProperty {
                this: ratio.map(Box::new),
                no: None,
                default: None,
                percent: true_flag(has_percent),
            },
        ))))
    } else {
        // NO MERGEBLOCKRATIO / DEFAULT MERGEBLOCKRATIO variants.
        Ok(Some(Expression::MergeBlockRatioProperty(Box::new(
            MergeBlockRatioProperty {
                this: None,
                no: true_flag(no),
                default: true_flag(default),
                percent: None,
            },
        ))))
    }
}
/// parse_modifies_property - Implemented from Python _parse_modifies_property
///
/// Consumes an optional `SQL DATA` suffix (as in `MODIFIES SQL DATA`); the
/// property is not modeled in the AST yet, so the result is always Ok(None).
#[allow(unused_variables, unused_mut)]
pub fn parse_modifies_property(&mut self) -> Result<Option<Expression>> {
    // Swallow "SQL DATA" when present; nothing is produced either way.
    let _ = self.match_text_seq(&["SQL", "DATA"]);
    Ok(None)
}
/// parse_multitable_inserts - Parses Oracle's multi-table INSERT (INSERT ALL/FIRST)
/// Python: _parse_multitable_inserts
/// Syntax: INSERT ALL|FIRST [WHEN cond THEN] INTO table [(cols)] [VALUES(...)] ... SELECT ...
///
/// Called after the ALL/FIRST token has been consumed; the kind is recovered
/// from the previous token. Each INTO clause becomes a ConditionalInsert; the
/// trailing SELECT becomes the shared source.
pub fn parse_multitable_inserts(
    &mut self,
    leading_comments: Vec<String>,
    overwrite: bool,
) -> Result<Option<Expression>> {
    // Get kind from previous token (ALL or FIRST)
    let kind = self.previous().text.to_ascii_uppercase();
    let mut expressions = Vec::new();
    // Each loop iteration parses one conditional insert; the loop ends when
    // no further INTO clause is found.
    loop {
        // Check for WHEN condition
        let condition = if self.match_token(TokenType::When) {
            let cond = self.parse_or()?;
            // THEN is tolerated as optional.
            self.match_token(TokenType::Then);
            Some(cond)
        } else {
            None
        };
        // Check for ELSE (used in INSERT FIRST ... ELSE INTO ...)
        let is_else = self.match_token(TokenType::Else);
        // Must have INTO keyword to continue
        if !self.match_token(TokenType::Into) {
            break;
        }
        // Parse table with optional schema (using parse_table_parts for proper schema.table parsing)
        let table_expr = self.parse_table_parts()?;
        // Extract TableRef from the table expression
        let table_ref = if let Some(Expression::Table(t)) = table_expr {
            *t
        } else {
            // Fallback: create empty table ref (shouldn't happen)
            TableRef::new("")
        };
        // Parse optional column list: (col1, col2, ...)
        let columns = if self.match_token(TokenType::LParen) {
            let cols = self.parse_identifier_list()?;
            self.expect(TokenType::RParen)?;
            cols
        } else {
            Vec::new()
        };
        // Parse optional VALUES clause (a single parenthesized row).
        let values = if self.match_token(TokenType::Values) {
            self.expect(TokenType::LParen)?;
            let row = self.parse_expression_list()?;
            self.expect(TokenType::RParen)?;
            vec![row]
        } else {
            Vec::new()
        };
        // Create Insert expression for this INTO clause; all fields other
        // than table/columns/values take their inert defaults.
        let insert_expr = Expression::Insert(Box::new(Insert {
            table: table_ref,
            columns,
            values,
            query: None,
            overwrite: false,
            partition: Vec::new(),
            directory: None,
            returning: Vec::new(),
            output: None,
            on_conflict: None,
            leading_comments: Vec::new(),
            if_exists: false,
            with: None,
            ignore: false,
            source_alias: None,
            alias: None,
            alias_explicit_as: false,
            default_values: false,
            by_name: false,
            conflict_action: None,
            is_replace: false,
            replace_where: None,
            source: None,
            hint: None,
            function_target: None,
            partition_by: None,
            settings: Vec::new(),
        }));
        // Wrap in ConditionalInsert, recording the WHEN condition and the
        // ELSE marker (as a boxed boolean true) when present.
        let conditional_insert = Expression::ConditionalInsert(Box::new(ConditionalInsert {
            this: Box::new(insert_expr),
            expression: condition.map(Box::new),
            else_: if is_else {
                Some(Box::new(Expression::Boolean(BooleanLiteral {
                    value: true,
                })))
            } else {
                None
            },
        }));
        expressions.push(conditional_insert);
    }
    // Parse the source SELECT statement (or subquery)
    let source = self.parse_statement()?;
    Ok(Some(Expression::MultitableInserts(Box::new(
        MultitableInserts {
            kind,
            expressions,
            source: Some(Box::new(source)),
            leading_comments,
            overwrite,
        },
    ))))
}
/// parse_name_as_expression - Parse identifier that can be aliased
/// Parses: identifier [AS expression]
#[allow(unused_variables, unused_mut)]
pub fn parse_name_as_expression(&mut self) -> Result<Option<Expression>> {
    // The leading identifier is mandatory; bail out quietly when absent.
    let name = match self.parse_id_var()? {
        Some(expr) => expr,
        None => return Ok(None),
    };
    // Without the alias keyword the identifier stands on its own.
    if !self.match_token(TokenType::Alias) {
        return Ok(Some(name));
    }
    // AS was consumed: the aliased expression follows. If it is missing,
    // fall back to returning just the identifier.
    let aliased = match self.parse_disjunction()? {
        Some(expr) => expr,
        None => return Ok(Some(name)),
    };
    // Note the inversion: the *identifier* becomes the alias label and the
    // parsed expression becomes the aliased value.
    let alias_ident = match name {
        Expression::Identifier(id) => id,
        _ => Identifier::new(String::new()),
    };
    Ok(Some(Expression::Alias(Box::new(Alias {
        this: aliased,
        alias: alias_ident,
        column_aliases: Vec::new(),
        pre_alias_comments: Vec::new(),
        trailing_comments: Vec::new(),
        inferred_type: None,
    }))))
}
/// parse_named_window - Ported from Python _parse_named_window
/// Parses a named window definition: name AS (spec)
#[allow(unused_variables, unused_mut)]
pub fn parse_named_window(&mut self) -> Result<Option<Expression>> {
    // A named window begins with its identifier.
    let name = match self.parse_id_var()? {
        Some(n) => n,
        None => return Ok(None),
    };
    // Without AS there is no spec: return the bare name.
    if !self.match_token(TokenType::As) {
        return Ok(Some(name));
    }
    // AS (spec): the specification must be parenthesized.
    self.expect(TokenType::LParen)?;
    let spec = self.parse_window_spec_inner()?;
    self.expect(TokenType::RParen)?;
    let spec_expr = match spec {
        Some(s) => s,
        None => return Ok(None),
    };
    // Represent `name AS (spec)` as an Alias of the spec.
    let alias_ident = match name {
        Expression::Identifier(id) => id,
        _ => Identifier::new("window"),
    };
    Ok(Some(Expression::Alias(Box::new(Alias {
        this: spec_expr,
        alias: alias_ident,
        column_aliases: Vec::new(),
        pre_alias_comments: Vec::new(),
        trailing_comments: Vec::new(),
        inferred_type: None,
    }))))
}
/// parse_next_value_for - Parses NEXT VALUE FOR sequence_name
/// Python: parser.py:6752-6761
///
/// Called after NEXT has been consumed. On failure to see "VALUE FOR" the
/// parser position is retreated by one token (presumably to un-consume the
/// NEXT that the caller matched -- TODO confirm against the call site) and
/// Ok(None) is returned.
#[allow(unused_variables, unused_mut)]
pub fn parse_next_value_for(&mut self) -> Result<Option<Expression>> {
    if !self.match_text_seq(&["VALUE", "FOR"]) {
        // Retreat if we consumed a token
        if self.current > 0 {
            self.current -= 1;
        }
        return Ok(None);
    }
    // Parse the sequence name as a dotted identifier (db.schema.sequence_name)
    // Manually parse identifier parts separated by dots
    let first = self
        .parse_id_var()?
        .ok_or_else(|| self.parse_error("Expected sequence name after NEXT VALUE FOR"))?;
    // Normalize whatever parse_id_var produced into an Identifier; Var tokens
    // become unquoted identifiers, anything else an empty-name placeholder.
    let first_id = match first {
        Expression::Identifier(id) => id,
        Expression::Var(v) => Identifier {
            name: v.this,
            quoted: false,
            trailing_comments: Vec::new(),
            span: None,
        },
        _ => Identifier {
            name: String::new(),
            quoted: false,
            trailing_comments: Vec::new(),
            span: None,
        },
    };
    // Check for dotted parts (db.schema.sequence_name)
    let mut parts = vec![first_id];
    while self.match_token(TokenType::Dot) {
        if self.is_identifier_or_keyword_token() {
            let token = self.advance();
            parts.push(Identifier {
                name: token.text,
                quoted: token.token_type == TokenType::QuotedIdentifier,
                trailing_comments: Vec::new(),
                span: None,
            });
        } else {
            // Trailing dot with nothing usable after it: stop collecting.
            break;
        }
    }
    // Build a Column expression from the parts:
    //   1 part  -> Column(name)
    //   2 parts -> Column(table.name)
    //   3+      -> nested Dot chains
    let this = if parts.len() == 1 {
        Expression::boxed_column(Column {
            name: parts.remove(0),
            table: None,
            join_mark: false,
            trailing_comments: Vec::new(),
            span: None,
            inferred_type: None,
        })
    } else if parts.len() == 2 {
        Expression::boxed_column(Column {
            name: parts.remove(1),
            table: Some(parts.remove(0)),
            join_mark: false,
            trailing_comments: Vec::new(),
            span: None,
            inferred_type: None,
        })
    } else {
        // For 3+ parts, build nested Dot expressions
        let mut expr = Expression::Identifier(parts.remove(0));
        for part in parts.drain(..) {
            expr = Expression::Dot(Box::new(DotAccess {
                this: expr,
                field: part,
            }));
        }
        expr
    };
    // Parse optional OVER (ORDER BY ...) clause (TSQL sequence ordering).
    let order = if self.match_token(TokenType::Over) {
        if self.match_token(TokenType::LParen) {
            let ord = self.parse_order()?;
            self.expect(TokenType::RParen)?;
            ord.map(Box::new)
        } else {
            // OVER without '(' yields no ordering; the OVER token stays consumed.
            None
        }
    } else {
        None
    };
    Ok(Some(Expression::NextValueFor(Box::new(NextValueFor {
        this: Box::new(this),
        order,
    }))))
}
/// parse_no_property - Implemented from Python _parse_no_property
///
/// Consumes `PRIMARY INDEX` or (failing that) `SQL` after a NO prefix.
/// Neither variant is modeled in the AST yet, so the result is always
/// Ok(None); this method exists to keep the token stream in sync.
#[allow(unused_variables, unused_mut)]
pub fn parse_no_property(&mut self) -> Result<Option<Expression>> {
    // Short-circuit: only try "SQL" when "PRIMARY INDEX" did not match.
    let _ = self.match_text_seq(&["PRIMARY", "INDEX"]) || self.match_text_seq(&["SQL"]);
    Ok(None)
}
/// parse_normalize - Parses NORMALIZE(expr [, form])
/// Python: NORMALIZE(expr, form) where form is NFC/NFD/NFKC/NFKD
#[allow(unused_variables, unused_mut)]
pub fn parse_normalize(&mut self) -> Result<Option<Expression>> {
    // First argument: the value being normalized.
    let target = self.parse_expression()?;
    // Second, optional argument: the normalization form (NFC/NFD/NFKC/NFKD).
    let mut form = None;
    if self.match_token(TokenType::Comma) {
        form = self.parse_var()?.map(Box::new);
    }
    Ok(Some(Expression::Normalize(Box::new(Normalize {
        this: Box::new(target),
        form,
        is_casefold: None,
    }))))
}
/// parse_not_constraint - Implemented from Python _parse_not_constraint
/// Parses constraints that start with NOT: NOT NULL, NOT CASESPECIFIC,
/// NOT FOR REPLICATION. Returns Ok(None) when nothing recognizable follows,
/// leaving the token stream untouched in that case.
pub fn parse_not_constraint(&mut self) -> Result<Option<Expression>> {
    // NOT NULL constraint
    if self.match_text_seq(&["NULL"]) {
        return Ok(Some(Expression::NotNullColumnConstraint(Box::new(
            NotNullColumnConstraint { allow_null: None },
        ))));
    }
    // NOT CASESPECIFIC constraint (Teradata)
    if self.match_text_seq(&["CASESPECIFIC"]) {
        return Ok(Some(Expression::CaseSpecificColumnConstraint(Box::new(
            CaseSpecificColumnConstraint {
                not_: Some(Box::new(Expression::Boolean(BooleanLiteral {
                    value: true,
                }))),
            },
        ))));
    }
    // NOT FOR REPLICATION (SQL Server) - represented as a plain property.
    if self.match_token(TokenType::For) {
        if self.match_identifier("REPLICATION") {
            return Ok(Some(Expression::Property(Box::new(
                crate::expressions::Property {
                    this: Box::new(Expression::Identifier(Identifier::new(
                        "NOT FOR REPLICATION".to_string(),
                    ))),
                    value: None,
                },
            ))));
        }
        // FOR was not followed by REPLICATION: retreat so the consumed FOR
        // token is visible to whichever rule runs next. Previously the token
        // was silently swallowed, corrupting the stream for the caller.
        self.current -= 1;
    }
    Ok(None)
}
/// parse_null - Parse NULL literal
/// Python: if self._match_set((TokenType.NULL, TokenType.UNKNOWN)): return exp.Null
pub fn parse_null(&mut self) -> Result<Option<Expression>> {
    // UNKNOWN is treated as NULL in some dialects, so both tokens qualify.
    // Short-circuit || means UNKNOWN is only tried when NULL did not match.
    let is_null = self.match_token(TokenType::Null) || self.match_token(TokenType::Unknown);
    if is_null {
        Ok(Some(Expression::Null(Null)))
    } else {
        Ok(None)
    }
}
/// parse_number - Parse numeric literal
/// Python: TokenType.NUMBER -> exp.Literal(this=token.text, is_string=False)
/// Handles Hive/Spark numeric suffixes encoded as "number::TYPE" by the tokenizer.
pub fn parse_number(&mut self) -> Result<Option<Expression>> {
    if !self.match_token(TokenType::Number) {
        return Ok(None);
    }
    let text = self.previous().text.clone();
    // The tokenizer encodes suffixed numbers as "number::TYPE"; plain numbers
    // contain no "::" and become ordinary literals.
    match text.split_once("::") {
        None => Ok(Some(Expression::Literal(Box::new(Literal::Number(text))))),
        Some((num_part, type_name)) => {
            let num_expr =
                Expression::Literal(Box::new(Literal::Number(num_part.to_string())));
            // Map the encoded suffix onto a concrete data type.
            let data_type = match type_name {
                "BIGINT" => crate::expressions::DataType::BigInt { length: None },
                "SMALLINT" => crate::expressions::DataType::SmallInt { length: None },
                "TINYINT" => crate::expressions::DataType::TinyInt { length: None },
                "DOUBLE" => crate::expressions::DataType::Double {
                    precision: None,
                    scale: None,
                },
                "FLOAT" => crate::expressions::DataType::Float {
                    precision: None,
                    scale: None,
                    real_spelling: false,
                },
                "DECIMAL" => crate::expressions::DataType::Decimal {
                    precision: None,
                    scale: None,
                },
                other => crate::expressions::DataType::Custom {
                    name: other.to_string(),
                },
            };
            // Suffixed numbers are represented as TRY_CAST(number AS TYPE).
            Ok(Some(Expression::TryCast(Box::new(
                crate::expressions::Cast {
                    this: num_expr,
                    to: data_type,
                    trailing_comments: Vec::new(),
                    double_colon_syntax: false,
                    format: None,
                    default: None,
                    inferred_type: None,
                },
            ))))
        }
    }
}
/// parse_odbc_datetime_literal - Parses ODBC datetime literals
/// Examples: {d'2023-01-01'}, {t'12:00:00'}, {ts'2023-01-01 12:00:00'}
pub fn parse_odbc_datetime_literal(&mut self) -> Result<Option<Expression>> {
    // The leading Var token carries the type indicator: d, t, or ts.
    if !self.match_token(TokenType::Var) {
        return Ok(None);
    }
    let indicator = self.previous().text.to_lowercase();
    // A quoted string with the actual datetime value must follow.
    let value = match self.parse_string()? {
        Some(v) => v,
        None => return Ok(None),
    };
    // The literal is wrapped in braces: consume the closing one.
    self.expect(TokenType::RBrace)?;
    // Build the expression matching the indicator; unrecognized indicators
    // fall through to the raw string value.
    let expr = match indicator.as_str() {
        "d" => Expression::Date(Box::new(UnaryFunc::new(value))),
        "t" => Expression::Time(Box::new(UnaryFunc::new(value))),
        "ts" => Expression::Timestamp(Box::new(TimestampFunc {
            this: Some(Box::new(value)),
            zone: None,
            with_tz: None,
            safe: None,
        })),
        _ => value,
    };
    Ok(Some(expr))
}
/// parse_offset - Parse OFFSET clause
/// Python: if self._match(TokenType.OFFSET): return exp.Offset(this=self._parse_term())
pub fn parse_offset(&mut self) -> Result<Option<Expression>> {
    if self.match_token(TokenType::Offset) {
        // The operand is typically a numeric literal, but any expression parses.
        let amount = self.parse_expression()?;
        Ok(Some(Expression::Offset(Box::new(Offset {
            this: amount,
            rows: None,
        }))))
    } else {
        Ok(None)
    }
}
/// parse_on_condition - Ported from Python _parse_on_condition
/// Parses ON EMPTY/ERROR/NULL conditions
/// Example: NULL ON EMPTY, ERROR ON ERROR
///
/// Returns Ok(None) only when none of the three clauses matched.
/// NOTE(review): in the DEFAULT branches below, if DEFAULT matches but the
/// trailing "ON EMPTY"/"ON ERROR" does not, the DEFAULT keyword and the
/// parsed default expression stay consumed while the clause yields None --
/// confirm this token-loss behavior is intended.
#[allow(unused_variables, unused_mut)]
pub fn parse_on_condition(&mut self) -> Result<Option<Expression>> {
    // Parse ON EMPTY: NULL / ERROR markers are stored as bare identifiers,
    // a DEFAULT value is stored as the expression itself.
    let empty = if self.match_text_seq(&["NULL", "ON", "EMPTY"]) {
        Some(Box::new(Expression::Identifier(Identifier::new(
            "NULL".to_string(),
        ))))
    } else if self.match_text_seq(&["ERROR", "ON", "EMPTY"]) {
        Some(Box::new(Expression::Identifier(Identifier::new(
            "ERROR".to_string(),
        ))))
    } else if self.match_text_seq(&["DEFAULT"]) {
        let default_val = self.parse_expression()?;
        if self.match_text_seq(&["ON", "EMPTY"]) {
            Some(Box::new(default_val))
        } else {
            None
        }
    } else {
        None
    };
    // Parse ON ERROR (same encoding as ON EMPTY).
    let error = if self.match_text_seq(&["NULL", "ON", "ERROR"]) {
        Some(Box::new(Expression::Identifier(Identifier::new(
            "NULL".to_string(),
        ))))
    } else if self.match_text_seq(&["ERROR", "ON", "ERROR"]) {
        Some(Box::new(Expression::Identifier(Identifier::new(
            "ERROR".to_string(),
        ))))
    } else if self.match_text_seq(&["DEFAULT"]) {
        let default_val = self.parse_expression()?;
        if self.match_text_seq(&["ON", "ERROR"]) {
            Some(Box::new(default_val))
        } else {
            None
        }
    } else {
        None
    };
    // Parse ON NULL (only the NULL ON NULL form is recognized).
    let null = if self.match_text_seq(&["NULL", "ON", "NULL"]) {
        Some(Box::new(Expression::Identifier(Identifier::new(
            "NULL".to_string(),
        ))))
    } else {
        None
    };
    if empty.is_none() && error.is_none() && null.is_none() {
        return Ok(None);
    }
    Ok(Some(Expression::OnCondition(Box::new(OnCondition {
        empty,
        error,
        null,
    }))))
}
/// parse_on_handling - Implemented from Python _parse_on_handling
/// Calls: parse_bitwise
///
/// Consumes a leading ON keyword when present; the handling clause itself is
/// not modeled yet, so the result is always Ok(None). The original body
/// contained the identical `match_text_seq(&["ON"])` check twice; the second
/// copy was unreachable dead code and has been removed.
pub fn parse_on_handling(&mut self) -> Result<Option<Expression>> {
    // Swallow "ON" when present; nothing further is parsed.
    let _ = self.match_text_seq(&["ON"]);
    Ok(None)
}
/// parse_on_property - Implemented from Python _parse_on_property
#[allow(unused_variables, unused_mut)]
pub fn parse_on_property(&mut self) -> Result<Option<Expression>> {
if self.match_text_seq(&["COMMIT", "PRESERVE", "ROWS"]) {
return Ok(Some(Expression::OnCommitProperty(Box::new(
OnCommitProperty { delete: None },
))));
}
if self.match_text_seq(&["COMMIT", "DELETE", "ROWS"]) {
// Matched: COMMIT DELETE ROWS
return Ok(None);
}
Ok(None)
}
/// parse_opclass - Parses PostgreSQL operator class in index expressions
/// Example: column_name text_pattern_ops
pub fn parse_opclass(&mut self) -> Result<Option<Expression>> {
    // Parse the indexed expression first.
    let target = self.parse_expression()?;
    // Tokens such as ASC/DESC/NULLS (or the end of the item) mean that no
    // opclass name follows, so the bare expression is returned.
    let at_terminator = self.check(TokenType::Asc)
        || self.check(TokenType::Desc)
        || self.check(TokenType::Nulls)
        || self.check(TokenType::Comma)
        || self.check(TokenType::RParen);
    if at_terminator {
        return Ok(Some(target));
    }
    // Otherwise a trailing table-parts name is the operator class.
    match self.parse_table()? {
        Some(opclass_name) => Ok(Some(Expression::Opclass(Box::new(Opclass {
            this: Box::new(target),
            expression: Box::new(opclass_name),
        })))),
        None => Ok(Some(target)),
    }
}
/// parse_open_json - Parses SQL Server OPENJSON function
/// Example: OPENJSON(json, '$.path') WITH (col1 type '$.path' AS JSON, ...)
///
/// Called with the opening paren already consumed; this method consumes the
/// closing paren itself when a WITH clause is probed.
pub fn parse_open_json(&mut self) -> Result<Option<Expression>> {
    // Parse the JSON expression
    let this = self.parse_expression()?;
    // Parse optional path
    let path = if self.match_token(TokenType::Comma) {
        self.parse_string()?.map(Box::new)
    } else {
        None
    };
    // Check for closing paren and WITH clause.
    // NOTE(review): the && short-circuits, so when ')' matches but WITH does
    // not, the ')' stays consumed and no column list is parsed -- confirm the
    // caller relies on this method eating the closing paren.
    let expressions = if self.match_token(TokenType::RParen)
        && self.match_token(TokenType::With)
    {
        self.expect(TokenType::LParen)?;
        let mut cols = Vec::new();
        loop {
            // Parse column definition: name type 'path' [AS JSON]
            let col_name = self.parse_field()?;
            if col_name.is_none() {
                break;
            }
            let col_type = self.parse_data_type()?;
            let col_path = self.parse_string()?.map(Box::new);
            // AS JSON marker: both tokens must match for the flag to be set.
            let as_json = if self.match_token(TokenType::As) && self.match_identifier("JSON") {
                Some(Box::new(Expression::Boolean(BooleanLiteral {
                    value: true,
                })))
            } else {
                None
            };
            cols.push(Expression::OpenJSONColumnDef(Box::new(OpenJSONColumnDef {
                this: Box::new(col_name.ok_or_else(|| {
                    self.parse_error("Expected column name in OPENJSON WITH clause")
                })?),
                kind: String::new(), // kept for backwards compat, use data_type instead
                path: col_path,
                as_json,
                data_type: Some(col_type),
            })));
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
        self.expect(TokenType::RParen)?;
        cols
    } else {
        Vec::new()
    };
    Ok(Some(Expression::OpenJSON(Box::new(OpenJSON {
        this: Box::new(this),
        path,
        expressions,
    }))))
}
/// parse_operator - Ported from Python _parse_operator
/// Parses PostgreSQL OPERATOR(op) syntax
/// Example: col1 OPERATOR(~>) col2
///
/// `this` is the already-parsed left-hand operand (None becomes a NULL
/// placeholder). Called after the OPERATOR keyword has been consumed; loops
/// to support chained `a OPERATOR(op) b OPERATOR(op) c`.
#[allow(unused_variables, unused_mut)]
pub fn parse_operator(&mut self, this: Option<Expression>) -> Result<Option<Expression>> {
    let mut result = this;
    // Parse OPERATOR(op) expressions
    while self.match_token(TokenType::LParen) {
        // Collect the raw operator text between the parens by concatenating
        // every token's text until ')' (or end of input).
        let mut op_text = String::new();
        while !self.check(TokenType::RParen) && !self.is_at_end() {
            op_text.push_str(&self.peek().text);
            self.skip();
        }
        self.expect(TokenType::RParen)?;
        // Parse the right-hand side expression
        let rhs = self.parse_expression()?;
        // Fold into an Operator node; each iteration nests the previous
        // result as the new left-hand side (left-associative chaining).
        result = Some(Expression::Operator(Box::new(Operator {
            this: Box::new(result.unwrap_or_else(|| Expression::Null(Null))),
            operator: Some(Box::new(Expression::Identifier(Identifier::new(op_text)))),
            expression: Box::new(rhs),
            comments: Vec::new(),
        })));
        // Continue only if another OPERATOR keyword follows.
        if !self.match_token(TokenType::Operator) {
            break;
        }
    }
    Ok(result)
}
/// parse_order - Parses an ORDER BY clause into [`Expression::OrderBy`].
/// Returns `Ok(None)` when the next token is not ORDER.
/// Python: if not self._match(TokenType.ORDER_BY): return this; return exp.Order(expressions=self._parse_csv(self._parse_ordered))
pub fn parse_order(&mut self) -> Result<Option<Expression>> {
    if !self.match_token(TokenType::Order) {
        return Ok(None);
    }
    // Consume an optional BY.
    self.match_token(TokenType::By);
    // Comma-separated ordering items.
    let mut items = Vec::new();
    while let Some(item) = self.parse_ordered_item()? {
        items.push(item);
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    Ok(Some(Expression::OrderBy(Box::new(OrderBy {
        expressions: items,
        siblings: false,
        comments: Vec::new(),
    }))))
}
/// parse_ordered_item - Parse a single ORDER BY item:
/// `expr [ASC|DESC] [NULLS FIRST|LAST] [WITH FILL [FROM e] [TO e] [STEP e] [STALENESS e] [INTERPOLATE (...)]]`
///
/// Returns `Ok(None)` when no expression can be parsed; the error from
/// `parse_expression` is deliberately swallowed so callers can treat it as
/// "end of the ORDER BY list".
fn parse_ordered_item(&mut self) -> Result<Option<Ordered>> {
    // The expression to order by; a parse failure terminates the list.
    let expr = match self.parse_expression() {
        Ok(e) => e,
        Err(_) => return Ok(None),
    };
    // Direction flags: explicit_asc lets generators round-trip a written
    // ASC keyword; desc records DESC.
    let mut desc = false;
    let mut explicit_asc = false;
    if self.match_token(TokenType::Asc) {
        explicit_asc = true;
    } else if self.match_token(TokenType::Desc) {
        desc = true;
    }
    // NULLS FIRST / NULLS LAST; None means no explicit placement was given.
    let nulls_first = if self.match_text_seq(&["NULLS", "FIRST"]) {
        Some(true)
    } else if self.match_text_seq(&["NULLS", "LAST"]) {
        Some(false)
    } else {
        None
    };
    // ClickHouse WITH FILL modifier; its sub-clauses are parsed in the
    // fixed order FROM, TO, STEP, STALENESS, INTERPOLATE.
    let with_fill = if self.match_text_seq(&["WITH", "FILL"]) {
        let from_ = if self.match_token(TokenType::From) {
            Some(Box::new(self.parse_or()?))
        } else {
            None
        };
        let to = if self.match_text_seq(&["TO"]) {
            Some(Box::new(self.parse_or()?))
        } else {
            None
        };
        let step = if self.match_text_seq(&["STEP"]) {
            Some(Box::new(self.parse_or()?))
        } else {
            None
        };
        let staleness = if self.match_text_seq(&["STALENESS"]) {
            Some(Box::new(self.parse_or()?))
        } else {
            None
        };
        // INTERPOLATE only produces a value when followed by a
        // parenthesized list; a bare INTERPOLATE keyword yields None.
        let interpolate = if self.match_text_seq(&["INTERPOLATE"]) {
            if self.match_token(TokenType::LParen) {
                let exprs = self.parse_expression_list()?;
                self.expect(TokenType::RParen)?;
                // One entry is stored bare; several are wrapped in a Tuple.
                if exprs.len() == 1 {
                    Some(Box::new(exprs.into_iter().next().unwrap()))
                } else {
                    Some(Box::new(Expression::Tuple(Box::new(
                        crate::expressions::Tuple { expressions: exprs },
                    ))))
                }
            } else {
                None
            }
        } else {
            None
        };
        Some(Box::new(WithFill {
            from_,
            to,
            step,
            staleness,
            interpolate,
        }))
    } else {
        None
    };
    Ok(Some(Ordered {
        this: expr,
        desc,
        nulls_first,
        explicit_asc,
        with_fill,
    }))
}
/// parse_ordered - Implemented from Python _parse_ordered (wrapper for parse_ordered_item)
///
/// NOTE(review): the branches after the first return are only reachable when
/// `parse_ordered_item` yields None (no expression parsed). Returning an
/// empty WithFill for a bare "NULLS FIRST" and None for "NULLS LAST" /
/// "WITH FILL" looks like stub behavior — TODO confirm against Python
/// _parse_ordered before relying on these fallbacks.
#[allow(unused_variables, unused_mut)]
pub fn parse_ordered(&mut self) -> Result<Option<Expression>> {
    // Common case: delegate to the full ORDER BY item parser.
    if let Some(ordered) = self.parse_ordered_item()? {
        return Ok(Some(Expression::Ordered(Box::new(ordered))));
    }
    // Fallbacks for modifier keywords with no preceding expression.
    if self.match_text_seq(&["NULLS", "FIRST"]) {
        return Ok(Some(Expression::WithFill(Box::new(WithFill {
            from_: None,
            to: None,
            step: None,
            staleness: None,
            interpolate: None,
        }))));
    }
    if self.match_text_seq(&["NULLS", "LAST"]) {
        // Matched: NULLS LAST (consumed, nothing produced)
        return Ok(None);
    }
    if self.match_text_seq(&["WITH", "FILL"]) {
        // Matched: WITH FILL (consumed, nothing produced)
        return Ok(None);
    }
    Ok(None)
}
/// parse_overlay - Ported from Python _parse_overlay.
/// Parses the OVERLAY function:
/// `OVERLAY(string PLACING replacement FROM position [FOR length])`.
/// A comma is accepted wherever one of the keywords is expected.
pub fn parse_overlay(&mut self) -> Result<Option<Expression>> {
    // The string being modified; bail out quietly if absent.
    let Some(target) = self.parse_bitwise()? else {
        return Ok(None);
    };
    // PLACING <replacement> (or comma-separated argument form).
    let replacement = if self.match_text_seq(&["PLACING"]) || self.match_token(TokenType::Comma) {
        self.parse_bitwise()?
            .ok_or_else(|| self.parse_error("Expected replacement expression in OVERLAY"))?
    } else {
        return Err(self.parse_error("Expected PLACING in OVERLAY function"));
    };
    // FROM <position>.
    let from = if self.match_token(TokenType::From) || self.match_token(TokenType::Comma) {
        self.parse_bitwise()?
            .ok_or_else(|| self.parse_error("Expected position expression in OVERLAY"))?
    } else {
        return Err(self.parse_error("Expected FROM in OVERLAY function"));
    };
    // Optional FOR <length>; parse problems here are treated as "no length"
    // (errors are intentionally discarded, matching the original behavior).
    let length = if self.match_token(TokenType::For) || self.match_token(TokenType::Comma) {
        self.parse_bitwise().ok().flatten()
    } else {
        None
    };
    Ok(Some(Expression::Overlay(Box::new(OverlayFunc {
        this: target,
        replacement,
        from,
        length,
    }))))
}
/// parse_parameter - Parses a named parameter (`@name` / `:name`) or a
/// session parameter (`@@name`); returns `Ok(None)` when neither is next.
/// Python: this = self._parse_identifier() or self._parse_primary_or_var(); return exp.Parameter(this=this)
pub fn parse_parameter(&mut self) -> Result<Option<Expression>> {
    // Named parameter token.
    if self.match_token(TokenType::Parameter) {
        let name = self.previous().text.clone();
        return Ok(Some(Expression::Parameter(Box::new(Parameter {
            name: Some(name),
            index: None,
            style: ParameterStyle::Colon,
            quoted: false,
            string_quoted: false,
            expression: None,
        }))));
    }
    // Session parameter token (@@name).
    if self.match_token(TokenType::SessionParameter) {
        let name = self.previous().text.clone();
        return Ok(Some(Expression::SessionParameter(Box::new(
            SessionParameter {
                this: Box::new(Expression::Identifier(Identifier::new(name))),
                kind: None,
            },
        ))));
    }
    Ok(None)
}
/// parse_paren - Ported from Python _parse_paren
/// Parses parenthesized expressions: (expr), (select ...), (a, b, c),
/// the empty tuple (), and ClickHouse single-element tuples like (1,).
/// Returns `Ok(None)` when the next token is not an opening paren.
#[allow(unused_variables, unused_mut)]
pub fn parse_paren(&mut self) -> Result<Option<Expression>> {
    if !self.match_token(TokenType::LParen) {
        return Ok(None);
    }
    // Empty tuple: ()
    if self.match_token(TokenType::RParen) {
        return Ok(Some(Expression::Tuple(Box::new(Tuple {
            expressions: Vec::new(),
        }))));
    }
    // Subquery: (SELECT ...) / (WITH ...). ClickHouse additionally allows
    // (EXPLAIN ...), where EXPLAIN arrives as a Var token.
    if self.check(TokenType::Select)
        || self.check(TokenType::With)
        || (matches!(
            self.config.dialect,
            Some(crate::dialects::DialectType::ClickHouse)
        ) && self.check(TokenType::Var)
            && self.peek().text.eq_ignore_ascii_case("EXPLAIN"))
    {
        let query = self.parse_statement()?;
        self.expect(TokenType::RParen)?;
        return Ok(Some(Expression::Subquery(Box::new(Subquery {
            this: query,
            alias: None,
            column_aliases: Vec::new(),
            order_by: None,
            limit: None,
            offset: None,
            lateral: false,
            // Modifiers written inside the parens belong to the inner query.
            modifiers_inside: true,
            trailing_comments: Vec::new(),
            distribute_by: None,
            sort_by: None,
            cluster_by: None,
            inferred_type: None,
        }))));
    }
    // Otherwise: comma-separated expression list. A parse failure stops the
    // list (the error is swallowed); the expect() below then reports any
    // position mismatch.
    let mut expressions = Vec::new();
    let mut trailing_comma = false;
    loop {
        match self.parse_expression() {
            Ok(expr) => expressions.push(expr),
            Err(_) => break,
        }
        if !self.match_token(TokenType::Comma) {
            break;
        }
        // ClickHouse: trailing comma makes a single-element tuple, e.g., (1,)
        if self.check(TokenType::RParen) {
            trailing_comma = true;
            break;
        }
    }
    self.expect(TokenType::RParen)?;
    // Single expression with trailing comma -> one-element tuple, e.g., (1,)
    if trailing_comma && expressions.len() == 1 {
        return Ok(Some(Expression::Tuple(Box::new(Tuple { expressions }))));
    }
    // Single expression -> keep a Paren wrapper around it.
    if expressions.len() == 1 {
        return Ok(Some(Expression::Paren(Box::new(Paren {
            this: expressions.remove(0),
            trailing_comments: Vec::new(),
        }))));
    }
    // Multiple expressions -> tuple.
    Ok(Some(Expression::Tuple(Box::new(Tuple { expressions }))))
}
/// parse_partition - Parses a PARTITION/SUBPARTITION clause into
/// [`Expression::Partition`].
/// Python: _parse_partition (PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"})
pub fn parse_partition(&mut self) -> Result<Option<Expression>> {
    if !self.match_texts(&["PARTITION", "SUBPARTITION"]) {
        return Ok(None);
    }
    // Remember which keyword matched.
    let is_sub = self.previous().text.eq_ignore_ascii_case("SUBPARTITION");
    // Without a parenthesized list we still return an (empty) Partition.
    if !self.match_token(TokenType::LParen) {
        return Ok(Some(Expression::Partition(Box::new(Partition {
            expressions: Vec::new(),
            subpartition: is_sub,
        }))));
    }
    // Comma-separated disjunction expressions inside the parens.
    let mut exprs = Vec::new();
    while let Some(expr) = self.parse_disjunction()? {
        exprs.push(expr);
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    // Closing paren is consumed opportunistically (not required).
    self.match_token(TokenType::RParen);
    Ok(Some(Expression::Partition(Box::new(Partition {
        expressions: exprs,
        subpartition: is_sub,
    }))))
}
/// parse_partition_and_order - Delegates to [`Self::parse_partition_by`];
/// this method adds no behavior of its own.
#[allow(unused_variables, unused_mut)]
pub fn parse_partition_and_order(&mut self) -> Result<Option<Expression>> {
    self.parse_partition_by()
}
/// parse_partition_bound_spec - Implemented from Python _parse_partition_bound_spec
/// Calls: parse_bitwise, parse_number
///
/// NOTE(review): this legacy variant is a stub — MINVALUE produces an empty
/// PartitionBoundSpec, and the MAXVALUE / TO arms consume their keywords but
/// return None. Presumably the full FROM (...) TO (...) bound parsing lives
/// elsewhere; verify before relying on this method.
#[allow(unused_variables, unused_mut)]
pub fn parse_partition_bound_spec_legacy(&mut self) -> Result<Option<Expression>> {
    if self.match_text_seq(&["MINVALUE"]) {
        return Ok(Some(Expression::PartitionBoundSpec(Box::new(
            PartitionBoundSpec {
                this: None,
                expression: None,
                from_expressions: None,
                to_expressions: None,
            },
        ))));
    }
    if self.match_text_seq(&["MAXVALUE"]) {
        // Matched: MAXVALUE (consumed, no node produced)
        return Ok(None);
    }
    if self.match_text_seq(&["TO"]) {
        // Matched: TO (consumed, no node produced)
        return Ok(None);
    }
    Ok(None)
}
/// parse_partition_by - Ported from Python _parse_partition_by.
/// Parses `PARTITION BY expr, expr, ...` into a Tuple of expressions;
/// returns `Ok(None)` when PARTITION BY is not next.
pub fn parse_partition_by(&mut self) -> Result<Option<Expression>> {
    if !self.match_keywords(&[TokenType::Partition, TokenType::By]) {
        return Ok(None);
    }
    let exprs = self.parse_expression_list()?;
    Ok(Some(Expression::Tuple(Box::new(Tuple {
        expressions: exprs,
    }))))
}
/// parse_partitioned_by - Parses a PARTITIONED BY clause value into a
/// [`Expression::PartitionedByProperty`].
/// Python: _parse_partitioned_by
pub fn parse_partitioned_by(&mut self) -> Result<Option<Expression>> {
    // An optional '=' may precede the value.
    self.match_token(TokenType::Eq);
    // Try a schema first, then a bracketed field, then a plain field,
    // in that order.
    let mut inner = self.parse_schema()?;
    if inner.is_none() {
        inner = self.parse_bracket()?;
    }
    if inner.is_none() {
        inner = self.parse_field()?;
    }
    Ok(inner.map(|expr| {
        Expression::PartitionedByProperty(Box::new(PartitionedByProperty {
            this: Box::new(expr),
        }))
    }))
}
/// parse_partitioned_by_bucket_or_truncate - Parses BUCKET / TRUNCATE
/// partition transforms.
/// Python: _parse_partitioned_by_bucket_or_truncate
/// Syntax: BUCKET(col, num_buckets) or TRUNCATE(col, width)
/// Accepts both Hive (num, col) and Trino (col, num) argument orders and
/// canonicalizes to (col, num).
pub fn parse_partitioned_by_bucket_or_truncate(&mut self) -> Result<Option<Expression>> {
    // Without a following '(' this is a plain identifier, not a transform:
    // step back over the BUCKET/TRUNCATE token and decline.
    if !self.check(TokenType::LParen) {
        self.current = self.current.saturating_sub(1);
        return Ok(None);
    }
    // The keyword that got us here decides which node to build.
    let is_bucket = self.previous().text.eq_ignore_ascii_case("BUCKET");
    self.expect(TokenType::LParen)?;
    // Collect the transform arguments.
    let mut args = Vec::new();
    if !self.check(TokenType::RParen) {
        loop {
            if let Some(arg) = self.parse_primary_or_var()? {
                args.push(arg);
            } else if let Some(col) = self.parse_column()? {
                args.push(col);
            }
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
    }
    self.match_token(TokenType::RParen);
    let (mut col_arg, mut num_arg) = (args.first().cloned(), args.get(1).cloned());
    // If the first argument is a literal we are looking at the Hive
    // (num, col) order — swap to canonical (col, num).
    if matches!(&col_arg, Some(Expression::Literal(_))) {
        std::mem::swap(&mut col_arg, &mut num_arg);
    }
    // Missing arguments degrade to Null rather than erroring.
    let this = Box::new(col_arg.unwrap_or(Expression::Null(Null)));
    let expression = Box::new(num_arg.unwrap_or(Expression::Null(Null)));
    Ok(Some(if is_bucket {
        Expression::PartitionedByBucket(Box::new(PartitionedByBucket { this, expression }))
    } else {
        Expression::PartitionByTruncate(Box::new(PartitionByTruncate { this, expression }))
    }))
}
/// parse_doris_partition_by_range_or_list - Parses Doris PARTITION BY RANGE/LIST syntax
/// Handles:
/// PARTITION BY RANGE (`col`) (PARTITION name VALUES LESS THAN (val), ...)
/// PARTITION BY RANGE (`col`) (PARTITION name VALUES [(val1), (val2)), ...)
/// PARTITION BY RANGE (`col`) (FROM ('start') TO ('end') INTERVAL n UNIT)
/// PARTITION BY LIST (`col`) (PARTITION name VALUES IN (val1, val2), ...)
///
/// `kind` selects the node type: "LIST" builds PartitionByListProperty,
/// anything else builds PartitionByRangeProperty.
fn parse_doris_partition_by_range_or_list(&mut self, kind: &str) -> Result<Expression> {
    // Partition column expressions: (`col1`, `col2`, ...) — parse_wrapped_csv
    // also handles function calls like (STR2DATE(col, fmt)).
    let partition_expressions = self.parse_wrapped_csv()?;
    // Optional second paren group containing the partition definitions.
    let create_expressions = if self.check(TokenType::LParen) {
        self.skip(); // consume (
        if kind == "LIST" {
            // LIST partition definitions: PARTITION name VALUES IN (val1, val2), ...
            let partitions = self.parse_doris_list_partition_definitions()?;
            self.expect(TokenType::RParen)?;
            Some(Box::new(Expression::Tuple(Box::new(Tuple {
                expressions: partitions,
            }))))
        } else {
            // RANGE: dispatch on the first token — FROM (dynamic),
            // START (StarRocks dynamic), or PARTITION (static definitions).
            if self.check(TokenType::From) {
                // Dynamic: FROM ('start') TO ('end') INTERVAL n UNIT
                let dynamic_expr = self.parse_doris_dynamic_partition()?;
                self.expect(TokenType::RParen)?;
                Some(Box::new(dynamic_expr))
            } else if self.check(TokenType::Start) {
                // StarRocks dynamic: START ('val') END ('val') EVERY (expr), ...
                let mut dynamics = Vec::new();
                loop {
                    if !self.check(TokenType::Start) {
                        break;
                    }
                    let dynamic_expr = self.parse_starrocks_start_end_every()?;
                    dynamics.push(dynamic_expr);
                    if !self.match_token(TokenType::Comma) {
                        break;
                    }
                }
                self.expect(TokenType::RParen)?;
                Some(Box::new(Expression::Tuple(Box::new(Tuple {
                    expressions: dynamics,
                }))))
            } else if self.check(TokenType::Partition) {
                // Static: PARTITION name VALUES LESS THAN (val) or VALUES [(val1), (val2))
                let partitions = self.parse_doris_range_partition_definitions()?;
                self.expect(TokenType::RParen)?;
                Some(Box::new(Expression::Tuple(Box::new(Tuple {
                    expressions: partitions,
                }))))
            } else {
                // Empty definition group: just consume the closing paren.
                self.expect(TokenType::RParen)?;
                None
            }
        }
    } else {
        None
    };
    // Wrap in the appropriate property node.
    if kind == "LIST" {
        Ok(Expression::PartitionByListProperty(Box::new(
            PartitionByListProperty {
                partition_expressions: partition_expressions.map(Box::new),
                create_expressions,
            },
        )))
    } else {
        Ok(Expression::PartitionByRangeProperty(Box::new(
            PartitionByRangeProperty {
                partition_expressions: partition_expressions.map(Box::new),
                create_expressions,
            },
        )))
    }
}
/// Parse Doris LIST partition definitions:
/// `PARTITION name VALUES IN (val1, val2), ...`
/// Each definition becomes Partition(PartitionList(name, values)).
fn parse_doris_list_partition_definitions(&mut self) -> Result<Vec<Expression>> {
    let mut out = Vec::new();
    while self.match_token(TokenType::Partition) {
        // Partition name; degrade to Null when absent.
        let name = self.parse_id_var()?.unwrap_or(Expression::Null(Null));
        // VALUES IN is consumed best-effort.
        self.match_text_seq(&["VALUES", "IN"]);
        let values = self.parse_wrapped_csv_expressions()?;
        let list_node = Expression::PartitionList(Box::new(PartitionList {
            this: Box::new(name),
            expressions: values,
        }));
        out.push(Expression::Partition(Box::new(Partition {
            expressions: vec![list_node],
            subpartition: false,
        })));
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    Ok(out)
}
/// Parse Doris RANGE partition definitions:
/// `PARTITION name VALUES LESS THAN (val | MAXVALUE)` or
/// `PARTITION name VALUES [(val1), (val2))` (half-open interval syntax).
/// Each definition is wrapped as Partition(PartitionRange(...)).
fn parse_doris_range_partition_definitions(&mut self) -> Result<Vec<Expression>> {
    let mut partitions = Vec::new();
    loop {
        if !self.match_token(TokenType::Partition) {
            break;
        }
        // Partition name; degrade to Null when absent.
        let name = self.parse_id_var()?.unwrap_or(Expression::Null(Null));
        // VALUES keyword is consumed best-effort.
        self.match_text_seq(&["VALUES"]);
        let part_range = if self.match_text_seq(&["LESS", "THAN"]) {
            if self.match_token(TokenType::Maxvalue) {
                // VALUES LESS THAN MAXVALUE (without parens)
                Expression::PartitionRange(Box::new(PartitionRange {
                    this: Box::new(name),
                    expression: None,
                    expressions: vec![Expression::Identifier(Identifier::new("MAXVALUE"))],
                }))
            } else {
                // VALUES LESS THAN (val); a parenthesized (MAXVALUE) is
                // handled inside parse_wrapped_csv_expressions.
                let values = self.parse_wrapped_csv_expressions()?;
                Expression::PartitionRange(Box::new(PartitionRange {
                    this: Box::new(name),
                    expression: None,
                    expressions: values,
                }))
            }
        } else if self.check(TokenType::LBracket) {
            // VALUES [(val1), (val2)) — note the asymmetric brackets:
            // opens with '[' and closes with ')'.
            self.skip(); // consume [
            let mut value_tuples = Vec::new();
            loop {
                let vals = self.parse_wrapped_csv_expressions()?;
                // Each (val, ...) group becomes its own Tuple.
                value_tuples.push(Expression::Tuple(Box::new(Tuple { expressions: vals })));
                if !self.match_token(TokenType::Comma) {
                    break;
                }
            }
            // The half-open interval closes with ')'.
            self.expect(TokenType::RParen)?;
            Expression::PartitionRange(Box::new(PartitionRange {
                this: Box::new(name),
                expression: None,
                expressions: value_tuples,
            }))
        } else {
            // Fallback: a named partition with no values.
            Expression::PartitionRange(Box::new(PartitionRange {
                this: Box::new(name),
                expression: None,
                expressions: Vec::new(),
            }))
        };
        partitions.push(Expression::Partition(Box::new(Partition {
            expressions: vec![part_range],
            subpartition: false,
        })));
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    Ok(partitions)
}
/// Parse a Doris dynamic partition clause:
/// `FROM ('start') TO ('end') INTERVAL n UNIT`.
/// The unit keyword is normalized to an IntervalUnit (defaulting to Day for
/// unrecognized text); plural spelling ("DAYS") is preserved via use_plural.
fn parse_doris_dynamic_partition(&mut self) -> Result<Expression> {
    self.expect(TokenType::From)?;
    let start = self.parse_wrapped_expression()?;
    self.expect(TokenType::To)?;
    let end = self.parse_wrapped_expression()?;
    // Optional INTERVAL n UNIT step.
    let every = if self.match_token(TokenType::Interval) {
        let number = self.parse_expression()?;
        // The unit may be a plain identifier or a keyword usable as one.
        let unit = if self.is_identifier_token() || self.is_safe_keyword_as_identifier() {
            let unit_text = self.advance().text.to_ascii_uppercase();
            // Map the unit text onto the IntervalUnit enum.
            let interval_unit = match unit_text.as_str() {
                "YEAR" | "YEARS" => crate::expressions::IntervalUnit::Year,
                "MONTH" | "MONTHS" => crate::expressions::IntervalUnit::Month,
                "DAY" | "DAYS" => crate::expressions::IntervalUnit::Day,
                "HOUR" | "HOURS" => crate::expressions::IntervalUnit::Hour,
                "MINUTE" | "MINUTES" => crate::expressions::IntervalUnit::Minute,
                "SECOND" | "SECONDS" => crate::expressions::IntervalUnit::Second,
                _ => crate::expressions::IntervalUnit::Day, // Default fallback
            };
            Some(crate::expressions::IntervalUnitSpec::Simple {
                unit: interval_unit,
                use_plural: unit_text.ends_with('S'),
            })
        } else {
            None
        };
        Some(Box::new(Expression::Interval(Box::new(Interval {
            this: Some(number),
            unit,
        }))))
    } else {
        None
    };
    // use_start_end=false distinguishes this FROM/TO form from the
    // StarRocks START/END form.
    Ok(Expression::PartitionByRangePropertyDynamic(Box::new(
        PartitionByRangePropertyDynamic {
            this: None,
            start: Some(Box::new(start)),
            end: Some(Box::new(end)),
            every,
            use_start_end: false,
        },
    )))
}
/// Parse the StarRocks dynamic partition form:
/// `START ('val') END ('val') EVERY (expr)`.
/// use_start_end=true marks the StarRocks variant of
/// PartitionByRangePropertyDynamic.
fn parse_starrocks_start_end_every(&mut self) -> Result<Expression> {
    self.expect(TokenType::Start)?;
    let start_value = self.parse_wrapped_expression()?;
    self.expect(TokenType::End)?;
    let end_value = self.parse_wrapped_expression()?;
    // Optional EVERY (expr) step.
    let mut every = None;
    if self.match_identifier("EVERY") {
        self.expect(TokenType::LParen)?;
        let step = self.parse_expression()?;
        self.expect(TokenType::RParen)?;
        every = Some(Box::new(step));
    }
    Ok(Expression::PartitionByRangePropertyDynamic(Box::new(
        PartitionByRangePropertyDynamic {
            this: None,
            start: Some(Box::new(start_value)),
            end: Some(Box::new(end_value)),
            every,
            use_start_end: true,
        },
    )))
}
/// Parse a wrapped, comma-separated expression list: `(expr, expr, ...)`.
/// The bare keyword MAXVALUE is accepted as a list element and stored as a
/// Var node.
fn parse_wrapped_csv_expressions(&mut self) -> Result<Vec<Expression>> {
    self.expect(TokenType::LParen)?;
    let mut items = Vec::new();
    if !self.check(TokenType::RParen) {
        loop {
            let item = if self.match_token(TokenType::Maxvalue) {
                Expression::Var(Box::new(Var {
                    this: "MAXVALUE".to_string(),
                }))
            } else {
                self.parse_expression()?
            };
            items.push(item);
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
    }
    self.expect(TokenType::RParen)?;
    Ok(items)
}
/// Parse a single parenthesized expression: `(expr)`.
/// Both parens are required.
fn parse_wrapped_expression(&mut self) -> Result<Expression> {
    self.expect(TokenType::LParen)?;
    let inner = self.parse_expression()?;
    self.expect(TokenType::RParen)?;
    Ok(inner)
}
/// parse_partitioned_of - Implemented from Python _parse_partitioned_of
///
/// NOTE(review): stub — OF yields an empty PartitionBoundSpec and
/// FOR VALUES is consumed without producing a node. Python's
/// _parse_partitioned_of parses the full `OF parent FOR VALUES ...` form;
/// confirm before relying on this method.
#[allow(unused_variables, unused_mut)]
pub fn parse_partitioned_of(&mut self) -> Result<Option<Expression>> {
    if self.match_text_seq(&["OF"]) {
        return Ok(Some(Expression::PartitionBoundSpec(Box::new(
            PartitionBoundSpec {
                this: None,
                expression: None,
                from_expressions: None,
                to_expressions: None,
            },
        ))));
    }
    if self.match_text_seq(&["FOR", "VALUES"]) {
        // Matched: FOR VALUES (consumed, nothing produced)
        return Ok(None);
    }
    Ok(None)
}
/// parse_period_for_system_time - Parses a PERIOD FOR SYSTEM_TIME constraint.
/// Python: _parse_period_for_system_time
/// Syntax: PERIOD FOR SYSTEM_TIME (start_col, end_col)
pub fn parse_period_for_system_time(&mut self) -> Result<Option<Expression>> {
    // The SYSTEM_TIME part arrives as a TimestampSnapshot token; if it is
    // not next, step back one token and decline.
    if !self.match_token(TokenType::TimestampSnapshot) {
        self.current = self.current.saturating_sub(1);
        return Ok(None);
    }
    // The two period columns come back wrapped in a Tuple.
    let Some(Expression::Tuple(cols)) = self.parse_wrapped_id_vars()? else {
        return Ok(None);
    };
    // Missing columns degrade to Null rather than erroring.
    let start_col = cols.expressions.first().cloned().unwrap_or(Expression::Null(Null));
    let end_col = cols.expressions.get(1).cloned().unwrap_or(Expression::Null(Null));
    Ok(Some(Expression::PeriodForSystemTimeConstraint(Box::new(
        PeriodForSystemTimeConstraint {
            this: Box::new(start_col),
            expression: Box::new(end_col),
        },
    ))))
}
/// parse_pipe_syntax_aggregate - Implemented from Python _parse_pipe_syntax_aggregate
///
/// NOTE(review): partial implementation — after consuming AGGREGATE this
/// returns an empty Select scaffold (no aggregate expressions are parsed),
/// and the "GROUP AND" arm consumes its keywords without producing a node.
/// Confirm against Python _parse_pipe_syntax_aggregate before extending.
#[allow(unused_variables, unused_mut)]
pub fn parse_pipe_syntax_aggregate(&mut self) -> Result<Option<Expression>> {
    if self.match_text_seq(&["AGGREGATE"]) {
        // Empty SELECT used as the aggregate step's placeholder node.
        return Ok(Some(Expression::Select(Box::new(Select {
            expressions: Vec::new(),
            from: None,
            joins: Vec::new(),
            lateral_views: Vec::new(),
            prewhere: None,
            where_clause: None,
            group_by: None,
            having: None,
            qualify: None,
            order_by: None,
            distribute_by: None,
            cluster_by: None,
            sort_by: None,
            limit: None,
            offset: None,
            limit_by: None,
            fetch: None,
            distinct: false,
            distinct_on: None,
            top: None,
            with: None,
            sample: None,
            settings: None,
            format: None,
            windows: None,
            hint: None,
            connect: None,
            into: None,
            locks: Vec::new(),
            for_xml: Vec::new(),
            for_json: Vec::new(),
            leading_comments: Vec::new(),
            post_select_comments: Vec::new(),
            kind: None,
            operation_modifiers: Vec::new(),
            qualify_after_window: false,
            option: None,
            exclude: None,
        }))));
    }
    if self.match_text_seq(&["GROUP", "AND"]) {
        // Matched: GROUP AND (consumed, nothing produced)
        return Ok(None);
    }
    Ok(None)
}
/// parse_pipe_syntax_aggregate_fields - Implemented from Python _parse_pipe_syntax_aggregate_fields
/// Calls: parse_disjunction
///
/// NOTE(review): currently a stub — it only consumes a "GROUP AND" text
/// sequence and always returns None, so its caller
/// (parse_pipe_syntax_aggregate_group_order_by) never receives a field.
#[allow(unused_variables, unused_mut)]
pub fn parse_pipe_syntax_aggregate_fields(&mut self) -> Result<Option<Expression>> {
    if self.match_text_seq(&["GROUP", "AND"]) {
        // Matched: GROUP AND (consumed, nothing produced)
        return Ok(None);
    }
    Ok(None)
}
/// parse_pipe_syntax_aggregate_group_order_by - Parses pipe syntax aggregate fields with grouping and ordering
/// Python: _parse_pipe_syntax_aggregate_group_order_by
/// Parses comma-separated aggregate fields and separates them into
/// aggregates/groups and ORDER BY specs.
/// Returns a Tuple with two elements: (aggregates_and_groups, order_by_specs).
///
/// NOTE(review): parse_pipe_syntax_aggregate_fields currently always returns
/// None (stub), so in practice this loop collects nothing and the method
/// returns None — verify once the field parser is implemented.
pub fn parse_pipe_syntax_aggregate_group_order_by(&mut self) -> Result<Option<Expression>> {
    let mut aggregates_or_groups = Vec::new();
    let mut orders = Vec::new();
    loop {
        if let Some(element) = self.parse_pipe_syntax_aggregate_fields()? {
            match &element {
                // An Ordered element contributes both to the ORDER BY list
                // and (unwrapped) to the aggregates/groups list.
                Expression::Ordered(ordered) => {
                    // If the ordered expression is aliased, refer to it by
                    // its alias name from here on.
                    let this = match &ordered.this {
                        Expression::Alias(alias) => {
                            Expression::Identifier(alias.alias.clone())
                        }
                        other => other.clone(),
                    };
                    // Rebuild the Ordered around the (possibly unaliased)
                    // expression, keeping all ordering flags.
                    orders.push(Expression::Ordered(Box::new(Ordered {
                        this: this.clone(),
                        desc: ordered.desc,
                        nulls_first: ordered.nulls_first,
                        explicit_asc: ordered.explicit_asc,
                        with_fill: ordered.with_fill.clone(),
                    })));
                    aggregates_or_groups.push(this);
                }
                _ => {
                    aggregates_or_groups.push(element);
                }
            }
        }
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    if aggregates_or_groups.is_empty() && orders.is_empty() {
        return Ok(None);
    }
    // Package as (aggregates_or_groups, orders).
    Ok(Some(Expression::Tuple(Box::new(Tuple {
        expressions: vec![
            Expression::Tuple(Box::new(Tuple {
                expressions: aggregates_or_groups,
            })),
            Expression::Tuple(Box::new(Tuple {
                expressions: orders,
            })),
        ],
    }))))
}
/// parse_pipe_syntax_extend - Implemented from Python _parse_pipe_syntax_extend
///
/// NOTE(review): partial implementation — after consuming EXTEND this
/// returns an empty Select scaffold without parsing the extend expressions.
/// Confirm against Python _parse_pipe_syntax_extend before extending.
#[allow(unused_variables, unused_mut)]
pub fn parse_pipe_syntax_extend(&mut self) -> Result<Option<Expression>> {
    if self.match_text_seq(&["EXTEND"]) {
        // Empty SELECT used as the extend step's placeholder node.
        return Ok(Some(Expression::Select(Box::new(Select {
            expressions: Vec::new(),
            from: None,
            joins: Vec::new(),
            lateral_views: Vec::new(),
            prewhere: None,
            where_clause: None,
            group_by: None,
            having: None,
            qualify: None,
            order_by: None,
            distribute_by: None,
            cluster_by: None,
            sort_by: None,
            limit: None,
            offset: None,
            limit_by: None,
            fetch: None,
            distinct: false,
            distinct_on: None,
            top: None,
            with: None,
            sample: None,
            settings: None,
            format: None,
            windows: None,
            hint: None,
            connect: None,
            into: None,
            locks: Vec::new(),
            for_xml: Vec::new(),
            for_json: Vec::new(),
            leading_comments: Vec::new(),
            post_select_comments: Vec::new(),
            kind: None,
            operation_modifiers: Vec::new(),
            qualify_after_window: false,
            option: None,
            exclude: None,
        }))));
    }
    Ok(None)
}
/// parse_pipe_syntax_join - Parses JOIN in BigQuery pipe syntax.
/// Python: _parse_pipe_syntax_join
/// Format: |> JOIN table ON condition
///
/// Pipe-syntax joins reuse the regular JOIN grammar, so this is a pure
/// delegation to [`Self::parse_join`].
pub fn parse_pipe_syntax_join(&mut self) -> Result<Option<Expression>> {
    // Parse the JOIN clause
    self.parse_join()
}
/// parse_pipe_syntax_limit - Parses LIMIT/OFFSET in BigQuery pipe syntax.
/// Python: _parse_pipe_syntax_limit
/// Format: |> LIMIT n [OFFSET m]
///
/// When both clauses are present they are packaged as a two-element Tuple;
/// a single clause is returned directly; neither yields None.
pub fn parse_pipe_syntax_limit(&mut self) -> Result<Option<Expression>> {
    let limit = self.parse_limit()?;
    let offset = self.parse_offset()?;
    Ok(match (limit, offset) {
        (Some(l), Some(o)) => Some(Expression::Tuple(Box::new(Tuple {
            expressions: vec![l, o],
        }))),
        // Whichever one (if any) was parsed.
        (l, o) => l.or(o),
    })
}
/// parse_pipe_syntax_pivot - Parses PIVOT in BigQuery pipe syntax.
/// Python: _parse_pipe_syntax_pivot
/// Format: |> PIVOT (agg_function FOR column IN (values))
///
/// In pipe syntax there is no pivot source at this point, so only the
/// aggregation part is parsed here; the query transformer applies the
/// actual pivot later.
pub fn parse_pipe_syntax_pivot(&mut self) -> Result<Option<Expression>> {
    self.parse_pivot_aggregation()
}
/// parse_pipe_syntax_query - Parses a query with pipe syntax transformations.
/// Python: _parse_pipe_syntax_query
/// Handles queries like: FROM table |> WHERE ... |> SELECT ... |> AGGREGATE ...
///
/// Each `|>` step wraps the accumulated query in a PipeOperator node.
///
/// NOTE(review): two subtleties worth confirming against the Python source:
/// (1) `start_pos` is captured after the PipeGt token is consumed, so the
/// retreat on an unsupported operator leaves the parser positioned after
/// the pipe, not before it; (2) when a recognized operator parses to None,
/// the pipe step is silently dropped and the loop continues.
pub fn parse_pipe_syntax_query(&mut self) -> Result<Option<Expression>> {
    // Base query (a FROM clause or subquery); nothing to pipe without one.
    let mut query = self.parse_select_query()?;
    if query.is_none() {
        return Ok(None);
    }
    // Process the pipe chain: |> transform1 |> transform2 |> ...
    while self.match_token(TokenType::PipeGt) {
        let start_pos = self.current;
        // Dispatch on the (upper-cased) keyword following the pipe.
        let operator_text = self.peek().text.to_ascii_uppercase();
        let transform_result = match operator_text.as_str() {
            "WHERE" => {
                self.skip();
                self.parse_where()?
            }
            "SELECT" => {
                self.skip();
                self.parse_pipe_syntax_select()?
            }
            "AGGREGATE" => {
                self.skip();
                self.parse_pipe_syntax_aggregate()?
            }
            "EXTEND" => {
                self.skip();
                self.parse_pipe_syntax_extend()?
            }
            "LIMIT" => {
                self.skip();
                self.parse_pipe_syntax_limit()?
            }
            // Join keywords are NOT skipped: the join parser consumes them.
            "JOIN" | "LEFT" | "RIGHT" | "INNER" | "OUTER" | "CROSS" | "FULL" => {
                self.parse_pipe_syntax_join()?
            }
            "UNION" | "INTERSECT" | "EXCEPT" => self.parse_pipe_syntax_set_operator()?,
            "PIVOT" => {
                self.skip();
                self.parse_pipe_syntax_pivot()?
            }
            "TABLESAMPLE" => {
                self.skip();
                self.parse_pipe_syntax_tablesample()?
            }
            _ => {
                // Unknown keyword: try a set operator, then a join, before
                // giving up and rewinding to just after the pipe token.
                let set_op = self.parse_pipe_syntax_set_operator()?;
                if set_op.is_some() {
                    set_op
                } else {
                    let join_op = self.parse_pipe_syntax_join()?;
                    if join_op.is_some() {
                        join_op
                    } else {
                        // Unsupported operator: retreat and stop piping.
                        self.current = start_pos;
                        break;
                    }
                }
            }
        };
        // Fold the transform into the accumulated query.
        if let Some(transform) = transform_result {
            let current_query = query.ok_or_else(|| {
                self.parse_error("Expected base query before pipe syntax transform")
            })?;
            query = Some(Expression::PipeOperator(Box::new(PipeOperator {
                this: current_query,
                expression: transform,
            })));
        }
    }
    Ok(query)
}
/// parse_pipe_syntax_select - Parses the projection of a pipe-syntax SELECT
/// step (`|> SELECT expressions`); an empty projection becomes `*`.
/// Python: _parse_pipe_syntax_select
pub fn parse_pipe_syntax_select(&mut self) -> Result<Option<Expression>> {
    let projection = self.parse_expressions()?;
    // No explicit projection means "select everything".
    Ok(Some(projection.unwrap_or_else(|| {
        Expression::Star(Star {
            table: None,
            except: None,
            replace: None,
            rename: None,
            trailing_comments: Vec::new(),
            span: None,
        })
    })))
}
/// parse_pipe_syntax_set_operator - Parses a set operation in BigQuery pipe
/// syntax (`|> UNION ALL / INTERSECT / EXCEPT ...`).
/// Python: _parse_pipe_syntax_set_operator
pub fn parse_pipe_syntax_set_operator(&mut self) -> Result<Option<Expression>> {
    // parse_set_operations already yields Result<Option<_>>, so the
    // previous if-let re-wrap was redundant — delegate directly.
    self.parse_set_operations()
}
/// parse_pipe_syntax_tablesample - Parses TABLESAMPLE in BigQuery pipe syntax.
/// Python: _parse_pipe_syntax_tablesample
/// Format: |> TABLESAMPLE SYSTEM (percent PERCENT)
///
/// Pure delegation: pipe-syntax sampling reuses the regular TABLESAMPLE
/// grammar.
pub fn parse_pipe_syntax_tablesample(&mut self) -> Result<Option<Expression>> {
    // Parse the TABLESAMPLE clause
    self.parse_table_sample()
}
/// parse_pivot_aggregation - Ported from Python _parse_pivot_aggregation.
/// Parses an aggregation function inside a PIVOT clause, with an optional
/// trailing alias. Returns `Ok(None)` when no function is present.
pub fn parse_pivot_aggregation(&mut self) -> Result<Option<Expression>> {
    let func = self.parse_function()?;
    if func.is_none() {
        // No function here. The original distinguished an "after a comma"
        // case, but both branches resolved to None, so we return directly.
        return Ok(None);
    }
    // Attach an optional alias to the parsed function.
    self.parse_alias_with_expr(func)
}
/// parse_pivot_in - Parses the IN clause of a PIVOT
/// Python: _parse_pivot_in
/// Format: column IN (value1 [AS alias1], value2 [AS alias2], ...)
pub fn parse_pivot_in(&mut self) -> Result<Option<Expression>> {
    // Parse the column being pivoted. A missing column is tolerated and
    // represented as NULL rather than an error.
    let value = self.parse_column()?;
    let value_expr = value.unwrap_or(Expression::Null(Null));
    // The IN keyword is mandatory in a PIVOT clause.
    if !self.match_token(TokenType::In) {
        return Err(self.parse_error("Expecting IN"));
    }
    // Two surface forms: a parenthesized value list, or a bare field
    // reference (`IN field_name`, flagged via `is_field`).
    if self.match_token(TokenType::LParen) {
        // Check for the ANY keyword (dynamic pivot).
        let expressions = if self.match_text_seq(&["ANY"]) {
            // ANY [ORDER BY ...] — wrap the optional ordering in PivotAny.
            let order = self.parse_order()?;
            vec![Expression::PivotAny(Box::new(PivotAny {
                this: order.map(Box::new),
            }))]
        } else {
            // Comma-separated list of pivot values, each optionally aliased
            // with AS (tokenized as TokenType::Alias).
            let mut exprs = Vec::new();
            loop {
                if let Some(expr) = self.parse_select_or_expression()? {
                    // Check for alias
                    let final_expr = if self.match_token(TokenType::Alias) {
                        if let Some(alias) = self.parse_bitwise()? {
                            // Store the alias expression directly
                            Expression::PivotAlias(Box::new(PivotAlias { this: expr, alias }))
                        } else {
                            // AS with no following expression: keep the bare value.
                            expr
                        }
                    } else {
                        expr
                    };
                    exprs.push(final_expr);
                } else {
                    break;
                }
                if !self.match_token(TokenType::Comma) {
                    break;
                }
            }
            exprs
        };
        self.expect(TokenType::RParen)?;
        Ok(Some(Expression::In(Box::new(In {
            this: value_expr,
            expressions,
            query: None,
            not: false,
            global: false,
            unnest: None,
            is_field: false,
        }))))
    } else {
        // Parse as a field reference: IN field_name
        let field = self.parse_id_var()?;
        // A missing field yields an empty expression list rather than an error.
        let expressions = if let Some(f) = field {
            vec![f]
        } else {
            Vec::new()
        };
        Ok(Some(Expression::In(Box::new(In {
            this: value_expr,
            expressions,
            query: None,
            not: false,
            global: false,
            unnest: None,
            is_field: true,
        }))))
    }
}
/// parse_pivots - Ported from Python _parse_pivots
/// Parses one or more PIVOT/UNPIVOT clauses attached to a source expression
/// Uses the existing parse_pivot/parse_unpivot methods
pub fn parse_pivots_for_source(&mut self, source: Expression) -> Result<Option<Expression>> {
    let mut result = source;
    // Fold any number of consecutive PIVOT/UNPIVOT clauses onto the source.
    loop {
        if self.match_token(TokenType::Pivot) {
            result = self.parse_pivot(result)?;
        } else if self.match_texts(&["UNPIVOT"]) {
            result = self.parse_unpivot(result)?;
        } else {
            break;
        }
    }
    // NOTE(review): this returns None only when the accumulated result is a
    // Null expression (i.e. the caller passed Null and no pivot rewrapped it);
    // a source that parsed zero pivots is still returned as Some(source).
    if matches!(result, Expression::Null(_)) {
        Ok(None)
    } else {
        Ok(Some(result))
    }
}
/// parse_placeholder - Parse placeholder token (? or :name)
/// Python: if self._match_set(self.PLACEHOLDER_PARSERS): return placeholder
pub fn parse_placeholder(&mut self) -> Result<Option<Expression>> {
    if self.match_token(TokenType::Placeholder) {
        // Anonymous positional placeholder: `?`.
        Ok(Some(Expression::Placeholder(Placeholder { index: None })))
    } else if self.match_token(TokenType::Parameter) {
        // Named placeholder (`:name`), surfaced as a colon-style Parameter.
        let name = self.previous().text.clone();
        Ok(Some(Expression::Parameter(Box::new(Parameter {
            name: Some(name),
            index: None,
            style: ParameterStyle::Colon,
            quoted: false,
            string_quoted: false,
            expression: None,
        }))))
    } else {
        Ok(None)
    }
}
/// Parse ClickHouse query parameter syntax: {name: Type}
///
/// Returns `Ok(None)` (with the cursor rewound) whenever the input does not
/// match the `{identifier: ...}` shape, so callers can fall through to other
/// interpretations of `{`. Only errors once a `{name:` prefix has committed
/// us to the parameter form.
fn parse_clickhouse_braced_parameter(&mut self) -> Result<Option<Expression>> {
    // This syntax only exists in the ClickHouse dialect.
    if !matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    ) {
        return Ok(None);
    }
    if !self.check(TokenType::LBrace) {
        return Ok(None);
    }
    // Remember the cursor so we can backtrack on a non-match.
    let start = self.current;
    self.skip(); // consume {
    if !(self.is_identifier_token() || self.is_safe_keyword_as_identifier()) {
        self.current = start;
        return Ok(None);
    }
    let name = self.advance().text.clone();
    if !self.match_token(TokenType::Colon) {
        self.current = start;
        return Ok(None);
    }
    // Scan the type portion as raw tokens, tracking paren/bracket nesting so
    // that e.g. `Array(Tuple(UInt8, String))` is consumed whole. The scan
    // stops at the closing `}` (or at an unbalanced `)` / `]`).
    let kind_start = self.current;
    let mut paren_depth = 0usize;
    let mut bracket_depth = 0usize;
    while !self.is_at_end() {
        let token_type = self.peek().token_type;
        match token_type {
            TokenType::LParen => {
                paren_depth += 1;
                self.skip();
            }
            TokenType::RParen => {
                // An unmatched `)` belongs to an enclosing construct: stop.
                if paren_depth == 0 {
                    break;
                }
                paren_depth -= 1;
                self.skip();
            }
            TokenType::LBracket => {
                bracket_depth += 1;
                self.skip();
            }
            TokenType::RBracket => {
                if bracket_depth == 0 {
                    break;
                }
                bracket_depth -= 1;
                self.skip();
            }
            TokenType::RBrace => {
                // Only a top-level `}` terminates the parameter.
                if paren_depth == 0 && bracket_depth == 0 {
                    break;
                }
                self.skip();
            }
            _ => {
                self.skip();
            }
        }
    }
    // We are committed to the parameter form here, so a missing type or
    // missing `}` is a hard error rather than a backtrack.
    if self.current <= kind_start || !self.match_token(TokenType::RBrace) {
        return Err(self.parse_error("Expected } in ClickHouse query parameter"));
    }
    // Reconstruct the type text verbatim from the scanned token range
    // (exclusive of the closing brace).
    let kind = self
        .tokens_to_sql(kind_start, self.current - 1)
        .trim()
        .to_string();
    if kind.is_empty() {
        return Err(self.parse_error("Expected parameter kind in ClickHouse query parameter"));
    }
    Ok(Some(Expression::Parameter(Box::new(Parameter {
        name: Some(name),
        index: None,
        style: ParameterStyle::Brace,
        quoted: false,
        string_quoted: false,
        expression: Some(kind),
    }))))
}
/// parse_position - Ported from Python _parse_position
/// Parses POSITION function: POSITION(substr IN str) or POSITION(needle, haystack, start)
///
/// Produces a `StrPosition` node where `this` is the haystack and `substr`
/// is the needle. Note the argument order flip: in the SQL-standard
/// `POSITION(substr IN str)` form the first parsed expression is the needle,
/// whereas in ClickHouse's comma form the first argument is the haystack.
#[allow(unused_variables, unused_mut)]
pub fn parse_position(&mut self) -> Result<Option<Expression>> {
    // Parse the first argument; bail out with None if there is nothing here.
    let mut args: Vec<Expression> = Vec::new();
    match self.parse_bitwise() {
        Ok(Some(expr)) => {
            // ClickHouse allows aliases on function arguments.
            let expr = self.maybe_clickhouse_alias(expr);
            let expr = self.try_clickhouse_func_arg_alias(expr);
            args.push(expr);
        }
        Ok(None) => return Ok(None),
        Err(e) => return Err(e),
    }
    // SQL-standard syntax: POSITION(substr IN str). The already-parsed first
    // argument becomes the needle; the expression after IN is the haystack.
    if self.match_token(TokenType::In) {
        match self.parse_bitwise() {
            Ok(Some(haystack)) => {
                let haystack = self.maybe_clickhouse_alias(haystack);
                let haystack = self.try_clickhouse_func_arg_alias(haystack);
                return Ok(Some(Expression::StrPosition(Box::new(StrPosition {
                    this: Box::new(haystack),
                    substr: Some(Box::new(args.remove(0))),
                    position: None,
                    occurrence: None,
                }))));
            }
            Ok(None) => {
                return Err(self.parse_error("Expected expression after IN in POSITION"))
            }
            Err(e) => return Err(e),
        }
    }
    // Comma form: collect the remaining arguments.
    while self.match_token(TokenType::Comma) {
        match self.parse_bitwise() {
            Ok(Some(expr)) => {
                let expr = self.maybe_clickhouse_alias(expr);
                let expr = self.try_clickhouse_func_arg_alias(expr);
                args.push(expr);
            }
            Ok(None) => break,
            Err(e) => return Err(e),
        }
    }
    // Function syntax: POSITION(needle, haystack, start?) or ClickHouse POSITION(haystack, needle, start?)
    let position = args.get(2).cloned();
    let (haystack, needle) = if matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    ) {
        (args.get(0).cloned(), args.get(1).cloned())
    } else {
        (args.get(1).cloned(), args.get(0).cloned())
    };
    // A one-argument call leaves the haystack missing; default it to ''.
    Ok(Some(Expression::StrPosition(Box::new(StrPosition {
        this: Box::new(
            haystack.unwrap_or_else(|| {
                Expression::Literal(Box::new(Literal::String("".to_string())))
            }),
        ),
        substr: needle.map(Box::new),
        position: position.map(Box::new),
        occurrence: None,
    }))))
}
/// parse_prewhere - Ported from Python _parse_prewhere
/// Parses PREWHERE clause (ClickHouse specific)
#[allow(unused_variables, unused_mut)]
pub fn parse_prewhere(&mut self) -> Result<Option<Expression>> {
    if self.match_token(TokenType::Prewhere) {
        // PREWHERE <condition>
        let this = self.parse_expression()?;
        Ok(Some(Expression::PreWhere(Box::new(PreWhere { this }))))
    } else {
        Ok(None)
    }
}
/// parse_primary_key - Parses PRIMARY KEY constraint
/// Python: _parse_primary_key
/// Can return either PrimaryKeyColumnConstraint (column-level) or PrimaryKey (table-level)
pub fn parse_primary_key(&mut self) -> Result<Option<Expression>> {
    // Default entry point: column list required, not inside a properties block.
    self.parse_primary_key_impl(false, false)
}
/// Implementation of parse_primary_key with options
///
/// * `wrapped_optional` - when true, a missing `(column_list)` yields an
///   empty expression list instead of an error.
/// * `in_props` - when true, we are inside a table-properties context and
///   always produce a table-level PrimaryKey node.
pub fn parse_primary_key_impl(
    &mut self,
    wrapped_optional: bool,
    in_props: bool,
) -> Result<Option<Expression>> {
    // Optional ASC/DESC: both tokens are consumed; only DESC is recorded.
    let desc = if self.match_token(TokenType::Asc) {
        false
    } else if self.match_token(TokenType::Desc) {
        true
    } else {
        false
    };
    // Parse optional constraint name (if current token is identifier and next is L_PAREN)
    let this = if (self.check(TokenType::Identifier) || self.check(TokenType::Var))
        && self.check_next(TokenType::LParen)
    {
        self.parse_id_var()?
    } else {
        None
    };
    // No `(` ahead and not inside properties: this is a column-level
    // constraint (e.g. `id INT PRIMARY KEY [DESC] [options]`).
    if !in_props && !self.check(TokenType::LParen) {
        let options = self.parse_key_constraint_options_list()?;
        return Ok(Some(Expression::PrimaryKeyColumnConstraint(Box::new(
            PrimaryKeyColumnConstraint {
                // Represent DESC as a boolean-true child, matching the AST shape.
                desc: if desc {
                    Some(Box::new(Expression::Boolean(BooleanLiteral {
                        value: true,
                    })))
                } else {
                    None
                },
                options,
            },
        ))));
    }
    // Parse table-level PRIMARY KEY (column_list)
    let expressions = if self.match_token(TokenType::LParen) {
        let mut exprs = Vec::new();
        loop {
            if let Some(part) = self.parse_primary_key_part()? {
                exprs.push(part);
            }
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
        self.expect(TokenType::RParen)?;
        exprs
    } else if wrapped_optional {
        // Caller allows a bare PRIMARY KEY with no column list.
        Vec::new()
    } else {
        return Err(self.parse_error("Expected '(' for PRIMARY KEY column list"));
    };
    // Parse INCLUDE clause for covering index
    let include = self.parse_index_params()?;
    // Parse constraint options
    let options = self.parse_key_constraint_options_list()?;
    Ok(Some(Expression::PrimaryKey(Box::new(PrimaryKey {
        this: this.map(Box::new),
        expressions,
        options,
        include: include.map(Box::new),
    }))))
}
/// Parse key constraint options as a list of expressions
///
/// Consumes any run of constraint options — `ON <what> <action>`,
/// `NOT ENFORCED`, `DEFERRABLE`, `INITIALLY DEFERRED`, `NORELY`, `RELY` —
/// and returns each as a `Var` expression holding its canonical text.
/// Stops at the first token that does not start a known option.
fn parse_key_constraint_options_list(&mut self) -> Result<Vec<Expression>> {
    let mut options = Vec::new();
    loop {
        if self.is_at_end() {
            break;
        }
        if self.match_token(TokenType::On) {
            // Parse ON DELETE/UPDATE action; the token after ON is taken
            // verbatim as the target (DELETE or UPDATE).
            let on_what = if !self.is_at_end() {
                let token = self.advance();
                token.text.clone()
            } else {
                break;
            };
            // BUG FIX: the original chained `match_token(Set) && match_token(Null)`
            // followed by `match_token(Set) && match_token(Default)`. For
            // `SET DEFAULT`, the first arm consumed SET, failed on NULL, and
            // the second arm could then never see SET — the loop broke and the
            // option was lost. Match SET once, then dispatch on what follows.
            let action = if self.match_text_seq(&["NO", "ACTION"]) {
                "NO ACTION"
            } else if self.match_text_seq(&["CASCADE"]) {
                "CASCADE"
            } else if self.match_text_seq(&["RESTRICT"]) {
                "RESTRICT"
            } else if self.match_token(TokenType::Set) {
                if self.match_token(TokenType::Null) {
                    "SET NULL"
                } else if self.match_token(TokenType::Default) {
                    "SET DEFAULT"
                } else {
                    break;
                }
            } else {
                break;
            };
            options.push(Expression::Var(Box::new(Var {
                this: format!("ON {} {}", on_what, action),
            })));
        } else if self.match_text_seq(&["NOT", "ENFORCED"]) {
            options.push(Expression::Var(Box::new(Var {
                this: "NOT ENFORCED".to_string(),
            })));
        } else if self.match_text_seq(&["DEFERRABLE"]) {
            options.push(Expression::Var(Box::new(Var {
                this: "DEFERRABLE".to_string(),
            })));
        } else if self.match_text_seq(&["INITIALLY", "DEFERRED"]) {
            options.push(Expression::Var(Box::new(Var {
                this: "INITIALLY DEFERRED".to_string(),
            })));
        } else if self.match_text_seq(&["NORELY"]) {
            options.push(Expression::Var(Box::new(Var {
                this: "NORELY".to_string(),
            })));
        } else if self.match_text_seq(&["RELY"]) {
            options.push(Expression::Var(Box::new(Var {
                this: "RELY".to_string(),
            })));
        } else {
            break;
        }
    }
    Ok(options)
}
/// parse_primary_key_part - Delegates to parse_field
#[allow(unused_variables, unused_mut)]
pub fn parse_primary_key_part(&mut self) -> Result<Option<Expression>> {
    // ClickHouse: PRIMARY KEY can contain full expressions (e.g., t.a, c0 IN (SELECT 1))
    let is_clickhouse = matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    );
    if is_clickhouse {
        return self.parse_expression().map(Some);
    }
    // `name(` ahead looks like a function call, which also needs full
    // expression parsing rather than a plain field.
    let call_ahead = (self.is_identifier_token() || self.is_safe_keyword_as_identifier())
        && self.check_next(TokenType::LParen);
    if call_ahead {
        return self.parse_expression().map(Some);
    }
    // Ordinary case: a plain field, falling back to a full expression.
    match self.parse_field()? {
        Some(field) => Ok(Some(field)),
        None => self.parse_expression().map(Some),
    }
}
/// parse_primary_or_var - Parses a primary expression or variable
/// Python: _parse_primary_or_var
/// Returns: parse_primary() or parse_var(any_token=True)
pub fn parse_primary_or_var(&mut self) -> Result<Option<Expression>> {
    // Try a primary expression first, remembering where we started so the
    // cursor can be rewound if it fails.
    let checkpoint = self.current;
    if let Ok(expr) = self.parse_primary() {
        return Ok(Some(expr));
    }
    // Primary failed: rewind and fall back to a variable.
    self.current = checkpoint;
    self.parse_var()
}
/// parse_procedure_option - Implemented from Python _parse_procedure_option
#[allow(unused_variables, unused_mut)]
pub fn parse_procedure_option(&mut self) -> Result<Option<Expression>> {
    // Only EXECUTE AS is recognized; its tokens are consumed when present,
    // but no AST node is produced yet, so the result is None either way.
    let _ = self.match_text_seq(&["EXECUTE", "AS"]);
    Ok(None)
}
/// parse_projections - Delegates to parse_expressions
#[allow(unused_variables, unused_mut)]
pub fn parse_projections(&mut self) -> Result<Option<Expression>> {
    // SELECT-list projections are just a comma-separated expression list.
    self.parse_expressions()
}
/// parse_properties - Parses table/column properties
/// Python: _parse_properties
/// Collects a list of properties using parse_property
pub fn parse_properties(&mut self) -> Result<Option<Expression>> {
    // `before: None` selects the regular (non-"before") property parser.
    self.parse_properties_impl(None)
}
/// Implementation of parse_properties with before option
///
/// When `before == Some(true)` the "before" variant of the property parser
/// is used; otherwise the regular one. Returns None when no property parsed.
pub fn parse_properties_impl(&mut self, before: Option<bool>) -> Result<Option<Expression>> {
    let use_before = before == Some(true);
    let mut collected = Vec::new();
    // Keep consuming properties until the chosen parser yields None.
    while let Some(prop) = if use_before {
        self.parse_property_before()?
    } else {
        self.parse_property()?
    } {
        collected.push(prop);
    }
    if collected.is_empty() {
        return Ok(None);
    }
    Ok(Some(Expression::Properties(Box::new(Properties {
        expressions: collected,
    }))))
}
/// parse_property - Implemented from Python _parse_property
/// Calls: parse_bitwise, parse_column, parse_sequence_properties
#[allow(unused_variables, unused_mut)]
pub fn parse_property(&mut self) -> Result<Option<Expression>> {
    // COMPOUND SORTKEY is acknowledged with an empty-identifier sentinel.
    if self.match_text_seq(&["COMPOUND", "SORTKEY"]) {
        let sentinel = Identifier {
            name: String::new(),
            quoted: false,
            trailing_comments: Vec::new(),
            span: None,
        };
        return Ok(Some(Expression::Identifier(sentinel)));
    }
    // SQL SECURITY / DEFINER / INVOKER are consumed when present but not
    // yet modeled in the AST; a SQL SECURITY match suppresses the
    // DEFINER/INVOKER attempt, mirroring the original early returns.
    if !self.match_text_seq(&["SQL", "SECURITY"]) {
        let _ = self.match_texts(&["DEFINER", "INVOKER"]);
    }
    Ok(None)
}
/// parse_on_cluster_clause - Parse ClickHouse ON CLUSTER clause
///
/// Returns `Ok(None)` (rewinding past a consumed ON) unless the dialect is
/// ClickHouse and both ON and CLUSTER are present; errors only when the
/// cluster name after `ON CLUSTER` is missing.
fn parse_on_cluster_clause(&mut self) -> Result<Option<OnCluster>> {
    // ON CLUSTER is ClickHouse-only syntax.
    if !matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    ) {
        return Ok(None);
    }
    // Remember the cursor: a lone ON (not followed by CLUSTER) must be
    // left untouched for other clauses.
    let start = self.current;
    if !self.match_token(TokenType::On) {
        return Ok(None);
    }
    if !self.match_token(TokenType::Cluster) {
        self.current = start;
        return Ok(None);
    }
    // Cluster name: string literal, identifier, or a keyword that is safe
    // to reinterpret as an identifier.
    let this = if self.check(TokenType::String) {
        let value = self.expect_string()?;
        Expression::Literal(Box::new(Literal::String(value)))
    } else if let Some(id_expr) = self.parse_id_var()? {
        id_expr
    } else if self.is_safe_keyword_as_identifier() {
        let name = self.advance().text;
        Expression::Identifier(Identifier {
            name,
            quoted: false,
            trailing_comments: Vec::new(),
            span: None,
        })
    } else {
        return Err(self.parse_error("Expected cluster name after ON CLUSTER"));
    };
    Ok(Some(OnCluster {
        this: Box::new(this),
    }))
}
/// parse_clickhouse_table_properties - Parse ClickHouse table properties after column defs
///
/// Repeatedly matches ClickHouse CREATE TABLE clauses — ENGINE, ORDER BY,
/// PARTITION BY, PRIMARY KEY, SAMPLE BY, SETTINGS, COMMENT, TTL, and the
/// dictionary clauses SOURCE/LAYOUT/LIFETIME/RANGE — appending one property
/// expression per clause into `properties`. Stops at the first token that
/// starts none of these clauses.
fn parse_clickhouse_table_properties(
    &mut self,
    properties: &mut Vec<Expression>,
) -> Result<()> {
    loop {
        // ENGINE [=] Name[(args)]
        if self.match_identifier("ENGINE") {
            // The `=` between ENGINE and the engine name is optional.
            self.match_token(TokenType::Eq);
            let engine = self.parse_clickhouse_engine_expression()?;
            properties.push(Expression::EngineProperty(Box::new(EngineProperty {
                this: Box::new(engine),
            })));
            continue;
        }
        if self.match_token(TokenType::Order) {
            self.expect(TokenType::By)?;
            let order_by = if matches!(
                self.config.dialect,
                Some(crate::dialects::DialectType::ClickHouse)
            ) && self.match_token(TokenType::LParen)
            {
                // ClickHouse: ORDER BY (col1 [ASC|DESC], col2 [ASC|DESC], ...)
                // or ORDER BY () for no ordering
                if self.check(TokenType::RParen) {
                    // `ORDER BY ()` — modeled as ordering by an empty tuple.
                    self.skip();
                    OrderBy {
                        expressions: vec![Ordered::asc(Expression::Tuple(Box::new(Tuple {
                            expressions: Vec::new(),
                        })))],
                        siblings: false,
                        comments: Vec::new(),
                    }
                } else {
                    // Parse all expressions inside the parentheses
                    let mut inner_exprs = Vec::new();
                    loop {
                        let expr = self.parse_expression()?;
                        inner_exprs.push(expr);
                        if !self.match_token(TokenType::Comma) {
                            break;
                        }
                    }
                    self.expect(TokenType::RParen)?;
                    // Wrap in a Tuple for multi-expr, Paren for single-expr
                    let wrapper = if inner_exprs.len() == 1 {
                        Expression::Paren(Box::new(Paren {
                            this: inner_exprs.into_iter().next().unwrap(),
                            trailing_comments: Vec::new(),
                        }))
                    } else {
                        Expression::Tuple(Box::new(Tuple {
                            expressions: inner_exprs,
                        }))
                    };
                    OrderBy {
                        expressions: vec![Ordered::asc(wrapper)],
                        siblings: false,
                        comments: Vec::new(),
                    }
                }
            } else {
                // Non-parenthesized form: delegate to the general ORDER BY parser.
                self.parse_order_by()?
            };
            properties.push(Expression::OrderBy(Box::new(order_by)));
            continue;
        }
        // PARTITION BY expr
        if self.match_token(TokenType::Partition) {
            self.expect(TokenType::By)?;
            // `PARTITION BY ORDER BY ...` would mean the partition expression
            // is missing — reject it explicitly before parsing.
            if self.check(TokenType::Order) && self.check_next(TokenType::By) {
                return Err(self.parse_error("Expected expression after PARTITION BY"));
            }
            let expr = self
                .parse_assignment()?
                .ok_or_else(|| self.parse_error("Expected expression after PARTITION BY"))?;
            properties.push(Expression::PartitionedByProperty(Box::new(
                PartitionedByProperty {
                    this: Box::new(expr),
                },
            )));
            continue;
        }
        if self.match_token(TokenType::PrimaryKey) {
            // ClickHouse supports PRIMARY KEY id and PRIMARY KEY (id, ...)
            // NOTE(review): also tolerates a separate KEY token after the
            // PrimaryKey token — presumably for tokenizers that split the
            // phrase; confirm against the tokenizer.
            let _ = self.match_token(TokenType::Key);
            if self.check(TokenType::LParen) {
                if let Some(pk) = self.parse_primary_key_impl(false, true)? {
                    properties.push(pk);
                }
            } else if let Some(expr) = self.parse_conjunction()? {
                // ClickHouse: PRIMARY KEY expr (e.g., PRIMARY KEY tuple(), PRIMARY KEY id)
                let mut exprs = vec![expr];
                while self.match_token(TokenType::Comma) {
                    if let Some(next_expr) = self.parse_field()? {
                        exprs.push(next_expr);
                    } else {
                        break;
                    }
                }
                properties.push(Expression::PrimaryKey(Box::new(PrimaryKey {
                    this: None,
                    expressions: exprs,
                    options: Vec::new(),
                    include: None,
                })));
            } else {
                return Err(self.parse_error("Expected expression after PRIMARY KEY"));
            }
            continue;
        }
        // SAMPLE [BY] expr
        if self.match_token(TokenType::Sample) {
            let _ = self.match_token(TokenType::By);
            let expr = self.parse_expression()?;
            properties.push(Expression::SampleProperty(Box::new(SampleProperty {
                this: Box::new(expr),
            })));
            continue;
        }
        // SETTINGS key = value, ...
        if self.match_token(TokenType::Settings) {
            let mut settings = Vec::new();
            loop {
                settings.push(self.parse_expression()?);
                if !self.match_token(TokenType::Comma) {
                    break;
                }
            }
            properties.push(Expression::SettingsProperty(Box::new(SettingsProperty {
                expressions: settings,
            })));
            continue;
        }
        // COMMENT 'text' (or a general expression)
        if self.match_token(TokenType::Comment) {
            let comment_expr = if self.check(TokenType::String) {
                Expression::Literal(Box::new(Literal::String(self.expect_string()?)))
            } else {
                self.parse_expression()?
            };
            properties.push(Expression::SchemaCommentProperty(Box::new(
                SchemaCommentProperty {
                    this: Box::new(comment_expr),
                },
            )));
            continue;
        }
        // TTL time_column + INTERVAL '1' MONTH [DELETE|RECOMPRESS|TO DISK|TO VOLUME] [WHERE ...]
        if self.match_identifier("TTL") {
            if let Some(ttl_expr) = self.parse_ttl()? {
                properties.push(ttl_expr);
            }
            continue;
        }
        // Dictionary definition clauses: SOURCE(...), LAYOUT(...),
        // LIFETIME(MIN .. MAX ..), RANGE(MIN .. MAX ..).
        if self.match_identifier("SOURCE") {
            if let Some(prop) = self.parse_dict_property("SOURCE")? {
                properties.push(prop);
            }
            continue;
        }
        if self.match_identifier("LAYOUT") {
            if let Some(prop) = self.parse_dict_property("LAYOUT")? {
                properties.push(prop);
            }
            continue;
        }
        if self.match_identifier("LIFETIME") {
            if let Some(range) = self.parse_dict_range("LIFETIME")? {
                properties.push(range);
            }
            continue;
        }
        // RANGE may arrive as an identifier or as a dedicated token.
        if self.match_identifier("RANGE") || self.match_token(TokenType::Range) {
            if let Some(range) = self.parse_dict_range("RANGE")? {
                properties.push(range);
            }
            continue;
        }
        // Unrecognized token: end of the ClickHouse property list.
        break;
    }
    Ok(())
}
/// ClickHouse implicit alias in function arguments: `expr identifier` (without AS keyword).
/// The token after the alias must be a delimiter (comma, RParen, FROM, FOR, AS).
fn try_clickhouse_implicit_alias(&mut self, expr: Expression) -> Expression {
    // Only the ClickHouse dialect supports this form.
    let is_clickhouse = matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    );
    if !is_clickhouse {
        return expr;
    }
    // The alias candidate must be a bare identifier token...
    if !(self.check(TokenType::Var) || self.check(TokenType::Identifier)) {
        return expr;
    }
    // ...immediately followed by a delimiter; otherwise the identifier is
    // part of the surrounding expression and must not be consumed.
    let followed_by_delimiter = matches!(
        self.peek_nth(1).map(|t| t.token_type),
        Some(TokenType::Comma)
            | Some(TokenType::RParen)
            | Some(TokenType::From)
            | Some(TokenType::For)
            | Some(TokenType::As)
    );
    if !followed_by_delimiter {
        return expr;
    }
    let alias_name = self.advance().text.clone();
    Expression::Alias(Box::new(crate::expressions::Alias::new(
        expr,
        Identifier::new(alias_name),
    )))
}
// NOTE: the comment below describes `try_clickhouse_func_arg_alias` (defined
// further down), not the next item. It is kept as a plain `//` comment so it
// does not attach to the following function's rustdoc.
// ClickHouse alias in function arguments: handles both implicit (`expr identifier`)
// and explicit (`expr AS identifier`) aliases. Use it in special function parsers
// (SUBSTRING, TRIM, EXTRACT) but NOT in CAST (which has its own AS handling).
/// Normalize TSQL date part aliases (e.g., dd -> DAY, yy -> YEAR, etc.)
///
/// Accepts a `Var`, unqualified `Column`, or `Identifier` expression whose
/// name is a date-part abbreviation and returns a `Var` carrying the
/// canonical date-part name. Any other expression — or an unknown
/// abbreviation — is returned unchanged.
fn normalize_tsql_date_part(&self, expr: Expression) -> Expression {
    // Pull out a candidate name; qualified columns can never be date parts.
    let name = match &expr {
        Expression::Var(v) => Some(v.this.to_ascii_uppercase()),
        Expression::Column(c) if c.table.is_none() => Some(c.name.name.to_ascii_uppercase()),
        Expression::Identifier(id) => Some(id.name.to_ascii_uppercase()),
        _ => None,
    };
    if let Some(name) = name {
        // Abbreviation -> canonical date-part table.
        let mapped = match name.as_str() {
            "YY" | "YYY" | "YYYY" | "YR" | "YEARS" | "YRS" => "YEAR",
            "MM" | "MON" | "MONS" | "MONTHS" | "M" => "MONTH",
            "D" | "DD" | "DAYS" | "DAYOFMONTH" => "DAY",
            "DOW" | "DW" | "WEEKDAY" => "DAYOFWEEK",
            "DOY" | "DY" | "Y" => "DAYOFYEAR",
            "W" | "WK" | "WEEKOFYEAR" | "WOY" | "WY" | "WW" => "WEEK",
            "Q" | "QTR" | "QTRS" | "QUARTERS" | "QQ" => "QUARTER",
            "H" | "HH" | "HR" | "HOURS" | "HRS" => "HOUR",
            "MI" | "MIN" | "MINUTES" | "MINS" | "N" => "MINUTE",
            "S" | "SEC" | "SECONDS" | "SECS" | "SS" => "SECOND",
            "MS" | "MSEC" | "MSECS" | "MSECOND" | "MSECONDS" | "MILLISEC" | "MILLISECS"
            | "MILLISECON" | "MILLISECONDS" => "MILLISECOND",
            "US" | "USEC" | "USECS" | "MICROSEC" | "MICROSECS" | "USECOND" | "USECONDS"
            | "MICROSECONDS" | "MCS" => "MICROSECOND",
            "NS" | "NSEC" | "NANOSEC" | "NSECOND" | "NSECONDS" | "NANOSECS" => "NANOSECOND",
            "TZH" => "TIMEZONE_HOUR",
            // NOTE(review): TZOFFSET mapping to TIMEZONE_MINUTE mirrors the
            // original table — confirm against the Python sqlglot mapping.
            "TZM" | "TZOFFSET" | "TZ" => "TIMEZONE_MINUTE",
            "DEC" | "DECS" | "DECADES" => "DECADE",
            "MIL" | "MILS" | "MILLENIA" => "MILLENNIUM",
            "C" | "CENT" | "CENTS" | "CENTURIES" => "CENTURY",
            "ISOWK" | "ISOWW" | "ISO_WEEK" | "WEEKOFYEARISO" | "WEEKOFYEAR_ISO"
            | "WEEK_ISO" => "WEEKISO",
            _ => return expr, // No mapping, return as-is
        };
        return Expression::Var(Box::new(Var {
            this: mapped.to_string(),
        }));
    }
    expr
}
/// Resolve an expression (including string literals, via
/// `date_part_expr_name`) to an `IntervalUnit`, canonicalizing common
/// date-part abbreviations first. Returns None when the expression carries
/// no usable name or the name is not a known interval unit.
fn try_parse_date_part_unit_expr(&self, expr: &Expression) -> Option<IntervalUnit> {
    let upper = self.date_part_expr_name(expr)?.to_ascii_uppercase();
    // Abbreviation -> canonical unit name; unknown names pass through
    // unchanged and are resolved (or rejected) by the final lookup.
    let canonical = match upper.as_str() {
        // Year
        "Y" | "YY" | "YYY" | "YYYY" | "YR" | "YEARS" | "YRS" => "YEAR",
        // Quarter
        "Q" | "QTR" | "QTRS" | "QUARTERS" | "QQ" => "QUARTER",
        // Month
        "MM" | "MON" | "MONS" | "MONTHS" | "M" => "MONTH",
        // Week
        "W" | "WK" | "WEEKOFYEAR" | "WOY" | "WY" | "WW" | "WEEKS" => "WEEK",
        // Day
        "D" | "DD" | "DAYS" | "DAYOFMONTH" => "DAY",
        // Hour
        "H" | "HH" | "HR" | "HOURS" | "HRS" => "HOUR",
        // Minute
        "MI" | "MIN" | "MINUTES" | "MINS" | "N" => "MINUTE",
        // Second
        "S" | "SEC" | "SECONDS" | "SECS" | "SS" => "SECOND",
        // Millisecond
        "MS" | "MSEC" | "MSECS" | "MSECOND" | "MSECONDS" | "MILLISEC" | "MILLISECS"
        | "MILLISECON" | "MILLISECONDS" => "MILLISECOND",
        // Microsecond
        "US" | "USEC" | "USECS" | "MICROSEC" | "MICROSECS" | "USECOND" | "USECONDS"
        | "MICROSECONDS" | "MCS" => "MICROSECOND",
        // Nanosecond
        "NS" | "NSEC" | "NANOSEC" | "NSECOND" | "NSECONDS" | "NANOSECS" => "NANOSECOND",
        _ => upper.as_str(),
    };
    Self::parse_interval_unit_from_string(canonical)
}
/// Like `try_parse_date_part_unit_expr` but only accepts identifier-like
/// expressions (Var / unqualified Column / Identifier), NOT string literals.
/// Shares the same abbreviation table.
fn try_parse_date_part_unit_identifier_expr(&self, expr: &Expression) -> Option<IntervalUnit> {
    let upper = self
        .date_part_identifier_expr_name(expr)?
        .to_ascii_uppercase();
    // Canonicalize abbreviations; unknown names fall through to the lookup.
    let canonical = match upper.as_str() {
        "Y" | "YY" | "YYY" | "YYYY" | "YR" | "YEARS" | "YRS" => "YEAR",
        "Q" | "QTR" | "QTRS" | "QUARTERS" | "QQ" => "QUARTER",
        "MM" | "MON" | "MONS" | "MONTHS" | "M" => "MONTH",
        "W" | "WK" | "WEEKOFYEAR" | "WOY" | "WY" | "WW" | "WEEKS" => "WEEK",
        "D" | "DD" | "DAYS" | "DAYOFMONTH" => "DAY",
        "H" | "HH" | "HR" | "HOURS" | "HRS" => "HOUR",
        "MI" | "MIN" | "MINUTES" | "MINS" | "N" => "MINUTE",
        "S" | "SEC" | "SECONDS" | "SECS" | "SS" => "SECOND",
        "MS" | "MSEC" | "MSECS" | "MSECOND" | "MSECONDS" | "MILLISEC" | "MILLISECS"
        | "MILLISECON" | "MILLISECONDS" => "MILLISECOND",
        "US" | "USEC" | "USECS" | "MICROSEC" | "MICROSECS" | "USECOND" | "USECONDS"
        | "MICROSECONDS" | "MCS" => "MICROSECOND",
        "NS" | "NSEC" | "NANOSEC" | "NSECOND" | "NSECONDS" | "NANOSECS" => "NANOSECOND",
        _ => upper.as_str(),
    };
    Self::parse_interval_unit_from_string(canonical)
}
/// Resolve an identifier-like expression to a `DateTimeField`, accepting
/// common abbreviations. Unlike the unit variants above, this is total for
/// named expressions: unrecognized names become `DateTimeField::Custom`.
fn try_parse_date_part_field_identifier_expr(
    &self,
    expr: &Expression,
) -> Option<DateTimeField> {
    let upper = self
        .date_part_identifier_expr_name(expr)?
        .to_ascii_uppercase();
    Some(match upper.as_str() {
        "YEAR" | "Y" | "YY" | "YYY" | "YYYY" | "YR" | "YEARS" | "YRS" => DateTimeField::Year,
        "MONTH" | "MM" | "MON" | "MONS" | "MONTHS" => DateTimeField::Month,
        "DAY" | "D" | "DD" | "DAYS" | "DAYOFMONTH" => DateTimeField::Day,
        "HOUR" | "H" | "HH" | "HR" | "HOURS" | "HRS" => DateTimeField::Hour,
        "MINUTE" | "MI" | "MIN" | "MINUTES" | "MINS" => DateTimeField::Minute,
        "SECOND" | "S" | "SEC" | "SECONDS" | "SECS" => DateTimeField::Second,
        "MILLISECOND" | "MS" | "MSEC" | "MILLISECONDS" => DateTimeField::Millisecond,
        "MICROSECOND" | "US" | "USEC" | "MICROSECONDS" => DateTimeField::Microsecond,
        "DOW" | "DAYOFWEEK" | "DW" => DateTimeField::DayOfWeek,
        "DOY" | "DAYOFYEAR" | "DY" => DateTimeField::DayOfYear,
        "WEEK" | "W" | "WK" | "WEEKOFYEAR" | "WOY" | "WW" => DateTimeField::Week,
        "QUARTER" | "Q" | "QTR" | "QTRS" | "QUARTERS" => DateTimeField::Quarter,
        "EPOCH" | "EPOCH_SECOND" | "EPOCH_SECONDS" => DateTimeField::Epoch,
        "TIMEZONE" => DateTimeField::Timezone,
        "TIMEZONE_HOUR" | "TZH" => DateTimeField::TimezoneHour,
        "TIMEZONE_MINUTE" | "TZM" => DateTimeField::TimezoneMinute,
        "DATE" => DateTimeField::Date,
        "TIME" => DateTimeField::Time,
        // Anything else is preserved verbatim as a custom field.
        other => DateTimeField::Custom(other.to_string()),
    })
}
/// Rewrite an identifier-like expression (unqualified Column or Identifier)
/// into a `Var` carrying the same name; everything else — including
/// expressions that already are `Var`s — is returned untouched.
fn convert_date_part_identifier_expr_to_var(&self, expr: Expression) -> Expression {
    match expr {
        Expression::Identifier(ident) => Expression::Var(Box::new(Var { this: ident.name })),
        Expression::Column(col) if col.table.is_none() => {
            Expression::Var(Box::new(Var { this: col.name.name }))
        }
        // Var already, qualified column, or any other shape: pass through.
        other => other,
    }
}
/// For date-part functions where one argument is a date-part keyword
/// (DAY, MONTH, WEEK, WEEK(MONDAY), ...) rather than a column reference,
/// convert Column/Identifier at that position to Var so it is not caught
/// by column qualification (lineage, validation). Matches sqlglot's
/// build_date_diff/unit-aware parsing.
///
/// The unit-position varies by dialect:
/// - BigQuery: last arg (DATE_DIFF(a, b, DAY); DATE_TRUNC(a, MONTH))
/// - TSQL/Fabric/Redshift/Snowflake: first arg (DATEDIFF(day, a, b))
fn normalize_date_part_arg(&self, name: &str, args: &mut [Expression]) {
    use crate::dialects::DialectType as DT;
    // No dialect configured: nothing to normalize.
    let dialect = match self.config.dialect {
        Some(d) => d,
        None => return,
    };
    let upper = name.to_ascii_uppercase();
    // Decide which argument index (if any) holds the unit for this
    // dialect/function combination.
    let unit_index: Option<usize> = match dialect {
        DT::BigQuery => match upper.as_str() {
            "DATE_DIFF" | "DATETIME_DIFF" | "TIMESTAMP_DIFF" | "TIME_DIFF"
                if args.len() == 3 =>
            {
                Some(2)
            }
            "DATE_TRUNC" | "DATETIME_TRUNC" | "TIMESTAMP_TRUNC" | "TIME_TRUNC"
                if args.len() >= 2 =>
            {
                Some(1)
            }
            _ => None,
        },
        DT::TSQL | DT::Fabric => match upper.as_str() {
            "DATEDIFF" | "DATEDIFF_BIG" | "DATEADD" | "DATEPART" | "DATE_PART" | "DATENAME"
            | "DATETRUNC"
                if !args.is_empty() =>
            {
                Some(0)
            }
            _ => None,
        },
        DT::Redshift => match upper.as_str() {
            "DATEDIFF" | "DATE_DIFF" | "DATEADD" | "DATE_ADD" | "DATE_PART" | "DATEPART"
            | "DATE_TRUNC" | "DATETRUNC"
                if !args.is_empty() =>
            {
                Some(0)
            }
            _ => None,
        },
        DT::Snowflake => match upper.as_str() {
            "DATE_TRUNC" | "DATETRUNC" if !args.is_empty() => Some(0),
            _ => None,
        },
        _ => None,
    };
    if let Some(idx) = unit_index {
        // Swap the slot out (args is a borrowed slice, so the value cannot
        // be moved directly), convert, and put the result back.
        let taken = std::mem::replace(&mut args[idx], Expression::Null(Null));
        args[idx] = Self::date_part_arg_to_var(taken);
    }
}
/// Convert a date-part argument to a `Var` so it is not treated as a column
/// reference: unqualified Columns and Identifiers become Vars, and function
/// calls like `WEEK(MONDAY)` have their first argument converted recursively.
/// All other shapes are returned unchanged.
fn date_part_arg_to_var(expr: Expression) -> Expression {
    match expr {
        Expression::Column(c) if c.table.is_none() => {
            Expression::Var(Box::new(Var { this: c.name.name }))
        }
        Expression::Identifier(id) => Expression::Var(Box::new(Var { this: id.name })),
        // WEEK(MONDAY), WEEK(SATURDAY), etc. — recurse into the inner arg
        Expression::Function(mut f) if !f.args.is_empty() => {
            // Replace-then-restore because the arg cannot be moved out of
            // the owned Function in place.
            let inner = std::mem::replace(&mut f.args[0], Expression::Null(Null));
            f.args[0] = Self::date_part_arg_to_var(inner);
            Expression::Function(f)
        }
        other => other,
    }
}
/// Borrow the bare name of an identifier-like expression: a `Var`, an
/// unqualified `Column`, or an `Identifier`. Qualified columns and all
/// other expression shapes yield None.
fn date_part_identifier_expr_name<'a>(&self, expr: &'a Expression) -> Option<&'a str> {
    match expr {
        Expression::Var(v) => Some(v.this.as_str()),
        Expression::Column(c) if c.table.is_none() => Some(c.name.name.as_str()),
        Expression::Identifier(id) => Some(id.name.as_str()),
        _ => None,
    }
}
/// Borrow the candidate date-part name of an expression: identifier-like
/// expressions (see `date_part_identifier_expr_name`) plus string literals.
fn date_part_expr_name<'a>(&self, expr: &'a Expression) -> Option<&'a str> {
    if let Some(name) = self.date_part_identifier_expr_name(expr) {
        return Some(name);
    }
    // String literals also qualify, e.g. DATE_TRUNC('month', ...).
    if let Expression::Literal(lit) = expr {
        if let Literal::String(s) = lit.as_ref() {
            return Some(s.as_str());
        }
    }
    None
}
/// ClickHouse alias in function arguments: handles both implicit
/// (`expr identifier`) and explicit (`expr AS identifier`) aliases. In both
/// forms the token after the alias must be a delimiter (comma, RParen, FROM,
/// FOR, AS) to rule out the identifier being part of the expression itself.
/// Non-ClickHouse dialects pass through unchanged.
fn try_clickhouse_func_arg_alias(&mut self, expr: Expression) -> Expression {
    if !matches!(
        self.config.dialect,
        Some(crate::dialects::DialectType::ClickHouse)
    ) {
        return expr;
    }
    // Try implicit alias first
    if self.check(TokenType::Var) || self.check(TokenType::Identifier) {
        let next_after = self.peek_nth(1).map(|t| t.token_type);
        let is_delimiter = matches!(
            next_after,
            Some(TokenType::Comma)
                | Some(TokenType::RParen)
                | Some(TokenType::From)
                | Some(TokenType::For)
                | Some(TokenType::As)
        );
        if is_delimiter {
            let alias_token = self.advance();
            let alias_name = alias_token.text.clone();
            return Expression::Alias(Box::new(crate::expressions::Alias::new(
                expr,
                Identifier::new(alias_name),
            )));
        }
    }
    // Try explicit AS alias — look ahead two tokens without consuming
    // anything until the full `AS identifier <delimiter>` shape is confirmed.
    if self.check(TokenType::As) {
        let next_idx = self.current + 1;
        let after_alias_idx = self.current + 2;
        // Token after AS must be an identifier (quoted identifiers allowed here).
        let is_alias_token = next_idx < self.tokens.len()
            && matches!(
                self.tokens[next_idx].token_type,
                TokenType::Identifier | TokenType::Var | TokenType::QuotedIdentifier
            );
        // Token after the alias must be a delimiter.
        let is_delimiter = is_alias_token
            && after_alias_idx < self.tokens.len()
            && matches!(
                self.tokens[after_alias_idx].token_type,
                TokenType::Comma
                    | TokenType::RParen
                    | TokenType::From
                    | TokenType::For
                    | TokenType::As
            );
        if is_delimiter {
            self.skip(); // consume AS
            let alias_token = self.advance();
            // Preserve quoting on the alias identifier.
            let alias_name = if alias_token.token_type == TokenType::QuotedIdentifier {
                let mut ident = Identifier::new(alias_token.text.clone());
                ident.quoted = true;
                ident
            } else {
                Identifier::new(alias_token.text.clone())
            };
            return Expression::Alias(Box::new(crate::expressions::Alias::new(
                expr, alias_name,
            )));
        }
    }
    expr
}
/// parse_clickhouse_engine_expression - Parse ENGINE expression with optional args
///
/// Produces a bare `Identifier` for `ENGINE = Name` or an `Anonymous` call
/// node for `ENGINE = Name(arg, ...)` (empty argument lists included).
fn parse_clickhouse_engine_expression(&mut self) -> Result<Expression> {
    if self.is_at_end() {
        return Err(self.parse_error("Expected engine name after ENGINE"));
    }
    let engine_token = self.advance();
    let engine = Expression::Identifier(Identifier {
        name: engine_token.text.clone(),
        quoted: matches!(engine_token.token_type, TokenType::QuotedIdentifier),
        trailing_comments: Vec::new(),
        span: None,
    });
    // Without an opening paren the engine is just a bare identifier.
    if !self.match_token(TokenType::LParen) {
        return Ok(engine);
    }
    // Name(arg, ...) — the argument list may be empty.
    let args = if self.check(TokenType::RParen) {
        Vec::new()
    } else {
        self.parse_expression_list()?
    };
    self.expect(TokenType::RParen)?;
    Ok(Expression::Anonymous(Box::new(Anonymous {
        this: Box::new(engine),
        expressions: args,
    })))
}
/// parse_property_assignment - Ported from Python _parse_property_assignment
/// Parses a property assignment: optionally = or AS, then a value
#[allow(unused_variables, unused_mut)]
pub fn parse_property_assignment(&mut self) -> Result<Option<Expression>> {
    // Skip an optional `=` and/or `AS` before the value.
    let _ = self.match_token(TokenType::Eq);
    let _ = self.match_token(TokenType::Alias);
    // The value itself is parsed as an unquoted field.
    self.parse_unquoted_field()
}
/// Implemented from Python `_parse_property_before`.
///
/// Consumes at most one of the leading keywords NO, DUAL, BEFORE,
/// MIN/MINIMUM or MAX/MAXIMUM (tried in that order, short-circuiting on the
/// first match) and always yields `None`.
#[allow(unused_variables, unused_mut)]
pub fn parse_property_before(&mut self) -> Result<Option<Expression>> {
    // `||` short-circuits, so token consumption stops at the first match,
    // exactly like the original early-return chain.
    let _ = self.match_text_seq(&["NO"])
        || self.match_text_seq(&["DUAL"])
        || self.match_text_seq(&["BEFORE"])
        || self.match_texts(&["MIN", "MINIMUM"])
        || self.match_texts(&["MAX", "MAXIMUM"]);
    Ok(None)
}
/// Parse a QUALIFY clause (Snowflake, BigQuery).
/// Python: `if not self._match(TokenType.QUALIFY): return None;
/// return exp.Qualify(this=self._parse_disjunction())`
///
/// Returns `None` when the next token is not QUALIFY; otherwise wraps the
/// following condition in a [`Qualify`] node.
pub fn parse_qualify(&mut self) -> Result<Option<Expression>> {
    if self.match_token(TokenType::Qualify) {
        let this = self.parse_expression()?;
        Ok(Some(Expression::Qualify(Box::new(Qualify { this }))))
    } else {
        Ok(None)
    }
}
/// parse_range - Parses range expressions (BETWEEN, LIKE, IN, IS, etc.)
/// Python: _parse_range
///
/// Parses a bitwise expression as the left side and then checks for a range
/// operator. An intervening NOT negates BETWEEN/LIKE/ILIKE/IN (IS handles
/// its own NOT internally via `parse_is_with_expr`).
///
/// Bug fix: previously, if NOT was consumed but no range operator followed,
/// the NOT token was silently swallowed; it is now put back so the caller
/// still sees it.
pub fn parse_range(&mut self) -> Result<Option<Expression>> {
    // First parse a bitwise expression as the left side
    let mut this = self.parse_bitwise()?;
    if this.is_none() {
        return Ok(None);
    }
    // Check for NOT (for NOT LIKE, NOT IN, NOT BETWEEN, etc.)
    let negate = self.match_token(TokenType::Not);
    // BETWEEN
    if self.match_token(TokenType::Between) {
        let between = self.parse_between_with_expr(this.clone(), negate)?;
        this = Some(between);
        return Ok(this);
    }
    // LIKE
    if self.match_token(TokenType::Like) {
        let left = this.clone().expect("left expression checked above");
        let right = self
            .parse_bitwise()?
            .ok_or_else(|| self.parse_error("Expected expression after LIKE"))?;
        let escape = self.parse_escape()?;
        let like = Expression::Like(Box::new(LikeOp {
            left,
            right,
            escape,
            quantifier: None,
            inferred_type: None,
        }));
        this = if negate {
            Some(Expression::Not(Box::new(UnaryOp {
                this: like,
                inferred_type: None,
            })))
        } else {
            Some(like)
        };
        return Ok(this);
    }
    // ILIKE (case-insensitive LIKE)
    if self.match_token(TokenType::ILike) {
        let left = this.clone().expect("left expression checked above");
        let right = self
            .parse_bitwise()?
            .ok_or_else(|| self.parse_error("Expected expression after ILIKE"))?;
        let escape = self.parse_escape()?;
        let ilike = Expression::ILike(Box::new(LikeOp {
            left,
            right,
            escape,
            quantifier: None,
            inferred_type: None,
        }));
        this = if negate {
            Some(Expression::Not(Box::new(UnaryOp {
                this: ilike,
                inferred_type: None,
            })))
        } else {
            Some(ilike)
        };
        return Ok(this);
    }
    // IN
    if self.match_token(TokenType::In) {
        let in_expr = self.parse_in_with_expr(this.clone())?;
        this = if negate {
            Some(Expression::Not(Box::new(UnaryOp {
                this: in_expr,
                inferred_type: None,
            })))
        } else {
            Some(in_expr)
        };
        return Ok(this);
    }
    // IS [NOT] NULL / IS [NOT] TRUE / IS [NOT] FALSE — negation is matched
    // inside parse_is_with_expr itself, so `negate` is not applied here.
    if self.match_token(TokenType::Is) {
        let is_expr = self.parse_is_with_expr(this.clone())?;
        this = Some(is_expr);
        return Ok(this);
    }
    // Handle standalone NOT with NULL (for NOT NULL pattern after negate)
    if negate && self.match_token(TokenType::Null) {
        if let Some(left) = this {
            let is_null = Expression::Is(Box::new(BinaryOp {
                left,
                right: Expression::Null(Null),
                left_comments: Vec::new(),
                operator_comments: Vec::new(),
                trailing_comments: Vec::new(),
                inferred_type: None,
            }));
            return Ok(Some(Expression::Not(Box::new(UnaryOp {
                this: is_null,
                inferred_type: None,
            }))));
        }
    }
    // Bug fix: NOT was consumed but no range operator followed — rewind one
    // token so the NOT remains visible to the caller instead of being lost.
    if negate {
        self.current -= 1;
    }
    Ok(this)
}
/// Parse the remainder of a BETWEEN expression given the already-parsed
/// left-hand side (the BETWEEN token itself is already consumed).
///
/// Handles the optional SYMMETRIC/ASYMMETRIC qualifier followed by the
/// mandatory `low AND high` bounds; `negate` marks a preceding NOT.
fn parse_between_with_expr(
    &mut self,
    this: Option<Expression>,
    negate: bool,
) -> Result<Expression> {
    let Some(left) = this else {
        return Err(self.parse_error("Expected expression before BETWEEN"));
    };
    // SYMMETRIC allows the bounds in either order; ASYMMETRIC is explicit
    // default ordering; absent means unspecified.
    let symmetric = if self.match_texts(&["SYMMETRIC"]) {
        Some(true)
    } else if self.match_texts(&["ASYMMETRIC"]) {
        Some(false)
    } else {
        None
    };
    // Lower bound, mandatory AND, upper bound.
    let lower = self
        .parse_bitwise()?
        .ok_or_else(|| self.parse_error("Expected low expression after BETWEEN"))?;
    if !self.match_token(TokenType::And) {
        return Err(self.parse_error("Expected AND in BETWEEN expression"));
    }
    let upper = self
        .parse_bitwise()?
        .ok_or_else(|| self.parse_error("Expected high expression after AND in BETWEEN"))?;
    Ok(Expression::Between(Box::new(Between {
        this: left,
        low: lower,
        high: upper,
        not: negate,
        symmetric,
    })))
}
/// parse_in_with_expr - Parses IN expression with given left side
///
/// The IN token is already consumed. Four right-hand shapes are handled:
/// - `IN UNNEST(expr)` (BigQuery) — stored in the `unnest` field
/// - `IN <expr>` with no parentheses (DuckDB list membership) — `is_field`
/// - `IN (SELECT ...)` — stored in the `query` field
/// - `IN (v1, v2, ...)` — stored in `expressions`
///
/// Negation is applied by the caller (see `parse_range`), so `not` is
/// always false in the nodes built here.
fn parse_in_with_expr(&mut self, this: Option<Expression>) -> Result<Expression> {
let this_expr = match this {
Some(e) => e,
None => return Err(self.parse_error("Expected expression before IN")),
};
// BigQuery: IN UNNEST(expr) — UNNEST without wrapping parentheses
if self.check_identifier("UNNEST") {
self.skip(); // consume UNNEST
self.expect(TokenType::LParen)?;
let unnest_expr = self.parse_expression()?;
self.expect(TokenType::RParen)?;
return Ok(Expression::In(Box::new(In {
this: this_expr,
expressions: Vec::new(),
query: None,
not: false,
global: false,
unnest: Some(Box::new(unnest_expr)),
is_field: false,
})));
}
// Parse the IN list (subquery or value list)
if !self.match_token(TokenType::LParen) {
// DuckDB: IN without parentheses for array/list membership: 'red' IN tbl.flags
// Try to parse as a single expression (column/array reference)
// NOTE(review): if parse_primary fails it may have consumed tokens
// before erroring; the position is not restored before the error below
// — confirm callers treat this error path as fatal.
if let Ok(expr) = self.parse_primary() {
return Ok(Expression::In(Box::new(In {
this: this_expr,
expressions: vec![expr],
query: None,
not: false,
global: false,
unnest: None,
is_field: true,
})));
}
return Err(self.parse_error("Expected expression or parenthesized list after IN"));
}
// Check if it's a subquery
if self.check(TokenType::Select) {
let subquery = self.parse_select()?;
self.expect(TokenType::RParen)?;
return Ok(Expression::In(Box::new(In {
this: this_expr,
expressions: Vec::new(),
query: Some(subquery),
not: false,
global: false,
unnest: None,
is_field: false,
})));
}
// Parse value list. Pre-size for large IN lists to reduce reallocations.
let capacity_hint = self.estimate_expression_list_capacity_until_rparen();
let expressions = self.parse_expression_list_with_capacity(capacity_hint)?;
self.expect(TokenType::RParen)?;
// An empty parenthesized list `IN ()` is rejected.
if expressions.is_empty() {
return Err(self.parse_error("Expected expression list after IN"));
}
Ok(Expression::In(Box::new(In {
this: this_expr,
expressions,
query: None,
not: false,
global: false,
unnest: None,
is_field: false,
})))
}
/// parse_is_with_expr - Parses IS expression with given left side
///
/// The IS token is already consumed. Matches, in order: an optional NOT,
/// then one of NULL, TRUE, FALSE, JSON [type] [unique-keys], or
/// DISTINCT FROM. Anything else is an error (note: in the error case a
/// matched NOT token stays consumed).
///
/// IS [NOT] DISTINCT FROM is lowered to NullSafeNeq / NullSafeEq
/// respectively, i.e. "(not) equal with NULL-safe semantics".
fn parse_is_with_expr(&mut self, this: Option<Expression>) -> Result<Expression> {
let this_expr = match this {
Some(e) => e,
None => return Err(self.parse_error("Expected expression before IS")),
};
// Optional NOT: IS NOT ...
let negate = self.match_token(TokenType::Not);
// IS NULL
if self.match_token(TokenType::Null) {
let is_null = Expression::Is(Box::new(BinaryOp {
left: this_expr,
right: Expression::Null(Null),
left_comments: Vec::new(),
operator_comments: Vec::new(),
trailing_comments: Vec::new(),
inferred_type: None,
}));
return if negate {
Ok(Expression::Not(Box::new(UnaryOp {
this: is_null,
inferred_type: None,
})))
} else {
Ok(is_null)
};
}
// IS TRUE
if self.match_texts(&["TRUE"]) {
let is_true = Expression::Is(Box::new(BinaryOp {
left: this_expr,
right: Expression::Boolean(BooleanLiteral { value: true }),
left_comments: Vec::new(),
operator_comments: Vec::new(),
trailing_comments: Vec::new(),
inferred_type: None,
}));
return if negate {
Ok(Expression::Not(Box::new(UnaryOp {
this: is_true,
inferred_type: None,
})))
} else {
Ok(is_true)
};
}
// IS FALSE
if self.match_texts(&["FALSE"]) {
let is_false = Expression::Is(Box::new(BinaryOp {
left: this_expr,
right: Expression::Boolean(BooleanLiteral { value: false }),
left_comments: Vec::new(),
operator_comments: Vec::new(),
trailing_comments: Vec::new(),
inferred_type: None,
}));
return if negate {
Ok(Expression::Not(Box::new(UnaryOp {
this: is_false,
inferred_type: None,
})))
} else {
Ok(is_false)
};
}
// IS JSON [VALUE|SCALAR|OBJECT|ARRAY] [WITH UNIQUE KEYS|WITHOUT UNIQUE KEYS|UNIQUE KEYS]
if self.match_texts(&["JSON"]) {
// Parse optional JSON type
let json_type = if self.match_texts(&["VALUE"]) {
Some("VALUE".to_string())
} else if self.match_texts(&["SCALAR"]) {
Some("SCALAR".to_string())
} else if self.match_texts(&["OBJECT"]) {
Some("OBJECT".to_string())
} else if self.match_texts(&["ARRAY"]) {
Some("ARRAY".to_string())
} else {
None
};
// Parse optional key uniqueness constraint. Order matters: the
// three-word forms are tried before the bare two-word shorthand.
let unique_keys = if self.match_text_seq(&["WITH", "UNIQUE", "KEYS"]) {
Some(JsonUniqueKeys::With)
} else if self.match_text_seq(&["WITHOUT", "UNIQUE", "KEYS"]) {
Some(JsonUniqueKeys::Without)
} else if self.match_text_seq(&["UNIQUE", "KEYS"]) {
// Shorthand for WITH UNIQUE KEYS
Some(JsonUniqueKeys::Shorthand)
} else {
None
};
return Ok(Expression::IsJson(Box::new(IsJson {
this: this_expr,
json_type,
unique_keys,
negated: negate,
})));
}
// IS DISTINCT FROM / IS NOT DISTINCT FROM
if self.match_text_seq(&["DISTINCT", "FROM"]) {
let right = self.parse_bitwise()?;
if let Some(right_expr) = right {
// IS DISTINCT FROM is semantically "not equal with null handling"
// Use NullSafeNeq for IS DISTINCT FROM
// If negate was set (IS NOT DISTINCT FROM), use NullSafeEq
let expr = if negate {
Expression::NullSafeEq(Box::new(BinaryOp {
left: this_expr,
right: right_expr,
left_comments: Vec::new(),
operator_comments: Vec::new(),
trailing_comments: Vec::new(),
inferred_type: None,
}))
} else {
Expression::NullSafeNeq(Box::new(BinaryOp {
left: this_expr,
right: right_expr,
left_comments: Vec::new(),
operator_comments: Vec::new(),
trailing_comments: Vec::new(),
inferred_type: None,
}))
};
return Ok(expr);
}
return Err(self.parse_error("Expected expression after IS DISTINCT FROM"));
}
Err(self.parse_error("Expected NULL, TRUE, FALSE, JSON, or DISTINCT FROM after IS"))
}
/// Implemented from Python `_parse_reads_property`.
///
/// Consumes an optional `SQL DATA` keyword pair; always yields `None`.
#[allow(unused_variables, unused_mut)]
pub fn parse_reads_property(&mut self) -> Result<Option<Expression>> {
    let _ = self.match_text_seq(&["SQL", "DATA"]);
    Ok(None)
}
/// parse_recursive_with_search - Parse SEARCH/CYCLE clause for recursive CTEs (PostgreSQL)
/// Syntax: SEARCH BREADTH|DEPTH FIRST BY column SET column [USING column]
/// or: CYCLE column SET column USING column
///
/// Returns `None` when neither clause is present; errors when SET is
/// missing. NOTE(review): USING is parsed as optional for both forms even
/// though the header says it is required for CYCLE — confirm this leniency
/// is intentional.
#[allow(unused_variables, unused_mut)]
pub fn parse_recursive_with_search(&mut self) -> Result<Option<Box<Expression>>> {
// Check for SEARCH or CYCLE keyword
let kind = if self.match_text_seq(&["SEARCH"]) {
// SEARCH BREADTH|DEPTH FIRST BY ...
let search_kind = if self.match_text_seq(&["BREADTH"]) {
"BREADTH"
} else if self.match_text_seq(&["DEPTH"]) {
"DEPTH"
} else {
// SEARCH without a direction keyword: bail out. NOTE(review): the
// SEARCH token stays consumed on this path — verify callers expect that.
return Ok(None);
};
// Consume "FIRST BY" — both words are tolerated as optional here
self.match_text_seq(&["FIRST"]);
self.match_text_seq(&["BY"]);
search_kind.to_string()
} else if self.match_token(TokenType::Cycle) {
"CYCLE".to_string()
} else {
return Ok(None);
};
// Parse the column(s) - for CYCLE this is typically a single column
let this = self.expect_identifier()?;
let this_expr = Expression::Identifier(Identifier::new(this));
// SET column
let expression = if self.match_text_seq(&["SET"]) {
let set_col = self.expect_identifier()?;
Expression::Identifier(Identifier::new(set_col))
} else {
return Err(self.parse_error("Expected SET in CYCLE/SEARCH clause"));
};
// USING column (optional for SEARCH, required for CYCLE)
let using = if self.match_token(TokenType::Using) {
let using_col = self.expect_identifier()?;
Some(Box::new(Expression::Identifier(Identifier::new(using_col))))
} else {
None
};
Ok(Some(Box::new(Expression::RecursiveWithSearch(Box::new(
RecursiveWithSearch {
kind,
this: Box::new(this_expr),
expression: Box::new(expression),
using,
},
)))))
}
/// Ported from Python `_parse_references`.
///
/// Parses a foreign-key `REFERENCES table [(col, ...)] [options]` clause.
/// Returns `None` when the next token is not REFERENCES; errors when the
/// table name is missing.
#[allow(unused_variables, unused_mut)]
pub fn parse_references(&mut self) -> Result<Option<Expression>> {
    if !self.match_token(TokenType::References) {
        return Ok(None);
    }
    // The referenced table is mandatory.
    let Some(table) = self.parse_table()? else {
        return Err(self.parse_error("Expected table name after REFERENCES"));
    };
    // Optional referenced-column list: table(col1, col2).
    let mut expressions = Vec::new();
    if self.match_token(TokenType::LParen) {
        for id in self.parse_identifier_list()? {
            expressions.push(Expression::Identifier(id));
        }
        self.expect(TokenType::RParen)?;
    }
    // Optional constraint options (ON DELETE, ON UPDATE, MATCH, ...).
    let options = self.parse_fk_constraint_options()?;
    Ok(Some(Expression::Reference(Box::new(Reference {
        this: Box::new(table),
        expressions,
        options,
    }))))
}
/// Parse foreign-key constraint options: `ON DELETE <action>`,
/// `ON UPDATE <action>`, and a trailing `MATCH FULL|PARTIAL|SIMPLE`.
///
/// Each recognized option is stored as a plain identifier carrying its full
/// text (e.g. "ON DELETE CASCADE"), matching how the generator expects them.
///
/// Bug fix: previously, `ON` followed by something other than DELETE/UPDATE
/// (`break`) or a DELETE/UPDATE with an unrecognized action (`continue`)
/// left those tokens consumed and lost. The parser position is now restored
/// to the start of the partial option so the caller can still see it.
fn parse_fk_constraint_options(&mut self) -> Result<Vec<Expression>> {
    let mut options = Vec::new();
    loop {
        // Remember where this option started so a partial match can rewind.
        let start_index = self.current;
        if !self.match_token(TokenType::On) {
            break;
        }
        let kind = if self.match_token(TokenType::Delete) {
            "DELETE"
        } else if self.match_token(TokenType::Update) {
            "UPDATE"
        } else {
            // ON without DELETE/UPDATE is not ours — put the ON back.
            self.current = start_index;
            break;
        };
        let action = if self.match_text_seq(&["NO", "ACTION"]) {
            "NO ACTION"
        } else if self.match_text_seq(&["SET", "NULL"]) {
            "SET NULL"
        } else if self.match_text_seq(&["SET", "DEFAULT"]) {
            "SET DEFAULT"
        } else if self.match_token(TokenType::Cascade) {
            "CASCADE"
        } else if self.match_token(TokenType::Restrict) {
            "RESTRICT"
        } else {
            // ON DELETE/UPDATE without a recognized action — rewind and stop.
            self.current = start_index;
            break;
        };
        // Store as simple identifier with the full action description
        options.push(Expression::Identifier(Identifier {
            name: format!("ON {} {}", kind, action),
            quoted: false,
            trailing_comments: Vec::new(),
            span: None,
        }));
    }
    // Parse MATCH option (an unrecognized match type is silently ignored,
    // preserving the original lenient behavior).
    if self.match_token(TokenType::Match) {
        let match_type = if self.match_identifier("FULL") {
            "FULL"
        } else if self.match_identifier("PARTIAL") {
            "PARTIAL"
        } else if self.match_identifier("SIMPLE") {
            "SIMPLE"
        } else {
            ""
        };
        if !match_type.is_empty() {
            options.push(Expression::Identifier(Identifier {
                name: format!("MATCH {}", match_type),
                quoted: false,
                trailing_comments: Vec::new(),
                span: None,
            }));
        }
    }
    Ok(options)
}
/// parse_refresh - Parses `REFRESH [TABLE | MATERIALIZED VIEW] <target>`.
/// Python: parser.py:7656-7668 (`_parse_refresh`)
///
/// The target is either a string literal or a (possibly schema-qualified)
/// table reference; string literals take precedence.
#[allow(unused_variables, unused_mut)]
pub fn parse_refresh(&mut self) -> Result<Option<Expression>> {
    // Optional object-kind keyword(s); empty string when absent.
    let kind = if self.match_token(TokenType::Table) {
        "TABLE"
    } else if self.match_text_seq(&["MATERIALIZED", "VIEW"]) {
        "MATERIALIZED VIEW"
    } else {
        ""
    }
    .to_string();
    // Try a string-literal target first, then fall back to a table reference.
    let target = match self.parse_string()? {
        Some(s) => s,
        None => Expression::Table(Box::new(self.parse_table_ref()?)),
    };
    Ok(Some(Expression::Refresh(Box::new(Refresh {
        this: Box::new(target),
        kind,
    }))))
}
/// parse_refresh_trigger_property - Doris REFRESH clause for materialized views
/// Syntax: REFRESH method ON kind [EVERY n UNIT] [STARTS 'datetime']
/// Examples:
/// REFRESH COMPLETE ON MANUAL
/// REFRESH AUTO ON COMMIT
/// REFRESH AUTO ON SCHEDULE EVERY 5 MINUTE STARTS '2025-01-01 00:00:00'
///
/// Errors if the method, the ON keyword, or the kind is missing.
/// EVERY/STARTS are only recognized after `ON SCHEDULE`.
pub fn parse_refresh_trigger_property(&mut self) -> Result<RefreshTriggerProperty> {
// Parse method: COMPLETE or AUTO. Any identifier/keyword is accepted and
// upper-cased; validity is left to the caller/generator.
let method = self.expect_identifier_or_keyword()?.to_ascii_uppercase();
// Parse ON
self.expect(TokenType::On)?;
// Parse kind: MANUAL, COMMIT, or SCHEDULE (again accepted verbatim)
let kind_text = self.expect_identifier_or_keyword()?.to_ascii_uppercase();
let kind = Some(kind_text.clone());
// For SCHEDULE, parse EVERY n UNIT [STARTS 'datetime']
let (every, unit, starts) = if kind_text == "SCHEDULE" {
// EVERY n UNIT
let every = if self.match_identifier("EVERY") {
// parse_number returns Option<Expression> with Expression::Literal(Box::new(Literal::Number(...)))
self.parse_number()?.map(Box::new)
} else {
None
};
// Unit: MINUTE, HOUR, DAY, etc. — only read when EVERY n was present
let unit = if every.is_some() {
Some(self.expect_identifier_or_keyword()?.to_ascii_uppercase())
} else {
None
};
// STARTS 'datetime' — stored as a string literal expression
let starts = if self.match_identifier("STARTS") {
let s = self.expect_string()?;
Some(Box::new(Expression::Literal(Box::new(Literal::String(s)))))
} else {
None
};
(every, unit, starts)
} else {
(None, None, None)
};
Ok(RefreshTriggerProperty {
method,
kind,
every,
unit,
starts,
})
}
/// Implemented from Python `_parse_remote_with_connection`.
///
/// Consumes an optional `WITH CONNECTION` keyword pair; always yields `None`.
#[allow(unused_variables, unused_mut)]
pub fn parse_remote_with_connection(&mut self) -> Result<Option<Expression>> {
    let _ = self.match_text_seq(&["WITH", "CONNECTION"]);
    Ok(None)
}
/// Implemented from Python `_parse_respect_or_ignore_nulls`.
///
/// Consumes an optional `IGNORE NULLS` or `RESPECT NULLS` keyword pair
/// (tried in that order); always yields `None`.
#[allow(unused_variables, unused_mut)]
pub fn parse_respect_or_ignore_nulls(&mut self) -> Result<Option<Expression>> {
    // `||` short-circuits, so at most one of the pairs is consumed.
    let _ = self.match_text_seq(&["IGNORE", "NULLS"])
        || self.match_text_seq(&["RESPECT", "NULLS"]);
    Ok(None)
}
/// parse_retention_period - Parses HISTORY_RETENTION_PERIOD (TSQL).
/// Python: _parse_retention_period
/// Format: INFINITE | <number> DAY | DAYS | MONTH | MONTHS | YEAR | YEARS
///
/// Produces a single `Var` whose text is "<number> <unit>" (or just the
/// unit, e.g. "INFINITE", when no number is present).
pub fn parse_retention_period(&mut self) -> Result<Option<Expression>> {
    let mut rendered = String::new();
    // Optional numeric count; rendered with a trailing space separator.
    if let Some(Expression::Literal(lit)) = self.parse_number()? {
        if let Literal::Number(n) = lit.as_ref() {
            rendered.push_str(n);
            rendered.push(' ');
        }
    }
    // The unit (or INFINITE) is whatever token comes next, taken verbatim.
    if let Some(Expression::Var(v)) = self.parse_var_any_token()? {
        rendered.push_str(&v.this);
    }
    Ok(Some(Expression::Var(Box::new(Var { this: rendered }))))
}
/// Consume the next token — whatever it is — and wrap its text in a `Var`.
/// Returns `None` only when the token stream is exhausted.
fn parse_var_any_token(&mut self) -> Result<Option<Expression>> {
    if self.is_at_end() {
        return Ok(None);
    }
    let text = self.advance().text.clone();
    Ok(Some(Expression::Var(Box::new(Var { this: text }))))
}
/// Parse a RETURNING clause (PostgreSQL) for INSERT/UPDATE/DELETE,
/// including the Oracle-style `RETURNING ... INTO target` form.
///
/// Returns `None` when the next token is not RETURNING.
#[allow(unused_variables, unused_mut)]
pub fn parse_returning(&mut self) -> Result<Option<Expression>> {
    if !self.match_token(TokenType::Returning) {
        return Ok(None);
    }
    // Returned column list (or `*`).
    let expressions = self.parse_expression_list()?;
    // Optional Oracle-style INTO target.
    let mut into = None;
    if self.match_token(TokenType::Into) {
        into = self.parse_table()?.map(Box::new);
    }
    Ok(Some(Expression::Returning(Box::new(Returning {
        expressions,
        into,
    }))))
}
/// Parse a TSQL OUTPUT clause, used in INSERT/UPDATE/DELETE and MERGE.
///
/// Reads a comma-separated list of expressions, each with an optional
/// `AS alias`, followed by an optional `INTO <target>`.
pub fn parse_output_clause(&mut self) -> Result<OutputClause> {
    let mut columns = Vec::new();
    loop {
        let mut item = self.parse_expression()?;
        // Optional `AS alias` on each output expression.
        if self.match_token(TokenType::As) {
            let alias = self.expect_identifier_or_keyword_with_quoted()?;
            item = Expression::Alias(Box::new(Alias {
                this: item,
                alias,
                column_aliases: Vec::new(),
                pre_alias_comments: Vec::new(),
                trailing_comments: Vec::new(),
                inferred_type: None,
            }));
        }
        columns.push(item);
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    // Optional `INTO <target>` after the column list.
    let into_table = if self.match_token(TokenType::Into) {
        Some(self.parse_expression()?)
    } else {
        None
    };
    Ok(OutputClause {
        columns,
        into_table,
    })
}
/// Implemented from Python `_parse_returns`.
/// Calls: parse_types
///
/// Only the `NULL ON NULL INPUT` form is recognized here; it yields an
/// empty Schema node. Anything else leaves the stream untouched and
/// yields `None`.
#[allow(unused_variables, unused_mut)]
pub fn parse_returns(&mut self) -> Result<Option<Expression>> {
    if !self.match_text_seq(&["NULL", "ON", "NULL", "INPUT"]) {
        return Ok(None);
    }
    Ok(Some(Expression::Schema(Box::new(Schema {
        this: None,
        expressions: Vec::new(),
    }))))
}
/// Parse a `ROW FORMAT ...` clause (the ROW token is already consumed).
///
/// Delegates to [`Self::parse_row_format`] when FORMAT follows, producing a
/// RowFormatSerdeProperty or RowFormatDelimitedProperty; otherwise `None`.
pub fn parse_row(&mut self) -> Result<Option<Expression>> {
    // Python: if not self._match(TokenType.FORMAT): return None
    match self.match_token(TokenType::Format) {
        true => self.parse_row_format(),
        false => Ok(None),
    }
}
/// parse_row_format - Implemented from Python _parse_row_format
/// Parses SERDE or DELIMITED row format specifications
///
/// SERDE form: `SERDE '<class>' [WITH SERDEPROPERTIES (...)]`.
/// DELIMITED form accepts its sub-clauses in this fixed order, each
/// optional: FIELDS TERMINATED BY [ESCAPED BY], COLLECTION ITEMS
/// TERMINATED BY, MAP KEYS TERMINATED BY, LINES TERMINATED BY,
/// NULL DEFINED AS, then WITH SERDEPROPERTIES.
pub fn parse_row_format(&mut self) -> Result<Option<Expression>> {
// Check for SERDE row format
if self.match_text_seq(&["SERDE"]) {
let this = self.parse_string()?;
let serde_properties = self.parse_serde_properties(false)?;
return Ok(Some(Expression::RowFormatSerdeProperty(Box::new(
RowFormatSerdeProperty {
// A missing class string is represented as Null rather than an error
this: Box::new(this.unwrap_or(Expression::Null(Null))),
serde_properties: serde_properties.map(Box::new),
},
))));
}
// Check for DELIMITED row format (the keyword itself is optional)
self.match_text_seq(&["DELIMITED"]);
let mut fields = None;
let mut escaped = None;
let mut collection_items = None;
let mut map_keys = None;
let mut lines = None;
let mut null = None;
// Parse FIELDS TERMINATED BY
if self.match_text_seq(&["FIELDS", "TERMINATED", "BY"]) {
fields = self.parse_string()?.map(Box::new);
// Parse optional ESCAPED BY (only valid directly after FIELDS ... BY)
if self.match_text_seq(&["ESCAPED", "BY"]) {
escaped = self.parse_string()?.map(Box::new);
}
}
// Parse COLLECTION ITEMS TERMINATED BY
if self.match_text_seq(&["COLLECTION", "ITEMS", "TERMINATED", "BY"]) {
collection_items = self.parse_string()?.map(Box::new);
}
// Parse MAP KEYS TERMINATED BY
if self.match_text_seq(&["MAP", "KEYS", "TERMINATED", "BY"]) {
map_keys = self.parse_string()?.map(Box::new);
}
// Parse LINES TERMINATED BY
if self.match_text_seq(&["LINES", "TERMINATED", "BY"]) {
lines = self.parse_string()?.map(Box::new);
}
// Parse NULL DEFINED AS
if self.match_text_seq(&["NULL", "DEFINED", "AS"]) {
null = self.parse_string()?.map(Box::new);
}
// Parse optional WITH SERDEPROPERTIES
let serde = self.parse_serde_properties(false)?.map(Box::new);
Ok(Some(Expression::RowFormatDelimitedProperty(Box::new(
RowFormatDelimitedProperty {
fields,
escaped,
collection_items,
map_keys,
lines,
null,
serde,
},
))))
}
/// Ported from Python `_parse_schema`.
///
/// Parses a schema definition `(col1 type1, col2 type2, ...)` with no
/// attached table reference, as used for CREATE TABLE column definitions.
#[allow(unused_variables, unused_mut)]
pub fn parse_schema(&mut self) -> Result<Option<Expression>> {
    // Delegate to the general form with no preceding table expression.
    self.parse_schema_with_this(None)
}
/// Parse a parenthesized schema body `(column defs / constraints, ...)`,
/// attaching it to an optional preceding table reference `this`.
///
/// If no opening parenthesis follows, or the parenthesis actually opens a
/// subquery (SELECT/WITH — in which case the LParen is put back), `this` is
/// returned unchanged.
///
/// Cleanup: the redundant identity `this.map(|e| e)` (clippy
/// `map_identity`) is replaced with `this`.
fn parse_schema_with_this(&mut self, this: Option<Expression>) -> Result<Option<Expression>> {
    // Check for opening parenthesis
    if !self.match_token(TokenType::LParen) {
        return Ok(this);
    }
    // `(SELECT ...` / `(WITH ...` is a subquery, not a schema: retreat so
    // the caller sees the LParen again.
    if self.check(TokenType::Select) || self.check(TokenType::With) {
        self.current -= 1;
        return Ok(this);
    }
    // Parse comma-separated column definitions and table constraints.
    let mut expressions = Vec::new();
    if !self.check(TokenType::RParen) {
        loop {
            // Constraints are tried before plain field definitions.
            if let Some(constraint) = self.parse_constraint()? {
                expressions.push(constraint);
            } else if let Some(field_def) = self.parse_field_def()? {
                expressions.push(field_def);
            } else {
                break;
            }
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
    }
    self.expect(TokenType::RParen)?;
    Ok(Some(Expression::Schema(Box::new(Schema {
        this: this.map(Box::new),
        expressions,
    }))))
}
/// Parse a schema identifier: `name` or `name(col, ...)`.
/// Used for the TSQL `ON filegroup (partition_column)` syntax.
fn parse_schema_identifier(&mut self) -> Result<Expression> {
    // The filegroup/identifier name itself.
    let name_expr = Expression::Identifier(self.expect_identifier_with_quoted()?);
    // A bare identifier when no column list follows.
    if !self.match_token(TokenType::LParen) {
        return Ok(name_expr);
    }
    // Parenthesized, comma-separated column identifiers.
    let mut columns = Vec::new();
    loop {
        columns.push(Expression::Identifier(self.expect_identifier_with_quoted()?));
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    self.expect(TokenType::RParen)?;
    Ok(Expression::Schema(Box::new(Schema {
        this: Some(Box::new(name_expr)),
        expressions: columns,
    })))
}
/// Implemented from Python `_parse_security`.
///
/// Consumes an optional NONE/DEFINER/INVOKER keyword; always yields `None`.
#[allow(unused_variables, unused_mut)]
pub fn parse_security(&mut self) -> Result<Option<Expression>> {
    let _ = self.match_texts(&["NONE", "DEFINER", "INVOKER"]);
    Ok(None)
}
/// Parse either a full SELECT statement or a plain expression.
/// Python: _parse_select_or_expression
///
/// When neither alternative matches, the parser position is restored and
/// `None` is returned.
pub fn parse_select_or_expression(&mut self) -> Result<Option<Expression>> {
    // Remember where we started in case neither alternative matches.
    let checkpoint = self.current;
    // A leading SELECT keyword means a full query.
    if self.check(TokenType::Select) {
        let select = self.parse_select()?;
        return Ok(Some(select));
    }
    // Otherwise try a (possibly compound) boolean expression.
    match self.parse_disjunction()? {
        Some(expr) => Ok(Some(expr)),
        None => {
            // Nothing parsed — rewind to the checkpoint.
            self.current = checkpoint;
            Ok(None)
        }
    }
}
/// Implemented from Python `_parse_select_query`.
/// Calls: parse_string, parse_table, parse_describe
///
/// Consumes an optional STRUCT/VALUE keyword; always yields `None`.
#[allow(unused_variables, unused_mut)]
pub fn parse_select_query(&mut self) -> Result<Option<Expression>> {
    let _ = self.match_texts(&["STRUCT", "VALUE"]);
    Ok(None)
}
/// Implemented from Python `_parse_sequence_properties`.
/// Calls: parse_number, parse_term, parse_column
///
/// Only `INCREMENT` currently yields a (still empty) SequenceProperties
/// node; `BY` and `=` are consumed but produce `None`.
#[allow(unused_variables, unused_mut)]
pub fn parse_sequence_properties(&mut self) -> Result<Option<Expression>> {
    if self.match_text_seq(&["INCREMENT"]) {
        let props = SequenceProperties {
            increment: None,
            minvalue: None,
            maxvalue: None,
            cache: None,
            start: None,
            owned: None,
            options: Vec::new(),
        };
        return Ok(Some(Expression::SequenceProperties(Box::new(props))));
    }
    // BY and `=` are recognized but contribute nothing on their own;
    // `||` short-circuits so at most one is consumed, matching the original
    // early-return order.
    let _ = self.match_text_seq(&["BY"]) || self.match_text_seq(&["="]);
    Ok(None)
}
/// parse_serde_properties - Implemented from Python _parse_serde_properties
/// Parses SERDEPROPERTIES clause: [WITH] SERDEPROPERTIES (key=value, ...)
///
/// `with_` forces the WITH flag (used when the caller already consumed
/// WITH). If SERDEPROPERTIES does not follow, the parser position is fully
/// restored (including any WITH consumed here) and `None` is returned.
pub fn parse_serde_properties(&mut self, with_: bool) -> Result<Option<Expression>> {
// Checkpoint for backtracking if this turns out not to be SERDEPROPERTIES.
let start_index = self.current;
let has_with = with_ || self.match_text_seq(&["WITH"]);
// Check for SERDEPROPERTIES keyword
if !self.match_token(TokenType::SerdeProperties) {
self.current = start_index;
return Ok(None);
}
// Parse wrapped properties manually since parse_property doesn't handle 'key'='value' syntax
let mut expressions = Vec::new();
if self.match_token(TokenType::LParen) {
loop {
if self.check(TokenType::RParen) {
break;
}
// Parse 'key'='value' or key=value; a key without `=` is kept bare
let key = self.parse_primary()?;
if self.match_token(TokenType::Eq) {
let value = self.parse_primary()?;
expressions.push(Expression::Eq(Box::new(BinaryOp::new(key, value))));
} else {
expressions.push(key);
}
if !self.match_token(TokenType::Comma) {
break;
}
}
self.expect(TokenType::RParen)?;
}
// The WITH flag is encoded as an optional boolean expression on the node.
Ok(Some(Expression::SerdeProperties(Box::new(
SerdeProperties {
expressions,
with_: if has_with {
Some(Box::new(Expression::Boolean(BooleanLiteral {
value: true,
})))
} else {
None
},
},
))))
}
/// parse_session_parameter - Ported from Python _parse_session_parameter
///
/// Parses session parameters (@@var or @@session.var); the leading `@@` is
/// already consumed by the caller. Examples: @@session.sql_mode,
/// @@global.autocommit. With dot notation, the part before the dot becomes
/// the `kind` (e.g. "session"/"global") and the part after it the name.
#[allow(unused_variables, unused_mut)]
pub fn parse_session_parameter(&mut self) -> Result<Option<Expression>> {
// Parse the first identifier or primary
let first = if let Some(id) = self.parse_id_var()? {
id
} else if let Some(primary) = self.parse_primary_or_var()? {
primary
} else {
return Ok(None);
};
// Check for dot notation (kind.name)
let (kind, this) = if self.match_token(TokenType::Dot) {
// kind is the first part, parse the second.
// Only an Identifier first part yields a kind string; anything else
// leaves kind as None even though the dot was consumed.
let kind_name = match &first {
Expression::Identifier(id) => Some(id.name.clone()),
_ => None,
};
// NOTE(review): `.ok().flatten()` swallows any parse error from the
// fallback; if both attempts fail, `first` is reused as the name even
// though the Dot was consumed — confirm this lossy fallback is intended.
let second = self
.parse_var()?
.or_else(|| self.parse_primary_or_var().ok().flatten());
(kind_name, second.unwrap_or(first))
} else {
(None, first)
};
Ok(Some(Expression::SessionParameter(Box::new(
SessionParameter {
this: Box::new(this),
kind,
},
))))
}
/// parse_set_item - Ported from Python _parse_set_item
/// Parses an item in a SET statement (GLOBAL, LOCAL, SESSION prefixes, or assignment)
#[allow(unused_variables, unused_mut)]
pub fn parse_set_item(&mut self) -> Result<Option<Expression>> {
// Check for specific prefixes
let kind = if self.match_text_seq(&["GLOBAL"]) {
Some("GLOBAL".to_string())
} else if self.match_text_seq(&["LOCAL"]) {
Some("LOCAL".to_string())
} else if self.match_text_seq(&["SESSION"]) {
Some("SESSION".to_string())
} else {
None
};
// Delegate to set_item_assignment
self.parse_set_item_assignment()
}
/// parse_set_item_assignment - Implemented from Python _parse_set_item_assignment
/// Parses SET variable = value assignments
///
/// Accepts `=`, `TO`, or `:=` as the assignment delimiter. A bare
/// `TRANSACTION` keyword yields a placeholder SetItem (the full clause is
/// parsed by parse_set_transaction). On any failure before the delimiter,
/// the parser position is restored and `None` is returned.
pub fn parse_set_item_assignment(&mut self) -> Result<Option<Expression>> {
// Checkpoint for backtracking.
let start_index = self.current;
// Try to parse as TRANSACTION
if self.match_text_seq(&["TRANSACTION"]) {
// This is handled by parse_set_transaction; the Null value marks the
// placeholder nature of this node.
return Ok(Some(Expression::SetItem(Box::new(SetItem {
name: Expression::Var(Box::new(Var {
this: "TRANSACTION".to_string(),
})),
value: Expression::Null(Null),
kind: None,
no_equals: false,
}))));
}
// Parse left side: primary or column.
// NOTE(review): `.ok().flatten()` discards any error from parse_column —
// confirm silently treating it as "no match" is intended.
let left = self
.parse_primary_or_var()?
.or_else(|| self.parse_column().ok().flatten());
if left.is_none() {
self.current = start_index;
return Ok(None);
}
// Check for assignment delimiter (= or TO or :=)
if !self.match_texts(&["=", "TO", ":="]) {
self.current = start_index;
return Ok(None);
}
// Parse right side: value
// First try string literals (preserve quoting), then booleans/numbers, then identifiers
let right_val = if self.check(TokenType::String) {
let text = self.advance().text.clone();
Expression::Literal(Box::new(Literal::String(text)))
} else if self.check(TokenType::False) {
self.skip();
Expression::Boolean(BooleanLiteral { value: false })
} else if self.check(TokenType::True) {
self.skip();
Expression::Boolean(BooleanLiteral { value: true })
} else {
let right = self
.parse_id_var()?
.or_else(|| self.parse_primary_or_var().ok().flatten());
// Convert Column/Identifier to Var so the value renders unquoted;
// a completely missing value degrades to Null rather than an error.
match right {
Some(Expression::Column(col)) => Expression::Var(Box::new(Var {
this: col.name.name.clone(),
})),
Some(Expression::Identifier(id)) => Expression::Var(Box::new(Var {
this: id.name.clone(),
})),
Some(other) => other,
None => Expression::Null(Null),
}
};
Ok(Some(Expression::SetItem(Box::new(SetItem {
name: left
.ok_or_else(|| self.parse_error("Expected variable name in SET statement"))?,
value: right_val,
kind: None,
no_equals: false,
}))))
}
/// Parses UNION/INTERSECT/EXCEPT operations starting from the current
/// position (i.e. from the left operand, not from the operator).
/// Python: _parse_set_operations
///
/// First parses a SELECT — optionally parenthesized — as the left operand,
/// then folds any following set operators onto it.
pub fn parse_set_operations(&mut self) -> Result<Option<Expression>> {
    let left;
    if self.check(TokenType::Select) {
        left = Some(self.parse_select()?);
    } else if self.match_token(TokenType::LParen) {
        let inner = self.parse_select()?;
        // The closing paren is tolerated but not required here.
        self.match_token(TokenType::RParen);
        left = Some(inner);
    } else {
        // No SELECT at this position: nothing to parse.
        return Ok(None);
    }
    self.parse_set_operations_with_expr(left)
}
/// parse_set_operations_with_expr - Folds any following set operations
/// (UNION/INTERSECT/EXCEPT) onto an already-parsed left expression.
///
/// Bug fix: the previous loop relied on `parse_set_operation_with_expr`
/// returning `None` to terminate, but that helper returns the left
/// expression unchanged — without consuming any tokens — when no set
/// operator follows, so the loop never ended. We now stop as soon as an
/// iteration fails to advance the token position. Passing ownership of the
/// accumulator also removes the per-iteration clone of the whole tree.
pub fn parse_set_operations_with_expr(
    &mut self,
    this: Option<Expression>,
) -> Result<Option<Expression>> {
    let mut result = this;
    while result.is_some() {
        let before = self.current;
        result = self.parse_set_operation_with_expr(result)?;
        // No tokens consumed means no set operator was found: stop folding.
        if self.current == before {
            break;
        }
    }
    Ok(result)
}
/// parse_set_operation_with_expr - Parses a single set operation (UNION, INTERSECT, EXCEPT)
fn parse_set_operation_with_expr(
&mut self,
left: Option<Expression>,
) -> Result<Option<Expression>> {
let left_expr = match left {
Some(e) => e,
None => return Ok(None),
};
// Check for UNION, INTERSECT, EXCEPT
let op_type = if self.match_token(TokenType::Union) {
"UNION"
} else if self.match_token(TokenType::Intersect) {
"INTERSECT"
} else if self.match_token(TokenType::Except) {
"EXCEPT"
} else {
return Ok(Some(left_expr));
};
// Check for ALL or DISTINCT
let (all, distinct) = if self.match_token(TokenType::All) {
(true, false)
} else {
let d = self.match_token(TokenType::Distinct);
(false, d)
};
// DuckDB: UNION [ALL] BY NAME SELECT ...
let by_name = self.match_token(TokenType::By) && self.match_identifier("NAME");
// Parse the right side (SELECT or subquery)
let right = if self.check(TokenType::Select) {
self.parse_select()?
} else if self.match_token(TokenType::LParen) {
let inner = self.parse_select()?;
self.match_token(TokenType::RParen);
inner
} else {
return Ok(Some(left_expr));
};
// Create the appropriate set operation expression
match op_type {
"UNION" => Ok(Some(Expression::Union(Box::new(Union {
left: left_expr,
right,
all,
distinct,
with: None,
order_by: None,
limit: None,
offset: None,
distribute_by: None,
sort_by: None,
cluster_by: None,
by_name,
side: None,
kind: None,
corresponding: false,
strict: false,
on_columns: Vec::new(),
})))),
"INTERSECT" => Ok(Some(Expression::Intersect(Box::new(Intersect {
left: left_expr,
right,
all,
distinct,
with: None,
order_by: None,
limit: None,
offset: None,
distribute_by: None,
sort_by: None,
cluster_by: None,
by_name,
side: None,
kind: None,
corresponding: false,
strict: false,
on_columns: Vec::new(),
})))),
"EXCEPT" => Ok(Some(Expression::Except(Box::new(Except {
left: left_expr,
right,
all,
distinct,
with: None,
order_by: None,
limit: None,
offset: None,
distribute_by: None,
sort_by: None,
cluster_by: None,
by_name,
side: None,
kind: None,
corresponding: false,
strict: false,
on_columns: Vec::new(),
})))),
_ => Ok(Some(left_expr)),
}
}
/// parse_set_transaction - Implemented from Python _parse_set_transaction
///
/// Currently only consumes the TRANSACTION keyword; no AST node is built yet.
#[allow(unused_variables, unused_mut)]
pub fn parse_set_transaction(&mut self) -> Result<Option<Expression>> {
    // Consume TRANSACTION if present; either way nothing is produced.
    let _ = self.match_text_seq(&["TRANSACTION"]);
    Ok(None)
}
/// Helper to consume an optional ClickHouse SETTINGS clause
/// Used in SHOW, CHECK TABLE, and other ClickHouse statements
fn parse_clickhouse_settings_clause(&mut self) -> Result<()> {
    // The parsed property is discarded; this only advances past the clause.
    if self.match_token(TokenType::Settings) {
        let _ = self.parse_settings_property()?;
    }
    Ok(())
}
/// parse_settings_property - Parses SETTINGS property (ClickHouse)
/// Python: _parse_settings_property
/// Format: SETTINGS key=value, key=value, ...
pub fn parse_settings_property(&mut self) -> Result<Option<Expression>> {
    let mut expressions = Vec::new();
    // Comma-separated assignments; stop at the first non-assignment.
    while let Some(assignment) = self.parse_assignment()? {
        expressions.push(assignment);
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    Ok(Some(Expression::SettingsProperty(Box::new(
        SettingsProperty { expressions },
    ))))
}
/// parse_simplified_pivot - Ported from Python _parse_simplified_pivot
/// Handles DuckDB simplified PIVOT/UNPIVOT syntax:
/// PIVOT table ON columns [IN (...)] USING agg_func [AS alias], ... [GROUP BY ...]
/// UNPIVOT table ON columns [INTO NAME col VALUE col, ...]
///
/// `is_unpivot` selects UNPIVOT semantics (recorded on the resulting `Pivot`
/// node); both forms share the same clause grammar below.
#[allow(unused_variables, unused_mut)]
pub fn parse_simplified_pivot(&mut self, is_unpivot: bool) -> Result<Option<Expression>> {
    // Parse the source table (can be a subquery like (SELECT 1 AS col1, 2 AS col2))
    let this = if self.check(TokenType::LParen) {
        // Could be parenthesized subquery
        self.skip(); // consume (
        if self.check(TokenType::Select) || self.check(TokenType::With) {
            let inner = self.parse_statement()?;
            self.expect(TokenType::RParen)?;
            Some(Expression::Subquery(Box::new(Subquery {
                this: inner,
                alias: None,
                column_aliases: Vec::new(),
                order_by: None,
                limit: None,
                offset: None,
                lateral: false,
                modifiers_inside: false,
                trailing_comments: Vec::new(),
                distribute_by: None,
                sort_by: None,
                cluster_by: None,
                inferred_type: None,
            })))
        } else {
            // Not a subquery, retreat and parse as expression in parens
            self.current -= 1; // un-consume the (
            Some(self.parse_primary()?)
        }
    } else {
        // Parse table reference (e.g., Cities, schema.table, duckdb_functions())
        Some(self.parse_primary()?)
    };
    // Parse ON columns; each entry may carry an IN (...) list (PIVOT) or an
    // AS alias (UNPIVOT), and may itself be a compound expression.
    let expressions = if self.match_text_seq(&["ON"]) {
        let mut on_exprs = Vec::new();
        loop {
            // Parse ON expression - use parse_bitwise to handle complex expressions like Country || '_' || Name
            let on_expr = self.parse_bitwise()?;
            if on_expr.is_none() {
                break;
            }
            let mut expr = on_expr.unwrap();
            // Check for IN clause on this column
            if self.match_token(TokenType::In) {
                if self.match_token(TokenType::LParen) {
                    let mut in_exprs = Vec::new();
                    loop {
                        if self.check(TokenType::RParen) {
                            break;
                        }
                        if let Some(val) = self.parse_select_or_expression()? {
                            in_exprs.push(val);
                        }
                        if !self.match_token(TokenType::Comma) {
                            break;
                        }
                    }
                    self.expect(TokenType::RParen)?;
                    expr = Expression::In(Box::new(In {
                        this: expr,
                        expressions: in_exprs,
                        query: None,
                        not: false,
                        global: false,
                        unnest: None,
                        is_field: false,
                    }));
                }
                // NOTE(review): IN without a following '(' consumes the IN
                // token and attaches nothing -- confirm this is intended.
            }
            // Check for alias (UNPIVOT ON (jan, feb, mar) AS q1, ...)
            else if self.match_token(TokenType::As) {
                let alias_name = self.expect_identifier()?;
                expr =
                    Expression::Alias(Box::new(Alias::new(expr, Identifier::new(alias_name))));
            }
            on_exprs.push(expr);
            // Continue if comma
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
        on_exprs
    } else {
        Vec::new()
    };
    // Parse INTO for UNPIVOT columns (INTO NAME col VALUE col, ...)
    let into = self.parse_unpivot_columns()?;
    // Parse USING clause (aggregation functions with optional aliases)
    // e.g., USING SUM(Population), USING SUM(Population) AS total, MAX(Population) AS max
    // e.g., USING CAST(AVG(LENGTH(function_name)) AS INT)
    let using = if self.match_text_seq(&["USING"]) {
        let mut using_exprs = Vec::new();
        loop {
            // Stop if we hit GROUP BY or end of input
            if self.is_at_end() || self.check(TokenType::Group) || self.check(TokenType::RParen)
            {
                break;
            }
            // Parse the primary expression (function call, possibly with cast :: operator)
            let func = self.parse_primary()?;
            // Check for :: cast operator (e.g., SUM(Population)::INTEGER)
            let expr = if self.match_token(TokenType::DColon) {
                let data_type = self.parse_data_type()?;
                Expression::Cast(Box::new(Cast {
                    this: func,
                    to: data_type,
                    trailing_comments: Vec::new(),
                    double_colon_syntax: true,
                    format: None,
                    default: None,
                    inferred_type: None,
                }))
            } else {
                func
            };
            // Try to parse alias (AS alias)
            if self.match_token(TokenType::As) {
                let alias_name = self.expect_identifier()?;
                using_exprs.push(Expression::Alias(Box::new(Alias::new(
                    expr,
                    Identifier::new(alias_name),
                ))));
            } else {
                using_exprs.push(expr);
            }
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
        using_exprs
    } else {
        Vec::new()
    };
    // Parse optional GROUP BY
    let group = self.parse_group()?;
    // `this` is always Some by construction above, so unwrap cannot panic.
    let source = this.unwrap();
    Ok(Some(Expression::Pivot(Box::new(Pivot {
        this: source,
        expressions,
        fields: Vec::new(),
        using,
        group: group.map(Box::new),
        unpivot: is_unpivot,
        into: into.map(Box::new),
        alias: None,
        include_nulls: None,
        default_on_null: None,
        with: None,
    }))))
}
/// parse_slice - Parses array slice syntax [start:end:step]
/// Python: _parse_slice
///
/// Convenience wrapper over `parse_slice_with_this` with no start expression.
pub fn parse_slice(&mut self) -> Result<Option<Expression>> {
    self.parse_slice_with_this(None)
}
/// Implementation of parse_slice with 'this' parameter
///
/// After the optional start expression, parses `:end[:step]` and builds a
/// `Slice` node. If no colon follows, `this` is returned untouched.
pub fn parse_slice_with_this(
    &mut self,
    this: Option<Expression>,
) -> Result<Option<Expression>> {
    // Without a colon this is not a slice; hand the input back unchanged.
    if !self.match_token(TokenType::Colon) {
        return Ok(this);
    }
    // End expression. `-:` is shorthand for -1 (count from the end); an
    // immediately following `:` or `]` means the end was omitted.
    let end = if self.check(TokenType::Dash) && self.check_next(TokenType::Colon) {
        self.skip(); // the dash
        Some(Expression::Neg(Box::new(UnaryOp::new(Expression::Literal(
            Box::new(Literal::Number("1".to_string())),
        )))))
    } else if self.check(TokenType::Colon) || self.check(TokenType::RBracket) {
        None
    } else {
        Some(self.parse_unary()?)
    };
    // Optional step after a second colon; `]` right after means no step.
    let mut step = None;
    if self.match_token(TokenType::Colon) && !self.check(TokenType::RBracket) {
        step = Some(self.parse_unary()?);
    }
    Ok(Some(Expression::Slice(Box::new(Slice {
        this: this.map(Box::new),
        expression: end.map(Box::new),
        step: step.map(Box::new),
    }))))
}
/// Parse a slice element (start, end, or step in array slicing)
/// Uses parse_disjunction so binary expressions (e.g. `y - 1`) are handled
/// while still stopping at : or ].
/// Returns None for empty elements (e.g., [:] or [::step])
fn parse_slice_element(&mut self) -> Result<Option<Expression>> {
    // An immediate `:` or `]` means this element was omitted.
    if self.check(TokenType::Colon) || self.check(TokenType::RBracket) {
        return Ok(None);
    }
    // `-:` is shorthand for -1 (from the end), as in [:-:-1].
    // The colon itself is left for the caller to consume.
    if self.check(TokenType::Dash) && self.check_next(TokenType::Colon) {
        self.skip(); // the dash
        let minus_one = Expression::Neg(Box::new(UnaryOp::new(Expression::Literal(
            Box::new(Literal::Number("1".to_string())),
        ))));
        return Ok(Some(minus_one));
    }
    self.parse_disjunction()
}
/// parse_sort - Ported from Python _parse_sort
/// Parses SORT BY clause (Hive/Spark)
#[allow(unused_variables, unused_mut)]
pub fn parse_sort(&mut self) -> Result<Option<Expression>> {
    if !self.match_keywords(&[TokenType::Sort, TokenType::By]) {
        return Ok(None);
    }
    // Comma-separated list of ordered expressions.
    let mut expressions = Vec::new();
    while let Some(ordered) = self.parse_ordered_item()? {
        expressions.push(ordered);
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    Ok(Some(Expression::SortBy(Box::new(SortBy { expressions }))))
}
/// parse_cluster_by_clause - Parses CLUSTER BY clause (Hive/Spark)
#[allow(unused_variables, unused_mut)]
pub fn parse_cluster_by_clause(&mut self) -> Result<Option<Expression>> {
    if !self.match_keywords(&[TokenType::Cluster, TokenType::By]) {
        return Ok(None);
    }
    // Comma-separated list of ordered expressions.
    let mut expressions: Vec<Ordered> = Vec::new();
    while let Some(ordered) = self.parse_ordered_item()? {
        expressions.push(ordered);
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    Ok(Some(Expression::ClusterBy(Box::new(ClusterBy {
        expressions,
    }))))
}
/// parse_distribute_by_clause - Parses DISTRIBUTE BY clause (Hive/Spark)
#[allow(unused_variables, unused_mut)]
pub fn parse_distribute_by_clause(&mut self) -> Result<Option<Expression>> {
    if !self.match_keywords(&[TokenType::Distribute, TokenType::By]) {
        return Ok(None);
    }
    Ok(Some(Expression::DistributeBy(Box::new(DistributeBy {
        expressions: self.parse_expression_list()?,
    }))))
}
/// parse_sortkey - Redshift/PostgreSQL SORTKEY property
/// Parses SORTKEY(column1, column2, ...) with optional COMPOUND modifier
#[allow(unused_variables, unused_mut)]
pub fn parse_sortkey(&mut self) -> Result<Option<Expression>> {
    let this;
    if self.match_token(TokenType::LParen) {
        // Wrapped form: SORTKEY(col[, col ...])
        let mut columns = Vec::new();
        while let Some(id) = self.parse_id_var()? {
            columns.push(id);
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
        self.match_token(TokenType::RParen);
        this = match columns.len() {
            0 => return Ok(None),
            // A single column is stored directly, several become a tuple.
            1 => columns.into_iter().next().unwrap(),
            _ => Expression::Tuple(Box::new(Tuple {
                expressions: columns,
            })),
        };
    } else {
        // Bare form: a single column without parentheses.
        match self.parse_id_var()? {
            Some(id) => this = id,
            None => return Ok(None),
        }
    }
    Ok(Some(Expression::SortKeyProperty(Box::new(
        SortKeyProperty {
            this: Box::new(this),
            // compound is set by the caller if the COMPOUND keyword was matched
            compound: None,
        },
    ))))
}
/// parse_star - Parse STAR (*) token with optional EXCEPT/REPLACE/RENAME
/// Python: if self._match(TokenType.STAR): return self._parse_star_ops()
pub fn parse_star(&mut self) -> Result<Option<Expression>> {
    if !self.match_token(TokenType::Star) {
        return Ok(None);
    }
    // Each trailing clause is optional and parsed in this fixed order
    // (struct fields are evaluated top to bottom).
    let star = Star {
        table: None,
        except: self.parse_star_except()?,
        replace: self.parse_star_replace()?,
        rename: self.parse_star_rename()?,
        trailing_comments: Vec::new(),
        span: None,
    };
    Ok(Some(Expression::Star(star)))
}
/// try_parse_identifier - Try to parse an identifier, returning None if not found
fn try_parse_identifier(&mut self) -> Option<Identifier> {
    if !self.is_identifier_token() {
        return None;
    }
    let token = self.advance();
    Some(Identifier {
        // Record whether the source used a quoted identifier token.
        quoted: token.token_type == TokenType::QuotedIdentifier,
        name: token.text,
        trailing_comments: Vec::new(),
        span: None,
    })
}
/// parse_star_except - Parse EXCEPT/EXCLUDE clause for Star
/// Example: * EXCEPT (col1, col2)
fn parse_star_except(&mut self) -> Result<Option<Vec<Identifier>>> {
    if !self.match_texts(&["EXCEPT", "EXCLUDE"]) {
        return Ok(None);
    }
    if !self.match_token(TokenType::LParen) {
        // Unparenthesized form: a single excluded column (or nothing).
        return Ok(self.try_parse_identifier().map(|id| vec![id]));
    }
    // Parenthesized list: (col1, col2, ...)
    let mut columns = Vec::new();
    loop {
        if let Some(id) = self.try_parse_identifier() {
            columns.push(id);
        } else if self.is_safe_keyword_as_identifier() {
            // ClickHouse: allow keywords like 'key' as column names in EXCEPT
            let token = self.advance();
            columns.push(Identifier {
                name: token.text,
                quoted: false,
                trailing_comments: Vec::new(),
                span: None,
            });
        } else {
            break;
        }
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    self.match_token(TokenType::RParen);
    Ok(Some(columns))
}
/// parse_star_replace - Parse REPLACE clause for Star
/// Example: * REPLACE (col1 AS alias1, col2 AS alias2)
fn parse_star_replace(&mut self) -> Result<Option<Vec<Alias>>> {
    if !self.match_texts(&["REPLACE"]) {
        return Ok(None);
    }
    if !self.match_token(TokenType::LParen) {
        return Ok(None);
    }
    let mut aliases = Vec::new();
    // Each entry is `expression [AS alias]`.
    while let Some(expr) = self.parse_disjunction()? {
        let alias_name = if self.match_token(TokenType::As) {
            self.try_parse_identifier()
        } else {
            None
        };
        aliases.push(Alias {
            this: expr,
            // A missing alias is represented by an empty identifier.
            alias: alias_name.unwrap_or_else(|| Identifier::new("")),
            column_aliases: Vec::new(),
            pre_alias_comments: Vec::new(),
            trailing_comments: Vec::new(),
            inferred_type: None,
        });
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    self.match_token(TokenType::RParen);
    Ok(Some(aliases))
}
/// parse_star_rename - Parse RENAME clause for Star
/// Example: * RENAME (old_col AS new_col, ...)
fn parse_star_rename(&mut self) -> Result<Option<Vec<(Identifier, Identifier)>>> {
    if !self.match_texts(&["RENAME"]) {
        return Ok(None);
    }
    if !self.match_token(TokenType::LParen) {
        return Ok(None);
    }
    let mut renames = Vec::new();
    // Each entry is `old_name AS new_name`; malformed entries are skipped.
    while let Some(old_name) = self.try_parse_identifier() {
        if self.match_token(TokenType::As) {
            if let Some(new_name) = self.try_parse_identifier() {
                renames.push((old_name, new_name));
            }
        }
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    self.match_token(TokenType::RParen);
    Ok(Some(renames))
}
/// parse_star_op - Helper to parse EXCEPT/REPLACE/RENAME with keywords
/// Returns list of expressions if keywords match
pub fn parse_star_op(&mut self, keywords: &[&str]) -> Result<Option<Vec<Expression>>> {
    if !self.match_texts(keywords) {
        return Ok(None);
    }
    // Wrapped form: KEYWORD (expr, expr, ...)
    if self.match_token(TokenType::LParen) {
        let expressions = self.parse_expression_list()?;
        self.match_token(TokenType::RParen);
        return Ok(Some(expressions));
    }
    // Bare form: one expression, with an optional explicit AS alias.
    let Some(expr) = self.parse_disjunction()? else {
        return Ok(None);
    };
    let mut result = expr;
    if self.match_token(TokenType::As) {
        if let Some(alias_name) = self.try_parse_identifier() {
            result = Expression::Alias(Box::new(Alias {
                this: result,
                alias: alias_name,
                column_aliases: Vec::new(),
                pre_alias_comments: Vec::new(),
                trailing_comments: Vec::new(),
                inferred_type: None,
            }));
        }
    }
    Ok(Some(vec![result]))
}
/// parse_star_ops - Implemented from Python _parse_star_ops
/// Creates a Star expression with EXCEPT/REPLACE/RENAME clauses
/// Also handles * COLUMNS(pattern) syntax for DuckDB column selection
///
/// Fix: previously the COLUMNS keyword was consumed even when no '(' followed
/// (short-circuit `&&`), silently losing the token; the position is now
/// restored in that case.
pub fn parse_star_ops(&mut self) -> Result<Option<Expression>> {
    // Handle * COLUMNS(pattern) (DuckDB), e.g. * COLUMNS(c ILIKE '%suffix')
    let checkpoint = self.current;
    if self.match_text_seq(&["COLUMNS"]) {
        if self.check(TokenType::LParen) {
            self.expect(TokenType::LParen)?;
            let this = self.parse_expression()?;
            self.expect(TokenType::RParen)?;
            // unpack=true marks that this came from `* COLUMNS(...)`.
            return Ok(Some(Expression::Columns(Box::new(Columns {
                this: Box::new(this),
                unpack: Some(Box::new(Expression::Boolean(BooleanLiteral {
                    value: true,
                }))),
            }))));
        }
        // COLUMNS was not a call; put the keyword back.
        self.current = checkpoint;
    }
    // EXCEPT/EXCLUDE: keep only plain identifiers/columns.
    let except = self.parse_star_op(&["EXCEPT", "EXCLUDE"])?.map(|exprs| {
        exprs
            .into_iter()
            .filter_map(|e| match e {
                Expression::Identifier(id) => Some(id),
                Expression::Column(col) => Some(col.name),
                _ => None,
            })
            .collect()
    });
    // REPLACE: keep only aliased expressions.
    let replace = self.parse_star_op(&["REPLACE"])?.map(|exprs| {
        exprs
            .into_iter()
            .filter_map(|e| match e {
                Expression::Alias(a) => Some(*a),
                _ => None,
            })
            .collect()
    });
    // RENAME is consumed but not yet represented in the AST.
    // TODO(review): extract (old, new) pairs once a representation is settled.
    let _rename_exprs = self.parse_star_op(&["RENAME"])?;
    let rename: Option<Vec<(Identifier, Identifier)>> = None;
    Ok(Some(Expression::Star(Star {
        table: None,
        except,
        replace,
        rename,
        trailing_comments: Vec::new(),
        span: None,
    })))
}
/// parse_stored - Implemented from Python _parse_stored
#[allow(unused_variables, unused_mut)]
pub fn parse_stored(&mut self) -> Result<Option<Expression>> {
    // STORED BY ... -> represented as an empty InputOutputFormat for now.
    if self.match_text_seq(&["BY"]) {
        let fmt = InputOutputFormat {
            input_format: None,
            output_format: None,
        };
        return Ok(Some(Expression::InputOutputFormat(Box::new(fmt))));
    }
    // INPUTFORMAT is recognized but not yet represented in the AST.
    let _ = self.match_text_seq(&["INPUTFORMAT"]);
    Ok(None)
}
/// parse_stream - Implemented from Python _parse_stream
///
/// Currently only consumes the STREAM keyword; no AST node is produced.
#[allow(unused_variables, unused_mut)]
pub fn parse_stream(&mut self) -> Result<Option<Expression>> {
    let _ = self.match_text_seq(&["STREAM"]);
    Ok(None)
}
/// parse_string - Parse string literal
/// Python: if self._match_set(self.STRING_PARSERS): return STRING_PARSERS[token_type](...)
pub fn parse_string(&mut self) -> Result<Option<Expression>> {
// Regular string literal
if self.match_token(TokenType::String) {
let text = self.previous().text.clone();
return Ok(Some(Expression::Literal(Box::new(Literal::String(text)))));
}
// National string (N'...')
if self.match_token(TokenType::NationalString) {
let text = self.previous().text.clone();
return Ok(Some(Expression::Literal(Box::new(
Literal::NationalString(text),
))));
}
// Raw string (r"..." or r'...')
if self.match_token(TokenType::RawString) {
let text = self.previous().text.clone();
return Ok(Some(Expression::Literal(Box::new(Literal::RawString(
text,
)))));
}
// Heredoc string
if self.match_token(TokenType::HeredocString) {
let text = self.previous().text.clone();
return Ok(Some(Expression::Literal(Box::new(Literal::String(text)))));
}
// Hex string (X'...' or 0x...)
if self.match_token(TokenType::HexString) {
let text = self.previous().text.clone();
return Ok(Some(Expression::Literal(Box::new(Literal::HexString(
text,
)))));
}
// Bit string (B'...')
if self.match_token(TokenType::BitString) {
let text = self.previous().text.clone();
return Ok(Some(Expression::Literal(Box::new(Literal::BitString(
text,
)))));
}
// Byte string (b"..." - BigQuery style)
if self.match_token(TokenType::ByteString) {
let text = self.previous().text.clone();
return Ok(Some(Expression::Literal(Box::new(Literal::ByteString(
text,
)))));
}
Ok(None)
}
/// parse_string_agg - Parses STRING_AGG function arguments
/// Python: parser.py:6849-6899
/// Handles DISTINCT, separator, ORDER BY, ON OVERFLOW, WITHIN GROUP
///
/// Produces a `GroupConcat` node, the canonical representation of STRING_AGG.
#[allow(unused_variables, unused_mut)]
pub fn parse_string_agg(&mut self) -> Result<Option<Expression>> {
    // Check for DISTINCT
    let distinct = self.match_token(TokenType::Distinct);
    // Parse main expression
    let this = self.parse_disjunction()?;
    if this.is_none() {
        return Ok(None);
    }
    // Parse optional separator (second comma-separated argument)
    let separator = if self.match_token(TokenType::Comma) {
        self.parse_disjunction()?
    } else {
        None
    };
    // Parse ON OVERFLOW clause: either ERROR, or TRUNCATE ['str'] [WITH|WITHOUT COUNT]
    let on_overflow = if self.match_text_seq(&["ON", "OVERFLOW"]) {
        if self.match_text_seq(&["ERROR"]) {
            Some(Box::new(Expression::Var(Box::new(Var {
                this: "ERROR".to_string(),
            }))))
        } else {
            // The TRUNCATE keyword is optional before the filler string.
            self.match_text_seq(&["TRUNCATE"]);
            let truncate_str = self.parse_string()?;
            let with_count = if self.match_text_seq(&["WITH", "COUNT"]) {
                Some(true)
            } else if self.match_text_seq(&["WITHOUT", "COUNT"]) {
                Some(false)
            } else {
                None
            };
            Some(Box::new(Expression::OverflowTruncateBehavior(Box::new(
                OverflowTruncateBehavior {
                    this: truncate_str.map(Box::new),
                    with_count: with_count
                        .map(|c| Box::new(Expression::Boolean(BooleanLiteral { value: c }))),
                },
            ))))
        }
    } else {
        None
    };
    // Parse ORDER BY or WITHIN GROUP (ORDER BY ...)
    let order_by = if self.match_token(TokenType::OrderBy) {
        Some(self.parse_expression_list()?)
    } else if self.match_text_seq(&["WITHIN", "GROUP"]) {
        self.match_token(TokenType::LParen);
        let order = self.parse_order()?;
        self.match_token(TokenType::RParen);
        order.map(|o| vec![o])
    } else {
        None
    };
    // NOTE(review): `on_overflow` and `order_by` are parsed above but never
    // attached to the node below (order_by is hard-coded to None), so these
    // clauses are consumed and dropped -- confirm whether this is intentional.
    // Return as GroupConcat (which is the canonical form for STRING_AGG)
    Ok(Some(Expression::GroupConcat(Box::new(GroupConcatFunc {
        this: this.unwrap(),
        separator: separator,
        order_by: None,
        distinct,
        filter: None,
        limit: None,
        inferred_type: None,
    }))))
}
/// parse_string_as_identifier - Parses a string literal as a quoted identifier
/// Python: _parse_string_as_identifier
/// Used for cases where a string can be used as an identifier (e.g., MySQL)
pub fn parse_string_as_identifier(&mut self) -> Result<Option<Expression>> {
    if !self.match_token(TokenType::String) {
        return Ok(None);
    }
    let text = self.previous().text.clone();
    // Strip one matching pair of surrounding quotes ('...' or "..."), if any.
    let name = text
        .strip_prefix('\'')
        .and_then(|s| s.strip_suffix('\''))
        .or_else(|| text.strip_prefix('"').and_then(|s| s.strip_suffix('"')))
        .map(str::to_string)
        .unwrap_or(text);
    Ok(Some(Expression::Identifier(Identifier {
        name,
        quoted: true,
        trailing_comments: Vec::new(),
        span: None,
    })))
}
/// parse_struct_types - Delegates to parse_types
///
/// STRUCT field types share the general type grammar, so no dedicated
/// parsing is needed here.
#[allow(unused_variables, unused_mut)]
pub fn parse_struct_types(&mut self) -> Result<Option<Expression>> {
    self.parse_types()
}
/// parse_subquery - Ported from Python _parse_subquery
/// Parses a parenthesized SELECT as subquery: (SELECT ...)
#[allow(unused_variables, unused_mut)]
pub fn parse_subquery(&mut self) -> Result<Option<Expression>> {
    if !self.match_token(TokenType::LParen) {
        return Ok(None);
    }
    // Only (SELECT ...) or (WITH ...) qualifies; otherwise put the paren back.
    if !(self.check(TokenType::Select) || self.check(TokenType::With)) {
        self.current -= 1;
        return Ok(None);
    }
    let query = self.parse_statement()?;
    self.expect(TokenType::RParen)?;
    // Optional trailing table alias, e.g. (SELECT ...) AS t
    let alias = self.parse_table_alias_if_present()?;
    Ok(Some(Expression::Subquery(Box::new(Subquery {
        this: query,
        alias,
        column_aliases: Vec::new(),
        order_by: None,
        limit: None,
        offset: None,
        lateral: false,
        modifiers_inside: false,
        trailing_comments: Vec::new(),
        distribute_by: None,
        sort_by: None,
        cluster_by: None,
        inferred_type: None,
    }))))
}
/// Helper to parse table alias if present
///
/// Accepts `[AS] identifier`. For ClickHouse, a keyword token may serve as
/// the alias when AS is explicit. Returns Ok(None) when no alias follows;
/// errors when AS is present but no usable identifier follows it.
fn parse_table_alias_if_present(&mut self) -> Result<Option<Identifier>> {
    // Check for AS keyword
    let explicit_as = self.match_token(TokenType::As);
    // ClickHouse: keywords can be used as table aliases when AS is explicit
    let is_keyword_alias = explicit_as
        && matches!(
            self.config.dialect,
            Some(crate::dialects::DialectType::ClickHouse)
        )
        && self.peek().token_type.is_keyword();
    // Try to parse identifier
    if self.check(TokenType::Identifier)
        || self.check(TokenType::QuotedIdentifier)
        || self.check(TokenType::Var)
        || is_keyword_alias
    {
        // Keyword-as-alias path: taken only when the next token is a keyword
        // and not simultaneously a regular identifier/var token.
        if is_keyword_alias
            && !self.check(TokenType::Identifier)
            && !self.check(TokenType::QuotedIdentifier)
            && !self.check(TokenType::Var)
        {
            let token = self.advance();
            return Ok(Some(Identifier::new(token.text)));
        }
        let token = self.advance();
        let mut alias = Identifier::new(token.text);
        // Remember quoting so the alias round-trips through generation.
        if token.token_type == TokenType::QuotedIdentifier {
            alias.quoted = true;
        }
        return Ok(Some(alias));
    } else if explicit_as {
        // AS was present but no identifier follows - this is an error
        return Err(self.parse_error("Expected identifier after AS"));
    }
    Ok(None)
}
/// parse_substring - Ported from Python _parse_substring
/// Parses SUBSTRING function with two syntax variants:
/// 1. Standard SQL: SUBSTRING(str FROM start [FOR length])
/// 2. Function style: SUBSTRING(str, start, length)
///
/// The two styles may be mixed: positional arguments are collected first,
/// then any FROM/FOR clauses take precedence over the positional values.
/// A missing start defaults to 1.
#[allow(unused_variables, unused_mut)]
pub fn parse_substring(&mut self) -> Result<Option<Expression>> {
    // Parse initial comma-separated arguments
    let mut args: Vec<Expression> = Vec::new();
    // Parse first argument (the string)
    match self.parse_bitwise() {
        Ok(Some(expr)) => {
            // NOTE(review): try_clickhouse_func_arg_alias presumably handles
            // ClickHouse-style `arg AS alias` inside calls -- confirm.
            let expr = self.try_clickhouse_func_arg_alias(expr);
            args.push(expr);
        }
        Ok(None) => return Ok(None),
        Err(e) => return Err(e),
    }
    // Check for comma-separated additional arguments
    while self.match_token(TokenType::Comma) {
        match self.parse_bitwise() {
            Ok(Some(expr)) => {
                let expr = self.try_clickhouse_func_arg_alias(expr);
                args.push(expr);
            }
            Ok(None) => break,
            Err(e) => return Err(e),
        }
    }
    // Check for FROM/FOR syntax (SQL standard)
    let mut start: Option<Expression> = None;
    let mut length: Option<Expression> = None;
    let mut from_for_syntax = false;
    loop {
        if self.match_token(TokenType::From) {
            from_for_syntax = true;
            match self.parse_bitwise() {
                Ok(Some(expr)) => {
                    let expr = self.try_clickhouse_func_arg_alias(expr);
                    start = Some(expr);
                }
                Ok(None) => {}
                Err(e) => return Err(e),
            }
        } else if self.match_token(TokenType::For) {
            from_for_syntax = true;
            // If no start specified yet, default to 1
            if start.is_none() {
                start = Some(Expression::Literal(Box::new(Literal::Number(
                    "1".to_string(),
                ))));
            }
            match self.parse_bitwise() {
                Ok(Some(expr)) => {
                    let expr = self.try_clickhouse_func_arg_alias(expr);
                    length = Some(expr);
                }
                Ok(None) => {}
                Err(e) => return Err(e),
            }
        } else {
            break;
        }
    }
    // Build the substring expression
    if args.is_empty() {
        return Ok(None);
    }
    let this = args.remove(0);
    // Determine start and length: FROM/FOR values win over positional ones.
    let final_start = if let Some(s) = start {
        s
    } else if !args.is_empty() {
        args.remove(0)
    } else {
        Expression::Literal(Box::new(Literal::Number("1".to_string())))
    };
    let final_length = if length.is_some() {
        length
    } else if !args.is_empty() {
        Some(args.remove(0))
    } else {
        None
    };
    Ok(Some(Expression::Substring(Box::new(SubstringFunc {
        this,
        start: final_start,
        length: final_length,
        from_for_syntax,
    }))))
}
/// parse_system_versioning_property - Implemented from Python _parse_system_versioning_property
/// Calls: parse_table_parts, parse_retention_period
#[allow(unused_variables, unused_mut)]
pub fn parse_system_versioning_property(&mut self) -> Result<Option<Expression>> {
    // SYSTEM_VERSIONING = OFF -> empty property node.
    if self.match_text_seq(&["OFF"]) {
        let prop = WithSystemVersioningProperty {
            on: None,
            this: None,
            data_consistency: None,
            retention_period: None,
            with_: None,
        };
        return Ok(Some(Expression::WithSystemVersioningProperty(Box::new(
            prop,
        ))));
    }
    // HISTORY_TABLE = ... and DATA_CONSISTENCY_CHECK = ... are recognized
    // but not yet represented in the AST.
    if self.match_text_seq(&["HISTORY_TABLE", "="]) {
        return Ok(None);
    }
    if self.match_text_seq(&["DATA_CONSISTENCY_CHECK", "="]) {
        return Ok(None);
    }
    Ok(None)
}
/// Parse PostgreSQL ROWS FROM syntax:
/// ROWS FROM (func1(args) AS alias1(col1 type1, col2 type2), func2(...) AS alias2(...)) [WITH ORDINALITY] [AS outer_alias(...)]
///
/// Returns a `RowsFrom` node holding the function list, the WITH ORDINALITY
/// flag, and the optional outer alias.
///
/// Fix: previously WITH was consumed and lost when ORDINALITY did not follow
/// (short-circuit `&&`); the token position is now restored in that case.
fn parse_rows_from(&mut self) -> Result<Expression> {
    // Expect opening paren
    self.expect(TokenType::LParen)?;
    // Comma-separated function calls, each optionally aliased with typed columns.
    let mut expressions = Vec::new();
    loop {
        expressions.push(self.parse_rows_from_function()?);
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    self.expect(TokenType::RParen)?;
    // Optional WITH ORDINALITY, rewinding if only WITH matched.
    let checkpoint = self.current;
    let ordinality =
        self.match_token(TokenType::With) && self.match_token(TokenType::Ordinality);
    if !ordinality {
        self.current = checkpoint;
    }
    // Check for outer alias: AS alias(col1 type1, col2 type2, ...)
    let alias = if self.match_token(TokenType::As) {
        Some(Box::new(self.parse_rows_from_alias()?))
    } else {
        None
    };
    Ok(Expression::RowsFrom(Box::new(RowsFrom {
        expressions,
        ordinality,
        alias,
    })))
}
/// Parse a single function in ROWS FROM: func_name(args) [AS alias(col1 type1, ...)]
fn parse_rows_from_function(&mut self) -> Result<Expression> {
    let func_name = self.expect_identifier_or_keyword()?;
    // Argument list (possibly empty).
    self.expect(TokenType::LParen)?;
    let mut args = Vec::new();
    if !self.check(TokenType::RParen) {
        args = self.parse_function_arguments()?;
    }
    self.expect(TokenType::RParen)?;
    let func_expr = Expression::Function(Box::new(Function {
        name: func_name,
        args,
        distinct: false,
        trailing_comments: Vec::new(),
        use_bracket_syntax: false,
        no_parens: false,
        quoted: false,
        span: None,
        inferred_type: None,
    }));
    // AS alias(col type, ...): wrap as Tuple(function, TableAlias) so the
    // generator can output: FUNC() AS alias(col type)
    if !self.match_token(TokenType::As) {
        return Ok(func_expr);
    }
    let alias_expr = self.parse_rows_from_alias()?;
    Ok(Expression::Tuple(Box::new(Tuple {
        expressions: vec![func_expr, alias_expr],
    })))
}
/// Parse ROWS FROM alias with typed columns: alias_name(col1 type1, col2 type2, ...)
fn parse_rows_from_alias(&mut self) -> Result<Expression> {
    let alias_name = self.expect_identifier_or_keyword_with_quoted()?;
    let mut columns = Vec::new();
    // Optional typed column list: (col1 type1, col2 type2, ...)
    if self.match_token(TokenType::LParen) {
        while !self.check(TokenType::RParen) {
            // Column name may be quoted; keep the quoting on the ColumnDef.
            let col_name = self.expect_identifier_or_keyword_with_quoted()?;
            let col_type = self.parse_data_type()?;
            let mut col_def = ColumnDef::new(col_name.name.clone(), col_type);
            col_def.name = col_name; // Preserve the full identifier with quoted flag
            columns.push(Expression::ColumnDef(Box::new(col_def)));
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
        self.expect(TokenType::RParen)?;
    }
    Ok(Expression::TableAlias(Box::new(TableAlias {
        this: Some(Box::new(Expression::Identifier(alias_name))),
        columns,
    })))
}
/// parse_table - Implemented from Python _parse_table
/// Calls: parse_table_hints, parse_unnest, parse_partition
#[allow(unused_variables, unused_mut)]
pub fn parse_table(&mut self) -> Result<Option<Expression>> {
    // These prefixes are recognized here but produce no node. The sequences
    // are tried in the original order (short-circuit preserves it):
    // ROWS FROM is handled by parse_rows_from() in parse_table_expression().
    if self.match_text_seq(&["ROWS", "FROM"])
        || self.match_text_seq(&["*"])
        || self.match_text_seq(&["NOT", "INDEXED"])
    {
        return Ok(None);
    }
    Ok(None)
}
/// parse_table_alias - Ported from Python _parse_table_alias
/// Parses a table alias: `[AS] alias [(col1, col2, ...)]`.
///
/// Also supports the bare-column form `AS (col1, col2)` (no alias name) and,
/// for ClickHouse, keywords used as alias names (e.g. `AS select`).
///
/// Returns `Ok(None)` when no alias is present; errors when `AS` was seen
/// but no valid alias identifier follows.
#[allow(unused_variables, unused_mut)]
pub fn parse_table_alias(&mut self) -> Result<Option<Expression>> {
    // AS is optional in most dialects.
    let has_as = self.match_token(TokenType::As);
    // Handle AS (col1, col2) - no alias name, just column aliases.
    if has_as && self.match_token(TokenType::LParen) {
        let columns = self.parse_table_alias_column_list()?;
        return Ok(Some(Expression::TableAlias(Box::new(TableAlias {
            this: None,
            columns,
        }))));
    }
    // ClickHouse: keywords can be used as table aliases (e.g., AS select, AS from)
    let is_keyword_alias = has_as
        && matches!(
            self.config.dialect,
            Some(crate::dialects::DialectType::ClickHouse)
        )
        && self.peek().token_type.is_keyword();
    if !self.check(TokenType::Identifier)
        && !self.check(TokenType::QuotedIdentifier)
        && !self.check(TokenType::Var)
        && !is_keyword_alias
    {
        if has_as {
            return Err(self.parse_error("Expected identifier after AS"));
        }
        return Ok(None);
    }
    // Consume the alias token, preserving whether it was quoted.
    let alias_token = self.advance();
    let is_quoted = alias_token.token_type == TokenType::QuotedIdentifier;
    let mut alias_ident = Identifier::new(alias_token.text.clone());
    if is_quoted {
        alias_ident.quoted = true;
    }
    let alias = Expression::Identifier(alias_ident);
    // Optional column alias list: (col1, col2, ...)
    let columns = if self.match_token(TokenType::LParen) {
        self.parse_table_alias_column_list()?
    } else {
        Vec::new()
    };
    Ok(Some(Expression::TableAlias(Box::new(TableAlias {
        this: Some(Box::new(alias)),
        columns,
    }))))
}
/// Parse a comma-separated column-alias list up to and including the closing
/// parenthesis. Assumes the opening `(` was already consumed.
///
/// Entries that fail to parse as identifiers are skipped (best-effort; the
/// loop terminates via the comma/`)` checks instead), matching the lenient
/// behavior the inline loops previously had.
fn parse_table_alias_column_list(&mut self) -> Result<Vec<Expression>> {
    let mut cols = Vec::new();
    loop {
        if self.check(TokenType::RParen) {
            break;
        }
        // Errors from parse_id_var are deliberately swallowed.
        if let Ok(Some(col)) = self.parse_id_var() {
            cols.push(col);
        }
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    self.expect(TokenType::RParen)?;
    Ok(cols)
}
/// parse_table_hints - Ported from Python _parse_table_hints
/// Parses table hints: SQL Server `WITH (hint, ...)` or MySQL
/// `USE/IGNORE/FORCE [INDEX|KEY] [FOR JOIN|ORDER BY|GROUP BY] (names)`.
///
/// Returns the hints wrapped in a Tuple, or `None` when no hints are present.
///
/// Fix: a `WITH` that is not followed by `(` is no longer consumed — the
/// position is restored so the caller can interpret the WITH itself. This
/// mirrors Python sqlglot's `_match_pair(WITH, L_PAREN)`, which matches the
/// two tokens atomically.
#[allow(unused_variables, unused_mut)]
pub fn parse_table_hints(&mut self) -> Result<Option<Expression>> {
    let mut hints = Vec::new();
    let start = self.current;
    let mut matched_with_paren = false;
    // SQL Server style: WITH (hint1, hint2, ...)
    if self.match_text_seq(&["WITH"]) {
        if self.match_token(TokenType::LParen) {
            matched_with_paren = true;
            let mut expressions = Vec::new();
            loop {
                // Each hint is either a function call (e.g. INDEX(ix)) or a
                // bare var (e.g. NOLOCK).
                if let Some(func) = self.parse_function()? {
                    expressions.push(func);
                } else if let Some(var) = self.parse_var()? {
                    expressions.push(var);
                } else {
                    break;
                }
                if !self.match_token(TokenType::Comma) {
                    break;
                }
            }
            self.match_token(TokenType::RParen);
            if !expressions.is_empty() {
                hints.push(Expression::WithTableHint(Box::new(WithTableHint {
                    expressions,
                })));
            }
        } else {
            // Bug fix: WITH without '(' is not a hint list; restore the
            // position so the WITH token is left for the caller.
            self.current = start;
        }
    }
    if !matched_with_paren {
        // MySQL style: USE INDEX, IGNORE INDEX, FORCE INDEX
        while self.match_texts(&["USE", "IGNORE", "FORCE"]) {
            let hint_type = self.previous().text.to_ascii_uppercase();
            // INDEX and KEY are interchangeable.
            let _ = self.match_texts(&["INDEX", "KEY"]);
            // Optional FOR clause: FOR JOIN, FOR ORDER BY, FOR GROUP BY
            let target = if self.match_text_seq(&["FOR"]) {
                let target_token = self.advance();
                let target_text = target_token.text.to_ascii_uppercase();
                // ORDER BY / GROUP BY collapse into a single target name.
                let full_target = if (target_text == "ORDER" || target_text == "GROUP")
                    && self.check(TokenType::By)
                {
                    self.skip(); // consume BY
                    format!("{} BY", target_text)
                } else {
                    target_text
                };
                Some(Box::new(Expression::Identifier(Identifier {
                    name: full_target,
                    quoted: false,
                    trailing_comments: Vec::new(),
                    span: None,
                })))
            } else {
                None
            };
            // Wrapped index names — may include keywords like PRIMARY.
            let expressions = if self.match_token(TokenType::LParen) {
                let mut ids = Vec::new();
                loop {
                    if self.check(TokenType::RParen) {
                        break;
                    }
                    if let Some(id) = self.parse_id_var()? {
                        ids.push(id);
                    } else if self.is_safe_keyword_as_identifier()
                        || self.check(TokenType::PrimaryKey)
                    {
                        // Accept keywords as index names (e.g., PRIMARY)
                        let name = self.advance().text.clone();
                        ids.push(Expression::Identifier(Identifier::new(name)));
                    } else {
                        break;
                    }
                    if !self.match_token(TokenType::Comma) {
                        break;
                    }
                }
                self.match_token(TokenType::RParen);
                ids
            } else {
                Vec::new()
            };
            hints.push(Expression::IndexTableHint(Box::new(IndexTableHint {
                this: Box::new(Expression::Identifier(Identifier {
                    name: hint_type,
                    quoted: false,
                    trailing_comments: Vec::new(),
                    span: None,
                })),
                expressions,
                target,
            })));
        }
    }
    if hints.is_empty() {
        return Ok(None);
    }
    // A Tuple lets a single Expression carry several hints.
    Ok(Some(Expression::Tuple(Box::new(Tuple {
        expressions: hints,
    }))))
}
/// Parse TSQL TRUNCATE table hints: WITH (PARTITIONS(1, 2 TO 5, 10 TO 20, 84))
/// Unlike regular table hints, PARTITIONS arguments can contain TO ranges.
///
/// Fix: previously a `WITH` that was not followed by `(` was consumed and
/// lost; the position is now restored so the WITH token remains available to
/// the caller.
pub fn parse_truncate_table_hints(&mut self) -> Result<Option<Expression>> {
    let start = self.current;
    // Require the full "WITH (" prefix atomically.
    if !self.match_text_seq(&["WITH"]) || !self.match_token(TokenType::LParen) {
        self.current = start;
        return Ok(None);
    }
    let mut hints = Vec::new();
    // Check for PARTITIONS specifically
    if self.check_identifier("PARTITIONS") {
        self.skip(); // consume PARTITIONS
        self.expect(TokenType::LParen)?;
        // Parse partition ranges: 1, 2 TO 5, 10 TO 20, 84
        let mut parts = Vec::new();
        loop {
            if self.check(TokenType::RParen) {
                break;
            }
            let low = self.parse_primary()?;
            if self.match_text_seq(&["TO"]) {
                // Range form: low TO high
                let high = self.parse_primary()?;
                parts.push(Expression::PartitionRange(Box::new(PartitionRange {
                    this: Box::new(low),
                    expression: Some(Box::new(high)),
                    expressions: Vec::new(),
                })));
            } else {
                parts.push(low);
            }
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
        self.expect(TokenType::RParen)?; // close PARTITIONS(...)
        // Represent PARTITIONS(...) as an Anonymous function call.
        hints.push(Expression::Anonymous(Box::new(Anonymous {
            this: Box::new(Expression::Identifier(Identifier {
                name: "PARTITIONS".to_string(),
                quoted: false,
                trailing_comments: Vec::new(),
                span: None,
            })),
            expressions: parts,
        })));
    } else {
        // Fall back to regular hint parsing (function or var)
        loop {
            if let Some(func) = self.parse_function()? {
                hints.push(func);
            } else if let Some(var) = self.parse_var()? {
                hints.push(var);
            } else {
                break;
            }
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
    }
    self.expect(TokenType::RParen)?; // close WITH(...)
    if hints.is_empty() {
        return Ok(None);
    }
    // Wrap in WithTableHint then Tuple (same shape as parse_table_hints).
    let hint = Expression::WithTableHint(Box::new(WithTableHint { expressions: hints }));
    Ok(Some(Expression::Tuple(Box::new(Tuple {
        expressions: vec![hint],
    }))))
}
/// parse_table_part - Parse a single part of a table reference.
///
/// Tries, in order: an ordinary identifier, a string literal used as an
/// identifier, a placeholder, and finally selected keywords acting as
/// identifiers (e.g. `db.cluster` where "cluster" is a keyword — mirrors
/// Python sqlglot's ID_VAR_TOKENS).
#[allow(unused_variables, unused_mut)]
pub fn parse_table_part(&mut self) -> Result<Option<Expression>> {
    // Ordinary identifier?
    if let Some(expr) = self.parse_id_var()? {
        return Ok(Some(expr));
    }
    // String literal acting as an identifier?
    if let Some(expr) = self.parse_string_as_identifier()? {
        return Ok(Some(expr));
    }
    // Placeholder?
    if let Some(expr) = self.parse_placeholder()? {
        return Ok(Some(expr));
    }
    // Keyword usable as an identifier in table-part context?
    if !self.check_keyword_as_identifier() {
        return Ok(None);
    }
    let name = self.peek().text.clone();
    self.skip();
    Ok(Some(Expression::Identifier(Identifier {
        name,
        quoted: false,
        trailing_comments: Vec::new(),
        span: None,
    })))
}
/// Check if the current token is a keyword that can be used as an identifier in certain contexts
/// This includes many SQL keywords like CLUSTER, TABLE, INDEX, etc.
fn check_keyword_as_identifier(&self) -> bool {
if self.is_at_end() {
return false;
}
let token_type = self.peek().token_type;
// Keywords that can be used as identifiers (similar to Python's ID_VAR_TOKENS)
matches!(
token_type,
TokenType::Cluster
| TokenType::Table
| TokenType::Index
| TokenType::View
| TokenType::Database
| TokenType::Schema
| TokenType::Column
| TokenType::Function
| TokenType::Procedure
| TokenType::Constraint
| TokenType::Sequence
| TokenType::Type
| TokenType::Partition
| TokenType::Comment
| TokenType::Cache
| TokenType::Commit
| TokenType::Begin
| TokenType::End
| TokenType::Set
| TokenType::Show
| TokenType::Describe
| TokenType::Use
| TokenType::Execute
| TokenType::Delete
| TokenType::Update
| TokenType::Merge
| TokenType::Load
| TokenType::Copy
| TokenType::Truncate
| TokenType::Replace
| TokenType::Refresh
| TokenType::Rename
| TokenType::Filter
| TokenType::Format
| TokenType::First
| TokenType::Next
| TokenType::Last
| TokenType::Keep
| TokenType::Match
| TokenType::Over
| TokenType::Range
| TokenType::Rows
| TokenType::Row
| TokenType::Offset
| TokenType::Limit
| TokenType::Top
| TokenType::Cube
| TokenType::Rollup
| TokenType::Pivot
| TokenType::Unpivot
| TokenType::Window
| TokenType::Recursive
| TokenType::Unique
| TokenType::Temporary
| TokenType::Volatile
| TokenType::References
| TokenType::Natural
| TokenType::Left
| TokenType::Right
| TokenType::Full
| TokenType::Semi
| TokenType::Anti
| TokenType::Apply
| TokenType::All
| TokenType::Asc
| TokenType::Desc
| TokenType::Analyze
)
}
/// parse_table_parts - Parse catalog.schema.table or schema.table or table
/// Returns a Table expression with all parts
#[allow(unused_variables, unused_mut)]
pub fn parse_table_parts(&mut self) -> Result<Option<Expression>> {
// Parse the first part
let first = self.parse_table_part()?;
if first.is_none() {
return Ok(None);
}
let mut parts = vec![first.unwrap()];
// Parse additional dot-separated parts
while self.match_token(TokenType::Dot) {
if let Some(part) = self.parse_table_part()? {
parts.push(part);
} else {
break;
}
}
// Convert parts to Table expression
// Last part is table name, second-to-last is schema, third-to-last is catalog
let (catalog, schema, name) = match parts.len() {
1 => (None, None, parts.pop().unwrap()),
2 => {
let table = parts.pop().unwrap();
let schema = parts.pop().unwrap();
(None, Some(schema), table)
}
_ => {
let table = parts.pop().unwrap();
let schema = parts.pop().unwrap();
let catalog = parts.pop();
(catalog, Some(schema), table)
}
};
// Extract identifier from Expression
let name_ident = match name {
Expression::Identifier(id) => id,
_ => Identifier::new(String::new()),
};
let schema_ident = schema.map(|s| match s {
Expression::Identifier(id) => id,
_ => Identifier::new(String::new()),
});
let catalog_ident = catalog.map(|c| match c {
Expression::Identifier(id) => id,
_ => Identifier::new(String::new()),
});
Ok(Some(Expression::boxed_table(TableRef {
name: name_ident,
schema: schema_ident,
catalog: catalog_ident,
alias: None,
alias_explicit_as: false,
column_aliases: Vec::new(),
leading_comments: Vec::new(),
trailing_comments: Vec::new(),
when: None,
only: false,
final_: false,
table_sample: None,
hints: Vec::new(),
system_time: None,
partitions: Vec::new(),
identifier_func: None,
changes: None,
version: None,
span: None,
})))
}
/// parse_table_sample - Implemented from Python _parse_table_sample
/// Calls: parse_number, parse_factor, parse_placeholder
///
/// `USING SAMPLE` yields an empty TableSample node (the details are filled
/// in elsewhere). The remaining keywords are recognized — and consumed —
/// but not yet turned into expressions.
#[allow(unused_variables, unused_mut)]
pub fn parse_table_sample(&mut self) -> Result<Option<Expression>> {
    if self.match_text_seq(&["USING", "SAMPLE"]) {
        let sample = TableSample {
            this: None,
            sample: None,
            expressions: Vec::new(),
            method: None,
            bucket_numerator: None,
            bucket_denominator: None,
            bucket_field: None,
            percent: None,
            rows: None,
            size: None,
            seed: None,
        };
        return Ok(Some(Expression::TableSample(Box::new(sample))));
    }
    // `||` short-circuits, so token consumption matches the original
    // if/return cascade for BUCKET, OUT OF, SEED/REPEATABLE.
    let _ = self.match_text_seq(&["BUCKET"])
        || self.match_text_seq(&["OUT", "OF"])
        || self.match_texts(&["SEED", "REPEATABLE"]);
    Ok(None)
}
/// parse_term - Addition/subtraction level (+ -) of the precedence chain.
/// Python: _parse_term
///
/// Delegates to parse_addition; a parse error is mapped to `None`
/// (best-effort), exactly as before.
pub fn parse_term(&mut self) -> Result<Option<Expression>> {
    Ok(self.parse_addition().ok())
}
/// parse_to_table - ClickHouse `TO table_name` property.
///
/// Parses a (possibly qualified) table reference and wraps it in a
/// ToTableProperty; yields `None` when no table reference is present.
#[allow(unused_variables, unused_mut)]
pub fn parse_to_table(&mut self) -> Result<Option<Expression>> {
    let parsed = self.parse_table_parts()?;
    Ok(parsed.map(|table| {
        Expression::ToTableProperty(Box::new(ToTableProperty {
            this: Box::new(table),
        }))
    }))
}
/// parse_tokens - Operator precedence parser
///
/// NOTE(review): placeholder — always returns `None`. The operator-precedence
/// machinery appears to live in the dedicated parse_* level methods (e.g.
/// parse_addition); confirm whether this entry point is still needed.
#[allow(unused_variables, unused_mut)]
pub fn parse_tokens(&mut self) -> Result<Option<Expression>> {
// Uses operator precedence parsing pattern
Ok(None)
}
/// parse_trim - Ported from Python _parse_trim
/// Parses TRIM function: TRIM([BOTH|LEADING|TRAILING] chars FROM str) or TRIM(str, chars)
///
/// Two surface syntaxes are handled:
/// - SQL standard: TRIM([position] chars FROM str) -> sql_standard_syntax=true
/// - Function style: TRIM(str[, chars]) (Spark flips the comma order)
///
/// Assumes the caller has already consumed `TRIM` and the opening paren —
/// TODO confirm against the call site.
#[allow(unused_variables, unused_mut)]
pub fn parse_trim(&mut self) -> Result<Option<Expression>> {
// Check for position keyword (BOTH, LEADING, TRAILING).
// position_explicit records whether the keyword was actually written so the
// generator can round-trip TRIM(BOTH ...) versus plain TRIM(...).
let (position, position_explicit) = if self.match_texts(&["BOTH"]) {
(TrimPosition::Both, true)
} else if self.match_texts(&["LEADING"]) {
(TrimPosition::Leading, true)
} else if self.match_texts(&["TRAILING"]) {
(TrimPosition::Trailing, true)
} else {
// Default when no position keyword is written.
(TrimPosition::Both, false)
};
// Parse first expression (ClickHouse permits an alias on function args).
let first = match self.parse_bitwise() {
Ok(Some(expr)) => self.try_clickhouse_func_arg_alias(expr),
Ok(None) => return Ok(None),
Err(e) => return Err(e),
};
// Check for FROM or comma to see if there's a second expression
let (this, characters, sql_standard_syntax) = if self.match_token(TokenType::From) {
// SQL standard syntax: TRIM([position] chars FROM str)
let second = match self.parse_bitwise() {
Ok(Some(expr)) => self.try_clickhouse_func_arg_alias(expr),
Ok(None) => return Err(self.parse_error("Expected expression after FROM in TRIM")),
Err(e) => return Err(e),
};
// In SQL standard syntax: first is characters, second is the string
(second, Some(first), true)
} else if self.match_token(TokenType::Comma) {
// Function syntax: TRIM(a, b); a missing second argument is tolerated.
let second = match self.parse_bitwise() {
Ok(Some(expr)) => Some(expr),
Ok(None) => None,
Err(e) => return Err(e),
};
// In Spark, comma syntax is TRIM(chars, str) - pattern first
// In other dialects, comma syntax is TRIM(str, chars) - string first
let trim_pattern_first = matches!(
self.config.dialect,
Some(crate::dialects::DialectType::Spark)
);
if trim_pattern_first && second.is_some() {
// first=chars, second=str
(second.unwrap(), Some(first), false)
} else {
(first, second, false)
}
} else {
// Single argument: TRIM(str)
(first, None, false)
};
Ok(Some(Expression::Trim(Box::new(TrimFunc {
this,
characters,
position,
sql_standard_syntax,
position_explicit,
}))))
}
/// parse_truncate_table - Implemented from Python _parse_truncate_table
/// Calls: parse_on_property, parse_partition, parse_function
///
/// NOTE(review): partial port. The matched option keywords are consumed but
/// not recorded on any returned node — confirm against Python's
/// _parse_truncate_table before relying on round-tripping these options.
#[allow(unused_variables, unused_mut)]
pub fn parse_truncate_table(&mut self) -> Result<Option<Expression>> {
if self.match_text_seq(&["RESTART", "IDENTITY"]) {
// NOTE(review): returns an empty TruncateTable with identity: None even
// though RESTART IDENTITY was just matched — presumably a stub; verify
// whether the identity field should carry "RESTART".
return Ok(Some(Expression::TruncateTable(Box::new(TruncateTable {
expressions: Vec::new(),
is_database: None,
exists: false,
only: None,
cluster: None,
identity: None,
option: None,
partition: None,
}))));
}
if self.match_text_seq(&["CONTINUE", "IDENTITY"]) {
// Matched: CONTINUE IDENTITY (consumed, not recorded)
return Ok(None);
}
if self.match_text_seq(&["CASCADE"]) {
// Matched: CASCADE (consumed, not recorded)
return Ok(None);
}
Ok(None)
}
/// parse_ttl - Implemented from Python _parse_ttl
/// Parses ClickHouse TTL expression with optional DELETE, RECOMPRESS, TO DISK/VOLUME
///
/// Grammar (ClickHouse MergeTree style):
///   TTL expr [DELETE | RECOMPRESS [CODEC(...)] | TO DISK 'x' | TO VOLUME 'x']
///       [, ...] [WHERE ...] [GROUP BY ... [SET ...]]
///
/// Expressions that parse_bitwise cannot handle are captured verbatim as raw
/// Var text (see the fallback below) so exotic TTL clauses still round-trip.
pub fn parse_ttl(&mut self) -> Result<Option<Expression>> {
// Parse CSV of TTL actions
let mut expressions = Vec::new();
loop {
// Parse the base expression
let base_start = self.current;
let this = match self.parse_bitwise() {
Ok(Some(expr)) => expr,
_ => {
// Fallback: rewind and swallow raw tokens up to the next
// delimiter/keyword, keeping them as an opaque Var.
self.current = base_start;
let mut paren_depth = 0usize;
while !self.is_at_end() {
// Stop (at depth 0 only) at a comma or any keyword that can
// start the next clause of the TTL grammar.
if paren_depth == 0
&& (self.check(TokenType::Comma)
|| self.peek().text.eq_ignore_ascii_case("DELETE")
|| self.peek().text.eq_ignore_ascii_case("RECOMPRESS")
|| self.peek().text.eq_ignore_ascii_case("TO")
|| self.peek().text.eq_ignore_ascii_case("WHERE")
|| self.peek().text.eq_ignore_ascii_case("GROUP")
|| self.peek().text.eq_ignore_ascii_case("SET"))
{
break;
}
// Track parenthesis nesting so delimiters inside (...) are skipped.
if self.check(TokenType::LParen) {
paren_depth += 1;
} else if self.check(TokenType::RParen) {
if paren_depth == 0 {
// An unbalanced ')' belongs to an enclosing scope.
break;
}
paren_depth -= 1;
}
self.skip();
}
if self.current == base_start {
// Consumed nothing: no further TTL entries.
break;
}
let raw = self
.tokens_to_sql(base_start, self.current)
.trim()
.to_string();
Expression::Var(Box::new(Var { this: raw }))
}
};
// Check for TTL action
let action = if self.match_text_seq(&["DELETE"]) {
// TTL expr DELETE
Expression::MergeTreeTTLAction(Box::new(MergeTreeTTLAction {
this: Box::new(this),
delete: Some(Box::new(Expression::Boolean(BooleanLiteral {
value: true,
}))),
recompress: None,
to_disk: None,
to_volume: None,
}))
} else if self.match_text_seq(&["RECOMPRESS"]) {
// TTL expr RECOMPRESS CODEC(args...) — or any bitwise expression.
let recompress = if self.match_identifier("CODEC") {
self.expect(TokenType::LParen)?;
let mut args = Vec::new();
if !self.check(TokenType::RParen) {
args.push(self.parse_expression()?);
while self.match_token(TokenType::Comma) {
args.push(self.parse_expression()?);
}
}
self.expect(TokenType::RParen)?;
Some(Box::new(Expression::Function(Box::new(Function::new(
"CODEC".to_string(),
args,
)))))
} else {
self.parse_bitwise()?.map(Box::new)
};
Expression::MergeTreeTTLAction(Box::new(MergeTreeTTLAction {
this: Box::new(this),
delete: None,
recompress,
to_disk: None,
to_volume: None,
}))
} else if self.match_text_seq(&["TO", "DISK"]) {
// TTL expr TO DISK 'disk_name'
let to_disk = self.parse_string()?.map(Box::new);
Expression::MergeTreeTTLAction(Box::new(MergeTreeTTLAction {
this: Box::new(this),
delete: None,
recompress: None,
to_disk,
to_volume: None,
}))
} else if self.match_text_seq(&["TO", "VOLUME"]) {
// TTL expr TO VOLUME 'volume_name'
let to_volume = self.parse_string()?.map(Box::new);
Expression::MergeTreeTTLAction(Box::new(MergeTreeTTLAction {
this: Box::new(this),
delete: None,
recompress: None,
to_disk: None,
to_volume,
}))
} else {
// No action keyword: the bare expression itself is the TTL entry.
this
};
expressions.push(action);
if !self.match_token(TokenType::Comma) {
break;
}
}
// Parse optional top-level WHERE clause (for backwards compatibility)
let where_ = self.parse_where()?.map(Box::new);
// Parse optional GROUP BY
let group = if self.match_token(TokenType::Group) {
self.expect(TokenType::By)?;
let mut exprs = Vec::new();
exprs.push(self.parse_expression()?);
while self.match_token(TokenType::Comma) {
exprs.push(self.parse_expression()?);
}
Some(Box::new(Expression::Group(Box::new(Group {
expressions: exprs,
grouping_sets: None,
cube: None,
rollup: None,
totals: None,
all: None,
}))))
} else {
None
};
// Parse optional SET (aggregates) after GROUP BY
let aggregates = if group.is_some() && self.match_token(TokenType::Set) {
let mut aggs = Vec::new();
loop {
aggs.push(self.parse_expression()?);
if !self.match_token(TokenType::Comma) {
break;
}
}
// aggs always has at least one element here; the guard is defensive.
if aggs.is_empty() {
None
} else {
Some(Box::new(Expression::Tuple(Box::new(Tuple {
expressions: aggs,
}))))
}
} else {
None
};
Ok(Some(Expression::MergeTreeTTL(Box::new(MergeTreeTTL {
expressions,
where_,
group,
aggregates,
}))))
}
/// parse_type - Parses a data type expression
/// Python: _parse_type
///
/// Handles three shapes:
/// 1. INTERVAL expressions (delegated to parse_interval);
/// 2. type-prefixed literals like `DATE '2020-01-01'`, turned into a Cast;
/// 3. a bare data type.
/// Column operators are applied on top where relevant; the position is
/// restored when a trailing primary turns out not to be a type literal.
pub fn parse_type(&mut self) -> Result<Option<Expression>> {
// First try to parse an interval
if let Some(interval) = self.parse_interval()? {
return self.parse_column_ops_with_expr(Some(interval));
}
// Try to parse a data type
let data_type = self.parse_types()?;
if let Some(dt) = data_type {
// If it's a Cast (BigQuery inline constructor), apply column ops
if matches!(dt, Expression::Cast(_)) {
return self.parse_column_ops_with_expr(Some(dt));
}
// Try to parse a primary expression after the type
let start_pos = self.current;
if let Some(primary) = self.parse_primary_or_var()? {
// If it's a literal, this might be a type cast like DATE '2020-01-01'
if let Expression::Literal(_) = &primary {
let result = self.parse_column_ops_with_expr(Some(primary))?;
if let Some(value) = result {
// Create a Cast expression
if let Expression::DataType(data_type_struct) = dt {
return Ok(Some(Expression::Cast(Box::new(Cast {
this: value,
to: data_type_struct,
trailing_comments: Vec::new(),
double_colon_syntax: false,
format: None,
default: None,
inferred_type: None,
}))));
}
}
}
// Backtrack if not a type-literal pattern: the primary consumed
// above is returned to the token stream.
self.current = start_pos;
}
return Ok(Some(dt));
}
Ok(None)
}
/// parse_type_size - Ported from Python _parse_type_size
/// Parses type size parameters like 10 in VARCHAR(10) or 10, 2 in DECIMAL(10, 2)
///
/// Returns the parsed size expression, or None when nothing is present.
/// A table-less Column is demoted to its bare identifier so e.g. CHAR in
/// VARCHAR(100 CHAR) reads as a plain name.
#[allow(unused_variables, unused_mut)]
pub fn parse_type_size(&mut self) -> Result<Option<Expression>> {
// First try to parse a type - this handles both numeric literals and type names
let this = self.parse_type()?;
if this.is_none() {
return Ok(None);
}
let mut result = this.unwrap();
// If it's a Column with no table, convert it to an Identifier (var)
// This handles cases like CHAR in VARCHAR(100 CHAR)
if let Expression::Column(ref col) = result {
if col.table.is_none() {
result = Expression::Identifier(col.name.clone());
}
}
// Check for optional expression after the type (e.g., "CHAR" in "100 CHAR")
// This is for byte/char length specifiers in some dialects
// NOTE(review): the specifier token is consumed and then discarded — Python
// attaches it as a DataTypeParam, which has no Rust counterpart yet.
// Confirm dropping it is acceptable for SQL generation.
if let Some(var_token) = self.parse_var()? {
// We have an additional specifier, combine them
// For now, just return the original result since Rust doesn't have DataTypeParam
// The var expression would be attached as an expression in Python
}
Ok(Some(result))
}
/// parse_types - Implemented from Python _parse_types
/// Calls: parse_string
///
/// NOTE(review): partial port — only a few prefixes are recognized here, and
/// matched sequences are consumed even when None is returned. Confirm that
/// callers (e.g. parse_type) expect that consumption.
#[allow(unused_variables, unused_mut)]
pub fn parse_types(&mut self) -> Result<Option<Expression>> {
if self.match_text_seq(&["SYSUDTLIB", "."]) {
// Teradata SYSUDTLIB schema prefix.
// NOTE(review): returns an empty-named identifier — presumably a
// placeholder for the UDT name that follows; verify.
return Ok(Some(Expression::Identifier(Identifier {
name: String::new(),
quoted: false,
trailing_comments: Vec::new(),
span: None,
})));
}
if self.match_text_seq(&["WITH", "TIME", "ZONE"]) {
// Matched: WITH TIME ZONE (consumed, not recorded)
return Ok(None);
}
if self.match_text_seq(&["WITH", "LOCAL", "TIME", "ZONE"]) {
// Matched: WITH LOCAL TIME ZONE (consumed, not recorded)
return Ok(None);
}
Ok(None)
}
/// parse_unique - Implemented from Python _parse_unique
/// Parses UNIQUE [KEY|INDEX] [NULLS NOT DISTINCT] [(columns)] [USING index_type]
///
/// Assumes the UNIQUE keyword itself was consumed by the caller — TODO
/// confirm against the call site.
#[allow(unused_variables, unused_mut)]
pub fn parse_unique(&mut self) -> Result<Option<Expression>> {
// Check for optional KEY/INDEX (interchangeable, value discarded)
let _ = self.match_texts(&["KEY", "INDEX"]);
// Check for NULLS NOT DISTINCT (PostgreSQL 15+ feature)
let nulls = if self.match_text_seq(&["NULLS", "NOT", "DISTINCT"]) {
Some(Box::new(Expression::Boolean(BooleanLiteral {
value: true,
})))
} else {
None
};
// Parse the optional key name and schema (column list)
let unique_key = self.parse_unique_key()?;
let this = self.parse_schema_with_this(unique_key)?;
// Parse optional USING index_type
// NOTE(review): match_token consumes USING, skip() consumes the following
// token, and previous() reads that token back as the index type. If input
// ends immediately after USING, previous() would yield "USING" itself —
// confirm whether this edge case needs a guard.
let index_type = if self.match_token(TokenType::Using) {
self.skip();
Some(Box::new(Expression::Var(Box::new(Var {
this: self.previous().text.clone(),
}))))
} else {
None
};
Ok(Some(Expression::UniqueColumnConstraint(Box::new(
UniqueColumnConstraint {
this: this.map(Box::new),
index_type,
on_conflict: None,
nulls,
options: Vec::new(),
},
))))
}
/// parse_unique_key - The optional key/index name of a UNIQUE constraint.
///
/// Thin wrapper over parse_id_var; returns `None` when no identifier is
/// present at the current position.
#[allow(unused_variables, unused_mut)]
pub fn parse_unique_key(&mut self) -> Result<Option<Expression>> {
    let key = self.parse_id_var()?;
    Ok(key)
}
/// parse_unnest - Ported from Python _parse_unnest
/// Parses UNNEST(arr1[, arr2, ...]) [WITH ORDINALITY | WITH OFFSET [AS] name] [AS alias]
#[allow(unused_variables, unused_mut)]
pub fn parse_unnest(&mut self) -> Result<Option<Expression>> {
// Check for UNNEST keyword
if !self.match_texts(&["UNNEST"]) {
return Ok(None);
}
// Expect opening parenthesis
// NOTE(review): if '(' is missing, UNNEST has already been consumed and is
// not restored — confirm callers tolerate that.
if !self.match_token(TokenType::LParen) {
return Ok(None);
}
// Parse comma-separated array expression(s): UNNEST(arr1, arr2, ...)
let this = match self.parse_expression() {
Ok(expr) => expr,
Err(e) => return Err(e),
};
// Any additional arrays beyond the first.
let mut extra_expressions = Vec::new();
while self.match_token(TokenType::Comma) {
let expr = self.parse_expression()?;
extra_expressions.push(expr);
}
// Expect closing parenthesis
self.expect(TokenType::RParen)?;
// Check for WITH ORDINALITY (Presto) or WITH OFFSET (BigQuery).
// Both map onto the single with_ordinality flag.
let mut with_ordinality = self.match_text_seq(&["WITH", "ORDINALITY"]);
let mut offset_alias = None;
if !with_ordinality && self.match_text_seq(&["WITH", "OFFSET"]) {
with_ordinality = true;
// Parse optional offset alias: WITH OFFSET AS y or WITH OFFSET y
if matches!(
self.config.dialect,
Some(crate::dialects::DialectType::BigQuery)
) {
let has_as = self.match_token(TokenType::As);
// NOTE(review): when AS was matched but the next token is not an
// identifier/var, advance() still consumes it as the alias —
// confirm that is the intended (lenient) behavior.
if has_as || self.check(TokenType::Identifier) || self.check(TokenType::Var) {
let alias_name = self.advance().text;
offset_alias = Some(crate::expressions::Identifier {
name: alias_name,
quoted: false,
trailing_comments: Vec::new(),
span: None,
});
}
}
}
// Parse optional alias.
// NOTE(review): if AS is matched but no identifier follows, the AS token
// is consumed and the alias ends up None — verify against Python behavior.
let alias = if self.match_token(TokenType::As)
|| self.check(TokenType::Identifier)
|| self.check(TokenType::QuotedIdentifier)
{
if self.check(TokenType::Identifier) || self.check(TokenType::QuotedIdentifier) {
let is_quoted = self.check(TokenType::QuotedIdentifier);
let token = self.advance();
let mut ident = Identifier::new(token.text.clone());
if is_quoted {
ident.quoted = true;
}
Some(ident)
} else {
None
}
} else {
None
};
Ok(Some(Expression::Unnest(Box::new(UnnestFunc {
this,
expressions: extra_expressions,
with_ordinality,
alias,
offset_alias,
}))))
}
/// parse_unpivot_columns - Implemented from Python _parse_unpivot_columns
/// Python: parser.py:4454-4462
/// Parses `INTO NAME column VALUE col1, col2, ...`.
///
/// Returns `None` when INTO is absent or when neither a NAME column nor any
/// VALUE columns were found.
#[allow(unused_variables, unused_mut)]
pub fn parse_unpivot_columns(&mut self) -> Result<Option<Expression>> {
    // INTO is mandatory for this clause.
    if !self.match_token(TokenType::Into) {
        return Ok(None);
    }
    // Optional NAME column.
    let this = if self.match_text_seq(&["NAME"]) {
        self.parse_column()?
    } else {
        None
    };
    // Optional VALUE col1, col2, ...
    let mut expressions = Vec::new();
    if self.match_text_seq(&["VALUE"]) {
        loop {
            if let Some(col) = self.parse_column()? {
                expressions.push(col);
            }
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
    }
    // Nothing recognized after INTO — no node to build.
    if this.is_none() && expressions.is_empty() {
        return Ok(None);
    }
    Ok(Some(Expression::UnpivotColumns(Box::new(UnpivotColumns {
        this: Box::new(this.unwrap_or(Expression::Null(Null))),
        expressions,
    }))))
}
/// parse_unquoted_field - Parses a field, demoting unquoted identifiers to Var.
/// Python: _parse_unquoted_field
pub fn parse_unquoted_field(&mut self) -> Result<Option<Expression>> {
    let parsed = self.parse_field()?;
    // Unquoted identifiers become Vars; everything else passes through.
    let converted = parsed.map(|expr| match expr {
        Expression::Identifier(id) if !id.quoted => {
            Expression::Var(Box::new(Var { this: id.name }))
        }
        other => other,
    });
    Ok(converted)
}
/// parse_user_defined_function - Parses a user-defined function call.
/// Python: _parse_user_defined_function
/// Syntax: `[schema.]function_name(param1, param2, ...)`
///
/// Without a following `(` the (possibly qualified) name is returned as-is,
/// not wrapped as a function call.
pub fn parse_user_defined_function(&mut self) -> Result<Option<Expression>> {
    let Some(name) = self.parse_table_parts()? else {
        return Ok(None);
    };
    // No '(' — just a name, not a call.
    if !self.match_token(TokenType::LParen) {
        return Ok(Some(name));
    }
    // Parameter list (possibly empty).
    let mut expressions = Vec::new();
    if !self.check(TokenType::RParen) {
        loop {
            if let Some(param) = self.parse_function_parameter()? {
                expressions.push(param);
            }
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
    }
    self.match_token(TokenType::RParen);
    // wrapped=true records that the parameter list was parenthesized.
    let wrapped = Expression::Boolean(BooleanLiteral { value: true });
    Ok(Some(Expression::UserDefinedFunction(Box::new(
        UserDefinedFunction {
            this: Box::new(name),
            expressions,
            wrapped: Some(Box::new(wrapped)),
        },
    ))))
}
/// parse_user_defined_function_expression - Parse the body of a user-defined
/// function as a statement. A parse error is mapped to `None` (best-effort).
#[allow(unused_variables, unused_mut)]
pub fn parse_user_defined_function_expression(&mut self) -> Result<Option<Expression>> {
    Ok(self.parse_statement().ok())
}
/// parse_user_defined_type - Parses a user-defined type reference.
/// Python: _parse_user_defined_type
/// Format: `schema.type_name` or just `type_name`.
///
/// `identifier` is the first name segment, already consumed by the caller;
/// any further `.segment`s are appended to form the dotted name, which is
/// returned as a custom data type.
///
/// Perf fix: segments are now appended in place (`push`/`push_str`) instead
/// of rebuilding the string with `format!` on every iteration (accidental
/// O(n²) growth for deeply qualified names).
pub fn parse_user_defined_type(
    &mut self,
    identifier: Identifier,
) -> Result<Option<Expression>> {
    let mut type_name = identifier.name.clone();
    // Handle dotted names (schema.type_name).
    while self.match_token(TokenType::Dot) {
        if self.is_at_end() {
            // Trailing dot at end of input: stop. (The dot is consumed,
            // matching previous behavior.)
            break;
        }
        let token = self.advance();
        type_name.push('.');
        type_name.push_str(&token.text);
    }
    // Return as a custom data type.
    Ok(Some(Expression::DataType(DataType::Custom {
        name: type_name,
    })))
}
/// parse_using_identifiers - Ported from Python _parse_using_identifiers
/// Parses `(col1, col2, ...)` for a JOIN USING clause; the parentheses are
/// optional. Returns the identifiers as a Tuple, or `None` when the list is
/// empty.
#[allow(unused_variables, unused_mut)]
pub fn parse_using_identifiers(&mut self) -> Result<Option<Expression>> {
    let wrapped = self.match_token(TokenType::LParen);
    let mut identifiers = Vec::new();
    // Comma-separated identifiers; stops at the first non-identifier.
    while let Some(expr) = self.parse_identifier()? {
        identifiers.push(expr);
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    // A matched '(' requires a matching ')'.
    if wrapped {
        self.expect(TokenType::RParen)?;
    }
    if identifiers.is_empty() {
        return Ok(None);
    }
    Ok(Some(Expression::Tuple(Box::new(Tuple {
        expressions: identifiers,
    }))))
}
/// parse_value - Parses a value tuple for INSERT VALUES clause
/// Python: _parse_value
/// Syntax: (expr1, expr2, ...) or just expr (single value)
///
/// Parsing is best-effort: expression errors restore the saved position and
/// are swallowed, so the exact save/restore ordering below is significant.
pub fn parse_value(&mut self) -> Result<Option<Expression>> {
// Check for parenthesized list of expressions
if self.match_token(TokenType::LParen) {
let mut expressions = Vec::new();
if !self.check(TokenType::RParen) {
loop {
// Support DEFAULT keyword in VALUES
if self.match_texts(&["DEFAULT"]) {
let text = self.previous().text.to_ascii_uppercase();
expressions.push(Expression::Var(Box::new(Var { this: text })));
} else {
// Try to parse an expression; on failure, rewind and fall
// through to the comma check (best-effort).
let saved_pos = self.current;
match self.parse_expression() {
Ok(expr) => expressions.push(expr),
Err(_) => {
self.current = saved_pos;
}
}
}
if !self.match_token(TokenType::Comma) {
break;
}
}
}
// Lenient close: a missing ')' is tolerated here.
self.match_token(TokenType::RParen);
return Ok(Some(Expression::Tuple(Box::new(Tuple { expressions }))));
}
// Single value without parentheses (some dialects support VALUES 1, 2)
let saved_pos = self.current;
match self.parse_expression() {
Ok(expr) => {
return Ok(Some(Expression::Tuple(Box::new(Tuple {
expressions: vec![expr],
}))));
}
Err(_) => {
// Rewind so the failed tokens remain available to the caller.
self.current = saved_pos;
}
}
Ok(None)
}
/// parse_var - Parse a variable reference (unquoted identifier).
/// Python: if self._match(TokenType.VAR): return exp.Var(this=self._prev.text)
/// Falls back to placeholder parsing when the current token is not a Var.
pub fn parse_var(&mut self) -> Result<Option<Expression>> {
    if !self.match_token(TokenType::Var) {
        // Not a bare var — maybe a placeholder (?, :name, ...).
        return self.parse_placeholder();
    }
    let this = self.previous().text.clone();
    Ok(Some(Expression::Var(Box::new(Var { this }))))
}
/// parse_var_from_options - Ported from Python _parse_var_from_options
/// Parses a variable/identifier from a predefined set of options.
///
/// Without the Python options dict, this accepts any Identifier/Var token
/// and returns its uppercased text as a Var.
///
/// Perf fix: only the token's type and text are needed, so the whole Token
/// (with span etc.) is no longer cloned.
#[allow(unused_variables, unused_mut)]
pub fn parse_var_from_options(&mut self) -> Result<Option<Expression>> {
    if self.is_at_end() {
        return Ok(None);
    }
    let token_type = self.peek().token_type;
    if token_type == TokenType::Identifier || token_type == TokenType::Var {
        // to_ascii_uppercase allocates the new String directly; no Token clone.
        let this = self.peek().text.to_ascii_uppercase();
        self.skip();
        return Ok(Some(Expression::Var(Box::new(Var { this }))));
    }
    Ok(None)
}
/// parse_var_or_string - Parses a string literal or a variable
/// Python: parser.py:7506-7507
///
/// Tries a string literal first; if none is present, falls back to
/// parsing any token as a variable via `parse_var_any_token`.
///
/// (The stale doc line claiming this merely "delegates to parse_string"
/// was wrong — it also parses variables — and the `#[allow]` attribute
/// splitting the doc comment in two served no purpose; both removed.)
pub fn parse_var_or_string(&mut self) -> Result<Option<Expression>> {
    if let Some(s) = self.parse_string()? {
        return Ok(Some(s));
    }
    self.parse_var_any_token()
}
/// parse_vector_expressions - Transforms vector type parameters
/// Python: _parse_vector_expressions
/// In Python, this transforms a list of expressions where the first element (identifier)
/// is converted to a DataType. In Rust, since VECTOR type parsing is handled inline in
/// parse_data_type, this method parses vector expressions (element_type, dimension) from
/// the current position and returns them as a Tuple.
pub fn parse_vector_expressions(&mut self) -> Result<Option<Expression>> {
    // The first element must be a type; bail out early if it is absent.
    let Some(type_expr) = self.parse_type()? else {
        return Ok(None);
    };
    let mut expressions = vec![type_expr];
    // Parse optional dimension or additional parameters.
    while self.match_token(TokenType::Comma) {
        if let Some(expr) = self.parse_primary_or_var()? {
            expressions.push(expr);
        }
    }
    // `expressions` always holds at least the element type at this point,
    // so the former `is_empty()` check was unreachable and is removed.
    Ok(Some(Expression::Tuple(Box::new(Tuple { expressions }))))
}
/// parse_version - Implemented from Python _parse_version
/// Python: parser.py:4266-4295
/// Parses FOR SYSTEM_TIME AS OF, VERSIONS BETWEEN, etc.
///
/// Returns a `Version` node whose `this` is TIMESTAMP or VERSION, whose
/// `kind` is one of "FROM"/"BETWEEN"/"CONTAINED IN"/"ALL"/"AS OF", and
/// whose `expression` holds the associated operand(s), if any.
#[allow(unused_variables, unused_mut)]
pub fn parse_version(&mut self) -> Result<Option<Expression>> {
    // Check for TIMESTAMP or VERSION snapshot token; without either this is
    // not a versioning clause at all.
    let this = if self.match_token(TokenType::TimestampSnapshot) {
        "TIMESTAMP".to_string()
    } else if self.match_token(TokenType::VersionSnapshot) {
        "VERSION".to_string()
    } else {
        return Ok(None);
    };
    // Parse the kind and expression
    let (kind, expression) = if self.match_texts(&["FROM", "BETWEEN"]) {
        // FROM start TO end or BETWEEN start AND end
        let kind_str = self.previous().text.to_ascii_uppercase();
        let start = self.parse_bitwise()?;
        // The pairing separator (TO for FROM, AND for BETWEEN) is consumed
        // permissively; a missing separator is not an error here.
        self.match_texts(&["TO", "AND"]);
        let end = self.parse_bitwise()?;
        // Missing endpoints degrade to NULL rather than failing the parse.
        let tuple = Expression::Tuple(Box::new(Tuple {
            expressions: vec![
                start.unwrap_or(Expression::Null(Null)),
                end.unwrap_or(Expression::Null(Null)),
            ],
        }));
        (kind_str, Some(Box::new(tuple)))
    } else if self.match_text_seq(&["CONTAINED", "IN"]) {
        // CONTAINED IN (values)
        let expressions = if self.match_token(TokenType::LParen) {
            let exprs = self.parse_expression_list()?;
            self.expect(TokenType::RParen)?;
            exprs
        } else {
            Vec::new()
        };
        (
            "CONTAINED IN".to_string(),
            Some(Box::new(Expression::Tuple(Box::new(Tuple { expressions })))),
        )
    } else if self.match_token(TokenType::All) {
        // ALL
        ("ALL".to_string(), None)
    } else {
        // AS OF is the default; the keywords themselves are optional.
        self.match_text_seq(&["AS", "OF"]);
        let type_expr = self.parse_type()?;
        ("AS OF".to_string(), type_expr.map(Box::new))
    };
    Ok(Some(Expression::Version(Box::new(Version {
        this: Box::new(Expression::Var(Box::new(Var { this }))),
        kind,
        expression,
    }))))
}
/// parse_volatile_property - Parses VOLATILE property
/// Python: _parse_volatile_property
/// Returns VolatileProperty for table volatility or StabilityProperty for function stability
pub fn parse_volatile_property(&mut self) -> Result<Option<Expression>> {
    // Check the token before VOLATILE to determine context
    // In SQL, VOLATILE can mean:
    // 1. Table volatility (CREATE VOLATILE TABLE)
    // 2. Function stability (CREATE FUNCTION ... VOLATILE)
    // Look back to see if this is in a table context
    // PRE_VOLATILE_TOKENS typically include: CREATE, REPLACE, GLOBAL, etc.
    // NOTE(review): the look-back at `current - 2` assumes the VOLATILE
    // token was just consumed (so `current - 1` is VOLATILE itself) —
    // confirm against the call sites.
    let is_table_context = if self.current >= 2 {
        let pre_token = &self.tokens[self.current - 2];
        matches!(
            pre_token.token_type,
            TokenType::Create | TokenType::Global | TokenType::Temporary | TokenType::Replace
        )
    } else {
        false
    };
    if is_table_context {
        Ok(Some(Expression::VolatileProperty(Box::new(
            VolatileProperty { this: None },
        ))))
    } else {
        // Function stability - return StabilityProperty with "VOLATILE" literal
        Ok(Some(Expression::StabilityProperty(Box::new(
            StabilityProperty {
                this: Box::new(Expression::Literal(Box::new(Literal::String(
                    "VOLATILE".to_string(),
                )))),
            },
        ))))
    }
}
/// parse_when_matched - Implemented from Python _parse_when_matched
/// Calls: parse_disjunction, parse_star, parse_value
///
/// Parse WHEN [NOT] MATCHED clauses for MERGE statements. This is the
/// public entry point; it delegates to `parse_when_matched_clauses`.
///
/// (The doc comment was previously split in two by a pointless
/// `#[allow(unused_variables, unused_mut)]` attribute; consolidated.)
pub fn parse_when_matched(&mut self) -> Result<Option<Expression>> {
    self.parse_when_matched_clauses()
}
/// parse_where - Parse WHERE clause
/// Python: if not self._match(TokenType.WHERE): return None; return exp.Where(this=self._parse_disjunction())
///
/// Returns None when the cursor is not at a WHERE keyword; otherwise wraps
/// the following condition expression in a `Where` node.
pub fn parse_where(&mut self) -> Result<Option<Expression>> {
    if !self.match_token(TokenType::Where) {
        return Ok(None);
    }
    let this = self.parse_expression()?;
    Ok(Some(Expression::Where(Box::new(Where { this }))))
}
/// parse_window - Implemented from Python _parse_window
/// Calls: parse_window_spec, parse_partition_and_order
///
/// NOTE(review): this looks like a partial port — WITHIN GROUP yields an
/// empty WindowSpec, and LAST/EXCLUDE are consumed but produce no node.
#[allow(unused_variables, unused_mut)]
pub fn parse_window(&mut self) -> Result<Option<Expression>> {
    if self.match_text_seq(&["WITHIN", "GROUP"]) {
        // WITHIN GROUP is represented as a bare, empty window spec.
        return Ok(Some(Expression::WindowSpec(Box::new(WindowSpec {
            partition_by: Vec::new(),
            order_by: Vec::new(),
            frame: None,
        }))));
    }
    if self.match_text_seq(&["LAST"]) {
        // Matched: LAST — consumed with no AST output.
        return Ok(None);
    }
    if self.match_text_seq(&["EXCLUDE"]) {
        // Matched: EXCLUDE — consumed with no AST output.
        return Ok(None);
    }
    Ok(None)
}
/// parse_window_clause - Ported from Python _parse_window_clause
/// Parses WINDOW named_window_definition [, named_window_definition, ...]
///
/// Each definition is `name AS (spec)`; the result is a Tuple of Alias
/// nodes, each wrapping a parsed WindowSpec under its name. Returns None
/// when the WINDOW keyword is absent or no definitions could be parsed.
#[allow(unused_variables, unused_mut)]
pub fn parse_window_clause(&mut self) -> Result<Option<Expression>> {
    if !self.match_token(TokenType::Window) {
        return Ok(None);
    }
    // Parse comma-separated named window definitions
    let mut windows = Vec::new();
    loop {
        // Parse window name
        let name = self.parse_identifier()?;
        if name.is_none() {
            break;
        }
        // Expect AS
        self.expect(TokenType::As)?;
        // Parse window specification (parenthesized)
        self.expect(TokenType::LParen)?;
        let spec = self.parse_window_spec_inner()?;
        self.expect(TokenType::RParen)?;
        if let (Some(name_expr), Some(spec_expr)) = (name, spec) {
            // Create an Alias expression wrapping the spec with the name
            let alias_ident = if let Expression::Identifier(id) = name_expr {
                id
            } else {
                // Fallback alias when the parsed name is not a plain
                // identifier (should be rare).
                Identifier::new("window")
            };
            windows.push(Expression::Alias(Box::new(Alias {
                this: spec_expr,
                alias: alias_ident,
                column_aliases: Vec::new(),
                pre_alias_comments: Vec::new(),
                trailing_comments: Vec::new(),
                inferred_type: None,
            })));
        }
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    if windows.is_empty() {
        Ok(None)
    } else {
        Ok(Some(Expression::Tuple(Box::new(Tuple {
            expressions: windows,
        }))))
    }
}
/// Parse window spec inner (without parentheses)
///
/// Grammar handled: [base_name] [PARTITION BY ... | DISTRIBUTE BY ...]
/// [ORDER BY ... | SORT BY ...] [frame]. DISTRIBUTE BY and SORT BY are the
/// Hive spellings of PARTITION BY and ORDER BY inside window specs, so
/// each pair shares one parsing path (the old code duplicated the
/// ORDER/SORT list-parsing loop verbatim; deduplicated here).
fn parse_window_spec_inner(&mut self) -> Result<Option<Expression>> {
    // Optional base window name: an identifier that does not begin a
    // PARTITION/ORDER/DISTRIBUTE/SORT clause.
    let _base = if (self.check(TokenType::Identifier)
        || self.check(TokenType::QuotedIdentifier))
        && !self.check(TokenType::Partition)
        && !self.check(TokenType::Order)
        && !self.check(TokenType::Distribute)
        && !self.check(TokenType::Sort)
    {
        self.parse_identifier()?
    } else {
        None
    };
    // PARTITION BY / DISTRIBUTE BY (Hive) are equivalent here.
    let partition_by = if self.match_keywords(&[TokenType::Partition, TokenType::By])
        || self.match_keywords(&[TokenType::Distribute, TokenType::By])
    {
        self.parse_expression_list()?
    } else {
        Vec::new()
    };
    // ORDER BY / SORT BY (Hive) are equivalent here.
    let order_by = if self.match_token(TokenType::Order) || self.match_token(TokenType::Sort) {
        self.match_token(TokenType::By);
        let mut orders = Vec::new();
        loop {
            if let Some(ordered) = self.parse_ordered_item()? {
                orders.push(ordered);
            } else {
                break;
            }
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
        orders
    } else {
        Vec::new()
    };
    // Frame specification (ROWS/RANGE/GROUPS BETWEEN ... AND ...).
    let frame = self.parse_window_frame()?;
    Ok(Some(Expression::WindowSpec(Box::new(WindowSpec {
        partition_by,
        order_by,
        frame,
    }))))
}
/// parse_window_spec - Implemented from Python _parse_window_spec
///
/// NOTE(review): stub — UNBOUNDED and CURRENT ROW are consumed but no
/// frame-boundary node is built; every path returns None.
#[allow(unused_variables, unused_mut)]
pub fn parse_window_spec(&mut self) -> Result<Option<Expression>> {
    if self.match_text_seq(&["UNBOUNDED"]) {
        // Matched: UNBOUNDED — consumed with no AST output.
        return Ok(None);
    }
    if self.match_text_seq(&["CURRENT", "ROW"]) {
        // Matched: CURRENT ROW — consumed with no AST output.
        return Ok(None);
    }
    Ok(None)
}
/// parse_with_operator - Parse column with operator class (PostgreSQL)
/// Parses: ordered_expression [WITH operator]
///
/// Tries an opclass first, then an ordered expression; if WITH follows,
/// wraps the result in a WithOperator node carrying the operator name.
#[allow(unused_variables, unused_mut)]
pub fn parse_with_operator(&mut self) -> Result<Option<Expression>> {
    // First parse an ordered expression with optional operator class
    let this = if let Some(opclass) = self.parse_opclass()? {
        opclass
    } else if let Some(ordered) = self.parse_ordered()? {
        ordered
    } else {
        return Ok(None);
    };
    // Check for WITH operator
    if !self.match_token(TokenType::With) {
        return Ok(Some(this));
    }
    // Parse the operator; an unparseable operator degrades to "".
    let op_str = match self.parse_var()? {
        Some(Expression::Identifier(id)) => id.name,
        // The matched value is owned here, so move the name out instead of
        // cloning it (the old `v.this.clone()` was a redundant allocation).
        Some(Expression::Var(v)) => v.this,
        _ => String::new(),
    };
    Ok(Some(Expression::WithOperator(Box::new(WithOperator {
        this: Box::new(this),
        op: op_str,
    }))))
}
/// parse_with_property - Implemented from Python _parse_with_property
/// Calls: parse_withjournaltable, parse_withisolatedloading, parse_wrapped_properties
///
/// NOTE(review): partial port — only the SYSTEM_VERSIONING form produces a
/// node; JOURNAL and DATA are consumed and dropped.
#[allow(unused_variables, unused_mut)]
pub fn parse_with_property(&mut self) -> Result<Option<Expression>> {
    // "(" is matched by raw token text here — presumably match_text_seq
    // compares punctuation tokens by text too; TODO confirm.
    if self.match_text_seq(&["(", "SYSTEM_VERSIONING"]) {
        return Ok(Some(Expression::WithProcedureOptions(Box::new(
            WithProcedureOptions {
                expressions: Vec::new(),
            },
        ))));
    }
    if self.match_text_seq(&["JOURNAL"]) {
        // Matched: JOURNAL — consumed with no AST output.
        return Ok(None);
    }
    if self.match_text_seq(&["DATA"]) {
        // Matched: DATA — consumed with no AST output.
        return Ok(None);
    }
    Ok(None)
}
/// parse_withdata - Implemented from Python _parse_withdata
///
/// NOTE(review): partial port — "AND STATISTICS" yields a WithDataProperty
/// with neither flag set, while "AND NO STATISTICS" is consumed and
/// dropped entirely.
#[allow(unused_variables, unused_mut)]
pub fn parse_withdata(&mut self) -> Result<Option<Expression>> {
    if self.match_text_seq(&["AND", "STATISTICS"]) {
        return Ok(Some(Expression::WithDataProperty(Box::new(
            WithDataProperty {
                no: None,
                statistics: None,
            },
        ))));
    }
    if self.match_text_seq(&["AND", "NO", "STATISTICS"]) {
        // Matched: AND NO STATISTICS — consumed with no AST output.
        return Ok(None);
    }
    Ok(None)
}
/// parse_withisolatedloading - Implemented from Python _parse_withisolatedloading
///
/// NOTE(review): partial port — NO yields an IsolatedLoadingProperty with
/// no fields populated, while CONCURRENT is consumed and dropped.
#[allow(unused_variables, unused_mut)]
pub fn parse_withisolatedloading(&mut self) -> Result<Option<Expression>> {
    if self.match_text_seq(&["NO"]) {
        return Ok(Some(Expression::IsolatedLoadingProperty(Box::new(
            IsolatedLoadingProperty {
                no: None,
                concurrent: None,
                target: None,
            },
        ))));
    }
    if self.match_text_seq(&["CONCURRENT"]) {
        // Matched: CONCURRENT — consumed with no AST output.
        return Ok(None);
    }
    Ok(None)
}
/// parse_withjournaltable - Teradata WITH JOURNAL TABLE property
/// Parses: WITH JOURNAL TABLE = table_name
#[allow(unused_variables, unused_mut)]
pub fn parse_withjournaltable(&mut self) -> Result<Option<Expression>> {
    // Both the TABLE keyword and the '=' sign are optional.
    self.match_token(TokenType::Table);
    self.match_token(TokenType::Eq);
    // Without a table reference there is no property to build.
    match self.parse_table_parts()? {
        Some(table) => Ok(Some(Expression::WithJournalTableProperty(Box::new(
            WithJournalTableProperty {
                this: Box::new(table),
            },
        )))),
        None => Ok(None),
    }
}
/// parse_wrapped - Parses an expression wrapped in parentheses
/// Python: _parse_wrapped(parse_method)
/// This version parses a disjunction (expression) inside parentheses
pub fn parse_wrapped(&mut self) -> Result<Option<Expression>> {
    if !self.match_token(TokenType::LParen) {
        return Ok(None);
    }
    let inner = self.parse_disjunction()?;
    // The closing paren is consumed when present but not required.
    self.match_token(TokenType::RParen);
    Ok(inner)
}
/// parse_wrapped_csv - Parses comma-separated expressions wrapped in parentheses
/// Python: _parse_wrapped_csv(parse_method)
///
/// Returns None when there is no opening paren or the list is empty;
/// otherwise the expressions are packaged as a Tuple.
pub fn parse_wrapped_csv(&mut self) -> Result<Option<Expression>> {
    if !self.match_token(TokenType::LParen) {
        return Ok(None);
    }
    let expressions = self.parse_expression_list()?;
    // Closing paren is consumed permissively.
    self.match_token(TokenType::RParen);
    if expressions.is_empty() {
        Ok(None)
    } else {
        Ok(Some(Expression::Tuple(Box::new(Tuple { expressions }))))
    }
}
/// parse_wrapped_id_vars - Parses comma-separated identifiers wrapped in parentheses
/// Python: _parse_wrapped_id_vars
///
/// Returns None when there is no opening paren or no identifiers were
/// parsed; otherwise the identifiers are packaged as a Tuple.
pub fn parse_wrapped_id_vars(&mut self) -> Result<Option<Expression>> {
    if !self.match_token(TokenType::LParen) {
        return Ok(None);
    }
    let mut ids = Vec::new();
    // Collect identifiers until one fails to parse or commas run out.
    while let Some(id) = self.parse_id_var()? {
        ids.push(id);
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    // Closing paren is consumed permissively.
    self.match_token(TokenType::RParen);
    if ids.is_empty() {
        return Ok(None);
    }
    Ok(Some(Expression::Tuple(Box::new(Tuple {
        expressions: ids,
    }))))
}
/// parse_wrapped_options - Implemented from Python _parse_wrapped_options
/// Parses space-separated properties wrapped in parentheses (for Snowflake STAGE_FILE_FORMAT, etc.)
/// Format: = (KEY=VALUE KEY2=VALUE2 ...)
pub fn parse_wrapped_options(&mut self) -> Result<Option<Expression>> {
    // Optional '=' may precede the opening paren.
    self.match_token(TokenType::Eq);
    if !self.match_token(TokenType::LParen) {
        return Ok(None);
    }
    // Properties are separated by whitespace only, so keep pulling
    // KEY=VALUE pairs until ')' or something unparseable appears.
    let mut properties = Vec::new();
    loop {
        if self.check(TokenType::RParen) || self.is_at_end() {
            break;
        }
        match self.parse_option_property()? {
            Some(prop) => properties.push(prop),
            None => break,
        }
    }
    // Closing paren is consumed permissively.
    self.match_token(TokenType::RParen);
    if properties.is_empty() {
        return Ok(None);
    }
    Ok(Some(Expression::Tuple(Box::new(Tuple {
        expressions: properties,
    }))))
}
/// Parse a single option property: KEY=VALUE
/// Handles various value types: identifiers, strings, numbers, nested parens like ('') or (val1, val2)
///
/// Backtracks (restores `self.current`) and returns None when the tokens
/// at the cursor do not form a `KEY = ...` shape.
fn parse_option_property(&mut self) -> Result<Option<Expression>> {
    // Save position to retreat if this isn't a property
    let index = self.current;
    // Parse the key (identifier/column name)
    // For Snowflake options, keys are identifiers like TYPE, FIELD_DELIMITER, NULL_IF, etc.
    // Keyword tokens also qualify as keys as long as their text is a bare
    // word (alphanumerics/underscores only).
    let key = if self.check(TokenType::Identifier)
        || self.check(TokenType::Var)
        || self
            .peek()
            .text
            .chars()
            .all(|c| c.is_ascii_alphanumeric() || c == '_')
    {
        let name = self.peek().text.clone();
        self.skip();
        Some(Expression::Var(Box::new(Var { this: name })))
    } else {
        None
    };
    let key = match key {
        Some(k) => k,
        None => {
            // Not a key: rewind and report "no property here".
            self.current = index;
            return Ok(None);
        }
    };
    // Expect =; its absence means the consumed token was not a key after all.
    if !self.match_token(TokenType::Eq) {
        self.current = index;
        return Ok(None);
    }
    // Parse the value - can be:
    // - Simple identifier: CSV, SKIP_FILE, BASE64, TRUE, FALSE, CASE_SENSITIVE
    // - String literal: '|', '"', 'TZHTZM YYYY-MM-DD HH24:MI:SS.FF9'
    // - Number: 5
    // - Nested parens for tuple: ('')
    let value = if self.check(TokenType::LParen) {
        // Parse nested parenthesized value like NULL_IF=('')
        self.skip(); // consume (
        let mut inner_exprs = Vec::new();
        while !self.check(TokenType::RParen) && !self.is_at_end() {
            if let Some(expr) = self.parse_primary_for_option()? {
                inner_exprs.push(expr);
            }
            // Allow comma between nested values
            self.match_token(TokenType::Comma);
        }
        self.match_token(TokenType::RParen);
        Expression::Tuple(Box::new(Tuple {
            expressions: inner_exprs,
        }))
    } else if let Some(primary) = self.parse_primary_for_option()? {
        primary
    } else {
        // Fallback: treat whatever token is next as a bare var value.
        let text = self.peek().text.clone();
        self.skip();
        Expression::Var(Box::new(Var { this: text }))
    };
    // Return as a Property expression (KEY=VALUE)
    Ok(Some(Expression::Property(Box::new(Property {
        this: Box::new(key),
        value: Some(Box::new(value)),
    }))))
}
/// Parse a primary value for option properties
/// Handles strings, numbers, identifiers, TRUE/FALSE
///
/// Returns None (without consuming) when the next token is the key of the
/// following property (i.e. it is immediately followed by '=') or is not a
/// usable value token.
fn parse_primary_for_option(&mut self) -> Result<Option<Expression>> {
    // String literal
    if self.check(TokenType::String) {
        let text = self.peek().text.clone();
        self.skip();
        return Ok(Some(Expression::Literal(Box::new(Literal::String(text)))));
    }
    // Number
    if self.check(TokenType::Number) {
        let text = self.peek().text.clone();
        self.skip();
        return Ok(Some(Expression::Literal(Box::new(Literal::Number(text)))));
    }
    // TRUE/FALSE
    if self.check(TokenType::True) {
        self.skip();
        return Ok(Some(Expression::Boolean(BooleanLiteral { value: true })));
    }
    if self.check(TokenType::False) {
        self.skip();
        return Ok(Some(Expression::Boolean(BooleanLiteral { value: false })));
    }
    // Identifier or keyword used as a bare value (CSV, SKIP_FILE, BASE64, ...).
    // The guard already excludes RParen/Comma/Eq tokens (and Identifier/Var
    // can never be RParen), so the old unreachable re-check of RParen inside
    // this branch has been removed.
    if self.check(TokenType::Identifier)
        || self.check(TokenType::Var)
        || (!self.check(TokenType::RParen)
            && !self.check(TokenType::Comma)
            && !self.check(TokenType::Eq)
            && !self.is_at_end())
    {
        // If the token is followed by '=', it is the key of the next
        // property, not a value for this one.
        if self.check_next(TokenType::Eq) {
            return Ok(None);
        }
        // Clone only once we know the token will actually be consumed.
        let text = self.peek().text.clone();
        self.skip();
        return Ok(Some(Expression::Var(Box::new(Var { this: text }))));
    }
    Ok(None)
}
/// parse_options_list - Parses BigQuery-style OPTIONS list: (key=value, key=value, ...)
/// Parses key=value assignments where values can be complex expressions
pub fn parse_options_list(&mut self) -> Result<Vec<Expression>> {
    // Without an opening paren there is no list at all.
    if !self.match_token(TokenType::LParen) {
        return Ok(Vec::new());
    }
    let mut options = Vec::new();
    // Empty OPTIONS () is valid, so test for ')' before every entry.
    while !self.check(TokenType::RParen) {
        // parse_assignment handles the KEY = VALUE form.
        let Some(option) = self.parse_assignment()? else {
            break;
        };
        options.push(option);
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    // The closing paren is mandatory.
    self.expect(TokenType::RParen)?;
    Ok(options)
}
/// Parse BigQuery PARTITION BY property and return a typed AST node.
///
/// Accepts either the combined PARTITION BY token or PARTITION followed by
/// BY. Expressions are collected until a clause boundary (CLUSTER, AS,
/// ';', ')', or OPTIONS). On any parse failure the cursor is rewound so
/// the caller can fall back to generic/raw parsing.
fn parse_bigquery_partition_by_property(&mut self) -> Result<Option<Expression>> {
    let start = self.current;
    let matched_partition = if self.match_token(TokenType::PartitionBy) {
        true
    } else if self.match_token(TokenType::Partition) {
        self.match_token(TokenType::By)
    } else {
        false
    };
    if !matched_partition {
        self.current = start;
        return Ok(None);
    }
    let mut expressions = Vec::new();
    // Stop at tokens that begin the next clause of the CREATE statement.
    while !self.is_at_end()
        && !self.check(TokenType::Cluster)
        && !self.check(TokenType::As)
        && !self.check(TokenType::Semicolon)
        && !self.check(TokenType::RParen)
        && !self.check_identifier("OPTIONS")
    {
        match self.parse_expression() {
            Ok(expr) => expressions.push(expr),
            Err(_) => {
                // Fall back to generic/raw parsing if typed parsing can't consume this form.
                self.current = start;
                return Ok(None);
            }
        }
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    // PARTITION BY immediately followed by a clause boundary yields no
    // expressions; treat that as "not this property" and rewind.
    if expressions.is_empty() {
        self.current = start;
        return Ok(None);
    }
    Ok(Some(Expression::PartitionByProperty(Box::new(
        PartitionByProperty { expressions },
    ))))
}
/// Parse BigQuery CLUSTER BY property and return a typed AST node.
///
/// Accepts a comma-separated list of column names after CLUSTER BY. On any
/// shape it cannot parse, the cursor is rewound to the start so the caller
/// can fall back to generic/raw parsing.
fn parse_bigquery_cluster_by_property(&mut self) -> Result<Option<Expression>> {
    let start = self.current;
    if !self.match_keywords(&[TokenType::Cluster, TokenType::By]) {
        self.current = start;
        return Ok(None);
    }
    let mut columns = Vec::new();
    loop {
        if let Some(Expression::Identifier(id)) = self.parse_identifier()? {
            columns.push(id);
        } else if self.is_identifier_or_keyword_token() {
            // Keywords are valid column names in this position; use the raw
            // token text as an unquoted identifier.
            let name = self.advance().text;
            columns.push(Identifier {
                name,
                quoted: false,
                trailing_comments: Vec::new(),
                span: None,
            });
        } else {
            // Fall back to generic/raw parsing if typed parsing can't consume this form.
            self.current = start;
            return Ok(None);
        }
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    // Every loop iteration either pushes a column or returns early, so
    // `columns` is guaranteed non-empty here; the old emptiness check was
    // unreachable and has been removed.
    Ok(Some(Expression::ClusterByColumnsProperty(Box::new(
        ClusterByColumnsProperty { columns },
    ))))
}
/// Parse BigQuery OPTIONS (...) clause into typed entries when possible.
/// Falls back to generic `Properties` when options are not simple key/value assignments.
fn parse_bigquery_options_property(&mut self) -> Result<Option<Expression>> {
    let start = self.current;
    if !self.match_identifier("OPTIONS") {
        self.current = start;
        return Ok(None);
    }
    let options = self.parse_options_list()?;
    // OPTIONS () is valid and maps to an empty typed property.
    if options.is_empty() {
        return Ok(Some(Expression::OptionsProperty(Box::new(
            OptionsProperty {
                entries: Vec::new(),
            },
        ))));
    }
    let mut entries = Vec::new();
    for option_expr in &options {
        // Any entry that is not a plain `key = value` forces the generic
        // Properties representation for the whole list.
        let Some(entry) = Self::option_entry_from_expression(option_expr) else {
            return Ok(Some(Expression::Properties(Box::new(Properties {
                expressions: options,
            }))));
        };
        entries.push(entry);
    }
    Ok(Some(Expression::OptionsProperty(Box::new(
        OptionsProperty { entries },
    ))))
}
/// Convert a parsed `key = value` expression into a typed `OptionEntry`.
/// Returns None when the expression is not an equality or its left side is
/// not a simple key (bare column, identifier, or var).
fn option_entry_from_expression(expr: &Expression) -> Option<OptionEntry> {
    if let Expression::Eq(eq) = expr {
        let key = match &eq.left {
            Expression::Column(col) if col.table.is_none() => col.name.clone(),
            Expression::Identifier(id) => id.clone(),
            Expression::Var(var) => Identifier {
                name: var.this.clone(),
                quoted: false,
                trailing_comments: Vec::new(),
                span: None,
            },
            _ => return None,
        };
        Some(OptionEntry {
            key,
            value: eq.right.clone(),
        })
    } else {
        None
    }
}
/// parse_environment_list - Parses Databricks ENVIRONMENT list: (dependencies = '...', environment_version = '...')
/// Parses key=value assignments where values can be string literals
///
/// The grammar is identical to the BigQuery-style OPTIONS list (wrapped,
/// comma-separated `key = value` assignments parsed via parse_assignment,
/// empty list allowed), so this delegates to `parse_options_list` instead
/// of duplicating its loop line for line.
pub fn parse_environment_list(&mut self) -> Result<Vec<Expression>> {
    self.parse_options_list()
}
/// parse_wrapped_properties - Ported from Python _parse_wrapped_properties
/// Parses properties wrapped in parentheses: (prop1, prop2, ...)
#[allow(unused_variables, unused_mut)]
pub fn parse_wrapped_properties(&mut self) -> Result<Option<Expression>> {
    if !self.match_token(TokenType::LParen) {
        return Ok(None);
    }
    let mut props = Vec::new();
    // Keep consuming entries as long as commas follow; a failed property
    // parse is tolerated (the comma check decides whether to continue).
    loop {
        if let Some(prop) = self.parse_property()? {
            props.push(prop);
        }
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    // Closing paren is consumed permissively.
    self.match_token(TokenType::RParen);
    if props.is_empty() {
        Ok(None)
    } else {
        // Return the collected entries as a Properties expression.
        Ok(Some(Expression::Properties(Box::new(Properties {
            expressions: props,
        }))))
    }
}
/// parse_wrapped_select - Ported from Python _parse_wrapped_select
/// Parses wrapped select statements including PIVOT/UNPIVOT and FROM-first syntax
///
/// `table = true` parses a table reference instead of a nested SELECT.
/// All successful non-pivot paths run the result through
/// `parse_set_operations_with_expr` to pick up UNION/INTERSECT/EXCEPT and
/// trailing query modifiers.
#[allow(unused_variables, unused_mut)]
pub fn parse_wrapped_select(&mut self, table: bool) -> Result<Option<Expression>> {
    // Check for PIVOT/UNPIVOT.
    // Peek UNPIVOT before consuming so the flag survives the match below.
    let is_unpivot = self.check(TokenType::Unpivot);
    if self.match_token(TokenType::Pivot) || self.match_token(TokenType::Unpivot) {
        // Call simplified pivot parser
        return self.parse_simplified_pivot(is_unpivot);
    }
    // Check for FROM (DuckDB FROM-first syntax)
    if self.match_token(TokenType::From) {
        // Parse the FROM clause (table reference)
        let from_expr = self.parse_table()?;
        // Try to parse a full SELECT
        let select = self.parse_select_query()?;
        if let Some(sel) = select {
            // Apply set operations and query modifiers
            let with_ops = self.parse_set_operations_with_expr(Some(sel))?;
            return Ok(with_ops);
        } else if let Some(from_table) = from_expr {
            // Bare `FROM t` desugars to `SELECT * FROM t`.
            let mut select_struct = Select::new();
            select_struct.expressions = vec![Expression::Star(Star {
                table: None,
                except: None,
                replace: None,
                rename: None,
                trailing_comments: Vec::new(),
                span: None,
            })];
            select_struct.from = Some(From {
                expressions: vec![from_table],
            });
            let select_all = Expression::Select(Box::new(select_struct));
            let with_ops = self.parse_set_operations_with_expr(Some(select_all))?;
            return Ok(with_ops);
        }
        return Ok(None);
    }
    // Regular case: parse table or nested select
    let this = if table {
        self.parse_table()?
    } else {
        // Parse nested select without set operations
        self.parse_select_query()?
    };
    if this.is_none() {
        return Ok(None);
    }
    // Apply set operations and query modifiers
    let with_ops = self.parse_set_operations_with_expr(this)?;
    Ok(with_ops)
}
/// Convenience wrapper around `parse_wrapped_select` using the default
/// `table = false` (i.e. parse a nested SELECT rather than a table).
pub fn parse_wrapped_select_default(&mut self) -> Result<Option<Expression>> {
    self.parse_wrapped_select(false)
}
/// parse_xml_element - Implemented from Python _parse_xml_element
/// Python: parser.py:6917-6931
/// Parses XMLELEMENT(NAME name [, expr, ...]) or XMLELEMENT(EVALNAME expr [, expr, ...])
#[allow(unused_variables, unused_mut)]
pub fn parse_xml_element(&mut self) -> Result<Option<Expression>> {
    // EVALNAME introduces a dynamically-computed element name; otherwise an
    // optional NAME keyword precedes a static identifier.
    let (name_expr, evalname) = if self.match_text_seq(&["EVALNAME"]) {
        let dynamic = self.parse_bitwise()?;
        let marker = Expression::Boolean(BooleanLiteral { value: true });
        (dynamic, Some(Box::new(marker)))
    } else {
        self.match_text_seq(&["NAME"]);
        (self.parse_id_var()?, None)
    };
    // Remaining comma-separated items are element content/attributes.
    let expressions = if self.match_token(TokenType::Comma) {
        self.parse_expression_list()?
    } else {
        Vec::new()
    };
    // Without a name there is no element to build.
    Ok(name_expr.map(|t| {
        Expression::XMLElement(Box::new(XMLElement {
            this: Box::new(t),
            expressions,
            evalname,
        }))
    }))
}
/// parse_xml_namespace - Ported from Python _parse_xml_namespace
/// Parses XML namespace declarations
#[allow(unused_variables, unused_mut)]
pub fn parse_xml_namespace(&mut self) -> Result<Option<Expression>> {
    let mut namespaces = Vec::new();
    loop {
        // A DEFAULT namespace is a bare URI string; any other namespace may
        // carry an AS alias after the URI.
        let entry = if self.match_text_seq(&["DEFAULT"]) {
            self.parse_string()?
        } else {
            match self.parse_string()? {
                Some(uri) => self.parse_alias_with_expr(Some(uri))?,
                None => None,
            }
        };
        namespaces.extend(entry);
        // Continue only while commas separate declarations.
        if !self.match_token(TokenType::Comma) {
            break;
        }
    }
    if namespaces.is_empty() {
        return Ok(None);
    }
    // All namespace declarations are returned together as a Tuple.
    Ok(Some(Expression::Tuple(Box::new(Tuple {
        expressions: namespaces,
    }))))
}
/// parse_xml_table - Implemented from Python _parse_xml_table
/// Python: parser.py:6933-6961
/// Parses XMLTABLE(xpath_expr PASSING xml_doc COLUMNS ...)
///
/// Grammar handled, in order: optional XMLNAMESPACES(...), the XPath string
/// (mandatory), optional PASSING [BY VALUE] expr [, ...], optional
/// RETURNING SEQUENCE BY REF, optional COLUMNS col_def [, ...].
#[allow(unused_variables, unused_mut)]
pub fn parse_xml_table(&mut self) -> Result<Option<Expression>> {
    // Parse optional XMLNAMESPACES clause
    let namespaces = if self.match_text_seq(&["XMLNAMESPACES", "("]) {
        let ns = self.parse_xml_namespace()?;
        // Consume the closing paren and the comma before the XPath string.
        self.match_text_seq(&[")", ","]);
        ns.map(Box::new)
    } else {
        None
    };
    // Parse XPath expression (string); mandatory — bail out without it.
    let this = self.parse_string()?;
    if this.is_none() {
        return Ok(None);
    }
    // Parse PASSING clause
    let passing = if self.match_text_seq(&["PASSING"]) {
        // BY VALUE is optional
        self.match_text_seq(&["BY", "VALUE"]);
        // Parse comma-separated expressions.
        // Oracle XMLTABLE PASSING accepts full expressions (including function calls),
        // not just column references.
        // We need to stop before COLUMNS, RETURNING, or )
        let mut cols = Vec::new();
        loop {
            // Check for stop keywords before parsing a column
            if !self.is_at_end() {
                let next_text = self.peek().text.to_ascii_uppercase();
                if next_text == "COLUMNS" || next_text == "RETURNING" {
                    break;
                }
                if self.check(TokenType::RParen) {
                    break;
                }
            }
            if let Some(col) = self.parse_assignment()? {
                cols.push(col);
            } else {
                break;
            }
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
        // An empty PASSING list is represented as no clause at all.
        if cols.is_empty() {
            None
        } else {
            Some(Box::new(Expression::Tuple(Box::new(Tuple {
                expressions: cols,
            }))))
        }
    } else {
        None
    };
    // Parse optional RETURNING SEQUENCE BY REF; stored as a boolean marker.
    let by_ref = if self.match_text_seq(&["RETURNING", "SEQUENCE", "BY", "REF"]) {
        Some(Box::new(Expression::Boolean(BooleanLiteral {
            value: true,
        })))
    } else {
        None
    };
    // Parse COLUMNS clause
    let columns = if self.match_text_seq(&["COLUMNS"]) {
        let mut cols = Vec::new();
        loop {
            // Stop if we hit the closing paren
            if self.check(TokenType::RParen) {
                break;
            }
            // Be permissive with leading commas in multiline XMLTABLE COLUMNS lists.
            if self.match_token(TokenType::Comma) {
                continue;
            }
            if let Some(col_def) = self.parse_field_def()? {
                cols.push(col_def);
            } else {
                break;
            }
            if !self.match_token(TokenType::Comma) {
                break;
            }
        }
        cols
    } else {
        Vec::new()
    };
    // `this` was checked above, so the unwrap cannot panic.
    Ok(Some(Expression::XMLTable(Box::new(XMLTable {
        this: Box::new(this.unwrap()),
        namespaces,
        passing,
        columns,
        by_ref,
    }))))
}
/// Parse UNLOAD statement (Athena/Presto/Redshift)
/// UNLOAD (SELECT ...) TO 'location' WITH (options)
///
/// The statement is not parsed into a typed AST; instead the raw tokens up
/// to the next ';' are re-joined into a single `Command` string. String
/// literals are re-quoted (with '' escaping) and spacing is reconstructed
/// token-by-token, since original whitespace is not available here.
fn parse_unload(&mut self) -> Result<Expression> {
    // Collect entire statement as a Command
    let mut parts = Vec::new();
    parts.push(self.advance().text.clone()); // consume UNLOAD
    parts.push(" ".to_string()); // space after UNLOAD
    while !self.is_at_end() && !self.check(TokenType::Semicolon) {
        let token_type = self.peek().token_type;
        let token_text = self.peek().text.clone();
        // Track string literals: re-quote and escape embedded quotes.
        if token_type == TokenType::String {
            parts.push(format!("'{}'", token_text.replace('\'', "''")));
            self.skip();
            // Add space after string unless followed by punctuation
            if !self.is_at_end() {
                let next_type = self.peek().token_type;
                if !matches!(
                    next_type,
                    TokenType::Comma | TokenType::RParen | TokenType::Semicolon
                ) {
                    parts.push(" ".to_string());
                }
            }
            continue;
        }
        // Handle ARRAY[...] syntax - no space between ARRAY and [
        if token_text.eq_ignore_ascii_case("ARRAY")
            && self
                .peek_nth(1)
                .is_some_and(|t| t.token_type == TokenType::LBracket)
        {
            parts.push(token_text);
            self.skip();
            // Consume [
            parts.push("[".to_string());
            self.skip();
            // Collect until RBracket, re-quoting inner string literals too.
            while !self.is_at_end() && !self.check(TokenType::RBracket) {
                let inner_type = self.peek().token_type;
                let inner_text = self.peek().text.clone();
                if inner_type == TokenType::String {
                    parts.push(format!("'{}'", inner_text.replace('\'', "''")));
                } else {
                    parts.push(inner_text);
                }
                self.skip();
                if self.check(TokenType::Comma) {
                    parts.push(", ".to_string());
                    self.skip();
                }
            }
            if self.check(TokenType::RBracket) {
                parts.push("]".to_string());
                self.skip();
            }
            continue;
        }
        parts.push(token_text);
        self.skip();
        // Add space after most tokens except punctuation
        if !self.is_at_end() {
            let next_type = self.peek().token_type;
            // No space before closers/separators, none after openers.
            let no_space_before = matches!(
                next_type,
                TokenType::Comma
                    | TokenType::RParen
                    | TokenType::RBracket
                    | TokenType::Semicolon
                    | TokenType::LBracket
            );
            let no_space_after = matches!(token_type, TokenType::LParen | TokenType::LBracket);
            if !no_space_before && !no_space_after {
                parts.push(" ".to_string());
            }
        }
    }
    Ok(Expression::Command(Box::new(Command {
        this: parts.join(""),
    })))
}
/// Parse USING EXTERNAL FUNCTION statement (Athena)
/// USING EXTERNAL FUNCTION name(params) RETURNS type LAMBDA 'arn' SELECT ...
///
/// The statement is captured verbatim as a `Command`: tokens up to the next
/// ';' are consumed, and the command text is sliced directly out of the
/// original source when it is available (preserving whitespace exactly).
fn parse_using_external_function(&mut self) -> Result<Expression> {
    // Record start position
    let start_pos = self.peek().span.start;
    // Advance through all tokens until end or semicolon
    while !self.is_at_end() && !self.check(TokenType::Semicolon) {
        self.skip();
    }
    // Get end position from the last consumed token
    let end_pos = if self.current > 0 {
        self.tokens[self.current - 1].span.end
    } else {
        start_pos
    };
    // Extract exact text from source if available
    let command_text = if let Some(ref source) = self.source {
        source[start_pos..end_pos].to_string()
    } else {
        // Fallback: reconstruct from tokens (loses whitespace)
        let mut parts = Vec::new();
        for i in 0..self.current {
            // Only tokens inside the statement's span participate; tokens
            // consumed before this statement fall outside [start, end].
            if self.tokens[i].span.start >= start_pos && self.tokens[i].span.end <= end_pos {
                if self.tokens[i].token_type == TokenType::String {
                    // Re-quote string literals, escaping embedded quotes.
                    parts.push(format!("'{}'", self.tokens[i].text.replace('\'', "''")));
                } else {
                    parts.push(self.tokens[i].text.clone());
                }
                if i + 1 < self.current {
                    parts.push(" ".to_string());
                }
            }
        }
        parts.join("")
    };
    Ok(Expression::Command(Box::new(Command {
        this: command_text,
    })))
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::traversal::ExpressionWalk;
// A trailing line comment before LIMIT must be re-attached as an inline
// /* ... */ comment on the generated statement instead of being dropped.
#[test]
fn test_comment_before_limit() {
let sql = "SELECT a FROM b WHERE foo AND bla\n-- comment 3\nLIMIT 10";
let result = Parser::parse_sql(sql).unwrap();
let output = crate::Generator::sql(&result[0]).unwrap();
assert_eq!(
output,
"SELECT a FROM b WHERE foo AND bla LIMIT 10 /* comment 3 */"
);
}
// Postgres ARRAY[...] literals, bare and wrapped in VARIADIC, must survive a
// Postgres -> Postgres transpile.
#[test]
fn test_variadic_array_postgres() {
use crate::dialects::DialectType;
use crate::transpile;
// Test: ARRAY[10, -1, 5, 4.4] should parse correctly in Postgres
let sql = "SELECT ARRAY[10, -1, 5, 4.4]";
let result = transpile(sql, DialectType::PostgreSQL, DialectType::PostgreSQL).unwrap();
// NOTE(review): `result` is only printed, never asserted — presumably the
// bare-ARRAY output differs from the input; confirm and add an assertion.
eprintln!("Array test: {} -> {}", sql, result[0]);
// Test: VARIADIC ARRAY[10, -1, 5, 4.4] in function call
let sql2 = "SELECT MLEAST(VARIADIC ARRAY[10, -1, 5, 4.4])";
let result2 = transpile(sql2, DialectType::PostgreSQL, DialectType::PostgreSQL).unwrap();
eprintln!("VARIADIC test: {} -> {}", sql2, result2[0]);
assert_eq!(result2[0], sql2);
}
// A bare `SELECT 1` parses to exactly one statement, and that statement is a
// SELECT node.
#[test]
fn test_parse_simple_select() {
    let statements = Parser::parse_sql("SELECT 1").unwrap();
    assert_eq!(statements.len(), 1);
    assert!(statements[0].is_select());
}
// SELECT list and FROM clause are populated.
#[test]
fn test_parse_select_from() {
let result = Parser::parse_sql("SELECT a, b FROM t").unwrap();
assert_eq!(result.len(), 1);
let select = result[0].as_select().unwrap();
assert_eq!(select.expressions.len(), 2);
assert!(select.from.is_some());
}
// WHERE clause is captured on the Select node.
#[test]
fn test_parse_select_where() {
let result = Parser::parse_sql("SELECT * FROM t WHERE x = 1").unwrap();
let select = result[0].as_select().unwrap();
assert!(select.where_clause.is_some());
}
// A 4096-term AND chain must be parsed into a balanced tree (depth < 128),
// not a left-leaning spine that could overflow the stack on traversal.
#[test]
fn test_parse_balances_large_and_chain_depth() {
let mut sql = String::from("SELECT 1 WHERE c0 = 0");
for i in 1..4096 {
sql.push_str(&format!(" AND c{i} = {i}"));
}
let result = Parser::parse_sql(&sql).unwrap();
let select = result[0].as_select().unwrap();
let where_clause = select.where_clause.as_ref().expect("WHERE clause missing");
let depth = where_clause.this.tree_depth();
assert!(
depth < 128,
"Expected balanced boolean tree depth, got {}",
depth
);
}
// Same balancing guarantee for a large OR chain.
#[test]
fn test_parse_balances_large_or_chain_depth() {
let mut sql = String::from("SELECT 1 WHERE c0 = 0");
for i in 1..4096 {
sql.push_str(&format!(" OR c{i} = {i}"));
}
let result = Parser::parse_sql(&sql).unwrap();
let select = result[0].as_select().unwrap();
let where_clause = select.where_clause.as_ref().expect("WHERE clause missing");
let depth = where_clause.this.tree_depth();
assert!(
depth < 128,
"Expected balanced boolean tree depth, got {}",
depth
);
}
// Bare JOIN defaults to an inner join.
#[test]
fn test_parse_select_join() {
let result = Parser::parse_sql("SELECT * FROM a JOIN b ON a.id = b.id").unwrap();
let select = result[0].as_select().unwrap();
assert_eq!(select.joins.len(), 1);
assert_eq!(select.joins[0].kind, JoinKind::Inner);
}
// Multiplication binds tighter than addition.
#[test]
fn test_parse_expression_precedence() {
let result = Parser::parse_sql("SELECT 1 + 2 * 3").unwrap();
let select = result[0].as_select().unwrap();
// Should parse as 1 + (2 * 3) due to precedence
assert!(matches!(select.expressions[0], Expression::Add(_)));
}
// Known functions produce typed nodes; unknown names stay generic Function.
#[test]
fn test_parse_function() {
// COUNT(*) is now a typed Count expression
let result = Parser::parse_sql("SELECT COUNT(*)").unwrap();
let select = result[0].as_select().unwrap();
assert!(matches!(select.expressions[0], Expression::Count(_)));
// Unknown functions stay as generic Function
let result = Parser::parse_sql("SELECT MY_CUSTOM_FUNC(name)").unwrap();
let select = result[0].as_select().unwrap();
assert!(matches!(select.expressions[0], Expression::Function(_)));
// Known aggregate functions are now typed
let result = Parser::parse_sql("SELECT SUM(amount)").unwrap();
let select = result[0].as_select().unwrap();
assert!(matches!(select.expressions[0], Expression::Sum(_)));
}
// OVER (PARTITION BY ... ORDER BY ...) yields a WindowFunction node.
#[test]
fn test_parse_window_function() {
let result =
Parser::parse_sql("SELECT ROW_NUMBER() OVER (PARTITION BY category ORDER BY id)")
.unwrap();
let select = result[0].as_select().unwrap();
assert!(matches!(
select.expressions[0],
Expression::WindowFunction(_)
));
}
// Window frame clauses (ROWS BETWEEN ...) parse inside OVER.
#[test]
fn test_parse_window_function_with_frame() {
let result = Parser::parse_sql("SELECT SUM(amount) OVER (ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)").unwrap();
let select = result[0].as_select().unwrap();
assert!(matches!(
select.expressions[0],
Expression::WindowFunction(_)
));
}
// Subscripts work on both plain columns and function-call results.
#[test]
fn test_parse_subscript() {
// Array subscript
let result = Parser::parse_sql("SELECT arr[0]").unwrap();
let select = result[0].as_select().unwrap();
assert!(matches!(select.expressions[0], Expression::Subscript(_)));
// Function result subscript
let result = Parser::parse_sql("SELECT SPLIT(name, ',')[0]").unwrap();
let select = result[0].as_select().unwrap();
assert!(matches!(select.expressions[0], Expression::Subscript(_)));
}
// CASE WHEN ... THEN ... ELSE ... END parses to a Case node.
#[test]
fn test_parse_case() {
let result = Parser::parse_sql("SELECT CASE WHEN x = 1 THEN 'a' ELSE 'b' END").unwrap();
let select = result[0].as_select().unwrap();
assert!(matches!(select.expressions[0], Expression::Case(_)));
}
// INSERT ... VALUES parses to an Insert node.
#[test]
fn test_parse_insert() {
let result = Parser::parse_sql("INSERT INTO t (a, b) VALUES (1, 2)").unwrap();
assert!(matches!(result[0], Expression::Insert(_)));
}
// Databricks/Hive ${var} placeholders become Parameter nodes with the
// DollarBrace style recorded.
#[test]
fn test_parse_template_variable() {
// Test Databricks/Hive ${variable} syntax
let result = Parser::parse_sql("SELECT ${x} FROM ${y} WHERE ${z} > 1").unwrap();
let select = result[0].as_select().unwrap();
// The expression should be a Parameter with DollarBrace style
assert!(
matches!(&select.expressions[0], Expression::Parameter(p) if p.name == Some("x".to_string()))
);
// Check the style is DollarBrace
if let Expression::Parameter(p) = &select.expressions[0] {
assert_eq!(p.style, ParameterStyle::DollarBrace);
}
}
// UPDATE ... SET ... WHERE parses to an Update node.
#[test]
fn test_parse_update() {
let result = Parser::parse_sql("UPDATE t SET a = 1 WHERE b = 2").unwrap();
assert!(matches!(result[0], Expression::Update(_)));
}
// DELETE FROM ... WHERE parses to a Delete node.
#[test]
fn test_parse_delete() {
let result = Parser::parse_sql("DELETE FROM t WHERE a = 1").unwrap();
assert!(matches!(result[0], Expression::Delete(_)));
}
// DDL tests
// Column definitions carry PRIMARY KEY and NOT NULL constraints.
#[test]
fn test_parse_create_table() {
let result = Parser::parse_sql(
"CREATE TABLE users (id INT PRIMARY KEY, name VARCHAR(100) NOT NULL)",
)
.unwrap();
assert!(matches!(result[0], Expression::CreateTable(_)));
if let Expression::CreateTable(ct) = &result[0] {
assert_eq!(ct.name.name.name, "users");
assert_eq!(ct.columns.len(), 2);
assert!(ct.columns[0].primary_key);
assert_eq!(ct.columns[1].nullable, Some(false));
}
}
// IF NOT EXISTS flag is recorded.
#[test]
fn test_parse_create_table_if_not_exists() {
let result = Parser::parse_sql("CREATE TABLE IF NOT EXISTS t (id INT)").unwrap();
if let Expression::CreateTable(ct) = &result[0] {
assert!(ct.if_not_exists);
}
}
// TEMPORARY flag is recorded.
#[test]
fn test_parse_create_temporary_table() {
let result = Parser::parse_sql("CREATE TEMPORARY TABLE t (id INT)").unwrap();
if let Expression::CreateTable(ct) = &result[0] {
assert!(ct.temporary);
}
}
// BigQuery PARTITION BY / CLUSTER BY / OPTIONS must become typed property
// nodes, never a Raw fallback, and OPTIONS entries must keep key order.
#[test]
fn test_bigquery_create_table_properties_are_typed() {
use crate::DialectType;
let sql = "CREATE OR REPLACE TABLE `p1`.`d1`.`t1` PARTITION BY DATE_TRUNC(day, month) CLUSTER BY some_cluster_column OPTIONS(description='', labels=[('l1', 'v1'), ('l2', 'v2')]) AS SELECT CURRENT_DATE AS day, DATE_TRUNC(CURRENT_DATE(), month) AS month, 'c' AS some_cluster_column";
let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
let create = match &parsed[0] {
Expression::CreateTable(ct) => ct,
other => panic!(
"Expected CreateTable, got {:?}",
std::mem::discriminant(other)
),
};
assert!(
create
.properties
.iter()
.any(|p| matches!(p, Expression::PartitionByProperty(_))),
"Expected typed PARTITION BY property"
);
assert!(
create
.properties
.iter()
.any(|p| matches!(p, Expression::ClusterByColumnsProperty(_))),
"Expected typed CLUSTER BY property"
);
assert!(
create
.properties
.iter()
.any(|p| matches!(p, Expression::OptionsProperty(_))),
"Expected typed OPTIONS property"
);
assert!(
!create
.properties
.iter()
.any(|p| matches!(p, Expression::Raw(_))),
"BigQuery table properties should not fall back to Raw"
);
let options = create
.properties
.iter()
.find_map(|p| match p {
Expression::OptionsProperty(o) => Some(o),
_ => None,
})
.expect("Expected OptionsProperty");
assert_eq!(options.entries.len(), 2);
assert_eq!(options.entries[0].key.name, "description");
assert_eq!(options.entries[1].key.name, "labels");
}
// Roundtrip: generator normalizes `OPTIONS(` to `OPTIONS (`; everything else
// is preserved verbatim.
#[test]
fn test_bigquery_create_table_properties_roundtrip() {
use crate::DialectType;
let sql = "CREATE TABLE t1 PARTITION BY DATE_TRUNC(day, month) CLUSTER BY some_cluster_column OPTIONS(description='', labels=[('l1', 'v1')]) AS SELECT 1 AS day, 1 AS month, 'c' AS some_cluster_column";
let expected = "CREATE TABLE t1 PARTITION BY DATE_TRUNC(day, month) CLUSTER BY some_cluster_column OPTIONS (description='', labels=[('l1', 'v1')]) AS SELECT 1 AS day, 1 AS month, 'c' AS some_cluster_column";
let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
let generated = crate::generate(&parsed[0], DialectType::BigQuery).unwrap();
assert_eq!(generated, expected);
}
// EXTERNAL modifier is recorded and OPTIONS is typed on external tables.
#[test]
fn test_bigquery_create_external_table_basic() {
use crate::DialectType;
let sql = "CREATE EXTERNAL TABLE t OPTIONS (format='CSV', uris=['gs://bucket/path/*'])";
let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
let create = match &parsed[0] {
Expression::CreateTable(ct) => ct,
other => panic!(
"Expected CreateTable, got {:?}",
std::mem::discriminant(other)
),
};
assert_eq!(
create.table_modifier.as_deref(),
Some("EXTERNAL"),
"Expected EXTERNAL table modifier"
);
assert!(
create
.properties
.iter()
.any(|p| matches!(p, Expression::OptionsProperty(_))),
"Expected typed OPTIONS property"
);
}
// EXTERNAL TABLE with an explicit column list: the modifier and both column
// definitions must be captured.
#[test]
fn test_bigquery_create_external_table_with_columns() {
use crate::DialectType;
let sql = "CREATE EXTERNAL TABLE t (id INT64, name STRING) OPTIONS (format='CSV')";
let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
let create = match &parsed[0] {
Expression::CreateTable(ct) => ct,
other => panic!(
"Expected CreateTable, got {:?}",
std::mem::discriminant(other)
),
};
// Fixed: removed stray trailing comma (`Some("EXTERNAL"),)`) to match the
// assertion style of the sibling tests.
assert_eq!(create.table_modifier.as_deref(), Some("EXTERNAL"));
assert_eq!(create.columns.len(), 2, "Expected 2 column definitions");
assert_eq!(create.columns[0].name.name, "id");
assert_eq!(create.columns[1].name.name, "name");
}
// WITH PARTITION COLUMNS (...) populates with_partition_columns in order.
#[test]
fn test_bigquery_create_external_table_with_partition_columns() {
use crate::DialectType;
let sql = "CREATE EXTERNAL TABLE t WITH PARTITION COLUMNS (dt DATE, region STRING) OPTIONS (format='PARQUET', uris=['gs://bucket/*'])";
let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
let create = match &parsed[0] {
Expression::CreateTable(ct) => ct,
other => panic!(
"Expected CreateTable, got {:?}",
std::mem::discriminant(other)
),
};
assert_eq!(
create.with_partition_columns.len(),
2,
"Expected 2 partition columns"
);
assert_eq!(create.with_partition_columns[0].name.name, "dt");
assert_eq!(create.with_partition_columns[1].name.name, "region");
}
// WITH CONNECTION `...` sets the with_connection field.
#[test]
fn test_bigquery_create_external_table_with_connection() {
use crate::DialectType;
let sql = "CREATE EXTERNAL TABLE t WITH CONNECTION `project.us.my_connection` OPTIONS (format='CSV')";
let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
let create = match &parsed[0] {
Expression::CreateTable(ct) => ct,
other => panic!(
"Expected CreateTable, got {:?}",
std::mem::discriminant(other)
),
};
assert!(
create.with_connection.is_some(),
"Expected WITH CONNECTION to be set"
);
}
// Full EXTERNAL TABLE form: partition columns, connection, and OPTIONS all
// present on one statement.
#[test]
fn test_bigquery_create_external_table_full() {
use crate::DialectType;
let sql = "CREATE EXTERNAL TABLE `project.dataset.my_table` WITH PARTITION COLUMNS (dt DATE) WITH CONNECTION `project.us.my_conn` OPTIONS (format='PARQUET', uris=['gs://bucket/path/*'])";
let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
let create = match &parsed[0] {
Expression::CreateTable(ct) => ct,
other => panic!(
"Expected CreateTable, got {:?}",
std::mem::discriminant(other)
),
};
assert_eq!(create.table_modifier.as_deref(), Some("EXTERNAL"));
assert_eq!(create.with_partition_columns.len(), 1);
assert!(create.with_connection.is_some());
// Fixed: removed stray trailing comma (`...))),);`) and added a failure
// message, matching every sibling OPTIONS assertion in this module.
assert!(
create
.properties
.iter()
.any(|p| matches!(p, Expression::OptionsProperty(_))),
"Expected typed OPTIONS property"
);
}
// Identity roundtrip: simple external table.
#[test]
fn test_bigquery_create_external_table_roundtrip() {
use crate::DialectType;
let sql = "CREATE EXTERNAL TABLE t OPTIONS (format='CSV', uris=['gs://bucket/*'])";
let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
let generated = crate::generate(&parsed[0], DialectType::BigQuery).unwrap();
assert_eq!(generated, sql);
}
// Identity roundtrip: WITH PARTITION COLUMNS.
#[test]
fn test_bigquery_create_external_table_with_partition_columns_roundtrip() {
use crate::DialectType;
let sql = "CREATE EXTERNAL TABLE t WITH PARTITION COLUMNS (dt DATE, region STRING) OPTIONS (format='PARQUET')";
let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
let generated = crate::generate(&parsed[0], DialectType::BigQuery).unwrap();
assert_eq!(generated, sql);
}
// Identity roundtrip: WITH CONNECTION.
#[test]
fn test_bigquery_create_external_table_with_connection_roundtrip() {
use crate::DialectType;
let sql = "CREATE EXTERNAL TABLE t WITH CONNECTION `project.us.my_connection` OPTIONS (format='CSV')";
let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
let generated = crate::generate(&parsed[0], DialectType::BigQuery).unwrap();
assert_eq!(generated, sql);
}
// Identity roundtrip: all external-table clauses combined.
#[test]
fn test_bigquery_create_external_table_full_roundtrip() {
use crate::DialectType;
let sql = "CREATE EXTERNAL TABLE `project.dataset.my_table` WITH PARTITION COLUMNS (dt DATE) WITH CONNECTION `project.us.conn` OPTIONS (format='PARQUET', uris=['gs://bucket/*'])";
let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
let generated = crate::generate(&parsed[0], DialectType::BigQuery).unwrap();
assert_eq!(generated, sql);
}
// === BigQuery WITH syntax compatibility tests ===
// These verify that the is_bigquery_external guard does NOT break other BigQuery syntaxes.
#[test]
fn test_bigquery_create_table_as_select_no_with() {
use crate::DialectType;
let sql = "CREATE TABLE my_table AS SELECT 1 AS id, 'hello' AS name";
let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
let generated = crate::generate(&parsed[0], DialectType::BigQuery).unwrap();
assert_eq!(generated, sql);
}
// A CTE's WITH after AS must not be mistaken for an external-table clause.
#[test]
fn test_bigquery_create_table_with_cte_in_as_select() {
use crate::DialectType;
// CTE (WITH ... AS) inside the AS SELECT clause — the WITH is part of the query, not table properties
let sql = "CREATE TABLE my_table AS WITH cte AS (SELECT 1 AS id) SELECT * FROM cte";
let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
let create = match &parsed[0] {
Expression::CreateTable(ct) => ct,
other => panic!(
"Expected CreateTable, got {:?}",
std::mem::discriminant(other)
),
};
assert!(create.as_select.is_some(), "Expected AS SELECT with CTE");
assert!(
create.table_modifier.is_none(),
"Should NOT have EXTERNAL modifier"
);
}
// Multiple comma-separated CTEs in the AS SELECT also parse.
#[test]
fn test_bigquery_create_table_with_multiple_ctes() {
use crate::DialectType;
let sql = "CREATE TABLE result AS WITH cte1 AS (SELECT 1 AS a), cte2 AS (SELECT 2 AS b) SELECT * FROM cte1 CROSS JOIN cte2";
let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
let create = match &parsed[0] {
Expression::CreateTable(ct) => ct,
other => panic!(
"Expected CreateTable, got {:?}",
std::mem::discriminant(other)
),
};
assert!(
create.as_select.is_some(),
"Expected AS SELECT with multiple CTEs"
);
}
// Non-external table with PARTITION BY / CLUSTER BY / OPTIONS still gets
// typed properties and no EXTERNAL modifier.
#[test]
fn test_bigquery_create_table_partition_cluster_options_roundtrip() {
use crate::DialectType;
let sql = "CREATE TABLE t1 PARTITION BY dt CLUSTER BY region OPTIONS (description='partitioned') AS SELECT CURRENT_DATE() AS dt, 'us' AS region";
let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
let create = match &parsed[0] {
Expression::CreateTable(ct) => ct,
other => panic!(
"Expected CreateTable, got {:?}",
std::mem::discriminant(other)
),
};
assert!(create.table_modifier.is_none());
assert!(
create
.properties
.iter()
.any(|p| matches!(p, Expression::PartitionByProperty(_))),
"Expected PARTITION BY"
);
assert!(
create
.properties
.iter()
.any(|p| matches!(p, Expression::ClusterByColumnsProperty(_))),
"Expected CLUSTER BY"
);
assert!(
create
.properties
.iter()
.any(|p| matches!(p, Expression::OptionsProperty(_))),
"Expected OPTIONS"
);
}
// OR REPLACE flag is recorded without implying EXTERNAL.
#[test]
fn test_bigquery_create_or_replace_table_as_select() {
use crate::DialectType;
let sql = "CREATE OR REPLACE TABLE my_table AS SELECT 1 AS id";
let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
let create = match &parsed[0] {
Expression::CreateTable(ct) => ct,
other => panic!(
"Expected CreateTable, got {:?}",
std::mem::discriminant(other)
),
};
assert!(create.or_replace);
assert!(create.table_modifier.is_none());
}
// IF NOT EXISTS with a column list on a regular table.
#[test]
fn test_bigquery_create_table_if_not_exists_with_columns() {
use crate::DialectType;
let sql = "CREATE TABLE IF NOT EXISTS my_table (id INT64, name STRING)";
let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
let create = match &parsed[0] {
Expression::CreateTable(ct) => ct,
other => panic!(
"Expected CreateTable, got {:?}",
std::mem::discriminant(other)
),
};
assert!(create.if_not_exists);
assert_eq!(create.columns.len(), 2);
}
// EXTERNAL combines with IF NOT EXISTS.
#[test]
fn test_bigquery_external_table_if_not_exists() {
use crate::DialectType;
let sql = "CREATE EXTERNAL TABLE IF NOT EXISTS ext_t OPTIONS (format='CSV')";
let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
let create = match &parsed[0] {
Expression::CreateTable(ct) => ct,
other => panic!(
"Expected CreateTable, got {:?}",
std::mem::discriminant(other)
),
};
assert!(create.if_not_exists);
assert_eq!(create.table_modifier.as_deref(), Some("EXTERNAL"));
}
// EXTERNAL combines with OR REPLACE.
#[test]
fn test_bigquery_or_replace_external_table() {
use crate::DialectType;
let sql = "CREATE OR REPLACE EXTERNAL TABLE ext_t OPTIONS (format='CSV')";
let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
let create = match &parsed[0] {
Expression::CreateTable(ct) => ct,
other => panic!(
"Expected CreateTable, got {:?}",
std::mem::discriminant(other)
),
};
assert!(create.or_replace);
assert_eq!(create.table_modifier.as_deref(), Some("EXTERNAL"));
}
// Multi-line regression case for the external-table parsing fix, verbatim
// from the PR description.
#[test]
fn test_bigquery_external_table_pr_description_sql() {
use crate::DialectType;
// Exact SQL from the PR description — the query that was failing before this fix
let sql = r#"CREATE EXTERNAL TABLE IF NOT EXISTS `my_project.my_dataset.my_table`
WITH PARTITION COLUMNS (
table_name STRING,
sync_date DATE,
start_date DATE,
end_date DATE,
sync_id STRING
)
OPTIONS (
format = 'PARQUET',
uris = ['gs://my-bucket/data/table_name=my_table/*'],
hive_partition_uri_prefix = 'gs://my-bucket/data',
require_hive_partition_filter = false
)"#;
let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
let create = match &parsed[0] {
Expression::CreateTable(ct) => ct,
other => panic!(
"Expected CreateTable, got {:?}",
std::mem::discriminant(other)
),
};
assert!(create.if_not_exists);
assert_eq!(create.table_modifier.as_deref(), Some("EXTERNAL"));
assert_eq!(
create.with_partition_columns.len(),
5,
"Expected 5 partition columns"
);
assert!(!create.properties.is_empty(), "Expected OPTIONS properties");
}
// Clauses in the "wrong" order still parse; the generator canonicalizes
// PARTITION COLUMNS before CONNECTION on output.
#[test]
fn test_bigquery_create_external_table_reversed_clauses() {
use crate::DialectType;
let sql = "CREATE EXTERNAL TABLE t WITH CONNECTION `project.us.my_conn` WITH PARTITION COLUMNS (dt DATE) OPTIONS (format='PARQUET', uris=['gs://bucket/*'])";
let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
let create = match &parsed[0] {
Expression::CreateTable(ct) => ct,
other => panic!(
"Expected CreateTable, got {:?}",
std::mem::discriminant(other)
),
};
assert!(create.with_connection.is_some(), "Expected WITH CONNECTION");
assert_eq!(
create.with_partition_columns.len(),
1,
"Expected 1 partition column"
);
// Roundtrip: generator always emits PARTITION COLUMNS before CONNECTION
let generated = crate::generate(&parsed[0], DialectType::BigQuery).unwrap();
let expected = "CREATE EXTERNAL TABLE t WITH PARTITION COLUMNS (dt DATE) WITH CONNECTION `project.us.my_conn` OPTIONS (format='PARQUET', uris=['gs://bucket/*'])";
assert_eq!(generated, expected);
}
// WITH PARTITION COLUMNS without a column list (hive-style auto-detection)
// is accepted and yields an empty list.
#[test]
fn test_bigquery_create_external_table_bare_partition_columns() {
use crate::DialectType;
let sql = "CREATE EXTERNAL TABLE t WITH PARTITION COLUMNS OPTIONS (format='PARQUET', uris=['gs://bucket/*'])";
let parsed = crate::parse(sql, DialectType::BigQuery).unwrap();
let create = match &parsed[0] {
Expression::CreateTable(ct) => ct,
other => panic!(
"Expected CreateTable, got {:?}",
std::mem::discriminant(other)
),
};
assert!(
create.with_partition_columns.is_empty(),
"Bare WITH PARTITION COLUMNS should produce empty partition column list"
);
assert!(!create.properties.is_empty(), "Expected OPTIONS");
}
// DROP TABLE flags: IF EXISTS, CASCADE, and the name list.
#[test]
fn test_parse_drop_table() {
let result = Parser::parse_sql("DROP TABLE IF EXISTS users CASCADE").unwrap();
assert!(matches!(result[0], Expression::DropTable(_)));
if let Expression::DropTable(dt) = &result[0] {
assert!(dt.if_exists);
assert!(dt.cascade);
assert_eq!(dt.names.len(), 1);
}
}
// ALTER TABLE ... ADD COLUMN produces an AddColumn action.
#[test]
fn test_parse_alter_table_add_column() {
let result = Parser::parse_sql("ALTER TABLE users ADD COLUMN email VARCHAR(255)").unwrap();
assert!(matches!(result[0], Expression::AlterTable(_)));
if let Expression::AlterTable(at) = &result[0] {
assert_eq!(at.actions.len(), 1);
assert!(matches!(at.actions[0], AlterTableAction::AddColumn { .. }));
}
}
// ALTER TABLE ... DROP COLUMN produces a DropColumn action.
#[test]
fn test_parse_alter_table_drop_column() {
let result = Parser::parse_sql("ALTER TABLE users DROP COLUMN email").unwrap();
if let Expression::AlterTable(at) = &result[0] {
assert!(matches!(at.actions[0], AlterTableAction::DropColumn { .. }));
}
}
// T-SQL ALTER TABLE SET (...) option lists must round-trip unchanged,
// including nested ON(...) option groups and retention periods.
#[test]
fn test_tsql_alter_table_set_options() {
    use crate::{transpile, DialectType};
    let cases = [
        "ALTER TABLE tbl SET (SYSTEM_VERSIONING=OFF)",
        "ALTER TABLE tbl SET (FILESTREAM_ON = 'test')",
        "ALTER TABLE tbl SET (DATA_DELETION=ON)",
        "ALTER TABLE tbl SET (DATA_DELETION=OFF)",
        "ALTER TABLE tbl SET (SYSTEM_VERSIONING=ON(HISTORY_TABLE=db.tbl, DATA_CONSISTENCY_CHECK=OFF, HISTORY_RETENTION_PERIOD=5 DAYS))",
        "ALTER TABLE tbl SET (SYSTEM_VERSIONING=ON(HISTORY_TABLE=db.tbl, HISTORY_RETENTION_PERIOD=INFINITE))",
        "ALTER TABLE tbl SET (DATA_DELETION=ON(FILTER_COLUMN=col, RETENTION_PERIOD=5 MONTHS))",
    ];
    for sql in cases {
        // Each statement must survive a TSQL -> TSQL transpile byte-for-byte.
        match transpile(sql, DialectType::TSQL, DialectType::TSQL) {
            Ok(output) => assert_eq!(output[0].trim(), sql, "Identity failed for: {}", sql),
            Err(e) => panic!("Parse/generate failed for: {} -- {:?}", sql, e),
        }
    }
}
// CREATE UNIQUE INDEX captures uniqueness, names, and column list.
#[test]
fn test_parse_create_index() {
let result = Parser::parse_sql("CREATE UNIQUE INDEX idx_email ON users (email)").unwrap();
assert!(matches!(result[0], Expression::CreateIndex(_)));
if let Expression::CreateIndex(ci) = &result[0] {
assert!(ci.unique);
assert_eq!(ci.name.name, "idx_email");
assert_eq!(ci.table.name.name, "users");
assert_eq!(ci.columns.len(), 1);
}
}
// DROP INDEX ... ON table records the optional table.
#[test]
fn test_parse_drop_index() {
let result = Parser::parse_sql("DROP INDEX IF EXISTS idx_email ON users").unwrap();
assert!(matches!(result[0], Expression::DropIndex(_)));
if let Expression::DropIndex(di) = &result[0] {
assert!(di.if_exists);
assert!(di.table.is_some());
}
}
// CREATE VIEW ... AS SELECT parses to a CreateView node.
#[test]
fn test_parse_create_view() {
let result =
Parser::parse_sql("CREATE VIEW active_users AS SELECT * FROM users WHERE active = 1")
.unwrap();
assert!(matches!(result[0], Expression::CreateView(_)));
}
// MATERIALIZED flag is recorded on views.
#[test]
fn test_parse_create_materialized_view() {
let result =
Parser::parse_sql("CREATE MATERIALIZED VIEW stats AS SELECT COUNT(*) FROM users")
.unwrap();
if let Expression::CreateView(cv) = &result[0] {
assert!(cv.materialized);
}
}
// Hive STORED BY 'handler' round-trips.
#[test]
fn test_hive_stored_by() {
use crate::{transpile, DialectType};
let sql = "CREATE EXTERNAL TABLE X (y INT) STORED BY 'x'";
let result = transpile(sql, DialectType::Hive, DialectType::Hive);
match result {
Ok(output) => {
assert_eq!(output[0].trim(), sql, "Identity failed for: {}", sql);
}
Err(e) => {
panic!("Parse/generate failed for: {} -- {:?}", sql, e);
}
}
}
// Hive CREATE TABLE storage clauses (SERDE, INPUTFORMAT/OUTPUTFORMAT,
// LOCATION, TBLPROPERTIES, STORED BY) round-trip individually and combined.
#[test]
fn test_hive_row_format_serde() {
use crate::{transpile, DialectType};
// Test various Hive CREATE TABLE syntax
let test_cases = vec![
(
"CREATE TABLE my_table (a7 ARRAY<DATE>)",
"CREATE TABLE my_table (a7 ARRAY<DATE>)",
),
(
"CREATE EXTERNAL TABLE my_table (x INT) ROW FORMAT SERDE 'a'",
"CREATE EXTERNAL TABLE my_table (x INT) ROW FORMAT SERDE 'a'",
),
(
"CREATE EXTERNAL TABLE my_table (x INT) STORED AS INPUTFORMAT 'b' OUTPUTFORMAT 'c'",
"CREATE EXTERNAL TABLE my_table (x INT) STORED AS INPUTFORMAT 'b' OUTPUTFORMAT 'c'",
),
(
"CREATE EXTERNAL TABLE my_table (x INT) LOCATION 'd'",
"CREATE EXTERNAL TABLE my_table (x INT) LOCATION 'd'",
),
(
"CREATE EXTERNAL TABLE my_table (x INT) TBLPROPERTIES ('e'='f')",
"CREATE EXTERNAL TABLE my_table (x INT) TBLPROPERTIES ('e'='f')",
),
(
"CREATE EXTERNAL TABLE X (y INT) STORED BY 'x'",
"CREATE EXTERNAL TABLE X (y INT) STORED BY 'x'",
),
];
for (sql, expected) in &test_cases {
let result = transpile(sql, DialectType::Hive, DialectType::Hive);
match result {
Ok(output) => {
assert_eq!(output[0].trim(), *expected, "Identity failed for: {}", sql);
}
Err(e) => {
panic!("Parse/generate failed for: {} -- {:?}", sql, e);
}
}
}
// Test full case with all Hive table properties
let sql = "CREATE EXTERNAL TABLE `my_table` (`a7` ARRAY<DATE>) ROW FORMAT SERDE 'a' STORED AS INPUTFORMAT 'b' OUTPUTFORMAT 'c' LOCATION 'd' TBLPROPERTIES ('e'='f')";
let result = transpile(sql, DialectType::Hive, DialectType::Hive);
match result {
Ok(output) => {
assert_eq!(output[0].trim(), sql, "Identity failed for: {}", sql);
}
Err(e) => {
panic!("Parse/generate failed for: {} -- {:?}", sql, e);
}
}
}
// DROP VIEW IF EXISTS parses to a DropView node.
#[test]
fn test_parse_drop_view() {
let result = Parser::parse_sql("DROP VIEW IF EXISTS active_users").unwrap();
assert!(matches!(result[0], Expression::DropView(_)));
}
// TRUNCATE TABLE ... CASCADE records the cascade flag.
#[test]
fn test_parse_truncate() {
let result = Parser::parse_sql("TRUNCATE TABLE users CASCADE").unwrap();
assert!(matches!(result[0], Expression::Truncate(_)));
if let Expression::Truncate(tr) = &result[0] {
assert!(tr.cascade);
}
}
// Tests for typed aggregate functions
// Aggregates parse to dedicated AST variants, with DISTINCT/star flags.
#[test]
fn test_parse_typed_aggregates() {
// COUNT with DISTINCT
let result = Parser::parse_sql("SELECT COUNT(DISTINCT user_id)").unwrap();
let select = result[0].as_select().unwrap();
if let Expression::Count(c) = &select.expressions[0] {
assert!(c.distinct);
assert!(!c.star);
} else {
panic!("Expected Count expression");
}
// AVG
let result = Parser::parse_sql("SELECT AVG(price)").unwrap();
let select = result[0].as_select().unwrap();
assert!(matches!(select.expressions[0], Expression::Avg(_)));
// MIN/MAX
let result = Parser::parse_sql("SELECT MIN(a), MAX(b)").unwrap();
let select = result[0].as_select().unwrap();
assert!(matches!(select.expressions[0], Expression::Min(_)));
assert!(matches!(select.expressions[1], Expression::Max(_)));
// STDDEV/VARIANCE
let result = Parser::parse_sql("SELECT STDDEV(x), VARIANCE(y)").unwrap();
let select = result[0].as_select().unwrap();
assert!(matches!(select.expressions[0], Expression::Stddev(_)));
assert!(matches!(select.expressions[1], Expression::Variance(_)));
}
// Window functions wrap typed inner nodes (RowNumber, Rank, Lead, NTile...).
#[test]
fn test_parse_typed_window_functions() {
// ROW_NUMBER
let result = Parser::parse_sql("SELECT ROW_NUMBER() OVER (ORDER BY id)").unwrap();
let select = result[0].as_select().unwrap();
if let Expression::WindowFunction(wf) = &select.expressions[0] {
assert!(matches!(wf.this, Expression::RowNumber(_)));
} else {
panic!("Expected WindowFunction");
}
// RANK and DENSE_RANK
let result = Parser::parse_sql("SELECT RANK() OVER (), DENSE_RANK() OVER ()").unwrap();
let select = result[0].as_select().unwrap();
if let Expression::WindowFunction(wf) = &select.expressions[0] {
assert!(matches!(wf.this, Expression::Rank(_)));
}
if let Expression::WindowFunction(wf) = &select.expressions[1] {
assert!(matches!(wf.this, Expression::DenseRank(_)));
}
// LEAD/LAG
let result = Parser::parse_sql("SELECT LEAD(val, 1, 0) OVER (ORDER BY id)").unwrap();
let select = result[0].as_select().unwrap();
if let Expression::WindowFunction(wf) = &select.expressions[0] {
if let Expression::Lead(f) = &wf.this {
assert!(f.offset.is_some());
assert!(f.default.is_some());
} else {
panic!("Expected Lead");
}
}
// NTILE
let result = Parser::parse_sql("SELECT NTILE(4) OVER (ORDER BY score)").unwrap();
let select = result[0].as_select().unwrap();
if let Expression::WindowFunction(wf) = &select.expressions[0] {
assert!(matches!(wf.this, Expression::NTile(_)));
}
}
#[test]
fn test_parse_string_functions() {
// CONTAINS, STARTS_WITH, ENDS_WITH
let result = Parser::parse_sql("SELECT CONTAINS(name, 'test')").unwrap();
let select = result[0].as_select().unwrap();
assert!(matches!(select.expressions[0], Expression::Contains(_)));
let result = Parser::parse_sql("SELECT STARTS_WITH(name, 'A')").unwrap();
let select = result[0].as_select().unwrap();
assert!(matches!(select.expressions[0], Expression::StartsWith(_)));
let result = Parser::parse_sql("SELECT ENDS_WITH(name, 'z')").unwrap();
let select = result[0].as_select().unwrap();
assert!(matches!(select.expressions[0], Expression::EndsWith(_)));
}
#[test]
fn test_parse_math_functions() {
// MOD function
let result = Parser::parse_sql("SELECT MOD(10, 3)").unwrap();
let select = result[0].as_select().unwrap();
assert!(matches!(select.expressions[0], Expression::ModFunc(_)));
// RANDOM and RAND
let result = Parser::parse_sql("SELECT RANDOM()").unwrap();
let select = result[0].as_select().unwrap();
assert!(matches!(select.expressions[0], Expression::Random(_)));
let result = Parser::parse_sql("SELECT RAND(42)").unwrap();
let select = result[0].as_select().unwrap();
assert!(matches!(select.expressions[0], Expression::Rand(_)));
// Trigonometric functions
let result = Parser::parse_sql("SELECT SIN(x), COS(x), TAN(x)").unwrap();
let select = result[0].as_select().unwrap();
assert!(matches!(select.expressions[0], Expression::Sin(_)));
assert!(matches!(select.expressions[1], Expression::Cos(_)));
assert!(matches!(select.expressions[2], Expression::Tan(_)));
}
#[test]
fn test_parse_date_functions() {
// Date part extraction functions
let result =
Parser::parse_sql("SELECT YEAR(date_col), MONTH(date_col), DAY(date_col)").unwrap();
let select = result[0].as_select().unwrap();
assert!(matches!(select.expressions[0], Expression::Year(_)));
assert!(matches!(select.expressions[1], Expression::Month(_)));
assert!(matches!(select.expressions[2], Expression::Day(_)));
// EPOCH and EPOCH_MS
let result = Parser::parse_sql("SELECT EPOCH(ts), EPOCH_MS(ts)").unwrap();
let select = result[0].as_select().unwrap();
assert!(matches!(select.expressions[0], Expression::Epoch(_)));
assert!(matches!(select.expressions[1], Expression::EpochMs(_)));
}
#[test]
fn test_parse_array_functions() {
// ARRAY_LENGTH
let result = Parser::parse_sql("SELECT ARRAY_LENGTH(arr)").unwrap();
let select = result[0].as_select().unwrap();
assert!(matches!(select.expressions[0], Expression::ArrayLength(_)));
// ARRAY_CONTAINS
let result = Parser::parse_sql("SELECT ARRAY_CONTAINS(arr, 1)").unwrap();
let select = result[0].as_select().unwrap();
assert!(matches!(
select.expressions[0],
Expression::ArrayContains(_)
));
// EXPLODE
let result = Parser::parse_sql("SELECT EXPLODE(arr)").unwrap();
let select = result[0].as_select().unwrap();
assert!(matches!(select.expressions[0], Expression::Explode(_)));
}
#[test]
fn test_parse_json_functions() {
    // JSON_EXTRACT with a JSONPath argument.
    let stmts = Parser::parse_sql("SELECT JSON_EXTRACT(data, '$.name')").unwrap();
    let sel = stmts[0].as_select().unwrap();
    assert!(matches!(sel.expressions[0], Expression::JsonExtract(_)));
    // JSON_ARRAY_LENGTH.
    let stmts = Parser::parse_sql("SELECT JSON_ARRAY_LENGTH(arr)").unwrap();
    let sel = stmts[0].as_select().unwrap();
    assert!(matches!(sel.expressions[0], Expression::JsonArrayLength(_)));
    // TO_JSON and PARSE_JSON in a single projection list.
    let stmts = Parser::parse_sql("SELECT TO_JSON(obj), PARSE_JSON(str)").unwrap();
    let sel = stmts[0].as_select().unwrap();
    assert!(matches!(sel.expressions[0], Expression::ToJson(_)));
    assert!(matches!(sel.expressions[1], Expression::ParseJson(_)));
    // A typed JSON literal is normalized to ParseJson.
    let stmts = Parser::parse_sql("SELECT JSON '\"foo\"'").unwrap();
    let sel = stmts[0].as_select().unwrap();
    assert!(
        matches!(sel.expressions[0], Expression::ParseJson(_)),
        "Expected ParseJson, got: {:?}",
        sel.expressions[0]
    );
}
#[test]
fn test_parse_map_functions() {
    // MAP_KEYS / MAP_VALUES in one projection list.
    let stmts = Parser::parse_sql("SELECT MAP_KEYS(m), MAP_VALUES(m)").unwrap();
    let sel = stmts[0].as_select().unwrap();
    assert!(matches!(sel.expressions[0], Expression::MapKeys(_)));
    assert!(matches!(sel.expressions[1], Expression::MapValues(_)));
    // ELEMENT_AT with a string key.
    let stmts = Parser::parse_sql("SELECT ELEMENT_AT(m, 'key')").unwrap();
    let sel = stmts[0].as_select().unwrap();
    assert!(matches!(sel.expressions[0], Expression::ElementAt(_)));
}
// Typed literal parsing in the generic dialect: DATE '...' and
// TIMESTAMP '...' are normalized to CAST('...' AS <type>), while
// TIME '...' is kept as a dedicated Time literal node.
#[test]
fn test_parse_date_literals() {
// DATE literal (generic mode normalizes to CAST)
let result = Parser::parse_sql("SELECT DATE '2024-01-15'").unwrap();
let select = result[0].as_select().unwrap();
match &select.expressions[0] {
Expression::Cast(cast) => {
// The CAST operand must be the original date string, unchanged.
match &cast.this {
Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
// let-else is safe: the match guard above already proved String.
let Literal::String(s) = lit.as_ref() else {
unreachable!()
};
assert_eq!(s, "2024-01-15")
}
other => panic!("Expected String literal in Cast, got {:?}", other),
}
// Target type of the normalization is DATE.
assert!(matches!(cast.to, DataType::Date));
}
other => panic!("Expected Cast expression, got {:?}", other),
}
// TIME literal
// Unlike DATE/TIMESTAMP, TIME is not rewritten to CAST here.
let result = Parser::parse_sql("SELECT TIME '10:30:00'").unwrap();
let select = result[0].as_select().unwrap();
match &select.expressions[0] {
Expression::Literal(lit) if matches!(lit.as_ref(), Literal::Time(_)) => {
let Literal::Time(t) = lit.as_ref() else {
unreachable!()
};
assert_eq!(t, "10:30:00");
}
_ => panic!("Expected Time literal"),
}
// TIMESTAMP literal -> CAST in generic mode
let result = Parser::parse_sql("SELECT TIMESTAMP '2024-01-15 10:30:00'").unwrap();
let select = result[0].as_select().unwrap();
match &select.expressions[0] {
Expression::Cast(cast) => {
match &cast.this {
Expression::Literal(lit) if matches!(lit.as_ref(), Literal::String(_)) => {
let Literal::String(s) = lit.as_ref() else {
unreachable!()
};
assert_eq!(s, "2024-01-15 10:30:00")
}
other => panic!("Expected String literal inside Cast, got {:?}", other),
}
// A bare TIMESTAMP keyword carries no precision and no time zone.
assert!(matches!(
&cast.to,
DataType::Timestamp {
precision: None,
timezone: false
}
));
}
_ => panic!("Expected Cast expression for TIMESTAMP literal"),
}
}
#[test]
fn test_parse_star_exclude() {
    // EXCLUDE with multiple columns (DuckDB/Snowflake spelling).
    let stmts = Parser::parse_sql("SELECT * EXCLUDE (col1, col2) FROM t").unwrap();
    let sel = stmts[0].as_select().unwrap();
    let Expression::Star(star) = &sel.expressions[0] else {
        panic!("Expected Star expression");
    };
    assert!(star.except.is_some());
    let except = star.except.as_ref().unwrap();
    assert_eq!(except.len(), 2);
    assert_eq!(except[0].name, "col1");
    assert_eq!(except[1].name, "col2");
    // EXCEPT is the BigQuery spelling of the same feature.
    let stmts = Parser::parse_sql("SELECT * EXCEPT (id, created_at) FROM t").unwrap();
    let sel = stmts[0].as_select().unwrap();
    let Expression::Star(star) = &sel.expressions[0] else {
        panic!("Expected Star expression");
    };
    assert!(star.except.is_some());
    // Qualified star (t.*) keeps its table and still records EXCLUDE.
    let stmts = Parser::parse_sql("SELECT t.* EXCLUDE (col1) FROM t").unwrap();
    let sel = stmts[0].as_select().unwrap();
    let Expression::Star(star) = &sel.expressions[0] else {
        panic!("Expected Star expression");
    };
    assert!(star.table.is_some());
    assert_eq!(star.table.as_ref().unwrap().name, "t");
    assert!(star.except.is_some());
}
#[test]
fn test_parse_star_replace() {
    // REPLACE with a single rewritten column.
    let stmts = Parser::parse_sql("SELECT * REPLACE (UPPER(name) AS name) FROM t").unwrap();
    let sel = stmts[0].as_select().unwrap();
    let Expression::Star(star) = &sel.expressions[0] else {
        panic!("Expected Star expression");
    };
    assert!(star.replace.is_some());
    let replace = star.replace.as_ref().unwrap();
    assert_eq!(replace.len(), 1);
    assert_eq!(replace[0].alias.name, "name");
    // REPLACE with several rewritten columns.
    let stmts = Parser::parse_sql("SELECT * REPLACE (a + 1 AS a, b * 2 AS b) FROM t").unwrap();
    let sel = stmts[0].as_select().unwrap();
    let Expression::Star(star) = &sel.expressions[0] else {
        panic!("Expected Star expression");
    };
    let replace = star.replace.as_ref().unwrap();
    assert_eq!(replace.len(), 2);
}
#[test]
fn test_parse_star_rename() {
    // RENAME records (old, new) identifier pairs.
    let stmts =
        Parser::parse_sql("SELECT * RENAME (old_col AS new_col, x AS y) FROM t").unwrap();
    let sel = stmts[0].as_select().unwrap();
    let Expression::Star(star) = &sel.expressions[0] else {
        panic!("Expected Star expression");
    };
    assert!(star.rename.is_some());
    let rename = star.rename.as_ref().unwrap();
    assert_eq!(rename.len(), 2);
    assert_eq!(rename[0].0.name, "old_col");
    assert_eq!(rename[0].1.name, "new_col");
}
#[test]
fn test_parse_star_combined() {
    // EXCLUDE and REPLACE may appear together on the same star.
    let stmts =
        Parser::parse_sql("SELECT * EXCLUDE (id) REPLACE (name || '!' AS name) FROM t")
            .unwrap();
    let sel = stmts[0].as_select().unwrap();
    let Expression::Star(star) = &sel.expressions[0] else {
        panic!("Expected Star expression");
    };
    assert!(star.except.is_some());
    assert!(star.replace.is_some());
}
#[test]
fn test_parse_spatial_types() {
    // Fix: the original used `if let Expression::CreateTable(ct) = ... {}` with
    // no else branch, so if parsing produced any other node every assertion was
    // silently skipped and the test passed vacuously. Each case now panics when
    // the statement is not a CreateTable.
    // GEOMETRY with subtype and SRID (PostgreSQL syntax).
    let result = Parser::parse_sql("CREATE TABLE t (geom GEOMETRY(Point, 4326))").unwrap();
    let Expression::CreateTable(ct) = &result[0] else {
        panic!("Expected CreateTable, got {:?}", result[0]);
    };
    assert_eq!(ct.columns.len(), 1);
    match &ct.columns[0].data_type {
        DataType::Geometry { subtype, srid } => {
            // Subtype is normalized to uppercase by the parser.
            assert_eq!(subtype.as_deref(), Some("POINT"));
            assert_eq!(*srid, Some(4326));
        }
        _ => panic!("Expected Geometry type"),
    }
    // GEOGRAPHY without parameters.
    let result = Parser::parse_sql("CREATE TABLE t (loc GEOGRAPHY)").unwrap();
    let Expression::CreateTable(ct) = &result[0] else {
        panic!("Expected CreateTable, got {:?}", result[0]);
    };
    match &ct.columns[0].data_type {
        DataType::Geography { subtype, srid } => {
            assert!(subtype.is_none());
            assert!(srid.is_none());
        }
        _ => panic!("Expected Geography type"),
    }
    // GEOMETRY with a subtype but no SRID.
    let result = Parser::parse_sql("CREATE TABLE t (geom GEOMETRY(LineString))").unwrap();
    let Expression::CreateTable(ct) = &result[0] else {
        panic!("Expected CreateTable, got {:?}", result[0]);
    };
    match &ct.columns[0].data_type {
        DataType::Geometry { subtype, srid } => {
            assert_eq!(subtype.as_deref(), Some("LINESTRING"));
            assert!(srid.is_none());
        }
        _ => panic!("Expected Geometry type"),
    }
    // Bare POINT (MySQL style) parses as Geometry with subtype POINT.
    let result = Parser::parse_sql("CREATE TABLE t (pt POINT)").unwrap();
    let Expression::CreateTable(ct) = &result[0] else {
        panic!("Expected CreateTable, got {:?}", result[0]);
    };
    match &ct.columns[0].data_type {
        DataType::Geometry { subtype, srid } => {
            assert_eq!(subtype.as_deref(), Some("POINT"));
            assert!(srid.is_none());
        }
        _ => panic!("Expected Geometry type"),
    }
}
#[test]
fn test_parse_duckdb_pivot_simple() {
    // DuckDB's simplified PIVOT statement form (no FROM clause).
    let sql = "PIVOT Cities ON Year USING SUM(Population)";
    let parsed = Parser::parse_sql(sql);
    assert!(
        parsed.is_ok(),
        "Failed to parse: {} - {:?}",
        sql,
        parsed.err()
    );
    let stmts = parsed.unwrap();
    assert_eq!(
        stmts.len(),
        1,
        "Expected 1 statement, got {}: {:?}",
        stmts.len(),
        stmts
    );
    let Expression::Pivot(p) = &stmts[0] else {
        panic!("Expected Pivot, got {:?}", stmts[0]);
    };
    assert!(!p.unpivot);
    assert!(!p.expressions.is_empty(), "Should have ON expressions");
    assert!(!p.using.is_empty(), "Should have USING expressions");
}
#[test]
fn test_parse_duckdb_pivot_with_group_by() {
    // The simplified PIVOT accepts a trailing GROUP BY.
    let sql = "PIVOT Cities ON Year USING SUM(Population) GROUP BY Country";
    let parsed = Parser::parse_sql(sql);
    assert!(
        parsed.is_ok(),
        "Failed to parse: {} - {:?}",
        sql,
        parsed.err()
    );
}
#[test]
fn test_parse_duckdb_unpivot() {
    // The simplified UNPIVOT with INTO NAME/VALUE targets.
    let sql = "UNPIVOT monthly_sales ON jan, feb, mar INTO NAME month VALUE sales";
    let parsed = Parser::parse_sql(sql);
    assert!(
        parsed.is_ok(),
        "Failed to parse: {} - {:?}",
        sql,
        parsed.err()
    );
}
#[test]
fn test_parse_standard_pivot_in_from() {
    // ANSI-style PIVOT attached to a table reference in FROM.
    let sql = "SELECT * FROM cities PIVOT(SUM(population) FOR year IN (2000, 2010, 2020))";
    let parsed = Parser::parse_sql(sql);
    assert!(
        parsed.is_ok(),
        "Failed to parse: {} - {:?}",
        sql,
        parsed.err()
    );
}
/// Asserts `sql` round-trips unchanged through the DuckDB dialect:
/// parse -> generate must reproduce the input text exactly.
fn assert_pivot_roundtrip(sql: &str) {
    let res = crate::parse(sql, crate::DialectType::DuckDB);
    assert!(res.is_ok(), "Failed to parse: {} - {:?}", sql, res.err());
    let stmts = res.unwrap();
    assert_eq!(stmts.len(), 1, "Expected 1 statement for: {}", sql);
    let gen = crate::generate(&stmts[0], crate::DialectType::DuckDB);
    assert!(gen.is_ok(), "Failed to generate: {} - {:?}", sql, gen.err());
    let out = gen.unwrap();
    assert_eq!(out.trim(), sql, "Round-trip mismatch for: {}", sql);
}
/// Same round-trip identity check, but through the BigQuery dialect.
fn assert_pivot_roundtrip_bq(sql: &str) {
    let res = crate::parse(sql, crate::DialectType::BigQuery);
    assert!(res.is_ok(), "Failed to parse: {} - {:?}", sql, res.err());
    let stmts = res.unwrap();
    assert_eq!(stmts.len(), 1, "Expected 1 statement for: {}", sql);
    let gen = crate::generate(&stmts[0], crate::DialectType::BigQuery);
    assert!(gen.is_ok(), "Failed to generate: {} - {:?}", sql, gen.err());
    let out = gen.unwrap();
    assert_eq!(out.trim(), sql, "Round-trip mismatch for: {}", sql);
}
// DuckDB PIVOT/UNPIVOT round-trip identity cases.
#[test]
fn test_pivot_roundtrip_duckdb_simple() {
    let sql = "PIVOT Cities ON Year USING SUM(Population)";
    assert_pivot_roundtrip(sql);
}
#[test]
fn test_pivot_roundtrip_duckdb_group_by() {
    let sql = "PIVOT Cities ON Year USING SUM(Population) GROUP BY Country";
    assert_pivot_roundtrip(sql);
}
#[test]
fn test_pivot_roundtrip_duckdb_in_clause() {
    let sql = "PIVOT Cities ON Year IN (2000, 2010) USING SUM(Population) GROUP BY Country";
    assert_pivot_roundtrip(sql);
}
#[test]
fn test_pivot_roundtrip_duckdb_multiple_using() {
    let sql = "PIVOT Cities ON Year USING SUM(Population) AS total, MAX(Population) AS max GROUP BY Country";
    assert_pivot_roundtrip(sql);
}
#[test]
fn test_pivot_roundtrip_duckdb_multiple_on() {
    let sql = "PIVOT Cities ON Country, Name USING SUM(Population)";
    assert_pivot_roundtrip(sql);
}
#[test]
fn test_pivot_roundtrip_duckdb_concat_on() {
    let sql = "PIVOT Cities ON Country || '_' || Name USING SUM(Population)";
    assert_pivot_roundtrip(sql);
}
#[test]
fn test_pivot_roundtrip_duckdb_multiple_group_by() {
    let sql = "PIVOT Cities ON Year USING SUM(Population) GROUP BY Country, Name";
    assert_pivot_roundtrip(sql);
}
#[test]
fn test_pivot_roundtrip_duckdb_first() {
    let sql = "PIVOT Cities ON Year USING FIRST(Population)";
    assert_pivot_roundtrip(sql);
}
#[test]
fn test_unpivot_roundtrip_duckdb_basic() {
    let sql = "UNPIVOT monthly_sales ON jan, feb, mar, apr, may, jun INTO NAME month VALUE sales";
    assert_pivot_roundtrip(sql);
}
#[test]
fn test_unpivot_roundtrip_duckdb_subquery() {
    let sql = "UNPIVOT (SELECT 1 AS col1, 2 AS col2) ON foo, bar";
    assert_pivot_roundtrip(sql);
}
#[test]
fn test_pivot_roundtrip_duckdb_cte() {
    let sql = "WITH pivot_alias AS (PIVOT Cities ON Year USING SUM(Population) GROUP BY Country) SELECT * FROM pivot_alias";
    assert_pivot_roundtrip(sql);
}
#[test]
fn test_pivot_roundtrip_duckdb_subquery() {
    let sql = "SELECT * FROM (PIVOT Cities ON Year USING SUM(Population) GROUP BY Country) AS pivot_alias";
    assert_pivot_roundtrip(sql);
}
#[test]
fn test_pivot_roundtrip_standard_from() {
    let sql = "SELECT * FROM cities PIVOT(SUM(population) FOR year IN (2000, 2010, 2020) GROUP BY country)";
    assert_pivot_roundtrip(sql);
}
#[test]
fn test_pivot_roundtrip_standard_bare_in() {
    // IN may name an enum instead of a value list.
    let sql = "SELECT * FROM t PIVOT(SUM(y) FOR foo IN y_enum)";
    assert_pivot_roundtrip(sql);
}
#[test]
fn test_unpivot_roundtrip_bigquery() {
    let sql = "SELECT * FROM q UNPIVOT(values FOR quarter IN (b, c))";
    assert_pivot_roundtrip_bq(sql);
}
#[test]
fn test_pivot_roundtrip_bigquery_aliases() {
    let sql = "SELECT cars, apples FROM some_table PIVOT(SUM(total_counts) FOR products IN ('general.cars' AS cars, 'food.apples' AS apples))";
    assert_pivot_roundtrip_bq(sql);
}
#[test]
fn test_unpivot_roundtrip_bigquery_parens() {
    let sql = "SELECT * FROM (SELECT * FROM `t`) AS a UNPIVOT((c) FOR c_name IN (v1, v2))";
    assert_pivot_roundtrip_bq(sql);
}
#[test]
fn test_pivot_roundtrip_bigquery_multi_agg() {
    // BigQuery fixture: multiple aggregates, each with an explicit AS alias.
    let sql = "SELECT * FROM (SELECT a, b, c FROM test) PIVOT(SUM(b) AS d, COUNT(*) AS e FOR c IN ('x', 'y'))";
    assert_pivot_roundtrip_bq(sql);
}
// Fixture coverage for UNPIVOT with COLUMNS(...) selectors and grouped ON lists.
#[test]
fn test_unpivot_roundtrip_duckdb_columns_exclude() {
    let sql = "UNPIVOT monthly_sales ON COLUMNS(* EXCLUDE (empid, dept)) INTO NAME month VALUE sales";
    assert_pivot_roundtrip(sql);
}
#[test]
fn test_unpivot_roundtrip_duckdb_grouped_columns() {
    let sql = "UNPIVOT monthly_sales ON (jan, feb, mar) AS q1, (apr, may, jun) AS q2 INTO NAME quarter VALUE month_1_sales, month_2_sales, month_3_sales";
    assert_pivot_roundtrip(sql);
}
#[test]
fn test_unpivot_roundtrip_duckdb_cte_columns() {
    let sql = "WITH unpivot_alias AS (UNPIVOT monthly_sales ON COLUMNS(* EXCLUDE (empid, dept)) INTO NAME month VALUE sales) SELECT * FROM unpivot_alias";
    assert_pivot_roundtrip(sql);
}
#[test]
fn test_unpivot_roundtrip_duckdb_subquery_columns() {
    let sql = "SELECT * FROM (UNPIVOT monthly_sales ON COLUMNS(* EXCLUDE (empid, dept)) INTO NAME month VALUE sales) AS unpivot_alias";
    assert_pivot_roundtrip(sql);
}
#[test]
fn test_pivot_roundtrip_duckdb_cte_with_columns() {
    let sql = "WITH cities(country, name, year, population) AS (SELECT 'NL', 'Amsterdam', 2000, 1005 UNION ALL SELECT 'US', 'Seattle', 2020, 738) PIVOT cities ON year USING SUM(population)";
    assert_pivot_roundtrip(sql);
}
#[test]
fn test_pivot_roundtrip_standard_first_with_alias() {
    // DuckDB fixture #73: the stray comma before FOR is dropped in the
    // canonical generated output, so input and expected differ.
    let sql = "SELECT * FROM t PIVOT(FIRST(t) AS t, FOR quarter IN ('Q1', 'Q2'))";
    let expected = "SELECT * FROM t PIVOT(FIRST(t) AS t FOR quarter IN ('Q1', 'Q2'))";
    let res = crate::parse(sql, crate::DialectType::DuckDB);
    assert!(res.is_ok(), "Failed to parse: {} - {:?}", sql, res.err());
    let stmts = res.unwrap();
    assert_eq!(stmts.len(), 1);
    let gen = crate::generate(&stmts[0], crate::DialectType::DuckDB);
    assert!(gen.is_ok(), "Failed to generate: {} - {:?}", sql, gen.err());
    assert_eq!(gen.unwrap().trim(), expected, "Round-trip mismatch");
}
#[test]
fn test_pivot_roundtrip_bigquery_implicit_alias() {
    // BigQuery fixture #134: implicit aggregate aliases are emitted with an
    // explicit AS, so input and expected differ.
    let sql = "SELECT * FROM (SELECT a, b, c FROM test) PIVOT(SUM(b) d, COUNT(*) e FOR c IN ('x', 'y'))";
    let expected = "SELECT * FROM (SELECT a, b, c FROM test) PIVOT(SUM(b) AS d, COUNT(*) AS e FOR c IN ('x', 'y'))";
    let res = crate::parse(sql, crate::DialectType::BigQuery);
    assert!(res.is_ok(), "Failed to parse: {} - {:?}", sql, res.err());
    let stmts = res.unwrap();
    assert_eq!(stmts.len(), 1);
    let gen = crate::generate(&stmts[0], crate::DialectType::BigQuery);
    assert!(gen.is_ok(), "Failed to generate: {} - {:?}", sql, gen.err());
    assert_eq!(gen.unwrap().trim(), expected, "Round-trip mismatch");
}
#[test]
fn test_duckdb_struct_enum_union_row_types() {
    use crate::DialectType;
    // Parses `sql` as DuckDB and checks the generated text equals `expected`
    // (or the input itself when `expected` is None).
    fn check(sql: &str, expected: Option<&str>) {
        let want = expected.unwrap_or(sql);
        let parsed = crate::parse(sql, DialectType::DuckDB);
        assert!(parsed.is_ok(), "Failed to parse: {} - {:?}", sql, parsed.err());
        let stmts = parsed.unwrap();
        assert!(!stmts.is_empty(), "No statements parsed: {}", sql);
        let generated = crate::generate(&stmts[0], DialectType::DuckDB);
        assert!(
            generated.is_ok(),
            "Failed to generate: {} - {:?}",
            sql,
            generated.err()
        );
        assert_eq!(generated.unwrap().trim(), want, "Mismatch for: {}", sql);
    }
    // Table-driven: (input, expected-if-different). Covers UNION, ENUM,
    // ROW -> STRUCT normalization, quoted/nested STRUCT fields, and array
    // suffixes on STRUCT types.
    let cases: [(&str, Option<&str>); 9] = [
        ("CREATE TABLE tbl1 (u UNION(num INT, str TEXT))", None),
        (
            "CREATE TABLE color (name ENUM('RED', 'GREEN', 'BLUE'))",
            None,
        ),
        (
            "SELECT CAST(ROW(1, 2) AS ROW(a INTEGER, b INTEGER))",
            Some("SELECT CAST(ROW(1, 2) AS STRUCT(a INT, b INT))"),
        ),
        ("CAST(x AS STRUCT(number BIGINT))", None),
        (
            "CAST({'i': 1, 's': 'foo'} AS STRUCT(\"s\" TEXT, \"i\" INT))",
            None,
        ),
        (
            "CAST(ROW(1, ROW(1)) AS STRUCT(number BIGINT, row STRUCT(number BIGINT)))",
            None,
        ),
        // STRUCT with array suffix - only exercises type parsing; the
        // STRUCT_PACK -> struct literal transform is a separate feature.
        ("CAST(x AS STRUCT(a BIGINT)[][])", None),
        ("CAST(x AS STRUCT(a BIGINT)[])", None),
        ("CAST({'a': 'b'} AS STRUCT(a TEXT))", None),
    ];
    for &(sql, expected) in cases.iter() {
        check(sql, expected);
    }
}
// Parse `sql` with the generic dialect and generate it back to text,
// panicking with context on either failure.
fn roundtrip(sql: &str) -> String {
    let ast = match Parser::parse_sql(sql) {
        Ok(ast) => ast,
        Err(e) => panic!("Parse error for '{}': {}", sql, e),
    };
    match crate::generator::Generator::sql(&ast[0]) {
        Ok(text) => text,
        Err(e) => panic!("Generate error for '{}': {}", sql, e),
    }
}
// Asserts the SQL regenerates to exactly itself.
fn assert_roundtrip(sql: &str) {
    let out = roundtrip(sql);
    assert_eq!(out, sql, "\n Input: {}\n Output: {}", sql, out);
}
// Asserts the SQL regenerates to `expected` (its canonical form).
fn assert_roundtrip_expected(sql: &str, expected: &str) {
    let out = roundtrip(sql);
    assert_eq!(
        out, expected,
        "\n Input: {}\n Expected: {}\n Output: {}",
        sql, expected, out
    );
}
// XMLELEMENT / XMLATTRIBUTES round-trip identity cases.
#[test]
fn test_xmlelement_basic() {
    let sql = "SELECT XMLELEMENT(NAME foo)";
    assert_roundtrip(sql);
}
#[test]
fn test_xmlelement_with_xmlattributes() {
    let sql = "SELECT XMLELEMENT(NAME foo, XMLATTRIBUTES('xyz' AS bar))";
    assert_roundtrip(sql);
}
#[test]
fn test_xmlelement_with_multiple_attrs() {
    let sql = "SELECT XMLELEMENT(NAME test, XMLATTRIBUTES(a, b)) FROM test";
    assert_roundtrip(sql);
}
#[test]
fn test_xmlelement_with_content() {
    let sql = "SELECT XMLELEMENT(NAME foo, XMLATTRIBUTES(CURRENT_DATE AS bar), 'cont', 'ent')";
    assert_roundtrip(sql);
}
#[test]
fn test_xmlelement_nested() {
    let sql = "SELECT XMLELEMENT(NAME foo, XMLATTRIBUTES('xyz' AS bar), XMLELEMENT(NAME abc), XMLCOMMENT('test'), XMLELEMENT(NAME xyz))";
    assert_roundtrip(sql);
}
#[test]
fn test_on_conflict_do_update() {
    let sql = "INSERT INTO newtable AS t(a, b, c) VALUES (1, 2, 3) ON CONFLICT(c) DO UPDATE SET a = t.a + 1 WHERE t.a < 1";
    assert_roundtrip(sql);
}
#[test]
fn test_on_conflict_do_nothing() {
    // Canonical form drops the space before the conflict-target paren.
    let input = "INSERT INTO test (id, name) VALUES (1, 'test') ON CONFLICT (id) DO NOTHING";
    let canonical = "INSERT INTO test (id, name) VALUES (1, 'test') ON CONFLICT(id) DO NOTHING";
    assert_roundtrip_expected(input, canonical);
}
#[test]
fn test_truncate_restart_identity() {
    let sql = "TRUNCATE TABLE t1 RESTART IDENTITY";
    assert_roundtrip(sql);
}
#[test]
fn test_truncate_restart_identity_restrict() {
    let sql = "TRUNCATE TABLE t1 RESTART IDENTITY RESTRICT";
    assert_roundtrip(sql);
}
#[test]
fn test_insert_by_name() {
    // DuckDB's BY NAME insert mode.
    let sql = "INSERT INTO x BY NAME SELECT 1 AS y";
    assert_roundtrip(sql);
}
#[test]
fn test_insert_default_values_returning() {
    let sql = "INSERT INTO t DEFAULT VALUES RETURNING (c1)";
    assert_roundtrip(sql);
}
#[test]
fn test_union_all_by_name() {
    let sql = "SELECT 1 AS x UNION ALL BY NAME SELECT 2 AS x";
    assert_roundtrip(sql);
}
#[test]
fn test_minus_as_except() {
    // MINUS (Oracle/Redshift) is canonicalized to EXCEPT.
    let input = "SELECT foo, bar FROM table_1 MINUS SELECT foo, bar FROM table_2";
    let canonical = "SELECT foo, bar FROM table_1 EXCEPT SELECT foo, bar FROM table_2";
    assert_roundtrip_expected(input, canonical);
}
#[test]
fn test_filter_without_where() {
    // FILTER without WHERE is canonicalized to FILTER(WHERE ...).
    let input = "SELECT SUM(x) FILTER (x = 1)";
    let canonical = "SELECT SUM(x) FILTER(WHERE x = 1)";
    assert_roundtrip_expected(input, canonical);
}
#[test]
fn test_comment_on_materialized_view() {
    let sql = "COMMENT ON MATERIALIZED VIEW my_view IS 'this'";
    assert_roundtrip(sql);
}
#[test]
fn test_create_index_concurrently() {
    // PostgreSQL's non-blocking index build.
    let sql = "CREATE INDEX CONCURRENTLY idx ON t(c)";
    assert_roundtrip(sql);
}
#[test]
fn test_create_index_if_not_exists() {
    let sql = "CREATE INDEX IF NOT EXISTS idx ON t(c)";
    assert_roundtrip(sql);
}
#[test]
fn test_alter_table_partition_hive() {
    // Hive: a partition spec between the table name and the action.
    let sql = "ALTER TABLE x PARTITION(y = z) ADD COLUMN a VARCHAR(10)";
    assert_roundtrip(sql);
}
#[test]
fn test_alter_table_change_column_hive() {
    // Hive/MySQL: CHANGE COLUMN old_name new_name data_type.
    let sql = "ALTER TABLE x CHANGE COLUMN a a VARCHAR(10)";
    assert_roundtrip(sql);
}
#[test]
fn test_alter_table_add_columns_hive() {
    // Hive/Spark: ADD COLUMNS with a parenthesized list.
    let sql = "ALTER TABLE X ADD COLUMNS (y INT, z STRING)";
    assert_roundtrip(sql);
}
#[test]
fn test_alter_table_add_columns_cascade_hive() {
    // Hive/Spark: ADD COLUMNS ... CASCADE.
    let sql = "ALTER TABLE X ADD COLUMNS (y INT, z STRING) CASCADE";
    assert_roundtrip(sql);
}
#[test]
fn test_group_by_with_cube() {
    // Hive/MySQL trailing modifier: GROUP BY ... WITH CUBE should attach an
    // argument-less Cube node to the GROUP BY expression list.
    // Fix: removed a leftover debug `eprintln!` that was committed with the test.
    let sql = "SELECT key, value FROM T1 GROUP BY key, value WITH CUBE";
    let result = Parser::parse_sql(sql).unwrap();
    let select = result[0].as_select().unwrap();
    let Some(group_by) = &select.group_by else {
        panic!("Should have GROUP BY clause");
    };
    // WITH CUBE is represented as a Cube expression with no arguments.
    let has_cube = group_by
        .expressions
        .iter()
        .any(|e| matches!(e, Expression::Cube(c) if c.expressions.is_empty()));
    assert!(
        has_cube,
        "Should have a Cube expression with empty expressions in GROUP BY"
    );
}
#[test]
fn test_group_by_with_rollup() {
    // Hive/MySQL trailing modifier: GROUP BY ... WITH ROLLUP.
    let sql = "SELECT key, value FROM T1 GROUP BY key, value WITH ROLLUP";
    let stmts = Parser::parse_sql(sql).unwrap();
    let select = stmts[0].as_select().unwrap();
    let Some(group_by) = &select.group_by else {
        panic!("Should have GROUP BY clause");
    };
    // WITH ROLLUP is represented as a Rollup expression with no arguments.
    let has_rollup = group_by
        .expressions
        .iter()
        .any(|e| matches!(e, Expression::Rollup(r) if r.expressions.is_empty()));
    assert!(
        has_rollup,
        "Should have a Rollup expression with empty expressions in GROUP BY"
    );
}
#[test]
fn test_opendatasource_dot_access() {
    use crate::dialects::DialectType;
    use crate::transpile;
    // OPENDATASOURCE(...) followed by 3-, 2-, 1-, and 0-part dot access
    // must all round-trip unchanged through the T-SQL dialect.
    let cases = [
        "SELECT * FROM OPENDATASOURCE('SQLNCLI', 'Data Source=remote;').Catalog.dbo.Products",
        "SELECT * FROM OPENDATASOURCE('SQLNCLI', 'x').schema1.table1",
        "SELECT * FROM OPENDATASOURCE('SQLNCLI', 'x').table1",
        // No dot access: still parses as a plain table function.
        "SELECT * FROM OPENDATASOURCE('SQLNCLI', 'x')",
    ];
    for &sql in cases.iter() {
        let result = transpile(sql, DialectType::TSQL, DialectType::TSQL).unwrap();
        assert_eq!(result[0], sql);
    }
}
#[test]
fn test_exec_output_param() {
    use crate::dialects::DialectType;
    use crate::transpile;
    // EXECUTE with an OUTPUT parameter round-trips through T-SQL.
    let sql = "EXECUTE sp_CountOrders @region = 'US', @total = @count OUTPUT";
    let out = transpile(sql, DialectType::TSQL, DialectType::TSQL);
    assert!(out.is_ok(), "OUTPUT param should parse: {:?}", out.err());
    assert_eq!(out.unwrap()[0], sql);
    // WITH RESULT SETS is accepted (stored as an opaque Command).
    let sql2 = "EXEC sp_GetReport WITH RESULT SETS ((id INT, name NVARCHAR(100)))";
    let out2 = Parser::parse_sql(sql2);
    assert!(out2.is_ok(), "RESULT SETS should parse: {:?}", out2.err());
    // Dynamic SQL: EXECUTE (@sql) must parse too.
    let sql3 = "EXECUTE (@sql)";
    let out3 = transpile(sql3, DialectType::TSQL, DialectType::TSQL);
    assert!(out3.is_ok(), "Dynamic SQL should parse: {:?}", out3.err());
}
}
// Regression tests for Oracle's legacy `(+)` outer-join marker and quoted
// dotted projections, plus a few unrelated dialect regressions (MySQL
// OPTIMIZE/index hints, Spark/Hive LIMIT) that also live here.
// NOTE(review): the MySQL/Spark tests look misplaced in this module —
// consider moving them to a dialect-specific module.
#[cfg(test)]
mod join_marker_tests {
use super::*;
use crate::dialects::DialectType;
// The `(+)` marker after a column in WHERE must not be a parse error.
#[test]
fn test_oracle_join_marker_simple() {
let sql = "select a.baz from a where a.baz = b.baz (+)";
let result = Parser::parse_sql(sql);
println!("Result: {:?}", result);
assert!(result.is_ok(), "Parse error: {:?}", result.err());
}
// `(+)` combined with comma joins and table aliases, via the Oracle dialect.
#[test]
fn test_oracle_join_marker_with_comma_join_and_aliases() {
let sql = "SELECT e1.x, e2.x FROM e e1, e e2 WHERE e1.y = e2.y (+)";
let result = crate::dialects::Dialect::get(DialectType::Oracle).parse(sql);
println!("Result: {:?}", result);
assert!(result.is_ok(), "Parse error: {:?}", result.err());
}
// XMLTABLE with PASSING/COLUMNS where column names are quoted identifiers
// that are later referenced with a quoted dot (warehouse2."Water").
#[test]
fn test_oracle_xmltable_with_quoted_dot_columns() {
let sql = "SELECT warehouse_name warehouse,\n warehouse2.\"Water\", warehouse2.\"Rail\"\n FROM warehouses,\n XMLTABLE('/Warehouse'\n PASSING warehouses.warehouse_spec\n COLUMNS\n \"Water\" varchar2(6) PATH 'WaterAccess',\n \"Rail\" varchar2(6) PATH 'RailAccess')\n warehouse2";
let result = crate::dialects::Dialect::get(DialectType::Oracle).parse(sql);
println!("Result: {:?}", result);
assert!(result.is_ok(), "Parse error: {:?}", result.err());
}
// MySQL maintenance statements: TRUNCATE and OPTIMIZE TABLE both parse,
// and OPTIMIZE round-trips verbatim.
#[test]
fn test_optimize_table_mysql() {
use crate::dialects::DialectType;
use crate::transpile;
// Multi-statement: TRUNCATE + OPTIMIZE
let sql1 = "TRUNCATE TABLE session_logs";
let r1 = transpile(sql1, DialectType::MySQL, DialectType::MySQL);
assert!(r1.is_ok(), "TRUNCATE should parse: {:?}", r1.err());
let sql2 = "OPTIMIZE TABLE temp_exports";
let r2 = transpile(sql2, DialectType::MySQL, DialectType::MySQL);
assert!(r2.is_ok(), "OPTIMIZE should parse: {:?}", r2.err());
assert_eq!(r2.unwrap()[0], sql2);
}
// MySQL USE INDEX / IGNORE INDEX hints, including the PRIMARY keyword
// used as an index name inside the hint list.
#[test]
fn test_mysql_index_hints() {
use crate::dialects::DialectType;
use crate::transpile;
// USE INDEX with alias
let sql1 = "SELECT * FROM t e USE INDEX (idx1) WHERE a = 1";
let r1 = transpile(sql1, DialectType::MySQL, DialectType::MySQL);
assert!(r1.is_ok(), "USE INDEX with alias: {:?}", r1.err());
// IGNORE INDEX in JOIN with PRIMARY keyword
let sql2 = "SELECT * FROM t1 JOIN t2 IGNORE INDEX (PRIMARY) ON t1.id = t2.id";
let r2 = transpile(sql2, DialectType::MySQL, DialectType::MySQL);
assert!(r2.is_ok(), "IGNORE INDEX PRIMARY: {:?}", r2.err());
// Full example from issue
let sql3 = "SELECT e.name, d.department_name FROM employees e USE INDEX (idx_dept, idx_salary) JOIN departments d IGNORE INDEX (PRIMARY) ON e.department_id = d.department_id WHERE e.salary > 60000";
let r3 = transpile(sql3, DialectType::MySQL, DialectType::MySQL);
assert!(r3.is_ok(), "Full example: {:?}", r3.err());
}
// Quoted identifier after a dot in the projection (warehouse2."Water").
#[test]
fn test_oracle_quoted_dot_projection() {
let sql = "SELECT warehouse2.\"Water\", warehouse2.\"Rail\" FROM warehouses warehouse2";
let result = crate::dialects::Dialect::get(DialectType::Oracle).parse(sql);
println!("Result: {:?}", result);
assert!(result.is_ok(), "Parse error: {:?}", result.err());
}
// XMLTABLE as the only FROM source, with a trailing table alias.
#[test]
fn test_oracle_xmltable_columns_only() {
let sql = "SELECT * FROM XMLTABLE('/Warehouse' PASSING warehouses.warehouse_spec COLUMNS \"Water\" varchar2(6) PATH 'WaterAccess', \"Rail\" varchar2(6) PATH 'RailAccess') warehouse2";
let result = crate::dialects::Dialect::get(DialectType::Oracle).parse(sql);
println!("Result: {:?}", result);
assert!(result.is_ok(), "Parse error: {:?}", result.err());
}
// LIMIT must round-trip through both the Spark and Hive dialects.
#[test]
fn test_spark_limit() {
use crate::dialects::DialectType;
use crate::transpile;
// Spark LIMIT should work
let sql = "SELECT * FROM something LIMIT 100";
let r = transpile(sql, DialectType::Spark, DialectType::Spark);
assert!(r.is_ok(), "Spark LIMIT: {:?}", r.err());
assert_eq!(r.unwrap()[0], sql);
// Hive LIMIT should work
let r2 = transpile(sql, DialectType::Hive, DialectType::Hive);
assert!(r2.is_ok(), "Hive LIMIT: {:?}", r2.err());
}
// An implicit projection alias followed by a quoted dotted column must not
// confuse the alias parser.
#[test]
fn test_oracle_projection_alias_then_quoted_dot() {
let sql =
"SELECT warehouse_name warehouse, warehouse2.\"Water\" FROM warehouses warehouse2";
let result = crate::dialects::Dialect::get(DialectType::Oracle).parse(sql);
println!("Result: {:?}", result);
assert!(result.is_ok(), "Parse error: {:?}", result.err());
}
}
// ClickHouse-specific parser regressions: the SELECT ... FORMAT clause,
// table PROJECTION definitions, the `?:` ternary operator's AST shape,
// and (NOTE(review): misplaced here) a DuckDB bare-number INTERVAL test.
#[cfg(test)]
mod clickhouse_parser_regression_tests {
use crate::dialects::DialectType;
// `FORMAT TabSeparated` after SELECT is an output-format clause in
// ClickHouse, not a column alias — it must parse cleanly.
#[test]
fn test_clickhouse_select_format_clause_not_alias() {
let sql = "SELECT 1 FORMAT TabSeparated";
let result = crate::dialects::Dialect::get(DialectType::ClickHouse).parse(sql);
assert!(result.is_ok(), "Parse error: {:?}", result.err());
}
// PROJECTION entries inside CREATE TABLE contain embedded SELECT ... GROUP BY
// fragments that must not derail the column-definition parser.
#[test]
fn test_clickhouse_projection_select_group_by_parses() {
let sql = "CREATE TABLE t (a String, b String, c UInt64, PROJECTION p1 (SELECT a, sum(c) GROUP BY a, b), PROJECTION p2 (SELECT b, sum(c) GROUP BY b)) ENGINE=MergeTree()";
let result = crate::dialects::Dialect::get(DialectType::ClickHouse).parse(sql);
assert!(result.is_ok(), "Parse error: {:?}", result.err());
}
/// ClickHouse ternary operator AST structure tests.
/// Ported from Python sqlglot: tests/dialects/test_clickhouse.py::test_ternary (lines 765-778).
/// Verifies that `x ? (y ? 1 : 2) : 3` parses into nested IfFunc nodes
/// with the correct AST shape.
#[test]
fn test_clickhouse_ternary_ast_structure() {
use crate::expressions::Expression;
let result = crate::parse_one("x ? (y ? 1 : 2) : 3", DialectType::ClickHouse);
assert!(result.is_ok(), "Parse error: {:?}", result.err());
let ternary = result.unwrap();
// Root should be IfFunc
let if_func = match &ternary {
Expression::IfFunc(f) => f,
other => panic!("Expected IfFunc, got {:?}", std::mem::discriminant(other)),
};
// this (condition) should be Column "x"
assert!(
matches!(&if_func.condition, Expression::Column(_)),
"Expected condition to be Column, got {:?}",
std::mem::discriminant(&if_func.condition)
);
// true branch should be Paren
assert!(
matches!(&if_func.true_value, Expression::Paren(_)),
"Expected true_value to be Paren, got {:?}",
std::mem::discriminant(&if_func.true_value)
);
// false branch should be Literal
let false_value = if_func.false_value.as_ref().expect("Expected false_value");
assert!(
matches!(false_value, Expression::Literal(_)),
"Expected false_value to be Literal, got {:?}",
std::mem::discriminant(false_value)
);
// Inside the Paren, the nested ternary should also be IfFunc
let inner_paren = match &if_func.true_value {
Expression::Paren(p) => p,
_ => unreachable!(),
};
let nested_if = match &inner_paren.this {
Expression::IfFunc(f) => f,
other => panic!(
"Expected nested IfFunc, got {:?}",
std::mem::discriminant(other)
),
};
// Nested condition should be Column "y"
assert!(
matches!(&nested_if.condition, Expression::Column(_)),
"Expected nested condition to be Column, got {:?}",
std::mem::discriminant(&nested_if.condition)
);
// Nested true should be Literal 1
assert!(
matches!(&nested_if.true_value, Expression::Literal(_)),
"Expected nested true_value to be Literal, got {:?}",
std::mem::discriminant(&nested_if.true_value)
);
// Nested false should be Literal 2
let nested_false = nested_if
.false_value
.as_ref()
.expect("Expected nested false_value");
assert!(
matches!(nested_false, Expression::Literal(_)),
"Expected nested false_value to be Literal, got {:?}",
std::mem::discriminant(nested_false)
);
}
/// Verify that `a AND b ? 1 : 2` has And as the ternary condition
/// (AND binds tighter than ?).
/// Ported from Python sqlglot: test_clickhouse.py line 778.
#[test]
fn test_clickhouse_ternary_and_precedence() {
use crate::expressions::Expression;
let result = crate::parse_one("a and b ? 1 : 2", DialectType::ClickHouse);
assert!(result.is_ok(), "Parse error: {:?}", result.err());
let ternary = result.unwrap();
let if_func = match &ternary {
Expression::IfFunc(f) => f,
other => panic!("Expected IfFunc, got {:?}", std::mem::discriminant(other)),
};
// The condition should be And (not just Column "b")
assert!(
matches!(&if_func.condition, Expression::And(_)),
"Expected condition to be And, got {:?}",
std::mem::discriminant(&if_func.condition)
);
}
// DuckDB accepts a bare number in INTERVAL (INTERVAL 3 DAY); generation
// must normalize it to the quoted form (INTERVAL '3' DAY) for both DuckDB
// and Hive targets.
#[test]
fn test_parse_interval_bare_number_duckdb() {
use crate::dialects::{Dialect, DialectType};
let sql = "SELECT CAST('2018-01-01 00:00:00' AS DATE) + INTERVAL 3 DAY";
let d = Dialect::get(DialectType::DuckDB);
match d.parse(sql) {
Ok(result) => {
assert!(!result.is_empty(), "Should parse to at least one statement");
// Test transpilation to DuckDB target - should normalize number to quoted string
let output_duckdb = d.transpile(sql, DialectType::DuckDB).unwrap();
assert_eq!(
output_duckdb[0],
"SELECT CAST('2018-01-01 00:00:00' AS DATE) + INTERVAL '3' DAY",
"DuckDB output should have quoted interval value"
);
// Test transpilation to Hive target
let output_hive = d.transpile(sql, DialectType::Hive).unwrap();
assert_eq!(
output_hive[0], "SELECT CAST('2018-01-01 00:00:00' AS DATE) + INTERVAL '3' DAY",
"Hive output should have quoted interval value"
);
}
Err(e) => panic!("Failed to parse DuckDB INTERVAL 3 DAY: {}", e),
}
}
}