vibesql_ast/select.rs
1//! SELECT statement types
2//!
3//! This module contains all types related to SELECT queries including
4//! SELECT items, FROM clauses, JOINs, and ORDER BY.
5
6use crate::Expression;
7
8// ============================================================================
9// Common Table Expressions (CTEs)
10// ============================================================================
11
12/// CTE materialization hint for optimizer control
13///
14/// Controls whether the CTE should be materialized (computed once and stored)
15/// or inlined (substituted into each reference like a view).
16#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
17pub enum CteMaterialization {
18 /// No hint provided - optimizer decides (default)
19 #[default]
20 Default,
21 /// Force materialization - compute once and store result
22 Materialized,
23 /// Force inlining - substitute CTE into each reference
24 NotMaterialized,
25}
26
27/// Common Table Expression (CTE) definition
28///
29/// CTEs are temporary named result sets defined with the WITH clause that exist
30/// only for the duration of a single query.
31///
32/// Example: `WITH regional_sales AS (SELECT region, SUM(amount) FROM orders GROUP BY region)`
33///
34/// Recursive CTEs use UNION ALL to reference themselves:
35/// Example: `WITH RECURSIVE counter(x) AS (VALUES(0) UNION ALL SELECT x+1 FROM counter WHERE x<10)`
36#[derive(Debug, Clone, PartialEq)]
37pub struct CommonTableExpr {
38 /// Name of the CTE
39 pub name: String,
40 /// Optional column name list (e.g., `WITH cte (col1, col2) AS (...)`)
41 pub columns: Option<Vec<String>>,
42 /// The query defining the CTE
43 pub query: Box<SelectStmt>,
44 /// Whether this is a RECURSIVE CTE (SQLite/SQL:1999)
45 /// Recursive CTEs must use UNION ALL and may reference themselves in the recursive term
46 pub recursive: bool,
47 /// Materialization hint for optimizer control (AS MATERIALIZED / AS NOT MATERIALIZED)
48 pub materialization: CteMaterialization,
49}
50
51// ============================================================================
52// SELECT Statement
53// ============================================================================
54
55/// SELECT statement structure
56///
57/// This can represent either a traditional SELECT statement or a standalone VALUES clause.
58/// When `values` is Some, this represents a VALUES clause (e.g., `VALUES(1),(2),(3)`).
59/// When `values` is None, this is a traditional SELECT statement.
60#[derive(Debug, Clone, PartialEq)]
61pub struct SelectStmt {
62 /// Optional WITH clause containing CTEs
63 pub with_clause: Option<Vec<CommonTableExpr>>,
64 pub distinct: bool,
65 pub select_list: Vec<SelectItem>,
66 /// Optional INTO clause for DDL SELECT INTO statements (SQL:1999 Feature E111)
67 /// Creates a new table from the query results
68 pub into_table: Option<String>,
69 /// Optional INTO clause for procedural SELECT INTO statements
70 /// Stores query results into procedural variables (e.g., SELECT col INTO @var)
71 pub into_variables: Option<Vec<String>>,
72 pub from: Option<FromClause>,
73 pub where_clause: Option<Expression>,
74 pub group_by: Option<GroupByClause>,
75 pub having: Option<Expression>,
76 pub order_by: Option<Vec<OrderByItem>>,
77 pub limit: Option<Expression>,
78 pub offset: Option<Expression>,
79 /// Set operation (UNION, INTERSECT, EXCEPT) combining this query with another
80 pub set_operation: Option<SetOperation>,
81 /// VALUES clause for standalone VALUES statements (SQL:1999)
82 /// When this is Some, the statement represents a VALUES clause like `VALUES(1),(2),(3)`
83 /// Each inner Vec is a row of expressions
84 pub values: Option<Vec<Vec<Expression>>>,
85}
86
87// ============================================================================
88// GROUP BY Clause (with ROLLUP, CUBE, GROUPING SETS support)
89// ============================================================================
90
91/// GROUP BY clause structure supporting OLAP extensions
92///
93/// SQL:1999 OLAP extensions allow multi-dimensional aggregation:
94/// - Simple: `GROUP BY a, b`
95/// - ROLLUP: `GROUP BY ROLLUP(a, b)` - hierarchical subtotals
96/// - CUBE: `GROUP BY CUBE(a, b)` - all dimension combinations
97/// - GROUPING SETS: `GROUP BY GROUPING SETS((a, b), (a), ())` - explicit groupings
98/// - Mixed: `GROUP BY a, ROLLUP(b, c)` - combination of simple and OLAP
99#[derive(Debug, Clone, PartialEq)]
100pub enum GroupByClause {
101 /// Simple GROUP BY with list of expressions
102 /// Example: `GROUP BY a, b, c`
103 Simple(Vec<Expression>),
104
105 /// ROLLUP creates subtotals that roll up from the most detailed level
106 /// to a grand total, following the order of columns specified.
107 ///
108 /// Example: `GROUP BY ROLLUP(d_year, i_category)`
109 /// Equivalent to: `GROUPING SETS ((d_year, i_category), (d_year), ())`
110 Rollup(Vec<GroupingElement>),
111
112 /// CUBE creates subtotals for all combinations of dimensions.
113 ///
114 /// Example: `GROUP BY CUBE(a, b)`
115 /// Equivalent to: `GROUPING SETS ((a, b), (a), (b), ())`
116 Cube(Vec<GroupingElement>),
117
118 /// GROUPING SETS explicitly specifies which groupings to compute.
119 ///
120 /// Example: `GROUP BY GROUPING SETS ((d_year, d_moy), (d_year), ())`
121 GroupingSets(Vec<GroupingSet>),
122
123 /// Mixed GROUP BY combining simple expressions with ROLLUP/CUBE/GROUPING SETS
124 ///
125 /// Example: `GROUP BY region, ROLLUP(year, quarter)`
126 /// The simple expressions (region) appear in ALL generated grouping sets,
127 /// while ROLLUP/CUBE/GROUPING SETS columns are expanded normally.
128 ///
129 /// `GROUP BY a, ROLLUP(b, c)` is equivalent to:
130 /// `GROUPING SETS ((a, b, c), (a, b), (a))`
131 ///
132 /// Multiple ROLLUP/CUBE create a cross-product of their expansions.
133 Mixed(Vec<MixedGroupingItem>),
134}
135
136/// A single grouping element within ROLLUP or CUBE
137///
138/// Can be a single expression or a composite (multiple expressions treated as one unit)
139#[derive(Debug, Clone, PartialEq)]
140pub enum GroupingElement {
141 /// Single expression: `a` in `ROLLUP(a, b)`
142 Single(Expression),
143
144 /// Composite: `(a, b)` in `ROLLUP((a, b), c)` - treated as one grouping unit
145 Composite(Vec<Expression>),
146}
147
148/// A single grouping set within GROUPING SETS
149///
150/// Example: `(a, b)` or `()` (empty for grand total) in `GROUPING SETS ((a, b), ())`
151#[derive(Debug, Clone, PartialEq)]
152pub struct GroupingSet {
153 pub columns: Vec<Expression>,
154}
155
156/// An item in a mixed GROUP BY clause
157///
158/// Can be a simple expression or a ROLLUP/CUBE/GROUPING SETS construct
159#[derive(Debug, Clone, PartialEq)]
160pub enum MixedGroupingItem {
161 /// Simple expression that appears in all grouping sets
162 /// Example: `a` in `GROUP BY a, ROLLUP(b, c)`
163 Simple(Expression),
164
165 /// ROLLUP construct
166 /// Example: `ROLLUP(b, c)` in `GROUP BY a, ROLLUP(b, c)`
167 Rollup(Vec<GroupingElement>),
168
169 /// CUBE construct
170 /// Example: `CUBE(b, c)` in `GROUP BY a, CUBE(b, c)`
171 Cube(Vec<GroupingElement>),
172
173 /// GROUPING SETS construct
174 /// Example: `GROUPING SETS((b), (c))` in `GROUP BY a, GROUPING SETS((b), (c))`
175 GroupingSets(Vec<GroupingSet>),
176}
177
178impl GroupByClause {
179 /// Get all expressions in the GROUP BY clause (flattened)
180 ///
181 /// This returns all expressions, regardless of ROLLUP/CUBE/GROUPING SETS structure.
182 /// Useful for validation and simple GROUP BY processing that doesn't need
183 /// the multi-grouping-set semantics.
184 pub fn all_expressions(&self) -> Vec<&Expression> {
185 match self {
186 GroupByClause::Simple(exprs) => exprs.iter().collect(),
187 GroupByClause::Rollup(elements) | GroupByClause::Cube(elements) => elements
188 .iter()
189 .flat_map(|e| match e {
190 GroupingElement::Single(expr) => vec![expr],
191 GroupingElement::Composite(exprs) => exprs.iter().collect(),
192 })
193 .collect(),
194 GroupByClause::GroupingSets(sets) => {
195 sets.iter().flat_map(|s| s.columns.iter()).collect()
196 }
197 GroupByClause::Mixed(items) => items
198 .iter()
199 .flat_map(|item| match item {
200 MixedGroupingItem::Simple(expr) => vec![expr],
201 MixedGroupingItem::Rollup(elements) | MixedGroupingItem::Cube(elements) => {
202 elements
203 .iter()
204 .flat_map(|e| match e {
205 GroupingElement::Single(expr) => vec![expr],
206 GroupingElement::Composite(exprs) => exprs.iter().collect(),
207 })
208 .collect()
209 }
210 MixedGroupingItem::GroupingSets(sets) => {
211 sets.iter().flat_map(|s| s.columns.iter()).collect()
212 }
213 })
214 .collect(),
215 }
216 }
217
218 /// Get the number of expressions (flattened)
219 pub fn len(&self) -> usize {
220 self.all_expressions().len()
221 }
222
223 /// Check if the GROUP BY clause is empty
224 pub fn is_empty(&self) -> bool {
225 self.len() == 0
226 }
227
228 /// Check if this is a simple GROUP BY (not ROLLUP/CUBE/GROUPING SETS)
229 pub fn is_simple(&self) -> bool {
230 matches!(self, GroupByClause::Simple(_))
231 }
232
233 /// Get the simple expressions if this is a simple GROUP BY
234 pub fn as_simple(&self) -> Option<&Vec<Expression>> {
235 match self {
236 GroupByClause::Simple(exprs) => Some(exprs),
237 _ => None,
238 }
239 }
240}
241
242/// Set operation combining two SELECT statements
243#[derive(Debug, Clone, PartialEq)]
244pub struct SetOperation {
245 pub op: SetOperator,
246 pub all: bool, // true = ALL, false = DISTINCT (default)
247 pub right: Box<SelectStmt>,
248}
249
250/// Item in the SELECT list
251#[derive(Debug, Clone, PartialEq)]
252pub enum SelectItem {
253 /// SELECT * [AS (col1, col2, ...)]
254 /// SQL:1999 Feature E051-07: Derived column lists
255 Wildcard { alias: Option<Vec<String>> },
256 /// SELECT table.* [AS (col1, col2, ...)] or SELECT alias.* [AS (col1, col2, ...)]
257 /// SQL:1999 Feature E051-08: Correlation names in FROM clause with derived column lists
258 QualifiedWildcard { qualifier: String, alias: Option<Vec<String>> },
259 /// SELECT expr [AS alias]
260 /// The `source_text` field stores the original SQL text of the expression for column naming.
261 /// SQLite uses the original expression text (not AST-reconstructed) as the column name
262 /// when no alias is provided.
263 Expression { expr: Expression, alias: Option<String>, source_text: Option<String> },
264}
265
266/// FROM clause
267#[derive(Debug, Clone, PartialEq)]
268pub enum FromClause {
269 Table {
270 /// Table name (possibly qualified as schema.table)
271 name: String,
272 alias: Option<String>,
273 /// SQL:1999 Feature E051-09: Optional column renaming in table alias
274 /// Example: `FROM t AS myalias (x, y)` renames columns to x, y
275 column_aliases: Option<Vec<String>>,
276 /// Whether the identifier was quoted (delimited) in the original SQL.
277 /// - `true`: Quoted identifier (case-sensitive), e.g., `"MyTable"`
278 /// - `false`: Unquoted identifier (case-insensitive), e.g., `MyTable`
279 quoted: bool,
280 },
281 Join {
282 left: Box<FromClause>,
283 right: Box<FromClause>,
284 join_type: JoinType,
285 condition: Option<Expression>,
286 /// USING clause columns - joins on these named columns from both tables
287 /// Example: `JOIN t2 USING (id, name)` joins on id and name columns
288 using_columns: Option<Vec<String>>,
289 /// True for NATURAL JOIN (joins on common column names)
290 natural: bool,
291 /// Optional alias for parenthesized join expressions
292 /// Example: `FROM t1 JOIN (t2 JOIN t3 USING(id)) AS j1 ON j1.id=t1.id`
293 alias: Option<String>,
294 },
295 /// Subquery in FROM clause (derived table)
296 /// SQL:1999 requires AS alias for derived tables
297 /// Example: FROM (SELECT * FROM users WHERE active = TRUE) AS active_users
298 /// SQL:1999 Feature E051-09: Optional column renaming
299 /// Example: FROM (SELECT a, b FROM t) AS mytemp (x, y)
300 Subquery {
301 query: Box<SelectStmt>,
302 alias: String,
303 /// Optional column renaming for derived table columns
304 column_aliases: Option<Vec<String>>,
305 },
306 /// VALUES clause as table constructor (SQL:1999)
307 /// Example: FROM (VALUES(1,'a'), (2,'b')) AS t(x, y)
308 /// Example: WITH t AS (VALUES(1),(2),(3)) SELECT * FROM t
309 Values {
310 /// Each inner Vec is a row, each Expression is a column value
311 rows: Vec<Vec<Expression>>,
312 /// Alias for the VALUES table (required)
313 alias: String,
314 /// Optional column renaming
315 column_aliases: Option<Vec<String>>,
316 },
317}
318
319/// JOIN types
320#[derive(Debug, Clone, PartialEq)]
321pub enum JoinType {
322 Inner,
323 LeftOuter,
324 RightOuter,
325 FullOuter,
326 Cross,
327 /// Semi-join: Returns left rows that have a match in right (no duplicates)
328 Semi,
329 /// Anti-join: Returns left rows that have NO match in right
330 Anti,
331}
332
333/// ORDER BY item
334#[derive(Debug, Clone, PartialEq)]
335pub struct OrderByItem {
336 pub expr: Expression,
337 pub direction: OrderDirection,
338 /// NULL ordering for ORDER BY (SQL:2003 extension)
339 /// When None, uses default: NULLS LAST for ASC, NULLS FIRST for DESC
340 pub nulls_order: Option<NullsOrder>,
341}
342
343/// Sort direction
344#[derive(Debug, Clone, PartialEq)]
345pub enum OrderDirection {
346 Asc,
347 Desc,
348}
349
350/// NULL ordering in ORDER BY clause (SQL:2003 extension)
351/// Specifies whether NULL values sort before or after non-NULL values
352#[derive(Debug, Clone, Copy, PartialEq)]
353pub enum NullsOrder {
354 /// NULL values sort before all non-NULL values
355 First,
356 /// NULL values sort after all non-NULL values
357 Last,
358}
359
360/// Set operators for combining SELECT statements
361#[derive(Debug, Clone, PartialEq)]
362pub enum SetOperator {
363 /// UNION - combines results from two queries, removing duplicates (unless ALL specified)
364 Union,
365 /// INTERSECT - returns only rows that appear in both queries
366 Intersect,
367 /// EXCEPT - returns rows from left query that don't appear in right query (SQL standard)
368 Except,
369}