vibesql_ast/
select.rs

1//! SELECT statement types
2//!
3//! This module contains all types related to SELECT queries including
4//! SELECT items, FROM clauses, JOINs, and ORDER BY.
5
6use crate::Expression;
7
8// ============================================================================
9// Common Table Expressions (CTEs)
10// ============================================================================
11
12/// CTE materialization hint for optimizer control
13///
14/// Controls whether the CTE should be materialized (computed once and stored)
15/// or inlined (substituted into each reference like a view).
16#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
17pub enum CteMaterialization {
18    /// No hint provided - optimizer decides (default)
19    #[default]
20    Default,
21    /// Force materialization - compute once and store result
22    Materialized,
23    /// Force inlining - substitute CTE into each reference
24    NotMaterialized,
25}
26
27/// Common Table Expression (CTE) definition
28///
29/// CTEs are temporary named result sets defined with the WITH clause that exist
30/// only for the duration of a single query.
31///
32/// Example: `WITH regional_sales AS (SELECT region, SUM(amount) FROM orders GROUP BY region)`
33///
34/// Recursive CTEs use UNION ALL to reference themselves:
35/// Example: `WITH RECURSIVE counter(x) AS (VALUES(0) UNION ALL SELECT x+1 FROM counter WHERE x<10)`
36#[derive(Debug, Clone, PartialEq)]
37pub struct CommonTableExpr {
38    /// Name of the CTE
39    pub name: String,
40    /// Optional column name list (e.g., `WITH cte (col1, col2) AS (...)`)
41    pub columns: Option<Vec<String>>,
42    /// The query defining the CTE
43    pub query: Box<SelectStmt>,
44    /// Whether this is a RECURSIVE CTE (SQLite/SQL:1999)
45    /// Recursive CTEs must use UNION ALL and may reference themselves in the recursive term
46    pub recursive: bool,
47    /// Materialization hint for optimizer control (AS MATERIALIZED / AS NOT MATERIALIZED)
48    pub materialization: CteMaterialization,
49}
50
51// ============================================================================
52// SELECT Statement
53// ============================================================================
54
55/// SELECT statement structure
56///
57/// This can represent either a traditional SELECT statement or a standalone VALUES clause.
58/// When `values` is Some, this represents a VALUES clause (e.g., `VALUES(1),(2),(3)`).
59/// When `values` is None, this is a traditional SELECT statement.
60#[derive(Debug, Clone, PartialEq)]
61pub struct SelectStmt {
62    /// Optional WITH clause containing CTEs
63    pub with_clause: Option<Vec<CommonTableExpr>>,
64    pub distinct: bool,
65    pub select_list: Vec<SelectItem>,
66    /// Optional INTO clause for DDL SELECT INTO statements (SQL:1999 Feature E111)
67    /// Creates a new table from the query results
68    pub into_table: Option<String>,
69    /// Optional INTO clause for procedural SELECT INTO statements
70    /// Stores query results into procedural variables (e.g., SELECT col INTO @var)
71    pub into_variables: Option<Vec<String>>,
72    pub from: Option<FromClause>,
73    pub where_clause: Option<Expression>,
74    pub group_by: Option<GroupByClause>,
75    pub having: Option<Expression>,
76    pub order_by: Option<Vec<OrderByItem>>,
77    pub limit: Option<Expression>,
78    pub offset: Option<Expression>,
79    /// Set operation (UNION, INTERSECT, EXCEPT) combining this query with another
80    pub set_operation: Option<SetOperation>,
81    /// VALUES clause for standalone VALUES statements (SQL:1999)
82    /// When this is Some, the statement represents a VALUES clause like `VALUES(1),(2),(3)`
83    /// Each inner Vec is a row of expressions
84    pub values: Option<Vec<Vec<Expression>>>,
85}
86
87// ============================================================================
88// GROUP BY Clause (with ROLLUP, CUBE, GROUPING SETS support)
89// ============================================================================
90
91/// GROUP BY clause structure supporting OLAP extensions
92///
93/// SQL:1999 OLAP extensions allow multi-dimensional aggregation:
94/// - Simple: `GROUP BY a, b`
95/// - ROLLUP: `GROUP BY ROLLUP(a, b)` - hierarchical subtotals
96/// - CUBE: `GROUP BY CUBE(a, b)` - all dimension combinations
97/// - GROUPING SETS: `GROUP BY GROUPING SETS((a, b), (a), ())` - explicit groupings
98/// - Mixed: `GROUP BY a, ROLLUP(b, c)` - combination of simple and OLAP
99#[derive(Debug, Clone, PartialEq)]
100pub enum GroupByClause {
101    /// Simple GROUP BY with list of expressions
102    /// Example: `GROUP BY a, b, c`
103    Simple(Vec<Expression>),
104
105    /// ROLLUP creates subtotals that roll up from the most detailed level
106    /// to a grand total, following the order of columns specified.
107    ///
108    /// Example: `GROUP BY ROLLUP(d_year, i_category)`
109    /// Equivalent to: `GROUPING SETS ((d_year, i_category), (d_year), ())`
110    Rollup(Vec<GroupingElement>),
111
112    /// CUBE creates subtotals for all combinations of dimensions.
113    ///
114    /// Example: `GROUP BY CUBE(a, b)`
115    /// Equivalent to: `GROUPING SETS ((a, b), (a), (b), ())`
116    Cube(Vec<GroupingElement>),
117
118    /// GROUPING SETS explicitly specifies which groupings to compute.
119    ///
120    /// Example: `GROUP BY GROUPING SETS ((d_year, d_moy), (d_year), ())`
121    GroupingSets(Vec<GroupingSet>),
122
123    /// Mixed GROUP BY combining simple expressions with ROLLUP/CUBE/GROUPING SETS
124    ///
125    /// Example: `GROUP BY region, ROLLUP(year, quarter)`
126    /// The simple expressions (region) appear in ALL generated grouping sets,
127    /// while ROLLUP/CUBE/GROUPING SETS columns are expanded normally.
128    ///
129    /// `GROUP BY a, ROLLUP(b, c)` is equivalent to:
130    /// `GROUPING SETS ((a, b, c), (a, b), (a))`
131    ///
132    /// Multiple ROLLUP/CUBE create a cross-product of their expansions.
133    Mixed(Vec<MixedGroupingItem>),
134}
135
136/// A single grouping element within ROLLUP or CUBE
137///
138/// Can be a single expression or a composite (multiple expressions treated as one unit)
139#[derive(Debug, Clone, PartialEq)]
140pub enum GroupingElement {
141    /// Single expression: `a` in `ROLLUP(a, b)`
142    Single(Expression),
143
144    /// Composite: `(a, b)` in `ROLLUP((a, b), c)` - treated as one grouping unit
145    Composite(Vec<Expression>),
146}
147
148/// A single grouping set within GROUPING SETS
149///
150/// Example: `(a, b)` or `()` (empty for grand total) in `GROUPING SETS ((a, b), ())`
151#[derive(Debug, Clone, PartialEq)]
152pub struct GroupingSet {
153    pub columns: Vec<Expression>,
154}
155
156/// An item in a mixed GROUP BY clause
157///
158/// Can be a simple expression or a ROLLUP/CUBE/GROUPING SETS construct
159#[derive(Debug, Clone, PartialEq)]
160pub enum MixedGroupingItem {
161    /// Simple expression that appears in all grouping sets
162    /// Example: `a` in `GROUP BY a, ROLLUP(b, c)`
163    Simple(Expression),
164
165    /// ROLLUP construct
166    /// Example: `ROLLUP(b, c)` in `GROUP BY a, ROLLUP(b, c)`
167    Rollup(Vec<GroupingElement>),
168
169    /// CUBE construct
170    /// Example: `CUBE(b, c)` in `GROUP BY a, CUBE(b, c)`
171    Cube(Vec<GroupingElement>),
172
173    /// GROUPING SETS construct
174    /// Example: `GROUPING SETS((b), (c))` in `GROUP BY a, GROUPING SETS((b), (c))`
175    GroupingSets(Vec<GroupingSet>),
176}
177
178impl GroupByClause {
179    /// Get all expressions in the GROUP BY clause (flattened)
180    ///
181    /// This returns all expressions, regardless of ROLLUP/CUBE/GROUPING SETS structure.
182    /// Useful for validation and simple GROUP BY processing that doesn't need
183    /// the multi-grouping-set semantics.
184    pub fn all_expressions(&self) -> Vec<&Expression> {
185        match self {
186            GroupByClause::Simple(exprs) => exprs.iter().collect(),
187            GroupByClause::Rollup(elements) | GroupByClause::Cube(elements) => elements
188                .iter()
189                .flat_map(|e| match e {
190                    GroupingElement::Single(expr) => vec![expr],
191                    GroupingElement::Composite(exprs) => exprs.iter().collect(),
192                })
193                .collect(),
194            GroupByClause::GroupingSets(sets) => {
195                sets.iter().flat_map(|s| s.columns.iter()).collect()
196            }
197            GroupByClause::Mixed(items) => items
198                .iter()
199                .flat_map(|item| match item {
200                    MixedGroupingItem::Simple(expr) => vec![expr],
201                    MixedGroupingItem::Rollup(elements) | MixedGroupingItem::Cube(elements) => {
202                        elements
203                            .iter()
204                            .flat_map(|e| match e {
205                                GroupingElement::Single(expr) => vec![expr],
206                                GroupingElement::Composite(exprs) => exprs.iter().collect(),
207                            })
208                            .collect()
209                    }
210                    MixedGroupingItem::GroupingSets(sets) => {
211                        sets.iter().flat_map(|s| s.columns.iter()).collect()
212                    }
213                })
214                .collect(),
215        }
216    }
217
218    /// Get the number of expressions (flattened)
219    pub fn len(&self) -> usize {
220        self.all_expressions().len()
221    }
222
223    /// Check if the GROUP BY clause is empty
224    pub fn is_empty(&self) -> bool {
225        self.len() == 0
226    }
227
228    /// Check if this is a simple GROUP BY (not ROLLUP/CUBE/GROUPING SETS)
229    pub fn is_simple(&self) -> bool {
230        matches!(self, GroupByClause::Simple(_))
231    }
232
233    /// Get the simple expressions if this is a simple GROUP BY
234    pub fn as_simple(&self) -> Option<&Vec<Expression>> {
235        match self {
236            GroupByClause::Simple(exprs) => Some(exprs),
237            _ => None,
238        }
239    }
240}
241
242/// Set operation combining two SELECT statements
243#[derive(Debug, Clone, PartialEq)]
244pub struct SetOperation {
245    pub op: SetOperator,
246    pub all: bool, // true = ALL, false = DISTINCT (default)
247    pub right: Box<SelectStmt>,
248}
249
250/// Item in the SELECT list
251#[derive(Debug, Clone, PartialEq)]
252pub enum SelectItem {
253    /// SELECT * [AS (col1, col2, ...)]
254    /// SQL:1999 Feature E051-07: Derived column lists
255    Wildcard { alias: Option<Vec<String>> },
256    /// SELECT table.* [AS (col1, col2, ...)] or SELECT alias.* [AS (col1, col2, ...)]
257    /// SQL:1999 Feature E051-08: Correlation names in FROM clause with derived column lists
258    QualifiedWildcard { qualifier: String, alias: Option<Vec<String>> },
259    /// SELECT expr [AS alias]
260    /// The `source_text` field stores the original SQL text of the expression for column naming.
261    /// SQLite uses the original expression text (not AST-reconstructed) as the column name
262    /// when no alias is provided.
263    Expression { expr: Expression, alias: Option<String>, source_text: Option<String> },
264}
265
266/// FROM clause
267#[derive(Debug, Clone, PartialEq)]
268pub enum FromClause {
269    Table {
270        /// Table name (possibly qualified as schema.table)
271        name: String,
272        alias: Option<String>,
273        /// SQL:1999 Feature E051-09: Optional column renaming in table alias
274        /// Example: `FROM t AS myalias (x, y)` renames columns to x, y
275        column_aliases: Option<Vec<String>>,
276        /// Whether the identifier was quoted (delimited) in the original SQL.
277        /// - `true`: Quoted identifier (case-sensitive), e.g., `"MyTable"`
278        /// - `false`: Unquoted identifier (case-insensitive), e.g., `MyTable`
279        quoted: bool,
280    },
281    Join {
282        left: Box<FromClause>,
283        right: Box<FromClause>,
284        join_type: JoinType,
285        condition: Option<Expression>,
286        /// USING clause columns - joins on these named columns from both tables
287        /// Example: `JOIN t2 USING (id, name)` joins on id and name columns
288        using_columns: Option<Vec<String>>,
289        /// True for NATURAL JOIN (joins on common column names)
290        natural: bool,
291        /// Optional alias for parenthesized join expressions
292        /// Example: `FROM t1 JOIN (t2 JOIN t3 USING(id)) AS j1 ON j1.id=t1.id`
293        alias: Option<String>,
294    },
295    /// Subquery in FROM clause (derived table)
296    /// SQL:1999 requires AS alias for derived tables
297    /// Example: FROM (SELECT * FROM users WHERE active = TRUE) AS active_users
298    /// SQL:1999 Feature E051-09: Optional column renaming
299    /// Example: FROM (SELECT a, b FROM t) AS mytemp (x, y)
300    Subquery {
301        query: Box<SelectStmt>,
302        alias: String,
303        /// Optional column renaming for derived table columns
304        column_aliases: Option<Vec<String>>,
305    },
306    /// VALUES clause as table constructor (SQL:1999)
307    /// Example: FROM (VALUES(1,'a'), (2,'b')) AS t(x, y)
308    /// Example: WITH t AS (VALUES(1),(2),(3)) SELECT * FROM t
309    Values {
310        /// Each inner Vec is a row, each Expression is a column value
311        rows: Vec<Vec<Expression>>,
312        /// Alias for the VALUES table (required)
313        alias: String,
314        /// Optional column renaming
315        column_aliases: Option<Vec<String>>,
316    },
317}
318
319/// JOIN types
320#[derive(Debug, Clone, PartialEq)]
321pub enum JoinType {
322    Inner,
323    LeftOuter,
324    RightOuter,
325    FullOuter,
326    Cross,
327    /// Semi-join: Returns left rows that have a match in right (no duplicates)
328    Semi,
329    /// Anti-join: Returns left rows that have NO match in right
330    Anti,
331}
332
333/// ORDER BY item
334#[derive(Debug, Clone, PartialEq)]
335pub struct OrderByItem {
336    pub expr: Expression,
337    pub direction: OrderDirection,
338    /// NULL ordering for ORDER BY (SQL:2003 extension)
339    /// When None, uses default: NULLS LAST for ASC, NULLS FIRST for DESC
340    pub nulls_order: Option<NullsOrder>,
341}
342
343/// Sort direction
344#[derive(Debug, Clone, PartialEq)]
345pub enum OrderDirection {
346    Asc,
347    Desc,
348}
349
350/// NULL ordering in ORDER BY clause (SQL:2003 extension)
351/// Specifies whether NULL values sort before or after non-NULL values
352#[derive(Debug, Clone, Copy, PartialEq)]
353pub enum NullsOrder {
354    /// NULL values sort before all non-NULL values
355    First,
356    /// NULL values sort after all non-NULL values
357    Last,
358}
359
360/// Set operators for combining SELECT statements
361#[derive(Debug, Clone, PartialEq)]
362pub enum SetOperator {
363    /// UNION - combines results from two queries, removing duplicates (unless ALL specified)
364    Union,
365    /// INTERSECT - returns only rows that appear in both queries
366    Intersect,
367    /// EXCEPT - returns rows from left query that don't appear in right query (SQL standard)
368    Except,
369}