vibesql_ast/expression.rs
1//! SQL Expression types for use in SELECT, WHERE, and other clauses
2
3use vibesql_types::SqlValue;
4
5use crate::identifier::{ColumnIdentifier, FunctionIdentifier};
6use crate::{BinaryOperator, OrderByItem, SelectStmt, UnaryOperator};
7
8/// SQL Expression (can appear in SELECT, WHERE, etc.)
9#[derive(Debug, Clone, PartialEq)]
10pub enum Expression {
11 /// Literal value (42, 'hello', TRUE, NULL)
12 Literal(SqlValue),
13
14 /// Parameter placeholder (?) for prepared statements
15 /// The index is 0-based and assigned in order of appearance in the SQL
16 /// Example: WHERE id = ? AND name = ? -> Placeholder(0), Placeholder(1)
17 Placeholder(usize),
18
19 /// Numbered parameter placeholder ($1, $2, etc.) for prepared statements
20 /// PostgreSQL-style: 1-indexed as written in SQL
21 /// Example: WHERE id = $1 AND name = $2
22 /// The number is the explicit position (1-indexed) of the parameter
23 NumberedPlaceholder(usize),
24
25 /// Named parameter placeholder (:name) for prepared statements
26 /// Used by many ORMs and applications for readability
27 /// Example: WHERE id = :user_id AND name = :name
28 NamedPlaceholder(String),
29
30 /// Column reference (id, users.id, schema.table.column)
31 ///
32 /// Uses `ColumnIdentifier` for proper SQL:1999 case sensitivity handling:
33 /// - Unquoted identifiers are case-insensitive (fold to lowercase)
34 /// - Quoted identifiers are case-sensitive (preserve exact case)
35 ColumnRef(ColumnIdentifier),
36
37 /// Binary operation (a + b, x = y, etc.)
38 /// Note: AND/OR chains should use Conjunction/Disjunction for efficiency
39 BinaryOp {
40 op: BinaryOperator,
41 left: Box<Expression>,
42 right: Box<Expression>,
43 },
44
45 /// Flattened conjunction (AND chain): a AND b AND c AND ...
46 /// Stored as a flat vector for O(1) depth traversal and better cache locality.
47 /// Always contains 2+ children (single predicates remain as-is).
48 Conjunction(Vec<Expression>),
49
50 /// Flattened disjunction (OR chain): a OR b OR c OR ...
51 /// Stored as a flat vector for O(1) depth traversal and better cache locality.
52 /// Always contains 2+ children (single predicates remain as-is).
53 Disjunction(Vec<Expression>),
54
55 /// Unary operation (NOT x, -5)
56 UnaryOp {
57 op: UnaryOperator,
58 expr: Box<Expression>,
59 },
60
61 /// Function call (UPPER(x), SUBSTRING(x, 1, 3))
62 ///
63 /// Uses `FunctionIdentifier` for proper case handling:
64 /// - Canonical (lowercase) for case-insensitive comparison
65 /// - Display (original case) for error messages
66 Function {
67 name: FunctionIdentifier,
68 args: Vec<Expression>,
69 character_unit: Option<CharacterUnit>,
70 },
71
72 /// Aggregate function call (COUNT, SUM, AVG, MIN, MAX)
73 /// SQL:1999 Section 6.16: Set functions
74 /// SQL:2003 ordered set functions with ORDER BY clause
75 /// SQL:2003 FILTER clause for conditional aggregation
76 /// Example: COUNT(DISTINCT customer_id), SUM(ALL amount)
77 /// Example: GROUP_CONCAT(name ORDER BY name ASC)
78 /// Example: COUNT(*) FILTER (WHERE x > 0)
79 ///
80 /// Uses `FunctionIdentifier` for proper case handling:
81 /// - Canonical (lowercase) for case-insensitive comparison
82 /// - Display (original case) for error messages
83 AggregateFunction {
84 name: FunctionIdentifier,
85 distinct: bool, // true = DISTINCT, false = ALL (implicit)
86 args: Vec<Expression>,
87 /// Optional ORDER BY clause within the aggregate function
88 /// Example: GROUP_CONCAT(a ORDER BY b DESC)
89 order_by: Option<Vec<OrderByItem>>,
90 /// Optional FILTER clause for conditional aggregation (SQL:2003)
91 /// Example: COUNT(*) FILTER (WHERE x > 0)
92 filter: Option<Box<Expression>>,
93 },
94
95 /// IS NULL / IS NOT NULL
96 IsNull {
97 expr: Box<Expression>,
98 negated: bool, // false = IS NULL, true = IS NOT NULL
99 },
100
101 /// IS DISTINCT FROM / IS NOT DISTINCT FROM (SQL:1999)
102 /// NULL-safe comparison operator:
103 /// - `a IS NOT DISTINCT FROM b`: TRUE when both NULL or both equal non-NULL
104 /// - `a IS DISTINCT FROM b`: TRUE when one is NULL and other isn't, or both non-NULL but
105 /// unequal
106 IsDistinctFrom {
107 left: Box<Expression>,
108 right: Box<Expression>,
109 negated: bool, // false = IS DISTINCT FROM, true = IS NOT DISTINCT FROM
110 },
111
112 /// IS TRUE / IS FALSE / IS UNKNOWN (SQL:1999)
113 ///
114 /// Boolean test predicates:
115 /// - `expr IS TRUE`: TRUE when expr evaluates to TRUE
116 /// - `expr IS FALSE`: TRUE when expr evaluates to FALSE
117 /// - `expr IS UNKNOWN`: TRUE when expr evaluates to NULL (for booleans)
118 ///
119 /// Plus negated forms: IS NOT TRUE, IS NOT FALSE, IS NOT UNKNOWN
120 IsTruthValue {
121 expr: Box<Expression>,
122 truth_value: TruthValue,
123 negated: bool, // false = IS <value>, true = IS NOT <value>
124 },
125
126 /// Wildcard (*)
127 Wildcard,
128
129 /// CASE expression (both simple and searched forms)
130 /// Simple CASE: CASE x WHEN 1 THEN 'a' ELSE 'b' END
131 /// Searched CASE: CASE WHEN x>0 THEN 'pos' ELSE 'neg' END
132 Case {
133 /// Operand for simple CASE (None for searched CASE)
134 operand: Option<Box<Expression>>,
135
136 /// List of WHEN clauses with conditions and results
137 /// - Simple CASE: conditions are comparison values (OR'd together)
138 /// - Searched CASE: conditions are boolean expressions (OR'd together)
139 when_clauses: Vec<CaseWhen>,
140
141 /// Optional ELSE result (defaults to NULL if None)
142 else_result: Option<Box<Expression>>,
143 },
144
145 /// Scalar subquery (returns single value)
146 /// Example: WHERE salary > (SELECT AVG(salary) FROM employees)
147 ScalarSubquery(Box<SelectStmt>),
148
149 /// IN operator with subquery
150 /// Example: WHERE id IN (SELECT user_id FROM orders)
151 /// Example: WHERE status NOT IN (SELECT blocked_status FROM config)
152 In {
153 expr: Box<Expression>,
154 subquery: Box<SelectStmt>,
155 negated: bool, // false = IN, true = NOT IN
156 },
157
158 /// IN operator with value list
159 /// Example: WHERE id IN (1, 2, 3)
160 /// Example: WHERE name NOT IN ('admin', 'root')
161 InList {
162 expr: Box<Expression>,
163 values: Vec<Expression>,
164 negated: bool, // false = IN, true = NOT IN
165 },
166
167 /// BETWEEN predicate
168 /// Example: WHERE age BETWEEN 18 AND 65
169 /// Example: WHERE price NOT BETWEEN 10.0 AND 20.0
170 /// Example: WHERE value BETWEEN SYMMETRIC 10 AND 1
171 /// Equivalent to: expr >= low AND expr <= high (or negated)
172 /// SYMMETRIC: swaps bounds if low > high before evaluation
173 Between {
174 expr: Box<Expression>,
175 low: Box<Expression>,
176 high: Box<Expression>,
177 negated: bool, // false = BETWEEN, true = NOT BETWEEN
178 symmetric: bool, // false = ASYMMETRIC (default), true = SYMMETRIC
179 },
180
181 /// CAST expression
182 /// Example: CAST(value AS INTEGER)
183 /// Example: CAST('123' AS NUMERIC(10, 2))
184 Cast {
185 expr: Box<Expression>,
186 data_type: vibesql_types::DataType,
187 },
188
189 /// POSITION expression
190 /// Example: POSITION('lo' IN 'hello')
191 /// Example: POSITION('lo' IN 'hello' USING CHARACTERS)
192 /// SQL:1999 Section 6.29: String value functions
193 /// Returns 1-indexed position of substring in string, or 0 if not found
194 Position {
195 substring: Box<Expression>,
196 string: Box<Expression>,
197 character_unit: Option<CharacterUnit>,
198 },
199
200 /// TRIM expression
201 /// Example: TRIM(BOTH 'x' FROM 'xxxhelloxxx')
202 /// Example: TRIM(LEADING '0' FROM '00042')
203 /// Example: TRIM(TRAILING '.' FROM 'test...')
204 /// Example: TRIM('x' FROM 'xxxhelloxxx') -- defaults to BOTH
205 /// Example: TRIM(' hello ') -- defaults to BOTH ' '
206 /// SQL:1999 Section 6.29: String value functions
207 Trim {
208 position: Option<TrimPosition>,
209 removal_char: Option<Box<Expression>>,
210 string: Box<Expression>,
211 },
212
213 /// EXTRACT expression
214 /// Example: EXTRACT(YEAR FROM date_column)
215 /// Example: EXTRACT(MONTH FROM '2024-01-15')
216 /// Example: EXTRACT(DAY FROM order_date)
217 /// SQL:1999 Section 6.18: Datetime value function
218 /// Extracts a date/time field from a datetime or interval expression
219 Extract {
220 field: IntervalUnit,
221 expr: Box<Expression>,
222 },
223
224 /// LIKE pattern matching
225 /// Example: name LIKE 'John%'
226 /// Example: email NOT LIKE '%spam%'
227 /// Example: name LIKE 'John\_Smith' ESCAPE '\'
228 /// Pattern wildcards: % (any chars), _ (single char)
229 Like {
230 expr: Box<Expression>,
231 pattern: Box<Expression>,
232 negated: bool, // false = LIKE, true = NOT LIKE
233 escape: Option<Box<Expression>>, // Optional ESCAPE character
234 },
235
236 /// GLOB pattern matching (SQLite)
237 /// Example: filename GLOB '*.txt'
238 /// Example: path NOT GLOB '/etc/*'
239 /// Example: path GLOB 'dir\*name' ESCAPE '\'
240 /// Pattern wildcards: * (any chars), ? (single char), [...] (character class)
241 Glob {
242 expr: Box<Expression>,
243 pattern: Box<Expression>,
244 negated: bool, // false = GLOB, true = NOT GLOB
245 escape: Option<Box<Expression>>, // Optional ESCAPE character
246 },
247
248 /// EXISTS predicate
249 /// Example: WHERE EXISTS (SELECT 1 FROM orders WHERE customer_id = c.id)
250 /// Example: WHERE NOT EXISTS (SELECT 1 FROM orders WHERE customer_id = c.id)
251 /// Returns TRUE if subquery returns at least one row, FALSE otherwise
252 /// Never returns NULL (unlike most predicates)
253 Exists {
254 subquery: Box<SelectStmt>,
255 negated: bool, // false = EXISTS, true = NOT EXISTS
256 },
257
258 /// Quantified comparison (ALL, ANY, SOME)
259 /// Example: salary > ALL (SELECT salary FROM dept WHERE dept_id = 10)
260 /// Example: price < ANY (SELECT price FROM competitors)
261 /// Example: quantity = SOME (SELECT quantity FROM inventory)
262 /// SOME is a synonym for ANY
263 QuantifiedComparison {
264 expr: Box<Expression>,
265 op: BinaryOperator,
266 quantifier: Quantifier,
267 subquery: Box<SelectStmt>,
268 },
269
270 /// Current date/time functions
271 /// CURRENT_DATE - Returns current date (no precision)
272 /// CURRENT_TIME[(precision)] - Returns current time with optional precision
273 /// CURRENT_TIMESTAMP[(precision)] - Returns current timestamp with optional precision
274 CurrentDate,
275 CurrentTime {
276 precision: Option<u32>,
277 },
278 CurrentTimestamp {
279 precision: Option<u32>,
280 },
281
282 /// INTERVAL expression
283 /// Example: INTERVAL '5' DAY
284 /// Example: INTERVAL '1-6' YEAR TO MONTH
285 /// Example: INTERVAL '5 12:30:45' DAY TO SECOND
286 /// SQL:1999 Section 6.12: Interval literal
287 Interval {
288 value: Box<Expression>,
289 unit: IntervalUnit,
290 /// For compound intervals: YEAR TO MONTH, DAY TO SECOND, etc.
291 leading_precision: Option<u32>,
292 fractional_precision: Option<u32>,
293 },
294
295 /// DEFAULT keyword - represents default value for column
296 /// Used in INSERT and UPDATE statements
297 Default,
298
299 /// VALUES() function - references insert value in ON DUPLICATE KEY UPDATE
300 /// Used in MySQL ON DUPLICATE KEY UPDATE clause
301 /// Example: INSERT INTO t VALUES (1, 2) ON DUPLICATE KEY UPDATE col = VALUES(col)
302 DuplicateKeyValue {
303 column: String,
304 },
305
306 /// Window function with OVER clause
307 /// Example: ROW_NUMBER() OVER (PARTITION BY dept ORDER BY salary DESC)
308 /// Example: SUM(amount) OVER (ROWS BETWEEN 2 PRECEDING AND CURRENT ROW)
309 /// Applies a function over a window of rows related to the current row
310 WindowFunction {
311 function: WindowFunctionSpec,
312 over: WindowSpec,
313 },
314
315 /// NEXT VALUE FOR sequence expression
316 /// Example: NEXT VALUE FOR user_id_seq
317 /// Returns the next value from the specified sequence generator
318 /// SQL:1999 Section 6.13: Next value expression
319 NextValue {
320 sequence_name: String,
321 },
322
323 /// MATCH...AGAINST full-text search expression
324 /// Example: MATCH(title, body) AGAINST ('search term')
325 /// Example: MATCH(title) AGAINST ('+mysql -oracle' IN BOOLEAN MODE)
326 /// Example: MATCH(title) AGAINST ('term' WITH QUERY EXPANSION)
327 MatchAgainst {
328 /// Columns to search
329 columns: Vec<String>,
330 /// Search string or phrase
331 search_modifier: Box<Expression>,
332 /// Search mode specification
333 mode: FulltextMode,
334 },
335
336 /// Pseudo-variable reference (OLD.column or NEW.column in triggers)
337 /// Used in trigger bodies to reference row values before/after DML operations
338 /// Example: OLD.username (before update/delete), NEW.username (after insert/update)
339 /// SQL:1999 Section 13.1: Triggered action
340 PseudoVariable {
341 pseudo_table: PseudoTable,
342 column: String,
343 },
344
345 /// Session/system variable reference (@@sql_mode, @@version, etc.)
346 /// MySQL session and system variables using @@ prefix
347 /// Example: @@sql_mode, @@session.sql_mode, @@global.max_connections
348 /// Can be used in expressions: SELECT REPLACE(@@sql_mode, 'STRICT', '')
349 SessionVariable {
350 name: String,
351 },
352
353 /// Row value constructor (tuple)
354 /// SQL:1999 Section 7.1: Row value constructor
355 /// Example: (a, b) = (1, 2)
356 /// Example: (rowid, 1) <= (5, 0)
357 /// Row values are compared element by element, left to right
358 RowValueConstructor(Vec<Expression>),
359
360 /// COLLATE expression for specifying collation
361 /// Example: column_name COLLATE NOCASE
362 /// Example: 'text' COLLATE RTRIM
363 /// SQLite supports: BINARY (default), NOCASE, RTRIM
364 Collate {
365 expr: Box<Expression>,
366 collation: String,
367 },
368}
369
370/// Full-text search mode specification
371#[derive(Debug, Clone, PartialEq)]
372pub enum FulltextMode {
373 /// Natural language mode (default)
374 NaturalLanguage,
375 /// Boolean mode with operators (+, -, *, etc.)
376 Boolean,
377 /// Natural language with query expansion
378 QueryExpansion,
379}
380
381/// Pseudo-table reference for trigger context
382/// Used in OLD.column and NEW.column expressions
383#[derive(Debug, Clone, Copy, PartialEq)]
384pub enum PseudoTable {
385 /// OLD - references row before UPDATE/DELETE
386 Old,
387 /// NEW - references row after INSERT/UPDATE
388 New,
389}
390
391/// CASE WHEN clause structure
392/// Supports multiple conditions (OR'd together) per WHEN clause
393/// Example: WHEN 1, 2, 3 THEN 'low' means: WHEN x=1 OR x=2 OR x=3 THEN 'low'
394#[derive(Debug, Clone, PartialEq)]
395pub struct CaseWhen {
396 /// Multiple conditions (OR'd together)
397 /// For simple CASE: these are comparison values
398 /// For searched CASE: these are boolean expressions
399 pub conditions: Vec<Expression>,
400
401 /// Result expression when any condition matches
402 pub result: Expression,
403}
404
405/// Quantifier for quantified comparisons
406#[derive(Debug, Clone, PartialEq)]
407pub enum Quantifier {
408 /// ALL - comparison must be TRUE for all rows
409 All,
410 /// ANY - comparison must be TRUE for at least one row
411 Any,
412 /// SOME - synonym for ANY
413 Some,
414}
415
416/// Truth value for IS TRUE/IS FALSE/IS UNKNOWN predicates (SQL:1999)
417#[derive(Debug, Clone, Copy, PartialEq, Eq)]
418pub enum TruthValue {
419 /// TRUE - the boolean true value
420 True,
421 /// FALSE - the boolean false value
422 False,
423 /// UNKNOWN - represents NULL in boolean context (three-valued logic)
424 Unknown,
425}
426
427/// Window function specification
428///
429/// Uses `FunctionIdentifier` for proper case handling in all variants.
430/// SQL:2003 FILTER clause is supported for conditional aggregation within windows.
431#[derive(Debug, Clone, PartialEq)]
432pub enum WindowFunctionSpec {
433 /// Aggregate function used as window function
434 /// Example: SUM(salary), AVG(price), COUNT(*)
435 /// Example with FILTER: COUNT(*) FILTER (WHERE x > 0) OVER (PARTITION BY y)
436 Aggregate {
437 name: FunctionIdentifier,
438 args: Vec<Expression>,
439 /// Optional FILTER clause for conditional aggregation
440 filter: Option<Box<Expression>>,
441 },
442
443 /// Ranking function
444 /// Example: ROW_NUMBER(), RANK(), DENSE_RANK(), NTILE(4)
445 Ranking { name: FunctionIdentifier, args: Vec<Expression> },
446
447 /// Value function
448 /// Example: LAG(salary, 1), LEAD(price, 2), FIRST_VALUE(name), LAST_VALUE(amount)
449 Value { name: FunctionIdentifier, args: Vec<Expression> },
450}
451
452impl WindowFunctionSpec {
453 /// Get the function name from any window function variant
454 pub fn name(&self) -> String {
455 match self {
456 WindowFunctionSpec::Aggregate { name, .. } => name.display().to_string(),
457 WindowFunctionSpec::Ranking { name, .. } => name.display().to_string(),
458 WindowFunctionSpec::Value { name, .. } => name.display().to_string(),
459 }
460 }
461}
462
463/// Window specification (OVER clause)
464#[derive(Debug, Clone, PartialEq)]
465pub struct WindowSpec {
466 /// PARTITION BY clause - divides rows into partitions
467 /// Example: PARTITION BY department_id
468 pub partition_by: Option<Vec<Expression>>,
469
470 /// ORDER BY clause - defines order within each partition
471 /// Example: ORDER BY salary DESC, hire_date
472 pub order_by: Option<Vec<OrderByItem>>,
473
474 /// Frame clause - defines which rows are included in the window frame
475 /// Example: ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
476 pub frame: Option<WindowFrame>,
477}
478
479/// Window frame specification
480#[derive(Debug, Clone, PartialEq)]
481pub struct WindowFrame {
482 /// Frame unit (ROWS or RANGE)
483 pub unit: FrameUnit,
484
485 /// Frame start boundary
486 pub start: FrameBound,
487
488 /// Frame end boundary (defaults to CURRENT ROW if None)
489 pub end: Option<FrameBound>,
490
491 /// Frame exclusion mode (defaults to NO OTHERS if None)
492 pub exclude: Option<FrameExclude>,
493}
494
495/// Frame unit type
496#[derive(Debug, Clone, PartialEq)]
497pub enum FrameUnit {
498 /// ROWS - physical rows
499 Rows,
500 /// RANGE - logical range based on ORDER BY values
501 Range,
502 /// GROUPS - peer groups (rows with same ORDER BY values)
503 Groups,
504}
505
506/// Frame boundary specification
507#[derive(Debug, Clone, PartialEq)]
508pub enum FrameBound {
509 /// UNBOUNDED PRECEDING - start of partition
510 UnboundedPreceding,
511
512 /// N PRECEDING - N rows before current row
513 Preceding(Box<Expression>),
514
515 /// CURRENT ROW - the current row
516 CurrentRow,
517
518 /// N FOLLOWING - N rows after current row
519 Following(Box<Expression>),
520
521 /// UNBOUNDED FOLLOWING - end of partition
522 UnboundedFollowing,
523}
524
525/// Frame exclusion specification (SQL:2011)
526/// Determines which rows to exclude from the frame
527#[derive(Debug, Clone, Copy, PartialEq)]
528pub enum FrameExclude {
529 /// EXCLUDE NO OTHERS - include all rows in frame (default)
530 NoOthers,
531
532 /// EXCLUDE CURRENT ROW - exclude the current row from frame
533 CurrentRow,
534
535 /// EXCLUDE GROUP - exclude current row and its peers (rows with same ORDER BY values)
536 Group,
537
538 /// EXCLUDE TIES - exclude peers of current row, but not current row itself
539 Ties,
540}
541
542/// TRIM position specification
543/// Determines which side(s) to trim characters from
544#[derive(Debug, Clone, PartialEq)]
545pub enum TrimPosition {
546 /// BOTH - trim from both leading and trailing (default)
547 Both,
548 /// LEADING - trim from start of string only
549 Leading,
550 /// TRAILING - trim from end of string only
551 Trailing,
552}
553
554/// Character measurement unit for string functions
555/// SQL:1999 Section 6.29: String value functions
556/// Used in USING clause for CHARACTER_LENGTH, SUBSTRING, POSITION
557#[derive(Debug, Clone, PartialEq)]
558pub enum CharacterUnit {
559 /// USING CHARACTERS - character-based measurement (default)
560 Characters,
561 /// USING OCTETS - byte-based measurement
562 Octets,
563}
564
565/// Interval unit for INTERVAL expressions
566/// SQL:1999 Section 6.12: Interval literal
567/// Used in INTERVAL '5' DAY, DATE_ADD(), etc.
568#[derive(Debug, Clone, PartialEq, Eq, Hash)]
569pub enum IntervalUnit {
570 /// Simple units
571 Microsecond,
572 Second,
573 Minute,
574 Hour,
575 Day,
576 Week,
577 Month,
578 Quarter,
579 Year,
580
581 /// Compound units (for MySQL compatibility)
582 /// SECOND_MICROSECOND
583 SecondMicrosecond,
584 /// MINUTE_MICROSECOND
585 MinuteMicrosecond,
586 /// MINUTE_SECOND
587 MinuteSecond,
588 /// HOUR_MICROSECOND
589 HourMicrosecond,
590 /// HOUR_SECOND
591 HourSecond,
592 /// HOUR_MINUTE
593 HourMinute,
594 /// DAY_MICROSECOND
595 DayMicrosecond,
596 /// DAY_SECOND
597 DaySecond,
598 /// DAY_MINUTE
599 DayMinute,
600 /// DAY_HOUR
601 DayHour,
602 /// YEAR_MONTH
603 YearMonth,
604}