vibesql_ast/
expression.rs

1//! SQL Expression types for use in SELECT, WHERE, and other clauses
2
3use vibesql_types::SqlValue;
4
5use crate::identifier::{ColumnIdentifier, FunctionIdentifier};
6use crate::{BinaryOperator, OrderByItem, SelectStmt, UnaryOperator};
7
8/// SQL Expression (can appear in SELECT, WHERE, etc.)
9#[derive(Debug, Clone, PartialEq)]
10pub enum Expression {
11    /// Literal value (42, 'hello', TRUE, NULL)
12    Literal(SqlValue),
13
14    /// Parameter placeholder (?) for prepared statements
15    /// The index is 0-based and assigned in order of appearance in the SQL
16    /// Example: WHERE id = ? AND name = ? -> Placeholder(0), Placeholder(1)
17    Placeholder(usize),
18
19    /// Numbered parameter placeholder ($1, $2, etc.) for prepared statements
20    /// PostgreSQL-style: 1-indexed as written in SQL
21    /// Example: WHERE id = $1 AND name = $2
22    /// The number is the explicit position (1-indexed) of the parameter
23    NumberedPlaceholder(usize),
24
25    /// Named parameter placeholder (:name) for prepared statements
26    /// Used by many ORMs and applications for readability
27    /// Example: WHERE id = :user_id AND name = :name
28    NamedPlaceholder(String),
29
30    /// Column reference (id, users.id, schema.table.column)
31    ///
32    /// Uses `ColumnIdentifier` for proper SQL:1999 case sensitivity handling:
33    /// - Unquoted identifiers are case-insensitive (fold to lowercase)
34    /// - Quoted identifiers are case-sensitive (preserve exact case)
35    ColumnRef(ColumnIdentifier),
36
37    /// Binary operation (a + b, x = y, etc.)
38    /// Note: AND/OR chains should use Conjunction/Disjunction for efficiency
39    BinaryOp {
40        op: BinaryOperator,
41        left: Box<Expression>,
42        right: Box<Expression>,
43    },
44
45    /// Flattened conjunction (AND chain): a AND b AND c AND ...
46    /// Stored as a flat vector for O(1) depth traversal and better cache locality.
47    /// Always contains 2+ children (single predicates remain as-is).
48    Conjunction(Vec<Expression>),
49
50    /// Flattened disjunction (OR chain): a OR b OR c OR ...
51    /// Stored as a flat vector for O(1) depth traversal and better cache locality.
52    /// Always contains 2+ children (single predicates remain as-is).
53    Disjunction(Vec<Expression>),
54
55    /// Unary operation (NOT x, -5)
56    UnaryOp {
57        op: UnaryOperator,
58        expr: Box<Expression>,
59    },
60
61    /// Function call (UPPER(x), SUBSTRING(x, 1, 3))
62    ///
63    /// Uses `FunctionIdentifier` for proper case handling:
64    /// - Canonical (lowercase) for case-insensitive comparison
65    /// - Display (original case) for error messages
66    Function {
67        name: FunctionIdentifier,
68        args: Vec<Expression>,
69        character_unit: Option<CharacterUnit>,
70    },
71
72    /// Aggregate function call (COUNT, SUM, AVG, MIN, MAX)
73    /// SQL:1999 Section 6.16: Set functions
74    /// SQL:2003 ordered set functions with ORDER BY clause
75    /// SQL:2003 FILTER clause for conditional aggregation
76    /// Example: COUNT(DISTINCT customer_id), SUM(ALL amount)
77    /// Example: GROUP_CONCAT(name ORDER BY name ASC)
78    /// Example: COUNT(*) FILTER (WHERE x > 0)
79    ///
80    /// Uses `FunctionIdentifier` for proper case handling:
81    /// - Canonical (lowercase) for case-insensitive comparison
82    /// - Display (original case) for error messages
83    AggregateFunction {
84        name: FunctionIdentifier,
85        distinct: bool, // true = DISTINCT, false = ALL (implicit)
86        args: Vec<Expression>,
87        /// Optional ORDER BY clause within the aggregate function
88        /// Example: GROUP_CONCAT(a ORDER BY b DESC)
89        order_by: Option<Vec<OrderByItem>>,
90        /// Optional FILTER clause for conditional aggregation (SQL:2003)
91        /// Example: COUNT(*) FILTER (WHERE x > 0)
92        filter: Option<Box<Expression>>,
93    },
94
95    /// IS NULL / IS NOT NULL
96    IsNull {
97        expr: Box<Expression>,
98        negated: bool, // false = IS NULL, true = IS NOT NULL
99    },
100
101    /// IS DISTINCT FROM / IS NOT DISTINCT FROM (SQL:1999)
102    /// NULL-safe comparison operator:
103    /// - `a IS NOT DISTINCT FROM b`: TRUE when both NULL or both equal non-NULL
104    /// - `a IS DISTINCT FROM b`: TRUE when one is NULL and other isn't, or both non-NULL but
105    ///   unequal
106    IsDistinctFrom {
107        left: Box<Expression>,
108        right: Box<Expression>,
109        negated: bool, // false = IS DISTINCT FROM, true = IS NOT DISTINCT FROM
110    },
111
112    /// IS TRUE / IS FALSE / IS UNKNOWN (SQL:1999)
113    ///
114    /// Boolean test predicates:
115    /// - `expr IS TRUE`: TRUE when expr evaluates to TRUE
116    /// - `expr IS FALSE`: TRUE when expr evaluates to FALSE
117    /// - `expr IS UNKNOWN`: TRUE when expr evaluates to NULL (for booleans)
118    ///
119    /// Plus negated forms: IS NOT TRUE, IS NOT FALSE, IS NOT UNKNOWN
120    IsTruthValue {
121        expr: Box<Expression>,
122        truth_value: TruthValue,
123        negated: bool, // false = IS <value>, true = IS NOT <value>
124    },
125
126    /// Wildcard (*)
127    Wildcard,
128
129    /// CASE expression (both simple and searched forms)
130    /// Simple CASE:  CASE x WHEN 1 THEN 'a' ELSE 'b' END
131    /// Searched CASE: CASE WHEN x>0 THEN 'pos' ELSE 'neg' END
132    Case {
133        /// Operand for simple CASE (None for searched CASE)
134        operand: Option<Box<Expression>>,
135
136        /// List of WHEN clauses with conditions and results
137        /// - Simple CASE: conditions are comparison values (OR'd together)
138        /// - Searched CASE: conditions are boolean expressions (OR'd together)
139        when_clauses: Vec<CaseWhen>,
140
141        /// Optional ELSE result (defaults to NULL if None)
142        else_result: Option<Box<Expression>>,
143    },
144
145    /// Scalar subquery (returns single value)
146    /// Example: WHERE salary > (SELECT AVG(salary) FROM employees)
147    ScalarSubquery(Box<SelectStmt>),
148
149    /// IN operator with subquery
150    /// Example: WHERE id IN (SELECT user_id FROM orders)
151    /// Example: WHERE status NOT IN (SELECT blocked_status FROM config)
152    In {
153        expr: Box<Expression>,
154        subquery: Box<SelectStmt>,
155        negated: bool, // false = IN, true = NOT IN
156    },
157
158    /// IN operator with value list
159    /// Example: WHERE id IN (1, 2, 3)
160    /// Example: WHERE name NOT IN ('admin', 'root')
161    InList {
162        expr: Box<Expression>,
163        values: Vec<Expression>,
164        negated: bool, // false = IN, true = NOT IN
165    },
166
167    /// BETWEEN predicate
168    /// Example: WHERE age BETWEEN 18 AND 65
169    /// Example: WHERE price NOT BETWEEN 10.0 AND 20.0
170    /// Example: WHERE value BETWEEN SYMMETRIC 10 AND 1
171    /// Equivalent to: expr >= low AND expr <= high (or negated)
172    /// SYMMETRIC: swaps bounds if low > high before evaluation
173    Between {
174        expr: Box<Expression>,
175        low: Box<Expression>,
176        high: Box<Expression>,
177        negated: bool,   // false = BETWEEN, true = NOT BETWEEN
178        symmetric: bool, // false = ASYMMETRIC (default), true = SYMMETRIC
179    },
180
181    /// CAST expression
182    /// Example: CAST(value AS INTEGER)
183    /// Example: CAST('123' AS NUMERIC(10, 2))
184    Cast {
185        expr: Box<Expression>,
186        data_type: vibesql_types::DataType,
187    },
188
189    /// POSITION expression
190    /// Example: POSITION('lo' IN 'hello')
191    /// Example: POSITION('lo' IN 'hello' USING CHARACTERS)
192    /// SQL:1999 Section 6.29: String value functions
193    /// Returns 1-indexed position of substring in string, or 0 if not found
194    Position {
195        substring: Box<Expression>,
196        string: Box<Expression>,
197        character_unit: Option<CharacterUnit>,
198    },
199
200    /// TRIM expression
201    /// Example: TRIM(BOTH 'x' FROM 'xxxhelloxxx')
202    /// Example: TRIM(LEADING '0' FROM '00042')
203    /// Example: TRIM(TRAILING '.' FROM 'test...')
204    /// Example: TRIM('x' FROM 'xxxhelloxxx') -- defaults to BOTH
205    /// Example: TRIM('  hello  ') -- defaults to BOTH ' '
206    /// SQL:1999 Section 6.29: String value functions
207    Trim {
208        position: Option<TrimPosition>,
209        removal_char: Option<Box<Expression>>,
210        string: Box<Expression>,
211    },
212
213    /// EXTRACT expression
214    /// Example: EXTRACT(YEAR FROM date_column)
215    /// Example: EXTRACT(MONTH FROM '2024-01-15')
216    /// Example: EXTRACT(DAY FROM order_date)
217    /// SQL:1999 Section 6.18: Datetime value function
218    /// Extracts a date/time field from a datetime or interval expression
219    Extract {
220        field: IntervalUnit,
221        expr: Box<Expression>,
222    },
223
224    /// LIKE pattern matching
225    /// Example: name LIKE 'John%'
226    /// Example: email NOT LIKE '%spam%'
227    /// Example: name LIKE 'John\_Smith' ESCAPE '\'
228    /// Pattern wildcards: % (any chars), _ (single char)
229    Like {
230        expr: Box<Expression>,
231        pattern: Box<Expression>,
232        negated: bool,                    // false = LIKE, true = NOT LIKE
233        escape: Option<Box<Expression>>,  // Optional ESCAPE character
234    },
235
236    /// GLOB pattern matching (SQLite)
237    /// Example: filename GLOB '*.txt'
238    /// Example: path NOT GLOB '/etc/*'
239    /// Example: path GLOB 'dir\*name' ESCAPE '\'
240    /// Pattern wildcards: * (any chars), ? (single char), [...] (character class)
241    Glob {
242        expr: Box<Expression>,
243        pattern: Box<Expression>,
244        negated: bool,                    // false = GLOB, true = NOT GLOB
245        escape: Option<Box<Expression>>,  // Optional ESCAPE character
246    },
247
248    /// EXISTS predicate
249    /// Example: WHERE EXISTS (SELECT 1 FROM orders WHERE customer_id = c.id)
250    /// Example: WHERE NOT EXISTS (SELECT 1 FROM orders WHERE customer_id = c.id)
251    /// Returns TRUE if subquery returns at least one row, FALSE otherwise
252    /// Never returns NULL (unlike most predicates)
253    Exists {
254        subquery: Box<SelectStmt>,
255        negated: bool, // false = EXISTS, true = NOT EXISTS
256    },
257
258    /// Quantified comparison (ALL, ANY, SOME)
259    /// Example: salary > ALL (SELECT salary FROM dept WHERE dept_id = 10)
260    /// Example: price < ANY (SELECT price FROM competitors)
261    /// Example: quantity = SOME (SELECT quantity FROM inventory)
262    /// SOME is a synonym for ANY
263    QuantifiedComparison {
264        expr: Box<Expression>,
265        op: BinaryOperator,
266        quantifier: Quantifier,
267        subquery: Box<SelectStmt>,
268    },
269
270    /// Current date/time functions
271    /// CURRENT_DATE - Returns current date (no precision)
272    /// CURRENT_TIME[(precision)] - Returns current time with optional precision
273    /// CURRENT_TIMESTAMP[(precision)] - Returns current timestamp with optional precision
274    CurrentDate,
275    CurrentTime {
276        precision: Option<u32>,
277    },
278    CurrentTimestamp {
279        precision: Option<u32>,
280    },
281
282    /// INTERVAL expression
283    /// Example: INTERVAL '5' DAY
284    /// Example: INTERVAL '1-6' YEAR TO MONTH
285    /// Example: INTERVAL '5 12:30:45' DAY TO SECOND
286    /// SQL:1999 Section 6.12: Interval literal
287    Interval {
288        value: Box<Expression>,
289        unit: IntervalUnit,
290        /// For compound intervals: YEAR TO MONTH, DAY TO SECOND, etc.
291        leading_precision: Option<u32>,
292        fractional_precision: Option<u32>,
293    },
294
295    /// DEFAULT keyword - represents default value for column
296    /// Used in INSERT and UPDATE statements
297    Default,
298
299    /// VALUES() function - references insert value in ON DUPLICATE KEY UPDATE
300    /// Used in MySQL ON DUPLICATE KEY UPDATE clause
301    /// Example: INSERT INTO t VALUES (1, 2) ON DUPLICATE KEY UPDATE col = VALUES(col)
302    DuplicateKeyValue {
303        column: String,
304    },
305
306    /// Window function with OVER clause
307    /// Example: ROW_NUMBER() OVER (PARTITION BY dept ORDER BY salary DESC)
308    /// Example: SUM(amount) OVER (ROWS BETWEEN 2 PRECEDING AND CURRENT ROW)
309    /// Applies a function over a window of rows related to the current row
310    WindowFunction {
311        function: WindowFunctionSpec,
312        over: WindowSpec,
313    },
314
315    /// NEXT VALUE FOR sequence expression
316    /// Example: NEXT VALUE FOR user_id_seq
317    /// Returns the next value from the specified sequence generator
318    /// SQL:1999 Section 6.13: Next value expression
319    NextValue {
320        sequence_name: String,
321    },
322
323    /// MATCH...AGAINST full-text search expression
324    /// Example: MATCH(title, body) AGAINST ('search term')
325    /// Example: MATCH(title) AGAINST ('+mysql -oracle' IN BOOLEAN MODE)
326    /// Example: MATCH(title) AGAINST ('term' WITH QUERY EXPANSION)
327    MatchAgainst {
328        /// Columns to search
329        columns: Vec<String>,
330        /// Search string or phrase
331        search_modifier: Box<Expression>,
332        /// Search mode specification
333        mode: FulltextMode,
334    },
335
336    /// Pseudo-variable reference (OLD.column or NEW.column in triggers)
337    /// Used in trigger bodies to reference row values before/after DML operations
338    /// Example: OLD.username (before update/delete), NEW.username (after insert/update)
339    /// SQL:1999 Section 13.1: Triggered action
340    PseudoVariable {
341        pseudo_table: PseudoTable,
342        column: String,
343    },
344
345    /// Session/system variable reference (@@sql_mode, @@version, etc.)
346    /// MySQL session and system variables using @@ prefix
347    /// Example: @@sql_mode, @@session.sql_mode, @@global.max_connections
348    /// Can be used in expressions: SELECT REPLACE(@@sql_mode, 'STRICT', '')
349    SessionVariable {
350        name: String,
351    },
352
353    /// Row value constructor (tuple)
354    /// SQL:1999 Section 7.1: Row value constructor
355    /// Example: (a, b) = (1, 2)
356    /// Example: (rowid, 1) <= (5, 0)
357    /// Row values are compared element by element, left to right
358    RowValueConstructor(Vec<Expression>),
359
360    /// COLLATE expression for specifying collation
361    /// Example: column_name COLLATE NOCASE
362    /// Example: 'text' COLLATE RTRIM
363    /// SQLite supports: BINARY (default), NOCASE, RTRIM
364    Collate {
365        expr: Box<Expression>,
366        collation: String,
367    },
368}
369
370/// Full-text search mode specification
371#[derive(Debug, Clone, PartialEq)]
372pub enum FulltextMode {
373    /// Natural language mode (default)
374    NaturalLanguage,
375    /// Boolean mode with operators (+, -, *, etc.)
376    Boolean,
377    /// Natural language with query expansion
378    QueryExpansion,
379}
380
381/// Pseudo-table reference for trigger context
382/// Used in OLD.column and NEW.column expressions
383#[derive(Debug, Clone, Copy, PartialEq)]
384pub enum PseudoTable {
385    /// OLD - references row before UPDATE/DELETE
386    Old,
387    /// NEW - references row after INSERT/UPDATE
388    New,
389}
390
391/// CASE WHEN clause structure
392/// Supports multiple conditions (OR'd together) per WHEN clause
393/// Example: WHEN 1, 2, 3 THEN 'low' means: WHEN x=1 OR x=2 OR x=3 THEN 'low'
394#[derive(Debug, Clone, PartialEq)]
395pub struct CaseWhen {
396    /// Multiple conditions (OR'd together)
397    /// For simple CASE: these are comparison values
398    /// For searched CASE: these are boolean expressions
399    pub conditions: Vec<Expression>,
400
401    /// Result expression when any condition matches
402    pub result: Expression,
403}
404
405/// Quantifier for quantified comparisons
406#[derive(Debug, Clone, PartialEq)]
407pub enum Quantifier {
408    /// ALL - comparison must be TRUE for all rows
409    All,
410    /// ANY - comparison must be TRUE for at least one row
411    Any,
412    /// SOME - synonym for ANY
413    Some,
414}
415
416/// Truth value for IS TRUE/IS FALSE/IS UNKNOWN predicates (SQL:1999)
417#[derive(Debug, Clone, Copy, PartialEq, Eq)]
418pub enum TruthValue {
419    /// TRUE - the boolean true value
420    True,
421    /// FALSE - the boolean false value
422    False,
423    /// UNKNOWN - represents NULL in boolean context (three-valued logic)
424    Unknown,
425}
426
427/// Window function specification
428///
429/// Uses `FunctionIdentifier` for proper case handling in all variants.
430/// SQL:2003 FILTER clause is supported for conditional aggregation within windows.
431#[derive(Debug, Clone, PartialEq)]
432pub enum WindowFunctionSpec {
433    /// Aggregate function used as window function
434    /// Example: SUM(salary), AVG(price), COUNT(*)
435    /// Example with FILTER: COUNT(*) FILTER (WHERE x > 0) OVER (PARTITION BY y)
436    Aggregate {
437        name: FunctionIdentifier,
438        args: Vec<Expression>,
439        /// Optional FILTER clause for conditional aggregation
440        filter: Option<Box<Expression>>,
441    },
442
443    /// Ranking function
444    /// Example: ROW_NUMBER(), RANK(), DENSE_RANK(), NTILE(4)
445    Ranking { name: FunctionIdentifier, args: Vec<Expression> },
446
447    /// Value function
448    /// Example: LAG(salary, 1), LEAD(price, 2), FIRST_VALUE(name), LAST_VALUE(amount)
449    Value { name: FunctionIdentifier, args: Vec<Expression> },
450}
451
452impl WindowFunctionSpec {
453    /// Get the function name from any window function variant
454    pub fn name(&self) -> String {
455        match self {
456            WindowFunctionSpec::Aggregate { name, .. } => name.display().to_string(),
457            WindowFunctionSpec::Ranking { name, .. } => name.display().to_string(),
458            WindowFunctionSpec::Value { name, .. } => name.display().to_string(),
459        }
460    }
461}
462
463/// Window specification (OVER clause)
464#[derive(Debug, Clone, PartialEq)]
465pub struct WindowSpec {
466    /// PARTITION BY clause - divides rows into partitions
467    /// Example: PARTITION BY department_id
468    pub partition_by: Option<Vec<Expression>>,
469
470    /// ORDER BY clause - defines order within each partition
471    /// Example: ORDER BY salary DESC, hire_date
472    pub order_by: Option<Vec<OrderByItem>>,
473
474    /// Frame clause - defines which rows are included in the window frame
475    /// Example: ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
476    pub frame: Option<WindowFrame>,
477}
478
479/// Window frame specification
480#[derive(Debug, Clone, PartialEq)]
481pub struct WindowFrame {
482    /// Frame unit (ROWS or RANGE)
483    pub unit: FrameUnit,
484
485    /// Frame start boundary
486    pub start: FrameBound,
487
488    /// Frame end boundary (defaults to CURRENT ROW if None)
489    pub end: Option<FrameBound>,
490
491    /// Frame exclusion mode (defaults to NO OTHERS if None)
492    pub exclude: Option<FrameExclude>,
493}
494
495/// Frame unit type
496#[derive(Debug, Clone, PartialEq)]
497pub enum FrameUnit {
498    /// ROWS - physical rows
499    Rows,
500    /// RANGE - logical range based on ORDER BY values
501    Range,
502    /// GROUPS - peer groups (rows with same ORDER BY values)
503    Groups,
504}
505
506/// Frame boundary specification
507#[derive(Debug, Clone, PartialEq)]
508pub enum FrameBound {
509    /// UNBOUNDED PRECEDING - start of partition
510    UnboundedPreceding,
511
512    /// N PRECEDING - N rows before current row
513    Preceding(Box<Expression>),
514
515    /// CURRENT ROW - the current row
516    CurrentRow,
517
518    /// N FOLLOWING - N rows after current row
519    Following(Box<Expression>),
520
521    /// UNBOUNDED FOLLOWING - end of partition
522    UnboundedFollowing,
523}
524
525/// Frame exclusion specification (SQL:2011)
526/// Determines which rows to exclude from the frame
527#[derive(Debug, Clone, Copy, PartialEq)]
528pub enum FrameExclude {
529    /// EXCLUDE NO OTHERS - include all rows in frame (default)
530    NoOthers,
531
532    /// EXCLUDE CURRENT ROW - exclude the current row from frame
533    CurrentRow,
534
535    /// EXCLUDE GROUP - exclude current row and its peers (rows with same ORDER BY values)
536    Group,
537
538    /// EXCLUDE TIES - exclude peers of current row, but not current row itself
539    Ties,
540}
541
542/// TRIM position specification
543/// Determines which side(s) to trim characters from
544#[derive(Debug, Clone, PartialEq)]
545pub enum TrimPosition {
546    /// BOTH - trim from both leading and trailing (default)
547    Both,
548    /// LEADING - trim from start of string only
549    Leading,
550    /// TRAILING - trim from end of string only
551    Trailing,
552}
553
554/// Character measurement unit for string functions
555/// SQL:1999 Section 6.29: String value functions
556/// Used in USING clause for CHARACTER_LENGTH, SUBSTRING, POSITION
557#[derive(Debug, Clone, PartialEq)]
558pub enum CharacterUnit {
559    /// USING CHARACTERS - character-based measurement (default)
560    Characters,
561    /// USING OCTETS - byte-based measurement
562    Octets,
563}
564
565/// Interval unit for INTERVAL expressions
566/// SQL:1999 Section 6.12: Interval literal
567/// Used in INTERVAL '5' DAY, DATE_ADD(), etc.
568#[derive(Debug, Clone, PartialEq, Eq, Hash)]
569pub enum IntervalUnit {
570    /// Simple units
571    Microsecond,
572    Second,
573    Minute,
574    Hour,
575    Day,
576    Week,
577    Month,
578    Quarter,
579    Year,
580
581    /// Compound units (for MySQL compatibility)
582    /// SECOND_MICROSECOND
583    SecondMicrosecond,
584    /// MINUTE_MICROSECOND
585    MinuteMicrosecond,
586    /// MINUTE_SECOND
587    MinuteSecond,
588    /// HOUR_MICROSECOND
589    HourMicrosecond,
590    /// HOUR_SECOND
591    HourSecond,
592    /// HOUR_MINUTE
593    HourMinute,
594    /// DAY_MICROSECOND
595    DayMicrosecond,
596    /// DAY_SECOND
597    DaySecond,
598    /// DAY_MINUTE
599    DayMinute,
600    /// DAY_HOUR
601    DayHour,
602    /// YEAR_MONTH
603    YearMonth,
604}