sql_cli/sql/parser/
ast.rs

1//! Abstract Syntax Tree (AST) definitions for SQL queries
2//!
3//! This module contains all the data structures that represent
4//! the parsed SQL query structure.
5
6// ===== Comment Types =====
7
/// A single SQL comment captured by the parser.
///
/// Both comment flavours share this type; `is_line_comment` tells them apart.
#[derive(Debug, Clone, PartialEq)]
pub struct Comment {
    /// Comment body with the delimiters (`--` or `/* */`) already removed
    pub text: String,
    /// `true` for a `--` line comment, `false` for a `/* */` block comment
    pub is_line_comment: bool,
}

impl Comment {
    /// Build a line (`--`) comment from its text.
    pub fn line(text: String) -> Self {
        Self::with_kind(text, true)
    }

    /// Build a block (`/* */`) comment from its text.
    pub fn block(text: String) -> Self {
        Self::with_kind(text, false)
    }

    /// Shared constructor used by [`Comment::line`] and [`Comment::block`].
    fn with_kind(text: String, is_line_comment: bool) -> Self {
        Comment {
            is_line_comment,
            text,
        }
    }
}
34
35// ===== Expression Types =====
36
/// Quote style for identifiers (column names, table names, etc.)
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum QuoteStyle {
    /// No quotes needed (valid unquoted identifier)
    None,
    /// Double quotes: "Customer Id"
    DoubleQuotes,
    /// SQL Server style brackets: [Customer Id]
    Brackets,
}

/// Column reference with optional quoting information and table prefix
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ColumnRef {
    /// Bare column name, stored without any surrounding quote characters
    pub name: String,
    /// How the name is delimited when rendered by [`ColumnRef::to_sql`]
    pub quote_style: QuoteStyle,
    /// Optional table/alias prefix (e.g., "messages" in "messages.field_name")
    pub table_prefix: Option<String>,
}

impl ColumnRef {
    /// Create an unquoted column reference with no table prefix
    pub fn unquoted(name: String) -> Self {
        Self {
            name,
            quote_style: QuoteStyle::None,
            table_prefix: None,
        }
    }

    /// Create a double-quoted column reference with no table prefix
    pub fn quoted(name: String) -> Self {
        Self {
            name,
            quote_style: QuoteStyle::DoubleQuotes,
            table_prefix: None,
        }
    }

    /// Create a qualified, unquoted column reference (table.column)
    pub fn qualified(table: String, name: String) -> Self {
        Self {
            name,
            quote_style: QuoteStyle::None,
            table_prefix: Some(table),
        }
    }

    /// Get the full qualified string representation.
    ///
    /// Returns `table.name` when a prefix is present and `name` otherwise.
    /// No quoting is applied here; use [`ColumnRef::to_sql`] for SQL output.
    pub fn to_qualified_string(&self) -> String {
        match &self.table_prefix {
            Some(table) => format!("{}.{}", table, self.name),
            None => self.name.clone(),
        }
    }

    /// Create a bracket-quoted column reference with no table prefix
    pub fn bracketed(name: String) -> Self {
        Self {
            name,
            quote_style: QuoteStyle::Brackets,
            table_prefix: None,
        }
    }

    /// Format the column reference with appropriate quoting.
    ///
    /// The closing delimiter is doubled when it occurs inside the name
    /// (`"` becomes `""`, `]` becomes `]]`) so the rendered identifier cannot be
    /// terminated early by its own content. Names without those characters
    /// render exactly as before. The table prefix is emitted verbatim.
    pub fn to_sql(&self) -> String {
        let column_part = match self.quote_style {
            QuoteStyle::None => self.name.clone(),
            QuoteStyle::DoubleQuotes => format!("\"{}\"", self.name.replace('"', "\"\"")),
            QuoteStyle::Brackets => format!("[{}]", self.name.replace(']', "]]")),
        };

        match &self.table_prefix {
            Some(table) => format!("{}.{}", table, column_part),
            None => column_part,
        }
    }
}

/// Compares only the column name; quote style and table prefix are ignored.
impl PartialEq<str> for ColumnRef {
    fn eq(&self, other: &str) -> bool {
        self.name == other
    }
}

/// Compares only the column name; quote style and table prefix are ignored.
impl PartialEq<&str> for ColumnRef {
    fn eq(&self, other: &&str) -> bool {
        self.name == *other
    }
}

/// Displays the reference exactly as [`ColumnRef::to_sql`] renders it.
impl std::fmt::Display for ColumnRef {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.to_sql())
    }
}
134
/// A node in the parsed SQL expression tree.
///
/// The type is recursive: child expressions are held in `Box` or `Vec`, and
/// the subquery variants embed a whole `SelectStatement`.
#[derive(Debug, Clone)]
pub enum SqlExpression {
    /// Column reference, including quoting and optional table prefix
    Column(ColumnRef),
    /// String literal
    StringLiteral(String),
    /// Numeric literal, kept as a `String` rather than a parsed number
    NumberLiteral(String),
    /// Boolean literal
    BooleanLiteral(bool),
    Null, // NULL literal
    /// Date/time given by explicit components; the time-of-day parts are optional
    DateTimeConstructor {
        year: i32,
        month: u32,
        day: u32,
        hour: Option<u32>,
        minute: Option<u32>,
        second: Option<u32>,
    },
    /// Date/time with only optional time-of-day components.
    /// NOTE(review): presumably anchored to the current date — confirm in the evaluator
    DateTimeToday {
        hour: Option<u32>,
        minute: Option<u32>,
        second: Option<u32>,
    },
    /// Method call whose receiver is a plain name (`object`) rather than an expression
    MethodCall {
        object: String,
        method: String,
        args: Vec<SqlExpression>,
    },
    /// Method call whose receiver (`base`) is itself an expression
    ChainedMethodCall {
        base: Box<SqlExpression>,
        method: String,
        args: Vec<SqlExpression>,
    },
    /// Function call by name with its argument list
    FunctionCall {
        name: String,
        args: Vec<SqlExpression>,
        distinct: bool, // For COUNT(DISTINCT col), SUM(DISTINCT col), etc.
    },
    /// Function call paired with a window specification
    WindowFunction {
        name: String,
        args: Vec<SqlExpression>,
        window_spec: WindowSpec,
    },
    /// Binary operation; the operator is stored as a string
    BinaryOp {
        left: Box<SqlExpression>,
        op: String,
        right: Box<SqlExpression>,
    },
    /// Membership test of `expr` against an explicit list of value expressions
    InList {
        expr: Box<SqlExpression>,
        values: Vec<SqlExpression>,
    },
    /// Negated counterpart of `InList`
    NotInList {
        expr: Box<SqlExpression>,
        values: Vec<SqlExpression>,
    },
    /// Range test of `expr` against a `lower` and an `upper` expression
    Between {
        expr: Box<SqlExpression>,
        lower: Box<SqlExpression>,
        upper: Box<SqlExpression>,
    },
    /// Negation wrapper around a single expression
    Not {
        expr: Box<SqlExpression>,
    },
    /// CASE expression whose branches each carry their own condition
    CaseExpression {
        when_branches: Vec<WhenBranch>,
        else_branch: Option<Box<SqlExpression>>,
    },
    /// CASE expression with a subject `expr` and per-branch values.
    /// NOTE(review): presumably `expr` is compared against each branch value — confirm
    SimpleCaseExpression {
        expr: Box<SqlExpression>,
        when_branches: Vec<SimpleWhenBranch>,
        else_branch: Option<Box<SqlExpression>>,
    },
    /// Scalar subquery that returns a single value
    /// Used in expressions like: WHERE col = (SELECT MAX(id) FROM table)
    ScalarSubquery {
        query: Box<SelectStatement>,
    },
    /// IN subquery that returns multiple values
    /// Used in expressions like: WHERE col IN (SELECT id FROM table WHERE ...)
    InSubquery {
        expr: Box<SqlExpression>,
        subquery: Box<SelectStatement>,
    },
    /// UNNEST - Row expansion function that splits delimited strings
    /// Used like: SELECT UNNEST(accounts, '|') AS account FROM fix_trades
    /// Causes row multiplication - one input row becomes N output rows
    Unnest {
        column: Box<SqlExpression>,
        delimiter: String,
    },
    /// NOT IN subquery
    /// Used in expressions like: WHERE col NOT IN (SELECT id FROM table WHERE ...)
    NotInSubquery {
        expr: Box<SqlExpression>,
        subquery: Box<SelectStatement>,
    },
}
230
/// One branch of `SqlExpression::CaseExpression`: a condition and the
/// expression produced for that branch.
#[derive(Debug, Clone)]
pub struct WhenBranch {
    /// Expression tested for this branch
    pub condition: Box<SqlExpression>,
    /// Expression yielded by this branch
    pub result: Box<SqlExpression>,
}
236
/// One branch of `SqlExpression::SimpleCaseExpression`: a candidate value and
/// the expression produced for that branch.
#[derive(Debug, Clone)]
pub struct SimpleWhenBranch {
    /// Candidate value for this branch
    pub value: Box<SqlExpression>,
    /// Expression yielded by this branch
    pub result: Box<SqlExpression>,
}
242
243// ===== WHERE Clause Types =====
244
/// WHERE clause, stored as a flat sequence of conditions.
///
/// Each `Condition` records the connector that links it to the next one.
#[derive(Debug, Clone)]
pub struct WhereClause {
    /// Conditions in the order they are stored by the parser
    pub conditions: Vec<Condition>,
}
249
/// A single entry of a `WhereClause`: an expression plus the optional
/// connector to the following condition.
#[derive(Debug, Clone)]
pub struct Condition {
    /// The condition's expression
    pub expr: SqlExpression,
    pub connector: Option<LogicalOp>, // AND/OR connecting to next condition
}
255
/// Logical connector between two consecutive `Condition`s.
#[derive(Debug, Clone)]
pub enum LogicalOp {
    /// `AND`
    And,
    /// `OR`
    Or,
}
261
262// ===== ORDER BY Types =====
263
/// Sort direction attached to an ORDER BY entry.
#[derive(Debug, Clone, PartialEq)]
pub enum SortDirection {
    /// Ascending (`ASC`)
    Asc,
    /// Descending (`DESC`)
    Desc,
}
269
/// Legacy structure - kept for backward compatibility
/// New code should use OrderByItem
///
/// Unlike `OrderByItem`, the sort key here is a plain column name, not an
/// expression.
#[derive(Debug, Clone)]
pub struct OrderByColumn {
    /// Column name used as the sort key
    pub column: String,
    /// Sort direction for this column
    pub direction: SortDirection,
}
277
278/// Modern ORDER BY item that supports expressions
279#[derive(Debug, Clone)]
280pub struct OrderByItem {
281    pub expr: SqlExpression,
282    pub direction: SortDirection,
283}
284
285impl OrderByItem {
286    /// Create from a simple column name (for backward compatibility)
287    pub fn from_column_name(name: String, direction: SortDirection) -> Self {
288        Self {
289            expr: SqlExpression::Column(ColumnRef {
290                name,
291                quote_style: QuoteStyle::None,
292                table_prefix: None,
293            }),
294            direction,
295        }
296    }
297
298    /// Create from an expression
299    pub fn from_expression(expr: SqlExpression, direction: SortDirection) -> Self {
300        Self { expr, direction }
301    }
302}
303
304// ===== Window Function Types =====
305
/// Window frame bounds
///
/// One end of a window frame; `WindowFrame` uses this for both `start` and `end`.
#[derive(Debug, Clone, PartialEq)]
pub enum FrameBound {
    /// `UNBOUNDED PRECEDING`
    UnboundedPreceding,
    /// `CURRENT ROW`
    CurrentRow,
    /// Offset before the current row. The `i64` is signed; this type does not
    /// itself reject negative values.
    Preceding(i64),
    /// Offset after the current row. The `i64` is signed; this type does not
    /// itself reject negative values.
    Following(i64),
    /// `UNBOUNDED FOLLOWING`
    UnboundedFollowing,
}
315
/// Window frame unit (ROWS or RANGE)
#[derive(Debug, Clone, PartialEq)]
pub enum FrameUnit {
    /// `ROWS` frame
    Rows,
    /// `RANGE` frame
    Range,
}
322
/// Window frame specification
///
/// Combines the frame unit with a mandatory start bound and an optional end bound.
#[derive(Debug, Clone)]
pub struct WindowFrame {
    /// Whether the frame is expressed in ROWS or RANGE
    pub unit: FrameUnit,
    /// Start bound of the frame
    pub start: FrameBound,
    pub end: Option<FrameBound>, // None means CURRENT ROW
}
330
/// Window specification attached to `SqlExpression::WindowFunction`.
#[derive(Debug, Clone)]
pub struct WindowSpec {
    /// PARTITION BY entries, stored as plain strings (not expressions)
    pub partition_by: Vec<String>,
    /// ORDER BY entries inside the window; these may be expressions
    pub order_by: Vec<OrderByItem>,
    pub frame: Option<WindowFrame>, // Optional window frame
}
337
338// ===== SELECT Statement Types =====
339
/// Set operation type for combining SELECT statements
///
/// Stored alongside the right-hand statement in `SelectStatement::set_operations`.
/// Per the variant notes below, only `UnionAll` is marked as implemented.
#[derive(Debug, Clone, PartialEq)]
pub enum SetOperation {
    /// UNION ALL - combines results without deduplication
    UnionAll,
    /// UNION - combines results with deduplication (not yet implemented)
    Union,
    /// INTERSECT - returns common rows (not yet implemented)
    Intersect,
    /// EXCEPT - returns rows from left not in right (not yet implemented)
    Except,
}
352
/// Represents a SELECT item - a simple column, a computed expression with alias,
/// or a star selector.
///
/// Every variant also carries the comments attached to it (`leading_comments`
/// and `trailing_comment`).
#[derive(Debug, Clone)]
pub enum SelectItem {
    /// Simple column reference: "`column_name`"
    Column {
        /// The referenced column
        column: ColumnRef,
        /// Comments stored as preceding this item
        leading_comments: Vec<Comment>,
        /// Comment stored as trailing this item, if any
        trailing_comment: Option<Comment>,
    },
    /// Computed expression with alias: "expr AS alias"
    Expression {
        /// The computed expression
        expr: SqlExpression,
        /// Alias for the expression; always present in this variant (plain `String`)
        alias: String,
        /// Comments stored as preceding this item
        leading_comments: Vec<Comment>,
        /// Comment stored as trailing this item, if any
        trailing_comment: Option<Comment>,
    },
    /// Star selector: "*" or "table.*"
    Star {
        table_prefix: Option<String>, // e.g., Some("p") for "p.*"
        /// Comments stored as preceding this item
        leading_comments: Vec<Comment>,
        /// Comment stored as trailing this item, if any
        trailing_comment: Option<Comment>,
    },
}
376
377#[derive(Debug, Clone)]
378pub struct SelectStatement {
379    pub distinct: bool,                // SELECT DISTINCT flag
380    pub columns: Vec<String>,          // Keep for backward compatibility, will be deprecated
381    pub select_items: Vec<SelectItem>, // New field for computed expressions
382    pub from_table: Option<String>,
383    pub from_subquery: Option<Box<SelectStatement>>, // Subquery in FROM clause
384    pub from_function: Option<TableFunction>,        // Table function like RANGE() in FROM clause
385    pub from_alias: Option<String>,                  // Alias for subquery (AS name)
386    pub joins: Vec<JoinClause>,                      // JOIN clauses
387    pub where_clause: Option<WhereClause>,
388    pub order_by: Option<Vec<OrderByItem>>, // Supports expressions: columns, aggregates, CASE, etc.
389    pub group_by: Option<Vec<SqlExpression>>, // Changed from Vec<String> to support expressions
390    pub having: Option<SqlExpression>,      // HAVING clause for post-aggregation filtering
391    pub qualify: Option<SqlExpression>, // QUALIFY clause for window function filtering (Snowflake-style)
392    pub limit: Option<usize>,
393    pub offset: Option<usize>,
394    pub ctes: Vec<CTE>,                // Common Table Expressions (WITH clause)
395    pub into_table: Option<IntoTable>, // INTO clause for temporary tables
396    pub set_operations: Vec<(SetOperation, Box<SelectStatement>)>, // UNION/INTERSECT/EXCEPT operations
397
398    // Comment preservation
399    pub leading_comments: Vec<Comment>, // Comments before the SELECT keyword
400    pub trailing_comment: Option<Comment>, // Trailing comment at end of statement
401}
402
403impl Default for SelectStatement {
404    fn default() -> Self {
405        SelectStatement {
406            distinct: false,
407            columns: Vec::new(),
408            select_items: Vec::new(),
409            from_table: None,
410            from_subquery: None,
411            from_function: None,
412            from_alias: None,
413            joins: Vec::new(),
414            where_clause: None,
415            order_by: None,
416            group_by: None,
417            having: None,
418            qualify: None,
419            limit: None,
420            offset: None,
421            ctes: Vec::new(),
422            into_table: None,
423            set_operations: Vec::new(),
424            leading_comments: Vec::new(),
425            trailing_comment: None,
426        }
427    }
428}
429
/// INTO clause for creating temporary tables
///
/// This type is a plain holder: the `#` prefix requirement noted on `name` is
/// not enforced here.
#[derive(Debug, Clone, PartialEq)]
pub struct IntoTable {
    /// Name of the temporary table (must start with #)
    pub name: String,
}
436
437// ===== Table and Join Types =====
438
/// Table function that generates virtual tables
///
/// Used as `SelectStatement::from_function`.
#[derive(Debug, Clone)]
pub enum TableFunction {
    /// A generator identified by name and called with argument expressions
    Generator {
        /// Function name as written in the query
        name: String,
        /// Argument expressions passed to the function
        args: Vec<SqlExpression>,
    },
}
447
/// Common Table Expression (CTE) structure
///
/// One named entry of a WITH clause; `cte_type` holds the actual definition.
#[derive(Debug, Clone)]
pub struct CTE {
    /// Name the CTE is referenced by
    pub name: String,
    pub column_list: Option<Vec<String>>, // Optional column list: WITH t(col1, col2) AS ...
    /// The CTE's definition: a standard query or a WEB fetch specification
    pub cte_type: CTEType,
}
455
/// Type of CTE - standard SQL or WEB fetch
#[derive(Debug, Clone)]
pub enum CTEType {
    /// CTE defined by an ordinary SELECT statement
    Standard(SelectStatement),
    /// CTE defined by a WEB fetch specification
    Web(WebCTESpec),
}
462
/// Specification for WEB CTEs
///
/// Describes the HTTP request (URL, method, headers, body, multipart parts),
/// how the response is interpreted (`format`, `json_path`), caching, and any
/// template variables to substitute.
#[derive(Debug, Clone)]
pub struct WebCTESpec {
    /// URL to fetch
    pub url: String,
    pub format: Option<DataFormat>,        // CSV, JSON, or auto-detect
    pub headers: Vec<(String, String)>,    // HTTP headers
    pub cache_seconds: Option<u64>,        // Cache duration
    pub method: Option<HttpMethod>,        // HTTP method (GET, POST, etc.)
    pub body: Option<String>,              // Request body for POST/PUT
    pub json_path: Option<String>, // JSON path to extract (e.g., "Result" for {Result: [...]})
    pub form_files: Vec<(String, String)>, // Multipart form files: (field_name, file_path)
    pub form_fields: Vec<(String, String)>, // Multipart form fields: (field_name, value)
    pub template_vars: Vec<TemplateVar>, // Template variables for injection from temp tables
}
477
/// Template variable for injecting temp table data into WEB CTEs
///
/// Records one placeholder together with the temp table it refers to and the
/// optional column / index selectors parsed from it.
#[derive(Debug, Clone)]
pub struct TemplateVar {
    pub placeholder: String,    // e.g., "${#instruments}"
    pub table_name: String,     // e.g., "#instruments"
    pub column: Option<String>, // e.g., Some("symbol") for ${#instruments.symbol}
    pub index: Option<usize>,   // e.g., Some(0) for ${#instruments[0]}
}
486
/// HTTP methods for WEB CTEs
///
/// NOTE(review): variants are upper-case acronyms rather than UpperCamelCase;
/// renaming them would break every caller, so they are left as is.
#[derive(Debug, Clone)]
pub enum HttpMethod {
    /// HTTP `GET`
    GET,
    /// HTTP `POST`
    POST,
    /// HTTP `PUT`
    PUT,
    /// HTTP `DELETE`
    DELETE,
    /// HTTP `PATCH`
    PATCH,
}
496
/// Data format for WEB CTEs
///
/// NOTE(review): variants `CSV`/`JSON` are upper-case acronyms rather than
/// UpperCamelCase; renaming them would break callers, so they are left as is.
#[derive(Debug, Clone)]
pub enum DataFormat {
    /// Comma-separated values
    CSV,
    /// JSON
    JSON,
    Auto, // Auto-detect from Content-Type or extension
}
504
/// Table source - either a file/table name or a derived table (subquery/CTE)
///
/// Used as the joined side of a `JoinClause`.
#[derive(Debug, Clone)]
pub enum TableSource {
    Table(String), // Regular table from CSV/JSON
    /// A query used as a table, always with an alias
    DerivedTable {
        // Both CTE and subquery
        query: Box<SelectStatement>,
        alias: String, // Required alias for subqueries
    },
}
515
/// Join type enumeration
///
/// Stored in `JoinClause::join_type`.
#[derive(Debug, Clone, PartialEq)]
pub enum JoinType {
    /// `INNER JOIN`
    Inner,
    /// `LEFT JOIN`
    Left,
    /// `RIGHT JOIN`
    Right,
    /// `FULL JOIN`
    Full,
    /// `CROSS JOIN`
    Cross,
}
525
/// Join operator for join conditions
///
/// The comparison applied between the two sides of a `SingleJoinCondition`.
#[derive(Debug, Clone, PartialEq)]
pub enum JoinOperator {
    /// Equality comparison
    Equal,
    /// Inequality comparison
    NotEqual,
    /// Strictly-less-than comparison
    LessThan,
    /// Strictly-greater-than comparison
    GreaterThan,
    /// Less-than-or-equal comparison
    LessThanOrEqual,
    /// Greater-than-or-equal comparison
    GreaterThanOrEqual,
}
536
/// Single join condition
///
/// One comparison of the form `left_expr <operator> right_expr`.
#[derive(Debug, Clone)]
pub struct SingleJoinCondition {
    pub left_expr: SqlExpression, // Expression from left table (can be column, function call, etc.)
    pub operator: JoinOperator,   // Join operator
    pub right_expr: SqlExpression, // Expression from right table (can be column, function call, etc.)
}
544
/// Join condition - can be multiple conditions connected by AND
///
/// There is no per-condition connector field: the individual conditions are
/// implicitly AND-ed together.
#[derive(Debug, Clone)]
pub struct JoinCondition {
    pub conditions: Vec<SingleJoinCondition>, // Multiple conditions connected by AND
}
550
/// Join clause structure
///
/// One JOIN entry of a `SelectStatement`: the join type, the joined source,
/// its optional alias and the ON condition(s).
#[derive(Debug, Clone)]
pub struct JoinClause {
    /// Kind of join (inner, left, right, full, cross)
    pub join_type: JoinType,
    pub table: TableSource,       // The table being joined
    pub alias: Option<String>,    // Optional alias for the joined table
    pub condition: JoinCondition, // ON condition(s)
}