sql_cli/sql/parser/
ast.rs

//! Abstract Syntax Tree (AST) definitions for SQL queries
//!
//! This module contains all the data structures that represent
//! the parsed SQL query structure.
5
// ===== Comment Types =====
7
/// A SQL comment attached to the AST, either a line comment or a block comment.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Comment {
    /// The comment text (without delimiters like -- or /* */)
    pub text: String,
    /// True for line comments (--), false for block comments (/* */)
    pub is_line_comment: bool,
}
16
17impl Comment {
18    /// Create a new line comment
19    pub fn line(text: String) -> Self {
20        Self {
21            text,
22            is_line_comment: true,
23        }
24    }
25
26    /// Create a new block comment
27    pub fn block(text: String) -> Self {
28        Self {
29            text,
30            is_line_comment: false,
31        }
32    }
33}
34
// ===== Expression Types =====
36
/// Quote style for identifiers (column names, table names, etc.)
///
/// Recorded alongside the identifier so it can be written back out with the
/// same delimiters.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum QuoteStyle {
    /// No quotes needed (valid unquoted identifier)
    None,
    /// Double quotes: "Customer Id"
    DoubleQuotes,
    /// SQL Server style brackets: [Customer Id]
    Brackets,
}
47
48/// Column reference with optional quoting information and table prefix
49#[derive(Debug, Clone, PartialEq, Eq, Hash)]
50pub struct ColumnRef {
51    pub name: String,
52    pub quote_style: QuoteStyle,
53    /// Optional table/alias prefix (e.g., "messages" in "messages.field_name")
54    pub table_prefix: Option<String>,
55}
56
57impl ColumnRef {
58    /// Create an unquoted column reference
59    pub fn unquoted(name: String) -> Self {
60        Self {
61            name,
62            quote_style: QuoteStyle::None,
63            table_prefix: None,
64        }
65    }
66
67    /// Create a double-quoted column reference
68    pub fn quoted(name: String) -> Self {
69        Self {
70            name,
71            quote_style: QuoteStyle::DoubleQuotes,
72            table_prefix: None,
73        }
74    }
75
76    /// Create a qualified column reference (table.column)
77    pub fn qualified(table: String, name: String) -> Self {
78        Self {
79            name,
80            quote_style: QuoteStyle::None,
81            table_prefix: Some(table),
82        }
83    }
84
85    /// Get the full qualified string representation
86    pub fn to_qualified_string(&self) -> String {
87        match &self.table_prefix {
88            Some(table) => format!("{}.{}", table, self.name),
89            None => self.name.clone(),
90        }
91    }
92
93    /// Create a bracket-quoted column reference
94    pub fn bracketed(name: String) -> Self {
95        Self {
96            name,
97            quote_style: QuoteStyle::Brackets,
98            table_prefix: None,
99        }
100    }
101
102    /// Format the column reference with appropriate quoting
103    pub fn to_sql(&self) -> String {
104        let column_part = match self.quote_style {
105            QuoteStyle::None => self.name.clone(),
106            QuoteStyle::DoubleQuotes => format!("\"{}\"", self.name),
107            QuoteStyle::Brackets => format!("[{}]", self.name),
108        };
109
110        match &self.table_prefix {
111            Some(table) => format!("{}.{}", table, column_part),
112            None => column_part,
113        }
114    }
115}
116
117impl PartialEq<str> for ColumnRef {
118    fn eq(&self, other: &str) -> bool {
119        self.name == other
120    }
121}
122
123impl PartialEq<&str> for ColumnRef {
124    fn eq(&self, other: &&str) -> bool {
125        self.name == *other
126    }
127}
128
129impl std::fmt::Display for ColumnRef {
130    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
131        write!(f, "{}", self.to_sql())
132    }
133}
134
135#[derive(Debug, Clone)]
136pub enum SqlExpression {
137    Column(ColumnRef),
138    StringLiteral(String),
139    NumberLiteral(String),
140    BooleanLiteral(bool),
141    Null, // NULL literal
142    DateTimeConstructor {
143        year: i32,
144        month: u32,
145        day: u32,
146        hour: Option<u32>,
147        minute: Option<u32>,
148        second: Option<u32>,
149    },
150    DateTimeToday {
151        hour: Option<u32>,
152        minute: Option<u32>,
153        second: Option<u32>,
154    },
155    MethodCall {
156        object: String,
157        method: String,
158        args: Vec<SqlExpression>,
159    },
160    ChainedMethodCall {
161        base: Box<SqlExpression>,
162        method: String,
163        args: Vec<SqlExpression>,
164    },
165    FunctionCall {
166        name: String,
167        args: Vec<SqlExpression>,
168        distinct: bool, // For COUNT(DISTINCT col), SUM(DISTINCT col), etc.
169    },
170    WindowFunction {
171        name: String,
172        args: Vec<SqlExpression>,
173        window_spec: WindowSpec,
174    },
175    BinaryOp {
176        left: Box<SqlExpression>,
177        op: String,
178        right: Box<SqlExpression>,
179    },
180    InList {
181        expr: Box<SqlExpression>,
182        values: Vec<SqlExpression>,
183    },
184    NotInList {
185        expr: Box<SqlExpression>,
186        values: Vec<SqlExpression>,
187    },
188    Between {
189        expr: Box<SqlExpression>,
190        lower: Box<SqlExpression>,
191        upper: Box<SqlExpression>,
192    },
193    Not {
194        expr: Box<SqlExpression>,
195    },
196    CaseExpression {
197        when_branches: Vec<WhenBranch>,
198        else_branch: Option<Box<SqlExpression>>,
199    },
200    SimpleCaseExpression {
201        expr: Box<SqlExpression>,
202        when_branches: Vec<SimpleWhenBranch>,
203        else_branch: Option<Box<SqlExpression>>,
204    },
205    /// Scalar subquery that returns a single value
206    /// Used in expressions like: WHERE col = (SELECT MAX(id) FROM table)
207    ScalarSubquery {
208        query: Box<SelectStatement>,
209    },
210    /// IN subquery that returns multiple values
211    /// Used in expressions like: WHERE col IN (SELECT id FROM table WHERE ...)
212    InSubquery {
213        expr: Box<SqlExpression>,
214        subquery: Box<SelectStatement>,
215    },
216    /// UNNEST - Row expansion function that splits delimited strings
217    /// Used like: SELECT UNNEST(accounts, '|') AS account FROM fix_trades
218    /// Causes row multiplication - one input row becomes N output rows
219    Unnest {
220        column: Box<SqlExpression>,
221        delimiter: String,
222    },
223    /// NOT IN subquery
224    /// Used in expressions like: WHERE col NOT IN (SELECT id FROM table WHERE ...)
225    NotInSubquery {
226        expr: Box<SqlExpression>,
227        subquery: Box<SelectStatement>,
228    },
229}
230
231#[derive(Debug, Clone)]
232pub struct WhenBranch {
233    pub condition: Box<SqlExpression>,
234    pub result: Box<SqlExpression>,
235}
236
237#[derive(Debug, Clone)]
238pub struct SimpleWhenBranch {
239    pub value: Box<SqlExpression>,
240    pub result: Box<SqlExpression>,
241}
242
// ===== WHERE Clause Types =====
244
245#[derive(Debug, Clone)]
246pub struct WhereClause {
247    pub conditions: Vec<Condition>,
248}
249
250#[derive(Debug, Clone)]
251pub struct Condition {
252    pub expr: SqlExpression,
253    pub connector: Option<LogicalOp>, // AND/OR connecting to next condition
254}
255
/// Logical connector between consecutive `WHERE` conditions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LogicalOp {
    /// `AND`
    And,
    /// `OR`
    Or,
}
261
// ===== ORDER BY Types =====
263
/// Sort direction of an `ORDER BY` column.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SortDirection {
    /// Ascending (`ASC`)
    Asc,
    /// Descending (`DESC`)
    Desc,
}
269
270#[derive(Debug, Clone)]
271pub struct OrderByColumn {
272    pub column: String,
273    pub direction: SortDirection,
274}
275
// ===== Window Function Types =====
277
/// Window frame bounds
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FrameBound {
    /// `UNBOUNDED PRECEDING`
    UnboundedPreceding,
    /// `CURRENT ROW`
    CurrentRow,
    /// `<n> PRECEDING`, carrying the offset `n`
    Preceding(i64),
    /// `<n> FOLLOWING`, carrying the offset `n`
    Following(i64),
    /// `UNBOUNDED FOLLOWING`
    UnboundedFollowing,
}
287
/// Window frame unit (ROWS or RANGE)
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FrameUnit {
    /// `ROWS`
    Rows,
    /// `RANGE`
    Range,
}
294
295/// Window frame specification
296#[derive(Debug, Clone)]
297pub struct WindowFrame {
298    pub unit: FrameUnit,
299    pub start: FrameBound,
300    pub end: Option<FrameBound>, // None means CURRENT ROW
301}
302
303#[derive(Debug, Clone)]
304pub struct WindowSpec {
305    pub partition_by: Vec<String>,
306    pub order_by: Vec<OrderByColumn>,
307    pub frame: Option<WindowFrame>, // Optional window frame
308}
309
// ===== SELECT Statement Types =====
311
/// Set operation type for combining SELECT statements
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SetOperation {
    /// UNION ALL - combines results without deduplication
    UnionAll,
    /// UNION - combines results with deduplication (not yet implemented)
    Union,
    /// INTERSECT - returns common rows (not yet implemented)
    Intersect,
    /// EXCEPT - returns rows from left not in right (not yet implemented)
    Except,
}
324
325/// Represents a SELECT item - either a simple column or a computed expression with alias
326#[derive(Debug, Clone)]
327pub enum SelectItem {
328    /// Simple column reference: "`column_name`"
329    Column {
330        column: ColumnRef,
331        leading_comments: Vec<Comment>,
332        trailing_comment: Option<Comment>,
333    },
334    /// Computed expression with alias: "expr AS alias"
335    Expression {
336        expr: SqlExpression,
337        alias: String,
338        leading_comments: Vec<Comment>,
339        trailing_comment: Option<Comment>,
340    },
341    /// Star selector: "*"
342    Star {
343        leading_comments: Vec<Comment>,
344        trailing_comment: Option<Comment>,
345    },
346}
347
348#[derive(Debug, Clone)]
349pub struct SelectStatement {
350    pub distinct: bool,                // SELECT DISTINCT flag
351    pub columns: Vec<String>,          // Keep for backward compatibility, will be deprecated
352    pub select_items: Vec<SelectItem>, // New field for computed expressions
353    pub from_table: Option<String>,
354    pub from_subquery: Option<Box<SelectStatement>>, // Subquery in FROM clause
355    pub from_function: Option<TableFunction>,        // Table function like RANGE() in FROM clause
356    pub from_alias: Option<String>,                  // Alias for subquery (AS name)
357    pub joins: Vec<JoinClause>,                      // JOIN clauses
358    pub where_clause: Option<WhereClause>,
359    pub order_by: Option<Vec<OrderByColumn>>,
360    pub group_by: Option<Vec<SqlExpression>>, // Changed from Vec<String> to support expressions
361    pub having: Option<SqlExpression>,        // HAVING clause for post-aggregation filtering
362    pub limit: Option<usize>,
363    pub offset: Option<usize>,
364    pub ctes: Vec<CTE>,                // Common Table Expressions (WITH clause)
365    pub into_table: Option<IntoTable>, // INTO clause for temporary tables
366    pub set_operations: Vec<(SetOperation, Box<SelectStatement>)>, // UNION/INTERSECT/EXCEPT operations
367
368    // Comment preservation
369    pub leading_comments: Vec<Comment>, // Comments before the SELECT keyword
370    pub trailing_comment: Option<Comment>, // Trailing comment at end of statement
371}
372
/// INTO clause for creating temporary tables
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IntoTable {
    /// Name of the temporary table (must start with #)
    // NOTE(review): the `#` prefix is not enforced by this type - presumably the parser checks it.
    pub name: String,
}
379
// ===== Table and Join Types =====
381
382/// Table function that generates virtual tables
383#[derive(Debug, Clone)]
384pub enum TableFunction {
385    Generator {
386        name: String,
387        args: Vec<SqlExpression>,
388    },
389}
390
391/// Common Table Expression (CTE) structure
392#[derive(Debug, Clone)]
393pub struct CTE {
394    pub name: String,
395    pub column_list: Option<Vec<String>>, // Optional column list: WITH t(col1, col2) AS ...
396    pub cte_type: CTEType,
397}
398
399/// Type of CTE - standard SQL or WEB fetch
400#[derive(Debug, Clone)]
401pub enum CTEType {
402    Standard(SelectStatement),
403    Web(WebCTESpec),
404}
405
406/// Specification for WEB CTEs
407#[derive(Debug, Clone)]
408pub struct WebCTESpec {
409    pub url: String,
410    pub format: Option<DataFormat>,        // CSV, JSON, or auto-detect
411    pub headers: Vec<(String, String)>,    // HTTP headers
412    pub cache_seconds: Option<u64>,        // Cache duration
413    pub method: Option<HttpMethod>,        // HTTP method (GET, POST, etc.)
414    pub body: Option<String>,              // Request body for POST/PUT
415    pub json_path: Option<String>, // JSON path to extract (e.g., "Result" for {Result: [...]})
416    pub form_files: Vec<(String, String)>, // Multipart form files: (field_name, file_path)
417    pub form_fields: Vec<(String, String)>, // Multipart form fields: (field_name, value)
418    pub template_vars: Vec<TemplateVar>, // Template variables for injection from temp tables
419}
420
/// Template variable for injecting temp table data into WEB CTEs
#[derive(Debug, Clone)]
pub struct TemplateVar {
    /// Full placeholder text, e.g., "${#instruments}"
    pub placeholder: String,
    /// Temp table referenced by the placeholder, e.g., "#instruments"
    pub table_name: String,
    /// Optional column selector, e.g., Some("symbol") for ${#instruments.symbol}
    pub column: Option<String>,
    /// Optional index selector, e.g., Some(0) for ${#instruments[0]}
    pub index: Option<usize>,
}
429
/// HTTP methods for WEB CTEs
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum HttpMethod {
    GET,
    POST,
    PUT,
    DELETE,
    PATCH,
}
439
/// Data format for WEB CTEs
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DataFormat {
    CSV,
    JSON,
    /// Auto-detect from Content-Type or extension
    Auto,
}
447
448/// Table source - either a file/table name or a derived table (subquery/CTE)
449#[derive(Debug, Clone)]
450pub enum TableSource {
451    Table(String), // Regular table from CSV/JSON
452    DerivedTable {
453        // Both CTE and subquery
454        query: Box<SelectStatement>,
455        alias: String, // Required alias for subqueries
456    },
457}
458
/// Join type enumeration
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum JoinType {
    /// `INNER JOIN`
    Inner,
    /// `LEFT JOIN`
    Left,
    /// `RIGHT JOIN`
    Right,
    /// `FULL JOIN`
    Full,
    /// `CROSS JOIN`
    Cross,
}
468
/// Join operator for join conditions
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum JoinOperator {
    Equal,
    NotEqual,
    LessThan,
    GreaterThan,
    LessThanOrEqual,
    GreaterThanOrEqual,
}
479
480/// Single join condition
481#[derive(Debug, Clone)]
482pub struct SingleJoinCondition {
483    pub left_column: String, // Column from left table (can include table prefix)
484    pub operator: JoinOperator, // Join operator
485    pub right_column: String, // Column from right table (can include table prefix)
486}
487
488/// Join condition - can be multiple conditions connected by AND
489#[derive(Debug, Clone)]
490pub struct JoinCondition {
491    pub conditions: Vec<SingleJoinCondition>, // Multiple conditions connected by AND
492}
493
494/// Join clause structure
495#[derive(Debug, Clone)]
496pub struct JoinClause {
497    pub join_type: JoinType,
498    pub table: TableSource,       // The table being joined
499    pub alias: Option<String>,    // Optional alias for the joined table
500    pub condition: JoinCondition, // ON condition(s)
501}