Skip to main content

sql_cli/sql/parser/
ast.rs

//! Abstract Syntax Tree (AST) definitions for SQL queries
//!
//! This module contains all the data structures that represent
//! the parsed SQL query structure.
5
// ===== Comment Types =====
7
/// A SQL comment, in either line (`--`) or block (`/* */`) form.
#[derive(Debug, Clone, PartialEq)]
pub struct Comment {
    /// Body of the comment, without the `--` or `/* */` delimiters.
    pub text: String,
    /// `true` for a line comment (`--`), `false` for a block comment (`/* */`).
    pub is_line_comment: bool,
}

impl Comment {
    /// Builds a line (`--`) comment holding `text`.
    pub fn line(text: String) -> Self {
        Comment {
            is_line_comment: true,
            text,
        }
    }

    /// Builds a block (`/* */`) comment holding `text`.
    pub fn block(text: String) -> Self {
        Comment {
            is_line_comment: false,
            text,
        }
    }
}
34
// ===== Expression Types =====
36
/// How an identifier (column name, table name, etc.) is quoted.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum QuoteStyle {
    /// Not quoted: the identifier is valid without quotes.
    None,
    /// Wrapped in double quotes, e.g. `"Customer Id"`.
    DoubleQuotes,
    /// Wrapped in SQL Server style brackets, e.g. `[Customer Id]`.
    Brackets,
}
47
48/// Column reference with optional quoting information and table prefix
49#[derive(Debug, Clone, PartialEq, Eq, Hash)]
50pub struct ColumnRef {
51    pub name: String,
52    pub quote_style: QuoteStyle,
53    /// Optional table/alias prefix (e.g., "messages" in "messages.field_name")
54    pub table_prefix: Option<String>,
55}
56
57impl ColumnRef {
58    /// Create an unquoted column reference
59    pub fn unquoted(name: String) -> Self {
60        Self {
61            name,
62            quote_style: QuoteStyle::None,
63            table_prefix: None,
64        }
65    }
66
67    /// Create a double-quoted column reference
68    pub fn quoted(name: String) -> Self {
69        Self {
70            name,
71            quote_style: QuoteStyle::DoubleQuotes,
72            table_prefix: None,
73        }
74    }
75
76    /// Create a qualified column reference (table.column)
77    pub fn qualified(table: String, name: String) -> Self {
78        Self {
79            name,
80            quote_style: QuoteStyle::None,
81            table_prefix: Some(table),
82        }
83    }
84
85    /// Get the full qualified string representation
86    pub fn to_qualified_string(&self) -> String {
87        match &self.table_prefix {
88            Some(table) => format!("{}.{}", table, self.name),
89            None => self.name.clone(),
90        }
91    }
92
93    /// Create a bracket-quoted column reference
94    pub fn bracketed(name: String) -> Self {
95        Self {
96            name,
97            quote_style: QuoteStyle::Brackets,
98            table_prefix: None,
99        }
100    }
101
102    /// Format the column reference with appropriate quoting
103    pub fn to_sql(&self) -> String {
104        let column_part = match self.quote_style {
105            QuoteStyle::None => self.name.clone(),
106            QuoteStyle::DoubleQuotes => format!("\"{}\"", self.name),
107            QuoteStyle::Brackets => format!("[{}]", self.name),
108        };
109
110        match &self.table_prefix {
111            Some(table) => format!("{}.{}", table, column_part),
112            None => column_part,
113        }
114    }
115}
116
117impl PartialEq<str> for ColumnRef {
118    fn eq(&self, other: &str) -> bool {
119        self.name == other
120    }
121}
122
123impl PartialEq<&str> for ColumnRef {
124    fn eq(&self, other: &&str) -> bool {
125        self.name == *other
126    }
127}
128
129impl std::fmt::Display for ColumnRef {
130    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
131        write!(f, "{}", self.to_sql())
132    }
133}
134
135#[derive(Debug, Clone)]
136pub enum SqlExpression {
137    Column(ColumnRef),
138    StringLiteral(String),
139    NumberLiteral(String),
140    BooleanLiteral(bool),
141    Null, // NULL literal
142    DateTimeConstructor {
143        year: i32,
144        month: u32,
145        day: u32,
146        hour: Option<u32>,
147        minute: Option<u32>,
148        second: Option<u32>,
149    },
150    DateTimeToday {
151        hour: Option<u32>,
152        minute: Option<u32>,
153        second: Option<u32>,
154    },
155    MethodCall {
156        object: String,
157        method: String,
158        args: Vec<SqlExpression>,
159    },
160    ChainedMethodCall {
161        base: Box<SqlExpression>,
162        method: String,
163        args: Vec<SqlExpression>,
164    },
165    FunctionCall {
166        name: String,
167        args: Vec<SqlExpression>,
168        distinct: bool, // For COUNT(DISTINCT col), SUM(DISTINCT col), etc.
169    },
170    WindowFunction {
171        name: String,
172        args: Vec<SqlExpression>,
173        window_spec: WindowSpec,
174    },
175    BinaryOp {
176        left: Box<SqlExpression>,
177        op: String,
178        right: Box<SqlExpression>,
179    },
180    InList {
181        expr: Box<SqlExpression>,
182        values: Vec<SqlExpression>,
183    },
184    NotInList {
185        expr: Box<SqlExpression>,
186        values: Vec<SqlExpression>,
187    },
188    Between {
189        expr: Box<SqlExpression>,
190        lower: Box<SqlExpression>,
191        upper: Box<SqlExpression>,
192    },
193    Not {
194        expr: Box<SqlExpression>,
195    },
196    CaseExpression {
197        when_branches: Vec<WhenBranch>,
198        else_branch: Option<Box<SqlExpression>>,
199    },
200    SimpleCaseExpression {
201        expr: Box<SqlExpression>,
202        when_branches: Vec<SimpleWhenBranch>,
203        else_branch: Option<Box<SqlExpression>>,
204    },
205    /// Scalar subquery that returns a single value
206    /// Used in expressions like: WHERE col = (SELECT MAX(id) FROM table)
207    ScalarSubquery {
208        query: Box<SelectStatement>,
209    },
210    /// IN subquery that returns multiple values
211    /// Used in expressions like: WHERE col IN (SELECT id FROM table WHERE ...)
212    InSubquery {
213        expr: Box<SqlExpression>,
214        subquery: Box<SelectStatement>,
215    },
216    /// UNNEST - Row expansion function that splits delimited strings
217    /// Used like: SELECT UNNEST(accounts, '|') AS account FROM fix_trades
218    /// Causes row multiplication - one input row becomes N output rows
219    Unnest {
220        column: Box<SqlExpression>,
221        delimiter: String,
222    },
223    /// NOT IN subquery
224    /// Used in expressions like: WHERE col NOT IN (SELECT id FROM table WHERE ...)
225    NotInSubquery {
226        expr: Box<SqlExpression>,
227        subquery: Box<SelectStatement>,
228    },
229    /// Tuple IN subquery — matches multiple columns at once
230    /// Used in expressions like: WHERE (a, b) IN (SELECT x, y FROM table WHERE ...)
231    /// The subquery must return the same number of columns as the LHS tuple.
232    InSubqueryTuple {
233        exprs: Vec<SqlExpression>,
234        subquery: Box<SelectStatement>,
235    },
236    /// Tuple NOT IN subquery
237    /// Used in expressions like: WHERE (a, b) NOT IN (SELECT x, y FROM table WHERE ...)
238    NotInSubqueryTuple {
239        exprs: Vec<SqlExpression>,
240        subquery: Box<SelectStatement>,
241    },
242}
243
244#[derive(Debug, Clone)]
245pub struct WhenBranch {
246    pub condition: Box<SqlExpression>,
247    pub result: Box<SqlExpression>,
248}
249
250#[derive(Debug, Clone)]
251pub struct SimpleWhenBranch {
252    pub value: Box<SqlExpression>,
253    pub result: Box<SqlExpression>,
254}
255
// ===== WHERE Clause Types =====
257
258#[derive(Debug, Clone)]
259pub struct WhereClause {
260    pub conditions: Vec<Condition>,
261}
262
263#[derive(Debug, Clone)]
264pub struct Condition {
265    pub expr: SqlExpression,
266    pub connector: Option<LogicalOp>, // AND/OR connecting to next condition
267}
268
/// Logical connector between two consecutive conditions.
#[derive(Debug, Clone)]
pub enum LogicalOp {
    /// `AND`
    And,
    /// `OR`
    Or,
}
274
// ===== ORDER BY Types =====
276
/// Sort direction of an ORDER BY item.
#[derive(Debug, Clone, PartialEq)]
pub enum SortDirection {
    /// Ascending order.
    Asc,
    /// Descending order.
    Desc,
}

impl SortDirection {
    /// Encodes the direction as a small integer: `Asc` is 0, `Desc` is 1.
    pub fn as_u8(&self) -> u8 {
        match *self {
            Self::Asc => 0,
            Self::Desc => 1,
        }
    }
}
291
292/// Legacy structure - kept for backward compatibility
293/// New code should use OrderByItem
294#[derive(Debug, Clone)]
295pub struct OrderByColumn {
296    pub column: String,
297    pub direction: SortDirection,
298}
299
300/// Modern ORDER BY item that supports expressions
301#[derive(Debug, Clone)]
302pub struct OrderByItem {
303    pub expr: SqlExpression,
304    pub direction: SortDirection,
305}
306
307impl OrderByItem {
308    /// Create from a simple column name (for backward compatibility)
309    pub fn from_column_name(name: String, direction: SortDirection) -> Self {
310        Self {
311            expr: SqlExpression::Column(ColumnRef {
312                name,
313                quote_style: QuoteStyle::None,
314                table_prefix: None,
315            }),
316            direction,
317        }
318    }
319
320    /// Create from an expression
321    pub fn from_expression(expr: SqlExpression, direction: SortDirection) -> Self {
322        Self { expr, direction }
323    }
324}
325
// ===== Window Function Types =====
327
/// One boundary of a window frame.
#[derive(Debug, Clone, PartialEq)]
pub enum FrameBound {
    /// `UNBOUNDED PRECEDING`
    UnboundedPreceding,
    /// `CURRENT ROW`
    CurrentRow,
    /// `n PRECEDING`, carrying the offset `n`.
    Preceding(i64),
    /// `n FOLLOWING`, carrying the offset `n`.
    Following(i64),
    /// `UNBOUNDED FOLLOWING`
    UnboundedFollowing,
}
337
/// Unit in which a window frame is expressed (ROWS or RANGE).
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum FrameUnit {
    /// `ROWS`
    Rows,
    /// `RANGE`
    Range,
}

impl FrameUnit {
    /// Encodes the unit as a small integer: `Rows` is 0, `Range` is 1.
    pub fn as_u8(&self) -> u8 {
        match *self {
            Self::Rows => 0,
            Self::Range => 1,
        }
    }
}
353
354/// Window frame specification
355#[derive(Debug, Clone)]
356pub struct WindowFrame {
357    pub unit: FrameUnit,
358    pub start: FrameBound,
359    pub end: Option<FrameBound>, // None means CURRENT ROW
360}
361
362#[derive(Debug, Clone)]
363pub struct WindowSpec {
364    pub partition_by: Vec<String>,
365    pub order_by: Vec<OrderByItem>,
366    pub frame: Option<WindowFrame>, // Optional window frame
367}
368
369impl WindowSpec {
370    /// Compute a fast hash for cache key purposes
371    /// Much faster than format!("{:?}", spec) used previously
372    pub fn compute_hash(&self) -> u64 {
373        use std::collections::hash_map::DefaultHasher;
374        use std::hash::{Hash, Hasher};
375
376        let mut hasher = DefaultHasher::new();
377
378        // Hash partition_by columns
379        for col in &self.partition_by {
380            col.hash(&mut hasher);
381        }
382
383        // Hash order_by items (just the column names for simplicity)
384        for item in &self.order_by {
385            // For ORDER BY, we typically just have column references
386            // Hash a string representation for simplicity
387            format!("{:?}", item.expr).hash(&mut hasher);
388            item.direction.as_u8().hash(&mut hasher);
389        }
390
391        // Hash frame specification
392        if let Some(ref frame) = self.frame {
393            frame.unit.as_u8().hash(&mut hasher);
394            format!("{:?}", frame.start).hash(&mut hasher);
395            if let Some(ref end) = frame.end {
396                format!("{:?}", end).hash(&mut hasher);
397            }
398        }
399
400        hasher.finish()
401    }
402}
403
// ===== SELECT Statement Types =====
405
/// Kind of set operation used to combine SELECT statements.
#[derive(Debug, Clone, PartialEq)]
pub enum SetOperation {
    /// `UNION ALL`: combines results without deduplication.
    UnionAll,
    /// `UNION`: combines results with deduplication (not yet implemented).
    Union,
    /// `INTERSECT`: returns common rows (not yet implemented).
    Intersect,
    /// `EXCEPT`: returns rows from the left side not in the right (not yet implemented).
    Except,
}
418
419/// Represents a SELECT item - either a simple column or a computed expression with alias
420#[derive(Debug, Clone)]
421pub enum SelectItem {
422    /// Simple column reference: "`column_name`"
423    Column {
424        column: ColumnRef,
425        leading_comments: Vec<Comment>,
426        trailing_comment: Option<Comment>,
427    },
428    /// Computed expression with alias: "expr AS alias"
429    Expression {
430        expr: SqlExpression,
431        alias: String,
432        leading_comments: Vec<Comment>,
433        trailing_comment: Option<Comment>,
434    },
435    /// Star selector: "*" or "table.*"
436    Star {
437        table_prefix: Option<String>, // e.g., Some("p") for "p.*"
438        leading_comments: Vec<Comment>,
439        trailing_comment: Option<Comment>,
440    },
441    /// Star with EXCLUDE: "* EXCLUDE (col1, col2)"
442    StarExclude {
443        table_prefix: Option<String>,
444        excluded_columns: Vec<String>,
445        leading_comments: Vec<Comment>,
446        trailing_comment: Option<Comment>,
447    },
448}
449
450#[derive(Debug, Clone)]
451pub struct SelectStatement {
452    pub distinct: bool,                // SELECT DISTINCT flag
453    pub columns: Vec<String>,          // Keep for backward compatibility, will be deprecated
454    pub select_items: Vec<SelectItem>, // New field for computed expressions
455
456    // Modern unified FROM source (preferred)
457    pub from_source: Option<TableSource>, // Unified FROM source (table, subquery, function, PIVOT, etc.)
458
459    // Legacy FROM fields (deprecated but kept for compatibility during migration)
460    #[deprecated(note = "Use from_source instead")]
461    pub from_table: Option<String>,
462    #[deprecated(note = "Use from_source instead")]
463    pub from_subquery: Option<Box<SelectStatement>>, // Subquery in FROM clause
464    #[deprecated(note = "Use from_source instead")]
465    pub from_function: Option<TableFunction>, // Table function like RANGE() in FROM clause
466    #[deprecated(note = "Use from_source instead")]
467    pub from_alias: Option<String>, // Alias for subquery (AS name)
468
469    pub joins: Vec<JoinClause>, // JOIN clauses
470    pub where_clause: Option<WhereClause>,
471    pub order_by: Option<Vec<OrderByItem>>, // Supports expressions: columns, aggregates, CASE, etc.
472    pub group_by: Option<Vec<SqlExpression>>, // Changed from Vec<String> to support expressions
473    pub having: Option<SqlExpression>,      // HAVING clause for post-aggregation filtering
474    pub qualify: Option<SqlExpression>, // QUALIFY clause for window function filtering (Snowflake-style)
475    pub limit: Option<usize>,
476    pub offset: Option<usize>,
477    pub ctes: Vec<CTE>,                // Common Table Expressions (WITH clause)
478    pub into_table: Option<IntoTable>, // INTO clause for temporary tables
479    pub set_operations: Vec<(SetOperation, Box<SelectStatement>)>, // UNION/INTERSECT/EXCEPT operations
480
481    // Comment preservation
482    pub leading_comments: Vec<Comment>, // Comments before the SELECT keyword
483    pub trailing_comment: Option<Comment>, // Trailing comment at end of statement
484}
485
486impl Default for SelectStatement {
487    fn default() -> Self {
488        SelectStatement {
489            distinct: false,
490            columns: Vec::new(),
491            select_items: Vec::new(),
492            from_source: None,
493            #[allow(deprecated)]
494            from_table: None,
495            #[allow(deprecated)]
496            from_subquery: None,
497            #[allow(deprecated)]
498            from_function: None,
499            #[allow(deprecated)]
500            from_alias: None,
501            joins: Vec::new(),
502            where_clause: None,
503            order_by: None,
504            group_by: None,
505            having: None,
506            qualify: None,
507            limit: None,
508            offset: None,
509            ctes: Vec::new(),
510            into_table: None,
511            set_operations: Vec::new(),
512            leading_comments: Vec::new(),
513            trailing_comment: None,
514        }
515    }
516}
517
/// Target of an INTO clause, used to create a temporary table.
#[derive(Debug, Clone, PartialEq)]
pub struct IntoTable {
    /// Temporary table name (must start with `#`).
    pub name: String,
}
524
// ===== Table and Join Types =====
526
527/// Table function that generates virtual tables
528#[derive(Debug, Clone)]
529pub enum TableFunction {
530    Generator {
531        name: String,
532        args: Vec<SqlExpression>,
533    },
534}
535
536/// Common Table Expression (CTE) structure
537#[derive(Debug, Clone)]
538pub struct CTE {
539    pub name: String,
540    pub column_list: Option<Vec<String>>, // Optional column list: WITH t(col1, col2) AS ...
541    pub cte_type: CTEType,
542}
543
544/// Type of CTE - standard SQL, WEB fetch, or FILE (filesystem metadata)
545#[derive(Debug, Clone)]
546pub enum CTEType {
547    Standard(SelectStatement),
548    Web(WebCTESpec),
549    File(FileCTESpec),
550}
551
552/// Specification for WEB CTEs
553#[derive(Debug, Clone)]
554pub struct WebCTESpec {
555    pub url: String,
556    pub format: Option<DataFormat>,        // CSV, JSON, or auto-detect
557    pub headers: Vec<(String, String)>,    // HTTP headers
558    pub cache_seconds: Option<u64>,        // Cache duration
559    pub method: Option<HttpMethod>,        // HTTP method (GET, POST, etc.)
560    pub body: Option<String>,              // Request body for POST/PUT
561    pub json_path: Option<String>, // JSON path to extract (e.g., "Result" for {Result: [...]})
562    pub form_files: Vec<(String, String)>, // Multipart form files: (field_name, file_path)
563    pub form_fields: Vec<(String, String)>, // Multipart form fields: (field_name, value)
564    pub template_vars: Vec<TemplateVar>, // Template variables for injection from temp tables
565}
566
/// Template variable used to inject temp table data into a WEB CTE.
#[derive(Debug, Clone)]
pub struct TemplateVar {
    /// Placeholder text, e.g. `"${#instruments}"`.
    pub placeholder: String,
    /// Temp table name, e.g. `"#instruments"`.
    pub table_name: String,
    /// Column selector, e.g. `Some("symbol")` for `${#instruments.symbol}`.
    pub column: Option<String>,
    /// Index selector, e.g. `Some(0)` for `${#instruments[0]}`.
    pub index: Option<usize>,
}
575
/// HTTP method used by a WEB CTE request.
#[derive(Debug, Clone)]
pub enum HttpMethod {
    /// `GET`
    GET,
    /// `POST`
    POST,
    /// `PUT`
    PUT,
    /// `DELETE`
    DELETE,
    /// `PATCH`
    PATCH,
}
585
/// Format of the data returned to a WEB CTE.
#[derive(Debug, Clone)]
pub enum DataFormat {
    /// CSV data.
    CSV,
    /// JSON data.
    JSON,
    /// Auto-detect from Content-Type or extension.
    Auto,
}
593
/// Specification of a FILE CTE, which exposes filesystem metadata as a
/// virtual table.
///
/// One row is produced per matched filesystem entry, with metadata columns
/// (path, size, mtime, etc). Phase 1 is metadata-only: file contents are not
/// read.
///
/// See `docs/FILE_CTE_DESIGN.md` for design rationale and hazards.
#[derive(Debug, Clone)]
pub struct FileCTESpec {
    /// Root path of the walk. Relative paths are resolved against the CWD.
    pub path: String,
    /// When true, subdirectories are walked recursively; otherwise only the
    /// single directory is listed.
    pub recursive: bool,
    /// Optional glob pattern applied at walker level (e.g. "*.csv").
    pub glob: Option<String>,
    /// Maximum walk depth. `None` means unlimited (only meaningful if `recursive`).
    pub max_depth: Option<usize>,
    /// Hard cap on the number of rows produced. `None` means the config
    /// default (500k) is used. The walker fails loudly when the cap is
    /// exceeded and never silently truncates.
    pub max_files: Option<usize>,
    /// When true, symlinks are followed. Default false (avoids cycles).
    pub follow_links: bool,
    /// When true, dotfiles are included. Default false.
    pub include_hidden: bool,
}
618
/// Aggregate applied by a PIVOT operation.
/// Example: `MAX(AmountEaten)`
#[derive(Debug, Clone)]
pub struct PivotAggregate {
    /// Aggregate function name, e.g. "MAX", "SUM", "MIN", "AVG", "COUNT".
    pub function: String,
    /// Column the aggregate is applied to, e.g. "AmountEaten".
    pub column: String,
}
626
627/// Table source - either a file/table name or a derived table (subquery/CTE)
628#[derive(Debug, Clone)]
629pub enum TableSource {
630    Table(String), // Regular table from CSV/JSON
631    DerivedTable {
632        // Both CTE and subquery
633        query: Box<SelectStatement>,
634        alias: String, // Required alias for subqueries
635    },
636    /// PIVOT operation - transforms rows into columns
637    /// Example: PIVOT (MAX(AmountEaten) FOR FoodName IN ('Sammich', 'Pickle'))
638    Pivot {
639        source: Box<TableSource>,  // The input table/subquery to pivot
640        aggregate: PivotAggregate, // The aggregate function to apply
641        pivot_column: String,      // Column whose values become new columns
642        pivot_values: Vec<String>, // Specific values to pivot (becomes column names)
643        alias: Option<String>,     // Optional alias for the pivoted result
644    },
645}
646
/// Kind of JOIN.
#[derive(Debug, Clone, PartialEq)]
pub enum JoinType {
    /// `INNER JOIN`
    Inner,
    /// `LEFT JOIN`
    Left,
    /// `RIGHT JOIN`
    Right,
    /// `FULL JOIN`
    Full,
    /// `CROSS JOIN`
    Cross,
}
656
/// Comparison operator used inside a join condition.
#[derive(Debug, Clone, PartialEq)]
pub enum JoinOperator {
    /// Equality comparison.
    Equal,
    /// Inequality comparison.
    NotEqual,
    /// Strictly-less-than comparison.
    LessThan,
    /// Strictly-greater-than comparison.
    GreaterThan,
    /// Less-than-or-equal comparison.
    LessThanOrEqual,
    /// Greater-than-or-equal comparison.
    GreaterThanOrEqual,
}
667
668/// Single join condition
669#[derive(Debug, Clone)]
670pub struct SingleJoinCondition {
671    pub left_expr: SqlExpression, // Expression from left table (can be column, function call, etc.)
672    pub operator: JoinOperator,   // Join operator
673    pub right_expr: SqlExpression, // Expression from right table (can be column, function call, etc.)
674}
675
676/// Join condition - can be multiple conditions connected by AND
677#[derive(Debug, Clone)]
678pub struct JoinCondition {
679    pub conditions: Vec<SingleJoinCondition>, // Multiple conditions connected by AND
680}
681
682/// Join clause structure
683#[derive(Debug, Clone)]
684pub struct JoinClause {
685    pub join_type: JoinType,
686    pub table: TableSource,       // The table being joined
687    pub alias: Option<String>,    // Optional alias for the joined table
688    pub condition: JoinCondition, // ON condition(s)
689}