sochdb_query/sql/
ast.rs

1// Copyright 2025 Sushanth (https://github.com/sushanthpy)
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! SQL Abstract Syntax Tree
16//!
17//! Represents parsed SQL statements as a tree structure.
18
19use super::token::Span;
20
21/// Top-level SQL statement
22#[derive(Debug, Clone, PartialEq)]
23#[allow(clippy::large_enum_variant)]
24pub enum Statement {
25    Select(SelectStmt),
26    Insert(InsertStmt),
27    Update(UpdateStmt),
28    Delete(DeleteStmt),
29    CreateTable(CreateTableStmt),
30    DropTable(DropTableStmt),
31    AlterTable(AlterTableStmt),
32    CreateIndex(CreateIndexStmt),
33    DropIndex(DropIndexStmt),
34    Begin(BeginStmt),
35    Commit,
36    Rollback(Option<String>), // Optional savepoint name
37    Savepoint(String),
38    Release(String),
39    Explain(Box<Statement>),
40}
41
42/// SELECT statement
43#[derive(Debug, Clone, PartialEq)]
44pub struct SelectStmt {
45    pub span: Span,
46    pub distinct: bool,
47    pub columns: Vec<SelectItem>,
48    pub from: Option<FromClause>,
49    pub where_clause: Option<Expr>,
50    pub group_by: Vec<Expr>,
51    pub having: Option<Expr>,
52    pub order_by: Vec<OrderByItem>,
53    pub limit: Option<Expr>,
54    pub offset: Option<Expr>,
55    pub unions: Vec<(SetOp, Box<SelectStmt>)>,
56}
57
58/// Items in SELECT clause
59#[derive(Debug, Clone, PartialEq)]
60pub enum SelectItem {
61    /// SELECT *
62    Wildcard,
63    /// SELECT table.*
64    QualifiedWildcard(String),
65    /// SELECT expr [AS alias]
66    Expr { expr: Expr, alias: Option<String> },
67}
68
/// Set operations (UNION, INTERSECT, EXCEPT), each with an `ALL` form.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SetOp {
    /// `UNION`
    Union,
    /// `UNION ALL`
    UnionAll,
    /// `INTERSECT`
    Intersect,
    /// `INTERSECT ALL`
    IntersectAll,
    /// `EXCEPT`
    Except,
    /// `EXCEPT ALL`
    ExceptAll,
}
79
80/// FROM clause
81#[derive(Debug, Clone, PartialEq)]
82pub struct FromClause {
83    pub tables: Vec<TableRef>,
84}
85
86/// Table reference in FROM clause
87#[derive(Debug, Clone, PartialEq)]
88pub enum TableRef {
89    /// Simple table: table_name [AS alias]
90    Table {
91        name: ObjectName,
92        alias: Option<String>,
93    },
94    /// Subquery: (SELECT ...) AS alias
95    Subquery {
96        query: Box<SelectStmt>,
97        alias: String,
98    },
99    /// Join: left JOIN right ON condition
100    Join {
101        left: Box<TableRef>,
102        join_type: JoinType,
103        right: Box<TableRef>,
104        condition: Option<JoinCondition>,
105    },
106    /// Table-valued function: func(...) AS alias
107    Function {
108        name: String,
109        args: Vec<Expr>,
110        alias: Option<String>,
111    },
112}
113
/// Join types.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum JoinType {
    /// `INNER` join.
    Inner,
    /// `LEFT` join.
    Left,
    /// `RIGHT` join.
    Right,
    /// `FULL` join.
    Full,
    /// `CROSS` join.
    Cross,
}
123
124/// Join condition
125#[derive(Debug, Clone, PartialEq)]
126pub enum JoinCondition {
127    On(Expr),
128    Using(Vec<String>),
129    Natural,
130}
131
132/// ORDER BY item
133#[derive(Debug, Clone, PartialEq)]
134pub struct OrderByItem {
135    pub expr: Expr,
136    pub asc: bool,
137    pub nulls_first: Option<bool>,
138}
139
140/// INSERT statement
141#[derive(Debug, Clone, PartialEq)]
142pub struct InsertStmt {
143    pub span: Span,
144    pub table: ObjectName,
145    pub columns: Option<Vec<String>>,
146    pub source: InsertSource,
147    pub on_conflict: Option<OnConflict>,
148    pub returning: Option<Vec<SelectItem>>,
149}
150
151/// Source of INSERT data
152#[derive(Debug, Clone, PartialEq)]
153pub enum InsertSource {
154    /// VALUES (a, b), (c, d), ...
155    Values(Vec<Vec<Expr>>),
156    /// SELECT ...
157    Query(Box<SelectStmt>),
158    /// DEFAULT VALUES
159    Default,
160}
161
162/// ON CONFLICT clause
163///
164/// Represents conflict handling for INSERT statements across SQL dialects:
165/// - PostgreSQL: `ON CONFLICT DO NOTHING/UPDATE`
166/// - MySQL: `INSERT IGNORE`, `ON DUPLICATE KEY UPDATE`
167/// - SQLite: `INSERT OR IGNORE/REPLACE/ABORT`
168///
169/// All dialects normalize to this single representation.
170#[derive(Debug, Clone, PartialEq)]
171pub struct OnConflict {
172    pub target: Option<ConflictTarget>,
173    pub action: ConflictAction,
174}
175
/// Target of an ON CONFLICT clause: either a column list or a named constraint.
#[derive(Debug, Clone, PartialEq)]
pub enum ConflictTarget {
    /// Conflict identified by a list of column names.
    Columns(Vec<String>),
    /// Conflict identified by a constraint name.
    Constraint(String),
}
181
182#[derive(Debug, Clone, PartialEq)]
183pub enum ConflictAction {
184    /// ON CONFLICT DO NOTHING / INSERT IGNORE / INSERT OR IGNORE
185    DoNothing,
186    /// ON CONFLICT DO UPDATE SET ... / ON DUPLICATE KEY UPDATE ...
187    DoUpdate(Vec<Assignment>),
188    /// INSERT OR REPLACE (SQLite) - replaces the entire row
189    DoReplace,
190    /// INSERT OR ABORT (SQLite) - abort on conflict (default behavior)
191    DoAbort,
192    /// INSERT OR FAIL (SQLite) - fail but continue with other rows
193    DoFail,
194}
195
196/// UPDATE statement
197#[derive(Debug, Clone, PartialEq)]
198pub struct UpdateStmt {
199    pub span: Span,
200    pub table: ObjectName,
201    pub alias: Option<String>,
202    pub assignments: Vec<Assignment>,
203    pub from: Option<FromClause>,
204    pub where_clause: Option<Expr>,
205    pub returning: Option<Vec<SelectItem>>,
206}
207
208/// Assignment: column = expr
209#[derive(Debug, Clone, PartialEq)]
210pub struct Assignment {
211    pub column: String,
212    pub value: Expr,
213}
214
215/// DELETE statement
216#[derive(Debug, Clone, PartialEq)]
217pub struct DeleteStmt {
218    pub span: Span,
219    pub table: ObjectName,
220    pub alias: Option<String>,
221    pub using: Option<FromClause>,
222    pub where_clause: Option<Expr>,
223    pub returning: Option<Vec<SelectItem>>,
224}
225
226/// CREATE TABLE statement
227#[derive(Debug, Clone, PartialEq)]
228pub struct CreateTableStmt {
229    pub span: Span,
230    pub if_not_exists: bool,
231    pub name: ObjectName,
232    pub columns: Vec<ColumnDef>,
233    pub constraints: Vec<TableConstraint>,
234    pub options: Vec<TableOption>,
235}
236
237/// Column definition
238#[derive(Debug, Clone, PartialEq)]
239pub struct ColumnDef {
240    pub name: String,
241    pub data_type: DataType,
242    pub constraints: Vec<ColumnConstraint>,
243}
244
/// SQL data types.
///
/// Parameterised types carry their arguments; an `Option<u32>` argument is
/// `None` when the type was written without that parameter.
#[derive(Debug, Clone, PartialEq)]
pub enum DataType {
    // Numeric
    TinyInt,
    SmallInt,
    Int,
    BigInt,
    Float,
    Double,
    /// Decimal with optional precision and scale.
    Decimal {
        precision: Option<u32>,
        scale: Option<u32>,
    },

    // String
    /// CHAR with an optional numeric parameter.
    Char(Option<u32>),
    /// VARCHAR with an optional numeric parameter.
    Varchar(Option<u32>),
    Text,

    // Binary
    /// BINARY with an optional numeric parameter.
    Binary(Option<u32>),
    /// VARBINARY with an optional numeric parameter.
    Varbinary(Option<u32>),
    Blob,

    // Date/Time
    Date,
    Time,
    Timestamp,
    DateTime,
    Interval,

    // Boolean
    Boolean,

    // JSON
    Json,
    Jsonb,

    // SochDB Extensions
    /// `VECTOR(dimensions)`
    Vector(u32),
    /// `EMBEDDING(dimensions)`
    Embedding(u32),

    // Custom/Unknown
    /// Any other type, kept as the name it was written with.
    Custom(String),
}
291
292/// Column constraints
293#[derive(Debug, Clone, PartialEq)]
294pub enum ColumnConstraint {
295    NotNull,
296    Null,
297    Unique,
298    PrimaryKey,
299    Default(Expr),
300    Check(Expr),
301    References {
302        table: ObjectName,
303        columns: Vec<String>,
304        on_delete: Option<ReferentialAction>,
305        on_update: Option<ReferentialAction>,
306    },
307    AutoIncrement,
308    Generated {
309        expr: Expr,
310        stored: bool,
311    },
312}
313
314/// Table-level constraints
315#[derive(Debug, Clone, PartialEq)]
316pub enum TableConstraint {
317    PrimaryKey {
318        name: Option<String>,
319        columns: Vec<String>,
320    },
321    Unique {
322        name: Option<String>,
323        columns: Vec<String>,
324    },
325    ForeignKey {
326        name: Option<String>,
327        columns: Vec<String>,
328        ref_table: ObjectName,
329        ref_columns: Vec<String>,
330        on_delete: Option<ReferentialAction>,
331        on_update: Option<ReferentialAction>,
332    },
333    Check {
334        name: Option<String>,
335        expr: Expr,
336    },
337}
338
/// Referential action used by the `on_delete` / `on_update` fields of
/// foreign-key style constraints.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReferentialAction {
    /// `NO ACTION`
    NoAction,
    /// `RESTRICT`
    Restrict,
    /// `CASCADE`
    Cascade,
    /// `SET NULL`
    SetNull,
    /// `SET DEFAULT`
    SetDefault,
}
347
/// Table options (ENGINE, CHARSET, etc.).
///
/// Both the option name and its value are kept as plain strings.
#[derive(Debug, Clone, PartialEq)]
pub struct TableOption {
    /// Option name.
    pub name: String,
    /// Option value.
    pub value: String,
}
354
355/// DROP TABLE statement
356#[derive(Debug, Clone, PartialEq)]
357pub struct DropTableStmt {
358    pub span: Span,
359    pub if_exists: bool,
360    pub names: Vec<ObjectName>,
361    pub cascade: bool,
362}
363
364/// ALTER TABLE statement
365#[derive(Debug, Clone, PartialEq)]
366pub struct AlterTableStmt {
367    pub span: Span,
368    pub name: ObjectName,
369    pub operations: Vec<AlterTableOp>,
370}
371
372#[derive(Debug, Clone, PartialEq)]
373pub enum AlterTableOp {
374    AddColumn(ColumnDef),
375    DropColumn {
376        name: String,
377        cascade: bool,
378    },
379    AlterColumn {
380        name: String,
381        operation: AlterColumnOp,
382    },
383    AddConstraint(TableConstraint),
384    DropConstraint {
385        name: String,
386        cascade: bool,
387    },
388    RenameTable(ObjectName),
389    RenameColumn {
390        old_name: String,
391        new_name: String,
392    },
393}
394
395#[derive(Debug, Clone, PartialEq)]
396pub enum AlterColumnOp {
397    SetType(DataType),
398    SetNotNull,
399    DropNotNull,
400    SetDefault(Expr),
401    DropDefault,
402}
403
404/// CREATE INDEX statement
405#[derive(Debug, Clone, PartialEq)]
406pub struct CreateIndexStmt {
407    pub span: Span,
408    pub unique: bool,
409    pub if_not_exists: bool,
410    pub name: String,
411    pub table: ObjectName,
412    pub columns: Vec<IndexColumn>,
413    pub where_clause: Option<Expr>,
414    pub index_type: Option<IndexType>,
415}
416
/// One column entry of a CREATE INDEX column list.
#[derive(Debug, Clone, PartialEq)]
pub struct IndexColumn {
    /// Column name.
    pub name: String,
    /// Sort direction flag; presumably `true` means ascending — confirm
    /// against the parser.
    pub asc: bool,
    /// NULL placement; `None` presumably means it was not specified —
    /// confirm against the parser.
    pub nulls_first: Option<bool>,
}
423
/// Index types selectable in CREATE INDEX.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IndexType {
    BTree,
    Hash,
    Gin,
    Gist,
    // SochDB extensions
    /// For vector search.
    Hnsw,
    /// For vector search.
    Vamana,
}
434
435/// DROP INDEX statement
436#[derive(Debug, Clone, PartialEq)]
437pub struct DropIndexStmt {
438    pub span: Span,
439    pub if_exists: bool,
440    pub name: String,
441    pub table: Option<ObjectName>,
442    pub cascade: bool,
443}
444
445/// BEGIN statement
446#[derive(Debug, Clone, PartialEq)]
447pub struct BeginStmt {
448    pub read_only: bool,
449    pub isolation_level: Option<IsolationLevel>,
450}
451
/// Transaction isolation levels that a BEGIN statement can carry.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IsolationLevel {
    /// `READ UNCOMMITTED`
    ReadUncommitted,
    /// `READ COMMITTED`
    ReadCommitted,
    /// `REPEATABLE READ`
    RepeatableRead,
    /// `SERIALIZABLE`
    Serializable,
    /// `SNAPSHOT`
    Snapshot,
}
460
/// Object name (potentially qualified: `schema.table`).
///
/// The name is stored as its individual segments, outermost first. `parts`
/// is public, so it may hold any number of segments, including none.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ObjectName {
    /// Name segments in order; the last one is the object's own name.
    pub parts: Vec<String>,
}

impl ObjectName {
    /// Creates an unqualified name made of a single segment.
    pub fn new(name: impl Into<String>) -> Self {
        let name: String = name.into();
        Self { parts: vec![name] }
    }

    /// Creates a two-segment name: `schema` followed by `name`.
    pub fn qualified(schema: impl Into<String>, name: impl Into<String>) -> Self {
        let schema: String = schema.into();
        let name: String = name.into();
        Self {
            parts: vec![schema, name],
        }
    }

    /// Returns the last segment, or `""` when `parts` is empty.
    pub fn name(&self) -> &str {
        match self.parts.last() {
            Some(last) => last.as_str(),
            None => "",
        }
    }

    /// Returns the segment immediately before the last one, or `None` when
    /// there are fewer than two segments.
    pub fn schema(&self) -> Option<&str> {
        match self.parts.as_slice() {
            [.., schema, _name] => Some(schema.as_str()),
            _ => None,
        }
    }
}

impl std::fmt::Display for ObjectName {
    /// Writes the segments joined by `.`; writes nothing for an empty name.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut segments = self.parts.iter();
        if let Some(first) = segments.next() {
            f.write_str(first)?;
            // Separator goes before every segment after the first.
            for segment in segments {
                f.write_str(".")?;
                f.write_str(segment)?;
            }
        }
        Ok(())
    }
}
498
499/// Expression
500#[derive(Debug, Clone, PartialEq)]
501pub enum Expr {
502    /// Literal value
503    Literal(Literal),
504
505    /// Column reference: [table.]column
506    Column(ColumnRef),
507
508    /// Binary operation: expr op expr
509    BinaryOp {
510        left: Box<Expr>,
511        op: BinaryOperator,
512        right: Box<Expr>,
513    },
514
515    /// Unary operation: op expr
516    UnaryOp { op: UnaryOperator, expr: Box<Expr> },
517
518    /// Function call: func(args)
519    Function(FunctionCall),
520
521    /// CASE expression
522    Case {
523        operand: Option<Box<Expr>>,
524        conditions: Vec<(Expr, Expr)>, // (WHEN, THEN)
525        else_result: Option<Box<Expr>>,
526    },
527
528    /// Subquery: (SELECT ...)
529    Subquery(Box<SelectStmt>),
530
531    /// EXISTS (SELECT ...)
532    Exists(Box<SelectStmt>),
533
534    /// expr IN (values)
535    InList {
536        expr: Box<Expr>,
537        list: Vec<Expr>,
538        negated: bool,
539    },
540
541    /// expr IN (SELECT ...)
542    InSubquery {
543        expr: Box<Expr>,
544        subquery: Box<SelectStmt>,
545        negated: bool,
546    },
547
548    /// expr BETWEEN low AND high
549    Between {
550        expr: Box<Expr>,
551        low: Box<Expr>,
552        high: Box<Expr>,
553        negated: bool,
554    },
555
556    /// expr LIKE pattern [ESCAPE escape]
557    Like {
558        expr: Box<Expr>,
559        pattern: Box<Expr>,
560        escape: Option<Box<Expr>>,
561        negated: bool,
562    },
563
564    /// expr IS [NOT] NULL
565    IsNull { expr: Box<Expr>, negated: bool },
566
567    /// CAST(expr AS type)
568    Cast {
569        expr: Box<Expr>,
570        data_type: DataType,
571    },
572
573    /// Placeholder: $1, $2, ?
574    Placeholder(u32),
575
576    /// Array: [a, b, c] or ARRAY[a, b, c]
577    Array(Vec<Expr>),
578
579    /// Tuple/Row: (a, b, c)
580    Tuple(Vec<Expr>),
581
582    /// Array subscript: arr[index]
583    Subscript { expr: Box<Expr>, index: Box<Expr> },
584
585    // ========== SochDB Extensions ==========
586    /// Vector literal: [1.0, 2.0, 3.0]::VECTOR
587    Vector(Vec<f32>),
588
589    /// Vector search: VECTOR_SEARCH(column, query_vector, k, metric)
590    VectorSearch {
591        column: Box<Expr>,
592        query: Box<Expr>,
593        k: u32,
594        metric: VectorMetric,
595    },
596
597    /// JSON path: json_col -> 'path'
598    JsonAccess {
599        expr: Box<Expr>,
600        path: Box<Expr>,
601        return_text: bool, // -> vs ->>
602    },
603
604    /// Context window for LLM: CONTEXT_WINDOW(tokens, priority_expr)
605    ContextWindow {
606        source: Box<Expr>,
607        max_tokens: u32,
608        priority: Option<Box<Expr>>,
609    },
610}
611
/// Literal values.
///
/// Only `PartialEq` is derived: `Float` holds an `f64`, so a NaN literal is
/// not equal to itself.
#[derive(Debug, Clone, PartialEq)]
pub enum Literal {
    /// `NULL`
    Null,
    /// Boolean literal.
    Boolean(bool),
    /// Integer literal, stored as `i64`.
    Integer(i64),
    /// Floating-point literal, stored as `f64`.
    Float(f64),
    /// String literal.
    String(String),
    /// Binary literal, stored as raw bytes.
    Blob(Vec<u8>),
}
622
/// Column reference, optionally qualified by a table name.
#[derive(Debug, Clone, PartialEq)]
pub struct ColumnRef {
    /// Table qualifier, if any.
    pub table: Option<String>,
    /// Column name.
    pub column: String,
}

impl ColumnRef {
    /// Creates an unqualified column reference (`table` is `None`).
    pub fn new(column: impl Into<String>) -> Self {
        let column: String = column.into();
        Self { table: None, column }
    }

    /// Creates a column reference qualified by `table`.
    pub fn qualified(table: impl Into<String>, column: impl Into<String>) -> Self {
        let table: Option<String> = Some(table.into());
        let column: String = column.into();
        Self { table, column }
    }
}
645
/// Binary operators, used by `Expr::BinaryOp`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BinaryOperator {
    // Arithmetic
    Plus,
    Minus,
    Multiply,
    Divide,
    Modulo,

    // Comparison
    Eq,
    Ne,
    Lt,
    Le,
    Gt,
    Ge,

    // Logical
    And,
    Or,

    // String
    Concat,
    Like,

    // Bitwise
    BitAnd,
    BitOr,
    BitXor,
    LeftShift,
    RightShift,
}
679
/// Unary operators, used by `Expr::UnaryOp`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UnaryOperator {
    /// Unary plus.
    Plus,
    /// Unary minus.
    Minus,
    /// Logical NOT.
    Not,
    /// Bitwise NOT.
    BitNot,
}
688
689/// Function call
690#[derive(Debug, Clone, PartialEq)]
691pub struct FunctionCall {
692    pub name: ObjectName,
693    pub args: Vec<Expr>,
694    pub distinct: bool,
695    pub filter: Option<Box<Expr>>,
696    pub over: Option<WindowSpec>,
697}
698
699/// Window specification for window functions
700#[derive(Debug, Clone, PartialEq)]
701pub struct WindowSpec {
702    pub partition_by: Vec<Expr>,
703    pub order_by: Vec<OrderByItem>,
704    pub frame: Option<WindowFrame>,
705}
706
707#[derive(Debug, Clone, PartialEq)]
708pub struct WindowFrame {
709    pub kind: WindowFrameKind,
710    pub start: WindowFrameBound,
711    pub end: Option<WindowFrameBound>,
712}
713
/// Unit in which a window frame is expressed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WindowFrameKind {
    /// `ROWS`
    Rows,
    /// `RANGE`
    Range,
    /// `GROUPS`
    Groups,
}
720
721#[derive(Debug, Clone, PartialEq)]
722pub enum WindowFrameBound {
723    CurrentRow,
724    Preceding(Option<Box<Expr>>), // None = UNBOUNDED
725    Following(Option<Box<Expr>>), // None = UNBOUNDED
726}
727
/// Vector distance metrics (SochDB extension).
///
/// `Default` is derived; the `#[default]` variant is `Cosine`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum VectorMetric {
    /// Cosine metric; the value returned by `VectorMetric::default()`.
    #[default]
    Cosine,
    /// Euclidean metric.
    Euclidean,
    /// Dot-product metric.
    DotProduct,
    /// Manhattan metric.
    Manhattan,
}