Skip to main content

sochdb_query/sql/
ast.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2// SochDB - LLM-Optimized Embedded Database
3// Copyright (C) 2026 Sushanth Reddy Vanagala (https://github.com/sushanthpy)
4//
5// This program is free software: you can redistribute it and/or modify
6// it under the terms of the GNU Affero General Public License as published by
7// the Free Software Foundation, either version 3 of the License, or
8// (at your option) any later version.
9//
10// This program is distributed in the hope that it will be useful,
11// but WITHOUT ANY WARRANTY; without even the implied warranty of
12// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13// GNU Affero General Public License for more details.
14//
15// You should have received a copy of the GNU Affero General Public License
16// along with this program. If not, see <https://www.gnu.org/licenses/>.
17
18//! SQL Abstract Syntax Tree
19//!
20//! Represents parsed SQL statements as a tree structure.
21
22use super::token::Span;
23
/// Top-level SQL statement
///
/// One variant per statement kind this AST can represent. Data, schema and
/// index statements wrap a dedicated `*Stmt` struct; the transaction-control
/// variants carry at most a name.
#[derive(Debug, Clone, PartialEq)]
// Payload structs are held inline (unboxed), so variants differ in size; the
// size lint is silenced here rather than boxing the payloads.
#[allow(clippy::large_enum_variant)]
pub enum Statement {
    /// `SELECT` statement.
    Select(SelectStmt),
    /// `INSERT` statement.
    Insert(InsertStmt),
    /// `UPDATE` statement.
    Update(UpdateStmt),
    /// `DELETE` statement.
    Delete(DeleteStmt),
    /// `CREATE TABLE` statement.
    CreateTable(CreateTableStmt),
    /// `DROP TABLE` statement.
    DropTable(DropTableStmt),
    /// `ALTER TABLE` statement.
    AlterTable(AlterTableStmt),
    /// `CREATE INDEX` statement.
    CreateIndex(CreateIndexStmt),
    /// `DROP INDEX` statement.
    DropIndex(DropIndexStmt),
    /// `BEGIN` statement.
    Begin(BeginStmt),
    /// Commit; carries no data.
    Commit,
    /// Rollback; `Some(name)` carries a savepoint name, `None` carries none.
    Rollback(Option<String>), // Optional savepoint name
    /// Carries a single name — presumably the savepoint being created.
    Savepoint(String),
    /// Carries a single name — presumably the savepoint being released.
    Release(String),
    /// Wraps another statement; boxed because the enum refers to itself.
    Explain(Box<Statement>),
}
44
/// SELECT statement
///
/// NOTE(review): the SQL clause each field corresponds to is inferred from
/// its name and type; the parser that fills this struct is not in this file.
#[derive(Debug, Clone, PartialEq)]
pub struct SelectStmt {
    /// Source location (`Span` comes from the sibling `token` module).
    pub span: Span,
    /// Presumably `true` for `SELECT DISTINCT`.
    pub distinct: bool,
    /// Items of the select list.
    pub columns: Vec<SelectItem>,
    /// `FROM` clause, if any.
    pub from: Option<FromClause>,
    /// `WHERE` predicate, if any.
    pub where_clause: Option<Expr>,
    /// `GROUP BY` expressions; presumably empty when the clause is absent.
    pub group_by: Vec<Expr>,
    /// `HAVING` predicate, if any.
    pub having: Option<Expr>,
    /// `ORDER BY` items; presumably empty when the clause is absent.
    pub order_by: Vec<OrderByItem>,
    /// `LIMIT` value, stored as an expression rather than a number.
    pub limit: Option<Expr>,
    /// `OFFSET` value, stored as an expression rather than a number.
    pub offset: Option<Expr>,
    /// Further SELECTs combined with this one, each paired with its set
    /// operation. Despite the field name, the `SetOp` can be any variant
    /// (union, intersect or except).
    pub unions: Vec<(SetOp, Box<SelectStmt>)>,
}
60
/// Items in SELECT clause
///
/// One value per entry of the select list.
#[derive(Debug, Clone, PartialEq)]
pub enum SelectItem {
    /// SELECT *
    Wildcard,
    /// SELECT table.*
    ///
    /// The string is presumably the `table` qualifier in front of `.*`.
    QualifiedWildcard(String),
    /// SELECT expr [AS alias]
    ///
    /// `alias` is `None` when no alias is recorded.
    Expr { expr: Expr, alias: Option<String> },
}
71
/// Set operations (UNION, INTERSECT, EXCEPT)
///
/// Each operation comes in a plain form and an `...All` form; the `All`
/// forms presumably correspond to the SQL `ALL` keyword.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SetOp {
    /// `UNION`
    Union,
    /// `UNION ALL`
    UnionAll,
    /// `INTERSECT`
    Intersect,
    /// `INTERSECT ALL`
    IntersectAll,
    /// `EXCEPT`
    Except,
    /// `EXCEPT ALL`
    ExceptAll,
}
82
/// FROM clause
///
/// A thin wrapper around the list of table references.
#[derive(Debug, Clone, PartialEq)]
pub struct FromClause {
    /// Table references; presumably one entry per comma-separated FROM item,
    /// with joins nested inside a single `TableRef::Join` entry.
    pub tables: Vec<TableRef>,
}
88
/// Table reference in FROM clause
///
/// `Join` holds two further `TableRef`s, so a chain of joins forms a tree.
#[derive(Debug, Clone, PartialEq)]
pub enum TableRef {
    /// Simple table: table_name [AS alias]
    Table {
        /// Possibly-qualified table name.
        name: ObjectName,
        /// Alias, if one is recorded.
        alias: Option<String>,
    },
    /// Subquery: (SELECT ...) AS alias
    Subquery {
        /// The nested SELECT.
        query: Box<SelectStmt>,
        /// Alias; unlike the other variants this one is not optional.
        alias: String,
    },
    /// Join: left JOIN right ON condition
    Join {
        /// Left-hand input of the join.
        left: Box<TableRef>,
        /// Kind of join.
        join_type: JoinType,
        /// Right-hand input of the join.
        right: Box<TableRef>,
        /// Join condition; `None` when none is recorded (presumably e.g.
        /// for a cross join).
        condition: Option<JoinCondition>,
    },
    /// Table-valued function: func(...) AS alias
    Function {
        /// Function name (a plain string, not an `ObjectName`).
        name: String,
        /// Argument expressions.
        args: Vec<Expr>,
        /// Alias, if one is recorded.
        alias: Option<String>,
    },
}
116
/// Join types
///
/// Used by `TableRef::Join` to record which kind of join was written.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum JoinType {
    /// `INNER JOIN`
    Inner,
    /// `LEFT JOIN`
    Left,
    /// `RIGHT JOIN`
    Right,
    /// `FULL JOIN`
    Full,
    /// `CROSS JOIN`
    Cross,
}
126
/// Join condition
///
/// How the two sides of a `TableRef::Join` are matched.
#[derive(Debug, Clone, PartialEq)]
pub enum JoinCondition {
    /// `ON <expr>`: an arbitrary predicate expression.
    On(Expr),
    /// `USING (...)`: a list of names, presumably the shared column names.
    Using(Vec<String>),
    /// `NATURAL` join; carries no data.
    Natural,
}
134
/// ORDER BY item
///
/// Also reused by `WindowSpec::order_by`.
#[derive(Debug, Clone, PartialEq)]
pub struct OrderByItem {
    /// Expression to sort by.
    pub expr: Expr,
    /// Presumably `true` for ascending and `false` for descending order.
    pub asc: bool,
    /// Presumably `Some(true)` for `NULLS FIRST`, `Some(false)` for
    /// `NULLS LAST`, and `None` when neither was written.
    pub nulls_first: Option<bool>,
}
142
/// INSERT statement
///
/// NOTE(review): clause mapping is inferred from field names and types; the
/// parser that fills this struct is not in this file.
#[derive(Debug, Clone, PartialEq)]
pub struct InsertStmt {
    /// Source location (`Span` comes from the sibling `token` module).
    pub span: Span,
    /// Target table.
    pub table: ObjectName,
    /// Explicit column list; `None` when no list is recorded.
    pub columns: Option<Vec<String>>,
    /// Where the inserted rows come from.
    pub source: InsertSource,
    /// Conflict handling, if any.
    pub on_conflict: Option<OnConflict>,
    /// `RETURNING` items, if any; reuses the SELECT-list item type.
    pub returning: Option<Vec<SelectItem>>,
}
153
/// Source of INSERT data
#[derive(Debug, Clone, PartialEq)]
pub enum InsertSource {
    /// VALUES (a, b), (c, d), ...
    ///
    /// Outer vector: one entry per row; inner vector: that row's expressions.
    Values(Vec<Vec<Expr>>),
    /// SELECT ...
    Query(Box<SelectStmt>),
    /// DEFAULT VALUES
    Default,
}
164
/// ON CONFLICT clause
///
/// Represents conflict handling for INSERT statements across SQL dialects:
/// - PostgreSQL: `ON CONFLICT DO NOTHING/UPDATE`
/// - MySQL: `INSERT IGNORE`, `ON DUPLICATE KEY UPDATE`
/// - SQLite: `INSERT OR IGNORE/REPLACE/ABORT`
///
/// All dialects normalize to this single representation.
#[derive(Debug, Clone, PartialEq)]
pub struct OnConflict {
    /// What the conflict is detected on; `None` when no target is recorded.
    pub target: Option<ConflictTarget>,
    /// What to do when a conflict occurs.
    pub action: ConflictAction,
}
178
/// Target of an `OnConflict` clause: either a column list or a single name.
#[derive(Debug, Clone, PartialEq)]
pub enum ConflictTarget {
    /// A list of column names.
    Columns(Vec<String>),
    /// A single name — presumably that of a named constraint.
    Constraint(String),
}
184
/// Action taken by an `OnConflict` clause when a conflict is detected.
#[derive(Debug, Clone, PartialEq)]
pub enum ConflictAction {
    /// ON CONFLICT DO NOTHING / INSERT IGNORE / INSERT OR IGNORE
    DoNothing,
    /// ON CONFLICT DO UPDATE SET ... / ON DUPLICATE KEY UPDATE ...
    ///
    /// Carries the `column = expr` assignments to apply.
    DoUpdate(Vec<Assignment>),
    /// INSERT OR REPLACE (SQLite) - replaces the entire row
    DoReplace,
    /// INSERT OR ABORT (SQLite) - abort on conflict (default behavior)
    DoAbort,
    /// INSERT OR FAIL (SQLite) - in SQLite, FAIL stops the statement at the
    /// conflicting row but keeps the changes the statement had already made;
    /// it does not go on to process later rows.
    /// NOTE(review): confirm the executor implements this action that way.
    DoFail,
}
198
/// UPDATE statement
///
/// NOTE(review): clause mapping is inferred from field names and types; the
/// parser that fills this struct is not in this file.
#[derive(Debug, Clone, PartialEq)]
pub struct UpdateStmt {
    /// Source location (`Span` comes from the sibling `token` module).
    pub span: Span,
    /// Table being updated.
    pub table: ObjectName,
    /// Alias for the target table, if any.
    pub alias: Option<String>,
    /// The `column = expr` assignments.
    pub assignments: Vec<Assignment>,
    /// Additional `FROM` clause, if any.
    pub from: Option<FromClause>,
    /// `WHERE` predicate, if any.
    pub where_clause: Option<Expr>,
    /// `RETURNING` items, if any; reuses the SELECT-list item type.
    pub returning: Option<Vec<SelectItem>>,
}
210
/// Assignment: column = expr
///
/// Used by `UpdateStmt::assignments` and `ConflictAction::DoUpdate`.
#[derive(Debug, Clone, PartialEq)]
pub struct Assignment {
    /// Name of the column being assigned (unqualified string).
    pub column: String,
    /// Expression on the right-hand side of the `=`.
    pub value: Expr,
}
217
/// DELETE statement
///
/// NOTE(review): clause mapping is inferred from field names and types; the
/// parser that fills this struct is not in this file.
#[derive(Debug, Clone, PartialEq)]
pub struct DeleteStmt {
    /// Source location (`Span` comes from the sibling `token` module).
    pub span: Span,
    /// Table rows are deleted from.
    pub table: ObjectName,
    /// Alias for the target table, if any.
    pub alias: Option<String>,
    /// `USING` clause, if any; stored with the same type as a FROM clause.
    pub using: Option<FromClause>,
    /// `WHERE` predicate, if any.
    pub where_clause: Option<Expr>,
    /// `RETURNING` items, if any; reuses the SELECT-list item type.
    pub returning: Option<Vec<SelectItem>>,
}
228
/// CREATE TABLE statement
///
/// NOTE(review): clause mapping is inferred from field names and types; the
/// parser that fills this struct is not in this file.
#[derive(Debug, Clone, PartialEq)]
pub struct CreateTableStmt {
    /// Source location (`Span` comes from the sibling `token` module).
    pub span: Span,
    /// Presumably `true` when `IF NOT EXISTS` was written.
    pub if_not_exists: bool,
    /// Name of the table to create.
    pub name: ObjectName,
    /// Column definitions.
    pub columns: Vec<ColumnDef>,
    /// Table-level constraints (as opposed to per-column ones).
    pub constraints: Vec<TableConstraint>,
    /// Name/value table options.
    pub options: Vec<TableOption>,
}
239
/// Column definition
///
/// Used by `CreateTableStmt::columns` and `AlterTableOp::AddColumn`.
#[derive(Debug, Clone, PartialEq)]
pub struct ColumnDef {
    /// Column name.
    pub name: String,
    /// Declared data type.
    pub data_type: DataType,
    /// Constraints attached to this column.
    pub constraints: Vec<ColumnConstraint>,
}
247
/// SQL Data types
///
/// Used by column definitions, `CAST` expressions and
/// `AlterColumnOp::SetType`. The `Option<u32>` payloads are presumably the
/// optional length written in parentheses, e.g. `VARCHAR(255)`.
#[derive(Debug, Clone, PartialEq)]
pub enum DataType {
    // Numeric
    TinyInt,
    SmallInt,
    Int,
    BigInt,
    Float,
    Double,
    /// Decimal type; both parameters are optional.
    Decimal {
        precision: Option<u32>,
        scale: Option<u32>,
    },

    // String
    Char(Option<u32>),
    Varchar(Option<u32>),
    Text,

    // Binary
    Binary(Option<u32>),
    Varbinary(Option<u32>),
    Blob,

    // Date/Time
    Date,
    Time,
    Timestamp,
    DateTime,
    Interval,

    // Boolean
    Boolean,

    // JSON
    Json,
    Jsonb,

    // SochDB Extensions
    // Unlike the length-style payloads above, the dimension count is required.
    Vector(u32),    // VECTOR(dimensions)
    Embedding(u32), // EMBEDDING(dimensions)

    // Custom/Unknown
    /// Holds a type name as a string — presumably the name as written.
    Custom(String),
}
294
/// Column constraints
///
/// Constraints attached to a single column via `ColumnDef::constraints`.
#[derive(Debug, Clone, PartialEq)]
pub enum ColumnConstraint {
    /// `NOT NULL`
    NotNull,
    /// Explicit `NULL`
    Null,
    /// `UNIQUE`
    Unique,
    /// `PRIMARY KEY`
    PrimaryKey,
    /// `DEFAULT <expr>`
    Default(Expr),
    /// `CHECK (<expr>)`
    Check(Expr),
    /// `REFERENCES table (columns)` with optional referential actions.
    References {
        /// Referenced table.
        table: ObjectName,
        /// Referenced column names.
        columns: Vec<String>,
        /// `ON DELETE` action, if recorded.
        on_delete: Option<ReferentialAction>,
        /// `ON UPDATE` action, if recorded.
        on_update: Option<ReferentialAction>,
    },
    /// Auto-increment marker; carries no data.
    AutoIncrement,
    /// Generated (computed) column.
    Generated {
        /// Expression that produces the column value.
        expr: Expr,
        /// Presumably `true` for `STORED` and `false` otherwise.
        stored: bool,
    },
}
316
/// Table-level constraints
///
/// Every variant has an optional `name` — presumably the constraint name
/// from a `CONSTRAINT <name>` prefix, `None` when unnamed.
#[derive(Debug, Clone, PartialEq)]
pub enum TableConstraint {
    /// Primary key over the listed columns.
    PrimaryKey {
        name: Option<String>,
        columns: Vec<String>,
    },
    /// Uniqueness constraint over the listed columns.
    Unique {
        name: Option<String>,
        columns: Vec<String>,
    },
    /// Foreign key from `columns` to `ref_columns` of `ref_table`.
    ForeignKey {
        name: Option<String>,
        columns: Vec<String>,
        ref_table: ObjectName,
        ref_columns: Vec<String>,
        /// `ON DELETE` action, if recorded.
        on_delete: Option<ReferentialAction>,
        /// `ON UPDATE` action, if recorded.
        on_update: Option<ReferentialAction>,
    },
    /// Check constraint with its predicate expression.
    Check {
        name: Option<String>,
        expr: Expr,
    },
}
341
/// Referential action used by the `on_delete` / `on_update` fields of
/// foreign-key constraints.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ReferentialAction {
    /// `NO ACTION`
    NoAction,
    /// `RESTRICT`
    Restrict,
    /// `CASCADE`
    Cascade,
    /// `SET NULL`
    SetNull,
    /// `SET DEFAULT`
    SetDefault,
}
350
/// Table options (ENGINE, CHARSET, etc.)
///
/// A free-form name/value pair; both sides are kept as strings.
#[derive(Debug, Clone, PartialEq)]
pub struct TableOption {
    /// Option name.
    pub name: String,
    /// Option value, as a string.
    pub value: String,
}
357
/// DROP TABLE statement
///
/// Holds a list of names, so one statement can refer to several tables.
#[derive(Debug, Clone, PartialEq)]
pub struct DropTableStmt {
    /// Source location (`Span` comes from the sibling `token` module).
    pub span: Span,
    /// Presumably `true` when `IF EXISTS` was written.
    pub if_exists: bool,
    /// Tables to drop.
    pub names: Vec<ObjectName>,
    /// Presumably `true` when `CASCADE` was written.
    pub cascade: bool,
}
366
/// ALTER TABLE statement
///
/// Holds a list of operations, so one statement can carry several changes.
#[derive(Debug, Clone, PartialEq)]
pub struct AlterTableStmt {
    /// Source location (`Span` comes from the sibling `token` module).
    pub span: Span,
    /// Table being altered.
    pub name: ObjectName,
    /// Operations to apply.
    pub operations: Vec<AlterTableOp>,
}
374
/// A single operation inside an `AlterTableStmt`.
#[derive(Debug, Clone, PartialEq)]
pub enum AlterTableOp {
    /// Add a column with the given definition.
    AddColumn(ColumnDef),
    /// Drop the named column; `cascade` is presumably the `CASCADE` flag.
    DropColumn {
        name: String,
        cascade: bool,
    },
    /// Change a property of the named column.
    AlterColumn {
        name: String,
        operation: AlterColumnOp,
    },
    /// Add a table-level constraint.
    AddConstraint(TableConstraint),
    /// Drop the named constraint; `cascade` is presumably the `CASCADE` flag.
    DropConstraint {
        name: String,
        cascade: bool,
    },
    /// Rename the table; the payload is presumably the new name.
    RenameTable(ObjectName),
    /// Rename a column from `old_name` to `new_name`.
    RenameColumn {
        old_name: String,
        new_name: String,
    },
}
397
/// The change applied to a column by `AlterTableOp::AlterColumn`.
#[derive(Debug, Clone, PartialEq)]
pub enum AlterColumnOp {
    /// Change the column's data type.
    SetType(DataType),
    /// `SET NOT NULL`
    SetNotNull,
    /// `DROP NOT NULL`
    DropNotNull,
    /// `SET DEFAULT <expr>`
    SetDefault(Expr),
    /// `DROP DEFAULT`
    DropDefault,
}
406
/// CREATE INDEX statement
///
/// NOTE(review): clause mapping is inferred from field names and types; the
/// parser that fills this struct is not in this file.
#[derive(Debug, Clone, PartialEq)]
pub struct CreateIndexStmt {
    /// Source location (`Span` comes from the sibling `token` module).
    pub span: Span,
    /// Presumably `true` for `CREATE UNIQUE INDEX`.
    pub unique: bool,
    /// Presumably `true` when `IF NOT EXISTS` was written.
    pub if_not_exists: bool,
    /// Index name (a plain string, not an `ObjectName`).
    pub name: String,
    /// Table the index is built on.
    pub table: ObjectName,
    /// Indexed columns with their ordering.
    pub columns: Vec<IndexColumn>,
    /// Predicate expression, if any — presumably for a partial index.
    pub where_clause: Option<Expr>,
    /// Index method, if one is recorded.
    pub index_type: Option<IndexType>,
}
419
/// One column of an index, with its ordering (see `CreateIndexStmt::columns`).
#[derive(Debug, Clone, PartialEq)]
pub struct IndexColumn {
    /// Column name; only plain names are representable, not expressions.
    pub name: String,
    /// Presumably `true` for ascending and `false` for descending order.
    pub asc: bool,
    /// Presumably `Some(true)` for `NULLS FIRST`, `Some(false)` for
    /// `NULLS LAST`, and `None` when neither was written.
    pub nulls_first: Option<bool>,
}
426
/// Index method selectable through `CreateIndexStmt::index_type`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IndexType {
    BTree,
    Hash,
    Gin,
    Gist,
    // SochDB extensions
    Hnsw,   // For vector search
    Vamana, // For vector search
}
437
/// DROP INDEX statement
///
/// Unlike `DropTableStmt`, this holds a single index name.
#[derive(Debug, Clone, PartialEq)]
pub struct DropIndexStmt {
    /// Source location (`Span` comes from the sibling `token` module).
    pub span: Span,
    /// Presumably `true` when `IF EXISTS` was written.
    pub if_exists: bool,
    /// Index name (a plain string, not an `ObjectName`).
    pub name: String,
    /// Table the index belongs to, if one is recorded.
    pub table: Option<ObjectName>,
    /// Presumably `true` when `CASCADE` was written.
    pub cascade: bool,
}
447
/// BEGIN statement
///
/// Carries the transaction settings; note that it has no `span` field,
/// unlike the other `*Stmt` structs in this file.
#[derive(Debug, Clone, PartialEq)]
pub struct BeginStmt {
    /// Presumably `true` when the transaction was declared read-only.
    pub read_only: bool,
    /// Requested isolation level; `None` when none is recorded.
    pub isolation_level: Option<IsolationLevel>,
}
454
/// Transaction isolation level selectable through `BeginStmt::isolation_level`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IsolationLevel {
    /// `READ UNCOMMITTED`
    ReadUncommitted,
    /// `READ COMMITTED`
    ReadCommitted,
    /// `REPEATABLE READ`
    RepeatableRead,
    /// `SERIALIZABLE`
    Serializable,
    /// `SNAPSHOT`
    Snapshot,
}
463
/// A possibly-qualified object identifier such as `table` or `schema.table`.
///
/// Components are kept in written order, qualifiers first, so the object's
/// own name is the final entry of `parts`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ObjectName {
    /// Name components, from the leading qualifier to the object itself.
    pub parts: Vec<String>,
}

impl ObjectName {
    /// Builds an unqualified name made of a single component.
    pub fn new(name: impl Into<String>) -> Self {
        let parts = vec![name.into()];
        Self { parts }
    }

    /// Builds a two-component name: `schema` followed by `name`.
    pub fn qualified(schema: impl Into<String>, name: impl Into<String>) -> Self {
        let parts = vec![schema.into(), name.into()];
        Self { parts }
    }

    /// Returns the final component, or `""` when `parts` is empty.
    pub fn name(&self) -> &str {
        match self.parts.last() {
            Some(last) => last.as_str(),
            None => "",
        }
    }

    /// Returns the component just before the final one, or `None` when there
    /// are fewer than two components.
    pub fn schema(&self) -> Option<&str> {
        // Walking backwards, index 1 is the second-to-last component.
        self.parts.iter().rev().nth(1).map(String::as_str)
    }
}

impl std::fmt::Display for ObjectName {
    /// Writes the components joined by `.`, with no quoting or escaping.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut components = self.parts.iter();
        if let Some(head) = components.next() {
            f.write_str(head)?;
        }
        for component in components {
            f.write_str(".")?;
            f.write_str(component)?;
        }
        Ok(())
    }
}
501
/// Expression
///
/// Recursive expression tree. Child expressions are boxed wherever a variant
/// refers back to `Expr` directly, and subqueries are boxed `SelectStmt`s.
#[derive(Debug, Clone, PartialEq)]
pub enum Expr {
    /// Literal value
    Literal(Literal),

    /// Column reference: [table.]column
    Column(ColumnRef),

    /// Binary operation: expr op expr
    BinaryOp {
        left: Box<Expr>,
        op: BinaryOperator,
        right: Box<Expr>,
    },

    /// Unary operation: op expr
    UnaryOp { op: UnaryOperator, expr: Box<Expr> },

    /// Function call: func(args)
    Function(FunctionCall),

    /// CASE expression
    Case {
        /// Expression after `CASE`, if any; `None` presumably means the
        /// searched form (`CASE WHEN cond ...`).
        operand: Option<Box<Expr>>,
        conditions: Vec<(Expr, Expr)>, // (WHEN, THEN)
        /// `ELSE` result, if any.
        else_result: Option<Box<Expr>>,
    },

    /// Subquery: (SELECT ...)
    Subquery(Box<SelectStmt>),

    /// EXISTS (SELECT ...)
    Exists(Box<SelectStmt>),

    /// expr IN (values)
    InList {
        expr: Box<Expr>,
        list: Vec<Expr>,
        /// Presumably `true` for `NOT IN`.
        negated: bool,
    },

    /// expr IN (SELECT ...)
    InSubquery {
        expr: Box<Expr>,
        subquery: Box<SelectStmt>,
        /// Presumably `true` for `NOT IN`.
        negated: bool,
    },

    /// expr BETWEEN low AND high
    Between {
        expr: Box<Expr>,
        low: Box<Expr>,
        high: Box<Expr>,
        /// Presumably `true` for `NOT BETWEEN`.
        negated: bool,
    },

    /// expr LIKE pattern [ESCAPE escape]
    Like {
        expr: Box<Expr>,
        pattern: Box<Expr>,
        /// `ESCAPE` expression, if any.
        escape: Option<Box<Expr>>,
        /// Presumably `true` for `NOT LIKE`.
        negated: bool,
    },

    /// expr IS [NOT] NULL
    IsNull { expr: Box<Expr>, negated: bool },

    /// CAST(expr AS type)
    Cast {
        expr: Box<Expr>,
        data_type: DataType,
    },

    /// Placeholder: $1, $2, ?
    ///
    /// NOTE(review): how a bare `?` is numbered is decided by the parser,
    /// which is not in this file.
    Placeholder(u32),

    /// Array: [a, b, c] or ARRAY[a, b, c]
    Array(Vec<Expr>),

    /// Tuple/Row: (a, b, c)
    Tuple(Vec<Expr>),

    /// Array subscript: arr[index]
    Subscript { expr: Box<Expr>, index: Box<Expr> },

    // ========== SochDB Extensions ==========
    /// Vector literal: [1.0, 2.0, 3.0]::VECTOR
    ///
    /// Components are stored as `f32`.
    Vector(Vec<f32>),

    /// Vector search: VECTOR_SEARCH(column, query_vector, k, metric)
    VectorSearch {
        column: Box<Expr>,
        query: Box<Expr>,
        /// Stored as a plain integer, so it cannot be an arbitrary expression.
        k: u32,
        metric: VectorMetric,
    },

    /// JSON path: json_col -> 'path'
    JsonAccess {
        expr: Box<Expr>,
        path: Box<Expr>,
        return_text: bool, // -> vs ->>
    },

    /// Context window for LLM: CONTEXT_WINDOW(tokens, priority_expr)
    ///
    /// NOTE(review): the syntax sketch above lists two arguments, but the
    /// variant has three fields (`source`, `max_tokens`, `priority`) —
    /// confirm the actual surface syntax against the parser.
    ContextWindow {
        source: Box<Expr>,
        /// Stored as a plain integer, so it cannot be an arbitrary expression.
        max_tokens: u32,
        /// Priority expression, if any.
        priority: Option<Box<Expr>>,
    },
}
614
/// Literal values
///
/// Wrapped by `Expr::Literal`. Contains an `f64`, which is why this type and
/// the types built on it derive `PartialEq` but not `Eq`.
#[derive(Debug, Clone, PartialEq)]
pub enum Literal {
    /// SQL `NULL`.
    Null,
    /// Boolean literal.
    Boolean(bool),
    /// Integer literal, stored as a signed 64-bit value.
    Integer(i64),
    /// Floating-point literal, stored as a 64-bit float.
    Float(f64),
    /// String literal.
    String(String),
    /// Binary literal, stored as raw bytes.
    Blob(Vec<u8>),
}
625
/// Column reference: `[table.]column`.
///
/// Derives `Eq` and `Hash` in addition to `PartialEq` so that a reference can
/// be used as a `HashMap` key or `HashSet` member, matching `ObjectName`,
/// the other string-only name type in this module.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ColumnRef {
    /// Table qualifier; `None` for an unqualified reference.
    pub table: Option<String>,
    /// Column name.
    pub column: String,
}

impl ColumnRef {
    /// Creates an unqualified reference to `column` (`table` is `None`).
    pub fn new(column: impl Into<String>) -> Self {
        Self {
            table: None,
            column: column.into(),
        }
    }

    /// Creates a reference to `column` qualified by `table`.
    pub fn qualified(table: impl Into<String>, column: impl Into<String>) -> Self {
        Self {
            table: Some(table.into()),
            column: column.into(),
        }
    }
}
648
/// Binary operators
///
/// Operator tag stored in `Expr::BinaryOp`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BinaryOperator {
    // Arithmetic
    Plus,
    Minus,
    Multiply,
    Divide,
    Modulo,

    // Comparison
    Eq,
    Ne,
    Lt,
    Le,
    Gt,
    Ge,

    // Logical
    And,
    Or,

    // String
    Concat,
    // NOTE(review): `Expr` also has a dedicated `Like` variant (with ESCAPE
    // and negation); which of the two the parser emits is not visible here.
    Like,

    // Bitwise
    BitAnd,
    BitOr,
    BitXor,
    LeftShift,
    RightShift,
}
682
/// Unary operators
///
/// Operator tag stored in `Expr::UnaryOp`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UnaryOperator {
    /// Unary plus.
    Plus,
    /// Unary minus (negation).
    Minus,
    /// Logical `NOT`.
    Not,
    /// Bitwise NOT.
    BitNot,
}
691
/// Function call
///
/// Payload of `Expr::Function`; it has fields for plain, aggregate-style and
/// window-style calls.
#[derive(Debug, Clone, PartialEq)]
pub struct FunctionCall {
    /// Function name; may be qualified.
    pub name: ObjectName,
    /// Argument expressions.
    pub args: Vec<Expr>,
    /// Presumably `true` for `func(DISTINCT ...)`.
    pub distinct: bool,
    /// Filter expression, if any — presumably from `FILTER (WHERE ...)`.
    pub filter: Option<Box<Expr>>,
    /// Window specification, if any — presumably from an `OVER (...)` clause.
    pub over: Option<WindowSpec>,
}
701
/// Window specification for window functions
///
/// Stored in `FunctionCall::over`.
#[derive(Debug, Clone, PartialEq)]
pub struct WindowSpec {
    /// `PARTITION BY` expressions.
    pub partition_by: Vec<Expr>,
    /// `ORDER BY` items inside the window.
    pub order_by: Vec<OrderByItem>,
    /// Frame clause, if any.
    pub frame: Option<WindowFrame>,
}
709
/// Frame clause of a window specification (see `WindowSpec::frame`).
#[derive(Debug, Clone, PartialEq)]
pub struct WindowFrame {
    /// Frame unit: rows, range or groups.
    pub kind: WindowFrameKind,
    /// Start bound of the frame.
    pub start: WindowFrameBound,
    /// End bound; `None` when none is recorded (presumably the form without
    /// `BETWEEN ... AND ...`).
    pub end: Option<WindowFrameBound>,
}
716
/// Unit in which a window frame is expressed (see `WindowFrame::kind`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WindowFrameKind {
    /// `ROWS`
    Rows,
    /// `RANGE`
    Range,
    /// `GROUPS`
    Groups,
}
723
/// One bound of a window frame (used for `WindowFrame::start` and `end`).
#[derive(Debug, Clone, PartialEq)]
pub enum WindowFrameBound {
    /// `CURRENT ROW`
    CurrentRow,
    /// `<expr> PRECEDING`, or `UNBOUNDED PRECEDING` when the payload is `None`.
    Preceding(Option<Box<Expr>>), // None = UNBOUNDED
    /// `<expr> FOLLOWING`, or `UNBOUNDED FOLLOWING` when the payload is `None`.
    Following(Option<Box<Expr>>), // None = UNBOUNDED
}
730
/// Vector distance metrics (SochDB extension)
///
/// Used by `Expr::VectorSearch`. `VectorMetric::default()` yields `Cosine`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum VectorMetric {
    /// Cosine metric; the `Default` value.
    #[default]
    Cosine,
    /// Euclidean metric.
    Euclidean,
    /// Dot-product metric.
    DotProduct,
    /// Manhattan metric.
    Manhattan,
}
739}