nodedb-sql 0.0.5

SQL parser, planner, and optimizer for NodeDB
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
//! SqlPlan intermediate representation types.
//!
//! These types represent the output of the nodedb-sql planner. Both Origin
//! (server) and Lite (embedded) map these to their own execution model.

/// The top-level plan produced by the SQL planner.
#[derive(Debug, Clone)]
pub enum SqlPlan {
    // ── Constant ──
    /// Query with no FROM clause: SELECT 1, SELECT 'hello' AS name, etc.
    /// Produces a single row with evaluated constant expressions.
    ConstantResult {
        columns: Vec<String>,
        values: Vec<SqlValue>,
    },

    // ── Reads ──
    Scan {
        collection: String,
        alias: Option<String>,
        engine: EngineType,
        filters: Vec<Filter>,
        projection: Vec<Projection>,
        sort_keys: Vec<SortKey>,
        limit: Option<usize>,
        offset: usize,
        distinct: bool,
        window_functions: Vec<WindowSpec>,
    },
    PointGet {
        collection: String,
        alias: Option<String>,
        engine: EngineType,
        key_column: String,
        key_value: SqlValue,
    },
    /// Document fetch via a secondary index: equality predicate on an
    /// indexed field. The executor performs an index lookup to resolve
    /// matching document IDs, reads each document, and applies any
    /// remaining filters, projection, sort, and limit.
    ///
    /// Emitted by `document_schemaless::plan_scan` /
    /// `document_strict::plan_scan` when the WHERE clause contains a
    /// single equality predicate on a `Ready` indexed field. Any
    /// additional predicates fall through as post-filters.
    DocumentIndexLookup {
        collection: String,
        alias: Option<String>,
        engine: EngineType,
        /// Indexed field path used for the lookup.
        field: String,
        /// Equality value from the WHERE clause.
        value: SqlValue,
        /// Remaining filters after extracting the equality used for lookup.
        filters: Vec<Filter>,
        projection: Vec<Projection>,
        sort_keys: Vec<SortKey>,
        limit: Option<usize>,
        offset: usize,
        distinct: bool,
        window_functions: Vec<WindowSpec>,
        /// Whether the chosen index is COLLATE NOCASE — the executor
        /// lowercases the lookup value before probing.
        case_insensitive: bool,
    },
    RangeScan {
        collection: String,
        field: String,
        lower: Option<SqlValue>,
        upper: Option<SqlValue>,
        limit: usize,
    },

    // ── Writes ──
    Insert {
        collection: String,
        engine: EngineType,
        rows: Vec<Vec<(String, SqlValue)>>,
        /// Column defaults from schema: `(column_name, default_expr)`.
        /// Used to auto-generate values for missing columns (e.g. `id` with `UUID_V7`).
        column_defaults: Vec<(String, String)>,
    },
    /// KV INSERT: key and value are fundamentally separate.
    /// Each entry is `(key, value_columns)`.
    KvInsert {
        collection: String,
        entries: Vec<(SqlValue, Vec<(String, SqlValue)>)>,
        /// TTL in seconds (0 = no expiry). Extracted from `ttl` column if present.
        ttl_secs: u64,
    },
    /// UPSERT: insert or merge if document exists.
    Upsert {
        collection: String,
        engine: EngineType,
        rows: Vec<Vec<(String, SqlValue)>>,
        column_defaults: Vec<(String, String)>,
        /// `ON CONFLICT (...) DO UPDATE SET field = expr` assignments.
        /// When empty, upsert is a plain merge: new columns overwrite existing.
        /// When non-empty, the engine applies these per-row against the
        /// *existing* document instead of merging the inserted values.
        on_conflict_updates: Vec<(String, SqlExpr)>,
    },
    InsertSelect {
        target: String,
        source: Box<SqlPlan>,
        limit: usize,
    },
    Update {
        collection: String,
        engine: EngineType,
        assignments: Vec<(String, SqlExpr)>,
        filters: Vec<Filter>,
        target_keys: Vec<SqlValue>,
        returning: bool,
    },
    Delete {
        collection: String,
        engine: EngineType,
        filters: Vec<Filter>,
        target_keys: Vec<SqlValue>,
    },
    Truncate {
        collection: String,
        restart_identity: bool,
    },

    // ── Joins ──
    Join {
        left: Box<SqlPlan>,
        right: Box<SqlPlan>,
        on: Vec<(String, String)>,
        join_type: JoinType,
        condition: Option<SqlExpr>,
        limit: usize,
        /// Post-join projection: column names to keep (empty = all columns).
        projection: Vec<Projection>,
        /// Post-join filters (from WHERE clause).
        filters: Vec<Filter>,
    },

    // ── Aggregation ──
    Aggregate {
        input: Box<SqlPlan>,
        group_by: Vec<SqlExpr>,
        aggregates: Vec<AggregateExpr>,
        having: Vec<Filter>,
        limit: usize,
    },

    // ── Timeseries ──
    TimeseriesScan {
        collection: String,
        time_range: (i64, i64),
        bucket_interval_ms: i64,
        group_by: Vec<String>,
        aggregates: Vec<AggregateExpr>,
        filters: Vec<Filter>,
        projection: Vec<Projection>,
        gap_fill: String,
        limit: usize,
        tiered: bool,
    },
    TimeseriesIngest {
        collection: String,
        rows: Vec<Vec<(String, SqlValue)>>,
    },

    // ── Search (first-class) ──
    VectorSearch {
        collection: String,
        field: String,
        query_vector: Vec<f32>,
        top_k: usize,
        ef_search: usize,
        filters: Vec<Filter>,
    },
    MultiVectorSearch {
        collection: String,
        query_vector: Vec<f32>,
        top_k: usize,
        ef_search: usize,
    },
    TextSearch {
        collection: String,
        query: String,
        top_k: usize,
        fuzzy: bool,
        filters: Vec<Filter>,
    },
    HybridSearch {
        collection: String,
        query_vector: Vec<f32>,
        query_text: String,
        top_k: usize,
        ef_search: usize,
        vector_weight: f32,
        fuzzy: bool,
    },
    SpatialScan {
        collection: String,
        field: String,
        predicate: SpatialPredicate,
        query_geometry: Vec<u8>,
        distance_meters: f64,
        attribute_filters: Vec<Filter>,
        limit: usize,
        projection: Vec<Projection>,
    },

    // ── Composite ──
    Union {
        inputs: Vec<SqlPlan>,
        distinct: bool,
    },
    Intersect {
        left: Box<SqlPlan>,
        right: Box<SqlPlan>,
        all: bool,
    },
    Except {
        left: Box<SqlPlan>,
        right: Box<SqlPlan>,
        all: bool,
    },
    RecursiveScan {
        collection: String,
        base_filters: Vec<Filter>,
        recursive_filters: Vec<Filter>,
        /// Equi-join link for tree-traversal recursion:
        /// `(collection_field, working_table_field)`.
        /// e.g. `("parent_id", "id")` means each iteration finds rows
        /// where `collection.parent_id` matches a `working_table.id`.
        join_link: Option<(String, String)>,
        max_iterations: usize,
        distinct: bool,
        limit: usize,
    },

    /// Non-recursive CTE: execute each definition, then the outer query.
    Cte {
        /// CTE definitions: `(name, subquery_plan)`.
        definitions: Vec<(String, SqlPlan)>,
        /// The outer query that references CTE names.
        outer: Box<SqlPlan>,
    },
}

/// Database engine type for a collection.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum EngineType {
    DocumentSchemaless,
    DocumentStrict,
    KeyValue,
    Columnar,
    Timeseries,
    Spatial,
}

/// SQL join type.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum JoinType {
    Inner,
    Left,
    Right,
    Full,
    Semi,
    Anti,
    Cross,
}

impl JoinType {
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::Inner => "inner",
            Self::Left => "left",
            Self::Right => "right",
            Self::Full => "full",
            Self::Semi => "semi",
            Self::Anti => "anti",
            Self::Cross => "cross",
        }
    }
}

/// Spatial predicate types.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SpatialPredicate {
    DWithin,
    Contains,
    Intersects,
    Within,
}

/// A filter predicate.
#[derive(Debug, Clone)]
pub struct Filter {
    pub expr: FilterExpr,
}

/// Filter expression tree.
#[derive(Debug, Clone)]
pub enum FilterExpr {
    Comparison {
        field: String,
        op: CompareOp,
        value: SqlValue,
    },
    Like {
        field: String,
        pattern: String,
    },
    InList {
        field: String,
        values: Vec<SqlValue>,
    },
    Between {
        field: String,
        low: SqlValue,
        high: SqlValue,
    },
    IsNull {
        field: String,
    },
    IsNotNull {
        field: String,
    },
    And(Vec<Filter>),
    Or(Vec<Filter>),
    Not(Box<Filter>),
    /// Raw expression filter (for complex predicates that don't fit simple patterns).
    Expr(SqlExpr),
}

/// Comparison operators.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CompareOp {
    Eq,
    Ne,
    Gt,
    Ge,
    Lt,
    Le,
}

/// Projection item in SELECT.
#[derive(Debug, Clone)]
pub enum Projection {
    /// Simple column reference: `SELECT name`
    Column(String),
    /// All columns: `SELECT *`
    Star,
    /// Qualified star: `SELECT t.*`
    QualifiedStar(String),
    /// Computed expression: `SELECT price * qty AS total`
    Computed { expr: SqlExpr, alias: String },
}

/// Sort key for ORDER BY.
#[derive(Debug, Clone)]
pub struct SortKey {
    pub expr: SqlExpr,
    pub ascending: bool,
    pub nulls_first: bool,
}

/// Aggregate expression: `COUNT(*)`, `SUM(amount)`, etc.
#[derive(Debug, Clone)]
pub struct AggregateExpr {
    pub function: String,
    pub args: Vec<SqlExpr>,
    pub alias: String,
    pub distinct: bool,
}

/// Window function specification.
#[derive(Debug, Clone)]
pub struct WindowSpec {
    pub function: String,
    pub args: Vec<SqlExpr>,
    pub partition_by: Vec<SqlExpr>,
    pub order_by: Vec<SortKey>,
    pub alias: String,
}

/// SQL value literal.
#[derive(Debug, Clone, PartialEq)]
pub enum SqlValue {
    Int(i64),
    Float(f64),
    String(String),
    Bool(bool),
    Null,
    Bytes(Vec<u8>),
    Array(Vec<SqlValue>),
}

/// SQL expression tree.
#[derive(Debug, Clone)]
pub enum SqlExpr {
    /// Column reference, optionally qualified: `name` or `users.name`
    Column { table: Option<String>, name: String },
    /// Literal value.
    Literal(SqlValue),
    /// Binary operation: `a + b`, `x > 5`
    BinaryOp {
        left: Box<SqlExpr>,
        op: BinaryOp,
        right: Box<SqlExpr>,
    },
    /// Unary operation: `-x`, `NOT flag`
    UnaryOp { op: UnaryOp, expr: Box<SqlExpr> },
    /// Function call: `COUNT(*)`, `vector_distance(field, ARRAY[...])`
    Function {
        name: String,
        args: Vec<SqlExpr>,
        distinct: bool,
    },
    /// CASE WHEN ... THEN ... ELSE ... END
    Case {
        operand: Option<Box<SqlExpr>>,
        when_then: Vec<(SqlExpr, SqlExpr)>,
        else_expr: Option<Box<SqlExpr>>,
    },
    /// CAST(expr AS type)
    Cast { expr: Box<SqlExpr>, to_type: String },
    /// Subquery expression (IN, EXISTS, scalar)
    Subquery(Box<SqlPlan>),
    /// Wildcard `*`
    Wildcard,
    /// `IS NULL` / `IS NOT NULL`
    IsNull { expr: Box<SqlExpr>, negated: bool },
    /// `expr IN (values)`
    InList {
        expr: Box<SqlExpr>,
        list: Vec<SqlExpr>,
        negated: bool,
    },
    /// `expr BETWEEN low AND high`
    Between {
        expr: Box<SqlExpr>,
        low: Box<SqlExpr>,
        high: Box<SqlExpr>,
        negated: bool,
    },
    /// `expr LIKE pattern`
    Like {
        expr: Box<SqlExpr>,
        pattern: Box<SqlExpr>,
        negated: bool,
    },
    /// Array literal: `ARRAY[1.0, 2.0, 3.0]`
    ArrayLiteral(Vec<SqlExpr>),
}

/// Binary operators.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BinaryOp {
    // Arithmetic
    Add,
    Sub,
    Mul,
    Div,
    Mod,
    // Comparison
    Eq,
    Ne,
    Gt,
    Ge,
    Lt,
    Le,
    // Logical
    And,
    Or,
    // String
    Concat,
}

/// Unary operators.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UnaryOp {
    Neg,
    Not,
}

/// SQL data type for schema resolution.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SqlDataType {
    Int64,
    Float64,
    String,
    Bool,
    Bytes,
    Timestamp,
    Decimal,
    Uuid,
    Vector(usize),
    Geometry,
}

// ── Catalog trait ──
// The `SqlCatalog` trait itself and its error type live in
// `crate::catalog` to keep this file under the 500-line limit.
// Re-exported here so downstream modules that `use crate::types::*`
// keep resolving `SqlCatalog` without changing their imports.
pub use crate::catalog::{SqlCatalog, SqlCatalogError};

/// Metadata about a collection for query planning.
#[derive(Debug, Clone)]
pub struct CollectionInfo {
    pub name: String,
    pub engine: EngineType,
    pub columns: Vec<ColumnInfo>,
    pub primary_key: Option<String>,
    pub has_auto_tier: bool,
    /// Secondary indexes available for planner rewrites. Populated by the
    /// catalog adapter from `StoredCollection.indexes`. `Building` entries
    /// are included so the planner can see them but MUST be skipped when
    /// choosing an index lookup — only `Ready` indexes back query rewrites.
    pub indexes: Vec<IndexSpec>,
}

/// Secondary index metadata surfaced to the SQL planner.
#[derive(Debug, Clone)]
pub struct IndexSpec {
    pub name: String,
    /// Canonical field path (`$.email`, `$.user.name`, or plain column name
    /// for strict documents — the catalog layer stores them uniformly).
    pub field: String,
    pub unique: bool,
    pub case_insensitive: bool,
    /// Build state. Only `Ready` indexes drive query rewrites.
    pub state: IndexState,
    /// Partial-index predicate as raw SQL text (`WHERE <expr>` body
    /// without the keyword), or `None` for full indexes. The planner
    /// uses this to reject rewrites whose WHERE clause doesn't entail
    /// the predicate — matching against such a partial index would
    /// omit rows the index didn't cover.
    pub predicate: Option<String>,
}

/// Planner-facing index state. Mirrors the catalog variant but lives here
/// so the SQL crate doesn't depend on `nodedb` internals.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IndexState {
    Building,
    Ready,
}

/// Metadata about a single column.
#[derive(Debug, Clone)]
pub struct ColumnInfo {
    pub name: String,
    pub data_type: SqlDataType,
    pub nullable: bool,
    pub is_primary_key: bool,
    /// Default value expression (e.g. "UUID_V7", "ULID", "NANOID(10)", "0", "'active'").
    pub default: Option<String>,
}