Skip to main content

sqlrite/sql/db/
table.rs

use crate::error::{Result, SQLRiteError};
use crate::sql::db::secondary_index::{IndexOrigin, SecondaryIndex};
use crate::sql::fts::PostingList;
use crate::sql::hnsw::HnswIndex;
use crate::sql::parser::create::{CreateQuery, ParsedColumn};
use std::collections::{BTreeMap, BTreeSet, HashMap};
use std::fmt;
use std::sync::{Arc, Mutex};

use prettytable::{Cell as PrintCell, Row as PrintRow, Table as PrintTable};
11
/// SQLRite column data types.
///
/// Modelled on SQLite's storage classes and type affinities; see
/// [Datatypes In SQLite Version 3](https://www.sqlite.org/datatype3.html).
///
/// `Vector(dim)` is the Phase 7a addition — a fixed-dimension dense f32
/// array. The dimension is part of the type so a `VECTOR(384)` column
/// rejects `[0.1, 0.2, 0.3]` at INSERT time as a clean type error
/// rather than silently storing the wrong shape.
#[derive(PartialEq, Debug, Clone)]
pub enum DataType {
    Integer,
    Text,
    Real,
    Bool,
    /// Dense f32 vector of fixed dimension. The `usize` is the column's
    /// declared dimension; every value stored in the column must have
    /// exactly that many elements.
    Vector(usize),
    /// Phase 7e — JSON column. Stored as canonical UTF-8 text (matches
    /// SQLite's JSON1 extension), validated at INSERT time. The
    /// `json_extract` family of functions parses on demand and returns
    /// either a primitive `Value` (Integer / Real / Text / Bool / Null)
    /// or a Text value carrying the JSON-encoded sub-object/array.
    /// Q3 originally specified `bincoded serde_json::Value`, but bincode
    /// was removed from the engine in Phase 3c — see the scope-correction
    /// note in `docs/phase-7-plan.md` for the rationale on switching to
    /// text storage.
    Json,
    None,
    /// Produced by [`DataType::new`] for any string it cannot interpret.
    Invalid,
}

impl DataType {
    /// Parses the wire string the parser produces into a `DataType`.
    ///
    /// Matching is case-insensitive. Recognised spellings are `integer`,
    /// `text`, `real`, `bool`, `json`, `none`, and `vector(N)` where `N`
    /// is a positive integer (whitespace around `N` is tolerated). The
    /// vector dimension is encoded in-band so the existing string-based
    /// `ParsedColumn` pipeline doesn't need a richer type.
    ///
    /// Anything else — including `vector(0)` or a non-numeric dimension —
    /// logs a message to stderr and yields `DataType::Invalid`.
    pub fn new(cmd: String) -> DataType {
        let lowered = cmd.to_lowercase();

        // `vector(<dim>)` is the only parameterised spelling, so peel it
        // off before the fixed-name lookup below.
        let vector_dim = lowered
            .strip_prefix("vector(")
            .and_then(|rest| rest.strip_suffix(')'));
        if let Some(dim_text) = vector_dim {
            return match dim_text.trim().parse::<usize>() {
                Ok(dim) if dim > 0 => DataType::Vector(dim),
                _ => {
                    eprintln!("Invalid VECTOR dimension in {cmd}");
                    DataType::Invalid
                }
            };
        }

        match lowered.as_str() {
            "integer" => DataType::Integer,
            "text" => DataType::Text,
            "real" => DataType::Real,
            "bool" => DataType::Bool,
            "json" => DataType::Json,
            "none" => DataType::None,
            _ => {
                eprintln!("Invalid data type given {}", cmd);
                DataType::Invalid
            }
        }
    }

    /// Inverse of [`DataType::new`] — returns the wire string for this
    /// type, used by the parser to round-trip `VECTOR(N)` →
    /// `DataType::Vector(N)` → `"vector(N)"` into `ParsedColumn::datatype`
    /// so the rest of the pipeline keeps working with strings.
    ///
    /// Note the casing: only the vector form is lowercased; the scalar
    /// names come back capitalised (`"Integer"`, `"Text"`, …). Feeding the
    /// result back into `new` still yields the same variant because `new`
    /// lowercases its input first.
    pub fn to_wire_string(&self) -> String {
        let name = match self {
            DataType::Vector(dim) => return format!("vector({dim})"),
            DataType::Integer => "Integer",
            DataType::Text => "Text",
            DataType::Real => "Real",
            DataType::Bool => "Bool",
            DataType::Json => "Json",
            DataType::None => "None",
            DataType::Invalid => "Invalid",
        };
        name.to_string()
    }
}

/// Human-readable type name. Differs from the wire string for two
/// variants: `Bool` renders as `Boolean` and vectors as `Vector(N)`.
impl fmt::Display for DataType {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let label = match self {
            DataType::Vector(dim) => return write!(f, "Vector({dim})"),
            DataType::Integer => "Integer",
            DataType::Text => "Text",
            DataType::Real => "Real",
            DataType::Bool => "Boolean",
            DataType::Json => "Json",
            DataType::None => "None",
            DataType::Invalid => "Invalid",
        };
        f.write_str(label)
    }
}
113
/// In-memory representation of one SQL table: its schema, its row data
/// (stored column-by-column), and every index attached to it.
///
/// `rows` is `Arc<Mutex<...>>` rather than `Rc<RefCell<...>>` so `Table`
/// (and by extension `Database`) is `Send + Sync` — the Tauri desktop
/// app holds the engine in shared state behind a `Mutex<Database>`, and
/// Tauri's state container requires its contents to be thread-safe.
///
/// `Clone` is deliberately not derived: a derived clone would share the
/// `Arc` and therefore the row data. Use `Table::deep_clone` for an
/// independent copy.
#[derive(Debug)]
pub struct Table {
    /// Name of the table
    pub tb_name: String,
    /// Schema for each column, in declaration order.
    pub columns: Vec<Column>,
    /// Per-column row storage, keyed by column name. Each column's `Row`
    /// variant wraps a `BTreeMap` keyed by rowid. A rowid is absent from
    /// a column's map when that cell is SQL NULL (see `restore_row`), so
    /// the per-column keysets are only identical when no cell is NULL.
    pub rows: Arc<Mutex<HashMap<String, Row>>>,
    /// Secondary indexes on this table (Phase 3e). One auto-created entry
    /// per UNIQUE or PRIMARY KEY column; explicit `CREATE INDEX` statements
    /// add more. Looking up an index: iterate by column name, or by index
    /// name via `Table::index_by_name`.
    pub secondary_indexes: Vec<SecondaryIndex>,
    /// HNSW indexes on VECTOR columns (Phase 7d.2). Maintained in lockstep
    /// with row storage on INSERT (incremental); rebuilt on open from the
    /// persisted CREATE INDEX SQL. The graph itself is NOT yet persisted —
    /// see Phase 7d.3 for cell-encoded graph storage.
    pub hnsw_indexes: Vec<HnswIndexEntry>,
    /// FTS inverted indexes on TEXT columns (Phase 8b). Maintained in
    /// lockstep with row storage on INSERT (incremental); DELETE / UPDATE
    /// flag `needs_rebuild` and the next save rebuilds from current rows.
    /// The posting lists themselves are NOT yet persisted — Phase 8c
    /// wires the cell-encoded `KIND_FTS_POSTING` storage.
    pub fts_indexes: Vec<FtsIndexEntry>,
    /// ROWID of most recent insert. `restore_row` also raises it to the
    /// largest rowid replayed from disk. Starts at 0 for a new table.
    pub last_rowid: i64,
    /// PRIMARY KEY column name, or the sentinel string "-1" if the table
    /// has no PRIMARY KEY.
    pub primary_key: String,
}
152
/// One HNSW index attached to a table: the graph plus the metadata
/// (name, covered column, dirty flag) needed to maintain it.
///
/// Phase 7d.2 only supports L2 distance; cosine and dot are 7d.x
/// follow-ups (would require either distinct USING methods like
/// `hnsw_cosine` or a `WITH (metric = …)` clause — see
/// `docs/phase-7-plan.md` for the deferred decision).
#[derive(Debug, Clone)]
pub struct HnswIndexEntry {
    /// User-supplied name from `CREATE INDEX <name> …`. Unique across
    /// both `secondary_indexes` and `hnsw_indexes` on a given table.
    pub name: String,
    /// The VECTOR column this index covers. Kept in sync by
    /// `Table::rename_column`; the entry is removed by `Table::drop_column`.
    pub column_name: String,
    /// The graph itself.
    pub index: HnswIndex,
    /// Phase 7d.3 — true iff a DELETE or UPDATE-on-vector-col has
    /// invalidated the graph since the last rebuild. INSERT maintains
    /// the graph incrementally and leaves this false. The next save
    /// rebuilds dirty indexes from current rows before serializing.
    pub needs_rebuild: bool,
}
172
/// One FTS index attached to a table (Phase 8b). The inverted index
/// itself is a [`PostingList`]; metadata (name, column, dirty flag)
/// lives here. Mirrors [`HnswIndexEntry`] field-for-field so the
/// rebuild-on-save and DELETE/UPDATE invalidation paths can use one
/// pattern across both index families.
#[derive(Debug, Clone)]
pub struct FtsIndexEntry {
    /// User-supplied name from `CREATE INDEX <name> … USING fts(<col>)`.
    /// Unique across `secondary_indexes`, `hnsw_indexes`, and
    /// `fts_indexes` on a given table.
    pub name: String,
    /// The TEXT column this index covers. Kept in sync by
    /// `Table::rename_column`; the entry is removed by `Table::drop_column`.
    pub column_name: String,
    /// The inverted index + per-doc length cache.
    pub index: PostingList,
    /// True iff a DELETE or UPDATE-on-text-col has invalidated the
    /// posting lists since the last rebuild. INSERT maintains the
    /// index incrementally and leaves this false. The next save
    /// rebuilds dirty indexes from current rows before serializing
    /// (mirrors HNSW's Q7 strategy).
    pub needs_rebuild: bool,
}
195
196impl Table {
197    pub fn new(create_query: CreateQuery) -> Self {
198        let table_name = create_query.table_name;
199        let mut primary_key: String = String::from("-1");
200        let columns = create_query.columns;
201
202        let mut table_cols: Vec<Column> = vec![];
203        let table_rows: Arc<Mutex<HashMap<String, Row>>> = Arc::new(Mutex::new(HashMap::new()));
204        let mut secondary_indexes: Vec<SecondaryIndex> = Vec::new();
205        for col in &columns {
206            let col_name = &col.name;
207            if col.is_pk {
208                primary_key = col_name.to_string();
209            }
210            table_cols.push(Column::with_default(
211                col_name.to_string(),
212                col.datatype.to_string(),
213                col.is_pk,
214                col.not_null,
215                col.is_unique,
216                col.default.clone(),
217            ));
218
219            let dt = DataType::new(col.datatype.to_string());
220            let row_storage = match &dt {
221                DataType::Integer => Row::Integer(BTreeMap::new()),
222                DataType::Real => Row::Real(BTreeMap::new()),
223                DataType::Text => Row::Text(BTreeMap::new()),
224                DataType::Bool => Row::Bool(BTreeMap::new()),
225                // The dimension is enforced at INSERT time against the
226                // column's declared DataType::Vector(dim). The Row variant
227                // itself doesn't carry the dim — every stored Vec<f32>
228                // already has it via .len().
229                DataType::Vector(_dim) => Row::Vector(BTreeMap::new()),
230                // Phase 7e — JSON columns reuse Text storage (with
231                // INSERT-time validation that the bytes parse as JSON).
232                // No new Row variant; json_extract / json_type / etc.
233                // re-parse from text on demand. See `docs/phase-7-plan.md`
234                // Q3's scope-correction note for the storage choice.
235                DataType::Json => Row::Text(BTreeMap::new()),
236                DataType::Invalid | DataType::None => Row::None,
237            };
238            table_rows
239                .lock()
240                .expect("Table row storage mutex poisoned")
241                .insert(col.name.to_string(), row_storage);
242
243            // Auto-create an index for every UNIQUE / PRIMARY KEY column,
244            // but only for types we know how to index. Real / Bool / Vector
245            // UNIQUE columns fall back to the linear scan path in
246            // validate_unique_constraint — same behavior as before 3e.
247            // (Vector UNIQUE is unusual; the linear-scan path will work
248            // via Value::Vector PartialEq, just at O(N) cost.)
249            if (col.is_pk || col.is_unique) && matches!(dt, DataType::Integer | DataType::Text) {
250                let name = SecondaryIndex::auto_name(&table_name, &col.name);
251                match SecondaryIndex::new(
252                    name,
253                    table_name.clone(),
254                    col.name.clone(),
255                    &dt,
256                    true,
257                    IndexOrigin::Auto,
258                ) {
259                    Ok(idx) => secondary_indexes.push(idx),
260                    Err(_) => {
261                        // Unreachable given the matches! guard above, but
262                        // the builder returns Result so we keep the arm.
263                    }
264                }
265            }
266        }
267
268        Table {
269            tb_name: table_name,
270            columns: table_cols,
271            rows: table_rows,
272            secondary_indexes,
273            // HNSW indexes only land via explicit CREATE INDEX … USING hnsw
274            // statements (Phase 7d.2); never auto-created at CREATE TABLE
275            // time, because there's no UNIQUE-style constraint that
276            // implies a vector index.
277            hnsw_indexes: Vec::new(),
278            // Same story for FTS indexes — explicit `CREATE INDEX … USING
279            // fts(<col>)` only (Phase 8b).
280            fts_indexes: Vec::new(),
281            last_rowid: 0,
282            primary_key,
283        }
284    }
285
286    /// Deep-clones a `Table` for transaction snapshots (Phase 4f).
287    ///
288    /// The normal `Clone` derive would shallow-clone the `Arc<Mutex<_>>`
289    /// wrapping our row storage, leaving both copies sharing the same
290    /// inner map — mutating the snapshot would corrupt the live table
291    /// and vice versa. Instead we lock, clone the inner `HashMap`, and
292    /// wrap it in a fresh `Arc<Mutex<_>>`. Columns and indexes derive
293    /// `Clone` directly (all their fields are plain data).
294    pub fn deep_clone(&self) -> Self {
295        let cloned_rows: HashMap<String, Row> = {
296            let guard = self.rows.lock().expect("row mutex poisoned");
297            guard.clone()
298        };
299        Table {
300            tb_name: self.tb_name.clone(),
301            columns: self.columns.clone(),
302            rows: Arc::new(Mutex::new(cloned_rows)),
303            secondary_indexes: self.secondary_indexes.clone(),
304            // HnswIndexEntry derives Clone, so the snapshot owns its own
305            // graph copy. Phase 4f's snapshot-rollback semantics require
306            // the snapshot to be fully decoupled from live state.
307            hnsw_indexes: self.hnsw_indexes.clone(),
308            // Same fully-decoupled clone for FTS indexes (Phase 8b).
309            fts_indexes: self.fts_indexes.clone(),
310            last_rowid: self.last_rowid,
311            primary_key: self.primary_key.clone(),
312        }
313    }
314
315    /// Finds an auto- or explicit-index entry for a given column. Returns
316    /// `None` if the column isn't indexed.
317    pub fn index_for_column(&self, column: &str) -> Option<&SecondaryIndex> {
318        self.secondary_indexes
319            .iter()
320            .find(|i| i.column_name == column)
321    }
322
323    fn index_for_column_mut(&mut self, column: &str) -> Option<&mut SecondaryIndex> {
324        self.secondary_indexes
325            .iter_mut()
326            .find(|i| i.column_name == column)
327    }
328
329    /// Finds a secondary index by its own name (e.g., `sqlrite_autoindex_users_email`
330    /// or a user-provided CREATE INDEX name). Used by DROP INDEX and the
331    /// rename helpers below.
332    pub fn index_by_name(&self, name: &str) -> Option<&SecondaryIndex> {
333        self.secondary_indexes.iter().find(|i| i.name == name)
334    }
335
336    /// Renames a column in place. Updates row storage, the `Column`
337    /// metadata, every secondary / HNSW / FTS index whose `column_name`
338    /// matches, the `primary_key` pointer if the renamed column is the
339    /// PK, and any auto-index name that embedded the old column name.
340    ///
341    /// Caller-side validation (table existence, source-column existence
342    /// at the surface level, IF EXISTS) lives in the executor; this
343    /// method enforces the column-level invariants that have to be
344    /// checked under the `Table` borrow anyway.
345    pub fn rename_column(&mut self, old: &str, new: &str) -> Result<()> {
346        if !self.columns.iter().any(|c| c.column_name == old) {
347            return Err(SQLRiteError::General(format!(
348                "column '{old}' does not exist in table '{}'",
349                self.tb_name
350            )));
351        }
352        if old != new && self.columns.iter().any(|c| c.column_name == new) {
353            return Err(SQLRiteError::General(format!(
354                "column '{new}' already exists in table '{}'",
355                self.tb_name
356            )));
357        }
358        if old == new {
359            return Ok(());
360        }
361
362        for col in self.columns.iter_mut() {
363            if col.column_name == old {
364                col.column_name = new.to_string();
365            }
366        }
367
368        // Re-key the per-column row map.
369        {
370            let mut rows = self.rows.lock().expect("rows mutex poisoned");
371            if let Some(storage) = rows.remove(old) {
372                rows.insert(new.to_string(), storage);
373            }
374        }
375
376        if self.primary_key == old {
377            self.primary_key = new.to_string();
378        }
379
380        let table_name = self.tb_name.clone();
381        for idx in self.secondary_indexes.iter_mut() {
382            if idx.column_name == old {
383                idx.column_name = new.to_string();
384                if idx.origin == IndexOrigin::Auto
385                    && idx.name == SecondaryIndex::auto_name(&table_name, old)
386                {
387                    idx.name = SecondaryIndex::auto_name(&table_name, new);
388                }
389            }
390        }
391        for entry in self.hnsw_indexes.iter_mut() {
392            if entry.column_name == old {
393                entry.column_name = new.to_string();
394            }
395        }
396        for entry in self.fts_indexes.iter_mut() {
397            if entry.column_name == old {
398                entry.column_name = new.to_string();
399            }
400        }
401
402        Ok(())
403    }
404
405    /// Appends a new column to this table from a parsed column spec.
406    /// The new column's row storage is allocated empty; existing rowids
407    /// read NULL for the new column unless `parsed.default` is set, in
408    /// which case those rowids are backfilled with the default value.
409    ///
410    /// Rejects PK / UNIQUE on the added column (would require
411    /// backfill-with-uniqueness-check against existing rows). Rejects
412    /// NOT NULL without DEFAULT on a non-empty table — same rule SQLite
413    /// applies, and necessary because we have no other backfill source.
414    pub fn add_column(&mut self, parsed: ParsedColumn) -> Result<()> {
415        if self.contains_column(parsed.name.clone()) {
416            return Err(SQLRiteError::General(format!(
417                "column '{}' already exists in table '{}'",
418                parsed.name, self.tb_name
419            )));
420        }
421        if parsed.is_pk {
422            return Err(SQLRiteError::General(
423                "cannot ADD COLUMN with PRIMARY KEY constraint on existing table".to_string(),
424            ));
425        }
426        if parsed.is_unique {
427            return Err(SQLRiteError::General(
428                "cannot ADD COLUMN with UNIQUE constraint on existing table".to_string(),
429            ));
430        }
431        let table_has_rows = self
432            .columns
433            .first()
434            .map(|c| {
435                self.rows
436                    .lock()
437                    .expect("rows mutex poisoned")
438                    .get(&c.column_name)
439                    .map(|r| r.rowids().len())
440                    .unwrap_or(0)
441                    > 0
442            })
443            .unwrap_or(false);
444        if parsed.not_null && parsed.default.is_none() && table_has_rows {
445            return Err(SQLRiteError::General(format!(
446                "cannot ADD COLUMN '{}' NOT NULL without DEFAULT to a non-empty table",
447                parsed.name
448            )));
449        }
450
451        let new_column = Column::with_default(
452            parsed.name.clone(),
453            parsed.datatype.clone(),
454            parsed.is_pk,
455            parsed.not_null,
456            parsed.is_unique,
457            parsed.default.clone(),
458        );
459
460        // Allocate empty row storage for the new column. Mirrors the
461        // dispatch in `Table::new` so the new column behaves identically
462        // to one declared at CREATE TABLE time.
463        let row_storage = match &new_column.datatype {
464            DataType::Integer => Row::Integer(BTreeMap::new()),
465            DataType::Real => Row::Real(BTreeMap::new()),
466            DataType::Text => Row::Text(BTreeMap::new()),
467            DataType::Bool => Row::Bool(BTreeMap::new()),
468            DataType::Vector(_dim) => Row::Vector(BTreeMap::new()),
469            DataType::Json => Row::Text(BTreeMap::new()),
470            DataType::Invalid | DataType::None => Row::None,
471        };
472        {
473            let mut rows = self.rows.lock().expect("rows mutex poisoned");
474            rows.insert(parsed.name.clone(), row_storage);
475        }
476
477        // Backfill existing rowids with the default value, if any.
478        // NULL defaults are a no-op — a missing key in the BTreeMap reads
479        // as NULL anyway. Type mismatches were caught at `parse_one_column`
480        // time when the DEFAULT was evaluated against the declared
481        // datatype; reaching the `_` arm here would indicate a bug.
482        if let Some(default) = &parsed.default {
483            let existing_rowids = self.rowids();
484            let mut rows = self.rows.lock().expect("rows mutex poisoned");
485            let storage = rows.get_mut(&parsed.name).expect("just inserted");
486            match (storage, default) {
487                (Row::Integer(tree), Value::Integer(v)) => {
488                    let v32 = *v as i32;
489                    for rowid in existing_rowids {
490                        tree.insert(rowid, v32);
491                    }
492                }
493                (Row::Real(tree), Value::Real(v)) => {
494                    let v32 = *v as f32;
495                    for rowid in existing_rowids {
496                        tree.insert(rowid, v32);
497                    }
498                }
499                (Row::Text(tree), Value::Text(v)) => {
500                    for rowid in existing_rowids {
501                        tree.insert(rowid, v.clone());
502                    }
503                }
504                (Row::Bool(tree), Value::Bool(v)) => {
505                    for rowid in existing_rowids {
506                        tree.insert(rowid, *v);
507                    }
508                }
509                (_, Value::Null) => {} // no-op
510                (storage_ref, _) => {
511                    return Err(SQLRiteError::Internal(format!(
512                        "DEFAULT type does not match column storage for '{}': storage variant {:?}, default {:?}",
513                        parsed.name,
514                        std::mem::discriminant(storage_ref),
515                        default
516                    )));
517                }
518            }
519        }
520
521        self.columns.push(new_column);
522        Ok(())
523    }
524
525    /// Removes a column from this table. Refuses to drop the PRIMARY KEY
526    /// column or the only remaining column. Cascades to every index
527    /// (auto, explicit, HNSW, FTS) that referenced the column.
528    pub fn drop_column(&mut self, name: &str) -> Result<()> {
529        if !self.contains_column(name.to_string()) {
530            return Err(SQLRiteError::General(format!(
531                "column '{name}' does not exist in table '{}'",
532                self.tb_name
533            )));
534        }
535        if self.primary_key == name {
536            return Err(SQLRiteError::General(format!(
537                "cannot drop primary key column '{name}'"
538            )));
539        }
540        if self.columns.len() == 1 {
541            return Err(SQLRiteError::General(format!(
542                "cannot drop the only column of table '{}'",
543                self.tb_name
544            )));
545        }
546
547        self.columns.retain(|c| c.column_name != name);
548        {
549            let mut rows = self.rows.lock().expect("rows mutex poisoned");
550            rows.remove(name);
551        }
552        self.secondary_indexes.retain(|i| i.column_name != name);
553        self.hnsw_indexes.retain(|i| i.column_name != name);
554        self.fts_indexes.retain(|i| i.column_name != name);
555
556        Ok(())
557    }
558
559    /// Returns a `bool` informing if a `Column` with a specific name exists or not
560    ///
561    pub fn contains_column(&self, column: String) -> bool {
562        self.columns.iter().any(|col| col.column_name == column)
563    }
564
565    /// Returns the list of column names in declaration order.
566    pub fn column_names(&self) -> Vec<String> {
567        self.columns.iter().map(|c| c.column_name.clone()).collect()
568    }
569
570    /// Returns all rowids currently stored in the table, in ascending order.
571    /// Every column's BTreeMap has the same keyset, so we just read from the first column.
572    pub fn rowids(&self) -> Vec<i64> {
573        let Some(first) = self.columns.first() else {
574            return vec![];
575        };
576        let rows = self.rows.lock().expect("rows mutex poisoned");
577        rows.get(&first.column_name)
578            .map(|r| r.rowids())
579            .unwrap_or_default()
580    }
581
582    /// Reads a single cell at `(column, rowid)`.
583    pub fn get_value(&self, column: &str, rowid: i64) -> Option<Value> {
584        let rows = self.rows.lock().expect("rows mutex poisoned");
585        rows.get(column).and_then(|r| r.get(rowid))
586    }
587
588    /// Removes the row identified by `rowid` from every column's storage and
589    /// from every secondary index entry.
590    pub fn delete_row(&mut self, rowid: i64) {
591        // Snapshot the values we're about to delete so we can strip them
592        // from secondary indexes by (value, rowid) before the row storage
593        // disappears.
594        let per_column_values: Vec<(String, Option<Value>)> = self
595            .columns
596            .iter()
597            .map(|c| (c.column_name.clone(), self.get_value(&c.column_name, rowid)))
598            .collect();
599
600        // Remove from row storage.
601        {
602            let rows_clone = Arc::clone(&self.rows);
603            let mut row_data = rows_clone.lock().expect("rows mutex poisoned");
604            for col in &self.columns {
605                if let Some(r) = row_data.get_mut(&col.column_name) {
606                    match r {
607                        Row::Integer(m) => {
608                            m.remove(&rowid);
609                        }
610                        Row::Text(m) => {
611                            m.remove(&rowid);
612                        }
613                        Row::Real(m) => {
614                            m.remove(&rowid);
615                        }
616                        Row::Bool(m) => {
617                            m.remove(&rowid);
618                        }
619                        Row::Vector(m) => {
620                            m.remove(&rowid);
621                        }
622                        Row::None => {}
623                    }
624                }
625            }
626        }
627
628        // Strip secondary-index entries. Non-indexed columns just don't
629        // show up in secondary_indexes and are no-ops here.
630        for (col_name, value) in per_column_values {
631            if let Some(idx) = self.index_for_column_mut(&col_name) {
632                if let Some(v) = value {
633                    idx.remove(&v, rowid);
634                }
635            }
636        }
637    }
638
639    /// Replays a single row at `rowid` when loading a table from disk. Takes
640    /// one typed value per column (in declaration order); `None` means the
641    /// stored cell carried a NULL for that column. Unlike `insert_row` this
642    /// trusts the on-disk state and does *not* re-check UNIQUE — we're
643    /// rebuilding a state that was already consistent when it was saved.
644    pub fn restore_row(&mut self, rowid: i64, values: Vec<Option<Value>>) -> Result<()> {
645        if values.len() != self.columns.len() {
646            return Err(SQLRiteError::Internal(format!(
647                "cell has {} values but table '{}' has {} columns",
648                values.len(),
649                self.tb_name,
650                self.columns.len()
651            )));
652        }
653
654        let column_names: Vec<String> =
655            self.columns.iter().map(|c| c.column_name.clone()).collect();
656
657        for (i, value) in values.into_iter().enumerate() {
658            let col_name = &column_names[i];
659
660            // Write into the per-column row storage first (scoped borrow so
661            // the secondary-index update below doesn't fight over `self`).
662            {
663                let rows_clone = Arc::clone(&self.rows);
664                let mut row_data = rows_clone.lock().expect("rows mutex poisoned");
665                let cell = row_data.get_mut(col_name).ok_or_else(|| {
666                    SQLRiteError::Internal(format!("Row storage missing for column '{col_name}'"))
667                })?;
668
669                match (cell, &value) {
670                    // SQL NULL: leave the per-column BTreeMap entry
671                    // absent. `Row::*::get` returns `None` for missing
672                    // rowids, which `Table::get_value` relays and the
673                    // executor's `Identifier` arm renders as
674                    // `Value::Null`. Mirrors `insert_row`'s NULL path.
675                    (_, None) => { /* nothing to insert */ }
676                    (Row::Integer(map), Some(Value::Integer(v))) => {
677                        map.insert(rowid, *v as i32);
678                    }
679                    (Row::Text(map), Some(Value::Text(s))) => {
680                        map.insert(rowid, s.clone());
681                    }
682                    (Row::Real(map), Some(Value::Real(v))) => {
683                        map.insert(rowid, *v as f32);
684                    }
685                    (Row::Bool(map), Some(Value::Bool(v))) => {
686                        map.insert(rowid, *v);
687                    }
688                    (Row::Vector(map), Some(Value::Vector(v))) => {
689                        map.insert(rowid, v.clone());
690                    }
691                    (row, v) => {
692                        return Err(SQLRiteError::Internal(format!(
693                            "Type mismatch restoring column '{col_name}': storage {row:?} vs value {v:?}"
694                        )));
695                    }
696                }
697            }
698
699            // Maintain the secondary index (if any). NULL values are skipped
700            // by `insert`, matching the "NULL is not indexed" convention.
701            if let Some(v) = &value {
702                if let Some(idx) = self.index_for_column_mut(col_name) {
703                    idx.insert(v, rowid)?;
704                }
705            }
706        }
707
708        if rowid > self.last_rowid {
709            self.last_rowid = rowid;
710        }
711        Ok(())
712    }
713
714    /// Extracts a row as an ordered `Vec<Option<Value>>` matching the column
715    /// declaration order. Returns `None` entries for columns that hold NULL.
716    /// Used by `save_database` to turn a table's in-memory state into cells.
717    pub fn extract_row(&self, rowid: i64) -> Vec<Option<Value>> {
718        self.columns
719            .iter()
720            .map(|c| match self.get_value(&c.column_name, rowid) {
721                Some(Value::Null) => None,
722                Some(v) => Some(v),
723                None => None,
724            })
725            .collect()
726    }
727
728    /// Overwrites the cell at `(column, rowid)` with `new_val`. Enforces the
729    /// column's datatype and UNIQUE constraint, and updates any secondary
730    /// index.
731    ///
732    /// Returns `Err` if the column doesn't exist, the value type is incompatible,
733    /// or writing would violate UNIQUE.
734    pub fn set_value(&mut self, column: &str, rowid: i64, new_val: Value) -> Result<()> {
735        let col_index = self
736            .columns
737            .iter()
738            .position(|c| c.column_name == column)
739            .ok_or_else(|| SQLRiteError::General(format!("Column '{column}' not found")))?;
740
741        // No-op write — keep storage exactly the same.
742        let current = self.get_value(column, rowid);
743        if current.as_ref() == Some(&new_val) {
744            return Ok(());
745        }
746
747        // Enforce UNIQUE. Prefer an O(log N) index probe if we have one;
748        // fall back to a full column scan otherwise (Real/Bool UNIQUE
749        // columns, which don't get auto-indexed).
750        if self.columns[col_index].is_unique && !matches!(new_val, Value::Null) {
751            if let Some(idx) = self.index_for_column(column) {
752                for other in idx.lookup(&new_val) {
753                    if other != rowid {
754                        return Err(SQLRiteError::General(format!(
755                            "UNIQUE constraint violated for column '{column}'"
756                        )));
757                    }
758                }
759            } else {
760                for other in self.rowids() {
761                    if other == rowid {
762                        continue;
763                    }
764                    if self.get_value(column, other).as_ref() == Some(&new_val) {
765                        return Err(SQLRiteError::General(format!(
766                            "UNIQUE constraint violated for column '{column}'"
767                        )));
768                    }
769                }
770            }
771        }
772
773        // Drop the old index entry before writing the new value, so the
774        // post-write index insert doesn't clash with the previous state.
775        if let Some(old) = current {
776            if let Some(idx) = self.index_for_column_mut(column) {
777                idx.remove(&old, rowid);
778            }
779        }
780
781        // Write into the column's Row, type-checking against the declared DataType.
782        let declared = &self.columns[col_index].datatype;
783        {
784            let rows_clone = Arc::clone(&self.rows);
785            let mut row_data = rows_clone.lock().expect("rows mutex poisoned");
786            let cell = row_data.get_mut(column).ok_or_else(|| {
787                SQLRiteError::Internal(format!("Row storage missing for column '{column}'"))
788            })?;
789
790            match (cell, &new_val, declared) {
791                (Row::Integer(m), Value::Integer(v), _) => {
792                    m.insert(rowid, *v as i32);
793                }
794                (Row::Real(m), Value::Real(v), _) => {
795                    m.insert(rowid, *v as f32);
796                }
797                (Row::Real(m), Value::Integer(v), _) => {
798                    m.insert(rowid, *v as f32);
799                }
800                (Row::Text(m), Value::Text(v), dt) => {
801                    // Phase 7e — UPDATE on a JSON column also validates
802                    // the new text is well-formed JSON, mirroring INSERT.
803                    if matches!(dt, DataType::Json) {
804                        if let Err(e) = serde_json::from_str::<serde_json::Value>(v) {
805                            return Err(SQLRiteError::General(format!(
806                                "Type mismatch: expected JSON for column '{column}', got '{v}': {e}"
807                            )));
808                        }
809                    }
810                    m.insert(rowid, v.clone());
811                }
812                (Row::Bool(m), Value::Bool(v), _) => {
813                    m.insert(rowid, *v);
814                }
815                (Row::Vector(m), Value::Vector(v), DataType::Vector(declared_dim)) => {
816                    if v.len() != *declared_dim {
817                        return Err(SQLRiteError::General(format!(
818                            "Vector dimension mismatch for column '{column}': declared {declared_dim}, got {}",
819                            v.len()
820                        )));
821                    }
822                    m.insert(rowid, v.clone());
823                }
824                // NULL writes: store the sentinel "Null" string for Text; for other
825                // types we leave storage as-is since those BTreeMaps can't hold NULL today.
826                (Row::Text(m), Value::Null, _) => {
827                    m.insert(rowid, "Null".to_string());
828                }
829                (_, new, dt) => {
830                    return Err(SQLRiteError::General(format!(
831                        "Type mismatch: cannot assign {} to column '{column}' of type {dt}",
832                        new.to_display_string()
833                    )));
834                }
835            }
836        }
837
838        // Maintain the secondary index, if any. NULL values are skipped by
839        // insert per convention.
840        if !matches!(new_val, Value::Null) {
841            if let Some(idx) = self.index_for_column_mut(column) {
842                idx.insert(&new_val, rowid)?;
843            }
844        }
845
846        Ok(())
847    }
848
849    /// Returns an immutable reference of `sql::db::table::Column` if the table contains a
850    /// column with the specified key as a column name.
851    ///
852    #[allow(dead_code)]
853    pub fn get_column(&mut self, column_name: String) -> Result<&Column> {
854        if let Some(column) = self
855            .columns
856            .iter()
857            .filter(|c| c.column_name == column_name)
858            .collect::<Vec<&Column>>()
859            .first()
860        {
861            Ok(column)
862        } else {
863            Err(SQLRiteError::General(String::from("Column not found.")))
864        }
865    }
866
867    /// Validates if columns and values being inserted violate the UNIQUE constraint.
868    /// PRIMARY KEY columns are automatically UNIQUE. Uses the corresponding
869    /// secondary index when one exists (O(log N) lookup); falls back to a
870    /// linear scan for indexable-but-not-indexed situations (e.g. a Real
871    /// UNIQUE column — Real isn't in the auto-indexed set).
872    pub fn validate_unique_constraint(
873        &mut self,
874        cols: &Vec<String>,
875        values: &Vec<Option<Value>>,
876    ) -> Result<()> {
877        for (idx, name) in cols.iter().enumerate() {
878            let column = self
879                .columns
880                .iter()
881                .find(|c| &c.column_name == name)
882                .ok_or_else(|| SQLRiteError::General(format!("Column '{name}' not found")))?;
883            if !column.is_unique {
884                continue;
885            }
886            let datatype = &column.datatype;
887
888            // Standard SQL UNIQUE allows multiple NULLs — skip the check.
889            let supplied = match &values[idx] {
890                None => continue,
891                Some(v) => v,
892            };
893
894            // Type-check the supplied Value against the column's declared
895            // datatype. Same shape as the dispatch in `insert_row`: an
896            // INTEGER column accepts Value::Integer; REAL accepts Real or
897            // widens Integer; TEXT/JSON accepts Text; BOOL accepts Bool;
898            // VECTOR accepts Vector with a matching dimension. Anything
899            // else short-circuits the insert with the same error message
900            // `insert_row` would emit for the same input.
901            let parsed: Value = match (datatype, supplied) {
902                (DataType::Integer, Value::Integer(n)) => Value::Integer(*n),
903                (DataType::Integer, other) => {
904                    return Err(SQLRiteError::General(format!(
905                        "Type mismatch: expected INTEGER for column '{name}', got '{}'",
906                        other.to_display_string()
907                    )));
908                }
909                (DataType::Text, Value::Text(s)) => Value::Text(s.clone()),
910                (DataType::Text, other) => {
911                    return Err(SQLRiteError::General(format!(
912                        "Type mismatch: expected TEXT for column '{name}', got '{}'",
913                        other.to_display_string()
914                    )));
915                }
916                (DataType::Real, Value::Real(f)) => Value::Real(*f),
917                (DataType::Real, Value::Integer(n)) => Value::Real(*n as f64),
918                (DataType::Real, other) => {
919                    return Err(SQLRiteError::General(format!(
920                        "Type mismatch: expected REAL for column '{name}', got '{}'",
921                        other.to_display_string()
922                    )));
923                }
924                (DataType::Bool, Value::Bool(b)) => Value::Bool(*b),
925                (DataType::Bool, other) => {
926                    return Err(SQLRiteError::General(format!(
927                        "Type mismatch: expected BOOL for column '{name}', got '{}'",
928                        other.to_display_string()
929                    )));
930                }
931                (DataType::Vector(declared_dim), Value::Vector(parsed_vec)) => {
932                    if parsed_vec.len() != *declared_dim {
933                        return Err(SQLRiteError::General(format!(
934                            "Vector dimension mismatch for column '{name}': declared {declared_dim}, got {}",
935                            parsed_vec.len()
936                        )));
937                    }
938                    Value::Vector(parsed_vec.clone())
939                }
940                (DataType::Vector(_), other) => {
941                    return Err(SQLRiteError::General(format!(
942                        "Type mismatch: expected VECTOR for column '{name}', got '{}'",
943                        other.to_display_string()
944                    )));
945                }
946                (DataType::Json, Value::Text(s)) => {
947                    // JSON values stored as Text. UNIQUE on a JSON column
948                    // compares the canonical text representation
949                    // verbatim — `{"a": 1}` and `{"a":1}` are distinct.
950                    // Document this if anyone actually requests UNIQUE
951                    // JSON; for MVP, treat-as-text is fine.
952                    Value::Text(s.clone())
953                }
954                (DataType::Json, other) => {
955                    return Err(SQLRiteError::General(format!(
956                        "Type mismatch: expected JSON for column '{name}', got '{}'",
957                        other.to_display_string()
958                    )));
959                }
960                (DataType::None | DataType::Invalid, _) => {
961                    return Err(SQLRiteError::Internal(format!(
962                        "column '{name}' has an unsupported datatype"
963                    )));
964                }
965            };
966
967            if let Some(secondary) = self.index_for_column(name) {
968                if secondary.would_violate_unique(&parsed) {
969                    return Err(SQLRiteError::General(format!(
970                        "UNIQUE constraint violated for column '{name}': value '{}' already exists",
971                        parsed.to_display_string()
972                    )));
973                }
974            } else {
975                // No secondary index (Real / Bool UNIQUE). Linear scan.
976                for other in self.rowids() {
977                    if self.get_value(name, other).as_ref() == Some(&parsed) {
978                        return Err(SQLRiteError::General(format!(
979                            "UNIQUE constraint violated for column '{name}': value '{}' already exists",
980                            parsed.to_display_string()
981                        )));
982                    }
983                }
984            }
985        }
986        Ok(())
987    }
988
989    /// Inserts all VALUES in its approprieta COLUMNS, using the ROWID an embedded INDEX on all ROWS
990    /// Every `Table` keeps track of the `last_rowid` in order to facilitate what the next one would be.
991    /// One limitation of this data structure is that we can only have one write transaction at a time, otherwise
992    /// we could have a race condition on the last_rowid.
993    ///
994    /// Since we are loosely modeling after SQLite, this is also a limitation of SQLite (allowing only one write transcation at a time),
995    /// So we are good. :)
996    ///
997    /// Returns `Err` (leaving the table unchanged) when the user supplies an
998    /// incompatibly-typed value — no more panics on bad input.
999    pub fn insert_row(&mut self, cols: &Vec<String>, values: &Vec<Option<Value>>) -> Result<()> {
1000        let mut next_rowid = self.last_rowid + 1;
1001
1002        // Auto-assign INTEGER PRIMARY KEY when the user omits it; otherwise
1003        // adopt the supplied value as the new rowid.
1004        if self.primary_key != "-1" {
1005            if !cols.iter().any(|col| col == &self.primary_key) {
1006                // Write the auto-assigned PK into row storage, then sync
1007                // the secondary index.
1008                let val = next_rowid as i32;
1009                let wrote_integer = {
1010                    let rows_clone = Arc::clone(&self.rows);
1011                    let mut row_data = rows_clone.lock().expect("rows mutex poisoned");
1012                    let table_col_data = row_data.get_mut(&self.primary_key).ok_or_else(|| {
1013                        SQLRiteError::Internal(format!(
1014                            "Row storage missing for primary key column '{}'",
1015                            self.primary_key
1016                        ))
1017                    })?;
1018                    match table_col_data {
1019                        Row::Integer(tree) => {
1020                            tree.insert(next_rowid, val);
1021                            true
1022                        }
1023                        _ => false, // non-integer PK: auto-assign is a no-op
1024                    }
1025                };
1026                if wrote_integer {
1027                    let pk = self.primary_key.clone();
1028                    if let Some(idx) = self.index_for_column_mut(&pk) {
1029                        idx.insert(&Value::Integer(val as i64), next_rowid)?;
1030                    }
1031                }
1032            } else {
1033                for i in 0..cols.len() {
1034                    if cols[i] == self.primary_key {
1035                        next_rowid = match &values[i] {
1036                            Some(Value::Integer(n)) => *n,
1037                            None => {
1038                                return Err(SQLRiteError::General(format!(
1039                                    "Type mismatch: PRIMARY KEY column '{}' cannot be NULL",
1040                                    self.primary_key
1041                                )));
1042                            }
1043                            Some(other) => {
1044                                return Err(SQLRiteError::General(format!(
1045                                    "Type mismatch: PRIMARY KEY column '{}' expects INTEGER, got '{}'",
1046                                    self.primary_key,
1047                                    other.to_display_string()
1048                                )));
1049                            }
1050                        };
1051                    }
1052                }
1053            }
1054        }
1055
1056        // For every table column, either pick the supplied value or pad with NULL
1057        // so that every column's BTreeMap keeps the same rowid keyset.
1058        let column_names = self
1059            .columns
1060            .iter()
1061            .map(|col| col.column_name.to_string())
1062            .collect::<Vec<String>>();
1063        let mut j: usize = 0;
1064        for i in 0..column_names.len() {
1065            // `None` means SQL NULL: leave the column's BTreeMap entry
1066            // absent so reads come back as Value::Null via the missing-
1067            // rowid path.
1068            let mut val: Option<Value> = None;
1069            let key = &column_names[i];
1070            let mut column_supplied = false;
1071
1072            if let Some(supplied_key) = cols.get(j) {
1073                if supplied_key == &column_names[i] {
1074                    val = values[j].clone();
1075                    column_supplied = true;
1076                    j += 1;
1077                } else if self.primary_key == column_names[i] {
1078                    // PK already stored in the auto-assign branch above.
1079                    continue;
1080                }
1081            } else if self.primary_key == column_names[i] {
1082                continue;
1083            }
1084
1085            // Column was omitted from the INSERT column list. Substitute its
1086            // DEFAULT literal if one was declared at CREATE TABLE time;
1087            // otherwise it stays as None. SQLite semantics: an *explicit*
1088            // NULL is preserved as NULL — the default only fires for
1089            // omitted columns. `DEFAULT NULL` is treated as no default.
1090            if !column_supplied {
1091                val = self.columns[i]
1092                    .default
1093                    .clone()
1094                    .filter(|v| !matches!(v, Value::Null));
1095            }
1096
1097            // Step 1: write into row storage and compute the typed Value
1098            // we'll hand to the secondary index (if any).
1099            let typed_value: Option<Value> = {
1100                let rows_clone = Arc::clone(&self.rows);
1101                let mut row_data = rows_clone.lock().expect("rows mutex poisoned");
1102                let table_col_data = row_data.get_mut(key).ok_or_else(|| {
1103                    SQLRiteError::Internal(format!("Row storage missing for column '{key}'"))
1104                })?;
1105
1106                match (table_col_data, &val) {
1107                    // SQL NULL: leave the BTreeMap entry absent. Indexes are
1108                    // skipped (Step 2 below short-circuits on None).
1109                    (_, None) => None,
1110
1111                    (Row::Integer(tree), Some(Value::Integer(n))) => {
1112                        tree.insert(next_rowid, *n as i32);
1113                        Some(Value::Integer(*n))
1114                    }
1115                    (Row::Integer(_), Some(other)) => {
1116                        return Err(SQLRiteError::General(format!(
1117                            "Type mismatch: expected INTEGER for column '{key}', got '{}'",
1118                            other.to_display_string()
1119                        )));
1120                    }
1121
1122                    (Row::Text(tree), Some(Value::Text(s))) => {
1123                        // Phase 7e — JSON columns share Row::Text storage.
1124                        // Validate the value parses as JSON before storing;
1125                        // otherwise we'd happily write `not-json-at-all`
1126                        // and only fail when json_extract tried to parse
1127                        // it later.
1128                        if matches!(self.columns[i].datatype, DataType::Json) {
1129                            if let Err(e) = serde_json::from_str::<serde_json::Value>(s) {
1130                                return Err(SQLRiteError::General(format!(
1131                                    "Type mismatch: expected JSON for column '{key}', got '{s}': {e}"
1132                                )));
1133                            }
1134                        }
1135                        tree.insert(next_rowid, s.clone());
1136                        Some(Value::Text(s.clone()))
1137                    }
1138                    (Row::Text(_), Some(other)) => {
1139                        let label = if matches!(self.columns[i].datatype, DataType::Json) {
1140                            "JSON"
1141                        } else {
1142                            "TEXT"
1143                        };
1144                        return Err(SQLRiteError::General(format!(
1145                            "Type mismatch: expected {label} for column '{key}', got '{}'",
1146                            other.to_display_string()
1147                        )));
1148                    }
1149
1150                    (Row::Real(tree), Some(Value::Real(f))) => {
1151                        let f32_val = *f as f32;
1152                        tree.insert(next_rowid, f32_val);
1153                        Some(Value::Real(*f))
1154                    }
1155                    // Allow integer literals to widen into REAL columns
1156                    // (matches the previous string-parse behavior where
1157                    // `INSERT … VALUES (42)` into a REAL column worked).
1158                    (Row::Real(tree), Some(Value::Integer(n))) => {
1159                        let f32_val = *n as f32;
1160                        tree.insert(next_rowid, f32_val);
1161                        Some(Value::Real(*n as f64))
1162                    }
1163                    (Row::Real(_), Some(other)) => {
1164                        return Err(SQLRiteError::General(format!(
1165                            "Type mismatch: expected REAL for column '{key}', got '{}'",
1166                            other.to_display_string()
1167                        )));
1168                    }
1169
1170                    (Row::Bool(tree), Some(Value::Bool(b))) => {
1171                        tree.insert(next_rowid, *b);
1172                        Some(Value::Bool(*b))
1173                    }
1174                    (Row::Bool(_), Some(other)) => {
1175                        return Err(SQLRiteError::General(format!(
1176                            "Type mismatch: expected BOOL for column '{key}', got '{}'",
1177                            other.to_display_string()
1178                        )));
1179                    }
1180
1181                    (Row::Vector(tree), Some(Value::Vector(parsed))) => {
1182                        // The parser already turned a bracket-array literal
1183                        // into a typed Value::Vector. We still need to
1184                        // dim-check against the column's declared
1185                        // DataType::Vector(N).
1186                        let declared_dim = match &self.columns[i].datatype {
1187                            DataType::Vector(d) => *d,
1188                            other => {
1189                                return Err(SQLRiteError::Internal(format!(
1190                                    "Row::Vector storage on non-Vector column '{key}' (declared as {other})"
1191                                )));
1192                            }
1193                        };
1194                        if parsed.len() != declared_dim {
1195                            return Err(SQLRiteError::General(format!(
1196                                "Vector dimension mismatch for column '{key}': declared {declared_dim}, got {}",
1197                                parsed.len()
1198                            )));
1199                        }
1200                        tree.insert(next_rowid, parsed.clone());
1201                        Some(Value::Vector(parsed.clone()))
1202                    }
1203                    (Row::Vector(_), Some(other)) => {
1204                        return Err(SQLRiteError::General(format!(
1205                            "Type mismatch: expected VECTOR for column '{key}', got '{}'",
1206                            other.to_display_string()
1207                        )));
1208                    }
1209
1210                    (Row::None, _) => {
1211                        return Err(SQLRiteError::Internal(format!(
1212                            "Column '{key}' has no row storage"
1213                        )));
1214                    }
1215                }
1216            };
1217
1218            // Step 2: maintain the secondary index (if any). insert() is a
1219            // no-op for Value::Null and cheap for other value kinds.
1220            if let Some(v) = typed_value.clone() {
1221                if let Some(idx) = self.index_for_column_mut(key) {
1222                    idx.insert(&v, next_rowid)?;
1223                }
1224            }
1225
1226            // Step 3 (Phase 7d.2): maintain any HNSW indexes on this column.
1227            // The HNSW algorithm needs access to other rows' vectors when
1228            // wiring up neighbor edges, so build a get_vec closure that
1229            // pulls from the table's row storage (which we *just* updated
1230            // with the new value).
1231            if let Some(Value::Vector(new_vec)) = &typed_value {
1232                self.maintain_hnsw_on_insert(key, next_rowid, new_vec);
1233            }
1234
1235            // Step 4 (Phase 8b): maintain any FTS indexes on this column.
1236            // Cheap incremental update — PostingList::insert tokenizes
1237            // the value and adds postings under the new rowid. DELETE
1238            // and UPDATE take the rebuild-on-save path instead (Q7).
1239            if let Some(Value::Text(text)) = &typed_value {
1240                self.maintain_fts_on_insert(key, next_rowid, text);
1241            }
1242        }
1243        self.last_rowid = next_rowid;
1244        Ok(())
1245    }
1246
1247    /// After a row insert, push the new (rowid, vector) into every HNSW
1248    /// index whose column matches `column`. Split out of `insert_row` so
1249    /// the borrowing dance — we need both `&self.rows` (read other
1250    /// vectors) and `&mut self.hnsw_indexes` (insert into the graph) —
1251    /// stays localized.
1252    fn maintain_hnsw_on_insert(&mut self, column: &str, rowid: i64, new_vec: &[f32]) {
1253        // Snapshot the current vector storage so the get_vec closure
1254        // doesn't fight with `&mut self.hnsw_indexes`. For a typical
1255        // HNSW insert we touch ef_construction × log(N) other vectors,
1256        // so the snapshot cost is small relative to the graph wiring.
1257        let mut vec_snapshot: HashMap<i64, Vec<f32>> = HashMap::new();
1258        {
1259            let row_data = self.rows.lock().expect("rows mutex poisoned");
1260            if let Some(Row::Vector(map)) = row_data.get(column) {
1261                for (id, v) in map.iter() {
1262                    vec_snapshot.insert(*id, v.clone());
1263                }
1264            }
1265        }
1266        // The new row was just written into row storage — make sure the
1267        // snapshot reflects it (it should, but defensive).
1268        vec_snapshot.insert(rowid, new_vec.to_vec());
1269
1270        for entry in &mut self.hnsw_indexes {
1271            if entry.column_name == column {
1272                entry.index.insert(rowid, new_vec, |id| {
1273                    vec_snapshot.get(&id).cloned().unwrap_or_default()
1274                });
1275            }
1276        }
1277    }
1278
1279    /// After a row insert, push the new (rowid, text) into every FTS
1280    /// index whose column matches `column`. Phase 8b.
1281    ///
1282    /// Mirrors [`Self::maintain_hnsw_on_insert`] but the FTS index is
1283    /// self-contained — `PostingList::insert` only needs the new doc's
1284    /// text, not the rest of the corpus, so there's no snapshot dance.
1285    fn maintain_fts_on_insert(&mut self, column: &str, rowid: i64, text: &str) {
1286        for entry in &mut self.fts_indexes {
1287            if entry.column_name == column {
1288                entry.index.insert(rowid, text);
1289            }
1290        }
1291    }
1292
1293    /// Print the table schema to standard output in a pretty formatted way.
1294    ///
1295    /// # Example
1296    ///
1297    /// ```text
1298    /// let table = Table::new(payload);
1299    /// table.print_table_schema();
1300    ///
1301    /// Prints to standard output:
1302    ///    +-------------+-----------+-------------+--------+----------+
1303    ///    | Column Name | Data Type | PRIMARY KEY | UNIQUE | NOT NULL |
1304    ///    +-------------+-----------+-------------+--------+----------+
1305    ///    | id          | Integer   | true        | true   | true     |
1306    ///    +-------------+-----------+-------------+--------+----------+
1307    ///    | name        | Text      | false       | true   | false    |
1308    ///    +-------------+-----------+-------------+--------+----------+
1309    ///    | email       | Text      | false       | false  | false    |
1310    ///    +-------------+-----------+-------------+--------+----------+
1311    /// ```
1312    ///
1313    pub fn print_table_schema(&self) -> Result<usize> {
1314        let mut table = PrintTable::new();
1315        table.add_row(row![
1316            "Column Name",
1317            "Data Type",
1318            "PRIMARY KEY",
1319            "UNIQUE",
1320            "NOT NULL"
1321        ]);
1322
1323        for col in &self.columns {
1324            table.add_row(row![
1325                col.column_name,
1326                col.datatype,
1327                col.is_pk,
1328                col.is_unique,
1329                col.not_null
1330            ]);
1331        }
1332
1333        table.printstd();
1334        Ok(table.len() * 2 + 1)
1335    }
1336
1337    /// Print the table data to standard output in a pretty formatted way.
1338    ///
1339    /// # Example
1340    ///
1341    /// ```text
1342    /// let db_table = db.get_table_mut(table_name.to_string()).unwrap();
1343    /// db_table.print_table_data();
1344    ///
1345    /// Prints to standard output:
1346    ///     +----+---------+------------------------+
1347    ///     | id | name    | email                  |
1348    ///     +----+---------+------------------------+
1349    ///     | 1  | "Jack"  | "jack@mail.com"        |
1350    ///     +----+---------+------------------------+
1351    ///     | 10 | "Bob"   | "bob@main.com"         |
1352    ///     +----+---------+------------------------+
1353    ///     | 11 | "Bill"  | "bill@main.com"        |
1354    ///     +----+---------+------------------------+
1355    /// ```
1356    ///
1357    pub fn print_table_data(&self) {
1358        let mut print_table = PrintTable::new();
1359
1360        let column_names = self
1361            .columns
1362            .iter()
1363            .map(|col| col.column_name.to_string())
1364            .collect::<Vec<String>>();
1365
1366        let header_row = PrintRow::new(
1367            column_names
1368                .iter()
1369                .map(|col| PrintCell::new(col))
1370                .collect::<Vec<PrintCell>>(),
1371        );
1372
1373        let rows_clone = Arc::clone(&self.rows);
1374        let row_data = rows_clone.lock().expect("rows mutex poisoned");
1375        let first_col_data = row_data
1376            .get(&self.columns.first().unwrap().column_name)
1377            .unwrap();
1378        let num_rows = first_col_data.count();
1379        let mut print_table_rows: Vec<PrintRow> = vec![PrintRow::new(vec![]); num_rows];
1380
1381        for col_name in &column_names {
1382            let col_val = row_data
1383                .get(col_name)
1384                .expect("Can't find any rows with the given column");
1385            let columns: Vec<String> = col_val.get_serialized_col_data();
1386
1387            for i in 0..num_rows {
1388                if let Some(cell) = &columns.get(i) {
1389                    print_table_rows[i].add_cell(PrintCell::new(cell));
1390                } else {
1391                    print_table_rows[i].add_cell(PrintCell::new(""));
1392                }
1393            }
1394        }
1395
1396        print_table.add_row(header_row);
1397        for row in print_table_rows {
1398            print_table.add_row(row);
1399        }
1400
1401        print_table.printstd();
1402    }
1403}
1404
1405/// The schema for each SQL column in every table.
1406///
1407/// Per-column index state moved to `Table::secondary_indexes` in Phase 3e —
1408/// a single `Column` describes the declared schema (name, type, constraints)
1409/// and nothing more.
1410#[derive(PartialEq, Debug, Clone)]
1411pub struct Column {
1412    pub column_name: String,
1413    pub datatype: DataType,
1414    pub is_pk: bool,
1415    pub not_null: bool,
1416    pub is_unique: bool,
1417    /// Literal value to substitute when this column is omitted from an
1418    /// INSERT. Restricted to literal expressions at CREATE TABLE time.
1419    /// `None` means "no DEFAULT declared"; an INSERT that omits the column
1420    /// gets `Value::Null` instead.
1421    pub default: Option<Value>,
1422}
1423
1424impl Column {
1425    /// Builds a `Column` without a `DEFAULT` clause. Existing call sites
1426    /// (catalog-table setup, test fixtures) keep working unchanged.
1427    pub fn new(
1428        name: String,
1429        datatype: String,
1430        is_pk: bool,
1431        not_null: bool,
1432        is_unique: bool,
1433    ) -> Self {
1434        Self::with_default(name, datatype, is_pk, not_null, is_unique, None)
1435    }
1436
1437    /// Builds a `Column` with an optional `DEFAULT` literal. Used by the
1438    /// CREATE TABLE / `parse_create_sql` paths that propagate user-supplied
1439    /// defaults from `ParsedColumn`.
1440    pub fn with_default(
1441        name: String,
1442        datatype: String,
1443        is_pk: bool,
1444        not_null: bool,
1445        is_unique: bool,
1446        default: Option<Value>,
1447    ) -> Self {
1448        let dt = DataType::new(datatype);
1449        Column {
1450            column_name: name,
1451            datatype: dt,
1452            is_pk,
1453            not_null,
1454            is_unique,
1455            default,
1456        }
1457    }
1458}
1459
/// In-memory storage for the values of one column.
///
/// There is one variant per storable type. Each variant wraps a `BTreeMap`
/// that uses the ROWID as key and the stored value of that type as value.
#[derive(Debug, Clone, PartialEq)]
pub enum Row {
    /// Integer values, held as `i32`.
    Integer(BTreeMap<i64, i32>),
    /// Text values.
    Text(BTreeMap<i64, String>),
    /// Floating-point values, held as `f32`.
    Real(BTreeMap<i64, f32>),
    /// Boolean values.
    Bool(BTreeMap<i64, bool>),
    /// Phase 7a: dense f32 vector storage. Every `Vec<f32>` is expected to
    /// have the length declared by the column's `DataType::Vector(dim)`,
    /// which is enforced at INSERT time. The dimension is not stored here;
    /// it lives in the column metadata.
    Vector(BTreeMap<i64, Vec<f32>>),
    /// No storage.
    None,
}
1478
1479impl Row {
1480    fn get_serialized_col_data(&self) -> Vec<String> {
1481        match self {
1482            Row::Integer(cd) => cd.values().map(|v| v.to_string()).collect(),
1483            Row::Real(cd) => cd.values().map(|v| v.to_string()).collect(),
1484            Row::Text(cd) => cd.values().map(|v| v.to_string()).collect(),
1485            Row::Bool(cd) => cd.values().map(|v| v.to_string()).collect(),
1486            Row::Vector(cd) => cd.values().map(format_vector_for_display).collect(),
1487            Row::None => panic!("Found None in columns"),
1488        }
1489    }
1490
1491    fn count(&self) -> usize {
1492        match self {
1493            Row::Integer(cd) => cd.len(),
1494            Row::Real(cd) => cd.len(),
1495            Row::Text(cd) => cd.len(),
1496            Row::Bool(cd) => cd.len(),
1497            Row::Vector(cd) => cd.len(),
1498            Row::None => panic!("Found None in columns"),
1499        }
1500    }
1501
1502    /// Every column's BTreeMap is keyed by ROWID. All columns share the same keyset
1503    /// after an INSERT (missing columns are padded), so any column's keys are a valid
1504    /// iteration of the table's rowids.
1505    pub fn rowids(&self) -> Vec<i64> {
1506        match self {
1507            Row::Integer(m) => m.keys().copied().collect(),
1508            Row::Text(m) => m.keys().copied().collect(),
1509            Row::Real(m) => m.keys().copied().collect(),
1510            Row::Bool(m) => m.keys().copied().collect(),
1511            Row::Vector(m) => m.keys().copied().collect(),
1512            Row::None => vec![],
1513        }
1514    }
1515
1516    pub fn get(&self, rowid: i64) -> Option<Value> {
1517        match self {
1518            Row::Integer(m) => m.get(&rowid).map(|v| Value::Integer(i64::from(*v))),
1519            // INSERT stores the literal string "Null" in Text columns that were omitted
1520            // from the query — re-map that back to a real NULL on read.
1521            Row::Text(m) => m.get(&rowid).map(|v| {
1522                if v == "Null" {
1523                    Value::Null
1524                } else {
1525                    Value::Text(v.clone())
1526                }
1527            }),
1528            Row::Real(m) => m.get(&rowid).map(|v| Value::Real(f64::from(*v))),
1529            Row::Bool(m) => m.get(&rowid).map(|v| Value::Bool(*v)),
1530            Row::Vector(m) => m.get(&rowid).map(|v| Value::Vector(v.clone())),
1531            Row::None => None,
1532        }
1533    }
1534}
1535
/// Formats a vector for human-readable output. Shared by
/// `Row::get_serialized_col_data` (the REPL's print-table path) and
/// `Value::to_display_string`.
///
/// Output looks like `[0.1, 0.2, 0.3]`: JSON-like, elements separated by
/// `", "`, each rendered through the `f32` `Display` impl. An empty vector
/// renders as `[]`. High-dimensional vectors (e.g. 384 elements) produce a
/// long line; truncating with an ellipsis is left as future polish.
fn format_vector_for_display(v: &Vec<f32>) -> String {
    // The parameter stays `&Vec<f32>` because this function is handed
    // point-free to `.map(...)` over `&Vec<f32>` items.
    //
    // `f32`'s `Display` emits the shortest representation that round-trips,
    // so 0.1f32 shows up as "0.1" rather than "0.10000000149011612".
    let rendered: Vec<String> = v.iter().map(|component| component.to_string()).collect();
    format!("[{}]", rendered.join(", "))
}
1557
/// A typed value as it flows through query execution.
///
/// Kept separate from the storage-side `Row` enum so the executor can pass
/// typed values, NULL included, from one operator to the next.
#[derive(PartialEq, Clone, Debug)]
pub enum Value {
    /// 64-bit signed integer.
    Integer(i64),
    /// Text string.
    Text(String),
    /// 64-bit float.
    Real(f64),
    /// Boolean.
    Bool(bool),
    /// Phase 7a: dense f32 vector. Its dimension is implicit in
    /// `Vec::len`; the target column's declared `DataType::Vector(N)` is
    /// checked against it at INSERT/UPDATE time.
    Vector(Vec<f32>),
    /// SQL NULL.
    Null,
}
1573
1574impl Value {
1575    pub fn to_display_string(&self) -> String {
1576        match self {
1577            Value::Integer(v) => v.to_string(),
1578            Value::Text(s) => s.clone(),
1579            Value::Real(f) => f.to_string(),
1580            Value::Bool(b) => b.to_string(),
1581            Value::Vector(v) => format_vector_for_display(v),
1582            Value::Null => String::from("NULL"),
1583        }
1584    }
1585}
1586
1587/// Parse a bracket-array literal like `"[0.1, 0.2, 0.3]"` (or `"[1, 2, 3]"`)
1588/// into a `Vec<f32>`. The parser/insert pipeline stores vector literals as
1589/// strings in `InsertQuery::rows` (a `Vec<Vec<String>>`); this helper is
1590/// the inverse — turn the string back into a typed vector at the boundary
1591/// where we actually need element-typed data.
1592///
1593/// Accepts:
1594/// - `[]` → empty vector (caller's dimension check rejects it for VECTOR(N≥1))
1595/// - `[0.1, 0.2, 0.3]` → standard float syntax
1596/// - `[1, 2, 3]` → integers, coerced to f32 (matches `VALUES (1, 2)` for
1597///   `REAL` columns; we widen ints to floats automatically)
1598/// - whitespace tolerated everywhere (Python/JSON/pgvector convention)
1599///
1600/// Rejects with a descriptive message:
1601/// - missing `[` / `]`
1602/// - non-numeric elements (`['foo', 0.1]`)
1603/// - NaN / Inf literals (we accept them via `f32::from_str` but caller can
1604///   reject if undesired — for now we let them through; HNSW etc. will
1605///   reject NaN at index time)
1606pub fn parse_vector_literal(s: &str) -> Result<Vec<f32>> {
1607    let trimmed = s.trim();
1608    if !trimmed.starts_with('[') || !trimmed.ends_with(']') {
1609        return Err(SQLRiteError::General(format!(
1610            "expected bracket-array literal `[...]`, got `{s}`"
1611        )));
1612    }
1613    let inner = &trimmed[1..trimmed.len() - 1].trim();
1614    if inner.is_empty() {
1615        return Ok(Vec::new());
1616    }
1617    let mut out = Vec::new();
1618    for (i, part) in inner.split(',').enumerate() {
1619        let element = part.trim();
1620        let parsed: f32 = element.parse().map_err(|_| {
1621            SQLRiteError::General(format!("vector element {i} (`{element}`) is not a number"))
1622        })?;
1623        out.push(parsed);
1624    }
1625    Ok(out)
1626}
1627
#[cfg(test)]
mod tests {
    use super::*;
    use sqlparser::dialect::SQLiteDialect;
    use sqlparser::parser::Parser;

    /// Parses `sql` as a single CREATE TABLE statement and builds a `Table`
    /// from it. Shared by the table-construction tests so the parsing
    /// boilerplate lives in one place.
    ///
    /// Panics when the SQL does not parse, contains more than one
    /// statement, or is not accepted by `CreateQuery::new`.
    fn table_from_create_sql(sql: &str) -> Table {
        let dialect = SQLiteDialect {};
        let mut ast = Parser::parse_sql(&dialect, sql).unwrap();
        if ast.len() > 1 {
            panic!("Expected a single query statement, but there are more than 1.")
        }
        let query = ast.pop().unwrap();
        let create_query = CreateQuery::new(&query).unwrap();
        Table::new(create_query)
    }

    #[test]
    fn datatype_display_trait_test() {
        let expectations = [
            (DataType::Integer, "Integer"),
            (DataType::Text, "Text"),
            (DataType::Real, "Real"),
            (DataType::Bool, "Boolean"),
            (DataType::Vector(384), "Vector(384)"),
            (DataType::None, "None"),
            (DataType::Invalid, "Invalid"),
        ];

        for (datatype, rendered) in expectations {
            assert_eq!(format!("{}", datatype), rendered);
        }
    }

    // -----------------------------------------------------------------
    // Phase 7a — VECTOR(N) column type
    // -----------------------------------------------------------------

    #[test]
    fn datatype_new_parses_vector_dim() {
        // Standard cases.
        assert_eq!(DataType::new("vector(1)".to_string()), DataType::Vector(1));
        assert_eq!(
            DataType::new("vector(384)".to_string()),
            DataType::Vector(384)
        );
        assert_eq!(
            DataType::new("vector(1536)".to_string()),
            DataType::Vector(1536)
        );

        // Case-insensitive on the keyword.
        assert_eq!(
            DataType::new("VECTOR(384)".to_string()),
            DataType::Vector(384)
        );

        // Whitespace inside parens tolerated (the create-parser strips it
        // but the string-based round-trip in DataType::new is the one place
        // we don't fully control input formatting).
        assert_eq!(
            DataType::new("vector( 64 )".to_string()),
            DataType::Vector(64)
        );
    }

    #[test]
    fn datatype_new_rejects_bad_vector_strings() {
        // dim = 0 is rejected (Q2: VECTOR(N≥1)).
        assert_eq!(DataType::new("vector(0)".to_string()), DataType::Invalid);
        // Non-numeric dim.
        assert_eq!(DataType::new("vector(abc)".to_string()), DataType::Invalid);
        // Empty parens.
        assert_eq!(DataType::new("vector()".to_string()), DataType::Invalid);
        // Negative dim wouldn't even parse as usize, so falls into Invalid.
        assert_eq!(DataType::new("vector(-3)".to_string()), DataType::Invalid);
    }

    #[test]
    fn datatype_to_wire_string_round_trips_vector() {
        let dt = DataType::Vector(384);
        let wire = dt.to_wire_string();
        assert_eq!(wire, "vector(384)");
        // And feeds back through DataType::new losslessly — this is the
        // round-trip the ParsedColumn pipeline relies on.
        assert_eq!(DataType::new(wire), DataType::Vector(384));
    }

    #[test]
    fn parse_vector_literal_accepts_floats() {
        let v = parse_vector_literal("[0.1, 0.2, 0.3]").expect("parse");
        assert_eq!(v, vec![0.1f32, 0.2, 0.3]);
    }

    #[test]
    fn parse_vector_literal_accepts_ints_widening_to_f32() {
        let v = parse_vector_literal("[1, 2, 3]").expect("parse");
        assert_eq!(v, vec![1.0f32, 2.0, 3.0]);
    }

    #[test]
    fn parse_vector_literal_handles_negatives_and_whitespace() {
        let v = parse_vector_literal("[ -1.5 ,  2.0,  -3.5 ]").expect("parse");
        assert_eq!(v, vec![-1.5f32, 2.0, -3.5]);
    }

    #[test]
    fn parse_vector_literal_empty_brackets_is_empty_vec() {
        let v = parse_vector_literal("[]").expect("parse");
        assert!(v.is_empty());
    }

    #[test]
    fn parse_vector_literal_rejects_non_bracketed() {
        assert!(parse_vector_literal("0.1, 0.2").is_err());
        assert!(parse_vector_literal("(0.1, 0.2)").is_err());
        assert!(parse_vector_literal("[0.1, 0.2").is_err()); // missing ]
        assert!(parse_vector_literal("0.1, 0.2]").is_err()); // missing [
    }

    #[test]
    fn parse_vector_literal_rejects_non_numeric_elements() {
        let err = parse_vector_literal("[1.0, 'foo', 3.0]").unwrap_err();
        let msg = format!("{err}");
        assert!(
            msg.contains("vector element 1") && msg.contains("'foo'"),
            "error message should pinpoint the bad element: got `{msg}`"
        );
    }

    #[test]
    fn value_vector_display_format() {
        let v = Value::Vector(vec![0.1, 0.2, 0.3]);
        assert_eq!(v.to_display_string(), "[0.1, 0.2, 0.3]");

        // Empty vector displays as `[]`.
        let empty = Value::Vector(vec![]);
        assert_eq!(empty.to_display_string(), "[]");
    }

    #[test]
    fn create_new_table_test() {
        let table = table_from_create_sql(
            "CREATE TABLE contacts (
            id INTEGER PRIMARY KEY,
            first_name TEXT NOT NULL,
            last_name TEXT NOT NULl,
            email TEXT NOT NULL UNIQUE,
            active BOOL,
            score REAL
        );",
        );

        assert_eq!(table.columns.len(), 6);
        assert_eq!(table.last_rowid, 0);

        // `find` stops at the first match; no intermediate Vec is needed.
        let id_column = table
            .columns
            .iter()
            .find(|c| c.column_name == "id")
            .expect("column not found");
        assert!(id_column.is_pk);
        assert_eq!(id_column.datatype, DataType::Integer);
    }

    #[test]
    fn print_table_schema_test() {
        let table = table_from_create_sql(
            "CREATE TABLE contacts (
            id INTEGER PRIMARY KEY,
            first_name TEXT NOT NULL,
            last_name TEXT NOT NULl
        );",
        );

        // Header row + 3 column rows = 4 table rows, reported as 4 * 2 + 1.
        let lines_printed = table.print_table_schema();
        assert_eq!(lines_printed, Ok(9));
    }
}