Skip to main content

sqlrite/sql/db/
table.rs

1use crate::error::{Result, SQLRiteError};
2use crate::sql::db::secondary_index::{IndexOrigin, SecondaryIndex};
3use crate::sql::fts::PostingList;
4use crate::sql::hnsw::HnswIndex;
5use crate::sql::parser::create::CreateQuery;
6use std::collections::{BTreeMap, HashMap};
7use std::fmt;
8use std::sync::{Arc, Mutex};
9
10use prettytable::{Cell as PrintCell, Row as PrintRow, Table as PrintTable};
11
/// SQLRite data types.
///
/// Modeled after SQLite's storage classes and type affinities; see
/// [Datatypes In SQLite Version 3](https://www.sqlite.org/datatype3.html).
///
/// `Vector(dim)` is the Phase 7a addition — a fixed-dimension dense f32
/// array. The dimension is part of the type so a `VECTOR(384)` column
/// rejects `[0.1, 0.2, 0.3]` at INSERT time as a clean type error
/// rather than silently storing the wrong shape.
#[derive(PartialEq, Debug, Clone)]
pub enum DataType {
    Integer,
    Text,
    Real,
    Bool,
    /// Dense f32 vector of fixed dimension. The `usize` is the column's
    /// declared dimension; every value stored in the column must have
    /// exactly that many elements.
    Vector(usize),
    /// Phase 7e — JSON column. Stored as canonical UTF-8 text (matches
    /// SQLite's JSON1 extension), validated at INSERT time. The
    /// `json_extract` family of functions parses on demand and returns
    /// either a primitive `Value` (Integer / Real / Text / Bool / Null)
    /// or a Text value carrying the JSON-encoded sub-object/array.
    /// Q3 originally specified `bincoded serde_json::Value`, but bincode
    /// was removed from the engine in Phase 3c — see the scope-correction
    /// note in `docs/phase-7-plan.md` for the rationale on switching to
    /// text storage.
    Json,
    None,
    /// Produced by [`DataType::new`] for any string it cannot parse.
    Invalid,
}

impl DataType {
    /// Constructs a `DataType` from the wire string the parser produces.
    ///
    /// Matching is case-insensitive. Pre-Phase-7 the strings were one-of
    /// `"integer" | "text" | "real" | "bool" | "none"`. Phase 7a adds
    /// `"vector(N)"` (N a positive integer, surrounding whitespace inside
    /// the parentheses is ignored) for the vector column type — encoded
    /// in-band so we don't have to plumb a richer type through the
    /// existing string-based ParsedColumn pipeline. `"json"` maps to
    /// [`DataType::Json`].
    ///
    /// Never fails: unrecognized input (including `vector(0)` or a
    /// non-numeric dimension) logs a message to stderr and returns
    /// [`DataType::Invalid`].
    pub fn new(cmd: String) -> DataType {
        let lower = cmd.to_lowercase();
        match lower.as_str() {
            "integer" => DataType::Integer,
            "text" => DataType::Text,
            "real" => DataType::Real,
            "bool" => DataType::Bool,
            "json" => DataType::Json,
            "none" => DataType::None,
            other => {
                // Peel off the `vector(` prefix and the trailing `)`. If
                // either is missing this isn't a vector spelling at all.
                let Some(inside) = other
                    .strip_prefix("vector(")
                    .and_then(|rest| rest.strip_suffix(')'))
                else {
                    eprintln!("Invalid data type given {}", cmd);
                    return DataType::Invalid;
                };
                // What's left must be a positive integer dimension.
                // Anything else is Invalid — surfaces a clean error at
                // CREATE TABLE time.
                match inside.trim().parse::<usize>() {
                    Ok(dim) if dim > 0 => DataType::Vector(dim),
                    _ => {
                        eprintln!("Invalid VECTOR dimension in {cmd}");
                        DataType::Invalid
                    }
                }
            }
        }
    }

    /// Inverse of `new` — returns a wire string that [`DataType::new`]
    /// parses back into the same variant.
    ///
    /// Scalar types are spelled capitalized (`"Integer"`, `"Text"`, …)
    /// while vectors are spelled `"vector(N)"`; because `new` lowercases
    /// its input before matching, both spellings round-trip. Used by the
    /// parser to round-trip `VECTOR(N)` → `DataType::Vector(N)` →
    /// `"vector(N)"` into `ParsedColumn::datatype` so the rest of the
    /// pipeline keeps working with strings.
    pub fn to_wire_string(&self) -> String {
        let fixed = match self {
            // The only variant that carries data; format it and return.
            DataType::Vector(dim) => return format!("vector({dim})"),
            DataType::Integer => "Integer",
            DataType::Text => "Text",
            DataType::Real => "Real",
            DataType::Bool => "Bool",
            DataType::Json => "Json",
            DataType::None => "None",
            DataType::Invalid => "Invalid",
        };
        fixed.to_string()
    }
}

/// Human-readable type name. Note this is not the wire format: `Bool`
/// renders as `Boolean` and vectors as `Vector(N)`.
impl fmt::Display for DataType {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let label = match self {
            DataType::Vector(dim) => return write!(f, "Vector({dim})"),
            DataType::Integer => "Integer",
            DataType::Text => "Text",
            DataType::Real => "Real",
            DataType::Bool => "Boolean",
            DataType::Json => "Json",
            DataType::None => "None",
            DataType::Invalid => "Invalid",
        };
        f.write_str(label)
    }
}
113
/// In-memory representation of a single SQL table: its schema, its row
/// data, and every index attached to it.
///
/// `rows` is `Arc<Mutex<...>>` rather than `Rc<RefCell<...>>` so `Table`
/// (and by extension `Database`) is `Send + Sync` — the Tauri desktop
/// app holds the engine in shared state behind a `Mutex<Database>`, and
/// Tauri's state container requires its contents to be thread-safe.
///
/// `Clone` is deliberately not derived; use `Table::deep_clone` to get a
/// copy whose row storage is independent of the original.
#[derive(Debug)]
pub struct Table {
    /// Name of the table
    pub tb_name: String,
    /// Schema for each column, in declaration order.
    pub columns: Vec<Column>,
    /// Per-column row storage, keyed by column name. Every column's
    /// `Row::T(BTreeMap)` is keyed by rowid, so all columns share the same
    /// keyset after each write.
    pub rows: Arc<Mutex<HashMap<String, Row>>>,
    /// Secondary indexes on this table (Phase 3e). One auto-created entry
    /// per UNIQUE or PRIMARY KEY column; explicit `CREATE INDEX` statements
    /// add more. Looking up an index: iterate by column name, or by index
    /// name via `Table::index_by_name`.
    pub secondary_indexes: Vec<SecondaryIndex>,
    /// HNSW indexes on VECTOR columns (Phase 7d.2). Maintained in lockstep
    /// with row storage on INSERT (incremental); rebuilt on open from the
    /// persisted CREATE INDEX SQL. The graph itself is NOT yet persisted —
    /// see Phase 7d.3 for cell-encoded graph storage.
    pub hnsw_indexes: Vec<HnswIndexEntry>,
    /// FTS inverted indexes on TEXT columns (Phase 8b). Maintained in
    /// lockstep with row storage on INSERT (incremental); DELETE / UPDATE
    /// flag `needs_rebuild` and the next save rebuilds from current rows.
    /// The posting lists themselves are NOT yet persisted — Phase 8c
    /// wires the cell-encoded `KIND_FTS_POSTING` storage.
    pub fts_indexes: Vec<FtsIndexEntry>,
    /// ROWID of the most recent insert. Starts at 0 for a freshly created
    /// table; `restore_row` raises it to the largest rowid it replays.
    pub last_rowid: i64,
    /// PRIMARY KEY column name, or the sentinel string "-1" if the table
    /// has no PRIMARY KEY.
    pub primary_key: String,
}
152
/// One HNSW index attached to a table. Phase 7d.2 only supports L2
/// distance; cosine and dot are 7d.x follow-ups (would require either
/// distinct USING methods like `hnsw_cosine` or a `WITH (metric = …)`
/// clause — see `docs/phase-7-plan.md` for the deferred decision).
///
/// Derives `Clone` so `Table::deep_clone` can hand a snapshot its own
/// copy of the graph.
#[derive(Debug, Clone)]
pub struct HnswIndexEntry {
    /// User-supplied name from `CREATE INDEX <name> …`. Unique across
    /// both `secondary_indexes` and `hnsw_indexes` on a given table.
    pub name: String,
    /// The VECTOR column this index covers.
    pub column_name: String,
    /// The graph itself.
    pub index: HnswIndex,
    /// Phase 7d.3 — true iff a DELETE or UPDATE-on-vector-col has
    /// invalidated the graph since the last rebuild. INSERT maintains
    /// the graph incrementally and leaves this false. The next save
    /// rebuilds dirty indexes from current rows before serializing.
    pub needs_rebuild: bool,
}
172
/// One FTS index attached to a table (Phase 8b). The inverted index
/// itself is a [`PostingList`]; metadata (name, column, dirty flag)
/// lives here. Mirrors [`HnswIndexEntry`] field-for-field so the
/// rebuild-on-save and DELETE/UPDATE invalidation paths can use one
/// pattern across both index families.
///
/// Derives `Clone` so `Table::deep_clone` can hand a snapshot its own
/// copy of the posting lists.
#[derive(Debug, Clone)]
pub struct FtsIndexEntry {
    /// User-supplied name from `CREATE INDEX <name> … USING fts(<col>)`.
    /// Unique across `secondary_indexes`, `hnsw_indexes`, and
    /// `fts_indexes` on a given table.
    pub name: String,
    /// The TEXT column this index covers.
    pub column_name: String,
    /// The inverted index + per-doc length cache.
    pub index: PostingList,
    /// True iff a DELETE or UPDATE-on-text-col has invalidated the
    /// posting lists since the last rebuild. INSERT maintains the
    /// index incrementally and leaves this false. The next save
    /// rebuilds dirty indexes from current rows before serializing
    /// (mirrors HNSW's Q7 strategy).
    pub needs_rebuild: bool,
}
195
196impl Table {
197    pub fn new(create_query: CreateQuery) -> Self {
198        let table_name = create_query.table_name;
199        let mut primary_key: String = String::from("-1");
200        let columns = create_query.columns;
201
202        let mut table_cols: Vec<Column> = vec![];
203        let table_rows: Arc<Mutex<HashMap<String, Row>>> = Arc::new(Mutex::new(HashMap::new()));
204        let mut secondary_indexes: Vec<SecondaryIndex> = Vec::new();
205        for col in &columns {
206            let col_name = &col.name;
207            if col.is_pk {
208                primary_key = col_name.to_string();
209            }
210            table_cols.push(Column::new(
211                col_name.to_string(),
212                col.datatype.to_string(),
213                col.is_pk,
214                col.not_null,
215                col.is_unique,
216            ));
217
218            let dt = DataType::new(col.datatype.to_string());
219            let row_storage = match &dt {
220                DataType::Integer => Row::Integer(BTreeMap::new()),
221                DataType::Real => Row::Real(BTreeMap::new()),
222                DataType::Text => Row::Text(BTreeMap::new()),
223                DataType::Bool => Row::Bool(BTreeMap::new()),
224                // The dimension is enforced at INSERT time against the
225                // column's declared DataType::Vector(dim). The Row variant
226                // itself doesn't carry the dim — every stored Vec<f32>
227                // already has it via .len().
228                DataType::Vector(_dim) => Row::Vector(BTreeMap::new()),
229                // Phase 7e — JSON columns reuse Text storage (with
230                // INSERT-time validation that the bytes parse as JSON).
231                // No new Row variant; json_extract / json_type / etc.
232                // re-parse from text on demand. See `docs/phase-7-plan.md`
233                // Q3's scope-correction note for the storage choice.
234                DataType::Json => Row::Text(BTreeMap::new()),
235                DataType::Invalid | DataType::None => Row::None,
236            };
237            table_rows
238                .lock()
239                .expect("Table row storage mutex poisoned")
240                .insert(col.name.to_string(), row_storage);
241
242            // Auto-create an index for every UNIQUE / PRIMARY KEY column,
243            // but only for types we know how to index. Real / Bool / Vector
244            // UNIQUE columns fall back to the linear scan path in
245            // validate_unique_constraint — same behavior as before 3e.
246            // (Vector UNIQUE is unusual; the linear-scan path will work
247            // via Value::Vector PartialEq, just at O(N) cost.)
248            if (col.is_pk || col.is_unique) && matches!(dt, DataType::Integer | DataType::Text) {
249                let name = SecondaryIndex::auto_name(&table_name, &col.name);
250                match SecondaryIndex::new(
251                    name,
252                    table_name.clone(),
253                    col.name.clone(),
254                    &dt,
255                    true,
256                    IndexOrigin::Auto,
257                ) {
258                    Ok(idx) => secondary_indexes.push(idx),
259                    Err(_) => {
260                        // Unreachable given the matches! guard above, but
261                        // the builder returns Result so we keep the arm.
262                    }
263                }
264            }
265        }
266
267        Table {
268            tb_name: table_name,
269            columns: table_cols,
270            rows: table_rows,
271            secondary_indexes,
272            // HNSW indexes only land via explicit CREATE INDEX … USING hnsw
273            // statements (Phase 7d.2); never auto-created at CREATE TABLE
274            // time, because there's no UNIQUE-style constraint that
275            // implies a vector index.
276            hnsw_indexes: Vec::new(),
277            // Same story for FTS indexes — explicit `CREATE INDEX … USING
278            // fts(<col>)` only (Phase 8b).
279            fts_indexes: Vec::new(),
280            last_rowid: 0,
281            primary_key,
282        }
283    }
284
285    /// Deep-clones a `Table` for transaction snapshots (Phase 4f).
286    ///
287    /// The normal `Clone` derive would shallow-clone the `Arc<Mutex<_>>`
288    /// wrapping our row storage, leaving both copies sharing the same
289    /// inner map — mutating the snapshot would corrupt the live table
290    /// and vice versa. Instead we lock, clone the inner `HashMap`, and
291    /// wrap it in a fresh `Arc<Mutex<_>>`. Columns and indexes derive
292    /// `Clone` directly (all their fields are plain data).
293    pub fn deep_clone(&self) -> Self {
294        let cloned_rows: HashMap<String, Row> = {
295            let guard = self.rows.lock().expect("row mutex poisoned");
296            guard.clone()
297        };
298        Table {
299            tb_name: self.tb_name.clone(),
300            columns: self.columns.clone(),
301            rows: Arc::new(Mutex::new(cloned_rows)),
302            secondary_indexes: self.secondary_indexes.clone(),
303            // HnswIndexEntry derives Clone, so the snapshot owns its own
304            // graph copy. Phase 4f's snapshot-rollback semantics require
305            // the snapshot to be fully decoupled from live state.
306            hnsw_indexes: self.hnsw_indexes.clone(),
307            // Same fully-decoupled clone for FTS indexes (Phase 8b).
308            fts_indexes: self.fts_indexes.clone(),
309            last_rowid: self.last_rowid,
310            primary_key: self.primary_key.clone(),
311        }
312    }
313
314    /// Finds an auto- or explicit-index entry for a given column. Returns
315    /// `None` if the column isn't indexed.
316    pub fn index_for_column(&self, column: &str) -> Option<&SecondaryIndex> {
317        self.secondary_indexes
318            .iter()
319            .find(|i| i.column_name == column)
320    }
321
322    fn index_for_column_mut(&mut self, column: &str) -> Option<&mut SecondaryIndex> {
323        self.secondary_indexes
324            .iter_mut()
325            .find(|i| i.column_name == column)
326    }
327
328    /// Finds a secondary index by its own name (e.g., `sqlrite_autoindex_users_email`
329    /// or a user-provided CREATE INDEX name). Used by Phase 3e.2 to look up
330    /// explicit indexes when DROP INDEX lands.
331    #[allow(dead_code)]
332    pub fn index_by_name(&self, name: &str) -> Option<&SecondaryIndex> {
333        self.secondary_indexes.iter().find(|i| i.name == name)
334    }
335
336    /// Returns a `bool` informing if a `Column` with a specific name exists or not
337    ///
338    pub fn contains_column(&self, column: String) -> bool {
339        self.columns.iter().any(|col| col.column_name == column)
340    }
341
342    /// Returns the list of column names in declaration order.
343    pub fn column_names(&self) -> Vec<String> {
344        self.columns.iter().map(|c| c.column_name.clone()).collect()
345    }
346
347    /// Returns all rowids currently stored in the table, in ascending order.
348    /// Every column's BTreeMap has the same keyset, so we just read from the first column.
349    pub fn rowids(&self) -> Vec<i64> {
350        let Some(first) = self.columns.first() else {
351            return vec![];
352        };
353        let rows = self.rows.lock().expect("rows mutex poisoned");
354        rows.get(&first.column_name)
355            .map(|r| r.rowids())
356            .unwrap_or_default()
357    }
358
359    /// Reads a single cell at `(column, rowid)`.
360    pub fn get_value(&self, column: &str, rowid: i64) -> Option<Value> {
361        let rows = self.rows.lock().expect("rows mutex poisoned");
362        rows.get(column).and_then(|r| r.get(rowid))
363    }
364
365    /// Removes the row identified by `rowid` from every column's storage and
366    /// from every secondary index entry.
367    pub fn delete_row(&mut self, rowid: i64) {
368        // Snapshot the values we're about to delete so we can strip them
369        // from secondary indexes by (value, rowid) before the row storage
370        // disappears.
371        let per_column_values: Vec<(String, Option<Value>)> = self
372            .columns
373            .iter()
374            .map(|c| (c.column_name.clone(), self.get_value(&c.column_name, rowid)))
375            .collect();
376
377        // Remove from row storage.
378        {
379            let rows_clone = Arc::clone(&self.rows);
380            let mut row_data = rows_clone.lock().expect("rows mutex poisoned");
381            for col in &self.columns {
382                if let Some(r) = row_data.get_mut(&col.column_name) {
383                    match r {
384                        Row::Integer(m) => {
385                            m.remove(&rowid);
386                        }
387                        Row::Text(m) => {
388                            m.remove(&rowid);
389                        }
390                        Row::Real(m) => {
391                            m.remove(&rowid);
392                        }
393                        Row::Bool(m) => {
394                            m.remove(&rowid);
395                        }
396                        Row::Vector(m) => {
397                            m.remove(&rowid);
398                        }
399                        Row::None => {}
400                    }
401                }
402            }
403        }
404
405        // Strip secondary-index entries. Non-indexed columns just don't
406        // show up in secondary_indexes and are no-ops here.
407        for (col_name, value) in per_column_values {
408            if let Some(idx) = self.index_for_column_mut(&col_name) {
409                if let Some(v) = value {
410                    idx.remove(&v, rowid);
411                }
412            }
413        }
414    }
415
416    /// Replays a single row at `rowid` when loading a table from disk. Takes
417    /// one typed value per column (in declaration order); `None` means the
418    /// stored cell carried a NULL for that column. Unlike `insert_row` this
419    /// trusts the on-disk state and does *not* re-check UNIQUE — we're
420    /// rebuilding a state that was already consistent when it was saved.
421    pub fn restore_row(&mut self, rowid: i64, values: Vec<Option<Value>>) -> Result<()> {
422        if values.len() != self.columns.len() {
423            return Err(SQLRiteError::Internal(format!(
424                "cell has {} values but table '{}' has {} columns",
425                values.len(),
426                self.tb_name,
427                self.columns.len()
428            )));
429        }
430
431        let column_names: Vec<String> =
432            self.columns.iter().map(|c| c.column_name.clone()).collect();
433
434        for (i, value) in values.into_iter().enumerate() {
435            let col_name = &column_names[i];
436
437            // Write into the per-column row storage first (scoped borrow so
438            // the secondary-index update below doesn't fight over `self`).
439            {
440                let rows_clone = Arc::clone(&self.rows);
441                let mut row_data = rows_clone.lock().expect("rows mutex poisoned");
442                let cell = row_data.get_mut(col_name).ok_or_else(|| {
443                    SQLRiteError::Internal(format!("Row storage missing for column '{col_name}'"))
444                })?;
445
446                match (cell, &value) {
447                    (Row::Integer(map), Some(Value::Integer(v))) => {
448                        map.insert(rowid, *v as i32);
449                    }
450                    (Row::Integer(_), None) => {
451                        return Err(SQLRiteError::Internal(format!(
452                            "Integer column '{col_name}' cannot store NULL — corrupt cell?"
453                        )));
454                    }
455                    (Row::Text(map), Some(Value::Text(s))) => {
456                        map.insert(rowid, s.clone());
457                    }
458                    (Row::Text(map), None) => {
459                        // Matches the on-insert convention: NULL in Text
460                        // storage is represented by the literal "Null"
461                        // sentinel and not added to the index.
462                        map.insert(rowid, "Null".to_string());
463                    }
464                    (Row::Real(map), Some(Value::Real(v))) => {
465                        map.insert(rowid, *v as f32);
466                    }
467                    (Row::Real(_), None) => {
468                        return Err(SQLRiteError::Internal(format!(
469                            "Real column '{col_name}' cannot store NULL — corrupt cell?"
470                        )));
471                    }
472                    (Row::Bool(map), Some(Value::Bool(v))) => {
473                        map.insert(rowid, *v);
474                    }
475                    (Row::Bool(_), None) => {
476                        return Err(SQLRiteError::Internal(format!(
477                            "Bool column '{col_name}' cannot store NULL — corrupt cell?"
478                        )));
479                    }
480                    (Row::Vector(map), Some(Value::Vector(v))) => {
481                        map.insert(rowid, v.clone());
482                    }
483                    (Row::Vector(_), None) => {
484                        return Err(SQLRiteError::Internal(format!(
485                            "Vector column '{col_name}' cannot store NULL — corrupt cell?"
486                        )));
487                    }
488                    (row, v) => {
489                        return Err(SQLRiteError::Internal(format!(
490                            "Type mismatch restoring column '{col_name}': storage {row:?} vs value {v:?}"
491                        )));
492                    }
493                }
494            }
495
496            // Maintain the secondary index (if any). NULL values are skipped
497            // by `insert`, matching the "NULL is not indexed" convention.
498            if let Some(v) = &value {
499                if let Some(idx) = self.index_for_column_mut(col_name) {
500                    idx.insert(v, rowid)?;
501                }
502            }
503        }
504
505        if rowid > self.last_rowid {
506            self.last_rowid = rowid;
507        }
508        Ok(())
509    }
510
511    /// Extracts a row as an ordered `Vec<Option<Value>>` matching the column
512    /// declaration order. Returns `None` entries for columns that hold NULL.
513    /// Used by `save_database` to turn a table's in-memory state into cells.
514    pub fn extract_row(&self, rowid: i64) -> Vec<Option<Value>> {
515        self.columns
516            .iter()
517            .map(|c| match self.get_value(&c.column_name, rowid) {
518                Some(Value::Null) => None,
519                Some(v) => Some(v),
520                None => None,
521            })
522            .collect()
523    }
524
525    /// Overwrites the cell at `(column, rowid)` with `new_val`. Enforces the
526    /// column's datatype and UNIQUE constraint, and updates any secondary
527    /// index.
528    ///
529    /// Returns `Err` if the column doesn't exist, the value type is incompatible,
530    /// or writing would violate UNIQUE.
531    pub fn set_value(&mut self, column: &str, rowid: i64, new_val: Value) -> Result<()> {
532        let col_index = self
533            .columns
534            .iter()
535            .position(|c| c.column_name == column)
536            .ok_or_else(|| SQLRiteError::General(format!("Column '{column}' not found")))?;
537
538        // No-op write — keep storage exactly the same.
539        let current = self.get_value(column, rowid);
540        if current.as_ref() == Some(&new_val) {
541            return Ok(());
542        }
543
544        // Enforce UNIQUE. Prefer an O(log N) index probe if we have one;
545        // fall back to a full column scan otherwise (Real/Bool UNIQUE
546        // columns, which don't get auto-indexed).
547        if self.columns[col_index].is_unique && !matches!(new_val, Value::Null) {
548            if let Some(idx) = self.index_for_column(column) {
549                for other in idx.lookup(&new_val) {
550                    if other != rowid {
551                        return Err(SQLRiteError::General(format!(
552                            "UNIQUE constraint violated for column '{column}'"
553                        )));
554                    }
555                }
556            } else {
557                for other in self.rowids() {
558                    if other == rowid {
559                        continue;
560                    }
561                    if self.get_value(column, other).as_ref() == Some(&new_val) {
562                        return Err(SQLRiteError::General(format!(
563                            "UNIQUE constraint violated for column '{column}'"
564                        )));
565                    }
566                }
567            }
568        }
569
570        // Drop the old index entry before writing the new value, so the
571        // post-write index insert doesn't clash with the previous state.
572        if let Some(old) = current {
573            if let Some(idx) = self.index_for_column_mut(column) {
574                idx.remove(&old, rowid);
575            }
576        }
577
578        // Write into the column's Row, type-checking against the declared DataType.
579        let declared = &self.columns[col_index].datatype;
580        {
581            let rows_clone = Arc::clone(&self.rows);
582            let mut row_data = rows_clone.lock().expect("rows mutex poisoned");
583            let cell = row_data.get_mut(column).ok_or_else(|| {
584                SQLRiteError::Internal(format!("Row storage missing for column '{column}'"))
585            })?;
586
587            match (cell, &new_val, declared) {
588                (Row::Integer(m), Value::Integer(v), _) => {
589                    m.insert(rowid, *v as i32);
590                }
591                (Row::Real(m), Value::Real(v), _) => {
592                    m.insert(rowid, *v as f32);
593                }
594                (Row::Real(m), Value::Integer(v), _) => {
595                    m.insert(rowid, *v as f32);
596                }
597                (Row::Text(m), Value::Text(v), dt) => {
598                    // Phase 7e — UPDATE on a JSON column also validates
599                    // the new text is well-formed JSON, mirroring INSERT.
600                    if matches!(dt, DataType::Json) {
601                        if let Err(e) = serde_json::from_str::<serde_json::Value>(v) {
602                            return Err(SQLRiteError::General(format!(
603                                "Type mismatch: expected JSON for column '{column}', got '{v}': {e}"
604                            )));
605                        }
606                    }
607                    m.insert(rowid, v.clone());
608                }
609                (Row::Bool(m), Value::Bool(v), _) => {
610                    m.insert(rowid, *v);
611                }
612                (Row::Vector(m), Value::Vector(v), DataType::Vector(declared_dim)) => {
613                    if v.len() != *declared_dim {
614                        return Err(SQLRiteError::General(format!(
615                            "Vector dimension mismatch for column '{column}': declared {declared_dim}, got {}",
616                            v.len()
617                        )));
618                    }
619                    m.insert(rowid, v.clone());
620                }
621                // NULL writes: store the sentinel "Null" string for Text; for other
622                // types we leave storage as-is since those BTreeMaps can't hold NULL today.
623                (Row::Text(m), Value::Null, _) => {
624                    m.insert(rowid, "Null".to_string());
625                }
626                (_, new, dt) => {
627                    return Err(SQLRiteError::General(format!(
628                        "Type mismatch: cannot assign {} to column '{column}' of type {dt}",
629                        new.to_display_string()
630                    )));
631                }
632            }
633        }
634
635        // Maintain the secondary index, if any. NULL values are skipped by
636        // insert per convention.
637        if !matches!(new_val, Value::Null) {
638            if let Some(idx) = self.index_for_column_mut(column) {
639                idx.insert(&new_val, rowid)?;
640            }
641        }
642
643        Ok(())
644    }
645
646    /// Returns an immutable reference of `sql::db::table::Column` if the table contains a
647    /// column with the specified key as a column name.
648    ///
649    #[allow(dead_code)]
650    pub fn get_column(&mut self, column_name: String) -> Result<&Column> {
651        if let Some(column) = self
652            .columns
653            .iter()
654            .filter(|c| c.column_name == column_name)
655            .collect::<Vec<&Column>>()
656            .first()
657        {
658            Ok(column)
659        } else {
660            Err(SQLRiteError::General(String::from("Column not found.")))
661        }
662    }
663
664    /// Validates if columns and values being inserted violate the UNIQUE constraint.
665    /// PRIMARY KEY columns are automatically UNIQUE. Uses the corresponding
666    /// secondary index when one exists (O(log N) lookup); falls back to a
667    /// linear scan for indexable-but-not-indexed situations (e.g. a Real
668    /// UNIQUE column — Real isn't in the auto-indexed set).
669    pub fn validate_unique_constraint(
670        &mut self,
671        cols: &Vec<String>,
672        values: &Vec<String>,
673    ) -> Result<()> {
674        for (idx, name) in cols.iter().enumerate() {
675            let column = self
676                .columns
677                .iter()
678                .find(|c| &c.column_name == name)
679                .ok_or_else(|| SQLRiteError::General(format!("Column '{name}' not found")))?;
680            if !column.is_unique {
681                continue;
682            }
683            let datatype = &column.datatype;
684            let val = &values[idx];
685
686            // Parse the string value into a runtime Value according to the
687            // declared column type. If parsing fails the caller's insert
688            // would also fail with the same error; surface it here so we
689            // don't emit a misleading "unique OK" on bad input.
690            let parsed = match datatype {
691                DataType::Integer => val.parse::<i64>().map(Value::Integer).map_err(|_| {
692                    SQLRiteError::General(format!(
693                        "Type mismatch: expected INTEGER for column '{name}', got '{val}'"
694                    ))
695                })?,
696                DataType::Text => Value::Text(val.clone()),
697                DataType::Real => val.parse::<f64>().map(Value::Real).map_err(|_| {
698                    SQLRiteError::General(format!(
699                        "Type mismatch: expected REAL for column '{name}', got '{val}'"
700                    ))
701                })?,
702                DataType::Bool => val.parse::<bool>().map(Value::Bool).map_err(|_| {
703                    SQLRiteError::General(format!(
704                        "Type mismatch: expected BOOL for column '{name}', got '{val}'"
705                    ))
706                })?,
707                DataType::Vector(declared_dim) => {
708                    let parsed_vec = parse_vector_literal(val).map_err(|e| {
709                        SQLRiteError::General(format!(
710                            "Type mismatch: expected VECTOR({declared_dim}) for column '{name}', {e}"
711                        ))
712                    })?;
713                    if parsed_vec.len() != *declared_dim {
714                        return Err(SQLRiteError::General(format!(
715                            "Vector dimension mismatch for column '{name}': declared {declared_dim}, got {}",
716                            parsed_vec.len()
717                        )));
718                    }
719                    Value::Vector(parsed_vec)
720                }
721                DataType::Json => {
722                    // JSON values stored as Text. UNIQUE on a JSON column
723                    // compares the canonical text representation
724                    // verbatim — `{"a": 1}` and `{"a":1}` are distinct.
725                    // Document this if anyone actually requests UNIQUE
726                    // JSON; for MVP, treat-as-text is fine.
727                    Value::Text(val.clone())
728                }
729                DataType::None | DataType::Invalid => {
730                    return Err(SQLRiteError::Internal(format!(
731                        "column '{name}' has an unsupported datatype"
732                    )));
733                }
734            };
735
736            if let Some(secondary) = self.index_for_column(name) {
737                if secondary.would_violate_unique(&parsed) {
738                    return Err(SQLRiteError::General(format!(
739                        "UNIQUE constraint violated for column '{name}': value '{val}' already exists"
740                    )));
741                }
742            } else {
743                // No secondary index (Real / Bool UNIQUE). Linear scan.
744                for other in self.rowids() {
745                    if self.get_value(name, other).as_ref() == Some(&parsed) {
746                        return Err(SQLRiteError::General(format!(
747                            "UNIQUE constraint violated for column '{name}': value '{val}' already exists"
748                        )));
749                    }
750                }
751            }
752        }
753        Ok(())
754    }
755
756    /// Inserts all VALUES in its approprieta COLUMNS, using the ROWID an embedded INDEX on all ROWS
757    /// Every `Table` keeps track of the `last_rowid` in order to facilitate what the next one would be.
758    /// One limitation of this data structure is that we can only have one write transaction at a time, otherwise
759    /// we could have a race condition on the last_rowid.
760    ///
761    /// Since we are loosely modeling after SQLite, this is also a limitation of SQLite (allowing only one write transcation at a time),
762    /// So we are good. :)
763    ///
764    /// Returns `Err` (leaving the table unchanged) when the user supplies an
765    /// incompatibly-typed value — no more panics on bad input.
766    pub fn insert_row(&mut self, cols: &Vec<String>, values: &Vec<String>) -> Result<()> {
767        let mut next_rowid = self.last_rowid + 1;
768
769        // Auto-assign INTEGER PRIMARY KEY when the user omits it; otherwise
770        // adopt the supplied value as the new rowid.
771        if self.primary_key != "-1" {
772            if !cols.iter().any(|col| col == &self.primary_key) {
773                // Write the auto-assigned PK into row storage, then sync
774                // the secondary index.
775                let val = next_rowid as i32;
776                let wrote_integer = {
777                    let rows_clone = Arc::clone(&self.rows);
778                    let mut row_data = rows_clone.lock().expect("rows mutex poisoned");
779                    let table_col_data = row_data.get_mut(&self.primary_key).ok_or_else(|| {
780                        SQLRiteError::Internal(format!(
781                            "Row storage missing for primary key column '{}'",
782                            self.primary_key
783                        ))
784                    })?;
785                    match table_col_data {
786                        Row::Integer(tree) => {
787                            tree.insert(next_rowid, val);
788                            true
789                        }
790                        _ => false, // non-integer PK: auto-assign is a no-op
791                    }
792                };
793                if wrote_integer {
794                    let pk = self.primary_key.clone();
795                    if let Some(idx) = self.index_for_column_mut(&pk) {
796                        idx.insert(&Value::Integer(val as i64), next_rowid)?;
797                    }
798                }
799            } else {
800                for i in 0..cols.len() {
801                    if cols[i] == self.primary_key {
802                        let val = &values[i];
803                        next_rowid = val.parse::<i64>().map_err(|_| {
804                            SQLRiteError::General(format!(
805                                "Type mismatch: PRIMARY KEY column '{}' expects INTEGER, got '{val}'",
806                                self.primary_key
807                            ))
808                        })?;
809                    }
810                }
811            }
812        }
813
814        // For every table column, either pick the supplied value or pad with NULL
815        // so that every column's BTreeMap keeps the same rowid keyset.
816        let column_names = self
817            .columns
818            .iter()
819            .map(|col| col.column_name.to_string())
820            .collect::<Vec<String>>();
821        let mut j: usize = 0;
822        for i in 0..column_names.len() {
823            let mut val = String::from("Null");
824            let key = &column_names[i];
825
826            if let Some(supplied_key) = cols.get(j) {
827                if supplied_key == &column_names[i] {
828                    val = values[j].to_string();
829                    j += 1;
830                } else if self.primary_key == column_names[i] {
831                    // PK already stored in the auto-assign branch above.
832                    continue;
833                }
834            } else if self.primary_key == column_names[i] {
835                continue;
836            }
837
838            // Step 1: write into row storage and compute the typed Value
839            // we'll hand to the secondary index (if any).
840            let typed_value: Option<Value> = {
841                let rows_clone = Arc::clone(&self.rows);
842                let mut row_data = rows_clone.lock().expect("rows mutex poisoned");
843                let table_col_data = row_data.get_mut(key).ok_or_else(|| {
844                    SQLRiteError::Internal(format!("Row storage missing for column '{key}'"))
845                })?;
846
847                match table_col_data {
848                    Row::Integer(tree) => {
849                        let parsed = val.parse::<i32>().map_err(|_| {
850                            SQLRiteError::General(format!(
851                                "Type mismatch: expected INTEGER for column '{key}', got '{val}'"
852                            ))
853                        })?;
854                        tree.insert(next_rowid, parsed);
855                        Some(Value::Integer(parsed as i64))
856                    }
857                    Row::Text(tree) => {
858                        // Phase 7e — JSON columns also reach here (they
859                        // share Row::Text storage with TEXT columns).
860                        // Validate the value parses as JSON before
861                        // storing; otherwise we'd happily write
862                        // `not-json-at-all` and only fail when
863                        // json_extract tried to parse it later.
864                        if matches!(self.columns[i].datatype, DataType::Json) && val != "Null" {
865                            if let Err(e) = serde_json::from_str::<serde_json::Value>(&val) {
866                                return Err(SQLRiteError::General(format!(
867                                    "Type mismatch: expected JSON for column '{key}', got '{val}': {e}"
868                                )));
869                            }
870                        }
871                        tree.insert(next_rowid, val.to_string());
872                        // "Null" sentinel stays out of the index — it isn't a
873                        // real user value.
874                        if val != "Null" {
875                            Some(Value::Text(val.to_string()))
876                        } else {
877                            None
878                        }
879                    }
880                    Row::Real(tree) => {
881                        let parsed = val.parse::<f32>().map_err(|_| {
882                            SQLRiteError::General(format!(
883                                "Type mismatch: expected REAL for column '{key}', got '{val}'"
884                            ))
885                        })?;
886                        tree.insert(next_rowid, parsed);
887                        Some(Value::Real(parsed as f64))
888                    }
889                    Row::Bool(tree) => {
890                        let parsed = val.parse::<bool>().map_err(|_| {
891                            SQLRiteError::General(format!(
892                                "Type mismatch: expected BOOL for column '{key}', got '{val}'"
893                            ))
894                        })?;
895                        tree.insert(next_rowid, parsed);
896                        Some(Value::Bool(parsed))
897                    }
898                    Row::Vector(tree) => {
899                        // The parser put a bracket-array literal into `val`
900                        // (e.g. "[0.1,0.2,0.3]"). Parse it back here and
901                        // dim-check against the column's declared
902                        // DataType::Vector(N).
903                        let parsed = parse_vector_literal(&val).map_err(|e| {
904                            SQLRiteError::General(format!(
905                                "Type mismatch: expected VECTOR for column '{key}', {e}"
906                            ))
907                        })?;
908                        let declared_dim = match &self.columns[i].datatype {
909                            DataType::Vector(d) => *d,
910                            other => {
911                                return Err(SQLRiteError::Internal(format!(
912                                    "Row::Vector storage on non-Vector column '{key}' (declared as {other})"
913                                )));
914                            }
915                        };
916                        if parsed.len() != declared_dim {
917                            return Err(SQLRiteError::General(format!(
918                                "Vector dimension mismatch for column '{key}': declared {declared_dim}, got {}",
919                                parsed.len()
920                            )));
921                        }
922                        tree.insert(next_rowid, parsed.clone());
923                        Some(Value::Vector(parsed))
924                    }
925                    Row::None => {
926                        return Err(SQLRiteError::Internal(format!(
927                            "Column '{key}' has no row storage"
928                        )));
929                    }
930                }
931            };
932
933            // Step 2: maintain the secondary index (if any). insert() is a
934            // no-op for Value::Null and cheap for other value kinds.
935            if let Some(v) = typed_value.clone() {
936                if let Some(idx) = self.index_for_column_mut(key) {
937                    idx.insert(&v, next_rowid)?;
938                }
939            }
940
941            // Step 3 (Phase 7d.2): maintain any HNSW indexes on this column.
942            // The HNSW algorithm needs access to other rows' vectors when
943            // wiring up neighbor edges, so build a get_vec closure that
944            // pulls from the table's row storage (which we *just* updated
945            // with the new value).
946            if let Some(Value::Vector(new_vec)) = &typed_value {
947                self.maintain_hnsw_on_insert(key, next_rowid, new_vec);
948            }
949
950            // Step 4 (Phase 8b): maintain any FTS indexes on this column.
951            // Cheap incremental update — PostingList::insert tokenizes
952            // the value and adds postings under the new rowid. DELETE
953            // and UPDATE take the rebuild-on-save path instead (Q7).
954            if let Some(Value::Text(text)) = &typed_value {
955                self.maintain_fts_on_insert(key, next_rowid, text);
956            }
957        }
958        self.last_rowid = next_rowid;
959        Ok(())
960    }
961
962    /// After a row insert, push the new (rowid, vector) into every HNSW
963    /// index whose column matches `column`. Split out of `insert_row` so
964    /// the borrowing dance — we need both `&self.rows` (read other
965    /// vectors) and `&mut self.hnsw_indexes` (insert into the graph) —
966    /// stays localized.
967    fn maintain_hnsw_on_insert(&mut self, column: &str, rowid: i64, new_vec: &[f32]) {
968        // Snapshot the current vector storage so the get_vec closure
969        // doesn't fight with `&mut self.hnsw_indexes`. For a typical
970        // HNSW insert we touch ef_construction × log(N) other vectors,
971        // so the snapshot cost is small relative to the graph wiring.
972        let mut vec_snapshot: HashMap<i64, Vec<f32>> = HashMap::new();
973        {
974            let row_data = self.rows.lock().expect("rows mutex poisoned");
975            if let Some(Row::Vector(map)) = row_data.get(column) {
976                for (id, v) in map.iter() {
977                    vec_snapshot.insert(*id, v.clone());
978                }
979            }
980        }
981        // The new row was just written into row storage — make sure the
982        // snapshot reflects it (it should, but defensive).
983        vec_snapshot.insert(rowid, new_vec.to_vec());
984
985        for entry in &mut self.hnsw_indexes {
986            if entry.column_name == column {
987                entry.index.insert(rowid, new_vec, |id| {
988                    vec_snapshot.get(&id).cloned().unwrap_or_default()
989                });
990            }
991        }
992    }
993
994    /// After a row insert, push the new (rowid, text) into every FTS
995    /// index whose column matches `column`. Phase 8b.
996    ///
997    /// Mirrors [`Self::maintain_hnsw_on_insert`] but the FTS index is
998    /// self-contained — `PostingList::insert` only needs the new doc's
999    /// text, not the rest of the corpus, so there's no snapshot dance.
1000    fn maintain_fts_on_insert(&mut self, column: &str, rowid: i64, text: &str) {
1001        for entry in &mut self.fts_indexes {
1002            if entry.column_name == column {
1003                entry.index.insert(rowid, text);
1004            }
1005        }
1006    }
1007
1008    /// Print the table schema to standard output in a pretty formatted way.
1009    ///
1010    /// # Example
1011    ///
1012    /// ```text
1013    /// let table = Table::new(payload);
1014    /// table.print_table_schema();
1015    ///
1016    /// Prints to standard output:
1017    ///    +-------------+-----------+-------------+--------+----------+
1018    ///    | Column Name | Data Type | PRIMARY KEY | UNIQUE | NOT NULL |
1019    ///    +-------------+-----------+-------------+--------+----------+
1020    ///    | id          | Integer   | true        | true   | true     |
1021    ///    +-------------+-----------+-------------+--------+----------+
1022    ///    | name        | Text      | false       | true   | false    |
1023    ///    +-------------+-----------+-------------+--------+----------+
1024    ///    | email       | Text      | false       | false  | false    |
1025    ///    +-------------+-----------+-------------+--------+----------+
1026    /// ```
1027    ///
1028    pub fn print_table_schema(&self) -> Result<usize> {
1029        let mut table = PrintTable::new();
1030        table.add_row(row![
1031            "Column Name",
1032            "Data Type",
1033            "PRIMARY KEY",
1034            "UNIQUE",
1035            "NOT NULL"
1036        ]);
1037
1038        for col in &self.columns {
1039            table.add_row(row![
1040                col.column_name,
1041                col.datatype,
1042                col.is_pk,
1043                col.is_unique,
1044                col.not_null
1045            ]);
1046        }
1047
1048        table.printstd();
1049        Ok(table.len() * 2 + 1)
1050    }
1051
1052    /// Print the table data to standard output in a pretty formatted way.
1053    ///
1054    /// # Example
1055    ///
1056    /// ```text
1057    /// let db_table = db.get_table_mut(table_name.to_string()).unwrap();
1058    /// db_table.print_table_data();
1059    ///
1060    /// Prints to standard output:
1061    ///     +----+---------+------------------------+
1062    ///     | id | name    | email                  |
1063    ///     +----+---------+------------------------+
1064    ///     | 1  | "Jack"  | "jack@mail.com"        |
1065    ///     +----+---------+------------------------+
1066    ///     | 10 | "Bob"   | "bob@main.com"         |
1067    ///     +----+---------+------------------------+
1068    ///     | 11 | "Bill"  | "bill@main.com"        |
1069    ///     +----+---------+------------------------+
1070    /// ```
1071    ///
1072    pub fn print_table_data(&self) {
1073        let mut print_table = PrintTable::new();
1074
1075        let column_names = self
1076            .columns
1077            .iter()
1078            .map(|col| col.column_name.to_string())
1079            .collect::<Vec<String>>();
1080
1081        let header_row = PrintRow::new(
1082            column_names
1083                .iter()
1084                .map(|col| PrintCell::new(col))
1085                .collect::<Vec<PrintCell>>(),
1086        );
1087
1088        let rows_clone = Arc::clone(&self.rows);
1089        let row_data = rows_clone.lock().expect("rows mutex poisoned");
1090        let first_col_data = row_data
1091            .get(&self.columns.first().unwrap().column_name)
1092            .unwrap();
1093        let num_rows = first_col_data.count();
1094        let mut print_table_rows: Vec<PrintRow> = vec![PrintRow::new(vec![]); num_rows];
1095
1096        for col_name in &column_names {
1097            let col_val = row_data
1098                .get(col_name)
1099                .expect("Can't find any rows with the given column");
1100            let columns: Vec<String> = col_val.get_serialized_col_data();
1101
1102            for i in 0..num_rows {
1103                if let Some(cell) = &columns.get(i) {
1104                    print_table_rows[i].add_cell(PrintCell::new(cell));
1105                } else {
1106                    print_table_rows[i].add_cell(PrintCell::new(""));
1107                }
1108            }
1109        }
1110
1111        print_table.add_row(header_row);
1112        for row in print_table_rows {
1113            print_table.add_row(row);
1114        }
1115
1116        print_table.printstd();
1117    }
1118}
1119
1120/// The schema for each SQL column in every table.
1121///
1122/// Per-column index state moved to `Table::secondary_indexes` in Phase 3e —
1123/// a single `Column` describes the declared schema (name, type, constraints)
1124/// and nothing more.
1125#[derive(PartialEq, Debug, Clone)]
1126pub struct Column {
1127    pub column_name: String,
1128    pub datatype: DataType,
1129    pub is_pk: bool,
1130    pub not_null: bool,
1131    pub is_unique: bool,
1132}
1133
1134impl Column {
1135    pub fn new(
1136        name: String,
1137        datatype: String,
1138        is_pk: bool,
1139        not_null: bool,
1140        is_unique: bool,
1141    ) -> Self {
1142        let dt = DataType::new(datatype);
1143        Column {
1144            column_name: name,
1145            datatype: dt,
1146            is_pk,
1147            not_null,
1148            is_unique,
1149        }
1150    }
1151}
1152
1153/// The schema for each SQL row in every table is represented in memory
1154/// by following structure
1155///
1156/// This is an enum representing each of the available types organized in a BTreeMap
1157/// data structure, using the ROWID and key and each corresponding type as value
1158#[derive(PartialEq, Debug, Clone)]
1159pub enum Row {
1160    Integer(BTreeMap<i64, i32>),
1161    Text(BTreeMap<i64, String>),
1162    Real(BTreeMap<i64, f32>),
1163    Bool(BTreeMap<i64, bool>),
1164    /// Phase 7a: dense f32 vector storage. Each `Vec<f32>` should have
1165    /// length matching the column's declared `DataType::Vector(dim)`,
1166    /// enforced at INSERT time. The Row variant doesn't carry the dim —
1167    /// it lives in the column metadata.
1168    Vector(BTreeMap<i64, Vec<f32>>),
1169    None,
1170}
1171
1172impl Row {
1173    fn get_serialized_col_data(&self) -> Vec<String> {
1174        match self {
1175            Row::Integer(cd) => cd.values().map(|v| v.to_string()).collect(),
1176            Row::Real(cd) => cd.values().map(|v| v.to_string()).collect(),
1177            Row::Text(cd) => cd.values().map(|v| v.to_string()).collect(),
1178            Row::Bool(cd) => cd.values().map(|v| v.to_string()).collect(),
1179            Row::Vector(cd) => cd.values().map(format_vector_for_display).collect(),
1180            Row::None => panic!("Found None in columns"),
1181        }
1182    }
1183
1184    fn count(&self) -> usize {
1185        match self {
1186            Row::Integer(cd) => cd.len(),
1187            Row::Real(cd) => cd.len(),
1188            Row::Text(cd) => cd.len(),
1189            Row::Bool(cd) => cd.len(),
1190            Row::Vector(cd) => cd.len(),
1191            Row::None => panic!("Found None in columns"),
1192        }
1193    }
1194
1195    /// Every column's BTreeMap is keyed by ROWID. All columns share the same keyset
1196    /// after an INSERT (missing columns are padded), so any column's keys are a valid
1197    /// iteration of the table's rowids.
1198    pub fn rowids(&self) -> Vec<i64> {
1199        match self {
1200            Row::Integer(m) => m.keys().copied().collect(),
1201            Row::Text(m) => m.keys().copied().collect(),
1202            Row::Real(m) => m.keys().copied().collect(),
1203            Row::Bool(m) => m.keys().copied().collect(),
1204            Row::Vector(m) => m.keys().copied().collect(),
1205            Row::None => vec![],
1206        }
1207    }
1208
1209    pub fn get(&self, rowid: i64) -> Option<Value> {
1210        match self {
1211            Row::Integer(m) => m.get(&rowid).map(|v| Value::Integer(i64::from(*v))),
1212            // INSERT stores the literal string "Null" in Text columns that were omitted
1213            // from the query — re-map that back to a real NULL on read.
1214            Row::Text(m) => m.get(&rowid).map(|v| {
1215                if v == "Null" {
1216                    Value::Null
1217                } else {
1218                    Value::Text(v.clone())
1219                }
1220            }),
1221            Row::Real(m) => m.get(&rowid).map(|v| Value::Real(f64::from(*v))),
1222            Row::Bool(m) => m.get(&rowid).map(|v| Value::Bool(*v)),
1223            Row::Vector(m) => m.get(&rowid).map(|v| Value::Vector(v.clone())),
1224            Row::None => None,
1225        }
1226    }
1227}
1228
/// Turns a vector into its human-readable text form. Shared by
/// `Row::get_serialized_col_data` (the REPL's print-table path) and
/// `Value::to_display_string`.
///
/// Output looks like `[0.1, 0.2, 0.3]`: elements separated by `", "` and
/// wrapped in square brackets; an empty vector renders as `[]`. Each element
/// uses the default `f32` `Display`, so `0.1f32` prints as `0.1`.
/// High-dimensional vectors (e.g. 384 elements) produce one long line;
/// truncation is a future polish (see Phase 7 plan, "What this proposal
/// does NOT commit to").
fn format_vector_for_display(v: &Vec<f32>) -> String {
    let rendered: Vec<String> = v.iter().map(|component| component.to_string()).collect();
    format!("[{}]", rendered.join(", "))
}
1250
1251/// Runtime value produced by query execution. Separate from the on-disk `Row` enum
1252/// so the executor can carry typed values (including NULL) across operators.
1253#[derive(Debug, Clone, PartialEq)]
1254pub enum Value {
1255    Integer(i64),
1256    Text(String),
1257    Real(f64),
1258    Bool(bool),
1259    /// Phase 7a: dense f32 vector as a runtime value. Carries its own
1260    /// dimension implicitly via `Vec::len`; the column it's being
1261    /// assigned to has a declared `DataType::Vector(N)` that's checked
1262    /// at INSERT/UPDATE time.
1263    Vector(Vec<f32>),
1264    Null,
1265}
1266
1267impl Value {
1268    pub fn to_display_string(&self) -> String {
1269        match self {
1270            Value::Integer(v) => v.to_string(),
1271            Value::Text(s) => s.clone(),
1272            Value::Real(f) => f.to_string(),
1273            Value::Bool(b) => b.to_string(),
1274            Value::Vector(v) => format_vector_for_display(v),
1275            Value::Null => String::from("NULL"),
1276        }
1277    }
1278}
1279
1280/// Parse a bracket-array literal like `"[0.1, 0.2, 0.3]"` (or `"[1, 2, 3]"`)
1281/// into a `Vec<f32>`. The parser/insert pipeline stores vector literals as
1282/// strings in `InsertQuery::rows` (a `Vec<Vec<String>>`); this helper is
1283/// the inverse — turn the string back into a typed vector at the boundary
1284/// where we actually need element-typed data.
1285///
1286/// Accepts:
1287/// - `[]` → empty vector (caller's dimension check rejects it for VECTOR(N≥1))
1288/// - `[0.1, 0.2, 0.3]` → standard float syntax
1289/// - `[1, 2, 3]` → integers, coerced to f32 (matches `VALUES (1, 2)` for
1290///   `REAL` columns; we widen ints to floats automatically)
1291/// - whitespace tolerated everywhere (Python/JSON/pgvector convention)
1292///
1293/// Rejects with a descriptive message:
1294/// - missing `[` / `]`
1295/// - non-numeric elements (`['foo', 0.1]`)
1296/// - NaN / Inf literals (we accept them via `f32::from_str` but caller can
1297///   reject if undesired — for now we let them through; HNSW etc. will
1298///   reject NaN at index time)
1299pub fn parse_vector_literal(s: &str) -> Result<Vec<f32>> {
1300    let trimmed = s.trim();
1301    if !trimmed.starts_with('[') || !trimmed.ends_with(']') {
1302        return Err(SQLRiteError::General(format!(
1303            "expected bracket-array literal `[...]`, got `{s}`"
1304        )));
1305    }
1306    let inner = &trimmed[1..trimmed.len() - 1].trim();
1307    if inner.is_empty() {
1308        return Ok(Vec::new());
1309    }
1310    let mut out = Vec::new();
1311    for (i, part) in inner.split(',').enumerate() {
1312        let element = part.trim();
1313        let parsed: f32 = element.parse().map_err(|_| {
1314            SQLRiteError::General(format!("vector element {i} (`{element}`) is not a number"))
1315        })?;
1316        out.push(parsed);
1317    }
1318    Ok(out)
1319}
1320
1321#[cfg(test)]
1322mod tests {
1323    use super::*;
1324    use sqlparser::dialect::SQLiteDialect;
1325    use sqlparser::parser::Parser;
1326
#[test]
fn datatype_display_trait_test() {
    // Each data type paired with the text its `Display` impl must produce.
    let expectations = [
        (DataType::Integer, "Integer"),
        (DataType::Text, "Text"),
        (DataType::Real, "Real"),
        (DataType::Bool, "Boolean"),
        (DataType::Vector(384), "Vector(384)"),
        (DataType::None, "None"),
        (DataType::Invalid, "Invalid"),
    ];

    for (datatype, rendered) in expectations.iter() {
        assert_eq!(format!("{}", datatype), *rendered);
    }
}
1345
1346    // -----------------------------------------------------------------
1347    // Phase 7a — VECTOR(N) column type
1348    // -----------------------------------------------------------------
1349
#[test]
fn datatype_new_parses_vector_dim() {
    let cases = [
        // Standard cases.
        ("vector(1)", 1usize),
        ("vector(384)", 384),
        ("vector(1536)", 1536),
        // Case-insensitive on the keyword.
        ("VECTOR(384)", 384),
        // Whitespace inside parens tolerated (the create-parser strips it
        // but the string-based round-trip in DataType::new is the one place
        // we don't fully control input formatting).
        ("vector( 64 )", 64),
    ];

    for (wire, dim) in cases.iter() {
        assert_eq!(DataType::new(wire.to_string()), DataType::Vector(*dim));
    }
}
1377
#[test]
fn datatype_new_rejects_bad_vector_strings() {
    let malformed = [
        // dim = 0 is rejected (Q2: VECTOR(N≥1)).
        "vector(0)",
        // Non-numeric dim.
        "vector(abc)",
        // Empty parens.
        "vector()",
        // Negative dim wouldn't even parse as usize, so falls into Invalid.
        "vector(-3)",
    ];

    for wire in malformed.iter() {
        assert_eq!(DataType::new(wire.to_string()), DataType::Invalid);
    }
}
1389
#[test]
fn datatype_to_wire_string_round_trips_vector() {
    let serialized = DataType::Vector(384).to_wire_string();
    assert_eq!(serialized, "vector(384)");

    // Feeding the wire form back through DataType::new must be lossless —
    // the ParsedColumn pipeline relies on this round-trip.
    let restored = DataType::new(serialized);
    assert_eq!(restored, DataType::Vector(384));
}
1399
/// A bracketed, comma-separated list of float literals parses into the
/// matching `f32` values.
#[test]
fn parse_vector_literal_accepts_floats() {
    let expected = vec![0.1f32, 0.2, 0.3];
    let parsed = parse_vector_literal("[0.1, 0.2, 0.3]").expect("parse");
    assert_eq!(parsed, expected);
}
1405
/// Integer elements are accepted and come back as their `f32`
/// equivalents.
#[test]
fn parse_vector_literal_accepts_ints_widening_to_f32() {
    let expected = vec![1.0f32, 2.0, 3.0];
    let parsed = parse_vector_literal("[1, 2, 3]").expect("parse");
    assert_eq!(parsed, expected);
}
1411
/// Negative elements and uneven spacing around elements and commas are
/// both accepted.
#[test]
fn parse_vector_literal_handles_negatives_and_whitespace() {
    let expected = vec![-1.5f32, 2.0, -3.5];
    let parsed = parse_vector_literal("[ -1.5 ,  2.0,  -3.5 ]").expect("parse");
    assert_eq!(parsed, expected);
}
1417
/// `[]` is a valid literal and parses to a vector with no elements.
#[test]
fn parse_vector_literal_empty_brackets_is_empty_vec() {
    let parsed = parse_vector_literal("[]").expect("parse");
    assert!(parsed.is_empty());
}
1423
/// Input that is not wrapped in a matching `[` … `]` pair is an error.
#[test]
fn parse_vector_literal_rejects_non_bracketed() {
    let not_bracketed = [
        "0.1, 0.2",   // no brackets at all
        "(0.1, 0.2)", // parentheses instead of brackets
        "[0.1, 0.2",  // missing ]
        "0.1, 0.2]",  // missing [
    ];

    for literal in not_bracketed {
        assert!(parse_vector_literal(literal).is_err());
    }
}
1431
/// A non-numeric element is an error, and the rendered error names both
/// the element's position (`vector element 1`) and its text (`'foo'`).
#[test]
fn parse_vector_literal_rejects_non_numeric_elements() {
    let msg = parse_vector_literal("[1.0, 'foo', 3.0]")
        .unwrap_err()
        .to_string();

    let names_position = msg.contains("vector element 1");
    let names_text = msg.contains("'foo'");
    if !names_position || !names_text {
        panic!("error message should pinpoint the bad element: got `{msg}`");
    }
}
1441
/// `Value::Vector` renders as a bracketed, comma-and-space separated
/// list; a vector with no elements renders as `[]`.
#[test]
fn value_vector_display_format() {
    let cases = [
        (vec![0.1, 0.2, 0.3], "[0.1, 0.2, 0.3]"),
        (vec![], "[]"),
    ];

    for (elements, rendered) in cases {
        assert_eq!(Value::Vector(elements).to_display_string(), rendered);
    }
}
1451
/// Parses a six-column `CREATE TABLE contacts` statement, builds a
/// `Table` from it, and checks the column count, the initial
/// `last_rowid`, and that the `id` column is an INTEGER primary key.
#[test]
fn create_new_table_test() {
    let query_statement = "CREATE TABLE contacts (
            id INTEGER PRIMARY KEY,
            first_name TEXT NOT NULL,
            last_name TEXT NOT NULl,
            email TEXT NOT NULL UNIQUE,
            active BOOL,
            score REAL
        );";
    let dialect = SQLiteDialect {};
    let mut ast = Parser::parse_sql(&dialect, query_statement).unwrap();
    if ast.len() > 1 {
        panic!("Expected a single query statement, but there are more then 1.")
    }
    let query = ast.pop().unwrap();

    let create_query = CreateQuery::new(&query).unwrap();

    let table = Table::new(create_query);

    assert_eq!(table.columns.len(), 6);
    assert_eq!(table.last_rowid, 0);

    // `find` stops at the first matching column, so no intermediate Vec
    // is built just to read its first element. A missing column fails
    // the test with the same "column not found" message as before.
    let id_column = "id".to_string();
    let column = table
        .columns
        .iter()
        .find(|c| c.column_name == id_column)
        .expect("column not found");

    assert!(column.is_pk);
    assert_eq!(column.datatype, DataType::Integer);
}
1490
/// Builds a three-column `contacts` table and checks that
/// `print_table_schema` returns `Ok(9)`.
#[test]
fn print_table_schema_test() {
    let sql = "CREATE TABLE contacts (
            id INTEGER PRIMARY KEY,
            first_name TEXT NOT NULL,
            last_name TEXT NOT NULl
        );";

    let mut statements = Parser::parse_sql(&SQLiteDialect {}, sql).unwrap();
    assert!(
        statements.len() <= 1,
        "Expected a single query statement, but there are more then 1."
    );
    let statement = statements.pop().unwrap();

    let table = Table::new(CreateQuery::new(&statement).unwrap());
    assert_eq!(table.print_table_schema(), Ok(9));
}
1511}