Skip to main content

sqlrite/sql/db/
table.rs

1use crate::error::{Result, SQLRiteError};
2use crate::sql::db::secondary_index::{IndexOrigin, SecondaryIndex};
3use crate::sql::hnsw::HnswIndex;
4use crate::sql::parser::create::CreateQuery;
5use std::collections::{BTreeMap, HashMap};
6use std::fmt;
7use std::sync::{Arc, Mutex};
8
9use prettytable::{Cell as PrintCell, Row as PrintRow, Table as PrintTable};
10
/// SQLRite data types.
///
/// Modeled on SQLite's storage classes and type affinities; see
/// [Datatypes In SQLite Version 3](https://www.sqlite.org/datatype3.html).
///
/// `Vector(dim)` is the Phase 7a addition — a fixed-dimension dense f32
/// array. The dimension is part of the type so a `VECTOR(384)` column
/// rejects `[0.1, 0.2, 0.3]` at INSERT time as a clean type error
/// rather than silently storing the wrong shape.
///
/// Equality on this enum is total (no floating-point payloads), so it
/// derives `Eq` alongside `PartialEq`.
#[derive(PartialEq, Eq, Debug, Clone)]
pub enum DataType {
    /// Integer column.
    Integer,
    /// Text column.
    Text,
    /// Floating-point column.
    Real,
    /// Boolean column.
    Bool,
    /// Dense f32 vector of fixed dimension. The `usize` is the column's
    /// declared dimension; every value stored in the column must have
    /// exactly that many elements.
    Vector(usize),
    /// Phase 7e — JSON column. Stored as canonical UTF-8 text (matches
    /// SQLite's JSON1 extension), validated at INSERT time. The
    /// `json_extract` family of functions parses on demand and returns
    /// either a primitive `Value` (Integer / Real / Text / Bool / Null)
    /// or a Text value carrying the JSON-encoded sub-object/array.
    /// Q3 originally specified `bincoded serde_json::Value`, but bincode
    /// was removed from the engine in Phase 3c — see the scope-correction
    /// note in `docs/phase-7-plan.md` for the rationale on switching to
    /// text storage.
    Json,
    /// Produced by the wire string `"none"`.
    None,
    /// Produced for any wire string that is not a recognized type name,
    /// including a `vector(...)` whose dimension is missing, zero, or not
    /// a number.
    Invalid,
}
42
43impl DataType {
44    /// Constructs a `DataType` from the wire string the parser produces.
45    /// Pre-Phase-7 the strings were one-of `"integer" | "text" | "real" |
46    /// "bool" | "none"`. Phase 7a adds `"vector(N)"` (case-insensitive,
47    /// N a positive integer) for the new vector column type — encoded
48    /// in-band so we don't have to plumb a richer type through the
49    /// existing string-based ParsedColumn pipeline.
50    pub fn new(cmd: String) -> DataType {
51        let lower = cmd.to_lowercase();
52        match lower.as_str() {
53            "integer" => DataType::Integer,
54            "text" => DataType::Text,
55            "real" => DataType::Real,
56            "bool" => DataType::Bool,
57            "json" => DataType::Json,
58            "none" => DataType::None,
59            other if other.starts_with("vector(") && other.ends_with(')') => {
60                // Strip the `vector(` prefix and trailing `)`, parse what's
61                // left as a positive integer dimension. Anything else is
62                // Invalid — surfaces a clean error at CREATE TABLE time.
63                let inside = &other["vector(".len()..other.len() - 1];
64                match inside.trim().parse::<usize>() {
65                    Ok(dim) if dim > 0 => DataType::Vector(dim),
66                    _ => {
67                        eprintln!("Invalid VECTOR dimension in {cmd}");
68                        DataType::Invalid
69                    }
70                }
71            }
72            _ => {
73                eprintln!("Invalid data type given {}", cmd);
74                DataType::Invalid
75            }
76        }
77    }
78
79    /// Inverse of `new` — returns the canonical lowercased wire string
80    /// for this DataType. Used by the parser to round-trip
81    /// `VECTOR(N)` → `DataType::Vector(N)` → `"vector(N)"` into
82    /// `ParsedColumn::datatype` so the rest of the pipeline keeps
83    /// working with strings.
84    pub fn to_wire_string(&self) -> String {
85        match self {
86            DataType::Integer => "Integer".to_string(),
87            DataType::Text => "Text".to_string(),
88            DataType::Real => "Real".to_string(),
89            DataType::Bool => "Bool".to_string(),
90            DataType::Vector(dim) => format!("vector({dim})"),
91            DataType::Json => "Json".to_string(),
92            DataType::None => "None".to_string(),
93            DataType::Invalid => "Invalid".to_string(),
94        }
95    }
96}
97
98impl fmt::Display for DataType {
99    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
100        match self {
101            DataType::Integer => f.write_str("Integer"),
102            DataType::Text => f.write_str("Text"),
103            DataType::Real => f.write_str("Real"),
104            DataType::Bool => f.write_str("Boolean"),
105            DataType::Vector(dim) => write!(f, "Vector({dim})"),
106            DataType::Json => f.write_str("Json"),
107            DataType::None => f.write_str("None"),
108            DataType::Invalid => f.write_str("Invalid"),
109        }
110    }
111}
112
/// In-memory representation of one SQL table: its schema, its row
/// storage, and the indexes maintained alongside that storage.
///
/// `rows` is `Arc<Mutex<...>>` rather than `Rc<RefCell<...>>` so `Table`
/// (and by extension `Database`) is `Send + Sync` — the Tauri desktop
/// app holds the engine in shared state behind a `Mutex<Database>`, and
/// Tauri's state container requires its contents to be thread-safe.
#[derive(Debug)]
pub struct Table {
    /// Name of the table.
    pub tb_name: String,
    /// Schema for each column, in declaration order.
    pub columns: Vec<Column>,
    /// Per-column row storage, keyed by column name. Every column's
    /// `Row::T(BTreeMap)` is keyed by rowid, so all columns share the same
    /// keyset after each write.
    pub rows: Arc<Mutex<HashMap<String, Row>>>,
    /// Secondary indexes on this table (Phase 3e). One auto-created entry
    /// per UNIQUE or PRIMARY KEY column; explicit `CREATE INDEX` statements
    /// add more. Looking up an index: iterate by column name, or by index
    /// name via `Table::index_by_name`.
    pub secondary_indexes: Vec<SecondaryIndex>,
    /// HNSW indexes on VECTOR columns (Phase 7d.2). Maintained in lockstep
    /// with row storage on INSERT (incremental); rebuilt on open from the
    /// persisted CREATE INDEX SQL. The graph itself is NOT yet persisted —
    /// see Phase 7d.3 for cell-encoded graph storage.
    pub hnsw_indexes: Vec<HnswIndexEntry>,
    /// ROWID of the most recent insert. Starts at 0 for a new table;
    /// `restore_row` raises it to the largest rowid it has replayed.
    pub last_rowid: i64,
    /// PRIMARY KEY column name, or the sentinel string "-1" if the table
    /// has no PRIMARY KEY.
    pub primary_key: String,
}
145
/// One HNSW index attached to a table. Phase 7d.2 only supports L2
/// distance; cosine and dot are 7d.x follow-ups (would require either
/// distinct USING methods like `hnsw_cosine` or a `WITH (metric = …)`
/// clause — see `docs/phase-7-plan.md` for the deferred decision).
///
/// The entry derives `Clone`, which `Table::deep_clone` relies on to give
/// a snapshot its own copy of the graph.
#[derive(Debug, Clone)]
pub struct HnswIndexEntry {
    /// User-supplied name from `CREATE INDEX <name> …`. Unique across
    /// both `secondary_indexes` and `hnsw_indexes` on a given table.
    pub name: String,
    /// The VECTOR column this index covers.
    pub column_name: String,
    /// The graph itself.
    pub index: HnswIndex,
    /// Phase 7d.3 — true iff a DELETE or UPDATE-on-vector-col has
    /// invalidated the graph since the last rebuild. INSERT maintains
    /// the graph incrementally and leaves this false. The next save
    /// rebuilds dirty indexes from current rows before serializing.
    pub needs_rebuild: bool,
}
165
166impl Table {
167    pub fn new(create_query: CreateQuery) -> Self {
168        let table_name = create_query.table_name;
169        let mut primary_key: String = String::from("-1");
170        let columns = create_query.columns;
171
172        let mut table_cols: Vec<Column> = vec![];
173        let table_rows: Arc<Mutex<HashMap<String, Row>>> = Arc::new(Mutex::new(HashMap::new()));
174        let mut secondary_indexes: Vec<SecondaryIndex> = Vec::new();
175        for col in &columns {
176            let col_name = &col.name;
177            if col.is_pk {
178                primary_key = col_name.to_string();
179            }
180            table_cols.push(Column::new(
181                col_name.to_string(),
182                col.datatype.to_string(),
183                col.is_pk,
184                col.not_null,
185                col.is_unique,
186            ));
187
188            let dt = DataType::new(col.datatype.to_string());
189            let row_storage = match &dt {
190                DataType::Integer => Row::Integer(BTreeMap::new()),
191                DataType::Real => Row::Real(BTreeMap::new()),
192                DataType::Text => Row::Text(BTreeMap::new()),
193                DataType::Bool => Row::Bool(BTreeMap::new()),
194                // The dimension is enforced at INSERT time against the
195                // column's declared DataType::Vector(dim). The Row variant
196                // itself doesn't carry the dim — every stored Vec<f32>
197                // already has it via .len().
198                DataType::Vector(_dim) => Row::Vector(BTreeMap::new()),
199                // Phase 7e — JSON columns reuse Text storage (with
200                // INSERT-time validation that the bytes parse as JSON).
201                // No new Row variant; json_extract / json_type / etc.
202                // re-parse from text on demand. See `docs/phase-7-plan.md`
203                // Q3's scope-correction note for the storage choice.
204                DataType::Json => Row::Text(BTreeMap::new()),
205                DataType::Invalid | DataType::None => Row::None,
206            };
207            table_rows
208                .lock()
209                .expect("Table row storage mutex poisoned")
210                .insert(col.name.to_string(), row_storage);
211
212            // Auto-create an index for every UNIQUE / PRIMARY KEY column,
213            // but only for types we know how to index. Real / Bool / Vector
214            // UNIQUE columns fall back to the linear scan path in
215            // validate_unique_constraint — same behavior as before 3e.
216            // (Vector UNIQUE is unusual; the linear-scan path will work
217            // via Value::Vector PartialEq, just at O(N) cost.)
218            if (col.is_pk || col.is_unique) && matches!(dt, DataType::Integer | DataType::Text) {
219                let name = SecondaryIndex::auto_name(&table_name, &col.name);
220                match SecondaryIndex::new(
221                    name,
222                    table_name.clone(),
223                    col.name.clone(),
224                    &dt,
225                    true,
226                    IndexOrigin::Auto,
227                ) {
228                    Ok(idx) => secondary_indexes.push(idx),
229                    Err(_) => {
230                        // Unreachable given the matches! guard above, but
231                        // the builder returns Result so we keep the arm.
232                    }
233                }
234            }
235        }
236
237        Table {
238            tb_name: table_name,
239            columns: table_cols,
240            rows: table_rows,
241            secondary_indexes,
242            // HNSW indexes only land via explicit CREATE INDEX … USING hnsw
243            // statements (Phase 7d.2); never auto-created at CREATE TABLE
244            // time, because there's no UNIQUE-style constraint that
245            // implies a vector index.
246            hnsw_indexes: Vec::new(),
247            last_rowid: 0,
248            primary_key,
249        }
250    }
251
252    /// Deep-clones a `Table` for transaction snapshots (Phase 4f).
253    ///
254    /// The normal `Clone` derive would shallow-clone the `Arc<Mutex<_>>`
255    /// wrapping our row storage, leaving both copies sharing the same
256    /// inner map — mutating the snapshot would corrupt the live table
257    /// and vice versa. Instead we lock, clone the inner `HashMap`, and
258    /// wrap it in a fresh `Arc<Mutex<_>>`. Columns and indexes derive
259    /// `Clone` directly (all their fields are plain data).
260    pub fn deep_clone(&self) -> Self {
261        let cloned_rows: HashMap<String, Row> = {
262            let guard = self.rows.lock().expect("row mutex poisoned");
263            guard.clone()
264        };
265        Table {
266            tb_name: self.tb_name.clone(),
267            columns: self.columns.clone(),
268            rows: Arc::new(Mutex::new(cloned_rows)),
269            secondary_indexes: self.secondary_indexes.clone(),
270            // HnswIndexEntry derives Clone, so the snapshot owns its own
271            // graph copy. Phase 4f's snapshot-rollback semantics require
272            // the snapshot to be fully decoupled from live state.
273            hnsw_indexes: self.hnsw_indexes.clone(),
274            last_rowid: self.last_rowid,
275            primary_key: self.primary_key.clone(),
276        }
277    }
278
279    /// Finds an auto- or explicit-index entry for a given column. Returns
280    /// `None` if the column isn't indexed.
281    pub fn index_for_column(&self, column: &str) -> Option<&SecondaryIndex> {
282        self.secondary_indexes
283            .iter()
284            .find(|i| i.column_name == column)
285    }
286
287    fn index_for_column_mut(&mut self, column: &str) -> Option<&mut SecondaryIndex> {
288        self.secondary_indexes
289            .iter_mut()
290            .find(|i| i.column_name == column)
291    }
292
293    /// Finds a secondary index by its own name (e.g., `sqlrite_autoindex_users_email`
294    /// or a user-provided CREATE INDEX name). Used by Phase 3e.2 to look up
295    /// explicit indexes when DROP INDEX lands.
296    #[allow(dead_code)]
297    pub fn index_by_name(&self, name: &str) -> Option<&SecondaryIndex> {
298        self.secondary_indexes.iter().find(|i| i.name == name)
299    }
300
301    /// Returns a `bool` informing if a `Column` with a specific name exists or not
302    ///
303    pub fn contains_column(&self, column: String) -> bool {
304        self.columns.iter().any(|col| col.column_name == column)
305    }
306
307    /// Returns the list of column names in declaration order.
308    pub fn column_names(&self) -> Vec<String> {
309        self.columns.iter().map(|c| c.column_name.clone()).collect()
310    }
311
312    /// Returns all rowids currently stored in the table, in ascending order.
313    /// Every column's BTreeMap has the same keyset, so we just read from the first column.
314    pub fn rowids(&self) -> Vec<i64> {
315        let Some(first) = self.columns.first() else {
316            return vec![];
317        };
318        let rows = self.rows.lock().expect("rows mutex poisoned");
319        rows.get(&first.column_name)
320            .map(|r| r.rowids())
321            .unwrap_or_default()
322    }
323
324    /// Reads a single cell at `(column, rowid)`.
325    pub fn get_value(&self, column: &str, rowid: i64) -> Option<Value> {
326        let rows = self.rows.lock().expect("rows mutex poisoned");
327        rows.get(column).and_then(|r| r.get(rowid))
328    }
329
330    /// Removes the row identified by `rowid` from every column's storage and
331    /// from every secondary index entry.
332    pub fn delete_row(&mut self, rowid: i64) {
333        // Snapshot the values we're about to delete so we can strip them
334        // from secondary indexes by (value, rowid) before the row storage
335        // disappears.
336        let per_column_values: Vec<(String, Option<Value>)> = self
337            .columns
338            .iter()
339            .map(|c| (c.column_name.clone(), self.get_value(&c.column_name, rowid)))
340            .collect();
341
342        // Remove from row storage.
343        {
344            let rows_clone = Arc::clone(&self.rows);
345            let mut row_data = rows_clone.lock().expect("rows mutex poisoned");
346            for col in &self.columns {
347                if let Some(r) = row_data.get_mut(&col.column_name) {
348                    match r {
349                        Row::Integer(m) => {
350                            m.remove(&rowid);
351                        }
352                        Row::Text(m) => {
353                            m.remove(&rowid);
354                        }
355                        Row::Real(m) => {
356                            m.remove(&rowid);
357                        }
358                        Row::Bool(m) => {
359                            m.remove(&rowid);
360                        }
361                        Row::Vector(m) => {
362                            m.remove(&rowid);
363                        }
364                        Row::None => {}
365                    }
366                }
367            }
368        }
369
370        // Strip secondary-index entries. Non-indexed columns just don't
371        // show up in secondary_indexes and are no-ops here.
372        for (col_name, value) in per_column_values {
373            if let Some(idx) = self.index_for_column_mut(&col_name) {
374                if let Some(v) = value {
375                    idx.remove(&v, rowid);
376                }
377            }
378        }
379    }
380
381    /// Replays a single row at `rowid` when loading a table from disk. Takes
382    /// one typed value per column (in declaration order); `None` means the
383    /// stored cell carried a NULL for that column. Unlike `insert_row` this
384    /// trusts the on-disk state and does *not* re-check UNIQUE — we're
385    /// rebuilding a state that was already consistent when it was saved.
386    pub fn restore_row(&mut self, rowid: i64, values: Vec<Option<Value>>) -> Result<()> {
387        if values.len() != self.columns.len() {
388            return Err(SQLRiteError::Internal(format!(
389                "cell has {} values but table '{}' has {} columns",
390                values.len(),
391                self.tb_name,
392                self.columns.len()
393            )));
394        }
395
396        let column_names: Vec<String> =
397            self.columns.iter().map(|c| c.column_name.clone()).collect();
398
399        for (i, value) in values.into_iter().enumerate() {
400            let col_name = &column_names[i];
401
402            // Write into the per-column row storage first (scoped borrow so
403            // the secondary-index update below doesn't fight over `self`).
404            {
405                let rows_clone = Arc::clone(&self.rows);
406                let mut row_data = rows_clone.lock().expect("rows mutex poisoned");
407                let cell = row_data.get_mut(col_name).ok_or_else(|| {
408                    SQLRiteError::Internal(format!("Row storage missing for column '{col_name}'"))
409                })?;
410
411                match (cell, &value) {
412                    (Row::Integer(map), Some(Value::Integer(v))) => {
413                        map.insert(rowid, *v as i32);
414                    }
415                    (Row::Integer(_), None) => {
416                        return Err(SQLRiteError::Internal(format!(
417                            "Integer column '{col_name}' cannot store NULL — corrupt cell?"
418                        )));
419                    }
420                    (Row::Text(map), Some(Value::Text(s))) => {
421                        map.insert(rowid, s.clone());
422                    }
423                    (Row::Text(map), None) => {
424                        // Matches the on-insert convention: NULL in Text
425                        // storage is represented by the literal "Null"
426                        // sentinel and not added to the index.
427                        map.insert(rowid, "Null".to_string());
428                    }
429                    (Row::Real(map), Some(Value::Real(v))) => {
430                        map.insert(rowid, *v as f32);
431                    }
432                    (Row::Real(_), None) => {
433                        return Err(SQLRiteError::Internal(format!(
434                            "Real column '{col_name}' cannot store NULL — corrupt cell?"
435                        )));
436                    }
437                    (Row::Bool(map), Some(Value::Bool(v))) => {
438                        map.insert(rowid, *v);
439                    }
440                    (Row::Bool(_), None) => {
441                        return Err(SQLRiteError::Internal(format!(
442                            "Bool column '{col_name}' cannot store NULL — corrupt cell?"
443                        )));
444                    }
445                    (Row::Vector(map), Some(Value::Vector(v))) => {
446                        map.insert(rowid, v.clone());
447                    }
448                    (Row::Vector(_), None) => {
449                        return Err(SQLRiteError::Internal(format!(
450                            "Vector column '{col_name}' cannot store NULL — corrupt cell?"
451                        )));
452                    }
453                    (row, v) => {
454                        return Err(SQLRiteError::Internal(format!(
455                            "Type mismatch restoring column '{col_name}': storage {row:?} vs value {v:?}"
456                        )));
457                    }
458                }
459            }
460
461            // Maintain the secondary index (if any). NULL values are skipped
462            // by `insert`, matching the "NULL is not indexed" convention.
463            if let Some(v) = &value {
464                if let Some(idx) = self.index_for_column_mut(col_name) {
465                    idx.insert(v, rowid)?;
466                }
467            }
468        }
469
470        if rowid > self.last_rowid {
471            self.last_rowid = rowid;
472        }
473        Ok(())
474    }
475
476    /// Extracts a row as an ordered `Vec<Option<Value>>` matching the column
477    /// declaration order. Returns `None` entries for columns that hold NULL.
478    /// Used by `save_database` to turn a table's in-memory state into cells.
479    pub fn extract_row(&self, rowid: i64) -> Vec<Option<Value>> {
480        self.columns
481            .iter()
482            .map(|c| match self.get_value(&c.column_name, rowid) {
483                Some(Value::Null) => None,
484                Some(v) => Some(v),
485                None => None,
486            })
487            .collect()
488    }
489
490    /// Overwrites the cell at `(column, rowid)` with `new_val`. Enforces the
491    /// column's datatype and UNIQUE constraint, and updates any secondary
492    /// index.
493    ///
494    /// Returns `Err` if the column doesn't exist, the value type is incompatible,
495    /// or writing would violate UNIQUE.
496    pub fn set_value(&mut self, column: &str, rowid: i64, new_val: Value) -> Result<()> {
497        let col_index = self
498            .columns
499            .iter()
500            .position(|c| c.column_name == column)
501            .ok_or_else(|| SQLRiteError::General(format!("Column '{column}' not found")))?;
502
503        // No-op write — keep storage exactly the same.
504        let current = self.get_value(column, rowid);
505        if current.as_ref() == Some(&new_val) {
506            return Ok(());
507        }
508
509        // Enforce UNIQUE. Prefer an O(log N) index probe if we have one;
510        // fall back to a full column scan otherwise (Real/Bool UNIQUE
511        // columns, which don't get auto-indexed).
512        if self.columns[col_index].is_unique && !matches!(new_val, Value::Null) {
513            if let Some(idx) = self.index_for_column(column) {
514                for other in idx.lookup(&new_val) {
515                    if other != rowid {
516                        return Err(SQLRiteError::General(format!(
517                            "UNIQUE constraint violated for column '{column}'"
518                        )));
519                    }
520                }
521            } else {
522                for other in self.rowids() {
523                    if other == rowid {
524                        continue;
525                    }
526                    if self.get_value(column, other).as_ref() == Some(&new_val) {
527                        return Err(SQLRiteError::General(format!(
528                            "UNIQUE constraint violated for column '{column}'"
529                        )));
530                    }
531                }
532            }
533        }
534
535        // Drop the old index entry before writing the new value, so the
536        // post-write index insert doesn't clash with the previous state.
537        if let Some(old) = current {
538            if let Some(idx) = self.index_for_column_mut(column) {
539                idx.remove(&old, rowid);
540            }
541        }
542
543        // Write into the column's Row, type-checking against the declared DataType.
544        let declared = &self.columns[col_index].datatype;
545        {
546            let rows_clone = Arc::clone(&self.rows);
547            let mut row_data = rows_clone.lock().expect("rows mutex poisoned");
548            let cell = row_data.get_mut(column).ok_or_else(|| {
549                SQLRiteError::Internal(format!("Row storage missing for column '{column}'"))
550            })?;
551
552            match (cell, &new_val, declared) {
553                (Row::Integer(m), Value::Integer(v), _) => {
554                    m.insert(rowid, *v as i32);
555                }
556                (Row::Real(m), Value::Real(v), _) => {
557                    m.insert(rowid, *v as f32);
558                }
559                (Row::Real(m), Value::Integer(v), _) => {
560                    m.insert(rowid, *v as f32);
561                }
562                (Row::Text(m), Value::Text(v), dt) => {
563                    // Phase 7e — UPDATE on a JSON column also validates
564                    // the new text is well-formed JSON, mirroring INSERT.
565                    if matches!(dt, DataType::Json) {
566                        if let Err(e) = serde_json::from_str::<serde_json::Value>(v) {
567                            return Err(SQLRiteError::General(format!(
568                                "Type mismatch: expected JSON for column '{column}', got '{v}': {e}"
569                            )));
570                        }
571                    }
572                    m.insert(rowid, v.clone());
573                }
574                (Row::Bool(m), Value::Bool(v), _) => {
575                    m.insert(rowid, *v);
576                }
577                (Row::Vector(m), Value::Vector(v), DataType::Vector(declared_dim)) => {
578                    if v.len() != *declared_dim {
579                        return Err(SQLRiteError::General(format!(
580                            "Vector dimension mismatch for column '{column}': declared {declared_dim}, got {}",
581                            v.len()
582                        )));
583                    }
584                    m.insert(rowid, v.clone());
585                }
586                // NULL writes: store the sentinel "Null" string for Text; for other
587                // types we leave storage as-is since those BTreeMaps can't hold NULL today.
588                (Row::Text(m), Value::Null, _) => {
589                    m.insert(rowid, "Null".to_string());
590                }
591                (_, new, dt) => {
592                    return Err(SQLRiteError::General(format!(
593                        "Type mismatch: cannot assign {} to column '{column}' of type {dt}",
594                        new.to_display_string()
595                    )));
596                }
597            }
598        }
599
600        // Maintain the secondary index, if any. NULL values are skipped by
601        // insert per convention.
602        if !matches!(new_val, Value::Null) {
603            if let Some(idx) = self.index_for_column_mut(column) {
604                idx.insert(&new_val, rowid)?;
605            }
606        }
607
608        Ok(())
609    }
610
611    /// Returns an immutable reference of `sql::db::table::Column` if the table contains a
612    /// column with the specified key as a column name.
613    ///
614    #[allow(dead_code)]
615    pub fn get_column(&mut self, column_name: String) -> Result<&Column> {
616        if let Some(column) = self
617            .columns
618            .iter()
619            .filter(|c| c.column_name == column_name)
620            .collect::<Vec<&Column>>()
621            .first()
622        {
623            Ok(column)
624        } else {
625            Err(SQLRiteError::General(String::from("Column not found.")))
626        }
627    }
628
629    /// Validates if columns and values being inserted violate the UNIQUE constraint.
630    /// PRIMARY KEY columns are automatically UNIQUE. Uses the corresponding
631    /// secondary index when one exists (O(log N) lookup); falls back to a
632    /// linear scan for indexable-but-not-indexed situations (e.g. a Real
633    /// UNIQUE column — Real isn't in the auto-indexed set).
634    pub fn validate_unique_constraint(
635        &mut self,
636        cols: &Vec<String>,
637        values: &Vec<String>,
638    ) -> Result<()> {
639        for (idx, name) in cols.iter().enumerate() {
640            let column = self
641                .columns
642                .iter()
643                .find(|c| &c.column_name == name)
644                .ok_or_else(|| SQLRiteError::General(format!("Column '{name}' not found")))?;
645            if !column.is_unique {
646                continue;
647            }
648            let datatype = &column.datatype;
649            let val = &values[idx];
650
651            // Parse the string value into a runtime Value according to the
652            // declared column type. If parsing fails the caller's insert
653            // would also fail with the same error; surface it here so we
654            // don't emit a misleading "unique OK" on bad input.
655            let parsed = match datatype {
656                DataType::Integer => val.parse::<i64>().map(Value::Integer).map_err(|_| {
657                    SQLRiteError::General(format!(
658                        "Type mismatch: expected INTEGER for column '{name}', got '{val}'"
659                    ))
660                })?,
661                DataType::Text => Value::Text(val.clone()),
662                DataType::Real => val.parse::<f64>().map(Value::Real).map_err(|_| {
663                    SQLRiteError::General(format!(
664                        "Type mismatch: expected REAL for column '{name}', got '{val}'"
665                    ))
666                })?,
667                DataType::Bool => val.parse::<bool>().map(Value::Bool).map_err(|_| {
668                    SQLRiteError::General(format!(
669                        "Type mismatch: expected BOOL for column '{name}', got '{val}'"
670                    ))
671                })?,
672                DataType::Vector(declared_dim) => {
673                    let parsed_vec = parse_vector_literal(val).map_err(|e| {
674                        SQLRiteError::General(format!(
675                            "Type mismatch: expected VECTOR({declared_dim}) for column '{name}', {e}"
676                        ))
677                    })?;
678                    if parsed_vec.len() != *declared_dim {
679                        return Err(SQLRiteError::General(format!(
680                            "Vector dimension mismatch for column '{name}': declared {declared_dim}, got {}",
681                            parsed_vec.len()
682                        )));
683                    }
684                    Value::Vector(parsed_vec)
685                }
686                DataType::Json => {
687                    // JSON values stored as Text. UNIQUE on a JSON column
688                    // compares the canonical text representation
689                    // verbatim — `{"a": 1}` and `{"a":1}` are distinct.
690                    // Document this if anyone actually requests UNIQUE
691                    // JSON; for MVP, treat-as-text is fine.
692                    Value::Text(val.clone())
693                }
694                DataType::None | DataType::Invalid => {
695                    return Err(SQLRiteError::Internal(format!(
696                        "column '{name}' has an unsupported datatype"
697                    )));
698                }
699            };
700
701            if let Some(secondary) = self.index_for_column(name) {
702                if secondary.would_violate_unique(&parsed) {
703                    return Err(SQLRiteError::General(format!(
704                        "UNIQUE constraint violated for column '{name}': value '{val}' already exists"
705                    )));
706                }
707            } else {
708                // No secondary index (Real / Bool UNIQUE). Linear scan.
709                for other in self.rowids() {
710                    if self.get_value(name, other).as_ref() == Some(&parsed) {
711                        return Err(SQLRiteError::General(format!(
712                            "UNIQUE constraint violated for column '{name}': value '{val}' already exists"
713                        )));
714                    }
715                }
716            }
717        }
718        Ok(())
719    }
720
721    /// Inserts all VALUES in its approprieta COLUMNS, using the ROWID an embedded INDEX on all ROWS
722    /// Every `Table` keeps track of the `last_rowid` in order to facilitate what the next one would be.
723    /// One limitation of this data structure is that we can only have one write transaction at a time, otherwise
724    /// we could have a race condition on the last_rowid.
725    ///
726    /// Since we are loosely modeling after SQLite, this is also a limitation of SQLite (allowing only one write transcation at a time),
727    /// So we are good. :)
728    ///
729    /// Returns `Err` (leaving the table unchanged) when the user supplies an
730    /// incompatibly-typed value — no more panics on bad input.
731    pub fn insert_row(&mut self, cols: &Vec<String>, values: &Vec<String>) -> Result<()> {
732        let mut next_rowid = self.last_rowid + 1;
733
734        // Auto-assign INTEGER PRIMARY KEY when the user omits it; otherwise
735        // adopt the supplied value as the new rowid.
736        if self.primary_key != "-1" {
737            if !cols.iter().any(|col| col == &self.primary_key) {
738                // Write the auto-assigned PK into row storage, then sync
739                // the secondary index.
740                let val = next_rowid as i32;
741                let wrote_integer = {
742                    let rows_clone = Arc::clone(&self.rows);
743                    let mut row_data = rows_clone.lock().expect("rows mutex poisoned");
744                    let table_col_data = row_data.get_mut(&self.primary_key).ok_or_else(|| {
745                        SQLRiteError::Internal(format!(
746                            "Row storage missing for primary key column '{}'",
747                            self.primary_key
748                        ))
749                    })?;
750                    match table_col_data {
751                        Row::Integer(tree) => {
752                            tree.insert(next_rowid, val);
753                            true
754                        }
755                        _ => false, // non-integer PK: auto-assign is a no-op
756                    }
757                };
758                if wrote_integer {
759                    let pk = self.primary_key.clone();
760                    if let Some(idx) = self.index_for_column_mut(&pk) {
761                        idx.insert(&Value::Integer(val as i64), next_rowid)?;
762                    }
763                }
764            } else {
765                for i in 0..cols.len() {
766                    if cols[i] == self.primary_key {
767                        let val = &values[i];
768                        next_rowid = val.parse::<i64>().map_err(|_| {
769                            SQLRiteError::General(format!(
770                                "Type mismatch: PRIMARY KEY column '{}' expects INTEGER, got '{val}'",
771                                self.primary_key
772                            ))
773                        })?;
774                    }
775                }
776            }
777        }
778
779        // For every table column, either pick the supplied value or pad with NULL
780        // so that every column's BTreeMap keeps the same rowid keyset.
781        let column_names = self
782            .columns
783            .iter()
784            .map(|col| col.column_name.to_string())
785            .collect::<Vec<String>>();
786        let mut j: usize = 0;
787        for i in 0..column_names.len() {
788            let mut val = String::from("Null");
789            let key = &column_names[i];
790
791            if let Some(supplied_key) = cols.get(j) {
792                if supplied_key == &column_names[i] {
793                    val = values[j].to_string();
794                    j += 1;
795                } else if self.primary_key == column_names[i] {
796                    // PK already stored in the auto-assign branch above.
797                    continue;
798                }
799            } else if self.primary_key == column_names[i] {
800                continue;
801            }
802
803            // Step 1: write into row storage and compute the typed Value
804            // we'll hand to the secondary index (if any).
805            let typed_value: Option<Value> = {
806                let rows_clone = Arc::clone(&self.rows);
807                let mut row_data = rows_clone.lock().expect("rows mutex poisoned");
808                let table_col_data = row_data.get_mut(key).ok_or_else(|| {
809                    SQLRiteError::Internal(format!("Row storage missing for column '{key}'"))
810                })?;
811
812                match table_col_data {
813                    Row::Integer(tree) => {
814                        let parsed = val.parse::<i32>().map_err(|_| {
815                            SQLRiteError::General(format!(
816                                "Type mismatch: expected INTEGER for column '{key}', got '{val}'"
817                            ))
818                        })?;
819                        tree.insert(next_rowid, parsed);
820                        Some(Value::Integer(parsed as i64))
821                    }
822                    Row::Text(tree) => {
823                        // Phase 7e — JSON columns also reach here (they
824                        // share Row::Text storage with TEXT columns).
825                        // Validate the value parses as JSON before
826                        // storing; otherwise we'd happily write
827                        // `not-json-at-all` and only fail when
828                        // json_extract tried to parse it later.
829                        if matches!(self.columns[i].datatype, DataType::Json) && val != "Null" {
830                            if let Err(e) = serde_json::from_str::<serde_json::Value>(&val) {
831                                return Err(SQLRiteError::General(format!(
832                                    "Type mismatch: expected JSON for column '{key}', got '{val}': {e}"
833                                )));
834                            }
835                        }
836                        tree.insert(next_rowid, val.to_string());
837                        // "Null" sentinel stays out of the index — it isn't a
838                        // real user value.
839                        if val != "Null" {
840                            Some(Value::Text(val.to_string()))
841                        } else {
842                            None
843                        }
844                    }
845                    Row::Real(tree) => {
846                        let parsed = val.parse::<f32>().map_err(|_| {
847                            SQLRiteError::General(format!(
848                                "Type mismatch: expected REAL for column '{key}', got '{val}'"
849                            ))
850                        })?;
851                        tree.insert(next_rowid, parsed);
852                        Some(Value::Real(parsed as f64))
853                    }
854                    Row::Bool(tree) => {
855                        let parsed = val.parse::<bool>().map_err(|_| {
856                            SQLRiteError::General(format!(
857                                "Type mismatch: expected BOOL for column '{key}', got '{val}'"
858                            ))
859                        })?;
860                        tree.insert(next_rowid, parsed);
861                        Some(Value::Bool(parsed))
862                    }
863                    Row::Vector(tree) => {
864                        // The parser put a bracket-array literal into `val`
865                        // (e.g. "[0.1,0.2,0.3]"). Parse it back here and
866                        // dim-check against the column's declared
867                        // DataType::Vector(N).
868                        let parsed = parse_vector_literal(&val).map_err(|e| {
869                            SQLRiteError::General(format!(
870                                "Type mismatch: expected VECTOR for column '{key}', {e}"
871                            ))
872                        })?;
873                        let declared_dim = match &self.columns[i].datatype {
874                            DataType::Vector(d) => *d,
875                            other => {
876                                return Err(SQLRiteError::Internal(format!(
877                                    "Row::Vector storage on non-Vector column '{key}' (declared as {other})"
878                                )));
879                            }
880                        };
881                        if parsed.len() != declared_dim {
882                            return Err(SQLRiteError::General(format!(
883                                "Vector dimension mismatch for column '{key}': declared {declared_dim}, got {}",
884                                parsed.len()
885                            )));
886                        }
887                        tree.insert(next_rowid, parsed.clone());
888                        Some(Value::Vector(parsed))
889                    }
890                    Row::None => {
891                        return Err(SQLRiteError::Internal(format!(
892                            "Column '{key}' has no row storage"
893                        )));
894                    }
895                }
896            };
897
898            // Step 2: maintain the secondary index (if any). insert() is a
899            // no-op for Value::Null and cheap for other value kinds.
900            if let Some(v) = typed_value.clone() {
901                if let Some(idx) = self.index_for_column_mut(key) {
902                    idx.insert(&v, next_rowid)?;
903                }
904            }
905
906            // Step 3 (Phase 7d.2): maintain any HNSW indexes on this column.
907            // The HNSW algorithm needs access to other rows' vectors when
908            // wiring up neighbor edges, so build a get_vec closure that
909            // pulls from the table's row storage (which we *just* updated
910            // with the new value).
911            if let Some(Value::Vector(new_vec)) = typed_value {
912                self.maintain_hnsw_on_insert(key, next_rowid, &new_vec);
913            }
914        }
915        self.last_rowid = next_rowid;
916        Ok(())
917    }
918
919    /// After a row insert, push the new (rowid, vector) into every HNSW
920    /// index whose column matches `column`. Split out of `insert_row` so
921    /// the borrowing dance — we need both `&self.rows` (read other
922    /// vectors) and `&mut self.hnsw_indexes` (insert into the graph) —
923    /// stays localized.
924    fn maintain_hnsw_on_insert(&mut self, column: &str, rowid: i64, new_vec: &[f32]) {
925        // Snapshot the current vector storage so the get_vec closure
926        // doesn't fight with `&mut self.hnsw_indexes`. For a typical
927        // HNSW insert we touch ef_construction × log(N) other vectors,
928        // so the snapshot cost is small relative to the graph wiring.
929        let mut vec_snapshot: HashMap<i64, Vec<f32>> = HashMap::new();
930        {
931            let row_data = self.rows.lock().expect("rows mutex poisoned");
932            if let Some(Row::Vector(map)) = row_data.get(column) {
933                for (id, v) in map.iter() {
934                    vec_snapshot.insert(*id, v.clone());
935                }
936            }
937        }
938        // The new row was just written into row storage — make sure the
939        // snapshot reflects it (it should, but defensive).
940        vec_snapshot.insert(rowid, new_vec.to_vec());
941
942        for entry in &mut self.hnsw_indexes {
943            if entry.column_name == column {
944                entry.index.insert(rowid, new_vec, |id| {
945                    vec_snapshot.get(&id).cloned().unwrap_or_default()
946                });
947            }
948        }
949    }
950
951    /// Print the table schema to standard output in a pretty formatted way.
952    ///
953    /// # Example
954    ///
955    /// ```text
956    /// let table = Table::new(payload);
957    /// table.print_table_schema();
958    ///
959    /// Prints to standard output:
960    ///    +-------------+-----------+-------------+--------+----------+
961    ///    | Column Name | Data Type | PRIMARY KEY | UNIQUE | NOT NULL |
962    ///    +-------------+-----------+-------------+--------+----------+
963    ///    | id          | Integer   | true        | true   | true     |
964    ///    +-------------+-----------+-------------+--------+----------+
965    ///    | name        | Text      | false       | true   | false    |
966    ///    +-------------+-----------+-------------+--------+----------+
967    ///    | email       | Text      | false       | false  | false    |
968    ///    +-------------+-----------+-------------+--------+----------+
969    /// ```
970    ///
971    pub fn print_table_schema(&self) -> Result<usize> {
972        let mut table = PrintTable::new();
973        table.add_row(row![
974            "Column Name",
975            "Data Type",
976            "PRIMARY KEY",
977            "UNIQUE",
978            "NOT NULL"
979        ]);
980
981        for col in &self.columns {
982            table.add_row(row![
983                col.column_name,
984                col.datatype,
985                col.is_pk,
986                col.is_unique,
987                col.not_null
988            ]);
989        }
990
991        table.printstd();
992        Ok(table.len() * 2 + 1)
993    }
994
995    /// Print the table data to standard output in a pretty formatted way.
996    ///
997    /// # Example
998    ///
999    /// ```text
1000    /// let db_table = db.get_table_mut(table_name.to_string()).unwrap();
1001    /// db_table.print_table_data();
1002    ///
1003    /// Prints to standard output:
1004    ///     +----+---------+------------------------+
1005    ///     | id | name    | email                  |
1006    ///     +----+---------+------------------------+
1007    ///     | 1  | "Jack"  | "jack@mail.com"        |
1008    ///     +----+---------+------------------------+
1009    ///     | 10 | "Bob"   | "bob@main.com"         |
1010    ///     +----+---------+------------------------+
1011    ///     | 11 | "Bill"  | "bill@main.com"        |
1012    ///     +----+---------+------------------------+
1013    /// ```
1014    ///
1015    pub fn print_table_data(&self) {
1016        let mut print_table = PrintTable::new();
1017
1018        let column_names = self
1019            .columns
1020            .iter()
1021            .map(|col| col.column_name.to_string())
1022            .collect::<Vec<String>>();
1023
1024        let header_row = PrintRow::new(
1025            column_names
1026                .iter()
1027                .map(|col| PrintCell::new(col))
1028                .collect::<Vec<PrintCell>>(),
1029        );
1030
1031        let rows_clone = Arc::clone(&self.rows);
1032        let row_data = rows_clone.lock().expect("rows mutex poisoned");
1033        let first_col_data = row_data
1034            .get(&self.columns.first().unwrap().column_name)
1035            .unwrap();
1036        let num_rows = first_col_data.count();
1037        let mut print_table_rows: Vec<PrintRow> = vec![PrintRow::new(vec![]); num_rows];
1038
1039        for col_name in &column_names {
1040            let col_val = row_data
1041                .get(col_name)
1042                .expect("Can't find any rows with the given column");
1043            let columns: Vec<String> = col_val.get_serialized_col_data();
1044
1045            for i in 0..num_rows {
1046                if let Some(cell) = &columns.get(i) {
1047                    print_table_rows[i].add_cell(PrintCell::new(cell));
1048                } else {
1049                    print_table_rows[i].add_cell(PrintCell::new(""));
1050                }
1051            }
1052        }
1053
1054        print_table.add_row(header_row);
1055        for row in print_table_rows {
1056            print_table.add_row(row);
1057        }
1058
1059        print_table.printstd();
1060    }
1061}
1062
1063/// The schema for each SQL column in every table.
1064///
1065/// Per-column index state moved to `Table::secondary_indexes` in Phase 3e —
1066/// a single `Column` describes the declared schema (name, type, constraints)
1067/// and nothing more.
1068#[derive(PartialEq, Debug, Clone)]
1069pub struct Column {
1070    pub column_name: String,
1071    pub datatype: DataType,
1072    pub is_pk: bool,
1073    pub not_null: bool,
1074    pub is_unique: bool,
1075}
1076
1077impl Column {
1078    pub fn new(
1079        name: String,
1080        datatype: String,
1081        is_pk: bool,
1082        not_null: bool,
1083        is_unique: bool,
1084    ) -> Self {
1085        let dt = DataType::new(datatype);
1086        Column {
1087            column_name: name,
1088            datatype: dt,
1089            is_pk,
1090            not_null,
1091            is_unique,
1092        }
1093    }
1094}
1095
1096/// The schema for each SQL row in every table is represented in memory
1097/// by following structure
1098///
1099/// This is an enum representing each of the available types organized in a BTreeMap
1100/// data structure, using the ROWID and key and each corresponding type as value
1101#[derive(PartialEq, Debug, Clone)]
1102pub enum Row {
1103    Integer(BTreeMap<i64, i32>),
1104    Text(BTreeMap<i64, String>),
1105    Real(BTreeMap<i64, f32>),
1106    Bool(BTreeMap<i64, bool>),
1107    /// Phase 7a: dense f32 vector storage. Each `Vec<f32>` should have
1108    /// length matching the column's declared `DataType::Vector(dim)`,
1109    /// enforced at INSERT time. The Row variant doesn't carry the dim —
1110    /// it lives in the column metadata.
1111    Vector(BTreeMap<i64, Vec<f32>>),
1112    None,
1113}
1114
1115impl Row {
1116    fn get_serialized_col_data(&self) -> Vec<String> {
1117        match self {
1118            Row::Integer(cd) => cd.values().map(|v| v.to_string()).collect(),
1119            Row::Real(cd) => cd.values().map(|v| v.to_string()).collect(),
1120            Row::Text(cd) => cd.values().map(|v| v.to_string()).collect(),
1121            Row::Bool(cd) => cd.values().map(|v| v.to_string()).collect(),
1122            Row::Vector(cd) => cd.values().map(format_vector_for_display).collect(),
1123            Row::None => panic!("Found None in columns"),
1124        }
1125    }
1126
1127    fn count(&self) -> usize {
1128        match self {
1129            Row::Integer(cd) => cd.len(),
1130            Row::Real(cd) => cd.len(),
1131            Row::Text(cd) => cd.len(),
1132            Row::Bool(cd) => cd.len(),
1133            Row::Vector(cd) => cd.len(),
1134            Row::None => panic!("Found None in columns"),
1135        }
1136    }
1137
1138    /// Every column's BTreeMap is keyed by ROWID. All columns share the same keyset
1139    /// after an INSERT (missing columns are padded), so any column's keys are a valid
1140    /// iteration of the table's rowids.
1141    pub fn rowids(&self) -> Vec<i64> {
1142        match self {
1143            Row::Integer(m) => m.keys().copied().collect(),
1144            Row::Text(m) => m.keys().copied().collect(),
1145            Row::Real(m) => m.keys().copied().collect(),
1146            Row::Bool(m) => m.keys().copied().collect(),
1147            Row::Vector(m) => m.keys().copied().collect(),
1148            Row::None => vec![],
1149        }
1150    }
1151
1152    pub fn get(&self, rowid: i64) -> Option<Value> {
1153        match self {
1154            Row::Integer(m) => m.get(&rowid).map(|v| Value::Integer(i64::from(*v))),
1155            // INSERT stores the literal string "Null" in Text columns that were omitted
1156            // from the query — re-map that back to a real NULL on read.
1157            Row::Text(m) => m.get(&rowid).map(|v| {
1158                if v == "Null" {
1159                    Value::Null
1160                } else {
1161                    Value::Text(v.clone())
1162                }
1163            }),
1164            Row::Real(m) => m.get(&rowid).map(|v| Value::Real(f64::from(*v))),
1165            Row::Bool(m) => m.get(&rowid).map(|v| Value::Bool(*v)),
1166            Row::Vector(m) => m.get(&rowid).map(|v| Value::Vector(v.clone())),
1167            Row::None => None,
1168        }
1169    }
1170}
1171
/// Formats a vector for human-readable output, e.g. `[0.1, 0.2, 0.3]`.
///
/// Shared by `Row::get_serialized_col_data` (the REPL's print-table path) and
/// `Value::to_display_string`. Elements are written with `f32`'s `Display`
/// implementation and separated by `", "`; an empty vector renders as `[]`.
/// Long vectors are not truncated, so a high-dimensional value produces one
/// long line.
///
/// The parameter stays `&Vec<f32>` (not `&[f32]`) because the function is
/// passed by value to `.map(..)` over an iterator of `&Vec<f32>`.
fn format_vector_for_display(v: &Vec<f32>) -> String {
    let rendered: Vec<String> = v.iter().map(|x| x.to_string()).collect();
    format!("[{}]", rendered.join(", "))
}
1193
1194/// Runtime value produced by query execution. Separate from the on-disk `Row` enum
1195/// so the executor can carry typed values (including NULL) across operators.
1196#[derive(Debug, Clone, PartialEq)]
1197pub enum Value {
1198    Integer(i64),
1199    Text(String),
1200    Real(f64),
1201    Bool(bool),
1202    /// Phase 7a: dense f32 vector as a runtime value. Carries its own
1203    /// dimension implicitly via `Vec::len`; the column it's being
1204    /// assigned to has a declared `DataType::Vector(N)` that's checked
1205    /// at INSERT/UPDATE time.
1206    Vector(Vec<f32>),
1207    Null,
1208}
1209
1210impl Value {
1211    pub fn to_display_string(&self) -> String {
1212        match self {
1213            Value::Integer(v) => v.to_string(),
1214            Value::Text(s) => s.clone(),
1215            Value::Real(f) => f.to_string(),
1216            Value::Bool(b) => b.to_string(),
1217            Value::Vector(v) => format_vector_for_display(v),
1218            Value::Null => String::from("NULL"),
1219        }
1220    }
1221}
1222
1223/// Parse a bracket-array literal like `"[0.1, 0.2, 0.3]"` (or `"[1, 2, 3]"`)
1224/// into a `Vec<f32>`. The parser/insert pipeline stores vector literals as
1225/// strings in `InsertQuery::rows` (a `Vec<Vec<String>>`); this helper is
1226/// the inverse — turn the string back into a typed vector at the boundary
1227/// where we actually need element-typed data.
1228///
1229/// Accepts:
1230/// - `[]` → empty vector (caller's dimension check rejects it for VECTOR(N≥1))
1231/// - `[0.1, 0.2, 0.3]` → standard float syntax
1232/// - `[1, 2, 3]` → integers, coerced to f32 (matches `VALUES (1, 2)` for
1233///   `REAL` columns; we widen ints to floats automatically)
1234/// - whitespace tolerated everywhere (Python/JSON/pgvector convention)
1235///
1236/// Rejects with a descriptive message:
1237/// - missing `[` / `]`
1238/// - non-numeric elements (`['foo', 0.1]`)
1239/// - NaN / Inf literals (we accept them via `f32::from_str` but caller can
1240///   reject if undesired — for now we let them through; HNSW etc. will
1241///   reject NaN at index time)
1242pub fn parse_vector_literal(s: &str) -> Result<Vec<f32>> {
1243    let trimmed = s.trim();
1244    if !trimmed.starts_with('[') || !trimmed.ends_with(']') {
1245        return Err(SQLRiteError::General(format!(
1246            "expected bracket-array literal `[...]`, got `{s}`"
1247        )));
1248    }
1249    let inner = &trimmed[1..trimmed.len() - 1].trim();
1250    if inner.is_empty() {
1251        return Ok(Vec::new());
1252    }
1253    let mut out = Vec::new();
1254    for (i, part) in inner.split(',').enumerate() {
1255        let element = part.trim();
1256        let parsed: f32 = element.parse().map_err(|_| {
1257            SQLRiteError::General(format!("vector element {i} (`{element}`) is not a number"))
1258        })?;
1259        out.push(parsed);
1260    }
1261    Ok(out)
1262}
1263
#[cfg(test)]
mod tests {
    use super::*;
    use sqlparser::dialect::SQLiteDialect;
    use sqlparser::parser::Parser;

    /// Parses `sql` (expected to hold a single `CREATE TABLE` statement) and
    /// builds the `Table` it describes.
    fn table_from_sql(sql: &str) -> Table {
        let dialect = SQLiteDialect {};
        let mut ast = Parser::parse_sql(&dialect, sql).unwrap();
        if ast.len() > 1 {
            panic!("Expected a single query statement, but there are more then 1.")
        }
        let statement = ast.pop().unwrap();
        let create_query = CreateQuery::new(&statement).unwrap();
        Table::new(create_query)
    }

    #[test]
    fn datatype_display_trait_test() {
        let cases = vec![
            (DataType::Integer, "Integer"),
            (DataType::Text, "Text"),
            (DataType::Real, "Real"),
            (DataType::Bool, "Boolean"),
            (DataType::Vector(384), "Vector(384)"),
            (DataType::None, "None"),
            (DataType::Invalid, "Invalid"),
        ];

        for (datatype, expected) in cases {
            assert_eq!(format!("{}", datatype), expected);
        }
    }

    // -----------------------------------------------------------------
    // Phase 7a: VECTOR(N) column type
    // -----------------------------------------------------------------

    #[test]
    fn datatype_new_parses_vector_dim() {
        let cases = vec![
            // Standard cases.
            ("vector(1)", 1),
            ("vector(384)", 384),
            ("vector(1536)", 1536),
            // The keyword is case-insensitive.
            ("VECTOR(384)", 384),
            // Whitespace inside the parentheses is tolerated.
            ("vector( 64 )", 64),
        ];

        for (wire, dim) in cases {
            assert_eq!(DataType::new(wire.to_string()), DataType::Vector(dim));
        }
    }

    #[test]
    fn datatype_new_rejects_bad_vector_strings() {
        let rejected = vec![
            // dim = 0 is rejected (Q2: VECTOR(N>=1)).
            "vector(0)",
            // Non-numeric dim.
            "vector(abc)",
            // Empty parens.
            "vector()",
            // A negative dim does not parse as usize.
            "vector(-3)",
        ];

        for wire in rejected {
            assert_eq!(DataType::new(wire.to_string()), DataType::Invalid);
        }
    }

    #[test]
    fn datatype_to_wire_string_round_trips_vector() {
        let wire = DataType::Vector(384).to_wire_string();
        assert_eq!(wire, "vector(384)");
        // The wire form must feed back through DataType::new losslessly.
        assert_eq!(DataType::new(wire), DataType::Vector(384));
    }

    #[test]
    fn parse_vector_literal_accepts_floats() {
        let parsed = parse_vector_literal("[0.1, 0.2, 0.3]").expect("parse");
        assert_eq!(parsed, vec![0.1f32, 0.2, 0.3]);
    }

    #[test]
    fn parse_vector_literal_accepts_ints_widening_to_f32() {
        let parsed = parse_vector_literal("[1, 2, 3]").expect("parse");
        assert_eq!(parsed, vec![1.0f32, 2.0, 3.0]);
    }

    #[test]
    fn parse_vector_literal_handles_negatives_and_whitespace() {
        let parsed = parse_vector_literal("[ -1.5 ,  2.0,  -3.5 ]").expect("parse");
        assert_eq!(parsed, vec![-1.5f32, 2.0, -3.5]);
    }

    #[test]
    fn parse_vector_literal_empty_brackets_is_empty_vec() {
        let parsed = parse_vector_literal("[]").expect("parse");
        assert!(parsed.is_empty());
    }

    #[test]
    fn parse_vector_literal_rejects_non_bracketed() {
        let malformed = vec![
            "0.1, 0.2",
            "(0.1, 0.2)",
            "[0.1, 0.2", // missing ]
            "0.1, 0.2]", // missing [
        ];

        for literal in malformed {
            assert!(parse_vector_literal(literal).is_err());
        }
    }

    #[test]
    fn parse_vector_literal_rejects_non_numeric_elements() {
        let err = parse_vector_literal("[1.0, 'foo', 3.0]").unwrap_err();
        let msg = format!("{err}");
        assert!(
            msg.contains("vector element 1") && msg.contains("'foo'"),
            "error message should pinpoint the bad element: got `{msg}`"
        );
    }

    #[test]
    fn value_vector_display_format() {
        let populated = Value::Vector(vec![0.1, 0.2, 0.3]);
        assert_eq!(populated.to_display_string(), "[0.1, 0.2, 0.3]");

        // An empty vector displays as `[]`.
        assert_eq!(Value::Vector(vec![]).to_display_string(), "[]");
    }

    #[test]
    fn create_new_table_test() {
        let query_statement = "CREATE TABLE contacts (
            id INTEGER PRIMARY KEY,
            first_name TEXT NOT NULL,
            last_name TEXT NOT NULl,
            email TEXT NOT NULL UNIQUE,
            active BOOL,
            score REAL
        );";
        let table = table_from_sql(query_statement);

        assert_eq!(table.columns.len(), 6);
        assert_eq!(table.last_rowid, 0);

        let id_column = table
            .columns
            .iter()
            .find(|c| c.column_name == "id")
            .unwrap_or_else(|| panic!("column not found"));
        assert!(id_column.is_pk);
        assert_eq!(id_column.datatype, DataType::Integer);
    }

    #[test]
    fn print_table_schema_test() {
        let query_statement = "CREATE TABLE contacts (
            id INTEGER PRIMARY KEY,
            first_name TEXT NOT NULL,
            last_name TEXT NOT NULl
        );";
        let table = table_from_sql(query_statement);

        // Header + 3 columns = 4 rows, hence 2 * 4 + 1 printed lines.
        let lines_printed = table.print_table_schema();
        assert_eq!(lines_printed, Ok(9));
    }
}