sqlrite/sql/db/table.rs
1use crate::error::{Result, SQLRiteError};
2use crate::sql::db::secondary_index::{IndexOrigin, SecondaryIndex};
3use crate::sql::fts::PostingList;
4use crate::sql::hnsw::HnswIndex;
5use crate::sql::parser::create::{CreateQuery, ParsedColumn};
6use std::collections::{BTreeMap, HashMap};
7use std::fmt;
8use std::sync::{Arc, Mutex};
9
10use prettytable::{Cell as PrintCell, Row as PrintRow, Table as PrintTable};
11
/// Column data types supported by SQLRite.
///
/// Modeled on SQLite's storage classes and type affinities; see
/// [Datatypes In SQLite Version 3](https://www.sqlite.org/datatype3.html).
///
/// `Vector(dim)` is the Phase 7a addition — a dense `f32` array of fixed
/// dimension. Carrying the dimension in the type lets a `VECTOR(384)`
/// column reject `[0.1, 0.2, 0.3]` at INSERT time with a clean type error
/// instead of silently storing a value of the wrong shape.
#[derive(Clone, Debug, PartialEq)]
pub enum DataType {
    Integer,
    Text,
    Real,
    Bool,
    /// Dense `f32` vector of fixed dimension. The `usize` is the declared
    /// dimension; every value stored in the column must have exactly that
    /// many elements.
    Vector(usize),
    /// Phase 7e — JSON column. Stored as canonical UTF-8 text (matching
    /// SQLite's JSON1 extension) and validated at INSERT time. The
    /// `json_extract` family of functions parses on demand and returns
    /// either a primitive `Value` (Integer / Real / Text / Bool / Null) or
    /// a Text value carrying the JSON-encoded sub-object/array.
    /// Q3 originally specified `bincoded serde_json::Value`, but bincode
    /// was removed from the engine in Phase 3c — see the scope-correction
    /// note in `docs/phase-7-plan.md` for why text storage was chosen.
    Json,
    /// Produced by the wire string `"none"`.
    None,
    /// Produced by [`DataType::new`] for any string it does not recognise.
    Invalid,
}

impl DataType {
    /// Builds a `DataType` from the type string the parser produces.
    ///
    /// Matching is case-insensitive. Recognised inputs are `"integer"`,
    /// `"text"`, `"real"`, `"bool"`, `"json"`, `"none"` and — since
    /// Phase 7a — `"vector(N)"` with `N` a positive integer (whitespace
    /// around `N` is tolerated). The vector dimension is encoded in-band so
    /// the string-based `ParsedColumn` pipeline needs no richer type.
    ///
    /// Anything else yields [`DataType::Invalid`] and writes a diagnostic
    /// line to stderr; this function never panics.
    pub fn new(cmd: String) -> DataType {
        let normalized = cmd.to_lowercase();
        let name = normalized.as_str();

        // `vector(<dim>)`: peel off the wrapper and parse what is left as a
        // strictly positive dimension. No scalar type name starts with
        // `vector(`, so testing this first cannot shadow the table below.
        let vector_dim = name
            .strip_prefix("vector(")
            .and_then(|rest| rest.strip_suffix(')'));
        if let Some(dim_text) = vector_dim {
            return match dim_text.trim().parse::<usize>() {
                Ok(dim) if dim > 0 => DataType::Vector(dim),
                _ => {
                    eprintln!("Invalid VECTOR dimension in {cmd}");
                    DataType::Invalid
                }
            };
        }

        match name {
            "integer" => DataType::Integer,
            "text" => DataType::Text,
            "real" => DataType::Real,
            "bool" => DataType::Bool,
            "json" => DataType::Json,
            "none" => DataType::None,
            _ => {
                eprintln!("Invalid data type given {}", cmd);
                DataType::Invalid
            }
        }
    }

    /// Inverse of [`DataType::new`] — returns the wire string for this
    /// type.
    ///
    /// Scalar variants are emitted capitalised (`"Integer"`, `"Bool"`, …)
    /// while vectors are emitted as lowercase `"vector(N)"`. Because `new`
    /// lowercases its input before matching, both spellings round-trip.
    /// Used by the parser to carry `VECTOR(N)` through
    /// `ParsedColumn::datatype` as a plain string.
    pub fn to_wire_string(&self) -> String {
        let scalar = match self {
            DataType::Vector(dim) => return format!("vector({dim})"),
            DataType::Integer => "Integer",
            DataType::Text => "Text",
            DataType::Real => "Real",
            DataType::Bool => "Bool",
            DataType::Json => "Json",
            DataType::None => "None",
            DataType::Invalid => "Invalid",
        };
        scalar.to_string()
    }
}

/// Human-readable type name. Differs from `to_wire_string` for `Bool`
/// (rendered as `"Boolean"`) and `Vector` (rendered as `"Vector(N)"`).
impl fmt::Display for DataType {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let label = match self {
            DataType::Vector(dim) => return write!(f, "Vector({dim})"),
            DataType::Integer => "Integer",
            DataType::Text => "Text",
            DataType::Real => "Real",
            DataType::Bool => "Boolean",
            DataType::Json => "Json",
            DataType::None => "None",
            DataType::Invalid => "Invalid",
        };
        f.write_str(label)
    }
}
113
114/// The schema for each SQL Table is represented in memory by
115/// following structure.
116///
117/// `rows` is `Arc<Mutex<...>>` rather than `Rc<RefCell<...>>` so `Table`
118/// (and by extension `Database`) is `Send + Sync` — the Tauri desktop
119/// app holds the engine in shared state behind a `Mutex<Database>`, and
120/// Tauri's state container requires its contents to be thread-safe.
121#[derive(Debug)]
122pub struct Table {
123 /// Name of the table
124 pub tb_name: String,
125 /// Schema for each column, in declaration order.
126 pub columns: Vec<Column>,
127 /// Per-column row storage, keyed by column name. Every column's
128 /// `Row::T(BTreeMap)` is keyed by rowid, so all columns share the same
129 /// keyset after each write.
130 pub rows: Arc<Mutex<HashMap<String, Row>>>,
131 /// Secondary indexes on this table (Phase 3e). One auto-created entry
132 /// per UNIQUE or PRIMARY KEY column; explicit `CREATE INDEX` statements
133 /// add more. Looking up an index: iterate by column name, or by index
134 /// name via `Table::index_by_name`.
135 pub secondary_indexes: Vec<SecondaryIndex>,
136 /// HNSW indexes on VECTOR columns (Phase 7d.2). Maintained in lockstep
137 /// with row storage on INSERT (incremental); rebuilt on open from the
138 /// persisted CREATE INDEX SQL. The graph itself is NOT yet persisted —
139 /// see Phase 7d.3 for cell-encoded graph storage.
140 pub hnsw_indexes: Vec<HnswIndexEntry>,
141 /// FTS inverted indexes on TEXT columns (Phase 8b). Maintained in
142 /// lockstep with row storage on INSERT (incremental); DELETE / UPDATE
143 /// flag `needs_rebuild` and the next save rebuilds from current rows.
144 /// The posting lists themselves are NOT yet persisted — Phase 8c
145 /// wires the cell-encoded `KIND_FTS_POSTING` storage.
146 pub fts_indexes: Vec<FtsIndexEntry>,
147 /// ROWID of most recent insert.
148 pub last_rowid: i64,
149 /// PRIMARY KEY column name, or "-1" if the table has no PRIMARY KEY.
150 pub primary_key: String,
151}
152
153/// One HNSW index attached to a table. Phase 7d.2 only supports L2
154/// distance; cosine and dot are 7d.x follow-ups (would require either
155/// distinct USING methods like `hnsw_cosine` or a `WITH (metric = …)`
156/// clause — see `docs/phase-7-plan.md` for the deferred decision).
157#[derive(Debug, Clone)]
158pub struct HnswIndexEntry {
159 /// User-supplied name from `CREATE INDEX <name> …`. Unique across
160 /// both `secondary_indexes` and `hnsw_indexes` on a given table.
161 pub name: String,
162 /// The VECTOR column this index covers.
163 pub column_name: String,
164 /// The graph itself.
165 pub index: HnswIndex,
166 /// Phase 7d.3 — true iff a DELETE or UPDATE-on-vector-col has
167 /// invalidated the graph since the last rebuild. INSERT maintains
168 /// the graph incrementally and leaves this false. The next save
169 /// rebuilds dirty indexes from current rows before serializing.
170 pub needs_rebuild: bool,
171}
172
173/// One FTS index attached to a table (Phase 8b). The inverted index
174/// itself is a [`PostingList`]; metadata (name, column, dirty flag)
175/// lives here. Mirrors [`HnswIndexEntry`] field-for-field so the
176/// rebuild-on-save and DELETE/UPDATE invalidation paths can use one
177/// pattern across both index families.
178#[derive(Debug, Clone)]
179pub struct FtsIndexEntry {
180 /// User-supplied name from `CREATE INDEX <name> … USING fts(<col>)`.
181 /// Unique across `secondary_indexes`, `hnsw_indexes`, and
182 /// `fts_indexes` on a given table.
183 pub name: String,
184 /// The TEXT column this index covers.
185 pub column_name: String,
186 /// The inverted index + per-doc length cache.
187 pub index: PostingList,
188 /// True iff a DELETE or UPDATE-on-text-col has invalidated the
189 /// posting lists since the last rebuild. INSERT maintains the
190 /// index incrementally and leaves this false. The next save
191 /// rebuilds dirty indexes from current rows before serializing
192 /// (mirrors HNSW's Q7 strategy).
193 pub needs_rebuild: bool,
194}
195
196impl Table {
197 pub fn new(create_query: CreateQuery) -> Self {
198 let table_name = create_query.table_name;
199 let mut primary_key: String = String::from("-1");
200 let columns = create_query.columns;
201
202 let mut table_cols: Vec<Column> = vec![];
203 let table_rows: Arc<Mutex<HashMap<String, Row>>> = Arc::new(Mutex::new(HashMap::new()));
204 let mut secondary_indexes: Vec<SecondaryIndex> = Vec::new();
205 for col in &columns {
206 let col_name = &col.name;
207 if col.is_pk {
208 primary_key = col_name.to_string();
209 }
210 table_cols.push(Column::with_default(
211 col_name.to_string(),
212 col.datatype.to_string(),
213 col.is_pk,
214 col.not_null,
215 col.is_unique,
216 col.default.clone(),
217 ));
218
219 let dt = DataType::new(col.datatype.to_string());
220 let row_storage = match &dt {
221 DataType::Integer => Row::Integer(BTreeMap::new()),
222 DataType::Real => Row::Real(BTreeMap::new()),
223 DataType::Text => Row::Text(BTreeMap::new()),
224 DataType::Bool => Row::Bool(BTreeMap::new()),
225 // The dimension is enforced at INSERT time against the
226 // column's declared DataType::Vector(dim). The Row variant
227 // itself doesn't carry the dim — every stored Vec<f32>
228 // already has it via .len().
229 DataType::Vector(_dim) => Row::Vector(BTreeMap::new()),
230 // Phase 7e — JSON columns reuse Text storage (with
231 // INSERT-time validation that the bytes parse as JSON).
232 // No new Row variant; json_extract / json_type / etc.
233 // re-parse from text on demand. See `docs/phase-7-plan.md`
234 // Q3's scope-correction note for the storage choice.
235 DataType::Json => Row::Text(BTreeMap::new()),
236 DataType::Invalid | DataType::None => Row::None,
237 };
238 table_rows
239 .lock()
240 .expect("Table row storage mutex poisoned")
241 .insert(col.name.to_string(), row_storage);
242
243 // Auto-create an index for every UNIQUE / PRIMARY KEY column,
244 // but only for types we know how to index. Real / Bool / Vector
245 // UNIQUE columns fall back to the linear scan path in
246 // validate_unique_constraint — same behavior as before 3e.
247 // (Vector UNIQUE is unusual; the linear-scan path will work
248 // via Value::Vector PartialEq, just at O(N) cost.)
249 if (col.is_pk || col.is_unique) && matches!(dt, DataType::Integer | DataType::Text) {
250 let name = SecondaryIndex::auto_name(&table_name, &col.name);
251 match SecondaryIndex::new(
252 name,
253 table_name.clone(),
254 col.name.clone(),
255 &dt,
256 true,
257 IndexOrigin::Auto,
258 ) {
259 Ok(idx) => secondary_indexes.push(idx),
260 Err(_) => {
261 // Unreachable given the matches! guard above, but
262 // the builder returns Result so we keep the arm.
263 }
264 }
265 }
266 }
267
268 Table {
269 tb_name: table_name,
270 columns: table_cols,
271 rows: table_rows,
272 secondary_indexes,
273 // HNSW indexes only land via explicit CREATE INDEX … USING hnsw
274 // statements (Phase 7d.2); never auto-created at CREATE TABLE
275 // time, because there's no UNIQUE-style constraint that
276 // implies a vector index.
277 hnsw_indexes: Vec::new(),
278 // Same story for FTS indexes — explicit `CREATE INDEX … USING
279 // fts(<col>)` only (Phase 8b).
280 fts_indexes: Vec::new(),
281 last_rowid: 0,
282 primary_key,
283 }
284 }
285
286 /// Deep-clones a `Table` for transaction snapshots (Phase 4f).
287 ///
288 /// The normal `Clone` derive would shallow-clone the `Arc<Mutex<_>>`
289 /// wrapping our row storage, leaving both copies sharing the same
290 /// inner map — mutating the snapshot would corrupt the live table
291 /// and vice versa. Instead we lock, clone the inner `HashMap`, and
292 /// wrap it in a fresh `Arc<Mutex<_>>`. Columns and indexes derive
293 /// `Clone` directly (all their fields are plain data).
294 pub fn deep_clone(&self) -> Self {
295 let cloned_rows: HashMap<String, Row> = {
296 let guard = self.rows.lock().expect("row mutex poisoned");
297 guard.clone()
298 };
299 Table {
300 tb_name: self.tb_name.clone(),
301 columns: self.columns.clone(),
302 rows: Arc::new(Mutex::new(cloned_rows)),
303 secondary_indexes: self.secondary_indexes.clone(),
304 // HnswIndexEntry derives Clone, so the snapshot owns its own
305 // graph copy. Phase 4f's snapshot-rollback semantics require
306 // the snapshot to be fully decoupled from live state.
307 hnsw_indexes: self.hnsw_indexes.clone(),
308 // Same fully-decoupled clone for FTS indexes (Phase 8b).
309 fts_indexes: self.fts_indexes.clone(),
310 last_rowid: self.last_rowid,
311 primary_key: self.primary_key.clone(),
312 }
313 }
314
315 /// Finds an auto- or explicit-index entry for a given column. Returns
316 /// `None` if the column isn't indexed.
317 pub fn index_for_column(&self, column: &str) -> Option<&SecondaryIndex> {
318 self.secondary_indexes
319 .iter()
320 .find(|i| i.column_name == column)
321 }
322
323 fn index_for_column_mut(&mut self, column: &str) -> Option<&mut SecondaryIndex> {
324 self.secondary_indexes
325 .iter_mut()
326 .find(|i| i.column_name == column)
327 }
328
329 /// Finds a secondary index by its own name (e.g., `sqlrite_autoindex_users_email`
330 /// or a user-provided CREATE INDEX name). Used by DROP INDEX and the
331 /// rename helpers below.
332 pub fn index_by_name(&self, name: &str) -> Option<&SecondaryIndex> {
333 self.secondary_indexes.iter().find(|i| i.name == name)
334 }
335
336 /// Renames a column in place. Updates row storage, the `Column`
337 /// metadata, every secondary / HNSW / FTS index whose `column_name`
338 /// matches, the `primary_key` pointer if the renamed column is the
339 /// PK, and any auto-index name that embedded the old column name.
340 ///
341 /// Caller-side validation (table existence, source-column existence
342 /// at the surface level, IF EXISTS) lives in the executor; this
343 /// method enforces the column-level invariants that have to be
344 /// checked under the `Table` borrow anyway.
345 pub fn rename_column(&mut self, old: &str, new: &str) -> Result<()> {
346 if !self.columns.iter().any(|c| c.column_name == old) {
347 return Err(SQLRiteError::General(format!(
348 "column '{old}' does not exist in table '{}'",
349 self.tb_name
350 )));
351 }
352 if old != new && self.columns.iter().any(|c| c.column_name == new) {
353 return Err(SQLRiteError::General(format!(
354 "column '{new}' already exists in table '{}'",
355 self.tb_name
356 )));
357 }
358 if old == new {
359 return Ok(());
360 }
361
362 for col in self.columns.iter_mut() {
363 if col.column_name == old {
364 col.column_name = new.to_string();
365 }
366 }
367
368 // Re-key the per-column row map.
369 {
370 let mut rows = self.rows.lock().expect("rows mutex poisoned");
371 if let Some(storage) = rows.remove(old) {
372 rows.insert(new.to_string(), storage);
373 }
374 }
375
376 if self.primary_key == old {
377 self.primary_key = new.to_string();
378 }
379
380 let table_name = self.tb_name.clone();
381 for idx in self.secondary_indexes.iter_mut() {
382 if idx.column_name == old {
383 idx.column_name = new.to_string();
384 if idx.origin == IndexOrigin::Auto
385 && idx.name == SecondaryIndex::auto_name(&table_name, old)
386 {
387 idx.name = SecondaryIndex::auto_name(&table_name, new);
388 }
389 }
390 }
391 for entry in self.hnsw_indexes.iter_mut() {
392 if entry.column_name == old {
393 entry.column_name = new.to_string();
394 }
395 }
396 for entry in self.fts_indexes.iter_mut() {
397 if entry.column_name == old {
398 entry.column_name = new.to_string();
399 }
400 }
401
402 Ok(())
403 }
404
405 /// Appends a new column to this table from a parsed column spec.
406 /// The new column's row storage is allocated empty; existing rowids
407 /// read NULL for the new column unless `parsed.default` is set, in
408 /// which case those rowids are backfilled with the default value.
409 ///
410 /// Rejects PK / UNIQUE on the added column (would require
411 /// backfill-with-uniqueness-check against existing rows). Rejects
412 /// NOT NULL without DEFAULT on a non-empty table — same rule SQLite
413 /// applies, and necessary because we have no other backfill source.
414 pub fn add_column(&mut self, parsed: ParsedColumn) -> Result<()> {
415 if self.contains_column(parsed.name.clone()) {
416 return Err(SQLRiteError::General(format!(
417 "column '{}' already exists in table '{}'",
418 parsed.name, self.tb_name
419 )));
420 }
421 if parsed.is_pk {
422 return Err(SQLRiteError::General(
423 "cannot ADD COLUMN with PRIMARY KEY constraint on existing table".to_string(),
424 ));
425 }
426 if parsed.is_unique {
427 return Err(SQLRiteError::General(
428 "cannot ADD COLUMN with UNIQUE constraint on existing table".to_string(),
429 ));
430 }
431 let table_has_rows = self
432 .columns
433 .first()
434 .map(|c| {
435 self.rows
436 .lock()
437 .expect("rows mutex poisoned")
438 .get(&c.column_name)
439 .map(|r| r.rowids().len())
440 .unwrap_or(0)
441 > 0
442 })
443 .unwrap_or(false);
444 if parsed.not_null && parsed.default.is_none() && table_has_rows {
445 return Err(SQLRiteError::General(format!(
446 "cannot ADD COLUMN '{}' NOT NULL without DEFAULT to a non-empty table",
447 parsed.name
448 )));
449 }
450
451 let new_column = Column::with_default(
452 parsed.name.clone(),
453 parsed.datatype.clone(),
454 parsed.is_pk,
455 parsed.not_null,
456 parsed.is_unique,
457 parsed.default.clone(),
458 );
459
460 // Allocate empty row storage for the new column. Mirrors the
461 // dispatch in `Table::new` so the new column behaves identically
462 // to one declared at CREATE TABLE time.
463 let row_storage = match &new_column.datatype {
464 DataType::Integer => Row::Integer(BTreeMap::new()),
465 DataType::Real => Row::Real(BTreeMap::new()),
466 DataType::Text => Row::Text(BTreeMap::new()),
467 DataType::Bool => Row::Bool(BTreeMap::new()),
468 DataType::Vector(_dim) => Row::Vector(BTreeMap::new()),
469 DataType::Json => Row::Text(BTreeMap::new()),
470 DataType::Invalid | DataType::None => Row::None,
471 };
472 {
473 let mut rows = self.rows.lock().expect("rows mutex poisoned");
474 rows.insert(parsed.name.clone(), row_storage);
475 }
476
477 // Backfill existing rowids with the default value, if any.
478 // NULL defaults are a no-op — a missing key in the BTreeMap reads
479 // as NULL anyway. Type mismatches were caught at `parse_one_column`
480 // time when the DEFAULT was evaluated against the declared
481 // datatype; reaching the `_` arm here would indicate a bug.
482 if let Some(default) = &parsed.default {
483 let existing_rowids = self.rowids();
484 let mut rows = self.rows.lock().expect("rows mutex poisoned");
485 let storage = rows.get_mut(&parsed.name).expect("just inserted");
486 match (storage, default) {
487 (Row::Integer(tree), Value::Integer(v)) => {
488 let v32 = *v as i32;
489 for rowid in existing_rowids {
490 tree.insert(rowid, v32);
491 }
492 }
493 (Row::Real(tree), Value::Real(v)) => {
494 let v32 = *v as f32;
495 for rowid in existing_rowids {
496 tree.insert(rowid, v32);
497 }
498 }
499 (Row::Text(tree), Value::Text(v)) => {
500 for rowid in existing_rowids {
501 tree.insert(rowid, v.clone());
502 }
503 }
504 (Row::Bool(tree), Value::Bool(v)) => {
505 for rowid in existing_rowids {
506 tree.insert(rowid, *v);
507 }
508 }
509 (_, Value::Null) => {} // no-op
510 (storage_ref, _) => {
511 return Err(SQLRiteError::Internal(format!(
512 "DEFAULT type does not match column storage for '{}': storage variant {:?}, default {:?}",
513 parsed.name,
514 std::mem::discriminant(storage_ref),
515 default
516 )));
517 }
518 }
519 }
520
521 self.columns.push(new_column);
522 Ok(())
523 }
524
525 /// Removes a column from this table. Refuses to drop the PRIMARY KEY
526 /// column or the only remaining column. Cascades to every index
527 /// (auto, explicit, HNSW, FTS) that referenced the column.
528 pub fn drop_column(&mut self, name: &str) -> Result<()> {
529 if !self.contains_column(name.to_string()) {
530 return Err(SQLRiteError::General(format!(
531 "column '{name}' does not exist in table '{}'",
532 self.tb_name
533 )));
534 }
535 if self.primary_key == name {
536 return Err(SQLRiteError::General(format!(
537 "cannot drop primary key column '{name}'"
538 )));
539 }
540 if self.columns.len() == 1 {
541 return Err(SQLRiteError::General(format!(
542 "cannot drop the only column of table '{}'",
543 self.tb_name
544 )));
545 }
546
547 self.columns.retain(|c| c.column_name != name);
548 {
549 let mut rows = self.rows.lock().expect("rows mutex poisoned");
550 rows.remove(name);
551 }
552 self.secondary_indexes.retain(|i| i.column_name != name);
553 self.hnsw_indexes.retain(|i| i.column_name != name);
554 self.fts_indexes.retain(|i| i.column_name != name);
555
556 Ok(())
557 }
558
559 /// Returns a `bool` informing if a `Column` with a specific name exists or not
560 ///
561 pub fn contains_column(&self, column: String) -> bool {
562 self.columns.iter().any(|col| col.column_name == column)
563 }
564
565 /// Returns the list of column names in declaration order.
566 pub fn column_names(&self) -> Vec<String> {
567 self.columns.iter().map(|c| c.column_name.clone()).collect()
568 }
569
570 /// Returns all rowids currently stored in the table, in ascending order.
571 /// Every column's BTreeMap has the same keyset, so we just read from the first column.
572 pub fn rowids(&self) -> Vec<i64> {
573 let Some(first) = self.columns.first() else {
574 return vec![];
575 };
576 let rows = self.rows.lock().expect("rows mutex poisoned");
577 rows.get(&first.column_name)
578 .map(|r| r.rowids())
579 .unwrap_or_default()
580 }
581
582 /// Reads a single cell at `(column, rowid)`.
583 pub fn get_value(&self, column: &str, rowid: i64) -> Option<Value> {
584 let rows = self.rows.lock().expect("rows mutex poisoned");
585 rows.get(column).and_then(|r| r.get(rowid))
586 }
587
588 /// Removes the row identified by `rowid` from every column's storage and
589 /// from every secondary index entry.
590 pub fn delete_row(&mut self, rowid: i64) {
591 // Snapshot the values we're about to delete so we can strip them
592 // from secondary indexes by (value, rowid) before the row storage
593 // disappears.
594 let per_column_values: Vec<(String, Option<Value>)> = self
595 .columns
596 .iter()
597 .map(|c| (c.column_name.clone(), self.get_value(&c.column_name, rowid)))
598 .collect();
599
600 // Remove from row storage.
601 {
602 let rows_clone = Arc::clone(&self.rows);
603 let mut row_data = rows_clone.lock().expect("rows mutex poisoned");
604 for col in &self.columns {
605 if let Some(r) = row_data.get_mut(&col.column_name) {
606 match r {
607 Row::Integer(m) => {
608 m.remove(&rowid);
609 }
610 Row::Text(m) => {
611 m.remove(&rowid);
612 }
613 Row::Real(m) => {
614 m.remove(&rowid);
615 }
616 Row::Bool(m) => {
617 m.remove(&rowid);
618 }
619 Row::Vector(m) => {
620 m.remove(&rowid);
621 }
622 Row::None => {}
623 }
624 }
625 }
626 }
627
628 // Strip secondary-index entries. Non-indexed columns just don't
629 // show up in secondary_indexes and are no-ops here.
630 for (col_name, value) in per_column_values {
631 if let Some(idx) = self.index_for_column_mut(&col_name) {
632 if let Some(v) = value {
633 idx.remove(&v, rowid);
634 }
635 }
636 }
637 }
638
639 /// Replays a single row at `rowid` when loading a table from disk. Takes
640 /// one typed value per column (in declaration order); `None` means the
641 /// stored cell carried a NULL for that column. Unlike `insert_row` this
642 /// trusts the on-disk state and does *not* re-check UNIQUE — we're
643 /// rebuilding a state that was already consistent when it was saved.
644 pub fn restore_row(&mut self, rowid: i64, values: Vec<Option<Value>>) -> Result<()> {
645 if values.len() != self.columns.len() {
646 return Err(SQLRiteError::Internal(format!(
647 "cell has {} values but table '{}' has {} columns",
648 values.len(),
649 self.tb_name,
650 self.columns.len()
651 )));
652 }
653
654 let column_names: Vec<String> =
655 self.columns.iter().map(|c| c.column_name.clone()).collect();
656
657 for (i, value) in values.into_iter().enumerate() {
658 let col_name = &column_names[i];
659
660 // Write into the per-column row storage first (scoped borrow so
661 // the secondary-index update below doesn't fight over `self`).
662 {
663 let rows_clone = Arc::clone(&self.rows);
664 let mut row_data = rows_clone.lock().expect("rows mutex poisoned");
665 let cell = row_data.get_mut(col_name).ok_or_else(|| {
666 SQLRiteError::Internal(format!("Row storage missing for column '{col_name}'"))
667 })?;
668
669 match (cell, &value) {
670 (Row::Integer(map), Some(Value::Integer(v))) => {
671 map.insert(rowid, *v as i32);
672 }
673 (Row::Integer(_), None) => {
674 return Err(SQLRiteError::Internal(format!(
675 "Integer column '{col_name}' cannot store NULL — corrupt cell?"
676 )));
677 }
678 (Row::Text(map), Some(Value::Text(s))) => {
679 map.insert(rowid, s.clone());
680 }
681 (Row::Text(map), None) => {
682 // Matches the on-insert convention: NULL in Text
683 // storage is represented by the literal "Null"
684 // sentinel and not added to the index.
685 map.insert(rowid, "Null".to_string());
686 }
687 (Row::Real(map), Some(Value::Real(v))) => {
688 map.insert(rowid, *v as f32);
689 }
690 (Row::Real(_), None) => {
691 return Err(SQLRiteError::Internal(format!(
692 "Real column '{col_name}' cannot store NULL — corrupt cell?"
693 )));
694 }
695 (Row::Bool(map), Some(Value::Bool(v))) => {
696 map.insert(rowid, *v);
697 }
698 (Row::Bool(_), None) => {
699 return Err(SQLRiteError::Internal(format!(
700 "Bool column '{col_name}' cannot store NULL — corrupt cell?"
701 )));
702 }
703 (Row::Vector(map), Some(Value::Vector(v))) => {
704 map.insert(rowid, v.clone());
705 }
706 (Row::Vector(_), None) => {
707 return Err(SQLRiteError::Internal(format!(
708 "Vector column '{col_name}' cannot store NULL — corrupt cell?"
709 )));
710 }
711 (row, v) => {
712 return Err(SQLRiteError::Internal(format!(
713 "Type mismatch restoring column '{col_name}': storage {row:?} vs value {v:?}"
714 )));
715 }
716 }
717 }
718
719 // Maintain the secondary index (if any). NULL values are skipped
720 // by `insert`, matching the "NULL is not indexed" convention.
721 if let Some(v) = &value {
722 if let Some(idx) = self.index_for_column_mut(col_name) {
723 idx.insert(v, rowid)?;
724 }
725 }
726 }
727
728 if rowid > self.last_rowid {
729 self.last_rowid = rowid;
730 }
731 Ok(())
732 }
733
734 /// Extracts a row as an ordered `Vec<Option<Value>>` matching the column
735 /// declaration order. Returns `None` entries for columns that hold NULL.
736 /// Used by `save_database` to turn a table's in-memory state into cells.
737 pub fn extract_row(&self, rowid: i64) -> Vec<Option<Value>> {
738 self.columns
739 .iter()
740 .map(|c| match self.get_value(&c.column_name, rowid) {
741 Some(Value::Null) => None,
742 Some(v) => Some(v),
743 None => None,
744 })
745 .collect()
746 }
747
748 /// Overwrites the cell at `(column, rowid)` with `new_val`. Enforces the
749 /// column's datatype and UNIQUE constraint, and updates any secondary
750 /// index.
751 ///
752 /// Returns `Err` if the column doesn't exist, the value type is incompatible,
753 /// or writing would violate UNIQUE.
754 pub fn set_value(&mut self, column: &str, rowid: i64, new_val: Value) -> Result<()> {
755 let col_index = self
756 .columns
757 .iter()
758 .position(|c| c.column_name == column)
759 .ok_or_else(|| SQLRiteError::General(format!("Column '{column}' not found")))?;
760
761 // No-op write — keep storage exactly the same.
762 let current = self.get_value(column, rowid);
763 if current.as_ref() == Some(&new_val) {
764 return Ok(());
765 }
766
767 // Enforce UNIQUE. Prefer an O(log N) index probe if we have one;
768 // fall back to a full column scan otherwise (Real/Bool UNIQUE
769 // columns, which don't get auto-indexed).
770 if self.columns[col_index].is_unique && !matches!(new_val, Value::Null) {
771 if let Some(idx) = self.index_for_column(column) {
772 for other in idx.lookup(&new_val) {
773 if other != rowid {
774 return Err(SQLRiteError::General(format!(
775 "UNIQUE constraint violated for column '{column}'"
776 )));
777 }
778 }
779 } else {
780 for other in self.rowids() {
781 if other == rowid {
782 continue;
783 }
784 if self.get_value(column, other).as_ref() == Some(&new_val) {
785 return Err(SQLRiteError::General(format!(
786 "UNIQUE constraint violated for column '{column}'"
787 )));
788 }
789 }
790 }
791 }
792
793 // Drop the old index entry before writing the new value, so the
794 // post-write index insert doesn't clash with the previous state.
795 if let Some(old) = current {
796 if let Some(idx) = self.index_for_column_mut(column) {
797 idx.remove(&old, rowid);
798 }
799 }
800
801 // Write into the column's Row, type-checking against the declared DataType.
802 let declared = &self.columns[col_index].datatype;
803 {
804 let rows_clone = Arc::clone(&self.rows);
805 let mut row_data = rows_clone.lock().expect("rows mutex poisoned");
806 let cell = row_data.get_mut(column).ok_or_else(|| {
807 SQLRiteError::Internal(format!("Row storage missing for column '{column}'"))
808 })?;
809
810 match (cell, &new_val, declared) {
811 (Row::Integer(m), Value::Integer(v), _) => {
812 m.insert(rowid, *v as i32);
813 }
814 (Row::Real(m), Value::Real(v), _) => {
815 m.insert(rowid, *v as f32);
816 }
817 (Row::Real(m), Value::Integer(v), _) => {
818 m.insert(rowid, *v as f32);
819 }
820 (Row::Text(m), Value::Text(v), dt) => {
821 // Phase 7e — UPDATE on a JSON column also validates
822 // the new text is well-formed JSON, mirroring INSERT.
823 if matches!(dt, DataType::Json) {
824 if let Err(e) = serde_json::from_str::<serde_json::Value>(v) {
825 return Err(SQLRiteError::General(format!(
826 "Type mismatch: expected JSON for column '{column}', got '{v}': {e}"
827 )));
828 }
829 }
830 m.insert(rowid, v.clone());
831 }
832 (Row::Bool(m), Value::Bool(v), _) => {
833 m.insert(rowid, *v);
834 }
835 (Row::Vector(m), Value::Vector(v), DataType::Vector(declared_dim)) => {
836 if v.len() != *declared_dim {
837 return Err(SQLRiteError::General(format!(
838 "Vector dimension mismatch for column '{column}': declared {declared_dim}, got {}",
839 v.len()
840 )));
841 }
842 m.insert(rowid, v.clone());
843 }
844 // NULL writes: store the sentinel "Null" string for Text; for other
845 // types we leave storage as-is since those BTreeMaps can't hold NULL today.
846 (Row::Text(m), Value::Null, _) => {
847 m.insert(rowid, "Null".to_string());
848 }
849 (_, new, dt) => {
850 return Err(SQLRiteError::General(format!(
851 "Type mismatch: cannot assign {} to column '{column}' of type {dt}",
852 new.to_display_string()
853 )));
854 }
855 }
856 }
857
858 // Maintain the secondary index, if any. NULL values are skipped by
859 // insert per convention.
860 if !matches!(new_val, Value::Null) {
861 if let Some(idx) = self.index_for_column_mut(column) {
862 idx.insert(&new_val, rowid)?;
863 }
864 }
865
866 Ok(())
867 }
868
869 /// Returns an immutable reference of `sql::db::table::Column` if the table contains a
870 /// column with the specified key as a column name.
871 ///
872 #[allow(dead_code)]
873 pub fn get_column(&mut self, column_name: String) -> Result<&Column> {
874 if let Some(column) = self
875 .columns
876 .iter()
877 .filter(|c| c.column_name == column_name)
878 .collect::<Vec<&Column>>()
879 .first()
880 {
881 Ok(column)
882 } else {
883 Err(SQLRiteError::General(String::from("Column not found.")))
884 }
885 }
886
887 /// Validates if columns and values being inserted violate the UNIQUE constraint.
888 /// PRIMARY KEY columns are automatically UNIQUE. Uses the corresponding
889 /// secondary index when one exists (O(log N) lookup); falls back to a
890 /// linear scan for indexable-but-not-indexed situations (e.g. a Real
891 /// UNIQUE column — Real isn't in the auto-indexed set).
892 pub fn validate_unique_constraint(
893 &mut self,
894 cols: &Vec<String>,
895 values: &Vec<String>,
896 ) -> Result<()> {
897 for (idx, name) in cols.iter().enumerate() {
898 let column = self
899 .columns
900 .iter()
901 .find(|c| &c.column_name == name)
902 .ok_or_else(|| SQLRiteError::General(format!("Column '{name}' not found")))?;
903 if !column.is_unique {
904 continue;
905 }
906 let datatype = &column.datatype;
907 let val = &values[idx];
908
909 // Parse the string value into a runtime Value according to the
910 // declared column type. If parsing fails the caller's insert
911 // would also fail with the same error; surface it here so we
912 // don't emit a misleading "unique OK" on bad input.
913 let parsed = match datatype {
914 DataType::Integer => val.parse::<i64>().map(Value::Integer).map_err(|_| {
915 SQLRiteError::General(format!(
916 "Type mismatch: expected INTEGER for column '{name}', got '{val}'"
917 ))
918 })?,
919 DataType::Text => Value::Text(val.clone()),
920 DataType::Real => val.parse::<f64>().map(Value::Real).map_err(|_| {
921 SQLRiteError::General(format!(
922 "Type mismatch: expected REAL for column '{name}', got '{val}'"
923 ))
924 })?,
925 DataType::Bool => val.parse::<bool>().map(Value::Bool).map_err(|_| {
926 SQLRiteError::General(format!(
927 "Type mismatch: expected BOOL for column '{name}', got '{val}'"
928 ))
929 })?,
930 DataType::Vector(declared_dim) => {
931 let parsed_vec = parse_vector_literal(val).map_err(|e| {
932 SQLRiteError::General(format!(
933 "Type mismatch: expected VECTOR({declared_dim}) for column '{name}', {e}"
934 ))
935 })?;
936 if parsed_vec.len() != *declared_dim {
937 return Err(SQLRiteError::General(format!(
938 "Vector dimension mismatch for column '{name}': declared {declared_dim}, got {}",
939 parsed_vec.len()
940 )));
941 }
942 Value::Vector(parsed_vec)
943 }
944 DataType::Json => {
945 // JSON values stored as Text. UNIQUE on a JSON column
946 // compares the canonical text representation
947 // verbatim — `{"a": 1}` and `{"a":1}` are distinct.
948 // Document this if anyone actually requests UNIQUE
949 // JSON; for MVP, treat-as-text is fine.
950 Value::Text(val.clone())
951 }
952 DataType::None | DataType::Invalid => {
953 return Err(SQLRiteError::Internal(format!(
954 "column '{name}' has an unsupported datatype"
955 )));
956 }
957 };
958
959 if let Some(secondary) = self.index_for_column(name) {
960 if secondary.would_violate_unique(&parsed) {
961 return Err(SQLRiteError::General(format!(
962 "UNIQUE constraint violated for column '{name}': value '{val}' already exists"
963 )));
964 }
965 } else {
966 // No secondary index (Real / Bool UNIQUE). Linear scan.
967 for other in self.rowids() {
968 if self.get_value(name, other).as_ref() == Some(&parsed) {
969 return Err(SQLRiteError::General(format!(
970 "UNIQUE constraint violated for column '{name}': value '{val}' already exists"
971 )));
972 }
973 }
974 }
975 }
976 Ok(())
977 }
978
979 /// Inserts all VALUES in its approprieta COLUMNS, using the ROWID an embedded INDEX on all ROWS
980 /// Every `Table` keeps track of the `last_rowid` in order to facilitate what the next one would be.
981 /// One limitation of this data structure is that we can only have one write transaction at a time, otherwise
982 /// we could have a race condition on the last_rowid.
983 ///
984 /// Since we are loosely modeling after SQLite, this is also a limitation of SQLite (allowing only one write transcation at a time),
985 /// So we are good. :)
986 ///
987 /// Returns `Err` (leaving the table unchanged) when the user supplies an
988 /// incompatibly-typed value — no more panics on bad input.
989 pub fn insert_row(&mut self, cols: &Vec<String>, values: &Vec<String>) -> Result<()> {
990 let mut next_rowid = self.last_rowid + 1;
991
992 // Auto-assign INTEGER PRIMARY KEY when the user omits it; otherwise
993 // adopt the supplied value as the new rowid.
994 if self.primary_key != "-1" {
995 if !cols.iter().any(|col| col == &self.primary_key) {
996 // Write the auto-assigned PK into row storage, then sync
997 // the secondary index.
998 let val = next_rowid as i32;
999 let wrote_integer = {
1000 let rows_clone = Arc::clone(&self.rows);
1001 let mut row_data = rows_clone.lock().expect("rows mutex poisoned");
1002 let table_col_data = row_data.get_mut(&self.primary_key).ok_or_else(|| {
1003 SQLRiteError::Internal(format!(
1004 "Row storage missing for primary key column '{}'",
1005 self.primary_key
1006 ))
1007 })?;
1008 match table_col_data {
1009 Row::Integer(tree) => {
1010 tree.insert(next_rowid, val);
1011 true
1012 }
1013 _ => false, // non-integer PK: auto-assign is a no-op
1014 }
1015 };
1016 if wrote_integer {
1017 let pk = self.primary_key.clone();
1018 if let Some(idx) = self.index_for_column_mut(&pk) {
1019 idx.insert(&Value::Integer(val as i64), next_rowid)?;
1020 }
1021 }
1022 } else {
1023 for i in 0..cols.len() {
1024 if cols[i] == self.primary_key {
1025 let val = &values[i];
1026 next_rowid = val.parse::<i64>().map_err(|_| {
1027 SQLRiteError::General(format!(
1028 "Type mismatch: PRIMARY KEY column '{}' expects INTEGER, got '{val}'",
1029 self.primary_key
1030 ))
1031 })?;
1032 }
1033 }
1034 }
1035 }
1036
1037 // For every table column, either pick the supplied value or pad with NULL
1038 // so that every column's BTreeMap keeps the same rowid keyset.
1039 let column_names = self
1040 .columns
1041 .iter()
1042 .map(|col| col.column_name.to_string())
1043 .collect::<Vec<String>>();
1044 let mut j: usize = 0;
1045 for i in 0..column_names.len() {
1046 let mut val = String::from("Null");
1047 let key = &column_names[i];
1048 let mut column_supplied = false;
1049
1050 if let Some(supplied_key) = cols.get(j) {
1051 if supplied_key == &column_names[i] {
1052 val = values[j].to_string();
1053 column_supplied = true;
1054 j += 1;
1055 } else if self.primary_key == column_names[i] {
1056 // PK already stored in the auto-assign branch above.
1057 continue;
1058 }
1059 } else if self.primary_key == column_names[i] {
1060 continue;
1061 }
1062
1063 // Column was omitted from the INSERT column list. Substitute its
1064 // DEFAULT literal if one was declared at CREATE TABLE time;
1065 // otherwise it stays as the "Null" sentinel set above. SQLite
1066 // semantics: an *explicit* NULL is preserved as NULL — the
1067 // default only fires for omitted columns.
1068 if !column_supplied {
1069 if let Some(default) = &self.columns[i].default {
1070 val = default.to_default_insert_string();
1071 }
1072 }
1073
1074 // Step 1: write into row storage and compute the typed Value
1075 // we'll hand to the secondary index (if any).
1076 let typed_value: Option<Value> = {
1077 let rows_clone = Arc::clone(&self.rows);
1078 let mut row_data = rows_clone.lock().expect("rows mutex poisoned");
1079 let table_col_data = row_data.get_mut(key).ok_or_else(|| {
1080 SQLRiteError::Internal(format!("Row storage missing for column '{key}'"))
1081 })?;
1082
1083 match table_col_data {
1084 Row::Integer(tree) => {
1085 let parsed = val.parse::<i32>().map_err(|_| {
1086 SQLRiteError::General(format!(
1087 "Type mismatch: expected INTEGER for column '{key}', got '{val}'"
1088 ))
1089 })?;
1090 tree.insert(next_rowid, parsed);
1091 Some(Value::Integer(parsed as i64))
1092 }
1093 Row::Text(tree) => {
1094 // Phase 7e — JSON columns also reach here (they
1095 // share Row::Text storage with TEXT columns).
1096 // Validate the value parses as JSON before
1097 // storing; otherwise we'd happily write
1098 // `not-json-at-all` and only fail when
1099 // json_extract tried to parse it later.
1100 if matches!(self.columns[i].datatype, DataType::Json) && val != "Null" {
1101 if let Err(e) = serde_json::from_str::<serde_json::Value>(&val) {
1102 return Err(SQLRiteError::General(format!(
1103 "Type mismatch: expected JSON for column '{key}', got '{val}': {e}"
1104 )));
1105 }
1106 }
1107 tree.insert(next_rowid, val.to_string());
1108 // "Null" sentinel stays out of the index — it isn't a
1109 // real user value.
1110 if val != "Null" {
1111 Some(Value::Text(val.to_string()))
1112 } else {
1113 None
1114 }
1115 }
1116 Row::Real(tree) => {
1117 let parsed = val.parse::<f32>().map_err(|_| {
1118 SQLRiteError::General(format!(
1119 "Type mismatch: expected REAL for column '{key}', got '{val}'"
1120 ))
1121 })?;
1122 tree.insert(next_rowid, parsed);
1123 Some(Value::Real(parsed as f64))
1124 }
1125 Row::Bool(tree) => {
1126 let parsed = val.parse::<bool>().map_err(|_| {
1127 SQLRiteError::General(format!(
1128 "Type mismatch: expected BOOL for column '{key}', got '{val}'"
1129 ))
1130 })?;
1131 tree.insert(next_rowid, parsed);
1132 Some(Value::Bool(parsed))
1133 }
1134 Row::Vector(tree) => {
1135 // The parser put a bracket-array literal into `val`
1136 // (e.g. "[0.1,0.2,0.3]"). Parse it back here and
1137 // dim-check against the column's declared
1138 // DataType::Vector(N).
1139 let parsed = parse_vector_literal(&val).map_err(|e| {
1140 SQLRiteError::General(format!(
1141 "Type mismatch: expected VECTOR for column '{key}', {e}"
1142 ))
1143 })?;
1144 let declared_dim = match &self.columns[i].datatype {
1145 DataType::Vector(d) => *d,
1146 other => {
1147 return Err(SQLRiteError::Internal(format!(
1148 "Row::Vector storage on non-Vector column '{key}' (declared as {other})"
1149 )));
1150 }
1151 };
1152 if parsed.len() != declared_dim {
1153 return Err(SQLRiteError::General(format!(
1154 "Vector dimension mismatch for column '{key}': declared {declared_dim}, got {}",
1155 parsed.len()
1156 )));
1157 }
1158 tree.insert(next_rowid, parsed.clone());
1159 Some(Value::Vector(parsed))
1160 }
1161 Row::None => {
1162 return Err(SQLRiteError::Internal(format!(
1163 "Column '{key}' has no row storage"
1164 )));
1165 }
1166 }
1167 };
1168
1169 // Step 2: maintain the secondary index (if any). insert() is a
1170 // no-op for Value::Null and cheap for other value kinds.
1171 if let Some(v) = typed_value.clone() {
1172 if let Some(idx) = self.index_for_column_mut(key) {
1173 idx.insert(&v, next_rowid)?;
1174 }
1175 }
1176
1177 // Step 3 (Phase 7d.2): maintain any HNSW indexes on this column.
1178 // The HNSW algorithm needs access to other rows' vectors when
1179 // wiring up neighbor edges, so build a get_vec closure that
1180 // pulls from the table's row storage (which we *just* updated
1181 // with the new value).
1182 if let Some(Value::Vector(new_vec)) = &typed_value {
1183 self.maintain_hnsw_on_insert(key, next_rowid, new_vec);
1184 }
1185
1186 // Step 4 (Phase 8b): maintain any FTS indexes on this column.
1187 // Cheap incremental update — PostingList::insert tokenizes
1188 // the value and adds postings under the new rowid. DELETE
1189 // and UPDATE take the rebuild-on-save path instead (Q7).
1190 if let Some(Value::Text(text)) = &typed_value {
1191 self.maintain_fts_on_insert(key, next_rowid, text);
1192 }
1193 }
1194 self.last_rowid = next_rowid;
1195 Ok(())
1196 }
1197
1198 /// After a row insert, push the new (rowid, vector) into every HNSW
1199 /// index whose column matches `column`. Split out of `insert_row` so
1200 /// the borrowing dance — we need both `&self.rows` (read other
1201 /// vectors) and `&mut self.hnsw_indexes` (insert into the graph) —
1202 /// stays localized.
1203 fn maintain_hnsw_on_insert(&mut self, column: &str, rowid: i64, new_vec: &[f32]) {
1204 // Snapshot the current vector storage so the get_vec closure
1205 // doesn't fight with `&mut self.hnsw_indexes`. For a typical
1206 // HNSW insert we touch ef_construction × log(N) other vectors,
1207 // so the snapshot cost is small relative to the graph wiring.
1208 let mut vec_snapshot: HashMap<i64, Vec<f32>> = HashMap::new();
1209 {
1210 let row_data = self.rows.lock().expect("rows mutex poisoned");
1211 if let Some(Row::Vector(map)) = row_data.get(column) {
1212 for (id, v) in map.iter() {
1213 vec_snapshot.insert(*id, v.clone());
1214 }
1215 }
1216 }
1217 // The new row was just written into row storage — make sure the
1218 // snapshot reflects it (it should, but defensive).
1219 vec_snapshot.insert(rowid, new_vec.to_vec());
1220
1221 for entry in &mut self.hnsw_indexes {
1222 if entry.column_name == column {
1223 entry.index.insert(rowid, new_vec, |id| {
1224 vec_snapshot.get(&id).cloned().unwrap_or_default()
1225 });
1226 }
1227 }
1228 }
1229
1230 /// After a row insert, push the new (rowid, text) into every FTS
1231 /// index whose column matches `column`. Phase 8b.
1232 ///
1233 /// Mirrors [`Self::maintain_hnsw_on_insert`] but the FTS index is
1234 /// self-contained — `PostingList::insert` only needs the new doc's
1235 /// text, not the rest of the corpus, so there's no snapshot dance.
1236 fn maintain_fts_on_insert(&mut self, column: &str, rowid: i64, text: &str) {
1237 for entry in &mut self.fts_indexes {
1238 if entry.column_name == column {
1239 entry.index.insert(rowid, text);
1240 }
1241 }
1242 }
1243
1244 /// Print the table schema to standard output in a pretty formatted way.
1245 ///
1246 /// # Example
1247 ///
1248 /// ```text
1249 /// let table = Table::new(payload);
1250 /// table.print_table_schema();
1251 ///
1252 /// Prints to standard output:
1253 /// +-------------+-----------+-------------+--------+----------+
1254 /// | Column Name | Data Type | PRIMARY KEY | UNIQUE | NOT NULL |
1255 /// +-------------+-----------+-------------+--------+----------+
1256 /// | id | Integer | true | true | true |
1257 /// +-------------+-----------+-------------+--------+----------+
1258 /// | name | Text | false | true | false |
1259 /// +-------------+-----------+-------------+--------+----------+
1260 /// | email | Text | false | false | false |
1261 /// +-------------+-----------+-------------+--------+----------+
1262 /// ```
1263 ///
1264 pub fn print_table_schema(&self) -> Result<usize> {
1265 let mut table = PrintTable::new();
1266 table.add_row(row![
1267 "Column Name",
1268 "Data Type",
1269 "PRIMARY KEY",
1270 "UNIQUE",
1271 "NOT NULL"
1272 ]);
1273
1274 for col in &self.columns {
1275 table.add_row(row![
1276 col.column_name,
1277 col.datatype,
1278 col.is_pk,
1279 col.is_unique,
1280 col.not_null
1281 ]);
1282 }
1283
1284 table.printstd();
1285 Ok(table.len() * 2 + 1)
1286 }
1287
1288 /// Print the table data to standard output in a pretty formatted way.
1289 ///
1290 /// # Example
1291 ///
1292 /// ```text
1293 /// let db_table = db.get_table_mut(table_name.to_string()).unwrap();
1294 /// db_table.print_table_data();
1295 ///
1296 /// Prints to standard output:
1297 /// +----+---------+------------------------+
1298 /// | id | name | email |
1299 /// +----+---------+------------------------+
1300 /// | 1 | "Jack" | "jack@mail.com" |
1301 /// +----+---------+------------------------+
1302 /// | 10 | "Bob" | "bob@main.com" |
1303 /// +----+---------+------------------------+
1304 /// | 11 | "Bill" | "bill@main.com" |
1305 /// +----+---------+------------------------+
1306 /// ```
1307 ///
1308 pub fn print_table_data(&self) {
1309 let mut print_table = PrintTable::new();
1310
1311 let column_names = self
1312 .columns
1313 .iter()
1314 .map(|col| col.column_name.to_string())
1315 .collect::<Vec<String>>();
1316
1317 let header_row = PrintRow::new(
1318 column_names
1319 .iter()
1320 .map(|col| PrintCell::new(col))
1321 .collect::<Vec<PrintCell>>(),
1322 );
1323
1324 let rows_clone = Arc::clone(&self.rows);
1325 let row_data = rows_clone.lock().expect("rows mutex poisoned");
1326 let first_col_data = row_data
1327 .get(&self.columns.first().unwrap().column_name)
1328 .unwrap();
1329 let num_rows = first_col_data.count();
1330 let mut print_table_rows: Vec<PrintRow> = vec![PrintRow::new(vec![]); num_rows];
1331
1332 for col_name in &column_names {
1333 let col_val = row_data
1334 .get(col_name)
1335 .expect("Can't find any rows with the given column");
1336 let columns: Vec<String> = col_val.get_serialized_col_data();
1337
1338 for i in 0..num_rows {
1339 if let Some(cell) = &columns.get(i) {
1340 print_table_rows[i].add_cell(PrintCell::new(cell));
1341 } else {
1342 print_table_rows[i].add_cell(PrintCell::new(""));
1343 }
1344 }
1345 }
1346
1347 print_table.add_row(header_row);
1348 for row in print_table_rows {
1349 print_table.add_row(row);
1350 }
1351
1352 print_table.printstd();
1353 }
1354}
1355
/// The declared schema of a single SQL column.
///
/// Per-column index state moved to `Table::secondary_indexes` in Phase 3e —
/// a single `Column` describes the declared schema (name, type, constraints)
/// and nothing more.
#[derive(PartialEq, Debug, Clone)]
pub struct Column {
    /// Column name; also the key under which the column's cells are kept in
    /// the table's row storage.
    pub column_name: String,
    /// Declared type of the column.
    pub datatype: DataType,
    /// `true` when the column is the table's PRIMARY KEY.
    pub is_pk: bool,
    /// `true` when the column was declared NOT NULL.
    pub not_null: bool,
    /// `true` when the column carries a UNIQUE constraint; only such columns
    /// are examined by `Table::validate_unique_constraint`.
    pub is_unique: bool,
    /// Literal value to substitute when this column is omitted from an
    /// INSERT. Restricted to literal expressions at CREATE TABLE time.
    /// `None` means "no DEFAULT declared"; an INSERT that omits the column
    /// gets `Value::Null` instead.
    pub default: Option<Value>,
}
1374
1375impl Column {
1376 /// Builds a `Column` without a `DEFAULT` clause. Existing call sites
1377 /// (catalog-table setup, test fixtures) keep working unchanged.
1378 pub fn new(
1379 name: String,
1380 datatype: String,
1381 is_pk: bool,
1382 not_null: bool,
1383 is_unique: bool,
1384 ) -> Self {
1385 Self::with_default(name, datatype, is_pk, not_null, is_unique, None)
1386 }
1387
1388 /// Builds a `Column` with an optional `DEFAULT` literal. Used by the
1389 /// CREATE TABLE / `parse_create_sql` paths that propagate user-supplied
1390 /// defaults from `ParsedColumn`.
1391 pub fn with_default(
1392 name: String,
1393 datatype: String,
1394 is_pk: bool,
1395 not_null: bool,
1396 is_unique: bool,
1397 default: Option<Value>,
1398 ) -> Self {
1399 let dt = DataType::new(datatype);
1400 Column {
1401 column_name: name,
1402 datatype: dt,
1403 is_pk,
1404 not_null,
1405 is_unique,
1406 default,
1407 }
1408 }
1409}
1410
/// In-memory storage for the cells of one table column.
///
/// Each variant wraps a `BTreeMap` that uses the ROWID as key and the cell of
/// the corresponding type as value, so a column's cells iterate in ROWID
/// order.
#[derive(PartialEq, Debug, Clone)]
pub enum Row {
    /// INTEGER cells, stored as `i32`.
    Integer(BTreeMap<i64, i32>),
    /// TEXT cells. The literal string `"Null"` is the sentinel for a NULL
    /// cell and is mapped back to `Value::Null` by [`Row::get`].
    Text(BTreeMap<i64, String>),
    /// REAL cells, stored as `f32`.
    Real(BTreeMap<i64, f32>),
    /// BOOL cells.
    Bool(BTreeMap<i64, bool>),
    /// Phase 7a: dense f32 vector storage. Each `Vec<f32>` should have
    /// length matching the column's declared `DataType::Vector(dim)`,
    /// enforced at INSERT time. The Row variant doesn't carry the dim —
    /// it lives in the column metadata.
    Vector(BTreeMap<i64, Vec<f32>>),
    /// No storage. [`Row::rowids`] and [`Row::get`] treat it as empty; the
    /// print helpers panic on it.
    None,
}
1429
1430impl Row {
1431 fn get_serialized_col_data(&self) -> Vec<String> {
1432 match self {
1433 Row::Integer(cd) => cd.values().map(|v| v.to_string()).collect(),
1434 Row::Real(cd) => cd.values().map(|v| v.to_string()).collect(),
1435 Row::Text(cd) => cd.values().map(|v| v.to_string()).collect(),
1436 Row::Bool(cd) => cd.values().map(|v| v.to_string()).collect(),
1437 Row::Vector(cd) => cd.values().map(format_vector_for_display).collect(),
1438 Row::None => panic!("Found None in columns"),
1439 }
1440 }
1441
1442 fn count(&self) -> usize {
1443 match self {
1444 Row::Integer(cd) => cd.len(),
1445 Row::Real(cd) => cd.len(),
1446 Row::Text(cd) => cd.len(),
1447 Row::Bool(cd) => cd.len(),
1448 Row::Vector(cd) => cd.len(),
1449 Row::None => panic!("Found None in columns"),
1450 }
1451 }
1452
1453 /// Every column's BTreeMap is keyed by ROWID. All columns share the same keyset
1454 /// after an INSERT (missing columns are padded), so any column's keys are a valid
1455 /// iteration of the table's rowids.
1456 pub fn rowids(&self) -> Vec<i64> {
1457 match self {
1458 Row::Integer(m) => m.keys().copied().collect(),
1459 Row::Text(m) => m.keys().copied().collect(),
1460 Row::Real(m) => m.keys().copied().collect(),
1461 Row::Bool(m) => m.keys().copied().collect(),
1462 Row::Vector(m) => m.keys().copied().collect(),
1463 Row::None => vec![],
1464 }
1465 }
1466
1467 pub fn get(&self, rowid: i64) -> Option<Value> {
1468 match self {
1469 Row::Integer(m) => m.get(&rowid).map(|v| Value::Integer(i64::from(*v))),
1470 // INSERT stores the literal string "Null" in Text columns that were omitted
1471 // from the query — re-map that back to a real NULL on read.
1472 Row::Text(m) => m.get(&rowid).map(|v| {
1473 if v == "Null" {
1474 Value::Null
1475 } else {
1476 Value::Text(v.clone())
1477 }
1478 }),
1479 Row::Real(m) => m.get(&rowid).map(|v| Value::Real(f64::from(*v))),
1480 Row::Bool(m) => m.get(&rowid).map(|v| Value::Bool(*v)),
1481 Row::Vector(m) => m.get(&rowid).map(|v| Value::Vector(v.clone())),
1482 Row::None => None,
1483 }
1484 }
1485}
1486
/// Formats a vector for human-readable output, e.g. `[0.1, 0.2, 0.3]`.
///
/// Shared by `Row::get_serialized_col_data` (the print-table path) and
/// `Value::to_display_string`. Elements are written with the default `f32`
/// `Display`, separated by `", "` and wrapped in square brackets; an empty
/// vector renders as `[]`. No truncation is applied, so high-dimensional
/// vectors produce one long line.
fn format_vector_for_display(v: &Vec<f32>) -> String {
    // Default f32 Display picks the shortest representation that round-trips,
    // so 0.1f32 renders as "0.1" rather than "0.10000000149011612".
    let elements: Vec<String> = v.iter().map(|x| x.to_string()).collect();
    format!("[{}]", elements.join(", "))
}
1508
/// Runtime value produced by query execution. Separate from the on-disk `Row` enum
/// so the executor can carry typed values (including NULL) across operators.
#[derive(Debug, Clone, PartialEq)]
pub enum Value {
    /// Integer value, widened to `i64` from the `i32` kept in `Row::Integer`.
    Integer(i64),
    /// Text value.
    Text(String),
    /// Floating-point value, widened to `f64` from the `f32` kept in
    /// `Row::Real`.
    Real(f64),
    /// Boolean value.
    Bool(bool),
    /// Phase 7a: dense f32 vector as a runtime value. Carries its own
    /// dimension implicitly via `Vec::len`; the column it's being
    /// assigned to has a declared `DataType::Vector(N)` that's checked
    /// at INSERT/UPDATE time.
    Vector(Vec<f32>),
    /// SQL NULL.
    Null,
}
1524
1525impl Value {
1526 pub fn to_display_string(&self) -> String {
1527 match self {
1528 Value::Integer(v) => v.to_string(),
1529 Value::Text(s) => s.clone(),
1530 Value::Real(f) => f.to_string(),
1531 Value::Bool(b) => b.to_string(),
1532 Value::Vector(v) => format_vector_for_display(v),
1533 Value::Null => String::from("NULL"),
1534 }
1535 }
1536
1537 /// Renders this value in the same stringly format that
1538 /// [`crate::sql::parser::insert::InsertQuery::new`] produces for INSERT
1539 /// values, so a DEFAULT can be substituted into the existing
1540 /// `insert_row` parse pipeline without a parallel typed path.
1541 ///
1542 /// The differences from [`Self::to_display_string`] that matter:
1543 /// - `NULL` renders as the `"Null"` sentinel that `insert_row` matches.
1544 /// - Text stays unquoted (the insert pipeline strips quotes upstream).
1545 pub fn to_default_insert_string(&self) -> String {
1546 match self {
1547 Value::Integer(v) => v.to_string(),
1548 Value::Text(s) => s.clone(),
1549 Value::Real(f) => f.to_string(),
1550 Value::Bool(b) => b.to_string(),
1551 Value::Vector(v) => format_vector_for_display(v),
1552 Value::Null => String::from("Null"),
1553 }
1554 }
1555}
1556
1557/// Parse a bracket-array literal like `"[0.1, 0.2, 0.3]"` (or `"[1, 2, 3]"`)
1558/// into a `Vec<f32>`. The parser/insert pipeline stores vector literals as
1559/// strings in `InsertQuery::rows` (a `Vec<Vec<String>>`); this helper is
1560/// the inverse — turn the string back into a typed vector at the boundary
1561/// where we actually need element-typed data.
1562///
1563/// Accepts:
1564/// - `[]` → empty vector (caller's dimension check rejects it for VECTOR(N≥1))
1565/// - `[0.1, 0.2, 0.3]` → standard float syntax
1566/// - `[1, 2, 3]` → integers, coerced to f32 (matches `VALUES (1, 2)` for
1567/// `REAL` columns; we widen ints to floats automatically)
1568/// - whitespace tolerated everywhere (Python/JSON/pgvector convention)
1569///
1570/// Rejects with a descriptive message:
1571/// - missing `[` / `]`
1572/// - non-numeric elements (`['foo', 0.1]`)
1573/// - NaN / Inf literals (we accept them via `f32::from_str` but caller can
1574/// reject if undesired — for now we let them through; HNSW etc. will
1575/// reject NaN at index time)
1576pub fn parse_vector_literal(s: &str) -> Result<Vec<f32>> {
1577 let trimmed = s.trim();
1578 if !trimmed.starts_with('[') || !trimmed.ends_with(']') {
1579 return Err(SQLRiteError::General(format!(
1580 "expected bracket-array literal `[...]`, got `{s}`"
1581 )));
1582 }
1583 let inner = &trimmed[1..trimmed.len() - 1].trim();
1584 if inner.is_empty() {
1585 return Ok(Vec::new());
1586 }
1587 let mut out = Vec::new();
1588 for (i, part) in inner.split(',').enumerate() {
1589 let element = part.trim();
1590 let parsed: f32 = element.parse().map_err(|_| {
1591 SQLRiteError::General(format!("vector element {i} (`{element}`) is not a number"))
1592 })?;
1593 out.push(parsed);
1594 }
1595 Ok(out)
1596}
1597
#[cfg(test)]
mod tests {
    use super::*;
    use sqlparser::dialect::SQLiteDialect;
    use sqlparser::parser::Parser;

    /// Parses `sql`, which must hold exactly one `CREATE TABLE` statement,
    /// and builds the corresponding `Table`.
    fn table_from_sql(sql: &str) -> Table {
        let dialect = SQLiteDialect {};
        let mut ast = Parser::parse_sql(&dialect, sql).unwrap();
        assert_eq!(ast.len(), 1, "expected exactly one query statement");
        let statement = ast.pop().unwrap();
        let create_query = CreateQuery::new(&statement).unwrap();
        Table::new(create_query)
    }

    #[test]
    fn datatype_display_trait_test() {
        let integer = DataType::Integer;
        let text = DataType::Text;
        let real = DataType::Real;
        let boolean = DataType::Bool;
        let vector = DataType::Vector(384);
        let none = DataType::None;
        let invalid = DataType::Invalid;

        assert_eq!(format!("{}", integer), "Integer");
        assert_eq!(format!("{}", text), "Text");
        assert_eq!(format!("{}", real), "Real");
        assert_eq!(format!("{}", boolean), "Boolean");
        assert_eq!(format!("{}", vector), "Vector(384)");
        assert_eq!(format!("{}", none), "None");
        assert_eq!(format!("{}", invalid), "Invalid");
    }

    // -----------------------------------------------------------------
    // Phase 7a — VECTOR(N) column type
    // -----------------------------------------------------------------

    #[test]
    fn datatype_new_parses_vector_dim() {
        // Standard cases.
        assert_eq!(DataType::new("vector(1)".to_string()), DataType::Vector(1));
        assert_eq!(
            DataType::new("vector(384)".to_string()),
            DataType::Vector(384)
        );
        assert_eq!(
            DataType::new("vector(1536)".to_string()),
            DataType::Vector(1536)
        );

        // Case-insensitive on the keyword.
        assert_eq!(
            DataType::new("VECTOR(384)".to_string()),
            DataType::Vector(384)
        );

        // Whitespace inside parens tolerated (the create-parser strips it
        // but the string-based round-trip in DataType::new is the one place
        // we don't fully control input formatting).
        assert_eq!(
            DataType::new("vector( 64 )".to_string()),
            DataType::Vector(64)
        );
    }

    #[test]
    fn datatype_new_rejects_bad_vector_strings() {
        // dim = 0 is rejected (Q2: VECTOR(N≥1)).
        assert_eq!(DataType::new("vector(0)".to_string()), DataType::Invalid);
        // Non-numeric dim.
        assert_eq!(DataType::new("vector(abc)".to_string()), DataType::Invalid);
        // Empty parens.
        assert_eq!(DataType::new("vector()".to_string()), DataType::Invalid);
        // Negative dim wouldn't even parse as usize, so falls into Invalid.
        assert_eq!(DataType::new("vector(-3)".to_string()), DataType::Invalid);
    }

    #[test]
    fn datatype_to_wire_string_round_trips_vector() {
        let dt = DataType::Vector(384);
        let wire = dt.to_wire_string();
        assert_eq!(wire, "vector(384)");
        // And feeds back through DataType::new losslessly — this is the
        // round-trip the ParsedColumn pipeline relies on.
        assert_eq!(DataType::new(wire), DataType::Vector(384));
    }

    #[test]
    fn parse_vector_literal_accepts_floats() {
        let v = parse_vector_literal("[0.1, 0.2, 0.3]").expect("parse");
        assert_eq!(v, vec![0.1f32, 0.2, 0.3]);
    }

    #[test]
    fn parse_vector_literal_accepts_ints_widening_to_f32() {
        let v = parse_vector_literal("[1, 2, 3]").expect("parse");
        assert_eq!(v, vec![1.0f32, 2.0, 3.0]);
    }

    #[test]
    fn parse_vector_literal_handles_negatives_and_whitespace() {
        let v = parse_vector_literal("[ -1.5 ,  2.0,  -3.5 ]").expect("parse");
        assert_eq!(v, vec![-1.5f32, 2.0, -3.5]);
    }

    #[test]
    fn parse_vector_literal_empty_brackets_is_empty_vec() {
        let v = parse_vector_literal("[]").expect("parse");
        assert!(v.is_empty());
    }

    #[test]
    fn parse_vector_literal_rejects_non_bracketed() {
        assert!(parse_vector_literal("0.1, 0.2").is_err());
        assert!(parse_vector_literal("(0.1, 0.2)").is_err());
        assert!(parse_vector_literal("[0.1, 0.2").is_err()); // missing ]
        assert!(parse_vector_literal("0.1, 0.2]").is_err()); // missing [
    }

    #[test]
    fn parse_vector_literal_rejects_non_numeric_elements() {
        let err = parse_vector_literal("[1.0, 'foo', 3.0]").unwrap_err();
        let msg = format!("{err}");
        assert!(
            msg.contains("vector element 1") && msg.contains("'foo'"),
            "error message should pinpoint the bad element: got `{msg}`"
        );
    }

    #[test]
    fn value_vector_display_format() {
        let v = Value::Vector(vec![0.1, 0.2, 0.3]);
        assert_eq!(v.to_display_string(), "[0.1, 0.2, 0.3]");

        // Empty vector displays as `[]`.
        let empty = Value::Vector(vec![]);
        assert_eq!(empty.to_display_string(), "[]");
    }

    #[test]
    fn create_new_table_test() {
        let table = table_from_sql(
            "CREATE TABLE contacts (
            id INTEGER PRIMARY KEY,
            first_name TEXT NOT NULL,
            last_name TEXT NOT NULl,
            email TEXT NOT NULL UNIQUE,
            active BOOL,
            score REAL
        );",
        );

        assert_eq!(table.columns.len(), 6);
        assert_eq!(table.last_rowid, 0);

        let id_column = table
            .columns
            .iter()
            .find(|c| c.column_name == "id")
            .expect("column not found");
        assert!(id_column.is_pk);
        assert_eq!(id_column.datatype, DataType::Integer);
    }

    #[test]
    fn print_table_schema_test() {
        let table = table_from_sql(
            "CREATE TABLE contacts (
            id INTEGER PRIMARY KEY,
            first_name TEXT NOT NULL,
            last_name TEXT NOT NULl
        );",
        );

        // Header + 3 columns = 4 rows, reported as 4 * 2 + 1.
        let lines_printed = table.print_table_schema();
        assert_eq!(lines_printed, Ok(9));
    }
}