sqlrite/sql/db/table.rs
1use crate::error::{Result, SQLRiteError};
2use crate::sql::db::secondary_index::{IndexOrigin, SecondaryIndex};
3use crate::sql::hnsw::HnswIndex;
4use crate::sql::parser::create::CreateQuery;
5use std::collections::{BTreeMap, HashMap};
6use std::fmt;
7use std::sync::{Arc, Mutex};
8
9use prettytable::{Cell as PrintCell, Row as PrintRow, Table as PrintTable};
10
/// Column data types understood by SQLRite.
///
/// Modelled on SQLite's storage classes and type affinities; see
/// [Datatypes In SQLite Version 3](https://www.sqlite.org/datatype3.html).
///
/// `Vector(dim)` is the Phase 7a addition: a dense f32 array whose
/// dimension is part of the type, so a `VECTOR(384)` column can reject
/// `[0.1, 0.2, 0.3]` at INSERT time with a clean type error instead of
/// silently storing the wrong shape.
#[derive(PartialEq, Debug, Clone)]
pub enum DataType {
    Integer,
    Text,
    Real,
    Bool,
    /// Dense f32 vector. The payload is the declared dimension; every
    /// value stored in the column must have exactly that many elements.
    Vector(usize),
    /// Phase 7e JSON column. Stored as canonical UTF-8 text (matching
    /// SQLite's JSON1 extension) and validated at INSERT time. The
    /// `json_extract` family parses on demand and returns either a
    /// primitive `Value` (Integer / Real / Text / Bool / Null) or a Text
    /// value carrying the JSON-encoded sub-object/array. Q3 originally
    /// called for `bincoded serde_json::Value`, but bincode left the
    /// engine in Phase 3c; the scope-correction note in
    /// `docs/phase-7-plan.md` explains the switch to text storage.
    Json,
    None,
    Invalid,
}

impl DataType {
    /// Parses the wire string produced by the parser into a `DataType`.
    ///
    /// Matching is case-insensitive. The scalar spellings are `integer`,
    /// `text`, `real`, `bool`, `json` and `none`. Phase 7a adds
    /// `vector(N)` with `N` a positive integer (surrounding whitespace
    /// inside the parentheses is ignored); the dimension is encoded
    /// in-band so the string-based `ParsedColumn` pipeline needs no
    /// richer type.
    ///
    /// Anything else, including a zero, empty or non-numeric vector
    /// dimension, logs a message to stderr and yields
    /// `DataType::Invalid`.
    pub fn new(cmd: String) -> DataType {
        let lower = cmd.to_lowercase();

        // `vector(...)` is the only parameterised spelling, so peel it
        // off first; none of the scalar names can match this shape.
        let dimension_text = lower
            .strip_prefix("vector(")
            .and_then(|rest| rest.strip_suffix(')'));
        if let Some(inside) = dimension_text {
            return match inside.trim().parse::<usize>() {
                Ok(dim) if dim > 0 => DataType::Vector(dim),
                _ => {
                    eprintln!("Invalid VECTOR dimension in {cmd}");
                    DataType::Invalid
                }
            };
        }

        match lower.as_str() {
            "integer" => DataType::Integer,
            "text" => DataType::Text,
            "real" => DataType::Real,
            "bool" => DataType::Bool,
            "json" => DataType::Json,
            "none" => DataType::None,
            _ => {
                eprintln!("Invalid data type given {}", cmd);
                DataType::Invalid
            }
        }
    }

    /// Inverse of [`DataType::new`]: the wire string for this type.
    ///
    /// Scalar types use their capitalised names (`"Integer"`, `"Text"`,
    /// ...) while vectors are spelled `"vector(N)"`. Because `new`
    /// lowercases its input, every string returned here maps back to
    /// the same variant (`"Invalid"` via `new`'s fallback arm). The
    /// parser uses this to carry `VECTOR(N)` through
    /// `ParsedColumn::datatype` as a plain string.
    pub fn to_wire_string(&self) -> String {
        let scalar = match self {
            DataType::Vector(dim) => return format!("vector({dim})"),
            DataType::Integer => "Integer",
            DataType::Text => "Text",
            DataType::Real => "Real",
            DataType::Bool => "Bool",
            DataType::Json => "Json",
            DataType::None => "None",
            DataType::Invalid => "Invalid",
        };
        scalar.to_string()
    }
}

/// Human-readable type name. Note that `Bool` is displayed as
/// `Boolean` and vectors as `Vector(N)`, which differs from
/// [`DataType::to_wire_string`].
impl fmt::Display for DataType {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let label = match self {
            DataType::Vector(dim) => return write!(f, "Vector({dim})"),
            DataType::Integer => "Integer",
            DataType::Text => "Text",
            DataType::Real => "Real",
            DataType::Bool => "Boolean",
            DataType::Json => "Json",
            DataType::None => "None",
            DataType::Invalid => "Invalid",
        };
        f.write_str(label)
    }
}
112
113/// The schema for each SQL Table is represented in memory by
114/// following structure.
115///
116/// `rows` is `Arc<Mutex<...>>` rather than `Rc<RefCell<...>>` so `Table`
117/// (and by extension `Database`) is `Send + Sync` — the Tauri desktop
118/// app holds the engine in shared state behind a `Mutex<Database>`, and
119/// Tauri's state container requires its contents to be thread-safe.
120#[derive(Debug)]
121pub struct Table {
122 /// Name of the table
123 pub tb_name: String,
124 /// Schema for each column, in declaration order.
125 pub columns: Vec<Column>,
126 /// Per-column row storage, keyed by column name. Every column's
127 /// `Row::T(BTreeMap)` is keyed by rowid, so all columns share the same
128 /// keyset after each write.
129 pub rows: Arc<Mutex<HashMap<String, Row>>>,
130 /// Secondary indexes on this table (Phase 3e). One auto-created entry
131 /// per UNIQUE or PRIMARY KEY column; explicit `CREATE INDEX` statements
132 /// add more. Looking up an index: iterate by column name, or by index
133 /// name via `Table::index_by_name`.
134 pub secondary_indexes: Vec<SecondaryIndex>,
135 /// HNSW indexes on VECTOR columns (Phase 7d.2). Maintained in lockstep
136 /// with row storage on INSERT (incremental); rebuilt on open from the
137 /// persisted CREATE INDEX SQL. The graph itself is NOT yet persisted —
138 /// see Phase 7d.3 for cell-encoded graph storage.
139 pub hnsw_indexes: Vec<HnswIndexEntry>,
140 /// ROWID of most recent insert.
141 pub last_rowid: i64,
142 /// PRIMARY KEY column name, or "-1" if the table has no PRIMARY KEY.
143 pub primary_key: String,
144}
145
146/// One HNSW index attached to a table. Phase 7d.2 only supports L2
147/// distance; cosine and dot are 7d.x follow-ups (would require either
148/// distinct USING methods like `hnsw_cosine` or a `WITH (metric = …)`
149/// clause — see `docs/phase-7-plan.md` for the deferred decision).
150#[derive(Debug, Clone)]
151pub struct HnswIndexEntry {
152 /// User-supplied name from `CREATE INDEX <name> …`. Unique across
153 /// both `secondary_indexes` and `hnsw_indexes` on a given table.
154 pub name: String,
155 /// The VECTOR column this index covers.
156 pub column_name: String,
157 /// The graph itself.
158 pub index: HnswIndex,
159 /// Phase 7d.3 — true iff a DELETE or UPDATE-on-vector-col has
160 /// invalidated the graph since the last rebuild. INSERT maintains
161 /// the graph incrementally and leaves this false. The next save
162 /// rebuilds dirty indexes from current rows before serializing.
163 pub needs_rebuild: bool,
164}
165
166impl Table {
167 pub fn new(create_query: CreateQuery) -> Self {
168 let table_name = create_query.table_name;
169 let mut primary_key: String = String::from("-1");
170 let columns = create_query.columns;
171
172 let mut table_cols: Vec<Column> = vec![];
173 let table_rows: Arc<Mutex<HashMap<String, Row>>> = Arc::new(Mutex::new(HashMap::new()));
174 let mut secondary_indexes: Vec<SecondaryIndex> = Vec::new();
175 for col in &columns {
176 let col_name = &col.name;
177 if col.is_pk {
178 primary_key = col_name.to_string();
179 }
180 table_cols.push(Column::new(
181 col_name.to_string(),
182 col.datatype.to_string(),
183 col.is_pk,
184 col.not_null,
185 col.is_unique,
186 ));
187
188 let dt = DataType::new(col.datatype.to_string());
189 let row_storage = match &dt {
190 DataType::Integer => Row::Integer(BTreeMap::new()),
191 DataType::Real => Row::Real(BTreeMap::new()),
192 DataType::Text => Row::Text(BTreeMap::new()),
193 DataType::Bool => Row::Bool(BTreeMap::new()),
194 // The dimension is enforced at INSERT time against the
195 // column's declared DataType::Vector(dim). The Row variant
196 // itself doesn't carry the dim — every stored Vec<f32>
197 // already has it via .len().
198 DataType::Vector(_dim) => Row::Vector(BTreeMap::new()),
199 // Phase 7e — JSON columns reuse Text storage (with
200 // INSERT-time validation that the bytes parse as JSON).
201 // No new Row variant; json_extract / json_type / etc.
202 // re-parse from text on demand. See `docs/phase-7-plan.md`
203 // Q3's scope-correction note for the storage choice.
204 DataType::Json => Row::Text(BTreeMap::new()),
205 DataType::Invalid | DataType::None => Row::None,
206 };
207 table_rows
208 .lock()
209 .expect("Table row storage mutex poisoned")
210 .insert(col.name.to_string(), row_storage);
211
212 // Auto-create an index for every UNIQUE / PRIMARY KEY column,
213 // but only for types we know how to index. Real / Bool / Vector
214 // UNIQUE columns fall back to the linear scan path in
215 // validate_unique_constraint — same behavior as before 3e.
216 // (Vector UNIQUE is unusual; the linear-scan path will work
217 // via Value::Vector PartialEq, just at O(N) cost.)
218 if (col.is_pk || col.is_unique) && matches!(dt, DataType::Integer | DataType::Text) {
219 let name = SecondaryIndex::auto_name(&table_name, &col.name);
220 match SecondaryIndex::new(
221 name,
222 table_name.clone(),
223 col.name.clone(),
224 &dt,
225 true,
226 IndexOrigin::Auto,
227 ) {
228 Ok(idx) => secondary_indexes.push(idx),
229 Err(_) => {
230 // Unreachable given the matches! guard above, but
231 // the builder returns Result so we keep the arm.
232 }
233 }
234 }
235 }
236
237 Table {
238 tb_name: table_name,
239 columns: table_cols,
240 rows: table_rows,
241 secondary_indexes,
242 // HNSW indexes only land via explicit CREATE INDEX … USING hnsw
243 // statements (Phase 7d.2); never auto-created at CREATE TABLE
244 // time, because there's no UNIQUE-style constraint that
245 // implies a vector index.
246 hnsw_indexes: Vec::new(),
247 last_rowid: 0,
248 primary_key,
249 }
250 }
251
252 /// Deep-clones a `Table` for transaction snapshots (Phase 4f).
253 ///
254 /// The normal `Clone` derive would shallow-clone the `Arc<Mutex<_>>`
255 /// wrapping our row storage, leaving both copies sharing the same
256 /// inner map — mutating the snapshot would corrupt the live table
257 /// and vice versa. Instead we lock, clone the inner `HashMap`, and
258 /// wrap it in a fresh `Arc<Mutex<_>>`. Columns and indexes derive
259 /// `Clone` directly (all their fields are plain data).
260 pub fn deep_clone(&self) -> Self {
261 let cloned_rows: HashMap<String, Row> = {
262 let guard = self.rows.lock().expect("row mutex poisoned");
263 guard.clone()
264 };
265 Table {
266 tb_name: self.tb_name.clone(),
267 columns: self.columns.clone(),
268 rows: Arc::new(Mutex::new(cloned_rows)),
269 secondary_indexes: self.secondary_indexes.clone(),
270 // HnswIndexEntry derives Clone, so the snapshot owns its own
271 // graph copy. Phase 4f's snapshot-rollback semantics require
272 // the snapshot to be fully decoupled from live state.
273 hnsw_indexes: self.hnsw_indexes.clone(),
274 last_rowid: self.last_rowid,
275 primary_key: self.primary_key.clone(),
276 }
277 }
278
279 /// Finds an auto- or explicit-index entry for a given column. Returns
280 /// `None` if the column isn't indexed.
281 pub fn index_for_column(&self, column: &str) -> Option<&SecondaryIndex> {
282 self.secondary_indexes
283 .iter()
284 .find(|i| i.column_name == column)
285 }
286
287 fn index_for_column_mut(&mut self, column: &str) -> Option<&mut SecondaryIndex> {
288 self.secondary_indexes
289 .iter_mut()
290 .find(|i| i.column_name == column)
291 }
292
293 /// Finds a secondary index by its own name (e.g., `sqlrite_autoindex_users_email`
294 /// or a user-provided CREATE INDEX name). Used by Phase 3e.2 to look up
295 /// explicit indexes when DROP INDEX lands.
296 #[allow(dead_code)]
297 pub fn index_by_name(&self, name: &str) -> Option<&SecondaryIndex> {
298 self.secondary_indexes.iter().find(|i| i.name == name)
299 }
300
301 /// Returns a `bool` informing if a `Column` with a specific name exists or not
302 ///
303 pub fn contains_column(&self, column: String) -> bool {
304 self.columns.iter().any(|col| col.column_name == column)
305 }
306
307 /// Returns the list of column names in declaration order.
308 pub fn column_names(&self) -> Vec<String> {
309 self.columns.iter().map(|c| c.column_name.clone()).collect()
310 }
311
312 /// Returns all rowids currently stored in the table, in ascending order.
313 /// Every column's BTreeMap has the same keyset, so we just read from the first column.
314 pub fn rowids(&self) -> Vec<i64> {
315 let Some(first) = self.columns.first() else {
316 return vec![];
317 };
318 let rows = self.rows.lock().expect("rows mutex poisoned");
319 rows.get(&first.column_name)
320 .map(|r| r.rowids())
321 .unwrap_or_default()
322 }
323
324 /// Reads a single cell at `(column, rowid)`.
325 pub fn get_value(&self, column: &str, rowid: i64) -> Option<Value> {
326 let rows = self.rows.lock().expect("rows mutex poisoned");
327 rows.get(column).and_then(|r| r.get(rowid))
328 }
329
330 /// Removes the row identified by `rowid` from every column's storage and
331 /// from every secondary index entry.
332 pub fn delete_row(&mut self, rowid: i64) {
333 // Snapshot the values we're about to delete so we can strip them
334 // from secondary indexes by (value, rowid) before the row storage
335 // disappears.
336 let per_column_values: Vec<(String, Option<Value>)> = self
337 .columns
338 .iter()
339 .map(|c| (c.column_name.clone(), self.get_value(&c.column_name, rowid)))
340 .collect();
341
342 // Remove from row storage.
343 {
344 let rows_clone = Arc::clone(&self.rows);
345 let mut row_data = rows_clone.lock().expect("rows mutex poisoned");
346 for col in &self.columns {
347 if let Some(r) = row_data.get_mut(&col.column_name) {
348 match r {
349 Row::Integer(m) => {
350 m.remove(&rowid);
351 }
352 Row::Text(m) => {
353 m.remove(&rowid);
354 }
355 Row::Real(m) => {
356 m.remove(&rowid);
357 }
358 Row::Bool(m) => {
359 m.remove(&rowid);
360 }
361 Row::Vector(m) => {
362 m.remove(&rowid);
363 }
364 Row::None => {}
365 }
366 }
367 }
368 }
369
370 // Strip secondary-index entries. Non-indexed columns just don't
371 // show up in secondary_indexes and are no-ops here.
372 for (col_name, value) in per_column_values {
373 if let Some(idx) = self.index_for_column_mut(&col_name) {
374 if let Some(v) = value {
375 idx.remove(&v, rowid);
376 }
377 }
378 }
379 }
380
381 /// Replays a single row at `rowid` when loading a table from disk. Takes
382 /// one typed value per column (in declaration order); `None` means the
383 /// stored cell carried a NULL for that column. Unlike `insert_row` this
384 /// trusts the on-disk state and does *not* re-check UNIQUE — we're
385 /// rebuilding a state that was already consistent when it was saved.
386 pub fn restore_row(&mut self, rowid: i64, values: Vec<Option<Value>>) -> Result<()> {
387 if values.len() != self.columns.len() {
388 return Err(SQLRiteError::Internal(format!(
389 "cell has {} values but table '{}' has {} columns",
390 values.len(),
391 self.tb_name,
392 self.columns.len()
393 )));
394 }
395
396 let column_names: Vec<String> =
397 self.columns.iter().map(|c| c.column_name.clone()).collect();
398
399 for (i, value) in values.into_iter().enumerate() {
400 let col_name = &column_names[i];
401
402 // Write into the per-column row storage first (scoped borrow so
403 // the secondary-index update below doesn't fight over `self`).
404 {
405 let rows_clone = Arc::clone(&self.rows);
406 let mut row_data = rows_clone.lock().expect("rows mutex poisoned");
407 let cell = row_data.get_mut(col_name).ok_or_else(|| {
408 SQLRiteError::Internal(format!("Row storage missing for column '{col_name}'"))
409 })?;
410
411 match (cell, &value) {
412 (Row::Integer(map), Some(Value::Integer(v))) => {
413 map.insert(rowid, *v as i32);
414 }
415 (Row::Integer(_), None) => {
416 return Err(SQLRiteError::Internal(format!(
417 "Integer column '{col_name}' cannot store NULL — corrupt cell?"
418 )));
419 }
420 (Row::Text(map), Some(Value::Text(s))) => {
421 map.insert(rowid, s.clone());
422 }
423 (Row::Text(map), None) => {
424 // Matches the on-insert convention: NULL in Text
425 // storage is represented by the literal "Null"
426 // sentinel and not added to the index.
427 map.insert(rowid, "Null".to_string());
428 }
429 (Row::Real(map), Some(Value::Real(v))) => {
430 map.insert(rowid, *v as f32);
431 }
432 (Row::Real(_), None) => {
433 return Err(SQLRiteError::Internal(format!(
434 "Real column '{col_name}' cannot store NULL — corrupt cell?"
435 )));
436 }
437 (Row::Bool(map), Some(Value::Bool(v))) => {
438 map.insert(rowid, *v);
439 }
440 (Row::Bool(_), None) => {
441 return Err(SQLRiteError::Internal(format!(
442 "Bool column '{col_name}' cannot store NULL — corrupt cell?"
443 )));
444 }
445 (Row::Vector(map), Some(Value::Vector(v))) => {
446 map.insert(rowid, v.clone());
447 }
448 (Row::Vector(_), None) => {
449 return Err(SQLRiteError::Internal(format!(
450 "Vector column '{col_name}' cannot store NULL — corrupt cell?"
451 )));
452 }
453 (row, v) => {
454 return Err(SQLRiteError::Internal(format!(
455 "Type mismatch restoring column '{col_name}': storage {row:?} vs value {v:?}"
456 )));
457 }
458 }
459 }
460
461 // Maintain the secondary index (if any). NULL values are skipped
462 // by `insert`, matching the "NULL is not indexed" convention.
463 if let Some(v) = &value {
464 if let Some(idx) = self.index_for_column_mut(col_name) {
465 idx.insert(v, rowid)?;
466 }
467 }
468 }
469
470 if rowid > self.last_rowid {
471 self.last_rowid = rowid;
472 }
473 Ok(())
474 }
475
476 /// Extracts a row as an ordered `Vec<Option<Value>>` matching the column
477 /// declaration order. Returns `None` entries for columns that hold NULL.
478 /// Used by `save_database` to turn a table's in-memory state into cells.
479 pub fn extract_row(&self, rowid: i64) -> Vec<Option<Value>> {
480 self.columns
481 .iter()
482 .map(|c| match self.get_value(&c.column_name, rowid) {
483 Some(Value::Null) => None,
484 Some(v) => Some(v),
485 None => None,
486 })
487 .collect()
488 }
489
490 /// Overwrites the cell at `(column, rowid)` with `new_val`. Enforces the
491 /// column's datatype and UNIQUE constraint, and updates any secondary
492 /// index.
493 ///
494 /// Returns `Err` if the column doesn't exist, the value type is incompatible,
495 /// or writing would violate UNIQUE.
496 pub fn set_value(&mut self, column: &str, rowid: i64, new_val: Value) -> Result<()> {
497 let col_index = self
498 .columns
499 .iter()
500 .position(|c| c.column_name == column)
501 .ok_or_else(|| SQLRiteError::General(format!("Column '{column}' not found")))?;
502
503 // No-op write — keep storage exactly the same.
504 let current = self.get_value(column, rowid);
505 if current.as_ref() == Some(&new_val) {
506 return Ok(());
507 }
508
509 // Enforce UNIQUE. Prefer an O(log N) index probe if we have one;
510 // fall back to a full column scan otherwise (Real/Bool UNIQUE
511 // columns, which don't get auto-indexed).
512 if self.columns[col_index].is_unique && !matches!(new_val, Value::Null) {
513 if let Some(idx) = self.index_for_column(column) {
514 for other in idx.lookup(&new_val) {
515 if other != rowid {
516 return Err(SQLRiteError::General(format!(
517 "UNIQUE constraint violated for column '{column}'"
518 )));
519 }
520 }
521 } else {
522 for other in self.rowids() {
523 if other == rowid {
524 continue;
525 }
526 if self.get_value(column, other).as_ref() == Some(&new_val) {
527 return Err(SQLRiteError::General(format!(
528 "UNIQUE constraint violated for column '{column}'"
529 )));
530 }
531 }
532 }
533 }
534
535 // Drop the old index entry before writing the new value, so the
536 // post-write index insert doesn't clash with the previous state.
537 if let Some(old) = current {
538 if let Some(idx) = self.index_for_column_mut(column) {
539 idx.remove(&old, rowid);
540 }
541 }
542
543 // Write into the column's Row, type-checking against the declared DataType.
544 let declared = &self.columns[col_index].datatype;
545 {
546 let rows_clone = Arc::clone(&self.rows);
547 let mut row_data = rows_clone.lock().expect("rows mutex poisoned");
548 let cell = row_data.get_mut(column).ok_or_else(|| {
549 SQLRiteError::Internal(format!("Row storage missing for column '{column}'"))
550 })?;
551
552 match (cell, &new_val, declared) {
553 (Row::Integer(m), Value::Integer(v), _) => {
554 m.insert(rowid, *v as i32);
555 }
556 (Row::Real(m), Value::Real(v), _) => {
557 m.insert(rowid, *v as f32);
558 }
559 (Row::Real(m), Value::Integer(v), _) => {
560 m.insert(rowid, *v as f32);
561 }
562 (Row::Text(m), Value::Text(v), dt) => {
563 // Phase 7e — UPDATE on a JSON column also validates
564 // the new text is well-formed JSON, mirroring INSERT.
565 if matches!(dt, DataType::Json) {
566 if let Err(e) = serde_json::from_str::<serde_json::Value>(v) {
567 return Err(SQLRiteError::General(format!(
568 "Type mismatch: expected JSON for column '{column}', got '{v}': {e}"
569 )));
570 }
571 }
572 m.insert(rowid, v.clone());
573 }
574 (Row::Bool(m), Value::Bool(v), _) => {
575 m.insert(rowid, *v);
576 }
577 (Row::Vector(m), Value::Vector(v), DataType::Vector(declared_dim)) => {
578 if v.len() != *declared_dim {
579 return Err(SQLRiteError::General(format!(
580 "Vector dimension mismatch for column '{column}': declared {declared_dim}, got {}",
581 v.len()
582 )));
583 }
584 m.insert(rowid, v.clone());
585 }
586 // NULL writes: store the sentinel "Null" string for Text; for other
587 // types we leave storage as-is since those BTreeMaps can't hold NULL today.
588 (Row::Text(m), Value::Null, _) => {
589 m.insert(rowid, "Null".to_string());
590 }
591 (_, new, dt) => {
592 return Err(SQLRiteError::General(format!(
593 "Type mismatch: cannot assign {} to column '{column}' of type {dt}",
594 new.to_display_string()
595 )));
596 }
597 }
598 }
599
600 // Maintain the secondary index, if any. NULL values are skipped by
601 // insert per convention.
602 if !matches!(new_val, Value::Null) {
603 if let Some(idx) = self.index_for_column_mut(column) {
604 idx.insert(&new_val, rowid)?;
605 }
606 }
607
608 Ok(())
609 }
610
611 /// Returns an immutable reference of `sql::db::table::Column` if the table contains a
612 /// column with the specified key as a column name.
613 ///
614 #[allow(dead_code)]
615 pub fn get_column(&mut self, column_name: String) -> Result<&Column> {
616 if let Some(column) = self
617 .columns
618 .iter()
619 .filter(|c| c.column_name == column_name)
620 .collect::<Vec<&Column>>()
621 .first()
622 {
623 Ok(column)
624 } else {
625 Err(SQLRiteError::General(String::from("Column not found.")))
626 }
627 }
628
629 /// Validates if columns and values being inserted violate the UNIQUE constraint.
630 /// PRIMARY KEY columns are automatically UNIQUE. Uses the corresponding
631 /// secondary index when one exists (O(log N) lookup); falls back to a
632 /// linear scan for indexable-but-not-indexed situations (e.g. a Real
633 /// UNIQUE column — Real isn't in the auto-indexed set).
634 pub fn validate_unique_constraint(
635 &mut self,
636 cols: &Vec<String>,
637 values: &Vec<String>,
638 ) -> Result<()> {
639 for (idx, name) in cols.iter().enumerate() {
640 let column = self
641 .columns
642 .iter()
643 .find(|c| &c.column_name == name)
644 .ok_or_else(|| SQLRiteError::General(format!("Column '{name}' not found")))?;
645 if !column.is_unique {
646 continue;
647 }
648 let datatype = &column.datatype;
649 let val = &values[idx];
650
651 // Parse the string value into a runtime Value according to the
652 // declared column type. If parsing fails the caller's insert
653 // would also fail with the same error; surface it here so we
654 // don't emit a misleading "unique OK" on bad input.
655 let parsed = match datatype {
656 DataType::Integer => val.parse::<i64>().map(Value::Integer).map_err(|_| {
657 SQLRiteError::General(format!(
658 "Type mismatch: expected INTEGER for column '{name}', got '{val}'"
659 ))
660 })?,
661 DataType::Text => Value::Text(val.clone()),
662 DataType::Real => val.parse::<f64>().map(Value::Real).map_err(|_| {
663 SQLRiteError::General(format!(
664 "Type mismatch: expected REAL for column '{name}', got '{val}'"
665 ))
666 })?,
667 DataType::Bool => val.parse::<bool>().map(Value::Bool).map_err(|_| {
668 SQLRiteError::General(format!(
669 "Type mismatch: expected BOOL for column '{name}', got '{val}'"
670 ))
671 })?,
672 DataType::Vector(declared_dim) => {
673 let parsed_vec = parse_vector_literal(val).map_err(|e| {
674 SQLRiteError::General(format!(
675 "Type mismatch: expected VECTOR({declared_dim}) for column '{name}', {e}"
676 ))
677 })?;
678 if parsed_vec.len() != *declared_dim {
679 return Err(SQLRiteError::General(format!(
680 "Vector dimension mismatch for column '{name}': declared {declared_dim}, got {}",
681 parsed_vec.len()
682 )));
683 }
684 Value::Vector(parsed_vec)
685 }
686 DataType::Json => {
687 // JSON values stored as Text. UNIQUE on a JSON column
688 // compares the canonical text representation
689 // verbatim — `{"a": 1}` and `{"a":1}` are distinct.
690 // Document this if anyone actually requests UNIQUE
691 // JSON; for MVP, treat-as-text is fine.
692 Value::Text(val.clone())
693 }
694 DataType::None | DataType::Invalid => {
695 return Err(SQLRiteError::Internal(format!(
696 "column '{name}' has an unsupported datatype"
697 )));
698 }
699 };
700
701 if let Some(secondary) = self.index_for_column(name) {
702 if secondary.would_violate_unique(&parsed) {
703 return Err(SQLRiteError::General(format!(
704 "UNIQUE constraint violated for column '{name}': value '{val}' already exists"
705 )));
706 }
707 } else {
708 // No secondary index (Real / Bool UNIQUE). Linear scan.
709 for other in self.rowids() {
710 if self.get_value(name, other).as_ref() == Some(&parsed) {
711 return Err(SQLRiteError::General(format!(
712 "UNIQUE constraint violated for column '{name}': value '{val}' already exists"
713 )));
714 }
715 }
716 }
717 }
718 Ok(())
719 }
720
721 /// Inserts all VALUES in its approprieta COLUMNS, using the ROWID an embedded INDEX on all ROWS
722 /// Every `Table` keeps track of the `last_rowid` in order to facilitate what the next one would be.
723 /// One limitation of this data structure is that we can only have one write transaction at a time, otherwise
724 /// we could have a race condition on the last_rowid.
725 ///
726 /// Since we are loosely modeling after SQLite, this is also a limitation of SQLite (allowing only one write transcation at a time),
727 /// So we are good. :)
728 ///
729 /// Returns `Err` (leaving the table unchanged) when the user supplies an
730 /// incompatibly-typed value — no more panics on bad input.
731 pub fn insert_row(&mut self, cols: &Vec<String>, values: &Vec<String>) -> Result<()> {
732 let mut next_rowid = self.last_rowid + 1;
733
734 // Auto-assign INTEGER PRIMARY KEY when the user omits it; otherwise
735 // adopt the supplied value as the new rowid.
736 if self.primary_key != "-1" {
737 if !cols.iter().any(|col| col == &self.primary_key) {
738 // Write the auto-assigned PK into row storage, then sync
739 // the secondary index.
740 let val = next_rowid as i32;
741 let wrote_integer = {
742 let rows_clone = Arc::clone(&self.rows);
743 let mut row_data = rows_clone.lock().expect("rows mutex poisoned");
744 let table_col_data = row_data.get_mut(&self.primary_key).ok_or_else(|| {
745 SQLRiteError::Internal(format!(
746 "Row storage missing for primary key column '{}'",
747 self.primary_key
748 ))
749 })?;
750 match table_col_data {
751 Row::Integer(tree) => {
752 tree.insert(next_rowid, val);
753 true
754 }
755 _ => false, // non-integer PK: auto-assign is a no-op
756 }
757 };
758 if wrote_integer {
759 let pk = self.primary_key.clone();
760 if let Some(idx) = self.index_for_column_mut(&pk) {
761 idx.insert(&Value::Integer(val as i64), next_rowid)?;
762 }
763 }
764 } else {
765 for i in 0..cols.len() {
766 if cols[i] == self.primary_key {
767 let val = &values[i];
768 next_rowid = val.parse::<i64>().map_err(|_| {
769 SQLRiteError::General(format!(
770 "Type mismatch: PRIMARY KEY column '{}' expects INTEGER, got '{val}'",
771 self.primary_key
772 ))
773 })?;
774 }
775 }
776 }
777 }
778
779 // For every table column, either pick the supplied value or pad with NULL
780 // so that every column's BTreeMap keeps the same rowid keyset.
781 let column_names = self
782 .columns
783 .iter()
784 .map(|col| col.column_name.to_string())
785 .collect::<Vec<String>>();
786 let mut j: usize = 0;
787 for i in 0..column_names.len() {
788 let mut val = String::from("Null");
789 let key = &column_names[i];
790
791 if let Some(supplied_key) = cols.get(j) {
792 if supplied_key == &column_names[i] {
793 val = values[j].to_string();
794 j += 1;
795 } else if self.primary_key == column_names[i] {
796 // PK already stored in the auto-assign branch above.
797 continue;
798 }
799 } else if self.primary_key == column_names[i] {
800 continue;
801 }
802
803 // Step 1: write into row storage and compute the typed Value
804 // we'll hand to the secondary index (if any).
805 let typed_value: Option<Value> = {
806 let rows_clone = Arc::clone(&self.rows);
807 let mut row_data = rows_clone.lock().expect("rows mutex poisoned");
808 let table_col_data = row_data.get_mut(key).ok_or_else(|| {
809 SQLRiteError::Internal(format!("Row storage missing for column '{key}'"))
810 })?;
811
812 match table_col_data {
813 Row::Integer(tree) => {
814 let parsed = val.parse::<i32>().map_err(|_| {
815 SQLRiteError::General(format!(
816 "Type mismatch: expected INTEGER for column '{key}', got '{val}'"
817 ))
818 })?;
819 tree.insert(next_rowid, parsed);
820 Some(Value::Integer(parsed as i64))
821 }
822 Row::Text(tree) => {
823 // Phase 7e — JSON columns also reach here (they
824 // share Row::Text storage with TEXT columns).
825 // Validate the value parses as JSON before
826 // storing; otherwise we'd happily write
827 // `not-json-at-all` and only fail when
828 // json_extract tried to parse it later.
829 if matches!(self.columns[i].datatype, DataType::Json) && val != "Null" {
830 if let Err(e) = serde_json::from_str::<serde_json::Value>(&val) {
831 return Err(SQLRiteError::General(format!(
832 "Type mismatch: expected JSON for column '{key}', got '{val}': {e}"
833 )));
834 }
835 }
836 tree.insert(next_rowid, val.to_string());
837 // "Null" sentinel stays out of the index — it isn't a
838 // real user value.
839 if val != "Null" {
840 Some(Value::Text(val.to_string()))
841 } else {
842 None
843 }
844 }
845 Row::Real(tree) => {
846 let parsed = val.parse::<f32>().map_err(|_| {
847 SQLRiteError::General(format!(
848 "Type mismatch: expected REAL for column '{key}', got '{val}'"
849 ))
850 })?;
851 tree.insert(next_rowid, parsed);
852 Some(Value::Real(parsed as f64))
853 }
854 Row::Bool(tree) => {
855 let parsed = val.parse::<bool>().map_err(|_| {
856 SQLRiteError::General(format!(
857 "Type mismatch: expected BOOL for column '{key}', got '{val}'"
858 ))
859 })?;
860 tree.insert(next_rowid, parsed);
861 Some(Value::Bool(parsed))
862 }
863 Row::Vector(tree) => {
864 // The parser put a bracket-array literal into `val`
865 // (e.g. "[0.1,0.2,0.3]"). Parse it back here and
866 // dim-check against the column's declared
867 // DataType::Vector(N).
868 let parsed = parse_vector_literal(&val).map_err(|e| {
869 SQLRiteError::General(format!(
870 "Type mismatch: expected VECTOR for column '{key}', {e}"
871 ))
872 })?;
873 let declared_dim = match &self.columns[i].datatype {
874 DataType::Vector(d) => *d,
875 other => {
876 return Err(SQLRiteError::Internal(format!(
877 "Row::Vector storage on non-Vector column '{key}' (declared as {other})"
878 )));
879 }
880 };
881 if parsed.len() != declared_dim {
882 return Err(SQLRiteError::General(format!(
883 "Vector dimension mismatch for column '{key}': declared {declared_dim}, got {}",
884 parsed.len()
885 )));
886 }
887 tree.insert(next_rowid, parsed.clone());
888 Some(Value::Vector(parsed))
889 }
890 Row::None => {
891 return Err(SQLRiteError::Internal(format!(
892 "Column '{key}' has no row storage"
893 )));
894 }
895 }
896 };
897
898 // Step 2: maintain the secondary index (if any). insert() is a
899 // no-op for Value::Null and cheap for other value kinds.
900 if let Some(v) = typed_value.clone() {
901 if let Some(idx) = self.index_for_column_mut(key) {
902 idx.insert(&v, next_rowid)?;
903 }
904 }
905
906 // Step 3 (Phase 7d.2): maintain any HNSW indexes on this column.
907 // The HNSW algorithm needs access to other rows' vectors when
908 // wiring up neighbor edges, so build a get_vec closure that
909 // pulls from the table's row storage (which we *just* updated
910 // with the new value).
911 if let Some(Value::Vector(new_vec)) = typed_value {
912 self.maintain_hnsw_on_insert(key, next_rowid, &new_vec);
913 }
914 }
915 self.last_rowid = next_rowid;
916 Ok(())
917 }
918
919 /// After a row insert, push the new (rowid, vector) into every HNSW
920 /// index whose column matches `column`. Split out of `insert_row` so
921 /// the borrowing dance — we need both `&self.rows` (read other
922 /// vectors) and `&mut self.hnsw_indexes` (insert into the graph) —
923 /// stays localized.
924 fn maintain_hnsw_on_insert(&mut self, column: &str, rowid: i64, new_vec: &[f32]) {
925 // Snapshot the current vector storage so the get_vec closure
926 // doesn't fight with `&mut self.hnsw_indexes`. For a typical
927 // HNSW insert we touch ef_construction × log(N) other vectors,
928 // so the snapshot cost is small relative to the graph wiring.
929 let mut vec_snapshot: HashMap<i64, Vec<f32>> = HashMap::new();
930 {
931 let row_data = self.rows.lock().expect("rows mutex poisoned");
932 if let Some(Row::Vector(map)) = row_data.get(column) {
933 for (id, v) in map.iter() {
934 vec_snapshot.insert(*id, v.clone());
935 }
936 }
937 }
938 // The new row was just written into row storage — make sure the
939 // snapshot reflects it (it should, but defensive).
940 vec_snapshot.insert(rowid, new_vec.to_vec());
941
942 for entry in &mut self.hnsw_indexes {
943 if entry.column_name == column {
944 entry.index.insert(rowid, new_vec, |id| {
945 vec_snapshot.get(&id).cloned().unwrap_or_default()
946 });
947 }
948 }
949 }
950
951 /// Print the table schema to standard output in a pretty formatted way.
952 ///
953 /// # Example
954 ///
955 /// ```text
956 /// let table = Table::new(payload);
957 /// table.print_table_schema();
958 ///
959 /// Prints to standard output:
960 /// +-------------+-----------+-------------+--------+----------+
961 /// | Column Name | Data Type | PRIMARY KEY | UNIQUE | NOT NULL |
962 /// +-------------+-----------+-------------+--------+----------+
963 /// | id | Integer | true | true | true |
964 /// +-------------+-----------+-------------+--------+----------+
965 /// | name | Text | false | true | false |
966 /// +-------------+-----------+-------------+--------+----------+
967 /// | email | Text | false | false | false |
968 /// +-------------+-----------+-------------+--------+----------+
969 /// ```
970 ///
971 pub fn print_table_schema(&self) -> Result<usize> {
972 let mut table = PrintTable::new();
973 table.add_row(row![
974 "Column Name",
975 "Data Type",
976 "PRIMARY KEY",
977 "UNIQUE",
978 "NOT NULL"
979 ]);
980
981 for col in &self.columns {
982 table.add_row(row![
983 col.column_name,
984 col.datatype,
985 col.is_pk,
986 col.is_unique,
987 col.not_null
988 ]);
989 }
990
991 table.printstd();
992 Ok(table.len() * 2 + 1)
993 }
994
995 /// Print the table data to standard output in a pretty formatted way.
996 ///
997 /// # Example
998 ///
999 /// ```text
1000 /// let db_table = db.get_table_mut(table_name.to_string()).unwrap();
1001 /// db_table.print_table_data();
1002 ///
1003 /// Prints to standard output:
1004 /// +----+---------+------------------------+
1005 /// | id | name | email |
1006 /// +----+---------+------------------------+
1007 /// | 1 | "Jack" | "jack@mail.com" |
1008 /// +----+---------+------------------------+
1009 /// | 10 | "Bob" | "bob@main.com" |
1010 /// +----+---------+------------------------+
1011 /// | 11 | "Bill" | "bill@main.com" |
1012 /// +----+---------+------------------------+
1013 /// ```
1014 ///
1015 pub fn print_table_data(&self) {
1016 let mut print_table = PrintTable::new();
1017
1018 let column_names = self
1019 .columns
1020 .iter()
1021 .map(|col| col.column_name.to_string())
1022 .collect::<Vec<String>>();
1023
1024 let header_row = PrintRow::new(
1025 column_names
1026 .iter()
1027 .map(|col| PrintCell::new(col))
1028 .collect::<Vec<PrintCell>>(),
1029 );
1030
1031 let rows_clone = Arc::clone(&self.rows);
1032 let row_data = rows_clone.lock().expect("rows mutex poisoned");
1033 let first_col_data = row_data
1034 .get(&self.columns.first().unwrap().column_name)
1035 .unwrap();
1036 let num_rows = first_col_data.count();
1037 let mut print_table_rows: Vec<PrintRow> = vec![PrintRow::new(vec![]); num_rows];
1038
1039 for col_name in &column_names {
1040 let col_val = row_data
1041 .get(col_name)
1042 .expect("Can't find any rows with the given column");
1043 let columns: Vec<String> = col_val.get_serialized_col_data();
1044
1045 for i in 0..num_rows {
1046 if let Some(cell) = &columns.get(i) {
1047 print_table_rows[i].add_cell(PrintCell::new(cell));
1048 } else {
1049 print_table_rows[i].add_cell(PrintCell::new(""));
1050 }
1051 }
1052 }
1053
1054 print_table.add_row(header_row);
1055 for row in print_table_rows {
1056 print_table.add_row(row);
1057 }
1058
1059 print_table.printstd();
1060 }
1061}
1062
/// The schema for each SQL column in every table.
///
/// Per-column index state moved to `Table::secondary_indexes` in Phase 3e —
/// a single `Column` describes the declared schema (name, type, constraints)
/// and nothing more.
#[derive(PartialEq, Debug, Clone)]
pub struct Column {
    /// Name of the column; also the key used to look the column up in the
    /// table's row storage.
    pub column_name: String,
    /// Declared data type of the column.
    pub datatype: DataType,
    /// `true` when the column is declared PRIMARY KEY.
    pub is_pk: bool,
    /// `true` when the column is declared NOT NULL.
    pub not_null: bool,
    /// `true` when the column is declared UNIQUE.
    pub is_unique: bool,
}
1076
1077impl Column {
1078 pub fn new(
1079 name: String,
1080 datatype: String,
1081 is_pk: bool,
1082 not_null: bool,
1083 is_unique: bool,
1084 ) -> Self {
1085 let dt = DataType::new(datatype);
1086 Column {
1087 column_name: name,
1088 datatype: dt,
1089 is_pk,
1090 not_null,
1091 is_unique,
1092 }
1093 }
1094}
1095
/// In-memory storage for the values of a single column.
///
/// Each variant wraps a `BTreeMap` that uses the ROWID as key and the
/// column's value for that row as value, so one `Row` holds one column's
/// data across all rows of the table.
#[derive(PartialEq, Debug, Clone)]
pub enum Row {
    /// Integer column; values are stored as `i32` and widened to `i64`
    /// when read through `Row::get`.
    Integer(BTreeMap<i64, i32>),
    /// Text column. The literal string `"Null"` is used as a sentinel for
    /// a missing value and is mapped back to `Value::Null` by `Row::get`.
    /// NOTE(review): JSON columns appear to share this variant — confirm
    /// against `insert_row`.
    Text(BTreeMap<i64, String>),
    /// Real column; values are stored as `f32` and widened to `f64` when
    /// read through `Row::get`.
    Real(BTreeMap<i64, f32>),
    /// Boolean column.
    Bool(BTreeMap<i64, bool>),
    /// Phase 7a: dense f32 vector storage. Each `Vec<f32>` should have
    /// length matching the column's declared `DataType::Vector(dim)`,
    /// enforced at INSERT time. The Row variant doesn't carry the dim —
    /// it lives in the column metadata.
    Vector(BTreeMap<i64, Vec<f32>>),
    /// No storage. `rowids` and `get` treat it as empty; `count` and
    /// `get_serialized_col_data` panic on it.
    None,
}
1114
1115impl Row {
1116 fn get_serialized_col_data(&self) -> Vec<String> {
1117 match self {
1118 Row::Integer(cd) => cd.values().map(|v| v.to_string()).collect(),
1119 Row::Real(cd) => cd.values().map(|v| v.to_string()).collect(),
1120 Row::Text(cd) => cd.values().map(|v| v.to_string()).collect(),
1121 Row::Bool(cd) => cd.values().map(|v| v.to_string()).collect(),
1122 Row::Vector(cd) => cd.values().map(format_vector_for_display).collect(),
1123 Row::None => panic!("Found None in columns"),
1124 }
1125 }
1126
1127 fn count(&self) -> usize {
1128 match self {
1129 Row::Integer(cd) => cd.len(),
1130 Row::Real(cd) => cd.len(),
1131 Row::Text(cd) => cd.len(),
1132 Row::Bool(cd) => cd.len(),
1133 Row::Vector(cd) => cd.len(),
1134 Row::None => panic!("Found None in columns"),
1135 }
1136 }
1137
1138 /// Every column's BTreeMap is keyed by ROWID. All columns share the same keyset
1139 /// after an INSERT (missing columns are padded), so any column's keys are a valid
1140 /// iteration of the table's rowids.
1141 pub fn rowids(&self) -> Vec<i64> {
1142 match self {
1143 Row::Integer(m) => m.keys().copied().collect(),
1144 Row::Text(m) => m.keys().copied().collect(),
1145 Row::Real(m) => m.keys().copied().collect(),
1146 Row::Bool(m) => m.keys().copied().collect(),
1147 Row::Vector(m) => m.keys().copied().collect(),
1148 Row::None => vec![],
1149 }
1150 }
1151
1152 pub fn get(&self, rowid: i64) -> Option<Value> {
1153 match self {
1154 Row::Integer(m) => m.get(&rowid).map(|v| Value::Integer(i64::from(*v))),
1155 // INSERT stores the literal string "Null" in Text columns that were omitted
1156 // from the query — re-map that back to a real NULL on read.
1157 Row::Text(m) => m.get(&rowid).map(|v| {
1158 if v == "Null" {
1159 Value::Null
1160 } else {
1161 Value::Text(v.clone())
1162 }
1163 }),
1164 Row::Real(m) => m.get(&rowid).map(|v| Value::Real(f64::from(*v))),
1165 Row::Bool(m) => m.get(&rowid).map(|v| Value::Bool(*v)),
1166 Row::Vector(m) => m.get(&rowid).map(|v| Value::Vector(v.clone())),
1167 Row::None => None,
1168 }
1169 }
1170}
1171
/// Formats a vector as human-readable text.
///
/// Shared by `Row::get_serialized_col_data` (the REPL's print-table path)
/// and `Value::to_display_string`.
///
/// Output looks like `[0.1, 0.2, 0.3]`: elements separated by `", "` and
/// wrapped in square brackets, each rendered with `f32`'s `Display`, which
/// prints the shortest representation that round-trips (`0.1`, not
/// `0.10000000149011612`). An empty vector renders as `[]`. Long vectors
/// are not truncated.
///
/// The parameter stays `&Vec<f32>` because the function is passed by name
/// to `map` over `&Vec<f32>` items.
fn format_vector_for_display(v: &Vec<f32>) -> String {
    let rendered: Vec<String> = v.iter().map(|component| component.to_string()).collect();
    format!("[{}]", rendered.join(", "))
}
1193
/// Runtime value produced by query execution. Separate from the on-disk `Row` enum
/// so the executor can carry typed values (including NULL) across operators.
#[derive(Debug, Clone, PartialEq)]
pub enum Value {
    /// 64-bit signed integer.
    Integer(i64),
    /// Owned text.
    Text(String),
    /// 64-bit floating-point number.
    Real(f64),
    /// Boolean.
    Bool(bool),
    /// Phase 7a: dense f32 vector as a runtime value. Carries its own
    /// dimension implicitly via `Vec::len`; the column it's being
    /// assigned to has a declared `DataType::Vector(N)` that's checked
    /// at INSERT/UPDATE time.
    Vector(Vec<f32>),
    /// SQL NULL; rendered as `NULL` by `to_display_string`.
    Null,
}
1209
1210impl Value {
1211 pub fn to_display_string(&self) -> String {
1212 match self {
1213 Value::Integer(v) => v.to_string(),
1214 Value::Text(s) => s.clone(),
1215 Value::Real(f) => f.to_string(),
1216 Value::Bool(b) => b.to_string(),
1217 Value::Vector(v) => format_vector_for_display(v),
1218 Value::Null => String::from("NULL"),
1219 }
1220 }
1221}
1222
1223/// Parse a bracket-array literal like `"[0.1, 0.2, 0.3]"` (or `"[1, 2, 3]"`)
1224/// into a `Vec<f32>`. The parser/insert pipeline stores vector literals as
1225/// strings in `InsertQuery::rows` (a `Vec<Vec<String>>`); this helper is
1226/// the inverse — turn the string back into a typed vector at the boundary
1227/// where we actually need element-typed data.
1228///
1229/// Accepts:
1230/// - `[]` → empty vector (caller's dimension check rejects it for VECTOR(N≥1))
1231/// - `[0.1, 0.2, 0.3]` → standard float syntax
1232/// - `[1, 2, 3]` → integers, coerced to f32 (matches `VALUES (1, 2)` for
1233/// `REAL` columns; we widen ints to floats automatically)
1234/// - whitespace tolerated everywhere (Python/JSON/pgvector convention)
1235///
1236/// Rejects with a descriptive message:
1237/// - missing `[` / `]`
1238/// - non-numeric elements (`['foo', 0.1]`)
1239/// - NaN / Inf literals (we accept them via `f32::from_str` but caller can
1240/// reject if undesired — for now we let them through; HNSW etc. will
1241/// reject NaN at index time)
1242pub fn parse_vector_literal(s: &str) -> Result<Vec<f32>> {
1243 let trimmed = s.trim();
1244 if !trimmed.starts_with('[') || !trimmed.ends_with(']') {
1245 return Err(SQLRiteError::General(format!(
1246 "expected bracket-array literal `[...]`, got `{s}`"
1247 )));
1248 }
1249 let inner = &trimmed[1..trimmed.len() - 1].trim();
1250 if inner.is_empty() {
1251 return Ok(Vec::new());
1252 }
1253 let mut out = Vec::new();
1254 for (i, part) in inner.split(',').enumerate() {
1255 let element = part.trim();
1256 let parsed: f32 = element.parse().map_err(|_| {
1257 SQLRiteError::General(format!("vector element {i} (`{element}`) is not a number"))
1258 })?;
1259 out.push(parsed);
1260 }
1261 Ok(out)
1262}
1263
#[cfg(test)]
mod tests {
    use super::*;
    use sqlparser::dialect::SQLiteDialect;
    use sqlparser::parser::Parser;

    /// Parses a single `CREATE TABLE` statement and builds the `Table` it
    /// describes. Panics if the SQL does not parse, does not contain exactly
    /// one statement, or is rejected by `CreateQuery::new`.
    fn table_from_create_sql(sql: &str) -> Table {
        let dialect = SQLiteDialect {};
        let mut ast = Parser::parse_sql(&dialect, sql).unwrap();
        assert_eq!(
            ast.len(),
            1,
            "Expected a single query statement, but found {}.",
            ast.len()
        );
        let query = ast.pop().unwrap();

        let create_query = CreateQuery::new(&query).unwrap();
        Table::new(create_query)
    }

    #[test]
    fn datatype_display_trait_test() {
        let integer = DataType::Integer;
        let text = DataType::Text;
        let real = DataType::Real;
        let boolean = DataType::Bool;
        let vector = DataType::Vector(384);
        let none = DataType::None;
        let invalid = DataType::Invalid;

        assert_eq!(format!("{}", integer), "Integer");
        assert_eq!(format!("{}", text), "Text");
        assert_eq!(format!("{}", real), "Real");
        assert_eq!(format!("{}", boolean), "Boolean");
        assert_eq!(format!("{}", vector), "Vector(384)");
        assert_eq!(format!("{}", none), "None");
        assert_eq!(format!("{}", invalid), "Invalid");
    }

    // -----------------------------------------------------------------
    // Phase 7a — VECTOR(N) column type
    // -----------------------------------------------------------------

    #[test]
    fn datatype_new_parses_vector_dim() {
        // Standard cases.
        assert_eq!(DataType::new("vector(1)".to_string()), DataType::Vector(1));
        assert_eq!(
            DataType::new("vector(384)".to_string()),
            DataType::Vector(384)
        );
        assert_eq!(
            DataType::new("vector(1536)".to_string()),
            DataType::Vector(1536)
        );

        // Case-insensitive on the keyword.
        assert_eq!(
            DataType::new("VECTOR(384)".to_string()),
            DataType::Vector(384)
        );

        // Whitespace inside parens tolerated (the create-parser strips it
        // but the string-based round-trip in DataType::new is the one place
        // we don't fully control input formatting).
        assert_eq!(
            DataType::new("vector( 64 )".to_string()),
            DataType::Vector(64)
        );
    }

    #[test]
    fn datatype_new_rejects_bad_vector_strings() {
        // dim = 0 is rejected (Q2: VECTOR(N≥1)).
        assert_eq!(DataType::new("vector(0)".to_string()), DataType::Invalid);
        // Non-numeric dim.
        assert_eq!(DataType::new("vector(abc)".to_string()), DataType::Invalid);
        // Empty parens.
        assert_eq!(DataType::new("vector()".to_string()), DataType::Invalid);
        // Negative dim wouldn't even parse as usize, so falls into Invalid.
        assert_eq!(DataType::new("vector(-3)".to_string()), DataType::Invalid);
    }

    #[test]
    fn datatype_to_wire_string_round_trips_vector() {
        let dt = DataType::Vector(384);
        let wire = dt.to_wire_string();
        assert_eq!(wire, "vector(384)");
        // And feeds back through DataType::new losslessly — this is the
        // round-trip the ParsedColumn pipeline relies on.
        assert_eq!(DataType::new(wire), DataType::Vector(384));
    }

    #[test]
    fn parse_vector_literal_accepts_floats() {
        let v = parse_vector_literal("[0.1, 0.2, 0.3]").expect("parse");
        assert_eq!(v, vec![0.1f32, 0.2, 0.3]);
    }

    #[test]
    fn parse_vector_literal_accepts_ints_widening_to_f32() {
        let v = parse_vector_literal("[1, 2, 3]").expect("parse");
        assert_eq!(v, vec![1.0f32, 2.0, 3.0]);
    }

    #[test]
    fn parse_vector_literal_handles_negatives_and_whitespace() {
        let v = parse_vector_literal("[ -1.5 ,  2.0,  -3.5 ]").expect("parse");
        assert_eq!(v, vec![-1.5f32, 2.0, -3.5]);
    }

    #[test]
    fn parse_vector_literal_empty_brackets_is_empty_vec() {
        let v = parse_vector_literal("[]").expect("parse");
        assert!(v.is_empty());
    }

    #[test]
    fn parse_vector_literal_rejects_non_bracketed() {
        assert!(parse_vector_literal("0.1, 0.2").is_err());
        assert!(parse_vector_literal("(0.1, 0.2)").is_err());
        assert!(parse_vector_literal("[0.1, 0.2").is_err()); // missing ]
        assert!(parse_vector_literal("0.1, 0.2]").is_err()); // missing [
    }

    #[test]
    fn parse_vector_literal_rejects_non_numeric_elements() {
        let err = parse_vector_literal("[1.0, 'foo', 3.0]").unwrap_err();
        let msg = format!("{err}");
        assert!(
            msg.contains("vector element 1") && msg.contains("'foo'"),
            "error message should pinpoint the bad element: got `{msg}`"
        );
    }

    #[test]
    fn value_vector_display_format() {
        let v = Value::Vector(vec![0.1, 0.2, 0.3]);
        assert_eq!(v.to_display_string(), "[0.1, 0.2, 0.3]");

        // Empty vector displays as `[]`.
        let empty = Value::Vector(vec![]);
        assert_eq!(empty.to_display_string(), "[]");
    }

    #[test]
    fn create_new_table_test() {
        let table = table_from_create_sql(
            "CREATE TABLE contacts (
            id INTEGER PRIMARY KEY,
            first_name TEXT NOT NULL,
            last_name TEXT NOT NULl,
            email TEXT NOT NULL UNIQUE,
            active BOOL,
            score REAL
        );",
        );

        assert_eq!(table.columns.len(), 6);
        assert_eq!(table.last_rowid, 0);

        // `find` stops at the first match; no intermediate Vec is needed.
        let id_column = table
            .columns
            .iter()
            .find(|c| c.column_name == "id")
            .expect("column not found");
        assert!(id_column.is_pk);
        assert_eq!(id_column.datatype, DataType::Integer);
    }

    #[test]
    fn print_table_schema_test() {
        let table = table_from_create_sql(
            "CREATE TABLE contacts (
            id INTEGER PRIMARY KEY,
            first_name TEXT NOT NULL,
            last_name TEXT NOT NULl
        );",
        );

        // Header + 3 columns = 4 rows → 4 * 2 + 1 = 9 printed lines.
        let lines_printed = table.print_table_schema();
        assert_eq!(lines_printed, Ok(9));
    }
}