1#[allow(dead_code)]
33pub mod allocator;
34#[allow(dead_code)]
35pub mod cell;
36pub mod file;
37#[allow(dead_code)]
38pub mod freelist;
39#[allow(dead_code)]
40pub mod fts_cell;
41pub mod header;
42#[allow(dead_code)]
43pub mod hnsw_cell;
44#[allow(dead_code)]
45pub mod index_cell;
46#[allow(dead_code)]
47pub mod interior_page;
48pub mod overflow;
49pub mod page;
50pub mod pager;
51#[allow(dead_code)]
52pub mod table_page;
53#[allow(dead_code)]
54pub mod varint;
55#[allow(dead_code)]
56pub mod wal;
57
58use std::collections::{BTreeMap, HashMap};
59use std::path::Path;
60use std::sync::{Arc, Mutex};
61
62use sqlparser::dialect::SQLiteDialect;
63use sqlparser::parser::Parser;
64
65use crate::error::{Result, SQLRiteError};
66use crate::sql::db::database::Database;
67use crate::sql::db::secondary_index::{IndexOrigin, SecondaryIndex};
68use crate::sql::db::table::{Column, DataType, Row, Table, Value};
69use crate::sql::pager::cell::Cell;
70use crate::sql::pager::header::DbHeader;
71use crate::sql::pager::index_cell::IndexCell;
72use crate::sql::pager::interior_page::{InteriorCell, InteriorPage};
73use crate::sql::pager::overflow::{
74 OVERFLOW_THRESHOLD, OverflowRef, PagedEntry, read_overflow_chain, write_overflow_chain,
75};
76use crate::sql::pager::page::{PAGE_HEADER_SIZE, PAGE_SIZE, PAYLOAD_PER_PAGE, PageType};
77use crate::sql::pager::pager::Pager;
78use crate::sql::pager::table_page::TablePage;
79use crate::sql::parser::create::CreateQuery;
80
81pub use crate::sql::pager::pager::AccessMode;
84
/// Name of the internal catalog table built by `build_empty_master_table`.
///
/// The name is reserved: `save_database_with_mode` returns an error if a user
/// table carries it.
pub const MASTER_TABLE_NAME: &str = "sqlrite_master";
88
89pub fn open_database(path: &Path, db_name: String) -> Result<Database> {
92 open_database_with_mode(path, db_name, AccessMode::ReadWrite)
93}
94
95pub fn open_database_read_only(path: &Path, db_name: String) -> Result<Database> {
101 open_database_with_mode(path, db_name, AccessMode::ReadOnly)
102}
103
104pub fn open_database_with_mode(path: &Path, db_name: String, mode: AccessMode) -> Result<Database> {
108 let pager = Pager::open_with_mode(path, mode)?;
109
110 let mut master = build_empty_master_table();
112 load_table_rows(&pager, &mut master, pager.header().schema_root_page)?;
113
114 let mut db = Database::new(db_name);
121 let mut index_rows: Vec<IndexCatalogRow> = Vec::new();
122
123 for rowid in master.rowids() {
124 let ty = take_text(&master, "type", rowid)?;
125 let name = take_text(&master, "name", rowid)?;
126 let sql = take_text(&master, "sql", rowid)?;
127 let rootpage = take_integer(&master, "rootpage", rowid)? as u32;
128 let last_rowid = take_integer(&master, "last_rowid", rowid)?;
129
130 match ty.as_str() {
131 "table" => {
132 let (parsed_name, columns) = parse_create_sql(&sql)?;
133 if parsed_name != name {
134 return Err(SQLRiteError::Internal(format!(
135 "sqlrite_master row '{name}' carries SQL for '{parsed_name}' — corrupt catalog?"
136 )));
137 }
138 let mut table = build_empty_table(&name, columns, last_rowid);
139 if rootpage != 0 {
140 load_table_rows(&pager, &mut table, rootpage)?;
141 }
142 if last_rowid > table.last_rowid {
143 table.last_rowid = last_rowid;
144 }
145 db.tables.insert(name, table);
146 }
147 "index" => {
148 index_rows.push(IndexCatalogRow {
149 name,
150 sql,
151 rootpage,
152 });
153 }
154 other => {
155 return Err(SQLRiteError::Internal(format!(
156 "sqlrite_master row '{name}' has unknown type '{other}'"
157 )));
158 }
159 }
160 }
161
162 for row in index_rows {
172 if create_index_sql_uses_hnsw(&row.sql) {
173 rebuild_hnsw_index(&mut db, &pager, &row)?;
174 } else if create_index_sql_uses_fts(&row.sql) {
175 rebuild_fts_index(&mut db, &pager, &row)?;
176 } else {
177 attach_index(&mut db, &pager, row)?;
178 }
179 }
180
181 db.source_path = Some(path.to_path_buf());
182 db.pager = Some(pager);
183 Ok(db)
184}
185
/// A catalog row of type `"index"`, collected by `open_database_with_mode`
/// while tables are loaded and processed once every table exists.
struct IndexCatalogRow {
    /// Index name from the catalog's `name` column.
    name: String,
    /// `CREATE INDEX` statement from the catalog's `sql` column.
    sql: String,
    /// Root page from the catalog's `rootpage` column; 0 is treated by the
    /// loaders as "nothing persisted".
    rootpage: u32,
}
193
194pub fn save_database(db: &mut Database, path: &Path) -> Result<()> {
207 save_database_with_mode(db, path, false)
208}
209
210pub fn vacuum_database(db: &mut Database, path: &Path) -> Result<()> {
218 save_database_with_mode(db, path, true)
219}
220
221fn save_database_with_mode(db: &mut Database, path: &Path, compact: bool) -> Result<()> {
226 rebuild_dirty_hnsw_indexes(db);
231 rebuild_dirty_fts_indexes(db);
233
234 let same_path = db.source_path.as_deref() == Some(path);
235 let mut pager = if same_path {
236 match db.pager.take() {
237 Some(p) => p,
238 None if path.exists() => Pager::open(path)?,
239 None => Pager::create(path)?,
240 }
241 } else if path.exists() {
242 Pager::open(path)?
243 } else {
244 Pager::create(path)?
245 };
246
247 let old_header = pager.header();
251 let old_live: std::collections::HashSet<u32> = (1..old_header.page_count).collect();
252
253 let (old_free_leaves, old_free_trunks) = if compact || old_header.freelist_head == 0 {
256 (Vec::new(), Vec::new())
257 } else {
258 crate::sql::pager::freelist::read_freelist(&pager, old_header.freelist_head)?
259 };
260
261 let old_rootpages = if compact {
265 HashMap::new()
266 } else {
267 read_old_rootpages(&pager, old_header.schema_root_page)?
268 };
269
270 pager.clear_staged();
271
272 use std::collections::VecDeque;
275 let initial_freelist: VecDeque<u32> = if compact {
276 VecDeque::new()
277 } else {
278 crate::sql::pager::freelist::freelist_to_deque(old_free_leaves.clone())
279 };
280 let mut alloc = crate::sql::pager::allocator::PageAllocator::new(initial_freelist, 1);
281
282 let mut master_rows: Vec<CatalogEntry> = Vec::new();
285
286 let mut table_names: Vec<&String> = db.tables.keys().collect();
287 table_names.sort();
288 for name in table_names {
289 if name == MASTER_TABLE_NAME {
290 return Err(SQLRiteError::Internal(format!(
291 "user table cannot be named '{MASTER_TABLE_NAME}' (reserved)"
292 )));
293 }
294 if !compact {
295 if let Some(&prev_root) = old_rootpages.get(&("table".to_string(), name.to_string())) {
296 let prev =
297 collect_pages_for_btree(&pager, prev_root, true)?;
298 alloc.set_preferred(prev);
299 }
300 }
301 let table = &db.tables[name];
302 let rootpage = stage_table_btree(&mut pager, table, &mut alloc)?;
303 alloc.finish_preferred();
304 master_rows.push(CatalogEntry {
305 kind: "table".into(),
306 name: name.clone(),
307 sql: table_to_create_sql(table),
308 rootpage,
309 last_rowid: table.last_rowid,
310 });
311 }
312
313 let mut index_entries: Vec<(&Table, &SecondaryIndex)> = Vec::new();
316 for table in db.tables.values() {
317 for idx in &table.secondary_indexes {
318 index_entries.push((table, idx));
319 }
320 }
321 index_entries
322 .sort_by(|(ta, ia), (tb, ib)| ta.tb_name.cmp(&tb.tb_name).then(ia.name.cmp(&ib.name)));
323 for (_table, idx) in index_entries {
324 if !compact {
325 if let Some(&prev_root) =
326 old_rootpages.get(&("index".to_string(), idx.name.to_string()))
327 {
328 let prev =
329 collect_pages_for_btree(&pager, prev_root, false)?;
330 alloc.set_preferred(prev);
331 }
332 }
333 let rootpage = stage_index_btree(&mut pager, idx, &mut alloc)?;
334 alloc.finish_preferred();
335 master_rows.push(CatalogEntry {
336 kind: "index".into(),
337 name: idx.name.clone(),
338 sql: idx.synthesized_sql(),
339 rootpage,
340 last_rowid: 0,
341 });
342 }
343
344 let mut hnsw_entries: Vec<(&Table, &crate::sql::db::table::HnswIndexEntry)> = Vec::new();
353 for table in db.tables.values() {
354 for entry in &table.hnsw_indexes {
355 hnsw_entries.push((table, entry));
356 }
357 }
358 hnsw_entries
359 .sort_by(|(ta, ea), (tb, eb)| ta.tb_name.cmp(&tb.tb_name).then(ea.name.cmp(&eb.name)));
360 for (table, entry) in hnsw_entries {
361 if !compact {
362 if let Some(&prev_root) =
363 old_rootpages.get(&("index".to_string(), entry.name.to_string()))
364 {
365 let prev =
366 collect_pages_for_btree(&pager, prev_root, false)?;
367 alloc.set_preferred(prev);
368 }
369 }
370 let rootpage = stage_hnsw_btree(&mut pager, &entry.index, &mut alloc)?;
371 alloc.finish_preferred();
372 master_rows.push(CatalogEntry {
373 kind: "index".into(),
374 name: entry.name.clone(),
375 sql: format!(
376 "CREATE INDEX {} ON {} USING hnsw ({})",
377 entry.name, table.tb_name, entry.column_name
378 ),
379 rootpage,
380 last_rowid: 0,
381 });
382 }
383
384 let mut fts_entries: Vec<(&Table, &crate::sql::db::table::FtsIndexEntry)> = Vec::new();
394 for table in db.tables.values() {
395 for entry in &table.fts_indexes {
396 fts_entries.push((table, entry));
397 }
398 }
399 fts_entries
400 .sort_by(|(ta, ea), (tb, eb)| ta.tb_name.cmp(&tb.tb_name).then(ea.name.cmp(&eb.name)));
401 let any_fts = !fts_entries.is_empty();
402 for (table, entry) in fts_entries {
403 if !compact {
404 if let Some(&prev_root) =
405 old_rootpages.get(&("index".to_string(), entry.name.to_string()))
406 {
407 let prev =
408 collect_pages_for_btree(&pager, prev_root, false)?;
409 alloc.set_preferred(prev);
410 }
411 }
412 let rootpage = stage_fts_btree(&mut pager, &entry.index, &mut alloc)?;
413 alloc.finish_preferred();
414 master_rows.push(CatalogEntry {
415 kind: "index".into(),
416 name: entry.name.clone(),
417 sql: format!(
418 "CREATE INDEX {} ON {} USING fts ({})",
419 entry.name, table.tb_name, entry.column_name
420 ),
421 rootpage,
422 last_rowid: 0,
423 });
424 }
425
426 let mut master = build_empty_master_table();
432 for (i, entry) in master_rows.into_iter().enumerate() {
433 let rowid = (i as i64) + 1;
434 master.restore_row(
435 rowid,
436 vec![
437 Some(Value::Text(entry.kind)),
438 Some(Value::Text(entry.name)),
439 Some(Value::Text(entry.sql)),
440 Some(Value::Integer(entry.rootpage as i64)),
441 Some(Value::Integer(entry.last_rowid)),
442 ],
443 )?;
444 }
445 if !compact && old_header.schema_root_page != 0 {
446 let prev = collect_pages_for_btree(
447 &pager,
448 old_header.schema_root_page,
449 true,
450 )?;
451 alloc.set_preferred(prev);
452 }
453 let master_root = stage_table_btree(&mut pager, &master, &mut alloc)?;
454 alloc.finish_preferred();
455
456 if !compact {
466 let used = alloc.used().clone();
467 let mut newly_freed: Vec<u32> = old_live
468 .iter()
469 .copied()
470 .filter(|p| !used.contains(p))
471 .collect();
472 let _ = &old_free_trunks; alloc.add_to_freelist(newly_freed.drain(..));
474 }
475
476 let new_free_pages = alloc.drain_freelist();
483 let new_freelist_head =
484 crate::sql::pager::freelist::stage_freelist(&mut pager, new_free_pages)?;
485
486 use crate::sql::pager::header::{FORMAT_VERSION_V5, FORMAT_VERSION_V6};
490 let format_version = if new_freelist_head != 0 {
491 FORMAT_VERSION_V6
492 } else if any_fts {
493 std::cmp::max(FORMAT_VERSION_V5, old_header.format_version)
496 } else {
497 old_header.format_version
499 };
500
501 pager.commit(DbHeader {
502 page_count: alloc.high_water(),
503 schema_root_page: master_root,
504 format_version,
505 freelist_head: new_freelist_head,
506 })?;
507
508 if same_path {
509 db.pager = Some(pager);
510 }
511 Ok(())
512}
513
/// One row destined for the catalog table, accumulated by
/// `save_database_with_mode` while b-trees are staged.
struct CatalogEntry {
    /// Catalog `type` column: `"table"` or `"index"`.
    kind: String,
    /// Catalog `name` column: the table or index name.
    name: String,
    /// Catalog `sql` column: the `CREATE ...` statement for the object.
    sql: String,
    /// Catalog `rootpage` column: root page returned by the staging call.
    rootpage: u32,
    /// Catalog `last_rowid` column: the table's `last_rowid`; 0 for indexes.
    last_rowid: i64,
}
522
523fn build_empty_master_table() -> Table {
527 let columns = vec![
530 Column::new("type".into(), "text".into(), false, true, false),
531 Column::new("name".into(), "text".into(), true, true, true),
532 Column::new("sql".into(), "text".into(), false, true, false),
533 Column::new("rootpage".into(), "integer".into(), false, true, false),
534 Column::new("last_rowid".into(), "integer".into(), false, true, false),
535 ];
536 build_empty_table(MASTER_TABLE_NAME, columns, 0)
537}
538
539fn take_text(table: &Table, col: &str, rowid: i64) -> Result<String> {
541 match table.get_value(col, rowid) {
542 Some(Value::Text(s)) => Ok(s),
543 other => Err(SQLRiteError::Internal(format!(
544 "sqlrite_master column '{col}' at rowid {rowid}: expected Text, got {other:?}"
545 ))),
546 }
547}
548
549fn take_integer(table: &Table, col: &str, rowid: i64) -> Result<i64> {
551 match table.get_value(col, rowid) {
552 Some(Value::Integer(v)) => Ok(v),
553 other => Err(SQLRiteError::Internal(format!(
554 "sqlrite_master column '{col}' at rowid {rowid}: expected Integer, got {other:?}"
555 ))),
556 }
557}
558
559fn table_to_create_sql(table: &Table) -> String {
565 let mut parts = Vec::with_capacity(table.columns.len());
566 for c in &table.columns {
567 let ty: String = match &c.datatype {
571 DataType::Integer => "INTEGER".to_string(),
572 DataType::Text => "TEXT".to_string(),
573 DataType::Real => "REAL".to_string(),
574 DataType::Bool => "BOOLEAN".to_string(),
575 DataType::Vector(dim) => format!("VECTOR({dim})"),
576 DataType::Json => "JSON".to_string(),
577 DataType::None | DataType::Invalid => "TEXT".to_string(),
578 };
579 let mut piece = format!("{} {}", c.column_name, ty);
580 if c.is_pk {
581 piece.push_str(" PRIMARY KEY");
582 } else {
583 if c.is_unique {
584 piece.push_str(" UNIQUE");
585 }
586 if c.not_null {
587 piece.push_str(" NOT NULL");
588 }
589 }
590 if let Some(default) = &c.default {
591 piece.push_str(" DEFAULT ");
592 piece.push_str(&render_default_literal(default));
593 }
594 parts.push(piece);
595 }
596 format!("CREATE TABLE {} ({});", table.tb_name, parts.join(", "))
597}
598
599fn render_default_literal(value: &Value) -> String {
605 match value {
606 Value::Integer(i) => i.to_string(),
607 Value::Real(f) => f.to_string(),
608 Value::Bool(b) => {
609 if *b {
610 "TRUE".to_string()
611 } else {
612 "FALSE".to_string()
613 }
614 }
615 Value::Text(s) => format!("'{}'", s.replace('\'', "''")),
616 Value::Null => "NULL".to_string(),
617 Value::Vector(_) => value.to_display_string(),
618 }
619}
620
621fn parse_create_sql(sql: &str) -> Result<(String, Vec<Column>)> {
624 let dialect = SQLiteDialect {};
625 let mut ast = Parser::parse_sql(&dialect, sql).map_err(SQLRiteError::from)?;
626 let stmt = ast.pop().ok_or_else(|| {
627 SQLRiteError::Internal("sqlrite_master row held an empty SQL string".to_string())
628 })?;
629 let create = CreateQuery::new(&stmt)?;
630 let columns = create
631 .columns
632 .into_iter()
633 .map(|pc| {
634 Column::with_default(
635 pc.name,
636 pc.datatype,
637 pc.is_pk,
638 pc.not_null,
639 pc.is_unique,
640 pc.default,
641 )
642 })
643 .collect();
644 Ok((create.table_name, columns))
645}
646
647fn build_empty_table(name: &str, columns: Vec<Column>, last_rowid: i64) -> Table {
652 let rows: Arc<Mutex<HashMap<String, Row>>> = Arc::new(Mutex::new(HashMap::new()));
653 let mut secondary_indexes: Vec<SecondaryIndex> = Vec::new();
654 {
655 let mut map = rows.lock().expect("rows mutex poisoned");
656 for col in &columns {
657 let row = match &col.datatype {
664 DataType::Integer => Row::Integer(BTreeMap::new()),
665 DataType::Text => Row::Text(BTreeMap::new()),
666 DataType::Real => Row::Real(BTreeMap::new()),
667 DataType::Bool => Row::Bool(BTreeMap::new()),
668 DataType::Vector(_dim) => Row::Vector(BTreeMap::new()),
669 DataType::Json => Row::Text(BTreeMap::new()),
672 DataType::None | DataType::Invalid => Row::None,
673 };
674 map.insert(col.column_name.clone(), row);
675
676 if (col.is_pk || col.is_unique)
679 && matches!(col.datatype, DataType::Integer | DataType::Text)
680 {
681 if let Ok(idx) = SecondaryIndex::new(
682 SecondaryIndex::auto_name(name, &col.column_name),
683 name.to_string(),
684 col.column_name.clone(),
685 &col.datatype,
686 true,
687 IndexOrigin::Auto,
688 ) {
689 secondary_indexes.push(idx);
690 }
691 }
692 }
693 }
694
695 let primary_key = columns
696 .iter()
697 .find(|c| c.is_pk)
698 .map(|c| c.column_name.clone())
699 .unwrap_or_else(|| "-1".to_string());
700
701 Table {
702 tb_name: name.to_string(),
703 columns,
704 rows,
705 secondary_indexes,
706 hnsw_indexes: Vec::new(),
714 fts_indexes: Vec::new(),
719 last_rowid,
720 primary_key,
721 }
722}
723
724fn attach_index(db: &mut Database, pager: &Pager, row: IndexCatalogRow) -> Result<()> {
739 let (table_name, column_name, is_unique) = parse_create_index_sql(&row.sql)?;
740
741 let table = db.get_table_mut(table_name.clone()).map_err(|_| {
742 SQLRiteError::Internal(format!(
743 "index '{}' references unknown table '{table_name}' (sqlrite_master out of sync?)",
744 row.name
745 ))
746 })?;
747 let datatype = table
748 .columns
749 .iter()
750 .find(|c| c.column_name == column_name)
751 .map(|c| clone_datatype(&c.datatype))
752 .ok_or_else(|| {
753 SQLRiteError::Internal(format!(
754 "index '{}' references unknown column '{column_name}' on '{table_name}'",
755 row.name
756 ))
757 })?;
758
759 let existing_slot = table
763 .secondary_indexes
764 .iter()
765 .position(|i| i.name == row.name);
766 let idx = match existing_slot {
767 Some(i) => {
768 table.secondary_indexes.remove(i)
772 }
773 None => SecondaryIndex::new(
774 row.name.clone(),
775 table_name.clone(),
776 column_name.clone(),
777 &datatype,
778 is_unique,
779 IndexOrigin::Explicit,
780 )?,
781 };
782 let mut idx = idx;
783 let is_unique_flag = idx.is_unique;
785 let origin = idx.origin;
786 idx = SecondaryIndex::new(
787 idx.name,
788 idx.table_name,
789 idx.column_name,
790 &datatype,
791 is_unique_flag,
792 origin,
793 )?;
794
795 load_index_rows(pager, &mut idx, row.rootpage)?;
797
798 table.secondary_indexes.push(idx);
799 Ok(())
800}
801
802fn load_index_rows(pager: &Pager, idx: &mut SecondaryIndex, root_page: u32) -> Result<()> {
805 if root_page == 0 {
806 return Ok(());
807 }
808 let first_leaf = find_leftmost_leaf(pager, root_page)?;
809 let mut current = first_leaf;
810 while current != 0 {
811 let page_buf = pager
812 .read_page(current)
813 .ok_or_else(|| SQLRiteError::Internal(format!("missing index leaf page {current}")))?;
814 if page_buf[0] != PageType::TableLeaf as u8 {
815 return Err(SQLRiteError::Internal(format!(
816 "page {current} tagged {} but expected TableLeaf (index)",
817 page_buf[0]
818 )));
819 }
820 let next_leaf = u32::from_le_bytes(page_buf[1..5].try_into().unwrap());
821 let payload: &[u8; PAYLOAD_PER_PAGE] = (&page_buf[PAGE_HEADER_SIZE..])
822 .try_into()
823 .map_err(|_| SQLRiteError::Internal("index leaf payload size".to_string()))?;
824 let leaf = TablePage::from_bytes(payload);
825
826 for slot in 0..leaf.slot_count() {
827 let offset = leaf.slot_offset_raw(slot)?;
829 let (ic, _) = IndexCell::decode(leaf.as_bytes(), offset)?;
830 idx.insert(&ic.value, ic.rowid)?;
831 }
832 current = next_leaf;
833 }
834 Ok(())
835}
836
837fn parse_create_index_sql(sql: &str) -> Result<(String, String, bool)> {
843 use sqlparser::ast::{CreateIndex, Expr, Statement};
844
845 let dialect = SQLiteDialect {};
846 let mut ast = Parser::parse_sql(&dialect, sql).map_err(SQLRiteError::from)?;
847 let Some(Statement::CreateIndex(CreateIndex {
848 table_name,
849 columns,
850 unique,
851 ..
852 })) = ast.pop()
853 else {
854 return Err(SQLRiteError::Internal(format!(
855 "sqlrite_master index row's SQL isn't a CREATE INDEX: {sql}"
856 )));
857 };
858 if columns.len() != 1 {
859 return Err(SQLRiteError::NotImplemented(
860 "multi-column indexes aren't supported yet".to_string(),
861 ));
862 }
863 let col = match &columns[0].column.expr {
864 Expr::Identifier(ident) => ident.value.clone(),
865 Expr::CompoundIdentifier(parts) => {
866 parts.last().map(|p| p.value.clone()).unwrap_or_default()
867 }
868 other => {
869 return Err(SQLRiteError::Internal(format!(
870 "unsupported indexed column expression: {other:?}"
871 )));
872 }
873 };
874 Ok((table_name.to_string(), col, unique))
875}
876
877fn create_index_sql_uses_hnsw(sql: &str) -> bool {
883 use sqlparser::ast::{CreateIndex, IndexType, Statement};
884
885 let dialect = SQLiteDialect {};
886 let Ok(mut ast) = Parser::parse_sql(&dialect, sql) else {
887 return false;
888 };
889 let Some(Statement::CreateIndex(CreateIndex { using, .. })) = ast.pop() else {
890 return false;
891 };
892 matches!(using, Some(IndexType::Custom(ident)) if ident.value.eq_ignore_ascii_case("hnsw"))
893}
894
895fn create_index_sql_uses_fts(sql: &str) -> bool {
898 use sqlparser::ast::{CreateIndex, IndexType, Statement};
899
900 let dialect = SQLiteDialect {};
901 let Ok(mut ast) = Parser::parse_sql(&dialect, sql) else {
902 return false;
903 };
904 let Some(Statement::CreateIndex(CreateIndex { using, .. })) = ast.pop() else {
905 return false;
906 };
907 matches!(using, Some(IndexType::Custom(ident)) if ident.value.eq_ignore_ascii_case("fts"))
908}
909
910fn rebuild_fts_index(db: &mut Database, pager: &Pager, row: &IndexCatalogRow) -> Result<()> {
923 use crate::sql::db::table::FtsIndexEntry;
924 use crate::sql::executor::execute_create_index;
925 use crate::sql::fts::PostingList;
926 use sqlparser::ast::Statement;
927
928 let dialect = SQLiteDialect {};
929 let mut ast = Parser::parse_sql(&dialect, &row.sql).map_err(SQLRiteError::from)?;
930 let Some(stmt @ Statement::CreateIndex(_)) = ast.pop() else {
931 return Err(SQLRiteError::Internal(format!(
932 "sqlrite_master FTS row's SQL isn't a CREATE INDEX: {}",
933 row.sql
934 )));
935 };
936
937 if row.rootpage == 0 {
938 execute_create_index(&stmt, db)?;
940 return Ok(());
941 }
942
943 let (doc_lengths, postings) = load_fts_postings(pager, row.rootpage)?;
944 let index = PostingList::from_persisted_postings(doc_lengths, postings);
945 let (tbl_name, col_name) = parse_fts_create_index_sql(&row.sql)?;
946 let table_mut = db.get_table_mut(tbl_name.clone()).map_err(|_| {
947 SQLRiteError::Internal(format!(
948 "FTS index '{}' references unknown table '{tbl_name}'",
949 row.name
950 ))
951 })?;
952 table_mut.fts_indexes.push(FtsIndexEntry {
953 name: row.name.clone(),
954 column_name: col_name,
955 index,
956 needs_rebuild: false,
957 });
958 Ok(())
959}
960
961fn parse_fts_create_index_sql(sql: &str) -> Result<(String, String)> {
964 use sqlparser::ast::{CreateIndex, Expr, Statement};
965
966 let dialect = SQLiteDialect {};
967 let mut ast = Parser::parse_sql(&dialect, sql).map_err(SQLRiteError::from)?;
968 let Some(Statement::CreateIndex(CreateIndex {
969 table_name,
970 columns,
971 ..
972 })) = ast.pop()
973 else {
974 return Err(SQLRiteError::Internal(format!(
975 "sqlrite_master FTS row's SQL isn't a CREATE INDEX: {sql}"
976 )));
977 };
978 if columns.len() != 1 {
979 return Err(SQLRiteError::NotImplemented(
980 "multi-column FTS indexes aren't supported yet".to_string(),
981 ));
982 }
983 let col = match &columns[0].column.expr {
984 Expr::Identifier(ident) => ident.value.clone(),
985 Expr::CompoundIdentifier(parts) => {
986 parts.last().map(|p| p.value.clone()).unwrap_or_default()
987 }
988 other => {
989 return Err(SQLRiteError::Internal(format!(
990 "FTS CREATE INDEX has unexpected column expr: {other:?}"
991 )));
992 }
993 };
994 Ok((table_name.to_string(), col))
995}
996
997fn rebuild_hnsw_index(db: &mut Database, pager: &Pager, row: &IndexCatalogRow) -> Result<()> {
1010 use crate::sql::db::table::HnswIndexEntry;
1011 use crate::sql::executor::execute_create_index;
1012 use crate::sql::hnsw::{DistanceMetric, HnswIndex};
1013 use sqlparser::ast::Statement;
1014
1015 let dialect = SQLiteDialect {};
1016 let mut ast = Parser::parse_sql(&dialect, &row.sql).map_err(SQLRiteError::from)?;
1017 let Some(stmt @ Statement::CreateIndex(_)) = ast.pop() else {
1018 return Err(SQLRiteError::Internal(format!(
1019 "sqlrite_master HNSW row's SQL isn't a CREATE INDEX: {}",
1020 row.sql
1021 )));
1022 };
1023
1024 if row.rootpage == 0 {
1025 execute_create_index(&stmt, db)?;
1027 return Ok(());
1028 }
1029
1030 let nodes = load_hnsw_nodes(pager, row.rootpage)?;
1032 let index = HnswIndex::from_persisted_nodes(DistanceMetric::L2, 0xC0FFEE, nodes);
1033
1034 let (tbl_name, col_name) = parse_hnsw_create_index_sql(&row.sql)?;
1037 let table_mut = db.get_table_mut(tbl_name.clone()).map_err(|_| {
1038 SQLRiteError::Internal(format!(
1039 "HNSW index '{}' references unknown table '{tbl_name}'",
1040 row.name
1041 ))
1042 })?;
1043 table_mut.hnsw_indexes.push(HnswIndexEntry {
1044 name: row.name.clone(),
1045 column_name: col_name,
1046 index,
1047 needs_rebuild: false,
1048 });
1049 Ok(())
1050}
1051
1052fn load_hnsw_nodes(pager: &Pager, root_page: u32) -> Result<Vec<(i64, Vec<Vec<i64>>)>> {
1058 use crate::sql::pager::hnsw_cell::HnswNodeCell;
1059
1060 let mut nodes: Vec<(i64, Vec<Vec<i64>>)> = Vec::new();
1061 let first_leaf = find_leftmost_leaf(pager, root_page)?;
1062 let mut current = first_leaf;
1063 while current != 0 {
1064 let page_buf = pager
1065 .read_page(current)
1066 .ok_or_else(|| SQLRiteError::Internal(format!("missing HNSW leaf page {current}")))?;
1067 if page_buf[0] != PageType::TableLeaf as u8 {
1068 return Err(SQLRiteError::Internal(format!(
1069 "page {current} tagged {} but expected TableLeaf (HNSW)",
1070 page_buf[0]
1071 )));
1072 }
1073 let next_leaf = u32::from_le_bytes(page_buf[1..5].try_into().unwrap());
1074 let payload: &[u8; PAYLOAD_PER_PAGE] = (&page_buf[PAGE_HEADER_SIZE..])
1075 .try_into()
1076 .map_err(|_| SQLRiteError::Internal("HNSW leaf payload size".to_string()))?;
1077 let leaf = TablePage::from_bytes(payload);
1078 for slot in 0..leaf.slot_count() {
1079 let offset = leaf.slot_offset_raw(slot)?;
1080 let (cell, _) = HnswNodeCell::decode(leaf.as_bytes(), offset)?;
1081 nodes.push((cell.node_id, cell.layers));
1082 }
1083 current = next_leaf;
1084 }
1085 Ok(nodes)
1086}
1087
1088fn parse_hnsw_create_index_sql(sql: &str) -> Result<(String, String)> {
1094 use sqlparser::ast::{CreateIndex, Expr, Statement};
1095
1096 let dialect = SQLiteDialect {};
1097 let mut ast = Parser::parse_sql(&dialect, sql).map_err(SQLRiteError::from)?;
1098 let Some(Statement::CreateIndex(CreateIndex {
1099 table_name,
1100 columns,
1101 ..
1102 })) = ast.pop()
1103 else {
1104 return Err(SQLRiteError::Internal(format!(
1105 "sqlrite_master HNSW row's SQL isn't a CREATE INDEX: {sql}"
1106 )));
1107 };
1108 if columns.len() != 1 {
1109 return Err(SQLRiteError::NotImplemented(
1110 "multi-column HNSW indexes aren't supported yet".to_string(),
1111 ));
1112 }
1113 let col = match &columns[0].column.expr {
1114 Expr::Identifier(ident) => ident.value.clone(),
1115 Expr::CompoundIdentifier(parts) => {
1116 parts.last().map(|p| p.value.clone()).unwrap_or_default()
1117 }
1118 other => {
1119 return Err(SQLRiteError::Internal(format!(
1120 "unsupported HNSW indexed column expression: {other:?}"
1121 )));
1122 }
1123 };
1124 Ok((table_name.to_string(), col))
1125}
1126
1127fn rebuild_dirty_hnsw_indexes(db: &mut Database) {
1139 use crate::sql::hnsw::{DistanceMetric, HnswIndex};
1140
1141 for table in db.tables.values_mut() {
1142 let dirty: Vec<(String, String)> = table
1146 .hnsw_indexes
1147 .iter()
1148 .filter(|e| e.needs_rebuild)
1149 .map(|e| (e.name.clone(), e.column_name.clone()))
1150 .collect();
1151 if dirty.is_empty() {
1152 continue;
1153 }
1154
1155 for (idx_name, col_name) in dirty {
1156 let mut vectors: Vec<(i64, Vec<f32>)> = Vec::new();
1158 {
1159 let row_data = table.rows.lock().expect("rows mutex poisoned");
1160 if let Some(Row::Vector(map)) = row_data.get(&col_name) {
1161 for (id, v) in map.iter() {
1162 vectors.push((*id, v.clone()));
1163 }
1164 }
1165 }
1166 let snapshot: std::collections::HashMap<i64, Vec<f32>> =
1169 vectors.iter().cloned().collect();
1170
1171 let mut new_idx = HnswIndex::new(DistanceMetric::L2, 0xC0FFEE);
1172 vectors.sort_by_key(|(id, _)| *id);
1174 for (id, v) in &vectors {
1175 new_idx.insert(*id, v, |q| snapshot.get(&q).cloned().unwrap_or_default());
1176 }
1177
1178 if let Some(entry) = table.hnsw_indexes.iter_mut().find(|e| e.name == idx_name) {
1180 entry.index = new_idx;
1181 entry.needs_rebuild = false;
1182 }
1183 }
1184 }
1185}
1186
1187fn rebuild_dirty_fts_indexes(db: &mut Database) {
1192 use crate::sql::fts::PostingList;
1193
1194 for table in db.tables.values_mut() {
1195 let dirty: Vec<(String, String)> = table
1196 .fts_indexes
1197 .iter()
1198 .filter(|e| e.needs_rebuild)
1199 .map(|e| (e.name.clone(), e.column_name.clone()))
1200 .collect();
1201 if dirty.is_empty() {
1202 continue;
1203 }
1204
1205 for (idx_name, col_name) in dirty {
1206 let mut docs: Vec<(i64, String)> = Vec::new();
1209 {
1210 let row_data = table.rows.lock().expect("rows mutex poisoned");
1211 if let Some(Row::Text(map)) = row_data.get(&col_name) {
1212 for (id, v) in map.iter() {
1213 if v != "Null" {
1219 docs.push((*id, v.clone()));
1220 }
1221 }
1222 }
1223 }
1224
1225 let mut new_idx = PostingList::new();
1226 docs.sort_by_key(|(id, _)| *id);
1231 for (id, text) in &docs {
1232 new_idx.insert(*id, text);
1233 }
1234
1235 if let Some(entry) = table.fts_indexes.iter_mut().find(|e| e.name == idx_name) {
1236 entry.index = new_idx;
1237 entry.needs_rebuild = false;
1238 }
1239 }
1240 }
1241}
1242
1243fn clone_datatype(dt: &DataType) -> DataType {
1245 match dt {
1246 DataType::Integer => DataType::Integer,
1247 DataType::Text => DataType::Text,
1248 DataType::Real => DataType::Real,
1249 DataType::Bool => DataType::Bool,
1250 DataType::Vector(dim) => DataType::Vector(*dim),
1251 DataType::Json => DataType::Json,
1252 DataType::None => DataType::None,
1253 DataType::Invalid => DataType::Invalid,
1254 }
1255}
1256
1257fn stage_index_btree(
1266 pager: &mut Pager,
1267 idx: &SecondaryIndex,
1268 alloc: &mut crate::sql::pager::allocator::PageAllocator,
1269) -> Result<u32> {
1270 let leaves = stage_index_leaves(pager, idx, alloc)?;
1272 if leaves.len() == 1 {
1273 return Ok(leaves[0].0);
1274 }
1275 let mut level: Vec<(u32, i64)> = leaves;
1276 while level.len() > 1 {
1277 level = stage_interior_level(pager, &level, alloc)?;
1278 }
1279 Ok(level[0].0)
1280}
1281
1282fn stage_index_leaves(
1289 pager: &mut Pager,
1290 idx: &SecondaryIndex,
1291 alloc: &mut crate::sql::pager::allocator::PageAllocator,
1292) -> Result<Vec<(u32, i64)>> {
1293 let mut leaves: Vec<(u32, i64)> = Vec::new();
1294 let mut current_leaf = TablePage::empty();
1295 let mut current_leaf_page = alloc.allocate();
1296 let mut current_max_rowid: Option<i64> = None;
1297
1298 let mut entries: Vec<(Value, i64)> = idx.iter_entries().collect();
1302 entries.sort_by_key(|(_, r)| *r);
1303
1304 for (value, rowid) in entries {
1305 let cell = IndexCell::new(rowid, value);
1306 let entry_bytes = cell.encode()?;
1307
1308 if !current_leaf.would_fit(entry_bytes.len()) {
1309 let next_leaf_page_num = alloc.allocate();
1310 emit_leaf(pager, current_leaf_page, ¤t_leaf, next_leaf_page_num);
1311 leaves.push((current_leaf_page, current_max_rowid.unwrap_or(i64::MIN)));
1312 current_leaf = TablePage::empty();
1313 current_leaf_page = next_leaf_page_num;
1314
1315 if !current_leaf.would_fit(entry_bytes.len()) {
1316 return Err(SQLRiteError::Internal(format!(
1317 "index entry of {} bytes exceeds empty-page capacity {}",
1318 entry_bytes.len(),
1319 current_leaf.free_space()
1320 )));
1321 }
1322 }
1323 current_leaf.insert_entry(rowid, &entry_bytes)?;
1324 current_max_rowid = Some(rowid);
1325 }
1326
1327 emit_leaf(pager, current_leaf_page, ¤t_leaf, 0);
1328 leaves.push((current_leaf_page, current_max_rowid.unwrap_or(i64::MIN)));
1329 Ok(leaves)
1330}
1331
1332fn stage_hnsw_btree(
1343 pager: &mut Pager,
1344 idx: &crate::sql::hnsw::HnswIndex,
1345 alloc: &mut crate::sql::pager::allocator::PageAllocator,
1346) -> Result<u32> {
1347 let leaves = stage_hnsw_leaves(pager, idx, alloc)?;
1348 if leaves.len() == 1 {
1349 return Ok(leaves[0].0);
1350 }
1351 let mut level: Vec<(u32, i64)> = leaves;
1352 while level.len() > 1 {
1353 level = stage_interior_level(pager, &level, alloc)?;
1354 }
1355 Ok(level[0].0)
1356}
1357
1358fn stage_fts_btree(
1364 pager: &mut Pager,
1365 idx: &crate::sql::fts::PostingList,
1366 alloc: &mut crate::sql::pager::allocator::PageAllocator,
1367) -> Result<u32> {
1368 let leaves = stage_fts_leaves(pager, idx, alloc)?;
1369 if leaves.len() == 1 {
1370 return Ok(leaves[0].0);
1371 }
1372 let mut level: Vec<(u32, i64)> = leaves;
1373 while level.len() > 1 {
1374 level = stage_interior_level(pager, &level, alloc)?;
1375 }
1376 Ok(level[0].0)
1377}
1378
1379fn stage_fts_leaves(
1386 pager: &mut Pager,
1387 idx: &crate::sql::fts::PostingList,
1388 alloc: &mut crate::sql::pager::allocator::PageAllocator,
1389) -> Result<Vec<(u32, i64)>> {
1390 use crate::sql::pager::fts_cell::FtsPostingCell;
1391
1392 let mut leaves: Vec<(u32, i64)> = Vec::new();
1393 let mut current_leaf = TablePage::empty();
1394 let mut current_leaf_page = alloc.allocate();
1395 let mut current_max_rowid: Option<i64> = None;
1396
1397 let mut cell_id: i64 = 1;
1401 let mut cells: Vec<FtsPostingCell> = Vec::new();
1402 cells.push(FtsPostingCell::doc_lengths(
1403 cell_id,
1404 idx.serialize_doc_lengths(),
1405 ));
1406 for (term, entries) in idx.serialize_postings() {
1407 cell_id += 1;
1408 cells.push(FtsPostingCell::posting(cell_id, term, entries));
1409 }
1410
1411 for cell in cells {
1412 let entry_bytes = cell.encode()?;
1413
1414 if !current_leaf.would_fit(entry_bytes.len()) {
1415 let next_leaf_page_num = alloc.allocate();
1416 emit_leaf(pager, current_leaf_page, ¤t_leaf, next_leaf_page_num);
1417 leaves.push((current_leaf_page, current_max_rowid.unwrap_or(i64::MIN)));
1418 current_leaf = TablePage::empty();
1419 current_leaf_page = next_leaf_page_num;
1420
1421 if !current_leaf.would_fit(entry_bytes.len()) {
1422 return Err(SQLRiteError::Internal(format!(
1427 "FTS posting cell {} of {} bytes exceeds empty-page capacity {} \
1428 (term too long or too many postings; overflow chaining is Phase 8.1)",
1429 cell.cell_id,
1430 entry_bytes.len(),
1431 current_leaf.free_space()
1432 )));
1433 }
1434 }
1435 current_leaf.insert_entry(cell.cell_id, &entry_bytes)?;
1436 current_max_rowid = Some(cell.cell_id);
1437 }
1438
1439 emit_leaf(pager, current_leaf_page, ¤t_leaf, 0);
1440 leaves.push((current_leaf_page, current_max_rowid.unwrap_or(i64::MIN)));
1441 Ok(leaves)
1442}
1443
/// A list of `(i64, u32)` pairs, as carried in an FTS cell's `entries` field.
type FtsEntries = Vec<(i64, u32)>;
/// One `(term, entries)` pair per posting cell.
type FtsPostings = Vec<(String, FtsEntries)>;
1449
1450fn load_fts_postings(pager: &Pager, root_page: u32) -> Result<(FtsEntries, FtsPostings)> {
1455 use crate::sql::pager::fts_cell::FtsPostingCell;
1456
1457 let mut doc_lengths: Vec<(i64, u32)> = Vec::new();
1458 let mut postings: Vec<(String, Vec<(i64, u32)>)> = Vec::new();
1459 let mut saw_sidecar = false;
1460
1461 let first_leaf = find_leftmost_leaf(pager, root_page)?;
1462 let mut current = first_leaf;
1463 while current != 0 {
1464 let page_buf = pager
1465 .read_page(current)
1466 .ok_or_else(|| SQLRiteError::Internal(format!("missing FTS leaf page {current}")))?;
1467 if page_buf[0] != PageType::TableLeaf as u8 {
1468 return Err(SQLRiteError::Internal(format!(
1469 "page {current} tagged {} but expected TableLeaf (FTS)",
1470 page_buf[0]
1471 )));
1472 }
1473 let next_leaf = u32::from_le_bytes(page_buf[1..5].try_into().unwrap());
1474 let payload: &[u8; PAYLOAD_PER_PAGE] = (&page_buf[PAGE_HEADER_SIZE..])
1475 .try_into()
1476 .map_err(|_| SQLRiteError::Internal("FTS leaf payload size".to_string()))?;
1477 let leaf = TablePage::from_bytes(payload);
1478 for slot in 0..leaf.slot_count() {
1479 let offset = leaf.slot_offset_raw(slot)?;
1480 let (cell, _) = FtsPostingCell::decode(leaf.as_bytes(), offset)?;
1481 if cell.is_doc_lengths() {
1482 if saw_sidecar {
1483 return Err(SQLRiteError::Internal(
1484 "FTS index has more than one doc-lengths sidecar cell".to_string(),
1485 ));
1486 }
1487 saw_sidecar = true;
1488 doc_lengths = cell.entries;
1489 } else {
1490 postings.push((cell.term, cell.entries));
1491 }
1492 }
1493 current = next_leaf;
1494 }
1495
1496 if !saw_sidecar {
1497 return Err(SQLRiteError::Internal(
1498 "FTS index missing doc-lengths sidecar cell — corrupt or truncated tree".to_string(),
1499 ));
1500 }
1501 Ok((doc_lengths, postings))
1502}
1503
1504fn stage_hnsw_leaves(
1508 pager: &mut Pager,
1509 idx: &crate::sql::hnsw::HnswIndex,
1510 alloc: &mut crate::sql::pager::allocator::PageAllocator,
1511) -> Result<Vec<(u32, i64)>> {
1512 use crate::sql::pager::hnsw_cell::HnswNodeCell;
1513
1514 let mut leaves: Vec<(u32, i64)> = Vec::new();
1515 let mut current_leaf = TablePage::empty();
1516 let mut current_leaf_page = alloc.allocate();
1517 let mut current_max_rowid: Option<i64> = None;
1518
1519 let serialized = idx.serialize_nodes();
1520
1521 for (node_id, layers) in serialized {
1526 let cell = HnswNodeCell::new(node_id, layers);
1527 let entry_bytes = cell.encode()?;
1528
1529 if !current_leaf.would_fit(entry_bytes.len()) {
1530 let next_leaf_page_num = alloc.allocate();
1531 emit_leaf(pager, current_leaf_page, ¤t_leaf, next_leaf_page_num);
1532 leaves.push((current_leaf_page, current_max_rowid.unwrap_or(i64::MIN)));
1533 current_leaf = TablePage::empty();
1534 current_leaf_page = next_leaf_page_num;
1535
1536 if !current_leaf.would_fit(entry_bytes.len()) {
1537 return Err(SQLRiteError::Internal(format!(
1538 "HNSW node {node_id} cell of {} bytes exceeds empty-page capacity {}",
1539 entry_bytes.len(),
1540 current_leaf.free_space()
1541 )));
1542 }
1543 }
1544 current_leaf.insert_entry(node_id, &entry_bytes)?;
1545 current_max_rowid = Some(node_id);
1546 }
1547
1548 emit_leaf(pager, current_leaf_page, ¤t_leaf, 0);
1549 leaves.push((current_leaf_page, current_max_rowid.unwrap_or(i64::MIN)));
1550 Ok(leaves)
1551}
1552
1553fn load_table_rows(pager: &Pager, table: &mut Table, root_page: u32) -> Result<()> {
1554 let first_leaf = find_leftmost_leaf(pager, root_page)?;
1555 let mut current = first_leaf;
1556 while current != 0 {
1557 let page_buf = pager
1558 .read_page(current)
1559 .ok_or_else(|| SQLRiteError::Internal(format!("missing leaf page {current}")))?;
1560 if page_buf[0] != PageType::TableLeaf as u8 {
1561 return Err(SQLRiteError::Internal(format!(
1562 "page {current} tagged {} but expected TableLeaf",
1563 page_buf[0]
1564 )));
1565 }
1566 let next_leaf = u32::from_le_bytes(page_buf[1..5].try_into().unwrap());
1567 let payload: &[u8; PAYLOAD_PER_PAGE] = (&page_buf[PAGE_HEADER_SIZE..])
1568 .try_into()
1569 .map_err(|_| SQLRiteError::Internal("leaf payload slice size".to_string()))?;
1570 let leaf = TablePage::from_bytes(payload);
1571
1572 for slot in 0..leaf.slot_count() {
1573 let entry = leaf.entry_at(slot)?;
1574 let cell = match entry {
1575 PagedEntry::Local(c) => c,
1576 PagedEntry::Overflow(r) => {
1577 let body_bytes =
1578 read_overflow_chain(pager, r.first_overflow_page, r.total_body_len)?;
1579 let (c, _) = Cell::decode(&body_bytes, 0)?;
1580 c
1581 }
1582 };
1583 table.restore_row(cell.rowid, cell.values)?;
1584 }
1585 current = next_leaf;
1586 }
1587 Ok(())
1588}
1589
1590fn collect_pages_for_btree(
1601 pager: &Pager,
1602 root_page: u32,
1603 follow_overflow: bool,
1604) -> Result<Vec<u32>> {
1605 if root_page == 0 {
1606 return Ok(Vec::new());
1607 }
1608 let mut pages: Vec<u32> = Vec::new();
1609 let mut stack: Vec<u32> = vec![root_page];
1610
1611 while let Some(p) = stack.pop() {
1612 let buf = pager.read_page(p).ok_or_else(|| {
1613 SQLRiteError::Internal(format!(
1614 "collect_pages: missing page {p} (rooted at {root_page})"
1615 ))
1616 })?;
1617 pages.push(p);
1618 match buf[0] {
1619 t if t == PageType::InteriorNode as u8 => {
1620 let payload: &[u8; PAYLOAD_PER_PAGE] =
1621 (&buf[PAGE_HEADER_SIZE..]).try_into().map_err(|_| {
1622 SQLRiteError::Internal("interior payload slice size".to_string())
1623 })?;
1624 let interior = InteriorPage::from_bytes(payload);
1625 for slot in 0..interior.slot_count() {
1627 let cell = interior.cell_at(slot)?;
1628 stack.push(cell.child_page);
1629 }
1630 stack.push(interior.rightmost_child());
1631 }
1632 t if t == PageType::TableLeaf as u8 => {
1633 if follow_overflow {
1634 let payload: &[u8; PAYLOAD_PER_PAGE] =
1635 (&buf[PAGE_HEADER_SIZE..]).try_into().map_err(|_| {
1636 SQLRiteError::Internal("leaf payload slice size".to_string())
1637 })?;
1638 let leaf = TablePage::from_bytes(payload);
1639 for slot in 0..leaf.slot_count() {
1640 match leaf.entry_at(slot)? {
1641 PagedEntry::Local(_) => {}
1642 PagedEntry::Overflow(r) => {
1643 let mut cur = r.first_overflow_page;
1644 while cur != 0 {
1645 pages.push(cur);
1646 let ob = pager.read_page(cur).ok_or_else(|| {
1647 SQLRiteError::Internal(format!(
1648 "collect_pages: missing overflow page {cur}"
1649 ))
1650 })?;
1651 if ob[0] != PageType::Overflow as u8 {
1652 return Err(SQLRiteError::Internal(format!(
1653 "collect_pages: page {cur} expected Overflow, got tag {}",
1654 ob[0]
1655 )));
1656 }
1657 cur = u32::from_le_bytes(ob[1..5].try_into().unwrap());
1658 }
1659 }
1660 }
1661 }
1662 }
1663 }
1664 other => {
1665 return Err(SQLRiteError::Internal(format!(
1666 "collect_pages: unexpected page type {other} at page {p}"
1667 )));
1668 }
1669 }
1670 }
1671 Ok(pages)
1672}
1673
1674fn read_old_rootpages(pager: &Pager, schema_root: u32) -> Result<HashMap<(String, String), u32>> {
1684 let mut out: HashMap<(String, String), u32> = HashMap::new();
1685 if schema_root == 0 {
1686 return Ok(out);
1687 }
1688 let mut master = build_empty_master_table();
1689 load_table_rows(pager, &mut master, schema_root)?;
1690 for rowid in master.rowids() {
1691 let kind = take_text(&master, "type", rowid)?;
1692 let name = take_text(&master, "name", rowid)?;
1693 let rootpage = take_integer(&master, "rootpage", rowid)? as u32;
1694 out.insert((kind, name), rootpage);
1695 }
1696 Ok(out)
1697}
1698
1699fn find_leftmost_leaf(pager: &Pager, root_page: u32) -> Result<u32> {
1703 let mut current = root_page;
1704 loop {
1705 let page_buf = pager.read_page(current).ok_or_else(|| {
1706 SQLRiteError::Internal(format!("missing page {current} during tree descent"))
1707 })?;
1708 match page_buf[0] {
1709 t if t == PageType::TableLeaf as u8 => return Ok(current),
1710 t if t == PageType::InteriorNode as u8 => {
1711 let payload: &[u8; PAYLOAD_PER_PAGE] =
1712 (&page_buf[PAGE_HEADER_SIZE..]).try_into().map_err(|_| {
1713 SQLRiteError::Internal("interior payload slice size".to_string())
1714 })?;
1715 let interior = InteriorPage::from_bytes(payload);
1716 current = interior.leftmost_child()?;
1717 }
1718 other => {
1719 return Err(SQLRiteError::Internal(format!(
1720 "unexpected page type {other} during tree descent at page {current}"
1721 )));
1722 }
1723 }
1724 }
1725}
1726
1727fn stage_table_btree(
1738 pager: &mut Pager,
1739 table: &Table,
1740 alloc: &mut crate::sql::pager::allocator::PageAllocator,
1741) -> Result<u32> {
1742 let leaves = stage_leaves(pager, table, alloc)?;
1743 if leaves.len() == 1 {
1744 return Ok(leaves[0].0);
1745 }
1746 let mut level: Vec<(u32, i64)> = leaves;
1747 while level.len() > 1 {
1748 level = stage_interior_level(pager, &level, alloc)?;
1749 }
1750 Ok(level[0].0)
1751}
1752
1753fn stage_leaves(
1757 pager: &mut Pager,
1758 table: &Table,
1759 alloc: &mut crate::sql::pager::allocator::PageAllocator,
1760) -> Result<Vec<(u32, i64)>> {
1761 let mut leaves: Vec<(u32, i64)> = Vec::new();
1762 let mut current_leaf = TablePage::empty();
1763 let mut current_leaf_page = alloc.allocate();
1764 let mut current_max_rowid: Option<i64> = None;
1765
1766 for rowid in table.rowids() {
1767 let entry_bytes = build_row_entry(pager, table, rowid, alloc)?;
1768
1769 if !current_leaf.would_fit(entry_bytes.len()) {
1770 let next_leaf_page_num = alloc.allocate();
1774 emit_leaf(pager, current_leaf_page, ¤t_leaf, next_leaf_page_num);
1775 leaves.push((current_leaf_page, current_max_rowid.unwrap_or(i64::MIN)));
1776 current_leaf = TablePage::empty();
1777 current_leaf_page = next_leaf_page_num;
1778 if !current_leaf.would_fit(entry_bytes.len()) {
1782 return Err(SQLRiteError::Internal(format!(
1783 "entry of {} bytes exceeds empty-page capacity {}",
1784 entry_bytes.len(),
1785 current_leaf.free_space()
1786 )));
1787 }
1788 }
1789 current_leaf.insert_entry(rowid, &entry_bytes)?;
1790 current_max_rowid = Some(rowid);
1791 }
1792
1793 emit_leaf(pager, current_leaf_page, ¤t_leaf, 0);
1795 leaves.push((current_leaf_page, current_max_rowid.unwrap_or(i64::MIN)));
1796 Ok(leaves)
1797}
1798
1799fn build_row_entry(
1804 pager: &mut Pager,
1805 table: &Table,
1806 rowid: i64,
1807 alloc: &mut crate::sql::pager::allocator::PageAllocator,
1808) -> Result<Vec<u8>> {
1809 let values = table.extract_row(rowid);
1810 let local_cell = Cell::new(rowid, values);
1811 let local_bytes = local_cell.encode()?;
1812 if local_bytes.len() > OVERFLOW_THRESHOLD {
1813 let overflow_start = write_overflow_chain(pager, &local_bytes, alloc)?;
1814 Ok(OverflowRef {
1815 rowid,
1816 total_body_len: local_bytes.len() as u64,
1817 first_overflow_page: overflow_start,
1818 }
1819 .encode())
1820 } else {
1821 Ok(local_bytes)
1822 }
1823}
1824
1825fn stage_interior_level(
1830 pager: &mut Pager,
1831 children: &[(u32, i64)],
1832 alloc: &mut crate::sql::pager::allocator::PageAllocator,
1833) -> Result<Vec<(u32, i64)>> {
1834 let mut next_level: Vec<(u32, i64)> = Vec::new();
1835 let mut idx = 0usize;
1836
1837 while idx < children.len() {
1838 let interior_page_num = alloc.allocate();
1839
1840 let (mut rightmost_child_page, mut rightmost_child_max) = children[idx];
1845 idx += 1;
1846 let mut interior = InteriorPage::empty(rightmost_child_page);
1847
1848 while idx < children.len() {
1849 let new_divider_cell = InteriorCell {
1850 divider_rowid: rightmost_child_max,
1851 child_page: rightmost_child_page,
1852 };
1853 let new_divider_bytes = new_divider_cell.encode();
1854 if !interior.would_fit(new_divider_bytes.len()) {
1855 break;
1856 }
1857 interior.insert_divider(rightmost_child_max, rightmost_child_page)?;
1858 let (next_child_page, next_child_max) = children[idx];
1859 interior.set_rightmost_child(next_child_page);
1860 rightmost_child_page = next_child_page;
1861 rightmost_child_max = next_child_max;
1862 idx += 1;
1863 }
1864
1865 emit_interior(pager, interior_page_num, &interior);
1866 next_level.push((interior_page_num, rightmost_child_max));
1867 }
1868
1869 Ok(next_level)
1870}
1871
1872fn emit_leaf(pager: &mut Pager, page_num: u32, leaf: &TablePage, next_leaf: u32) {
1874 let mut buf = [0u8; PAGE_SIZE];
1875 buf[0] = PageType::TableLeaf as u8;
1876 buf[1..5].copy_from_slice(&next_leaf.to_le_bytes());
1877 buf[5..7].copy_from_slice(&0u16.to_le_bytes());
1880 buf[PAGE_HEADER_SIZE..].copy_from_slice(leaf.as_bytes());
1881 pager.stage_page(page_num, buf);
1882}
1883
1884fn emit_interior(pager: &mut Pager, page_num: u32, interior: &InteriorPage) {
1888 let mut buf = [0u8; PAGE_SIZE];
1889 buf[0] = PageType::InteriorNode as u8;
1890 buf[1..5].copy_from_slice(&0u32.to_le_bytes());
1891 buf[5..7].copy_from_slice(&0u16.to_le_bytes());
1892 buf[PAGE_HEADER_SIZE..].copy_from_slice(interior.as_bytes());
1893 pager.stage_page(page_num, buf);
1894}
1895
1896#[cfg(test)]
1897mod tests {
1898 use super::*;
1899 use crate::sql::process_command;
1900
1901 fn seed_db() -> Database {
1902 let mut db = Database::new("test".to_string());
1903 process_command(
1904 "CREATE TABLE users (id INTEGER PRIMARY KEY, name TEXT NOT NULL UNIQUE, age INTEGER);",
1905 &mut db,
1906 )
1907 .unwrap();
1908 process_command(
1909 "INSERT INTO users (name, age) VALUES ('alice', 30);",
1910 &mut db,
1911 )
1912 .unwrap();
1913 process_command("INSERT INTO users (name, age) VALUES ('bob', 25);", &mut db).unwrap();
1914 process_command(
1915 "CREATE TABLE notes (id INTEGER PRIMARY KEY, body TEXT);",
1916 &mut db,
1917 )
1918 .unwrap();
1919 process_command("INSERT INTO notes (body) VALUES ('hello');", &mut db).unwrap();
1920 db
1921 }
1922
1923 fn tmp_path(name: &str) -> std::path::PathBuf {
1924 let mut p = std::env::temp_dir();
1925 let pid = std::process::id();
1926 let nanos = std::time::SystemTime::now()
1927 .duration_since(std::time::UNIX_EPOCH)
1928 .map(|d| d.as_nanos())
1929 .unwrap_or(0);
1930 p.push(format!("sqlrite-{pid}-{nanos}-{name}.sqlrite"));
1931 p
1932 }
1933
1934 fn cleanup(path: &std::path::Path) {
1937 let _ = std::fs::remove_file(path);
1938 let mut wal = path.as_os_str().to_owned();
1939 wal.push("-wal");
1940 let _ = std::fs::remove_file(std::path::PathBuf::from(wal));
1941 }
1942
1943 #[test]
1944 fn round_trip_preserves_schema_and_data() {
1945 let path = tmp_path("roundtrip");
1946 let mut db = seed_db();
1947 save_database(&mut db, &path).expect("save");
1948
1949 let loaded = open_database(&path, "test".to_string()).expect("open");
1950 assert_eq!(loaded.tables.len(), 2);
1951
1952 let users = loaded.get_table("users".to_string()).expect("users table");
1953 assert_eq!(users.columns.len(), 3);
1954 let rowids = users.rowids();
1955 assert_eq!(rowids.len(), 2);
1956 let names: Vec<String> = rowids
1957 .iter()
1958 .filter_map(|r| match users.get_value("name", *r) {
1959 Some(Value::Text(s)) => Some(s),
1960 _ => None,
1961 })
1962 .collect();
1963 assert!(names.contains(&"alice".to_string()));
1964 assert!(names.contains(&"bob".to_string()));
1965
1966 let notes = loaded.get_table("notes".to_string()).expect("notes table");
1967 assert_eq!(notes.rowids().len(), 1);
1968
1969 cleanup(&path);
1970 }
1971
1972 #[test]
1977 fn round_trip_preserves_vector_column() {
1978 let path = tmp_path("vec_roundtrip");
1979
1980 {
1982 let mut db = Database::new("test".to_string());
1983 process_command(
1984 "CREATE TABLE docs (id INTEGER PRIMARY KEY, embedding VECTOR(3));",
1985 &mut db,
1986 )
1987 .unwrap();
1988 process_command(
1989 "INSERT INTO docs (embedding) VALUES ([0.1, 0.2, 0.3]);",
1990 &mut db,
1991 )
1992 .unwrap();
1993 process_command(
1994 "INSERT INTO docs (embedding) VALUES ([1.5, -2.0, 3.5]);",
1995 &mut db,
1996 )
1997 .unwrap();
1998 save_database(&mut db, &path).expect("save");
1999 } let loaded = open_database(&path, "test".to_string()).expect("open");
2003 let docs = loaded.get_table("docs".to_string()).expect("docs table");
2004
2005 let embedding_col = docs
2007 .columns
2008 .iter()
2009 .find(|c| c.column_name == "embedding")
2010 .expect("embedding column");
2011 assert!(
2012 matches!(embedding_col.datatype, DataType::Vector(3)),
2013 "expected DataType::Vector(3) after round-trip, got {:?}",
2014 embedding_col.datatype
2015 );
2016
2017 let mut rows: Vec<Vec<f32>> = docs
2019 .rowids()
2020 .iter()
2021 .filter_map(|r| match docs.get_value("embedding", *r) {
2022 Some(Value::Vector(v)) => Some(v),
2023 _ => None,
2024 })
2025 .collect();
2026 rows.sort_by(|a, b| a[0].partial_cmp(&b[0]).unwrap());
2027 assert_eq!(rows.len(), 2);
2028 assert_eq!(rows[0], vec![0.1f32, 0.2, 0.3]);
2029 assert_eq!(rows[1], vec![1.5f32, -2.0, 3.5]);
2030
2031 cleanup(&path);
2032 }
2033
2034 #[test]
2035 fn round_trip_preserves_json_column() {
2036 let path = tmp_path("json_roundtrip");
2041
2042 {
2043 let mut db = Database::new("test".to_string());
2044 process_command(
2045 "CREATE TABLE docs (id INTEGER PRIMARY KEY, payload JSON);",
2046 &mut db,
2047 )
2048 .unwrap();
2049 process_command(
2050 r#"INSERT INTO docs (payload) VALUES ('{"name": "alice", "tags": ["rust","sql"]}');"#,
2051 &mut db,
2052 )
2053 .unwrap();
2054 save_database(&mut db, &path).expect("save");
2055 }
2056
2057 let mut loaded = open_database(&path, "test".to_string()).expect("open");
2058 let docs = loaded.get_table("docs".to_string()).expect("docs");
2059
2060 let payload_col = docs
2062 .columns
2063 .iter()
2064 .find(|c| c.column_name == "payload")
2065 .unwrap();
2066 assert!(
2067 matches!(payload_col.datatype, DataType::Json),
2068 "expected DataType::Json, got {:?}",
2069 payload_col.datatype
2070 );
2071
2072 let resp = process_command(
2075 r#"SELECT id FROM docs WHERE json_extract(payload, '$.name') = 'alice';"#,
2076 &mut loaded,
2077 )
2078 .expect("select via json_extract after reopen");
2079 assert!(resp.contains("1 row returned"), "got: {resp}");
2080
2081 cleanup(&path);
2082 }
2083
2084 #[test]
2085 fn round_trip_rebuilds_hnsw_index_from_create_sql() {
2086 let path = tmp_path("hnsw_roundtrip");
2091
2092 {
2094 let mut db = Database::new("test".to_string());
2095 process_command(
2096 "CREATE TABLE docs (id INTEGER PRIMARY KEY, e VECTOR(2));",
2097 &mut db,
2098 )
2099 .unwrap();
2100 for v in &[
2101 "[1.0, 0.0]",
2102 "[2.0, 0.0]",
2103 "[0.0, 3.0]",
2104 "[1.0, 4.0]",
2105 "[10.0, 10.0]",
2106 ] {
2107 process_command(&format!("INSERT INTO docs (e) VALUES ({v});"), &mut db).unwrap();
2108 }
2109 process_command("CREATE INDEX ix_e ON docs USING hnsw (e);", &mut db).unwrap();
2110 save_database(&mut db, &path).expect("save");
2111 } let mut loaded = open_database(&path, "test".to_string()).expect("open");
2116 {
2117 let table = loaded.get_table("docs".to_string()).expect("docs");
2118 assert_eq!(table.hnsw_indexes.len(), 1, "HNSW index should reattach");
2119 let entry = &table.hnsw_indexes[0];
2120 assert_eq!(entry.name, "ix_e");
2121 assert_eq!(entry.column_name, "e");
2122 assert_eq!(entry.index.len(), 5, "loaded graph should hold all 5 rows");
2123 assert!(
2124 !entry.needs_rebuild,
2125 "fresh load should not be marked dirty"
2126 );
2127 }
2128
2129 let resp = process_command(
2132 "SELECT id FROM docs ORDER BY vec_distance_l2(e, [1.0, 0.0]) ASC LIMIT 3;",
2133 &mut loaded,
2134 )
2135 .unwrap();
2136 assert!(resp.contains("3 rows returned"), "got: {resp}");
2137
2138 cleanup(&path);
2139 }
2140
2141 #[test]
2142 fn round_trip_rebuilds_fts_index_from_create_sql() {
2143 let path = tmp_path("fts_roundtrip");
2148
2149 {
2150 let mut db = Database::new("test".to_string());
2151 process_command(
2152 "CREATE TABLE docs (id INTEGER PRIMARY KEY, body TEXT);",
2153 &mut db,
2154 )
2155 .unwrap();
2156 for body in &[
2157 "rust embedded database",
2158 "rust web framework",
2159 "go embedded systems",
2160 "python web framework",
2161 "rust rust embedded power",
2162 ] {
2163 process_command(
2164 &format!("INSERT INTO docs (body) VALUES ('{body}');"),
2165 &mut db,
2166 )
2167 .unwrap();
2168 }
2169 process_command("CREATE INDEX ix_body ON docs USING fts (body);", &mut db).unwrap();
2170 save_database(&mut db, &path).expect("save");
2171 } let mut loaded = open_database(&path, "test".to_string()).expect("open");
2174 {
2175 let table = loaded.get_table("docs".to_string()).expect("docs");
2176 assert_eq!(table.fts_indexes.len(), 1, "FTS index should reattach");
2177 let entry = &table.fts_indexes[0];
2178 assert_eq!(entry.name, "ix_body");
2179 assert_eq!(entry.column_name, "body");
2180 assert_eq!(
2181 entry.index.len(),
2182 5,
2183 "rebuilt posting list should hold all 5 rows"
2184 );
2185 assert!(!entry.needs_rebuild);
2186 }
2187
2188 let resp = process_command(
2191 "SELECT id FROM docs WHERE fts_match(body, 'rust');",
2192 &mut loaded,
2193 )
2194 .unwrap();
2195 assert!(resp.contains("3 rows returned"), "got: {resp}");
2196
2197 cleanup(&path);
2198 }
2199
2200 #[test]
2201 fn delete_then_save_then_reopen_excludes_deleted_node_from_fts() {
2202 let path = tmp_path("fts_delete_rebuild");
2207 let mut db = Database::new("test".to_string());
2208 process_command(
2209 "CREATE TABLE docs (id INTEGER PRIMARY KEY, body TEXT);",
2210 &mut db,
2211 )
2212 .unwrap();
2213 for body in &[
2214 "rust embedded",
2215 "rust framework",
2216 "go embedded",
2217 "python web",
2218 ] {
2219 process_command(
2220 &format!("INSERT INTO docs (body) VALUES ('{body}');"),
2221 &mut db,
2222 )
2223 .unwrap();
2224 }
2225 process_command("CREATE INDEX ix_body ON docs USING fts (body);", &mut db).unwrap();
2226
2227 process_command("DELETE FROM docs WHERE id = 1;", &mut db).unwrap();
2229 save_database(&mut db, &path).expect("save");
2230 drop(db);
2231
2232 let mut loaded = open_database(&path, "test".to_string()).expect("open");
2233 let resp = process_command(
2234 "SELECT id FROM docs WHERE fts_match(body, 'rust');",
2235 &mut loaded,
2236 )
2237 .unwrap();
2238 assert!(resp.contains("1 row returned"), "got: {resp}");
2241
2242 cleanup(&path);
2243 }
2244
2245 #[test]
2246 fn fts_roundtrip_uses_persistence_path_not_replay() {
2247 let path = tmp_path("fts_persistence_path");
2252
2253 {
2254 let mut db = Database::new("test".to_string());
2255 process_command(
2256 "CREATE TABLE docs (id INTEGER PRIMARY KEY, body TEXT);",
2257 &mut db,
2258 )
2259 .unwrap();
2260 process_command(
2261 "INSERT INTO docs (body) VALUES ('rust embedded database');",
2262 &mut db,
2263 )
2264 .unwrap();
2265 process_command("CREATE INDEX ix_body ON docs USING fts (body);", &mut db).unwrap();
2266 save_database(&mut db, &path).expect("save");
2267 }
2268
2269 let pager = Pager::open(&path).expect("open pager");
2271 let mut master = build_empty_master_table();
2272 load_table_rows(&pager, &mut master, pager.header().schema_root_page).unwrap();
2273 let mut found_rootpage: Option<u32> = None;
2274 for rowid in master.rowids() {
2275 let name = take_text(&master, "name", rowid).unwrap();
2276 if name == "ix_body" {
2277 let rp = take_integer(&master, "rootpage", rowid).unwrap();
2278 found_rootpage = Some(rp as u32);
2279 }
2280 }
2281 let rootpage = found_rootpage.expect("ix_body row in sqlrite_master");
2282 assert!(
2283 rootpage != 0,
2284 "Phase 8c FTS save should set rootpage != 0; got {rootpage}"
2285 );
2286
2287 cleanup(&path);
2288 }
2289
2290 #[test]
2291 fn save_without_fts_keeps_format_v4() {
2292 use crate::sql::pager::header::FORMAT_VERSION_V4;
2296
2297 let path = tmp_path("fts_no_bump");
2298 let mut db = Database::new("test".to_string());
2299 process_command(
2300 "CREATE TABLE t (id INTEGER PRIMARY KEY, n INTEGER);",
2301 &mut db,
2302 )
2303 .unwrap();
2304 process_command("INSERT INTO t (n) VALUES (1);", &mut db).unwrap();
2305 save_database(&mut db, &path).unwrap();
2306 drop(db);
2307
2308 let pager = Pager::open(&path).expect("open");
2309 assert_eq!(
2310 pager.header().format_version,
2311 FORMAT_VERSION_V4,
2312 "no-FTS save should keep v4"
2313 );
2314 cleanup(&path);
2315 }
2316
2317 #[test]
2318 fn save_with_fts_bumps_to_v5() {
2319 use crate::sql::pager::header::FORMAT_VERSION_V5;
2323
2324 let path = tmp_path("fts_bump_v5");
2325 let mut db = Database::new("test".to_string());
2326 process_command(
2327 "CREATE TABLE docs (id INTEGER PRIMARY KEY, body TEXT);",
2328 &mut db,
2329 )
2330 .unwrap();
2331 process_command("INSERT INTO docs (body) VALUES ('hello');", &mut db).unwrap();
2332 process_command("CREATE INDEX ix_body ON docs USING fts (body);", &mut db).unwrap();
2333 save_database(&mut db, &path).unwrap();
2334 drop(db);
2335
2336 let pager = Pager::open(&path).expect("open");
2337 assert_eq!(
2338 pager.header().format_version,
2339 FORMAT_VERSION_V5,
2340 "FTS save should promote to v5"
2341 );
2342 cleanup(&path);
2343 }
2344
2345 #[test]
2346 fn fts_persistence_handles_empty_and_zero_token_docs() {
2347 let path = tmp_path("fts_edges");
2353
2354 {
2355 let mut db = Database::new("test".to_string());
2356 process_command(
2357 "CREATE TABLE docs (id INTEGER PRIMARY KEY, body TEXT);",
2358 &mut db,
2359 )
2360 .unwrap();
2361 process_command("CREATE INDEX ix_body ON docs USING fts (body);", &mut db).unwrap();
2362 process_command("INSERT INTO docs (body) VALUES ('rust embedded');", &mut db).unwrap();
2365 process_command("INSERT INTO docs (body) VALUES ('!!!---???');", &mut db).unwrap();
2366 process_command("INSERT INTO docs (body) VALUES ('go embedded');", &mut db).unwrap();
2367 save_database(&mut db, &path).unwrap();
2368 }
2369
2370 let loaded = open_database(&path, "test".to_string()).expect("open");
2371 let table = loaded.get_table("docs".to_string()).unwrap();
2372 let entry = &table.fts_indexes[0];
2373 assert_eq!(entry.index.len(), 3);
2376 let res = entry
2378 .index
2379 .query("embedded", &crate::sql::fts::Bm25Params::default());
2380 assert_eq!(res.len(), 2);
2381
2382 cleanup(&path);
2383 }
2384
2385 #[test]
2386 fn fts_persistence_round_trips_large_corpus() {
2387 let path = tmp_path("fts_large_corpus");
2391
2392 let mut expected_terms: std::collections::BTreeSet<String> =
2393 std::collections::BTreeSet::new();
2394 {
2395 let mut db = Database::new("test".to_string());
2396 process_command(
2397 "CREATE TABLE docs (id INTEGER PRIMARY KEY, body TEXT);",
2398 &mut db,
2399 )
2400 .unwrap();
2401 process_command("CREATE INDEX ix_body ON docs USING fts (body);", &mut db).unwrap();
2402 for i in 0..500 {
2405 let term = format!("term{i:04}");
2406 process_command(
2407 &format!("INSERT INTO docs (body) VALUES ('{term}');"),
2408 &mut db,
2409 )
2410 .unwrap();
2411 expected_terms.insert(term);
2412 }
2413 save_database(&mut db, &path).unwrap();
2414 }
2415
2416 let loaded = open_database(&path, "test".to_string()).expect("open");
2417 let table = loaded.get_table("docs".to_string()).unwrap();
2418 let entry = &table.fts_indexes[0];
2419 assert_eq!(entry.index.len(), 500);
2420
2421 for &i in &[0_i64, 137, 248, 391, 499] {
2424 let term = format!("term{i:04}");
2425 let res = entry
2426 .index
2427 .query(&term, &crate::sql::fts::Bm25Params::default());
2428 assert_eq!(res.len(), 1, "term {term} should match exactly 1 row");
2429 assert_eq!(res[0].0, i + 1);
2432 }
2433
2434 cleanup(&path);
2435 }
2436
2437 #[test]
2438 fn delete_then_save_then_reopen_excludes_deleted_node_from_hnsw() {
2439 let path = tmp_path("hnsw_delete_rebuild");
2444 let mut db = Database::new("test".to_string());
2445 process_command(
2446 "CREATE TABLE docs (id INTEGER PRIMARY KEY, e VECTOR(2));",
2447 &mut db,
2448 )
2449 .unwrap();
2450 for v in &["[1.0, 0.0]", "[2.0, 0.0]", "[3.0, 0.0]", "[4.0, 0.0]"] {
2451 process_command(&format!("INSERT INTO docs (e) VALUES ({v});"), &mut db).unwrap();
2452 }
2453 process_command("CREATE INDEX ix_e ON docs USING hnsw (e);", &mut db).unwrap();
2454
2455 process_command("DELETE FROM docs WHERE id = 1;", &mut db).unwrap();
2457 let dirty_before_save = db.tables["docs"].hnsw_indexes[0].needs_rebuild;
2459 assert!(dirty_before_save, "DELETE should mark dirty");
2460
2461 save_database(&mut db, &path).expect("save");
2462 let dirty_after_save = db.tables["docs"].hnsw_indexes[0].needs_rebuild;
2464 assert!(!dirty_after_save, "save should clear dirty");
2465 drop(db);
2466
2467 let loaded = open_database(&path, "test".to_string()).expect("open");
2470 let docs = loaded.get_table("docs".to_string()).expect("docs");
2471
2472 assert!(
2474 !docs.rowids().contains(&1),
2475 "deleted row 1 should not be in row storage"
2476 );
2477 assert_eq!(docs.rowids().len(), 3, "should have 3 surviving rows");
2478
2479 assert_eq!(
2481 docs.hnsw_indexes[0].index.len(),
2482 3,
2483 "HNSW graph should have shed the deleted node"
2484 );
2485
2486 cleanup(&path);
2487 }
2488
2489 #[test]
2490 fn round_trip_survives_writes_after_load() {
2491 let path = tmp_path("after_load");
2492 save_database(&mut seed_db(), &path).unwrap();
2493
2494 {
2495 let mut db = open_database(&path, "test".to_string()).unwrap();
2496 process_command(
2497 "INSERT INTO users (name, age) VALUES ('carol', 40);",
2498 &mut db,
2499 )
2500 .unwrap();
2501 save_database(&mut db, &path).unwrap();
2502 } let db2 = open_database(&path, "test".to_string()).unwrap();
2505 let users = db2.get_table("users".to_string()).unwrap();
2506 assert_eq!(users.rowids().len(), 3);
2507
2508 cleanup(&path);
2509 }
2510
2511 #[test]
2512 fn open_rejects_garbage_file() {
2513 let path = tmp_path("bad");
2514 std::fs::write(&path, b"not a sqlrite database, just bytes").unwrap();
2515 let result = open_database(&path, "x".to_string());
2516 assert!(result.is_err());
2517 cleanup(&path);
2518 }
2519
2520 #[test]
2521 fn many_small_rows_spread_across_leaves() {
2522 let path = tmp_path("many_rows");
2523 let mut db = Database::new("big".to_string());
2524 process_command(
2525 "CREATE TABLE things (id INTEGER PRIMARY KEY, data TEXT);",
2526 &mut db,
2527 )
2528 .unwrap();
2529 for i in 0..200 {
2530 let body = "x".repeat(200);
2531 let q = format!("INSERT INTO things (data) VALUES ('row-{i}-{body}');");
2532 process_command(&q, &mut db).unwrap();
2533 }
2534 save_database(&mut db, &path).unwrap();
2535 let loaded = open_database(&path, "big".to_string()).unwrap();
2536 let things = loaded.get_table("things".to_string()).unwrap();
2537 assert_eq!(things.rowids().len(), 200);
2538 cleanup(&path);
2539 }
2540
#[test]
/// A single row with a 10,000-character text value must come back with its
/// full length after a save/open cycle.
fn huge_row_goes_through_overflow() {
    let path = tmp_path("overflow_row");
    let mut db = Database::new("big".to_string());
    let create = "CREATE TABLE docs (id INTEGER PRIMARY KEY, body TEXT);";
    process_command(create, &mut db).unwrap();

    let body = "A".repeat(10_000);
    let insert = format!("INSERT INTO docs (body) VALUES ('{body}');");
    process_command(&insert, &mut db).unwrap();
    save_database(&mut db, &path).unwrap();

    let reopened = open_database(&path, "big".to_string()).unwrap();
    let docs = reopened.get_table("docs".to_string()).unwrap();
    let ids = docs.rowids();
    assert_eq!(ids.len(), 1);

    // Only a Text value of the original length is acceptable.
    match docs.get_value("body", ids[0]) {
        Some(Value::Text(text)) => assert_eq!(text.len(), 10_000),
        other => panic!("expected Text, got {other:?}"),
    }

    cleanup(&path);
}
2569
#[test]
/// The CREATE statement produced by `table_to_create_sql` must parse back,
/// via `parse_create_sql`, to the same table name and column constraints.
fn create_sql_synthesis_round_trips() {
    let mut db = Database::new("x".to_string());
    let ddl = "CREATE TABLE t (id INTEGER PRIMARY KEY, tag TEXT UNIQUE, note TEXT NOT NULL);";
    process_command(ddl, &mut db).unwrap();

    let synthesized = table_to_create_sql(db.get_table("t".to_string()).unwrap());
    let (table_name, columns) = parse_create_sql(&synthesized).unwrap();

    assert_eq!(table_name, "t");
    assert_eq!(columns.len(), 3);
    // One constraint per column, in declaration order.
    assert!(columns[0].is_pk);
    assert!(columns[1].is_unique);
    assert!(columns[2].not_null);
}
2589
#[test]
/// After reopening a saved database, the table map must not contain an
/// entry named `MASTER_TABLE_NAME`.
fn sqlrite_master_is_not_exposed_as_a_user_table() {
    let path = tmp_path("no_master");
    save_database(&mut seed_db(), &path).unwrap();

    let reopened = open_database(&path, "x".to_string()).unwrap();
    let master_visible = reopened.tables.contains_key(MASTER_TABLE_NAME);
    assert!(!master_visible);

    cleanup(&path);
}
2599
#[test]
/// Saves a 200-row table, reopens it, and checks that the page recorded as
/// the table's root in the schema catalog is tagged `PageType::InteriorNode`.
fn multi_leaf_table_produces_an_interior_root() {
    let path = tmp_path("multi_leaf_interior");
    let mut db = Database::new("big".to_string());
    process_command(
        "CREATE TABLE things (id INTEGER PRIMARY KEY, data TEXT);",
        &mut db,
    )
    .unwrap();

    // The padding is the same for every row, so allocate it once instead of
    // once per iteration.
    let body = "x".repeat(200);
    for i in 0..200 {
        let q = format!("INSERT INTO things (data) VALUES ('row-{i}-{body}');");
        process_command(&q, &mut db).unwrap();
    }
    save_database(&mut db, &path).unwrap();

    let loaded = open_database(&path, "big".to_string()).unwrap();
    let things = loaded.get_table("things".to_string()).unwrap();
    assert_eq!(things.rowids().len(), 200);

    // Load the schema catalog rows straight from the pager so the root page
    // number of `things` can be looked up.
    let pager = loaded
        .pager
        .as_ref()
        .expect("loaded DB should have a pager");
    let mut master = build_empty_master_table();
    load_table_rows(pager, &mut master, pager.header().schema_root_page).unwrap();
    let things_root = master
        .rowids()
        .into_iter()
        .find_map(|r| match master.get_value("name", r) {
            Some(Value::Text(s)) if s == "things" => match master.get_value("rootpage", r) {
                // Checked conversion: an out-of-range page number is treated
                // as "not found" instead of being silently truncated.
                Some(Value::Integer(p)) => u32::try_from(p).ok(),
                _ => None,
            },
            _ => None,
        })
        .expect("things should appear in sqlrite_master");

    // The first byte of the page is compared against the page-type tag.
    let root_buf = pager.read_page(things_root).unwrap();
    assert_eq!(
        root_buf[0],
        PageType::InteriorNode as u8,
        "expected a multi-leaf table to have an interior root, got tag {}",
        root_buf[0]
    );

    cleanup(&path);
}
2659
#[test]
/// An index created with `CREATE INDEX` must still exist, with its column,
/// uniqueness flag and entries, after the database is saved and reopened.
fn explicit_index_persists_across_save_and_open() {
    let path = tmp_path("idx_persist");
    let mut db = Database::new("idx".to_string());
    let create_table = "CREATE TABLE users (id INTEGER PRIMARY KEY, tag TEXT);";
    process_command(create_table, &mut db).unwrap();

    // Five inserts: i = 1, 3, 5 get the tag 'even' and i = 2, 4 get 'odd'.
    // The labels are just two distinct values; the counts asserted below
    // follow this assignment (3 x 'even', 2 x 'odd').
    for i in 1..=5 {
        let tag = match i % 2 {
            0 => "odd",
            _ => "even",
        };
        let insert = format!("INSERT INTO users (tag) VALUES ('{tag}');");
        process_command(&insert, &mut db).unwrap();
    }
    process_command("CREATE INDEX users_tag_idx ON users (tag);", &mut db).unwrap();
    save_database(&mut db, &path).unwrap();

    let reopened = open_database(&path, "idx".to_string()).unwrap();
    let users = reopened.get_table("users".to_string()).unwrap();
    let tag_index = users
        .index_by_name("users_tag_idx")
        .expect("explicit index should survive save/open");
    assert_eq!(tag_index.column_name, "tag");
    assert!(!tag_index.is_unique);

    let even_hits = tag_index.lookup(&Value::Text("even".into()));
    let odd_hits = tag_index.lookup(&Value::Text("odd".into()));
    assert_eq!(even_hits.len(), 3);
    assert_eq!(odd_hits.len(), 2);

    cleanup(&path);
}
2696
#[test]
/// The index named by `SecondaryIndex::auto_name` for a UNIQUE column must
/// be present, unique, and populated after a save/open cycle.
fn auto_indexes_for_unique_columns_survive_save_open() {
    let path = tmp_path("auto_idx_persist");
    let mut db = Database::new("a".to_string());
    let create = "CREATE TABLE users (id INTEGER PRIMARY KEY, email TEXT NOT NULL UNIQUE);";
    process_command(create, &mut db).unwrap();
    process_command("INSERT INTO users (email) VALUES ('a@x');", &mut db).unwrap();
    process_command("INSERT INTO users (email) VALUES ('b@x');", &mut db).unwrap();
    save_database(&mut db, &path).unwrap();

    let reopened = open_database(&path, "a".to_string()).unwrap();
    let users = reopened.get_table("users".to_string()).unwrap();
    let index_name = SecondaryIndex::auto_name("users", "email");
    let email_index = users
        .index_by_name(&index_name)
        .expect("auto index should be restored");
    assert!(email_index.is_unique);

    // Each inserted e-mail must resolve to exactly one row.
    let hits_a = email_index.lookup(&Value::Text("a@x".into())).len();
    assert_eq!(hits_a, 1);
    let hits_b = email_index.lookup(&Value::Text("b@x".into())).len();
    assert_eq!(hits_b, 1);

    cleanup(&path);
}
2724
#[test]
/// Saves a table of 6,000 wide rows, reopens it, and checks both that all
/// rows survive and that the on-disk tree has at least three levels (the
/// root's leftmost child is itself an interior page).
fn deep_tree_round_trips() {
    use crate::sql::db::table::Column as TableColumn;

    let path = tmp_path("deep_tree");
    let mut db = Database::new("deep".to_string());
    let columns = vec![
        TableColumn::new("id".into(), "integer".into(), true, true, true),
        TableColumn::new("s".into(), "text".into(), false, true, false),
    ];
    let mut table = build_empty_table("t", columns, 0);

    // The 900-character padding is the same for every row, so allocate it
    // once instead of 6,000 times.
    let body = "q".repeat(900);
    for i in 1..=6_000i64 {
        // Rows are placed directly into the table via `restore_row` rather
        // than going through SQL INSERT statements.
        table
            .restore_row(
                i,
                vec![
                    Some(Value::Integer(i)),
                    Some(Value::Text(format!("r-{i}-{body}"))),
                ],
            )
            .unwrap();
    }
    db.tables.insert("t".to_string(), table);
    save_database(&mut db, &path).unwrap();

    let loaded = open_database(&path, "deep".to_string()).unwrap();
    let t = loaded.get_table("t".to_string()).unwrap();
    assert_eq!(t.rowids().len(), 6_000);

    // Load the schema catalog rows straight from the pager so the root page
    // number of `t` can be looked up.
    let pager = loaded.pager.as_ref().unwrap();
    let mut master = build_empty_master_table();
    load_table_rows(pager, &mut master, pager.header().schema_root_page).unwrap();
    let t_root = master
        .rowids()
        .into_iter()
        .find_map(|r| match master.get_value("name", r) {
            Some(Value::Text(s)) if s == "t" => match master.get_value("rootpage", r) {
                // Checked conversion: an out-of-range page number is treated
                // as "not found" instead of being silently truncated.
                Some(Value::Integer(p)) => u32::try_from(p).ok(),
                _ => None,
            },
            _ => None,
        })
        .expect("t in sqlrite_master");

    // The root must be an interior page...
    let root_buf = pager.read_page(t_root).unwrap();
    assert_eq!(root_buf[0], PageType::InteriorNode as u8);

    // ...and so must its leftmost child. The payload after the page header
    // is decoded to find that child's page number.
    let root_payload: &[u8; PAYLOAD_PER_PAGE] =
        (&root_buf[PAGE_HEADER_SIZE..]).try_into().unwrap();
    let root_interior = InteriorPage::from_bytes(root_payload);
    let child = root_interior.leftmost_child().unwrap();
    let child_buf = pager.read_page(child).unwrap();
    assert_eq!(
        child_buf[0],
        PageType::InteriorNode as u8,
        "expected 3-level tree: root's leftmost child should also be InteriorNode",
    );

    cleanup(&path);
}
2792
#[test]
/// After `ALTER TABLE ... RENAME TO` and a save, the reopened database must
/// expose only the new table name, keep its rows, and carry auto-indexes
/// named after the new table.
fn alter_rename_table_survives_save_and_reopen() {
    let path = tmp_path("alter_rename_table_roundtrip");
    let mut db = seed_db();
    save_database(&mut db, &path).expect("save");

    process_command("ALTER TABLE users RENAME TO members;", &mut db).expect("rename");
    save_database(&mut db, &path).expect("save after rename");

    let reopened = open_database(&path, "t".to_string()).expect("reopen");
    let old_name_present = reopened.contains_table("users".to_string());
    let new_name_present = reopened.contains_table("members".to_string());
    assert!(!old_name_present);
    assert!(new_name_present);

    let members = reopened.get_table("members".to_string()).unwrap();
    assert_eq!(members.rowids().len(), 2, "rows should survive");

    // Both auto-indexes must be reachable under the renamed table's prefix.
    for index_name in [
        "sqlrite_autoindex_members_id",
        "sqlrite_autoindex_members_name",
    ] {
        assert!(members.index_by_name(index_name).is_some());
    }

    cleanup(&path);
}
2821
#[test]
/// After `ALTER TABLE ... RENAME COLUMN` and a save, the reopened table must
/// expose only the new column name and still return the stored values
/// through it.
fn alter_rename_column_survives_save_and_reopen() {
    let path = tmp_path("alter_rename_col_roundtrip");
    let mut db = seed_db();
    save_database(&mut db, &path).expect("save");

    let rename = "ALTER TABLE users RENAME COLUMN name TO full_name;";
    process_command(rename, &mut db).expect("rename column");
    save_database(&mut db, &path).expect("save after rename");

    let reopened = open_database(&path, "t".to_string()).expect("reopen");
    let users = reopened.get_table("users".to_string()).unwrap();
    assert!(users.contains_column("full_name".to_string()));
    assert!(!users.contains_column("name".to_string()));

    // Locate the row whose renamed column holds 'alice', then read it back.
    let alice = Value::Text("alice".to_string());
    let alice_rowid = users
        .rowids()
        .into_iter()
        .find(|&candidate| users.get_value("full_name", candidate).as_ref() == Some(&alice))
        .expect("alice row should be findable under renamed column");
    assert_eq!(users.get_value("full_name", alice_rowid), Some(alice));

    cleanup(&path);
}
2852
#[test]
/// After `ALTER TABLE ... ADD COLUMN ... DEFAULT` and a save, the reopened
/// table must have the new column, every row must hold the default value,
/// and the column definition must still carry the default.
fn alter_add_column_with_default_survives_save_and_reopen() {
    let path = tmp_path("alter_add_default_roundtrip");
    let mut db = seed_db();
    save_database(&mut db, &path).expect("save");

    let add_column = "ALTER TABLE users ADD COLUMN status TEXT DEFAULT 'active';";
    process_command(add_column, &mut db).expect("add column");
    save_database(&mut db, &path).expect("save after add");

    let reopened = open_database(&path, "t".to_string()).expect("reopen");
    let users = reopened.get_table("users".to_string()).unwrap();
    assert!(users.contains_column("status".to_string()));

    let expected = Some(Value::Text("active".to_string()));

    // Row data: every row must read back the default.
    for rowid in users.rowids() {
        assert_eq!(
            users.get_value("status", rowid),
            expected,
            "backfilled default should round-trip for rowid {rowid}"
        );
    }

    // Schema: the column metadata must keep the default as well.
    let status_col = users
        .columns
        .iter()
        .find(|col| col.column_name == "status")
        .unwrap();
    assert_eq!(status_col.default, expected);

    cleanup(&path);
}
2887
#[test]
/// After `ALTER TABLE ... DROP COLUMN` and a save, the reopened table must
/// lack the dropped column and keep the others.
fn alter_drop_column_survives_save_and_reopen() {
    let path = tmp_path("alter_drop_col_roundtrip");
    let mut db = seed_db();
    save_database(&mut db, &path).expect("save");

    process_command("ALTER TABLE users DROP COLUMN age;", &mut db).expect("drop column");
    save_database(&mut db, &path).expect("save after drop");

    let reopened = open_database(&path, "t".to_string()).expect("reopen");
    let users = reopened.get_table("users".to_string()).unwrap();
    let has_age = users.contains_column("age".to_string());
    let has_name = users.contains_column("name".to_string());
    assert!(!has_age);
    assert!(has_name);

    cleanup(&path);
}
2904
#[test]
/// A table removed with `DROP TABLE` must stay gone after a save and
/// reopen, while the other table remains.
fn drop_table_survives_save_and_reopen() {
    let path = tmp_path("drop_table_roundtrip");
    let mut db = seed_db();
    save_database(&mut db, &path).expect("save");

    // Baseline: both tables are on disk before the drop.
    {
        let before = open_database(&path, "t".to_string()).expect("open");
        assert!(before.contains_table("users".to_string()));
        assert!(before.contains_table("notes".to_string()));
    }

    process_command("DROP TABLE users;", &mut db).expect("drop users");
    save_database(&mut db, &path).expect("save after drop");

    let after = open_database(&path, "t".to_string()).expect("reopen");
    let users_present = after.contains_table("users".to_string());
    assert!(
        !users_present,
        "dropped table should not resurface on reopen"
    );
    let notes_present = after.contains_table("notes".to_string());
    assert!(notes_present, "untouched table should survive");

    cleanup(&path);
}
2933
#[test]
/// An index removed with `DROP INDEX` must stay gone after a save and
/// reopen, while the primary-key auto-index remains.
fn drop_index_survives_save_and_reopen() {
    let path = tmp_path("drop_index_roundtrip");
    let mut db = Database::new("t".to_string());
    let create_table = "CREATE TABLE notes (id INTEGER PRIMARY KEY, body TEXT);";
    process_command(create_table, &mut db).unwrap();
    process_command("CREATE INDEX notes_body_idx ON notes (body);", &mut db).unwrap();
    save_database(&mut db, &path).expect("save");

    process_command("DROP INDEX notes_body_idx;", &mut db).unwrap();
    save_database(&mut db, &path).expect("save after drop");

    let reopened = open_database(&path, "t".to_string()).expect("reopen");
    let notes = reopened.get_table("notes".to_string()).unwrap();

    let dropped = notes.index_by_name("notes_body_idx");
    assert!(
        dropped.is_none(),
        "dropped index should not resurface on reopen"
    );
    let auto = notes.index_by_name("sqlrite_autoindex_notes_id");
    assert!(auto.is_some());

    cleanup(&path);
}
2960
#[test]
/// Column DEFAULT clauses must be restored on reopen, both in the column
/// metadata and in their effect on a later INSERT that omits those columns.
fn default_clause_survives_save_and_reopen() {
    let path = tmp_path("default_roundtrip");
    let mut db = Database::new("t".to_string());

    process_command(
        "CREATE TABLE users (id INTEGER PRIMARY KEY, status TEXT DEFAULT 'active', score INTEGER DEFAULT 0);",
        &mut db,
    )
    .unwrap();
    save_database(&mut db, &path).expect("save");

    let mut loaded = open_database(&path, "t".to_string()).expect("open");

    // Metadata check. Scoped so the shared borrow of `loaded` ends before
    // the INSERT below needs it mutably.
    {
        let users = loaded.get_table("users".to_string()).expect("users table");

        let status_col = users
            .columns
            .iter()
            .find(|col| col.column_name == "status")
            .expect("status column");
        assert_eq!(
            status_col.default,
            Some(Value::Text("active".to_string())),
            "DEFAULT 'active' should round-trip"
        );

        let score_col = users
            .columns
            .iter()
            .find(|col| col.column_name == "score")
            .expect("score column");
        assert_eq!(
            score_col.default,
            Some(Value::Integer(0)),
            "DEFAULT 0 should round-trip"
        );
    }

    // Behaviour check: a row inserted with only `id` picks up both defaults.
    process_command("INSERT INTO users (id) VALUES (1);", &mut loaded).unwrap();
    let users = loaded.get_table("users".to_string()).unwrap();
    let status = users.get_value("status", 1);
    assert_eq!(status, Some(Value::Text("active".to_string())));
    let score = users.get_value("score", 1);
    assert_eq!(score, Some(Value::Integer(0)));

    cleanup(&path);
}
3010
#[test]
/// Dropping a table must leave `page_count` unchanged and set a non-zero
/// `freelist_head`; creating a small table afterwards may then grow the
/// page count by at most two.
fn drop_table_freelist_persists_pages_for_reuse() {
    let path = tmp_path("freelist_reuse");
    let mut db = seed_db();
    db.source_path = Some(path.clone());
    save_database(&mut db, &path).expect("save");
    let pages_two_tables = db.pager.as_ref().unwrap().header().page_count;

    process_command("DROP TABLE users;", &mut db).expect("drop users");

    // Header state right after the drop.
    let pages_after_drop = db.pager.as_ref().unwrap().header().page_count;
    assert_eq!(
        pages_after_drop, pages_two_tables,
        "page_count should not shrink on drop — the freed pages persist on the freelist"
    );
    let head_after_drop = db.pager.as_ref().unwrap().header().freelist_head;
    assert!(
        head_after_drop != 0,
        "freelist_head must be non-zero after drop"
    );

    // Create and fill a replacement table of similar size.
    let create = "CREATE TABLE accounts (id INTEGER PRIMARY KEY, label TEXT NOT NULL UNIQUE);";
    process_command(create, &mut db).expect("create accounts");
    for insert in [
        "INSERT INTO accounts (label) VALUES ('a');",
        "INSERT INTO accounts (label) VALUES ('b');",
    ] {
        process_command(insert, &mut db).unwrap();
    }

    let pages_after_create = db.pager.as_ref().unwrap().header().page_count;
    assert!(
        pages_after_create <= pages_two_tables + 2,
        "creating a similar-sized table after a drop should mostly draw from the \
         freelist, not extend the file (got {pages_after_create} > {pages_two_tables} + 2)"
    );

    cleanup(&path);
}
3057
#[test]
/// After a table is dropped, `VACUUM` must lower `page_count`, reset
/// `freelist_head` to zero, and reduce the size of the file on disk.
fn drop_then_vacuum_shrinks_file() {
    let path = tmp_path("vacuum_shrinks");
    let mut db = seed_db();
    db.source_path = Some(path.clone());

    // Add 20 more rows to `users` before saving.
    for i in 0..20 {
        let insert = format!("INSERT INTO users (name, age) VALUES ('user{i}', {i});");
        process_command(&insert, &mut db).unwrap();
    }
    save_database(&mut db, &path).expect("save");

    process_command("DROP TABLE users;", &mut db).expect("drop");

    // Snapshot of the state before compaction.
    let size_before_vacuum = std::fs::metadata(&path).unwrap().len();
    let pages_before_vacuum = db.pager.as_ref().unwrap().header().page_count;
    let head_before = db.pager.as_ref().unwrap().header().freelist_head;
    assert!(head_before != 0, "drop should populate the freelist");

    process_command("VACUUM;", &mut db).expect("vacuum");

    // Snapshot of the state after compaction.
    let size_after = std::fs::metadata(&path).unwrap().len();
    let pages_after = db.pager.as_ref().unwrap().header().page_count;
    let head_after = db.pager.as_ref().unwrap().header().freelist_head;

    assert!(
        pages_after < pages_before_vacuum,
        "VACUUM must reduce page_count: was {pages_before_vacuum}, now {pages_after}"
    );
    assert_eq!(head_after, 0, "VACUUM must clear the freelist");
    assert!(
        size_after < size_before_vacuum,
        "VACUUM must shrink the file on disk: was {size_before_vacuum} bytes, now {size_after}"
    );

    cleanup(&path);
}
3099
#[test]
/// Running `VACUUM` on a saved database must leave both tables and the
/// `users` rows readable when the file is reopened.
fn vacuum_round_trips_data() {
    let path = tmp_path("vacuum_round_trip");
    {
        let mut db = seed_db();
        db.source_path = Some(path.clone());
        save_database(&mut db, &path).expect("save");
        process_command("VACUUM;", &mut db).expect("vacuum");
        // `db` goes out of scope here, before the file is reopened.
    }

    let reopened = open_database(&path, "t".to_string()).expect("reopen after vacuum");
    for table in ["users", "notes"] {
        assert!(reopened.contains_table(table.to_string()));
    }
    let user_rows = reopened
        .get_table("users".to_string())
        .unwrap()
        .rowids()
        .len();
    assert_eq!(user_rows, 2);

    cleanup(&path);
}
3120
#[test]
/// Tracks the header's `format_version` through three steps: a plain save
/// (baseline), a `DROP TABLE` (V6), and a `VACUUM` (still V6).
fn freelist_format_version_promotion() {
    use crate::sql::pager::header::{FORMAT_VERSION_BASELINE, FORMAT_VERSION_V6};

    let path = tmp_path("v6_promotion");
    let mut db = seed_db();
    db.source_path = Some(path.clone());

    // Step 1: plain save.
    save_database(&mut db, &path).expect("save");
    let v_after_save = db.pager.as_ref().unwrap().header().format_version;
    assert_eq!(
        v_after_save, FORMAT_VERSION_BASELINE,
        "fresh DB without drops should stay at the baseline version"
    );

    // Step 2: drop a table.
    process_command("DROP TABLE users;", &mut db).expect("drop");
    let v_after_drop = db.pager.as_ref().unwrap().header().format_version;
    assert_eq!(
        v_after_drop, FORMAT_VERSION_V6,
        "first save with a non-empty freelist must promote to V6"
    );

    // Step 3: vacuum.
    process_command("VACUUM;", &mut db).expect("vacuum");
    let v_after_vacuum = db.pager.as_ref().unwrap().header().format_version;
    assert_eq!(
        v_after_vacuum, FORMAT_VERSION_V6,
        "VACUUM must not downgrade — V6 is a strict superset"
    );

    cleanup(&path);
}
3153
#[test]
/// A freelist populated by `DROP TABLE` must still be recorded in the header
/// after the database is closed and reopened, and a table created afterwards
/// may grow the page count by at most two.
fn freelist_round_trip_through_reopen() {
    let path = tmp_path("freelist_reopen");

    // First session: save, record the page count, drop a table. The session
    // ends with this block.
    let pages_two_tables = {
        let mut db = seed_db();
        db.source_path = Some(path.clone());
        save_database(&mut db, &path).expect("save");
        let page_count = db.pager.as_ref().unwrap().header().page_count;

        process_command("DROP TABLE users;", &mut db).expect("drop");
        let head = db.pager.as_ref().unwrap().header().freelist_head;
        assert!(head != 0, "drop must populate the freelist");

        page_count
    };

    // Second session: the freelist head must have been persisted.
    let mut db = open_database(&path, "t".to_string()).expect("reopen");
    let reopened_head = db.pager.as_ref().unwrap().header().freelist_head;
    assert!(reopened_head != 0, "freelist_head must survive close/reopen");

    let create = "CREATE TABLE accounts (id INTEGER PRIMARY KEY, label TEXT NOT NULL UNIQUE);";
    process_command(create, &mut db).expect("create accounts");
    process_command("INSERT INTO accounts (label) VALUES ('reopened');", &mut db).unwrap();

    let pages_after_create = db.pager.as_ref().unwrap().header().page_count;
    assert!(
        pages_after_create <= pages_two_tables + 2,
        "post-reopen create should reuse freelist (got {pages_after_create} > \
         {pages_two_tables} + 2 — file extended instead of reusing)"
    );

    cleanup(&path);
}
3193
#[test]
/// `VACUUM` issued between `BEGIN` and `ROLLBACK` must fail with the
/// in-transaction error message.
fn vacuum_inside_transaction_is_rejected() {
    let path = tmp_path("vacuum_txn");
    let mut db = seed_db();
    db.source_path = Some(path.clone());
    save_database(&mut db, &path).expect("save");

    process_command("BEGIN;", &mut db).expect("begin");
    let err = process_command("VACUUM;", &mut db).unwrap_err();
    let rendered = err.to_string();
    assert!(
        rendered.contains("VACUUM cannot run inside a transaction"),
        "expected in-transaction rejection, got: {err}"
    );

    // Close the transaction before removing the file.
    process_command("ROLLBACK;", &mut db).unwrap();
    cleanup(&path);
}
3213
#[test]
/// `VACUUM` on a database that was never saved must succeed and return a
/// message mentioning "no-op" or "in-memory" (case-insensitive).
fn vacuum_on_in_memory_database_is_noop() {
    let mut db = Database::new("mem".to_string());
    process_command("CREATE TABLE t (id INTEGER PRIMARY KEY);", &mut db).unwrap();

    let out = process_command("VACUUM;", &mut db).expect("vacuum no-op");
    let lowered = out.to_lowercase();
    let mentions_noop = lowered.contains("no-op") || lowered.contains("in-memory");
    assert!(
        mentions_noop,
        "expected no-op message for in-memory VACUUM, got: {out}"
    );
}
3225
#[test]
/// Dropping one table must not change the bytes of the root pages of the
/// other tables: the `accounts` and `users` root pages are captured before
/// `DROP TABLE notes` and compared byte-for-byte afterwards.
fn unchanged_table_pages_skip_diff_after_unrelated_drop() {
    let path = tmp_path("diff_after_drop");
    let mut db = Database::new("t".to_string());
    db.source_path = Some(path.clone());
    process_command(
        "CREATE TABLE accounts (id INTEGER PRIMARY KEY, label TEXT);",
        &mut db,
    )
    .unwrap();
    process_command(
        "CREATE TABLE notes (id INTEGER PRIMARY KEY, body TEXT);",
        &mut db,
    )
    .unwrap();
    process_command(
        "CREATE TABLE users (id INTEGER PRIMARY KEY, name TEXT);",
        &mut db,
    )
    .unwrap();
    for i in 0..5 {
        process_command(
            &format!("INSERT INTO accounts (label) VALUES ('a{i}');"),
            &mut db,
        )
        .unwrap();
        process_command(
            &format!("INSERT INTO notes (body) VALUES ('n{i}');"),
            &mut db,
        )
        .unwrap();
        process_command(
            &format!("INSERT INTO users (name) VALUES ('u{i}');"),
            &mut db,
        )
        .unwrap();
    }
    save_database(&mut db, &path).expect("baseline save");

    let pager = db.pager.as_ref().unwrap();
    // Read the root-page map once and look up both tables in it, instead of
    // re-reading the schema catalog for every lookup. The map is confined to
    // this block so nothing derived from it outlives the lookups.
    let (acc_root, users_root) = {
        let roots = read_old_rootpages(pager, pager.header().schema_root_page).unwrap();
        let acc = roots
            .get(&("table".to_string(), "accounts".to_string()))
            .copied()
            .expect("accounts should have a root page");
        let users = roots
            .get(&("table".to_string(), "users".to_string()))
            .copied()
            .expect("users should have a root page");
        (acc, users)
    };
    // Owned copies, so the comparison below does not depend on the pager's
    // buffers staying untouched.
    let acc_bytes_before: Vec<u8> = pager.read_page(acc_root).unwrap().to_vec();
    let users_bytes_before: Vec<u8> = pager.read_page(users_root).unwrap().to_vec();

    process_command("DROP TABLE notes;", &mut db).expect("drop notes");

    let pager = db.pager.as_ref().unwrap();
    let acc_after = pager.read_page(acc_root).unwrap();
    let users_after = pager.read_page(users_root).unwrap();
    assert_eq!(
        &acc_after[..],
        &acc_bytes_before[..],
        "accounts root page must not be rewritten when an unrelated table is dropped"
    );
    assert_eq!(
        &users_after[..],
        &users_bytes_before[..],
        "users root page must not be rewritten when an unrelated table is dropped"
    );

    cleanup(&path);
}
3310
#[test]
/// `VACUUM FULL;` and `VACUUM users;` must each fail with an error that
/// mentions "VACUUM modifiers".
fn vacuum_modifiers_are_rejected() {
    let path = tmp_path("vacuum_modifiers");
    let mut db = seed_db();
    db.source_path = Some(path.clone());
    save_database(&mut db, &path).expect("save");

    let rejected_forms = ["VACUUM FULL;", "VACUUM users;"];
    for stmt in rejected_forms {
        let err = process_command(stmt, &mut db).unwrap_err();
        let rendered = err.to_string();
        assert!(
            rendered.contains("VACUUM modifiers"),
            "expected modifier rejection for `{stmt}`, got: {err}"
        );
    }

    cleanup(&path);
}
3328}