1use crate::errors::AppError;
4use crate::output;
5use crate::paths::AppPaths;
6use crate::storage::connection::open_rw;
7use chrono::Utc;
8use rusqlite::OptionalExtension;
9use serde::Serialize;
10use siphasher::sip::SipHasher13;
11use std::hash::{Hash, Hasher};
12use std::path::Path;
13
14#[derive(clap::Args)]
15#[command(after_long_help = "EXAMPLES:\n \
16 # Apply pending schema migrations\n \
17 sqlite-graphrag migrate\n\n \
18 # Show already-applied migrations without applying new ones\n \
19 sqlite-graphrag migrate --status\n\n \
20 # Migrate a database at a custom path\n \
21 sqlite-graphrag migrate --db /path/to/graphrag.sqlite\n\n \
22 # Rewrite recorded migration checksums to match the current file content.\n \
23 # Use this after upgrading across a version that intentionally changed a\n \
24 # migration file (v1.0.76 is the first release where this is exposed).\n \
25 sqlite-graphrag migrate --rehash\n\n \
26 # Full upgrade: rehash, apply V013 (drop vec tables), verify schema.\n \
27 # Required once for users upgrading from v1.0.74 or v1.0.75.\n \
28 sqlite-graphrag migrate --to-llm-only")]
29pub struct MigrateArgs {
30 #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
31 pub db: Option<String>,
32 #[arg(long, default_value_t = false)]
34 pub json: bool,
35 #[arg(long, default_value_t = false)]
37 pub status: bool,
38 #[arg(long, default_value_t = false)]
41 pub rehash: bool,
42 #[arg(long, default_value_t = false)]
46 pub to_llm_only: bool,
47 #[arg(long, default_value_t = false)]
53 pub drop_vec_tables: bool,
54 #[arg(long, default_value_t = false)]
60 pub dry_run: bool,
61 #[arg(long, default_value_t = false)]
69 pub confirm: bool,
70}
71
72#[derive(Serialize)]
73struct MigrateResponse {
74 db_path: String,
75 schema_version: u32,
78 status: String,
79 elapsed_ms: u64,
81}
82
83#[derive(Serialize)]
84struct MigrateStatusResponse {
85 db_path: String,
86 applied_migrations: Vec<MigrationEntry>,
87 schema_version: u32,
89 elapsed_ms: u64,
90}
91
92#[derive(Serialize)]
93struct DryRunReport {
94 db_path: String,
95 schema_version: u32,
96 pending_migrations: Vec<MigrationEntry>,
99 pending_count: u32,
101 checksum_mismatches: Vec<RehashEntry>,
104 status: String,
109 elapsed_ms: u64,
110}
111
112#[derive(Serialize)]
113struct MigrationEntry {
114 version: i64,
115 name: String,
116 applied_on: Option<String>,
117 #[serde(skip_serializing_if = "Option::is_none")]
118 checksum: Option<String>,
119}
120
121#[derive(Serialize)]
122struct RehashReport {
123 db_path: String,
124 schema_version: u32,
125 rewritten: Vec<RehashEntry>,
128 inspected: usize,
130 null_rows_fixed: u64,
132 v013_tables_created: bool,
134 status: String,
135 elapsed_ms: u64,
136}
137
138#[derive(Serialize, Debug)]
139struct RehashEntry {
140 version: i64,
141 name: String,
142 old_checksum: String,
143 new_checksum: String,
144}
145
146#[derive(Serialize)]
147struct ToLlmOnlyReport {
148 db_path: String,
149 schema_version: u32,
150 rehashed: Vec<RehashEntry>,
151 vec_tables_were_present: bool,
154 v013_applied: bool,
156 null_rows_fixed: u64,
158 vec_tables_removed_via_writable_schema: usize,
161 v013_tables_created: bool,
163 status: String,
164 elapsed_ms: u64,
165}
166
167pub fn run(args: MigrateArgs) -> Result<(), AppError> {
168 let start = std::time::Instant::now();
169 let _ = args.json; let paths = AppPaths::resolve(args.db.as_deref())?;
171 paths.ensure_dirs()?;
172
173 if args.status && (args.rehash || args.to_llm_only) {
174 return Err(AppError::Validation(
175 "--status cannot be combined with --rehash or --to-llm-only".into(),
176 ));
177 }
178 if args.rehash && args.to_llm_only {
179 return Err(AppError::Validation(
180 "--rehash and --to-llm-only are mutually exclusive".into(),
181 ));
182 }
183 if args.to_llm_only && !args.drop_vec_tables {
184 return Err(AppError::Validation(
185 "--to-llm-only requires --drop-vec-tables to acknowledge the destructive drop".into(),
186 ));
187 }
188 if args.dry_run && (args.rehash || args.to_llm_only) {
189 return Err(AppError::Validation(
190 "--dry-run cannot be combined with --rehash or --to-llm-only".into(),
191 ));
192 }
193 if args.confirm && args.dry_run {
194 return Err(AppError::Validation(
195 "--confirm cannot be combined with --dry-run".into(),
196 ));
197 }
198
199 let mut conn = open_rw(&paths.db)?;
200
201 if args.status {
202 let schema_version = latest_schema_version(&conn).unwrap_or(0);
203 let applied = list_applied_migrations(&conn)?;
204 output::emit_json(&MigrateStatusResponse {
205 db_path: paths.db.display().to_string(),
206 applied_migrations: applied,
207 schema_version,
208 elapsed_ms: start.elapsed().as_millis() as u64,
209 })?;
210 return Ok(());
211 }
212
213 if args.rehash {
214 let report = run_rehash(&mut conn, &paths.db)?;
215 output::emit_json(&report)?;
216 return Ok(());
217 }
218
219 if args.to_llm_only {
220 let report = run_to_llm_only(&mut conn, &paths.db)?;
221 output::emit_json(&report)?;
222 return Ok(());
223 }
224
225 if args.dry_run {
226 let report = run_dry_run(&conn, &paths.db)?;
227 output::emit_json(&report)?;
228 return Ok(());
229 }
230
231 sanitize_null_applied_on(&conn)?;
232 ensure_v013_tables_exist(&conn)?;
233
234 crate::migrations::runner()
235 .run(&mut conn)
236 .map_err(|e| AppError::Internal(anyhow::anyhow!("migration failed: {e}")))?;
237
238 conn.execute_batch(&format!(
239 "PRAGMA user_version = {};",
240 crate::constants::SCHEMA_USER_VERSION
241 ))?;
242
243 let schema_version = latest_schema_version(&conn)?;
244 conn.execute(
245 "INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('schema_version', ?1)",
246 rusqlite::params![schema_version],
247 )?;
248
249 output::emit_json(&MigrateResponse {
250 db_path: paths.db.display().to_string(),
251 schema_version,
252 status: "ok".to_string(),
253 elapsed_ms: start.elapsed().as_millis() as u64,
254 })?;
255
256 Ok(())
257}
258
259fn compute_checksum(name: &str, version: i32, sql: &str) -> u64 {
268 let mut hasher = SipHasher13::new();
269 name.hash(&mut hasher);
270 version.hash(&mut hasher);
271 sql.hash(&mut hasher);
272 hasher.finish()
273}
274
275fn run_dry_run(conn: &rusqlite::Connection, db_path: &Path) -> Result<DryRunReport, AppError> {
281 let start = std::time::Instant::now();
282 let schema_version = latest_schema_version(conn).unwrap_or(0);
283
284 let applied_versions: std::collections::BTreeSet<i32> = if history_table_exists(conn) {
288 let mut stmt = conn
289 .prepare_cached("SELECT version FROM refinery_schema_history")
290 .map_err(AppError::Database)?;
291 let rows = stmt
292 .query_map([], |r| r.get::<_, i64>(0))
293 .map_err(AppError::Database)?;
294 rows.filter_map(|r| r.ok()).map(|v| v as i32).collect()
295 } else {
296 std::collections::BTreeSet::new()
297 };
298
299 let mut pending: Vec<MigrationEntry> = Vec::new();
303 let mut mismatches: Vec<RehashEntry> = Vec::new();
304
305 for mig in crate::migrations::runner().get_migrations().iter() {
306 let name = mig.name().to_string();
307 let version = mig.version();
308 let sql = mig.sql().unwrap_or("").to_string();
309
310 if !applied_versions.contains(&version) {
311 pending.push(MigrationEntry {
313 version: version as i64,
314 name,
315 applied_on: None,
316 checksum: None,
317 });
318 continue;
319 }
320
321 let new_checksum = compute_checksum(&name, version, &sql).to_string();
323 if let Ok(existing) = conn.query_row(
324 "SELECT checksum FROM refinery_schema_history WHERE version = ?1",
325 rusqlite::params![version],
326 |r| r.get::<_, String>(0),
327 ) {
328 let existing_trim = existing.trim();
329 if existing_trim != new_checksum {
330 mismatches.push(RehashEntry {
331 version: version as i64,
332 name,
333 old_checksum: existing_trim.to_string(),
334 new_checksum,
335 });
336 }
337 }
338 }
339
340 let pending_count = pending.len() as u32;
341 let status = if !mismatches.is_empty() && pending.is_empty() {
342 "ok_checksum_drift"
343 } else if pending.is_empty() {
344 "ok_no_pending"
345 } else {
346 "ok_pending"
347 };
348
349 Ok(DryRunReport {
350 db_path: db_path.display().to_string(),
351 schema_version,
352 pending_migrations: pending,
353 pending_count,
354 checksum_mismatches: mismatches,
355 status: status.to_string(),
356 elapsed_ms: start.elapsed().as_millis() as u64,
357 })
358}
359
360fn run_rehash(conn: &mut rusqlite::Connection, db_path: &Path) -> Result<RehashReport, AppError> {
361 let start = std::time::Instant::now();
362 let schema_version = latest_schema_version(conn).unwrap_or(0);
363
364 if !history_table_exists(conn) {
365 return Ok(RehashReport {
366 db_path: db_path.display().to_string(),
367 schema_version,
368 rewritten: vec![],
369 inspected: 0,
370 null_rows_fixed: 0,
371 v013_tables_created: false,
372 status: "ok_no_history".to_string(),
373 elapsed_ms: start.elapsed().as_millis() as u64,
374 });
375 }
376
377 let null_rows_fixed = sanitize_null_applied_on(conn)?;
378 let v013_tables_created = ensure_v013_tables_exist(conn)?;
379
380 let mut rewritten: Vec<RehashEntry> = Vec::new();
381 let mut inspected = 0usize;
382
383 for mig in crate::migrations::runner().get_migrations().iter() {
384 if mig.sql().is_none() {
385 continue;
386 }
387 let name = mig.name().to_string();
388 let version = mig.version();
389 let sql = mig.sql().unwrap_or("").to_string();
390 let new_checksum = compute_checksum(&name, version, &sql);
391
392 let row: Option<String> = conn
393 .query_row(
394 "SELECT checksum FROM refinery_schema_history WHERE version = ?1",
395 rusqlite::params![version],
396 |r| r.get(0),
397 )
398 .optional()?;
399
400 inspected += 1;
401 if let Some(existing) = row {
402 let existing_trim = existing.trim();
403 let new_str = new_checksum.to_string();
404 if existing_trim != new_str {
405 conn.execute(
406 "UPDATE refinery_schema_history SET checksum = ?1 WHERE version = ?2",
407 rusqlite::params![new_str, version],
408 )?;
409 rewritten.push(RehashEntry {
410 version: version as i64,
411 name,
412 old_checksum: existing_trim.to_string(),
413 new_checksum: new_str,
414 });
415 }
416 }
417 }
422
423 let status = if rewritten.is_empty() {
424 "ok_no_changes"
425 } else {
426 "ok_rewritten"
427 };
428
429 Ok(RehashReport {
430 db_path: db_path.display().to_string(),
431 schema_version,
432 rewritten,
433 inspected,
434 null_rows_fixed,
435 v013_tables_created,
436 status: status.to_string(),
437 elapsed_ms: start.elapsed().as_millis() as u64,
438 })
439}
440
441fn run_to_llm_only(
442 conn: &mut rusqlite::Connection,
443 db_path: &Path,
444) -> Result<ToLlmOnlyReport, AppError> {
445 let start = std::time::Instant::now();
446
447 let vec_tables_were_present: bool = {
451 let count: i64 = conn
452 .query_row(
453 "SELECT COUNT(*) FROM sqlite_master
454 WHERE type='table' AND name IN ('vec_memories','vec_entities','vec_chunks')",
455 [],
456 |r| r.get(0),
457 )
458 .unwrap_or(0);
459 count > 0
460 };
461
462 let null_rows_fixed = sanitize_null_applied_on(conn)?;
464
465 let v013_tables_created = ensure_v013_tables_exist(conn)?;
467
468 let vec_tables_removed = if vec_tables_were_present {
470 remove_vec_virtual_tables_without_module(conn)?
471 } else {
472 0
473 };
474
475 let rehash_report = run_rehash(conn, db_path)?;
477 let rehashed = rehash_report.rewritten;
478
479 crate::migrations::runner()
484 .run(conn)
485 .map_err(|e| AppError::Internal(anyhow::anyhow!("migration failed: {e}")))?;
486
487 conn.execute_batch(&format!(
488 "PRAGMA user_version = {};",
489 crate::constants::SCHEMA_USER_VERSION
490 ))?;
491
492 let schema_version = latest_schema_version(conn)?;
493 conn.execute(
494 "INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('schema_version', ?1)",
495 rusqlite::params![schema_version],
496 )?;
497
498 let v013_applied = schema_version >= 13;
501
502 Ok(ToLlmOnlyReport {
503 db_path: db_path.display().to_string(),
504 schema_version,
505 rehashed,
506 vec_tables_were_present,
507 v013_applied,
508 null_rows_fixed,
509 vec_tables_removed_via_writable_schema: vec_tables_removed,
510 v013_tables_created,
511 status: "ok".to_string(),
512 elapsed_ms: start.elapsed().as_millis() as u64,
513 })
514}
515
516fn history_table_exists(conn: &rusqlite::Connection) -> bool {
517 conn.query_row(
518 "SELECT name FROM sqlite_master WHERE type='table' AND name='refinery_schema_history'",
519 [],
520 |r| r.get::<_, String>(0),
521 )
522 .optional()
523 .ok()
524 .flatten()
525 .is_some()
526}
527
528fn sanitize_null_applied_on(conn: &rusqlite::Connection) -> Result<u64, AppError> {
529 if !history_table_exists(conn) {
530 return Ok(0);
531 }
532 let now = Utc::now().to_rfc3339();
533 let fixed = conn.execute(
534 "UPDATE refinery_schema_history SET applied_on = ?1 WHERE applied_on IS NULL",
535 rusqlite::params![now],
536 )?;
537 Ok(fixed as u64)
538}
539
540fn remove_vec_virtual_tables_without_module(
541 conn: &rusqlite::Connection,
542) -> Result<usize, AppError> {
543 let count: i64 = conn
544 .query_row(
545 "SELECT COUNT(*) FROM sqlite_master
546 WHERE type='table' AND name IN ('vec_memories','vec_entities','vec_chunks')",
547 [],
548 |r| r.get(0),
549 )
550 .unwrap_or(0);
551 if count == 0 {
552 return Ok(0);
553 }
554
555 let drop_works = conn
556 .execute_batch("DROP TABLE IF EXISTS vec_memories;")
557 .is_ok();
558 if drop_works {
559 let _ = conn.execute_batch("DROP TABLE IF EXISTS vec_entities;");
560 let _ = conn.execute_batch("DROP TABLE IF EXISTS vec_chunks;");
561 return Ok(count as usize);
562 }
563
564 conn.execute_batch("PRAGMA writable_schema = ON;")?;
565 let removed = conn.execute(
566 "DELETE FROM sqlite_master WHERE type='table'
567 AND (name LIKE 'vec_memories%' OR name LIKE 'vec_entities%' OR name LIKE 'vec_chunks%')",
568 [],
569 )?;
570 conn.execute_batch("PRAGMA writable_schema = OFF;")?;
571 conn.execute_batch("VACUUM;")?;
572
573 Ok(removed)
574}
575
576pub(crate) fn ensure_v013_tables_exist(conn: &rusqlite::Connection) -> Result<bool, AppError> {
580 let exists: bool = conn
581 .query_row(
582 "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='memory_embeddings'",
583 [],
584 |r| r.get::<_, i64>(0),
585 )
586 .unwrap_or(0)
587 > 0;
588 if exists {
589 return Ok(false);
590 }
591
592 if !history_table_exists(conn) {
593 return Ok(false);
594 }
595 let v013_in_history: bool = conn
596 .query_row(
597 "SELECT COUNT(*) FROM refinery_schema_history WHERE version = 13",
598 [],
599 |r| r.get::<_, i64>(0),
600 )
601 .unwrap_or(0)
602 > 0;
603 if !v013_in_history {
604 return Ok(false);
605 }
606
607 let v013_sql = crate::migrations::runner()
608 .get_migrations()
609 .iter()
610 .find(|m| m.version() == 13)
611 .and_then(|m| m.sql().map(|s| s.to_string()));
612
613 if let Some(sql) = v013_sql {
614 conn.execute_batch(&sql)?;
615 tracing::warn!(
616 "G41 repair: V013 was registered but tables missing. \
617 Executed V013 SQL to create embedding tables."
618 );
619 Ok(true)
620 } else {
621 Err(AppError::Internal(anyhow::anyhow!(
622 "V013 migration SQL not found in embedded migrations"
623 )))
624 }
625}
626
627fn list_applied_migrations(conn: &rusqlite::Connection) -> Result<Vec<MigrationEntry>, AppError> {
628 let table_exists: Option<String> = conn
629 .query_row(
630 "SELECT name FROM sqlite_master WHERE type='table' AND name='refinery_schema_history'",
631 [],
632 |r| r.get(0),
633 )
634 .optional()?;
635 if table_exists.is_none() {
636 return Ok(vec![]);
637 }
638 let mut stmt = conn.prepare_cached(
639 "SELECT version, name, applied_on, checksum FROM refinery_schema_history ORDER BY version ASC",
640 )?;
641 let entries = stmt
642 .query_map([], |r| {
643 let checksum: Option<String> = r.get(3)?;
644 Ok(MigrationEntry {
645 version: r.get(0)?,
646 name: r.get(1)?,
647 applied_on: r.get(2)?,
648 checksum: checksum
649 .map(|s| s.trim().to_string())
650 .filter(|s| !s.is_empty()),
651 })
652 })?
653 .collect::<Result<Vec<_>, _>>()?;
654 Ok(entries)
655}
656
657fn latest_schema_version(conn: &rusqlite::Connection) -> Result<u32, AppError> {
658 match conn.query_row(
659 "SELECT version FROM refinery_schema_history ORDER BY version DESC LIMIT 1",
660 [],
661 |row| row.get::<_, i64>(0),
662 ) {
663 Ok(version) => Ok(version.max(0) as u32),
664 Err(rusqlite::Error::QueryReturnedNoRows) => Ok(0),
665 Err(err) => Err(AppError::Database(err)),
666 }
667}
668
669#[cfg(test)]
670mod tests {
671 use super::*;
672 use rusqlite::Connection;
673
674 fn create_db_without_history() -> Connection {
675 Connection::open_in_memory().expect("failed to open in-memory db")
676 }
677
678 fn create_db_with_history(version: i64) -> Connection {
679 let conn = Connection::open_in_memory().expect("failed to open in-memory db");
680 conn.execute_batch(
681 "CREATE TABLE refinery_schema_history (
682 version INTEGER NOT NULL,
683 name TEXT,
684 applied_on TEXT,
685 checksum TEXT
686 );",
687 )
688 .expect("failed to create history table");
689 conn.execute(
690 "INSERT INTO refinery_schema_history (version, name) VALUES (?1, 'V001__init')",
691 rusqlite::params![version],
692 )
693 .expect("failed to insert version");
694 conn
695 }
696
697 #[test]
698 fn latest_schema_version_returns_error_without_table() {
699 let conn = create_db_without_history();
700 let result = latest_schema_version(&conn);
701 assert!(result.is_err(), "must return Err when table does not exist");
702 }
703
704 #[test]
705 fn latest_schema_version_returns_max_version() {
706 let conn = create_db_with_history(6);
707 let version = latest_schema_version(&conn).unwrap();
708 assert_eq!(version, 6u32);
709 }
710
711 #[test]
712 fn migrate_response_serializes_required_fields() {
713 let resp = MigrateResponse {
714 db_path: "/tmp/test.sqlite".to_string(),
715 schema_version: 6,
716 status: "ok".to_string(),
717 elapsed_ms: 12,
718 };
719 let json = serde_json::to_value(&resp).unwrap();
720 assert_eq!(json["status"], "ok");
721 assert_eq!(json["schema_version"], 6);
722 assert_eq!(json["db_path"], "/tmp/test.sqlite");
723 assert_eq!(json["elapsed_ms"], 12);
724 }
725
726 #[test]
727 fn latest_schema_version_returns_zero_when_table_empty() {
728 let conn = Connection::open_in_memory().expect("in-memory db");
729 conn.execute_batch(
730 "CREATE TABLE refinery_schema_history (
731 version INTEGER NOT NULL,
732 name TEXT
733 );",
734 )
735 .expect("table creation");
736 let version = latest_schema_version(&conn).unwrap();
737 assert_eq!(version, 0u32);
738 }
739
740 #[test]
741 fn compute_checksum_is_deterministic_and_matches_refinery() {
742 let a = compute_checksum("vec_tables", 2, "SELECT 1;");
746 let b = compute_checksum("vec_tables", 2, "SELECT 1;");
747 assert_eq!(a, b, "checksum must be deterministic");
748 let c = compute_checksum("vec_tables", 2, "SELECT 1;\n");
749 assert_ne!(
750 a, c,
751 "trailing newline must change the checksum (matches refinery)"
752 );
753 }
754
755 #[test]
756 fn rehash_with_no_history_returns_empty() {
757 let mut conn = create_db_without_history();
758 let report = run_rehash(&mut conn, Path::new("/tmp/empty.sqlite")).unwrap();
759 assert_eq!(report.status, "ok_no_history");
760 assert!(report.rewritten.is_empty());
761 assert_eq!(report.inspected, 0);
762 }
763
764 #[test]
765 fn rehash_writes_matching_checksum() {
766 let mut conn = Connection::open_in_memory().expect("in-memory db");
769 conn.execute_batch(
770 "CREATE TABLE refinery_schema_history (
771 version INTEGER NOT NULL,
772 name TEXT,
773 applied_on TEXT,
774 checksum TEXT
775 );",
776 )
777 .expect("history create");
778 let first = crate::migrations::runner().get_migrations()[0].clone();
780 let v = first.version();
781 let name = first.name().to_string();
782 let sql = first.sql().unwrap_or("").to_string();
783 let correct = compute_checksum(&name, v, &sql).to_string();
784 let wrong = "1234567890";
785 assert_ne!(correct, wrong, "test sanity: correct != wrong");
786
787 conn.execute(
788 "INSERT INTO refinery_schema_history (version, name, checksum) VALUES (?1, ?2, ?3)",
789 rusqlite::params![v, name, wrong],
790 )
791 .expect("insert");
792
793 let report = run_rehash(&mut conn, Path::new("/tmp/test.sqlite")).unwrap();
794 assert_eq!(report.rewritten.len(), 1);
795 assert_eq!(report.rewritten[0].old_checksum, wrong);
796 assert_eq!(report.rewritten[0].new_checksum, correct);
797
798 let stored: String = conn
800 .query_row(
801 "SELECT checksum FROM refinery_schema_history WHERE version = ?1",
802 rusqlite::params![v],
803 |r| r.get(0),
804 )
805 .unwrap();
806 assert_eq!(stored, correct);
807 }
808
809 #[test]
810 fn rehash_is_idempotent_when_checksums_match() {
811 let mut conn = Connection::open_in_memory().expect("in-memory db");
812 conn.execute_batch(
813 "CREATE TABLE refinery_schema_history (
814 version INTEGER NOT NULL,
815 name TEXT,
816 applied_on TEXT,
817 checksum TEXT
818 );",
819 )
820 .unwrap();
821 let first = crate::migrations::runner().get_migrations()[0].clone();
822 let v = first.version();
823 let name = first.name().to_string();
824 let sql = first.sql().unwrap_or("").to_string();
825 let correct = compute_checksum(&name, v, &sql).to_string();
826 conn.execute(
827 "INSERT INTO refinery_schema_history (version, name, checksum) VALUES (?1, ?2, ?3)",
828 rusqlite::params![v, name, correct.clone()],
829 )
830 .unwrap();
831
832 let report = run_rehash(&mut conn, Path::new("/tmp/test.sqlite")).unwrap();
833 assert!(
834 report.rewritten.is_empty(),
835 "must not rewrite matching rows"
836 );
837 assert_eq!(report.status, "ok_no_changes");
838 }
839
840 #[test]
841 fn rehash_matches_refinery_embedded_checksum_for_v001() {
842 let dir = tempfile::tempdir().expect("tempdir");
848 let path = dir.path().join("test.sqlite");
849 let mut conn = open_rw(&path).expect("open_rw");
850 crate::migrations::runner().run(&mut conn).expect("migrate");
851 let stored: String = conn
852 .query_row(
853 "SELECT checksum FROM refinery_schema_history WHERE version = 1",
854 [],
855 |r| r.get(0),
856 )
857 .unwrap();
858 let report = run_rehash(&mut conn, &path).expect("rehash");
859 assert!(
860 report.rewritten.is_empty(),
861 "V001 must NOT be rewritten when checksums already match: rewrote={:?}",
862 report.rewritten
863 );
864 crate::migrations::runner()
868 .run(&mut conn)
869 .expect("re-run migrate must succeed");
870 let stored_after: String = conn
871 .query_row(
872 "SELECT checksum FROM refinery_schema_history WHERE version = 1",
873 [],
874 |r| r.get(0),
875 )
876 .unwrap();
877 assert_eq!(
878 stored, stored_after,
879 "checksum must not change after rehash"
880 );
881 }
882
883 #[test]
884 fn to_llm_only_reports_no_vec_tables_on_fresh_db() {
885 let dir = tempfile::tempdir().expect("tempdir");
888 let path = dir.path().join("fresh.sqlite");
889 let mut conn = open_rw(&path).expect("open_rw");
890 crate::migrations::runner().run(&mut conn).expect("migrate");
891 let report = run_to_llm_only(&mut conn, &path).expect("to_llm_only");
892 assert!(!report.vec_tables_were_present);
893 assert!(report.v013_applied, "V013 must be marked applied");
894 assert_eq!(report.status, "ok");
895 }
896
897 #[test]
898 fn history_table_exists_detects_table() {
899 let conn = create_db_with_history(1);
900 assert!(history_table_exists(&conn));
901 let conn2 = create_db_without_history();
902 assert!(!history_table_exists(&conn2));
903 }
904
905 #[test]
906 fn sanitize_null_applied_on_fixes_null_rows() {
907 let conn = Connection::open_in_memory().expect("in-memory db");
908 conn.execute_batch(
909 "CREATE TABLE refinery_schema_history (
910 version INTEGER NOT NULL,
911 name TEXT,
912 applied_on TEXT,
913 checksum TEXT
914 );",
915 )
916 .unwrap();
917 conn.execute(
918 "INSERT INTO refinery_schema_history (version, name, checksum) VALUES (1, 'init', '123')",
919 [],
920 )
921 .unwrap();
922 let fixed = sanitize_null_applied_on(&conn).unwrap();
923 assert_eq!(fixed, 1, "must fix exactly one NULL row");
924 let applied: String = conn
925 .query_row(
926 "SELECT applied_on FROM refinery_schema_history WHERE version = 1",
927 [],
928 |r| r.get(0),
929 )
930 .unwrap();
931 assert!(
932 chrono::DateTime::parse_from_rfc3339(&applied).is_ok(),
933 "applied_on must be valid RFC3339, got: {applied}"
934 );
935 }
936
937 #[test]
938 fn sanitize_null_applied_on_noop_when_all_filled() {
939 let conn = Connection::open_in_memory().expect("in-memory db");
940 conn.execute_batch(
941 "CREATE TABLE refinery_schema_history (
942 version INTEGER NOT NULL,
943 name TEXT,
944 applied_on TEXT,
945 checksum TEXT
946 );",
947 )
948 .unwrap();
949 conn.execute(
950 "INSERT INTO refinery_schema_history (version, name, applied_on, checksum) VALUES (1, 'init', '2026-06-09T00:00:00+00:00', '123')",
951 [],
952 )
953 .unwrap();
954 let fixed = sanitize_null_applied_on(&conn).unwrap();
955 assert_eq!(fixed, 0, "must not touch rows with valid applied_on");
956 }
957
958 #[test]
959 fn rehash_does_not_insert_missing_migrations() {
960 let mut conn = Connection::open_in_memory().expect("in-memory db");
961 conn.execute_batch(
962 "CREATE TABLE refinery_schema_history (
963 version INTEGER NOT NULL,
964 name TEXT,
965 applied_on TEXT,
966 checksum TEXT
967 );",
968 )
969 .unwrap();
970 let runner = crate::migrations::runner();
971 let migrations = runner.get_migrations();
972 for mig in migrations.iter() {
973 if mig.version() >= 13 {
974 break;
975 }
976 let name = mig.name().to_string();
977 let v = mig.version();
978 let sql = mig.sql().unwrap_or("").to_string();
979 let cs = compute_checksum(&name, v, &sql).to_string();
980 conn.execute(
981 "INSERT INTO refinery_schema_history (version, name, applied_on, checksum) VALUES (?1, ?2, '2026-01-01T00:00:00+00:00', ?3)",
982 rusqlite::params![v, name, cs],
983 )
984 .unwrap();
985 }
986 let _report = run_rehash(&mut conn, Path::new("/tmp/test.sqlite")).unwrap();
987 let v013_exists: bool = conn
988 .query_row(
989 "SELECT COUNT(*) FROM refinery_schema_history WHERE version = 13",
990 [],
991 |r| r.get::<_, i64>(0),
992 )
993 .unwrap()
994 > 0;
995 assert!(
996 !v013_exists,
997 "V013 must NOT be inserted by run_rehash (G41 fix)"
998 );
999 }
1000
1001 #[test]
1002 fn remove_vec_tables_noop_when_no_vec() {
1003 let conn = Connection::open_in_memory().expect("in-memory db");
1004 let removed = remove_vec_virtual_tables_without_module(&conn).unwrap();
1005 assert_eq!(removed, 0);
1006 }
1007
1008 #[test]
1009 fn ensure_v013_tables_noop_when_no_history() {
1010 let conn = Connection::open_in_memory().expect("in-memory db");
1011 let created = ensure_v013_tables_exist(&conn).unwrap();
1012 assert!(!created, "must be no-op when history table is absent");
1013 }
1014
1015 #[test]
1016 fn ensure_v013_tables_noop_when_tables_exist() {
1017 let mut conn = Connection::open_in_memory().expect("in-memory db");
1018 crate::migrations::runner().run(&mut conn).unwrap();
1019 let created = ensure_v013_tables_exist(&conn).unwrap();
1020 assert!(
1021 !created,
1022 "must be no-op when memory_embeddings already exists"
1023 );
1024 }
1025
1026 #[test]
1027 fn ensure_v013_tables_creates_when_phantom() {
1028 let mut conn = Connection::open_in_memory().expect("in-memory db");
1029 crate::migrations::runner().run(&mut conn).unwrap();
1030 conn.execute_batch("DROP TABLE IF EXISTS memory_embeddings")
1031 .unwrap();
1032 conn.execute_batch("DROP TABLE IF EXISTS entity_embeddings")
1033 .unwrap();
1034 conn.execute_batch("DROP TABLE IF EXISTS chunk_embeddings")
1035 .unwrap();
1036 let created = ensure_v013_tables_exist(&conn).unwrap();
1037 assert!(
1038 created,
1039 "must create tables when V013 is in history but tables are missing"
1040 );
1041 let count: i64 = conn
1042 .query_row(
1043 "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='memory_embeddings'",
1044 [],
1045 |r| r.get(0),
1046 )
1047 .unwrap();
1048 assert_eq!(count, 1, "memory_embeddings must exist after repair");
1049 }
1050}