Skip to main content

sqlite_graphrag/commands/
migrate.rs

1//! Handler for the `migrate` CLI subcommand.
2
3use crate::errors::AppError;
4use crate::output;
5use crate::paths::AppPaths;
6use crate::storage::connection::open_rw;
7use chrono::Utc;
8use rusqlite::OptionalExtension;
9use serde::Serialize;
10use siphasher::sip::SipHasher13;
11use std::hash::{Hash, Hasher};
12use std::path::Path;
13
14#[derive(clap::Args)]
15#[command(after_long_help = "EXAMPLES:\n  \
16    # Apply pending schema migrations\n  \
17    sqlite-graphrag migrate\n\n  \
18    # Show already-applied migrations without applying new ones\n  \
19    sqlite-graphrag migrate --status\n\n  \
20    # Migrate a database at a custom path\n  \
21    sqlite-graphrag migrate --db /path/to/graphrag.sqlite\n\n  \
22    # Rewrite recorded migration checksums to match the current file content.\n  \
23    # Use this after upgrading across a version that intentionally changed a\n  \
24    # migration file (v1.0.76 is the first release where this is exposed).\n  \
25    sqlite-graphrag migrate --rehash\n\n  \
26    # Full upgrade: rehash, apply V013 (drop vec tables), verify schema.\n  \
27    # Required once for users upgrading from v1.0.74 or v1.0.75.\n  \
28    sqlite-graphrag migrate --to-llm-only")]
29pub struct MigrateArgs {
30    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
31    pub db: Option<String>,
32    /// Explicit JSON flag. Accepted as a no-op because output is already JSON by default.
33    #[arg(long, default_value_t = false)]
34    pub json: bool,
35    /// Show already applied migrations without applying new ones.
36    #[arg(long, default_value_t = false)]
37    pub status: bool,
38    /// Rewrite recorded migration checksums to match the current file content
39    /// without re-applying the SQL. Idempotent; safe to re-run.
40    #[arg(long, default_value_t = false)]
41    pub rehash: bool,
42    /// One-shot upgrade for v1.0.74 / v1.0.75 databases: rehash checksums,
43    /// apply the V013 vec-table-drop migration, and report a structured
44    /// summary. Combines `--rehash` and the regular migration runner.
45    #[arg(long, default_value_t = false)]
46    pub to_llm_only: bool,
47    /// Required for `--to-llm-only` to acknowledge that the operation is
48    /// destructive: it permanently removes the `vec_memories`,
49    /// `vec_entities`, and `vec_chunks` virtual tables. The BLOB-backed
50    /// `memory_embeddings` / `entity_embeddings` / `chunk_embeddings`
51    /// tables remain and are the source of truth going forward.
52    #[arg(long, default_value_t = false)]
53    pub drop_vec_tables: bool,
54}
55
56#[derive(Serialize)]
57struct MigrateResponse {
58    db_path: String,
59    /// Latest applied migration number from `refinery_schema_history`.
60    /// Emitted as JSON number for cross-command consistency with `health`/`stats`/`init` (since v1.0.35).
61    schema_version: u32,
62    status: String,
63    /// Total execution time in milliseconds from handler start to serialisation.
64    elapsed_ms: u64,
65}
66
67#[derive(Serialize)]
68struct MigrateStatusResponse {
69    db_path: String,
70    applied_migrations: Vec<MigrationEntry>,
71    /// Latest applied migration number. JSON number since v1.0.35.
72    schema_version: u32,
73    elapsed_ms: u64,
74}
75
76#[derive(Serialize)]
77struct MigrationEntry {
78    version: i64,
79    name: String,
80    applied_on: Option<String>,
81    #[serde(skip_serializing_if = "Option::is_none")]
82    checksum: Option<String>,
83}
84
85#[derive(Serialize)]
86struct RehashReport {
87    db_path: String,
88    schema_version: u32,
89    /// One row per migration whose recorded checksum was rewritten.
90    /// Empty array when nothing changed (already up to date).
91    rewritten: Vec<RehashEntry>,
92    /// Number of entries inspected.
93    inspected: usize,
94    /// Rows where `applied_on` was NULL and got backfilled with a timestamp.
95    null_rows_fixed: u64,
96    /// True if the BLOB-backed embedding tables were created by the G41 repair.
97    v013_tables_created: bool,
98    status: String,
99    elapsed_ms: u64,
100}
101
102#[derive(Serialize, Debug)]
103struct RehashEntry {
104    version: i64,
105    name: String,
106    old_checksum: String,
107    new_checksum: String,
108}
109
110#[derive(Serialize)]
111struct ToLlmOnlyReport {
112    db_path: String,
113    schema_version: u32,
114    rehashed: Vec<RehashEntry>,
115    /// True if the vec0 virtual tables existed in the database before the
116    /// command ran. After this command they will be gone.
117    vec_tables_were_present: bool,
118    /// True if V013 was applied during this invocation.
119    v013_applied: bool,
120    /// Rows where `applied_on` was NULL and got backfilled with a timestamp.
121    null_rows_fixed: u64,
122    /// Number of vec0 virtual table entries removed from sqlite_master
123    /// via PRAGMA writable_schema (includes shadow tables).
124    vec_tables_removed_via_writable_schema: usize,
125    /// True if the BLOB-backed embedding tables were created by the G41 repair.
126    v013_tables_created: bool,
127    status: String,
128    elapsed_ms: u64,
129}
130
131pub fn run(args: MigrateArgs) -> Result<(), AppError> {
132    let start = std::time::Instant::now();
133    let _ = args.json; // --json is a no-op because output is already JSON by default
134    let paths = AppPaths::resolve(args.db.as_deref())?;
135    paths.ensure_dirs()?;
136
137    if args.status && (args.rehash || args.to_llm_only) {
138        return Err(AppError::Validation(
139            "--status cannot be combined with --rehash or --to-llm-only".into(),
140        ));
141    }
142    if args.rehash && args.to_llm_only {
143        return Err(AppError::Validation(
144            "--rehash and --to-llm-only are mutually exclusive".into(),
145        ));
146    }
147    if args.to_llm_only && !args.drop_vec_tables {
148        return Err(AppError::Validation(
149            "--to-llm-only requires --drop-vec-tables to acknowledge the destructive drop".into(),
150        ));
151    }
152
153    let mut conn = open_rw(&paths.db)?;
154
155    if args.status {
156        let schema_version = latest_schema_version(&conn).unwrap_or(0);
157        let applied = list_applied_migrations(&conn)?;
158        output::emit_json(&MigrateStatusResponse {
159            db_path: paths.db.display().to_string(),
160            applied_migrations: applied,
161            schema_version,
162            elapsed_ms: start.elapsed().as_millis() as u64,
163        })?;
164        return Ok(());
165    }
166
167    if args.rehash {
168        let report = run_rehash(&mut conn, &paths.db)?;
169        output::emit_json(&report)?;
170        return Ok(());
171    }
172
173    if args.to_llm_only {
174        let report = run_to_llm_only(&mut conn, &paths.db)?;
175        output::emit_json(&report)?;
176        return Ok(());
177    }
178
179    sanitize_null_applied_on(&conn)?;
180    ensure_v013_tables_exist(&conn)?;
181
182    crate::migrations::runner()
183        .run(&mut conn)
184        .map_err(|e| AppError::Internal(anyhow::anyhow!("migration failed: {e}")))?;
185
186    conn.execute_batch(&format!(
187        "PRAGMA user_version = {};",
188        crate::constants::SCHEMA_USER_VERSION
189    ))?;
190
191    let schema_version = latest_schema_version(&conn)?;
192    conn.execute(
193        "INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('schema_version', ?1)",
194        rusqlite::params![schema_version],
195    )?;
196
197    output::emit_json(&MigrateResponse {
198        db_path: paths.db.display().to_string(),
199        schema_version,
200        status: "ok".to_string(),
201        elapsed_ms: start.elapsed().as_millis() as u64,
202    })?;
203
204    Ok(())
205}
206
207/// Compute the SipHasher13 checksum for a migration entry. Matches the
208/// algorithm used by refinery-core 0.9.1 (`name | version | sql`).
209///
210/// The `version` parameter MUST be `i32` (the default
211/// `SchemaVersion` alias in refinery-core) — passing `i64` would
212/// produce a different hash because the SipHasher13 implementation
213/// hashes the value's bit representation, and the two integer types
214/// differ in width. The `int8-versions` feature is NOT enabled.
215fn compute_checksum(name: &str, version: i32, sql: &str) -> u64 {
216    let mut hasher = SipHasher13::new();
217    name.hash(&mut hasher);
218    version.hash(&mut hasher);
219    sql.hash(&mut hasher);
220    hasher.finish()
221}
222
223fn run_rehash(conn: &mut rusqlite::Connection, db_path: &Path) -> Result<RehashReport, AppError> {
224    let start = std::time::Instant::now();
225    let schema_version = latest_schema_version(conn).unwrap_or(0);
226
227    if !history_table_exists(conn) {
228        return Ok(RehashReport {
229            db_path: db_path.display().to_string(),
230            schema_version,
231            rewritten: vec![],
232            inspected: 0,
233            null_rows_fixed: 0,
234            v013_tables_created: false,
235            status: "ok_no_history".to_string(),
236            elapsed_ms: start.elapsed().as_millis() as u64,
237        });
238    }
239
240    let null_rows_fixed = sanitize_null_applied_on(conn)?;
241    let v013_tables_created = ensure_v013_tables_exist(conn)?;
242
243    let mut rewritten: Vec<RehashEntry> = Vec::new();
244    let mut inspected = 0usize;
245
246    for mig in crate::migrations::runner().get_migrations().iter() {
247        if mig.sql().is_none() {
248            continue;
249        }
250        let name = mig.name().to_string();
251        let version = mig.version();
252        let sql = mig.sql().unwrap_or("").to_string();
253        let new_checksum = compute_checksum(&name, version, &sql);
254
255        let row: Option<String> = conn
256            .query_row(
257                "SELECT checksum FROM refinery_schema_history WHERE version = ?1",
258                rusqlite::params![version],
259                |r| r.get(0),
260            )
261            .optional()?;
262
263        inspected += 1;
264        if let Some(existing) = row {
265            let existing_trim = existing.trim();
266            let new_str = new_checksum.to_string();
267            if existing_trim != new_str {
268                conn.execute(
269                    "UPDATE refinery_schema_history SET checksum = ?1 WHERE version = ?2",
270                    rusqlite::params![new_str, version],
271                )?;
272                rewritten.push(RehashEntry {
273                    version: version as i64,
274                    name,
275                    old_checksum: existing_trim.to_string(),
276                    new_checksum: new_str,
277                });
278            }
279        }
280        // Migrations absent from history are intentionally NOT inserted.
281        // They must be applied by runner().run() which executes their SQL.
282        // Inserting them marks them as "applied" without running the SQL,
283        // causing phantom registrations (G41).
284    }
285
286    let status = if rewritten.is_empty() {
287        "ok_no_changes"
288    } else {
289        "ok_rewritten"
290    };
291
292    Ok(RehashReport {
293        db_path: db_path.display().to_string(),
294        schema_version,
295        rewritten,
296        inspected,
297        null_rows_fixed,
298        v013_tables_created,
299        status: status.to_string(),
300        elapsed_ms: start.elapsed().as_millis() as u64,
301    })
302}
303
304fn run_to_llm_only(
305    conn: &mut rusqlite::Connection,
306    db_path: &Path,
307) -> Result<ToLlmOnlyReport, AppError> {
308    let start = std::time::Instant::now();
309
310    // 1. Detect whether vec tables are still present in sqlite_master.
311    //    They were created by the v1.0.74 era V002 migration and dropped
312    //    by V013 in v1.0.76. Fresh v1.0.76 databases never had them.
313    let vec_tables_were_present: bool = {
314        let count: i64 = conn
315            .query_row(
316                "SELECT COUNT(*) FROM sqlite_master
317                 WHERE type='table' AND name IN ('vec_memories','vec_entities','vec_chunks')",
318                [],
319                |r| r.get(0),
320            )
321            .unwrap_or(0);
322        count > 0
323    };
324
325    // 1.5. Sanitize NULL applied_on values before any runner call.
326    let null_rows_fixed = sanitize_null_applied_on(conn)?;
327
328    // 1.6. G41 repair: ensure V013 tables exist if registered but missing.
329    let v013_tables_created = ensure_v013_tables_exist(conn)?;
330
331    // 1.75. Remove vec virtual tables via writable_schema if vec0 is absent.
332    let vec_tables_removed = if vec_tables_were_present {
333        remove_vec_virtual_tables_without_module(conn)?
334    } else {
335        0
336    };
337
338    // 2. Rehash checksums (in case V002 was the offender).
339    let rehash_report = run_rehash(conn, db_path)?;
340    let rehashed = rehash_report.rewritten;
341
342    // 3. Apply pending migrations (V013 will run if it hasn't yet).
343    //    If the user is on v1.0.75 the V013 migration was already applied,
344    //    so this is a no-op; if they're on v1.0.74 the V013 drop will run.
345    //    If vec tables were removed in step 1.75, V013 DROP is a no-op.
346    crate::migrations::runner()
347        .run(conn)
348        .map_err(|e| AppError::Internal(anyhow::anyhow!("migration failed: {e}")))?;
349
350    conn.execute_batch(&format!(
351        "PRAGMA user_version = {};",
352        crate::constants::SCHEMA_USER_VERSION
353    ))?;
354
355    let schema_version = latest_schema_version(conn)?;
356    conn.execute(
357        "INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('schema_version', ?1)",
358        rusqlite::params![schema_version],
359    )?;
360
361    // 4. Detect V013 application by checking the schema_version.
362    //    V013 has version 13, so schema_version >= 13 implies it ran.
363    let v013_applied = schema_version >= 13;
364
365    Ok(ToLlmOnlyReport {
366        db_path: db_path.display().to_string(),
367        schema_version,
368        rehashed,
369        vec_tables_were_present,
370        v013_applied,
371        null_rows_fixed,
372        vec_tables_removed_via_writable_schema: vec_tables_removed,
373        v013_tables_created,
374        status: "ok".to_string(),
375        elapsed_ms: start.elapsed().as_millis() as u64,
376    })
377}
378
379fn history_table_exists(conn: &rusqlite::Connection) -> bool {
380    conn.query_row(
381        "SELECT name FROM sqlite_master WHERE type='table' AND name='refinery_schema_history'",
382        [],
383        |r| r.get::<_, String>(0),
384    )
385    .optional()
386    .ok()
387    .flatten()
388    .is_some()
389}
390
391fn sanitize_null_applied_on(conn: &rusqlite::Connection) -> Result<u64, AppError> {
392    if !history_table_exists(conn) {
393        return Ok(0);
394    }
395    let now = Utc::now().to_rfc3339();
396    let fixed = conn.execute(
397        "UPDATE refinery_schema_history SET applied_on = ?1 WHERE applied_on IS NULL",
398        rusqlite::params![now],
399    )?;
400    Ok(fixed as u64)
401}
402
403fn remove_vec_virtual_tables_without_module(
404    conn: &rusqlite::Connection,
405) -> Result<usize, AppError> {
406    let count: i64 = conn
407        .query_row(
408            "SELECT COUNT(*) FROM sqlite_master
409             WHERE type='table' AND name IN ('vec_memories','vec_entities','vec_chunks')",
410            [],
411            |r| r.get(0),
412        )
413        .unwrap_or(0);
414    if count == 0 {
415        return Ok(0);
416    }
417
418    let drop_works = conn
419        .execute_batch("DROP TABLE IF EXISTS vec_memories;")
420        .is_ok();
421    if drop_works {
422        let _ = conn.execute_batch("DROP TABLE IF EXISTS vec_entities;");
423        let _ = conn.execute_batch("DROP TABLE IF EXISTS vec_chunks;");
424        return Ok(count as usize);
425    }
426
427    conn.execute_batch("PRAGMA writable_schema = ON;")?;
428    let removed = conn.execute(
429        "DELETE FROM sqlite_master WHERE type='table'
430         AND (name LIKE 'vec_memories%' OR name LIKE 'vec_entities%' OR name LIKE 'vec_chunks%')",
431        [],
432    )?;
433    conn.execute_batch("PRAGMA writable_schema = OFF;")?;
434    conn.execute_batch("VACUUM;")?;
435
436    Ok(removed)
437}
438
439/// Ensures the BLOB-backed embedding tables from V013 actually exist.
440/// Repairs databases where `run_rehash` registered V013 in the history
441/// without executing its SQL (G41 phantom registration bug).
442pub(crate) fn ensure_v013_tables_exist(conn: &rusqlite::Connection) -> Result<bool, AppError> {
443    let exists: bool = conn
444        .query_row(
445            "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='memory_embeddings'",
446            [],
447            |r| r.get::<_, i64>(0),
448        )
449        .unwrap_or(0)
450        > 0;
451    if exists {
452        return Ok(false);
453    }
454
455    if !history_table_exists(conn) {
456        return Ok(false);
457    }
458    let v013_in_history: bool = conn
459        .query_row(
460            "SELECT COUNT(*) FROM refinery_schema_history WHERE version = 13",
461            [],
462            |r| r.get::<_, i64>(0),
463        )
464        .unwrap_or(0)
465        > 0;
466    if !v013_in_history {
467        return Ok(false);
468    }
469
470    let v013_sql = crate::migrations::runner()
471        .get_migrations()
472        .iter()
473        .find(|m| m.version() == 13)
474        .and_then(|m| m.sql().map(|s| s.to_string()));
475
476    if let Some(sql) = v013_sql {
477        conn.execute_batch(&sql)?;
478        tracing::warn!(
479            "G41 repair: V013 was registered but tables missing. \
480             Executed V013 SQL to create embedding tables."
481        );
482        Ok(true)
483    } else {
484        Err(AppError::Internal(anyhow::anyhow!(
485            "V013 migration SQL not found in embedded migrations"
486        )))
487    }
488}
489
490fn list_applied_migrations(conn: &rusqlite::Connection) -> Result<Vec<MigrationEntry>, AppError> {
491    let table_exists: Option<String> = conn
492        .query_row(
493            "SELECT name FROM sqlite_master WHERE type='table' AND name='refinery_schema_history'",
494            [],
495            |r| r.get(0),
496        )
497        .optional()?;
498    if table_exists.is_none() {
499        return Ok(vec![]);
500    }
501    let mut stmt = conn.prepare_cached(
502        "SELECT version, name, applied_on, checksum FROM refinery_schema_history ORDER BY version ASC",
503    )?;
504    let entries = stmt
505        .query_map([], |r| {
506            let checksum: Option<String> = r.get(3)?;
507            Ok(MigrationEntry {
508                version: r.get(0)?,
509                name: r.get(1)?,
510                applied_on: r.get(2)?,
511                checksum: checksum
512                    .map(|s| s.trim().to_string())
513                    .filter(|s| !s.is_empty()),
514            })
515        })?
516        .collect::<Result<Vec<_>, _>>()?;
517    Ok(entries)
518}
519
520fn latest_schema_version(conn: &rusqlite::Connection) -> Result<u32, AppError> {
521    match conn.query_row(
522        "SELECT version FROM refinery_schema_history ORDER BY version DESC LIMIT 1",
523        [],
524        |row| row.get::<_, i64>(0),
525    ) {
526        Ok(version) => Ok(version.max(0) as u32),
527        Err(rusqlite::Error::QueryReturnedNoRows) => Ok(0),
528        Err(err) => Err(AppError::Database(err)),
529    }
530}
531
532#[cfg(test)]
533mod tests {
534    use super::*;
535    use rusqlite::Connection;
536
537    fn create_db_without_history() -> Connection {
538        Connection::open_in_memory().expect("failed to open in-memory db")
539    }
540
541    fn create_db_with_history(version: i64) -> Connection {
542        let conn = Connection::open_in_memory().expect("failed to open in-memory db");
543        conn.execute_batch(
544            "CREATE TABLE refinery_schema_history (
545                version INTEGER NOT NULL,
546                name TEXT,
547                applied_on TEXT,
548                checksum TEXT
549            );",
550        )
551        .expect("failed to create history table");
552        conn.execute(
553            "INSERT INTO refinery_schema_history (version, name) VALUES (?1, 'V001__init')",
554            rusqlite::params![version],
555        )
556        .expect("failed to insert version");
557        conn
558    }
559
560    #[test]
561    fn latest_schema_version_returns_error_without_table() {
562        let conn = create_db_without_history();
563        let result = latest_schema_version(&conn);
564        assert!(result.is_err(), "must return Err when table does not exist");
565    }
566
567    #[test]
568    fn latest_schema_version_returns_max_version() {
569        let conn = create_db_with_history(6);
570        let version = latest_schema_version(&conn).unwrap();
571        assert_eq!(version, 6u32);
572    }
573
574    #[test]
575    fn migrate_response_serializes_required_fields() {
576        let resp = MigrateResponse {
577            db_path: "/tmp/test.sqlite".to_string(),
578            schema_version: 6,
579            status: "ok".to_string(),
580            elapsed_ms: 12,
581        };
582        let json = serde_json::to_value(&resp).unwrap();
583        assert_eq!(json["status"], "ok");
584        assert_eq!(json["schema_version"], 6);
585        assert_eq!(json["db_path"], "/tmp/test.sqlite");
586        assert_eq!(json["elapsed_ms"], 12);
587    }
588
589    #[test]
590    fn latest_schema_version_returns_zero_when_table_empty() {
591        let conn = Connection::open_in_memory().expect("in-memory db");
592        conn.execute_batch(
593            "CREATE TABLE refinery_schema_history (
594                version INTEGER NOT NULL,
595                name TEXT
596            );",
597        )
598        .expect("table creation");
599        let version = latest_schema_version(&conn).unwrap();
600        assert_eq!(version, 0u32);
601    }
602
603    #[test]
604    fn compute_checksum_is_deterministic_and_matches_refinery() {
605        // This is the same algorithm that refinery-core 0.9.1 uses. We
606        // pin the numeric value to detect any change in siphasher
607        // behaviour that would break migration verification.
608        let a = compute_checksum("vec_tables", 2, "SELECT 1;");
609        let b = compute_checksum("vec_tables", 2, "SELECT 1;");
610        assert_eq!(a, b, "checksum must be deterministic");
611        let c = compute_checksum("vec_tables", 2, "SELECT 1;\n");
612        assert_ne!(
613            a, c,
614            "trailing newline must change the checksum (matches refinery)"
615        );
616    }
617
618    #[test]
619    fn rehash_with_no_history_returns_empty() {
620        let mut conn = create_db_without_history();
621        let report = run_rehash(&mut conn, Path::new("/tmp/empty.sqlite")).unwrap();
622        assert_eq!(report.status, "ok_no_history");
623        assert!(report.rewritten.is_empty());
624        assert_eq!(report.inspected, 0);
625    }
626
627    #[test]
628    fn rehash_writes_matching_checksum() {
629        // Pre-populate the history with a WRONG checksum. The rehash
630        // must detect the mismatch and rewrite the row.
631        let mut conn = Connection::open_in_memory().expect("in-memory db");
632        conn.execute_batch(
633            "CREATE TABLE refinery_schema_history (
634                version INTEGER NOT NULL,
635                name TEXT,
636                applied_on TEXT,
637                checksum TEXT
638            );",
639        )
640        .expect("history create");
641        // Use the first migration present in the embedded set (V001).
642        let first = crate::migrations::runner().get_migrations()[0].clone();
643        let v = first.version();
644        let name = first.name().to_string();
645        let sql = first.sql().unwrap_or("").to_string();
646        let correct = compute_checksum(&name, v, &sql).to_string();
647        let wrong = "1234567890";
648        assert_ne!(correct, wrong, "test sanity: correct != wrong");
649
650        conn.execute(
651            "INSERT INTO refinery_schema_history (version, name, checksum) VALUES (?1, ?2, ?3)",
652            rusqlite::params![v, name, wrong],
653        )
654        .expect("insert");
655
656        let report = run_rehash(&mut conn, Path::new("/tmp/test.sqlite")).unwrap();
657        assert_eq!(report.rewritten.len(), 1);
658        assert_eq!(report.rewritten[0].old_checksum, wrong);
659        assert_eq!(report.rewritten[0].new_checksum, correct);
660
661        // And the row now matches what refinery would compute.
662        let stored: String = conn
663            .query_row(
664                "SELECT checksum FROM refinery_schema_history WHERE version = ?1",
665                rusqlite::params![v],
666                |r| r.get(0),
667            )
668            .unwrap();
669        assert_eq!(stored, correct);
670    }
671
672    #[test]
673    fn rehash_is_idempotent_when_checksums_match() {
674        let mut conn = Connection::open_in_memory().expect("in-memory db");
675        conn.execute_batch(
676            "CREATE TABLE refinery_schema_history (
677                version INTEGER NOT NULL,
678                name TEXT,
679                applied_on TEXT,
680                checksum TEXT
681            );",
682        )
683        .unwrap();
684        let first = crate::migrations::runner().get_migrations()[0].clone();
685        let v = first.version();
686        let name = first.name().to_string();
687        let sql = first.sql().unwrap_or("").to_string();
688        let correct = compute_checksum(&name, v, &sql).to_string();
689        conn.execute(
690            "INSERT INTO refinery_schema_history (version, name, checksum) VALUES (?1, ?2, ?3)",
691            rusqlite::params![v, name, correct.clone()],
692        )
693        .unwrap();
694
695        let report = run_rehash(&mut conn, Path::new("/tmp/test.sqlite")).unwrap();
696        assert!(
697            report.rewritten.is_empty(),
698            "must not rewrite matching rows"
699        );
700        assert_eq!(report.status, "ok_no_changes");
701    }
702
703    #[test]
704    fn rehash_matches_refinery_embedded_checksum_for_v001() {
705        // The ultimate correctness test: run a real migration, capture
706        // what refinery stored, then call run_rehash and confirm the
707        // file-derived checksum matches what the runner produced. This
708        // pins the algorithm end-to-end and would catch any future drift
709        // (e.g. a siphasher major version bump that changes SipHasher13).
710        let dir = tempfile::tempdir().expect("tempdir");
711        let path = dir.path().join("test.sqlite");
712        let mut conn = open_rw(&path).expect("open_rw");
713        crate::migrations::runner().run(&mut conn).expect("migrate");
714        let stored: String = conn
715            .query_row(
716                "SELECT checksum FROM refinery_schema_history WHERE version = 1",
717                [],
718                |r| r.get(0),
719            )
720            .unwrap();
721        let report = run_rehash(&mut conn, &path).expect("rehash");
722        assert!(
723            report.rewritten.is_empty(),
724            "V001 must NOT be rewritten when checksums already match: rewrote={:?}",
725            report.rewritten
726        );
727        // And re-running runner() should still succeed (the original
728        // error that the failing test exposed was that the second
729        // runner().run() call saw a checksum mismatch).
730        crate::migrations::runner()
731            .run(&mut conn)
732            .expect("re-run migrate must succeed");
733        let stored_after: String = conn
734            .query_row(
735                "SELECT checksum FROM refinery_schema_history WHERE version = 1",
736                [],
737                |r| r.get(0),
738            )
739            .unwrap();
740        assert_eq!(
741            stored, stored_after,
742            "checksum must not change after rehash"
743        );
744    }
745
746    #[test]
747    fn to_llm_only_reports_no_vec_tables_on_fresh_db() {
748        // Fresh v1.0.76 database (created by running the full migration
749        // set) has no vec tables.
750        let dir = tempfile::tempdir().expect("tempdir");
751        let path = dir.path().join("fresh.sqlite");
752        let mut conn = open_rw(&path).expect("open_rw");
753        crate::migrations::runner().run(&mut conn).expect("migrate");
754        let report = run_to_llm_only(&mut conn, &path).expect("to_llm_only");
755        assert!(!report.vec_tables_were_present);
756        assert!(report.v013_applied, "V013 must be marked applied");
757        assert_eq!(report.status, "ok");
758    }
759
760    #[test]
761    fn history_table_exists_detects_table() {
762        let conn = create_db_with_history(1);
763        assert!(history_table_exists(&conn));
764        let conn2 = create_db_without_history();
765        assert!(!history_table_exists(&conn2));
766    }
767
768    #[test]
769    fn sanitize_null_applied_on_fixes_null_rows() {
770        let conn = Connection::open_in_memory().expect("in-memory db");
771        conn.execute_batch(
772            "CREATE TABLE refinery_schema_history (
773                version INTEGER NOT NULL,
774                name TEXT,
775                applied_on TEXT,
776                checksum TEXT
777            );",
778        )
779        .unwrap();
780        conn.execute(
781            "INSERT INTO refinery_schema_history (version, name, checksum) VALUES (1, 'init', '123')",
782            [],
783        )
784        .unwrap();
785        let fixed = sanitize_null_applied_on(&conn).unwrap();
786        assert_eq!(fixed, 1, "must fix exactly one NULL row");
787        let applied: String = conn
788            .query_row(
789                "SELECT applied_on FROM refinery_schema_history WHERE version = 1",
790                [],
791                |r| r.get(0),
792            )
793            .unwrap();
794        assert!(
795            chrono::DateTime::parse_from_rfc3339(&applied).is_ok(),
796            "applied_on must be valid RFC3339, got: {applied}"
797        );
798    }
799
800    #[test]
801    fn sanitize_null_applied_on_noop_when_all_filled() {
802        let conn = Connection::open_in_memory().expect("in-memory db");
803        conn.execute_batch(
804            "CREATE TABLE refinery_schema_history (
805                version INTEGER NOT NULL,
806                name TEXT,
807                applied_on TEXT,
808                checksum TEXT
809            );",
810        )
811        .unwrap();
812        conn.execute(
813            "INSERT INTO refinery_schema_history (version, name, applied_on, checksum) VALUES (1, 'init', '2026-06-09T00:00:00+00:00', '123')",
814            [],
815        )
816        .unwrap();
817        let fixed = sanitize_null_applied_on(&conn).unwrap();
818        assert_eq!(fixed, 0, "must not touch rows with valid applied_on");
819    }
820
821    #[test]
822    fn rehash_does_not_insert_missing_migrations() {
823        let mut conn = Connection::open_in_memory().expect("in-memory db");
824        conn.execute_batch(
825            "CREATE TABLE refinery_schema_history (
826                version INTEGER NOT NULL,
827                name TEXT,
828                applied_on TEXT,
829                checksum TEXT
830            );",
831        )
832        .unwrap();
833        let runner = crate::migrations::runner();
834        let migrations = runner.get_migrations();
835        for mig in migrations.iter() {
836            if mig.version() >= 13 {
837                break;
838            }
839            let name = mig.name().to_string();
840            let v = mig.version();
841            let sql = mig.sql().unwrap_or("").to_string();
842            let cs = compute_checksum(&name, v, &sql).to_string();
843            conn.execute(
844                "INSERT INTO refinery_schema_history (version, name, applied_on, checksum) VALUES (?1, ?2, '2026-01-01T00:00:00+00:00', ?3)",
845                rusqlite::params![v, name, cs],
846            )
847            .unwrap();
848        }
849        let _report = run_rehash(&mut conn, Path::new("/tmp/test.sqlite")).unwrap();
850        let v013_exists: bool = conn
851            .query_row(
852                "SELECT COUNT(*) FROM refinery_schema_history WHERE version = 13",
853                [],
854                |r| r.get::<_, i64>(0),
855            )
856            .unwrap()
857            > 0;
858        assert!(
859            !v013_exists,
860            "V013 must NOT be inserted by run_rehash (G41 fix)"
861        );
862    }
863
864    #[test]
865    fn remove_vec_tables_noop_when_no_vec() {
866        let conn = Connection::open_in_memory().expect("in-memory db");
867        let removed = remove_vec_virtual_tables_without_module(&conn).unwrap();
868        assert_eq!(removed, 0);
869    }
870
871    #[test]
872    fn ensure_v013_tables_noop_when_no_history() {
873        let conn = Connection::open_in_memory().expect("in-memory db");
874        let created = ensure_v013_tables_exist(&conn).unwrap();
875        assert!(!created, "must be no-op when history table is absent");
876    }
877
878    #[test]
879    fn ensure_v013_tables_noop_when_tables_exist() {
880        let mut conn = Connection::open_in_memory().expect("in-memory db");
881        crate::migrations::runner().run(&mut conn).unwrap();
882        let created = ensure_v013_tables_exist(&conn).unwrap();
883        assert!(
884            !created,
885            "must be no-op when memory_embeddings already exists"
886        );
887    }
888
889    #[test]
890    fn ensure_v013_tables_creates_when_phantom() {
891        let mut conn = Connection::open_in_memory().expect("in-memory db");
892        crate::migrations::runner().run(&mut conn).unwrap();
893        conn.execute_batch("DROP TABLE IF EXISTS memory_embeddings")
894            .unwrap();
895        conn.execute_batch("DROP TABLE IF EXISTS entity_embeddings")
896            .unwrap();
897        conn.execute_batch("DROP TABLE IF EXISTS chunk_embeddings")
898            .unwrap();
899        let created = ensure_v013_tables_exist(&conn).unwrap();
900        assert!(
901            created,
902            "must create tables when V013 is in history but tables are missing"
903        );
904        let count: i64 = conn
905            .query_row(
906                "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='memory_embeddings'",
907                [],
908                |r| r.get(0),
909            )
910            .unwrap();
911        assert_eq!(count, 1, "memory_embeddings must exist after repair");
912    }
913}