Skip to main content

sqlite_graphrag/commands/
migrate.rs

1//! Handler for the `migrate` CLI subcommand.
2
3use crate::errors::AppError;
4use crate::output;
5use crate::paths::AppPaths;
6use crate::storage::connection::open_rw;
7use chrono::Utc;
8use rusqlite::OptionalExtension;
9use serde::Serialize;
10use siphasher::sip::SipHasher13;
11use std::hash::{Hash, Hasher};
12use std::path::Path;
13
14#[derive(clap::Args)]
15#[command(after_long_help = "EXAMPLES:\n  \
16    # Apply pending schema migrations\n  \
17    sqlite-graphrag migrate\n\n  \
18    # Show already-applied migrations without applying new ones\n  \
19    sqlite-graphrag migrate --status\n\n  \
20    # Migrate a database at a custom path\n  \
21    sqlite-graphrag migrate --db /path/to/graphrag.sqlite\n\n  \
22    # Rewrite recorded migration checksums to match the current file content.\n  \
23    # Use this after upgrading across a version that intentionally changed a\n  \
24    # migration file (v1.0.76 is the first release where this is exposed).\n  \
25    sqlite-graphrag migrate --rehash\n\n  \
26    # Full upgrade: rehash, apply V013 (drop vec tables), verify schema.\n  \
27    # Required once for users upgrading from v1.0.74 or v1.0.75.\n  \
28    sqlite-graphrag migrate --to-llm-only")]
29pub struct MigrateArgs {
30    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
31    pub db: Option<String>,
32    /// Explicit JSON flag. Accepted as a no-op because output is already JSON by default.
33    #[arg(long, default_value_t = false)]
34    pub json: bool,
35    /// Show already applied migrations without applying new ones.
36    #[arg(long, default_value_t = false)]
37    pub status: bool,
38    /// Rewrite recorded migration checksums to match the current file content
39    /// without re-applying the SQL. Idempotent; safe to re-run.
40    #[arg(long, default_value_t = false)]
41    pub rehash: bool,
42    /// One-shot upgrade for v1.0.74 / v1.0.75 databases: rehash checksums,
43    /// apply the V013 vec-table-drop migration, and report a structured
44    /// summary. Combines `--rehash` and the regular migration runner.
45    #[arg(long, default_value_t = false)]
46    pub to_llm_only: bool,
47    /// Required for `--to-llm-only` to acknowledge that the operation is
48    /// destructive: it permanently removes the `vec_memories`,
49    /// `vec_entities`, and `vec_chunks` virtual tables. The BLOB-backed
50    /// `memory_embeddings` / `entity_embeddings` / `chunk_embeddings`
51    /// tables remain and are the source of truth going forward.
52    #[arg(long, default_value_t = false)]
53    pub drop_vec_tables: bool,
54}
55
56#[derive(Serialize)]
57struct MigrateResponse {
58    db_path: String,
59    /// Latest applied migration number from `refinery_schema_history`.
60    /// Emitted as JSON number for cross-command consistency with `health`/`stats`/`init` (since v1.0.35).
61    schema_version: u32,
62    status: String,
63    /// Total execution time in milliseconds from handler start to serialisation.
64    elapsed_ms: u64,
65}
66
67#[derive(Serialize)]
68struct MigrateStatusResponse {
69    db_path: String,
70    applied_migrations: Vec<MigrationEntry>,
71    /// Latest applied migration number. JSON number since v1.0.35.
72    schema_version: u32,
73    elapsed_ms: u64,
74}
75
76#[derive(Serialize)]
77struct MigrationEntry {
78    version: i64,
79    name: String,
80    applied_on: Option<String>,
81    #[serde(skip_serializing_if = "Option::is_none")]
82    checksum: Option<String>,
83}
84
85#[derive(Serialize)]
86struct RehashReport {
87    db_path: String,
88    schema_version: u32,
89    /// One row per migration whose recorded checksum was rewritten.
90    /// Empty array when nothing changed (already up to date).
91    rewritten: Vec<RehashEntry>,
92    /// Number of entries inspected.
93    inspected: usize,
94    /// Rows where `applied_on` was NULL and got backfilled with a timestamp.
95    null_rows_fixed: u64,
96    status: String,
97    elapsed_ms: u64,
98}
99
100#[derive(Serialize, Debug)]
101struct RehashEntry {
102    version: i64,
103    name: String,
104    old_checksum: String,
105    new_checksum: String,
106}
107
108#[derive(Serialize)]
109struct ToLlmOnlyReport {
110    db_path: String,
111    schema_version: u32,
112    rehashed: Vec<RehashEntry>,
113    /// True if the vec0 virtual tables existed in the database before the
114    /// command ran. After this command they will be gone.
115    vec_tables_were_present: bool,
116    /// True if V013 was applied during this invocation.
117    v013_applied: bool,
118    /// Rows where `applied_on` was NULL and got backfilled with a timestamp.
119    null_rows_fixed: u64,
120    /// Number of vec0 virtual table entries removed from sqlite_master
121    /// via PRAGMA writable_schema (includes shadow tables).
122    vec_tables_removed_via_writable_schema: usize,
123    status: String,
124    elapsed_ms: u64,
125}
126
127pub fn run(args: MigrateArgs) -> Result<(), AppError> {
128    let start = std::time::Instant::now();
129    let _ = args.json; // --json is a no-op because output is already JSON by default
130    let paths = AppPaths::resolve(args.db.as_deref())?;
131    paths.ensure_dirs()?;
132
133    if args.status && (args.rehash || args.to_llm_only) {
134        return Err(AppError::Validation(
135            "--status cannot be combined with --rehash or --to-llm-only".into(),
136        ));
137    }
138    if args.rehash && args.to_llm_only {
139        return Err(AppError::Validation(
140            "--rehash and --to-llm-only are mutually exclusive".into(),
141        ));
142    }
143    if args.to_llm_only && !args.drop_vec_tables {
144        return Err(AppError::Validation(
145            "--to-llm-only requires --drop-vec-tables to acknowledge the destructive drop".into(),
146        ));
147    }
148
149    let mut conn = open_rw(&paths.db)?;
150
151    if args.status {
152        let schema_version = latest_schema_version(&conn).unwrap_or(0);
153        let applied = list_applied_migrations(&conn)?;
154        output::emit_json(&MigrateStatusResponse {
155            db_path: paths.db.display().to_string(),
156            applied_migrations: applied,
157            schema_version,
158            elapsed_ms: start.elapsed().as_millis() as u64,
159        })?;
160        return Ok(());
161    }
162
163    if args.rehash {
164        let report = run_rehash(&mut conn, &paths.db)?;
165        output::emit_json(&report)?;
166        return Ok(());
167    }
168
169    if args.to_llm_only {
170        let report = run_to_llm_only(&mut conn, &paths.db)?;
171        output::emit_json(&report)?;
172        return Ok(());
173    }
174
175    sanitize_null_applied_on(&conn)?;
176
177    crate::migrations::runner()
178        .run(&mut conn)
179        .map_err(|e| AppError::Internal(anyhow::anyhow!("migration failed: {e}")))?;
180
181    conn.execute_batch(&format!(
182        "PRAGMA user_version = {};",
183        crate::constants::SCHEMA_USER_VERSION
184    ))?;
185
186    let schema_version = latest_schema_version(&conn)?;
187    conn.execute(
188        "INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('schema_version', ?1)",
189        rusqlite::params![schema_version],
190    )?;
191
192    output::emit_json(&MigrateResponse {
193        db_path: paths.db.display().to_string(),
194        schema_version,
195        status: "ok".to_string(),
196        elapsed_ms: start.elapsed().as_millis() as u64,
197    })?;
198
199    Ok(())
200}
201
202/// Compute the SipHasher13 checksum for a migration entry. Matches the
203/// algorithm used by refinery-core 0.9.1 (`name | version | sql`).
204///
205/// The `version` parameter MUST be `i32` (the default
206/// `SchemaVersion` alias in refinery-core) — passing `i64` would
207/// produce a different hash because the SipHasher13 implementation
208/// hashes the value's bit representation, and the two integer types
209/// differ in width. The `int8-versions` feature is NOT enabled.
210fn compute_checksum(name: &str, version: i32, sql: &str) -> u64 {
211    let mut hasher = SipHasher13::new();
212    name.hash(&mut hasher);
213    version.hash(&mut hasher);
214    sql.hash(&mut hasher);
215    hasher.finish()
216}
217
218fn run_rehash(conn: &mut rusqlite::Connection, db_path: &Path) -> Result<RehashReport, AppError> {
219    let start = std::time::Instant::now();
220    let schema_version = latest_schema_version(conn).unwrap_or(0);
221
222    if !history_table_exists(conn) {
223        return Ok(RehashReport {
224            db_path: db_path.display().to_string(),
225            schema_version,
226            rewritten: vec![],
227            inspected: 0,
228            null_rows_fixed: 0,
229            status: "ok_no_history".to_string(),
230            elapsed_ms: start.elapsed().as_millis() as u64,
231        });
232    }
233
234    let null_rows_fixed = sanitize_null_applied_on(conn)?;
235
236    let mut rewritten: Vec<RehashEntry> = Vec::new();
237    let mut inspected = 0usize;
238
239    for mig in crate::migrations::runner().get_migrations().iter() {
240        if mig.sql().is_none() {
241            continue;
242        }
243        let name = mig.name().to_string();
244        let version = mig.version();
245        let sql = mig.sql().unwrap_or("").to_string();
246        let new_checksum = compute_checksum(&name, version, &sql);
247
248        let row: Option<String> = conn
249            .query_row(
250                "SELECT checksum FROM refinery_schema_history WHERE version = ?1",
251                rusqlite::params![version],
252                |r| r.get(0),
253            )
254            .optional()?;
255
256        inspected += 1;
257        if let Some(existing) = row {
258            let existing_trim = existing.trim();
259            let new_str = new_checksum.to_string();
260            if existing_trim != new_str {
261                conn.execute(
262                    "UPDATE refinery_schema_history SET checksum = ?1 WHERE version = ?2",
263                    rusqlite::params![new_str, version],
264                )?;
265                rewritten.push(RehashEntry {
266                    version: version as i64,
267                    name,
268                    old_checksum: existing_trim.to_string(),
269                    new_checksum: new_str,
270                });
271            }
272        } else {
273            // Row missing for a migration file that the runner already has
274            // loaded. Insert with a matching checksum so future runs accept
275            // it as already applied. (Rare; happens only on partial history.)
276            let now = Utc::now().to_rfc3339();
277            conn.execute(
278                "INSERT OR IGNORE INTO refinery_schema_history (version, name, applied_on, checksum) VALUES (?1, ?2, ?3, ?4)",
279                rusqlite::params![version, name, now, new_checksum.to_string()],
280            )?;
281        }
282    }
283
284    let status = if rewritten.is_empty() {
285        "ok_no_changes"
286    } else {
287        "ok_rewritten"
288    };
289
290    Ok(RehashReport {
291        db_path: db_path.display().to_string(),
292        schema_version,
293        rewritten,
294        inspected,
295        null_rows_fixed,
296        status: status.to_string(),
297        elapsed_ms: start.elapsed().as_millis() as u64,
298    })
299}
300
301fn run_to_llm_only(
302    conn: &mut rusqlite::Connection,
303    db_path: &Path,
304) -> Result<ToLlmOnlyReport, AppError> {
305    let start = std::time::Instant::now();
306
307    // 1. Detect whether vec tables are still present in sqlite_master.
308    //    They were created by the v1.0.74 era V002 migration and dropped
309    //    by V013 in v1.0.76. Fresh v1.0.76 databases never had them.
310    let vec_tables_were_present: bool = {
311        let count: i64 = conn
312            .query_row(
313                "SELECT COUNT(*) FROM sqlite_master
314                 WHERE type='table' AND name IN ('vec_memories','vec_entities','vec_chunks')",
315                [],
316                |r| r.get(0),
317            )
318            .unwrap_or(0);
319        count > 0
320    };
321
322    // 1.5. Sanitize NULL applied_on values before any runner call.
323    let null_rows_fixed = sanitize_null_applied_on(conn)?;
324
325    // 1.75. Remove vec virtual tables via writable_schema if vec0 is absent.
326    let vec_tables_removed = if vec_tables_were_present {
327        remove_vec_virtual_tables_without_module(conn)?
328    } else {
329        0
330    };
331
332    // 2. Rehash checksums (in case V002 was the offender).
333    let rehash_report = run_rehash(conn, db_path)?;
334    let rehashed = rehash_report.rewritten;
335
336    // 3. Apply pending migrations (V013 will run if it hasn't yet).
337    //    If the user is on v1.0.75 the V013 migration was already applied,
338    //    so this is a no-op; if they're on v1.0.74 the V013 drop will run.
339    //    If vec tables were removed in step 1.75, V013 DROP is a no-op.
340    crate::migrations::runner()
341        .run(conn)
342        .map_err(|e| AppError::Internal(anyhow::anyhow!("migration failed: {e}")))?;
343
344    conn.execute_batch(&format!(
345        "PRAGMA user_version = {};",
346        crate::constants::SCHEMA_USER_VERSION
347    ))?;
348
349    let schema_version = latest_schema_version(conn)?;
350    conn.execute(
351        "INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('schema_version', ?1)",
352        rusqlite::params![schema_version],
353    )?;
354
355    // 4. Detect V013 application by checking the schema_version.
356    //    V013 has version 13, so schema_version >= 13 implies it ran.
357    let v013_applied = schema_version >= 13;
358
359    Ok(ToLlmOnlyReport {
360        db_path: db_path.display().to_string(),
361        schema_version,
362        rehashed,
363        vec_tables_were_present,
364        v013_applied,
365        null_rows_fixed,
366        vec_tables_removed_via_writable_schema: vec_tables_removed,
367        status: "ok".to_string(),
368        elapsed_ms: start.elapsed().as_millis() as u64,
369    })
370}
371
372fn history_table_exists(conn: &rusqlite::Connection) -> bool {
373    conn.query_row(
374        "SELECT name FROM sqlite_master WHERE type='table' AND name='refinery_schema_history'",
375        [],
376        |r| r.get::<_, String>(0),
377    )
378    .optional()
379    .ok()
380    .flatten()
381    .is_some()
382}
383
384fn sanitize_null_applied_on(conn: &rusqlite::Connection) -> Result<u64, AppError> {
385    if !history_table_exists(conn) {
386        return Ok(0);
387    }
388    let now = Utc::now().to_rfc3339();
389    let fixed = conn.execute(
390        "UPDATE refinery_schema_history SET applied_on = ?1 WHERE applied_on IS NULL",
391        rusqlite::params![now],
392    )?;
393    Ok(fixed as u64)
394}
395
396fn remove_vec_virtual_tables_without_module(
397    conn: &rusqlite::Connection,
398) -> Result<usize, AppError> {
399    let count: i64 = conn
400        .query_row(
401            "SELECT COUNT(*) FROM sqlite_master
402             WHERE type='table' AND name IN ('vec_memories','vec_entities','vec_chunks')",
403            [],
404            |r| r.get(0),
405        )
406        .unwrap_or(0);
407    if count == 0 {
408        return Ok(0);
409    }
410
411    let drop_works = conn
412        .execute_batch("DROP TABLE IF EXISTS vec_memories;")
413        .is_ok();
414    if drop_works {
415        let _ = conn.execute_batch("DROP TABLE IF EXISTS vec_entities;");
416        let _ = conn.execute_batch("DROP TABLE IF EXISTS vec_chunks;");
417        return Ok(count as usize);
418    }
419
420    conn.execute_batch("PRAGMA writable_schema = ON;")?;
421    let removed = conn.execute(
422        "DELETE FROM sqlite_master WHERE type='table'
423         AND (name LIKE 'vec_memories%' OR name LIKE 'vec_entities%' OR name LIKE 'vec_chunks%')",
424        [],
425    )?;
426    conn.execute_batch("PRAGMA writable_schema = OFF;")?;
427    conn.execute_batch("VACUUM;")?;
428
429    Ok(removed)
430}
431
432fn list_applied_migrations(conn: &rusqlite::Connection) -> Result<Vec<MigrationEntry>, AppError> {
433    let table_exists: Option<String> = conn
434        .query_row(
435            "SELECT name FROM sqlite_master WHERE type='table' AND name='refinery_schema_history'",
436            [],
437            |r| r.get(0),
438        )
439        .optional()?;
440    if table_exists.is_none() {
441        return Ok(vec![]);
442    }
443    let mut stmt = conn.prepare_cached(
444        "SELECT version, name, applied_on, checksum FROM refinery_schema_history ORDER BY version ASC",
445    )?;
446    let entries = stmt
447        .query_map([], |r| {
448            let checksum: Option<String> = r.get(3)?;
449            Ok(MigrationEntry {
450                version: r.get(0)?,
451                name: r.get(1)?,
452                applied_on: r.get(2)?,
453                checksum: checksum
454                    .map(|s| s.trim().to_string())
455                    .filter(|s| !s.is_empty()),
456            })
457        })?
458        .collect::<Result<Vec<_>, _>>()?;
459    Ok(entries)
460}
461
462fn latest_schema_version(conn: &rusqlite::Connection) -> Result<u32, AppError> {
463    match conn.query_row(
464        "SELECT version FROM refinery_schema_history ORDER BY version DESC LIMIT 1",
465        [],
466        |row| row.get::<_, i64>(0),
467    ) {
468        Ok(version) => Ok(version.max(0) as u32),
469        Err(rusqlite::Error::QueryReturnedNoRows) => Ok(0),
470        Err(err) => Err(AppError::Database(err)),
471    }
472}
473
474#[cfg(test)]
475mod tests {
476    use super::*;
477    use rusqlite::Connection;
478
479    fn create_db_without_history() -> Connection {
480        Connection::open_in_memory().expect("failed to open in-memory db")
481    }
482
483    fn create_db_with_history(version: i64) -> Connection {
484        let conn = Connection::open_in_memory().expect("failed to open in-memory db");
485        conn.execute_batch(
486            "CREATE TABLE refinery_schema_history (
487                version INTEGER NOT NULL,
488                name TEXT,
489                applied_on TEXT,
490                checksum TEXT
491            );",
492        )
493        .expect("failed to create history table");
494        conn.execute(
495            "INSERT INTO refinery_schema_history (version, name) VALUES (?1, 'V001__init')",
496            rusqlite::params![version],
497        )
498        .expect("failed to insert version");
499        conn
500    }
501
502    #[test]
503    fn latest_schema_version_returns_error_without_table() {
504        let conn = create_db_without_history();
505        let result = latest_schema_version(&conn);
506        assert!(result.is_err(), "must return Err when table does not exist");
507    }
508
509    #[test]
510    fn latest_schema_version_returns_max_version() {
511        let conn = create_db_with_history(6);
512        let version = latest_schema_version(&conn).unwrap();
513        assert_eq!(version, 6u32);
514    }
515
516    #[test]
517    fn migrate_response_serializes_required_fields() {
518        let resp = MigrateResponse {
519            db_path: "/tmp/test.sqlite".to_string(),
520            schema_version: 6,
521            status: "ok".to_string(),
522            elapsed_ms: 12,
523        };
524        let json = serde_json::to_value(&resp).unwrap();
525        assert_eq!(json["status"], "ok");
526        assert_eq!(json["schema_version"], 6);
527        assert_eq!(json["db_path"], "/tmp/test.sqlite");
528        assert_eq!(json["elapsed_ms"], 12);
529    }
530
531    #[test]
532    fn latest_schema_version_returns_zero_when_table_empty() {
533        let conn = Connection::open_in_memory().expect("in-memory db");
534        conn.execute_batch(
535            "CREATE TABLE refinery_schema_history (
536                version INTEGER NOT NULL,
537                name TEXT
538            );",
539        )
540        .expect("table creation");
541        let version = latest_schema_version(&conn).unwrap();
542        assert_eq!(version, 0u32);
543    }
544
545    #[test]
546    fn compute_checksum_is_deterministic_and_matches_refinery() {
547        // This is the same algorithm that refinery-core 0.9.1 uses. We
548        // pin the numeric value to detect any change in siphasher
549        // behaviour that would break migration verification.
550        let a = compute_checksum("vec_tables", 2, "SELECT 1;");
551        let b = compute_checksum("vec_tables", 2, "SELECT 1;");
552        assert_eq!(a, b, "checksum must be deterministic");
553        let c = compute_checksum("vec_tables", 2, "SELECT 1;\n");
554        assert_ne!(
555            a, c,
556            "trailing newline must change the checksum (matches refinery)"
557        );
558    }
559
560    #[test]
561    fn rehash_with_no_history_returns_empty() {
562        let mut conn = create_db_without_history();
563        let report = run_rehash(&mut conn, Path::new("/tmp/empty.sqlite")).unwrap();
564        assert_eq!(report.status, "ok_no_history");
565        assert!(report.rewritten.is_empty());
566        assert_eq!(report.inspected, 0);
567    }
568
569    #[test]
570    fn rehash_writes_matching_checksum() {
571        // Pre-populate the history with a WRONG checksum. The rehash
572        // must detect the mismatch and rewrite the row.
573        let mut conn = Connection::open_in_memory().expect("in-memory db");
574        conn.execute_batch(
575            "CREATE TABLE refinery_schema_history (
576                version INTEGER NOT NULL,
577                name TEXT,
578                applied_on TEXT,
579                checksum TEXT
580            );",
581        )
582        .expect("history create");
583        // Use the first migration present in the embedded set (V001).
584        let first = crate::migrations::runner().get_migrations()[0].clone();
585        let v = first.version();
586        let name = first.name().to_string();
587        let sql = first.sql().unwrap_or("").to_string();
588        let correct = compute_checksum(&name, v, &sql).to_string();
589        let wrong = "1234567890";
590        assert_ne!(correct, wrong, "test sanity: correct != wrong");
591
592        conn.execute(
593            "INSERT INTO refinery_schema_history (version, name, checksum) VALUES (?1, ?2, ?3)",
594            rusqlite::params![v, name, wrong],
595        )
596        .expect("insert");
597
598        let report = run_rehash(&mut conn, Path::new("/tmp/test.sqlite")).unwrap();
599        assert_eq!(report.rewritten.len(), 1);
600        assert_eq!(report.rewritten[0].old_checksum, wrong);
601        assert_eq!(report.rewritten[0].new_checksum, correct);
602
603        // And the row now matches what refinery would compute.
604        let stored: String = conn
605            .query_row(
606                "SELECT checksum FROM refinery_schema_history WHERE version = ?1",
607                rusqlite::params![v],
608                |r| r.get(0),
609            )
610            .unwrap();
611        assert_eq!(stored, correct);
612    }
613
614    #[test]
615    fn rehash_is_idempotent_when_checksums_match() {
616        let mut conn = Connection::open_in_memory().expect("in-memory db");
617        conn.execute_batch(
618            "CREATE TABLE refinery_schema_history (
619                version INTEGER NOT NULL,
620                name TEXT,
621                applied_on TEXT,
622                checksum TEXT
623            );",
624        )
625        .unwrap();
626        let first = crate::migrations::runner().get_migrations()[0].clone();
627        let v = first.version();
628        let name = first.name().to_string();
629        let sql = first.sql().unwrap_or("").to_string();
630        let correct = compute_checksum(&name, v, &sql).to_string();
631        conn.execute(
632            "INSERT INTO refinery_schema_history (version, name, checksum) VALUES (?1, ?2, ?3)",
633            rusqlite::params![v, name, correct.clone()],
634        )
635        .unwrap();
636
637        let report = run_rehash(&mut conn, Path::new("/tmp/test.sqlite")).unwrap();
638        assert!(
639            report.rewritten.is_empty(),
640            "must not rewrite matching rows"
641        );
642        assert_eq!(report.status, "ok_no_changes");
643    }
644
645    #[test]
646    fn rehash_matches_refinery_embedded_checksum_for_v001() {
647        // The ultimate correctness test: run a real migration, capture
648        // what refinery stored, then call run_rehash and confirm the
649        // file-derived checksum matches what the runner produced. This
650        // pins the algorithm end-to-end and would catch any future drift
651        // (e.g. a siphasher major version bump that changes SipHasher13).
652        let dir = tempfile::tempdir().expect("tempdir");
653        let path = dir.path().join("test.sqlite");
654        let mut conn = open_rw(&path).expect("open_rw");
655        crate::migrations::runner().run(&mut conn).expect("migrate");
656        let stored: String = conn
657            .query_row(
658                "SELECT checksum FROM refinery_schema_history WHERE version = 1",
659                [],
660                |r| r.get(0),
661            )
662            .unwrap();
663        let report = run_rehash(&mut conn, &path).expect("rehash");
664        assert!(
665            report.rewritten.is_empty(),
666            "V001 must NOT be rewritten when checksums already match: rewrote={:?}",
667            report.rewritten
668        );
669        // And re-running runner() should still succeed (the original
670        // error that the failing test exposed was that the second
671        // runner().run() call saw a checksum mismatch).
672        crate::migrations::runner()
673            .run(&mut conn)
674            .expect("re-run migrate must succeed");
675        let stored_after: String = conn
676            .query_row(
677                "SELECT checksum FROM refinery_schema_history WHERE version = 1",
678                [],
679                |r| r.get(0),
680            )
681            .unwrap();
682        assert_eq!(
683            stored, stored_after,
684            "checksum must not change after rehash"
685        );
686    }
687
688    #[test]
689    fn to_llm_only_reports_no_vec_tables_on_fresh_db() {
690        // Fresh v1.0.76 database (created by running the full migration
691        // set) has no vec tables.
692        let dir = tempfile::tempdir().expect("tempdir");
693        let path = dir.path().join("fresh.sqlite");
694        let mut conn = open_rw(&path).expect("open_rw");
695        crate::migrations::runner().run(&mut conn).expect("migrate");
696        let report = run_to_llm_only(&mut conn, &path).expect("to_llm_only");
697        assert!(!report.vec_tables_were_present);
698        assert!(report.v013_applied, "V013 must be marked applied");
699        assert_eq!(report.status, "ok");
700    }
701
702    #[test]
703    fn history_table_exists_detects_table() {
704        let conn = create_db_with_history(1);
705        assert!(history_table_exists(&conn));
706        let conn2 = create_db_without_history();
707        assert!(!history_table_exists(&conn2));
708    }
709
710    #[test]
711    fn sanitize_null_applied_on_fixes_null_rows() {
712        let conn = Connection::open_in_memory().expect("in-memory db");
713        conn.execute_batch(
714            "CREATE TABLE refinery_schema_history (
715                version INTEGER NOT NULL,
716                name TEXT,
717                applied_on TEXT,
718                checksum TEXT
719            );",
720        )
721        .unwrap();
722        conn.execute(
723            "INSERT INTO refinery_schema_history (version, name, checksum) VALUES (1, 'init', '123')",
724            [],
725        )
726        .unwrap();
727        let fixed = sanitize_null_applied_on(&conn).unwrap();
728        assert_eq!(fixed, 1, "must fix exactly one NULL row");
729        let applied: String = conn
730            .query_row(
731                "SELECT applied_on FROM refinery_schema_history WHERE version = 1",
732                [],
733                |r| r.get(0),
734            )
735            .unwrap();
736        assert!(
737            chrono::DateTime::parse_from_rfc3339(&applied).is_ok(),
738            "applied_on must be valid RFC3339, got: {applied}"
739        );
740    }
741
742    #[test]
743    fn sanitize_null_applied_on_noop_when_all_filled() {
744        let conn = Connection::open_in_memory().expect("in-memory db");
745        conn.execute_batch(
746            "CREATE TABLE refinery_schema_history (
747                version INTEGER NOT NULL,
748                name TEXT,
749                applied_on TEXT,
750                checksum TEXT
751            );",
752        )
753        .unwrap();
754        conn.execute(
755            "INSERT INTO refinery_schema_history (version, name, applied_on, checksum) VALUES (1, 'init', '2026-06-09T00:00:00+00:00', '123')",
756            [],
757        )
758        .unwrap();
759        let fixed = sanitize_null_applied_on(&conn).unwrap();
760        assert_eq!(fixed, 0, "must not touch rows with valid applied_on");
761    }
762
763    #[test]
764    fn rehash_insert_includes_applied_on() {
765        let mut conn = Connection::open_in_memory().expect("in-memory db");
766        conn.execute_batch(
767            "CREATE TABLE refinery_schema_history (
768                version INTEGER NOT NULL,
769                name TEXT,
770                applied_on TEXT,
771                checksum TEXT
772            );",
773        )
774        .unwrap();
775        let runner = crate::migrations::runner();
776        let migrations = runner.get_migrations();
777        for mig in migrations.iter() {
778            if mig.version() >= 13 {
779                break;
780            }
781            let name = mig.name().to_string();
782            let v = mig.version();
783            let sql = mig.sql().unwrap_or("").to_string();
784            let cs = compute_checksum(&name, v, &sql).to_string();
785            conn.execute(
786                "INSERT INTO refinery_schema_history (version, name, applied_on, checksum) VALUES (?1, ?2, '2026-01-01T00:00:00+00:00', ?3)",
787                rusqlite::params![v, name, cs],
788            )
789            .unwrap();
790        }
791        let report = run_rehash(&mut conn, Path::new("/tmp/test.sqlite")).unwrap();
792        let applied: Option<String> = conn
793            .query_row(
794                "SELECT applied_on FROM refinery_schema_history WHERE version = 13",
795                [],
796                |r| r.get(0),
797            )
798            .optional()
799            .unwrap()
800            .flatten();
801        assert!(
802            applied.is_some(),
803            "V013 row must have applied_on after rehash insert, got NULL"
804        );
805        let ts = applied.unwrap();
806        assert!(
807            chrono::DateTime::parse_from_rfc3339(&ts).is_ok(),
808            "applied_on must be valid RFC3339, got: {ts}"
809        );
810        assert_eq!(report.null_rows_fixed, 0, "no pre-existing NULLs to fix");
811    }
812
813    #[test]
814    fn remove_vec_tables_noop_when_no_vec() {
815        let conn = Connection::open_in_memory().expect("in-memory db");
816        let removed = remove_vec_virtual_tables_without_module(&conn).unwrap();
817        assert_eq!(removed, 0);
818    }
819}