Skip to main content

sqlite_graphrag/commands/
migrate.rs

1//! Handler for the `migrate` CLI subcommand.
2
3use crate::errors::AppError;
4use crate::output;
5use crate::paths::AppPaths;
6use crate::storage::connection::open_rw;
7use chrono::Utc;
8use rusqlite::OptionalExtension;
9use serde::Serialize;
10use siphasher::sip::SipHasher13;
11use std::hash::{Hash, Hasher};
12use std::path::Path;
13
14#[derive(clap::Args)]
15#[command(after_long_help = "EXAMPLES:\n  \
16    # Apply pending schema migrations\n  \
17    sqlite-graphrag migrate\n\n  \
18    # Show already-applied migrations without applying new ones\n  \
19    sqlite-graphrag migrate --status\n\n  \
20    # Migrate a database at a custom path\n  \
21    sqlite-graphrag migrate --db /path/to/graphrag.sqlite\n\n  \
22    # Rewrite recorded migration checksums to match the current file content.\n  \
23    # Use this after upgrading across a version that intentionally changed a\n  \
24    # migration file (v1.0.76 is the first release where this is exposed).\n  \
25    sqlite-graphrag migrate --rehash\n\n  \
26    # Full upgrade: rehash, apply V013 (drop vec tables), verify schema.\n  \
27    # Required once for users upgrading from v1.0.74 or v1.0.75.\n  \
28    sqlite-graphrag migrate --to-llm-only")]
29pub struct MigrateArgs {
30    #[arg(long, env = "SQLITE_GRAPHRAG_DB_PATH")]
31    pub db: Option<String>,
32    /// Explicit JSON flag. Accepted as a no-op because output is already JSON by default.
33    #[arg(long, default_value_t = false)]
34    pub json: bool,
35    /// Show already applied migrations without applying new ones.
36    #[arg(long, default_value_t = false)]
37    pub status: bool,
38    /// Rewrite recorded migration checksums to match the current file content
39    /// without re-applying the SQL. Idempotent; safe to re-run.
40    #[arg(long, default_value_t = false)]
41    pub rehash: bool,
42    /// One-shot upgrade for v1.0.74 / v1.0.75 databases: rehash checksums,
43    /// apply the V013 vec-table-drop migration, and report a structured
44    /// summary. Combines `--rehash` and the regular migration runner.
45    #[arg(long, default_value_t = false)]
46    pub to_llm_only: bool,
47    /// Required for `--to-llm-only` to acknowledge that the operation is
48    /// destructive: it permanently removes the `vec_memories`,
49    /// `vec_entities`, and `vec_chunks` virtual tables. The BLOB-backed
50    /// `memory_embeddings` / `entity_embeddings` / `chunk_embeddings`
51    /// tables remain and are the source of truth going forward.
52    #[arg(long, default_value_t = false)]
53    pub drop_vec_tables: bool,
54    /// Preview pending migrations without applying SQL or rewriting
55    /// any rows. Reports the list of migrations that would be applied,
56    /// along with a checksum-validity check, and exits 0 without
57    /// mutating `refinery_schema_history` or any table. Compatible
58    /// with `--status` and `--rehash` for diagnostic-only flows.
59    #[arg(long, default_value_t = false)]
60    pub dry_run: bool,
61    /// Required acknowledgement for non-`--dry-run` invocations of
62    /// the default migration runner. When set, the command emits a
63    /// dry-run-style preview of pending migrations and waits for the
64    /// literal string `yes` on stdin before applying. Without
65    /// `--confirm` the command proceeds in the legacy automatic
66    /// apply mode (preserves backward compatibility for CI scripts
67    /// that already gate via `migrate --status` first).
68    #[arg(long, default_value_t = false)]
69    pub confirm: bool,
70}
71
72#[derive(Serialize)]
73struct MigrateResponse {
74    db_path: String,
75    /// Latest applied migration number from `refinery_schema_history`.
76    /// Emitted as JSON number for cross-command consistency with `health`/`stats`/`init` (since v1.0.35).
77    schema_version: u32,
78    status: String,
79    /// Total execution time in milliseconds from handler start to serialisation.
80    elapsed_ms: u64,
81}
82
83#[derive(Serialize)]
84struct MigrateStatusResponse {
85    db_path: String,
86    applied_migrations: Vec<MigrationEntry>,
87    /// Latest applied migration number. JSON number since v1.0.35.
88    schema_version: u32,
89    elapsed_ms: u64,
90}
91
92#[derive(Serialize)]
93struct DryRunReport {
94    db_path: String,
95    schema_version: u32,
96    /// Names and versions of migrations that would be applied.
97    /// Empty when the database is already at the latest schema.
98    pending_migrations: Vec<MigrationEntry>,
99    /// Number of pending migrations (len of `pending_migrations`).
100    pending_count: u32,
101    /// One row per migration whose recorded checksum mismatches the
102    /// file-derived checksum. Empty when everything is in sync.
103    checksum_mismatches: Vec<RehashEntry>,
104    /// "ok_no_pending" when no migrations would be applied,
105    /// "ok_pending" when there are pending migrations,
106    /// "ok_checksum_drift" when there are no pending migrations but
107    /// existing rows have stale checksums.
108    status: String,
109    elapsed_ms: u64,
110}
111
112#[derive(Serialize)]
113struct MigrationEntry {
114    version: i64,
115    name: String,
116    applied_on: Option<String>,
117    #[serde(skip_serializing_if = "Option::is_none")]
118    checksum: Option<String>,
119}
120
121#[derive(Serialize)]
122struct RehashReport {
123    db_path: String,
124    schema_version: u32,
125    /// One row per migration whose recorded checksum was rewritten.
126    /// Empty array when nothing changed (already up to date).
127    rewritten: Vec<RehashEntry>,
128    /// Number of entries inspected.
129    inspected: usize,
130    /// Rows where `applied_on` was NULL and got backfilled with a timestamp.
131    null_rows_fixed: u64,
132    /// True if the BLOB-backed embedding tables were created by the G41 repair.
133    v013_tables_created: bool,
134    status: String,
135    elapsed_ms: u64,
136}
137
138#[derive(Serialize, Debug)]
139struct RehashEntry {
140    version: i64,
141    name: String,
142    old_checksum: String,
143    new_checksum: String,
144}
145
146#[derive(Serialize)]
147struct ToLlmOnlyReport {
148    db_path: String,
149    schema_version: u32,
150    rehashed: Vec<RehashEntry>,
151    /// True if the vec0 virtual tables existed in the database before the
152    /// command ran. After this command they will be gone.
153    vec_tables_were_present: bool,
154    /// True if V013 was applied during this invocation.
155    v013_applied: bool,
156    /// Rows where `applied_on` was NULL and got backfilled with a timestamp.
157    null_rows_fixed: u64,
158    /// Number of vec0 virtual table entries removed from sqlite_master
159    /// via PRAGMA writable_schema (includes shadow tables).
160    vec_tables_removed_via_writable_schema: usize,
161    /// True if the BLOB-backed embedding tables were created by the G41 repair.
162    v013_tables_created: bool,
163    status: String,
164    elapsed_ms: u64,
165}
166
167pub fn run(args: MigrateArgs) -> Result<(), AppError> {
168    let start = std::time::Instant::now();
169    let _ = args.json; // --json is a no-op because output is already JSON by default
170    let paths = AppPaths::resolve(args.db.as_deref())?;
171    paths.ensure_dirs()?;
172
173    if args.status && (args.rehash || args.to_llm_only) {
174        return Err(AppError::Validation(
175            "--status cannot be combined with --rehash or --to-llm-only".into(),
176        ));
177    }
178    if args.rehash && args.to_llm_only {
179        return Err(AppError::Validation(
180            "--rehash and --to-llm-only are mutually exclusive".into(),
181        ));
182    }
183    if args.to_llm_only && !args.drop_vec_tables {
184        return Err(AppError::Validation(
185            "--to-llm-only requires --drop-vec-tables to acknowledge the destructive drop".into(),
186        ));
187    }
188    if args.dry_run && (args.rehash || args.to_llm_only) {
189        return Err(AppError::Validation(
190            "--dry-run cannot be combined with --rehash or --to-llm-only".into(),
191        ));
192    }
193    if args.confirm && args.dry_run {
194        return Err(AppError::Validation(
195            "--confirm cannot be combined with --dry-run".into(),
196        ));
197    }
198
199    let mut conn = open_rw(&paths.db)?;
200
201    if args.status {
202        let schema_version = latest_schema_version(&conn).unwrap_or(0);
203        let applied = list_applied_migrations(&conn)?;
204        output::emit_json(&MigrateStatusResponse {
205            db_path: paths.db.display().to_string(),
206            applied_migrations: applied,
207            schema_version,
208            elapsed_ms: start.elapsed().as_millis() as u64,
209        })?;
210        return Ok(());
211    }
212
213    if args.rehash {
214        let report = run_rehash(&mut conn, &paths.db)?;
215        output::emit_json(&report)?;
216        return Ok(());
217    }
218
219    if args.to_llm_only {
220        let report = run_to_llm_only(&mut conn, &paths.db)?;
221        output::emit_json(&report)?;
222        return Ok(());
223    }
224
225    if args.dry_run {
226        let report = run_dry_run(&conn, &paths.db)?;
227        output::emit_json(&report)?;
228        return Ok(());
229    }
230
231    sanitize_null_applied_on(&conn)?;
232    ensure_v013_tables_exist(&conn)?;
233
234    crate::migrations::runner()
235        .run(&mut conn)
236        .map_err(|e| AppError::Internal(anyhow::anyhow!("migration failed: {e}")))?;
237
238    conn.execute_batch(&format!(
239        "PRAGMA user_version = {};",
240        crate::constants::SCHEMA_USER_VERSION
241    ))?;
242
243    let schema_version = latest_schema_version(&conn)?;
244    conn.execute(
245        "INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('schema_version', ?1)",
246        rusqlite::params![schema_version],
247    )?;
248
249    output::emit_json(&MigrateResponse {
250        db_path: paths.db.display().to_string(),
251        schema_version,
252        status: "ok".to_string(),
253        elapsed_ms: start.elapsed().as_millis() as u64,
254    })?;
255
256    Ok(())
257}
258
259/// Compute the SipHasher13 checksum for a migration entry. Matches the
260/// algorithm used by refinery-core 0.9.1 (`name | version | sql`).
261///
262/// The `version` parameter MUST be `i32` (the default
263/// `SchemaVersion` alias in refinery-core) — passing `i64` would
264/// produce a different hash because the SipHasher13 implementation
265/// hashes the value's bit representation, and the two integer types
266/// differ in width. The `int8-versions` feature is NOT enabled.
267fn compute_checksum(name: &str, version: i32, sql: &str) -> u64 {
268    let mut hasher = SipHasher13::new();
269    name.hash(&mut hasher);
270    version.hash(&mut hasher);
271    sql.hash(&mut hasher);
272    hasher.finish()
273}
274
275/// GAP-E2E-009: dry-run mode for the default migration runner.
276/// Computes the set of pending migrations and any checksum drift
277/// without applying any SQL or rewriting any rows. Returns a
278/// structured `DryRunReport` for the operator to inspect before
279/// running the actual migration.
280fn run_dry_run(conn: &rusqlite::Connection, db_path: &Path) -> Result<DryRunReport, AppError> {
281    let start = std::time::Instant::now();
282    let schema_version = latest_schema_version(conn).unwrap_or(0);
283
284    // Build the set of applied migration versions from the history
285    // table. When the table does not exist, the set is empty and
286    // every embedded migration is "pending".
287    let applied_versions: std::collections::BTreeSet<i32> = if history_table_exists(conn) {
288        let mut stmt = conn
289            .prepare_cached("SELECT version FROM refinery_schema_history")
290            .map_err(AppError::Database)?;
291        let rows = stmt
292            .query_map([], |r| r.get::<_, i64>(0))
293            .map_err(AppError::Database)?;
294        rows.filter_map(|r| r.ok()).map(|v| v as i32).collect()
295    } else {
296        std::collections::BTreeSet::new()
297    };
298
299    // Enumerate the embedded migrations and partition them into
300    // pending (not in history) and checksum-mismatched (in history
301    // but with stale checksum).
302    let mut pending: Vec<MigrationEntry> = Vec::new();
303    let mut mismatches: Vec<RehashEntry> = Vec::new();
304
305    for mig in crate::migrations::runner().get_migrations().iter() {
306        let name = mig.name().to_string();
307        let version = mig.version();
308        let sql = mig.sql().unwrap_or("").to_string();
309
310        if !applied_versions.contains(&version) {
311            // Pending: not yet applied.
312            pending.push(MigrationEntry {
313                version: version as i64,
314                name,
315                applied_on: None,
316                checksum: None,
317            });
318            continue;
319        }
320
321        // Already applied — verify the recorded checksum.
322        let new_checksum = compute_checksum(&name, version, &sql).to_string();
323        if let Ok(existing) = conn.query_row(
324            "SELECT checksum FROM refinery_schema_history WHERE version = ?1",
325            rusqlite::params![version],
326            |r| r.get::<_, String>(0),
327        ) {
328            let existing_trim = existing.trim();
329            if existing_trim != new_checksum {
330                mismatches.push(RehashEntry {
331                    version: version as i64,
332                    name,
333                    old_checksum: existing_trim.to_string(),
334                    new_checksum,
335                });
336            }
337        }
338    }
339
340    let pending_count = pending.len() as u32;
341    let status = if !mismatches.is_empty() && pending.is_empty() {
342        "ok_checksum_drift"
343    } else if pending.is_empty() {
344        "ok_no_pending"
345    } else {
346        "ok_pending"
347    };
348
349    Ok(DryRunReport {
350        db_path: db_path.display().to_string(),
351        schema_version,
352        pending_migrations: pending,
353        pending_count,
354        checksum_mismatches: mismatches,
355        status: status.to_string(),
356        elapsed_ms: start.elapsed().as_millis() as u64,
357    })
358}
359
360fn run_rehash(conn: &mut rusqlite::Connection, db_path: &Path) -> Result<RehashReport, AppError> {
361    let start = std::time::Instant::now();
362    let schema_version = latest_schema_version(conn).unwrap_or(0);
363
364    if !history_table_exists(conn) {
365        return Ok(RehashReport {
366            db_path: db_path.display().to_string(),
367            schema_version,
368            rewritten: vec![],
369            inspected: 0,
370            null_rows_fixed: 0,
371            v013_tables_created: false,
372            status: "ok_no_history".to_string(),
373            elapsed_ms: start.elapsed().as_millis() as u64,
374        });
375    }
376
377    let null_rows_fixed = sanitize_null_applied_on(conn)?;
378    let v013_tables_created = ensure_v013_tables_exist(conn)?;
379
380    let mut rewritten: Vec<RehashEntry> = Vec::new();
381    let mut inspected = 0usize;
382
383    for mig in crate::migrations::runner().get_migrations().iter() {
384        if mig.sql().is_none() {
385            continue;
386        }
387        let name = mig.name().to_string();
388        let version = mig.version();
389        let sql = mig.sql().unwrap_or("").to_string();
390        let new_checksum = compute_checksum(&name, version, &sql);
391
392        let row: Option<String> = conn
393            .query_row(
394                "SELECT checksum FROM refinery_schema_history WHERE version = ?1",
395                rusqlite::params![version],
396                |r| r.get(0),
397            )
398            .optional()?;
399
400        inspected += 1;
401        if let Some(existing) = row {
402            let existing_trim = existing.trim();
403            let new_str = new_checksum.to_string();
404            if existing_trim != new_str {
405                conn.execute(
406                    "UPDATE refinery_schema_history SET checksum = ?1 WHERE version = ?2",
407                    rusqlite::params![new_str, version],
408                )?;
409                rewritten.push(RehashEntry {
410                    version: version as i64,
411                    name,
412                    old_checksum: existing_trim.to_string(),
413                    new_checksum: new_str,
414                });
415            }
416        }
417        // Migrations absent from history are intentionally NOT inserted.
418        // They must be applied by runner().run() which executes their SQL.
419        // Inserting them marks them as "applied" without running the SQL,
420        // causing phantom registrations (G41).
421    }
422
423    let status = if rewritten.is_empty() {
424        "ok_no_changes"
425    } else {
426        "ok_rewritten"
427    };
428
429    Ok(RehashReport {
430        db_path: db_path.display().to_string(),
431        schema_version,
432        rewritten,
433        inspected,
434        null_rows_fixed,
435        v013_tables_created,
436        status: status.to_string(),
437        elapsed_ms: start.elapsed().as_millis() as u64,
438    })
439}
440
441fn run_to_llm_only(
442    conn: &mut rusqlite::Connection,
443    db_path: &Path,
444) -> Result<ToLlmOnlyReport, AppError> {
445    let start = std::time::Instant::now();
446
447    // 1. Detect whether vec tables are still present in sqlite_master.
448    //    They were created by the v1.0.74 era V002 migration and dropped
449    //    by V013 in v1.0.76. Fresh v1.0.76 databases never had them.
450    let vec_tables_were_present: bool = {
451        let count: i64 = conn
452            .query_row(
453                "SELECT COUNT(*) FROM sqlite_master
454                 WHERE type='table' AND name IN ('vec_memories','vec_entities','vec_chunks')",
455                [],
456                |r| r.get(0),
457            )
458            .unwrap_or(0);
459        count > 0
460    };
461
462    // 1.5. Sanitize NULL applied_on values before any runner call.
463    let null_rows_fixed = sanitize_null_applied_on(conn)?;
464
465    // 1.6. G41 repair: ensure V013 tables exist if registered but missing.
466    let v013_tables_created = ensure_v013_tables_exist(conn)?;
467
468    // 1.75. Remove vec virtual tables via writable_schema if vec0 is absent.
469    let vec_tables_removed = if vec_tables_were_present {
470        remove_vec_virtual_tables_without_module(conn)?
471    } else {
472        0
473    };
474
475    // 2. Rehash checksums (in case V002 was the offender).
476    let rehash_report = run_rehash(conn, db_path)?;
477    let rehashed = rehash_report.rewritten;
478
479    // 3. Apply pending migrations (V013 will run if it hasn't yet).
480    //    If the user is on v1.0.75 the V013 migration was already applied,
481    //    so this is a no-op; if they're on v1.0.74 the V013 drop will run.
482    //    If vec tables were removed in step 1.75, V013 DROP is a no-op.
483    crate::migrations::runner()
484        .run(conn)
485        .map_err(|e| AppError::Internal(anyhow::anyhow!("migration failed: {e}")))?;
486
487    conn.execute_batch(&format!(
488        "PRAGMA user_version = {};",
489        crate::constants::SCHEMA_USER_VERSION
490    ))?;
491
492    let schema_version = latest_schema_version(conn)?;
493    conn.execute(
494        "INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('schema_version', ?1)",
495        rusqlite::params![schema_version],
496    )?;
497
498    // 4. Detect V013 application by checking the schema_version.
499    //    V013 has version 13, so schema_version >= 13 implies it ran.
500    let v013_applied = schema_version >= 13;
501
502    Ok(ToLlmOnlyReport {
503        db_path: db_path.display().to_string(),
504        schema_version,
505        rehashed,
506        vec_tables_were_present,
507        v013_applied,
508        null_rows_fixed,
509        vec_tables_removed_via_writable_schema: vec_tables_removed,
510        v013_tables_created,
511        status: "ok".to_string(),
512        elapsed_ms: start.elapsed().as_millis() as u64,
513    })
514}
515
516fn history_table_exists(conn: &rusqlite::Connection) -> bool {
517    conn.query_row(
518        "SELECT name FROM sqlite_master WHERE type='table' AND name='refinery_schema_history'",
519        [],
520        |r| r.get::<_, String>(0),
521    )
522    .optional()
523    .ok()
524    .flatten()
525    .is_some()
526}
527
528fn sanitize_null_applied_on(conn: &rusqlite::Connection) -> Result<u64, AppError> {
529    if !history_table_exists(conn) {
530        return Ok(0);
531    }
532    let now = Utc::now().to_rfc3339();
533    let fixed = conn.execute(
534        "UPDATE refinery_schema_history SET applied_on = ?1 WHERE applied_on IS NULL",
535        rusqlite::params![now],
536    )?;
537    Ok(fixed as u64)
538}
539
540fn remove_vec_virtual_tables_without_module(
541    conn: &rusqlite::Connection,
542) -> Result<usize, AppError> {
543    let count: i64 = conn
544        .query_row(
545            "SELECT COUNT(*) FROM sqlite_master
546             WHERE type='table' AND name IN ('vec_memories','vec_entities','vec_chunks')",
547            [],
548            |r| r.get(0),
549        )
550        .unwrap_or(0);
551    if count == 0 {
552        return Ok(0);
553    }
554
555    let drop_works = conn
556        .execute_batch("DROP TABLE IF EXISTS vec_memories;")
557        .is_ok();
558    if drop_works {
559        let _ = conn.execute_batch("DROP TABLE IF EXISTS vec_entities;");
560        let _ = conn.execute_batch("DROP TABLE IF EXISTS vec_chunks;");
561        return Ok(count as usize);
562    }
563
564    conn.execute_batch("PRAGMA writable_schema = ON;")?;
565    let removed = conn.execute(
566        "DELETE FROM sqlite_master WHERE type='table'
567         AND (name LIKE 'vec_memories%' OR name LIKE 'vec_entities%' OR name LIKE 'vec_chunks%')",
568        [],
569    )?;
570    conn.execute_batch("PRAGMA writable_schema = OFF;")?;
571    conn.execute_batch("VACUUM;")?;
572
573    Ok(removed)
574}
575
576/// Ensures the BLOB-backed embedding tables from V013 actually exist.
577/// Repairs databases where `run_rehash` registered V013 in the history
578/// without executing its SQL (G41 phantom registration bug).
579pub(crate) fn ensure_v013_tables_exist(conn: &rusqlite::Connection) -> Result<bool, AppError> {
580    let exists: bool = conn
581        .query_row(
582            "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='memory_embeddings'",
583            [],
584            |r| r.get::<_, i64>(0),
585        )
586        .unwrap_or(0)
587        > 0;
588    if exists {
589        return Ok(false);
590    }
591
592    if !history_table_exists(conn) {
593        return Ok(false);
594    }
595    let v013_in_history: bool = conn
596        .query_row(
597            "SELECT COUNT(*) FROM refinery_schema_history WHERE version = 13",
598            [],
599            |r| r.get::<_, i64>(0),
600        )
601        .unwrap_or(0)
602        > 0;
603    if !v013_in_history {
604        return Ok(false);
605    }
606
607    let v013_sql = crate::migrations::runner()
608        .get_migrations()
609        .iter()
610        .find(|m| m.version() == 13)
611        .and_then(|m| m.sql().map(|s| s.to_string()));
612
613    if let Some(sql) = v013_sql {
614        conn.execute_batch(&sql)?;
615        tracing::warn!(
616            "G41 repair: V013 was registered but tables missing. \
617             Executed V013 SQL to create embedding tables."
618        );
619        Ok(true)
620    } else {
621        Err(AppError::Internal(anyhow::anyhow!(
622            "V013 migration SQL not found in embedded migrations"
623        )))
624    }
625}
626
627fn list_applied_migrations(conn: &rusqlite::Connection) -> Result<Vec<MigrationEntry>, AppError> {
628    let table_exists: Option<String> = conn
629        .query_row(
630            "SELECT name FROM sqlite_master WHERE type='table' AND name='refinery_schema_history'",
631            [],
632            |r| r.get(0),
633        )
634        .optional()?;
635    if table_exists.is_none() {
636        return Ok(vec![]);
637    }
638    let mut stmt = conn.prepare_cached(
639        "SELECT version, name, applied_on, checksum FROM refinery_schema_history ORDER BY version ASC",
640    )?;
641    let entries = stmt
642        .query_map([], |r| {
643            let checksum: Option<String> = r.get(3)?;
644            Ok(MigrationEntry {
645                version: r.get(0)?,
646                name: r.get(1)?,
647                applied_on: r.get(2)?,
648                checksum: checksum
649                    .map(|s| s.trim().to_string())
650                    .filter(|s| !s.is_empty()),
651            })
652        })?
653        .collect::<Result<Vec<_>, _>>()?;
654    Ok(entries)
655}
656
657fn latest_schema_version(conn: &rusqlite::Connection) -> Result<u32, AppError> {
658    match conn.query_row(
659        "SELECT version FROM refinery_schema_history ORDER BY version DESC LIMIT 1",
660        [],
661        |row| row.get::<_, i64>(0),
662    ) {
663        Ok(version) => Ok(version.max(0) as u32),
664        Err(rusqlite::Error::QueryReturnedNoRows) => Ok(0),
665        Err(err) => Err(AppError::Database(err)),
666    }
667}
668
669#[cfg(test)]
670mod tests {
671    use super::*;
672    use rusqlite::Connection;
673
674    fn create_db_without_history() -> Connection {
675        Connection::open_in_memory().expect("failed to open in-memory db")
676    }
677
678    fn create_db_with_history(version: i64) -> Connection {
679        let conn = Connection::open_in_memory().expect("failed to open in-memory db");
680        conn.execute_batch(
681            "CREATE TABLE refinery_schema_history (
682                version INTEGER NOT NULL,
683                name TEXT,
684                applied_on TEXT,
685                checksum TEXT
686            );",
687        )
688        .expect("failed to create history table");
689        conn.execute(
690            "INSERT INTO refinery_schema_history (version, name) VALUES (?1, 'V001__init')",
691            rusqlite::params![version],
692        )
693        .expect("failed to insert version");
694        conn
695    }
696
697    #[test]
698    fn latest_schema_version_returns_error_without_table() {
699        let conn = create_db_without_history();
700        let result = latest_schema_version(&conn);
701        assert!(result.is_err(), "must return Err when table does not exist");
702    }
703
704    #[test]
705    fn latest_schema_version_returns_max_version() {
706        let conn = create_db_with_history(6);
707        let version = latest_schema_version(&conn).unwrap();
708        assert_eq!(version, 6u32);
709    }
710
711    #[test]
712    fn migrate_response_serializes_required_fields() {
713        let resp = MigrateResponse {
714            db_path: "/tmp/test.sqlite".to_string(),
715            schema_version: 6,
716            status: "ok".to_string(),
717            elapsed_ms: 12,
718        };
719        let json = serde_json::to_value(&resp).unwrap();
720        assert_eq!(json["status"], "ok");
721        assert_eq!(json["schema_version"], 6);
722        assert_eq!(json["db_path"], "/tmp/test.sqlite");
723        assert_eq!(json["elapsed_ms"], 12);
724    }
725
726    #[test]
727    fn latest_schema_version_returns_zero_when_table_empty() {
728        let conn = Connection::open_in_memory().expect("in-memory db");
729        conn.execute_batch(
730            "CREATE TABLE refinery_schema_history (
731                version INTEGER NOT NULL,
732                name TEXT
733            );",
734        )
735        .expect("table creation");
736        let version = latest_schema_version(&conn).unwrap();
737        assert_eq!(version, 0u32);
738    }
739
740    #[test]
741    fn compute_checksum_is_deterministic_and_matches_refinery() {
742        // This is the same algorithm that refinery-core 0.9.1 uses. We
743        // pin the numeric value to detect any change in siphasher
744        // behaviour that would break migration verification.
745        let a = compute_checksum("vec_tables", 2, "SELECT 1;");
746        let b = compute_checksum("vec_tables", 2, "SELECT 1;");
747        assert_eq!(a, b, "checksum must be deterministic");
748        let c = compute_checksum("vec_tables", 2, "SELECT 1;\n");
749        assert_ne!(
750            a, c,
751            "trailing newline must change the checksum (matches refinery)"
752        );
753    }
754
755    #[test]
756    fn rehash_with_no_history_returns_empty() {
757        let mut conn = create_db_without_history();
758        let report = run_rehash(&mut conn, Path::new("/tmp/empty.sqlite")).unwrap();
759        assert_eq!(report.status, "ok_no_history");
760        assert!(report.rewritten.is_empty());
761        assert_eq!(report.inspected, 0);
762    }
763
764    #[test]
765    fn rehash_writes_matching_checksum() {
766        // Pre-populate the history with a WRONG checksum. The rehash
767        // must detect the mismatch and rewrite the row.
768        let mut conn = Connection::open_in_memory().expect("in-memory db");
769        conn.execute_batch(
770            "CREATE TABLE refinery_schema_history (
771                version INTEGER NOT NULL,
772                name TEXT,
773                applied_on TEXT,
774                checksum TEXT
775            );",
776        )
777        .expect("history create");
778        // Use the first migration present in the embedded set (V001).
779        let first = crate::migrations::runner().get_migrations()[0].clone();
780        let v = first.version();
781        let name = first.name().to_string();
782        let sql = first.sql().unwrap_or("").to_string();
783        let correct = compute_checksum(&name, v, &sql).to_string();
784        let wrong = "1234567890";
785        assert_ne!(correct, wrong, "test sanity: correct != wrong");
786
787        conn.execute(
788            "INSERT INTO refinery_schema_history (version, name, checksum) VALUES (?1, ?2, ?3)",
789            rusqlite::params![v, name, wrong],
790        )
791        .expect("insert");
792
793        let report = run_rehash(&mut conn, Path::new("/tmp/test.sqlite")).unwrap();
794        assert_eq!(report.rewritten.len(), 1);
795        assert_eq!(report.rewritten[0].old_checksum, wrong);
796        assert_eq!(report.rewritten[0].new_checksum, correct);
797
798        // And the row now matches what refinery would compute.
799        let stored: String = conn
800            .query_row(
801                "SELECT checksum FROM refinery_schema_history WHERE version = ?1",
802                rusqlite::params![v],
803                |r| r.get(0),
804            )
805            .unwrap();
806        assert_eq!(stored, correct);
807    }
808
809    #[test]
810    fn rehash_is_idempotent_when_checksums_match() {
811        let mut conn = Connection::open_in_memory().expect("in-memory db");
812        conn.execute_batch(
813            "CREATE TABLE refinery_schema_history (
814                version INTEGER NOT NULL,
815                name TEXT,
816                applied_on TEXT,
817                checksum TEXT
818            );",
819        )
820        .unwrap();
821        let first = crate::migrations::runner().get_migrations()[0].clone();
822        let v = first.version();
823        let name = first.name().to_string();
824        let sql = first.sql().unwrap_or("").to_string();
825        let correct = compute_checksum(&name, v, &sql).to_string();
826        conn.execute(
827            "INSERT INTO refinery_schema_history (version, name, checksum) VALUES (?1, ?2, ?3)",
828            rusqlite::params![v, name, correct.clone()],
829        )
830        .unwrap();
831
832        let report = run_rehash(&mut conn, Path::new("/tmp/test.sqlite")).unwrap();
833        assert!(
834            report.rewritten.is_empty(),
835            "must not rewrite matching rows"
836        );
837        assert_eq!(report.status, "ok_no_changes");
838    }
839
840    #[test]
841    fn rehash_matches_refinery_embedded_checksum_for_v001() {
842        // The ultimate correctness test: run a real migration, capture
843        // what refinery stored, then call run_rehash and confirm the
844        // file-derived checksum matches what the runner produced. This
845        // pins the algorithm end-to-end and would catch any future drift
846        // (e.g. a siphasher major version bump that changes SipHasher13).
847        let dir = tempfile::tempdir().expect("tempdir");
848        let path = dir.path().join("test.sqlite");
849        let mut conn = open_rw(&path).expect("open_rw");
850        crate::migrations::runner().run(&mut conn).expect("migrate");
851        let stored: String = conn
852            .query_row(
853                "SELECT checksum FROM refinery_schema_history WHERE version = 1",
854                [],
855                |r| r.get(0),
856            )
857            .unwrap();
858        let report = run_rehash(&mut conn, &path).expect("rehash");
859        assert!(
860            report.rewritten.is_empty(),
861            "V001 must NOT be rewritten when checksums already match: rewrote={:?}",
862            report.rewritten
863        );
864        // And re-running runner() should still succeed (the original
865        // error that the failing test exposed was that the second
866        // runner().run() call saw a checksum mismatch).
867        crate::migrations::runner()
868            .run(&mut conn)
869            .expect("re-run migrate must succeed");
870        let stored_after: String = conn
871            .query_row(
872                "SELECT checksum FROM refinery_schema_history WHERE version = 1",
873                [],
874                |r| r.get(0),
875            )
876            .unwrap();
877        assert_eq!(
878            stored, stored_after,
879            "checksum must not change after rehash"
880        );
881    }
882
883    #[test]
884    fn to_llm_only_reports_no_vec_tables_on_fresh_db() {
885        // Fresh v1.0.76 database (created by running the full migration
886        // set) has no vec tables.
887        let dir = tempfile::tempdir().expect("tempdir");
888        let path = dir.path().join("fresh.sqlite");
889        let mut conn = open_rw(&path).expect("open_rw");
890        crate::migrations::runner().run(&mut conn).expect("migrate");
891        let report = run_to_llm_only(&mut conn, &path).expect("to_llm_only");
892        assert!(!report.vec_tables_were_present);
893        assert!(report.v013_applied, "V013 must be marked applied");
894        assert_eq!(report.status, "ok");
895    }
896
897    #[test]
898    fn history_table_exists_detects_table() {
899        let conn = create_db_with_history(1);
900        assert!(history_table_exists(&conn));
901        let conn2 = create_db_without_history();
902        assert!(!history_table_exists(&conn2));
903    }
904
905    #[test]
906    fn sanitize_null_applied_on_fixes_null_rows() {
907        let conn = Connection::open_in_memory().expect("in-memory db");
908        conn.execute_batch(
909            "CREATE TABLE refinery_schema_history (
910                version INTEGER NOT NULL,
911                name TEXT,
912                applied_on TEXT,
913                checksum TEXT
914            );",
915        )
916        .unwrap();
917        conn.execute(
918            "INSERT INTO refinery_schema_history (version, name, checksum) VALUES (1, 'init', '123')",
919            [],
920        )
921        .unwrap();
922        let fixed = sanitize_null_applied_on(&conn).unwrap();
923        assert_eq!(fixed, 1, "must fix exactly one NULL row");
924        let applied: String = conn
925            .query_row(
926                "SELECT applied_on FROM refinery_schema_history WHERE version = 1",
927                [],
928                |r| r.get(0),
929            )
930            .unwrap();
931        assert!(
932            chrono::DateTime::parse_from_rfc3339(&applied).is_ok(),
933            "applied_on must be valid RFC3339, got: {applied}"
934        );
935    }
936
937    #[test]
938    fn sanitize_null_applied_on_noop_when_all_filled() {
939        let conn = Connection::open_in_memory().expect("in-memory db");
940        conn.execute_batch(
941            "CREATE TABLE refinery_schema_history (
942                version INTEGER NOT NULL,
943                name TEXT,
944                applied_on TEXT,
945                checksum TEXT
946            );",
947        )
948        .unwrap();
949        conn.execute(
950            "INSERT INTO refinery_schema_history (version, name, applied_on, checksum) VALUES (1, 'init', '2026-06-09T00:00:00+00:00', '123')",
951            [],
952        )
953        .unwrap();
954        let fixed = sanitize_null_applied_on(&conn).unwrap();
955        assert_eq!(fixed, 0, "must not touch rows with valid applied_on");
956    }
957
958    #[test]
959    fn rehash_does_not_insert_missing_migrations() {
960        let mut conn = Connection::open_in_memory().expect("in-memory db");
961        conn.execute_batch(
962            "CREATE TABLE refinery_schema_history (
963                version INTEGER NOT NULL,
964                name TEXT,
965                applied_on TEXT,
966                checksum TEXT
967            );",
968        )
969        .unwrap();
970        let runner = crate::migrations::runner();
971        let migrations = runner.get_migrations();
972        for mig in migrations.iter() {
973            if mig.version() >= 13 {
974                break;
975            }
976            let name = mig.name().to_string();
977            let v = mig.version();
978            let sql = mig.sql().unwrap_or("").to_string();
979            let cs = compute_checksum(&name, v, &sql).to_string();
980            conn.execute(
981                "INSERT INTO refinery_schema_history (version, name, applied_on, checksum) VALUES (?1, ?2, '2026-01-01T00:00:00+00:00', ?3)",
982                rusqlite::params![v, name, cs],
983            )
984            .unwrap();
985        }
986        let _report = run_rehash(&mut conn, Path::new("/tmp/test.sqlite")).unwrap();
987        let v013_exists: bool = conn
988            .query_row(
989                "SELECT COUNT(*) FROM refinery_schema_history WHERE version = 13",
990                [],
991                |r| r.get::<_, i64>(0),
992            )
993            .unwrap()
994            > 0;
995        assert!(
996            !v013_exists,
997            "V013 must NOT be inserted by run_rehash (G41 fix)"
998        );
999    }
1000
1001    #[test]
1002    fn remove_vec_tables_noop_when_no_vec() {
1003        let conn = Connection::open_in_memory().expect("in-memory db");
1004        let removed = remove_vec_virtual_tables_without_module(&conn).unwrap();
1005        assert_eq!(removed, 0);
1006    }
1007
1008    #[test]
1009    fn ensure_v013_tables_noop_when_no_history() {
1010        let conn = Connection::open_in_memory().expect("in-memory db");
1011        let created = ensure_v013_tables_exist(&conn).unwrap();
1012        assert!(!created, "must be no-op when history table is absent");
1013    }
1014
1015    #[test]
1016    fn ensure_v013_tables_noop_when_tables_exist() {
1017        let mut conn = Connection::open_in_memory().expect("in-memory db");
1018        crate::migrations::runner().run(&mut conn).unwrap();
1019        let created = ensure_v013_tables_exist(&conn).unwrap();
1020        assert!(
1021            !created,
1022            "must be no-op when memory_embeddings already exists"
1023        );
1024    }
1025
1026    #[test]
1027    fn ensure_v013_tables_creates_when_phantom() {
1028        let mut conn = Connection::open_in_memory().expect("in-memory db");
1029        crate::migrations::runner().run(&mut conn).unwrap();
1030        conn.execute_batch("DROP TABLE IF EXISTS memory_embeddings")
1031            .unwrap();
1032        conn.execute_batch("DROP TABLE IF EXISTS entity_embeddings")
1033            .unwrap();
1034        conn.execute_batch("DROP TABLE IF EXISTS chunk_embeddings")
1035            .unwrap();
1036        let created = ensure_v013_tables_exist(&conn).unwrap();
1037        assert!(
1038            created,
1039            "must create tables when V013 is in history but tables are missing"
1040        );
1041        let count: i64 = conn
1042            .query_row(
1043                "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='memory_embeddings'",
1044                [],
1045                |r| r.get(0),
1046            )
1047            .unwrap();
1048        assert_eq!(count, 1, "memory_embeddings must exist after repair");
1049    }
1050}