Skip to main content

khive_db/
migrations.rs

1use rusqlite::Connection;
2
3use crate::error::SqliteError;
4
5// =============================================================================
6// Legacy per-service migration API (preserved for backward compatibility)
7// =============================================================================
8
9pub struct Migration {
10    pub id: &'static str,
11    pub up_sql: &'static str,
12    pub down_sql: Option<&'static str>,
13    pub is_already_applied: Option<fn(&Connection) -> bool>,
14}
15
16pub struct ServiceSchemaPlan {
17    pub service: &'static str,
18    pub sqlite: &'static [Migration],
19    pub postgres: &'static [Migration],
20}
21
22const SCHEMA_VERSION_TABLE: &str = "\
23    CREATE TABLE IF NOT EXISTS _schema_versions (\
24        service TEXT NOT NULL,\
25        migration_id TEXT NOT NULL,\
26        applied_at INTEGER NOT NULL,\
27        PRIMARY KEY (service, migration_id)\
28    );\
29";
30
31pub fn apply_schema_plan(conn: &Connection, plan: &ServiceSchemaPlan) -> Result<(), SqliteError> {
32    conn.execute_batch(SCHEMA_VERSION_TABLE)?;
33
34    for migration in plan.sqlite {
35        // Check if custom predicate says it's already applied
36        if let Some(check) = migration.is_already_applied {
37            if check(conn) {
38                continue;
39            }
40        }
41
42        // Check if tracked as applied
43        let already: bool = conn.query_row(
44            "SELECT COUNT(*) > 0 FROM _schema_versions WHERE service = ?1 AND migration_id = ?2",
45            rusqlite::params![plan.service, migration.id],
46            |row| row.get(0),
47        )?;
48
49        if already {
50            continue;
51        }
52
53        // Apply
54        conn.execute_batch(migration.up_sql)?;
55
56        // Record
57        conn.execute(
58            "INSERT INTO _schema_versions (service, migration_id, applied_at) VALUES (?1, ?2, ?3)",
59            rusqlite::params![
60                plan.service,
61                migration.id,
62                chrono::Utc::now().timestamp_micros(),
63            ],
64        )?;
65    }
66
67    Ok(())
68}
69
70// =============================================================================
71// Versioned migration system (ADR-015)
72// =============================================================================
73
74/// A single forward-only schema migration.
75///
76/// Migrations are applied in order from the current DB version to the target
77/// version. Each migration runs in its own transaction; a failure rolls back
78/// that migration and leaves the DB at the prior version.
79pub struct VersionedMigration {
80    /// Monotonically increasing version number, starting at 1.
81    pub version: u32,
82    /// Short human-readable name for the migration (used in the audit table).
83    pub name: &'static str,
84    /// SQL to apply this migration. May contain multiple statements separated
85    /// by semicolons; `execute_batch` runs them all.
86    pub up: &'static str,
87}
88
89// V1: The complete initial schema for all four core tables.
90const V1_UP: &str = "\
91    CREATE TABLE IF NOT EXISTS entities (\
92        id TEXT PRIMARY KEY,\
93        namespace TEXT NOT NULL,\
94        kind TEXT NOT NULL,\
95        name TEXT NOT NULL,\
96        description TEXT,\
97        properties TEXT,\
98        tags TEXT NOT NULL DEFAULT '[]',\
99        created_at INTEGER NOT NULL,\
100        updated_at INTEGER NOT NULL,\
101        deleted_at INTEGER\
102    );\
103    CREATE INDEX IF NOT EXISTS idx_entities_namespace ON entities(namespace);\
104    CREATE INDEX IF NOT EXISTS idx_entities_kind ON entities(namespace, kind);\
105    CREATE INDEX IF NOT EXISTS idx_entities_name ON entities(namespace, name);\
106    CREATE INDEX IF NOT EXISTS idx_entities_created ON entities(created_at DESC);\
107    CREATE TABLE IF NOT EXISTS graph_edges (\
108        namespace TEXT NOT NULL,\
109        id TEXT NOT NULL,\
110        source_id TEXT NOT NULL,\
111        target_id TEXT NOT NULL,\
112        relation TEXT NOT NULL,\
113        weight REAL NOT NULL DEFAULT 1.0,\
114        created_at INTEGER NOT NULL,\
115        metadata TEXT,\
116        PRIMARY KEY (namespace, id)\
117    );\
118    CREATE INDEX IF NOT EXISTS idx_graph_edges_ns_source ON graph_edges(namespace, source_id);\
119    CREATE INDEX IF NOT EXISTS idx_graph_edges_ns_target ON graph_edges(namespace, target_id);\
120    CREATE INDEX IF NOT EXISTS idx_graph_edges_ns_relation ON graph_edges(namespace, relation);\
121    CREATE INDEX IF NOT EXISTS idx_graph_edges_ns_src_rel ON graph_edges(namespace, source_id, relation);\
122    CREATE INDEX IF NOT EXISTS idx_graph_edges_ns_tgt_rel ON graph_edges(namespace, target_id, relation);\
123    CREATE TABLE IF NOT EXISTS notes (\
124        id TEXT PRIMARY KEY,\
125        namespace TEXT NOT NULL,\
126        kind TEXT NOT NULL,\
127        content TEXT NOT NULL DEFAULT '',\
128        salience REAL NOT NULL DEFAULT 0.5,\
129        decay_factor REAL NOT NULL DEFAULT 0.0,\
130        expires_at INTEGER,\
131        properties TEXT,\
132        created_at INTEGER NOT NULL,\
133        updated_at INTEGER NOT NULL,\
134        deleted_at INTEGER\
135    );\
136    CREATE INDEX IF NOT EXISTS idx_notes_namespace ON notes(namespace);\
137    CREATE INDEX IF NOT EXISTS idx_notes_kind ON notes(namespace, kind);\
138    CREATE INDEX IF NOT EXISTS idx_notes_created ON notes(created_at DESC);\
139    CREATE TABLE IF NOT EXISTS events (\
140        id TEXT PRIMARY KEY,\
141        namespace TEXT NOT NULL,\
142        verb TEXT NOT NULL,\
143        substrate TEXT NOT NULL,\
144        actor TEXT NOT NULL,\
145        outcome TEXT NOT NULL,\
146        data TEXT,\
147        duration_us INTEGER NOT NULL DEFAULT 0,\
148        target_id TEXT,\
149        created_at INTEGER NOT NULL\
150    );\
151    CREATE INDEX IF NOT EXISTS idx_events_namespace ON events(namespace);\
152    CREATE INDEX IF NOT EXISTS idx_events_verb ON events(verb);\
153    CREATE INDEX IF NOT EXISTS idx_events_substrate ON events(substrate);\
154    CREATE INDEX IF NOT EXISTS idx_events_created ON events(created_at DESC);\
155";
156
157/// All known migrations, ordered by ascending version.
158///
159/// To add a new migration: append a `VersionedMigration` entry with
160/// `version = <last_version + 1>`. The version sequence must be contiguous
161/// (1, 2, 3, ...); `run_migrations` returns an error on gaps.
162///
163/// V2 note: `NOTES_DDL` in `stores/note.rs` already includes `name TEXT` so that
164/// in-process schema creation (used by tests and `StorageBackend::notes()`) has the
165/// column from the start.  When `run_migrations` is called on a DB that was
166/// bootstrapped via `NOTES_DDL`, the V2 `ALTER TABLE` would fail with "duplicate
167/// column name".  The migration runner handles this by checking column existence
168/// before applying V2 — see `run_migrations`.
169///
170/// V4 note: Deduplicates existing graph_edges rows that share the same
171/// (namespace, source_id, target_id, relation) triple, keeping the earliest
172/// rowid, then adds a unique index enforcing the constraint going forward.
173///
174/// V5 note: `ENTITIES_DDL` in `stores/entity.rs` already includes `entity_type TEXT`
175/// so that in-process schema creation has the column from the start.  When
176/// `run_migrations` is called on such a DB, the V5 `ALTER TABLE` would fail with
177/// "duplicate column name".  The migration runner handles this by checking column
178/// existence before applying V5 — see `run_migrations`.
179///
180/// V9 note: Adds lifecycle columns (updated_at, deleted_at) and backend routing
181/// metadata (target_backend) to graph_edges. Uses table rebuild to work around
182/// SQLite's limited ALTER TABLE support. Backfills updated_at = created_at for
183/// existing rows and sets deleted_at = NULL, target_backend = NULL.
184///
185/// V13 note: Adds event observability + provenance columns (kind, payload,
186/// payload_schema_version, profile_state_version, session_id, aggregate_kind,
187/// aggregate_id) and the event_observations table. The DDL is computed at runtime
188/// via `build_v13_event_observability_sql` so that running migrations on a DB
189/// already bootstrapped by `ensure_events_schema` does not fail with "duplicate
190/// column name".
191const V4_DEDUPE_GRAPH_EDGE_TRIPLES: &str = "\
192    DELETE FROM graph_edges \
193    WHERE rowid NOT IN (\
194        SELECT MIN(rowid) \
195        FROM graph_edges \
196        GROUP BY namespace, source_id, target_id, relation\
197    );\
198    CREATE UNIQUE INDEX IF NOT EXISTS idx_graph_edges_unique_triple \
199    ON graph_edges(namespace, source_id, target_id, relation);\
200";
201
202const V5_ADD_ENTITY_TYPE_TO_ENTITIES: &str = "\
203    ALTER TABLE entities ADD COLUMN entity_type TEXT NULL;\
204    CREATE INDEX IF NOT EXISTS idx_entities_kind_entity_type \
205    ON entities(namespace, kind, entity_type);\
206";
207
208const V9_EDGE_LIFECYCLE_AND_TARGET_BACKEND: &str = "\
209    DROP INDEX IF EXISTS idx_graph_edges_unique_triple;\
210    DROP INDEX IF EXISTS idx_graph_edges_ns_source;\
211    DROP INDEX IF EXISTS idx_graph_edges_ns_target;\
212    DROP INDEX IF EXISTS idx_graph_edges_ns_relation;\
213    DROP INDEX IF EXISTS idx_graph_edges_ns_src_rel;\
214    DROP INDEX IF EXISTS idx_graph_edges_ns_tgt_rel;\
215    CREATE TABLE graph_edges_new (\
216        namespace TEXT NOT NULL,\
217        id TEXT NOT NULL,\
218        source_id TEXT NOT NULL,\
219        target_id TEXT NOT NULL,\
220        relation TEXT NOT NULL,\
221        weight REAL NOT NULL DEFAULT 1.0,\
222        created_at INTEGER NOT NULL,\
223        updated_at INTEGER NOT NULL,\
224        deleted_at INTEGER,\
225        metadata TEXT,\
226        target_backend TEXT,\
227        PRIMARY KEY (namespace, id)\
228    );\
229    INSERT INTO graph_edges_new \
230        (namespace, id, source_id, target_id, relation, weight, created_at, updated_at, deleted_at, metadata, target_backend) \
231    SELECT namespace, id, source_id, target_id, relation, weight, created_at, created_at, NULL, metadata, NULL \
232    FROM graph_edges;\
233    DROP TABLE graph_edges;\
234    ALTER TABLE graph_edges_new RENAME TO graph_edges;\
235    CREATE UNIQUE INDEX IF NOT EXISTS idx_graph_edges_unique_triple ON graph_edges(namespace, source_id, target_id, relation);\
236    CREATE INDEX IF NOT EXISTS idx_graph_edges_ns_source ON graph_edges(namespace, source_id);\
237    CREATE INDEX IF NOT EXISTS idx_graph_edges_ns_target ON graph_edges(namespace, target_id);\
238    CREATE INDEX IF NOT EXISTS idx_graph_edges_ns_relation ON graph_edges(namespace, relation);\
239    CREATE INDEX IF NOT EXISTS idx_graph_edges_ns_src_rel ON graph_edges(namespace, source_id, relation);\
240    CREATE INDEX IF NOT EXISTS idx_graph_edges_ns_tgt_rel ON graph_edges(namespace, target_id, relation);\
241    CREATE INDEX IF NOT EXISTS idx_graph_edges_target_backend ON graph_edges(target_backend) WHERE target_backend IS NOT NULL;\
242";
243
244/// V10: Add `status` column to notes; make `salience` and `decay_factor` nullable.
245///
246/// SQLite does not support `ALTER COLUMN` to change NOT NULL constraints, so the
247/// salience/decay_factor nullability change is handled by rewriting the column
248/// defaults: the columns already exist (added in V1) and will accept NULL when
249/// inserted without a value. The `NOT NULL DEFAULT` constraint in V1 means any
250/// existing rows already have a value; to allow NULLs going forward, SQLite
251/// requires a full table rebuild — but since all existing values are valid f64,
252/// we leave the constraint in place for existing rows and rely on application-
253/// level logic (`NOTES_DDL` in stores/note.rs) to use nullable columns for new
254/// tables. For production databases that went through V1, the application layer
255/// handles NULLs via `Option<f64>` and the `NOT NULL DEFAULT` remains harmless
256/// (inserts from the application always set these columns or leave them NULL via
257/// the new nullable DDL path). The only structural change this migration makes
258/// is adding the `status` column with a sensible default.
259const V10_NOTE_STATUS_AND_NULLABLE_METRICS: &str = "\
260    ALTER TABLE notes ADD COLUMN status TEXT NOT NULL DEFAULT 'active';\
261";
262
263/// V11: Add merge tombstone columns to entities.
264///
265/// `merged_into` stores the UUID of the entity this one was merged into.
266/// `merge_event_id` is an opaque event ID for auditing. Both are nullable;
267/// non-NULL only when the entity has been tombstoned by a merge.
268/// The index on (namespace, merged_into) allows efficient lookup of all
269/// entities that were merged into a given target.
270///
271/// ENTITIES_DDL in stores/entity.rs already includes these columns for new
272/// databases (created via `CREATE TABLE IF NOT EXISTS`). The migration handles
273/// the upgrade path for existing production databases.
274const V11_ENTITY_TOMBSTONE_COLUMNS: &str = "\
275    ALTER TABLE entities ADD COLUMN merged_into TEXT;\
276    ALTER TABLE entities ADD COLUMN merge_event_id TEXT;\
277    CREATE INDEX IF NOT EXISTS idx_entities_merged_into ON entities(namespace, merged_into);\
278";
279
280/// V12: Make `salience` and `decay_factor` nullable in the notes table.
281///
282/// V1 created notes with `salience REAL NOT NULL DEFAULT 0.5` and
283/// `decay_factor REAL NOT NULL DEFAULT 0.0`. SQLite does not support
284/// `ALTER COLUMN` to remove a NOT NULL constraint, so a full table rebuild
285/// is required. This migration rebuilds notes with the canonical nullable
286/// schema that `NOTES_DDL` in stores/note.rs uses for fresh databases.
287///
288/// On databases bootstrapped via `NOTES_DDL` (all test paths and new
289/// installs), salience/decay_factor are already nullable — the V12 idempotency
290/// check detects this and skips the rebuild, recording V12 as applied.
291const V12_NULLABLE_NOTE_METRICS: &str = "\
292    CREATE TABLE notes_new (\
293        id TEXT PRIMARY KEY,\
294        namespace TEXT NOT NULL,\
295        kind TEXT NOT NULL,\
296        status TEXT NOT NULL DEFAULT 'active',\
297        name TEXT,\
298        content TEXT NOT NULL DEFAULT '',\
299        salience REAL,\
300        decay_factor REAL,\
301        expires_at INTEGER,\
302        properties TEXT,\
303        created_at INTEGER NOT NULL,\
304        updated_at INTEGER NOT NULL,\
305        deleted_at INTEGER\
306    );\
307    INSERT INTO notes_new \
308        (id, namespace, kind, status, name, content, salience, decay_factor, \
309         expires_at, properties, created_at, updated_at, deleted_at) \
310    SELECT \
311        id, namespace, kind, status, name, content, salience, decay_factor, \
312        expires_at, properties, created_at, updated_at, deleted_at \
313    FROM notes;\
314    DROP TABLE notes;\
315    ALTER TABLE notes_new RENAME TO notes;\
316    CREATE INDEX IF NOT EXISTS idx_notes_namespace ON notes(namespace);\
317    CREATE INDEX IF NOT EXISTS idx_notes_kind ON notes(namespace, kind);\
318    CREATE INDEX IF NOT EXISTS idx_notes_created ON notes(created_at DESC);\
319";
320
321// V13 adds event observability + provenance columns and the event_observations table.
322// The DDL is computed at runtime via `build_v13_event_observability_sql` so that
323// running migrations on a database already bootstrapped by `ensure_events_schema`
324// (which includes the new columns) does not fail with "duplicate column name".
325const V13_EVENT_OBSERVABILITY_PROVENANCE: &str = "__v13_computed_at_runtime__";
326
327/// DDL for the `_embedding_models` registry table (ADR-043 §1).
328///
329/// Shared between the V14 migration (`build_v14_embedding_model_registry_sql`) and
330/// the belt-and-suspenders creation in `StorageBackend::vectors_for_namespace`.
331/// Both sites reference this constant so the schema cannot silently diverge if the
332/// registry evolves (ADR-043 §8 step 4 mandates a future schema tightening).
333pub const EMBEDDING_MODELS_DDL: &str = "\
334    CREATE TABLE IF NOT EXISTS _embedding_models (\
335        id              BLOB PRIMARY KEY,\
336        engine_name     TEXT NOT NULL,\
337        model_id        TEXT NOT NULL,\
338        key_version     TEXT NOT NULL,\
339        dim             INTEGER NOT NULL,\
340        output_dim      INTEGER,\
341        status          TEXT NOT NULL CHECK (status IN ('pending', 'active', 'superseded', 'archived')),\
342        activated_at    INTEGER,\
343        superseded_at   INTEGER,\
344        superseded_by   BLOB,\
345        canonical_key   BLOB NOT NULL UNIQUE,\
346        created_at      INTEGER NOT NULL\
347    );\
348    CREATE UNIQUE INDEX IF NOT EXISTS idx_embed_models_one_active \
349        ON _embedding_models(engine_name) WHERE status = 'active';\
350    CREATE INDEX IF NOT EXISTS idx_embed_models_engine_status \
351        ON _embedding_models(engine_name, status);";
352
353/// V14: Embedding model registry (`_embedding_models`) and per-engine model FK column.
354///
355/// Creates the `_embedding_models` registry table that tracks which embedding model
356/// is active for each vector engine (ADR-043 §1). Also adds the `embedding_model_id`
357/// FK column to any existing regular `vec_<engine>` tables found in sqlite_master
358/// so that stored vectors can be traced back to the model that produced them.
359///
360/// sqlite-vec virtual tables (`vec0`) do not support `ALTER TABLE ADD COLUMN`;
361/// for those tables the column is added during the startup backfill rebuild
362/// (ADR-043 §8 steps 2-4), which is deferred to a follow-up PR — see the tracking
363/// issue filed in MAJ-2 of codex round-1.
364///
365/// New `vec_<engine>` tables created via `StorageBackend::vectors_for_namespace`
366/// after V14 do NOT yet include `embedding_model_id` at creation time; that column
367/// will be present only after the ADR-043 §8 step-4 rebuild lands.
368///
369/// The migration SQL is computed at runtime via `build_v14_embedding_model_registry_sql`
370/// to discover existing `vec_<engine>` tables dynamically and skip the `ALTER TABLE`
371/// step for any table that already has the column.
372const V14_EMBEDDING_MODEL_REGISTRY: &str = "__v14_computed_at_runtime__";
373
374/// V15: proposals_open projection table (ADR-046).
375///
376/// Maintains a fold-derived view of the four proposal EventKinds so that
377/// `list(kind=proposal, status="open")` is an index scan rather than a full
378/// event-log fold. The `idx_events_payload_proposal_id` expression index
379/// (already created in V13) backs the per-proposal event history query.
380const V15_PROPOSALS_OPEN: &str = "\
381    CREATE TABLE IF NOT EXISTS proposals_open (\
382        proposal_id    TEXT PRIMARY KEY,\
383        namespace      TEXT NOT NULL,\
384        proposer       TEXT NOT NULL,\
385        title          TEXT NOT NULL,\
386        status         TEXT NOT NULL CHECK (status IN ('open', 'changes_requested', 'approved', 'rejected', 'applied', 'withdrawn')),\
387        created_at     INTEGER NOT NULL,\
388        updated_at     INTEGER NOT NULL,\
389        expiry         INTEGER,\
390        last_decision  TEXT,\
391        review_count   INTEGER NOT NULL DEFAULT 0,\
392        approve_count  INTEGER NOT NULL DEFAULT 0,\
393        reject_count   INTEGER NOT NULL DEFAULT 0\
394    );\
395    CREATE INDEX IF NOT EXISTS idx_proposals_open_ns_status ON proposals_open(namespace, status);\
396    CREATE INDEX IF NOT EXISTS idx_proposals_open_proposer ON proposals_open(namespace, proposer);\
397    CREATE INDEX IF NOT EXISTS idx_proposals_open_updated ON proposals_open(namespace, updated_at DESC);\
398";
399
400pub const MIGRATIONS: &[VersionedMigration] = &[
401    VersionedMigration {
402        version: 1,
403        name: "initial_schema",
404        up: V1_UP,
405    },
406    VersionedMigration {
407        version: 2,
408        name: "add_name_to_notes",
409        up: "ALTER TABLE notes ADD COLUMN name TEXT;",
410    },
411    VersionedMigration {
412        version: 3,
413        name: "add_events_namespace_created_index",
414        up: "CREATE INDEX IF NOT EXISTS idx_events_ns_created ON events(namespace, created_at DESC);",
415    },
416    VersionedMigration {
417        version: 4,
418        name: "dedupe_graph_edge_triples",
419        up: V4_DEDUPE_GRAPH_EDGE_TRIPLES,
420    },
421    VersionedMigration {
422        version: 5,
423        name: "add_entity_type_to_entities",
424        up: V5_ADD_ENTITY_TYPE_TO_ENTITIES,
425    },
426    // V6–V8: no-op placeholder slots originally reserved in the ADR-015 ledger for
427    // ADR-043, ADR-046, and ADR-041 respectively.  During the v1 parallel cluster
428    // landings (c01/c03/c04/c06) the concrete migrations from those ADRs landed at
429    // V5, V9, and V13 instead (slot assignments shifted as clusters merged).  V6–V8
430    // were absorbed as no-ops to keep the contiguity check passing.  Their names are
431    // frozen — V1-V13 are production schema.
432    //
433    // NOTE: V6 was originally named "reserved_adr043_embedding_pipeline_extensions"
434    // because it was intended to hold ADR-043 work.  The actual ADR-043 migration
435    // landed at V14 (cluster-20).  V6 retains its original name to avoid breaking the
436    // production tracking table on existing deployments.
437    VersionedMigration {
438        version: 6,
439        name: "reserved_adr043_embedding_pipeline_extensions",
440        up: "SELECT 1;",
441    },
442    VersionedMigration {
443        version: 7,
444        name: "reserved_adr046_event_sourced_proposals_index",
445        up: "SELECT 1;",
446    },
447    VersionedMigration {
448        version: 8,
449        name: "reserved_adr041_event_observations_and_session_id",
450        up: "SELECT 1;",
451    },
452    VersionedMigration {
453        version: 9,
454        name: "edge_lifecycle_and_target_backend",
455        up: V9_EDGE_LIFECYCLE_AND_TARGET_BACKEND,
456    },
457    VersionedMigration {
458        version: 10,
459        name: "note_status_and_nullable_metrics",
460        up: V10_NOTE_STATUS_AND_NULLABLE_METRICS,
461    },
462    VersionedMigration {
463        version: 11,
464        name: "entity_tombstone_columns",
465        up: V11_ENTITY_TOMBSTONE_COLUMNS,
466    },
467    VersionedMigration {
468        version: 12,
469        name: "nullable_note_metrics",
470        up: V12_NULLABLE_NOTE_METRICS,
471    },
472    VersionedMigration {
473        version: 13,
474        name: "event_observability_provenance",
475        up: V13_EVENT_OBSERVABILITY_PROVENANCE,
476    },
477    VersionedMigration {
478        version: 14,
479        name: "embedding_model_registry",
480        up: V14_EMBEDDING_MODEL_REGISTRY,
481    },
482    // V15: proposals_open projection table (ADR-046, cluster-22).
483    VersionedMigration {
484        version: 15,
485        name: "proposals_open",
486        up: V15_PROPOSALS_OPEN,
487    },
488];
489
490const MIGRATION_TRACKING_TABLE: &str = "\
491    CREATE TABLE IF NOT EXISTS _schema_migrations (\
492        version   INTEGER PRIMARY KEY,\
493        name      TEXT NOT NULL,\
494        applied_at INTEGER NOT NULL\
495    );\
496";
497
498/// Apply all unapplied migrations from `MIGRATIONS` in order.
499///
500/// Returns the highest version now applied, or `0` if the DB is empty and no
501/// migrations exist.
502///
503/// # Idempotency
504///
505/// Safe to call multiple times. Already-applied migrations are skipped.
506///
507/// # Atomicity
508///
509/// Each migration runs in its own transaction. A failure rolls back that
510/// migration and leaves the DB at the prior version.
511///
512/// # Errors
513///
514/// Returns `SqliteError::InvalidData` if the `MIGRATIONS` array is not
515/// contiguous (1, 2, 3, ...).
516///
517/// Returns `SqliteError::Migration { version, error }` if any migration fails.
518pub fn run_migrations(conn: &mut Connection) -> Result<u32, SqliteError> {
519    for (i, m) in MIGRATIONS.iter().enumerate() {
520        let expected = (i + 1) as u32;
521        if m.version != expected {
522            return Err(SqliteError::InvalidData(format!(
523                "MIGRATIONS array is not contiguous: expected version {expected} at index {i}, \
524                 got version {}",
525                m.version
526            )));
527        }
528    }
529
530    conn.execute_batch(MIGRATION_TRACKING_TABLE)?;
531
532    // Determine the current version (highest applied).
533    let current_version: u32 = conn
534        .query_row(
535            "SELECT COALESCE(MAX(version), 0) FROM _schema_migrations",
536            [],
537            |row| row.get(0),
538        )
539        .unwrap_or(0);
540
541    let mut applied_version = current_version;
542
543    for migration in MIGRATIONS {
544        if migration.version <= current_version {
545            continue;
546        }
547
548        // V2 adds `name` to notes.  StorageBackend::notes() bootstraps the schema
549        // via NOTES_DDL (which already includes `name`), so the column may already
550        // exist even though the migration has never been recorded.  Treat "duplicate
551        // column name" from SQLite as idempotent for ALTER TABLE migrations.
552        if migration.version == 2 {
553            let col_exists: bool = conn
554                .query_row(
555                    "SELECT COUNT(*) > 0 FROM pragma_table_info('notes') WHERE name = 'name'",
556                    [],
557                    |row| row.get(0),
558                )
559                .unwrap_or(false);
560            if col_exists {
561                // Column already present — record the migration as applied and skip.
562                let now = chrono::Utc::now().timestamp_micros();
563                conn.execute(
564                    "INSERT OR IGNORE INTO _schema_migrations (version, name, applied_at) \
565                     VALUES (?1, ?2, ?3)",
566                    rusqlite::params![migration.version, migration.name, now],
567                )
568                .map_err(|e| SqliteError::Migration {
569                    version: migration.version,
570                    error: e.to_string(),
571                })?;
572                applied_version = migration.version;
573                continue;
574            }
575        }
576
577        // V5 adds `entity_type` to entities.  ENTITIES_DDL already includes the
578        // column so in-process DBs created via ensure_entities_schema already have
579        // it.  Same idempotency pattern as V2.
580        if migration.version == 5 {
581            let col_exists: bool = conn
582                .query_row(
583                    "SELECT COUNT(*) > 0 FROM pragma_table_info('entities') WHERE name = 'entity_type'",
584                    [],
585                    |row| row.get(0),
586                )
587                .unwrap_or(false);
588            if col_exists {
589                let now = chrono::Utc::now().timestamp_micros();
590                conn.execute(
591                    "INSERT OR IGNORE INTO _schema_migrations (version, name, applied_at) \
592                     VALUES (?1, ?2, ?3)",
593                    rusqlite::params![migration.version, migration.name, now],
594                )
595                .map_err(|e| SqliteError::Migration {
596                    version: migration.version,
597                    error: e.to_string(),
598                })?;
599                applied_version = migration.version;
600                continue;
601            }
602        }
603
604        // V10 adds `status` to notes.  NOTES_DDL in stores/note.rs already includes
605        // `status`, so when a fresh schema is created via the store path (e.g. in
606        // tests or StorageBackend::notes()), the column exists before V10 runs.
607        // Detect and skip idempotently, recording the migration as applied.
608        if migration.version == 10 {
609            let col_exists: bool = conn
610                .query_row(
611                    "SELECT COUNT(*) > 0 FROM pragma_table_info('notes') WHERE name = 'status'",
612                    [],
613                    |row| row.get(0),
614                )
615                .unwrap_or(false);
616            if col_exists {
617                let now = chrono::Utc::now().timestamp_micros();
618                conn.execute(
619                    "INSERT OR IGNORE INTO _schema_migrations (version, name, applied_at) \
620                     VALUES (?1, ?2, ?3)",
621                    rusqlite::params![migration.version, migration.name, now],
622                )
623                .map_err(|e| SqliteError::Migration {
624                    version: migration.version,
625                    error: e.to_string(),
626                })?;
627                applied_version = migration.version;
628                continue;
629            }
630        }
631
632        // V11 adds `merged_into` and `merge_event_id` to entities. ENTITIES_DDL in
633        // stores/entity.rs already includes these columns for databases created via
634        // the store path (e.g. in tests or StorageBackend::entities()). Detect and
635        // skip idempotently, recording the migration as applied.
636        if migration.version == 11 {
637            let col_exists: bool = conn
638                .query_row(
639                    "SELECT COUNT(*) > 0 FROM pragma_table_info('entities') WHERE name = 'merged_into'",
640                    [],
641                    |row| row.get(0),
642                )
643                .unwrap_or(false);
644            if col_exists {
645                let now = chrono::Utc::now().timestamp_micros();
646                conn.execute(
647                    "INSERT OR IGNORE INTO _schema_migrations (version, name, applied_at) \
648                     VALUES (?1, ?2, ?3)",
649                    rusqlite::params![migration.version, migration.name, now],
650                )
651                .map_err(|e| SqliteError::Migration {
652                    version: migration.version,
653                    error: e.to_string(),
654                })?;
655                applied_version = migration.version;
656                continue;
657            }
658        }
659
660        // V12 rebuilds the notes table to make salience/decay_factor nullable.
661        // NOTES_DDL in stores/note.rs already declares them nullable for databases
662        // created via the store path. If salience is already nullable (notnull=0),
663        // skip the rebuild and record V12 as applied.
664        if migration.version == 12 {
665            let already_nullable: bool = conn
666                .query_row(
667                    "SELECT COUNT(*) > 0 FROM pragma_table_info('notes') \
668                     WHERE name = 'salience' AND \"notnull\" = 0",
669                    [],
670                    |row| row.get(0),
671                )
672                .unwrap_or(false);
673            if already_nullable {
674                let now = chrono::Utc::now().timestamp_micros();
675                conn.execute(
676                    "INSERT OR IGNORE INTO _schema_migrations (version, name, applied_at) \
677                     VALUES (?1, ?2, ?3)",
678                    rusqlite::params![migration.version, migration.name, now],
679                )
680                .map_err(|e| SqliteError::Migration {
681                    version: migration.version,
682                    error: e.to_string(),
683                })?;
684                applied_version = migration.version;
685                continue;
686            }
687        }
688
689        let tx = conn.transaction().map_err(|e| SqliteError::Migration {
690            version: migration.version,
691            error: e.to_string(),
692        })?;
693
694        let up_sql = if migration.version == 13 {
695            build_v13_event_observability_sql(&tx).map_err(|e| SqliteError::Migration {
696                version: migration.version,
697                error: e.to_string(),
698            })?
699        } else if migration.version == 14 {
700            build_v14_embedding_model_registry_sql(&tx).map_err(|e| SqliteError::Migration {
701                version: migration.version,
702                error: e.to_string(),
703            })?
704        } else {
705            migration.up.to_string()
706        };
707
708        tx.execute_batch(&up_sql)
709            .map_err(|e| SqliteError::Migration {
710                version: migration.version,
711                error: e.to_string(),
712            })?;
713
714        let now = chrono::Utc::now().timestamp_micros();
715        tx.execute(
716            "INSERT INTO _schema_migrations (version, name, applied_at) VALUES (?1, ?2, ?3)",
717            rusqlite::params![migration.version, migration.name, now],
718        )
719        .map_err(|e| SqliteError::Migration {
720            version: migration.version,
721            error: e.to_string(),
722        })?;
723
724        tx.commit().map_err(|e| SqliteError::Migration {
725            version: migration.version,
726            error: e.to_string(),
727        })?;
728
729        applied_version = migration.version;
730    }
731
732    Ok(applied_version)
733}
734
735fn table_has_column(
736    conn: &Connection,
737    table: &'static str,
738    column: &'static str,
739) -> Result<bool, rusqlite::Error> {
740    conn.query_row(
741        "SELECT COUNT(*) > 0 FROM pragma_table_info(?1) WHERE name = ?2",
742        rusqlite::params![table, column],
743        |row| row.get(0),
744    )
745}
746
747fn build_v13_event_observability_sql(conn: &Connection) -> Result<String, rusqlite::Error> {
748    let mut sql = String::new();
749    for (column, ddl) in [
750        (
751            "kind",
752            "ALTER TABLE events ADD COLUMN kind TEXT NOT NULL DEFAULT 'audit';",
753        ),
754        (
755            "payload",
756            "ALTER TABLE events ADD COLUMN payload TEXT NOT NULL DEFAULT '{}';",
757        ),
758        (
759            "payload_schema_version",
760            "ALTER TABLE events ADD COLUMN payload_schema_version INTEGER NOT NULL DEFAULT 1;",
761        ),
762        (
763            "profile_state_version",
764            "ALTER TABLE events ADD COLUMN profile_state_version INTEGER;",
765        ),
766        (
767            "session_id",
768            "ALTER TABLE events ADD COLUMN session_id TEXT;",
769        ),
770        (
771            "aggregate_kind",
772            "ALTER TABLE events ADD COLUMN aggregate_kind TEXT;",
773        ),
774        (
775            "aggregate_id",
776            "ALTER TABLE events ADD COLUMN aggregate_id TEXT;",
777        ),
778    ] {
779        if !table_has_column(conn, "events", column)? {
780            sql.push_str(ddl);
781        }
782    }
783    // Migrate legacy data column into payload if both exist.
784    if table_has_column(conn, "events", "data")? && table_has_column(conn, "events", "payload")? {
785        sql.push_str("UPDATE events SET payload = data WHERE data IS NOT NULL AND data <> '';");
786    }
787    sql.push_str(
788        "CREATE TABLE IF NOT EXISTS event_observations (\
789            event_id TEXT NOT NULL,\
790            entity_id TEXT NOT NULL,\
791            referent_kind TEXT NOT NULL,\
792            role TEXT NOT NULL,\
793            position INTEGER NOT NULL,\
794            PRIMARY KEY (event_id, role, position)\
795        );\
796        CREATE INDEX IF NOT EXISTS idx_events_kind ON events(kind);\
797        CREATE INDEX IF NOT EXISTS idx_events_session ON events(namespace, session_id, created_at, id);\
798        CREATE INDEX IF NOT EXISTS idx_events_ns_created_id ON events(namespace, created_at DESC, id DESC);\
799        CREATE INDEX IF NOT EXISTS idx_events_payload_proposal_id ON events(json_extract(payload, '$.proposal_id'));\
800        CREATE INDEX IF NOT EXISTS idx_event_obs_entity ON event_observations(entity_id, role);\
801        CREATE INDEX IF NOT EXISTS idx_event_obs_event_role ON event_observations(event_id, role);",
802    );
803    Ok(sql)
804}
805
806/// Build V14 migration SQL at runtime.
807///
808/// Creates the `_embedding_models` registry table and its indexes (ADR-043 §1).
809/// Then discovers any existing regular (non-virtual) `vec_<engine>` tables in
810/// sqlite_master and adds the `embedding_model_id` FK column where absent.
811///
812/// sqlite-vec virtual tables (`vec0`) do not support `ALTER TABLE ADD COLUMN`;
813/// those tables are handled by the startup backfill rebuild (ADR-043 §8) which
814/// runs after the SQL migration completes.  New `vec_<engine>` tables created
815/// after V14 do NOT yet include `embedding_model_id` at creation — that column
816/// will be present only after the ADR-043 §8 step-4 rebuild lands (follow-up).
817fn build_v14_embedding_model_registry_sql(conn: &Connection) -> Result<String, rusqlite::Error> {
818    let mut sql = String::from(EMBEDDING_MODELS_DDL);
819
820    // Discover existing regular (non-virtual) vec_<engine> tables.
821    //
822    // Exclusion rationale:
823    // - `sql NOT LIKE '%VIRTUAL%'` drops vec0 virtual tables (type='table' but DDL
824    //   starts with "CREATE VIRTUAL TABLE").
825    // - `sql NOT LIKE '%vec0%'` is a belt-and-suspenders drop for any DDL that still
826    //   contains the vec0 keyword.
827    // - The four `NOT LIKE` suffix clauses exclude the sqlite-vec internal shadow tables
828    //   that are created as plain regular tables alongside each vec0 virtual table:
829    //     vec_<x>_chunks, vec_<x>_rowids, vec_<x>_info, vec_<x>_vector_chunks00
830    //   (see sqlite-vec 0.1.9 sqlite-vec.c:3423-3468; these tables own sqlite-vec's
831    //   internal layout and must never receive extraneous columns).
832    //   The ESCAPE '\' form is required because '%' and '_' are SQL LIKE wildcards.
833    let mut stmt = conn.prepare(
834        "SELECT name FROM sqlite_master \
835         WHERE type = 'table' \
836           AND name LIKE 'vec_%' \
837           AND sql NOT LIKE '%VIRTUAL%' \
838           AND sql NOT LIKE '%vec0%' \
839           AND name NOT LIKE '%\\_chunks' ESCAPE '\\' \
840           AND name NOT LIKE '%\\_rowids' ESCAPE '\\' \
841           AND name NOT LIKE '%\\_info' ESCAPE '\\' \
842           AND name NOT LIKE '%\\_vector\\_chunks%' ESCAPE '\\'",
843    )?;
844    let vec_tables: Vec<String> = stmt
845        .query_map([], |row| row.get(0))?
846        .filter_map(|r| r.ok())
847        .collect();
848
849    for table in &vec_tables {
850        // Validate table name: only alphanumeric and underscores after the 'vec_' prefix.
851        let valid = table.starts_with("vec_")
852            && table[4..]
853                .chars()
854                .all(|c| c.is_ascii_alphanumeric() || c == '_');
855        if !valid {
856            continue;
857        }
858        // Check whether the column already exists.
859        let col_exists: bool = conn
860            .query_row(
861                "SELECT COUNT(*) > 0 FROM pragma_table_info(?1) WHERE name = 'embedding_model_id'",
862                rusqlite::params![table],
863                |row| row.get(0),
864            )
865            .unwrap_or(false);
866        if col_exists {
867            continue;
868        }
869        sql.push_str(&format!(
870            "ALTER TABLE {t} ADD COLUMN embedding_model_id BLOB REFERENCES _embedding_models(id);\
871             CREATE INDEX IF NOT EXISTS idx_{t}_model ON {t}(embedding_model_id);",
872            t = table,
873        ));
874    }
875
876    Ok(sql)
877}
878
879// =============================================================================
880// Tests
881// =============================================================================
882
883#[cfg(test)]
884mod tests {
885    use super::*;
886
887    fn open_memory() -> Connection {
888        Connection::open_in_memory().expect("in-memory connection")
889    }
890
891    #[test]
892    fn fresh_db_migrates_to_latest() {
893        let mut conn = open_memory();
894        let version = run_migrations(&mut conn).expect("migrations should succeed");
895        assert_eq!(version, 15);
896
897        // Verify the tracking table has rows for V1 through V15.
898        let count: i64 = conn
899            .query_row(
900                "SELECT COUNT(*) FROM _schema_migrations WHERE version IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)",
901                [],
902                |row| row.get(0),
903            )
904            .unwrap();
905        assert_eq!(count, 15);
906
907        // Verify the entities table was created.
908        let tbl_count: i64 = conn
909            .query_row(
910                "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='entities'",
911                [],
912                |row| row.get(0),
913            )
914            .unwrap();
915        assert_eq!(tbl_count, 1);
916
917        // Verify V2 added the name column to notes.
918        let col_count: i64 = conn
919            .query_row(
920                "SELECT COUNT(*) FROM pragma_table_info('notes') WHERE name = 'name'",
921                [],
922                |row| row.get(0),
923            )
924            .unwrap();
925        assert_eq!(col_count, 1, "V2 must add name column to notes");
926
927        // Verify V5 added entity_type column to entities.
928        let et_count: i64 = conn
929            .query_row(
930                "SELECT COUNT(*) FROM pragma_table_info('entities') WHERE name = 'entity_type'",
931                [],
932                |row| row.get(0),
933            )
934            .unwrap();
935        assert_eq!(et_count, 1, "V5 must add entity_type column to entities");
936
937        // Verify V5 added the kind+entity_type index.
938        let idx_count: i64 = conn
939            .query_row(
940                "SELECT COUNT(*) FROM sqlite_master WHERE type='index' \
941                 AND name='idx_entities_kind_entity_type'",
942                [],
943                |row| row.get(0),
944            )
945            .unwrap();
946        assert_eq!(idx_count, 1, "V5 must create idx_entities_kind_entity_type");
947
948        // Verify V10 added the status column to notes.
949        let status_col: i64 = conn
950            .query_row(
951                "SELECT COUNT(*) FROM pragma_table_info('notes') WHERE name = 'status'",
952                [],
953                |row| row.get(0),
954            )
955            .unwrap();
956        assert_eq!(status_col, 1, "V10 must add status column to notes");
957
958        // Verify V11 added merged_into column to entities.
959        let merged_into_col: i64 = conn
960            .query_row(
961                "SELECT COUNT(*) FROM pragma_table_info('entities') WHERE name = 'merged_into'",
962                [],
963                |row| row.get(0),
964            )
965            .unwrap();
966        assert_eq!(
967            merged_into_col, 1,
968            "V11 must add merged_into column to entities"
969        );
970
971        // Verify V12 made salience nullable (notnull=0).
972        let salience_notnull: i64 = conn
973            .query_row(
974                "SELECT \"notnull\" FROM pragma_table_info('notes') WHERE name = 'salience'",
975                [],
976                |row| row.get(0),
977            )
978            .unwrap();
979        assert_eq!(salience_notnull, 0, "V12 must make salience nullable");
980
981        // Verify V13 added event observability columns to events.
982        for col in [
983            "kind",
984            "payload",
985            "payload_schema_version",
986            "profile_state_version",
987            "session_id",
988            "aggregate_kind",
989            "aggregate_id",
990        ] {
991            let exists: bool = conn
992                .query_row(
993                    "SELECT COUNT(*) > 0 FROM pragma_table_info('events') WHERE name = ?1",
994                    [col],
995                    |r| r.get(0),
996                )
997                .unwrap();
998            assert!(exists, "V13 must add events.{col}");
999        }
1000
1001        // Verify event_observations table exists.
1002        let obs_tbl: i64 = conn
1003            .query_row(
1004                "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='event_observations'",
1005                [],
1006                |r| r.get(0),
1007            )
1008            .unwrap();
1009        assert_eq!(obs_tbl, 1, "V13 must create event_observations table");
1010
1011        // Verify V13 indexes exist.
1012        for idx in [
1013            "idx_events_ns_created_id",
1014            "idx_events_session",
1015            "idx_events_payload_proposal_id",
1016            "idx_event_obs_entity",
1017            "idx_event_obs_event_role",
1018        ] {
1019            let exists: bool = conn
1020                .query_row(
1021                    "SELECT COUNT(*) > 0 FROM sqlite_master WHERE type='index' AND name=?1",
1022                    [idx],
1023                    |r| r.get(0),
1024                )
1025                .unwrap();
1026            assert!(exists, "V13 must create index {idx}");
1027        }
1028
1029        // Verify V14 created the _embedding_models registry table.
1030        let embed_tbl: i64 = conn
1031            .query_row(
1032                "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='_embedding_models'",
1033                [],
1034                |r| r.get(0),
1035            )
1036            .unwrap();
1037        assert_eq!(embed_tbl, 1, "V14 must create _embedding_models table");
1038
1039        // Verify V14 indexes exist.
1040        for idx in [
1041            "idx_embed_models_one_active",
1042            "idx_embed_models_engine_status",
1043        ] {
1044            let exists: bool = conn
1045                .query_row(
1046                    "SELECT COUNT(*) > 0 FROM sqlite_master WHERE type='index' AND name=?1",
1047                    [idx],
1048                    |r| r.get(0),
1049                )
1050                .unwrap();
1051            assert!(exists, "V14 must create index {idx}");
1052        }
1053
1054        // Verify V15 created the proposals_open table.
1055        let proposals_tbl: i64 = conn
1056            .query_row(
1057                "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='proposals_open'",
1058                [],
1059                |r| r.get(0),
1060            )
1061            .unwrap();
1062        assert_eq!(proposals_tbl, 1, "V15 must create proposals_open table");
1063
1064        // Verify V15 indexes on proposals_open.
1065        for idx in [
1066            "idx_proposals_open_ns_status",
1067            "idx_proposals_open_proposer",
1068            "idx_proposals_open_updated",
1069        ] {
1070            let exists: bool = conn
1071                .query_row(
1072                    "SELECT COUNT(*) > 0 FROM sqlite_master WHERE type='index' AND name=?1",
1073                    [idx],
1074                    |r| r.get(0),
1075                )
1076                .unwrap();
1077            assert!(exists, "V15 must create index {idx}");
1078        }
1079    }
1080
1081    #[test]
1082    fn run_migrations_twice_is_idempotent() {
1083        let mut conn = open_memory();
1084        let v1 = run_migrations(&mut conn).expect("first run");
1085        let v2 = run_migrations(&mut conn).expect("second run");
1086        assert_eq!(v1, 15);
1087        assert_eq!(v2, 15);
1088
1089        // Should still have exactly fifteen rows in the tracking table (V1..V15).
1090        let count: i64 = conn
1091            .query_row("SELECT COUNT(*) FROM _schema_migrations", [], |row| {
1092                row.get(0)
1093            })
1094            .unwrap();
1095        assert_eq!(count, 15);
1096    }
1097
1098    // F052 (CRIT): V9 migration must add target_backend column + partial index on graph_edges.
1099    // ADR-009 requires target_backend for backend routing.
1100    #[test]
1101    fn migration_v9_adds_target_backend_index() {
1102        let mut conn = open_memory();
1103        let version = run_migrations(&mut conn).expect("migrations should succeed");
1104        assert_eq!(
1105            version, 15,
1106            "F052: latest migration must be V15 (proposals_open)"
1107        );
1108        let col: i64 = conn
1109            .query_row(
1110                "SELECT COUNT(*) FROM pragma_table_info('graph_edges') WHERE name = 'target_backend'",
1111                [],
1112                |row| row.get(0),
1113            )
1114            .unwrap();
1115        assert_eq!(
1116            col, 1,
1117            "F052: graph_edges must have target_backend column after V9 migration"
1118        );
1119        let idx: i64 = conn
1120            .query_row(
1121                "SELECT COUNT(*) FROM sqlite_master WHERE type='index' AND name='idx_graph_edges_target_backend'",
1122                [],
1123                |row| row.get(0),
1124            )
1125            .unwrap();
1126        assert_eq!(
1127            idx, 1,
1128            "F052: idx_graph_edges_target_backend partial index must exist after V9 migration"
1129        );
1130    }
1131
1132    #[test]
1133    fn failed_migration_rolls_back() {
1134        let bad_v16 = VersionedMigration {
1135            version: 16,
1136            name: "bad_migration",
1137            up: "THIS IS NOT VALID SQL;",
1138        };
1139
1140        let mut conn = open_memory();
1141
1142        // Apply all real migrations (V1..V15) so the DB is at V15.
1143        run_migrations(&mut conn).expect("V1..V15 should apply cleanly");
1144
1145        // Now manually drive the bad V16 migration to check rollback behaviour.
1146        let result = apply_single_migration(&mut conn, &bad_v16);
1147        assert!(result.is_err(), "bad migration should return error");
1148
1149        // DB should still be at V15 — no V16 row in tracking.
1150        let v16_count: i64 = conn
1151            .query_row(
1152                "SELECT COUNT(*) FROM _schema_migrations WHERE version = 16",
1153                [],
1154                |row| row.get(0),
1155            )
1156            .unwrap();
1157        assert_eq!(v16_count, 0, "V16 must not be recorded after rollback");
1158
1159        // V1..V15 should still be there.
1160        let applied_count: i64 = conn
1161            .query_row(
1162                "SELECT COUNT(*) FROM _schema_migrations WHERE version IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)",
1163                [],
1164                |row| row.get(0),
1165            )
1166            .unwrap();
1167        assert_eq!(applied_count, 15, "V1..V15 must still be recorded");
1168    }
1169
1170    #[test]
1171    fn store_ddl_then_migrations_is_idempotent() {
1172        use crate::stores::entity::ensure_entities_schema;
1173        use crate::stores::note::ensure_notes_schema;
1174
1175        let mut conn = open_memory();
1176
1177        // Simulate the StorageBackend path: store DDL creates notes table
1178        // WITH the name column (NOTES_DDL includes it for test convenience).
1179        ensure_notes_schema(&conn).expect("store DDL should create notes");
1180
1181        // Simulate entity DDL creation (includes merged_into, merge_event_id).
1182        ensure_entities_schema(&conn).expect("store DDL should create entities");
1183
1184        // Verify name column exists from DDL.
1185        let has_name: bool = conn
1186            .query_row(
1187                "SELECT COUNT(*) > 0 FROM pragma_table_info('notes') WHERE name = 'name'",
1188                [],
1189                |row| row.get(0),
1190            )
1191            .unwrap();
1192        assert!(has_name, "NOTES_DDL should include name column");
1193
1194        // Now run versioned migrations — V2 should detect the existing name column
1195        // and skip; V5 should detect entity_type already present via ENTITIES_DDL and skip;
1196        // V9 rebuilds graph_edges with lifecycle columns; V10 should detect the existing
1197        // status column and skip; V11 should detect the existing merged_into column and skip;
1198        // V12 should detect that salience is already nullable and skip;
1199        // V13 adds event observability columns and event_observations table;
1200        // V14 creates the _embedding_models registry table;
1201        // V15 creates the proposals_open table.
1202        let version = run_migrations(&mut conn).expect("migrations after store DDL");
1203        assert_eq!(version, 15);
1204
1205        // V2 should be recorded as applied (skipped but tracked).
1206        let v2_count: i64 = conn
1207            .query_row(
1208                "SELECT COUNT(*) FROM _schema_migrations WHERE version = 2",
1209                [],
1210                |row| row.get(0),
1211            )
1212            .unwrap();
1213        assert_eq!(
1214            v2_count, 1,
1215            "V2 must be recorded even when column pre-exists"
1216        );
1217
1218        // V5 should be recorded as applied (skipped but tracked).
1219        let v5_count: i64 = conn
1220            .query_row(
1221                "SELECT COUNT(*) FROM _schema_migrations WHERE version = 5",
1222                [],
1223                |row| row.get(0),
1224            )
1225            .unwrap();
1226        assert_eq!(
1227            v5_count, 1,
1228            "V5 must be recorded even when entity_type column pre-exists"
1229        );
1230
1231        // V9 (edge lifecycle + target_backend) must be recorded.
1232        let v9_count: i64 = conn
1233            .query_row(
1234                "SELECT COUNT(*) FROM _schema_migrations WHERE version = 9",
1235                [],
1236                |row| row.get(0),
1237            )
1238            .unwrap();
1239        assert_eq!(
1240            v9_count, 1,
1241            "V9 must be recorded after store-DDL + migrations"
1242        );
1243
1244        // V10 should be recorded as applied (skipped but tracked).
1245        let v10_count: i64 = conn
1246            .query_row(
1247                "SELECT COUNT(*) FROM _schema_migrations WHERE version = 10",
1248                [],
1249                |row| row.get(0),
1250            )
1251            .unwrap();
1252        assert_eq!(
1253            v10_count, 1,
1254            "V10 must be recorded even when status column pre-exists via NOTES_DDL"
1255        );
1256
1257        // V11 should be recorded as applied (skipped but tracked).
1258        let v11_count: i64 = conn
1259            .query_row(
1260                "SELECT COUNT(*) FROM _schema_migrations WHERE version = 11",
1261                [],
1262                |row| row.get(0),
1263            )
1264            .unwrap();
1265        assert_eq!(
1266            v11_count, 1,
1267            "V11 must be recorded even when merged_into column pre-exists via ENTITIES_DDL"
1268        );
1269
1270        // V12 should be recorded as applied (skipped but tracked — NOTES_DDL already
1271        // creates salience as nullable).
1272        let v12_count: i64 = conn
1273            .query_row(
1274                "SELECT COUNT(*) FROM _schema_migrations WHERE version = 12",
1275                [],
1276                |row| row.get(0),
1277            )
1278            .unwrap();
1279        assert_eq!(
1280            v12_count, 1,
1281            "V12 must be recorded even when salience is already nullable via NOTES_DDL"
1282        );
1283
1284        // V13 (event observability) must be recorded.
1285        let v13_count: i64 = conn
1286            .query_row(
1287                "SELECT COUNT(*) FROM _schema_migrations WHERE version = 13",
1288                [],
1289                |row| row.get(0),
1290            )
1291            .unwrap();
1292        assert_eq!(
1293            v13_count, 1,
1294            "V13 must be recorded after store-DDL + migrations"
1295        );
1296
1297        // V14 (embedding model registry) must be recorded.
1298        let v14_count: i64 = conn
1299            .query_row(
1300                "SELECT COUNT(*) FROM _schema_migrations WHERE version = 14",
1301                [],
1302                |row| row.get(0),
1303            )
1304            .unwrap();
1305        assert_eq!(
1306            v14_count, 1,
1307            "V14 must be recorded after store-DDL + migrations"
1308        );
1309
1310        // V15 (proposals_open) must be recorded.
1311        let v15_count: i64 = conn
1312            .query_row(
1313                "SELECT COUNT(*) FROM _schema_migrations WHERE version = 15",
1314                [],
1315                |row| row.get(0),
1316            )
1317            .unwrap();
1318        assert_eq!(
1319            v15_count, 1,
1320            "V15 must be recorded after store-DDL + migrations"
1321        );
1322    }
1323
1324    /// Verify that V12 rebuilds a V1-era notes table so salience/decay_factor
1325    /// accept NULL, unblocking `create_note` with `salience=None` on migrated DBs.
1326    #[test]
1327    fn v1_to_v12_allows_null_salience() {
1328        let mut conn = open_memory();
1329
1330        // Bootstrap the schema tracking table and create the full V1-era schema.
1331        // The notes table uses NOT NULL DEFAULT on salience/decay_factor as V1 did.
1332        conn.execute_batch(MIGRATION_TRACKING_TABLE).unwrap();
1333        conn.execute_batch(
1334            "CREATE TABLE entities (\
1335                id TEXT PRIMARY KEY,\
1336                namespace TEXT NOT NULL,\
1337                kind TEXT NOT NULL,\
1338                name TEXT NOT NULL,\
1339                description TEXT,\
1340                properties TEXT,\
1341                tags TEXT NOT NULL DEFAULT '[]',\
1342                created_at INTEGER NOT NULL,\
1343                updated_at INTEGER NOT NULL,\
1344                deleted_at INTEGER\
1345            );\
1346            CREATE TABLE graph_edges (\
1347                namespace TEXT NOT NULL,\
1348                id TEXT NOT NULL,\
1349                source_id TEXT NOT NULL,\
1350                target_id TEXT NOT NULL,\
1351                relation TEXT NOT NULL,\
1352                weight REAL NOT NULL DEFAULT 1.0,\
1353                created_at INTEGER NOT NULL,\
1354                metadata TEXT,\
1355                PRIMARY KEY (namespace, id)\
1356            );\
1357            CREATE TABLE notes (\
1358                id TEXT PRIMARY KEY,\
1359                namespace TEXT NOT NULL,\
1360                kind TEXT NOT NULL,\
1361                content TEXT NOT NULL DEFAULT '',\
1362                salience REAL NOT NULL DEFAULT 0.5,\
1363                decay_factor REAL NOT NULL DEFAULT 0.0,\
1364                expires_at INTEGER,\
1365                properties TEXT,\
1366                created_at INTEGER NOT NULL,\
1367                updated_at INTEGER NOT NULL,\
1368                deleted_at INTEGER\
1369            );\
1370            CREATE TABLE events (\
1371                id TEXT PRIMARY KEY,\
1372                namespace TEXT NOT NULL,\
1373                verb TEXT NOT NULL,\
1374                substrate TEXT NOT NULL,\
1375                actor TEXT NOT NULL,\
1376                outcome TEXT NOT NULL,\
1377                data TEXT,\
1378                duration_us INTEGER NOT NULL DEFAULT 0,\
1379                target_id TEXT,\
1380                created_at INTEGER NOT NULL\
1381            );",
1382        )
1383        .unwrap();
1384
1385        // Record V1 as already applied so run_migrations starts at V2.
1386        let now = chrono::Utc::now().timestamp_micros();
1387        conn.execute(
1388            "INSERT INTO _schema_migrations (version, name, applied_at) VALUES (1, 'initial_schema', ?1)",
1389            rusqlite::params![now],
1390        )
1391        .unwrap();
1392
1393        // Run V2-V15 migrations.
1394        let version = run_migrations(&mut conn).expect("migrations should succeed");
1395        assert_eq!(version, 15);
1396
1397        // After V12, salience must be nullable (notnull=0).
1398        let notnull: i64 = conn
1399            .query_row(
1400                "SELECT \"notnull\" FROM pragma_table_info('notes') WHERE name = 'salience'",
1401                [],
1402                |row| row.get(0),
1403            )
1404            .unwrap();
1405        assert_eq!(notnull, 0, "salience must be nullable after V12");
1406
1407        // Inserting a note without salience must succeed.
1408        conn.execute(
1409            "INSERT INTO notes (id, namespace, kind, status, content, created_at, updated_at) \
1410             VALUES ('test-id', 'ns', 'observation', 'active', '', 1, 1)",
1411            [],
1412        )
1413        .expect("inserting note with NULL salience must succeed after V12");
1414
1415        let stored_salience: Option<f64> = conn
1416            .query_row(
1417                "SELECT salience FROM notes WHERE id = 'test-id'",
1418                [],
1419                |row| row.get(0),
1420            )
1421            .unwrap();
1422        assert!(
1423            stored_salience.is_none(),
1424            "salience must be NULL when not supplied"
1425        );
1426    }
1427
1428    #[test]
1429    fn store_ddl_then_event_migration_is_idempotent() {
1430        use crate::stores::event::ensure_events_schema;
1431
1432        let mut conn = open_memory();
1433
1434        // Simulate the StorageBackend path: ensure_events_schema creates the
1435        // events table WITH the new columns. Running V13 on top must not fail.
1436        ensure_events_schema(&conn).expect("store DDL should create events");
1437
1438        let version = run_migrations(&mut conn).expect("migrations after events store DDL");
1439        assert_eq!(version, 15, "must reach V15 even when events DDL ran first");
1440
1441        let v13_count: i64 = conn
1442            .query_row(
1443                "SELECT COUNT(*) FROM _schema_migrations WHERE version = 13",
1444                [],
1445                |r| r.get(0),
1446            )
1447            .unwrap();
1448        assert_eq!(v13_count, 1, "V13 must be recorded");
1449
1450        let v14_count: i64 = conn
1451            .query_row(
1452                "SELECT COUNT(*) FROM _schema_migrations WHERE version = 14",
1453                [],
1454                |r| r.get(0),
1455            )
1456            .unwrap();
1457        assert_eq!(v14_count, 1, "V14 must be recorded");
1458
1459        let v15_count: i64 = conn
1460            .query_row(
1461                "SELECT COUNT(*) FROM _schema_migrations WHERE version = 15",
1462                [],
1463                |r| r.get(0),
1464            )
1465            .unwrap();
1466        assert_eq!(v15_count, 1, "V15 must be recorded");
1467    }
1468
1469    /// F227/F228: V14 must create the _embedding_models registry table and its indexes.
1470    ///
1471    /// F227: MIGRATIONS previously stopped at V4 (dedupe_graph_edge_triples); no
1472    ///       embedding registry existed.
1473    /// F228: vec_<engine> tables previously lacked the embedding_model_id FK column.
1474    ///       New tables created after V14 include it from the start via the updated DDL.
1475    #[test]
1476    fn migration_v14_creates_embedding_model_registry() {
1477        let mut conn = open_memory();
1478        let version = run_migrations(&mut conn).expect("migrations should succeed");
1479        assert_eq!(
1480            version, 15,
1481            "F227: latest migration must be V15 (proposals_open)"
1482        );
1483
1484        // Verify _embedding_models table exists.
1485        let tbl: i64 = conn
1486            .query_row(
1487                "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='_embedding_models'",
1488                [],
1489                |r| r.get(0),
1490            )
1491            .unwrap();
1492        assert_eq!(tbl, 1, "F227: _embedding_models table must exist after V14");
1493
1494        // Verify the partial unique index for one-active-per-engine constraint.
1495        let one_active_idx: i64 = conn
1496            .query_row(
1497                "SELECT COUNT(*) FROM sqlite_master WHERE type='index' AND name='idx_embed_models_one_active'",
1498                [],
1499                |r| r.get(0),
1500            )
1501            .unwrap();
1502        assert_eq!(
1503            one_active_idx, 1,
1504            "V14 must create idx_embed_models_one_active partial unique index"
1505        );
1506
1507        // Verify the engine+status composite index.
1508        let engine_status_idx: i64 = conn
1509            .query_row(
1510                "SELECT COUNT(*) FROM sqlite_master WHERE type='index' AND name='idx_embed_models_engine_status'",
1511                [],
1512                |r| r.get(0),
1513            )
1514            .unwrap();
1515        assert_eq!(
1516            engine_status_idx, 1,
1517            "V14 must create idx_embed_models_engine_status index"
1518        );
1519
1520        // Verify the _embedding_models schema contains required columns.
1521        for col in [
1522            "id",
1523            "engine_name",
1524            "model_id",
1525            "key_version",
1526            "dim",
1527            "output_dim",
1528            "status",
1529            "activated_at",
1530            "superseded_at",
1531            "superseded_by",
1532            "canonical_key",
1533            "created_at",
1534        ] {
1535            let exists: bool = conn
1536                .query_row(
1537                    "SELECT COUNT(*) > 0 FROM pragma_table_info('_embedding_models') WHERE name = ?1",
1538                    [col],
1539                    |r| r.get(0),
1540                )
1541                .unwrap();
1542            assert!(
1543                exists,
1544                "F227: _embedding_models must have column '{col}' after V14"
1545            );
1546        }
1547    }
1548
1549    /// F228: New vec_<engine> tables created after V14 (via StorageBackend::vectors_for_namespace)
1550    /// include the embedding_model_id FK column from the start.
1551    ///
1552    /// This test verifies the migration adds embedding_model_id to a pre-existing
1553    /// regular (non-virtual) vec_ table that was created before V14 ran.
1554    #[test]
1555    fn migration_v14_adds_embedding_model_id_to_existing_regular_vec_tables() {
1556        let mut conn = open_memory();
1557
1558        // Simulate a pre-V14 database state: apply V1-V13 manually by running
1559        // migrations up to V13, then create a regular (non-virtual) vec_ table
1560        // without the embedding_model_id column, then run the full migration.
1561        //
1562        // We use a real SQLite table here (not a vec0 virtual table) because
1563        // sqlite-vec is not available in the unit test environment. The migration
1564        // correctly detects and skips virtual tables.
1565        conn.execute_batch(
1566            "CREATE TABLE vec_legacy_model (\
1567                subject_id TEXT PRIMARY KEY,\
1568                namespace TEXT NOT NULL,\
1569                kind TEXT NOT NULL,\
1570                field TEXT NOT NULL\
1571            );",
1572        )
1573        .unwrap();
1574
1575        // Run the full migration suite — V14 should add embedding_model_id to the
1576        // regular vec_legacy_model table.
1577        let version = run_migrations(&mut conn).expect("migrations should succeed");
1578        assert_eq!(version, 15);
1579
1580        // The embedding_model_id column must now exist.
1581        let col_exists: bool = conn
1582            .query_row(
1583                "SELECT COUNT(*) > 0 FROM pragma_table_info('vec_legacy_model') WHERE name = 'embedding_model_id'",
1584                [],
1585                |r| r.get(0),
1586            )
1587            .unwrap();
1588        assert!(
1589            col_exists,
1590            "F228: V14 must add embedding_model_id to existing regular vec_ tables"
1591        );
1592
1593        // Running migrations again must be idempotent (column already present).
1594        let version2 = run_migrations(&mut conn).expect("second run must succeed");
1595        assert_eq!(version2, 15);
1596    }
1597
1598    /// CRIT-2 regression: V14 discovery filter must NOT match sqlite-vec internal
1599    /// shadow tables (`vec_<x>_chunks`, `_rowids`, `_info`, `_vector_chunks00`).
1600    ///
1601    /// sqlite-vec 0.1.9 creates these as plain `CREATE TABLE` entries (no VIRTUAL,
1602    /// no vec0 keyword in their DDL) for each vec0 virtual table.  The filter added
1603    /// in PR #374 c20 must exclude them via explicit suffix negation so that
1604    /// `ALTER TABLE … ADD COLUMN` is never issued against sqlite-vec's internal tables.
1605    ///
1606    /// We simulate the shadow tables as plain regular tables (sqlite-vec is not
1607    /// available in the unit-test environment) because the sqlite_master DDL format
1608    /// is what the filter inspects — the table content is irrelevant for this test.
1609    #[test]
1610    fn migration_v14_does_not_alter_sqlite_vec_shadow_tables() {
1611        let mut conn = open_memory();
1612
1613        // Create the four canonical sqlite-vec shadow table shapes for a notional
1614        // vec0 table named `vec_test`.  Their DDL intentionally lacks VIRTUAL/vec0
1615        // so they would have matched the old (pre-fix) filter.
1616        conn.execute_batch(
1617            "CREATE TABLE vec_test_chunks    (x INTEGER);\
1618             CREATE TABLE vec_test_rowids    (x INTEGER);\
1619             CREATE TABLE vec_test_info      (x INTEGER);\
1620             CREATE TABLE vec_test_vector_chunks00 (x INTEGER);",
1621        )
1622        .unwrap();
1623
1624        // Run the full migration suite — V14 must not add `embedding_model_id` to
1625        // any of the four shadow tables above.
1626        let version = run_migrations(&mut conn).expect("migrations should succeed");
1627        assert_eq!(version, 15);
1628
1629        for shadow in [
1630            "vec_test_chunks",
1631            "vec_test_rowids",
1632            "vec_test_info",
1633            "vec_test_vector_chunks00",
1634        ] {
1635            let col_added: bool = conn
1636                .query_row(
1637                    "SELECT COUNT(*) > 0 FROM pragma_table_info(?1) \
1638                     WHERE name = 'embedding_model_id'",
1639                    rusqlite::params![shadow],
1640                    |r| r.get(0),
1641                )
1642                .unwrap();
1643            assert!(
1644                !col_added,
1645                "CRIT-2: V14 must NOT add embedding_model_id to sqlite-vec shadow table '{shadow}'"
1646            );
1647        }
1648    }
1649
1650    /// Helper: apply a single migration in a transaction, recording it in the
1651    /// tracking table. Extracted here for use in the rollback test only.
1652    fn apply_single_migration(
1653        conn: &mut Connection,
1654        migration: &VersionedMigration,
1655    ) -> Result<(), SqliteError> {
1656        let tx = conn.transaction().map_err(|e| SqliteError::Migration {
1657            version: migration.version,
1658            error: e.to_string(),
1659        })?;
1660
1661        tx.execute_batch(migration.up)
1662            .map_err(|e| SqliteError::Migration {
1663                version: migration.version,
1664                error: e.to_string(),
1665            })?;
1666
1667        let now = chrono::Utc::now().timestamp_micros();
1668        tx.execute(
1669            "INSERT INTO _schema_migrations (version, name, applied_at) VALUES (?1, ?2, ?3)",
1670            rusqlite::params![migration.version, migration.name, now],
1671        )
1672        .map_err(|e| SqliteError::Migration {
1673            version: migration.version,
1674            error: e.to_string(),
1675        })?;
1676
1677        tx.commit().map_err(|e| SqliteError::Migration {
1678            version: migration.version,
1679            error: e.to_string(),
1680        })?;
1681
1682        Ok(())
1683    }
1684}