khive_db/
migrations.rs

1//! Schema migration system for the SQLite storage layer.
2//!
3//! Two APIs coexist:
4//! - **Legacy per-service migrations** (`ServiceSchemaPlan` / `apply_schema_plan`):
5//!   used by pack-scoped schemas.
6//! - **Versioned migrations** (`MIGRATIONS` / `run_migrations`): the forward-only
7//!   migration pipeline for the core tables.
8
9use rusqlite::Connection;
10
11use crate::error::SqliteError;
12
13// =============================================================================
14// Legacy per-service migration API (preserved for backward compatibility)
15// =============================================================================
16
17/// A single legacy migration step within a `ServiceSchemaPlan`.
18pub struct Migration {
19    /// Unique identifier for this migration.
20    pub id: &'static str,
21    /// SQL to apply (forward direction).
22    pub up_sql: &'static str,
23    /// SQL to revert (optional).
24    pub down_sql: Option<&'static str>,
25    /// Optional predicate: returns true if migration was already applied
26    /// through a mechanism other than the migration tracker.
27    pub is_already_applied: Option<fn(&Connection) -> bool>,
28}
29
30/// A pack-scoped schema plan containing migrations for SQLite and Postgres.
31pub struct ServiceSchemaPlan {
32    /// Service name used as a key in the `_schema_versions` tracking table.
33    pub service: &'static str,
34    /// SQLite-specific migration steps, applied in order.
35    pub sqlite: &'static [Migration],
36    /// Postgres-specific migration steps (reserved for future use).
37    pub postgres: &'static [Migration],
38}
39
40const SCHEMA_VERSION_TABLE: &str = "\
41    CREATE TABLE IF NOT EXISTS _schema_versions (\
42        service TEXT NOT NULL,\
43        migration_id TEXT NOT NULL,\
44        applied_at INTEGER NOT NULL,\
45        PRIMARY KEY (service, migration_id)\
46    );\
47";
48
49/// Apply a pack-scoped schema plan, tracking each migration in `_schema_versions`.
50pub fn apply_schema_plan(conn: &Connection, plan: &ServiceSchemaPlan) -> Result<(), SqliteError> {
51    conn.execute_batch(SCHEMA_VERSION_TABLE)?;
52
53    for migration in plan.sqlite {
54        // Check if custom predicate says it's already applied
55        if let Some(check) = migration.is_already_applied {
56            if check(conn) {
57                continue;
58            }
59        }
60
61        // Check if tracked as applied
62        let already: bool = conn.query_row(
63            "SELECT COUNT(*) > 0 FROM _schema_versions WHERE service = ?1 AND migration_id = ?2",
64            rusqlite::params![plan.service, migration.id],
65            |row| row.get(0),
66        )?;
67
68        if already {
69            continue;
70        }
71
72        // Apply
73        conn.execute_batch(migration.up_sql)?;
74
75        // Record
76        conn.execute(
77            "INSERT INTO _schema_versions (service, migration_id, applied_at) VALUES (?1, ?2, ?3)",
78            rusqlite::params![
79                plan.service,
80                migration.id,
81                chrono::Utc::now().timestamp_micros(),
82            ],
83        )?;
84    }
85
86    Ok(())
87}
88
89// =============================================================================
90// Versioned migration system
91// =============================================================================
92
93/// A single forward-only schema migration.
94///
95/// Migrations are applied in order from the current DB version to the target
96/// version. Each migration runs in its own transaction; a failure rolls back
97/// that migration and leaves the DB at the prior version.
98pub struct VersionedMigration {
99    /// Monotonically increasing version number, starting at 1.
100    pub version: u32,
101    /// Short human-readable name for the migration (used in the audit table).
102    pub name: &'static str,
103    /// SQL to apply this migration. May contain multiple statements separated
104    /// by semicolons; `execute_batch` runs them all.
105    pub up: &'static str,
106}
107
108// V1: The complete initial schema for all four core tables.
109const V1_UP: &str = "\
110    CREATE TABLE IF NOT EXISTS entities (\
111        id TEXT PRIMARY KEY,\
112        namespace TEXT NOT NULL,\
113        kind TEXT NOT NULL,\
114        name TEXT NOT NULL,\
115        description TEXT,\
116        properties TEXT,\
117        tags TEXT NOT NULL DEFAULT '[]',\
118        created_at INTEGER NOT NULL,\
119        updated_at INTEGER NOT NULL,\
120        deleted_at INTEGER\
121    );\
122    CREATE INDEX IF NOT EXISTS idx_entities_namespace ON entities(namespace);\
123    CREATE INDEX IF NOT EXISTS idx_entities_kind ON entities(namespace, kind);\
124    CREATE INDEX IF NOT EXISTS idx_entities_name ON entities(namespace, name);\
125    CREATE INDEX IF NOT EXISTS idx_entities_created ON entities(created_at DESC);\
126    CREATE TABLE IF NOT EXISTS graph_edges (\
127        namespace TEXT NOT NULL,\
128        id TEXT NOT NULL,\
129        source_id TEXT NOT NULL,\
130        target_id TEXT NOT NULL,\
131        relation TEXT NOT NULL,\
132        weight REAL NOT NULL DEFAULT 1.0,\
133        created_at INTEGER NOT NULL,\
134        metadata TEXT,\
135        PRIMARY KEY (namespace, id)\
136    );\
137    CREATE INDEX IF NOT EXISTS idx_graph_edges_ns_source ON graph_edges(namespace, source_id);\
138    CREATE INDEX IF NOT EXISTS idx_graph_edges_ns_target ON graph_edges(namespace, target_id);\
139    CREATE INDEX IF NOT EXISTS idx_graph_edges_ns_relation ON graph_edges(namespace, relation);\
140    CREATE INDEX IF NOT EXISTS idx_graph_edges_ns_src_rel ON graph_edges(namespace, source_id, relation);\
141    CREATE INDEX IF NOT EXISTS idx_graph_edges_ns_tgt_rel ON graph_edges(namespace, target_id, relation);\
142    CREATE TABLE IF NOT EXISTS notes (\
143        id TEXT PRIMARY KEY,\
144        namespace TEXT NOT NULL,\
145        kind TEXT NOT NULL,\
146        content TEXT NOT NULL DEFAULT '',\
147        salience REAL NOT NULL DEFAULT 0.5,\
148        decay_factor REAL NOT NULL DEFAULT 0.0,\
149        expires_at INTEGER,\
150        properties TEXT,\
151        created_at INTEGER NOT NULL,\
152        updated_at INTEGER NOT NULL,\
153        deleted_at INTEGER\
154    );\
155    CREATE INDEX IF NOT EXISTS idx_notes_namespace ON notes(namespace);\
156    CREATE INDEX IF NOT EXISTS idx_notes_kind ON notes(namespace, kind);\
157    CREATE INDEX IF NOT EXISTS idx_notes_created ON notes(created_at DESC);\
158    CREATE TABLE IF NOT EXISTS events (\
159        id TEXT PRIMARY KEY,\
160        namespace TEXT NOT NULL,\
161        verb TEXT NOT NULL,\
162        substrate TEXT NOT NULL,\
163        actor TEXT NOT NULL,\
164        outcome TEXT NOT NULL,\
165        data TEXT,\
166        duration_us INTEGER NOT NULL DEFAULT 0,\
167        target_id TEXT,\
168        created_at INTEGER NOT NULL\
169    );\
170    CREATE INDEX IF NOT EXISTS idx_events_namespace ON events(namespace);\
171    CREATE INDEX IF NOT EXISTS idx_events_verb ON events(verb);\
172    CREATE INDEX IF NOT EXISTS idx_events_substrate ON events(substrate);\
173    CREATE INDEX IF NOT EXISTS idx_events_created ON events(created_at DESC);\
174";
175
176/// All known migrations, ordered by ascending version.
177///
178/// Append a `VersionedMigration` with `version = <last + 1>`. The sequence
179/// must be contiguous; `run_migrations` returns an error on gaps.
180///
181/// V2/V5 add columns that may already exist from in-process DDL -- the
182/// runner checks column existence before applying. V4 deduplicates
183/// graph_edges triples. V9 rebuilds graph_edges for lifecycle columns.
184/// V13 event observability SQL is computed at runtime to avoid
185/// duplicate-column errors on pre-bootstrapped DBs.
186const V4_DEDUPE_GRAPH_EDGE_TRIPLES: &str = "\
187    DELETE FROM graph_edges \
188    WHERE rowid NOT IN (\
189        SELECT MIN(rowid) \
190        FROM graph_edges \
191        GROUP BY namespace, source_id, target_id, relation\
192    );\
193    CREATE UNIQUE INDEX IF NOT EXISTS idx_graph_edges_unique_triple \
194    ON graph_edges(namespace, source_id, target_id, relation);\
195";
196
197const V5_ADD_ENTITY_TYPE_TO_ENTITIES: &str = "\
198    ALTER TABLE entities ADD COLUMN entity_type TEXT NULL;\
199    CREATE INDEX IF NOT EXISTS idx_entities_kind_entity_type \
200    ON entities(namespace, kind, entity_type);\
201";
202
203const V9_EDGE_LIFECYCLE_AND_TARGET_BACKEND: &str = "\
204    DROP INDEX IF EXISTS idx_graph_edges_unique_triple;\
205    DROP INDEX IF EXISTS idx_graph_edges_ns_source;\
206    DROP INDEX IF EXISTS idx_graph_edges_ns_target;\
207    DROP INDEX IF EXISTS idx_graph_edges_ns_relation;\
208    DROP INDEX IF EXISTS idx_graph_edges_ns_src_rel;\
209    DROP INDEX IF EXISTS idx_graph_edges_ns_tgt_rel;\
210    CREATE TABLE graph_edges_new (\
211        namespace TEXT NOT NULL,\
212        id TEXT NOT NULL,\
213        source_id TEXT NOT NULL,\
214        target_id TEXT NOT NULL,\
215        relation TEXT NOT NULL,\
216        weight REAL NOT NULL DEFAULT 1.0,\
217        created_at INTEGER NOT NULL,\
218        updated_at INTEGER NOT NULL,\
219        deleted_at INTEGER,\
220        metadata TEXT,\
221        target_backend TEXT,\
222        PRIMARY KEY (namespace, id)\
223    );\
224    INSERT INTO graph_edges_new \
225        (namespace, id, source_id, target_id, relation, weight, created_at, updated_at, deleted_at, metadata, target_backend) \
226    SELECT namespace, id, source_id, target_id, relation, weight, created_at, created_at, NULL, metadata, NULL \
227    FROM graph_edges;\
228    DROP TABLE graph_edges;\
229    ALTER TABLE graph_edges_new RENAME TO graph_edges;\
230    CREATE UNIQUE INDEX IF NOT EXISTS idx_graph_edges_unique_triple ON graph_edges(namespace, source_id, target_id, relation);\
231    CREATE INDEX IF NOT EXISTS idx_graph_edges_ns_source ON graph_edges(namespace, source_id);\
232    CREATE INDEX IF NOT EXISTS idx_graph_edges_ns_target ON graph_edges(namespace, target_id);\
233    CREATE INDEX IF NOT EXISTS idx_graph_edges_ns_relation ON graph_edges(namespace, relation);\
234    CREATE INDEX IF NOT EXISTS idx_graph_edges_ns_src_rel ON graph_edges(namespace, source_id, relation);\
235    CREATE INDEX IF NOT EXISTS idx_graph_edges_ns_tgt_rel ON graph_edges(namespace, target_id, relation);\
236    CREATE INDEX IF NOT EXISTS idx_graph_edges_target_backend ON graph_edges(target_backend) WHERE target_backend IS NOT NULL;\
237";
238
239/// V10: Add `status` column to notes; make `salience` and `decay_factor` nullable.
240///
241/// SQLite does not support `ALTER COLUMN` to change NOT NULL constraints, so the
242/// salience/decay_factor nullability change is handled by rewriting the column
243/// defaults: the columns already exist (added in V1) and will accept NULL when
244/// inserted without a value. The `NOT NULL DEFAULT` constraint in V1 means any
245/// existing rows already have a value; to allow NULLs going forward, SQLite
246/// requires a full table rebuild — but since all existing values are valid f64,
247/// we leave the constraint in place for existing rows and rely on application-
248/// level logic (`NOTES_DDL` in stores/note.rs) to use nullable columns for new
249/// tables. For production databases that went through V1, the application layer
250/// handles NULLs via `Option<f64>` and the `NOT NULL DEFAULT` remains harmless
251/// (inserts from the application always set these columns or leave them NULL via
252/// the new nullable DDL path). The only structural change this migration makes
253/// is adding the `status` column with a sensible default.
254const V10_NOTE_STATUS_AND_NULLABLE_METRICS: &str = "\
255    ALTER TABLE notes ADD COLUMN status TEXT NOT NULL DEFAULT 'active';\
256";
257
258/// V11: Add merge tombstone columns to entities.
259///
260/// `merged_into` stores the UUID of the entity this one was merged into.
261/// `merge_event_id` is an opaque event ID for auditing. Both are nullable;
262/// non-NULL only when the entity has been tombstoned by a merge.
263/// The index on (namespace, merged_into) allows efficient lookup of all
264/// entities that were merged into a given target.
265///
266/// ENTITIES_DDL in stores/entity.rs already includes these columns for new
267/// databases (created via `CREATE TABLE IF NOT EXISTS`). The migration handles
268/// the upgrade path for existing production databases.
269const V11_ENTITY_TOMBSTONE_COLUMNS: &str = "\
270    ALTER TABLE entities ADD COLUMN merged_into TEXT;\
271    ALTER TABLE entities ADD COLUMN merge_event_id TEXT;\
272    CREATE INDEX IF NOT EXISTS idx_entities_merged_into ON entities(namespace, merged_into);\
273";
274
275/// V12: Make `salience` and `decay_factor` nullable in the notes table.
276///
277/// V1 created notes with `salience REAL NOT NULL DEFAULT 0.5` and
278/// `decay_factor REAL NOT NULL DEFAULT 0.0`. SQLite does not support
279/// `ALTER COLUMN` to remove a NOT NULL constraint, so a full table rebuild
280/// is required. This migration rebuilds notes with the canonical nullable
281/// schema that `NOTES_DDL` in stores/note.rs uses for fresh databases.
282///
283/// On databases bootstrapped via `NOTES_DDL` (all test paths and new
284/// installs), salience/decay_factor are already nullable — the V12 idempotency
285/// check detects this and skips the rebuild, recording V12 as applied.
286const V12_NULLABLE_NOTE_METRICS: &str = "\
287    CREATE TABLE notes_new (\
288        id TEXT PRIMARY KEY,\
289        namespace TEXT NOT NULL,\
290        kind TEXT NOT NULL,\
291        status TEXT NOT NULL DEFAULT 'active',\
292        name TEXT,\
293        content TEXT NOT NULL DEFAULT '',\
294        salience REAL,\
295        decay_factor REAL,\
296        expires_at INTEGER,\
297        properties TEXT,\
298        created_at INTEGER NOT NULL,\
299        updated_at INTEGER NOT NULL,\
300        deleted_at INTEGER\
301    );\
302    INSERT INTO notes_new \
303        (id, namespace, kind, status, name, content, salience, decay_factor, \
304         expires_at, properties, created_at, updated_at, deleted_at) \
305    SELECT \
306        id, namespace, kind, status, name, content, salience, decay_factor, \
307        expires_at, properties, created_at, updated_at, deleted_at \
308    FROM notes;\
309    DROP TABLE notes;\
310    ALTER TABLE notes_new RENAME TO notes;\
311    CREATE INDEX IF NOT EXISTS idx_notes_namespace ON notes(namespace);\
312    CREATE INDEX IF NOT EXISTS idx_notes_kind ON notes(namespace, kind);\
313    CREATE INDEX IF NOT EXISTS idx_notes_created ON notes(created_at DESC);\
314";
315
316// V13 adds event observability + provenance columns and the event_observations table.
317// The DDL is computed at runtime via `build_v13_event_observability_sql` so that
318// running migrations on a database already bootstrapped by `ensure_events_schema`
319// (which includes the new columns) does not fail with "duplicate column name".
320const V13_EVENT_OBSERVABILITY_PROVENANCE: &str = "__v13_computed_at_runtime__";
321
322/// DDL for the `_embedding_models` registry table.
323///
324/// Shared between the V14 migration (`build_v14_embedding_model_registry_sql`) and
325/// the belt-and-suspenders creation in `StorageBackend::vectors_for_namespace`.
326/// Both sites reference this constant so the schema cannot silently diverge if the
327/// registry evolves.
328pub const EMBEDDING_MODELS_DDL: &str = "\
329    CREATE TABLE IF NOT EXISTS _embedding_models (\
330        id              BLOB PRIMARY KEY,\
331        engine_name     TEXT NOT NULL,\
332        model_id        TEXT NOT NULL,\
333        key_version     TEXT NOT NULL,\
334        dim             INTEGER NOT NULL,\
335        output_dim      INTEGER,\
336        status          TEXT NOT NULL CHECK (status IN ('pending', 'active', 'superseded', 'archived')),\
337        activated_at    INTEGER,\
338        superseded_at   INTEGER,\
339        superseded_by   BLOB,\
340        canonical_key   BLOB NOT NULL UNIQUE,\
341        created_at      INTEGER NOT NULL\
342    );\
343    CREATE UNIQUE INDEX IF NOT EXISTS idx_embed_models_one_active \
344        ON _embedding_models(engine_name) WHERE status = 'active';\
345    CREATE INDEX IF NOT EXISTS idx_embed_models_engine_status \
346        ON _embedding_models(engine_name, status);";
347
348/// V14: Embedding model registry (`_embedding_models`) and per-engine model FK column.
349///
350/// Creates the `_embedding_models` registry table that tracks which embedding model
351/// is active for each vector engine. Also adds the `embedding_model_id` FK column to
352/// any existing regular `vec_<engine>` tables found in sqlite_master so that stored
353/// vectors can be traced back to the model that produced them.
354///
355/// sqlite-vec virtual tables (`vec0`) do not support `ALTER TABLE ADD COLUMN`;
356/// for those tables the column is added during the startup backfill rebuild,
357/// which is deferred to a follow-up PR — see the tracking issue filed in MAJ-2 of
358/// codex round-1.
359///
360/// New `vec_<engine>` tables created via `StorageBackend::vectors_for_namespace`
361/// after V14 do NOT yet include `embedding_model_id` at creation time; that column
362/// will be present only after the startup backfill rebuild lands.
363///
364/// The migration SQL is computed at runtime via `build_v14_embedding_model_registry_sql`
365/// to discover existing `vec_<engine>` tables dynamically and skip the `ALTER TABLE`
366/// step for any table that already has the column.
367const V14_EMBEDDING_MODEL_REGISTRY: &str = "__v14_computed_at_runtime__";
368
369/// V16: Add `embedding_model` column and composite index to regular `vec_` tables.
370///
371/// This migration is computed at runtime via `build_v16_vector_embedding_model_tag_sql`
372/// to discover existing regular (non-virtual) `vec_` tables and add the column where
373/// absent. sqlite-vec virtual tables (`vec0`) are handled at open time by the
374/// `vectors_for_namespace` old-schema detection which drops and recreates tables
375/// missing `embedding_model`.
376const V16_VECTOR_EMBEDDING_MODEL_TAG: &str = "__v16_computed_at_runtime__";
377
378/// V17: sqlite-vec preserving rebuild.
379///
380/// Unlike V16 (regular tables), vec0 virtual tables cannot `ALTER TABLE ADD
381/// COLUMN`. V17 does a 6-step copy-with-default rebuild per table: create
382/// temp regular table, copy rows with defaults, drop virtual table,
383/// recreate with full schema, copy back, drop temp. SQL is computed at
384/// runtime via `build_v17_preserving_rebuild_sql`. After V17, all vec0
385/// tables have `field` and `embedding_model`.
386const V17_VECTOR_EMBEDDING_MODEL_TAG_PRESERVING_REBUILD: &str = "__v17_computed_at_runtime__";
387
388/// V15: proposals_open projection table.
389///
390/// Maintains a fold-derived view of the four proposal EventKinds so that
391/// `list(kind=proposal, status="open")` is an index scan rather than a full
392/// event-log fold.
393const V15_PROPOSALS_OPEN: &str = "\
394    CREATE TABLE IF NOT EXISTS proposals_open (\
395        proposal_id    TEXT PRIMARY KEY,\
396        namespace      TEXT NOT NULL,\
397        proposer       TEXT NOT NULL,\
398        title          TEXT NOT NULL,\
399        status         TEXT NOT NULL CHECK (status IN ('open', 'changes_requested', 'approved', 'rejected', 'applied', 'withdrawn')),\
400        created_at     INTEGER NOT NULL,\
401        updated_at     INTEGER NOT NULL,\
402        expiry         INTEGER,\
403        last_decision  TEXT,\
404        review_count   INTEGER NOT NULL DEFAULT 0,\
405        approve_count  INTEGER NOT NULL DEFAULT 0,\
406        reject_count   INTEGER NOT NULL DEFAULT 0\
407    );\
408    CREATE INDEX IF NOT EXISTS idx_proposals_open_ns_status ON proposals_open(namespace, status);\
409    CREATE INDEX IF NOT EXISTS idx_proposals_open_proposer ON proposals_open(namespace, proposer);\
410    CREATE INDEX IF NOT EXISTS idx_proposals_open_updated ON proposals_open(namespace, updated_at DESC);\
411";
412
413// V18: knowledge pack — atoms table (slug-keyed knowledge corpus) and domains
414// (named groupings of atoms). FTS5 full-text index over name + description +
415// content + tags. Separate from the notes/entities tables so the knowledge
416// corpus can scale to hundreds of thousands of atoms without polluting the
417// general-purpose note store.
418const V19_KNOWLEDGE_ATOMS_AND_DOMAINS: &str = "\
419    CREATE TABLE IF NOT EXISTS knowledge_atoms (\
420        id TEXT PRIMARY KEY,\
421        namespace TEXT NOT NULL,\
422        slug TEXT NOT NULL,\
423        name TEXT NOT NULL,\
424        description TEXT,\
425        content TEXT NOT NULL DEFAULT '',\
426        tags TEXT NOT NULL DEFAULT '[]',\
427        properties TEXT,\
428        finalized INTEGER NOT NULL DEFAULT 0,\
429        created_at INTEGER NOT NULL,\
430        updated_at INTEGER NOT NULL,\
431        deleted_at INTEGER\
432    );\
433    CREATE UNIQUE INDEX IF NOT EXISTS idx_knowledge_atoms_ns_slug \
434        ON knowledge_atoms(namespace, slug);\
435    CREATE INDEX IF NOT EXISTS idx_knowledge_atoms_ns \
436        ON knowledge_atoms(namespace);\
437    CREATE INDEX IF NOT EXISTS idx_knowledge_atoms_ns_created \
438        ON knowledge_atoms(namespace, created_at DESC);\
439    CREATE TABLE IF NOT EXISTS knowledge_domains (\
440        id TEXT PRIMARY KEY,\
441        namespace TEXT NOT NULL,\
442        slug TEXT NOT NULL,\
443        name TEXT NOT NULL,\
444        description TEXT,\
445        tags TEXT NOT NULL DEFAULT '[]',\
446        members TEXT NOT NULL DEFAULT '[]',\
447        created_at INTEGER NOT NULL,\
448        updated_at INTEGER NOT NULL,\
449        deleted_at INTEGER\
450    );\
451    CREATE UNIQUE INDEX IF NOT EXISTS idx_knowledge_domains_ns_slug \
452        ON knowledge_domains(namespace, slug);\
453    CREATE INDEX IF NOT EXISTS idx_knowledge_domains_ns \
454        ON knowledge_domains(namespace);\
455    CREATE VIRTUAL TABLE IF NOT EXISTS fts_knowledge \
456        USING fts5(\
457            id UNINDEXED,\
458            namespace UNINDEXED,\
459            slug,\
460            name,\
461            description,\
462            content,\
463            content=knowledge_atoms,\
464            content_rowid=rowid,\
465            tokenize='trigram case_sensitive 0'\
466        );\
467    CREATE TRIGGER IF NOT EXISTS fts_knowledge_ai \
468        AFTER INSERT ON knowledge_atoms \
469        WHEN new.deleted_at IS NULL BEGIN \
470        INSERT INTO fts_knowledge(rowid, id, namespace, slug, name, description, content) \
471            VALUES(new.rowid, new.id, new.namespace, new.slug, new.name, new.description, new.content); \
472    END; \
473    CREATE TRIGGER IF NOT EXISTS fts_knowledge_ad \
474        AFTER DELETE ON knowledge_atoms BEGIN \
475        INSERT INTO fts_knowledge(fts_knowledge, rowid, id, namespace, slug, name, description, content) \
476            VALUES('delete', old.rowid, old.id, old.namespace, old.slug, old.name, old.description, old.content); \
477    END; \
478    CREATE TRIGGER IF NOT EXISTS fts_knowledge_au \
479        AFTER UPDATE ON knowledge_atoms BEGIN \
480        INSERT INTO fts_knowledge(fts_knowledge, rowid, id, namespace, slug, name, description, content) \
481            VALUES('delete', old.rowid, old.id, old.namespace, old.slug, old.name, old.description, old.content); \
482        INSERT INTO fts_knowledge(rowid, id, namespace, slug, name, description, content) \
483            SELECT new.rowid, new.id, new.namespace, new.slug, new.name, new.description, new.content \
484            WHERE new.deleted_at IS NULL; \
485    END;\
486";
487
488// V20: brain pack — profile snapshots and event log tables (Phase 1).
489//
490// brain_profile_snapshots stores the full serialised profile state keyed by
491// (profile_id, namespace). brain_event_log records every mutation event for
492// audit and replay; the index on (profile_id, namespace, created_at) supports
493// efficient time-ordered scans.
494const V20_BRAIN_PROFILE_PERSISTENCE: &str = "\
495    CREATE TABLE IF NOT EXISTS brain_profile_snapshots (\
496        profile_id    TEXT NOT NULL,\
497        namespace     TEXT NOT NULL DEFAULT 'default',\
498        snapshot_json TEXT NOT NULL,\
499        updated_at    INTEGER NOT NULL,\
500        PRIMARY KEY (profile_id, namespace)\
501    );\
502    CREATE TABLE IF NOT EXISTS brain_event_log (\
503        id         INTEGER PRIMARY KEY AUTOINCREMENT,\
504        profile_id TEXT NOT NULL,\
505        namespace  TEXT NOT NULL DEFAULT 'default',\
506        event_kind TEXT NOT NULL,\
507        payload    TEXT NOT NULL,\
508        created_at INTEGER NOT NULL\
509    );\
510    CREATE INDEX IF NOT EXISTS idx_brain_events_profile \
511        ON brain_event_log(profile_id, namespace, created_at);\
512";
513
514// V22: knowledge lifecycle status columns.
515//
516// Extends knowledge_atoms with:
517//   status      — workflow state, NOT NULL DEFAULT 'draft'
518//                 (draft | reviewed | verified | deprecated).
519//   source_uri  — provenance URI (e.g. "atlas:<id>" for atlas imports).
520//   source_type — provenance kind ("paper" | "imported" | user-defined).
521//
522// Extends knowledge_sections and knowledge_domains each with a status column
523// (NOT NULL DEFAULT 'draft') for the challenge/adjudicate workflow.
524//
525// Indexes accelerate status-filtered list/search paths.
526// Backfill: atoms already finalized are marked 'reviewed'.
527//
528// This is the superset migration; it subsumes the earlier
529// knowledge_status_and_source draft by adding NOT NULL defaults, domains.status,
530// the section/domain status indexes, and the finalized→reviewed backfill.
531const V22_KNOWLEDGE_LIFECYCLE_STATUS: &str = "\
532    ALTER TABLE knowledge_atoms ADD COLUMN status TEXT NOT NULL DEFAULT 'draft';\
533    ALTER TABLE knowledge_atoms ADD COLUMN source_uri TEXT;\
534    ALTER TABLE knowledge_atoms ADD COLUMN source_type TEXT;\
535    ALTER TABLE knowledge_sections ADD COLUMN status TEXT NOT NULL DEFAULT 'draft';\
536    ALTER TABLE knowledge_domains ADD COLUMN status TEXT NOT NULL DEFAULT 'draft';\
537    CREATE INDEX IF NOT EXISTS idx_knowledge_atoms_ns_status \
538        ON knowledge_atoms(namespace, status);\
539    CREATE INDEX IF NOT EXISTS idx_knowledge_sections_status \
540        ON knowledge_sections(status);\
541    CREATE INDEX IF NOT EXISTS idx_knowledge_domains_ns_status \
542        ON knowledge_domains(namespace, status);\
543    UPDATE knowledge_atoms SET status = 'reviewed' WHERE finalized = 1;\
544";
545
546// V21: knowledge_sections — section-typed content rows for knowledge atoms.
547//
548// Each row holds one section (e.g. "overview", "formalism") for a given atom.
549// The UNIQUE(atom_id, section_type) constraint enforces the closed-enum invariant:
550// at most one row per section type per atom. Editing a section is an upsert on
551// this constraint, leaving sibling sections untouched.
552//
553// `embedding` is nullable BLOB — filled lazily by `knowledge.index` after edit.
554// `heading` is the markdown heading text parsed from the source content.
555// `sort_order` mirrors the order sections appear in the source document.
556//
557// FTS5 section index (`fts_sections`) enables sub-atom search by body content.
558const V21_KNOWLEDGE_SECTIONS: &str = "\
559    CREATE TABLE IF NOT EXISTS knowledge_sections (\
560        id           TEXT PRIMARY KEY,\
561        atom_id      TEXT NOT NULL,\
562        namespace    TEXT NOT NULL,\
563        section_type TEXT NOT NULL,\
564        heading      TEXT NOT NULL DEFAULT '',\
565        content      TEXT NOT NULL DEFAULT '',\
566        tokens       INTEGER NOT NULL DEFAULT 0,\
567        sort_order   INTEGER NOT NULL DEFAULT 0,\
568        embedding    BLOB,\
569        created_at   INTEGER NOT NULL,\
570        updated_at   INTEGER NOT NULL,\
571        FOREIGN KEY (atom_id) REFERENCES knowledge_atoms(id),\
572        UNIQUE(atom_id, section_type)\
573    );\
574    CREATE INDEX IF NOT EXISTS idx_knowledge_sections_atom \
575        ON knowledge_sections(atom_id);\
576    CREATE INDEX IF NOT EXISTS idx_knowledge_sections_ns_type \
577        ON knowledge_sections(namespace, section_type);\
578    CREATE INDEX IF NOT EXISTS idx_knowledge_sections_ns_atom \
579        ON knowledge_sections(namespace, atom_id);\
580    CREATE VIRTUAL TABLE IF NOT EXISTS fts_sections \
581        USING fts5(\
582            id UNINDEXED,\
583            namespace UNINDEXED,\
584            atom_id UNINDEXED,\
585            section_type UNINDEXED,\
586            heading,\
587            content,\
588            content=knowledge_sections,\
589            content_rowid=rowid,\
590            tokenize='trigram case_sensitive 0'\
591        );\
592    CREATE TRIGGER IF NOT EXISTS fts_sections_ai \
593        AFTER INSERT ON knowledge_sections BEGIN \
594        INSERT INTO fts_sections(rowid, id, namespace, atom_id, section_type, heading, content) \
595            VALUES(new.rowid, new.id, new.namespace, new.atom_id, new.section_type, new.heading, new.content); \
596    END; \
597    CREATE TRIGGER IF NOT EXISTS fts_sections_ad \
598        AFTER DELETE ON knowledge_sections BEGIN \
599        INSERT INTO fts_sections(fts_sections, rowid, id, namespace, atom_id, section_type, heading, content) \
600            VALUES('delete', old.rowid, old.id, old.namespace, old.atom_id, old.section_type, old.heading, old.content); \
601    END; \
602    CREATE TRIGGER IF NOT EXISTS fts_sections_au \
603        AFTER UPDATE ON knowledge_sections BEGIN \
604        INSERT INTO fts_sections(fts_sections, rowid, id, namespace, atom_id, section_type, heading, content) \
605            VALUES('delete', old.rowid, old.id, old.namespace, old.atom_id, old.section_type, old.heading, old.content); \
606        INSERT INTO fts_sections(rowid, id, namespace, atom_id, section_type, heading, content) \
607            VALUES(new.rowid, new.id, new.namespace, new.atom_id, new.section_type, new.heading, new.content); \
608    END;\
609";
610
611/// All versioned migrations in ascending order, applied by `run_migrations`.
612pub const MIGRATIONS: &[VersionedMigration] = &[
613    VersionedMigration {
614        version: 1,
615        name: "initial_schema",
616        up: V1_UP,
617    },
618    VersionedMigration {
619        version: 2,
620        name: "add_name_to_notes",
621        up: "ALTER TABLE notes ADD COLUMN name TEXT;",
622    },
623    VersionedMigration {
624        version: 3,
625        name: "add_events_namespace_created_index",
626        up: "CREATE INDEX IF NOT EXISTS idx_events_ns_created ON events(namespace, created_at DESC);",
627    },
628    VersionedMigration {
629        version: 4,
630        name: "dedupe_graph_edge_triples",
631        up: V4_DEDUPE_GRAPH_EDGE_TRIPLES,
632    },
633    VersionedMigration {
634        version: 5,
635        name: "add_entity_type_to_entities",
636        up: V5_ADD_ENTITY_TYPE_TO_ENTITIES,
637    },
638    // V6–V8: no-op placeholder slots originally reserved in the migration ledger.
639    // During the v1 parallel cluster landings (c01/c03/c04/c06) the concrete migrations
640    // landed at V5, V9, and V13 instead (slot assignments shifted as clusters merged).
641    // V6–V8 were absorbed as no-ops to keep the contiguity check passing. Their names
642    // are frozen — V1-V13 are production schema.
643    //
644    // NOTE: V6 was originally named "reserved_adr043_embedding_pipeline_extensions"
645    // because it was intended to hold embedding pipeline work. The actual migration
646    // landed at V14 (cluster-20). V6 retains its original name to avoid breaking the
647    // production tracking table on existing deployments.
648    VersionedMigration {
649        version: 6,
650        name: "reserved_adr043_embedding_pipeline_extensions",
651        up: "SELECT 1;",
652    },
653    VersionedMigration {
654        version: 7,
655        name: "reserved_adr046_event_sourced_proposals_index",
656        up: "SELECT 1;",
657    },
658    VersionedMigration {
659        version: 8,
660        name: "reserved_adr041_event_observations_and_session_id",
661        up: "SELECT 1;",
662    },
663    VersionedMigration {
664        version: 9,
665        name: "edge_lifecycle_and_target_backend",
666        up: V9_EDGE_LIFECYCLE_AND_TARGET_BACKEND,
667    },
668    VersionedMigration {
669        version: 10,
670        name: "note_status_and_nullable_metrics",
671        up: V10_NOTE_STATUS_AND_NULLABLE_METRICS,
672    },
673    VersionedMigration {
674        version: 11,
675        name: "entity_tombstone_columns",
676        up: V11_ENTITY_TOMBSTONE_COLUMNS,
677    },
678    VersionedMigration {
679        version: 12,
680        name: "nullable_note_metrics",
681        up: V12_NULLABLE_NOTE_METRICS,
682    },
683    VersionedMigration {
684        version: 13,
685        name: "event_observability_provenance",
686        up: V13_EVENT_OBSERVABILITY_PROVENANCE,
687    },
688    VersionedMigration {
689        version: 14,
690        name: "embedding_model_registry",
691        up: V14_EMBEDDING_MODEL_REGISTRY,
692    },
693    // V15: proposals_open projection table (cluster-22).
694    VersionedMigration {
695        version: 15,
696        name: "proposals_open",
697        up: V15_PROPOSALS_OPEN,
698    },
699    // V16: tag vector rows with embedding_model column (dual-embedding support).
700    VersionedMigration {
701        version: 16,
702        name: "vector_embedding_model_tag",
703        up: V16_VECTOR_EMBEDDING_MODEL_TAG,
704    },
705    // V17: preserving rebuild of sqlite-vec virtual tables (cluster v023).
706    // Replaces the silent-drop path in backend.rs with a copy-with-default rebuild that
707    // preserves existing rows and backfills missing columns to inferred defaults.
708    VersionedMigration {
709        version: 17,
710        name: "vector_embedding_model_tag_preserving_rebuild",
711        up: V17_VECTOR_EMBEDDING_MODEL_TAG_PRESERVING_REBUILD,
712    },
713    // V18: add 'applying' to proposals_open status CHECK (apply/withdraw race fix).
714    VersionedMigration {
715        version: 18,
716        name: "proposals_open_add_applying_status",
717        up: "__v18_computed_at_runtime__",
718    },
719    // V19: knowledge pack — atoms and domains tables + FTS5 index.
720    VersionedMigration {
721        version: 19,
722        name: "knowledge_atoms_and_domains",
723        up: V19_KNOWLEDGE_ATOMS_AND_DOMAINS,
724    },
725    VersionedMigration {
726        version: 20,
727        name: "brain_profile_persistence",
728        up: V20_BRAIN_PROFILE_PERSISTENCE,
729    },
730    // V21: knowledge_sections table (knowledge pack Phase 2).
731    // Stores section-typed content for knowledge atoms: 10-value SectionType enum,
732    // per-section FK to knowledge_atoms, UNIQUE(atom_id, section_type) constraint.
733    VersionedMigration {
734        version: 21,
735        name: "knowledge_sections",
736        up: V21_KNOWLEDGE_SECTIONS,
737    },
738    // V22: knowledge lifecycle status columns — superset migration.
739    // Adds: knowledge_atoms.status (NOT NULL DEFAULT 'draft'), source_uri,
740    //       source_type; knowledge_sections.status; knowledge_domains.status;
741    //       status indexes; and a finalized→reviewed backfill.
742    VersionedMigration {
743        version: 22,
744        name: "knowledge_lifecycle_status",
745        up: V22_KNOWLEDGE_LIFECYCLE_STATUS,
746    },
747];
748
749const MIGRATION_TRACKING_TABLE: &str = "\
750    CREATE TABLE IF NOT EXISTS _schema_migrations (\
751        version   INTEGER PRIMARY KEY,\
752        name      TEXT NOT NULL,\
753        applied_at INTEGER NOT NULL\
754    );\
755";
756
757/// Apply all unapplied migrations in order. Idempotent; each migration runs in its own transaction.
758/// Errors on non-contiguous version array or failed migration.
759pub fn run_migrations(conn: &mut Connection) -> Result<u32, SqliteError> {
760    for (i, m) in MIGRATIONS.iter().enumerate() {
761        let expected = (i + 1) as u32;
762        if m.version != expected {
763            return Err(SqliteError::InvalidData(format!(
764                "MIGRATIONS array is not contiguous: expected version {expected} at index {i}, \
765                 got version {}",
766                m.version
767            )));
768        }
769    }
770
771    conn.execute_batch(MIGRATION_TRACKING_TABLE)?;
772
773    // Determine the current version (highest applied).
774    let current_version: u32 = conn
775        .query_row(
776            "SELECT COALESCE(MAX(version), 0) FROM _schema_migrations",
777            [],
778            |row| row.get(0),
779        )
780        .unwrap_or(0);
781
782    let mut applied_version = current_version;
783
784    for migration in MIGRATIONS {
785        if migration.version <= current_version {
786            continue;
787        }
788
789        // V2 adds `name` to notes.  StorageBackend::notes() bootstraps the schema
790        // via NOTES_DDL (which already includes `name`), so the column may already
791        // exist even though the migration has never been recorded.  Treat "duplicate
792        // column name" from SQLite as idempotent for ALTER TABLE migrations.
793        if migration.version == 2 {
794            let col_exists: bool = conn
795                .query_row(
796                    "SELECT COUNT(*) > 0 FROM pragma_table_info('notes') WHERE name = 'name'",
797                    [],
798                    |row| row.get(0),
799                )
800                .unwrap_or(false);
801            if col_exists {
802                // Column already present — record the migration as applied and skip.
803                let now = chrono::Utc::now().timestamp_micros();
804                conn.execute(
805                    "INSERT OR IGNORE INTO _schema_migrations (version, name, applied_at) \
806                     VALUES (?1, ?2, ?3)",
807                    rusqlite::params![migration.version, migration.name, now],
808                )
809                .map_err(|e| SqliteError::Migration {
810                    version: migration.version,
811                    error: e.to_string(),
812                })?;
813                applied_version = migration.version;
814                continue;
815            }
816        }
817
818        // V5 adds `entity_type` to entities.  ENTITIES_DDL already includes the
819        // column so in-process DBs created via ensure_entities_schema already have
820        // it.  Same idempotency pattern as V2.
821        if migration.version == 5 {
822            let col_exists: bool = conn
823                .query_row(
824                    "SELECT COUNT(*) > 0 FROM pragma_table_info('entities') WHERE name = 'entity_type'",
825                    [],
826                    |row| row.get(0),
827                )
828                .unwrap_or(false);
829            if col_exists {
830                let now = chrono::Utc::now().timestamp_micros();
831                conn.execute(
832                    "INSERT OR IGNORE INTO _schema_migrations (version, name, applied_at) \
833                     VALUES (?1, ?2, ?3)",
834                    rusqlite::params![migration.version, migration.name, now],
835                )
836                .map_err(|e| SqliteError::Migration {
837                    version: migration.version,
838                    error: e.to_string(),
839                })?;
840                applied_version = migration.version;
841                continue;
842            }
843        }
844
845        // V10 adds `status` to notes.  NOTES_DDL in stores/note.rs already includes
846        // `status`, so when a fresh schema is created via the store path (e.g. in
847        // tests or StorageBackend::notes()), the column exists before V10 runs.
848        // Detect and skip idempotently, recording the migration as applied.
849        if migration.version == 10 {
850            let col_exists: bool = conn
851                .query_row(
852                    "SELECT COUNT(*) > 0 FROM pragma_table_info('notes') WHERE name = 'status'",
853                    [],
854                    |row| row.get(0),
855                )
856                .unwrap_or(false);
857            if col_exists {
858                let now = chrono::Utc::now().timestamp_micros();
859                conn.execute(
860                    "INSERT OR IGNORE INTO _schema_migrations (version, name, applied_at) \
861                     VALUES (?1, ?2, ?3)",
862                    rusqlite::params![migration.version, migration.name, now],
863                )
864                .map_err(|e| SqliteError::Migration {
865                    version: migration.version,
866                    error: e.to_string(),
867                })?;
868                applied_version = migration.version;
869                continue;
870            }
871        }
872
873        // V11 adds `merged_into` and `merge_event_id` to entities. ENTITIES_DDL in
874        // stores/entity.rs already includes these columns for databases created via
875        // the store path (e.g. in tests or StorageBackend::entities()). Detect and
876        // skip idempotently, recording the migration as applied.
877        if migration.version == 11 {
878            let col_exists: bool = conn
879                .query_row(
880                    "SELECT COUNT(*) > 0 FROM pragma_table_info('entities') WHERE name = 'merged_into'",
881                    [],
882                    |row| row.get(0),
883                )
884                .unwrap_or(false);
885            if col_exists {
886                let now = chrono::Utc::now().timestamp_micros();
887                conn.execute(
888                    "INSERT OR IGNORE INTO _schema_migrations (version, name, applied_at) \
889                     VALUES (?1, ?2, ?3)",
890                    rusqlite::params![migration.version, migration.name, now],
891                )
892                .map_err(|e| SqliteError::Migration {
893                    version: migration.version,
894                    error: e.to_string(),
895                })?;
896                applied_version = migration.version;
897                continue;
898            }
899        }
900
901        // V12 rebuilds the notes table to make salience/decay_factor nullable.
902        // NOTES_DDL in stores/note.rs already declares them nullable for databases
903        // created via the store path. If salience is already nullable (notnull=0),
904        // skip the rebuild and record V12 as applied.
905        if migration.version == 12 {
906            let already_nullable: bool = conn
907                .query_row(
908                    "SELECT COUNT(*) > 0 FROM pragma_table_info('notes') \
909                     WHERE name = 'salience' AND \"notnull\" = 0",
910                    [],
911                    |row| row.get(0),
912                )
913                .unwrap_or(false);
914            if already_nullable {
915                let now = chrono::Utc::now().timestamp_micros();
916                conn.execute(
917                    "INSERT OR IGNORE INTO _schema_migrations (version, name, applied_at) \
918                     VALUES (?1, ?2, ?3)",
919                    rusqlite::params![migration.version, migration.name, now],
920                )
921                .map_err(|e| SqliteError::Migration {
922                    version: migration.version,
923                    error: e.to_string(),
924                })?;
925                applied_version = migration.version;
926                continue;
927            }
928        }
929
930        let tx = conn.transaction().map_err(|e| SqliteError::Migration {
931            version: migration.version,
932            error: e.to_string(),
933        })?;
934
935        let up_sql = if migration.version == 13 {
936            build_v13_event_observability_sql(&tx).map_err(|e| SqliteError::Migration {
937                version: migration.version,
938                error: e.to_string(),
939            })?
940        } else if migration.version == 14 {
941            build_v14_embedding_model_registry_sql(&tx).map_err(|e| SqliteError::Migration {
942                version: migration.version,
943                error: e.to_string(),
944            })?
945        } else if migration.version == 16 {
946            build_v16_vector_embedding_model_tag_sql(&tx).map_err(|e| SqliteError::Migration {
947                version: migration.version,
948                error: e.to_string(),
949            })?
950        } else if migration.version == 17 {
951            build_v17_preserving_rebuild_sql(&tx).map_err(|e| SqliteError::Migration {
952                version: migration.version,
953                error: e.to_string(),
954            })?
955        } else if migration.version == 18 {
956            build_v18_proposals_applying_sql(&tx).map_err(|e| SqliteError::Migration {
957                version: migration.version,
958                error: e.to_string(),
959            })?
960        } else {
961            migration.up.to_string()
962        };
963
964        tx.execute_batch(&up_sql)
965            .map_err(|e| SqliteError::Migration {
966                version: migration.version,
967                error: e.to_string(),
968            })?;
969
970        let now = chrono::Utc::now().timestamp_micros();
971        tx.execute(
972            "INSERT INTO _schema_migrations (version, name, applied_at) VALUES (?1, ?2, ?3)",
973            rusqlite::params![migration.version, migration.name, now],
974        )
975        .map_err(|e| SqliteError::Migration {
976            version: migration.version,
977            error: e.to_string(),
978        })?;
979
980        tx.commit().map_err(|e| SqliteError::Migration {
981            version: migration.version,
982            error: e.to_string(),
983        })?;
984
985        applied_version = migration.version;
986    }
987
988    Ok(applied_version)
989}
990
991fn table_has_column(
992    conn: &Connection,
993    table: &'static str,
994    column: &'static str,
995) -> Result<bool, rusqlite::Error> {
996    conn.query_row(
997        "SELECT COUNT(*) > 0 FROM pragma_table_info(?1) WHERE name = ?2",
998        rusqlite::params![table, column],
999        |row| row.get(0),
1000    )
1001}
1002
1003fn build_v13_event_observability_sql(conn: &Connection) -> Result<String, rusqlite::Error> {
1004    let mut sql = String::new();
1005    for (column, ddl) in [
1006        (
1007            "kind",
1008            "ALTER TABLE events ADD COLUMN kind TEXT NOT NULL DEFAULT 'audit';",
1009        ),
1010        (
1011            "payload",
1012            "ALTER TABLE events ADD COLUMN payload TEXT NOT NULL DEFAULT '{}';",
1013        ),
1014        (
1015            "payload_schema_version",
1016            "ALTER TABLE events ADD COLUMN payload_schema_version INTEGER NOT NULL DEFAULT 1;",
1017        ),
1018        (
1019            "profile_state_version",
1020            "ALTER TABLE events ADD COLUMN profile_state_version INTEGER;",
1021        ),
1022        (
1023            "session_id",
1024            "ALTER TABLE events ADD COLUMN session_id TEXT;",
1025        ),
1026        (
1027            "aggregate_kind",
1028            "ALTER TABLE events ADD COLUMN aggregate_kind TEXT;",
1029        ),
1030        (
1031            "aggregate_id",
1032            "ALTER TABLE events ADD COLUMN aggregate_id TEXT;",
1033        ),
1034    ] {
1035        if !table_has_column(conn, "events", column)? {
1036            sql.push_str(ddl);
1037        }
1038    }
1039    // Migrate legacy data column into payload if both exist.
1040    if table_has_column(conn, "events", "data")? && table_has_column(conn, "events", "payload")? {
1041        sql.push_str("UPDATE events SET payload = data WHERE data IS NOT NULL AND data <> '';");
1042    }
1043    sql.push_str(
1044        "CREATE TABLE IF NOT EXISTS event_observations (\
1045            event_id TEXT NOT NULL,\
1046            entity_id TEXT NOT NULL,\
1047            referent_kind TEXT NOT NULL,\
1048            role TEXT NOT NULL,\
1049            position INTEGER NOT NULL,\
1050            PRIMARY KEY (event_id, role, position)\
1051        );\
1052        CREATE INDEX IF NOT EXISTS idx_events_kind ON events(kind);\
1053        CREATE INDEX IF NOT EXISTS idx_events_session ON events(namespace, session_id, created_at, id);\
1054        CREATE INDEX IF NOT EXISTS idx_events_ns_created_id ON events(namespace, created_at DESC, id DESC);\
1055        CREATE INDEX IF NOT EXISTS idx_events_payload_proposal_id ON events(json_extract(payload, '$.proposal_id'));\
1056        CREATE INDEX IF NOT EXISTS idx_event_obs_entity ON event_observations(entity_id, role);\
1057        CREATE INDEX IF NOT EXISTS idx_event_obs_event_role ON event_observations(event_id, role);",
1058    );
1059    Ok(sql)
1060}
1061
1062/// Build V14 migration SQL at runtime.
1063///
1064/// Creates the `_embedding_models` registry table and its indexes. Then discovers
1065/// any existing regular (non-virtual) `vec_<engine>` tables in sqlite_master and
1066/// adds the `embedding_model_id` FK column where absent.
1067///
1068/// sqlite-vec virtual tables (`vec0`) do not support `ALTER TABLE ADD COLUMN`;
1069/// those tables are handled by the startup backfill rebuild which runs after the SQL
1070/// migration completes. New `vec_<engine>` tables created after V14 do NOT yet
1071/// include `embedding_model_id` at creation — that column will be present only after
1072/// the startup backfill rebuild lands (follow-up).
1073fn build_v14_embedding_model_registry_sql(conn: &Connection) -> Result<String, rusqlite::Error> {
1074    let mut sql = String::from(EMBEDDING_MODELS_DDL);
1075
1076    // Discover existing regular (non-virtual) vec_<engine> tables.
1077    //
1078    // Exclusion rationale:
1079    // - `sql NOT LIKE '%VIRTUAL%'` drops vec0 virtual tables (type='table' but DDL
1080    //   starts with "CREATE VIRTUAL TABLE").
1081    // - `sql NOT LIKE '%vec0%'` is a belt-and-suspenders drop for any DDL that still
1082    //   contains the vec0 keyword.
1083    // - The four `NOT LIKE` suffix clauses exclude the sqlite-vec internal shadow tables
1084    //   that are created as plain regular tables alongside each vec0 virtual table:
1085    //     vec_<x>_chunks, vec_<x>_rowids, vec_<x>_info, vec_<x>_vector_chunks00
1086    //   (see sqlite-vec 0.1.9 sqlite-vec.c:3423-3468; these tables own sqlite-vec's
1087    //   internal layout and must never receive extraneous columns).
1088    //   The ESCAPE '\' form is required because '%' and '_' are SQL LIKE wildcards.
1089    //   The `_metadata%` clause additionally excludes newer sqlite-vec shadow tables
1090    //   (e.g. `vec_<x>_metadatachunks00`, `vec_<x>_metadatatext00`) introduced in
1091    //   later sqlite-vec versions.
1092    let mut stmt = conn.prepare(
1093        "SELECT name FROM sqlite_master \
1094         WHERE type = 'table' \
1095           AND name LIKE 'vec_%' \
1096           AND sql NOT LIKE '%VIRTUAL%' \
1097           AND sql NOT LIKE '%vec0%' \
1098           AND name NOT LIKE '%\\_chunks' ESCAPE '\\' \
1099           AND name NOT LIKE '%\\_rowids' ESCAPE '\\' \
1100           AND name NOT LIKE '%\\_info' ESCAPE '\\' \
1101           AND name NOT LIKE '%\\_vector\\_chunks%' ESCAPE '\\' \
1102           AND name NOT LIKE '%\\_metadata%' ESCAPE '\\'",
1103    )?;
1104    let vec_tables: Vec<String> = stmt
1105        .query_map([], |row| row.get(0))?
1106        .filter_map(|r| r.ok())
1107        .collect();
1108
1109    for table in &vec_tables {
1110        // Validate table name: only alphanumeric and underscores after the 'vec_' prefix.
1111        let valid = table.starts_with("vec_")
1112            && table[4..]
1113                .chars()
1114                .all(|c| c.is_ascii_alphanumeric() || c == '_');
1115        if !valid {
1116            continue;
1117        }
1118        // Check whether the column already exists.
1119        let col_exists: bool = conn
1120            .query_row(
1121                "SELECT COUNT(*) > 0 FROM pragma_table_info(?1) WHERE name = 'embedding_model_id'",
1122                rusqlite::params![table],
1123                |row| row.get(0),
1124            )
1125            .unwrap_or(false);
1126        if col_exists {
1127            continue;
1128        }
1129        sql.push_str(&format!(
1130            "ALTER TABLE {t} ADD COLUMN embedding_model_id BLOB REFERENCES _embedding_models(id);\
1131             CREATE INDEX IF NOT EXISTS idx_{t}_model ON {t}(embedding_model_id);",
1132            t = table,
1133        ));
1134    }
1135
1136    Ok(sql)
1137}
1138
1139fn build_v16_vector_embedding_model_tag_sql(conn: &Connection) -> Result<String, rusqlite::Error> {
1140    let mut stmt = conn.prepare(
1141        "SELECT name FROM sqlite_master \
1142         WHERE type = 'table' \
1143           AND name LIKE 'vec_%' \
1144           AND sql NOT LIKE '%VIRTUAL%' \
1145           AND sql NOT LIKE '%vec0%' \
1146           AND name NOT LIKE '%\\_chunks' ESCAPE '\\' \
1147           AND name NOT LIKE '%\\_rowids' ESCAPE '\\' \
1148           AND name NOT LIKE '%\\_info' ESCAPE '\\' \
1149           AND name NOT LIKE '%\\_vector\\_chunks%' ESCAPE '\\' \
1150           AND name NOT LIKE '%\\_metadata%' ESCAPE '\\'",
1151    )?;
1152    let vec_tables: Vec<String> = stmt
1153        .query_map([], |row| row.get(0))?
1154        .filter_map(|r| r.ok())
1155        .collect();
1156
1157    let mut sql = String::new();
1158    for table in vec_tables {
1159        let valid = table.starts_with("vec_")
1160            && table[4..]
1161                .chars()
1162                .all(|c| c.is_ascii_alphanumeric() || c == '_');
1163        if !valid {
1164            continue;
1165        }
1166        let col_exists: bool = conn
1167            .query_row(
1168                "SELECT COUNT(*) > 0 FROM pragma_table_info(?1) WHERE name = 'embedding_model'",
1169                rusqlite::params![&table],
1170                |row| row.get(0),
1171            )
1172            .unwrap_or(false);
1173        if col_exists {
1174            continue;
1175        }
1176        sql.push_str(&format!(
1177            "ALTER TABLE {t} ADD COLUMN embedding_model TEXT NOT NULL DEFAULT 'all-minilm-l6-v2';\
1178             CREATE INDEX IF NOT EXISTS idx_{t}_subject_model ON {t}(subject_id, embedding_model);",
1179            t = table,
1180        ));
1181    }
1182    if sql.is_empty() {
1183        sql.push_str("SELECT 1;");
1184    }
1185    Ok(sql)
1186}
1187
1188/// Infer an embedding model name from a `vec_<suffix>` table name.
1189///
1190/// Strips the `vec_` prefix and returns the suffix as the model name if the
1191/// suffix is non-empty and contains only alphanumeric / underscore characters.
1192/// Unknown or empty suffixes fall back to `"all-minilm-l6-v2"`.
1193///
1194/// This mirrors the model-key-to-table-name mapping in
1195/// `StorageBackend::vectors_for_namespace` so that rows written under the default
1196/// model receive the correct tag on V17 rebuild.
1197fn infer_model_from_table_name(table: &str) -> String {
1198    let suffix = table.strip_prefix("vec_").unwrap_or("");
1199    if !suffix.is_empty()
1200        && suffix
1201            .chars()
1202            .all(|c| c.is_ascii_alphanumeric() || c == '_')
1203    {
1204        suffix.to_string()
1205    } else {
1206        "all-minilm-l6-v2".to_string()
1207    }
1208}
1209
1210/// Build the V17 migration SQL at runtime.
1211///
1212/// Enumerates all sqlite-vec virtual tables (`vec0`) that are missing the
1213/// `embedding_model` column (or the `field` column) and generates a 6-step
1214/// copy-with-default rebuild for each:
1215///
1216/// 1. `CREATE TABLE tmp_vec_<engine>` — plain regular table with all columns
1217/// 2. `INSERT INTO tmp_vec_<engine> SELECT` — copies existing rows, backfilling
1218///    missing `field` to `''` and `embedding_model` to the inferred model name
1219/// 3. `DROP TABLE vec_<engine>` — removes the old virtual table
1220/// 4. `CREATE VIRTUAL TABLE vec_<engine> USING vec0(...)` — recreates with full schema
1221/// 5. `INSERT INTO vec_<engine> SELECT FROM tmp_vec_<engine>`
1222/// 6. `DROP TABLE tmp_vec_<engine>`
1223///
1224/// Tables that already have both `field` and `embedding_model` are skipped.
1225/// The entire batch is emitted as a single SQL string; `run_migrations` wraps
1226/// it in one transaction so a failure rolls back all rebuilds atomically.
1227///
1228/// If no tables need rebuilding, returns `"SELECT 1;"` to produce a no-op.
1229pub fn build_v17_preserving_rebuild_sql(conn: &Connection) -> Result<String, rusqlite::Error> {
1230    // Discover sqlite-vec virtual tables: type='table', DDL contains VIRTUAL and vec0,
1231    // name starts with vec_, and is not a shadow table.  Fetch the DDL alongside the
1232    // name so we can parse dimensions from the CREATE VIRTUAL TABLE statement.
1233    // (sqlite-vec does not expose column types through PRAGMA table_xinfo — all types
1234    // appear as empty strings — so parsing the DDL is the only reliable way to extract
1235    // the float[N] dimension value.)
1236    let mut stmt = conn.prepare(
1237        "SELECT name, sql FROM sqlite_master \
1238         WHERE type = 'table' \
1239           AND name LIKE 'vec_%' \
1240           AND sql LIKE '%VIRTUAL%' \
1241           AND sql LIKE '%vec0%' \
1242           AND name NOT LIKE '%\\_chunks' ESCAPE '\\' \
1243           AND name NOT LIKE '%\\_rowids' ESCAPE '\\' \
1244           AND name NOT LIKE '%\\_info' ESCAPE '\\' \
1245           AND name NOT LIKE '%\\_vector\\_chunks%' ESCAPE '\\' \
1246           AND name NOT LIKE '%\\_metadata%' ESCAPE '\\'",
1247    )?;
1248    let virtual_tables: Vec<(String, Option<String>)> = stmt
1249        .query_map([], |row| Ok((row.get(0)?, row.get(1)?)))?
1250        .filter_map(|r| r.ok())
1251        .collect();
1252
1253    let mut sql = String::new();
1254
1255    for (table, ddl_opt) in &virtual_tables {
1256        // Guard: table name must be vec_<alphanumeric/underscore> only.
1257        let valid = table.starts_with("vec_")
1258            && table[4..]
1259                .chars()
1260                .all(|c| c.is_ascii_alphanumeric() || c == '_');
1261        if !valid {
1262            continue;
1263        }
1264
1265        // Inspect which columns are present via PRAGMA table_xinfo.
1266        let mut has_field = false;
1267        let mut has_embedding_model = false;
1268
1269        let pragma = format!("PRAGMA table_xinfo({})", table);
1270        let mut col_stmt = conn.prepare(&pragma)?;
1271        let mut col_rows = col_stmt.query([])?;
1272        while let Some(row) = col_rows.next()? {
1273            let name: String = row.get(1)?;
1274            match name.as_str() {
1275                "field" => has_field = true,
1276                "embedding_model" => has_embedding_model = true,
1277                _ => {}
1278            }
1279        }
1280
1281        if has_field && has_embedding_model {
1282            // Already up to date — skip.
1283            continue;
1284        }
1285
1286        // Parse dimensions from the CREATE VIRTUAL TABLE DDL.
1287        // sqlite-vec does not expose column types via PRAGMA table_xinfo (they all
1288        // appear as empty strings), so we parse "float[N]" from the DDL directly.
1289        let dims = ddl_opt.as_deref().and_then(|ddl| {
1290            let lower = ddl.to_ascii_lowercase();
1291            // Find "float[" in the DDL then extract up to "]".
1292            let start = lower.find("float[")?;
1293            let rest = &lower[start + 6..];
1294            let end = rest.find(']')?;
1295            rest[..end].trim().parse::<u32>().ok()
1296        });
1297
1298        // We need the dimensions to recreate the virtual table.  If we cannot
1299        // parse them from the DDL (malformed DDL), skip and leave for the operator.
1300        let dim = match dims {
1301            Some(d) => d,
1302            None => continue,
1303        };
1304
1305        let inferred_model = infer_model_from_table_name(table);
1306        let tmp = format!("tmp_{}", table);
1307
1308        // Build the SELECT projection: map missing columns to defaults.
1309        let field_expr = if has_field {
1310            "field".to_string()
1311        } else {
1312            "'' AS field".to_string()
1313        };
1314        let model_expr = if has_embedding_model {
1315            "embedding_model".to_string()
1316        } else {
1317            format!("'{}' AS embedding_model", inferred_model)
1318        };
1319
1320        // Step 1: create plain staging table.
1321        sql.push_str(&format!(
1322            "CREATE TABLE {tmp} (\
1323             subject_id TEXT PRIMARY KEY, \
1324             namespace TEXT NOT NULL, \
1325             kind TEXT NOT NULL, \
1326             field TEXT NOT NULL, \
1327             embedding_model TEXT NOT NULL, \
1328             embedding BLOB NOT NULL\
1329             );",
1330            tmp = tmp,
1331        ));
1332
1333        // Step 2: copy rows with backfilled defaults.
1334        sql.push_str(&format!(
1335            "INSERT INTO {tmp} (subject_id, namespace, kind, field, embedding_model, embedding) \
1336             SELECT subject_id, namespace, kind, {field_expr}, {model_expr}, embedding \
1337             FROM {table};",
1338            tmp = tmp,
1339            field_expr = field_expr,
1340            model_expr = model_expr,
1341            table = table,
1342        ));
1343
1344        // Step 3: drop old virtual table.
1345        sql.push_str(&format!("DROP TABLE {table};", table = table));
1346
1347        // Step 4: recreate virtual table with full schema.
1348        sql.push_str(&format!(
1349            "CREATE VIRTUAL TABLE {table} USING vec0(\
1350             subject_id TEXT PRIMARY KEY, \
1351             namespace TEXT NOT NULL, \
1352             kind TEXT NOT NULL, \
1353             field TEXT NOT NULL, \
1354             embedding_model TEXT NOT NULL, \
1355             embedding float[{dim}] distance_metric=cosine\
1356             );",
1357            table = table,
1358            dim = dim,
1359        ));
1360
1361        // Step 5: restore rows.
1362        sql.push_str(&format!(
1363            "INSERT INTO {table} (subject_id, namespace, kind, field, embedding_model, embedding) \
1364             SELECT subject_id, namespace, kind, field, embedding_model, embedding \
1365             FROM {tmp};",
1366            table = table,
1367            tmp = tmp,
1368        ));
1369
1370        // Step 6: drop staging table.
1371        sql.push_str(&format!("DROP TABLE {tmp};", tmp = tmp));
1372    }
1373
1374    if sql.is_empty() {
1375        sql.push_str("SELECT 1;");
1376    }
1377
1378    Ok(sql)
1379}
1380
1381/// A record from the `_embedding_models` registry table.
1382#[derive(Clone, Debug)]
1383pub struct EmbeddingModelRegistryRecord {
1384    /// Vector engine name (e.g. `"paraphrase"`).
1385    pub engine_name: String,
1386    /// Model identifier (e.g. `"all-minilm-l6-v2"`).
1387    pub model_id: String,
1388    /// Canonical deduplication key combining engine and model.
1389    pub key_version: String,
1390    /// Embedding dimensionality.
1391    pub dimensions: u32,
1392    /// Lifecycle status (`"active"` or `"superseded"`).
1393    pub status: String,
1394    /// Epoch timestamp when the model was activated.
1395    pub activated_at: Option<i64>,
1396    /// Epoch timestamp when the model was superseded.
1397    pub superseded_at: Option<i64>,
1398}
1399
1400/// Query the `_embedding_models` registry.
1401///
1402/// Opens the database at `db` (defaults to `~/.khive/khive-graph.db`) and
1403/// returns all registry rows, optionally filtered by `engine_name`.
1404/// Returns an empty vec if the database or table does not exist.
1405pub fn query_embedding_models(
1406    db: Option<&std::path::Path>,
1407    engine_filter: Option<&str>,
1408) -> Result<Vec<EmbeddingModelRegistryRecord>, SqliteError> {
1409    let path = db.map(std::path::Path::to_path_buf).unwrap_or_else(|| {
1410        std::env::var("HOME")
1411            .map(std::path::PathBuf::from)
1412            .unwrap_or_else(|_| std::path::PathBuf::from("."))
1413            .join(".khive/khive-graph.db")
1414    });
1415    if !path.exists() {
1416        return Ok(Vec::new());
1417    }
1418
1419    let conn = Connection::open(path)?;
1420    let exists: bool = conn.query_row(
1421        "SELECT COUNT(*) > 0 FROM sqlite_master \
1422         WHERE type='table' AND name='_embedding_models'",
1423        [],
1424        |row| row.get(0),
1425    )?;
1426    if !exists {
1427        return Ok(Vec::new());
1428    }
1429
1430    let sql = if engine_filter.is_some() {
1431        "SELECT engine_name, model_id, key_version, dim, status, activated_at, superseded_at \
1432         FROM _embedding_models WHERE engine_name = ?1 \
1433         ORDER BY engine_name, activated_at IS NULL, activated_at"
1434    } else {
1435        "SELECT engine_name, model_id, key_version, dim, status, activated_at, superseded_at \
1436         FROM _embedding_models \
1437         ORDER BY engine_name, activated_at IS NULL, activated_at"
1438    };
1439    let mut stmt = conn.prepare(sql)?;
1440    let map_row = |row: &rusqlite::Row<'_>| {
1441        Ok(EmbeddingModelRegistryRecord {
1442            engine_name: row.get(0)?,
1443            model_id: row.get(1)?,
1444            key_version: row.get(2)?,
1445            dimensions: row.get::<_, i64>(3)? as u32,
1446            status: row.get(4)?,
1447            activated_at: row.get(5)?,
1448            superseded_at: row.get(6)?,
1449        })
1450    };
1451
1452    if let Some(engine) = engine_filter {
1453        stmt.query_map([engine], map_row)?
1454            .collect::<Result<Vec<_>, _>>()
1455            .map_err(Into::into)
1456    } else {
1457        stmt.query_map([], map_row)?
1458            .collect::<Result<Vec<_>, _>>()
1459            .map_err(Into::into)
1460    }
1461}
1462
1463/// Build the V18 migration SQL: recreate `proposals_open` adding `'applying'` to the
1464/// status CHECK constraint (apply/withdraw race fix).
1465///
1466/// SQLite does not support `ALTER TABLE … ALTER COLUMN`, so we rename the old table,
1467/// create a new one with the extended CHECK, copy all rows, then drop the old table.
1468/// The three indexes are also recreated.  If `proposals_open` does not yet exist
1469/// (fresh DB where V15 migration hasn't run yet) this returns `SELECT 1;` — a no-op
1470/// that lets V18 be recorded without error; V15 will create the correct schema.
1471pub(crate) fn build_v18_proposals_applying_sql(
1472    conn: &Connection,
1473) -> Result<String, rusqlite::Error> {
1474    let table_exists: bool = conn.query_row(
1475        "SELECT COUNT(*) > 0 FROM sqlite_master WHERE type='table' AND name='proposals_open'",
1476        [],
1477        |row| row.get(0),
1478    )?;
1479
1480    if !table_exists {
1481        return Ok("SELECT 1;".to_string());
1482    }
1483
1484    // Check whether 'applying' is already in the CHECK (idempotency guard).
1485    // We inspect the stored CREATE TABLE DDL.
1486    let ddl: String = conn.query_row(
1487        "SELECT sql FROM sqlite_master WHERE type='table' AND name='proposals_open'",
1488        [],
1489        |row| row.get(0),
1490    )?;
1491    if ddl.contains("'applying'") {
1492        return Ok("SELECT 1;".to_string());
1493    }
1494
1495    // `run_migrations` already wraps each migration in `conn.transaction()`.
1496    // Do NOT include BEGIN/COMMIT here — they would create a nested transaction.
1497    // PRAGMA foreign_keys cannot be changed inside a transaction in SQLite, but
1498    // the rename+recreate pattern works without it since we are not altering FK
1499    // references that point to proposals_open from other tables.
1500    Ok("\
1501        ALTER TABLE proposals_open RENAME TO proposals_open_v15;\
1502        CREATE TABLE proposals_open (\
1503            proposal_id    TEXT PRIMARY KEY,\
1504            namespace      TEXT NOT NULL,\
1505            proposer       TEXT NOT NULL,\
1506            title          TEXT NOT NULL,\
1507            status         TEXT NOT NULL CHECK (status IN ('open', 'changes_requested', 'approved', 'applying', 'rejected', 'applied', 'withdrawn')),\
1508            created_at     INTEGER NOT NULL,\
1509            updated_at     INTEGER NOT NULL,\
1510            expiry         INTEGER,\
1511            last_decision  TEXT,\
1512            review_count   INTEGER NOT NULL DEFAULT 0,\
1513            approve_count  INTEGER NOT NULL DEFAULT 0,\
1514            reject_count   INTEGER NOT NULL DEFAULT 0\
1515        );\
1516        INSERT INTO proposals_open \
1517            SELECT proposal_id, namespace, proposer, title, status, created_at, updated_at, \
1518                   expiry, last_decision, review_count, approve_count, reject_count \
1519            FROM proposals_open_v15;\
1520        DROP TABLE proposals_open_v15;\
1521        CREATE INDEX IF NOT EXISTS idx_proposals_open_ns_status ON proposals_open(namespace, status);\
1522        CREATE INDEX IF NOT EXISTS idx_proposals_open_proposer ON proposals_open(namespace, proposer);\
1523        CREATE INDEX IF NOT EXISTS idx_proposals_open_updated ON proposals_open(namespace, updated_at DESC);\
1524    "
1525    .to_string())
1526}
1527
1528// =============================================================================
1529// Tests
1530// =============================================================================
1531
1532#[cfg(test)]
1533#[path = "migrations_tests.rs"]
1534mod tests;
khive_db/migrations.rs

khive_db/
migrations.rs