Skip to main content

brainos_storage/sqlite/
migrations.rs

1use tracing::info;
2
3use super::{SqliteError, SqlitePool};
4
5impl SqlitePool {
6    /// Run all schema migrations.
7    pub(crate) fn migrate(&self) -> Result<(), SqliteError> {
8        self.with_conn(|conn| {
9            conn.execute_batch(
10                "CREATE TABLE IF NOT EXISTS _migrations (
11                    version INTEGER PRIMARY KEY,
12                    name TEXT NOT NULL,
13                    applied_at TEXT NOT NULL DEFAULT (datetime('now'))
14                );",
15            )?;
16
17            let current_version: i64 = conn
18                .query_row(
19                    "SELECT COALESCE(MAX(version), 0) FROM _migrations",
20                    [],
21                    |row| row.get(0),
22                )
23                .unwrap_or(0);
24
25            let migrations = Self::migrations();
26
27            for (version, name, sql) in &migrations {
28                if *version > current_version {
29                    info!("Running migration {version}: {name}");
30                    conn.execute_batch(sql).map_err(|e| {
31                        SqliteError::Migration(format!("Migration {version} ({name}) failed: {e}"))
32                    })?;
33
34                    conn.execute(
35                        "INSERT INTO _migrations (version, name) VALUES (?1, ?2)",
36                        rusqlite::params![version, name],
37                    )?;
38                }
39            }
40
41            if current_version < migrations.last().map_or(0, |m| m.0) {
42                info!(
43                    "Migrations complete (v{current_version} → v{})",
44                    migrations.last().expect("BUG: migrations list is empty").0
45                );
46            }
47
48            Ok(())
49        })
50    }
51
52    /// All schema migrations in order.
53    fn migrations() -> Vec<(i64, &'static str, &'static str)> {
54        vec![
55            (
56                1,
57                "create_sessions",
58                "
59                CREATE TABLE IF NOT EXISTS sessions (
60                    id TEXT PRIMARY KEY,
61                    started_at TEXT NOT NULL DEFAULT (datetime('now')),
62                    ended_at TEXT,
63                    channel TEXT NOT NULL DEFAULT 'cli',
64                    metadata TEXT
65                );
66            ",
67            ),
68            (
69                2,
70                "create_episodes",
71                "
72                CREATE TABLE IF NOT EXISTS episodes (
73                    id TEXT PRIMARY KEY,
74                    session_id TEXT NOT NULL REFERENCES sessions(id),
75                    role TEXT NOT NULL CHECK(role IN ('user', 'assistant', 'system')),
76                    content TEXT NOT NULL,
77                    timestamp TEXT NOT NULL DEFAULT (datetime('now')),
78                    importance REAL NOT NULL DEFAULT 0.5,
79                    decay_rate REAL NOT NULL DEFAULT 0.1,
80                    reinforcement_count INTEGER NOT NULL DEFAULT 0,
81                    last_accessed TEXT,
82                    metadata TEXT
83                );
84
85                CREATE INDEX IF NOT EXISTS idx_episodes_session ON episodes(session_id);
86                CREATE INDEX IF NOT EXISTS idx_episodes_timestamp ON episodes(timestamp DESC);
87                CREATE INDEX IF NOT EXISTS idx_episodes_importance ON episodes(importance DESC);
88            ",
89            ),
90            (
91                3,
92                "create_episodes_fts",
93                "
94                CREATE VIRTUAL TABLE IF NOT EXISTS episodes_fts USING fts5(
95                    content,
96                    content_rowid='rowid',
97                    tokenize='porter unicode61'
98                );
99            ",
100            ),
101            (
102                4,
103                "create_semantic_facts",
104                "
105                CREATE TABLE IF NOT EXISTS semantic_facts (
106                    id TEXT PRIMARY KEY,
107                    category TEXT NOT NULL,
108                    subject TEXT NOT NULL,
109                    predicate TEXT NOT NULL,
110                    object TEXT NOT NULL,
111                    confidence REAL NOT NULL DEFAULT 1.0,
112                    source_episode_id TEXT REFERENCES episodes(id) ON DELETE SET NULL,
113                    created_at TEXT NOT NULL DEFAULT (datetime('now')),
114                    updated_at TEXT NOT NULL DEFAULT (datetime('now')),
115                    superseded_by TEXT REFERENCES semantic_facts(id) ON DELETE SET NULL
116                );
117
118                CREATE INDEX IF NOT EXISTS idx_facts_category ON semantic_facts(category);
119                CREATE INDEX IF NOT EXISTS idx_facts_subject ON semantic_facts(subject);
120            ",
121            ),
122            (
123                5,
124                "create_user_profile",
125                "
126                CREATE TABLE IF NOT EXISTS user_profile (
127                    key TEXT PRIMARY KEY,
128                    value TEXT NOT NULL,
129                    source TEXT,
130                    updated_at TEXT NOT NULL DEFAULT (datetime('now'))
131                );
132            ",
133            ),
134            (
135                6,
136                "create_procedures",
137                "
138                CREATE TABLE IF NOT EXISTS procedures (
139                    id              TEXT PRIMARY KEY,
140                    trigger_pattern TEXT NOT NULL,
141                    steps_json      TEXT NOT NULL DEFAULT '[]',
142                    created_at      TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now')),
143                    updated_at      TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now')),
144                    use_count       INTEGER NOT NULL DEFAULT 0
145                );
146                CREATE INDEX IF NOT EXISTS idx_procedures_trigger
147                    ON procedures(trigger_pattern);
148            ",
149            ),
150            (
151                7,
152                "create_audit_log",
153                "
154                CREATE TABLE IF NOT EXISTS audit_log (
155                    id INTEGER PRIMARY KEY AUTOINCREMENT,
156                    timestamp TEXT NOT NULL DEFAULT (datetime('now')),
157                    action TEXT NOT NULL,
158                    details TEXT,
159                    prev_hash TEXT,
160                    hash TEXT NOT NULL
161                );
162
163                CREATE INDEX IF NOT EXISTS idx_audit_timestamp ON audit_log(timestamp DESC);
164            ",
165            ),
166            (
167                10,
168                "add_namespace_to_semantic_facts",
169                "
170                ALTER TABLE semantic_facts ADD COLUMN namespace TEXT NOT NULL DEFAULT 'personal';
171                CREATE INDEX IF NOT EXISTS idx_facts_namespace ON semantic_facts(namespace);
172            ",
173            ),
174            (
175                11,
176                "add_namespace_to_episodes",
177                "
178                ALTER TABLE episodes ADD COLUMN namespace TEXT NOT NULL DEFAULT 'personal';
179                CREATE INDEX IF NOT EXISTS idx_episodes_namespace ON episodes(namespace);
180            ",
181            ),
182            (
183                12,
184                "rebuild_procedures_table",
185                "
186                DROP TABLE IF EXISTS procedures;
187                CREATE TABLE IF NOT EXISTS procedures (
188                    id              TEXT PRIMARY KEY,
189                    trigger_pattern TEXT NOT NULL,
190                    steps_json      TEXT NOT NULL DEFAULT '[]',
191                    created_at      TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now')),
192                    updated_at      TEXT NOT NULL DEFAULT (strftime('%Y-%m-%dT%H:%M:%SZ', 'now')),
193                    use_count       INTEGER NOT NULL DEFAULT 0
194                );
195                CREATE INDEX IF NOT EXISTS idx_procedures_trigger
196                    ON procedures(trigger_pattern);
197            ",
198            ),
199            (
200                13,
201                "create_episode_promotions",
202                "
203                CREATE TABLE IF NOT EXISTS episode_promotions (
204                    episode_id TEXT PRIMARY KEY REFERENCES episodes(id) ON DELETE CASCADE,
205                    fact_id TEXT NOT NULL REFERENCES semantic_facts(id) ON DELETE CASCADE,
206                    promoted_at TEXT NOT NULL DEFAULT (datetime('now'))
207                );
208            ",
209            ),
210            (
211                14,
212                "create_scheduled_intents",
213                "
214                CREATE TABLE IF NOT EXISTS scheduled_intents (
215                    id TEXT PRIMARY KEY,
216                    description TEXT NOT NULL,
217                    cron TEXT,
218                    namespace TEXT NOT NULL DEFAULT 'personal',
219                    created_at TEXT NOT NULL DEFAULT (datetime('now')),
220                    status TEXT NOT NULL DEFAULT 'scheduled',
221                    metadata TEXT
222                );
223                CREATE INDEX IF NOT EXISTS idx_scheduled_intents_namespace
224                    ON scheduled_intents(namespace);
225                CREATE INDEX IF NOT EXISTS idx_scheduled_intents_status
226                    ON scheduled_intents(status);
227            ",
228            ),
229            (
230                15,
231                "create_notification_outbox",
232                "
233                CREATE TABLE IF NOT EXISTS notification_outbox (
234                    id TEXT PRIMARY KEY,
235                    content TEXT NOT NULL,
236                    priority INTEGER NOT NULL DEFAULT 1,
237                    triggered_by TEXT NOT NULL DEFAULT '',
238                    created_at TEXT NOT NULL DEFAULT (datetime('now')),
239                    delivered_at TEXT,
240                    channel TEXT
241                );
242                CREATE INDEX IF NOT EXISTS idx_outbox_pending
243                    ON notification_outbox(delivered_at, priority, created_at)
244                    WHERE delivered_at IS NULL;
245            ",
246            ),
247            (
248                16,
249                "add_agent_column",
250                "
251                ALTER TABLE episodes ADD COLUMN agent TEXT;
252                ALTER TABLE semantic_facts ADD COLUMN agent TEXT;
253            ",
254            ),
255            (
256                17,
257                "fix_orphaned_facts",
258                "
259                -- Clear orphaned source_episode_id references
260                UPDATE semantic_facts SET source_episode_id = NULL
261                WHERE source_episode_id NOT IN (SELECT id FROM episodes);
262                -- Clear orphaned superseded_by references
263                UPDATE semantic_facts SET superseded_by = NULL
264                WHERE superseded_by NOT IN (SELECT id FROM semantic_facts);
265            ",
266            ),
267            (
268                18,
269                "add_performance_indexes",
270                "
271                -- Composite index for open-loop and habit detection
272                -- (filters by role = 'user' AND timestamp >= ?)
273                CREATE INDEX IF NOT EXISTS idx_episodes_role_timestamp
274                    ON episodes(role, timestamp);
275
276                -- Partial index for active (non-superseded) facts
277                -- (count, list_all, list_by_namespace all filter superseded_by IS NULL)
278                CREATE INDEX IF NOT EXISTS idx_facts_active
279                    ON semantic_facts(superseded_by)
280                    WHERE superseded_by IS NULL;
281            ",
282            ),
283            (
284                19,
285                "create_dlq_entries",
286                "
287                CREATE TABLE IF NOT EXISTS dlq_entries (
288                    id TEXT PRIMARY KEY,
289                    tool_id TEXT NOT NULL,
290                    request_json TEXT NOT NULL,
291                    error_message TEXT NOT NULL,
292                    attempts INTEGER NOT NULL,
293                    dlq_at TEXT NOT NULL DEFAULT (datetime('now'))
294                );
295                CREATE INDEX IF NOT EXISTS idx_dlq_entries_tool
296                    ON dlq_entries(tool_id, dlq_at DESC);
297                CREATE INDEX IF NOT EXISTS idx_dlq_entries_recent
298                    ON dlq_entries(dlq_at DESC);
299            ",
300            ),
301            (
302                20,
303                "create_graph_nodes_edges",
304                "
305                -- Hippocampus graph memory. Nodes are typed entries in
306                -- the episodic graph; edges link them with a typed
307                -- relationship and a weight (drives both
308                -- retrieval ranking and the compactor's half-life decay).
309                -- Coexists with the legacy `episodes` / `semantic_facts`
310                -- tables during v1.0; v1.1 deprecates the legacy store.
311                CREATE TABLE IF NOT EXISTS nodes (
312                    id TEXT PRIMARY KEY,
313                    session_id TEXT REFERENCES sessions(id),
314                    namespace TEXT NOT NULL DEFAULT 'personal',
315                    node_kind TEXT NOT NULL,
316                    body_json TEXT NOT NULL,
317                    vector_id TEXT,
318                    weight REAL NOT NULL DEFAULT 1.0,
319                    created_at TEXT NOT NULL DEFAULT (datetime('now'))
320                );
321                -- Primary read path: scoped by (session, namespace, kind).
322                CREATE INDEX IF NOT EXISTS idx_nodes_session_ns_kind
323                    ON nodes(session_id, namespace, node_kind);
324                CREATE INDEX IF NOT EXISTS idx_nodes_namespace_kind
325                    ON nodes(namespace, node_kind);
326                CREATE INDEX IF NOT EXISTS idx_nodes_created
327                    ON nodes(created_at DESC);
328
329                CREATE TABLE IF NOT EXISTS edges (
330                    src_id TEXT NOT NULL REFERENCES nodes(id) ON DELETE CASCADE,
331                    dst_id TEXT NOT NULL REFERENCES nodes(id) ON DELETE CASCADE,
332                    edge_kind TEXT NOT NULL,
333                    weight REAL NOT NULL DEFAULT 1.0,
334                    created_at TEXT NOT NULL DEFAULT (datetime('now')),
335                    PRIMARY KEY (src_id, dst_id, edge_kind)
336                );
337                -- Traversals start from either endpoint.
338                CREATE INDEX IF NOT EXISTS idx_edges_src ON edges(src_id, edge_kind);
339                CREATE INDEX IF NOT EXISTS idx_edges_dst ON edges(dst_id, edge_kind);
340            ",
341            ),
342            (
343                21,
344                "create_standing_approvals",
345                "
346                -- Standing approvals. A row authorizes a
347                -- specific (agent_id, verb_ns, verb_action) triple to
348                -- auto-approve through the ConfirmationEngine without
349                -- prompting the user. A re-grant after revoke creates
350                -- a new row rather than mutating the old one — keeps
351                -- the audit trail intact. The partial index on
352                -- (agent_id, verb_ns, verb_action) WHERE revoked_at
353                -- IS NULL is the hot lookup path for `is_granted`.
354                CREATE TABLE IF NOT EXISTS standing_approvals (
355                    id TEXT PRIMARY KEY,
356                    agent_id TEXT NOT NULL,
357                    verb_ns TEXT NOT NULL,
358                    verb_action TEXT NOT NULL,
359                    granted_at TEXT NOT NULL DEFAULT (datetime('now')),
360                    revoked_at TEXT,
361                    note TEXT
362                );
363                CREATE INDEX IF NOT EXISTS idx_standing_approvals_lookup
364                    ON standing_approvals(agent_id, verb_ns, verb_action)
365                    WHERE revoked_at IS NULL;
366                CREATE INDEX IF NOT EXISTS idx_standing_approvals_recent
367                    ON standing_approvals(granted_at DESC);
368            ",
369            ),
370            (
371                22,
372                "create_task_states",
373                "
374                -- Orchestrator state-machine history. One row
375                -- per phase transition; the AUTOINCREMENT id doubles as
376                -- a monotonic sequence so a task that re-enters a state
377                -- (e.g. Executing after a replan) leaves a faithful
378                -- audit trail. Replay = `ORDER BY id ASC WHERE task_id
379                -- = ?`. Indexed on task_id so per-task lookups stay
380                -- cheap as the table grows across many tasks.
381                CREATE TABLE IF NOT EXISTS task_states (
382                    id INTEGER PRIMARY KEY AUTOINCREMENT,
383                    task_id TEXT NOT NULL,
384                    state TEXT NOT NULL,
385                    entered_at TEXT NOT NULL DEFAULT (datetime('now'))
386                );
387                CREATE INDEX IF NOT EXISTS idx_task_states_task
388                    ON task_states(task_id, id);
389            ",
390            ),
391            (
392                23,
393                "create_nodes_fts",
394                "
395                -- Full-text index over graph node bodies so the episodic
396                -- graph contributes a BM25 candidate list to recall (it was
397                -- write-only w.r.t. retrieval before). Regular FTS5 index
398                -- (stores its own content so hits are retrievable) mirroring
399                -- `episodes_fts` (v3); the `text` column carries each node's
400                -- raw `body_json`, which the porter tokenizer indexes
401                -- term-wise (verbs, program names, args all searchable).
402                CREATE VIRTUAL TABLE IF NOT EXISTS nodes_fts USING fts5(
403                    text,
404                    tokenize='porter unicode61'
405                );
406
407                -- Keep the index in sync via triggers so every writer
408                -- (add_node, delete_node, the compactor) stays covered
409                -- without touching Rust write paths. `nodes` has an implicit
410                -- integer rowid we mirror as the FTS rowid.
411                CREATE TRIGGER IF NOT EXISTS nodes_ai AFTER INSERT ON nodes BEGIN
412                    INSERT INTO nodes_fts(rowid, text) VALUES (new.rowid, new.body_json);
413                END;
414                CREATE TRIGGER IF NOT EXISTS nodes_ad AFTER DELETE ON nodes BEGIN
415                    DELETE FROM nodes_fts WHERE rowid = old.rowid;
416                END;
417                CREATE TRIGGER IF NOT EXISTS nodes_au AFTER UPDATE OF body_json ON nodes BEGIN
418                    DELETE FROM nodes_fts WHERE rowid = old.rowid;
419                    INSERT INTO nodes_fts(rowid, text) VALUES (new.rowid, new.body_json);
420                END;
421
422                -- Backfill any nodes written before this migration.
423                INSERT INTO nodes_fts(rowid, text)
424                    SELECT rowid, body_json FROM nodes;
425            ",
426            ),
427            (
428                24,
429                "create_capability_fitness",
430                "
431                -- Learned capability self-model: per-tool success/failure
432                -- mass the kernel reinforces after each dispatch and decays
433                -- under the forgetting curve (lazy, computed on read/write).
434                -- One row per tool_id (`mcp:{server}:{tool}` or
435                -- `native:{ns}.{action}`, mirroring ToolDescriptor.tool_id),
436                -- so it joins directly against the live capability manifest.
437                -- `*_mass` are decayed reinforcement counts (not raw tallies);
438                -- `uses` is the undecayed lifetime invocation count.
439                CREATE TABLE IF NOT EXISTS capability_fitness (
440                    tool_id      TEXT PRIMARY KEY,
441                    success_mass REAL    NOT NULL DEFAULT 0,
442                    failure_mass REAL    NOT NULL DEFAULT 0,
443                    uses         INTEGER NOT NULL DEFAULT 0,
444                    last_used_at TEXT    NOT NULL DEFAULT (datetime('now'))
445                );
446            ",
447            ),
448        ]
449    }
450
451    /// The highest schema version this build knows how to apply — the
452    /// version a freshly migrated database lands on. Used by the open-time
453    /// reconciliation gate to detect a future (downgrade) schema and by
454    /// `doctor` to report binary-vs-disk skew.
455    pub fn latest_schema_version() -> i64 {
456        Self::migrations()
457            .last()
458            .map_or(0, |(version, _, _)| *version)
459    }
460
461    /// Get the current schema version.
462    pub fn schema_version(&self) -> Result<i64, SqliteError> {
463        self.with_conn(|conn| {
464            let version: i64 = conn
465                .query_row(
466                    "SELECT COALESCE(MAX(version), 0) FROM _migrations",
467                    [],
468                    |row| row.get(0),
469                )
470                .unwrap_or(0);
471            Ok(version)
472        })
473    }
474}