use anyhow::{Context, Result};
use rusqlite::{params, Connection, OptionalExtension};
/// A single versioned schema change, with forward (`up`) and reverse
/// (`down`) SQL scripts.
struct Migration {
    /// Schema version this migration brings the database to; versions are
    /// applied in ascending order and recorded in `schema_migrations`.
    version: i32,
    /// Human-readable label stored alongside the version in `schema_migrations`.
    name: &'static str,
    /// Forward SQL, executed as a batch inside a transaction.
    up: &'static str,
    /// Rollback SQL; not executed by the runner today (kept for
    /// reference/tooling), hence the dead_code allowance.
    #[allow(dead_code)]
    down: &'static str, }
/// Applies pending schema migrations against a borrowed SQLite connection.
pub struct MigrationRunner<'a> {
    // Mutable borrow is required because applying a migration opens a
    // rusqlite transaction, which needs `&mut Connection`.
    conn: &'a mut Connection,
}
impl<'a> MigrationRunner<'a> {
    /// Wraps a mutable connection borrow; no database work happens until a
    /// migration method is called.
    pub fn new(conn: &'a mut Connection) -> Self {
        Self { conn }
    }

    /// Returns the highest applied migration version, or 0 for a fresh
    /// database. Creates the tracking table on first use.
    pub fn current_version(&mut self) -> Result<i32> {
        self.ensure_migration_table()?;
        // MAX(version) always yields exactly one row, containing NULL when
        // the table is empty — hence the Option layering and `.flatten()`.
        let version: Option<i32> = self
            .conn
            .query_row("SELECT MAX(version) FROM schema_migrations", [], |row| {
                row.get(0)
            })
            .optional()
            .context("Failed to query current schema version")?
            .flatten();
        Ok(version.unwrap_or(0))
    }

    /// Reports whether the database is behind the newest known migration.
    pub fn needs_migration(&mut self) -> Result<bool> {
        let current = self.current_version()?;
        let latest = Self::migrations()
            .iter()
            .map(|m| m.version)
            .max()
            .unwrap_or(0);
        Ok(current < latest)
    }

    /// Applies all pending migrations in ascending version order.
    ///
    /// Each migration runs in its own transaction, so a failure rolls back
    /// only the failing migration and leaves earlier, already-committed
    /// ones in place. Returns immediately when nothing is pending.
    pub fn migrate(&mut self) -> Result<()> {
        // current_version() ensures the tracking table exists, so no
        // separate ensure_migration_table() call is needed here.
        let current_version = self.current_version()?;
        let migrations = Self::migrations();
        let mut pending: Vec<&Migration> = migrations
            .iter()
            .filter(|m| m.version > current_version)
            .collect();
        if pending.is_empty() {
            return Ok(());
        }
        // Defensive: apply in version order rather than relying on the
        // declaration order of migrations().
        pending.sort_by_key(|m| m.version);
        for migration in pending {
            self.apply_migration(migration).with_context(|| {
                format!(
                    "Failed to apply migration {}: {}",
                    migration.version, migration.name
                )
            })?;
        }
        Ok(())
    }

    /// Creates the `schema_migrations` bookkeeping table if it is missing.
    fn ensure_migration_table(&mut self) -> Result<()> {
        self.conn
            .execute(
                "CREATE TABLE IF NOT EXISTS schema_migrations (
                    version INTEGER PRIMARY KEY,
                    name TEXT NOT NULL,
                    applied_at TEXT NOT NULL DEFAULT (datetime('now'))
                )",
                [],
            )
            .context("Failed to create schema_migrations table")?;
        Ok(())
    }

    /// Applies one migration atomically: executes its `up` script and
    /// records it in `schema_migrations` within a single transaction.
    ///
    /// A version that is already recorded is skipped, so this is safe to
    /// call repeatedly. Callers must have created the tracking table
    /// (e.g. via `ensure_migration_table`) first.
    fn apply_migration(&mut self, migration: &Migration) -> Result<()> {
        // Use .optional() so that "no row" means "not yet applied" while
        // genuine query errors (e.g. a corrupted tracking table) still
        // propagate instead of being silently treated as "not applied".
        let exists = self
            .conn
            .query_row(
                "SELECT 1 FROM schema_migrations WHERE version = ?1",
                params![migration.version],
                |_| Ok(()),
            )
            .optional()
            .with_context(|| {
                format!(
                    "Failed to check whether migration {} was already applied",
                    migration.version
                )
            })?
            .is_some();
        if exists {
            return Ok(());
        }
        let tx = self
            .conn
            .transaction()
            .context("Failed to begin transaction for migration")?;
        tx.execute_batch(migration.up).with_context(|| {
            format!(
                "Failed to execute migration SQL for version {}",
                migration.version
            )
        })?;
        tx.execute(
            "INSERT INTO schema_migrations (version, name) VALUES (?1, ?2)",
            params![migration.version, migration.name],
        )
        .context("Failed to record migration in schema_migrations table")?;
        tx.commit()
            .context("Failed to commit migration transaction")?;
        Ok(())
    }

    /// The ordered list of all known migrations.
    ///
    /// `down` scripts are kept for reference/tooling only; the runner
    /// executes `up` scripts exclusively. The vec0 virtual tables require
    /// the sqlite-vec extension to be loaded on the connection.
    fn migrations() -> Vec<Migration> {
        vec![
            Migration {
                version: 1,
                name: "initial_schema",
                up: r#"
                -- Create Repositories table
                CREATE TABLE IF NOT EXISTS repos (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    name TEXT NOT NULL UNIQUE,
                    root_path TEXT NOT NULL
                );
                -- Create Worktrees table
                CREATE TABLE IF NOT EXISTS worktrees (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    repo_id INTEGER NOT NULL REFERENCES repos(id) ON DELETE CASCADE,
                    name TEXT NOT NULL,
                    abs_path TEXT NOT NULL,
                    UNIQUE(repo_id, name)
                );
                -- Create Commits table
                CREATE TABLE IF NOT EXISTS commits (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    repo_id INTEGER NOT NULL REFERENCES repos(id) ON DELETE CASCADE,
                    sha TEXT NOT NULL,
                    committed_at DATETIME,
                    UNIQUE(repo_id, sha)
                );
                -- Create Files table
                CREATE TABLE IF NOT EXISTS files (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    repo_id INTEGER NOT NULL REFERENCES repos(id) ON DELETE CASCADE,
                    worktree_id INTEGER NOT NULL REFERENCES worktrees(id) ON DELETE CASCADE,
                    commit_id INTEGER NOT NULL REFERENCES commits(id) ON DELETE CASCADE,
                    relpath TEXT NOT NULL,
                    language TEXT,
                    content_hash TEXT NOT NULL,
                    size_bytes INTEGER NOT NULL,
                    last_modified DATETIME,
                    UNIQUE(commit_id, relpath, content_hash)
                );
                -- Create Chunks table
                CREATE TABLE IF NOT EXISTS chunks (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
                    blob_sha TEXT NOT NULL,
                    symbol_name TEXT,
                    kind TEXT NOT NULL,
                    signature TEXT,
                    docstring TEXT,
                    start_line INTEGER NOT NULL,
                    end_line INTEGER NOT NULL,
                    preview TEXT NOT NULL,
                    ts_doc_text TEXT,
                    recency_score REAL NOT NULL,
                    churn_score REAL NOT NULL,
                    metadata JSON,
                    worktree_ids JSON NOT NULL,
                    UNIQUE(file_id, start_line, end_line)
                );
                -- Create Chunk Edges table
                CREATE TABLE IF NOT EXISTS chunk_edges (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    src_chunk_id INTEGER NOT NULL REFERENCES chunks(id) ON DELETE CASCADE,
                    dst_chunk_id INTEGER NOT NULL REFERENCES chunks(id) ON DELETE CASCADE,
                    type TEXT NOT NULL,
                    UNIQUE(src_chunk_id, dst_chunk_id, type)
                );
                -- Create Vector Table using vec0
                CREATE VIRTUAL TABLE IF NOT EXISTS vec_chunks USING vec0(
                    chunk_id INTEGER PRIMARY KEY,
                    code_embedding float[1536],
                    text_embedding float[1536]
                );
                -- Create FTS5 Table for code search
                CREATE VIRTUAL TABLE IF NOT EXISTS fts_chunks USING fts5(
                    content,
                    docstring,
                    symbol_name,
                    content='chunks',
                    content_rowid='id'
                );
                "#,
                down: r#"
                -- Rollback: drop all tables in reverse order
                DROP TABLE IF EXISTS fts_chunks;
                DROP TABLE IF EXISTS vec_chunks;
                DROP TABLE IF EXISTS chunk_edges;
                DROP TABLE IF EXISTS chunks;
                DROP TABLE IF EXISTS files;
                DROP TABLE IF EXISTS commits;
                DROP TABLE IF EXISTS worktrees;
                DROP TABLE IF EXISTS repos;
                "#,
            },
            Migration {
                version: 2,
                name: "add_chunk_worktrees",
                up: r#"
                -- Create junction table for chunk-worktree many-to-many relationship
                CREATE TABLE chunk_worktrees (
                    chunk_id INTEGER NOT NULL REFERENCES chunks(id) ON DELETE CASCADE,
                    worktree_id INTEGER NOT NULL REFERENCES worktrees(id) ON DELETE CASCADE,
                    PRIMARY KEY (chunk_id, worktree_id)
                );
                -- Create index for queries filtering by worktree
                CREATE INDEX idx_chunk_worktrees_worktree ON chunk_worktrees(worktree_id);
                "#,
                down: r#"
                -- Rollback: drop the junction table and its index
                DROP INDEX IF EXISTS idx_chunk_worktrees_worktree;
                DROP TABLE IF EXISTS chunk_worktrees;
                "#,
            },
            Migration {
                version: 3,
                name: "add_code_embeddings",
                up: r#"
                -- Create deduplicated embedding storage table
                CREATE TABLE code_embeddings (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    blob_sha TEXT NOT NULL UNIQUE,
                    embedding BLOB,
                    embedding_dim INTEGER NOT NULL DEFAULT 1536,
                    model_version TEXT NOT NULL DEFAULT 'text-embedding-3-small',
                    created_at TEXT NOT NULL DEFAULT (datetime('now'))
                );
                -- Create index for blob_sha lookups
                CREATE INDEX idx_embeddings_blob ON code_embeddings(blob_sha);
                "#,
                down: r#"
                -- Rollback: drop the code_embeddings table and its index
                DROP INDEX IF EXISTS idx_embeddings_blob;
                DROP TABLE IF EXISTS code_embeddings;
                "#,
            },
            Migration {
                version: 4,
                name: "add_vec_code",
                up: r#"
                -- Create vector index table using sqlite-vec
                -- Note: requires sqlite-vec extension to be loaded
                -- If extension is not available, migration will fail with "no such module: vec0"
                CREATE VIRTUAL TABLE vec_code USING vec0(
                    embedding float[1536]
                );
                "#,
                down: r#"
                -- Rollback: drop the virtual table
                DROP TABLE IF EXISTS vec_code;
                "#,
            },
            Migration {
                version: 5,
                name: "drop_worktree_ids",
                up: r#"
                -- Drop the deprecated worktree_ids JSON column
                -- Requires SQLite 3.35.0+ for ALTER TABLE DROP COLUMN
                ALTER TABLE chunks DROP COLUMN worktree_ids;
                "#,
                down: r#"
                -- Rollback: add the worktree_ids column back
                -- Note: data will be lost, this is a best-effort rollback
                ALTER TABLE chunks ADD COLUMN worktree_ids JSON NOT NULL DEFAULT '[]';
                "#,
            },
            Migration {
                version: 6,
                name: "drop_vec_chunks",
                up: r#"
                -- Drop the deprecated vec_chunks virtual table
                -- Uses IF EXISTS for safety on fresh databases
                DROP TABLE IF EXISTS vec_chunks;
                "#,
                down: r#"
                -- Rollback: recreate vec_chunks table
                -- Note: data will be lost, this is a best-effort rollback
                CREATE VIRTUAL TABLE IF NOT EXISTS vec_chunks USING vec0(
                    chunk_id INTEGER PRIMARY KEY,
                    code_embedding float[1536],
                    text_embedding float[1536]
                );
                "#,
            },
            Migration {
                version: 7,
                name: "add_vec_code_768",
                up: r#"
                -- Create vector index table for 768-dimensional embeddings
                -- Supports Ollama nomic-embed-text (768-dim) alongside existing 1536-dim
                CREATE VIRTUAL TABLE vec_code_768 USING vec0(
                    embedding float[768]
                );
                "#,
                down: r#"
                -- Rollback: drop the 768-dim virtual table
                DROP TABLE IF EXISTS vec_code_768;
                "#,
            },
            Migration {
                version: 8,
                name: "add_index_state",
                up: r#"
                -- Create index state table for tracking indexing progress
                -- Matches PostgreSQL worktree_index_state table columns
                CREATE TABLE IF NOT EXISTS index_state (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    worktree_id INTEGER NOT NULL UNIQUE REFERENCES worktrees(id) ON DELETE CASCADE,
                    tree_sha TEXT NOT NULL,
                    chunks_processed INTEGER NOT NULL DEFAULT 0,
                    embeddings_generated INTEGER NOT NULL DEFAULT 0,
                    last_indexed TEXT NOT NULL DEFAULT (datetime('now'))
                );
                -- Create index for worktree_id lookups
                CREATE INDEX IF NOT EXISTS idx_index_state_worktree ON index_state(worktree_id);
                "#,
                down: r#"
                -- Rollback: drop the index_state table and index
                DROP INDEX IF EXISTS idx_index_state_worktree;
                DROP TABLE IF EXISTS index_state;
                "#,
            },
            Migration {
                version: 9,
                name: "add_context_cache",
                up: r#"
                -- Create context cache table for storing assembled context bundles
                -- Supports TTL and LRU eviction strategies
                CREATE TABLE IF NOT EXISTS context_cache (
                    cache_key TEXT PRIMARY KEY,
                    bundle_json TEXT NOT NULL,
                    created_at TEXT NOT NULL DEFAULT (datetime('now')),
                    expires_at TEXT NOT NULL,
                    accessed_at TEXT NOT NULL DEFAULT (datetime('now'))
                );
                -- Create index for expiration queries
                CREATE INDEX IF NOT EXISTS idx_context_cache_expires ON context_cache(expires_at);
                -- Create index for LRU eviction queries
                CREATE INDEX IF NOT EXISTS idx_context_cache_accessed ON context_cache(accessed_at);
                "#,
                down: r#"
                -- Rollback: drop the context_cache table and indexes
                DROP INDEX IF EXISTS idx_context_cache_accessed;
                DROP INDEX IF EXISTS idx_context_cache_expires;
                DROP TABLE IF EXISTS context_cache;
                "#,
            },
            Migration {
                version: 10,
                name: "add_vec_code_1024",
                up: r#"
                -- Create vector index table for 1024-dimensional embeddings
                -- Supports mxbai-embed-large (1024-dim) alongside existing 768-dim and 1536-dim
                CREATE VIRTUAL TABLE vec_code_1024 USING vec0(
                    embedding float[1024]
                );
                "#,
                down: r#"
                -- Rollback: drop the 1024-dim virtual table
                DROP TABLE IF EXISTS vec_code_1024;
                "#,
            },
            Migration {
                version: 11,
                name: "add_encoding_runs",
                up: r#"
                -- Create encoding runs table for tracking active encoding (embedding generation) progress
                CREATE TABLE IF NOT EXISTS encoding_runs (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    started_at TEXT NOT NULL DEFAULT (datetime('now')),
                    finished_at TEXT,
                    status TEXT NOT NULL DEFAULT 'running',
                    total_chunks INTEGER NOT NULL,
                    chunks_completed INTEGER NOT NULL DEFAULT 0,
                    chunks_per_second REAL,
                    last_batch_at TEXT,
                    provider TEXT,
                    dimension INTEGER
                );
                "#,
                down: r#"
                -- Rollback: drop the encoding_runs table
                DROP TABLE IF EXISTS encoding_runs;
                "#,
            },
        ]
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // Opens an in-memory database for migration tests. The sqlite-vec
    // extension is registered for auto-loading (the vec0 virtual tables in
    // the migrations need it) and foreign keys are switched on.
    fn setup_test_db() -> Connection {
        // SAFETY: sqlite3_auto_extension expects a C fn pointer with the
        // SQLite extension-init signature; the transmute only recasts the
        // sqlite3_vec_init function pointer to that type.
        unsafe {
            rusqlite::ffi::sqlite3_auto_extension(Some(std::mem::transmute(
                crate::db::sqlite::sqlite3_vec_init as *const (),
            )));
        }
        let conn = Connection::open_in_memory().expect("Failed to open in-memory database");
        conn.execute_batch(
            r#"
PRAGMA foreign_keys = ON;
"#,
        )
        .expect("Failed to enable foreign keys");
        conn
    }

    // A fresh database migrates from version 0 to the latest version (11),
    // creating all expected tables, dropping the deprecated vec_chunks
    // table, and adding the chunk_worktrees and code_embeddings tables.
    #[test]
    fn test_migration_fresh_database() {
        let mut conn = setup_test_db();
        let mut runner = MigrationRunner::new(&mut conn);
        assert_eq!(runner.current_version().unwrap(), 0);
        assert!(runner.needs_migration().unwrap());
        runner.migrate().unwrap();
        assert_eq!(runner.current_version().unwrap(), 11);
        assert!(!runner.needs_migration().unwrap());
        // Count the core (non-virtual) tables created by the migrations.
        let table_count: i32 = conn
            .query_row(
                "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name IN ('repos', 'worktrees', 'commits', 'files', 'chunks', 'chunk_edges', 'schema_migrations', 'chunk_worktrees', 'code_embeddings', 'index_state', 'context_cache', 'encoding_runs')",
                [],
                |row| row.get(0),
            )
            .unwrap();
        assert_eq!(table_count, 12, "Expected 12 core tables to be created");
        // Migration 6 drops vec_chunks (created by migration 1).
        let vec_chunks_exists: bool = conn
            .query_row(
                "SELECT 1 FROM sqlite_master WHERE type='table' AND name='vec_chunks'",
                [],
                |_| Ok(true),
            )
            .unwrap_or(false);
        assert!(!vec_chunks_exists, "vec_chunks table should be dropped");
        let junction_exists: bool = conn
            .query_row(
                "SELECT 1 FROM sqlite_master WHERE type='table' AND name='chunk_worktrees'",
                [],
                |_| Ok(true),
            )
            .unwrap_or(false);
        assert!(
            junction_exists,
            "chunk_worktrees junction table should exist"
        );
        let embeddings_exists: bool = conn
            .query_row(
                "SELECT 1 FROM sqlite_master WHERE type='table' AND name='code_embeddings'",
                [],
                |_| Ok(true),
            )
            .unwrap_or(false);
        assert!(embeddings_exists, "code_embeddings table should exist");
    }

    // Running migrate() a second time is a no-op: the version stays at the
    // latest and each migration is recorded exactly once.
    #[test]
    fn test_migration_idempotent() {
        let mut conn = setup_test_db();
        let mut runner = MigrationRunner::new(&mut conn);
        runner.migrate().unwrap();
        let version_after_first = runner.current_version().unwrap();
        runner.migrate().unwrap();
        let version_after_second = runner.current_version().unwrap();
        assert_eq!(version_after_first, version_after_second);
        assert_eq!(version_after_second, 11);
        let migration_count: i32 = conn
            .query_row("SELECT COUNT(*) FROM schema_migrations", [], |row| {
                row.get(0)
            })
            .unwrap();
        assert_eq!(migration_count, 11, "Expected 11 migrations to be recorded");
    }

    // schema_migrations rows carry the version, the migration name, and a
    // non-empty applied_at timestamp (defaulted to datetime('now')).
    #[test]
    fn test_migration_version_tracking() {
        let mut conn = setup_test_db();
        let mut runner = MigrationRunner::new(&mut conn);
        assert_eq!(runner.current_version().unwrap(), 0);
        runner.migrate().unwrap();
        let (version, name): (i32, String) = conn
            .query_row(
                "SELECT version, name FROM schema_migrations WHERE version = 1",
                [],
                |row| Ok((row.get(0)?, row.get(1)?)),
            )
            .unwrap();
        assert_eq!(version, 1);
        assert_eq!(name, "initial_schema");
        let applied_at: String = conn
            .query_row(
                "SELECT applied_at FROM schema_migrations WHERE version = 1",
                [],
                |row| row.get(0),
            )
            .unwrap();
        assert!(!applied_at.is_empty());
    }

    // A migration whose up script fails partway through must be rolled
    // back atomically: neither the partial DDL (the `test` table) nor the
    // schema_migrations bookkeeping row may survive.
    #[test]
    fn test_migration_rollback_on_failure() {
        let mut conn = setup_test_db();
        // First statement succeeds, second references a missing table.
        let bad_migration = Migration {
            version: 999,
            name: "bad_migration",
            up: "CREATE TABLE test (id INTEGER); INSERT INTO nonexistent_table VALUES (1);",
            down: "",
        };
        let mut runner = MigrationRunner::new(&mut conn);
        runner.ensure_migration_table().unwrap();
        let result = runner.apply_migration(&bad_migration);
        assert!(result.is_err());
        let table_exists: bool = conn
            .query_row(
                "SELECT 1 FROM sqlite_master WHERE type='table' AND name='test'",
                [],
                |_| Ok(true),
            )
            .unwrap_or(false);
        assert!(!table_exists);
        let recorded: bool = conn
            .query_row(
                "SELECT 1 FROM schema_migrations WHERE version = 999",
                [],
                |_| Ok(true),
            )
            .unwrap_or(false);
        assert!(!recorded);
    }

    // Spot-checks the DDL produced by the later migrations: the
    // chunk_worktrees junction table and its index, the code_embeddings
    // table and its index, the vec_code virtual table, and the removal of
    // the worktree_ids column from chunks (migration 5).
    #[test]
    fn test_new_migrations_schema() {
        let mut conn = setup_test_db();
        let mut runner = MigrationRunner::new(&mut conn);
        runner.migrate().unwrap();
        let junction_schema: String = conn
            .query_row(
                "SELECT sql FROM sqlite_master WHERE type='table' AND name='chunk_worktrees'",
                [],
                |row| row.get(0),
            )
            .unwrap();
        assert!(
            junction_schema.contains("chunk_id"),
            "chunk_worktrees should have chunk_id column"
        );
        assert!(
            junction_schema.contains("worktree_id"),
            "chunk_worktrees should have worktree_id column"
        );
        assert!(
            junction_schema.contains("PRIMARY KEY"),
            "chunk_worktrees should have composite primary key"
        );
        let index_exists: bool = conn
            .query_row(
                "SELECT 1 FROM sqlite_master WHERE type='index' AND name='idx_chunk_worktrees_worktree'",
                [],
                |_| Ok(true),
            )
            .unwrap_or(false);
        assert!(
            index_exists,
            "Index idx_chunk_worktrees_worktree should exist"
        );
        let embeddings_schema: String = conn
            .query_row(
                "SELECT sql FROM sqlite_master WHERE type='table' AND name='code_embeddings'",
                [],
                |row| row.get(0),
            )
            .unwrap();
        assert!(
            embeddings_schema.contains("blob_sha"),
            "code_embeddings should have blob_sha column"
        );
        assert!(
            embeddings_schema.contains("UNIQUE"),
            "code_embeddings should have UNIQUE constraint on blob_sha"
        );
        assert!(
            embeddings_schema.contains("embedding BLOB"),
            "code_embeddings should have embedding BLOB column"
        );
        assert!(
            embeddings_schema.contains("embedding_dim"),
            "code_embeddings should have embedding_dim column"
        );
        let blob_index_exists: bool = conn
            .query_row(
                "SELECT 1 FROM sqlite_master WHERE type='index' AND name='idx_embeddings_blob'",
                [],
                |_| Ok(true),
            )
            .unwrap_or(false);
        assert!(blob_index_exists, "Index idx_embeddings_blob should exist");
        let vec_code_exists: bool = conn
            .query_row(
                "SELECT 1 FROM sqlite_master WHERE type='table' AND name='vec_code'",
                [],
                |_| Ok(true),
            )
            .unwrap_or(false);
        assert!(vec_code_exists, "vec_code virtual table should exist");
        let chunks_schema: String = conn
            .query_row(
                "SELECT sql FROM sqlite_master WHERE type='table' AND name='chunks'",
                [],
                |row| row.get(0),
            )
            .unwrap();
        assert!(
            !chunks_schema.contains("worktree_ids"),
            "worktree_ids column should be dropped from chunks table"
        );
    }
}