use anyhow::Result;
use rusqlite::Connection;
use crate::librarian::catalog::column_exists;
pub(super) fn add_columns(conn: &Connection) -> Result<()> {
if !column_exists(conn, "artifact", "abs_path")? {
conn.execute("ALTER TABLE artifact ADD COLUMN abs_path TEXT", [])?;
}
if !column_exists(conn, "commits", "git_root")? {
conn.execute("ALTER TABLE commits ADD COLUMN git_root TEXT", [])?;
}
Ok(())
}
use crate::librarian::workspace::WorkspaceConfig;
use std::collections::HashMap;
use std::path::PathBuf;
pub(super) fn backfill(conn: &Connection, ws: &WorkspaceConfig, drop_orphans: bool) -> Result<()> {
let has_artifact_repo = column_exists(conn, "artifact", "repo")?;
let has_artifact_rel_path = column_exists(conn, "artifact", "rel_path")?;
let has_commits_repo = column_exists(conn, "commits", "repo")?;
if !has_artifact_repo && !has_artifact_rel_path && !has_commits_repo {
return Ok(());
}
let lookup: HashMap<&str, &PathBuf> = ws
.roots
.iter()
.map(|r| (r.name.as_str(), &r.path))
.collect();
if has_artifact_repo && has_artifact_rel_path {
let orphan_ids: Vec<String> = {
let mut stmt = conn.prepare("SELECT id, repo FROM artifact WHERE abs_path IS NULL")?;
let rows =
stmt.query_map([], |r| Ok((r.get::<_, String>(0)?, r.get::<_, String>(1)?)))?;
rows.filter_map(|row| {
let (id, repo) = row.ok()?;
(!lookup.contains_key(repo.as_str())).then_some(id)
})
.collect()
};
if !orphan_ids.is_empty() {
if drop_orphans {
for id in &orphan_ids {
conn.execute("DELETE FROM artifact WHERE id = ?1", [id])?;
}
} else {
let sample: Vec<&str> = orphan_ids.iter().take(5).map(String::as_str).collect();
anyhow::bail!(
"{} artifact(s) reference unknown root: {}{}. Either restore the \
root in workspace.toml or set LIBRARIAN_MIGRATE_DROP_ORPHANS=1 \
to discard them.",
orphan_ids.len(),
sample.join(", "),
if orphan_ids.len() > 5 { ", …" } else { "" },
);
}
}
let mut stmt =
conn.prepare("SELECT id, repo, rel_path FROM artifact WHERE abs_path IS NULL")?;
let rows: Vec<(String, String, String)> = stmt
.query_map([], |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)))?
.collect::<Result<_, _>>()?;
for (id, repo, rel_path) in rows {
let root = lookup.get(repo.as_str()).expect("orphans rejected above");
let abs = root.join(&rel_path);
conn.execute(
"UPDATE artifact SET abs_path = ?1 WHERE id = ?2",
rusqlite::params![crate::util::fs::RepoPath::from(&abs), id],
)?;
}
}
if has_commits_repo {
let mut stmt = conn.prepare("SELECT hash, repo FROM commits WHERE git_root IS NULL")?;
let rows: Vec<(String, String)> = stmt
.query_map([], |r| Ok((r.get(0)?, r.get(1)?)))?
.collect::<Result<_, _>>()?;
for (hash, repo) in rows {
if let Some(root) = lookup.get(repo.as_str()) {
conn.execute(
"UPDATE commits SET git_root = ?1 WHERE hash = ?2",
rusqlite::params![crate::util::fs::RepoPath::from_path(root), hash],
)?;
}
}
}
Ok(())
}
pub(super) fn drop_legacy_and_stamp(conn: &Connection) -> Result<()> {
let has_repo = column_exists(conn, "artifact", "repo")?;
let has_rel_path = column_exists(conn, "artifact", "rel_path")?;
let has_commits_repo = column_exists(conn, "commits", "repo")?;
if !has_repo && !has_rel_path && !has_commits_repo {
conn.execute(
"INSERT OR IGNORE INTO schema_version (version) VALUES (6)",
[],
)?;
return Ok(());
}
conn.execute_batch(
r#"
BEGIN;
-- Clean up any leftover temp tables from a previously aborted attempt.
DROP TABLE IF EXISTS artifact_new;
DROP TABLE IF EXISTS commits_new;
CREATE TABLE artifact_new (
id TEXT PRIMARY KEY,
abs_path TEXT NOT NULL,
kind TEXT NOT NULL,
status TEXT NOT NULL,
title TEXT,
owners TEXT NOT NULL DEFAULT '[]',
tags TEXT NOT NULL DEFAULT '[]',
topic TEXT,
time_scope TEXT,
source TEXT,
created_at INTEGER NOT NULL,
updated_at INTEGER NOT NULL,
file_mtime INTEGER NOT NULL,
file_sha256 TEXT NOT NULL,
confidence REAL NOT NULL DEFAULT 1.0
);
INSERT INTO artifact_new
SELECT id, abs_path, kind, status, title, owners, tags, topic,
time_scope, source, created_at, updated_at, file_mtime,
file_sha256, confidence
FROM artifact;
-- DROP TABLE implicitly drops the artifact_vec_cascade_delete trigger.
DROP TABLE artifact;
ALTER TABLE artifact_new RENAME TO artifact;
CREATE UNIQUE INDEX idx_artifact_abs_path ON artifact(abs_path);
CREATE INDEX idx_artifact_kind_status ON artifact(kind, status);
CREATE TRIGGER artifact_vec_cascade_delete
AFTER DELETE ON artifact BEGIN
DELETE FROM artifact_vec WHERE id = OLD.id;
END;
CREATE TABLE commits_new (
hash TEXT PRIMARY KEY,
git_root TEXT,
authored_at INTEGER,
subject TEXT,
topo_order INTEGER
);
INSERT INTO commits_new
SELECT hash, git_root, authored_at, subject, topo_order
FROM commits;
DROP TABLE commits;
ALTER TABLE commits_new RENAME TO commits;
CREATE INDEX idx_commits_git_root ON commits(git_root, topo_order);
INSERT OR IGNORE INTO schema_version (version) VALUES (6);
COMMIT;
"#,
)?;
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
use crate::librarian::workspace::{Root, WorkspaceConfig};
use rusqlite::Connection;
use std::path::PathBuf;
fn new_db_with_legacy_row(repo: &str, rel_path: &str) -> Connection {
let conn = Connection::open_in_memory().unwrap();
conn.execute_batch(
r#"
CREATE TABLE artifact (
id TEXT PRIMARY KEY, repo TEXT NOT NULL, rel_path TEXT NOT NULL,
kind TEXT NOT NULL, status TEXT NOT NULL, title TEXT,
owners TEXT NOT NULL DEFAULT '[]', tags TEXT NOT NULL DEFAULT '[]',
topic TEXT, time_scope TEXT, source TEXT,
created_at INTEGER NOT NULL, updated_at INTEGER NOT NULL,
file_mtime INTEGER NOT NULL, file_sha256 TEXT NOT NULL,
confidence REAL NOT NULL DEFAULT 1.0
);
CREATE TABLE commits (
hash TEXT PRIMARY KEY, repo TEXT NOT NULL,
authored_at INTEGER, subject TEXT, topo_order INTEGER
);
"#,
)
.unwrap();
conn.execute(
"INSERT INTO artifact(id, repo, rel_path, kind, status, title,
created_at, updated_at, file_mtime, file_sha256)
VALUES ('a1', ?1, ?2, 'tracker', 'active', 't', 0, 0, 0, 'sha')",
rusqlite::params![repo, rel_path],
)
.unwrap();
add_columns(&conn).unwrap();
conn
}
fn ws_with(root_name: &str, root_path: &str) -> WorkspaceConfig {
WorkspaceConfig {
roots: vec![Root {
name: root_name.into(),
path: PathBuf::from(root_path),
}],
ignore: vec![],
rules: vec![],
umbrellas: vec![],
}
}
fn seed_v3_db(db_path: &std::path::Path) {
let conn = rusqlite::Connection::open(db_path).unwrap();
conn.execute_batch(
r#"
CREATE TABLE artifact (
id TEXT PRIMARY KEY, repo TEXT NOT NULL, rel_path TEXT NOT NULL,
kind TEXT NOT NULL, status TEXT NOT NULL, title TEXT,
owners TEXT NOT NULL DEFAULT '[]', tags TEXT NOT NULL DEFAULT '[]',
topic TEXT, time_scope TEXT, source TEXT,
created_at INTEGER NOT NULL, updated_at INTEGER NOT NULL,
file_mtime INTEGER NOT NULL, file_sha256 TEXT NOT NULL,
confidence REAL NOT NULL DEFAULT 1.0
);
CREATE TABLE commits (
hash TEXT PRIMARY KEY, repo TEXT NOT NULL,
authored_at INTEGER, subject TEXT, topo_order INTEGER
);
CREATE TABLE schema_version (version INTEGER PRIMARY KEY);
INSERT OR IGNORE INTO schema_version (version) VALUES (3);
"#,
)
.unwrap();
conn.execute(
"INSERT INTO artifact(id, repo, rel_path, kind, status, title,
created_at, updated_at, file_mtime, file_sha256)
VALUES ('a1', 'r', 'docs/x.md', 'tracker', 'active', 't', 0, 0, 0, 'sha')",
[],
)
.unwrap();
}
#[test]
fn migration_v6_translates_repo_to_abs_path() {
let conn = new_db_with_legacy_row("code-explorer", "docs/trackers/foo.md");
let ws = ws_with("code-explorer", "/home/u/work/code-explorer");
backfill(&conn, &ws, false).unwrap();
let abs: String = conn
.query_row("SELECT abs_path FROM artifact WHERE id = 'a1'", [], |r| {
r.get(0)
})
.unwrap();
assert_eq!(abs, "/home/u/work/code-explorer/docs/trackers/foo.md");
}
#[test]
fn migration_v6_fails_loudly_on_orphans() {
let conn = new_db_with_legacy_row("ghost", "x.md");
let ws = ws_with("alive", "/abs/alive");
let err = backfill(&conn, &ws, false).unwrap_err();
assert!(err.to_string().contains("ghost") || err.to_string().contains("a1"));
let count: i64 = conn
.query_row("SELECT COUNT(*) FROM artifact WHERE id = 'a1'", [], |r| {
r.get(0)
})
.unwrap();
assert_eq!(count, 1);
}
#[test]
fn migration_v6_drops_orphans_when_opt_in() {
let conn = new_db_with_legacy_row("ghost", "x.md");
let ws = ws_with("alive", "/abs/alive");
backfill(&conn, &ws, true).unwrap();
let count: i64 = conn
.query_row("SELECT COUNT(*) FROM artifact WHERE id = 'a1'", [], |r| {
r.get(0)
})
.unwrap();
assert_eq!(count, 0);
}
#[test]
fn migration_v6_backfill_is_idempotent() {
let conn = new_db_with_legacy_row("code-explorer", "docs/x.md");
let ws = ws_with("code-explorer", "/abs/c");
backfill(&conn, &ws, false).unwrap();
let first: String = conn
.query_row("SELECT abs_path FROM artifact WHERE id = 'a1'", [], |r| {
r.get(0)
})
.unwrap();
backfill(&conn, &ws, false).unwrap();
let second: String = conn
.query_row("SELECT abs_path FROM artifact WHERE id = 'a1'", [], |r| {
r.get(0)
})
.unwrap();
assert_eq!(first, second);
}
#[test]
fn migration_v6_handles_commits_table() {
let conn = new_db_with_legacy_row("code-explorer", "x.md");
conn.execute(
"INSERT INTO commits(hash, repo, topo_order) VALUES ('abc', 'code-explorer', 1)",
[],
)
.unwrap();
let ws = ws_with("code-explorer", "/abs/c");
backfill(&conn, &ws, false).unwrap();
let git_root: String = conn
.query_row("SELECT git_root FROM commits WHERE hash = 'abc'", [], |r| {
r.get(0)
})
.unwrap();
assert_eq!(git_root, "/abs/c");
}
#[test]
fn migration_v6_creates_backup_file() {
use std::fs;
let tmp = tempfile::TempDir::new().unwrap();
let db_path = tmp.path().join("catalog.db");
seed_v3_db(&db_path);
let ws = ws_with("r", tmp.path().to_str().unwrap());
let _ = crate::librarian::catalog::Catalog::open_with_workspace(&db_path, &ws);
let entries: Vec<_> = fs::read_dir(tmp.path())
.unwrap()
.map(|e| e.unwrap().file_name())
.collect();
assert!(
entries
.iter()
.any(|n| n.to_string_lossy().starts_with("catalog.db.pre-v6-bak.")),
"backup file not created; entries: {:?}",
entries
);
}
#[test]
fn migration_v6_full_path_translates_and_drops() {
let tmp = tempfile::TempDir::new().unwrap();
let db_path = tmp.path().join("catalog.db");
seed_v3_db(&db_path);
let ws = ws_with("r", tmp.path().to_str().unwrap());
let cat = crate::librarian::catalog::Catalog::open_with_workspace(&db_path, &ws).unwrap();
let count: i64 = cat
.conn
.query_row(
"SELECT COUNT(*) FROM artifact WHERE abs_path IS NOT NULL",
[],
|r| r.get(0),
)
.unwrap();
assert_eq!(count, 1);
let has_repo =
crate::librarian::catalog::column_exists(&cat.conn, "artifact", "repo").unwrap();
assert!(!has_repo);
let v: i64 = cat
.conn
.query_row("SELECT MAX(version) FROM schema_version", [], |r| r.get(0))
.unwrap();
assert_eq!(v, 6);
}
#[test]
fn migration_v6_full_is_idempotent() {
let tmp = tempfile::TempDir::new().unwrap();
let db_path = tmp.path().join("catalog.db");
seed_v3_db(&db_path);
let ws = ws_with("r", tmp.path().to_str().unwrap());
drop(crate::librarian::catalog::Catalog::open_with_workspace(&db_path, &ws).unwrap());
let cat = crate::librarian::catalog::Catalog::open_with_workspace(&db_path, &ws).unwrap();
let v: i64 = cat
.conn
.query_row("SELECT MAX(version) FROM schema_version", [], |r| r.get(0))
.unwrap();
assert_eq!(v, 6);
}
}