#![doc = ""]
#![doc = include_str!("../README.md")]
use anyhow::{Context, Result};
use rusqlite::ffi::sqlite3_auto_extension;
use rusqlite::{params, Connection, OptionalExtension};
use serde::Serialize;
use sqlite_vec::sqlite3_vec_init;
use tracing::{info, warn};
use cartog_core::{Edge, EdgeKind, EdgeProvenance, FileInfo, Symbol, SymbolKind, Visibility};
/// Errors produced while opening, preparing, or migrating the cartog database.
///
/// Most variants carry the underlying `rusqlite::Error` / `std::io::Error` as
/// their `#[source]` and also interpolate it into the display message.
#[derive(Debug, thiserror::Error)]
pub enum DbError {
/// The database file at `path` could not be opened.
#[error("failed to open database at {path}: {source}")]
Open {
path: std::path::PathBuf,
#[source]
source: rusqlite::Error,
},
/// The directory that should hold the database (`path`) could not be prepared.
#[error("failed to prepare database directory {path}: {source}")]
PrepareDir {
path: std::path::PathBuf,
#[source]
source: std::io::Error,
},
/// Setting the startup pragmas failed.
#[error("failed to set startup pragmas: {0}")]
Pragma(#[source] rusqlite::Error),
/// Creating the core schema failed.
#[error("failed to create schema: {0}")]
Schema(#[source] rusqlite::Error),
/// Creating the RAG schema failed.
#[error("failed to create RAG schema: {0}")]
RagSchema(#[source] rusqlite::Error),
/// The safety backup to `path`, taken before a destructive migration, failed.
#[error("failed to back up database before destructive migration to {path}: {source}")]
BackupFailed {
path: std::path::PathBuf,
#[source]
source: rusqlite::Error,
},
/// Creating or migrating the embedding-dimension state failed.
#[error("embedding dimension migration failed: {0}")]
EmbeddingDimension(#[source] rusqlite::Error),
/// The stored `schema_version` (`stored`) differs from the one this binary
/// expects (`expected`).
#[error(
"schema_version mismatch: this binary expects {expected}, DB has {stored} \
(a different cartog process upgraded the schema; restart this session)"
)]
SchemaDrift { expected: u32, stored: u32 },
/// Any other SQLite error; converted via `From` and displayed transparently.
#[error(transparent)]
Sqlite(#[from] rusqlite::Error),
}
/// Convenience alias for results whose error type is [`DbError`].
pub type DbResult<T> = std::result::Result<T, DbError>;
/// Upsert (`INSERT OR REPLACE`) for one `symbols` row. Binds 15 positional
/// parameters in the column order listed in the statement.
const SQL_INSERT_SYMBOL: &str = "INSERT OR REPLACE INTO symbols
(id, name, kind, file_path, start_line, end_line, start_byte, end_byte,
parent_id, signature, visibility, is_async, docstring, content_hash, subtree_hash)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15)";
/// Plain insert for one `edges` row. Binds 8 positional parameters; the
/// `id` column is not listed in the statement.
const SQL_INSERT_EDGE: &str = "INSERT INTO edges
(source_id, target_name, target_id, kind, file_path, line, resolution_state, resolution_source)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)";
/// Core DDL batch: the `symbols`, `edges`, `files`, `metadata`, and
/// `query_log` tables plus their secondary indexes. Every statement is guarded
/// by `IF NOT EXISTS`. The text (including the SQL comments) is sent to SQLite
/// as-is, so edits here change what is executed.
const SCHEMA: &str = r#"
CREATE TABLE IF NOT EXISTS symbols (
id TEXT PRIMARY KEY,
name TEXT NOT NULL,
kind TEXT NOT NULL,
file_path TEXT NOT NULL,
start_line INTEGER,
end_line INTEGER,
start_byte INTEGER,
end_byte INTEGER,
parent_id TEXT,
signature TEXT,
visibility TEXT,
is_async BOOLEAN DEFAULT FALSE,
docstring TEXT,
in_degree INTEGER DEFAULT 0,
content_hash TEXT,
subtree_hash TEXT
);
CREATE TABLE IF NOT EXISTS edges (
id INTEGER PRIMARY KEY AUTOINCREMENT,
source_id TEXT NOT NULL,
target_name TEXT NOT NULL,
target_id TEXT,
kind TEXT NOT NULL,
file_path TEXT NOT NULL,
line INTEGER,
-- 0 = unresolved (heuristic + LSP not yet definitive), 1 = resolved,
-- 2 = unresolvable (LSP definitively returned no definition: typo, dyn dispatch, macro),
-- 3 = external (LSP located the target outside the indexed root: stdlib, deps, node_modules).
resolution_state INTEGER NOT NULL DEFAULT 0,
-- Which tier/source resolved target_id (EdgeProvenance::as_str), or NULL for
-- unresolved edges and rows resolved before provenance tracking existed.
resolution_source TEXT,
FOREIGN KEY (source_id) REFERENCES symbols(id)
);
CREATE TABLE IF NOT EXISTS files (
path TEXT PRIMARY KEY,
last_modified REAL,
hash TEXT,
language TEXT,
num_symbols INTEGER DEFAULT 0
);
CREATE TABLE IF NOT EXISTS metadata (
key TEXT PRIMARY KEY,
value TEXT
);
-- query_log feeds `cartog stats --savings` / `cartog savings`. One row per
-- successful read tool call (CLI or MCP). No query payload is stored — just
-- which tool, when, and the call surface — to keep the local-first promise.
CREATE TABLE IF NOT EXISTS query_log (
id INTEGER PRIMARY KEY AUTOINCREMENT,
tool TEXT NOT NULL,
source TEXT NOT NULL,
ts INTEGER NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_query_log_tool ON query_log(tool);
CREATE INDEX IF NOT EXISTS idx_query_log_ts ON query_log(ts);
CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name);
CREATE INDEX IF NOT EXISTS idx_symbols_kind ON symbols(kind);
CREATE INDEX IF NOT EXISTS idx_symbols_file ON symbols(file_path);
CREATE INDEX IF NOT EXISTS idx_symbols_parent ON symbols(parent_id);
-- Composite: speeds up same-directory edge resolution
-- (WHERE name = ? AND file_path LIKE ?) in `resolve_edges_pass`.
CREATE INDEX IF NOT EXISTS idx_symbols_name_file ON symbols(name, file_path);
CREATE INDEX IF NOT EXISTS idx_edges_source ON edges(source_id);
CREATE INDEX IF NOT EXISTS idx_edges_target ON edges(target_name);
CREATE INDEX IF NOT EXISTS idx_edges_target_id ON edges(target_id);
CREATE INDEX IF NOT EXISTS idx_edges_kind ON edges(kind);
-- Per-file edge delete (clear_file_data_in_tx); without it the DELETE full-scans
-- edges per file, making --force/first-index O(files×edges). idx_edges_unresolved
-- is partial (state=0) so it can't serve deletes of resolved edges.
CREATE INDEX IF NOT EXISTS idx_edges_file ON edges(file_path);
-- Tier-2 import-path lookups; kind-only index scans all imports edges per call (#109).
CREATE INDEX IF NOT EXISTS idx_edges_kind_target ON edges(kind, target_name);
-- idx_edges_unresolved (partial index on resolution_state=0) is created
-- post-migration in Database::open so pre-v4 DBs without the column don't
-- blow up at SCHEMA-load time.
"#;
/// DDL batch for the retrieval (RAG) side of the index:
/// - `symbol_content`: per-symbol text, header, and normalized name;
/// - `symbol_fts`: an FTS5 external-content table over `symbol_content`,
///   kept in sync by the insert/delete triggers defined here (no update
///   trigger is declared in this batch);
/// - `symbol_embedding_map`: integer id per `symbol_id`.
///
/// The `symbol_vec` vector table is not part of this batch; its DDL depends on
/// the embedding dimension and is built by `rag_vec_schema`.
const RAG_SCHEMA: &str = r#"
CREATE TABLE IF NOT EXISTS symbol_content (
symbol_id TEXT PRIMARY KEY,
content TEXT NOT NULL,
header TEXT NOT NULL,
normalized_name TEXT NOT NULL DEFAULT ''
);
CREATE VIRTUAL TABLE IF NOT EXISTS symbol_fts USING fts5(
symbol_name,
normalized_name,
content,
content=symbol_content,
content_rowid=rowid
);
-- Triggers to keep FTS5 in sync with symbol_content
CREATE TRIGGER IF NOT EXISTS symbol_content_ai AFTER INSERT ON symbol_content BEGIN
INSERT INTO symbol_fts(rowid, symbol_name, normalized_name, content)
VALUES (new.rowid, (SELECT name FROM symbols WHERE id = new.symbol_id), new.normalized_name, new.content);
END;
CREATE TRIGGER IF NOT EXISTS symbol_content_ad AFTER DELETE ON symbol_content BEGIN
INSERT INTO symbol_fts(symbol_fts, rowid, symbol_name, normalized_name, content)
VALUES ('delete', old.rowid, (SELECT name FROM symbols WHERE id = old.symbol_id), old.normalized_name, old.content);
END;
CREATE TABLE IF NOT EXISTS symbol_embedding_map (
id INTEGER PRIMARY KEY AUTOINCREMENT,
symbol_id TEXT NOT NULL UNIQUE
);
CREATE INDEX IF NOT EXISTS idx_embedding_map_symbol ON symbol_embedding_map(symbol_id);
"#;
/// Default embedding dimension (384). `handle_embedding_dimension` treats a
/// request for exactly this value as "no explicit preference" and keeps an
/// already-stored, different dimension.
pub const DEFAULT_EMBEDDING_DIM: usize = 384;
/// Identifies an embedding setup by provider, model, and vector dimension.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EmbeddingFingerprint {
pub provider: String,
pub model: String,
pub dimension: usize,
}
// Key names for the embedding provider / model.
// NOTE(review): presumably `metadata` table keys; their readers/writers are
// outside this section.
const EMBED_PROVIDER_KEY: &str = "embedding_provider";
const EMBED_MODEL_KEY: &str = "embedding_model";
/// Builds the DDL that creates the `symbol_vec` vec0 virtual table with a
/// single `embedding` column of `dim` floats.
fn rag_vec_schema(dim: usize) -> String {
    let mut ddl =
        String::from("CREATE VIRTUAL TABLE IF NOT EXISTS symbol_vec USING vec0(embedding float[");
    ddl.push_str(&dim.to_string());
    ddl.push_str("])");
    ddl
}
/// Directory name `.cartog`.
/// NOTE(review): presumably the per-project directory that holds the database;
/// the path join happens outside this section.
pub const DB_DIR: &str = ".cartog";
/// Database file name `db.sqlite`.
pub const DB_FILENAME: &str = "db.sqlite";
/// Legacy database file name `.cartog.db`.
/// NOTE(review): presumably the pre-`DB_DIR` location; confirm against the
/// code that handles it.
pub const LEGACY_DB_FILE: &str = ".cartog.db";
/// SQLite `busy_timeout` in milliseconds (5000); applied by `checkpoint_wal`.
pub const BUSY_TIMEOUT_MS: u32 = 5000;
// Test-only, per-thread boolean flag, initially `false`.
// NOTE(review): the name suggests fault injection in an embedding "reconcile"
// path; nothing in this section reads or sets it — confirm at the use site.
#[cfg(test)]
thread_local! {
static RECONCILE_FAIL_AFTER_MODEL: std::sync::atomic::AtomicBool =
const { std::sync::atomic::AtomicBool::new(false) };
}
/// Runs `PRAGMA wal_checkpoint(TRUNCATE)` on the database at `path`.
///
/// Returns `Ok(())` without doing anything when `path` does not exist.
/// Otherwise opens a fresh connection, sets `busy_timeout` to
/// [`BUSY_TIMEOUT_MS`], and issues the checkpoint.
///
/// # Errors
/// Fails if the database cannot be opened or the pragma batch fails; the
/// error context names the path.
pub fn checkpoint_wal(path: &std::path::Path) -> anyhow::Result<()> {
    use anyhow::Context;

    if path.exists() {
        let pragmas = format!(
            "PRAGMA busy_timeout={BUSY_TIMEOUT_MS};\nPRAGMA wal_checkpoint(TRUNCATE);"
        );
        let db = Connection::open(path)
            .with_context(|| format!("open {} for WAL checkpoint", path.display()))?;
        db.execute_batch(&pragmas)
            .with_context(|| format!("PRAGMA wal_checkpoint(TRUNCATE) on {}", path.display()))?;
    }
    Ok(())
}
/// Upper bound (100) for search result limits.
/// NOTE(review): the clamping itself happens outside this section.
pub const MAX_SEARCH_LIMIT: u32 = 100;
/// Splits an identifier into lowercase words joined by single spaces.
///
/// Word boundaries are:
/// - any non-alphanumeric character (`_`, `-`, `.`, …), which is dropped;
/// - an uppercase letter that follows a lowercase letter (`fooBar`);
/// - an uppercase letter that is followed by a lowercase letter while a word
///   is in progress (the `P` in `HTMLParser`), so acronym runs stay together.
///
/// Examples: `getHTTPResponse` → `get http response`, `__init__` → `init`.
pub fn normalize_symbol_name(name: &str) -> String {
    /// Moves a non-empty pending word into `words`, leaving `pending` empty.
    fn flush(words: &mut Vec<String>, pending: &mut String) {
        if !pending.is_empty() {
            words.push(std::mem::take(pending));
        }
    }

    let chars: Vec<char> = name.chars().collect();
    let mut words: Vec<String> = Vec::new();
    let mut pending = String::new();

    for (idx, &ch) in chars.iter().enumerate() {
        if ch.is_uppercase() {
            // A boundary is only possible when a word is already in progress;
            // that also guarantees `idx >= 1`.
            if !pending.is_empty() {
                let follows_lower = chars[idx - 1].is_lowercase();
                let precedes_lower =
                    matches!(chars.get(idx + 1), Some(next) if next.is_lowercase());
                if follows_lower || precedes_lower {
                    flush(&mut words, &mut pending);
                }
            }
            pending.extend(ch.to_lowercase());
        } else if ch.is_alphanumeric() {
            pending.extend(ch.to_lowercase());
        } else {
            // Separators and any other punctuation end the current word.
            flush(&mut words, &mut pending);
        }
    }
    flush(&mut words, &mut pending);

    words.join(" ")
}
/// Handle to a cartog SQLite database.
///
/// Wraps a `rusqlite::Connection`; the query and write methods are implemented
/// outside this section.
pub struct Database {
// Underlying SQLite connection.
conn: Connection,
// NOTE(review): presumably set only for "pinned" attaches; where it is
// populated is not visible in this section.
pinned: Option<PinnedAttach>,
}
/// A schema version paired with an optional embedding fingerprint.
/// NOTE(review): presumably a snapshot captured when a connection attaches to
/// an existing database — confirm at the construction site.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PinnedAttach {
pub schema_version: u32,
pub embedding: Option<EmbeddingFingerprint>,
}
impl std::fmt::Debug for Database {
    /// Formats as an opaque `Database { .. }`; no field is printed.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut repr = f.debug_struct("Database");
        repr.finish_non_exhaustive()
    }
}
/// Registers the sqlite-vec extension as a SQLite auto-extension.
///
/// Guarded by a `std::sync::Once`, so repeated calls register only once per
/// process.
/// NOTE(review): per the SQLite documentation, auto-extensions are applied to
/// connections opened after registration, so this should run before the
/// database is opened — confirm call order at the call sites.
pub fn register_sqlite_vec() {
use std::sync::Once;
static INIT: Once = Once::new();
// SAFETY (NOTE(review)): the transmute only reinterprets the function pointer
// type so it fits the parameter of `sqlite3_auto_extension`. Soundness relies
// on `sqlite3_vec_init` actually having the SQLite extension entry-point
// signature — confirm against the sqlite-vec crate.
INIT.call_once(|| unsafe {
#[allow(clippy::missing_transmute_annotations)]
sqlite3_auto_extension(Some(std::mem::transmute(sqlite3_vec_init as *const ())));
});
}
/// Schema version this binary writes to `metadata['schema_version']` after
/// `migrate` runs.
const SCHEMA_VERSION: u32 = 7;
/// Public re-export of [`SCHEMA_VERSION`].
pub const CURRENT_SCHEMA_VERSION: u32 = SCHEMA_VERSION;
/// Opens the database at `path` read-only (URI filenames allowed) and returns
/// its stored schema version.
///
/// A database without a `metadata` table or without a `schema_version` row
/// yields `0` (see `read_schema_version`).
///
/// # Errors
/// Fails if the file cannot be opened read-only or the version query fails.
pub fn read_schema_version_at(path: &std::path::Path) -> anyhow::Result<u32> {
    use anyhow::Context;

    let flags =
        rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY | rusqlite::OpenFlags::SQLITE_OPEN_URI;
    let conn = Connection::open_with_flags(path, flags)
        .with_context(|| format!("open {} read-only for schema check", path.display()))?;
    let version = read_schema_version(&conn)?;
    Ok(version)
}
pub fn read_metadata_at(path: &std::path::Path, key: &str) -> anyhow::Result<Option<String>> {
use anyhow::Context;
let conn = Connection::open_with_flags(
path,
rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY | rusqlite::OpenFlags::SQLITE_OPEN_URI,
)
.with_context(|| format!("open {} read-only for metadata read", path.display()))?;
match conn.query_row(
"SELECT value FROM metadata WHERE key = ?1",
rusqlite::params![key],
|row| row.get::<_, Option<String>>(0),
) {
Ok(v) => Ok(v),
Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
Err(rusqlite::Error::SqliteFailure(_, Some(ref msg)))
if msg.contains("no such table: metadata") =>
{
Ok(None)
}
Err(e) => Err(e).with_context(|| format!("read metadata[{key}] from {}", path.display())),
}
}
/// Reports whether `sqlite_master` lists a table or view named `symbol_vec`.
fn symbol_vec_exists(conn: &Connection) -> std::result::Result<bool, rusqlite::Error> {
    let hit = conn
        .query_row(
            "SELECT 1 FROM sqlite_master WHERE type IN ('table','view') AND name='symbol_vec'",
            [],
            |row| row.get::<_, i64>(0),
        )
        .optional()?;
    Ok(hit.is_some())
}
/// Reads `metadata['schema_version']` as an integer.
///
/// Returns `0` when there is no such row or when the `metadata` table does not
/// exist; every other SQLite error is returned as [`DbError::Sqlite`].
fn read_schema_version(conn: &Connection) -> std::result::Result<u32, DbError> {
    let stored = conn
        .query_row(
            "SELECT CAST(value AS INTEGER) FROM metadata WHERE key = 'schema_version'",
            [],
            |row| row.get::<_, u32>(0),
        )
        .optional();

    match stored {
        // `None` here means the query ran but found no version row.
        Ok(version) => Ok(version.unwrap_or(0)),
        Err(rusqlite::Error::SqliteFailure(_, Some(ref msg)))
            if msg.contains("no such table: metadata") =>
        {
            Ok(0)
        }
        Err(other) => Err(DbError::Sqlite(other)),
    }
}
/// Deletes all rows from the indexed tables and forgets `last_commit`.
///
/// Every statement is best-effort: errors are discarded, and the remaining
/// statements still run.
fn wipe_index_for_rebuild(conn: &Connection) {
    for table in [
        "symbol_content",
        "edges",
        "symbols",
        "files",
        "symbol_vec",
        "symbol_embedding_map",
    ] {
        let _ = conn.execute(&format!("DELETE FROM {table}"), []);
    }
    let _ = conn.execute("DELETE FROM metadata WHERE key = 'last_commit'", []);
}

/// Brings an existing database up to [`SCHEMA_VERSION`].
///
/// The stored version (defaulting to 1 when it cannot be read) is combined
/// with direct probes of the table shape, so a step also runs when the version
/// row claims it is done but the column/table is missing. Steps v3 and v7
/// clear the index (`wipe_index_for_rebuild`). Individual statements are
/// best-effort; only the v6 column add and the final version stamp log a
/// warning on failure. The function never returns an error.
fn migrate(conn: &Connection) {
    let stored_version: u32 = conn
        .query_row(
            "SELECT CAST(value AS INTEGER) FROM metadata WHERE key = 'schema_version'",
            [],
            |row| row.get(0),
        )
        .unwrap_or(1);

    // A statement that prepares successfully proves the table/column exists.
    let compiles = |sql: &str| conn.prepare(sql).is_ok();
    let has_hash_cols = compiles("SELECT content_hash FROM symbols LIMIT 0");
    let has_resolution_state = compiles("SELECT resolution_state FROM edges LIMIT 0");
    let has_query_log = compiles("SELECT 1 FROM query_log LIMIT 0");
    let has_resolution_source = compiles("SELECT resolution_source FROM edges LIMIT 0");
    let shape_is_current =
        has_hash_cols && has_resolution_state && has_query_log && has_resolution_source;

    if stored_version >= SCHEMA_VERSION && shape_is_current {
        return;
    }

    let stamp_version = || {
        conn.execute(
            "INSERT OR REPLACE INTO metadata (key, value) VALUES ('schema_version', ?1)",
            params![SCHEMA_VERSION.to_string()],
        )
    };

    // Freshly created database: current shape, no version row, no symbols.
    // Stamp it and skip the migration steps (and their wipes) entirely.
    let no_version_row = conn
        .query_row(
            "SELECT 1 FROM metadata WHERE key = 'schema_version'",
            [],
            |_| Ok(()),
        )
        .is_err();
    let symbols_empty = matches!(
        conn.query_row("SELECT COUNT(*) FROM symbols", [], |r| r.get::<_, i64>(0)),
        Ok(0)
    );
    if no_version_row && symbols_empty && shape_is_current {
        if let Err(e) = stamp_version() {
            warn!(error = %e, "failed to stamp fresh-DB schema version");
        }
        return;
    }

    if stored_version < 2 {
        let _ = conn.execute(
            "ALTER TABLE symbols ADD COLUMN in_degree INTEGER DEFAULT 0",
            [],
        );
    }

    if stored_version < 3 || !has_hash_cols {
        info!("schema v3: stable symbol IDs — clearing index for full rebuild");
        for column in ["content_hash", "subtree_hash"] {
            let _ = conn.execute(&format!("ALTER TABLE symbols ADD COLUMN {column} TEXT"), []);
        }
        wipe_index_for_rebuild(conn);
    }

    if stored_version < 4 || !has_resolution_state {
        info!("schema v4: adding edges.resolution_state column");
        let _ = conn.execute(
            "ALTER TABLE edges ADD COLUMN resolution_state INTEGER NOT NULL DEFAULT 0",
            [],
        );
        // Edges that already carry a target count as resolved.
        let _ = conn.execute(
            "UPDATE edges SET resolution_state = 1 WHERE target_id IS NOT NULL",
            [],
        );
    }

    if stored_version < 5 || !has_query_log {
        info!("schema v5: query_log table");
        // The multi-line literal is kept verbatim (unindented) so the SQL
        // text sent to SQLite is unchanged.
        let statements = [
            "CREATE TABLE IF NOT EXISTS query_log (
id INTEGER PRIMARY KEY AUTOINCREMENT,
tool TEXT NOT NULL,
source TEXT NOT NULL,
ts INTEGER NOT NULL
)",
            "CREATE INDEX IF NOT EXISTS idx_query_log_tool ON query_log(tool)",
            "CREATE INDEX IF NOT EXISTS idx_query_log_ts ON query_log(ts)",
        ];
        for ddl in statements {
            let _ = conn.execute(ddl, []);
        }
    }

    if stored_version < 6 || !has_resolution_source {
        info!("schema v6: adding edges.resolution_source column");
        let added = conn.execute("ALTER TABLE edges ADD COLUMN resolution_source TEXT", []);
        if let Err(e) = added {
            warn!(error = %e, "failed to add edges.resolution_source column");
        }
    }

    if stored_version < 7 {
        info!("schema v7: symbol-ID escaping — clearing index for full rebuild");
        wipe_index_for_rebuild(conn);
    }

    if let Err(e) = stamp_version() {
        warn!(error = %e, "failed to store schema version");
    }
}
const MIGRATION_RETRY_BACKOFF_MS: &[u64] = &[50, 100, 250, 500, 1000];
fn retry_busy<T, F>(mut op: F) -> std::result::Result<T, rusqlite::Error>
where
F: FnMut() -> std::result::Result<T, rusqlite::Error>,
{
let mut attempt = 0usize;
loop {
match op() {
Ok(v) => return Ok(v),
Err(e) => {
let busy = matches!(
e,
rusqlite::Error::SqliteFailure(
rusqlite::ffi::Error {
code: rusqlite::ErrorCode::DatabaseBusy
| rusqlite::ErrorCode::DatabaseLocked,
..
},
_
)
);
if !busy || attempt >= MIGRATION_RETRY_BACKOFF_MS.len() {
return Err(e);
}
let delay_ms = MIGRATION_RETRY_BACKOFF_MS[attempt];
tracing::debug!(
attempt = attempt + 1,
delay_ms,
"retrying embedding-dimension write after SQLITE_BUSY"
);
std::thread::sleep(std::time::Duration::from_millis(delay_ms));
attempt += 1;
}
}
}
}
/// Ensures the `symbol_vec` virtual table exists with the effective embedding
/// dimension and records that dimension in `metadata['embedding_dimension']`.
///
/// The effective dimension is `requested_dim`, except when the caller passed
/// [`DEFAULT_EMBEDDING_DIM`] and a different dimension is already stored — then
/// the stored value wins.
///
/// Nothing is written when the stored dimension already equals the effective
/// one and `symbol_vec` exists. Otherwise, if a dimension was stored before,
/// `symbol_vec` is dropped and `symbol_embedding_map` is emptied; then the
/// table is (re)created and the dimension is stored. The writes run in a
/// single transaction, retried through `retry_busy`.
///
/// # Errors
/// Returns the `rusqlite::Error` from the `symbol_vec` existence probe or from
/// the last attempt of the retried write.
fn handle_embedding_dimension(
conn: &Connection,
requested_dim: usize,
) -> std::result::Result<(), rusqlite::Error> {
// Any failure to read the stored value (no row, no table, other error) is
// folded into `None` by `.ok()`.
let stored_dim: Option<usize> = conn
.query_row(
"SELECT CAST(value AS INTEGER) FROM metadata WHERE key = 'embedding_dimension'",
[],
// NOTE(review): `as usize` wraps for a negative stored value.
|row| row.get::<_, i64>(0).map(|v| v as usize),
)
.ok();
// A request for the default dimension does not override a stored non-default one.
let effective_dim = match stored_dim {
Some(old) if requested_dim == DEFAULT_EMBEDDING_DIM && old != DEFAULT_EMBEDDING_DIM => old,
_ => requested_dim,
};
if stored_dim == Some(effective_dim) && symbol_vec_exists(conn)? {
return Ok(());
}
let schema = rag_vec_schema(effective_dim);
// True whenever a dimension was stored before — including the case where the
// dimension is unchanged but `symbol_vec` is missing.
// NOTE(review): in that case the warning below still says "changed".
let needs_wipe = stored_dim.is_some();
retry_busy(|| {
let tx = conn.unchecked_transaction()?;
if needs_wipe {
// `needs_wipe` implies `stored_dim` is `Some`, so the 0 fallback is unused.
let old_dim = stored_dim.unwrap_or(0);
// This closure runs once per attempt, so the warning repeats on retries.
tracing::warn!(
old = old_dim,
new = effective_dim,
"Embedding dimension changed — clearing vector index. Run `cartog rag index` to re-embed."
);
tx.execute("DROP TABLE IF EXISTS symbol_vec", [])?;
tx.execute("DELETE FROM symbol_embedding_map", [])?;
}
tx.execute_batch(&schema)?;
tx.execute(
"INSERT OR REPLACE INTO metadata (key, value) VALUES ('embedding_dimension', ?1)",
params![effective_dim.to_string()],
)?;
tx.commit()
})?;
Ok(())
}
/// Snapshots the database with `VACUUM INTO` before a migration that will
/// clear indexed data.
///
/// Does nothing (returns `Ok(())`) when any of these hold:
/// - the stored schema version is at least 7 and `symbols.content_hash` exists
///   (no wipe is expected);
/// - none of the indexed tables contains a row;
/// - `db_path` is empty, `:memory:`, or starts with `file:`.
///
/// The backup is written next to the database as
/// `<db_path>.pre-v<stored version>-<unix seconds>.bak`.
///
/// # Errors
/// Returns [`DbError::BackupFailed`] when the `VACUUM INTO` statement fails.
fn backup_before_destructive_migration(
    conn: &Connection,
    db_path: &std::path::Path,
) -> DbResult<()> {
    let stored_version: u32 = conn
        .query_row(
            "SELECT CAST(value AS INTEGER) FROM metadata WHERE key = 'schema_version'",
            [],
            |row| row.get(0),
        )
        .unwrap_or(1);
    let hash_cols_present = conn
        .prepare("SELECT content_hash FROM symbols LIMIT 0")
        .is_ok();
    if stored_version >= 7 && hash_cols_present {
        return Ok(());
    }

    // A table that is missing or unreadable counts as having no rows.
    let table_has_rows = |table: &str| -> bool {
        let sql = format!("SELECT EXISTS(SELECT 1 FROM {table})");
        matches!(
            conn.query_row(&sql, [], |r| r.get::<_, bool>(0)),
            Ok(true)
        )
    };
    let indexed_tables = [
        "symbols",
        "edges",
        "files",
        "symbol_content",
        "symbol_embedding_map",
    ];
    if !indexed_tables.iter().any(|table| table_has_rows(table)) {
        return Ok(());
    }

    // In-memory and URI-addressed databases are not backed up.
    let raw_path = db_path.to_string_lossy();
    let file_backed =
        !(raw_path.is_empty() || raw_path == ":memory:" || raw_path.starts_with("file:"));
    if !file_backed {
        return Ok(());
    }

    let ts = std::time::SystemTime::now()
        .duration_since(std::time::UNIX_EPOCH)
        .map_or(0, |elapsed| elapsed.as_secs());
    let mut backup_name = db_path.as_os_str().to_owned();
    backup_name.push(format!(".pre-v{stored_version}-{ts}.bak"));
    let backup_path = std::path::PathBuf::from(backup_name);

    // Single quotes are doubled so the path is a valid SQL string literal.
    let vacuum_sql = format!(
        "VACUUM INTO '{}'",
        backup_path.to_string_lossy().replace('\'', "''")
    );
    if let Err(source) = conn.execute(&vacuum_sql, []) {
        return Err(DbError::BackupFailed {
            path: backup_path,
            source,
        });
    }

    let symbol_count: i64 = conn
        .query_row("SELECT COUNT(*) FROM symbols", [], |row| row.get(0))
        .unwrap_or(0);
    info!(
        backup = %backup_path.display(),
        old_version = stored_version,
        new_version = SCHEMA_VERSION,
        symbols = symbol_count,
        "schema migration will clear indexed data — created backup"
    );
    Ok(())
}
mod store;
pub use store::queries::PathHop;
pub use store::rag::KindScope;
/// Identifies one edge by row id, together with its target name and source
/// location.
/// NOTE(review): presumably a projection of `edges` rows that still lack a
/// resolved target; the query that produces it is outside this section.
#[derive(Debug, Clone)]
pub struct UnresolvedEdge {
pub edge_id: i64,
pub target_name: String,
pub file_path: String,
pub line: u32,
}
/// Aggregate counts describing the index; serializable and JSON-schema aware.
/// NOTE(review): the resolved / unresolvable / external counters presumably
/// follow the `edges.resolution_state` values 1 / 2 / 3, and the two vectors
/// are presumably (name, count) pairs — confirm against the query that fills
/// this struct.
#[derive(Debug, Clone, Serialize, schemars::JsonSchema)]
pub struct IndexStats {
pub num_files: u32,
pub num_symbols: u32,
pub num_edges: u32,
pub num_resolved: u32,
pub num_unresolvable: u32,
pub num_external: u32,
pub languages: Vec<(String, u32)>,
pub symbol_kinds: Vec<(String, u32)>,
}
/// Token-savings summary derived from the query log.
///
/// `by_tool` and `by_source` hold (name, query count) pairs. The in-file tests
/// pin the token totals to `total_queries` multiplied by
/// [`TOKENS_PER_QUERY_CARTOG`], [`TOKENS_PER_QUERY_GREP`] and
/// [`TOKENS_SAVED_PER_QUERY`] respectively; `baseline_delta` carries
/// [`TOKENS_SAVED_PER_QUERY`].
#[derive(Debug, Clone, Serialize)]
pub struct SavingsReport {
pub by_tool: Vec<(String, u64)>,
pub by_source: Vec<(String, u64)>,
pub total_queries: u64,
pub tokens_used_cartog: u64,
pub tokens_used_grep: u64,
pub estimated_tokens_saved: u64,
pub percent_saved: u8,
pub baseline_delta: u32,
}
/// Token cost attributed to one cartog query (280).
pub const TOKENS_PER_QUERY_CARTOG: u32 = 280;
/// Token cost attributed to the equivalent grep-based query (1 700).
pub const TOKENS_PER_QUERY_GREP: u32 = 1_700;
/// Per-query saving: grep cost minus cartog cost.
pub const TOKENS_SAVED_PER_QUERY: u32 = TOKENS_PER_QUERY_GREP - TOKENS_PER_QUERY_CARTOG;
// Process-wide flag, initially `false`.
// NOTE(review): presumably latched so a failing `log_query` is reported only
// once; it is read and set outside this section.
static LOG_QUERY_FAILURE_REPORTED: std::sync::atomic::AtomicBool =
std::sync::atomic::AtomicBool::new(false);
/// Builds the all-zero report: no per-tool or per-source rows, zero totals,
/// and `baseline_delta` set to [`TOKENS_SAVED_PER_QUERY`].
fn empty_savings_report() -> SavingsReport {
    let (by_tool, by_source) = (Vec::new(), Vec::new());
    SavingsReport {
        by_tool,
        by_source,
        total_queries: 0,
        tokens_used_cartog: 0,
        tokens_used_grep: 0,
        estimated_tokens_saved: 0,
        percent_saved: 0,
        baseline_delta: TOKENS_SAVED_PER_QUERY,
    }
}
fn is_no_such_table(e: &rusqlite::Error) -> bool {
matches!(
e,
rusqlite::Error::SqliteFailure(_, Some(msg)) if msg.contains("no such table")
)
}
/// Decodes a [`Symbol`] from a row whose symbol columns start at index 0.
fn row_to_symbol(row: &rusqlite::Row<'_>) -> rusqlite::Result<Symbol> {
    const FIRST_COLUMN: usize = 0;
    row_to_symbol_offset(row, FIRST_COLUMN)
}
fn row_to_symbol_offset(row: &rusqlite::Row<'_>, off: usize) -> rusqlite::Result<Symbol> {
let kind_str = row.get::<_, String>(off + 2)?;
let kind = kind_str.parse().unwrap_or_else(|_| {
warn!(kind = %kind_str, "unknown symbol kind, defaulting to variable");
SymbolKind::Variable
});
let vis_str = row.get::<_, Option<String>>(off + 10)?.unwrap_or_default();
Ok(Symbol {
id: row.get(off)?,
name: row.get(off + 1)?,
kind,
file_path: row.get(off + 3)?,
start_line: row.get(off + 4)?,
end_line: row.get(off + 5)?,
start_byte: row.get(off + 6)?,
end_byte: row.get(off + 7)?,
parent_id: row.get(off + 8)?,
signature: row.get(off + 9)?,
visibility: Visibility::from_str_lossy(&vis_str),
is_async: row.get(off + 11)?,
docstring: row.get(off + 12)?,
in_degree: row.get(off + 13).unwrap_or(0),
content_hash: row.get(off + 14).unwrap_or(None),
subtree_hash: row.get(off + 15).unwrap_or(None),
})
}
/// Picks between two `(id, kind)` candidates by kind priority.
///
/// Returns the id of the candidate whose kind ranks higher in
/// `kind_priority`, or `None` when both rank the same.
fn disambiguate_two<'a>(a: &'a (String, String), b: &'a (String, String)) -> Option<&'a String> {
    let rank_a = kind_priority(&a.1);
    let rank_b = kind_priority(&b.1);
    if rank_a > rank_b {
        Some(&a.0)
    } else if rank_a < rank_b {
        Some(&b.0)
    } else {
        None
    }
}
/// Ranks a symbol-kind string for disambiguation: type-like kinds (3) beat
/// `function` (2), which beats `method` (1); everything else ranks 0.
fn kind_priority(kind: &str) -> u8 {
    const TYPE_LIKE: [&str; 5] = ["class", "interface", "enum", "type_alias", "trait"];
    if TYPE_LIKE.contains(&kind) {
        3
    } else if kind == "function" {
        2
    } else if kind == "method" {
        1
    } else {
        0
    }
}
fn edge_from_row(row: &rusqlite::Row<'_>, base: usize) -> rusqlite::Result<Edge> {
let kind_str = row.get::<_, String>(base + 3)?;
let kind = kind_str.parse().unwrap_or_else(|_| {
warn!(kind = %kind_str, "unknown edge kind, defaulting to references");
EdgeKind::References
});
let provenance = match row.get::<_, Option<String>>(base + 6)? {
Some(s) => s.parse::<EdgeProvenance>().ok().or_else(|| {
warn!(source = %s, "unknown edge provenance, dropping to None");
None
}),
None => None,
};
Ok(Edge {
source_id: row.get(base)?,
target_name: row.get(base + 1)?,
target_id: row.get(base + 2)?,
kind,
file_path: row.get(base + 4)?,
line: row.get(base + 5)?,
provenance,
})
}
/// Decodes an [`Edge`] from a row whose edge columns start at index 1
/// (index 0 is skipped).
fn row_to_edge(row: &rusqlite::Row<'_>) -> rusqlite::Result<Edge> {
    const EDGE_COLUMNS_START: usize = 1;
    edge_from_row(row, EDGE_COLUMNS_START)
}
#[cfg(test)]
mod tests {
use super::*;
fn test_symbol(name: &str, kind: SymbolKind, file: &str, line: u32) -> Symbol {
Symbol::new(name, kind, file, line, line + 5, 0, 100, None)
}
#[test]
fn test_normalize_snake_case() {
assert_eq!(normalize_symbol_name("validate_token"), "validate token");
assert_eq!(
normalize_symbol_name("get_current_user"),
"get current user"
);
assert_eq!(normalize_symbol_name("_private_method"), "private method");
assert_eq!(normalize_symbol_name("__init__"), "init");
}
#[test]
fn test_normalize_camel_case() {
assert_eq!(normalize_symbol_name("validateToken"), "validate token");
assert_eq!(normalize_symbol_name("getCurrentUser"), "get current user");
assert_eq!(normalize_symbol_name("findByToken"), "find by token");
}
#[test]
fn test_normalize_pascal_case() {
assert_eq!(
normalize_symbol_name("DatabaseConnection"),
"database connection"
);
assert_eq!(normalize_symbol_name("AuthService"), "auth service");
assert_eq!(normalize_symbol_name("TokenError"), "token error");
}
#[test]
fn test_normalize_screaming_snake() {
assert_eq!(normalize_symbol_name("TOKEN_EXPIRY"), "token expiry");
assert_eq!(normalize_symbol_name("MAX_RETRY_COUNT"), "max retry count");
}
#[test]
fn test_normalize_acronyms() {
assert_eq!(
normalize_symbol_name("getHTTPResponse"),
"get http response"
);
assert_eq!(normalize_symbol_name("parseJSON"), "parse json");
assert_eq!(normalize_symbol_name("HTMLParser"), "html parser");
}
#[test]
fn test_normalize_single_word() {
assert_eq!(normalize_symbol_name("validate"), "validate");
assert_eq!(normalize_symbol_name("Token"), "token");
}
#[test]
fn test_normalize_empty_and_special() {
assert_eq!(normalize_symbol_name(""), "");
assert_eq!(normalize_symbol_name("_"), "");
assert_eq!(normalize_symbol_name("___"), "");
}
#[test]
fn test_insert_and_query_symbols() {
let db = Database::open_memory().unwrap();
let sym = test_symbol("my_func", SymbolKind::Function, "test.py", 10);
db.insert_symbol(&sym).unwrap();
let outline = db.outline("test.py").unwrap();
assert_eq!(outline.len(), 1);
assert_eq!(outline[0].name, "my_func");
}
#[test]
fn test_optimize_populates_planner_stats() {
let db = Database::open_memory().unwrap();
let syms: Vec<_> = (0..2000)
.map(|i| test_symbol(&format!("f{i}"), SymbolKind::Function, "a.py", i + 1))
.collect();
db.insert_symbols(&syms).unwrap();
db.optimize().unwrap();
let analyzed: i64 = db
.conn
.query_row(
"SELECT COUNT(*) FROM sqlite_master WHERE name = 'sqlite_stat1'",
[],
|row| row.get(0),
)
.unwrap();
assert_eq!(analyzed, 1, "PRAGMA optimize must create sqlite_stat1");
}
#[test]
fn test_optimize_is_safe_on_empty_db() {
let db = Database::open_memory().unwrap();
db.optimize().unwrap(); }
#[test]
fn is_empty_reflects_symbol_presence() {
let db = Database::open_memory().unwrap();
assert!(db.is_empty().unwrap(), "fresh DB should be empty");
db.insert_symbol(&test_symbol("f", SymbolKind::Function, "a.py", 1))
.unwrap();
assert!(!db.is_empty().unwrap(), "DB with a symbol is not empty");
}
#[test]
fn test_insert_and_query_edges() {
let db = Database::open_memory().unwrap();
let caller = test_symbol("caller_fn", SymbolKind::Function, "a.py", 1);
let callee = test_symbol("callee_fn", SymbolKind::Function, "b.py", 1);
db.insert_symbol(&caller).unwrap();
db.insert_symbol(&callee).unwrap();
let edge = Edge {
source_id: caller.id.clone(),
target_name: "callee_fn".to_string(),
target_id: None,
kind: EdgeKind::Calls,
file_path: "a.py".to_string(),
line: 5,
provenance: None,
};
db.insert_edge(&edge).unwrap();
let refs = db.refs("callee_fn", None).unwrap();
assert_eq!(refs.len(), 1);
assert_eq!(refs[0].0.source_id, caller.id);
}
#[test]
fn test_edge_resolution() {
let db = Database::open_memory().unwrap();
let sym_a = test_symbol("process", SymbolKind::Function, "a.py", 1);
let sym_b = test_symbol("helper", SymbolKind::Function, "a.py", 20);
db.insert_symbols(&[sym_a.clone(), sym_b.clone()]).unwrap();
let edge = Edge {
source_id: sym_a.id.clone(),
target_name: "helper".to_string(),
target_id: None,
kind: EdgeKind::Calls,
file_path: "a.py".to_string(),
line: 5,
provenance: None,
};
db.insert_edge(&edge).unwrap();
let resolved = db.resolve_edges().unwrap();
assert_eq!(resolved, 1);
}
#[test]
fn test_stats() {
let db = Database::open_memory().unwrap();
let file = FileInfo {
path: "test.py".to_string(),
last_modified: 0.0,
hash: "abc".to_string(),
language: "python".to_string(),
num_symbols: 2,
};
db.upsert_file(&file).unwrap();
let sym = test_symbol("foo", SymbolKind::Function, "test.py", 1);
db.insert_symbol(&sym).unwrap();
let stats = db.stats().unwrap();
assert_eq!(stats.num_files, 1);
assert_eq!(stats.num_symbols, 1);
}
#[test]
fn savings_breakdown_empty_returns_zero() {
let db = Database::open_memory().unwrap();
let r = db.savings_breakdown().unwrap();
assert_eq!(r.total_queries, 0);
assert_eq!(r.tokens_used_cartog, 0);
assert_eq!(r.tokens_used_grep, 0);
assert_eq!(r.estimated_tokens_saved, 0);
assert_eq!(r.percent_saved, 0);
assert!(r.by_tool.is_empty());
assert!(r.by_source.is_empty());
assert_eq!(r.baseline_delta, TOKENS_SAVED_PER_QUERY);
}
#[test]
fn log_query_persists_rows_aggregated_by_tool_and_source() {
let db = Database::open_memory().unwrap();
db.log_query("search", "cli");
db.log_query("search", "cli");
db.log_query("refs", "cli");
db.log_query("search", "mcp");
db.log_query("impact", "mcp");
let r = db.savings_breakdown().unwrap();
assert_eq!(r.total_queries, 5);
assert_eq!(r.tokens_used_cartog, 5 * TOKENS_PER_QUERY_CARTOG as u64);
assert_eq!(r.tokens_used_grep, 5 * TOKENS_PER_QUERY_GREP as u64);
assert_eq!(r.estimated_tokens_saved, 5 * TOKENS_SAVED_PER_QUERY as u64);
assert_eq!(r.percent_saved, 83);
let tool_counts: Vec<_> = r.by_tool.iter().map(|(t, c)| (t.as_str(), *c)).collect();
assert_eq!(tool_counts, vec![("search", 3), ("impact", 1), ("refs", 1)]);
let src_counts: Vec<_> = r.by_source.iter().map(|(s, c)| (s.as_str(), *c)).collect();
assert_eq!(src_counts, vec![("cli", 3), ("mcp", 2)]);
}
#[test]
fn log_query_noop_on_read_only_attach() {
let dir = tempfile::TempDir::new().unwrap();
let db_path = dir.path().join("test.db");
{
let primary = Database::open(&db_path, 384).unwrap();
primary.log_query("search", "cli"); }
let reader = Database::open_readonly(&db_path).unwrap();
assert!(reader.is_read_only());
reader.log_query("search", "mcp");
reader.log_query("refs", "mcp");
let r = reader.savings_breakdown().unwrap();
assert_eq!(r.total_queries, 1);
assert_eq!(r.by_tool, vec![("search".to_string(), 1)]);
}
#[test]
fn test_resolve_edges_same_dir_priority() {
let db = Database::open_memory().unwrap();
let caller = test_symbol("process", SymbolKind::Function, "src/main.py", 1);
let same_dir = test_symbol("helper", SymbolKind::Function, "src/utils.py", 1);
let other_dir = test_symbol("helper", SymbolKind::Function, "lib/utils.py", 1);
db.insert_symbols(&[caller.clone(), same_dir.clone(), other_dir.clone()])
.unwrap();
let edge = Edge {
source_id: caller.id.clone(),
target_name: "helper".to_string(),
target_id: None,
kind: EdgeKind::Calls,
file_path: "src/main.py".to_string(),
line: 5,
provenance: None,
};
db.insert_edge(&edge).unwrap();
let resolved = db.resolve_edges().unwrap();
assert_eq!(resolved, 1);
let refs = db.refs("helper", None).unwrap();
let call_edge = refs
.iter()
.find(|(e, _)| e.kind == EdgeKind::Calls)
.unwrap();
assert_eq!(call_edge.0.target_id.as_ref().unwrap(), &same_dir.id);
}
#[test]
fn test_resolve_edges_ambiguous_no_resolve() {
let db = Database::open_memory().unwrap();
let caller = test_symbol("process", SymbolKind::Function, "app/main.py", 1);
let sym1 = test_symbol("helper", SymbolKind::Function, "pkg_a/utils.py", 1);
let sym2 = test_symbol("helper", SymbolKind::Function, "pkg_b/utils.py", 1);
db.insert_symbols(&[caller.clone(), sym1, sym2]).unwrap();
let edge = Edge {
source_id: caller.id.clone(),
target_name: "helper".to_string(),
target_id: None,
kind: EdgeKind::Calls,
file_path: "app/main.py".to_string(),
line: 5,
provenance: None,
};
db.insert_edge(&edge).unwrap();
let resolved = db.resolve_edges().unwrap();
assert_eq!(resolved, 0);
}
#[test]
fn test_resolve_edges_same_file_priority() {
let db = Database::open_memory().unwrap();
let caller = test_symbol("process", SymbolKind::Function, "a.py", 1);
let same_file = test_symbol("helper", SymbolKind::Function, "a.py", 20);
let other_file = test_symbol("helper", SymbolKind::Function, "b.py", 1);
db.insert_symbols(&[caller.clone(), same_file.clone(), other_file])
.unwrap();
let edge = Edge {
source_id: caller.id.clone(),
target_name: "helper".to_string(),
target_id: None,
kind: EdgeKind::Calls,
file_path: "a.py".to_string(),
line: 5,
provenance: None,
};
db.insert_edge(&edge).unwrap();
let resolved = db.resolve_edges().unwrap();
assert_eq!(resolved, 1);
let refs = db.refs("helper", None).unwrap();
let call_edge = refs
.iter()
.find(|(e, _)| e.kind == EdgeKind::Calls)
.unwrap();
assert_eq!(call_edge.0.target_id.as_ref().unwrap(), &same_file.id);
}
#[test]
fn test_resolve_edges_php_fqcn_target_same_file() {
let db = Database::open_memory().unwrap();
let base = test_symbol("BaseService", SymbolKind::Class, "auth/service.php", 1);
let child = test_symbol("AuthService", SymbolKind::Class, "auth/service.php", 30);
db.insert_symbols(&[base.clone(), child.clone()]).unwrap();
db.insert_edge(&Edge::new(
&child.id,
"App\\Auth\\BaseService",
EdgeKind::Inherits,
"auth/service.php",
30,
))
.unwrap();
let resolved = db.resolve_edges().unwrap();
assert_eq!(resolved, 1);
let refs = db.refs("App\\Auth\\BaseService", None).unwrap();
assert_eq!(refs[0].0.target_id.as_ref().unwrap(), &base.id);
}
/// With both a real class (`AppError`) and an import symbol carrying the
/// fully-qualified name (`App\AppError`) present, the `Inherits` edge must
/// end up pointing at the class.
#[test]
fn test_resolve_edges_php_fqcn_target_prefers_class_over_import_symbol() {
    let db = Database::open_memory().unwrap();
    let app_error = test_symbol("AppError", SymbolKind::Class, "exceptions.php", 1);
    let token_error = test_symbol("TokenError", SymbolKind::Class, "auth/tokens.php", 10);
    let use_stmt = test_symbol("App\\AppError", SymbolKind::Import, "auth/tokens.php", 1);
    db.insert_symbols(&[app_error.clone(), token_error.clone(), use_stmt])
        .unwrap();

    let extends = Edge::new(
        &token_error.id,
        "App\\AppError",
        EdgeKind::Inherits,
        "auth/tokens.php",
        10,
    );
    db.insert_edge(&extends).unwrap();
    db.resolve_edges().unwrap();

    let hits = db.refs("App\\AppError", None).unwrap();
    let inherits_hit = hits
        .iter()
        .find(|hit| hit.0.kind == EdgeKind::Inherits)
        .unwrap();
    assert_eq!(inherits_hit.0.target_id.as_ref(), Some(&app_error.id));
}
/// After an FQCN `Inherits` edge has been resolved, querying the hierarchy by
/// the parent's short name must report the (child, parent) pair.
#[test]
fn test_hierarchy_finds_children_of_fqcn_resolved_target() {
    let db = Database::open_memory().unwrap();
    let parent_class = test_symbol("BaseService", SymbolKind::Class, "auth/service.php", 1);
    let sub_class = test_symbol(
        "PaymentProcessor",
        SymbolKind::Class,
        "services/payment.php",
        5,
    );
    db.insert_symbols(&[parent_class.clone(), sub_class.clone()])
        .unwrap();

    let extends = Edge::new(
        &sub_class.id,
        "App\\Auth\\BaseService",
        EdgeKind::Inherits,
        "services/payment.php",
        5,
    );
    db.insert_edge(&extends).unwrap();
    db.resolve_edges().unwrap();

    let expected = vec![(String::from("PaymentProcessor"), String::from("BaseService"))];
    let actual = db.hierarchy("BaseService").unwrap();
    assert_eq!(actual, expected);
}
/// A `References` edge to `Logger` with two candidates — the class and a
/// same-named method — must resolve to the class.
#[test]
fn test_resolve_edges_class_over_constructor() {
    let db = Database::open_memory().unwrap();
    let call_site = test_symbol("handleLogin", SymbolKind::Method, "auth/Service.java", 10);
    let class_sym = test_symbol("Logger", SymbolKind::Class, "util/Logger.java", 1);
    let ctor_sym = test_symbol("Logger", SymbolKind::Method, "util/Logger.java", 5);
    db.insert_symbols(&[call_site.clone(), class_sym.clone(), ctor_sym])
        .unwrap();

    db.insert_edge(&Edge {
        source_id: call_site.id.clone(),
        target_name: String::from("Logger"),
        target_id: None,
        kind: EdgeKind::References,
        file_path: String::from("auth/Service.java"),
        line: 12,
        provenance: None,
    })
    .unwrap();

    let resolved_count = db.resolve_edges().unwrap();
    assert_eq!(resolved_count, 1);

    let hits = db.refs("Logger", None).unwrap();
    let reference = hits
        .iter()
        .find(|hit| hit.0.kind == EdgeKind::References)
        .unwrap();
    assert_eq!(reference.0.target_id.as_ref(), Some(&class_sym.id));
}
/// With three same-named candidates (class, method, and a function in another
/// file) the `Calls` edge stays unresolved: zero edges are reported resolved.
#[test]
fn test_resolve_edges_class_over_constructor_still_ambiguous_with_three() {
    let db = Database::open_memory().unwrap();
    let entry = test_symbol("main", SymbolKind::Function, "app.java", 1);
    let foo_class = test_symbol("Foo", SymbolKind::Class, "a/Foo.java", 1);
    let foo_ctor = test_symbol("Foo", SymbolKind::Method, "a/Foo.java", 5);
    let foo_func = test_symbol("Foo", SymbolKind::Function, "b/Foo.java", 1);
    db.insert_symbols(&[entry.clone(), foo_class, foo_ctor, foo_func])
        .unwrap();

    db.insert_edge(&Edge {
        source_id: entry.id.clone(),
        target_name: String::from("Foo"),
        target_id: None,
        kind: EdgeKind::Calls,
        file_path: String::from("app.java"),
        line: 5,
        provenance: None,
    })
    .unwrap();

    let resolved_count = db.resolve_edges().unwrap();
    assert_eq!(resolved_count, 0);
}
/// An `Imports` edge and a `References` edge to `Logger` from the same file
/// must both resolve (two edges total), and the reference must point at the
/// `Logger` class rather than the same-named method.
#[test]
fn test_resolve_edges_multipass_import_then_call() {
    let db = Database::open_memory().unwrap();
    let import_stmt = test_symbol("util.Logger", SymbolKind::Import, "auth/service.java", 1);
    let call_site = test_symbol("authenticate", SymbolKind::Method, "auth/service.java", 10);
    let class_sym = test_symbol("Logger", SymbolKind::Class, "util/Logger.java", 1);
    let ctor_sym = test_symbol("Logger", SymbolKind::Method, "util/Logger.java", 5);
    db.insert_symbols(&[
        import_stmt.clone(),
        call_site.clone(),
        class_sym.clone(),
        ctor_sym,
    ])
    .unwrap();

    // Edge emitted by the import statement itself.
    db.insert_edge(&Edge {
        source_id: import_stmt.id.clone(),
        target_name: String::from("Logger"),
        target_id: None,
        kind: EdgeKind::Imports,
        file_path: String::from("auth/service.java"),
        line: 1,
        provenance: None,
    })
    .unwrap();

    // Edge emitted by the use of `Logger` inside `authenticate`.
    db.insert_edge(&Edge {
        source_id: call_site.id.clone(),
        target_name: String::from("Logger"),
        target_id: None,
        kind: EdgeKind::References,
        file_path: String::from("auth/service.java"),
        line: 15,
        provenance: None,
    })
    .unwrap();

    let resolved_count = db.resolve_edges().unwrap();
    assert_eq!(resolved_count, 2);

    let hits = db.refs("Logger", None).unwrap();
    let reference_hit = hits
        .iter()
        .find(|hit| hit.0.kind == EdgeKind::References)
        .unwrap();
    assert_eq!(reference_hit.0.target_id.as_ref(), Some(&class_sym.id));
}
/// A `Calls` edge to `get_logger` with one function candidate and one method
/// candidate must resolve to the function.
#[test]
fn test_resolve_edges_function_over_method() {
    let db = Database::open_memory().unwrap();
    let call_site = test_symbol("process", SymbolKind::Function, "app/main.rb", 1);
    let free_fn = test_symbol("get_logger", SymbolKind::Function, "utils/helpers.rb", 6);
    let method_sym = test_symbol("get_logger", SymbolKind::Method, "utils/logging.rb", 6);
    db.insert_symbols(&[call_site.clone(), free_fn.clone(), method_sym])
        .unwrap();

    db.insert_edge(&Edge {
        source_id: call_site.id.clone(),
        target_name: String::from("get_logger"),
        target_id: None,
        kind: EdgeKind::Calls,
        file_path: String::from("app/main.rb"),
        line: 5,
        provenance: None,
    })
    .unwrap();

    let resolved_count = db.resolve_edges().unwrap();
    assert_eq!(resolved_count, 1);

    let hits = db.refs("get_logger", None).unwrap();
    let call_hit = hits
        .iter()
        .find(|hit| hit.0.kind == EdgeKind::Calls)
        .unwrap();
    assert_eq!(call_hit.0.target_id.as_ref(), Some(&free_fn.id));
}
/// Two same-named functions in different files leave a `Calls` edge
/// unresolved: zero edges are reported resolved.
#[test]
fn test_resolve_edges_two_functions_still_ambiguous() {
    let db = Database::open_memory().unwrap();
    let entry = test_symbol("main", SymbolKind::Function, "app.rb", 1);
    let helper_a = test_symbol("helper", SymbolKind::Function, "a/utils.rb", 1);
    let helper_b = test_symbol("helper", SymbolKind::Function, "b/utils.rb", 1);
    db.insert_symbols(&[entry.clone(), helper_a, helper_b])
        .unwrap();

    db.insert_edge(&Edge {
        source_id: entry.id.clone(),
        target_name: String::from("helper"),
        target_id: None,
        kind: EdgeKind::Calls,
        file_path: String::from("app.rb"),
        line: 5,
        provenance: None,
    })
    .unwrap();

    let resolved_count = db.resolve_edges().unwrap();
    assert_eq!(resolved_count, 0);
}
/// `callees("process")` must report both outgoing `Calls` edges of `process`
/// (to `fetch` and to `save`).
#[test]
fn test_callees_query() {
    let db = Database::open_memory().unwrap();
    let origin = test_symbol("process", SymbolKind::Function, "a.py", 1);
    let fetch_fn = test_symbol("fetch", SymbolKind::Function, "b.py", 1);
    let save_fn = test_symbol("save", SymbolKind::Function, "c.py", 1);
    db.insert_symbols(&[origin.clone(), fetch_fn, save_fn])
        .unwrap();

    let calls_fetch = Edge {
        source_id: origin.id.clone(),
        target_name: String::from("fetch"),
        target_id: None,
        kind: EdgeKind::Calls,
        file_path: String::from("a.py"),
        line: 5,
        provenance: None,
    };
    let calls_save = Edge {
        source_id: origin.id.clone(),
        target_name: String::from("save"),
        target_id: None,
        kind: EdgeKind::Calls,
        file_path: String::from("a.py"),
        line: 6,
        provenance: None,
    };
    db.insert_edges(&[calls_fetch, calls_save]).unwrap();

    let outgoing = db.callees("process").unwrap();
    assert_eq!(outgoing.len(), 2);
    // Order is not asserted, only membership.
    assert!(outgoing.iter().any(|edge| edge.target_name == "fetch"));
    assert!(outgoing.iter().any(|edge| edge.target_name == "save"));
}
/// For the call chain `c -> b -> a`, `impact("a", 2)` must yield two results
/// whose depths are 1 and 2, in that order.
#[test]
fn test_impact_transitive() {
    let db = Database::open_memory().unwrap();
    let sym_a = test_symbol("a", SymbolKind::Function, "a.py", 1);
    let sym_b = test_symbol("b", SymbolKind::Function, "b.py", 1);
    let sym_c = test_symbol("c", SymbolKind::Function, "c.py", 1);
    db.insert_symbols(&[sym_a.clone(), sym_b.clone(), sym_c.clone()])
        .unwrap();

    let b_calls_a = Edge {
        source_id: sym_b.id.clone(),
        target_name: String::from("a"),
        target_id: Some(sym_a.id.clone()),
        kind: EdgeKind::Calls,
        file_path: String::from("b.py"),
        line: 5,
        provenance: None,
    };
    let c_calls_b = Edge {
        source_id: sym_c.id.clone(),
        target_name: String::from("b"),
        target_id: Some(sym_b.id.clone()),
        kind: EdgeKind::Calls,
        file_path: String::from("c.py"),
        line: 5,
        provenance: None,
    };
    db.insert_edges(&[b_calls_a, c_calls_b]).unwrap();

    let affected = db.impact("a", 2).unwrap();
    assert_eq!(affected.len(), 2);
    assert_eq!(affected[0].1, 1);
    assert_eq!(affected[1].1, 2);
}
/// Asking for impact with a depth of zero must yield no results.
#[test]
fn test_impact_depth_zero_returns_empty() {
    let db = Database::open_memory().unwrap();
    let lone = test_symbol("a", SymbolKind::Function, "a.py", 1);
    db.insert_symbols(&[lone]).unwrap();

    let affected = db.impact("a", 0).unwrap();
    assert!(affected.is_empty());
}
/// A two-node call cycle (`a <-> b`) must not make `impact` loop forever: it
/// returns exactly two results, each with a depth inside `1..=5`.
#[test]
fn test_impact_cycle_terminates() {
    let db = Database::open_memory().unwrap();
    let sym_a = test_symbol("a", SymbolKind::Function, "a.py", 1);
    let sym_b = test_symbol("b", SymbolKind::Function, "b.py", 1);
    db.insert_symbols(&[sym_a.clone(), sym_b.clone()]).unwrap();

    let a_calls_b = Edge {
        source_id: sym_a.id.clone(),
        target_name: String::from("b"),
        target_id: Some(sym_b.id.clone()),
        kind: EdgeKind::Calls,
        file_path: String::from("a.py"),
        line: 2,
        provenance: None,
    };
    let b_calls_a = Edge {
        source_id: sym_b.id.clone(),
        target_name: String::from("a"),
        target_id: Some(sym_a.id.clone()),
        kind: EdgeKind::Calls,
        file_path: String::from("b.py"),
        line: 2,
        provenance: None,
    };
    db.insert_edges(&[a_calls_b, b_calls_a]).unwrap();

    let affected = db.impact("a", 5).unwrap();
    assert_eq!(affected.len(), 2);
    assert!(affected
        .iter()
        .all(|(_, depth)| (1..=5).contains(depth)));
}
/// Graph: `x -> shared`, `y -> shared`, `y -> x`. `impact("shared", 3)` is
/// expected to return exactly three results.
#[test]
fn test_impact_fanout_dedupes_by_edge() {
    let db = Database::open_memory().unwrap();
    let hub = test_symbol("shared", SymbolKind::Function, "s.py", 1);
    let sym_x = test_symbol("x", SymbolKind::Function, "x.py", 1);
    let sym_y = test_symbol("y", SymbolKind::Function, "y.py", 1);
    db.insert_symbols(&[hub.clone(), sym_x.clone(), sym_y.clone()])
        .unwrap();

    let x_calls_hub = Edge {
        source_id: sym_x.id.clone(),
        target_name: String::from("shared"),
        target_id: Some(hub.id.clone()),
        kind: EdgeKind::Calls,
        file_path: String::from("x.py"),
        line: 1,
        provenance: None,
    };
    let y_calls_hub = Edge {
        source_id: sym_y.id.clone(),
        target_name: String::from("shared"),
        target_id: Some(hub.id.clone()),
        kind: EdgeKind::Calls,
        file_path: String::from("y.py"),
        line: 1,
        provenance: None,
    };
    let y_calls_x = Edge {
        source_id: sym_y.id.clone(),
        target_name: String::from("x"),
        target_id: Some(sym_x.id.clone()),
        kind: EdgeKind::Calls,
        file_path: String::from("y.py"),
        line: 2,
        provenance: None,
    };
    db.insert_edges(&[x_calls_hub, y_calls_hub, y_calls_x])
        .unwrap();

    let affected = db.impact("shared", 3).unwrap();
    assert_eq!(affected.len(), 3);
}
/// Builds an in-memory database holding the linear call chain
/// `a -> b -> c -> d`, one function per `<name>.py` file, with every edge
/// already carrying its `target_id`.
fn chain_db() -> Database {
    let db = Database::open_memory().unwrap();

    let mut syms: Vec<Symbol> = Vec::with_capacity(4);
    for name in ["a", "b", "c", "d"] {
        let path = format!("{name}.py");
        syms.push(test_symbol(name, SymbolKind::Function, &path, 1));
    }
    db.insert_symbols(&syms).unwrap();

    // Pair every symbol with its successor to form the chain's edges.
    let edges: Vec<Edge> = syms
        .iter()
        .zip(syms.iter().skip(1))
        .map(|(from, to)| Edge {
            source_id: from.id.clone(),
            target_name: to.name.clone(),
            target_id: Some(to.id.clone()),
            kind: EdgeKind::Calls,
            file_path: from.file_path.clone(),
            line: 2,
            provenance: None,
        })
        .collect();
    db.insert_edges(&edges).unwrap();

    db
}
/// Tracing `a -> d` over the chain fixture yields hops whose sources are
/// `a`, `b`, `c` in order, with the final hop targeting `d`.
#[test]
fn trace_returns_shortest_path_in_order() {
    let db = chain_db();
    let path = db.trace("a", "d", 8).unwrap().expect("path a→d exists");

    let sources: Vec<&str> = path.iter().map(|hop| hop.source_name.as_str()).collect();
    assert_eq!(sources, ["a", "b", "c"]);

    let final_hop = path.last().unwrap();
    assert_eq!(final_hop.target_name, "d");
}
/// The chain fixture only has forward edges, so tracing `d -> a` finds no path.
#[test]
fn trace_returns_none_when_unreachable() {
    let db = chain_db();
    let path = db.trace("d", "a", 8).unwrap();
    assert!(path.is_none());
}
/// Tracing a symbol to itself yields `Some` with an empty hop list.
#[test]
fn trace_same_symbol_is_empty_path() {
    let db = chain_db();
    let path = db.trace("a", "a", 8).unwrap();
    assert_eq!(path, Some(Vec::new()));
}
/// `a -> d` needs three hops in the chain fixture, so a limit of 2 finds
/// nothing.
#[test]
fn trace_respects_depth_limit() {
    let db = chain_db();
    let path = db.trace("a", "d", 2).unwrap();
    assert!(path.is_none());
}
/// On a two-node cycle (`a <-> b`), tracing `a -> b` returns and reports a
/// single hop.
#[test]
fn trace_terminates_on_cycle() {
    let db = Database::open_memory().unwrap();
    let sym_a = test_symbol("a", SymbolKind::Function, "a.py", 1);
    let sym_b = test_symbol("b", SymbolKind::Function, "b.py", 1);
    db.insert_symbols(&[sym_a.clone(), sym_b.clone()]).unwrap();

    let a_calls_b = Edge {
        source_id: sym_a.id.clone(),
        target_name: String::from("b"),
        target_id: Some(sym_b.id.clone()),
        kind: EdgeKind::Calls,
        file_path: String::from("a.py"),
        line: 2,
        provenance: None,
    };
    let b_calls_a = Edge {
        source_id: sym_b.id.clone(),
        target_name: String::from("a"),
        target_id: Some(sym_a.id.clone()),
        kind: EdgeKind::Calls,
        file_path: String::from("b.py"),
        line: 2,
        provenance: None,
    };
    db.insert_edges(&[a_calls_b, b_calls_a]).unwrap();

    let path = db.trace("a", "b", 8).unwrap().expect("a→b exists");
    assert_eq!(path.len(), 1);
}
/// In a fully connected four-node graph (every symbol calls every other),
/// tracing `a -> d` returns the direct one-hop path.
#[test]
fn trace_dense_cycle_does_not_loop_and_finds_target() {
    let db = Database::open_memory().unwrap();

    let mut syms: Vec<Symbol> = Vec::with_capacity(4);
    for name in ["a", "b", "c", "d"] {
        let path = format!("{name}.py");
        syms.push(test_symbol(name, SymbolKind::Function, &path, 1));
    }
    db.insert_symbols(&syms).unwrap();

    // Every ordered pair of distinct symbols gets a `Calls` edge.
    let edges: Vec<Edge> = syms
        .iter()
        .flat_map(|src| {
            syms.iter()
                .filter(move |tgt| src.id != tgt.id)
                .map(move |tgt| Edge {
                    source_id: src.id.clone(),
                    target_name: tgt.name.clone(),
                    target_id: Some(tgt.id.clone()),
                    kind: EdgeKind::Calls,
                    file_path: src.file_path.clone(),
                    line: 2,
                    provenance: None,
                })
        })
        .collect();
    db.insert_edges(&edges).unwrap();

    let path = db.trace("a", "d", 20).unwrap().expect("a reaches d");
    assert_eq!(path.len(), 1, "shortest path in a clique is one hop");
    let only_hop = &path[0];
    assert_eq!(only_hop.source_name, "a");
    assert_eq!(only_hop.target_name, "d");
}
/// Symbols living in files whose paths contain commas must still be traced
/// correctly: `a -> b -> c` yields two hops with the exact source ids.
#[test]
fn trace_unaffected_by_comma_in_symbol_ids() {
    let db = Database::open_memory().unwrap();
    let first = test_symbol("a", SymbolKind::Function, "a,b.py", 1);
    let middle = test_symbol("b", SymbolKind::Function, "c,d.py", 1);
    let last = test_symbol("c", SymbolKind::Function, "e,f.py", 1);
    db.insert_symbols(&[first.clone(), middle.clone(), last.clone()])
        .unwrap();

    let first_to_middle = Edge {
        source_id: first.id.clone(),
        target_name: String::from("b"),
        target_id: Some(middle.id.clone()),
        kind: EdgeKind::Calls,
        file_path: first.file_path.clone(),
        line: 2,
        provenance: None,
    };
    let middle_to_last = Edge {
        source_id: middle.id.clone(),
        target_name: String::from("c"),
        target_id: Some(last.id.clone()),
        kind: EdgeKind::Calls,
        file_path: middle.file_path.clone(),
        line: 2,
        provenance: None,
    };
    db.insert_edges(&[first_to_middle, middle_to_last]).unwrap();

    let path = db
        .trace("a", "c", 8)
        .unwrap()
        .expect("a→b→c despite commas");
    assert_eq!(path.len(), 2);
    assert_eq!(path[0].source_id, first.id);
    assert_eq!(path[1].source_id, middle.id);
}
/// Two symbols share the name `helper` in one file; the single hop from
/// `caller` to `helper` must carry the caller's exact symbol id as its source.
#[test]
fn trace_hop_carries_exact_source_id_for_overloaded_name() {
    let db = Database::open_memory().unwrap();
    let origin = test_symbol("caller", SymbolKind::Function, "m.py", 1);
    let helper_fn = Symbol::new("helper", SymbolKind::Function, "m.py", 10, 12, 0, 5, None);
    let helper_method = Symbol::new("helper", SymbolKind::Method, "m.py", 20, 22, 6, 11, None);
    db.insert_symbols(&[origin.clone(), helper_fn.clone(), helper_method.clone()])
        .unwrap();

    // The only edge targets the method variant explicitly.
    let call = Edge {
        source_id: origin.id.clone(),
        target_name: String::from("helper"),
        target_id: Some(helper_method.id.clone()),
        kind: EdgeKind::Calls,
        file_path: origin.file_path.clone(),
        line: 2,
        provenance: None,
    };
    db.insert_edges(&[call]).unwrap();

    let path = db
        .trace("caller", "helper", 8)
        .unwrap()
        .expect("caller→helper");
    assert_eq!(path.len(), 1);
    assert_eq!(path[0].source_id, origin.id, "hop names the exact source");
}
/// `hierarchy("Dog")` reports the single (`Dog`, `Animal`) pair produced by
/// an unresolved `Inherits` edge.
#[test]
fn test_hierarchy_query() {
    let db = Database::open_memory().unwrap();
    let animal = test_symbol("Animal", SymbolKind::Class, "a.py", 1);
    let dog = test_symbol("Dog", SymbolKind::Class, "a.py", 10);
    db.insert_symbols(&[animal, dog.clone()]).unwrap();

    let extends = Edge {
        source_id: dog.id.clone(),
        target_name: String::from("Animal"),
        target_id: None,
        kind: EdgeKind::Inherits,
        file_path: String::from("a.py"),
        line: 10,
        provenance: None,
    };
    db.insert_edge(&extends).unwrap();

    let relations = db.hierarchy("Dog").unwrap();
    assert_eq!(relations.len(), 1);
    let (sub, sup) = &relations[0];
    assert_eq!(sub, "Dog");
    assert_eq!(sup, "Animal");
}
/// `file_deps("main.py")` returns the one `Imports` edge recorded for that
/// file, targeting `os`.
#[test]
fn test_file_deps_query() {
    let db = Database::open_memory().unwrap();
    let os_import = test_symbol("os", SymbolKind::Import, "main.py", 1);
    db.insert_symbol(&os_import).unwrap();

    let import_edge = Edge {
        source_id: os_import.id.clone(),
        target_name: String::from("os"),
        target_id: None,
        kind: EdgeKind::Imports,
        file_path: String::from("main.py"),
        line: 1,
        provenance: None,
    };
    db.insert_edge(&import_edge).unwrap();

    let dependencies = db.file_deps("main.py").unwrap();
    assert_eq!(dependencies.len(), 1);
    assert_eq!(dependencies[0].target_name, "os");
}
/// After `remove_file`, neither the file's outline nor its file record may
/// remain.
#[test]
fn test_remove_file_clears_all_data() {
    let db = Database::open_memory().unwrap();
    let foo = test_symbol("foo", SymbolKind::Function, "test.py", 1);
    db.insert_symbol(&foo).unwrap();

    let outgoing = Edge {
        source_id: foo.id.clone(),
        target_name: String::from("bar"),
        target_id: None,
        kind: EdgeKind::Calls,
        file_path: String::from("test.py"),
        line: 5,
        provenance: None,
    };
    db.insert_edge(&outgoing).unwrap();

    let file_record = FileInfo {
        path: String::from("test.py"),
        last_modified: 0.0,
        hash: String::from("abc"),
        language: String::from("python"),
        num_symbols: 1,
    };
    db.upsert_file(&file_record).unwrap();

    db.remove_file("test.py").unwrap();

    let outline = db.outline("test.py").unwrap();
    assert!(outline.is_empty());
    let stored_file = db.get_file("test.py").unwrap();
    assert!(stored_file.is_none());
}
/// `refs` without a kind returns every edge to `AuthService`; with a kind it
/// returns only edges of that kind, and nothing for a kind with no edges.
#[test]
fn test_refs_with_kind_filter() {
    let db = Database::open_memory().unwrap();
    let auth = test_symbol("AuthService", SymbolKind::Class, "a.py", 1);
    let admin = test_symbol("AdminService", SymbolKind::Class, "a.py", 20);
    let login = test_symbol("login", SymbolKind::Function, "b.py", 1);
    db.insert_symbols(&[auth.clone(), admin.clone(), login.clone()])
        .unwrap();

    let admin_extends_auth = Edge {
        source_id: admin.id.clone(),
        target_name: String::from("AuthService"),
        target_id: None,
        kind: EdgeKind::Inherits,
        file_path: String::from("a.py"),
        line: 20,
        provenance: None,
    };
    let login_calls_auth = Edge {
        source_id: login.id.clone(),
        target_name: String::from("AuthService"),
        target_id: None,
        kind: EdgeKind::Calls,
        file_path: String::from("b.py"),
        line: 5,
        provenance: None,
    };
    db.insert_edges(&[admin_extends_auth, login_calls_auth])
        .unwrap();

    // No filter: both edges.
    let unfiltered = db.refs("AuthService", None).unwrap();
    assert_eq!(unfiltered.len(), 2);

    // Inherits only.
    let only_inherits = db.refs("AuthService", Some(EdgeKind::Inherits)).unwrap();
    assert_eq!(only_inherits.len(), 1);
    assert_eq!(only_inherits[0].0.kind, EdgeKind::Inherits);

    // Calls only.
    let only_calls = db.refs("AuthService", Some(EdgeKind::Calls)).unwrap();
    assert_eq!(only_calls.len(), 1);
    assert_eq!(only_calls[0].0.kind, EdgeKind::Calls);

    // A kind with no matching edges.
    let only_raises = db.refs("AuthService", Some(EdgeKind::Raises)).unwrap();
    assert!(only_raises.is_empty());
}
/// Once an FQCN edge is resolved, `refs` queried by the short class name must
/// find it through the resolved `target_id`, and kind filtering still applies.
#[test]
fn test_refs_matches_via_resolved_target_id_short_name() {
    let db = Database::open_memory().unwrap();
    let parent_class = test_symbol("BaseService", SymbolKind::Class, "auth/service.php", 1);
    let sub_class = test_symbol("AuthService", SymbolKind::Class, "auth/service.php", 30);
    db.insert_symbols(&[parent_class.clone(), sub_class.clone()])
        .unwrap();

    let extends = Edge::new(
        &sub_class.id,
        "App\\Auth\\BaseService",
        EdgeKind::Inherits,
        "auth/service.php",
        30,
    );
    db.insert_edge(&extends).unwrap();
    db.resolve_edges().unwrap();

    let short_hits = db.refs("BaseService", None).unwrap();
    assert_eq!(short_hits.len(), 1, "short name must match via target_id");
    assert_eq!(short_hits[0].0.target_id.as_ref(), Some(&parent_class.id));

    let short_inherits = db.refs("BaseService", Some(EdgeKind::Inherits)).unwrap();
    assert_eq!(short_inherits.len(), 1);

    let short_calls = db.refs("BaseService", Some(EdgeKind::Calls)).unwrap();
    assert!(short_calls.is_empty());
}
/// Of an exact, a prefix and a substring match, the exact-name symbol must be
/// the first search result.
#[test]
fn test_search_exact_match_ranks_first() {
    let db = Database::open_memory().unwrap();
    let exact_hit = test_symbol("parse_config", SymbolKind::Function, "a.py", 1);
    let prefix_hit = test_symbol("parse_config_file", SymbolKind::Function, "a.py", 10);
    let substring_hit = test_symbol("get_parse_config", SymbolKind::Function, "a.py", 20);
    db.insert_symbols(&[exact_hit.clone(), prefix_hit, substring_hit])
        .unwrap();

    let found = db.search("parse_config", None, None, 20).unwrap();
    assert_eq!(found.len(), 3);
    assert_eq!(found[0].name, "parse_config");
}
/// For the query `token`, the three class/function symbols must occupy the
/// first three slots and the two `token` variables the last two.
#[test]
fn test_search_definitions_outrank_variables() {
    let db = Database::open_memory().unwrap();
    let auth_var = test_symbol("token", SymbolKind::Variable, "routes/auth.ts", 20);
    let admin_var = test_symbol("token", SymbolKind::Variable, "routes/admin.ts", 11);
    let error_class = test_symbol("TokenError", SymbolKind::Class, "auth/tokens.ts", 14);
    let validator = test_symbol("validateToken", SymbolKind::Function, "auth/tokens.ts", 59);
    let expired_class = test_symbol("ExpiredTokenError", SymbolKind::Class, "auth/tokens.ts", 22);
    db.insert_symbols(&[auth_var, admin_var, error_class, validator, expired_class])
        .unwrap();

    let found = db.search("token", None, None, 20).unwrap();
    assert_eq!(found.len(), 5);

    // Order among the three definitions is not asserted, only membership.
    let (definitions, variables) = found.split_at(3);
    assert!(definitions.iter().any(|s| s.name == "TokenError"));
    assert!(definitions.iter().any(|s| s.name == "validateToken"));
    assert!(definitions.iter().any(|s| s.name == "ExpiredTokenError"));
    assert_eq!(variables[0].name, "token");
    assert_eq!(variables[1].name, "token");
}
/// Searching `parse` returns the two symbols that start with it and not the
/// unrelated one.
#[test]
fn test_search_prefix_match() {
    let db = Database::open_memory().unwrap();
    let parse_config = test_symbol("parse_config", SymbolKind::Function, "a.py", 1);
    let parse_args = test_symbol("parse_args", SymbolKind::Function, "a.py", 10);
    let other = test_symbol("unrelated", SymbolKind::Function, "a.py", 20);
    db.insert_symbols(&[parse_config, parse_args, other])
        .unwrap();

    let found = db.search("parse", None, None, 20).unwrap();
    assert_eq!(found.len(), 2);
    assert!(found.iter().any(|s| s.name == "parse_config"));
    assert!(found.iter().any(|s| s.name == "parse_args"));
}
/// Searching `config` returns the two symbols that contain it and not the
/// unrelated one.
#[test]
fn test_search_substring_match() {
    let db = Database::open_memory().unwrap();
    let parse_config = test_symbol("parse_config", SymbolKind::Function, "a.py", 1);
    let get_config = test_symbol("get_config", SymbolKind::Function, "a.py", 10);
    let other = test_symbol("unrelated", SymbolKind::Function, "a.py", 20);
    db.insert_symbols(&[parse_config, get_config, other])
        .unwrap();

    let found = db.search("config", None, None, 20).unwrap();
    assert_eq!(found.len(), 2);
    assert!(found.iter().any(|s| s.name == "parse_config"));
    assert!(found.iter().any(|s| s.name == "get_config"));
}
/// A query with different casing (`Parse`) still finds `parse_config`.
#[test]
fn test_search_case_insensitive() {
    let db = Database::open_memory().unwrap();
    let target = test_symbol("parse_config", SymbolKind::Function, "a.py", 1);
    db.insert_symbol(&target).unwrap();

    let found = db.search("Parse", None, None, 20).unwrap();
    assert_eq!(found.len(), 1);
    assert_eq!(found[0].name, "parse_config");
}
/// With a `Function` kind filter, only the function among two `parse*`
/// symbols is returned.
#[test]
fn test_search_kind_filter() {
    let db = Database::open_memory().unwrap();
    let as_function = test_symbol("parse_config", SymbolKind::Function, "a.py", 1);
    let as_class = test_symbol("parse_result", SymbolKind::Class, "a.py", 10);
    db.insert_symbols(&[as_function, as_class]).unwrap();

    let kind_filter = Some(SymbolKind::Function);
    let found = db.search("parse", kind_filter, None, 20).unwrap();
    assert_eq!(found.len(), 1);
    assert_eq!(found[0].kind, SymbolKind::Function);
}
/// With a file filter, only the symbol from `src/a.rs` is returned even
/// though `src/b.rs` holds a same-named one.
#[test]
fn test_search_file_filter() {
    let db = Database::open_memory().unwrap();
    let in_a = test_symbol("parse_config", SymbolKind::Function, "src/a.rs", 1);
    let in_b = test_symbol("parse_config", SymbolKind::Function, "src/b.rs", 1);
    db.insert_symbols(&[in_a, in_b]).unwrap();

    let file_filter = Some("src/a.rs");
    let found = db.search("parse", None, file_filter, 20).unwrap();
    assert_eq!(found.len(), 1);
    assert_eq!(found[0].file_path, "src/a.rs");
}
/// An empty query is rejected with an error whose message mentions
/// "cannot be empty".
#[test]
fn test_search_empty_query_returns_error() {
    let db = Database::open_memory().unwrap();
    let message = db.search("", None, None, 20).unwrap_err().to_string();
    assert!(message.contains("cannot be empty"));
}
/// A limit of zero is rejected with an error whose message mentions
/// "at least 1".
#[test]
fn test_search_zero_limit_returns_error() {
    let db = Database::open_memory().unwrap();
    let message = db.search("parse", None, None, 0).unwrap_err().to_string();
    assert!(message.contains("at least 1"));
}
/// With five matching symbols and a limit of 3, exactly three results come
/// back.
#[test]
fn test_search_limit_caps_results() {
    let db = Database::open_memory().unwrap();
    (0..5u32).for_each(|idx| {
        let name = format!("fn_{idx}");
        let line = idx * 10 + 1;
        let generated = test_symbol(&name, SymbolKind::Function, "a.py", line);
        db.insert_symbol(&generated).unwrap();
    });

    let found = db.search("fn", None, None, 3).unwrap();
    assert_eq!(found.len(), 3);
}
/// With a limit of 1, the single result is the exact-name match rather than
/// the prefix match.
#[test]
fn test_search_limit_one_returns_top_ranked() {
    let db = Database::open_memory().unwrap();
    let exact_hit = test_symbol("resolve", SymbolKind::Function, "a.py", 1);
    let prefix_hit = test_symbol("resolve_edges", SymbolKind::Function, "a.py", 10);
    db.insert_symbols(&[exact_hit, prefix_hit]).unwrap();

    let found = db.search("resolve", None, None, 1).unwrap();
    assert_eq!(found.len(), 1);
    assert_eq!(found[0].name, "resolve");
}
/// The `_` in the query `get_foo` must be matched literally, so `getXfoo` is
/// not returned.
#[test]
fn test_search_wildcard_chars_treated_as_literals() {
    let db = Database::open_memory().unwrap();
    let literal_match = test_symbol("get_foo", SymbolKind::Function, "a.py", 1);
    let wildcard_only_match = test_symbol("getXfoo", SymbolKind::Function, "a.py", 10);
    db.insert_symbols(&[literal_match, wildcard_only_match])
        .unwrap();

    let found = db.search("get_foo", None, None, 20).unwrap();
    assert_eq!(found.len(), 1);
    assert_eq!(found[0].name, "get_foo");
}
/// A bare `%` query must match nothing when no symbol name contains `%`.
#[test]
fn test_search_percent_treated_as_literal() {
    let db = Database::open_memory().unwrap();
    let stored = test_symbol("get_config", SymbolKind::Function, "a.py", 1);
    db.insert_symbol(&stored).unwrap();

    let found = db.search("%", None, None, 20).unwrap();
    assert!(found.is_empty(), "% should not act as a wildcard");
}
/// Content stored with `upsert_symbol_content` can be read back: the body is
/// returned verbatim and the header contains the symbol name.
#[test]
fn test_upsert_and_get_symbol_content() {
    let db = Database::open_memory().unwrap();
    let func = test_symbol("my_func", SymbolKind::Function, "a.py", 1);
    db.insert_symbol(&func).unwrap();

    let body = "def my_func(): pass";
    let header_text = "// File: a.py\n// Type: function\n// Name: my_func";
    db.upsert_symbol_content(&func.id, "my_func", body, header_text)
        .unwrap();

    let stored = db.get_symbol_content(&func.id).unwrap();
    assert!(stored.is_some());
    let (content, header) = stored.unwrap();
    assert_eq!(content, "def my_func(): pass");
    assert!(header.contains("my_func"));
}
/// A batch insert of two content rows results in a content count of 2, with
/// both rows individually retrievable.
#[test]
fn test_insert_symbol_contents_batch() {
    let db = Database::open_memory().unwrap();
    let foo = test_symbol("foo", SymbolKind::Function, "a.py", 1);
    let bar = test_symbol("bar", SymbolKind::Function, "a.py", 10);
    db.insert_symbols(&[foo.clone(), bar.clone()]).unwrap();

    let foo_row = (
        foo.id.clone(),
        String::from("foo"),
        String::from("def foo(): pass"),
        String::from("header1"),
    );
    let bar_row = (
        bar.id.clone(),
        String::from("bar"),
        String::from("def bar(): pass"),
        String::from("header2"),
    );
    let batch = vec![foo_row, bar_row];
    db.insert_symbol_contents(&batch).unwrap();

    assert_eq!(db.symbol_content_count().unwrap(), 2);
    let foo_content = db.get_symbol_content(&foo.id).unwrap();
    assert!(foo_content.is_some());
    let bar_content = db.get_symbol_content(&bar.id).unwrap();
    assert!(bar_content.is_some());
}
/// Clearing content for `a.py` removes only that file's content row and
/// leaves the row belonging to `b.py` in place.
#[test]
fn test_clear_symbol_content_for_file() {
    let db = Database::open_memory().unwrap();
    let in_a = test_symbol("foo", SymbolKind::Function, "a.py", 1);
    let in_b = test_symbol("bar", SymbolKind::Function, "b.py", 1);
    db.insert_symbols(&[in_a.clone(), in_b.clone()]).unwrap();

    db.upsert_symbol_content(&in_a.id, "foo", "content1", "header1")
        .unwrap();
    db.upsert_symbol_content(&in_b.id, "bar", "content2", "header2")
        .unwrap();
    let count_before = db.symbol_content_count().unwrap();
    assert_eq!(count_before, 2);

    db.clear_symbol_content_for_file("a.py").unwrap();

    let count_after = db.symbol_content_count().unwrap();
    assert_eq!(count_after, 1);
    assert!(db.get_symbol_content(&in_a.id).unwrap().is_none());
    assert!(db.get_symbol_content(&in_b.id).unwrap().is_some());
}
/// A quoted FTS5 query for `validate` finds the symbol whose stored content
/// is the `validate_token` function, and returns it first.
#[test]
fn test_fts5_search_by_content() {
    let db = Database::open_memory().unwrap();
    let validator = test_symbol("validate_token", SymbolKind::Function, "auth.py", 1);
    db.insert_symbol(&validator).unwrap();

    let body = "def validate_token(token: str) -> bool:\n return token.is_valid()";
    db.upsert_symbol_content(&validator.id, "validate_token", body, "// File: auth.py")
        .unwrap();

    let hits = db.fts5_search("\"validate\"", 10).unwrap();
    assert!(!hits.is_empty());
    assert_eq!(hits[0], validator.id);
}
/// An FTS5 query for a term that appears nowhere returns no hits.
#[test]
fn test_fts5_search_no_match() {
    let db = Database::open_memory().unwrap();
    let foo = test_symbol("foo", SymbolKind::Function, "a.py", 1);
    db.insert_symbol(&foo).unwrap();
    db.upsert_symbol_content(&foo.id, "foo", "def foo(): pass", "header")
        .unwrap();

    let hits = db.fts5_search("\"nonexistent_term_xyz\"", 10).unwrap();
    assert!(hits.is_empty());
}
/// Replacing a symbol's content must also purge the previous text from the
/// FTS index: the old token is searchable before the replacement and absent
/// afterwards, while the content row count stays at 1.
#[test]
fn fts5_drops_old_content_when_symbol_content_is_replaced() {
    let db = Database::open_memory().unwrap();
    let loader = test_symbol("load", SymbolKind::Function, "a.py", 1);
    db.insert_symbol(&loader).unwrap();

    // First version contains the token that must later disappear.
    db.upsert_symbol_content(&loader.id, "load", "key = ghp_oldsecrettoken_value", "h")
        .unwrap();
    let before = db.fts5_search("\"ghp_oldsecrettoken_value\"", 10).unwrap();
    assert!(!before.is_empty());

    // Second version overwrites it.
    db.upsert_symbol_content(&loader.id, "load", "key = [REDACTED_SECRET]", "h")
        .unwrap();

    // Query the FTS table directly on the underlying connection.
    let leftover_rows: i64 = db
        .conn
        .query_row(
            "SELECT count(*) FROM symbol_fts WHERE symbol_fts MATCH 'ghp_oldsecrettoken_value'",
            [],
            |row| row.get(0),
        )
        .unwrap();
    assert_eq!(leftover_rows, 0, "old plaintext must not remain in the FTS index");
    assert_eq!(db.symbol_content_count().unwrap(), 1);
}
/// Repeated calls for the same symbol id return the same embedding id, while
/// a different symbol id gets a different one.
#[test]
fn test_get_or_create_embedding_id() {
    let db = Database::open_memory().unwrap();
    let first_call = db.get_or_create_embedding_id("a.py:foo:1").unwrap();
    let repeat_call = db.get_or_create_embedding_id("a.py:foo:1").unwrap();
    let other_symbol = db.get_or_create_embedding_id("b.py:bar:5").unwrap();

    assert_eq!(first_call, repeat_call, "same symbol should return same ID");
    assert_ne!(
        first_call, other_symbol,
        "different symbols should get different IDs"
    );
}
/// An embedding id maps back to the symbol id it was created for; an unknown
/// embedding id maps to `None`.
#[test]
fn test_symbol_id_for_embedding() {
    let db = Database::open_memory().unwrap();
    let embedding_id = db.get_or_create_embedding_id("test:sym:1").unwrap();

    let known = db.symbol_id_for_embedding(embedding_id).unwrap();
    assert_eq!(known, Some(String::from("test:sym:1")));

    let unknown = db.symbol_id_for_embedding(99999).unwrap();
    assert!(unknown.is_none());
}
/// A batch lookup for two known embedding ids returns two entries.
#[test]
fn test_symbol_ids_for_embeddings_batch() {
    let db = Database::open_memory().unwrap();
    let foo_eid = db.get_or_create_embedding_id("a:foo:1").unwrap();
    let bar_eid = db.get_or_create_embedding_id("b:bar:2").unwrap();

    let looked_up = db.symbol_ids_for_embeddings(&[foo_eid, bar_eid]).unwrap();
    assert_eq!(looked_up.len(), 2);
}
/// A stored 384-float embedding, when searched with itself as the query, is
/// the only hit and has a distance below 0.01.
#[test]
fn test_upsert_and_search_embedding() {
    let db = Database::open_memory().unwrap();
    let embedding_id = db.get_or_create_embedding_id("a:foo:1").unwrap();

    // Vector with 1.0 in the first slot and 0.0 elsewhere, serialized as
    // little-endian f32 bytes.
    let mut bytes: Vec<u8> = 0.0f32.to_le_bytes().repeat(384);
    bytes[..4].copy_from_slice(&1.0f32.to_le_bytes());
    db.upsert_embedding(embedding_id, &bytes).unwrap();

    let hits = db.vector_search(&bytes, 5).unwrap();
    assert_eq!(hits.len(), 1);
    let nearest = &hits[0];
    assert_eq!(nearest.0, embedding_id);
    assert!(
        nearest.1 < 0.01,
        "self-match should have near-zero distance"
    );
}
/// Batch-inserting two embeddings brings the embedding count to 2.
#[test]
fn test_insert_embeddings_batch() {
    let db = Database::open_memory().unwrap();
    let foo_eid = db.get_or_create_embedding_id("a:foo:1").unwrap();
    let bar_eid = db.get_or_create_embedding_id("b:bar:2").unwrap();

    // 384 copies of `value`, serialized as little-endian f32 bytes.
    let constant_vector = |value: f32| -> Vec<u8> { value.to_le_bytes().repeat(384) };

    let batch = vec![
        (foo_eid, constant_vector(0.1)),
        (bar_eid, constant_vector(0.9)),
    ];
    db.insert_embeddings(&batch).unwrap();

    assert_eq!(db.embedding_count().unwrap(), 2);
}
/// `has_embedding` is false for an unknown symbol, still false once only the
/// embedding id exists, and true after a vector has been stored.
#[test]
fn test_has_embedding() {
    let db = Database::open_memory().unwrap();
    assert!(!db.has_embedding("nonexistent").unwrap());

    // Allocating an id alone does not count as having an embedding.
    let embedding_id = db.get_or_create_embedding_id("a:foo:1").unwrap();
    assert!(!db.has_embedding("a:foo:1").unwrap());

    let zero_vector: Vec<u8> = 0.0f32.to_le_bytes().repeat(384);
    db.upsert_embedding(embedding_id, &zero_vector).unwrap();
    assert!(db.has_embedding("a:foo:1").unwrap());
}
/// After storing two embeddings, `clear_all_embeddings` brings the count
/// back to zero.
#[test]
fn test_clear_all_embeddings() {
    let db = Database::open_memory().unwrap();
    let foo_eid = db.get_or_create_embedding_id("a:foo:1").unwrap();
    let bar_eid = db.get_or_create_embedding_id("b:bar:2").unwrap();

    let zero_vector: Vec<u8> = 0.0f32.to_le_bytes().repeat(384);
    db.upsert_embedding(foo_eid, &zero_vector).unwrap();
    db.upsert_embedding(bar_eid, &zero_vector).unwrap();
    let count_before = db.embedding_count().unwrap();
    assert_eq!(count_before, 2);

    db.clear_all_embeddings().unwrap();

    let count_after = db.embedding_count().unwrap();
    assert_eq!(count_after, 0);
}
/// An embedding id with no stored vector is not counted by
/// `embedding_count`; it is counted once a vector has been upserted.
#[test]
fn embedding_count_excludes_orphan_map_rows() {
    let db = Database::open_memory().unwrap();

    // Allocate the id only; no vector stored yet.
    let _unused_id = db.get_or_create_embedding_id("a:foo:1").unwrap();
    let count_without_vector = db.embedding_count().unwrap();
    assert_eq!(count_without_vector, 0);

    let embedding_id = db.get_or_create_embedding_id("a:foo:1").unwrap();
    let zero_vector: Vec<u8> = 0.0f32.to_le_bytes().repeat(384);
    db.upsert_embedding(embedding_id, &zero_vector).unwrap();

    let count_with_vector = db.embedding_count().unwrap();
    assert_eq!(count_with_vector, 1);
}
/// Both symbols with content need embeddings at first; after embedding one
/// of them, only the other is still reported.
#[test]
fn test_symbols_needing_embeddings() {
    let db = Database::open_memory().unwrap();
    let foo = test_symbol("foo", SymbolKind::Function, "a.py", 1);
    let bar = test_symbol("bar", SymbolKind::Function, "a.py", 10);
    db.insert_symbols(&[foo.clone(), bar.clone()]).unwrap();
    db.upsert_symbol_content(&foo.id, "foo", "def foo(): pass", "header")
        .unwrap();
    db.upsert_symbol_content(&bar.id, "bar", "def bar(): pass", "header")
        .unwrap();

    let pending_before = db.symbols_needing_embeddings().unwrap();
    assert_eq!(pending_before.len(), 2);

    // Embed `foo` only.
    let foo_eid = db.get_or_create_embedding_id(&foo.id).unwrap();
    let zero_vector: Vec<u8> = 0.0f32.to_le_bytes().repeat(384);
    db.upsert_embedding(foo_eid, &zero_vector).unwrap();

    let pending_after = db.symbols_needing_embeddings().unwrap();
    assert_eq!(pending_after.len(), 1);
    assert_eq!(pending_after[0], bar.id);
}
/// `clear_rag_data_for_file("a.py")` drops both the content and the
/// embedding of the symbol in `a.py` while leaving the `b.py` symbol's data
/// untouched.
#[test]
fn test_clear_rag_data_for_file() {
    let db = Database::open_memory().unwrap();
    let in_a = test_symbol("foo", SymbolKind::Function, "a.py", 1);
    let in_b = test_symbol("bar", SymbolKind::Function, "b.py", 1);
    db.insert_symbols(&[in_a.clone(), in_b.clone()]).unwrap();

    db.upsert_symbol_content(&in_a.id, "foo", "content1", "header1")
        .unwrap();
    db.upsert_symbol_content(&in_b.id, "bar", "content2", "header2")
        .unwrap();

    let a_eid = db.get_or_create_embedding_id(&in_a.id).unwrap();
    let b_eid = db.get_or_create_embedding_id(&in_b.id).unwrap();
    let zero_vector: Vec<u8> = 0.0f32.to_le_bytes().repeat(384);
    db.upsert_embedding(a_eid, &zero_vector).unwrap();
    db.upsert_embedding(b_eid, &zero_vector).unwrap();

    db.clear_rag_data_for_file("a.py").unwrap();

    // Everything for a.py is gone ...
    assert!(db.get_symbol_content(&in_a.id).unwrap().is_none());
    assert!(!db.has_embedding(&in_a.id).unwrap());
    // ... and everything for b.py survives.
    assert!(db.get_symbol_content(&in_b.id).unwrap().is_some());
    assert!(db.has_embedding(&in_b.id).unwrap());
}
/// Clearing embeddings for an explicit id list drops only those vectors,
/// keeps the symbol's content, and makes the symbol pending again.
#[test]
fn clear_embeddings_for_symbols_drops_only_named_ids() {
    let db = Database::open_memory().unwrap();
    let target = test_symbol("foo", SymbolKind::Function, "a.py", 1);
    let bystander = test_symbol("bar", SymbolKind::Function, "a.py", 10);
    db.insert_symbols(&[target.clone(), bystander.clone()])
        .unwrap();
    db.upsert_symbol_content(&target.id, "foo", "def foo(): pass", "header")
        .unwrap();
    db.upsert_symbol_content(&bystander.id, "bar", "def bar(): pass", "header")
        .unwrap();

    // Give both symbols an all-zero vector.
    let zero_vector: Vec<u8> = std::iter::repeat(0.0f32)
        .take(384)
        .flat_map(f32::to_le_bytes)
        .collect();
    for symbol_id in [&target.id, &bystander.id] {
        let embedding_id = db.get_or_create_embedding_id(symbol_id).unwrap();
        db.upsert_embedding(embedding_id, &zero_vector).unwrap();
    }
    assert_eq!(db.embedding_count().unwrap(), 2);

    // Clear only `target` inside an indexing transaction.
    let indexing_tx = db.begin_indexing_tx().unwrap();
    db.clear_embeddings_for_symbols_in_tx(std::slice::from_ref(&target.id))
        .unwrap();
    indexing_tx.commit().unwrap();

    let target_embedded = db.has_embedding(&target.id).unwrap();
    assert!(!target_embedded);
    let bystander_embedded = db.has_embedding(&bystander.id).unwrap();
    assert!(bystander_embedded);
    let target_content = db.get_symbol_content(&target.id).unwrap();
    assert!(target_content.is_some());

    let pending = db.symbols_needing_embeddings().unwrap();
    assert_eq!(pending, vec![target.id.clone()]);
}
/// Clearing embeddings for a symbol that was never embedded succeeds and
/// leaves the embedding count at zero.
#[test]
fn clear_embeddings_for_symbols_is_noop_for_unembedded_id() {
    let db = Database::open_memory().unwrap();
    let unembedded = test_symbol("foo", SymbolKind::Function, "a.py", 1);
    db.insert_symbols(std::slice::from_ref(&unembedded)).unwrap();
    db.upsert_symbol_content(&unembedded.id, "foo", "def foo(): pass", "header")
        .unwrap();

    let indexing_tx = db.begin_indexing_tx().unwrap();
    let ids_to_clear = std::slice::from_ref(&unembedded.id);
    db.clear_embeddings_for_symbols_in_tx(ids_to_clear).unwrap();
    indexing_tx.commit().unwrap();

    let remaining = db.embedding_count().unwrap();
    assert_eq!(remaining, 0);
}
/// `all_content_symbol_ids` returns one id per function symbol that has
/// stored content, across files.
#[test]
fn test_all_content_symbol_ids() {
    let db = Database::open_memory().unwrap();
    let in_a = test_symbol("foo", SymbolKind::Function, "a.py", 1);
    let in_b = test_symbol("bar", SymbolKind::Function, "b.py", 1);
    db.insert_symbols(&[in_a.clone(), in_b.clone()]).unwrap();
    for (sym, name, body, header) in [
        (&in_a, "foo", "content1", "header1"),
        (&in_b, "bar", "content2", "header2"),
    ] {
        db.upsert_symbol_content(&sym.id, name, body, header)
            .unwrap();
    }

    let content_ids = db.all_content_symbol_ids().unwrap();
    assert_eq!(content_ids.len(), 2);
}
/// Variable symbols are left out of the pending-embedding list even when
/// they have stored content; functions and classes are included.
#[test]
fn test_symbols_needing_embeddings_excludes_variables() {
    let db = Database::open_memory().unwrap();
    let function = test_symbol("process", SymbolKind::Function, "a.py", 1);
    let variable = test_symbol("MAX_RETRIES", SymbolKind::Variable, "a.py", 10);
    let class = test_symbol("Service", SymbolKind::Class, "a.py", 20);
    db.insert_symbols(&[function.clone(), variable.clone(), class.clone()])
        .unwrap();
    for (sym, name, body) in [
        (&function, "process", "def process(): pass"),
        (&variable, "MAX_RETRIES", "MAX_RETRIES = 3"),
        (&class, "Service", "class Service: pass"),
    ] {
        db.upsert_symbol_content(&sym.id, name, body, "header")
            .unwrap();
    }

    let pending = db.symbols_needing_embeddings().unwrap();
    assert_eq!(pending.len(), 2);
    let variable_listed = pending.contains(&variable.id);
    assert!(!variable_listed, "variables should be excluded");
    assert!(pending.contains(&function.id));
    assert!(pending.contains(&class.id));
}
/// `all_content_symbol_ids` skips variable symbols even when content is
/// stored for them.
#[test]
fn test_all_content_symbol_ids_excludes_variables() {
    let db = Database::open_memory().unwrap();
    let function = test_symbol("foo", SymbolKind::Function, "a.py", 1);
    let variable = test_symbol("MY_VAR", SymbolKind::Variable, "a.py", 10);
    let method = test_symbol("bar", SymbolKind::Method, "a.py", 20);
    db.insert_symbols(&[function.clone(), variable.clone(), method.clone()])
        .unwrap();
    for (sym, name, body) in [
        (&function, "foo", "def foo(): pass"),
        (&variable, "MY_VAR", "MY_VAR = 42"),
        (&method, "bar", "def bar(self): pass"),
    ] {
        db.upsert_symbol_content(&sym.id, name, body, "header")
            .unwrap();
    }

    let content_ids = db.all_content_symbol_ids().unwrap();
    assert_eq!(content_ids.len(), 2, "variables should be excluded");
    let variable_listed = content_ids.contains(&variable.id);
    assert!(!variable_listed);
}
/// The batch content lookup returns entries only for ids that have stored
/// content; an id without content is absent from the result map.
#[test]
fn test_get_symbol_contents_batch() {
    let db = Database::open_memory().unwrap();
    let with_content_a = test_symbol("foo", SymbolKind::Function, "a.py", 1);
    let with_content_b = test_symbol("bar", SymbolKind::Function, "a.py", 10);
    let without_content = test_symbol("baz", SymbolKind::Function, "a.py", 20);
    db.insert_symbols(&[
        with_content_a.clone(),
        with_content_b.clone(),
        without_content.clone(),
    ])
    .unwrap();
    db.upsert_symbol_content(&with_content_a.id, "foo", "def foo(): pass", "h1")
        .unwrap();
    db.upsert_symbol_content(&with_content_b.id, "bar", "def bar(): pass", "h2")
        .unwrap();

    // Query all three ids, including the one that has no content row.
    let requested = vec![
        with_content_a.id.clone(),
        with_content_b.id.clone(),
        without_content.id.clone(),
    ];
    let contents = db.get_symbol_contents_batch(&requested).unwrap();

    assert_eq!(contents.len(), 2);
    for present in [&with_content_a.id, &with_content_b.id] {
        assert!(contents.contains_key(present));
    }
    assert!(!contents.contains_key(&without_content.id));
    assert_eq!(contents[&with_content_a.id].0, "def foo(): pass");
}
/// An empty id list yields an empty result map.
#[test]
fn test_get_symbol_contents_batch_empty() {
    let db = Database::open_memory().unwrap();
    let contents = db.get_symbol_contents_batch(&[]).unwrap();
    assert_eq!(contents.is_empty(), true);
}
/// `get_symbol` returns the stored symbol for a known id and `None` for an
/// unknown one.
#[test]
fn test_get_symbol_by_id() {
    let db = Database::open_memory().unwrap();
    let stored = test_symbol("foo", SymbolKind::Function, "a.py", 1);
    db.insert_symbol(&stored).unwrap();

    match db.get_symbol(&stored.id).unwrap() {
        Some(hit) => assert_eq!(hit.name, "foo"),
        None => panic!("inserted symbol should be found by id"),
    }

    let miss = db.get_symbol("nonexistent").unwrap();
    assert!(miss.is_none());
}
/// `symbols_for_files` returns only symbols from the requested files; the
/// first and last results come from `src/a.py` and `src/b.py` respectively.
#[test]
fn test_symbols_for_files_basic() {
    let db = Database::open_memory().unwrap();
    let seeded = [
        test_symbol("func_a", SymbolKind::Function, "src/a.py", 1),
        test_symbol("func_b", SymbolKind::Function, "src/a.py", 10),
        test_symbol("ClassC", SymbolKind::Class, "src/b.py", 1),
        test_symbol("func_d", SymbolKind::Function, "src/c.py", 1),
    ];
    db.insert_symbols(&seeded).unwrap();

    // src/c.py is deliberately not requested.
    let requested = vec![String::from("src/a.py"), String::from("src/b.py")];
    let found = db.symbols_for_files(&requested, None).unwrap();

    assert_eq!(found.len(), 3);
    assert_eq!(found[0].file_path, "src/a.py");
    assert_eq!(found[2].file_path, "src/b.py");
}
/// With a kind filter, `symbols_for_files` returns only symbols of that
/// kind from the requested files.
#[test]
fn test_symbols_for_files_kind_filter() {
    let db = Database::open_memory().unwrap();
    let seeded = [
        test_symbol("func_a", SymbolKind::Function, "src/a.py", 1),
        test_symbol("ClassB", SymbolKind::Class, "src/a.py", 10),
    ];
    db.insert_symbols(&seeded).unwrap();

    let requested = vec![String::from("src/a.py")];
    let only_functions = Some(SymbolKind::Function);
    let found = db.symbols_for_files(&requested, only_functions).unwrap();

    assert_eq!(found.len(), 1);
    assert_eq!(found[0].name, "func_a");
}
/// An empty file list yields no symbols.
#[test]
fn test_symbols_for_files_empty_input() {
    let db = Database::open_memory().unwrap();
    let found = db.symbols_for_files(&[], None).unwrap();
    assert_eq!(found.is_empty(), true);
}
/// Requesting a file that has no indexed symbols yields an empty result.
#[test]
fn test_symbols_for_files_no_matching_files() {
    let db = Database::open_memory().unwrap();
    let indexed = test_symbol("func_a", SymbolKind::Function, "src/a.py", 1);
    db.insert_symbol(&indexed).unwrap();

    let requested = vec![String::from("src/nonexistent.py")];
    let found = db.symbols_for_files(&requested, None).unwrap();
    assert_eq!(found.is_empty(), true);
}
/// After resolving edges, `compute_in_degrees` stores on each symbol the
/// number of incoming edges.
#[test]
fn test_compute_in_degrees() {
    let db = Database::open_memory().unwrap();
    let func_a = test_symbol("func_a", SymbolKind::Function, "a.py", 1);
    let func_b = test_symbol("func_b", SymbolKind::Function, "b.py", 1);
    let func_c = test_symbol("func_c", SymbolKind::Function, "c.py", 1);
    db.insert_symbols(&[func_a.clone(), func_b.clone(), func_c.clone()])
        .unwrap();

    // func_b calls func_a twice; func_c calls func_a once and func_b once.
    let call_edges = [
        Edge::new(&func_b.id, "func_a", EdgeKind::Calls, "b.py", 5),
        Edge::new(&func_b.id, "func_a", EdgeKind::Calls, "b.py", 10),
        Edge::new(&func_c.id, "func_a", EdgeKind::Calls, "c.py", 3),
        Edge::new(&func_c.id, "func_b", EdgeKind::Calls, "c.py", 7),
    ];
    db.insert_edges(&call_edges).unwrap();
    db.resolve_edges().unwrap();
    db.compute_in_degrees().unwrap();

    let reloaded_a = db.get_symbol(&func_a.id).unwrap().unwrap();
    assert_eq!(reloaded_a.in_degree, 3, "func_a should have 3 incoming edges");
    let reloaded_b = db.get_symbol(&func_b.id).unwrap().unwrap();
    assert_eq!(reloaded_b.in_degree, 1, "func_b should have 1 incoming edge");
    let reloaded_c = db.get_symbol(&func_c.id).unwrap().unwrap();
    assert_eq!(reloaded_c.in_degree, 0, "func_c should have 0 incoming edges");
}
/// `compute_in_degrees` overwrites a stale `in_degree` with zero when the
/// symbol has no incoming edges.
#[test]
fn test_compute_in_degrees_resets() {
    let db = Database::open_memory().unwrap();
    let lonely = test_symbol("func_a", SymbolKind::Function, "a.py", 1);
    db.insert_symbol(&lonely).unwrap();

    // Plant a bogus in-degree directly in the table.
    let plant_stale_value = "UPDATE symbols SET in_degree = 99 WHERE id = ?1";
    db.conn
        .execute(plant_stale_value, params![lonely.id])
        .unwrap();

    db.compute_in_degrees().unwrap();

    let reloaded = db.get_symbol(&lonely.id).unwrap().unwrap();
    assert_eq!(reloaded.in_degree, 0);
}
/// `top_symbols` returns symbols ordered by descending `in_degree`.
#[test]
fn test_top_symbols_ordered_by_centrality() {
    let db = Database::open_memory().unwrap();
    let hub = test_symbol("hub", SymbolKind::Function, "a.py", 1);
    let leaf = test_symbol("leaf", SymbolKind::Function, "b.py", 1);
    let mid = test_symbol("mid", SymbolKind::Function, "c.py", 1);
    db.insert_symbols(&[hub.clone(), leaf.clone(), mid.clone()])
        .unwrap();

    // Write in-degrees directly so the ranking does not depend on edges.
    let force_in_degree = |sql: &str, symbol_id: &str| {
        db.conn.execute(sql, params![symbol_id]).unwrap();
    };
    force_in_degree("UPDATE symbols SET in_degree = 10 WHERE id = ?1", &hub.id);
    force_in_degree("UPDATE symbols SET in_degree = 1 WHERE id = ?1", &leaf.id);
    force_in_degree("UPDATE symbols SET in_degree = 5 WHERE id = ?1", &mid.id);

    let ranked = db.top_symbols(10).unwrap();
    assert_eq!(ranked.len(), 3);
    assert_eq!(ranked[0].name, "hub");
    assert_eq!(ranked[0].in_degree, 10);
    assert_eq!(ranked[1].name, "mid");
    assert_eq!(ranked[2].name, "leaf");
}
/// For two symbols that both match the query `parse`, `search` lists the
/// one with the higher `in_degree` first.
#[test]
fn test_search_uses_in_degree_tiebreaker() {
    let db = Database::open_memory().unwrap();
    let popular = test_symbol("parse_request", SymbolKind::Function, "a.py", 1);
    let obscure = test_symbol("parse_response", SymbolKind::Function, "b.py", 1);
    db.insert_symbols(&[popular.clone(), obscure.clone()])
        .unwrap();

    // Write in-degrees directly so ordering does not depend on edges.
    let force_in_degree = |sql: &str, symbol_id: &str| {
        db.conn.execute(sql, params![symbol_id]).unwrap();
    };
    force_in_degree(
        "UPDATE symbols SET in_degree = 20 WHERE id = ?1",
        &popular.id,
    );
    force_in_degree(
        "UPDATE symbols SET in_degree = 5 WHERE id = ?1",
        &obscure.id,
    );

    let hits = db.search("parse", None, None, 10).unwrap();
    assert_eq!(hits.len(), 2);
    assert_eq!(hits[0].name, "parse_request");
    assert_eq!(hits[1].name, "parse_response");
}
/// A fresh in-memory database records `SCHEMA_VERSION` under the
/// `schema_version` metadata key.
#[test]
fn test_schema_version_stored() {
    let db = Database::open_memory().unwrap();
    match db.get_metadata("schema_version").unwrap() {
        Some(stored) => assert_eq!(stored, SCHEMA_VERSION.to_string()),
        None => panic!("schema_version metadata row is missing"),
    }
}
/// After a resolved edge's target symbol is deleted, invalidating edges
/// that target the dirty file leaves the edge unresolved.
#[test]
fn test_invalidate_dangling_edges_after_symbol_removal() {
    let db = Database::open_memory().unwrap();
    let callee = test_symbol("foo", SymbolKind::Function, "a.py", 1);
    db.insert_symbol(&callee).unwrap();
    let caller = test_symbol("bar", SymbolKind::Function, "b.py", 1);
    db.insert_symbol(&caller).unwrap();

    let call = Edge::new(&caller.id, "foo", EdgeKind::Calls, "b.py", 5);
    db.insert_edge(&call).unwrap();
    let resolved_count = db.resolve_edges().unwrap();
    assert_eq!(resolved_count, 1);

    // Remove the target row directly, leaving the edge pointing at nothing.
    db.conn
        .execute("DELETE FROM symbols WHERE id = ?1", params![callee.id])
        .unwrap();

    let mut dirty_files = std::collections::HashSet::new();
    dirty_files.insert("a.py".to_string());
    let invalidated_count = db.invalidate_edges_targeting(&dirty_files).unwrap();
    assert_eq!(invalidated_count, 1);

    let outgoing = db.callees("bar").unwrap();
    let any_still_resolved = outgoing.iter().any(|edge| edge.target_id.is_some());
    assert!(
        !any_still_resolved,
        "edge should be unresolved after invalidation"
    );
}
/// After the target symbol is deleted and re-inserted, scoped resolution
/// over its file resolves the edge again.
#[test]
fn test_scoped_resolution_after_symbol_changes() {
    let db = Database::open_memory().unwrap();
    let callee = test_symbol("foo", SymbolKind::Function, "a.py", 1);
    db.insert_symbol(&callee).unwrap();
    let caller = test_symbol("bar", SymbolKind::Function, "b.py", 1);
    db.insert_symbol(&caller).unwrap();

    let call = Edge::new(&caller.id, "foo", EdgeKind::Calls, "b.py", 5);
    db.insert_edge(&call).unwrap();
    db.resolve_edges().unwrap();

    // Delete and re-insert the callee.
    db.delete_symbol(&callee.id).unwrap();
    db.insert_symbol(&callee).unwrap();

    let mut dirty_files = std::collections::HashSet::new();
    dirty_files.insert("a.py".to_string());
    let resolved_again = db.resolve_edges_scoped(&dirty_files).unwrap();
    assert_eq!(resolved_again, 1);
}
/// A scoped in-degree recompute over one caller's file keeps the target's
/// in-degree at the value computed by the full pass.
#[test]
fn test_compute_in_degrees_scoped() {
    let db = Database::open_memory().unwrap();
    let foo = test_symbol("foo", SymbolKind::Function, "a.py", 1);
    let bar = test_symbol("bar", SymbolKind::Function, "b.py", 1);
    let baz = test_symbol("baz", SymbolKind::Function, "c.py", 1);
    for sym in [&foo, &bar, &baz] {
        db.insert_symbol(sym).unwrap();
    }

    // Both bar and baz call foo.
    let bar_calls_foo = Edge::new(&bar.id, "foo", EdgeKind::Calls, "b.py", 5);
    db.insert_edge(&bar_calls_foo).unwrap();
    let baz_calls_foo = Edge::new(&baz.id, "foo", EdgeKind::Calls, "c.py", 3);
    db.insert_edge(&baz_calls_foo).unwrap();
    db.resolve_edges().unwrap();
    db.compute_in_degrees().unwrap();

    // In-degree of the first search hit for "foo".
    let foo_in_degree = || db.search("foo", None, None, 10).unwrap()[0].in_degree;
    assert_eq!(foo_in_degree(), 2);

    let mut dirty_files = std::collections::HashSet::new();
    dirty_files.insert("b.py".to_string());
    db.compute_in_degrees_scoped(&dirty_files).unwrap();
    assert_eq!(foo_in_degree(), 2);
}
/// Query-plan regression test: asks SQLite for the plan of the import-based
/// symbol lookup below and asserts that the plan text mentions
/// `idx_edges_kind_target`.
#[test]
fn test_tier2_import_resolution_plan_uses_kind_target_index() {
let db = Database::open_memory().unwrap();
// Prepare the EXPLAIN QUERY PLAN form of the query; the statement is only
// planned here, not executed against real rows.
let mut stmt = db
.conn
.prepare(
"EXPLAIN QUERY PLAN SELECT s.id FROM symbols s
INNER JOIN edges ie ON ie.kind = 'imports' AND ie.target_name = ?1
AND ie.target_id IS NOT NULL
INNER JOIN symbols is2 ON is2.id = ie.source_id AND is2.file_path = ?2
INNER JOIN symbols resolved ON resolved.id = ie.target_id
WHERE s.name = ?1 AND s.kind != 'import'
AND s.file_path = resolved.file_path
LIMIT 1",
)
.unwrap();
// Column 3 of each plan row is read as text (the human-readable `detail`
// column of SQLite's EXPLAIN QUERY PLAN output); rows are joined with
// newlines. The bound values "x"/"y" are placeholders.
let plan = stmt
.query_map(params!["x", "y"], |row| row.get::<_, String>(3))
.unwrap()
.collect::<std::result::Result<Vec<_>, _>>()
.unwrap()
.join("\n");
assert!(
plan.contains("idx_edges_kind_target"),
"tier-2 must drive off edges(kind, target_name); got plan:\n{plan}"
);
}
/// Query-plan regression test for two reference-lookup query shapes
/// (unfiltered and kind-filtered): each plan must use a multi-index OR over
/// the expected edge indexes and must not contain a full scan of `edges`.
#[test]
fn test_refs_plan_uses_multi_index_or_not_full_scan() {
let db = Database::open_memory().unwrap();
// Seed 400 symbols and 400 edges, then run ANALYZE so the planner has
// table statistics to work from.
let syms: Vec<Symbol> = (0..400)
.map(|i| test_symbol(&format!("s{i}"), SymbolKind::Function, "a.py", i))
.collect();
db.insert_symbols(&syms).unwrap();
let edges: Vec<Edge> = (0..400)
.map(|i| {
let mut e = Edge::new(
&syms[i as usize].id,
format!("t{i}"),
EdgeKind::Calls,
"a.py",
i,
);
// Every even-numbered edge gets a target_id; odd ones keep the value
// `Edge::new` produced.
if i % 2 == 0 {
e.target_id = Some(syms[i as usize].id.clone());
}
e
})
.collect();
db.insert_edges(&edges).unwrap();
db.conn.execute_batch("ANALYZE;").unwrap();
// Runs an EXPLAIN QUERY PLAN statement with the single placeholder bound to
// "x" and returns column 3 of every plan row joined by newlines.
let explain = |sql: &str| -> String {
let mut stmt = db.conn.prepare(sql).unwrap();
stmt.query_map(params!["x"], |row| row.get::<_, String>(3))
.unwrap()
.collect::<std::result::Result<Vec<_>, _>>()
.unwrap()
.join("\n")
};
// Fails when the plan text contains a scan of the edges table, spelled
// either through its alias `e` (mid-plan or as the final line) or by name.
let assert_no_edge_scan = |plan: &str, ctx: &str| {
assert!(
!plan.contains("SCAN e\n")
&& !plan.ends_with("SCAN e")
&& !plan.contains("SCAN edges"),
"refs() {ctx} must not full-scan edges; got plan:\n{plan}"
);
};
// Shape 1: no kind filter.
let unfiltered = explain(
"EXPLAIN QUERY PLAN
SELECT e.id FROM edges e
LEFT JOIN symbols s ON e.source_id = s.id
WHERE e.target_name = ?1
OR e.target_id IN (SELECT id FROM symbols WHERE name = ?1)",
);
assert!(
unfiltered.contains("MULTI-INDEX OR"),
"refs() unfiltered must use a multi-index OR; got plan:\n{unfiltered}"
);
assert!(
unfiltered.contains("idx_edges_target (target_name="),
"refs() literal arm must seek idx_edges_target on target_name; got plan:\n{unfiltered}"
);
assert!(
unfiltered.contains("idx_edges_target_id (target_id="),
"refs() resolved arm must seek idx_edges_target_id on target_id; got plan:\n{unfiltered}"
);
assert_no_edge_scan(&unfiltered, "unfiltered");
// Shape 2: the kind predicate is repeated inside each OR arm.
let kind_filtered = explain(
"EXPLAIN QUERY PLAN
SELECT e.id FROM edges e
LEFT JOIN symbols s ON e.source_id = s.id
WHERE (e.target_name = ?1 AND e.kind = 'calls')
OR (e.target_id IN (SELECT id FROM symbols WHERE name = ?1)
AND e.kind = 'calls')",
);
assert!(
kind_filtered.contains("MULTI-INDEX OR"),
"refs() kind-filtered must use a multi-index OR; got plan:\n{kind_filtered}"
);
assert!(
kind_filtered.contains("idx_edges_kind_target (kind=? AND target_name="),
"refs() kind-filtered literal arm must seek (kind, target_name); got plan:\n{kind_filtered}"
);
assert!(
kind_filtered.contains("idx_edges_target_id (target_id="),
"refs() kind-filtered resolved arm must seek target_id; got plan:\n{kind_filtered}"
);
assert_no_edge_scan(&kind_filtered, "kind-filtered");
}
/// Query-plan regression test for the recursive impact query below: the plan
/// must seek both edge-target indexes, must not mention a correlated
/// subquery, and must not contain a full scan of `edges`.
#[test]
fn test_impact_recursive_step_avoids_full_edge_scan() {
let db = Database::open_memory().unwrap();
// The CTE has one seed SELECT plus two recursive arms: one joins edges by
// `target_name`, the other goes through `symbols` to join by `target_id`.
let mut stmt = db
.conn
.prepare(
"EXPLAIN QUERY PLAN
WITH RECURSIVE impacted(edge_id, source_id, target_name, target_id,
kind, file_path, line, resolution_source, source_name, depth) AS (
SELECT e.id, e.source_id, e.target_name, e.target_id, e.kind,
e.file_path, e.line, e.resolution_source, s.name, 1
FROM edges e LEFT JOIN symbols s ON e.source_id = s.id
WHERE e.target_name = ?1
OR e.target_id IN (SELECT id FROM symbols WHERE name = ?1)
UNION
SELECT e.id, e.source_id, e.target_name, e.target_id, e.kind,
e.file_path, e.line, e.resolution_source, s.name, i.depth + 1
FROM impacted i
JOIN edges e ON e.target_name = i.source_name
LEFT JOIN symbols s ON e.source_id = s.id
WHERE i.source_name IS NOT NULL AND i.depth < ?2
UNION
SELECT e.id, e.source_id, e.target_name, e.target_id, e.kind,
e.file_path, e.line, e.resolution_source, s.name, i.depth + 1
FROM impacted i
JOIN symbols t ON t.name = i.source_name
JOIN edges e ON e.target_id = t.id
LEFT JOIN symbols s ON e.source_id = s.id
WHERE i.source_name IS NOT NULL AND i.depth < ?2)
SELECT source_id, MIN(depth) FROM impacted GROUP BY edge_id
ORDER BY depth, edge_id",
)
.unwrap();
// Bind a placeholder name and a depth limit of 3, then collect column 3 of
// every plan row into one newline-joined string.
let plan = stmt
.query_map(params!["x", 3], |row| row.get::<_, String>(3))
.unwrap()
.collect::<std::result::Result<Vec<_>, _>>()
.unwrap()
.join("\n");
assert!(
plan.contains("idx_edges_target (target_name="),
"impact() literal arm must seek idx_edges_target on target_name; got plan:\n{plan}"
);
assert!(
plan.contains("idx_edges_target_id (target_id="),
"impact() resolved arm must seek idx_edges_target_id on target_id; got plan:\n{plan}"
);
assert!(
!plan.contains("CORRELATED"),
"impact() must not run a correlated subquery per edge; got plan:\n{plan}"
);
// A scan of edges may be spelled via the alias `e` (mid-plan or as the last
// line) or via the table name.
assert!(
!plan.contains("SCAN e\n") && !plan.ends_with("SCAN e") && !plan.contains("SCAN edges"),
"impact() must not full-scan edges; got plan:\n{plan}"
);
}
/// Query-plan regression test: deleting edges by `file_path` must be planned
/// through `idx_edges_file`.
#[test]
fn test_per_file_edge_delete_uses_file_index() {
    let db = Database::open_memory().unwrap();
    let explain_sql = "EXPLAIN QUERY PLAN DELETE FROM edges WHERE file_path = ?1";
    let mut plan_stmt = db.conn.prepare(explain_sql).unwrap();

    // Column 3 of each plan row is read as text; rows are newline-joined.
    let plan_rows = plan_stmt
        .query_map(params!["a.py"], |row| row.get::<_, String>(3))
        .unwrap()
        .collect::<std::result::Result<Vec<_>, _>>()
        .unwrap();
    let plan = plan_rows.join("\n");

    let uses_file_index = plan.contains("idx_edges_file");
    assert!(
        uses_file_index,
        "per-file edge delete must drive off edges(file_path); got plan:\n{plan}"
    );
}
/// Query-plan regression test: the `UPDATE ... FROM` statement below, which
/// sets `in_degree` from grouped edge counts, must not be planned with a
/// correlated subquery.
#[test]
fn test_compute_in_degrees_plan_has_no_correlated_subquery() {
let db = Database::open_memory().unwrap();
let mut stmt = db
.conn
.prepare(
"EXPLAIN QUERY PLAN
UPDATE symbols SET in_degree = counts.cnt
FROM (
SELECT target_id, COUNT(*) AS cnt
FROM edges WHERE target_id IS NOT NULL
GROUP BY target_id
) AS counts
WHERE symbols.id = counts.target_id",
)
.unwrap();
// The statement takes no parameters; column 3 of each plan row is collected
// and newline-joined.
let plan = stmt
.query_map([], |row| row.get::<_, String>(3))
.unwrap()
.collect::<std::result::Result<Vec<_>, _>>()
.unwrap()
.join("\n");
// The plan text is upper-cased first, so the check is case-insensitive.
assert!(
!plan.to_uppercase().contains("CORRELATED"),
"in-degree UPDATE must not use a correlated subquery; got plan:\n{plan}"
);
}
/// When one caller's edges are cleared, the scoped invalidate / resolve /
/// in-degree pipeline lowers the target's in-degree accordingly.
#[test]
fn test_compute_in_degrees_scoped_resets_target_that_lost_edge() {
    let db = Database::open_memory().unwrap();
    let foo = test_symbol("foo", SymbolKind::Function, "a.py", 1);
    let bar = test_symbol("bar", SymbolKind::Function, "b.py", 1);
    let baz = test_symbol("baz", SymbolKind::Function, "c.py", 1);
    for sym in [&foo, &bar, &baz] {
        db.insert_symbol(sym).unwrap();
    }

    // Both bar and baz call foo.
    let bar_calls_foo = Edge::new(&bar.id, "foo", EdgeKind::Calls, "b.py", 5);
    db.insert_edge(&bar_calls_foo).unwrap();
    let baz_calls_foo = Edge::new(&baz.id, "foo", EdgeKind::Calls, "c.py", 3);
    db.insert_edge(&baz_calls_foo).unwrap();
    db.resolve_edges().unwrap();
    db.compute_in_degrees().unwrap();

    // In-degree of the first search hit for "foo".
    let foo_in_degree = || db.search("foo", None, None, 10).unwrap()[0].in_degree;
    assert_eq!(foo_in_degree(), 2);

    // Drop b.py's edges, then run the scoped pipeline for that file.
    db.clear_edges_for_file("b.py").unwrap();
    let mut dirty_files = std::collections::HashSet::new();
    dirty_files.insert("b.py".to_string());
    db.invalidate_edges_targeting(&dirty_files).unwrap();
    db.resolve_edges_scoped(&dirty_files).unwrap();
    db.compute_in_degrees_scoped(&dirty_files).unwrap();

    assert_eq!(foo_in_degree(), 1);
}
/// Opening a new database file with dimension 384 records "384" under the
/// `embedding_dimension` metadata key.
#[test]
fn test_open_stores_embedding_dimension() {
    let scratch = tempfile::TempDir::new().unwrap();
    let db_file = scratch.path().join("test.db");
    let db = Database::open(&db_file, 384).unwrap();

    let recorded = db.get_metadata("embedding_dimension").unwrap();
    let recorded: String = recorded.expect("dimension should be stored");
    assert_eq!(recorded, "384");
}
/// Reopening a database with a different embedding dimension drops the
/// stored embeddings and records the new dimension.
#[test]
fn test_open_with_different_dimension_clears_embeddings() {
    let scratch = tempfile::TempDir::new().unwrap();
    let db_file = scratch.path().join("test.db");

    // First session: dimension 384, one stored embedding.
    let first = Database::open(&db_file, 384).unwrap();
    let sym = Symbol::new("foo", SymbolKind::Function, "a.py", 1, 10, 0, 100, None);
    first.insert_symbol(&sym).unwrap();
    first
        .upsert_symbol_content(&sym.id, "foo", "def foo():", "header")
        .unwrap();
    let embedding_id = first.get_or_create_embedding_id(&sym.id).unwrap();
    let zero_vector = vec![0u8; 384 * 4];
    first.insert_embeddings(&[(embedding_id, zero_vector)]).unwrap();
    assert_eq!(first.embedding_count().unwrap(), 1);
    drop(first);

    // Second session: dimension 768.
    let second = Database::open(&db_file, 768).unwrap();
    assert_eq!(second.embedding_count().unwrap(), 0);
    let recorded = second.get_metadata("embedding_dimension").unwrap();
    let recorded: String = recorded.expect("dimension should be updated");
    assert_eq!(recorded, "768");
}
/// Reopening a database with the same embedding dimension keeps the stored
/// embeddings.
#[test]
fn test_open_same_dimension_preserves_embeddings() {
    let scratch = tempfile::TempDir::new().unwrap();
    let db_file = scratch.path().join("test.db");

    // First session: store one embedding, then close the handle.
    let first = Database::open(&db_file, 384).unwrap();
    let sym = Symbol::new("bar", SymbolKind::Function, "b.py", 1, 10, 0, 100, None);
    first.insert_symbol(&sym).unwrap();
    first
        .upsert_symbol_content(&sym.id, "bar", "def bar():", "header")
        .unwrap();
    let embedding_id = first.get_or_create_embedding_id(&sym.id).unwrap();
    let zero_vector = vec![0u8; 384 * 4];
    first.insert_embeddings(&[(embedding_id, zero_vector)]).unwrap();
    drop(first);

    // Second session with the same dimension.
    let second = Database::open(&db_file, 384).unwrap();
    assert_eq!(second.embedding_count().unwrap(), 1);
}
/// Reopening with `DEFAULT_EMBEDDING_DIM` a database that was created with
/// dimension 768 keeps both the embeddings and the stored dimension.
#[test]
fn test_default_dim_preserves_stored_non_default() {
    let scratch = tempfile::TempDir::new().unwrap();
    let db_file = scratch.path().join("test.db");

    // First session: dimension 768, one stored embedding.
    let first = Database::open(&db_file, 768).unwrap();
    let sym = Symbol::new("baz", SymbolKind::Function, "c.py", 1, 10, 0, 100, None);
    first.insert_symbol(&sym).unwrap();
    first
        .upsert_symbol_content(&sym.id, "baz", "def baz():", "header")
        .unwrap();
    let embedding_id = first.get_or_create_embedding_id(&sym.id).unwrap();
    let zero_vector = vec![0u8; 768 * 4];
    first.insert_embeddings(&[(embedding_id, zero_vector)]).unwrap();
    drop(first);

    // Second session: the default dimension is requested.
    let second = Database::open(&db_file, DEFAULT_EMBEDDING_DIM).unwrap();
    assert_eq!(second.embedding_count().unwrap(), 1);

    let read_dimension =
        "SELECT CAST(value AS INTEGER) FROM metadata WHERE key = 'embedding_dimension'";
    let recorded: i64 = second
        .conn
        .query_row(read_dimension, [], |row| row.get(0))
        .unwrap();
    assert_eq!(recorded, 768);
}
/// Reopening with dimension 1536 a database created with dimension 768
/// drops the stored embeddings.
#[test]
fn test_explicit_non_default_dim_wipes_different_stored() {
    let scratch = tempfile::TempDir::new().unwrap();
    let db_file = scratch.path().join("test.db");

    // First session: dimension 768, one stored embedding.
    let first = Database::open(&db_file, 768).unwrap();
    let sym = Symbol::new("qux", SymbolKind::Function, "d.py", 1, 10, 0, 100, None);
    first.insert_symbol(&sym).unwrap();
    first
        .upsert_symbol_content(&sym.id, "qux", "def qux():", "header")
        .unwrap();
    let embedding_id = first.get_or_create_embedding_id(&sym.id).unwrap();
    let zero_vector = vec![0u8; 768 * 4];
    first.insert_embeddings(&[(embedding_id, zero_vector)]).unwrap();
    drop(first);

    // Second session: a different dimension.
    let second = Database::open(&db_file, 1536).unwrap();
    assert_eq!(second.embedding_count().unwrap(), 0);
}
/// Reopening with an unchanged dimension must leave the
/// `embedding_dimension` metadata row in place: its rowid is compared
/// before and after the second open.
#[test]
fn test_reopen_same_dim_does_not_rewrite_metadata() {
    let scratch = tempfile::TempDir::new().unwrap();
    let db_file = scratch.path().join("test.db");

    // Reads the row's rowid through a separate, short-lived raw connection.
    let read_rowid = || -> i64 {
        let raw = Connection::open(&db_file).unwrap();
        raw.query_row(
            "SELECT rowid FROM metadata WHERE key = 'embedding_dimension'",
            [],
            |row| row.get(0),
        )
        .unwrap()
    };

    // Both handles stay alive until the end of the test.
    let _first_open = Database::open(&db_file, 384).unwrap();
    let rowid_before = read_rowid();
    let _second_open = Database::open(&db_file, 384).unwrap();
    let rowid_after = read_rowid();

    assert_eq!(
        rowid_before, rowid_after,
        "same-dim reopen should not rewrite the embedding_dimension row"
    );
}
/// `retry_busy` returns a non-busy error right away: the closure runs
/// exactly once.
#[test]
fn test_retry_busy_returns_on_non_busy_error() {
    let calls = std::cell::Cell::new(0);
    let outcome = retry_busy(|| -> std::result::Result<(), rusqlite::Error> {
        let so_far = calls.get();
        calls.set(so_far + 1);
        Err(rusqlite::Error::InvalidQuery)
    });

    let is_invalid_query = matches!(outcome, Err(rusqlite::Error::InvalidQuery));
    assert!(is_invalid_query);
    assert_eq!(calls.get(), 1, "non-busy errors must not retry");
}
/// `retry_busy` retries after a busy failure and returns the value produced
/// by the second call.
#[test]
fn test_retry_busy_succeeds_after_transient_busy() {
    // Builds the "database is locked" busy failure returned on the first call.
    fn busy_failure() -> rusqlite::Error {
        let raw = rusqlite::ffi::Error {
            code: rusqlite::ErrorCode::DatabaseBusy,
            extended_code: 5,
        };
        rusqlite::Error::SqliteFailure(raw, Some("database is locked".to_string()))
    }

    let calls = std::cell::Cell::new(0);
    let outcome = retry_busy(|| -> std::result::Result<u32, rusqlite::Error> {
        let call_number = calls.get() + 1;
        calls.set(call_number);
        // Only the very first call fails.
        if call_number == 1 {
            return Err(busy_failure());
        }
        Ok(42)
    });

    assert_eq!(outcome.unwrap(), 42);
    assert_eq!(calls.get(), 2);
}
/// When every call fails busy, `retry_busy` surfaces the busy error after
/// one initial call plus one call per entry of `MIGRATION_RETRY_BACKOFF_MS`.
#[test]
fn test_retry_busy_exhausts_and_propagates() {
    // Builds the "database is locked" busy failure returned on every call.
    fn busy_failure() -> rusqlite::Error {
        let raw = rusqlite::ffi::Error {
            code: rusqlite::ErrorCode::DatabaseBusy,
            extended_code: 5,
        };
        rusqlite::Error::SqliteFailure(raw, Some("database is locked".to_string()))
    }

    let calls = std::cell::Cell::new(0);
    let outcome = retry_busy(|| -> std::result::Result<(), rusqlite::Error> {
        calls.set(calls.get() + 1);
        Err(busy_failure())
    });

    let still_busy = matches!(
        outcome,
        Err(rusqlite::Error::SqliteFailure(
            rusqlite::ffi::Error {
                code: rusqlite::ErrorCode::DatabaseBusy,
                ..
            },
            _
        ))
    );
    assert!(still_busy);

    let expected_calls = MIGRATION_RETRY_BACKOFF_MS.len() + 1;
    assert_eq!(calls.get(), expected_calls);
}
/// Test helper: builds an `EmbeddingFingerprint` from borrowed provider and
/// model names plus a dimension.
fn fp(provider: &str, model: &str, dim: usize) -> EmbeddingFingerprint {
    let provider = String::from(provider);
    let model = String::from(model);
    EmbeddingFingerprint {
        provider,
        model,
        dimension: dim,
    }
}
/// Test helper: inserts a function symbol named `sym_name` in `f.py`, stores
/// content for it, and attaches an all-zero embedding of `dim * 4` bytes.
fn seed_embedding(db: &Database, dim: usize, sym_name: &str) {
    let seeded = Symbol::new(sym_name, SymbolKind::Function, "f.py", 1, 10, 0, 100, None);
    db.insert_symbol(&seeded).unwrap();
    db.upsert_symbol_content(&seeded.id, sym_name, "def f():", "header")
        .unwrap();

    let embedding_id = db.get_or_create_embedding_id(&seeded.id).unwrap();
    let zero_vector = vec![0u8; dim * 4];
    db.insert_embeddings(&[(embedding_id, zero_vector)]).unwrap();
}
/// Reconciling against an identical fingerprint a second time leaves the
/// stored embedding in place.
#[test]
fn test_fingerprint_match_is_noop() {
    let scratch = tempfile::TempDir::new().unwrap();
    let db_file = scratch.path().join("test.db");
    let db = Database::open(&db_file, 384).unwrap();

    let fingerprint = fp("local", "BGE-small-en-v1.5", 384);
    db.reconcile_embedding_fingerprint(&fingerprint).unwrap();
    seed_embedding(&db, 384, "foo");

    // Same fingerprint again: the embedding must survive.
    db.reconcile_embedding_fingerprint(&fingerprint).unwrap();
    let surviving = db.embedding_count().unwrap();
    assert_eq!(surviving, 1);
}
/// Reconciling with a different provider (same model and dimension) drops
/// the stored embeddings and records the new provider.
#[test]
fn test_fingerprint_provider_swap_wipes() {
    let scratch = tempfile::TempDir::new().unwrap();
    let db_file = scratch.path().join("test.db");
    let db = Database::open(&db_file, 384).unwrap();

    let original = fp("local", "BGE-small-en-v1.5", 384);
    db.reconcile_embedding_fingerprint(&original).unwrap();
    seed_embedding(&db, 384, "bar");
    assert_eq!(db.embedding_count().unwrap(), 1);

    // Only the provider differs.
    let swapped = fp("ollama", "BGE-small-en-v1.5", 384);
    db.reconcile_embedding_fingerprint(&swapped).unwrap();
    assert_eq!(db.embedding_count().unwrap(), 0);

    let recorded_provider = db.get_metadata("embedding_provider").unwrap();
    assert_eq!(recorded_provider.as_deref(), Some("ollama"));
}
/// Reconciling with a different model (same provider and dimension) drops
/// the stored embeddings and records the new model.
#[test]
fn test_fingerprint_model_swap_wipes() {
    let scratch = tempfile::TempDir::new().unwrap();
    let db_file = scratch.path().join("test.db");
    let db = Database::open(&db_file, 384).unwrap();

    let original = fp("local", "BGE-small-en-v1.5", 384);
    db.reconcile_embedding_fingerprint(&original).unwrap();
    seed_embedding(&db, 384, "baz");
    assert_eq!(db.embedding_count().unwrap(), 1);

    // Only the model differs.
    let swapped = fp("local", "AllMiniLML6V2", 384);
    db.reconcile_embedding_fingerprint(&swapped).unwrap();
    assert_eq!(db.embedding_count().unwrap(), 0);

    let recorded_model = db.get_metadata("embedding_model").unwrap();
    assert_eq!(recorded_model.as_deref(), Some("AllMiniLML6V2"));
}
/// When no provider is recorded yet, the first reconcile writes the provider
/// and model metadata without dropping existing embeddings.
#[test]
fn test_fingerprint_backfill_does_not_wipe() {
    let scratch = tempfile::TempDir::new().unwrap();
    let db_file = scratch.path().join("test.db");
    let db = Database::open(&db_file, 384).unwrap();

    // Seed an embedding before any fingerprint has been reconciled.
    seed_embedding(&db, 384, "qux");
    let provider_before = db.get_metadata("embedding_provider").unwrap();
    assert!(provider_before.is_none());
    assert_eq!(db.embedding_count().unwrap(), 1);

    db.reconcile_embedding_fingerprint(&fp("local", "BGE-small-en-v1.5", 384))
        .unwrap();

    let surviving = db.embedding_count().unwrap();
    assert_eq!(surviving, 1, "backfill must preserve existing embeddings");

    let provider_after = db.get_metadata("embedding_provider").unwrap();
    assert_eq!(provider_after.as_deref(), Some("local"));
    let model_after = db.get_metadata("embedding_model").unwrap();
    assert_eq!(model_after.as_deref(), Some("BGE-small-en-v1.5"));
}
/// Reconciling with a fingerprint of a different dimension drops the stored
/// embeddings, records the new dimension, and leaves `symbol_vec` present.
#[test]
fn test_fingerprint_dim_change_wipes() {
    let scratch = tempfile::TempDir::new().unwrap();
    let db_file = scratch.path().join("test.db");
    let db = Database::open(&db_file, 384).unwrap();

    let original = fp("local", "BGE-small-en-v1.5", 384);
    db.reconcile_embedding_fingerprint(&original).unwrap();
    seed_embedding(&db, 384, "quux");
    assert_eq!(db.embedding_count().unwrap(), 1);

    // Switch to a 1024-dimension model.
    let resized = fp("local", "BGELargeENV15", 1024);
    db.reconcile_embedding_fingerprint(&resized).unwrap();
    assert_eq!(db.embedding_count().unwrap(), 0);

    let read_dimension =
        "SELECT CAST(value AS INTEGER) FROM metadata WHERE key = 'embedding_dimension'";
    let recorded_dim: i64 = db
        .conn
        .query_row(read_dimension, [], |row| row.get(0))
        .unwrap();
    assert_eq!(recorded_dim, 1024);

    let vec_table_present = symbol_vec_exists(&db.conn).unwrap();
    assert!(
        vec_table_present,
        "successful reconcile must recreate symbol_vec"
    );
}
/// `open_readonly` on an existing database reports itself read-only and pins
/// the schema version and embedding fingerprint found at attach time.
#[test]
fn test_open_readonly_succeeds_and_marks_read_only() {
    let scratch = tempfile::TempDir::new().unwrap();
    let db_file = scratch.path().join("test.db");

    // Primary session: record a fingerprint and seed one embedding.
    let primary = Database::open(&db_file, 384).unwrap();
    let fingerprint = fp("local", "BGE-small-en-v1.5", 384);
    primary
        .reconcile_embedding_fingerprint(&fingerprint)
        .unwrap();
    seed_embedding(&primary, 384, "foo");
    drop(primary);

    let reader = Database::open_readonly(&db_file).unwrap();
    let flagged_read_only = reader.is_read_only();
    assert!(flagged_read_only, "open_readonly must set the flag");

    let pinned = reader.pinned_attach().expect("read-only attach pins state");
    assert_eq!(pinned.schema_version, SCHEMA_VERSION);
    let expected_embedding = Some(fp("local", "BGE-small-en-v1.5", 384));
    assert_eq!(pinned.embedding, expected_embedding);
}
/// A read-only handle can read rows written by an earlier read-write
/// session.
#[test]
fn test_open_readonly_can_query_existing_data() {
    let scratch = tempfile::TempDir::new().unwrap();
    let db_file = scratch.path().join("test.db");

    // Primary session: insert one symbol, then close the handle.
    let primary = Database::open(&db_file, 384).unwrap();
    let sym = Symbol::new("callable", SymbolKind::Function, "a.py", 1, 10, 0, 100, None);
    primary.insert_symbol(&sym).unwrap();
    drop(primary);

    let reader = Database::open_readonly(&db_file).unwrap();
    let symbol_rows: i64 = reader
        .conn
        .query_row("SELECT COUNT(*) FROM symbols", [], |row| row.get(0))
        .unwrap();
    assert_eq!(symbol_rows, 1, "reader sees primary's data");
}
/// A write issued through a read-only handle fails, and the error text
/// contains one of "read", "readonly" or "write".
#[test]
fn test_open_readonly_refuses_writes() {
    let scratch = tempfile::TempDir::new().unwrap();
    let db_file = scratch.path().join("test.db");

    // Create the database file, then close the read-write handle.
    let creator = Database::open(&db_file, 384).unwrap();
    drop(creator);

    let reader = Database::open_readonly(&db_file).unwrap();
    let write_attempt = reader.conn.execute(
        "INSERT OR REPLACE INTO metadata (key, value) VALUES ('x', 'y')",
        [],
    );
    let msg = write_attempt.unwrap_err().to_string();

    let looks_read_only = ["read", "readonly", "write"]
        .iter()
        .any(|needle| msg.contains(*needle));
    assert!(
        looks_read_only,
        "read-only DB write should fail with a read-only-flavored error, got: {msg}"
    );
}
/// `open_readonly` fails with `DbError::SchemaDrift` when the stored
/// `schema_version` differs from this binary's `SCHEMA_VERSION`.
#[test]
fn test_open_readonly_detects_schema_drift() {
    let scratch = tempfile::TempDir::new().unwrap();
    let db_file = scratch.path().join("test.db");

    // Overwrite the stored version with a value this binary does not expect.
    let primary = Database::open(&db_file, 384).unwrap();
    primary.set_metadata("schema_version", "9999").unwrap();
    drop(primary);

    let failure = Database::open_readonly(&db_file).unwrap_err();
    match &failure {
        DbError::SchemaDrift { expected, stored } => {
            assert_eq!(*expected, SCHEMA_VERSION);
            assert_eq!(*stored, 9999);
        }
        other => panic!("expected SchemaDrift, got {other:?}"),
    }
}
/// A metadata value written by a read-write session is unchanged after a
/// read-only open followed by another read-write open.
#[test]
fn test_open_readonly_does_not_run_migrations() {
    let scratch = tempfile::TempDir::new().unwrap();
    let db_file = scratch.path().join("test.db");

    // Leave a marker row behind, then close the handle.
    let writer = Database::open(&db_file, 384).unwrap();
    writer.set_metadata("user_marker", "untouched").unwrap();
    drop(writer);

    // The reader stays open while the primary handle is reopened.
    let _reader = Database::open_readonly(&db_file).unwrap();
    let primary = Database::open(&db_file, 384).unwrap();

    let marker = primary.get_metadata("user_marker").unwrap();
    assert_eq!(marker.as_deref(), Some("untouched"));
}
/// A handle from plain `Database::open` reports itself as writable and
/// carries no pinned attach state.
#[test]
fn test_open_default_is_not_read_only() {
    let tmp = tempfile::TempDir::new().unwrap();
    let path = tmp.path().join("test.db");
    let handle = Database::open(&path, 384).unwrap();

    let read_only = handle.is_read_only();
    assert!(!read_only);
    let pin = handle.pinned_attach();
    assert!(pin.is_none());
}
/// `open_existing_rw` yields a writable, unpinned handle that still sees
/// metadata written earlier and accepts new writes.
#[test]
fn test_open_existing_rw_opens_writable_and_skips_migrations() {
    let tmp = tempfile::TempDir::new().unwrap();
    let path = tmp.path().join("test.db");

    let creator = Database::open(&path, 384).unwrap();
    creator.set_metadata("marker", "preserved").unwrap();
    drop(creator);

    let promoted = Database::open_existing_rw(&path).unwrap();
    assert!(!promoted.is_read_only(), "open_existing_rw is RW");
    assert!(promoted.pinned_attach().is_none(), "RW opens have no pin");

    let marker = promoted.get_metadata("marker").unwrap();
    assert_eq!(marker.as_deref(), Some("preserved"));

    // The handle must really be writable, not just claim to be.
    promoted.set_metadata("write_check", "ok").unwrap();
}
/// `open_existing_rw` must report `DbError::SchemaDrift` when the stored
/// `schema_version` differs from `SCHEMA_VERSION`.
#[test]
fn test_open_existing_rw_detects_schema_drift() {
    let tmp = tempfile::TempDir::new().unwrap();
    let path = tmp.path().join("test.db");

    // Overwrite the stored version with a value no binary would expect.
    let writer = Database::open(&path, 384).unwrap();
    writer.set_metadata("schema_version", "9999").unwrap();
    drop(writer);

    let (expected, stored) = match Database::open_existing_rw(&path).unwrap_err() {
        DbError::SchemaDrift { expected, stored } => (expected, stored),
        other => panic!("expected SchemaDrift, got {other:?}"),
    };
    assert_eq!(expected, SCHEMA_VERSION);
    assert_eq!(stored, 9999);
}
/// Re-opening the database without reconciling must keep the recorded
/// embedding provider/model and the previously seeded embedding.
#[test]
fn test_database_open_alone_does_not_change_fingerprint() {
    let tmp = tempfile::TempDir::new().unwrap();
    let path = tmp.path().join("test.db");
    let original_fp = fp("local", "BGE-small-en-v1.5", 384);

    // First session: record the fingerprint and seed one embedding.
    let first = Database::open(&path, 384).unwrap();
    first.reconcile_embedding_fingerprint(&original_fp).unwrap();
    seed_embedding(&first, 384, "guard");
    drop(first);

    // Second session: open and close without touching anything.
    drop(Database::open(&path, 384).unwrap());

    let db = Database::open(&path, 384).unwrap();
    let provider = db.get_metadata("embedding_provider").unwrap();
    assert_eq!(provider.as_deref(), Some("local"));
    let model = db.get_metadata("embedding_model").unwrap();
    assert_eq!(model.as_deref(), Some("BGE-small-en-v1.5"));
    assert_eq!(db.embedding_count().unwrap(), 1);
}
/// With the `schema_version` row deleted, `open_readonly` must report
/// `SchemaDrift` with `stored == 0`.
#[test]
fn test_open_readonly_missing_schema_version_is_schema_drift() {
    let tmp = tempfile::TempDir::new().unwrap();
    let path = tmp.path().join("test.db");

    let writer = Database::open(&path, 384).unwrap();
    writer
        .conn
        .execute("DELETE FROM metadata WHERE key = 'schema_version'", [])
        .unwrap();
    drop(writer);

    let (expected, stored) = match Database::open_readonly(&path).unwrap_err() {
        DbError::SchemaDrift { expected, stored } => (expected, stored),
        other => panic!("expected SchemaDrift, got {other:?}"),
    };
    assert_eq!(expected, SCHEMA_VERSION);
    assert_eq!(stored, 0, "missing row should surface as stored=0");
}
/// A SQLite file that has no `metadata` table at all must make
/// `open_readonly` report `SchemaDrift` with `stored == 0`.
#[test]
fn test_open_readonly_missing_metadata_table_is_schema_drift() {
    let tmp = tempfile::TempDir::new().unwrap();
    let path = tmp.path().join("test.db");

    // Build a foreign database through a raw connection, bypassing `Database`.
    let raw = Connection::open(&path).unwrap();
    raw.execute_batch("CREATE TABLE unrelated (x INTEGER);")
        .unwrap();
    drop(raw);

    let (expected, stored) = match Database::open_readonly(&path).unwrap_err() {
        DbError::SchemaDrift { expected, stored } => (expected, stored),
        other => panic!("expected SchemaDrift, got {other:?}"),
    };
    assert_eq!(expected, SCHEMA_VERSION);
    assert_eq!(stored, 0, "missing metadata table should be stored=0");
}
/// With the `schema_version` row deleted, `open_existing_rw` must report
/// `SchemaDrift` with `stored == 0`.
#[test]
fn test_open_existing_rw_missing_schema_version_is_schema_drift() {
    let tmp = tempfile::TempDir::new().unwrap();
    let path = tmp.path().join("test.db");

    let writer = Database::open(&path, 384).unwrap();
    writer
        .conn
        .execute("DELETE FROM metadata WHERE key = 'schema_version'", [])
        .unwrap();
    drop(writer);

    let (expected, stored) = match Database::open_existing_rw(&path).unwrap_err() {
        DbError::SchemaDrift { expected, stored } => (expected, stored),
        other => panic!("expected SchemaDrift, got {other:?}"),
    };
    assert_eq!(expected, SCHEMA_VERSION);
    assert_eq!(stored, 0);
}
/// After `symbol_vec` is dropped while the stored dimension metadata still
/// reads "384", reconciling with the same fingerprint must leave a
/// `symbol_vec` entry in `sqlite_master` again.
#[test]
fn test_reconcile_rebuilds_when_metadata_matches_but_symbol_vec_missing() {
    let tmp = tempfile::TempDir::new().unwrap();
    let path = tmp.path().join("test.db");
    let fingerprint = fp("local", "BGE-small-en-v1.5", 384);

    // Session 1: record the fingerprint.
    let db = Database::open(&path, 384).unwrap();
    db.reconcile_embedding_fingerprint(&fingerprint).unwrap();
    drop(db);

    // Session 2: drop the vector table while the metadata stays in place.
    let db = Database::open(&path, 384).unwrap();
    db.conn
        .execute("DROP TABLE IF EXISTS symbol_vec", [])
        .unwrap();
    let stored_dim = db.get_metadata("embedding_dimension").unwrap();
    assert_eq!(stored_dim.as_deref(), Some("384"));
    drop(db);

    // Session 3: reconcile with the unchanged fingerprint.
    let db = Database::open(&path, 384).unwrap();
    db.reconcile_embedding_fingerprint(&fingerprint).unwrap();
    let symbol_vec_row = db
        .conn
        .query_row(
            "SELECT 1 FROM sqlite_master WHERE name='symbol_vec'",
            [],
            |row| row.get::<_, i64>(0),
        )
        .optional()
        .unwrap();
    assert!(
        symbol_vec_row.is_some(),
        "reconcile must rebuild symbol_vec when missing, even on metadata match"
    );
}
/// After `symbol_vec` is dropped, a plain `Database::open` with the same
/// dimension must leave a `symbol_vec` entry in `sqlite_master` again.
#[test]
fn test_handle_embedding_dimension_rebuilds_when_symbol_vec_missing() {
    let tmp = tempfile::TempDir::new().unwrap();
    let path = tmp.path().join("test.db");

    // Session 1: create the database, then remove the vector table.
    let first = Database::open(&path, 384).unwrap();
    first
        .conn
        .execute("DROP TABLE IF EXISTS symbol_vec", [])
        .unwrap();
    drop(first);

    // Session 2: reopening alone is expected to restore the table.
    let db = Database::open(&path, 384).unwrap();
    let symbol_vec_row = db
        .conn
        .query_row(
            "SELECT 1 FROM sqlite_master WHERE name='symbol_vec'",
            [],
            |row| row.get::<_, i64>(0),
        )
        .optional()
        .unwrap();
    assert!(
        symbol_vec_row.is_some(),
        "Database::open must rebuild symbol_vec when missing, even on metadata match"
    );
}
/// A reconcile that fails part-way through must leave the previously stored
/// provider, model, dimension, the `symbol_vec` table and the seeded
/// embedding exactly as they were before the attempt.
#[test]
fn test_reconcile_fingerprint_rolls_back_on_midsequence_failure() {
let dir = tempfile::TempDir::new().unwrap();
let db_path = dir.path().join("test.db");
let initial_fp = fp("local", "BGE-small-en-v1.5", 384);
// Baseline session: record the initial fingerprint and seed one embedding.
{
let db = Database::open(&db_path, 384).unwrap();
db.reconcile_embedding_fingerprint(&initial_fp).unwrap();
seed_embedding(&db, 384, "seed");
}
// Same dimension, different provider/model.
let new_fp = fp("ollama", "nomic-embed-text-v2", 384);
let outcome = {
let db = Database::open(&db_path, 384).unwrap();
// NOTE(review): RECONCILE_FAIL_AFTER_MODEL looks like a thread-local
// fault-injection switch read inside reconcile; it is defined outside this
// view — confirm where it is reset.
RECONCILE_FAIL_AFTER_MODEL.with(|b| b.store(true, std::sync::atomic::Ordering::SeqCst));
db.reconcile_embedding_fingerprint(&new_fp)
};
assert!(outcome.is_err(), "injected SQLITE_FULL must surface as Err");
// Inspect the persisted state through a fresh handle.
let post = Database::open(&db_path, 384).unwrap();
let stored_provider = post.get_metadata("embedding_provider").unwrap();
let stored_model = post.get_metadata("embedding_model").unwrap();
let stored_dim_str = post.get_metadata("embedding_dimension").unwrap();
let symbol_vec_exists = post
.conn
.query_row(
"SELECT 1 FROM sqlite_master WHERE type='table' AND name='symbol_vec'",
[],
|row| row.get::<_, i64>(0),
)
.optional()
.unwrap()
.is_some();
assert_eq!(
stored_provider.as_deref(),
Some("local"),
"failed reconcile must roll back provider"
);
assert_eq!(
stored_model.as_deref(),
Some("BGE-small-en-v1.5"),
"failed reconcile must roll back model"
);
assert_eq!(
stored_dim_str.as_deref(),
Some("384"),
"failed reconcile must roll back dimension"
);
assert!(
symbol_vec_exists,
"failed reconcile must roll back symbol_vec drop"
);
assert_eq!(
post.embedding_count().unwrap(),
1,
"failed reconcile must roll back the symbol_embedding_map DELETE"
);
}
/// Pins the value of `DEFAULT_EMBEDDING_DIM` so a change to the constant
/// shows up as a test failure.
#[test]
fn test_default_embedding_dim_constant() {
    let pinned_dim = 384;
    assert_eq!(DEFAULT_EMBEDDING_DIM, pinned_dim);
}
/// Opening a hand-built database stamped `schema_version = '2'` must leave
/// exactly one file whose name starts with `legacy.db.pre-v` next to it.
#[test]
fn test_destructive_migration_creates_backup() {
let tmp = tempfile::tempdir().unwrap();
let db_path = tmp.path().join("legacy.db");
// Build the legacy schema through a raw connection, bypassing `Database`.
{
// NOTE(review): presumably registers the sqlite-vec extension before the raw
// connection is opened; the helper is defined outside this view — confirm.
register_sqlite_vec();
let conn = Connection::open(&db_path).unwrap();
conn.execute_batch(
"CREATE TABLE symbols (
id TEXT PRIMARY KEY, name TEXT, kind TEXT, file_path TEXT,
start_line INTEGER, end_line INTEGER, start_byte INTEGER, end_byte INTEGER,
parent_id TEXT, signature TEXT, visibility TEXT,
is_async BOOLEAN, docstring TEXT, in_degree INTEGER DEFAULT 0
);
CREATE TABLE edges (
id INTEGER PRIMARY KEY AUTOINCREMENT, source_id TEXT, target_name TEXT,
target_id TEXT, kind TEXT, file_path TEXT, line INTEGER
);
CREATE TABLE files (path TEXT PRIMARY KEY, last_modified REAL, hash TEXT,
language TEXT, num_symbols INTEGER);
CREATE TABLE metadata (key TEXT PRIMARY KEY, value TEXT);
INSERT INTO symbols (id, name, kind, file_path) VALUES ('s1', 'foo', 'function', 'a.py');
INSERT INTO metadata (key, value) VALUES ('schema_version', '2');",
)
.unwrap();
}
// The regular open path is what is expected to produce the backup.
let _db = Database::open(&db_path, DEFAULT_EMBEDDING_DIM).unwrap();
// Collect directory entries that look like a pre-migration backup.
let backups: Vec<_> = std::fs::read_dir(tmp.path())
.unwrap()
.filter_map(|e| e.ok())
.filter(|e| {
e.file_name()
.to_string_lossy()
.starts_with("legacy.db.pre-v")
})
.collect();
assert_eq!(
backups.len(),
1,
"expected exactly one pre-migration backup, found {}",
backups.len()
);
}
/// Creating a brand-new database must not leave any file containing
/// `.pre-v` in its directory.
#[test]
fn test_no_backup_for_fresh_database() {
    let tmp = tempfile::tempdir().unwrap();
    let path = tmp.path().join("fresh.db");
    let _db = Database::open(&path, DEFAULT_EMBEDDING_DIM).unwrap();

    let backup_present = std::fs::read_dir(tmp.path())
        .unwrap()
        .filter_map(|entry| entry.ok())
        .any(|entry| entry.file_name().to_string_lossy().contains(".pre-v"));
    assert!(
        !backup_present,
        "fresh DB should not create a backup file"
    );
}
/// Re-opening a freshly created database must keep user metadata and show
/// `schema_version` equal to `SCHEMA_VERSION`.
#[test]
fn fresh_db_stamps_version_without_running_ladder() {
    let tmp = tempfile::tempdir().unwrap();
    let path = tmp.path().join("fresh.db");

    // First open creates the file; leave a marker a wipe would remove.
    {
        let first = Database::open(&path, DEFAULT_EMBEDDING_DIM).unwrap();
        first.set_metadata("last_commit", "deadbeef").unwrap();
    }

    let reopened = Database::open(&path, DEFAULT_EMBEDDING_DIM).unwrap();
    let last_commit: Option<String> = reopened
        .conn
        .query_row(
            "SELECT value FROM metadata WHERE key = 'last_commit'",
            [],
            |r| r.get(0),
        )
        .optional()
        .unwrap();
    assert_eq!(
        last_commit.as_deref(),
        Some("deadbeef"),
        "fresh re-open must not run the v2→3 wipe"
    );

    let stamped: String = reopened
        .conn
        .query_row(
            "SELECT value FROM metadata WHERE key = 'schema_version'",
            [],
            |r| r.get(0),
        )
        .unwrap();
    assert_eq!(stamped, SCHEMA_VERSION.to_string());
}
/// Opening a hand-built, populated database whose `metadata` table has no
/// `schema_version` row must end at `SCHEMA_VERSION`, with the
/// `resolution_source` column present on `edges` and `symbols` emptied.
#[test]
fn populated_v1_db_runs_full_ladder_to_current() {
let tmp = tempfile::tempdir().unwrap();
let path = tmp.path().join("v1.sqlite");
// Build the old schema through a raw connection. Note the absence of any
// schema_version insert; per the test name this is treated as v1.
{
let conn = Connection::open(&path).unwrap();
conn.execute_batch(
"CREATE TABLE symbols (
id TEXT PRIMARY KEY, name TEXT, kind TEXT, file_path TEXT,
start_line INTEGER, end_line INTEGER, start_byte INTEGER, end_byte INTEGER,
parent_id TEXT, signature TEXT, visibility TEXT, is_async BOOLEAN, docstring TEXT);
CREATE TABLE edges (
id INTEGER PRIMARY KEY AUTOINCREMENT,
source_id TEXT NOT NULL, target_name TEXT NOT NULL, target_id TEXT,
kind TEXT NOT NULL, file_path TEXT NOT NULL, line INTEGER);
CREATE TABLE files (path TEXT PRIMARY KEY);
CREATE TABLE metadata (key TEXT PRIMARY KEY, value TEXT);
INSERT INTO symbols (id, name, kind, file_path) VALUES ('s:1', 'foo', 'function', 'a.py');
INSERT INTO edges (source_id, target_name, target_id, kind, file_path, line)
VALUES ('s:1', 'foo', 's:1', 'calls', 'a.py', 1);",
)
.unwrap();
}
let db = Database::open(&path, DEFAULT_EMBEDDING_DIM).unwrap();
// The stored version must now match the binary's version.
let version: String = db
.conn
.query_row(
"SELECT value FROM metadata WHERE key = 'schema_version'",
[],
|r| r.get(0),
)
.unwrap();
assert_eq!(version, SCHEMA_VERSION.to_string());
// Preparing a statement that names the column proves the column exists.
assert!(
db.conn
.prepare("SELECT resolution_source FROM edges LIMIT 0")
.is_ok(),
"resolution_source must be added by the real upgrade"
);
// The seeded symbol row must be gone after the upgrade.
let symbol_count: i64 = db
.conn
.query_row("SELECT COUNT(*) FROM symbols", [], |r| r.get(0))
.unwrap();
assert_eq!(symbol_count, 0, "v2→3 wipe must run for a populated v1 DB");
}
/// The connection behind `Database::open` must report a `busy_timeout`
/// equal to `BUSY_TIMEOUT_MS`.
#[test]
fn test_busy_timeout_pragma_is_set() {
    let tmp = tempfile::tempdir().unwrap();
    let path = tmp.path().join("timeout.db");
    let db = Database::open(&path, DEFAULT_EMBEDDING_DIM).unwrap();

    let reported: i64 = db
        .conn
        .query_row("PRAGMA busy_timeout;", [], |row| row.get(0))
        .unwrap();
    let configured = BUSY_TIMEOUT_MS as i64;
    assert_eq!(reported, configured);
}
/// Times a blocked write under two `busy_timeout` settings: with 0 it must
/// fail in under 150 ms, with 300 it must take at least 250 ms.
#[test]
fn test_busy_timeout_makes_second_writer_retry_instead_of_aborting() {
let tmp = tempfile::tempdir().unwrap();
let db_path = tmp.path().join("concurrent.db");
// `let _ =` does not bind, so this handle is dropped immediately; presumably
// it only exists to create the schema before the connections below — confirm.
let _ = Database::open(&db_path, DEFAULT_EMBEDDING_DIM).unwrap();
let holder = Database::open(&db_path, DEFAULT_EMBEDDING_DIM).unwrap();
// Start a write transaction on `holder` and leave it open so that other
// connections' writes are blocked until the COMMIT at the end of the test.
holder
.conn
.execute_batch("BEGIN IMMEDIATE; INSERT INTO metadata (key, value) VALUES ('a', '1');")
.unwrap();
// Opens a raw connection with the given busy_timeout, attempts one insert
// (which must fail), and returns how long the attempt took.
let attempt_write = |timeout_ms: u32| -> std::time::Duration {
let conn = Connection::open(&db_path).unwrap();
conn.execute_batch(&format!("PRAGMA busy_timeout={timeout_ms};"))
.unwrap();
let start = std::time::Instant::now();
let res = conn.execute("INSERT INTO metadata (key, value) VALUES ('b', '2');", []);
assert!(res.is_err(), "write must fail while the lock is held");
start.elapsed()
};
assert!(
attempt_write(0) < std::time::Duration::from_millis(150),
"with busy_timeout=0 the writer must fail immediately"
);
assert!(
attempt_write(300) >= std::time::Duration::from_millis(250),
"with a non-zero busy_timeout the writer must retry, not abort"
);
// Finish the holder's transaction.
holder.conn.execute_batch("COMMIT;").unwrap();
}
/// The error returned by `Database::open_memory` must be usable with `?`
/// inside a function returning `anyhow::Result`.
#[test]
fn test_db_error_wraps_into_anyhow() {
    // Only compiles if the error type converts into `anyhow::Error`.
    fn downstream() -> anyhow::Result<()> {
        let _db = Database::open_memory()?;
        Ok(())
    }

    let outcome = downstream();
    outcome.unwrap();
}
/// Opening a database at an impossible location must fail with either
/// `DbError::Open` (carrying the file path) or `DbError::PrepareDir`
/// (carrying the parent directory).
#[test]
fn test_db_error_open_variant_has_path() {
    let bad_path = std::path::PathBuf::from("/dev/null/definitely/not/a/db.sqlite");
    let parent_dir = bad_path.parent().unwrap();

    match Database::open(&bad_path, DEFAULT_EMBEDDING_DIM).unwrap_err() {
        DbError::PrepareDir { path, .. } => {
            assert_eq!(path, parent_dir);
        }
        DbError::Open { path, .. } => assert_eq!(path, bad_path),
        other => panic!("expected DbError::Open or PrepareDir, got {other:?}"),
    }
}
/// Builds a minimal public, non-async function `Symbol` for the
/// transaction tests. `id` is used for both the id and the name; the hashes
/// are fixed placeholder strings.
fn tx_test_symbol(id: &str, file: &str) -> Symbol {
    let ident = id.to_string();
    Symbol {
        id: ident.clone(),
        name: ident,
        kind: SymbolKind::Function,
        file_path: file.to_string(),
        start_line: 1,
        end_line: 1,
        start_byte: 0,
        end_byte: 0,
        parent_id: None,
        signature: None,
        visibility: Visibility::Public,
        is_async: false,
        docstring: None,
        in_degree: 0,
        content_hash: Some("h".to_string()),
        subtree_hash: Some("s".to_string()),
    }
}
/// A symbol inserted inside an indexing transaction is visible after the
/// transaction is committed.
#[test]
fn test_indexing_tx_commit_persists_writes() {
    let db = Database::open_memory().unwrap();
    let symbol = tx_test_symbol("a.py:function:foo", "a.py");

    let tx = db.begin_indexing_tx().unwrap();
    db.insert_symbols_in_tx(std::slice::from_ref(&symbol))
        .unwrap();
    tx.commit().unwrap();

    let stored_rows: i64 = db
        .conn
        .query_row("SELECT COUNT(*) FROM symbols", [], |row| row.get(0))
        .unwrap();
    assert_eq!(stored_rows, 1, "committed write must persist");
}
/// A symbol inserted inside an indexing transaction disappears when the
/// transaction guard is dropped without a commit.
#[test]
fn test_indexing_tx_rollback_drops_writes() {
    let db = Database::open_memory().unwrap();
    let symbol = tx_test_symbol("a.py:function:foo", "a.py");

    let guard = db.begin_indexing_tx().unwrap();
    db.insert_symbols_in_tx(std::slice::from_ref(&symbol))
        .unwrap();
    // No commit: dropping the guard is what ends the transaction here.
    drop(guard);

    let stored_rows: i64 = db
        .conn
        .query_row("SELECT COUNT(*) FROM symbols", [], |row| row.get(0))
        .unwrap();
    assert_eq!(
        stored_rows, 0,
        "writes must roll back when the indexing transaction is dropped without commit"
    );
}
/// When a pipeline fails after writing inside an indexing transaction, the
/// partial write must vanish while rows inserted beforehand remain.
#[test]
fn test_indexing_tx_partial_failure_rolls_back_full_pipeline() {
    /// Opens an indexing transaction, writes one symbol, then bails out
    /// without committing.
    fn failing_pipeline(db: &Database) -> Result<()> {
        let _tx = db.begin_indexing_tx()?;
        let batch1 = vec![tx_test_symbol("a.py:function:foo", "a.py")];
        db.insert_symbols_in_tx(&batch1)?;
        anyhow::bail!("simulated mid-pipeline failure");
    }

    let db = Database::open_memory().unwrap();
    let survivor = tx_test_symbol("pre.py:function:keep", "pre.py");
    db.insert_symbols(std::slice::from_ref(&survivor)).unwrap();

    let result = failing_pipeline(&db);
    assert!(result.is_err(), "the pipeline must propagate its error");

    let mut stmt = db
        .conn
        .prepare("SELECT id FROM symbols ORDER BY id")
        .unwrap();
    let names: Vec<String> = stmt
        .query_map([], |row| row.get(0))
        .unwrap()
        .map(|r| r.unwrap())
        .collect();
    assert_eq!(
        names,
        vec!["pre.py:function:keep"],
        "pre-existing rows must survive; the partial write must roll back"
    );
}
/// `insert_symbols` called without any surrounding indexing transaction
/// must persist its row.
#[test]
fn test_public_wrapper_still_self_commits() {
    let db = Database::open_memory().unwrap();
    let symbol = tx_test_symbol("a.py:function:foo", "a.py");
    db.insert_symbols(std::slice::from_ref(&symbol)).unwrap();

    let stored_rows: i64 = db
        .conn
        .query_row("SELECT COUNT(*) FROM symbols", [], |row| row.get(0))
        .unwrap();
    assert_eq!(
        stored_rows, 1,
        "public wrapper must persist without an outer tx"
    );
}
/// Counterpart to the rollback test: without an outer indexing
/// transaction, a write made before a later failure stays in the database.
#[test]
fn test_partial_pipeline_without_outer_tx_persists_writes() {
    /// Writes one symbol through the public wrapper, then fails.
    fn failing_pipeline(db: &Database) -> Result<()> {
        let batch1 = vec![tx_test_symbol("a.py:function:foo", "a.py")];
        db.insert_symbols(&batch1)?;
        anyhow::bail!("simulated mid-pipeline failure");
    }

    let db = Database::open_memory().unwrap();
    let result = failing_pipeline(&db);
    assert!(result.is_err());

    let stored_rows: i64 = db
        .conn
        .query_row("SELECT COUNT(*) FROM symbols", [], |row| row.get(0))
        .unwrap();
    assert_eq!(
        stored_rows, 1,
        "without an outer transaction, an early write persists despite a later error"
    );
}
/// Reads the raw `resolution_state` column of the edge with the given id.
///
/// Panics if the query fails, including when no such row exists.
fn resolution_state_of(db: &Database, edge_id: i64) -> i64 {
    let state: i64 = db
        .conn
        .query_row(
            "SELECT resolution_state FROM edges WHERE id = ?1",
            params![edge_id],
            |row| row.get(0),
        )
        .unwrap();
    state
}
/// Reads the raw `resolution_source` column of the edge with the given id;
/// a SQL NULL comes back as `None`.
///
/// Panics if the query fails, including when no such row exists.
fn resolution_source_of(db: &Database, edge_id: i64) -> Option<String> {
    let source: Option<String> = db
        .conn
        .query_row(
            "SELECT resolution_source FROM edges WHERE id = ?1",
            params![edge_id],
            |row| row.get(0),
        )
        .unwrap();
    source
}
/// Inserts the fixed source symbol `src` (in `a.py`) plus one `Calls` edge
/// from it to `target_name`, and returns the connection's last insert rowid
/// read right after the edge insert.
fn insert_test_edge(db: &Database, target_name: &str) -> i64 {
    let source = test_symbol("src", SymbolKind::Function, "a.py", 1);
    db.insert_symbols(std::slice::from_ref(&source)).unwrap();

    let call = Edge::new(&source.id, target_name, EdgeKind::Calls, "a.py", 1);
    db.insert_edge(&call).unwrap();

    db.conn.last_insert_rowid()
}
/// A freshly inserted edge has `resolution_state` 0.
#[test]
fn test_new_edge_has_default_state_zero() {
    let db = Database::open_memory().unwrap();
    let edge_id = insert_test_edge(&db, "missing_target");
    let state = resolution_state_of(&db, edge_id);
    assert_eq!(state, 0);
}
/// `update_edge_target` moves an edge to `resolution_state` 1.
#[test]
fn test_update_edge_target_flips_state_to_one() {
    let db = Database::open_memory().unwrap();
    let edge_id = insert_test_edge(&db, "anything");
    db.update_edge_target(edge_id, "some:symbol:id").unwrap();
    let state = resolution_state_of(&db, edge_id);
    assert_eq!(state, 1);
}
/// `mark_edge_unresolvable` moves an edge to `resolution_state` 2.
#[test]
fn test_mark_edge_unresolvable_sets_state_to_two() {
    let db = Database::open_memory().unwrap();
    let edge_id = insert_test_edge(&db, "anything");
    db.mark_edge_unresolvable(edge_id).unwrap();
    let state = resolution_state_of(&db, edge_id);
    assert_eq!(state, 2);
}
/// `unresolved_edges` lists an untouched edge but not one that was marked
/// unresolvable (state 2).
#[test]
fn test_unresolved_edges_excludes_state_two() {
    let db = Database::open_memory().unwrap();
    let _unresolved = insert_test_edge(&db, "still_unresolved");
    let burned = insert_test_edge(&db, "burned");
    db.mark_edge_unresolvable(burned).unwrap();

    let edges = db.unresolved_edges().unwrap();
    let listed = |name: &str| edges.iter().any(|e| e.target_name.as_str() == name);
    assert!(listed("still_unresolved"));
    assert!(!listed("burned"));
}
/// `reset_unresolvable_for_names` reopens only the unresolvable edges whose
/// target name is in the given list.
#[test]
fn test_reset_unresolvable_for_names_targets_only_matching() {
    let db = Database::open_memory().unwrap();
    let foo_edge = insert_test_edge(&db, "foo");
    let bar_edge = insert_test_edge(&db, "bar");
    for edge in [foo_edge, bar_edge] {
        db.mark_edge_unresolvable(edge).unwrap();
    }

    let reopened = db
        .reset_unresolvable_for_names(&["foo".to_string()])
        .unwrap();

    assert_eq!(reopened, 1);
    assert_eq!(resolution_state_of(&db, foo_edge), 0);
    assert_eq!(resolution_state_of(&db, bar_edge), 2);
}
/// Calling `reset_unresolvable_for_names` with an empty list succeeds and
/// reports zero reopened edges.
#[test]
fn test_reset_unresolvable_for_names_empty_is_noop() {
    let db = Database::open_memory().unwrap();
    let reopened = db.reset_unresolvable_for_names(&[]).unwrap();
    assert_eq!(reopened, 0);
}
/// `reset_unresolvable_for_names` leaves edges in state 0 and state 1
/// unchanged even when their target name matches.
#[test]
fn test_reset_unresolvable_for_names_does_not_touch_state_zero_or_one() {
    let db = Database::open_memory().unwrap();
    let still_open = insert_test_edge(&db, "foo");
    let already_resolved = insert_test_edge(&db, "foo");
    db.update_edge_target(already_resolved, "some:id").unwrap();

    db.reset_unresolvable_for_names(&["foo".to_string()])
        .unwrap();

    let open_state = resolution_state_of(&db, still_open);
    assert_eq!(open_state, 0);
    let resolved_state = resolution_state_of(&db, already_resolved);
    assert_eq!(resolved_state, 1);
}
/// `mark_edge_external` moves an edge to state 3, as seen both through the
/// raw column and through `edge_resolution_state`.
#[test]
fn test_mark_edge_external_sets_state_to_three() {
    let db = Database::open_memory().unwrap();
    let edge_id = insert_test_edge(&db, "anything");
    db.mark_edge_external(edge_id).unwrap();

    let raw_state = resolution_state_of(&db, edge_id);
    assert_eq!(raw_state, 3);
    assert_eq!(db.edge_resolution_state(edge_id).unwrap(), 3);
}
/// `unresolved_edges` lists an untouched edge but not one that was marked
/// external (state 3).
#[test]
fn test_unresolved_edges_excludes_state_three() {
    let db = Database::open_memory().unwrap();
    let _open = insert_test_edge(&db, "still_open");
    let external = insert_test_edge(&db, "external_dep");
    db.mark_edge_external(external).unwrap();

    let edges = db.unresolved_edges().unwrap();
    let listed = |name: &str| edges.iter().any(|e| e.target_name.as_str() == name);
    assert!(listed("still_open"));
    assert!(!listed("external_dep"));
}
/// `reset_all_unresolvable` returns both an unresolvable (2) and an
/// external (3) edge to state 0 and reports two resets.
#[test]
fn test_reset_all_unresolvable_resets_state_two_and_three() {
    let db = Database::open_memory().unwrap();
    let burned = insert_test_edge(&db, "burned");
    let external = insert_test_edge(&db, "external");
    db.mark_edge_unresolvable(burned).unwrap();
    db.mark_edge_external(external).unwrap();

    let reset_count = db.reset_all_unresolvable().unwrap();

    assert_eq!(reset_count, 2);
    for edge in [burned, external] {
        assert_eq!(resolution_state_of(&db, edge), 0);
    }
}
/// `reset_unresolvable_for_names` reopens an external (state 3) edge whose
/// target name matches and leaves a non-matching one at state 3.
#[test]
fn test_reset_unresolvable_for_names_reopens_state_three() {
    let db = Database::open_memory().unwrap();
    let foo_edge = insert_test_edge(&db, "foo");
    let bar_edge = insert_test_edge(&db, "bar");
    for edge in [foo_edge, bar_edge] {
        db.mark_edge_external(edge).unwrap();
    }

    let reopened = db
        .reset_unresolvable_for_names(&["foo".to_string()])
        .unwrap();

    assert_eq!(reopened, 1);
    assert_eq!(resolution_state_of(&db, foo_edge), 0);
    assert_eq!(resolution_state_of(&db, bar_edge), 3);
}
/// `mark_heuristic_exhausted_in_tx` moves a state-0 edge to state 4 and
/// leaves an already resolved edge at state 1.
#[test]
fn test_mark_heuristic_exhausted_seals_unresolved_state_zero() {
    let db = Database::open_memory().unwrap();
    let open_edge = insert_test_edge(&db, "nowhere");
    let resolved_edge = insert_test_edge(&db, "somewhere");
    db.update_edge_target(resolved_edge, "some:id").unwrap();

    let sealed = db.mark_heuristic_exhausted_in_tx().unwrap();

    assert_eq!(sealed, 1);
    assert_eq!(resolution_state_of(&db, open_edge), 4);
    assert_eq!(
        resolution_state_of(&db, resolved_edge),
        1,
        "resolved untouched"
    );
}
/// `count_edges_in_state` reports per-state counts: none in state 0, one in
/// state 1, one in state 2 for the fixture below.
#[test]
fn test_count_edges_in_state_buckets_by_state() {
    let db = Database::open_memory().unwrap();

    let resolved_edge = insert_test_edge(&db, "somewhere");
    db.update_edge_target(resolved_edge, "some:id").unwrap();

    let burned_edge = insert_test_edge(&db, "burned");
    db.mark_edge_unresolvable(burned_edge).unwrap();

    let open_count = db.count_edges_in_state(0).unwrap();
    assert_eq!(open_count, 0);
    let resolved_count = db.count_edges_in_state(1).unwrap();
    assert_eq!(resolved_count, 1);
    let burned_count = db.count_edges_in_state(2).unwrap();
    assert_eq!(burned_count, 1);
}
/// `has_heuristic_exhausted` is false while the only edge is in state 0 and
/// true after `mark_heuristic_exhausted_in_tx` has run.
#[test]
fn test_has_heuristic_exhausted_tracks_state_four() {
    let db = Database::open_memory().unwrap();
    let _edge = insert_test_edge(&db, "nowhere");

    let before = db.has_heuristic_exhausted().unwrap();
    assert!(!before, "state 0 not sealed");

    db.mark_heuristic_exhausted_in_tx().unwrap();
    let after = db.has_heuristic_exhausted().unwrap();
    assert!(after);
}
/// `resolve_edges` resolves nothing and leaves the state unchanged when the
/// only edge is in state 4.
#[test]
fn test_resolve_edges_skips_heuristic_exhausted_state_four() {
    let db = Database::open_memory().unwrap();
    let edge_id = insert_test_edge(&db, "nowhere");
    db.mark_heuristic_exhausted_in_tx().unwrap();
    let sealed_state = resolution_state_of(&db, edge_id);
    assert_eq!(sealed_state, 4);

    let resolved_count = db.resolve_edges().unwrap();

    assert_eq!(resolved_count, 0);
    let state_after = resolution_state_of(&db, edge_id);
    assert_eq!(state_after, 4);
}
/// `unresolved_edges` lists an edge inserted after the heuristic-exhausted
/// pass but not the edge that pass moved to state 4.
#[test]
fn test_unresolved_edges_excludes_state_four() {
    let db = Database::open_memory().unwrap();
    // The returned ids are not needed; the assertions go by target name.
    let _exhausted = insert_test_edge(&db, "exhausted");
    db.mark_heuristic_exhausted_in_tx().unwrap();
    // Inserted after the pass, so this edge keeps its initial state.
    let _open = insert_test_edge(&db, "still_open");
    let edges = db.unresolved_edges().unwrap();
    let names: Vec<&str> = edges.iter().map(|e| e.target_name.as_str()).collect();
    assert!(names.contains(&"still_open"));
    assert!(!names.contains(&"exhausted"));
}
/// `reopen_heuristic_exhausted` returns only the state-4 edge to state 0;
/// edges in state 2 and state 3 keep their state.
#[test]
fn test_reopen_heuristic_exhausted_resets_only_state_four() {
    let db = Database::open_memory().unwrap();

    // Seal the first edge before the other two exist.
    let exhausted_edge = insert_test_edge(&db, "exhausted");
    db.mark_heuristic_exhausted_in_tx().unwrap();

    let burned_edge = insert_test_edge(&db, "burned");
    db.mark_edge_unresolvable(burned_edge).unwrap();

    let external_edge = insert_test_edge(&db, "external");
    db.mark_edge_external(external_edge).unwrap();

    let reopened = db.reopen_heuristic_exhausted().unwrap();

    assert_eq!(reopened, 1);
    assert_eq!(resolution_state_of(&db, exhausted_edge), 0);
    assert_eq!(
        resolution_state_of(&db, burned_edge),
        2,
        "LSP verdict sealed"
    );
    assert_eq!(
        resolution_state_of(&db, external_edge),
        3,
        "LSP verdict sealed"
    );
}
/// `reset_all_unresolvable` returns both a state-4 edge and a state-2 edge
/// to state 0 and reports two resets.
#[test]
fn test_reset_all_unresolvable_also_resets_state_four() {
    let db = Database::open_memory().unwrap();

    // Seal the first edge before the second one exists.
    let exhausted_edge = insert_test_edge(&db, "exhausted");
    db.mark_heuristic_exhausted_in_tx().unwrap();

    let burned_edge = insert_test_edge(&db, "burned");
    db.mark_edge_unresolvable(burned_edge).unwrap();

    let reset_count = db.reset_all_unresolvable().unwrap();

    assert_eq!(reset_count, 2);
    for edge in [exhausted_edge, burned_edge] {
        assert_eq!(resolution_state_of(&db, edge), 0);
    }
}
/// `reset_unresolvable_for_names` reopens a state-4 edge whose target name
/// matches and leaves a non-matching one at state 4.
#[test]
fn test_reset_unresolvable_for_names_reopens_state_four() {
    let db = Database::open_memory().unwrap();
    let foo_edge = insert_test_edge(&db, "foo");
    let bar_edge = insert_test_edge(&db, "bar");
    db.mark_heuristic_exhausted_in_tx().unwrap();

    let reopened = db
        .reset_unresolvable_for_names(&["foo".to_string()])
        .unwrap();

    assert_eq!(reopened, 1);
    let foo_state = resolution_state_of(&db, foo_edge);
    assert_eq!(foo_state, 0);
    let bar_state = resolution_state_of(&db, bar_edge);
    assert_eq!(bar_state, 4);
}
/// `stats` reports one resolved, one unresolvable and one external edge out
/// of four edges in total.
#[test]
fn test_stats_surfaces_external_and_unresolvable_counts() {
    let db = Database::open_memory().unwrap();

    let resolved_edge = insert_test_edge(&db, "resolved_target");
    db.update_edge_target(resolved_edge, "some:id").unwrap();

    let burned_edge = insert_test_edge(&db, "burned");
    db.mark_edge_unresolvable(burned_edge).unwrap();

    let external_edge = insert_test_edge(&db, "external");
    db.mark_edge_external(external_edge).unwrap();

    // Fourth edge stays in its initial state.
    let _open = insert_test_edge(&db, "open");

    let summary = db.stats().unwrap();
    assert_eq!(summary.num_resolved, 1);
    assert_eq!(summary.num_unresolvable, 1);
    assert_eq!(summary.num_external, 1);
    assert_eq!(summary.num_edges, 4);
}
/// After the target symbol row is deleted directly, calling
/// `invalidate_edges_targeting` with the target's file must put the edge
/// back to state 0 and null out its `target_id`.
#[test]
fn test_invalidate_edges_targeting_resets_state_when_target_disappears() {
    let db = Database::open_memory().unwrap();
    let caller = test_symbol("src", SymbolKind::Function, "a.py", 1);
    let ghost = test_symbol("ghost", SymbolKind::Function, "b.py", 1);
    db.insert_symbols(&[caller.clone(), ghost.clone()]).unwrap();

    // Create the edge and resolve it to the soon-to-vanish target.
    let call = Edge::new(&caller.id, "ghost", EdgeKind::Calls, "a.py", 1);
    db.insert_edge(&call).unwrap();
    let eid = db.conn.last_insert_rowid();
    db.update_edge_target(eid, &ghost.id).unwrap();
    assert_eq!(resolution_state_of(&db, eid), 1);

    // Remove the target behind the API's back.
    db.conn
        .execute("DELETE FROM symbols WHERE id = ?1", params![ghost.id])
        .unwrap();

    let dirty: std::collections::HashSet<String> =
        std::iter::once("b.py".to_string()).collect();
    db.invalidate_edges_targeting(&dirty).unwrap();

    assert_eq!(
        resolution_state_of(&db, eid),
        0,
        "dangling edge must return to state=0 so unresolved_edges() can see it"
    );
    let target_after: Option<String> = db
        .conn
        .query_row(
            "SELECT target_id FROM edges WHERE id = ?1",
            params![eid],
            |r| r.get(0),
        )
        .unwrap();
    assert!(
        target_after.is_none(),
        "target_id must be NULL after invalidation"
    );
}
/// Deleting a symbol through `delete_symbol` must reset an edge that was
/// resolved to it: state 0, no `resolution_source`, and listed again by
/// `unresolved_edges`.
#[test]
fn test_delete_symbol_resets_state_on_dangling_incoming_edges() {
    let db = Database::open_memory().unwrap();
    let caller = test_symbol("caller", SymbolKind::Function, "a.py", 1);
    let ghost = test_symbol("ghost", SymbolKind::Function, "b.py", 1);
    db.insert_symbols(&[caller.clone(), ghost.clone()]).unwrap();

    let call = Edge::new(&caller.id, "ghost", EdgeKind::Calls, "a.py", 1);
    db.insert_edge(&call).unwrap();
    let eid = db.conn.last_insert_rowid();
    db.update_edge_target(eid, &ghost.id).unwrap();

    db.delete_symbol(&ghost.id).unwrap();

    assert_eq!(resolution_state_of(&db, eid), 0);
    assert_eq!(resolution_source_of(&db, eid), None, "stale tag must clear");

    let open_edges = db.unresolved_edges().unwrap();
    let resurfaced = open_edges.iter().any(|e| e.edge_id == eid);
    assert!(
        resurfaced,
        "orphaned edge must resurface in unresolved_edges()"
    );
}
/// Batch-deleting two symbols through `delete_symbols` must reset both
/// edges that were resolved to them: state 0 and no `resolution_source`.
#[test]
fn test_delete_symbols_in_tx_resets_state_on_dangling_incoming_edges() {
    let db = Database::open_memory().unwrap();
    let caller = test_symbol("caller", SymbolKind::Function, "a.py", 1);
    let ghost1 = test_symbol("ghost1", SymbolKind::Function, "b.py", 1);
    let ghost2 = test_symbol("ghost2", SymbolKind::Function, "c.py", 1);
    db.insert_symbols(&[caller.clone(), ghost1.clone(), ghost2.clone()])
        .unwrap();

    // First edge, resolved to ghost1.
    let call1 = Edge::new(&caller.id, "ghost1", EdgeKind::Calls, "a.py", 1);
    db.insert_edge(&call1).unwrap();
    let eid1 = db.conn.last_insert_rowid();
    db.update_edge_target(eid1, &ghost1.id).unwrap();

    // Second edge, resolved to ghost2.
    let call2 = Edge::new(&caller.id, "ghost2", EdgeKind::Calls, "a.py", 2);
    db.insert_edge(&call2).unwrap();
    let eid2 = db.conn.last_insert_rowid();
    db.update_edge_target(eid2, &ghost2.id).unwrap();

    // Precondition: the resolved edge carries the "lsp" source tag.
    assert_eq!(resolution_source_of(&db, eid1).as_deref(), Some("lsp"));

    db.delete_symbols(&[ghost1.id.clone(), ghost2.id.clone()])
        .unwrap();

    assert_eq!(resolution_state_of(&db, eid1), 0);
    assert_eq!(resolution_state_of(&db, eid2), 0);
    assert_eq!(resolution_source_of(&db, eid1), None);
    assert_eq!(resolution_source_of(&db, eid2), None);
}
/// `resolve_edges` moves a matching edge from state 0 to state 1 and the
/// edge no longer appears in `unresolved_edges`.
#[test]
fn test_heuristic_resolve_flips_state_to_one() {
    let db = Database::open_memory().unwrap();
    let caller = test_symbol("caller", SymbolKind::Function, "a.py", 1);
    let callee = test_symbol("foo", SymbolKind::Function, "a.py", 10);
    db.insert_symbols(&[caller.clone(), callee.clone()]).unwrap();

    let call = Edge::new(&caller.id, "foo", EdgeKind::Calls, "a.py", 2);
    db.insert_edge(&call).unwrap();
    let eid = db.conn.last_insert_rowid();
    assert_eq!(resolution_state_of(&db, eid), 0);

    db.resolve_edges().unwrap();

    assert_eq!(
        resolution_state_of(&db, eid),
        1,
        "heuristic resolve must set state=1 so LSP doesn't re-attack the edge"
    );
    let still_listed = db
        .unresolved_edges()
        .unwrap()
        .iter()
        .any(|e| e.edge_id == eid);
    assert!(
        !still_listed,
        "resolved edge must drop out of unresolved_edges()"
    );
}
/// A fresh in-memory database must contain exactly one index named
/// `idx_edges_unresolved` in `sqlite_master`.
#[test]
fn test_partial_unresolved_index_exists() {
let db = Database::open_memory().unwrap();
// Count matching index entries in SQLite's schema catalog.
let n: i64 = db
.conn
.query_row(
"SELECT COUNT(*) FROM sqlite_master
WHERE type='index' AND name='idx_edges_unresolved'",
[],
|row| row.get(0),
)
.unwrap();
assert_eq!(n, 1);
}
/// Edges inserted through the batch `insert_edges` path all start with
/// `resolution_state` 0.
#[test]
fn test_resolution_state_default_via_insert_edges_batch() {
    let db = Database::open_memory().unwrap();
    let source = test_symbol("src", SymbolKind::Function, "a.py", 1);
    db.insert_symbols(std::slice::from_ref(&source)).unwrap();

    let batch = vec![
        Edge::new(&source.id, "x", EdgeKind::Calls, "a.py", 1),
        Edge::new(&source.id, "y", EdgeKind::Calls, "a.py", 2),
    ];
    db.insert_edges(&batch).unwrap();

    let mut stmt = db
        .conn
        .prepare("SELECT resolution_state FROM edges ORDER BY id")
        .unwrap();
    let states: Vec<i64> = stmt
        .query_map([], |row| row.get(0))
        .unwrap()
        .collect::<std::result::Result<_, _>>()
        .unwrap();
    assert_eq!(states, vec![0, 0]);
}
/// Opening a hand-built database stamped `schema_version = '3'` must add
/// the `resolution_state` column, leave `edges` empty, and bump the stored
/// version to `SCHEMA_VERSION`.
///
/// NOTE(review): the test name mentions a backfill to state 1, but the
/// assertions below only check that the column exists and that the edge
/// table was cleared — confirm whether the name is stale.
#[test]
fn test_migration_v3_to_v4_backfills_resolved_to_state_one() {
let tmp = tempfile::tempdir().unwrap();
let path = tmp.path().join("v3.sqlite");
// Build the v3 schema through a raw connection with one resolved edge
// (target_id set) and one unresolved edge (target_id NULL).
{
let conn = Connection::open(&path).unwrap();
conn.execute_batch(
"CREATE TABLE symbols (
id TEXT PRIMARY KEY, name TEXT, kind TEXT, file_path TEXT,
start_line INTEGER, end_line INTEGER, start_byte INTEGER, end_byte INTEGER,
parent_id TEXT, signature TEXT, visibility TEXT, is_async BOOLEAN,
docstring TEXT, in_degree INTEGER DEFAULT 0,
content_hash TEXT, subtree_hash TEXT);
CREATE TABLE edges (
id INTEGER PRIMARY KEY AUTOINCREMENT,
source_id TEXT NOT NULL, target_name TEXT NOT NULL, target_id TEXT,
kind TEXT NOT NULL, file_path TEXT NOT NULL, line INTEGER);
CREATE TABLE files (path TEXT PRIMARY KEY);
CREATE TABLE metadata (key TEXT PRIMARY KEY, value TEXT);
INSERT INTO metadata (key, value) VALUES ('schema_version', '3');
INSERT INTO symbols (id, name, kind, file_path) VALUES ('s:1', 'foo', 'function', 'a.py');
INSERT INTO edges (source_id, target_name, target_id, kind, file_path, line)
VALUES ('s:1', 'foo', 's:1', 'calls', 'a.py', 1);
INSERT INTO edges (source_id, target_name, target_id, kind, file_path, line)
VALUES ('s:1', 'missing', NULL, 'calls', 'a.py', 2);",
)
.unwrap();
}
let db = Database::open(&path, DEFAULT_EMBEDDING_DIM).unwrap();
// Preparing a statement that names the column proves the column exists.
let has_resolution_state = db
.conn
.prepare("SELECT resolution_state FROM edges LIMIT 0")
.is_ok();
assert!(has_resolution_state, "v3→4 added resolution_state column");
// Both seeded edges must be gone after the open.
let edge_count: i64 = db
.conn
.query_row("SELECT COUNT(*) FROM edges", [], |r| r.get(0))
.unwrap();
assert_eq!(edge_count, 0, "v7 cleared the index for full rebuild");
// The stored version must now match the binary's version.
let bumped: String = db
.conn
.query_row(
"SELECT value FROM metadata WHERE key = 'schema_version'",
[],
|r| r.get(0),
)
.unwrap();
assert_eq!(bumped, SCHEMA_VERSION.to_string());
}
/// Runs `resolve_edges`, asserts that exactly one edge was resolved, and
/// returns the provenance of the first entry from `refs(name, None)` whose
/// `target_id` is set (`None` if there is no such entry or it carries no
/// provenance).
fn resolve_one_and_get_provenance(db: &Database, name: &str) -> Option<EdgeProvenance> {
    let resolved_count = db.resolve_edges().unwrap();
    assert_eq!(resolved_count, 1, "expected exactly one edge to resolve");

    for (edge, _) in db.refs(name, None).unwrap() {
        if edge.target_id.is_some() {
            return edge.provenance;
        }
    }
    None
}
/// Two `helper` symbols exist, one in the caller's file (`a.py`) and one in
/// `b.py`; the single resolved call edge must be tagged `SameFile`.
#[test]
fn resolve_tags_provenance_same_file() {
    let db = Database::open_memory().unwrap();

    let caller = test_symbol("process", SymbolKind::Function, "a.py", 1);
    let local_helper = test_symbol("helper", SymbolKind::Function, "a.py", 20);
    let foreign_helper = test_symbol("helper", SymbolKind::Function, "b.py", 1);
    let symbols = [caller.clone(), local_helper, foreign_helper];
    db.insert_symbols(&symbols).unwrap();

    let call = Edge::new(&caller.id, "helper", EdgeKind::Calls, "a.py", 5);
    db.insert_edge(&call).unwrap();

    let provenance = resolve_one_and_get_provenance(&db, "helper");
    assert_eq!(provenance, Some(EdgeProvenance::SameFile));
}
/// Two `helper` symbols exist, one under the caller's directory (`pkg/`) and
/// one under `other/`; the single resolved call edge must be tagged `SameDir`.
#[test]
fn resolve_tags_provenance_same_dir() {
    let db = Database::open_memory().unwrap();

    let caller = test_symbol("process", SymbolKind::Function, "pkg/a.py", 1);
    let neighbour = test_symbol("helper", SymbolKind::Function, "pkg/b.py", 1);
    let distant = test_symbol("helper", SymbolKind::Function, "other/c.py", 1);
    let symbols = [caller.clone(), neighbour, distant];
    db.insert_symbols(&symbols).unwrap();

    let call = Edge::new(&caller.id, "helper", EdgeKind::Calls, "pkg/a.py", 5);
    db.insert_edge(&call).unwrap();

    let provenance = resolve_one_and_get_provenance(&db, "helper");
    assert_eq!(provenance, Some(EdgeProvenance::SameDir));
}
/// Only one symbol named `only_one` exists, in a different directory from the
/// caller; the single resolved call edge must be tagged `UniqueGlobal`.
#[test]
fn resolve_tags_provenance_unique_global() {
    let db = Database::open_memory().unwrap();

    let caller = test_symbol("process", SymbolKind::Function, "a.py", 1);
    let sole_target = test_symbol("only_one", SymbolKind::Function, "far/away.py", 1);
    let symbols = [caller.clone(), sole_target];
    db.insert_symbols(&symbols).unwrap();

    let call = Edge::new(&caller.id, "only_one", EdgeKind::Calls, "a.py", 5);
    db.insert_edge(&call).unwrap();

    let provenance = resolve_one_and_get_provenance(&db, "only_one");
    assert_eq!(provenance, Some(EdgeProvenance::UniqueGlobal));
}
/// A `References` edge to `Logger` is inserted while both a class and a method
/// named `Logger` exist in `util/Logger.java`; after resolution the reference
/// edge must be tagged `KindDisambig`.
#[test]
fn resolve_tags_provenance_kind_disambig() {
    let db = Database::open_memory().unwrap();

    let caller = test_symbol("handleLogin", SymbolKind::Method, "auth/Service.java", 10);
    let class_sym = test_symbol("Logger", SymbolKind::Class, "util/Logger.java", 1);
    let ctor_sym = test_symbol("Logger", SymbolKind::Method, "util/Logger.java", 5);
    let symbols = [caller.clone(), class_sym, ctor_sym];
    db.insert_symbols(&symbols).unwrap();

    let reference = Edge::new(
        &caller.id,
        "Logger",
        EdgeKind::References,
        "auth/Service.java",
        12,
    );
    db.insert_edge(&reference).unwrap();
    db.resolve_edges().unwrap();

    let refs = db.refs("Logger", None).unwrap();
    let (found, _) = refs
        .iter()
        .find(|(edge, _)| edge.kind == EdgeKind::References)
        .unwrap();
    assert_eq!(found.provenance, Some(EdgeProvenance::KindDisambig));
}
/// The caller and one `helper` candidate share the same `parent_id`, while a
/// second `helper` candidate has a different parent. Neither candidate lives in
/// the caller's file or directory. The single resolved call edge must be tagged
/// `ParentScope`.
#[test]
fn resolve_tags_provenance_parent_scope() {
    // Parent id shared by the caller and the candidate expected to win.
    let shared_parent = "app/svc.py:class:Svc";

    let db = Database::open_memory().unwrap();

    let mut caller = test_symbol("run", SymbolKind::Method, "app/svc.py", 10);
    caller.parent_id = Some(shared_parent.to_string());
    let mut same_scope = test_symbol("helper", SymbolKind::Method, "lib/a.py", 1);
    same_scope.parent_id = Some(shared_parent.to_string());
    let mut other_scope = test_symbol("helper", SymbolKind::Method, "lib/b.py", 1);
    other_scope.parent_id = Some("other/x.py:class:Other".to_string());

    // Only `caller` is read again below, so it is the only symbol that needs
    // cloning; the two candidates are moved into the slice.
    db.insert_symbols(&[caller.clone(), same_scope, other_scope])
        .unwrap();

    db.insert_edge(&Edge::new(
        &caller.id,
        "helper",
        EdgeKind::Calls,
        "app/svc.py",
        12,
    ))
    .unwrap();

    assert_eq!(
        resolve_one_and_get_provenance(&db, "helper"),
        Some(EdgeProvenance::ParentScope)
    );
}
/// After resolving a same-file call, `callees("process")` must return exactly
/// one entry and that entry must expose the `SameFile` provenance.
#[test]
fn callees_surfaces_provenance() {
    let db = Database::open_memory().unwrap();

    let caller = test_symbol("process", SymbolKind::Function, "a.py", 1);
    let helper = test_symbol("helper", SymbolKind::Function, "a.py", 20);
    let symbols = [caller.clone(), helper];
    db.insert_symbols(&symbols).unwrap();

    let call = Edge::new(&caller.id, "helper", EdgeKind::Calls, "a.py", 5);
    db.insert_edge(&call).unwrap();
    db.resolve_edges().unwrap();

    let callees = db.callees("process").unwrap();
    assert_eq!(callees.len(), 1);
    assert_eq!(callees[0].provenance, Some(EdgeProvenance::SameFile));
}
/// After resolving a same-file call, the `Calls` entry returned by
/// `impact("helper", 3)` must expose the `SameFile` provenance.
#[test]
fn impact_surfaces_provenance() {
    let db = Database::open_memory().unwrap();

    let caller = test_symbol("process", SymbolKind::Function, "a.py", 1);
    let helper = test_symbol("helper", SymbolKind::Function, "a.py", 20);
    let symbols = [caller.clone(), helper];
    db.insert_symbols(&symbols).unwrap();

    let call = Edge::new(&caller.id, "helper", EdgeKind::Calls, "a.py", 5);
    db.insert_edge(&call).unwrap();
    db.resolve_edges().unwrap();

    let impact = db.impact("helper", 3).unwrap();
    let (call_edge, _) = impact
        .iter()
        .find(|(edge, _)| edge.kind == EdgeKind::Calls)
        .unwrap();
    assert_eq!(call_edge.provenance, Some(EdgeProvenance::SameFile));
}
/// Marking an edge unresolvable stores the `lsp_unresolvable` tag; calling
/// `reset_unresolvable_for_names` with the edge's target name must report one
/// reopened edge and leave its resolution source empty.
#[test]
fn reset_unresolvable_for_names_clears_provenance() {
    let db = Database::open_memory().unwrap();
    let edge_id = insert_test_edge(&db, "foo");

    db.mark_edge_unresolvable(edge_id).unwrap();
    let tagged = resolution_source_of(&db, edge_id);
    assert_eq!(tagged.as_deref(), Some("lsp_unresolvable"));

    let names = ["foo".to_string()];
    let reopened = db.reset_unresolvable_for_names(&names).unwrap();
    assert_eq!(reopened, 1);
    assert_eq!(resolution_source_of(&db, edge_id), None, "stale tag cleared");
}
/// An edge inserted with a `target_id` and `Lsp` provenance already set must
/// come back from `callees` with that provenance, be stored with
/// `resolution_state` 1, and be absent from `unresolved_edges`.
#[test]
fn insert_edge_round_trips_provenance() {
    let db = Database::open_memory().unwrap();

    let caller = test_symbol("process", SymbolKind::Function, "a.py", 1);
    let target = test_symbol("helper", SymbolKind::Function, "a.py", 20);
    let symbols = [caller.clone(), target.clone()];
    db.insert_symbols(&symbols).unwrap();

    let mut pre_resolved = Edge::new(&caller.id, "helper", EdgeKind::Calls, "a.py", 5);
    pre_resolved.target_id = Some(target.id.clone());
    pre_resolved.provenance = Some(EdgeProvenance::Lsp);
    db.insert_edge(&pre_resolved).unwrap();
    // Read the rowid right after the insert, before any other statement runs.
    let inserted_id = db.conn.last_insert_rowid();

    let callees = db.callees("process").unwrap();
    assert_eq!(callees[0].provenance, Some(EdgeProvenance::Lsp));
    assert_eq!(resolution_state_of(&db, inserted_id), 1);

    let absent_from_unresolved = db
        .unresolved_edges()
        .unwrap()
        .iter()
        .all(|pending| pending.edge_id != inserted_id);
    assert!(
        absent_from_unresolved,
        "a resolved insert must not resurface as unresolved"
    );
}
/// An edge inserted without a `target_id` must be stored with
/// `resolution_state` 0.
#[test]
fn insert_edge_without_target_is_unresolved() {
    let db = Database::open_memory().unwrap();

    let origin = test_symbol("src", SymbolKind::Function, "a.py", 1);
    db.insert_symbols(std::slice::from_ref(&origin)).unwrap();

    let dangling = Edge::new(&origin.id, "missing", EdgeKind::Calls, "a.py", 1);
    db.insert_edge(&dangling).unwrap();
    let inserted_id = db.conn.last_insert_rowid();

    assert_eq!(resolution_state_of(&db, inserted_id), 0);
}
/// Seeds an import symbol `util.Logger` with an `Imports` edge to `Logger`,
/// plus a `References` edge to `Logger` from a method in the same file, while
/// both a class and a method named `Logger` exist in `util/Logger.java`.
/// Both edges must resolve, and the reference must be tagged `ImportPath`.
#[test]
fn resolve_tags_provenance_import_path() {
    let db = Database::open_memory().unwrap();

    let import_sym = test_symbol("util.Logger", SymbolKind::Import, "auth/service.java", 1);
    let caller = test_symbol("authenticate", SymbolKind::Method, "auth/service.java", 10);
    let class_sym = test_symbol("Logger", SymbolKind::Class, "util/Logger.java", 1);
    let ctor_sym = test_symbol("Logger", SymbolKind::Method, "util/Logger.java", 5);
    let symbols = [import_sym.clone(), caller.clone(), class_sym, ctor_sym];
    db.insert_symbols(&symbols).unwrap();

    let import_edge = Edge::new(
        &import_sym.id,
        "Logger",
        EdgeKind::Imports,
        "auth/service.java",
        1,
    );
    db.insert_edge(&import_edge).unwrap();

    let reference_edge = Edge::new(
        &caller.id,
        "Logger",
        EdgeKind::References,
        "auth/service.java",
        15,
    );
    db.insert_edge(&reference_edge).unwrap();

    assert_eq!(db.resolve_edges().unwrap(), 2);

    let refs = db.refs("Logger", None).unwrap();
    let (found, _) = refs
        .iter()
        .find(|(edge, _)| edge.kind == EdgeKind::References)
        .unwrap();
    assert_eq!(found.provenance, Some(EdgeProvenance::ImportPath));
}
/// `update_edge_target` must store `lsp` as the edge's resolution source.
#[test]
fn lsp_resolve_tags_provenance_lsp() {
    let db = Database::open_memory().unwrap();
    let edge_id = insert_test_edge(&db, "anything");

    db.update_edge_target(edge_id, "some:symbol:id").unwrap();

    let source = resolution_source_of(&db, edge_id);
    assert_eq!(source.as_deref(), Some("lsp"));
}
/// An edge first resolved by `resolve_edges` (tagged `same_file`) must be
/// re-tagged `lsp` once `update_edge_target` is applied to it.
#[test]
fn lsp_overwrite_retags_heuristic_as_lsp() {
    let db = Database::open_memory().unwrap();

    let caller = test_symbol("process", SymbolKind::Function, "a.py", 1);
    let helper = test_symbol("helper", SymbolKind::Function, "a.py", 20);
    let symbols = [caller.clone(), helper.clone()];
    db.insert_symbols(&symbols).unwrap();

    let call = Edge::new(&caller.id, "helper", EdgeKind::Calls, "a.py", 5);
    db.insert_edge(&call).unwrap();
    db.resolve_edges().unwrap();

    // Only one edge was inserted, so `LIMIT 1` picks it.
    let edge_id: i64 = db
        .conn
        .query_row("SELECT id FROM edges LIMIT 1", [], |row| row.get(0))
        .unwrap();

    let before = resolution_source_of(&db, edge_id);
    assert_eq!(before.as_deref(), Some("same_file"));

    db.update_edge_target(edge_id, &helper.id).unwrap();

    let after = resolution_source_of(&db, edge_id);
    assert_eq!(after.as_deref(), Some("lsp"));
}
/// `mark_edge_external` must store `lsp_external` as the resolution source.
#[test]
fn mark_external_tags_lsp_external() {
    let db = Database::open_memory().unwrap();
    let edge_id = insert_test_edge(&db, "anything");

    db.mark_edge_external(edge_id).unwrap();

    let source = resolution_source_of(&db, edge_id);
    assert_eq!(source.as_deref(), Some("lsp_external"));
}
/// `mark_edge_unresolvable` must store `lsp_unresolvable` as the resolution
/// source.
#[test]
fn mark_unresolvable_tags_lsp_unresolvable() {
    let db = Database::open_memory().unwrap();
    let edge_id = insert_test_edge(&db, "anything");

    db.mark_edge_unresolvable(edge_id).unwrap();

    let source = resolution_source_of(&db, edge_id);
    assert_eq!(source.as_deref(), Some("lsp_unresolvable"));
}
/// An edge tagged `lsp_external` must have its resolution source cleared by
/// `reset_all_unresolvable`.
#[test]
fn reset_unresolvable_clears_provenance() {
    let db = Database::open_memory().unwrap();
    let edge_id = insert_test_edge(&db, "foo");

    db.mark_edge_external(edge_id).unwrap();
    let tagged = resolution_source_of(&db, edge_id);
    assert_eq!(tagged.as_deref(), Some("lsp_external"));

    db.reset_all_unresolvable().unwrap();
    assert_eq!(resolution_source_of(&db, edge_id), None, "stale tag cleared");
}
/// Test helper: creates a SQLite file at `path` with `symbols`, `edges`,
/// `files`, `metadata` and `query_log` tables, where `edges` has a
/// `resolution_state` column but no `resolution_source` column.
///
/// `schema_version` is written into `metadata` as text. When `seed_edges` is
/// true, one symbol (`s:1`) and two edges are inserted: one targeting `s:1`
/// with state 1, and one with a NULL target and state 0.
fn bootstrap_pre_v6_db(path: &std::path::Path, schema_version: u32, seed_edges: bool) {
    let tables_sql = "CREATE TABLE symbols (
id TEXT PRIMARY KEY, name TEXT, kind TEXT, file_path TEXT,
start_line INTEGER, end_line INTEGER, start_byte INTEGER, end_byte INTEGER,
parent_id TEXT, signature TEXT, visibility TEXT, is_async BOOLEAN,
docstring TEXT, in_degree INTEGER DEFAULT 0,
content_hash TEXT, subtree_hash TEXT);
CREATE TABLE edges (
id INTEGER PRIMARY KEY AUTOINCREMENT,
source_id TEXT NOT NULL, target_name TEXT NOT NULL, target_id TEXT,
kind TEXT NOT NULL, file_path TEXT NOT NULL, line INTEGER,
resolution_state INTEGER NOT NULL DEFAULT 0);
CREATE TABLE files (path TEXT PRIMARY KEY);
CREATE TABLE metadata (key TEXT PRIMARY KEY, value TEXT);
CREATE TABLE query_log (id INTEGER PRIMARY KEY AUTOINCREMENT,
tool TEXT NOT NULL, source TEXT NOT NULL, ts INTEGER NOT NULL);";
    let seed_sql = "INSERT INTO symbols (id, name, kind, file_path) VALUES ('s:1', 'foo', 'function', 'a.py');
INSERT INTO edges (source_id, target_name, target_id, kind, file_path, line, resolution_state)
VALUES ('s:1', 'foo', 's:1', 'calls', 'a.py', 1, 1);
INSERT INTO edges (source_id, target_name, target_id, kind, file_path, line, resolution_state)
VALUES ('s:1', 'missing', NULL, 'calls', 'a.py', 2, 0);";

    let raw = Connection::open(path).unwrap();
    raw.execute_batch(tables_sql).unwrap();

    let version_text = schema_version.to_string();
    raw.execute(
        "INSERT INTO metadata (key, value) VALUES ('schema_version', ?1)",
        params![version_text],
    )
    .unwrap();

    if !seed_edges {
        return;
    }
    raw.execute_batch(seed_sql).unwrap();
}
/// Opening a seeded `schema_version = 5` database must leave
/// `edges.resolution_source` selectable, an empty `edges` table, and a stored
/// `schema_version` equal to the current `SCHEMA_VERSION`.
#[test]
fn migration_v5_to_v6_adds_resolution_source_column() {
    let scratch = tempfile::tempdir().unwrap();
    let db_file = scratch.path().join("v5.sqlite");
    bootstrap_pre_v6_db(&db_file, 5, true);

    let db = Database::open(&db_file, DEFAULT_EMBEDDING_DIM).unwrap();

    // Preparing the statement only succeeds when the column exists.
    assert!(
        db.conn
            .prepare("SELECT resolution_source FROM edges LIMIT 0")
            .is_ok(),
        "v5→6 added resolution_source column"
    );

    let remaining_edges: i64 = db
        .conn
        .query_row("SELECT COUNT(*) FROM edges", [], |row| row.get(0))
        .unwrap();
    assert_eq!(remaining_edges, 0, "v7 cleared the index for full rebuild");

    let stored_version: String = db
        .conn
        .query_row(
            "SELECT value FROM metadata WHERE key = 'schema_version'",
            [],
            |row| row.get(0),
        )
        .unwrap();
    assert_eq!(stored_version, SCHEMA_VERSION.to_string());
}
/// A database stamped `schema_version = 6` whose `edges` table lacks the
/// `resolution_source` column must have that column after `Database::open`.
#[test]
fn migration_v6_self_heals_missing_column() {
    let scratch = tempfile::tempdir().unwrap();
    let db_file = scratch.path().join("partial.sqlite");
    bootstrap_pre_v6_db(&db_file, 6, false);

    let db = Database::open(&db_file, DEFAULT_EMBEDDING_DIM).unwrap();

    // Preparing the statement only succeeds when the column exists.
    assert!(
        db.conn
            .prepare("SELECT resolution_source FROM edges LIMIT 0")
            .is_ok(),
        "missing resolution_source column was re-added"
    );
}
/// Test helper: creates a SQLite file at `path` stamped `schema_version = 6`.
///
/// The schema includes `edges.resolution_source`, the `symbol_content` table
/// with its FTS5 virtual table and insert/delete triggers,
/// `symbol_embedding_map`, `metadata` and `query_log`. One row is seeded into
/// each of `symbols`, `files`, `edges`, `symbol_content` and
/// `symbol_embedding_map`, and `metadata` also gets `last_commit = 'deadbeef'`.
fn bootstrap_v6_db(path: &std::path::Path) {
    let v6_sql = "CREATE TABLE symbols (
id TEXT PRIMARY KEY, name TEXT, kind TEXT, file_path TEXT,
start_line INTEGER, end_line INTEGER, start_byte INTEGER, end_byte INTEGER,
parent_id TEXT, signature TEXT, visibility TEXT, is_async BOOLEAN,
docstring TEXT, in_degree INTEGER DEFAULT 0,
content_hash TEXT, subtree_hash TEXT);
CREATE TABLE edges (
id INTEGER PRIMARY KEY AUTOINCREMENT,
source_id TEXT NOT NULL, target_name TEXT NOT NULL, target_id TEXT,
kind TEXT NOT NULL, file_path TEXT NOT NULL, line INTEGER,
resolution_state INTEGER NOT NULL DEFAULT 0, resolution_source TEXT);
CREATE TABLE files (path TEXT PRIMARY KEY);
CREATE TABLE symbol_content (
symbol_id TEXT PRIMARY KEY, content TEXT NOT NULL, header TEXT NOT NULL,
normalized_name TEXT NOT NULL DEFAULT '');
CREATE VIRTUAL TABLE symbol_fts USING fts5(
symbol_name, normalized_name, content,
content=symbol_content, content_rowid=rowid);
CREATE TRIGGER symbol_content_ai AFTER INSERT ON symbol_content BEGIN
INSERT INTO symbol_fts(rowid, symbol_name, normalized_name, content)
VALUES (new.rowid, (SELECT name FROM symbols WHERE id = new.symbol_id),
new.normalized_name, new.content);
END;
CREATE TRIGGER symbol_content_ad AFTER DELETE ON symbol_content BEGIN
INSERT INTO symbol_fts(symbol_fts, rowid, symbol_name, normalized_name, content)
VALUES ('delete', old.rowid, (SELECT name FROM symbols WHERE id = old.symbol_id),
old.normalized_name, old.content);
END;
CREATE TABLE symbol_embedding_map (symbol_id TEXT NOT NULL);
CREATE TABLE metadata (key TEXT PRIMARY KEY, value TEXT);
CREATE TABLE query_log (id INTEGER PRIMARY KEY AUTOINCREMENT,
tool TEXT NOT NULL, source TEXT NOT NULL, ts INTEGER NOT NULL);
INSERT INTO symbols (id, name, kind, file_path) VALUES ('a.py:import:os.path', 'os.path', 'import', 'a.py');
INSERT INTO files (path) VALUES ('a.py');
INSERT INTO edges (source_id, target_name, kind, file_path, line)
VALUES ('a.py:import:os.path', 'os', 'imports', 'a.py', 1);
INSERT INTO symbol_content (symbol_id, content, header)
VALUES ('a.py:import:os.path', 'body', 'sig');
INSERT INTO symbol_embedding_map (symbol_id) VALUES ('a.py:import:os.path');
INSERT INTO metadata (key, value) VALUES ('schema_version', '6');
INSERT INTO metadata (key, value) VALUES ('last_commit', 'deadbeef');";

    let raw = Connection::open(path).unwrap();
    raw.execute_batch(v6_sql).unwrap();
}
/// Opening a populated `schema_version = 6` database must:
///
/// * empty `symbols`, `edges`, `files`, `symbol_content` and
///   `symbol_embedding_map`,
/// * remove the `last_commit` metadata row,
/// * stamp the current `SCHEMA_VERSION`,
/// * leave exactly one file whose name starts with `v6.sqlite.pre-v` next to
///   the database.
#[test]
fn migration_v6_to_v7_clears_index_for_full_rebuild() {
    let scratch = tempfile::tempdir().unwrap();
    let db_file = scratch.path().join("v6.sqlite");
    bootstrap_v6_db(&db_file);

    let db = Database::open(&db_file, DEFAULT_EMBEDDING_DIM).unwrap();

    let row_count = |table: &str| -> i64 {
        let sql = format!("SELECT COUNT(*) FROM {table}");
        db.conn.query_row(&sql, [], |row| row.get(0)).unwrap()
    };
    // Each table is paired with the message reported if it still has rows.
    let expected_empty = [
        ("symbols", "symbols cleared"),
        ("edges", "edges cleared"),
        ("files", "files cleared"),
        ("symbol_content", "symbol_content cleared"),
        ("symbol_embedding_map", "symbol_embedding_map cleared"),
    ];
    for (table, why) in expected_empty {
        assert_eq!(row_count(table), 0, "{why}");
    }

    let last_commit: Option<String> = db
        .conn
        .query_row(
            "SELECT value FROM metadata WHERE key = 'last_commit'",
            [],
            |row| row.get(0),
        )
        .optional()
        .unwrap();
    assert_eq!(
        last_commit, None,
        "last_commit cleared to force full reindex"
    );

    let stored_version: String = db
        .conn
        .query_row(
            "SELECT value FROM metadata WHERE key = 'schema_version'",
            [],
            |row| row.get(0),
        )
        .unwrap();
    assert_eq!(stored_version, SCHEMA_VERSION.to_string());

    // Directory entries that fail to read are skipped.
    let backup_prefix = "v6.sqlite.pre-v";
    let backup_count = std::fs::read_dir(scratch.path())
        .unwrap()
        .flatten()
        .filter(|entry| {
            entry
                .file_name()
                .to_string_lossy()
                .starts_with(backup_prefix)
        })
        .count();
    assert_eq!(backup_count, 1, "v6→7 wipe must back up the index first");
}
/// A value written with `set_metadata` must be readable through
/// `read_metadata_at` once the writing `Database` has been dropped.
#[test]
fn read_metadata_at_returns_value_when_present() {
    let dir = tempfile::TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");

    let writer = Database::open(&db_path, 384).unwrap();
    writer.set_metadata("last_commit", "abc1234").unwrap();
    // Drop the handle before reading the file back.
    drop(writer);

    let stored = read_metadata_at(&db_path, "last_commit").unwrap();
    assert_eq!(stored, Some("abc1234".to_string()));
}
/// On a freshly opened database with no `last_commit` row written,
/// `read_metadata_at` must return `Ok(None)`.
#[test]
fn read_metadata_at_returns_none_when_row_absent() {
    let dir = tempfile::TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    // The handle stays alive while the file is read.
    let _keep_open = Database::open(&db_path, 384).unwrap();

    let stored = read_metadata_at(&db_path, "last_commit").unwrap();
    assert_eq!(stored, None);
}
/// For a SQLite file that contains only an unrelated `notes` table (and so no
/// `metadata` table), `read_metadata_at` must return `Ok(None)`.
#[test]
fn read_metadata_at_returns_none_for_non_cartog_sqlite() {
    let dir = tempfile::TempDir::new().unwrap();
    let db_path = dir.path().join("foreign.db");
    {
        // Scoped so the connection is dropped before the file is read back.
        let foreign = Connection::open(&db_path).unwrap();
        foreign
            .execute_batch("CREATE TABLE notes(content TEXT);")
            .unwrap();
    }

    let stored = read_metadata_at(&db_path, "last_commit").unwrap();
    assert_eq!(stored, None);
}
/// When the `last_commit` row exists but its value is SQL NULL,
/// `read_metadata_at` must return `Ok(None)`.
#[test]
fn read_metadata_at_returns_none_for_null_value() {
    let dir = tempfile::TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");

    let writer = Database::open(&db_path, 384).unwrap();
    writer
        .conn
        .execute(
            "INSERT OR REPLACE INTO metadata (key, value) VALUES ('last_commit', NULL)",
            [],
        )
        .unwrap();
    // Drop the handle before reading the file back.
    drop(writer);

    let stored = read_metadata_at(&db_path, "last_commit").unwrap();
    assert_eq!(stored, None);
}
}