1#![doc = ""]
8#![doc = include_str!("../README.md")]
9
10use anyhow::{Context, Result};
11use rusqlite::ffi::sqlite3_auto_extension;
12use rusqlite::{params, Connection, OptionalExtension};
13use serde::Serialize;
14use sqlite_vec::sqlite3_vec_init;
15use tracing::{info, warn};
16
17use cartog_core::{Edge, EdgeKind, EdgeProvenance, FileInfo, Symbol, SymbolKind, Visibility};
18
/// Typed errors for opening, preparing, and migrating the database.
///
/// The `#[error(...)]` string on each variant is its `Display` output; a
/// field or tuple element marked `#[source]` is exposed through
/// `std::error::Error::source`.
#[derive(Debug, thiserror::Error)]
pub enum DbError {
    /// The SQLite database file at `path` could not be opened.
    #[error("failed to open database at {path}: {source}")]
    Open {
        /// Path of the database that failed to open.
        path: std::path::PathBuf,
        /// Underlying SQLite error.
        #[source]
        source: rusqlite::Error,
    },

    /// The directory that should contain the database could not be prepared.
    #[error("failed to prepare database directory {path}: {source}")]
    PrepareDir {
        /// Directory path that could not be prepared.
        path: std::path::PathBuf,
        /// Underlying filesystem error.
        #[source]
        source: std::io::Error,
    },

    /// Setting the startup pragmas failed.
    #[error("failed to set startup pragmas: {0}")]
    Pragma(#[source] rusqlite::Error),

    /// Creating the core schema failed.
    #[error("failed to create schema: {0}")]
    Schema(#[source] rusqlite::Error),

    /// Creating the RAG schema failed.
    #[error("failed to create RAG schema: {0}")]
    RagSchema(#[source] rusqlite::Error),

    /// The pre-migration backup to `path` could not be written; produced by
    /// `backup_before_destructive_migration` when `VACUUM INTO` fails.
    #[error("failed to back up database before destructive migration to {path}: {source}")]
    BackupFailed {
        /// Destination path of the backup that could not be created.
        path: std::path::PathBuf,
        /// Underlying SQLite error.
        #[source]
        source: rusqlite::Error,
    },

    /// The embedding-dimension migration failed.
    #[error("embedding dimension migration failed: {0}")]
    EmbeddingDimension(#[source] rusqlite::Error),

    /// The schema version stored in the DB differs from the one this binary
    /// expects. Per the message, the remedy is to restart the session.
    #[error(
        "schema_version mismatch: this binary expects {expected}, DB has {stored} \
         (a different cartog process upgraded the schema; restart this session)"
    )]
    SchemaDrift { expected: u32, stored: u32 },

    /// Any other SQLite error. `#[from]` lets `?` convert a `rusqlite::Error`
    /// into this variant, and `transparent` forwards its message and source.
    #[error(transparent)]
    Sqlite(#[from] rusqlite::Error),
}
88
/// Result alias whose error type is [`DbError`].
///
/// Spelled with `std::result::Result` because this file imports
/// `anyhow::Result` under the bare name `Result`.
pub type DbResult<T> = std::result::Result<T, DbError>;
91
/// Upsert statement for the `symbols` table, taking 15 positional parameters
/// in the listed column order.
///
/// `INSERT OR REPLACE` overwrites any existing row with the same primary key
/// (`id`). `in_degree` is not in the column list, so a replaced row falls back
/// to that column's schema default.
const SQL_INSERT_SYMBOL: &str = "INSERT OR REPLACE INTO symbols
    (id, name, kind, file_path, start_line, end_line, start_byte, end_byte,
     parent_id, signature, visibility, is_async, docstring, content_hash, subtree_hash)
    VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15)";
96
/// Insert statement for the `edges` table, taking 8 positional parameters in
/// the listed column order.
///
/// The `id` column is omitted, so SQLite assigns it (the column is declared
/// `INTEGER PRIMARY KEY AUTOINCREMENT` in [`SCHEMA`]).
const SQL_INSERT_EDGE: &str = "INSERT INTO edges
    (source_id, target_name, target_id, kind, file_path, line, resolution_state, resolution_source)
    VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)";
100
/// DDL for the core tables (`symbols`, `edges`, `files`, `metadata`,
/// `query_log`) and their indexes.
///
/// Every statement uses `IF NOT EXISTS`, so the batch can be re-run against
/// an existing database. Columns added by later schema versions are handled
/// by `migrate`, which issues the matching `ALTER TABLE` statements.
const SCHEMA: &str = r#"
CREATE TABLE IF NOT EXISTS symbols (
    id TEXT PRIMARY KEY,
    name TEXT NOT NULL,
    kind TEXT NOT NULL,
    file_path TEXT NOT NULL,
    start_line INTEGER,
    end_line INTEGER,
    start_byte INTEGER,
    end_byte INTEGER,
    parent_id TEXT,
    signature TEXT,
    visibility TEXT,
    is_async BOOLEAN DEFAULT FALSE,
    docstring TEXT,
    in_degree INTEGER DEFAULT 0,
    content_hash TEXT,
    subtree_hash TEXT
);

CREATE TABLE IF NOT EXISTS edges (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    source_id TEXT NOT NULL,
    target_name TEXT NOT NULL,
    target_id TEXT,
    kind TEXT NOT NULL,
    file_path TEXT NOT NULL,
    line INTEGER,
    -- 0 = unresolved (heuristic + LSP not yet definitive), 1 = resolved,
    -- 2 = unresolvable (LSP definitively returned no definition: typo, dyn dispatch, macro),
    -- 3 = external (LSP located the target outside the indexed root: stdlib, deps, node_modules).
    resolution_state INTEGER NOT NULL DEFAULT 0,
    -- Which tier/source resolved target_id (EdgeProvenance::as_str), or NULL for
    -- unresolved edges and rows resolved before provenance tracking existed.
    resolution_source TEXT,
    FOREIGN KEY (source_id) REFERENCES symbols(id)
);

CREATE TABLE IF NOT EXISTS files (
    path TEXT PRIMARY KEY,
    last_modified REAL,
    hash TEXT,
    language TEXT,
    num_symbols INTEGER DEFAULT 0
);

CREATE TABLE IF NOT EXISTS metadata (
    key TEXT PRIMARY KEY,
    value TEXT
);

-- query_log feeds `cartog stats --savings` / `cartog savings`. One row per
-- successful read tool call (CLI or MCP). No query payload is stored — just
-- which tool, when, and the call surface — to keep the local-first promise.
CREATE TABLE IF NOT EXISTS query_log (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    tool TEXT NOT NULL,
    source TEXT NOT NULL,
    ts INTEGER NOT NULL
);

CREATE INDEX IF NOT EXISTS idx_query_log_tool ON query_log(tool);
CREATE INDEX IF NOT EXISTS idx_query_log_ts ON query_log(ts);

CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name);
CREATE INDEX IF NOT EXISTS idx_symbols_kind ON symbols(kind);
CREATE INDEX IF NOT EXISTS idx_symbols_file ON symbols(file_path);
CREATE INDEX IF NOT EXISTS idx_symbols_parent ON symbols(parent_id);
-- Composite: speeds up same-directory edge resolution
-- (WHERE name = ? AND file_path LIKE ?) in `resolve_edges_pass`.
CREATE INDEX IF NOT EXISTS idx_symbols_name_file ON symbols(name, file_path);
CREATE INDEX IF NOT EXISTS idx_edges_source ON edges(source_id);
CREATE INDEX IF NOT EXISTS idx_edges_target ON edges(target_name);
CREATE INDEX IF NOT EXISTS idx_edges_target_id ON edges(target_id);
CREATE INDEX IF NOT EXISTS idx_edges_kind ON edges(kind);
-- Per-file edge delete (clear_file_data_in_tx); without it the DELETE full-scans
-- edges per file, making --force/first-index O(files×edges). idx_edges_unresolved
-- is partial (state=0) so it can't serve deletes of resolved edges.
CREATE INDEX IF NOT EXISTS idx_edges_file ON edges(file_path);
-- Tier-2 import-path lookups; kind-only index scans all imports edges per call (#109).
CREATE INDEX IF NOT EXISTS idx_edges_kind_target ON edges(kind, target_name);
-- idx_edges_unresolved (partial index on resolution_state=0) is created
-- post-migration in Database::open so pre-v4 DBs without the column don't
-- blow up at SCHEMA-load time.
"#;
186
/// DDL for the retrieval (RAG) side of the database: the `symbol_content`
/// table, the `symbol_fts` FTS5 index declared as external-content over it,
/// the insert/delete triggers that mirror `symbol_content` changes into
/// `symbol_fts`, and `symbol_embedding_map`.
///
/// The `symbol_vec` virtual table is not part of this batch; its DDL comes
/// from `rag_vec_schema` because it depends on the embedding dimension.
/// NOTE(review): no AFTER UPDATE trigger is defined here — presumably content
/// rows are replaced via delete + insert; confirm against the writers.
const RAG_SCHEMA: &str = r#"
CREATE TABLE IF NOT EXISTS symbol_content (
    symbol_id TEXT PRIMARY KEY,
    content TEXT NOT NULL,
    header TEXT NOT NULL,
    normalized_name TEXT NOT NULL DEFAULT ''
);

CREATE VIRTUAL TABLE IF NOT EXISTS symbol_fts USING fts5(
    symbol_name,
    normalized_name,
    content,
    content=symbol_content,
    content_rowid=rowid
);

-- Triggers to keep FTS5 in sync with symbol_content
CREATE TRIGGER IF NOT EXISTS symbol_content_ai AFTER INSERT ON symbol_content BEGIN
    INSERT INTO symbol_fts(rowid, symbol_name, normalized_name, content)
    VALUES (new.rowid, (SELECT name FROM symbols WHERE id = new.symbol_id), new.normalized_name, new.content);
END;

CREATE TRIGGER IF NOT EXISTS symbol_content_ad AFTER DELETE ON symbol_content BEGIN
    INSERT INTO symbol_fts(symbol_fts, rowid, symbol_name, normalized_name, content)
    VALUES ('delete', old.rowid, (SELECT name FROM symbols WHERE id = old.symbol_id), old.normalized_name, old.content);
END;

CREATE TABLE IF NOT EXISTS symbol_embedding_map (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    symbol_id TEXT NOT NULL UNIQUE
);

CREATE INDEX IF NOT EXISTS idx_embedding_map_symbol ON symbol_embedding_map(symbol_id);
"#;
227
/// Default embedding vector dimension (384).
///
/// `handle_embedding_dimension` treats a request for this value as "no
/// explicit choice": when the database already stores a different dimension,
/// the stored one is kept.
pub const DEFAULT_EMBEDDING_DIM: usize = 384;
230
/// Identifies an embedding configuration by provider, model, and dimension.
///
/// Derives `PartialEq`/`Eq`, so two fingerprints compare equal only when all
/// three fields match.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct EmbeddingFingerprint {
    /// Name of the embedding provider.
    pub provider: String,
    /// Name of the embedding model.
    pub model: String,
    /// Length of the embedding vectors.
    pub dimension: usize,
}
249
/// Key string for the embedding provider name.
/// NOTE(review): presumably a `metadata` table key, alongside
/// `embedding_dimension` — confirm against the readers/writers.
const EMBED_PROVIDER_KEY: &str = "embedding_provider";
/// Key string for the embedding model name (same caveat as above).
const EMBED_MODEL_KEY: &str = "embedding_model";
253
/// Builds the DDL that creates the `symbol_vec` `vec0` virtual table with a
/// single `embedding` column of `dim` floats.
///
/// The statement uses `IF NOT EXISTS`, so it does nothing when the table is
/// already present (whatever dimension it was created with).
fn rag_vec_schema(dim: usize) -> String {
    format!(
        "CREATE VIRTUAL TABLE IF NOT EXISTS symbol_vec USING vec0(embedding float[{}])",
        dim
    )
}
258
/// Directory name `.cartog`.
/// NOTE(review): presumably the per-project directory that holds the
/// database file — confirm against the path-resolution code.
pub const DB_DIR: &str = ".cartog";

/// Database file name `db.sqlite` (presumably located inside [`DB_DIR`]).
pub const DB_FILENAME: &str = "db.sqlite";

/// File name `.cartog.db`.
/// NOTE(review): the name suggests an older database location kept for
/// compatibility — confirm where it is consulted.
pub const LEGACY_DB_FILE: &str = ".cartog.db";

/// SQLite busy timeout in milliseconds. `checkpoint_wal` applies it through
/// `PRAGMA busy_timeout` before checkpointing.
pub const BUSY_TIMEOUT_MS: u32 = 5000;
277
// Test-only, per-thread boolean flag, initially `false`.
// NOTE(review): the name suggests it injects a failure into a "reconcile"
// step after the model has been handled — the consumer is not in this part
// of the file; confirm before relying on that.
#[cfg(test)]
thread_local! {
    static RECONCILE_FAIL_AFTER_MODEL: std::sync::atomic::AtomicBool =
        const { std::sync::atomic::AtomicBool::new(false) };
}
286
287pub fn checkpoint_wal(path: &std::path::Path) -> anyhow::Result<()> {
290 use anyhow::Context;
291 if !path.exists() {
292 return Ok(());
293 }
294 let conn = Connection::open(path)
295 .with_context(|| format!("open {} for WAL checkpoint", path.display()))?;
296 conn.execute_batch(&format!(
297 "PRAGMA busy_timeout={BUSY_TIMEOUT_MS};
298 PRAGMA wal_checkpoint(TRUNCATE);"
299 ))
300 .with_context(|| format!("PRAGMA wal_checkpoint(TRUNCATE) on {}", path.display()))?;
301 Ok(())
302}
303
/// Upper bound of 100.
/// NOTE(review): presumably the cap on how many results a search may
/// return — the enforcing code is not in this part of the file; confirm.
pub const MAX_SEARCH_LIMIT: u32 = 100;
307
/// Splits an identifier into lowercase words joined by single spaces.
///
/// Word boundaries are:
/// - any non-alphanumeric character (`_`, `-`, `.`, ...), which is dropped;
/// - an uppercase letter directly after a lowercase one (`fooBar` → `foo bar`);
/// - the last uppercase letter of an acronym when a lowercase letter follows
///   it (`HTMLParser` → `html parser`).
///
/// Digits never start a new word on their own. Names with no alphanumeric
/// characters produce an empty string.
pub fn normalize_symbol_name(name: &str) -> String {
    let letters: Vec<char> = name.chars().collect();
    let mut tokens: Vec<String> = Vec::new();
    let mut buf = String::new();

    for (idx, &ch) in letters.iter().enumerate() {
        if ch.is_uppercase() {
            // A non-empty `buf` means the previous character was appended to
            // it, so `idx >= 1` and looking one position back is in bounds.
            let follows_lower = !buf.is_empty() && letters[idx - 1].is_lowercase();
            let precedes_lower = letters
                .get(idx + 1)
                .map_or(false, |next| next.is_lowercase());

            if follows_lower || (!buf.is_empty() && precedes_lower) {
                tokens.push(std::mem::take(&mut buf));
            }
            buf.extend(ch.to_lowercase());
        } else if ch.is_alphanumeric() {
            buf.extend(ch.to_lowercase());
        } else if !buf.is_empty() {
            // Separators (`_`, `-`, and every other non-alphanumeric
            // character) close the current word and are not emitted.
            tokens.push(std::mem::take(&mut buf));
        }
    }

    if !buf.is_empty() {
        tokens.push(buf);
    }

    tokens.join(" ")
}
363
/// Handle to the SQLite database.
///
/// `Debug` is implemented by hand for this type and prints no field contents.
pub struct Database {
    /// The underlying SQLite connection.
    conn: Connection,
    /// Optional [`PinnedAttach`] snapshot.
    /// NOTE(review): presumably set only for read-only attaches so schema or
    /// embedding drift can be detected later — confirm where it is populated.
    pinned: Option<PinnedAttach>,
}
375
/// Schema version and (optional) embedding fingerprint recorded together.
///
/// NOTE(review): presumably a snapshot taken when a connection attaches, to
/// be compared with the database's current state later — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PinnedAttach {
    /// Schema version recorded in the snapshot.
    pub schema_version: u32,
    /// Embedding configuration recorded in the snapshot, if any.
    pub embedding: Option<EmbeddingFingerprint>,
}
384
385impl std::fmt::Debug for Database {
386 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
387 f.debug_struct("Database").finish_non_exhaustive()
388 }
389}
390
/// Registers the sqlite-vec entry point with `sqlite3_auto_extension`.
///
/// Guarded by a function-local `Once`, so the registration runs at most once
/// per process however many times this is called.
pub fn register_sqlite_vec() {
    use std::sync::Once;
    static INIT: Once = Once::new();
    INIT.call_once(|| unsafe {
        // SAFETY (NOTE(review) — confirm): `sqlite3_vec_init` is the init
        // function exported by the `sqlite_vec` crate; the transmute
        // reinterprets its address as the callback type that
        // `sqlite3_auto_extension` accepts. This is sound only if the real
        // signature of `sqlite3_vec_init` matches SQLite's extension
        // entry-point ABI.
        #[allow(clippy::missing_transmute_annotations)]
        sqlite3_auto_extension(Some(std::mem::transmute(sqlite3_vec_init as *const ())));
    });
}
403
/// Schema version this binary writes and expects. `migrate` upgrades older
/// databases to it and stamps it under `metadata.schema_version`.
const SCHEMA_VERSION: u32 = 7;

/// Public alias of [`SCHEMA_VERSION`] for code outside this module.
pub const CURRENT_SCHEMA_VERSION: u32 = SCHEMA_VERSION;
411
412pub fn read_schema_version_at(path: &std::path::Path) -> anyhow::Result<u32> {
422 use anyhow::Context;
423 let conn = Connection::open_with_flags(
424 path,
425 rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY | rusqlite::OpenFlags::SQLITE_OPEN_URI,
426 )
427 .with_context(|| format!("open {} read-only for schema check", path.display()))?;
428 Ok(read_schema_version(&conn)?)
429}
430
431pub fn read_metadata_at(path: &std::path::Path, key: &str) -> anyhow::Result<Option<String>> {
440 use anyhow::Context;
441 let conn = Connection::open_with_flags(
442 path,
443 rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY | rusqlite::OpenFlags::SQLITE_OPEN_URI,
444 )
445 .with_context(|| format!("open {} read-only for metadata read", path.display()))?;
446 match conn.query_row(
447 "SELECT value FROM metadata WHERE key = ?1",
448 rusqlite::params![key],
449 |row| row.get::<_, Option<String>>(0),
450 ) {
451 Ok(v) => Ok(v),
454 Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
455 Err(rusqlite::Error::SqliteFailure(_, Some(ref msg)))
458 if msg.contains("no such table: metadata") =>
459 {
460 Ok(None)
461 }
462 Err(e) => Err(e).with_context(|| format!("read metadata[{key}] from {}", path.display())),
463 }
464}
465
466fn symbol_vec_exists(conn: &Connection) -> std::result::Result<bool, rusqlite::Error> {
473 conn.query_row(
474 "SELECT 1 FROM sqlite_master WHERE type IN ('table','view') AND name='symbol_vec'",
475 [],
476 |row| row.get::<_, i64>(0),
477 )
478 .optional()
479 .map(|v| v.is_some())
480}
481
482fn read_schema_version(conn: &Connection) -> std::result::Result<u32, DbError> {
490 match conn.query_row(
491 "SELECT CAST(value AS INTEGER) FROM metadata WHERE key = 'schema_version'",
492 [],
493 |row| row.get::<_, u32>(0),
494 ) {
495 Ok(v) => Ok(v),
496 Err(rusqlite::Error::QueryReturnedNoRows) => Ok(0),
498 Err(rusqlite::Error::SqliteFailure(_, Some(ref msg)))
503 if msg.contains("no such table: metadata") =>
504 {
505 Ok(0)
506 }
507 Err(e) => Err(DbError::Sqlite(e)),
508 }
509}
510
511fn migrate(conn: &Connection) {
517 let current: u32 = conn
518 .query_row(
519 "SELECT CAST(value AS INTEGER) FROM metadata WHERE key = 'schema_version'",
520 [],
521 |row| row.get(0),
522 )
523 .unwrap_or(1); let has_hash_cols = conn
528 .prepare("SELECT content_hash FROM symbols LIMIT 0")
529 .is_ok();
530 let has_resolution_state = conn
533 .prepare("SELECT resolution_state FROM edges LIMIT 0")
534 .is_ok();
535 let has_query_log = conn.prepare("SELECT 1 FROM query_log LIMIT 0").is_ok();
537 let has_resolution_source = conn
539 .prepare("SELECT resolution_source FROM edges LIMIT 0")
540 .is_ok();
541
542 if current >= SCHEMA_VERSION
543 && has_hash_cols
544 && has_resolution_state
545 && has_query_log
546 && has_resolution_source
547 {
548 return;
549 }
550
551 let no_version_row = conn
560 .query_row(
561 "SELECT 1 FROM metadata WHERE key = 'schema_version'",
562 [],
563 |_| Ok(()),
564 )
565 .is_err();
566 let symbols_empty = conn
567 .query_row("SELECT COUNT(*) FROM symbols", [], |r| r.get::<_, i64>(0))
568 .map(|c| c == 0)
569 .unwrap_or(false);
570 if no_version_row
571 && symbols_empty
572 && has_hash_cols
573 && has_resolution_state
574 && has_query_log
575 && has_resolution_source
576 {
577 if let Err(e) = conn.execute(
578 "INSERT OR REPLACE INTO metadata (key, value) VALUES ('schema_version', ?1)",
579 params![SCHEMA_VERSION.to_string()],
580 ) {
581 warn!(error = %e, "failed to stamp fresh-DB schema version");
582 }
583 return;
584 }
585
586 if current < 2 {
588 let _ = conn.execute(
589 "ALTER TABLE symbols ADD COLUMN in_degree INTEGER DEFAULT 0",
590 [],
591 );
592 }
593
594 if current < 3 || !has_hash_cols {
596 info!("schema v3: stable symbol IDs — clearing index for full rebuild");
597 let _ = conn.execute("ALTER TABLE symbols ADD COLUMN content_hash TEXT", []);
598 let _ = conn.execute("ALTER TABLE symbols ADD COLUMN subtree_hash TEXT", []);
599 for table in &["symbol_content", "edges", "symbols", "files"] {
601 let _ = conn.execute(&format!("DELETE FROM {table}"), []);
602 }
603 let _ = conn.execute("DELETE FROM symbol_vec", []);
605 let _ = conn.execute("DELETE FROM symbol_embedding_map", []);
606 let _ = conn.execute("DELETE FROM metadata WHERE key = 'last_commit'", []);
608 }
609
610 if current < 4 || !has_resolution_state {
616 info!("schema v4: adding edges.resolution_state column");
617 let _ = conn.execute(
618 "ALTER TABLE edges ADD COLUMN resolution_state INTEGER NOT NULL DEFAULT 0",
619 [],
620 );
621 let _ = conn.execute(
622 "UPDATE edges SET resolution_state = 1 WHERE target_id IS NOT NULL",
623 [],
624 );
625 }
626
627 if current < 5 || !has_query_log {
632 info!("schema v5: query_log table");
633 let _ = conn.execute(
634 "CREATE TABLE IF NOT EXISTS query_log (
635 id INTEGER PRIMARY KEY AUTOINCREMENT,
636 tool TEXT NOT NULL,
637 source TEXT NOT NULL,
638 ts INTEGER NOT NULL
639 )",
640 [],
641 );
642 let _ = conn.execute(
643 "CREATE INDEX IF NOT EXISTS idx_query_log_tool ON query_log(tool)",
644 [],
645 );
646 let _ = conn.execute(
647 "CREATE INDEX IF NOT EXISTS idx_query_log_ts ON query_log(ts)",
648 [],
649 );
650 }
651
652 if current < 6 || !has_resolution_source {
656 info!("schema v6: adding edges.resolution_source column");
657 if let Err(e) = conn.execute("ALTER TABLE edges ADD COLUMN resolution_source TEXT", []) {
661 warn!(error = %e, "failed to add edges.resolution_source column");
662 }
663 }
664
665 if current < 7 {
671 info!("schema v7: symbol-ID escaping — clearing index for full rebuild");
672 for table in &["symbol_content", "edges", "symbols", "files"] {
673 let _ = conn.execute(&format!("DELETE FROM {table}"), []);
674 }
675 let _ = conn.execute("DELETE FROM symbol_vec", []);
676 let _ = conn.execute("DELETE FROM symbol_embedding_map", []);
677 let _ = conn.execute("DELETE FROM metadata WHERE key = 'last_commit'", []);
678 }
679
680 if let Err(e) = conn.execute(
682 "INSERT OR REPLACE INTO metadata (key, value) VALUES ('schema_version', ?1)",
683 params![SCHEMA_VERSION.to_string()],
684 ) {
685 warn!(error = %e, "failed to store schema version");
686 }
687}
688
/// Sleep durations, in milliseconds, between successive attempts in
/// `retry_busy`. Its length (5) is the maximum number of retries; the delays
/// add up to 1.9 s.
const MIGRATION_RETRY_BACKOFF_MS: &[u64] = &[50, 100, 250, 500, 1000];
695
696fn retry_busy<T, F>(mut op: F) -> std::result::Result<T, rusqlite::Error>
699where
700 F: FnMut() -> std::result::Result<T, rusqlite::Error>,
701{
702 let mut attempt = 0usize;
703 loop {
704 match op() {
705 Ok(v) => return Ok(v),
706 Err(e) => {
707 let busy = matches!(
708 e,
709 rusqlite::Error::SqliteFailure(
710 rusqlite::ffi::Error {
711 code: rusqlite::ErrorCode::DatabaseBusy
712 | rusqlite::ErrorCode::DatabaseLocked,
713 ..
714 },
715 _
716 )
717 );
718 if !busy || attempt >= MIGRATION_RETRY_BACKOFF_MS.len() {
719 return Err(e);
720 }
721 let delay_ms = MIGRATION_RETRY_BACKOFF_MS[attempt];
722 tracing::debug!(
723 attempt = attempt + 1,
724 delay_ms,
725 "retrying embedding-dimension write after SQLITE_BUSY"
726 );
727 std::thread::sleep(std::time::Duration::from_millis(delay_ms));
728 attempt += 1;
729 }
730 }
731 }
732}
733
734fn handle_embedding_dimension(
745 conn: &Connection,
746 requested_dim: usize,
747) -> std::result::Result<(), rusqlite::Error> {
748 let stored_dim: Option<usize> = conn
749 .query_row(
750 "SELECT CAST(value AS INTEGER) FROM metadata WHERE key = 'embedding_dimension'",
751 [],
752 |row| row.get::<_, i64>(0).map(|v| v as usize),
753 )
754 .ok();
755
756 let effective_dim = match stored_dim {
761 Some(old) if requested_dim == DEFAULT_EMBEDDING_DIM && old != DEFAULT_EMBEDDING_DIM => old,
762 _ => requested_dim,
763 };
764
765 if stored_dim == Some(effective_dim) && symbol_vec_exists(conn)? {
771 return Ok(());
772 }
773
774 let schema = rag_vec_schema(effective_dim);
782 let needs_wipe = stored_dim.is_some();
783 retry_busy(|| {
784 let tx = conn.unchecked_transaction()?;
785 if needs_wipe {
786 let old_dim = stored_dim.unwrap_or(0);
787 tracing::warn!(
788 old = old_dim,
789 new = effective_dim,
790 "Embedding dimension changed — clearing vector index. Run `cartog rag index` to re-embed."
791 );
792 tx.execute("DROP TABLE IF EXISTS symbol_vec", [])?;
793 tx.execute("DELETE FROM symbol_embedding_map", [])?;
794 }
795 tx.execute_batch(&schema)?;
796 tx.execute(
797 "INSERT OR REPLACE INTO metadata (key, value) VALUES ('embedding_dimension', ?1)",
798 params![effective_dim.to_string()],
799 )?;
800 tx.commit()
801 })?;
802
803 Ok(())
804}
805
806fn backup_before_destructive_migration(
809 conn: &Connection,
810 db_path: &std::path::Path,
811) -> DbResult<()> {
812 let current: u32 = conn
813 .query_row(
814 "SELECT CAST(value AS INTEGER) FROM metadata WHERE key = 'schema_version'",
815 [],
816 |row| row.get(0),
817 )
818 .unwrap_or(1);
819 let has_hash_cols = conn
820 .prepare("SELECT content_hash FROM symbols LIMIT 0")
821 .is_ok();
822
823 let will_wipe = current < 7 || !has_hash_cols;
827 if !will_wipe {
828 return Ok(());
829 }
830
831 let has_rows = |table: &str| -> bool {
836 conn.query_row(&format!("SELECT EXISTS(SELECT 1 FROM {table})"), [], |r| {
837 r.get::<_, bool>(0)
838 })
839 .unwrap_or(false)
840 };
841 let any_indexed = [
842 "symbols",
843 "edges",
844 "files",
845 "symbol_content",
846 "symbol_embedding_map",
847 ]
848 .iter()
849 .any(|t| has_rows(t));
850 if !any_indexed {
851 return Ok(());
852 }
853
854 let path_str = db_path.to_string_lossy();
856 if path_str.is_empty() || path_str == ":memory:" || path_str.starts_with("file:") {
857 return Ok(());
858 }
859
860 let ts = std::time::SystemTime::now()
861 .duration_since(std::time::UNIX_EPOCH)
862 .map(|d| d.as_secs())
863 .unwrap_or(0);
864 let mut backup_os = db_path.as_os_str().to_os_string();
865 backup_os.push(format!(".pre-v{current}-{ts}.bak"));
866 let backup_path = std::path::PathBuf::from(backup_os);
867
868 let escaped = backup_path.to_string_lossy().replace('\'', "''");
871 conn.execute(&format!("VACUUM INTO '{escaped}'"), [])
872 .map_err(|source| DbError::BackupFailed {
873 path: backup_path.clone(),
874 source,
875 })?;
876
877 let symbol_count: i64 = conn
878 .query_row("SELECT COUNT(*) FROM symbols", [], |row| row.get(0))
879 .unwrap_or(0);
880 info!(
881 backup = %backup_path.display(),
882 old_version = current,
883 new_version = SCHEMA_VERSION,
884 symbols = symbol_count,
885 "schema migration will clear indexed data — created backup"
886 );
887
888 Ok(())
889}
890
891mod store;
894pub use store::queries::PathHop;
895pub use store::rag::KindScope;
896
/// An edge reference identified by its id, target name, and source location.
///
/// NOTE(review): the fields presumably mirror the `id`, `target_name`,
/// `file_path`, and `line` columns of `edges` rows that are still
/// unresolved — confirm against the query that builds it.
#[derive(Debug, Clone)]
pub struct UnresolvedEdge {
    /// Identifier of the edge.
    pub edge_id: i64,
    /// Name the edge refers to.
    pub target_name: String,
    /// File in which the reference occurs.
    pub file_path: String,
    /// Line of the reference.
    pub line: u32,
}
905
/// Aggregate counts describing the index. Serializable, with a JSON schema
/// available through `schemars`.
///
/// NOTE(review): `num_resolved`, `num_unresolvable`, and `num_external`
/// presumably count edges by `resolution_state` (1, 2, and 3 per the schema
/// comments) — confirm against the query that fills this struct.
#[derive(Debug, Clone, Serialize, schemars::JsonSchema)]
pub struct IndexStats {
    /// Number of indexed files.
    pub num_files: u32,
    /// Number of symbols.
    pub num_symbols: u32,
    /// Number of edges.
    pub num_edges: u32,
    /// Number of resolved edges.
    pub num_resolved: u32,
    /// Number of edges marked unresolvable.
    pub num_unresolvable: u32,
    /// Number of edges marked external.
    pub num_external: u32,
    /// `(language, count)` pairs.
    pub languages: Vec<(String, u32)>,
    /// `(symbol kind, count)` pairs.
    pub symbol_kinds: Vec<(String, u32)>,
}
919
/// Token-savings summary.
///
/// NOTE(review): presumably derived from `query_log` row counts multiplied
/// by the `TOKENS_PER_QUERY_*` constants — the computing code is not in this
/// part of the file; confirm.
#[derive(Debug, Clone, Serialize)]
pub struct SavingsReport {
    /// `(tool, query count)` pairs.
    pub by_tool: Vec<(String, u64)>,
    /// `(call source, query count)` pairs.
    pub by_source: Vec<(String, u64)>,
    /// Total number of logged queries.
    pub total_queries: u64,
    /// Estimated tokens consumed by the logged queries.
    pub tokens_used_cartog: u64,
    /// Estimated tokens the same queries would have cost with the grep baseline.
    pub tokens_used_grep: u64,
    /// Estimated tokens saved overall.
    pub estimated_tokens_saved: u64,
    /// Saved share as a whole-number percentage.
    pub percent_saved: u8,
    /// Per-query saving used for the estimate; `empty_savings_report` sets it
    /// to `TOKENS_SAVED_PER_QUERY`.
    pub baseline_delta: u32,
}
947
/// Estimated tokens consumed per query answered by cartog (280).
pub const TOKENS_PER_QUERY_CARTOG: u32 = 280;

/// Estimated tokens the grep-based baseline would consume per query (1,700).
pub const TOKENS_PER_QUERY_GREP: u32 = 1_700;

/// Estimated tokens saved per query: the difference of the two figures above (1,420).
pub const TOKENS_SAVED_PER_QUERY: u32 = TOKENS_PER_QUERY_GREP - TOKENS_PER_QUERY_CARTOG;
962
/// Process-wide boolean flag, initially `false`.
/// NOTE(review): the name suggests it limits reporting of a `log_query`
/// failure to once per process — the code that reads and sets it is not in
/// this part of the file; confirm.
static LOG_QUERY_FAILURE_REPORTED: std::sync::atomic::AtomicBool =
    std::sync::atomic::AtomicBool::new(false);
969
970fn empty_savings_report() -> SavingsReport {
973 SavingsReport {
974 by_tool: Vec::new(),
975 by_source: Vec::new(),
976 total_queries: 0,
977 tokens_used_cartog: 0,
978 tokens_used_grep: 0,
979 estimated_tokens_saved: 0,
980 percent_saved: 0,
981 baseline_delta: TOKENS_SAVED_PER_QUERY,
982 }
983}
984
985fn is_no_such_table(e: &rusqlite::Error) -> bool {
990 matches!(
995 e,
996 rusqlite::Error::SqliteFailure(_, Some(msg)) if msg.contains("no such table")
997 )
998}
999
/// Decodes a [`Symbol`] from a row whose symbol columns start at index 0.
///
/// Delegates to `row_to_symbol_offset` with an offset of 0.
fn row_to_symbol(row: &rusqlite::Row<'_>) -> rusqlite::Result<Symbol> {
    row_to_symbol_offset(row, 0)
}
1005
1006fn row_to_symbol_offset(row: &rusqlite::Row<'_>, off: usize) -> rusqlite::Result<Symbol> {
1007 let kind_str = row.get::<_, String>(off + 2)?;
1008 let kind = kind_str.parse().unwrap_or_else(|_| {
1009 warn!(kind = %kind_str, "unknown symbol kind, defaulting to variable");
1010 SymbolKind::Variable
1011 });
1012
1013 let vis_str = row.get::<_, Option<String>>(off + 10)?.unwrap_or_default();
1014
1015 Ok(Symbol {
1016 id: row.get(off)?,
1017 name: row.get(off + 1)?,
1018 kind,
1019 file_path: row.get(off + 3)?,
1020 start_line: row.get(off + 4)?,
1021 end_line: row.get(off + 5)?,
1022 start_byte: row.get(off + 6)?,
1023 end_byte: row.get(off + 7)?,
1024 parent_id: row.get(off + 8)?,
1025 signature: row.get(off + 9)?,
1026 visibility: Visibility::from_str_lossy(&vis_str),
1027 is_async: row.get(off + 11)?,
1028 docstring: row.get(off + 12)?,
1029 in_degree: row.get(off + 13).unwrap_or(0),
1030 content_hash: row.get(off + 14).unwrap_or(None),
1031 subtree_hash: row.get(off + 15).unwrap_or(None),
1032 })
1033}
1034
1035fn disambiguate_two<'a>(a: &'a (String, String), b: &'a (String, String)) -> Option<&'a String> {
1043 match kind_priority(&a.1).cmp(&kind_priority(&b.1)) {
1044 std::cmp::Ordering::Greater => Some(&a.0),
1045 std::cmp::Ordering::Less => Some(&b.0),
1046 std::cmp::Ordering::Equal => None,
1047 }
1048}
1049
/// Ranks a symbol kind string; a higher value is preferred.
///
/// Type-like kinds (`class`, `interface`, `enum`, `type_alias`, `trait`)
/// rank 3, `function` ranks 2, `method` ranks 1, and anything else ranks 0.
/// Matching is exact and case-sensitive.
fn kind_priority(kind: &str) -> u8 {
    const TYPE_LIKE: [&str; 5] = ["class", "interface", "enum", "type_alias", "trait"];

    if TYPE_LIKE.contains(&kind) {
        3
    } else if kind == "function" {
        2
    } else if kind == "method" {
        1
    } else {
        0
    }
}
1060
1061fn edge_from_row(row: &rusqlite::Row<'_>, base: usize) -> rusqlite::Result<Edge> {
1068 let kind_str = row.get::<_, String>(base + 3)?;
1069 let kind = kind_str.parse().unwrap_or_else(|_| {
1070 warn!(kind = %kind_str, "unknown edge kind, defaulting to references");
1071 EdgeKind::References
1072 });
1073
1074 let provenance = match row.get::<_, Option<String>>(base + 6)? {
1075 Some(s) => s.parse::<EdgeProvenance>().ok().or_else(|| {
1076 warn!(source = %s, "unknown edge provenance, dropping to None");
1077 None
1078 }),
1079 None => None,
1080 };
1081
1082 Ok(Edge {
1083 source_id: row.get(base)?,
1084 target_name: row.get(base + 1)?,
1085 target_id: row.get(base + 2)?,
1086 kind,
1087 file_path: row.get(base + 4)?,
1088 line: row.get(base + 5)?,
1089 provenance,
1090 })
1091}
1092
/// Decodes an [`Edge`] from a row whose edge columns start at index 1.
///
/// Delegates to `edge_from_row` with a base of 1, so column 0 is skipped
/// (presumably the edge's `id` — confirm against the SELECT lists in use).
fn row_to_edge(row: &rusqlite::Row<'_>) -> rusqlite::Result<Edge> {
    edge_from_row(row, 1)
}
1096
1097#[cfg(test)]
1098mod tests {
1099 use super::*;
1100
1101 fn test_symbol(name: &str, kind: SymbolKind, file: &str, line: u32) -> Symbol {
1102 Symbol::new(name, kind, file, line, line + 5, 0, 100, None)
1103 }
1104
1105 #[test]
1108 fn test_normalize_snake_case() {
1109 assert_eq!(normalize_symbol_name("validate_token"), "validate token");
1110 assert_eq!(
1111 normalize_symbol_name("get_current_user"),
1112 "get current user"
1113 );
1114 assert_eq!(normalize_symbol_name("_private_method"), "private method");
1115 assert_eq!(normalize_symbol_name("__init__"), "init");
1116 }
1117
1118 #[test]
1119 fn test_normalize_camel_case() {
1120 assert_eq!(normalize_symbol_name("validateToken"), "validate token");
1121 assert_eq!(normalize_symbol_name("getCurrentUser"), "get current user");
1122 assert_eq!(normalize_symbol_name("findByToken"), "find by token");
1123 }
1124
1125 #[test]
1126 fn test_normalize_pascal_case() {
1127 assert_eq!(
1128 normalize_symbol_name("DatabaseConnection"),
1129 "database connection"
1130 );
1131 assert_eq!(normalize_symbol_name("AuthService"), "auth service");
1132 assert_eq!(normalize_symbol_name("TokenError"), "token error");
1133 }
1134
1135 #[test]
1136 fn test_normalize_screaming_snake() {
1137 assert_eq!(normalize_symbol_name("TOKEN_EXPIRY"), "token expiry");
1138 assert_eq!(normalize_symbol_name("MAX_RETRY_COUNT"), "max retry count");
1139 }
1140
1141 #[test]
1142 fn test_normalize_acronyms() {
1143 assert_eq!(
1144 normalize_symbol_name("getHTTPResponse"),
1145 "get http response"
1146 );
1147 assert_eq!(normalize_symbol_name("parseJSON"), "parse json");
1148 assert_eq!(normalize_symbol_name("HTMLParser"), "html parser");
1149 }
1150
1151 #[test]
1152 fn test_normalize_single_word() {
1153 assert_eq!(normalize_symbol_name("validate"), "validate");
1154 assert_eq!(normalize_symbol_name("Token"), "token");
1155 }
1156
1157 #[test]
1158 fn test_normalize_empty_and_special() {
1159 assert_eq!(normalize_symbol_name(""), "");
1160 assert_eq!(normalize_symbol_name("_"), "");
1161 assert_eq!(normalize_symbol_name("___"), "");
1162 }
1163
1164 #[test]
1165 fn test_insert_and_query_symbols() {
1166 let db = Database::open_memory().unwrap();
1167 let sym = test_symbol("my_func", SymbolKind::Function, "test.py", 10);
1168 db.insert_symbol(&sym).unwrap();
1169
1170 let outline = db.outline("test.py").unwrap();
1171 assert_eq!(outline.len(), 1);
1172 assert_eq!(outline[0].name, "my_func");
1173 }
1174
1175 #[test]
1176 fn test_optimize_populates_planner_stats() {
1177 let db = Database::open_memory().unwrap();
1181 let syms: Vec<_> = (0..2000)
1182 .map(|i| test_symbol(&format!("f{i}"), SymbolKind::Function, "a.py", i + 1))
1183 .collect();
1184 db.insert_symbols(&syms).unwrap();
1185
1186 db.optimize().unwrap();
1187
1188 let analyzed: i64 = db
1189 .conn
1190 .query_row(
1191 "SELECT COUNT(*) FROM sqlite_master WHERE name = 'sqlite_stat1'",
1192 [],
1193 |row| row.get(0),
1194 )
1195 .unwrap();
1196 assert_eq!(analyzed, 1, "PRAGMA optimize must create sqlite_stat1");
1197 }
1198
1199 #[test]
1200 fn test_optimize_is_safe_on_empty_db() {
1201 let db = Database::open_memory().unwrap();
1202 db.optimize().unwrap(); }
1204
1205 #[test]
1206 fn is_empty_reflects_symbol_presence() {
1207 let db = Database::open_memory().unwrap();
1208 assert!(db.is_empty().unwrap(), "fresh DB should be empty");
1209 db.insert_symbol(&test_symbol("f", SymbolKind::Function, "a.py", 1))
1210 .unwrap();
1211 assert!(!db.is_empty().unwrap(), "DB with a symbol is not empty");
1212 }
1213
1214 #[test]
1215 fn test_insert_and_query_edges() {
1216 let db = Database::open_memory().unwrap();
1217 let caller = test_symbol("caller_fn", SymbolKind::Function, "a.py", 1);
1218 let callee = test_symbol("callee_fn", SymbolKind::Function, "b.py", 1);
1219 db.insert_symbol(&caller).unwrap();
1220 db.insert_symbol(&callee).unwrap();
1221
1222 let edge = Edge {
1223 source_id: caller.id.clone(),
1224 target_name: "callee_fn".to_string(),
1225 target_id: None,
1226 kind: EdgeKind::Calls,
1227 file_path: "a.py".to_string(),
1228 line: 5,
1229 provenance: None,
1230 };
1231 db.insert_edge(&edge).unwrap();
1232
1233 let refs = db.refs("callee_fn", None).unwrap();
1234 assert_eq!(refs.len(), 1);
1235 assert_eq!(refs[0].0.source_id, caller.id);
1236 }
1237
1238 #[test]
1239 fn test_edge_resolution() {
1240 let db = Database::open_memory().unwrap();
1241 let sym_a = test_symbol("process", SymbolKind::Function, "a.py", 1);
1242 let sym_b = test_symbol("helper", SymbolKind::Function, "a.py", 20);
1243 db.insert_symbols(&[sym_a.clone(), sym_b.clone()]).unwrap();
1244
1245 let edge = Edge {
1246 source_id: sym_a.id.clone(),
1247 target_name: "helper".to_string(),
1248 target_id: None,
1249 kind: EdgeKind::Calls,
1250 file_path: "a.py".to_string(),
1251 line: 5,
1252 provenance: None,
1253 };
1254 db.insert_edge(&edge).unwrap();
1255
1256 let resolved = db.resolve_edges().unwrap();
1257 assert_eq!(resolved, 1);
1258 }
1259
1260 #[test]
1261 fn test_stats() {
1262 let db = Database::open_memory().unwrap();
1263 let file = FileInfo {
1264 path: "test.py".to_string(),
1265 last_modified: 0.0,
1266 hash: "abc".to_string(),
1267 language: "python".to_string(),
1268 num_symbols: 2,
1269 };
1270 db.upsert_file(&file).unwrap();
1271 let sym = test_symbol("foo", SymbolKind::Function, "test.py", 1);
1272 db.insert_symbol(&sym).unwrap();
1273
1274 let stats = db.stats().unwrap();
1275 assert_eq!(stats.num_files, 1);
1276 assert_eq!(stats.num_symbols, 1);
1277 }
1278
1279 #[test]
1280 fn savings_breakdown_empty_returns_zero() {
1281 let db = Database::open_memory().unwrap();
1282 let r = db.savings_breakdown().unwrap();
1283 assert_eq!(r.total_queries, 0);
1284 assert_eq!(r.tokens_used_cartog, 0);
1285 assert_eq!(r.tokens_used_grep, 0);
1286 assert_eq!(r.estimated_tokens_saved, 0);
1287 assert_eq!(r.percent_saved, 0);
1288 assert!(r.by_tool.is_empty());
1289 assert!(r.by_source.is_empty());
1290 assert_eq!(r.baseline_delta, TOKENS_SAVED_PER_QUERY);
1291 }
1292
1293 #[test]
1294 fn log_query_persists_rows_aggregated_by_tool_and_source() {
1295 let db = Database::open_memory().unwrap();
1296 db.log_query("search", "cli");
1297 db.log_query("search", "cli");
1298 db.log_query("refs", "cli");
1299 db.log_query("search", "mcp");
1300 db.log_query("impact", "mcp");
1301
1302 let r = db.savings_breakdown().unwrap();
1303 assert_eq!(r.total_queries, 5);
1304 assert_eq!(r.tokens_used_cartog, 5 * TOKENS_PER_QUERY_CARTOG as u64);
1306 assert_eq!(r.tokens_used_grep, 5 * TOKENS_PER_QUERY_GREP as u64);
1307 assert_eq!(r.estimated_tokens_saved, 5 * TOKENS_SAVED_PER_QUERY as u64);
1308 assert_eq!(r.percent_saved, 83);
1310
1311 let tool_counts: Vec<_> = r.by_tool.iter().map(|(t, c)| (t.as_str(), *c)).collect();
1313 assert_eq!(tool_counts, vec![("search", 3), ("impact", 1), ("refs", 1)]);
1314
1315 let src_counts: Vec<_> = r.by_source.iter().map(|(s, c)| (s.as_str(), *c)).collect();
1316 assert_eq!(src_counts, vec![("cli", 3), ("mcp", 2)]);
1317 }
1318
/// `log_query` on a read-only handle must not add rows: only the single row
/// written through the writable handle is visible afterwards.
#[test]
fn log_query_noop_on_read_only_attach() {
    let tmp = tempfile::TempDir::new().unwrap();
    let path = tmp.path().join("test.db");

    // Seed one row through a writable handle, then close it before reopening.
    let writer = Database::open(&path, 384).unwrap();
    writer.log_query("search", "cli");
    drop(writer);

    let reader = Database::open_readonly(&path).unwrap();
    assert!(reader.is_read_only());
    reader.log_query("search", "mcp");
    reader.log_query("refs", "mcp");

    let report = reader.savings_breakdown().unwrap();
    assert_eq!(report.total_queries, 1);
    assert_eq!(report.by_tool, vec![("search".to_string(), 1)]);
}
1339
/// Two `helper` symbols exist; the call edge must resolve to the one that
/// lives in the caller's directory (`src/`), not the one under `lib/`.
#[test]
fn test_resolve_edges_same_dir_priority() {
    let db = Database::open_memory().unwrap();

    let caller = test_symbol("process", SymbolKind::Function, "src/main.py", 1);
    let same_dir = test_symbol("helper", SymbolKind::Function, "src/utils.py", 1);
    let other_dir = test_symbol("helper", SymbolKind::Function, "lib/utils.py", 1);
    // `other_dir` is never read again, so it is moved instead of cloned.
    db.insert_symbols(&[caller.clone(), same_dir.clone(), other_dir])
        .unwrap();

    // Unresolved call from `process` to the ambiguous name `helper`.
    let edge = Edge {
        source_id: caller.id.clone(),
        target_name: "helper".to_string(),
        target_id: None,
        kind: EdgeKind::Calls,
        file_path: "src/main.py".to_string(),
        line: 5,
        provenance: None,
    };
    db.insert_edge(&edge).unwrap();

    let resolved = db.resolve_edges().unwrap();
    assert_eq!(resolved, 1);

    let refs = db.refs("helper", None).unwrap();
    let call_edge = refs
        .iter()
        .find(|(e, _)| e.kind == EdgeKind::Calls)
        .unwrap();
    assert_eq!(call_edge.0.target_id.as_ref().unwrap(), &same_dir.id);
}
1373
/// Two `helper` candidates in directories unrelated to the caller: nothing
/// may be resolved.
#[test]
fn test_resolve_edges_ambiguous_no_resolve() {
    let db = Database::open_memory().unwrap();

    let caller = test_symbol("process", SymbolKind::Function, "app/main.py", 1);
    let candidate_a = test_symbol("helper", SymbolKind::Function, "pkg_a/utils.py", 1);
    let candidate_b = test_symbol("helper", SymbolKind::Function, "pkg_b/utils.py", 1);
    db.insert_symbols(&[caller.clone(), candidate_a, candidate_b])
        .unwrap();

    db.insert_edge(&Edge {
        kind: EdgeKind::Calls,
        source_id: caller.id.clone(),
        target_name: "helper".to_string(),
        target_id: None,
        file_path: "app/main.py".to_string(),
        line: 5,
        provenance: None,
    })
    .unwrap();

    let resolved_count = db.resolve_edges().unwrap();
    assert_eq!(resolved_count, 0);
}
1399
/// A `helper` defined in the caller's own file wins over a same-named symbol
/// in another file.
#[test]
fn test_resolve_edges_same_file_priority() {
    let db = Database::open_memory().unwrap();

    let caller = test_symbol("process", SymbolKind::Function, "a.py", 1);
    let local_helper = test_symbol("helper", SymbolKind::Function, "a.py", 20);
    let foreign_helper = test_symbol("helper", SymbolKind::Function, "b.py", 1);
    db.insert_symbols(&[caller.clone(), local_helper.clone(), foreign_helper])
        .unwrap();

    db.insert_edge(&Edge {
        kind: EdgeKind::Calls,
        source_id: caller.id.clone(),
        target_name: "helper".to_string(),
        target_id: None,
        file_path: "a.py".to_string(),
        line: 5,
        provenance: None,
    })
    .unwrap();

    assert_eq!(db.resolve_edges().unwrap(), 1);

    let refs = db.refs("helper", None).unwrap();
    let (call_edge, _) = refs
        .iter()
        .find(|entry| entry.0.kind == EdgeKind::Calls)
        .unwrap();
    assert_eq!(call_edge.target_id.as_ref().unwrap(), &local_helper.id);
}
1433
/// An `Inherits` edge whose target is a namespace-qualified PHP name resolves
/// to the short-named class stored in the same file.
#[test]
fn test_resolve_edges_php_fqcn_target_same_file() {
    let db = Database::open_memory().unwrap();
    let fqcn = "App\\Auth\\BaseService";

    let base = test_symbol("BaseService", SymbolKind::Class, "auth/service.php", 1);
    let child = test_symbol("AuthService", SymbolKind::Class, "auth/service.php", 30);
    db.insert_symbols(&[base.clone(), child.clone()]).unwrap();

    let inherits = Edge::new(&child.id, fqcn, EdgeKind::Inherits, "auth/service.php", 30);
    db.insert_edge(&inherits).unwrap();

    let resolved_count = db.resolve_edges().unwrap();
    assert_eq!(resolved_count, 1);

    // Looking the edge up by the qualified name returns it with the resolved id.
    let refs = db.refs(fqcn, None).unwrap();
    let (first_edge, _) = &refs[0];
    assert_eq!(first_edge.target_id.as_ref().unwrap(), &base.id);
}
1459
/// When both a class `AppError` and an import symbol literally named
/// `App\AppError` exist, the `Inherits` edge must resolve to the class.
#[test]
fn test_resolve_edges_php_fqcn_target_prefers_class_over_import_symbol() {
    let db = Database::open_memory().unwrap();
    let fqcn = "App\\AppError";

    let class_sym = test_symbol("AppError", SymbolKind::Class, "exceptions.php", 1);
    let child = test_symbol("TokenError", SymbolKind::Class, "auth/tokens.php", 10);
    // The import symbol's name equals the edge's target name exactly.
    let import_sym = test_symbol(fqcn, SymbolKind::Import, "auth/tokens.php", 1);
    db.insert_symbols(&[class_sym.clone(), child.clone(), import_sym])
        .unwrap();

    let inherits_edge = Edge::new(&child.id, fqcn, EdgeKind::Inherits, "auth/tokens.php", 10);
    db.insert_edge(&inherits_edge).unwrap();

    db.resolve_edges().unwrap();

    let refs = db.refs(fqcn, None).unwrap();
    let (found, _) = refs
        .iter()
        .find(|entry| entry.0.kind == EdgeKind::Inherits)
        .unwrap();
    assert_eq!(found.target_id.as_ref().unwrap(), &class_sym.id);
}
1489
/// `hierarchy("BaseService")` must list a child whose `Inherits` edge was
/// recorded with the qualified name `App\Auth\BaseService` and then resolved.
#[test]
fn test_hierarchy_finds_children_of_fqcn_resolved_target() {
    let db = Database::open_memory().unwrap();

    let base = test_symbol("BaseService", SymbolKind::Class, "auth/service.php", 1);
    let child = test_symbol(
        "PaymentProcessor",
        SymbolKind::Class,
        "services/payment.php",
        5,
    );
    // `base` is not read after insertion, so it is moved instead of cloned.
    db.insert_symbols(&[base, child.clone()]).unwrap();

    db.insert_edge(&Edge::new(
        &child.id,
        "App\\Auth\\BaseService",
        EdgeKind::Inherits,
        "services/payment.php",
        5,
    ))
    .unwrap();
    db.resolve_edges().unwrap();

    // Pairs are (child name, parent name).
    let pairs = db.hierarchy("BaseService").unwrap();
    assert_eq!(
        pairs,
        vec![("PaymentProcessor".to_string(), "BaseService".to_string())]
    );
}
1519
/// A class and a same-named method in the same file: the reference resolves
/// to the class.
#[test]
fn test_resolve_edges_class_over_constructor() {
    let db = Database::open_memory().unwrap();

    let caller = test_symbol("handleLogin", SymbolKind::Method, "auth/Service.java", 10);
    let class_sym = test_symbol("Logger", SymbolKind::Class, "util/Logger.java", 1);
    let ctor_sym = test_symbol("Logger", SymbolKind::Method, "util/Logger.java", 5);
    db.insert_symbols(&[caller.clone(), class_sym.clone(), ctor_sym])
        .unwrap();

    db.insert_edge(&Edge {
        kind: EdgeKind::References,
        source_id: caller.id.clone(),
        target_name: "Logger".to_string(),
        target_id: None,
        file_path: "auth/Service.java".to_string(),
        line: 12,
        provenance: None,
    })
    .unwrap();

    assert_eq!(db.resolve_edges().unwrap(), 1);

    let refs = db.refs("Logger", None).unwrap();
    let (reference, _) = refs
        .iter()
        .find(|entry| entry.0.kind == EdgeKind::References)
        .unwrap();
    assert_eq!(reference.target_id.as_ref().unwrap(), &class_sym.id);
}
1552
/// With three same-named candidates (class, method, and a function in another
/// directory) the call stays unresolved.
#[test]
fn test_resolve_edges_class_over_constructor_still_ambiguous_with_three() {
    let db = Database::open_memory().unwrap();

    let caller = test_symbol("main", SymbolKind::Function, "app.java", 1);
    let as_class = test_symbol("Foo", SymbolKind::Class, "a/Foo.java", 1);
    let as_ctor = test_symbol("Foo", SymbolKind::Method, "a/Foo.java", 5);
    let as_func = test_symbol("Foo", SymbolKind::Function, "b/Foo.java", 1);
    db.insert_symbols(&[caller.clone(), as_class, as_ctor, as_func])
        .unwrap();

    db.insert_edge(&Edge {
        kind: EdgeKind::Calls,
        source_id: caller.id.clone(),
        target_name: "Foo".to_string(),
        target_id: None,
        file_path: "app.java".to_string(),
        line: 5,
        provenance: None,
    })
    .unwrap();

    let resolved_count = db.resolve_edges().unwrap();
    assert_eq!(resolved_count, 0);
}
1579
/// An import edge and a reference edge from the same file both target
/// `Logger`; both must resolve, and the reference must point at the class.
#[test]
fn test_resolve_edges_multipass_import_then_call() {
    let db = Database::open_memory().unwrap();

    let import_sym = test_symbol("util.Logger", SymbolKind::Import, "auth/service.java", 1);
    let caller = test_symbol("authenticate", SymbolKind::Method, "auth/service.java", 10);
    let logger_class = test_symbol("Logger", SymbolKind::Class, "util/Logger.java", 1);
    let logger_ctor = test_symbol("Logger", SymbolKind::Method, "util/Logger.java", 5);
    db.insert_symbols(&[
        import_sym.clone(),
        caller.clone(),
        logger_class.clone(),
        logger_ctor,
    ])
    .unwrap();

    // Both edges share target name and file; only source, kind and line differ.
    let unresolved = |source: &Symbol, kind, line| Edge {
        source_id: source.id.clone(),
        target_name: "Logger".to_string(),
        target_id: None,
        kind,
        file_path: "auth/service.java".to_string(),
        line,
        provenance: None,
    };

    db.insert_edge(&unresolved(&import_sym, EdgeKind::Imports, 1))
        .unwrap();
    db.insert_edge(&unresolved(&caller, EdgeKind::References, 15))
        .unwrap();

    let resolved_count = db.resolve_edges().unwrap();
    assert_eq!(resolved_count, 2);

    let refs = db.refs("Logger", None).unwrap();
    let (reference, _) = refs
        .iter()
        .find(|entry| entry.0.kind == EdgeKind::References)
        .unwrap();
    assert_eq!(reference.target_id.as_ref().unwrap(), &logger_class.id);
}
1634
/// A function and a same-named method in different files: the call resolves
/// to the function.
#[test]
fn test_resolve_edges_function_over_method() {
    let db = Database::open_memory().unwrap();

    let caller = test_symbol("process", SymbolKind::Function, "app/main.rb", 1);
    let free_fn = test_symbol("get_logger", SymbolKind::Function, "utils/helpers.rb", 6);
    let method = test_symbol("get_logger", SymbolKind::Method, "utils/logging.rb", 6);
    db.insert_symbols(&[caller.clone(), free_fn.clone(), method])
        .unwrap();

    db.insert_edge(&Edge {
        kind: EdgeKind::Calls,
        source_id: caller.id.clone(),
        target_name: "get_logger".to_string(),
        target_id: None,
        file_path: "app/main.rb".to_string(),
        line: 5,
        provenance: None,
    })
    .unwrap();

    assert_eq!(db.resolve_edges().unwrap(), 1);

    let refs = db.refs("get_logger", None).unwrap();
    let (call, _) = refs
        .iter()
        .find(|entry| entry.0.kind == EdgeKind::Calls)
        .unwrap();
    assert_eq!(call.target_id.as_ref().unwrap(), &free_fn.id);
}
1667
/// Two functions with the same name in different directories leave the call
/// unresolved.
#[test]
fn test_resolve_edges_two_functions_still_ambiguous() {
    let db = Database::open_memory().unwrap();

    let caller = test_symbol("main", SymbolKind::Function, "app.rb", 1);
    let first = test_symbol("helper", SymbolKind::Function, "a/utils.rb", 1);
    let second = test_symbol("helper", SymbolKind::Function, "b/utils.rb", 1);
    db.insert_symbols(&[caller.clone(), first, second]).unwrap();

    db.insert_edge(&Edge {
        kind: EdgeKind::Calls,
        source_id: caller.id.clone(),
        target_name: "helper".to_string(),
        target_id: None,
        file_path: "app.rb".to_string(),
        line: 5,
        provenance: None,
    })
    .unwrap();

    let resolved_count = db.resolve_edges().unwrap();
    assert_eq!(resolved_count, 0);
}
1692
/// `callees` returns every outgoing call edge of the named symbol.
#[test]
fn test_callees_query() {
    let db = Database::open_memory().unwrap();

    let caller = test_symbol("process", SymbolKind::Function, "a.py", 1);
    let fetch = test_symbol("fetch", SymbolKind::Function, "b.py", 1);
    let save = test_symbol("save", SymbolKind::Function, "c.py", 1);
    db.insert_symbols(&[caller.clone(), fetch, save]).unwrap();

    // Unresolved call edges from `process`, differing only in target and line.
    let call_to = |target: &str, line| Edge {
        source_id: caller.id.clone(),
        target_name: target.to_string(),
        target_id: None,
        kind: EdgeKind::Calls,
        file_path: "a.py".to_string(),
        line,
        provenance: None,
    };
    db.insert_edges(&[call_to("fetch", 5), call_to("save", 6)])
        .unwrap();

    let callees = db.callees("process").unwrap();
    assert_eq!(callees.len(), 2);
    let targets = callees
        .iter()
        .map(|edge| edge.target_name.as_str())
        .collect::<Vec<&str>>();
    for expected in ["fetch", "save"] {
        assert!(targets.contains(&expected));
    }
}
1731
/// With `b` calling `a` and `c` calling `b`, `impact("a", 2)` yields two
/// results at depths 1 and 2, in that order.
#[test]
fn test_impact_transitive() {
    let db = Database::open_memory().unwrap();

    let a = test_symbol("a", SymbolKind::Function, "a.py", 1);
    let b = test_symbol("b", SymbolKind::Function, "b.py", 1);
    let c = test_symbol("c", SymbolKind::Function, "c.py", 1);
    db.insert_symbols(&[a.clone(), b.clone(), c.clone()])
        .unwrap();

    // Already-resolved call edge recorded in the source symbol's file.
    let calls = |from: &Symbol, to: &Symbol, line| Edge {
        source_id: from.id.clone(),
        target_name: to.name.clone(),
        target_id: Some(to.id.clone()),
        kind: EdgeKind::Calls,
        file_path: from.file_path.clone(),
        line,
        provenance: None,
    };
    db.insert_edges(&[calls(&b, &a, 5), calls(&c, &b, 5)])
        .unwrap();

    let results = db.impact("a", 2).unwrap();
    assert_eq!(results.len(), 2);
    let depths: Vec<_> = results.iter().map(|hit| hit.1).collect();
    assert_eq!(depths, [1, 2]);
}
1771
/// A depth limit of zero yields no impact results.
#[test]
fn test_impact_depth_zero_returns_empty() {
    let db = Database::open_memory().unwrap();
    let only_symbol = test_symbol("a", SymbolKind::Function, "a.py", 1);
    db.insert_symbols(&[only_symbol]).unwrap();

    let results = db.impact("a", 0).unwrap();
    assert!(results.is_empty());
}
1779
/// `a` and `b` call each other; `impact` must still terminate and report
/// exactly two results, each with a depth inside the requested limit.
#[test]
fn test_impact_cycle_terminates() {
    let db = Database::open_memory().unwrap();
    let a = test_symbol("a", SymbolKind::Function, "a.py", 1);
    let b = test_symbol("b", SymbolKind::Function, "b.py", 1);
    db.insert_symbols(&[a.clone(), b.clone()]).unwrap();
    db.insert_edges(&[
        Edge {
            source_id: a.id.clone(),
            target_name: "b".to_string(),
            target_id: Some(b.id.clone()),
            kind: EdgeKind::Calls,
            file_path: "a.py".to_string(),
            line: 2,
            provenance: None,
        },
        Edge {
            source_id: b.id.clone(),
            target_name: "a".to_string(),
            target_id: Some(a.id.clone()),
            kind: EdgeKind::Calls,
            file_path: "b.py".to_string(),
            line: 2,
            provenance: None,
        },
    ])
    .unwrap();

    let results = db.impact("a", 5).unwrap();
    assert_eq!(results.len(), 2);
    for (_, depth) in &results {
        // Range check replaces the manual `>= 1 && <= 5` comparison.
        assert!((1..=5).contains(depth));
    }
}
1816
/// `shared` is called by `x` and `y`, and `y` also calls `x`; `impact` at
/// depth 3 reports exactly three results for those three edges.
#[test]
fn test_impact_fanout_dedupes_by_edge() {
    let db = Database::open_memory().unwrap();
    let shared = test_symbol("shared", SymbolKind::Function, "s.py", 1);
    let x = test_symbol("x", SymbolKind::Function, "x.py", 1);
    let y = test_symbol("y", SymbolKind::Function, "y.py", 1);
    db.insert_symbols(&[shared.clone(), x.clone(), y.clone()])
        .unwrap();

    // Already-resolved call edge recorded in the source symbol's file.
    let calls = |from: &Symbol, to: &Symbol, line| Edge {
        source_id: from.id.clone(),
        target_name: to.name.clone(),
        target_id: Some(to.id.clone()),
        kind: EdgeKind::Calls,
        file_path: from.file_path.clone(),
        line,
        provenance: None,
    };
    db.insert_edges(&[
        calls(&x, &shared, 1),
        calls(&y, &shared, 1),
        calls(&y, &x, 2),
    ])
    .unwrap();

    let results = db.impact("shared", 3).unwrap();
    assert_eq!(results.len(), 3);
}
1862
1863 fn chain_db() -> Database {
1865 let db = Database::open_memory().unwrap();
1866 let names = ["a", "b", "c", "d"];
1867 let syms: Vec<Symbol> = names
1868 .iter()
1869 .map(|n| test_symbol(n, SymbolKind::Function, &format!("{n}.py"), 1))
1870 .collect();
1871 db.insert_symbols(&syms).unwrap();
1872 let edges: Vec<Edge> = syms
1873 .windows(2)
1874 .map(|w| Edge {
1875 source_id: w[0].id.clone(),
1876 target_name: w[1].name.clone(),
1877 target_id: Some(w[1].id.clone()),
1878 kind: EdgeKind::Calls,
1879 file_path: w[0].file_path.clone(),
1880 line: 2,
1881 provenance: None,
1882 })
1883 .collect();
1884 db.insert_edges(&edges).unwrap();
1885 db
1886 }
1887
/// Tracing `a` to `d` over the chain yields hops sourced at `a`, `b`, `c`
/// in order, with the final hop targeting `d`.
#[test]
fn trace_returns_shortest_path_in_order() {
    let db = chain_db();
    let path = db.trace("a", "d", 8).unwrap().expect("path a→d exists");

    let sources = path
        .iter()
        .map(|hop| hop.source_name.as_str())
        .collect::<Vec<&str>>();
    assert_eq!(sources, ["a", "b", "c"]);

    let final_hop = path.last().unwrap();
    assert_eq!(final_hop.target_name, "d");
}
1896
/// The chain only runs forward, so tracing `d` back to `a` finds no path.
#[test]
fn trace_returns_none_when_unreachable() {
    let db = chain_db();
    let path = db.trace("d", "a", 8).unwrap();
    assert!(path.is_none());
}
1902
/// Tracing a symbol to itself succeeds with a path of zero hops.
#[test]
fn trace_same_symbol_is_empty_path() {
    let db = chain_db();
    let path = db.trace("a", "a", 8).unwrap();
    assert_eq!(path, Some(vec![]));
}
1908
/// `a` to `d` needs three hops, so a depth limit of 2 must report no path.
#[test]
fn trace_respects_depth_limit() {
    let db = chain_db();
    let path = db.trace("a", "d", 2).unwrap();
    assert!(path.is_none());
}
1915
/// With `a` and `b` calling each other, tracing `a` to `b` still terminates
/// and returns the single direct hop.
#[test]
fn trace_terminates_on_cycle() {
    let db = Database::open_memory().unwrap();
    let a = test_symbol("a", SymbolKind::Function, "a.py", 1);
    let b = test_symbol("b", SymbolKind::Function, "b.py", 1);
    db.insert_symbols(&[a.clone(), b.clone()]).unwrap();

    // Already-resolved call edge recorded in the source symbol's file.
    let calls = |from: &Symbol, to: &Symbol| Edge {
        source_id: from.id.clone(),
        target_name: to.name.clone(),
        target_id: Some(to.id.clone()),
        kind: EdgeKind::Calls,
        file_path: from.file_path.clone(),
        line: 2,
        provenance: None,
    };
    db.insert_edges(&[calls(&a, &b), calls(&b, &a)]).unwrap();

    let path = db.trace("a", "b", 8).unwrap().expect("a→b exists");
    assert_eq!(path.len(), 1);
}
1947
/// Four symbols where every symbol calls every other one: `trace` must
/// terminate and return the direct one-hop path from `a` to `d`.
#[test]
fn trace_dense_cycle_does_not_loop_and_finds_target() {
    let db = Database::open_memory().unwrap();
    let syms = ["a", "b", "c", "d"]
        .iter()
        .map(|name| test_symbol(name, SymbolKind::Function, &format!("{name}.py"), 1))
        .collect::<Vec<Symbol>>();
    db.insert_symbols(&syms).unwrap();

    // One resolved call edge for every ordered pair of distinct symbols.
    let edges = syms
        .iter()
        .flat_map(|src| {
            syms.iter()
                .filter(move |tgt| src.id != tgt.id)
                .map(move |tgt| Edge {
                    source_id: src.id.clone(),
                    target_name: tgt.name.clone(),
                    target_id: Some(tgt.id.clone()),
                    kind: EdgeKind::Calls,
                    file_path: src.file_path.clone(),
                    line: 2,
                    provenance: None,
                })
        })
        .collect::<Vec<Edge>>();
    db.insert_edges(&edges).unwrap();

    let path = db.trace("a", "d", 20).unwrap().expect("a reaches d");
    assert_eq!(path.len(), 1, "shortest path in a clique is one hop");
    let only_hop = &path[0];
    assert_eq!(only_hop.source_name, "a");
    assert_eq!(only_hop.target_name, "d");
}
1984
/// File paths containing commas must not disturb path finding: the
/// two-hop route `a -> b -> c` is still found, with exact source ids.
#[test]
fn trace_unaffected_by_comma_in_symbol_ids() {
    let db = Database::open_memory().unwrap();
    let a = test_symbol("a", SymbolKind::Function, "a,b.py", 1);
    let b = test_symbol("b", SymbolKind::Function, "c,d.py", 1);
    let c = test_symbol("c", SymbolKind::Function, "e,f.py", 1);
    db.insert_symbols(&[a.clone(), b.clone(), c.clone()])
        .unwrap();

    let a_calls_b = Edge {
        kind: EdgeKind::Calls,
        source_id: a.id.clone(),
        target_name: "b".to_string(),
        target_id: Some(b.id.clone()),
        file_path: a.file_path.clone(),
        line: 2,
        provenance: None,
    };
    let b_calls_c = Edge {
        kind: EdgeKind::Calls,
        source_id: b.id.clone(),
        target_name: "c".to_string(),
        target_id: Some(c.id.clone()),
        file_path: b.file_path.clone(),
        line: 2,
        provenance: None,
    };
    db.insert_edges(&[a_calls_b, b_calls_c]).unwrap();

    let path = db
        .trace("a", "c", 8)
        .unwrap()
        .expect("a→b→c despite commas");
    assert_eq!(path.len(), 2);
    assert_eq!(path[0].source_id, a.id);
    assert_eq!(path[1].source_id, b.id);
}
2024
/// Two symbols named `helper` exist in the same file; the single hop of the
/// `caller` to `helper` trace must carry the caller's exact id.
#[test]
fn trace_hop_carries_exact_source_id_for_overloaded_name() {
    let db = Database::open_memory().unwrap();
    let caller = test_symbol("caller", SymbolKind::Function, "m.py", 1);
    let h1 = Symbol::new("helper", SymbolKind::Function, "m.py", 10, 12, 0, 5, None);
    let h2 = Symbol::new("helper", SymbolKind::Method, "m.py", 20, 22, 6, 11, None);
    // `h1` is not read after insertion, so it is moved instead of cloned.
    db.insert_symbols(&[caller.clone(), h1, h2.clone()])
        .unwrap();
    // The call is resolved to the second `helper` (the method).
    db.insert_edges(&[Edge {
        source_id: caller.id.clone(),
        target_name: "helper".to_string(),
        target_id: Some(h2.id.clone()),
        kind: EdgeKind::Calls,
        file_path: caller.file_path.clone(),
        line: 2,
        provenance: None,
    }])
    .unwrap();
    let hops = db
        .trace("caller", "helper", 8)
        .unwrap()
        .expect("caller→helper");
    assert_eq!(hops.len(), 1);
    assert_eq!(hops[0].source_id, caller.id, "hop names the exact source");
}
2053
/// `hierarchy("Dog")` returns the single (child, parent) pair created by an
/// unresolved `Inherits` edge from `Dog` to `Animal`.
#[test]
fn test_hierarchy_query() {
    let db = Database::open_memory().unwrap();

    let animal = test_symbol("Animal", SymbolKind::Class, "a.py", 1);
    let dog = test_symbol("Dog", SymbolKind::Class, "a.py", 10);
    db.insert_symbols(&[animal, dog.clone()]).unwrap();

    let inherits = Edge {
        kind: EdgeKind::Inherits,
        source_id: dog.id.clone(),
        target_name: "Animal".to_string(),
        target_id: None,
        file_path: "a.py".to_string(),
        line: 10,
        provenance: None,
    };
    db.insert_edge(&inherits).unwrap();

    let pairs = db.hierarchy("Dog").unwrap();
    assert_eq!(pairs, vec![("Dog".to_string(), "Animal".to_string())]);
}
2078
/// `file_deps` returns the import edge recorded for the given file.
#[test]
fn test_file_deps_query() {
    let db = Database::open_memory().unwrap();

    let os_import = test_symbol("os", SymbolKind::Import, "main.py", 1);
    db.insert_symbol(&os_import).unwrap();

    let imports_os = Edge {
        kind: EdgeKind::Imports,
        source_id: os_import.id.clone(),
        target_name: "os".to_string(),
        target_id: None,
        file_path: "main.py".to_string(),
        line: 1,
        provenance: None,
    };
    db.insert_edge(&imports_os).unwrap();

    let deps = db.file_deps("main.py").unwrap();
    assert_eq!(deps.len(), 1);
    let only_dep = &deps[0];
    assert_eq!(only_dep.target_name, "os");
}
2101
/// After `remove_file`, the file has neither an outline nor a file record.
#[test]
fn test_remove_file_clears_all_data() {
    let db = Database::open_memory().unwrap();
    let path = "test.py";

    // Populate a symbol, an outgoing edge, and the file record for `test.py`.
    let sym = test_symbol("foo", SymbolKind::Function, path, 1);
    let call = Edge {
        kind: EdgeKind::Calls,
        source_id: sym.id.clone(),
        target_name: "bar".to_string(),
        target_id: None,
        file_path: "test.py".to_string(),
        line: 5,
        provenance: None,
    };
    let record = FileInfo {
        path: "test.py".to_string(),
        last_modified: 0.0,
        hash: "abc".to_string(),
        language: "python".to_string(),
        num_symbols: 1,
    };
    db.insert_symbol(&sym).unwrap();
    db.insert_edge(&call).unwrap();
    db.upsert_file(&record).unwrap();

    db.remove_file(path).unwrap();

    let outline = db.outline(path).unwrap();
    assert!(outline.is_empty());
    let stored = db.get_file(path).unwrap();
    assert!(stored.is_none());
}
2132
/// `refs` returns all edges to a name when no kind is given, and only the
/// matching edges when a kind filter is supplied.
#[test]
fn test_refs_with_kind_filter() {
    let db = Database::open_memory().unwrap();
    let parent = test_symbol("AuthService", SymbolKind::Class, "a.py", 1);
    let child = test_symbol("AdminService", SymbolKind::Class, "a.py", 20);
    let caller = test_symbol("login", SymbolKind::Function, "b.py", 1);
    // `parent` is not read after insertion, so it is moved instead of cloned.
    db.insert_symbols(&[parent, child.clone(), caller.clone()])
        .unwrap();

    // One `Inherits` and one `Calls` edge, both targeting `AuthService`.
    db.insert_edges(&[
        Edge {
            source_id: child.id.clone(),
            target_name: "AuthService".to_string(),
            target_id: None,
            kind: EdgeKind::Inherits,
            file_path: "a.py".to_string(),
            line: 20,
            provenance: None,
        },
        Edge {
            source_id: caller.id.clone(),
            target_name: "AuthService".to_string(),
            target_id: None,
            kind: EdgeKind::Calls,
            file_path: "b.py".to_string(),
            line: 5,
            provenance: None,
        },
    ])
    .unwrap();

    // No filter: both edges.
    let all = db.refs("AuthService", None).unwrap();
    assert_eq!(all.len(), 2);

    // Each kind filter returns only its own edge.
    let inherits = db.refs("AuthService", Some(EdgeKind::Inherits)).unwrap();
    assert_eq!(inherits.len(), 1);
    assert_eq!(inherits[0].0.kind, EdgeKind::Inherits);

    let calls = db.refs("AuthService", Some(EdgeKind::Calls)).unwrap();
    assert_eq!(calls.len(), 1);
    assert_eq!(calls[0].0.kind, EdgeKind::Calls);

    // A kind with no edges yields nothing.
    let raises = db.refs("AuthService", Some(EdgeKind::Raises)).unwrap();
    assert!(raises.is_empty());
}
2182
/// An edge stored under a qualified target name is still found by the short
/// symbol name once resolved, and kind filters keep applying.
#[test]
fn test_refs_matches_via_resolved_target_id_short_name() {
    let db = Database::open_memory().unwrap();
    let base = test_symbol("BaseService", SymbolKind::Class, "auth/service.php", 1);
    let child = test_symbol("AuthService", SymbolKind::Class, "auth/service.php", 30);
    db.insert_symbols(&[base.clone(), child.clone()]).unwrap();

    let inherits = Edge::new(
        &child.id,
        "App\\Auth\\BaseService",
        EdgeKind::Inherits,
        "auth/service.php",
        30,
    );
    db.insert_edge(&inherits).unwrap();
    db.resolve_edges().unwrap();

    // Unfiltered lookup by short name.
    let unfiltered = db.refs("BaseService", None).unwrap();
    assert_eq!(unfiltered.len(), 1, "short name must match via target_id");
    let (edge, _) = &unfiltered[0];
    assert_eq!(edge.target_id.as_ref().unwrap(), &base.id);

    // Matching kind filter keeps the edge.
    let only_inherits = db.refs("BaseService", Some(EdgeKind::Inherits)).unwrap();
    assert_eq!(only_inherits.len(), 1);

    // Non-matching kind filter drops it.
    let only_calls = db.refs("BaseService", Some(EdgeKind::Calls)).unwrap();
    assert!(only_calls.is_empty());
}
2219
/// Among exact, prefix and substring matches, the exact name match is the
/// first search result.
#[test]
fn test_search_exact_match_ranks_first() {
    let db = Database::open_memory().unwrap();
    let exact = test_symbol("parse_config", SymbolKind::Function, "a.py", 1);
    let prefix = test_symbol("parse_config_file", SymbolKind::Function, "a.py", 10);
    let substr = test_symbol("get_parse_config", SymbolKind::Function, "a.py", 20);
    // None of the symbols is read after insertion, so all are moved.
    db.insert_symbols(&[exact, prefix, substr]).unwrap();

    let results = db.search("parse_config", None, None, 20).unwrap();
    assert_eq!(results.len(), 3);
    assert_eq!(results[0].name, "parse_config");
}
2232
/// For the query `token`, the three class/function matches come before the
/// two variables named exactly `token`.
#[test]
fn test_search_definitions_outrank_variables() {
    let db = Database::open_memory().unwrap();
    db.insert_symbols(&[
        test_symbol("token", SymbolKind::Variable, "routes/auth.ts", 20),
        test_symbol("token", SymbolKind::Variable, "routes/admin.ts", 11),
        test_symbol("TokenError", SymbolKind::Class, "auth/tokens.ts", 14),
        test_symbol("validateToken", SymbolKind::Function, "auth/tokens.ts", 59),
        test_symbol("ExpiredTokenError", SymbolKind::Class, "auth/tokens.ts", 22),
    ])
    .unwrap();

    let results = db.search("token", None, None, 20).unwrap();
    assert_eq!(results.len(), 5);

    // The first three slots hold the definitions, in any order.
    let (definitions, variables) = results.split_at(3);
    let def_names = definitions
        .iter()
        .map(|sym| sym.name.as_str())
        .collect::<Vec<&str>>();
    for expected in ["TokenError", "validateToken", "ExpiredTokenError"] {
        assert!(def_names.contains(&expected));
    }

    // The remaining two slots are the variables.
    for var in variables {
        assert_eq!(var.name, "token");
    }
}
2259
/// A query matching the start of two names returns exactly those two.
#[test]
fn test_search_prefix_match() {
    let db = Database::open_memory().unwrap();
    db.insert_symbols(&[
        test_symbol("parse_config", SymbolKind::Function, "a.py", 1),
        test_symbol("parse_args", SymbolKind::Function, "a.py", 10),
        test_symbol("unrelated", SymbolKind::Function, "a.py", 20),
    ])
    .unwrap();

    let hits = db.search("parse", None, None, 20).unwrap();
    assert_eq!(hits.len(), 2);
    let hit_names = hits
        .iter()
        .map(|sym| sym.name.as_str())
        .collect::<Vec<&str>>();
    for expected in ["parse_config", "parse_args"] {
        assert!(hit_names.contains(&expected));
    }
}
2274
/// A query appearing in the middle or at the end of a name still matches.
#[test]
fn test_search_substring_match() {
    let db = Database::open_memory().unwrap();
    db.insert_symbols(&[
        test_symbol("parse_config", SymbolKind::Function, "a.py", 1),
        test_symbol("get_config", SymbolKind::Function, "a.py", 10),
        test_symbol("unrelated", SymbolKind::Function, "a.py", 20),
    ])
    .unwrap();

    let hits = db.search("config", None, None, 20).unwrap();
    assert_eq!(hits.len(), 2);
    let hit_names = hits
        .iter()
        .map(|sym| sym.name.as_str())
        .collect::<Vec<&str>>();
    for expected in ["parse_config", "get_config"] {
        assert!(hit_names.contains(&expected));
    }
}
2289
/// The query `Parse` matches the lower-case name `parse_config`.
#[test]
fn test_search_case_insensitive() {
    let db = Database::open_memory().unwrap();
    let lower_case = test_symbol("parse_config", SymbolKind::Function, "a.py", 1);
    db.insert_symbol(&lower_case).unwrap();

    let hits = db.search("Parse", None, None, 20).unwrap();
    assert_eq!(hits.len(), 1);
    let only_hit = &hits[0];
    assert_eq!(only_hit.name, "parse_config");
}
2300
/// With a kind filter, only symbols of that kind are returned.
#[test]
fn test_search_kind_filter() {
    let db = Database::open_memory().unwrap();
    db.insert_symbols(&[
        test_symbol("parse_config", SymbolKind::Function, "a.py", 1),
        test_symbol("parse_result", SymbolKind::Class, "a.py", 10),
    ])
    .unwrap();

    let wanted_kind = Some(SymbolKind::Function);
    let hits = db.search("parse", wanted_kind, None, 20).unwrap();
    assert_eq!(hits.len(), 1);
    let only_hit = &hits[0];
    assert_eq!(only_hit.kind, SymbolKind::Function);
}
2314
/// With a file filter, only the symbol from that file is returned even
/// though the same name exists in another file.
#[test]
fn test_search_file_filter() {
    let db = Database::open_memory().unwrap();
    db.insert_symbols(&[
        test_symbol("parse_config", SymbolKind::Function, "src/a.rs", 1),
        test_symbol("parse_config", SymbolKind::Function, "src/b.rs", 1),
    ])
    .unwrap();

    let wanted_file = Some("src/a.rs");
    let hits = db.search("parse", None, wanted_file, 20).unwrap();
    assert_eq!(hits.len(), 1);
    let only_hit = &hits[0];
    assert_eq!(only_hit.file_path, "src/a.rs");
}
2326
/// An empty query is rejected with an error mentioning "cannot be empty".
#[test]
fn test_search_empty_query_returns_error() {
    let db = Database::open_memory().unwrap();
    let message = db.search("", None, None, 20).unwrap_err().to_string();
    assert!(message.contains("cannot be empty"));
}
2333
/// A limit of zero is rejected with an error mentioning "at least 1".
#[test]
fn test_search_zero_limit_returns_error() {
    let db = Database::open_memory().unwrap();
    let message = db.search("parse", None, None, 0).unwrap_err().to_string();
    assert!(message.contains("at least 1"));
}
2340
/// Five symbols match, but a limit of 3 caps the result count at three.
#[test]
fn test_search_limit_caps_results() {
    let db = Database::open_memory().unwrap();
    (0..5u32).for_each(|i| {
        let name = format!("fn_{i}");
        let start_line = i * 10 + 1;
        let sym = test_symbol(&name, SymbolKind::Function, "a.py", start_line);
        db.insert_symbol(&sym).unwrap();
    });

    let hits = db.search("fn", None, None, 3).unwrap();
    assert_eq!(hits.len(), 3);
}
2352
/// With a limit of 1, the single returned symbol is the exact-name match.
#[test]
fn test_search_limit_one_returns_top_ranked() {
    let db = Database::open_memory().unwrap();
    db.insert_symbols(&[
        test_symbol("resolve", SymbolKind::Function, "a.py", 1),
        test_symbol("resolve_edges", SymbolKind::Function, "a.py", 10),
    ])
    .unwrap();

    let hits = db.search("resolve", None, None, 1).unwrap();
    assert_eq!(hits.len(), 1);
    let top = &hits[0];
    assert_eq!(top.name, "resolve");
}
2364
/// The underscore in `get_foo` is matched literally, so `getXfoo` is not
/// returned.
#[test]
fn test_search_wildcard_chars_treated_as_literals() {
    let db = Database::open_memory().unwrap();
    db.insert_symbols(&[
        test_symbol("get_foo", SymbolKind::Function, "a.py", 1),
        test_symbol("getXfoo", SymbolKind::Function, "a.py", 10),
    ])
    .unwrap();

    let hits = db.search("get_foo", None, None, 20).unwrap();
    assert_eq!(hits.len(), 1);
    let only_hit = &hits[0];
    assert_eq!(only_hit.name, "get_foo");
}
2377
/// A bare `%` query matches nothing when no stored name contains a percent sign.
#[test]
fn test_search_percent_treated_as_literal() {
    let db = Database::open_memory().unwrap();
    let stored = test_symbol("get_config", SymbolKind::Function, "a.py", 1);
    db.insert_symbol(&stored).unwrap();

    let hits = db.search("%", None, None, 20).unwrap();
    assert!(hits.is_empty(), "% should not act as a wildcard");
}
2388
/// Content written with `upsert_symbol_content` can be read back by symbol id.
#[test]
fn test_upsert_and_get_symbol_content() {
    let db = Database::open_memory().unwrap();
    let sym = test_symbol("my_func", SymbolKind::Function, "a.py", 1);
    db.insert_symbol(&sym).unwrap();

    let body = "def my_func(): pass";
    let header_text = "// File: a.py\n// Type: function\n// Name: my_func";
    db.upsert_symbol_content(&sym.id, "my_func", body, header_text)
        .unwrap();

    let stored = db.get_symbol_content(&sym.id).unwrap();
    assert!(stored.is_some());
    let (content, header) = stored.unwrap();
    assert_eq!(content, "def my_func(): pass");
    assert!(header.contains("my_func"));
}
2411
/// A batch insert of two content rows stores both and makes each retrievable.
#[test]
fn test_insert_symbol_contents_batch() {
    let db = Database::open_memory().unwrap();
    let first = test_symbol("foo", SymbolKind::Function, "a.py", 1);
    let second = test_symbol("bar", SymbolKind::Function, "a.py", 10);
    db.insert_symbols(&[first.clone(), second.clone()]).unwrap();

    // (symbol, name, content, header) rows turned into owned tuples.
    let rows = [
        (&first, "foo", "def foo(): pass", "header1"),
        (&second, "bar", "def bar(): pass", "header2"),
    ];
    let items: Vec<_> = rows
        .iter()
        .map(|&(sym, name, content, header)| {
            (
                sym.id.clone(),
                name.to_string(),
                content.to_string(),
                header.to_string(),
            )
        })
        .collect();
    db.insert_symbol_contents(&items).unwrap();

    assert_eq!(db.symbol_content_count().unwrap(), 2);
    for sym in [&first, &second] {
        assert!(db.get_symbol_content(&sym.id).unwrap().is_some());
    }
}
2439
/// Clearing content for one file removes only that file's content rows.
#[test]
fn test_clear_symbol_content_for_file() {
    let db = Database::open_memory().unwrap();
    let in_a = test_symbol("foo", SymbolKind::Function, "a.py", 1);
    let in_b = test_symbol("bar", SymbolKind::Function, "b.py", 1);
    db.insert_symbols(&[in_a.clone(), in_b.clone()]).unwrap();

    for (sym, name, content, header) in [
        (&in_a, "foo", "content1", "header1"),
        (&in_b, "bar", "content2", "header2"),
    ] {
        db.upsert_symbol_content(&sym.id, name, content, header)
            .unwrap();
    }
    assert_eq!(db.symbol_content_count().unwrap(), 2);

    db.clear_symbol_content_for_file("a.py").unwrap();

    assert_eq!(db.symbol_content_count().unwrap(), 1);
    let gone = db.get_symbol_content(&in_a.id).unwrap();
    let kept = db.get_symbol_content(&in_b.id).unwrap();
    assert!(gone.is_none());
    assert!(kept.is_some());
}
2458
/// A quoted FTS5 term query returns the id of the symbol whose content holds it.
#[test]
fn test_fts5_search_by_content() {
    let db = Database::open_memory().unwrap();
    let sym = test_symbol("validate_token", SymbolKind::Function, "auth.py", 1);
    db.insert_symbol(&sym).unwrap();

    let source = "def validate_token(token: str) -> bool:\n return token.is_valid()";
    db.upsert_symbol_content(&sym.id, "validate_token", source, "// File: auth.py")
        .unwrap();

    let hits = db.fts5_search("\"validate\"", 10).unwrap();
    assert!(!hits.is_empty());
    assert_eq!(hits[0], sym.id);
}
2480
/// An FTS5 query for a term absent from all stored content returns no ids.
#[test]
fn test_fts5_search_no_match() {
    let db = Database::open_memory().unwrap();
    let sym = test_symbol("foo", SymbolKind::Function, "a.py", 1);
    db.insert_symbol(&sym).unwrap();
    db.upsert_symbol_content(&sym.id, "foo", "def foo(): pass", "header")
        .unwrap();

    let hits = db.fts5_search("\"nonexistent_term_xyz\"", 10).unwrap();
    assert!(hits.is_empty());
}
2492
/// Replacing a symbol's content must also remove the previous text from the
/// `symbol_fts` index, leaving a single content row.
#[test]
fn fts5_drops_old_content_when_symbol_content_is_replaced() {
    let db = Database::open_memory().unwrap();
    let sym = test_symbol("load", SymbolKind::Function, "a.py", 1);
    db.insert_symbol(&sym).unwrap();

    // First version contains a token that is findable through the public API.
    db.upsert_symbol_content(&sym.id, "load", "key = ghp_oldsecrettoken_value", "h")
        .unwrap();
    let before = db.fts5_search("\"ghp_oldsecrettoken_value\"", 10).unwrap();
    assert!(!before.is_empty());

    // Second version for the same id no longer contains that token.
    db.upsert_symbol_content(&sym.id, "load", "key = [REDACTED_SECRET]", "h")
        .unwrap();

    // Query the FTS table directly for the old token.
    let stale_sql =
        "SELECT count(*) FROM symbol_fts WHERE symbol_fts MATCH 'ghp_oldsecrettoken_value'";
    let stale_rows: i64 = db
        .conn
        .query_row(stale_sql, [], |row| row.get(0))
        .unwrap();
    assert_eq!(stale_rows, 0, "old plaintext must not remain in the FTS index");
    assert_eq!(db.symbol_content_count().unwrap(), 1);
}
2527
/// Embedding ids are stable per symbol id and distinct across symbol ids.
#[test]
fn test_get_or_create_embedding_id() {
    let db = Database::open_memory().unwrap();
    let lookup = |symbol_id: &str| db.get_or_create_embedding_id(symbol_id).unwrap();

    let first = lookup("a.py:foo:1");
    let repeated = lookup("a.py:foo:1");
    let other = lookup("b.py:bar:5");

    assert_eq!(first, repeated, "same symbol should return same ID");
    assert_ne!(first, other, "different symbols should get different IDs");
}
2541
/// An embedding id maps back to its symbol id; an unknown id maps to `None`.
#[test]
fn test_symbol_id_for_embedding() {
    let db = Database::open_memory().unwrap();
    let eid = db.get_or_create_embedding_id("test:sym:1").unwrap();

    let known = db.symbol_id_for_embedding(eid).unwrap();
    let unknown = db.symbol_id_for_embedding(99999).unwrap();

    assert_eq!(known, Some("test:sym:1".to_string()));
    assert!(unknown.is_none());
}
2553
/// A batch lookup of two embedding ids yields two results.
#[test]
fn test_symbol_ids_for_embeddings_batch() {
    let db = Database::open_memory().unwrap();
    let first = db.get_or_create_embedding_id("a:foo:1").unwrap();
    let second = db.get_or_create_embedding_id("b:bar:2").unwrap();

    let resolved = db.symbol_ids_for_embeddings(&[first, second]).unwrap();
    assert_eq!(resolved.len(), 2);
}
2563
/// A stored vector is its own nearest neighbour at (near) zero distance.
#[test]
fn test_upsert_and_search_embedding() {
    let db = Database::open_memory().unwrap();
    let eid = db.get_or_create_embedding_id("a:foo:1").unwrap();

    // 384 f32 components: 1.0 followed by 383 zeros, as little-endian bytes.
    let bytes: Vec<u8> = std::iter::once(1.0f32)
        .chain(std::iter::repeat(0.0f32).take(383))
        .flat_map(f32::to_le_bytes)
        .collect();
    db.upsert_embedding(eid, &bytes).unwrap();

    // Search with the exact same vector.
    let hits = db.vector_search(&bytes, 5).unwrap();

    assert_eq!(hits.len(), 1);
    let nearest = &hits[0];
    assert_eq!(nearest.0, eid);
    assert!(
        nearest.1 < 0.01,
        "self-match should have near-zero distance"
    );
}
2589
/// A batch insert of two embeddings results in an embedding count of 2.
#[test]
fn test_insert_embeddings_batch() {
    /// 384 copies of `value`, serialised as little-endian f32 bytes.
    fn uniform_vector_bytes(value: f32) -> Vec<u8> {
        std::iter::repeat(value)
            .take(384)
            .flat_map(f32::to_le_bytes)
            .collect()
    }

    let db = Database::open_memory().unwrap();
    let first = db.get_or_create_embedding_id("a:foo:1").unwrap();
    let second = db.get_or_create_embedding_id("b:bar:2").unwrap();

    let items = vec![
        (first, uniform_vector_bytes(0.1)),
        (second, uniform_vector_bytes(0.9)),
    ];
    db.insert_embeddings(&items).unwrap();

    assert_eq!(db.embedding_count().unwrap(), 2);
}
2606
/// `has_embedding` is false for unknown symbols and for symbols that only have
/// an embedding id, and true once a vector has been stored.
#[test]
fn test_has_embedding() {
    let db = Database::open_memory().unwrap();
    assert!(!db.has_embedding("nonexistent").unwrap());

    // Allocating an embedding id alone does not count as having an embedding.
    let eid = db.get_or_create_embedding_id("a:foo:1").unwrap();
    assert!(!db.has_embedding("a:foo:1").unwrap());

    // 384 zero-valued f32 components as little-endian bytes.
    let zero_vector: Vec<u8> = std::iter::repeat(0.0f32)
        .take(384)
        .flat_map(f32::to_le_bytes)
        .collect();
    db.upsert_embedding(eid, &zero_vector).unwrap();
    assert!(db.has_embedding("a:foo:1").unwrap());
}
2624
/// `clear_all_embeddings` brings the embedding count from 2 down to 0.
#[test]
fn test_clear_all_embeddings() {
    let db = Database::open_memory().unwrap();
    let ids = [
        db.get_or_create_embedding_id("a:foo:1").unwrap(),
        db.get_or_create_embedding_id("b:bar:2").unwrap(),
    ];

    // 384 zero-valued f32 components as little-endian bytes.
    let zero_vector: Vec<u8> = std::iter::repeat(0.0f32)
        .take(384)
        .flat_map(f32::to_le_bytes)
        .collect();
    for eid in ids {
        db.upsert_embedding(eid, &zero_vector).unwrap();
    }
    assert_eq!(db.embedding_count().unwrap(), 2);

    db.clear_all_embeddings().unwrap();
    assert_eq!(db.embedding_count().unwrap(), 0);
}
2642
/// An embedding id without a stored vector does not contribute to
/// `embedding_count`; storing the vector raises the count to 1.
#[test]
fn embedding_count_excludes_orphan_map_rows() {
    let db = Database::open_memory().unwrap();

    // Id allocated, no vector stored yet.
    let _unused = db.get_or_create_embedding_id("a:foo:1").unwrap();
    assert_eq!(db.embedding_count().unwrap(), 0);

    // Look the id up again and attach a 384-component zero vector.
    let eid = db.get_or_create_embedding_id("a:foo:1").unwrap();
    let zero_vector: Vec<u8> = std::iter::repeat(0.0f32)
        .take(384)
        .flat_map(f32::to_le_bytes)
        .collect();
    db.upsert_embedding(eid, &zero_vector).unwrap();
    assert_eq!(db.embedding_count().unwrap(), 1);
}
2659
/// Symbols with content but no stored vector are reported as needing
/// embeddings; embedding one of them removes it from the list.
#[test]
fn test_symbols_needing_embeddings() {
    let db = Database::open_memory().unwrap();
    let foo = test_symbol("foo", SymbolKind::Function, "a.py", 1);
    let bar = test_symbol("bar", SymbolKind::Function, "a.py", 10);
    db.insert_symbols(&[foo.clone(), bar.clone()]).unwrap();

    for (sym, name, content) in [
        (&foo, "foo", "def foo(): pass"),
        (&bar, "bar", "def bar(): pass"),
    ] {
        db.upsert_symbol_content(&sym.id, name, content, "header")
            .unwrap();
    }

    // Neither symbol has a vector yet.
    let pending = db.symbols_needing_embeddings().unwrap();
    assert_eq!(pending.len(), 2);

    // Store a 384-component zero vector for `foo` only.
    let eid = db.get_or_create_embedding_id(&foo.id).unwrap();
    let zero_vector: Vec<u8> = std::iter::repeat(0.0f32)
        .take(384)
        .flat_map(f32::to_le_bytes)
        .collect();
    db.upsert_embedding(eid, &zero_vector).unwrap();

    let pending = db.symbols_needing_embeddings().unwrap();
    assert_eq!(pending.len(), 1);
    assert_eq!(pending[0], bar.id);
}
2690
/// `clear_rag_data_for_file` removes content and embeddings for symbols in the
/// named file and leaves another file's data in place.
#[test]
fn test_clear_rag_data_for_file() {
    let db = Database::open_memory().unwrap();
    let in_a = test_symbol("foo", SymbolKind::Function, "a.py", 1);
    let in_b = test_symbol("bar", SymbolKind::Function, "b.py", 1);
    db.insert_symbols(&[in_a.clone(), in_b.clone()]).unwrap();

    db.upsert_symbol_content(&in_a.id, "foo", "content1", "header1")
        .unwrap();
    db.upsert_symbol_content(&in_b.id, "bar", "content2", "header2")
        .unwrap();

    let eid_a = db.get_or_create_embedding_id(&in_a.id).unwrap();
    let eid_b = db.get_or_create_embedding_id(&in_b.id).unwrap();
    // 384 zero-valued f32 components as little-endian bytes.
    let zero_vector: Vec<u8> = std::iter::repeat(0.0f32)
        .take(384)
        .flat_map(f32::to_le_bytes)
        .collect();
    db.upsert_embedding(eid_a, &zero_vector).unwrap();
    db.upsert_embedding(eid_b, &zero_vector).unwrap();

    db.clear_rag_data_for_file("a.py").unwrap();

    // a.py: both content and embedding are gone.
    let content_a = db.get_symbol_content(&in_a.id).unwrap();
    assert!(content_a.is_none());
    assert!(!db.has_embedding(&in_a.id).unwrap());

    // b.py: both content and embedding remain.
    let content_b = db.get_symbol_content(&in_b.id).unwrap();
    assert!(content_b.is_some());
    assert!(db.has_embedding(&in_b.id).unwrap());
}
2723
/// Clearing embeddings for one symbol id inside an indexing transaction drops
/// only that symbol's embedding, keeps its content, and makes it the sole
/// symbol reported as needing an embedding.
#[test]
fn clear_embeddings_for_symbols_drops_only_named_ids() {
    let db = Database::open_memory().unwrap();
    let foo = test_symbol("foo", SymbolKind::Function, "a.py", 1);
    let bar = test_symbol("bar", SymbolKind::Function, "a.py", 10);
    db.insert_symbols(&[foo.clone(), bar.clone()]).unwrap();
    db.upsert_symbol_content(&foo.id, "foo", "def foo(): pass", "header")
        .unwrap();
    db.upsert_symbol_content(&bar.id, "bar", "def bar(): pass", "header")
        .unwrap();

    // Give both symbols a 384-component zero vector.
    let zero_vector: Vec<u8> = std::iter::repeat(0.0f32)
        .take(384)
        .flat_map(f32::to_le_bytes)
        .collect();
    [&foo, &bar].iter().for_each(|sym| {
        let eid = db.get_or_create_embedding_id(&sym.id).unwrap();
        db.upsert_embedding(eid, &zero_vector).unwrap();
    });
    assert_eq!(db.embedding_count().unwrap(), 2);

    let tx = db.begin_indexing_tx().unwrap();
    let targets = std::slice::from_ref(&foo.id);
    db.clear_embeddings_for_symbols_in_tx(targets).unwrap();
    tx.commit().unwrap();

    assert!(!db.has_embedding(&foo.id).unwrap());
    assert!(db.has_embedding(&bar.id).unwrap());
    assert!(db.get_symbol_content(&foo.id).unwrap().is_some());
    let pending = db.symbols_needing_embeddings().unwrap();
    assert_eq!(pending, vec![foo.id.clone()]);
}
2758
/// Clearing embeddings for a symbol that never had one succeeds and leaves the
/// embedding count at 0.
#[test]
fn clear_embeddings_for_symbols_is_noop_for_unembedded_id() {
    let db = Database::open_memory().unwrap();
    let sym = test_symbol("foo", SymbolKind::Function, "a.py", 1);
    db.insert_symbols(std::slice::from_ref(&sym)).unwrap();
    db.upsert_symbol_content(&sym.id, "foo", "def foo(): pass", "header")
        .unwrap();

    let tx = db.begin_indexing_tx().unwrap();
    let targets = std::slice::from_ref(&sym.id);
    db.clear_embeddings_for_symbols_in_tx(targets).unwrap();
    tx.commit().unwrap();

    let remaining = db.embedding_count().unwrap();
    assert_eq!(remaining, 0);
}
2774
/// `all_content_symbol_ids` returns one id per function symbol with content.
#[test]
fn test_all_content_symbol_ids() {
    let db = Database::open_memory().unwrap();
    let in_a = test_symbol("foo", SymbolKind::Function, "a.py", 1);
    let in_b = test_symbol("bar", SymbolKind::Function, "b.py", 1);
    db.insert_symbols(&[in_a.clone(), in_b.clone()]).unwrap();

    for (sym, name, content, header) in [
        (&in_a, "foo", "content1", "header1"),
        (&in_b, "bar", "content2", "header2"),
    ] {
        db.upsert_symbol_content(&sym.id, name, content, header)
            .unwrap();
    }

    let ids = db.all_content_symbol_ids().unwrap();
    assert_eq!(ids.len(), 2);
}
2790
/// Variable symbols are left out of `symbols_needing_embeddings` even when they
/// have content; function and class symbols are included.
#[test]
fn test_symbols_needing_embeddings_excludes_variables() {
    let db = Database::open_memory().unwrap();
    let func = test_symbol("process", SymbolKind::Function, "a.py", 1);
    let var = test_symbol("MAX_RETRIES", SymbolKind::Variable, "a.py", 10);
    let cls = test_symbol("Service", SymbolKind::Class, "a.py", 20);
    db.insert_symbols(&[func.clone(), var.clone(), cls.clone()])
        .unwrap();

    // All three get content, including the variable.
    for (sym, name, content) in [
        (&func, "process", "def process(): pass"),
        (&var, "MAX_RETRIES", "MAX_RETRIES = 3"),
        (&cls, "Service", "class Service: pass"),
    ] {
        db.upsert_symbol_content(&sym.id, name, content, "header")
            .unwrap();
    }

    let pending = db.symbols_needing_embeddings().unwrap();
    assert_eq!(pending.len(), 2);
    assert!(!pending.contains(&var.id), "variables should be excluded");
    assert!(pending.contains(&func.id));
    assert!(pending.contains(&cls.id));
}
2815
/// Variable symbols are left out of `all_content_symbol_ids` even when they
/// have content.
#[test]
fn test_all_content_symbol_ids_excludes_variables() {
    let db = Database::open_memory().unwrap();
    let func = test_symbol("foo", SymbolKind::Function, "a.py", 1);
    let var = test_symbol("MY_VAR", SymbolKind::Variable, "a.py", 10);
    let method = test_symbol("bar", SymbolKind::Method, "a.py", 20);
    db.insert_symbols(&[func.clone(), var.clone(), method.clone()])
        .unwrap();

    for (sym, name, content) in [
        (&func, "foo", "def foo(): pass"),
        (&var, "MY_VAR", "MY_VAR = 42"),
        (&method, "bar", "def bar(self): pass"),
    ] {
        db.upsert_symbol_content(&sym.id, name, content, "header")
            .unwrap();
    }

    let ids = db.all_content_symbol_ids().unwrap();
    assert_eq!(ids.len(), 2, "variables should be excluded");
    assert!(!ids.contains(&var.id));
}
2836
/// A batch content lookup returns entries only for ids that have content.
#[test]
fn test_get_symbol_contents_batch() {
    let db = Database::open_memory().unwrap();
    let foo = test_symbol("foo", SymbolKind::Function, "a.py", 1);
    let bar = test_symbol("bar", SymbolKind::Function, "a.py", 10);
    let baz = test_symbol("baz", SymbolKind::Function, "a.py", 20);
    db.insert_symbols(&[foo.clone(), bar.clone(), baz.clone()])
        .unwrap();

    // Only `foo` and `bar` get content; `baz` is requested but has none.
    db.upsert_symbol_content(&foo.id, "foo", "def foo(): pass", "h1")
        .unwrap();
    db.upsert_symbol_content(&bar.id, "bar", "def bar(): pass", "h2")
        .unwrap();

    let requested: Vec<_> = [&foo, &bar, &baz].iter().map(|sym| sym.id.clone()).collect();
    let found = db.get_symbol_contents_batch(&requested).unwrap();

    assert_eq!(found.len(), 2);
    assert!(found.contains_key(&foo.id));
    assert!(found.contains_key(&bar.id));
    assert!(!found.contains_key(&baz.id));
    assert_eq!(found[&foo.id].0, "def foo(): pass");
}
2860
/// A batch content lookup with no ids returns an empty map.
#[test]
fn test_get_symbol_contents_batch_empty() {
    let found = Database::open_memory()
        .unwrap()
        .get_symbol_contents_batch(&[])
        .unwrap();
    assert!(found.is_empty());
}
2867
/// `get_symbol` returns the stored symbol for a known id and `None` otherwise.
#[test]
fn test_get_symbol_by_id() {
    let db = Database::open_memory().unwrap();
    let sym = test_symbol("foo", SymbolKind::Function, "a.py", 1);
    db.insert_symbol(&sym).unwrap();

    let hit = db.get_symbol(&sym.id).unwrap();
    let miss = db.get_symbol("nonexistent").unwrap();

    assert!(hit.is_some());
    assert_eq!(hit.unwrap().name, "foo");
    assert!(miss.is_none());
}
2881
/// `symbols_for_files` returns only symbols from the requested files; the first
/// result is from `src/a.py` and the third from `src/b.py`.
#[test]
fn test_symbols_for_files_basic() {
    let db = Database::open_memory().unwrap();
    let symbols = [
        test_symbol("func_a", SymbolKind::Function, "src/a.py", 1),
        test_symbol("func_b", SymbolKind::Function, "src/a.py", 10),
        test_symbol("ClassC", SymbolKind::Class, "src/b.py", 1),
        // Lives in a file that is not requested below.
        test_symbol("func_d", SymbolKind::Function, "src/c.py", 1),
    ];
    db.insert_symbols(&symbols).unwrap();

    let files: Vec<String> = ["src/a.py", "src/b.py"]
        .iter()
        .map(|path| path.to_string())
        .collect();
    let found = db.symbols_for_files(&files, None).unwrap();

    assert_eq!(found.len(), 3);
    assert_eq!(found[0].file_path, "src/a.py");
    assert_eq!(found[2].file_path, "src/b.py");
}
2898
/// With a kind filter, `symbols_for_files` returns only symbols of that kind.
#[test]
fn test_symbols_for_files_kind_filter() {
    let db = Database::open_memory().unwrap();
    let symbols = [
        test_symbol("func_a", SymbolKind::Function, "src/a.py", 1),
        test_symbol("ClassB", SymbolKind::Class, "src/a.py", 10),
    ];
    db.insert_symbols(&symbols).unwrap();

    let files = vec!["src/a.py".to_string()];
    let only_functions = Some(SymbolKind::Function);
    let found = db.symbols_for_files(&files, only_functions).unwrap();

    assert_eq!(found.len(), 1);
    assert_eq!(found[0].name, "func_a");
}
2913
/// An empty file list yields an empty result.
#[test]
fn test_symbols_for_files_empty_input() {
    let found = Database::open_memory()
        .unwrap()
        .symbols_for_files(&[], None)
        .unwrap();
    assert!(found.is_empty());
}
2920
/// Requesting a file with no stored symbols yields an empty result.
#[test]
fn test_symbols_for_files_no_matching_files() {
    let db = Database::open_memory().unwrap();
    let stored = test_symbol("func_a", SymbolKind::Function, "src/a.py", 1);
    db.insert_symbol(&stored).unwrap();

    let requested = vec!["src/nonexistent.py".to_string()];
    let found = db.symbols_for_files(&requested, None).unwrap();
    assert!(found.is_empty());
}
2931
/// After resolving edges, `compute_in_degrees` stores the number of incoming
/// edges on each symbol: three for `func_a`, one for `func_b`, none for `func_c`.
#[test]
fn test_compute_in_degrees() {
    let db = Database::open_memory().unwrap();
    let func_a = test_symbol("func_a", SymbolKind::Function, "a.py", 1);
    let func_b = test_symbol("func_b", SymbolKind::Function, "b.py", 1);
    let func_c = test_symbol("func_c", SymbolKind::Function, "c.py", 1);
    db.insert_symbols(&[func_a.clone(), func_b.clone(), func_c.clone()])
        .unwrap();

    let edges = vec![
        // func_b calls func_a from two different lines.
        Edge::new(&func_b.id, "func_a", EdgeKind::Calls, "b.py", 5),
        Edge::new(&func_b.id, "func_a", EdgeKind::Calls, "b.py", 10),
        // func_c calls func_a once and func_b once.
        Edge::new(&func_c.id, "func_a", EdgeKind::Calls, "c.py", 3),
        Edge::new(&func_c.id, "func_b", EdgeKind::Calls, "c.py", 7),
    ];
    db.insert_edges(&edges).unwrap();
    db.resolve_edges().unwrap();
    db.compute_in_degrees().unwrap();

    let reload = |id: &str| db.get_symbol(id).unwrap().unwrap();
    let stored_a = reload(&func_a.id);
    let stored_b = reload(&func_b.id);
    let stored_c = reload(&func_c.id);

    assert_eq!(stored_a.in_degree, 3, "func_a should have 3 incoming edges");
    assert_eq!(stored_b.in_degree, 1, "func_b should have 1 incoming edge");
    assert_eq!(stored_c.in_degree, 0, "func_c should have 0 incoming edges");
}
2961
/// `compute_in_degrees` overwrites a stale stored value: a symbol with no
/// incoming edges ends up with `in_degree == 0`.
#[test]
fn test_compute_in_degrees_resets() {
    let db = Database::open_memory().unwrap();
    let sym = test_symbol("func_a", SymbolKind::Function, "a.py", 1);
    db.insert_symbol(&sym).unwrap();

    // Plant a bogus in-degree directly in the table.
    let plant_stale = "UPDATE symbols SET in_degree = 99 WHERE id = ?1";
    db.conn.execute(plant_stale, params![sym.id]).unwrap();

    db.compute_in_degrees().unwrap();

    let reloaded = db.get_symbol(&sym.id).unwrap().unwrap();
    assert_eq!(reloaded.in_degree, 0);
}
2981
/// `top_symbols` lists symbols by descending `in_degree`.
#[test]
fn test_top_symbols_ordered_by_centrality() {
    let db = Database::open_memory().unwrap();
    let hub = test_symbol("hub", SymbolKind::Function, "a.py", 1);
    let leaf = test_symbol("leaf", SymbolKind::Function, "b.py", 1);
    let mid = test_symbol("mid", SymbolKind::Function, "c.py", 1);
    db.insert_symbols(&[hub.clone(), leaf.clone(), mid.clone()])
        .unwrap();

    // Write in-degrees straight into the table; the degree is spliced into the
    // SQL text so each statement matches its hand-written form.
    let set_in_degree = |id: &str, degree: u32| {
        let sql = format!("UPDATE symbols SET in_degree = {degree} WHERE id = ?1");
        db.conn.execute(&sql, params![id]).unwrap();
    };
    set_in_degree(&hub.id, 10);
    set_in_degree(&leaf.id, 1);
    set_in_degree(&mid.id, 5);

    let top = db.top_symbols(10).unwrap();
    assert_eq!(top.len(), 3);
    assert_eq!(top[0].name, "hub");
    assert_eq!(top[0].in_degree, 10);
    assert_eq!(top[1].name, "mid");
    assert_eq!(top[2].name, "leaf");
}
3018
/// Two symbols that both prefix-match the query are returned with the higher
/// `in_degree` first.
#[test]
fn test_search_uses_in_degree_tiebreaker() {
    let db = Database::open_memory().unwrap();
    let request = test_symbol("parse_request", SymbolKind::Function, "a.py", 1);
    let response = test_symbol("parse_response", SymbolKind::Function, "b.py", 1);
    db.insert_symbols(&[request.clone(), response.clone()])
        .unwrap();

    // Write in-degrees straight into the table; the degree is spliced into the
    // SQL text so each statement matches its hand-written form.
    let set_in_degree = |id: &str, degree: u32| {
        let sql = format!("UPDATE symbols SET in_degree = {degree} WHERE id = ?1");
        db.conn.execute(&sql, params![id]).unwrap();
    };
    set_in_degree(&request.id, 20);
    set_in_degree(&response.id, 5);

    let hits = db.search("parse", None, None, 10).unwrap();
    assert_eq!(hits.len(), 2);
    assert_eq!(hits[0].name, "parse_request");
    assert_eq!(hits[1].name, "parse_response");
}
3046
/// A freshly opened database records `SCHEMA_VERSION` under the
/// `schema_version` metadata key.
#[test]
fn test_schema_version_stored() {
    let db = Database::open_memory().unwrap();
    let expected = SCHEMA_VERSION.to_string();

    let stored = db.get_metadata("schema_version").unwrap();
    assert!(stored.is_some());
    assert_eq!(stored.unwrap(), expected);
}
3054
/// When a resolved edge's target symbol is deleted, invalidating edges that
/// target the dirty file reports one edge and leaves it unresolved.
#[test]
fn test_invalidate_dangling_edges_after_symbol_removal() {
    let db = Database::open_memory().unwrap();

    // `foo` in a.py is the call target; `bar` in b.py is the caller.
    let callee = test_symbol("foo", SymbolKind::Function, "a.py", 1);
    db.insert_symbol(&callee).unwrap();
    let caller = test_symbol("bar", SymbolKind::Function, "b.py", 1);
    db.insert_symbol(&caller).unwrap();
    let call = Edge::new(&caller.id, "foo", EdgeKind::Calls, "b.py", 5);
    db.insert_edge(&call).unwrap();

    assert_eq!(db.resolve_edges().unwrap(), 1);

    // Remove the target row directly, leaving the edge pointing at nothing.
    db.conn
        .execute("DELETE FROM symbols WHERE id = ?1", params![callee.id])
        .unwrap();

    let dirty = std::collections::HashSet::from(["a.py".to_string()]);
    assert_eq!(db.invalidate_edges_targeting(&dirty).unwrap(), 1);

    let outgoing = db.callees("bar").unwrap();
    assert!(
        outgoing.iter().all(|edge| edge.target_id.is_none()),
        "edge should be unresolved after invalidation"
    );
}
3093
/// After a target symbol is deleted and re-inserted, scoped resolution over its
/// file reports one resolved edge.
#[test]
fn test_scoped_resolution_after_symbol_changes() {
    let db = Database::open_memory().unwrap();

    // `foo` in a.py is the call target; `bar` in b.py is the caller.
    let callee = test_symbol("foo", SymbolKind::Function, "a.py", 1);
    db.insert_symbol(&callee).unwrap();
    let caller = test_symbol("bar", SymbolKind::Function, "b.py", 1);
    db.insert_symbol(&caller).unwrap();
    let call = Edge::new(&caller.id, "foo", EdgeKind::Calls, "b.py", 5);
    db.insert_edge(&call).unwrap();

    db.resolve_edges().unwrap();

    // Drop the target symbol and insert it again.
    db.delete_symbol(&callee.id).unwrap();
    db.insert_symbol(&callee).unwrap();

    let dirty = std::collections::HashSet::from(["a.py".to_string()]);
    let resolved_again = db.resolve_edges_scoped(&dirty).unwrap();
    assert_eq!(resolved_again, 1);
}
3120
/// A scoped in-degree recompute for a caller's file leaves the callee's
/// in-degree at its full value when no edges have changed.
#[test]
fn test_compute_in_degrees_scoped() {
    let db = Database::open_memory().unwrap();

    let foo = test_symbol("foo", SymbolKind::Function, "a.py", 1);
    let bar = test_symbol("bar", SymbolKind::Function, "b.py", 1);
    let baz = test_symbol("baz", SymbolKind::Function, "c.py", 1);
    for sym in [&foo, &bar, &baz] {
        db.insert_symbol(sym).unwrap();
    }

    // `bar` and `baz` each call `foo` once.
    let from_bar = Edge::new(&bar.id, "foo", EdgeKind::Calls, "b.py", 5);
    let from_baz = Edge::new(&baz.id, "foo", EdgeKind::Calls, "c.py", 3);
    db.insert_edge(&from_bar).unwrap();
    db.insert_edge(&from_baz).unwrap();

    db.resolve_edges().unwrap();
    db.compute_in_degrees().unwrap();

    let before = db.search("foo", None, None, 10).unwrap();
    assert_eq!(before[0].in_degree, 2);

    let dirty = std::collections::HashSet::from(["b.py".to_string()]);
    db.compute_in_degrees_scoped(&dirty).unwrap();

    let after = db.search("foo", None, None, 10).unwrap();
    assert_eq!(after[0].in_degree, 2);
}
3153
/// The query plan for the import-based resolution lookup must mention
/// `idx_edges_kind_target`.
#[test]
fn test_tier2_import_resolution_plan_uses_kind_target_index() {
    let db = Database::open_memory().unwrap();
    let explain_sql = "EXPLAIN QUERY PLAN SELECT s.id FROM symbols s
             INNER JOIN edges ie ON ie.kind = 'imports' AND ie.target_name = ?1
             AND ie.target_id IS NOT NULL
             INNER JOIN symbols is2 ON is2.id = ie.source_id AND is2.file_path = ?2
             INNER JOIN symbols resolved ON resolved.id = ie.target_id
             WHERE s.name = ?1 AND s.kind != 'import'
             AND s.file_path = resolved.file_path
             LIMIT 1";
    let mut stmt = db.conn.prepare(explain_sql).unwrap();

    // Column index 3 of each plan row is read as text; rows are joined by newline.
    let details: Vec<String> = stmt
        .query_map(params!["x", "y"], |row| row.get::<_, String>(3))
        .unwrap()
        .map(|detail| detail.unwrap())
        .collect();
    let plan = details.join("\n");

    assert!(
        plan.contains("idx_edges_kind_target"),
        "tier-2 must drive off edges(kind, target_name); got plan:\n{plan}"
    );
}
3182
/// Checks the SQLite query plans for two `refs()`-shaped lookups over `edges`
/// (one unfiltered, one restricted to `kind = 'calls'`).
///
/// Both plans must contain `MULTI-INDEX OR`, must seek the expected index for
/// the `target_name` arm and the `target_id` arm, and must not contain a full
/// scan of the `edges` table.
#[test]
fn test_refs_plan_uses_multi_index_or_not_full_scan() {
    let db = Database::open_memory().unwrap();
    // Seed 400 symbols `s0`..`s399` in a.py, one per line number.
    let syms: Vec<Symbol> = (0..400)
        .map(|i| test_symbol(&format!("s{i}"), SymbolKind::Function, "a.py", i))
        .collect();
    db.insert_symbols(&syms).unwrap();
    // One edge per symbol targeting the name `t{i}`; every even-numbered edge
    // also gets a `target_id` (pointing back at its own source symbol), so the
    // table holds a mix of resolved and unresolved edges.
    let edges: Vec<Edge> = (0..400)
        .map(|i| {
            let mut e = Edge::new(
                &syms[i as usize].id,
                format!("t{i}"),
                EdgeKind::Calls,
                "a.py",
                i,
            );
            if i % 2 == 0 {
                e.target_id = Some(syms[i as usize].id.clone());
            }
            e
        })
        .collect();
    db.insert_edges(&edges).unwrap();
    // NOTE(review): presumably run so the planner has table statistics for the
    // seeded data — confirm against the schema/index definitions.
    db.conn.execute_batch("ANALYZE;").unwrap();

    // Runs an `EXPLAIN QUERY PLAN …` statement with a single bound parameter
    // and returns column index 3 of every plan row, joined by newlines.
    let explain = |sql: &str| -> String {
        let mut stmt = db.conn.prepare(sql).unwrap();
        stmt.query_map(params!["x"], |row| row.get::<_, String>(3))
            .unwrap()
            .collect::<std::result::Result<Vec<_>, _>>()
            .unwrap()
            .join("\n")
    };
    // Fails if the plan text shows a scan of `edges`, either via the alias `e`
    // (mid-plan or as the final line) or via the table name itself.
    let assert_no_edge_scan = |plan: &str, ctx: &str| {
        assert!(
            !plan.contains("SCAN e\n")
                && !plan.ends_with("SCAN e")
                && !plan.contains("SCAN edges"),
            "refs() {ctx} must not full-scan edges; got plan:\n{plan}"
        );
    };

    // Unfiltered shape: match by literal target name OR by resolved target id.
    let unfiltered = explain(
        "EXPLAIN QUERY PLAN
             SELECT e.id FROM edges e
             LEFT JOIN symbols s ON e.source_id = s.id
             WHERE e.target_name = ?1
                OR e.target_id IN (SELECT id FROM symbols WHERE name = ?1)",
    );
    assert!(
        unfiltered.contains("MULTI-INDEX OR"),
        "refs() unfiltered must use a multi-index OR; got plan:\n{unfiltered}"
    );
    assert!(
        unfiltered.contains("idx_edges_target (target_name="),
        "refs() literal arm must seek idx_edges_target on target_name; got plan:\n{unfiltered}"
    );
    assert!(
        unfiltered.contains("idx_edges_target_id (target_id="),
        "refs() resolved arm must seek idx_edges_target_id on target_id; got plan:\n{unfiltered}"
    );
    assert_no_edge_scan(&unfiltered, "unfiltered");

    // Kind-filtered shape: the `kind = 'calls'` predicate is repeated inside
    // each OR arm rather than applied once outside the OR.
    let kind_filtered = explain(
        "EXPLAIN QUERY PLAN
             SELECT e.id FROM edges e
             LEFT JOIN symbols s ON e.source_id = s.id
             WHERE (e.target_name = ?1 AND e.kind = 'calls')
                OR (e.target_id IN (SELECT id FROM symbols WHERE name = ?1)
                    AND e.kind = 'calls')",
    );
    assert!(
        kind_filtered.contains("MULTI-INDEX OR"),
        "refs() kind-filtered must use a multi-index OR; got plan:\n{kind_filtered}"
    );
    assert!(
        kind_filtered.contains("idx_edges_kind_target (kind=? AND target_name="),
        "refs() kind-filtered literal arm must seek (kind, target_name); got plan:\n{kind_filtered}"
    );
    assert!(
        kind_filtered.contains("idx_edges_target_id (target_id="),
        "refs() kind-filtered resolved arm must seek target_id; got plan:\n{kind_filtered}"
    );
    assert_no_edge_scan(&kind_filtered, "kind-filtered");
}
3279
/// Checks the SQLite query plan of an `impact()`-shaped recursive CTE on an
/// empty in-memory database.
///
/// The plan must seek `idx_edges_target` by `target_name` and
/// `idx_edges_target_id` by `target_id`, must not mention `CORRELATED`, and
/// must not contain a full scan of the `edges` table.
#[test]
fn test_impact_recursive_step_avoids_full_edge_scan() {
    let db = Database::open_memory().unwrap();
    // The CTE has a seed SELECT (edges targeting ?1 by name or by resolved id)
    // and two recursive arms joined with UNION: one follows edges whose
    // `target_name` equals the previous level's source name, the other goes
    // through `symbols` to follow edges whose `target_id` is a symbol with that
    // name. Both recursive arms stop once `depth` reaches ?2.
    let mut stmt = db
        .conn
        .prepare(
            "EXPLAIN QUERY PLAN
             WITH RECURSIVE impacted(edge_id, source_id, target_name, target_id,
                                     kind, file_path, line, resolution_source, source_name, depth) AS (
                 SELECT e.id, e.source_id, e.target_name, e.target_id, e.kind,
                        e.file_path, e.line, e.resolution_source, s.name, 1
                 FROM edges e LEFT JOIN symbols s ON e.source_id = s.id
                 WHERE e.target_name = ?1
                    OR e.target_id IN (SELECT id FROM symbols WHERE name = ?1)
                 UNION
                 SELECT e.id, e.source_id, e.target_name, e.target_id, e.kind,
                        e.file_path, e.line, e.resolution_source, s.name, i.depth + 1
                 FROM impacted i
                 JOIN edges e ON e.target_name = i.source_name
                 LEFT JOIN symbols s ON e.source_id = s.id
                 WHERE i.source_name IS NOT NULL AND i.depth < ?2
                 UNION
                 SELECT e.id, e.source_id, e.target_name, e.target_id, e.kind,
                        e.file_path, e.line, e.resolution_source, s.name, i.depth + 1
                 FROM impacted i
                 JOIN symbols t ON t.name = i.source_name
                 JOIN edges e ON e.target_id = t.id
                 LEFT JOIN symbols s ON e.source_id = s.id
                 WHERE i.source_name IS NOT NULL AND i.depth < ?2)
             SELECT source_id, MIN(depth) FROM impacted GROUP BY edge_id
             ORDER BY depth, edge_id",
        )
        .unwrap();
    // Bind ?1 = "x" (name) and ?2 = 3 (depth limit); read column index 3 of each
    // plan row as text and join the rows with newlines.
    let plan = stmt
        .query_map(params!["x", 3], |row| row.get::<_, String>(3))
        .unwrap()
        .collect::<std::result::Result<Vec<_>, _>>()
        .unwrap()
        .join("\n");
    assert!(
        plan.contains("idx_edges_target (target_name="),
        "impact() literal arm must seek idx_edges_target on target_name; got plan:\n{plan}"
    );
    assert!(
        plan.contains("idx_edges_target_id (target_id="),
        "impact() resolved arm must seek idx_edges_target_id on target_id; got plan:\n{plan}"
    );
    assert!(
        !plan.contains("CORRELATED"),
        "impact() must not run a correlated subquery per edge; got plan:\n{plan}"
    );
    // Rejects a scan of `edges` via the alias `e` (mid-plan or as the final
    // line) or via the table name itself.
    assert!(
        !plan.contains("SCAN e\n") && !plan.ends_with("SCAN e") && !plan.contains("SCAN edges"),
        "impact() must not full-scan edges; got plan:\n{plan}"
    );
}
3350
/// The plan for deleting edges by `file_path` must mention `idx_edges_file`.
#[test]
fn test_per_file_edge_delete_uses_file_index() {
    let db = Database::open_memory().unwrap();
    let explain_sql = "EXPLAIN QUERY PLAN DELETE FROM edges WHERE file_path = ?1";
    let mut stmt = db.conn.prepare(explain_sql).unwrap();

    // Column index 3 of each plan row is read as text; rows are joined by newline.
    let details: Vec<String> = stmt
        .query_map(params!["a.py"], |row| row.get::<_, String>(3))
        .unwrap()
        .map(|detail| detail.unwrap())
        .collect();
    let plan = details.join("\n");

    assert!(
        plan.contains("idx_edges_file"),
        "per-file edge delete must drive off edges(file_path); got plan:\n{plan}"
    );
}
3372
/// The plan for the in-degree `UPDATE ... FROM` statement must not contain the
/// word "CORRELATED" (compared case-insensitively).
#[test]
fn test_compute_in_degrees_plan_has_no_correlated_subquery() {
    let db = Database::open_memory().unwrap();
    let explain_sql = "EXPLAIN QUERY PLAN
                 UPDATE symbols SET in_degree = counts.cnt
                 FROM (
                     SELECT target_id, COUNT(*) AS cnt
                     FROM edges WHERE target_id IS NOT NULL
                     GROUP BY target_id
                 ) AS counts
                 WHERE symbols.id = counts.target_id";
    let mut explain = db.conn.prepare(explain_sql).unwrap();
    // Column 3 of every plan row is read as text; rows are joined one per line.
    let details: Vec<String> = explain
        .query_map([], |row| row.get(3))
        .unwrap()
        .map(|detail| detail.unwrap())
        .collect();
    let plan = details.join("\n");
    let upper_plan = plan.to_uppercase();
    assert!(
        !upper_plan.contains("CORRELATED"),
        "in-degree UPDATE must not use a correlated subquery; got plan:\n{plan}"
    );
}
3402
/// `foo` starts with two incoming call edges (from `b.py` and `c.py`). After the
/// edges of `b.py` are cleared and the scoped recompute runs for that file,
/// `foo`'s in-degree must drop from 2 to 1.
#[test]
fn test_compute_in_degrees_scoped_resets_target_that_lost_edge() {
    let db = Database::open_memory().unwrap();

    let callee = test_symbol("foo", SymbolKind::Function, "a.py", 1);
    let caller_b = test_symbol("bar", SymbolKind::Function, "b.py", 1);
    let caller_c = test_symbol("baz", SymbolKind::Function, "c.py", 1);
    for symbol in [&callee, &caller_b, &caller_c] {
        db.insert_symbol(symbol).unwrap();
    }

    let edge_from_b = Edge::new(&caller_b.id, "foo", EdgeKind::Calls, "b.py", 5);
    db.insert_edge(&edge_from_b).unwrap();
    let edge_from_c = Edge::new(&caller_c.id, "foo", EdgeKind::Calls, "c.py", 3);
    db.insert_edge(&edge_from_c).unwrap();

    // Reads the in-degree of the first search hit for "foo".
    let foo_in_degree = || db.search("foo", None, None, 10).unwrap()[0].in_degree;

    db.resolve_edges().unwrap();
    db.compute_in_degrees().unwrap();
    assert_eq!(foo_in_degree(), 2);

    db.clear_edges_for_file("b.py").unwrap();
    let mut dirty = std::collections::HashSet::new();
    dirty.insert(String::from("b.py"));
    db.invalidate_edges_targeting(&dirty).unwrap();
    db.resolve_edges_scoped(&dirty).unwrap();
    db.compute_in_degrees_scoped(&dirty).unwrap();

    assert_eq!(foo_in_degree(), 1);
}
3437
/// Opening a file-backed database with dimension 384 must leave
/// `embedding_dimension = "384"` in the metadata.
#[test]
fn test_open_stores_embedding_dimension() {
    let tmp = tempfile::TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");

    let db = Database::open(&db_path, 384).unwrap();
    let lookup = db.get_metadata("embedding_dimension").unwrap();
    let stored: String = lookup.expect("dimension should be stored");
    assert_eq!(stored, "384");
}
3452
/// Reopening with a different dimension (384 -> 768) must leave zero embeddings
/// and store the new dimension in metadata.
#[test]
fn test_open_with_different_dimension_clears_embeddings() {
    let tmp = tempfile::TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");

    // Seed a single 384-wide (4 bytes per component) embedding, then close.
    let seeded = Database::open(&db_path, 384).unwrap();
    let symbol = Symbol::new("foo", SymbolKind::Function, "a.py", 1, 10, 0, 100, None);
    seeded.insert_symbol(&symbol).unwrap();
    seeded
        .upsert_symbol_content(&symbol.id, "foo", "def foo():", "header")
        .unwrap();
    let embedding_id = seeded.get_or_create_embedding_id(&symbol.id).unwrap();
    let zero_vector = vec![0u8; 384 * 4];
    seeded
        .insert_embeddings(&[(embedding_id, zero_vector)])
        .unwrap();
    assert_eq!(seeded.embedding_count().unwrap(), 1);
    drop(seeded);

    let reopened = Database::open(&db_path, 768).unwrap();
    assert_eq!(reopened.embedding_count().unwrap(), 0);
    let lookup = reopened.get_metadata("embedding_dimension").unwrap();
    let stored: String = lookup.expect("dimension should be updated");
    assert_eq!(stored, "768");
}
3482
/// Reopening with the same dimension (384) must keep the stored embedding.
#[test]
fn test_open_same_dimension_preserves_embeddings() {
    let tmp = tempfile::TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");

    // Seed one embedding and close the first handle before reopening.
    let seeded = Database::open(&db_path, 384).unwrap();
    let symbol = Symbol::new("bar", SymbolKind::Function, "b.py", 1, 10, 0, 100, None);
    seeded.insert_symbol(&symbol).unwrap();
    seeded
        .upsert_symbol_content(&symbol.id, "bar", "def bar():", "header")
        .unwrap();
    let embedding_id = seeded.get_or_create_embedding_id(&symbol.id).unwrap();
    let zero_vector = vec![0u8; 384 * 4];
    seeded
        .insert_embeddings(&[(embedding_id, zero_vector)])
        .unwrap();
    drop(seeded);

    let reopened = Database::open(&db_path, 384).unwrap();
    assert_eq!(reopened.embedding_count().unwrap(), 1);
}
3506
/// A database created with dimension 768 and reopened with
/// `DEFAULT_EMBEDDING_DIM` must keep its embedding and its stored dimension.
#[test]
fn test_default_dim_preserves_stored_non_default() {
    let tmp = tempfile::TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");

    // Seed one 768-wide embedding and close the first handle.
    let seeded = Database::open(&db_path, 768).unwrap();
    let symbol = Symbol::new("baz", SymbolKind::Function, "c.py", 1, 10, 0, 100, None);
    seeded.insert_symbol(&symbol).unwrap();
    seeded
        .upsert_symbol_content(&symbol.id, "baz", "def baz():", "header")
        .unwrap();
    let embedding_id = seeded.get_or_create_embedding_id(&symbol.id).unwrap();
    let zero_vector = vec![0u8; 768 * 4];
    seeded
        .insert_embeddings(&[(embedding_id, zero_vector)])
        .unwrap();
    drop(seeded);

    let reopened = Database::open(&db_path, DEFAULT_EMBEDDING_DIM).unwrap();
    assert_eq!(reopened.embedding_count().unwrap(), 1);
    let dimension_sql =
        "SELECT CAST(value AS INTEGER) FROM metadata WHERE key = 'embedding_dimension'";
    let stored: i64 = reopened
        .conn
        .query_row(dimension_sql, [], |row| row.get(0))
        .unwrap();
    assert_eq!(stored, 768);
}
3539
/// A database created with dimension 768 and reopened with 1536 must end up
/// with zero embeddings.
#[test]
fn test_explicit_non_default_dim_wipes_different_stored() {
    let tmp = tempfile::TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");

    // Seed one 768-wide embedding and close the first handle.
    let seeded = Database::open(&db_path, 768).unwrap();
    let symbol = Symbol::new("qux", SymbolKind::Function, "d.py", 1, 10, 0, 100, None);
    seeded.insert_symbol(&symbol).unwrap();
    seeded
        .upsert_symbol_content(&symbol.id, "qux", "def qux():", "header")
        .unwrap();
    let embedding_id = seeded.get_or_create_embedding_id(&symbol.id).unwrap();
    let zero_vector = vec![0u8; 768 * 4];
    seeded
        .insert_embeddings(&[(embedding_id, zero_vector)])
        .unwrap();
    drop(seeded);

    let reopened = Database::open(&db_path, 1536).unwrap();
    assert_eq!(reopened.embedding_count().unwrap(), 0);
}
3563
/// The rowid of the `embedding_dimension` metadata row must be the same before
/// and after a second open with the same dimension.
#[test]
fn test_reopen_same_dim_does_not_rewrite_metadata() {
    let tmp = tempfile::TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");

    // Reads the row's rowid through a separate, short-lived raw connection.
    let embedding_dimension_rowid = || -> i64 {
        let conn = Connection::open(&db_path).unwrap();
        conn.query_row(
            "SELECT rowid FROM metadata WHERE key = 'embedding_dimension'",
            [],
            |row| row.get(0),
        )
        .unwrap()
    };

    // Both handles stay open until the end of the test.
    let _first_open = Database::open(&db_path, 384).unwrap();
    let rowid_before = embedding_dimension_rowid();

    let _second_open = Database::open(&db_path, 384).unwrap();
    let rowid_after = embedding_dimension_rowid();

    assert_eq!(
        rowid_before, rowid_after,
        "same-dim reopen should not rewrite the embedding_dimension row"
    );
}
3605
/// An error that is not a busy error must be returned after exactly one call
/// of the closure.
#[test]
fn test_retry_busy_returns_on_non_busy_error() {
    let calls = std::cell::Cell::new(0);
    let outcome = retry_busy(|| -> std::result::Result<(), rusqlite::Error> {
        let seen = calls.get();
        calls.set(seen + 1);
        Err(rusqlite::Error::InvalidQuery)
    });
    assert!(matches!(outcome, Err(rusqlite::Error::InvalidQuery)));
    assert_eq!(calls.get(), 1, "non-busy errors must not retry");
}
3617
/// The closure fails with `DatabaseBusy` on its first call and succeeds on the
/// second; `retry_busy` must return the success value after two calls.
#[test]
fn test_retry_busy_succeeds_after_transient_busy() {
    let calls = std::cell::Cell::new(0);
    let outcome = retry_busy(|| -> std::result::Result<u32, rusqlite::Error> {
        let attempt = calls.get() + 1;
        calls.set(attempt);
        match attempt {
            // Only the very first attempt reports a locked database.
            1 => Err(rusqlite::Error::SqliteFailure(
                rusqlite::ffi::Error {
                    code: rusqlite::ErrorCode::DatabaseBusy,
                    extended_code: 5,
                },
                Some(String::from("database is locked")),
            )),
            _ => Ok(42),
        }
    });
    assert_eq!(outcome.unwrap(), 42);
    assert_eq!(calls.get(), 2);
}
3639
/// When every call reports `DatabaseBusy`, the busy error must be returned and
/// the closure must have run `MIGRATION_RETRY_BACKOFF_MS.len() + 1` times.
#[test]
fn test_retry_busy_exhausts_and_propagates() {
    let calls = std::cell::Cell::new(0);
    let outcome = retry_busy(|| -> std::result::Result<(), rusqlite::Error> {
        calls.set(calls.get() + 1);
        let busy = rusqlite::ffi::Error {
            code: rusqlite::ErrorCode::DatabaseBusy,
            extended_code: 5,
        };
        Err(rusqlite::Error::SqliteFailure(
            busy,
            Some(String::from("database is locked")),
        ))
    });
    assert!(matches!(
        outcome,
        Err(rusqlite::Error::SqliteFailure(
            rusqlite::ffi::Error {
                code: rusqlite::ErrorCode::DatabaseBusy,
                ..
            },
            _
        ))
    ));
    assert_eq!(calls.get(), MIGRATION_RETRY_BACKOFF_MS.len() + 1);
}
3667
3668 fn fp(provider: &str, model: &str, dim: usize) -> EmbeddingFingerprint {
3671 EmbeddingFingerprint {
3672 provider: provider.to_string(),
3673 model: model.to_string(),
3674 dimension: dim,
3675 }
3676 }
3677
3678 fn seed_embedding(db: &Database, dim: usize, sym_name: &str) {
3679 let sym = Symbol::new(sym_name, SymbolKind::Function, "f.py", 1, 10, 0, 100, None);
3680 db.insert_symbol(&sym).unwrap();
3681 db.upsert_symbol_content(&sym.id, sym_name, "def f():", "header")
3682 .unwrap();
3683 let eid = db.get_or_create_embedding_id(&sym.id).unwrap();
3684 let bytes = vec![0u8; dim * 4];
3685 db.insert_embeddings(&[(eid, bytes)]).unwrap();
3686 }
3687
/// Reconciling the same fingerprint a second time must keep the embedding that
/// was seeded in between.
#[test]
fn test_fingerprint_match_is_noop() {
    let tmp = tempfile::TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");
    let db = Database::open(&db_path, 384).unwrap();

    let fingerprint = fp("local", "BGE-small-en-v1.5", 384);
    db.reconcile_embedding_fingerprint(&fingerprint).unwrap();
    seed_embedding(&db, 384, "foo");
    db.reconcile_embedding_fingerprint(&fingerprint).unwrap();

    assert_eq!(db.embedding_count().unwrap(), 1);
}
3700
/// Reconciling a fingerprint that differs only in provider must leave zero
/// embeddings and store the new provider in metadata.
#[test]
fn test_fingerprint_provider_swap_wipes() {
    let tmp = tempfile::TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");
    let db = Database::open(&db_path, 384).unwrap();

    let original = fp("local", "BGE-small-en-v1.5", 384);
    db.reconcile_embedding_fingerprint(&original).unwrap();
    seed_embedding(&db, 384, "bar");
    assert_eq!(db.embedding_count().unwrap(), 1);

    let swapped = fp("ollama", "BGE-small-en-v1.5", 384);
    db.reconcile_embedding_fingerprint(&swapped).unwrap();
    assert_eq!(db.embedding_count().unwrap(), 0);

    let provider = db.get_metadata("embedding_provider").unwrap();
    assert_eq!(provider.as_deref(), Some("ollama"));
}
3720
/// Reconciling a fingerprint that differs only in model must leave zero
/// embeddings and store the new model in metadata.
#[test]
fn test_fingerprint_model_swap_wipes() {
    let tmp = tempfile::TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");
    let db = Database::open(&db_path, 384).unwrap();

    let original = fp("local", "BGE-small-en-v1.5", 384);
    db.reconcile_embedding_fingerprint(&original).unwrap();
    seed_embedding(&db, 384, "baz");
    assert_eq!(db.embedding_count().unwrap(), 1);

    let swapped = fp("local", "AllMiniLML6V2", 384);
    db.reconcile_embedding_fingerprint(&swapped).unwrap();
    assert_eq!(db.embedding_count().unwrap(), 0);

    let model = db.get_metadata("embedding_model").unwrap();
    assert_eq!(model.as_deref(), Some("AllMiniLML6V2"));
}
3740
/// With no provider recorded yet, the first reconcile must keep the existing
/// embedding and write provider and model into metadata.
#[test]
fn test_fingerprint_backfill_does_not_wipe() {
    let tmp = tempfile::TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");
    let db = Database::open(&db_path, 384).unwrap();

    seed_embedding(&db, 384, "qux");
    let provider_before = db.get_metadata("embedding_provider").unwrap();
    assert!(provider_before.is_none());
    assert_eq!(db.embedding_count().unwrap(), 1);

    let fingerprint = fp("local", "BGE-small-en-v1.5", 384);
    db.reconcile_embedding_fingerprint(&fingerprint).unwrap();
    assert_eq!(
        db.embedding_count().unwrap(),
        1,
        "backfill must preserve existing embeddings"
    );

    let provider_after = db.get_metadata("embedding_provider").unwrap();
    assert_eq!(provider_after.as_deref(), Some("local"));
    let model_after = db.get_metadata("embedding_model").unwrap();
    assert_eq!(model_after.as_deref(), Some("BGE-small-en-v1.5"));
}
3768
/// Reconciling a fingerprint with a new dimension (384 -> 1024) must leave zero
/// embeddings, store the new dimension, and leave `symbol_vec` present.
#[test]
fn test_fingerprint_dim_change_wipes() {
    let tmp = tempfile::TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");
    let db = Database::open(&db_path, 384).unwrap();

    let original = fp("local", "BGE-small-en-v1.5", 384);
    db.reconcile_embedding_fingerprint(&original).unwrap();
    seed_embedding(&db, 384, "quux");
    assert_eq!(db.embedding_count().unwrap(), 1);

    let widened = fp("local", "BGELargeENV15", 1024);
    db.reconcile_embedding_fingerprint(&widened).unwrap();
    assert_eq!(db.embedding_count().unwrap(), 0);

    let dimension_sql =
        "SELECT CAST(value AS INTEGER) FROM metadata WHERE key = 'embedding_dimension'";
    let stored_dim: i64 = db
        .conn
        .query_row(dimension_sql, [], |row| row.get(0))
        .unwrap();
    assert_eq!(stored_dim, 1024);

    let vec_table_present = symbol_vec_exists(&db.conn).unwrap();
    assert!(
        vec_table_present,
        "successful reconcile must recreate symbol_vec"
    );
}
3801
/// A read-only open of an initialised database must report itself read-only
/// and pin the current schema version and the stored embedding fingerprint.
#[test]
fn test_open_readonly_succeeds_and_marks_read_only() {
    let tmp = tempfile::TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");

    // Prepare the database through a writable handle, then close it.
    let writer = Database::open(&db_path, 384).unwrap();
    writer
        .reconcile_embedding_fingerprint(&fp("local", "BGE-small-en-v1.5", 384))
        .unwrap();
    seed_embedding(&writer, 384, "foo");
    drop(writer);

    let reader = Database::open_readonly(&db_path).unwrap();
    assert!(reader.is_read_only(), "open_readonly must set the flag");
    let pinned = reader.pinned_attach().expect("read-only attach pins state");
    assert_eq!(pinned.schema_version, SCHEMA_VERSION);
    let expected_fingerprint = Some(fp("local", "BGE-small-en-v1.5", 384));
    assert_eq!(pinned.embedding, expected_fingerprint);
}
3827
/// A symbol written through a writable handle must be visible to a subsequent
/// read-only open.
#[test]
fn test_open_readonly_can_query_existing_data() {
    let tmp = tempfile::TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");

    // Write one symbol and close the writable handle.
    let writer = Database::open(&db_path, 384).unwrap();
    let symbol = Symbol::new("callable", SymbolKind::Function, "a.py", 1, 10, 0, 100, None);
    writer.insert_symbol(&symbol).unwrap();
    drop(writer);

    let reader = Database::open_readonly(&db_path).unwrap();
    let count: i64 = reader
        .conn
        .query_row("SELECT COUNT(*) FROM symbols", [], |row| row.get(0))
        .unwrap();
    assert_eq!(count, 1, "reader sees primary's data");
}
3855
/// A write through a read-only handle must fail, and the error text must
/// contain one of "read", "readonly" or "write".
#[test]
fn test_open_readonly_refuses_writes() {
    let tmp = tempfile::TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");
    // Create the database, closing the writable handle straight away.
    drop(Database::open(&db_path, 384).unwrap());

    let reader = Database::open_readonly(&db_path).unwrap();
    let write_attempt = reader.conn.execute(
        "INSERT OR REPLACE INTO metadata (key, value) VALUES ('x', 'y')",
        [],
    );
    let msg = write_attempt.unwrap_err().to_string();
    let read_only_flavored = ["read", "readonly", "write"]
        .iter()
        .any(|needle| msg.contains(*needle));
    assert!(
        read_only_flavored,
        "read-only DB write should fail with a read-only-flavored error, got: {msg}"
    );
}
3884
/// With `schema_version` overwritten to 9999, a read-only open must fail with
/// `DbError::SchemaDrift` carrying the expected and stored versions.
#[test]
fn test_open_readonly_detects_schema_drift() {
    let tmp = tempfile::TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");

    let writer = Database::open(&db_path, 384).unwrap();
    writer.set_metadata("schema_version", "9999").unwrap();
    drop(writer);

    let (expected, stored) = match Database::open_readonly(&db_path).unwrap_err() {
        DbError::SchemaDrift { expected, stored } => (expected, stored),
        other => panic!("expected SchemaDrift, got {other:?}"),
    };
    assert_eq!(expected, SCHEMA_VERSION);
    assert_eq!(stored, 9999);
}
3904
/// A metadata marker written before a read-only open must still read back
/// unchanged from a later writable open.
#[test]
fn test_open_readonly_does_not_run_migrations() {
    let tmp = tempfile::TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");

    let writer = Database::open(&db_path, 384).unwrap();
    writer.set_metadata("user_marker", "untouched").unwrap();
    drop(writer);

    // The read-only handle stays open while the writable one is reopened.
    let _reader = Database::open_readonly(&db_path).unwrap();
    let primary = Database::open(&db_path, 384).unwrap();
    let marker = primary.get_metadata("user_marker").unwrap();
    assert_eq!(marker.as_deref(), Some("untouched"));
}
3925
/// A handle from `Database::open` must not be read-only and must have no
/// pinned attach state.
#[test]
fn test_open_default_is_not_read_only() {
    let tmp = tempfile::TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");
    let writable = Database::open(&db_path, 384).unwrap();

    let read_only = writable.is_read_only();
    assert!(!read_only);
    let pinned = writable.pinned_attach();
    assert!(pinned.is_none());
}
3934
/// `open_existing_rw` on an initialised database must give a writable handle
/// with no pin that can read existing metadata and write new metadata.
#[test]
fn test_open_existing_rw_opens_writable_and_skips_migrations() {
    let tmp = tempfile::TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");

    let creator = Database::open(&db_path, 384).unwrap();
    creator.set_metadata("marker", "preserved").unwrap();
    drop(creator);

    let promoted = Database::open_existing_rw(&db_path).unwrap();
    assert!(!promoted.is_read_only(), "open_existing_rw is RW");
    assert!(promoted.pinned_attach().is_none(), "RW opens have no pin");

    let marker = promoted.get_metadata("marker").unwrap();
    assert_eq!(marker.as_deref(), Some("preserved"));
    promoted.set_metadata("write_check", "ok").unwrap();
}
3958
/// With `schema_version` overwritten to 9999, `open_existing_rw` must fail with
/// `DbError::SchemaDrift` carrying the expected and stored versions.
#[test]
fn test_open_existing_rw_detects_schema_drift() {
    let tmp = tempfile::TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");

    let writer = Database::open(&db_path, 384).unwrap();
    writer.set_metadata("schema_version", "9999").unwrap();
    drop(writer);

    let (expected, stored) = match Database::open_existing_rw(&db_path).unwrap_err() {
        DbError::SchemaDrift { expected, stored } => (expected, stored),
        other => panic!("expected SchemaDrift, got {other:?}"),
    };
    assert_eq!(expected, SCHEMA_VERSION);
    assert_eq!(stored, 9999);
}
3976
/// Plain `Database::open` calls with no reconcile must leave the stored
/// provider, model and embedding count as they were.
#[test]
fn test_database_open_alone_does_not_change_fingerprint() {
    let tmp = tempfile::TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");
    let original_fp = fp("local", "BGE-small-en-v1.5", 384);

    // First session: record the fingerprint and one embedding.
    let first = Database::open(&db_path, 384).unwrap();
    first.reconcile_embedding_fingerprint(&original_fp).unwrap();
    seed_embedding(&first, 384, "guard");
    drop(first);

    // Second session: open and close without touching anything.
    drop(Database::open(&db_path, 384).unwrap());

    let db = Database::open(&db_path, 384).unwrap();
    let provider = db.get_metadata("embedding_provider").unwrap();
    assert_eq!(provider.as_deref(), Some("local"));
    let model = db.get_metadata("embedding_model").unwrap();
    assert_eq!(model.as_deref(), Some("BGE-small-en-v1.5"));
    assert_eq!(db.embedding_count().unwrap(), 1);
}
4016
/// With the `schema_version` row deleted, a read-only open must fail with
/// `DbError::SchemaDrift` and `stored == 0`.
#[test]
fn test_open_readonly_missing_schema_version_is_schema_drift() {
    let tmp = tempfile::TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");

    let writer = Database::open(&db_path, 384).unwrap();
    writer
        .conn
        .execute("DELETE FROM metadata WHERE key = 'schema_version'", [])
        .unwrap();
    drop(writer);

    let (expected, stored) = match Database::open_readonly(&db_path).unwrap_err() {
        DbError::SchemaDrift { expected, stored } => (expected, stored),
        other => panic!("expected SchemaDrift, got {other:?}"),
    };
    assert_eq!(expected, SCHEMA_VERSION);
    assert_eq!(stored, 0, "missing row should surface as stored=0");
}
4043
/// A SQLite file that has only an unrelated table (no `metadata`) must make a
/// read-only open fail with `DbError::SchemaDrift` and `stored == 0`.
#[test]
fn test_open_readonly_missing_metadata_table_is_schema_drift() {
    let tmp = tempfile::TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");

    // Create the file through a raw connection so no schema is installed.
    let raw = Connection::open(&db_path).unwrap();
    raw.execute_batch("CREATE TABLE unrelated (x INTEGER);")
        .unwrap();
    drop(raw);

    let (expected, stored) = match Database::open_readonly(&db_path).unwrap_err() {
        DbError::SchemaDrift { expected, stored } => (expected, stored),
        other => panic!("expected SchemaDrift, got {other:?}"),
    };
    assert_eq!(expected, SCHEMA_VERSION);
    assert_eq!(stored, 0, "missing metadata table should be stored=0");
}
4069
/// With the `schema_version` row deleted, `open_existing_rw` must fail with
/// `DbError::SchemaDrift` and `stored == 0`.
#[test]
fn test_open_existing_rw_missing_schema_version_is_schema_drift() {
    let tmp = tempfile::TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");

    let writer = Database::open(&db_path, 384).unwrap();
    writer
        .conn
        .execute("DELETE FROM metadata WHERE key = 'schema_version'", [])
        .unwrap();
    drop(writer);

    let (expected, stored) = match Database::open_existing_rw(&db_path).unwrap_err() {
        DbError::SchemaDrift { expected, stored } => (expected, stored),
        other => panic!("expected SchemaDrift, got {other:?}"),
    };
    assert_eq!(expected, SCHEMA_VERSION);
    assert_eq!(stored, 0);
}
4089
/// If `symbol_vec` has been dropped while the stored dimension still matches,
/// a reconcile with the same fingerprint must leave `symbol_vec` present again.
#[test]
fn test_reconcile_rebuilds_when_metadata_matches_but_symbol_vec_missing() {
    let tmp = tempfile::TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");
    let fingerprint = fp("local", "BGE-small-en-v1.5", 384);

    // Session 1: record the fingerprint.
    let first = Database::open(&db_path, 384).unwrap();
    first.reconcile_embedding_fingerprint(&fingerprint).unwrap();
    drop(first);

    // Session 2: drop the table and confirm the dimension metadata is intact.
    let second = Database::open(&db_path, 384).unwrap();
    second
        .conn
        .execute("DROP TABLE IF EXISTS symbol_vec", [])
        .unwrap();
    let dimension = second.get_metadata("embedding_dimension").unwrap();
    assert_eq!(dimension.as_deref(), Some("384"));
    drop(second);

    // Session 3: reconcile again and look the table up in sqlite_master.
    let third = Database::open(&db_path, 384).unwrap();
    third.reconcile_embedding_fingerprint(&fingerprint).unwrap();
    let master_row = third
        .conn
        .query_row(
            "SELECT 1 FROM sqlite_master WHERE name='symbol_vec'",
            [],
            |row| row.get::<_, i64>(0),
        )
        .optional()
        .unwrap();
    let exists: bool = master_row.is_some();
    assert!(
        exists,
        "reconcile must rebuild symbol_vec when missing, even on metadata match"
    );
}
4143
/// If `symbol_vec` has been dropped, the next `Database::open` with the same
/// dimension must leave `symbol_vec` present again.
#[test]
fn test_handle_embedding_dimension_rebuilds_when_symbol_vec_missing() {
    let tmp = tempfile::TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");

    let first = Database::open(&db_path, 384).unwrap();
    first
        .conn
        .execute("DROP TABLE IF EXISTS symbol_vec", [])
        .unwrap();
    drop(first);

    let reopened = Database::open(&db_path, 384).unwrap();
    let master_row = reopened
        .conn
        .query_row(
            "SELECT 1 FROM sqlite_master WHERE name='symbol_vec'",
            [],
            |row| row.get::<_, i64>(0),
        )
        .optional()
        .unwrap();
    let exists: bool = master_row.is_some();
    assert!(
        exists,
        "Database::open must rebuild symbol_vec when missing, even on metadata match"
    );
}
4173
/// With `RECONCILE_FAIL_AFTER_MODEL` set, a reconcile to a new fingerprint must
/// return `Err`, and afterwards provider, model, dimension, `symbol_vec` and the
/// embedding count must all still reflect the original state.
#[test]
fn test_reconcile_fingerprint_rolls_back_on_midsequence_failure() {
    let tmp = tempfile::TempDir::new().unwrap();
    let db_path = tmp.path().join("test.db");

    // Baseline: the original fingerprint plus one embedding.
    let initial_fp = fp("local", "BGE-small-en-v1.5", 384);
    let baseline = Database::open(&db_path, 384).unwrap();
    baseline.reconcile_embedding_fingerprint(&initial_fp).unwrap();
    seed_embedding(&baseline, 384, "seed");
    drop(baseline);

    // Arm the failure flag, attempt the swap, and close the handle.
    let new_fp = fp("ollama", "nomic-embed-text-v2", 384);
    let failing = Database::open(&db_path, 384).unwrap();
    RECONCILE_FAIL_AFTER_MODEL
        .with(|flag| flag.store(true, std::sync::atomic::Ordering::SeqCst));
    let outcome = failing.reconcile_embedding_fingerprint(&new_fp);
    drop(failing);
    assert!(outcome.is_err(), "injected SQLITE_FULL must surface as Err");

    // Gather the post-failure state first, then assert on each piece.
    let post = Database::open(&db_path, 384).unwrap();
    let stored_provider = post.get_metadata("embedding_provider").unwrap();
    let stored_model = post.get_metadata("embedding_model").unwrap();
    let stored_dim_str = post.get_metadata("embedding_dimension").unwrap();
    let vec_table_row = post
        .conn
        .query_row(
            "SELECT 1 FROM sqlite_master WHERE type='table' AND name='symbol_vec'",
            [],
            |row| row.get::<_, i64>(0),
        )
        .optional()
        .unwrap();
    let vec_table_present = vec_table_row.is_some();

    assert_eq!(
        stored_provider.as_deref(),
        Some("local"),
        "failed reconcile must roll back provider"
    );
    assert_eq!(
        stored_model.as_deref(),
        Some("BGE-small-en-v1.5"),
        "failed reconcile must roll back model"
    );
    assert_eq!(
        stored_dim_str.as_deref(),
        Some("384"),
        "failed reconcile must roll back dimension"
    );
    assert!(
        vec_table_present,
        "failed reconcile must roll back symbol_vec drop"
    );
    assert_eq!(
        post.embedding_count().unwrap(),
        1,
        "failed reconcile must roll back the symbol_embedding_map DELETE"
    );
}
4254
/// Pins `DEFAULT_EMBEDDING_DIM` to 384.
#[test]
fn test_default_embedding_dim_constant() {
    let pinned_value = 384;
    assert_eq!(DEFAULT_EMBEDDING_DIM, pinned_value);
}
4259
/// Opening a hand-built database stamped `schema_version = 2` must leave exactly
/// one file whose name starts with `legacy.db.pre-v` next to it.
#[test]
fn test_destructive_migration_creates_backup() {
    let tmp = tempfile::tempdir().unwrap();
    let db_path = tmp.path().join("legacy.db");

    // Build the legacy file through a raw connection and close it again.
    register_sqlite_vec();
    let legacy = Connection::open(&db_path).unwrap();
    legacy
        .execute_batch(
            "CREATE TABLE symbols (
                    id TEXT PRIMARY KEY, name TEXT, kind TEXT, file_path TEXT,
                    start_line INTEGER, end_line INTEGER, start_byte INTEGER, end_byte INTEGER,
                    parent_id TEXT, signature TEXT, visibility TEXT,
                    is_async BOOLEAN, docstring TEXT, in_degree INTEGER DEFAULT 0
                );
                CREATE TABLE edges (
                    id INTEGER PRIMARY KEY AUTOINCREMENT, source_id TEXT, target_name TEXT,
                    target_id TEXT, kind TEXT, file_path TEXT, line INTEGER
                );
                CREATE TABLE files (path TEXT PRIMARY KEY, last_modified REAL, hash TEXT,
                    language TEXT, num_symbols INTEGER);
                CREATE TABLE metadata (key TEXT PRIMARY KEY, value TEXT);
                INSERT INTO symbols (id, name, kind, file_path) VALUES ('s1', 'foo', 'function', 'a.py');
                INSERT INTO metadata (key, value) VALUES ('schema_version', '2');",
        )
        .unwrap();
    drop(legacy);

    let _db = Database::open(&db_path, DEFAULT_EMBEDDING_DIM).unwrap();

    // Count sibling files carrying the backup prefix.
    let backup_count = std::fs::read_dir(tmp.path())
        .unwrap()
        .filter_map(|entry| entry.ok())
        .filter(|entry| {
            let file_name = entry.file_name();
            file_name.to_string_lossy().starts_with("legacy.db.pre-v")
        })
        .count();
    assert_eq!(
        backup_count, 1,
        "expected exactly one pre-migration backup, found {}",
        backup_count
    );
}
4309
/// Opening a brand-new database must not leave any file whose name contains
/// `.pre-v` in the directory.
#[test]
fn test_no_backup_for_fresh_database() {
    let tmp = tempfile::tempdir().unwrap();
    let db_path = tmp.path().join("fresh.db");
    let _db = Database::open(&db_path, DEFAULT_EMBEDDING_DIM).unwrap();

    let backup_found = std::fs::read_dir(tmp.path())
        .unwrap()
        .filter_map(|entry| entry.ok())
        .any(|entry| entry.file_name().to_string_lossy().contains(".pre-v"));
    assert!(
        !backup_found,
        "fresh DB should not create a backup file"
    );
}
4327
/// A `last_commit` value written to a fresh database must survive a reopen, and
/// the stored `schema_version` must equal `SCHEMA_VERSION`.
#[test]
fn fresh_db_stamps_version_without_running_ladder() {
    let tmp = tempfile::tempdir().unwrap();
    let db_path = tmp.path().join("fresh.db");

    // First session writes the marker and is closed at the end of the scope.
    {
        let first = Database::open(&db_path, DEFAULT_EMBEDDING_DIM).unwrap();
        first.set_metadata("last_commit", "deadbeef").unwrap();
    }

    let db = Database::open(&db_path, DEFAULT_EMBEDDING_DIM).unwrap();
    let last_commit: Option<String> = db
        .conn
        .query_row(
            "SELECT value FROM metadata WHERE key = 'last_commit'",
            [],
            |row| row.get(0),
        )
        .optional()
        .unwrap();
    assert_eq!(
        last_commit,
        Some(String::from("deadbeef")),
        "fresh re-open must not run the v2→3 wipe"
    );

    let version: String = db
        .conn
        .query_row(
            "SELECT value FROM metadata WHERE key = 'schema_version'",
            [],
            |row| row.get(0),
        )
        .unwrap();
    assert_eq!(version, SCHEMA_VERSION.to_string());
}
4367
/// Opening a hand-built, populated database with no `schema_version` row must
/// end at `SCHEMA_VERSION`, expose `edges.resolution_source`, and leave the
/// `symbols` table empty.
#[test]
fn populated_v1_db_runs_full_ladder_to_current() {
    let tmp = tempfile::tempdir().unwrap();
    let path = tmp.path().join("v1.sqlite");

    // Build the old-layout file through a raw connection and close it again.
    let legacy = Connection::open(&path).unwrap();
    legacy
        .execute_batch(
            "CREATE TABLE symbols (
                    id TEXT PRIMARY KEY, name TEXT, kind TEXT, file_path TEXT,
                    start_line INTEGER, end_line INTEGER, start_byte INTEGER, end_byte INTEGER,
                    parent_id TEXT, signature TEXT, visibility TEXT, is_async BOOLEAN, docstring TEXT);
                 CREATE TABLE edges (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    source_id TEXT NOT NULL, target_name TEXT NOT NULL, target_id TEXT,
                    kind TEXT NOT NULL, file_path TEXT NOT NULL, line INTEGER);
                 CREATE TABLE files (path TEXT PRIMARY KEY);
                 CREATE TABLE metadata (key TEXT PRIMARY KEY, value TEXT);
                 INSERT INTO symbols (id, name, kind, file_path) VALUES ('s:1', 'foo', 'function', 'a.py');
                 INSERT INTO edges (source_id, target_name, target_id, kind, file_path, line)
                    VALUES ('s:1', 'foo', 's:1', 'calls', 'a.py', 1);",
        )
        .unwrap();
    drop(legacy);

    let db = Database::open(&path, DEFAULT_EMBEDDING_DIM).unwrap();

    let version: String = db
        .conn
        .query_row(
            "SELECT value FROM metadata WHERE key = 'schema_version'",
            [],
            |row| row.get(0),
        )
        .unwrap();
    assert_eq!(version, SCHEMA_VERSION.to_string());

    // Preparing a statement that names the column succeeds only if it exists.
    let column_probe = db
        .conn
        .prepare("SELECT resolution_source FROM edges LIMIT 0");
    assert!(
        column_probe.is_ok(),
        "resolution_source must be added by the real upgrade"
    );
    drop(column_probe);

    let symbol_count: i64 = db
        .conn
        .query_row("SELECT COUNT(*) FROM symbols", [], |row| row.get(0))
        .unwrap();
    assert_eq!(symbol_count, 0, "v2→3 wipe must run for a populated v1 DB");
}
4428
/// `PRAGMA busy_timeout` on a freshly opened database must report
/// `BUSY_TIMEOUT_MS`.
#[test]
fn test_busy_timeout_pragma_is_set() {
    let tmp = tempfile::tempdir().unwrap();
    let db_path = tmp.path().join("timeout.db");
    let db = Database::open(&db_path, DEFAULT_EMBEDDING_DIM).unwrap();

    let expected = BUSY_TIMEOUT_MS as i64;
    let timeout: i64 = db
        .conn
        .query_row("PRAGMA busy_timeout;", [], |row| row.get(0))
        .unwrap();
    assert_eq!(timeout, expected);
}
4441
/// While one connection holds an open `BEGIN IMMEDIATE` transaction, a second
/// writer must fail in under 150 ms with `busy_timeout=0` and take at least
/// 250 ms to fail with `busy_timeout=300`.
#[test]
fn test_busy_timeout_makes_second_writer_retry_instead_of_aborting() {
    let tmp = tempfile::tempdir().unwrap();
    let db_path = tmp.path().join("concurrent.db");
    // Create the database, closing this first handle immediately.
    drop(Database::open(&db_path, DEFAULT_EMBEDDING_DIM).unwrap());

    // The holder starts a write transaction and leaves it open.
    let holder = Database::open(&db_path, DEFAULT_EMBEDDING_DIM).unwrap();
    holder
        .conn
        .execute_batch("BEGIN IMMEDIATE; INSERT INTO metadata (key, value) VALUES ('a', '1');")
        .unwrap();

    // Times how long a competing insert takes to fail under the given timeout.
    let time_blocked_write = |wait_ms: u32| -> std::time::Duration {
        let contender = Connection::open(&db_path).unwrap();
        contender
            .execute_batch(&format!("PRAGMA busy_timeout={};", wait_ms))
            .unwrap();
        let started = std::time::Instant::now();
        let res = contender.execute("INSERT INTO metadata (key, value) VALUES ('b', '2');", []);
        assert!(res.is_err(), "write must fail while the lock is held");
        started.elapsed()
    };

    let without_timeout = time_blocked_write(0);
    assert!(
        without_timeout < std::time::Duration::from_millis(150),
        "with busy_timeout=0 the writer must fail immediately"
    );
    let with_timeout = time_blocked_write(300);
    assert!(
        with_timeout >= std::time::Duration::from_millis(250),
        "with a non-zero busy_timeout the writer must retry, not abort"
    );

    holder.conn.execute_batch("COMMIT;").unwrap();
}
4483
/// The error type of `Database::open_memory` must be usable with `?` inside a
/// function that returns `anyhow::Result`.
#[test]
fn test_db_error_wraps_into_anyhow() {
    fn open_via_anyhow() -> anyhow::Result<()> {
        // The `?` below is the conversion under test.
        let _db = Database::open_memory()?;
        Ok(())
    }

    open_via_anyhow().unwrap();
}
4496
/// Opening a database under `/dev/null/...` must fail with either
/// `DbError::Open` carrying the database path or `DbError::PrepareDir` carrying
/// its parent directory.
#[test]
fn test_db_error_open_variant_has_path() {
    let bad_path = std::path::PathBuf::from("/dev/null/definitely/not/a/db.sqlite");
    let failure = Database::open(&bad_path, DEFAULT_EMBEDDING_DIM).unwrap_err();
    match failure {
        DbError::PrepareDir { path, .. } => {
            assert_eq!(path, bad_path.parent().unwrap());
        }
        DbError::Open { path, .. } => assert_eq!(path, bad_path),
        other => panic!("expected DbError::Open or PrepareDir, got {other:?}"),
    }
}
4513
4514 fn tx_test_symbol(id: &str, file: &str) -> Symbol {
4518 Symbol {
4519 id: id.to_string(),
4520 name: id.to_string(),
4521 kind: SymbolKind::Function,
4522 file_path: file.to_string(),
4523 start_line: 1,
4524 end_line: 1,
4525 start_byte: 0,
4526 end_byte: 0,
4527 parent_id: None,
4528 signature: None,
4529 visibility: Visibility::Public,
4530 is_async: false,
4531 docstring: None,
4532 in_degree: 0,
4533 content_hash: Some("h".to_string()),
4534 subtree_hash: Some("s".to_string()),
4535 }
4536 }
4537
/// A symbol written inside an indexing transaction is stored once the
/// transaction is committed.
#[test]
fn test_indexing_tx_commit_persists_writes() {
    let db = Database::open_memory().unwrap();
    let symbol = tx_test_symbol("a.py:function:foo", "a.py");

    let indexing_tx = db.begin_indexing_tx().unwrap();
    db.insert_symbols_in_tx(&[symbol]).unwrap();
    indexing_tx.commit().unwrap();

    let stored = db
        .conn
        .query_row("SELECT COUNT(*) FROM symbols", [], |row| {
            row.get::<_, i64>(0)
        })
        .unwrap();
    assert_eq!(stored, 1, "committed write must persist");
}
4555
/// Dropping an indexing transaction without committing it must discard the
/// writes made while it was open.
#[test]
fn test_indexing_tx_rollback_drops_writes() {
    let db = Database::open_memory().unwrap();
    let symbol = tx_test_symbol("a.py:function:foo", "a.py");

    let pending_tx = db.begin_indexing_tx().unwrap();
    db.insert_symbols_in_tx(&[symbol]).unwrap();
    // No commit: the transaction handle is released here.
    drop(pending_tx);

    let stored = db
        .conn
        .query_row("SELECT COUNT(*) FROM symbols", [], |row| {
            row.get::<_, i64>(0)
        })
        .unwrap();
    assert_eq!(
        stored, 0,
        "writes must roll back when the indexing transaction is dropped without commit"
    );
}
4579
/// A pipeline that fails after writing inside an indexing transaction must
/// leave only the rows that existed before the transaction started.
#[test]
fn test_indexing_tx_partial_failure_rolls_back_full_pipeline() {
    let db = Database::open_memory().unwrap();

    // Row committed before the failing pipeline runs; it must survive.
    let survivor = tx_test_symbol("pre.py:function:keep", "pre.py");
    db.insert_symbols(&[survivor]).unwrap();

    let failing_pipeline = || -> Result<()> {
        // Held for the whole closure; never committed.
        let _guard = db.begin_indexing_tx()?;
        let first_batch = [tx_test_symbol("a.py:function:foo", "a.py")];
        db.insert_symbols_in_tx(&first_batch)?;

        anyhow::bail!("simulated mid-pipeline failure");
    };
    let outcome = failing_pipeline();
    assert!(outcome.is_err(), "the pipeline must propagate its error");

    let mut id_query = db
        .conn
        .prepare("SELECT id FROM symbols ORDER BY id")
        .unwrap();
    let remaining_ids = id_query
        .query_map([], |row| row.get::<_, String>(0))
        .unwrap()
        .collect::<std::result::Result<Vec<_>, _>>()
        .unwrap();
    assert_eq!(
        remaining_ids,
        vec!["pre.py:function:keep"],
        "pre-existing rows must survive; the partial write must roll back"
    );
}
4624
/// `insert_symbols`, called with no surrounding indexing transaction, must
/// leave the row stored.
#[test]
fn test_public_wrapper_still_self_commits() {
    let db = Database::open_memory().unwrap();
    let symbol = tx_test_symbol("a.py:function:foo", "a.py");

    db.insert_symbols(&[symbol]).unwrap();

    let stored = db
        .conn
        .query_row("SELECT COUNT(*) FROM symbols", [], |row| {
            row.get::<_, i64>(0)
        })
        .unwrap();
    assert_eq!(stored, 1, "public wrapper must persist without an outer tx");
}
4642
/// Without an outer indexing transaction, a write made before a later
/// pipeline error stays in the database.
#[test]
fn test_partial_pipeline_without_outer_tx_persists_writes() {
    let db = Database::open_memory().unwrap();

    let failing_pipeline = || -> Result<()> {
        let first_batch = [tx_test_symbol("a.py:function:foo", "a.py")];
        db.insert_symbols(&first_batch)?;
        anyhow::bail!("simulated mid-pipeline failure");
    };
    let outcome = failing_pipeline();
    assert!(outcome.is_err());

    let stored = db
        .conn
        .query_row("SELECT COUNT(*) FROM symbols", [], |row| {
            row.get::<_, i64>(0)
        })
        .unwrap();
    assert_eq!(
        stored, 1,
        "without an outer transaction, an early write persists despite a later error"
    );
}
4671
4672 fn resolution_state_of(db: &Database, edge_id: i64) -> i64 {
4675 db.conn
4676 .query_row(
4677 "SELECT resolution_state FROM edges WHERE id = ?1",
4678 params![edge_id],
4679 |row| row.get(0),
4680 )
4681 .unwrap()
4682 }
4683
4684 fn resolution_source_of(db: &Database, edge_id: i64) -> Option<String> {
4685 db.conn
4686 .query_row(
4687 "SELECT resolution_source FROM edges WHERE id = ?1",
4688 params![edge_id],
4689 |row| row.get(0),
4690 )
4691 .unwrap()
4692 }
4693
4694 fn insert_test_edge(db: &Database, target_name: &str) -> i64 {
4695 let sym = test_symbol("src", SymbolKind::Function, "a.py", 1);
4696 db.insert_symbols(std::slice::from_ref(&sym)).unwrap();
4697 let edge = Edge::new(&sym.id, target_name, EdgeKind::Calls, "a.py", 1);
4698 db.insert_edge(&edge).unwrap();
4699 db.conn.last_insert_rowid()
4700 }
4701
/// A freshly inserted edge with no target starts with `resolution_state` 0.
#[test]
fn test_new_edge_has_default_state_zero() {
    let db = Database::open_memory().unwrap();
    let edge_id = insert_test_edge(&db, "missing_target");

    let state = resolution_state_of(&db, edge_id);
    assert_eq!(state, 0);
}
4708
/// `update_edge_target` moves the edge to `resolution_state` 1.
#[test]
fn test_update_edge_target_flips_state_to_one() {
    let db = Database::open_memory().unwrap();
    let edge_id = insert_test_edge(&db, "anything");

    db.update_edge_target(edge_id, "some:symbol:id").unwrap();

    let state = resolution_state_of(&db, edge_id);
    assert_eq!(state, 1);
}
4716
/// `mark_edge_unresolvable` moves the edge to `resolution_state` 2.
#[test]
fn test_mark_edge_unresolvable_sets_state_to_two() {
    let db = Database::open_memory().unwrap();
    let edge_id = insert_test_edge(&db, "anything");

    db.mark_edge_unresolvable(edge_id).unwrap();

    let state = resolution_state_of(&db, edge_id);
    assert_eq!(state, 2);
}
4724
/// Edges marked unresolvable must not be returned by `unresolved_edges`,
/// while untouched unresolved edges still are.
#[test]
fn test_unresolved_edges_excludes_state_two() {
    let db = Database::open_memory().unwrap();
    insert_test_edge(&db, "still_unresolved");
    let burned_id = insert_test_edge(&db, "burned");
    db.mark_edge_unresolvable(burned_id).unwrap();

    let pending = db.unresolved_edges().unwrap();
    let is_listed = |target: &str| pending.iter().any(|e| e.target_name.as_str() == target);

    assert!(is_listed("still_unresolved"));
    assert!(!is_listed("burned"));
}
4737
/// Resetting by name reopens only the unresolvable edge whose target name
/// matches; the other one keeps state 2.
#[test]
fn test_reset_unresolvable_for_names_targets_only_matching() {
    let db = Database::open_memory().unwrap();
    let foo_id = insert_test_edge(&db, "foo");
    let bar_id = insert_test_edge(&db, "bar");
    for edge_id in [foo_id, bar_id] {
        db.mark_edge_unresolvable(edge_id).unwrap();
    }

    let reopened = db
        .reset_unresolvable_for_names(&[String::from("foo")])
        .unwrap();

    assert_eq!(reopened, 1);
    assert_eq!(resolution_state_of(&db, foo_id), 0);
    assert_eq!(resolution_state_of(&db, bar_id), 2);
}
4753
/// Passing an empty name list succeeds and reports zero reopened edges.
#[test]
fn test_reset_unresolvable_for_names_empty_is_noop() {
    let db = Database::open_memory().unwrap();

    let reopened = db.reset_unresolvable_for_names(&[]).unwrap();

    assert_eq!(reopened, 0);
}
4760
/// Resetting by name leaves edges in state 0 (still open) and state 1
/// (already resolved) exactly as they were.
#[test]
fn test_reset_unresolvable_for_names_does_not_touch_state_zero_or_one() {
    let db = Database::open_memory().unwrap();
    let open_id = insert_test_edge(&db, "foo");
    let resolved_id = insert_test_edge(&db, "foo");
    db.update_edge_target(resolved_id, "some:id").unwrap();

    db.reset_unresolvable_for_names(&[String::from("foo")])
        .unwrap();

    assert_eq!(resolution_state_of(&db, open_id), 0);
    assert_eq!(resolution_state_of(&db, resolved_id), 1);
}
4775
/// `mark_edge_external` moves the edge to state 3, visible both in the raw
/// column and through `edge_resolution_state`.
#[test]
fn test_mark_edge_external_sets_state_to_three() {
    let db = Database::open_memory().unwrap();
    let edge_id = insert_test_edge(&db, "anything");

    db.mark_edge_external(edge_id).unwrap();

    let raw_state = resolution_state_of(&db, edge_id);
    assert_eq!(raw_state, 3);
    let api_state = db.edge_resolution_state(edge_id).unwrap();
    assert_eq!(api_state, 3);
}
4784
/// Edges marked external must not be returned by `unresolved_edges`, while
/// untouched unresolved edges still are.
#[test]
fn test_unresolved_edges_excludes_state_three() {
    let db = Database::open_memory().unwrap();
    insert_test_edge(&db, "still_open");
    let external_id = insert_test_edge(&db, "external_dep");
    db.mark_edge_external(external_id).unwrap();

    let pending = db.unresolved_edges().unwrap();
    let is_listed = |target: &str| pending.iter().any(|e| e.target_name.as_str() == target);

    assert!(is_listed("still_open"));
    assert!(!is_listed("external_dep"));
}
4799
/// `reset_all_unresolvable` returns both an unresolvable (2) and an
/// external (3) edge to state 0 and reports two rows reset.
#[test]
fn test_reset_all_unresolvable_resets_state_two_and_three() {
    let db = Database::open_memory().unwrap();
    let burned_id = insert_test_edge(&db, "burned");
    let external_id = insert_test_edge(&db, "external");
    db.mark_edge_unresolvable(burned_id).unwrap();
    db.mark_edge_external(external_id).unwrap();

    let reset_count = db.reset_all_unresolvable().unwrap();

    assert_eq!(reset_count, 2);
    for edge_id in [burned_id, external_id] {
        assert_eq!(resolution_state_of(&db, edge_id), 0);
    }
}
4815
/// Resetting by name also reopens external (state 3) edges, but only the
/// one whose target name matches.
#[test]
fn test_reset_unresolvable_for_names_reopens_state_three() {
    let db = Database::open_memory().unwrap();
    let foo_id = insert_test_edge(&db, "foo");
    let bar_id = insert_test_edge(&db, "bar");
    for edge_id in [foo_id, bar_id] {
        db.mark_edge_external(edge_id).unwrap();
    }

    let reopened = db
        .reset_unresolvable_for_names(&[String::from("foo")])
        .unwrap();

    assert_eq!(reopened, 1);
    assert_eq!(resolution_state_of(&db, foo_id), 0);
    assert_eq!(resolution_state_of(&db, bar_id), 3);
}
4833
/// `mark_heuristic_exhausted_in_tx` moves the still-unresolved edge to
/// state 4 and leaves the resolved edge at state 1.
#[test]
fn test_mark_heuristic_exhausted_seals_unresolved_state_zero() {
    let db = Database::open_memory().unwrap();
    let open_id = insert_test_edge(&db, "nowhere");
    let resolved_id = insert_test_edge(&db, "somewhere");
    db.update_edge_target(resolved_id, "some:id").unwrap();

    let sealed = db.mark_heuristic_exhausted_in_tx().unwrap();

    assert_eq!(sealed, 1);
    assert_eq!(resolution_state_of(&db, open_id), 4);
    assert_eq!(
        resolution_state_of(&db, resolved_id),
        1,
        "resolved untouched"
    );
}
4850
/// `count_edges_in_state` reports per-state totals: with one resolved and
/// one unresolvable edge, state 0 is empty and states 1 and 2 hold one each.
#[test]
fn test_count_edges_in_state_buckets_by_state() {
    let db = Database::open_memory().unwrap();
    let resolved_id = insert_test_edge(&db, "somewhere");
    db.update_edge_target(resolved_id, "some:id").unwrap();
    let burned_id = insert_test_edge(&db, "burned");
    db.mark_edge_unresolvable(burned_id).unwrap();

    let open_count = db.count_edges_in_state(0).unwrap();
    assert_eq!(open_count, 0);
    let resolved_count = db.count_edges_in_state(1).unwrap();
    assert_eq!(resolved_count, 1);
    let burned_count = db.count_edges_in_state(2).unwrap();
    assert_eq!(burned_count, 1);
}
4863
/// `has_heuristic_exhausted` is false while the only edge is in state 0 and
/// becomes true after `mark_heuristic_exhausted_in_tx`.
#[test]
fn test_has_heuristic_exhausted_tracks_state_four() {
    let db = Database::open_memory().unwrap();
    insert_test_edge(&db, "nowhere");

    let before = db.has_heuristic_exhausted().unwrap();
    assert!(!before, "state 0 not sealed");

    db.mark_heuristic_exhausted_in_tx().unwrap();

    let after = db.has_heuristic_exhausted().unwrap();
    assert!(after);
}
4872
/// Once an edge is in state 4, `resolve_edges` resolves nothing and the
/// edge keeps state 4.
#[test]
fn test_resolve_edges_skips_heuristic_exhausted_state_four() {
    let db = Database::open_memory().unwrap();
    let edge_id = insert_test_edge(&db, "nowhere");
    db.mark_heuristic_exhausted_in_tx().unwrap();
    // Precondition: the edge really was sealed.
    assert_eq!(resolution_state_of(&db, edge_id), 4);

    let resolved_count = db.resolve_edges().unwrap();

    assert_eq!(resolved_count, 0);
    assert_eq!(resolution_state_of(&db, edge_id), 4);
}
4887
/// Edges in state 4 must not be returned by `unresolved_edges`, while an
/// edge inserted after the sealing pass (still state 0) is returned.
#[test]
fn test_unresolved_edges_excludes_state_four() {
    let db = Database::open_memory().unwrap();
    let exhausted = insert_test_edge(&db, "exhausted");
    db.mark_heuristic_exhausted_in_tx().unwrap();
    // Verify the precondition instead of discarding the id: if sealing did
    // not happen, fail here rather than on the exclusion check below.
    assert_eq!(resolution_state_of(&db, exhausted), 4);
    // Inserted after the sealing pass, so it is not sealed by it.
    let _open = insert_test_edge(&db, "still_open");

    let edges = db.unresolved_edges().unwrap();
    let names: Vec<&str> = edges.iter().map(|e| e.target_name.as_str()).collect();
    assert!(names.contains(&"still_open"));
    assert!(!names.contains(&"exhausted"));
}
4903
/// `reopen_heuristic_exhausted` returns only the state-4 edge to state 0;
/// the unresolvable (2) and external (3) edges keep their states.
#[test]
fn test_reopen_heuristic_exhausted_resets_only_state_four() {
    let db = Database::open_memory().unwrap();
    let exhausted_id = insert_test_edge(&db, "exhausted");
    db.mark_heuristic_exhausted_in_tx().unwrap();
    // These two are inserted after the sealing pass and marked explicitly.
    let burned_id = insert_test_edge(&db, "burned");
    db.mark_edge_unresolvable(burned_id).unwrap();
    let external_id = insert_test_edge(&db, "external");
    db.mark_edge_external(external_id).unwrap();

    let reopened = db.reopen_heuristic_exhausted().unwrap();

    assert_eq!(reopened, 1);
    assert_eq!(resolution_state_of(&db, exhausted_id), 0);
    assert_eq!(
        resolution_state_of(&db, burned_id),
        2,
        "LSP verdict sealed"
    );
    assert_eq!(
        resolution_state_of(&db, external_id),
        3,
        "LSP verdict sealed"
    );
}
4922
/// `reset_all_unresolvable` also returns a state-4 edge to state 0,
/// alongside an unresolvable (2) one.
#[test]
fn test_reset_all_unresolvable_also_resets_state_four() {
    let db = Database::open_memory().unwrap();
    let exhausted_id = insert_test_edge(&db, "exhausted");
    db.mark_heuristic_exhausted_in_tx().unwrap();
    let burned_id = insert_test_edge(&db, "burned");
    db.mark_edge_unresolvable(burned_id).unwrap();

    let reset_count = db.reset_all_unresolvable().unwrap();

    assert_eq!(reset_count, 2);
    for edge_id in [exhausted_id, burned_id] {
        assert_eq!(resolution_state_of(&db, edge_id), 0);
    }
}
4937
/// Resetting by name reopens a state-4 edge whose target name matches and
/// leaves the non-matching state-4 edge alone.
#[test]
fn test_reset_unresolvable_for_names_reopens_state_four() {
    let db = Database::open_memory().unwrap();
    let foo_id = insert_test_edge(&db, "foo");
    let bar_id = insert_test_edge(&db, "bar");
    db.mark_heuristic_exhausted_in_tx().unwrap();

    let reopened = db
        .reset_unresolvable_for_names(&[String::from("foo")])
        .unwrap();

    assert_eq!(reopened, 1);
    assert_eq!(resolution_state_of(&db, foo_id), 0);
    assert_eq!(resolution_state_of(&db, bar_id), 4);
}
4953
/// `stats` reports resolved, unresolvable and external edge counts
/// separately, plus the overall edge total.
#[test]
fn test_stats_surfaces_external_and_unresolvable_counts() {
    let db = Database::open_memory().unwrap();

    // One edge per outcome: resolved, unresolvable, external, still open.
    let resolved_id = insert_test_edge(&db, "resolved_target");
    db.update_edge_target(resolved_id, "some:id").unwrap();
    let burned_id = insert_test_edge(&db, "burned");
    db.mark_edge_unresolvable(burned_id).unwrap();
    let external_id = insert_test_edge(&db, "external");
    db.mark_edge_external(external_id).unwrap();
    insert_test_edge(&db, "open");

    let summary = db.stats().unwrap();

    assert_eq!(summary.num_resolved, 1);
    assert_eq!(summary.num_unresolvable, 1);
    assert_eq!(summary.num_external, 1);
    assert_eq!(summary.num_edges, 4);
}
4971
/// After the target symbol row is deleted directly and its file is passed
/// to `invalidate_edges_targeting`, the edge is back in state 0 with a NULL
/// `target_id`.
#[test]
fn test_invalidate_edges_targeting_resets_state_when_target_disappears() {
    let db = Database::open_memory().unwrap();

    let caller = test_symbol("src", SymbolKind::Function, "a.py", 1);
    let ghost = test_symbol("ghost", SymbolKind::Function, "b.py", 1);
    let caller_id = caller.id.clone();
    let ghost_id = ghost.id.clone();
    db.insert_symbols(&[caller, ghost]).unwrap();

    let call = Edge::new(&caller_id, "ghost", EdgeKind::Calls, "a.py", 1);
    db.insert_edge(&call).unwrap();
    let edge_id = db.conn.last_insert_rowid();
    db.update_edge_target(edge_id, &ghost_id).unwrap();
    assert_eq!(resolution_state_of(&db, edge_id), 1);

    // Remove the target row with raw SQL, bypassing the Database API.
    db.conn
        .execute("DELETE FROM symbols WHERE id = ?1", params![ghost_id])
        .unwrap();

    let dirty_files = std::collections::HashSet::from(["b.py".to_string()]);
    db.invalidate_edges_targeting(&dirty_files).unwrap();

    assert_eq!(
        resolution_state_of(&db, edge_id),
        0,
        "dangling edge must return to state=0 so unresolved_edges() can see it"
    );
    let stored_target = db
        .conn
        .query_row(
            "SELECT target_id FROM edges WHERE id = ?1",
            params![edge_id],
            |r| r.get::<_, Option<String>>(0),
        )
        .unwrap();
    assert!(
        stored_target.is_none(),
        "target_id must be NULL after invalidation"
    );
}
5014
/// Deleting a symbol through `delete_symbol` returns the edge that pointed
/// at it to state 0, clears its `resolution_source`, and makes it show up
/// in `unresolved_edges` again.
#[test]
fn test_delete_symbol_resets_state_on_dangling_incoming_edges() {
    let db = Database::open_memory().unwrap();
    let caller = test_symbol("caller", SymbolKind::Function, "a.py", 1);
    let ghost = test_symbol("ghost", SymbolKind::Function, "b.py", 1);
    let caller_id = caller.id.clone();
    let ghost_id = ghost.id.clone();
    db.insert_symbols(&[caller, ghost]).unwrap();

    let call = Edge::new(&caller_id, "ghost", EdgeKind::Calls, "a.py", 1);
    db.insert_edge(&call).unwrap();
    let edge_id = db.conn.last_insert_rowid();
    db.update_edge_target(edge_id, &ghost_id).unwrap();

    db.delete_symbol(&ghost_id).unwrap();

    assert_eq!(resolution_state_of(&db, edge_id), 0);
    assert_eq!(
        resolution_source_of(&db, edge_id),
        None,
        "stale tag must clear"
    );
    let pending = db.unresolved_edges().unwrap();
    let resurfaced = pending.iter().any(|e| e.edge_id == edge_id);
    assert!(
        resurfaced,
        "orphaned edge must resurface in unresolved_edges()"
    );
}
5044
/// Deleting several symbols with `delete_symbols` returns every edge that
/// pointed at them to state 0 and clears each edge's `resolution_source`.
#[test]
fn test_delete_symbols_in_tx_resets_state_on_dangling_incoming_edges() {
    let db = Database::open_memory().unwrap();
    let caller = test_symbol("caller", SymbolKind::Function, "a.py", 1);
    let first_ghost = test_symbol("ghost1", SymbolKind::Function, "b.py", 1);
    let second_ghost = test_symbol("ghost2", SymbolKind::Function, "c.py", 1);
    let caller_id = caller.id.clone();
    let first_id = first_ghost.id.clone();
    let second_id = second_ghost.id.clone();
    db.insert_symbols(&[caller, first_ghost, second_ghost])
        .unwrap();

    let first_call = Edge::new(&caller_id, "ghost1", EdgeKind::Calls, "a.py", 1);
    db.insert_edge(&first_call).unwrap();
    let first_edge = db.conn.last_insert_rowid();
    db.update_edge_target(first_edge, &first_id).unwrap();

    let second_call = Edge::new(&caller_id, "ghost2", EdgeKind::Calls, "a.py", 2);
    db.insert_edge(&second_call).unwrap();
    let second_edge = db.conn.last_insert_rowid();
    db.update_edge_target(second_edge, &second_id).unwrap();

    // Precondition: the explicit target update tagged the edge as "lsp".
    assert_eq!(
        resolution_source_of(&db, first_edge).as_deref(),
        Some("lsp")
    );

    db.delete_symbols(&[first_id, second_id]).unwrap();

    assert_eq!(resolution_state_of(&db, first_edge), 0);
    assert_eq!(resolution_state_of(&db, second_edge), 0);
    assert_eq!(resolution_source_of(&db, first_edge), None);
    assert_eq!(resolution_source_of(&db, second_edge), None);
}
5075
/// An edge resolved by `resolve_edges` ends up in state 1 and is no longer
/// returned by `unresolved_edges`.
#[test]
fn test_heuristic_resolve_flips_state_to_one() {
    let db = Database::open_memory().unwrap();
    let caller = test_symbol("caller", SymbolKind::Function, "a.py", 1);
    let callee = test_symbol("foo", SymbolKind::Function, "a.py", 10);
    let caller_id = caller.id.clone();
    db.insert_symbols(&[caller, callee]).unwrap();

    let call = Edge::new(&caller_id, "foo", EdgeKind::Calls, "a.py", 2);
    db.insert_edge(&call).unwrap();
    let edge_id = db.conn.last_insert_rowid();
    assert_eq!(resolution_state_of(&db, edge_id), 0);

    db.resolve_edges().unwrap();

    assert_eq!(
        resolution_state_of(&db, edge_id),
        1,
        "heuristic resolve must set state=1 so LSP doesn't re-attack the edge"
    );
    let pending = db.unresolved_edges().unwrap();
    let still_listed = pending.iter().any(|e| e.edge_id == edge_id);
    assert!(
        !still_listed,
        "resolved edge must drop out of unresolved_edges()"
    );
}
5105
/// A fresh in-memory database has exactly one index named
/// `idx_edges_unresolved` registered in `sqlite_master`.
#[test]
fn test_partial_unresolved_index_exists() {
    let db = Database::open_memory().unwrap();

    let matching_indexes = db
        .conn
        .query_row(
            "SELECT COUNT(*) FROM sqlite_master
             WHERE type='index' AND name='idx_edges_unresolved'",
            [],
            |row| row.get::<_, i64>(0),
        )
        .unwrap();

    assert_eq!(matching_indexes, 1);
}
5122
/// Edges inserted through the batch `insert_edges` path start in state 0.
#[test]
fn test_resolution_state_default_via_insert_edges_batch() {
    let db = Database::open_memory().unwrap();
    let source = test_symbol("src", SymbolKind::Function, "a.py", 1);
    db.insert_symbols(std::slice::from_ref(&source)).unwrap();

    let batch = [
        Edge::new(&source.id, "x", EdgeKind::Calls, "a.py", 1),
        Edge::new(&source.id, "y", EdgeKind::Calls, "a.py", 2),
    ];
    db.insert_edges(&batch).unwrap();

    let mut state_query = db
        .conn
        .prepare("SELECT resolution_state FROM edges ORDER BY id")
        .unwrap();
    let states = state_query
        .query_map([], |row| row.get::<_, i64>(0))
        .unwrap()
        .collect::<std::result::Result<Vec<i64>, _>>()
        .unwrap();
    assert_eq!(states, vec![0, 0]);
}
5145
/// Opening a hand-built schema-version-3 database must add the
/// `resolution_state` column, leave the `edges` table empty, and bump the
/// stored `schema_version` to `SCHEMA_VERSION`.
#[test]
fn test_migration_v3_to_v4_backfills_resolved_to_state_one() {
    let workdir = tempfile::tempdir().unwrap();
    let db_file = workdir.path().join("v3.sqlite");

    // Build the legacy database by hand: no `resolution_state` column, one
    // edge with a target and one without.
    let legacy = Connection::open(&db_file).unwrap();
    legacy
        .execute_batch(
            "CREATE TABLE symbols (
                id TEXT PRIMARY KEY, name TEXT, kind TEXT, file_path TEXT,
                start_line INTEGER, end_line INTEGER, start_byte INTEGER, end_byte INTEGER,
                parent_id TEXT, signature TEXT, visibility TEXT, is_async BOOLEAN,
                docstring TEXT, in_degree INTEGER DEFAULT 0,
                content_hash TEXT, subtree_hash TEXT);
            CREATE TABLE edges (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                source_id TEXT NOT NULL, target_name TEXT NOT NULL, target_id TEXT,
                kind TEXT NOT NULL, file_path TEXT NOT NULL, line INTEGER);
            CREATE TABLE files (path TEXT PRIMARY KEY);
            CREATE TABLE metadata (key TEXT PRIMARY KEY, value TEXT);
            INSERT INTO metadata (key, value) VALUES ('schema_version', '3');
            INSERT INTO symbols (id, name, kind, file_path) VALUES ('s:1', 'foo', 'function', 'a.py');
            INSERT INTO edges (source_id, target_name, target_id, kind, file_path, line)
                VALUES ('s:1', 'foo', 's:1', 'calls', 'a.py', 1);
            INSERT INTO edges (source_id, target_name, target_id, kind, file_path, line)
                VALUES ('s:1', 'missing', NULL, 'calls', 'a.py', 2);",
        )
        .unwrap();
    // Close the raw connection before the Database reopens the file.
    drop(legacy);

    let db = Database::open(&db_file, DEFAULT_EMBEDDING_DIM).unwrap();

    // Preparing a statement that names the column succeeds only if it exists.
    let column_probe = db
        .conn
        .prepare("SELECT resolution_state FROM edges LIMIT 0");
    assert!(
        column_probe.is_ok(),
        "v3→4 added resolution_state column"
    );
    drop(column_probe);

    let remaining_edges = db
        .conn
        .query_row("SELECT COUNT(*) FROM edges", [], |r| r.get::<_, i64>(0))
        .unwrap();
    assert_eq!(
        remaining_edges, 0,
        "v7 cleared the index for full rebuild"
    );

    let stored_version = db
        .conn
        .query_row(
            "SELECT value FROM metadata WHERE key = 'schema_version'",
            [],
            |r| r.get::<_, String>(0),
        )
        .unwrap();
    assert_eq!(stored_version, SCHEMA_VERSION.to_string());
}
5209
5210 fn resolve_one_and_get_provenance(db: &Database, name: &str) -> Option<EdgeProvenance> {
5216 let resolved = db.resolve_edges().unwrap();
5217 assert_eq!(resolved, 1, "expected exactly one edge to resolve");
5218 let refs = db.refs(name, None).unwrap();
5219 refs.into_iter()
5220 .find(|(e, _)| e.target_id.is_some())
5221 .and_then(|(e, _)| e.provenance)
5222 }
5223
/// With candidates in the caller's own file and in another file, the
/// resolved edge is tagged `SameFile`.
#[test]
fn resolve_tags_provenance_same_file() {
    let db = Database::open_memory().unwrap();
    let caller = test_symbol("process", SymbolKind::Function, "a.py", 1);
    let local_helper = test_symbol("helper", SymbolKind::Function, "a.py", 20);
    let remote_helper = test_symbol("helper", SymbolKind::Function, "b.py", 1);
    let caller_id = caller.id.clone();
    db.insert_symbols(&[caller, local_helper, remote_helper])
        .unwrap();

    let call = Edge::new(&caller_id, "helper", EdgeKind::Calls, "a.py", 5);
    db.insert_edge(&call).unwrap();

    let provenance = resolve_one_and_get_provenance(&db, "helper");
    assert_eq!(provenance, Some(EdgeProvenance::SameFile));
}
5240
/// With one candidate in the caller's directory and one elsewhere, the
/// resolved edge is tagged `SameDir`.
#[test]
fn resolve_tags_provenance_same_dir() {
    let db = Database::open_memory().unwrap();
    let caller = test_symbol("process", SymbolKind::Function, "pkg/a.py", 1);
    let sibling = test_symbol("helper", SymbolKind::Function, "pkg/b.py", 1);
    let distant = test_symbol("helper", SymbolKind::Function, "other/c.py", 1);
    let caller_id = caller.id.clone();
    db.insert_symbols(&[caller, sibling, distant]).unwrap();

    let call = Edge::new(&caller_id, "helper", EdgeKind::Calls, "pkg/a.py", 5);
    db.insert_edge(&call).unwrap();

    let provenance = resolve_one_and_get_provenance(&db, "helper");
    assert_eq!(provenance, Some(EdgeProvenance::SameDir));
}
5262
/// When the only symbol with the target name lives in an unrelated
/// directory, the resolved edge is tagged `UniqueGlobal`.
#[test]
fn resolve_tags_provenance_unique_global() {
    let db = Database::open_memory().unwrap();
    let caller = test_symbol("process", SymbolKind::Function, "a.py", 1);
    let sole_target = test_symbol("only_one", SymbolKind::Function, "far/away.py", 1);
    let caller_id = caller.id.clone();
    db.insert_symbols(&[caller, sole_target]).unwrap();

    let call = Edge::new(&caller_id, "only_one", EdgeKind::Calls, "a.py", 5);
    db.insert_edge(&call).unwrap();

    let provenance = resolve_one_and_get_provenance(&db, "only_one");
    assert_eq!(provenance, Some(EdgeProvenance::UniqueGlobal));
}
5283
/// With a class and a method that share the name `Logger`, a `References`
/// edge to that name is tagged `KindDisambig` after resolution.
#[test]
fn resolve_tags_provenance_kind_disambig() {
    let db = Database::open_memory().unwrap();
    let caller = test_symbol("handleLogin", SymbolKind::Method, "auth/Service.java", 10);
    let class_symbol = test_symbol("Logger", SymbolKind::Class, "util/Logger.java", 1);
    let ctor_symbol = test_symbol("Logger", SymbolKind::Method, "util/Logger.java", 5);
    let caller_id = caller.id.clone();
    db.insert_symbols(&[caller, class_symbol, ctor_symbol])
        .unwrap();

    let reference = Edge::new(
        &caller_id,
        "Logger",
        EdgeKind::References,
        "auth/Service.java",
        12,
    );
    db.insert_edge(&reference).unwrap();

    db.resolve_edges().unwrap();

    let references = db.refs("Logger", None).unwrap();
    let (edge, _) = references
        .iter()
        .find(|(candidate, _)| candidate.kind == EdgeKind::References)
        .unwrap();
    assert_eq!(edge.provenance, Some(EdgeProvenance::KindDisambig));
}
5310
/// With two same-named candidates in other directories, the one sharing
/// the caller's `parent_id` wins and the edge is tagged `ParentScope`.
#[test]
fn resolve_tags_provenance_parent_scope() {
    let db = Database::open_memory().unwrap();
    let shared_parent = Some("app/svc.py:class:Svc".to_string());

    let mut caller = test_symbol("run", SymbolKind::Method, "app/svc.py", 10);
    caller.parent_id = shared_parent.clone();
    let mut sibling = test_symbol("helper", SymbolKind::Method, "lib/a.py", 1);
    sibling.parent_id = shared_parent;
    let mut stranger = test_symbol("helper", SymbolKind::Method, "lib/b.py", 1);
    stranger.parent_id = Some("other/x.py:class:Other".to_string());

    let caller_id = caller.id.clone();
    db.insert_symbols(&[caller, sibling, stranger]).unwrap();

    let call = Edge::new(&caller_id, "helper", EdgeKind::Calls, "app/svc.py", 12);
    db.insert_edge(&call).unwrap();

    let provenance = resolve_one_and_get_provenance(&db, "helper");
    assert_eq!(provenance, Some(EdgeProvenance::ParentScope));
}
5340
/// `callees` exposes the provenance tag of a resolved edge.
#[test]
fn callees_surfaces_provenance() {
    let db = Database::open_memory().unwrap();
    let caller = test_symbol("process", SymbolKind::Function, "a.py", 1);
    let helper = test_symbol("helper", SymbolKind::Function, "a.py", 20);
    let caller_id = caller.id.clone();
    db.insert_symbols(&[caller, helper]).unwrap();

    let call = Edge::new(&caller_id, "helper", EdgeKind::Calls, "a.py", 5);
    db.insert_edge(&call).unwrap();
    db.resolve_edges().unwrap();

    let outgoing = db.callees("process").unwrap();
    assert_eq!(outgoing.len(), 1);
    assert_eq!(outgoing[0].provenance, Some(EdgeProvenance::SameFile));
}
5356
/// `impact` exposes the provenance tag of a resolved `Calls` edge.
#[test]
fn impact_surfaces_provenance() {
    let db = Database::open_memory().unwrap();
    let caller = test_symbol("process", SymbolKind::Function, "a.py", 1);
    let helper = test_symbol("helper", SymbolKind::Function, "a.py", 20);
    let caller_id = caller.id.clone();
    db.insert_symbols(&[caller, helper]).unwrap();

    let call = Edge::new(&caller_id, "helper", EdgeKind::Calls, "a.py", 5);
    db.insert_edge(&call).unwrap();
    db.resolve_edges().unwrap();

    let affected = db.impact("helper", 3).unwrap();
    let (edge, _) = affected
        .iter()
        .find(|(candidate, _)| candidate.kind == EdgeKind::Calls)
        .unwrap();
    assert_eq!(edge.provenance, Some(EdgeProvenance::SameFile));
}
5376
/// Reopening an unresolvable edge by name also clears its
/// `lsp_unresolvable` source tag.
#[test]
fn reset_unresolvable_for_names_clears_provenance() {
    let db = Database::open_memory().unwrap();
    let edge_id = insert_test_edge(&db, "foo");
    db.mark_edge_unresolvable(edge_id).unwrap();

    let tag_before = resolution_source_of(&db, edge_id);
    assert_eq!(tag_before.as_deref(), Some("lsp_unresolvable"));

    let reopened = db
        .reset_unresolvable_for_names(&[String::from("foo")])
        .unwrap();

    assert_eq!(reopened, 1);
    assert_eq!(
        resolution_source_of(&db, edge_id),
        None,
        "stale tag cleared"
    );
}
5395
/// An edge inserted with a target and a provenance already set keeps that
/// provenance, is stored in state 1, and is not listed as unresolved.
#[test]
fn insert_edge_round_trips_provenance() {
    let db = Database::open_memory().unwrap();
    let caller = test_symbol("process", SymbolKind::Function, "a.py", 1);
    let helper = test_symbol("helper", SymbolKind::Function, "a.py", 20);
    let caller_id = caller.id.clone();
    let helper_id = helper.id.clone();
    db.insert_symbols(&[caller, helper]).unwrap();

    let mut resolved_call = Edge::new(&caller_id, "helper", EdgeKind::Calls, "a.py", 5);
    resolved_call.target_id = Some(helper_id);
    resolved_call.provenance = Some(EdgeProvenance::Lsp);
    db.insert_edge(&resolved_call).unwrap();
    let edge_id = db.conn.last_insert_rowid();

    let outgoing = db.callees("process").unwrap();
    assert_eq!(outgoing[0].provenance, Some(EdgeProvenance::Lsp));
    assert_eq!(resolution_state_of(&db, edge_id), 1);

    let pending = db.unresolved_edges().unwrap();
    let listed = pending.iter().any(|e| e.edge_id == edge_id);
    assert!(
        !listed,
        "a resolved insert must not resurface as unresolved"
    );
}
5424
/// An edge inserted without a target is stored in state 0.
#[test]
fn insert_edge_without_target_is_unresolved() {
    let db = Database::open_memory().unwrap();
    let source = test_symbol("src", SymbolKind::Function, "a.py", 1);
    db.insert_symbols(std::slice::from_ref(&source)).unwrap();

    let dangling = Edge::new(&source.id, "missing", EdgeKind::Calls, "a.py", 1);
    db.insert_edge(&dangling).unwrap();
    let edge_id = db.conn.last_insert_rowid();

    assert_eq!(resolution_state_of(&db, edge_id), 0);
}
5437
/// With an import symbol `util.Logger` in the caller's file, both the
/// `Imports` and the `References` edge to `Logger` resolve, and the
/// `References` edge is tagged `ImportPath`.
#[test]
fn resolve_tags_provenance_import_path() {
    let db = Database::open_memory().unwrap();
    let import_symbol = test_symbol("util.Logger", SymbolKind::Import, "auth/service.java", 1);
    let caller = test_symbol("authenticate", SymbolKind::Method, "auth/service.java", 10);
    let class_symbol = test_symbol("Logger", SymbolKind::Class, "util/Logger.java", 1);
    let ctor_symbol = test_symbol("Logger", SymbolKind::Method, "util/Logger.java", 5);
    let import_id = import_symbol.id.clone();
    let caller_id = caller.id.clone();
    db.insert_symbols(&[import_symbol, caller, class_symbol, ctor_symbol])
        .unwrap();

    let import_edge = Edge::new(
        &import_id,
        "Logger",
        EdgeKind::Imports,
        "auth/service.java",
        1,
    );
    db.insert_edge(&import_edge).unwrap();
    let reference_edge = Edge::new(
        &caller_id,
        "Logger",
        EdgeKind::References,
        "auth/service.java",
        15,
    );
    db.insert_edge(&reference_edge).unwrap();

    let resolved_count = db.resolve_edges().unwrap();
    assert_eq!(resolved_count, 2);

    let references = db.refs("Logger", None).unwrap();
    let (edge, _) = references
        .iter()
        .find(|(candidate, _)| candidate.kind == EdgeKind::References)
        .unwrap();
    assert_eq!(edge.provenance, Some(EdgeProvenance::ImportPath));
}
5480
/// `update_edge_target` stores `"lsp"` as the edge's resolution source.
#[test]
fn lsp_resolve_tags_provenance_lsp() {
    let db = Database::open_memory().unwrap();
    let edge_id = insert_test_edge(&db, "anything");

    db.update_edge_target(edge_id, "some:symbol:id").unwrap();

    let tag = resolution_source_of(&db, edge_id);
    assert_eq!(tag.as_deref(), Some("lsp"));
}
5488
/// An edge first tagged `same_file` by `resolve_edges` is retagged `lsp`
/// when `update_edge_target` is applied to it afterwards.
#[test]
fn lsp_overwrite_retags_heuristic_as_lsp() {
    let db = Database::open_memory().unwrap();
    let caller = test_symbol("process", SymbolKind::Function, "a.py", 1);
    let helper = test_symbol("helper", SymbolKind::Function, "a.py", 20);
    let caller_id = caller.id.clone();
    let helper_id = helper.id.clone();
    db.insert_symbols(&[caller, helper]).unwrap();

    let call = Edge::new(&caller_id, "helper", EdgeKind::Calls, "a.py", 5);
    db.insert_edge(&call).unwrap();
    db.resolve_edges().unwrap();

    // Only one edge exists, so LIMIT 1 picks it.
    let edge_id = db
        .conn
        .query_row("SELECT id FROM edges LIMIT 1", [], |r| r.get::<_, i64>(0))
        .unwrap();
    let heuristic_tag = resolution_source_of(&db, edge_id);
    assert_eq!(heuristic_tag.as_deref(), Some("same_file"));

    db.update_edge_target(edge_id, &helper_id).unwrap();

    let final_tag = resolution_source_of(&db, edge_id);
    assert_eq!(final_tag.as_deref(), Some("lsp"));
}
5512
/// `mark_edge_external` stores `"lsp_external"` as the resolution source.
#[test]
fn mark_external_tags_lsp_external() {
    let db = Database::open_memory().unwrap();
    let edge_id = insert_test_edge(&db, "anything");

    db.mark_edge_external(edge_id).unwrap();

    let tag = resolution_source_of(&db, edge_id);
    assert_eq!(tag.as_deref(), Some("lsp_external"));
}
5523
/// `mark_edge_unresolvable` stores `"lsp_unresolvable"` as the resolution
/// source.
#[test]
fn mark_unresolvable_tags_lsp_unresolvable() {
    let db = Database::open_memory().unwrap();
    let edge_id = insert_test_edge(&db, "anything");

    db.mark_edge_unresolvable(edge_id).unwrap();

    let tag = resolution_source_of(&db, edge_id);
    assert_eq!(tag.as_deref(), Some("lsp_unresolvable"));
}
5534
/// `reset_all_unresolvable` clears the `lsp_external` source tag of an
/// edge it reopens.
#[test]
fn reset_unresolvable_clears_provenance() {
    let db = Database::open_memory().unwrap();
    let edge_id = insert_test_edge(&db, "foo");
    db.mark_edge_external(edge_id).unwrap();

    let tag_before = resolution_source_of(&db, edge_id);
    assert_eq!(tag_before.as_deref(), Some("lsp_external"));

    db.reset_all_unresolvable().unwrap();

    assert_eq!(
        resolution_source_of(&db, edge_id),
        None,
        "stale tag cleared"
    );
}
5548
5549 fn bootstrap_pre_v6_db(path: &std::path::Path, schema_version: u32, seed_edges: bool) {
5553 let conn = Connection::open(path).unwrap();
5554 conn.execute_batch(
5555 "CREATE TABLE symbols (
5556 id TEXT PRIMARY KEY, name TEXT, kind TEXT, file_path TEXT,
5557 start_line INTEGER, end_line INTEGER, start_byte INTEGER, end_byte INTEGER,
5558 parent_id TEXT, signature TEXT, visibility TEXT, is_async BOOLEAN,
5559 docstring TEXT, in_degree INTEGER DEFAULT 0,
5560 content_hash TEXT, subtree_hash TEXT);
5561 CREATE TABLE edges (
5562 id INTEGER PRIMARY KEY AUTOINCREMENT,
5563 source_id TEXT NOT NULL, target_name TEXT NOT NULL, target_id TEXT,
5564 kind TEXT NOT NULL, file_path TEXT NOT NULL, line INTEGER,
5565 resolution_state INTEGER NOT NULL DEFAULT 0);
5566 CREATE TABLE files (path TEXT PRIMARY KEY);
5567 CREATE TABLE metadata (key TEXT PRIMARY KEY, value TEXT);
5568 CREATE TABLE query_log (id INTEGER PRIMARY KEY AUTOINCREMENT,
5569 tool TEXT NOT NULL, source TEXT NOT NULL, ts INTEGER NOT NULL);",
5570 )
5571 .unwrap();
5572 conn.execute(
5573 "INSERT INTO metadata (key, value) VALUES ('schema_version', ?1)",
5574 params![schema_version.to_string()],
5575 )
5576 .unwrap();
5577 if seed_edges {
5578 conn.execute_batch(
5579 "INSERT INTO symbols (id, name, kind, file_path) VALUES ('s:1', 'foo', 'function', 'a.py');
5580 INSERT INTO edges (source_id, target_name, target_id, kind, file_path, line, resolution_state)
5581 VALUES ('s:1', 'foo', 's:1', 'calls', 'a.py', 1, 1);
5582 INSERT INTO edges (source_id, target_name, target_id, kind, file_path, line, resolution_state)
5583 VALUES ('s:1', 'missing', NULL, 'calls', 'a.py', 2, 0);",
5584 )
5585 .unwrap();
5586 }
5587 }
5588
/// Opening a fixture stamped with schema version 5 must leave the database
/// with a selectable `edges.resolution_source` column, an empty `edges`
/// table, and `schema_version` equal to the current `SCHEMA_VERSION`.
#[test]
fn migration_v5_to_v6_adds_resolution_source_column() {
    let tmp = tempfile::tempdir().unwrap();
    let path = tmp.path().join("v5.sqlite");
    bootstrap_pre_v6_db(&path, 5, true);

    let db = Database::open(&path, DEFAULT_EMBEDDING_DIM).unwrap();
    let conn = &db.conn;

    // Preparing a SELECT on the column only succeeds if the column exists.
    assert!(
        conn.prepare("SELECT resolution_source FROM edges LIMIT 0")
            .is_ok(),
        "v5→6 added resolution_source column"
    );

    // The fixture seeded two edges; none may survive the open.
    let remaining_edges: i64 = conn
        .query_row("SELECT COUNT(*) FROM edges", [], |row| row.get(0))
        .unwrap();
    assert_eq!(remaining_edges, 0, "v7 cleared the index for full rebuild");

    let stored_version: String = conn
        .query_row(
            "SELECT value FROM metadata WHERE key = 'schema_version'",
            [],
            |row| row.get(0),
        )
        .unwrap();
    assert_eq!(stored_version, SCHEMA_VERSION.to_string());
}
5623
/// A database that claims schema version 6 but whose `edges` table lacks
/// `resolution_source` must have that column selectable after being opened.
#[test]
fn migration_v6_self_heals_missing_column() {
    let tmp = tempfile::tempdir().unwrap();
    let path = tmp.path().join("partial.sqlite");
    // Version stamp says 6, yet the fixture schema has no resolution_source.
    bootstrap_pre_v6_db(&path, 6, false);

    let db = Database::open(&path, DEFAULT_EMBEDDING_DIM).unwrap();

    let probe = db
        .conn
        .prepare("SELECT resolution_source FROM edges LIMIT 0");
    assert!(
        probe.is_ok(),
        "missing resolution_source column was re-added"
    );
}
5639
5640 fn bootstrap_v6_db(path: &std::path::Path) {
5646 let conn = Connection::open(path).unwrap();
5647 conn.execute_batch(
5648 "CREATE TABLE symbols (
5649 id TEXT PRIMARY KEY, name TEXT, kind TEXT, file_path TEXT,
5650 start_line INTEGER, end_line INTEGER, start_byte INTEGER, end_byte INTEGER,
5651 parent_id TEXT, signature TEXT, visibility TEXT, is_async BOOLEAN,
5652 docstring TEXT, in_degree INTEGER DEFAULT 0,
5653 content_hash TEXT, subtree_hash TEXT);
5654 CREATE TABLE edges (
5655 id INTEGER PRIMARY KEY AUTOINCREMENT,
5656 source_id TEXT NOT NULL, target_name TEXT NOT NULL, target_id TEXT,
5657 kind TEXT NOT NULL, file_path TEXT NOT NULL, line INTEGER,
5658 resolution_state INTEGER NOT NULL DEFAULT 0, resolution_source TEXT);
5659 CREATE TABLE files (path TEXT PRIMARY KEY);
5660 CREATE TABLE symbol_content (
5661 symbol_id TEXT PRIMARY KEY, content TEXT NOT NULL, header TEXT NOT NULL,
5662 normalized_name TEXT NOT NULL DEFAULT '');
5663 CREATE VIRTUAL TABLE symbol_fts USING fts5(
5664 symbol_name, normalized_name, content,
5665 content=symbol_content, content_rowid=rowid);
5666 CREATE TRIGGER symbol_content_ai AFTER INSERT ON symbol_content BEGIN
5667 INSERT INTO symbol_fts(rowid, symbol_name, normalized_name, content)
5668 VALUES (new.rowid, (SELECT name FROM symbols WHERE id = new.symbol_id),
5669 new.normalized_name, new.content);
5670 END;
5671 CREATE TRIGGER symbol_content_ad AFTER DELETE ON symbol_content BEGIN
5672 INSERT INTO symbol_fts(symbol_fts, rowid, symbol_name, normalized_name, content)
5673 VALUES ('delete', old.rowid, (SELECT name FROM symbols WHERE id = old.symbol_id),
5674 old.normalized_name, old.content);
5675 END;
5676 CREATE TABLE symbol_embedding_map (symbol_id TEXT NOT NULL);
5677 CREATE TABLE metadata (key TEXT PRIMARY KEY, value TEXT);
5678 CREATE TABLE query_log (id INTEGER PRIMARY KEY AUTOINCREMENT,
5679 tool TEXT NOT NULL, source TEXT NOT NULL, ts INTEGER NOT NULL);
5680 INSERT INTO symbols (id, name, kind, file_path) VALUES ('a.py:import:os.path', 'os.path', 'import', 'a.py');
5681 INSERT INTO files (path) VALUES ('a.py');
5682 INSERT INTO edges (source_id, target_name, kind, file_path, line)
5683 VALUES ('a.py:import:os.path', 'os', 'imports', 'a.py', 1);
5684 INSERT INTO symbol_content (symbol_id, content, header)
5685 VALUES ('a.py:import:os.path', 'body', 'sig');
5686 INSERT INTO symbol_embedding_map (symbol_id) VALUES ('a.py:import:os.path');
5687 INSERT INTO metadata (key, value) VALUES ('schema_version', '6');
5688 INSERT INTO metadata (key, value) VALUES ('last_commit', 'deadbeef');",
5689 )
5690 .unwrap();
5691 }
5692
/// Opening a version-6 fixture must empty the index tables, remove the
/// `last_commit` metadata row, bump `schema_version` to `SCHEMA_VERSION`,
/// and leave exactly one `v6.sqlite.pre-v*` file next to the database.
#[test]
fn migration_v6_to_v7_clears_index_for_full_rebuild() {
    let tmp = tempfile::tempdir().unwrap();
    let path = tmp.path().join("v6.sqlite");
    bootstrap_v6_db(&path);

    let db = Database::open(&path, DEFAULT_EMBEDDING_DIM).unwrap();

    // Every table the fixture seeded must now be empty.
    for table in [
        "symbols",
        "edges",
        "files",
        "symbol_content",
        "symbol_embedding_map",
    ] {
        let rows: i64 = db
            .conn
            .query_row(&format!("SELECT COUNT(*) FROM {table}"), [], |row| {
                row.get(0)
            })
            .unwrap();
        assert_eq!(rows, 0, "{table} cleared");
    }

    let last_commit: Option<String> = db
        .conn
        .query_row(
            "SELECT value FROM metadata WHERE key = 'last_commit'",
            [],
            |row| row.get(0),
        )
        .optional()
        .unwrap();
    assert_eq!(
        last_commit, None,
        "last_commit cleared to force full reindex"
    );

    let stored_version: String = db
        .conn
        .query_row(
            "SELECT value FROM metadata WHERE key = 'schema_version'",
            [],
            |row| row.get(0),
        )
        .unwrap();
    assert_eq!(stored_version, SCHEMA_VERSION.to_string());

    // Count sibling files whose name starts with the backup prefix.
    let backup_count = std::fs::read_dir(tmp.path())
        .unwrap()
        .filter_map(Result::ok)
        .map(|entry| entry.file_name())
        .filter(|name| name.to_string_lossy().starts_with("v6.sqlite.pre-v"))
        .count();
    assert_eq!(backup_count, 1, "v6→7 wipe must back up the index first");
}
5756
/// `read_metadata_at` must return the value previously stored under the key
/// via `set_metadata`, once the writing `Database` handle has been dropped.
#[test]
fn read_metadata_at_returns_value_when_present() {
    let dir = tempfile::TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");

    let writer = Database::open(&db_path, 384).unwrap();
    writer.set_metadata("last_commit", "abc1234").unwrap();
    // Release the handle before reading the file back by path.
    drop(writer);

    let stored = read_metadata_at(&db_path, "last_commit").unwrap();
    assert_eq!(stored.as_deref(), Some("abc1234"));
}
5770
/// With a freshly opened database and no `last_commit` ever written,
/// `read_metadata_at` must return `Ok(None)`.
#[test]
fn read_metadata_at_returns_none_when_row_absent() {
    let dir = tempfile::TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");
    // Held open for the duration of the read, as in the original test.
    let _db = Database::open(&db_path, 384).unwrap();

    let stored = read_metadata_at(&db_path, "last_commit").unwrap();
    assert_eq!(stored, None);
}
5779
/// For a SQLite file that only contains an unrelated `notes` table,
/// `read_metadata_at` must return `Ok(None)` rather than an error.
#[test]
fn read_metadata_at_returns_none_for_non_cartog_sqlite() {
    let dir = tempfile::TempDir::new().unwrap();
    let db_path = dir.path().join("foreign.db");

    {
        // The connection is closed at the end of this scope, before the read.
        let foreign = Connection::open(&db_path).unwrap();
        foreign
            .execute_batch("CREATE TABLE notes(content TEXT);")
            .unwrap();
    }

    let stored = read_metadata_at(&db_path, "last_commit").unwrap();
    assert_eq!(stored, None);
}
5792
/// When the `last_commit` row exists but its value is SQL NULL,
/// `read_metadata_at` must return `Ok(None)`.
#[test]
fn read_metadata_at_returns_none_for_null_value() {
    let dir = tempfile::TempDir::new().unwrap();
    let db_path = dir.path().join("test.db");

    let db = Database::open(&db_path, 384).unwrap();
    // Write the NULL directly through the connection with raw SQL.
    db.conn
        .execute(
            "INSERT OR REPLACE INTO metadata (key, value) VALUES ('last_commit', NULL)",
            [],
        )
        .unwrap();
    drop(db);

    let stored = read_metadata_at(&db_path, "last_commit").unwrap();
    assert_eq!(stored, None);
}
5809}